Mercurial > libavcodec.hg
annotate dsputil.h @ 386:f49629bab18d libavcodec
hopefully faster mmx2&3dnow MC
author | michaelni |
---|---|
date | Fri, 17 May 2002 01:04:14 +0000 |
parents | 9c6f056f0e41 |
children | 2c3e25f4c496 |
rev | line source |
---|---|
0 | 1 #ifndef DSPUTIL_H |
2 #define DSPUTIL_H | |
3 | |
4 #include "common.h" | |
252
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
214
diff
changeset
|
5 #include "avcodec.h" |
0 | 6 |
255 | 7 //#define DEBUG |
0 | 8 /* dct code */ |
9 typedef short DCTELEM; | |
10 | |
11 void jpeg_fdct_ifast (DCTELEM *data); | |
12 | |
13 void j_rev_dct (DCTELEM *data); | |
14 | |
15 void fdct_mmx(DCTELEM *block); | |
16 | |
17 void (*av_fdct)(DCTELEM *block); | |
18 | |
34 | 19 /* encoding scans */ |
20 extern UINT8 ff_alternate_horizontal_scan[64]; | |
21 extern UINT8 ff_alternate_vertical_scan[64]; | |
22 extern UINT8 zigzag_direct[64]; | |
23 | |
190
9e0e56869d05
fix for non-mmx runtimedetect encoding bugs - patch by Michael Niedermayer <michaelni@gmx.at>
uid46427
parents:
174
diff
changeset
|
24 /* permutation table */ |
9e0e56869d05
fix for non-mmx runtimedetect encoding bugs - patch by Michael Niedermayer <michaelni@gmx.at>
uid46427
parents:
174
diff
changeset
|
25 extern UINT8 permutation[64]; |
9e0e56869d05
fix for non-mmx runtimedetect encoding bugs - patch by Michael Niedermayer <michaelni@gmx.at>
uid46427
parents:
174
diff
changeset
|
26 |
0 | 27 /* pixel operations */ |
28 #define MAX_NEG_CROP 384 | |
29 | |
30 /* temporary */ | |
31 extern UINT32 squareTbl[512]; | |
50 | 32 extern UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; |
0 | 33 |
34 void dsputil_init(void); | |
35 | |
36 /* pixel ops : interface with DCT */ | |
37 | |
19
82d4c9be9873
MMX/MMXEXT iDCT support, using external functions currently defined in libmpeg2
arpi_esp
parents:
6
diff
changeset
|
38 extern void (*ff_idct)(DCTELEM *block); |
0 | 39 extern void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); |
324 | 40 extern void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride); |
0 | 41 extern void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); |
42 extern void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); | |
255 | 43 extern void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder); |
296 | 44 extern void (*clear_blocks)(DCTELEM *blocks); |
255 | 45 |
0 | 46 |
47 void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size); | |
324 | 48 void diff_pixels_c(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride); |
0 | 49 void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size); |
50 void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size); | |
296 | 51 void clear_blocks_c(DCTELEM *blocks); |
0 | 52 |
53 /* add and put pixel (decoding) */ | |
54 typedef void (*op_pixels_func)(UINT8 *block, const UINT8 *pixels, int line_size, int h); | |
255 | 55 typedef void (*qpel_mc_func)(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my); |
0 | 56 |
57 extern op_pixels_func put_pixels_tab[4]; | |
58 extern op_pixels_func avg_pixels_tab[4]; | |
59 extern op_pixels_func put_no_rnd_pixels_tab[4]; | |
60 extern op_pixels_func avg_no_rnd_pixels_tab[4]; | |
255 | 61 extern qpel_mc_func qpel_mc_rnd_tab[16]; |
62 extern qpel_mc_func qpel_mc_no_rnd_tab[16]; | |
63 | |
0 | 64 |
65 /* sub pixel (encoding) */ | |
66 extern void (*sub_pixels_tab[4])(DCTELEM *block, const UINT8 *pixels, int line_size, int h); | |
67 | |
/*
 * Convenience wrapper around sub_pixels_tab: picks the entry selected by
 * dxy and invokes it on (block, pixels, line_size) with the height
 * argument fixed to 8.
 */
#define sub_pixels_2(block, pixels, line_size, dxy) \
    sub_pixels_tab[(dxy)]((block), (pixels), (line_size), 8)
70 | |
71 /* motion estimation */ | |
72 | |
294 | 73 typedef int (*op_pixels_abs_func)(UINT8 *blk1, UINT8 *blk2, int line_size); |
0 | 74 |
75 extern op_pixels_abs_func pix_abs16x16; | |
76 extern op_pixels_abs_func pix_abs16x16_x2; | |
77 extern op_pixels_abs_func pix_abs16x16_y2; | |
78 extern op_pixels_abs_func pix_abs16x16_xy2; | |
294 | 79 extern op_pixels_abs_func pix_abs8x8; |
80 extern op_pixels_abs_func pix_abs8x8_x2; | |
81 extern op_pixels_abs_func pix_abs8x8_y2; | |
82 extern op_pixels_abs_func pix_abs8x8_xy2; | |
0 | 83 |
294 | 84 int pix_abs16x16_c(UINT8 *blk1, UINT8 *blk2, int lx); |
85 int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx); | |
86 int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx); | |
87 int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx); | |
0 | 88 |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
89 static inline int block_permute_op(int j) |
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
90 { |
190
9e0e56869d05
fix for non-mmx runtimedetect encoding bugs - patch by Michael Niedermayer <michaelni@gmx.at>
uid46427
parents:
174
diff
changeset
|
91 return permutation[j]; |
174
ac5075a55488
new IDCT code by Michael Niedermayer (michaelni@gmx.at) - #define SIMPLE_IDCT to enable
arpi_esp
parents:
88
diff
changeset
|
92 } |
34 | 93 |
94 void block_permute(INT16 *block); | |
95 | |
62 | 96 #if defined(HAVE_MMX) |
0 | 97 |
98 #define MM_MMX 0x0001 /* standard MMX */ | |
99 #define MM_3DNOW 0x0004 /* AMD 3DNOW */ | |
100 #define MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */ | |
101 #define MM_SSE 0x0008 /* SSE functions */ | |
102 #define MM_SSE2 0x0010 /* PIV SSE2 functions */ | |
103 | |
104 extern int mm_flags; | |
105 | |
106 int mm_support(void); | |
107 | |
/*
 * Issue the x86 "emms" instruction.
 *
 * The asm statement is volatile and carries a "memory" clobber, so the
 * compiler neither drops it nor moves memory accesses across it.
 */
static inline void emms(void)
{
    __asm__ __volatile__ (
        "emms;"
        : /* no outputs */
        : /* no inputs */
        : "memory"
    );
}
ec4642daa6fe
added emms_c() macro which should can used in c code in both mmx/non mmx cases
glantau
parents:
2
diff
changeset
|
112 |
ec4642daa6fe
added emms_c() macro which should can used in c code in both mmx/non mmx cases
glantau
parents:
2
diff
changeset
|
/*
 * emms_c(): call emms() from C code, but only when the runtime CPU flags
 * in mm_flags report MMX support (MM_MMX bit set); otherwise do nothing.
 *
 * The body is wrapped in do { ... } while (0) so that the macro plus its
 * trailing semicolon forms exactly one statement. With a bare { ... }
 * block, "if (x) emms_c(); else ..." would not compile because the ';'
 * after the closing brace terminates the if before the else is seen.
 */
#define emms_c() \
    do {\
        if (mm_flags & MM_MMX)\
            emms();\
    } while (0)
118 | |
119 #define __align8 __attribute__ ((aligned (8))) | |
120 | |
121 void dsputil_init_mmx(void); | |
122 | |
62 | 123 #elif defined(ARCH_ARMV4L) |
124 | |
125 #define emms_c() | |
126 | |
127 /* This is to use 4 bytes read to the IDCT pointers for some 'zero' | |
128 line optimizations */ | |
129 #define __align8 __attribute__ ((aligned (4))) | |
130 | |
131 void dsputil_init_armv4l(void); | |
132 | |
88 | 133 #elif defined(HAVE_MLIB) |
134 | |
135 #define emms_c() | |
136 | |
137 /* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */ | |
138 #define __align8 __attribute__ ((aligned (8))) | |
139 | |
140 void dsputil_init_mlib(void); | |
141 | |
214
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
190
diff
changeset
|
142 #elif defined(ARCH_ALPHA) |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
190
diff
changeset
|
143 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
190
diff
changeset
|
144 #define emms_c() |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
190
diff
changeset
|
145 #define __align8 __attribute__ ((aligned (8))) |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
190
diff
changeset
|
146 |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
190
diff
changeset
|
147 void dsputil_init_alpha(void); |
73df666cacc7
Alpha optimizations by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
nickols_k
parents:
190
diff
changeset
|
148 |
0 | 149 #else |
150 | |
6
ec4642daa6fe
added emms_c() macro which should can used in c code in both mmx/non mmx cases
glantau
parents:
2
diff
changeset
|
151 #define emms_c() |
ec4642daa6fe
added emms_c() macro which should can used in c code in both mmx/non mmx cases
glantau
parents:
2
diff
changeset
|
152 |
0 | 153 #define __align8 |
154 | |
155 #endif | |
156 | |
252
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
214
diff
changeset
|
157 /* PSNR */ |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
214
diff
changeset
|
158 void get_psnr(UINT8 *orig_image[3], UINT8 *coded_image[3], |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
214
diff
changeset
|
159 int orig_linesize[3], int coded_linesize, |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
214
diff
changeset
|
160 AVCodecContext *avctx); |
ddb1a0e94cf4
- Added PSNR feature to libavcodec and ffmpeg. By now just Y PSNR until I'm
pulento
parents:
214
diff
changeset
|
161 |
0 | 162 #endif |