Mercurial > libavcodec.hg
comparison dsputil.h @ 853:eacc2dd8fd9d libavcodec
* using DSPContext - so each codec could use its local (sub)set of CPU extension
author | kabi |
---|---|
date | Mon, 11 Nov 2002 09:40:17 +0000 |
parents | b78812db886f |
children | 058194d7ade6 |
comparison
equal
deleted
inserted
replaced
852:c01c98206ee6 | 853:eacc2dd8fd9d |
---|---|
43 | 43 |
44 /* temporary */ | 44 /* temporary */ |
45 extern UINT32 squareTbl[512]; | 45 extern UINT32 squareTbl[512]; |
46 extern UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; | 46 extern UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; |
47 | 47 |
48 void dsputil_init(void); | |
49 | 48 |
50 /* minimum alignment rules ;) | 49 /* minimum alignment rules ;) |
51 if u notice errors in the align stuff, need more alignment for some asm code for some cpu | 50 if u notice errors in the align stuff, need more alignment for some asm code for some cpu |
52 or need to use a function with less aligned data then send a mail to the ffmpeg-dev list, ... | 51 or need to use a function with less aligned data then send a mail to the ffmpeg-dev list, ... |
53 | 52 |
54 !warning these alignments might not match reality, (missing attribute((align)) stuff somewhere possible) | 53 !warning these alignments might not match reality, (missing attribute((align)) stuff somewhere possible) |
55 i (michael) didn't check them, these are just the alignments which i think could be reached easily ... | 54 i (michael) didn't check them, these are just the alignments which i think could be reached easily ... |
56 | 55 |
57 !future video codecs might need functions with less strict alignment | 56 !future video codecs might need functions with less strict alignment |
58 */ | 57 */ |
59 | 58 |
60 /* pixel ops : interface with DCT */ | 59 /* |
61 extern void (*get_pixels)(DCTELEM *block/*align 16*/, const UINT8 *pixels/*align 8*/, int line_size); | |
62 extern void (*diff_pixels)(DCTELEM *block/*align 16*/, const UINT8 *s1/*align 8*/, const UINT8 *s2/*align 8*/, int stride); | |
63 extern void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size); | |
64 extern void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size); | |
65 extern void (*ff_gmc1)(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder); | |
66 extern void (*ff_gmc )(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int stride, int h, int ox, int oy, | |
67 int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); | |
68 extern void (*clear_blocks)(DCTELEM *blocks/*align 16*/); | |
69 extern int (*pix_sum)(UINT8 * pix, int line_size); | |
70 extern int (*pix_norm1)(UINT8 * pix, int line_size); | |
71 | |
72 | |
73 | |
74 void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size); | 60 void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size); |
75 void diff_pixels_c(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride); | 61 void diff_pixels_c(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride); |
76 void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size); | 62 void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size); |
77 void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size); | 63 void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size); |
78 void clear_blocks_c(DCTELEM *blocks); | 64 void clear_blocks_c(DCTELEM *blocks); |
65 */ | |
79 | 66 |
80 /* add and put pixel (decoding) */ | 67 /* add and put pixel (decoding) */ |
81 // blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16 | 68 // blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16 |
82 typedef void (*op_pixels_func)(UINT8 *block/*align width (8 or 16)*/, const UINT8 *pixels/*align 1*/, int line_size, int h); | 69 typedef void (*op_pixels_func)(UINT8 *block/*align width (8 or 16)*/, const UINT8 *pixels/*align 1*/, int line_size, int h); |
83 typedef void (*qpel_mc_func)(UINT8 *dst/*align width (8 or 16)*/, UINT8 *src/*align 1*/, int stride); | 70 typedef void (*qpel_mc_func)(UINT8 *dst/*align width (8 or 16)*/, UINT8 *src/*align 1*/, int stride); |
84 | 71 |
85 extern op_pixels_func put_pixels_tab[2][4]; | 72 |
86 extern op_pixels_func avg_pixels_tab[2][4]; | |
87 extern op_pixels_func put_no_rnd_pixels_tab[2][4]; | |
88 extern op_pixels_func avg_no_rnd_pixels_tab[2][4]; | |
89 extern qpel_mc_func put_qpel_pixels_tab[2][16]; | |
90 extern qpel_mc_func avg_qpel_pixels_tab[2][16]; | |
91 extern qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16]; | |
92 extern qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16]; | |
93 | 73 |
94 #define CALL_2X_PIXELS(a, b, n)\ | 74 #define CALL_2X_PIXELS(a, b, n)\ |
95 static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ | 75 static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ |
96 b(block , pixels , line_size, h);\ | 76 b(block , pixels , line_size, h);\ |
97 b(block+n, pixels+n, line_size, h);\ | 77 b(block+n, pixels+n, line_size, h);\ |
98 } | 78 } |
99 | 79 |
100 /* motion estimation */ | 80 /* motion estimation */ |
101 | 81 |
102 typedef int (*op_pixels_abs_func)(UINT8 *blk1/*align width (8 or 16)*/, UINT8 *blk2/*align 1*/, int line_size); | 82 typedef int (*op_pixels_abs_func)(UINT8 *blk1/*align width (8 or 16)*/, UINT8 *blk2/*align 1*/, int line_size); |
103 | 83 /* |
104 extern op_pixels_abs_func pix_abs16x16; | |
105 extern op_pixels_abs_func pix_abs16x16_x2; | |
106 extern op_pixels_abs_func pix_abs16x16_y2; | |
107 extern op_pixels_abs_func pix_abs16x16_xy2; | |
108 extern op_pixels_abs_func pix_abs8x8; | |
109 extern op_pixels_abs_func pix_abs8x8_x2; | |
110 extern op_pixels_abs_func pix_abs8x8_y2; | |
111 extern op_pixels_abs_func pix_abs8x8_xy2; | |
112 | |
113 int pix_abs16x16_c(UINT8 *blk1, UINT8 *blk2, int lx); | 84 int pix_abs16x16_c(UINT8 *blk1, UINT8 *blk2, int lx); |
114 int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx); | 85 int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx); |
115 int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx); | 86 int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx); |
116 int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx); | 87 int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx); |
88 */ | |
89 typedef struct DSPContext { | |
90 /* pixel ops : interface with DCT */ | |
91 void (*get_pixels)(DCTELEM *block/*align 16*/, const UINT8 *pixels/*align 8*/, int line_size); | |
92 void (*diff_pixels)(DCTELEM *block/*align 16*/, const UINT8 *s1/*align 8*/, const UINT8 *s2/*align 8*/, int stride); | |
93 void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size); | |
94 void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size); | |
95 void (*gmc1)(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder); | |
96 void (*gmc )(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int stride, int h, int ox, int oy, | |
97 int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height); | |
98 void (*clear_blocks)(DCTELEM *blocks/*align 16*/); | |
99 int (*pix_sum)(UINT8 * pix, int line_size); | |
100 int (*pix_norm1)(UINT8 * pix, int line_size); | |
101 | |
102 /* maybe create an array for 16/8 functions */ | |
103 op_pixels_func put_pixels_tab[2][4]; | |
104 op_pixels_func avg_pixels_tab[2][4]; | |
105 op_pixels_func put_no_rnd_pixels_tab[2][4]; | |
106 op_pixels_func avg_no_rnd_pixels_tab[2][4]; | |
107 qpel_mc_func put_qpel_pixels_tab[2][16]; | |
108 qpel_mc_func avg_qpel_pixels_tab[2][16]; | |
109 qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16]; | |
110 qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16]; | |
111 | |
112 op_pixels_abs_func pix_abs16x16; | |
113 op_pixels_abs_func pix_abs16x16_x2; | |
114 op_pixels_abs_func pix_abs16x16_y2; | |
115 op_pixels_abs_func pix_abs16x16_xy2; | |
116 op_pixels_abs_func pix_abs8x8; | |
117 op_pixels_abs_func pix_abs8x8_x2; | |
118 op_pixels_abs_func pix_abs8x8_y2; | |
119 op_pixels_abs_func pix_abs8x8_xy2; | |
120 } DSPContext; | |
121 | |
122 void dsputil_init(DSPContext* p, unsigned mask); | |
117 | 123 |
118 /** | 124 /** |
119 * permute block according to permutation. | 125 * permute block according to permutation. |
120 * @param last last non zero element in scantable order | 126 * @param last last non zero element in scantable order |
121 */ | 127 */ |
122 void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last); | 128 void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last); |
123 | 129 |
130 #define emms_c() | |
131 | |
124 #if defined(HAVE_MMX) | 132 #if defined(HAVE_MMX) |
133 | |
134 #undef emms_c() | |
125 | 135 |
126 #define MM_MMX 0x0001 /* standard MMX */ | 136 #define MM_MMX 0x0001 /* standard MMX */ |
127 #define MM_3DNOW 0x0004 /* AMD 3DNOW */ | 137 #define MM_3DNOW 0x0004 /* AMD 3DNOW */ |
128 #define MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */ | 138 #define MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */ |
129 #define MM_SSE 0x0008 /* SSE functions */ | 139 #define MM_SSE 0x0008 /* SSE functions */ |
130 #define MM_SSE2 0x0010 /* PIV SSE2 functions */ | 140 #define MM_SSE2 0x0010 /* PIV SSE2 functions */ |
131 | 141 |
132 extern int mm_flags; | 142 extern int mm_flags; |
133 | 143 |
134 int mm_support(void); | 144 int mm_support(void); |
145 void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size); | |
146 void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size); | |
135 | 147 |
136 static inline void emms(void) | 148 static inline void emms(void) |
137 { | 149 { |
138 __asm __volatile ("emms;":::"memory"); | 150 __asm __volatile ("emms;":::"memory"); |
139 } | 151 } |
144 emms();\ | 156 emms();\ |
145 } | 157 } |
146 | 158 |
147 #define __align8 __attribute__ ((aligned (8))) | 159 #define __align8 __attribute__ ((aligned (8))) |
148 | 160 |
149 void dsputil_init_mmx(void); | 161 void dsputil_init_mmx(DSPContext* c, unsigned mask); |
150 void dsputil_set_bit_exact_mmx(void); | 162 void dsputil_set_bit_exact_mmx(DSPContext* c, unsigned mask); |
151 | 163 |
152 #elif defined(ARCH_ARMV4L) | 164 #elif defined(ARCH_ARMV4L) |
153 | |
154 #define emms_c() | |
155 | 165 |
156 /* This is to use 4 bytes read to the IDCT pointers for some 'zero' | 166 /* This is to use 4 bytes read to the IDCT pointers for some 'zero' |
157 line optimizations */ | 167 line optimizations */ |
158 #define __align8 __attribute__ ((aligned (4))) | 168 #define __align8 __attribute__ ((aligned (4))) |
159 | 169 |
160 void dsputil_init_armv4l(void); | 170 void dsputil_init_armv4l(DSPContext* c, unsigned mask); |
161 | 171 |
162 #elif defined(HAVE_MLIB) | 172 #elif defined(HAVE_MLIB) |
163 | |
164 #define emms_c() | |
165 | 173 |
166 /* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */ | 174 /* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */ |
167 #define __align8 __attribute__ ((aligned (8))) | 175 #define __align8 __attribute__ ((aligned (8))) |
168 | 176 |
169 void dsputil_init_mlib(void); | 177 void dsputil_init_mlib(DSPContext* c, unsigned mask); |
170 | 178 |
171 #elif defined(ARCH_ALPHA) | 179 #elif defined(ARCH_ALPHA) |
172 | 180 |
173 #define emms_c() | |
174 #define __align8 __attribute__ ((aligned (8))) | 181 #define __align8 __attribute__ ((aligned (8))) |
175 | 182 |
176 void dsputil_init_alpha(void); | 183 void dsputil_init_alpha(DSPContext* c, unsigned mask); |
177 | 184 |
178 #elif defined(ARCH_POWERPC) | 185 #elif defined(ARCH_POWERPC) |
179 | 186 |
180 #define emms_c() | |
181 #define __align8 __attribute__ ((aligned (16))) | 187 #define __align8 __attribute__ ((aligned (16))) |
182 | 188 |
183 void dsputil_init_ppc(void); | 189 void dsputil_init_ppc(DSPContext* c, unsigned mask); |
184 | 190 |
185 #elif defined(HAVE_MMI) | 191 #elif defined(HAVE_MMI) |
186 | 192 |
187 #define emms_c() | |
188 | |
189 #define __align8 __attribute__ ((aligned (16))) | 193 #define __align8 __attribute__ ((aligned (16))) |
190 | 194 |
191 void dsputil_init_mmi(void); | 195 void dsputil_init_mmi(DSPContext* c, unsigned mask); |
192 | 196 |
193 #else | 197 #else |
194 | |
195 #define emms_c() | |
196 | 198 |
197 #define __align8 | 199 #define __align8 |
198 | 200 |
199 #endif | 201 #endif |
200 | 202 |
261 FFTSample *tsin; | 263 FFTSample *tsin; |
262 FFTContext fft; | 264 FFTContext fft; |
263 } MDCTContext; | 265 } MDCTContext; |
264 | 266 |
265 int ff_mdct_init(MDCTContext *s, int nbits, int inverse); | 267 int ff_mdct_init(MDCTContext *s, int nbits, int inverse); |
266 void ff_imdct_calc(MDCTContext *s, FFTSample *output, | 268 void ff_imdct_calc(MDCTContext *s, FFTSample *output, |
267 const FFTSample *input, FFTSample *tmp); | 269 const FFTSample *input, FFTSample *tmp); |
268 void ff_mdct_calc(MDCTContext *s, FFTSample *out, | 270 void ff_mdct_calc(MDCTContext *s, FFTSample *out, |
269 const FFTSample *input, FFTSample *tmp); | 271 const FFTSample *input, FFTSample *tmp); |
270 void ff_mdct_end(MDCTContext *s); | 272 void ff_mdct_end(MDCTContext *s); |
271 | 273 |
272 #ifndef HAVE_LRINTF | 274 #ifndef HAVE_LRINTF |
273 /* XXX: add ISOC specific test to avoid specific BSD testing. */ | 275 /* XXX: add ISOC specific test to avoid specific BSD testing. */ |