comparison dsputil.h @ 853:eacc2dd8fd9d libavcodec

* using DSPContext - so each codec can use its local (sub)set of CPU extensions
author kabi
date Mon, 11 Nov 2002 09:40:17 +0000
parents b78812db886f
children 058194d7ade6
comparison
equal deleted inserted replaced
852:c01c98206ee6 853:eacc2dd8fd9d
43 43
44 /* temporary */ 44 /* temporary */
45 extern UINT32 squareTbl[512]; 45 extern UINT32 squareTbl[512];
46 extern UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; 46 extern UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
47 47
48 void dsputil_init(void);
49 48
50 /* minimum alignment rules ;) 49 /* minimum alignment rules ;)
51 if you notice errors in the alignment rules, need more alignment for some asm code on some cpu 50 if you notice errors in the alignment rules, need more alignment for some asm code on some cpu
52 or need to use a function with less aligned data, then send a mail to the ffmpeg-dev list, ... 51 or need to use a function with less aligned data, then send a mail to the ffmpeg-dev list, ...
53 52
54 !warning these alignments might not match reality, (missing attribute((align)) stuff somewhere possible) 53 !warning these alignments might not match reality, (missing attribute((align)) stuff somewhere possible)
55 i (michael) didn't check them, these are just the alignments which i think could be reached easily ... 54 i (michael) didn't check them, these are just the alignments which i think could be reached easily ...
56 55
57 !future video codecs might need functions with less strict alignment 56 !future video codecs might need functions with less strict alignment
58 */ 57 */
59 58
60 /* pixel ops : interface with DCT */ 59 /*
61 extern void (*get_pixels)(DCTELEM *block/*align 16*/, const UINT8 *pixels/*align 8*/, int line_size);
62 extern void (*diff_pixels)(DCTELEM *block/*align 16*/, const UINT8 *s1/*align 8*/, const UINT8 *s2/*align 8*/, int stride);
63 extern void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size);
64 extern void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size);
65 extern void (*ff_gmc1)(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder);
66 extern void (*ff_gmc )(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int stride, int h, int ox, int oy,
67 int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
68 extern void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
69 extern int (*pix_sum)(UINT8 * pix, int line_size);
70 extern int (*pix_norm1)(UINT8 * pix, int line_size);
71
72
73
74 void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size); 60 void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size);
75 void diff_pixels_c(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride); 61 void diff_pixels_c(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride);
76 void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size); 62 void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
77 void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size); 63 void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
78 void clear_blocks_c(DCTELEM *blocks); 64 void clear_blocks_c(DCTELEM *blocks);
65 */
79 66
80 /* add and put pixel (decoding) */ 67 /* add and put pixel (decoding) */
81 // blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16 68 // blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16
82 typedef void (*op_pixels_func)(UINT8 *block/*align width (8 or 16)*/, const UINT8 *pixels/*align 1*/, int line_size, int h); 69 typedef void (*op_pixels_func)(UINT8 *block/*align width (8 or 16)*/, const UINT8 *pixels/*align 1*/, int line_size, int h);
83 typedef void (*qpel_mc_func)(UINT8 *dst/*align width (8 or 16)*/, UINT8 *src/*align 1*/, int stride); 70 typedef void (*qpel_mc_func)(UINT8 *dst/*align width (8 or 16)*/, UINT8 *src/*align 1*/, int stride);
84 71
85 extern op_pixels_func put_pixels_tab[2][4]; 72
86 extern op_pixels_func avg_pixels_tab[2][4];
87 extern op_pixels_func put_no_rnd_pixels_tab[2][4];
88 extern op_pixels_func avg_no_rnd_pixels_tab[2][4];
89 extern qpel_mc_func put_qpel_pixels_tab[2][16];
90 extern qpel_mc_func avg_qpel_pixels_tab[2][16];
91 extern qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
92 extern qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16];
93 73
94 #define CALL_2X_PIXELS(a, b, n)\ 74 #define CALL_2X_PIXELS(a, b, n)\
95 static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\ 75 static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
96 b(block , pixels , line_size, h);\ 76 b(block , pixels , line_size, h);\
97 b(block+n, pixels+n, line_size, h);\ 77 b(block+n, pixels+n, line_size, h);\
98 } 78 }
99 79
100 /* motion estimation */ 80 /* motion estimation */
101 81
102 typedef int (*op_pixels_abs_func)(UINT8 *blk1/*align width (8 or 16)*/, UINT8 *blk2/*align 1*/, int line_size); 82 typedef int (*op_pixels_abs_func)(UINT8 *blk1/*align width (8 or 16)*/, UINT8 *blk2/*align 1*/, int line_size);
103 83 /*
104 extern op_pixels_abs_func pix_abs16x16;
105 extern op_pixels_abs_func pix_abs16x16_x2;
106 extern op_pixels_abs_func pix_abs16x16_y2;
107 extern op_pixels_abs_func pix_abs16x16_xy2;
108 extern op_pixels_abs_func pix_abs8x8;
109 extern op_pixels_abs_func pix_abs8x8_x2;
110 extern op_pixels_abs_func pix_abs8x8_y2;
111 extern op_pixels_abs_func pix_abs8x8_xy2;
112
113 int pix_abs16x16_c(UINT8 *blk1, UINT8 *blk2, int lx); 84 int pix_abs16x16_c(UINT8 *blk1, UINT8 *blk2, int lx);
114 int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx); 85 int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx);
115 int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx); 86 int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx);
116 int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx); 87 int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx);
88 */
89 typedef struct DSPContext {
90 /* pixel ops : interface with DCT */
91 void (*get_pixels)(DCTELEM *block/*align 16*/, const UINT8 *pixels/*align 8*/, int line_size);
92 void (*diff_pixels)(DCTELEM *block/*align 16*/, const UINT8 *s1/*align 8*/, const UINT8 *s2/*align 8*/, int stride);
93 void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size);
94 void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size);
95 void (*gmc1)(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder);
96 void (*gmc )(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int stride, int h, int ox, int oy,
97 int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
98 void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
99 int (*pix_sum)(UINT8 * pix, int line_size);
100 int (*pix_norm1)(UINT8 * pix, int line_size);
101
102 /* maybe create an array for 16/8 functions */
103 op_pixels_func put_pixels_tab[2][4];
104 op_pixels_func avg_pixels_tab[2][4];
105 op_pixels_func put_no_rnd_pixels_tab[2][4];
106 op_pixels_func avg_no_rnd_pixels_tab[2][4];
107 qpel_mc_func put_qpel_pixels_tab[2][16];
108 qpel_mc_func avg_qpel_pixels_tab[2][16];
109 qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
110 qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16];
111
112 op_pixels_abs_func pix_abs16x16;
113 op_pixels_abs_func pix_abs16x16_x2;
114 op_pixels_abs_func pix_abs16x16_y2;
115 op_pixels_abs_func pix_abs16x16_xy2;
116 op_pixels_abs_func pix_abs8x8;
117 op_pixels_abs_func pix_abs8x8_x2;
118 op_pixels_abs_func pix_abs8x8_y2;
119 op_pixels_abs_func pix_abs8x8_xy2;
120 } DSPContext;
121
122 void dsputil_init(DSPContext* p, unsigned mask);
117 123
118 /** 124 /**
119 * permute block according to permutation. 125 * permute block according to permutation.
120 * @param last last non zero element in scantable order 126 * @param last last non zero element in scantable order
121 */ 127 */
122 void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last); 128 void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last);
123 129
130 #define emms_c()
131
124 #if defined(HAVE_MMX) 132 #if defined(HAVE_MMX)
133
134 #undef emms_c()
125 135
126 #define MM_MMX 0x0001 /* standard MMX */ 136 #define MM_MMX 0x0001 /* standard MMX */
127 #define MM_3DNOW 0x0004 /* AMD 3DNOW */ 137 #define MM_3DNOW 0x0004 /* AMD 3DNOW */
128 #define MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */ 138 #define MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */
129 #define MM_SSE 0x0008 /* SSE functions */ 139 #define MM_SSE 0x0008 /* SSE functions */
130 #define MM_SSE2 0x0010 /* PIV SSE2 functions */ 140 #define MM_SSE2 0x0010 /* PIV SSE2 functions */
131 141
132 extern int mm_flags; 142 extern int mm_flags;
133 143
134 int mm_support(void); 144 int mm_support(void);
145 void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size);
146 void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size);
135 147
136 static inline void emms(void) 148 static inline void emms(void)
137 { 149 {
138 __asm __volatile ("emms;":::"memory"); 150 __asm __volatile ("emms;":::"memory");
139 } 151 }
144 emms();\ 156 emms();\
145 } 157 }
146 158
147 #define __align8 __attribute__ ((aligned (8))) 159 #define __align8 __attribute__ ((aligned (8)))
148 160
149 void dsputil_init_mmx(void); 161 void dsputil_init_mmx(DSPContext* c, unsigned mask);
150 void dsputil_set_bit_exact_mmx(void); 162 void dsputil_set_bit_exact_mmx(DSPContext* c, unsigned mask);
151 163
152 #elif defined(ARCH_ARMV4L) 164 #elif defined(ARCH_ARMV4L)
153
154 #define emms_c()
155 165
156 /* This is to use 4 bytes read to the IDCT pointers for some 'zero' 166 /* This is to use 4 bytes read to the IDCT pointers for some 'zero'
157 line optimizations */ 167 line optimizations */
158 #define __align8 __attribute__ ((aligned (4))) 168 #define __align8 __attribute__ ((aligned (4)))
159 169
160 void dsputil_init_armv4l(void); 170 void dsputil_init_armv4l(DSPContext* c, unsigned mask);
161 171
162 #elif defined(HAVE_MLIB) 172 #elif defined(HAVE_MLIB)
163
164 #define emms_c()
165 173
166 /* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */ 174 /* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */
167 #define __align8 __attribute__ ((aligned (8))) 175 #define __align8 __attribute__ ((aligned (8)))
168 176
169 void dsputil_init_mlib(void); 177 void dsputil_init_mlib(DSPContext* c, unsigned mask);
170 178
171 #elif defined(ARCH_ALPHA) 179 #elif defined(ARCH_ALPHA)
172 180
173 #define emms_c()
174 #define __align8 __attribute__ ((aligned (8))) 181 #define __align8 __attribute__ ((aligned (8)))
175 182
176 void dsputil_init_alpha(void); 183 void dsputil_init_alpha(DSPContext* c, unsigned mask);
177 184
178 #elif defined(ARCH_POWERPC) 185 #elif defined(ARCH_POWERPC)
179 186
180 #define emms_c()
181 #define __align8 __attribute__ ((aligned (16))) 187 #define __align8 __attribute__ ((aligned (16)))
182 188
183 void dsputil_init_ppc(void); 189 void dsputil_init_ppc(DSPContext* c, unsigned mask);
184 190
185 #elif defined(HAVE_MMI) 191 #elif defined(HAVE_MMI)
186 192
187 #define emms_c()
188
189 #define __align8 __attribute__ ((aligned (16))) 193 #define __align8 __attribute__ ((aligned (16)))
190 194
191 void dsputil_init_mmi(void); 195 void dsputil_init_mmi(DSPContext* c, unsigned mask);
192 196
193 #else 197 #else
194
195 #define emms_c()
196 198
197 #define __align8 199 #define __align8
198 200
199 #endif 201 #endif
200 202
261 FFTSample *tsin; 263 FFTSample *tsin;
262 FFTContext fft; 264 FFTContext fft;
263 } MDCTContext; 265 } MDCTContext;
264 266
265 int ff_mdct_init(MDCTContext *s, int nbits, int inverse); 267 int ff_mdct_init(MDCTContext *s, int nbits, int inverse);
266 void ff_imdct_calc(MDCTContext *s, FFTSample *output, 268 void ff_imdct_calc(MDCTContext *s, FFTSample *output,
267 const FFTSample *input, FFTSample *tmp); 269 const FFTSample *input, FFTSample *tmp);
268 void ff_mdct_calc(MDCTContext *s, FFTSample *out, 270 void ff_mdct_calc(MDCTContext *s, FFTSample *out,
269 const FFTSample *input, FFTSample *tmp); 271 const FFTSample *input, FFTSample *tmp);
270 void ff_mdct_end(MDCTContext *s); 272 void ff_mdct_end(MDCTContext *s);
271 273
272 #ifndef HAVE_LRINTF 274 #ifndef HAVE_LRINTF
273 /* XXX: add ISOC specific test to avoid specific BSD testing. */ 275 /* XXX: add ISOC specific test to avoid specific BSD testing. */