i386/motion_est_mmx.c @ 1064:b32afefe7d33 (libavcodec)

* UINTX -> uintx_t INTX -> intx_t

author      kabi
date        Tue, 11 Feb 2003 16:35:48 +0000
parents     bb5de8a59da8
children    92fb44eae6b6
comparison  1063:fdeac9642346 -> 1064:b32afefe7d33
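The whole changeset is mechanical: libavcodec's homegrown integer typedefs are replaced by the C99 <stdint.h> names, as the commit message says. A sketch of the mapping (the exact legacy definitions lived in common.h and are assumed here to have had the same widths as their C99 replacements):

    /* legacy libavcodec name -> C99 replacement (assumed equivalent widths) */
    typedef unsigned char      UINT8;   /* -> uint8_t  */
    typedef unsigned long long UINT64;  /* -> uint64_t */
    typedef signed char        INT8;    /* -> int8_t   */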
--- a/i386/motion_est_mmx.c
+++ b/i386/motion_est_mmx.c
@@ -21,19 +21,19 @@
 #include "../dsputil.h"
 
 void dsputil_init_pix_mmx(DSPContext* c, unsigned mask);
 void dsputil_set_bit_exact_pix_mmx(DSPContext* c, unsigned mask);
 
-static const __attribute__ ((aligned(8))) UINT64 round_tab[3]={
+static const __attribute__ ((aligned(8))) uint64_t round_tab[3]={
     0x0000000000000000,
     0x0001000100010001,
     0x0002000200020002,
 };
 
 static __attribute__ ((aligned(8))) uint64_t bone= 0x0101010101010101LL;
 
-static inline void sad8_mmx(UINT8 *blk1, UINT8 *blk2, int stride, int h)
+static inline void sad8_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
 {
     int len= -(stride<<h);
     asm volatile(
         ".balign 16 \n\t"
         "1: \n\t"
65 : "+a" (len) 65 : "+a" (len)
66 : "r" (blk1 - len), "r" (blk2 - len), "r" (stride) 66 : "r" (blk1 - len), "r" (blk2 - len), "r" (stride)
67 ); 67 );
68 } 68 }
69 69
70 static inline void sad8_mmx2(UINT8 *blk1, UINT8 *blk2, int stride, int h) 70 static inline void sad8_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
71 { 71 {
72 int len= -(stride<<h); 72 int len= -(stride<<h);
73 asm volatile( 73 asm volatile(
74 ".balign 16 \n\t" 74 ".balign 16 \n\t"
75 "1: \n\t" 75 "1: \n\t"
87 : "+a" (len) 87 : "+a" (len)
88 : "r" (blk1 - len), "r" (blk2 - len), "r" (stride) 88 : "r" (blk1 - len), "r" (blk2 - len), "r" (stride)
89 ); 89 );
90 } 90 }
91 91
92 static inline void sad8_2_mmx2(UINT8 *blk1a, UINT8 *blk1b, UINT8 *blk2, int stride, int h) 92 static inline void sad8_2_mmx2(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h)
93 { 93 {
94 int len= -(stride<<h); 94 int len= -(stride<<h);
95 asm volatile( 95 asm volatile(
96 ".balign 16 \n\t" 96 ".balign 16 \n\t"
97 "1: \n\t" 97 "1: \n\t"
113 : "+a" (len) 113 : "+a" (len)
114 : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" (stride) 114 : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" (stride)
115 ); 115 );
116 } 116 }
117 117
118 static inline void sad8_4_mmx2(UINT8 *blk1, UINT8 *blk2, int stride, int h) 118 static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
119 { //FIXME reuse src 119 { //FIXME reuse src
120 int len= -(stride<<h); 120 int len= -(stride<<h);
121 asm volatile( 121 asm volatile(
122 ".balign 16 \n\t" 122 ".balign 16 \n\t"
123 "movq "MANGLE(bone)", %%mm5 \n\t" 123 "movq "MANGLE(bone)", %%mm5 \n\t"
150 : "+a" (len) 150 : "+a" (len)
151 : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len), "r" (stride) 151 : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len), "r" (stride)
152 ); 152 );
153 } 153 }
154 154
155 static inline void sad8_2_mmx(UINT8 *blk1a, UINT8 *blk1b, UINT8 *blk2, int stride, int h) 155 static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h)
156 { 156 {
157 int len= -(stride<<h); 157 int len= -(stride<<h);
158 asm volatile( 158 asm volatile(
159 ".balign 16 \n\t" 159 ".balign 16 \n\t"
160 "1: \n\t" 160 "1: \n\t"
188 : "+a" (len) 188 : "+a" (len)
189 : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" (stride) 189 : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" (stride)
190 ); 190 );
191 } 191 }
192 192
193 static inline void sad8_4_mmx(UINT8 *blk1, UINT8 *blk2, int stride, int h) 193 static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
194 { 194 {
195 int len= -(stride<<h); 195 int len= -(stride<<h);
196 asm volatile( 196 asm volatile(
197 ".balign 16 \n\t" 197 ".balign 16 \n\t"
198 "1: \n\t" 198 "1: \n\t"
@@ -266,30 +266,30 @@
     return ret;
 }
 
 
 #define PIX_SAD(suf)\
-static int pix_abs8x8_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
+static int pix_abs8x8_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
 {\
     asm volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6 \n\t":);\
 \
     sad8_ ## suf(blk1, blk2, stride, 3);\
 \
     return sum_ ## suf();\
 }\
-static int sad8x8_ ## suf(void *s, UINT8 *blk2, UINT8 *blk1, int stride)\
+static int sad8x8_ ## suf(void *s, uint8_t *blk2, uint8_t *blk1, int stride)\
 {\
     asm volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6 \n\t":);\
 \
     sad8_ ## suf(blk1, blk2, stride, 3);\
 \
     return sum_ ## suf();\
 }\
 \
-static int pix_abs8x8_x2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
+static int pix_abs8x8_x2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
 {\
     asm volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6 \n\t"\
                  "movq %0, %%mm5 \n\t"\
                  :: "m"(round_tab[1]) \
@@ -298,11 +298,11 @@
     sad8_2_ ## suf(blk1, blk1+1, blk2, stride, 3);\
 \
     return sum_ ## suf();\
 }\
 \
-static int pix_abs8x8_y2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
+static int pix_abs8x8_y2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
 {\
     asm volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6 \n\t"\
                  "movq %0, %%mm5 \n\t"\
                  :: "m"(round_tab[1]) \
@@ -311,11 +311,11 @@
     sad8_2_ ## suf(blk1, blk1+stride, blk2, stride, 3);\
 \
     return sum_ ## suf();\
 }\
 \
-static int pix_abs8x8_xy2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
+static int pix_abs8x8_xy2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
 {\
     asm volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6 \n\t"\
                  "movq %0, %%mm5 \n\t"\
                  :: "m"(round_tab[2]) \
@@ -324,31 +324,31 @@
     sad8_4_ ## suf(blk1, blk2, stride, 3);\
 \
     return sum_ ## suf();\
 }\
 \
-static int pix_abs16x16_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
+static int pix_abs16x16_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
 {\
     asm volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6 \n\t":);\
 \
     sad8_ ## suf(blk1 , blk2 , stride, 4);\
     sad8_ ## suf(blk1+8, blk2+8, stride, 4);\
 \
     return sum_ ## suf();\
 }\
-static int sad16x16_ ## suf(void *s, UINT8 *blk2, UINT8 *blk1, int stride)\
+static int sad16x16_ ## suf(void *s, uint8_t *blk2, uint8_t *blk1, int stride)\
 {\
     asm volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6 \n\t":);\
 \
     sad8_ ## suf(blk1 , blk2 , stride, 4);\
     sad8_ ## suf(blk1+8, blk2+8, stride, 4);\
 \
     return sum_ ## suf();\
 }\
-static int pix_abs16x16_x2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
+static int pix_abs16x16_x2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
 {\
     asm volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6 \n\t"\
                  "movq %0, %%mm5 \n\t"\
                  :: "m"(round_tab[1]) \
@@ -357,11 +357,11 @@
     sad8_2_ ## suf(blk1 , blk1+1, blk2 , stride, 4);\
     sad8_2_ ## suf(blk1+8, blk1+9, blk2+8, stride, 4);\
 \
     return sum_ ## suf();\
 }\
-static int pix_abs16x16_y2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
+static int pix_abs16x16_y2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
 {\
     asm volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6 \n\t"\
                  "movq %0, %%mm5 \n\t"\
                  :: "m"(round_tab[1]) \
@@ -370,11 +370,11 @@
     sad8_2_ ## suf(blk1 , blk1+stride, blk2 , stride, 4);\
     sad8_2_ ## suf(blk1+8, blk1+stride+8,blk2+8, stride, 4);\
 \
     return sum_ ## suf();\
 }\
-static int pix_abs16x16_xy2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
+static int pix_abs16x16_xy2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
 {\
     asm volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6 \n\t"\
                  "movq %0, %%mm5 \n\t"\
                  :: "m"(round_tab[2]) \