libavcodec.hg: comparison of i386/motion_est_mmx.c @ 1064:b32afefe7d33

* UINTX -> uintx_t, INTX -> intx_t

author | kabi
---|---
date | Tue, 11 Feb 2003 16:35:48 +0000
parents | bb5de8a59da8
children | 92fb44eae6b6
comparison: 1063:fdeac9642346 -> 1064:b32afefe7d33
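The change is purely mechanical: libavcodec's private integer typedefs give way to the C99 `<inttypes.h>` names. As a rough sketch of the mapping being retired (the legacy definitions lived in the tree's common headers, so treat these as assumptions rather than quotes):

```c
/* Hypothetical reconstruction of the legacy typedefs being retired;
 * the real definitions lived in libavcodec's common headers. */
#include <inttypes.h>

typedef uint8_t  UINT8;   /* previously something like: unsigned char  */
typedef uint16_t UINT16;  /* previously: unsigned short                */
typedef uint32_t UINT32;  /* previously: unsigned int                  */
typedef uint64_t UINT64;  /* previously: unsigned long long            */
typedef int8_t   INT8;    /* ...and likewise for the signed INTX set   */
```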
@@ -21,19 +21,19 @@
 #include "../dsputil.h"
 
 void dsputil_init_pix_mmx(DSPContext* c, unsigned mask);
 void dsputil_set_bit_exact_pix_mmx(DSPContext* c, unsigned mask);
 
-static const __attribute__ ((aligned(8))) UINT64 round_tab[3]={
+static const __attribute__ ((aligned(8))) uint64_t round_tab[3]={
     0x0000000000000000,
     0x0001000100010001,
     0x0002000200020002,
 };
 
 static __attribute__ ((aligned(8))) uint64_t bone= 0x0101010101010101LL;
 
-static inline void sad8_mmx(UINT8 *blk1, UINT8 *blk2, int stride, int h)
+static inline void sad8_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
 {
     int len= -(stride<<h);
     asm volatile(
         ".balign 16 \n\t"
         "1: \n\t"
65 : "+a" (len) | 65 : "+a" (len) |
66 : "r" (blk1 - len), "r" (blk2 - len), "r" (stride) | 66 : "r" (blk1 - len), "r" (blk2 - len), "r" (stride) |
67 ); | 67 ); |
68 } | 68 } |
69 | 69 |
70 static inline void sad8_mmx2(UINT8 *blk1, UINT8 *blk2, int stride, int h) | 70 static inline void sad8_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) |
71 { | 71 { |
72 int len= -(stride<<h); | 72 int len= -(stride<<h); |
73 asm volatile( | 73 asm volatile( |
74 ".balign 16 \n\t" | 74 ".balign 16 \n\t" |
75 "1: \n\t" | 75 "1: \n\t" |
87 : "+a" (len) | 87 : "+a" (len) |
88 : "r" (blk1 - len), "r" (blk2 - len), "r" (stride) | 88 : "r" (blk1 - len), "r" (blk2 - len), "r" (stride) |
89 ); | 89 ); |
90 } | 90 } |
91 | 91 |
92 static inline void sad8_2_mmx2(UINT8 *blk1a, UINT8 *blk1b, UINT8 *blk2, int stride, int h) | 92 static inline void sad8_2_mmx2(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h) |
93 { | 93 { |
94 int len= -(stride<<h); | 94 int len= -(stride<<h); |
95 asm volatile( | 95 asm volatile( |
96 ".balign 16 \n\t" | 96 ".balign 16 \n\t" |
97 "1: \n\t" | 97 "1: \n\t" |
@@ -113,11 +113,11 @@
         : "+a" (len)
         : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" (stride)
     );
 }
 
-static inline void sad8_4_mmx2(UINT8 *blk1, UINT8 *blk2, int stride, int h)
+static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
 { //FIXME reuse src
     int len= -(stride<<h);
     asm volatile(
         ".balign 16 \n\t"
         "movq "MANGLE(bone)", %%mm5 \n\t"
@@ -150,11 +150,11 @@
         : "+a" (len)
         : "r" (blk1 - len), "r" (blk1 - len + stride), "r" (blk2 - len), "r" (stride)
     );
 }
 
-static inline void sad8_2_mmx(UINT8 *blk1a, UINT8 *blk1b, UINT8 *blk2, int stride, int h)
+static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h)
 {
     int len= -(stride<<h);
     asm volatile(
         ".balign 16 \n\t"
         "1: \n\t"
@@ -188,11 +188,11 @@
         : "+a" (len)
         : "r" (blk1a - len), "r" (blk1b -len), "r" (blk2 - len), "r" (stride)
     );
 }
 
-static inline void sad8_4_mmx(UINT8 *blk1, UINT8 *blk2, int stride, int h)
+static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
 {
     int len= -(stride<<h);
     asm volatile(
         ".balign 16 \n\t"
         "1: \n\t"
@@ -266,30 +266,30 @@
     return ret;
 }
 
 
 #define PIX_SAD(suf)\
-static int pix_abs8x8_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
+static int pix_abs8x8_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
 {\
     asm volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6 \n\t":);\
 \
     sad8_ ## suf(blk1, blk2, stride, 3);\
 \
     return sum_ ## suf();\
 }\
-static int sad8x8_ ## suf(void *s, UINT8 *blk2, UINT8 *blk1, int stride)\
+static int sad8x8_ ## suf(void *s, uint8_t *blk2, uint8_t *blk1, int stride)\
 {\
     asm volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6 \n\t":);\
 \
     sad8_ ## suf(blk1, blk2, stride, 3);\
 \
     return sum_ ## suf();\
 }\
 \
-static int pix_abs8x8_x2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
+static int pix_abs8x8_x2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
 {\
     asm volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6 \n\t"\
                  "movq %0, %%mm5 \n\t"\
                  :: "m"(round_tab[1]) \
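Since the whole comparison family is generated through token pasting, it may help to see one expansion written out. Assuming the macro is instantiated with the suffixes suggested by the helpers above (`mmx`, `mmx2`), `PIX_SAD(mmx)` would produce, among others:

```c
/* Hand-expanded from the PIX_SAD macro body for suf = mmx
 * (illustration; the actual instantiation is outside this excerpt). */
static int pix_abs8x8_mmx(uint8_t *blk2, uint8_t *blk1, int stride)
{
    asm volatile("pxor %%mm7, %%mm7 \n\t"
                 "pxor %%mm6, %%mm6 \n\t":);  /* clear the accumulator regs */

    sad8_mmx(blk1, blk2, stride, 3);          /* 8 rows: h = 3 */

    return sum_mmx();                         /* drain %%mm6/%%mm7 */
}
```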
@@ -298,11 +298,11 @@
     sad8_2_ ## suf(blk1, blk1+1, blk2, stride, 3);\
 \
     return sum_ ## suf();\
 }\
 \
-static int pix_abs8x8_y2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
+static int pix_abs8x8_y2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
 {\
     asm volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6 \n\t"\
                  "movq %0, %%mm5 \n\t"\
                  :: "m"(round_tab[1]) \
@@ -311,11 +311,11 @@
     sad8_2_ ## suf(blk1, blk1+stride, blk2, stride, 3);\
 \
     return sum_ ## suf();\
 }\
 \
-static int pix_abs8x8_xy2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
+static int pix_abs8x8_xy2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
 {\
     asm volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6 \n\t"\
                  "movq %0, %%mm5 \n\t"\
                  :: "m"(round_tab[2]) \
@@ -324,31 +324,31 @@
     sad8_4_ ## suf(blk1, blk2, stride, 3);\
 \
     return sum_ ## suf();\
 }\
 \
-static int pix_abs16x16_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
+static int pix_abs16x16_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
 {\
     asm volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6 \n\t":);\
 \
     sad8_ ## suf(blk1 , blk2 , stride, 4);\
     sad8_ ## suf(blk1+8, blk2+8, stride, 4);\
 \
     return sum_ ## suf();\
 }\
-static int sad16x16_ ## suf(void *s, UINT8 *blk2, UINT8 *blk1, int stride)\
+static int sad16x16_ ## suf(void *s, uint8_t *blk2, uint8_t *blk1, int stride)\
 {\
     asm volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6 \n\t":);\
 \
     sad8_ ## suf(blk1 , blk2 , stride, 4);\
     sad8_ ## suf(blk1+8, blk2+8, stride, 4);\
 \
     return sum_ ## suf();\
 }\
-static int pix_abs16x16_x2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
+static int pix_abs16x16_x2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
 {\
     asm volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6 \n\t"\
                  "movq %0, %%mm5 \n\t"\
                  :: "m"(round_tab[1]) \
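Note how the 16x16 variants are composed: each is two 8-wide passes over `blk1` and `blk1+8`, with `h` raised from 3 to 4 so the same 8-byte inner loop covers 16 rows; two passes of 8 columns by 16 rows account for the full 256-pixel macroblock.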
@@ -357,11 +357,11 @@
     sad8_2_ ## suf(blk1 , blk1+1, blk2 , stride, 4);\
     sad8_2_ ## suf(blk1+8, blk1+9, blk2+8, stride, 4);\
 \
     return sum_ ## suf();\
 }\
-static int pix_abs16x16_y2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
+static int pix_abs16x16_y2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
 {\
     asm volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6 \n\t"\
                  "movq %0, %%mm5 \n\t"\
                  :: "m"(round_tab[1]) \
@@ -370,11 +370,11 @@
     sad8_2_ ## suf(blk1 , blk1+stride, blk2 , stride, 4);\
     sad8_2_ ## suf(blk1+8, blk1+stride+8,blk2+8, stride, 4);\
 \
     return sum_ ## suf();\
 }\
-static int pix_abs16x16_xy2_ ## suf(UINT8 *blk2, UINT8 *blk1, int stride)\
+static int pix_abs16x16_xy2_ ## suf(uint8_t *blk2, uint8_t *blk1, int stride)\
 {\
     asm volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6 \n\t"\
                  "movq %0, %%mm5 \n\t"\
                  :: "m"(round_tab[2]) \
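The excerpt ends mid-macro at line 380. Presumably the remainder of the file closes the macro and instantiates it once per CPU flavour, along the lines of the sketch below (hypothetical; the actual lines fall outside this view), after which `dsputil_init_pix_mmx()` can install whichever set matches the capability `mask`:

```c
/* Hypothetical instantiation, consistent with the helper suffixes above. */
PIX_SAD(mmx)
PIX_SAD(mmx2)
```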