comparison i386/motion_est_mmx.c @ 8031:eebc7209c47f libavcodec

Convert the asm keyword to __asm__.

Neither the asm() nor the __asm__() keyword is part of the C99 standard. GCC accepts the former in C89 mode, but not in C99 mode unless GNU extensions are enabled (with -fasm). The latter form is accepted in any mode as an extension, without requiring further command-line options. The Sun Studio C99 compiler likewise rejects asm() while accepting __asm__(), albeit with a warning that it is not valid C99 syntax.
author flameeyes
date Thu, 16 Oct 2008 13:34:09 +0000
parents f7cbb7733146
children 0d108ec85620
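
For context, a minimal sketch of the portability difference the patch addresses. The file name asm_keyword_demo.c, the PLAIN_ASM macro, and the add-one instruction are hypothetical illustrations, not part of this change; the compiler behaviour is as described in the commit message above.

    /* asm_keyword_demo.c -- why __asm__ is the portable spelling (GCC):
     *   gcc -std=gnu89 -c asm_keyword_demo.c -DPLAIN_ASM      # asm() accepted
     *   gcc -std=c99   -c asm_keyword_demo.c -DPLAIN_ASM      # plain asm() rejected
     *   gcc -std=c99 -fasm -c asm_keyword_demo.c -DPLAIN_ASM  # -fasm re-enables asm()
     *   gcc -std=c99   -c asm_keyword_demo.c                  # __asm__() accepted in any mode
     * (x86 AT&T syntax, matching this file)
     */
    static inline int add_one(int x)
    {
        int y;
    #ifdef PLAIN_ASM
        asm("lea 1(%1), %0" : "=r" (y) : "r" (x));      /* keyword exists only with GNU extensions */
    #else
        __asm__("lea 1(%1), %0" : "=r" (y) : "r" (x));  /* accepted as an extension in every mode  */
    #endif
        return y;
    }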
diff -r a512ac8fa540 -r eebc7209c47f i386/motion_est_mmx.c
--- a/i386/motion_est_mmx.c
+++ b/i386/motion_est_mmx.c
@@ -34,11 +34,11 @@
 DECLARE_ASM_CONST(8, uint64_t, bone)= 0x0101010101010101LL;
 
 static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
 {
     x86_reg len= -(stride*h);
-    asm volatile(
+    __asm__ volatile(
         ASMALIGN(4)
         "1:                             \n\t"
         "movq (%1, %%"REG_a"), %%mm0    \n\t"
         "movq (%2, %%"REG_a"), %%mm2    \n\t"
         "movq (%2, %%"REG_a"), %%mm4    \n\t"
@@ -69,11 +69,11 @@
     );
 }
 
 static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
 {
-    asm volatile(
+    __asm__ volatile(
         ASMALIGN(4)
         "1:                             \n\t"
         "movq (%1), %%mm0               \n\t"
         "movq (%1, %3), %%mm1           \n\t"
         "psadbw (%2), %%mm0             \n\t"
@@ -90,11 +90,11 @@
 }
 
 static int sad16_sse2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)
 {
     int ret;
-    asm volatile(
+    __asm__ volatile(
         "pxor %%xmm6, %%xmm6            \n\t"
         ASMALIGN(4)
         "1:                             \n\t"
         "movdqu (%1), %%xmm0            \n\t"
         "movdqu (%1, %3), %%xmm1        \n\t"
107 "sub $2, %0 \n\t" 107 "sub $2, %0 \n\t"
108 " jg 1b \n\t" 108 " jg 1b \n\t"
109 : "+r" (h), "+r" (blk1), "+r" (blk2) 109 : "+r" (h), "+r" (blk1), "+r" (blk2)
110 : "r" ((x86_reg)stride) 110 : "r" ((x86_reg)stride)
111 ); 111 );
112 asm volatile( 112 __asm__ volatile(
113 "movhlps %%xmm6, %%xmm0 \n\t" 113 "movhlps %%xmm6, %%xmm0 \n\t"
114 "paddw %%xmm0, %%xmm6 \n\t" 114 "paddw %%xmm0, %%xmm6 \n\t"
115 "movd %%xmm6, %0 \n\t" 115 "movd %%xmm6, %0 \n\t"
116 : "=r"(ret) 116 : "=r"(ret)
117 ); 117 );
118 return ret; 118 return ret;
119 } 119 }
120 120
121 static inline void sad8_x2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) 121 static inline void sad8_x2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
122 { 122 {
123 asm volatile( 123 __asm__ volatile(
124 ASMALIGN(4) 124 ASMALIGN(4)
125 "1: \n\t" 125 "1: \n\t"
126 "movq (%1), %%mm0 \n\t" 126 "movq (%1), %%mm0 \n\t"
127 "movq (%1, %3), %%mm1 \n\t" 127 "movq (%1, %3), %%mm1 \n\t"
128 "pavgb 1(%1), %%mm0 \n\t" 128 "pavgb 1(%1), %%mm0 \n\t"
@@ -140,11 +140,11 @@
     );
 }
 
 static inline void sad8_y2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
 {
-    asm volatile(
+    __asm__ volatile(
         "movq (%1), %%mm0               \n\t"
         "add %3, %1                     \n\t"
         ASMALIGN(4)
         "1:                             \n\t"
         "movq (%1), %%mm1               \n\t"
@@ -165,11 +165,11 @@
     );
 }
 
 static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
 {
-    asm volatile(
+    __asm__ volatile(
         "movq "MANGLE(bone)", %%mm5     \n\t"
         "movq (%1), %%mm0               \n\t"
         "pavgb 1(%1), %%mm0             \n\t"
         "add %3, %1                     \n\t"
         ASMALIGN(4)
@@ -196,11 +196,11 @@
 }
 
 static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h)
 {
     x86_reg len= -(stride*h);
-    asm volatile(
+    __asm__ volatile(
         ASMALIGN(4)
         "1:                             \n\t"
         "movq (%1, %%"REG_a"), %%mm0    \n\t"
         "movq (%2, %%"REG_a"), %%mm1    \n\t"
         "movq (%1, %%"REG_a"), %%mm2    \n\t"
@@ -234,11 +234,11 @@
 }
 
 static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
 {
     x86_reg len= -(stride*h);
-    asm volatile(
+    __asm__ volatile(
         "movq (%1, %%"REG_a"), %%mm0    \n\t"
         "movq 1(%1, %%"REG_a"), %%mm2   \n\t"
         "movq %%mm0, %%mm1              \n\t"
         "movq %%mm2, %%mm3              \n\t"
         "punpcklbw %%mm7, %%mm0         \n\t"
@@ -287,11 +287,11 @@
 }
 
 static inline int sum_mmx(void)
 {
     int ret;
-    asm volatile(
+    __asm__ volatile(
         "movq %%mm6, %%mm0              \n\t"
         "psrlq $32, %%mm6               \n\t"
         "paddw %%mm0, %%mm6             \n\t"
         "movq %%mm6, %%mm0              \n\t"
         "psrlq $16, %%mm6               \n\t"
@@ -303,11 +303,11 @@
 }
 
 static inline int sum_mmx2(void)
 {
     int ret;
-    asm volatile(
+    __asm__ volatile(
         "movd %%mm6, %0                 \n\t"
         : "=r" (ret)
     );
     return ret;
 }
@@ -324,21 +324,21 @@
 
 #define PIX_SAD(suf)\
 static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
 {\
     assert(h==8);\
-    asm volatile("pxor %%mm7, %%mm7     \n\t"\
+    __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6     \n\t":);\
 \
     sad8_1_ ## suf(blk1, blk2, stride, 8);\
 \
     return sum_ ## suf();\
 }\
 static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
 {\
     assert(h==8);\
-    asm volatile("pxor %%mm7, %%mm7     \n\t"\
+    __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6     \n\t"\
                  "movq %0, %%mm5        \n\t"\
                  :: "m"(round_tab[1]) \
                  );\
 \
@@ -348,11 +348,11 @@
 }\
 \
 static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
 {\
     assert(h==8);\
-    asm volatile("pxor %%mm7, %%mm7     \n\t"\
+    __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6     \n\t"\
                  "movq %0, %%mm5        \n\t"\
                  :: "m"(round_tab[1]) \
                  );\
 \
@@ -362,32 +362,32 @@
 }\
 \
 static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
 {\
     assert(h==8);\
-    asm volatile("pxor %%mm7, %%mm7     \n\t"\
+    __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6     \n\t"\
                  ::);\
 \
     sad8_4_ ## suf(blk1, blk2, stride, 8);\
 \
     return sum_ ## suf();\
 }\
 \
 static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
 {\
-    asm volatile("pxor %%mm7, %%mm7     \n\t"\
+    __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6     \n\t":);\
 \
     sad8_1_ ## suf(blk1  , blk2  , stride, h);\
     sad8_1_ ## suf(blk1+8, blk2+8, stride, h);\
 \
     return sum_ ## suf();\
 }\
 static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
 {\
-    asm volatile("pxor %%mm7, %%mm7     \n\t"\
+    __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6     \n\t"\
                  "movq %0, %%mm5        \n\t"\
                  :: "m"(round_tab[1]) \
                  );\
 \
@@ -396,11 +396,11 @@
 \
     return sum_ ## suf();\
 }\
 static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
 {\
-    asm volatile("pxor %%mm7, %%mm7     \n\t"\
+    __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6     \n\t"\
                  "movq %0, %%mm5        \n\t"\
                  :: "m"(round_tab[1]) \
                  );\
 \
@@ -409,11 +409,11 @@
 \
     return sum_ ## suf();\
 }\
 static int sad16_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
 {\
-    asm volatile("pxor %%mm7, %%mm7     \n\t"\
+    __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6     \n\t"\
                  ::);\
 \
     sad8_4_ ## suf(blk1  , blk2  , stride, h);\
     sad8_4_ ## suf(blk1+8, blk2+8, stride, h);\
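
Every block converted above uses GCC extended asm; the patch touches only the keyword, leaving operand lists such as : "+r" (h), "+r" (blk1), "+r" (blk2) : "r" ((x86_reg)stride) intact. As a reading aid for those constraint lists, here is a minimal sketch of the same pattern; byte_sum is a hypothetical helper (x86 AT&T syntax, ptrdiff_t standing in for x86_reg), not code from this file.

    #include <stddef.h>
    #include <stdint.h>

    /* Sum h bytes, one per row of `stride` bytes, in the shape the SAD
     * loops above use: "+r" = register operand both read and written,
     * "r" = read-only input, clobbers listed after the inputs. */
    static inline int byte_sum(const uint8_t *p, ptrdiff_t stride, int h)
    {
        int sum = 0;
        __asm__ volatile(
            "1:                         \n\t"
            "movzbl (%1), %%eax         \n\t"  /* load one byte, zero-extended */
            "add %%eax, %0              \n\t"  /* accumulate                   */
            "add %3, %1                 \n\t"  /* advance to the next row      */
            "sub $1, %2                 \n\t"
            "jg 1b                      \n\t"  /* loop while h > 0             */
            : "+r" (sum), "+r" (p), "+r" (h)   /* read-write operands          */
            : "r" (stride)                     /* read-only input              */
            : "%eax", "cc", "memory");         /* clobbers: eax, flags, memory */
        return sum;
    }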