comparison: i386/motion_est_mmx.c @ 8031:eebc7209c47f (libavcodec)
Convert asm keyword into __asm__.
Neither the asm() nor the __asm__() keyword is part of the C99
standard. GCC accepts the former in C89 mode, but rejects it in C99
mode unless GNU extensions are enabled (with -fasm); the latter form
is accepted in any mode as an extension, without requiring further
command-line options.

The Sun Studio C99 compiler likewise rejects asm() while accepting
__asm__(), albeit with warnings that it is not valid C99 syntax.
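
As a minimal standalone sketch of the behaviour described above (the
file name, macro, and function here are hypothetical, not part of the
patch):

    /* keyword_test.c -- hypothetical sketch of the keyword difference.
     * gcc -c keyword_test.c                  default (GNU C89) mode: both forms accepted
     * gcc -std=c99 -c keyword_test.c         plain asm() is rejected
     * gcc -std=c99 -fasm -c keyword_test.c   plain asm() accepted again
     */
    static void cpu_nop(void)
    {
    #ifdef USE_PLAIN_ASM
        asm volatile("nop");       /* GNU extension: invalid under plain -std=c99 */
    #else
        __asm__ volatile("nop");   /* accepted in any standards mode */
    #endif
    }

Hence the mechanical substitution below: every asm occurrence becomes
__asm__, with no change to the assembly itself.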
author:   flameeyes
date:     Thu, 16 Oct 2008 13:34:09 +0000
parents:  f7cbb7733146
children: 0d108ec85620
--- i386/motion_est_mmx.c (8030:a512ac8fa540)
+++ i386/motion_est_mmx.c (8031:eebc7209c47f)
@@ -34,11 +34,11 @@
 DECLARE_ASM_CONST(8, uint64_t, bone)= 0x0101010101010101LL;
 
 static inline void sad8_1_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
 {
     x86_reg len= -(stride*h);
-    asm volatile(
+    __asm__ volatile(
         ASMALIGN(4)
         "1: \n\t"
         "movq (%1, %%"REG_a"), %%mm0 \n\t"
         "movq (%2, %%"REG_a"), %%mm2 \n\t"
         "movq (%2, %%"REG_a"), %%mm4 \n\t"
@@ -69,11 +69,11 @@
     );
 }
 
 static inline void sad8_1_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
 {
-    asm volatile(
+    __asm__ volatile(
         ASMALIGN(4)
         "1: \n\t"
         "movq (%1), %%mm0 \n\t"
         "movq (%1, %3), %%mm1 \n\t"
         "psadbw (%2), %%mm0 \n\t"
@@ -90,11 +90,11 @@
 }
 
 static int sad16_sse2(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)
 {
     int ret;
-    asm volatile(
+    __asm__ volatile(
         "pxor %%xmm6, %%xmm6 \n\t"
         ASMALIGN(4)
         "1: \n\t"
         "movdqu (%1), %%xmm0 \n\t"
         "movdqu (%1, %3), %%xmm1 \n\t"
107 "sub $2, %0 \n\t" | 107 "sub $2, %0 \n\t" |
108 " jg 1b \n\t" | 108 " jg 1b \n\t" |
109 : "+r" (h), "+r" (blk1), "+r" (blk2) | 109 : "+r" (h), "+r" (blk1), "+r" (blk2) |
110 : "r" ((x86_reg)stride) | 110 : "r" ((x86_reg)stride) |
111 ); | 111 ); |
112 asm volatile( | 112 __asm__ volatile( |
113 "movhlps %%xmm6, %%xmm0 \n\t" | 113 "movhlps %%xmm6, %%xmm0 \n\t" |
114 "paddw %%xmm0, %%xmm6 \n\t" | 114 "paddw %%xmm0, %%xmm6 \n\t" |
115 "movd %%xmm6, %0 \n\t" | 115 "movd %%xmm6, %0 \n\t" |
116 : "=r"(ret) | 116 : "=r"(ret) |
117 ); | 117 ); |
118 return ret; | 118 return ret; |
119 } | 119 } |
120 | 120 |
121 static inline void sad8_x2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) | 121 static inline void sad8_x2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h) |
122 { | 122 { |
123 asm volatile( | 123 __asm__ volatile( |
124 ASMALIGN(4) | 124 ASMALIGN(4) |
125 "1: \n\t" | 125 "1: \n\t" |
126 "movq (%1), %%mm0 \n\t" | 126 "movq (%1), %%mm0 \n\t" |
127 "movq (%1, %3), %%mm1 \n\t" | 127 "movq (%1, %3), %%mm1 \n\t" |
128 "pavgb 1(%1), %%mm0 \n\t" | 128 "pavgb 1(%1), %%mm0 \n\t" |
@@ -140,11 +140,11 @@
     );
 }
 
 static inline void sad8_y2a_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
 {
-    asm volatile(
+    __asm__ volatile(
         "movq (%1), %%mm0 \n\t"
         "add %3, %1 \n\t"
         ASMALIGN(4)
         "1: \n\t"
         "movq (%1), %%mm1 \n\t"
@@ -165,11 +165,11 @@
     );
 }
 
 static inline void sad8_4_mmx2(uint8_t *blk1, uint8_t *blk2, int stride, int h)
 {
-    asm volatile(
+    __asm__ volatile(
         "movq "MANGLE(bone)", %%mm5 \n\t"
         "movq (%1), %%mm0 \n\t"
         "pavgb 1(%1), %%mm0 \n\t"
         "add %3, %1 \n\t"
         ASMALIGN(4)
@@ -196,11 +196,11 @@
 }
 
 static inline void sad8_2_mmx(uint8_t *blk1a, uint8_t *blk1b, uint8_t *blk2, int stride, int h)
 {
     x86_reg len= -(stride*h);
-    asm volatile(
+    __asm__ volatile(
         ASMALIGN(4)
         "1: \n\t"
         "movq (%1, %%"REG_a"), %%mm0 \n\t"
         "movq (%2, %%"REG_a"), %%mm1 \n\t"
         "movq (%1, %%"REG_a"), %%mm2 \n\t"
@@ -234,11 +234,11 @@
 }
 
 static inline void sad8_4_mmx(uint8_t *blk1, uint8_t *blk2, int stride, int h)
 {
     x86_reg len= -(stride*h);
-    asm volatile(
+    __asm__ volatile(
         "movq (%1, %%"REG_a"), %%mm0 \n\t"
         "movq 1(%1, %%"REG_a"), %%mm2 \n\t"
         "movq %%mm0, %%mm1 \n\t"
         "movq %%mm2, %%mm3 \n\t"
         "punpcklbw %%mm7, %%mm0 \n\t"
@@ -287,11 +287,11 @@
 }
 
 static inline int sum_mmx(void)
 {
     int ret;
-    asm volatile(
+    __asm__ volatile(
         "movq %%mm6, %%mm0 \n\t"
         "psrlq $32, %%mm6 \n\t"
         "paddw %%mm0, %%mm6 \n\t"
         "movq %%mm6, %%mm0 \n\t"
         "psrlq $16, %%mm6 \n\t"
@@ -303,11 +303,11 @@
 }
 
 static inline int sum_mmx2(void)
 {
     int ret;
-    asm volatile(
+    __asm__ volatile(
         "movd %%mm6, %0 \n\t"
         : "=r" (ret)
     );
     return ret;
 }
@@ -324,21 +324,21 @@
 
 #define PIX_SAD(suf)\
 static int sad8_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
 {\
     assert(h==8);\
-    asm volatile("pxor %%mm7, %%mm7 \n\t"\
+    __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6 \n\t":);\
 \
     sad8_1_ ## suf(blk1, blk2, stride, 8);\
 \
     return sum_ ## suf();\
 }\
 static int sad8_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
 {\
     assert(h==8);\
-    asm volatile("pxor %%mm7, %%mm7 \n\t"\
+    __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6 \n\t"\
                  "movq %0, %%mm5 \n\t"\
                  :: "m"(round_tab[1]) \
                  );\
 \
@@ -348,11 +348,11 @@
 }\
 \
 static int sad8_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
 {\
     assert(h==8);\
-    asm volatile("pxor %%mm7, %%mm7 \n\t"\
+    __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6 \n\t"\
                  "movq %0, %%mm5 \n\t"\
                  :: "m"(round_tab[1]) \
                  );\
 \
@@ -362,32 +362,32 @@
 }\
 \
 static int sad8_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
 {\
     assert(h==8);\
-    asm volatile("pxor %%mm7, %%mm7 \n\t"\
+    __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6 \n\t"\
                  ::);\
 \
     sad8_4_ ## suf(blk1, blk2, stride, 8);\
 \
     return sum_ ## suf();\
 }\
 \
 static int sad16_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
 {\
-    asm volatile("pxor %%mm7, %%mm7 \n\t"\
+    __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6 \n\t":);\
 \
     sad8_1_ ## suf(blk1 , blk2 , stride, h);\
     sad8_1_ ## suf(blk1+8, blk2+8, stride, h);\
 \
     return sum_ ## suf();\
 }\
 static int sad16_x2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
 {\
-    asm volatile("pxor %%mm7, %%mm7 \n\t"\
+    __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6 \n\t"\
                  "movq %0, %%mm5 \n\t"\
                  :: "m"(round_tab[1]) \
                  );\
 \
@@ -396,11 +396,11 @@
 \
     return sum_ ## suf();\
 }\
 static int sad16_y2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
 {\
-    asm volatile("pxor %%mm7, %%mm7 \n\t"\
+    __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6 \n\t"\
                  "movq %0, %%mm5 \n\t"\
                  :: "m"(round_tab[1]) \
                  );\
 \
@@ -409,11 +409,11 @@
 \
     return sum_ ## suf();\
 }\
 static int sad16_xy2_ ## suf(void *v, uint8_t *blk2, uint8_t *blk1, int stride, int h)\
 {\
-    asm volatile("pxor %%mm7, %%mm7 \n\t"\
+    __asm__ volatile("pxor %%mm7, %%mm7 \n\t"\
                  "pxor %%mm6, %%mm6 \n\t"\
                  ::);\
 \
     sad8_4_ ## suf(blk1 , blk2 , stride, h);\
     sad8_4_ ## suf(blk1+8, blk2+8, stride, h);\