Mercurial > libavcodec.hg
comparison i386/dsputil_mmx_rnd.h @ 1064:b32afefe7d33 libavcodec
* UINTX -> uintx_t, INTX -> intx_t
author | kabi |
---|---|
date | Tue, 11 Feb 2003 16:35:48 +0000 |
parents | 42fdf7b24d2e |
children | 07a484280a82 |
comparison
equal
deleted
inserted
replaced
1063:fdeac9642346 | 1064:b32afefe7d33 |
---|---|
20 * mostly rewritten by Michael Niedermayer <michaelni@gmx.at> | 20 * mostly rewritten by Michael Niedermayer <michaelni@gmx.at> |
21 * and improved by Zdenek Kabelac <kabi@users.sf.net> | 21 * and improved by Zdenek Kabelac <kabi@users.sf.net> |
22 */ | 22 */ |
23 | 23 |
24 // put_pixels | 24 // put_pixels |
25 static void DEF(put, pixels8_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) | 25 static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
26 { | 26 { |
27 MOVQ_BFE(mm6); | 27 MOVQ_BFE(mm6); |
28 __asm __volatile( | 28 __asm __volatile( |
29 "lea (%3, %3), %%eax \n\t" | 29 "lea (%3, %3), %%eax \n\t" |
30 ".balign 8 \n\t" | 30 ".balign 8 \n\t" |
102 #endif | 102 #endif |
103 :"S"(src1Stride), "D"(dstStride) | 103 :"S"(src1Stride), "D"(dstStride) |
104 :"memory"); | 104 :"memory"); |
105 } | 105 } |
106 | 106 |
107 static void DEF(put, pixels16_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) | 107 static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
108 { | 108 { |
109 MOVQ_BFE(mm6); | 109 MOVQ_BFE(mm6); |
110 __asm __volatile( | 110 __asm __volatile( |
111 "lea (%3, %3), %%eax \n\t" | 111 "lea (%3, %3), %%eax \n\t" |
112 ".balign 8 \n\t" | 112 ".balign 8 \n\t" |
197 #endif | 197 #endif |
198 :"S"(src1Stride), "D"(dstStride) | 198 :"S"(src1Stride), "D"(dstStride) |
199 :"memory"); | 199 :"memory"); |
200 } | 200 } |
201 | 201 |
202 static void DEF(put, pixels8_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) | 202 static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
203 { | 203 { |
204 MOVQ_BFE(mm6); | 204 MOVQ_BFE(mm6); |
205 __asm __volatile( | 205 __asm __volatile( |
206 "lea (%3, %3), %%eax \n\t" | 206 "lea (%3, %3), %%eax \n\t" |
207 "movq (%1), %%mm0 \n\t" | 207 "movq (%1), %%mm0 \n\t" |
226 :"+g"(h), "+S"(pixels), "+D"(block) | 226 :"+g"(h), "+S"(pixels), "+D"(block) |
227 :"r"(line_size) | 227 :"r"(line_size) |
228 :"eax", "memory"); | 228 :"eax", "memory"); |
229 } | 229 } |
230 | 230 |
231 static void DEF(put, pixels8_xy2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) | 231 static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
232 { | 232 { |
233 MOVQ_ZERO(mm7); | 233 MOVQ_ZERO(mm7); |
234 SET_RND(mm6); // =2 for rnd and =1 for no_rnd version | 234 SET_RND(mm6); // =2 for rnd and =1 for no_rnd version |
235 __asm __volatile( | 235 __asm __volatile( |
236 "movq (%1), %%mm0 \n\t" | 236 "movq (%1), %%mm0 \n\t" |
294 :"eax", "memory"); | 294 :"eax", "memory"); |
295 } | 295 } |
296 | 296 |
297 // avg_pixels | 297 // avg_pixels |
298 // in case more speed is needed - unrolling would certainly help | 298 // in case more speed is needed - unrolling would certainly help |
299 static void DEF(avg, pixels8)(UINT8 *block, const UINT8 *pixels, int line_size, int h) | 299 static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
300 { | 300 { |
301 MOVQ_BFE(mm6); | 301 MOVQ_BFE(mm6); |
302 JUMPALIGN(); | 302 JUMPALIGN(); |
303 do { | 303 do { |
304 __asm __volatile( | 304 __asm __volatile( |
313 block += line_size; | 313 block += line_size; |
314 } | 314 } |
315 while (--h); | 315 while (--h); |
316 } | 316 } |
317 | 317 |
318 static void DEF(avg, pixels16)(UINT8 *block, const UINT8 *pixels, int line_size, int h) | 318 static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
319 { | 319 { |
320 MOVQ_BFE(mm6); | 320 MOVQ_BFE(mm6); |
321 JUMPALIGN(); | 321 JUMPALIGN(); |
322 do { | 322 do { |
323 __asm __volatile( | 323 __asm __volatile( |
336 block += line_size; | 336 block += line_size; |
337 } | 337 } |
338 while (--h); | 338 while (--h); |
339 } | 339 } |
340 | 340 |
341 static void DEF(avg, pixels8_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) | 341 static void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
342 { | 342 { |
343 MOVQ_BFE(mm6); | 343 MOVQ_BFE(mm6); |
344 JUMPALIGN(); | 344 JUMPALIGN(); |
345 do { | 345 do { |
346 __asm __volatile( | 346 __asm __volatile( |
377 src1 += src1Stride; | 377 src1 += src1Stride; |
378 src2 += 8; | 378 src2 += 8; |
379 } while (--h); | 379 } while (--h); |
380 } | 380 } |
381 | 381 |
382 static void DEF(avg, pixels16_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) | 382 static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
383 { | 383 { |
384 MOVQ_BFE(mm6); | 384 MOVQ_BFE(mm6); |
385 JUMPALIGN(); | 385 JUMPALIGN(); |
386 do { | 386 do { |
387 __asm __volatile( | 387 __asm __volatile( |
430 src1 += src1Stride; | 430 src1 += src1Stride; |
431 src2 += 16; | 431 src2 += 16; |
432 } while (--h); | 432 } while (--h); |
433 } | 433 } |
434 | 434 |
435 static void DEF(avg, pixels8_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) | 435 static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
436 { | 436 { |
437 MOVQ_BFE(mm6); | 437 MOVQ_BFE(mm6); |
438 __asm __volatile( | 438 __asm __volatile( |
439 "lea (%3, %3), %%eax \n\t" | 439 "lea (%3, %3), %%eax \n\t" |
440 "movq (%1), %%mm0 \n\t" | 440 "movq (%1), %%mm0 \n\t" |
470 :"r"(line_size) | 470 :"r"(line_size) |
471 :"eax", "memory"); | 471 :"eax", "memory"); |
472 } | 472 } |
473 | 473 |
474 // this routine is 'slightly' suboptimal but mostly unused | 474 // this routine is 'slightly' suboptimal but mostly unused |
475 static void DEF(avg, pixels8_xy2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) | 475 static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h) |
476 { | 476 { |
477 MOVQ_ZERO(mm7); | 477 MOVQ_ZERO(mm7); |
478 SET_RND(mm6); // =2 for rnd and =1 for no_rnd version | 478 SET_RND(mm6); // =2 for rnd and =1 for no_rnd version |
479 __asm __volatile( | 479 __asm __volatile( |
480 "movq (%1), %%mm0 \n\t" | 480 "movq (%1), %%mm0 \n\t" |
545 :"D"(block), "r"(line_size) | 545 :"D"(block), "r"(line_size) |
546 :"eax", "memory"); | 546 :"eax", "memory"); |
547 } | 547 } |
548 | 548 |
549 //FIXME optimize | 549 //FIXME optimize |
550 static void DEF(put, pixels16_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h){ | 550 static void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ |
551 DEF(put, pixels8_y2)(block , pixels , line_size, h); | 551 DEF(put, pixels8_y2)(block , pixels , line_size, h); |
552 DEF(put, pixels8_y2)(block+8, pixels+8, line_size, h); | 552 DEF(put, pixels8_y2)(block+8, pixels+8, line_size, h); |
553 } | 553 } |
554 | 554 |
555 static void DEF(put, pixels16_xy2)(UINT8 *block, const UINT8 *pixels, int line_size, int h){ | 555 static void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ |
556 DEF(put, pixels8_xy2)(block , pixels , line_size, h); | 556 DEF(put, pixels8_xy2)(block , pixels , line_size, h); |
557 DEF(put, pixels8_xy2)(block+8, pixels+8, line_size, h); | 557 DEF(put, pixels8_xy2)(block+8, pixels+8, line_size, h); |
558 } | 558 } |
559 | 559 |
560 static void DEF(avg, pixels16_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h){ | 560 static void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ |
561 DEF(avg, pixels8_y2)(block , pixels , line_size, h); | 561 DEF(avg, pixels8_y2)(block , pixels , line_size, h); |
562 DEF(avg, pixels8_y2)(block+8, pixels+8, line_size, h); | 562 DEF(avg, pixels8_y2)(block+8, pixels+8, line_size, h); |
563 } | 563 } |
564 | 564 |
565 static void DEF(avg, pixels16_xy2)(UINT8 *block, const UINT8 *pixels, int line_size, int h){ | 565 static void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){ |
566 DEF(avg, pixels8_xy2)(block , pixels , line_size, h); | 566 DEF(avg, pixels8_xy2)(block , pixels , line_size, h); |
567 DEF(avg, pixels8_xy2)(block+8, pixels+8, line_size, h); | 567 DEF(avg, pixels8_xy2)(block+8, pixels+8, line_size, h); |
568 } | 568 } |
569 | 569 |
570 | 570 |