comparison i386/dsputil_mmx_rnd.h @ 1064:b32afefe7d33 libavcodec

* UINTX -> uintx_t INTX -> intx_t
author kabi
date Tue, 11 Feb 2003 16:35:48 +0000
parents 42fdf7b24d2e
children 07a484280a82
comparison
equal deleted inserted replaced
1063:fdeac9642346 1064:b32afefe7d33
20 * mostly rewritten by Michael Niedermayer <michaelni@gmx.at> 20 * mostly rewritten by Michael Niedermayer <michaelni@gmx.at>
21 * and improved by Zdenek Kabelac <kabi@users.sf.net> 21 * and improved by Zdenek Kabelac <kabi@users.sf.net>
22 */ 22 */
23 23
24 // put_pixels 24 // put_pixels
25 static void DEF(put, pixels8_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) 25 static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
26 { 26 {
27 MOVQ_BFE(mm6); 27 MOVQ_BFE(mm6);
28 __asm __volatile( 28 __asm __volatile(
29 "lea (%3, %3), %%eax \n\t" 29 "lea (%3, %3), %%eax \n\t"
30 ".balign 8 \n\t" 30 ".balign 8 \n\t"
102 #endif 102 #endif
103 :"S"(src1Stride), "D"(dstStride) 103 :"S"(src1Stride), "D"(dstStride)
104 :"memory"); 104 :"memory");
105 } 105 }
106 106
107 static void DEF(put, pixels16_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) 107 static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
108 { 108 {
109 MOVQ_BFE(mm6); 109 MOVQ_BFE(mm6);
110 __asm __volatile( 110 __asm __volatile(
111 "lea (%3, %3), %%eax \n\t" 111 "lea (%3, %3), %%eax \n\t"
112 ".balign 8 \n\t" 112 ".balign 8 \n\t"
197 #endif 197 #endif
198 :"S"(src1Stride), "D"(dstStride) 198 :"S"(src1Stride), "D"(dstStride)
199 :"memory"); 199 :"memory");
200 } 200 }
201 201
202 static void DEF(put, pixels8_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) 202 static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
203 { 203 {
204 MOVQ_BFE(mm6); 204 MOVQ_BFE(mm6);
205 __asm __volatile( 205 __asm __volatile(
206 "lea (%3, %3), %%eax \n\t" 206 "lea (%3, %3), %%eax \n\t"
207 "movq (%1), %%mm0 \n\t" 207 "movq (%1), %%mm0 \n\t"
226 :"+g"(h), "+S"(pixels), "+D"(block) 226 :"+g"(h), "+S"(pixels), "+D"(block)
227 :"r"(line_size) 227 :"r"(line_size)
228 :"eax", "memory"); 228 :"eax", "memory");
229 } 229 }
230 230
231 static void DEF(put, pixels8_xy2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) 231 static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
232 { 232 {
233 MOVQ_ZERO(mm7); 233 MOVQ_ZERO(mm7);
234 SET_RND(mm6); // =2 for rnd and =1 for no_rnd version 234 SET_RND(mm6); // =2 for rnd and =1 for no_rnd version
235 __asm __volatile( 235 __asm __volatile(
236 "movq (%1), %%mm0 \n\t" 236 "movq (%1), %%mm0 \n\t"
294 :"eax", "memory"); 294 :"eax", "memory");
295 } 295 }
296 296
297 // avg_pixels 297 // avg_pixels
298 // in case more speed is needed - unroling would certainly help 298 // in case more speed is needed - unroling would certainly help
299 static void DEF(avg, pixels8)(UINT8 *block, const UINT8 *pixels, int line_size, int h) 299 static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
300 { 300 {
301 MOVQ_BFE(mm6); 301 MOVQ_BFE(mm6);
302 JUMPALIGN(); 302 JUMPALIGN();
303 do { 303 do {
304 __asm __volatile( 304 __asm __volatile(
313 block += line_size; 313 block += line_size;
314 } 314 }
315 while (--h); 315 while (--h);
316 } 316 }
317 317
318 static void DEF(avg, pixels16)(UINT8 *block, const UINT8 *pixels, int line_size, int h) 318 static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
319 { 319 {
320 MOVQ_BFE(mm6); 320 MOVQ_BFE(mm6);
321 JUMPALIGN(); 321 JUMPALIGN();
322 do { 322 do {
323 __asm __volatile( 323 __asm __volatile(
336 block += line_size; 336 block += line_size;
337 } 337 }
338 while (--h); 338 while (--h);
339 } 339 }
340 340
341 static void DEF(avg, pixels8_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) 341 static void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
342 { 342 {
343 MOVQ_BFE(mm6); 343 MOVQ_BFE(mm6);
344 JUMPALIGN(); 344 JUMPALIGN();
345 do { 345 do {
346 __asm __volatile( 346 __asm __volatile(
377 src1 += src1Stride; 377 src1 += src1Stride;
378 src2 += 8; 378 src2 += 8;
379 } while (--h); 379 } while (--h);
380 } 380 }
381 381
382 static void DEF(avg, pixels16_x2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) 382 static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
383 { 383 {
384 MOVQ_BFE(mm6); 384 MOVQ_BFE(mm6);
385 JUMPALIGN(); 385 JUMPALIGN();
386 do { 386 do {
387 __asm __volatile( 387 __asm __volatile(
430 src1 += src1Stride; 430 src1 += src1Stride;
431 src2 += 16; 431 src2 += 16;
432 } while (--h); 432 } while (--h);
433 } 433 }
434 434
435 static void DEF(avg, pixels8_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) 435 static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
436 { 436 {
437 MOVQ_BFE(mm6); 437 MOVQ_BFE(mm6);
438 __asm __volatile( 438 __asm __volatile(
439 "lea (%3, %3), %%eax \n\t" 439 "lea (%3, %3), %%eax \n\t"
440 "movq (%1), %%mm0 \n\t" 440 "movq (%1), %%mm0 \n\t"
470 :"r"(line_size) 470 :"r"(line_size)
471 :"eax", "memory"); 471 :"eax", "memory");
472 } 472 }
473 473
474 // this routine is 'slightly' suboptimal but mostly unused 474 // this routine is 'slightly' suboptimal but mostly unused
475 static void DEF(avg, pixels8_xy2)(UINT8 *block, const UINT8 *pixels, int line_size, int h) 475 static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
476 { 476 {
477 MOVQ_ZERO(mm7); 477 MOVQ_ZERO(mm7);
478 SET_RND(mm6); // =2 for rnd and =1 for no_rnd version 478 SET_RND(mm6); // =2 for rnd and =1 for no_rnd version
479 __asm __volatile( 479 __asm __volatile(
480 "movq (%1), %%mm0 \n\t" 480 "movq (%1), %%mm0 \n\t"
545 :"D"(block), "r"(line_size) 545 :"D"(block), "r"(line_size)
546 :"eax", "memory"); 546 :"eax", "memory");
547 } 547 }
548 548
549 //FIXME optimize 549 //FIXME optimize
550 static void DEF(put, pixels16_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h){ 550 static void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
551 DEF(put, pixels8_y2)(block , pixels , line_size, h); 551 DEF(put, pixels8_y2)(block , pixels , line_size, h);
552 DEF(put, pixels8_y2)(block+8, pixels+8, line_size, h); 552 DEF(put, pixels8_y2)(block+8, pixels+8, line_size, h);
553 } 553 }
554 554
555 static void DEF(put, pixels16_xy2)(UINT8 *block, const UINT8 *pixels, int line_size, int h){ 555 static void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
556 DEF(put, pixels8_xy2)(block , pixels , line_size, h); 556 DEF(put, pixels8_xy2)(block , pixels , line_size, h);
557 DEF(put, pixels8_xy2)(block+8, pixels+8, line_size, h); 557 DEF(put, pixels8_xy2)(block+8, pixels+8, line_size, h);
558 } 558 }
559 559
560 static void DEF(avg, pixels16_y2)(UINT8 *block, const UINT8 *pixels, int line_size, int h){ 560 static void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
561 DEF(avg, pixels8_y2)(block , pixels , line_size, h); 561 DEF(avg, pixels8_y2)(block , pixels , line_size, h);
562 DEF(avg, pixels8_y2)(block+8, pixels+8, line_size, h); 562 DEF(avg, pixels8_y2)(block+8, pixels+8, line_size, h);
563 } 563 }
564 564
565 static void DEF(avg, pixels16_xy2)(UINT8 *block, const UINT8 *pixels, int line_size, int h){ 565 static void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
566 DEF(avg, pixels8_xy2)(block , pixels , line_size, h); 566 DEF(avg, pixels8_xy2)(block , pixels , line_size, h);
567 DEF(avg, pixels8_xy2)(block+8, pixels+8, line_size, h); 567 DEF(avg, pixels8_xy2)(block+8, pixels+8, line_size, h);
568 } 568 }
569 569
570 570