comparison libswscale/rgb2rgb_template.c @ 26059:ba2efd11abf9

__asm __volatile -> asm volatile part 3
author reimar
date Sun, 24 Feb 2008 14:59:16 +0000
parents 2ad528dd42a0
children b7eeb3590bc9
comparing 26058:e2ee46838240 (before) with 26059:ba2efd11abf9 (after)
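
The change is mechanical: every inline-assembly statement in this file drops
the double-underscore spelling __asm __volatile in favor of plain asm
volatile. Both spellings are accepted by GCC's GNU C dialect, so behavior is
unchanged. A minimal sketch of the before/after pattern (illustrative, not a
line taken from this file):

    /* before: alternate-keyword spelling */
    __asm __volatile("emms":::"memory");

    /* after: plain keywords; "volatile" still stops the optimizer from
     * removing or reordering the statement, and the "memory" clobber
     * still orders it against surrounding loads and stores. */
    asm volatile("emms":::"memory");
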
@@ -90,16 +90,16 @@
 #ifdef HAVE_MMX
     const uint8_t *mm_end;
 #endif
     end = s + src_size;
 #ifdef HAVE_MMX
-    __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
+    asm volatile(PREFETCH" %0"::"m"(*s):"memory");
     mm_end = end - 23;
-    __asm __volatile("movq %0, %%mm7"::"m"(mask32):"memory");
+    asm volatile("movq %0, %%mm7"::"m"(mask32):"memory");
     while (s < mm_end)
     {
-        __asm __volatile(
+        asm volatile(
         PREFETCH" 32%1 \n\t"
         "movd %1, %%mm0 \n\t"
         "punpckldq 3%1, %%mm0 \n\t"
         "movd 6%1, %%mm1 \n\t"
         "punpckldq 9%1, %%mm1 \n\t"
@@ -119,12 +119,12 @@
         :"m"(*s)
         :"memory");
         dest += 32;
         s += 24;
     }
-    __asm __volatile(SFENCE:::"memory");
-    __asm __volatile(EMMS:::"memory");
+    asm volatile(SFENCE:::"memory");
+    asm volatile(EMMS:::"memory");
 #endif
     while (s < end)
     {
 #ifdef WORDS_BIGENDIAN
         /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */
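These two hunks are from the 24->32 converter: the MMX loop consumes 24
source bytes (8 pixels) per iteration, and the scalar tail (collapsed by the
viewer) finishes the remainder one pixel at a time. A reference sketch of
that scalar step for the little-endian case; the alpha fill constant is an
assumption, not taken from the collapsed lines:

    /* Sketch: expand packed 24-bit pixels to 32-bit, byte order B,G,R,A. */
    while (s < end) {
        *dest++ = *s++;   /* B */
        *dest++ = *s++;   /* G */
        *dest++ = *s++;   /* R */
        *dest++ = 255;    /* A: assumed opaque fill */
    }
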
@@ -150,15 +150,15 @@
 #ifdef HAVE_MMX
     const uint8_t *mm_end;
 #endif
     end = s + src_size;
 #ifdef HAVE_MMX
-    __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
+    asm volatile(PREFETCH" %0"::"m"(*s):"memory");
     mm_end = end - 31;
     while (s < mm_end)
     {
-        __asm __volatile(
+        asm volatile(
         PREFETCH" 32%1 \n\t"
         "movq %1, %%mm0 \n\t"
         "movq 8%1, %%mm1 \n\t"
         "movq 16%1, %%mm4 \n\t"
         "movq 24%1, %%mm5 \n\t"
@@ -205,12 +205,12 @@
         "m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
         :"memory");
         dest += 24;
         s += 32;
     }
-    __asm __volatile(SFENCE:::"memory");
-    __asm __volatile(EMMS:::"memory");
+    asm volatile(SFENCE:::"memory");
+    asm volatile(EMMS:::"memory");
 #endif
     while (s < end)
     {
 #ifdef WORDS_BIGENDIAN
         /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */
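This pair of hunks is the inverse 32->24 routine: 32 input bytes become 24
output bytes per MMX iteration, repacked via the mask24* constants. The
scalar equivalent is simply dropping the fourth byte of each pixel; a sketch
of the little-endian case:

    /* Sketch: narrow 32-bit pixels to packed 24-bit. */
    while (s < end) {
        *dest++ = *s++;   /* B */
        *dest++ = *s++;   /* G */
        *dest++ = *s++;   /* R */
        s++;              /* discard A */
    }
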
@@ -240,16 +240,16 @@
     register uint8_t* d=dst;
     register const uint8_t *end;
     const uint8_t *mm_end;
     end = s + src_size;
 #ifdef HAVE_MMX
-    __asm __volatile(PREFETCH" %0"::"m"(*s));
-    __asm __volatile("movq %0, %%mm4"::"m"(mask15s));
+    asm volatile(PREFETCH" %0"::"m"(*s));
+    asm volatile("movq %0, %%mm4"::"m"(mask15s));
     mm_end = end - 15;
     while (s<mm_end)
     {
-        __asm __volatile(
+        asm volatile(
         PREFETCH" 32%1 \n\t"
         "movq %1, %%mm0 \n\t"
         "movq 8%1, %%mm2 \n\t"
         "movq %%mm0, %%mm1 \n\t"
         "movq %%mm2, %%mm3 \n\t"
@@ -263,12 +263,12 @@
         :"m"(*s)
         );
         d+=16;
         s+=16;
     }
-    __asm __volatile(SFENCE:::"memory");
-    __asm __volatile(EMMS:::"memory");
+    asm volatile(SFENCE:::"memory");
+    asm volatile(EMMS:::"memory");
 #endif
     mm_end = end - 3;
     while (s < mm_end)
     {
         register unsigned x= *((uint32_t *)s);
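This function is the 15->16 bpp widener. Assuming mask15s is the 0xFFE0
per-word mask, the collapsed MMX body (lines 256-262) needs only pand and
paddw: adding the masked copy of a pixel to itself doubles the red and green
fields, i.e. shifts them up one bit while blue stays put. The scalar loop
that follows does the same on two pixels per 32-bit load; per 16-bit word it
amounts to this (hypothetical helper name):

    /* Sketch: one RGB555 word -> RGB565.
     * x = 0RRRRRGGGGGBBBBB; x & 0xFFE0 isolates R and G, and the add
     * doubles them: result = RRRRRGGGGG0BBBBB (new green LSB is 0). */
    static inline uint16_t rgb15to16_one(uint16_t x)
    {
        return x + (x & 0xFFE0);
    }
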
@@ -289,17 +289,17 @@
     register uint8_t* d=dst;
     register const uint8_t *end;
     const uint8_t *mm_end;
     end = s + src_size;
 #ifdef HAVE_MMX
-    __asm __volatile(PREFETCH" %0"::"m"(*s));
-    __asm __volatile("movq %0, %%mm7"::"m"(mask15rg));
-    __asm __volatile("movq %0, %%mm6"::"m"(mask15b));
+    asm volatile(PREFETCH" %0"::"m"(*s));
+    asm volatile("movq %0, %%mm7"::"m"(mask15rg));
+    asm volatile("movq %0, %%mm6"::"m"(mask15b));
     mm_end = end - 15;
     while (s<mm_end)
     {
-        __asm __volatile(
+        asm volatile(
         PREFETCH" 32%1 \n\t"
         "movq %1, %%mm0 \n\t"
         "movq 8%1, %%mm2 \n\t"
         "movq %%mm0, %%mm1 \n\t"
         "movq %%mm2, %%mm3 \n\t"
@@ -317,12 +317,12 @@
         :"m"(*s)
         );
         d+=16;
         s+=16;
     }
-    __asm __volatile(SFENCE:::"memory");
-    __asm __volatile(EMMS:::"memory");
+    asm volatile(SFENCE:::"memory");
+    asm volatile(EMMS:::"memory");
 #endif
     mm_end = end - 3;
     while (s < mm_end)
     {
         register uint32_t x= *((uint32_t *)s);
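The reverse 16->15 routine: the collapsed body (lines 306-316) keeps blue
and moves red plus green down one bit, dropping green's lowest bit.
Assuming mask15rg and mask15b select the 0x7FE0 and 0x001F lanes of each
16-bit word, the per-word scalar equivalent is (hypothetical helper name):

    /* Sketch: one RGB565 word -> RGB555. */
    static inline uint16_t rgb16to15_one(uint16_t x)
    {
        return ((x >> 1) & 0x7FE0) | (x & 0x001F);
    }
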
384 " jb 1b \n\t" 384 " jb 1b \n\t"
385 : "+r" (d), "+r"(s) 385 : "+r" (d), "+r"(s)
386 : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216) 386 : "r" (mm_end), "m" (mask3216g), "m" (mask3216br), "m" (mul3216)
387 ); 387 );
388 #else 388 #else
389 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); 389 asm volatile(PREFETCH" %0"::"m"(*src):"memory");
390 __asm __volatile( 390 asm volatile(
391 "movq %0, %%mm7 \n\t" 391 "movq %0, %%mm7 \n\t"
392 "movq %1, %%mm6 \n\t" 392 "movq %1, %%mm6 \n\t"
393 ::"m"(red_16mask),"m"(green_16mask)); 393 ::"m"(red_16mask),"m"(green_16mask));
394 while (s < mm_end) 394 while (s < mm_end)
395 { 395 {
396 __asm __volatile( 396 asm volatile(
397 PREFETCH" 32%1 \n\t" 397 PREFETCH" 32%1 \n\t"
398 "movd %1, %%mm0 \n\t" 398 "movd %1, %%mm0 \n\t"
399 "movd 4%1, %%mm3 \n\t" 399 "movd 4%1, %%mm3 \n\t"
400 "punpckldq 8%1, %%mm0 \n\t" 400 "punpckldq 8%1, %%mm0 \n\t"
401 "punpckldq 12%1, %%mm3 \n\t" 401 "punpckldq 12%1, %%mm3 \n\t"
425 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory"); 425 :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
426 d += 4; 426 d += 4;
427 s += 16; 427 s += 16;
428 } 428 }
429 #endif 429 #endif
430 __asm __volatile(SFENCE:::"memory"); 430 asm volatile(SFENCE:::"memory");
431 __asm __volatile(EMMS:::"memory"); 431 asm volatile(EMMS:::"memory");
432 #endif 432 #endif
433 while (s < end) 433 while (s < end)
434 { 434 {
435 register int rgb = *(uint32_t*)s; s += 4; 435 register int rgb = *(uint32_t*)s; s += 4;
436 *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8); 436 *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8);
@@ -445,19 +445,19 @@
     const uint8_t *mm_end;
 #endif
     uint16_t *d = (uint16_t *)dst;
     end = s + src_size;
 #ifdef HAVE_MMX
-    __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
-    __asm __volatile(
+    asm volatile(PREFETCH" %0"::"m"(*src):"memory");
+    asm volatile(
     "movq %0, %%mm7 \n\t"
     "movq %1, %%mm6 \n\t"
     ::"m"(red_16mask),"m"(green_16mask));
     mm_end = end - 15;
     while (s < mm_end)
     {
-        __asm __volatile(
+        asm volatile(
         PREFETCH" 32%1 \n\t"
         "movd %1, %%mm0 \n\t"
         "movd 4%1, %%mm3 \n\t"
         "punpckldq 8%1, %%mm0 \n\t"
         "punpckldq 12%1, %%mm3 \n\t"
@@ -486,12 +486,12 @@
         MOVNTQ" %%mm0, %0 \n\t"
         :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
         d += 4;
         s += 16;
     }
-    __asm __volatile(SFENCE:::"memory");
-    __asm __volatile(EMMS:::"memory");
+    asm volatile(SFENCE:::"memory");
+    asm volatile(EMMS:::"memory");
 #endif
     while (s < end)
     {
         register int rgb = *(uint32_t*)s; s += 4;
         *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19);
543 " jb 1b \n\t" 543 " jb 1b \n\t"
544 : "+r" (d), "+r"(s) 544 : "+r" (d), "+r"(s)
545 : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215) 545 : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215)
546 ); 546 );
547 #else 547 #else
548 __asm __volatile(PREFETCH" %0"::"m"(*src):"memory"); 548 asm volatile(PREFETCH" %0"::"m"(*src):"memory");
549 __asm __volatile( 549 asm volatile(
550 "movq %0, %%mm7 \n\t" 550 "movq %0, %%mm7 \n\t"
551 "movq %1, %%mm6 \n\t" 551 "movq %1, %%mm6 \n\t"
552 ::"m"(red_15mask),"m"(green_15mask)); 552 ::"m"(red_15mask),"m"(green_15mask));
553 while (s < mm_end) 553 while (s < mm_end)
554 { 554 {
555 __asm __volatile( 555 asm volatile(
556 PREFETCH" 32%1 \n\t" 556 PREFETCH" 32%1 \n\t"
557 "movd %1, %%mm0 \n\t" 557 "movd %1, %%mm0 \n\t"
558 "movd 4%1, %%mm3 \n\t" 558 "movd 4%1, %%mm3 \n\t"
559 "punpckldq 8%1, %%mm0 \n\t" 559 "punpckldq 8%1, %%mm0 \n\t"
560 "punpckldq 12%1, %%mm3 \n\t" 560 "punpckldq 12%1, %%mm3 \n\t"
584 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory"); 584 :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
585 d += 4; 585 d += 4;
586 s += 16; 586 s += 16;
587 } 587 }
588 #endif 588 #endif
589 __asm __volatile(SFENCE:::"memory"); 589 asm volatile(SFENCE:::"memory");
590 __asm __volatile(EMMS:::"memory"); 590 asm volatile(EMMS:::"memory");
591 #endif 591 #endif
592 while (s < end) 592 while (s < end)
593 { 593 {
594 register int rgb = *(uint32_t*)s; s += 4; 594 register int rgb = *(uint32_t*)s; s += 4;
595 *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9); 595 *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9);
@@ -604,19 +604,19 @@
     const uint8_t *mm_end;
 #endif
     uint16_t *d = (uint16_t *)dst;
     end = s + src_size;
 #ifdef HAVE_MMX
-    __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
-    __asm __volatile(
+    asm volatile(PREFETCH" %0"::"m"(*src):"memory");
+    asm volatile(
     "movq %0, %%mm7 \n\t"
     "movq %1, %%mm6 \n\t"
     ::"m"(red_15mask),"m"(green_15mask));
     mm_end = end - 15;
     while (s < mm_end)
     {
-        __asm __volatile(
+        asm volatile(
         PREFETCH" 32%1 \n\t"
         "movd %1, %%mm0 \n\t"
         "movd 4%1, %%mm3 \n\t"
         "punpckldq 8%1, %%mm0 \n\t"
         "punpckldq 12%1, %%mm3 \n\t"
@@ -645,12 +645,12 @@
         MOVNTQ" %%mm0, %0 \n\t"
         :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
         d += 4;
         s += 16;
     }
-    __asm __volatile(SFENCE:::"memory");
-    __asm __volatile(EMMS:::"memory");
+    asm volatile(SFENCE:::"memory");
+    asm volatile(EMMS:::"memory");
 #endif
     while (s < end)
     {
         register int rgb = *(uint32_t*)s; s += 4;
         *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19);
@@ -665,19 +665,19 @@
     const uint8_t *mm_end;
 #endif
     uint16_t *d = (uint16_t *)dst;
     end = s + src_size;
 #ifdef HAVE_MMX
-    __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
-    __asm __volatile(
+    asm volatile(PREFETCH" %0"::"m"(*src):"memory");
+    asm volatile(
     "movq %0, %%mm7 \n\t"
     "movq %1, %%mm6 \n\t"
     ::"m"(red_16mask),"m"(green_16mask));
     mm_end = end - 11;
     while (s < mm_end)
     {
-        __asm __volatile(
+        asm volatile(
         PREFETCH" 32%1 \n\t"
         "movd %1, %%mm0 \n\t"
         "movd 3%1, %%mm3 \n\t"
         "punpckldq 6%1, %%mm0 \n\t"
         "punpckldq 9%1, %%mm3 \n\t"
@@ -706,12 +706,12 @@
         MOVNTQ" %%mm0, %0 \n\t"
         :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
         d += 4;
         s += 12;
     }
-    __asm __volatile(SFENCE:::"memory");
-    __asm __volatile(EMMS:::"memory");
+    asm volatile(SFENCE:::"memory");
+    asm volatile(EMMS:::"memory");
 #endif
     while (s < end)
     {
         const int b = *s++;
         const int g = *s++;
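This is a 24->16 routine (note the 3/6/9-byte offsets in the movd and
punpckldq lines: four 3-byte pixels per movd pair). The scalar tail reads
the channel bytes individually; its remaining lines are collapsed by the
viewer, but it finishes, roughly, as:

    /* Sketch of the collapsed tail: pack B,G,R bytes into one 565 word. */
    const int b = *s++;
    const int g = *s++;
    const int r = *s++;
    *d++ = (b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8);
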
@@ -728,19 +728,19 @@
     const uint8_t *mm_end;
 #endif
     uint16_t *d = (uint16_t *)dst;
     end = s + src_size;
 #ifdef HAVE_MMX
-    __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
-    __asm __volatile(
+    asm volatile(PREFETCH" %0"::"m"(*src):"memory");
+    asm volatile(
     "movq %0, %%mm7 \n\t"
     "movq %1, %%mm6 \n\t"
     ::"m"(red_16mask),"m"(green_16mask));
     mm_end = end - 15;
     while (s < mm_end)
     {
-        __asm __volatile(
+        asm volatile(
         PREFETCH" 32%1 \n\t"
         "movd %1, %%mm0 \n\t"
         "movd 3%1, %%mm3 \n\t"
         "punpckldq 6%1, %%mm0 \n\t"
         "punpckldq 9%1, %%mm3 \n\t"
@@ -769,12 +769,12 @@
         MOVNTQ" %%mm0, %0 \n\t"
         :"=m"(*d):"m"(*s),"m"(blue_16mask):"memory");
         d += 4;
         s += 12;
     }
-    __asm __volatile(SFENCE:::"memory");
-    __asm __volatile(EMMS:::"memory");
+    asm volatile(SFENCE:::"memory");
+    asm volatile(EMMS:::"memory");
 #endif
     while (s < end)
     {
         const int r = *s++;
         const int g = *s++;
@@ -791,19 +791,19 @@
     const uint8_t *mm_end;
 #endif
     uint16_t *d = (uint16_t *)dst;
     end = s + src_size;
 #ifdef HAVE_MMX
-    __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
-    __asm __volatile(
+    asm volatile(PREFETCH" %0"::"m"(*src):"memory");
+    asm volatile(
     "movq %0, %%mm7 \n\t"
     "movq %1, %%mm6 \n\t"
     ::"m"(red_15mask),"m"(green_15mask));
     mm_end = end - 11;
     while (s < mm_end)
     {
-        __asm __volatile(
+        asm volatile(
         PREFETCH" 32%1 \n\t"
         "movd %1, %%mm0 \n\t"
         "movd 3%1, %%mm3 \n\t"
         "punpckldq 6%1, %%mm0 \n\t"
         "punpckldq 9%1, %%mm3 \n\t"
@@ -832,12 +832,12 @@
         MOVNTQ" %%mm0, %0 \n\t"
         :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
         d += 4;
         s += 12;
     }
-    __asm __volatile(SFENCE:::"memory");
-    __asm __volatile(EMMS:::"memory");
+    asm volatile(SFENCE:::"memory");
+    asm volatile(EMMS:::"memory");
 #endif
     while (s < end)
     {
         const int b = *s++;
         const int g = *s++;
@@ -854,19 +854,19 @@
     const uint8_t *mm_end;
 #endif
     uint16_t *d = (uint16_t *)dst;
     end = s + src_size;
 #ifdef HAVE_MMX
-    __asm __volatile(PREFETCH" %0"::"m"(*src):"memory");
-    __asm __volatile(
+    asm volatile(PREFETCH" %0"::"m"(*src):"memory");
+    asm volatile(
     "movq %0, %%mm7 \n\t"
     "movq %1, %%mm6 \n\t"
     ::"m"(red_15mask),"m"(green_15mask));
     mm_end = end - 15;
     while (s < mm_end)
     {
-        __asm __volatile(
+        asm volatile(
         PREFETCH" 32%1 \n\t"
         "movd %1, %%mm0 \n\t"
         "movd 3%1, %%mm3 \n\t"
         "punpckldq 6%1, %%mm0 \n\t"
         "punpckldq 9%1, %%mm3 \n\t"
@@ -895,12 +895,12 @@
         MOVNTQ" %%mm0, %0 \n\t"
         :"=m"(*d):"m"(*s),"m"(blue_15mask):"memory");
         d += 4;
         s += 12;
     }
-    __asm __volatile(SFENCE:::"memory");
-    __asm __volatile(EMMS:::"memory");
+    asm volatile(SFENCE:::"memory");
+    asm volatile(EMMS:::"memory");
 #endif
     while (s < end)
     {
         const int r = *s++;
         const int g = *s++;
@@ -938,15 +938,15 @@
 #endif
     uint8_t *d = (uint8_t *)dst;
     const uint16_t *s = (uint16_t *)src;
     end = s + src_size/2;
 #ifdef HAVE_MMX
-    __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
+    asm volatile(PREFETCH" %0"::"m"(*s):"memory");
     mm_end = end - 7;
     while (s < mm_end)
     {
-        __asm __volatile(
+        asm volatile(
         PREFETCH" 32%1 \n\t"
         "movq %1, %%mm0 \n\t"
         "movq %1, %%mm1 \n\t"
         "movq %1, %%mm2 \n\t"
         "pand %2, %%mm0 \n\t"
@@ -1005,11 +1005,11 @@
 
         :"=m"(*d)
         :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r), "m"(mmx_null)
         :"memory");
         /* Borrowed 32 to 24 */
-        __asm __volatile(
+        asm volatile(
         "movq %%mm0, %%mm4 \n\t"
         "movq %%mm3, %%mm5 \n\t"
         "movq %%mm6, %%mm0 \n\t"
         "movq %%mm7, %%mm1 \n\t"
 
@@ -1057,12 +1057,12 @@
         :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
         :"memory");
         d += 24;
         s += 8;
     }
-    __asm __volatile(SFENCE:::"memory");
-    __asm __volatile(EMMS:::"memory");
+    asm volatile(SFENCE:::"memory");
+    asm volatile(EMMS:::"memory");
 #endif
     while (s < end)
     {
         register uint16_t bgr;
         bgr = *s++;
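From here the direction reverses: 15/16 bpp back up to 24/32. The MMX path
above isolates each field with mask15b/g/r, widens it, and then reuses the
32->24 repacking step (the "Borrowed 32 to 24" block). The collapsed scalar
tail left-aligns each 5-bit field in its output byte; a sketch:

    /* Sketch: one 555 word -> three 8-bit channels (low bits left 0). */
    register uint16_t bgr = *s++;
    *d++ = (bgr & 0x1F)   << 3;   /* B */
    *d++ = (bgr & 0x3E0)  >> 2;   /* G */
    *d++ = (bgr & 0x7C00) >> 7;   /* R */
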
@@ -1080,15 +1080,15 @@
 #endif
     uint8_t *d = (uint8_t *)dst;
     const uint16_t *s = (const uint16_t *)src;
     end = s + src_size/2;
 #ifdef HAVE_MMX
-    __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
+    asm volatile(PREFETCH" %0"::"m"(*s):"memory");
     mm_end = end - 7;
     while (s < mm_end)
     {
-        __asm __volatile(
+        asm volatile(
         PREFETCH" 32%1 \n\t"
         "movq %1, %%mm0 \n\t"
         "movq %1, %%mm1 \n\t"
         "movq %1, %%mm2 \n\t"
         "pand %2, %%mm0 \n\t"
@@ -1146,11 +1146,11 @@
         "por %%mm5, %%mm3 \n\t"
         :"=m"(*d)
         :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r),"m"(mmx_null)
         :"memory");
         /* Borrowed 32 to 24 */
-        __asm __volatile(
+        asm volatile(
         "movq %%mm0, %%mm4 \n\t"
         "movq %%mm3, %%mm5 \n\t"
         "movq %%mm6, %%mm0 \n\t"
         "movq %%mm7, %%mm1 \n\t"
 
@@ -1198,12 +1198,12 @@
         :"m"(*s),"m"(mask24l),"m"(mask24h),"m"(mask24hh),"m"(mask24hhh),"m"(mask24hhhh)
         :"memory");
         d += 24;
         s += 8;
     }
-    __asm __volatile(SFENCE:::"memory");
-    __asm __volatile(EMMS:::"memory");
+    asm volatile(SFENCE:::"memory");
+    asm volatile(EMMS:::"memory");
 #endif
     while (s < end)
     {
         register uint16_t bgr;
         bgr = *s++;
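The 565 variant (mask16b/g/r) differs only in green being a 6-bit field;
its scalar tail is, sketched:

    register uint16_t bgr = *s++;
    *d++ = (bgr & 0x1F)   << 3;   /* B */
    *d++ = (bgr & 0x7E0)  >> 3;   /* G */
    *d++ = (bgr & 0xF800) >> 8;   /* R */
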
@@ -1221,16 +1221,16 @@
 #endif
     uint8_t *d = (uint8_t *)dst;
     const uint16_t *s = (const uint16_t *)src;
     end = s + src_size/2;
 #ifdef HAVE_MMX
-    __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
-    __asm __volatile("pxor %%mm7,%%mm7 \n\t":::"memory");
+    asm volatile(PREFETCH" %0"::"m"(*s):"memory");
+    asm volatile("pxor %%mm7,%%mm7 \n\t":::"memory");
     mm_end = end - 3;
     while (s < mm_end)
     {
-        __asm __volatile(
+        asm volatile(
         PREFETCH" 32%1 \n\t"
         "movq %1, %%mm0 \n\t"
         "movq %1, %%mm1 \n\t"
         "movq %1, %%mm2 \n\t"
         "pand %2, %%mm0 \n\t"
@@ -1262,12 +1262,12 @@
         :"m"(*s),"m"(mask15b),"m"(mask15g),"m"(mask15r)
         :"memory");
         d += 16;
         s += 4;
     }
-    __asm __volatile(SFENCE:::"memory");
-    __asm __volatile(EMMS:::"memory");
+    asm volatile(SFENCE:::"memory");
+    asm volatile(EMMS:::"memory");
 #endif
     while (s < end)
     {
 #if 0 //slightly slower on Athlon
         int bgr= *s++;
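In this 15->32 routine the pxor-cleared %%mm7 is the natural zero source
for the unpacks in the collapsed body, so the MMX path presumably produces
alpha = 0. A scalar sketch of one pixel, little-endian (the alpha constant
used by the collapsed tail is not shown, so 0 is an assumption):

    register uint16_t bgr = *s++;
    *d++ = (bgr & 0x1F)   << 3;   /* B */
    *d++ = (bgr & 0x3E0)  >> 2;   /* G */
    *d++ = (bgr & 0x7C00) >> 7;   /* R */
    *d++ = 0;                     /* A (assumed) */
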
@@ -1299,16 +1299,16 @@
 #endif
     uint8_t *d = (uint8_t *)dst;
     const uint16_t *s = (uint16_t *)src;
     end = s + src_size/2;
 #ifdef HAVE_MMX
-    __asm __volatile(PREFETCH" %0"::"m"(*s):"memory");
-    __asm __volatile("pxor %%mm7,%%mm7 \n\t":::"memory");
+    asm volatile(PREFETCH" %0"::"m"(*s):"memory");
+    asm volatile("pxor %%mm7,%%mm7 \n\t":::"memory");
     mm_end = end - 3;
     while (s < mm_end)
     {
-        __asm __volatile(
+        asm volatile(
         PREFETCH" 32%1 \n\t"
         "movq %1, %%mm0 \n\t"
         "movq %1, %%mm1 \n\t"
         "movq %1, %%mm2 \n\t"
         "pand %2, %%mm0 \n\t"
@@ -1340,12 +1340,12 @@
         :"m"(*s),"m"(mask16b),"m"(mask16g),"m"(mask16r)
         :"memory");
         d += 16;
         s += 4;
     }
-    __asm __volatile(SFENCE:::"memory");
-    __asm __volatile(EMMS:::"memory");
+    asm volatile(SFENCE:::"memory");
+    asm volatile(EMMS:::"memory");
 #endif
     while (s < end)
     {
         register uint16_t bgr;
         bgr = *s++;
@@ -1366,11 +1366,11 @@
 static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
 {
     long idx = 15 - src_size;
     uint8_t *s = (uint8_t *) src-idx, *d = dst-idx;
 #ifdef HAVE_MMX
-    __asm __volatile(
+    asm volatile(
     "test %0, %0 \n\t"
     "jns 2f \n\t"
     PREFETCH" (%1, %0) \n\t"
     "movq %3, %%mm7 \n\t"
     "pxor %4, %%mm7 \n\t"
1473 "2: \n\t" 1473 "2: \n\t"
1474 : "+a" (mmx_size) 1474 : "+a" (mmx_size)
1475 : "r" (src-mmx_size), "r"(dst-mmx_size) 1475 : "r" (src-mmx_size), "r"(dst-mmx_size)
1476 ); 1476 );
1477 1477
1478 __asm __volatile(SFENCE:::"memory"); 1478 asm volatile(SFENCE:::"memory");
1479 __asm __volatile(EMMS:::"memory"); 1479 asm volatile(EMMS:::"memory");
1480 1480
1481 if (mmx_size==23) return; //finihsed, was multiple of 8 1481 if (mmx_size==23) return; //finihsed, was multiple of 8
1482 1482
1483 src+= src_size; 1483 src+= src_size;
1484 dst+= src_size; 1484 dst+= src_size;
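
The final two hunks cover the channel-swapping converters. rgb32tobgr32
walks a negative index up to zero ("test %0, %0" / "jns 2f"), so the loop
needs no separate counter; the 1473-1484 hunk appears to be the tail of the
analogous rgb24tobgr24 routine (note mmx_size and the "was multiple of 8"
comment), which returns early when the MMX loop consumed everything and
otherwise adjusts src and dst to redo the last partial block in scalar
code. For the 32-bit case, the scalar swap amounts to exchanging bytes
0 and 2 of each pixel; a sketch using the idx/s/d setup from line 1368:

    /* Sketch: swap R and B in one 32-bit pixel, keep G and A. */
    register int v = *(const uint32_t *)&s[idx];
    *(uint32_t *)&d[idx] = (v & 0xFF00FF00)          /* G and A stay */
                         | ((v & 0x00FF0000) >> 16)  /* R -> byte 0  */
                         | ((v & 0x000000FF) << 16); /* B -> byte 2  */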