comparison libswscale/rgb2rgb_template.c @ 29481:c080f1f5c07e

Cosmetics:
- Place curly brackets on the same line as while/for/if/switch/else/do.
- Place the curly bracket opening a function at column 0 on the next line.
author ramiro
date Sun, 16 Aug 2009 21:11:28 +0000
parents a4d8dee13834
children 01b933e5d04c
comparing 29480:a4d8dee13834 (parent) with 29481:c080f1f5c07e
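The rule being applied, on a schematic fragment (illustrative only, not taken from the file):

    /* before */
    while (s < mm_end)
    {
        /* ... */
    }

    /* after */
    while (s < mm_end) {
        /* ... */
    }

    /* function-opening braces stay on their own line, at column 0 */
    static void f(void)
    {
        /* ... */
    }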
@@ -82 +82 @@
 end = s + src_size;
 #if HAVE_MMX
 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
 mm_end = end - 23;
 __asm__ volatile("movq %0, %%mm7"::"m"(mask32a):"memory");
-while (s < mm_end)
-{
+while (s < mm_end) {
 __asm__ volatile(
 PREFETCH" 32%1 \n\t"
 "movd %1, %%mm0 \n\t"
 "punpckldq 3%1, %%mm0 \n\t"
 "movd 6%1, %%mm1 \n\t"
@@ -111 +110 @@
 s += 24;
 }
 __asm__ volatile(SFENCE:::"memory");
 __asm__ volatile(EMMS:::"memory");
 #endif
-while (s < end)
-{
+while (s < end) {
 #if HAVE_BIGENDIAN
 /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */
 *dest++ = 255;
 *dest++ = s[2];
 *dest++ = s[1];
@@ -141 +139 @@
 #endif
 end = s + src_size;
 #if HAVE_MMX
 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
 mm_end = end - 31;
-while (s < mm_end)
-{
+while (s < mm_end) {
 __asm__ volatile(
 PREFETCH" 32%1 \n\t"
 "movq %1, %%mm0 \n\t"
 "movq 8%1, %%mm1 \n\t"
 "movq 16%1, %%mm4 \n\t"
@@ -197 +194 @@
 s += 32;
 }
 __asm__ volatile(SFENCE:::"memory");
 __asm__ volatile(EMMS:::"memory");
 #endif
-while (s < end)
-{
+while (s < end) {
 #if HAVE_BIGENDIAN
 /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */
 s++;
 dest[2] = *s++;
 dest[1] = *s++;
@@ -232 +228 @@
 end = s + src_size;
 #if HAVE_MMX
 __asm__ volatile(PREFETCH" %0"::"m"(*s));
 __asm__ volatile("movq %0, %%mm4"::"m"(mask15s));
 mm_end = end - 15;
-while (s<mm_end)
-{
+while (s<mm_end) {
 __asm__ volatile(
 PREFETCH" 32%1 \n\t"
 "movq %1, %%mm0 \n\t"
 "movq 8%1, %%mm2 \n\t"
 "movq %%mm0, %%mm1 \n\t"
@@ -256 +251 @@
 }
 __asm__ volatile(SFENCE:::"memory");
 __asm__ volatile(EMMS:::"memory");
 #endif
 mm_end = end - 3;
-while (s < mm_end)
-{
+while (s < mm_end) {
 register unsigned x= *((const uint32_t *)s);
 *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0);
 d+=4;
 s+=4;
 }
-if (s < end)
-{
+if (s < end) {
 register unsigned short x= *((const uint16_t *)s);
 *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0);
 }
 }
 
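For reference, the scalar tail of rgb15to16 above rests on a bit trick: adding the red+green fields to the whole pixel doubles them, i.e. shifts them up one bit, while the 5 blue bits stay put. A minimal standalone sketch (helper and test names are made up here):

    #include <stdint.h>
    #include <stdio.h>

    /* Adding the red+green fields (mask 0x7FE0) to the 15-bit pixel
     * (mask 0x7FFF) doubles those fields, turning xRRRRRGGGGGBBBBB
     * into RRRRRGGGGG0BBBBB -- RGB565 with a zero green LSB. */
    static uint16_t rgb15to16_px(uint16_t x)
    {
        return (uint16_t)((x & 0x7FFF) + (x & 0x7FE0));
    }

    int main(void)
    {
        uint16_t in  = (10u << 10) | (20u << 5) | 30u;  /* R=10 G=20 B=30 */
        uint16_t out = rgb15to16_px(in);
        printf("R=%u G=%u B=%u\n", out >> 11, (out >> 5) & 0x3F, out & 0x1F);
        /* prints R=10 G=40 B=30: green doubles because 565 green has 6 bits */
        return 0;
    }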
@@ -282 +275 @@
 #if HAVE_MMX
 __asm__ volatile(PREFETCH" %0"::"m"(*s));
 __asm__ volatile("movq %0, %%mm7"::"m"(mask15rg));
 __asm__ volatile("movq %0, %%mm6"::"m"(mask15b));
 mm_end = end - 15;
-while (s<mm_end)
-{
+while (s<mm_end) {
 __asm__ volatile(
 PREFETCH" 32%1 \n\t"
 "movq %1, %%mm0 \n\t"
 "movq 8%1, %%mm2 \n\t"
 "movq %%mm0, %%mm1 \n\t"
@@ -310 +302 @@
 }
 __asm__ volatile(SFENCE:::"memory");
 __asm__ volatile(EMMS:::"memory");
 #endif
 mm_end = end - 3;
-while (s < mm_end)
-{
+while (s < mm_end) {
 register uint32_t x= *((const uint32_t*)s);
 *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F);
 s+=4;
 d+=4;
 }
-if (s < end)
-{
+if (s < end) {
 register uint16_t x= *((const uint16_t*)s);
 *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F);
 }
 }
 
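The reverse conversion above shifts red and green back down one bit, dropping the green LSB, while blue stays in place. A matching sketch (same caveats as the previous one):

    #include <stdint.h>

    /* 565 -> 555: halve the red+green fields, keep the 5 blue bits.
     * Green goes from 6 bits to 5, so its low bit is lost. */
    static uint16_t rgb16to15_px(uint16_t x)
    {
        return (uint16_t)(((x >> 1) & 0x7FE0) | (x & 0x001F));
    }

    int main(void)
    {
        /* round-trips through the previous sketch's 15->16 conversion */
        uint16_t in = (10u << 10) | (20u << 5) | 30u;
        return rgb16to15_px((uint16_t)((in & 0x7FFF) + (in & 0x7FE0))) == in ? 0 : 1;
    }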
@@ -376 +366 @@
 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
 __asm__ volatile(
 "movq %0, %%mm7 \n\t"
 "movq %1, %%mm6 \n\t"
 ::"m"(red_16mask),"m"(green_16mask));
-while (s < mm_end)
-{
+while (s < mm_end) {
 __asm__ volatile(
 PREFETCH" 32%1 \n\t"
 "movd %1, %%mm0 \n\t"
 "movd 4%1, %%mm3 \n\t"
 "punpckldq 8%1, %%mm0 \n\t"
@@ -415 +404 @@
 }
 #endif
 __asm__ volatile(SFENCE:::"memory");
 __asm__ volatile(EMMS:::"memory");
 #endif
-while (s < end)
-{
+while (s < end) {
 register int rgb = *(const uint32_t*)s; s += 4;
 *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8);
 }
 }
 
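The scalar fallback above packs a loaded 32-bit word (on little endian: B in bits 0-7, G in 8-15, R in 16-23) into a 565 word by keeping the top 5/6/5 bits of each channel. A standalone sketch under those assumptions:

    #include <stdint.h>
    #include <assert.h>

    /* Keep the top 5 bits of B at 0..4, top 6 of G at 5..10,
     * top 5 of R at 11..15 -- the same three terms as the loop above. */
    static uint16_t rgb32to16_px(uint32_t rgb)
    {
        return (uint16_t)(((rgb & 0xFF) >> 3)
                        + ((rgb & 0xFC00) >> 5)
                        + ((rgb & 0xF80000) >> 8));
    }

    int main(void)
    {
        uint32_t px = 0x00F87CF8;  /* R=0xF8 G=0x7C B=0xF8 */
        assert(rgb32to16_px(px) ==
               ((0xF8 >> 3) | ((0x7C >> 2) << 5) | ((0xF8 >> 3) << 11)));
        return 0;
    }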
@@ -438 +426 @@
 __asm__ volatile(
 "movq %0, %%mm7 \n\t"
 "movq %1, %%mm6 \n\t"
 ::"m"(red_16mask),"m"(green_16mask));
 mm_end = end - 15;
-while (s < mm_end)
-{
+while (s < mm_end) {
 __asm__ volatile(
 PREFETCH" 32%1 \n\t"
 "movd %1, %%mm0 \n\t"
 "movd 4%1, %%mm3 \n\t"
 "punpckldq 8%1, %%mm0 \n\t"
@@ -476 +463 @@
 s += 16;
 }
 __asm__ volatile(SFENCE:::"memory");
 __asm__ volatile(EMMS:::"memory");
 #endif
-while (s < end)
-{
+while (s < end) {
 register int rgb = *(const uint32_t*)s; s += 4;
 *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19);
 }
 }
 
@@ -535 +521 @@
 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory");
 __asm__ volatile(
 "movq %0, %%mm7 \n\t"
 "movq %1, %%mm6 \n\t"
 ::"m"(red_15mask),"m"(green_15mask));
-while (s < mm_end)
-{
+while (s < mm_end) {
 __asm__ volatile(
 PREFETCH" 32%1 \n\t"
 "movd %1, %%mm0 \n\t"
 "movd 4%1, %%mm3 \n\t"
 "punpckldq 8%1, %%mm0 \n\t"
@@ -574 +559 @@
 }
 #endif
 __asm__ volatile(SFENCE:::"memory");
 __asm__ volatile(EMMS:::"memory");
 #endif
-while (s < end)
-{
+while (s < end) {
 register int rgb = *(const uint32_t*)s; s += 4;
 *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9);
 }
 }
 
@@ -597 +581 @@
 __asm__ volatile(
 "movq %0, %%mm7 \n\t"
 "movq %1, %%mm6 \n\t"
 ::"m"(red_15mask),"m"(green_15mask));
 mm_end = end - 15;
-while (s < mm_end)
-{
+while (s < mm_end) {
 __asm__ volatile(
 PREFETCH" 32%1 \n\t"
 "movd %1, %%mm0 \n\t"
 "movd 4%1, %%mm3 \n\t"
 "punpckldq 8%1, %%mm0 \n\t"
@@ -635 +618 @@
 s += 16;
 }
 __asm__ volatile(SFENCE:::"memory");
 __asm__ volatile(EMMS:::"memory");
 #endif
-while (s < end)
-{
+while (s < end) {
 register int rgb = *(const uint32_t*)s; s += 4;
 *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19);
 }
 }
 
@@ -658 +640 @@
 __asm__ volatile(
 "movq %0, %%mm7 \n\t"
 "movq %1, %%mm6 \n\t"
 ::"m"(red_16mask),"m"(green_16mask));
 mm_end = end - 11;
-while (s < mm_end)
-{
+while (s < mm_end) {
 __asm__ volatile(
 PREFETCH" 32%1 \n\t"
 "movd %1, %%mm0 \n\t"
 "movd 3%1, %%mm3 \n\t"
 "punpckldq 6%1, %%mm0 \n\t"
@@ -696 +677 @@
 s += 12;
 }
 __asm__ volatile(SFENCE:::"memory");
 __asm__ volatile(EMMS:::"memory");
 #endif
-while (s < end)
-{
+while (s < end) {
 const int b = *s++;
 const int g = *s++;
 const int r = *s++;
 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
 }
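The 24-bit variant above does the same 565 packing from three separately read bytes. A one-function sketch (names invented here):

    #include <stdint.h>

    /* b, g, r are the three bytes in memory order for BGR24 input. */
    static uint16_t rgb24to16_px(uint8_t b, uint8_t g, uint8_t r)
    {
        return (uint16_t)((b >> 3) | ((g & 0xFC) << 3) | ((r & 0xF8) << 8));
    }

    int main(void)
    {
        return rgb24to16_px(0xF8, 0x7C, 0xF8) == 0xFBDF ? 0 : 1;
    }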
@@ -721 +701 @@
 __asm__ volatile(
 "movq %0, %%mm7 \n\t"
 "movq %1, %%mm6 \n\t"
 ::"m"(red_16mask),"m"(green_16mask));
 mm_end = end - 15;
-while (s < mm_end)
-{
+while (s < mm_end) {
 __asm__ volatile(
 PREFETCH" 32%1 \n\t"
 "movd %1, %%mm0 \n\t"
 "movd 3%1, %%mm3 \n\t"
 "punpckldq 6%1, %%mm0 \n\t"
@@ -759 +738 @@
 s += 12;
 }
 __asm__ volatile(SFENCE:::"memory");
 __asm__ volatile(EMMS:::"memory");
 #endif
-while (s < end)
-{
+while (s < end) {
 const int r = *s++;
 const int g = *s++;
 const int b = *s++;
 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8);
 }
@@ -784 +762 @@
 __asm__ volatile(
 "movq %0, %%mm7 \n\t"
 "movq %1, %%mm6 \n\t"
 ::"m"(red_15mask),"m"(green_15mask));
 mm_end = end - 11;
-while (s < mm_end)
-{
+while (s < mm_end) {
 __asm__ volatile(
 PREFETCH" 32%1 \n\t"
 "movd %1, %%mm0 \n\t"
 "movd 3%1, %%mm3 \n\t"
 "punpckldq 6%1, %%mm0 \n\t"
@@ -822 +799 @@
 s += 12;
 }
 __asm__ volatile(SFENCE:::"memory");
 __asm__ volatile(EMMS:::"memory");
 #endif
-while (s < end)
-{
+while (s < end) {
 const int b = *s++;
 const int g = *s++;
 const int r = *s++;
 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
 }
@@ -847 +823 @@
 __asm__ volatile(
 "movq %0, %%mm7 \n\t"
 "movq %1, %%mm6 \n\t"
 ::"m"(red_15mask),"m"(green_15mask));
 mm_end = end - 15;
-while (s < mm_end)
-{
+while (s < mm_end) {
 __asm__ volatile(
 PREFETCH" 32%1 \n\t"
 "movd %1, %%mm0 \n\t"
 "movd 3%1, %%mm3 \n\t"
 "punpckldq 6%1, %%mm0 \n\t"
@@ -885 +860 @@
 s += 12;
 }
 __asm__ volatile(SFENCE:::"memory");
 __asm__ volatile(EMMS:::"memory");
 #endif
-while (s < end)
-{
+while (s < end) {
 const int r = *s++;
 const int g = *s++;
 const int b = *s++;
 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7);
 }
@@ -927 +901 @@
 const uint16_t *s = (const uint16_t*)src;
 end = s + src_size/2;
 #if HAVE_MMX
 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
 mm_end = end - 7;
-while (s < mm_end)
-{
+while (s < mm_end) {
 __asm__ volatile(
 PREFETCH" 32%1 \n\t"
 "movq %1, %%mm0 \n\t"
 "movq %1, %%mm1 \n\t"
 "movq %1, %%mm2 \n\t"
@@ -1047 +1020 @@
 s += 8;
 }
 __asm__ volatile(SFENCE:::"memory");
 __asm__ volatile(EMMS:::"memory");
 #endif
-while (s < end)
-{
+while (s < end) {
 register uint16_t bgr;
 bgr = *s++;
 *d++ = (bgr&0x1F)<<3;
 *d++ = (bgr&0x3E0)>>2;
 *d++ = (bgr&0x7C00)>>7;
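The scalar tail above expands each 5-bit field of an RGB555 word into the top of an 8-bit channel, leaving the low 3 bits zero. As a sketch:

    #include <stdint.h>
    #include <stdio.h>

    static void rgb15to24_px(uint16_t bgr, uint8_t out[3])
    {
        out[0] = (bgr & 0x1F) << 3;    /* B: bits 0-4  -> 3-7 */
        out[1] = (bgr & 0x3E0) >> 2;   /* G: bits 5-9  -> 3-7 */
        out[2] = (bgr & 0x7C00) >> 7;  /* R: bits 10-14 -> 3-7 */
    }

    int main(void)
    {
        uint8_t c[3];
        rgb15to24_px((10u << 10) | (20u << 5) | 30u, c);
        printf("B=%u G=%u R=%u\n", c[0], c[1], c[2]);  /* 240 160 80 */
        return 0;
    }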
@@ -1069 +1041 @@
 const uint16_t *s = (const uint16_t *)src;
 end = s + src_size/2;
 #if HAVE_MMX
 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
 mm_end = end - 7;
-while (s < mm_end)
-{
+while (s < mm_end) {
 __asm__ volatile(
 PREFETCH" 32%1 \n\t"
 "movq %1, %%mm0 \n\t"
 "movq %1, %%mm1 \n\t"
 "movq %1, %%mm2 \n\t"
@@ -1188 +1159 @@
 s += 8;
 }
 __asm__ volatile(SFENCE:::"memory");
 __asm__ volatile(EMMS:::"memory");
 #endif
-while (s < end)
-{
+while (s < end) {
 register uint16_t bgr;
 bgr = *s++;
 *d++ = (bgr&0x1F)<<3;
 *d++ = (bgr&0x7E0)>>3;
 *d++ = (bgr&0xF800)>>8;
@@ -1231 +1201 @@
 #if HAVE_MMX
 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
 __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory");
 __asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory");
 mm_end = end - 3;
-while (s < mm_end)
-{
+while (s < mm_end) {
 __asm__ volatile(
 PREFETCH" 32%1 \n\t"
 "movq %1, %%mm0 \n\t"
 "movq %1, %%mm1 \n\t"
 "movq %1, %%mm2 \n\t"
@@ -1254 +1223 @@
 s += 4;
 }
 __asm__ volatile(SFENCE:::"memory");
 __asm__ volatile(EMMS:::"memory");
 #endif
-while (s < end)
-{
+while (s < end) {
 register uint16_t bgr;
 bgr = *s++;
 #if HAVE_BIGENDIAN
 *d++ = 255;
 *d++ = (bgr&0x7C00)>>7;
@@ -1286 +1254 @@
 #if HAVE_MMX
 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory");
 __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory");
 __asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory");
 mm_end = end - 3;
-while (s < mm_end)
-{
+while (s < mm_end) {
 __asm__ volatile(
 PREFETCH" 32%1 \n\t"
 "movq %1, %%mm0 \n\t"
 "movq %1, %%mm1 \n\t"
 "movq %1, %%mm2 \n\t"
@@ -1309 +1276 @@
 s += 4;
 }
 __asm__ volatile(SFENCE:::"memory");
 __asm__ volatile(EMMS:::"memory");
 #endif
-while (s < end)
-{
+while (s < end) {
 register uint16_t bgr;
 bgr = *s++;
 #if HAVE_BIGENDIAN
 *d++ = 255;
 *d++ = (bgr&0xF800)>>8;
@@ -1451 +1417 @@
 dst+= src_size;
 src_size= 23-mmx_size;
 src-= src_size;
 dst-= src_size;
 #endif
-for (i=0; i<src_size; i+=3)
-{
+for (i=0; i<src_size; i+=3) {
 register uint8_t x;
 x = src[i + 2];
 dst[i + 1] = src[i + 1];
 dst[i + 2] = src[i + 0];
 dst[i + 0] = x;
@@ -1467 +1432 @@
 long width, long height,
 long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
 {
 long y;
 const x86_reg chromWidth= width>>1;
-for (y=0; y<height; y++)
-{
+for (y=0; y<height; y++) {
 #if HAVE_MMX
 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
 __asm__ volatile(
 "xor %%"REG_a", %%"REG_a" \n\t"
 ASMALIGN(4)
@@ -1528 +1492 @@
 uint64_t *qdst = (uint64_t *) dst;
 uint64_t *qdst2 = (uint64_t *) (dst + dstStride);
 const uint32_t *yc = (uint32_t *) ysrc;
 const uint32_t *yc2 = (uint32_t *) (ysrc + lumStride);
 const uint16_t *uc = (uint16_t*) usrc, *vc = (uint16_t*) vsrc;
-for (i = 0; i < chromWidth; i += 8){
+for (i = 0; i < chromWidth; i += 8) {
 uint64_t y1, y2, yuv1, yuv2;
 uint64_t u, v;
 /* Prefetch */
 __asm__("ldq $31,64(%0)" :: "r"(yc));
 __asm__("ldq $31,64(%0)" :: "r"(yc2));
@@ -1557 +1521 @@
 
 #elif HAVE_FAST_64BIT
 int i;
 uint64_t *ldst = (uint64_t *) dst;
 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
-for (i = 0; i < chromWidth; i += 2){
+for (i = 0; i < chromWidth; i += 2) {
 uint64_t k, l;
 k = yc[0] + (uc[0] << 8) +
 (yc[1] << 16) + (vc[0] << 24);
 l = yc[2] + (uc[1] << 8) +
 (yc[3] << 16) + (vc[1] << 24);
@@ -1572 +1536 @@
 }
 
 #else
 int i, *idst = (int32_t *) dst;
 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
-for (i = 0; i < chromWidth; i++){
+for (i = 0; i < chromWidth; i++) {
 #if HAVE_BIGENDIAN
 *idst++ = (yc[0] << 24)+ (uc[0] << 16) +
 (yc[1] << 8) + (vc[0] << 0);
 #else
 *idst++ = yc[0] + (uc[0] << 8) +
@@ -1586 +1550 @@
 uc++;
 vc++;
 }
 #endif
 #endif
-if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1)
-{
+if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
 usrc += chromStride;
 vsrc += chromStride;
 }
 ysrc += lumStride;
 dst += dstStride;
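The non-MMX paths of this function all build the same little-endian YUY2 word from two luma samples and one chroma pair. A compact sketch of the packing (names invented here):

    #include <stdint.h>

    /* Y0 U Y1 V in memory order, built as one 32-bit little-endian word. */
    static uint32_t pack_yuy2(uint8_t y0, uint8_t u, uint8_t y1, uint8_t v)
    {
        return (uint32_t)y0 | ((uint32_t)u << 8)
             | ((uint32_t)y1 << 16) | ((uint32_t)v << 24);
    }

    int main(void)
    {
        /* 0x56AA3410 stored little-endian is the byte sequence
         * 10 34 AA 56, i.e. Y0=0x10 U=0x34 Y1=0xAA V=0x56. */
        return pack_yuy2(0x10, 0x34, 0xAA, 0x56) == 0x56AA3410 ? 0 : 1;
    }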
@@ -1619 +1582 @@
 long width, long height,
 long lumStride, long chromStride, long dstStride, long vertLumPerChroma)
 {
 long y;
 const x86_reg chromWidth= width>>1;
-for (y=0; y<height; y++)
-{
+for (y=0; y<height; y++) {
 #if HAVE_MMX
 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway)
 __asm__ volatile(
 "xor %%"REG_a", %%"REG_a" \n\t"
 ASMALIGN(4)
@@ -1663 +1625 @@
 
 #if HAVE_FAST_64BIT
 int i;
 uint64_t *ldst = (uint64_t *) dst;
 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
-for (i = 0; i < chromWidth; i += 2){
+for (i = 0; i < chromWidth; i += 2) {
 uint64_t k, l;
 k = uc[0] + (yc[0] << 8) +
 (vc[0] << 16) + (yc[1] << 24);
 l = uc[1] + (yc[2] << 8) +
 (vc[1] << 16) + (yc[3] << 24);
@@ -1678 +1640 @@
 }
 
 #else
 int i, *idst = (int32_t *) dst;
 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc;
-for (i = 0; i < chromWidth; i++){
+for (i = 0; i < chromWidth; i++) {
 #if HAVE_BIGENDIAN
 *idst++ = (uc[0] << 24)+ (yc[0] << 16) +
 (vc[0] << 8) + (yc[1] << 0);
 #else
 *idst++ = uc[0] + (yc[0] << 8) +
@@ -1692 +1654 @@
 uc++;
 vc++;
 }
 #endif
 #endif
-if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1)
-{
+if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) {
 usrc += chromStride;
 vsrc += chromStride;
 }
 ysrc += lumStride;
 dst += dstStride;
@@ -1749 +1710 @@
 long width, long height,
 long lumStride, long chromStride, long srcStride)
 {
 long y;
 const x86_reg chromWidth= width>>1;
-for (y=0; y<height; y+=2)
-{
+for (y=0; y<height; y+=2) {
 #if HAVE_MMX
 __asm__ volatile(
 "xor %%"REG_a", %%"REG_a" \n\t"
 "pcmpeqw %%mm7, %%mm7 \n\t"
 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
@@ -1835 +1795 @@
 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
 : "memory", "%"REG_a
 );
 #else
 long i;
-for (i=0; i<chromWidth; i++)
-{
+for (i=0; i<chromWidth; i++) {
 ydst[2*i+0] = src[4*i+0];
 udst[i] = src[4*i+1];
 ydst[2*i+1] = src[4*i+2];
 vdst[i] = src[4*i+3];
 }
 ydst += lumStride;
 src += srcStride;
 
-for (i=0; i<chromWidth; i++)
-{
+for (i=0; i<chromWidth; i++) {
 ydst[2*i+0] = src[4*i+0];
 ydst[2*i+1] = src[4*i+2];
 }
 #endif
 udst += chromStride;
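The #else loops above are the whole algorithm in miniature: even source lines contribute Y, U and V, odd lines only Y, because YV12 chroma is subsampled 2x both horizontally and vertically. A sketch of one line pair (hypothetical helper):

    #include <stdint.h>
    #include <stddef.h>

    static void yuy2_pair_to_yv12(const uint8_t *even, const uint8_t *odd,
                                  uint8_t *ydst0, uint8_t *ydst1,
                                  uint8_t *udst, uint8_t *vdst,
                                  size_t chromWidth)
    {
        for (size_t i = 0; i < chromWidth; i++) {
            ydst0[2*i+0] = even[4*i+0];   /* Y0 */
            udst[i]      = even[4*i+1];   /* U  */
            ydst0[2*i+1] = even[4*i+2];   /* Y1 */
            vdst[i]      = even[4*i+3];   /* V  */
            ydst1[2*i+0] = odd[4*i+0];    /* odd line: luma only */
            ydst1[2*i+1] = odd[4*i+2];
        }
    }

    int main(void)
    {
        const uint8_t even[8] = {1,2,3,4,5,6,7,8}, odd[8] = {9,0,1,2,3,4,5,6};
        uint8_t y0[4], y1[4], u[2], v[2];
        yuy2_pair_to_yv12(even, odd, y0, y1, u, v, 2);
        return (y0[1] == 3 && u[0] == 2 && v[1] == 8 && y1[3] == 5) ? 0 : 1;
    }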
@@ -1880 +1838 @@
 long x,y;
 
 dst[0]= src[0];
 
 // first line
-for (x=0; x<srcWidth-1; x++){
+for (x=0; x<srcWidth-1; x++) {
 dst[2*x+1]= (3*src[x] + src[x+1])>>2;
 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2;
 }
 dst[2*srcWidth-1]= src[srcWidth-1];
 
 dst+= dstStride;
 
-for (y=1; y<srcHeight; y++){
+for (y=1; y<srcHeight; y++) {
 #if HAVE_MMX2 || HAVE_AMD3DNOW
 const x86_reg mmxSize= srcWidth&~15;
 __asm__ volatile(
 "mov %4, %%"REG_a" \n\t"
 "1: \n\t"
@@ -1939 +1897 @@
 const x86_reg mmxSize=1;
 #endif
 dst[0 ]= (3*src[0] + src[srcStride])>>2;
 dst[dstStride]= ( src[0] + 3*src[srcStride])>>2;
 
-for (x=mmxSize-1; x<srcWidth-1; x++){
+for (x=mmxSize-1; x<srcWidth-1; x++) {
 dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2;
 dst[2*x+dstStride+2]= ( src[x+0] + 3*src[x+srcStride+1])>>2;
 dst[2*x+dstStride+1]= ( src[x+1] + 3*src[x+srcStride ])>>2;
 dst[2*x +2]= (3*src[x+1] + src[x+srcStride ])>>2;
 }
@@ -1956 +1914 @@
 
 // last line
 #if 1
 dst[0]= src[0];
 
-for (x=0; x<srcWidth-1; x++){
+for (x=0; x<srcWidth-1; x++) {
 dst[2*x+1]= (3*src[x] + src[x+1])>>2;
 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2;
 }
 dst[2*srcWidth-1]= src[srcWidth-1];
 #else
-for (x=0; x<srcWidth; x++){
+for (x=0; x<srcWidth; x++) {
 dst[2*x+0]=
 dst[2*x+1]= src[x];
 }
 #endif
 
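The interpolation in this upscaler, visible in the scalar loops above, always blends the two nearest source samples with fixed 3:1 weights, i.e. (3*near + far) >> 2. A sketch of the horizontal case for one line:

    #include <stdint.h>
    #include <stdio.h>

    static void upscale_line_2x(const uint8_t *src, uint8_t *dst, long srcWidth)
    {
        long x;
        dst[0] = src[0];
        for (x = 0; x < srcWidth - 1; x++) {
            dst[2*x+1] = (3*src[x] +   src[x+1]) >> 2;
            dst[2*x+2] = (  src[x] + 3*src[x+1]) >> 2;
        }
        dst[2*srcWidth-1] = src[srcWidth-1];
    }

    int main(void)
    {
        const uint8_t in[3] = {0, 100, 200};
        uint8_t out[6];
        upscale_line_2x(in, out, 3);
        for (int i = 0; i < 6; i++) printf("%u ", out[i]);  /* 0 25 75 125 175 200 */
        printf("\n");
        return 0;
    }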
@@ -1987 +1945 @@
 long width, long height,
 long lumStride, long chromStride, long srcStride)
 {
 long y;
 const x86_reg chromWidth= width>>1;
-for (y=0; y<height; y+=2)
-{
+for (y=0; y<height; y+=2) {
 #if HAVE_MMX
 __asm__ volatile(
 "xor %%"REG_a", %%"REG_a" \n\t"
 "pcmpeqw %%mm7, %%mm7 \n\t"
 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
@@ -2073 +2030 @@
 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
 : "memory", "%"REG_a
 );
 #else
 long i;
-for (i=0; i<chromWidth; i++)
-{
+for (i=0; i<chromWidth; i++) {
 udst[i] = src[4*i+0];
 ydst[2*i+0] = src[4*i+1];
 vdst[i] = src[4*i+2];
 ydst[2*i+1] = src[4*i+3];
 }
 ydst += lumStride;
 src += srcStride;
 
-for (i=0; i<chromWidth; i++)
-{
+for (i=0; i<chromWidth; i++) {
 ydst[2*i+0] = src[4*i+1];
 ydst[2*i+1] = src[4*i+3];
 }
 #endif
 udst += chromStride;
@@ -2115 +2070 @@
 long lumStride, long chromStride, long srcStride)
 {
 long y;
 const x86_reg chromWidth= width>>1;
 #if HAVE_MMX
-for (y=0; y<height-2; y+=2)
-{
+for (y=0; y<height-2; y+=2) {
 long i;
-for (i=0; i<2; i++)
-{
+for (i=0; i<2; i++) {
 __asm__ volatile(
 "mov %2, %%"REG_a" \n\t"
 "movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t"
 "movq "MANGLE(ff_w1111)", %%mm5 \n\t"
 "pxor %%mm7, %%mm7 \n\t"
@@ -2353 +2306 @@
 SFENCE" \n\t"
 :::"memory");
 #else
 y=0;
 #endif
-for (; y<height; y+=2)
-{
+for (; y<height; y+=2) {
 long i;
-for (i=0; i<chromWidth; i++)
-{
+for (i=0; i<chromWidth; i++) {
 unsigned int b = src[6*i+0];
 unsigned int g = src[6*i+1];
 unsigned int r = src[6*i+2];
 
 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
@@ -2380 +2331 @@
 ydst[2*i+1] = Y;
 }
 ydst += lumStride;
 src += srcStride;
 
-for (i=0; i<chromWidth; i++)
-{
+for (i=0; i<chromWidth; i++) {
 unsigned int b = src[6*i+0];
 unsigned int g = src[6*i+1];
 unsigned int r = src[6*i+2];
 
 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16;
@@ -2406 +2356 @@
 }
 }
 
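The luma expression in the loops above is a fixed-point dot product. The sketch below uses the common BT.601 integer coefficients at an assumed shift of 8 purely to be self-contained; in this file the actual RY/GY/BY and RGB2YUV_SHIFT come from the surrounding swscale headers:

    #include <stdint.h>
    #include <stdio.h>

    /* Assumed stand-ins for RY/GY/BY and RGB2YUV_SHIFT. */
    enum { RY_ = 66, GY_ = 129, BY_ = 25, SHIFT_ = 8 };

    static uint8_t rgb_to_y(uint8_t r, uint8_t g, uint8_t b)
    {
        return (uint8_t)(((RY_*r + GY_*g + BY_*b) >> SHIFT_) + 16);
    }

    int main(void)
    {
        printf("%u %u\n", rgb_to_y(255, 255, 255), rgb_to_y(0, 0, 0));
        /* 235 and 16: video-range ("limited") luma, not full 0..255 */
        return 0;
    }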
@@ -2409 +2359 @@
 static void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest,
 long width, long height, long src1Stride,
-long src2Stride, long dstStride){
+long src2Stride, long dstStride)
+{
 long h;
 
-for (h=0; h < height; h++)
-{
+for (h=0; h < height; h++) {
 long w;
 
 #if HAVE_MMX
 #if HAVE_SSE2
 __asm__(
2460 " jb 1b \n\t" 2410 " jb 1b \n\t"
2461 ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15) 2411 ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15)
2462 : "memory", "%"REG_a 2412 : "memory", "%"REG_a
2463 ); 2413 );
2464 #endif 2414 #endif
2465 for (w= (width&(~15)); w < width; w++) 2415 for (w= (width&(~15)); w < width; w++) {
2466 {
2467 dest[2*w+0] = src1[w]; 2416 dest[2*w+0] = src1[w];
2468 dest[2*w+1] = src2[w]; 2417 dest[2*w+1] = src2[w];
2469 } 2418 }
2470 #else 2419 #else
2471 for (w=0; w < width; w++) 2420 for (w=0; w < width; w++) {
2472 {
2473 dest[2*w+0] = src1[w]; 2421 dest[2*w+0] = src1[w];
2474 dest[2*w+1] = src2[w]; 2422 dest[2*w+1] = src2[w];
2475 } 2423 }
2476 #endif 2424 #endif
2477 dest += dstStride; 2425 dest += dstStride;
@@ -2500 +2448 @@
 __asm__ volatile(
 PREFETCH" %0 \n\t"
 PREFETCH" %1 \n\t"
 ::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory");
 #endif
-for (y=0;y<h;y++){
+for (y=0;y<h;y++) {
 const uint8_t* s1=src1+srcStride1*(y>>1);
 uint8_t* d=dst1+dstStride1*y;
 x=0;
 #if HAVE_MMX
-for (;x<w-31;x+=32)
-{
+for (;x<w-31;x+=32) {
 __asm__ volatile(
 PREFETCH" 32%1 \n\t"
 "movq %1, %%mm0 \n\t"
 "movq 8%1, %%mm2 \n\t"
 "movq 16%1, %%mm4 \n\t"
@@ -2540 +2487 @@
 :"memory");
 }
 #endif
 for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x];
 }
-for (y=0;y<h;y++){
+for (y=0;y<h;y++) {
 const uint8_t* s2=src2+srcStride2*(y>>1);
 uint8_t* d=dst2+dstStride2*y;
 x=0;
 #if HAVE_MMX
-for (;x<w-31;x+=32)
-{
+for (;x<w-31;x+=32) {
 __asm__ volatile(
 PREFETCH" 32%1 \n\t"
 "movq %1, %%mm0 \n\t"
 "movq 8%1, %%mm2 \n\t"
 "movq 16%1, %%mm4 \n\t"
@@ -2598 +2544 @@
 long srcStride3, long dstStride)
 {
 x86_reg x;
 long y,w,h;
 w=width/2; h=height;
-for (y=0;y<h;y++){
+for (y=0;y<h;y++) {
 const uint8_t* yp=src1+srcStride1*y;
 const uint8_t* up=src2+srcStride2*(y>>2);
 const uint8_t* vp=src3+srcStride3*(y>>2);
 uint8_t* d=dst+dstStride*y;
 x=0;
 #if HAVE_MMX
-for (;x<w-7;x+=8)
-{
+for (;x<w-7;x+=8) {
 __asm__ volatile(
 PREFETCH" 32(%1, %0) \n\t"
 PREFETCH" 32(%2, %0) \n\t"
 PREFETCH" 32(%3, %0) \n\t"
 "movq (%1, %0, 4), %%mm0 \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */
@@ -2659 +2604 @@
 : "+r" (x)
 : "r"(yp), "r" (up), "r"(vp), "r"(d)
 :"memory");
 }
 #endif
-for (; x<w; x++)
-{
+for (; x<w; x++) {
 const long x2 = x<<2;
 d[8*x+0] = yp[x2];
 d[8*x+1] = up[x];
 d[8*x+2] = yp[x2+1];
 d[8*x+3] = vp[x];
@@ -2688 +2632 @@
 dst += count;
 src += 2*count;
 count= - count;
 
 #if HAVE_MMX
-if(count <= -16){
+if(count <= -16) {
 count += 15;
 __asm__ volatile(
 "pcmpeqw %%mm7, %%mm7 \n\t"
 "psrlw $8, %%mm7 \n\t"
 "1: \n\t"
@@ -2714 +2658 @@
 : "r"(src), "r"(dst)
 );
 count -= 15;
 }
 #endif
-while(count<0){
+while(count<0) {
 dst[count]= src[2*count];
 count++;
 }
 }
 
@@ -2727 +2671 @@
 dst0+= count;
 dst1+= count;
 src += 4*count;
 count= - count;
 #if HAVE_MMX
-if(count <= -8){
+if(count <= -8) {
 count += 7;
 __asm__ volatile(
 "pcmpeqw %%mm7, %%mm7 \n\t"
 "psrlw $8, %%mm7 \n\t"
 "1: \n\t"
@@ -2761 +2705 @@
 : "r"(src), "r"(dst0), "r"(dst1)
 );
 count -= 7;
 }
 #endif
-while(count<0){
+while(count<0) {
 dst0[count]= src[4*count+0];
 dst1[count]= src[4*count+2];
 count++;
 }
 }
@@ -2776 +2720 @@
 dst1 += count;
 src0 += 4*count;
 src1 += 4*count;
 count= - count;
 #ifdef PAVGB
-if(count <= -8){
+if(count <= -8) {
 count += 7;
 __asm__ volatile(
 "pcmpeqw %%mm7, %%mm7 \n\t"
 "psrlw $8, %%mm7 \n\t"
 "1: \n\t"
@@ -2814 +2758 @@
 : "r"(src0), "r"(src1), "r"(dst0), "r"(dst1)
 );
 count -= 7;
 }
 #endif
-while(count<0){
+while(count<0) {
 dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1;
 dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1;
 count++;
 }
 }
@@ -2828 +2772 @@
 dst0+= count;
 dst1+= count;
 src += 4*count;
 count= - count;
 #if HAVE_MMX
-if(count <= -8){
+if(count <= -8) {
 count += 7;
 __asm__ volatile(
 "pcmpeqw %%mm7, %%mm7 \n\t"
 "psrlw $8, %%mm7 \n\t"
 "1: \n\t"
@@ -2863 +2807 @@
 );
 count -= 7;
 }
 #endif
 src++;
-while(count<0){
+while(count<0) {
 dst0[count]= src[4*count+0];
 dst1[count]= src[4*count+2];
 count++;
 }
 }
@@ -2878 +2822 @@
 dst1 += count;
 src0 += 4*count;
 src1 += 4*count;
 count= - count;
 #ifdef PAVGB
-if(count <= -8){
+if(count <= -8) {
 count += 7;
 __asm__ volatile(
 "pcmpeqw %%mm7, %%mm7 \n\t"
 "psrlw $8, %%mm7 \n\t"
 "1: \n\t"
@@ -2918 +2862 @@
 count -= 7;
 }
 #endif
 src0++;
 src1++;
-while(count<0){
+while(count<0) {
 dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1;
 dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1;
 count++;
 }
 }
@@ -2932 +2876 @@
 long lumStride, long chromStride, long srcStride)
 {
 long y;
 const long chromWidth= -((-width)>>1);
 
-for (y=0; y<height; y++){
+for (y=0; y<height; y++) {
 RENAME(extract_even)(src, ydst, width);
-if(y&1){
+if(y&1) {
 RENAME(extract_odd2avg)(src-srcStride, src, udst, vdst, chromWidth);
 udst+= chromStride;
 vdst+= chromStride;
 }
 
@@ -2959 +2903 @@
 long lumStride, long chromStride, long srcStride)
 {
 long y;
 const long chromWidth= -((-width)>>1);
 
-for (y=0; y<height; y++){
+for (y=0; y<height; y++) {
 RENAME(extract_even)(src, ydst, width);
 RENAME(extract_odd2)(src, udst, vdst, chromWidth);
 
 src += srcStride;
 ydst+= lumStride;
@@ -2984 +2928 @@
 long lumStride, long chromStride, long srcStride)
 {
 long y;
 const long chromWidth= -((-width)>>1);
 
-for (y=0; y<height; y++){
+for (y=0; y<height; y++) {
 RENAME(extract_even)(src+1, ydst, width);
-if(y&1){
+if(y&1) {
 RENAME(extract_even2avg)(src-srcStride, src, udst, vdst, chromWidth);
 udst+= chromStride;
 vdst+= chromStride;
 }
 
@@ -3011 +2955 @@
 long lumStride, long chromStride, long srcStride)
 {
 long y;
 const long chromWidth= -((-width)>>1);
 
-for (y=0; y<height; y++){
+for (y=0; y<height; y++) {
 RENAME(extract_even)(src+1, ydst, width);
 RENAME(extract_even2)(src, udst, vdst, chromWidth);
 
 src += srcStride;
 ydst+= lumStride;
@@ -3029 +2973 @@
 ::: "memory"
 );
 #endif
 }
 
-static inline void RENAME(rgb2rgb_init)(void){
+static inline void RENAME(rgb2rgb_init)(void)
+{
 rgb15to16 = RENAME(rgb15to16);
 rgb15tobgr24 = RENAME(rgb15tobgr24);
 rgb15to32 = RENAME(rgb15to32);
 rgb16tobgr24 = RENAME(rgb16tobgr24);
 rgb16to32 = RENAME(rgb16to32);