Mercurial > mplayer.hg
comparison libswscale/rgb2rgb_template.c @ 29481:c080f1f5c07e
Cosmetics:
- Place curly brackets on the same line as while/for/if/switch/else/do;
- Place the opening curly bracket of a function body at column 0 on the line after the function signature.
author | ramiro |
---|---|
date | Sun, 16 Aug 2009 21:11:28 +0000 |
parents | a4d8dee13834 |
children | 01b933e5d04c |
comparison
equal
deleted
inserted
replaced
29480:a4d8dee13834 | 29481:c080f1f5c07e |
---|---|
82 end = s + src_size; | 82 end = s + src_size; |
83 #if HAVE_MMX | 83 #if HAVE_MMX |
84 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); | 84 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); |
85 mm_end = end - 23; | 85 mm_end = end - 23; |
86 __asm__ volatile("movq %0, %%mm7"::"m"(mask32a):"memory"); | 86 __asm__ volatile("movq %0, %%mm7"::"m"(mask32a):"memory"); |
87 while (s < mm_end) | 87 while (s < mm_end) { |
88 { | |
89 __asm__ volatile( | 88 __asm__ volatile( |
90 PREFETCH" 32%1 \n\t" | 89 PREFETCH" 32%1 \n\t" |
91 "movd %1, %%mm0 \n\t" | 90 "movd %1, %%mm0 \n\t" |
92 "punpckldq 3%1, %%mm0 \n\t" | 91 "punpckldq 3%1, %%mm0 \n\t" |
93 "movd 6%1, %%mm1 \n\t" | 92 "movd 6%1, %%mm1 \n\t" |
111 s += 24; | 110 s += 24; |
112 } | 111 } |
113 __asm__ volatile(SFENCE:::"memory"); | 112 __asm__ volatile(SFENCE:::"memory"); |
114 __asm__ volatile(EMMS:::"memory"); | 113 __asm__ volatile(EMMS:::"memory"); |
115 #endif | 114 #endif |
116 while (s < end) | 115 while (s < end) { |
117 { | |
118 #if HAVE_BIGENDIAN | 116 #if HAVE_BIGENDIAN |
119 /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */ | 117 /* RGB24 (= R,G,B) -> RGB32 (= A,B,G,R) */ |
120 *dest++ = 255; | 118 *dest++ = 255; |
121 *dest++ = s[2]; | 119 *dest++ = s[2]; |
122 *dest++ = s[1]; | 120 *dest++ = s[1]; |
141 #endif | 139 #endif |
142 end = s + src_size; | 140 end = s + src_size; |
143 #if HAVE_MMX | 141 #if HAVE_MMX |
144 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); | 142 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); |
145 mm_end = end - 31; | 143 mm_end = end - 31; |
146 while (s < mm_end) | 144 while (s < mm_end) { |
147 { | |
148 __asm__ volatile( | 145 __asm__ volatile( |
149 PREFETCH" 32%1 \n\t" | 146 PREFETCH" 32%1 \n\t" |
150 "movq %1, %%mm0 \n\t" | 147 "movq %1, %%mm0 \n\t" |
151 "movq 8%1, %%mm1 \n\t" | 148 "movq 8%1, %%mm1 \n\t" |
152 "movq 16%1, %%mm4 \n\t" | 149 "movq 16%1, %%mm4 \n\t" |
197 s += 32; | 194 s += 32; |
198 } | 195 } |
199 __asm__ volatile(SFENCE:::"memory"); | 196 __asm__ volatile(SFENCE:::"memory"); |
200 __asm__ volatile(EMMS:::"memory"); | 197 __asm__ volatile(EMMS:::"memory"); |
201 #endif | 198 #endif |
202 while (s < end) | 199 while (s < end) { |
203 { | |
204 #if HAVE_BIGENDIAN | 200 #if HAVE_BIGENDIAN |
205 /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */ | 201 /* RGB32 (= A,B,G,R) -> RGB24 (= R,G,B) */ |
206 s++; | 202 s++; |
207 dest[2] = *s++; | 203 dest[2] = *s++; |
208 dest[1] = *s++; | 204 dest[1] = *s++; |
232 end = s + src_size; | 228 end = s + src_size; |
233 #if HAVE_MMX | 229 #if HAVE_MMX |
234 __asm__ volatile(PREFETCH" %0"::"m"(*s)); | 230 __asm__ volatile(PREFETCH" %0"::"m"(*s)); |
235 __asm__ volatile("movq %0, %%mm4"::"m"(mask15s)); | 231 __asm__ volatile("movq %0, %%mm4"::"m"(mask15s)); |
236 mm_end = end - 15; | 232 mm_end = end - 15; |
237 while (s<mm_end) | 233 while (s<mm_end) { |
238 { | |
239 __asm__ volatile( | 234 __asm__ volatile( |
240 PREFETCH" 32%1 \n\t" | 235 PREFETCH" 32%1 \n\t" |
241 "movq %1, %%mm0 \n\t" | 236 "movq %1, %%mm0 \n\t" |
242 "movq 8%1, %%mm2 \n\t" | 237 "movq 8%1, %%mm2 \n\t" |
243 "movq %%mm0, %%mm1 \n\t" | 238 "movq %%mm0, %%mm1 \n\t" |
256 } | 251 } |
257 __asm__ volatile(SFENCE:::"memory"); | 252 __asm__ volatile(SFENCE:::"memory"); |
258 __asm__ volatile(EMMS:::"memory"); | 253 __asm__ volatile(EMMS:::"memory"); |
259 #endif | 254 #endif |
260 mm_end = end - 3; | 255 mm_end = end - 3; |
261 while (s < mm_end) | 256 while (s < mm_end) { |
262 { | |
263 register unsigned x= *((const uint32_t *)s); | 257 register unsigned x= *((const uint32_t *)s); |
264 *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0); | 258 *((uint32_t *)d) = (x&0x7FFF7FFF) + (x&0x7FE07FE0); |
265 d+=4; | 259 d+=4; |
266 s+=4; | 260 s+=4; |
267 } | 261 } |
268 if (s < end) | 262 if (s < end) { |
269 { | |
270 register unsigned short x= *((const uint16_t *)s); | 263 register unsigned short x= *((const uint16_t *)s); |
271 *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0); | 264 *((uint16_t *)d) = (x&0x7FFF) + (x&0x7FE0); |
272 } | 265 } |
273 } | 266 } |
274 | 267 |
282 #if HAVE_MMX | 275 #if HAVE_MMX |
283 __asm__ volatile(PREFETCH" %0"::"m"(*s)); | 276 __asm__ volatile(PREFETCH" %0"::"m"(*s)); |
284 __asm__ volatile("movq %0, %%mm7"::"m"(mask15rg)); | 277 __asm__ volatile("movq %0, %%mm7"::"m"(mask15rg)); |
285 __asm__ volatile("movq %0, %%mm6"::"m"(mask15b)); | 278 __asm__ volatile("movq %0, %%mm6"::"m"(mask15b)); |
286 mm_end = end - 15; | 279 mm_end = end - 15; |
287 while (s<mm_end) | 280 while (s<mm_end) { |
288 { | |
289 __asm__ volatile( | 281 __asm__ volatile( |
290 PREFETCH" 32%1 \n\t" | 282 PREFETCH" 32%1 \n\t" |
291 "movq %1, %%mm0 \n\t" | 283 "movq %1, %%mm0 \n\t" |
292 "movq 8%1, %%mm2 \n\t" | 284 "movq 8%1, %%mm2 \n\t" |
293 "movq %%mm0, %%mm1 \n\t" | 285 "movq %%mm0, %%mm1 \n\t" |
310 } | 302 } |
311 __asm__ volatile(SFENCE:::"memory"); | 303 __asm__ volatile(SFENCE:::"memory"); |
312 __asm__ volatile(EMMS:::"memory"); | 304 __asm__ volatile(EMMS:::"memory"); |
313 #endif | 305 #endif |
314 mm_end = end - 3; | 306 mm_end = end - 3; |
315 while (s < mm_end) | 307 while (s < mm_end) { |
316 { | |
317 register uint32_t x= *((const uint32_t*)s); | 308 register uint32_t x= *((const uint32_t*)s); |
318 *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F); | 309 *((uint32_t *)d) = ((x>>1)&0x7FE07FE0) | (x&0x001F001F); |
319 s+=4; | 310 s+=4; |
320 d+=4; | 311 d+=4; |
321 } | 312 } |
322 if (s < end) | 313 if (s < end) { |
323 { | |
324 register uint16_t x= *((const uint16_t*)s); | 314 register uint16_t x= *((const uint16_t*)s); |
325 *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F); | 315 *((uint16_t *)d) = ((x>>1)&0x7FE0) | (x&0x001F); |
326 } | 316 } |
327 } | 317 } |
328 | 318 |
376 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); | 366 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); |
377 __asm__ volatile( | 367 __asm__ volatile( |
378 "movq %0, %%mm7 \n\t" | 368 "movq %0, %%mm7 \n\t" |
379 "movq %1, %%mm6 \n\t" | 369 "movq %1, %%mm6 \n\t" |
380 ::"m"(red_16mask),"m"(green_16mask)); | 370 ::"m"(red_16mask),"m"(green_16mask)); |
381 while (s < mm_end) | 371 while (s < mm_end) { |
382 { | |
383 __asm__ volatile( | 372 __asm__ volatile( |
384 PREFETCH" 32%1 \n\t" | 373 PREFETCH" 32%1 \n\t" |
385 "movd %1, %%mm0 \n\t" | 374 "movd %1, %%mm0 \n\t" |
386 "movd 4%1, %%mm3 \n\t" | 375 "movd 4%1, %%mm3 \n\t" |
387 "punpckldq 8%1, %%mm0 \n\t" | 376 "punpckldq 8%1, %%mm0 \n\t" |
415 } | 404 } |
416 #endif | 405 #endif |
417 __asm__ volatile(SFENCE:::"memory"); | 406 __asm__ volatile(SFENCE:::"memory"); |
418 __asm__ volatile(EMMS:::"memory"); | 407 __asm__ volatile(EMMS:::"memory"); |
419 #endif | 408 #endif |
420 while (s < end) | 409 while (s < end) { |
421 { | |
422 register int rgb = *(const uint32_t*)s; s += 4; | 410 register int rgb = *(const uint32_t*)s; s += 4; |
423 *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8); | 411 *d++ = ((rgb&0xFF)>>3) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>8); |
424 } | 412 } |
425 } | 413 } |
426 | 414 |
438 __asm__ volatile( | 426 __asm__ volatile( |
439 "movq %0, %%mm7 \n\t" | 427 "movq %0, %%mm7 \n\t" |
440 "movq %1, %%mm6 \n\t" | 428 "movq %1, %%mm6 \n\t" |
441 ::"m"(red_16mask),"m"(green_16mask)); | 429 ::"m"(red_16mask),"m"(green_16mask)); |
442 mm_end = end - 15; | 430 mm_end = end - 15; |
443 while (s < mm_end) | 431 while (s < mm_end) { |
444 { | |
445 __asm__ volatile( | 432 __asm__ volatile( |
446 PREFETCH" 32%1 \n\t" | 433 PREFETCH" 32%1 \n\t" |
447 "movd %1, %%mm0 \n\t" | 434 "movd %1, %%mm0 \n\t" |
448 "movd 4%1, %%mm3 \n\t" | 435 "movd 4%1, %%mm3 \n\t" |
449 "punpckldq 8%1, %%mm0 \n\t" | 436 "punpckldq 8%1, %%mm0 \n\t" |
476 s += 16; | 463 s += 16; |
477 } | 464 } |
478 __asm__ volatile(SFENCE:::"memory"); | 465 __asm__ volatile(SFENCE:::"memory"); |
479 __asm__ volatile(EMMS:::"memory"); | 466 __asm__ volatile(EMMS:::"memory"); |
480 #endif | 467 #endif |
481 while (s < end) | 468 while (s < end) { |
482 { | |
483 register int rgb = *(const uint32_t*)s; s += 4; | 469 register int rgb = *(const uint32_t*)s; s += 4; |
484 *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19); | 470 *d++ = ((rgb&0xF8)<<8) + ((rgb&0xFC00)>>5) + ((rgb&0xF80000)>>19); |
485 } | 471 } |
486 } | 472 } |
487 | 473 |
535 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); | 521 __asm__ volatile(PREFETCH" %0"::"m"(*src):"memory"); |
536 __asm__ volatile( | 522 __asm__ volatile( |
537 "movq %0, %%mm7 \n\t" | 523 "movq %0, %%mm7 \n\t" |
538 "movq %1, %%mm6 \n\t" | 524 "movq %1, %%mm6 \n\t" |
539 ::"m"(red_15mask),"m"(green_15mask)); | 525 ::"m"(red_15mask),"m"(green_15mask)); |
540 while (s < mm_end) | 526 while (s < mm_end) { |
541 { | |
542 __asm__ volatile( | 527 __asm__ volatile( |
543 PREFETCH" 32%1 \n\t" | 528 PREFETCH" 32%1 \n\t" |
544 "movd %1, %%mm0 \n\t" | 529 "movd %1, %%mm0 \n\t" |
545 "movd 4%1, %%mm3 \n\t" | 530 "movd 4%1, %%mm3 \n\t" |
546 "punpckldq 8%1, %%mm0 \n\t" | 531 "punpckldq 8%1, %%mm0 \n\t" |
574 } | 559 } |
575 #endif | 560 #endif |
576 __asm__ volatile(SFENCE:::"memory"); | 561 __asm__ volatile(SFENCE:::"memory"); |
577 __asm__ volatile(EMMS:::"memory"); | 562 __asm__ volatile(EMMS:::"memory"); |
578 #endif | 563 #endif |
579 while (s < end) | 564 while (s < end) { |
580 { | |
581 register int rgb = *(const uint32_t*)s; s += 4; | 565 register int rgb = *(const uint32_t*)s; s += 4; |
582 *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9); | 566 *d++ = ((rgb&0xFF)>>3) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>9); |
583 } | 567 } |
584 } | 568 } |
585 | 569 |
597 __asm__ volatile( | 581 __asm__ volatile( |
598 "movq %0, %%mm7 \n\t" | 582 "movq %0, %%mm7 \n\t" |
599 "movq %1, %%mm6 \n\t" | 583 "movq %1, %%mm6 \n\t" |
600 ::"m"(red_15mask),"m"(green_15mask)); | 584 ::"m"(red_15mask),"m"(green_15mask)); |
601 mm_end = end - 15; | 585 mm_end = end - 15; |
602 while (s < mm_end) | 586 while (s < mm_end) { |
603 { | |
604 __asm__ volatile( | 587 __asm__ volatile( |
605 PREFETCH" 32%1 \n\t" | 588 PREFETCH" 32%1 \n\t" |
606 "movd %1, %%mm0 \n\t" | 589 "movd %1, %%mm0 \n\t" |
607 "movd 4%1, %%mm3 \n\t" | 590 "movd 4%1, %%mm3 \n\t" |
608 "punpckldq 8%1, %%mm0 \n\t" | 591 "punpckldq 8%1, %%mm0 \n\t" |
635 s += 16; | 618 s += 16; |
636 } | 619 } |
637 __asm__ volatile(SFENCE:::"memory"); | 620 __asm__ volatile(SFENCE:::"memory"); |
638 __asm__ volatile(EMMS:::"memory"); | 621 __asm__ volatile(EMMS:::"memory"); |
639 #endif | 622 #endif |
640 while (s < end) | 623 while (s < end) { |
641 { | |
642 register int rgb = *(const uint32_t*)s; s += 4; | 624 register int rgb = *(const uint32_t*)s; s += 4; |
643 *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19); | 625 *d++ = ((rgb&0xF8)<<7) + ((rgb&0xF800)>>6) + ((rgb&0xF80000)>>19); |
644 } | 626 } |
645 } | 627 } |
646 | 628 |
658 __asm__ volatile( | 640 __asm__ volatile( |
659 "movq %0, %%mm7 \n\t" | 641 "movq %0, %%mm7 \n\t" |
660 "movq %1, %%mm6 \n\t" | 642 "movq %1, %%mm6 \n\t" |
661 ::"m"(red_16mask),"m"(green_16mask)); | 643 ::"m"(red_16mask),"m"(green_16mask)); |
662 mm_end = end - 11; | 644 mm_end = end - 11; |
663 while (s < mm_end) | 645 while (s < mm_end) { |
664 { | |
665 __asm__ volatile( | 646 __asm__ volatile( |
666 PREFETCH" 32%1 \n\t" | 647 PREFETCH" 32%1 \n\t" |
667 "movd %1, %%mm0 \n\t" | 648 "movd %1, %%mm0 \n\t" |
668 "movd 3%1, %%mm3 \n\t" | 649 "movd 3%1, %%mm3 \n\t" |
669 "punpckldq 6%1, %%mm0 \n\t" | 650 "punpckldq 6%1, %%mm0 \n\t" |
696 s += 12; | 677 s += 12; |
697 } | 678 } |
698 __asm__ volatile(SFENCE:::"memory"); | 679 __asm__ volatile(SFENCE:::"memory"); |
699 __asm__ volatile(EMMS:::"memory"); | 680 __asm__ volatile(EMMS:::"memory"); |
700 #endif | 681 #endif |
701 while (s < end) | 682 while (s < end) { |
702 { | |
703 const int b = *s++; | 683 const int b = *s++; |
704 const int g = *s++; | 684 const int g = *s++; |
705 const int r = *s++; | 685 const int r = *s++; |
706 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); | 686 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); |
707 } | 687 } |
721 __asm__ volatile( | 701 __asm__ volatile( |
722 "movq %0, %%mm7 \n\t" | 702 "movq %0, %%mm7 \n\t" |
723 "movq %1, %%mm6 \n\t" | 703 "movq %1, %%mm6 \n\t" |
724 ::"m"(red_16mask),"m"(green_16mask)); | 704 ::"m"(red_16mask),"m"(green_16mask)); |
725 mm_end = end - 15; | 705 mm_end = end - 15; |
726 while (s < mm_end) | 706 while (s < mm_end) { |
727 { | |
728 __asm__ volatile( | 707 __asm__ volatile( |
729 PREFETCH" 32%1 \n\t" | 708 PREFETCH" 32%1 \n\t" |
730 "movd %1, %%mm0 \n\t" | 709 "movd %1, %%mm0 \n\t" |
731 "movd 3%1, %%mm3 \n\t" | 710 "movd 3%1, %%mm3 \n\t" |
732 "punpckldq 6%1, %%mm0 \n\t" | 711 "punpckldq 6%1, %%mm0 \n\t" |
759 s += 12; | 738 s += 12; |
760 } | 739 } |
761 __asm__ volatile(SFENCE:::"memory"); | 740 __asm__ volatile(SFENCE:::"memory"); |
762 __asm__ volatile(EMMS:::"memory"); | 741 __asm__ volatile(EMMS:::"memory"); |
763 #endif | 742 #endif |
764 while (s < end) | 743 while (s < end) { |
765 { | |
766 const int r = *s++; | 744 const int r = *s++; |
767 const int g = *s++; | 745 const int g = *s++; |
768 const int b = *s++; | 746 const int b = *s++; |
769 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); | 747 *d++ = (b>>3) | ((g&0xFC)<<3) | ((r&0xF8)<<8); |
770 } | 748 } |
784 __asm__ volatile( | 762 __asm__ volatile( |
785 "movq %0, %%mm7 \n\t" | 763 "movq %0, %%mm7 \n\t" |
786 "movq %1, %%mm6 \n\t" | 764 "movq %1, %%mm6 \n\t" |
787 ::"m"(red_15mask),"m"(green_15mask)); | 765 ::"m"(red_15mask),"m"(green_15mask)); |
788 mm_end = end - 11; | 766 mm_end = end - 11; |
789 while (s < mm_end) | 767 while (s < mm_end) { |
790 { | |
791 __asm__ volatile( | 768 __asm__ volatile( |
792 PREFETCH" 32%1 \n\t" | 769 PREFETCH" 32%1 \n\t" |
793 "movd %1, %%mm0 \n\t" | 770 "movd %1, %%mm0 \n\t" |
794 "movd 3%1, %%mm3 \n\t" | 771 "movd 3%1, %%mm3 \n\t" |
795 "punpckldq 6%1, %%mm0 \n\t" | 772 "punpckldq 6%1, %%mm0 \n\t" |
822 s += 12; | 799 s += 12; |
823 } | 800 } |
824 __asm__ volatile(SFENCE:::"memory"); | 801 __asm__ volatile(SFENCE:::"memory"); |
825 __asm__ volatile(EMMS:::"memory"); | 802 __asm__ volatile(EMMS:::"memory"); |
826 #endif | 803 #endif |
827 while (s < end) | 804 while (s < end) { |
828 { | |
829 const int b = *s++; | 805 const int b = *s++; |
830 const int g = *s++; | 806 const int g = *s++; |
831 const int r = *s++; | 807 const int r = *s++; |
832 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); | 808 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); |
833 } | 809 } |
847 __asm__ volatile( | 823 __asm__ volatile( |
848 "movq %0, %%mm7 \n\t" | 824 "movq %0, %%mm7 \n\t" |
849 "movq %1, %%mm6 \n\t" | 825 "movq %1, %%mm6 \n\t" |
850 ::"m"(red_15mask),"m"(green_15mask)); | 826 ::"m"(red_15mask),"m"(green_15mask)); |
851 mm_end = end - 15; | 827 mm_end = end - 15; |
852 while (s < mm_end) | 828 while (s < mm_end) { |
853 { | |
854 __asm__ volatile( | 829 __asm__ volatile( |
855 PREFETCH" 32%1 \n\t" | 830 PREFETCH" 32%1 \n\t" |
856 "movd %1, %%mm0 \n\t" | 831 "movd %1, %%mm0 \n\t" |
857 "movd 3%1, %%mm3 \n\t" | 832 "movd 3%1, %%mm3 \n\t" |
858 "punpckldq 6%1, %%mm0 \n\t" | 833 "punpckldq 6%1, %%mm0 \n\t" |
885 s += 12; | 860 s += 12; |
886 } | 861 } |
887 __asm__ volatile(SFENCE:::"memory"); | 862 __asm__ volatile(SFENCE:::"memory"); |
888 __asm__ volatile(EMMS:::"memory"); | 863 __asm__ volatile(EMMS:::"memory"); |
889 #endif | 864 #endif |
890 while (s < end) | 865 while (s < end) { |
891 { | |
892 const int r = *s++; | 866 const int r = *s++; |
893 const int g = *s++; | 867 const int g = *s++; |
894 const int b = *s++; | 868 const int b = *s++; |
895 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); | 869 *d++ = (b>>3) | ((g&0xF8)<<2) | ((r&0xF8)<<7); |
896 } | 870 } |
927 const uint16_t *s = (const uint16_t*)src; | 901 const uint16_t *s = (const uint16_t*)src; |
928 end = s + src_size/2; | 902 end = s + src_size/2; |
929 #if HAVE_MMX | 903 #if HAVE_MMX |
930 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); | 904 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); |
931 mm_end = end - 7; | 905 mm_end = end - 7; |
932 while (s < mm_end) | 906 while (s < mm_end) { |
933 { | |
934 __asm__ volatile( | 907 __asm__ volatile( |
935 PREFETCH" 32%1 \n\t" | 908 PREFETCH" 32%1 \n\t" |
936 "movq %1, %%mm0 \n\t" | 909 "movq %1, %%mm0 \n\t" |
937 "movq %1, %%mm1 \n\t" | 910 "movq %1, %%mm1 \n\t" |
938 "movq %1, %%mm2 \n\t" | 911 "movq %1, %%mm2 \n\t" |
1047 s += 8; | 1020 s += 8; |
1048 } | 1021 } |
1049 __asm__ volatile(SFENCE:::"memory"); | 1022 __asm__ volatile(SFENCE:::"memory"); |
1050 __asm__ volatile(EMMS:::"memory"); | 1023 __asm__ volatile(EMMS:::"memory"); |
1051 #endif | 1024 #endif |
1052 while (s < end) | 1025 while (s < end) { |
1053 { | |
1054 register uint16_t bgr; | 1026 register uint16_t bgr; |
1055 bgr = *s++; | 1027 bgr = *s++; |
1056 *d++ = (bgr&0x1F)<<3; | 1028 *d++ = (bgr&0x1F)<<3; |
1057 *d++ = (bgr&0x3E0)>>2; | 1029 *d++ = (bgr&0x3E0)>>2; |
1058 *d++ = (bgr&0x7C00)>>7; | 1030 *d++ = (bgr&0x7C00)>>7; |
1069 const uint16_t *s = (const uint16_t *)src; | 1041 const uint16_t *s = (const uint16_t *)src; |
1070 end = s + src_size/2; | 1042 end = s + src_size/2; |
1071 #if HAVE_MMX | 1043 #if HAVE_MMX |
1072 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); | 1044 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); |
1073 mm_end = end - 7; | 1045 mm_end = end - 7; |
1074 while (s < mm_end) | 1046 while (s < mm_end) { |
1075 { | |
1076 __asm__ volatile( | 1047 __asm__ volatile( |
1077 PREFETCH" 32%1 \n\t" | 1048 PREFETCH" 32%1 \n\t" |
1078 "movq %1, %%mm0 \n\t" | 1049 "movq %1, %%mm0 \n\t" |
1079 "movq %1, %%mm1 \n\t" | 1050 "movq %1, %%mm1 \n\t" |
1080 "movq %1, %%mm2 \n\t" | 1051 "movq %1, %%mm2 \n\t" |
1188 s += 8; | 1159 s += 8; |
1189 } | 1160 } |
1190 __asm__ volatile(SFENCE:::"memory"); | 1161 __asm__ volatile(SFENCE:::"memory"); |
1191 __asm__ volatile(EMMS:::"memory"); | 1162 __asm__ volatile(EMMS:::"memory"); |
1192 #endif | 1163 #endif |
1193 while (s < end) | 1164 while (s < end) { |
1194 { | |
1195 register uint16_t bgr; | 1165 register uint16_t bgr; |
1196 bgr = *s++; | 1166 bgr = *s++; |
1197 *d++ = (bgr&0x1F)<<3; | 1167 *d++ = (bgr&0x1F)<<3; |
1198 *d++ = (bgr&0x7E0)>>3; | 1168 *d++ = (bgr&0x7E0)>>3; |
1199 *d++ = (bgr&0xF800)>>8; | 1169 *d++ = (bgr&0xF800)>>8; |
1231 #if HAVE_MMX | 1201 #if HAVE_MMX |
1232 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); | 1202 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); |
1233 __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory"); | 1203 __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory"); |
1234 __asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory"); | 1204 __asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory"); |
1235 mm_end = end - 3; | 1205 mm_end = end - 3; |
1236 while (s < mm_end) | 1206 while (s < mm_end) { |
1237 { | |
1238 __asm__ volatile( | 1207 __asm__ volatile( |
1239 PREFETCH" 32%1 \n\t" | 1208 PREFETCH" 32%1 \n\t" |
1240 "movq %1, %%mm0 \n\t" | 1209 "movq %1, %%mm0 \n\t" |
1241 "movq %1, %%mm1 \n\t" | 1210 "movq %1, %%mm1 \n\t" |
1242 "movq %1, %%mm2 \n\t" | 1211 "movq %1, %%mm2 \n\t" |
1254 s += 4; | 1223 s += 4; |
1255 } | 1224 } |
1256 __asm__ volatile(SFENCE:::"memory"); | 1225 __asm__ volatile(SFENCE:::"memory"); |
1257 __asm__ volatile(EMMS:::"memory"); | 1226 __asm__ volatile(EMMS:::"memory"); |
1258 #endif | 1227 #endif |
1259 while (s < end) | 1228 while (s < end) { |
1260 { | |
1261 register uint16_t bgr; | 1229 register uint16_t bgr; |
1262 bgr = *s++; | 1230 bgr = *s++; |
1263 #if HAVE_BIGENDIAN | 1231 #if HAVE_BIGENDIAN |
1264 *d++ = 255; | 1232 *d++ = 255; |
1265 *d++ = (bgr&0x7C00)>>7; | 1233 *d++ = (bgr&0x7C00)>>7; |
1286 #if HAVE_MMX | 1254 #if HAVE_MMX |
1287 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); | 1255 __asm__ volatile(PREFETCH" %0"::"m"(*s):"memory"); |
1288 __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory"); | 1256 __asm__ volatile("pxor %%mm7,%%mm7 \n\t":::"memory"); |
1289 __asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory"); | 1257 __asm__ volatile("pcmpeqd %%mm6,%%mm6 \n\t":::"memory"); |
1290 mm_end = end - 3; | 1258 mm_end = end - 3; |
1291 while (s < mm_end) | 1259 while (s < mm_end) { |
1292 { | |
1293 __asm__ volatile( | 1260 __asm__ volatile( |
1294 PREFETCH" 32%1 \n\t" | 1261 PREFETCH" 32%1 \n\t" |
1295 "movq %1, %%mm0 \n\t" | 1262 "movq %1, %%mm0 \n\t" |
1296 "movq %1, %%mm1 \n\t" | 1263 "movq %1, %%mm1 \n\t" |
1297 "movq %1, %%mm2 \n\t" | 1264 "movq %1, %%mm2 \n\t" |
1309 s += 4; | 1276 s += 4; |
1310 } | 1277 } |
1311 __asm__ volatile(SFENCE:::"memory"); | 1278 __asm__ volatile(SFENCE:::"memory"); |
1312 __asm__ volatile(EMMS:::"memory"); | 1279 __asm__ volatile(EMMS:::"memory"); |
1313 #endif | 1280 #endif |
1314 while (s < end) | 1281 while (s < end) { |
1315 { | |
1316 register uint16_t bgr; | 1282 register uint16_t bgr; |
1317 bgr = *s++; | 1283 bgr = *s++; |
1318 #if HAVE_BIGENDIAN | 1284 #if HAVE_BIGENDIAN |
1319 *d++ = 255; | 1285 *d++ = 255; |
1320 *d++ = (bgr&0xF800)>>8; | 1286 *d++ = (bgr&0xF800)>>8; |
1451 dst+= src_size; | 1417 dst+= src_size; |
1452 src_size= 23-mmx_size; | 1418 src_size= 23-mmx_size; |
1453 src-= src_size; | 1419 src-= src_size; |
1454 dst-= src_size; | 1420 dst-= src_size; |
1455 #endif | 1421 #endif |
1456 for (i=0; i<src_size; i+=3) | 1422 for (i=0; i<src_size; i+=3) { |
1457 { | |
1458 register uint8_t x; | 1423 register uint8_t x; |
1459 x = src[i + 2]; | 1424 x = src[i + 2]; |
1460 dst[i + 1] = src[i + 1]; | 1425 dst[i + 1] = src[i + 1]; |
1461 dst[i + 2] = src[i + 0]; | 1426 dst[i + 2] = src[i + 0]; |
1462 dst[i + 0] = x; | 1427 dst[i + 0] = x; |
1467 long width, long height, | 1432 long width, long height, |
1468 long lumStride, long chromStride, long dstStride, long vertLumPerChroma) | 1433 long lumStride, long chromStride, long dstStride, long vertLumPerChroma) |
1469 { | 1434 { |
1470 long y; | 1435 long y; |
1471 const x86_reg chromWidth= width>>1; | 1436 const x86_reg chromWidth= width>>1; |
1472 for (y=0; y<height; y++) | 1437 for (y=0; y<height; y++) { |
1473 { | |
1474 #if HAVE_MMX | 1438 #if HAVE_MMX |
1475 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) | 1439 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) |
1476 __asm__ volatile( | 1440 __asm__ volatile( |
1477 "xor %%"REG_a", %%"REG_a" \n\t" | 1441 "xor %%"REG_a", %%"REG_a" \n\t" |
1478 ASMALIGN(4) | 1442 ASMALIGN(4) |
1528 uint64_t *qdst = (uint64_t *) dst; | 1492 uint64_t *qdst = (uint64_t *) dst; |
1529 uint64_t *qdst2 = (uint64_t *) (dst + dstStride); | 1493 uint64_t *qdst2 = (uint64_t *) (dst + dstStride); |
1530 const uint32_t *yc = (uint32_t *) ysrc; | 1494 const uint32_t *yc = (uint32_t *) ysrc; |
1531 const uint32_t *yc2 = (uint32_t *) (ysrc + lumStride); | 1495 const uint32_t *yc2 = (uint32_t *) (ysrc + lumStride); |
1532 const uint16_t *uc = (uint16_t*) usrc, *vc = (uint16_t*) vsrc; | 1496 const uint16_t *uc = (uint16_t*) usrc, *vc = (uint16_t*) vsrc; |
1533 for (i = 0; i < chromWidth; i += 8){ | 1497 for (i = 0; i < chromWidth; i += 8) { |
1534 uint64_t y1, y2, yuv1, yuv2; | 1498 uint64_t y1, y2, yuv1, yuv2; |
1535 uint64_t u, v; | 1499 uint64_t u, v; |
1536 /* Prefetch */ | 1500 /* Prefetch */ |
1537 __asm__("ldq $31,64(%0)" :: "r"(yc)); | 1501 __asm__("ldq $31,64(%0)" :: "r"(yc)); |
1538 __asm__("ldq $31,64(%0)" :: "r"(yc2)); | 1502 __asm__("ldq $31,64(%0)" :: "r"(yc2)); |
1557 | 1521 |
1558 #elif HAVE_FAST_64BIT | 1522 #elif HAVE_FAST_64BIT |
1559 int i; | 1523 int i; |
1560 uint64_t *ldst = (uint64_t *) dst; | 1524 uint64_t *ldst = (uint64_t *) dst; |
1561 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; | 1525 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; |
1562 for (i = 0; i < chromWidth; i += 2){ | 1526 for (i = 0; i < chromWidth; i += 2) { |
1563 uint64_t k, l; | 1527 uint64_t k, l; |
1564 k = yc[0] + (uc[0] << 8) + | 1528 k = yc[0] + (uc[0] << 8) + |
1565 (yc[1] << 16) + (vc[0] << 24); | 1529 (yc[1] << 16) + (vc[0] << 24); |
1566 l = yc[2] + (uc[1] << 8) + | 1530 l = yc[2] + (uc[1] << 8) + |
1567 (yc[3] << 16) + (vc[1] << 24); | 1531 (yc[3] << 16) + (vc[1] << 24); |
1572 } | 1536 } |
1573 | 1537 |
1574 #else | 1538 #else |
1575 int i, *idst = (int32_t *) dst; | 1539 int i, *idst = (int32_t *) dst; |
1576 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; | 1540 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; |
1577 for (i = 0; i < chromWidth; i++){ | 1541 for (i = 0; i < chromWidth; i++) { |
1578 #if HAVE_BIGENDIAN | 1542 #if HAVE_BIGENDIAN |
1579 *idst++ = (yc[0] << 24)+ (uc[0] << 16) + | 1543 *idst++ = (yc[0] << 24)+ (uc[0] << 16) + |
1580 (yc[1] << 8) + (vc[0] << 0); | 1544 (yc[1] << 8) + (vc[0] << 0); |
1581 #else | 1545 #else |
1582 *idst++ = yc[0] + (uc[0] << 8) + | 1546 *idst++ = yc[0] + (uc[0] << 8) + |
1586 uc++; | 1550 uc++; |
1587 vc++; | 1551 vc++; |
1588 } | 1552 } |
1589 #endif | 1553 #endif |
1590 #endif | 1554 #endif |
1591 if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) | 1555 if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) { |
1592 { | |
1593 usrc += chromStride; | 1556 usrc += chromStride; |
1594 vsrc += chromStride; | 1557 vsrc += chromStride; |
1595 } | 1558 } |
1596 ysrc += lumStride; | 1559 ysrc += lumStride; |
1597 dst += dstStride; | 1560 dst += dstStride; |
1619 long width, long height, | 1582 long width, long height, |
1620 long lumStride, long chromStride, long dstStride, long vertLumPerChroma) | 1583 long lumStride, long chromStride, long dstStride, long vertLumPerChroma) |
1621 { | 1584 { |
1622 long y; | 1585 long y; |
1623 const x86_reg chromWidth= width>>1; | 1586 const x86_reg chromWidth= width>>1; |
1624 for (y=0; y<height; y++) | 1587 for (y=0; y<height; y++) { |
1625 { | |
1626 #if HAVE_MMX | 1588 #if HAVE_MMX |
1627 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) | 1589 //FIXME handle 2 lines at once (fewer prefetches, reuse some chroma, but very likely memory-limited anyway) |
1628 __asm__ volatile( | 1590 __asm__ volatile( |
1629 "xor %%"REG_a", %%"REG_a" \n\t" | 1591 "xor %%"REG_a", %%"REG_a" \n\t" |
1630 ASMALIGN(4) | 1592 ASMALIGN(4) |
1663 | 1625 |
1664 #if HAVE_FAST_64BIT | 1626 #if HAVE_FAST_64BIT |
1665 int i; | 1627 int i; |
1666 uint64_t *ldst = (uint64_t *) dst; | 1628 uint64_t *ldst = (uint64_t *) dst; |
1667 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; | 1629 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; |
1668 for (i = 0; i < chromWidth; i += 2){ | 1630 for (i = 0; i < chromWidth; i += 2) { |
1669 uint64_t k, l; | 1631 uint64_t k, l; |
1670 k = uc[0] + (yc[0] << 8) + | 1632 k = uc[0] + (yc[0] << 8) + |
1671 (vc[0] << 16) + (yc[1] << 24); | 1633 (vc[0] << 16) + (yc[1] << 24); |
1672 l = uc[1] + (yc[2] << 8) + | 1634 l = uc[1] + (yc[2] << 8) + |
1673 (vc[1] << 16) + (yc[3] << 24); | 1635 (vc[1] << 16) + (yc[3] << 24); |
1678 } | 1640 } |
1679 | 1641 |
1680 #else | 1642 #else |
1681 int i, *idst = (int32_t *) dst; | 1643 int i, *idst = (int32_t *) dst; |
1682 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; | 1644 const uint8_t *yc = ysrc, *uc = usrc, *vc = vsrc; |
1683 for (i = 0; i < chromWidth; i++){ | 1645 for (i = 0; i < chromWidth; i++) { |
1684 #if HAVE_BIGENDIAN | 1646 #if HAVE_BIGENDIAN |
1685 *idst++ = (uc[0] << 24)+ (yc[0] << 16) + | 1647 *idst++ = (uc[0] << 24)+ (yc[0] << 16) + |
1686 (vc[0] << 8) + (yc[1] << 0); | 1648 (vc[0] << 8) + (yc[1] << 0); |
1687 #else | 1649 #else |
1688 *idst++ = uc[0] + (yc[0] << 8) + | 1650 *idst++ = uc[0] + (yc[0] << 8) + |
1692 uc++; | 1654 uc++; |
1693 vc++; | 1655 vc++; |
1694 } | 1656 } |
1695 #endif | 1657 #endif |
1696 #endif | 1658 #endif |
1697 if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) | 1659 if ((y&(vertLumPerChroma-1)) == vertLumPerChroma-1) { |
1698 { | |
1699 usrc += chromStride; | 1660 usrc += chromStride; |
1700 vsrc += chromStride; | 1661 vsrc += chromStride; |
1701 } | 1662 } |
1702 ysrc += lumStride; | 1663 ysrc += lumStride; |
1703 dst += dstStride; | 1664 dst += dstStride; |
1749 long width, long height, | 1710 long width, long height, |
1750 long lumStride, long chromStride, long srcStride) | 1711 long lumStride, long chromStride, long srcStride) |
1751 { | 1712 { |
1752 long y; | 1713 long y; |
1753 const x86_reg chromWidth= width>>1; | 1714 const x86_reg chromWidth= width>>1; |
1754 for (y=0; y<height; y+=2) | 1715 for (y=0; y<height; y+=2) { |
1755 { | |
1756 #if HAVE_MMX | 1716 #if HAVE_MMX |
1757 __asm__ volatile( | 1717 __asm__ volatile( |
1758 "xor %%"REG_a", %%"REG_a" \n\t" | 1718 "xor %%"REG_a", %%"REG_a" \n\t" |
1759 "pcmpeqw %%mm7, %%mm7 \n\t" | 1719 "pcmpeqw %%mm7, %%mm7 \n\t" |
1760 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... | 1720 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... |
1835 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) | 1795 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) |
1836 : "memory", "%"REG_a | 1796 : "memory", "%"REG_a |
1837 ); | 1797 ); |
1838 #else | 1798 #else |
1839 long i; | 1799 long i; |
1840 for (i=0; i<chromWidth; i++) | 1800 for (i=0; i<chromWidth; i++) { |
1841 { | |
1842 ydst[2*i+0] = src[4*i+0]; | 1801 ydst[2*i+0] = src[4*i+0]; |
1843 udst[i] = src[4*i+1]; | 1802 udst[i] = src[4*i+1]; |
1844 ydst[2*i+1] = src[4*i+2]; | 1803 ydst[2*i+1] = src[4*i+2]; |
1845 vdst[i] = src[4*i+3]; | 1804 vdst[i] = src[4*i+3]; |
1846 } | 1805 } |
1847 ydst += lumStride; | 1806 ydst += lumStride; |
1848 src += srcStride; | 1807 src += srcStride; |
1849 | 1808 |
1850 for (i=0; i<chromWidth; i++) | 1809 for (i=0; i<chromWidth; i++) { |
1851 { | |
1852 ydst[2*i+0] = src[4*i+0]; | 1810 ydst[2*i+0] = src[4*i+0]; |
1853 ydst[2*i+1] = src[4*i+2]; | 1811 ydst[2*i+1] = src[4*i+2]; |
1854 } | 1812 } |
1855 #endif | 1813 #endif |
1856 udst += chromStride; | 1814 udst += chromStride; |
1880 long x,y; | 1838 long x,y; |
1881 | 1839 |
1882 dst[0]= src[0]; | 1840 dst[0]= src[0]; |
1883 | 1841 |
1884 // first line | 1842 // first line |
1885 for (x=0; x<srcWidth-1; x++){ | 1843 for (x=0; x<srcWidth-1; x++) { |
1886 dst[2*x+1]= (3*src[x] + src[x+1])>>2; | 1844 dst[2*x+1]= (3*src[x] + src[x+1])>>2; |
1887 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2; | 1845 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2; |
1888 } | 1846 } |
1889 dst[2*srcWidth-1]= src[srcWidth-1]; | 1847 dst[2*srcWidth-1]= src[srcWidth-1]; |
1890 | 1848 |
1891 dst+= dstStride; | 1849 dst+= dstStride; |
1892 | 1850 |
1893 for (y=1; y<srcHeight; y++){ | 1851 for (y=1; y<srcHeight; y++) { |
1894 #if HAVE_MMX2 || HAVE_AMD3DNOW | 1852 #if HAVE_MMX2 || HAVE_AMD3DNOW |
1895 const x86_reg mmxSize= srcWidth&~15; | 1853 const x86_reg mmxSize= srcWidth&~15; |
1896 __asm__ volatile( | 1854 __asm__ volatile( |
1897 "mov %4, %%"REG_a" \n\t" | 1855 "mov %4, %%"REG_a" \n\t" |
1898 "1: \n\t" | 1856 "1: \n\t" |
1939 const x86_reg mmxSize=1; | 1897 const x86_reg mmxSize=1; |
1940 #endif | 1898 #endif |
1941 dst[0 ]= (3*src[0] + src[srcStride])>>2; | 1899 dst[0 ]= (3*src[0] + src[srcStride])>>2; |
1942 dst[dstStride]= ( src[0] + 3*src[srcStride])>>2; | 1900 dst[dstStride]= ( src[0] + 3*src[srcStride])>>2; |
1943 | 1901 |
1944 for (x=mmxSize-1; x<srcWidth-1; x++){ | 1902 for (x=mmxSize-1; x<srcWidth-1; x++) { |
1945 dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2; | 1903 dst[2*x +1]= (3*src[x+0] + src[x+srcStride+1])>>2; |
1946 dst[2*x+dstStride+2]= ( src[x+0] + 3*src[x+srcStride+1])>>2; | 1904 dst[2*x+dstStride+2]= ( src[x+0] + 3*src[x+srcStride+1])>>2; |
1947 dst[2*x+dstStride+1]= ( src[x+1] + 3*src[x+srcStride ])>>2; | 1905 dst[2*x+dstStride+1]= ( src[x+1] + 3*src[x+srcStride ])>>2; |
1948 dst[2*x +2]= (3*src[x+1] + src[x+srcStride ])>>2; | 1906 dst[2*x +2]= (3*src[x+1] + src[x+srcStride ])>>2; |
1949 } | 1907 } |
1956 | 1914 |
1957 // last line | 1915 // last line |
1958 #if 1 | 1916 #if 1 |
1959 dst[0]= src[0]; | 1917 dst[0]= src[0]; |
1960 | 1918 |
1961 for (x=0; x<srcWidth-1; x++){ | 1919 for (x=0; x<srcWidth-1; x++) { |
1962 dst[2*x+1]= (3*src[x] + src[x+1])>>2; | 1920 dst[2*x+1]= (3*src[x] + src[x+1])>>2; |
1963 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2; | 1921 dst[2*x+2]= ( src[x] + 3*src[x+1])>>2; |
1964 } | 1922 } |
1965 dst[2*srcWidth-1]= src[srcWidth-1]; | 1923 dst[2*srcWidth-1]= src[srcWidth-1]; |
1966 #else | 1924 #else |
1967 for (x=0; x<srcWidth; x++){ | 1925 for (x=0; x<srcWidth; x++) { |
1968 dst[2*x+0]= | 1926 dst[2*x+0]= |
1969 dst[2*x+1]= src[x]; | 1927 dst[2*x+1]= src[x]; |
1970 } | 1928 } |
1971 #endif | 1929 #endif |
1972 | 1930 |
1987 long width, long height, | 1945 long width, long height, |
1988 long lumStride, long chromStride, long srcStride) | 1946 long lumStride, long chromStride, long srcStride) |
1989 { | 1947 { |
1990 long y; | 1948 long y; |
1991 const x86_reg chromWidth= width>>1; | 1949 const x86_reg chromWidth= width>>1; |
1992 for (y=0; y<height; y+=2) | 1950 for (y=0; y<height; y+=2) { |
1993 { | |
1994 #if HAVE_MMX | 1951 #if HAVE_MMX |
1995 __asm__ volatile( | 1952 __asm__ volatile( |
1996 "xor %%"REG_a", %%"REG_a" \n\t" | 1953 "xor %%"REG_a", %%"REG_a" \n\t" |
1997 "pcmpeqw %%mm7, %%mm7 \n\t" | 1954 "pcmpeqw %%mm7, %%mm7 \n\t" |
1998 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... | 1955 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00... |
2073 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) | 2030 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth) |
2074 : "memory", "%"REG_a | 2031 : "memory", "%"REG_a |
2075 ); | 2032 ); |
2076 #else | 2033 #else |
2077 long i; | 2034 long i; |
2078 for (i=0; i<chromWidth; i++) | 2035 for (i=0; i<chromWidth; i++) { |
2079 { | |
2080 udst[i] = src[4*i+0]; | 2036 udst[i] = src[4*i+0]; |
2081 ydst[2*i+0] = src[4*i+1]; | 2037 ydst[2*i+0] = src[4*i+1]; |
2082 vdst[i] = src[4*i+2]; | 2038 vdst[i] = src[4*i+2]; |
2083 ydst[2*i+1] = src[4*i+3]; | 2039 ydst[2*i+1] = src[4*i+3]; |
2084 } | 2040 } |
2085 ydst += lumStride; | 2041 ydst += lumStride; |
2086 src += srcStride; | 2042 src += srcStride; |
2087 | 2043 |
2088 for (i=0; i<chromWidth; i++) | 2044 for (i=0; i<chromWidth; i++) { |
2089 { | |
2090 ydst[2*i+0] = src[4*i+1]; | 2045 ydst[2*i+0] = src[4*i+1]; |
2091 ydst[2*i+1] = src[4*i+3]; | 2046 ydst[2*i+1] = src[4*i+3]; |
2092 } | 2047 } |
2093 #endif | 2048 #endif |
2094 udst += chromStride; | 2049 udst += chromStride; |
2115 long lumStride, long chromStride, long srcStride) | 2070 long lumStride, long chromStride, long srcStride) |
2116 { | 2071 { |
2117 long y; | 2072 long y; |
2118 const x86_reg chromWidth= width>>1; | 2073 const x86_reg chromWidth= width>>1; |
2119 #if HAVE_MMX | 2074 #if HAVE_MMX |
2120 for (y=0; y<height-2; y+=2) | 2075 for (y=0; y<height-2; y+=2) { |
2121 { | |
2122 long i; | 2076 long i; |
2123 for (i=0; i<2; i++) | 2077 for (i=0; i<2; i++) { |
2124 { | |
2125 __asm__ volatile( | 2078 __asm__ volatile( |
2126 "mov %2, %%"REG_a" \n\t" | 2079 "mov %2, %%"REG_a" \n\t" |
2127 "movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t" | 2080 "movq "MANGLE(ff_bgr2YCoeff)", %%mm6 \n\t" |
2128 "movq "MANGLE(ff_w1111)", %%mm5 \n\t" | 2081 "movq "MANGLE(ff_w1111)", %%mm5 \n\t" |
2129 "pxor %%mm7, %%mm7 \n\t" | 2082 "pxor %%mm7, %%mm7 \n\t" |
2353 SFENCE" \n\t" | 2306 SFENCE" \n\t" |
2354 :::"memory"); | 2307 :::"memory"); |
2355 #else | 2308 #else |
2356 y=0; | 2309 y=0; |
2357 #endif | 2310 #endif |
2358 for (; y<height; y+=2) | 2311 for (; y<height; y+=2) { |
2359 { | |
2360 long i; | 2312 long i; |
2361 for (i=0; i<chromWidth; i++) | 2313 for (i=0; i<chromWidth; i++) { |
2362 { | |
2363 unsigned int b = src[6*i+0]; | 2314 unsigned int b = src[6*i+0]; |
2364 unsigned int g = src[6*i+1]; | 2315 unsigned int g = src[6*i+1]; |
2365 unsigned int r = src[6*i+2]; | 2316 unsigned int r = src[6*i+2]; |
2366 | 2317 |
2367 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; | 2318 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; |
2380 ydst[2*i+1] = Y; | 2331 ydst[2*i+1] = Y; |
2381 } | 2332 } |
2382 ydst += lumStride; | 2333 ydst += lumStride; |
2383 src += srcStride; | 2334 src += srcStride; |
2384 | 2335 |
2385 for (i=0; i<chromWidth; i++) | 2336 for (i=0; i<chromWidth; i++) { |
2386 { | |
2387 unsigned int b = src[6*i+0]; | 2337 unsigned int b = src[6*i+0]; |
2388 unsigned int g = src[6*i+1]; | 2338 unsigned int g = src[6*i+1]; |
2389 unsigned int r = src[6*i+2]; | 2339 unsigned int r = src[6*i+2]; |
2390 | 2340 |
2391 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; | 2341 unsigned int Y = ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; |
2406 } | 2356 } |
2407 } | 2357 } |
2408 | 2358 |
2409 static void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest, | 2359 static void RENAME(interleaveBytes)(uint8_t *src1, uint8_t *src2, uint8_t *dest, |
2410 long width, long height, long src1Stride, | 2360 long width, long height, long src1Stride, |
2411 long src2Stride, long dstStride){ | 2361 long src2Stride, long dstStride) |
2362 { | |
2412 long h; | 2363 long h; |
2413 | 2364 |
2414 for (h=0; h < height; h++) | 2365 for (h=0; h < height; h++) { |
2415 { | |
2416 long w; | 2366 long w; |
2417 | 2367 |
2418 #if HAVE_MMX | 2368 #if HAVE_MMX |
2419 #if HAVE_SSE2 | 2369 #if HAVE_SSE2 |
2420 __asm__( | 2370 __asm__( |
2460 " jb 1b \n\t" | 2410 " jb 1b \n\t" |
2461 ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15) | 2411 ::"r"(dest), "r"(src1), "r"(src2), "r" ((x86_reg)width-15) |
2462 : "memory", "%"REG_a | 2412 : "memory", "%"REG_a |
2463 ); | 2413 ); |
2464 #endif | 2414 #endif |
2465 for (w= (width&(~15)); w < width; w++) | 2415 for (w= (width&(~15)); w < width; w++) { |
2466 { | |
2467 dest[2*w+0] = src1[w]; | 2416 dest[2*w+0] = src1[w]; |
2468 dest[2*w+1] = src2[w]; | 2417 dest[2*w+1] = src2[w]; |
2469 } | 2418 } |
2470 #else | 2419 #else |
2471 for (w=0; w < width; w++) | 2420 for (w=0; w < width; w++) { |
2472 { | |
2473 dest[2*w+0] = src1[w]; | 2421 dest[2*w+0] = src1[w]; |
2474 dest[2*w+1] = src2[w]; | 2422 dest[2*w+1] = src2[w]; |
2475 } | 2423 } |
2476 #endif | 2424 #endif |
2477 dest += dstStride; | 2425 dest += dstStride; |
2500 __asm__ volatile( | 2448 __asm__ volatile( |
2501 PREFETCH" %0 \n\t" | 2449 PREFETCH" %0 \n\t" |
2502 PREFETCH" %1 \n\t" | 2450 PREFETCH" %1 \n\t" |
2503 ::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory"); | 2451 ::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)):"memory"); |
2504 #endif | 2452 #endif |
2505 for (y=0;y<h;y++){ | 2453 for (y=0;y<h;y++) { |
2506 const uint8_t* s1=src1+srcStride1*(y>>1); | 2454 const uint8_t* s1=src1+srcStride1*(y>>1); |
2507 uint8_t* d=dst1+dstStride1*y; | 2455 uint8_t* d=dst1+dstStride1*y; |
2508 x=0; | 2456 x=0; |
2509 #if HAVE_MMX | 2457 #if HAVE_MMX |
2510 for (;x<w-31;x+=32) | 2458 for (;x<w-31;x+=32) { |
2511 { | |
2512 __asm__ volatile( | 2459 __asm__ volatile( |
2513 PREFETCH" 32%1 \n\t" | 2460 PREFETCH" 32%1 \n\t" |
2514 "movq %1, %%mm0 \n\t" | 2461 "movq %1, %%mm0 \n\t" |
2515 "movq 8%1, %%mm2 \n\t" | 2462 "movq 8%1, %%mm2 \n\t" |
2516 "movq 16%1, %%mm4 \n\t" | 2463 "movq 16%1, %%mm4 \n\t" |
2540 :"memory"); | 2487 :"memory"); |
2541 } | 2488 } |
2542 #endif | 2489 #endif |
2543 for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x]; | 2490 for (;x<w;x++) d[2*x]=d[2*x+1]=s1[x]; |
2544 } | 2491 } |
2545 for (y=0;y<h;y++){ | 2492 for (y=0;y<h;y++) { |
2546 const uint8_t* s2=src2+srcStride2*(y>>1); | 2493 const uint8_t* s2=src2+srcStride2*(y>>1); |
2547 uint8_t* d=dst2+dstStride2*y; | 2494 uint8_t* d=dst2+dstStride2*y; |
2548 x=0; | 2495 x=0; |
2549 #if HAVE_MMX | 2496 #if HAVE_MMX |
2550 for (;x<w-31;x+=32) | 2497 for (;x<w-31;x+=32) { |
2551 { | |
2552 __asm__ volatile( | 2498 __asm__ volatile( |
2553 PREFETCH" 32%1 \n\t" | 2499 PREFETCH" 32%1 \n\t" |
2554 "movq %1, %%mm0 \n\t" | 2500 "movq %1, %%mm0 \n\t" |
2555 "movq 8%1, %%mm2 \n\t" | 2501 "movq 8%1, %%mm2 \n\t" |
2556 "movq 16%1, %%mm4 \n\t" | 2502 "movq 16%1, %%mm4 \n\t" |
2598 long srcStride3, long dstStride) | 2544 long srcStride3, long dstStride) |
2599 { | 2545 { |
2600 x86_reg x; | 2546 x86_reg x; |
2601 long y,w,h; | 2547 long y,w,h; |
2602 w=width/2; h=height; | 2548 w=width/2; h=height; |
2603 for (y=0;y<h;y++){ | 2549 for (y=0;y<h;y++) { |
2604 const uint8_t* yp=src1+srcStride1*y; | 2550 const uint8_t* yp=src1+srcStride1*y; |
2605 const uint8_t* up=src2+srcStride2*(y>>2); | 2551 const uint8_t* up=src2+srcStride2*(y>>2); |
2606 const uint8_t* vp=src3+srcStride3*(y>>2); | 2552 const uint8_t* vp=src3+srcStride3*(y>>2); |
2607 uint8_t* d=dst+dstStride*y; | 2553 uint8_t* d=dst+dstStride*y; |
2608 x=0; | 2554 x=0; |
2609 #if HAVE_MMX | 2555 #if HAVE_MMX |
2610 for (;x<w-7;x+=8) | 2556 for (;x<w-7;x+=8) { |
2611 { | |
2612 __asm__ volatile( | 2557 __asm__ volatile( |
2613 PREFETCH" 32(%1, %0) \n\t" | 2558 PREFETCH" 32(%1, %0) \n\t" |
2614 PREFETCH" 32(%2, %0) \n\t" | 2559 PREFETCH" 32(%2, %0) \n\t" |
2615 PREFETCH" 32(%3, %0) \n\t" | 2560 PREFETCH" 32(%3, %0) \n\t" |
2616 "movq (%1, %0, 4), %%mm0 \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */ | 2561 "movq (%1, %0, 4), %%mm0 \n\t" /* Y0Y1Y2Y3Y4Y5Y6Y7 */ |
2659 : "+r" (x) | 2604 : "+r" (x) |
2660 : "r"(yp), "r" (up), "r"(vp), "r"(d) | 2605 : "r"(yp), "r" (up), "r"(vp), "r"(d) |
2661 :"memory"); | 2606 :"memory"); |
2662 } | 2607 } |
2663 #endif | 2608 #endif |
2664 for (; x<w; x++) | 2609 for (; x<w; x++) { |
2665 { | |
2666 const long x2 = x<<2; | 2610 const long x2 = x<<2; |
2667 d[8*x+0] = yp[x2]; | 2611 d[8*x+0] = yp[x2]; |
2668 d[8*x+1] = up[x]; | 2612 d[8*x+1] = up[x]; |
2669 d[8*x+2] = yp[x2+1]; | 2613 d[8*x+2] = yp[x2+1]; |
2670 d[8*x+3] = vp[x]; | 2614 d[8*x+3] = vp[x]; |
2688 dst += count; | 2632 dst += count; |
2689 src += 2*count; | 2633 src += 2*count; |
2690 count= - count; | 2634 count= - count; |
2691 | 2635 |
2692 #if HAVE_MMX | 2636 #if HAVE_MMX |
2693 if(count <= -16){ | 2637 if(count <= -16) { |
2694 count += 15; | 2638 count += 15; |
2695 __asm__ volatile( | 2639 __asm__ volatile( |
2696 "pcmpeqw %%mm7, %%mm7 \n\t" | 2640 "pcmpeqw %%mm7, %%mm7 \n\t" |
2697 "psrlw $8, %%mm7 \n\t" | 2641 "psrlw $8, %%mm7 \n\t" |
2698 "1: \n\t" | 2642 "1: \n\t" |
2714 : "r"(src), "r"(dst) | 2658 : "r"(src), "r"(dst) |
2715 ); | 2659 ); |
2716 count -= 15; | 2660 count -= 15; |
2717 } | 2661 } |
2718 #endif | 2662 #endif |
2719 while(count<0){ | 2663 while(count<0) { |
2720 dst[count]= src[2*count]; | 2664 dst[count]= src[2*count]; |
2721 count++; | 2665 count++; |
2722 } | 2666 } |
2723 } | 2667 } |
2724 | 2668 |
2727 dst0+= count; | 2671 dst0+= count; |
2728 dst1+= count; | 2672 dst1+= count; |
2729 src += 4*count; | 2673 src += 4*count; |
2730 count= - count; | 2674 count= - count; |
2731 #if HAVE_MMX | 2675 #if HAVE_MMX |
2732 if(count <= -8){ | 2676 if(count <= -8) { |
2733 count += 7; | 2677 count += 7; |
2734 __asm__ volatile( | 2678 __asm__ volatile( |
2735 "pcmpeqw %%mm7, %%mm7 \n\t" | 2679 "pcmpeqw %%mm7, %%mm7 \n\t" |
2736 "psrlw $8, %%mm7 \n\t" | 2680 "psrlw $8, %%mm7 \n\t" |
2737 "1: \n\t" | 2681 "1: \n\t" |
2761 : "r"(src), "r"(dst0), "r"(dst1) | 2705 : "r"(src), "r"(dst0), "r"(dst1) |
2762 ); | 2706 ); |
2763 count -= 7; | 2707 count -= 7; |
2764 } | 2708 } |
2765 #endif | 2709 #endif |
2766 while(count<0){ | 2710 while(count<0) { |
2767 dst0[count]= src[4*count+0]; | 2711 dst0[count]= src[4*count+0]; |
2768 dst1[count]= src[4*count+2]; | 2712 dst1[count]= src[4*count+2]; |
2769 count++; | 2713 count++; |
2770 } | 2714 } |
2771 } | 2715 } |
2776 dst1 += count; | 2720 dst1 += count; |
2777 src0 += 4*count; | 2721 src0 += 4*count; |
2778 src1 += 4*count; | 2722 src1 += 4*count; |
2779 count= - count; | 2723 count= - count; |
2780 #ifdef PAVGB | 2724 #ifdef PAVGB |
2781 if(count <= -8){ | 2725 if(count <= -8) { |
2782 count += 7; | 2726 count += 7; |
2783 __asm__ volatile( | 2727 __asm__ volatile( |
2784 "pcmpeqw %%mm7, %%mm7 \n\t" | 2728 "pcmpeqw %%mm7, %%mm7 \n\t" |
2785 "psrlw $8, %%mm7 \n\t" | 2729 "psrlw $8, %%mm7 \n\t" |
2786 "1: \n\t" | 2730 "1: \n\t" |
2814 : "r"(src0), "r"(src1), "r"(dst0), "r"(dst1) | 2758 : "r"(src0), "r"(src1), "r"(dst0), "r"(dst1) |
2815 ); | 2759 ); |
2816 count -= 7; | 2760 count -= 7; |
2817 } | 2761 } |
2818 #endif | 2762 #endif |
2819 while(count<0){ | 2763 while(count<0) { |
2820 dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1; | 2764 dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1; |
2821 dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1; | 2765 dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1; |
2822 count++; | 2766 count++; |
2823 } | 2767 } |
2824 } | 2768 } |
2828 dst0+= count; | 2772 dst0+= count; |
2829 dst1+= count; | 2773 dst1+= count; |
2830 src += 4*count; | 2774 src += 4*count; |
2831 count= - count; | 2775 count= - count; |
2832 #if HAVE_MMX | 2776 #if HAVE_MMX |
2833 if(count <= -8){ | 2777 if(count <= -8) { |
2834 count += 7; | 2778 count += 7; |
2835 __asm__ volatile( | 2779 __asm__ volatile( |
2836 "pcmpeqw %%mm7, %%mm7 \n\t" | 2780 "pcmpeqw %%mm7, %%mm7 \n\t" |
2837 "psrlw $8, %%mm7 \n\t" | 2781 "psrlw $8, %%mm7 \n\t" |
2838 "1: \n\t" | 2782 "1: \n\t" |
2863 ); | 2807 ); |
2864 count -= 7; | 2808 count -= 7; |
2865 } | 2809 } |
2866 #endif | 2810 #endif |
2867 src++; | 2811 src++; |
2868 while(count<0){ | 2812 while(count<0) { |
2869 dst0[count]= src[4*count+0]; | 2813 dst0[count]= src[4*count+0]; |
2870 dst1[count]= src[4*count+2]; | 2814 dst1[count]= src[4*count+2]; |
2871 count++; | 2815 count++; |
2872 } | 2816 } |
2873 } | 2817 } |
2878 dst1 += count; | 2822 dst1 += count; |
2879 src0 += 4*count; | 2823 src0 += 4*count; |
2880 src1 += 4*count; | 2824 src1 += 4*count; |
2881 count= - count; | 2825 count= - count; |
2882 #ifdef PAVGB | 2826 #ifdef PAVGB |
2883 if(count <= -8){ | 2827 if(count <= -8) { |
2884 count += 7; | 2828 count += 7; |
2885 __asm__ volatile( | 2829 __asm__ volatile( |
2886 "pcmpeqw %%mm7, %%mm7 \n\t" | 2830 "pcmpeqw %%mm7, %%mm7 \n\t" |
2887 "psrlw $8, %%mm7 \n\t" | 2831 "psrlw $8, %%mm7 \n\t" |
2888 "1: \n\t" | 2832 "1: \n\t" |
2918 count -= 7; | 2862 count -= 7; |
2919 } | 2863 } |
2920 #endif | 2864 #endif |
2921 src0++; | 2865 src0++; |
2922 src1++; | 2866 src1++; |
2923 while(count<0){ | 2867 while(count<0) { |
2924 dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1; | 2868 dst0[count]= (src0[4*count+0]+src1[4*count+0])>>1; |
2925 dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1; | 2869 dst1[count]= (src0[4*count+2]+src1[4*count+2])>>1; |
2926 count++; | 2870 count++; |
2927 } | 2871 } |
2928 } | 2872 } |
2932 long lumStride, long chromStride, long srcStride) | 2876 long lumStride, long chromStride, long srcStride) |
2933 { | 2877 { |
2934 long y; | 2878 long y; |
2935 const long chromWidth= -((-width)>>1); | 2879 const long chromWidth= -((-width)>>1); |
2936 | 2880 |
2937 for (y=0; y<height; y++){ | 2881 for (y=0; y<height; y++) { |
2938 RENAME(extract_even)(src, ydst, width); | 2882 RENAME(extract_even)(src, ydst, width); |
2939 if(y&1){ | 2883 if(y&1) { |
2940 RENAME(extract_odd2avg)(src-srcStride, src, udst, vdst, chromWidth); | 2884 RENAME(extract_odd2avg)(src-srcStride, src, udst, vdst, chromWidth); |
2941 udst+= chromStride; | 2885 udst+= chromStride; |
2942 vdst+= chromStride; | 2886 vdst+= chromStride; |
2943 } | 2887 } |
2944 | 2888 |
2959 long lumStride, long chromStride, long srcStride) | 2903 long lumStride, long chromStride, long srcStride) |
2960 { | 2904 { |
2961 long y; | 2905 long y; |
2962 const long chromWidth= -((-width)>>1); | 2906 const long chromWidth= -((-width)>>1); |
2963 | 2907 |
2964 for (y=0; y<height; y++){ | 2908 for (y=0; y<height; y++) { |
2965 RENAME(extract_even)(src, ydst, width); | 2909 RENAME(extract_even)(src, ydst, width); |
2966 RENAME(extract_odd2)(src, udst, vdst, chromWidth); | 2910 RENAME(extract_odd2)(src, udst, vdst, chromWidth); |
2967 | 2911 |
2968 src += srcStride; | 2912 src += srcStride; |
2969 ydst+= lumStride; | 2913 ydst+= lumStride; |
2984 long lumStride, long chromStride, long srcStride) | 2928 long lumStride, long chromStride, long srcStride) |
2985 { | 2929 { |
2986 long y; | 2930 long y; |
2987 const long chromWidth= -((-width)>>1); | 2931 const long chromWidth= -((-width)>>1); |
2988 | 2932 |
2989 for (y=0; y<height; y++){ | 2933 for (y=0; y<height; y++) { |
2990 RENAME(extract_even)(src+1, ydst, width); | 2934 RENAME(extract_even)(src+1, ydst, width); |
2991 if(y&1){ | 2935 if(y&1) { |
2992 RENAME(extract_even2avg)(src-srcStride, src, udst, vdst, chromWidth); | 2936 RENAME(extract_even2avg)(src-srcStride, src, udst, vdst, chromWidth); |
2993 udst+= chromStride; | 2937 udst+= chromStride; |
2994 vdst+= chromStride; | 2938 vdst+= chromStride; |
2995 } | 2939 } |
2996 | 2940 |
3011 long lumStride, long chromStride, long srcStride) | 2955 long lumStride, long chromStride, long srcStride) |
3012 { | 2956 { |
3013 long y; | 2957 long y; |
3014 const long chromWidth= -((-width)>>1); | 2958 const long chromWidth= -((-width)>>1); |
3015 | 2959 |
3016 for (y=0; y<height; y++){ | 2960 for (y=0; y<height; y++) { |
3017 RENAME(extract_even)(src+1, ydst, width); | 2961 RENAME(extract_even)(src+1, ydst, width); |
3018 RENAME(extract_even2)(src, udst, vdst, chromWidth); | 2962 RENAME(extract_even2)(src, udst, vdst, chromWidth); |
3019 | 2963 |
3020 src += srcStride; | 2964 src += srcStride; |
3021 ydst+= lumStride; | 2965 ydst+= lumStride; |
3029 ::: "memory" | 2973 ::: "memory" |
3030 ); | 2974 ); |
3031 #endif | 2975 #endif |
3032 } | 2976 } |
3033 | 2977 |
3034 static inline void RENAME(rgb2rgb_init)(void){ | 2978 static inline void RENAME(rgb2rgb_init)(void) |
2979 { | |
3035 rgb15to16 = RENAME(rgb15to16); | 2980 rgb15to16 = RENAME(rgb15to16); |
3036 rgb15tobgr24 = RENAME(rgb15tobgr24); | 2981 rgb15tobgr24 = RENAME(rgb15tobgr24); |
3037 rgb15to32 = RENAME(rgb15to32); | 2982 rgb15to32 = RENAME(rgb15to32); |
3038 rgb16tobgr24 = RENAME(rgb16tobgr24); | 2983 rgb16tobgr24 = RENAME(rgb16tobgr24); |
3039 rgb16to32 = RENAME(rgb16to32); | 2984 rgb16to32 = RENAME(rgb16to32); |