Mercurial > mplayer.hg
comparison postproc/swscale_template.c @ 2572:f2353173d52c
C optimizations: array indexing is faster than pointer increments (16bpp variants tested, about 2% faster)
| field | value |
|---|---|
| author | michael |
| date | Tue, 30 Oct 2001 21:33:01 +0000 |
| parents | 30b736e7feef |
| children | 37da7219ebaf |
Comparison legend: equal, deleted, inserted, replaced
2571:13608ff3d1e6 | 2572:f2353173d52c |
---|---|
504 | 504 |
505 asm volatile ("\n\t"::: "memory"); | 505 asm volatile ("\n\t"::: "memory"); |
506 | 506 |
507 for(i=0;i<dstw;i++) | 507 for(i=0;i<dstw;i++) |
508 { | 508 { |
509 ((uint8_t*)dest)[0] = (buf0[i]*yalpha1+buf1[i]*yalpha)>>19; | 509 ((uint8_t*)dest)[i] = (buf0[i]*yalpha1+buf1[i]*yalpha)>>19; |
510 dest++; | |
511 } | 510 } |
512 | 511 |
513 if(uvalpha != -1) | 512 if(uvalpha != -1) |
514 { | 513 { |
515 for(i=0; i<dstw/2; i++) | 514 for(i=0; i<dstw/2; i++) |
516 { | 515 { |
517 ((uint8_t*)uDest)[0] = (uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19; | 516 ((uint8_t*)uDest)[i] = (uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19; |
518 ((uint8_t*)vDest)[0] = (uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19; | 517 ((uint8_t*)vDest)[i] = (uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19; |
519 uDest++; | |
520 vDest++; | |
521 } | 518 } |
522 } | 519 } |
523 } | 520 } |
524 | 521 |
525 /** | 522 /** |
702 // vertical linear interpolation && yuv2rgb in a single step: | 699 // vertical linear interpolation && yuv2rgb in a single step: |
703 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; | 700 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; |
704 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); | 701 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); |
705 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); | 702 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); |
706 | 703 |
707 ((uint16_t*)dest)[0] = | 704 ((uint16_t*)dest)[i] = |
708 (clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) | | 705 (clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) | |
709 ((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<3)&0x07E0) | | 706 ((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<3)&0x07E0) | |
710 ((clip_table[(Y + yuvtab_3343[V]) >>13]<<8)&0xF800); | 707 ((clip_table[(Y + yuvtab_3343[V]) >>13]<<8)&0xF800); |
711 dest+=2; | |
712 } | 708 } |
713 } | 709 } |
714 else if(dstbpp==15) | 710 else if(dstbpp==15) |
715 { | 711 { |
716 for(i=0;i<dstw;i++){ | 712 for(i=0;i<dstw;i++){ |
717 // vertical linear interpolation && yuv2rgb in a single step: | 713 // vertical linear interpolation && yuv2rgb in a single step: |
718 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; | 714 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; |
719 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); | 715 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); |
720 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); | 716 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); |
721 | 717 |
722 ((uint16_t*)dest)[0] = | 718 ((uint16_t*)dest)[i] = |
723 (clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) | | 719 (clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) | |
724 ((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<2)&0x03E0) | | 720 ((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<2)&0x03E0) | |
725 ((clip_table[(Y + yuvtab_3343[V]) >>13]<<7)&0x7C00); | 721 ((clip_table[(Y + yuvtab_3343[V]) >>13]<<7)&0x7C00); |
726 dest+=2; | |
727 } | 722 } |
728 } | 723 } |
729 #endif | 724 #endif |
730 }//FULL_UV_IPOL | 725 }//FULL_UV_IPOL |
731 else | 726 else |
812 // vertical linear interpolation && yuv2rgb in a single step: | 807 // vertical linear interpolation && yuv2rgb in a single step: |
813 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; | 808 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; |
814 int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19); | 809 int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19); |
815 int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19); | 810 int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19); |
816 | 811 |
817 ((uint16_t*)dest)[0] = | 812 ((uint16_t*)dest)[i] = |
818 (clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) | | 813 (clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) | |
819 ((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<3)&0x07E0) | | 814 ((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<3)&0x07E0) | |
820 ((clip_table[(Y + yuvtab_3343[V]) >>13]<<8)&0xF800); | 815 ((clip_table[(Y + yuvtab_3343[V]) >>13]<<8)&0xF800); |
821 dest+=2; | |
822 } | 816 } |
823 } | 817 } |
824 else if(dstbpp==15) | 818 else if(dstbpp==15) |
825 { | 819 { |
826 for(i=0;i<dstw;i++){ | 820 for(i=0;i<dstw;i++){ |
827 // vertical linear interpolation && yuv2rgb in a single step: | 821 // vertical linear interpolation && yuv2rgb in a single step: |
828 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; | 822 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; |
829 int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19); | 823 int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19); |
830 int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19); | 824 int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19); |
831 | 825 |
832 ((uint16_t*)dest)[0] = | 826 ((uint16_t*)dest)[i] = |
833 (clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) | | 827 (clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) | |
834 ((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<2)&0x03E0) | | 828 ((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<2)&0x03E0) | |
835 ((clip_table[(Y + yuvtab_3343[V]) >>13]<<7)&0x7C00); | 829 ((clip_table[(Y + yuvtab_3343[V]) >>13]<<7)&0x7C00); |
836 dest+=2; | |
837 } | 830 } |
838 } | 831 } |
839 #endif | 832 #endif |
840 } //!FULL_UV_IPOL | 833 } //!FULL_UV_IPOL |
841 } | 834 } |
991 // vertical linear interpolation && yuv2rgb in a single step: | 984 // vertical linear interpolation && yuv2rgb in a single step: |
992 int Y=yuvtab_2568[buf0[i]>>7]; | 985 int Y=yuvtab_2568[buf0[i]>>7]; |
993 int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19); | 986 int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19); |
994 int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19); | 987 int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19); |
995 | 988 |
996 ((uint16_t*)dest)[0] = | 989 ((uint16_t*)dest)[i] = |
997 (clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) | | 990 (clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) | |
998 ((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<3)&0x07E0) | | 991 ((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<3)&0x07E0) | |
999 ((clip_table[(Y + yuvtab_3343[V]) >>13]<<8)&0xF800); | 992 ((clip_table[(Y + yuvtab_3343[V]) >>13]<<8)&0xF800); |
1000 dest+=2; | |
1001 } | 993 } |
1002 } | 994 } |
1003 else if(dstbpp==15) | 995 else if(dstbpp==15) |
1004 { | 996 { |
1005 for(i=0;i<dstw;i++){ | 997 for(i=0;i<dstw;i++){ |
1006 // vertical linear interpolation && yuv2rgb in a single step: | 998 // vertical linear interpolation && yuv2rgb in a single step: |
1007 int Y=yuvtab_2568[buf0[i]>>7]; | 999 int Y=yuvtab_2568[buf0[i]>>7]; |
1008 int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19); | 1000 int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19); |
1009 int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19); | 1001 int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19); |
1010 | 1002 |
1011 ((uint16_t*)dest)[0] = | 1003 ((uint16_t*)dest)[i] = |
1012 (clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) | | 1004 (clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) | |
1013 ((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<2)&0x03E0) | | 1005 ((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<2)&0x03E0) | |
1014 ((clip_table[(Y + yuvtab_3343[V]) >>13]<<7)&0x7C00); | 1006 ((clip_table[(Y + yuvtab_3343[V]) >>13]<<7)&0x7C00); |
1015 dest+=2; | |
1016 } | 1007 } |
1017 } | 1008 } |
1018 #endif | 1009 #endif |
1019 } | 1010 } |
1020 | 1011 |