comparison postproc/swscale_template.c @ 2572:f2353173d52c

C optimizations: array indexing is faster than pointer incrementing (16bpp variants tested and 2% faster)
author michael
date Tue, 30 Oct 2001 21:33:01 +0000
parents 30b736e7feef
children 37da7219ebaf
comparison
equal deleted inserted replaced
2571:13608ff3d1e6 2572:f2353173d52c
504 504
505 asm volatile ("\n\t"::: "memory"); 505 asm volatile ("\n\t"::: "memory");
506 506
507 for(i=0;i<dstw;i++) 507 for(i=0;i<dstw;i++)
508 { 508 {
509 ((uint8_t*)dest)[0] = (buf0[i]*yalpha1+buf1[i]*yalpha)>>19; 509 ((uint8_t*)dest)[i] = (buf0[i]*yalpha1+buf1[i]*yalpha)>>19;
510 dest++;
511 } 510 }
512 511
513 if(uvalpha != -1) 512 if(uvalpha != -1)
514 { 513 {
515 for(i=0; i<dstw/2; i++) 514 for(i=0; i<dstw/2; i++)
516 { 515 {
517 ((uint8_t*)uDest)[0] = (uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19; 516 ((uint8_t*)uDest)[i] = (uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19;
518 ((uint8_t*)vDest)[0] = (uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19; 517 ((uint8_t*)vDest)[i] = (uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19;
519 uDest++;
520 vDest++;
521 } 518 }
522 } 519 }
523 } 520 }
524 521
525 /** 522 /**
702 // vertical linear interpolation && yuv2rgb in a single step: 699 // vertical linear interpolation && yuv2rgb in a single step:
703 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; 700 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
704 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); 701 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
705 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); 702 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
706 703
707 ((uint16_t*)dest)[0] = 704 ((uint16_t*)dest)[i] =
708 (clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) | 705 (clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) |
709 ((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<3)&0x07E0) | 706 ((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<3)&0x07E0) |
710 ((clip_table[(Y + yuvtab_3343[V]) >>13]<<8)&0xF800); 707 ((clip_table[(Y + yuvtab_3343[V]) >>13]<<8)&0xF800);
711 dest+=2;
712 } 708 }
713 } 709 }
714 else if(dstbpp==15) 710 else if(dstbpp==15)
715 { 711 {
716 for(i=0;i<dstw;i++){ 712 for(i=0;i<dstw;i++){
717 // vertical linear interpolation && yuv2rgb in a single step: 713 // vertical linear interpolation && yuv2rgb in a single step:
718 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; 714 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
719 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); 715 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19);
720 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); 716 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19);
721 717
722 ((uint16_t*)dest)[0] = 718 ((uint16_t*)dest)[i] =
723 (clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) | 719 (clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) |
724 ((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<2)&0x03E0) | 720 ((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<2)&0x03E0) |
725 ((clip_table[(Y + yuvtab_3343[V]) >>13]<<7)&0x7C00); 721 ((clip_table[(Y + yuvtab_3343[V]) >>13]<<7)&0x7C00);
726 dest+=2;
727 } 722 }
728 } 723 }
729 #endif 724 #endif
730 }//FULL_UV_IPOL 725 }//FULL_UV_IPOL
731 else 726 else
812 // vertical linear interpolation && yuv2rgb in a single step: 807 // vertical linear interpolation && yuv2rgb in a single step:
813 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; 808 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
814 int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19); 809 int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19);
815 int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19); 810 int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19);
816 811
817 ((uint16_t*)dest)[0] = 812 ((uint16_t*)dest)[i] =
818 (clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) | 813 (clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) |
819 ((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<3)&0x07E0) | 814 ((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<3)&0x07E0) |
820 ((clip_table[(Y + yuvtab_3343[V]) >>13]<<8)&0xF800); 815 ((clip_table[(Y + yuvtab_3343[V]) >>13]<<8)&0xF800);
821 dest+=2;
822 } 816 }
823 } 817 }
824 else if(dstbpp==15) 818 else if(dstbpp==15)
825 { 819 {
826 for(i=0;i<dstw;i++){ 820 for(i=0;i<dstw;i++){
827 // vertical linear interpolation && yuv2rgb in a single step: 821 // vertical linear interpolation && yuv2rgb in a single step:
828 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; 822 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
829 int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19); 823 int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19);
830 int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19); 824 int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19);
831 825
832 ((uint16_t*)dest)[0] = 826 ((uint16_t*)dest)[i] =
833 (clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) | 827 (clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) |
834 ((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<2)&0x03E0) | 828 ((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<2)&0x03E0) |
835 ((clip_table[(Y + yuvtab_3343[V]) >>13]<<7)&0x7C00); 829 ((clip_table[(Y + yuvtab_3343[V]) >>13]<<7)&0x7C00);
836 dest+=2;
837 } 830 }
838 } 831 }
839 #endif 832 #endif
840 } //!FULL_UV_IPOL 833 } //!FULL_UV_IPOL
841 } 834 }
991 // vertical linear interpolation && yuv2rgb in a single step: 984 // vertical linear interpolation && yuv2rgb in a single step:
992 int Y=yuvtab_2568[buf0[i]>>7]; 985 int Y=yuvtab_2568[buf0[i]>>7];
993 int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19); 986 int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19);
994 int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19); 987 int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19);
995 988
996 ((uint16_t*)dest)[0] = 989 ((uint16_t*)dest)[i] =
997 (clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) | 990 (clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) |
998 ((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<3)&0x07E0) | 991 ((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<3)&0x07E0) |
999 ((clip_table[(Y + yuvtab_3343[V]) >>13]<<8)&0xF800); 992 ((clip_table[(Y + yuvtab_3343[V]) >>13]<<8)&0xF800);
1000 dest+=2;
1001 } 993 }
1002 } 994 }
1003 else if(dstbpp==15) 995 else if(dstbpp==15)
1004 { 996 {
1005 for(i=0;i<dstw;i++){ 997 for(i=0;i<dstw;i++){
1006 // vertical linear interpolation && yuv2rgb in a single step: 998 // vertical linear interpolation && yuv2rgb in a single step:
1007 int Y=yuvtab_2568[buf0[i]>>7]; 999 int Y=yuvtab_2568[buf0[i]>>7];
1008 int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19); 1000 int U=((uvbuf0[i/2]*uvalpha1+uvbuf1[i/2]*uvalpha)>>19);
1009 int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19); 1001 int V=((uvbuf0[i/2+2048]*uvalpha1+uvbuf1[i/2+2048]*uvalpha)>>19);
1010 1002
1011 ((uint16_t*)dest)[0] = 1003 ((uint16_t*)dest)[i] =
1012 (clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) | 1004 (clip_table[(Y + yuvtab_40cf[U]) >>13]>>3) |
1013 ((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<2)&0x03E0) | 1005 ((clip_table[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13]<<2)&0x03E0) |
1014 ((clip_table[(Y + yuvtab_3343[V]) >>13]<<7)&0x7C00); 1006 ((clip_table[(Y + yuvtab_3343[V]) >>13]<<7)&0x7C00);
1015 dest+=2;
1016 } 1007 }
1017 } 1008 }
1018 #endif 1009 #endif
1019 } 1010 }
1020 1011