comparison postproc/rgb2rgb_template.c @ 2847:1d92268eb8fc

uyvytoyv12 in MMX (untested)
author michael
date Sun, 11 Nov 2001 22:26:15 +0000
parents cbb62e07bc0e
children ab67556586fa
comparison
equal deleted inserted replaced
2846:ab51228bf3cf 2847:1d92268eb8fc
809 vdst += chromStride; 809 vdst += chromStride;
810 ydst += lumStride; 810 ydst += lumStride;
811 src += srcStride; 811 src += srcStride;
812 } 812 }
813 #ifdef HAVE_MMX 813 #ifdef HAVE_MMX
814 asm( EMMS" \n\t" 814 asm volatile( EMMS" \n\t"
815 SFENCE" \n\t" 815 SFENCE" \n\t"
816 :::"memory"); 816 :::"memory");
817 #endif 817 #endif
818 } 818 }
819 819
820 /** 820 /**
821 * 821 *
828 { 828 {
829 int y; 829 int y;
830 const int chromWidth= width>>1; 830 const int chromWidth= width>>1;
831 for(y=0; y<height; y+=2) 831 for(y=0; y<height; y+=2)
832 { 832 {
833 #ifdef HAVE_MMX
834 asm volatile(
835 "xorl %%eax, %%eax \n\t"
836 "pcmpeqw %%mm7, %%mm7 \n\t"
837 "psrlw $8, %%mm7 \n\t" // FF,00,FF,00...
838 ".balign 16 \n\t"
839 "1: \n\t"
840 PREFETCH" 64(%0, %%eax, 4) \n\t"
841 "movq (%0, %%eax, 4), %%mm0 \n\t" // UYVY UYVY(0)
842 "movq 8(%0, %%eax, 4), %%mm1 \n\t" // UYVY UYVY(4)
843 "movq %%mm0, %%mm2 \n\t" // UYVY UYVY(0)
844 "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(4)
845 "pand %%mm7, %%mm0 \n\t" // U0V0 U0V0(0)
846 "pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(4)
847 "psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(0)
848 "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(4)
849 "packuswb %%mm1, %%mm0 \n\t" // UVUV UVUV(0)
850 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(0)
851
852 MOVNTQ" %%mm2, (%1, %%eax, 2) \n\t"
853
854 "movq 16(%0, %%eax, 4), %%mm1 \n\t" // UYVY UYVY(8)
855 "movq 24(%0, %%eax, 4), %%mm2 \n\t" // UYVY UYVY(12)
856 "movq %%mm1, %%mm3 \n\t" // UYVY UYVY(8)
857 "movq %%mm2, %%mm4 \n\t" // UYVY UYVY(12)
858 "pand %%mm7, %%mm1 \n\t" // U0V0 U0V0(8)
859 "pand %%mm7, %%mm2 \n\t" // U0V0 U0V0(12)
860 "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(8)
861 "psrlw $8, %%mm4 \n\t" // Y0Y0 Y0Y0(12)
862 "packuswb %%mm2, %%mm1 \n\t" // UVUV UVUV(8)
863 "packuswb %%mm4, %%mm3 \n\t" // YYYY YYYY(8)
864
865 MOVNTQ" %%mm3, 8(%1, %%eax, 2) \n\t"
866
867 "movq %%mm0, %%mm2 \n\t" // UVUV UVUV(0)
868 "movq %%mm1, %%mm3 \n\t" // UVUV UVUV(8)
869 "psrlw $8, %%mm0 \n\t" // V0V0 V0V0(0)
870 "psrlw $8, %%mm1 \n\t" // V0V0 V0V0(8)
871 "pand %%mm7, %%mm2 \n\t" // U0U0 U0U0(0)
872 "pand %%mm7, %%mm3 \n\t" // U0U0 U0U0(8)
873 "packuswb %%mm1, %%mm0 \n\t" // VVVV VVVV(0)
874 "packuswb %%mm3, %%mm2 \n\t" // UUUU UUUU(0)
875
876 MOVNTQ" %%mm0, (%3, %%eax) \n\t"
877 MOVNTQ" %%mm2, (%2, %%eax) \n\t"
878
879 "addl $8, %%eax \n\t"
880 "cmpl %4, %%eax \n\t"
881 " jb 1b \n\t"
882 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "r" (chromWidth)
883 : "memory", "%eax"
884 );
885
886 ydst += lumStride;
887 src += srcStride;
888
889 asm volatile(
890 "xorl %%eax, %%eax \n\t"
891 ".balign 16 \n\t"
892 "1: \n\t"
893 PREFETCH" 64(%0, %%eax, 4) \n\t"
894 "movq (%0, %%eax, 4), %%mm0 \n\t" // UYVY UYVY(0)
895 "movq 8(%0, %%eax, 4), %%mm1 \n\t" // UYVY UYVY(4)
896 "movq 16(%0, %%eax, 4), %%mm2 \n\t" // UYVY UYVY(8)
897 "movq 24(%0, %%eax, 4), %%mm3 \n\t" // UYVY UYVY(12)
898 "psrlw $8, %%mm0 \n\t" // Y0Y0 Y0Y0(0)
899 "psrlw $8, %%mm1 \n\t" // Y0Y0 Y0Y0(4)
900 "psrlw $8, %%mm2 \n\t" // Y0Y0 Y0Y0(8)
901 "psrlw $8, %%mm3 \n\t" // Y0Y0 Y0Y0(12)
902 "packuswb %%mm1, %%mm0 \n\t" // YYYY YYYY(0)
903 "packuswb %%mm3, %%mm2 \n\t" // YYYY YYYY(8)
904
905 MOVNTQ" %%mm0, (%1, %%eax, 2) \n\t"
906 MOVNTQ" %%mm2, 8(%1, %%eax, 2) \n\t"
907
908 "addl $8, %%eax \n\t"
909 "cmpl %4, %%eax \n\t"
910 " jb 1b \n\t"
911
912 ::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "r" (chromWidth)
913 : "memory", "%eax"
914 );
915 #else
833 int i; 916 int i;
834 for(i=0; i<chromWidth; i++) 917 for(i=0; i<chromWidth; i++)
835 { 918 {
836 udst[i] = src[4*i+0]; 919 udst[i] = src[4*i+0];
837 ydst[2*i+0] = src[4*i+1]; 920 ydst[2*i+0] = src[4*i+1];
844 for(i=0; i<chromWidth; i++) 927 for(i=0; i<chromWidth; i++)
845 { 928 {
846 ydst[2*i+0] = src[4*i+1]; 929 ydst[2*i+0] = src[4*i+1];
847 ydst[2*i+1] = src[4*i+3]; 930 ydst[2*i+1] = src[4*i+3];
848 } 931 }
932 #endif
849 udst += chromStride; 933 udst += chromStride;
850 vdst += chromStride; 934 vdst += chromStride;
851 ydst += lumStride; 935 ydst += lumStride;
852 src += srcStride; 936 src += srcStride;
853 } 937 }
854 } 938 #ifdef HAVE_MMX
855 939 asm volatile( EMMS" \n\t"
856 940 SFENCE" \n\t"
941 :::"memory");
942 #endif
943 }
944
945