comparison postproc/swscale_template.c @ 9413:0d86fe21b281

cleanup
author michael
date Thu, 13 Feb 2003 14:19:05 +0000
parents 25baacd1c650
children 04c6fd75ed96
comparison
equal deleted inserted replaced
9412:ed98b850668d 9413:0d86fe21b281
57 #define MOVNTQ(a,b) "movntq " #a ", " #b " \n\t" 57 #define MOVNTQ(a,b) "movntq " #a ", " #b " \n\t"
58 #else 58 #else
59 #define MOVNTQ(a,b) "movq " #a ", " #b " \n\t" 59 #define MOVNTQ(a,b) "movq " #a ", " #b " \n\t"
60 #endif 60 #endif
61 61
62 #define YSCALEYUV2YV12X(x) \ 62 #define YSCALEYUV2YV12X(x, offset) \
63 "xorl %%eax, %%eax \n\t"\ 63 "xorl %%eax, %%eax \n\t"\
64 "pxor %%mm3, %%mm3 \n\t"\ 64 "pxor %%mm3, %%mm3 \n\t"\
65 "pxor %%mm4, %%mm4 \n\t"\ 65 "pxor %%mm4, %%mm4 \n\t"\
66 "movl %0, %%edx \n\t"\ 66 "leal " offset "(%0), %%edx \n\t"\
67 "movl (%%edx), %%esi \n\t"\
67 ".balign 16 \n\t" /* FIXME Unroll? */\ 68 ".balign 16 \n\t" /* FIXME Unroll? */\
68 "1: \n\t"\ 69 "1: \n\t"\
69 "movl (%1, %%edx, 4), %%esi \n\t"\ 70 "movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\
70 "movq (%2, %%edx, 8), %%mm0 \n\t" /* filterCoeff */\
71 "movq " #x "(%%esi, %%eax, 2), %%mm2 \n\t" /* srcData */\ 71 "movq " #x "(%%esi, %%eax, 2), %%mm2 \n\t" /* srcData */\
72 "movq 8+" #x "(%%esi, %%eax, 2), %%mm5 \n\t" /* srcData */\ 72 "movq 8+" #x "(%%esi, %%eax, 2), %%mm5 \n\t" /* srcData */\
73 "addl $16, %%edx \n\t"\
74 "movl (%%edx), %%esi \n\t"\
75 "testl %%esi, %%esi \n\t"\
73 "pmulhw %%mm0, %%mm2 \n\t"\ 76 "pmulhw %%mm0, %%mm2 \n\t"\
74 "pmulhw %%mm0, %%mm5 \n\t"\ 77 "pmulhw %%mm0, %%mm5 \n\t"\
75 "paddw %%mm2, %%mm3 \n\t"\ 78 "paddw %%mm2, %%mm3 \n\t"\
76 "paddw %%mm5, %%mm4 \n\t"\ 79 "paddw %%mm5, %%mm4 \n\t"\
77 "addl $1, %%edx \n\t"\
78 " jnz 1b \n\t"\ 80 " jnz 1b \n\t"\
79 "psraw $3, %%mm3 \n\t"\ 81 "psraw $3, %%mm3 \n\t"\
80 "psraw $3, %%mm4 \n\t"\ 82 "psraw $3, %%mm4 \n\t"\
81 "packuswb %%mm4, %%mm3 \n\t"\ 83 "packuswb %%mm4, %%mm3 \n\t"\
82 MOVNTQ(%%mm3, (%3, %%eax))\ 84 MOVNTQ(%%mm3, (%1, %%eax))\
83 "addl $8, %%eax \n\t"\ 85 "addl $8, %%eax \n\t"\
84 "cmpl %4, %%eax \n\t"\ 86 "cmpl %2, %%eax \n\t"\
85 "pxor %%mm3, %%mm3 \n\t"\ 87 "pxor %%mm3, %%mm3 \n\t"\
86 "pxor %%mm4, %%mm4 \n\t"\ 88 "pxor %%mm4, %%mm4 \n\t"\
87 "movl %0, %%edx \n\t"\ 89 "leal " offset "(%0), %%edx \n\t"\
90 "movl (%%edx), %%esi \n\t"\
88 "jb 1b \n\t" 91 "jb 1b \n\t"
89 92
90 #define YSCALEYUV2YV121 \ 93 #define YSCALEYUV2YV121 \
91 "movl %2, %%eax \n\t"\ 94 "movl %2, %%eax \n\t"\
92 ".balign 16 \n\t" /* FIXME Unroll? */\ 95 ".balign 16 \n\t" /* FIXME Unroll? */\
108 : "%eax", "%ebx", "%ecx", "%edx", "%esi" 111 : "%eax", "%ebx", "%ecx", "%edx", "%esi"
109 */ 112 */
110 #define YSCALEYUV2PACKEDX \ 113 #define YSCALEYUV2PACKEDX \
111 "xorl %%eax, %%eax \n\t"\ 114 "xorl %%eax, %%eax \n\t"\
112 ".balign 16 \n\t"\ 115 ".balign 16 \n\t"\
116 "nop \n\t"\
113 "1: \n\t"\ 117 "1: \n\t"\
114 "movl %1, %%edx \n\t" /* -chrFilterSize */\ 118 "leal "CHR_MMX_FILTER_OFFSET"(%0), %%edx \n\t"\
115 "movl %3, %%ebx \n\t" /* chrMmxFilter+chrFilterSize */\ 119 "movl (%%edx), %%esi \n\t"\
116 "movl %7, %%ecx \n\t" /* chrSrc+chrFilterSize */\
117 "pxor %%mm3, %%mm3 \n\t"\ 120 "pxor %%mm3, %%mm3 \n\t"\
118 "pxor %%mm4, %%mm4 \n\t"\ 121 "pxor %%mm4, %%mm4 \n\t"\
122 ".balign 16 \n\t"\
119 "2: \n\t"\ 123 "2: \n\t"\
120 "movl (%%ecx, %%edx, 4), %%esi \n\t"\ 124 "movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\
121 "movq (%%ebx, %%edx, 8), %%mm0 \n\t" /* filterCoeff */\
122 "movq (%%esi, %%eax), %%mm2 \n\t" /* UsrcData */\ 125 "movq (%%esi, %%eax), %%mm2 \n\t" /* UsrcData */\
123 "movq 4096(%%esi, %%eax), %%mm5 \n\t" /* VsrcData */\ 126 "movq 4096(%%esi, %%eax), %%mm5 \n\t" /* VsrcData */\
127 "addl $16, %%edx \n\t"\
128 "movl (%%edx), %%esi \n\t"\
124 "pmulhw %%mm0, %%mm2 \n\t"\ 129 "pmulhw %%mm0, %%mm2 \n\t"\
125 "pmulhw %%mm0, %%mm5 \n\t"\ 130 "pmulhw %%mm0, %%mm5 \n\t"\
126 "paddw %%mm2, %%mm3 \n\t"\ 131 "paddw %%mm2, %%mm3 \n\t"\
127 "paddw %%mm5, %%mm4 \n\t"\ 132 "paddw %%mm5, %%mm4 \n\t"\
128 "addl $1, %%edx \n\t"\ 133 "testl %%esi, %%esi \n\t"\
129 " jnz 2b \n\t"\ 134 " jnz 2b \n\t"\
130 \ 135 \
131 "movl %0, %%edx \n\t" /* -lumFilterSize */\ 136 "leal "LUM_MMX_FILTER_OFFSET"(%0), %%edx \n\t"\
132 "movl %2, %%ebx \n\t" /* lumMmxFilter+lumFilterSize */\ 137 "movl (%%edx), %%esi \n\t"\
133 "movl %6, %%ecx \n\t" /* lumSrc+lumFilterSize */\
134 "pxor %%mm1, %%mm1 \n\t"\ 138 "pxor %%mm1, %%mm1 \n\t"\
135 "pxor %%mm7, %%mm7 \n\t"\ 139 "pxor %%mm7, %%mm7 \n\t"\
140 ".balign 16 \n\t"\
136 "2: \n\t"\ 141 "2: \n\t"\
137 "movl (%%ecx, %%edx, 4), %%esi \n\t"\ 142 "movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\
138 "movq (%%ebx, %%edx, 8), %%mm0 \n\t" /* filterCoeff */\
139 "movq (%%esi, %%eax, 2), %%mm2 \n\t" /* Y1srcData */\ 143 "movq (%%esi, %%eax, 2), %%mm2 \n\t" /* Y1srcData */\
140 "movq 8(%%esi, %%eax, 2), %%mm5 \n\t" /* Y2srcData */\ 144 "movq 8(%%esi, %%eax, 2), %%mm5 \n\t" /* Y2srcData */\
145 "addl $16, %%edx \n\t"\
146 "movl (%%edx), %%esi \n\t"\
141 "pmulhw %%mm0, %%mm2 \n\t"\ 147 "pmulhw %%mm0, %%mm2 \n\t"\
142 "pmulhw %%mm0, %%mm5 \n\t"\ 148 "pmulhw %%mm0, %%mm5 \n\t"\
143 "paddw %%mm2, %%mm1 \n\t"\ 149 "paddw %%mm2, %%mm1 \n\t"\
144 "paddw %%mm5, %%mm7 \n\t"\ 150 "paddw %%mm5, %%mm7 \n\t"\
145 "addl $1, %%edx \n\t"\ 151 "testl %%esi, %%esi \n\t"\
146 " jnz 2b \n\t"\ 152 " jnz 2b \n\t"\
147 153
148 154
149 #define YSCALEYUV2RGBX \ 155 #define YSCALEYUV2RGBX \
150 YSCALEYUV2PACKEDX\ 156 YSCALEYUV2PACKEDX\
151 "psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\ 157 "psubw "U_OFFSET"(%0), %%mm3 \n\t" /* (U-128)8*/\
152 "psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\ 158 "psubw "V_OFFSET"(%0), %%mm4 \n\t" /* (V-128)8*/\
153 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ 159 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\
154 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ 160 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\
155 "pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\ 161 "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\
156 "pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\ 162 "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\
157 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ 163 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
158 "pmulhw "MANGLE(ubCoeff)", %%mm2\n\t"\ 164 "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\
159 "pmulhw "MANGLE(vrCoeff)", %%mm5\n\t"\ 165 "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\
160 "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\ 166 "psubw "Y_OFFSET"(%0), %%mm1 \n\t" /* 8(Y-16)*/\
161 "psubw "MANGLE(w80)", %%mm7 \n\t" /* 8(Y-16)*/\ 167 "psubw "Y_OFFSET"(%0), %%mm7 \n\t" /* 8(Y-16)*/\
162 "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\ 168 "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\
163 "pmulhw "MANGLE(yCoeff)", %%mm7 \n\t"\ 169 "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\
164 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ 170 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
165 "paddw %%mm3, %%mm4 \n\t"\ 171 "paddw %%mm3, %%mm4 \n\t"\
166 "movq %%mm2, %%mm0 \n\t"\ 172 "movq %%mm2, %%mm0 \n\t"\
167 "movq %%mm5, %%mm6 \n\t"\ 173 "movq %%mm5, %%mm6 \n\t"\
168 "movq %%mm4, %%mm3 \n\t"\ 174 "movq %%mm4, %%mm3 \n\t"\
181 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ 187 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
182 "packuswb %%mm0, %%mm2 \n\t"\ 188 "packuswb %%mm0, %%mm2 \n\t"\
183 "packuswb %%mm6, %%mm5 \n\t"\ 189 "packuswb %%mm6, %%mm5 \n\t"\
184 "packuswb %%mm3, %%mm4 \n\t"\ 190 "packuswb %%mm3, %%mm4 \n\t"\
185 "pxor %%mm7, %%mm7 \n\t" 191 "pxor %%mm7, %%mm7 \n\t"
186 192 #if 0
187 #define FULL_YSCALEYUV2RGB \ 193 #define FULL_YSCALEYUV2RGB \
188 "pxor %%mm7, %%mm7 \n\t"\ 194 "pxor %%mm7, %%mm7 \n\t"\
189 "movd %6, %%mm6 \n\t" /*yalpha1*/\ 195 "movd %6, %%mm6 \n\t" /*yalpha1*/\
190 "punpcklwd %%mm6, %%mm6 \n\t"\ 196 "punpcklwd %%mm6, %%mm6 \n\t"\
191 "punpcklwd %%mm6, %%mm6 \n\t"\ 197 "punpcklwd %%mm6, %%mm6 \n\t"\
234 "packuswb %%mm0, %%mm0 \n\t"\ 240 "packuswb %%mm0, %%mm0 \n\t"\
235 "paddw %%mm4, %%mm2 \n\t"\ 241 "paddw %%mm4, %%mm2 \n\t"\
236 "paddw %%mm2, %%mm1 \n\t" /* G*/\ 242 "paddw %%mm2, %%mm1 \n\t" /* G*/\
237 \ 243 \
238 "packuswb %%mm1, %%mm1 \n\t" 244 "packuswb %%mm1, %%mm1 \n\t"
245 #endif
239 246
240 #define YSCALEYUV2PACKED \ 247 #define YSCALEYUV2PACKED \
241 "movd %6, %%mm6 \n\t" /*yalpha1*/\ 248 "movd %6, %%mm6 \n\t" /*yalpha1*/\
242 "punpcklwd %%mm6, %%mm6 \n\t"\ 249 "punpcklwd %%mm6, %%mm6 \n\t"\
243 "punpcklwd %%mm6, %%mm6 \n\t"\ 250 "punpcklwd %%mm6, %%mm6 \n\t"\
740 "addl $8, %%eax \n\t"\ 747 "addl $8, %%eax \n\t"\
741 "cmpl %5, %%eax \n\t"\ 748 "cmpl %5, %%eax \n\t"\
742 " jb 1b \n\t" 749 " jb 1b \n\t"
743 750
744 751
745 static inline void RENAME(yuv2yuvX)(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, 752 static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
746 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, 753 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
747 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW, 754 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW,
748 int16_t * lumMmxFilter, int16_t * chrMmxFilter) 755 int32_t * lumMmxFilter, int32_t * chrMmxFilter)
749 { 756 {
757 int dummy=0;
750 #ifdef HAVE_MMX 758 #ifdef HAVE_MMX
751 if(uDest != NULL) 759 if(uDest != NULL)
752 { 760 {
753 asm volatile( 761 asm volatile(
754 YSCALEYUV2YV12X(0) 762 YSCALEYUV2YV12X(0, CHR_MMX_FILTER_OFFSET)
755 :: "m" (-chrFilterSize), "r" (chrSrc+chrFilterSize), 763 :: "r" (&c->redDither),
756 "r" (chrMmxFilter+chrFilterSize*4), "r" (uDest), "m" (chrDstW) 764 "r" (uDest), "m" (chrDstW)
757 : "%eax", "%edx", "%esi" 765 : "%eax", "%edx", "%esi"
758 ); 766 );
759 767
760 asm volatile( 768 asm volatile(
761 YSCALEYUV2YV12X(4096) 769 YSCALEYUV2YV12X(4096, CHR_MMX_FILTER_OFFSET)
762 :: "m" (-chrFilterSize), "r" (chrSrc+chrFilterSize), 770 :: "r" (&c->redDither),
763 "r" (chrMmxFilter+chrFilterSize*4), "r" (vDest), "m" (chrDstW) 771 "r" (vDest), "m" (chrDstW)
764 : "%eax", "%edx", "%esi" 772 : "%eax", "%edx", "%esi"
765 ); 773 );
766 } 774 }
767 775
768 asm volatile( 776 asm volatile(
769 YSCALEYUV2YV12X(0) 777 YSCALEYUV2YV12X(0, LUM_MMX_FILTER_OFFSET)
770 :: "m" (-lumFilterSize), "r" (lumSrc+lumFilterSize), 778 :: "r" (&c->redDither),
771 "r" (lumMmxFilter+lumFilterSize*4), "r" (dest), "m" (dstW) 779 "r" (dest), "m" (dstW)
772 : "%eax", "%edx", "%esi" 780 : "%eax", "%edx", "%esi"
773 ); 781 );
774 #else 782 #else
775 yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize, 783 yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize,
776 chrFilter, chrSrc, chrFilterSize, 784 chrFilter, chrSrc, chrFilterSize,
842 /** 850 /**
843 * vertical scale YV12 to RGB 851 * vertical scale YV12 to RGB
844 */ 852 */
845 static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, 853 static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
846 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, 854 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
847 uint8_t *dest, int dstW, int16_t * lumMmxFilter, int16_t * chrMmxFilter, int dstY) 855 uint8_t *dest, int dstW, int dstY)
848 { 856 {
857 int dummy=0;
849 switch(c->dstFormat) 858 switch(c->dstFormat)
850 { 859 {
851 #ifdef HAVE_MMX 860 #ifdef HAVE_MMX
852 case IMGFMT_BGR32: 861 case IMGFMT_BGR32:
853 { 862 {
854 asm volatile( 863 asm volatile(
855 YSCALEYUV2RGBX 864 YSCALEYUV2RGBX
856 WRITEBGR32 865 WRITEBGR32
857 866
858 :: "m" (-lumFilterSize), "m" (-chrFilterSize), 867 :: "r" (&c->redDither),
859 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), 868 "m" (dummy), "m" (dummy), "m" (dummy),
860 "r" (dest), "m" (dstW), 869 "r" (dest), "m" (dstW)
861 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) 870 : "%eax", "%edx", "%esi"
862 : "%eax", "%ebx", "%ecx", "%edx", "%esi"
863 ); 871 );
864 } 872 }
865 break; 873 break;
866 case IMGFMT_BGR24: 874 case IMGFMT_BGR24:
867 { 875 {
869 YSCALEYUV2RGBX 877 YSCALEYUV2RGBX
870 "leal (%%eax, %%eax, 2), %%ebx \n\t" //FIXME optimize 878 "leal (%%eax, %%eax, 2), %%ebx \n\t" //FIXME optimize
871 "addl %4, %%ebx \n\t" 879 "addl %4, %%ebx \n\t"
872 WRITEBGR24 880 WRITEBGR24
873 881
874 :: "m" (-lumFilterSize), "m" (-chrFilterSize), 882 :: "r" (&c->redDither),
875 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), 883 "m" (dummy), "m" (dummy), "m" (dummy),
876 "r" (dest), "m" (dstW), 884 "r" (dest), "m" (dstW)
877 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) 885 : "%eax", "%ebx", "%edx", "%esi" //FIXME ebx
878 : "%eax", "%ebx", "%ecx", "%edx", "%esi"
879 ); 886 );
880 } 887 }
881 break; 888 break;
882 case IMGFMT_BGR15: 889 case IMGFMT_BGR15:
883 { 890 {
890 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" 897 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
891 #endif 898 #endif
892 899
893 WRITEBGR15 900 WRITEBGR15
894 901
895 :: "m" (-lumFilterSize), "m" (-chrFilterSize), 902 :: "r" (&c->redDither),
896 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), 903 "m" (dummy), "m" (dummy), "m" (dummy),
897 "r" (dest), "m" (dstW), 904 "r" (dest), "m" (dstW)
898 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) 905 : "%eax", "%edx", "%esi"
899 : "%eax", "%ebx", "%ecx", "%edx", "%esi"
900 ); 906 );
901 } 907 }
902 break; 908 break;
903 case IMGFMT_BGR16: 909 case IMGFMT_BGR16:
904 { 910 {
911 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" 917 "paddusb "MANGLE(r5Dither)", %%mm5\n\t"
912 #endif 918 #endif
913 919
914 WRITEBGR16 920 WRITEBGR16
915 921
916 :: "m" (-lumFilterSize), "m" (-chrFilterSize), 922 :: "r" (&c->redDither),
917 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), 923 "m" (dummy), "m" (dummy), "m" (dummy),
918 "r" (dest), "m" (dstW), 924 "r" (dest), "m" (dstW)
919 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) 925 : "%eax", "%edx", "%esi"
920 : "%eax", "%ebx", "%ecx", "%edx", "%esi"
921 ); 926 );
922 } 927 }
923 break; 928 break;
924 case IMGFMT_YUY2: 929 case IMGFMT_YUY2:
925 { 930 {
931 "psraw $3, %%mm4 \n\t" 936 "psraw $3, %%mm4 \n\t"
932 "psraw $3, %%mm1 \n\t" 937 "psraw $3, %%mm1 \n\t"
933 "psraw $3, %%mm7 \n\t" 938 "psraw $3, %%mm7 \n\t"
934 WRITEYUY2 939 WRITEYUY2
935 940
936 :: "m" (-lumFilterSize), "m" (-chrFilterSize), 941 :: "r" (&c->redDither),
937 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), 942 "m" (dummy), "m" (dummy), "m" (dummy),
938 "r" (dest), "m" (dstW), 943 "r" (dest), "m" (dstW)
939 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) 944 : "%eax", "%edx", "%esi"
940 : "%eax", "%ebx", "%ecx", "%edx", "%esi"
941 ); 945 );
942 } 946 }
943 break; 947 break;
944 #endif 948 #endif
945 default: 949 default:
2526 int16_t *hChrFilterPos= c->hChrFilterPos; 2530 int16_t *hChrFilterPos= c->hChrFilterPos;
2527 int16_t *vLumFilter= c->vLumFilter; 2531 int16_t *vLumFilter= c->vLumFilter;
2528 int16_t *vChrFilter= c->vChrFilter; 2532 int16_t *vChrFilter= c->vChrFilter;
2529 int16_t *hLumFilter= c->hLumFilter; 2533 int16_t *hLumFilter= c->hLumFilter;
2530 int16_t *hChrFilter= c->hChrFilter; 2534 int16_t *hChrFilter= c->hChrFilter;
2531 int16_t *lumMmxFilter= c->lumMmxFilter; 2535 int32_t *lumMmxFilter= c->lumMmxFilter;
2532 int16_t *chrMmxFilter= c->chrMmxFilter; 2536 int32_t *chrMmxFilter= c->chrMmxFilter;
2533 const int vLumFilterSize= c->vLumFilterSize; 2537 const int vLumFilterSize= c->vLumFilterSize;
2534 const int vChrFilterSize= c->vChrFilterSize; 2538 const int vChrFilterSize= c->vChrFilterSize;
2535 const int hLumFilterSize= c->hLumFilterSize; 2539 const int hLumFilterSize= c->hLumFilterSize;
2536 const int hChrFilterSize= c->hChrFilterSize; 2540 const int hChrFilterSize= c->hChrFilterSize;
2537 int16_t **lumPixBuf= c->lumPixBuf; 2541 int16_t **lumPixBuf= c->lumPixBuf;
2727 } 2731 }
2728 else //General YV12 2732 else //General YV12
2729 { 2733 {
2730 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; 2734 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2731 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; 2735 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize;
2732 RENAME(yuv2yuvX)( 2736 int i;
2737 #ifdef HAVE_MMX
2738 for(i=0; i<vLumFilterSize; i++)
2739 {
2740 lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i];
2741 lumMmxFilter[4*i+2]=
2742 lumMmxFilter[4*i+3]=
2743 ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
2744 }
2745 for(i=0; i<vChrFilterSize; i++)
2746 {
2747 chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i];
2748 chrMmxFilter[4*i+2]=
2749 chrMmxFilter[4*i+3]=
2750 ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
2751 }
2752 #endif
2753 RENAME(yuv2yuvX)(c,
2733 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, 2754 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize,
2734 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, 2755 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
2735 dest, uDest, vDest, dstW, chrDstW, 2756 dest, uDest, vDest, dstW, chrDstW,
2736 lumMmxFilter+dstY*vLumFilterSize*4, chrMmxFilter+chrDstY*vChrFilterSize*4); 2757 lumMmxFilter, chrMmxFilter);
2737 } 2758 }
2738 } 2759 }
2739 else 2760 else
2740 { 2761 {
2741 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; 2762 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize;
2758 RENAME(yuv2packed2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1), 2779 RENAME(yuv2packed2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1),
2759 dest, dstW, lumAlpha, chrAlpha, dstY); 2780 dest, dstW, lumAlpha, chrAlpha, dstY);
2760 } 2781 }
2761 else //General RGB 2782 else //General RGB
2762 { 2783 {
2784 int i;
2785 #ifdef HAVE_MMX
2786 for(i=0; i<vLumFilterSize; i++)
2787 {
2788 lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i];
2789 lumMmxFilter[4*i+2]=
2790 lumMmxFilter[4*i+3]=
2791 ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001;
2792 }
2793 for(i=0; i<vChrFilterSize; i++)
2794 {
2795 chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i];
2796 chrMmxFilter[4*i+2]=
2797 chrMmxFilter[4*i+3]=
2798 ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001;
2799 }
2800 #endif
2763 RENAME(yuv2packedX)(c, 2801 RENAME(yuv2packedX)(c,
2764 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, 2802 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize,
2765 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, 2803 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize,
2766 dest, dstW, 2804 dest, dstW, dstY);
2767 lumMmxFilter+dstY*vLumFilterSize*4, chrMmxFilter+dstY*vChrFilterSize*4, dstY);
2768 } 2805 }
2769 } 2806 }
2770 } 2807 }
2771 else // hmm looks like we cant use MMX here without overwriting this arrays tail 2808 else // hmm looks like we cant use MMX here without overwriting this arrays tail
2772 { 2809 {