Mercurial > mplayer.hg
comparison postproc/swscale_template.c @ 9413:0d86fe21b281
cleanup
author | michael |
---|---|
date | Thu, 13 Feb 2003 14:19:05 +0000 |
parents | 25baacd1c650 |
children | 04c6fd75ed96 |
comparison
equal
deleted
inserted
replaced
9412:ed98b850668d | 9413:0d86fe21b281 |
---|---|
57 #define MOVNTQ(a,b) "movntq " #a ", " #b " \n\t" | 57 #define MOVNTQ(a,b) "movntq " #a ", " #b " \n\t" |
58 #else | 58 #else |
59 #define MOVNTQ(a,b) "movq " #a ", " #b " \n\t" | 59 #define MOVNTQ(a,b) "movq " #a ", " #b " \n\t" |
60 #endif | 60 #endif |
61 | 61 |
62 #define YSCALEYUV2YV12X(x) \ | 62 #define YSCALEYUV2YV12X(x, offset) \ |
63 "xorl %%eax, %%eax \n\t"\ | 63 "xorl %%eax, %%eax \n\t"\ |
64 "pxor %%mm3, %%mm3 \n\t"\ | 64 "pxor %%mm3, %%mm3 \n\t"\ |
65 "pxor %%mm4, %%mm4 \n\t"\ | 65 "pxor %%mm4, %%mm4 \n\t"\ |
66 "movl %0, %%edx \n\t"\ | 66 "leal " offset "(%0), %%edx \n\t"\ |
67 "movl (%%edx), %%esi \n\t"\ | |
67 ".balign 16 \n\t" /* FIXME Unroll? */\ | 68 ".balign 16 \n\t" /* FIXME Unroll? */\ |
68 "1: \n\t"\ | 69 "1: \n\t"\ |
69 "movl (%1, %%edx, 4), %%esi \n\t"\ | 70 "movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\ |
70 "movq (%2, %%edx, 8), %%mm0 \n\t" /* filterCoeff */\ | |
71 "movq " #x "(%%esi, %%eax, 2), %%mm2 \n\t" /* srcData */\ | 71 "movq " #x "(%%esi, %%eax, 2), %%mm2 \n\t" /* srcData */\ |
72 "movq 8+" #x "(%%esi, %%eax, 2), %%mm5 \n\t" /* srcData */\ | 72 "movq 8+" #x "(%%esi, %%eax, 2), %%mm5 \n\t" /* srcData */\ |
73 "addl $16, %%edx \n\t"\ | |
74 "movl (%%edx), %%esi \n\t"\ | |
75 "testl %%esi, %%esi \n\t"\ | |
73 "pmulhw %%mm0, %%mm2 \n\t"\ | 76 "pmulhw %%mm0, %%mm2 \n\t"\ |
74 "pmulhw %%mm0, %%mm5 \n\t"\ | 77 "pmulhw %%mm0, %%mm5 \n\t"\ |
75 "paddw %%mm2, %%mm3 \n\t"\ | 78 "paddw %%mm2, %%mm3 \n\t"\ |
76 "paddw %%mm5, %%mm4 \n\t"\ | 79 "paddw %%mm5, %%mm4 \n\t"\ |
77 "addl $1, %%edx \n\t"\ | |
78 " jnz 1b \n\t"\ | 80 " jnz 1b \n\t"\ |
79 "psraw $3, %%mm3 \n\t"\ | 81 "psraw $3, %%mm3 \n\t"\ |
80 "psraw $3, %%mm4 \n\t"\ | 82 "psraw $3, %%mm4 \n\t"\ |
81 "packuswb %%mm4, %%mm3 \n\t"\ | 83 "packuswb %%mm4, %%mm3 \n\t"\ |
82 MOVNTQ(%%mm3, (%3, %%eax))\ | 84 MOVNTQ(%%mm3, (%1, %%eax))\ |
83 "addl $8, %%eax \n\t"\ | 85 "addl $8, %%eax \n\t"\ |
84 "cmpl %4, %%eax \n\t"\ | 86 "cmpl %2, %%eax \n\t"\ |
85 "pxor %%mm3, %%mm3 \n\t"\ | 87 "pxor %%mm3, %%mm3 \n\t"\ |
86 "pxor %%mm4, %%mm4 \n\t"\ | 88 "pxor %%mm4, %%mm4 \n\t"\ |
87 "movl %0, %%edx \n\t"\ | 89 "leal " offset "(%0), %%edx \n\t"\ |
90 "movl (%%edx), %%esi \n\t"\ | |
88 "jb 1b \n\t" | 91 "jb 1b \n\t" |
89 | 92 |
90 #define YSCALEYUV2YV121 \ | 93 #define YSCALEYUV2YV121 \ |
91 "movl %2, %%eax \n\t"\ | 94 "movl %2, %%eax \n\t"\ |
92 ".balign 16 \n\t" /* FIXME Unroll? */\ | 95 ".balign 16 \n\t" /* FIXME Unroll? */\ |
108 : "%eax", "%ebx", "%ecx", "%edx", "%esi" | 111 : "%eax", "%ebx", "%ecx", "%edx", "%esi" |
109 */ | 112 */ |
110 #define YSCALEYUV2PACKEDX \ | 113 #define YSCALEYUV2PACKEDX \ |
111 "xorl %%eax, %%eax \n\t"\ | 114 "xorl %%eax, %%eax \n\t"\ |
112 ".balign 16 \n\t"\ | 115 ".balign 16 \n\t"\ |
116 "nop \n\t"\ | |
113 "1: \n\t"\ | 117 "1: \n\t"\ |
114 "movl %1, %%edx \n\t" /* -chrFilterSize */\ | 118 "leal "CHR_MMX_FILTER_OFFSET"(%0), %%edx \n\t"\ |
115 "movl %3, %%ebx \n\t" /* chrMmxFilter+chrFilterSize */\ | 119 "movl (%%edx), %%esi \n\t"\ |
116 "movl %7, %%ecx \n\t" /* chrSrc+chrFilterSize */\ | |
117 "pxor %%mm3, %%mm3 \n\t"\ | 120 "pxor %%mm3, %%mm3 \n\t"\ |
118 "pxor %%mm4, %%mm4 \n\t"\ | 121 "pxor %%mm4, %%mm4 \n\t"\ |
122 ".balign 16 \n\t"\ | |
119 "2: \n\t"\ | 123 "2: \n\t"\ |
120 "movl (%%ecx, %%edx, 4), %%esi \n\t"\ | 124 "movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\ |
121 "movq (%%ebx, %%edx, 8), %%mm0 \n\t" /* filterCoeff */\ | |
122 "movq (%%esi, %%eax), %%mm2 \n\t" /* UsrcData */\ | 125 "movq (%%esi, %%eax), %%mm2 \n\t" /* UsrcData */\ |
123 "movq 4096(%%esi, %%eax), %%mm5 \n\t" /* VsrcData */\ | 126 "movq 4096(%%esi, %%eax), %%mm5 \n\t" /* VsrcData */\ |
127 "addl $16, %%edx \n\t"\ | |
128 "movl (%%edx), %%esi \n\t"\ | |
124 "pmulhw %%mm0, %%mm2 \n\t"\ | 129 "pmulhw %%mm0, %%mm2 \n\t"\ |
125 "pmulhw %%mm0, %%mm5 \n\t"\ | 130 "pmulhw %%mm0, %%mm5 \n\t"\ |
126 "paddw %%mm2, %%mm3 \n\t"\ | 131 "paddw %%mm2, %%mm3 \n\t"\ |
127 "paddw %%mm5, %%mm4 \n\t"\ | 132 "paddw %%mm5, %%mm4 \n\t"\ |
128 "addl $1, %%edx \n\t"\ | 133 "testl %%esi, %%esi \n\t"\ |
129 " jnz 2b \n\t"\ | 134 " jnz 2b \n\t"\ |
130 \ | 135 \ |
131 "movl %0, %%edx \n\t" /* -lumFilterSize */\ | 136 "leal "LUM_MMX_FILTER_OFFSET"(%0), %%edx \n\t"\ |
132 "movl %2, %%ebx \n\t" /* lumMmxFilter+lumFilterSize */\ | 137 "movl (%%edx), %%esi \n\t"\ |
133 "movl %6, %%ecx \n\t" /* lumSrc+lumFilterSize */\ | |
134 "pxor %%mm1, %%mm1 \n\t"\ | 138 "pxor %%mm1, %%mm1 \n\t"\ |
135 "pxor %%mm7, %%mm7 \n\t"\ | 139 "pxor %%mm7, %%mm7 \n\t"\ |
140 ".balign 16 \n\t"\ | |
136 "2: \n\t"\ | 141 "2: \n\t"\ |
137 "movl (%%ecx, %%edx, 4), %%esi \n\t"\ | 142 "movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\ |
138 "movq (%%ebx, %%edx, 8), %%mm0 \n\t" /* filterCoeff */\ | |
139 "movq (%%esi, %%eax, 2), %%mm2 \n\t" /* Y1srcData */\ | 143 "movq (%%esi, %%eax, 2), %%mm2 \n\t" /* Y1srcData */\ |
140 "movq 8(%%esi, %%eax, 2), %%mm5 \n\t" /* Y2srcData */\ | 144 "movq 8(%%esi, %%eax, 2), %%mm5 \n\t" /* Y2srcData */\ |
145 "addl $16, %%edx \n\t"\ | |
146 "movl (%%edx), %%esi \n\t"\ | |
141 "pmulhw %%mm0, %%mm2 \n\t"\ | 147 "pmulhw %%mm0, %%mm2 \n\t"\ |
142 "pmulhw %%mm0, %%mm5 \n\t"\ | 148 "pmulhw %%mm0, %%mm5 \n\t"\ |
143 "paddw %%mm2, %%mm1 \n\t"\ | 149 "paddw %%mm2, %%mm1 \n\t"\ |
144 "paddw %%mm5, %%mm7 \n\t"\ | 150 "paddw %%mm5, %%mm7 \n\t"\ |
145 "addl $1, %%edx \n\t"\ | 151 "testl %%esi, %%esi \n\t"\ |
146 " jnz 2b \n\t"\ | 152 " jnz 2b \n\t"\ |
147 | 153 |
148 | 154 |
149 #define YSCALEYUV2RGBX \ | 155 #define YSCALEYUV2RGBX \ |
150 YSCALEYUV2PACKEDX\ | 156 YSCALEYUV2PACKEDX\ |
151 "psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\ | 157 "psubw "U_OFFSET"(%0), %%mm3 \n\t" /* (U-128)8*/\ |
152 "psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\ | 158 "psubw "V_OFFSET"(%0), %%mm4 \n\t" /* (V-128)8*/\ |
153 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ | 159 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ |
154 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ | 160 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ |
155 "pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\ | 161 "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\ |
156 "pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\ | 162 "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\ |
157 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ | 163 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ |
158 "pmulhw "MANGLE(ubCoeff)", %%mm2\n\t"\ | 164 "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\ |
159 "pmulhw "MANGLE(vrCoeff)", %%mm5\n\t"\ | 165 "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\ |
160 "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\ | 166 "psubw "Y_OFFSET"(%0), %%mm1 \n\t" /* 8(Y-16)*/\ |
161 "psubw "MANGLE(w80)", %%mm7 \n\t" /* 8(Y-16)*/\ | 167 "psubw "Y_OFFSET"(%0), %%mm7 \n\t" /* 8(Y-16)*/\ |
162 "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\ | 168 "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\ |
163 "pmulhw "MANGLE(yCoeff)", %%mm7 \n\t"\ | 169 "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\ |
164 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ | 170 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ |
165 "paddw %%mm3, %%mm4 \n\t"\ | 171 "paddw %%mm3, %%mm4 \n\t"\ |
166 "movq %%mm2, %%mm0 \n\t"\ | 172 "movq %%mm2, %%mm0 \n\t"\ |
167 "movq %%mm5, %%mm6 \n\t"\ | 173 "movq %%mm5, %%mm6 \n\t"\ |
168 "movq %%mm4, %%mm3 \n\t"\ | 174 "movq %%mm4, %%mm3 \n\t"\ |
181 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ | 187 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ |
182 "packuswb %%mm0, %%mm2 \n\t"\ | 188 "packuswb %%mm0, %%mm2 \n\t"\ |
183 "packuswb %%mm6, %%mm5 \n\t"\ | 189 "packuswb %%mm6, %%mm5 \n\t"\ |
184 "packuswb %%mm3, %%mm4 \n\t"\ | 190 "packuswb %%mm3, %%mm4 \n\t"\ |
185 "pxor %%mm7, %%mm7 \n\t" | 191 "pxor %%mm7, %%mm7 \n\t" |
186 | 192 #if 0 |
187 #define FULL_YSCALEYUV2RGB \ | 193 #define FULL_YSCALEYUV2RGB \ |
188 "pxor %%mm7, %%mm7 \n\t"\ | 194 "pxor %%mm7, %%mm7 \n\t"\ |
189 "movd %6, %%mm6 \n\t" /*yalpha1*/\ | 195 "movd %6, %%mm6 \n\t" /*yalpha1*/\ |
190 "punpcklwd %%mm6, %%mm6 \n\t"\ | 196 "punpcklwd %%mm6, %%mm6 \n\t"\ |
191 "punpcklwd %%mm6, %%mm6 \n\t"\ | 197 "punpcklwd %%mm6, %%mm6 \n\t"\ |
234 "packuswb %%mm0, %%mm0 \n\t"\ | 240 "packuswb %%mm0, %%mm0 \n\t"\ |
235 "paddw %%mm4, %%mm2 \n\t"\ | 241 "paddw %%mm4, %%mm2 \n\t"\ |
236 "paddw %%mm2, %%mm1 \n\t" /* G*/\ | 242 "paddw %%mm2, %%mm1 \n\t" /* G*/\ |
237 \ | 243 \ |
238 "packuswb %%mm1, %%mm1 \n\t" | 244 "packuswb %%mm1, %%mm1 \n\t" |
245 #endif | |
239 | 246 |
240 #define YSCALEYUV2PACKED \ | 247 #define YSCALEYUV2PACKED \ |
241 "movd %6, %%mm6 \n\t" /*yalpha1*/\ | 248 "movd %6, %%mm6 \n\t" /*yalpha1*/\ |
242 "punpcklwd %%mm6, %%mm6 \n\t"\ | 249 "punpcklwd %%mm6, %%mm6 \n\t"\ |
243 "punpcklwd %%mm6, %%mm6 \n\t"\ | 250 "punpcklwd %%mm6, %%mm6 \n\t"\ |
740 "addl $8, %%eax \n\t"\ | 747 "addl $8, %%eax \n\t"\ |
741 "cmpl %5, %%eax \n\t"\ | 748 "cmpl %5, %%eax \n\t"\ |
742 " jb 1b \n\t" | 749 " jb 1b \n\t" |
743 | 750 |
744 | 751 |
745 static inline void RENAME(yuv2yuvX)(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, | 752 static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, |
746 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, | 753 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, |
747 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW, | 754 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW, |
748 int16_t * lumMmxFilter, int16_t * chrMmxFilter) | 755 int32_t * lumMmxFilter, int32_t * chrMmxFilter) |
749 { | 756 { |
757 int dummy=0; | |
750 #ifdef HAVE_MMX | 758 #ifdef HAVE_MMX |
751 if(uDest != NULL) | 759 if(uDest != NULL) |
752 { | 760 { |
753 asm volatile( | 761 asm volatile( |
754 YSCALEYUV2YV12X(0) | 762 YSCALEYUV2YV12X(0, CHR_MMX_FILTER_OFFSET) |
755 :: "m" (-chrFilterSize), "r" (chrSrc+chrFilterSize), | 763 :: "r" (&c->redDither), |
756 "r" (chrMmxFilter+chrFilterSize*4), "r" (uDest), "m" (chrDstW) | 764 "r" (uDest), "m" (chrDstW) |
757 : "%eax", "%edx", "%esi" | 765 : "%eax", "%edx", "%esi" |
758 ); | 766 ); |
759 | 767 |
760 asm volatile( | 768 asm volatile( |
761 YSCALEYUV2YV12X(4096) | 769 YSCALEYUV2YV12X(4096, CHR_MMX_FILTER_OFFSET) |
762 :: "m" (-chrFilterSize), "r" (chrSrc+chrFilterSize), | 770 :: "r" (&c->redDither), |
763 "r" (chrMmxFilter+chrFilterSize*4), "r" (vDest), "m" (chrDstW) | 771 "r" (vDest), "m" (chrDstW) |
764 : "%eax", "%edx", "%esi" | 772 : "%eax", "%edx", "%esi" |
765 ); | 773 ); |
766 } | 774 } |
767 | 775 |
768 asm volatile( | 776 asm volatile( |
769 YSCALEYUV2YV12X(0) | 777 YSCALEYUV2YV12X(0, LUM_MMX_FILTER_OFFSET) |
770 :: "m" (-lumFilterSize), "r" (lumSrc+lumFilterSize), | 778 :: "r" (&c->redDither), |
771 "r" (lumMmxFilter+lumFilterSize*4), "r" (dest), "m" (dstW) | 779 "r" (dest), "m" (dstW) |
772 : "%eax", "%edx", "%esi" | 780 : "%eax", "%edx", "%esi" |
773 ); | 781 ); |
774 #else | 782 #else |
775 yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize, | 783 yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize, |
776 chrFilter, chrSrc, chrFilterSize, | 784 chrFilter, chrSrc, chrFilterSize, |
842 /** | 850 /** |
843 * vertical scale YV12 to RGB | 851 * vertical scale YV12 to RGB |
844 */ | 852 */ |
845 static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, | 853 static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, |
846 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, | 854 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, |
847 uint8_t *dest, int dstW, int16_t * lumMmxFilter, int16_t * chrMmxFilter, int dstY) | 855 uint8_t *dest, int dstW, int dstY) |
848 { | 856 { |
857 int dummy=0; | |
849 switch(c->dstFormat) | 858 switch(c->dstFormat) |
850 { | 859 { |
851 #ifdef HAVE_MMX | 860 #ifdef HAVE_MMX |
852 case IMGFMT_BGR32: | 861 case IMGFMT_BGR32: |
853 { | 862 { |
854 asm volatile( | 863 asm volatile( |
855 YSCALEYUV2RGBX | 864 YSCALEYUV2RGBX |
856 WRITEBGR32 | 865 WRITEBGR32 |
857 | 866 |
858 :: "m" (-lumFilterSize), "m" (-chrFilterSize), | 867 :: "r" (&c->redDither), |
859 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), | 868 "m" (dummy), "m" (dummy), "m" (dummy), |
860 "r" (dest), "m" (dstW), | 869 "r" (dest), "m" (dstW) |
861 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) | 870 : "%eax", "%edx", "%esi" |
862 : "%eax", "%ebx", "%ecx", "%edx", "%esi" | |
863 ); | 871 ); |
864 } | 872 } |
865 break; | 873 break; |
866 case IMGFMT_BGR24: | 874 case IMGFMT_BGR24: |
867 { | 875 { |
869 YSCALEYUV2RGBX | 877 YSCALEYUV2RGBX |
870 "leal (%%eax, %%eax, 2), %%ebx \n\t" //FIXME optimize | 878 "leal (%%eax, %%eax, 2), %%ebx \n\t" //FIXME optimize |
871 "addl %4, %%ebx \n\t" | 879 "addl %4, %%ebx \n\t" |
872 WRITEBGR24 | 880 WRITEBGR24 |
873 | 881 |
874 :: "m" (-lumFilterSize), "m" (-chrFilterSize), | 882 :: "r" (&c->redDither), |
875 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), | 883 "m" (dummy), "m" (dummy), "m" (dummy), |
876 "r" (dest), "m" (dstW), | 884 "r" (dest), "m" (dstW) |
877 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) | 885 : "%eax", "%ebx", "%edx", "%esi" //FIXME ebx |
878 : "%eax", "%ebx", "%ecx", "%edx", "%esi" | |
879 ); | 886 ); |
880 } | 887 } |
881 break; | 888 break; |
882 case IMGFMT_BGR15: | 889 case IMGFMT_BGR15: |
883 { | 890 { |
890 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | 897 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" |
891 #endif | 898 #endif |
892 | 899 |
893 WRITEBGR15 | 900 WRITEBGR15 |
894 | 901 |
895 :: "m" (-lumFilterSize), "m" (-chrFilterSize), | 902 :: "r" (&c->redDither), |
896 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), | 903 "m" (dummy), "m" (dummy), "m" (dummy), |
897 "r" (dest), "m" (dstW), | 904 "r" (dest), "m" (dstW) |
898 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) | 905 : "%eax", "%edx", "%esi" |
899 : "%eax", "%ebx", "%ecx", "%edx", "%esi" | |
900 ); | 906 ); |
901 } | 907 } |
902 break; | 908 break; |
903 case IMGFMT_BGR16: | 909 case IMGFMT_BGR16: |
904 { | 910 { |
911 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | 917 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" |
912 #endif | 918 #endif |
913 | 919 |
914 WRITEBGR16 | 920 WRITEBGR16 |
915 | 921 |
916 :: "m" (-lumFilterSize), "m" (-chrFilterSize), | 922 :: "r" (&c->redDither), |
917 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), | 923 "m" (dummy), "m" (dummy), "m" (dummy), |
918 "r" (dest), "m" (dstW), | 924 "r" (dest), "m" (dstW) |
919 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) | 925 : "%eax", "%edx", "%esi" |
920 : "%eax", "%ebx", "%ecx", "%edx", "%esi" | |
921 ); | 926 ); |
922 } | 927 } |
923 break; | 928 break; |
924 case IMGFMT_YUY2: | 929 case IMGFMT_YUY2: |
925 { | 930 { |
931 "psraw $3, %%mm4 \n\t" | 936 "psraw $3, %%mm4 \n\t" |
932 "psraw $3, %%mm1 \n\t" | 937 "psraw $3, %%mm1 \n\t" |
933 "psraw $3, %%mm7 \n\t" | 938 "psraw $3, %%mm7 \n\t" |
934 WRITEYUY2 | 939 WRITEYUY2 |
935 | 940 |
936 :: "m" (-lumFilterSize), "m" (-chrFilterSize), | 941 :: "r" (&c->redDither), |
937 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), | 942 "m" (dummy), "m" (dummy), "m" (dummy), |
938 "r" (dest), "m" (dstW), | 943 "r" (dest), "m" (dstW) |
939 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) | 944 : "%eax", "%edx", "%esi" |
940 : "%eax", "%ebx", "%ecx", "%edx", "%esi" | |
941 ); | 945 ); |
942 } | 946 } |
943 break; | 947 break; |
944 #endif | 948 #endif |
945 default: | 949 default: |
2526 int16_t *hChrFilterPos= c->hChrFilterPos; | 2530 int16_t *hChrFilterPos= c->hChrFilterPos; |
2527 int16_t *vLumFilter= c->vLumFilter; | 2531 int16_t *vLumFilter= c->vLumFilter; |
2528 int16_t *vChrFilter= c->vChrFilter; | 2532 int16_t *vChrFilter= c->vChrFilter; |
2529 int16_t *hLumFilter= c->hLumFilter; | 2533 int16_t *hLumFilter= c->hLumFilter; |
2530 int16_t *hChrFilter= c->hChrFilter; | 2534 int16_t *hChrFilter= c->hChrFilter; |
2531 int16_t *lumMmxFilter= c->lumMmxFilter; | 2535 int32_t *lumMmxFilter= c->lumMmxFilter; |
2532 int16_t *chrMmxFilter= c->chrMmxFilter; | 2536 int32_t *chrMmxFilter= c->chrMmxFilter; |
2533 const int vLumFilterSize= c->vLumFilterSize; | 2537 const int vLumFilterSize= c->vLumFilterSize; |
2534 const int vChrFilterSize= c->vChrFilterSize; | 2538 const int vChrFilterSize= c->vChrFilterSize; |
2535 const int hLumFilterSize= c->hLumFilterSize; | 2539 const int hLumFilterSize= c->hLumFilterSize; |
2536 const int hChrFilterSize= c->hChrFilterSize; | 2540 const int hChrFilterSize= c->hChrFilterSize; |
2537 int16_t **lumPixBuf= c->lumPixBuf; | 2541 int16_t **lumPixBuf= c->lumPixBuf; |
2727 } | 2731 } |
2728 else //General YV12 | 2732 else //General YV12 |
2729 { | 2733 { |
2730 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; | 2734 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; |
2731 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; | 2735 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; |
2732 RENAME(yuv2yuvX)( | 2736 int i; |
2737 #ifdef HAVE_MMX | |
2738 for(i=0; i<vLumFilterSize; i++) | |
2739 { | |
2740 lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i]; | |
2741 lumMmxFilter[4*i+2]= | |
2742 lumMmxFilter[4*i+3]= | |
2743 ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001; | |
2744 } | |
2745 for(i=0; i<vChrFilterSize; i++) | |
2746 { | |
2747 chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i]; | |
2748 chrMmxFilter[4*i+2]= | |
2749 chrMmxFilter[4*i+3]= | |
2750 ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001; | |
2751 } | |
2752 #endif | |
2753 RENAME(yuv2yuvX)(c, | |
2733 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, | 2754 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, |
2734 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, | 2755 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
2735 dest, uDest, vDest, dstW, chrDstW, | 2756 dest, uDest, vDest, dstW, chrDstW, |
2736 lumMmxFilter+dstY*vLumFilterSize*4, chrMmxFilter+chrDstY*vChrFilterSize*4); | 2757 lumMmxFilter, chrMmxFilter); |
2737 } | 2758 } |
2738 } | 2759 } |
2739 else | 2760 else |
2740 { | 2761 { |
2741 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; | 2762 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; |
2758 RENAME(yuv2packed2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1), | 2779 RENAME(yuv2packed2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1), |
2759 dest, dstW, lumAlpha, chrAlpha, dstY); | 2780 dest, dstW, lumAlpha, chrAlpha, dstY); |
2760 } | 2781 } |
2761 else //General RGB | 2782 else //General RGB |
2762 { | 2783 { |
2784 int i; | |
2785 #ifdef HAVE_MMX | |
2786 for(i=0; i<vLumFilterSize; i++) | |
2787 { | |
2788 lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i]; | |
2789 lumMmxFilter[4*i+2]= | |
2790 lumMmxFilter[4*i+3]= | |
2791 ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001; | |
2792 } | |
2793 for(i=0; i<vChrFilterSize; i++) | |
2794 { | |
2795 chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i]; | |
2796 chrMmxFilter[4*i+2]= | |
2797 chrMmxFilter[4*i+3]= | |
2798 ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001; | |
2799 } | |
2800 #endif | |
2763 RENAME(yuv2packedX)(c, | 2801 RENAME(yuv2packedX)(c, |
2764 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, | 2802 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, |
2765 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, | 2803 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
2766 dest, dstW, | 2804 dest, dstW, dstY); |
2767 lumMmxFilter+dstY*vLumFilterSize*4, chrMmxFilter+dstY*vChrFilterSize*4, dstY); | |
2768 } | 2805 } |
2769 } | 2806 } |
2770 } | 2807 } |
2771 else // hmm looks like we cant use MMX here without overwriting this arrays tail | 2808 else // hmm looks like we cant use MMX here without overwriting this arrays tail |
2772 { | 2809 { |