Mercurial > mplayer.hg
annotate postproc/swscale_template.c @ 9417:5eea6d903b4c
cleanup
author | michael |
---|---|
date | Thu, 13 Feb 2003 21:38:43 +0000 |
parents | 04c6fd75ed96 |
children | 53f03173e48f |
rev | line source |
---|---|
4295 | 1 /* |
2 Copyright (C) 2001-2002 Michael Niedermayer <michaelni@gmx.at> | |
2216 | 3 |
4295 | 4 This program is free software; you can redistribute it and/or modify |
5 it under the terms of the GNU General Public License as published by | |
6 the Free Software Foundation; either version 2 of the License, or | |
7 (at your option) any later version. | |
2216 | 8 |
4295 | 9 This program is distributed in the hope that it will be useful, |
10 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 GNU General Public License for more details. | |
13 | |
14 You should have received a copy of the GNU General Public License | |
15 along with this program; if not, write to the Free Software | |
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
17 */ | |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
18 |
2540 | 19 #undef MOVNTQ |
2680 | 20 #undef PAVGB |
3136 | 21 #undef PREFETCH |
22 #undef PREFETCHW | |
23 #undef EMMS | |
24 #undef SFENCE | |
25 | |
26 #ifdef HAVE_3DNOW | |
27 /* On K6 femms is faster of emms. On K7 femms is directly mapped on emms. */ | |
28 #define EMMS "femms" | |
29 #else | |
30 #define EMMS "emms" | |
31 #endif | |
32 | |
33 #ifdef HAVE_3DNOW | |
34 #define PREFETCH "prefetch" | |
35 #define PREFETCHW "prefetchw" | |
36 #elif defined ( HAVE_MMX2 ) | |
37 #define PREFETCH "prefetchnta" | |
38 #define PREFETCHW "prefetcht0" | |
39 #else | |
40 #define PREFETCH "/nop" | |
41 #define PREFETCHW "/nop" | |
42 #endif | |
43 | |
44 #ifdef HAVE_MMX2 | |
45 #define SFENCE "sfence" | |
46 #else | |
47 #define SFENCE "/nop" | |
48 #endif | |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
49 |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
50 #ifdef HAVE_MMX2 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
51 #define PAVGB(a,b) "pavgb " #a ", " #b " \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
52 #elif defined (HAVE_3DNOW) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
53 #define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
54 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
55 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
56 #ifdef HAVE_MMX2 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
57 #define MOVNTQ(a,b) "movntq " #a ", " #b " \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
58 #else |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
59 #define MOVNTQ(a,b) "movq " #a ", " #b " \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
60 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
61 |
9413 | 62 #define YSCALEYUV2YV12X(x, offset) \ |
3344 | 63 "xorl %%eax, %%eax \n\t"\ |
64 "pxor %%mm3, %%mm3 \n\t"\ | |
65 "pxor %%mm4, %%mm4 \n\t"\ | |
9413 | 66 "leal " offset "(%0), %%edx \n\t"\ |
67 "movl (%%edx), %%esi \n\t"\ | |
3344 | 68 ".balign 16 \n\t" /* FIXME Unroll? */\ |
69 "1: \n\t"\ | |
9413 | 70 "movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\ |
3344 | 71 "movq " #x "(%%esi, %%eax, 2), %%mm2 \n\t" /* srcData */\ |
72 "movq 8+" #x "(%%esi, %%eax, 2), %%mm5 \n\t" /* srcData */\ | |
9413 | 73 "addl $16, %%edx \n\t"\ |
74 "movl (%%edx), %%esi \n\t"\ | |
75 "testl %%esi, %%esi \n\t"\ | |
3344 | 76 "pmulhw %%mm0, %%mm2 \n\t"\ |
77 "pmulhw %%mm0, %%mm5 \n\t"\ | |
78 "paddw %%mm2, %%mm3 \n\t"\ | |
79 "paddw %%mm5, %%mm4 \n\t"\ | |
80 " jnz 1b \n\t"\ | |
81 "psraw $3, %%mm3 \n\t"\ | |
82 "psraw $3, %%mm4 \n\t"\ | |
83 "packuswb %%mm4, %%mm3 \n\t"\ | |
9413 | 84 MOVNTQ(%%mm3, (%1, %%eax))\ |
3344 | 85 "addl $8, %%eax \n\t"\ |
9413 | 86 "cmpl %2, %%eax \n\t"\ |
3344 | 87 "pxor %%mm3, %%mm3 \n\t"\ |
88 "pxor %%mm4, %%mm4 \n\t"\ | |
9413 | 89 "leal " offset "(%0), %%edx \n\t"\ |
90 "movl (%%edx), %%esi \n\t"\ | |
3344 | 91 "jb 1b \n\t" |
92 | |
93 #define YSCALEYUV2YV121 \ | |
94 "movl %2, %%eax \n\t"\ | |
95 ".balign 16 \n\t" /* FIXME Unroll? */\ | |
96 "1: \n\t"\ | |
97 "movq (%0, %%eax, 2), %%mm0 \n\t"\ | |
98 "movq 8(%0, %%eax, 2), %%mm1 \n\t"\ | |
99 "psraw $7, %%mm0 \n\t"\ | |
100 "psraw $7, %%mm1 \n\t"\ | |
101 "packuswb %%mm1, %%mm0 \n\t"\ | |
102 MOVNTQ(%%mm0, (%1, %%eax))\ | |
103 "addl $8, %%eax \n\t"\ | |
104 "jnc 1b \n\t" | |
105 | |
106 /* | |
107 :: "m" (-lumFilterSize), "m" (-chrFilterSize), | |
108 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), | |
109 "r" (dest), "m" (dstW), | |
110 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) | |
111 : "%eax", "%ebx", "%ecx", "%edx", "%esi" | |
112 */ | |
7723 | 113 #define YSCALEYUV2PACKEDX \ |
3344 | 114 "xorl %%eax, %%eax \n\t"\ |
115 ".balign 16 \n\t"\ | |
9413 | 116 "nop \n\t"\ |
3344 | 117 "1: \n\t"\ |
9413 | 118 "leal "CHR_MMX_FILTER_OFFSET"(%0), %%edx \n\t"\ |
119 "movl (%%edx), %%esi \n\t"\ | |
3344 | 120 "pxor %%mm3, %%mm3 \n\t"\ |
121 "pxor %%mm4, %%mm4 \n\t"\ | |
9413 | 122 ".balign 16 \n\t"\ |
3344 | 123 "2: \n\t"\ |
9413 | 124 "movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\ |
3344 | 125 "movq (%%esi, %%eax), %%mm2 \n\t" /* UsrcData */\ |
126 "movq 4096(%%esi, %%eax), %%mm5 \n\t" /* VsrcData */\ | |
9413 | 127 "addl $16, %%edx \n\t"\ |
128 "movl (%%edx), %%esi \n\t"\ | |
3344 | 129 "pmulhw %%mm0, %%mm2 \n\t"\ |
130 "pmulhw %%mm0, %%mm5 \n\t"\ | |
131 "paddw %%mm2, %%mm3 \n\t"\ | |
132 "paddw %%mm5, %%mm4 \n\t"\ | |
9413 | 133 "testl %%esi, %%esi \n\t"\ |
3344 | 134 " jnz 2b \n\t"\ |
135 \ | |
9413 | 136 "leal "LUM_MMX_FILTER_OFFSET"(%0), %%edx \n\t"\ |
137 "movl (%%edx), %%esi \n\t"\ | |
3344 | 138 "pxor %%mm1, %%mm1 \n\t"\ |
139 "pxor %%mm7, %%mm7 \n\t"\ | |
9413 | 140 ".balign 16 \n\t"\ |
3344 | 141 "2: \n\t"\ |
9413 | 142 "movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\ |
3344 | 143 "movq (%%esi, %%eax, 2), %%mm2 \n\t" /* Y1srcData */\ |
144 "movq 8(%%esi, %%eax, 2), %%mm5 \n\t" /* Y2srcData */\ | |
9413 | 145 "addl $16, %%edx \n\t"\ |
146 "movl (%%edx), %%esi \n\t"\ | |
3344 | 147 "pmulhw %%mm0, %%mm2 \n\t"\ |
148 "pmulhw %%mm0, %%mm5 \n\t"\ | |
149 "paddw %%mm2, %%mm1 \n\t"\ | |
150 "paddw %%mm5, %%mm7 \n\t"\ | |
9413 | 151 "testl %%esi, %%esi \n\t"\ |
3344 | 152 " jnz 2b \n\t"\ |
7723 | 153 |
154 | |
155 #define YSCALEYUV2RGBX \ | |
156 YSCALEYUV2PACKEDX\ | |
9413 | 157 "psubw "U_OFFSET"(%0), %%mm3 \n\t" /* (U-128)8*/\ |
158 "psubw "V_OFFSET"(%0), %%mm4 \n\t" /* (V-128)8*/\ | |
3344 | 159 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ |
160 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ | |
9413 | 161 "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\ |
162 "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\ | |
3344 | 163 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ |
9413 | 164 "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\ |
165 "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\ | |
166 "psubw "Y_OFFSET"(%0), %%mm1 \n\t" /* 8(Y-16)*/\ | |
167 "psubw "Y_OFFSET"(%0), %%mm7 \n\t" /* 8(Y-16)*/\ | |
168 "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\ | |
169 "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\ | |
3344 | 170 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ |
171 "paddw %%mm3, %%mm4 \n\t"\ | |
172 "movq %%mm2, %%mm0 \n\t"\ | |
173 "movq %%mm5, %%mm6 \n\t"\ | |
174 "movq %%mm4, %%mm3 \n\t"\ | |
175 "punpcklwd %%mm2, %%mm2 \n\t"\ | |
176 "punpcklwd %%mm5, %%mm5 \n\t"\ | |
177 "punpcklwd %%mm4, %%mm4 \n\t"\ | |
178 "paddw %%mm1, %%mm2 \n\t"\ | |
179 "paddw %%mm1, %%mm5 \n\t"\ | |
180 "paddw %%mm1, %%mm4 \n\t"\ | |
181 "punpckhwd %%mm0, %%mm0 \n\t"\ | |
182 "punpckhwd %%mm6, %%mm6 \n\t"\ | |
183 "punpckhwd %%mm3, %%mm3 \n\t"\ | |
184 "paddw %%mm7, %%mm0 \n\t"\ | |
185 "paddw %%mm7, %%mm6 \n\t"\ | |
186 "paddw %%mm7, %%mm3 \n\t"\ | |
187 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ | |
188 "packuswb %%mm0, %%mm2 \n\t"\ | |
189 "packuswb %%mm6, %%mm5 \n\t"\ | |
190 "packuswb %%mm3, %%mm4 \n\t"\ | |
191 "pxor %%mm7, %%mm7 \n\t" | |
9413 | 192 #if 0 |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
193 #define FULL_YSCALEYUV2RGB \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
194 "pxor %%mm7, %%mm7 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
195 "movd %6, %%mm6 \n\t" /*yalpha1*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
196 "punpcklwd %%mm6, %%mm6 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
197 "punpcklwd %%mm6, %%mm6 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
198 "movd %7, %%mm5 \n\t" /*uvalpha1*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
199 "punpcklwd %%mm5, %%mm5 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
200 "punpcklwd %%mm5, %%mm5 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
201 "xorl %%eax, %%eax \n\t"\ |
2800
7847d6b7ad3d
.balign or we'll align by 64kb on some architectures
michael
parents:
2799
diff
changeset
|
202 ".balign 16 \n\t"\ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
203 "1: \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
204 "movq (%0, %%eax, 2), %%mm0 \n\t" /*buf0[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
205 "movq (%1, %%eax, 2), %%mm1 \n\t" /*buf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
206 "movq (%2, %%eax,2), %%mm2 \n\t" /* uvbuf0[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
207 "movq (%3, %%eax,2), %%mm3 \n\t" /* uvbuf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
208 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
209 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
210 "pmulhw %%mm6, %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
211 "pmulhw %%mm5, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
212 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
213 "movq 4096(%2, %%eax,2), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
214 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
215 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
216 "movq 4096(%3, %%eax,2), %%mm0 \n\t" /* uvbuf1[eax+2048]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
217 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
218 "psubw %%mm0, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ |
4248 | 219 "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\ |
220 "psubw "MANGLE(w400)", %%mm3 \n\t" /* 8(U-128)*/\ | |
221 "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
222 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
223 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
224 "pmulhw %%mm5, %%mm4 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
225 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ |
4248 | 226 "pmulhw "MANGLE(ubCoeff)", %%mm3\n\t"\ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
227 "psraw $4, %%mm0 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ |
4248 | 228 "pmulhw "MANGLE(ugCoeff)", %%mm2\n\t"\ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
229 "paddw %%mm4, %%mm0 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\ |
4248 | 230 "psubw "MANGLE(w400)", %%mm0 \n\t" /* (V-128)8*/\ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
231 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
232 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
233 "movq %%mm0, %%mm4 \n\t" /* (V-128)8*/\ |
4248 | 234 "pmulhw "MANGLE(vrCoeff)", %%mm0\n\t"\ |
235 "pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
236 "paddw %%mm1, %%mm3 \n\t" /* B*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
237 "paddw %%mm1, %%mm0 \n\t" /* R*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
238 "packuswb %%mm3, %%mm3 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
239 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
240 "packuswb %%mm0, %%mm0 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
241 "paddw %%mm4, %%mm2 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
242 "paddw %%mm2, %%mm1 \n\t" /* G*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
243 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
244 "packuswb %%mm1, %%mm1 \n\t" |
9413 | 245 #endif |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
246 |
9414 | 247 #define YSCALEYUV2PACKED(index, c) \ |
248 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t"\ | |
249 "movq "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm1\n\t"\ | |
250 "psraw $3, %%mm0 \n\t"\ | |
251 "psraw $3, %%mm1 \n\t"\ | |
252 "movq %%mm0, "CHR_MMX_FILTER_OFFSET"+8("#c")\n\t"\ | |
253 "movq %%mm1, "LUM_MMX_FILTER_OFFSET"+8("#c")\n\t"\ | |
254 "xorl "#index", "#index" \n\t"\ | |
7723 | 255 ".balign 16 \n\t"\ |
256 "1: \n\t"\ | |
9414 | 257 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ |
258 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ | |
259 "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ | |
260 "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ | |
7723 | 261 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ |
262 "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ | |
9414 | 263 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t"\ |
7723 | 264 "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\ |
265 "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\ | |
266 "psraw $7, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ | |
267 "psraw $7, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ | |
268 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\ | |
269 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\ | |
9414 | 270 "movq (%0, "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\ |
271 "movq (%1, "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\ | |
272 "movq 8(%0, "#index", 2), %%mm6 \n\t" /*buf0[eax]*/\ | |
273 "movq 8(%1, "#index", 2), %%mm7 \n\t" /*buf1[eax]*/\ | |
7723 | 274 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\ |
275 "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\ | |
9414 | 276 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ |
277 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ | |
7723 | 278 "psraw $7, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
279 "psraw $7, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ | |
280 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ | |
281 "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ | |
282 | |
9414 | 283 #define YSCALEYUV2RGB(index, c) \ |
284 "xorl "#index", "#index" \n\t"\ | |
2800
7847d6b7ad3d
.balign or we'll align by 64kb on some architectures
michael
parents:
2799
diff
changeset
|
285 ".balign 16 \n\t"\ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
286 "1: \n\t"\ |
9414 | 287 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ |
288 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ | |
289 "movq 4096(%2, "#index"), %%mm5\n\t" /* uvbuf0[eax+2048]*/\ | |
290 "movq 4096(%3, "#index"), %%mm4\n\t" /* uvbuf1[eax+2048]*/\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
291 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
292 "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ |
9414 | 293 "movq "CHR_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t"\ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
294 "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
295 "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
296 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
297 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
298 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
299 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\ |
9414 | 300 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\ |
301 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
302 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
303 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ |
9414 | 304 "pmulhw "UG_COEFF"("#c"), %%mm3\n\t"\ |
305 "pmulhw "VG_COEFF"("#c"), %%mm4\n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
306 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ |
9414 | 307 "movq (%0, "#index", 2), %%mm0 \n\t" /*buf0[eax]*/\ |
308 "movq (%1, "#index", 2), %%mm1 \n\t" /*buf1[eax]*/\ | |
309 "movq 8(%0, "#index", 2), %%mm6\n\t" /*buf0[eax]*/\ | |
310 "movq 8(%1, "#index", 2), %%mm7\n\t" /*buf1[eax]*/\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
311 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
312 "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\ |
9414 | 313 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm0\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ |
314 "pmulhw "LUM_MMX_FILTER_OFFSET"+8("#c"), %%mm6\n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
315 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
316 "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
317 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
318 "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ |
9414 | 319 "pmulhw "UB_COEFF"("#c"), %%mm2\n\t"\ |
320 "pmulhw "VR_COEFF"("#c"), %%mm5\n\t"\ | |
321 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\ | |
322 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\ | |
323 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\ | |
324 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
325 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
326 "paddw %%mm3, %%mm4 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
327 "movq %%mm2, %%mm0 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
328 "movq %%mm5, %%mm6 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
329 "movq %%mm4, %%mm3 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
330 "punpcklwd %%mm2, %%mm2 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
331 "punpcklwd %%mm5, %%mm5 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
332 "punpcklwd %%mm4, %%mm4 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
333 "paddw %%mm1, %%mm2 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
334 "paddw %%mm1, %%mm5 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
335 "paddw %%mm1, %%mm4 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
336 "punpckhwd %%mm0, %%mm0 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
337 "punpckhwd %%mm6, %%mm6 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
338 "punpckhwd %%mm3, %%mm3 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
339 "paddw %%mm7, %%mm0 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
340 "paddw %%mm7, %%mm6 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
341 "paddw %%mm7, %%mm3 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
342 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
343 "packuswb %%mm0, %%mm2 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
344 "packuswb %%mm6, %%mm5 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
345 "packuswb %%mm3, %%mm4 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
346 "pxor %%mm7, %%mm7 \n\t" |
7723 | 347 |
9417 | 348 #define YSCALEYUV2PACKED1(index, c) \ |
349 "xorl "#index", "#index" \n\t"\ | |
7723 | 350 ".balign 16 \n\t"\ |
351 "1: \n\t"\ | |
9417 | 352 "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ |
353 "movq 4096(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ | |
7723 | 354 "psraw $7, %%mm3 \n\t" \ |
355 "psraw $7, %%mm4 \n\t" \ | |
9417 | 356 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ |
357 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\ | |
7723 | 358 "psraw $7, %%mm1 \n\t" \ |
359 "psraw $7, %%mm7 \n\t" \ | |
360 | |
9417 | 361 #define YSCALEYUV2RGB1(index, c) \ |
362 "xorl "#index", "#index" \n\t"\ | |
2800
7847d6b7ad3d
.balign or we'll align by 64kb on some architectures
michael
parents:
2799
diff
changeset
|
363 ".balign 16 \n\t"\ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
364 "1: \n\t"\ |
9417 | 365 "movq (%2, "#index"), %%mm3 \n\t" /* uvbuf0[eax]*/\ |
366 "movq 4096(%2, "#index"), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
367 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
368 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ |
9417 | 369 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\ |
370 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
371 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
372 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ |
9417 | 373 "pmulhw "UG_COEFF"("#c"), %%mm3\n\t"\ |
374 "pmulhw "VG_COEFF"("#c"), %%mm4\n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
375 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ |
9417 | 376 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ |
377 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\ | |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
378 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
379 "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
9417 | 380 "pmulhw "UB_COEFF"("#c"), %%mm2\n\t"\ |
381 "pmulhw "VR_COEFF"("#c"), %%mm5\n\t"\ | |
382 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\ | |
383 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\ | |
384 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\ | |
385 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\ | |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
386 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
387 "paddw %%mm3, %%mm4 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
388 "movq %%mm2, %%mm0 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
389 "movq %%mm5, %%mm6 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
390 "movq %%mm4, %%mm3 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
391 "punpcklwd %%mm2, %%mm2 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
392 "punpcklwd %%mm5, %%mm5 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
393 "punpcklwd %%mm4, %%mm4 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
394 "paddw %%mm1, %%mm2 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
395 "paddw %%mm1, %%mm5 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
396 "paddw %%mm1, %%mm4 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
397 "punpckhwd %%mm0, %%mm0 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
398 "punpckhwd %%mm6, %%mm6 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
399 "punpckhwd %%mm3, %%mm3 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
400 "paddw %%mm7, %%mm0 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
401 "paddw %%mm7, %%mm6 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
402 "paddw %%mm7, %%mm3 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
403 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
404 "packuswb %%mm0, %%mm2 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
405 "packuswb %%mm6, %%mm5 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
406 "packuswb %%mm3, %%mm4 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
407 "pxor %%mm7, %%mm7 \n\t" |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
408 |
9417 | 409 #define YSCALEYUV2PACKED1b(index, c) \ |
410 "xorl "#index", "#index" \n\t"\ | |
7723 | 411 ".balign 16 \n\t"\ |
412 "1: \n\t"\ | |
9417 | 413 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ |
414 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ | |
415 "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ | |
416 "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ | |
7723 | 417 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\ |
418 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\ | |
419 "psrlw $8, %%mm3 \n\t" \ | |
420 "psrlw $8, %%mm4 \n\t" \ | |
9417 | 421 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ |
422 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\ | |
7723 | 423 "psraw $7, %%mm1 \n\t" \ |
424 "psraw $7, %%mm7 \n\t" | |
425 | |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
426 // do vertical chrominance interpolation |
9417 | 427 #define YSCALEYUV2RGB1b(index, c) \ |
428 "xorl "#index", "#index" \n\t"\ | |
2800
7847d6b7ad3d
.balign or we¡ll align by 64kb on some architectures
michael
parents:
2799
diff
changeset
|
429 ".balign 16 \n\t"\ |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
430 "1: \n\t"\ |
9417 | 431 "movq (%2, "#index"), %%mm2 \n\t" /* uvbuf0[eax]*/\ |
432 "movq (%3, "#index"), %%mm3 \n\t" /* uvbuf1[eax]*/\ | |
433 "movq 4096(%2, "#index"), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ | |
434 "movq 4096(%3, "#index"), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ | |
2576 | 435 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\ |
436 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\ | |
3344 | 437 "psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\ |
438 "psrlw $5, %%mm4 \n\t" /*FIXME might overflow*/\ | |
9417 | 439 "psubw "U_OFFSET"("#c"), %%mm3 \n\t" /* (U-128)8*/\ |
440 "psubw "V_OFFSET"("#c"), %%mm4 \n\t" /* (V-128)8*/\ | |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
441 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
442 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ |
9417 | 443 "pmulhw "UG_COEFF"("#c"), %%mm3\n\t"\ |
444 "pmulhw "VG_COEFF"("#c"), %%mm4\n\t"\ | |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
445 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ |
9417 | 446 "movq (%0, "#index", 2), %%mm1 \n\t" /*buf0[eax]*/\ |
447 "movq 8(%0, "#index", 2), %%mm7 \n\t" /*buf0[eax]*/\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
448 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
449 "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
9417 | 450 "pmulhw "UB_COEFF"("#c"), %%mm2\n\t"\ |
451 "pmulhw "VR_COEFF"("#c"), %%mm5\n\t"\ | |
452 "psubw "Y_OFFSET"("#c"), %%mm1 \n\t" /* 8(Y-16)*/\ | |
453 "psubw "Y_OFFSET"("#c"), %%mm7 \n\t" /* 8(Y-16)*/\ | |
454 "pmulhw "Y_COEFF"("#c"), %%mm1 \n\t"\ | |
455 "pmulhw "Y_COEFF"("#c"), %%mm7 \n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
456 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
457 "paddw %%mm3, %%mm4 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
458 "movq %%mm2, %%mm0 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
459 "movq %%mm5, %%mm6 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
460 "movq %%mm4, %%mm3 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
461 "punpcklwd %%mm2, %%mm2 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
462 "punpcklwd %%mm5, %%mm5 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
463 "punpcklwd %%mm4, %%mm4 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
464 "paddw %%mm1, %%mm2 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
465 "paddw %%mm1, %%mm5 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
466 "paddw %%mm1, %%mm4 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
467 "punpckhwd %%mm0, %%mm0 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
468 "punpckhwd %%mm6, %%mm6 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
469 "punpckhwd %%mm3, %%mm3 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
470 "paddw %%mm7, %%mm0 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
471 "paddw %%mm7, %%mm6 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
472 "paddw %%mm7, %%mm3 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
473 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
474 "packuswb %%mm0, %%mm2 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
475 "packuswb %%mm6, %%mm5 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
476 "packuswb %%mm3, %%mm4 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
477 "pxor %%mm7, %%mm7 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
478 |
9414 | 479 #define WRITEBGR32(dst, dstw, index) \ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
480 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
481 "movq %%mm2, %%mm1 \n\t" /* B */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
482 "movq %%mm5, %%mm6 \n\t" /* R */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
483 "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
484 "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
485 "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
486 "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
487 "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
488 "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
489 "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
490 "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
491 "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
492 "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
493 \ |
9414 | 494 MOVNTQ(%%mm0, (dst, index, 4))\ |
495 MOVNTQ(%%mm2, 8(dst, index, 4))\ | |
496 MOVNTQ(%%mm1, 16(dst, index, 4))\ | |
497 MOVNTQ(%%mm3, 24(dst, index, 4))\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
498 \ |
9414 | 499 "addl $8, "#index" \n\t"\ |
500 "cmpl "#dstw", "#index" \n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
501 " jb 1b \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
502 |
9414 | 503 #define WRITEBGR16(dst, dstw, index) \ |
4248 | 504 "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\ |
505 "pand "MANGLE(bFC)", %%mm4 \n\t" /* G */\ | |
506 "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\ | |
2669 | 507 "psrlq $3, %%mm2 \n\t"\ |
508 \ | |
509 "movq %%mm2, %%mm1 \n\t"\ | |
510 "movq %%mm4, %%mm3 \n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
511 \ |
2669 | 512 "punpcklbw %%mm7, %%mm3 \n\t"\ |
513 "punpcklbw %%mm5, %%mm2 \n\t"\ | |
514 "punpckhbw %%mm7, %%mm4 \n\t"\ | |
515 "punpckhbw %%mm5, %%mm1 \n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
516 \ |
2669 | 517 "psllq $3, %%mm3 \n\t"\ |
518 "psllq $3, %%mm4 \n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
519 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
520 "por %%mm3, %%mm2 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
521 "por %%mm4, %%mm1 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
522 \ |
9414 | 523 MOVNTQ(%%mm2, (dst, index, 2))\ |
524 MOVNTQ(%%mm1, 8(dst, index, 2))\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
525 \ |
9414 | 526 "addl $8, "#index" \n\t"\ |
527 "cmpl "#dstw", "#index" \n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
528 " jb 1b \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
529 |
9414 | 530 #define WRITEBGR15(dst, dstw, index) \ |
4248 | 531 "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\ |
532 "pand "MANGLE(bF8)", %%mm4 \n\t" /* G */\ | |
533 "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\ | |
2669 | 534 "psrlq $3, %%mm2 \n\t"\ |
535 "psrlq $1, %%mm5 \n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
536 \ |
2669 | 537 "movq %%mm2, %%mm1 \n\t"\ |
538 "movq %%mm4, %%mm3 \n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
539 \ |
2669 | 540 "punpcklbw %%mm7, %%mm3 \n\t"\ |
541 "punpcklbw %%mm5, %%mm2 \n\t"\ | |
542 "punpckhbw %%mm7, %%mm4 \n\t"\ | |
543 "punpckhbw %%mm5, %%mm1 \n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
544 \ |
2669 | 545 "psllq $2, %%mm3 \n\t"\ |
546 "psllq $2, %%mm4 \n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
547 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
548 "por %%mm3, %%mm2 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
549 "por %%mm4, %%mm1 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
550 \ |
9414 | 551 MOVNTQ(%%mm2, (dst, index, 2))\ |
552 MOVNTQ(%%mm1, 8(dst, index, 2))\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
553 \ |
9414 | 554 "addl $8, "#index" \n\t"\ |
555 "cmpl "#dstw", "#index" \n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
556 " jb 1b \n\t" |
2669 | 557 |
9414 | 558 #define WRITEBGR24OLD(dst, dstw, index) \ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
559 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
560 "movq %%mm2, %%mm1 \n\t" /* B */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
561 "movq %%mm5, %%mm6 \n\t" /* R */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
562 "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
563 "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
564 "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
565 "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
566 "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
567 "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\ |
2326 | 568 "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\ |
569 "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\ | |
570 "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\ | |
571 "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
572 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
573 "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
574 "psrlq $8, %%mm0 \n\t" /* 00RGB0RG 0 */\ |
4248 | 575 "pand "MANGLE(bm00000111)", %%mm4\n\t" /* 00000RGB 0 */\ |
576 "pand "MANGLE(bm11111000)", %%mm0\n\t" /* 00RGB000 0.5 */\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
577 "por %%mm4, %%mm0 \n\t" /* 00RGBRGB 0 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
578 "movq %%mm2, %%mm4 \n\t" /* 0RGB0RGB 1 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
579 "psllq $48, %%mm2 \n\t" /* GB000000 1 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
580 "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
581 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
582 "movq %%mm4, %%mm2 \n\t" /* 0RGB0RGB 1 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
583 "psrld $16, %%mm4 \n\t" /* 000R000R 1 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
584 "psrlq $24, %%mm2 \n\t" /* 0000RGB0 1.5 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
585 "por %%mm4, %%mm2 \n\t" /* 000RRGBR 1 */\ |
4248 | 586 "pand "MANGLE(bm00001111)", %%mm2\n\t" /* 0000RGBR 1 */\ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
587 "movq %%mm1, %%mm4 \n\t" /* 0RGB0RGB 2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
588 "psrlq $8, %%mm1 \n\t" /* 00RGB0RG 2 */\ |
4248 | 589 "pand "MANGLE(bm00000111)", %%mm4\n\t" /* 00000RGB 2 */\ |
590 "pand "MANGLE(bm11111000)", %%mm1\n\t" /* 00RGB000 2.5 */\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
591 "por %%mm4, %%mm1 \n\t" /* 00RGBRGB 2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
592 "movq %%mm1, %%mm4 \n\t" /* 00RGBRGB 2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
593 "psllq $32, %%mm1 \n\t" /* BRGB0000 2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
594 "por %%mm1, %%mm2 \n\t" /* BRGBRGBR 1 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
595 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
596 "psrlq $32, %%mm4 \n\t" /* 000000RG 2.5 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
597 "movq %%mm3, %%mm5 \n\t" /* 0RGB0RGB 3 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
598 "psrlq $8, %%mm3 \n\t" /* 00RGB0RG 3 */\ |
4248 | 599 "pand "MANGLE(bm00000111)", %%mm5\n\t" /* 00000RGB 3 */\ |
600 "pand "MANGLE(bm11111000)", %%mm3\n\t" /* 00RGB000 3.5 */\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
601 "por %%mm5, %%mm3 \n\t" /* 00RGBRGB 3 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
602 "psllq $16, %%mm3 \n\t" /* RGBRGB00 3 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
603 "por %%mm4, %%mm3 \n\t" /* RGBRGBRG 2.5 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
604 \ |
9414 | 605 MOVNTQ(%%mm0, (dst))\ |
606 MOVNTQ(%%mm2, 8(dst))\ | |
607 MOVNTQ(%%mm3, 16(dst))\ | |
608 "addl $24, "#dst" \n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
609 \ |
9414 | 610 "addl $8, "#index" \n\t"\ |
611 "cmpl "#dstw", "#index" \n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
612 " jb 1b \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
613 |
9414 | 614 #define WRITEBGR24MMX(dst, dstw, index) \ |
2730 | 615 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ |
616 "movq %%mm2, %%mm1 \n\t" /* B */\ | |
617 "movq %%mm5, %%mm6 \n\t" /* R */\ | |
618 "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\ | |
619 "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\ | |
620 "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\ | |
621 "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\ | |
622 "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\ | |
623 "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\ | |
624 "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\ | |
625 "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\ | |
626 "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\ | |
627 "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\ | |
628 \ | |
629 "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\ | |
630 "movq %%mm2, %%mm6 \n\t" /* 0RGB0RGB 1 */\ | |
631 "movq %%mm1, %%mm5 \n\t" /* 0RGB0RGB 2 */\ | |
632 "movq %%mm3, %%mm7 \n\t" /* 0RGB0RGB 3 */\ | |
633 \ | |
634 "psllq $40, %%mm0 \n\t" /* RGB00000 0 */\ | |
635 "psllq $40, %%mm2 \n\t" /* RGB00000 1 */\ | |
636 "psllq $40, %%mm1 \n\t" /* RGB00000 2 */\ | |
637 "psllq $40, %%mm3 \n\t" /* RGB00000 3 */\ | |
638 \ | |
639 "punpckhdq %%mm4, %%mm0 \n\t" /* 0RGBRGB0 0 */\ | |
640 "punpckhdq %%mm6, %%mm2 \n\t" /* 0RGBRGB0 1 */\ | |
641 "punpckhdq %%mm5, %%mm1 \n\t" /* 0RGBRGB0 2 */\ | |
642 "punpckhdq %%mm7, %%mm3 \n\t" /* 0RGBRGB0 3 */\ | |
643 \ | |
644 "psrlq $8, %%mm0 \n\t" /* 00RGBRGB 0 */\ | |
645 "movq %%mm2, %%mm6 \n\t" /* 0RGBRGB0 1 */\ | |
646 "psllq $40, %%mm2 \n\t" /* GB000000 1 */\ | |
647 "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\ | |
9414 | 648 MOVNTQ(%%mm0, (dst))\ |
2730 | 649 \ |
650 "psrlq $24, %%mm6 \n\t" /* 0000RGBR 1 */\ | |
651 "movq %%mm1, %%mm5 \n\t" /* 0RGBRGB0 2 */\ | |
652 "psllq $24, %%mm1 \n\t" /* BRGB0000 2 */\ | |
653 "por %%mm1, %%mm6 \n\t" /* BRGBRGBR 1 */\ | |
9414 | 654 MOVNTQ(%%mm6, 8(dst))\ |
2730 | 655 \ |
656 "psrlq $40, %%mm5 \n\t" /* 000000RG 2 */\ | |
657 "psllq $8, %%mm3 \n\t" /* RGBRGB00 3 */\ | |
658 "por %%mm3, %%mm5 \n\t" /* RGBRGBRG 2 */\ | |
9414 | 659 MOVNTQ(%%mm5, 16(dst))\ |
2730 | 660 \ |
9414 | 661 "addl $24, "#dst" \n\t"\ |
2730 | 662 \ |
9414 | 663 "addl $8, "#index" \n\t"\ |
664 "cmpl "#dstw", "#index" \n\t"\ | |
2730 | 665 " jb 1b \n\t" |
666 | |
9414 | 667 #define WRITEBGR24MMX2(dst, dstw, index) \ |
2730 | 668 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ |
4248 | 669 "movq "MANGLE(M24A)", %%mm0 \n\t"\ |
670 "movq "MANGLE(M24C)", %%mm7 \n\t"\ | |
2730 | 671 "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */\ |
672 "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */\ | |
673 "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */\ | |
674 \ | |
675 "pand %%mm0, %%mm1 \n\t" /* B2 B1 B0 */\ | |
676 "pand %%mm0, %%mm3 \n\t" /* G2 G1 G0 */\ | |
677 "pand %%mm7, %%mm6 \n\t" /* R1 R0 */\ | |
678 \ | |
679 "psllq $8, %%mm3 \n\t" /* G2 G1 G0 */\ | |
680 "por %%mm1, %%mm6 \n\t"\ | |
681 "por %%mm3, %%mm6 \n\t"\ | |
9414 | 682 MOVNTQ(%%mm6, (dst))\ |
2730 | 683 \ |
684 "psrlq $8, %%mm4 \n\t" /* 00 G7 G6 G5 G4 G3 G2 G1 */\ | |
685 "pshufw $0xA5, %%mm2, %%mm1 \n\t" /* B5 B4 B5 B4 B3 B2 B3 B2 */\ | |
686 "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */\ | |
687 "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */\ | |
688 \ | |
4248 | 689 "pand "MANGLE(M24B)", %%mm1 \n\t" /* B5 B4 B3 */\ |
2730 | 690 "pand %%mm7, %%mm3 \n\t" /* G4 G3 */\ |
691 "pand %%mm0, %%mm6 \n\t" /* R4 R3 R2 */\ | |
692 \ | |
693 "por %%mm1, %%mm3 \n\t" /* B5 G4 B4 G3 B3 */\ | |
694 "por %%mm3, %%mm6 \n\t"\ | |
9414 | 695 MOVNTQ(%%mm6, 8(dst))\ |
2730 | 696 \ |
697 "pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6 B7 B6 B6 B7 */\ | |
698 "pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7 G6 G5 G6 G5 */\ | |
699 "pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6 R5 R4 R5 R4 */\ | |
700 \ | |
701 "pand %%mm7, %%mm1 \n\t" /* B7 B6 */\ | |
702 "pand %%mm0, %%mm3 \n\t" /* G7 G6 G5 */\ | |
4248 | 703 "pand "MANGLE(M24B)", %%mm6 \n\t" /* R7 R6 R5 */\ |
2730 | 704 \ |
705 "por %%mm1, %%mm3 \n\t"\ | |
706 "por %%mm3, %%mm6 \n\t"\ | |
9414 | 707 MOVNTQ(%%mm6, 16(dst))\ |
2730 | 708 \ |
9414 | 709 "addl $24, "#dst" \n\t"\ |
2730 | 710 \ |
9414 | 711 "addl $8, "#index" \n\t"\ |
712 "cmpl "#dstw", "#index" \n\t"\ | |
2730 | 713 " jb 1b \n\t" |
714 | |
715 #ifdef HAVE_MMX2 | |
3126 | 716 #undef WRITEBGR24 |
2730 | 717 #define WRITEBGR24 WRITEBGR24MMX2 |
718 #else | |
3126 | 719 #undef WRITEBGR24 |
2730 | 720 #define WRITEBGR24 WRITEBGR24MMX |
721 #endif | |
722 | |
9414 | 723 #define WRITEYUY2(dst, dstw, index) \ |
7723 | 724 "packuswb %%mm3, %%mm3 \n\t"\ |
725 "packuswb %%mm4, %%mm4 \n\t"\ | |
726 "packuswb %%mm7, %%mm1 \n\t"\ | |
727 "punpcklbw %%mm4, %%mm3 \n\t"\ | |
728 "movq %%mm1, %%mm7 \n\t"\ | |
729 "punpcklbw %%mm3, %%mm1 \n\t"\ | |
730 "punpckhbw %%mm3, %%mm7 \n\t"\ | |
731 \ | |
9414 | 732 MOVNTQ(%%mm1, (dst, index, 2))\ |
733 MOVNTQ(%%mm7, 8(dst, index, 2))\ | |
7723 | 734 \ |
9414 | 735 "addl $8, "#index" \n\t"\ |
736 "cmpl "#dstw", "#index" \n\t"\ | |
7723 | 737 " jb 1b \n\t" |
738 | |
739 | |
9413 | 740 static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, |
3344 | 741 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, |
9414 | 742 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW) |
2519 | 743 { |
9413 | 744 int dummy=0; |
3344 | 745 #ifdef HAVE_MMX |
746 if(uDest != NULL) | |
747 { | |
748 asm volatile( | |
9413 | 749 YSCALEYUV2YV12X(0, CHR_MMX_FILTER_OFFSET) |
750 :: "r" (&c->redDither), | |
751 "r" (uDest), "m" (chrDstW) | |
3344 | 752 : "%eax", "%edx", "%esi" |
753 ); | |
2519 | 754 |
3344 | 755 asm volatile( |
9413 | 756 YSCALEYUV2YV12X(4096, CHR_MMX_FILTER_OFFSET) |
757 :: "r" (&c->redDither), | |
758 "r" (vDest), "m" (chrDstW) | |
3344 | 759 : "%eax", "%edx", "%esi" |
760 ); | |
761 } | |
2521 | 762 |
3344 | 763 asm volatile( |
9413 | 764 YSCALEYUV2YV12X(0, LUM_MMX_FILTER_OFFSET) |
765 :: "r" (&c->redDither), | |
766 "r" (dest), "m" (dstW) | |
3344 | 767 : "%eax", "%edx", "%esi" |
768 ); | |
769 #else | |
6540 | 770 yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize, |
3352 | 771 chrFilter, chrSrc, chrFilterSize, |
6540 | 772 dest, uDest, vDest, dstW, chrDstW); |
3344 | 773 #endif |
774 } | |
775 | |
776 static inline void RENAME(yuv2yuv1)(int16_t *lumSrc, int16_t *chrSrc, | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
777 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW) |
3344 | 778 { |
779 #ifdef HAVE_MMX | |
780 if(uDest != NULL) | |
781 { | |
782 asm volatile( | |
783 YSCALEYUV2YV121 | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
784 :: "r" (chrSrc + chrDstW), "r" (uDest + chrDstW), |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
785 "g" (-chrDstW) |
3344 | 786 : "%eax" |
787 ); | |
788 | |
789 asm volatile( | |
790 YSCALEYUV2YV121 | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
791 :: "r" (chrSrc + 2048 + chrDstW), "r" (vDest + chrDstW), |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
792 "g" (-chrDstW) |
3344 | 793 : "%eax" |
794 ); | |
2519 | 795 } |
3344 | 796 |
797 asm volatile( | |
798 YSCALEYUV2YV121 | |
799 :: "r" (lumSrc + dstW), "r" (dest + dstW), | |
800 "g" (-dstW) | |
801 : "%eax" | |
802 ); | |
803 #else | |
804 int i; | |
805 for(i=0; i<dstW; i++) | |
806 { | |
807 int val= lumSrc[i]>>7; | |
6503 | 808 |
809 if(val&256){ | |
810 if(val<0) val=0; | |
811 else val=255; | |
812 } | |
3344 | 813 |
6503 | 814 dest[i]= val; |
3344 | 815 } |
816 | |
817 if(uDest != NULL) | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
818 for(i=0; i<chrDstW; i++) |
3344 | 819 { |
820 int u=chrSrc[i]>>7; | |
821 int v=chrSrc[i + 2048]>>7; | |
822 | |
6503 | 823 if((u|v)&256){ |
824 if(u<0) u=0; | |
825 else if (u>255) u=255; | |
826 if(v<0) v=0; | |
827 else if (v>255) v=255; | |
828 } | |
829 | |
830 uDest[i]= u; | |
831 vDest[i]= v; | |
3344 | 832 } |
833 #endif | |
2519 | 834 } |
835 | |
3344 | 836 |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
837 /** |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
838 * vertical scale YV12 to RGB |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
839 */ |
7723 | 840 static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, |
3344 | 841 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, |
9413 | 842 uint8_t *dest, int dstW, int dstY) |
3344 | 843 { |
9413 | 844 int dummy=0; |
6578 | 845 switch(c->dstFormat) |
3344 | 846 { |
847 #ifdef HAVE_MMX | |
6578 | 848 case IMGFMT_BGR32: |
3344 | 849 { |
850 asm volatile( | |
851 YSCALEYUV2RGBX | |
9414 | 852 WRITEBGR32(%4, %5, %%eax) |
3344 | 853 |
9413 | 854 :: "r" (&c->redDither), |
855 "m" (dummy), "m" (dummy), "m" (dummy), | |
856 "r" (dest), "m" (dstW) | |
857 : "%eax", "%edx", "%esi" | |
3344 | 858 ); |
859 } | |
6578 | 860 break; |
861 case IMGFMT_BGR24: | |
3344 | 862 { |
863 asm volatile( | |
864 YSCALEYUV2RGBX | |
865 "leal (%%eax, %%eax, 2), %%ebx \n\t" //FIXME optimize | |
866 "addl %4, %%ebx \n\t" | |
9414 | 867 WRITEBGR24(%%ebx, %5, %%eax) |
3344 | 868 |
9413 | 869 :: "r" (&c->redDither), |
870 "m" (dummy), "m" (dummy), "m" (dummy), | |
871 "r" (dest), "m" (dstW) | |
872 : "%eax", "%ebx", "%edx", "%esi" //FIXME ebx | |
3344 | 873 ); |
874 } | |
6578 | 875 break; |
876 case IMGFMT_BGR15: | |
3344 | 877 { |
878 asm volatile( | |
879 YSCALEYUV2RGBX | |
880 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | |
881 #ifdef DITHER1XBPP | |
4248 | 882 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
883 "paddusb "MANGLE(g5Dither)", %%mm4\n\t" | |
884 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
3344 | 885 #endif |
886 | |
9414 | 887 WRITEBGR15(%4, %5, %%eax) |
3344 | 888 |
9413 | 889 :: "r" (&c->redDither), |
890 "m" (dummy), "m" (dummy), "m" (dummy), | |
891 "r" (dest), "m" (dstW) | |
892 : "%eax", "%edx", "%esi" | |
3344 | 893 ); |
894 } | |
6578 | 895 break; |
896 case IMGFMT_BGR16: | |
3344 | 897 { |
898 asm volatile( | |
899 YSCALEYUV2RGBX | |
900 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | |
901 #ifdef DITHER1XBPP | |
4248 | 902 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
903 "paddusb "MANGLE(g6Dither)", %%mm4\n\t" | |
904 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
3344 | 905 #endif |
906 | |
9414 | 907 WRITEBGR16(%4, %5, %%eax) |
3344 | 908 |
9413 | 909 :: "r" (&c->redDither), |
910 "m" (dummy), "m" (dummy), "m" (dummy), | |
911 "r" (dest), "m" (dstW) | |
912 : "%eax", "%edx", "%esi" | |
3344 | 913 ); |
914 } | |
6578 | 915 break; |
7723 | 916 case IMGFMT_YUY2: |
917 { | |
918 asm volatile( | |
919 YSCALEYUV2PACKEDX | |
920 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | |
921 | |
922 "psraw $3, %%mm3 \n\t" | |
923 "psraw $3, %%mm4 \n\t" | |
924 "psraw $3, %%mm1 \n\t" | |
925 "psraw $3, %%mm7 \n\t" | |
9414 | 926 WRITEYUY2(%4, %5, %%eax) |
7723 | 927 |
9413 | 928 :: "r" (&c->redDither), |
929 "m" (dummy), "m" (dummy), "m" (dummy), | |
930 "r" (dest), "m" (dstW) | |
931 : "%eax", "%edx", "%esi" | |
7723 | 932 ); |
933 } | |
934 break; | |
3344 | 935 #endif |
6578 | 936 default: |
7723 | 937 yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize, |
6578 | 938 chrFilter, chrSrc, chrFilterSize, |
939 dest, dstW, dstY); | |
940 break; | |
941 } | |
3344 | 942 } |
943 | |
944 /** | |
945 * vertical bilinear scale YV12 to RGB | |
946 */ | |
7723 | 947 static inline void RENAME(yuv2packed2)(SwsContext *c, uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1, |
6578 | 948 uint8_t *dest, int dstW, int yalpha, int uvalpha, int y) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
949 { |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
950 int yalpha1=yalpha^4095; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
951 int uvalpha1=uvalpha^4095; |
6578 | 952 int i; |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
953 |
6578 | 954 #if 0 //isnt used |
4467 | 955 if(flags&SWS_FULL_CHR_H_INT) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
956 { |
6578 | 957 switch(dstFormat) |
958 { | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
959 #ifdef HAVE_MMX |
6578 | 960 case IMGFMT_BGR32: |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
961 asm volatile( |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
962 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
963 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
964 FULL_YSCALEYUV2RGB |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
965 "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
966 "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
967 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
968 "movq %%mm3, %%mm1 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
969 "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
970 "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
971 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
972 MOVNTQ(%%mm3, (%4, %%eax, 4)) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
973 MOVNTQ(%%mm1, 8(%4, %%eax, 4)) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
974 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
975 "addl $4, %%eax \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
976 "cmpl %5, %%eax \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
977 " jb 1b \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
978 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
979 |
3209 | 980 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
981 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
982 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
983 ); |
6578 | 984 break; |
985 case IMGFMT_BGR24: | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
986 asm volatile( |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
987 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
988 FULL_YSCALEYUV2RGB |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
989 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
990 // lsb ... msb |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
991 "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
992 "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
993 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
994 "movq %%mm3, %%mm1 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
995 "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
996 "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
997 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
998 "movq %%mm3, %%mm2 \n\t" // BGR0BGR0 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
999 "psrlq $8, %%mm3 \n\t" // GR0BGR00 |
4248 | 1000 "pand "MANGLE(bm00000111)", %%mm2\n\t" // BGR00000 |
1001 "pand "MANGLE(bm11111000)", %%mm3\n\t" // 000BGR00 | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1002 "por %%mm2, %%mm3 \n\t" // BGRBGR00 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1003 "movq %%mm1, %%mm2 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1004 "psllq $48, %%mm1 \n\t" // 000000BG |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1005 "por %%mm1, %%mm3 \n\t" // BGRBGRBG |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1006 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1007 "movq %%mm2, %%mm1 \n\t" // BGR0BGR0 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1008 "psrld $16, %%mm2 \n\t" // R000R000 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1009 "psrlq $24, %%mm1 \n\t" // 0BGR0000 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1010 "por %%mm2, %%mm1 \n\t" // RBGRR000 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1011 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1012 "movl %4, %%ebx \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1013 "addl %%eax, %%ebx \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1014 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1015 #ifdef HAVE_MMX2 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1016 //FIXME Alignment |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1017 "movntq %%mm3, (%%ebx, %%eax, 2)\n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1018 "movntq %%mm1, 8(%%ebx, %%eax, 2)\n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1019 #else |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1020 "movd %%mm3, (%%ebx, %%eax, 2) \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1021 "psrlq $32, %%mm3 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1022 "movd %%mm3, 4(%%ebx, %%eax, 2) \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1023 "movd %%mm1, 8(%%ebx, %%eax, 2) \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1024 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1025 "addl $4, %%eax \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1026 "cmpl %5, %%eax \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1027 " jb 1b \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1028 |
3209 | 1029 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1030 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1031 : "%eax", "%ebx" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1032 ); |
6578 | 1033 break; |
1034 case IMGFMT_BGR15: | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1035 asm volatile( |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1036 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1037 FULL_YSCALEYUV2RGB |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1038 #ifdef DITHER1XBPP |
4248 | 1039 "paddusb "MANGLE(g5Dither)", %%mm1\n\t" |
1040 "paddusb "MANGLE(r5Dither)", %%mm0\n\t" | |
1041 "paddusb "MANGLE(b5Dither)", %%mm3\n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1042 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1043 "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1044 "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1045 "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1046 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1047 "psrlw $3, %%mm3 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1048 "psllw $2, %%mm1 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1049 "psllw $7, %%mm0 \n\t" |
4248 | 1050 "pand "MANGLE(g15Mask)", %%mm1 \n\t" |
1051 "pand "MANGLE(r15Mask)", %%mm0 \n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1052 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1053 "por %%mm3, %%mm1 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1054 "por %%mm1, %%mm0 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1055 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1056 MOVNTQ(%%mm0, (%4, %%eax, 2)) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1057 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1058 "addl $4, %%eax \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1059 "cmpl %5, %%eax \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1060 " jb 1b \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1061 |
3209 | 1062 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1063 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1064 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1065 ); |
6578 | 1066 break; |
1067 case IMGFMT_BGR16: | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1068 asm volatile( |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1069 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1070 FULL_YSCALEYUV2RGB |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1071 #ifdef DITHER1XBPP |
4248 | 1072 "paddusb "MANGLE(g6Dither)", %%mm1\n\t" |
1073 "paddusb "MANGLE(r5Dither)", %%mm0\n\t" | |
1074 "paddusb "MANGLE(b5Dither)", %%mm3\n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1075 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1076 "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1077 "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1078 "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1079 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1080 "psrlw $3, %%mm3 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1081 "psllw $3, %%mm1 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1082 "psllw $8, %%mm0 \n\t" |
4248 | 1083 "pand "MANGLE(g16Mask)", %%mm1 \n\t" |
1084 "pand "MANGLE(r16Mask)", %%mm0 \n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1085 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1086 "por %%mm3, %%mm1 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1087 "por %%mm1, %%mm0 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1088 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1089 MOVNTQ(%%mm0, (%4, %%eax, 2)) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1090 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1091 "addl $4, %%eax \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1092 "cmpl %5, %%eax \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1093 " jb 1b \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1094 |
3209 | 1095 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1096 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1097 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1098 ); |
6578 | 1099 break; |
1100 #endif | |
1101 case IMGFMT_RGB32: | |
1102 #ifndef HAVE_MMX | |
1103 case IMGFMT_BGR32: | |
1104 #endif | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1105 if(dstFormat==IMGFMT_BGR32) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1106 { |
4794 | 1107 int i; |
4793 | 1108 #ifdef WORDS_BIGENDIAN |
1109 dest++; | |
1110 #endif | |
3209 | 1111 for(i=0;i<dstW;i++){ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1112 // vertical linear interpolation && yuv2rgb in a single step: |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1113 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1114 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1115 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); |
2503 | 1116 dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)]; |
1117 dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)]; | |
1118 dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)]; | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1119 dest+= 4; |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1120 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1121 } |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1122 else if(dstFormat==IMGFMT_BGR24) |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1123 { |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1124 int i; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1125 for(i=0;i<dstW;i++){ |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1126 // vertical linear interpolation && yuv2rgb in a single step: |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1127 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1128 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1129 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1130 dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)]; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1131 dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)]; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1132 dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)]; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1133 dest+= 3; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1134 } |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1135 } |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1136 else if(dstFormat==IMGFMT_BGR16) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1137 { |
2671 | 1138 int i; |
3209 | 1139 for(i=0;i<dstW;i++){ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1140 // vertical linear interpolation && yuv2rgb in a single step: |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1141 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1142 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1143 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1144 |
2572
f2353173d52c
c optimizations (array is faster than pointer) (16bpp variants tested and 2% faster)
michael
parents:
2569
diff
changeset
|
1145 ((uint16_t*)dest)[i] = |
2584 | 1146 clip_table16b[(Y + yuvtab_40cf[U]) >>13] | |
1147 clip_table16g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] | | |
1148 clip_table16r[(Y + yuvtab_3343[V]) >>13]; | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1149 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1150 } |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1151 else if(dstFormat==IMGFMT_BGR15) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1152 { |
2671 | 1153 int i; |
3209 | 1154 for(i=0;i<dstW;i++){ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1155 // vertical linear interpolation && yuv2rgb in a single step: |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1156 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1157 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1158 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1159 |
2572
f2353173d52c
c optimizations (array is faster than pointer) (16bpp variants tested and 2% faster)
michael
parents:
2569
diff
changeset
|
1160 ((uint16_t*)dest)[i] = |
2584 | 1161 clip_table15b[(Y + yuvtab_40cf[U]) >>13] | |
1162 clip_table15g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] | | |
1163 clip_table15r[(Y + yuvtab_3343[V]) >>13]; | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1164 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1165 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1166 }//FULL_UV_IPOL |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1167 else |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1168 { |
6578 | 1169 #endif // if 0 |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1170 #ifdef HAVE_MMX |
6578 | 1171 switch(c->dstFormat) |
1172 { | |
9414 | 1173 //Note 8280 == DSTW_OFFSET but the preprocessor cant handle that there :( |
6578 | 1174 case IMGFMT_BGR32: |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1175 asm volatile( |
9414 | 1176 "movl %%esp, "ESP_OFFSET"(%5) \n\t" |
1177 "movl %4, %%esp \n\t" | |
1178 YSCALEYUV2RGB(%%eax, %5) | |
1179 WRITEBGR32(%%esp, 8280(%5), %%eax) | |
1180 "movl "ESP_OFFSET"(%5), %%esp \n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1181 |
9414 | 1182 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), |
1183 "r" (&c->redDither) | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1184 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1185 ); |
6578 | 1186 return; |
1187 case IMGFMT_BGR24: | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1188 asm volatile( |
9414 | 1189 "movl %%esp, "ESP_OFFSET"(%5) \n\t" |
1190 "movl %4, %%esp \n\t" | |
1191 YSCALEYUV2RGB(%%eax, %5) | |
1192 WRITEBGR24(%%esp, 8280(%5), %%eax) | |
1193 "movl "ESP_OFFSET"(%5), %%esp \n\t" | |
1194 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), | |
1195 "r" (&c->redDither) | |
1196 : "%eax" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1197 ); |
6578 | 1198 return; |
1199 case IMGFMT_BGR15: | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1200 asm volatile( |
9414 | 1201 "movl %%esp, "ESP_OFFSET"(%5) \n\t" |
1202 "movl %4, %%esp \n\t" | |
1203 YSCALEYUV2RGB(%%eax, %5) | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1204 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1205 #ifdef DITHER1XBPP |
4248 | 1206 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
1207 "paddusb "MANGLE(g5Dither)", %%mm4\n\t" | |
1208 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1209 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1210 |
9414 | 1211 WRITEBGR15(%%esp, 8280(%5), %%eax) |
1212 "movl "ESP_OFFSET"(%5), %%esp \n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1213 |
9414 | 1214 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), |
1215 "r" (&c->redDither) | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1216 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1217 ); |
6578 | 1218 return; |
1219 case IMGFMT_BGR16: | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1220 asm volatile( |
9414 | 1221 "movl %%esp, "ESP_OFFSET"(%5) \n\t" |
1222 "movl %4, %%esp \n\t" | |
1223 YSCALEYUV2RGB(%%eax, %5) | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1224 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1225 #ifdef DITHER1XBPP |
4248 | 1226 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
1227 "paddusb "MANGLE(g6Dither)", %%mm4\n\t" | |
1228 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1229 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1230 |
9414 | 1231 WRITEBGR16(%%esp, 8280(%5), %%eax) |
1232 "movl "ESP_OFFSET"(%5), %%esp \n\t" | |
1233 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), | |
1234 "r" (&c->redDither) | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1235 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1236 ); |
6578 | 1237 return; |
7723 | 1238 case IMGFMT_YUY2: |
1239 asm volatile( | |
9414 | 1240 "movl %%esp, "ESP_OFFSET"(%5) \n\t" |
1241 "movl %4, %%esp \n\t" | |
1242 YSCALEYUV2PACKED(%%eax, %5) | |
1243 WRITEYUY2(%%esp, 8280(%5), %%eax) | |
1244 "movl "ESP_OFFSET"(%5), %%esp \n\t" | |
1245 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), | |
1246 "r" (&c->redDither) | |
7723 | 1247 : "%eax" |
1248 ); | |
1249 return; | |
6578 | 1250 default: break; |
1251 } | |
1252 #endif //HAVE_MMX | |
7723 | 1253 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1254 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1255 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1256 /** |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1257 * YV12 to RGB without scaling or interpolating |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1258 */ |
7723 | 1259 static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1, |
6578 | 1260 uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1261 { |
3344 | 1262 const int yalpha1=0; |
6578 | 1263 int i; |
1264 | |
1265 uint16_t *buf1= buf0; //FIXME needed for the rgb1/bgr1 | |
1266 const int yalpha= 4096; //FIXME ... | |
2671 | 1267 |
4467 | 1268 if(flags&SWS_FULL_CHR_H_INT) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1269 { |
7723 | 1270 RENAME(yuv2packed2)(c, buf0, buf0, uvbuf0, uvbuf1, dest, dstW, 0, uvalpha, y); |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1271 return; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1272 } |
2576 | 1273 |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1274 #ifdef HAVE_MMX |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1275 if( uvalpha < 2048 ) // note this is not correct (shifts chrominance by 0.5 pixels) but its a bit faster |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1276 { |
6578 | 1277 switch(dstFormat) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1278 { |
6578 | 1279 case IMGFMT_BGR32: |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1280 asm volatile( |
9417 | 1281 "movl %%esp, "ESP_OFFSET"(%5) \n\t" |
1282 "movl %4, %%esp \n\t" | |
1283 YSCALEYUV2RGB1(%%eax, %5) | |
1284 WRITEBGR32(%%esp, 8280(%5), %%eax) | |
1285 "movl "ESP_OFFSET"(%5), %%esp \n\t" | |
1286 | |
1287 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), | |
1288 "r" (&c->redDither) | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1289 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1290 ); |
6578 | 1291 return; |
1292 case IMGFMT_BGR24: | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1293 asm volatile( |
9417 | 1294 "movl %%esp, "ESP_OFFSET"(%5) \n\t" |
1295 "movl %4, %%esp \n\t" | |
1296 YSCALEYUV2RGB1(%%eax, %5) | |
1297 WRITEBGR24(%%esp, 8280(%5), %%eax) | |
1298 "movl "ESP_OFFSET"(%5), %%esp \n\t" | |
1299 | |
1300 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), | |
1301 "r" (&c->redDither) | |
1302 : "%eax" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1303 ); |
6578 | 1304 return; |
1305 case IMGFMT_BGR15: | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1306 asm volatile( |
9417 | 1307 "movl %%esp, "ESP_OFFSET"(%5) \n\t" |
1308 "movl %4, %%esp \n\t" | |
1309 YSCALEYUV2RGB1(%%eax, %5) | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1310 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1311 #ifdef DITHER1XBPP |
4248 | 1312 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
1313 "paddusb "MANGLE(g5Dither)", %%mm4\n\t" | |
1314 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1315 #endif |
9417 | 1316 WRITEBGR15(%%esp, 8280(%5), %%eax) |
1317 "movl "ESP_OFFSET"(%5), %%esp \n\t" | |
1318 | |
1319 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), | |
1320 "r" (&c->redDither) | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1321 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1322 ); |
6578 | 1323 return; |
1324 case IMGFMT_BGR16: | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1325 asm volatile( |
9417 | 1326 "movl %%esp, "ESP_OFFSET"(%5) \n\t" |
1327 "movl %4, %%esp \n\t" | |
1328 YSCALEYUV2RGB1(%%eax, %5) | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1329 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1330 #ifdef DITHER1XBPP |
4248 | 1331 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
1332 "paddusb "MANGLE(g6Dither)", %%mm4\n\t" | |
1333 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1334 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1335 |
9417 | 1336 WRITEBGR16(%%esp, 8280(%5), %%eax) |
1337 "movl "ESP_OFFSET"(%5), %%esp \n\t" | |
1338 | |
1339 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), | |
1340 "r" (&c->redDither) | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1341 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1342 ); |
6578 | 1343 return; |
7723 | 1344 case IMGFMT_YUY2: |
1345 asm volatile( | |
9417 | 1346 "movl %%esp, "ESP_OFFSET"(%5) \n\t" |
1347 "movl %4, %%esp \n\t" | |
1348 YSCALEYUV2PACKED1(%%eax, %5) | |
1349 WRITEYUY2(%%esp, 8280(%5), %%eax) | |
1350 "movl "ESP_OFFSET"(%5), %%esp \n\t" | |
1351 | |
1352 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), | |
1353 "r" (&c->redDither) | |
7723 | 1354 : "%eax" |
1355 ); | |
1356 return; | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1357 } |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1358 } |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1359 else |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1360 { |
6578 | 1361 switch(dstFormat) |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1362 { |
6578 | 1363 case IMGFMT_BGR32: |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1364 asm volatile( |
9417 | 1365 "movl %%esp, "ESP_OFFSET"(%5) \n\t" |
1366 "movl %4, %%esp \n\t" | |
1367 YSCALEYUV2RGB1b(%%eax, %5) | |
1368 WRITEBGR32(%%esp, 8280(%5), %%eax) | |
1369 "movl "ESP_OFFSET"(%5), %%esp \n\t" | |
1370 | |
1371 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), | |
1372 "r" (&c->redDither) | |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1373 : "%eax" |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1374 ); |
6578 | 1375 return; |
1376 case IMGFMT_BGR24: | |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1377 asm volatile( |
9417 | 1378 "movl %%esp, "ESP_OFFSET"(%5) \n\t" |
1379 "movl %4, %%esp \n\t" | |
1380 YSCALEYUV2RGB1b(%%eax, %5) | |
1381 WRITEBGR24(%%esp, 8280(%5), %%eax) | |
1382 "movl "ESP_OFFSET"(%5), %%esp \n\t" | |
1383 | |
1384 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), | |
1385 "r" (&c->redDither) | |
1386 : "%eax" | |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1387 ); |
6578 | 1388 return; |
1389 case IMGFMT_BGR15: | |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1390 asm volatile( |
9417 | 1391 "movl %%esp, "ESP_OFFSET"(%5) \n\t" |
1392 "movl %4, %%esp \n\t" | |
1393 YSCALEYUV2RGB1b(%%eax, %5) | |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1394 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1395 #ifdef DITHER1XBPP |
4248 | 1396 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
1397 "paddusb "MANGLE(g5Dither)", %%mm4\n\t" | |
1398 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1399 #endif |
9417 | 1400 WRITEBGR15(%%esp, 8280(%5), %%eax) |
1401 "movl "ESP_OFFSET"(%5), %%esp \n\t" | |
1402 | |
1403 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), | |
1404 "r" (&c->redDither) | |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1405 : "%eax" |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1406 ); |
6578 | 1407 return; |
1408 case IMGFMT_BGR16: | |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1409 asm volatile( |
9417 | 1410 "movl %%esp, "ESP_OFFSET"(%5) \n\t" |
1411 "movl %4, %%esp \n\t" | |
1412 YSCALEYUV2RGB1b(%%eax, %5) | |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1413 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1414 #ifdef DITHER1XBPP |
4248 | 1415 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
1416 "paddusb "MANGLE(g6Dither)", %%mm4\n\t" | |
1417 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1418 #endif |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1419 |
9417 | 1420 WRITEBGR16(%%esp, 8280(%5), %%eax) |
1421 "movl "ESP_OFFSET"(%5), %%esp \n\t" | |
1422 | |
1423 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), | |
1424 "r" (&c->redDither) | |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1425 : "%eax" |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1426 ); |
6578 | 1427 return; |
7723 | 1428 case IMGFMT_YUY2: |
1429 asm volatile( | |
9417 | 1430 "movl %%esp, "ESP_OFFSET"(%5) \n\t" |
1431 "movl %4, %%esp \n\t" | |
1432 YSCALEYUV2PACKED1b(%%eax, %5) | |
1433 WRITEYUY2(%%esp, 8280(%5), %%eax) | |
1434 "movl "ESP_OFFSET"(%5), %%esp \n\t" | |
1435 | |
1436 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), | |
1437 "r" (&c->redDither) | |
7723 | 1438 : "%eax" |
1439 ); | |
1440 return; | |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1441 } |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1442 } |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1443 #endif |
6578 | 1444 if( uvalpha < 2048 ) |
1445 { | |
7723 | 1446 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C) |
6578 | 1447 }else{ |
7723 | 1448 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C) |
6578 | 1449 } |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1450 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1451 |
4481 | 1452 //FIXME yuy2* can read upto 7 samples to much |
1453 | |
4467 | 1454 static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, int width) |
1455 { | |
4481 | 1456 #ifdef HAVE_MMX |
1457 asm volatile( | |
1458 "movq "MANGLE(bm01010101)", %%mm2\n\t" | |
1459 "movl %0, %%eax \n\t" | |
1460 "1: \n\t" | |
1461 "movq (%1, %%eax,2), %%mm0 \n\t" | |
1462 "movq 8(%1, %%eax,2), %%mm1 \n\t" | |
1463 "pand %%mm2, %%mm0 \n\t" | |
1464 "pand %%mm2, %%mm1 \n\t" | |
1465 "packuswb %%mm1, %%mm0 \n\t" | |
1466 "movq %%mm0, (%2, %%eax) \n\t" | |
1467 "addl $8, %%eax \n\t" | |
1468 " js 1b \n\t" | |
1469 : : "g" (-width), "r" (src+width*2), "r" (dst+width) | |
1470 : "%eax" | |
1471 ); | |
4467 | 1472 #else |
1473 int i; | |
1474 for(i=0; i<width; i++) | |
1475 dst[i]= src[2*i]; | |
1476 #endif | |
1477 } | |
1478 | |
1479 static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
1480 { | |
4481 | 1481 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
1482 asm volatile( | |
1483 "movq "MANGLE(bm01010101)", %%mm4\n\t" | |
1484 "movl %0, %%eax \n\t" | |
1485 "1: \n\t" | |
1486 "movq (%1, %%eax,4), %%mm0 \n\t" | |
1487 "movq 8(%1, %%eax,4), %%mm1 \n\t" | |
1488 "movq (%2, %%eax,4), %%mm2 \n\t" | |
1489 "movq 8(%2, %%eax,4), %%mm3 \n\t" | |
1490 PAVGB(%%mm2, %%mm0) | |
1491 PAVGB(%%mm3, %%mm1) | |
1492 "psrlw $8, %%mm0 \n\t" | |
1493 "psrlw $8, %%mm1 \n\t" | |
1494 "packuswb %%mm1, %%mm0 \n\t" | |
1495 "movq %%mm0, %%mm1 \n\t" | |
1496 "psrlw $8, %%mm0 \n\t" | |
1497 "pand %%mm4, %%mm1 \n\t" | |
1498 "packuswb %%mm0, %%mm0 \n\t" | |
1499 "packuswb %%mm1, %%mm1 \n\t" | |
1500 "movd %%mm0, (%4, %%eax) \n\t" | |
1501 "movd %%mm1, (%3, %%eax) \n\t" | |
1502 "addl $4, %%eax \n\t" | |
1503 " js 1b \n\t" | |
1504 : : "g" (-width), "r" (src1+width*4), "r" (src2+width*4), "r" (dstU+width), "r" (dstV+width) | |
1505 : "%eax" | |
1506 ); | |
4467 | 1507 #else |
1508 int i; | |
1509 for(i=0; i<width; i++) | |
1510 { | |
1511 dstU[i]= (src1[4*i + 1] + src2[4*i + 1])>>1; | |
1512 dstV[i]= (src1[4*i + 3] + src2[4*i + 3])>>1; | |
1513 } | |
1514 #endif | |
1515 } | |
1516 | |
9071 | 1517 //this is allmost identical to the previous, end exists only cuz yuy2ToY/UV)(dst, src+1, ...) would have 100% unaligned accesses |
1518 static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, int width) | |
1519 { | |
1520 #ifdef HAVE_MMX | |
1521 asm volatile( | |
1522 "movl %0, %%eax \n\t" | |
1523 "1: \n\t" | |
1524 "movq (%1, %%eax,2), %%mm0 \n\t" | |
1525 "movq 8(%1, %%eax,2), %%mm1 \n\t" | |
1526 "psrlw $8, %%mm0 \n\t" | |
1527 "psrlw $8, %%mm1 \n\t" | |
1528 "packuswb %%mm1, %%mm0 \n\t" | |
1529 "movq %%mm0, (%2, %%eax) \n\t" | |
1530 "addl $8, %%eax \n\t" | |
1531 " js 1b \n\t" | |
1532 : : "g" (-width), "r" (src+width*2), "r" (dst+width) | |
1533 : "%eax" | |
1534 ); | |
1535 #else | |
1536 int i; | |
1537 for(i=0; i<width; i++) | |
1538 dst[i]= src[2*i+1]; | |
1539 #endif | |
1540 } | |
1541 | |
1542 static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
1543 { | |
1544 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | |
1545 asm volatile( | |
1546 "movq "MANGLE(bm01010101)", %%mm4\n\t" | |
1547 "movl %0, %%eax \n\t" | |
1548 "1: \n\t" | |
1549 "movq (%1, %%eax,4), %%mm0 \n\t" | |
1550 "movq 8(%1, %%eax,4), %%mm1 \n\t" | |
1551 "movq (%2, %%eax,4), %%mm2 \n\t" | |
1552 "movq 8(%2, %%eax,4), %%mm3 \n\t" | |
1553 PAVGB(%%mm2, %%mm0) | |
1554 PAVGB(%%mm3, %%mm1) | |
1555 "pand %%mm4, %%mm0 \n\t" | |
1556 "pand %%mm4, %%mm1 \n\t" | |
1557 "packuswb %%mm1, %%mm0 \n\t" | |
1558 "movq %%mm0, %%mm1 \n\t" | |
1559 "psrlw $8, %%mm0 \n\t" | |
1560 "pand %%mm4, %%mm1 \n\t" | |
1561 "packuswb %%mm0, %%mm0 \n\t" | |
1562 "packuswb %%mm1, %%mm1 \n\t" | |
1563 "movd %%mm0, (%4, %%eax) \n\t" | |
1564 "movd %%mm1, (%3, %%eax) \n\t" | |
1565 "addl $4, %%eax \n\t" | |
1566 " js 1b \n\t" | |
1567 : : "g" (-width), "r" (src1+width*4), "r" (src2+width*4), "r" (dstU+width), "r" (dstV+width) | |
1568 : "%eax" | |
1569 ); | |
1570 #else | |
1571 int i; | |
1572 for(i=0; i<width; i++) | |
1573 { | |
1574 dstU[i]= (src1[4*i + 0] + src2[4*i + 0])>>1; | |
1575 dstV[i]= (src1[4*i + 2] + src2[4*i + 2])>>1; | |
1576 } | |
1577 #endif | |
1578 } | |
1579 | |
4467 | 1580 static inline void RENAME(bgr32ToY)(uint8_t *dst, uint8_t *src, int width) |
1581 { | |
1582 #ifdef HAVE_MMXFIXME | |
1583 #else | |
1584 int i; | |
1585 for(i=0; i<width; i++) | |
1586 { | |
1587 int b= src[i*4+0]; | |
1588 int g= src[i*4+1]; | |
1589 int r= src[i*4+2]; | |
1590 | |
1591 dst[i]= ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; | |
1592 } | |
1593 #endif | |
1594 } | |
1595 | |
1596 static inline void RENAME(bgr32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
1597 { | |
1598 #ifdef HAVE_MMXFIXME | |
1599 #else | |
1600 int i; | |
1601 for(i=0; i<width; i++) | |
1602 { | |
1603 int b= src1[8*i + 0] + src1[8*i + 4] + src2[8*i + 0] + src2[8*i + 4]; | |
1604 int g= src1[8*i + 1] + src1[8*i + 5] + src2[8*i + 1] + src2[8*i + 5]; | |
1605 int r= src1[8*i + 2] + src1[8*i + 6] + src2[8*i + 2] + src2[8*i + 6]; | |
1606 | |
1607 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
1608 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
1609 } | |
1610 #endif | |
1611 } | |
1612 | |
1613 static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, int width) | |
1614 { | |
4612 | 1615 #ifdef HAVE_MMX |
1616 asm volatile( | |
1617 "movl %2, %%eax \n\t" | |
4923 | 1618 "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t" |
1619 "movq "MANGLE(w1111)", %%mm5 \n\t" | |
4612 | 1620 "pxor %%mm7, %%mm7 \n\t" |
1621 "leal (%%eax, %%eax, 2), %%ebx \n\t" | |
1622 ".balign 16 \n\t" | |
1623 "1: \n\t" | |
1624 PREFETCH" 64(%0, %%ebx) \n\t" | |
1625 "movd (%0, %%ebx), %%mm0 \n\t" | |
1626 "movd 3(%0, %%ebx), %%mm1 \n\t" | |
1627 "punpcklbw %%mm7, %%mm0 \n\t" | |
1628 "punpcklbw %%mm7, %%mm1 \n\t" | |
1629 "movd 6(%0, %%ebx), %%mm2 \n\t" | |
1630 "movd 9(%0, %%ebx), %%mm3 \n\t" | |
1631 "punpcklbw %%mm7, %%mm2 \n\t" | |
1632 "punpcklbw %%mm7, %%mm3 \n\t" | |
1633 "pmaddwd %%mm6, %%mm0 \n\t" | |
1634 "pmaddwd %%mm6, %%mm1 \n\t" | |
1635 "pmaddwd %%mm6, %%mm2 \n\t" | |
1636 "pmaddwd %%mm6, %%mm3 \n\t" | |
1637 #ifndef FAST_BGR2YV12 | |
1638 "psrad $8, %%mm0 \n\t" | |
1639 "psrad $8, %%mm1 \n\t" | |
1640 "psrad $8, %%mm2 \n\t" | |
1641 "psrad $8, %%mm3 \n\t" | |
1642 #endif | |
1643 "packssdw %%mm1, %%mm0 \n\t" | |
1644 "packssdw %%mm3, %%mm2 \n\t" | |
1645 "pmaddwd %%mm5, %%mm0 \n\t" | |
1646 "pmaddwd %%mm5, %%mm2 \n\t" | |
1647 "packssdw %%mm2, %%mm0 \n\t" | |
1648 "psraw $7, %%mm0 \n\t" | |
1649 | |
1650 "movd 12(%0, %%ebx), %%mm4 \n\t" | |
1651 "movd 15(%0, %%ebx), %%mm1 \n\t" | |
1652 "punpcklbw %%mm7, %%mm4 \n\t" | |
1653 "punpcklbw %%mm7, %%mm1 \n\t" | |
1654 "movd 18(%0, %%ebx), %%mm2 \n\t" | |
1655 "movd 21(%0, %%ebx), %%mm3 \n\t" | |
1656 "punpcklbw %%mm7, %%mm2 \n\t" | |
1657 "punpcklbw %%mm7, %%mm3 \n\t" | |
1658 "pmaddwd %%mm6, %%mm4 \n\t" | |
1659 "pmaddwd %%mm6, %%mm1 \n\t" | |
1660 "pmaddwd %%mm6, %%mm2 \n\t" | |
1661 "pmaddwd %%mm6, %%mm3 \n\t" | |
1662 #ifndef FAST_BGR2YV12 | |
1663 "psrad $8, %%mm4 \n\t" | |
1664 "psrad $8, %%mm1 \n\t" | |
1665 "psrad $8, %%mm2 \n\t" | |
1666 "psrad $8, %%mm3 \n\t" | |
1667 #endif | |
1668 "packssdw %%mm1, %%mm4 \n\t" | |
1669 "packssdw %%mm3, %%mm2 \n\t" | |
1670 "pmaddwd %%mm5, %%mm4 \n\t" | |
1671 "pmaddwd %%mm5, %%mm2 \n\t" | |
1672 "addl $24, %%ebx \n\t" | |
1673 "packssdw %%mm2, %%mm4 \n\t" | |
1674 "psraw $7, %%mm4 \n\t" | |
1675 | |
1676 "packuswb %%mm4, %%mm0 \n\t" | |
4923 | 1677 "paddusb "MANGLE(bgr2YOffset)", %%mm0 \n\t" |
4612 | 1678 |
4619 | 1679 "movq %%mm0, (%1, %%eax) \n\t" |
4612 | 1680 "addl $8, %%eax \n\t" |
1681 " js 1b \n\t" | |
1682 : : "r" (src+width*3), "r" (dst+width), "g" (-width) | |
1683 : "%eax", "%ebx" | |
1684 ); | |
4467 | 1685 #else |
1686 int i; | |
1687 for(i=0; i<width; i++) | |
1688 { | |
1689 int b= src[i*3+0]; | |
1690 int g= src[i*3+1]; | |
1691 int r= src[i*3+2]; | |
1692 | |
1693 dst[i]= ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; | |
1694 } | |
1695 #endif | |
1696 } | |
1697 | |
1698 static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
1699 { | |
4619 | 1700 #ifdef HAVE_MMX |
1701 asm volatile( | |
1702 "movl %4, %%eax \n\t" | |
4923 | 1703 "movq "MANGLE(w1111)", %%mm5 \n\t" |
1704 "movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t" | |
4619 | 1705 "pxor %%mm7, %%mm7 \n\t" |
1706 "leal (%%eax, %%eax, 2), %%ebx \n\t" | |
1707 "addl %%ebx, %%ebx \n\t" | |
1708 ".balign 16 \n\t" | |
1709 "1: \n\t" | |
1710 PREFETCH" 64(%0, %%ebx) \n\t" | |
1711 PREFETCH" 64(%1, %%ebx) \n\t" | |
1712 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | |
1713 "movq (%0, %%ebx), %%mm0 \n\t" | |
1714 "movq (%1, %%ebx), %%mm1 \n\t" | |
1715 "movq 6(%0, %%ebx), %%mm2 \n\t" | |
1716 "movq 6(%1, %%ebx), %%mm3 \n\t" | |
1717 PAVGB(%%mm1, %%mm0) | |
1718 PAVGB(%%mm3, %%mm2) | |
1719 "movq %%mm0, %%mm1 \n\t" | |
1720 "movq %%mm2, %%mm3 \n\t" | |
1721 "psrlq $24, %%mm0 \n\t" | |
1722 "psrlq $24, %%mm2 \n\t" | |
1723 PAVGB(%%mm1, %%mm0) | |
1724 PAVGB(%%mm3, %%mm2) | |
1725 "punpcklbw %%mm7, %%mm0 \n\t" | |
1726 "punpcklbw %%mm7, %%mm2 \n\t" | |
1727 #else | |
1728 "movd (%0, %%ebx), %%mm0 \n\t" | |
1729 "movd (%1, %%ebx), %%mm1 \n\t" | |
1730 "movd 3(%0, %%ebx), %%mm2 \n\t" | |
1731 "movd 3(%1, %%ebx), %%mm3 \n\t" | |
1732 "punpcklbw %%mm7, %%mm0 \n\t" | |
1733 "punpcklbw %%mm7, %%mm1 \n\t" | |
1734 "punpcklbw %%mm7, %%mm2 \n\t" | |
1735 "punpcklbw %%mm7, %%mm3 \n\t" | |
1736 "paddw %%mm1, %%mm0 \n\t" | |
1737 "paddw %%mm3, %%mm2 \n\t" | |
1738 "paddw %%mm2, %%mm0 \n\t" | |
1739 "movd 6(%0, %%ebx), %%mm4 \n\t" | |
1740 "movd 6(%1, %%ebx), %%mm1 \n\t" | |
1741 "movd 9(%0, %%ebx), %%mm2 \n\t" | |
1742 "movd 9(%1, %%ebx), %%mm3 \n\t" | |
1743 "punpcklbw %%mm7, %%mm4 \n\t" | |
1744 "punpcklbw %%mm7, %%mm1 \n\t" | |
1745 "punpcklbw %%mm7, %%mm2 \n\t" | |
1746 "punpcklbw %%mm7, %%mm3 \n\t" | |
1747 "paddw %%mm1, %%mm4 \n\t" | |
1748 "paddw %%mm3, %%mm2 \n\t" | |
1749 "paddw %%mm4, %%mm2 \n\t" | |
1750 "psrlw $2, %%mm0 \n\t" | |
1751 "psrlw $2, %%mm2 \n\t" | |
1752 #endif | |
4923 | 1753 "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" |
1754 "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" | |
4619 | 1755 |
1756 "pmaddwd %%mm0, %%mm1 \n\t" | |
1757 "pmaddwd %%mm2, %%mm3 \n\t" | |
1758 "pmaddwd %%mm6, %%mm0 \n\t" | |
1759 "pmaddwd %%mm6, %%mm2 \n\t" | |
1760 #ifndef FAST_BGR2YV12 | |
1761 "psrad $8, %%mm0 \n\t" | |
1762 "psrad $8, %%mm1 \n\t" | |
1763 "psrad $8, %%mm2 \n\t" | |
1764 "psrad $8, %%mm3 \n\t" | |
1765 #endif | |
1766 "packssdw %%mm2, %%mm0 \n\t" | |
1767 "packssdw %%mm3, %%mm1 \n\t" | |
1768 "pmaddwd %%mm5, %%mm0 \n\t" | |
1769 "pmaddwd %%mm5, %%mm1 \n\t" | |
1770 "packssdw %%mm1, %%mm0 \n\t" // V1 V0 U1 U0 | |
1771 "psraw $7, %%mm0 \n\t" | |
1772 | |
1773 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | |
1774 "movq 12(%0, %%ebx), %%mm4 \n\t" | |
1775 "movq 12(%1, %%ebx), %%mm1 \n\t" | |
1776 "movq 18(%0, %%ebx), %%mm2 \n\t" | |
1777 "movq 18(%1, %%ebx), %%mm3 \n\t" | |
1778 PAVGB(%%mm1, %%mm4) | |
1779 PAVGB(%%mm3, %%mm2) | |
1780 "movq %%mm4, %%mm1 \n\t" | |
1781 "movq %%mm2, %%mm3 \n\t" | |
1782 "psrlq $24, %%mm4 \n\t" | |
1783 "psrlq $24, %%mm2 \n\t" | |
1784 PAVGB(%%mm1, %%mm4) | |
1785 PAVGB(%%mm3, %%mm2) | |
1786 "punpcklbw %%mm7, %%mm4 \n\t" | |
1787 "punpcklbw %%mm7, %%mm2 \n\t" | |
1788 #else | |
1789 "movd 12(%0, %%ebx), %%mm4 \n\t" | |
1790 "movd 12(%1, %%ebx), %%mm1 \n\t" | |
1791 "movd 15(%0, %%ebx), %%mm2 \n\t" | |
1792 "movd 15(%1, %%ebx), %%mm3 \n\t" | |
1793 "punpcklbw %%mm7, %%mm4 \n\t" | |
1794 "punpcklbw %%mm7, %%mm1 \n\t" | |
1795 "punpcklbw %%mm7, %%mm2 \n\t" | |
1796 "punpcklbw %%mm7, %%mm3 \n\t" | |
1797 "paddw %%mm1, %%mm4 \n\t" | |
1798 "paddw %%mm3, %%mm2 \n\t" | |
1799 "paddw %%mm2, %%mm4 \n\t" | |
1800 "movd 18(%0, %%ebx), %%mm5 \n\t" | |
1801 "movd 18(%1, %%ebx), %%mm1 \n\t" | |
1802 "movd 21(%0, %%ebx), %%mm2 \n\t" | |
1803 "movd 21(%1, %%ebx), %%mm3 \n\t" | |
1804 "punpcklbw %%mm7, %%mm5 \n\t" | |
1805 "punpcklbw %%mm7, %%mm1 \n\t" | |
1806 "punpcklbw %%mm7, %%mm2 \n\t" | |
1807 "punpcklbw %%mm7, %%mm3 \n\t" | |
1808 "paddw %%mm1, %%mm5 \n\t" | |
1809 "paddw %%mm3, %%mm2 \n\t" | |
1810 "paddw %%mm5, %%mm2 \n\t" | |
4923 | 1811 "movq "MANGLE(w1111)", %%mm5 \n\t" |
4619 | 1812 "psrlw $2, %%mm4 \n\t" |
1813 "psrlw $2, %%mm2 \n\t" | |
1814 #endif | |
4923 | 1815 "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" |
1816 "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" | |
4619 | 1817 |
1818 "pmaddwd %%mm4, %%mm1 \n\t" | |
1819 "pmaddwd %%mm2, %%mm3 \n\t" | |
1820 "pmaddwd %%mm6, %%mm4 \n\t" | |
1821 "pmaddwd %%mm6, %%mm2 \n\t" | |
1822 #ifndef FAST_BGR2YV12 | |
1823 "psrad $8, %%mm4 \n\t" | |
1824 "psrad $8, %%mm1 \n\t" | |
1825 "psrad $8, %%mm2 \n\t" | |
1826 "psrad $8, %%mm3 \n\t" | |
1827 #endif | |
1828 "packssdw %%mm2, %%mm4 \n\t" | |
1829 "packssdw %%mm3, %%mm1 \n\t" | |
1830 "pmaddwd %%mm5, %%mm4 \n\t" | |
1831 "pmaddwd %%mm5, %%mm1 \n\t" | |
1832 "addl $24, %%ebx \n\t" | |
1833 "packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2 | |
1834 "psraw $7, %%mm4 \n\t" | |
1835 | |
1836 "movq %%mm0, %%mm1 \n\t" | |
1837 "punpckldq %%mm4, %%mm0 \n\t" | |
1838 "punpckhdq %%mm4, %%mm1 \n\t" | |
1839 "packsswb %%mm1, %%mm0 \n\t" | |
4923 | 1840 "paddb "MANGLE(bgr2UVOffset)", %%mm0 \n\t" |
4619 | 1841 |
1842 "movd %%mm0, (%2, %%eax) \n\t" | |
1843 "punpckhdq %%mm0, %%mm0 \n\t" | |
1844 "movd %%mm0, (%3, %%eax) \n\t" | |
1845 "addl $4, %%eax \n\t" | |
1846 " js 1b \n\t" | |
1847 : : "r" (src1+width*6), "r" (src2+width*6), "r" (dstU+width), "r" (dstV+width), "g" (-width) | |
1848 : "%eax", "%ebx" | |
1849 ); | |
4467 | 1850 #else |
1851 int i; | |
1852 for(i=0; i<width; i++) | |
1853 { | |
1854 int b= src1[6*i + 0] + src1[6*i + 3] + src2[6*i + 0] + src2[6*i + 3]; | |
1855 int g= src1[6*i + 1] + src1[6*i + 4] + src2[6*i + 1] + src2[6*i + 4]; | |
1856 int r= src1[6*i + 2] + src1[6*i + 5] + src2[6*i + 2] + src2[6*i + 5]; | |
1857 | |
1858 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
1859 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
1860 } | |
1861 #endif | |
1862 } | |
1863 | |
4578 | 1864 static inline void RENAME(bgr16ToY)(uint8_t *dst, uint8_t *src, int width) |
1865 { | |
1866 int i; | |
1867 for(i=0; i<width; i++) | |
1868 { | |
1869 int d= src[i*2] + (src[i*2+1]<<8); | |
1870 int b= d&0x1F; | |
1871 int g= (d>>5)&0x3F; | |
1872 int r= (d>>11)&0x1F; | |
1873 | |
1874 dst[i]= ((2*RY*r + GY*g + 2*BY*b)>>(RGB2YUV_SHIFT-2)) + 16; | |
1875 } | |
1876 } | |
1877 | |
1878 static inline void RENAME(bgr16ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
1879 { | |
1880 int i; | |
1881 for(i=0; i<width; i++) | |
1882 { | |
4579 | 1883 #if 1 |
1884 int d0= le2me_32( ((uint32_t*)src1)[i] ); | |
1885 int d1= le2me_32( ((uint32_t*)src2)[i] ); | |
1886 | |
1887 int dl= (d0&0x07E0F81F) + (d1&0x07E0F81F); | |
1888 int dh= ((d0>>5)&0x07C0F83F) + ((d1>>5)&0x07C0F83F); | |
1889 | |
1890 int dh2= (dh>>11) + (dh<<21); | |
1891 int d= dh2 + dl; | |
1892 | |
1893 int b= d&0x7F; | |
1894 int r= (d>>11)&0x7F; | |
1895 int g= d>>21; | |
1896 #else | |
4578 | 1897 int d0= src1[i*4] + (src1[i*4+1]<<8); |
1898 int b0= d0&0x1F; | |
1899 int g0= (d0>>5)&0x3F; | |
1900 int r0= (d0>>11)&0x1F; | |
1901 | |
1902 int d1= src1[i*4+2] + (src1[i*4+3]<<8); | |
1903 int b1= d1&0x1F; | |
1904 int g1= (d1>>5)&0x3F; | |
1905 int r1= (d1>>11)&0x1F; | |
1906 | |
1907 int d2= src2[i*4] + (src2[i*4+1]<<8); | |
1908 int b2= d2&0x1F; | |
1909 int g2= (d2>>5)&0x3F; | |
1910 int r2= (d2>>11)&0x1F; | |
1911 | |
1912 int d3= src2[i*4+2] + (src2[i*4+3]<<8); | |
1913 int b3= d3&0x1F; | |
1914 int g3= (d3>>5)&0x3F; | |
1915 int r3= (d3>>11)&0x1F; | |
1916 | |
1917 int b= b0 + b1 + b2 + b3; | |
1918 int g= g0 + g1 + g2 + g3; | |
1919 int r= r0 + r1 + r2 + r3; | |
4579 | 1920 #endif |
4578 | 1921 dstU[i]= ((2*RU*r + GU*g + 2*BU*b)>>(RGB2YUV_SHIFT+2-2)) + 128; |
1922 dstV[i]= ((2*RV*r + GV*g + 2*BV*b)>>(RGB2YUV_SHIFT+2-2)) + 128; | |
1923 } | |
1924 } | |
1925 | |
4580 | 1926 static inline void RENAME(bgr15ToY)(uint8_t *dst, uint8_t *src, int width) |
1927 { | |
1928 int i; | |
1929 for(i=0; i<width; i++) | |
1930 { | |
1931 int d= src[i*2] + (src[i*2+1]<<8); | |
1932 int b= d&0x1F; | |
1933 int g= (d>>5)&0x1F; | |
1934 int r= (d>>10)&0x1F; | |
1935 | |
1936 dst[i]= ((RY*r + GY*g + BY*b)>>(RGB2YUV_SHIFT-3)) + 16; | |
1937 } | |
1938 } | |
1939 | |
1940 static inline void RENAME(bgr15ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
1941 { | |
1942 int i; | |
1943 for(i=0; i<width; i++) | |
1944 { | |
1945 #if 1 | |
1946 int d0= le2me_32( ((uint32_t*)src1)[i] ); | |
1947 int d1= le2me_32( ((uint32_t*)src2)[i] ); | |
1948 | |
1949 int dl= (d0&0x03E07C1F) + (d1&0x03E07C1F); | |
1950 int dh= ((d0>>5)&0x03E0F81F) + ((d1>>5)&0x03E0F81F); | |
1951 | |
1952 int dh2= (dh>>11) + (dh<<21); | |
1953 int d= dh2 + dl; | |
1954 | |
1955 int b= d&0x7F; | |
1956 int r= (d>>10)&0x7F; | |
1957 int g= d>>21; | |
1958 #else | |
1959 int d0= src1[i*4] + (src1[i*4+1]<<8); | |
1960 int b0= d0&0x1F; | |
1961 int g0= (d0>>5)&0x1F; | |
1962 int r0= (d0>>10)&0x1F; | |
1963 | |
1964 int d1= src1[i*4+2] + (src1[i*4+3]<<8); | |
1965 int b1= d1&0x1F; | |
1966 int g1= (d1>>5)&0x1F; | |
1967 int r1= (d1>>10)&0x1F; | |
1968 | |
1969 int d2= src2[i*4] + (src2[i*4+1]<<8); | |
1970 int b2= d2&0x1F; | |
1971 int g2= (d2>>5)&0x1F; | |
1972 int r2= (d2>>10)&0x1F; | |
1973 | |
1974 int d3= src2[i*4+2] + (src2[i*4+3]<<8); | |
1975 int b3= d3&0x1F; | |
1976 int g3= (d3>>5)&0x1F; | |
1977 int r3= (d3>>10)&0x1F; | |
1978 | |
1979 int b= b0 + b1 + b2 + b3; | |
1980 int g= g0 + g1 + g2 + g3; | |
1981 int r= r0 + r1 + r2 + r3; | |
1982 #endif | |
1983 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2-3)) + 128; | |
1984 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2-3)) + 128; | |
1985 } | |
1986 } | |
1987 | |
1988 | |
4558 | 1989 static inline void RENAME(rgb32ToY)(uint8_t *dst, uint8_t *src, int width) |
1990 { | |
1991 int i; | |
1992 for(i=0; i<width; i++) | |
1993 { | |
1994 int r= src[i*4+0]; | |
1995 int g= src[i*4+1]; | |
1996 int b= src[i*4+2]; | |
1997 | |
1998 dst[i]= ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; | |
1999 } | |
2000 } | |
2001 | |
2002 static inline void RENAME(rgb32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
2003 { | |
2004 int i; | |
2005 for(i=0; i<width; i++) | |
2006 { | |
2007 int r= src1[8*i + 0] + src1[8*i + 4] + src2[8*i + 0] + src2[8*i + 4]; | |
2008 int g= src1[8*i + 1] + src1[8*i + 5] + src2[8*i + 1] + src2[8*i + 5]; | |
2009 int b= src1[8*i + 2] + src1[8*i + 6] + src2[8*i + 2] + src2[8*i + 6]; | |
2010 | |
2011 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
2012 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
2013 } | |
2014 } | |
2015 | |
2016 static inline void RENAME(rgb24ToY)(uint8_t *dst, uint8_t *src, int width) | |
2017 { | |
2018 int i; | |
2019 for(i=0; i<width; i++) | |
2020 { | |
2021 int r= src[i*3+0]; | |
2022 int g= src[i*3+1]; | |
2023 int b= src[i*3+2]; | |
2024 | |
2025 dst[i]= ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; | |
2026 } | |
2027 } | |
2028 | |
2029 static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
2030 { | |
2031 int i; | |
2032 for(i=0; i<width; i++) | |
2033 { | |
2034 int r= src1[6*i + 0] + src1[6*i + 3] + src2[6*i + 0] + src2[6*i + 3]; | |
2035 int g= src1[6*i + 1] + src1[6*i + 4] + src2[6*i + 1] + src2[6*i + 4]; | |
2036 int b= src1[6*i + 2] + src1[6*i + 5] + src2[6*i + 2] + src2[6*i + 5]; | |
2037 | |
2038 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
2039 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
2040 } | |
2041 } | |
2042 | |
4467 | 2043 |
/*
 * Horizontal scaler: for each output sample i,
 *   dst[i] = clip( (sum_j src[filterPos[i]+j] * filter[i*filterSize+j]) >> 7 )
 * as shown by the C reference branch at the bottom (output clipped to
 * 0..2^15-1 there; the MMX branches rely on packssdw saturation instead).
 * Under HAVE_MMX, filterSize 4 and 8 get dedicated unrolled loops; the
 * generic MMX loop handles any filterSize.
 */
3272 | 2044 // Bilinear / Bicubic scaling
2045 static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc,
2046 int16_t *filter, int16_t *filterPos, int filterSize)
2047 {
2048 #ifdef HAVE_MMX
/* two output samples per iteration; counter runs from -2*dstW up to 0 */
2049 if(filterSize==4) // allways true for upscaling, sometimes for down too
2050 {
2051 int counter= -2*dstW;
2052 filter-= counter*2;
2053 filterPos-= counter/2;
2054 dst-= counter/2;
2055 asm volatile(
2056 "pxor %%mm7, %%mm7 \n\t"
4248 | 2057 "movq "MANGLE(w02)", %%mm6 \n\t"
3272 | 2058 "pushl %%ebp \n\t" // we use 7 regs here ...
2059 "movl %%eax, %%ebp \n\t"
2060 ".balign 16 \n\t"
2061 "1: \n\t"
2062 "movzwl (%2, %%ebp), %%eax \n\t"
2063 "movzwl 2(%2, %%ebp), %%ebx \n\t"
2064 "movq (%1, %%ebp, 4), %%mm1 \n\t"
2065 "movq 8(%1, %%ebp, 4), %%mm3 \n\t"
2066 "movd (%3, %%eax), %%mm0 \n\t"
2067 "movd (%3, %%ebx), %%mm2 \n\t"
2068 "punpcklbw %%mm7, %%mm0 \n\t"
2069 "punpcklbw %%mm7, %%mm2 \n\t"
2070 "pmaddwd %%mm1, %%mm0 \n\t"
2071 "pmaddwd %%mm2, %%mm3 \n\t"
2072 "psrad $8, %%mm0 \n\t"
2073 "psrad $8, %%mm3 \n\t"
2074 "packssdw %%mm3, %%mm0 \n\t"
2075 "pmaddwd %%mm6, %%mm0 \n\t"
2076 "packssdw %%mm0, %%mm0 \n\t"
2077 "movd %%mm0, (%4, %%ebp) \n\t"
2078 "addl $4, %%ebp \n\t"
2079 " jnc 1b \n\t"
3352 | 2080
3272 | 2081 "popl %%ebp \n\t"
2082 : "+a" (counter)
2083 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
2084 : "%ebx"
2085 );
2086 }
2087 else if(filterSize==8)
2088 {
2089 int counter= -2*dstW;
2090 filter-= counter*4;
2091 filterPos-= counter/2;
2092 dst-= counter/2;
2093 asm volatile(
2094 "pxor %%mm7, %%mm7 \n\t"
4248 | 2095 "movq "MANGLE(w02)", %%mm6 \n\t"
3272 | 2096 "pushl %%ebp \n\t" // we use 7 regs here ...
2097 "movl %%eax, %%ebp \n\t"
2098 ".balign 16 \n\t"
2099 "1: \n\t"
2100 "movzwl (%2, %%ebp), %%eax \n\t"
2101 "movzwl 2(%2, %%ebp), %%ebx \n\t"
2102 "movq (%1, %%ebp, 8), %%mm1 \n\t"
2103 "movq 16(%1, %%ebp, 8), %%mm3 \n\t"
2104 "movd (%3, %%eax), %%mm0 \n\t"
2105 "movd (%3, %%ebx), %%mm2 \n\t"
2106 "punpcklbw %%mm7, %%mm0 \n\t"
2107 "punpcklbw %%mm7, %%mm2 \n\t"
2108 "pmaddwd %%mm1, %%mm0 \n\t"
2109 "pmaddwd %%mm2, %%mm3 \n\t"
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
2110 |
3272 | 2111 "movq 8(%1, %%ebp, 8), %%mm1 \n\t"
2112 "movq 24(%1, %%ebp, 8), %%mm5 \n\t"
2113 "movd 4(%3, %%eax), %%mm4 \n\t"
2114 "movd 4(%3, %%ebx), %%mm2 \n\t"
2115 "punpcklbw %%mm7, %%mm4 \n\t"
2116 "punpcklbw %%mm7, %%mm2 \n\t"
2117 "pmaddwd %%mm1, %%mm4 \n\t"
2118 "pmaddwd %%mm2, %%mm5 \n\t"
2119 "paddd %%mm4, %%mm0 \n\t"
2120 "paddd %%mm5, %%mm3 \n\t"
2121
2122 "psrad $8, %%mm0 \n\t"
2123 "psrad $8, %%mm3 \n\t"
2124 "packssdw %%mm3, %%mm0 \n\t"
2125 "pmaddwd %%mm6, %%mm0 \n\t"
2126 "packssdw %%mm0, %%mm0 \n\t"
2127 "movd %%mm0, (%4, %%ebp) \n\t"
2128 "addl $4, %%ebp \n\t"
2129 " jnc 1b \n\t"
3344 | 2130
3272 | 2131 "popl %%ebp \n\t"
2132 : "+a" (counter)
2133 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst)
2134 : "%ebx"
2135 );
2136 }
2137 else
2138 {
/* generic MMX loop: inner loop "2:" walks the filter 4 taps at a time */
2139 int counter= -2*dstW;
2140 // filter-= counter*filterSize/2;
2141 filterPos-= counter/2;
2142 dst-= counter/2;
2143 asm volatile(
2144 "pxor %%mm7, %%mm7 \n\t"
4248 | 2145 "movq "MANGLE(w02)", %%mm6 \n\t"
3272 | 2146 ".balign 16 \n\t"
2147 "1: \n\t"
2148 "movl %2, %%ecx \n\t"
2149 "movzwl (%%ecx, %0), %%eax \n\t"
2150 "movzwl 2(%%ecx, %0), %%ebx \n\t"
2151 "movl %5, %%ecx \n\t"
2152 "pxor %%mm4, %%mm4 \n\t"
2153 "pxor %%mm5, %%mm5 \n\t"
2154 "2: \n\t"
2155 "movq (%1), %%mm1 \n\t"
2156 "movq (%1, %6), %%mm3 \n\t"
2157 "movd (%%ecx, %%eax), %%mm0 \n\t"
2158 "movd (%%ecx, %%ebx), %%mm2 \n\t"
2159 "punpcklbw %%mm7, %%mm0 \n\t"
2160 "punpcklbw %%mm7, %%mm2 \n\t"
2161 "pmaddwd %%mm1, %%mm0 \n\t"
2162 "pmaddwd %%mm2, %%mm3 \n\t"
2163 "paddd %%mm3, %%mm5 \n\t"
2164 "paddd %%mm0, %%mm4 \n\t"
2165 "addl $8, %1 \n\t"
2166 "addl $4, %%ecx \n\t"
2167 "cmpl %4, %%ecx \n\t"
2168 " jb 2b \n\t"
2169 "addl %6, %1 \n\t"
2170 "psrad $8, %%mm4 \n\t"
2171 "psrad $8, %%mm5 \n\t"
2172 "packssdw %%mm5, %%mm4 \n\t"
2173 "pmaddwd %%mm6, %%mm4 \n\t"
2174 "packssdw %%mm4, %%mm4 \n\t"
2175 "movl %3, %%eax \n\t"
2176 "movd %%mm4, (%%eax, %0) \n\t"
2177 "addl $4, %0 \n\t"
2178 " jnc 1b \n\t"
3344 | 2179
3641 | 2180 : "+r" (counter), "+r" (filter)
2181 : "m" (filterPos), "m" (dst), "m"(src+filterSize),
3272 | 2182 "m" (src), "r" (filterSize*2)
3299 | 2183 : "%ebx", "%eax", "%ecx"
2184 );
2185 }
2186 #else
/* plain C reference implementation */
2187 int i;
2188 for(i=0; i<dstW; i++)
2189 {
2190 int j;
2191 int srcPos= filterPos[i];
2192 int val=0;
3344 | 2193 // printf("filterPos: %d\n", filterPos[i]);
3272 | 2194 for(j=0; j<filterSize; j++)
2195 {
2196 // printf("filter: %d, src: %d\n", filter[i], src[srcPos + j]);
2197 val += ((int)src[srcPos + j])*filter[filterSize*i + j];
2198 }
2199 // filter += hFilterSize;
2200 dst[i] = MIN(MAX(0, val>>7), (1<<15)-1); // the cubic equation does overflow ...
2201 // dst[i] = val>>7;
2202 }
2203 #endif
2204 }
/*
 * Scale one luma line horizontally into the 16 bit temp buffer dst.
 * Packed/RGB inputs are first converted to 8 bit grey via the *ToY helpers
 * (through formatConvBuffer).  Unless SWS_FAST_BILINEAR is set (and, with
 * MMX, MMX2 is usable), the exact hScale() filter is used; otherwise a
 * fast-bilinear path runs: either the runtime-generated MMX2 "funny code"
 * (called through funnyYCode) or a plain x86 asm loop, with a C fallback
 * on non-x86 builds.
 */
2205 // *** horizontal scale Y line to temp buffer
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2206 static inline void RENAME(hyscale)(uint16_t *dst, int dstWidth, uint8_t *src, int srcW, int xInc,
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2207 int flags, int canMMX2BeUsed, int16_t *hLumFilter,
4467 | 2208 int16_t *hLumFilterPos, int hLumFilterSize, void *funnyYCode,
5452 | 2209 int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
2210 int32_t *mmx2FilterPos)
2469 | 2211 {
/* convert non-planar / RGB inputs to 8 bit luma first */
4467 | 2212 if(srcFormat==IMGFMT_YUY2)
2213 {
2214 RENAME(yuy2ToY)(formatConvBuffer, src, srcW);
2215 src= formatConvBuffer;
2216 }
9071 | 2217 else if(srcFormat==IMGFMT_UYVY)
2218 {
2219 RENAME(uyvyToY)(formatConvBuffer, src, srcW);
2220 src= formatConvBuffer;
2221 }
4467 | 2222 else if(srcFormat==IMGFMT_BGR32)
2223 {
2224 RENAME(bgr32ToY)(formatConvBuffer, src, srcW);
2225 src= formatConvBuffer;
2226 }
2227 else if(srcFormat==IMGFMT_BGR24)
2228 {
2229 RENAME(bgr24ToY)(formatConvBuffer, src, srcW);
2230 src= formatConvBuffer;
2231 }
4578 | 2232 else if(srcFormat==IMGFMT_BGR16)
2233 {
2234 RENAME(bgr16ToY)(formatConvBuffer, src, srcW);
2235 src= formatConvBuffer;
2236 }
4580 | 2237 else if(srcFormat==IMGFMT_BGR15)
2238 {
2239 RENAME(bgr15ToY)(formatConvBuffer, src, srcW);
2240 src= formatConvBuffer;
2241 }
4558 | 2242 else if(srcFormat==IMGFMT_RGB32)
2243 {
2244 RENAME(rgb32ToY)(formatConvBuffer, src, srcW);
2245 src= formatConvBuffer;
2246 }
2247 else if(srcFormat==IMGFMT_RGB24)
2248 {
2249 RENAME(rgb24ToY)(formatConvBuffer, src, srcW);
2250 src= formatConvBuffer;
2251 }
4467 | 2252
3352 | 2253 #ifdef HAVE_MMX
2254 // use the new MMX scaler if th mmx2 cant be used (its faster than the x86asm one)
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2255 if(!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
3352 | 2256 #else
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2257 if(!(flags&SWS_FAST_BILINEAR))
3352 | 2258 #endif
3272 | 2259 {
2260 RENAME(hScale)(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
2261 }
2262 else // Fast Bilinear upscale / crap downscale
2263 {
2469 | 2264 #ifdef ARCH_X86
2265 #ifdef HAVE_MMX2
2671 | 2266 int i;
2469 | 2267 if(canMMX2BeUsed)
2268 {
/* call the runtime-generated scaler 8 times via FUNNY_Y_CODE */
2269 asm volatile(
2270 "pxor %%mm7, %%mm7 \n\t"
5452 | 2271 "movl %0, %%ecx \n\t"
2272 "movl %1, %%edi \n\t"
2273 "movl %2, %%edx \n\t"
2274 "movl %3, %%ebx \n\t"
2469 | 2275 "xorl %%eax, %%eax \n\t" // i
5452 | 2276 PREFETCH" (%%ecx) \n\t"
2277 PREFETCH" 32(%%ecx) \n\t"
2278 PREFETCH" 64(%%ecx) \n\t"
2520 | 2279
2469 | 2280 #define FUNNY_Y_CODE \
5452 | 2281 "movl (%%ebx), %%esi \n\t"\
2282 "call *%4 \n\t"\
2283 "addl (%%ebx, %%eax), %%ecx \n\t"\
2284 "addl %%eax, %%edi \n\t"\
2285 "xorl %%eax, %%eax \n\t"\
2520 | 2286
2469 | 2287 FUNNY_Y_CODE
2288 FUNNY_Y_CODE
2289 FUNNY_Y_CODE
2290 FUNNY_Y_CODE
2291 FUNNY_Y_CODE
2292 FUNNY_Y_CODE
2293 FUNNY_Y_CODE
2294 FUNNY_Y_CODE
2295
5452 | 2296 :: "m" (src), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos),
2297 "m" (funnyYCode)
2469 | 2298 : "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi"
2299 );
/* patch up trailing samples that would read past src[srcW-1] */
3215 | 2300 for(i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128;
2469 | 2301 }
2302 else
2303 {
2304 #endif
2305 //NO MMX just normal asm ...
2306 asm volatile(
2307 "xorl %%eax, %%eax \n\t" // i
2308 "xorl %%ebx, %%ebx \n\t" // xx
2309 "xorl %%ecx, %%ecx \n\t" // 2*xalpha
2800
7847d6b7ad3d
.balign or we¡ll align by 64kb on some architectures
michael
parents:
2799
diff
changeset
|
2310 ".balign 16 \n\t"
2469 | 2311 "1: \n\t"
2312 "movzbl (%0, %%ebx), %%edi \n\t" //src[xx]
2313 "movzbl 1(%0, %%ebx), %%esi \n\t" //src[xx+1]
2314 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
2315 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha
2316 "shll $16, %%edi \n\t"
2317 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
2318 "movl %1, %%edi \n\t"
2319 "shrl $9, %%esi \n\t"
2320 "movw %%si, (%%edi, %%eax, 2) \n\t"
2321 "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFF
2322 "adcl %3, %%ebx \n\t" //xx+= xInc>>8 + carry
2323
2324 "movzbl (%0, %%ebx), %%edi \n\t" //src[xx]
2325 "movzbl 1(%0, %%ebx), %%esi \n\t" //src[xx+1]
2326 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
2327 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha
2328 "shll $16, %%edi \n\t"
2329 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
2330 "movl %1, %%edi \n\t"
2331 "shrl $9, %%esi \n\t"
2332 "movw %%si, 2(%%edi, %%eax, 2) \n\t"
2333 "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFF
2334 "adcl %3, %%ebx \n\t" //xx+= xInc>>8 + carry
2335

2336
2337 "addl $2, %%eax \n\t"
2338 "cmpl %2, %%eax \n\t"
2339 " jb 1b \n\t"
2340

2341
2342 :: "r" (src), "m" (dst), "m" (dstWidth), "m" (xInc>>16), "m" (xInc&0xFFFF)
2343 : "%eax", "%ebx", "%ecx", "%edi", "%esi"
2344 );
2345 #ifdef HAVE_MMX2
2346 } //if MMX2 cant be used
2347 #endif
2348 #else
/* portable C fallback: 16.16 fixed point position, 7 bit blend factor */
2671 | 2349 int i;
2350 unsigned int xpos=0;
2351 for(i=0;i<dstWidth;i++)
2352 {
2353 register unsigned int xx=xpos>>16;
2354 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2355 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha;
2356 xpos+=xInc;
2357 }
2469 | 2358 #endif
3272 | 2359 }
2469 | 2360 }
2361 | |
/*
 * Scale one pair of chroma lines horizontally into the 16 bit temp buffers
 * dst (U) and dst+2048 (V).  Packed/RGB inputs are first converted to 8 bit
 * U/V planes via the *ToUV helpers (through formatConvBuffer); grey formats
 * have no chroma and return early.  Path selection mirrors hyscale():
 * exact hScale() filtering, or the fast-bilinear MMX2 "funny code" /
 * plain x86 asm / C fallback.
 */
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2362 inline static void RENAME(hcscale)(uint16_t *dst, int dstWidth, uint8_t *src1, uint8_t *src2,
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2363 int srcW, int xInc, int flags, int canMMX2BeUsed, int16_t *hChrFilter,
4467 | 2364 int16_t *hChrFilterPos, int hChrFilterSize, void *funnyUVCode,
5452 | 2365 int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter,
2366 int32_t *mmx2FilterPos)
2469 | 2367 {
/* convert non-planar / RGB inputs to separate 8 bit U and V lines first */
4467 | 2368 if(srcFormat==IMGFMT_YUY2)
2369 {
2370 RENAME(yuy2ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
2371 src1= formatConvBuffer;
2372 src2= formatConvBuffer+2048;
2373 }
9071 | 2374 else if(srcFormat==IMGFMT_UYVY)
2375 {
2376 RENAME(uyvyToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
2377 src1= formatConvBuffer;
2378 src2= formatConvBuffer+2048;
2379 }
4467 | 2380 else if(srcFormat==IMGFMT_BGR32)
2381 {
2382 RENAME(bgr32ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
2383 src1= formatConvBuffer;
2384 src2= formatConvBuffer+2048;
2385 }
2386 else if(srcFormat==IMGFMT_BGR24)
2387 {
2388 RENAME(bgr24ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
2389 src1= formatConvBuffer;
2390 src2= formatConvBuffer+2048;
2391 }
4578 | 2392 else if(srcFormat==IMGFMT_BGR16)
2393 {
2394 RENAME(bgr16ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
2395 src1= formatConvBuffer;
2396 src2= formatConvBuffer+2048;
2397 }
4580 | 2398 else if(srcFormat==IMGFMT_BGR15)
2399 {
2400 RENAME(bgr15ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
2401 src1= formatConvBuffer;
2402 src2= formatConvBuffer+2048;
2403 }
4558 | 2404 else if(srcFormat==IMGFMT_RGB32)
2405 {
2406 RENAME(rgb32ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
2407 src1= formatConvBuffer;
2408 src2= formatConvBuffer+2048;
2409 }
2410 else if(srcFormat==IMGFMT_RGB24)
2411 {
2412 RENAME(rgb24ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
2413 src1= formatConvBuffer;
2414 src2= formatConvBuffer+2048;
2415 }
4481 | 2416 else if(isGray(srcFormat))
2417 {
2418 return;
2419 }
4467 | 2420
3352 | 2421 #ifdef HAVE_MMX
2422 // use the new MMX scaler if th mmx2 cant be used (its faster than the x86asm one)
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2423 if(!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
3352 | 2424 #else
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2425 if(!(flags&SWS_FAST_BILINEAR))
3352 | 2426 #endif
3272 | 2427 {
2428 RENAME(hScale)(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
2429 RENAME(hScale)(dst+2048, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
2430 }
2431 else // Fast Bilinear upscale / crap downscale
2432 {
2469 | 2433 #ifdef ARCH_X86
2434 #ifdef HAVE_MMX2
2671 | 2435 int i;
2469 | 2436 if(canMMX2BeUsed)
2437 {
/* run the generated scaler 4x for U, then 4x for V at dst+4096 bytes */
2438 asm volatile(
5452 | 2439 "pxor %%mm7, %%mm7 \n\t"
2440 "movl %0, %%ecx \n\t"
2441 "movl %1, %%edi \n\t"
2442 "movl %2, %%edx \n\t"
2443 "movl %3, %%ebx \n\t"
2444 "xorl %%eax, %%eax \n\t" // i
2445 PREFETCH" (%%ecx) \n\t"
2446 PREFETCH" 32(%%ecx) \n\t"
2447 PREFETCH" 64(%%ecx) \n\t"
2448
2449 #define FUNNY_UV_CODE \
2450 "movl (%%ebx), %%esi \n\t"\
2451 "call *%4 \n\t"\
2452 "addl (%%ebx, %%eax), %%ecx \n\t"\
2453 "addl %%eax, %%edi \n\t"\
2454 "xorl %%eax, %%eax \n\t"\
2469 | 2455
5452 | 2456 FUNNY_UV_CODE
2457 FUNNY_UV_CODE
2458 FUNNY_UV_CODE
2459 FUNNY_UV_CODE
2460 "xorl %%eax, %%eax \n\t" // i
2461 "movl %5, %%ecx \n\t" // src
2462 "movl %1, %%edi \n\t" // buf1
2463 "addl $4096, %%edi \n\t"
2464 PREFETCH" (%%ecx) \n\t"
2465 PREFETCH" 32(%%ecx) \n\t"
2466 PREFETCH" 64(%%ecx) \n\t"
2469 | 2467
5452 | 2468 FUNNY_UV_CODE
2469 FUNNY_UV_CODE
2470 FUNNY_UV_CODE
2471 FUNNY_UV_CODE
2469 | 2472
5452 | 2473 :: "m" (src1), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos),
2474 "m" (funnyUVCode), "m" (src2)
2475 : "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi"
2476 );
/* patch up trailing samples that would read past src[srcW-1] */
3344 | 2477 for(i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--)
2469 | 2478 {
3344 | 2479 // printf("%d %d %d\n", dstWidth, i, srcW);
2480 dst[i] = src1[srcW-1]*128;
2481 dst[i+2048] = src2[srcW-1]*128;
2469 | 2482 }
2483 }
2484 else
2485 {
2486 #endif
2487 asm volatile(
2488 "xorl %%eax, %%eax \n\t" // i
2489 "xorl %%ebx, %%ebx \n\t" // xx
2490 "xorl %%ecx, %%ecx \n\t" // 2*xalpha
2800
7847d6b7ad3d
.balign or we¡ll align by 64kb on some architectures
michael
parents:
2799
diff
changeset
|
2491 ".balign 16 \n\t"
2469 | 2492 "1: \n\t"
2493 "movl %0, %%esi \n\t"
2494 "movzbl (%%esi, %%ebx), %%edi \n\t" //src[xx]
2495 "movzbl 1(%%esi, %%ebx), %%esi \n\t" //src[xx+1]
2496 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
2497 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha
2498 "shll $16, %%edi \n\t"
2499 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
2500 "movl %1, %%edi \n\t"
2501 "shrl $9, %%esi \n\t"
2502 "movw %%si, (%%edi, %%eax, 2) \n\t"
2503
2504 "movzbl (%5, %%ebx), %%edi \n\t" //src[xx]
2505 "movzbl 1(%5, %%ebx), %%esi \n\t" //src[xx+1]
2506 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx]
2507 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha
2508 "shll $16, %%edi \n\t"
2509 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha)
2510 "movl %1, %%edi \n\t"
2511 "shrl $9, %%esi \n\t"
2512 "movw %%si, 4096(%%edi, %%eax, 2)\n\t"
2513
2514 "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFF
2515 "adcl %3, %%ebx \n\t" //xx+= xInc>>8 + carry
2516 "addl $1, %%eax \n\t"
2517 "cmpl %2, %%eax \n\t"
2518 " jb 1b \n\t"
2519
2520 :: "m" (src1), "m" (dst), "m" (dstWidth), "m" (xInc>>16), "m" (xInc&0xFFFF),
2521 "r" (src2)
2522 : "%eax", "%ebx", "%ecx", "%edi", "%esi"
2523 );
2524 #ifdef HAVE_MMX2
2525 } //if MMX2 cant be used
2526 #endif
2527 #else
/* portable C fallback: 16.16 fixed point position, 7 bit blend factor.
   NOTE(review): (xalpha^127) == 127-xalpha here (xalpha is 7 bit), i.e. the
   weights sum to 127 rather than 128 — tiny bias, presumably kept for speed;
   confirm intentional. */
2671 | 2528 int i;
2529 unsigned int xpos=0;
2530 for(i=0;i<dstWidth;i++)
2531 {
2532 register unsigned int xx=xpos>>16;
2533 register unsigned int xalpha=(xpos&0xFFFF)>>9;
2534 dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha);
2535 dst[i+2048]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha);
2566 | 2536 /* slower
2537 dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha;
2538 dst[i+2048]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha;
2539 */
2671 | 2540 xpos+=xInc;
2541 }
2469 | 2542 #endif
3272 | 2543 }
2544 }
2545 | |
4467 | 2546 static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStrideParam[], int srcSliceY, |
4698 | 2547 int srcSliceH, uint8_t* dstParam[], int dstStrideParam[]){ |
3344 | 2548 |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2549 /* load a few things into local vars to make the code more readable? and faster */ |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2550 const int srcW= c->srcW; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2551 const int dstW= c->dstW; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2552 const int dstH= c->dstH; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2553 const int chrDstW= c->chrDstW; |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2554 const int chrSrcW= c->chrSrcW; |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2555 const int lumXInc= c->lumXInc; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2556 const int chrXInc= c->chrXInc; |
4295 | 2557 const int dstFormat= c->dstFormat; |
6503 | 2558 const int srcFormat= c->srcFormat; |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2559 const int flags= c->flags; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2560 const int canMMX2BeUsed= c->canMMX2BeUsed; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2561 int16_t *vLumFilterPos= c->vLumFilterPos; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2562 int16_t *vChrFilterPos= c->vChrFilterPos; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2563 int16_t *hLumFilterPos= c->hLumFilterPos; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2564 int16_t *hChrFilterPos= c->hChrFilterPos; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2565 int16_t *vLumFilter= c->vLumFilter; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2566 int16_t *vChrFilter= c->vChrFilter; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2567 int16_t *hLumFilter= c->hLumFilter; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2568 int16_t *hChrFilter= c->hChrFilter; |
9413 | 2569 int32_t *lumMmxFilter= c->lumMmxFilter; |
2570 int32_t *chrMmxFilter= c->chrMmxFilter; | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2571 const int vLumFilterSize= c->vLumFilterSize; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2572 const int vChrFilterSize= c->vChrFilterSize; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2573 const int hLumFilterSize= c->hLumFilterSize; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2574 const int hChrFilterSize= c->hChrFilterSize; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2575 int16_t **lumPixBuf= c->lumPixBuf; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2576 int16_t **chrPixBuf= c->chrPixBuf; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2577 const int vLumBufSize= c->vLumBufSize; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2578 const int vChrBufSize= c->vChrBufSize; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2579 uint8_t *funnyYCode= c->funnyYCode; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2580 uint8_t *funnyUVCode= c->funnyUVCode; |
4467 | 2581 uint8_t *formatConvBuffer= c->formatConvBuffer; |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2582 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample; |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2583 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample); |
3344 | 2584 |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2585 /* vars whch will change and which we need to storw back in the context */ |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2586 int dstY= c->dstY; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2587 int lumBufIndex= c->lumBufIndex; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2588 int chrBufIndex= c->chrBufIndex; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2589 int lastInLumBuf= c->lastInLumBuf; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2590 int lastInChrBuf= c->lastInChrBuf; |
4467 | 2591 int srcStride[3]; |
4698 | 2592 int dstStride[3]; |
4419 | 2593 uint8_t *src[3]; |
2594 uint8_t *dst[3]; | |
6540 | 2595 |
2596 orderYUV(c->srcFormat, src, srcStride, srcParam, srcStrideParam); | |
2597 orderYUV(c->dstFormat, dst, dstStride, dstParam, dstStrideParam); | |
6503 | 2598 |
6540 | 2599 if(isPacked(c->srcFormat)){ |
4467 | 2600 src[0]= |
2601 src[1]= | |
2602 src[2]= srcParam[0]; | |
6540 | 2603 srcStride[0]= |
4467 | 2604 srcStride[1]= |
6540 | 2605 srcStride[2]= srcStrideParam[0]; |
4467 | 2606 } |
6540 | 2607 srcStride[1]<<= c->vChrDrop; |
2608 srcStride[2]<<= c->vChrDrop; | |
4419 | 2609 |
6517 | 2610 // printf("swscale %X %X %X -> %X %X %X\n", (int)src[0], (int)src[1], (int)src[2], |
2611 // (int)dst[0], (int)dst[1], (int)dst[2]); | |
2612 | |
2613 #if 0 //self test FIXME move to a vfilter or something | |
2614 { | |
2615 static volatile int i=0; | |
2616 i++; | |
2617 if(srcFormat==IMGFMT_YV12 && i==1 && srcSliceH>= c->srcH) | |
2618 selfTest(src, srcStride, c->srcW, c->srcH); | |
2619 i--; | |
2620 } | |
2621 #endif | |
4554 | 2622 |
2623 //printf("sws Strides:%d %d %d -> %d %d %d\n", srcStride[0],srcStride[1],srcStride[2], | |
2624 //dstStride[0],dstStride[1],dstStride[2]); | |
4419 | 2625 |
2626 if(dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0) | |
2627 { | |
2628 static int firstTime=1; //FIXME move this into the context perhaps | |
2629 if(flags & SWS_PRINT_INFO && firstTime) | |
2630 { | |
5937 | 2631 mp_msg(MSGT_SWS,MSGL_WARN,"SwScaler: Warning: dstStride is not aligned!\n" |
4419 | 2632 "SwScaler: ->cannot do aligned memory acesses anymore\n"); |
2633 firstTime=0; | |
2634 } | |
2635 } | |
3344 | 2636 |
4467 | 2637 /* Note the user might start scaling the picture in the middle so this will not get executed |
2638 this is not really intended but works currently, so ppl might do it */ | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2639 if(srcSliceY ==0){ |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2640 lumBufIndex=0; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2641 chrBufIndex=0; |
4467 | 2642 dstY=0; |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2643 lastInLumBuf= -1; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2644 lastInChrBuf= -1; |
3272 | 2645 } |
3344 | 2646 |
2647 for(;dstY < dstH; dstY++){ | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2648 unsigned char *dest =dst[0]+dstStride[0]*dstY; |
6520 | 2649 const int chrDstY= dstY>>c->chrDstVSubSample; |
2650 unsigned char *uDest=dst[1]+dstStride[1]*chrDstY; | |
2651 unsigned char *vDest=dst[2]+dstStride[2]*chrDstY; | |
3344 | 2652 |
2653 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input | |
2654 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input | |
2655 const int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input | |
2656 const int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input | |
2657 | |
4290
1f8ceb12284d
general convolution filtering of the source picture
michael
parents:
4276
diff
changeset
|
2658 //handle holes (FAST_BILINEAR & weird filters) |
1f8ceb12284d
general convolution filtering of the source picture
michael
parents:
4276
diff
changeset
|
2659 if(firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1; |
1f8ceb12284d
general convolution filtering of the source picture
michael
parents:
4276
diff
changeset
|
2660 if(firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1; |
1f8ceb12284d
general convolution filtering of the source picture
michael
parents:
4276
diff
changeset
|
2661 //printf("%d %d %d\n", firstChrSrcY, lastInChrBuf, vChrBufSize); |
3344 | 2662 ASSERT(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1) |
2663 ASSERT(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1) | |
2216 | 2664 |
3344 | 2665 // Do we have enough lines in this slice to output the dstY line |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2666 if(lastLumSrcY < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample)) |
2469 | 2667 { |
3344 | 2668 //Do horizontal scaling |
2669 while(lastInLumBuf < lastLumSrcY) | |
2670 { | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2671 uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0]; |
3344 | 2672 lumBufIndex++; |
4290
1f8ceb12284d
general convolution filtering of the source picture
michael
parents:
4276
diff
changeset
|
2673 // printf("%d %d %d %d\n", lumBufIndex, vLumBufSize, lastInLumBuf, lastLumSrcY); |
3344 | 2674 ASSERT(lumBufIndex < 2*vLumBufSize) |
2675 ASSERT(lastInLumBuf + 1 - srcSliceY < srcSliceH) | |
2676 ASSERT(lastInLumBuf + 1 - srcSliceY >= 0) | |
2677 // printf("%d %d\n", lumBufIndex, vLumBufSize); | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2678 RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc, |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2679 flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize, |
5452 | 2680 funnyYCode, c->srcFormat, formatConvBuffer, |
2681 c->lumMmx2Filter, c->lumMmx2FilterPos); | |
3344 | 2682 lastInLumBuf++; |
2683 } | |
2684 while(lastInChrBuf < lastChrSrcY) | |
2685 { | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2686 uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1]; |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2687 uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2]; |
3344 | 2688 chrBufIndex++; |
2689 ASSERT(chrBufIndex < 2*vChrBufSize) | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2690 ASSERT(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH)) |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2691 ASSERT(lastInChrBuf + 1 - chrSrcSliceY >= 0) |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2692 //FIXME replace parameters through context struct (some at least) |
6503 | 2693 |
2694 if(!(isGray(srcFormat) || isGray(dstFormat))) | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2695 RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc, |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2696 flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize, |
5452 | 2697 funnyUVCode, c->srcFormat, formatConvBuffer, |
2698 c->chrMmx2Filter, c->chrMmx2FilterPos); | |
3344 | 2699 lastInChrBuf++; |
2700 } | |
2701 //wrap buf index around to stay inside the ring buffer | |
2702 if(lumBufIndex >= vLumBufSize ) lumBufIndex-= vLumBufSize; | |
2703 if(chrBufIndex >= vChrBufSize ) chrBufIndex-= vChrBufSize; | |
2469 | 2704 } |
3344 | 2705 else // not enough lines left in this slice -> load the rest in the buffer |
2469 | 2706 { |
3344 | 2707 /* printf("%d %d Last:%d %d LastInBuf:%d %d Index:%d %d Y:%d FSize: %d %d BSize: %d %d\n", |
2708 firstChrSrcY,firstLumSrcY,lastChrSrcY,lastLumSrcY, | |
2709 lastInChrBuf,lastInLumBuf,chrBufIndex,lumBufIndex,dstY,vChrFilterSize,vLumFilterSize, | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2710 vChrBufSize, vLumBufSize);*/ |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2711 |
3344 | 2712 //Do horizontal scaling |
2713 while(lastInLumBuf+1 < srcSliceY + srcSliceH) | |
2469 | 2714 { |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2715 uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0]; |
3344 | 2716 lumBufIndex++; |
2717 ASSERT(lumBufIndex < 2*vLumBufSize) | |
2718 ASSERT(lastInLumBuf + 1 - srcSliceY < srcSliceH) | |
2719 ASSERT(lastInLumBuf + 1 - srcSliceY >= 0) | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2720 RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc, |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2721 flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize, |
5452 | 2722 funnyYCode, c->srcFormat, formatConvBuffer, |
2723 c->lumMmx2Filter, c->lumMmx2FilterPos); | |
3344 | 2724 lastInLumBuf++; |
2469 | 2725 } |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2726 while(lastInChrBuf+1 < (chrSrcSliceY + chrSrcSliceH)) |
3344 | 2727 { |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2728 uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1]; |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2729 uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2]; |
3344 | 2730 chrBufIndex++; |
2731 ASSERT(chrBufIndex < 2*vChrBufSize) | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2732 ASSERT(lastInChrBuf + 1 - chrSrcSliceY < chrSrcSliceH) |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2733 ASSERT(lastInChrBuf + 1 - chrSrcSliceY >= 0) |
6503 | 2734 |
2735 if(!(isGray(srcFormat) || isGray(dstFormat))) | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2736 RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc, |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2737 flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize, |
5452 | 2738 funnyUVCode, c->srcFormat, formatConvBuffer, |
2739 c->chrMmx2Filter, c->chrMmx2FilterPos); | |
3344 | 2740 lastInChrBuf++; |
2741 } | |
2742 //wrap buf index around to stay inside the ring buffer | |
2743 if(lumBufIndex >= vLumBufSize ) lumBufIndex-= vLumBufSize; | |
2744 if(chrBufIndex >= vChrBufSize ) chrBufIndex-= vChrBufSize; | |
2745 break; //we cant output a dstY line so lets try with the next slice | |
2469 | 2746 } |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
2747 |
2748 | 2748 #ifdef HAVE_MMX |
3344 | 2749 b5Dither= dither8[dstY&1]; |
2750 g6Dither= dither4[dstY&1]; | |
2751 g5Dither= dither8[dstY&1]; | |
2752 r5Dither= dither8[(dstY+1)&1]; | |
2748 | 2753 #endif |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2754 if(dstY < dstH-2) |
3352 | 2755 { |
9414 | 2756 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; |
2757 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; | |
2758 #ifdef HAVE_MMX | |
2759 int i; | |
2760 for(i=0; i<vLumFilterSize; i++) | |
2761 { | |
2762 lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i]; | |
2763 lumMmxFilter[4*i+2]= | |
2764 lumMmxFilter[4*i+3]= | |
2765 ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001; | |
2766 } | |
2767 for(i=0; i<vChrFilterSize; i++) | |
2768 { | |
2769 chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i]; | |
2770 chrMmxFilter[4*i+2]= | |
2771 chrMmxFilter[4*i+3]= | |
2772 ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001; | |
2773 } | |
2774 #endif | |
6503 | 2775 if(isPlanarYUV(dstFormat) || isGray(dstFormat)) //YV12 like |
3344 | 2776 { |
7351 | 2777 const int chrSkipMask= (1<<c->chrDstVSubSample)-1; |
2778 if((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi | |
3344 | 2779 if(vLumFilterSize == 1 && vChrFilterSize == 1) // Unscaled YV12 |
2780 { | |
2781 int16_t *lumBuf = lumPixBuf[0]; | |
2782 int16_t *chrBuf= chrPixBuf[0]; | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2783 RENAME(yuv2yuv1)(lumBuf, chrBuf, dest, uDest, vDest, dstW, chrDstW); |
3344 | 2784 } |
2785 else //General YV12 | |
2786 { | |
9413 | 2787 RENAME(yuv2yuvX)(c, |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2788 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2789 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
9414 | 2790 dest, uDest, vDest, dstW, chrDstW); |
3344 | 2791 } |
2792 } | |
2793 else | |
2794 { | |
2795 ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); | |
2796 ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); | |
2797 if(vLumFilterSize == 1 && vChrFilterSize == 2) //Unscaled RGB | |
2798 { | |
2799 int chrAlpha= vChrFilter[2*dstY+1]; | |
7723 | 2800 RENAME(yuv2packed1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1), |
6578 | 2801 dest, dstW, chrAlpha, dstFormat, flags, dstY); |
3344 | 2802 } |
2803 else if(vLumFilterSize == 2 && vChrFilterSize == 2) //BiLinear Upscale RGB | |
2804 { | |
2805 int lumAlpha= vLumFilter[2*dstY+1]; | |
2806 int chrAlpha= vChrFilter[2*dstY+1]; | |
7723 | 2807 RENAME(yuv2packed2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1), |
6578 | 2808 dest, dstW, lumAlpha, chrAlpha, dstY); |
3344 | 2809 } |
2810 else //General RGB | |
2811 { | |
7723 | 2812 RENAME(yuv2packedX)(c, |
3344 | 2813 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, |
2814 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, | |
9413 | 2815 dest, dstW, dstY); |
3344 | 2816 } |
2817 } | |
3352 | 2818 } |
2819 else // hmm looks like we cant use MMX here without overwriting this arrays tail | |
2820 { | |
2821 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; | |
2822 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; | |
6615 | 2823 if(isPlanarYUV(dstFormat) || isGray(dstFormat)) //YV12 |
3352 | 2824 { |
7351 | 2825 const int chrSkipMask= (1<<c->chrDstVSubSample)-1; |
2826 if((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi | |
6540 | 2827 yuv2yuvXinC( |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2828 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2829 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
6540 | 2830 dest, uDest, vDest, dstW, chrDstW); |
3352 | 2831 } |
2832 else | |
2833 { | |
2834 ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); | |
2835 ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); | |
7723 | 2836 yuv2packedXinC(c, |
3352 | 2837 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, |
2838 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, | |
6578 | 2839 dest, dstW, dstY); |
3352 | 2840 } |
2841 } | |
3344 | 2842 } |
2534
cc9d3fd626f0
patch from Martin Decky <deckm1am@ss1000.ms.mff.cuni.cz> applied and unnecassery "memory" removed
michael
parents:
2521
diff
changeset
|
2843 |
cc9d3fd626f0
patch from Martin Decky <deckm1am@ss1000.ms.mff.cuni.cz> applied and unnecassery "memory" removed
michael
parents:
2521
diff
changeset
|
2844 #ifdef HAVE_MMX |
cc9d3fd626f0
patch from Martin Decky <deckm1am@ss1000.ms.mff.cuni.cz> applied and unnecassery "memory" removed
michael
parents:
2521
diff
changeset
|
2845 __asm __volatile(SFENCE:::"memory"); |
2566 | 2846 __asm __volatile(EMMS:::"memory"); |
2534
cc9d3fd626f0
patch from Martin Decky <deckm1am@ss1000.ms.mff.cuni.cz> applied and unnecassery "memory" removed
michael
parents:
2521
diff
changeset
|
2847 #endif |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2848 /* store changed local vars back in the context */ |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2849 c->dstY= dstY; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2850 c->lumBufIndex= lumBufIndex; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2851 c->chrBufIndex= chrBufIndex; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2852 c->lastInLumBuf= lastInLumBuf; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2853 c->lastInChrBuf= lastInChrBuf; |
3641 | 2854 } |