Mercurial > mplayer.hg
annotate postproc/swscale_template.c @ 9413:0d86fe21b281
cleanup
author | michael |
---|---|
date | Thu, 13 Feb 2003 14:19:05 +0000 |
parents | 25baacd1c650 |
children | 04c6fd75ed96 |
rev | line source |
---|---|
4295 | 1 /* |
2 Copyright (C) 2001-2002 Michael Niedermayer <michaelni@gmx.at> | |
2216 | 3 |
4295 | 4 This program is free software; you can redistribute it and/or modify |
5 it under the terms of the GNU General Public License as published by | |
6 the Free Software Foundation; either version 2 of the License, or | |
7 (at your option) any later version. | |
2216 | 8 |
4295 | 9 This program is distributed in the hope that it will be useful, |
10 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 GNU General Public License for more details. | |
13 | |
14 You should have received a copy of the GNU General Public License | |
15 along with this program; if not, write to the Free Software | |
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
17 */ | |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
18 |
2540 | 19 #undef MOVNTQ |
2680 | 20 #undef PAVGB |
3136 | 21 #undef PREFETCH |
22 #undef PREFETCHW | |
23 #undef EMMS | |
24 #undef SFENCE | |
25 | |
26 #ifdef HAVE_3DNOW | |
27 /* On K6 femms is faster of emms. On K7 femms is directly mapped on emms. */ | |
28 #define EMMS "femms" | |
29 #else | |
30 #define EMMS "emms" | |
31 #endif | |
32 | |
33 #ifdef HAVE_3DNOW | |
34 #define PREFETCH "prefetch" | |
35 #define PREFETCHW "prefetchw" | |
36 #elif defined ( HAVE_MMX2 ) | |
37 #define PREFETCH "prefetchnta" | |
38 #define PREFETCHW "prefetcht0" | |
39 #else | |
40 #define PREFETCH "/nop" | |
41 #define PREFETCHW "/nop" | |
42 #endif | |
43 | |
44 #ifdef HAVE_MMX2 | |
45 #define SFENCE "sfence" | |
46 #else | |
47 #define SFENCE "/nop" | |
48 #endif | |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
49 |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
50 #ifdef HAVE_MMX2 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
51 #define PAVGB(a,b) "pavgb " #a ", " #b " \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
52 #elif defined (HAVE_3DNOW) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
53 #define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
54 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
55 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
56 #ifdef HAVE_MMX2 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
57 #define MOVNTQ(a,b) "movntq " #a ", " #b " \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
58 #else |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
59 #define MOVNTQ(a,b) "movq " #a ", " #b " \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
60 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
61 |
9413 | 62 #define YSCALEYUV2YV12X(x, offset) \ |
3344 | 63 "xorl %%eax, %%eax \n\t"\ |
64 "pxor %%mm3, %%mm3 \n\t"\ | |
65 "pxor %%mm4, %%mm4 \n\t"\ | |
9413 | 66 "leal " offset "(%0), %%edx \n\t"\ |
67 "movl (%%edx), %%esi \n\t"\ | |
3344 | 68 ".balign 16 \n\t" /* FIXME Unroll? */\ |
69 "1: \n\t"\ | |
9413 | 70 "movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\ |
3344 | 71 "movq " #x "(%%esi, %%eax, 2), %%mm2 \n\t" /* srcData */\ |
72 "movq 8+" #x "(%%esi, %%eax, 2), %%mm5 \n\t" /* srcData */\ | |
9413 | 73 "addl $16, %%edx \n\t"\ |
74 "movl (%%edx), %%esi \n\t"\ | |
75 "testl %%esi, %%esi \n\t"\ | |
3344 | 76 "pmulhw %%mm0, %%mm2 \n\t"\ |
77 "pmulhw %%mm0, %%mm5 \n\t"\ | |
78 "paddw %%mm2, %%mm3 \n\t"\ | |
79 "paddw %%mm5, %%mm4 \n\t"\ | |
80 " jnz 1b \n\t"\ | |
81 "psraw $3, %%mm3 \n\t"\ | |
82 "psraw $3, %%mm4 \n\t"\ | |
83 "packuswb %%mm4, %%mm3 \n\t"\ | |
9413 | 84 MOVNTQ(%%mm3, (%1, %%eax))\ |
3344 | 85 "addl $8, %%eax \n\t"\ |
9413 | 86 "cmpl %2, %%eax \n\t"\ |
3344 | 87 "pxor %%mm3, %%mm3 \n\t"\ |
88 "pxor %%mm4, %%mm4 \n\t"\ | |
9413 | 89 "leal " offset "(%0), %%edx \n\t"\ |
90 "movl (%%edx), %%esi \n\t"\ | |
3344 | 91 "jb 1b \n\t" |
92 | |
93 #define YSCALEYUV2YV121 \ | |
94 "movl %2, %%eax \n\t"\ | |
95 ".balign 16 \n\t" /* FIXME Unroll? */\ | |
96 "1: \n\t"\ | |
97 "movq (%0, %%eax, 2), %%mm0 \n\t"\ | |
98 "movq 8(%0, %%eax, 2), %%mm1 \n\t"\ | |
99 "psraw $7, %%mm0 \n\t"\ | |
100 "psraw $7, %%mm1 \n\t"\ | |
101 "packuswb %%mm1, %%mm0 \n\t"\ | |
102 MOVNTQ(%%mm0, (%1, %%eax))\ | |
103 "addl $8, %%eax \n\t"\ | |
104 "jnc 1b \n\t" | |
105 | |
106 /* | |
107 :: "m" (-lumFilterSize), "m" (-chrFilterSize), | |
108 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), | |
109 "r" (dest), "m" (dstW), | |
110 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) | |
111 : "%eax", "%ebx", "%ecx", "%edx", "%esi" | |
112 */ | |
7723 | 113 #define YSCALEYUV2PACKEDX \ |
3344 | 114 "xorl %%eax, %%eax \n\t"\ |
115 ".balign 16 \n\t"\ | |
9413 | 116 "nop \n\t"\ |
3344 | 117 "1: \n\t"\ |
9413 | 118 "leal "CHR_MMX_FILTER_OFFSET"(%0), %%edx \n\t"\ |
119 "movl (%%edx), %%esi \n\t"\ | |
3344 | 120 "pxor %%mm3, %%mm3 \n\t"\ |
121 "pxor %%mm4, %%mm4 \n\t"\ | |
9413 | 122 ".balign 16 \n\t"\ |
3344 | 123 "2: \n\t"\ |
9413 | 124 "movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\ |
3344 | 125 "movq (%%esi, %%eax), %%mm2 \n\t" /* UsrcData */\ |
126 "movq 4096(%%esi, %%eax), %%mm5 \n\t" /* VsrcData */\ | |
9413 | 127 "addl $16, %%edx \n\t"\ |
128 "movl (%%edx), %%esi \n\t"\ | |
3344 | 129 "pmulhw %%mm0, %%mm2 \n\t"\ |
130 "pmulhw %%mm0, %%mm5 \n\t"\ | |
131 "paddw %%mm2, %%mm3 \n\t"\ | |
132 "paddw %%mm5, %%mm4 \n\t"\ | |
9413 | 133 "testl %%esi, %%esi \n\t"\ |
3344 | 134 " jnz 2b \n\t"\ |
135 \ | |
9413 | 136 "leal "LUM_MMX_FILTER_OFFSET"(%0), %%edx \n\t"\ |
137 "movl (%%edx), %%esi \n\t"\ | |
3344 | 138 "pxor %%mm1, %%mm1 \n\t"\ |
139 "pxor %%mm7, %%mm7 \n\t"\ | |
9413 | 140 ".balign 16 \n\t"\ |
3344 | 141 "2: \n\t"\ |
9413 | 142 "movq 8(%%edx), %%mm0 \n\t" /* filterCoeff */\ |
3344 | 143 "movq (%%esi, %%eax, 2), %%mm2 \n\t" /* Y1srcData */\ |
144 "movq 8(%%esi, %%eax, 2), %%mm5 \n\t" /* Y2srcData */\ | |
9413 | 145 "addl $16, %%edx \n\t"\ |
146 "movl (%%edx), %%esi \n\t"\ | |
3344 | 147 "pmulhw %%mm0, %%mm2 \n\t"\ |
148 "pmulhw %%mm0, %%mm5 \n\t"\ | |
149 "paddw %%mm2, %%mm1 \n\t"\ | |
150 "paddw %%mm5, %%mm7 \n\t"\ | |
9413 | 151 "testl %%esi, %%esi \n\t"\ |
3344 | 152 " jnz 2b \n\t"\ |
7723 | 153 |
154 | |
155 #define YSCALEYUV2RGBX \ | |
156 YSCALEYUV2PACKEDX\ | |
9413 | 157 "psubw "U_OFFSET"(%0), %%mm3 \n\t" /* (U-128)8*/\ |
158 "psubw "V_OFFSET"(%0), %%mm4 \n\t" /* (V-128)8*/\ | |
3344 | 159 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ |
160 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ | |
9413 | 161 "pmulhw "UG_COEFF"(%0), %%mm3 \n\t"\ |
162 "pmulhw "VG_COEFF"(%0), %%mm4 \n\t"\ | |
3344 | 163 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ |
9413 | 164 "pmulhw "UB_COEFF"(%0), %%mm2 \n\t"\ |
165 "pmulhw "VR_COEFF"(%0), %%mm5 \n\t"\ | |
166 "psubw "Y_OFFSET"(%0), %%mm1 \n\t" /* 8(Y-16)*/\ | |
167 "psubw "Y_OFFSET"(%0), %%mm7 \n\t" /* 8(Y-16)*/\ | |
168 "pmulhw "Y_COEFF"(%0), %%mm1 \n\t"\ | |
169 "pmulhw "Y_COEFF"(%0), %%mm7 \n\t"\ | |
3344 | 170 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ |
171 "paddw %%mm3, %%mm4 \n\t"\ | |
172 "movq %%mm2, %%mm0 \n\t"\ | |
173 "movq %%mm5, %%mm6 \n\t"\ | |
174 "movq %%mm4, %%mm3 \n\t"\ | |
175 "punpcklwd %%mm2, %%mm2 \n\t"\ | |
176 "punpcklwd %%mm5, %%mm5 \n\t"\ | |
177 "punpcklwd %%mm4, %%mm4 \n\t"\ | |
178 "paddw %%mm1, %%mm2 \n\t"\ | |
179 "paddw %%mm1, %%mm5 \n\t"\ | |
180 "paddw %%mm1, %%mm4 \n\t"\ | |
181 "punpckhwd %%mm0, %%mm0 \n\t"\ | |
182 "punpckhwd %%mm6, %%mm6 \n\t"\ | |
183 "punpckhwd %%mm3, %%mm3 \n\t"\ | |
184 "paddw %%mm7, %%mm0 \n\t"\ | |
185 "paddw %%mm7, %%mm6 \n\t"\ | |
186 "paddw %%mm7, %%mm3 \n\t"\ | |
187 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ | |
188 "packuswb %%mm0, %%mm2 \n\t"\ | |
189 "packuswb %%mm6, %%mm5 \n\t"\ | |
190 "packuswb %%mm3, %%mm4 \n\t"\ | |
191 "pxor %%mm7, %%mm7 \n\t" | |
9413 | 192 #if 0 |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
193 #define FULL_YSCALEYUV2RGB \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
194 "pxor %%mm7, %%mm7 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
195 "movd %6, %%mm6 \n\t" /*yalpha1*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
196 "punpcklwd %%mm6, %%mm6 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
197 "punpcklwd %%mm6, %%mm6 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
198 "movd %7, %%mm5 \n\t" /*uvalpha1*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
199 "punpcklwd %%mm5, %%mm5 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
200 "punpcklwd %%mm5, %%mm5 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
201 "xorl %%eax, %%eax \n\t"\ |
2800
7847d6b7ad3d
.balign or we¡ll align by 64kb on some architectures
michael
parents:
2799
diff
changeset
|
202 ".balign 16 \n\t"\ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
203 "1: \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
204 "movq (%0, %%eax, 2), %%mm0 \n\t" /*buf0[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
205 "movq (%1, %%eax, 2), %%mm1 \n\t" /*buf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
206 "movq (%2, %%eax,2), %%mm2 \n\t" /* uvbuf0[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
207 "movq (%3, %%eax,2), %%mm3 \n\t" /* uvbuf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
208 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
209 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
210 "pmulhw %%mm6, %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
211 "pmulhw %%mm5, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
212 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
213 "movq 4096(%2, %%eax,2), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
214 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
215 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
216 "movq 4096(%3, %%eax,2), %%mm0 \n\t" /* uvbuf1[eax+2048]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
217 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
218 "psubw %%mm0, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ |
4248 | 219 "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\ |
220 "psubw "MANGLE(w400)", %%mm3 \n\t" /* 8(U-128)*/\ | |
221 "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
222 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
223 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
224 "pmulhw %%mm5, %%mm4 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
225 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ |
4248 | 226 "pmulhw "MANGLE(ubCoeff)", %%mm3\n\t"\ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
227 "psraw $4, %%mm0 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ |
4248 | 228 "pmulhw "MANGLE(ugCoeff)", %%mm2\n\t"\ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
229 "paddw %%mm4, %%mm0 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\ |
4248 | 230 "psubw "MANGLE(w400)", %%mm0 \n\t" /* (V-128)8*/\ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
231 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
232 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
233 "movq %%mm0, %%mm4 \n\t" /* (V-128)8*/\ |
4248 | 234 "pmulhw "MANGLE(vrCoeff)", %%mm0\n\t"\ |
235 "pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
236 "paddw %%mm1, %%mm3 \n\t" /* B*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
237 "paddw %%mm1, %%mm0 \n\t" /* R*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
238 "packuswb %%mm3, %%mm3 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
239 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
240 "packuswb %%mm0, %%mm0 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
241 "paddw %%mm4, %%mm2 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
242 "paddw %%mm2, %%mm1 \n\t" /* G*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
243 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
244 "packuswb %%mm1, %%mm1 \n\t" |
9413 | 245 #endif |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
246 |
7723 | 247 #define YSCALEYUV2PACKED \ |
248 "movd %6, %%mm6 \n\t" /*yalpha1*/\ | |
249 "punpcklwd %%mm6, %%mm6 \n\t"\ | |
250 "punpcklwd %%mm6, %%mm6 \n\t"\ | |
251 "psraw $3, %%mm6 \n\t"\ | |
252 "movq %%mm6, 3968(%2) \n\t"\ | |
253 "movd %7, %%mm5 \n\t" /*uvalpha1*/\ | |
254 "punpcklwd %%mm5, %%mm5 \n\t"\ | |
255 "punpcklwd %%mm5, %%mm5 \n\t"\ | |
256 "psraw $3, %%mm5 \n\t"\ | |
257 "movq %%mm5, 3976(%2) \n\t"\ | |
258 "xorl %%eax, %%eax \n\t"\ | |
259 ".balign 16 \n\t"\ | |
260 "1: \n\t"\ | |
261 "movq (%2, %%eax), %%mm2 \n\t" /* uvbuf0[eax]*/\ | |
262 "movq (%3, %%eax), %%mm3 \n\t" /* uvbuf1[eax]*/\ | |
263 "movq 4096(%2, %%eax), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ | |
264 "movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ | |
265 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ | |
266 "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ | |
267 "movq 3976(%2), %%mm0 \n\t"\ | |
268 "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\ | |
269 "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\ | |
270 "psraw $7, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ | |
271 "psraw $7, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ | |
272 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\ | |
273 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\ | |
274 "movq (%0, %%eax, 2), %%mm0 \n\t" /*buf0[eax]*/\ | |
275 "movq (%1, %%eax, 2), %%mm1 \n\t" /*buf1[eax]*/\ | |
276 "movq 8(%0, %%eax, 2), %%mm6 \n\t" /*buf0[eax]*/\ | |
277 "movq 8(%1, %%eax, 2), %%mm7 \n\t" /*buf1[eax]*/\ | |
278 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\ | |
279 "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\ | |
280 "pmulhw 3968(%2), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ | |
281 "pmulhw 3968(%2), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ | |
282 "psraw $7, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ | |
283 "psraw $7, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ | |
284 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ | |
285 "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ | |
286 | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
287 #define YSCALEYUV2RGB \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
288 "movd %6, %%mm6 \n\t" /*yalpha1*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
289 "punpcklwd %%mm6, %%mm6 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
290 "punpcklwd %%mm6, %%mm6 \n\t"\ |
6554 | 291 "movq %%mm6, 3968(%2) \n\t"\ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
292 "movd %7, %%mm5 \n\t" /*uvalpha1*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
293 "punpcklwd %%mm5, %%mm5 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
294 "punpcklwd %%mm5, %%mm5 \n\t"\ |
6554 | 295 "movq %%mm5, 3976(%2) \n\t"\ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
296 "xorl %%eax, %%eax \n\t"\ |
2800
7847d6b7ad3d
.balign or we¡ll align by 64kb on some architectures
michael
parents:
2799
diff
changeset
|
297 ".balign 16 \n\t"\ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
298 "1: \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
299 "movq (%2, %%eax), %%mm2 \n\t" /* uvbuf0[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
300 "movq (%3, %%eax), %%mm3 \n\t" /* uvbuf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
301 "movq 4096(%2, %%eax), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
302 "movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
303 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
304 "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ |
6554 | 305 "movq 3976(%2), %%mm0 \n\t"\ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
306 "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
307 "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
308 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
309 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
310 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
311 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\ |
4248 | 312 "psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\ |
313 "psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
314 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
315 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ |
4248 | 316 "pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\ |
317 "pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
318 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
319 "movq (%0, %%eax, 2), %%mm0 \n\t" /*buf0[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
320 "movq (%1, %%eax, 2), %%mm1 \n\t" /*buf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
321 "movq 8(%0, %%eax, 2), %%mm6 \n\t" /*buf0[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
322 "movq 8(%1, %%eax, 2), %%mm7 \n\t" /*buf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
323 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
324 "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\ |
6554 | 325 "pmulhw 3968(%2), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ |
326 "pmulhw 3968(%2), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
327 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
328 "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
329 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
330 "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ |
4248 | 331 "pmulhw "MANGLE(ubCoeff)", %%mm2\n\t"\ |
332 "pmulhw "MANGLE(vrCoeff)", %%mm5\n\t"\ | |
333 "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\ | |
334 "psubw "MANGLE(w80)", %%mm7 \n\t" /* 8(Y-16)*/\ | |
335 "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\ | |
336 "pmulhw "MANGLE(yCoeff)", %%mm7 \n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
337 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
338 "paddw %%mm3, %%mm4 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
339 "movq %%mm2, %%mm0 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
340 "movq %%mm5, %%mm6 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
341 "movq %%mm4, %%mm3 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
342 "punpcklwd %%mm2, %%mm2 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
343 "punpcklwd %%mm5, %%mm5 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
344 "punpcklwd %%mm4, %%mm4 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
345 "paddw %%mm1, %%mm2 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
346 "paddw %%mm1, %%mm5 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
347 "paddw %%mm1, %%mm4 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
348 "punpckhwd %%mm0, %%mm0 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
349 "punpckhwd %%mm6, %%mm6 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
350 "punpckhwd %%mm3, %%mm3 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
351 "paddw %%mm7, %%mm0 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
352 "paddw %%mm7, %%mm6 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
353 "paddw %%mm7, %%mm3 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
354 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
355 "packuswb %%mm0, %%mm2 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
356 "packuswb %%mm6, %%mm5 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
357 "packuswb %%mm3, %%mm4 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
358 "pxor %%mm7, %%mm7 \n\t" |
7723 | 359 |
360 #define YSCALEYUV2PACKED1 \ | |
361 "xorl %%eax, %%eax \n\t"\ | |
362 ".balign 16 \n\t"\ | |
363 "1: \n\t"\ | |
364 "movq (%2, %%eax), %%mm3 \n\t" /* uvbuf0[eax]*/\ | |
365 "movq 4096(%2, %%eax), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ | |
366 "psraw $7, %%mm3 \n\t" \ | |
367 "psraw $7, %%mm4 \n\t" \ | |
368 "movq (%0, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\ | |
369 "movq 8(%0, %%eax, 2), %%mm7 \n\t" /*buf0[eax]*/\ | |
370 "psraw $7, %%mm1 \n\t" \ | |
371 "psraw $7, %%mm7 \n\t" \ | |
372 | |
/*
 * YSCALEYUV2RGB1: vertical (output) stage of the YUV->RGB conversion for
 * the case where only a single buffered line is used (no vertical
 * interpolation — only buf0 (%0) and uvbuf0 (%2) are read; the V plane
 * lives at offset 4096 inside the chroma buffer).
 * Removes the bias constants (w400 for chroma, w80 for luma), applies the
 * conversion coefficients and leaves packed B/G/R bytes in
 * %%mm2/%%mm4/%%mm5 with %%mm7 cleared, ready for a WRITEBGR* writer
 * (which also closes the "1:" loop over %%eax).
 */
#define YSCALEYUV2RGB1 \
		"xorl %%eax, %%eax		\n\t"\
		".balign 16			\n\t"\
		"1:				\n\t"\
		"movq (%2, %%eax), %%mm3	\n\t" /* uvbuf0[eax]*/\
		"movq 4096(%2, %%eax), %%mm4	\n\t" /* uvbuf0[eax+2048]*/\
		"psraw $4, %%mm3		\n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\
		"psraw $4, %%mm4		\n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\
		"psubw "MANGLE(w400)", %%mm3	\n\t" /* (U-128)8*/\
		"psubw "MANGLE(w400)", %%mm4	\n\t" /* (V-128)8*/\
		"movq %%mm3, %%mm2		\n\t" /* (U-128)8*/\
		"movq %%mm4, %%mm5		\n\t" /* (V-128)8*/\
		"pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\
		"pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\
	/* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
		"movq (%0, %%eax, 2), %%mm1	\n\t" /*buf0[eax]*/\
		"movq 8(%0, %%eax, 2), %%mm7	\n\t" /*buf0[eax]*/\
		"psraw $4, %%mm1		\n\t" /* buf0[eax] - buf1[eax] >>4*/\
		"psraw $4, %%mm7		\n\t" /* buf0[eax] - buf1[eax] >>4*/\
		"pmulhw "MANGLE(ubCoeff)", %%mm2\n\t"\
		"pmulhw "MANGLE(vrCoeff)", %%mm5\n\t"\
		"psubw "MANGLE(w80)", %%mm1	\n\t" /* 8(Y-16)*/\
		"psubw "MANGLE(w80)", %%mm7	\n\t" /* 8(Y-16)*/\
		"pmulhw "MANGLE(yCoeff)", %%mm1	\n\t"\
		"pmulhw "MANGLE(yCoeff)", %%mm7	\n\t"\
	/* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
		"paddw %%mm3, %%mm4		\n\t"\
		"movq %%mm2, %%mm0		\n\t"\
		"movq %%mm5, %%mm6		\n\t"\
		"movq %%mm4, %%mm3		\n\t"\
		"punpcklwd %%mm2, %%mm2		\n\t"\
		"punpcklwd %%mm5, %%mm5		\n\t"\
		"punpcklwd %%mm4, %%mm4		\n\t"\
		"paddw %%mm1, %%mm2		\n\t"\
		"paddw %%mm1, %%mm5		\n\t"\
		"paddw %%mm1, %%mm4		\n\t"\
		"punpckhwd %%mm0, %%mm0		\n\t"\
		"punpckhwd %%mm6, %%mm6		\n\t"\
		"punpckhwd %%mm3, %%mm3		\n\t"\
		"paddw %%mm7, %%mm0		\n\t"\
		"paddw %%mm7, %%mm6		\n\t"\
		"paddw %%mm7, %%mm3		\n\t"\
	/* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
		"packuswb %%mm0, %%mm2		\n\t"\
		"packuswb %%mm6, %%mm5		\n\t"\
		"packuswb %%mm3, %%mm4		\n\t"\
		"pxor %%mm7, %%mm7		\n\t"

/*
 * YSCALEYUV2PACKED1b: packed-output vertical stage that sums the chroma of
 * the two buffered lines (uvbuf0 in %2, uvbuf1 in %3, V plane at offset
 * 4096) and scales the sum (>>8); luma is taken from buf0 (%0) only.
 * Leaves Y in %%mm1/%%mm7 and the combined U/V in %%mm3/%%mm4.
 */
#define YSCALEYUV2PACKED1b \
		"xorl %%eax, %%eax		\n\t"\
		".balign 16			\n\t"\
		"1:				\n\t"\
		"movq (%2, %%eax), %%mm2	\n\t" /* uvbuf0[eax]*/\
		"movq (%3, %%eax), %%mm3	\n\t" /* uvbuf1[eax]*/\
		"movq 4096(%2, %%eax), %%mm5	\n\t" /* uvbuf0[eax+2048]*/\
		"movq 4096(%3, %%eax), %%mm4	\n\t" /* uvbuf1[eax+2048]*/\
		"paddw %%mm2, %%mm3		\n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
		"paddw %%mm5, %%mm4		\n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
		"psrlw $8, %%mm3		\n\t" \
		"psrlw $8, %%mm4		\n\t" \
		"movq (%0, %%eax, 2), %%mm1	\n\t" /*buf0[eax]*/\
		"movq 8(%0, %%eax, 2), %%mm7	\n\t" /*buf0[eax]*/\
		"psraw $7, %%mm1		\n\t" \
		"psraw $7, %%mm7		\n\t"

/*
 * YSCALEYUV2RGB1b: like YSCALEYUV2RGB1, but does vertical chrominance
 * interpolation between the two buffered chroma lines (uvbuf0 in %2,
 * uvbuf1 in %3; V plane at offset 4096).  The chroma sums are scaled
 * with psrlw $5 (see the FIXME: the sum may overflow the signed-word
 * range before the shift).  Luma is still taken from buf0 (%0) only.
 * Result: packed B/G/R bytes in %%mm2/%%mm4/%%mm5, %%mm7 cleared.
 */
// do vertical chrominance interpolation
#define YSCALEYUV2RGB1b \
		"xorl %%eax, %%eax		\n\t"\
		".balign 16			\n\t"\
		"1:				\n\t"\
		"movq (%2, %%eax), %%mm2	\n\t" /* uvbuf0[eax]*/\
		"movq (%3, %%eax), %%mm3	\n\t" /* uvbuf1[eax]*/\
		"movq 4096(%2, %%eax), %%mm5	\n\t" /* uvbuf0[eax+2048]*/\
		"movq 4096(%3, %%eax), %%mm4	\n\t" /* uvbuf1[eax+2048]*/\
		"paddw %%mm2, %%mm3		\n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\
		"paddw %%mm5, %%mm4		\n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\
		"psrlw $5, %%mm3		\n\t" /*FIXME might overflow*/\
		"psrlw $5, %%mm4		\n\t" /*FIXME might overflow*/\
		"psubw "MANGLE(w400)", %%mm3	\n\t" /* (U-128)8*/\
		"psubw "MANGLE(w400)", %%mm4	\n\t" /* (V-128)8*/\
		"movq %%mm3, %%mm2		\n\t" /* (U-128)8*/\
		"movq %%mm4, %%mm5		\n\t" /* (V-128)8*/\
		"pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\
		"pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\
	/* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\
		"movq (%0, %%eax, 2), %%mm1	\n\t" /*buf0[eax]*/\
		"movq 8(%0, %%eax, 2), %%mm7	\n\t" /*buf0[eax]*/\
		"psraw $4, %%mm1		\n\t" /* buf0[eax] - buf1[eax] >>4*/\
		"psraw $4, %%mm7		\n\t" /* buf0[eax] - buf1[eax] >>4*/\
		"pmulhw "MANGLE(ubCoeff)", %%mm2\n\t"\
		"pmulhw "MANGLE(vrCoeff)", %%mm5\n\t"\
		"psubw "MANGLE(w80)", %%mm1	\n\t" /* 8(Y-16)*/\
		"psubw "MANGLE(w80)", %%mm7	\n\t" /* 8(Y-16)*/\
		"pmulhw "MANGLE(yCoeff)", %%mm1	\n\t"\
		"pmulhw "MANGLE(yCoeff)", %%mm7	\n\t"\
	/* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\
		"paddw %%mm3, %%mm4		\n\t"\
		"movq %%mm2, %%mm0		\n\t"\
		"movq %%mm5, %%mm6		\n\t"\
		"movq %%mm4, %%mm3		\n\t"\
		"punpcklwd %%mm2, %%mm2		\n\t"\
		"punpcklwd %%mm5, %%mm5		\n\t"\
		"punpcklwd %%mm4, %%mm4		\n\t"\
		"paddw %%mm1, %%mm2		\n\t"\
		"paddw %%mm1, %%mm5		\n\t"\
		"paddw %%mm1, %%mm4		\n\t"\
		"punpckhwd %%mm0, %%mm0		\n\t"\
		"punpckhwd %%mm6, %%mm6		\n\t"\
		"punpckhwd %%mm3, %%mm3		\n\t"\
		"paddw %%mm7, %%mm0		\n\t"\
		"paddw %%mm7, %%mm6		\n\t"\
		"paddw %%mm7, %%mm3		\n\t"\
	/* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\
		"packuswb %%mm0, %%mm2		\n\t"\
		"packuswb %%mm6, %%mm5		\n\t"\
		"packuswb %%mm3, %%mm4		\n\t"\
		"pxor %%mm7, %%mm7		\n\t"

/*
 * WRITEBGR32: interleave the packed B (%%mm2), G (%%mm4), R (%%mm5) bytes
 * with a zero byte (%%mm7) into four quadwords of 0RGB pixels and store
 * 32 bytes per iteration at (%4 + %%eax*4) via MOVNTQ.  Advances %%eax by
 * 8 pixels and loops back to label "1:" while %%eax < %5.
 */
#define WRITEBGR32 \
	/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
		"movq %%mm2, %%mm1		\n\t" /* B */\
		"movq %%mm5, %%mm6		\n\t" /* R */\
		"punpcklbw %%mm4, %%mm2		\n\t" /* GBGBGBGB 0 */\
		"punpcklbw %%mm7, %%mm5		\n\t" /* 0R0R0R0R 0 */\
		"punpckhbw %%mm4, %%mm1		\n\t" /* GBGBGBGB 2 */\
		"punpckhbw %%mm7, %%mm6		\n\t" /* 0R0R0R0R 2 */\
		"movq %%mm2, %%mm0		\n\t" /* GBGBGBGB 0 */\
		"movq %%mm1, %%mm3		\n\t" /* GBGBGBGB 2 */\
		"punpcklwd %%mm5, %%mm0		\n\t" /* 0RGB0RGB 0 */\
		"punpckhwd %%mm5, %%mm2		\n\t" /* 0RGB0RGB 1 */\
		"punpcklwd %%mm6, %%mm1		\n\t" /* 0RGB0RGB 2 */\
		"punpckhwd %%mm6, %%mm3		\n\t" /* 0RGB0RGB 3 */\
\
		MOVNTQ(%%mm0, (%4, %%eax, 4))\
		MOVNTQ(%%mm2, 8(%4, %%eax, 4))\
		MOVNTQ(%%mm1, 16(%4, %%eax, 4))\
		MOVNTQ(%%mm3, 24(%4, %%eax, 4))\
\
		"addl $8, %%eax			\n\t"\
		"cmpl %5, %%eax			\n\t"\
		" jb 1b				\n\t"

/*
 * WRITEBGR16: pack B (%%mm2), G (%%mm4), R (%%mm5) bytes down to 5-6-5
 * bits (masks bF8/bFC/bF8), merge them into 16bpp pixels and store
 * 16 bytes per iteration at (%4 + %%eax*2) via MOVNTQ.  Expects
 * %%mm7 == 0.  Advances %%eax by 8 pixels and loops to "1:" while
 * %%eax < %5.
 */
#define WRITEBGR16 \
		"pand "MANGLE(bF8)", %%mm2	\n\t" /* B */\
		"pand "MANGLE(bFC)", %%mm4	\n\t" /* G */\
		"pand "MANGLE(bF8)", %%mm5	\n\t" /* R */\
		"psrlq $3, %%mm2		\n\t"\
\
		"movq %%mm2, %%mm1		\n\t"\
		"movq %%mm4, %%mm3		\n\t"\
\
		"punpcklbw %%mm7, %%mm3		\n\t"\
		"punpcklbw %%mm5, %%mm2		\n\t"\
		"punpckhbw %%mm7, %%mm4		\n\t"\
		"punpckhbw %%mm5, %%mm1		\n\t"\
\
		"psllq $3, %%mm3		\n\t"\
		"psllq $3, %%mm4		\n\t"\
\
		"por %%mm3, %%mm2		\n\t"\
		"por %%mm4, %%mm1		\n\t"\
\
		MOVNTQ(%%mm2, (%4, %%eax, 2))\
		MOVNTQ(%%mm1, 8(%4, %%eax, 2))\
\
		"addl $8, %%eax			\n\t"\
		"cmpl %5, %%eax			\n\t"\
		" jb 1b				\n\t"

/*
 * WRITEBGR15: pack B (%%mm2), G (%%mm4), R (%%mm5) bytes down to 5-5-5
 * bits (mask bF8 on all three channels, R additionally shifted right by
 * one), merge them into 15bpp pixels and store 16 bytes per iteration at
 * (%4 + %%eax*2) via MOVNTQ.  Expects %%mm7 == 0.  Advances %%eax by 8
 * pixels and loops to "1:" while %%eax < %5.
 */
#define WRITEBGR15 \
		"pand "MANGLE(bF8)", %%mm2	\n\t" /* B */\
		"pand "MANGLE(bF8)", %%mm4	\n\t" /* G */\
		"pand "MANGLE(bF8)", %%mm5	\n\t" /* R */\
		"psrlq $3, %%mm2		\n\t"\
		"psrlq $1, %%mm5		\n\t"\
\
		"movq %%mm2, %%mm1		\n\t"\
		"movq %%mm4, %%mm3		\n\t"\
\
		"punpcklbw %%mm7, %%mm3		\n\t"\
		"punpcklbw %%mm5, %%mm2		\n\t"\
		"punpckhbw %%mm7, %%mm4		\n\t"\
		"punpckhbw %%mm5, %%mm1		\n\t"\
\
		"psllq $2, %%mm3		\n\t"\
		"psllq $2, %%mm4		\n\t"\
\
		"por %%mm3, %%mm2		\n\t"\
		"por %%mm4, %%mm1		\n\t"\
\
		MOVNTQ(%%mm2, (%4, %%eax, 2))\
		MOVNTQ(%%mm1, 8(%4, %%eax, 2))\
\
		"addl $8, %%eax			\n\t"\
		"cmpl %5, %%eax			\n\t"\
		" jb 1b				\n\t"

/*
 * WRITEBGR24OLD: older 24bpp writer kept for reference (WRITEBGR24 is
 * mapped to the MMX/MMX2 variants below).  Interleaves B (%%mm2),
 * G (%%mm4), R (%%mm5) into 0RGB dwords, then squeezes out the zero
 * bytes with shift/mask/or sequences (bm* masks) to produce three packed
 * 24bpp quadwords, stored at (%%ebx) via MOVNTQ.  Expects %%mm7 == 0.
 * Advances %%ebx by 24 bytes and %%eax by 8 pixels, looping to "1:"
 * while %%eax < %5.
 */
#define WRITEBGR24OLD \
	/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
		"movq %%mm2, %%mm1		\n\t" /* B */\
		"movq %%mm5, %%mm6		\n\t" /* R */\
		"punpcklbw %%mm4, %%mm2		\n\t" /* GBGBGBGB 0 */\
		"punpcklbw %%mm7, %%mm5		\n\t" /* 0R0R0R0R 0 */\
		"punpckhbw %%mm4, %%mm1		\n\t" /* GBGBGBGB 2 */\
		"punpckhbw %%mm7, %%mm6		\n\t" /* 0R0R0R0R 2 */\
		"movq %%mm2, %%mm0		\n\t" /* GBGBGBGB 0 */\
		"movq %%mm1, %%mm3		\n\t" /* GBGBGBGB 2 */\
		"punpcklwd %%mm5, %%mm0		\n\t" /* 0RGB0RGB 0 */\
		"punpckhwd %%mm5, %%mm2		\n\t" /* 0RGB0RGB 1 */\
		"punpcklwd %%mm6, %%mm1		\n\t" /* 0RGB0RGB 2 */\
		"punpckhwd %%mm6, %%mm3		\n\t" /* 0RGB0RGB 3 */\
\
		"movq %%mm0, %%mm4		\n\t" /* 0RGB0RGB 0 */\
		"psrlq $8, %%mm0		\n\t" /* 00RGB0RG 0 */\
		"pand "MANGLE(bm00000111)", %%mm4\n\t" /* 00000RGB 0 */\
		"pand "MANGLE(bm11111000)", %%mm0\n\t" /* 00RGB000 0.5 */\
		"por %%mm4, %%mm0		\n\t" /* 00RGBRGB 0 */\
		"movq %%mm2, %%mm4		\n\t" /* 0RGB0RGB 1 */\
		"psllq $48, %%mm2		\n\t" /* GB000000 1 */\
		"por %%mm2, %%mm0		\n\t" /* GBRGBRGB 0 */\
\
		"movq %%mm4, %%mm2		\n\t" /* 0RGB0RGB 1 */\
		"psrld $16, %%mm4		\n\t" /* 000R000R 1 */\
		"psrlq $24, %%mm2		\n\t" /* 0000RGB0 1.5 */\
		"por %%mm4, %%mm2		\n\t" /* 000RRGBR 1 */\
		"pand "MANGLE(bm00001111)", %%mm2\n\t" /* 0000RGBR 1 */\
		"movq %%mm1, %%mm4		\n\t" /* 0RGB0RGB 2 */\
		"psrlq $8, %%mm1		\n\t" /* 00RGB0RG 2 */\
		"pand "MANGLE(bm00000111)", %%mm4\n\t" /* 00000RGB 2 */\
		"pand "MANGLE(bm11111000)", %%mm1\n\t" /* 00RGB000 2.5 */\
		"por %%mm4, %%mm1		\n\t" /* 00RGBRGB 2 */\
		"movq %%mm1, %%mm4		\n\t" /* 00RGBRGB 2 */\
		"psllq $32, %%mm1		\n\t" /* BRGB0000 2 */\
		"por %%mm1, %%mm2		\n\t" /* BRGBRGBR 1 */\
\
		"psrlq $32, %%mm4		\n\t" /* 000000RG 2.5 */\
		"movq %%mm3, %%mm5		\n\t" /* 0RGB0RGB 3 */\
		"psrlq $8, %%mm3		\n\t" /* 00RGB0RG 3 */\
		"pand "MANGLE(bm00000111)", %%mm5\n\t" /* 00000RGB 3 */\
		"pand "MANGLE(bm11111000)", %%mm3\n\t" /* 00RGB000 3.5 */\
		"por %%mm5, %%mm3		\n\t" /* 00RGBRGB 3 */\
		"psllq $16, %%mm3		\n\t" /* RGBRGB00 3 */\
		"por %%mm4, %%mm3		\n\t" /* RGBRGBRG 2.5 */\
\
		MOVNTQ(%%mm0, (%%ebx))\
		MOVNTQ(%%mm2, 8(%%ebx))\
		MOVNTQ(%%mm3, 16(%%ebx))\
		"addl $24, %%ebx		\n\t"\
\
		"addl $8, %%eax			\n\t"\
		"cmpl %5, %%eax			\n\t"\
		" jb 1b				\n\t"

/*
 * WRITEBGR24MMX: MMX 24bpp writer.  Interleaves B (%%mm2), G (%%mm4),
 * R (%%mm5) with a zero byte (%%mm7) into 0RGB dwords, uses
 * psllq/punpckhdq to form 0RGBRGB0 quadwords, then shifts and ors
 * adjacent quadwords together to emit three fully packed 24bpp quadwords
 * at (%%ebx) via MOVNTQ.  Advances %%ebx by 24 bytes and %%eax by 8
 * pixels, looping to "1:" while %%eax < %5.
 */
#define WRITEBGR24MMX \
	/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
		"movq %%mm2, %%mm1		\n\t" /* B */\
		"movq %%mm5, %%mm6		\n\t" /* R */\
		"punpcklbw %%mm4, %%mm2		\n\t" /* GBGBGBGB 0 */\
		"punpcklbw %%mm7, %%mm5		\n\t" /* 0R0R0R0R 0 */\
		"punpckhbw %%mm4, %%mm1		\n\t" /* GBGBGBGB 2 */\
		"punpckhbw %%mm7, %%mm6		\n\t" /* 0R0R0R0R 2 */\
		"movq %%mm2, %%mm0		\n\t" /* GBGBGBGB 0 */\
		"movq %%mm1, %%mm3		\n\t" /* GBGBGBGB 2 */\
		"punpcklwd %%mm5, %%mm0		\n\t" /* 0RGB0RGB 0 */\
		"punpckhwd %%mm5, %%mm2		\n\t" /* 0RGB0RGB 1 */\
		"punpcklwd %%mm6, %%mm1		\n\t" /* 0RGB0RGB 2 */\
		"punpckhwd %%mm6, %%mm3		\n\t" /* 0RGB0RGB 3 */\
\
		"movq %%mm0, %%mm4		\n\t" /* 0RGB0RGB 0 */\
		"movq %%mm2, %%mm6		\n\t" /* 0RGB0RGB 1 */\
		"movq %%mm1, %%mm5		\n\t" /* 0RGB0RGB 2 */\
		"movq %%mm3, %%mm7		\n\t" /* 0RGB0RGB 3 */\
\
		"psllq $40, %%mm0		\n\t" /* RGB00000 0 */\
		"psllq $40, %%mm2		\n\t" /* RGB00000 1 */\
		"psllq $40, %%mm1		\n\t" /* RGB00000 2 */\
		"psllq $40, %%mm3		\n\t" /* RGB00000 3 */\
\
		"punpckhdq %%mm4, %%mm0		\n\t" /* 0RGBRGB0 0 */\
		"punpckhdq %%mm6, %%mm2		\n\t" /* 0RGBRGB0 1 */\
		"punpckhdq %%mm5, %%mm1		\n\t" /* 0RGBRGB0 2 */\
		"punpckhdq %%mm7, %%mm3		\n\t" /* 0RGBRGB0 3 */\
\
		"psrlq $8, %%mm0		\n\t" /* 00RGBRGB 0 */\
		"movq %%mm2, %%mm6		\n\t" /* 0RGBRGB0 1 */\
		"psllq $40, %%mm2		\n\t" /* GB000000 1 */\
		"por %%mm2, %%mm0		\n\t" /* GBRGBRGB 0 */\
		MOVNTQ(%%mm0, (%%ebx))\
\
		"psrlq $24, %%mm6		\n\t" /* 0000RGBR 1 */\
		"movq %%mm1, %%mm5		\n\t" /* 0RGBRGB0 2 */\
		"psllq $24, %%mm1		\n\t" /* BRGB0000 2 */\
		"por %%mm1, %%mm6		\n\t" /* BRGBRGBR 1 */\
		MOVNTQ(%%mm6, 8(%%ebx))\
\
		"psrlq $40, %%mm5		\n\t" /* 000000RG 2 */\
		"psllq $8, %%mm3		\n\t" /* RGBRGB00 3 */\
		"por %%mm3, %%mm5		\n\t" /* RGBRGBRG 2 */\
		MOVNTQ(%%mm5, 16(%%ebx))\
\
		"addl $24, %%ebx		\n\t"\
\
		"addl $8, %%eax			\n\t"\
		"cmpl %5, %%eax			\n\t"\
		" jb 1b				\n\t"

/*
 * WRITEBGR24MMX2: MMX2 24bpp writer using pshufw to replicate channel
 * bytes and the M24A/M24B/M24C masks to select which replicas survive,
 * building three packed 24bpp quadwords directly and storing them at
 * (%%ebx) via MOVNTQ.  Clobbers %%mm0/%%mm7 with the mask constants,
 * so they must be reloaded afterwards if needed.  Advances %%ebx by 24
 * bytes and %%eax by 8 pixels, looping to "1:" while %%eax < %5.
 */
#define WRITEBGR24MMX2 \
	/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\
		"movq "MANGLE(M24A)", %%mm0	\n\t"\
		"movq "MANGLE(M24C)", %%mm7	\n\t"\
		"pshufw $0x50, %%mm2, %%mm1	\n\t" /* B3 B2 B3 B2  B1 B0 B1 B0 */\
		"pshufw $0x50, %%mm4, %%mm3	\n\t" /* G3 G2 G3 G2  G1 G0 G1 G0 */\
		"pshufw $0x00, %%mm5, %%mm6	\n\t" /* R1 R0 R1 R0  R1 R0 R1 R0 */\
\
		"pand %%mm0, %%mm1		\n\t" /* B2        B1       B0 */\
		"pand %%mm0, %%mm3		\n\t" /* G2        G1       G0 */\
		"pand %%mm7, %%mm6		\n\t" /*    R1        R0       */\
\
		"psllq $8, %%mm3		\n\t" /* G2        G1       G0    */\
		"por %%mm1, %%mm6		\n\t"\
		"por %%mm3, %%mm6		\n\t"\
		MOVNTQ(%%mm6, (%%ebx))\
\
		"psrlq $8, %%mm4		\n\t" /* 00 G7 G6 G5  G4 G3 G2 G1 */\
		"pshufw $0xA5, %%mm2, %%mm1	\n\t" /* B5 B4 B5 B4  B3 B2 B3 B2 */\
		"pshufw $0x55, %%mm4, %%mm3	\n\t" /* G4 G3 G4 G3  G4 G3 G4 G3 */\
		"pshufw $0xA5, %%mm5, %%mm6	\n\t" /* R5 R4 R5 R4  R3 R2 R3 R2 */\
\
		"pand "MANGLE(M24B)", %%mm1	\n\t" /* B5       B4        B3    */\
		"pand %%mm7, %%mm3		\n\t" /*       G4        G3       */\
		"pand %%mm0, %%mm6		\n\t" /*    R4        R3       R2 */\
\
		"por %%mm1, %%mm3		\n\t" /* B5 G4 B4     G3 B3    */\
		"por %%mm3, %%mm6		\n\t"\
		MOVNTQ(%%mm6, 8(%%ebx))\
\
		"pshufw $0xFF, %%mm2, %%mm1	\n\t" /* B7 B6 B7 B6  B7 B6 B6 B7 */\
		"pshufw $0xFA, %%mm4, %%mm3	\n\t" /* 00 G7 00 G7  G6 G5 G6 G5 */\
		"pshufw $0xFA, %%mm5, %%mm6	\n\t" /* R7 R6 R7 R6  R5 R4 R5 R4 */\
\
		"pand %%mm7, %%mm1		\n\t" /*       B7        B6       */\
		"pand %%mm0, %%mm3		\n\t" /*    G7        G6       G5 */\
		"pand "MANGLE(M24B)", %%mm6	\n\t" /* R7       R6        R5    */\
\
		"por %%mm1, %%mm3		\n\t"\
		"por %%mm3, %%mm6		\n\t"\
		MOVNTQ(%%mm6, 16(%%ebx))\
\
		"addl $24, %%ebx		\n\t"\
\
		"addl $8, %%eax			\n\t"\
		"cmpl %5, %%eax			\n\t"\
		" jb 1b				\n\t"

727 #ifdef HAVE_MMX2 | |
3126 | 728 #undef WRITEBGR24 |
2730 | 729 #define WRITEBGR24 WRITEBGR24MMX2 |
730 #else | |
3126 | 731 #undef WRITEBGR24 |
2730 | 732 #define WRITEBGR24 WRITEBGR24MMX |
733 #endif | |
734 | |
7723 | 735 #define WRITEYUY2 \ |
736 "packuswb %%mm3, %%mm3 \n\t"\ | |
737 "packuswb %%mm4, %%mm4 \n\t"\ | |
738 "packuswb %%mm7, %%mm1 \n\t"\ | |
739 "punpcklbw %%mm4, %%mm3 \n\t"\ | |
740 "movq %%mm1, %%mm7 \n\t"\ | |
741 "punpcklbw %%mm3, %%mm1 \n\t"\ | |
742 "punpckhbw %%mm3, %%mm7 \n\t"\ | |
743 \ | |
744 MOVNTQ(%%mm1, (%4, %%eax, 2))\ | |
745 MOVNTQ(%%mm7, 8(%4, %%eax, 2))\ | |
746 \ | |
747 "addl $8, %%eax \n\t"\ | |
748 "cmpl %5, %%eax \n\t"\ | |
749 " jb 1b \n\t" | |
750 | |
751 | |
9413 | 752 static inline void RENAME(yuv2yuvX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, |
3344 | 753 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
754 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW, |
9413 | 755 int32_t * lumMmxFilter, int32_t * chrMmxFilter) |
2519 | 756 { |
9413 | 757 int dummy=0; |
3344 | 758 #ifdef HAVE_MMX |
759 if(uDest != NULL) | |
760 { | |
761 asm volatile( | |
9413 | 762 YSCALEYUV2YV12X(0, CHR_MMX_FILTER_OFFSET) |
763 :: "r" (&c->redDither), | |
764 "r" (uDest), "m" (chrDstW) | |
3344 | 765 : "%eax", "%edx", "%esi" |
766 ); | |
2519 | 767 |
3344 | 768 asm volatile( |
9413 | 769 YSCALEYUV2YV12X(4096, CHR_MMX_FILTER_OFFSET) |
770 :: "r" (&c->redDither), | |
771 "r" (vDest), "m" (chrDstW) | |
3344 | 772 : "%eax", "%edx", "%esi" |
773 ); | |
774 } | |
2521 | 775 |
3344 | 776 asm volatile( |
9413 | 777 YSCALEYUV2YV12X(0, LUM_MMX_FILTER_OFFSET) |
778 :: "r" (&c->redDither), | |
779 "r" (dest), "m" (dstW) | |
3344 | 780 : "%eax", "%edx", "%esi" |
781 ); | |
782 #else | |
6540 | 783 yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize, |
3352 | 784 chrFilter, chrSrc, chrFilterSize, |
6540 | 785 dest, uDest, vDest, dstW, chrDstW); |
3344 | 786 #endif |
787 } | |
788 | |
789 static inline void RENAME(yuv2yuv1)(int16_t *lumSrc, int16_t *chrSrc, | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
790 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW) |
3344 | 791 { |
792 #ifdef HAVE_MMX | |
793 if(uDest != NULL) | |
794 { | |
795 asm volatile( | |
796 YSCALEYUV2YV121 | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
797 :: "r" (chrSrc + chrDstW), "r" (uDest + chrDstW), |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
798 "g" (-chrDstW) |
3344 | 799 : "%eax" |
800 ); | |
801 | |
802 asm volatile( | |
803 YSCALEYUV2YV121 | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
804 :: "r" (chrSrc + 2048 + chrDstW), "r" (vDest + chrDstW), |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
805 "g" (-chrDstW) |
3344 | 806 : "%eax" |
807 ); | |
2519 | 808 } |
3344 | 809 |
810 asm volatile( | |
811 YSCALEYUV2YV121 | |
812 :: "r" (lumSrc + dstW), "r" (dest + dstW), | |
813 "g" (-dstW) | |
814 : "%eax" | |
815 ); | |
816 #else | |
817 int i; | |
818 for(i=0; i<dstW; i++) | |
819 { | |
820 int val= lumSrc[i]>>7; | |
6503 | 821 |
822 if(val&256){ | |
823 if(val<0) val=0; | |
824 else val=255; | |
825 } | |
3344 | 826 |
6503 | 827 dest[i]= val; |
3344 | 828 } |
829 | |
830 if(uDest != NULL) | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
831 for(i=0; i<chrDstW; i++) |
3344 | 832 { |
833 int u=chrSrc[i]>>7; | |
834 int v=chrSrc[i + 2048]>>7; | |
835 | |
6503 | 836 if((u|v)&256){ |
837 if(u<0) u=0; | |
838 else if (u>255) u=255; | |
839 if(v<0) v=0; | |
840 else if (v>255) v=255; | |
841 } | |
842 | |
843 uDest[i]= u; | |
844 vDest[i]= v; | |
3344 | 845 } |
846 #endif | |
2519 | 847 } |
848 | |
3344 | 849 |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
850 /** |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
851 * vertical scale YV12 to RGB |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
852 */ |
7723 | 853 static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, |
3344 | 854 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, |
9413 | 855 uint8_t *dest, int dstW, int dstY) |
3344 | 856 { |
9413 | 857 int dummy=0; |
6578 | 858 switch(c->dstFormat) |
3344 | 859 { |
860 #ifdef HAVE_MMX | |
6578 | 861 case IMGFMT_BGR32: |
3344 | 862 { |
863 asm volatile( | |
864 YSCALEYUV2RGBX | |
865 WRITEBGR32 | |
866 | |
9413 | 867 :: "r" (&c->redDither), |
868 "m" (dummy), "m" (dummy), "m" (dummy), | |
869 "r" (dest), "m" (dstW) | |
870 : "%eax", "%edx", "%esi" | |
3344 | 871 ); |
872 } | |
6578 | 873 break; |
874 case IMGFMT_BGR24: | |
3344 | 875 { |
876 asm volatile( | |
877 YSCALEYUV2RGBX | |
878 "leal (%%eax, %%eax, 2), %%ebx \n\t" //FIXME optimize | |
879 "addl %4, %%ebx \n\t" | |
880 WRITEBGR24 | |
881 | |
9413 | 882 :: "r" (&c->redDither), |
883 "m" (dummy), "m" (dummy), "m" (dummy), | |
884 "r" (dest), "m" (dstW) | |
885 : "%eax", "%ebx", "%edx", "%esi" //FIXME ebx | |
3344 | 886 ); |
887 } | |
6578 | 888 break; |
889 case IMGFMT_BGR15: | |
3344 | 890 { |
891 asm volatile( | |
892 YSCALEYUV2RGBX | |
893 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | |
894 #ifdef DITHER1XBPP | |
4248 | 895 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
896 "paddusb "MANGLE(g5Dither)", %%mm4\n\t" | |
897 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
3344 | 898 #endif |
899 | |
900 WRITEBGR15 | |
901 | |
9413 | 902 :: "r" (&c->redDither), |
903 "m" (dummy), "m" (dummy), "m" (dummy), | |
904 "r" (dest), "m" (dstW) | |
905 : "%eax", "%edx", "%esi" | |
3344 | 906 ); |
907 } | |
6578 | 908 break; |
909 case IMGFMT_BGR16: | |
3344 | 910 { |
911 asm volatile( | |
912 YSCALEYUV2RGBX | |
913 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | |
914 #ifdef DITHER1XBPP | |
4248 | 915 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
916 "paddusb "MANGLE(g6Dither)", %%mm4\n\t" | |
917 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
3344 | 918 #endif |
919 | |
920 WRITEBGR16 | |
921 | |
9413 | 922 :: "r" (&c->redDither), |
923 "m" (dummy), "m" (dummy), "m" (dummy), | |
924 "r" (dest), "m" (dstW) | |
925 : "%eax", "%edx", "%esi" | |
3344 | 926 ); |
927 } | |
6578 | 928 break; |
7723 | 929 case IMGFMT_YUY2: |
930 { | |
931 asm volatile( | |
932 YSCALEYUV2PACKEDX | |
933 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | |
934 | |
935 "psraw $3, %%mm3 \n\t" | |
936 "psraw $3, %%mm4 \n\t" | |
937 "psraw $3, %%mm1 \n\t" | |
938 "psraw $3, %%mm7 \n\t" | |
939 WRITEYUY2 | |
940 | |
9413 | 941 :: "r" (&c->redDither), |
942 "m" (dummy), "m" (dummy), "m" (dummy), | |
943 "r" (dest), "m" (dstW) | |
944 : "%eax", "%edx", "%esi" | |
7723 | 945 ); |
946 } | |
947 break; | |
3344 | 948 #endif |
6578 | 949 default: |
7723 | 950 yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize, |
6578 | 951 chrFilter, chrSrc, chrFilterSize, |
952 dest, dstW, dstY); | |
953 break; | |
954 } | |
3344 | 955 } |
956 | |
957 /** | |
958 * vertical bilinear scale YV12 to RGB | |
959 */ | |
7723 | 960 static inline void RENAME(yuv2packed2)(SwsContext *c, uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1, |
6578 | 961 uint8_t *dest, int dstW, int yalpha, int uvalpha, int y) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
962 { |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
963 int yalpha1=yalpha^4095; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
964 int uvalpha1=uvalpha^4095; |
6578 | 965 int i; |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
966 |
6578 | 967 #if 0 //isnt used |
4467 | 968 if(flags&SWS_FULL_CHR_H_INT) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
969 { |
6578 | 970 switch(dstFormat) |
971 { | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
972 #ifdef HAVE_MMX |
6578 | 973 case IMGFMT_BGR32: |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
974 asm volatile( |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
975 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
976 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
977 FULL_YSCALEYUV2RGB |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
978 "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
979 "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
980 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
981 "movq %%mm3, %%mm1 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
982 "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
983 "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
984 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
985 MOVNTQ(%%mm3, (%4, %%eax, 4)) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
986 MOVNTQ(%%mm1, 8(%4, %%eax, 4)) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
987 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
988 "addl $4, %%eax \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
989 "cmpl %5, %%eax \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
990 " jb 1b \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
991 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
992 |
3209 | 993 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
994 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
995 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
996 ); |
6578 | 997 break; |
998 case IMGFMT_BGR24: | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
999 asm volatile( |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1000 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1001 FULL_YSCALEYUV2RGB |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1002 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1003 // lsb ... msb |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1004 "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1005 "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1006 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1007 "movq %%mm3, %%mm1 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1008 "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1009 "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1010 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1011 "movq %%mm3, %%mm2 \n\t" // BGR0BGR0 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1012 "psrlq $8, %%mm3 \n\t" // GR0BGR00 |
4248 | 1013 "pand "MANGLE(bm00000111)", %%mm2\n\t" // BGR00000 |
1014 "pand "MANGLE(bm11111000)", %%mm3\n\t" // 000BGR00 | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1015 "por %%mm2, %%mm3 \n\t" // BGRBGR00 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1016 "movq %%mm1, %%mm2 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1017 "psllq $48, %%mm1 \n\t" // 000000BG |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1018 "por %%mm1, %%mm3 \n\t" // BGRBGRBG |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1019 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1020 "movq %%mm2, %%mm1 \n\t" // BGR0BGR0 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1021 "psrld $16, %%mm2 \n\t" // R000R000 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1022 "psrlq $24, %%mm1 \n\t" // 0BGR0000 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1023 "por %%mm2, %%mm1 \n\t" // RBGRR000 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1024 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1025 "movl %4, %%ebx \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1026 "addl %%eax, %%ebx \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1027 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1028 #ifdef HAVE_MMX2 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1029 //FIXME Alignment |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1030 "movntq %%mm3, (%%ebx, %%eax, 2)\n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1031 "movntq %%mm1, 8(%%ebx, %%eax, 2)\n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1032 #else |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1033 "movd %%mm3, (%%ebx, %%eax, 2) \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1034 "psrlq $32, %%mm3 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1035 "movd %%mm3, 4(%%ebx, %%eax, 2) \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1036 "movd %%mm1, 8(%%ebx, %%eax, 2) \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1037 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1038 "addl $4, %%eax \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1039 "cmpl %5, %%eax \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1040 " jb 1b \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1041 |
3209 | 1042 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1043 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1044 : "%eax", "%ebx" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1045 ); |
6578 | 1046 break; |
1047 case IMGFMT_BGR15: | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1048 asm volatile( |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1049 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1050 FULL_YSCALEYUV2RGB |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1051 #ifdef DITHER1XBPP |
4248 | 1052 "paddusb "MANGLE(g5Dither)", %%mm1\n\t" |
1053 "paddusb "MANGLE(r5Dither)", %%mm0\n\t" | |
1054 "paddusb "MANGLE(b5Dither)", %%mm3\n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1055 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1056 "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1057 "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1058 "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1059 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1060 "psrlw $3, %%mm3 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1061 "psllw $2, %%mm1 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1062 "psllw $7, %%mm0 \n\t" |
4248 | 1063 "pand "MANGLE(g15Mask)", %%mm1 \n\t" |
1064 "pand "MANGLE(r15Mask)", %%mm0 \n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1065 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1066 "por %%mm3, %%mm1 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1067 "por %%mm1, %%mm0 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1068 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1069 MOVNTQ(%%mm0, (%4, %%eax, 2)) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1070 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1071 "addl $4, %%eax \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1072 "cmpl %5, %%eax \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1073 " jb 1b \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1074 |
3209 | 1075 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1076 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1077 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1078 ); |
6578 | 1079 break; |
1080 case IMGFMT_BGR16: | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1081 asm volatile( |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1082 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1083 FULL_YSCALEYUV2RGB |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1084 #ifdef DITHER1XBPP |
4248 | 1085 "paddusb "MANGLE(g6Dither)", %%mm1\n\t" |
1086 "paddusb "MANGLE(r5Dither)", %%mm0\n\t" | |
1087 "paddusb "MANGLE(b5Dither)", %%mm3\n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1088 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1089 "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1090 "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1091 "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1092 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1093 "psrlw $3, %%mm3 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1094 "psllw $3, %%mm1 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1095 "psllw $8, %%mm0 \n\t" |
4248 | 1096 "pand "MANGLE(g16Mask)", %%mm1 \n\t" |
1097 "pand "MANGLE(r16Mask)", %%mm0 \n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1098 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1099 "por %%mm3, %%mm1 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1100 "por %%mm1, %%mm0 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1101 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1102 MOVNTQ(%%mm0, (%4, %%eax, 2)) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1103 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1104 "addl $4, %%eax \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1105 "cmpl %5, %%eax \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1106 " jb 1b \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1107 |
3209 | 1108 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1109 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1110 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1111 ); |
6578 | 1112 break; |
1113 #endif | |
1114 case IMGFMT_RGB32: | |
1115 #ifndef HAVE_MMX | |
1116 case IMGFMT_BGR32: | |
1117 #endif | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1118 if(dstFormat==IMGFMT_BGR32) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1119 { |
4794 | 1120 int i; |
4793 | 1121 #ifdef WORDS_BIGENDIAN |
1122 dest++; | |
1123 #endif | |
3209 | 1124 for(i=0;i<dstW;i++){ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1125 // vertical linear interpolation && yuv2rgb in a single step: |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1126 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1127 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1128 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); |
2503 | 1129 dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)]; |
1130 dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)]; | |
1131 dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)]; | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1132 dest+= 4; |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1133 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1134 } |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1135 else if(dstFormat==IMGFMT_BGR24) |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1136 { |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1137 int i; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1138 for(i=0;i<dstW;i++){ |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1139 // vertical linear interpolation && yuv2rgb in a single step: |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1140 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1141 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1142 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1143 dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)]; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1144 dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)]; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1145 dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)]; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1146 dest+= 3; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1147 } |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1148 } |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1149 else if(dstFormat==IMGFMT_BGR16) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1150 { |
2671 | 1151 int i; |
3209 | 1152 for(i=0;i<dstW;i++){ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1153 // vertical linear interpolation && yuv2rgb in a single step: |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1154 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1155 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1156 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1157 |
2572
f2353173d52c
c optimizations (array is faster than pointer) (16bpp variants tested and 2% faster)
michael
parents:
2569
diff
changeset
|
1158 ((uint16_t*)dest)[i] = |
2584 | 1159 clip_table16b[(Y + yuvtab_40cf[U]) >>13] | |
1160 clip_table16g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] | | |
1161 clip_table16r[(Y + yuvtab_3343[V]) >>13]; | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1162 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1163 } |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1164 else if(dstFormat==IMGFMT_BGR15) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1165 { |
2671 | 1166 int i; |
3209 | 1167 for(i=0;i<dstW;i++){ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1168 // vertical linear interpolation && yuv2rgb in a single step: |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1169 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1170 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1171 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1172 |
2572
f2353173d52c
c optimizations (array is faster than pointer) (16bpp variants tested and 2% faster)
michael
parents:
2569
diff
changeset
|
1173 ((uint16_t*)dest)[i] = |
2584 | 1174 clip_table15b[(Y + yuvtab_40cf[U]) >>13] | |
1175 clip_table15g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] | | |
1176 clip_table15r[(Y + yuvtab_3343[V]) >>13]; | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1177 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1178 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1179 }//FULL_UV_IPOL |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1180 else |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1181 { |
6578 | 1182 #endif // if 0 |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1183 #ifdef HAVE_MMX |
6578 | 1184 switch(c->dstFormat) |
1185 { | |
1186 case IMGFMT_BGR32: | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1187 asm volatile( |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1188 YSCALEYUV2RGB |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1189 WRITEBGR32 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1190 |
3209 | 1191 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1192 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1193 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1194 ); |
6578 | 1195 return; |
1196 case IMGFMT_BGR24: | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1197 asm volatile( |
2728 | 1198 "movl %4, %%ebx \n\t" |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1199 YSCALEYUV2RGB |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1200 WRITEBGR24 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1201 |
3209 | 1202 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1203 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1204 : "%eax", "%ebx" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1205 ); |
6578 | 1206 return; |
1207 case IMGFMT_BGR15: | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1208 asm volatile( |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1209 YSCALEYUV2RGB |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1210 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1211 #ifdef DITHER1XBPP |
4248 | 1212 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
1213 "paddusb "MANGLE(g5Dither)", %%mm4\n\t" | |
1214 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1215 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1216 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1217 WRITEBGR15 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1218 |
3209 | 1219 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1220 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1221 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1222 ); |
6578 | 1223 return; |
1224 case IMGFMT_BGR16: | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1225 asm volatile( |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1226 YSCALEYUV2RGB |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1227 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1228 #ifdef DITHER1XBPP |
4248 | 1229 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
1230 "paddusb "MANGLE(g6Dither)", %%mm4\n\t" | |
1231 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1232 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1233 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1234 WRITEBGR16 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1235 |
3209 | 1236 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1237 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1238 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1239 ); |
6578 | 1240 return; |
7723 | 1241 case IMGFMT_YUY2: |
1242 asm volatile( | |
1243 YSCALEYUV2PACKED | |
1244 WRITEYUY2 | |
1245 | |
1246 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), | |
1247 "m" (yalpha1), "m" (uvalpha1) | |
1248 : "%eax" | |
1249 ); | |
1250 return; | |
6578 | 1251 default: break; |
1252 } | |
1253 #endif //HAVE_MMX | |
7723 | 1254 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1255 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1256 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1257 /** |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1258 * YV12 to RGB without scaling or interpolating |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1259 */ |
static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1,
			    uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y)
{
#ifdef HAVE_MMX
	/* 12-bit complement of the chroma blend weight; passed as an "m"
	   operand to the asm blocks below. */
	int uvalpha1=uvalpha^4095;
#endif
	/* no vertical luma blending in this unscaled path: only buf0 contributes */
	const int yalpha1=0;
	int i;

	uint16_t *buf1= buf0; //FIXME needed for the rgb1/bgr1
	const int yalpha= 4096; //FIXME ...

	/* full horizontal chroma interpolation requested: delegate to the
	   two-line blender with a zero luma blend (buf0 passed twice) */
	if(flags&SWS_FULL_CHR_H_INT)
	{
		RENAME(yuv2packed2)(c, buf0, buf0, uvbuf0, uvbuf1, dest, dstW, 0, uvalpha, y);
		return;
	}

#ifdef HAVE_MMX
	if( uvalpha < 2048 ) // note this is not correct (shifts chrominance by 0.5 pixels) but its a bit faster
	{
		/* chroma weight is nearer uvbuf0: use the single-chroma-line
		   YSCALEYUV2RGB1/PACKED1 macro variants (defined elsewhere) */
		switch(dstFormat)
		{
		case IMGFMT_BGR32:
			asm volatile(
				YSCALEYUV2RGB1
				WRITEBGR32
			:: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
			"m" (yalpha1), "m" (uvalpha1)
			: "%eax"
			);
			return;
		case IMGFMT_BGR24:
			/* dest goes through %%ebx here because WRITEBGR24 needs an
			   extra register; note the added "%ebx" clobber */
			asm volatile(
				"movl %4, %%ebx			\n\t"
				YSCALEYUV2RGB1
				WRITEBGR24
			:: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW),
			"m" (yalpha1), "m" (uvalpha1)
			: "%eax", "%ebx"
			);
			return;
		case IMGFMT_BGR15:
			asm volatile(
				YSCALEYUV2RGB1
		/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
				/* saturating add of the 5-bit dither thresholds before packing */
				"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
				"paddusb "MANGLE(g5Dither)", %%mm4\n\t"
				"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif
				WRITEBGR15
			:: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
			"m" (yalpha1), "m" (uvalpha1)
			: "%eax"
			);
			return;
		case IMGFMT_BGR16:
			asm volatile(
				YSCALEYUV2RGB1
		/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
				/* 5-6-5 packing: green uses the 6-bit dither table */
				"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
				"paddusb "MANGLE(g6Dither)", %%mm4\n\t"
				"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif

				WRITEBGR16
			:: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
			"m" (yalpha1), "m" (uvalpha1)
			: "%eax"
			);
			return;
		case IMGFMT_YUY2:
			asm volatile(
				YSCALEYUV2PACKED1
				WRITEYUY2
			:: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
			"m" (yalpha1), "m" (uvalpha1)
			: "%eax"
			);
			return;
		}
	}
	else
	{
		/* chroma weight near the midpoint: the "1b" macro variants are
		   used instead (presumably blending both chroma lines — the
		   macros are defined elsewhere in this file) */
		switch(dstFormat)
		{
		case IMGFMT_BGR32:
			asm volatile(
				YSCALEYUV2RGB1b
				WRITEBGR32
			:: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
			"m" (yalpha1), "m" (uvalpha1)
			: "%eax"
			);
			return;
		case IMGFMT_BGR24:
			asm volatile(
				"movl %4, %%ebx			\n\t"
				YSCALEYUV2RGB1b
				WRITEBGR24
			:: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW),
			"m" (yalpha1), "m" (uvalpha1)
			: "%eax", "%ebx"
			);
			return;
		case IMGFMT_BGR15:
			asm volatile(
				YSCALEYUV2RGB1b
		/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
				"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
				"paddusb "MANGLE(g5Dither)", %%mm4\n\t"
				"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif
				WRITEBGR15
			:: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
			"m" (yalpha1), "m" (uvalpha1)
			: "%eax"
			);
			return;
		case IMGFMT_BGR16:
			asm volatile(
				YSCALEYUV2RGB1b
		/* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */
#ifdef DITHER1XBPP
				"paddusb "MANGLE(b5Dither)", %%mm2\n\t"
				"paddusb "MANGLE(g6Dither)", %%mm4\n\t"
				"paddusb "MANGLE(r5Dither)", %%mm5\n\t"
#endif

				WRITEBGR16
			:: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
			"m" (yalpha1), "m" (uvalpha1)
			: "%eax"
			);
			return;
		case IMGFMT_YUY2:
			asm volatile(
				YSCALEYUV2PACKED1b
				WRITEYUY2
			:: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW),
			"m" (yalpha1), "m" (uvalpha1)
			: "%eax"
			);
			return;
		}
	}
#endif
	/* C fallback — reached when HAVE_MMX is off or no asm case returned above */
	if( uvalpha < 2048 )
	{
		YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C)
	}else{
		YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C)
	}
}
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1417 |
//FIXME the yuy2*/uyvy* input readers can read up to 7 samples too many
1419 | |
static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, int width)
{
#ifdef HAVE_MMX
	/* Extract the luma plane of one YUY2 line (luma is every even byte,
	   as the C fallback below shows).  The loop counter %%eax runs from
	   -width up to 0 so the back-branch is just "js 1b" on the sign flag;
	   src/dst are pre-biased by +width for the same reason. */
	asm volatile(
		"movq "MANGLE(bm01010101)", %%mm2\n\t"	// byte mask (defined elsewhere) used to keep the luma bytes
		"movl %0, %%eax			\n\t"
		"1:				\n\t"
		"movq (%1, %%eax,2), %%mm0	\n\t"	// load 2x8 source bytes (4 YUYV pairs each)
		"movq 8(%1, %%eax,2), %%mm1	\n\t"
		"pand %%mm2, %%mm0		\n\t"	// mask away the chroma bytes
		"pand %%mm2, %%mm1		\n\t"
		"packuswb %%mm1, %%mm0		\n\t"	// pack the 8 luma words down to 8 bytes
		"movq %%mm0, (%2, %%eax)	\n\t"
		"addl $8, %%eax			\n\t"	// 8 output pixels per iteration
		" js 1b				\n\t"
		: : "g" (-width), "r" (src+width*2), "r" (dst+width)
		: "%eax"
	);
#else
	/* C fallback: luma is every second byte of the YUY2 stream */
	int i;
	for(i=0; i<width; i++)
		dst[i]= src[2*i];
#endif
}
1444 | |
static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
	/* Extract chroma from two adjacent YUY2 lines, averaging them
	   vertically (PAVGB needs MMX2/3DNow, hence the guard).  "width"
	   here is the chroma width: 4 source bytes (Y U Y V) per sample. */
	asm volatile(
		"movq "MANGLE(bm01010101)", %%mm4\n\t"	// byte mask (defined elsewhere) for the even-byte lanes
		"movl %0, %%eax			\n\t"	// %%eax: -width .. 0, see yuy2ToY
		"1:				\n\t"
		"movq (%1, %%eax,4), %%mm0	\n\t"	// line 1, 16 bytes = 4 chroma samples
		"movq 8(%1, %%eax,4), %%mm1	\n\t"
		"movq (%2, %%eax,4), %%mm2	\n\t"	// line 2, same 16 bytes
		"movq 8(%2, %%eax,4), %%mm3	\n\t"
		PAVGB(%%mm2, %%mm0)			// vertical average of the two lines
		PAVGB(%%mm3, %%mm1)
		"psrlw $8, %%mm0		\n\t"	// keep the odd bytes (U/V, per the C fallback)
		"psrlw $8, %%mm1		\n\t"
		"packuswb %%mm1, %%mm0		\n\t"	// mm0 now U V U V ...
		"movq %%mm0, %%mm1		\n\t"
		"psrlw $8, %%mm0		\n\t"	// mm0: V samples
		"pand %%mm4, %%mm1		\n\t"	// mm1: U samples
		"packuswb %%mm0, %%mm0		\n\t"
		"packuswb %%mm1, %%mm1		\n\t"
		"movd %%mm0, (%4, %%eax)	\n\t"	// store 4 V bytes
		"movd %%mm1, (%3, %%eax)	\n\t"	// store 4 U bytes
		"addl $4, %%eax			\n\t"
		" js 1b				\n\t"
		: : "g" (-width), "r" (src1+width*4), "r" (src2+width*4), "r" (dstU+width), "r" (dstV+width)
		: "%eax"
	);
#else
	/* C fallback: U at byte 1, V at byte 3 of each YUYV quad; average the two lines */
	int i;
	for(i=0; i<width; i++)
	{
		dstU[i]= (src1[4*i + 1] + src2[4*i + 1])>>1;
		dstV[i]= (src1[4*i + 3] + src2[4*i + 3])>>1;
	}
#endif
}
1482 | |
//this is almost identical to the previous function, and exists only because yuy2ToY/UV(dst, src+1, ...) would have 100% unaligned accesses
static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, int width)
{
#ifdef HAVE_MMX
	/* Extract the luma plane of one UYVY line; luma sits at the ODD
	   byte of each 16-bit word (see the C fallback), so a word shift
	   replaces the byte mask used in yuy2ToY.  Loop counter runs from
	   -width to 0 as in yuy2ToY. */
	asm volatile(
		"movl %0, %%eax			\n\t"
		"1:				\n\t"
		"movq (%1, %%eax,2), %%mm0	\n\t"	// load 2x8 source bytes
		"movq 8(%1, %%eax,2), %%mm1	\n\t"
		"psrlw $8, %%mm0		\n\t"	// drop chroma, keep the high (luma) byte of each word
		"psrlw $8, %%mm1		\n\t"
		"packuswb %%mm1, %%mm0		\n\t"	// pack 8 luma bytes
		"movq %%mm0, (%2, %%eax)	\n\t"
		"addl $8, %%eax			\n\t"
		" js 1b				\n\t"
		: : "g" (-width), "r" (src+width*2), "r" (dst+width)
		: "%eax"
	);
#else
	/* C fallback: luma is every second byte, offset by one (U Y V Y order) */
	int i;
	for(i=0; i<width; i++)
		dst[i]= src[2*i+1];
#endif
}
1507 | |
static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
{
#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
	/* Extract chroma from two adjacent UYVY lines with vertical
	   averaging (PAVGB requires MMX2/3DNow).  Unlike yuy2ToUV the
	   chroma occupies the EVEN bytes, so "pand" replaces the initial
	   "psrlw" pair; the rest of the shuffle is identical. */
	asm volatile(
		"movq "MANGLE(bm01010101)", %%mm4\n\t"	// even-byte mask (defined elsewhere)
		"movl %0, %%eax			\n\t"	// %%eax: -width .. 0
		"1:				\n\t"
		"movq (%1, %%eax,4), %%mm0	\n\t"	// line 1, 4 chroma samples
		"movq 8(%1, %%eax,4), %%mm1	\n\t"
		"movq (%2, %%eax,4), %%mm2	\n\t"	// line 2
		"movq 8(%2, %%eax,4), %%mm3	\n\t"
		PAVGB(%%mm2, %%mm0)			// vertical average
		PAVGB(%%mm3, %%mm1)
		"pand %%mm4, %%mm0		\n\t"	// keep the even (chroma) bytes
		"pand %%mm4, %%mm1		\n\t"
		"packuswb %%mm1, %%mm0		\n\t"	// mm0 now U V U V ...
		"movq %%mm0, %%mm1		\n\t"
		"psrlw $8, %%mm0		\n\t"	// mm0: V samples
		"pand %%mm4, %%mm1		\n\t"	// mm1: U samples
		"packuswb %%mm0, %%mm0		\n\t"
		"packuswb %%mm1, %%mm1		\n\t"
		"movd %%mm0, (%4, %%eax)	\n\t"	// store 4 V bytes
		"movd %%mm1, (%3, %%eax)	\n\t"	// store 4 U bytes
		"addl $4, %%eax			\n\t"
		" js 1b				\n\t"
		: : "g" (-width), "r" (src1+width*4), "r" (src2+width*4), "r" (dstU+width), "r" (dstV+width)
		: "%eax"
	);
#else
	/* C fallback: U at byte 0, V at byte 2 of each UYVY quad; average the two lines */
	int i;
	for(i=0; i<width; i++)
	{
		dstU[i]= (src1[4*i + 0] + src2[4*i + 0])>>1;
		dstV[i]= (src1[4*i + 2] + src2[4*i + 2])>>1;
	}
#endif
}
1545 | |
4467 | 1546 static inline void RENAME(bgr32ToY)(uint8_t *dst, uint8_t *src, int width) |
1547 { | |
1548 #ifdef HAVE_MMXFIXME | |
1549 #else | |
1550 int i; | |
1551 for(i=0; i<width; i++) | |
1552 { | |
1553 int b= src[i*4+0]; | |
1554 int g= src[i*4+1]; | |
1555 int r= src[i*4+2]; | |
1556 | |
1557 dst[i]= ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; | |
1558 } | |
1559 #endif | |
1560 } | |
1561 | |
1562 static inline void RENAME(bgr32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
1563 { | |
1564 #ifdef HAVE_MMXFIXME | |
1565 #else | |
1566 int i; | |
1567 for(i=0; i<width; i++) | |
1568 { | |
1569 int b= src1[8*i + 0] + src1[8*i + 4] + src2[8*i + 0] + src2[8*i + 4]; | |
1570 int g= src1[8*i + 1] + src1[8*i + 5] + src2[8*i + 1] + src2[8*i + 5]; | |
1571 int r= src1[8*i + 2] + src1[8*i + 6] + src2[8*i + 2] + src2[8*i + 6]; | |
1572 | |
1573 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
1574 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
1575 } | |
1576 #endif | |
1577 } | |
1578 | |
1579 static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, int width) | |
1580 { | |
4612 | 1581 #ifdef HAVE_MMX |
1582 asm volatile( | |
1583 "movl %2, %%eax \n\t" | |
4923 | 1584 "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t" |
1585 "movq "MANGLE(w1111)", %%mm5 \n\t" | |
4612 | 1586 "pxor %%mm7, %%mm7 \n\t" |
1587 "leal (%%eax, %%eax, 2), %%ebx \n\t" | |
1588 ".balign 16 \n\t" | |
1589 "1: \n\t" | |
1590 PREFETCH" 64(%0, %%ebx) \n\t" | |
1591 "movd (%0, %%ebx), %%mm0 \n\t" | |
1592 "movd 3(%0, %%ebx), %%mm1 \n\t" | |
1593 "punpcklbw %%mm7, %%mm0 \n\t" | |
1594 "punpcklbw %%mm7, %%mm1 \n\t" | |
1595 "movd 6(%0, %%ebx), %%mm2 \n\t" | |
1596 "movd 9(%0, %%ebx), %%mm3 \n\t" | |
1597 "punpcklbw %%mm7, %%mm2 \n\t" | |
1598 "punpcklbw %%mm7, %%mm3 \n\t" | |
1599 "pmaddwd %%mm6, %%mm0 \n\t" | |
1600 "pmaddwd %%mm6, %%mm1 \n\t" | |
1601 "pmaddwd %%mm6, %%mm2 \n\t" | |
1602 "pmaddwd %%mm6, %%mm3 \n\t" | |
1603 #ifndef FAST_BGR2YV12 | |
1604 "psrad $8, %%mm0 \n\t" | |
1605 "psrad $8, %%mm1 \n\t" | |
1606 "psrad $8, %%mm2 \n\t" | |
1607 "psrad $8, %%mm3 \n\t" | |
1608 #endif | |
1609 "packssdw %%mm1, %%mm0 \n\t" | |
1610 "packssdw %%mm3, %%mm2 \n\t" | |
1611 "pmaddwd %%mm5, %%mm0 \n\t" | |
1612 "pmaddwd %%mm5, %%mm2 \n\t" | |
1613 "packssdw %%mm2, %%mm0 \n\t" | |
1614 "psraw $7, %%mm0 \n\t" | |
1615 | |
1616 "movd 12(%0, %%ebx), %%mm4 \n\t" | |
1617 "movd 15(%0, %%ebx), %%mm1 \n\t" | |
1618 "punpcklbw %%mm7, %%mm4 \n\t" | |
1619 "punpcklbw %%mm7, %%mm1 \n\t" | |
1620 "movd 18(%0, %%ebx), %%mm2 \n\t" | |
1621 "movd 21(%0, %%ebx), %%mm3 \n\t" | |
1622 "punpcklbw %%mm7, %%mm2 \n\t" | |
1623 "punpcklbw %%mm7, %%mm3 \n\t" | |
1624 "pmaddwd %%mm6, %%mm4 \n\t" | |
1625 "pmaddwd %%mm6, %%mm1 \n\t" | |
1626 "pmaddwd %%mm6, %%mm2 \n\t" | |
1627 "pmaddwd %%mm6, %%mm3 \n\t" | |
1628 #ifndef FAST_BGR2YV12 | |
1629 "psrad $8, %%mm4 \n\t" | |
1630 "psrad $8, %%mm1 \n\t" | |
1631 "psrad $8, %%mm2 \n\t" | |
1632 "psrad $8, %%mm3 \n\t" | |
1633 #endif | |
1634 "packssdw %%mm1, %%mm4 \n\t" | |
1635 "packssdw %%mm3, %%mm2 \n\t" | |
1636 "pmaddwd %%mm5, %%mm4 \n\t" | |
1637 "pmaddwd %%mm5, %%mm2 \n\t" | |
1638 "addl $24, %%ebx \n\t" | |
1639 "packssdw %%mm2, %%mm4 \n\t" | |
1640 "psraw $7, %%mm4 \n\t" | |
1641 | |
1642 "packuswb %%mm4, %%mm0 \n\t" | |
4923 | 1643 "paddusb "MANGLE(bgr2YOffset)", %%mm0 \n\t" |
4612 | 1644 |
4619 | 1645 "movq %%mm0, (%1, %%eax) \n\t" |
4612 | 1646 "addl $8, %%eax \n\t" |
1647 " js 1b \n\t" | |
1648 : : "r" (src+width*3), "r" (dst+width), "g" (-width) | |
1649 : "%eax", "%ebx" | |
1650 ); | |
4467 | 1651 #else |
1652 int i; | |
1653 for(i=0; i<width; i++) | |
1654 { | |
1655 int b= src[i*3+0]; | |
1656 int g= src[i*3+1]; | |
1657 int r= src[i*3+2]; | |
1658 | |
1659 dst[i]= ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; | |
1660 } | |
1661 #endif | |
1662 } | |
1663 | |
1664 static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
1665 { | |
4619 | 1666 #ifdef HAVE_MMX |
1667 asm volatile( | |
1668 "movl %4, %%eax \n\t" | |
4923 | 1669 "movq "MANGLE(w1111)", %%mm5 \n\t" |
1670 "movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t" | |
4619 | 1671 "pxor %%mm7, %%mm7 \n\t" |
1672 "leal (%%eax, %%eax, 2), %%ebx \n\t" | |
1673 "addl %%ebx, %%ebx \n\t" | |
1674 ".balign 16 \n\t" | |
1675 "1: \n\t" | |
1676 PREFETCH" 64(%0, %%ebx) \n\t" | |
1677 PREFETCH" 64(%1, %%ebx) \n\t" | |
1678 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | |
1679 "movq (%0, %%ebx), %%mm0 \n\t" | |
1680 "movq (%1, %%ebx), %%mm1 \n\t" | |
1681 "movq 6(%0, %%ebx), %%mm2 \n\t" | |
1682 "movq 6(%1, %%ebx), %%mm3 \n\t" | |
1683 PAVGB(%%mm1, %%mm0) | |
1684 PAVGB(%%mm3, %%mm2) | |
1685 "movq %%mm0, %%mm1 \n\t" | |
1686 "movq %%mm2, %%mm3 \n\t" | |
1687 "psrlq $24, %%mm0 \n\t" | |
1688 "psrlq $24, %%mm2 \n\t" | |
1689 PAVGB(%%mm1, %%mm0) | |
1690 PAVGB(%%mm3, %%mm2) | |
1691 "punpcklbw %%mm7, %%mm0 \n\t" | |
1692 "punpcklbw %%mm7, %%mm2 \n\t" | |
1693 #else | |
1694 "movd (%0, %%ebx), %%mm0 \n\t" | |
1695 "movd (%1, %%ebx), %%mm1 \n\t" | |
1696 "movd 3(%0, %%ebx), %%mm2 \n\t" | |
1697 "movd 3(%1, %%ebx), %%mm3 \n\t" | |
1698 "punpcklbw %%mm7, %%mm0 \n\t" | |
1699 "punpcklbw %%mm7, %%mm1 \n\t" | |
1700 "punpcklbw %%mm7, %%mm2 \n\t" | |
1701 "punpcklbw %%mm7, %%mm3 \n\t" | |
1702 "paddw %%mm1, %%mm0 \n\t" | |
1703 "paddw %%mm3, %%mm2 \n\t" | |
1704 "paddw %%mm2, %%mm0 \n\t" | |
1705 "movd 6(%0, %%ebx), %%mm4 \n\t" | |
1706 "movd 6(%1, %%ebx), %%mm1 \n\t" | |
1707 "movd 9(%0, %%ebx), %%mm2 \n\t" | |
1708 "movd 9(%1, %%ebx), %%mm3 \n\t" | |
1709 "punpcklbw %%mm7, %%mm4 \n\t" | |
1710 "punpcklbw %%mm7, %%mm1 \n\t" | |
1711 "punpcklbw %%mm7, %%mm2 \n\t" | |
1712 "punpcklbw %%mm7, %%mm3 \n\t" | |
1713 "paddw %%mm1, %%mm4 \n\t" | |
1714 "paddw %%mm3, %%mm2 \n\t" | |
1715 "paddw %%mm4, %%mm2 \n\t" | |
1716 "psrlw $2, %%mm0 \n\t" | |
1717 "psrlw $2, %%mm2 \n\t" | |
1718 #endif | |
4923 | 1719 "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" |
1720 "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" | |
4619 | 1721 |
1722 "pmaddwd %%mm0, %%mm1 \n\t" | |
1723 "pmaddwd %%mm2, %%mm3 \n\t" | |
1724 "pmaddwd %%mm6, %%mm0 \n\t" | |
1725 "pmaddwd %%mm6, %%mm2 \n\t" | |
1726 #ifndef FAST_BGR2YV12 | |
1727 "psrad $8, %%mm0 \n\t" | |
1728 "psrad $8, %%mm1 \n\t" | |
1729 "psrad $8, %%mm2 \n\t" | |
1730 "psrad $8, %%mm3 \n\t" | |
1731 #endif | |
1732 "packssdw %%mm2, %%mm0 \n\t" | |
1733 "packssdw %%mm3, %%mm1 \n\t" | |
1734 "pmaddwd %%mm5, %%mm0 \n\t" | |
1735 "pmaddwd %%mm5, %%mm1 \n\t" | |
1736 "packssdw %%mm1, %%mm0 \n\t" // V1 V0 U1 U0 | |
1737 "psraw $7, %%mm0 \n\t" | |
1738 | |
1739 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | |
1740 "movq 12(%0, %%ebx), %%mm4 \n\t" | |
1741 "movq 12(%1, %%ebx), %%mm1 \n\t" | |
1742 "movq 18(%0, %%ebx), %%mm2 \n\t" | |
1743 "movq 18(%1, %%ebx), %%mm3 \n\t" | |
1744 PAVGB(%%mm1, %%mm4) | |
1745 PAVGB(%%mm3, %%mm2) | |
1746 "movq %%mm4, %%mm1 \n\t" | |
1747 "movq %%mm2, %%mm3 \n\t" | |
1748 "psrlq $24, %%mm4 \n\t" | |
1749 "psrlq $24, %%mm2 \n\t" | |
1750 PAVGB(%%mm1, %%mm4) | |
1751 PAVGB(%%mm3, %%mm2) | |
1752 "punpcklbw %%mm7, %%mm4 \n\t" | |
1753 "punpcklbw %%mm7, %%mm2 \n\t" | |
1754 #else | |
1755 "movd 12(%0, %%ebx), %%mm4 \n\t" | |
1756 "movd 12(%1, %%ebx), %%mm1 \n\t" | |
1757 "movd 15(%0, %%ebx), %%mm2 \n\t" | |
1758 "movd 15(%1, %%ebx), %%mm3 \n\t" | |
1759 "punpcklbw %%mm7, %%mm4 \n\t" | |
1760 "punpcklbw %%mm7, %%mm1 \n\t" | |
1761 "punpcklbw %%mm7, %%mm2 \n\t" | |
1762 "punpcklbw %%mm7, %%mm3 \n\t" | |
1763 "paddw %%mm1, %%mm4 \n\t" | |
1764 "paddw %%mm3, %%mm2 \n\t" | |
1765 "paddw %%mm2, %%mm4 \n\t" | |
1766 "movd 18(%0, %%ebx), %%mm5 \n\t" | |
1767 "movd 18(%1, %%ebx), %%mm1 \n\t" | |
1768 "movd 21(%0, %%ebx), %%mm2 \n\t" | |
1769 "movd 21(%1, %%ebx), %%mm3 \n\t" | |
1770 "punpcklbw %%mm7, %%mm5 \n\t" | |
1771 "punpcklbw %%mm7, %%mm1 \n\t" | |
1772 "punpcklbw %%mm7, %%mm2 \n\t" | |
1773 "punpcklbw %%mm7, %%mm3 \n\t" | |
1774 "paddw %%mm1, %%mm5 \n\t" | |
1775 "paddw %%mm3, %%mm2 \n\t" | |
1776 "paddw %%mm5, %%mm2 \n\t" | |
4923 | 1777 "movq "MANGLE(w1111)", %%mm5 \n\t" |
4619 | 1778 "psrlw $2, %%mm4 \n\t" |
1779 "psrlw $2, %%mm2 \n\t" | |
1780 #endif | |
4923 | 1781 "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" |
1782 "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" | |
4619 | 1783 |
1784 "pmaddwd %%mm4, %%mm1 \n\t" | |
1785 "pmaddwd %%mm2, %%mm3 \n\t" | |
1786 "pmaddwd %%mm6, %%mm4 \n\t" | |
1787 "pmaddwd %%mm6, %%mm2 \n\t" | |
1788 #ifndef FAST_BGR2YV12 | |
1789 "psrad $8, %%mm4 \n\t" | |
1790 "psrad $8, %%mm1 \n\t" | |
1791 "psrad $8, %%mm2 \n\t" | |
1792 "psrad $8, %%mm3 \n\t" | |
1793 #endif | |
1794 "packssdw %%mm2, %%mm4 \n\t" | |
1795 "packssdw %%mm3, %%mm1 \n\t" | |
1796 "pmaddwd %%mm5, %%mm4 \n\t" | |
1797 "pmaddwd %%mm5, %%mm1 \n\t" | |
1798 "addl $24, %%ebx \n\t" | |
1799 "packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2 | |
1800 "psraw $7, %%mm4 \n\t" | |
1801 | |
1802 "movq %%mm0, %%mm1 \n\t" | |
1803 "punpckldq %%mm4, %%mm0 \n\t" | |
1804 "punpckhdq %%mm4, %%mm1 \n\t" | |
1805 "packsswb %%mm1, %%mm0 \n\t" | |
4923 | 1806 "paddb "MANGLE(bgr2UVOffset)", %%mm0 \n\t" |
4619 | 1807 |
1808 "movd %%mm0, (%2, %%eax) \n\t" | |
1809 "punpckhdq %%mm0, %%mm0 \n\t" | |
1810 "movd %%mm0, (%3, %%eax) \n\t" | |
1811 "addl $4, %%eax \n\t" | |
1812 " js 1b \n\t" | |
1813 : : "r" (src1+width*6), "r" (src2+width*6), "r" (dstU+width), "r" (dstV+width), "g" (-width) | |
1814 : "%eax", "%ebx" | |
1815 ); | |
4467 | 1816 #else |
1817 int i; | |
1818 for(i=0; i<width; i++) | |
1819 { | |
1820 int b= src1[6*i + 0] + src1[6*i + 3] + src2[6*i + 0] + src2[6*i + 3]; | |
1821 int g= src1[6*i + 1] + src1[6*i + 4] + src2[6*i + 1] + src2[6*i + 4]; | |
1822 int r= src1[6*i + 2] + src1[6*i + 5] + src2[6*i + 2] + src2[6*i + 5]; | |
1823 | |
1824 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
1825 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
1826 } | |
1827 #endif | |
1828 } | |
1829 | |
4578 | 1830 static inline void RENAME(bgr16ToY)(uint8_t *dst, uint8_t *src, int width) |
1831 { | |
1832 int i; | |
1833 for(i=0; i<width; i++) | |
1834 { | |
1835 int d= src[i*2] + (src[i*2+1]<<8); | |
1836 int b= d&0x1F; | |
1837 int g= (d>>5)&0x3F; | |
1838 int r= (d>>11)&0x1F; | |
1839 | |
1840 dst[i]= ((2*RY*r + GY*g + 2*BY*b)>>(RGB2YUV_SHIFT-2)) + 16; | |
1841 } | |
1842 } | |
1843 | |
1844 static inline void RENAME(bgr16ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
1845 { | |
1846 int i; | |
1847 for(i=0; i<width; i++) | |
1848 { | |
4579 | 1849 #if 1 |
1850 int d0= le2me_32( ((uint32_t*)src1)[i] ); | |
1851 int d1= le2me_32( ((uint32_t*)src2)[i] ); | |
1852 | |
1853 int dl= (d0&0x07E0F81F) + (d1&0x07E0F81F); | |
1854 int dh= ((d0>>5)&0x07C0F83F) + ((d1>>5)&0x07C0F83F); | |
1855 | |
1856 int dh2= (dh>>11) + (dh<<21); | |
1857 int d= dh2 + dl; | |
1858 | |
1859 int b= d&0x7F; | |
1860 int r= (d>>11)&0x7F; | |
1861 int g= d>>21; | |
1862 #else | |
4578 | 1863 int d0= src1[i*4] + (src1[i*4+1]<<8); |
1864 int b0= d0&0x1F; | |
1865 int g0= (d0>>5)&0x3F; | |
1866 int r0= (d0>>11)&0x1F; | |
1867 | |
1868 int d1= src1[i*4+2] + (src1[i*4+3]<<8); | |
1869 int b1= d1&0x1F; | |
1870 int g1= (d1>>5)&0x3F; | |
1871 int r1= (d1>>11)&0x1F; | |
1872 | |
1873 int d2= src2[i*4] + (src2[i*4+1]<<8); | |
1874 int b2= d2&0x1F; | |
1875 int g2= (d2>>5)&0x3F; | |
1876 int r2= (d2>>11)&0x1F; | |
1877 | |
1878 int d3= src2[i*4+2] + (src2[i*4+3]<<8); | |
1879 int b3= d3&0x1F; | |
1880 int g3= (d3>>5)&0x3F; | |
1881 int r3= (d3>>11)&0x1F; | |
1882 | |
1883 int b= b0 + b1 + b2 + b3; | |
1884 int g= g0 + g1 + g2 + g3; | |
1885 int r= r0 + r1 + r2 + r3; | |
4579 | 1886 #endif |
4578 | 1887 dstU[i]= ((2*RU*r + GU*g + 2*BU*b)>>(RGB2YUV_SHIFT+2-2)) + 128; |
1888 dstV[i]= ((2*RV*r + GV*g + 2*BV*b)>>(RGB2YUV_SHIFT+2-2)) + 128; | |
1889 } | |
1890 } | |
1891 | |
4580 | 1892 static inline void RENAME(bgr15ToY)(uint8_t *dst, uint8_t *src, int width) |
1893 { | |
1894 int i; | |
1895 for(i=0; i<width; i++) | |
1896 { | |
1897 int d= src[i*2] + (src[i*2+1]<<8); | |
1898 int b= d&0x1F; | |
1899 int g= (d>>5)&0x1F; | |
1900 int r= (d>>10)&0x1F; | |
1901 | |
1902 dst[i]= ((RY*r + GY*g + BY*b)>>(RGB2YUV_SHIFT-3)) + 16; | |
1903 } | |
1904 } | |
1905 | |
1906 static inline void RENAME(bgr15ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
1907 { | |
1908 int i; | |
1909 for(i=0; i<width; i++) | |
1910 { | |
1911 #if 1 | |
1912 int d0= le2me_32( ((uint32_t*)src1)[i] ); | |
1913 int d1= le2me_32( ((uint32_t*)src2)[i] ); | |
1914 | |
1915 int dl= (d0&0x03E07C1F) + (d1&0x03E07C1F); | |
1916 int dh= ((d0>>5)&0x03E0F81F) + ((d1>>5)&0x03E0F81F); | |
1917 | |
1918 int dh2= (dh>>11) + (dh<<21); | |
1919 int d= dh2 + dl; | |
1920 | |
1921 int b= d&0x7F; | |
1922 int r= (d>>10)&0x7F; | |
1923 int g= d>>21; | |
1924 #else | |
1925 int d0= src1[i*4] + (src1[i*4+1]<<8); | |
1926 int b0= d0&0x1F; | |
1927 int g0= (d0>>5)&0x1F; | |
1928 int r0= (d0>>10)&0x1F; | |
1929 | |
1930 int d1= src1[i*4+2] + (src1[i*4+3]<<8); | |
1931 int b1= d1&0x1F; | |
1932 int g1= (d1>>5)&0x1F; | |
1933 int r1= (d1>>10)&0x1F; | |
1934 | |
1935 int d2= src2[i*4] + (src2[i*4+1]<<8); | |
1936 int b2= d2&0x1F; | |
1937 int g2= (d2>>5)&0x1F; | |
1938 int r2= (d2>>10)&0x1F; | |
1939 | |
1940 int d3= src2[i*4+2] + (src2[i*4+3]<<8); | |
1941 int b3= d3&0x1F; | |
1942 int g3= (d3>>5)&0x1F; | |
1943 int r3= (d3>>10)&0x1F; | |
1944 | |
1945 int b= b0 + b1 + b2 + b3; | |
1946 int g= g0 + g1 + g2 + g3; | |
1947 int r= r0 + r1 + r2 + r3; | |
1948 #endif | |
1949 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2-3)) + 128; | |
1950 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2-3)) + 128; | |
1951 } | |
1952 } | |
1953 | |
1954 | |
4558 | 1955 static inline void RENAME(rgb32ToY)(uint8_t *dst, uint8_t *src, int width) |
1956 { | |
1957 int i; | |
1958 for(i=0; i<width; i++) | |
1959 { | |
1960 int r= src[i*4+0]; | |
1961 int g= src[i*4+1]; | |
1962 int b= src[i*4+2]; | |
1963 | |
1964 dst[i]= ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; | |
1965 } | |
1966 } | |
1967 | |
1968 static inline void RENAME(rgb32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
1969 { | |
1970 int i; | |
1971 for(i=0; i<width; i++) | |
1972 { | |
1973 int r= src1[8*i + 0] + src1[8*i + 4] + src2[8*i + 0] + src2[8*i + 4]; | |
1974 int g= src1[8*i + 1] + src1[8*i + 5] + src2[8*i + 1] + src2[8*i + 5]; | |
1975 int b= src1[8*i + 2] + src1[8*i + 6] + src2[8*i + 2] + src2[8*i + 6]; | |
1976 | |
1977 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
1978 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
1979 } | |
1980 } | |
1981 | |
1982 static inline void RENAME(rgb24ToY)(uint8_t *dst, uint8_t *src, int width) | |
1983 { | |
1984 int i; | |
1985 for(i=0; i<width; i++) | |
1986 { | |
1987 int r= src[i*3+0]; | |
1988 int g= src[i*3+1]; | |
1989 int b= src[i*3+2]; | |
1990 | |
1991 dst[i]= ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; | |
1992 } | |
1993 } | |
1994 | |
1995 static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
1996 { | |
1997 int i; | |
1998 for(i=0; i<width; i++) | |
1999 { | |
2000 int r= src1[6*i + 0] + src1[6*i + 3] + src2[6*i + 0] + src2[6*i + 3]; | |
2001 int g= src1[6*i + 1] + src1[6*i + 4] + src2[6*i + 1] + src2[6*i + 4]; | |
2002 int b= src1[6*i + 2] + src1[6*i + 5] + src2[6*i + 2] + src2[6*i + 5]; | |
2003 | |
2004 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
2005 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
2006 } | |
2007 } | |
2008 | |
4467 | 2009 |
3272 | 2010 // Bilinear / Bicubic scaling |
2011 static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc, | |
2012 int16_t *filter, int16_t *filterPos, int filterSize) | |
2013 { | |
2014 #ifdef HAVE_MMX | |
2015 if(filterSize==4) // allways true for upscaling, sometimes for down too | |
2016 { | |
2017 int counter= -2*dstW; | |
2018 filter-= counter*2; | |
2019 filterPos-= counter/2; | |
2020 dst-= counter/2; | |
2021 asm volatile( | |
2022 "pxor %%mm7, %%mm7 \n\t" | |
4248 | 2023 "movq "MANGLE(w02)", %%mm6 \n\t" |
3272 | 2024 "pushl %%ebp \n\t" // we use 7 regs here ... |
2025 "movl %%eax, %%ebp \n\t" | |
2026 ".balign 16 \n\t" | |
2027 "1: \n\t" | |
2028 "movzwl (%2, %%ebp), %%eax \n\t" | |
2029 "movzwl 2(%2, %%ebp), %%ebx \n\t" | |
2030 "movq (%1, %%ebp, 4), %%mm1 \n\t" | |
2031 "movq 8(%1, %%ebp, 4), %%mm3 \n\t" | |
2032 "movd (%3, %%eax), %%mm0 \n\t" | |
2033 "movd (%3, %%ebx), %%mm2 \n\t" | |
2034 "punpcklbw %%mm7, %%mm0 \n\t" | |
2035 "punpcklbw %%mm7, %%mm2 \n\t" | |
2036 "pmaddwd %%mm1, %%mm0 \n\t" | |
2037 "pmaddwd %%mm2, %%mm3 \n\t" | |
2038 "psrad $8, %%mm0 \n\t" | |
2039 "psrad $8, %%mm3 \n\t" | |
2040 "packssdw %%mm3, %%mm0 \n\t" | |
2041 "pmaddwd %%mm6, %%mm0 \n\t" | |
2042 "packssdw %%mm0, %%mm0 \n\t" | |
2043 "movd %%mm0, (%4, %%ebp) \n\t" | |
2044 "addl $4, %%ebp \n\t" | |
2045 " jnc 1b \n\t" | |
3352 | 2046 |
3272 | 2047 "popl %%ebp \n\t" |
2048 : "+a" (counter) | |
2049 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst) | |
2050 : "%ebx" | |
2051 ); | |
2052 } | |
2053 else if(filterSize==8) | |
2054 { | |
2055 int counter= -2*dstW; | |
2056 filter-= counter*4; | |
2057 filterPos-= counter/2; | |
2058 dst-= counter/2; | |
2059 asm volatile( | |
2060 "pxor %%mm7, %%mm7 \n\t" | |
4248 | 2061 "movq "MANGLE(w02)", %%mm6 \n\t" |
3272 | 2062 "pushl %%ebp \n\t" // we use 7 regs here ... |
2063 "movl %%eax, %%ebp \n\t" | |
2064 ".balign 16 \n\t" | |
2065 "1: \n\t" | |
2066 "movzwl (%2, %%ebp), %%eax \n\t" | |
2067 "movzwl 2(%2, %%ebp), %%ebx \n\t" | |
2068 "movq (%1, %%ebp, 8), %%mm1 \n\t" | |
2069 "movq 16(%1, %%ebp, 8), %%mm3 \n\t" | |
2070 "movd (%3, %%eax), %%mm0 \n\t" | |
2071 "movd (%3, %%ebx), %%mm2 \n\t" | |
2072 "punpcklbw %%mm7, %%mm0 \n\t" | |
2073 "punpcklbw %%mm7, %%mm2 \n\t" | |
2074 "pmaddwd %%mm1, %%mm0 \n\t" | |
2075 "pmaddwd %%mm2, %%mm3 \n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
2076 |
3272 | 2077 "movq 8(%1, %%ebp, 8), %%mm1 \n\t" |
2078 "movq 24(%1, %%ebp, 8), %%mm5 \n\t" | |
2079 "movd 4(%3, %%eax), %%mm4 \n\t" | |
2080 "movd 4(%3, %%ebx), %%mm2 \n\t" | |
2081 "punpcklbw %%mm7, %%mm4 \n\t" | |
2082 "punpcklbw %%mm7, %%mm2 \n\t" | |
2083 "pmaddwd %%mm1, %%mm4 \n\t" | |
2084 "pmaddwd %%mm2, %%mm5 \n\t" | |
2085 "paddd %%mm4, %%mm0 \n\t" | |
2086 "paddd %%mm5, %%mm3 \n\t" | |
2087 | |
2088 "psrad $8, %%mm0 \n\t" | |
2089 "psrad $8, %%mm3 \n\t" | |
2090 "packssdw %%mm3, %%mm0 \n\t" | |
2091 "pmaddwd %%mm6, %%mm0 \n\t" | |
2092 "packssdw %%mm0, %%mm0 \n\t" | |
2093 "movd %%mm0, (%4, %%ebp) \n\t" | |
2094 "addl $4, %%ebp \n\t" | |
2095 " jnc 1b \n\t" | |
3344 | 2096 |
3272 | 2097 "popl %%ebp \n\t" |
2098 : "+a" (counter) | |
2099 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst) | |
2100 : "%ebx" | |
2101 ); | |
2102 } | |
2103 else | |
2104 { | |
2105 int counter= -2*dstW; | |
2106 // filter-= counter*filterSize/2; | |
2107 filterPos-= counter/2; | |
2108 dst-= counter/2; | |
2109 asm volatile( | |
2110 "pxor %%mm7, %%mm7 \n\t" | |
4248 | 2111 "movq "MANGLE(w02)", %%mm6 \n\t" |
3272 | 2112 ".balign 16 \n\t" |
2113 "1: \n\t" | |
2114 "movl %2, %%ecx \n\t" | |
2115 "movzwl (%%ecx, %0), %%eax \n\t" | |
2116 "movzwl 2(%%ecx, %0), %%ebx \n\t" | |
2117 "movl %5, %%ecx \n\t" | |
2118 "pxor %%mm4, %%mm4 \n\t" | |
2119 "pxor %%mm5, %%mm5 \n\t" | |
2120 "2: \n\t" | |
2121 "movq (%1), %%mm1 \n\t" | |
2122 "movq (%1, %6), %%mm3 \n\t" | |
2123 "movd (%%ecx, %%eax), %%mm0 \n\t" | |
2124 "movd (%%ecx, %%ebx), %%mm2 \n\t" | |
2125 "punpcklbw %%mm7, %%mm0 \n\t" | |
2126 "punpcklbw %%mm7, %%mm2 \n\t" | |
2127 "pmaddwd %%mm1, %%mm0 \n\t" | |
2128 "pmaddwd %%mm2, %%mm3 \n\t" | |
2129 "paddd %%mm3, %%mm5 \n\t" | |
2130 "paddd %%mm0, %%mm4 \n\t" | |
2131 "addl $8, %1 \n\t" | |
2132 "addl $4, %%ecx \n\t" | |
2133 "cmpl %4, %%ecx \n\t" | |
2134 " jb 2b \n\t" | |
2135 "addl %6, %1 \n\t" | |
2136 "psrad $8, %%mm4 \n\t" | |
2137 "psrad $8, %%mm5 \n\t" | |
2138 "packssdw %%mm5, %%mm4 \n\t" | |
2139 "pmaddwd %%mm6, %%mm4 \n\t" | |
2140 "packssdw %%mm4, %%mm4 \n\t" | |
2141 "movl %3, %%eax \n\t" | |
2142 "movd %%mm4, (%%eax, %0) \n\t" | |
2143 "addl $4, %0 \n\t" | |
2144 " jnc 1b \n\t" | |
3344 | 2145 |
3641 | 2146 : "+r" (counter), "+r" (filter) |
2147 : "m" (filterPos), "m" (dst), "m"(src+filterSize), | |
3272 | 2148 "m" (src), "r" (filterSize*2) |
3299 | 2149 : "%ebx", "%eax", "%ecx" |
3272 | 2150 ); |
2151 } | |
2152 #else | |
2153 int i; | |
2154 for(i=0; i<dstW; i++) | |
2155 { | |
2156 int j; | |
2157 int srcPos= filterPos[i]; | |
2158 int val=0; | |
3344 | 2159 // printf("filterPos: %d\n", filterPos[i]); |
3272 | 2160 for(j=0; j<filterSize; j++) |
2161 { | |
2162 // printf("filter: %d, src: %d\n", filter[i], src[srcPos + j]); | |
2163 val += ((int)src[srcPos + j])*filter[filterSize*i + j]; | |
2164 } | |
2165 // filter += hFilterSize; | |
2166 dst[i] = MIN(MAX(0, val>>7), (1<<15)-1); // the cubic equation does overflow ... | |
2167 // dst[i] = val>>7; | |
2168 } | |
2169 #endif | |
2170 } | |
2171 // *** horizontal scale Y line to temp buffer | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2172 static inline void RENAME(hyscale)(uint16_t *dst, int dstWidth, uint8_t *src, int srcW, int xInc, |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2173 int flags, int canMMX2BeUsed, int16_t *hLumFilter, |
4467 | 2174 int16_t *hLumFilterPos, int hLumFilterSize, void *funnyYCode, |
5452 | 2175 int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter, |
2176 int32_t *mmx2FilterPos) | |
2469 | 2177 { |
4467 | 2178 if(srcFormat==IMGFMT_YUY2) |
2179 { | |
2180 RENAME(yuy2ToY)(formatConvBuffer, src, srcW); | |
2181 src= formatConvBuffer; | |
2182 } | |
9071 | 2183 else if(srcFormat==IMGFMT_UYVY) |
2184 { | |
2185 RENAME(uyvyToY)(formatConvBuffer, src, srcW); | |
2186 src= formatConvBuffer; | |
2187 } | |
4467 | 2188 else if(srcFormat==IMGFMT_BGR32) |
2189 { | |
2190 RENAME(bgr32ToY)(formatConvBuffer, src, srcW); | |
2191 src= formatConvBuffer; | |
2192 } | |
2193 else if(srcFormat==IMGFMT_BGR24) | |
2194 { | |
2195 RENAME(bgr24ToY)(formatConvBuffer, src, srcW); | |
2196 src= formatConvBuffer; | |
2197 } | |
4578 | 2198 else if(srcFormat==IMGFMT_BGR16) |
2199 { | |
2200 RENAME(bgr16ToY)(formatConvBuffer, src, srcW); | |
2201 src= formatConvBuffer; | |
2202 } | |
4580 | 2203 else if(srcFormat==IMGFMT_BGR15) |
2204 { | |
2205 RENAME(bgr15ToY)(formatConvBuffer, src, srcW); | |
2206 src= formatConvBuffer; | |
2207 } | |
4558 | 2208 else if(srcFormat==IMGFMT_RGB32) |
2209 { | |
2210 RENAME(rgb32ToY)(formatConvBuffer, src, srcW); | |
2211 src= formatConvBuffer; | |
2212 } | |
2213 else if(srcFormat==IMGFMT_RGB24) | |
2214 { | |
2215 RENAME(rgb24ToY)(formatConvBuffer, src, srcW); | |
2216 src= formatConvBuffer; | |
2217 } | |
4467 | 2218 |
3352 | 2219 #ifdef HAVE_MMX |
2220 // use the new MMX scaler if th mmx2 cant be used (its faster than the x86asm one) | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2221 if(!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed)) |
3352 | 2222 #else |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2223 if(!(flags&SWS_FAST_BILINEAR)) |
3352 | 2224 #endif |
3272 | 2225 { |
2226 RENAME(hScale)(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize); | |
2227 } | |
2228 else // Fast Bilinear upscale / crap downscale | |
2229 { | |
2469 | 2230 #ifdef ARCH_X86 |
2231 #ifdef HAVE_MMX2 | |
2671 | 2232 int i; |
2469 | 2233 if(canMMX2BeUsed) |
2234 { | |
2235 asm volatile( | |
2236 "pxor %%mm7, %%mm7 \n\t" | |
5452 | 2237 "movl %0, %%ecx \n\t" |
2238 "movl %1, %%edi \n\t" | |
2239 "movl %2, %%edx \n\t" | |
2240 "movl %3, %%ebx \n\t" | |
2469 | 2241 "xorl %%eax, %%eax \n\t" // i |
5452 | 2242 PREFETCH" (%%ecx) \n\t" |
2243 PREFETCH" 32(%%ecx) \n\t" | |
2244 PREFETCH" 64(%%ecx) \n\t" | |
2520 | 2245 |
2469 | 2246 #define FUNNY_Y_CODE \ |
5452 | 2247 "movl (%%ebx), %%esi \n\t"\ |
2248 "call *%4 \n\t"\ | |
2249 "addl (%%ebx, %%eax), %%ecx \n\t"\ | |
2250 "addl %%eax, %%edi \n\t"\ | |
2251 "xorl %%eax, %%eax \n\t"\ | |
2520 | 2252 |
2469 | 2253 FUNNY_Y_CODE |
2254 FUNNY_Y_CODE | |
2255 FUNNY_Y_CODE | |
2256 FUNNY_Y_CODE | |
2257 FUNNY_Y_CODE | |
2258 FUNNY_Y_CODE | |
2259 FUNNY_Y_CODE | |
2260 FUNNY_Y_CODE | |
2261 | |
5452 | 2262 :: "m" (src), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos), |
2263 "m" (funnyYCode) | |
2469 | 2264 : "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi" |
2265 ); | |
3215 | 2266 for(i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128; |
2469 | 2267 } |
2268 else | |
2269 { | |
2270 #endif | |
2271 //NO MMX just normal asm ... | |
2272 asm volatile( | |
2273 "xorl %%eax, %%eax \n\t" // i | |
2274 "xorl %%ebx, %%ebx \n\t" // xx | |
2275 "xorl %%ecx, %%ecx \n\t" // 2*xalpha | |
2800
7847d6b7ad3d
.balign or we¡ll align by 64kb on some architectures
michael
parents:
2799
diff
changeset
|
2276 ".balign 16 \n\t" |
2469 | 2277 "1: \n\t" |
2278 "movzbl (%0, %%ebx), %%edi \n\t" //src[xx] | |
2279 "movzbl 1(%0, %%ebx), %%esi \n\t" //src[xx+1] | |
2280 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] | |
2281 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha | |
2282 "shll $16, %%edi \n\t" | |
2283 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha) | |
2284 "movl %1, %%edi \n\t" | |
2285 "shrl $9, %%esi \n\t" | |
2286 "movw %%si, (%%edi, %%eax, 2) \n\t" | |
2287 "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFF | |
2288 "adcl %3, %%ebx \n\t" //xx+= xInc>>8 + carry | |
2289 | |
2290 "movzbl (%0, %%ebx), %%edi \n\t" //src[xx] | |
2291 "movzbl 1(%0, %%ebx), %%esi \n\t" //src[xx+1] | |
2292 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] | |
2293 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha | |
2294 "shll $16, %%edi \n\t" | |
2295 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha) | |
2296 "movl %1, %%edi \n\t" | |
2297 "shrl $9, %%esi \n\t" | |
2298 "movw %%si, 2(%%edi, %%eax, 2) \n\t" | |
2299 "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFF | |
2300 "adcl %3, %%ebx \n\t" //xx+= xInc>>8 + carry | |
2301 | |
2302 | |
2303 "addl $2, %%eax \n\t" | |
2304 "cmpl %2, %%eax \n\t" | |
2305 " jb 1b \n\t" | |
2306 | |
2307 | |
2308 :: "r" (src), "m" (dst), "m" (dstWidth), "m" (xInc>>16), "m" (xInc&0xFFFF) | |
2309 : "%eax", "%ebx", "%ecx", "%edi", "%esi" | |
2310 ); | |
2311 #ifdef HAVE_MMX2 | |
2312 } //if MMX2 cant be used | |
2313 #endif | |
2314 #else | |
2671 | 2315 int i; |
2316 unsigned int xpos=0; | |
2317 for(i=0;i<dstWidth;i++) | |
2318 { | |
2319 register unsigned int xx=xpos>>16; | |
2320 register unsigned int xalpha=(xpos&0xFFFF)>>9; | |
2321 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha; | |
2322 xpos+=xInc; | |
2323 } | |
2469 | 2324 #endif |
3272 | 2325 } |
2469 | 2326 } |
2327 | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2328 inline static void RENAME(hcscale)(uint16_t *dst, int dstWidth, uint8_t *src1, uint8_t *src2, |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2329 int srcW, int xInc, int flags, int canMMX2BeUsed, int16_t *hChrFilter, |
4467 | 2330 int16_t *hChrFilterPos, int hChrFilterSize, void *funnyUVCode, |
5452 | 2331 int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter, |
2332 int32_t *mmx2FilterPos) | |
2469 | 2333 { |
4467 | 2334 if(srcFormat==IMGFMT_YUY2) |
2335 { | |
2336 RENAME(yuy2ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); | |
2337 src1= formatConvBuffer; | |
2338 src2= formatConvBuffer+2048; | |
2339 } | |
9071 | 2340 else if(srcFormat==IMGFMT_UYVY) |
2341 { | |
2342 RENAME(uyvyToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); | |
2343 src1= formatConvBuffer; | |
2344 src2= formatConvBuffer+2048; | |
2345 } | |
4467 | 2346 else if(srcFormat==IMGFMT_BGR32) |
2347 { | |
2348 RENAME(bgr32ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); | |
2349 src1= formatConvBuffer; | |
2350 src2= formatConvBuffer+2048; | |
2351 } | |
2352 else if(srcFormat==IMGFMT_BGR24) | |
2353 { | |
2354 RENAME(bgr24ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); | |
2355 src1= formatConvBuffer; | |
2356 src2= formatConvBuffer+2048; | |
2357 } | |
4578 | 2358 else if(srcFormat==IMGFMT_BGR16) |
2359 { | |
2360 RENAME(bgr16ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); | |
2361 src1= formatConvBuffer; | |
2362 src2= formatConvBuffer+2048; | |
2363 } | |
4580 | 2364 else if(srcFormat==IMGFMT_BGR15) |
2365 { | |
2366 RENAME(bgr15ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); | |
2367 src1= formatConvBuffer; | |
2368 src2= formatConvBuffer+2048; | |
2369 } | |
4558 | 2370 else if(srcFormat==IMGFMT_RGB32) |
2371 { | |
2372 RENAME(rgb32ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); | |
2373 src1= formatConvBuffer; | |
2374 src2= formatConvBuffer+2048; | |
2375 } | |
2376 else if(srcFormat==IMGFMT_RGB24) | |
2377 { | |
2378 RENAME(rgb24ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); | |
2379 src1= formatConvBuffer; | |
2380 src2= formatConvBuffer+2048; | |
2381 } | |
4481 | 2382 else if(isGray(srcFormat)) |
2383 { | |
2384 return; | |
2385 } | |
4467 | 2386 |
3352 | 2387 #ifdef HAVE_MMX |
2388 // use the new MMX scaler if th mmx2 cant be used (its faster than the x86asm one) | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2389 if(!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed)) |
3352 | 2390 #else |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2391 if(!(flags&SWS_FAST_BILINEAR)) |
3352 | 2392 #endif |
3272 | 2393 { |
2394 RENAME(hScale)(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); | |
2395 RENAME(hScale)(dst+2048, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); | |
2396 } | |
2397 else // Fast Bilinear upscale / crap downscale | |
2398 { | |
2469 | 2399 #ifdef ARCH_X86 |
2400 #ifdef HAVE_MMX2 | |
2671 | 2401 int i; |
2469 | 2402 if(canMMX2BeUsed) |
2403 { | |
2404 asm volatile( | |
5452 | 2405 "pxor %%mm7, %%mm7 \n\t" |
2406 "movl %0, %%ecx \n\t" | |
2407 "movl %1, %%edi \n\t" | |
2408 "movl %2, %%edx \n\t" | |
2409 "movl %3, %%ebx \n\t" | |
2410 "xorl %%eax, %%eax \n\t" // i | |
2411 PREFETCH" (%%ecx) \n\t" | |
2412 PREFETCH" 32(%%ecx) \n\t" | |
2413 PREFETCH" 64(%%ecx) \n\t" | |
2414 | |
2415 #define FUNNY_UV_CODE \ | |
2416 "movl (%%ebx), %%esi \n\t"\ | |
2417 "call *%4 \n\t"\ | |
2418 "addl (%%ebx, %%eax), %%ecx \n\t"\ | |
2419 "addl %%eax, %%edi \n\t"\ | |
2420 "xorl %%eax, %%eax \n\t"\ | |
2469 | 2421 |
5452 | 2422 FUNNY_UV_CODE |
2423 FUNNY_UV_CODE | |
2424 FUNNY_UV_CODE | |
2425 FUNNY_UV_CODE | |
2426 "xorl %%eax, %%eax \n\t" // i | |
2427 "movl %5, %%ecx \n\t" // src | |
2428 "movl %1, %%edi \n\t" // buf1 | |
2429 "addl $4096, %%edi \n\t" | |
2430 PREFETCH" (%%ecx) \n\t" | |
2431 PREFETCH" 32(%%ecx) \n\t" | |
2432 PREFETCH" 64(%%ecx) \n\t" | |
2469 | 2433 |
5452 | 2434 FUNNY_UV_CODE |
2435 FUNNY_UV_CODE | |
2436 FUNNY_UV_CODE | |
2437 FUNNY_UV_CODE | |
2469 | 2438 |
5452 | 2439 :: "m" (src1), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos), |
2440 "m" (funnyUVCode), "m" (src2) | |
2441 : "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi" | |
2442 ); | |
3344 | 2443 for(i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) |
2469 | 2444 { |
3344 | 2445 // printf("%d %d %d\n", dstWidth, i, srcW); |
2446 dst[i] = src1[srcW-1]*128; | |
2447 dst[i+2048] = src2[srcW-1]*128; | |
2469 | 2448 } |
2449 } | |
2450 else | |
2451 { | |
2452 #endif | |
2453 asm volatile( | |
2454 "xorl %%eax, %%eax \n\t" // i | |
2455 "xorl %%ebx, %%ebx \n\t" // xx | |
2456 "xorl %%ecx, %%ecx \n\t" // 2*xalpha | |
2800
7847d6b7ad3d
.balign or we¡ll align by 64kb on some architectures
michael
parents:
2799
diff
changeset
|
2457 ".balign 16 \n\t" |
2469 | 2458 "1: \n\t" |
2459 "movl %0, %%esi \n\t" | |
2460 "movzbl (%%esi, %%ebx), %%edi \n\t" //src[xx] | |
2461 "movzbl 1(%%esi, %%ebx), %%esi \n\t" //src[xx+1] | |
2462 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] | |
2463 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha | |
2464 "shll $16, %%edi \n\t" | |
2465 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha) | |
2466 "movl %1, %%edi \n\t" | |
2467 "shrl $9, %%esi \n\t" | |
2468 "movw %%si, (%%edi, %%eax, 2) \n\t" | |
2469 | |
2470 "movzbl (%5, %%ebx), %%edi \n\t" //src[xx] | |
2471 "movzbl 1(%5, %%ebx), %%esi \n\t" //src[xx+1] | |
2472 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] | |
2473 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha | |
2474 "shll $16, %%edi \n\t" | |
2475 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha) | |
2476 "movl %1, %%edi \n\t" | |
2477 "shrl $9, %%esi \n\t" | |
2478 "movw %%si, 4096(%%edi, %%eax, 2)\n\t" | |
2479 | |
2480 "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFF | |
2481 "adcl %3, %%ebx \n\t" //xx+= xInc>>8 + carry | |
2482 "addl $1, %%eax \n\t" | |
2483 "cmpl %2, %%eax \n\t" | |
2484 " jb 1b \n\t" | |
2485 | |
2486 :: "m" (src1), "m" (dst), "m" (dstWidth), "m" (xInc>>16), "m" (xInc&0xFFFF), | |
2487 "r" (src2) | |
2488 : "%eax", "%ebx", "%ecx", "%edi", "%esi" | |
2489 ); | |
2490 #ifdef HAVE_MMX2 | |
2491 } //if MMX2 cant be used | |
2492 #endif | |
2493 #else | |
2671 | 2494 int i; |
2495 unsigned int xpos=0; | |
2496 for(i=0;i<dstWidth;i++) | |
2497 { | |
2498 register unsigned int xx=xpos>>16; | |
2499 register unsigned int xalpha=(xpos&0xFFFF)>>9; | |
2500 dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha); | |
2501 dst[i+2048]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha); | |
2566 | 2502 /* slower |
2503 dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha; | |
2504 dst[i+2048]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha; | |
2505 */ | |
2671 | 2506 xpos+=xInc; |
2507 } | |
2469 | 2508 #endif |
3272 | 2509 } |
2510 } | |
2511 | |
4467 | 2512 static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStrideParam[], int srcSliceY, |
4698 | 2513 int srcSliceH, uint8_t* dstParam[], int dstStrideParam[]){ |
3344 | 2514 |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2515 /* load a few things into local vars to make the code more readable? and faster */ |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2516 const int srcW= c->srcW; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2517 const int dstW= c->dstW; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2518 const int dstH= c->dstH; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2519 const int chrDstW= c->chrDstW; |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2520 const int chrSrcW= c->chrSrcW; |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2521 const int lumXInc= c->lumXInc; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2522 const int chrXInc= c->chrXInc; |
4295 | 2523 const int dstFormat= c->dstFormat; |
6503 | 2524 const int srcFormat= c->srcFormat; |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2525 const int flags= c->flags; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2526 const int canMMX2BeUsed= c->canMMX2BeUsed; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2527 int16_t *vLumFilterPos= c->vLumFilterPos; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2528 int16_t *vChrFilterPos= c->vChrFilterPos; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2529 int16_t *hLumFilterPos= c->hLumFilterPos; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2530 int16_t *hChrFilterPos= c->hChrFilterPos; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2531 int16_t *vLumFilter= c->vLumFilter; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2532 int16_t *vChrFilter= c->vChrFilter; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2533 int16_t *hLumFilter= c->hLumFilter; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2534 int16_t *hChrFilter= c->hChrFilter; |
9413 | 2535 int32_t *lumMmxFilter= c->lumMmxFilter; |
2536 int32_t *chrMmxFilter= c->chrMmxFilter; | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2537 const int vLumFilterSize= c->vLumFilterSize; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2538 const int vChrFilterSize= c->vChrFilterSize; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2539 const int hLumFilterSize= c->hLumFilterSize; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2540 const int hChrFilterSize= c->hChrFilterSize; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2541 int16_t **lumPixBuf= c->lumPixBuf; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2542 int16_t **chrPixBuf= c->chrPixBuf; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2543 const int vLumBufSize= c->vLumBufSize; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2544 const int vChrBufSize= c->vChrBufSize; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2545 uint8_t *funnyYCode= c->funnyYCode; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2546 uint8_t *funnyUVCode= c->funnyUVCode; |
4467 | 2547 uint8_t *formatConvBuffer= c->formatConvBuffer; |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2548 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample; |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2549 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample); |
3344 | 2550 |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2551 /* vars whch will change and which we need to storw back in the context */ |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2552 int dstY= c->dstY; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2553 int lumBufIndex= c->lumBufIndex; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2554 int chrBufIndex= c->chrBufIndex; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2555 int lastInLumBuf= c->lastInLumBuf; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2556 int lastInChrBuf= c->lastInChrBuf; |
4467 | 2557 int srcStride[3]; |
4698 | 2558 int dstStride[3]; |
4419 | 2559 uint8_t *src[3]; |
2560 uint8_t *dst[3]; | |
6540 | 2561 |
2562 orderYUV(c->srcFormat, src, srcStride, srcParam, srcStrideParam); | |
2563 orderYUV(c->dstFormat, dst, dstStride, dstParam, dstStrideParam); | |
6503 | 2564 |
6540 | 2565 if(isPacked(c->srcFormat)){ |
4467 | 2566 src[0]= |
2567 src[1]= | |
2568 src[2]= srcParam[0]; | |
6540 | 2569 srcStride[0]= |
4467 | 2570 srcStride[1]= |
6540 | 2571 srcStride[2]= srcStrideParam[0]; |
4467 | 2572 } |
6540 | 2573 srcStride[1]<<= c->vChrDrop; |
2574 srcStride[2]<<= c->vChrDrop; | |
4419 | 2575 |
6517 | 2576 // printf("swscale %X %X %X -> %X %X %X\n", (int)src[0], (int)src[1], (int)src[2], |
2577 // (int)dst[0], (int)dst[1], (int)dst[2]); | |
2578 | |
2579 #if 0 //self test FIXME move to a vfilter or something | |
2580 { | |
2581 static volatile int i=0; | |
2582 i++; | |
2583 if(srcFormat==IMGFMT_YV12 && i==1 && srcSliceH>= c->srcH) | |
2584 selfTest(src, srcStride, c->srcW, c->srcH); | |
2585 i--; | |
2586 } | |
2587 #endif | |
4554 | 2588 |
2589 //printf("sws Strides:%d %d %d -> %d %d %d\n", srcStride[0],srcStride[1],srcStride[2], | |
2590 //dstStride[0],dstStride[1],dstStride[2]); | |
4419 | 2591 |
2592 if(dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0) | |
2593 { | |
2594 static int firstTime=1; //FIXME move this into the context perhaps | |
2595 if(flags & SWS_PRINT_INFO && firstTime) | |
2596 { | |
5937 | 2597 mp_msg(MSGT_SWS,MSGL_WARN,"SwScaler: Warning: dstStride is not aligned!\n" |
4419 | 2598 "SwScaler: ->cannot do aligned memory acesses anymore\n"); |
2599 firstTime=0; | |
2600 } | |
2601 } | |
3344 | 2602 |
4467 | 2603 /* Note the user might start scaling the picture in the middle so this will not get executed |
2604 this is not really intended but works currently, so ppl might do it */ | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2605 if(srcSliceY ==0){ |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2606 lumBufIndex=0; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2607 chrBufIndex=0; |
4467 | 2608 dstY=0; |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2609 lastInLumBuf= -1; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2610 lastInChrBuf= -1; |
3272 | 2611 } |
3344 | 2612 |
2613 for(;dstY < dstH; dstY++){ | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2614 unsigned char *dest =dst[0]+dstStride[0]*dstY; |
6520 | 2615 const int chrDstY= dstY>>c->chrDstVSubSample; |
2616 unsigned char *uDest=dst[1]+dstStride[1]*chrDstY; | |
2617 unsigned char *vDest=dst[2]+dstStride[2]*chrDstY; | |
3344 | 2618 |
2619 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input | |
2620 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input | |
2621 const int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input | |
2622 const int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input | |
2623 | |
4290
1f8ceb12284d
general convolution filtering of the source picture
michael
parents:
4276
diff
changeset
|
2624 //handle holes (FAST_BILINEAR & weird filters) |
1f8ceb12284d
general convolution filtering of the source picture
michael
parents:
4276
diff
changeset
|
2625 if(firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1; |
1f8ceb12284d
general convolution filtering of the source picture
michael
parents:
4276
diff
changeset
|
2626 if(firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1; |
1f8ceb12284d
general convolution filtering of the source picture
michael
parents:
4276
diff
changeset
|
2627 //printf("%d %d %d\n", firstChrSrcY, lastInChrBuf, vChrBufSize); |
3344 | 2628 ASSERT(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1) |
2629 ASSERT(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1) | |
2216 | 2630 |
3344 | 2631 // Do we have enough lines in this slice to output the dstY line |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2632 if(lastLumSrcY < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample)) |
2469 | 2633 { |
3344 | 2634 //Do horizontal scaling |
2635 while(lastInLumBuf < lastLumSrcY) | |
2636 { | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2637 uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0]; |
3344 | 2638 lumBufIndex++; |
4290
1f8ceb12284d
general convolution filtering of the source picture
michael
parents:
4276
diff
changeset
|
2639 // printf("%d %d %d %d\n", lumBufIndex, vLumBufSize, lastInLumBuf, lastLumSrcY); |
3344 | 2640 ASSERT(lumBufIndex < 2*vLumBufSize) |
2641 ASSERT(lastInLumBuf + 1 - srcSliceY < srcSliceH) | |
2642 ASSERT(lastInLumBuf + 1 - srcSliceY >= 0) | |
2643 // printf("%d %d\n", lumBufIndex, vLumBufSize); | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2644 RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc, |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2645 flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize, |
5452 | 2646 funnyYCode, c->srcFormat, formatConvBuffer, |
2647 c->lumMmx2Filter, c->lumMmx2FilterPos); | |
3344 | 2648 lastInLumBuf++; |
2649 } | |
2650 while(lastInChrBuf < lastChrSrcY) | |
2651 { | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2652 uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1]; |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2653 uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2]; |
3344 | 2654 chrBufIndex++; |
2655 ASSERT(chrBufIndex < 2*vChrBufSize) | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2656 ASSERT(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH)) |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2657 ASSERT(lastInChrBuf + 1 - chrSrcSliceY >= 0) |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2658 //FIXME replace parameters through context struct (some at least) |
6503 | 2659 |
2660 if(!(isGray(srcFormat) || isGray(dstFormat))) | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2661 RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc, |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2662 flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize, |
5452 | 2663 funnyUVCode, c->srcFormat, formatConvBuffer, |
2664 c->chrMmx2Filter, c->chrMmx2FilterPos); | |
3344 | 2665 lastInChrBuf++; |
2666 } | |
2667 //wrap buf index around to stay inside the ring buffer | |
2668 if(lumBufIndex >= vLumBufSize ) lumBufIndex-= vLumBufSize; | |
2669 if(chrBufIndex >= vChrBufSize ) chrBufIndex-= vChrBufSize; | |
2469 | 2670 } |
3344 | 2671 else // not enough lines left in this slice -> load the rest in the buffer |
2469 | 2672 { |
3344 | 2673 /* printf("%d %d Last:%d %d LastInBuf:%d %d Index:%d %d Y:%d FSize: %d %d BSize: %d %d\n", |
2674 firstChrSrcY,firstLumSrcY,lastChrSrcY,lastLumSrcY, | |
2675 lastInChrBuf,lastInLumBuf,chrBufIndex,lumBufIndex,dstY,vChrFilterSize,vLumFilterSize, | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2676 vChrBufSize, vLumBufSize);*/ |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2677 |
3344 | 2678 //Do horizontal scaling |
2679 while(lastInLumBuf+1 < srcSliceY + srcSliceH) | |
2469 | 2680 { |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2681 uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0]; |
3344 | 2682 lumBufIndex++; |
2683 ASSERT(lumBufIndex < 2*vLumBufSize) | |
2684 ASSERT(lastInLumBuf + 1 - srcSliceY < srcSliceH) | |
2685 ASSERT(lastInLumBuf + 1 - srcSliceY >= 0) | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2686 RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc, |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2687 flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize, |
5452 | 2688 funnyYCode, c->srcFormat, formatConvBuffer, |
2689 c->lumMmx2Filter, c->lumMmx2FilterPos); | |
3344 | 2690 lastInLumBuf++; |
2469 | 2691 } |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2692 while(lastInChrBuf+1 < (chrSrcSliceY + chrSrcSliceH)) |
3344 | 2693 { |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2694 uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1]; |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2695 uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2]; |
3344 | 2696 chrBufIndex++; |
2697 ASSERT(chrBufIndex < 2*vChrBufSize) | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2698 ASSERT(lastInChrBuf + 1 - chrSrcSliceY < chrSrcSliceH) |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2699 ASSERT(lastInChrBuf + 1 - chrSrcSliceY >= 0) |
6503 | 2700 |
2701 if(!(isGray(srcFormat) || isGray(dstFormat))) | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2702 RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc, |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2703 flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize, |
5452 | 2704 funnyUVCode, c->srcFormat, formatConvBuffer, |
2705 c->chrMmx2Filter, c->chrMmx2FilterPos); | |
3344 | 2706 lastInChrBuf++; |
2707 } | |
2708 //wrap buf index around to stay inside the ring buffer | |
2709 if(lumBufIndex >= vLumBufSize ) lumBufIndex-= vLumBufSize; | |
2710 if(chrBufIndex >= vChrBufSize ) chrBufIndex-= vChrBufSize; | |
2711 break; //we cant output a dstY line so lets try with the next slice | |
2469 | 2712 } |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
2713 |
2748 | 2714 #ifdef HAVE_MMX |
3344 | 2715 b5Dither= dither8[dstY&1]; |
2716 g6Dither= dither4[dstY&1]; | |
2717 g5Dither= dither8[dstY&1]; | |
2718 r5Dither= dither8[(dstY+1)&1]; | |
2748 | 2719 #endif |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2720 if(dstY < dstH-2) |
3352 | 2721 { |
6503 | 2722 if(isPlanarYUV(dstFormat) || isGray(dstFormat)) //YV12 like |
3344 | 2723 { |
7351 | 2724 const int chrSkipMask= (1<<c->chrDstVSubSample)-1; |
2725 if((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi | |
3344 | 2726 if(vLumFilterSize == 1 && vChrFilterSize == 1) // Unscaled YV12 |
2727 { | |
2728 int16_t *lumBuf = lumPixBuf[0]; | |
2729 int16_t *chrBuf= chrPixBuf[0]; | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2730 RENAME(yuv2yuv1)(lumBuf, chrBuf, dest, uDest, vDest, dstW, chrDstW); |
3344 | 2731 } |
2732 else //General YV12 | |
2733 { | |
2734 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; | |
2735 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; | |
9413 | 2736 int i; |
2737 #ifdef HAVE_MMX | |
2738 for(i=0; i<vLumFilterSize; i++) | |
2739 { | |
2740 lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i]; | |
2741 lumMmxFilter[4*i+2]= | |
2742 lumMmxFilter[4*i+3]= | |
2743 ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001; | |
2744 } | |
2745 for(i=0; i<vChrFilterSize; i++) | |
2746 { | |
2747 chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i]; | |
2748 chrMmxFilter[4*i+2]= | |
2749 chrMmxFilter[4*i+3]= | |
2750 ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001; | |
2751 } | |
2752 #endif | |
2753 RENAME(yuv2yuvX)(c, | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2754 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2755 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2756 dest, uDest, vDest, dstW, chrDstW, |
9413 | 2757 lumMmxFilter, chrMmxFilter); |
3344 | 2758 } |
2759 } | |
2760 else | |
2761 { | |
2762 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; | |
2763 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; | |
2764 | |
2765 ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); | |
2766 ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); | |
2767 if(vLumFilterSize == 1 && vChrFilterSize == 2) //Unscaled RGB | |
2768 { | |
2769 int chrAlpha= vChrFilter[2*dstY+1]; | |
2770 | |
7723 | 2771 RENAME(yuv2packed1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1), |
6578 | 2772 dest, dstW, chrAlpha, dstFormat, flags, dstY); |
3344 | 2773 } |
2774 else if(vLumFilterSize == 2 && vChrFilterSize == 2) //BiLinear Upscale RGB | |
2775 { | |
2776 int lumAlpha= vLumFilter[2*dstY+1]; | |
2777 int chrAlpha= vChrFilter[2*dstY+1]; | |
2778 | |
7723 | 2779 RENAME(yuv2packed2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1), |
6578 | 2780 dest, dstW, lumAlpha, chrAlpha, dstY); |
3344 | 2781 } |
2782 else //General RGB | |
2783 { | |
9413 | 2784 int i; |
2785 #ifdef HAVE_MMX | |
2786 for(i=0; i<vLumFilterSize; i++) | |
2787 { | |
2788 lumMmxFilter[4*i+0]= (int32_t)lumSrcPtr[i]; | |
2789 lumMmxFilter[4*i+2]= | |
2790 lumMmxFilter[4*i+3]= | |
2791 ((uint16_t)vLumFilter[dstY*vLumFilterSize + i])*0x10001; | |
2792 } | |
2793 for(i=0; i<vChrFilterSize; i++) | |
2794 { | |
2795 chrMmxFilter[4*i+0]= (int32_t)chrSrcPtr[i]; | |
2796 chrMmxFilter[4*i+2]= | |
2797 chrMmxFilter[4*i+3]= | |
2798 ((uint16_t)vChrFilter[chrDstY*vChrFilterSize + i])*0x10001; | |
2799 } | |
2800 #endif | |
7723 | 2801 RENAME(yuv2packedX)(c, |
3344 | 2802 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, |
2803 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, | |
9413 | 2804 dest, dstW, dstY); |
3344 | 2805 } |
2806 } | |
3352 | 2807 } |
2808 else // hmm looks like we cant use MMX here without overwriting this arrays tail | |
2809 { | |
2810 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; | |
2811 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; | |
6615 | 2812 if(isPlanarYUV(dstFormat) || isGray(dstFormat)) //YV12 |
3352 | 2813 { |
7351 | 2814 const int chrSkipMask= (1<<c->chrDstVSubSample)-1; |
2815 if((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi | |
6540 | 2816 yuv2yuvXinC( |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2817 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2818 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
6540 | 2819 dest, uDest, vDest, dstW, chrDstW); |
3352 | 2820 } |
2821 else | |
2822 { | |
2823 ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); | |
2824 ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); | |
7723 | 2825 yuv2packedXinC(c, |
3352 | 2826 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, |
2827 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, | |
6578 | 2828 dest, dstW, dstY); |
3352 | 2829 } |
2830 } | |
3344 | 2831 } |
2534
cc9d3fd626f0
patch from Martin Decky <deckm1am@ss1000.ms.mff.cuni.cz> applied and unnecassery "memory" removed
michael
parents:
2521
diff
changeset
|
2832 |
cc9d3fd626f0
patch from Martin Decky <deckm1am@ss1000.ms.mff.cuni.cz> applied and unnecassery "memory" removed
michael
parents:
2521
diff
changeset
|
2833 #ifdef HAVE_MMX |
cc9d3fd626f0
patch from Martin Decky <deckm1am@ss1000.ms.mff.cuni.cz> applied and unnecassery "memory" removed
michael
parents:
2521
diff
changeset
|
2834 __asm __volatile(SFENCE:::"memory"); |
2566 | 2835 __asm __volatile(EMMS:::"memory"); |
2534
cc9d3fd626f0
patch from Martin Decky <deckm1am@ss1000.ms.mff.cuni.cz> applied and unnecassery "memory" removed
michael
parents:
2521
diff
changeset
|
2836 #endif |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2837 /* store changed local vars back in the context */ |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2838 c->dstY= dstY; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2839 c->lumBufIndex= lumBufIndex; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2840 c->chrBufIndex= chrBufIndex; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2841 c->lastInLumBuf= lastInLumBuf; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2842 c->lastInChrBuf= lastInChrBuf; |
3641 | 2843 } |