Mercurial > mplayer.hg
annotate postproc/swscale_template.c @ 9307:ec18ad315bbe
10l (copy & pasting the generator poly for crc32 from ogg was a bad idea...)
author | michael |
---|---|
date | Fri, 07 Feb 2003 00:18:09 +0000 |
parents | 25baacd1c650 |
children | 0d86fe21b281 |
rev | line source |
---|---|
4295 | 1 /* |
2 Copyright (C) 2001-2002 Michael Niedermayer <michaelni@gmx.at> | |
2216 | 3 |
4295 | 4 This program is free software; you can redistribute it and/or modify |
5 it under the terms of the GNU General Public License as published by | |
6 the Free Software Foundation; either version 2 of the License, or | |
7 (at your option) any later version. | |
2216 | 8 |
4295 | 9 This program is distributed in the hope that it will be useful, |
10 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 GNU General Public License for more details. | |
13 | |
14 You should have received a copy of the GNU General Public License | |
15 along with this program; if not, write to the Free Software | |
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
17 */ | |
2264 | 18 |
2540 | 19 #undef MOVNTQ |
2680 | 20 #undef PAVGB |
3136 | 21 #undef PREFETCH |
22 #undef PREFETCHW | |
23 #undef EMMS | |
24 #undef SFENCE | |
25 | |
26 #ifdef HAVE_3DNOW | |
27 /* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */ | |
28 #define EMMS "femms" | |
29 #else | |
30 #define EMMS "emms" | |
31 #endif | |
32 | |
33 #ifdef HAVE_3DNOW | |
34 #define PREFETCH "prefetch" | |
35 #define PREFETCHW "prefetchw" | |
36 #elif defined ( HAVE_MMX2 ) | |
37 #define PREFETCH "prefetchnta" | |
38 #define PREFETCHW "prefetcht0" | |
39 #else | |
40 #define PREFETCH "/nop" | |
41 #define PREFETCHW "/nop" | |
42 #endif | |
43 | |
44 #ifdef HAVE_MMX2 | |
45 #define SFENCE "sfence" | |
46 #else | |
47 #define SFENCE "/nop" | |
48 #endif | |
2232 | 49 |
2316 | 50 #ifdef HAVE_MMX2 |
51 #define PAVGB(a,b) "pavgb " #a ", " #b " \n\t" | |
52 #elif defined (HAVE_3DNOW) | |
53 #define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t" | |
54 #endif | |
55 | |
56 #ifdef HAVE_MMX2 | |
57 #define MOVNTQ(a,b) "movntq " #a ", " #b " \n\t" | |
58 #else | |
59 #define MOVNTQ(a,b) "movq " #a ", " #b " \n\t" | |
60 #endif | |
61 | |
3344 | 62 #define YSCALEYUV2YV12X(x) \ |
63 "xorl %%eax, %%eax \n\t"\ | |
64 "pxor %%mm3, %%mm3 \n\t"\ | |
65 "pxor %%mm4, %%mm4 \n\t"\ | |
66 "movl %0, %%edx \n\t"\ | |
67 ".balign 16 \n\t" /* FIXME Unroll? */\ | |
68 "1: \n\t"\ | |
69 "movl (%1, %%edx, 4), %%esi \n\t"\ | |
70 "movq (%2, %%edx, 8), %%mm0 \n\t" /* filterCoeff */\ | |
71 "movq " #x "(%%esi, %%eax, 2), %%mm2 \n\t" /* srcData */\ | |
72 "movq 8+" #x "(%%esi, %%eax, 2), %%mm5 \n\t" /* srcData */\ | |
73 "pmulhw %%mm0, %%mm2 \n\t"\ | |
74 "pmulhw %%mm0, %%mm5 \n\t"\ | |
75 "paddw %%mm2, %%mm3 \n\t"\ | |
76 "paddw %%mm5, %%mm4 \n\t"\ | |
77 "addl $1, %%edx \n\t"\ | |
78 " jnz 1b \n\t"\ | |
79 "psraw $3, %%mm3 \n\t"\ | |
80 "psraw $3, %%mm4 \n\t"\ | |
81 "packuswb %%mm4, %%mm3 \n\t"\ | |
82 MOVNTQ(%%mm3, (%3, %%eax))\ | |
83 "addl $8, %%eax \n\t"\ | |
84 "cmpl %4, %%eax \n\t"\ | |
85 "pxor %%mm3, %%mm3 \n\t"\ | |
86 "pxor %%mm4, %%mm4 \n\t"\ | |
87 "movl %0, %%edx \n\t"\ | |
88 "jb 1b \n\t" | |
89 | |
90 #define YSCALEYUV2YV121 \ | |
91 "movl %2, %%eax \n\t"\ | |
92 ".balign 16 \n\t" /* FIXME Unroll? */\ | |
93 "1: \n\t"\ | |
94 "movq (%0, %%eax, 2), %%mm0 \n\t"\ | |
95 "movq 8(%0, %%eax, 2), %%mm1 \n\t"\ | |
96 "psraw $7, %%mm0 \n\t"\ | |
97 "psraw $7, %%mm1 \n\t"\ | |
98 "packuswb %%mm1, %%mm0 \n\t"\ | |
99 MOVNTQ(%%mm0, (%1, %%eax))\ | |
100 "addl $8, %%eax \n\t"\ | |
101 "jnc 1b \n\t" | |
102 | |
103 /* | |
104 :: "m" (-lumFilterSize), "m" (-chrFilterSize), | |
105 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), | |
106 "r" (dest), "m" (dstW), | |
107 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) | |
108 : "%eax", "%ebx", "%ecx", "%edx", "%esi" | |
109 */ | |
7723 | 110 #define YSCALEYUV2PACKEDX \ |
3344 | 111 "xorl %%eax, %%eax \n\t"\ |
112 ".balign 16 \n\t"\ | |
113 "1: \n\t"\ | |
114 "movl %1, %%edx \n\t" /* -chrFilterSize */\ | |
6679 | 115 "movl %3, %%ebx \n\t" /* chrMmxFilter+chrFilterSize */\ |
116 "movl %7, %%ecx \n\t" /* chrSrc+chrFilterSize */\ | |
3344 | 117 "pxor %%mm3, %%mm3 \n\t"\ |
118 "pxor %%mm4, %%mm4 \n\t"\ | |
119 "2: \n\t"\ | |
120 "movl (%%ecx, %%edx, 4), %%esi \n\t"\ | |
121 "movq (%%ebx, %%edx, 8), %%mm0 \n\t" /* filterCoeff */\ | |
122 "movq (%%esi, %%eax), %%mm2 \n\t" /* UsrcData */\ | |
123 "movq 4096(%%esi, %%eax), %%mm5 \n\t" /* VsrcData */\ | |
124 "pmulhw %%mm0, %%mm2 \n\t"\ | |
125 "pmulhw %%mm0, %%mm5 \n\t"\ | |
126 "paddw %%mm2, %%mm3 \n\t"\ | |
127 "paddw %%mm5, %%mm4 \n\t"\ | |
128 "addl $1, %%edx \n\t"\ | |
129 " jnz 2b \n\t"\ | |
130 \ | |
131 "movl %0, %%edx \n\t" /* -lumFilterSize */\ | |
132 "movl %2, %%ebx \n\t" /* lumMmxFilter+lumFilterSize */\ | |
133 "movl %6, %%ecx \n\t" /* lumSrc+lumFilterSize */\ | |
134 "pxor %%mm1, %%mm1 \n\t"\ | |
135 "pxor %%mm7, %%mm7 \n\t"\ | |
136 "2: \n\t"\ | |
137 "movl (%%ecx, %%edx, 4), %%esi \n\t"\ | |
138 "movq (%%ebx, %%edx, 8), %%mm0 \n\t" /* filterCoeff */\ | |
139 "movq (%%esi, %%eax, 2), %%mm2 \n\t" /* Y1srcData */\ | |
140 "movq 8(%%esi, %%eax, 2), %%mm5 \n\t" /* Y2srcData */\ | |
141 "pmulhw %%mm0, %%mm2 \n\t"\ | |
142 "pmulhw %%mm0, %%mm5 \n\t"\ | |
143 "paddw %%mm2, %%mm1 \n\t"\ | |
144 "paddw %%mm5, %%mm7 \n\t"\ | |
145 "addl $1, %%edx \n\t"\ | |
146 " jnz 2b \n\t"\ | |
7723 | 147 |
148 | |
149 #define YSCALEYUV2RGBX \ | |
150 YSCALEYUV2PACKEDX\ | |
4248 | 151 "psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\ |
152 "psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\ | |
3344 | 153 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ |
154 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ | |
4248 | 155 "pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\ |
156 "pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\ | |
3344 | 157 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ |
4248 | 158 "pmulhw "MANGLE(ubCoeff)", %%mm2\n\t"\ |
159 "pmulhw "MANGLE(vrCoeff)", %%mm5\n\t"\ | |
160 "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\ | |
161 "psubw "MANGLE(w80)", %%mm7 \n\t" /* 8(Y-16)*/\ | |
162 "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\ | |
163 "pmulhw "MANGLE(yCoeff)", %%mm7 \n\t"\ | |
3344 | 164 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ |
165 "paddw %%mm3, %%mm4 \n\t"\ | |
166 "movq %%mm2, %%mm0 \n\t"\ | |
167 "movq %%mm5, %%mm6 \n\t"\ | |
168 "movq %%mm4, %%mm3 \n\t"\ | |
169 "punpcklwd %%mm2, %%mm2 \n\t"\ | |
170 "punpcklwd %%mm5, %%mm5 \n\t"\ | |
171 "punpcklwd %%mm4, %%mm4 \n\t"\ | |
172 "paddw %%mm1, %%mm2 \n\t"\ | |
173 "paddw %%mm1, %%mm5 \n\t"\ | |
174 "paddw %%mm1, %%mm4 \n\t"\ | |
175 "punpckhwd %%mm0, %%mm0 \n\t"\ | |
176 "punpckhwd %%mm6, %%mm6 \n\t"\ | |
177 "punpckhwd %%mm3, %%mm3 \n\t"\ | |
178 "paddw %%mm7, %%mm0 \n\t"\ | |
179 "paddw %%mm7, %%mm6 \n\t"\ | |
180 "paddw %%mm7, %%mm3 \n\t"\ | |
181 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ | |
182 "packuswb %%mm0, %%mm2 \n\t"\ | |
183 "packuswb %%mm6, %%mm5 \n\t"\ | |
184 "packuswb %%mm3, %%mm4 \n\t"\ | |
185 "pxor %%mm7, %%mm7 \n\t" | |
186 | |
2316 | 187 #define FULL_YSCALEYUV2RGB \ |
188 "pxor %%mm7, %%mm7 \n\t"\ | |
189 "movd %6, %%mm6 \n\t" /*yalpha1*/\ | |
190 "punpcklwd %%mm6, %%mm6 \n\t"\ | |
191 "punpcklwd %%mm6, %%mm6 \n\t"\ | |
192 "movd %7, %%mm5 \n\t" /*uvalpha1*/\ | |
193 "punpcklwd %%mm5, %%mm5 \n\t"\ | |
194 "punpcklwd %%mm5, %%mm5 \n\t"\ | |
195 "xorl %%eax, %%eax \n\t"\ | |
2800 | 196 ".balign 16 \n\t"\ |
2316 | 197 "1: \n\t"\ |
198 "movq (%0, %%eax, 2), %%mm0 \n\t" /*buf0[eax]*/\ | |
199 "movq (%1, %%eax, 2), %%mm1 \n\t" /*buf1[eax]*/\ | |
200 "movq (%2, %%eax,2), %%mm2 \n\t" /* uvbuf0[eax]*/\ | |
201 "movq (%3, %%eax,2), %%mm3 \n\t" /* uvbuf1[eax]*/\ | |
202 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\ | |
203 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ | |
204 "pmulhw %%mm6, %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ | |
205 "pmulhw %%mm5, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\ | |
206 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ | |
207 "movq 4096(%2, %%eax,2), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ | |
208 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ | |
209 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ | |
210 "movq 4096(%3, %%eax,2), %%mm0 \n\t" /* uvbuf1[eax+2048]*/\ | |
211 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\ | |
212 "psubw %%mm0, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ | |
4248 | 213 "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\ |
214 "psubw "MANGLE(w400)", %%mm3 \n\t" /* 8(U-128)*/\ | |
215 "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\ | |
2316 | 216 \ |
217 \ | |
218 "pmulhw %%mm5, %%mm4 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\ | |
219 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ | |
4248 | 220 "pmulhw "MANGLE(ubCoeff)", %%mm3\n\t"\ |
2316 | 221 "psraw $4, %%mm0 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ |
4248 | 222 "pmulhw "MANGLE(ugCoeff)", %%mm2\n\t"\ |
2316 | 223 "paddw %%mm4, %%mm0 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\ |
4248 | 224 "psubw "MANGLE(w400)", %%mm0 \n\t" /* (V-128)8*/\ |
2316 | 225 \ |
226 \ | |
227 "movq %%mm0, %%mm4 \n\t" /* (V-128)8*/\ | |
4248 | 228 "pmulhw "MANGLE(vrCoeff)", %%mm0\n\t"\ |
229 "pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\ | |
2316 | 230 "paddw %%mm1, %%mm3 \n\t" /* B*/\ |
231 "paddw %%mm1, %%mm0 \n\t" /* R*/\ | |
232 "packuswb %%mm3, %%mm3 \n\t"\ | |
233 \ | |
234 "packuswb %%mm0, %%mm0 \n\t"\ | |
235 "paddw %%mm4, %%mm2 \n\t"\ | |
236 "paddw %%mm2, %%mm1 \n\t" /* G*/\ | |
237 \ | |
238 "packuswb %%mm1, %%mm1 \n\t" | |
239 | |
7723 | 240 #define YSCALEYUV2PACKED \ |
241 "movd %6, %%mm6 \n\t" /*yalpha1*/\ | |
242 "punpcklwd %%mm6, %%mm6 \n\t"\ | |
243 "punpcklwd %%mm6, %%mm6 \n\t"\ | |
244 "psraw $3, %%mm6 \n\t"\ | |
245 "movq %%mm6, 3968(%2) \n\t"\ | |
246 "movd %7, %%mm5 \n\t" /*uvalpha1*/\ | |
247 "punpcklwd %%mm5, %%mm5 \n\t"\ | |
248 "punpcklwd %%mm5, %%mm5 \n\t"\ | |
249 "psraw $3, %%mm5 \n\t"\ | |
250 "movq %%mm5, 3976(%2) \n\t"\ | |
251 "xorl %%eax, %%eax \n\t"\ | |
252 ".balign 16 \n\t"\ | |
253 "1: \n\t"\ | |
254 "movq (%2, %%eax), %%mm2 \n\t" /* uvbuf0[eax]*/\ | |
255 "movq (%3, %%eax), %%mm3 \n\t" /* uvbuf1[eax]*/\ | |
256 "movq 4096(%2, %%eax), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ | |
257 "movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ | |
258 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ | |
259 "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ | |
260 "movq 3976(%2), %%mm0 \n\t"\ | |
261 "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\ | |
262 "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\ | |
263 "psraw $7, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ | |
264 "psraw $7, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ | |
265 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\ | |
266 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\ | |
267 "movq (%0, %%eax, 2), %%mm0 \n\t" /*buf0[eax]*/\ | |
268 "movq (%1, %%eax, 2), %%mm1 \n\t" /*buf1[eax]*/\ | |
269 "movq 8(%0, %%eax, 2), %%mm6 \n\t" /*buf0[eax]*/\ | |
270 "movq 8(%1, %%eax, 2), %%mm7 \n\t" /*buf1[eax]*/\ | |
271 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\ | |
272 "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\ | |
273 "pmulhw 3968(%2), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ | |
274 "pmulhw 3968(%2), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ | |
275 "psraw $7, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ | |
276 "psraw $7, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ | |
277 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ | |
278 "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ | |
279 | |
2316 | 280 #define YSCALEYUV2RGB \ |
281 "movd %6, %%mm6 \n\t" /*yalpha1*/\ | |
282 "punpcklwd %%mm6, %%mm6 \n\t"\ | |
283 "punpcklwd %%mm6, %%mm6 \n\t"\ | |
6554 | 284 "movq %%mm6, 3968(%2) \n\t"\ |
2316 | 285 "movd %7, %%mm5 \n\t" /*uvalpha1*/\ |
286 "punpcklwd %%mm5, %%mm5 \n\t"\ | |
287 "punpcklwd %%mm5, %%mm5 \n\t"\ | |
6554 | 288 "movq %%mm5, 3976(%2) \n\t"\ |
2316 | 289 "xorl %%eax, %%eax \n\t"\ |
2800 | 290 ".balign 16 \n\t"\ |
2316 | 291 "1: \n\t"\ |
292 "movq (%2, %%eax), %%mm2 \n\t" /* uvbuf0[eax]*/\ | |
293 "movq (%3, %%eax), %%mm3 \n\t" /* uvbuf1[eax]*/\ | |
294 "movq 4096(%2, %%eax), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ | |
295 "movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ | |
296 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ | |
297 "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ | |
6554 | 298 "movq 3976(%2), %%mm0 \n\t"\ |
2316 | 299 "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\ |
300 "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\ | |
301 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ | |
302 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ | |
303 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\ | |
304 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\ | |
4248 | 305 "psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\ |
306 "psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\ | |
2316 | 307 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ |
308 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ | |
4248 | 309 "pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\ |
310 "pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\ | |
2316 | 311 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ |
312 "movq (%0, %%eax, 2), %%mm0 \n\t" /*buf0[eax]*/\ | |
313 "movq (%1, %%eax, 2), %%mm1 \n\t" /*buf1[eax]*/\ | |
314 "movq 8(%0, %%eax, 2), %%mm6 \n\t" /*buf0[eax]*/\ | |
315 "movq 8(%1, %%eax, 2), %%mm7 \n\t" /*buf1[eax]*/\ | |
316 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\ | |
317 "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\ | |
6554 | 318 "pmulhw 3968(%2), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ |
319 "pmulhw 3968(%2), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ | |
2316 | 320 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
321 "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ | |
322 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ | |
323 "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ | |
4248 | 324 "pmulhw "MANGLE(ubCoeff)", %%mm2\n\t"\ |
325 "pmulhw "MANGLE(vrCoeff)", %%mm5\n\t"\ | |
326 "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\ | |
327 "psubw "MANGLE(w80)", %%mm7 \n\t" /* 8(Y-16)*/\ | |
328 "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\ | |
329 "pmulhw "MANGLE(yCoeff)", %%mm7 \n\t"\ | |
2316 | 330 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ |
331 "paddw %%mm3, %%mm4 \n\t"\ | |
332 "movq %%mm2, %%mm0 \n\t"\ | |
333 "movq %%mm5, %%mm6 \n\t"\ | |
334 "movq %%mm4, %%mm3 \n\t"\ | |
335 "punpcklwd %%mm2, %%mm2 \n\t"\ | |
336 "punpcklwd %%mm5, %%mm5 \n\t"\ | |
337 "punpcklwd %%mm4, %%mm4 \n\t"\ | |
338 "paddw %%mm1, %%mm2 \n\t"\ | |
339 "paddw %%mm1, %%mm5 \n\t"\ | |
340 "paddw %%mm1, %%mm4 \n\t"\ | |
341 "punpckhwd %%mm0, %%mm0 \n\t"\ | |
342 "punpckhwd %%mm6, %%mm6 \n\t"\ | |
343 "punpckhwd %%mm3, %%mm3 \n\t"\ | |
344 "paddw %%mm7, %%mm0 \n\t"\ | |
345 "paddw %%mm7, %%mm6 \n\t"\ | |
346 "paddw %%mm7, %%mm3 \n\t"\ | |
347 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ | |
348 "packuswb %%mm0, %%mm2 \n\t"\ | |
349 "packuswb %%mm6, %%mm5 \n\t"\ | |
350 "packuswb %%mm3, %%mm4 \n\t"\ | |
351 "pxor %%mm7, %%mm7 \n\t" | |
7723 | 352 |
353 #define YSCALEYUV2PACKED1 \ | |
354 "xorl %%eax, %%eax \n\t"\ | |
355 ".balign 16 \n\t"\ | |
356 "1: \n\t"\ | |
357 "movq (%2, %%eax), %%mm3 \n\t" /* uvbuf0[eax]*/\ | |
358 "movq 4096(%2, %%eax), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ | |
359 "psraw $7, %%mm3 \n\t" \ | |
360 "psraw $7, %%mm4 \n\t" \ | |
361 "movq (%0, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\ | |
362 "movq 8(%0, %%eax, 2), %%mm7 \n\t" /*buf0[eax]*/\ | |
363 "psraw $7, %%mm1 \n\t" \ | |
364 "psraw $7, %%mm7 \n\t" \ | |
365 | |
2316 | 366 #define YSCALEYUV2RGB1 \ |
367 "xorl %%eax, %%eax \n\t"\ | |
2800 | 368 ".balign 16 \n\t"\ |
2316 | 369 "1: \n\t"\ |
370 "movq (%2, %%eax), %%mm3 \n\t" /* uvbuf0[eax]*/\ | |
371 "movq 4096(%2, %%eax), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ | |
372 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ | |
373 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ | |
4248 | 374 "psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\ |
375 "psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\ | |
2316 | 376 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ |
377 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ | |
4248 | 378 "pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\ |
379 "pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\ | |
2316 | 380 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ |
2569 | 381 "movq (%0, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\ |
382 "movq 8(%0, %%eax, 2), %%mm7 \n\t" /*buf0[eax]*/\ | |
383 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ | |
384 "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ | |
4248 | 385 "pmulhw "MANGLE(ubCoeff)", %%mm2\n\t"\ |
386 "pmulhw "MANGLE(vrCoeff)", %%mm5\n\t"\ | |
387 "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\ | |
388 "psubw "MANGLE(w80)", %%mm7 \n\t" /* 8(Y-16)*/\ | |
389 "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\ | |
390 "pmulhw "MANGLE(yCoeff)", %%mm7 \n\t"\ | |
2569 | 391 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ |
392 "paddw %%mm3, %%mm4 \n\t"\ | |
393 "movq %%mm2, %%mm0 \n\t"\ | |
394 "movq %%mm5, %%mm6 \n\t"\ | |
395 "movq %%mm4, %%mm3 \n\t"\ | |
396 "punpcklwd %%mm2, %%mm2 \n\t"\ | |
397 "punpcklwd %%mm5, %%mm5 \n\t"\ | |
398 "punpcklwd %%mm4, %%mm4 \n\t"\ | |
399 "paddw %%mm1, %%mm2 \n\t"\ | |
400 "paddw %%mm1, %%mm5 \n\t"\ | |
401 "paddw %%mm1, %%mm4 \n\t"\ | |
402 "punpckhwd %%mm0, %%mm0 \n\t"\ | |
403 "punpckhwd %%mm6, %%mm6 \n\t"\ | |
404 "punpckhwd %%mm3, %%mm3 \n\t"\ | |
405 "paddw %%mm7, %%mm0 \n\t"\ | |
406 "paddw %%mm7, %%mm6 \n\t"\ | |
407 "paddw %%mm7, %%mm3 \n\t"\ | |
408 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ | |
409 "packuswb %%mm0, %%mm2 \n\t"\ | |
410 "packuswb %%mm6, %%mm5 \n\t"\ | |
411 "packuswb %%mm3, %%mm4 \n\t"\ | |
412 "pxor %%mm7, %%mm7 \n\t" | |
413 | |
7723 | 414 #define YSCALEYUV2PACKED1b \ |
415 "xorl %%eax, %%eax \n\t"\ | |
416 ".balign 16 \n\t"\ | |
417 "1: \n\t"\ | |
418 "movq (%2, %%eax), %%mm2 \n\t" /* uvbuf0[eax]*/\ | |
419 "movq (%3, %%eax), %%mm3 \n\t" /* uvbuf1[eax]*/\ | |
420 "movq 4096(%2, %%eax), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ | |
421 "movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ | |
422 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\ | |
423 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\ | |
424 "psrlw $8, %%mm3 \n\t" \ | |
425 "psrlw $8, %%mm4 \n\t" \ | |
426 "movq (%0, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\ | |
427 "movq 8(%0, %%eax, 2), %%mm7 \n\t" /*buf0[eax]*/\ | |
428 "psraw $7, %%mm1 \n\t" \ | |
429 "psraw $7, %%mm7 \n\t" | |
430 | |
2569 | 431 // do vertical chrominance interpolation |
432 #define YSCALEYUV2RGB1b \ | |
433 "xorl %%eax, %%eax \n\t"\ | |
2800 | 434 ".balign 16 \n\t"\ |
2569 | 435 "1: \n\t"\ |
436 "movq (%2, %%eax), %%mm2 \n\t" /* uvbuf0[eax]*/\ | |
437 "movq (%3, %%eax), %%mm3 \n\t" /* uvbuf1[eax]*/\ | |
438 "movq 4096(%2, %%eax), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ | |
439 "movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ | |
2576 | 440 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\ |
441 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\ | |
3344 | 442 "psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\ |
443 "psrlw $5, %%mm4 \n\t" /*FIXME might overflow*/\ | |
4248 | 444 "psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\ |
445 "psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\ | |
2569 | 446 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ |
447 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ | |
4248 | 448 "pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\ |
449 "pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\ | |
2569 | 450 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ |
451 "movq (%0, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\ | |
452 "movq 8(%0, %%eax, 2), %%mm7 \n\t" /*buf0[eax]*/\ | |
2316 | 453 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
454 "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ | |
4248 | 455 "pmulhw "MANGLE(ubCoeff)", %%mm2\n\t"\ |
456 "pmulhw "MANGLE(vrCoeff)", %%mm5\n\t"\ | |
457 "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\ | |
458 "psubw "MANGLE(w80)", %%mm7 \n\t" /* 8(Y-16)*/\ | |
459 "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\ | |
460 "pmulhw "MANGLE(yCoeff)", %%mm7 \n\t"\ | |
2316 | 461 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ |
462 "paddw %%mm3, %%mm4 \n\t"\ | |
463 "movq %%mm2, %%mm0 \n\t"\ | |
464 "movq %%mm5, %%mm6 \n\t"\ | |
465 "movq %%mm4, %%mm3 \n\t"\ | |
466 "punpcklwd %%mm2, %%mm2 \n\t"\ | |
467 "punpcklwd %%mm5, %%mm5 \n\t"\ | |
468 "punpcklwd %%mm4, %%mm4 \n\t"\ | |
469 "paddw %%mm1, %%mm2 \n\t"\ | |
470 "paddw %%mm1, %%mm5 \n\t"\ | |
471 "paddw %%mm1, %%mm4 \n\t"\ | |
472 "punpckhwd %%mm0, %%mm0 \n\t"\ | |
473 "punpckhwd %%mm6, %%mm6 \n\t"\ | |
474 "punpckhwd %%mm3, %%mm3 \n\t"\ | |
475 "paddw %%mm7, %%mm0 \n\t"\ | |
476 "paddw %%mm7, %%mm6 \n\t"\ | |
477 "paddw %%mm7, %%mm3 \n\t"\ | |
478 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ | |
479 "packuswb %%mm0, %%mm2 \n\t"\ | |
480 "packuswb %%mm6, %%mm5 \n\t"\ | |
481 "packuswb %%mm3, %%mm4 \n\t"\ | |
482 "pxor %%mm7, %%mm7 \n\t" | |
483 | |
484 #define WRITEBGR32 \ | |
485 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ | |
486 "movq %%mm2, %%mm1 \n\t" /* B */\ | |
487 "movq %%mm5, %%mm6 \n\t" /* R */\ | |
488 "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\ | |
489 "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\ | |
490 "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\ | |
491 "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\ | |
492 "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\ | |
493 "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\ | |
494 "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\ | |
495 "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\ | |
496 "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\ | |
497 "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\ | |
498 \ | |
499 MOVNTQ(%%mm0, (%4, %%eax, 4))\ | |
500 MOVNTQ(%%mm2, 8(%4, %%eax, 4))\ | |
501 MOVNTQ(%%mm1, 16(%4, %%eax, 4))\ | |
502 MOVNTQ(%%mm3, 24(%4, %%eax, 4))\ | |
503 \ | |
504 "addl $8, %%eax \n\t"\ | |
505 "cmpl %5, %%eax \n\t"\ | |
506 " jb 1b \n\t" | |
507 | |
508 #define WRITEBGR16 \ | |
4248 | 509 "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\ |
510 "pand "MANGLE(bFC)", %%mm4 \n\t" /* G */\ | |
511 "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\ | |
2669 | 512 "psrlq $3, %%mm2 \n\t"\ |
513 \ | |
514 "movq %%mm2, %%mm1 \n\t"\ | |
515 "movq %%mm4, %%mm3 \n\t"\ | |
2316 | 516 \ |
2669 | 517 "punpcklbw %%mm7, %%mm3 \n\t"\ |
518 "punpcklbw %%mm5, %%mm2 \n\t"\ | |
519 "punpckhbw %%mm7, %%mm4 \n\t"\ | |
520 "punpckhbw %%mm5, %%mm1 \n\t"\ | |
2316 | 521 \ |
2669 | 522 "psllq $3, %%mm3 \n\t"\ |
523 "psllq $3, %%mm4 \n\t"\ | |
2316 | 524 \ |
525 "por %%mm3, %%mm2 \n\t"\ | |
526 "por %%mm4, %%mm1 \n\t"\ | |
527 \ | |
528 MOVNTQ(%%mm2, (%4, %%eax, 2))\ | |
529 MOVNTQ(%%mm1, 8(%4, %%eax, 2))\ | |
530 \ | |
531 "addl $8, %%eax \n\t"\ | |
532 "cmpl %5, %%eax \n\t"\ | |
533 " jb 1b \n\t" | |
534 | |
535 #define WRITEBGR15 \ | |
4248 | 536 "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\ |
537 "pand "MANGLE(bF8)", %%mm4 \n\t" /* G */\ | |
538 "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\ | |
2669 | 539 "psrlq $3, %%mm2 \n\t"\ |
540 "psrlq $1, %%mm5 \n\t"\ | |
2316 | 541 \ |
2669 | 542 "movq %%mm2, %%mm1 \n\t"\ |
543 "movq %%mm4, %%mm3 \n\t"\ | |
2316 | 544 \ |
2669 | 545 "punpcklbw %%mm7, %%mm3 \n\t"\ |
546 "punpcklbw %%mm5, %%mm2 \n\t"\ | |
547 "punpckhbw %%mm7, %%mm4 \n\t"\ | |
548 "punpckhbw %%mm5, %%mm1 \n\t"\ | |
2316 | 549 \ |
2669 | 550 "psllq $2, %%mm3 \n\t"\ |
551 "psllq $2, %%mm4 \n\t"\ | |
2316 | 552 \ |
553 "por %%mm3, %%mm2 \n\t"\ | |
554 "por %%mm4, %%mm1 \n\t"\ | |
555 \ | |
556 MOVNTQ(%%mm2, (%4, %%eax, 2))\ | |
557 MOVNTQ(%%mm1, 8(%4, %%eax, 2))\ | |
558 \ | |
559 "addl $8, %%eax \n\t"\ | |
560 "cmpl %5, %%eax \n\t"\ | |
561 " jb 1b \n\t" | |
2669 | 562 |
2730 | 563 #define WRITEBGR24OLD \ |
2316 | 564 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ |
565 "movq %%mm2, %%mm1 \n\t" /* B */\ | |
566 "movq %%mm5, %%mm6 \n\t" /* R */\ | |
567 "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\ | |
568 "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\ | |
569 "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\ | |
570 "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\ | |
571 "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\ | |
572 "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\ | |
2326 | 573 "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\ |
574 "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\ | |
575 "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\ | |
576 "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\ | |
2316 | 577 \ |
578 "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\ | |
579 "psrlq $8, %%mm0 \n\t" /* 00RGB0RG 0 */\ | |
4248 | 580 "pand "MANGLE(bm00000111)", %%mm4\n\t" /* 00000RGB 0 */\ |
581 "pand "MANGLE(bm11111000)", %%mm0\n\t" /* 00RGB000 0.5 */\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
582 "por %%mm4, %%mm0 \n\t" /* 00RGBRGB 0 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
583 "movq %%mm2, %%mm4 \n\t" /* 0RGB0RGB 1 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
584 "psllq $48, %%mm2 \n\t" /* GB000000 1 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
585 "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
586 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
587 "movq %%mm4, %%mm2 \n\t" /* 0RGB0RGB 1 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
588 "psrld $16, %%mm4 \n\t" /* 000R000R 1 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
589 "psrlq $24, %%mm2 \n\t" /* 0000RGB0 1.5 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
590 "por %%mm4, %%mm2 \n\t" /* 000RRGBR 1 */\ |
4248 | 591 "pand "MANGLE(bm00001111)", %%mm2\n\t" /* 0000RGBR 1 */\ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
592 "movq %%mm1, %%mm4 \n\t" /* 0RGB0RGB 2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
593 "psrlq $8, %%mm1 \n\t" /* 00RGB0RG 2 */\ |
4248 | 594 "pand "MANGLE(bm00000111)", %%mm4\n\t" /* 00000RGB 2 */\ |
595 "pand "MANGLE(bm11111000)", %%mm1\n\t" /* 00RGB000 2.5 */\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
596 "por %%mm4, %%mm1 \n\t" /* 00RGBRGB 2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
597 "movq %%mm1, %%mm4 \n\t" /* 00RGBRGB 2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
598 "psllq $32, %%mm1 \n\t" /* BRGB0000 2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
599 "por %%mm1, %%mm2 \n\t" /* BRGBRGBR 1 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
600 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
601 "psrlq $32, %%mm4 \n\t" /* 000000RG 2.5 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
602 "movq %%mm3, %%mm5 \n\t" /* 0RGB0RGB 3 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
603 "psrlq $8, %%mm3 \n\t" /* 00RGB0RG 3 */\ |
4248 | 604 "pand "MANGLE(bm00000111)", %%mm5\n\t" /* 00000RGB 3 */\ |
605 "pand "MANGLE(bm11111000)", %%mm3\n\t" /* 00RGB000 3.5 */\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
606 "por %%mm5, %%mm3 \n\t" /* 00RGBRGB 3 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
607 "psllq $16, %%mm3 \n\t" /* RGBRGB00 3 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
608 "por %%mm4, %%mm3 \n\t" /* RGBRGBRG 2.5 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
609 \ |
2728 | 610 MOVNTQ(%%mm0, (%%ebx))\ |
611 MOVNTQ(%%mm2, 8(%%ebx))\ | |
612 MOVNTQ(%%mm3, 16(%%ebx))\ | |
613 "addl $24, %%ebx \n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
614 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
615 "addl $8, %%eax \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
616 "cmpl %5, %%eax \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
617 " jb 1b \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
618 |
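/* WRITEBGR24MMX below takes the 8 pixels held as byte vectors in %%mm2 (B), %%mm4 (G) and
 * %%mm5 (R), interleaves them into four 0RGB0RGB quadwords and then shifts/ORs those into
 * three fully packed 24-bit quadwords, stored with MOVNTQ while %%ebx advances by 24 bytes
 * per iteration. */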
2730 | 619 #define WRITEBGR24MMX \ |
620 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ | |
621 "movq %%mm2, %%mm1 \n\t" /* B */\ | |
622 "movq %%mm5, %%mm6 \n\t" /* R */\ | |
623 "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\ | |
624 "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\ | |
625 "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\ | |
626 "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\ | |
627 "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\ | |
628 "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\ | |
629 "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\ | |
630 "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\ | |
631 "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\ | |
632 "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\ | |
633 \ | |
634 "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\ | |
635 "movq %%mm2, %%mm6 \n\t" /* 0RGB0RGB 1 */\ | |
636 "movq %%mm1, %%mm5 \n\t" /* 0RGB0RGB 2 */\ | |
637 "movq %%mm3, %%mm7 \n\t" /* 0RGB0RGB 3 */\ | |
638 \ | |
639 "psllq $40, %%mm0 \n\t" /* RGB00000 0 */\ | |
640 "psllq $40, %%mm2 \n\t" /* RGB00000 1 */\ | |
641 "psllq $40, %%mm1 \n\t" /* RGB00000 2 */\ | |
642 "psllq $40, %%mm3 \n\t" /* RGB00000 3 */\ | |
643 \ | |
644 "punpckhdq %%mm4, %%mm0 \n\t" /* 0RGBRGB0 0 */\ | |
645 "punpckhdq %%mm6, %%mm2 \n\t" /* 0RGBRGB0 1 */\ | |
646 "punpckhdq %%mm5, %%mm1 \n\t" /* 0RGBRGB0 2 */\ | |
647 "punpckhdq %%mm7, %%mm3 \n\t" /* 0RGBRGB0 3 */\ | |
648 \ | |
649 "psrlq $8, %%mm0 \n\t" /* 00RGBRGB 0 */\ | |
650 "movq %%mm2, %%mm6 \n\t" /* 0RGBRGB0 1 */\ | |
651 "psllq $40, %%mm2 \n\t" /* GB000000 1 */\ | |
652 "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\ | |
653 MOVNTQ(%%mm0, (%%ebx))\ | |
654 \ | |
655 "psrlq $24, %%mm6 \n\t" /* 0000RGBR 1 */\ | |
656 "movq %%mm1, %%mm5 \n\t" /* 0RGBRGB0 2 */\ | |
657 "psllq $24, %%mm1 \n\t" /* BRGB0000 2 */\ | |
658 "por %%mm1, %%mm6 \n\t" /* BRGBRGBR 1 */\ | |
659 MOVNTQ(%%mm6, 8(%%ebx))\ | |
660 \ | |
661 "psrlq $40, %%mm5 \n\t" /* 000000RG 2 */\ | |
662 "psllq $8, %%mm3 \n\t" /* RGBRGB00 3 */\ | |
663 "por %%mm3, %%mm5 \n\t" /* RGBRGBRG 2 */\ | |
664 MOVNTQ(%%mm5, 16(%%ebx))\ | |
665 \ | |
666 "addl $24, %%ebx \n\t"\ | |
667 \ | |
668 "addl $8, %%eax \n\t"\ | |
669 "cmpl %5, %%eax \n\t"\ | |
670 " jb 1b \n\t" | |
671 | |
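/* WRITEBGR24MMX2 below produces the same packed 24-bit output, but builds each destination
 * quadword with pshufw plus the M24A/M24B/M24C byte masks, so it needs fewer shift/or steps
 * per 8-pixel group than the plain MMX version above. */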
672 #define WRITEBGR24MMX2 \ | |
673 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ | |
4248 | 674 "movq "MANGLE(M24A)", %%mm0 \n\t"\ |
675 "movq "MANGLE(M24C)", %%mm7 \n\t"\ | |
2730 | 676 "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */\ |
677 "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */\ | |
678 "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */\ | |
679 \ | |
680 "pand %%mm0, %%mm1 \n\t" /* B2 B1 B0 */\ | |
681 "pand %%mm0, %%mm3 \n\t" /* G2 G1 G0 */\ | |
682 "pand %%mm7, %%mm6 \n\t" /* R1 R0 */\ | |
683 \ | |
684 "psllq $8, %%mm3 \n\t" /* G2 G1 G0 */\ | |
685 "por %%mm1, %%mm6 \n\t"\ | |
686 "por %%mm3, %%mm6 \n\t"\ | |
687 MOVNTQ(%%mm6, (%%ebx))\ | |
688 \ | |
689 "psrlq $8, %%mm4 \n\t" /* 00 G7 G6 G5 G4 G3 G2 G1 */\ | |
690 "pshufw $0xA5, %%mm2, %%mm1 \n\t" /* B5 B4 B5 B4 B3 B2 B3 B2 */\ | |
691 "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */\ | |
692 "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */\ | |
693 \ | |
4248 | 694 "pand "MANGLE(M24B)", %%mm1 \n\t" /* B5 B4 B3 */\ |
2730 | 695 "pand %%mm7, %%mm3 \n\t" /* G4 G3 */\ |
696 "pand %%mm0, %%mm6 \n\t" /* R4 R3 R2 */\ | |
697 \ | |
698 "por %%mm1, %%mm3 \n\t" /* B5 G4 B4 G3 B3 */\ | |
699 "por %%mm3, %%mm6 \n\t"\ | |
700 MOVNTQ(%%mm6, 8(%%ebx))\ | |
701 \ | |
702 "pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6 B7 B6 B6 B7 */\ | |
703 "pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7 G6 G5 G6 G5 */\ | |
704 "pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6 R5 R4 R5 R4 */\ | |
705 \ | |
706 "pand %%mm7, %%mm1 \n\t" /* B7 B6 */\ | |
707 "pand %%mm0, %%mm3 \n\t" /* G7 G6 G5 */\ | |
4248 | 708 "pand "MANGLE(M24B)", %%mm6 \n\t" /* R7 R6 R5 */\ |
2730 | 709 \ |
710 "por %%mm1, %%mm3 \n\t"\ | |
711 "por %%mm3, %%mm6 \n\t"\ | |
712 MOVNTQ(%%mm6, 16(%%ebx))\ | |
713 \ | |
714 "addl $24, %%ebx \n\t"\ | |
715 \ | |
716 "addl $8, %%eax \n\t"\ | |
717 "cmpl %5, %%eax \n\t"\ | |
718 " jb 1b \n\t" | |
719 | |
720 #ifdef HAVE_MMX2 | |
3126 | 721 #undef WRITEBGR24 |
2730 | 722 #define WRITEBGR24 WRITEBGR24MMX2 |
723 #else | |
3126 | 724 #undef WRITEBGR24 |
2730 | 725 #define WRITEBGR24 WRITEBGR24MMX |
726 #endif | |
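/* So WRITEBGR24 resolves to the pshufw-based MMX2 writer when HAVE_MMX2 is defined and to
 * the plain MMX writer otherwise. */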
727 | |
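/* WRITEYUY2 packs the luma words (%%mm1/%%mm7) and chroma words (%%mm3/%%mm4) down to bytes,
 * interleaves them into YUYV order and writes two quadwords per iteration with MOVNTQ. */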
7723 | 728 #define WRITEYUY2 \ |
729 "packuswb %%mm3, %%mm3 \n\t"\ | |
730 "packuswb %%mm4, %%mm4 \n\t"\ | |
731 "packuswb %%mm7, %%mm1 \n\t"\ | |
732 "punpcklbw %%mm4, %%mm3 \n\t"\ | |
733 "movq %%mm1, %%mm7 \n\t"\ | |
734 "punpcklbw %%mm3, %%mm1 \n\t"\ | |
735 "punpckhbw %%mm3, %%mm7 \n\t"\ | |
736 \ | |
737 MOVNTQ(%%mm1, (%4, %%eax, 2))\ | |
738 MOVNTQ(%%mm7, 8(%4, %%eax, 2))\ | |
739 \ | |
740 "addl $8, %%eax \n\t"\ | |
741 "cmpl %5, %%eax \n\t"\ | |
742 " jb 1b \n\t" | |
743 | |
744 | |
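/* yuv2yuvX: vertical filtering for planar output. Each destination line is a weighted sum of
 * lumFilterSize (chrFilterSize for chroma) source lines; the MMX path runs the
 * YSCALEYUV2YV12X loop once for Y and, when uDest is non-NULL, once each for U and V, while
 * non-MMX builds fall back to yuv2yuvXinC(). */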
3344 | 745 static inline void RENAME(yuv2yuvX)(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, |
746 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, | |
747 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW, |
3344 | 748 int16_t * lumMmxFilter, int16_t * chrMmxFilter) |
2519 | 749 { |
3344 | 750 #ifdef HAVE_MMX |
751 if(uDest != NULL) | |
752 { | |
753 asm volatile( | |
754 YSCALEYUV2YV12X(0) | |
755 :: "m" (-chrFilterSize), "r" (chrSrc+chrFilterSize), | |
756 "r" (chrMmxFilter+chrFilterSize*4), "r" (uDest), "m" (chrDstW) |
3344 | 757 : "%eax", "%edx", "%esi" |
758 ); | |
2519 | 759 |
3344 | 760 asm volatile( |
761 YSCALEYUV2YV12X(4096) | |
762 :: "m" (-chrFilterSize), "r" (chrSrc+chrFilterSize), | |
763 "r" (chrMmxFilter+chrFilterSize*4), "r" (vDest), "m" (chrDstW) |
3344 | 764 : "%eax", "%edx", "%esi" |
765 ); | |
766 } | |
2521 | 767 |
3344 | 768 asm volatile( |
769 YSCALEYUV2YV12X(0) | |
770 :: "m" (-lumFilterSize), "r" (lumSrc+lumFilterSize), | |
771 "r" (lumMmxFilter+lumFilterSize*4), "r" (dest), "m" (dstW) | |
772 : "%eax", "%edx", "%esi" | |
773 ); | |
774 #else | |
6540 | 775 yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize, |
3352 | 776 chrFilter, chrSrc, chrFilterSize, |
6540 | 777 dest, uDest, vDest, dstW, chrDstW); |
3344 | 778 #endif |
779 } | |
780 | |
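/* yuv2yuv1: 1:1 vertical path. The int16_t intermediates are only shifted down by 7 bits and
 * clipped to 0..255, either with the YSCALEYUV2YV121 MMX loop or with the plain C loop below. */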
781 static inline void RENAME(yuv2yuv1)(int16_t *lumSrc, int16_t *chrSrc, | |
782 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW) |
3344 | 783 { |
784 #ifdef HAVE_MMX | |
785 if(uDest != NULL) | |
786 { | |
787 asm volatile( | |
788 YSCALEYUV2YV121 | |
789 :: "r" (chrSrc + chrDstW), "r" (uDest + chrDstW), |
790 "g" (-chrDstW) |
3344 | 791 : "%eax" |
792 ); | |
793 | |
794 asm volatile( | |
795 YSCALEYUV2YV121 | |
796 :: "r" (chrSrc + 2048 + chrDstW), "r" (vDest + chrDstW), |
797 "g" (-chrDstW) |
3344 | 798 : "%eax" |
799 ); | |
2519 | 800 } |
3344 | 801 |
802 asm volatile( | |
803 YSCALEYUV2YV121 | |
804 :: "r" (lumSrc + dstW), "r" (dest + dstW), | |
805 "g" (-dstW) | |
806 : "%eax" | |
807 ); | |
808 #else | |
809 int i; | |
810 for(i=0; i<dstW; i++) | |
811 { | |
812 int val= lumSrc[i]>>7; | |
6503 | 813 |
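		// fast clip: bit 8 of val is clear for in-range results, so the clamp
		// branch only runs when the >>7 result went slightly below 0 or above 255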
814 if(val&256){ | |
815 if(val<0) val=0; | |
816 else val=255; | |
817 } | |
3344 | 818 |
6503 | 819 dest[i]= val; |
3344 | 820 } |
821 | |
822 if(uDest != NULL) | |
823 for(i=0; i<chrDstW; i++) |
3344 | 824 { |
825 int u=chrSrc[i]>>7; | |
826 int v=chrSrc[i + 2048]>>7; | |
827 | |
6503 | 828 if((u|v)&256){ |
829 if(u<0) u=0; | |
830 else if (u>255) u=255; | |
831 if(v<0) v=0; | |
832 else if (v>255) v=255; | |
833 } | |
834 | |
835 uDest[i]= u; | |
836 vDest[i]= v; | |
3344 | 837 } |
838 #endif | |
2519 | 839 } |
840 | |
3344 | 841 |
842 /** |
843 * vertical scale YV12 to RGB |
844 */ |
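/* (This packed-output variant dispatches on c->dstFormat: each MMX RGB case pairs
 * YSCALEYUV2RGBX with the matching WRITEBGR32/24/15/16 writer, the YUY2 case uses
 * YSCALEYUV2PACKEDX plus WRITEYUY2, and everything else is handled by yuv2packedXinC.) */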
7723 | 845 static inline void RENAME(yuv2packedX)(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, |
3344 | 846 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, |
6578 | 847 uint8_t *dest, int dstW, int16_t * lumMmxFilter, int16_t * chrMmxFilter, int dstY) |
3344 | 848 { |
6578 | 849 switch(c->dstFormat) |
3344 | 850 { |
851 #ifdef HAVE_MMX | |
6578 | 852 case IMGFMT_BGR32: |
3344 | 853 { |
854 asm volatile( | |
855 YSCALEYUV2RGBX | |
856 WRITEBGR32 | |
857 | |
858 :: "m" (-lumFilterSize), "m" (-chrFilterSize), | |
859 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), | |
860 "r" (dest), "m" (dstW), | |
861 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) | |
862 : "%eax", "%ebx", "%ecx", "%edx", "%esi" | |
863 ); | |
864 } | |
6578 | 865 break; |
866 case IMGFMT_BGR24: | |
3344 | 867 { |
868 asm volatile( | |
869 YSCALEYUV2RGBX | |
870 "leal (%%eax, %%eax, 2), %%ebx \n\t" //FIXME optimize | |
871 "addl %4, %%ebx \n\t" | |
872 WRITEBGR24 | |
873 | |
874 :: "m" (-lumFilterSize), "m" (-chrFilterSize), | |
875 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), | |
876 "r" (dest), "m" (dstW), | |
877 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) | |
878 : "%eax", "%ebx", "%ecx", "%edx", "%esi" | |
879 ); | |
880 } | |
6578 | 881 break; |
882 case IMGFMT_BGR15: | |
3344 | 883 { |
884 asm volatile( | |
885 YSCALEYUV2RGBX | |
886 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | |
887 #ifdef DITHER1XBPP | |
4248 | 888 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
889 "paddusb "MANGLE(g5Dither)", %%mm4\n\t" | |
890 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
3344 | 891 #endif |
892 | |
893 WRITEBGR15 | |
894 | |
895 :: "m" (-lumFilterSize), "m" (-chrFilterSize), | |
896 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), | |
897 "r" (dest), "m" (dstW), | |
898 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) | |
899 : "%eax", "%ebx", "%ecx", "%edx", "%esi" | |
900 ); | |
901 } | |
6578 | 902 break; |
903 case IMGFMT_BGR16: | |
3344 | 904 { |
905 asm volatile( | |
906 YSCALEYUV2RGBX | |
907 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | |
908 #ifdef DITHER1XBPP | |
4248 | 909 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
910 "paddusb "MANGLE(g6Dither)", %%mm4\n\t" | |
911 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
3344 | 912 #endif |
913 | |
914 WRITEBGR16 | |
915 | |
916 :: "m" (-lumFilterSize), "m" (-chrFilterSize), | |
917 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), | |
918 "r" (dest), "m" (dstW), | |
919 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) | |
920 : "%eax", "%ebx", "%ecx", "%edx", "%esi" | |
921 ); | |
922 } | |
6578 | 923 break; |
7723 | 924 case IMGFMT_YUY2: |
925 { | |
926 asm volatile( | |
927 YSCALEYUV2PACKEDX | |
928 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | |
929 | |
930 "psraw $3, %%mm3 \n\t" | |
931 "psraw $3, %%mm4 \n\t" | |
932 "psraw $3, %%mm1 \n\t" | |
933 "psraw $3, %%mm7 \n\t" | |
934 WRITEYUY2 | |
935 | |
936 :: "m" (-lumFilterSize), "m" (-chrFilterSize), | |
937 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), | |
938 "r" (dest), "m" (dstW), | |
939 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) | |
940 : "%eax", "%ebx", "%ecx", "%edx", "%esi" | |
941 ); | |
942 } | |
943 break; | |
3344 | 944 #endif |
6578 | 945 default: |
7723 | 946 yuv2packedXinC(c, lumFilter, lumSrc, lumFilterSize, |
6578 | 947 chrFilter, chrSrc, chrFilterSize, |
948 dest, dstW, dstY); | |
949 break; | |
950 } | |
3344 | 951 } |
952 | |
953 /** | |
954 * vertical bilinear scale YV12 to RGB | |
955 */ | |
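/* (Each output pixel blends two source lines: yalpha1 = yalpha^4095 is the complementary
 * 12-bit weight, so buf0[i]*yalpha1 + buf1[i]*yalpha, shifted back down, gives the
 * interpolated sample that the per-format code converts to RGB or YUY2.) */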
7723 | 956 static inline void RENAME(yuv2packed2)(SwsContext *c, uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1, |
6578 | 957 uint8_t *dest, int dstW, int yalpha, int uvalpha, int y) |
958 { |
959 int yalpha1=yalpha^4095; |
960 int uvalpha1=uvalpha^4095; |
6578 | 961 int i; |
962 |
6578 | 963 #if 0 // isn't used |
4467 | 964 if(flags&SWS_FULL_CHR_H_INT) |
965 { |
6578 | 966 switch(dstFormat) |
967 { | |
968 #ifdef HAVE_MMX |
6578 | 969 case IMGFMT_BGR32: |
970 asm volatile( |
971 |
972 |
973 FULL_YSCALEYUV2RGB |
974 "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG |
975 "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0 |
976 |
977 "movq %%mm3, %%mm1 \n\t" |
978 "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0 |
979 "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0 |
980 |
981 MOVNTQ(%%mm3, (%4, %%eax, 4)) |
982 MOVNTQ(%%mm1, 8(%4, %%eax, 4)) |
983 |
984 "addl $4, %%eax \n\t" |
985 "cmpl %5, %%eax \n\t" |
986 " jb 1b \n\t" |
987 |
988 |
3209 | 989 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
990 "m" (yalpha1), "m" (uvalpha1) |
991 : "%eax" |
992 ); |
6578 | 993 break; |
994 case IMGFMT_BGR24: | |
995 asm volatile( |
996 |
997 FULL_YSCALEYUV2RGB |
998 |
999 // lsb ... msb |
1000 "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG |
1001 "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0 |
1002 |
1003 "movq %%mm3, %%mm1 \n\t" |
1004 "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0 |
1005 "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0 |
1006 |
1007 "movq %%mm3, %%mm2 \n\t" // BGR0BGR0 |
1008 "psrlq $8, %%mm3 \n\t" // GR0BGR00 |
4248 | 1009 "pand "MANGLE(bm00000111)", %%mm2\n\t" // BGR00000 |
1010 "pand "MANGLE(bm11111000)", %%mm3\n\t" // 000BGR00 | |
1011 "por %%mm2, %%mm3 \n\t" // BGRBGR00 |
1012 "movq %%mm1, %%mm2 \n\t" |
1013 "psllq $48, %%mm1 \n\t" // 000000BG |
1014 "por %%mm1, %%mm3 \n\t" // BGRBGRBG |
1015 |
1016 "movq %%mm2, %%mm1 \n\t" // BGR0BGR0 |
1017 "psrld $16, %%mm2 \n\t" // R000R000 |
1018 "psrlq $24, %%mm1 \n\t" // 0BGR0000 |
1019 "por %%mm2, %%mm1 \n\t" // RBGRR000 |
1020 |
1021 "movl %4, %%ebx \n\t" |
1022 "addl %%eax, %%ebx \n\t" |
1023 |
1024 #ifdef HAVE_MMX2 |
1025 //FIXME Alignment |
1026 "movntq %%mm3, (%%ebx, %%eax, 2)\n\t" |
1027 "movntq %%mm1, 8(%%ebx, %%eax, 2)\n\t" |
1028 #else |
1029 "movd %%mm3, (%%ebx, %%eax, 2) \n\t" |
1030 "psrlq $32, %%mm3 \n\t" |
1031 "movd %%mm3, 4(%%ebx, %%eax, 2) \n\t" |
1032 "movd %%mm1, 8(%%ebx, %%eax, 2) \n\t" |
1033 #endif |
1034 "addl $4, %%eax \n\t" |
1035 "cmpl %5, %%eax \n\t" |
1036 " jb 1b \n\t" |
1037 |
3209 | 1038 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW), |
1039 "m" (yalpha1), "m" (uvalpha1) |
1040 : "%eax", "%ebx" |
1041 ); |
6578 | 1042 break; |
1043 case IMGFMT_BGR15: | |
1044 asm volatile( |
1045 |
1046 FULL_YSCALEYUV2RGB |
1047 #ifdef DITHER1XBPP |
4248 | 1048 "paddusb "MANGLE(g5Dither)", %%mm1\n\t" |
1049 "paddusb "MANGLE(r5Dither)", %%mm0\n\t" |
1050 "paddusb "MANGLE(b5Dither)", %%mm3\n\t" |
1051 #endif |
1052 "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G |
1053 "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B |
1054 "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R |
1055 |
1056 "psrlw $3, %%mm3 \n\t" |
1057 "psllw $2, %%mm1 \n\t" |
1058 "psllw $7, %%mm0 \n\t" |
4248 | 1059 "pand "MANGLE(g15Mask)", %%mm1 \n\t" |
1060 "pand "MANGLE(r15Mask)", %%mm0 \n\t" |
1061 |
1062 "por %%mm3, %%mm1 \n\t" |
1063 "por %%mm1, %%mm0 \n\t" |
1064 |
1065 MOVNTQ(%%mm0, (%4, %%eax, 2)) |
1066 |
1067 "addl $4, %%eax \n\t" |
1068 "cmpl %5, %%eax \n\t" |
1069 " jb 1b \n\t" |
1070 |
3209 | 1071 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
1072 "m" (yalpha1), "m" (uvalpha1) |
1073 : "%eax" |
1074 ); |
6578 | 1075 break; |
1076 case IMGFMT_BGR16: | |
1077 asm volatile( |
1078 |
1079 FULL_YSCALEYUV2RGB |
1080 #ifdef DITHER1XBPP |
4248 | 1081 "paddusb "MANGLE(g6Dither)", %%mm1\n\t" |
1082 "paddusb "MANGLE(r5Dither)", %%mm0\n\t" |
1083 "paddusb "MANGLE(b5Dither)", %%mm3\n\t" |
1084 #endif |
1085 "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G |
1086 "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B |
1087 "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R |
1088 |
1089 "psrlw $3, %%mm3 \n\t" |
1090 "psllw $3, %%mm1 \n\t" |
1091 "psllw $8, %%mm0 \n\t" |
4248 | 1092 "pand "MANGLE(g16Mask)", %%mm1 \n\t" |
1093 "pand "MANGLE(r16Mask)", %%mm0 \n\t" | |
1094 |
1095 "por %%mm3, %%mm1 \n\t" |
1096 "por %%mm1, %%mm0 \n\t" |
1097 |
1098 MOVNTQ(%%mm0, (%4, %%eax, 2)) |
1099 |
1100 "addl $4, %%eax \n\t" |
1101 "cmpl %5, %%eax \n\t" |
1102 " jb 1b \n\t" |
1103 |
3209 | 1104 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
1105 "m" (yalpha1), "m" (uvalpha1) |
1106 : "%eax" |
1107 ); |
6578 | 1108 break; |
1109 #endif | |
1110 case IMGFMT_RGB32: | |
1111 #ifndef HAVE_MMX | |
1112 case IMGFMT_BGR32: | |
1113 #endif | |
1114 if(dstFormat==IMGFMT_BGR32) |
1115 { |
4794 | 1116 int i; |
4793 | 1117 #ifdef WORDS_BIGENDIAN |
1118 dest++; | |
1119 #endif | |
3209 | 1120 for(i=0;i<dstW;i++){ |
1121 // vertical linear interpolation && yuv2rgb in a single step: |
1122 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; |
1123 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); |
1124 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); |
2503 | 1125 dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)]; |
1126 dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)]; | |
1127 dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)]; | |
1128 dest+= 4; |
1129 } |
1130 } |
1131 else if(dstFormat==IMGFMT_BGR24) |
1132 { |
1133 int i; |
1134 for(i=0;i<dstW;i++){ |
1135 // vertical linear interpolation && yuv2rgb in a single step: |
1136 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; |
1137 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); |
1138 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); |
1139 dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)]; |
1140 dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)]; |
1141 dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)]; |
1142 dest+= 3; |
1143 } |
1144 } |
1145 else if(dstFormat==IMGFMT_BGR16) |
1146 { |
2671 | 1147 int i; |
3209 | 1148 for(i=0;i<dstW;i++){ |
1149 // vertical linear interpolation && yuv2rgb in a single step: |
1150 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; |
1151 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); |
1152 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); |
1153 |
1154 ((uint16_t*)dest)[i] = |
2584 | 1155 clip_table16b[(Y + yuvtab_40cf[U]) >>13] | |
1156 clip_table16g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] | | |
1157 clip_table16r[(Y + yuvtab_3343[V]) >>13]; | |
1158 } |
1159 } |
1160 else if(dstFormat==IMGFMT_BGR15) |
1161 { |
2671 | 1162 int i; |
3209 | 1163 for(i=0;i<dstW;i++){ |
1164 // vertical linear interpolation && yuv2rgb in a single step: |
1165 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; |
1166 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); |
1167 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); |
1168 |
1169 ((uint16_t*)dest)[i] = |
2584 | 1170 clip_table15b[(Y + yuvtab_40cf[U]) >>13] | |
1171 clip_table15g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] | | |
1172 clip_table15r[(Y + yuvtab_3343[V]) >>13]; | |
1173 } |
1174 } |
1175 }//FULL_UV_IPOL |
1176 else |
1177 { |
6578 | 1178 #endif // if 0 |
1179 #ifdef HAVE_MMX |
6578 | 1180 switch(c->dstFormat) |
1181 { | |
1182 case IMGFMT_BGR32: | |
1183 asm volatile( |
1184 YSCALEYUV2RGB |
1185 WRITEBGR32 |
1186 |
3209 | 1187 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
1188 "m" (yalpha1), "m" (uvalpha1) |
1189 : "%eax" |
1190 ); |
6578 | 1191 return; |
1192 case IMGFMT_BGR24: | |
1193 asm volatile( |
2728 | 1194 "movl %4, %%ebx \n\t" |
1195 YSCALEYUV2RGB |
1196 WRITEBGR24 |
1197 |
3209 | 1198 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW), |
1199 "m" (yalpha1), "m" (uvalpha1) |
1200 : "%eax", "%ebx" |
1201 ); |
6578 | 1202 return; |
1203 case IMGFMT_BGR15: | |
1204 asm volatile( |
1205 YSCALEYUV2RGB |
1206 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
1207 #ifdef DITHER1XBPP |
4248 | 1208 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
1209 "paddusb "MANGLE(g5Dither)", %%mm4\n\t" |
1210 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" |
1211 #endif |
1212 |
1213 WRITEBGR15 |
1214 |
3209 | 1215 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
1216 "m" (yalpha1), "m" (uvalpha1) |
1217 : "%eax" |
1218 ); |
6578 | 1219 return; |
1220 case IMGFMT_BGR16: | |
1221 asm volatile( |
1222 YSCALEYUV2RGB |
1223 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
1224 #ifdef DITHER1XBPP |
4248 | 1225 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
1226 "paddusb "MANGLE(g6Dither)", %%mm4\n\t" |
1227 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" |
1228 #endif |
1229 |
1230 WRITEBGR16 |
1231 |
3209 | 1232 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
1233 "m" (yalpha1), "m" (uvalpha1) |
1234 : "%eax" |
1235 ); |
6578 | 1236 return; |
7723 | 1237 case IMGFMT_YUY2: |
1238 asm volatile( | |
1239 YSCALEYUV2PACKED | |
1240 WRITEYUY2 | |
1241 | |
1242 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), | |
1243 "m" (yalpha1), "m" (uvalpha1) | |
1244 : "%eax" | |
1245 ); | |
1246 return; | |
6578 | 1247 default: break; |
1248 } | |
1249 #endif //HAVE_MMX | |
7723 | 1250 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB2_C, YSCALE_YUV_2_PACKED2_C) |
1251 } |
1252 |
1253 /** |
1254 * YV12 to RGB without scaling or interpolating |
1255 */ |
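/* (yuv2packed1 aliases buf1 to buf0 so only one luma line is read; with SWS_FULL_CHR_H_INT it
 * simply delegates to yuv2packed2, and the fast MMX cases below handle uvalpha < 2048 without
 * blending the two chroma lines, which is the half-pixel chroma shift noted in the comment.) */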
7723 | 1256 static inline void RENAME(yuv2packed1)(SwsContext *c, uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1, |
6578 | 1257 uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags, int y) |
1258 { |
1259 #ifdef HAVE_MMX |
2671 | 1260 int uvalpha1=uvalpha^4095; |
1261 #endif |
3344 | 1262 const int yalpha1=0; |
6578 | 1263 int i; |
1264 | |
1265 uint16_t *buf1= buf0; //FIXME needed for the rgb1/bgr1 | |
1266 const int yalpha= 4096; //FIXME ... | |
2671 | 1267 |
4467 | 1268 if(flags&SWS_FULL_CHR_H_INT) |
1269 { |
7723 | 1270 RENAME(yuv2packed2)(c, buf0, buf0, uvbuf0, uvbuf1, dest, dstW, 0, uvalpha, y); |
1271 return; |
1272 } |
2576 | 1273 |
1274 #ifdef HAVE_MMX |
1275 if( uvalpha < 2048 ) // note this is not correct (shifts chrominance by 0.5 pixels) but it's a bit faster |
1276 { |
6578 | 1277 switch(dstFormat) |
1278 { |
6578 | 1279 case IMGFMT_BGR32: |
1280 asm volatile( |
1281 YSCALEYUV2RGB1 |
1282 WRITEBGR32 |
3344 | 1283 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
1284 "m" (yalpha1), "m" (uvalpha1) |
1285 : "%eax" |
1286 ); |
6578 | 1287 return; |
1288 case IMGFMT_BGR24: | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1289 asm volatile( |
2728 | 1290 "movl %4, %%ebx \n\t" |
1291 YSCALEYUV2RGB1 |
1292 WRITEBGR24 |
3344 | 1293 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW), |
1294 "m" (yalpha1), "m" (uvalpha1) |
1295 : "%eax", "%ebx" |
1296 ); |
6578 | 1297 return; |
1298 case IMGFMT_BGR15: | |
1299 asm volatile( |
1300 YSCALEYUV2RGB1 |
1301 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
1302 #ifdef DITHER1XBPP |
4248 | 1303 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
1304 "paddusb "MANGLE(g5Dither)", %%mm4\n\t" | |
1305 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
1306 #endif |
1307 WRITEBGR15 |
3344 | 1308 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
1309 "m" (yalpha1), "m" (uvalpha1) |
1310 : "%eax" |
1311 ); |
6578 | 1312 return; |
1313 case IMGFMT_BGR16: | |
1314 asm volatile( |
1315 YSCALEYUV2RGB1 |
1316 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
1317 #ifdef DITHER1XBPP |
4248 | 1318 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
1319 "paddusb "MANGLE(g6Dither)", %%mm4\n\t" | |
1320 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
1321 #endif |
1322 |
1323 WRITEBGR16 |
3344 | 1324 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
1325 "m" (yalpha1), "m" (uvalpha1) |
1326 : "%eax" |
1327 ); |
6578 | 1328 return; |
7723 | 1329 case IMGFMT_YUY2: |
1330 asm volatile( | |
1331 YSCALEYUV2PACKED1 | |
1332 WRITEYUY2 | |
1333 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), | |
1334 "m" (yalpha1), "m" (uvalpha1) | |
1335 : "%eax" | |
1336 ); | |
1337 return; | |
1338 } |
1339 } |
1340 else |
1341 { |
6578 | 1342 switch(dstFormat) |
1343 { |
6578 | 1344 case IMGFMT_BGR32: |
1345 asm volatile( |
1346 YSCALEYUV2RGB1b |
1347 WRITEBGR32 |
3344 | 1348 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
1349 "m" (yalpha1), "m" (uvalpha1) |
1350 : "%eax" |
1351 ); |
6578 | 1352 return; |
1353 case IMGFMT_BGR24: | |
1354 asm volatile( |
2728 | 1355 "movl %4, %%ebx \n\t" |
1356 YSCALEYUV2RGB1b |
1357 WRITEBGR24 |
3344 | 1358 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW), |
1359 "m" (yalpha1), "m" (uvalpha1) |
1360 : "%eax", "%ebx" |
1361 ); |
6578 | 1362 return; |
1363 case IMGFMT_BGR15: | |
1364 asm volatile( |
1365 YSCALEYUV2RGB1b |
1366 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
1367 #ifdef DITHER1XBPP |
4248 | 1368 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
1369 "paddusb "MANGLE(g5Dither)", %%mm4\n\t" | |
1370 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
1371 #endif |
1372 WRITEBGR15 |
3344 | 1373 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
1374 "m" (yalpha1), "m" (uvalpha1) |
1375 : "%eax" |
1376 ); |
6578 | 1377 return; |
1378 case IMGFMT_BGR16: | |
1379 asm volatile( |
1380 YSCALEYUV2RGB1b |
1381 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
1382 #ifdef DITHER1XBPP |
4248 | 1383 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
1384 "paddusb "MANGLE(g6Dither)", %%mm4\n\t" | |
1385 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
1386 #endif |
1387 |
1388 WRITEBGR16 |
3344 | 1389 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
1390 "m" (yalpha1), "m" (uvalpha1) |
1391 : "%eax" |
1392 ); |
6578 | 1393 return; |
7723 | 1394 case IMGFMT_YUY2: |
1395 asm volatile( | |
1396 YSCALEYUV2PACKED1b | |
1397 WRITEYUY2 | |
1398 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), | |
1399 "m" (yalpha1), "m" (uvalpha1) | |
1400 : "%eax" | |
1401 ); | |
1402 return; | |
1403 } |
1404 } |
1405 #endif |
6578 | 1406 if( uvalpha < 2048 ) |
1407 { | |
7723 | 1408 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1_C, YSCALE_YUV_2_PACKED1_C) |
6578 | 1409 }else{ |
7723 | 1410 YSCALE_YUV_2_ANYRGB_C(YSCALE_YUV_2_RGB1B_C, YSCALE_YUV_2_PACKED1B_C) |
6578 | 1411 } |
1412 } |
1413 |
4481 | 1414 //FIXME yuy2* can read up to 7 samples too many |
1415 | |
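/* a YUY2 line is stored as Y0 U0 Y1 V0 Y2 U2 Y3 V2 ..., so luma sits in the even
   bytes and U/V alternate in the odd bytes; UYVY (further down) is the same with
   each pair swapped (U0 Y0 V0 Y1 ...).  The MMX loops below rely on that:
   bm01010101 is a 0x00FF00FF... byte mask, so pand+packuswb keeps the even bytes
   while psrlw $8 selects the odd ones. */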
4467 | 1416 static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, int width) |
1417 { | |
4481 | 1418 #ifdef HAVE_MMX |
1419 asm volatile( | |
1420 "movq "MANGLE(bm01010101)", %%mm2\n\t" | |
1421 "movl %0, %%eax \n\t" | |
1422 "1: \n\t" | |
1423 "movq (%1, %%eax,2), %%mm0 \n\t" | |
1424 "movq 8(%1, %%eax,2), %%mm1 \n\t" | |
1425 "pand %%mm2, %%mm0 \n\t" | |
1426 "pand %%mm2, %%mm1 \n\t" | |
1427 "packuswb %%mm1, %%mm0 \n\t" | |
1428 "movq %%mm0, (%2, %%eax) \n\t" | |
1429 "addl $8, %%eax \n\t" | |
1430 " js 1b \n\t" | |
1431 : : "g" (-width), "r" (src+width*2), "r" (dst+width) | |
1432 : "%eax" | |
1433 ); | |
4467 | 1434 #else |
1435 int i; | |
1436 for(i=0; i<width; i++) | |
1437 dst[i]= src[2*i]; | |
1438 #endif | |
1439 } | |
1440 | |
1441 static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
1442 { | |
4481 | 1443 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
1444 asm volatile( | |
1445 "movq "MANGLE(bm01010101)", %%mm4\n\t" | |
1446 "movl %0, %%eax \n\t" | |
1447 "1: \n\t" | |
1448 "movq (%1, %%eax,4), %%mm0 \n\t" | |
1449 "movq 8(%1, %%eax,4), %%mm1 \n\t" | |
1450 "movq (%2, %%eax,4), %%mm2 \n\t" | |
1451 "movq 8(%2, %%eax,4), %%mm3 \n\t" | |
1452 PAVGB(%%mm2, %%mm0) | |
1453 PAVGB(%%mm3, %%mm1) | |
1454 "psrlw $8, %%mm0 \n\t" | |
1455 "psrlw $8, %%mm1 \n\t" | |
1456 "packuswb %%mm1, %%mm0 \n\t" | |
1457 "movq %%mm0, %%mm1 \n\t" | |
1458 "psrlw $8, %%mm0 \n\t" | |
1459 "pand %%mm4, %%mm1 \n\t" | |
1460 "packuswb %%mm0, %%mm0 \n\t" | |
1461 "packuswb %%mm1, %%mm1 \n\t" | |
1462 "movd %%mm0, (%4, %%eax) \n\t" | |
1463 "movd %%mm1, (%3, %%eax) \n\t" | |
1464 "addl $4, %%eax \n\t" | |
1465 " js 1b \n\t" | |
1466 : : "g" (-width), "r" (src1+width*4), "r" (src2+width*4), "r" (dstU+width), "r" (dstV+width) | |
1467 : "%eax" | |
1468 ); | |
4467 | 1469 #else |
1470 int i; | |
1471 for(i=0; i<width; i++) | |
1472 { | |
1473 dstU[i]= (src1[4*i + 1] + src2[4*i + 1])>>1; | |
1474 dstV[i]= (src1[4*i + 3] + src2[4*i + 3])>>1; | |
1475 } | |
1476 #endif | |
1477 } | |
1478 | |
9071 | 1479 //this is almost identical to the previous one, and exists only because yuy2ToY/UV(dst, src+1, ...) would have 100% unaligned accesses |
1480 static inline void RENAME(uyvyToY)(uint8_t *dst, uint8_t *src, int width) | |
1481 { | |
1482 #ifdef HAVE_MMX | |
1483 asm volatile( | |
1484 "movl %0, %%eax \n\t" | |
1485 "1: \n\t" | |
1486 "movq (%1, %%eax,2), %%mm0 \n\t" | |
1487 "movq 8(%1, %%eax,2), %%mm1 \n\t" | |
1488 "psrlw $8, %%mm0 \n\t" | |
1489 "psrlw $8, %%mm1 \n\t" | |
1490 "packuswb %%mm1, %%mm0 \n\t" | |
1491 "movq %%mm0, (%2, %%eax) \n\t" | |
1492 "addl $8, %%eax \n\t" | |
1493 " js 1b \n\t" | |
1494 : : "g" (-width), "r" (src+width*2), "r" (dst+width) | |
1495 : "%eax" | |
1496 ); | |
1497 #else | |
1498 int i; | |
1499 for(i=0; i<width; i++) | |
1500 dst[i]= src[2*i+1]; | |
1501 #endif | |
1502 } | |
1503 | |
1504 static inline void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
1505 { | |
1506 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | |
1507 asm volatile( | |
1508 "movq "MANGLE(bm01010101)", %%mm4\n\t" | |
1509 "movl %0, %%eax \n\t" | |
1510 "1: \n\t" | |
1511 "movq (%1, %%eax,4), %%mm0 \n\t" | |
1512 "movq 8(%1, %%eax,4), %%mm1 \n\t" | |
1513 "movq (%2, %%eax,4), %%mm2 \n\t" | |
1514 "movq 8(%2, %%eax,4), %%mm3 \n\t" | |
1515 PAVGB(%%mm2, %%mm0) | |
1516 PAVGB(%%mm3, %%mm1) | |
1517 "pand %%mm4, %%mm0 \n\t" | |
1518 "pand %%mm4, %%mm1 \n\t" | |
1519 "packuswb %%mm1, %%mm0 \n\t" | |
1520 "movq %%mm0, %%mm1 \n\t" | |
1521 "psrlw $8, %%mm0 \n\t" | |
1522 "pand %%mm4, %%mm1 \n\t" | |
1523 "packuswb %%mm0, %%mm0 \n\t" | |
1524 "packuswb %%mm1, %%mm1 \n\t" | |
1525 "movd %%mm0, (%4, %%eax) \n\t" | |
1526 "movd %%mm1, (%3, %%eax) \n\t" | |
1527 "addl $4, %%eax \n\t" | |
1528 " js 1b \n\t" | |
1529 : : "g" (-width), "r" (src1+width*4), "r" (src2+width*4), "r" (dstU+width), "r" (dstV+width) | |
1530 : "%eax" | |
1531 ); | |
1532 #else | |
1533 int i; | |
1534 for(i=0; i<width; i++) | |
1535 { | |
1536 dstU[i]= (src1[4*i + 0] + src2[4*i + 0])>>1; | |
1537 dstV[i]= (src1[4*i + 2] + src2[4*i + 2])>>1; | |
1538 } | |
1539 #endif | |
1540 } | |
1541 | |
4467 | 1542 static inline void RENAME(bgr32ToY)(uint8_t *dst, uint8_t *src, int width) |
1543 { | |
1544 #ifdef HAVE_MMXFIXME | |
1545 #else | |
1546 int i; | |
1547 for(i=0; i<width; i++) | |
1548 { | |
1549 int b= src[i*4+0]; | |
1550 int g= src[i*4+1]; | |
1551 int r= src[i*4+2]; | |
1552 | |
1553 dst[i]= ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; | |
1554 } | |
1555 #endif | |
1556 } | |
1557 | |
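/* all RGB/BGR -> YUV helpers below share one fixed point scheme: RY/GY/BY etc.
   are the conversion weights scaled by 2^RGB2YUV_SHIFT, luma gets a +16 offset,
   and the UV variants sum a 2x2 block (two neighbouring pixels on two lines),
   hence the extra +2 in the shift and the +128 chroma offset */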
1558 static inline void RENAME(bgr32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
1559 { | |
1560 #ifdef HAVE_MMXFIXME | |
1561 #else | |
1562 int i; | |
1563 for(i=0; i<width; i++) | |
1564 { | |
1565 int b= src1[8*i + 0] + src1[8*i + 4] + src2[8*i + 0] + src2[8*i + 4]; | |
1566 int g= src1[8*i + 1] + src1[8*i + 5] + src2[8*i + 1] + src2[8*i + 5]; | |
1567 int r= src1[8*i + 2] + src1[8*i + 6] + src2[8*i + 2] + src2[8*i + 6]; | |
1568 | |
1569 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
1570 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
1571 } | |
1572 #endif | |
1573 } | |
1574 | |
1575 static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, int width) | |
1576 { | |
4612 | 1577 #ifdef HAVE_MMX |
1578 asm volatile( | |
1579 "movl %2, %%eax \n\t" | |
4923 | 1580 "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t" |
1581 "movq "MANGLE(w1111)", %%mm5 \n\t" | |
4612 | 1582 "pxor %%mm7, %%mm7 \n\t" |
1583 "leal (%%eax, %%eax, 2), %%ebx \n\t" | |
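		// %%ebx= 3*%%eax: BGR24 pixels are 3 bytes wide, so the byte offset has
		// to advance three times as fast as the (negative) pixel counter in %%eax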
1584 ".balign 16 \n\t" | |
1585 "1: \n\t" | |
1586 PREFETCH" 64(%0, %%ebx) \n\t" | |
1587 "movd (%0, %%ebx), %%mm0 \n\t" | |
1588 "movd 3(%0, %%ebx), %%mm1 \n\t" | |
1589 "punpcklbw %%mm7, %%mm0 \n\t" | |
1590 "punpcklbw %%mm7, %%mm1 \n\t" | |
1591 "movd 6(%0, %%ebx), %%mm2 \n\t" | |
1592 "movd 9(%0, %%ebx), %%mm3 \n\t" | |
1593 "punpcklbw %%mm7, %%mm2 \n\t" | |
1594 "punpcklbw %%mm7, %%mm3 \n\t" | |
1595 "pmaddwd %%mm6, %%mm0 \n\t" | |
1596 "pmaddwd %%mm6, %%mm1 \n\t" | |
1597 "pmaddwd %%mm6, %%mm2 \n\t" | |
1598 "pmaddwd %%mm6, %%mm3 \n\t" | |
1599 #ifndef FAST_BGR2YV12 | |
1600 "psrad $8, %%mm0 \n\t" | |
1601 "psrad $8, %%mm1 \n\t" | |
1602 "psrad $8, %%mm2 \n\t" | |
1603 "psrad $8, %%mm3 \n\t" | |
1604 #endif | |
1605 "packssdw %%mm1, %%mm0 \n\t" | |
1606 "packssdw %%mm3, %%mm2 \n\t" | |
1607 "pmaddwd %%mm5, %%mm0 \n\t" | |
1608 "pmaddwd %%mm5, %%mm2 \n\t" | |
1609 "packssdw %%mm2, %%mm0 \n\t" | |
1610 "psraw $7, %%mm0 \n\t" | |
1611 | |
1612 "movd 12(%0, %%ebx), %%mm4 \n\t" | |
1613 "movd 15(%0, %%ebx), %%mm1 \n\t" | |
1614 "punpcklbw %%mm7, %%mm4 \n\t" | |
1615 "punpcklbw %%mm7, %%mm1 \n\t" | |
1616 "movd 18(%0, %%ebx), %%mm2 \n\t" | |
1617 "movd 21(%0, %%ebx), %%mm3 \n\t" | |
1618 "punpcklbw %%mm7, %%mm2 \n\t" | |
1619 "punpcklbw %%mm7, %%mm3 \n\t" | |
1620 "pmaddwd %%mm6, %%mm4 \n\t" | |
1621 "pmaddwd %%mm6, %%mm1 \n\t" | |
1622 "pmaddwd %%mm6, %%mm2 \n\t" | |
1623 "pmaddwd %%mm6, %%mm3 \n\t" | |
1624 #ifndef FAST_BGR2YV12 | |
1625 "psrad $8, %%mm4 \n\t" | |
1626 "psrad $8, %%mm1 \n\t" | |
1627 "psrad $8, %%mm2 \n\t" | |
1628 "psrad $8, %%mm3 \n\t" | |
1629 #endif | |
1630 "packssdw %%mm1, %%mm4 \n\t" | |
1631 "packssdw %%mm3, %%mm2 \n\t" | |
1632 "pmaddwd %%mm5, %%mm4 \n\t" | |
1633 "pmaddwd %%mm5, %%mm2 \n\t" | |
1634 "addl $24, %%ebx \n\t" | |
1635 "packssdw %%mm2, %%mm4 \n\t" | |
1636 "psraw $7, %%mm4 \n\t" | |
1637 | |
1638 "packuswb %%mm4, %%mm0 \n\t" | |
4923 | 1639 "paddusb "MANGLE(bgr2YOffset)", %%mm0 \n\t" |
4612 | 1640 |
4619 | 1641 "movq %%mm0, (%1, %%eax) \n\t" |
4612 | 1642 "addl $8, %%eax \n\t" |
1643 " js 1b \n\t" | |
1644 : : "r" (src+width*3), "r" (dst+width), "g" (-width) | |
1645 : "%eax", "%ebx" | |
1646 ); | |
4467 | 1647 #else |
1648 int i; | |
1649 for(i=0; i<width; i++) | |
1650 { | |
1651 int b= src[i*3+0]; | |
1652 int g= src[i*3+1]; | |
1653 int r= src[i*3+2]; | |
1654 | |
1655 dst[i]= ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; | |
1656 } | |
1657 #endif | |
1658 } | |
1659 | |
1660 static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
1661 { | |
4619 | 1662 #ifdef HAVE_MMX |
1663 asm volatile( | |
1664 "movl %4, %%eax \n\t" | |
4923 | 1665 "movq "MANGLE(w1111)", %%mm5 \n\t" |
1666 "movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t" | |
4619 | 1667 "pxor %%mm7, %%mm7 \n\t" |
1668 "leal (%%eax, %%eax, 2), %%ebx \n\t" | |
1669 "addl %%ebx, %%ebx \n\t" | |
1670 ".balign 16 \n\t" | |
1671 "1: \n\t" | |
1672 PREFETCH" 64(%0, %%ebx) \n\t" | |
1673 PREFETCH" 64(%1, %%ebx) \n\t" | |
1674 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | |
1675 "movq (%0, %%ebx), %%mm0 \n\t" | |
1676 "movq (%1, %%ebx), %%mm1 \n\t" | |
1677 "movq 6(%0, %%ebx), %%mm2 \n\t" | |
1678 "movq 6(%1, %%ebx), %%mm3 \n\t" | |
1679 PAVGB(%%mm1, %%mm0) | |
1680 PAVGB(%%mm3, %%mm2) | |
1681 "movq %%mm0, %%mm1 \n\t" | |
1682 "movq %%mm2, %%mm3 \n\t" | |
1683 "psrlq $24, %%mm0 \n\t" | |
1684 "psrlq $24, %%mm2 \n\t" | |
1685 PAVGB(%%mm1, %%mm0) | |
1686 PAVGB(%%mm3, %%mm2) | |
1687 "punpcklbw %%mm7, %%mm0 \n\t" | |
1688 "punpcklbw %%mm7, %%mm2 \n\t" | |
1689 #else | |
1690 "movd (%0, %%ebx), %%mm0 \n\t" | |
1691 "movd (%1, %%ebx), %%mm1 \n\t" | |
1692 "movd 3(%0, %%ebx), %%mm2 \n\t" | |
1693 "movd 3(%1, %%ebx), %%mm3 \n\t" | |
1694 "punpcklbw %%mm7, %%mm0 \n\t" | |
1695 "punpcklbw %%mm7, %%mm1 \n\t" | |
1696 "punpcklbw %%mm7, %%mm2 \n\t" | |
1697 "punpcklbw %%mm7, %%mm3 \n\t" | |
1698 "paddw %%mm1, %%mm0 \n\t" | |
1699 "paddw %%mm3, %%mm2 \n\t" | |
1700 "paddw %%mm2, %%mm0 \n\t" | |
1701 "movd 6(%0, %%ebx), %%mm4 \n\t" | |
1702 "movd 6(%1, %%ebx), %%mm1 \n\t" | |
1703 "movd 9(%0, %%ebx), %%mm2 \n\t" | |
1704 "movd 9(%1, %%ebx), %%mm3 \n\t" | |
1705 "punpcklbw %%mm7, %%mm4 \n\t" | |
1706 "punpcklbw %%mm7, %%mm1 \n\t" | |
1707 "punpcklbw %%mm7, %%mm2 \n\t" | |
1708 "punpcklbw %%mm7, %%mm3 \n\t" | |
1709 "paddw %%mm1, %%mm4 \n\t" | |
1710 "paddw %%mm3, %%mm2 \n\t" | |
1711 "paddw %%mm4, %%mm2 \n\t" | |
1712 "psrlw $2, %%mm0 \n\t" | |
1713 "psrlw $2, %%mm2 \n\t" | |
1714 #endif | |
4923 | 1715 "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" |
1716 "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" | |
4619 | 1717 |
1718 "pmaddwd %%mm0, %%mm1 \n\t" | |
1719 "pmaddwd %%mm2, %%mm3 \n\t" | |
1720 "pmaddwd %%mm6, %%mm0 \n\t" | |
1721 "pmaddwd %%mm6, %%mm2 \n\t" | |
1722 #ifndef FAST_BGR2YV12 | |
1723 "psrad $8, %%mm0 \n\t" | |
1724 "psrad $8, %%mm1 \n\t" | |
1725 "psrad $8, %%mm2 \n\t" | |
1726 "psrad $8, %%mm3 \n\t" | |
1727 #endif | |
1728 "packssdw %%mm2, %%mm0 \n\t" | |
1729 "packssdw %%mm3, %%mm1 \n\t" | |
1730 "pmaddwd %%mm5, %%mm0 \n\t" | |
1731 "pmaddwd %%mm5, %%mm1 \n\t" | |
1732 "packssdw %%mm1, %%mm0 \n\t" // V1 V0 U1 U0 | |
1733 "psraw $7, %%mm0 \n\t" | |
1734 | |
1735 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | |
1736 "movq 12(%0, %%ebx), %%mm4 \n\t" | |
1737 "movq 12(%1, %%ebx), %%mm1 \n\t" | |
1738 "movq 18(%0, %%ebx), %%mm2 \n\t" | |
1739 "movq 18(%1, %%ebx), %%mm3 \n\t" | |
1740 PAVGB(%%mm1, %%mm4) | |
1741 PAVGB(%%mm3, %%mm2) | |
1742 "movq %%mm4, %%mm1 \n\t" | |
1743 "movq %%mm2, %%mm3 \n\t" | |
1744 "psrlq $24, %%mm4 \n\t" | |
1745 "psrlq $24, %%mm2 \n\t" | |
1746 PAVGB(%%mm1, %%mm4) | |
1747 PAVGB(%%mm3, %%mm2) | |
1748 "punpcklbw %%mm7, %%mm4 \n\t" | |
1749 "punpcklbw %%mm7, %%mm2 \n\t" | |
1750 #else | |
1751 "movd 12(%0, %%ebx), %%mm4 \n\t" | |
1752 "movd 12(%1, %%ebx), %%mm1 \n\t" | |
1753 "movd 15(%0, %%ebx), %%mm2 \n\t" | |
1754 "movd 15(%1, %%ebx), %%mm3 \n\t" | |
1755 "punpcklbw %%mm7, %%mm4 \n\t" | |
1756 "punpcklbw %%mm7, %%mm1 \n\t" | |
1757 "punpcklbw %%mm7, %%mm2 \n\t" | |
1758 "punpcklbw %%mm7, %%mm3 \n\t" | |
1759 "paddw %%mm1, %%mm4 \n\t" | |
1760 "paddw %%mm3, %%mm2 \n\t" | |
1761 "paddw %%mm2, %%mm4 \n\t" | |
1762 "movd 18(%0, %%ebx), %%mm5 \n\t" | |
1763 "movd 18(%1, %%ebx), %%mm1 \n\t" | |
1764 "movd 21(%0, %%ebx), %%mm2 \n\t" | |
1765 "movd 21(%1, %%ebx), %%mm3 \n\t" | |
1766 "punpcklbw %%mm7, %%mm5 \n\t" | |
1767 "punpcklbw %%mm7, %%mm1 \n\t" | |
1768 "punpcklbw %%mm7, %%mm2 \n\t" | |
1769 "punpcklbw %%mm7, %%mm3 \n\t" | |
1770 "paddw %%mm1, %%mm5 \n\t" | |
1771 "paddw %%mm3, %%mm2 \n\t" | |
1772 "paddw %%mm5, %%mm2 \n\t" | |
4923 | 1773 "movq "MANGLE(w1111)", %%mm5 \n\t" |
4619 | 1774 "psrlw $2, %%mm4 \n\t" |
1775 "psrlw $2, %%mm2 \n\t" | |
1776 #endif | |
4923 | 1777 "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" |
1778 "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" | |
4619 | 1779 |
1780 "pmaddwd %%mm4, %%mm1 \n\t" | |
1781 "pmaddwd %%mm2, %%mm3 \n\t" | |
1782 "pmaddwd %%mm6, %%mm4 \n\t" | |
1783 "pmaddwd %%mm6, %%mm2 \n\t" | |
1784 #ifndef FAST_BGR2YV12 | |
1785 "psrad $8, %%mm4 \n\t" | |
1786 "psrad $8, %%mm1 \n\t" | |
1787 "psrad $8, %%mm2 \n\t" | |
1788 "psrad $8, %%mm3 \n\t" | |
1789 #endif | |
1790 "packssdw %%mm2, %%mm4 \n\t" | |
1791 "packssdw %%mm3, %%mm1 \n\t" | |
1792 "pmaddwd %%mm5, %%mm4 \n\t" | |
1793 "pmaddwd %%mm5, %%mm1 \n\t" | |
1794 "addl $24, %%ebx \n\t" | |
1795 "packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2 | |
1796 "psraw $7, %%mm4 \n\t" | |
1797 | |
1798 "movq %%mm0, %%mm1 \n\t" | |
1799 "punpckldq %%mm4, %%mm0 \n\t" | |
1800 "punpckhdq %%mm4, %%mm1 \n\t" | |
1801 "packsswb %%mm1, %%mm0 \n\t" | |
4923 | 1802 "paddb "MANGLE(bgr2UVOffset)", %%mm0 \n\t" |
4619 | 1803 |
1804 "movd %%mm0, (%2, %%eax) \n\t" | |
1805 "punpckhdq %%mm0, %%mm0 \n\t" | |
1806 "movd %%mm0, (%3, %%eax) \n\t" | |
1807 "addl $4, %%eax \n\t" | |
1808 " js 1b \n\t" | |
1809 : : "r" (src1+width*6), "r" (src2+width*6), "r" (dstU+width), "r" (dstV+width), "g" (-width) | |
1810 : "%eax", "%ebx" | |
1811 ); | |
4467 | 1812 #else |
1813 int i; | |
1814 for(i=0; i<width; i++) | |
1815 { | |
1816 int b= src1[6*i + 0] + src1[6*i + 3] + src2[6*i + 0] + src2[6*i + 3]; | |
1817 int g= src1[6*i + 1] + src1[6*i + 4] + src2[6*i + 1] + src2[6*i + 4]; | |
1818 int r= src1[6*i + 2] + src1[6*i + 5] + src2[6*i + 2] + src2[6*i + 5]; | |
1819 | |
1820 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
1821 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
1822 } | |
1823 #endif | |
1824 } | |
1825 | |
4578 | 1826 static inline void RENAME(bgr16ToY)(uint8_t *dst, uint8_t *src, int width) |
1827 { | |
1828 int i; | |
1829 for(i=0; i<width; i++) | |
1830 { | |
1831 int d= src[i*2] + (src[i*2+1]<<8); | |
1832 int b= d&0x1F; | |
1833 int g= (d>>5)&0x3F; | |
1834 int r= (d>>11)&0x1F; | |
1835 | |
1836 dst[i]= ((2*RY*r + GY*g + 2*BY*b)>>(RGB2YUV_SHIFT-2)) + 16; | |
1837 } | |
1838 } | |
1839 | |
1840 static inline void RENAME(bgr16ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
1841 { | |
1842 int i; | |
1843 for(i=0; i<width; i++) | |
1844 { | |
4579 | 1845 #if 1 |
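		/* sum the 2x2 block inside one 32bit word: the two masks split the three
		   colour fields of a pair of adjacent 16bit pixels so that corresponding
		   fields from both lines can be added without spilling into each other;
		   after recombining, the blue sum ends up at bit 0, the red sum at bit 11
		   and the green sum in the top bits, which is what the b/r/g extraction
		   below reads back */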
1846 int d0= le2me_32( ((uint32_t*)src1)[i] ); | |
1847 int d1= le2me_32( ((uint32_t*)src2)[i] ); | |
1848 | |
1849 int dl= (d0&0x07E0F81F) + (d1&0x07E0F81F); | |
1850 int dh= ((d0>>5)&0x07C0F83F) + ((d1>>5)&0x07C0F83F); | |
1851 | |
1852 int dh2= (dh>>11) + (dh<<21); | |
1853 int d= dh2 + dl; | |
1854 | |
1855 int b= d&0x7F; | |
1856 int r= (d>>11)&0x7F; | |
1857 int g= d>>21; | |
1858 #else | |
4578 | 1859 int d0= src1[i*4] + (src1[i*4+1]<<8); |
1860 int b0= d0&0x1F; | |
1861 int g0= (d0>>5)&0x3F; | |
1862 int r0= (d0>>11)&0x1F; | |
1863 | |
1864 int d1= src1[i*4+2] + (src1[i*4+3]<<8); | |
1865 int b1= d1&0x1F; | |
1866 int g1= (d1>>5)&0x3F; | |
1867 int r1= (d1>>11)&0x1F; | |
1868 | |
1869 int d2= src2[i*4] + (src2[i*4+1]<<8); | |
1870 int b2= d2&0x1F; | |
1871 int g2= (d2>>5)&0x3F; | |
1872 int r2= (d2>>11)&0x1F; | |
1873 | |
1874 int d3= src2[i*4+2] + (src2[i*4+3]<<8); | |
1875 int b3= d3&0x1F; | |
1876 int g3= (d3>>5)&0x3F; | |
1877 int r3= (d3>>11)&0x1F; | |
1878 | |
1879 int b= b0 + b1 + b2 + b3; | |
1880 int g= g0 + g1 + g2 + g3; | |
1881 int r= r0 + r1 + r2 + r3; | |
4579 | 1882 #endif |
4578 | 1883 dstU[i]= ((2*RU*r + GU*g + 2*BU*b)>>(RGB2YUV_SHIFT+2-2)) + 128; |
1884 dstV[i]= ((2*RV*r + GV*g + 2*BV*b)>>(RGB2YUV_SHIFT+2-2)) + 128; | |
1885 } | |
1886 } | |
1887 | |
4580 | 1888 static inline void RENAME(bgr15ToY)(uint8_t *dst, uint8_t *src, int width) |
1889 { | |
1890 int i; | |
1891 for(i=0; i<width; i++) | |
1892 { | |
1893 int d= src[i*2] + (src[i*2+1]<<8); | |
1894 int b= d&0x1F; | |
1895 int g= (d>>5)&0x1F; | |
1896 int r= (d>>10)&0x1F; | |
1897 | |
1898 dst[i]= ((RY*r + GY*g + BY*b)>>(RGB2YUV_SHIFT-3)) + 16; | |
1899 } | |
1900 } | |
1901 | |
1902 static inline void RENAME(bgr15ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
1903 { | |
1904 int i; | |
1905 for(i=0; i<width; i++) | |
1906 { | |
1907 #if 1 | |
1908 int d0= le2me_32( ((uint32_t*)src1)[i] ); | |
1909 int d1= le2me_32( ((uint32_t*)src2)[i] ); | |
1910 | |
1911 int dl= (d0&0x03E07C1F) + (d1&0x03E07C1F); | |
1912 int dh= ((d0>>5)&0x03E0F81F) + ((d1>>5)&0x03E0F81F); | |
1913 | |
1914 int dh2= (dh>>11) + (dh<<21); | |
1915 int d= dh2 + dl; | |
1916 | |
1917 int b= d&0x7F; | |
1918 int r= (d>>10)&0x7F; | |
1919 int g= d>>21; | |
1920 #else | |
1921 int d0= src1[i*4] + (src1[i*4+1]<<8); | |
1922 int b0= d0&0x1F; | |
1923 int g0= (d0>>5)&0x1F; | |
1924 int r0= (d0>>10)&0x1F; | |
1925 | |
1926 int d1= src1[i*4+2] + (src1[i*4+3]<<8); | |
1927 int b1= d1&0x1F; | |
1928 int g1= (d1>>5)&0x1F; | |
1929 int r1= (d1>>10)&0x1F; | |
1930 | |
1931 int d2= src2[i*4] + (src2[i*4+1]<<8); | |
1932 int b2= d2&0x1F; | |
1933 int g2= (d2>>5)&0x1F; | |
1934 int r2= (d2>>10)&0x1F; | |
1935 | |
1936 int d3= src2[i*4+2] + (src2[i*4+3]<<8); | |
1937 int b3= d3&0x1F; | |
1938 int g3= (d3>>5)&0x1F; | |
1939 int r3= (d3>>10)&0x1F; | |
1940 | |
1941 int b= b0 + b1 + b2 + b3; | |
1942 int g= g0 + g1 + g2 + g3; | |
1943 int r= r0 + r1 + r2 + r3; | |
1944 #endif | |
1945 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2-3)) + 128; | |
1946 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2-3)) + 128; | |
1947 } | |
1948 } | |
1949 | |
1950 | |
4558 | 1951 static inline void RENAME(rgb32ToY)(uint8_t *dst, uint8_t *src, int width) |
1952 { | |
1953 int i; | |
1954 for(i=0; i<width; i++) | |
1955 { | |
1956 int r= src[i*4+0]; | |
1957 int g= src[i*4+1]; | |
1958 int b= src[i*4+2]; | |
1959 | |
1960 dst[i]= ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; | |
1961 } | |
1962 } | |
1963 | |
1964 static inline void RENAME(rgb32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
1965 { | |
1966 int i; | |
1967 for(i=0; i<width; i++) | |
1968 { | |
1969 int r= src1[8*i + 0] + src1[8*i + 4] + src2[8*i + 0] + src2[8*i + 4]; | |
1970 int g= src1[8*i + 1] + src1[8*i + 5] + src2[8*i + 1] + src2[8*i + 5]; | |
1971 int b= src1[8*i + 2] + src1[8*i + 6] + src2[8*i + 2] + src2[8*i + 6]; | |
1972 | |
1973 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
1974 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
1975 } | |
1976 } | |
1977 | |
1978 static inline void RENAME(rgb24ToY)(uint8_t *dst, uint8_t *src, int width) | |
1979 { | |
1980 int i; | |
1981 for(i=0; i<width; i++) | |
1982 { | |
1983 int r= src[i*3+0]; | |
1984 int g= src[i*3+1]; | |
1985 int b= src[i*3+2]; | |
1986 | |
1987 dst[i]= ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; | |
1988 } | |
1989 } | |
1990 | |
1991 static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
1992 { | |
1993 int i; | |
1994 for(i=0; i<width; i++) | |
1995 { | |
1996 int r= src1[6*i + 0] + src1[6*i + 3] + src2[6*i + 0] + src2[6*i + 3]; | |
1997 int g= src1[6*i + 1] + src1[6*i + 4] + src2[6*i + 1] + src2[6*i + 4]; | |
1998 int b= src1[6*i + 2] + src1[6*i + 5] + src2[6*i + 2] + src2[6*i + 5]; | |
1999 | |
2000 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
2001 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
2002 } | |
2003 } | |
2004 | |
4467 | 2005 |
3272 | 2006 // Bilinear / Bicubic scaling |
2007 static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc, | |
2008 int16_t *filter, int16_t *filterPos, int filterSize) | |
2009 { | |
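	/* plain-C meaning (see the #else branch below):
	       dst[i]= clip( (sum over j<filterSize of src[filterPos[i]+j]*filter[filterSize*i+j]) >>7, 0, (1<<15)-1 );
	   the MMX code is this FIR unrolled for filterSize 4, 8 and the generic case */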
2010 #ifdef HAVE_MMX | |
2011 if(filterSize==4) // always true for upscaling, sometimes for downscaling too | |
2012 { | |
2013 int counter= -2*dstW; | |
2014 filter-= counter*2; | |
2015 filterPos-= counter/2; | |
2016 dst-= counter/2; | |
2017 asm volatile( | |
2018 "pxor %%mm7, %%mm7 \n\t" | |
4248 | 2019 "movq "MANGLE(w02)", %%mm6 \n\t" |
3272 | 2020 "pushl %%ebp \n\t" // we use 7 regs here ... |
2021 "movl %%eax, %%ebp \n\t" | |
2022 ".balign 16 \n\t" | |
2023 "1: \n\t" | |
2024 "movzwl (%2, %%ebp), %%eax \n\t" | |
2025 "movzwl 2(%2, %%ebp), %%ebx \n\t" | |
2026 "movq (%1, %%ebp, 4), %%mm1 \n\t" | |
2027 "movq 8(%1, %%ebp, 4), %%mm3 \n\t" | |
2028 "movd (%3, %%eax), %%mm0 \n\t" | |
2029 "movd (%3, %%ebx), %%mm2 \n\t" | |
2030 "punpcklbw %%mm7, %%mm0 \n\t" | |
2031 "punpcklbw %%mm7, %%mm2 \n\t" | |
2032 "pmaddwd %%mm1, %%mm0 \n\t" | |
2033 "pmaddwd %%mm2, %%mm3 \n\t" | |
2034 "psrad $8, %%mm0 \n\t" | |
2035 "psrad $8, %%mm3 \n\t" | |
2036 "packssdw %%mm3, %%mm0 \n\t" | |
2037 "pmaddwd %%mm6, %%mm0 \n\t" | |
2038 "packssdw %%mm0, %%mm0 \n\t" | |
2039 "movd %%mm0, (%4, %%ebp) \n\t" | |
2040 "addl $4, %%ebp \n\t" | |
2041 " jnc 1b \n\t" | |
3352 | 2042 |
3272 | 2043 "popl %%ebp \n\t" |
2044 : "+a" (counter) | |
2045 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst) | |
2046 : "%ebx" | |
2047 ); | |
2048 } | |
2049 else if(filterSize==8) | |
2050 { | |
2051 int counter= -2*dstW; | |
2052 filter-= counter*4; | |
2053 filterPos-= counter/2; | |
2054 dst-= counter/2; | |
2055 asm volatile( | |
2056 "pxor %%mm7, %%mm7 \n\t" | |
4248 | 2057 "movq "MANGLE(w02)", %%mm6 \n\t" |
3272 | 2058 "pushl %%ebp \n\t" // we use 7 regs here ... |
2059 "movl %%eax, %%ebp \n\t" | |
2060 ".balign 16 \n\t" | |
2061 "1: \n\t" | |
2062 "movzwl (%2, %%ebp), %%eax \n\t" | |
2063 "movzwl 2(%2, %%ebp), %%ebx \n\t" | |
2064 "movq (%1, %%ebp, 8), %%mm1 \n\t" | |
2065 "movq 16(%1, %%ebp, 8), %%mm3 \n\t" | |
2066 "movd (%3, %%eax), %%mm0 \n\t" | |
2067 "movd (%3, %%ebx), %%mm2 \n\t" | |
2068 "punpcklbw %%mm7, %%mm0 \n\t" | |
2069 "punpcklbw %%mm7, %%mm2 \n\t" | |
2070 "pmaddwd %%mm1, %%mm0 \n\t" | |
2071 "pmaddwd %%mm2, %%mm3 \n\t" | |
2072 |
3272 | 2073 "movq 8(%1, %%ebp, 8), %%mm1 \n\t" |
2074 "movq 24(%1, %%ebp, 8), %%mm5 \n\t" | |
2075 "movd 4(%3, %%eax), %%mm4 \n\t" | |
2076 "movd 4(%3, %%ebx), %%mm2 \n\t" | |
2077 "punpcklbw %%mm7, %%mm4 \n\t" | |
2078 "punpcklbw %%mm7, %%mm2 \n\t" | |
2079 "pmaddwd %%mm1, %%mm4 \n\t" | |
2080 "pmaddwd %%mm2, %%mm5 \n\t" | |
2081 "paddd %%mm4, %%mm0 \n\t" | |
2082 "paddd %%mm5, %%mm3 \n\t" | |
2083 | |
2084 "psrad $8, %%mm0 \n\t" | |
2085 "psrad $8, %%mm3 \n\t" | |
2086 "packssdw %%mm3, %%mm0 \n\t" | |
2087 "pmaddwd %%mm6, %%mm0 \n\t" | |
2088 "packssdw %%mm0, %%mm0 \n\t" | |
2089 "movd %%mm0, (%4, %%ebp) \n\t" | |
2090 "addl $4, %%ebp \n\t" | |
2091 " jnc 1b \n\t" | |
3344 | 2092 |
3272 | 2093 "popl %%ebp \n\t" |
2094 : "+a" (counter) | |
2095 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst) | |
2096 : "%ebx" | |
2097 ); | |
2098 } | |
2099 else | |
2100 { | |
2101 int counter= -2*dstW; | |
2102 // filter-= counter*filterSize/2; | |
2103 filterPos-= counter/2; | |
2104 dst-= counter/2; | |
2105 asm volatile( | |
2106 "pxor %%mm7, %%mm7 \n\t" | |
4248 | 2107 "movq "MANGLE(w02)", %%mm6 \n\t" |
3272 | 2108 ".balign 16 \n\t" |
2109 "1: \n\t" | |
2110 "movl %2, %%ecx \n\t" | |
2111 "movzwl (%%ecx, %0), %%eax \n\t" | |
2112 "movzwl 2(%%ecx, %0), %%ebx \n\t" | |
2113 "movl %5, %%ecx \n\t" | |
2114 "pxor %%mm4, %%mm4 \n\t" | |
2115 "pxor %%mm5, %%mm5 \n\t" | |
2116 "2: \n\t" | |
2117 "movq (%1), %%mm1 \n\t" | |
2118 "movq (%1, %6), %%mm3 \n\t" | |
2119 "movd (%%ecx, %%eax), %%mm0 \n\t" | |
2120 "movd (%%ecx, %%ebx), %%mm2 \n\t" | |
2121 "punpcklbw %%mm7, %%mm0 \n\t" | |
2122 "punpcklbw %%mm7, %%mm2 \n\t" | |
2123 "pmaddwd %%mm1, %%mm0 \n\t" | |
2124 "pmaddwd %%mm2, %%mm3 \n\t" | |
2125 "paddd %%mm3, %%mm5 \n\t" | |
2126 "paddd %%mm0, %%mm4 \n\t" | |
2127 "addl $8, %1 \n\t" | |
2128 "addl $4, %%ecx \n\t" | |
2129 "cmpl %4, %%ecx \n\t" | |
2130 " jb 2b \n\t" | |
2131 "addl %6, %1 \n\t" | |
2132 "psrad $8, %%mm4 \n\t" | |
2133 "psrad $8, %%mm5 \n\t" | |
2134 "packssdw %%mm5, %%mm4 \n\t" | |
2135 "pmaddwd %%mm6, %%mm4 \n\t" | |
2136 "packssdw %%mm4, %%mm4 \n\t" | |
2137 "movl %3, %%eax \n\t" | |
2138 "movd %%mm4, (%%eax, %0) \n\t" | |
2139 "addl $4, %0 \n\t" | |
2140 " jnc 1b \n\t" | |
3344 | 2141 |
3641 | 2142 : "+r" (counter), "+r" (filter) |
2143 : "m" (filterPos), "m" (dst), "m"(src+filterSize), | |
3272 | 2144 "m" (src), "r" (filterSize*2) |
3299 | 2145 : "%ebx", "%eax", "%ecx" |
3272 | 2146 ); |
2147 } | |
2148 #else | |
2149 int i; | |
2150 for(i=0; i<dstW; i++) | |
2151 { | |
2152 int j; | |
2153 int srcPos= filterPos[i]; | |
2154 int val=0; | |
3344 | 2155 // printf("filterPos: %d\n", filterPos[i]); |
3272 | 2156 for(j=0; j<filterSize; j++) |
2157 { | |
2158 // printf("filter: %d, src: %d\n", filter[i], src[srcPos + j]); | |
2159 val += ((int)src[srcPos + j])*filter[filterSize*i + j]; | |
2160 } | |
2161 // filter += hFilterSize; | |
2162 dst[i] = MIN(MAX(0, val>>7), (1<<15)-1); // the cubic equation does overflow ... | |
2163 // dst[i] = val>>7; | |
2164 } | |
2165 #endif | |
2166 } | |
2167 // *** horizontal scale Y line to temp buffer | |
2168 static inline void RENAME(hyscale)(uint16_t *dst, int dstWidth, uint8_t *src, int srcW, int xInc, |
2169 int flags, int canMMX2BeUsed, int16_t *hLumFilter, |
4467 | 2170 int16_t *hLumFilterPos, int hLumFilterSize, void *funnyYCode, |
5452 | 2171 int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter, |
2172 int32_t *mmx2FilterPos) | |
2469 | 2173 { |
4467 | 2174 if(srcFormat==IMGFMT_YUY2) |
2175 { | |
2176 RENAME(yuy2ToY)(formatConvBuffer, src, srcW); | |
2177 src= formatConvBuffer; | |
2178 } | |
9071 | 2179 else if(srcFormat==IMGFMT_UYVY) |
2180 { | |
2181 RENAME(uyvyToY)(formatConvBuffer, src, srcW); | |
2182 src= formatConvBuffer; | |
2183 } | |
4467 | 2184 else if(srcFormat==IMGFMT_BGR32) |
2185 { | |
2186 RENAME(bgr32ToY)(formatConvBuffer, src, srcW); | |
2187 src= formatConvBuffer; | |
2188 } | |
2189 else if(srcFormat==IMGFMT_BGR24) | |
2190 { | |
2191 RENAME(bgr24ToY)(formatConvBuffer, src, srcW); | |
2192 src= formatConvBuffer; | |
2193 } | |
4578 | 2194 else if(srcFormat==IMGFMT_BGR16) |
2195 { | |
2196 RENAME(bgr16ToY)(formatConvBuffer, src, srcW); | |
2197 src= formatConvBuffer; | |
2198 } | |
4580 | 2199 else if(srcFormat==IMGFMT_BGR15) |
2200 { | |
2201 RENAME(bgr15ToY)(formatConvBuffer, src, srcW); | |
2202 src= formatConvBuffer; | |
2203 } | |
4558 | 2204 else if(srcFormat==IMGFMT_RGB32) |
2205 { | |
2206 RENAME(rgb32ToY)(formatConvBuffer, src, srcW); | |
2207 src= formatConvBuffer; | |
2208 } | |
2209 else if(srcFormat==IMGFMT_RGB24) | |
2210 { | |
2211 RENAME(rgb24ToY)(formatConvBuffer, src, srcW); | |
2212 src= formatConvBuffer; | |
2213 } | |
4467 | 2214 |
3352 | 2215 #ifdef HAVE_MMX |
2216 // use the new MMX scaler if the MMX2 one can't be used (it's faster than the plain x86 asm one) | |
2217 if(!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed)) |
3352 | 2218 #else |
2219 if(!(flags&SWS_FAST_BILINEAR)) |
3352 | 2220 #endif |
3272 | 2221 { |
2222 RENAME(hScale)(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize); | |
2223 } | |
2224 else // Fast Bilinear upscale / crap downscale | |
2225 { | |
2469 | 2226 #ifdef ARCH_X86 |
2227 #ifdef HAVE_MMX2 | |
2671 | 2228 int i; |
2469 | 2229 if(canMMX2BeUsed) |
2230 { | |
2231 asm volatile( | |
2232 "pxor %%mm7, %%mm7 \n\t" | |
5452 | 2233 "movl %0, %%ecx \n\t" |
2234 "movl %1, %%edi \n\t" | |
2235 "movl %2, %%edx \n\t" | |
2236 "movl %3, %%ebx \n\t" | |
2469 | 2237 "xorl %%eax, %%eax \n\t" // i |
5452 | 2238 PREFETCH" (%%ecx) \n\t" |
2239 PREFETCH" 32(%%ecx) \n\t" | |
2240 PREFETCH" 64(%%ecx) \n\t" | |
2520 | 2241 |
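		/* funnyYCode points at a block of scaler code generated elsewhere
		   (presumably at context setup) for this particular xInc; each
		   FUNNY_Y_CODE below runs one chunk of it via the indirect call */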
2469 | 2242 #define FUNNY_Y_CODE \ |
5452 | 2243 "movl (%%ebx), %%esi \n\t"\ |
2244 "call *%4 \n\t"\ | |
2245 "addl (%%ebx, %%eax), %%ecx \n\t"\ | |
2246 "addl %%eax, %%edi \n\t"\ | |
2247 "xorl %%eax, %%eax \n\t"\ | |
2520 | 2248 |
2469 | 2249 FUNNY_Y_CODE |
2250 FUNNY_Y_CODE | |
2251 FUNNY_Y_CODE | |
2252 FUNNY_Y_CODE | |
2253 FUNNY_Y_CODE | |
2254 FUNNY_Y_CODE | |
2255 FUNNY_Y_CODE | |
2256 FUNNY_Y_CODE | |
2257 | |
5452 | 2258 :: "m" (src), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos), |
2259 "m" (funnyYCode) | |
2469 | 2260 : "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi" |
2261 ); | |
3215 | 2262 for(i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128; |
2469 | 2263 } |
2264 else | |
2265 { | |
2266 #endif | |
2267 //NO MMX just normal asm ... | |
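			// xpos is kept as 16.16 fixed point split across two registers: the
			// fractional part lives in %%cx and the integer part in %%ebx, so the
			// addw/adcl pair at the end of each step performs xpos+=xInc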
2268 asm volatile( | |
2269 "xorl %%eax, %%eax \n\t" // i | |
2270 "xorl %%ebx, %%ebx \n\t" // xx | |
2271 "xorl %%ecx, %%ecx \n\t" // 2*xalpha | |
2272 ".balign 16 \n\t" |
2469 | 2273 "1: \n\t" |
2274 "movzbl (%0, %%ebx), %%edi \n\t" //src[xx] | |
2275 "movzbl 1(%0, %%ebx), %%esi \n\t" //src[xx+1] | |
2276 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] | |
2277 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha | |
2278 "shll $16, %%edi \n\t" | |
2279 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha) | |
2280 "movl %1, %%edi \n\t" | |
2281 "shrl $9, %%esi \n\t" | |
2282 "movw %%si, (%%edi, %%eax, 2) \n\t" | |
2283 "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFFFF | |
2284 "adcl %3, %%ebx \n\t" //xx+= xInc>>16 + carry | |
2285 | |
2286 "movzbl (%0, %%ebx), %%edi \n\t" //src[xx] | |
2287 "movzbl 1(%0, %%ebx), %%esi \n\t" //src[xx+1] | |
2288 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] | |
2289 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha | |
2290 "shll $16, %%edi \n\t" | |
2291 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha) | |
2292 "movl %1, %%edi \n\t" | |
2293 "shrl $9, %%esi \n\t" | |
2294 "movw %%si, 2(%%edi, %%eax, 2) \n\t" | |
2295 "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFFFF | |
2296 "adcl %3, %%ebx \n\t" //xx+= xInc>>16 + carry | |
2297 | |
2298 | |
2299 "addl $2, %%eax \n\t" | |
2300 "cmpl %2, %%eax \n\t" | |
2301 " jb 1b \n\t" | |
2302 | |
2303 | |
2304 :: "r" (src), "m" (dst), "m" (dstWidth), "m" (xInc>>16), "m" (xInc&0xFFFF) | |
2305 : "%eax", "%ebx", "%ecx", "%edi", "%esi" | |
2306 ); | |
2307 #ifdef HAVE_MMX2 | |
2308 } //if MMX2 can't be used | |
2309 #endif | |
2310 #else | |
2671 | 2311 int i; |
2312 unsigned int xpos=0; | |
2313 for(i=0;i<dstWidth;i++) | |
2314 { | |
2315 register unsigned int xx=xpos>>16; | |
2316 register unsigned int xalpha=(xpos&0xFFFF)>>9; | |
2317 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha; | |
2318 xpos+=xInc; | |
2319 } | |
2469 | 2320 #endif |
3272 | 2321 } |
2469 | 2322 } |
2323 | |
2324 inline static void RENAME(hcscale)(uint16_t *dst, int dstWidth, uint8_t *src1, uint8_t *src2, |
2325 int srcW, int xInc, int flags, int canMMX2BeUsed, int16_t *hChrFilter, |
4467 | 2326 int16_t *hChrFilterPos, int hChrFilterSize, void *funnyUVCode, |
5452 | 2327 int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter, |
2328 int32_t *mmx2FilterPos) | |
2469 | 2329 { |
4467 | 2330 if(srcFormat==IMGFMT_YUY2) |
2331 { | |
2332 RENAME(yuy2ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); | |
2333 src1= formatConvBuffer; | |
2334 src2= formatConvBuffer+2048; | |
2335 } | |
9071 | 2336 else if(srcFormat==IMGFMT_UYVY) |
2337 { | |
2338 RENAME(uyvyToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); | |
2339 src1= formatConvBuffer; | |
2340 src2= formatConvBuffer+2048; | |
2341 } | |
4467 | 2342 else if(srcFormat==IMGFMT_BGR32) |
2343 { | |
2344 RENAME(bgr32ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); | |
2345 src1= formatConvBuffer; | |
2346 src2= formatConvBuffer+2048; | |
2347 } | |
2348 else if(srcFormat==IMGFMT_BGR24) | |
2349 { | |
2350 RENAME(bgr24ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); | |
2351 src1= formatConvBuffer; | |
2352 src2= formatConvBuffer+2048; | |
2353 } | |
4578 | 2354 else if(srcFormat==IMGFMT_BGR16) |
2355 { | |
2356 RENAME(bgr16ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); | |
2357 src1= formatConvBuffer; | |
2358 src2= formatConvBuffer+2048; | |
2359 } | |
4580 | 2360 else if(srcFormat==IMGFMT_BGR15) |
2361 { | |
2362 RENAME(bgr15ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); | |
2363 src1= formatConvBuffer; | |
2364 src2= formatConvBuffer+2048; | |
2365 } | |
4558 | 2366 else if(srcFormat==IMGFMT_RGB32) |
2367 { | |
2368 RENAME(rgb32ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); | |
2369 src1= formatConvBuffer; | |
2370 src2= formatConvBuffer+2048; | |
2371 } | |
2372 else if(srcFormat==IMGFMT_RGB24) | |
2373 { | |
2374 RENAME(rgb24ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); | |
2375 src1= formatConvBuffer; | |
2376 src2= formatConvBuffer+2048; | |
2377 } | |
4481 | 2378 else if(isGray(srcFormat)) |
2379 { | |
2380 return; | |
2381 } | |
4467 | 2382 |
3352 | 2383 #ifdef HAVE_MMX |
2384 // use the new MMX scaler if the MMX2 one can't be used (it's faster than the plain x86 asm one) | |
2385 if(!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed)) |
3352 | 2386 #else |
2387 if(!(flags&SWS_FAST_BILINEAR)) |
3352 | 2388 #endif |
3272 | 2389 { |
2390 RENAME(hScale)(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); | |
2391 RENAME(hScale)(dst+2048, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); | |
2392 } | |
2393 else // Fast Bilinear upscale / crap downscale | |
2394 { | |
2469 | 2395 #ifdef ARCH_X86 |
2396 #ifdef HAVE_MMX2 | |
2671 | 2397 int i; |
2469 | 2398 if(canMMX2BeUsed) |
2399 { | |
2400 asm volatile( | |
5452 | 2401 "pxor %%mm7, %%mm7 \n\t" |
2402 "movl %0, %%ecx \n\t" | |
2403 "movl %1, %%edi \n\t" | |
2404 "movl %2, %%edx \n\t" | |
2405 "movl %3, %%ebx \n\t" | |
2406 "xorl %%eax, %%eax \n\t" // i | |
2407 PREFETCH" (%%ecx) \n\t" | |
2408 PREFETCH" 32(%%ecx) \n\t" | |
2409 PREFETCH" 64(%%ecx) \n\t" | |
2410 | |
2411 #define FUNNY_UV_CODE \ | |
2412 "movl (%%ebx), %%esi \n\t"\ | |
2413 "call *%4 \n\t"\ | |
2414 "addl (%%ebx, %%eax), %%ecx \n\t"\ | |
2415 "addl %%eax, %%edi \n\t"\ | |
2416 "xorl %%eax, %%eax \n\t"\ | |
2469 | 2417 |
5452 | 2418 FUNNY_UV_CODE |
2419 FUNNY_UV_CODE | |
2420 FUNNY_UV_CODE | |
2421 FUNNY_UV_CODE | |
2422 "xorl %%eax, %%eax \n\t" // i | |
2423 "movl %5, %%ecx \n\t" // src | |
2424 "movl %1, %%edi \n\t" // buf1 | |
2425 "addl $4096, %%edi \n\t" | |
2426 PREFETCH" (%%ecx) \n\t" | |
2427 PREFETCH" 32(%%ecx) \n\t" | |
2428 PREFETCH" 64(%%ecx) \n\t" | |
2469 | 2429 |
5452 | 2430 FUNNY_UV_CODE |
2431 FUNNY_UV_CODE | |
2432 FUNNY_UV_CODE | |
2433 FUNNY_UV_CODE | |
2469 | 2434 |
5452 | 2435 :: "m" (src1), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos), |
2436 "m" (funnyUVCode), "m" (src2) | |
2437 : "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi" | |
2438 ); | |
3344 | 2439 for(i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) |
2469 | 2440 { |
3344 | 2441 // printf("%d %d %d\n", dstWidth, i, srcW); |
2442 dst[i] = src1[srcW-1]*128; | |
2443 dst[i+2048] = src2[srcW-1]*128; | |
2469 | 2444 } |
2445 } | |
2446 else | |
2447 { | |
2448 #endif | |
2449 asm volatile( | |
2450 "xorl %%eax, %%eax \n\t" // i | |
2451 "xorl %%ebx, %%ebx \n\t" // xx | |
2452 "xorl %%ecx, %%ecx \n\t" // 2*xalpha | |
2453 ".balign 16 \n\t" |
2469 | 2454 "1: \n\t" |
2455 "movl %0, %%esi \n\t" | |
2456 "movzbl (%%esi, %%ebx), %%edi \n\t" //src[xx] | |
2457 "movzbl 1(%%esi, %%ebx), %%esi \n\t" //src[xx+1] | |
2458 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] | |
2459 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha | |
2460 "shll $16, %%edi \n\t" | |
2461 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha) | |
2462 "movl %1, %%edi \n\t" | |
2463 "shrl $9, %%esi \n\t" | |
2464 "movw %%si, (%%edi, %%eax, 2) \n\t" | |
2465 | |
2466 "movzbl (%5, %%ebx), %%edi \n\t" //src[xx] | |
2467 "movzbl 1(%5, %%ebx), %%esi \n\t" //src[xx+1] | |
2468 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] | |
2469 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha | |
2470 "shll $16, %%edi \n\t" | |
2471 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha) | |
2472 "movl %1, %%edi \n\t" | |
2473 "shrl $9, %%esi \n\t" | |
2474 "movw %%si, 4096(%%edi, %%eax, 2)\n\t" | |
2475 | |
2476 "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFFFF | |
2477 "adcl %3, %%ebx \n\t" //xx+= xInc>>16 + carry | |
2478 "addl $1, %%eax \n\t" | |
2479 "cmpl %2, %%eax \n\t" | |
2480 " jb 1b \n\t" | |
2481 | |
2482 :: "m" (src1), "m" (dst), "m" (dstWidth), "m" (xInc>>16), "m" (xInc&0xFFFF), | |
2483 "r" (src2) | |
2484 : "%eax", "%ebx", "%ecx", "%edi", "%esi" | |
2485 ); | |
2486 #ifdef HAVE_MMX2 | |
2487 } //if MMX2 can't be used | |
2488 #endif | |
2489 #else | |
2671 | 2490 int i; |
2491 unsigned int xpos=0; | |
2492 for(i=0;i<dstWidth;i++) | |
2493 { | |
2494 register unsigned int xx=xpos>>16; | |
2495 register unsigned int xalpha=(xpos&0xFFFF)>>9; | |
2496 dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha); | |
2497 dst[i+2048]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha); | |
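			// xalpha is only 7 bits here, so (xalpha^127)==127-xalpha: this is
			// ordinary linear interpolation between the two neighbouring samples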
2566 | 2498 /* slower |
2499 dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha; | |
2500 dst[i+2048]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha; | |
2501 */ | |
2671 | 2502 xpos+=xInc; |
2503 } | |
2469 | 2504 #endif |
3272 | 2505 } |
2506 } | |
2507 | |
4467 | 2508 static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStrideParam[], int srcSliceY, |
4698 | 2509 int srcSliceH, uint8_t* dstParam[], int dstStrideParam[]){ |
3344 | 2510 |
2511 /* load a few things into local vars to make the code more readable and faster */ |
2512 const int srcW= c->srcW; |
2513 const int dstW= c->dstW; |
2514 const int dstH= c->dstH; |
2515 const int chrDstW= c->chrDstW; |
2516 const int chrSrcW= c->chrSrcW; |
2517 const int lumXInc= c->lumXInc; |
2518 const int chrXInc= c->chrXInc; |
4295 | 2519 const int dstFormat= c->dstFormat; |
6503 | 2520 const int srcFormat= c->srcFormat; |
2521 const int flags= c->flags; |
2522 const int canMMX2BeUsed= c->canMMX2BeUsed; |
2523 int16_t *vLumFilterPos= c->vLumFilterPos; |
2524 int16_t *vChrFilterPos= c->vChrFilterPos; |
2525 int16_t *hLumFilterPos= c->hLumFilterPos; |
2526 int16_t *hChrFilterPos= c->hChrFilterPos; |
2527 int16_t *vLumFilter= c->vLumFilter; |
2528 int16_t *vChrFilter= c->vChrFilter; |
2529 int16_t *hLumFilter= c->hLumFilter; |
2530 int16_t *hChrFilter= c->hChrFilter; |
2531 int16_t *lumMmxFilter= c->lumMmxFilter; |
2532 int16_t *chrMmxFilter= c->chrMmxFilter; |
2533 const int vLumFilterSize= c->vLumFilterSize; |
2534 const int vChrFilterSize= c->vChrFilterSize; |
2535 const int hLumFilterSize= c->hLumFilterSize; |
2536 const int hChrFilterSize= c->hChrFilterSize; |
2537 int16_t **lumPixBuf= c->lumPixBuf; |
2538 int16_t **chrPixBuf= c->chrPixBuf; |
2539 const int vLumBufSize= c->vLumBufSize; |
2540 const int vChrBufSize= c->vChrBufSize; |
2541 uint8_t *funnyYCode= c->funnyYCode; |
2542 uint8_t *funnyUVCode= c->funnyUVCode; |
4467 | 2543 uint8_t *formatConvBuffer= c->formatConvBuffer; |
2544 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample; |
2545 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample); |
3344 | 2546 |
2547 /* vars which will change and which we need to store back into the context */ |
2548 int dstY= c->dstY; |
2549 int lumBufIndex= c->lumBufIndex; |
2550 int chrBufIndex= c->chrBufIndex; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2551 int lastInLumBuf= c->lastInLumBuf; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2552 int lastInChrBuf= c->lastInChrBuf; |
4467 | 2553 int srcStride[3]; |
4698 | 2554 int dstStride[3]; |
4419 | 2555 uint8_t *src[3]; |
2556 uint8_t *dst[3]; | |
6540 | 2557 |
2558 orderYUV(c->srcFormat, src, srcStride, srcParam, srcStrideParam); | |
2559 orderYUV(c->dstFormat, dst, dstStride, dstParam, dstStrideParam); | |
6503 | 2560 |
6540 | 2561 if(isPacked(c->srcFormat)){ |
4467 | 2562 src[0]= |
2563 src[1]= | |
2564 src[2]= srcParam[0]; | |
6540 | 2565 srcStride[0]= |
4467 | 2566 srcStride[1]= |
6540 | 2567 srcStride[2]= srcStrideParam[0]; |
4467 | 2568 } |
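/* [editor's note, not in the original source] for packed input (YUY2, RGB, ...) there is
   only one plane, so all three source pointers and strides above are simply aliased to
   plane 0; the horizontal scalers later unpack those pixels via formatConvBuffer. */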
6540 | 2569 srcStride[1]<<= c->vChrDrop; |
2570 srcStride[2]<<= c->vChrDrop; | |
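/* [editor's note, not in the original source] vChrDrop asks the scaler to read only every
   (1<<vChrDrop)-th source chroma line; widening the chroma strides here is what actually
   skips the lines in between. */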
4419 | 2571 |
6517 | 2572 // printf("swscale %X %X %X -> %X %X %X\n", (int)src[0], (int)src[1], (int)src[2], |
2573 // (int)dst[0], (int)dst[1], (int)dst[2]); | |
2574 | |
2575 #if 0 //self test FIXME move to a vfilter or something | |
2576 { | |
2577 static volatile int i=0; | |
2578 i++; | |
2579 if(srcFormat==IMGFMT_YV12 && i==1 && srcSliceH>= c->srcH) | |
2580 selfTest(src, srcStride, c->srcW, c->srcH); | |
2581 i--; | |
2582 } | |
2583 #endif | |
4554 | 2584 |
2585 //printf("sws Strides:%d %d %d -> %d %d %d\n", srcStride[0],srcStride[1],srcStride[2], | |
2586 //dstStride[0],dstStride[1],dstStride[2]); | |
4419 | 2587 |
2588 if(dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0) | |
2589 { | |
2590 static int firstTime=1; //FIXME move this into the context perhaps | |
2591 if(flags & SWS_PRINT_INFO && firstTime) | |
2592 { | |
5937 | 2593 mp_msg(MSGT_SWS,MSGL_WARN,"SwScaler: Warning: dstStride is not aligned!\n" |
4419 | 2594 "SwScaler: ->cannot do aligned memory acesses anymore\n"); |
2595 firstTime=0; | |
2596 } | |
2597 } | |
3344 | 2598 |
4467 | 2599 /* Note: the user might start scaling the picture in the middle, so this will not get executed. |
2600 This is not really intended, but it currently works, so people might do it. */ |
4276 | 2601 if(srcSliceY ==0){ |
2602 lumBufIndex=0; |
2603 chrBufIndex=0; |
4467 | 2604 dstY=0; |
4276 | 2605 lastInLumBuf= -1; |
2606 lastInChrBuf= -1; |
3272 | 2607 } |
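/* [editor's note, not in the original source] the scaler is fed slice by slice; the ring
   buffer state is only reset when the caller starts a new frame at the top of the picture
   (srcSliceY==0), otherwise it is carried over between calls via the context. */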
3344 | 2608 |
2609 for(;dstY < dstH; dstY++){ | |
4276 | 2610 unsigned char *dest =dst[0]+dstStride[0]*dstY; |
6520 | 2611 const int chrDstY= dstY>>c->chrDstVSubSample; |
2612 unsigned char *uDest=dst[1]+dstStride[1]*chrDstY; | |
2613 unsigned char *vDest=dst[2]+dstStride[2]*chrDstY; | |
3344 | 2614 |
2615 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input | |
2616 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input | |
2617 const int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input | |
2618 const int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input | |
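/* [editor's note, not in the original source] output line dstY is a weighted sum of
   vLumFilterSize consecutive source lines starting at vLumFilterPos[dstY] (likewise for
   chroma), so [firstLumSrcY..lastLumSrcY] is exactly the window that must already sit in
   the ring buffers before this line can be written. */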
2619 | |
4290 | 2620 //handle holes (FAST_BILINEAR & weird filters) |
2621 if(firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1; |
2622 if(firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1; |
2623 //printf("%d %d %d\n", firstChrSrcY, lastInChrBuf, vChrBufSize); |
3344 | 2624 ASSERT(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1) |
2625 ASSERT(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1) | |
2216 | 2626 |
3344 | 2627 // Do we have enough lines in this slice to output the dstY line? |
6532 | 2628 if(lastLumSrcY < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample)) |
2469 | 2629 { |
3344 | 2630 //Do horizontal scaling |
2631 while(lastInLumBuf < lastLumSrcY) | |
2632 { | |
4276 | 2633 uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0]; |
3344 | 2634 lumBufIndex++; |
4290 | 2635 // printf("%d %d %d %d\n", lumBufIndex, vLumBufSize, lastInLumBuf, lastLumSrcY); |
3344 | 2636 ASSERT(lumBufIndex < 2*vLumBufSize) |
2637 ASSERT(lastInLumBuf + 1 - srcSliceY < srcSliceH) | |
2638 ASSERT(lastInLumBuf + 1 - srcSliceY >= 0) | |
2639 // printf("%d %d\n", lumBufIndex, vLumBufSize); | |
4276 | 2640 RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc, |
2641 flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize, |
5452 | 2642 funnyYCode, c->srcFormat, formatConvBuffer, |
2643 c->lumMmx2Filter, c->lumMmx2FilterPos); | |
3344 | 2644 lastInLumBuf++; |
2645 } | |
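/* [editor's note, not in the original source] the loop above horizontally scales each
   source luma line needed for this output line into the next ring buffer slot
   (lumPixBuf[lumBufIndex]) as 16-bit intermediates; the loop below does the same for the
   two chroma planes. */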
2646 while(lastInChrBuf < lastChrSrcY) | |
2647 { | |
6532 | 2648 uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1]; |
2649 uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2]; |
3344 | 2650 chrBufIndex++; |
2651 ASSERT(chrBufIndex < 2*vChrBufSize) | |
6532 | 2652 ASSERT(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH)) |
2653 ASSERT(lastInChrBuf + 1 - chrSrcSliceY >= 0) |
4276 | 2654 //FIXME pass these parameters through the context struct (at least some of them) |
6503 | 2655 |
2656 if(!(isGray(srcFormat) || isGray(dstFormat))) | |
6532 | 2657 RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc, |
4276 | 2658 flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize, |
5452 | 2659 funnyUVCode, c->srcFormat, formatConvBuffer, |
2660 c->chrMmx2Filter, c->chrMmx2FilterPos); | |
3344 | 2661 lastInChrBuf++; |
2662 } | |
2663 //wrap buf index around to stay inside the ring buffer | |
2664 if(lumBufIndex >= vLumBufSize ) lumBufIndex-= vLumBufSize; | |
2665 if(chrBufIndex >= vChrBufSize ) chrBufIndex-= vChrBufSize; | |
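/* [editor's note, not in the original source] the pixel buffers hold 2*vLumBufSize
   (resp. 2*vChrBufSize) pointers with the second half mirroring the first, so an index
   that ran past the end can be brought back by a single subtraction instead of a modulo;
   e.g. with vLumBufSize==4, slot 5 and slot 1 refer to the same line. */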
2469 | 2666 } |
3344 | 2667 else // not enough lines left in this slice -> load the rest in the buffer |
2469 | 2668 { |
3344 | 2669 /* printf("%d %d Last:%d %d LastInBuf:%d %d Index:%d %d Y:%d FSize: %d %d BSize: %d %d\n", |
2670 firstChrSrcY,firstLumSrcY,lastChrSrcY,lastLumSrcY, | |
2671 lastInChrBuf,lastInLumBuf,chrBufIndex,lumBufIndex,dstY,vChrFilterSize,vLumFilterSize, | |
6532 | 2672 vChrBufSize, vLumBufSize);*/ |
2673 |
3344 | 2674 //Do horizontal scaling |
2675 while(lastInLumBuf+1 < srcSliceY + srcSliceH) | |
2469 | 2676 { |
4276 | 2677 uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0]; |
3344 | 2678 lumBufIndex++; |
2679 ASSERT(lumBufIndex < 2*vLumBufSize) | |
2680 ASSERT(lastInLumBuf + 1 - srcSliceY < srcSliceH) | |
2681 ASSERT(lastInLumBuf + 1 - srcSliceY >= 0) | |
4276 | 2682 RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc, |
2683 flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize, |
5452 | 2684 funnyYCode, c->srcFormat, formatConvBuffer, |
2685 c->lumMmx2Filter, c->lumMmx2FilterPos); | |
3344 | 2686 lastInLumBuf++; |
2469 | 2687 } |
6532 | 2688 while(lastInChrBuf+1 < (chrSrcSliceY + chrSrcSliceH)) |
3344 | 2689 { |
6532 | 2690 uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1]; |
2691 uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2]; |
3344 | 2692 chrBufIndex++; |
2693 ASSERT(chrBufIndex < 2*vChrBufSize) | |
6532 | 2694 ASSERT(lastInChrBuf + 1 - chrSrcSliceY < chrSrcSliceH) |
2695 ASSERT(lastInChrBuf + 1 - chrSrcSliceY >= 0) |
6503 | 2696 |
2697 if(!(isGray(srcFormat) || isGray(dstFormat))) | |
6532 | 2698 RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc, |
4276 | 2699 flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize, |
5452 | 2700 funnyUVCode, c->srcFormat, formatConvBuffer, |
2701 c->chrMmx2Filter, c->chrMmx2FilterPos); | |
3344 | 2702 lastInChrBuf++; |
2703 } | |
2704 //wrap buf index around to stay inside the ring buffer | |
2705 if(lumBufIndex >= vLumBufSize ) lumBufIndex-= vLumBufSize; | |
2706 if(chrBufIndex >= vChrBufSize ) chrBufIndex-= vChrBufSize; | |
2707 break; //we can't output a dstY line so let's try with the next slice |
2469 | 2708 } |
2264 | 2709 |
2748 | 2710 #ifdef HAVE_MMX |
3344 | 2711 b5Dither= dither8[dstY&1]; |
2712 g6Dither= dither4[dstY&1]; | |
2713 g5Dither= dither8[dstY&1]; | |
2714 r5Dither= dither8[(dstY+1)&1]; | |
2748 | 2715 #endif |
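/* [editor's note, not in the original source] these pick one of two precomputed dither
   rows per output line, giving a simple two-line ordered dither for the 5- and 6-bit
   channels of 15/16 bpp RGB output; the red row uses the opposite parity so its pattern
   does not coincide with green/blue. */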
4276 | 2716 if(dstY < dstH-2) |
3352 | 2717 { |
6503 | 2718 if(isPlanarYUV(dstFormat) || isGray(dstFormat)) //YV12 like |
3344 | 2719 { |
7351 | 2720 const int chrSkipMask= (1<<c->chrDstVSubSample)-1; |
2721 if((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi | |
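/* [editor's note, not in the original source] with vertically subsampled chroma in the
   output (e.g. YV12) only every (1<<chrDstVSubSample)-th output line owns a chroma row;
   passing NULL for uDest/vDest makes the output routine write luma only on the other
   lines. */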
3344 | 2722 if(vLumFilterSize == 1 && vChrFilterSize == 1) // Unscaled YV12 |
2723 { | |
2724 int16_t *lumBuf = lumPixBuf[0]; | |
2725 int16_t *chrBuf= chrPixBuf[0]; | |
6532 | 2726 RENAME(yuv2yuv1)(lumBuf, chrBuf, dest, uDest, vDest, dstW, chrDstW); |
3344 | 2727 } |
2728 else //General YV12 | |
2729 { | |
2730 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; | |
2731 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; | |
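/* [editor's note, not in the original source] lastInLumBuf is the source line currently
   sitting in ring slot lumBufIndex, so lumBufIndex + (firstLumSrcY - lastInLumBuf) is the
   slot holding firstLumSrcY; the extra +vLumBufSize keeps the index non-negative and lands
   in the mirrored second half of the pointer array instead of wrapping explicitly. */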
2732 RENAME(yuv2yuvX)( | |
6532 | 2733 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, |
2734 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
2735 dest, uDest, vDest, dstW, chrDstW, |
2736 lumMmxFilter+dstY*vLumFilterSize*4, chrMmxFilter+chrDstY*vChrFilterSize*4); |
3344 | 2737 } |
2738 } | |
2739 else | |
2740 { | |
2741 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; | |
2742 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; | |
2743 | |
2744 ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); | |
2745 ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); | |
2746 if(vLumFilterSize == 1 && vChrFilterSize == 2) //Unscaled RGB | |
2747 { | |
2748 int chrAlpha= vChrFilter[2*dstY+1]; | |
2749 | |
7723 | 2750 RENAME(yuv2packed1)(c, *lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1), |
6578 | 2751 dest, dstW, chrAlpha, dstFormat, flags, dstY); |
3344 | 2752 } |
2753 else if(vLumFilterSize == 2 && vChrFilterSize == 2) //BiLinear Upscale RGB | |
2754 { | |
2755 int lumAlpha= vLumFilter[2*dstY+1]; | |
2756 int chrAlpha= vChrFilter[2*dstY+1]; | |
2757 | |
7723 | 2758 RENAME(yuv2packed2)(c, *lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1), |
6578 | 2759 dest, dstW, lumAlpha, chrAlpha, dstY); |
3344 | 2760 } |
2761 else //General RGB | |
2762 { | |
7723 | 2763 RENAME(yuv2packedX)(c, |
3344 | 2764 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, |
2765 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, | |
6578 | 2766 dest, dstW, |
2767 lumMmxFilter+dstY*vLumFilterSize*4, chrMmxFilter+dstY*vChrFilterSize*4, dstY); | |
3344 | 2768 } |
2769 } | |
3352 | 2770 } |
2771 else // hmm, looks like we can't use MMX here without overwriting this array's tail |
2772 { | |
2773 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; | |
2774 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; | |
6615 | 2775 if(isPlanarYUV(dstFormat) || isGray(dstFormat)) //YV12 |
3352 | 2776 { |
7351 | 2777 const int chrSkipMask= (1<<c->chrDstVSubSample)-1; |
2778 if((dstY&chrSkipMask) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi | |
6540 | 2779 yuv2yuvXinC( |
6532 | 2780 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, |
2781 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
6540 | 2782 dest, uDest, vDest, dstW, chrDstW); |
3352 | 2783 } |
2784 else | |
2785 { | |
2786 ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); | |
2787 ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); | |
7723 | 2788 yuv2packedXinC(c, |
3352 | 2789 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, |
2790 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, | |
6578 | 2791 dest, dstW, dstY); |
3352 | 2792 } |
2793 } | |
3344 | 2794 } |
2534 | 2795 |
2534 | 2796 #ifdef HAVE_MMX |
2797 __asm __volatile(SFENCE:::"memory"); |
2566 | 2798 __asm __volatile(EMMS:::"memory"); |
2534 | 2799 #endif |
4276 | 2800 /* store changed local vars back in the context */ |
2801 c->dstY= dstY; |
2802 c->lumBufIndex= lumBufIndex; |
2803 c->chrBufIndex= chrBufIndex; |
2804 c->lastInLumBuf= lastInLumBuf; |
2805 c->lastInChrBuf= lastInChrBuf; |
3641 | 2806 } |