Mercurial > mplayer.hg
annotate postproc/swscale_template.c @ 6554:5aa2dbf4d300
killing globals
author | michael |
---|---|
date | Mon, 24 Jun 2002 20:05:53 +0000 |
parents | 5b3cace64e9d |
children | 3727eb94a783 |
rev | line source |
---|---|
4295 | 1 /* |
2 Copyright (C) 2001-2002 Michael Niedermayer <michaelni@gmx.at> | |
2216 | 3 |
4295 | 4 This program is free software; you can redistribute it and/or modify |
5 it under the terms of the GNU General Public License as published by | |
6 the Free Software Foundation; either version 2 of the License, or | |
7 (at your option) any later version. | |
2216 | 8 |
4295 | 9 This program is distributed in the hope that it will be useful, |
10 but WITHOUT ANY WARRANTY; without even the implied warranty of | |
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
12 GNU General Public License for more details. | |
13 | |
14 You should have received a copy of the GNU General Public License | |
15 along with this program; if not, write to the Free Software | |
16 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
17 */ | |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
18 |
2540 | 19 #undef MOVNTQ /* drop any earlier definitions; the macros are defined afresh below according to the HAVE_* flags */ |
2680 | 20 #undef PAVGB |
3136 | 21 #undef PREFETCH |
22 #undef PREFETCHW | |
23 #undef EMMS | |
24 #undef SFENCE | |
25 | |
26 #ifdef HAVE_3DNOW | |
27 /* On K6, femms is faster than emms. On K7, femms is mapped directly to emms. */ | |
28 #define EMMS "femms" | |
29 #else | |
30 #define EMMS "emms" | |
31 #endif | |
32 | |
33 #ifdef HAVE_3DNOW /* choose the prefetch mnemonics for the selected instruction set */ | |
34 #define PREFETCH "prefetch" | |
35 #define PREFETCHW "prefetchw" | |
36 #elif defined ( HAVE_MMX2 ) | |
37 #define PREFETCH "prefetchnta" | |
38 #define PREFETCHW "prefetcht0" | |
39 #else /* neither HAVE_3DNOW nor HAVE_MMX2 */ | |
40 #define PREFETCH "/nop" /* NOTE(review): "/nop" looks like a placeholder that emits no prefetch - confirm how the assembler treats the leading '/' */ | |
41 #define PREFETCHW "/nop" | |
42 #endif | |
43 | |
44 #ifdef HAVE_MMX2 | |
45 #define SFENCE "sfence" | |
46 #else /* no HAVE_MMX2: same "/nop" placeholder as used for PREFETCH above */ | |
47 #define SFENCE "/nop" | |
48 #endif | |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
49 |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
50 #ifdef HAVE_MMX2 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
51 #define PAVGB(a,b) "pavgb " #a ", " #b " \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
52 #elif defined (HAVE_3DNOW) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
53 #define PAVGB(a,b) "pavgusb " #a ", " #b " \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
54 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
55 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
56 #ifdef HAVE_MMX2 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
57 #define MOVNTQ(a,b) "movntq " #a ", " #b " \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
58 #else |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
59 #define MOVNTQ(a,b) "movq " #a ", " #b " \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
60 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
61 |
3344 | 62 #define YSCALEYUV2YV12X(x) /* asm text: per 8 output bytes, sum coefficient*source products over a list of sources, then shift, pack and store; x is pasted as a displacement on the source loads */ \ |
63 "xorl %%eax, %%eax \n\t" /* eax = 0: output position, advanced by 8 per outer pass */\ | |
64 "pxor %%mm3, %%mm3 \n\t" /* clear accumulator for words 0-3 */\ | |
65 "pxor %%mm4, %%mm4 \n\t" /* clear accumulator for words 4-7 */\ | |
66 "movl %0, %%edx \n\t" /* edx = operand 0; counted up to zero by the inner loop (presumably a negative tap count - confirm at the call site) */\ | |
67 ".balign 16 \n\t" /* FIXME Unroll? */\ | |
68 "1: \n\t" /* shared target of both the inner (jnz) and outer (jb) branch */\ | |
69 "movl (%1, %%edx, 4), %%esi \n\t" /* esi = 32-bit entry edx of the table at operand 1, used as the source base */\ | |
70 "movq (%2, %%edx, 8), %%mm0 \n\t" /* filterCoeff */\ | |
71 "movq " #x "(%%esi, %%eax, 2), %%mm2 \n\t" /* srcData */\ | |
72 "movq 8+" #x "(%%esi, %%eax, 2), %%mm5 \n\t" /* srcData */\ | |
73 "pmulhw %%mm0, %%mm2 \n\t" /* high 16 bits of each signed 16x16 product */\ | |
74 "pmulhw %%mm0, %%mm5 \n\t"\ | |
75 "paddw %%mm2, %%mm3 \n\t" /* accumulate */\ | |
76 "paddw %%mm5, %%mm4 \n\t"\ | |
77 "addl $1, %%edx \n\t"\ | |
78 " jnz 1b \n\t" /* next source until edx reaches 0 */\ | |
79 "psraw $3, %%mm3 \n\t" /* arithmetic >>3 on every word of the sums */\ | |
80 "psraw $3, %%mm4 \n\t"\ | |
81 "packuswb %%mm4, %%mm3 \n\t" /* saturate the 8 words to 8 unsigned bytes */\ | |
82 MOVNTQ(%%mm3, (%3, %%eax)) /* store 8 bytes at operand 3 + eax */\ | |
83 "addl $8, %%eax \n\t"\ | |
84 "cmpl %4, %%eax \n\t" /* flags for the jb below; pxor and movl in between do not modify EFLAGS */\ | |
85 "pxor %%mm3, %%mm3 \n\t" /* reset accumulators for the next pass */\ | |
86 "pxor %%mm4, %%mm4 \n\t"\ | |
87 "movl %0, %%edx \n\t" /* reload the inner-loop counter */\ | |
88 "jb 1b \n\t" /* repeat while eax is below operand 4 (unsigned compare) */ | |
89 | |
90 #define YSCALEYUV2YV121 /* asm text: convert 16-bit samples at operand 0 to bytes at operand 1 (>>7, unsigned saturation), 8 samples per pass */ \ | |
91 "movl %2, %%eax \n\t" /* eax = operand 2: starting index */\ | |
92 ".balign 16 \n\t" /* FIXME Unroll? */\ | |
93 "1: \n\t"\ | |
94 "movq (%0, %%eax, 2), %%mm0 \n\t" /* words 0-3 (2 bytes per sample) */\ | |
95 "movq 8(%0, %%eax, 2), %%mm1 \n\t" /* words 4-7 */\ | |
96 "psraw $7, %%mm0 \n\t" /* arithmetic >>7 on every word */\ | |
97 "psraw $7, %%mm1 \n\t"\ | |
98 "packuswb %%mm1, %%mm0 \n\t" /* saturate the 8 words to 8 unsigned bytes */\ | |
99 MOVNTQ(%%mm0, (%1, %%eax)) /* store 8 bytes at operand 1 + eax */\ | |
100 "addl $8, %%eax \n\t"\ | |
101 "jnc 1b \n\t" /* loop until the addl carries out; presumably eax starts as a negative count with operands 0/1 pointing past the end - confirm at the call site */ | |
102 | |
103 /* | |
104 :: "m" (-lumFilterSize), "m" (-chrFilterSize), | |
105 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), | |
106 "r" (dest), "m" (dstW), | |
107 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) | |
108 : "%eax", "%ebx", "%ecx", "%edx", "%esi" | |
109 */ | |
110 #define YSCALEYUV2RGBX /* asm text: filter chroma, then luma, then convert to packed B/R/G bytes; operands are listed in the comment above; the branch back to 1: is not part of this macro */ \ | |
111 "xorl %%eax, %%eax \n\t" /* eax = 0: index used by the source loads below */\ | |
112 ".balign 16 \n\t"\ | |
113 "1: \n\t"\ | |
114 "movl %1, %%edx \n\t" /* -chrFilterSize */\ | |
115 "movl %3, %%ebx \n\t" /* chrMmxFilter+chrFilterSize */\ | |
116 "movl %7, %%ecx \n\t" /* chrSrc+chrFilterSize */\ | |
117 "pxor %%mm3, %%mm3 \n\t" /* clear U accumulator */\ | |
118 "pxor %%mm4, %%mm4 \n\t" /* clear V accumulator */\ | |
119 "2: \n\t"\ | |
120 "movl (%%ecx, %%edx, 4), %%esi \n\t" /* esi = source pointer for this filter tap */\ | |
121 "movq (%%ebx, %%edx, 8), %%mm0 \n\t" /* filterCoeff */\ | |
122 "movq (%%esi, %%eax), %%mm2 \n\t" /* UsrcData */\ | |
123 "movq 4096(%%esi, %%eax), %%mm5 \n\t" /* VsrcData */\ | |
124 "pmulhw %%mm0, %%mm2 \n\t"\ | |
125 "pmulhw %%mm0, %%mm5 \n\t"\ | |
126 "paddw %%mm2, %%mm3 \n\t"\ | |
127 "paddw %%mm5, %%mm4 \n\t"\ | |
128 "addl $1, %%edx \n\t"\ | |
129 " jnz 2b \n\t" /* next chroma tap until edx reaches 0 */\ | |
130 \ | |
131 "movl %0, %%edx \n\t" /* -lumFilterSize */\ | |
132 "movl %2, %%ebx \n\t" /* lumMmxFilter+lumFilterSize */\ | |
133 "movl %6, %%ecx \n\t" /* lumSrc+lumFilterSize */\ | |
134 "pxor %%mm1, %%mm1 \n\t" /* clear Y1 accumulator */\ | |
135 "pxor %%mm7, %%mm7 \n\t" /* clear Y2 accumulator */\ | |
136 "2: \n\t"\ | |
137 "movl (%%ecx, %%edx, 4), %%esi \n\t"\ | |
138 "movq (%%ebx, %%edx, 8), %%mm0 \n\t" /* filterCoeff */\ | |
139 "movq (%%esi, %%eax, 2), %%mm2 \n\t" /* Y1srcData */\ | |
140 "movq 8(%%esi, %%eax, 2), %%mm5 \n\t" /* Y2srcData */\ | |
141 "pmulhw %%mm0, %%mm2 \n\t"\ | |
142 "pmulhw %%mm0, %%mm5 \n\t"\ | |
143 "paddw %%mm2, %%mm1 \n\t"\ | |
144 "paddw %%mm5, %%mm7 \n\t"\ | |
145 "addl $1, %%edx \n\t"\ | |
146 " jnz 2b \n\t" /* next luma tap until edx reaches 0 */\ | |
147 \ | |
4248 | 148 "psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\ |
149 "psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\ | |
3344 | 150 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ |
151 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ | |
4248 | 152 "pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\ |
153 "pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\ | |
3344 | 154 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ |
4248 | 155 "pmulhw "MANGLE(ubCoeff)", %%mm2\n\t"\ |
156 "pmulhw "MANGLE(vrCoeff)", %%mm5\n\t"\ | |
157 "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\ | |
158 "psubw "MANGLE(w80)", %%mm7 \n\t" /* 8(Y-16)*/\ | |
159 "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\ | |
160 "pmulhw "MANGLE(yCoeff)", %%mm7 \n\t"\ | |
3344 | 161 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ |
162 "paddw %%mm3, %%mm4 \n\t" /* mm4 = ug + vg */\ | |
163 "movq %%mm2, %%mm0 \n\t" /* keep copies: the low-word unpack below overwrites mm2/mm5/mm4 */\ | |
164 "movq %%mm5, %%mm6 \n\t"\ | |
165 "movq %%mm4, %%mm3 \n\t"\ | |
166 "punpcklwd %%mm2, %%mm2 \n\t" /* duplicate each of the two low words, so one chroma term is added to two luma words */\ | |
167 "punpcklwd %%mm5, %%mm5 \n\t"\ | |
168 "punpcklwd %%mm4, %%mm4 \n\t"\ | |
169 "paddw %%mm1, %%mm2 \n\t"\ | |
170 "paddw %%mm1, %%mm5 \n\t"\ | |
171 "paddw %%mm1, %%mm4 \n\t"\ | |
172 "punpckhwd %%mm0, %%mm0 \n\t" /* same duplication for the two high words, combined with Y2 */\ | |
173 "punpckhwd %%mm6, %%mm6 \n\t"\ | |
174 "punpckhwd %%mm3, %%mm3 \n\t"\ | |
175 "paddw %%mm7, %%mm0 \n\t"\ | |
176 "paddw %%mm7, %%mm6 \n\t"\ | |
177 "paddw %%mm7, %%mm3 \n\t"\ | |
178 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ | |
179 "packuswb %%mm0, %%mm2 \n\t" /* saturate words to unsigned bytes: mm2 = 8 blue bytes */\ | |
180 "packuswb %%mm6, %%mm5 \n\t" /* mm5 = 8 red bytes */\ | |
181 "packuswb %%mm3, %%mm4 \n\t" /* mm4 = 8 green bytes */\ | |
182 "pxor %%mm7, %%mm7 \n\t" /* leave mm7 zeroed */ | |
183 | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
184 #define FULL_YSCALEYUV2RGB \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
185 "pxor %%mm7, %%mm7 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
186 "movd %6, %%mm6 \n\t" /*yalpha1*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
187 "punpcklwd %%mm6, %%mm6 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
188 "punpcklwd %%mm6, %%mm6 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
189 "movd %7, %%mm5 \n\t" /*uvalpha1*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
190 "punpcklwd %%mm5, %%mm5 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
191 "punpcklwd %%mm5, %%mm5 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
192 "xorl %%eax, %%eax \n\t"\ |
2800
7847d6b7ad3d
.balign or we'll align by 64kb on some architectures
michael
parents:
2799
diff
changeset
|
193 ".balign 16 \n\t"\ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
194 "1: \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
195 "movq (%0, %%eax, 2), %%mm0 \n\t" /*buf0[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
196 "movq (%1, %%eax, 2), %%mm1 \n\t" /*buf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
197 "movq (%2, %%eax,2), %%mm2 \n\t" /* uvbuf0[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
198 "movq (%3, %%eax,2), %%mm3 \n\t" /* uvbuf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
199 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
200 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
201 "pmulhw %%mm6, %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
202 "pmulhw %%mm5, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
203 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
204 "movq 4096(%2, %%eax,2), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
205 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
206 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
207 "movq 4096(%3, %%eax,2), %%mm0 \n\t" /* uvbuf1[eax+2048]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
208 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
209 "psubw %%mm0, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ |
4248 | 210 "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\ |
211 "psubw "MANGLE(w400)", %%mm3 \n\t" /* 8(U-128)*/\ | |
212 "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
213 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
214 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
215 "pmulhw %%mm5, %%mm4 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
216 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ |
4248 | 217 "pmulhw "MANGLE(ubCoeff)", %%mm3\n\t"\ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
218 "psraw $4, %%mm0 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ |
4248 | 219 "pmulhw "MANGLE(ugCoeff)", %%mm2\n\t"\ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
220 "paddw %%mm4, %%mm0 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\ |
4248 | 221 "psubw "MANGLE(w400)", %%mm0 \n\t" /* (V-128)8*/\ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
222 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
223 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
224 "movq %%mm0, %%mm4 \n\t" /* (V-128)8*/\ |
4248 | 225 "pmulhw "MANGLE(vrCoeff)", %%mm0\n\t"\ |
226 "pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
227 "paddw %%mm1, %%mm3 \n\t" /* B*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
228 "paddw %%mm1, %%mm0 \n\t" /* R*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
229 "packuswb %%mm3, %%mm3 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
230 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
231 "packuswb %%mm0, %%mm0 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
232 "paddw %%mm4, %%mm2 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
233 "paddw %%mm2, %%mm1 \n\t" /* G*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
234 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
235 "packuswb %%mm1, %%mm1 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
236 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
237 #define YSCALEYUV2RGB \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
238 "movd %6, %%mm6 \n\t" /*yalpha1*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
239 "punpcklwd %%mm6, %%mm6 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
240 "punpcklwd %%mm6, %%mm6 \n\t"\ |
6554 | 241 "movq %%mm6, 3968(%2) \n\t"\ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
242 "movd %7, %%mm5 \n\t" /*uvalpha1*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
243 "punpcklwd %%mm5, %%mm5 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
244 "punpcklwd %%mm5, %%mm5 \n\t"\ |
6554 | 245 "movq %%mm5, 3976(%2) \n\t"\ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
246 "xorl %%eax, %%eax \n\t"\ |
2800
7847d6b7ad3d
.balign or we'll align by 64kb on some architectures
michael
parents:
2799
diff
changeset
|
247 ".balign 16 \n\t"\ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
248 "1: \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
249 "movq (%2, %%eax), %%mm2 \n\t" /* uvbuf0[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
250 "movq (%3, %%eax), %%mm3 \n\t" /* uvbuf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
251 "movq 4096(%2, %%eax), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
252 "movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
253 "psubw %%mm3, %%mm2 \n\t" /* uvbuf0[eax] - uvbuf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
254 "psubw %%mm4, %%mm5 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048]*/\ |
6554 | 255 "movq 3976(%2), %%mm0 \n\t"\ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
256 "pmulhw %%mm0, %%mm2 \n\t" /* (uvbuf0[eax] - uvbuf1[eax])uvalpha1>>16*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
257 "pmulhw %%mm0, %%mm5 \n\t" /* (uvbuf0[eax+2048] - uvbuf1[eax+2048])uvalpha1>>16*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
258 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
259 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
260 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax]uvalpha1 - uvbuf1[eax](1-uvalpha1)*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
261 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048]uvalpha1 - uvbuf1[eax+2048](1-uvalpha1)*/\ |
4248 | 262 "psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\ |
263 "psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
264 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
265 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ |
4248 | 266 "pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\ |
267 "pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
268 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
269 "movq (%0, %%eax, 2), %%mm0 \n\t" /*buf0[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
270 "movq (%1, %%eax, 2), %%mm1 \n\t" /*buf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
271 "movq 8(%0, %%eax, 2), %%mm6 \n\t" /*buf0[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
272 "movq 8(%1, %%eax, 2), %%mm7 \n\t" /*buf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
273 "psubw %%mm1, %%mm0 \n\t" /* buf0[eax] - buf1[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
274 "psubw %%mm7, %%mm6 \n\t" /* buf0[eax] - buf1[eax]*/\ |
6554 | 275 "pmulhw 3968(%2), %%mm0 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ |
276 "pmulhw 3968(%2), %%mm6 \n\t" /* (buf0[eax] - buf1[eax])yalpha1>>16*/\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
277 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
278 "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
279 "paddw %%mm0, %%mm1 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
280 "paddw %%mm6, %%mm7 \n\t" /* buf0[eax]yalpha1 + buf1[eax](1-yalpha1) >>16*/\ |
4248 | 281 "pmulhw "MANGLE(ubCoeff)", %%mm2\n\t"\ |
282 "pmulhw "MANGLE(vrCoeff)", %%mm5\n\t"\ | |
283 "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\ | |
284 "psubw "MANGLE(w80)", %%mm7 \n\t" /* 8(Y-16)*/\ | |
285 "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\ | |
286 "pmulhw "MANGLE(yCoeff)", %%mm7 \n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
287 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
288 "paddw %%mm3, %%mm4 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
289 "movq %%mm2, %%mm0 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
290 "movq %%mm5, %%mm6 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
291 "movq %%mm4, %%mm3 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
292 "punpcklwd %%mm2, %%mm2 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
293 "punpcklwd %%mm5, %%mm5 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
294 "punpcklwd %%mm4, %%mm4 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
295 "paddw %%mm1, %%mm2 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
296 "paddw %%mm1, %%mm5 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
297 "paddw %%mm1, %%mm4 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
298 "punpckhwd %%mm0, %%mm0 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
299 "punpckhwd %%mm6, %%mm6 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
300 "punpckhwd %%mm3, %%mm3 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
301 "paddw %%mm7, %%mm0 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
302 "paddw %%mm7, %%mm6 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
303 "paddw %%mm7, %%mm3 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
304 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
305 "packuswb %%mm0, %%mm2 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
306 "packuswb %%mm6, %%mm5 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
307 "packuswb %%mm3, %%mm4 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
308 "pxor %%mm7, %%mm7 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
309 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
310 #define YSCALEYUV2RGB1 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
311 "xorl %%eax, %%eax \n\t"\ |
2800
7847d6b7ad3d
.balign or we'll align by 64kb on some architectures
michael
parents:
2799
diff
changeset
|
312 ".balign 16 \n\t"\ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
313 "1: \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
314 "movq (%2, %%eax), %%mm3 \n\t" /* uvbuf0[eax]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
315 "movq 4096(%2, %%eax), %%mm4 \n\t" /* uvbuf0[eax+2048]*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
316 "psraw $4, %%mm3 \n\t" /* uvbuf0[eax] - uvbuf1[eax] >>4*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
317 "psraw $4, %%mm4 \n\t" /* uvbuf0[eax+2048] - uvbuf1[eax+2048] >>4*/\ |
4248 | 318 "psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\ |
319 "psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
320 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
321 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ |
4248 | 322 "pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\ |
323 "pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
324 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
325 "movq (%0, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
326 "movq 8(%0, %%eax, 2), %%mm7 \n\t" /*buf0[eax]*/\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
327 "psraw $4, %%mm1 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
328 "psraw $4, %%mm7 \n\t" /* buf0[eax] - buf1[eax] >>4*/\ |
4248 | 329 "pmulhw "MANGLE(ubCoeff)", %%mm2\n\t"\ |
330 "pmulhw "MANGLE(vrCoeff)", %%mm5\n\t"\ | |
331 "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\ | |
332 "psubw "MANGLE(w80)", %%mm7 \n\t" /* 8(Y-16)*/\ | |
333 "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\ | |
334 "pmulhw "MANGLE(yCoeff)", %%mm7 \n\t"\ | |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
335 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
336 "paddw %%mm3, %%mm4 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
337 "movq %%mm2, %%mm0 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
338 "movq %%mm5, %%mm6 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
339 "movq %%mm4, %%mm3 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
340 "punpcklwd %%mm2, %%mm2 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
341 "punpcklwd %%mm5, %%mm5 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
342 "punpcklwd %%mm4, %%mm4 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
343 "paddw %%mm1, %%mm2 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
344 "paddw %%mm1, %%mm5 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
345 "paddw %%mm1, %%mm4 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
346 "punpckhwd %%mm0, %%mm0 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
347 "punpckhwd %%mm6, %%mm6 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
348 "punpckhwd %%mm3, %%mm3 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
349 "paddw %%mm7, %%mm0 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
350 "paddw %%mm7, %%mm6 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
351 "paddw %%mm7, %%mm3 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
352 /* mm0=B1, mm2=B2, mm3=G2, mm4=G1, mm5=R1, mm6=R2 */\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
353 "packuswb %%mm0, %%mm2 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
354 "packuswb %%mm6, %%mm5 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
355 "packuswb %%mm3, %%mm4 \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
356 "pxor %%mm7, %%mm7 \n\t" |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
357 |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
358 // do vertical chrominance interpolation |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
/*
 * YSCALEYUV2RGB1b: loop head of the 1:1 vertical-scale YUV->RGB asm that also
 * interpolates chroma vertically.
 * It clears %eax, opens the local label 1:, and then per iteration:
 *  - adds the chroma words at (%2,%eax) and (%3,%eax), and the pair located
 *    4096 bytes further on, and shifts each sum right by 5;
 *  - loads 16 bytes of luma from (%0,%eax,2) and shifts them right by 4;
 *  - applies w400/w80 and the ug/vg/ub/vr/y coefficient constants, then packs
 *    the results to bytes with unsigned saturation.
 * On exit mm2 holds packed B, mm4 packed G, mm5 packed R and mm7 is zero.
 * The macro neither advances %eax nor branches back to 1:; presumably one of
 * the WRITEBGR* macros is appended to store the pixels and close the loop
 * (TODO confirm at the call sites, which are outside this chunk).
 */
359 #define YSCALEYUV2RGB1b \ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
360 "xorl %%eax, %%eax \n\t"\ |
2800
7847d6b7ad3d
.balign or we¡ll align by 64kb on some architectures
michael
parents:
2799
diff
changeset
|
361 ".balign 16 \n\t"\ |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
362 "1: \n\t"\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
363 "movq (%2, %%eax), %%mm2 \n\t" /* uvbuf0[eax]*/\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
364 "movq (%3, %%eax), %%mm3 \n\t" /* uvbuf1[eax]*/\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
365 "movq 4096(%2, %%eax), %%mm5 \n\t" /* uvbuf0[eax+2048]*/\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
366 "movq 4096(%3, %%eax), %%mm4 \n\t" /* uvbuf1[eax+2048]*/\ |
2576 | 367 "paddw %%mm2, %%mm3 \n\t" /* uvbuf0[eax] + uvbuf1[eax]*/\ |
368 "paddw %%mm5, %%mm4 \n\t" /* uvbuf0[eax+2048] + uvbuf1[eax+2048]*/\ | |
3344 | 369 "psrlw $5, %%mm3 \n\t" /*FIXME might overflow*/\ |
370 "psrlw $5, %%mm4 \n\t" /*FIXME might overflow*/\ | |
4248 | 371 "psubw "MANGLE(w400)", %%mm3 \n\t" /* (U-128)8*/\ |
372 "psubw "MANGLE(w400)", %%mm4 \n\t" /* (V-128)8*/\ | |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
373 "movq %%mm3, %%mm2 \n\t" /* (U-128)8*/\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
374 "movq %%mm4, %%mm5 \n\t" /* (V-128)8*/\ |
4248 | 375 "pmulhw "MANGLE(ugCoeff)", %%mm3\n\t"\ |
376 "pmulhw "MANGLE(vgCoeff)", %%mm4\n\t"\ | |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
377 /* mm2=(U-128)8, mm3=ug, mm4=vg mm5=(V-128)8 */\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
378 "movq (%0, %%eax, 2), %%mm1 \n\t" /*buf0[eax]*/\ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
379 "movq 8(%0, %%eax, 2), %%mm7 \n\t" /*buf0[eax]*/\ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
380 "psraw $4, %%mm1 \n\t" /* buf0[eax] >>4 (only buf0 is loaded in this macro) */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
381 "psraw $4, %%mm7 \n\t" /* buf0[eax] >>4 (second group of 4 luma words) */\ |
4248 | 382 "pmulhw "MANGLE(ubCoeff)", %%mm2\n\t"\ |
383 "pmulhw "MANGLE(vrCoeff)", %%mm5\n\t"\ | |
384 "psubw "MANGLE(w80)", %%mm1 \n\t" /* 8(Y-16)*/\ | |
385 "psubw "MANGLE(w80)", %%mm7 \n\t" /* 8(Y-16)*/\ | |
386 "pmulhw "MANGLE(yCoeff)", %%mm1 \n\t"\ | |
387 "pmulhw "MANGLE(yCoeff)", %%mm7 \n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
388 /* mm1= Y1, mm2=ub, mm3=ug, mm4=vg mm5=vr, mm7=Y2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
389 "paddw %%mm3, %%mm4 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
390 "movq %%mm2, %%mm0 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
391 "movq %%mm5, %%mm6 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
392 "movq %%mm4, %%mm3 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
393 "punpcklwd %%mm2, %%mm2 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
394 "punpcklwd %%mm5, %%mm5 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
395 "punpcklwd %%mm4, %%mm4 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
396 "paddw %%mm1, %%mm2 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
397 "paddw %%mm1, %%mm5 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
398 "paddw %%mm1, %%mm4 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
399 "punpckhwd %%mm0, %%mm0 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
400 "punpckhwd %%mm6, %%mm6 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
401 "punpckhwd %%mm3, %%mm3 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
402 "paddw %%mm7, %%mm0 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
403 "paddw %%mm7, %%mm6 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
404 "paddw %%mm7, %%mm3 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
405 /* mm2=B1, mm0=B2, mm4=G1, mm3=G2, mm5=R1, mm6=R2 (1 = low words + Y1, 2 = high words + Y2) */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
406 "packuswb %%mm0, %%mm2 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
407 "packuswb %%mm6, %%mm5 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
408 "packuswb %%mm3, %%mm4 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
409 "pxor %%mm7, %%mm7 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
410 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
/*
 * WRITEBGR32: loop tail that stores 8 pixels as 4 bytes each.
 * Expects packed B in mm2, G in mm4, R in mm5 and zero in mm7.
 * The unpack steps interleave the bytes so that each pixel lands in memory as
 * B, G, R, 0 (the inline comments list bytes most-significant first).
 * Four MOVNTQ stores write 32 bytes starting at (%4 + %eax*4); %eax is then
 * advanced by 8 and the code jumps back to label 1 while %eax is below %5
 * (unsigned compare). Clobbers mm0-mm3, mm5 and mm6.
 */
411 #define WRITEBGR32 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
412 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
413 "movq %%mm2, %%mm1 \n\t" /* B */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
414 "movq %%mm5, %%mm6 \n\t" /* R */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
415 "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
416 "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
417 "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
418 "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
419 "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
420 "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
421 "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
422 "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
423 "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
424 "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
425 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
426 MOVNTQ(%%mm0, (%4, %%eax, 4))\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
427 MOVNTQ(%%mm2, 8(%4, %%eax, 4))\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
428 MOVNTQ(%%mm1, 16(%4, %%eax, 4))\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
429 MOVNTQ(%%mm3, 24(%4, %%eax, 4))\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
430 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
431 "addl $8, %%eax \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
432 "cmpl %5, %%eax \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
433 " jb 1b \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
434 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
/*
 * WRITEBGR16: loop tail that stores 8 pixels as 16-bit words.
 * Expects packed B in mm2, G in mm4, R in mm5 and zero in mm7.
 * B and R are masked with bF8 and G with bFC; B is shifted right by 3, G is
 * widened to words and shifted left by 3, and R becomes the high byte of each
 * word. Assuming bF8/bFC are per-byte 0xF8/0xFC masks as their names suggest
 * (they are defined outside this chunk, TODO confirm), each word ends up as
 * R in bits 15-11, G in bits 10-5 and B in bits 4-0.
 * Two MOVNTQ stores write 16 bytes at (%4 + %eax*2); %eax is then advanced by
 * 8 and the code jumps back to label 1 while %eax is below %5 (unsigned).
 */
435 #define WRITEBGR16 \ |
4248 | 436 "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\ |
437 "pand "MANGLE(bFC)", %%mm4 \n\t" /* G */\ | |
438 "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\ | |
2669 | 439 "psrlq $3, %%mm2 \n\t"\ |
440 \ | |
441 "movq %%mm2, %%mm1 \n\t"\ | |
442 "movq %%mm4, %%mm3 \n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
443 \ |
2669 | 444 "punpcklbw %%mm7, %%mm3 \n\t"\ |
445 "punpcklbw %%mm5, %%mm2 \n\t"\ | |
446 "punpckhbw %%mm7, %%mm4 \n\t"\ | |
447 "punpckhbw %%mm5, %%mm1 \n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
448 \ |
2669 | 449 "psllq $3, %%mm3 \n\t"\ |
450 "psllq $3, %%mm4 \n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
451 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
452 "por %%mm3, %%mm2 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
453 "por %%mm4, %%mm1 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
454 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
455 MOVNTQ(%%mm2, (%4, %%eax, 2))\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
456 MOVNTQ(%%mm1, 8(%4, %%eax, 2))\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
457 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
458 "addl $8, %%eax \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
459 "cmpl %5, %%eax \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
460 " jb 1b \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
461 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
/*
 * WRITEBGR15: loop tail that stores 8 pixels as 16-bit words with 5 bits per
 * colour component.
 * Expects packed B in mm2, G in mm4, R in mm5 and zero in mm7.
 * All three components are masked with bF8; B is shifted right by 3, R right
 * by 1, and G is widened to words and shifted left by 2, with R forming the
 * high byte of each word. Assuming bF8 is a per-byte 0xF8 mask as its name
 * suggests (defined outside this chunk, TODO confirm), each word ends up as
 * R in bits 14-10, G in bits 9-5 and B in bits 4-0.
 * Two MOVNTQ stores write 16 bytes at (%4 + %eax*2); %eax is then advanced by
 * 8 and the code jumps back to label 1 while %eax is below %5 (unsigned).
 */
462 #define WRITEBGR15 \ |
4248 | 463 "pand "MANGLE(bF8)", %%mm2 \n\t" /* B */\ |
464 "pand "MANGLE(bF8)", %%mm4 \n\t" /* G */\ | |
465 "pand "MANGLE(bF8)", %%mm5 \n\t" /* R */\ | |
2669 | 466 "psrlq $3, %%mm2 \n\t"\ |
467 "psrlq $1, %%mm5 \n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
468 \ |
2669 | 469 "movq %%mm2, %%mm1 \n\t"\ |
470 "movq %%mm4, %%mm3 \n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
471 \ |
2669 | 472 "punpcklbw %%mm7, %%mm3 \n\t"\ |
473 "punpcklbw %%mm5, %%mm2 \n\t"\ | |
474 "punpckhbw %%mm7, %%mm4 \n\t"\ | |
475 "punpckhbw %%mm5, %%mm1 \n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
476 \ |
2669 | 477 "psllq $2, %%mm3 \n\t"\ |
478 "psllq $2, %%mm4 \n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
479 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
480 "por %%mm3, %%mm2 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
481 "por %%mm4, %%mm1 \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
482 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
483 MOVNTQ(%%mm2, (%4, %%eax, 2))\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
484 MOVNTQ(%%mm1, 8(%4, %%eax, 2))\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
485 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
486 "addl $8, %%eax \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
487 "cmpl %5, %%eax \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
488 " jb 1b \n\t" |
2669 | 489 |
/*
 * WRITEBGR24OLD: loop tail that stores 8 pixels as 3 bytes each (24 bytes).
 * Expects packed B in mm2, G in mm4, R in mm5 and zero in mm7.
 * It first builds four 0RGB0RGB qwords exactly as the 32-bit writer does, then
 * squeezes out the padding bytes using shifts plus the bm00000111, bm11111000
 * and bm00001111 mask constants (defined outside this chunk).
 * Three MOVNTQ stores write to (%ebx), 8(%ebx) and 16(%ebx); %ebx is advanced
 * by 24, %eax by 8, and the code jumps back to label 1 while %eax is below %5
 * (unsigned compare). %ebx must therefore already hold the output pointer.
 * NOTE(review): the WRITEBGR24 selection later in this file maps only to the
 * MMX/MMX2 variants, so this macro looks unused here -- confirm before relying
 * on it.
 */
2730 | 490 #define WRITEBGR24OLD \ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
491 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
492 "movq %%mm2, %%mm1 \n\t" /* B */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
493 "movq %%mm5, %%mm6 \n\t" /* R */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
494 "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
495 "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
496 "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
497 "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
498 "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
499 "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\ |
2326 | 500 "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\ |
501 "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\ | |
502 "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\ | |
503 "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
504 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
505 "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
506 "psrlq $8, %%mm0 \n\t" /* 00RGB0RG 0 */\ |
4248 | 507 "pand "MANGLE(bm00000111)", %%mm4\n\t" /* 00000RGB 0 */\ |
508 "pand "MANGLE(bm11111000)", %%mm0\n\t" /* 00RGB000 0.5 */\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
509 "por %%mm4, %%mm0 \n\t" /* 00RGBRGB 0 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
510 "movq %%mm2, %%mm4 \n\t" /* 0RGB0RGB 1 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
511 "psllq $48, %%mm2 \n\t" /* GB000000 1 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
512 "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
513 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
514 "movq %%mm4, %%mm2 \n\t" /* 0RGB0RGB 1 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
515 "psrld $16, %%mm4 \n\t" /* 000R000R 1 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
516 "psrlq $24, %%mm2 \n\t" /* 0000RGB0 1.5 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
517 "por %%mm4, %%mm2 \n\t" /* 000RRGBR 1 */\ |
4248 | 518 "pand "MANGLE(bm00001111)", %%mm2\n\t" /* 0000RGBR 1 */\ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
519 "movq %%mm1, %%mm4 \n\t" /* 0RGB0RGB 2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
520 "psrlq $8, %%mm1 \n\t" /* 00RGB0RG 2 */\ |
4248 | 521 "pand "MANGLE(bm00000111)", %%mm4\n\t" /* 00000RGB 2 */\ |
522 "pand "MANGLE(bm11111000)", %%mm1\n\t" /* 00RGB000 2.5 */\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
523 "por %%mm4, %%mm1 \n\t" /* 00RGBRGB 2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
524 "movq %%mm1, %%mm4 \n\t" /* 00RGBRGB 2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
525 "psllq $32, %%mm1 \n\t" /* BRGB0000 2 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
526 "por %%mm1, %%mm2 \n\t" /* BRGBRGBR 1 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
527 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
528 "psrlq $32, %%mm4 \n\t" /* 000000RG 2.5 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
529 "movq %%mm3, %%mm5 \n\t" /* 0RGB0RGB 3 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
530 "psrlq $8, %%mm3 \n\t" /* 00RGB0RG 3 */\ |
4248 | 531 "pand "MANGLE(bm00000111)", %%mm5\n\t" /* 00000RGB 3 */\ |
532 "pand "MANGLE(bm11111000)", %%mm3\n\t" /* 00RGB000 3.5 */\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
533 "por %%mm5, %%mm3 \n\t" /* 00RGBRGB 3 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
534 "psllq $16, %%mm3 \n\t" /* RGBRGB00 3 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
535 "por %%mm4, %%mm3 \n\t" /* RGBRGBRG 2.5 */\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
536 \ |
2728 | 537 MOVNTQ(%%mm0, (%%ebx))\ |
538 MOVNTQ(%%mm2, 8(%%ebx))\ | |
539 MOVNTQ(%%mm3, 16(%%ebx))\ | |
540 "addl $24, %%ebx \n\t"\ | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
541 \ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
542 "addl $8, %%eax \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
543 "cmpl %5, %%eax \n\t"\ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
544 " jb 1b \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
545 |
/*
 * WRITEBGR24MMX: loop tail that stores 8 pixels as 3 bytes each (24 bytes)
 * using only shifts, punpckhdq and por -- no mask constants are read.
 * Expects packed B in mm2, G in mm4, R in mm5 and zero in mm7 on entry; mm7 is
 * overwritten as scratch, so it is no longer zero afterwards.
 * Steps: build four 0RGB0RGB qwords, turn each into 0RGBRGB0 (shift left by
 * 40 then punpckhdq with the saved copy), then splice neighbouring qwords
 * together into three fully packed output qwords.
 * The three MOVNTQ stores go to (%ebx), 8(%ebx) and 16(%ebx); %ebx is advanced
 * by 24, %eax by 8, and the code jumps back to label 1 while %eax is below %5
 * (unsigned compare). %ebx must already hold the output pointer.
 */
2730 | 546 #define WRITEBGR24MMX \ |
547 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ | |
548 "movq %%mm2, %%mm1 \n\t" /* B */\ | |
549 "movq %%mm5, %%mm6 \n\t" /* R */\ | |
550 "punpcklbw %%mm4, %%mm2 \n\t" /* GBGBGBGB 0 */\ | |
551 "punpcklbw %%mm7, %%mm5 \n\t" /* 0R0R0R0R 0 */\ | |
552 "punpckhbw %%mm4, %%mm1 \n\t" /* GBGBGBGB 2 */\ | |
553 "punpckhbw %%mm7, %%mm6 \n\t" /* 0R0R0R0R 2 */\ | |
554 "movq %%mm2, %%mm0 \n\t" /* GBGBGBGB 0 */\ | |
555 "movq %%mm1, %%mm3 \n\t" /* GBGBGBGB 2 */\ | |
556 "punpcklwd %%mm5, %%mm0 \n\t" /* 0RGB0RGB 0 */\ | |
557 "punpckhwd %%mm5, %%mm2 \n\t" /* 0RGB0RGB 1 */\ | |
558 "punpcklwd %%mm6, %%mm1 \n\t" /* 0RGB0RGB 2 */\ | |
559 "punpckhwd %%mm6, %%mm3 \n\t" /* 0RGB0RGB 3 */\ | |
560 \ | |
561 "movq %%mm0, %%mm4 \n\t" /* 0RGB0RGB 0 */\ | |
562 "movq %%mm2, %%mm6 \n\t" /* 0RGB0RGB 1 */\ | |
563 "movq %%mm1, %%mm5 \n\t" /* 0RGB0RGB 2 */\ | |
564 "movq %%mm3, %%mm7 \n\t" /* 0RGB0RGB 3 */\ | |
565 \ | |
566 "psllq $40, %%mm0 \n\t" /* RGB00000 0 */\ | |
567 "psllq $40, %%mm2 \n\t" /* RGB00000 1 */\ | |
568 "psllq $40, %%mm1 \n\t" /* RGB00000 2 */\ | |
569 "psllq $40, %%mm3 \n\t" /* RGB00000 3 */\ | |
570 \ | |
571 "punpckhdq %%mm4, %%mm0 \n\t" /* 0RGBRGB0 0 */\ | |
572 "punpckhdq %%mm6, %%mm2 \n\t" /* 0RGBRGB0 1 */\ | |
573 "punpckhdq %%mm5, %%mm1 \n\t" /* 0RGBRGB0 2 */\ | |
574 "punpckhdq %%mm7, %%mm3 \n\t" /* 0RGBRGB0 3 */\ | |
575 \ | |
576 "psrlq $8, %%mm0 \n\t" /* 00RGBRGB 0 */\ | |
577 "movq %%mm2, %%mm6 \n\t" /* 0RGBRGB0 1 */\ | |
578 "psllq $40, %%mm2 \n\t" /* GB000000 1 */\ | |
579 "por %%mm2, %%mm0 \n\t" /* GBRGBRGB 0 */\ | |
580 MOVNTQ(%%mm0, (%%ebx))\ | |
581 \ | |
582 "psrlq $24, %%mm6 \n\t" /* 0000RGBR 1 */\ | |
583 "movq %%mm1, %%mm5 \n\t" /* 0RGBRGB0 2 */\ | |
584 "psllq $24, %%mm1 \n\t" /* BRGB0000 2 */\ | |
585 "por %%mm1, %%mm6 \n\t" /* BRGBRGBR 1 */\ | |
586 MOVNTQ(%%mm6, 8(%%ebx))\ | |
587 \ | |
588 "psrlq $40, %%mm5 \n\t" /* 000000RG 2 */\ | |
589 "psllq $8, %%mm3 \n\t" /* RGBRGB00 3 */\ | |
590 "por %%mm3, %%mm5 \n\t" /* RGBRGBRG 2 */\ | |
591 MOVNTQ(%%mm5, 16(%%ebx))\ | |
592 \ | |
593 "addl $24, %%ebx \n\t"\ | |
594 \ | |
595 "addl $8, %%eax \n\t"\ | |
596 "cmpl %5, %%eax \n\t"\ | |
597 " jb 1b \n\t" | |
598 | |
/*
 * WRITEBGR24MMX2: loop tail that stores 8 pixels as 3 bytes each (24 bytes)
 * using pshufw plus the M24A/M24B/M24C mask constants (defined outside this
 * chunk).
 * Expects packed B in mm2, G in mm4, R in mm5 on entry. mm0 and mm7 are loaded
 * with M24A and M24C, so the zero that was in mm7 is lost; mm4 is shifted
 * right by 8 after the first store so that G bytes line up for the remaining
 * two output qwords.
 * Each output qword is formed by shuffling the needed words of B, G and R,
 * masking every component to the byte positions it owns, and OR-ing the three.
 * The three MOVNTQ stores go to (%ebx), 8(%ebx) and 16(%ebx); %ebx is advanced
 * by 24, %eax by 8, and the code jumps back to label 1 while %eax is below %5
 * (unsigned compare). %ebx must already hold the output pointer.
 */
599 #define WRITEBGR24MMX2 \ | |
600 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */\ | |
4248 | 601 "movq "MANGLE(M24A)", %%mm0 \n\t"\ |
602 "movq "MANGLE(M24C)", %%mm7 \n\t"\ | |
2730 | 603 "pshufw $0x50, %%mm2, %%mm1 \n\t" /* B3 B2 B3 B2 B1 B0 B1 B0 */\ |
604 "pshufw $0x50, %%mm4, %%mm3 \n\t" /* G3 G2 G3 G2 G1 G0 G1 G0 */\ | |
605 "pshufw $0x00, %%mm5, %%mm6 \n\t" /* R1 R0 R1 R0 R1 R0 R1 R0 */\ | |
606 \ | |
607 "pand %%mm0, %%mm1 \n\t" /* B2 B1 B0 */\ | |
608 "pand %%mm0, %%mm3 \n\t" /* G2 G1 G0 */\ | |
609 "pand %%mm7, %%mm6 \n\t" /* R1 R0 */\ | |
610 \ | |
611 "psllq $8, %%mm3 \n\t" /* G2 G1 G0 */\ | |
612 "por %%mm1, %%mm6 \n\t"\ | |
613 "por %%mm3, %%mm6 \n\t"\ | |
614 MOVNTQ(%%mm6, (%%ebx))\ | |
615 \ | |
616 "psrlq $8, %%mm4 \n\t" /* 00 G7 G6 G5 G4 G3 G2 G1 */\ | |
617 "pshufw $0xA5, %%mm2, %%mm1 \n\t" /* B5 B4 B5 B4 B3 B2 B3 B2 */\ | |
618 "pshufw $0x55, %%mm4, %%mm3 \n\t" /* G4 G3 G4 G3 G4 G3 G4 G3 */\ | |
619 "pshufw $0xA5, %%mm5, %%mm6 \n\t" /* R5 R4 R5 R4 R3 R2 R3 R2 */\ | |
620 \ | |
4248 | 621 "pand "MANGLE(M24B)", %%mm1 \n\t" /* B5 B4 B3 */\ |
2730 | 622 "pand %%mm7, %%mm3 \n\t" /* G4 G3 */\ |
623 "pand %%mm0, %%mm6 \n\t" /* R4 R3 R2 */\ | |
624 \ | |
625 "por %%mm1, %%mm3 \n\t" /* B5 G4 B4 G3 B3 */\ | |
626 "por %%mm3, %%mm6 \n\t"\ | |
627 MOVNTQ(%%mm6, 8(%%ebx))\ | |
628 \ | |
629 "pshufw $0xFF, %%mm2, %%mm1 \n\t" /* B7 B6 B7 B6 B7 B6 B7 B6 */\ | |
630 "pshufw $0xFA, %%mm4, %%mm3 \n\t" /* 00 G7 00 G7 G6 G5 G6 G5 */\ | |
631 "pshufw $0xFA, %%mm5, %%mm6 \n\t" /* R7 R6 R7 R6 R5 R4 R5 R4 */\ | |
632 \ | |
633 "pand %%mm7, %%mm1 \n\t" /* B7 B6 */\ | |
634 "pand %%mm0, %%mm3 \n\t" /* G7 G6 G5 */\ | |
4248 | 635 "pand "MANGLE(M24B)", %%mm6 \n\t" /* R7 R6 R5 */\ |
2730 | 636 \ |
637 "por %%mm1, %%mm3 \n\t"\ | |
638 "por %%mm3, %%mm6 \n\t"\ | |
639 MOVNTQ(%%mm6, 16(%%ebx))\ | |
640 \ | |
641 "addl $24, %%ebx \n\t"\ | |
642 \ | |
643 "addl $8, %%eax \n\t"\ | |
644 "cmpl %5, %%eax \n\t"\ | |
645 " jb 1b \n\t" | |
646 | |
/*
 * Pick the 24-bit writer: the pshufw-based WRITEBGR24MMX2 when HAVE_MMX2 is
 * defined, otherwise the plain WRITEBGR24MMX.
 * The #undef before each definition drops any earlier WRITEBGR24; presumably
 * this template is included several times with different HAVE_* settings
 * (TODO confirm against the including file).
 */
647 #ifdef HAVE_MMX2 | |
3126 | 648 #undef WRITEBGR24 |
2730 | 649 #define WRITEBGR24 WRITEBGR24MMX2 |
650 #else | |
3126 | 651 #undef WRITEBGR24 |
2730 | 652 #define WRITEBGR24 WRITEBGR24MMX |
653 #endif | |
654 | |
/*
 * yuv2yuvX: vertical filtering of luma and chroma source lines into planar
 * output (dest / uDest / vDest).
 *
 * With HAVE_MMX the work is done by the YSCALEYUV2YV12X asm macro (defined
 * outside this chunk), run once per plane with these operands:
 *   %0 = negated filter size (memory), %1 = source line pointer array advanced
 *   by the filter size, %2 = MMX filter table advanced by filterSize*4,
 *   %3 = destination plane, %4 = output width (memory).
 * Each asm statement declares %eax, %edx and %esi as clobbered.
 * The two chroma passes are skipped when uDest is NULL; vDest is not checked
 * separately. lumFilter and chrFilter are not used on this path.
 *
 * Without HAVE_MMX the call is forwarded to yuv2yuvXinC; lumMmxFilter and
 * chrMmxFilter are not used on that path.
 */
3344 | 655 static inline void RENAME(yuv2yuvX)(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, |
656 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
657 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW, |
3344 | 658 int16_t * lumMmxFilter, int16_t * chrMmxFilter) |
2519 | 659 { |
3344 | 660 #ifdef HAVE_MMX |
661 if(uDest != NULL) | |
662 { | |
/* U plane: macro argument 0, output to uDest, chrDstW samples wide */
663 asm volatile( | |
664 YSCALEYUV2YV12X(0) | |
665 :: "m" (-chrFilterSize), "r" (chrSrc+chrFilterSize), | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
666 "r" (chrMmxFilter+chrFilterSize*4), "r" (uDest), "m" (chrDstW) |
3344 | 667 : "%eax", "%edx", "%esi" |
668 ); | |
2519 | 669 |
/* V plane: same chroma sources, macro argument 4096 -- presumably the byte offset of the V samples inside each chroma line (TODO confirm in the macro) */
3344 | 670 asm volatile( |
671 YSCALEYUV2YV12X(4096) | |
672 :: "m" (-chrFilterSize), "r" (chrSrc+chrFilterSize), | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
673 "r" (chrMmxFilter+chrFilterSize*4), "r" (vDest), "m" (chrDstW) |
3344 | 674 : "%eax", "%edx", "%esi" |
675 ); | |
676 } | |
2521 | 677 |
/* Luma plane: always written, dstW samples wide */
3344 | 678 asm volatile( |
679 YSCALEYUV2YV12X(0) | |
680 :: "m" (-lumFilterSize), "r" (lumSrc+lumFilterSize), | |
681 "r" (lumMmxFilter+lumFilterSize*4), "r" (dest), "m" (dstW) | |
682 : "%eax", "%edx", "%esi" | |
683 ); | |
684 #else | |
/* Portable fallback: the C implementation receives the plain filter tables */
6540 | 685 yuv2yuvXinC(lumFilter, lumSrc, lumFilterSize, |
3352 | 686 chrFilter, chrSrc, chrFilterSize, |
6540 | 687 dest, uDest, vDest, dstW, chrDstW); |
3344 | 688 #endif |
689 } | |
690 | |
691 static inline void RENAME(yuv2yuv1)(int16_t *lumSrc, int16_t *chrSrc, | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
692 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW) |
3344 | 693 { |
694 #ifdef HAVE_MMX | |
695 if(uDest != NULL) | |
696 { | |
697 asm volatile( | |
698 YSCALEYUV2YV121 | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
699 :: "r" (chrSrc + chrDstW), "r" (uDest + chrDstW), |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
700 "g" (-chrDstW) |
3344 | 701 : "%eax" |
702 ); | |
703 | |
704 asm volatile( | |
705 YSCALEYUV2YV121 | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
706 :: "r" (chrSrc + 2048 + chrDstW), "r" (vDest + chrDstW), |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
707 "g" (-chrDstW) |
3344 | 708 : "%eax" |
709 ); | |
2519 | 710 } |
3344 | 711 |
712 asm volatile( | |
713 YSCALEYUV2YV121 | |
714 :: "r" (lumSrc + dstW), "r" (dest + dstW), | |
715 "g" (-dstW) | |
716 : "%eax" | |
717 ); | |
718 #else | |
719 int i; | |
720 for(i=0; i<dstW; i++) | |
721 { | |
722 int val= lumSrc[i]>>7; | |
6503 | 723 |
724 if(val&256){ | |
725 if(val<0) val=0; | |
726 else val=255; | |
727 } | |
3344 | 728 |
6503 | 729 dest[i]= val; |
3344 | 730 } |
731 | |
732 if(uDest != NULL) | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
733 for(i=0; i<chrDstW; i++) |
3344 | 734 { |
735 int u=chrSrc[i]>>7; | |
736 int v=chrSrc[i + 2048]>>7; | |
737 | |
6503 | 738 if((u|v)&256){ |
739 if(u<0) u=0; | |
740 else if (u>255) u=255; | |
741 if(v<0) v=0; | |
742 else if (v>255) v=255; | |
743 } | |
744 | |
745 uDest[i]= u; | |
746 vDest[i]= v; | |
3344 | 747 } |
748 #endif | |
2519 | 749 } |
750 | |
3344 | 751 |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
752 /** |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
753 * vertical scale YV12 to RGB |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
754 */ |
3344 | 755 static inline void RENAME(yuv2rgbX)(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, |
756 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
757 uint8_t *dest, int dstW, int dstFormat, int16_t * lumMmxFilter, int16_t * chrMmxFilter) |
3344 | 758 { |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
759 /* if(flags&SWS_FULL_UV_IPOL) |
3344 | 760 { |
761 //FIXME | |
762 }//FULL_UV_IPOL | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
763 else*/ |
3344 | 764 { |
765 #ifdef HAVE_MMX | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
766 if(dstFormat == IMGFMT_BGR32) //FIXME untested |
3344 | 767 { |
768 asm volatile( | |
769 YSCALEYUV2RGBX | |
770 WRITEBGR32 | |
771 | |
772 :: "m" (-lumFilterSize), "m" (-chrFilterSize), | |
773 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), | |
774 "r" (dest), "m" (dstW), | |
775 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) | |
776 : "%eax", "%ebx", "%ecx", "%edx", "%esi" | |
777 ); | |
778 } | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
779 else if(dstFormat == IMGFMT_BGR24) //FIXME untested |
3344 | 780 { |
781 asm volatile( | |
782 YSCALEYUV2RGBX | |
783 "leal (%%eax, %%eax, 2), %%ebx \n\t" //FIXME optimize | |
784 "addl %4, %%ebx \n\t" | |
785 WRITEBGR24 | |
786 | |
787 :: "m" (-lumFilterSize), "m" (-chrFilterSize), | |
788 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), | |
789 "r" (dest), "m" (dstW), | |
790 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) | |
791 : "%eax", "%ebx", "%ecx", "%edx", "%esi" | |
792 ); | |
793 } | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
794 else if(dstFormat==IMGFMT_BGR15) |
3344 | 795 { |
796 asm volatile( | |
797 YSCALEYUV2RGBX | |
798 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | |
799 #ifdef DITHER1XBPP | |
4248 | 800 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
801 "paddusb "MANGLE(g5Dither)", %%mm4\n\t" | |
802 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
3344 | 803 #endif |
804 | |
805 WRITEBGR15 | |
806 | |
807 :: "m" (-lumFilterSize), "m" (-chrFilterSize), | |
808 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), | |
809 "r" (dest), "m" (dstW), | |
810 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) | |
811 : "%eax", "%ebx", "%ecx", "%edx", "%esi" | |
812 ); | |
813 } | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
814 else if(dstFormat==IMGFMT_BGR16) |
3344 | 815 { |
816 asm volatile( | |
817 YSCALEYUV2RGBX | |
818 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ | |
819 #ifdef DITHER1XBPP | |
4248 | 820 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
821 "paddusb "MANGLE(g6Dither)", %%mm4\n\t" | |
822 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
3344 | 823 #endif |
824 | |
825 WRITEBGR16 | |
826 | |
827 :: "m" (-lumFilterSize), "m" (-chrFilterSize), | |
828 "m" (lumMmxFilter+lumFilterSize*4), "m" (chrMmxFilter+chrFilterSize*4), | |
829 "r" (dest), "m" (dstW), | |
830 "m" (lumSrc+lumFilterSize), "m" (chrSrc+chrFilterSize) | |
831 : "%eax", "%ebx", "%ecx", "%edx", "%esi" | |
832 ); | |
833 } | |
834 #else | |
3352 | 835 yuv2rgbXinC(lumFilter, lumSrc, lumFilterSize, |
836 chrFilter, chrSrc, chrFilterSize, | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
837 dest, dstW, dstFormat); |
3344 | 838 |
839 #endif | |
840 } //!FULL_UV_IPOL | |
841 } | |
842 | |
843 | |
844 /** | |
845 * vertical bilinear scale YV12 to RGB | |
846 */ | |
847 static inline void RENAME(yuv2rgb2)(uint16_t *buf0, uint16_t *buf1, uint16_t *uvbuf0, uint16_t *uvbuf1, | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
848 uint8_t *dest, int dstW, int yalpha, int uvalpha, int dstFormat, int flags) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
849 { |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
850 int yalpha1=yalpha^4095; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
851 int uvalpha1=uvalpha^4095; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
852 |
4467 | 853 if(flags&SWS_FULL_CHR_H_INT) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
854 { |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
855 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
856 #ifdef HAVE_MMX |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
857 if(dstFormat==IMGFMT_BGR32) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
858 { |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
859 asm volatile( |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
860 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
861 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
862 FULL_YSCALEYUV2RGB |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
863 "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
864 "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
865 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
866 "movq %%mm3, %%mm1 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
867 "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
868 "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
869 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
870 MOVNTQ(%%mm3, (%4, %%eax, 4)) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
871 MOVNTQ(%%mm1, 8(%4, %%eax, 4)) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
872 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
873 "addl $4, %%eax \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
874 "cmpl %5, %%eax \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
875 " jb 1b \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
876 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
877 |
3209 | 878 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
879 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
880 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
881 ); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
882 } |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
883 else if(dstFormat==IMGFMT_BGR24) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
884 { |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
885 asm volatile( |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
886 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
887 FULL_YSCALEYUV2RGB |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
888 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
889 // lsb ... msb |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
890 "punpcklbw %%mm1, %%mm3 \n\t" // BGBGBGBG |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
891 "punpcklbw %%mm7, %%mm0 \n\t" // R0R0R0R0 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
892 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
893 "movq %%mm3, %%mm1 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
894 "punpcklwd %%mm0, %%mm3 \n\t" // BGR0BGR0 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
895 "punpckhwd %%mm0, %%mm1 \n\t" // BGR0BGR0 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
896 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
897 "movq %%mm3, %%mm2 \n\t" // BGR0BGR0 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
898 "psrlq $8, %%mm3 \n\t" // GR0BGR00 |
4248 | 899 "pand "MANGLE(bm00000111)", %%mm2\n\t" // BGR00000 |
900 "pand "MANGLE(bm11111000)", %%mm3\n\t" // 000BGR00 | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
901 "por %%mm2, %%mm3 \n\t" // BGRBGR00 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
902 "movq %%mm1, %%mm2 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
903 "psllq $48, %%mm1 \n\t" // 000000BG |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
904 "por %%mm1, %%mm3 \n\t" // BGRBGRBG |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
905 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
906 "movq %%mm2, %%mm1 \n\t" // BGR0BGR0 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
907 "psrld $16, %%mm2 \n\t" // R000R000 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
908 "psrlq $24, %%mm1 \n\t" // 0BGR0000 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
909 "por %%mm2, %%mm1 \n\t" // RBGRR000 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
910 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
911 "movl %4, %%ebx \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
912 "addl %%eax, %%ebx \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
913 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
914 #ifdef HAVE_MMX2 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
915 //FIXME Alignment |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
916 "movntq %%mm3, (%%ebx, %%eax, 2)\n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
917 "movntq %%mm1, 8(%%ebx, %%eax, 2)\n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
918 #else |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
919 "movd %%mm3, (%%ebx, %%eax, 2) \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
920 "psrlq $32, %%mm3 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
921 "movd %%mm3, 4(%%ebx, %%eax, 2) \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
922 "movd %%mm1, 8(%%ebx, %%eax, 2) \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
923 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
924 "addl $4, %%eax \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
925 "cmpl %5, %%eax \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
926 " jb 1b \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
927 |
3209 | 928 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
929 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
930 : "%eax", "%ebx" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
931 ); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
932 } |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
933 else if(dstFormat==IMGFMT_BGR15) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
934 { |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
935 asm volatile( |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
936 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
937 FULL_YSCALEYUV2RGB |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
938 #ifdef DITHER1XBPP |
4248 | 939 "paddusb "MANGLE(g5Dither)", %%mm1\n\t" |
940 "paddusb "MANGLE(r5Dither)", %%mm0\n\t" | |
941 "paddusb "MANGLE(b5Dither)", %%mm3\n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
942 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
943 "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
944 "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
945 "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
946 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
947 "psrlw $3, %%mm3 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
948 "psllw $2, %%mm1 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
949 "psllw $7, %%mm0 \n\t" |
4248 | 950 "pand "MANGLE(g15Mask)", %%mm1 \n\t" |
951 "pand "MANGLE(r15Mask)", %%mm0 \n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
952 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
953 "por %%mm3, %%mm1 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
954 "por %%mm1, %%mm0 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
955 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
956 MOVNTQ(%%mm0, (%4, %%eax, 2)) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
957 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
958 "addl $4, %%eax \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
959 "cmpl %5, %%eax \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
960 " jb 1b \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
961 |
3209 | 962 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
963 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
964 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
965 ); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
966 } |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
967 else if(dstFormat==IMGFMT_BGR16) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
968 { |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
969 asm volatile( |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
970 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
971 FULL_YSCALEYUV2RGB |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
972 #ifdef DITHER1XBPP |
4248 | 973 "paddusb "MANGLE(g6Dither)", %%mm1\n\t" |
974 "paddusb "MANGLE(r5Dither)", %%mm0\n\t" | |
975 "paddusb "MANGLE(b5Dither)", %%mm3\n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
976 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
977 "punpcklbw %%mm7, %%mm1 \n\t" // 0G0G0G0G |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
978 "punpcklbw %%mm7, %%mm3 \n\t" // 0B0B0B0B |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
979 "punpcklbw %%mm7, %%mm0 \n\t" // 0R0R0R0R |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
980 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
981 "psrlw $3, %%mm3 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
982 "psllw $3, %%mm1 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
983 "psllw $8, %%mm0 \n\t" |
4248 | 984 "pand "MANGLE(g16Mask)", %%mm1 \n\t" |
985 "pand "MANGLE(r16Mask)", %%mm0 \n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
986 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
987 "por %%mm3, %%mm1 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
988 "por %%mm1, %%mm0 \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
989 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
990 MOVNTQ(%%mm0, (%4, %%eax, 2)) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
991 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
992 "addl $4, %%eax \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
993 "cmpl %5, %%eax \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
994 " jb 1b \n\t" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
995 |
3209 | 996 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
997 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
998 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
999 ); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1000 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1001 #else |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1002 if(dstFormat==IMGFMT_BGR32) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1003 { |
4794 | 1004 int i; |
4793 | 1005 #ifdef WORDS_BIGENDIAN |
1006 dest++; | |
1007 #endif | |
3209 | 1008 for(i=0;i<dstW;i++){ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1009 // vertical linear interpolation && yuv2rgb in a single step: |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1010 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1011 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1012 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); |
2503 | 1013 dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)]; |
1014 dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)]; | |
1015 dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)]; | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1016 dest+= 4; |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1017 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1018 } |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1019 else if(dstFormat==IMGFMT_BGR24) |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1020 { |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1021 int i; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1022 for(i=0;i<dstW;i++){ |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1023 // vertical linear interpolation && yuv2rgb in a single step: |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1024 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1025 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1026 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1027 dest[0]=clip_table[((Y + yuvtab_40cf[U]) >>13)]; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1028 dest[1]=clip_table[((Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13)]; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1029 dest[2]=clip_table[((Y + yuvtab_3343[V]) >>13)]; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1030 dest+= 3; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1031 } |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1032 } |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1033 else if(dstFormat==IMGFMT_BGR16) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1034 { |
2671 | 1035 int i; |
3209 | 1036 for(i=0;i<dstW;i++){ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1037 // vertical linear interpolation && yuv2rgb in a single step: |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1038 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1039 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1040 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1041 |
2572
f2353173d52c
c optimizations (array is faster than pointer) (16bpp variants tested and 2% faster)
michael
parents:
2569
diff
changeset
|
1042 ((uint16_t*)dest)[i] = |
2584 | 1043 clip_table16b[(Y + yuvtab_40cf[U]) >>13] | |
1044 clip_table16g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] | | |
1045 clip_table16r[(Y + yuvtab_3343[V]) >>13]; | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1046 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1047 } |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1048 else if(dstFormat==IMGFMT_BGR15) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1049 { |
2671 | 1050 int i; |
3209 | 1051 for(i=0;i<dstW;i++){ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1052 // vertical linear interpolation && yuv2rgb in a single step: |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1053 int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1054 int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>19); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1055 int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1056 |
2572
f2353173d52c
c optimizations (array is faster than pointer) (16bpp variants tested and 2% faster)
michael
parents:
2569
diff
changeset
|
1057 ((uint16_t*)dest)[i] = |
2584 | 1058 clip_table15b[(Y + yuvtab_40cf[U]) >>13] | |
1059 clip_table15g[(Y + yuvtab_1a1e[V] + yuvtab_0c92[U]) >>13] | | |
1060 clip_table15r[(Y + yuvtab_3343[V]) >>13]; | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1061 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1062 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1063 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1064 }//FULL_UV_IPOL |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1065 else |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1066 { |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1067 #ifdef HAVE_MMX |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1068 if(dstFormat==IMGFMT_BGR32) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1069 { |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1070 asm volatile( |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1071 YSCALEYUV2RGB |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1072 WRITEBGR32 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1073 |
3209 | 1074 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1075 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1076 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1077 ); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1078 } |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1079 else if(dstFormat==IMGFMT_BGR24) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1080 { |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1081 asm volatile( |
2728 | 1082 "movl %4, %%ebx \n\t" |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1083 YSCALEYUV2RGB |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1084 WRITEBGR24 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1085 |
3209 | 1086 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1087 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1088 : "%eax", "%ebx" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1089 ); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1090 } |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1091 else if(dstFormat==IMGFMT_BGR15) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1092 { |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1093 asm volatile( |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1094 YSCALEYUV2RGB |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1095 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1096 #ifdef DITHER1XBPP |
4248 | 1097 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
1098 "paddusb "MANGLE(g5Dither)", %%mm4\n\t" | |
1099 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1100 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1101 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1102 WRITEBGR15 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1103 |
3209 | 1104 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1105 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1106 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1107 ); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1108 } |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1109 else if(dstFormat==IMGFMT_BGR16) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1110 { |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1111 asm volatile( |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1112 YSCALEYUV2RGB |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1113 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1114 #ifdef DITHER1XBPP |
4248 | 1115 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
1116 "paddusb "MANGLE(g6Dither)", %%mm4\n\t" | |
1117 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1118 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1119 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1120 WRITEBGR16 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1121 |
3209 | 1122 :: "r" (buf0), "r" (buf1), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1123 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1124 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1125 ); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1126 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1127 #else |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1128 if(dstFormat==IMGFMT_BGR32) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1129 { |
4794 | 1130 int i; |
4793 | 1131 #ifdef WORDS_BIGENDIAN |
1132 dest++; | |
1133 #endif | |
3209 | 1134 for(i=0; i<dstW-1; i+=2){ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1135 // vertical linear interpolation && yuv2rgb in a single step: |
2575 | 1136 int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; |
1137 int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)]; | |
2585 | 1138 int U=((uvbuf0[i>>1]*uvalpha1+uvbuf1[i>>1]*uvalpha)>>19); |
1139 int V=((uvbuf0[(i>>1)+2048]*uvalpha1+uvbuf1[(i>>1)+2048]*uvalpha)>>19); | |
2575 | 1140 |
1141 int Cb= yuvtab_40cf[U]; | |
1142 int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U]; | |
1143 int Cr= yuvtab_3343[V]; | |
1144 | |
1145 dest[4*i+0]=clip_table[((Y1 + Cb) >>13)]; | |
1146 dest[4*i+1]=clip_table[((Y1 + Cg) >>13)]; | |
1147 dest[4*i+2]=clip_table[((Y1 + Cr) >>13)]; | |
1148 | |
1149 dest[4*i+4]=clip_table[((Y2 + Cb) >>13)]; | |
1150 dest[4*i+5]=clip_table[((Y2 + Cg) >>13)]; | |
1151 dest[4*i+6]=clip_table[((Y2 + Cr) >>13)]; | |
1152 } | |
1153 } | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1154 else if(dstFormat==IMGFMT_BGR24) |
2575 | 1155 { |
2671 | 1156 int i; |
3209 | 1157 for(i=0; i<dstW-1; i+=2){ |
2575 | 1158 // vertical linear interpolation && yuv2rgb in a single step: |
1159 int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; | |
1160 int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)]; | |
2585 | 1161 int U=((uvbuf0[i>>1]*uvalpha1+uvbuf1[i>>1]*uvalpha)>>19); |
1162 int V=((uvbuf0[(i>>1)+2048]*uvalpha1+uvbuf1[(i>>1)+2048]*uvalpha)>>19); | |
2575 | 1163 |
1164 int Cb= yuvtab_40cf[U]; | |
1165 int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U]; | |
1166 int Cr= yuvtab_3343[V]; | |
1167 | |
1168 dest[0]=clip_table[((Y1 + Cb) >>13)]; | |
1169 dest[1]=clip_table[((Y1 + Cg) >>13)]; | |
1170 dest[2]=clip_table[((Y1 + Cr) >>13)]; | |
1171 | |
1172 dest[3]=clip_table[((Y2 + Cb) >>13)]; | |
1173 dest[4]=clip_table[((Y2 + Cg) >>13)]; | |
1174 dest[5]=clip_table[((Y2 + Cr) >>13)]; | |
1175 dest+=6; | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1176 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1177 } |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1178 else if(dstFormat==IMGFMT_BGR16) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1179 { |
2671 | 1180 int i; |
4297
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1181 #ifdef DITHER1XBPP |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1182 static int ditherb1=1<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1183 static int ditherg1=1<<13; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1184 static int ditherr1=2<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1185 static int ditherb2=3<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1186 static int ditherg2=3<<13; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1187 static int ditherr2=0<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1188 |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1189 ditherb1 ^= (1^2)<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1190 ditherg1 ^= (1^2)<<13; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1191 ditherr1 ^= (1^2)<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1192 ditherb2 ^= (3^0)<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1193 ditherg2 ^= (3^0)<<13; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1194 ditherr2 ^= (3^0)<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1195 #else |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1196 const int ditherb1=0; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1197 const int ditherg1=0; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1198 const int ditherr1=0; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1199 const int ditherb2=0; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1200 const int ditherg2=0; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1201 const int ditherr2=0; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1202 #endif |
3209 | 1203 for(i=0; i<dstW-1; i+=2){ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1204 // vertical linear interpolation && yuv2rgb in a single step: |
2575 | 1205 int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; |
1206 int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)]; | |
2585 | 1207 int U=((uvbuf0[i>>1]*uvalpha1+uvbuf1[i>>1]*uvalpha)>>19); |
1208 int V=((uvbuf0[(i>>1)+2048]*uvalpha1+uvbuf1[(i>>1)+2048]*uvalpha)>>19); | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1209 |
2575 | 1210 int Cb= yuvtab_40cf[U]; |
1211 int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U]; | |
1212 int Cr= yuvtab_3343[V]; | |
1213 | |
2572
f2353173d52c
c optimizations (array is faster than pointer) (16bpp variants tested and 2% faster)
michael
parents:
2569
diff
changeset
|
1214 ((uint16_t*)dest)[i] = |
4297
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1215 clip_table16b[(Y1 + Cb + ditherb1) >>13] | |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1216 clip_table16g[(Y1 + Cg + ditherg1) >>13] | |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1217 clip_table16r[(Y1 + Cr + ditherr1) >>13]; |
2575 | 1218 |
1219 ((uint16_t*)dest)[i+1] = | |
4297
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1220 clip_table16b[(Y2 + Cb + ditherb2) >>13] | |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1221 clip_table16g[(Y2 + Cg + ditherg2) >>13] | |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1222 clip_table16r[(Y2 + Cr + ditherr2) >>13]; |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1223 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1224 } |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1225 else if(dstFormat==IMGFMT_BGR15) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1226 { |
2671 | 1227 int i; |
4297
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1228 #ifdef DITHER1XBPP |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1229 static int ditherb1=1<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1230 static int ditherg1=1<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1231 static int ditherr1=2<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1232 static int ditherb2=3<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1233 static int ditherg2=3<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1234 static int ditherr2=0<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1235 |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1236 ditherb1 ^= (1^2)<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1237 ditherg1 ^= (1^2)<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1238 ditherr1 ^= (1^2)<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1239 ditherb2 ^= (3^0)<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1240 ditherg2 ^= (3^0)<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1241 ditherr2 ^= (3^0)<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1242 #else |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1243 const int ditherb1=0; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1244 const int ditherg1=0; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1245 const int ditherr1=0; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1246 const int ditherb2=0; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1247 const int ditherg2=0; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1248 const int ditherr2=0; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1249 #endif |
3209 | 1250 for(i=0; i<dstW-1; i+=2){ |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1251 // vertical linear interpolation && yuv2rgb in a single step: |
2575 | 1252 int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)]; |
1253 int Y2=yuvtab_2568[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19)]; | |
2585 | 1254 int U=((uvbuf0[i>>1]*uvalpha1+uvbuf1[i>>1]*uvalpha)>>19); |
1255 int V=((uvbuf0[(i>>1)+2048]*uvalpha1+uvbuf1[(i>>1)+2048]*uvalpha)>>19); | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1256 |
2575 | 1257 int Cb= yuvtab_40cf[U]; |
1258 int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U]; | |
1259 int Cr= yuvtab_3343[V]; | |
1260 | |
2572
f2353173d52c
c optimizations (array is faster than pointer) (16bpp variants tested and 2% faster)
michael
parents:
2569
diff
changeset
|
1261 ((uint16_t*)dest)[i] = |
4297
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1262 clip_table15b[(Y1 + Cb + ditherb1) >>13] | |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1263 clip_table15g[(Y1 + Cg + ditherg1) >>13] | |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1264 clip_table15r[(Y1 + Cr + ditherr1) >>13]; |
2584 | 1265 |
2575 | 1266 ((uint16_t*)dest)[i+1] = |
4297
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1267 clip_table15b[(Y2 + Cb + ditherb2) >>13] | |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1268 clip_table15g[(Y2 + Cg + ditherg2) >>13] | |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1269 clip_table15r[(Y2 + Cr + ditherr2) >>13]; |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1270 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1271 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1272 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1273 } //!FULL_UV_IPOL |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1274 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1275 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1276 /** |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1277 * YV12 to RGB without scaling or interpolating |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1278 */ |
3344 | 1279 static inline void RENAME(yuv2rgb1)(uint16_t *buf0, uint16_t *uvbuf0, uint16_t *uvbuf1, |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1280 uint8_t *dest, int dstW, int uvalpha, int dstFormat, int flags) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1281 { |
2671 | 1282 int uvalpha1=uvalpha^4095; |
3344 | 1283 const int yalpha1=0; |
2671 | 1284 |
4467 | 1285 if(flags&SWS_FULL_CHR_H_INT) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1286 { |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1287 RENAME(yuv2rgb2)(buf0, buf0, uvbuf0, uvbuf1, dest, dstW, 0, uvalpha, dstFormat, flags); |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1288 return; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1289 } |
2576 | 1290 |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1291 #ifdef HAVE_MMX |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1292 if( uvalpha < 2048 ) // note: this is not correct (shifts chrominance by 0.5 pixels) but it's a bit faster |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1293 { |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1294 if(dstFormat==IMGFMT_BGR32) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1295 { |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1296 asm volatile( |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1297 YSCALEYUV2RGB1 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1298 WRITEBGR32 |
3344 | 1299 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1300 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1301 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1302 ); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1303 } |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1304 else if(dstFormat==IMGFMT_BGR24) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1305 { |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1306 asm volatile( |
2728 | 1307 "movl %4, %%ebx \n\t" |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1308 YSCALEYUV2RGB1 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1309 WRITEBGR24 |
3344 | 1310 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1311 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1312 : "%eax", "%ebx" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1313 ); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1314 } |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1315 else if(dstFormat==IMGFMT_BGR15) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1316 { |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1317 asm volatile( |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1318 YSCALEYUV2RGB1 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1319 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1320 #ifdef DITHER1XBPP |
4248 | 1321 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
1322 "paddusb "MANGLE(g5Dither)", %%mm4\n\t" | |
1323 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1324 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1325 WRITEBGR15 |
3344 | 1326 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1327 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1328 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1329 ); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1330 } |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1331 else if(dstFormat==IMGFMT_BGR16) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1332 { |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1333 asm volatile( |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1334 YSCALEYUV2RGB1 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1335 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1336 #ifdef DITHER1XBPP |
4248 | 1337 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
1338 "paddusb "MANGLE(g6Dither)", %%mm4\n\t" | |
1339 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1340 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1341 |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1342 WRITEBGR16 |
3344 | 1343 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1344 "m" (yalpha1), "m" (uvalpha1) |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1345 : "%eax" |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1346 ); |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1347 } |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1348 } |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1349 else |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1350 { |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1351 if(dstFormat==IMGFMT_BGR32) |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1352 { |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1353 asm volatile( |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1354 YSCALEYUV2RGB1b |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1355 WRITEBGR32 |
3344 | 1356 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1357 "m" (yalpha1), "m" (uvalpha1) |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1358 : "%eax" |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1359 ); |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1360 } |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1361 else if(dstFormat==IMGFMT_BGR24) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1362 { |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1363 asm volatile( |
2728 | 1364 "movl %4, %%ebx \n\t" |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1365 YSCALEYUV2RGB1b |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1366 WRITEBGR24 |
3344 | 1367 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "m" (dest), "m" (dstW), |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1368 "m" (yalpha1), "m" (uvalpha1) |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1369 : "%eax", "%ebx" |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1370 ); |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1371 } |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1372 else if(dstFormat==IMGFMT_BGR15) |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1373 { |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1374 asm volatile( |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1375 YSCALEYUV2RGB1b |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1376 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1377 #ifdef DITHER1XBPP |
4248 | 1378 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
1379 "paddusb "MANGLE(g5Dither)", %%mm4\n\t" | |
1380 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1381 #endif |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1382 WRITEBGR15 |
3344 | 1383 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1384 "m" (yalpha1), "m" (uvalpha1) |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1385 : "%eax" |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1386 ); |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1387 } |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1388 else if(dstFormat==IMGFMT_BGR16) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1389 { |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1390 asm volatile( |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1391 YSCALEYUV2RGB1b |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1392 /* mm2=B, %%mm4=G, %%mm5=R, %%mm7=0 */ |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1393 #ifdef DITHER1XBPP |
4248 | 1394 "paddusb "MANGLE(b5Dither)", %%mm2\n\t" |
1395 "paddusb "MANGLE(g6Dither)", %%mm4\n\t" | |
1396 "paddusb "MANGLE(r5Dither)", %%mm5\n\t" | |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1397 #endif |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1398 |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1399 WRITEBGR16 |
3344 | 1400 :: "r" (buf0), "r" (buf0), "r" (uvbuf0), "r" (uvbuf1), "r" (dest), "m" (dstW), |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1401 "m" (yalpha1), "m" (uvalpha1) |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1402 : "%eax" |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1403 ); |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1404 } |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1405 } |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1406 #else |
2576 | 1407 //FIXME write 2 versions (for even & odd lines) |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1408 |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1409 if(dstFormat==IMGFMT_BGR32) |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1410 { |
4794 | 1411 int i; |
4793 | 1412 #ifdef WORDS_BIGENDIAN |
1413 dest++; | |
1414 #endif | |
3209 | 1415 for(i=0; i<dstW-1; i+=2){ |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1416 // vertical linear interpolation && yuv2rgb in a single step: |
2576 | 1417 int Y1=yuvtab_2568[buf0[i]>>7]; |
1418 int Y2=yuvtab_2568[buf0[i+1]>>7]; | |
2585 | 1419 int U=((uvbuf0[i>>1]*uvalpha1+uvbuf1[i>>1]*uvalpha)>>19); |
1420 int V=((uvbuf0[(i>>1)+2048]*uvalpha1+uvbuf1[(i>>1)+2048]*uvalpha)>>19); | |
2576 | 1421 |
1422 int Cb= yuvtab_40cf[U]; | |
1423 int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U]; | |
1424 int Cr= yuvtab_3343[V]; | |
1425 | |
1426 dest[4*i+0]=clip_table[((Y1 + Cb) >>13)]; | |
1427 dest[4*i+1]=clip_table[((Y1 + Cg) >>13)]; | |
1428 dest[4*i+2]=clip_table[((Y1 + Cr) >>13)]; | |
1429 | |
1430 dest[4*i+4]=clip_table[((Y2 + Cb) >>13)]; | |
1431 dest[4*i+5]=clip_table[((Y2 + Cg) >>13)]; | |
1432 dest[4*i+6]=clip_table[((Y2 + Cr) >>13)]; | |
1433 } | |
1434 } | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1435 else if(dstFormat==IMGFMT_BGR24) |
2576 | 1436 { |
2671 | 1437 int i; |
3209 | 1438 for(i=0; i<dstW-1; i+=2){ |
2576 | 1439 // vertical linear interpolation && yuv2rgb in a single step: |
1440 int Y1=yuvtab_2568[buf0[i]>>7]; | |
1441 int Y2=yuvtab_2568[buf0[i+1]>>7]; | |
2585 | 1442 int U=((uvbuf0[i>>1]*uvalpha1+uvbuf1[i>>1]*uvalpha)>>19); |
1443 int V=((uvbuf0[(i>>1)+2048]*uvalpha1+uvbuf1[(i>>1)+2048]*uvalpha)>>19); | |
2576 | 1444 |
1445 int Cb= yuvtab_40cf[U]; | |
1446 int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U]; | |
1447 int Cr= yuvtab_3343[V]; | |
1448 | |
1449 dest[0]=clip_table[((Y1 + Cb) >>13)]; | |
1450 dest[1]=clip_table[((Y1 + Cg) >>13)]; | |
1451 dest[2]=clip_table[((Y1 + Cr) >>13)]; | |
1452 | |
1453 dest[3]=clip_table[((Y2 + Cb) >>13)]; | |
1454 dest[4]=clip_table[((Y2 + Cg) >>13)]; | |
1455 dest[5]=clip_table[((Y2 + Cr) >>13)]; | |
1456 dest+=6; | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1457 } |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1458 } |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1459 else if(dstFormat==IMGFMT_BGR16) |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1460 { |
2671 | 1461 int i; |
4297
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1462 #ifdef DITHER1XBPP |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1463 static int ditherb1=1<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1464 static int ditherg1=1<<13; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1465 static int ditherr1=2<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1466 static int ditherb2=3<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1467 static int ditherg2=3<<13; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1468 static int ditherr2=0<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1469 |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1470 ditherb1 ^= (1^2)<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1471 ditherg1 ^= (1^2)<<13; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1472 ditherr1 ^= (1^2)<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1473 ditherb2 ^= (3^0)<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1474 ditherg2 ^= (3^0)<<13; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1475 ditherr2 ^= (3^0)<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1476 #else |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1477 const int ditherb1=0; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1478 const int ditherg1=0; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1479 const int ditherr1=0; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1480 const int ditherb2=0; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1481 const int ditherg2=0; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1482 const int ditherr2=0; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1483 #endif |
3209 | 1484 for(i=0; i<dstW-1; i+=2){ |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1485 // vertical linear interpolation && yuv2rgb in a single step: |
2576 | 1486 int Y1=yuvtab_2568[buf0[i]>>7]; |
1487 int Y2=yuvtab_2568[buf0[i+1]>>7]; | |
2585 | 1488 int U=((uvbuf0[i>>1]*uvalpha1+uvbuf1[i>>1]*uvalpha)>>19); |
1489 int V=((uvbuf0[(i>>1)+2048]*uvalpha1+uvbuf1[(i>>1)+2048]*uvalpha)>>19); | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1490 |
2576 | 1491 int Cb= yuvtab_40cf[U]; |
1492 int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U]; | |
1493 int Cr= yuvtab_3343[V]; | |
1494 | |
2572
f2353173d52c
c optimizations (array is faster than pointer) (16bpp variants tested and 2% faster)
michael
parents:
2569
diff
changeset
|
1495 ((uint16_t*)dest)[i] = |
4297
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1496 clip_table16b[(Y1 + Cb + ditherb1) >>13] | |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1497 clip_table16g[(Y1 + Cg + ditherg1) >>13] | |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1498 clip_table16r[(Y1 + Cr + ditherr1) >>13]; |
2576 | 1499 |
1500 ((uint16_t*)dest)[i+1] = | |
4297
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1501 clip_table16b[(Y2 + Cb + ditherb2) >>13] | |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1502 clip_table16g[(Y2 + Cg + ditherg2) >>13] | |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1503 clip_table16r[(Y2 + Cr + ditherr2) >>13]; |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1504 } |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1505 } |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
1506 else if(dstFormat==IMGFMT_BGR15) |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1507 { |
2671 | 1508 int i; |
4297
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1509 #ifdef DITHER1XBPP |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1510 static int ditherb1=1<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1511 static int ditherg1=1<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1512 static int ditherr1=2<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1513 static int ditherb2=3<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1514 static int ditherg2=3<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1515 static int ditherr2=0<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1516 |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1517 ditherb1 ^= (1^2)<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1518 ditherg1 ^= (1^2)<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1519 ditherr1 ^= (1^2)<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1520 ditherb2 ^= (3^0)<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1521 ditherg2 ^= (3^0)<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1522 ditherr2 ^= (3^0)<<14; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1523 #else |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1524 const int ditherb1=0; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1525 const int ditherg1=0; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1526 const int ditherr1=0; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1527 const int ditherb2=0; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1528 const int ditherg2=0; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1529 const int ditherr2=0; |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1530 #endif |
3209 | 1531 for(i=0; i<dstW-1; i+=2){ |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1532 // vertical linear interpolation && yuv2rgb in a single step: |
2576 | 1533 int Y1=yuvtab_2568[buf0[i]>>7]; |
1534 int Y2=yuvtab_2568[buf0[i+1]>>7]; | |
2585 | 1535 int U=((uvbuf0[i>>1]*uvalpha1+uvbuf1[i>>1]*uvalpha)>>19); |
1536 int V=((uvbuf0[(i>>1)+2048]*uvalpha1+uvbuf1[(i>>1)+2048]*uvalpha)>>19); | |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1537 |
2576 | 1538 int Cb= yuvtab_40cf[U]; |
1539 int Cg= yuvtab_1a1e[V] + yuvtab_0c92[U]; | |
1540 int Cr= yuvtab_3343[V]; | |
1541 | |
2572
f2353173d52c
c optimizations (array is faster than pointer) (16bpp variants tested and 2% faster)
michael
parents:
2569
diff
changeset
|
1542 ((uint16_t*)dest)[i] = |
4297
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1543 clip_table15b[(Y1 + Cb + ditherb1) >>13] | |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1544 clip_table15g[(Y1 + Cg + ditherg1) >>13] | |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1545 clip_table15r[(Y1 + Cr + ditherr1) >>13]; |
2584 | 1546 |
2576 | 1547 ((uint16_t*)dest)[i+1] = |
4297
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1548 clip_table15b[(Y2 + Cb + ditherb2) >>13] | |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1549 clip_table15g[(Y2 + Cg + ditherg2) >>13] | |
29fef3982238
15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out)
michael
parents:
4295
diff
changeset
|
1550 clip_table15r[(Y2 + Cr + ditherr2) >>13]; |
2569
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1551 } |
30b736e7feef
interpolate chrominance for every second line in the 1:1 vertical scale function
michael
parents:
2566
diff
changeset
|
1552 } |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1553 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1554 } |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
1555 |
4481 | 1556 //FIXME yuy2* can read up to 7 samples too much |
1557 | |
4467 | 1558 static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, int width) |
1559 { | |
4481 | 1560 #ifdef HAVE_MMX |
1561 asm volatile( | |
1562 "movq "MANGLE(bm01010101)", %%mm2\n\t" | |
1563 "movl %0, %%eax \n\t" | |
1564 "1: \n\t" | |
1565 "movq (%1, %%eax,2), %%mm0 \n\t" | |
1566 "movq 8(%1, %%eax,2), %%mm1 \n\t" | |
1567 "pand %%mm2, %%mm0 \n\t" | |
1568 "pand %%mm2, %%mm1 \n\t" | |
1569 "packuswb %%mm1, %%mm0 \n\t" | |
1570 "movq %%mm0, (%2, %%eax) \n\t" | |
1571 "addl $8, %%eax \n\t" | |
1572 " js 1b \n\t" | |
1573 : : "g" (-width), "r" (src+width*2), "r" (dst+width) | |
1574 : "%eax" | |
1575 ); | |
4467 | 1576 #else |
1577 int i; | |
1578 for(i=0; i<width; i++) | |
1579 dst[i]= src[2*i]; | |
1580 #endif | |
1581 } | |
1582 | |
/*
 * Extracts the chroma samples of two packed YUY2 rows and averages the rows.
 * C fallback: dstU[i] is the mean of byte 4*i+1 of src1 and src2, dstV[i] the
 * mean of byte 4*i+3 (both truncated by >>1).
 *   dstU, dstV - receive width chroma bytes each
 *   src1, src2 - the two packed source rows, four bytes per output sample
 *   width      - number of chroma samples to produce per plane
 */
1583 static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
1584 { | |
4481 | 1585 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) |
/*
 * Asm path (only built with MMX2 or 3DNow): eax runs from -width towards zero
 * in steps of 4, all four pointers are passed pre-advanced to the end of their
 * rows. Each pass reads 16 bytes from both source rows and stores 4 bytes to
 * each destination plane.
 */
1586 asm volatile( | |
1587 "movq "MANGLE(bm01010101)", %%mm4\n\t" | |
1588 "movl %0, %%eax \n\t" | |
1589 "1: \n\t" | |
1590 "movq (%1, %%eax,4), %%mm0 \n\t" | |
1591 "movq 8(%1, %%eax,4), %%mm1 \n\t" | |
1592 "movq (%2, %%eax,4), %%mm2 \n\t" | |
1593 "movq 8(%2, %%eax,4), %%mm3 \n\t" | |
/* NOTE(review): PAVGB is defined outside this chunk; if it expands to a rounding byte average the result can differ by one from the truncating C fallback - confirm */
1594 PAVGB(%%mm2, %%mm0) | |
1595 PAVGB(%%mm3, %%mm1) | |
/* keep the high byte of every word (the odd source bytes, i.e. the chroma bytes) */
1596 "psrlw $8, %%mm0 \n\t" | |
1597 "psrlw $8, %%mm1 \n\t" | |
1598 "packuswb %%mm1, %%mm0 \n\t" | |
/* split the interleaved chroma bytes: mm0 keeps the odd ones, mm1 the ones selected by the mask in mm4 */
1599 "movq %%mm0, %%mm1 \n\t" | |
1600 "psrlw $8, %%mm0 \n\t" | |
1601 "pand %%mm4, %%mm1 \n\t" | |
1602 "packuswb %%mm0, %%mm0 \n\t" | |
1603 "packuswb %%mm1, %%mm1 \n\t" | |
/* operand %4 is dstV, operand %3 is dstU */
1604 "movd %%mm0, (%4, %%eax) \n\t" | |
1605 "movd %%mm1, (%3, %%eax) \n\t" | |
1606 "addl $4, %%eax \n\t" | |
1607 " js 1b \n\t" | |
1608 : : "g" (-width), "r" (src1+width*4), "r" (src2+width*4), "r" (dstU+width), "r" (dstV+width) | |
1609 : "%eax" | |
1610 ); | |
4467 | 1611 #else |
1612 int i; | |
1613 for(i=0; i<width; i++) | |
1614 { | |
1615 dstU[i]= (src1[4*i + 1] + src2[4*i + 1])>>1; | |
1616 dstV[i]= (src1[4*i + 3] + src2[4*i + 3])>>1; | |
1617 } | |
1618 #endif | |
1619 } | |
1620 | |
1621 static inline void RENAME(bgr32ToY)(uint8_t *dst, uint8_t *src, int width) | |
1622 { | |
1623 #ifdef HAVE_MMXFIXME | |
1624 #else | |
1625 int i; | |
1626 for(i=0; i<width; i++) | |
1627 { | |
1628 int b= src[i*4+0]; | |
1629 int g= src[i*4+1]; | |
1630 int r= src[i*4+2]; | |
1631 | |
1632 dst[i]= ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; | |
1633 } | |
1634 #endif | |
1635 } | |
1636 | |
1637 static inline void RENAME(bgr32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
1638 { | |
1639 #ifdef HAVE_MMXFIXME | |
1640 #else | |
1641 int i; | |
1642 for(i=0; i<width; i++) | |
1643 { | |
1644 int b= src1[8*i + 0] + src1[8*i + 4] + src2[8*i + 0] + src2[8*i + 4]; | |
1645 int g= src1[8*i + 1] + src1[8*i + 5] + src2[8*i + 1] + src2[8*i + 5]; | |
1646 int r= src1[8*i + 2] + src1[8*i + 6] + src2[8*i + 2] + src2[8*i + 6]; | |
1647 | |
1648 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
1649 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
1650 } | |
1651 #endif | |
1652 } | |
1653 | |
/*
 * Converts one row of packed 24-bit pixels (bytes: blue, green, red) to
 * 8-bit luma. C fallback: dst[i]= ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16.
 *   dst   - receives width luma bytes
 *   src   - packed source row, three bytes per pixel
 *   width - number of pixels to convert
 */
1654 static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, int width) | |
1655 { | |
4612 | 1656 #ifdef HAVE_MMX |
/*
 * MMX path: eax (output index) starts at -width and ebx (source byte index)
 * at 3*eax; src and dst are passed pre-advanced to the end of the row. Each
 * pass converts 8 pixels (ebx += 24, eax += 8), so a width that is not a
 * multiple of 8 is effectively rounded up. Every pixel is fetched with a
 * 4-byte movd, so the load at offset 21 also touches the first byte of the
 * following pixel group.
 * NOTE(review): bgr2YCoeff, w1111 and bgr2YOffset are defined outside this
 * chunk; presumably they hold the luma coefficients, four words of 1 and the
 * +16 offset - confirm.
 */
1657 asm volatile( | |
1658 "movl %2, %%eax \n\t" | |
4923 | 1659 "movq "MANGLE(bgr2YCoeff)", %%mm6 \n\t" |
1660 "movq "MANGLE(w1111)", %%mm5 \n\t" | |
4612 | 1661 "pxor %%mm7, %%mm7 \n\t" |
1662 "leal (%%eax, %%eax, 2), %%ebx \n\t" | |
1663 ".balign 16 \n\t" | |
1664 "1: \n\t" | |
1665 PREFETCH" 64(%0, %%ebx) \n\t" | |
/* pixels 0..3: widen the bytes to words and multiply by the coefficients */
1666 "movd (%0, %%ebx), %%mm0 \n\t" | |
1667 "movd 3(%0, %%ebx), %%mm1 \n\t" | |
1668 "punpcklbw %%mm7, %%mm0 \n\t" | |
1669 "punpcklbw %%mm7, %%mm1 \n\t" | |
1670 "movd 6(%0, %%ebx), %%mm2 \n\t" | |
1671 "movd 9(%0, %%ebx), %%mm3 \n\t" | |
1672 "punpcklbw %%mm7, %%mm2 \n\t" | |
1673 "punpcklbw %%mm7, %%mm3 \n\t" | |
1674 "pmaddwd %%mm6, %%mm0 \n\t" | |
1675 "pmaddwd %%mm6, %%mm1 \n\t" | |
1676 "pmaddwd %%mm6, %%mm2 \n\t" | |
1677 "pmaddwd %%mm6, %%mm3 \n\t" | |
1678 #ifndef FAST_BGR2YV12 | |
1679 "psrad $8, %%mm0 \n\t" | |
1680 "psrad $8, %%mm1 \n\t" | |
1681 "psrad $8, %%mm2 \n\t" | |
1682 "psrad $8, %%mm3 \n\t" | |
1683 #endif | |
/* add the two partial products of every pixel (pmaddwd with mm5) and pack to four words */
1684 "packssdw %%mm1, %%mm0 \n\t" | |
1685 "packssdw %%mm3, %%mm2 \n\t" | |
1686 "pmaddwd %%mm5, %%mm0 \n\t" | |
1687 "pmaddwd %%mm5, %%mm2 \n\t" | |
1688 "packssdw %%mm2, %%mm0 \n\t" | |
1689 "psraw $7, %%mm0 \n\t" | |
1690 | |
/* pixels 4..7: same sequence, result in mm4 */
1691 "movd 12(%0, %%ebx), %%mm4 \n\t" | |
1692 "movd 15(%0, %%ebx), %%mm1 \n\t" | |
1693 "punpcklbw %%mm7, %%mm4 \n\t" | |
1694 "punpcklbw %%mm7, %%mm1 \n\t" | |
1695 "movd 18(%0, %%ebx), %%mm2 \n\t" | |
1696 "movd 21(%0, %%ebx), %%mm3 \n\t" | |
1697 "punpcklbw %%mm7, %%mm2 \n\t" | |
1698 "punpcklbw %%mm7, %%mm3 \n\t" | |
1699 "pmaddwd %%mm6, %%mm4 \n\t" | |
1700 "pmaddwd %%mm6, %%mm1 \n\t" | |
1701 "pmaddwd %%mm6, %%mm2 \n\t" | |
1702 "pmaddwd %%mm6, %%mm3 \n\t" | |
1703 #ifndef FAST_BGR2YV12 | |
1704 "psrad $8, %%mm4 \n\t" | |
1705 "psrad $8, %%mm1 \n\t" | |
1706 "psrad $8, %%mm2 \n\t" | |
1707 "psrad $8, %%mm3 \n\t" | |
1708 #endif | |
1709 "packssdw %%mm1, %%mm4 \n\t" | |
1710 "packssdw %%mm3, %%mm2 \n\t" | |
1711 "pmaddwd %%mm5, %%mm4 \n\t" | |
1712 "pmaddwd %%mm5, %%mm2 \n\t" | |
1713 "addl $24, %%ebx \n\t" | |
1714 "packssdw %%mm2, %%mm4 \n\t" | |
1715 "psraw $7, %%mm4 \n\t" | |
1716 | |
/* combine the eight words to bytes, add the offset and store 8 luma bytes */
1717 "packuswb %%mm4, %%mm0 \n\t" | |
4923 | 1718 "paddusb "MANGLE(bgr2YOffset)", %%mm0 \n\t" |
4612 | 1719 |
4619 | 1720 "movq %%mm0, (%1, %%eax) \n\t" |
4612 | 1721 "addl $8, %%eax \n\t" |
1722 " js 1b \n\t" | |
1723 : : "r" (src+width*3), "r" (dst+width), "g" (-width) | |
1724 : "%eax", "%ebx" | |
1725 ); | |
4467 | 1726 #else |
1727 int i; | |
1728 for(i=0; i<width; i++) | |
1729 { | |
1730 int b= src[i*3+0]; | |
1731 int g= src[i*3+1]; | |
1732 int r= src[i*3+2]; | |
1733 | |
1734 dst[i]= ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; | |
1735 } | |
1736 #endif | |
1737 } | |
1738 | |
/*
 * Converts two rows of packed 24-bit pixels (bytes: blue, green, red) to one
 * row of U and one row of V samples. In the C fallback every output sample is
 * computed from four pixels (two adjacent pixels of src1 and the same two of
 * src2): the channel sums are weighted and shifted by RGB2YUV_SHIFT+2, then
 * 128 is added.
 *   dstU, dstV - receive width chroma bytes each
 *   src1, src2 - the two packed source rows, six bytes per output sample
 *   width      - number of chroma samples to produce per plane
 */
1739 static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
1740 { | |
4619 | 1741 #ifdef HAVE_MMX |
/*
 * MMX path: eax (output index) starts at -width, ebx (source byte index) at
 * 6*eax; all pointers are passed pre-advanced to the end of their rows. Each
 * pass consumes 24 bytes of both source rows and stores 4 bytes to each
 * destination plane, so a width that is not a multiple of 4 is effectively
 * rounded up.
 * NOTE(review): w1111, bgr2UCoeff, bgr2VCoeff and bgr2UVOffset are defined
 * outside this chunk - confirm their contents there.
 */
1742 asm volatile( | |
1743 "movl %4, %%eax \n\t" | |
4923 | 1744 "movq "MANGLE(w1111)", %%mm5 \n\t" |
1745 "movq "MANGLE(bgr2UCoeff)", %%mm6 \n\t" | |
4619 | 1746 "pxor %%mm7, %%mm7 \n\t" |
1747 "leal (%%eax, %%eax, 2), %%ebx \n\t" | |
1748 "addl %%ebx, %%ebx \n\t" | |
1749 ".balign 16 \n\t" | |
1750 "1: \n\t" | |
1751 PREFETCH" 64(%0, %%ebx) \n\t" | |
1752 PREFETCH" 64(%1, %%ebx) \n\t" | |
/* first two output samples: combine the 2x2 pixel groups into mm0 and mm2 */
1753 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | |
/* variant using PAVGB: average the two rows, then average with the copy shifted by one pixel (24 bits) */
1754 "movq (%0, %%ebx), %%mm0 \n\t" | |
1755 "movq (%1, %%ebx), %%mm1 \n\t" | |
1756 "movq 6(%0, %%ebx), %%mm2 \n\t" | |
1757 "movq 6(%1, %%ebx), %%mm3 \n\t" | |
1758 PAVGB(%%mm1, %%mm0) | |
1759 PAVGB(%%mm3, %%mm2) | |
1760 "movq %%mm0, %%mm1 \n\t" | |
1761 "movq %%mm2, %%mm3 \n\t" | |
1762 "psrlq $24, %%mm0 \n\t" | |
1763 "psrlq $24, %%mm2 \n\t" | |
1764 PAVGB(%%mm1, %%mm0) | |
1765 PAVGB(%%mm3, %%mm2) | |
1766 "punpcklbw %%mm7, %%mm0 \n\t" | |
1767 "punpcklbw %%mm7, %%mm2 \n\t" | |
1768 #else | |
/* plain MMX variant: widen the four pixels to words, add them and divide by four */
1769 "movd (%0, %%ebx), %%mm0 \n\t" | |
1770 "movd (%1, %%ebx), %%mm1 \n\t" | |
1771 "movd 3(%0, %%ebx), %%mm2 \n\t" | |
1772 "movd 3(%1, %%ebx), %%mm3 \n\t" | |
1773 "punpcklbw %%mm7, %%mm0 \n\t" | |
1774 "punpcklbw %%mm7, %%mm1 \n\t" | |
1775 "punpcklbw %%mm7, %%mm2 \n\t" | |
1776 "punpcklbw %%mm7, %%mm3 \n\t" | |
1777 "paddw %%mm1, %%mm0 \n\t" | |
1778 "paddw %%mm3, %%mm2 \n\t" | |
1779 "paddw %%mm2, %%mm0 \n\t" | |
1780 "movd 6(%0, %%ebx), %%mm4 \n\t" | |
1781 "movd 6(%1, %%ebx), %%mm1 \n\t" | |
1782 "movd 9(%0, %%ebx), %%mm2 \n\t" | |
1783 "movd 9(%1, %%ebx), %%mm3 \n\t" | |
1784 "punpcklbw %%mm7, %%mm4 \n\t" | |
1785 "punpcklbw %%mm7, %%mm1 \n\t" | |
1786 "punpcklbw %%mm7, %%mm2 \n\t" | |
1787 "punpcklbw %%mm7, %%mm3 \n\t" | |
1788 "paddw %%mm1, %%mm4 \n\t" | |
1789 "paddw %%mm3, %%mm2 \n\t" | |
1790 "paddw %%mm4, %%mm2 \n\t" | |
1791 "psrlw $2, %%mm0 \n\t" | |
1792 "psrlw $2, %%mm2 \n\t" | |
1793 #endif | |
/* multiply the same pixel data by the V coefficients (mm1, mm3) and the U coefficients (mm0, mm2) */
4923 | 1794 "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" |
1795 "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" | |
4619 | 1796 |
1797 "pmaddwd %%mm0, %%mm1 \n\t" | |
1798 "pmaddwd %%mm2, %%mm3 \n\t" | |
1799 "pmaddwd %%mm6, %%mm0 \n\t" | |
1800 "pmaddwd %%mm6, %%mm2 \n\t" | |
1801 #ifndef FAST_BGR2YV12 | |
1802 "psrad $8, %%mm0 \n\t" | |
1803 "psrad $8, %%mm1 \n\t" | |
1804 "psrad $8, %%mm2 \n\t" | |
1805 "psrad $8, %%mm3 \n\t" | |
1806 #endif | |
1807 "packssdw %%mm2, %%mm0 \n\t" | |
1808 "packssdw %%mm3, %%mm1 \n\t" | |
1809 "pmaddwd %%mm5, %%mm0 \n\t" | |
1810 "pmaddwd %%mm5, %%mm1 \n\t" | |
1811 "packssdw %%mm1, %%mm0 \n\t" // V1 V0 U1 U0 | |
1812 "psraw $7, %%mm0 \n\t" | |
1813 | |
/* second two output samples: same sequence on source bytes 12..23, data in mm4 and mm2 */
1814 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW) | |
1815 "movq 12(%0, %%ebx), %%mm4 \n\t" | |
1816 "movq 12(%1, %%ebx), %%mm1 \n\t" | |
1817 "movq 18(%0, %%ebx), %%mm2 \n\t" | |
1818 "movq 18(%1, %%ebx), %%mm3 \n\t" | |
1819 PAVGB(%%mm1, %%mm4) | |
1820 PAVGB(%%mm3, %%mm2) | |
1821 "movq %%mm4, %%mm1 \n\t" | |
1822 "movq %%mm2, %%mm3 \n\t" | |
1823 "psrlq $24, %%mm4 \n\t" | |
1824 "psrlq $24, %%mm2 \n\t" | |
1825 PAVGB(%%mm1, %%mm4) | |
1826 PAVGB(%%mm3, %%mm2) | |
1827 "punpcklbw %%mm7, %%mm4 \n\t" | |
1828 "punpcklbw %%mm7, %%mm2 \n\t" | |
1829 #else | |
1830 "movd 12(%0, %%ebx), %%mm4 \n\t" | |
1831 "movd 12(%1, %%ebx), %%mm1 \n\t" | |
1832 "movd 15(%0, %%ebx), %%mm2 \n\t" | |
1833 "movd 15(%1, %%ebx), %%mm3 \n\t" | |
1834 "punpcklbw %%mm7, %%mm4 \n\t" | |
1835 "punpcklbw %%mm7, %%mm1 \n\t" | |
1836 "punpcklbw %%mm7, %%mm2 \n\t" | |
1837 "punpcklbw %%mm7, %%mm3 \n\t" | |
1838 "paddw %%mm1, %%mm4 \n\t" | |
1839 "paddw %%mm3, %%mm2 \n\t" | |
1840 "paddw %%mm2, %%mm4 \n\t" | |
/* mm5 is borrowed as scratch here and reloaded with w1111 below */
1841 "movd 18(%0, %%ebx), %%mm5 \n\t" | |
1842 "movd 18(%1, %%ebx), %%mm1 \n\t" | |
1843 "movd 21(%0, %%ebx), %%mm2 \n\t" | |
1844 "movd 21(%1, %%ebx), %%mm3 \n\t" | |
1845 "punpcklbw %%mm7, %%mm5 \n\t" | |
1846 "punpcklbw %%mm7, %%mm1 \n\t" | |
1847 "punpcklbw %%mm7, %%mm2 \n\t" | |
1848 "punpcklbw %%mm7, %%mm3 \n\t" | |
1849 "paddw %%mm1, %%mm5 \n\t" | |
1850 "paddw %%mm3, %%mm2 \n\t" | |
1851 "paddw %%mm5, %%mm2 \n\t" | |
4923 | 1852 "movq "MANGLE(w1111)", %%mm5 \n\t" |
4619 | 1853 "psrlw $2, %%mm4 \n\t" |
1854 "psrlw $2, %%mm2 \n\t" | |
1855 #endif | |
4923 | 1856 "movq "MANGLE(bgr2VCoeff)", %%mm1 \n\t" |
1857 "movq "MANGLE(bgr2VCoeff)", %%mm3 \n\t" | |
4619 | 1858 |
1859 "pmaddwd %%mm4, %%mm1 \n\t" | |
1860 "pmaddwd %%mm2, %%mm3 \n\t" | |
1861 "pmaddwd %%mm6, %%mm4 \n\t" | |
1862 "pmaddwd %%mm6, %%mm2 \n\t" | |
1863 #ifndef FAST_BGR2YV12 | |
1864 "psrad $8, %%mm4 \n\t" | |
1865 "psrad $8, %%mm1 \n\t" | |
1866 "psrad $8, %%mm2 \n\t" | |
1867 "psrad $8, %%mm3 \n\t" | |
1868 #endif | |
1869 "packssdw %%mm2, %%mm4 \n\t" | |
1870 "packssdw %%mm3, %%mm1 \n\t" | |
1871 "pmaddwd %%mm5, %%mm4 \n\t" | |
1872 "pmaddwd %%mm5, %%mm1 \n\t" | |
1873 "addl $24, %%ebx \n\t" | |
1874 "packssdw %%mm1, %%mm4 \n\t" // V3 V2 U3 U2 | |
1875 "psraw $7, %%mm4 \n\t" | |
1876 | |
/* regroup to four U words followed by four V words, pack to bytes and add the offset */
1877 "movq %%mm0, %%mm1 \n\t" | |
1878 "punpckldq %%mm4, %%mm0 \n\t" | |
1879 "punpckhdq %%mm4, %%mm1 \n\t" | |
1880 "packsswb %%mm1, %%mm0 \n\t" | |
4923 | 1881 "paddb "MANGLE(bgr2UVOffset)", %%mm0 \n\t" |
4619 | 1882 |
/* low 4 bytes go to operand %2 (dstU), high 4 bytes to operand %3 (dstV) */
1883 "movd %%mm0, (%2, %%eax) \n\t" | |
1884 "punpckhdq %%mm0, %%mm0 \n\t" | |
1885 "movd %%mm0, (%3, %%eax) \n\t" | |
1886 "addl $4, %%eax \n\t" | |
1887 " js 1b \n\t" | |
1888 : : "r" (src1+width*6), "r" (src2+width*6), "r" (dstU+width), "r" (dstV+width), "g" (-width) | |
1889 : "%eax", "%ebx" | |
1890 ); | |
4467 | 1891 #else |
1892 int i; | |
1893 for(i=0; i<width; i++) | |
1894 { | |
1895 int b= src1[6*i + 0] + src1[6*i + 3] + src2[6*i + 0] + src2[6*i + 3]; | |
1896 int g= src1[6*i + 1] + src1[6*i + 4] + src2[6*i + 1] + src2[6*i + 4]; | |
1897 int r= src1[6*i + 2] + src1[6*i + 5] + src2[6*i + 2] + src2[6*i + 5]; | |
1898 | |
1899 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
1900 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
1901 } | |
1902 #endif | |
1903 } | |
1904 | |
4578 | 1905 static inline void RENAME(bgr16ToY)(uint8_t *dst, uint8_t *src, int width) |
1906 { | |
1907 int i; | |
1908 for(i=0; i<width; i++) | |
1909 { | |
1910 int d= src[i*2] + (src[i*2+1]<<8); | |
1911 int b= d&0x1F; | |
1912 int g= (d>>5)&0x3F; | |
1913 int r= (d>>11)&0x1F; | |
1914 | |
1915 dst[i]= ((2*RY*r + GY*g + 2*BY*b)>>(RGB2YUV_SHIFT-2)) + 16; | |
1916 } | |
1917 } | |
1918 | |
1919 static inline void RENAME(bgr16ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
1920 { | |
1921 int i; | |
1922 for(i=0; i<width; i++) | |
1923 { | |
4579 | 1924 #if 1 |
1925 int d0= le2me_32( ((uint32_t*)src1)[i] ); | |
1926 int d1= le2me_32( ((uint32_t*)src2)[i] ); | |
1927 | |
1928 int dl= (d0&0x07E0F81F) + (d1&0x07E0F81F); | |
1929 int dh= ((d0>>5)&0x07C0F83F) + ((d1>>5)&0x07C0F83F); | |
1930 | |
1931 int dh2= (dh>>11) + (dh<<21); | |
1932 int d= dh2 + dl; | |
1933 | |
1934 int b= d&0x7F; | |
1935 int r= (d>>11)&0x7F; | |
1936 int g= d>>21; | |
1937 #else | |
4578 | 1938 int d0= src1[i*4] + (src1[i*4+1]<<8); |
1939 int b0= d0&0x1F; | |
1940 int g0= (d0>>5)&0x3F; | |
1941 int r0= (d0>>11)&0x1F; | |
1942 | |
1943 int d1= src1[i*4+2] + (src1[i*4+3]<<8); | |
1944 int b1= d1&0x1F; | |
1945 int g1= (d1>>5)&0x3F; | |
1946 int r1= (d1>>11)&0x1F; | |
1947 | |
1948 int d2= src2[i*4] + (src2[i*4+1]<<8); | |
1949 int b2= d2&0x1F; | |
1950 int g2= (d2>>5)&0x3F; | |
1951 int r2= (d2>>11)&0x1F; | |
1952 | |
1953 int d3= src2[i*4+2] + (src2[i*4+3]<<8); | |
1954 int b3= d3&0x1F; | |
1955 int g3= (d3>>5)&0x3F; | |
1956 int r3= (d3>>11)&0x1F; | |
1957 | |
1958 int b= b0 + b1 + b2 + b3; | |
1959 int g= g0 + g1 + g2 + g3; | |
1960 int r= r0 + r1 + r2 + r3; | |
4579 | 1961 #endif |
4578 | 1962 dstU[i]= ((2*RU*r + GU*g + 2*BU*b)>>(RGB2YUV_SHIFT+2-2)) + 128; |
1963 dstV[i]= ((2*RV*r + GV*g + 2*BV*b)>>(RGB2YUV_SHIFT+2-2)) + 128; | |
1964 } | |
1965 } | |
1966 | |
4580 | 1967 static inline void RENAME(bgr15ToY)(uint8_t *dst, uint8_t *src, int width) |
1968 { | |
1969 int i; | |
1970 for(i=0; i<width; i++) | |
1971 { | |
1972 int d= src[i*2] + (src[i*2+1]<<8); | |
1973 int b= d&0x1F; | |
1974 int g= (d>>5)&0x1F; | |
1975 int r= (d>>10)&0x1F; | |
1976 | |
1977 dst[i]= ((RY*r + GY*g + BY*b)>>(RGB2YUV_SHIFT-3)) + 16; | |
1978 } | |
1979 } | |
1980 | |
1981 static inline void RENAME(bgr15ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
1982 { | |
1983 int i; | |
1984 for(i=0; i<width; i++) | |
1985 { | |
1986 #if 1 | |
1987 int d0= le2me_32( ((uint32_t*)src1)[i] ); | |
1988 int d1= le2me_32( ((uint32_t*)src2)[i] ); | |
1989 | |
1990 int dl= (d0&0x03E07C1F) + (d1&0x03E07C1F); | |
1991 int dh= ((d0>>5)&0x03E0F81F) + ((d1>>5)&0x03E0F81F); | |
1992 | |
1993 int dh2= (dh>>11) + (dh<<21); | |
1994 int d= dh2 + dl; | |
1995 | |
1996 int b= d&0x7F; | |
1997 int r= (d>>10)&0x7F; | |
1998 int g= d>>21; | |
1999 #else | |
2000 int d0= src1[i*4] + (src1[i*4+1]<<8); | |
2001 int b0= d0&0x1F; | |
2002 int g0= (d0>>5)&0x1F; | |
2003 int r0= (d0>>10)&0x1F; | |
2004 | |
2005 int d1= src1[i*4+2] + (src1[i*4+3]<<8); | |
2006 int b1= d1&0x1F; | |
2007 int g1= (d1>>5)&0x1F; | |
2008 int r1= (d1>>10)&0x1F; | |
2009 | |
2010 int d2= src2[i*4] + (src2[i*4+1]<<8); | |
2011 int b2= d2&0x1F; | |
2012 int g2= (d2>>5)&0x1F; | |
2013 int r2= (d2>>10)&0x1F; | |
2014 | |
2015 int d3= src2[i*4+2] + (src2[i*4+3]<<8); | |
2016 int b3= d3&0x1F; | |
2017 int g3= (d3>>5)&0x1F; | |
2018 int r3= (d3>>10)&0x1F; | |
2019 | |
2020 int b= b0 + b1 + b2 + b3; | |
2021 int g= g0 + g1 + g2 + g3; | |
2022 int r= r0 + r1 + r2 + r3; | |
2023 #endif | |
2024 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2-3)) + 128; | |
2025 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2-3)) + 128; | |
2026 } | |
2027 } | |
2028 | |
2029 | |
4558 | 2030 static inline void RENAME(rgb32ToY)(uint8_t *dst, uint8_t *src, int width) |
2031 { | |
2032 int i; | |
2033 for(i=0; i<width; i++) | |
2034 { | |
2035 int r= src[i*4+0]; | |
2036 int g= src[i*4+1]; | |
2037 int b= src[i*4+2]; | |
2038 | |
2039 dst[i]= ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; | |
2040 } | |
2041 } | |
2042 | |
2043 static inline void RENAME(rgb32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
2044 { | |
2045 int i; | |
2046 for(i=0; i<width; i++) | |
2047 { | |
2048 int r= src1[8*i + 0] + src1[8*i + 4] + src2[8*i + 0] + src2[8*i + 4]; | |
2049 int g= src1[8*i + 1] + src1[8*i + 5] + src2[8*i + 1] + src2[8*i + 5]; | |
2050 int b= src1[8*i + 2] + src1[8*i + 6] + src2[8*i + 2] + src2[8*i + 6]; | |
2051 | |
2052 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
2053 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
2054 } | |
2055 } | |
2056 | |
2057 static inline void RENAME(rgb24ToY)(uint8_t *dst, uint8_t *src, int width) | |
2058 { | |
2059 int i; | |
2060 for(i=0; i<width; i++) | |
2061 { | |
2062 int r= src[i*3+0]; | |
2063 int g= src[i*3+1]; | |
2064 int b= src[i*3+2]; | |
2065 | |
2066 dst[i]= ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; | |
2067 } | |
2068 } | |
2069 | |
2070 static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) | |
2071 { | |
2072 int i; | |
2073 for(i=0; i<width; i++) | |
2074 { | |
2075 int r= src1[6*i + 0] + src1[6*i + 3] + src2[6*i + 0] + src2[6*i + 3]; | |
2076 int g= src1[6*i + 1] + src1[6*i + 4] + src2[6*i + 1] + src2[6*i + 4]; | |
2077 int b= src1[6*i + 2] + src1[6*i + 5] + src2[6*i + 2] + src2[6*i + 5]; | |
2078 | |
2079 dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
2080 dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; | |
2081 } | |
2082 } | |
2083 | |
4467 | 2084 |
3272 | 2085 // Bilinear / Bicubic scaling |
/*
 * Horizontal FIR scaler: produces dstW 16-bit samples from an 8-bit row.
 * C fallback, for every output sample i:
 *   val    = sum over j<filterSize of src[filterPos[i] + j] * filter[filterSize*i + j]
 *   dst[i] = val>>7, clamped to 0 .. (1<<15)-1
 *   dst        - receives dstW samples
 *   src        - 8-bit source row
 *   filter     - filterSize coefficients per output sample
 *   filterPos  - index of the first source sample used for each output sample
 *   filterSize - number of taps per output sample
 * srcW and xInc are not referenced anywhere in this function.
 * NOTE(review): the MMX variants multiply by w02, which is defined outside
 * this chunk - confirm that they produce the same scaling as the C fallback.
 */
2086 static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc, | |
2087 int16_t *filter, int16_t *filterPos, int filterSize) | |
2088 { | |
2089 #ifdef HAVE_MMX | |
2090 if(filterSize==4) // always true for upscaling, sometimes for down too | |
2091 { | |
/* counter is a negative byte offset into dst/filterPos; the three pointers are
   moved to the end of their arrays so that counter can run up to zero */
2092 int counter= -2*dstW; | |
2093 filter-= counter*2; | |
2094 filterPos-= counter/2; | |
2095 dst-= counter/2; | |
/* two output samples per pass (ebp += 4); the loop ends when the add carries.
   ebp is used as the index register and is therefore saved and restored by hand */
2096 asm volatile( | |
2097 "pxor %%mm7, %%mm7 \n\t" | |
4248 | 2098 "movq "MANGLE(w02)", %%mm6 \n\t" |
3272 | 2099 "pushl %%ebp \n\t" // we use 7 regs here ... |
2100 "movl %%eax, %%ebp \n\t" | |
2101 ".balign 16 \n\t" | |
2102 "1: \n\t" | |
2103 "movzwl (%2, %%ebp), %%eax \n\t" | |
2104 "movzwl 2(%2, %%ebp), %%ebx \n\t" | |
2105 "movq (%1, %%ebp, 4), %%mm1 \n\t" | |
2106 "movq 8(%1, %%ebp, 4), %%mm3 \n\t" | |
2107 "movd (%3, %%eax), %%mm0 \n\t" | |
2108 "movd (%3, %%ebx), %%mm2 \n\t" | |
2109 "punpcklbw %%mm7, %%mm0 \n\t" | |
2110 "punpcklbw %%mm7, %%mm2 \n\t" | |
2111 "pmaddwd %%mm1, %%mm0 \n\t" | |
2112 "pmaddwd %%mm2, %%mm3 \n\t" | |
2113 "psrad $8, %%mm0 \n\t" | |
2114 "psrad $8, %%mm3 \n\t" | |
2115 "packssdw %%mm3, %%mm0 \n\t" | |
2116 "pmaddwd %%mm6, %%mm0 \n\t" | |
2117 "packssdw %%mm0, %%mm0 \n\t" | |
2118 "movd %%mm0, (%4, %%ebp) \n\t" | |
2119 "addl $4, %%ebp \n\t" | |
2120 " jnc 1b \n\t" | |
3352 | 2121 |
3272 | 2122 "popl %%ebp \n\t" |
2123 : "+a" (counter) | |
2124 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst) | |
2125 : "%ebx" | |
2126 ); | |
2127 } | |
2128 else if(filterSize==8) | |
2129 { | |
/* same scheme as the 4-tap case, with 8 coefficients (16 bytes) per output sample */
2130 int counter= -2*dstW; | |
2131 filter-= counter*4; | |
2132 filterPos-= counter/2; | |
2133 dst-= counter/2; | |
2134 asm volatile( | |
2135 "pxor %%mm7, %%mm7 \n\t" | |
4248 | 2136 "movq "MANGLE(w02)", %%mm6 \n\t" |
3272 | 2137 "pushl %%ebp \n\t" // we use 7 regs here ... |
2138 "movl %%eax, %%ebp \n\t" | |
2139 ".balign 16 \n\t" | |
2140 "1: \n\t" | |
2141 "movzwl (%2, %%ebp), %%eax \n\t" | |
2142 "movzwl 2(%2, %%ebp), %%ebx \n\t" | |
2143 "movq (%1, %%ebp, 8), %%mm1 \n\t" | |
2144 "movq 16(%1, %%ebp, 8), %%mm3 \n\t" | |
2145 "movd (%3, %%eax), %%mm0 \n\t" | |
2146 "movd (%3, %%ebx), %%mm2 \n\t" | |
2147 "punpcklbw %%mm7, %%mm0 \n\t" | |
2148 "punpcklbw %%mm7, %%mm2 \n\t" | |
2149 "pmaddwd %%mm1, %%mm0 \n\t" | |
2150 "pmaddwd %%mm2, %%mm3 \n\t" | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
2151 |
3272 | 2152 "movq 8(%1, %%ebp, 8), %%mm1 \n\t" |
2153 "movq 24(%1, %%ebp, 8), %%mm5 \n\t" | |
2154 "movd 4(%3, %%eax), %%mm4 \n\t" | |
2155 "movd 4(%3, %%ebx), %%mm2 \n\t" | |
2156 "punpcklbw %%mm7, %%mm4 \n\t" | |
2157 "punpcklbw %%mm7, %%mm2 \n\t" | |
2158 "pmaddwd %%mm1, %%mm4 \n\t" | |
2159 "pmaddwd %%mm2, %%mm5 \n\t" | |
2160 "paddd %%mm4, %%mm0 \n\t" | |
2161 "paddd %%mm5, %%mm3 \n\t" | |
2162 | |
2163 "psrad $8, %%mm0 \n\t" | |
2164 "psrad $8, %%mm3 \n\t" | |
2165 "packssdw %%mm3, %%mm0 \n\t" | |
2166 "pmaddwd %%mm6, %%mm0 \n\t" | |
2167 "packssdw %%mm0, %%mm0 \n\t" | |
2168 "movd %%mm0, (%4, %%ebp) \n\t" | |
2169 "addl $4, %%ebp \n\t" | |
2170 " jnc 1b \n\t" | |
3344 | 2171 |
3272 | 2172 "popl %%ebp \n\t" |
2173 : "+a" (counter) | |
2174 : "c" (filter), "d" (filterPos), "S" (src), "D" (dst) | |
2175 : "%ebx" | |
2176 ); | |
2177 } | |
2178 else | |
2179 { | |
/* generic tap count: outer loop 1 handles two output samples per pass, inner
   loop 2 accumulates four taps per pass for both of them while ecx walks from
   src to src+filterSize; filter is advanced as the coefficients are consumed */
2180 int counter= -2*dstW; | |
2181 // filter-= counter*filterSize/2; | |
2182 filterPos-= counter/2; | |
2183 dst-= counter/2; | |
2184 asm volatile( | |
2185 "pxor %%mm7, %%mm7 \n\t" | |
4248 | 2186 "movq "MANGLE(w02)", %%mm6 \n\t" |
3272 | 2187 ".balign 16 \n\t" |
2188 "1: \n\t" | |
2189 "movl %2, %%ecx \n\t" | |
2190 "movzwl (%%ecx, %0), %%eax \n\t" | |
2191 "movzwl 2(%%ecx, %0), %%ebx \n\t" | |
2192 "movl %5, %%ecx \n\t" | |
2193 "pxor %%mm4, %%mm4 \n\t" | |
2194 "pxor %%mm5, %%mm5 \n\t" | |
2195 "2: \n\t" | |
2196 "movq (%1), %%mm1 \n\t" | |
2197 "movq (%1, %6), %%mm3 \n\t" | |
2198 "movd (%%ecx, %%eax), %%mm0 \n\t" | |
2199 "movd (%%ecx, %%ebx), %%mm2 \n\t" | |
2200 "punpcklbw %%mm7, %%mm0 \n\t" | |
2201 "punpcklbw %%mm7, %%mm2 \n\t" | |
2202 "pmaddwd %%mm1, %%mm0 \n\t" | |
2203 "pmaddwd %%mm2, %%mm3 \n\t" | |
2204 "paddd %%mm3, %%mm5 \n\t" | |
2205 "paddd %%mm0, %%mm4 \n\t" | |
2206 "addl $8, %1 \n\t" | |
2207 "addl $4, %%ecx \n\t" | |
2208 "cmpl %4, %%ecx \n\t" | |
2209 " jb 2b \n\t" | |
/* skip the coefficients of the second output sample, which were read via (%1, %6) */
2210 "addl %6, %1 \n\t" | |
2211 "psrad $8, %%mm4 \n\t" | |
2212 "psrad $8, %%mm5 \n\t" | |
2213 "packssdw %%mm5, %%mm4 \n\t" | |
2214 "pmaddwd %%mm6, %%mm4 \n\t" | |
2215 "packssdw %%mm4, %%mm4 \n\t" | |
2216 "movl %3, %%eax \n\t" | |
2217 "movd %%mm4, (%%eax, %0) \n\t" | |
2218 "addl $4, %0 \n\t" | |
2219 " jnc 1b \n\t" | |
3344 | 2220 |
3641 | 2221 : "+r" (counter), "+r" (filter) |
2222 : "m" (filterPos), "m" (dst), "m"(src+filterSize), | |
3272 | 2223 "m" (src), "r" (filterSize*2) |
3299 | 2224 : "%ebx", "%eax", "%ecx" |
3272 | 2225 ); |
2226 } | |
2227 #else | |
2228 int i; | |
2229 for(i=0; i<dstW; i++) | |
2230 { | |
2231 int j; | |
2232 int srcPos= filterPos[i]; | |
2233 int val=0; | |
3344 | 2234 // printf("filterPos: %d\n", filterPos[i]); |
3272 | 2235 for(j=0; j<filterSize; j++) |
2236 { | |
2237 // printf("filter: %d, src: %d\n", filter[i], src[srcPos + j]); | |
2238 val += ((int)src[srcPos + j])*filter[filterSize*i + j]; | |
2239 } | |
2240 // filter += hFilterSize; | |
2241 dst[i] = MIN(MAX(0, val>>7), (1<<15)-1); // the cubic equation does overflow ... | |
2242 // dst[i] = val>>7; | |
2243 } | |
2244 #endif | |
2245 } | |
2246 // *** horizontal scale Y line to temp buffer | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
/*
 * Horizontally scales one luma row into dst.
 * For the packed source formats tested below the row is first converted to
 * 8-bit luma in formatConvBuffer and that buffer is scaled instead of src.
 * The row is then scaled either by hScale() with the hLumFilter* tables or,
 * when SWS_FAST_BILINEAR is set in flags, by one of the fast bilinear paths
 * (MMX2 via funnyYCode, plain x86 asm, or C).
 */
2247 static inline void RENAME(hyscale)(uint16_t *dst, int dstWidth, uint8_t *src, int srcW, int xInc, |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2248 int flags, int canMMX2BeUsed, int16_t *hLumFilter, |
4467 | 2249 int16_t *hLumFilterPos, int hLumFilterSize, void *funnyYCode, |
5452 | 2250 int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter, |
2251 int32_t *mmx2FilterPos) | |
2469 | 2252 { |
4467 | 2253 if(srcFormat==IMGFMT_YUY2) |
2254 { | |
2255 RENAME(yuy2ToY)(formatConvBuffer, src, srcW); | |
2256 src= formatConvBuffer; | |
2257 } | |
2258 else if(srcFormat==IMGFMT_BGR32) | |
2259 { | |
2260 RENAME(bgr32ToY)(formatConvBuffer, src, srcW); | |
2261 src= formatConvBuffer; | |
2262 } | |
2263 else if(srcFormat==IMGFMT_BGR24) | |
2264 { | |
2265 RENAME(bgr24ToY)(formatConvBuffer, src, srcW); | |
2266 src= formatConvBuffer; | |
2267 } | |
4578 | 2268 else if(srcFormat==IMGFMT_BGR16) |
2269 { | |
2270 RENAME(bgr16ToY)(formatConvBuffer, src, srcW); | |
2271 src= formatConvBuffer; | |
2272 } | |
4580 | 2273 else if(srcFormat==IMGFMT_BGR15) |
2274 { | |
2275 RENAME(bgr15ToY)(formatConvBuffer, src, srcW); | |
2276 src= formatConvBuffer; | |
2277 } | |
4558 | 2278 else if(srcFormat==IMGFMT_RGB32) |
2279 { | |
2280 RENAME(rgb32ToY)(formatConvBuffer, src, srcW); | |
2281 src= formatConvBuffer; | |
2282 } | |
2283 else if(srcFormat==IMGFMT_RGB24) | |
2284 { | |
2285 RENAME(rgb24ToY)(formatConvBuffer, src, srcW); | |
2286 src= formatConvBuffer; | |
2287 } | |
4467 | 2288 |
3352 | 2289 #ifdef HAVE_MMX |
2290 // use the new MMX scaler if the MMX2 one can't be used (it's faster than the x86 asm one) | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2291 if(!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed)) |
3352 | 2292 #else |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2293 if(!(flags&SWS_FAST_BILINEAR)) |
3352 | 2294 #endif |
3272 | 2295 { |
2296 RENAME(hScale)(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize); | |
2297 } | |
2298 else // Fast Bilinear upscale / crap downscale | |
2299 { | |
2469 | 2300 #ifdef ARCH_X86 |
2301 #ifdef HAVE_MMX2 | |
2671 | 2302 int i; |
2469 | 2303 if(canMMX2BeUsed) |
2304 { | |
/* MMX2 path: loads src, dst, mmx2Filter and mmx2FilterPos into ecx, edi, edx
   and ebx and calls the code behind funnyYCode eight times.
   NOTE(review): that code is built outside this chunk; its register contract
   cannot be checked from here - confirm against its generator */
2305 asm volatile( | |
2306 "pxor %%mm7, %%mm7 \n\t" | |
5452 | 2307 "movl %0, %%ecx \n\t" |
2308 "movl %1, %%edi \n\t" | |
2309 "movl %2, %%edx \n\t" | |
2310 "movl %3, %%ebx \n\t" | |
2469 | 2311 "xorl %%eax, %%eax \n\t" // i |
5452 | 2312 PREFETCH" (%%ecx) \n\t" |
2313 PREFETCH" 32(%%ecx) \n\t" | |
2314 PREFETCH" 64(%%ecx) \n\t" | |
2520 | 2315 |
2469 | 2316 #define FUNNY_Y_CODE \ |
5452 | 2317 "movl (%%ebx), %%esi \n\t"\ |
2318 "call *%4 \n\t"\ | |
2319 "addl (%%ebx, %%eax), %%ecx \n\t"\ | |
2320 "addl %%eax, %%edi \n\t"\ | |
2321 "xorl %%eax, %%eax \n\t"\ | |
2520 | 2322 |
2469 | 2323 FUNNY_Y_CODE |
2324 FUNNY_Y_CODE | |
2325 FUNNY_Y_CODE | |
2326 FUNNY_Y_CODE | |
2327 FUNNY_Y_CODE | |
2328 FUNNY_Y_CODE | |
2329 FUNNY_Y_CODE | |
2330 FUNNY_Y_CODE | |
2331 | |
5452 | 2332 :: "m" (src), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos), |
2333 "m" (funnyYCode) | |
2469 | 2334 : "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi" |
2335 ); | |
/* overwrite the trailing outputs whose source position is at or beyond the last source sample with that sample */
3215 | 2336 for(i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) dst[i] = src[srcW-1]*128; |
2469 | 2337 } |
2338 else | |
2339 { | |
2340 #endif | |
2341 //NO MMX just normal asm ... | |
/* two output samples per pass: ebx is the integer source position, cx its
   16-bit fraction; operand %3 is xInc>>16 and operand %4 is xInc&0xFFFF */
2342 asm volatile( | |
2343 "xorl %%eax, %%eax \n\t" // i | |
2344 "xorl %%ebx, %%ebx \n\t" // xx | |
2345 "xorl %%ecx, %%ecx \n\t" // 2*xalpha | |
2800
7847d6b7ad3d
.balign or we¡ll align by 64kb on some architectures
michael
parents:
2799
diff
changeset
|
2346 ".balign 16 \n\t" |
2469 | 2347 "1: \n\t" |
2348 "movzbl (%0, %%ebx), %%edi \n\t" //src[xx] | |
2349 "movzbl 1(%0, %%ebx), %%esi \n\t" //src[xx+1] | |
2350 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] | |
2351 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha | |
2352 "shll $16, %%edi \n\t" | |
2353 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha) | |
2354 "movl %1, %%edi \n\t" | |
2355 "shrl $9, %%esi \n\t" | |
2356 "movw %%si, (%%edi, %%eax, 2) \n\t" | |
2357 "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFFFF | |
2358 "adcl %3, %%ebx \n\t" //xx+= xInc>>16 + carry | |
2359 | |
2360 "movzbl (%0, %%ebx), %%edi \n\t" //src[xx] | |
2361 "movzbl 1(%0, %%ebx), %%esi \n\t" //src[xx+1] | |
2362 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] | |
2363 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha | |
2364 "shll $16, %%edi \n\t" | |
2365 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha) | |
2366 "movl %1, %%edi \n\t" | |
2367 "shrl $9, %%esi \n\t" | |
2368 "movw %%si, 2(%%edi, %%eax, 2) \n\t" | |
2369 "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFFFF | |
2370 "adcl %3, %%ebx \n\t" //xx+= xInc>>16 + carry | |
2371 | |
2372 | |
2373 "addl $2, %%eax \n\t" | |
2374 "cmpl %2, %%eax \n\t" | |
2375 " jb 1b \n\t" | |
2376 | |
2377 | |
2378 :: "r" (src), "m" (dst), "m" (dstWidth), "m" (xInc>>16), "m" (xInc&0xFFFF) | |
2379 : "%eax", "%ebx", "%ecx", "%edi", "%esi" | |
2380 ); | |
2381 #ifdef HAVE_MMX2 | |
2382 } //if MMX2 can't be used | |
2383 #endif | |
2384 #else | |
/* portable path: xpos is a 16.16 fixed-point source position, xalpha its top 7 fraction bits */
2671 | 2385 int i; |
2386 unsigned int xpos=0; | |
2387 for(i=0;i<dstWidth;i++) | |
2388 { | |
2389 register unsigned int xx=xpos>>16; | |
2390 register unsigned int xalpha=(xpos&0xFFFF)>>9; | |
2391 dst[i]= (src[xx]<<7) + (src[xx+1] - src[xx])*xalpha; | |
2392 xpos+=xInc; | |
2393 } | |
2469 | 2394 #endif |
3272 | 2395 } |
2469 | 2396 } |
2397 | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2398 inline static void RENAME(hcscale)(uint16_t *dst, int dstWidth, uint8_t *src1, uint8_t *src2, |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2399 int srcW, int xInc, int flags, int canMMX2BeUsed, int16_t *hChrFilter, |
4467 | 2400 int16_t *hChrFilterPos, int hChrFilterSize, void *funnyUVCode, |
5452 | 2401 int srcFormat, uint8_t *formatConvBuffer, int16_t *mmx2Filter, |
2402 int32_t *mmx2FilterPos) | |
2469 | 2403 { |
4467 | 2404 if(srcFormat==IMGFMT_YUY2) |
2405 { | |
2406 RENAME(yuy2ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); | |
2407 src1= formatConvBuffer; | |
2408 src2= formatConvBuffer+2048; | |
2409 } | |
2410 else if(srcFormat==IMGFMT_BGR32) | |
2411 { | |
2412 RENAME(bgr32ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); | |
2413 src1= formatConvBuffer; | |
2414 src2= formatConvBuffer+2048; | |
2415 } | |
2416 else if(srcFormat==IMGFMT_BGR24) | |
2417 { | |
2418 RENAME(bgr24ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); | |
2419 src1= formatConvBuffer; | |
2420 src2= formatConvBuffer+2048; | |
2421 } | |
4578 | 2422 else if(srcFormat==IMGFMT_BGR16) |
2423 { | |
2424 RENAME(bgr16ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); | |
2425 src1= formatConvBuffer; | |
2426 src2= formatConvBuffer+2048; | |
2427 } | |
4580 | 2428 else if(srcFormat==IMGFMT_BGR15) |
2429 { | |
2430 RENAME(bgr15ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); | |
2431 src1= formatConvBuffer; | |
2432 src2= formatConvBuffer+2048; | |
2433 } | |
4558 | 2434 else if(srcFormat==IMGFMT_RGB32) |
2435 { | |
2436 RENAME(rgb32ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); | |
2437 src1= formatConvBuffer; | |
2438 src2= formatConvBuffer+2048; | |
2439 } | |
2440 else if(srcFormat==IMGFMT_RGB24) | |
2441 { | |
2442 RENAME(rgb24ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW); | |
2443 src1= formatConvBuffer; | |
2444 src2= formatConvBuffer+2048; | |
2445 } | |
4481 | 2446 else if(isGray(srcFormat)) |
2447 { | |
2448 return; | |
2449 } | |
4467 | 2450 |
3352 | 2451 #ifdef HAVE_MMX |
2452 // use the new MMX scaler if th mmx2 cant be used (its faster than the x86asm one) | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2453 if(!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed)) |
3352 | 2454 #else |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2455 if(!(flags&SWS_FAST_BILINEAR)) |
3352 | 2456 #endif |
3272 | 2457 { |
2458 RENAME(hScale)(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); | |
2459 RENAME(hScale)(dst+2048, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize); | |
2460 } | |
2461 else // Fast Bilinear upscale / crap downscale | |
2462 { | |
2469 | 2463 #ifdef ARCH_X86 |
2464 #ifdef HAVE_MMX2 | |
2671 | 2465 int i; |
2469 | 2466 if(canMMX2BeUsed) |
2467 { | |
2468 asm volatile( | |
5452 | 2469 "pxor %%mm7, %%mm7 \n\t" |
2470 "movl %0, %%ecx \n\t" | |
2471 "movl %1, %%edi \n\t" | |
2472 "movl %2, %%edx \n\t" | |
2473 "movl %3, %%ebx \n\t" | |
2474 "xorl %%eax, %%eax \n\t" // i | |
2475 PREFETCH" (%%ecx) \n\t" | |
2476 PREFETCH" 32(%%ecx) \n\t" | |
2477 PREFETCH" 64(%%ecx) \n\t" | |
2478 | |
2479 #define FUNNY_UV_CODE \ | |
2480 "movl (%%ebx), %%esi \n\t"\ | |
2481 "call *%4 \n\t"\ | |
2482 "addl (%%ebx, %%eax), %%ecx \n\t"\ | |
2483 "addl %%eax, %%edi \n\t"\ | |
2484 "xorl %%eax, %%eax \n\t"\ | |
2469 | 2485 |
5452 | 2486 FUNNY_UV_CODE |
2487 FUNNY_UV_CODE | |
2488 FUNNY_UV_CODE | |
2489 FUNNY_UV_CODE | |
2490 "xorl %%eax, %%eax \n\t" // i | |
2491 "movl %5, %%ecx \n\t" // src | |
2492 "movl %1, %%edi \n\t" // buf1 | |
2493 "addl $4096, %%edi \n\t" | |
2494 PREFETCH" (%%ecx) \n\t" | |
2495 PREFETCH" 32(%%ecx) \n\t" | |
2496 PREFETCH" 64(%%ecx) \n\t" | |
2469 | 2497 |
5452 | 2498 FUNNY_UV_CODE |
2499 FUNNY_UV_CODE | |
2500 FUNNY_UV_CODE | |
2501 FUNNY_UV_CODE | |
2469 | 2502 |
5452 | 2503 :: "m" (src1), "m" (dst), "m" (mmx2Filter), "m" (mmx2FilterPos), |
2504 "m" (funnyUVCode), "m" (src2) | |
2505 : "%eax", "%ebx", "%ecx", "%edx", "%esi", "%edi" | |
2506 ); | |
3344 | 2507 for(i=dstWidth-1; (i*xInc)>>16 >=srcW-1; i--) |
2469 | 2508 { |
3344 | 2509 // printf("%d %d %d\n", dstWidth, i, srcW); |
2510 dst[i] = src1[srcW-1]*128; | |
2511 dst[i+2048] = src2[srcW-1]*128; | |
2469 | 2512 } |
2513 } | |
2514 else | |
2515 { | |
2516 #endif | |
2517 asm volatile( | |
2518 "xorl %%eax, %%eax \n\t" // i | |
2519 "xorl %%ebx, %%ebx \n\t" // xx | |
2520 "xorl %%ecx, %%ecx \n\t" // 2*xalpha | |
2800
7847d6b7ad3d
.balign or we¡ll align by 64kb on some architectures
michael
parents:
2799
diff
changeset
|
2521 ".balign 16 \n\t" |
2469 | 2522 "1: \n\t" |
2523 "movl %0, %%esi \n\t" | |
2524 "movzbl (%%esi, %%ebx), %%edi \n\t" //src[xx] | |
2525 "movzbl 1(%%esi, %%ebx), %%esi \n\t" //src[xx+1] | |
2526 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] | |
2527 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha | |
2528 "shll $16, %%edi \n\t" | |
2529 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha) | |
2530 "movl %1, %%edi \n\t" | |
2531 "shrl $9, %%esi \n\t" | |
2532 "movw %%si, (%%edi, %%eax, 2) \n\t" | |
2533 | |
2534 "movzbl (%5, %%ebx), %%edi \n\t" //src[xx] | |
2535 "movzbl 1(%5, %%ebx), %%esi \n\t" //src[xx+1] | |
2536 "subl %%edi, %%esi \n\t" //src[xx+1] - src[xx] | |
2537 "imull %%ecx, %%esi \n\t" //(src[xx+1] - src[xx])*2*xalpha | |
2538 "shll $16, %%edi \n\t" | |
2539 "addl %%edi, %%esi \n\t" //src[xx+1]*2*xalpha + src[xx]*(1-2*xalpha) | |
2540 "movl %1, %%edi \n\t" | |
2541 "shrl $9, %%esi \n\t" | |
2542 "movw %%si, 4096(%%edi, %%eax, 2)\n\t" | |
2543 | |
2544 "addw %4, %%cx \n\t" //2*xalpha += xInc&0xFF | |
2545 "adcl %3, %%ebx \n\t" //xx+= xInc>>8 + carry | |
2546 "addl $1, %%eax \n\t" | |
2547 "cmpl %2, %%eax \n\t" | |
2548 " jb 1b \n\t" | |
2549 | |
2550 :: "m" (src1), "m" (dst), "m" (dstWidth), "m" (xInc>>16), "m" (xInc&0xFFFF), | |
2551 "r" (src2) | |
2552 : "%eax", "%ebx", "%ecx", "%edi", "%esi" | |
2553 ); | |
2554 #ifdef HAVE_MMX2 | |
2555 } //if MMX2 cant be used | |
2556 #endif | |
2557 #else | |
2671 | 2558 int i; |
2559 unsigned int xpos=0; | |
2560 for(i=0;i<dstWidth;i++) | |
2561 { | |
2562 register unsigned int xx=xpos>>16; | |
2563 register unsigned int xalpha=(xpos&0xFFFF)>>9; | |
2564 dst[i]=(src1[xx]*(xalpha^127)+src1[xx+1]*xalpha); | |
2565 dst[i+2048]=(src2[xx]*(xalpha^127)+src2[xx+1]*xalpha); | |
2566 | 2566 /* slower |
2567 dst[i]= (src1[xx]<<7) + (src1[xx+1] - src1[xx])*xalpha; | |
2568 dst[i+2048]=(src2[xx]<<7) + (src2[xx+1] - src2[xx])*xalpha; | |
2569 */ | |
2671 | 2570 xpos+=xInc; |
2571 } | |
2469 | 2572 #endif |
3272 | 2573 } |
2574 } | |
2575 | |
4467 | 2576 static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStrideParam[], int srcSliceY, |
4698 | 2577 int srcSliceH, uint8_t* dstParam[], int dstStrideParam[]){ |
3344 | 2578 |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2579 /* load a few things into local vars to make the code more readable? and faster */ |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2580 const int srcW= c->srcW; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2581 const int dstW= c->dstW; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2582 const int dstH= c->dstH; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2583 const int chrDstW= c->chrDstW; |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2584 const int chrSrcW= c->chrSrcW; |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2585 const int lumXInc= c->lumXInc; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2586 const int chrXInc= c->chrXInc; |
4295 | 2587 const int dstFormat= c->dstFormat; |
6503 | 2588 const int srcFormat= c->srcFormat; |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2589 const int flags= c->flags; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2590 const int canMMX2BeUsed= c->canMMX2BeUsed; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2591 int16_t *vLumFilterPos= c->vLumFilterPos; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2592 int16_t *vChrFilterPos= c->vChrFilterPos; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2593 int16_t *hLumFilterPos= c->hLumFilterPos; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2594 int16_t *hChrFilterPos= c->hChrFilterPos; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2595 int16_t *vLumFilter= c->vLumFilter; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2596 int16_t *vChrFilter= c->vChrFilter; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2597 int16_t *hLumFilter= c->hLumFilter; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2598 int16_t *hChrFilter= c->hChrFilter; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2599 int16_t *lumMmxFilter= c->lumMmxFilter; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2600 int16_t *chrMmxFilter= c->chrMmxFilter; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2601 const int vLumFilterSize= c->vLumFilterSize; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2602 const int vChrFilterSize= c->vChrFilterSize; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2603 const int hLumFilterSize= c->hLumFilterSize; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2604 const int hChrFilterSize= c->hChrFilterSize; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2605 int16_t **lumPixBuf= c->lumPixBuf; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2606 int16_t **chrPixBuf= c->chrPixBuf; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2607 const int vLumBufSize= c->vLumBufSize; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2608 const int vChrBufSize= c->vChrBufSize; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2609 uint8_t *funnyYCode= c->funnyYCode; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2610 uint8_t *funnyUVCode= c->funnyUVCode; |
4467 | 2611 uint8_t *formatConvBuffer= c->formatConvBuffer; |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2612 const int chrSrcSliceY= srcSliceY >> c->chrSrcVSubSample; |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2613 const int chrSrcSliceH= -((-srcSliceH) >> c->chrSrcVSubSample); |
3344 | 2614 |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2615 /* vars which will change and which we need to store back in the context */ |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2616 int dstY= c->dstY; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2617 int lumBufIndex= c->lumBufIndex; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2618 int chrBufIndex= c->chrBufIndex; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2619 int lastInLumBuf= c->lastInLumBuf; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2620 int lastInChrBuf= c->lastInChrBuf; |
4467 | 2621 int srcStride[3]; |
4698 | 2622 int dstStride[3]; |
4419 | 2623 uint8_t *src[3]; |
2624 uint8_t *dst[3]; | |
6540 | 2625 |
2626 orderYUV(c->srcFormat, src, srcStride, srcParam, srcStrideParam); | |
2627 orderYUV(c->dstFormat, dst, dstStride, dstParam, dstStrideParam); | |
6503 | 2628 |
6540 | 2629 if(isPacked(c->srcFormat)){ |
4467 | 2630 src[0]= |
2631 src[1]= | |
2632 src[2]= srcParam[0]; | |
6540 | 2633 srcStride[0]= |
4467 | 2634 srcStride[1]= |
6540 | 2635 srcStride[2]= srcStrideParam[0]; |
4467 | 2636 } |
6540 | 2637 srcStride[1]<<= c->vChrDrop; |
2638 srcStride[2]<<= c->vChrDrop; | |
4419 | 2639 |
6517 | 2640 // printf("swscale %X %X %X -> %X %X %X\n", (int)src[0], (int)src[1], (int)src[2], |
2641 // (int)dst[0], (int)dst[1], (int)dst[2]); | |
2642 | |
2643 #if 0 //self test FIXME move to a vfilter or something | |
2644 { | |
2645 static volatile int i=0; | |
2646 i++; | |
2647 if(srcFormat==IMGFMT_YV12 && i==1 && srcSliceH>= c->srcH) | |
2648 selfTest(src, srcStride, c->srcW, c->srcH); | |
2649 i--; | |
2650 } | |
2651 #endif | |
4554 | 2652 |
2653 //printf("sws Strides:%d %d %d -> %d %d %d\n", srcStride[0],srcStride[1],srcStride[2], | |
2654 //dstStride[0],dstStride[1],dstStride[2]); | |
4419 | 2655 |
2656 if(dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0) | |
2657 { | |
2658 static int firstTime=1; //FIXME move this into the context perhaps | |
2659 if(flags & SWS_PRINT_INFO && firstTime) | |
2660 { | |
5937 | 2661 mp_msg(MSGT_SWS,MSGL_WARN,"SwScaler: Warning: dstStride is not aligned!\n" |
4419 | 2662 "SwScaler: ->cannot do aligned memory acesses anymore\n"); |
2663 firstTime=0; | |
2664 } | |
2665 } | |
3344 | 2666 |
4467 | 2667 /* Note: the user might start scaling in the middle of the picture, in which case this reset is not executed;
2668 this is not really intended but currently works, so people might do it */ | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2669 if(srcSliceY ==0){ |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2670 lumBufIndex=0; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2671 chrBufIndex=0; |
4467 | 2672 dstY=0; |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2673 lastInLumBuf= -1; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2674 lastInChrBuf= -1; |
3272 | 2675 } |
3344 | 2676 |
2677 for(;dstY < dstH; dstY++){ | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2678 unsigned char *dest =dst[0]+dstStride[0]*dstY; |
6520 | 2679 const int chrDstY= dstY>>c->chrDstVSubSample; |
2680 unsigned char *uDest=dst[1]+dstStride[1]*chrDstY; | |
2681 unsigned char *vDest=dst[2]+dstStride[2]*chrDstY; | |
3344 | 2682 |
2683 const int firstLumSrcY= vLumFilterPos[dstY]; //First line needed as input | |
2684 const int firstChrSrcY= vChrFilterPos[chrDstY]; //First line needed as input | |
2685 const int lastLumSrcY= firstLumSrcY + vLumFilterSize -1; // Last line needed as input | |
2686 const int lastChrSrcY= firstChrSrcY + vChrFilterSize -1; // Last line needed as input | |
2687 | |
4290
1f8ceb12284d
general convolution filtering of the source picture
michael
parents:
4276
diff
changeset
|
2688 //handle holes (FAST_BILINEAR & weird filters) |
1f8ceb12284d
general convolution filtering of the source picture
michael
parents:
4276
diff
changeset
|
2689 if(firstLumSrcY > lastInLumBuf) lastInLumBuf= firstLumSrcY-1; |
1f8ceb12284d
general convolution filtering of the source picture
michael
parents:
4276
diff
changeset
|
2690 if(firstChrSrcY > lastInChrBuf) lastInChrBuf= firstChrSrcY-1; |
1f8ceb12284d
general convolution filtering of the source picture
michael
parents:
4276
diff
changeset
|
2691 //printf("%d %d %d\n", firstChrSrcY, lastInChrBuf, vChrBufSize); |
3344 | 2692 ASSERT(firstLumSrcY >= lastInLumBuf - vLumBufSize + 1) |
2693 ASSERT(firstChrSrcY >= lastInChrBuf - vChrBufSize + 1) | |
2216 | 2694 |
3344 | 2695 // Do we have enough lines in this slice to output the dstY line |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2696 if(lastLumSrcY < srcSliceY + srcSliceH && lastChrSrcY < -((-srcSliceY - srcSliceH)>>c->chrSrcVSubSample)) |
2469 | 2697 { |
3344 | 2698 //Do horizontal scaling |
2699 while(lastInLumBuf < lastLumSrcY) | |
2700 { | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2701 uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0]; |
3344 | 2702 lumBufIndex++; |
4290
1f8ceb12284d
general convolution filtering of the source picture
michael
parents:
4276
diff
changeset
|
2703 // printf("%d %d %d %d\n", lumBufIndex, vLumBufSize, lastInLumBuf, lastLumSrcY); |
3344 | 2704 ASSERT(lumBufIndex < 2*vLumBufSize) |
2705 ASSERT(lastInLumBuf + 1 - srcSliceY < srcSliceH) | |
2706 ASSERT(lastInLumBuf + 1 - srcSliceY >= 0) | |
2707 // printf("%d %d\n", lumBufIndex, vLumBufSize); | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2708 RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc, |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2709 flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize, |
5452 | 2710 funnyYCode, c->srcFormat, formatConvBuffer, |
2711 c->lumMmx2Filter, c->lumMmx2FilterPos); | |
3344 | 2712 lastInLumBuf++; |
2713 } | |
2714 while(lastInChrBuf < lastChrSrcY) | |
2715 { | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2716 uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1]; |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2717 uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2]; |
3344 | 2718 chrBufIndex++; |
2719 ASSERT(chrBufIndex < 2*vChrBufSize) | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2720 ASSERT(lastInChrBuf + 1 - chrSrcSliceY < (chrSrcSliceH)) |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2721 ASSERT(lastInChrBuf + 1 - chrSrcSliceY >= 0) |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2722 //FIXME replace parameters through context struct (some at least) |
6503 | 2723 |
2724 if(!(isGray(srcFormat) || isGray(dstFormat))) | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2725 RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc, |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2726 flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize, |
5452 | 2727 funnyUVCode, c->srcFormat, formatConvBuffer, |
2728 c->chrMmx2Filter, c->chrMmx2FilterPos); | |
3344 | 2729 lastInChrBuf++; |
2730 } | |
2731 //wrap buf index around to stay inside the ring buffer | |
2732 if(lumBufIndex >= vLumBufSize ) lumBufIndex-= vLumBufSize; | |
2733 if(chrBufIndex >= vChrBufSize ) chrBufIndex-= vChrBufSize; | |
2469 | 2734 } |
3344 | 2735 else // not enough lines left in this slice -> load the rest in the buffer |
2469 | 2736 { |
3344 | 2737 /* printf("%d %d Last:%d %d LastInBuf:%d %d Index:%d %d Y:%d FSize: %d %d BSize: %d %d\n", |
2738 firstChrSrcY,firstLumSrcY,lastChrSrcY,lastLumSrcY, | |
2739 lastInChrBuf,lastInLumBuf,chrBufIndex,lumBufIndex,dstY,vChrFilterSize,vLumFilterSize, | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2740 vChrBufSize, vLumBufSize);*/ |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2741 |
3344 | 2742 //Do horizontal scaling |
2743 while(lastInLumBuf+1 < srcSliceY + srcSliceH) | |
2469 | 2744 { |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2745 uint8_t *s= src[0]+(lastInLumBuf + 1 - srcSliceY)*srcStride[0]; |
3344 | 2746 lumBufIndex++; |
2747 ASSERT(lumBufIndex < 2*vLumBufSize) | |
2748 ASSERT(lastInLumBuf + 1 - srcSliceY < srcSliceH) | |
2749 ASSERT(lastInLumBuf + 1 - srcSliceY >= 0) | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2750 RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc, |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2751 flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize, |
5452 | 2752 funnyYCode, c->srcFormat, formatConvBuffer, |
2753 c->lumMmx2Filter, c->lumMmx2FilterPos); | |
3344 | 2754 lastInLumBuf++; |
2469 | 2755 } |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2756 while(lastInChrBuf+1 < (chrSrcSliceY + chrSrcSliceH)) |
3344 | 2757 { |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2758 uint8_t *src1= src[1]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[1]; |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2759 uint8_t *src2= src[2]+(lastInChrBuf + 1 - chrSrcSliceY)*srcStride[2]; |
3344 | 2760 chrBufIndex++; |
2761 ASSERT(chrBufIndex < 2*vChrBufSize) | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2762 ASSERT(lastInChrBuf + 1 - chrSrcSliceY < chrSrcSliceH) |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2763 ASSERT(lastInChrBuf + 1 - chrSrcSliceY >= 0) |
6503 | 2764 |
2765 if(!(isGray(srcFormat) || isGray(dstFormat))) | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2766 RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, chrSrcW, chrXInc, |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2767 flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize, |
5452 | 2768 funnyUVCode, c->srcFormat, formatConvBuffer, |
2769 c->chrMmx2Filter, c->chrMmx2FilterPos); | |
3344 | 2770 lastInChrBuf++; |
2771 } | |
2772 //wrap buf index around to stay inside the ring buffer | |
2773 if(lumBufIndex >= vLumBufSize ) lumBufIndex-= vLumBufSize; | |
2774 if(chrBufIndex >= vChrBufSize ) chrBufIndex-= vChrBufSize; | |
2775 break; //we cant output a dstY line so lets try with the next slice | |
2469 | 2776 } |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
2777 |
2748 | 2778 #ifdef HAVE_MMX |
3344 | 2779 b5Dither= dither8[dstY&1]; |
2780 g6Dither= dither4[dstY&1]; | |
2781 g5Dither= dither8[dstY&1]; | |
2782 r5Dither= dither8[(dstY+1)&1]; | |
2748 | 2783 #endif |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2784 if(dstY < dstH-2) |
3352 | 2785 { |
6503 | 2786 if(isPlanarYUV(dstFormat) || isGray(dstFormat)) //YV12 like |
3344 | 2787 { |
6503 | 2788 if((dstY&1) || isGray(dstFormat)) uDest=vDest= NULL; //FIXME split functions in lumi / chromi |
3344 | 2789 if(vLumFilterSize == 1 && vChrFilterSize == 1) // Unscaled YV12 |
2790 { | |
2791 int16_t *lumBuf = lumPixBuf[0]; | |
2792 int16_t *chrBuf= chrPixBuf[0]; | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2793 RENAME(yuv2yuv1)(lumBuf, chrBuf, dest, uDest, vDest, dstW, chrDstW); |
3344 | 2794 } |
2795 else //General YV12 | |
2796 { | |
2797 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; | |
2798 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; | |
2799 RENAME(yuv2yuvX)( | |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2800 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2801 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2802 dest, uDest, vDest, dstW, chrDstW, |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2803 lumMmxFilter+dstY*vLumFilterSize*4, chrMmxFilter+chrDstY*vChrFilterSize*4); |
3344 | 2804 } |
2805 } | |
2806 else | |
2807 { | |
2808 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; | |
2809 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; | |
2810 | |
2811 ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); | |
2812 ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); | |
2813 if(vLumFilterSize == 1 && vChrFilterSize == 2) //Unscaled RGB | |
2814 { | |
2815 int chrAlpha= vChrFilter[2*dstY+1]; | |
2816 | |
2817 RENAME(yuv2rgb1)(*lumSrcPtr, *chrSrcPtr, *(chrSrcPtr+1), | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2818 dest, dstW, chrAlpha, dstFormat, flags); |
3344 | 2819 } |
2820 else if(vLumFilterSize == 2 && vChrFilterSize == 2) //BiLinear Upscale RGB | |
2821 { | |
2822 int lumAlpha= vLumFilter[2*dstY+1]; | |
2823 int chrAlpha= vChrFilter[2*dstY+1]; | |
2824 | |
2825 RENAME(yuv2rgb2)(*lumSrcPtr, *(lumSrcPtr+1), *chrSrcPtr, *(chrSrcPtr+1), | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2826 dest, dstW, lumAlpha, chrAlpha, dstFormat, flags); |
3344 | 2827 } |
2828 else //General RGB | |
2829 { | |
2830 RENAME(yuv2rgbX)( | |
2831 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, | |
2832 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2833 dest, dstW, dstFormat, |
3344 | 2834 lumMmxFilter+dstY*vLumFilterSize*4, chrMmxFilter+dstY*vChrFilterSize*4); |
2835 } | |
2836 } | |
3352 | 2837 } |
2838 else // hmm looks like we cant use MMX here without overwriting this arrays tail | |
2839 { | |
2840 int16_t **lumSrcPtr= lumPixBuf + lumBufIndex + firstLumSrcY - lastInLumBuf + vLumBufSize; | |
2841 int16_t **chrSrcPtr= chrPixBuf + chrBufIndex + firstChrSrcY - lastInChrBuf + vChrBufSize; | |
4419 | 2842 if(isPlanarYUV(dstFormat)) //YV12 |
3352 | 2843 { |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2844 if(dstY&1) uDest=vDest= NULL; |
6540 | 2845 yuv2yuvXinC( |
6532
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2846 vLumFilter+dstY*vLumFilterSize , lumSrcPtr, vLumFilterSize, |
9834d9980c45
yvu9 support (other planar yuv formats with other chroma subsamplings should be trivial to add, if they had a IMGFMT)
michael
parents:
6520
diff
changeset
|
2847 vChrFilter+chrDstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, |
6540 | 2848 dest, uDest, vDest, dstW, chrDstW); |
3352 | 2849 } |
2850 else | |
2851 { | |
2852 ASSERT(lumSrcPtr + vLumFilterSize - 1 < lumPixBuf + vLumBufSize*2); | |
2853 ASSERT(chrSrcPtr + vChrFilterSize - 1 < chrPixBuf + vChrBufSize*2); | |
2854 yuv2rgbXinC( | |
2855 vLumFilter+dstY*vLumFilterSize, lumSrcPtr, vLumFilterSize, | |
2856 vChrFilter+dstY*vChrFilterSize, chrSrcPtr, vChrFilterSize, | |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2857 dest, dstW, dstFormat); |
3352 | 2858 } |
2859 } | |
3344 | 2860 } |
2534
cc9d3fd626f0
patch from Martin Decky <deckm1am@ss1000.ms.mff.cuni.cz> applied and unnecassery "memory" removed
michael
parents:
2521
diff
changeset
|
2861 |
cc9d3fd626f0
patch from Martin Decky <deckm1am@ss1000.ms.mff.cuni.cz> applied and unnecassery "memory" removed
michael
parents:
2521
diff
changeset
|
2862 #ifdef HAVE_MMX |
cc9d3fd626f0
patch from Martin Decky <deckm1am@ss1000.ms.mff.cuni.cz> applied and unnecassery "memory" removed
michael
parents:
2521
diff
changeset
|
2863 __asm __volatile(SFENCE:::"memory"); |
2566 | 2864 __asm __volatile(EMMS:::"memory"); |
2534
cc9d3fd626f0
patch from Martin Decky <deckm1am@ss1000.ms.mff.cuni.cz> applied and unnecassery "memory" removed
michael
parents:
2521
diff
changeset
|
2865 #endif |
4276
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2866 /* store changed local vars back in the context */ |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2867 c->dstY= dstY; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2868 c->lumBufIndex= lumBufIndex; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2869 c->chrBufIndex= chrBufIndex; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2870 c->lastInLumBuf= lastInLumBuf; |
9199d15cb4e0
removed global vars so that multiple swscalers can be used
michael
parents:
4248
diff
changeset
|
2871 c->lastInChrBuf= lastInChrBuf; |
3641 | 2872 } |