Mercurial > mplayer.hg
annotate postproc/swscale.c @ 3603:baa8b0c0ff30
Removed unnecessary check after the protocol autodetection.
Now it will try to start streaming even if the autodetection failed.
This will allow to work with web server that doesn't report a
proper mime-type.
author | bertrand |
---|---|
date | Wed, 19 Dec 2001 09:02:52 +0000 |
parents | 64121e8a43f5 |
children | 33c560ffd3dc |
rev | line source |
---|---|
2216 | 1 |
2 // Software scaling and colorspace conversion routines for MPlayer | |
3 | |
2269 | 4 // Original C implementation by A'rpi/ESP-team <arpi@thot.banki.hu> |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
5 // current version mostly by Michael Niedermayer (michaelni@gmx.at) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
6 // the parts written by michael are under GNU GPL |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
7 |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
8 #include <inttypes.h> |
2476 | 9 #include <string.h> |
3272 | 10 #include <math.h> |
3344 | 11 #include <stdio.h> |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
12 #include "../config.h" |
3344 | 13 #ifdef HAVE_MALLOC_H |
14 #include <malloc.h> | |
15 #endif | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
16 #include "swscale.h" |
3126 | 17 #include "../cpudetect.h" |
2540 | 18 #undef MOVNTQ |
2680 | 19 #undef PAVGB |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
20 |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
21 //#undef HAVE_MMX2 |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
22 //#undef HAVE_MMX |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
23 //#undef ARCH_X86 |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
24 #define DITHER1XBPP |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
25 int fullUVIpol=0; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
26 //disables the unscaled height version |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
27 int allwaysIpol=0; |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
28 |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
29 #define RET 0xC3 //near return opcode |
3344 | 30 |
31 //#define ASSERT(x) if(!(x)) { printf("ASSERT " #x " failed\n"); *((int*)0)=0; } | |
32 #define ASSERT(x) ; | |
33 | |
3352 | 34 extern int verbose; // defined in mplayer.c |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
35 /* |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
36 NOTES |
2216 | 37 |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
38 known BUGS with known cause (no bugreports please!, but patches are welcome :) ) |
3352 | 39 horizontal fast_bilinear MMX2 scaler reads 1-7 samples too many (might cause a sig11) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
40 |
3352 | 41 Supported output formats BGR15 BGR16 BGR24 BGR32 YV12 |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
42 BGR15 & BGR16 MMX versions support dithering |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
43 Special versions: fast Y 1:1 scaling (no interpolation in y direction) |
2216 | 44 |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
45 TODO |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
46 more intelligent misalignment avoidance for the horizontal scaler |
2585 | 47 dither in C |
48 change the distance of the u & v buffer | |
3344 | 49 Move static / global vars into a struct so multiple scalers can be used |
50 write special vertical cubic upscale version | |
51 Optimize C code (yv12 / minmax) | |
3352 | 52 dstStride[3] |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
53 */ |
2216 | 54 |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
55 #define ABS(a) ((a) > 0 ? (a) : (-(a))) |
2469 | 56 #define MIN(a,b) ((a) > (b) ? (b) : (a)) |
57 #define MAX(a,b) ((a) < (b) ? (b) : (a)) | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
58 |
3126 | 59 #ifdef ARCH_X86 |
60 #define CAN_COMPILE_X86_ASM | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
61 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
62 |
3126 | 63 #ifdef CAN_COMPILE_X86_ASM |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
64 static uint64_t __attribute__((aligned(8))) yCoeff= 0x2568256825682568LL; |
2503 | 65 static uint64_t __attribute__((aligned(8))) vrCoeff= 0x3343334333433343LL; |
66 static uint64_t __attribute__((aligned(8))) ubCoeff= 0x40cf40cf40cf40cfLL; | |
67 static uint64_t __attribute__((aligned(8))) vgCoeff= 0xE5E2E5E2E5E2E5E2LL; | |
68 static uint64_t __attribute__((aligned(8))) ugCoeff= 0xF36EF36EF36EF36ELL; | |
2669 | 69 static uint64_t __attribute__((aligned(8))) bF8= 0xF8F8F8F8F8F8F8F8LL; |
70 static uint64_t __attribute__((aligned(8))) bFC= 0xFCFCFCFCFCFCFCFCLL; | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
71 static uint64_t __attribute__((aligned(8))) w400= 0x0400040004000400LL; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
72 static uint64_t __attribute__((aligned(8))) w80= 0x0080008000800080LL; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
73 static uint64_t __attribute__((aligned(8))) w10= 0x0010001000100010LL; |
3272 | 74 static uint64_t __attribute__((aligned(8))) w02= 0x0002000200020002LL; |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
75 static uint64_t __attribute__((aligned(8))) bm00001111=0x00000000FFFFFFFFLL; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
76 static uint64_t __attribute__((aligned(8))) bm00000111=0x0000000000FFFFFFLL; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
77 static uint64_t __attribute__((aligned(8))) bm11111000=0xFFFFFFFFFF000000LL; |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
78 |
2750
9ef09e232505
gcc does optimize writes to non volatile variables away if it didnt know that they were read in between
michael
parents:
2748
diff
changeset
|
79 static volatile uint64_t __attribute__((aligned(8))) b5Dither; |
9ef09e232505
gcc does optimize writes to non volatile variables away if it didnt know that they were read in between
michael
parents:
2748
diff
changeset
|
80 static volatile uint64_t __attribute__((aligned(8))) g5Dither; |
9ef09e232505
gcc does optimize writes to non volatile variables away if it didnt know that they were read in between
michael
parents:
2748
diff
changeset
|
81 static volatile uint64_t __attribute__((aligned(8))) g6Dither; |
9ef09e232505
gcc does optimize writes to non volatile variables away if it didnt know that they were read in between
michael
parents:
2748
diff
changeset
|
82 static volatile uint64_t __attribute__((aligned(8))) r5Dither; |
2748 | 83 |
84 static uint64_t __attribute__((aligned(8))) dither4[2]={ | |
85 0x0103010301030103LL, | |
86 0x0200020002000200LL,}; | |
87 | |
88 static uint64_t __attribute__((aligned(8))) dither8[2]={ | |
89 0x0602060206020602LL, | |
90 0x0004000400040004LL,}; | |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
91 |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
92 static uint64_t __attribute__((aligned(8))) b16Mask= 0x001F001F001F001FLL; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
93 static uint64_t __attribute__((aligned(8))) g16Mask= 0x07E007E007E007E0LL; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
94 static uint64_t __attribute__((aligned(8))) r16Mask= 0xF800F800F800F800LL; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
95 static uint64_t __attribute__((aligned(8))) b15Mask= 0x001F001F001F001FLL; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
96 static uint64_t __attribute__((aligned(8))) g15Mask= 0x03E003E003E003E0LL; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
97 static uint64_t __attribute__((aligned(8))) r15Mask= 0x7C007C007C007C00LL; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
98 |
2730 | 99 static uint64_t __attribute__((aligned(8))) M24A= 0x00FF0000FF0000FFLL; |
100 static uint64_t __attribute__((aligned(8))) M24B= 0xFF0000FF0000FF00LL; | |
101 static uint64_t __attribute__((aligned(8))) M24C= 0x0000FF0000FF0000LL; | |
102 | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
103 static uint64_t __attribute__((aligned(8))) temp0; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
104 static uint64_t __attribute__((aligned(8))) asm_yalpha1; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
105 static uint64_t __attribute__((aligned(8))) asm_uvalpha1; |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
106 |
3344 | 107 static int16_t __attribute__((aligned(8))) *lumPixBuf[2000]; |
108 static int16_t __attribute__((aligned(8))) *chrPixBuf[2000]; | |
3272 | 109 static int16_t __attribute__((aligned(8))) hLumFilter[8000]; |
110 static int16_t __attribute__((aligned(8))) hLumFilterPos[2000]; | |
111 static int16_t __attribute__((aligned(8))) hChrFilter[8000]; | |
112 static int16_t __attribute__((aligned(8))) hChrFilterPos[2000]; | |
3344 | 113 static int16_t __attribute__((aligned(8))) vLumFilter[8000]; |
114 static int16_t __attribute__((aligned(8))) vLumFilterPos[2000]; | |
115 static int16_t __attribute__((aligned(8))) vChrFilter[8000]; | |
116 static int16_t __attribute__((aligned(8))) vChrFilterPos[2000]; | |
117 | |
118 // Contain simply the values from v(Lum|Chr)Filter just nicely packed for mmx | |
119 //FIXME these are very likely too small / 8000 caused problems with 480x480 | |
120 static int16_t __attribute__((aligned(8))) lumMmxFilter[16000]; | |
121 static int16_t __attribute__((aligned(8))) chrMmxFilter[16000]; | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
122 #else |
3344 | 123 static int16_t *lumPixBuf[2000]; |
124 static int16_t *chrPixBuf[2000]; | |
3272 | 125 static int16_t hLumFilter[8000]; |
126 static int16_t hLumFilterPos[2000]; | |
127 static int16_t hChrFilter[8000]; | |
128 static int16_t hChrFilterPos[2000]; | |
3344 | 129 static int16_t vLumFilter[8000]; |
130 static int16_t vLumFilterPos[2000]; | |
131 static int16_t vChrFilter[8000]; | |
132 static int16_t vChrFilterPos[2000]; | |
133 //FIXME just dummy vars | |
134 static int16_t lumMmxFilter[1]; | |
135 static int16_t chrMmxFilter[1]; | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
136 #endif |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
137 |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
138 // clipping helper table for C implementations: |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
139 static unsigned char clip_table[768]; |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
140 |
2584 | 141 static unsigned short clip_table16b[768]; |
142 static unsigned short clip_table16g[768]; | |
143 static unsigned short clip_table16r[768]; | |
144 static unsigned short clip_table15b[768]; | |
145 static unsigned short clip_table15g[768]; | |
146 static unsigned short clip_table15r[768]; | |
147 | |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
148 // yuv->rgb conversion tables: |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
149 static int yuvtab_2568[256]; |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
150 static int yuvtab_3343[256]; |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
151 static int yuvtab_0c92[256]; |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
152 static int yuvtab_1a1e[256]; |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
153 static int yuvtab_40cf[256]; |
3344 | 154 // Needed for cubic scaler to catch overflows |
155 static int clip_yuvtab_2568[768]; | |
156 static int clip_yuvtab_3343[768]; | |
157 static int clip_yuvtab_0c92[768]; | |
158 static int clip_yuvtab_1a1e[768]; | |
159 static int clip_yuvtab_40cf[768]; | |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
160 |
3344 | 161 static int hLumFilterSize=0; |
162 static int hChrFilterSize=0; | |
163 static int vLumFilterSize=0; | |
164 static int vChrFilterSize=0; | |
165 static int vLumBufSize=0; | |
166 static int vChrBufSize=0; | |
3272 | 167 |
168 int sws_flags=0; | |
169 | |
3126 | 170 #ifdef CAN_COMPILE_X86_ASM |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
171 static uint8_t funnyYCode[10000]; |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
172 static uint8_t funnyUVCode[10000]; |
2671 | 173 #endif |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
174 |
2469 | 175 static int canMMX2BeUsed=0; |
176 | |
3126 | 177 #ifdef CAN_COMPILE_X86_ASM |
2671 | 178 void in_asm_used_var_warning_killer() |
179 { | |
3272 | 180 volatile int i= yCoeff+vrCoeff+ubCoeff+vgCoeff+ugCoeff+bF8+bFC+w400+w80+w10+ |
2748 | 181 bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+temp0+asm_yalpha1+ asm_uvalpha1+ |
3272 | 182 M24A+M24B+M24C+w02 + funnyYCode[0]+ funnyUVCode[0]+b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]; |
2671 | 183 if(i) i=0; |
184 } | |
185 #endif | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
186 |
/**
 * Clamp a filtered sample to the 0..255 range of an 8 bit plane.
 */
static inline uint8_t yuv2yuvX_clip8(int v)
{
	if(v < 0)   return 0;
	if(v > 255) return 255;
	return (uint8_t)v;
}

/**
 * Vertical filter for planar YUV output, plain C version.
 *
 * Each output sample is the sum of filterSize source lines weighted by the
 * matching filter coefficient, shifted right by 19 bits and clamped to 0..255.
 *
 * @param lumFilter     lumFilterSize luma coefficients
 * @param lumSrc        lumFilterSize pointers to luma lines (>= dstW samples each)
 * @param lumFilterSize number of luma taps
 * @param chrFilter     chrFilterSize chroma coefficients
 * @param chrSrc        chrFilterSize pointers to chroma lines; U samples start
 *                      at index 0 and V samples at index 2048 of each line
 * @param chrFilterSize number of chroma taps
 * @param dest          luma output, dstW bytes
 * @param uDest         U output, dstW/2 bytes, or NULL to skip chroma entirely
 * @param vDest         V output, dstW/2 bytes (only used when uDest != NULL)
 * @param dstW          output width in luma samples
 */
static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
	int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
	uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW)
{
	//FIXME Optimize (just quickly written, not optimized)
	int i;
	for(i=0; i<dstW; i++)
	{
		int val=0;
		int j;
		for(j=0; j<lumFilterSize; j++)
			val += lumSrc[j][i] * lumFilter[j];

		dest[i]= yuv2yuvX_clip8(val>>19);
	}

	if(uDest != NULL)
		for(i=0; i<(dstW>>1); i++)
		{
			int u=0;
			int v=0;
			int j;
			// chrFilter / chrSrc hold chrFilterSize entries, so the chroma
			// taps must be bounded by chrFilterSize (not lumFilterSize)
			for(j=0; j<chrFilterSize; j++)
			{
				u += chrSrc[j][i] * chrFilter[j];
				v += chrSrc[j][i + 2048] * chrFilter[j];
			}

			uDest[i]= yuv2yuvX_clip8(u>>19);
			vDest[i]= yuv2yuvX_clip8(v>>19);
		}
}
219 | |
220 static inline void yuv2rgbXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, | |
221 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, | |
222 uint8_t *dest, int dstW, int dstbpp) | |
223 { | |
224 if(dstbpp==32) | |
225 { | |
226 int i; | |
227 for(i=0; i<(dstW>>1); i++){ | |
228 int j; | |
229 int Y1=0; | |
230 int Y2=0; | |
231 int U=0; | |
232 int V=0; | |
233 int Cb, Cr, Cg; | |
234 for(j=0; j<lumFilterSize; j++) | |
235 { | |
236 Y1 += lumSrc[j][2*i] * lumFilter[j]; | |
237 Y2 += lumSrc[j][2*i+1] * lumFilter[j]; | |
238 } | |
239 for(j=0; j<chrFilterSize; j++) | |
240 { | |
241 U += chrSrc[j][i] * chrFilter[j]; | |
242 V += chrSrc[j][i+2048] * chrFilter[j]; | |
243 } | |
244 Y1= clip_yuvtab_2568[ (Y1>>19) + 256 ]; | |
245 Y2= clip_yuvtab_2568[ (Y2>>19) + 256 ]; | |
246 U >>= 19; | |
247 V >>= 19; | |
248 | |
249 Cb= clip_yuvtab_40cf[U+ 256]; | |
250 Cg= clip_yuvtab_1a1e[V+ 256] + yuvtab_0c92[U+ 256]; | |
251 Cr= clip_yuvtab_3343[V+ 256]; | |
252 | |
253 dest[8*i+0]=clip_table[((Y1 + Cb) >>13)]; | |
254 dest[8*i+1]=clip_table[((Y1 + Cg) >>13)]; | |
255 dest[8*i+2]=clip_table[((Y1 + Cr) >>13)]; | |
256 | |
257 dest[8*i+4]=clip_table[((Y2 + Cb) >>13)]; | |
258 dest[8*i+5]=clip_table[((Y2 + Cg) >>13)]; | |
259 dest[8*i+6]=clip_table[((Y2 + Cr) >>13)]; | |
260 } | |
261 } | |
262 else if(dstbpp==24) | |
263 { | |
264 int i; | |
265 for(i=0; i<(dstW>>1); i++){ | |
266 int j; | |
267 int Y1=0; | |
268 int Y2=0; | |
269 int U=0; | |
270 int V=0; | |
271 int Cb, Cr, Cg; | |
272 for(j=0; j<lumFilterSize; j++) | |
273 { | |
274 Y1 += lumSrc[j][2*i] * lumFilter[j]; | |
275 Y2 += lumSrc[j][2*i+1] * lumFilter[j]; | |
276 } | |
277 for(j=0; j<chrFilterSize; j++) | |
278 { | |
279 U += chrSrc[j][i] * chrFilter[j]; | |
280 V += chrSrc[j][i+2048] * chrFilter[j]; | |
281 } | |
282 Y1= clip_yuvtab_2568[ (Y1>>19) + 256 ]; | |
283 Y2= clip_yuvtab_2568[ (Y2>>19) + 256 ]; | |
284 U >>= 19; | |
285 V >>= 19; | |
286 | |
287 Cb= clip_yuvtab_40cf[U+ 256]; | |
288 Cg= clip_yuvtab_1a1e[V+ 256] + yuvtab_0c92[U+ 256]; | |
289 Cr= clip_yuvtab_3343[V+ 256]; | |
290 | |
291 dest[0]=clip_table[((Y1 + Cb) >>13)]; | |
292 dest[1]=clip_table[((Y1 + Cg) >>13)]; | |
293 dest[2]=clip_table[((Y1 + Cr) >>13)]; | |
294 | |
295 dest[3]=clip_table[((Y2 + Cb) >>13)]; | |
296 dest[4]=clip_table[((Y2 + Cg) >>13)]; | |
297 dest[5]=clip_table[((Y2 + Cr) >>13)]; | |
298 dest+=6; | |
299 } | |
300 } | |
301 else if(dstbpp==16) | |
302 { | |
303 int i; | |
304 for(i=0; i<(dstW>>1); i++){ | |
305 int j; | |
306 int Y1=0; | |
307 int Y2=0; | |
308 int U=0; | |
309 int V=0; | |
310 int Cb, Cr, Cg; | |
311 for(j=0; j<lumFilterSize; j++) | |
312 { | |
313 Y1 += lumSrc[j][2*i] * lumFilter[j]; | |
314 Y2 += lumSrc[j][2*i+1] * lumFilter[j]; | |
315 } | |
316 for(j=0; j<chrFilterSize; j++) | |
317 { | |
318 U += chrSrc[j][i] * chrFilter[j]; | |
319 V += chrSrc[j][i+2048] * chrFilter[j]; | |
320 } | |
321 Y1= clip_yuvtab_2568[ (Y1>>19) + 256 ]; | |
322 Y2= clip_yuvtab_2568[ (Y2>>19) + 256 ]; | |
323 U >>= 19; | |
324 V >>= 19; | |
325 | |
326 Cb= clip_yuvtab_40cf[U+ 256]; | |
327 Cg= clip_yuvtab_1a1e[V+ 256] + yuvtab_0c92[U+ 256]; | |
328 Cr= clip_yuvtab_3343[V+ 256]; | |
329 | |
330 ((uint16_t*)dest)[2*i] = | |
331 clip_table16b[(Y1 + Cb) >>13] | | |
332 clip_table16g[(Y1 + Cg) >>13] | | |
333 clip_table16r[(Y1 + Cr) >>13]; | |
334 | |
335 ((uint16_t*)dest)[2*i+1] = | |
336 clip_table16b[(Y2 + Cb) >>13] | | |
337 clip_table16g[(Y2 + Cg) >>13] | | |
338 clip_table16r[(Y2 + Cr) >>13]; | |
339 } | |
340 } | |
341 else if(dstbpp==15) | |
342 { | |
343 int i; | |
344 for(i=0; i<(dstW>>1); i++){ | |
345 int j; | |
346 int Y1=0; | |
347 int Y2=0; | |
348 int U=0; | |
349 int V=0; | |
350 int Cb, Cr, Cg; | |
351 for(j=0; j<lumFilterSize; j++) | |
352 { | |
353 Y1 += lumSrc[j][2*i] * lumFilter[j]; | |
354 Y2 += lumSrc[j][2*i+1] * lumFilter[j]; | |
355 } | |
356 for(j=0; j<chrFilterSize; j++) | |
357 { | |
358 U += chrSrc[j][i] * chrFilter[j]; | |
359 V += chrSrc[j][i+2048] * chrFilter[j]; | |
360 } | |
361 Y1= clip_yuvtab_2568[ (Y1>>19) + 256 ]; | |
362 Y2= clip_yuvtab_2568[ (Y2>>19) + 256 ]; | |
363 U >>= 19; | |
364 V >>= 19; | |
365 | |
366 Cb= clip_yuvtab_40cf[U+ 256]; | |
367 Cg= clip_yuvtab_1a1e[V+ 256] + yuvtab_0c92[U+ 256]; | |
368 Cr= clip_yuvtab_3343[V+ 256]; | |
369 | |
370 ((uint16_t*)dest)[2*i] = | |
371 clip_table15b[(Y1 + Cb) >>13] | | |
372 clip_table15g[(Y1 + Cg) >>13] | | |
373 clip_table15r[(Y1 + Cr) >>13]; | |
374 | |
375 ((uint16_t*)dest)[2*i+1] = | |
376 clip_table15b[(Y2 + Cb) >>13] | | |
377 clip_table15g[(Y2 + Cg) >>13] | | |
378 clip_table15r[(Y2 + Cr) >>13]; | |
379 } | |
380 } | |
381 } | |
382 | |
383 | |
3126 | 384 //Note: we have C, X86, MMX, MMX2, 3DNOW versions; there is no 3DNOW+MMX2 one |
385 //Plain C versions | |
3152 | 386 #if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT) |
387 #define COMPILE_C | |
388 #endif | |
389 | |
390 #ifdef CAN_COMPILE_X86_ASM | |
391 | |
392 #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT) | |
393 #define COMPILE_MMX | |
394 #endif | |
395 | |
396 #if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT) | |
397 #define COMPILE_MMX2 | |
398 #endif | |
399 | |
400 #if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT) | |
401 #define COMPILE_3DNOW | |
402 #endif | |
403 #endif //CAN_COMPILE_X86_ASM | |
404 | |
405 #undef HAVE_MMX | |
406 #undef HAVE_MMX2 | |
407 #undef HAVE_3DNOW | |
408 #undef ARCH_X86 | |
409 | |
410 #ifdef COMPILE_C | |
3126 | 411 #undef HAVE_MMX |
412 #undef HAVE_MMX2 | |
413 #undef HAVE_3DNOW | |
414 #undef ARCH_X86 | |
415 #define RENAME(a) a ## _C | |
416 #include "swscale_template.c" | |
3152 | 417 #endif |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
418 |
3126 | 419 #ifdef CAN_COMPILE_X86_ASM |
2576 | 420 |
3126 | 421 //X86 versions |
422 /* | |
423 #undef RENAME | |
424 #undef HAVE_MMX | |
425 #undef HAVE_MMX2 | |
426 #undef HAVE_3DNOW | |
427 #define ARCH_X86 | |
428 #define RENAME(a) a ## _X86 | |
429 #include "swscale_template.c" | |
430 */ | |
431 //MMX versions | |
3152 | 432 #ifdef COMPILE_MMX |
3126 | 433 #undef RENAME |
434 #define HAVE_MMX | |
435 #undef HAVE_MMX2 | |
436 #undef HAVE_3DNOW | |
437 #define ARCH_X86 | |
438 #define RENAME(a) a ## _MMX | |
439 #include "swscale_template.c" | |
3152 | 440 #endif |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
441 |
3126 | 442 //MMX2 versions |
3152 | 443 #ifdef COMPILE_MMX2 |
3126 | 444 #undef RENAME |
445 #define HAVE_MMX | |
446 #define HAVE_MMX2 | |
447 #undef HAVE_3DNOW | |
448 #define ARCH_X86 | |
449 #define RENAME(a) a ## _MMX2 | |
450 #include "swscale_template.c" | |
3152 | 451 #endif |
2469 | 452 |
3126 | 453 //3DNOW versions |
3152 | 454 #ifdef COMPILE_3DNOW |
3126 | 455 #undef RENAME |
456 #define HAVE_MMX | |
457 #undef HAVE_MMX2 | |
458 #define HAVE_3DNOW | |
459 #define ARCH_X86 | |
460 #define RENAME(a) a ## _3DNow | |
461 #include "swscale_template.c" | |
3152 | 462 #endif |
2469 | 463 |
3126 | 464 #endif //CAN_COMPILE_X86_ASM |
2469 | 465 |
3126 | 466 // minor note: the HAVE_xyz macros are messed up after this line, so don't use them |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
467 |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
468 |
2519 | 469 // *** bilinear scaling and yuv->rgb or yuv->yuv conversion of yv12 slices: |
2216 | 470 // *** Note: it's called multiple times while decoding a frame, first time y==0 |
3126 | 471 // switching the cpu type during a sliced drawing can have bad effects, like sig11 |
/**
 * Public entry point: forwards one source slice to one of the
 * SwScale_YV12slice_{MMX2,3DNow,MMX,C} variants, passing every argument
 * through unchanged.
 *
 * With RUNTIME_CPUDETECT the variant is chosen per call from the gCpuCaps
 * flags (fastest first); otherwise it is fixed at compile time by the
 * HAVE_* macros as they are defined at this point of the file.
 */
void SwScale_YV12slice(unsigned char* srcptr[],int stride[], int srcSliceY ,
			     int srcSliceH, uint8_t* dstptr[], int dststride, int dstbpp,
			     int srcW, int srcH, int dstW, int dstH){

// argument list shared by all variants
#define SWS_SLICE_ARGS srcptr, stride, srcSliceY, srcSliceH, dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH

#if defined (RUNTIME_CPUDETECT) && defined (CAN_COMPILE_X86_ASM)
	// ordered by speed, fastest first
	if(gCpuCaps.hasMMX2)
		SwScale_YV12slice_MMX2(SWS_SLICE_ARGS);
	else if(gCpuCaps.has3DNow)
		SwScale_YV12slice_3DNow(SWS_SLICE_ARGS);
	else if(gCpuCaps.hasMMX)
		SwScale_YV12slice_MMX(SWS_SLICE_ARGS);
	else
		SwScale_YV12slice_C(SWS_SLICE_ARGS);
#elif defined (RUNTIME_CPUDETECT)
	// runtime detection requested but no x86 asm available: C only
	SwScale_YV12slice_C(SWS_SLICE_ARGS);
#elif defined (HAVE_MMX2)
	SwScale_YV12slice_MMX2(SWS_SLICE_ARGS);
#elif defined (HAVE_3DNOW)
	SwScale_YV12slice_3DNow(SWS_SLICE_ARGS);
#elif defined (HAVE_MMX)
	SwScale_YV12slice_MMX(SWS_SLICE_ARGS);
#else
	SwScale_YV12slice_C(SWS_SLICE_ARGS);
#endif

#undef SWS_SLICE_ARGS
}
503 | |
504 void SwScale_Init(){ | |
505 // generating tables: | |
506 int i; | |
3344 | 507 for(i=0; i<768; i++){ |
508 int c= MIN(MAX(i-256, 0), 255); | |
509 clip_table[i]=c; | |
510 yuvtab_2568[c]= clip_yuvtab_2568[i]=(0x2568*(c-16))+(256<<13); | |
511 yuvtab_3343[c]= clip_yuvtab_3343[i]=0x3343*(c-128); | |
512 yuvtab_0c92[c]= clip_yuvtab_0c92[i]=-0x0c92*(c-128); | |
513 yuvtab_1a1e[c]= clip_yuvtab_1a1e[i]=-0x1a1e*(c-128); | |
514 yuvtab_40cf[c]= clip_yuvtab_40cf[i]=0x40cf*(c-128); | |
2216 | 515 } |
516 | |
2584 | 517 for(i=0; i<768; i++) |
518 { | |
519 int v= clip_table[i]; | |
520 clip_table16b[i]= v>>3; | |
521 clip_table16g[i]= (v<<3)&0x07E0; | |
522 clip_table16r[i]= (v<<8)&0xF800; | |
523 clip_table15b[i]= v>>3; | |
524 clip_table15g[i]= (v<<2)&0x03E0; | |
525 clip_table15r[i]= (v<<7)&0x7C00; | |
526 } | |
3344 | 527 |
3126 | 528 } |
2584 | 529 |