Mercurial > mplayer.hg
annotate postproc/swscale.c @ 4271:2c7e6c87fb6f
reworked, picture moves when window moved, fullscreen working with gui, fixed some annoying bugs
author | alex |
---|---|
date | Sat, 19 Jan 2002 22:43:19 +0000 |
parents | 3cdb86beebce |
children | 9199d15cb4e0 |
rev | line source |
---|---|
2216 | 1 |
2 // Software scaling and colorspace conversion routines for MPlayer | |
3 | |
2269 | 4 // Original C implementation by A'rpi/ESP-team <arpi@thot.banki.hu> |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
5 // current version mostly by Michael Niedermayer (michaelni@gmx.at) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
6 // the parts written by michael are under GNU GPL |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
7 |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
8 #include <inttypes.h> |
2476 | 9 #include <string.h> |
3272 | 10 #include <math.h> |
3344 | 11 #include <stdio.h> |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
12 #include "../config.h" |
4248 | 13 #include "../mangle.h" |
3344 | 14 #ifdef HAVE_MALLOC_H |
15 #include <malloc.h> | |
16 #endif | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
17 #include "swscale.h" |
3126 | 18 #include "../cpudetect.h" |
2540 | 19 #undef MOVNTQ |
2680 | 20 #undef PAVGB |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
21 |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
22 //#undef HAVE_MMX2 |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
23 //#undef HAVE_MMX |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
24 //#undef ARCH_X86 |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
25 #define DITHER1XBPP |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
26 int fullUVIpol=0; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
27 //disables the unscaled height version |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
28 int allwaysIpol=0; |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
29 |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
30 #define RET 0xC3 //near return opcode |
3344 | 31 |
32 //#define ASSERT(x) if(!(x)) { printf("ASSERT " #x " failed\n"); *((int*)0)=0; } | |
33 #define ASSERT(x) ; | |
34 | |
3352 | 35 extern int verbose; // defined in mplayer.c |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
36 /* |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
37 NOTES |
2216 | 38 |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
39 known BUGS with known cause (no bugreports please!, but patches are welcome :) ) |
3352 | 40 horizontal fast_bilinear MMX2 scaler reads 1-7 samples too many (might cause a sig11) |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
41 |
3352 | 42 Supported output formats BGR15 BGR16 BGR24 BGR32 YV12 |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
43 BGR15 & BGR16 MMX versions support dithering |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
44 Special versions: fast Y 1:1 scaling (no interpolation in y direction) |
2216 | 45 |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
46 TODO |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
47 more intelligent misalignment avoidance for the horizontal scaler |
2585 | 48 dither in C |
49 change the distance of the u & v buffer | |
3344 | 50 Move static / global vars into a struct so multiple scalers can be used |
51 write special vertical cubic upscale version | |
52 Optimize C code (yv12 / minmax) | |
3352 | 53 dstStride[3] |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
54 */ |
2216 | 55 |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
56 #define ABS(a) ((a) > 0 ? (a) : (-(a))) |
2469 | 57 #define MIN(a,b) ((a) > (b) ? (b) : (a)) |
58 #define MAX(a,b) ((a) < (b) ? (b) : (a)) | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
59 |
3126 | 60 #ifdef ARCH_X86 |
61 #define CAN_COMPILE_X86_ASM | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
62 #endif |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
63 |
3126 | 64 #ifdef CAN_COMPILE_X86_ASM |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
65 static uint64_t __attribute__((aligned(8))) yCoeff= 0x2568256825682568LL; |
2503 | 66 static uint64_t __attribute__((aligned(8))) vrCoeff= 0x3343334333433343LL; |
67 static uint64_t __attribute__((aligned(8))) ubCoeff= 0x40cf40cf40cf40cfLL; | |
68 static uint64_t __attribute__((aligned(8))) vgCoeff= 0xE5E2E5E2E5E2E5E2LL; | |
69 static uint64_t __attribute__((aligned(8))) ugCoeff= 0xF36EF36EF36EF36ELL; | |
2669 | 70 static uint64_t __attribute__((aligned(8))) bF8= 0xF8F8F8F8F8F8F8F8LL; |
71 static uint64_t __attribute__((aligned(8))) bFC= 0xFCFCFCFCFCFCFCFCLL; | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
72 static uint64_t __attribute__((aligned(8))) w400= 0x0400040004000400LL; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
73 static uint64_t __attribute__((aligned(8))) w80= 0x0080008000800080LL; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
74 static uint64_t __attribute__((aligned(8))) w10= 0x0010001000100010LL; |
3272 | 75 static uint64_t __attribute__((aligned(8))) w02= 0x0002000200020002LL; |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
76 static uint64_t __attribute__((aligned(8))) bm00001111=0x00000000FFFFFFFFLL; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
77 static uint64_t __attribute__((aligned(8))) bm00000111=0x0000000000FFFFFFLL; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
78 static uint64_t __attribute__((aligned(8))) bm11111000=0xFFFFFFFFFF000000LL; |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
79 |
2750
9ef09e232505
gcc does optimize writes to non volatile variables away if it didnt know that they were read in between
michael
parents:
2748
diff
changeset
|
80 static volatile uint64_t __attribute__((aligned(8))) b5Dither; |
9ef09e232505
gcc does optimize writes to non volatile variables away if it didnt know that they were read in between
michael
parents:
2748
diff
changeset
|
81 static volatile uint64_t __attribute__((aligned(8))) g5Dither; |
9ef09e232505
gcc does optimize writes to non volatile variables away if it didnt know that they were read in between
michael
parents:
2748
diff
changeset
|
82 static volatile uint64_t __attribute__((aligned(8))) g6Dither; |
9ef09e232505
gcc does optimize writes to non volatile variables away if it didnt know that they were read in between
michael
parents:
2748
diff
changeset
|
83 static volatile uint64_t __attribute__((aligned(8))) r5Dither; |
2748 | 84 |
85 static uint64_t __attribute__((aligned(8))) dither4[2]={ | |
86 0x0103010301030103LL, | |
87 0x0200020002000200LL,}; | |
88 | |
89 static uint64_t __attribute__((aligned(8))) dither8[2]={ | |
90 0x0602060206020602LL, | |
91 0x0004000400040004LL,}; | |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
92 |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
93 static uint64_t __attribute__((aligned(8))) b16Mask= 0x001F001F001F001FLL; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
94 static uint64_t __attribute__((aligned(8))) g16Mask= 0x07E007E007E007E0LL; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
95 static uint64_t __attribute__((aligned(8))) r16Mask= 0xF800F800F800F800LL; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
96 static uint64_t __attribute__((aligned(8))) b15Mask= 0x001F001F001F001FLL; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
97 static uint64_t __attribute__((aligned(8))) g15Mask= 0x03E003E003E003E0LL; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
98 static uint64_t __attribute__((aligned(8))) r15Mask= 0x7C007C007C007C00LL; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
99 |
2730 | 100 static uint64_t __attribute__((aligned(8))) M24A= 0x00FF0000FF0000FFLL; |
101 static uint64_t __attribute__((aligned(8))) M24B= 0xFF0000FF0000FF00LL; | |
102 static uint64_t __attribute__((aligned(8))) M24C= 0x0000FF0000FF0000LL; | |
103 | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
104 static uint64_t __attribute__((aligned(8))) temp0; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
105 static uint64_t __attribute__((aligned(8))) asm_yalpha1; |
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
106 static uint64_t __attribute__((aligned(8))) asm_uvalpha1; |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
107 |
3344 | 108 static int16_t __attribute__((aligned(8))) *lumPixBuf[2000]; |
109 static int16_t __attribute__((aligned(8))) *chrPixBuf[2000]; | |
3272 | 110 static int16_t __attribute__((aligned(8))) hLumFilter[8000]; |
111 static int16_t __attribute__((aligned(8))) hLumFilterPos[2000]; | |
112 static int16_t __attribute__((aligned(8))) hChrFilter[8000]; | |
113 static int16_t __attribute__((aligned(8))) hChrFilterPos[2000]; | |
3344 | 114 static int16_t __attribute__((aligned(8))) vLumFilter[8000]; |
115 static int16_t __attribute__((aligned(8))) vLumFilterPos[2000]; | |
116 static int16_t __attribute__((aligned(8))) vChrFilter[8000]; | |
117 static int16_t __attribute__((aligned(8))) vChrFilterPos[2000]; | |
118 | |
119 // Contain simply the values from v(Lum|Chr)Filter just nicely packed for mmx | |
120 //FIXME these are very likely too small / 8000 caused problems with 480x480 | |
121 static int16_t __attribute__((aligned(8))) lumMmxFilter[16000]; | |
122 static int16_t __attribute__((aligned(8))) chrMmxFilter[16000]; | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
123 #else |
3344 | 124 static int16_t *lumPixBuf[2000]; |
125 static int16_t *chrPixBuf[2000]; | |
3272 | 126 static int16_t hLumFilter[8000]; |
127 static int16_t hLumFilterPos[2000]; | |
128 static int16_t hChrFilter[8000]; | |
129 static int16_t hChrFilterPos[2000]; | |
3344 | 130 static int16_t vLumFilter[8000]; |
131 static int16_t vLumFilterPos[2000]; | |
132 static int16_t vChrFilter[8000]; | |
133 static int16_t vChrFilterPos[2000]; | |
134 //FIXME just dummy vars | |
135 static int16_t lumMmxFilter[1]; | |
136 static int16_t chrMmxFilter[1]; | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
137 #endif |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
138 |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
139 // clipping helper table for C implementations: |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
140 static unsigned char clip_table[768]; |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
141 |
2584 | 142 static unsigned short clip_table16b[768]; |
143 static unsigned short clip_table16g[768]; | |
144 static unsigned short clip_table16r[768]; | |
145 static unsigned short clip_table15b[768]; | |
146 static unsigned short clip_table15g[768]; | |
147 static unsigned short clip_table15r[768]; | |
148 | |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
149 // yuv->rgb conversion tables: |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
150 static int yuvtab_2568[256]; |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
151 static int yuvtab_3343[256]; |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
152 static int yuvtab_0c92[256]; |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
153 static int yuvtab_1a1e[256]; |
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
154 static int yuvtab_40cf[256]; |
3344 | 155 // Needed for cubic scaler to catch overflows |
156 static int clip_yuvtab_2568[768]; | |
157 static int clip_yuvtab_3343[768]; | |
158 static int clip_yuvtab_0c92[768]; | |
159 static int clip_yuvtab_1a1e[768]; | |
160 static int clip_yuvtab_40cf[768]; | |
2264
7851375ea156
increased precission of s_xinc s_xinc2 (needed for the mmx2 bugfix)
michael
parents:
2237
diff
changeset
|
161 |
3344 | 162 static int hLumFilterSize=0; |
163 static int hChrFilterSize=0; | |
164 static int vLumFilterSize=0; | |
165 static int vChrFilterSize=0; | |
166 static int vLumBufSize=0; | |
167 static int vChrBufSize=0; | |
3272 | 168 |
169 int sws_flags=0; | |
170 | |
3126 | 171 #ifdef CAN_COMPILE_X86_ASM |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
172 static uint8_t funnyYCode[10000]; |
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
173 static uint8_t funnyUVCode[10000]; |
2671 | 174 #endif |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
175 |
2469 | 176 static int canMMX2BeUsed=0; |
177 | |
3126 | 178 #ifdef CAN_COMPILE_X86_ASM |
2671 | 179 void in_asm_used_var_warning_killer() |
180 { | |
3272 | 181 volatile int i= yCoeff+vrCoeff+ubCoeff+vgCoeff+ugCoeff+bF8+bFC+w400+w80+w10+ |
2748 | 182 bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+temp0+asm_yalpha1+ asm_uvalpha1+ |
3272 | 183 M24A+M24B+M24C+w02 + funnyYCode[0]+ funnyUVCode[0]+b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]; |
2671 | 184 if(i) i=0; |
185 } | |
186 #endif | |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
187 |
/*
 * Plain C vertical scaler for planar YUV output.
 *
 * Every output sample is the sum over `j` of src[j][x] * filter[j], shifted
 * right by 19 bits and clamped to 0..255.
 *
 *  lumFilter/lumFilterSize  vertical luma coefficients and their count
 *  lumSrc                   lumFilterSize pointers to luma lines (dstW samples each)
 *  chrFilter/chrFilterSize  vertical chroma coefficients and their count
 *  chrSrc                   chrFilterSize pointers to chroma lines; U samples
 *                           start at index 0 and V samples at index 2048 of
 *                           the same line
 *  dest                     luma output line, dstW bytes
 *  uDest, vDest             chroma output lines, dstW/2 bytes each; chroma is
 *                           skipped completely when uDest is NULL
 *  dstW                     output width in luma samples
 */
static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
				    int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
				    uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW)
{
	//FIXME Optimize (just quickly written, not optimized)
	const int chrW = dstW >> 1;
	int x;
	int tap;

	/* luma plane */
	for (x = 0; x < dstW; x++) {
		int acc = 0;

		for (tap = 0; tap < lumFilterSize; tap++)
			acc += lumSrc[tap][x] * lumFilter[tap];

		acc >>= 19;
		if (acc < 0)
			acc = 0;
		else if (acc > 255)
			acc = 255;
		dest[x] = acc;
	}

	/* caller does not want chroma for this line */
	if (uDest == NULL)
		return;

	/* chroma planes: U and V share one source line, V is 2048 samples in */
	for (x = 0; x < chrW; x++) {
		int accU = 0;
		int accV = 0;

		for (tap = 0; tap < chrFilterSize; tap++) {
			const int16_t *line = chrSrc[tap];
			const int coeff = chrFilter[tap];

			accU += line[x] * coeff;
			accV += line[x + 2048] * coeff;
		}

		accU >>= 19;
		accV >>= 19;

		if (accU < 0)
			accU = 0;
		else if (accU > 255)
			accU = 255;

		if (accV < 0)
			accV = 0;
		else if (accV > 255)
			accV = 255;

		uDest[x] = accU;
		vDest[x] = accV;
	}
}
220 | |
221 static inline void yuv2rgbXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, | |
222 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, | |
223 uint8_t *dest, int dstW, int dstbpp) | |
224 { | |
225 if(dstbpp==32) | |
226 { | |
227 int i; | |
228 for(i=0; i<(dstW>>1); i++){ | |
229 int j; | |
230 int Y1=0; | |
231 int Y2=0; | |
232 int U=0; | |
233 int V=0; | |
234 int Cb, Cr, Cg; | |
235 for(j=0; j<lumFilterSize; j++) | |
236 { | |
237 Y1 += lumSrc[j][2*i] * lumFilter[j]; | |
238 Y2 += lumSrc[j][2*i+1] * lumFilter[j]; | |
239 } | |
240 for(j=0; j<chrFilterSize; j++) | |
241 { | |
242 U += chrSrc[j][i] * chrFilter[j]; | |
243 V += chrSrc[j][i+2048] * chrFilter[j]; | |
244 } | |
245 Y1= clip_yuvtab_2568[ (Y1>>19) + 256 ]; | |
246 Y2= clip_yuvtab_2568[ (Y2>>19) + 256 ]; | |
247 U >>= 19; | |
248 V >>= 19; | |
249 | |
250 Cb= clip_yuvtab_40cf[U+ 256]; | |
251 Cg= clip_yuvtab_1a1e[V+ 256] + yuvtab_0c92[U+ 256]; | |
252 Cr= clip_yuvtab_3343[V+ 256]; | |
253 | |
254 dest[8*i+0]=clip_table[((Y1 + Cb) >>13)]; | |
255 dest[8*i+1]=clip_table[((Y1 + Cg) >>13)]; | |
256 dest[8*i+2]=clip_table[((Y1 + Cr) >>13)]; | |
257 | |
258 dest[8*i+4]=clip_table[((Y2 + Cb) >>13)]; | |
259 dest[8*i+5]=clip_table[((Y2 + Cg) >>13)]; | |
260 dest[8*i+6]=clip_table[((Y2 + Cr) >>13)]; | |
261 } | |
262 } | |
263 else if(dstbpp==24) | |
264 { | |
265 int i; | |
266 for(i=0; i<(dstW>>1); i++){ | |
267 int j; | |
268 int Y1=0; | |
269 int Y2=0; | |
270 int U=0; | |
271 int V=0; | |
272 int Cb, Cr, Cg; | |
273 for(j=0; j<lumFilterSize; j++) | |
274 { | |
275 Y1 += lumSrc[j][2*i] * lumFilter[j]; | |
276 Y2 += lumSrc[j][2*i+1] * lumFilter[j]; | |
277 } | |
278 for(j=0; j<chrFilterSize; j++) | |
279 { | |
280 U += chrSrc[j][i] * chrFilter[j]; | |
281 V += chrSrc[j][i+2048] * chrFilter[j]; | |
282 } | |
283 Y1= clip_yuvtab_2568[ (Y1>>19) + 256 ]; | |
284 Y2= clip_yuvtab_2568[ (Y2>>19) + 256 ]; | |
285 U >>= 19; | |
286 V >>= 19; | |
287 | |
288 Cb= clip_yuvtab_40cf[U+ 256]; | |
289 Cg= clip_yuvtab_1a1e[V+ 256] + yuvtab_0c92[U+ 256]; | |
290 Cr= clip_yuvtab_3343[V+ 256]; | |
291 | |
292 dest[0]=clip_table[((Y1 + Cb) >>13)]; | |
293 dest[1]=clip_table[((Y1 + Cg) >>13)]; | |
294 dest[2]=clip_table[((Y1 + Cr) >>13)]; | |
295 | |
296 dest[3]=clip_table[((Y2 + Cb) >>13)]; | |
297 dest[4]=clip_table[((Y2 + Cg) >>13)]; | |
298 dest[5]=clip_table[((Y2 + Cr) >>13)]; | |
299 dest+=6; | |
300 } | |
301 } | |
302 else if(dstbpp==16) | |
303 { | |
304 int i; | |
305 for(i=0; i<(dstW>>1); i++){ | |
306 int j; | |
307 int Y1=0; | |
308 int Y2=0; | |
309 int U=0; | |
310 int V=0; | |
311 int Cb, Cr, Cg; | |
312 for(j=0; j<lumFilterSize; j++) | |
313 { | |
314 Y1 += lumSrc[j][2*i] * lumFilter[j]; | |
315 Y2 += lumSrc[j][2*i+1] * lumFilter[j]; | |
316 } | |
317 for(j=0; j<chrFilterSize; j++) | |
318 { | |
319 U += chrSrc[j][i] * chrFilter[j]; | |
320 V += chrSrc[j][i+2048] * chrFilter[j]; | |
321 } | |
322 Y1= clip_yuvtab_2568[ (Y1>>19) + 256 ]; | |
323 Y2= clip_yuvtab_2568[ (Y2>>19) + 256 ]; | |
324 U >>= 19; | |
325 V >>= 19; | |
326 | |
327 Cb= clip_yuvtab_40cf[U+ 256]; | |
328 Cg= clip_yuvtab_1a1e[V+ 256] + yuvtab_0c92[U+ 256]; | |
329 Cr= clip_yuvtab_3343[V+ 256]; | |
330 | |
331 ((uint16_t*)dest)[2*i] = | |
332 clip_table16b[(Y1 + Cb) >>13] | | |
333 clip_table16g[(Y1 + Cg) >>13] | | |
334 clip_table16r[(Y1 + Cr) >>13]; | |
335 | |
336 ((uint16_t*)dest)[2*i+1] = | |
337 clip_table16b[(Y2 + Cb) >>13] | | |
338 clip_table16g[(Y2 + Cg) >>13] | | |
339 clip_table16r[(Y2 + Cr) >>13]; | |
340 } | |
341 } | |
342 else if(dstbpp==15) | |
343 { | |
344 int i; | |
345 for(i=0; i<(dstW>>1); i++){ | |
346 int j; | |
347 int Y1=0; | |
348 int Y2=0; | |
349 int U=0; | |
350 int V=0; | |
351 int Cb, Cr, Cg; | |
352 for(j=0; j<lumFilterSize; j++) | |
353 { | |
354 Y1 += lumSrc[j][2*i] * lumFilter[j]; | |
355 Y2 += lumSrc[j][2*i+1] * lumFilter[j]; | |
356 } | |
357 for(j=0; j<chrFilterSize; j++) | |
358 { | |
359 U += chrSrc[j][i] * chrFilter[j]; | |
360 V += chrSrc[j][i+2048] * chrFilter[j]; | |
361 } | |
362 Y1= clip_yuvtab_2568[ (Y1>>19) + 256 ]; | |
363 Y2= clip_yuvtab_2568[ (Y2>>19) + 256 ]; | |
364 U >>= 19; | |
365 V >>= 19; | |
366 | |
367 Cb= clip_yuvtab_40cf[U+ 256]; | |
368 Cg= clip_yuvtab_1a1e[V+ 256] + yuvtab_0c92[U+ 256]; | |
369 Cr= clip_yuvtab_3343[V+ 256]; | |
370 | |
371 ((uint16_t*)dest)[2*i] = | |
372 clip_table15b[(Y1 + Cb) >>13] | | |
373 clip_table15g[(Y1 + Cg) >>13] | | |
374 clip_table15r[(Y1 + Cr) >>13]; | |
375 | |
376 ((uint16_t*)dest)[2*i+1] = | |
377 clip_table15b[(Y2 + Cb) >>13] | | |
378 clip_table15g[(Y2 + Cg) >>13] | | |
379 clip_table15r[(Y2 + Cr) >>13]; | |
380 } | |
381 } | |
382 } | |
383 | |
384 | |
3126 | 385 //Note: we have C, X86, MMX, MMX2, 3DNOW versions; there's no 3DNOW+MMX2 one |
386 //Plain C versions | |
3152 | 387 #if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT) |
388 #define COMPILE_C | |
389 #endif | |
390 | |
391 #ifdef CAN_COMPILE_X86_ASM | |
392 | |
393 #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT) | |
394 #define COMPILE_MMX | |
395 #endif | |
396 | |
397 #if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT) | |
398 #define COMPILE_MMX2 | |
399 #endif | |
400 | |
401 #if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT) | |
402 #define COMPILE_3DNOW | |
403 #endif | |
404 #endif //CAN_COMPILE_X86_ASM | |
405 | |
406 #undef HAVE_MMX | |
407 #undef HAVE_MMX2 | |
408 #undef HAVE_3DNOW | |
409 #undef ARCH_X86 | |
410 | |
411 #ifdef COMPILE_C | |
3126 | 412 #undef HAVE_MMX |
413 #undef HAVE_MMX2 | |
414 #undef HAVE_3DNOW | |
415 #undef ARCH_X86 | |
416 #define RENAME(a) a ## _C | |
417 #include "swscale_template.c" | |
3152 | 418 #endif |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
419 |
3126 | 420 #ifdef CAN_COMPILE_X86_ASM |
2576 | 421 |
3126 | 422 //X86 versions |
423 /* | |
424 #undef RENAME | |
425 #undef HAVE_MMX | |
426 #undef HAVE_MMX2 | |
427 #undef HAVE_3DNOW | |
428 #define ARCH_X86 | |
429 #define RENAME(a) a ## _X86 | |
430 #include "swscale_template.c" | |
431 */ | |
432 //MMX versions | |
3152 | 433 #ifdef COMPILE_MMX |
3126 | 434 #undef RENAME |
435 #define HAVE_MMX | |
436 #undef HAVE_MMX2 | |
437 #undef HAVE_3DNOW | |
438 #define ARCH_X86 | |
439 #define RENAME(a) a ## _MMX | |
440 #include "swscale_template.c" | |
3152 | 441 #endif |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
442 |
3126 | 443 //MMX2 versions |
3152 | 444 #ifdef COMPILE_MMX2 |
3126 | 445 #undef RENAME |
446 #define HAVE_MMX | |
447 #define HAVE_MMX2 | |
448 #undef HAVE_3DNOW | |
449 #define ARCH_X86 | |
450 #define RENAME(a) a ## _MMX2 | |
451 #include "swscale_template.c" | |
3152 | 452 #endif |
2469 | 453 |
3126 | 454 //3DNOW versions |
3152 | 455 #ifdef COMPILE_3DNOW |
3126 | 456 #undef RENAME |
457 #define HAVE_MMX | |
458 #undef HAVE_MMX2 | |
459 #define HAVE_3DNOW | |
460 #define ARCH_X86 | |
461 #define RENAME(a) a ## _3DNow | |
462 #include "swscale_template.c" | |
3152 | 463 #endif |
2469 | 464 |
3126 | 465 #endif //CAN_COMPILE_X86_ASM |
2469 | 466 |
3126 | 467 // minor note: the HAVE_xyz macros are messed up after this point, so don't use them |
2316
bcb229557e9b
fixed alignment (static variables where sometimes not 8-byte aligned)
michael
parents:
2297
diff
changeset
|
468 |
2232
65996b3467d7
MMX & MMX2 optimizations (MMX2 is buggy and commented out)
michael
parents:
2230
diff
changeset
|
469 |
2519 | 470 // *** bilinear scaling and yuv->rgb or yuv->yuv conversion of yv12 slices: |
2216 | 471 // *** Note: it's called multiple times while decoding a frame, first time y==0 |
3126 | 472 // switching the cpu type during a sliced drawing can have bad effects, like sig11 |
/*
 * Public entry point: forwards one source slice, with all arguments
 * unchanged, to one of the SwScale_YV12slice_<variant> implementations
 * (_C, _MMX, _3DNow, _MMX2).
 *
 * With RUNTIME_CPUDETECT the variant is picked on every call from gCpuCaps
 * (MMX2 first, then 3DNow, then MMX, else C); without x86 asm support the C
 * version is always used.  Without RUNTIME_CPUDETECT the variant is fixed at
 * compile time from the HAVE_MMX2 / HAVE_3DNOW / HAVE_MMX macros.
 * NOTE(review): those HAVE_* macros are #undef'd and redefined earlier in
 * this file around the template inclusions, so the tests below see the
 * state left behind by that section -- confirm this is intended.
 */
void SwScale_YV12slice(unsigned char* srcptr[],int stride[], int srcSliceY ,
			     int srcSliceH, uint8_t* dstptr[], int dststride, int dstbpp,
			     int srcW, int srcH, int dstW, int dstH){

/* Calls the given variant with exactly the arguments we received. */
#define SWSCALE_FORWARD(variant) \
	SwScale_YV12slice ## variant(srcptr, stride, srcSliceY, srcSliceH, \
				     dstptr, dststride, dstbpp, srcW, srcH, dstW, dstH)

#if defined (RUNTIME_CPUDETECT) && defined (CAN_COMPILE_X86_ASM)
	// ordered by speed, fastest first
	if(gCpuCaps.hasMMX2)
		SWSCALE_FORWARD(_MMX2);
	else if(gCpuCaps.has3DNow)
		SWSCALE_FORWARD(_3DNow);
	else if(gCpuCaps.hasMMX)
		SWSCALE_FORWARD(_MMX);
	else
		SWSCALE_FORWARD(_C);
#elif defined (RUNTIME_CPUDETECT)
	SWSCALE_FORWARD(_C);
#elif defined (HAVE_MMX2)
	SWSCALE_FORWARD(_MMX2);
#elif defined (HAVE_3DNOW)
	SWSCALE_FORWARD(_3DNow);
#elif defined (HAVE_MMX)
	SWSCALE_FORWARD(_MMX);
#else
	SWSCALE_FORWARD(_C);
#endif

#undef SWSCALE_FORWARD
}
504 | |
505 void SwScale_Init(){ | |
506 // generating tables: | |
507 int i; | |
3344 | 508 for(i=0; i<768; i++){ |
509 int c= MIN(MAX(i-256, 0), 255); | |
510 clip_table[i]=c; | |
511 yuvtab_2568[c]= clip_yuvtab_2568[i]=(0x2568*(c-16))+(256<<13); | |
512 yuvtab_3343[c]= clip_yuvtab_3343[i]=0x3343*(c-128); | |
513 yuvtab_0c92[c]= clip_yuvtab_0c92[i]=-0x0c92*(c-128); | |
514 yuvtab_1a1e[c]= clip_yuvtab_1a1e[i]=-0x1a1e*(c-128); | |
515 yuvtab_40cf[c]= clip_yuvtab_40cf[i]=0x40cf*(c-128); | |
2216 | 516 } |
517 | |
2584 | 518 for(i=0; i<768; i++) |
519 { | |
520 int v= clip_table[i]; | |
521 clip_table16b[i]= v>>3; | |
522 clip_table16g[i]= (v<<3)&0x07E0; | |
523 clip_table16r[i]= (v<<8)&0xF800; | |
524 clip_table15b[i]= v>>3; | |
525 clip_table15g[i]= (v<<2)&0x03E0; | |
526 clip_table15r[i]= (v<<7)&0x7C00; | |
527 } | |
3344 | 528 |
3126 | 529 } |
2584 | 530 |