Mercurial > mplayer.hg
comparison postproc/swscale.c @ 3352:64121e8a43f5
print more info if -v
use new horizontal mmx scaler instead of old x86asm if mmx2 can't be used (FAST_BILINEAR only)
fixed overflow in init function ... using double precision fp now :)
using C scaler for the last 1-2 lines if there is a chance to write over the end of the dst array
author | michael |
---|---|
date | Thu, 06 Dec 2001 19:07:25 +0000 |
parents | e87c59969d17 |
children | 33c560ffd3dc |
comparison
equal
deleted
inserted
replaced
3351:091cdd056ca4 | 3352:64121e8a43f5 |
---|---|
29 #define RET 0xC3 //near return opcode | 29 #define RET 0xC3 //near return opcode |
30 | 30 |
31 //#define ASSERT(x) if(!(x)) { printf("ASSERT " #x " failed\n"); *((int*)0)=0; } | 31 //#define ASSERT(x) if(!(x)) { printf("ASSERT " #x " failed\n"); *((int*)0)=0; } |
32 #define ASSERT(x) ; | 32 #define ASSERT(x) ; |
33 | 33 |
34 | 34 extern int verbose; // defined in mplayer.c |
35 /* | 35 /* |
36 NOTES | 36 NOTES |
37 | 37 |
38 known BUGS with known cause (no bugreports please!, but patches are welcome :) ) | 38 known BUGS with known cause (no bugreports please!, but patches are welcome :) ) |
39 horizontal MMX2 scaler reads 1-7 samples too much (might cause a sig11) | 39 horizontal fast_bilinear MMX2 scaler reads 1-7 samples too much (might cause a sig11) |
40 | 40 |
41 Supported output formats BGR15 BGR16 BGR24 BGR32, YV12 | 41 Supported output formats BGR15 BGR16 BGR24 BGR32 YV12 |
42 BGR15 & BGR16 MMX versions support dithering | 42 BGR15 & BGR16 MMX versions support dithering |
43 Special versions: fast Y 1:1 scaling (no interpolation in y direction) | 43 Special versions: fast Y 1:1 scaling (no interpolation in y direction) |
44 | 44 |
45 TODO | 45 TODO |
46 more intelligent misalignment avoidance for the horizontal scaler | 46 more intelligent misalignment avoidance for the horizontal scaler |
47 dither in C | 47 dither in C |
48 change the distance of the u & v buffer | 48 change the distance of the u & v buffer |
49 Move static / global vars into a struct so multiple scalers can be used | 49 Move static / global vars into a struct so multiple scalers can be used |
50 write special vertical cubic upscale version | 50 write special vertical cubic upscale version |
51 Optimize C code (yv12 / minmax) | 51 Optimize C code (yv12 / minmax) |
52 dstStride[3] | |
52 */ | 53 */ |
53 | 54 |
54 #define ABS(a) ((a) > 0 ? (a) : (-(a))) | 55 #define ABS(a) ((a) > 0 ? (a) : (-(a))) |
55 #define MIN(a,b) ((a) > (b) ? (b) : (a)) | 56 #define MIN(a,b) ((a) > (b) ? (b) : (a)) |
56 #define MAX(a,b) ((a) < (b) ? (b) : (a)) | 57 #define MAX(a,b) ((a) < (b) ? (b) : (a)) |
180 bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+temp0+asm_yalpha1+ asm_uvalpha1+ | 181 bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+temp0+asm_yalpha1+ asm_uvalpha1+ |
181 M24A+M24B+M24C+w02 + funnyYCode[0]+ funnyUVCode[0]+b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]; | 182 M24A+M24B+M24C+w02 + funnyYCode[0]+ funnyUVCode[0]+b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]; |
182 if(i) i=0; | 183 if(i) i=0; |
183 } | 184 } |
184 #endif | 185 #endif |
186 | |
/*
 * Clamp a filter accumulator to an 8-bit sample: the sum of products is
 * shifted right by 19 and the result is limited to 0..255.
 */
static inline uint8_t yuv2yuvXinC_clip(int acc)
{
	const int v = acc >> 19;

	if (v < 0)
		return 0;
	if (v > 255)
		return 255;
	return (uint8_t)v;
}

/*
 * Plain C vertical filter producing planar Y/U/V output for one line.
 *
 * lumFilter / lumSrc / lumFilterSize: luma coefficients, the source rows they
 *	apply to, and the number of taps; dstW luma samples are written to dest.
 * chrFilter / chrSrc / chrFilterSize: the same for chroma. Each chroma row
 *	holds the U samples at index i and the V samples at index i + 2048.
 * uDest / vDest: chroma outputs, dstW>>1 samples each. If uDest is NULL the
 *	chroma pass is skipped entirely (vDest is then not touched either).
 *
 * The chroma loop is bounded by chrFilterSize. It used to be bounded by
 * lumFilterSize, which dropped chroma taps or indexed past the end of
 * chrSrc / chrFilter whenever the two filter sizes differed.
 */
static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
			       int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
			       uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW)
{
	//FIXME Optimize (just quickly written, not optimized)
	int i;

	for (i = 0; i < dstW; i++) {
		int val = 0;
		int j;

		for (j = 0; j < lumFilterSize; j++)
			val += lumSrc[j][i] * lumFilter[j];

		dest[i] = yuv2yuvXinC_clip(val);
	}

	if (uDest == NULL)
		return;

	for (i = 0; i < (dstW >> 1); i++) {
		int u = 0;
		int v = 0;
		int j;

		for (j = 0; j < chrFilterSize; j++) {
			u += chrSrc[j][i] * chrFilter[j];
			v += chrSrc[j][i + 2048] * chrFilter[j];
		}

		uDest[i] = yuv2yuvXinC_clip(u);
		vDest[i] = yuv2yuvXinC_clip(v);
	}
}
219 | |
220 static inline void yuv2rgbXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, | |
221 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, | |
222 uint8_t *dest, int dstW, int dstbpp) | |
223 { | |
224 if(dstbpp==32) | |
225 { | |
226 int i; | |
227 for(i=0; i<(dstW>>1); i++){ | |
228 int j; | |
229 int Y1=0; | |
230 int Y2=0; | |
231 int U=0; | |
232 int V=0; | |
233 int Cb, Cr, Cg; | |
234 for(j=0; j<lumFilterSize; j++) | |
235 { | |
236 Y1 += lumSrc[j][2*i] * lumFilter[j]; | |
237 Y2 += lumSrc[j][2*i+1] * lumFilter[j]; | |
238 } | |
239 for(j=0; j<chrFilterSize; j++) | |
240 { | |
241 U += chrSrc[j][i] * chrFilter[j]; | |
242 V += chrSrc[j][i+2048] * chrFilter[j]; | |
243 } | |
244 Y1= clip_yuvtab_2568[ (Y1>>19) + 256 ]; | |
245 Y2= clip_yuvtab_2568[ (Y2>>19) + 256 ]; | |
246 U >>= 19; | |
247 V >>= 19; | |
248 | |
249 Cb= clip_yuvtab_40cf[U+ 256]; | |
250 Cg= clip_yuvtab_1a1e[V+ 256] + yuvtab_0c92[U+ 256]; | |
251 Cr= clip_yuvtab_3343[V+ 256]; | |
252 | |
253 dest[8*i+0]=clip_table[((Y1 + Cb) >>13)]; | |
254 dest[8*i+1]=clip_table[((Y1 + Cg) >>13)]; | |
255 dest[8*i+2]=clip_table[((Y1 + Cr) >>13)]; | |
256 | |
257 dest[8*i+4]=clip_table[((Y2 + Cb) >>13)]; | |
258 dest[8*i+5]=clip_table[((Y2 + Cg) >>13)]; | |
259 dest[8*i+6]=clip_table[((Y2 + Cr) >>13)]; | |
260 } | |
261 } | |
262 else if(dstbpp==24) | |
263 { | |
264 int i; | |
265 for(i=0; i<(dstW>>1); i++){ | |
266 int j; | |
267 int Y1=0; | |
268 int Y2=0; | |
269 int U=0; | |
270 int V=0; | |
271 int Cb, Cr, Cg; | |
272 for(j=0; j<lumFilterSize; j++) | |
273 { | |
274 Y1 += lumSrc[j][2*i] * lumFilter[j]; | |
275 Y2 += lumSrc[j][2*i+1] * lumFilter[j]; | |
276 } | |
277 for(j=0; j<chrFilterSize; j++) | |
278 { | |
279 U += chrSrc[j][i] * chrFilter[j]; | |
280 V += chrSrc[j][i+2048] * chrFilter[j]; | |
281 } | |
282 Y1= clip_yuvtab_2568[ (Y1>>19) + 256 ]; | |
283 Y2= clip_yuvtab_2568[ (Y2>>19) + 256 ]; | |
284 U >>= 19; | |
285 V >>= 19; | |
286 | |
287 Cb= clip_yuvtab_40cf[U+ 256]; | |
288 Cg= clip_yuvtab_1a1e[V+ 256] + yuvtab_0c92[U+ 256]; | |
289 Cr= clip_yuvtab_3343[V+ 256]; | |
290 | |
291 dest[0]=clip_table[((Y1 + Cb) >>13)]; | |
292 dest[1]=clip_table[((Y1 + Cg) >>13)]; | |
293 dest[2]=clip_table[((Y1 + Cr) >>13)]; | |
294 | |
295 dest[3]=clip_table[((Y2 + Cb) >>13)]; | |
296 dest[4]=clip_table[((Y2 + Cg) >>13)]; | |
297 dest[5]=clip_table[((Y2 + Cr) >>13)]; | |
298 dest+=6; | |
299 } | |
300 } | |
301 else if(dstbpp==16) | |
302 { | |
303 int i; | |
304 for(i=0; i<(dstW>>1); i++){ | |
305 int j; | |
306 int Y1=0; | |
307 int Y2=0; | |
308 int U=0; | |
309 int V=0; | |
310 int Cb, Cr, Cg; | |
311 for(j=0; j<lumFilterSize; j++) | |
312 { | |
313 Y1 += lumSrc[j][2*i] * lumFilter[j]; | |
314 Y2 += lumSrc[j][2*i+1] * lumFilter[j]; | |
315 } | |
316 for(j=0; j<chrFilterSize; j++) | |
317 { | |
318 U += chrSrc[j][i] * chrFilter[j]; | |
319 V += chrSrc[j][i+2048] * chrFilter[j]; | |
320 } | |
321 Y1= clip_yuvtab_2568[ (Y1>>19) + 256 ]; | |
322 Y2= clip_yuvtab_2568[ (Y2>>19) + 256 ]; | |
323 U >>= 19; | |
324 V >>= 19; | |
325 | |
326 Cb= clip_yuvtab_40cf[U+ 256]; | |
327 Cg= clip_yuvtab_1a1e[V+ 256] + yuvtab_0c92[U+ 256]; | |
328 Cr= clip_yuvtab_3343[V+ 256]; | |
329 | |
330 ((uint16_t*)dest)[2*i] = | |
331 clip_table16b[(Y1 + Cb) >>13] | | |
332 clip_table16g[(Y1 + Cg) >>13] | | |
333 clip_table16r[(Y1 + Cr) >>13]; | |
334 | |
335 ((uint16_t*)dest)[2*i+1] = | |
336 clip_table16b[(Y2 + Cb) >>13] | | |
337 clip_table16g[(Y2 + Cg) >>13] | | |
338 clip_table16r[(Y2 + Cr) >>13]; | |
339 } | |
340 } | |
341 else if(dstbpp==15) | |
342 { | |
343 int i; | |
344 for(i=0; i<(dstW>>1); i++){ | |
345 int j; | |
346 int Y1=0; | |
347 int Y2=0; | |
348 int U=0; | |
349 int V=0; | |
350 int Cb, Cr, Cg; | |
351 for(j=0; j<lumFilterSize; j++) | |
352 { | |
353 Y1 += lumSrc[j][2*i] * lumFilter[j]; | |
354 Y2 += lumSrc[j][2*i+1] * lumFilter[j]; | |
355 } | |
356 for(j=0; j<chrFilterSize; j++) | |
357 { | |
358 U += chrSrc[j][i] * chrFilter[j]; | |
359 V += chrSrc[j][i+2048] * chrFilter[j]; | |
360 } | |
361 Y1= clip_yuvtab_2568[ (Y1>>19) + 256 ]; | |
362 Y2= clip_yuvtab_2568[ (Y2>>19) + 256 ]; | |
363 U >>= 19; | |
364 V >>= 19; | |
365 | |
366 Cb= clip_yuvtab_40cf[U+ 256]; | |
367 Cg= clip_yuvtab_1a1e[V+ 256] + yuvtab_0c92[U+ 256]; | |
368 Cr= clip_yuvtab_3343[V+ 256]; | |
369 | |
370 ((uint16_t*)dest)[2*i] = | |
371 clip_table15b[(Y1 + Cb) >>13] | | |
372 clip_table15g[(Y1 + Cg) >>13] | | |
373 clip_table15r[(Y1 + Cr) >>13]; | |
374 | |
375 ((uint16_t*)dest)[2*i+1] = | |
376 clip_table15b[(Y2 + Cb) >>13] | | |
377 clip_table15g[(Y2 + Cg) >>13] | | |
378 clip_table15r[(Y2 + Cr) >>13]; | |
379 } | |
380 } | |
381 } | |
382 | |
185 | 383 |
186 //Note: we have C, X86, MMX, MMX2, 3DNOW versions; there's no 3DNOW+MMX2 one | 384 //Note: we have C, X86, MMX, MMX2, 3DNOW versions; there's no 3DNOW+MMX2 one |
187 //Plain C versions | 385 //Plain C versions |
188 #if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT) | 386 #if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT) |
189 #define COMPILE_C | 387 #define COMPILE_C |
268 // minor note: the HAVE_xyz is messed up after that line so dont use it | 466 // minor note: the HAVE_xyz is messed up after that line so dont use it |
269 | 467 |
270 | 468 |
271 // *** bilinear scaling and yuv->rgb or yuv->yuv conversion of yv12 slices: | 469 // *** bilinear scaling and yuv->rgb or yuv->yuv conversion of yv12 slices: |
272 // *** Note: it's called multiple times while decoding a frame, first time y==0 | 470 // *** Note: it's called multiple times while decoding a frame, first time y==0 |
273 // *** Designed to upscale, but may work for downscale too. | |
274 // switching the cpu type during a sliced drawing can have bad effects, like sig11 | 471 // switching the cpu type during a sliced drawing can have bad effects, like sig11 |
275 void SwScale_YV12slice(unsigned char* srcptr[],int stride[], int srcSliceY , | 472 void SwScale_YV12slice(unsigned char* srcptr[],int stride[], int srcSliceY , |
276 int srcSliceH, uint8_t* dstptr[], int dststride, int dstbpp, | 473 int srcSliceH, uint8_t* dstptr[], int dststride, int dstbpp, |
277 int srcW, int srcH, int dstW, int dstH){ | 474 int srcW, int srcH, int dstW, int dstH){ |
278 | 475 |