comparison libswscale/swscale.c @ 23155:1befc8d767cd

cosmetics attack, part II: Remove all tabs and prettyprint/reindent the code.
author diego
date Sun, 29 Apr 2007 13:39:27 +0000
parents 9528d1ebe68f
children 6431b46104b7
comparison
equal deleted inserted replaced
23154:e564b9cd7290 23155:1befc8d767cd
100 #define PI M_PI 100 #define PI M_PI
101 #else 101 #else
102 #define PI 3.14159265358979323846 102 #define PI 3.14159265358979323846
103 #endif 103 #endif
104 104
105 #define isSupportedIn(x) ((x)==PIX_FMT_YUV420P || (x)==PIX_FMT_YUYV422 || (x)==PIX_FMT_UYVY422\ 105 #define isSupportedIn(x) ((x)==PIX_FMT_YUV420P || (x)==PIX_FMT_YUYV422 || (x)==PIX_FMT_UYVY422 \
106 || (x)==PIX_FMT_RGB32|| (x)==PIX_FMT_BGR24|| (x)==PIX_FMT_BGR565|| (x)==PIX_FMT_BGR555\ 106 || (x)==PIX_FMT_RGB32 || (x)==PIX_FMT_BGR24 || (x)==PIX_FMT_BGR565 || (x)==PIX_FMT_BGR555 \
107 || (x)==PIX_FMT_BGR32|| (x)==PIX_FMT_RGB24|| (x)==PIX_FMT_RGB565|| (x)==PIX_FMT_RGB555\ 107 || (x)==PIX_FMT_BGR32 || (x)==PIX_FMT_RGB24 || (x)==PIX_FMT_RGB565 || (x)==PIX_FMT_RGB555 \
108 || (x)==PIX_FMT_GRAY8 || (x)==PIX_FMT_YUV410P\ 108 || (x)==PIX_FMT_GRAY8 || (x)==PIX_FMT_YUV410P \
109 || (x)==PIX_FMT_GRAY16BE || (x)==PIX_FMT_GRAY16LE\ 109 || (x)==PIX_FMT_GRAY16BE || (x)==PIX_FMT_GRAY16LE \
110 || (x)==PIX_FMT_YUV444P || (x)==PIX_FMT_YUV422P || (x)==PIX_FMT_YUV411P\ 110 || (x)==PIX_FMT_YUV444P || (x)==PIX_FMT_YUV422P || (x)==PIX_FMT_YUV411P \
111 || (x)==PIX_FMT_PAL8 || (x)==PIX_FMT_BGR8 || (x)==PIX_FMT_RGB8\ 111 || (x)==PIX_FMT_PAL8 || (x)==PIX_FMT_BGR8 || (x)==PIX_FMT_RGB8 \
112 || (x)==PIX_FMT_BGR4_BYTE || (x)==PIX_FMT_RGB4_BYTE) 112 || (x)==PIX_FMT_BGR4_BYTE || (x)==PIX_FMT_RGB4_BYTE)
113 #define isSupportedOut(x) ((x)==PIX_FMT_YUV420P || (x)==PIX_FMT_YUYV422 || (x)==PIX_FMT_UYVY422\ 113 #define isSupportedOut(x) ((x)==PIX_FMT_YUV420P || (x)==PIX_FMT_YUYV422 || (x)==PIX_FMT_UYVY422 \
114 || (x)==PIX_FMT_YUV444P || (x)==PIX_FMT_YUV422P || (x)==PIX_FMT_YUV411P\ 114 || (x)==PIX_FMT_YUV444P || (x)==PIX_FMT_YUV422P || (x)==PIX_FMT_YUV411P \
115 || isRGB(x) || isBGR(x)\ 115 || isRGB(x) || isBGR(x) \
116 || (x)==PIX_FMT_NV12 || (x)==PIX_FMT_NV21\ 116 || (x)==PIX_FMT_NV12 || (x)==PIX_FMT_NV21 \
117 || (x)==PIX_FMT_GRAY16BE || (x)==PIX_FMT_GRAY16LE\ 117 || (x)==PIX_FMT_GRAY16BE || (x)==PIX_FMT_GRAY16LE \
118 || (x)==PIX_FMT_GRAY8 || (x)==PIX_FMT_YUV410P) 118 || (x)==PIX_FMT_GRAY8 || (x)==PIX_FMT_YUV410P)
119 #define isPacked(x) ((x)==PIX_FMT_PAL8 || (x)==PIX_FMT_YUYV422 ||\ 119 #define isPacked(x) ((x)==PIX_FMT_PAL8 || (x)==PIX_FMT_YUYV422 || \
120 (x)==PIX_FMT_UYVY422 || isRGB(x) || isBGR(x)) 120 (x)==PIX_FMT_UYVY422 || isRGB(x) || isBGR(x))
121 121
122 #define RGB2YUV_SHIFT 16 122 #define RGB2YUV_SHIFT 16
123 #define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5)) 123 #define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
124 #define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5)) 124 #define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
148 */ 148 */
149 149
150 #if defined(ARCH_X86) && defined (CONFIG_GPL) 150 #if defined(ARCH_X86) && defined (CONFIG_GPL)
151 static uint64_t attribute_used __attribute__((aligned(8))) bF8= 0xF8F8F8F8F8F8F8F8LL; 151 static uint64_t attribute_used __attribute__((aligned(8))) bF8= 0xF8F8F8F8F8F8F8F8LL;
152 static uint64_t attribute_used __attribute__((aligned(8))) bFC= 0xFCFCFCFCFCFCFCFCLL; 152 static uint64_t attribute_used __attribute__((aligned(8))) bFC= 0xFCFCFCFCFCFCFCFCLL;
153 static uint64_t __attribute__((aligned(8))) w10= 0x0010001000100010LL; 153 static uint64_t __attribute__((aligned(8))) w10= 0x0010001000100010LL;
154 static uint64_t attribute_used __attribute__((aligned(8))) w02= 0x0002000200020002LL; 154 static uint64_t attribute_used __attribute__((aligned(8))) w02= 0x0002000200020002LL;
155 static uint64_t attribute_used __attribute__((aligned(8))) bm00001111=0x00000000FFFFFFFFLL; 155 static uint64_t attribute_used __attribute__((aligned(8))) bm00001111=0x00000000FFFFFFFFLL;
156 static uint64_t attribute_used __attribute__((aligned(8))) bm00000111=0x0000000000FFFFFFLL; 156 static uint64_t attribute_used __attribute__((aligned(8))) bm00000111=0x0000000000FFFFFFLL;
157 static uint64_t attribute_used __attribute__((aligned(8))) bm11111000=0xFFFFFFFFFF000000LL; 157 static uint64_t attribute_used __attribute__((aligned(8))) bm11111000=0xFFFFFFFFFF000000LL;
158 static uint64_t attribute_used __attribute__((aligned(8))) bm01010101=0x00FF00FF00FF00FFLL; 158 static uint64_t attribute_used __attribute__((aligned(8))) bm01010101=0x00FF00FF00FF00FFLL;
161 static volatile uint64_t attribute_used __attribute__((aligned(8))) g5Dither; 161 static volatile uint64_t attribute_used __attribute__((aligned(8))) g5Dither;
162 static volatile uint64_t attribute_used __attribute__((aligned(8))) g6Dither; 162 static volatile uint64_t attribute_used __attribute__((aligned(8))) g6Dither;
163 static volatile uint64_t attribute_used __attribute__((aligned(8))) r5Dither; 163 static volatile uint64_t attribute_used __attribute__((aligned(8))) r5Dither;
164 164
165 static uint64_t __attribute__((aligned(8))) dither4[2]={ 165 static uint64_t __attribute__((aligned(8))) dither4[2]={
166 0x0103010301030103LL, 166 0x0103010301030103LL,
167 0x0200020002000200LL,}; 167 0x0200020002000200LL,};
168 168
169 static uint64_t __attribute__((aligned(8))) dither8[2]={ 169 static uint64_t __attribute__((aligned(8))) dither8[2]={
170 0x0602060206020602LL, 170 0x0602060206020602LL,
171 0x0004000400040004LL,}; 171 0x0004000400040004LL,};
172 172
173 static uint64_t __attribute__((aligned(8))) b16Mask= 0x001F001F001F001FLL; 173 static uint64_t __attribute__((aligned(8))) b16Mask= 0x001F001F001F001FLL;
174 static uint64_t attribute_used __attribute__((aligned(8))) g16Mask= 0x07E007E007E007E0LL; 174 static uint64_t attribute_used __attribute__((aligned(8))) g16Mask= 0x07E007E007E007E0LL;
175 static uint64_t attribute_used __attribute__((aligned(8))) r16Mask= 0xF800F800F800F800LL; 175 static uint64_t attribute_used __attribute__((aligned(8))) r16Mask= 0xF800F800F800F800LL;
176 static uint64_t __attribute__((aligned(8))) b15Mask= 0x001F001F001F001FLL; 176 static uint64_t __attribute__((aligned(8))) b15Mask= 0x001F001F001F001FLL;
177 static uint64_t attribute_used __attribute__((aligned(8))) g15Mask= 0x03E003E003E003E0LL; 177 static uint64_t attribute_used __attribute__((aligned(8))) g15Mask= 0x03E003E003E003E0LL;
178 static uint64_t attribute_used __attribute__((aligned(8))) r15Mask= 0x7C007C007C007C00LL; 178 static uint64_t attribute_used __attribute__((aligned(8))) r15Mask= 0x7C007C007C007C00LL;
179 179
180 static uint64_t attribute_used __attribute__((aligned(8))) M24A= 0x00FF0000FF0000FFLL; 180 static uint64_t attribute_used __attribute__((aligned(8))) M24A= 0x00FF0000FF0000FFLL;
181 static uint64_t attribute_used __attribute__((aligned(8))) M24B= 0xFF0000FF0000FF00LL; 181 static uint64_t attribute_used __attribute__((aligned(8))) M24B= 0xFF0000FF0000FF00LL;
182 static uint64_t attribute_used __attribute__((aligned(8))) M24C= 0x0000FF0000FF0000LL; 182 static uint64_t attribute_used __attribute__((aligned(8))) M24C= 0x0000FF0000FF0000LL;
183 183
184 #ifdef FAST_BGR2YV12 184 #ifdef FAST_BGR2YV12
185 static const uint64_t bgr2YCoeff attribute_used __attribute__((aligned(8))) = 0x000000210041000DULL; 185 static const uint64_t bgr2YCoeff attribute_used __attribute__((aligned(8))) = 0x000000210041000DULL;
186 static const uint64_t bgr2UCoeff attribute_used __attribute__((aligned(8))) = 0x0000FFEEFFDC0038ULL; 186 static const uint64_t bgr2UCoeff attribute_used __attribute__((aligned(8))) = 0x0000FFEEFFDC0038ULL;
187 static const uint64_t bgr2VCoeff attribute_used __attribute__((aligned(8))) = 0x00000038FFD2FFF8ULL; 187 static const uint64_t bgr2VCoeff attribute_used __attribute__((aligned(8))) = 0x00000038FFD2FFF8ULL;
188 #else 188 #else
189 static const uint64_t bgr2YCoeff attribute_used __attribute__((aligned(8))) = 0x000020E540830C8BULL; 189 static const uint64_t bgr2YCoeff attribute_used __attribute__((aligned(8))) = 0x000020E540830C8BULL;
190 static const uint64_t bgr2UCoeff attribute_used __attribute__((aligned(8))) = 0x0000ED0FDAC23831ULL; 190 static const uint64_t bgr2UCoeff attribute_used __attribute__((aligned(8))) = 0x0000ED0FDAC23831ULL;
191 static const uint64_t bgr2VCoeff attribute_used __attribute__((aligned(8))) = 0x00003831D0E6F6EAULL; 191 static const uint64_t bgr2VCoeff attribute_used __attribute__((aligned(8))) = 0x00003831D0E6F6EAULL;
192 #endif /* FAST_BGR2YV12 */ 192 #endif /* FAST_BGR2YV12 */
193 static const uint64_t bgr2YOffset attribute_used __attribute__((aligned(8))) = 0x1010101010101010ULL; 193 static const uint64_t bgr2YOffset attribute_used __attribute__((aligned(8))) = 0x1010101010101010ULL;
194 static const uint64_t bgr2UVOffset attribute_used __attribute__((aligned(8)))= 0x8080808080808080ULL; 194 static const uint64_t bgr2UVOffset attribute_used __attribute__((aligned(8))) = 0x8080808080808080ULL;
195 static const uint64_t w1111 attribute_used __attribute__((aligned(8))) = 0x0001000100010001ULL; 195 static const uint64_t w1111 attribute_used __attribute__((aligned(8))) = 0x0001000100010001ULL;
196 #endif /* defined(ARCH_X86) */ 196 #endif /* defined(ARCH_X86) */
197 197
198 // clipping helper table for C implementations: 198 // clipping helper table for C implementations:
199 static unsigned char clip_table[768]; 199 static unsigned char clip_table[768];
200 200
295 } 295 }
296 296
297 #if defined(ARCH_X86) && defined (CONFIG_GPL) 297 #if defined(ARCH_X86) && defined (CONFIG_GPL)
298 void in_asm_used_var_warning_killer() 298 void in_asm_used_var_warning_killer()
299 { 299 {
300 volatile int i= bF8+bFC+w10+ 300 volatile int i= bF8+bFC+w10+
301 bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+ 301 bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+
302 M24A+M24B+M24C+w02 + b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]+bm01010101; 302 M24A+M24B+M24C+w02 + b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]+bm01010101;
303 if(i) i=0; 303 if (i) i=0;
304 } 304 }
305 #endif 305 #endif
306 306
307 static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, 307 static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
308 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, 308 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
309 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW) 309 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
310 { 310 {
311 //FIXME Optimize (just quickly writen not opti..) 311 //FIXME Optimize (just quickly writen not opti..)
312 int i; 312 int i;
313 for(i=0; i<dstW; i++) 313 for (i=0; i<dstW; i++)
314 { 314 {
315 int val=1<<18; 315 int val=1<<18;
316 int j; 316 int j;
317 for(j=0; j<lumFilterSize; j++) 317 for (j=0; j<lumFilterSize; j++)
318 val += lumSrc[j][i] * lumFilter[j]; 318 val += lumSrc[j][i] * lumFilter[j];
319 319
320 dest[i]= av_clip_uint8(val>>19); 320 dest[i]= av_clip_uint8(val>>19);
321 } 321 }
322 322
323 if(uDest != NULL) 323 if (uDest != NULL)
324 for(i=0; i<chrDstW; i++) 324 for (i=0; i<chrDstW; i++)
325 { 325 {
326 int u=1<<18; 326 int u=1<<18;
327 int v=1<<18; 327 int v=1<<18;
328 int j; 328 int j;
329 for(j=0; j<chrFilterSize; j++) 329 for (j=0; j<chrFilterSize; j++)
330 { 330 {
331 u += chrSrc[j][i] * chrFilter[j]; 331 u += chrSrc[j][i] * chrFilter[j];
332 v += chrSrc[j][i + 2048] * chrFilter[j]; 332 v += chrSrc[j][i + 2048] * chrFilter[j];
333 } 333 }
334 334
335 uDest[i]= av_clip_uint8(u>>19); 335 uDest[i]= av_clip_uint8(u>>19);
336 vDest[i]= av_clip_uint8(v>>19); 336 vDest[i]= av_clip_uint8(v>>19);
337 } 337 }
338 } 338 }
339 339
340 static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, 340 static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
341 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, 341 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
342 uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat) 342 uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
343 { 343 {
344 //FIXME Optimize (just quickly writen not opti..) 344 //FIXME Optimize (just quickly writen not opti..)
345 int i; 345 int i;
346 for(i=0; i<dstW; i++) 346 for (i=0; i<dstW; i++)
347 { 347 {
348 int val=1<<18; 348 int val=1<<18;
349 int j; 349 int j;
350 for(j=0; j<lumFilterSize; j++) 350 for (j=0; j<lumFilterSize; j++)
351 val += lumSrc[j][i] * lumFilter[j]; 351 val += lumSrc[j][i] * lumFilter[j];
352 352
353 dest[i]= av_clip_uint8(val>>19); 353 dest[i]= av_clip_uint8(val>>19);
354 } 354 }
355 355
356 if(uDest == NULL) 356 if (uDest == NULL)
357 return; 357 return;
358 358
359 if(dstFormat == PIX_FMT_NV12) 359 if (dstFormat == PIX_FMT_NV12)
360 for(i=0; i<chrDstW; i++) 360 for (i=0; i<chrDstW; i++)
361 { 361 {
362 int u=1<<18; 362 int u=1<<18;
363 int v=1<<18; 363 int v=1<<18;
364 int j; 364 int j;
365 for(j=0; j<chrFilterSize; j++) 365 for (j=0; j<chrFilterSize; j++)
366 { 366 {
367 u += chrSrc[j][i] * chrFilter[j]; 367 u += chrSrc[j][i] * chrFilter[j];
368 v += chrSrc[j][i + 2048] * chrFilter[j]; 368 v += chrSrc[j][i + 2048] * chrFilter[j];
369 } 369 }
370 370
371 uDest[2*i]= av_clip_uint8(u>>19); 371 uDest[2*i]= av_clip_uint8(u>>19);
372 uDest[2*i+1]= av_clip_uint8(v>>19); 372 uDest[2*i+1]= av_clip_uint8(v>>19);
373 } 373 }
374 else 374 else
375 for(i=0; i<chrDstW; i++) 375 for (i=0; i<chrDstW; i++)
376 { 376 {
377 int u=1<<18; 377 int u=1<<18;
378 int v=1<<18; 378 int v=1<<18;
379 int j; 379 int j;
380 for(j=0; j<chrFilterSize; j++) 380 for (j=0; j<chrFilterSize; j++)
381 { 381 {
382 u += chrSrc[j][i] * chrFilter[j]; 382 u += chrSrc[j][i] * chrFilter[j];
383 v += chrSrc[j][i + 2048] * chrFilter[j]; 383 v += chrSrc[j][i + 2048] * chrFilter[j];
384 } 384 }
385 385
386 uDest[2*i]= av_clip_uint8(v>>19); 386 uDest[2*i]= av_clip_uint8(v>>19);
387 uDest[2*i+1]= av_clip_uint8(u>>19); 387 uDest[2*i+1]= av_clip_uint8(u>>19);
388 } 388 }
389 } 389 }
390 390
391 #define YSCALE_YUV_2_PACKEDX_C(type) \ 391 #define YSCALE_YUV_2_PACKEDX_C(type) \
392 for(i=0; i<(dstW>>1); i++){\ 392 for (i=0; i<(dstW>>1); i++){\
393 int j;\ 393 int j;\
394 int Y1=1<<18;\ 394 int Y1 = 1<<18;\
395 int Y2=1<<18;\ 395 int Y2 = 1<<18;\
396 int U=1<<18;\ 396 int U = 1<<18;\
397 int V=1<<18;\ 397 int V = 1<<18;\
398 type attribute_unused *r, *b, *g;\ 398 type attribute_unused *r, *b, *g;\
399 const int i2= 2*i;\ 399 const int i2= 2*i;\
400 \ 400 \
401 for(j=0; j<lumFilterSize; j++)\ 401 for (j=0; j<lumFilterSize; j++)\
402 {\ 402 {\
403 Y1 += lumSrc[j][i2] * lumFilter[j];\ 403 Y1 += lumSrc[j][i2] * lumFilter[j];\
404 Y2 += lumSrc[j][i2+1] * lumFilter[j];\ 404 Y2 += lumSrc[j][i2+1] * lumFilter[j];\
405 }\ 405 }\
406 for(j=0; j<chrFilterSize; j++)\ 406 for (j=0; j<chrFilterSize; j++)\
407 {\ 407 {\
408 U += chrSrc[j][i] * chrFilter[j];\ 408 U += chrSrc[j][i] * chrFilter[j];\
409 V += chrSrc[j][i+2048] * chrFilter[j];\ 409 V += chrSrc[j][i+2048] * chrFilter[j];\
410 }\ 410 }\
411 Y1>>=19;\ 411 Y1>>=19;\
412 Y2>>=19;\ 412 Y2>>=19;\
413 U >>=19;\ 413 U >>=19;\
414 V >>=19;\ 414 V >>=19;\
415 if((Y1|Y2|U|V)&256)\ 415 if ((Y1|Y2|U|V)&256)\
416 {\ 416 {\
417 if(Y1>255) Y1=255;\ 417 if (Y1>255) Y1=255; \
418 else if(Y1<0)Y1=0;\ 418 else if (Y1<0)Y1=0; \
419 if(Y2>255) Y2=255;\ 419 if (Y2>255) Y2=255; \
420 else if(Y2<0)Y2=0;\ 420 else if (Y2<0)Y2=0; \
421 if(U>255) U=255;\ 421 if (U>255) U=255; \
422 else if(U<0) U=0;\ 422 else if (U<0) U=0; \
423 if(V>255) V=255;\ 423 if (V>255) V=255; \
424 else if(V<0) V=0;\ 424 else if (V<0) V=0; \
425 } 425 }
426 426
427 #define YSCALE_YUV_2_RGBX_C(type) \ 427 #define YSCALE_YUV_2_RGBX_C(type) \
428 YSCALE_YUV_2_PACKEDX_C(type)\ 428 YSCALE_YUV_2_PACKEDX_C(type) \
429 r = (type *)c->table_rV[V];\ 429 r = (type *)c->table_rV[V]; \
430 g = (type *)(c->table_gU[U] + c->table_gV[V]);\ 430 g = (type *)(c->table_gU[U] + c->table_gV[V]); \
431 b = (type *)c->table_bU[U];\ 431 b = (type *)c->table_bU[U]; \
432 432
433 #define YSCALE_YUV_2_PACKED2_C \ 433 #define YSCALE_YUV_2_PACKED2_C \
434 for(i=0; i<(dstW>>1); i++){\ 434 for (i=0; i<(dstW>>1); i++){ \
435 const int i2= 2*i;\ 435 const int i2= 2*i; \
436 int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>19;\ 436 int Y1= (buf0[i2 ]*yalpha1+buf1[i2 ]*yalpha)>>19; \
437 int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19;\ 437 int Y2= (buf0[i2+1]*yalpha1+buf1[i2+1]*yalpha)>>19; \
438 int U= (uvbuf0[i ]*uvalpha1+uvbuf1[i ]*uvalpha)>>19;\ 438 int U= (uvbuf0[i ]*uvalpha1+uvbuf1[i ]*uvalpha)>>19; \
439 int V= (uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19;\ 439 int V= (uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>19; \
440 440
441 #define YSCALE_YUV_2_RGB2_C(type) \ 441 #define YSCALE_YUV_2_RGB2_C(type) \
442 YSCALE_YUV_2_PACKED2_C\ 442 YSCALE_YUV_2_PACKED2_C\
443 type *r, *b, *g;\ 443 type *r, *b, *g;\
444 r = (type *)c->table_rV[V];\ 444 r = (type *)c->table_rV[V];\
445 g = (type *)(c->table_gU[U] + c->table_gV[V]);\ 445 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
446 b = (type *)c->table_bU[U];\ 446 b = (type *)c->table_bU[U];\
447 447
448 #define YSCALE_YUV_2_PACKED1_C \ 448 #define YSCALE_YUV_2_PACKED1_C \
449 for(i=0; i<(dstW>>1); i++){\ 449 for (i=0; i<(dstW>>1); i++){\
450 const int i2= 2*i;\ 450 const int i2= 2*i;\
451 int Y1= buf0[i2 ]>>7;\ 451 int Y1= buf0[i2 ]>>7;\
452 int Y2= buf0[i2+1]>>7;\ 452 int Y2= buf0[i2+1]>>7;\
453 int U= (uvbuf1[i ])>>7;\ 453 int U= (uvbuf1[i ])>>7;\
454 int V= (uvbuf1[i+2048])>>7;\ 454 int V= (uvbuf1[i+2048])>>7;\
455 455
456 #define YSCALE_YUV_2_RGB1_C(type) \ 456 #define YSCALE_YUV_2_RGB1_C(type) \
457 YSCALE_YUV_2_PACKED1_C\ 457 YSCALE_YUV_2_PACKED1_C\
458 type *r, *b, *g;\ 458 type *r, *b, *g;\
459 r = (type *)c->table_rV[V];\ 459 r = (type *)c->table_rV[V];\
460 g = (type *)(c->table_gU[U] + c->table_gV[V]);\ 460 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
461 b = (type *)c->table_bU[U];\ 461 b = (type *)c->table_bU[U];\
462 462
463 #define YSCALE_YUV_2_PACKED1B_C \ 463 #define YSCALE_YUV_2_PACKED1B_C \
464 for(i=0; i<(dstW>>1); i++){\ 464 for (i=0; i<(dstW>>1); i++){\
465 const int i2= 2*i;\ 465 const int i2= 2*i;\
466 int Y1= buf0[i2 ]>>7;\ 466 int Y1= buf0[i2 ]>>7;\
467 int Y2= buf0[i2+1]>>7;\ 467 int Y2= buf0[i2+1]>>7;\
468 int U= (uvbuf0[i ] + uvbuf1[i ])>>8;\ 468 int U= (uvbuf0[i ] + uvbuf1[i ])>>8;\
469 int V= (uvbuf0[i+2048] + uvbuf1[i+2048])>>8;\ 469 int V= (uvbuf0[i+2048] + uvbuf1[i+2048])>>8;\
470 470
471 #define YSCALE_YUV_2_RGB1B_C(type) \ 471 #define YSCALE_YUV_2_RGB1B_C(type) \
472 YSCALE_YUV_2_PACKED1B_C\ 472 YSCALE_YUV_2_PACKED1B_C\
473 type *r, *b, *g;\ 473 type *r, *b, *g;\
474 r = (type *)c->table_rV[V];\ 474 r = (type *)c->table_rV[V];\
475 g = (type *)(c->table_gU[U] + c->table_gV[V]);\ 475 g = (type *)(c->table_gU[U] + c->table_gV[V]);\
476 b = (type *)c->table_bU[U];\ 476 b = (type *)c->table_bU[U];\
477 477
478 #define YSCALE_YUV_2_ANYRGB_C(func, func2)\ 478 #define YSCALE_YUV_2_ANYRGB_C(func, func2)\
479 switch(c->dstFormat)\ 479 switch(c->dstFormat)\
480 {\ 480 {\
481 case PIX_FMT_RGB32:\ 481 case PIX_FMT_RGB32:\
482 case PIX_FMT_BGR32:\ 482 case PIX_FMT_BGR32:\
483 func(uint32_t)\ 483 func(uint32_t)\
484 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\ 484 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];\
485 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\ 485 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];\
486 } \ 486 } \
487 break;\ 487 break;\
488 case PIX_FMT_RGB24:\ 488 case PIX_FMT_RGB24:\
489 func(uint8_t)\ 489 func(uint8_t)\
490 ((uint8_t*)dest)[0]= r[Y1];\ 490 ((uint8_t*)dest)[0]= r[Y1];\
491 ((uint8_t*)dest)[1]= g[Y1];\ 491 ((uint8_t*)dest)[1]= g[Y1];\
492 ((uint8_t*)dest)[2]= b[Y1];\ 492 ((uint8_t*)dest)[2]= b[Y1];\
493 ((uint8_t*)dest)[3]= r[Y2];\ 493 ((uint8_t*)dest)[3]= r[Y2];\
494 ((uint8_t*)dest)[4]= g[Y2];\ 494 ((uint8_t*)dest)[4]= g[Y2];\
495 ((uint8_t*)dest)[5]= b[Y2];\ 495 ((uint8_t*)dest)[5]= b[Y2];\
496 dest+=6;\ 496 dest+=6;\
497 }\ 497 }\
498 break;\ 498 break;\
499 case PIX_FMT_BGR24:\ 499 case PIX_FMT_BGR24:\
500 func(uint8_t)\ 500 func(uint8_t)\
501 ((uint8_t*)dest)[0]= b[Y1];\ 501 ((uint8_t*)dest)[0]= b[Y1];\
502 ((uint8_t*)dest)[1]= g[Y1];\ 502 ((uint8_t*)dest)[1]= g[Y1];\
503 ((uint8_t*)dest)[2]= r[Y1];\ 503 ((uint8_t*)dest)[2]= r[Y1];\
504 ((uint8_t*)dest)[3]= b[Y2];\ 504 ((uint8_t*)dest)[3]= b[Y2];\
505 ((uint8_t*)dest)[4]= g[Y2];\ 505 ((uint8_t*)dest)[4]= g[Y2];\
506 ((uint8_t*)dest)[5]= r[Y2];\ 506 ((uint8_t*)dest)[5]= r[Y2];\
507 dest+=6;\ 507 dest+=6;\
508 }\ 508 }\
509 break;\ 509 break;\
510 case PIX_FMT_RGB565:\ 510 case PIX_FMT_RGB565:\
511 case PIX_FMT_BGR565:\ 511 case PIX_FMT_BGR565:\
512 {\ 512 {\
513 const int dr1= dither_2x2_8[y&1 ][0];\ 513 const int dr1= dither_2x2_8[y&1 ][0];\
514 const int dg1= dither_2x2_4[y&1 ][0];\ 514 const int dg1= dither_2x2_4[y&1 ][0];\
515 const int db1= dither_2x2_8[(y&1)^1][0];\ 515 const int db1= dither_2x2_8[(y&1)^1][0];\
516 const int dr2= dither_2x2_8[y&1 ][1];\ 516 const int dr2= dither_2x2_8[y&1 ][1];\
517 const int dg2= dither_2x2_4[y&1 ][1];\ 517 const int dg2= dither_2x2_4[y&1 ][1];\
518 const int db2= dither_2x2_8[(y&1)^1][1];\ 518 const int db2= dither_2x2_8[(y&1)^1][1];\
519 func(uint16_t)\ 519 func(uint16_t)\
520 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\ 520 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
521 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\ 521 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
522 }\ 522 }\
523 }\ 523 }\
524 break;\ 524 break;\
525 case PIX_FMT_RGB555:\ 525 case PIX_FMT_RGB555:\
526 case PIX_FMT_BGR555:\ 526 case PIX_FMT_BGR555:\
527 {\ 527 {\
528 const int dr1= dither_2x2_8[y&1 ][0];\ 528 const int dr1= dither_2x2_8[y&1 ][0];\
529 const int dg1= dither_2x2_8[y&1 ][1];\ 529 const int dg1= dither_2x2_8[y&1 ][1];\
530 const int db1= dither_2x2_8[(y&1)^1][0];\ 530 const int db1= dither_2x2_8[(y&1)^1][0];\
531 const int dr2= dither_2x2_8[y&1 ][1];\ 531 const int dr2= dither_2x2_8[y&1 ][1];\
532 const int dg2= dither_2x2_8[y&1 ][0];\ 532 const int dg2= dither_2x2_8[y&1 ][0];\
533 const int db2= dither_2x2_8[(y&1)^1][1];\ 533 const int db2= dither_2x2_8[(y&1)^1][1];\
534 func(uint16_t)\ 534 func(uint16_t)\
535 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\ 535 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];\
536 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\ 536 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];\
537 }\ 537 }\
538 }\ 538 }\
539 break;\ 539 break;\
540 case PIX_FMT_RGB8:\ 540 case PIX_FMT_RGB8:\
541 case PIX_FMT_BGR8:\ 541 case PIX_FMT_BGR8:\
542 {\ 542 {\
543 const uint8_t * const d64= dither_8x8_73[y&7];\ 543 const uint8_t * const d64= dither_8x8_73[y&7];\
544 const uint8_t * const d32= dither_8x8_32[y&7];\ 544 const uint8_t * const d32= dither_8x8_32[y&7];\
545 func(uint8_t)\ 545 func(uint8_t)\
546 ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\ 546 ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];\
547 ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\ 547 ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];\
548 }\ 548 }\
549 }\ 549 }\
550 break;\ 550 break;\
551 case PIX_FMT_RGB4:\ 551 case PIX_FMT_RGB4:\
552 case PIX_FMT_BGR4:\ 552 case PIX_FMT_BGR4:\
553 {\ 553 {\
554 const uint8_t * const d64= dither_8x8_73 [y&7];\ 554 const uint8_t * const d64= dither_8x8_73 [y&7];\
555 const uint8_t * const d128=dither_8x8_220[y&7];\ 555 const uint8_t * const d128=dither_8x8_220[y&7];\
556 func(uint8_t)\ 556 func(uint8_t)\
557 ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\ 557 ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]\
558 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\ 558 + ((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);\
559 }\ 559 }\
560 }\ 560 }\
561 break;\ 561 break;\
562 case PIX_FMT_RGB4_BYTE:\ 562 case PIX_FMT_RGB4_BYTE:\
563 case PIX_FMT_BGR4_BYTE:\ 563 case PIX_FMT_BGR4_BYTE:\
564 {\ 564 {\
565 const uint8_t * const d64= dither_8x8_73 [y&7];\ 565 const uint8_t * const d64= dither_8x8_73 [y&7];\
566 const uint8_t * const d128=dither_8x8_220[y&7];\ 566 const uint8_t * const d128=dither_8x8_220[y&7];\
567 func(uint8_t)\ 567 func(uint8_t)\
568 ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\ 568 ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];\
569 ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\ 569 ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];\
570 }\ 570 }\
571 }\ 571 }\
572 break;\ 572 break;\
573 case PIX_FMT_MONOBLACK:\ 573 case PIX_FMT_MONOBLACK:\
574 {\ 574 {\
575 const uint8_t * const d128=dither_8x8_220[y&7];\ 575 const uint8_t * const d128=dither_8x8_220[y&7];\
576 uint8_t *g= c->table_gU[128] + c->table_gV[128];\ 576 uint8_t *g= c->table_gU[128] + c->table_gV[128];\
577 for(i=0; i<dstW-7; i+=8){\ 577 for (i=0; i<dstW-7; i+=8){\
578 int acc;\ 578 int acc;\
579 acc = g[((buf0[i ]*yalpha1+buf1[i ]*yalpha)>>19) + d128[0]];\ 579 acc = g[((buf0[i ]*yalpha1+buf1[i ]*yalpha)>>19) + d128[0]];\
580 acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\ 580 acc+= acc + g[((buf0[i+1]*yalpha1+buf1[i+1]*yalpha)>>19) + d128[1]];\
581 acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\ 581 acc+= acc + g[((buf0[i+2]*yalpha1+buf1[i+2]*yalpha)>>19) + d128[2]];\
582 acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\ 582 acc+= acc + g[((buf0[i+3]*yalpha1+buf1[i+3]*yalpha)>>19) + d128[3]];\
583 acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\ 583 acc+= acc + g[((buf0[i+4]*yalpha1+buf1[i+4]*yalpha)>>19) + d128[4]];\
584 acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\ 584 acc+= acc + g[((buf0[i+5]*yalpha1+buf1[i+5]*yalpha)>>19) + d128[5]];\
585 acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\ 585 acc+= acc + g[((buf0[i+6]*yalpha1+buf1[i+6]*yalpha)>>19) + d128[6]];\
586 acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\ 586 acc+= acc + g[((buf0[i+7]*yalpha1+buf1[i+7]*yalpha)>>19) + d128[7]];\
587 ((uint8_t*)dest)[0]= acc;\ 587 ((uint8_t*)dest)[0]= acc;\
588 dest++;\ 588 dest++;\
589 }\ 589 }\
590 \ 590 \
591 /*\ 591 /*\
592 ((uint8_t*)dest)-= dstW>>4;\ 592 ((uint8_t*)dest)-= dstW>>4;\
593 {\ 593 {\
594 int acc=0;\ 594 int acc=0;\
595 int left=0;\ 595 int left=0;\
596 static int top[1024];\ 596 static int top[1024];\
597 static int last_new[1024][1024];\ 597 static int last_new[1024][1024];\
598 static int last_in3[1024][1024];\ 598 static int last_in3[1024][1024];\
599 static int drift[1024][1024];\ 599 static int drift[1024][1024];\
600 int topLeft=0;\ 600 int topLeft=0;\
601 int shift=0;\ 601 int shift=0;\
602 int count=0;\ 602 int count=0;\
603 const uint8_t * const d128=dither_8x8_220[y&7];\ 603 const uint8_t * const d128=dither_8x8_220[y&7];\
604 int error_new=0;\ 604 int error_new=0;\
605 int error_in3=0;\ 605 int error_in3=0;\
606 int f=0;\ 606 int f=0;\
607 \ 607 \
608 for(i=dstW>>1; i<dstW; i++){\ 608 for (i=dstW>>1; i<dstW; i++){\
609 int in= ((buf0[i ]*yalpha1+buf1[i ]*yalpha)>>19);\ 609 int in= ((buf0[i ]*yalpha1+buf1[i ]*yalpha)>>19);\
610 int in2 = (76309 * (in - 16) + 32768) >> 16;\ 610 int in2 = (76309 * (in - 16) + 32768) >> 16;\
611 int in3 = (in2 < 0) ? 0 : ((in2 > 255) ? 255 : in2);\ 611 int in3 = (in2 < 0) ? 0 : ((in2 > 255) ? 255 : in2);\
612 int old= (left*7 + topLeft + top[i]*5 + top[i+1]*3)/20 + in3\ 612 int old= (left*7 + topLeft + top[i]*5 + top[i+1]*3)/20 + in3\
613 + (last_new[y][i] - in3)*f/256;\ 613 + (last_new[y][i] - in3)*f/256;\
614 int new= old> 128 ? 255 : 0;\ 614 int new= old> 128 ? 255 : 0;\
615 \ 615 \
616 error_new+= FFABS(last_new[y][i] - new);\ 616 error_new+= FFABS(last_new[y][i] - new);\
617 error_in3+= FFABS(last_in3[y][i] - in3);\ 617 error_in3+= FFABS(last_in3[y][i] - in3);\
618 f= error_new - error_in3*4;\ 618 f= error_new - error_in3*4;\
619 if(f<0) f=0;\ 619 if (f<0) f=0;\
620 if(f>256) f=256;\ 620 if (f>256) f=256;\
621 \ 621 \
622 topLeft= top[i];\ 622 topLeft= top[i];\
623 left= top[i]= old - new;\ 623 left= top[i]= old - new;\
624 last_new[y][i]= new;\ 624 last_new[y][i]= new;\
625 last_in3[y][i]= in3;\ 625 last_in3[y][i]= in3;\
626 \ 626 \
627 acc+= acc + (new&1);\ 627 acc+= acc + (new&1);\
628 if((i&7)==6){\ 628 if ((i&7)==6){\
629 ((uint8_t*)dest)[0]= acc;\ 629 ((uint8_t*)dest)[0]= acc;\
630 ((uint8_t*)dest)++;\ 630 ((uint8_t*)dest)++;\
631 }\ 631 }\
632 }\ 632 }\
633 }\ 633 }\
634 */\ 634 */\
635 }\ 635 }\
636 break;\ 636 break;\
637 case PIX_FMT_YUYV422:\ 637 case PIX_FMT_YUYV422:\
638 func2\ 638 func2\
639 ((uint8_t*)dest)[2*i2+0]= Y1;\ 639 ((uint8_t*)dest)[2*i2+0]= Y1;\
640 ((uint8_t*)dest)[2*i2+1]= U;\ 640 ((uint8_t*)dest)[2*i2+1]= U;\
641 ((uint8_t*)dest)[2*i2+2]= Y2;\ 641 ((uint8_t*)dest)[2*i2+2]= Y2;\
642 ((uint8_t*)dest)[2*i2+3]= V;\ 642 ((uint8_t*)dest)[2*i2+3]= V;\
643 } \ 643 } \
644 break;\ 644 break;\
645 case PIX_FMT_UYVY422:\ 645 case PIX_FMT_UYVY422:\
646 func2\ 646 func2\
647 ((uint8_t*)dest)[2*i2+0]= U;\ 647 ((uint8_t*)dest)[2*i2+0]= U;\
648 ((uint8_t*)dest)[2*i2+1]= Y1;\ 648 ((uint8_t*)dest)[2*i2+1]= Y1;\
649 ((uint8_t*)dest)[2*i2+2]= V;\ 649 ((uint8_t*)dest)[2*i2+2]= V;\
650 ((uint8_t*)dest)[2*i2+3]= Y2;\ 650 ((uint8_t*)dest)[2*i2+3]= Y2;\
651 } \ 651 } \
652 break;\ 652 break;\
653 }\ 653 }\
654 654
655 655
656 static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, 656 static inline void yuv2packedXinC(SwsContext *c, int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
657 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, 657 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
658 uint8_t *dest, int dstW, int y) 658 uint8_t *dest, int dstW, int y)
659 { 659 {
660 int i; 660 int i;
661 switch(c->dstFormat) 661 switch(c->dstFormat)
662 { 662 {
663 case PIX_FMT_BGR32: 663 case PIX_FMT_BGR32:
664 case PIX_FMT_RGB32: 664 case PIX_FMT_RGB32:
665 YSCALE_YUV_2_RGBX_C(uint32_t) 665 YSCALE_YUV_2_RGBX_C(uint32_t)
666 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1]; 666 ((uint32_t*)dest)[i2+0]= r[Y1] + g[Y1] + b[Y1];
667 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2]; 667 ((uint32_t*)dest)[i2+1]= r[Y2] + g[Y2] + b[Y2];
668 } 668 }
669 break; 669 break;
670 case PIX_FMT_RGB24: 670 case PIX_FMT_RGB24:
671 YSCALE_YUV_2_RGBX_C(uint8_t) 671 YSCALE_YUV_2_RGBX_C(uint8_t)
672 ((uint8_t*)dest)[0]= r[Y1]; 672 ((uint8_t*)dest)[0]= r[Y1];
673 ((uint8_t*)dest)[1]= g[Y1]; 673 ((uint8_t*)dest)[1]= g[Y1];
674 ((uint8_t*)dest)[2]= b[Y1]; 674 ((uint8_t*)dest)[2]= b[Y1];
675 ((uint8_t*)dest)[3]= r[Y2]; 675 ((uint8_t*)dest)[3]= r[Y2];
676 ((uint8_t*)dest)[4]= g[Y2]; 676 ((uint8_t*)dest)[4]= g[Y2];
677 ((uint8_t*)dest)[5]= b[Y2]; 677 ((uint8_t*)dest)[5]= b[Y2];
678 dest+=6; 678 dest+=6;
679 } 679 }
680 break; 680 break;
681 case PIX_FMT_BGR24: 681 case PIX_FMT_BGR24:
682 YSCALE_YUV_2_RGBX_C(uint8_t) 682 YSCALE_YUV_2_RGBX_C(uint8_t)
683 ((uint8_t*)dest)[0]= b[Y1]; 683 ((uint8_t*)dest)[0]= b[Y1];
684 ((uint8_t*)dest)[1]= g[Y1]; 684 ((uint8_t*)dest)[1]= g[Y1];
685 ((uint8_t*)dest)[2]= r[Y1]; 685 ((uint8_t*)dest)[2]= r[Y1];
686 ((uint8_t*)dest)[3]= b[Y2]; 686 ((uint8_t*)dest)[3]= b[Y2];
687 ((uint8_t*)dest)[4]= g[Y2]; 687 ((uint8_t*)dest)[4]= g[Y2];
688 ((uint8_t*)dest)[5]= r[Y2]; 688 ((uint8_t*)dest)[5]= r[Y2];
689 dest+=6; 689 dest+=6;
690 } 690 }
691 break; 691 break;
692 case PIX_FMT_RGB565: 692 case PIX_FMT_RGB565:
693 case PIX_FMT_BGR565: 693 case PIX_FMT_BGR565:
694 { 694 {
695 const int dr1= dither_2x2_8[y&1 ][0]; 695 const int dr1= dither_2x2_8[y&1 ][0];
696 const int dg1= dither_2x2_4[y&1 ][0]; 696 const int dg1= dither_2x2_4[y&1 ][0];
697 const int db1= dither_2x2_8[(y&1)^1][0]; 697 const int db1= dither_2x2_8[(y&1)^1][0];
698 const int dr2= dither_2x2_8[y&1 ][1]; 698 const int dr2= dither_2x2_8[y&1 ][1];
699 const int dg2= dither_2x2_4[y&1 ][1]; 699 const int dg2= dither_2x2_4[y&1 ][1];
700 const int db2= dither_2x2_8[(y&1)^1][1]; 700 const int db2= dither_2x2_8[(y&1)^1][1];
701 YSCALE_YUV_2_RGBX_C(uint16_t) 701 YSCALE_YUV_2_RGBX_C(uint16_t)
702 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1]; 702 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];
703 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2]; 703 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];
704 } 704 }
705 } 705 }
706 break; 706 break;
707 case PIX_FMT_RGB555: 707 case PIX_FMT_RGB555:
708 case PIX_FMT_BGR555: 708 case PIX_FMT_BGR555:
709 { 709 {
710 const int dr1= dither_2x2_8[y&1 ][0]; 710 const int dr1= dither_2x2_8[y&1 ][0];
711 const int dg1= dither_2x2_8[y&1 ][1]; 711 const int dg1= dither_2x2_8[y&1 ][1];
712 const int db1= dither_2x2_8[(y&1)^1][0]; 712 const int db1= dither_2x2_8[(y&1)^1][0];
713 const int dr2= dither_2x2_8[y&1 ][1]; 713 const int dr2= dither_2x2_8[y&1 ][1];
714 const int dg2= dither_2x2_8[y&1 ][0]; 714 const int dg2= dither_2x2_8[y&1 ][0];
715 const int db2= dither_2x2_8[(y&1)^1][1]; 715 const int db2= dither_2x2_8[(y&1)^1][1];
716 YSCALE_YUV_2_RGBX_C(uint16_t) 716 YSCALE_YUV_2_RGBX_C(uint16_t)
717 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1]; 717 ((uint16_t*)dest)[i2+0]= r[Y1+dr1] + g[Y1+dg1] + b[Y1+db1];
718 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2]; 718 ((uint16_t*)dest)[i2+1]= r[Y2+dr2] + g[Y2+dg2] + b[Y2+db2];
719 } 719 }
720 } 720 }
721 break; 721 break;
722 case PIX_FMT_RGB8: 722 case PIX_FMT_RGB8:
723 case PIX_FMT_BGR8: 723 case PIX_FMT_BGR8:
724 { 724 {
725 const uint8_t * const d64= dither_8x8_73[y&7]; 725 const uint8_t * const d64= dither_8x8_73[y&7];
726 const uint8_t * const d32= dither_8x8_32[y&7]; 726 const uint8_t * const d32= dither_8x8_32[y&7];
727 YSCALE_YUV_2_RGBX_C(uint8_t) 727 YSCALE_YUV_2_RGBX_C(uint8_t)
728 ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]]; 728 ((uint8_t*)dest)[i2+0]= r[Y1+d32[(i2+0)&7]] + g[Y1+d32[(i2+0)&7]] + b[Y1+d64[(i2+0)&7]];
729 ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]]; 729 ((uint8_t*)dest)[i2+1]= r[Y2+d32[(i2+1)&7]] + g[Y2+d32[(i2+1)&7]] + b[Y2+d64[(i2+1)&7]];
730 } 730 }
731 } 731 }
732 break; 732 break;
733 case PIX_FMT_RGB4: 733 case PIX_FMT_RGB4:
734 case PIX_FMT_BGR4: 734 case PIX_FMT_BGR4:
735 { 735 {
736 const uint8_t * const d64= dither_8x8_73 [y&7]; 736 const uint8_t * const d64= dither_8x8_73 [y&7];
737 const uint8_t * const d128=dither_8x8_220[y&7]; 737 const uint8_t * const d128=dither_8x8_220[y&7];
738 YSCALE_YUV_2_RGBX_C(uint8_t) 738 YSCALE_YUV_2_RGBX_C(uint8_t)
739 ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]] 739 ((uint8_t*)dest)[i]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]
740 +((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4); 740 +((r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]])<<4);
741 } 741 }
742 } 742 }
743 break; 743 break;
744 case PIX_FMT_RGB4_BYTE: 744 case PIX_FMT_RGB4_BYTE:
745 case PIX_FMT_BGR4_BYTE: 745 case PIX_FMT_BGR4_BYTE:
746 { 746 {
747 const uint8_t * const d64= dither_8x8_73 [y&7]; 747 const uint8_t * const d64= dither_8x8_73 [y&7];
748 const uint8_t * const d128=dither_8x8_220[y&7]; 748 const uint8_t * const d128=dither_8x8_220[y&7];
749 YSCALE_YUV_2_RGBX_C(uint8_t) 749 YSCALE_YUV_2_RGBX_C(uint8_t)
750 ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]]; 750 ((uint8_t*)dest)[i2+0]= r[Y1+d128[(i2+0)&7]] + g[Y1+d64[(i2+0)&7]] + b[Y1+d128[(i2+0)&7]];
751 ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]]; 751 ((uint8_t*)dest)[i2+1]= r[Y2+d128[(i2+1)&7]] + g[Y2+d64[(i2+1)&7]] + b[Y2+d128[(i2+1)&7]];
752 } 752 }
753 } 753 }
754 break; 754 break;
755 case PIX_FMT_MONOBLACK: 755 case PIX_FMT_MONOBLACK:
756 { 756 {
757 const uint8_t * const d128=dither_8x8_220[y&7]; 757 const uint8_t * const d128=dither_8x8_220[y&7];
758 uint8_t *g= c->table_gU[128] + c->table_gV[128]; 758 uint8_t *g= c->table_gU[128] + c->table_gV[128];
759 int acc=0; 759 int acc=0;
760 for(i=0; i<dstW-1; i+=2){ 760 for (i=0; i<dstW-1; i+=2){
761 int j; 761 int j;
762 int Y1=1<<18; 762 int Y1=1<<18;
763 int Y2=1<<18; 763 int Y2=1<<18;
764 764
765 for(j=0; j<lumFilterSize; j++) 765 for (j=0; j<lumFilterSize; j++)
766 { 766 {
767 Y1 += lumSrc[j][i] * lumFilter[j]; 767 Y1 += lumSrc[j][i] * lumFilter[j];
768 Y2 += lumSrc[j][i+1] * lumFilter[j]; 768 Y2 += lumSrc[j][i+1] * lumFilter[j];
769 } 769 }
770 Y1>>=19; 770 Y1>>=19;
771 Y2>>=19; 771 Y2>>=19;
772 if((Y1|Y2)&256) 772 if ((Y1|Y2)&256)
773 { 773 {
774 if(Y1>255) Y1=255; 774 if (Y1>255) Y1=255;
775 else if(Y1<0)Y1=0; 775 else if (Y1<0)Y1=0;
776 if(Y2>255) Y2=255; 776 if (Y2>255) Y2=255;
777 else if(Y2<0)Y2=0; 777 else if (Y2<0)Y2=0;
778 } 778 }
779 acc+= acc + g[Y1+d128[(i+0)&7]]; 779 acc+= acc + g[Y1+d128[(i+0)&7]];
780 acc+= acc + g[Y2+d128[(i+1)&7]]; 780 acc+= acc + g[Y2+d128[(i+1)&7]];
781 if((i&7)==6){ 781 if ((i&7)==6){
782 ((uint8_t*)dest)[0]= acc; 782 ((uint8_t*)dest)[0]= acc;
783 dest++; 783 dest++;
784 } 784 }
785 } 785 }
786 } 786 }
787 break; 787 break;
788 case PIX_FMT_YUYV422: 788 case PIX_FMT_YUYV422:
789 YSCALE_YUV_2_PACKEDX_C(void) 789 YSCALE_YUV_2_PACKEDX_C(void)
790 ((uint8_t*)dest)[2*i2+0]= Y1; 790 ((uint8_t*)dest)[2*i2+0]= Y1;
791 ((uint8_t*)dest)[2*i2+1]= U; 791 ((uint8_t*)dest)[2*i2+1]= U;
792 ((uint8_t*)dest)[2*i2+2]= Y2; 792 ((uint8_t*)dest)[2*i2+2]= Y2;
793 ((uint8_t*)dest)[2*i2+3]= V; 793 ((uint8_t*)dest)[2*i2+3]= V;
794 } 794 }
795 break; 795 break;
796 case PIX_FMT_UYVY422: 796 case PIX_FMT_UYVY422:
797 YSCALE_YUV_2_PACKEDX_C(void) 797 YSCALE_YUV_2_PACKEDX_C(void)
798 ((uint8_t*)dest)[2*i2+0]= U; 798 ((uint8_t*)dest)[2*i2+0]= U;
799 ((uint8_t*)dest)[2*i2+1]= Y1; 799 ((uint8_t*)dest)[2*i2+1]= Y1;
800 ((uint8_t*)dest)[2*i2+2]= V; 800 ((uint8_t*)dest)[2*i2+2]= V;
801 ((uint8_t*)dest)[2*i2+3]= Y2; 801 ((uint8_t*)dest)[2*i2+3]= Y2;
802 } 802 }
803 break; 803 break;
804 } 804 }
805 } 805 }
806 806
807 807
808 //Note: we have C, X86, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one 808 //Note: we have C, X86, MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one
809 //Plain C versions 809 //Plain C versions
900 900
901 // minor note: the HAVE_xyz is messed up after that line so don't use it 901 // minor note: the HAVE_xyz is messed up after that line so don't use it
902 902
/**
 * Evaluate a piecewise cubic at distance dist.
 *
 * For dist <= 1.0 the result is the cubic
 *     a + b*dist + c*dist^2 + d*dist^3
 * For larger distances the coefficients are rewritten for the next unit
 * segment (a becomes 0.0) and dist is reduced by 1.0, repeatedly, until
 * dist <= 1.0.
 *
 * This is the iterative form of the former self-recursive implementation:
 * the arithmetic per step is unchanged, but stack usage is constant and a
 * NaN distance now yields NaN instead of recursing without end.
 * The number of steps is still proportional to dist, so callers are expected
 * to pass small distances.
 *
 * @param a,b,c,d  cubic coefficients of the first segment (constant..cubic)
 * @param dist     distance at which to evaluate
 * @return the value of the piecewise cubic at dist
 */
static double getSplineCoeff(double a, double b, double c, double d, double dist)
{
    /* "dist > 1.0" is false for NaN, so NaN skips the loop and propagates
     * through the polynomial below. */
    while (dist > 1.0)
    {
        /* coefficients of the following unit segment, all computed from the
         * current b, c, d before any of them is overwritten */
        const double nextB =  b + 2.0*c + 3.0*d;
        const double nextC =      c + 3.0*d;
        const double nextD = -b - 3.0*c - 6.0*d;

        a     = 0.0;
        b     = nextB;
        c     = nextC;
        d     = nextD;
        dist -= 1.0;
    }
    return ((d*dist + c)*dist + b)*dist + a;
}
913 913
914 static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc, 914 static inline int initFilter(int16_t **outFilter, int16_t **filterPos, int *outFilterSize, int xInc,
915 int srcW, int dstW, int filterAlign, int one, int flags, 915 int srcW, int dstW, int filterAlign, int one, int flags,
916 SwsVector *srcFilter, SwsVector *dstFilter, double param[2]) 916 SwsVector *srcFilter, SwsVector *dstFilter, double param[2])
917 { 917 {
918 int i; 918 int i;
919 int filterSize; 919 int filterSize;
920 int filter2Size; 920 int filter2Size;
921 int minFilterSize; 921 int minFilterSize;
922 double *filter=NULL; 922 double *filter=NULL;
923 double *filter2=NULL; 923 double *filter2=NULL;
924 #if defined(ARCH_X86) 924 #if defined(ARCH_X86)
925 if(flags & SWS_CPU_CAPS_MMX) 925 if (flags & SWS_CPU_CAPS_MMX)
926 asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS (even for non mmx versions) 926 asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS (even for non mmx versions)
927 #endif 927 #endif
928 928
929 // Note the +1 is for the MMXscaler which reads over the end 929 // Note the +1 is for the MMXscaler which reads over the end
930 *filterPos = av_malloc((dstW+1)*sizeof(int16_t)); 930 *filterPos = av_malloc((dstW+1)*sizeof(int16_t));
931 931
932 if(FFABS(xInc - 0x10000) <10) // unscaled 932 if (FFABS(xInc - 0x10000) <10) // unscaled
933 { 933 {
934 int i; 934 int i;
935 filterSize= 1; 935 filterSize= 1;
936 filter= av_malloc(dstW*sizeof(double)*filterSize); 936 filter= av_malloc(dstW*sizeof(double)*filterSize);
937 for(i=0; i<dstW*filterSize; i++) filter[i]=0; 937 for (i=0; i<dstW*filterSize; i++) filter[i]=0;
938 938
939 for(i=0; i<dstW; i++) 939 for (i=0; i<dstW; i++)
940 { 940 {
941 filter[i*filterSize]=1; 941 filter[i*filterSize]=1;
942 (*filterPos)[i]=i; 942 (*filterPos)[i]=i;
943 } 943 }
944 944
945 } 945 }
946 else if(flags&SWS_POINT) // lame looking point sampling mode 946 else if (flags&SWS_POINT) // lame looking point sampling mode
947 { 947 {
948 int i; 948 int i;
949 int xDstInSrc; 949 int xDstInSrc;
950 filterSize= 1; 950 filterSize= 1;
951 filter= av_malloc(dstW*sizeof(double)*filterSize); 951 filter= av_malloc(dstW*sizeof(double)*filterSize);
952 952
953 xDstInSrc= xInc/2 - 0x8000; 953 xDstInSrc= xInc/2 - 0x8000;
954 for(i=0; i<dstW; i++) 954 for (i=0; i<dstW; i++)
955 { 955 {
956 int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16; 956 int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
957 957
958 (*filterPos)[i]= xx; 958 (*filterPos)[i]= xx;
959 filter[i]= 1.0; 959 filter[i]= 1.0;
960 xDstInSrc+= xInc; 960 xDstInSrc+= xInc;
961 } 961 }
962 } 962 }
963 else if((xInc <= (1<<16) && (flags&SWS_AREA)) || (flags&SWS_FAST_BILINEAR)) // bilinear upscale 963 else if ((xInc <= (1<<16) && (flags&SWS_AREA)) || (flags&SWS_FAST_BILINEAR)) // bilinear upscale
964 { 964 {
965 int i; 965 int i;
966 int xDstInSrc; 966 int xDstInSrc;
967 if (flags&SWS_BICUBIC) filterSize= 4; 967 if (flags&SWS_BICUBIC) filterSize= 4;
968 else if(flags&SWS_X ) filterSize= 4; 968 else if (flags&SWS_X ) filterSize= 4;
969 else filterSize= 2; // SWS_BILINEAR / SWS_AREA 969 else filterSize= 2; // SWS_BILINEAR / SWS_AREA
970 filter= av_malloc(dstW*sizeof(double)*filterSize); 970 filter= av_malloc(dstW*sizeof(double)*filterSize);
971 971
972 xDstInSrc= xInc/2 - 0x8000; 972 xDstInSrc= xInc/2 - 0x8000;
973 for(i=0; i<dstW; i++) 973 for (i=0; i<dstW; i++)
974 { 974 {
975 int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16; 975 int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
976 int j; 976 int j;
977 977
978 (*filterPos)[i]= xx; 978 (*filterPos)[i]= xx;
979 //Bilinear upscale / linear interpolate / Area averaging 979 //Bilinear upscale / linear interpolate / Area averaging
980 for(j=0; j<filterSize; j++) 980 for (j=0; j<filterSize; j++)
981 { 981 {
982 double d= FFABS((xx<<16) - xDstInSrc)/(double)(1<<16); 982 double d= FFABS((xx<<16) - xDstInSrc)/(double)(1<<16);
983 double coeff= 1.0 - d; 983 double coeff= 1.0 - d;
984 if(coeff<0) coeff=0; 984 if (coeff<0) coeff=0;
985 filter[i*filterSize + j]= coeff; 985 filter[i*filterSize + j]= coeff;
986 xx++; 986 xx++;
987 } 987 }
988 xDstInSrc+= xInc; 988 xDstInSrc+= xInc;
989 } 989 }
990 } 990 }
991 else 991 else
992 { 992 {
993 double xDstInSrc; 993 double xDstInSrc;
994 double sizeFactor, filterSizeInSrc; 994 double sizeFactor, filterSizeInSrc;
995 const double xInc1= (double)xInc / (double)(1<<16); 995 const double xInc1= (double)xInc / (double)(1<<16);
996 996
997 if (flags&SWS_BICUBIC) sizeFactor= 4.0; 997 if (flags&SWS_BICUBIC) sizeFactor= 4.0;
998 else if(flags&SWS_X) sizeFactor= 8.0; 998 else if (flags&SWS_X) sizeFactor= 8.0;
999 else if(flags&SWS_AREA) sizeFactor= 1.0; //downscale only, for upscale it is bilinear 999 else if (flags&SWS_AREA) sizeFactor= 1.0; //downscale only, for upscale it is bilinear
1000 else if(flags&SWS_GAUSS) sizeFactor= 8.0; // infinite ;) 1000 else if (flags&SWS_GAUSS) sizeFactor= 8.0; // infinite ;)
1001 else if(flags&SWS_LANCZOS) sizeFactor= param[0] != SWS_PARAM_DEFAULT ? 2.0*param[0] : 6.0; 1001 else if (flags&SWS_LANCZOS) sizeFactor= param[0] != SWS_PARAM_DEFAULT ? 2.0*param[0] : 6.0;
1002 else if(flags&SWS_SINC) sizeFactor= 20.0; // infinite ;) 1002 else if (flags&SWS_SINC) sizeFactor= 20.0; // infinite ;)
1003 else if(flags&SWS_SPLINE) sizeFactor= 20.0; // infinite ;) 1003 else if (flags&SWS_SPLINE) sizeFactor= 20.0; // infinite ;)
1004 else if(flags&SWS_BILINEAR) sizeFactor= 2.0; 1004 else if (flags&SWS_BILINEAR) sizeFactor= 2.0;
1005 else { 1005 else {
1006 sizeFactor= 0.0; //GCC warning killer 1006 sizeFactor= 0.0; //GCC warning killer
1007 ASSERT(0) 1007 ASSERT(0)
1008 } 1008 }
1009 1009
1010 if(xInc1 <= 1.0) filterSizeInSrc= sizeFactor; // upscale 1010 if (xInc1 <= 1.0) filterSizeInSrc= sizeFactor; // upscale
1011 else filterSizeInSrc= sizeFactor*srcW / (double)dstW; 1011 else filterSizeInSrc= sizeFactor*srcW / (double)dstW;
1012 1012
1013 filterSize= (int)ceil(1 + filterSizeInSrc); // will be reduced later if possible 1013 filterSize= (int)ceil(1 + filterSizeInSrc); // will be reduced later if possible
1014 if(filterSize > srcW-2) filterSize=srcW-2; 1014 if (filterSize > srcW-2) filterSize=srcW-2;
1015 1015
1016 filter= av_malloc(dstW*sizeof(double)*filterSize); 1016 filter= av_malloc(dstW*sizeof(double)*filterSize);
1017 1017
1018 xDstInSrc= xInc1 / 2.0 - 0.5; 1018 xDstInSrc= xInc1 / 2.0 - 0.5;
1019 for(i=0; i<dstW; i++) 1019 for (i=0; i<dstW; i++)
1020 { 1020 {
1021 int xx= (int)(xDstInSrc - (filterSize-1)*0.5 + 0.5); 1021 int xx= (int)(xDstInSrc - (filterSize-1)*0.5 + 0.5);
1022 int j; 1022 int j;
1023 (*filterPos)[i]= xx; 1023 (*filterPos)[i]= xx;
1024 for(j=0; j<filterSize; j++) 1024 for (j=0; j<filterSize; j++)
1025 { 1025 {
1026 double d= FFABS(xx - xDstInSrc)/filterSizeInSrc*sizeFactor; 1026 double d= FFABS(xx - xDstInSrc)/filterSizeInSrc*sizeFactor;
1027 double coeff; 1027 double coeff;
1028 if(flags & SWS_BICUBIC) 1028 if (flags & SWS_BICUBIC)
1029 { 1029 {
1030 double B= param[0] != SWS_PARAM_DEFAULT ? param[0] : 0.0; 1030 double B= param[0] != SWS_PARAM_DEFAULT ? param[0] : 0.0;
1031 double C= param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6; 1031 double C= param[1] != SWS_PARAM_DEFAULT ? param[1] : 0.6;
1032 1032
1033 if(d<1.0) 1033 if (d<1.0)
1034 coeff = (12-9*B-6*C)*d*d*d + (-18+12*B+6*C)*d*d + 6-2*B; 1034 coeff = (12-9*B-6*C)*d*d*d + (-18+12*B+6*C)*d*d + 6-2*B;
1035 else if(d<2.0) 1035 else if (d<2.0)
1036 coeff = (-B-6*C)*d*d*d + (6*B+30*C)*d*d + (-12*B-48*C)*d +8*B+24*C; 1036 coeff = (-B-6*C)*d*d*d + (6*B+30*C)*d*d + (-12*B-48*C)*d +8*B+24*C;
1037 else 1037 else
1038 coeff=0.0; 1038 coeff=0.0;
1039 } 1039 }
1040 /* else if(flags & SWS_X) 1040 /* else if (flags & SWS_X)
1041 { 1041 {
1042 double p= param ? param*0.01 : 0.3; 1042 double p= param ? param*0.01 : 0.3;
1043 coeff = d ? sin(d*PI)/(d*PI) : 1.0; 1043 coeff = d ? sin(d*PI)/(d*PI) : 1.0;
1044 coeff*= pow(2.0, - p*d*d); 1044 coeff*= pow(2.0, - p*d*d);
1045 }*/ 1045 }*/
1046 else if(flags & SWS_X) 1046 else if (flags & SWS_X)
1047 { 1047 {
1048 double A= param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0; 1048 double A= param[0] != SWS_PARAM_DEFAULT ? param[0] : 1.0;
1049 1049
1050 if(d<1.0) 1050 if (d<1.0)
1051 coeff = cos(d*PI); 1051 coeff = cos(d*PI);
1052 else 1052 else
1053 coeff=-1.0; 1053 coeff=-1.0;
1054 if(coeff<0.0) coeff= -pow(-coeff, A); 1054 if (coeff<0.0) coeff= -pow(-coeff, A);
1055 else coeff= pow( coeff, A); 1055 else coeff= pow( coeff, A);
1056 coeff= coeff*0.5 + 0.5; 1056 coeff= coeff*0.5 + 0.5;
1057 } 1057 }
1058 else if(flags & SWS_AREA) 1058 else if (flags & SWS_AREA)
1059 { 1059 {
1060 double srcPixelSize= 1.0/xInc1; 1060 double srcPixelSize= 1.0/xInc1;
1061 if(d + srcPixelSize/2 < 0.5) coeff= 1.0; 1061 if (d + srcPixelSize/2 < 0.5) coeff= 1.0;
1062 else if(d - srcPixelSize/2 < 0.5) coeff= (0.5-d)/srcPixelSize + 0.5; 1062 else if (d - srcPixelSize/2 < 0.5) coeff= (0.5-d)/srcPixelSize + 0.5;
1063 else coeff=0.0; 1063 else coeff=0.0;
1064 } 1064 }
1065 else if(flags & SWS_GAUSS) 1065 else if (flags & SWS_GAUSS)
1066 { 1066 {
1067 double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0; 1067 double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
1068 coeff = pow(2.0, - p*d*d); 1068 coeff = pow(2.0, - p*d*d);
1069 } 1069 }
1070 else if(flags & SWS_SINC) 1070 else if (flags & SWS_SINC)
1071 { 1071 {
1072 coeff = d ? sin(d*PI)/(d*PI) : 1.0; 1072 coeff = d ? sin(d*PI)/(d*PI) : 1.0;
1073 } 1073 }
1074 else if(flags & SWS_LANCZOS) 1074 else if (flags & SWS_LANCZOS)
1075 { 1075 {
1076 double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0; 1076 double p= param[0] != SWS_PARAM_DEFAULT ? param[0] : 3.0;
1077 coeff = d ? sin(d*PI)*sin(d*PI/p)/(d*d*PI*PI/p) : 1.0; 1077 coeff = d ? sin(d*PI)*sin(d*PI/p)/(d*d*PI*PI/p) : 1.0;
1078 if(d>p) coeff=0; 1078 if (d>p) coeff=0;
1079 } 1079 }
1080 else if(flags & SWS_BILINEAR) 1080 else if (flags & SWS_BILINEAR)
1081 { 1081 {
1082 coeff= 1.0 - d; 1082 coeff= 1.0 - d;
1083 if(coeff<0) coeff=0; 1083 if (coeff<0) coeff=0;
1084 } 1084 }
1085 else if(flags & SWS_SPLINE) 1085 else if (flags & SWS_SPLINE)
1086 { 1086 {
1087 double p=-2.196152422706632; 1087 double p=-2.196152422706632;
1088 coeff = getSplineCoeff(1.0, 0.0, p, -p-1.0, d); 1088 coeff = getSplineCoeff(1.0, 0.0, p, -p-1.0, d);
1089 } 1089 }
1090 else { 1090 else {
1091 coeff= 0.0; //GCC warning killer 1091 coeff= 0.0; //GCC warning killer
1092 ASSERT(0) 1092 ASSERT(0)
1093 } 1093 }
1094 1094
1095 filter[i*filterSize + j]= coeff; 1095 filter[i*filterSize + j]= coeff;
1096 xx++; 1096 xx++;
1097 } 1097 }
1098 xDstInSrc+= xInc1; 1098 xDstInSrc+= xInc1;
1099 } 1099 }
1100 } 1100 }
1101 1101
1102 /* apply src & dst Filter to filter -> filter2 1102 /* apply src & dst Filter to filter -> filter2
1103 av_free(filter); 1103 av_free(filter);
1104 */ 1104 */
1105 ASSERT(filterSize>0) 1105 ASSERT(filterSize>0)
1106 filter2Size= filterSize; 1106 filter2Size= filterSize;
1107 if(srcFilter) filter2Size+= srcFilter->length - 1; 1107 if (srcFilter) filter2Size+= srcFilter->length - 1;
1108 if(dstFilter) filter2Size+= dstFilter->length - 1; 1108 if (dstFilter) filter2Size+= dstFilter->length - 1;
1109 ASSERT(filter2Size>0) 1109 ASSERT(filter2Size>0)
1110 filter2= av_malloc(filter2Size*dstW*sizeof(double)); 1110 filter2= av_malloc(filter2Size*dstW*sizeof(double));
1111 1111
1112 for(i=0; i<dstW; i++) 1112 for (i=0; i<dstW; i++)
1113 { 1113 {
1114 int j; 1114 int j;
1115 SwsVector scaleFilter; 1115 SwsVector scaleFilter;
1116 SwsVector *outVec; 1116 SwsVector *outVec;
1117 1117
1118 scaleFilter.coeff= filter + i*filterSize; 1118 scaleFilter.coeff= filter + i*filterSize;
1119 scaleFilter.length= filterSize; 1119 scaleFilter.length= filterSize;
1120 1120
1121 if(srcFilter) outVec= sws_getConvVec(srcFilter, &scaleFilter); 1121 if (srcFilter) outVec= sws_getConvVec(srcFilter, &scaleFilter);
1122 else outVec= &scaleFilter; 1122 else outVec= &scaleFilter;
1123 1123
1124 ASSERT(outVec->length == filter2Size) 1124 ASSERT(outVec->length == filter2Size)
1125 //FIXME dstFilter 1125 //FIXME dstFilter
1126 1126
1127 for(j=0; j<outVec->length; j++) 1127 for (j=0; j<outVec->length; j++)
1128 { 1128 {
1129 filter2[i*filter2Size + j]= outVec->coeff[j]; 1129 filter2[i*filter2Size + j]= outVec->coeff[j];
1130 } 1130 }
1131 1131
1132 (*filterPos)[i]+= (filterSize-1)/2 - (filter2Size-1)/2; 1132 (*filterPos)[i]+= (filterSize-1)/2 - (filter2Size-1)/2;
1133 1133
1134 if(outVec != &scaleFilter) sws_freeVec(outVec); 1134 if (outVec != &scaleFilter) sws_freeVec(outVec);
1135 } 1135 }
1136 av_free(filter); filter=NULL; 1136 av_free(filter); filter=NULL;
1137 1137
1138 /* try to reduce the filter-size (step1 find size and shift left) */ 1138 /* try to reduce the filter-size (step1 find size and shift left) */
1139 // Assume its near normalized (*0.5 or *2.0 is ok but * 0.001 is not) 1139 // Assume its near normalized (*0.5 or *2.0 is ok but * 0.001 is not)
1140 minFilterSize= 0; 1140 minFilterSize= 0;
1141 for(i=dstW-1; i>=0; i--) 1141 for (i=dstW-1; i>=0; i--)
1142 { 1142 {
1143 int min= filter2Size; 1143 int min= filter2Size;
1144 int j; 1144 int j;
1145 double cutOff=0.0; 1145 double cutOff=0.0;
1146 1146
1147 /* get rid off near zero elements on the left by shifting left */ 1147 /* get rid off near zero elements on the left by shifting left */
1148 for(j=0; j<filter2Size; j++) 1148 for (j=0; j<filter2Size; j++)
1149 { 1149 {
1150 int k; 1150 int k;
1151 cutOff += FFABS(filter2[i*filter2Size]); 1151 cutOff += FFABS(filter2[i*filter2Size]);
1152 1152
1153 if(cutOff > SWS_MAX_REDUCE_CUTOFF) break; 1153 if (cutOff > SWS_MAX_REDUCE_CUTOFF) break;
1154 1154
1155 /* preserve Monotonicity because the core can't handle the filter otherwise */ 1155 /* preserve Monotonicity because the core can't handle the filter otherwise */
1156 if(i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1]) break; 1156 if (i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1]) break;
1157 1157
1158 // Move filter coeffs left 1158 // Move filter coeffs left
1159 for(k=1; k<filter2Size; k++) 1159 for (k=1; k<filter2Size; k++)
1160 filter2[i*filter2Size + k - 1]= filter2[i*filter2Size + k]; 1160 filter2[i*filter2Size + k - 1]= filter2[i*filter2Size + k];
1161 filter2[i*filter2Size + k - 1]= 0.0; 1161 filter2[i*filter2Size + k - 1]= 0.0;
1162 (*filterPos)[i]++; 1162 (*filterPos)[i]++;
1163 } 1163 }
1164 1164
1165 cutOff=0.0; 1165 cutOff=0.0;
1166 /* count near zeros on the right */ 1166 /* count near zeros on the right */
1167 for(j=filter2Size-1; j>0; j--) 1167 for (j=filter2Size-1; j>0; j--)
1168 { 1168 {
1169 cutOff += FFABS(filter2[i*filter2Size + j]); 1169 cutOff += FFABS(filter2[i*filter2Size + j]);
1170 1170
1171 if(cutOff > SWS_MAX_REDUCE_CUTOFF) break; 1171 if (cutOff > SWS_MAX_REDUCE_CUTOFF) break;
1172 min--; 1172 min--;
1173 } 1173 }
1174 1174
1175 if(min>minFilterSize) minFilterSize= min; 1175 if (min>minFilterSize) minFilterSize= min;
1176 } 1176 }
1177 1177
1178 if (flags & SWS_CPU_CAPS_ALTIVEC) { 1178 if (flags & SWS_CPU_CAPS_ALTIVEC) {
1179 // we can handle the special case 4, 1179 // we can handle the special case 4,
1180 // so we don't want to go to the full 8 1180 // so we don't want to go to the full 8
1181 if (minFilterSize < 5) 1181 if (minFilterSize < 5)
1182 filterAlign = 4; 1182 filterAlign = 4;
1183 1183
1184 // we really don't want to waste our time 1184 // we really don't want to waste our time
1185 // doing useless computation, so fall-back on 1185 // doing useless computation, so fall-back on
1186 // the scalar C code for very small filter. 1186 // the scalar C code for very small filter.
1187 // vectorizing is worth it only if you have 1187 // vectorizing is worth it only if you have
1188 // decent-sized vector. 1188 // decent-sized vector.
1189 if (minFilterSize < 3) 1189 if (minFilterSize < 3)
1190 filterAlign = 1; 1190 filterAlign = 1;
1191 } 1191 }
1192 1192
1193 if (flags & SWS_CPU_CAPS_MMX) { 1193 if (flags & SWS_CPU_CAPS_MMX) {
1194 // special case for unscaled vertical filtering 1194 // special case for unscaled vertical filtering
1195 if(minFilterSize == 1 && filterAlign == 2) 1195 if (minFilterSize == 1 && filterAlign == 2)
1196 filterAlign= 1; 1196 filterAlign= 1;
1197 } 1197 }
1198 1198
1199 ASSERT(minFilterSize > 0) 1199 ASSERT(minFilterSize > 0)
1200 filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1)); 1200 filterSize= (minFilterSize +(filterAlign-1)) & (~(filterAlign-1));
1201 ASSERT(filterSize > 0) 1201 ASSERT(filterSize > 0)
1202 filter= av_malloc(filterSize*dstW*sizeof(double)); 1202 filter= av_malloc(filterSize*dstW*sizeof(double));
1203 if(filterSize >= MAX_FILTER_SIZE) 1203 if (filterSize >= MAX_FILTER_SIZE)
1204 return -1; 1204 return -1;
1205 *outFilterSize= filterSize; 1205 *outFilterSize= filterSize;
1206 1206
1207 if(flags&SWS_PRINT_INFO) 1207 if (flags&SWS_PRINT_INFO)
1208 av_log(NULL, AV_LOG_VERBOSE, "SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize); 1208 av_log(NULL, AV_LOG_VERBOSE, "SwScaler: reducing / aligning filtersize %d -> %d\n", filter2Size, filterSize);
1209 /* try to reduce the filter-size (step2 reduce it) */ 1209 /* try to reduce the filter-size (step2 reduce it) */
1210 for(i=0; i<dstW; i++) 1210 for (i=0; i<dstW; i++)
1211 { 1211 {
1212 int j; 1212 int j;
1213 1213
1214 for(j=0; j<filterSize; j++) 1214 for (j=0; j<filterSize; j++)
1215 { 1215 {
1216 if(j>=filter2Size) filter[i*filterSize + j]= 0.0; 1216 if (j>=filter2Size) filter[i*filterSize + j]= 0.0;
1217 else filter[i*filterSize + j]= filter2[i*filter2Size + j]; 1217 else filter[i*filterSize + j]= filter2[i*filter2Size + j];
1218 } 1218 }
1219 } 1219 }
1220 av_free(filter2); filter2=NULL; 1220 av_free(filter2); filter2=NULL;
1221 1221
1222 1222
1223 //FIXME try to align filterpos if possible 1223 //FIXME try to align filterpos if possible
1224 1224
1225 //fix borders 1225 //fix borders
1226 for(i=0; i<dstW; i++) 1226 for (i=0; i<dstW; i++)
1227 { 1227 {
1228 int j; 1228 int j;
1229 if((*filterPos)[i] < 0) 1229 if ((*filterPos)[i] < 0)
1230 { 1230 {
1231 // Move filter coeffs left to compensate for filterPos 1231 // Move filter coeffs left to compensate for filterPos
1232 for(j=1; j<filterSize; j++) 1232 for (j=1; j<filterSize; j++)
1233 { 1233 {
1234 int left= FFMAX(j + (*filterPos)[i], 0); 1234 int left= FFMAX(j + (*filterPos)[i], 0);
1235 filter[i*filterSize + left] += filter[i*filterSize + j]; 1235 filter[i*filterSize + left] += filter[i*filterSize + j];
1236 filter[i*filterSize + j]=0; 1236 filter[i*filterSize + j]=0;
1237 } 1237 }
1238 (*filterPos)[i]= 0; 1238 (*filterPos)[i]= 0;
1239 } 1239 }
1240 1240
1241 if((*filterPos)[i] + filterSize > srcW) 1241 if ((*filterPos)[i] + filterSize > srcW)
1242 { 1242 {
1243 int shift= (*filterPos)[i] + filterSize - srcW; 1243 int shift= (*filterPos)[i] + filterSize - srcW;
1244 // Move filter coeffs right to compensate for filterPos 1244 // Move filter coeffs right to compensate for filterPos
1245 for(j=filterSize-2; j>=0; j--) 1245 for (j=filterSize-2; j>=0; j--)
1246 { 1246 {
1247 int right= FFMIN(j + shift, filterSize-1); 1247 int right= FFMIN(j + shift, filterSize-1);
1248 filter[i*filterSize +right] += filter[i*filterSize +j]; 1248 filter[i*filterSize +right] += filter[i*filterSize +j];
1249 filter[i*filterSize +j]=0; 1249 filter[i*filterSize +j]=0;
1250 } 1250 }
1251 (*filterPos)[i]= srcW - filterSize; 1251 (*filterPos)[i]= srcW - filterSize;
1252 } 1252 }
1253 } 1253 }
1254 1254
1255 // Note the +1 is for the MMXscaler which reads over the end 1255 // Note the +1 is for the MMXscaler which reads over the end
1256 /* align at 16 for AltiVec (needed by hScale_altivec_real) */ 1256 /* align at 16 for AltiVec (needed by hScale_altivec_real) */
1257 *outFilter= av_mallocz(*outFilterSize*(dstW+1)*sizeof(int16_t)); 1257 *outFilter= av_mallocz(*outFilterSize*(dstW+1)*sizeof(int16_t));
1258 1258
1259 /* Normalize & Store in outFilter */ 1259 /* Normalize & Store in outFilter */
1260 for(i=0; i<dstW; i++) 1260 for (i=0; i<dstW; i++)
1261 { 1261 {
1262 int j; 1262 int j;
1263 double error=0; 1263 double error=0;
1264 double sum=0; 1264 double sum=0;
1265 double scale= one; 1265 double scale= one;
1266 1266
1267 for(j=0; j<filterSize; j++) 1267 for (j=0; j<filterSize; j++)
1268 { 1268 {
1269 sum+= filter[i*filterSize + j]; 1269 sum+= filter[i*filterSize + j];
1270 } 1270 }
1271 scale/= sum; 1271 scale/= sum;
1272 for(j=0; j<*outFilterSize; j++) 1272 for (j=0; j<*outFilterSize; j++)
1273 { 1273 {
1274 double v= filter[i*filterSize + j]*scale + error; 1274 double v= filter[i*filterSize + j]*scale + error;
1275 int intV= floor(v + 0.5); 1275 int intV= floor(v + 0.5);
1276 (*outFilter)[i*(*outFilterSize) + j]= intV; 1276 (*outFilter)[i*(*outFilterSize) + j]= intV;
1277 error = v - intV; 1277 error = v - intV;
1278 } 1278 }
1279 } 1279 }
1280 1280
1281 (*filterPos)[dstW]= (*filterPos)[dstW-1]; // the MMX scaler will read over the end 1281 (*filterPos)[dstW]= (*filterPos)[dstW-1]; // the MMX scaler will read over the end
1282 for(i=0; i<*outFilterSize; i++) 1282 for (i=0; i<*outFilterSize; i++)
1283 { 1283 {
1284 int j= dstW*(*outFilterSize); 1284 int j= dstW*(*outFilterSize);
1285 (*outFilter)[j + i]= (*outFilter)[j + i - (*outFilterSize)]; 1285 (*outFilter)[j + i]= (*outFilter)[j + i - (*outFilterSize)];
1286 } 1286 }
1287 1287
1288 av_free(filter); 1288 av_free(filter);
1289 return 0; 1289 return 0;
1290 } 1290 }
1291 1291
1292 #ifdef COMPILE_MMX2 1292 #ifdef COMPILE_MMX2
1293 static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits) 1293 static void initMMX2HScaler(int dstW, int xInc, uint8_t *funnyCode, int16_t *filter, int32_t *filterPos, int numSplits)
1294 { 1294 {
1295 uint8_t *fragmentA; 1295 uint8_t *fragmentA;
1296 long imm8OfPShufW1A; 1296 long imm8OfPShufW1A;
1297 long imm8OfPShufW2A; 1297 long imm8OfPShufW2A;
1298 long fragmentLengthA; 1298 long fragmentLengthA;
1299 uint8_t *fragmentB; 1299 uint8_t *fragmentB;
1300 long imm8OfPShufW1B; 1300 long imm8OfPShufW1B;
1301 long imm8OfPShufW2B; 1301 long imm8OfPShufW2B;
1302 long fragmentLengthB; 1302 long fragmentLengthB;
1303 int fragmentPos; 1303 int fragmentPos;
1304 1304
1305 int xpos, i; 1305 int xpos, i;
1306 1306
1307 // create an optimized horizontal scaling routine 1307 // create an optimized horizontal scaling routine
1308 1308
1309 //code fragment 1309 //code fragment
1310 1310
1311 asm volatile( 1311 asm volatile(
1312 "jmp 9f \n\t" 1312 "jmp 9f \n\t"
1313 // Begin 1313 // Begin
1314 "0: \n\t" 1314 "0: \n\t"
1315 "movq (%%"REG_d", %%"REG_a"), %%mm3\n\t" 1315 "movq (%%"REG_d", %%"REG_a"), %%mm3 \n\t"
1316 "movd (%%"REG_c", %%"REG_S"), %%mm0\n\t" 1316 "movd (%%"REG_c", %%"REG_S"), %%mm0 \n\t"
1317 "movd 1(%%"REG_c", %%"REG_S"), %%mm1\n\t" 1317 "movd 1(%%"REG_c", %%"REG_S"), %%mm1 \n\t"
1318 "punpcklbw %%mm7, %%mm1 \n\t" 1318 "punpcklbw %%mm7, %%mm1 \n\t"
1319 "punpcklbw %%mm7, %%mm0 \n\t" 1319 "punpcklbw %%mm7, %%mm0 \n\t"
1320 "pshufw $0xFF, %%mm1, %%mm1 \n\t" 1320 "pshufw $0xFF, %%mm1, %%mm1 \n\t"
1321 "1: \n\t" 1321 "1: \n\t"
1322 "pshufw $0xFF, %%mm0, %%mm0 \n\t" 1322 "pshufw $0xFF, %%mm0, %%mm0 \n\t"
1323 "2: \n\t" 1323 "2: \n\t"
1324 "psubw %%mm1, %%mm0 \n\t" 1324 "psubw %%mm1, %%mm0 \n\t"
1325 "movl 8(%%"REG_b", %%"REG_a"), %%esi\n\t" 1325 "movl 8(%%"REG_b", %%"REG_a"), %%esi \n\t"
1326 "pmullw %%mm3, %%mm0 \n\t" 1326 "pmullw %%mm3, %%mm0 \n\t"
1327 "psllw $7, %%mm1 \n\t" 1327 "psllw $7, %%mm1 \n\t"
1328 "paddw %%mm1, %%mm0 \n\t" 1328 "paddw %%mm1, %%mm0 \n\t"
1329 1329
1330 "movq %%mm0, (%%"REG_D", %%"REG_a")\n\t" 1330 "movq %%mm0, (%%"REG_D", %%"REG_a") \n\t"
1331 1331
1332 "add $8, %%"REG_a" \n\t" 1332 "add $8, %%"REG_a" \n\t"
1333 // End 1333 // End
1334 "9: \n\t" 1334 "9: \n\t"
1335 // "int $3\n\t" 1335 // "int $3 \n\t"
1336 "lea 0b, %0 \n\t" 1336 "lea 0b, %0 \n\t"
1337 "lea 1b, %1 \n\t" 1337 "lea 1b, %1 \n\t"
1338 "lea 2b, %2 \n\t" 1338 "lea 2b, %2 \n\t"
1339 "dec %1 \n\t" 1339 "dec %1 \n\t"
1340 "dec %2 \n\t" 1340 "dec %2 \n\t"
1341 "sub %0, %1 \n\t" 1341 "sub %0, %1 \n\t"
1342 "sub %0, %2 \n\t" 1342 "sub %0, %2 \n\t"
1343 "lea 9b, %3 \n\t" 1343 "lea 9b, %3 \n\t"
1344 "sub %0, %3 \n\t" 1344 "sub %0, %3 \n\t"
1345 1345
1346 1346
1347 :"=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A), 1347 :"=r" (fragmentA), "=r" (imm8OfPShufW1A), "=r" (imm8OfPShufW2A),
1348 "=r" (fragmentLengthA) 1348 "=r" (fragmentLengthA)
1349 ); 1349 );
1350 1350
1351 asm volatile( 1351 asm volatile(
1352 "jmp 9f \n\t" 1352 "jmp 9f \n\t"
1353 // Begin 1353 // Begin
1354 "0: \n\t" 1354 "0: \n\t"
1355 "movq (%%"REG_d", %%"REG_a"), %%mm3\n\t" 1355 "movq (%%"REG_d", %%"REG_a"), %%mm3 \n\t"
1356 "movd (%%"REG_c", %%"REG_S"), %%mm0\n\t" 1356 "movd (%%"REG_c", %%"REG_S"), %%mm0 \n\t"
1357 "punpcklbw %%mm7, %%mm0 \n\t" 1357 "punpcklbw %%mm7, %%mm0 \n\t"
1358 "pshufw $0xFF, %%mm0, %%mm1 \n\t" 1358 "pshufw $0xFF, %%mm0, %%mm1 \n\t"
1359 "1: \n\t" 1359 "1: \n\t"
1360 "pshufw $0xFF, %%mm0, %%mm0 \n\t" 1360 "pshufw $0xFF, %%mm0, %%mm0 \n\t"
1361 "2: \n\t" 1361 "2: \n\t"
1362 "psubw %%mm1, %%mm0 \n\t" 1362 "psubw %%mm1, %%mm0 \n\t"
1363 "movl 8(%%"REG_b", %%"REG_a"), %%esi\n\t" 1363 "movl 8(%%"REG_b", %%"REG_a"), %%esi \n\t"
1364 "pmullw %%mm3, %%mm0 \n\t" 1364 "pmullw %%mm3, %%mm0 \n\t"
1365 "psllw $7, %%mm1 \n\t" 1365 "psllw $7, %%mm1 \n\t"
1366 "paddw %%mm1, %%mm0 \n\t" 1366 "paddw %%mm1, %%mm0 \n\t"
1367 1367
1368 "movq %%mm0, (%%"REG_D", %%"REG_a")\n\t" 1368 "movq %%mm0, (%%"REG_D", %%"REG_a") \n\t"
1369 1369
1370 "add $8, %%"REG_a" \n\t" 1370 "add $8, %%"REG_a" \n\t"
1371 // End 1371 // End
1372 "9: \n\t" 1372 "9: \n\t"
1373 // "int $3\n\t" 1373 // "int $3 \n\t"
1374 "lea 0b, %0 \n\t" 1374 "lea 0b, %0 \n\t"
1375 "lea 1b, %1 \n\t" 1375 "lea 1b, %1 \n\t"
1376 "lea 2b, %2 \n\t" 1376 "lea 2b, %2 \n\t"
1377 "dec %1 \n\t" 1377 "dec %1 \n\t"
1378 "dec %2 \n\t" 1378 "dec %2 \n\t"
1379 "sub %0, %1 \n\t" 1379 "sub %0, %1 \n\t"
1380 "sub %0, %2 \n\t" 1380 "sub %0, %2 \n\t"
1381 "lea 9b, %3 \n\t" 1381 "lea 9b, %3 \n\t"
1382 "sub %0, %3 \n\t" 1382 "sub %0, %3 \n\t"
1383 1383
1384 1384
1385 :"=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B), 1385 :"=r" (fragmentB), "=r" (imm8OfPShufW1B), "=r" (imm8OfPShufW2B),
1386 "=r" (fragmentLengthB) 1386 "=r" (fragmentLengthB)
1387 ); 1387 );
1388 1388
1389 xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers 1389 xpos= 0; //lumXInc/2 - 0x8000; // difference between pixel centers
1390 fragmentPos=0; 1390 fragmentPos=0;
1391 1391
1392 for(i=0; i<dstW/numSplits; i++) 1392 for (i=0; i<dstW/numSplits; i++)
1393 { 1393 {
1394 int xx=xpos>>16; 1394 int xx=xpos>>16;
1395 1395
1396 if((i&3) == 0) 1396 if ((i&3) == 0)
1397 { 1397 {
1398 int a=0; 1398 int a=0;
1399 int b=((xpos+xInc)>>16) - xx; 1399 int b=((xpos+xInc)>>16) - xx;
1400 int c=((xpos+xInc*2)>>16) - xx; 1400 int c=((xpos+xInc*2)>>16) - xx;
1401 int d=((xpos+xInc*3)>>16) - xx; 1401 int d=((xpos+xInc*3)>>16) - xx;
1402 1402
1403 filter[i ] = (( xpos & 0xFFFF) ^ 0xFFFF)>>9; 1403 filter[i ] = (( xpos & 0xFFFF) ^ 0xFFFF)>>9;
1404 filter[i+1] = (((xpos+xInc ) & 0xFFFF) ^ 0xFFFF)>>9; 1404 filter[i+1] = (((xpos+xInc ) & 0xFFFF) ^ 0xFFFF)>>9;
1405 filter[i+2] = (((xpos+xInc*2) & 0xFFFF) ^ 0xFFFF)>>9; 1405 filter[i+2] = (((xpos+xInc*2) & 0xFFFF) ^ 0xFFFF)>>9;
1406 filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9; 1406 filter[i+3] = (((xpos+xInc*3) & 0xFFFF) ^ 0xFFFF)>>9;
1407 filterPos[i/2]= xx; 1407 filterPos[i/2]= xx;
1408 1408
1409 if(d+1<4) 1409 if (d+1<4)
1410 { 1410 {
1411 int maxShift= 3-(d+1); 1411 int maxShift= 3-(d+1);
1412 int shift=0; 1412 int shift=0;
1413 1413
1414 memcpy(funnyCode + fragmentPos, fragmentB, fragmentLengthB); 1414 memcpy(funnyCode + fragmentPos, fragmentB, fragmentLengthB);
1415 1415
1416 funnyCode[fragmentPos + imm8OfPShufW1B]= 1416 funnyCode[fragmentPos + imm8OfPShufW1B]=
1417 (a+1) | ((b+1)<<2) | ((c+1)<<4) | ((d+1)<<6); 1417 (a+1) | ((b+1)<<2) | ((c+1)<<4) | ((d+1)<<6);
1418 funnyCode[fragmentPos + imm8OfPShufW2B]= 1418 funnyCode[fragmentPos + imm8OfPShufW2B]=
1419 a | (b<<2) | (c<<4) | (d<<6); 1419 a | (b<<2) | (c<<4) | (d<<6);
1420 1420
1421 if(i+3>=dstW) shift=maxShift; //avoid overread 1421 if (i+3>=dstW) shift=maxShift; //avoid overread
1422 else if((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align 1422 else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //Align
1423 1423
1424 if(shift && i>=shift) 1424 if (shift && i>=shift)
1425 { 1425 {
1426 funnyCode[fragmentPos + imm8OfPShufW1B]+= 0x55*shift; 1426 funnyCode[fragmentPos + imm8OfPShufW1B]+= 0x55*shift;
1427 funnyCode[fragmentPos + imm8OfPShufW2B]+= 0x55*shift; 1427 funnyCode[fragmentPos + imm8OfPShufW2B]+= 0x55*shift;
1428 filterPos[i/2]-=shift; 1428 filterPos[i/2]-=shift;
1429 } 1429 }
1430 1430
1431 fragmentPos+= fragmentLengthB; 1431 fragmentPos+= fragmentLengthB;
1432 } 1432 }
1433 else 1433 else
1434 { 1434 {
1435 int maxShift= 3-d; 1435 int maxShift= 3-d;
1436 int shift=0; 1436 int shift=0;
1437 1437
1438 memcpy(funnyCode + fragmentPos, fragmentA, fragmentLengthA); 1438 memcpy(funnyCode + fragmentPos, fragmentA, fragmentLengthA);
1439 1439
1440 funnyCode[fragmentPos + imm8OfPShufW1A]= 1440 funnyCode[fragmentPos + imm8OfPShufW1A]=
1441 funnyCode[fragmentPos + imm8OfPShufW2A]= 1441 funnyCode[fragmentPos + imm8OfPShufW2A]=
1442 a | (b<<2) | (c<<4) | (d<<6); 1442 a | (b<<2) | (c<<4) | (d<<6);
1443 1443
1444 if(i+4>=dstW) shift=maxShift; //avoid overread 1444 if (i+4>=dstW) shift=maxShift; //avoid overread
1445 else if((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //partial align 1445 else if ((filterPos[i/2]&3) <= maxShift) shift=filterPos[i/2]&3; //partial align
1446 1446
1447 if(shift && i>=shift) 1447 if (shift && i>=shift)
1448 { 1448 {
1449 funnyCode[fragmentPos + imm8OfPShufW1A]+= 0x55*shift; 1449 funnyCode[fragmentPos + imm8OfPShufW1A]+= 0x55*shift;
1450 funnyCode[fragmentPos + imm8OfPShufW2A]+= 0x55*shift; 1450 funnyCode[fragmentPos + imm8OfPShufW2A]+= 0x55*shift;
1451 filterPos[i/2]-=shift; 1451 filterPos[i/2]-=shift;
1452 } 1452 }
1453 1453
1454 fragmentPos+= fragmentLengthA; 1454 fragmentPos+= fragmentLengthA;
1455 } 1455 }
1456 1456
1457 funnyCode[fragmentPos]= RET; 1457 funnyCode[fragmentPos]= RET;
1458 } 1458 }
1459 xpos+=xInc; 1459 xpos+=xInc;
1460 } 1460 }
1461 filterPos[i/2]= xpos>>16; // needed to jump to the next part 1461 filterPos[i/2]= xpos>>16; // needed to jump to the next part
1462 } 1462 }
1463 #endif /* COMPILE_MMX2 */ 1463 #endif /* COMPILE_MMX2 */
1464 1464
1465 static void globalInit(void){ 1465 static void globalInit(void){
1466 // generating tables: 1466 // generating tables:
1467 int i; 1467 int i;
1468 for(i=0; i<768; i++){ 1468 for (i=0; i<768; i++){
1469 int c= av_clip_uint8(i-256); 1469 int c= av_clip_uint8(i-256);
1470 clip_table[i]=c; 1470 clip_table[i]=c;
1471 } 1471 }
1472 } 1472 }
1473 1473
1474 static SwsFunc getSwsFunc(int flags){ 1474 static SwsFunc getSwsFunc(int flags){
1475 1475
1476 #if defined(RUNTIME_CPUDETECT) && defined (CONFIG_GPL) 1476 #if defined(RUNTIME_CPUDETECT) && defined (CONFIG_GPL)
1477 #if defined(ARCH_X86) 1477 #if defined(ARCH_X86)
1478 // ordered per speed fasterst first 1478 // ordered per speed fasterst first
1479 if(flags & SWS_CPU_CAPS_MMX2) 1479 if (flags & SWS_CPU_CAPS_MMX2)
1480 return swScale_MMX2; 1480 return swScale_MMX2;
1481 else if(flags & SWS_CPU_CAPS_3DNOW) 1481 else if (flags & SWS_CPU_CAPS_3DNOW)
1482 return swScale_3DNow; 1482 return swScale_3DNow;
1483 else if(flags & SWS_CPU_CAPS_MMX) 1483 else if (flags & SWS_CPU_CAPS_MMX)
1484 return swScale_MMX; 1484 return swScale_MMX;
1485 else 1485 else
1486 return swScale_C; 1486 return swScale_C;
1487 1487
1488 #else 1488 #else
1489 #ifdef ARCH_POWERPC 1489 #ifdef ARCH_POWERPC
1490 if(flags & SWS_CPU_CAPS_ALTIVEC) 1490 if (flags & SWS_CPU_CAPS_ALTIVEC)
1491 return swScale_altivec; 1491 return swScale_altivec;
1492 else 1492 else
1493 return swScale_C; 1493 return swScale_C;
1494 #endif 1494 #endif
1495 return swScale_C; 1495 return swScale_C;
1496 #endif /* defined(ARCH_X86) */ 1496 #endif /* defined(ARCH_X86) */
1497 #else //RUNTIME_CPUDETECT 1497 #else //RUNTIME_CPUDETECT
1498 #ifdef HAVE_MMX2 1498 #ifdef HAVE_MMX2
1499 return swScale_MMX2; 1499 return swScale_MMX2;
1500 #elif defined (HAVE_3DNOW) 1500 #elif defined (HAVE_3DNOW)
1501 return swScale_3DNow; 1501 return swScale_3DNow;
1502 #elif defined (HAVE_MMX) 1502 #elif defined (HAVE_MMX)
1503 return swScale_MMX; 1503 return swScale_MMX;
1504 #elif defined (HAVE_ALTIVEC) 1504 #elif defined (HAVE_ALTIVEC)
1505 return swScale_altivec; 1505 return swScale_altivec;
1506 #else 1506 #else
1507 return swScale_C; 1507 return swScale_C;
1508 #endif 1508 #endif
1509 #endif //!RUNTIME_CPUDETECT 1509 #endif //!RUNTIME_CPUDETECT
1510 } 1510 }
1511 1511
1512 static int PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, 1512 static int PlanarToNV12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1513 int srcSliceH, uint8_t* dstParam[], int dstStride[]){ 1513 int srcSliceH, uint8_t* dstParam[], int dstStride[]){
1514 uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY; 1514 uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
1515 /* Copy Y plane */ 1515 /* Copy Y plane */
1516 if(dstStride[0]==srcStride[0] && srcStride[0] > 0) 1516 if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
1517 memcpy(dst, src[0], srcSliceH*dstStride[0]); 1517 memcpy(dst, src[0], srcSliceH*dstStride[0]);
1518 else 1518 else
1519 { 1519 {
1520 int i; 1520 int i;
1521 uint8_t *srcPtr= src[0]; 1521 uint8_t *srcPtr= src[0];
1522 uint8_t *dstPtr= dst; 1522 uint8_t *dstPtr= dst;
1523 for(i=0; i<srcSliceH; i++) 1523 for (i=0; i<srcSliceH; i++)
1524 { 1524 {
1525 memcpy(dstPtr, srcPtr, c->srcW); 1525 memcpy(dstPtr, srcPtr, c->srcW);
1526 srcPtr+= srcStride[0]; 1526 srcPtr+= srcStride[0];
1527 dstPtr+= dstStride[0]; 1527 dstPtr+= dstStride[0];
1528 } 1528 }
1529 } 1529 }
1530 dst = dstParam[1] + dstStride[1]*srcSliceY/2; 1530 dst = dstParam[1] + dstStride[1]*srcSliceY/2;
1531 if (c->dstFormat == PIX_FMT_NV12) 1531 if (c->dstFormat == PIX_FMT_NV12)
1532 interleaveBytes( src[1],src[2],dst,c->srcW/2,srcSliceH/2,srcStride[1],srcStride[2],dstStride[0] ); 1532 interleaveBytes( src[1],src[2],dst,c->srcW/2,srcSliceH/2,srcStride[1],srcStride[2],dstStride[0] );
1533 else 1533 else
1534 interleaveBytes( src[2],src[1],dst,c->srcW/2,srcSliceH/2,srcStride[2],srcStride[1],dstStride[0] ); 1534 interleaveBytes( src[2],src[1],dst,c->srcW/2,srcSliceH/2,srcStride[2],srcStride[1],dstStride[0] );
1535 1535
1536 return srcSliceH; 1536 return srcSliceH;
1537 } 1537 }
1538 1538
1539 static int PlanarToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, 1539 static int PlanarToYuy2Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1540 int srcSliceH, uint8_t* dstParam[], int dstStride[]){ 1540 int srcSliceH, uint8_t* dstParam[], int dstStride[]){
1541 uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY; 1541 uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
1542 1542
1543 yv12toyuy2( src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0] ); 1543 yv12toyuy2( src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0] );
1544 1544
1545 return srcSliceH; 1545 return srcSliceH;
1546 } 1546 }
1547 1547
1548 static int PlanarToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, 1548 static int PlanarToUyvyWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1549 int srcSliceH, uint8_t* dstParam[], int dstStride[]){ 1549 int srcSliceH, uint8_t* dstParam[], int dstStride[]){
1550 uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY; 1550 uint8_t *dst=dstParam[0] + dstStride[0]*srcSliceY;
1551 1551
1552 yv12touyvy( src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0] ); 1552 yv12touyvy( src[0],src[1],src[2],dst,c->srcW,srcSliceH,srcStride[0],srcStride[1],dstStride[0] );
1553 1553
1554 return srcSliceH; 1554 return srcSliceH;
1555 } 1555 }
1556 1556
1557 /* {RGB,BGR}{15,16,24,32} -> {RGB,BGR}{15,16,24,32} */ 1557 /* {RGB,BGR}{15,16,24,32} -> {RGB,BGR}{15,16,24,32} */
1558 static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, 1558 static int rgb2rgbWrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1559 int srcSliceH, uint8_t* dst[], int dstStride[]){ 1559 int srcSliceH, uint8_t* dst[], int dstStride[]){
1560 const int srcFormat= c->srcFormat; 1560 const int srcFormat= c->srcFormat;
1561 const int dstFormat= c->dstFormat; 1561 const int dstFormat= c->dstFormat;
1562 const int srcBpp= (fmt_depth(srcFormat) + 7) >> 3; 1562 const int srcBpp= (fmt_depth(srcFormat) + 7) >> 3;
1563 const int dstBpp= (fmt_depth(dstFormat) + 7) >> 3; 1563 const int dstBpp= (fmt_depth(dstFormat) + 7) >> 3;
1564 const int srcId= fmt_depth(srcFormat) >> 2; /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 */ 1564 const int srcId= fmt_depth(srcFormat) >> 2; /* 1:0, 4:1, 8:2, 15:3, 16:4, 24:6, 32:8 */
1565 const int dstId= fmt_depth(dstFormat) >> 2; 1565 const int dstId= fmt_depth(dstFormat) >> 2;
1566 void (*conv)(const uint8_t *src, uint8_t *dst, long src_size)=NULL; 1566 void (*conv)(const uint8_t *src, uint8_t *dst, long src_size)=NULL;
1567 1567
1568 /* BGR -> BGR */ 1568 /* BGR -> BGR */
1569 if( (isBGR(srcFormat) && isBGR(dstFormat)) 1569 if ( (isBGR(srcFormat) && isBGR(dstFormat))
1570 || (isRGB(srcFormat) && isRGB(dstFormat))){ 1570 || (isRGB(srcFormat) && isRGB(dstFormat))){
1571 switch(srcId | (dstId<<4)){ 1571 switch(srcId | (dstId<<4)){
1572 case 0x34: conv= rgb16to15; break; 1572 case 0x34: conv= rgb16to15; break;
1573 case 0x36: conv= rgb24to15; break; 1573 case 0x36: conv= rgb24to15; break;
1574 case 0x38: conv= rgb32to15; break; 1574 case 0x38: conv= rgb32to15; break;
1575 case 0x43: conv= rgb15to16; break; 1575 case 0x43: conv= rgb15to16; break;
1576 case 0x46: conv= rgb24to16; break; 1576 case 0x46: conv= rgb24to16; break;
1577 case 0x48: conv= rgb32to16; break; 1577 case 0x48: conv= rgb32to16; break;
1578 case 0x63: conv= rgb15to24; break; 1578 case 0x63: conv= rgb15to24; break;
1579 case 0x64: conv= rgb16to24; break; 1579 case 0x64: conv= rgb16to24; break;
1580 case 0x68: conv= rgb32to24; break; 1580 case 0x68: conv= rgb32to24; break;
1581 case 0x83: conv= rgb15to32; break; 1581 case 0x83: conv= rgb15to32; break;
1582 case 0x84: conv= rgb16to32; break; 1582 case 0x84: conv= rgb16to32; break;
1583 case 0x86: conv= rgb24to32; break; 1583 case 0x86: conv= rgb24to32; break;
1584 default: av_log(c, AV_LOG_ERROR, "swScaler: internal error %s -> %s converter\n", 1584 default: av_log(c, AV_LOG_ERROR, "swScaler: internal error %s -> %s converter\n",
1585 sws_format_name(srcFormat), sws_format_name(dstFormat)); break; 1585 sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
1586 } 1586 }
1587 }else if( (isBGR(srcFormat) && isRGB(dstFormat)) 1587 }else if ( (isBGR(srcFormat) && isRGB(dstFormat))
1588 || (isRGB(srcFormat) && isBGR(dstFormat))){ 1588 || (isRGB(srcFormat) && isBGR(dstFormat))){
1589 switch(srcId | (dstId<<4)){ 1589 switch(srcId | (dstId<<4)){
1590 case 0x33: conv= rgb15tobgr15; break; 1590 case 0x33: conv= rgb15tobgr15; break;
1591 case 0x34: conv= rgb16tobgr15; break; 1591 case 0x34: conv= rgb16tobgr15; break;
1592 case 0x36: conv= rgb24tobgr15; break; 1592 case 0x36: conv= rgb24tobgr15; break;
1593 case 0x38: conv= rgb32tobgr15; break; 1593 case 0x38: conv= rgb32tobgr15; break;
1594 case 0x43: conv= rgb15tobgr16; break; 1594 case 0x43: conv= rgb15tobgr16; break;
1595 case 0x44: conv= rgb16tobgr16; break; 1595 case 0x44: conv= rgb16tobgr16; break;
1596 case 0x46: conv= rgb24tobgr16; break; 1596 case 0x46: conv= rgb24tobgr16; break;
1597 case 0x48: conv= rgb32tobgr16; break; 1597 case 0x48: conv= rgb32tobgr16; break;
1598 case 0x63: conv= rgb15tobgr24; break; 1598 case 0x63: conv= rgb15tobgr24; break;
1599 case 0x64: conv= rgb16tobgr24; break; 1599 case 0x64: conv= rgb16tobgr24; break;
1600 case 0x66: conv= rgb24tobgr24; break; 1600 case 0x66: conv= rgb24tobgr24; break;
1601 case 0x68: conv= rgb32tobgr24; break; 1601 case 0x68: conv= rgb32tobgr24; break;
1602 case 0x83: conv= rgb15tobgr32; break; 1602 case 0x83: conv= rgb15tobgr32; break;
1603 case 0x84: conv= rgb16tobgr32; break; 1603 case 0x84: conv= rgb16tobgr32; break;
1604 case 0x86: conv= rgb24tobgr32; break; 1604 case 0x86: conv= rgb24tobgr32; break;
1605 case 0x88: conv= rgb32tobgr32; break; 1605 case 0x88: conv= rgb32tobgr32; break;
1606 default: av_log(c, AV_LOG_ERROR, "swScaler: internal error %s -> %s converter\n", 1606 default: av_log(c, AV_LOG_ERROR, "swScaler: internal error %s -> %s converter\n",
1607 sws_format_name(srcFormat), sws_format_name(dstFormat)); break; 1607 sws_format_name(srcFormat), sws_format_name(dstFormat)); break;
1608 } 1608 }
1609 }else{ 1609 }else{
1610 av_log(c, AV_LOG_ERROR, "swScaler: internal error %s -> %s converter\n", 1610 av_log(c, AV_LOG_ERROR, "swScaler: internal error %s -> %s converter\n",
1611 sws_format_name(srcFormat), sws_format_name(dstFormat)); 1611 sws_format_name(srcFormat), sws_format_name(dstFormat));
1612 } 1612 }
1613 1613
1614 if(dstStride[0]*srcBpp == srcStride[0]*dstBpp) 1614 if (dstStride[0]*srcBpp == srcStride[0]*dstBpp)
1615 conv(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]); 1615 conv(src[0], dst[0] + dstStride[0]*srcSliceY, srcSliceH*srcStride[0]);
1616 else 1616 else
1617 { 1617 {
1618 int i; 1618 int i;
1619 uint8_t *srcPtr= src[0]; 1619 uint8_t *srcPtr= src[0];
1620 uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY; 1620 uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1621 1621
1622 for(i=0; i<srcSliceH; i++) 1622 for (i=0; i<srcSliceH; i++)
1623 { 1623 {
1624 conv(srcPtr, dstPtr, c->srcW*srcBpp); 1624 conv(srcPtr, dstPtr, c->srcW*srcBpp);
1625 srcPtr+= srcStride[0]; 1625 srcPtr+= srcStride[0];
1626 dstPtr+= dstStride[0]; 1626 dstPtr+= dstStride[0];
1627 } 1627 }
1628 } 1628 }
1629 return srcSliceH; 1629 return srcSliceH;
1630 } 1630 }
1631 1631
1632 static int bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, 1632 static int bgr24toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1633 int srcSliceH, uint8_t* dst[], int dstStride[]){ 1633 int srcSliceH, uint8_t* dst[], int dstStride[]){
1634 1634
1635 rgb24toyv12( 1635 rgb24toyv12(
1636 src[0], 1636 src[0],
1637 dst[0]+ srcSliceY *dstStride[0], 1637 dst[0]+ srcSliceY *dstStride[0],
1638 dst[1]+(srcSliceY>>1)*dstStride[1], 1638 dst[1]+(srcSliceY>>1)*dstStride[1],
1639 dst[2]+(srcSliceY>>1)*dstStride[2], 1639 dst[2]+(srcSliceY>>1)*dstStride[2],
1640 c->srcW, srcSliceH, 1640 c->srcW, srcSliceH,
1641 dstStride[0], dstStride[1], srcStride[0]); 1641 dstStride[0], dstStride[1], srcStride[0]);
1642 return srcSliceH; 1642 return srcSliceH;
1643 } 1643 }
1644 1644
1645 static int yvu9toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, 1645 static int yvu9toyv12Wrapper(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1646 int srcSliceH, uint8_t* dst[], int dstStride[]){ 1646 int srcSliceH, uint8_t* dst[], int dstStride[]){
1647 int i; 1647 int i;
1648 1648
1649 /* copy Y */ 1649 /* copy Y */
1650 if(srcStride[0]==dstStride[0] && srcStride[0] > 0) 1650 if (srcStride[0]==dstStride[0] && srcStride[0] > 0)
1651 memcpy(dst[0]+ srcSliceY*dstStride[0], src[0], srcStride[0]*srcSliceH); 1651 memcpy(dst[0]+ srcSliceY*dstStride[0], src[0], srcStride[0]*srcSliceH);
1652 else{ 1652 else{
1653 uint8_t *srcPtr= src[0]; 1653 uint8_t *srcPtr= src[0];
1654 uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY; 1654 uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1655 1655
1656 for(i=0; i<srcSliceH; i++) 1656 for (i=0; i<srcSliceH; i++)
1657 { 1657 {
1658 memcpy(dstPtr, srcPtr, c->srcW); 1658 memcpy(dstPtr, srcPtr, c->srcW);
1659 srcPtr+= srcStride[0]; 1659 srcPtr+= srcStride[0];
1660 dstPtr+= dstStride[0]; 1660 dstPtr+= dstStride[0];
1661 } 1661 }
1662 } 1662 }
1663 1663
1664 if(c->dstFormat==PIX_FMT_YUV420P){ 1664 if (c->dstFormat==PIX_FMT_YUV420P){
1665 planar2x(src[1], dst[1], c->chrSrcW, c->chrSrcH, srcStride[1], dstStride[1]); 1665 planar2x(src[1], dst[1], c->chrSrcW, c->chrSrcH, srcStride[1], dstStride[1]);
1666 planar2x(src[2], dst[2], c->chrSrcW, c->chrSrcH, srcStride[2], dstStride[2]); 1666 planar2x(src[2], dst[2], c->chrSrcW, c->chrSrcH, srcStride[2], dstStride[2]);
1667 }else{ 1667 }else{
1668 planar2x(src[1], dst[2], c->chrSrcW, c->chrSrcH, srcStride[1], dstStride[2]); 1668 planar2x(src[1], dst[2], c->chrSrcW, c->chrSrcH, srcStride[1], dstStride[2]);
1669 planar2x(src[2], dst[1], c->chrSrcW, c->chrSrcH, srcStride[2], dstStride[1]); 1669 planar2x(src[2], dst[1], c->chrSrcW, c->chrSrcH, srcStride[2], dstStride[1]);
1670 } 1670 }
1671 return srcSliceH; 1671 return srcSliceH;
1672 } 1672 }
1673 1673
1674 /* unscaled copy like stuff (assumes nearly identical formats) */ 1674 /* unscaled copy like stuff (assumes nearly identical formats) */
1675 static int simpleCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, 1675 static int simpleCopy(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1676 int srcSliceH, uint8_t* dst[], int dstStride[]){ 1676 int srcSliceH, uint8_t* dst[], int dstStride[]){
1677 1677
1678 if(isPacked(c->srcFormat)) 1678 if (isPacked(c->srcFormat))
1679 { 1679 {
1680 if(dstStride[0]==srcStride[0] && srcStride[0] > 0) 1680 if (dstStride[0]==srcStride[0] && srcStride[0] > 0)
1681 memcpy(dst[0] + dstStride[0]*srcSliceY, src[0], srcSliceH*dstStride[0]); 1681 memcpy(dst[0] + dstStride[0]*srcSliceY, src[0], srcSliceH*dstStride[0]);
1682 else 1682 else
1683 { 1683 {
1684 int i; 1684 int i;
1685 uint8_t *srcPtr= src[0]; 1685 uint8_t *srcPtr= src[0];
1686 uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY; 1686 uint8_t *dstPtr= dst[0] + dstStride[0]*srcSliceY;
1687 int length=0; 1687 int length=0;
1688 1688
1689 /* universal length finder */ 1689 /* universal length finder */
1690 while(length+c->srcW <= FFABS(dstStride[0]) 1690 while(length+c->srcW <= FFABS(dstStride[0])
1691 && length+c->srcW <= FFABS(srcStride[0])) length+= c->srcW; 1691 && length+c->srcW <= FFABS(srcStride[0])) length+= c->srcW;
1692 ASSERT(length!=0); 1692 ASSERT(length!=0);
1693 1693
1694 for(i=0; i<srcSliceH; i++) 1694 for (i=0; i<srcSliceH; i++)
1695 { 1695 {
1696 memcpy(dstPtr, srcPtr, length); 1696 memcpy(dstPtr, srcPtr, length);
1697 srcPtr+= srcStride[0]; 1697 srcPtr+= srcStride[0];
1698 dstPtr+= dstStride[0]; 1698 dstPtr+= dstStride[0];
1699 } 1699 }
1700 } 1700 }
1701 } 1701 }
1702 else 1702 else
1703 { /* Planar YUV or gray */ 1703 { /* Planar YUV or gray */
1704 int plane; 1704 int plane;
1705 for(plane=0; plane<3; plane++) 1705 for (plane=0; plane<3; plane++)
1706 { 1706 {
1707 int length= plane==0 ? c->srcW : -((-c->srcW )>>c->chrDstHSubSample); 1707 int length= plane==0 ? c->srcW : -((-c->srcW )>>c->chrDstHSubSample);
1708 int y= plane==0 ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample); 1708 int y= plane==0 ? srcSliceY: -((-srcSliceY)>>c->chrDstVSubSample);
1709 int height= plane==0 ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample); 1709 int height= plane==0 ? srcSliceH: -((-srcSliceH)>>c->chrDstVSubSample);
1710 1710
1711 if((isGray(c->srcFormat) || isGray(c->dstFormat)) && plane>0) 1711 if ((isGray(c->srcFormat) || isGray(c->dstFormat)) && plane>0)
1712 { 1712 {
1713 if(!isGray(c->dstFormat)) 1713 if (!isGray(c->dstFormat))
1714 memset(dst[plane], 128, dstStride[plane]*height); 1714 memset(dst[plane], 128, dstStride[plane]*height);
1715 } 1715 }
1716 else 1716 else
1717 { 1717 {
1718 if(dstStride[plane]==srcStride[plane] && srcStride[plane] > 0) 1718 if (dstStride[plane]==srcStride[plane] && srcStride[plane] > 0)
1719 memcpy(dst[plane] + dstStride[plane]*y, src[plane], height*dstStride[plane]); 1719 memcpy(dst[plane] + dstStride[plane]*y, src[plane], height*dstStride[plane]);
1720 else 1720 else
1721 { 1721 {
1722 int i; 1722 int i;
1723 uint8_t *srcPtr= src[plane]; 1723 uint8_t *srcPtr= src[plane];
1724 uint8_t *dstPtr= dst[plane] + dstStride[plane]*y; 1724 uint8_t *dstPtr= dst[plane] + dstStride[plane]*y;
1725 for(i=0; i<height; i++) 1725 for (i=0; i<height; i++)
1726 { 1726 {
1727 memcpy(dstPtr, srcPtr, length); 1727 memcpy(dstPtr, srcPtr, length);
1728 srcPtr+= srcStride[plane]; 1728 srcPtr+= srcStride[plane];
1729 dstPtr+= dstStride[plane]; 1729 dstPtr+= dstStride[plane];
1730 } 1730 }
1731 } 1731 }
1732 } 1732 }
1733 } 1733 }
1734 } 1734 }
1735 return srcSliceH; 1735 return srcSliceH;
1736 } 1736 }
1737 1737
1738 static int gray16togray(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, 1738 static int gray16togray(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1739 int srcSliceH, uint8_t* dst[], int dstStride[]){ 1739 int srcSliceH, uint8_t* dst[], int dstStride[]){
1740 1740
1741 int length= c->srcW; 1741 int length= c->srcW;
1742 int y= srcSliceY; 1742 int y= srcSliceY;
1743 int height= srcSliceH; 1743 int height= srcSliceH;
1744 int i, j; 1744 int i, j;
1745 uint8_t *srcPtr= src[0]; 1745 uint8_t *srcPtr= src[0];
1746 uint8_t *dstPtr= dst[0] + dstStride[0]*y; 1746 uint8_t *dstPtr= dst[0] + dstStride[0]*y;
1747 1747
1748 if(!isGray(c->dstFormat)){ 1748 if (!isGray(c->dstFormat)){
1749 int height= -((-srcSliceH)>>c->chrDstVSubSample); 1749 int height= -((-srcSliceH)>>c->chrDstVSubSample);
1750 memset(dst[1], 128, dstStride[1]*height); 1750 memset(dst[1], 128, dstStride[1]*height);
1751 memset(dst[2], 128, dstStride[2]*height); 1751 memset(dst[2], 128, dstStride[2]*height);
1752 } 1752 }
1753 if(c->srcFormat == PIX_FMT_GRAY16LE) srcPtr++; 1753 if (c->srcFormat == PIX_FMT_GRAY16LE) srcPtr++;
1754 for(i=0; i<height; i++) 1754 for (i=0; i<height; i++)
1755 { 1755 {
1756 for(j=0; j<length; j++) dstPtr[j] = srcPtr[j<<1]; 1756 for (j=0; j<length; j++) dstPtr[j] = srcPtr[j<<1];
1757 srcPtr+= srcStride[0]; 1757 srcPtr+= srcStride[0];
1758 dstPtr+= dstStride[0]; 1758 dstPtr+= dstStride[0];
1759 } 1759 }
1760 return srcSliceH; 1760 return srcSliceH;
1761 } 1761 }
1762 1762
1763 static int graytogray16(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, 1763 static int graytogray16(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1764 int srcSliceH, uint8_t* dst[], int dstStride[]){ 1764 int srcSliceH, uint8_t* dst[], int dstStride[]){
1765 1765
1766 int length= c->srcW; 1766 int length= c->srcW;
1767 int y= srcSliceY; 1767 int y= srcSliceY;
1768 int height= srcSliceH; 1768 int height= srcSliceH;
1769 int i, j; 1769 int i, j;
1770 uint8_t *srcPtr= src[0]; 1770 uint8_t *srcPtr= src[0];
1771 uint8_t *dstPtr= dst[0] + dstStride[0]*y; 1771 uint8_t *dstPtr= dst[0] + dstStride[0]*y;
1772 for(i=0; i<height; i++) 1772 for (i=0; i<height; i++)
1773 { 1773 {
1774 for(j=0; j<length; j++) 1774 for (j=0; j<length; j++)
1775 { 1775 {
1776 dstPtr[j<<1] = srcPtr[j]; 1776 dstPtr[j<<1] = srcPtr[j];
1777 dstPtr[(j<<1)+1] = srcPtr[j]; 1777 dstPtr[(j<<1)+1] = srcPtr[j];
1778 } 1778 }
1779 srcPtr+= srcStride[0]; 1779 srcPtr+= srcStride[0];
1780 dstPtr+= dstStride[0]; 1780 dstPtr+= dstStride[0];
1781 } 1781 }
1782 return srcSliceH; 1782 return srcSliceH;
1783 } 1783 }
1784 1784
1785 static int gray16swap(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, 1785 static int gray16swap(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
1786 int srcSliceH, uint8_t* dst[], int dstStride[]){ 1786 int srcSliceH, uint8_t* dst[], int dstStride[]){
1787 1787
1788 int length= c->srcW; 1788 int length= c->srcW;
1789 int y= srcSliceY; 1789 int y= srcSliceY;
1790 int height= srcSliceH; 1790 int height= srcSliceH;
1791 int i, j; 1791 int i, j;
1792 uint16_t *srcPtr= src[0]; 1792 uint16_t *srcPtr= src[0];
1793 uint16_t *dstPtr= dst[0] + dstStride[0]*y/2; 1793 uint16_t *dstPtr= dst[0] + dstStride[0]*y/2;
1794 for(i=0; i<height; i++) 1794 for (i=0; i<height; i++)
1795 { 1795 {
1796 for(j=0; j<length; j++) dstPtr[j] = bswap_16(srcPtr[j]); 1796 for (j=0; j<length; j++) dstPtr[j] = bswap_16(srcPtr[j]);
1797 srcPtr+= srcStride[0]/2; 1797 srcPtr+= srcStride[0]/2;
1798 dstPtr+= dstStride[0]/2; 1798 dstPtr+= dstStride[0]/2;
1799 } 1799 }
1800 return srcSliceH; 1800 return srcSliceH;
1801 } 1801 }
1802 1802
1803 1803
1804 static void getSubSampleFactors(int *h, int *v, int format){ 1804 static void getSubSampleFactors(int *h, int *v, int format){
1805 switch(format){ 1805 switch(format){
1806 case PIX_FMT_UYVY422: 1806 case PIX_FMT_UYVY422:
1807 case PIX_FMT_YUYV422: 1807 case PIX_FMT_YUYV422:
1808 *h=1; 1808 *h=1;
1809 *v=0; 1809 *v=0;
1810 break; 1810 break;
1811 case PIX_FMT_YUV420P: 1811 case PIX_FMT_YUV420P:
1812 case PIX_FMT_GRAY16BE: 1812 case PIX_FMT_GRAY16BE:
1813 case PIX_FMT_GRAY16LE: 1813 case PIX_FMT_GRAY16LE:
1814 case PIX_FMT_GRAY8: //FIXME remove after different subsamplings are fully implemented 1814 case PIX_FMT_GRAY8: //FIXME remove after different subsamplings are fully implemented
1815 case PIX_FMT_NV12: 1815 case PIX_FMT_NV12:
1816 case PIX_FMT_NV21: 1816 case PIX_FMT_NV21:
1817 *h=1; 1817 *h=1;
1818 *v=1; 1818 *v=1;
1819 break; 1819 break;
1820 case PIX_FMT_YUV410P: 1820 case PIX_FMT_YUV410P:
1821 *h=2; 1821 *h=2;
1822 *v=2; 1822 *v=2;
1823 break; 1823 break;
1824 case PIX_FMT_YUV444P: 1824 case PIX_FMT_YUV444P:
1825 *h=0; 1825 *h=0;
1826 *v=0; 1826 *v=0;
1827 break; 1827 break;
1828 case PIX_FMT_YUV422P: 1828 case PIX_FMT_YUV422P:
1829 *h=1; 1829 *h=1;
1830 *v=0; 1830 *v=0;
1831 break; 1831 break;
1832 case PIX_FMT_YUV411P: 1832 case PIX_FMT_YUV411P:
1833 *h=2; 1833 *h=2;
1834 *v=0; 1834 *v=0;
1835 break; 1835 break;
1836 default: 1836 default:
1837 *h=0; 1837 *h=0;
1838 *v=0; 1838 *v=0;
1839 break; 1839 break;
1840 } 1840 }
1841 } 1841 }
1842 1842
/**
 * Convert a 16.16 fixed point value to a rounded 16 bit integer.
 *
 * The value is rounded to nearest (half is added before the shift) and
 * saturated: results above 0x7FFF give 0x7FFF, results below -0x7FFF give
 * 0x8000. Everything in between is returned as its 16 bit pattern.
 */
static uint16_t roundToInt16(int64_t f){
    const int rounded= (f + (1<<15))>>16;

    if (rounded > 0x7FFF)
        return 0x7FFF;
    if (rounded < -0x7FFF)
        return 0x8000;
    return rounded;
}
1849 1849
1850 /** 1850 /**
1851 * @param inv_table the yuv2rgb coeffs, normally Inverse_Table_6_9[x] 1851 * @param inv_table the yuv2rgb coeffs, normally Inverse_Table_6_9[x]
1852 * @param fullRange if 1 then the luma range is 0..255 if 0 its 16..235 1852 * @param fullRange if 1 then the luma range is 0..255 if 0 its 16..235
1853 * @return -1 if not supported 1853 * @return -1 if not supported
1854 */ 1854 */
1855 int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation){ 1855 int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation){
1856 int64_t crv = inv_table[0]; 1856 int64_t crv = inv_table[0];
1857 int64_t cbu = inv_table[1]; 1857 int64_t cbu = inv_table[1];
1858 int64_t cgu = -inv_table[2]; 1858 int64_t cgu = -inv_table[2];
1859 int64_t cgv = -inv_table[3]; 1859 int64_t cgv = -inv_table[3];
1860 int64_t cy = 1<<16; 1860 int64_t cy = 1<<16;
1861 int64_t oy = 0; 1861 int64_t oy = 0;
1862 1862
1863 if(isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1; 1863 if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;
1864 memcpy(c->srcColorspaceTable, inv_table, sizeof(int)*4); 1864 memcpy(c->srcColorspaceTable, inv_table, sizeof(int)*4);
1865 memcpy(c->dstColorspaceTable, table, sizeof(int)*4); 1865 memcpy(c->dstColorspaceTable, table, sizeof(int)*4);
1866 1866
1867 c->brightness= brightness; 1867 c->brightness= brightness;
1868 c->contrast = contrast; 1868 c->contrast = contrast;
1869 c->saturation= saturation; 1869 c->saturation= saturation;
1870 c->srcRange = srcRange; 1870 c->srcRange = srcRange;
1871 c->dstRange = dstRange; 1871 c->dstRange = dstRange;
1872 1872
1873 c->uOffset= 0x0400040004000400LL; 1873 c->uOffset= 0x0400040004000400LL;
1874 c->vOffset= 0x0400040004000400LL; 1874 c->vOffset= 0x0400040004000400LL;
1875 1875
1876 if(!srcRange){ 1876 if (!srcRange){
1877 cy= (cy*255) / 219; 1877 cy= (cy*255) / 219;
1878 oy= 16<<16; 1878 oy= 16<<16;
1879 }else{ 1879 }else{
1880 crv= (crv*224) / 255; 1880 crv= (crv*224) / 255;
1881 cbu= (cbu*224) / 255; 1881 cbu= (cbu*224) / 255;
1882 cgu= (cgu*224) / 255; 1882 cgu= (cgu*224) / 255;
1883 cgv= (cgv*224) / 255; 1883 cgv= (cgv*224) / 255;
1884 } 1884 }
1885 1885
1886 cy = (cy *contrast )>>16; 1886 cy = (cy *contrast )>>16;
1887 crv= (crv*contrast * saturation)>>32; 1887 crv= (crv*contrast * saturation)>>32;
1888 cbu= (cbu*contrast * saturation)>>32; 1888 cbu= (cbu*contrast * saturation)>>32;
1889 cgu= (cgu*contrast * saturation)>>32; 1889 cgu= (cgu*contrast * saturation)>>32;
1890 cgv= (cgv*contrast * saturation)>>32; 1890 cgv= (cgv*contrast * saturation)>>32;
1891 1891
1892 oy -= 256*brightness; 1892 oy -= 256*brightness;
1893 1893
1894 c->yCoeff= roundToInt16(cy *8192) * 0x0001000100010001ULL; 1894 c->yCoeff= roundToInt16(cy *8192) * 0x0001000100010001ULL;
1895 c->vrCoeff= roundToInt16(crv*8192) * 0x0001000100010001ULL; 1895 c->vrCoeff= roundToInt16(crv*8192) * 0x0001000100010001ULL;
1896 c->ubCoeff= roundToInt16(cbu*8192) * 0x0001000100010001ULL; 1896 c->ubCoeff= roundToInt16(cbu*8192) * 0x0001000100010001ULL;
1897 c->vgCoeff= roundToInt16(cgv*8192) * 0x0001000100010001ULL; 1897 c->vgCoeff= roundToInt16(cgv*8192) * 0x0001000100010001ULL;
1898 c->ugCoeff= roundToInt16(cgu*8192) * 0x0001000100010001ULL; 1898 c->ugCoeff= roundToInt16(cgu*8192) * 0x0001000100010001ULL;
1899 c->yOffset= roundToInt16(oy * 8) * 0x0001000100010001ULL; 1899 c->yOffset= roundToInt16(oy * 8) * 0x0001000100010001ULL;
1900 1900
1901 yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast, saturation); 1901 yuv2rgb_c_init_tables(c, inv_table, srcRange, brightness, contrast, saturation);
1902 //FIXME factorize 1902 //FIXME factorize
1903 1903
1904 #ifdef COMPILE_ALTIVEC 1904 #ifdef COMPILE_ALTIVEC
1905 if (c->flags & SWS_CPU_CAPS_ALTIVEC) 1905 if (c->flags & SWS_CPU_CAPS_ALTIVEC)
1906 yuv2rgb_altivec_init_tables (c, inv_table, brightness, contrast, saturation); 1906 yuv2rgb_altivec_init_tables (c, inv_table, brightness, contrast, saturation);
1907 #endif 1907 #endif
1908 return 0; 1908 return 0;
1909 } 1909 }
1910 1910
1911 /** 1911 /**
1912 * @return -1 if not supported 1912 * @return -1 if not supported
1913 */ 1913 */
1914 int sws_getColorspaceDetails(SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation){ 1914 int sws_getColorspaceDetails(SwsContext *c, int **inv_table, int *srcRange, int **table, int *dstRange, int *brightness, int *contrast, int *saturation){
1915 if(isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1; 1915 if (isYUV(c->dstFormat) || isGray(c->dstFormat)) return -1;
1916 1916
1917 *inv_table = c->srcColorspaceTable; 1917 *inv_table = c->srcColorspaceTable;
1918 *table = c->dstColorspaceTable; 1918 *table = c->dstColorspaceTable;
1919 *srcRange = c->srcRange; 1919 *srcRange = c->srcRange;
1920 *dstRange = c->dstRange; 1920 *dstRange = c->dstRange;
1921 *brightness= c->brightness; 1921 *brightness= c->brightness;
1922 *contrast = c->contrast; 1922 *contrast = c->contrast;
1923 *saturation= c->saturation; 1923 *saturation= c->saturation;
1924 1924
1925 return 0; 1925 return 0;
1926 } 1926 }
1927 1927
1928 static int handle_jpeg(int *format) 1928 static int handle_jpeg(int *format)
1929 { 1929 {
1930 switch (*format) { 1930 switch (*format) {
1931 case PIX_FMT_YUVJ420P: 1931 case PIX_FMT_YUVJ420P:
1932 *format = PIX_FMT_YUV420P; 1932 *format = PIX_FMT_YUV420P;
1933 return 1; 1933 return 1;
1934 case PIX_FMT_YUVJ422P: 1934 case PIX_FMT_YUVJ422P:
1935 *format = PIX_FMT_YUV422P; 1935 *format = PIX_FMT_YUV422P;
1936 return 1; 1936 return 1;
1937 case PIX_FMT_YUVJ444P: 1937 case PIX_FMT_YUVJ444P:
1938 *format = PIX_FMT_YUV444P; 1938 *format = PIX_FMT_YUV444P;
1939 return 1; 1939 return 1;
1940 default: 1940 default:
1941 return 0; 1941 return 0;
1942 } 1942 }
1943 } 1943 }
1944 1944
1945 SwsContext *sws_getContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat, int flags, 1945 SwsContext *sws_getContext(int srcW, int srcH, int srcFormat, int dstW, int dstH, int dstFormat, int flags,
1946 SwsFilter *srcFilter, SwsFilter *dstFilter, double *param){ 1946 SwsFilter *srcFilter, SwsFilter *dstFilter, double *param){
1947 1947
1948 SwsContext *c; 1948 SwsContext *c;
1949 int i; 1949 int i;
1950 int usesVFilter, usesHFilter; 1950 int usesVFilter, usesHFilter;
1951 int unscaled, needsDither; 1951 int unscaled, needsDither;
1952 int srcRange, dstRange; 1952 int srcRange, dstRange;
1953 SwsFilter dummyFilter= {NULL, NULL, NULL, NULL}; 1953 SwsFilter dummyFilter= {NULL, NULL, NULL, NULL};
1954 #if defined(ARCH_X86) 1954 #if defined(ARCH_X86)
1955 if(flags & SWS_CPU_CAPS_MMX) 1955 if (flags & SWS_CPU_CAPS_MMX)
1956 asm volatile("emms\n\t"::: "memory"); 1956 asm volatile("emms\n\t"::: "memory");
1957 #endif 1957 #endif
1958 1958
1959 #if !defined(RUNTIME_CPUDETECT) || !defined (CONFIG_GPL) //ensure that the flags match the compiled variant if cpudetect is off 1959 #if !defined(RUNTIME_CPUDETECT) || !defined (CONFIG_GPL) //ensure that the flags match the compiled variant if cpudetect is off
1960 flags &= ~(SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2|SWS_CPU_CAPS_3DNOW|SWS_CPU_CAPS_ALTIVEC); 1960 flags &= ~(SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2|SWS_CPU_CAPS_3DNOW|SWS_CPU_CAPS_ALTIVEC);
1961 #ifdef HAVE_MMX2 1961 #ifdef HAVE_MMX2
1962 flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2; 1962 flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_MMX2;
1963 #elif defined (HAVE_3DNOW) 1963 #elif defined (HAVE_3DNOW)
1964 flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW; 1964 flags |= SWS_CPU_CAPS_MMX|SWS_CPU_CAPS_3DNOW;
1965 #elif defined (HAVE_MMX) 1965 #elif defined (HAVE_MMX)
1966 flags |= SWS_CPU_CAPS_MMX; 1966 flags |= SWS_CPU_CAPS_MMX;
1967 #elif defined (HAVE_ALTIVEC) 1967 #elif defined (HAVE_ALTIVEC)
1968 flags |= SWS_CPU_CAPS_ALTIVEC; 1968 flags |= SWS_CPU_CAPS_ALTIVEC;
1969 #endif 1969 #endif
1970 #endif /* RUNTIME_CPUDETECT */ 1970 #endif /* RUNTIME_CPUDETECT */
1971 if(clip_table[512] != 255) globalInit(); 1971 if (clip_table[512] != 255) globalInit();
1972 if(rgb15to16 == NULL) sws_rgb2rgb_init(flags); 1972 if (rgb15to16 == NULL) sws_rgb2rgb_init(flags);
1973 1973
1974 unscaled = (srcW == dstW && srcH == dstH); 1974 unscaled = (srcW == dstW && srcH == dstH);
1975 needsDither= (isBGR(dstFormat) || isRGB(dstFormat)) 1975 needsDither= (isBGR(dstFormat) || isRGB(dstFormat))
1976 && (fmt_depth(dstFormat))<24 1976 && (fmt_depth(dstFormat))<24
1977 && ((fmt_depth(dstFormat))<(fmt_depth(srcFormat)) || (!(isRGB(srcFormat) || isBGR(srcFormat)))); 1977 && ((fmt_depth(dstFormat))<(fmt_depth(srcFormat)) || (!(isRGB(srcFormat) || isBGR(srcFormat))));
1978 1978
1979 srcRange = handle_jpeg(&srcFormat); 1979 srcRange = handle_jpeg(&srcFormat);
1980 dstRange = handle_jpeg(&dstFormat); 1980 dstRange = handle_jpeg(&dstFormat);
1981 1981
1982 if(!isSupportedIn(srcFormat)) 1982 if (!isSupportedIn(srcFormat))
1983 { 1983 {
1984 av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as input format\n", sws_format_name(srcFormat)); 1984 av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as input format\n", sws_format_name(srcFormat));
1985 return NULL; 1985 return NULL;
1986 } 1986 }
1987 if(!isSupportedOut(dstFormat)) 1987 if (!isSupportedOut(dstFormat))
1988 { 1988 {
1989 av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as output format\n", sws_format_name(dstFormat)); 1989 av_log(NULL, AV_LOG_ERROR, "swScaler: %s is not supported as output format\n", sws_format_name(dstFormat));
1990 return NULL; 1990 return NULL;
1991 } 1991 }
1992 1992
1993 /* sanity check */ 1993 /* sanity check */
1994 if(srcW<4 || srcH<1 || dstW<8 || dstH<1) //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code 1994 if (srcW<4 || srcH<1 || dstW<8 || dstH<1) //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code
1995 { 1995 {
1996 av_log(NULL, AV_LOG_ERROR, "swScaler: %dx%d -> %dx%d is invalid scaling dimension\n", 1996 av_log(NULL, AV_LOG_ERROR, "swScaler: %dx%d -> %dx%d is invalid scaling dimension\n",
1997 srcW, srcH, dstW, dstH); 1997 srcW, srcH, dstW, dstH);
1998 return NULL; 1998 return NULL;
1999 } 1999 }
2000 2000
2001 if(!dstFilter) dstFilter= &dummyFilter; 2001 if (!dstFilter) dstFilter= &dummyFilter;
2002 if(!srcFilter) srcFilter= &dummyFilter; 2002 if (!srcFilter) srcFilter= &dummyFilter;
2003 2003
2004 c= av_mallocz(sizeof(SwsContext)); 2004 c= av_mallocz(sizeof(SwsContext));
2005 2005
2006 c->av_class = &sws_context_class; 2006 c->av_class = &sws_context_class;
2007 c->srcW= srcW; 2007 c->srcW= srcW;
2008 c->srcH= srcH; 2008 c->srcH= srcH;
2009 c->dstW= dstW; 2009 c->dstW= dstW;
2010 c->dstH= dstH; 2010 c->dstH= dstH;
2011 c->lumXInc= ((srcW<<16) + (dstW>>1))/dstW; 2011 c->lumXInc= ((srcW<<16) + (dstW>>1))/dstW;
2012 c->lumYInc= ((srcH<<16) + (dstH>>1))/dstH; 2012 c->lumYInc= ((srcH<<16) + (dstH>>1))/dstH;
2013 c->flags= flags; 2013 c->flags= flags;
2014 c->dstFormat= dstFormat; 2014 c->dstFormat= dstFormat;
2015 c->srcFormat= srcFormat; 2015 c->srcFormat= srcFormat;
2016 c->vRounder= 4* 0x0001000100010001ULL; 2016 c->vRounder= 4* 0x0001000100010001ULL;
2017 2017
2018 usesHFilter= usesVFilter= 0; 2018 usesHFilter= usesVFilter= 0;
2019 if(dstFilter->lumV!=NULL && dstFilter->lumV->length>1) usesVFilter=1; 2019 if (dstFilter->lumV!=NULL && dstFilter->lumV->length>1) usesVFilter=1;
2020 if(dstFilter->lumH!=NULL && dstFilter->lumH->length>1) usesHFilter=1; 2020 if (dstFilter->lumH!=NULL && dstFilter->lumH->length>1) usesHFilter=1;
2021 if(dstFilter->chrV!=NULL && dstFilter->chrV->length>1) usesVFilter=1; 2021 if (dstFilter->chrV!=NULL && dstFilter->chrV->length>1) usesVFilter=1;
2022 if(dstFilter->chrH!=NULL && dstFilter->chrH->length>1) usesHFilter=1; 2022 if (dstFilter->chrH!=NULL && dstFilter->chrH->length>1) usesHFilter=1;
2023 if(srcFilter->lumV!=NULL && srcFilter->lumV->length>1) usesVFilter=1; 2023 if (srcFilter->lumV!=NULL && srcFilter->lumV->length>1) usesVFilter=1;
2024 if(srcFilter->lumH!=NULL && srcFilter->lumH->length>1) usesHFilter=1; 2024 if (srcFilter->lumH!=NULL && srcFilter->lumH->length>1) usesHFilter=1;
2025 if(srcFilter->chrV!=NULL && srcFilter->chrV->length>1) usesVFilter=1; 2025 if (srcFilter->chrV!=NULL && srcFilter->chrV->length>1) usesVFilter=1;
2026 if(srcFilter->chrH!=NULL && srcFilter->chrH->length>1) usesHFilter=1; 2026 if (srcFilter->chrH!=NULL && srcFilter->chrH->length>1) usesHFilter=1;
2027 2027
2028 getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat); 2028 getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
2029 getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat); 2029 getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);
2030 2030
2031 // reuse chroma for 2 pixles rgb/bgr unless user wants full chroma interpolation 2031 // reuse chroma for 2 pixles rgb/bgr unless user wants full chroma interpolation
2032 if((isBGR(dstFormat) || isRGB(dstFormat)) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1; 2032 if ((isBGR(dstFormat) || isRGB(dstFormat)) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1;
2033 2033
2034 // drop some chroma lines if the user wants it 2034 // drop some chroma lines if the user wants it
2035 c->vChrDrop= (flags&SWS_SRC_V_CHR_DROP_MASK)>>SWS_SRC_V_CHR_DROP_SHIFT; 2035 c->vChrDrop= (flags&SWS_SRC_V_CHR_DROP_MASK)>>SWS_SRC_V_CHR_DROP_SHIFT;
2036 c->chrSrcVSubSample+= c->vChrDrop; 2036 c->chrSrcVSubSample+= c->vChrDrop;
2037 2037
2038 // drop every 2. pixel for chroma calculation unless user wants full chroma 2038 // drop every 2. pixel for chroma calculation unless user wants full chroma
2039 if((isBGR(srcFormat) || isRGB(srcFormat)) && !(flags&SWS_FULL_CHR_H_INP) 2039 if ((isBGR(srcFormat) || isRGB(srcFormat)) && !(flags&SWS_FULL_CHR_H_INP)
2040 && srcFormat!=PIX_FMT_RGB8 && srcFormat!=PIX_FMT_BGR8 2040 && srcFormat!=PIX_FMT_RGB8 && srcFormat!=PIX_FMT_BGR8
2041 && srcFormat!=PIX_FMT_RGB4 && srcFormat!=PIX_FMT_BGR4 2041 && srcFormat!=PIX_FMT_RGB4 && srcFormat!=PIX_FMT_BGR4
2042 && srcFormat!=PIX_FMT_RGB4_BYTE && srcFormat!=PIX_FMT_BGR4_BYTE) 2042 && srcFormat!=PIX_FMT_RGB4_BYTE && srcFormat!=PIX_FMT_BGR4_BYTE)
2043 c->chrSrcHSubSample=1; 2043 c->chrSrcHSubSample=1;
2044 2044
2045 if(param){ 2045 if (param){
2046 c->param[0] = param[0]; 2046 c->param[0] = param[0];
2047 c->param[1] = param[1]; 2047 c->param[1] = param[1];
2048 }else{ 2048 }else{
2049 c->param[0] = 2049 c->param[0] =
2050 c->param[1] = SWS_PARAM_DEFAULT; 2050 c->param[1] = SWS_PARAM_DEFAULT;
2051 } 2051 }
2052 2052
2053 c->chrIntHSubSample= c->chrDstHSubSample; 2053 c->chrIntHSubSample= c->chrDstHSubSample;
2054 c->chrIntVSubSample= c->chrSrcVSubSample; 2054 c->chrIntVSubSample= c->chrSrcVSubSample;
2055 2055
2056 // Note the -((-x)>>y) is so that we always round toward +inf. 2056 // Note the -((-x)>>y) is so that we always round toward +inf.
2057 c->chrSrcW= -((-srcW) >> c->chrSrcHSubSample); 2057 c->chrSrcW= -((-srcW) >> c->chrSrcHSubSample);
2058 c->chrSrcH= -((-srcH) >> c->chrSrcVSubSample); 2058 c->chrSrcH= -((-srcH) >> c->chrSrcVSubSample);
2059 c->chrDstW= -((-dstW) >> c->chrDstHSubSample); 2059 c->chrDstW= -((-dstW) >> c->chrDstHSubSample);
2060 c->chrDstH= -((-dstH) >> c->chrDstVSubSample); 2060 c->chrDstH= -((-dstH) >> c->chrDstVSubSample);
2061 2061
2062 sws_setColorspaceDetails(c, Inverse_Table_6_9[SWS_CS_DEFAULT], srcRange, Inverse_Table_6_9[SWS_CS_DEFAULT] /* FIXME*/, dstRange, 0, 1<<16, 1<<16); 2062 sws_setColorspaceDetails(c, Inverse_Table_6_9[SWS_CS_DEFAULT], srcRange, Inverse_Table_6_9[SWS_CS_DEFAULT] /* FIXME*/, dstRange, 0, 1<<16, 1<<16);
2063 2063
2064 /* unscaled special Cases */ 2064 /* unscaled special Cases */
2065 if(unscaled && !usesHFilter && !usesVFilter) 2065 if (unscaled && !usesHFilter && !usesVFilter)
2066 { 2066 {
2067 /* yv12_to_nv12 */ 2067 /* yv12_to_nv12 */
2068 if(srcFormat == PIX_FMT_YUV420P && (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)) 2068 if (srcFormat == PIX_FMT_YUV420P && (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21))
2069 { 2069 {
2070 c->swScale= PlanarToNV12Wrapper; 2070 c->swScale= PlanarToNV12Wrapper;
2071 } 2071 }
2072 #ifdef CONFIG_GPL 2072 #ifdef CONFIG_GPL
2073 /* yuv2bgr */ 2073 /* yuv2bgr */
2074 if((srcFormat==PIX_FMT_YUV420P || srcFormat==PIX_FMT_YUV422P) && (isBGR(dstFormat) || isRGB(dstFormat))) 2074 if ((srcFormat==PIX_FMT_YUV420P || srcFormat==PIX_FMT_YUV422P) && (isBGR(dstFormat) || isRGB(dstFormat)))
2075 { 2075 {
2076 c->swScale= yuv2rgb_get_func_ptr(c); 2076 c->swScale= yuv2rgb_get_func_ptr(c);
2077 } 2077 }
2078 #endif 2078 #endif
2079 2079
2080 if( srcFormat==PIX_FMT_YUV410P && dstFormat==PIX_FMT_YUV420P ) 2080 if ( srcFormat==PIX_FMT_YUV410P && dstFormat==PIX_FMT_YUV420P )
2081 { 2081 {
2082 c->swScale= yvu9toyv12Wrapper; 2082 c->swScale= yvu9toyv12Wrapper;
2083 } 2083 }
2084 2084
2085 /* bgr24toYV12 */ 2085 /* bgr24toYV12 */
2086 if(srcFormat==PIX_FMT_BGR24 && dstFormat==PIX_FMT_YUV420P) 2086 if (srcFormat==PIX_FMT_BGR24 && dstFormat==PIX_FMT_YUV420P)
2087 c->swScale= bgr24toyv12Wrapper; 2087 c->swScale= bgr24toyv12Wrapper;
2088 2088
2089 /* rgb/bgr -> rgb/bgr (no dither needed forms) */ 2089 /* rgb/bgr -> rgb/bgr (no dither needed forms) */
2090 if( (isBGR(srcFormat) || isRGB(srcFormat)) 2090 if ( (isBGR(srcFormat) || isRGB(srcFormat))
2091 && (isBGR(dstFormat) || isRGB(dstFormat)) 2091 && (isBGR(dstFormat) || isRGB(dstFormat))
2092 && srcFormat != PIX_FMT_BGR8 && dstFormat != PIX_FMT_BGR8 2092 && srcFormat != PIX_FMT_BGR8 && dstFormat != PIX_FMT_BGR8
2093 && srcFormat != PIX_FMT_RGB8 && dstFormat != PIX_FMT_RGB8 2093 && srcFormat != PIX_FMT_RGB8 && dstFormat != PIX_FMT_RGB8
2094 && srcFormat != PIX_FMT_BGR4 && dstFormat != PIX_FMT_BGR4 2094 && srcFormat != PIX_FMT_BGR4 && dstFormat != PIX_FMT_BGR4
2095 && srcFormat != PIX_FMT_RGB4 && dstFormat != PIX_FMT_RGB4 2095 && srcFormat != PIX_FMT_RGB4 && dstFormat != PIX_FMT_RGB4
2096 && srcFormat != PIX_FMT_BGR4_BYTE && dstFormat != PIX_FMT_BGR4_BYTE 2096 && srcFormat != PIX_FMT_BGR4_BYTE && dstFormat != PIX_FMT_BGR4_BYTE
2097 && srcFormat != PIX_FMT_RGB4_BYTE && dstFormat != PIX_FMT_RGB4_BYTE 2097 && srcFormat != PIX_FMT_RGB4_BYTE && dstFormat != PIX_FMT_RGB4_BYTE
2098 && srcFormat != PIX_FMT_MONOBLACK && dstFormat != PIX_FMT_MONOBLACK 2098 && srcFormat != PIX_FMT_MONOBLACK && dstFormat != PIX_FMT_MONOBLACK
2099 && !needsDither) 2099 && !needsDither)
2100 c->swScale= rgb2rgbWrapper; 2100 c->swScale= rgb2rgbWrapper;
2101 2101
2102 /* LQ converters if -sws 0 or -sws 4*/ 2102 /* LQ converters if -sws 0 or -sws 4*/
2103 if(c->flags&(SWS_FAST_BILINEAR|SWS_POINT)){ 2103 if (c->flags&(SWS_FAST_BILINEAR|SWS_POINT)){
2104 /* rgb/bgr -> rgb/bgr (dither needed forms) */ 2104 /* rgb/bgr -> rgb/bgr (dither needed forms) */
2105 if( (isBGR(srcFormat) || isRGB(srcFormat)) 2105 if ( (isBGR(srcFormat) || isRGB(srcFormat))
2106 && (isBGR(dstFormat) || isRGB(dstFormat)) 2106 && (isBGR(dstFormat) || isRGB(dstFormat))
2107 && needsDither) 2107 && needsDither)
2108 c->swScale= rgb2rgbWrapper; 2108 c->swScale= rgb2rgbWrapper;
2109 2109
2110 /* yv12_to_yuy2 */ 2110 /* yv12_to_yuy2 */
2111 if(srcFormat == PIX_FMT_YUV420P && 2111 if (srcFormat == PIX_FMT_YUV420P &&
2112 (dstFormat == PIX_FMT_YUYV422 || dstFormat == PIX_FMT_UYVY422)) 2112 (dstFormat == PIX_FMT_YUYV422 || dstFormat == PIX_FMT_UYVY422))
2113 { 2113 {
2114 if (dstFormat == PIX_FMT_YUYV422) 2114 if (dstFormat == PIX_FMT_YUYV422)
2115 c->swScale= PlanarToYuy2Wrapper; 2115 c->swScale= PlanarToYuy2Wrapper;
2116 else 2116 else
2117 c->swScale= PlanarToUyvyWrapper; 2117 c->swScale= PlanarToUyvyWrapper;
2118 } 2118 }
2119 } 2119 }
2120 2120
2121 #ifdef COMPILE_ALTIVEC 2121 #ifdef COMPILE_ALTIVEC
2122 if ((c->flags & SWS_CPU_CAPS_ALTIVEC) && 2122 if ((c->flags & SWS_CPU_CAPS_ALTIVEC) &&
2123 ((srcFormat == PIX_FMT_YUV420P && 2123 ((srcFormat == PIX_FMT_YUV420P &&
2124 (dstFormat == PIX_FMT_YUYV422 || dstFormat == PIX_FMT_UYVY422)))) { 2124 (dstFormat == PIX_FMT_YUYV422 || dstFormat == PIX_FMT_UYVY422)))) {
2125 // unscaled YV12 -> packed YUV, we want speed 2125 // unscaled YV12 -> packed YUV, we want speed
2126 if (dstFormat == PIX_FMT_YUYV422) 2126 if (dstFormat == PIX_FMT_YUYV422)
2127 c->swScale= yv12toyuy2_unscaled_altivec; 2127 c->swScale= yv12toyuy2_unscaled_altivec;
2128 else 2128 else
2129 c->swScale= yv12touyvy_unscaled_altivec; 2129 c->swScale= yv12touyvy_unscaled_altivec;
2130 } 2130 }
2131 #endif 2131 #endif
2132 2132
2133 /* simple copy */ 2133 /* simple copy */
2134 if( srcFormat == dstFormat 2134 if ( srcFormat == dstFormat
2135 || (isPlanarYUV(srcFormat) && isGray(dstFormat)) 2135 || (isPlanarYUV(srcFormat) && isGray(dstFormat))
2136 || (isPlanarYUV(dstFormat) && isGray(srcFormat)) 2136 || (isPlanarYUV(dstFormat) && isGray(srcFormat)) )
2137 ) 2137 {
2138 { 2138 c->swScale= simpleCopy;
2139 c->swScale= simpleCopy; 2139 }
2140 } 2140
2141 2141 /* gray16{le,be} conversions */
2142 /* gray16{le,be} conversions */ 2142 if (isGray16(srcFormat) && (isPlanarYUV(dstFormat) || (dstFormat == PIX_FMT_GRAY8)))
2143 if(isGray16(srcFormat) && (isPlanarYUV(dstFormat) || (dstFormat == PIX_FMT_GRAY8))) 2143 {
2144 { 2144 c->swScale= gray16togray;
2145 c->swScale= gray16togray; 2145 }
2146 } 2146 if ((isPlanarYUV(srcFormat) || (srcFormat == PIX_FMT_GRAY8)) && isGray16(dstFormat))
2147 if((isPlanarYUV(srcFormat) || (srcFormat == PIX_FMT_GRAY8)) && isGray16(dstFormat)) 2147 {
2148 { 2148 c->swScale= graytogray16;
2149 c->swScale= graytogray16; 2149 }
2150 } 2150 if (srcFormat != dstFormat && isGray16(srcFormat) && isGray16(dstFormat))
2151 if(srcFormat != dstFormat && isGray16(srcFormat) && isGray16(dstFormat)) 2151 {
2152 { 2152 c->swScale= gray16swap;
2153 c->swScale= gray16swap; 2153 }
2154 } 2154
2155 2155 if (c->swScale){
2156 if(c->swScale){ 2156 if (flags&SWS_PRINT_INFO)
2157 if(flags&SWS_PRINT_INFO) 2157 av_log(c, AV_LOG_INFO, "SwScaler: using unscaled %s -> %s special converter\n",
2158 av_log(c, AV_LOG_INFO, "SwScaler: using unscaled %s -> %s special converter\n", 2158 sws_format_name(srcFormat), sws_format_name(dstFormat));
2159 sws_format_name(srcFormat), sws_format_name(dstFormat)); 2159 return c;
2160 return c; 2160 }
2161 } 2161 }
2162 } 2162
2163 2163 if (flags & SWS_CPU_CAPS_MMX2)
2164 if(flags & SWS_CPU_CAPS_MMX2) 2164 {
2165 { 2165 c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
2166 c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0; 2166 if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR))
2167 if(!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) 2167 {
2168 { 2168 if (flags&SWS_PRINT_INFO)
2169 if(flags&SWS_PRINT_INFO) 2169 av_log(c, AV_LOG_INFO, "SwScaler: output Width is not a multiple of 32 -> no MMX2 scaler\n");
2170 av_log(c, AV_LOG_INFO, "SwScaler: output Width is not a multiple of 32 -> no MMX2 scaler\n"); 2170 }
2171 } 2171 if (usesHFilter) c->canMMX2BeUsed=0;
2172 if(usesHFilter) c->canMMX2BeUsed=0; 2172 }
2173 } 2173 else
2174 else 2174 c->canMMX2BeUsed=0;
2175 c->canMMX2BeUsed=0; 2175
2176 2176 c->chrXInc= ((c->chrSrcW<<16) + (c->chrDstW>>1))/c->chrDstW;
2177 c->chrXInc= ((c->chrSrcW<<16) + (c->chrDstW>>1))/c->chrDstW; 2177 c->chrYInc= ((c->chrSrcH<<16) + (c->chrDstH>>1))/c->chrDstH;
2178 c->chrYInc= ((c->chrSrcH<<16) + (c->chrDstH>>1))/c->chrDstH; 2178
2179 2179 // match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst
2180 // match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst 2180 // but only for the FAST_BILINEAR mode otherwise do correct scaling
2181 // but only for the FAST_BILINEAR mode otherwise do correct scaling 2181 // n-2 is the last chrominance sample available
2182 // n-2 is the last chrominance sample available 2182 // this is not perfect, but noone shuld notice the difference, the more correct variant
2183 // this is not perfect, but noone shuld notice the difference, the more correct variant 2183 // would be like the vertical one, but that would require some special code for the
2184 // would be like the vertical one, but that would require some special code for the 2184 // first and last pixel
2185 // first and last pixel 2185 if (flags&SWS_FAST_BILINEAR)
2186 if(flags&SWS_FAST_BILINEAR) 2186 {
2187 { 2187 if (c->canMMX2BeUsed)
2188 if(c->canMMX2BeUsed) 2188 {
2189 { 2189 c->lumXInc+= 20;
2190 c->lumXInc+= 20; 2190 c->chrXInc+= 20;
2191 c->chrXInc+= 20; 2191 }
2192 } 2192 //we don't use the x86asm scaler if mmx is available
2193 //we don't use the x86asm scaler if mmx is available 2193 else if (flags & SWS_CPU_CAPS_MMX)
2194 else if(flags & SWS_CPU_CAPS_MMX) 2194 {
2195 { 2195 c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
2196 c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20; 2196 c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20;
2197 c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20; 2197 }
2198 } 2198 }
2199 } 2199
2200 2200 /* precalculate horizontal scaler filter coefficients */
2201 /* precalculate horizontal scaler filter coefficients */ 2201 {
2202 { 2202 const int filterAlign=
2203 const int filterAlign= 2203 (flags & SWS_CPU_CAPS_MMX) ? 4 :
2204 (flags & SWS_CPU_CAPS_MMX) ? 4 : 2204 (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
2205 (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 : 2205 1;
2206 1; 2206
2207 2207 initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc,
2208 initFilter(&c->hLumFilter, &c->hLumFilterPos, &c->hLumFilterSize, c->lumXInc, 2208 srcW , dstW, filterAlign, 1<<14,
2209 srcW , dstW, filterAlign, 1<<14, 2209 (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags,
2210 (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags, 2210 srcFilter->lumH, dstFilter->lumH, c->param);
2211 srcFilter->lumH, dstFilter->lumH, c->param); 2211 initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc,
2212 initFilter(&c->hChrFilter, &c->hChrFilterPos, &c->hChrFilterSize, c->chrXInc, 2212 c->chrSrcW, c->chrDstW, filterAlign, 1<<14,
2213 c->chrSrcW, c->chrDstW, filterAlign, 1<<14, 2213 (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
2214 (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags, 2214 srcFilter->chrH, dstFilter->chrH, c->param);
2215 srcFilter->chrH, dstFilter->chrH, c->param);
2216 2215
2217 #define MAX_FUNNY_CODE_SIZE 10000 2216 #define MAX_FUNNY_CODE_SIZE 10000
2218 #if defined(COMPILE_MMX2) 2217 #if defined(COMPILE_MMX2)
2219 // can't downscale !!! 2218 // can't downscale !!!
2220 if(c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR)) 2219 if (c->canMMX2BeUsed && (flags & SWS_FAST_BILINEAR))
2221 { 2220 {
2222 #ifdef MAP_ANONYMOUS 2221 #ifdef MAP_ANONYMOUS
2223 c->funnyYCode = (uint8_t*)mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); 2222 c->funnyYCode = (uint8_t*)mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
2224 c->funnyUVCode = (uint8_t*)mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); 2223 c->funnyUVCode = (uint8_t*)mmap(NULL, MAX_FUNNY_CODE_SIZE, PROT_EXEC | PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
2225 #else 2224 #else
2226 c->funnyYCode = av_malloc(MAX_FUNNY_CODE_SIZE); 2225 c->funnyYCode = av_malloc(MAX_FUNNY_CODE_SIZE);
2227 c->funnyUVCode = av_malloc(MAX_FUNNY_CODE_SIZE); 2226 c->funnyUVCode = av_malloc(MAX_FUNNY_CODE_SIZE);
2228 #endif 2227 #endif
2229 2228
2230 c->lumMmx2Filter = av_malloc((dstW /8+8)*sizeof(int16_t)); 2229 c->lumMmx2Filter = av_malloc((dstW /8+8)*sizeof(int16_t));
2231 c->chrMmx2Filter = av_malloc((c->chrDstW /4+8)*sizeof(int16_t)); 2230 c->chrMmx2Filter = av_malloc((c->chrDstW /4+8)*sizeof(int16_t));
2232 c->lumMmx2FilterPos= av_malloc((dstW /2/8+8)*sizeof(int32_t)); 2231 c->lumMmx2FilterPos= av_malloc((dstW /2/8+8)*sizeof(int32_t));
2233 c->chrMmx2FilterPos= av_malloc((c->chrDstW/2/4+8)*sizeof(int32_t)); 2232 c->chrMmx2FilterPos= av_malloc((c->chrDstW/2/4+8)*sizeof(int32_t));
2234 2233
2235 initMMX2HScaler( dstW, c->lumXInc, c->funnyYCode , c->lumMmx2Filter, c->lumMmx2FilterPos, 8); 2234 initMMX2HScaler( dstW, c->lumXInc, c->funnyYCode , c->lumMmx2Filter, c->lumMmx2FilterPos, 8);
2236 initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode, c->chrMmx2Filter, c->chrMmx2FilterPos, 4); 2235 initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode, c->chrMmx2Filter, c->chrMmx2FilterPos, 4);
2237 } 2236 }
2238 #endif /* defined(COMPILE_MMX2) */ 2237 #endif /* defined(COMPILE_MMX2) */
2239 } // Init Horizontal stuff 2238 } // Init Horizontal stuff
2240 2239
2241 2240
2242 2241
2243 /* precalculate vertical scaler filter coefficients */ 2242 /* precalculate vertical scaler filter coefficients */
2244 { 2243 {
2245 const int filterAlign= 2244 const int filterAlign=
2246 (flags & SWS_CPU_CAPS_MMX) && (flags & SWS_ACCURATE_RND) ? 2 : 2245 (flags & SWS_CPU_CAPS_MMX) && (flags & SWS_ACCURATE_RND) ? 2 :
2247 (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 : 2246 (flags & SWS_CPU_CAPS_ALTIVEC) ? 8 :
2248 1; 2247 1;
2249 2248
2250 initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc, 2249 initFilter(&c->vLumFilter, &c->vLumFilterPos, &c->vLumFilterSize, c->lumYInc,
2251 srcH , dstH, filterAlign, (1<<12)-4, 2250 srcH , dstH, filterAlign, (1<<12)-4,
2252 (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags, 2251 (flags&SWS_BICUBLIN) ? (flags|SWS_BICUBIC) : flags,
2253 srcFilter->lumV, dstFilter->lumV, c->param); 2252 srcFilter->lumV, dstFilter->lumV, c->param);
2254 initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc, 2253 initFilter(&c->vChrFilter, &c->vChrFilterPos, &c->vChrFilterSize, c->chrYInc,
2255 c->chrSrcH, c->chrDstH, filterAlign, (1<<12)-4, 2254 c->chrSrcH, c->chrDstH, filterAlign, (1<<12)-4,
2256 (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags, 2255 (flags&SWS_BICUBLIN) ? (flags|SWS_BILINEAR) : flags,
2257 srcFilter->chrV, dstFilter->chrV, c->param); 2256 srcFilter->chrV, dstFilter->chrV, c->param);
2258 2257
2259 #ifdef HAVE_ALTIVEC 2258 #ifdef HAVE_ALTIVEC
2260 c->vYCoeffsBank = av_malloc(sizeof (vector signed short)*c->vLumFilterSize*c->dstH); 2259 c->vYCoeffsBank = av_malloc(sizeof (vector signed short)*c->vLumFilterSize*c->dstH);
2261 c->vCCoeffsBank = av_malloc(sizeof (vector signed short)*c->vChrFilterSize*c->chrDstH); 2260 c->vCCoeffsBank = av_malloc(sizeof (vector signed short)*c->vChrFilterSize*c->chrDstH);
2262 2261
2263 for (i=0;i<c->vLumFilterSize*c->dstH;i++) { 2262 for (i=0;i<c->vLumFilterSize*c->dstH;i++) {
2264 int j; 2263 int j;
2265 short *p = (short *)&c->vYCoeffsBank[i]; 2264 short *p = (short *)&c->vYCoeffsBank[i];
2266 for (j=0;j<8;j++) 2265 for (j=0;j<8;j++)
2267 p[j] = c->vLumFilter[i]; 2266 p[j] = c->vLumFilter[i];
2268 } 2267 }
2269 2268
2270 for (i=0;i<c->vChrFilterSize*c->chrDstH;i++) { 2269 for (i=0;i<c->vChrFilterSize*c->chrDstH;i++) {
2271 int j; 2270 int j;
2272 short *p = (short *)&c->vCCoeffsBank[i]; 2271 short *p = (short *)&c->vCCoeffsBank[i];
2273 for (j=0;j<8;j++) 2272 for (j=0;j<8;j++)
2274 p[j] = c->vChrFilter[i]; 2273 p[j] = c->vChrFilter[i];
2275 } 2274 }
2276 #endif 2275 #endif
2277 } 2276 }
2278 2277
2279 // Calculate Buffer Sizes so that they won't run out while handling these damn slices 2278 // Calculate Buffer Sizes so that they won't run out while handling these damn slices
2280 c->vLumBufSize= c->vLumFilterSize; 2279 c->vLumBufSize= c->vLumFilterSize;
2281 c->vChrBufSize= c->vChrFilterSize; 2280 c->vChrBufSize= c->vChrFilterSize;
2282 for(i=0; i<dstH; i++) 2281 for (i=0; i<dstH; i++)
2283 { 2282 {
2284 int chrI= i*c->chrDstH / dstH; 2283 int chrI= i*c->chrDstH / dstH;
2285 int nextSlice= FFMAX(c->vLumFilterPos[i ] + c->vLumFilterSize - 1, 2284 int nextSlice= FFMAX(c->vLumFilterPos[i ] + c->vLumFilterSize - 1,
2286 ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<c->chrSrcVSubSample)); 2285 ((c->vChrFilterPos[chrI] + c->vChrFilterSize - 1)<<c->chrSrcVSubSample));
2287 2286
2288 nextSlice>>= c->chrSrcVSubSample; 2287 nextSlice>>= c->chrSrcVSubSample;
2289 nextSlice<<= c->chrSrcVSubSample; 2288 nextSlice<<= c->chrSrcVSubSample;
2290 if(c->vLumFilterPos[i ] + c->vLumBufSize < nextSlice) 2289 if (c->vLumFilterPos[i ] + c->vLumBufSize < nextSlice)
2291 c->vLumBufSize= nextSlice - c->vLumFilterPos[i ]; 2290 c->vLumBufSize= nextSlice - c->vLumFilterPos[i ];
2292 if(c->vChrFilterPos[chrI] + c->vChrBufSize < (nextSlice>>c->chrSrcVSubSample)) 2291 if (c->vChrFilterPos[chrI] + c->vChrBufSize < (nextSlice>>c->chrSrcVSubSample))
2293 c->vChrBufSize= (nextSlice>>c->chrSrcVSubSample) - c->vChrFilterPos[chrI]; 2292 c->vChrBufSize= (nextSlice>>c->chrSrcVSubSample) - c->vChrFilterPos[chrI];
2294 } 2293 }
2295 2294
2296 // allocate pixbufs (we use dynamic allocation because otherwise we would need to 2295 // allocate pixbufs (we use dynamic allocation because otherwise we would need to
2297 c->lumPixBuf= av_malloc(c->vLumBufSize*2*sizeof(int16_t*)); 2296 c->lumPixBuf= av_malloc(c->vLumBufSize*2*sizeof(int16_t*));
2298 c->chrPixBuf= av_malloc(c->vChrBufSize*2*sizeof(int16_t*)); 2297 c->chrPixBuf= av_malloc(c->vChrBufSize*2*sizeof(int16_t*));
2299 //Note we need at least one pixel more at the end because of the mmx code (just in case someone wanna replace the 4000/8000) 2298 //Note we need at least one pixel more at the end because of the mmx code (just in case someone wanna replace the 4000/8000)
2300 /* align at 16 bytes for AltiVec */ 2299 /* align at 16 bytes for AltiVec */
2301 for(i=0; i<c->vLumBufSize; i++) 2300 for (i=0; i<c->vLumBufSize; i++)
2302 c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= av_mallocz(4000); 2301 c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= av_mallocz(4000);
2303 for(i=0; i<c->vChrBufSize; i++) 2302 for (i=0; i<c->vChrBufSize; i++)
2304 c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= av_malloc(8000); 2303 c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= av_malloc(8000);
2305 2304
2306 //try to avoid drawing green stuff between the right end and the stride end 2305 //try to avoid drawing green stuff between the right end and the stride end
2307 for(i=0; i<c->vChrBufSize; i++) memset(c->chrPixBuf[i], 64, 8000); 2306 for (i=0; i<c->vChrBufSize; i++) memset(c->chrPixBuf[i], 64, 8000);
2308 2307
2309 ASSERT(c->chrDstH <= dstH) 2308 ASSERT(c->chrDstH <= dstH)
2310 2309
2311 if(flags&SWS_PRINT_INFO) 2310 if (flags&SWS_PRINT_INFO)
2312 { 2311 {
2313 #ifdef DITHER1XBPP 2312 #ifdef DITHER1XBPP
2314 char *dither= " dithered"; 2313 char *dither= " dithered";
2315 #else 2314 #else
2316 char *dither= ""; 2315 char *dither= "";
2317 #endif 2316 #endif
2318 if(flags&SWS_FAST_BILINEAR) 2317 if (flags&SWS_FAST_BILINEAR)
2319 av_log(c, AV_LOG_INFO, "SwScaler: FAST_BILINEAR scaler, "); 2318 av_log(c, AV_LOG_INFO, "SwScaler: FAST_BILINEAR scaler, ");
2320 else if(flags&SWS_BILINEAR) 2319 else if (flags&SWS_BILINEAR)
2321 av_log(c, AV_LOG_INFO, "SwScaler: BILINEAR scaler, "); 2320 av_log(c, AV_LOG_INFO, "SwScaler: BILINEAR scaler, ");
2322 else if(flags&SWS_BICUBIC) 2321 else if (flags&SWS_BICUBIC)
2323 av_log(c, AV_LOG_INFO, "SwScaler: BICUBIC scaler, "); 2322 av_log(c, AV_LOG_INFO, "SwScaler: BICUBIC scaler, ");
2324 else if(flags&SWS_X) 2323 else if (flags&SWS_X)
2325 av_log(c, AV_LOG_INFO, "SwScaler: Experimental scaler, "); 2324 av_log(c, AV_LOG_INFO, "SwScaler: Experimental scaler, ");
2326 else if(flags&SWS_POINT) 2325 else if (flags&SWS_POINT)
2327 av_log(c, AV_LOG_INFO, "SwScaler: Nearest Neighbor / POINT scaler, "); 2326 av_log(c, AV_LOG_INFO, "SwScaler: Nearest Neighbor / POINT scaler, ");
2328 else if(flags&SWS_AREA) 2327 else if (flags&SWS_AREA)
2329 av_log(c, AV_LOG_INFO, "SwScaler: Area Averageing scaler, "); 2328 av_log(c, AV_LOG_INFO, "SwScaler: Area Averageing scaler, ");
2330 else if(flags&SWS_BICUBLIN) 2329 else if (flags&SWS_BICUBLIN)
2331 av_log(c, AV_LOG_INFO, "SwScaler: luma BICUBIC / chroma BILINEAR scaler, "); 2330 av_log(c, AV_LOG_INFO, "SwScaler: luma BICUBIC / chroma BILINEAR scaler, ");
2332 else if(flags&SWS_GAUSS) 2331 else if (flags&SWS_GAUSS)
2333 av_log(c, AV_LOG_INFO, "SwScaler: Gaussian scaler, "); 2332 av_log(c, AV_LOG_INFO, "SwScaler: Gaussian scaler, ");
2334 else if(flags&SWS_SINC) 2333 else if (flags&SWS_SINC)
2335 av_log(c, AV_LOG_INFO, "SwScaler: Sinc scaler, "); 2334 av_log(c, AV_LOG_INFO, "SwScaler: Sinc scaler, ");
2336 else if(flags&SWS_LANCZOS) 2335 else if (flags&SWS_LANCZOS)
2337 av_log(c, AV_LOG_INFO, "SwScaler: Lanczos scaler, "); 2336 av_log(c, AV_LOG_INFO, "SwScaler: Lanczos scaler, ");
2338 else if(flags&SWS_SPLINE) 2337 else if (flags&SWS_SPLINE)
2339 av_log(c, AV_LOG_INFO, "SwScaler: Bicubic spline scaler, "); 2338 av_log(c, AV_LOG_INFO, "SwScaler: Bicubic spline scaler, ");
2340 else 2339 else
2341 av_log(c, AV_LOG_INFO, "SwScaler: ehh flags invalid?! "); 2340 av_log(c, AV_LOG_INFO, "SwScaler: ehh flags invalid?! ");
2342 2341
2343 if(dstFormat==PIX_FMT_BGR555 || dstFormat==PIX_FMT_BGR565) 2342 if (dstFormat==PIX_FMT_BGR555 || dstFormat==PIX_FMT_BGR565)
2344 av_log(c, AV_LOG_INFO, "from %s to%s %s ", 2343 av_log(c, AV_LOG_INFO, "from %s to%s %s ",
2345 sws_format_name(srcFormat), dither, sws_format_name(dstFormat)); 2344 sws_format_name(srcFormat), dither, sws_format_name(dstFormat));
2346 else 2345 else
2347 av_log(c, AV_LOG_INFO, "from %s to %s ", 2346 av_log(c, AV_LOG_INFO, "from %s to %s ",
2348 sws_format_name(srcFormat), sws_format_name(dstFormat)); 2347 sws_format_name(srcFormat), sws_format_name(dstFormat));
2349 2348
2350 if(flags & SWS_CPU_CAPS_MMX2) 2349 if (flags & SWS_CPU_CAPS_MMX2)
2351 av_log(c, AV_LOG_INFO, "using MMX2\n"); 2350 av_log(c, AV_LOG_INFO, "using MMX2\n");
2352 else if(flags & SWS_CPU_CAPS_3DNOW) 2351 else if (flags & SWS_CPU_CAPS_3DNOW)
2353 av_log(c, AV_LOG_INFO, "using 3DNOW\n"); 2352 av_log(c, AV_LOG_INFO, "using 3DNOW\n");
2354 else if(flags & SWS_CPU_CAPS_MMX) 2353 else if (flags & SWS_CPU_CAPS_MMX)
2355 av_log(c, AV_LOG_INFO, "using MMX\n"); 2354 av_log(c, AV_LOG_INFO, "using MMX\n");
2356 else if(flags & SWS_CPU_CAPS_ALTIVEC) 2355 else if (flags & SWS_CPU_CAPS_ALTIVEC)
2357 av_log(c, AV_LOG_INFO, "using AltiVec\n"); 2356 av_log(c, AV_LOG_INFO, "using AltiVec\n");
2358 else 2357 else
2359 av_log(c, AV_LOG_INFO, "using C\n"); 2358 av_log(c, AV_LOG_INFO, "using C\n");
2360 } 2359 }
2361 2360
2362 if(flags & SWS_PRINT_INFO) 2361 if (flags & SWS_PRINT_INFO)
2363 { 2362 {
2364 if(flags & SWS_CPU_CAPS_MMX) 2363 if (flags & SWS_CPU_CAPS_MMX)
2365 { 2364 {
2366 if(c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR)) 2365 if (c->canMMX2BeUsed && (flags&SWS_FAST_BILINEAR))
2367 av_log(c, AV_LOG_VERBOSE, "SwScaler: using FAST_BILINEAR MMX2 scaler for horizontal scaling\n"); 2366 av_log(c, AV_LOG_VERBOSE, "SwScaler: using FAST_BILINEAR MMX2 scaler for horizontal scaling\n");
2368 else 2367 else
2369 { 2368 {
2370 if(c->hLumFilterSize==4) 2369 if (c->hLumFilterSize==4)
2371 av_log(c, AV_LOG_VERBOSE, "SwScaler: using 4-tap MMX scaler for horizontal luminance scaling\n"); 2370 av_log(c, AV_LOG_VERBOSE, "SwScaler: using 4-tap MMX scaler for horizontal luminance scaling\n");
2372 else if(c->hLumFilterSize==8) 2371 else if (c->hLumFilterSize==8)
2373 av_log(c, AV_LOG_VERBOSE, "SwScaler: using 8-tap MMX scaler for horizontal luminance scaling\n"); 2372 av_log(c, AV_LOG_VERBOSE, "SwScaler: using 8-tap MMX scaler for horizontal luminance scaling\n");
2374 else 2373 else
2375 av_log(c, AV_LOG_VERBOSE, "SwScaler: using n-tap MMX scaler for horizontal luminance scaling\n"); 2374 av_log(c, AV_LOG_VERBOSE, "SwScaler: using n-tap MMX scaler for horizontal luminance scaling\n");
2376 2375
2377 if(c->hChrFilterSize==4) 2376 if (c->hChrFilterSize==4)
2378 av_log(c, AV_LOG_VERBOSE, "SwScaler: using 4-tap MMX scaler for horizontal chrominance scaling\n"); 2377 av_log(c, AV_LOG_VERBOSE, "SwScaler: using 4-tap MMX scaler for horizontal chrominance scaling\n");
2379 else if(c->hChrFilterSize==8) 2378 else if (c->hChrFilterSize==8)
2380 av_log(c, AV_LOG_VERBOSE, "SwScaler: using 8-tap MMX scaler for horizontal chrominance scaling\n"); 2379 av_log(c, AV_LOG_VERBOSE, "SwScaler: using 8-tap MMX scaler for horizontal chrominance scaling\n");
2381 else 2380 else
2382 av_log(c, AV_LOG_VERBOSE, "SwScaler: using n-tap MMX scaler for horizontal chrominance scaling\n"); 2381 av_log(c, AV_LOG_VERBOSE, "SwScaler: using n-tap MMX scaler for horizontal chrominance scaling\n");
2383 } 2382 }
2384 } 2383 }
2385 else 2384 else
2386 { 2385 {
2387 #if defined(ARCH_X86) 2386 #if defined(ARCH_X86)
2388 av_log(c, AV_LOG_VERBOSE, "SwScaler: using X86-Asm scaler for horizontal scaling\n"); 2387 av_log(c, AV_LOG_VERBOSE, "SwScaler: using X86-Asm scaler for horizontal scaling\n");
2389 #else 2388 #else
2390 if(flags & SWS_FAST_BILINEAR) 2389 if (flags & SWS_FAST_BILINEAR)
2391 av_log(c, AV_LOG_VERBOSE, "SwScaler: using FAST_BILINEAR C scaler for horizontal scaling\n"); 2390 av_log(c, AV_LOG_VERBOSE, "SwScaler: using FAST_BILINEAR C scaler for horizontal scaling\n");
2392 else 2391 else
2393 av_log(c, AV_LOG_VERBOSE, "SwScaler: using C scaler for horizontal scaling\n"); 2392 av_log(c, AV_LOG_VERBOSE, "SwScaler: using C scaler for horizontal scaling\n");
2394 #endif 2393 #endif
2395 } 2394 }
2396 if(isPlanarYUV(dstFormat)) 2395 if (isPlanarYUV(dstFormat))
2397 { 2396 {
2398 if(c->vLumFilterSize==1) 2397 if (c->vLumFilterSize==1)
2399 av_log(c, AV_LOG_VERBOSE, "SwScaler: using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); 2398 av_log(c, AV_LOG_VERBOSE, "SwScaler: using 1-tap %s \"scaler\" for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2400 else 2399 else
2401 av_log(c, AV_LOG_VERBOSE, "SwScaler: using n-tap %s scaler for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); 2400 av_log(c, AV_LOG_VERBOSE, "SwScaler: using n-tap %s scaler for vertical scaling (YV12 like)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2402 } 2401 }
2403 else 2402 else
2404 { 2403 {
2405 if(c->vLumFilterSize==1 && c->vChrFilterSize==2) 2404 if (c->vLumFilterSize==1 && c->vChrFilterSize==2)
2406 av_log(c, AV_LOG_VERBOSE, "SwScaler: using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n" 2405 av_log(c, AV_LOG_VERBOSE, "SwScaler: using 1-tap %s \"scaler\" for vertical luminance scaling (BGR)\n"
2407 "SwScaler: 2-tap scaler for vertical chrominance scaling (BGR)\n",(flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); 2406 "SwScaler: 2-tap scaler for vertical chrominance scaling (BGR)\n",(flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2408 else if(c->vLumFilterSize==2 && c->vChrFilterSize==2) 2407 else if (c->vLumFilterSize==2 && c->vChrFilterSize==2)
2409 av_log(c, AV_LOG_VERBOSE, "SwScaler: using 2-tap linear %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); 2408 av_log(c, AV_LOG_VERBOSE, "SwScaler: using 2-tap linear %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2410 else 2409 else
2411 av_log(c, AV_LOG_VERBOSE, "SwScaler: using n-tap %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); 2410 av_log(c, AV_LOG_VERBOSE, "SwScaler: using n-tap %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2412 } 2411 }
2413 2412
2414 if(dstFormat==PIX_FMT_BGR24) 2413 if (dstFormat==PIX_FMT_BGR24)
2415 av_log(c, AV_LOG_VERBOSE, "SwScaler: using %s YV12->BGR24 Converter\n", 2414 av_log(c, AV_LOG_VERBOSE, "SwScaler: using %s YV12->BGR24 Converter\n",
2416 (flags & SWS_CPU_CAPS_MMX2) ? "MMX2" : ((flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C")); 2415 (flags & SWS_CPU_CAPS_MMX2) ? "MMX2" : ((flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"));
2417 else if(dstFormat==PIX_FMT_RGB32) 2416 else if (dstFormat==PIX_FMT_RGB32)
2418 av_log(c, AV_LOG_VERBOSE, "SwScaler: using %s YV12->BGR32 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); 2417 av_log(c, AV_LOG_VERBOSE, "SwScaler: using %s YV12->BGR32 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2419 else if(dstFormat==PIX_FMT_BGR565) 2418 else if (dstFormat==PIX_FMT_BGR565)
2420 av_log(c, AV_LOG_VERBOSE, "SwScaler: using %s YV12->BGR16 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); 2419 av_log(c, AV_LOG_VERBOSE, "SwScaler: using %s YV12->BGR16 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2421 else if(dstFormat==PIX_FMT_BGR555) 2420 else if (dstFormat==PIX_FMT_BGR555)
2422 av_log(c, AV_LOG_VERBOSE, "SwScaler: using %s YV12->BGR15 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); 2421 av_log(c, AV_LOG_VERBOSE, "SwScaler: using %s YV12->BGR15 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2423 2422
2424 av_log(c, AV_LOG_VERBOSE, "SwScaler: %dx%d -> %dx%d\n", srcW, srcH, dstW, dstH); 2423 av_log(c, AV_LOG_VERBOSE, "SwScaler: %dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
2425 } 2424 }
2426 if(flags & SWS_PRINT_INFO) 2425 if (flags & SWS_PRINT_INFO)
2427 { 2426 {
2428 av_log(c, AV_LOG_DEBUG, "SwScaler:Lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n", 2427 av_log(c, AV_LOG_DEBUG, "SwScaler:Lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
2429 c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc); 2428 c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc);
2430 av_log(c, AV_LOG_DEBUG, "SwScaler:Chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n", 2429 av_log(c, AV_LOG_DEBUG, "SwScaler:Chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
2431 c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc); 2430 c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc);
2432 } 2431 }
2433 2432
2434 c->swScale= getSwsFunc(flags); 2433 c->swScale= getSwsFunc(flags);
2435 return c; 2434 return c;
2436 } 2435 }
2437 2436
2438 /** 2437 /**
2439 * swscale warper, so we don't need to export the SwsContext. 2438 * swscale warper, so we don't need to export the SwsContext.
2440 * assumes planar YUV to be in YUV order instead of YVU 2439 * assumes planar YUV to be in YUV order instead of YVU
2441 */ 2440 */
2442 int sws_scale(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, 2441 int sws_scale(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
2443 int srcSliceH, uint8_t* dst[], int dstStride[]){ 2442 int srcSliceH, uint8_t* dst[], int dstStride[]){
2444 int i; 2443 int i;
2445 uint8_t* src2[4]= {src[0], src[1], src[2]}; 2444 uint8_t* src2[4]= {src[0], src[1], src[2]};
2446 uint32_t pal[256]; 2445 uint32_t pal[256];
2447 if (c->sliceDir == 0 && srcSliceY != 0 && srcSliceY + srcSliceH != c->srcH) { 2446 if (c->sliceDir == 0 && srcSliceY != 0 && srcSliceY + srcSliceH != c->srcH) {
2448 av_log(c, AV_LOG_ERROR, "swScaler: slices start in the middle!\n"); 2447 av_log(c, AV_LOG_ERROR, "swScaler: slices start in the middle!\n");
2449 return 0; 2448 return 0;
2450 } 2449 }
2451 if (c->sliceDir == 0) { 2450 if (c->sliceDir == 0) {
2452 if (srcSliceY == 0) c->sliceDir = 1; else c->sliceDir = -1; 2451 if (srcSliceY == 0) c->sliceDir = 1; else c->sliceDir = -1;
2453 } 2452 }
2454 2453
2455 if(c->srcFormat == PIX_FMT_PAL8){ 2454 if (c->srcFormat == PIX_FMT_PAL8){
2456 for(i=0; i<256; i++){ 2455 for (i=0; i<256; i++){
2457 int p= ((uint32_t*)(src[1]))[i]; 2456 int p= ((uint32_t*)(src[1]))[i];
2458 int r= (p>>16)&0xFF; 2457 int r= (p>>16)&0xFF;
2459 int g= (p>> 8)&0xFF; 2458 int g= (p>> 8)&0xFF;
2460 int b= p &0xFF; 2459 int b= p &0xFF;
2461 int y= av_clip_uint8(((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16 ); 2460 int y= av_clip_uint8(((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16 );
2462 int u= av_clip_uint8(((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128); 2461 int u= av_clip_uint8(((RU*r + GU*g + BU*b)>>RGB2YUV_SHIFT) + 128);
2463 int v= av_clip_uint8(((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128); 2462 int v= av_clip_uint8(((RV*r + GV*g + BV*b)>>RGB2YUV_SHIFT) + 128);
2464 pal[i]= y + (u<<8) + (v<<16); 2463 pal[i]= y + (u<<8) + (v<<16);
2465 } 2464 }
2466 src2[1]= pal; 2465 src2[1]= pal;
2467 } 2466 }
2468 2467
2469 // copy strides, so they can safely be modified 2468 // copy strides, so they can safely be modified
2470 if (c->sliceDir == 1) { 2469 if (c->sliceDir == 1) {
2471 // slices go from top to bottom 2470 // slices go from top to bottom
2472 int srcStride2[4]= {srcStride[0], srcStride[1], srcStride[2]}; 2471 int srcStride2[4]= {srcStride[0], srcStride[1], srcStride[2]};
2473 int dstStride2[4]= {dstStride[0], dstStride[1], dstStride[2]}; 2472 int dstStride2[4]= {dstStride[0], dstStride[1], dstStride[2]};
2474 return c->swScale(c, src2, srcStride2, srcSliceY, srcSliceH, dst, dstStride2); 2473 return c->swScale(c, src2, srcStride2, srcSliceY, srcSliceH, dst, dstStride2);
2475 } else { 2474 } else {
2476 // slices go from bottom to top => we flip the image internally 2475 // slices go from bottom to top => we flip the image internally
2477 uint8_t* dst2[4]= {dst[0] + (c->dstH-1)*dstStride[0], 2476 uint8_t* dst2[4]= {dst[0] + (c->dstH-1)*dstStride[0],
2478 dst[1] + ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[1], 2477 dst[1] + ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[1],
2479 dst[2] + ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[2]}; 2478 dst[2] + ((c->dstH>>c->chrDstVSubSample)-1)*dstStride[2]};
2480 int srcStride2[4]= {-srcStride[0], -srcStride[1], -srcStride[2]}; 2479 int srcStride2[4]= {-srcStride[0], -srcStride[1], -srcStride[2]};
2481 int dstStride2[4]= {-dstStride[0], -dstStride[1], -dstStride[2]}; 2480 int dstStride2[4]= {-dstStride[0], -dstStride[1], -dstStride[2]};
2482 2481
2483 src2[0] += (srcSliceH-1)*srcStride[0]; 2482 src2[0] += (srcSliceH-1)*srcStride[0];
2484 if(c->srcFormat != PIX_FMT_PAL8) 2483 if (c->srcFormat != PIX_FMT_PAL8)
2485 src2[1] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[1]; 2484 src2[1] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[1];
2486 src2[2] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[2]; 2485 src2[2] += ((srcSliceH>>c->chrSrcVSubSample)-1)*srcStride[2];
2487 2486
2488 return c->swScale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH, srcSliceH, dst2, dstStride2); 2487 return c->swScale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH, srcSliceH, dst2, dstStride2);
2489 } 2488 }
2490 } 2489 }
2491 2490
2492 /** 2491 /**
2493 * swscale warper, so we don't need to export the SwsContext 2492 * swscale warper, so we don't need to export the SwsContext
2494 */ 2493 */
2495 int sws_scale_ordered(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, 2494 int sws_scale_ordered(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
2496 int srcSliceH, uint8_t* dst[], int dstStride[]){ 2495 int srcSliceH, uint8_t* dst[], int dstStride[]){
2497 return sws_scale(c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride); 2496 return sws_scale(c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride);
2498 } 2497 }
2499 2498
2500 SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur, 2499 SwsFilter *sws_getDefaultFilter(float lumaGBlur, float chromaGBlur,
2501 float lumaSharpen, float chromaSharpen, 2500 float lumaSharpen, float chromaSharpen,
2502 float chromaHShift, float chromaVShift, 2501 float chromaHShift, float chromaVShift,
2503 int verbose) 2502 int verbose)
2504 { 2503 {
2505 SwsFilter *filter= av_malloc(sizeof(SwsFilter)); 2504 SwsFilter *filter= av_malloc(sizeof(SwsFilter));
2506 2505
2507 if(lumaGBlur!=0.0){ 2506 if (lumaGBlur!=0.0){
2508 filter->lumH= sws_getGaussianVec(lumaGBlur, 3.0); 2507 filter->lumH= sws_getGaussianVec(lumaGBlur, 3.0);
2509 filter->lumV= sws_getGaussianVec(lumaGBlur, 3.0); 2508 filter->lumV= sws_getGaussianVec(lumaGBlur, 3.0);
2510 }else{ 2509 }else{
2511 filter->lumH= sws_getIdentityVec(); 2510 filter->lumH= sws_getIdentityVec();
2512 filter->lumV= sws_getIdentityVec(); 2511 filter->lumV= sws_getIdentityVec();
2513 } 2512 }
2514 2513
2515 if(chromaGBlur!=0.0){ 2514 if (chromaGBlur!=0.0){
2516 filter->chrH= sws_getGaussianVec(chromaGBlur, 3.0); 2515 filter->chrH= sws_getGaussianVec(chromaGBlur, 3.0);
2517 filter->chrV= sws_getGaussianVec(chromaGBlur, 3.0); 2516 filter->chrV= sws_getGaussianVec(chromaGBlur, 3.0);
2518 }else{ 2517 }else{
2519 filter->chrH= sws_getIdentityVec(); 2518 filter->chrH= sws_getIdentityVec();
2520 filter->chrV= sws_getIdentityVec(); 2519 filter->chrV= sws_getIdentityVec();
2521 } 2520 }
2522 2521
2523 if(chromaSharpen!=0.0){ 2522 if (chromaSharpen!=0.0){
2524 SwsVector *id= sws_getIdentityVec(); 2523 SwsVector *id= sws_getIdentityVec();
2525 sws_scaleVec(filter->chrH, -chromaSharpen); 2524 sws_scaleVec(filter->chrH, -chromaSharpen);
2526 sws_scaleVec(filter->chrV, -chromaSharpen); 2525 sws_scaleVec(filter->chrV, -chromaSharpen);
2527 sws_addVec(filter->chrH, id); 2526 sws_addVec(filter->chrH, id);
2528 sws_addVec(filter->chrV, id); 2527 sws_addVec(filter->chrV, id);
2529 sws_freeVec(id); 2528 sws_freeVec(id);
2530 } 2529 }
2531 2530
2532 if(lumaSharpen!=0.0){ 2531 if (lumaSharpen!=0.0){
2533 SwsVector *id= sws_getIdentityVec(); 2532 SwsVector *id= sws_getIdentityVec();
2534 sws_scaleVec(filter->lumH, -lumaSharpen); 2533 sws_scaleVec(filter->lumH, -lumaSharpen);
2535 sws_scaleVec(filter->lumV, -lumaSharpen); 2534 sws_scaleVec(filter->lumV, -lumaSharpen);
2536 sws_addVec(filter->lumH, id); 2535 sws_addVec(filter->lumH, id);
2537 sws_addVec(filter->lumV, id); 2536 sws_addVec(filter->lumV, id);
2538 sws_freeVec(id); 2537 sws_freeVec(id);
2539 } 2538 }
2540 2539
2541 if(chromaHShift != 0.0) 2540 if (chromaHShift != 0.0)
2542 sws_shiftVec(filter->chrH, (int)(chromaHShift+0.5)); 2541 sws_shiftVec(filter->chrH, (int)(chromaHShift+0.5));
2543 2542
2544 if(chromaVShift != 0.0) 2543 if (chromaVShift != 0.0)
2545 sws_shiftVec(filter->chrV, (int)(chromaVShift+0.5)); 2544 sws_shiftVec(filter->chrV, (int)(chromaVShift+0.5));
2546 2545
2547 sws_normalizeVec(filter->chrH, 1.0); 2546 sws_normalizeVec(filter->chrH, 1.0);
2548 sws_normalizeVec(filter->chrV, 1.0); 2547 sws_normalizeVec(filter->chrV, 1.0);
2549 sws_normalizeVec(filter->lumH, 1.0); 2548 sws_normalizeVec(filter->lumH, 1.0);
2550 sws_normalizeVec(filter->lumV, 1.0); 2549 sws_normalizeVec(filter->lumV, 1.0);
2551 2550
2552 if(verbose) sws_printVec(filter->chrH); 2551 if (verbose) sws_printVec(filter->chrH);
2553 if(verbose) sws_printVec(filter->lumH); 2552 if (verbose) sws_printVec(filter->lumH);
2554 2553
2555 return filter; 2554 return filter;
2556 } 2555 }
2557 2556
2558 /** 2557 /**
2559 * returns a normalized gaussian curve used to filter stuff 2558 * returns a normalized gaussian curve used to filter stuff
2560 * quality=3 is high quality, lowwer is lowwer quality 2559 * quality=3 is high quality, lowwer is lowwer quality
2561 */ 2560 */
2562 SwsVector *sws_getGaussianVec(double variance, double quality){ 2561 SwsVector *sws_getGaussianVec(double variance, double quality){
2563 const int length= (int)(variance*quality + 0.5) | 1; 2562 const int length= (int)(variance*quality + 0.5) | 1;
2564 int i; 2563 int i;
2565 double *coeff= av_malloc(length*sizeof(double)); 2564 double *coeff= av_malloc(length*sizeof(double));
2566 double middle= (length-1)*0.5; 2565 double middle= (length-1)*0.5;
2567 SwsVector *vec= av_malloc(sizeof(SwsVector)); 2566 SwsVector *vec= av_malloc(sizeof(SwsVector));
2568 2567
2569 vec->coeff= coeff; 2568 vec->coeff= coeff;
2570 vec->length= length; 2569 vec->length= length;
2571 2570
2572 for(i=0; i<length; i++) 2571 for (i=0; i<length; i++)
2573 { 2572 {
2574 double dist= i-middle; 2573 double dist= i-middle;
2575 coeff[i]= exp( -dist*dist/(2*variance*variance) ) / sqrt(2*variance*PI); 2574 coeff[i]= exp( -dist*dist/(2*variance*variance) ) / sqrt(2*variance*PI);
2576 } 2575 }
2577 2576
2578 sws_normalizeVec(vec, 1.0); 2577 sws_normalizeVec(vec, 1.0);
2579 2578
2580 return vec; 2579 return vec;
2581 } 2580 }
2582 2581
2583 SwsVector *sws_getConstVec(double c, int length){ 2582 SwsVector *sws_getConstVec(double c, int length){
2584 int i; 2583 int i;
2585 double *coeff= av_malloc(length*sizeof(double)); 2584 double *coeff= av_malloc(length*sizeof(double));
2586 SwsVector *vec= av_malloc(sizeof(SwsVector)); 2585 SwsVector *vec= av_malloc(sizeof(SwsVector));
2587 2586
2588 vec->coeff= coeff; 2587 vec->coeff= coeff;
2589 vec->length= length; 2588 vec->length= length;
2590 2589
2591 for(i=0; i<length; i++) 2590 for (i=0; i<length; i++)
2592 coeff[i]= c; 2591 coeff[i]= c;
2593 2592
2594 return vec; 2593 return vec;
2595 } 2594 }
2596 2595
2597 2596
2598 SwsVector *sws_getIdentityVec(void){ 2597 SwsVector *sws_getIdentityVec(void){
2599 return sws_getConstVec(1.0, 1); 2598 return sws_getConstVec(1.0, 1);
2600 } 2599 }
2601 2600
2602 double sws_dcVec(SwsVector *a){ 2601 double sws_dcVec(SwsVector *a){
2603 int i; 2602 int i;
2604 double sum=0; 2603 double sum=0;
2605 2604
2606 for(i=0; i<a->length; i++) 2605 for (i=0; i<a->length; i++)
2607 sum+= a->coeff[i]; 2606 sum+= a->coeff[i];
2608 2607
2609 return sum; 2608 return sum;
2610 } 2609 }
2611 2610
2612 void sws_scaleVec(SwsVector *a, double scalar){ 2611 void sws_scaleVec(SwsVector *a, double scalar){
2613 int i; 2612 int i;
2614 2613
2615 for(i=0; i<a->length; i++) 2614 for (i=0; i<a->length; i++)
2616 a->coeff[i]*= scalar; 2615 a->coeff[i]*= scalar;
2617 } 2616 }
2618 2617
2619 void sws_normalizeVec(SwsVector *a, double height){ 2618 void sws_normalizeVec(SwsVector *a, double height){
2620 sws_scaleVec(a, height/sws_dcVec(a)); 2619 sws_scaleVec(a, height/sws_dcVec(a));
2621 } 2620 }
2622 2621
2623 static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b){ 2622 static SwsVector *sws_getConvVec(SwsVector *a, SwsVector *b){
2624 int length= a->length + b->length - 1; 2623 int length= a->length + b->length - 1;
2625 double *coeff= av_malloc(length*sizeof(double)); 2624 double *coeff= av_malloc(length*sizeof(double));
2626 int i, j; 2625 int i, j;
2627 SwsVector *vec= av_malloc(sizeof(SwsVector)); 2626 SwsVector *vec= av_malloc(sizeof(SwsVector));
2628 2627
2629 vec->coeff= coeff; 2628 vec->coeff= coeff;
2630 vec->length= length; 2629 vec->length= length;
2631 2630
2632 for(i=0; i<length; i++) coeff[i]= 0.0; 2631 for (i=0; i<length; i++) coeff[i]= 0.0;
2633 2632
2634 for(i=0; i<a->length; i++) 2633 for (i=0; i<a->length; i++)
2635 { 2634 {
2636 for(j=0; j<b->length; j++) 2635 for (j=0; j<b->length; j++)
2637 { 2636 {
2638 coeff[i+j]+= a->coeff[i]*b->coeff[j]; 2637 coeff[i+j]+= a->coeff[i]*b->coeff[j];
2639 } 2638 }
2640 } 2639 }
2641 2640
2642 return vec; 2641 return vec;
2643 } 2642 }
2644 2643
2645 static SwsVector *sws_sumVec(SwsVector *a, SwsVector *b){ 2644 static SwsVector *sws_sumVec(SwsVector *a, SwsVector *b){
2646 int length= FFMAX(a->length, b->length); 2645 int length= FFMAX(a->length, b->length);
2647 double *coeff= av_malloc(length*sizeof(double)); 2646 double *coeff= av_malloc(length*sizeof(double));
2648 int i; 2647 int i;
2649 SwsVector *vec= av_malloc(sizeof(SwsVector)); 2648 SwsVector *vec= av_malloc(sizeof(SwsVector));
2650 2649
2651 vec->coeff= coeff; 2650 vec->coeff= coeff;
2652 vec->length= length; 2651 vec->length= length;
2653 2652
2654 for(i=0; i<length; i++) coeff[i]= 0.0; 2653 for (i=0; i<length; i++) coeff[i]= 0.0;
2655 2654
2656 for(i=0; i<a->length; i++) coeff[i + (length-1)/2 - (a->length-1)/2]+= a->coeff[i]; 2655 for (i=0; i<a->length; i++) coeff[i + (length-1)/2 - (a->length-1)/2]+= a->coeff[i];
2657 for(i=0; i<b->length; i++) coeff[i + (length-1)/2 - (b->length-1)/2]+= b->coeff[i]; 2656 for (i=0; i<b->length; i++) coeff[i + (length-1)/2 - (b->length-1)/2]+= b->coeff[i];
2658 2657
2659 return vec; 2658 return vec;
2660 } 2659 }
2661 2660
2662 static SwsVector *sws_diffVec(SwsVector *a, SwsVector *b){ 2661 static SwsVector *sws_diffVec(SwsVector *a, SwsVector *b){
2663 int length= FFMAX(a->length, b->length); 2662 int length= FFMAX(a->length, b->length);
2664 double *coeff= av_malloc(length*sizeof(double)); 2663 double *coeff= av_malloc(length*sizeof(double));
2665 int i; 2664 int i;
2666 SwsVector *vec= av_malloc(sizeof(SwsVector)); 2665 SwsVector *vec= av_malloc(sizeof(SwsVector));
2667 2666
2668 vec->coeff= coeff; 2667 vec->coeff= coeff;
2669 vec->length= length; 2668 vec->length= length;
2670 2669
2671 for(i=0; i<length; i++) coeff[i]= 0.0; 2670 for (i=0; i<length; i++) coeff[i]= 0.0;
2672 2671
2673 for(i=0; i<a->length; i++) coeff[i + (length-1)/2 - (a->length-1)/2]+= a->coeff[i]; 2672 for (i=0; i<a->length; i++) coeff[i + (length-1)/2 - (a->length-1)/2]+= a->coeff[i];
2674 for(i=0; i<b->length; i++) coeff[i + (length-1)/2 - (b->length-1)/2]-= b->coeff[i]; 2673 for (i=0; i<b->length; i++) coeff[i + (length-1)/2 - (b->length-1)/2]-= b->coeff[i];
2675 2674
2676 return vec; 2675 return vec;
2677 } 2676 }
2678 2677
2679 /* shift left / or right if "shift" is negative */ 2678 /* shift left / or right if "shift" is negative */
2680 static SwsVector *sws_getShiftedVec(SwsVector *a, int shift){ 2679 static SwsVector *sws_getShiftedVec(SwsVector *a, int shift){
2681 int length= a->length + FFABS(shift)*2; 2680 int length= a->length + FFABS(shift)*2;
2682 double *coeff= av_malloc(length*sizeof(double)); 2681 double *coeff= av_malloc(length*sizeof(double));
2683 int i; 2682 int i;
2684 SwsVector *vec= av_malloc(sizeof(SwsVector)); 2683 SwsVector *vec= av_malloc(sizeof(SwsVector));
2685 2684
2686 vec->coeff= coeff; 2685 vec->coeff= coeff;
2687 vec->length= length; 2686 vec->length= length;
2688 2687
2689 for(i=0; i<length; i++) coeff[i]= 0.0; 2688 for (i=0; i<length; i++) coeff[i]= 0.0;
2690 2689
2691 for(i=0; i<a->length; i++) 2690 for (i=0; i<a->length; i++)
2692 { 2691 {
2693 coeff[i + (length-1)/2 - (a->length-1)/2 - shift]= a->coeff[i]; 2692 coeff[i + (length-1)/2 - (a->length-1)/2 - shift]= a->coeff[i];
2694 } 2693 }
2695 2694
2696 return vec; 2695 return vec;
2697 } 2696 }
2698 2697
2699 void sws_shiftVec(SwsVector *a, int shift){ 2698 void sws_shiftVec(SwsVector *a, int shift){
2700 SwsVector *shifted= sws_getShiftedVec(a, shift); 2699 SwsVector *shifted= sws_getShiftedVec(a, shift);
2701 av_free(a->coeff); 2700 av_free(a->coeff);
2702 a->coeff= shifted->coeff; 2701 a->coeff= shifted->coeff;
2703 a->length= shifted->length; 2702 a->length= shifted->length;
2704 av_free(shifted); 2703 av_free(shifted);
2705 } 2704 }
2706 2705
2707 void sws_addVec(SwsVector *a, SwsVector *b){ 2706 void sws_addVec(SwsVector *a, SwsVector *b){
2708 SwsVector *sum= sws_sumVec(a, b); 2707 SwsVector *sum= sws_sumVec(a, b);
2709 av_free(a->coeff); 2708 av_free(a->coeff);
2710 a->coeff= sum->coeff; 2709 a->coeff= sum->coeff;
2711 a->length= sum->length; 2710 a->length= sum->length;
2712 av_free(sum); 2711 av_free(sum);
2713 } 2712 }
2714 2713
2715 void sws_subVec(SwsVector *a, SwsVector *b){ 2714 void sws_subVec(SwsVector *a, SwsVector *b){
2716 SwsVector *diff= sws_diffVec(a, b); 2715 SwsVector *diff= sws_diffVec(a, b);
2717 av_free(a->coeff); 2716 av_free(a->coeff);
2718 a->coeff= diff->coeff; 2717 a->coeff= diff->coeff;
2719 a->length= diff->length; 2718 a->length= diff->length;
2720 av_free(diff); 2719 av_free(diff);
2721 } 2720 }
2722 2721
2723 void sws_convVec(SwsVector *a, SwsVector *b){ 2722 void sws_convVec(SwsVector *a, SwsVector *b){
2724 SwsVector *conv= sws_getConvVec(a, b); 2723 SwsVector *conv= sws_getConvVec(a, b);
2725 av_free(a->coeff); 2724 av_free(a->coeff);
2726 a->coeff= conv->coeff; 2725 a->coeff= conv->coeff;
2727 a->length= conv->length; 2726 a->length= conv->length;
2728 av_free(conv); 2727 av_free(conv);
2729 } 2728 }
2730 2729
2731 SwsVector *sws_cloneVec(SwsVector *a){ 2730 SwsVector *sws_cloneVec(SwsVector *a){
2732 double *coeff= av_malloc(a->length*sizeof(double)); 2731 double *coeff= av_malloc(a->length*sizeof(double));
2733 int i; 2732 int i;
2734 SwsVector *vec= av_malloc(sizeof(SwsVector)); 2733 SwsVector *vec= av_malloc(sizeof(SwsVector));
2735 2734
2736 vec->coeff= coeff; 2735 vec->coeff= coeff;
2737 vec->length= a->length; 2736 vec->length= a->length;
2738 2737
2739 for(i=0; i<a->length; i++) coeff[i]= a->coeff[i]; 2738 for (i=0; i<a->length; i++) coeff[i]= a->coeff[i];
2740 2739
2741 return vec; 2740 return vec;
2742 } 2741 }
2743 2742
2744 void sws_printVec(SwsVector *a){ 2743 void sws_printVec(SwsVector *a){
2745 int i; 2744 int i;
2746 double max=0; 2745 double max=0;
2747 double min=0; 2746 double min=0;
2748 double range; 2747 double range;
2749 2748
2750 for(i=0; i<a->length; i++) 2749 for (i=0; i<a->length; i++)
2751 if(a->coeff[i]>max) max= a->coeff[i]; 2750 if (a->coeff[i]>max) max= a->coeff[i];
2752 2751
2753 for(i=0; i<a->length; i++) 2752 for (i=0; i<a->length; i++)
2754 if(a->coeff[i]<min) min= a->coeff[i]; 2753 if (a->coeff[i]<min) min= a->coeff[i];
2755 2754
2756 range= max - min; 2755 range= max - min;
2757 2756
2758 for(i=0; i<a->length; i++) 2757 for (i=0; i<a->length; i++)
2759 { 2758 {
2760 int x= (int)((a->coeff[i]-min)*60.0/range +0.5); 2759 int x= (int)((a->coeff[i]-min)*60.0/range +0.5);
2761 av_log(NULL, AV_LOG_DEBUG, "%1.3f ", a->coeff[i]); 2760 av_log(NULL, AV_LOG_DEBUG, "%1.3f ", a->coeff[i]);
2762 for(;x>0; x--) av_log(NULL, AV_LOG_DEBUG, " "); 2761 for (;x>0; x--) av_log(NULL, AV_LOG_DEBUG, " ");
2763 av_log(NULL, AV_LOG_DEBUG, "|\n"); 2762 av_log(NULL, AV_LOG_DEBUG, "|\n");
2764 } 2763 }
2765 } 2764 }
2766 2765
2767 void sws_freeVec(SwsVector *a){ 2766 void sws_freeVec(SwsVector *a){
2768 if(!a) return; 2767 if (!a) return;
2769 av_free(a->coeff); 2768 av_free(a->coeff);
2770 a->coeff=NULL; 2769 a->coeff=NULL;
2771 a->length=0; 2770 a->length=0;
2772 av_free(a); 2771 av_free(a);
2773 } 2772 }
2774 2773
2775 void sws_freeFilter(SwsFilter *filter){ 2774 void sws_freeFilter(SwsFilter *filter){
2776 if(!filter) return; 2775 if (!filter) return;
2777 2776
2778 if(filter->lumH) sws_freeVec(filter->lumH); 2777 if (filter->lumH) sws_freeVec(filter->lumH);
2779 if(filter->lumV) sws_freeVec(filter->lumV); 2778 if (filter->lumV) sws_freeVec(filter->lumV);
2780 if(filter->chrH) sws_freeVec(filter->chrH); 2779 if (filter->chrH) sws_freeVec(filter->chrH);
2781 if(filter->chrV) sws_freeVec(filter->chrV); 2780 if (filter->chrV) sws_freeVec(filter->chrV);
2782 av_free(filter); 2781 av_free(filter);
2783 } 2782 }
2784 2783
2785 2784
2786 void sws_freeContext(SwsContext *c){ 2785 void sws_freeContext(SwsContext *c){
2787 int i; 2786 int i;
2788 if(!c) return; 2787 if (!c) return;
2789 2788
2790 if(c->lumPixBuf) 2789 if (c->lumPixBuf)
2791 { 2790 {
2792 for(i=0; i<c->vLumBufSize; i++) 2791 for (i=0; i<c->vLumBufSize; i++)
2793 { 2792 {
2794 av_free(c->lumPixBuf[i]); 2793 av_free(c->lumPixBuf[i]);
2795 c->lumPixBuf[i]=NULL; 2794 c->lumPixBuf[i]=NULL;
2796 } 2795 }
2797 av_free(c->lumPixBuf); 2796 av_free(c->lumPixBuf);
2798 c->lumPixBuf=NULL; 2797 c->lumPixBuf=NULL;
2799 } 2798 }
2800 2799
2801 if(c->chrPixBuf) 2800 if (c->chrPixBuf)
2802 { 2801 {
2803 for(i=0; i<c->vChrBufSize; i++) 2802 for (i=0; i<c->vChrBufSize; i++)
2804 { 2803 {
2805 av_free(c->chrPixBuf[i]); 2804 av_free(c->chrPixBuf[i]);
2806 c->chrPixBuf[i]=NULL; 2805 c->chrPixBuf[i]=NULL;
2807 } 2806 }
2808 av_free(c->chrPixBuf); 2807 av_free(c->chrPixBuf);
2809 c->chrPixBuf=NULL; 2808 c->chrPixBuf=NULL;
2810 } 2809 }
2811 2810
2812 av_free(c->vLumFilter); 2811 av_free(c->vLumFilter);
2813 c->vLumFilter = NULL; 2812 c->vLumFilter = NULL;
2814 av_free(c->vChrFilter); 2813 av_free(c->vChrFilter);
2815 c->vChrFilter = NULL; 2814 c->vChrFilter = NULL;
2816 av_free(c->hLumFilter); 2815 av_free(c->hLumFilter);
2817 c->hLumFilter = NULL; 2816 c->hLumFilter = NULL;
2818 av_free(c->hChrFilter); 2817 av_free(c->hChrFilter);
2819 c->hChrFilter = NULL; 2818 c->hChrFilter = NULL;
2820 #ifdef HAVE_ALTIVEC 2819 #ifdef HAVE_ALTIVEC
2821 av_free(c->vYCoeffsBank); 2820 av_free(c->vYCoeffsBank);
2822 c->vYCoeffsBank = NULL; 2821 c->vYCoeffsBank = NULL;
2823 av_free(c->vCCoeffsBank); 2822 av_free(c->vCCoeffsBank);
2824 c->vCCoeffsBank = NULL; 2823 c->vCCoeffsBank = NULL;
2825 #endif 2824 #endif
2826 2825
2827 av_free(c->vLumFilterPos); 2826 av_free(c->vLumFilterPos);
2828 c->vLumFilterPos = NULL; 2827 c->vLumFilterPos = NULL;
2829 av_free(c->vChrFilterPos); 2828 av_free(c->vChrFilterPos);
2830 c->vChrFilterPos = NULL; 2829 c->vChrFilterPos = NULL;
2831 av_free(c->hLumFilterPos); 2830 av_free(c->hLumFilterPos);
2832 c->hLumFilterPos = NULL; 2831 c->hLumFilterPos = NULL;
2833 av_free(c->hChrFilterPos); 2832 av_free(c->hChrFilterPos);
2834 c->hChrFilterPos = NULL; 2833 c->hChrFilterPos = NULL;
2835 2834
2836 #if defined(ARCH_X86) && defined(CONFIG_GPL) 2835 #if defined(ARCH_X86) && defined(CONFIG_GPL)
2837 #ifdef MAP_ANONYMOUS 2836 #ifdef MAP_ANONYMOUS
2838 if(c->funnyYCode) munmap(c->funnyYCode, MAX_FUNNY_CODE_SIZE); 2837 if (c->funnyYCode) munmap(c->funnyYCode, MAX_FUNNY_CODE_SIZE);
2839 if(c->funnyUVCode) munmap(c->funnyUVCode, MAX_FUNNY_CODE_SIZE); 2838 if (c->funnyUVCode) munmap(c->funnyUVCode, MAX_FUNNY_CODE_SIZE);
2840 #else 2839 #else
2841 av_free(c->funnyYCode); 2840 av_free(c->funnyYCode);
2842 av_free(c->funnyUVCode); 2841 av_free(c->funnyUVCode);
2843 #endif 2842 #endif
2844 c->funnyYCode=NULL; 2843 c->funnyYCode=NULL;
2845 c->funnyUVCode=NULL; 2844 c->funnyUVCode=NULL;
2846 #endif /* defined(ARCH_X86) */ 2845 #endif /* defined(ARCH_X86) */
2847 2846
2848 av_free(c->lumMmx2Filter); 2847 av_free(c->lumMmx2Filter);
2849 c->lumMmx2Filter=NULL; 2848 c->lumMmx2Filter=NULL;
2850 av_free(c->chrMmx2Filter); 2849 av_free(c->chrMmx2Filter);
2851 c->chrMmx2Filter=NULL; 2850 c->chrMmx2Filter=NULL;
2852 av_free(c->lumMmx2FilterPos); 2851 av_free(c->lumMmx2FilterPos);
2853 c->lumMmx2FilterPos=NULL; 2852 c->lumMmx2FilterPos=NULL;
2854 av_free(c->chrMmx2FilterPos); 2853 av_free(c->chrMmx2FilterPos);
2855 c->chrMmx2FilterPos=NULL; 2854 c->chrMmx2FilterPos=NULL;
2856 av_free(c->yuvTable); 2855 av_free(c->yuvTable);
2857 c->yuvTable=NULL; 2856 c->yuvTable=NULL;
2858 2857
2859 av_free(c); 2858 av_free(c);
2860 } 2859 }
2861 2860
2862 /** 2861 /**
2863 * Checks if context is valid or reallocs a new one instead. 2862 * Checks if context is valid or reallocs a new one instead.
2864 * If context is NULL, just calls sws_getContext() to get a new one. 2863 * If context is NULL, just calls sws_getContext() to get a new one.
2868 * 2867 *
2869 * Be warned that srcFilter, dstFilter are not checked, they are 2868 * Be warned that srcFilter, dstFilter are not checked, they are
2870 * assumed to remain valid. 2869 * assumed to remain valid.
2871 */ 2870 */
2872 struct SwsContext *sws_getCachedContext(struct SwsContext *context, 2871 struct SwsContext *sws_getCachedContext(struct SwsContext *context,
2873 int srcW, int srcH, int srcFormat, 2872 int srcW, int srcH, int srcFormat,
2874 int dstW, int dstH, int dstFormat, int flags, 2873 int dstW, int dstH, int dstFormat, int flags,
2875 SwsFilter *srcFilter, SwsFilter *dstFilter, double *param) 2874 SwsFilter *srcFilter, SwsFilter *dstFilter, double *param)
2876 { 2875 {
2877 if (context != NULL) { 2876 if (context != NULL) {
2878 if ((context->srcW != srcW) || (context->srcH != srcH) || 2877 if ((context->srcW != srcW) || (context->srcH != srcH) ||
2879 (context->srcFormat != srcFormat) || 2878 (context->srcFormat != srcFormat) ||
2880 (context->dstW != dstW) || (context->dstH != dstH) || 2879 (context->dstW != dstW) || (context->dstH != dstH) ||
2885 context = NULL; 2884 context = NULL;
2886 } 2885 }
2887 } 2886 }
2888 if (context == NULL) { 2887 if (context == NULL) {
2889 return sws_getContext(srcW, srcH, srcFormat, 2888 return sws_getContext(srcW, srcH, srcFormat,
2890 dstW, dstH, dstFormat, flags, 2889 dstW, dstH, dstFormat, flags,
2891 srcFilter, dstFilter, param); 2890 srcFilter, dstFilter, param);
2892 } 2891 }
2893 return context; 2892 return context;
2894 } 2893 }
2895 2894