comparison libswscale/swscale.c @ 28316:da307fb1c239

spelling/grammar cosmetics
author diego
date Sun, 25 Jan 2009 11:57:17 +0000
parents 82cf99fc9d2b
children 78e355443da9
comparison
equal deleted inserted replaced
28315:82cf99fc9d2b 28316:da307fb1c239
44 BGR15 -> BGR16 44 BGR15 -> BGR16
45 BGR16 -> BGR16 45 BGR16 -> BGR16
46 YVU9 -> YV12 46 YVU9 -> YV12
47 47
48 untested special converters 48 untested special converters
49 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be ok) 49 YV12/I420 -> BGR15/BGR24/BGR32 (it is the yuv2rgb stuff, so it should be OK)
50 YV12/I420 -> YV12/I420 50 YV12/I420 -> YV12/I420
51 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format 51 YUY2/BGR15/BGR24/BGR32/RGB24/RGB32 -> same format
52 BGR24 -> BGR32 & RGB24 -> RGB32 52 BGR24 -> BGR32 & RGB24 -> RGB32
53 BGR32 -> BGR24 & RGB32 -> RGB24 53 BGR32 -> BGR24 & RGB32 -> RGB24
54 BGR24 -> YV12 54 BGR24 -> YV12
87 //#undef HAVE_MMX 87 //#undef HAVE_MMX
88 //#undef ARCH_X86 88 //#undef ARCH_X86
89 //#define WORDS_BIGENDIAN 89 //#define WORDS_BIGENDIAN
90 #define DITHER1XBPP 90 #define DITHER1XBPP
91 91
92 #define FAST_BGR2YV12 // use 7 bit coeffs instead of 15bit 92 #define FAST_BGR2YV12 // use 7 bit coefficients instead of 15 bit
93 93
94 #define RET 0xC3 //near return opcode for X86 94 #define RET 0xC3 //near return opcode for x86
95 95
96 #ifdef M_PI 96 #ifdef M_PI
97 #define PI M_PI 97 #define PI M_PI
98 #else 98 #else
99 #define PI 3.14159265358979323846 99 #define PI 3.14159265358979323846
192 Special versions: fast Y 1:1 scaling (no interpolation in y direction) 192 Special versions: fast Y 1:1 scaling (no interpolation in y direction)
193 193
194 TODO 194 TODO
195 more intelligent misalignment avoidance for the horizontal scaler 195 more intelligent misalignment avoidance for the horizontal scaler
196 write special vertical cubic upscale version 196 write special vertical cubic upscale version
197 Optimize C code (yv12 / minmax) 197 optimize C code (YV12 / minmax)
198 add support for packed pixel yuv input & output 198 add support for packed pixel YUV input & output
199 add support for Y8 output 199 add support for Y8 output
200 optimize bgr24 & bgr32 200 optimize BGR24 & BGR32
201 add BGR4 output support 201 add BGR4 output support
202 write special BGR->BGR scaler 202 write special BGR->BGR scaler
203 */ 203 */
204 204
205 #if ARCH_X86 && CONFIG_GPL 205 #if ARCH_X86 && CONFIG_GPL
469 469
470 static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, 470 static inline void yuv2yuvXinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
471 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, 471 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
472 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW) 472 uint8_t *dest, uint8_t *uDest, uint8_t *vDest, int dstW, int chrDstW)
473 { 473 {
474 //FIXME Optimize (just quickly writen not opti..) 474 //FIXME Optimize (just quickly written not optimized..)
475 int i; 475 int i;
476 for (i=0; i<dstW; i++) 476 for (i=0; i<dstW; i++)
477 { 477 {
478 int val=1<<18; 478 int val=1<<18;
479 int j; 479 int j;
502 502
503 static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize, 503 static inline void yuv2nv12XinC(int16_t *lumFilter, int16_t **lumSrc, int lumFilterSize,
504 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize, 504 int16_t *chrFilter, int16_t **chrSrc, int chrFilterSize,
505 uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat) 505 uint8_t *dest, uint8_t *uDest, int dstW, int chrDstW, int dstFormat)
506 { 506 {
507 //FIXME Optimize (just quickly writen not opti..) 507 //FIXME Optimize (just quickly written not optimized..)
508 int i; 508 int i;
509 for (i=0; i<dstW; i++) 509 for (i=0; i<dstW; i++)
510 { 510 {
511 int val=1<<18; 511 int val=1<<18;
512 int j; 512 int j;
651 if (Y2>65535) Y2=65535; \ 651 if (Y2>65535) Y2=65535; \
652 else if (Y2<0)Y2=0; \ 652 else if (Y2<0)Y2=0; \
653 } 653 }
654 654
655 #define YSCALE_YUV_2_RGBX_C(type) \ 655 #define YSCALE_YUV_2_RGBX_C(type) \
656 YSCALE_YUV_2_PACKEDX_C(type) /* FIXME fix tables so that cliping is not needed and then use _NOCLIP*/\ 656 YSCALE_YUV_2_PACKEDX_C(type) /* FIXME fix tables so that clipping is not needed and then use _NOCLIP*/\
657 r = (type *)c->table_rV[V]; \ 657 r = (type *)c->table_rV[V]; \
658 g = (type *)(c->table_gU[U] + c->table_gV[V]); \ 658 g = (type *)(c->table_gU[U] + c->table_gV[V]); \
659 b = (type *)c->table_bU[U]; \ 659 b = (type *)c->table_bU[U]; \
660 660
661 #define YSCALE_YUV_2_PACKED2_C \ 661 #define YSCALE_YUV_2_PACKED2_C \
951 default: 951 default:
952 assert(0); 952 assert(0);
953 } 953 }
954 } 954 }
955 955
956 //Note: we have C, X86, MMX, MMX2, 3DNOW version therse no 3DNOW+MMX2 one 956 //Note: we have C, X86, MMX, MMX2, 3DNOW versions, there is no 3DNOW+MMX2 one
957 //Plain C versions 957 //Plain C versions
958 #if !HAVE_MMX || defined (RUNTIME_CPUDETECT) || !CONFIG_GPL 958 #if !HAVE_MMX || defined (RUNTIME_CPUDETECT) || !CONFIG_GPL
959 #define COMPILE_C 959 #define COMPILE_C
960 #endif 960 #endif
961 961
1002 #include "swscale_template.c" 1002 #include "swscale_template.c"
1003 #endif 1003 #endif
1004 1004
1005 #if ARCH_X86 1005 #if ARCH_X86
1006 1006
1007 //X86 versions 1007 //x86 versions
1008 /* 1008 /*
1009 #undef RENAME 1009 #undef RENAME
1010 #undef HAVE_MMX 1010 #undef HAVE_MMX
1011 #undef HAVE_MMX2 1011 #undef HAVE_MMX2
1012 #undef HAVE_3DNOW 1012 #undef HAVE_3DNOW
1053 #include "swscale_template.c" 1053 #include "swscale_template.c"
1054 #endif 1054 #endif
1055 1055
1056 #endif //ARCH_X86 1056 #endif //ARCH_X86
1057 1057
1058 // minor note: the HAVE_xyz is messed up after that line so don't use it 1058 // minor note: the HAVE_xyz are messed up after this line so don't use them
1059 1059
1060 static double getSplineCoeff(double a, double b, double c, double d, double dist) 1060 static double getSplineCoeff(double a, double b, double c, double d, double dist)
1061 { 1061 {
1062 // printf("%f %f %f %f %f\n", a,b,c,d,dist); 1062 // printf("%f %f %f %f %f\n", a,b,c,d,dist);
1063 if (dist<=1.0) return ((d*dist + c)*dist + b)*dist +a; 1063 if (dist<=1.0) return ((d*dist + c)*dist + b)*dist +a;
1083 #if ARCH_X86 1083 #if ARCH_X86
1084 if (flags & SWS_CPU_CAPS_MMX) 1084 if (flags & SWS_CPU_CAPS_MMX)
1085 __asm__ volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions) 1085 __asm__ volatile("emms\n\t"::: "memory"); //FIXME this should not be required but it IS (even for non-MMX versions)
1086 #endif 1086 #endif
1087 1087
1088 // Note the +1 is for the MMXscaler which reads over the end 1088 // NOTE: the +1 is for the MMX scaler which reads over the end
1089 *filterPos = av_malloc((dstW+1)*sizeof(int16_t)); 1089 *filterPos = av_malloc((dstW+1)*sizeof(int16_t));
1090 1090
1091 if (FFABS(xInc - 0x10000) <10) // unscaled 1091 if (FFABS(xInc - 0x10000) <10) // unscaled
1092 { 1092 {
1093 int i; 1093 int i;
1132 { 1132 {
1133 int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16; 1133 int xx= (xDstInSrc - ((filterSize-1)<<15) + (1<<15))>>16;
1134 int j; 1134 int j;
1135 1135
1136 (*filterPos)[i]= xx; 1136 (*filterPos)[i]= xx;
1137 //Bilinear upscale / linear interpolate / Area averaging 1137 //bilinear upscale / linear interpolate / area averaging
1138 for (j=0; j<filterSize; j++) 1138 for (j=0; j<filterSize; j++)
1139 { 1139 {
1140 int64_t coeff= fone - FFABS((xx<<16) - xDstInSrc)*(fone>>16); 1140 int64_t coeff= fone - FFABS((xx<<16) - xDstInSrc)*(fone>>16);
1141 if (coeff<0) coeff=0; 1141 if (coeff<0) coeff=0;
1142 filter[i*filterSize + j]= coeff; 1142 filter[i*filterSize + j]= coeff;
1313 if (cutOff > SWS_MAX_REDUCE_CUTOFF*fone) break; 1313 if (cutOff > SWS_MAX_REDUCE_CUTOFF*fone) break;
1314 1314
1315 /* preserve monotonicity because the core can't handle the filter otherwise */ 1315 /* preserve monotonicity because the core can't handle the filter otherwise */
1316 if (i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1]) break; 1316 if (i<dstW-1 && (*filterPos)[i] >= (*filterPos)[i+1]) break;
1317 1317
1318 // Move filter coeffs left 1318 // move filter coefficients left
1319 for (k=1; k<filter2Size; k++) 1319 for (k=1; k<filter2Size; k++)
1320 filter2[i*filter2Size + k - 1]= filter2[i*filter2Size + k]; 1320 filter2[i*filter2Size + k - 1]= filter2[i*filter2Size + k];
1321 filter2[i*filter2Size + k - 1]= 0; 1321 filter2[i*filter2Size + k - 1]= 0;
1322 (*filterPos)[i]++; 1322 (*filterPos)[i]++;
1323 } 1323 }
1339 // we can handle the special case 4, 1339 // we can handle the special case 4,
1340 // so we don't want to go to the full 8 1340 // so we don't want to go to the full 8
1341 if (minFilterSize < 5) 1341 if (minFilterSize < 5)
1342 filterAlign = 4; 1342 filterAlign = 4;
1343 1343
1344 // we really don't want to waste our time 1344 // We really don't want to waste our time
1345 // doing useless computation, so fall-back on 1345 // doing useless computation, so fall back on
1346 // the scalar C code for very small filter. 1346 // the scalar C code for very small filters.
1347 // vectorizing is worth it only if you have 1347 // Vectorizing is worth it only if you have a
1348 // decent-sized vector. 1348 // decent-sized vector.
1349 if (minFilterSize < 3) 1349 if (minFilterSize < 3)
1350 filterAlign = 1; 1350 filterAlign = 1;
1351 } 1351 }
1352 1352
1379 filter[i*filterSize + j]= 0; 1379 filter[i*filterSize + j]= 0;
1380 } 1380 }
1381 } 1381 }
1382 1382
1383 1383
1384 //FIXME try to align filterpos if possible 1384 //FIXME try to align filterPos if possible
1385 1385
1386 //fix borders 1386 //fix borders
1387 for (i=0; i<dstW; i++) 1387 for (i=0; i<dstW; i++)
1388 { 1388 {
1389 int j; 1389 int j;
1390 if ((*filterPos)[i] < 0) 1390 if ((*filterPos)[i] < 0)
1391 { 1391 {
1392 // Move filter coeffs left to compensate for filterPos 1392 // move filter coefficients left to compensate for filterPos
1393 for (j=1; j<filterSize; j++) 1393 for (j=1; j<filterSize; j++)
1394 { 1394 {
1395 int left= FFMAX(j + (*filterPos)[i], 0); 1395 int left= FFMAX(j + (*filterPos)[i], 0);
1396 filter[i*filterSize + left] += filter[i*filterSize + j]; 1396 filter[i*filterSize + left] += filter[i*filterSize + j];
1397 filter[i*filterSize + j]=0; 1397 filter[i*filterSize + j]=0;
1400 } 1400 }
1401 1401
1402 if ((*filterPos)[i] + filterSize > srcW) 1402 if ((*filterPos)[i] + filterSize > srcW)
1403 { 1403 {
1404 int shift= (*filterPos)[i] + filterSize - srcW; 1404 int shift= (*filterPos)[i] + filterSize - srcW;
1405 // Move filter coeffs right to compensate for filterPos 1405 // move filter coefficients right to compensate for filterPos
1406 for (j=filterSize-2; j>=0; j--) 1406 for (j=filterSize-2; j>=0; j--)
1407 { 1407 {
1408 int right= FFMIN(j + shift, filterSize-1); 1408 int right= FFMIN(j + shift, filterSize-1);
1409 filter[i*filterSize +right] += filter[i*filterSize +j]; 1409 filter[i*filterSize +right] += filter[i*filterSize +j];
1410 filter[i*filterSize +j]=0; 1410 filter[i*filterSize +j]=0;
1411 } 1411 }
1412 (*filterPos)[i]= srcW - filterSize; 1412 (*filterPos)[i]= srcW - filterSize;
1413 } 1413 }
1414 } 1414 }
1415 1415
1416 // Note the +1 is for the MMXscaler which reads over the end 1416 // Note the +1 is for the MMX scaler which reads over the end
1417 /* align at 16 for AltiVec (needed by hScale_altivec_real) */ 1417 /* align at 16 for AltiVec (needed by hScale_altivec_real) */
1418 *outFilter= av_mallocz(*outFilterSize*(dstW+1)*sizeof(int16_t)); 1418 *outFilter= av_mallocz(*outFilterSize*(dstW+1)*sizeof(int16_t));
1419 1419
1420 /* Normalize & Store in outFilter */ 1420 /* normalize & store in outFilter */
1421 for (i=0; i<dstW; i++) 1421 for (i=0; i<dstW; i++)
1422 { 1422 {
1423 int j; 1423 int j;
1424 int64_t error=0; 1424 int64_t error=0;
1425 int64_t sum=0; 1425 int64_t sum=0;
2073 else if (r> 0x7FFF) return 0x7FFF; 2073 else if (r> 0x7FFF) return 0x7FFF;
2074 else return r; 2074 else return r;
2075 } 2075 }
2076 2076
2077 /** 2077 /**
2078 * @param inv_table the yuv2rgb coeffs, normally Inverse_Table_6_9[x] 2078 * @param inv_table the yuv2rgb coefficients, normally Inverse_Table_6_9[x]
2079 * @param fullRange if 1 then the luma range is 0..255 if 0 it is 16..235 2079 * @param fullRange if 1 then the luma range is 0..255 if 0 it is 16..235
2080 * @return -1 if not supported 2080 * @return -1 if not supported
2081 */ 2081 */
2082 int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation){ 2082 int sws_setColorspaceDetails(SwsContext *c, const int inv_table[4], int srcRange, const int table[4], int dstRange, int brightness, int contrast, int saturation){
2083 int64_t crv = inv_table[0]; 2083 int64_t crv = inv_table[0];
2240 |SWS_SINC 2240 |SWS_SINC
2241 |SWS_SPLINE 2241 |SWS_SPLINE
2242 |SWS_BICUBLIN); 2242 |SWS_BICUBLIN);
2243 if(!i || (i & (i-1))) 2243 if(!i || (i & (i-1)))
2244 { 2244 {
2245 av_log(NULL, AV_LOG_ERROR, "swScaler: Exactly one scaler algorithm must be choosen\n"); 2245 av_log(NULL, AV_LOG_ERROR, "swScaler: Exactly one scaler algorithm must be chosen\n");
2246 return NULL; 2246 return NULL;
2247 } 2247 }
2248 2248
2249 /* sanity check */ 2249 /* sanity check */
2250 if (srcW<4 || srcH<1 || dstW<8 || dstH<1) //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code 2250 if (srcW<4 || srcH<1 || dstW<8 || dstH<1) //FIXME check if these are enough and try to lower them after fixing the relevant parts of the code
2252 av_log(NULL, AV_LOG_ERROR, "swScaler: %dx%d -> %dx%d is invalid scaling dimension\n", 2252 av_log(NULL, AV_LOG_ERROR, "swScaler: %dx%d -> %dx%d is invalid scaling dimension\n",
2253 srcW, srcH, dstW, dstH); 2253 srcW, srcH, dstW, dstH);
2254 return NULL; 2254 return NULL;
2255 } 2255 }
2256 if(srcW > VOFW || dstW > VOFW){ 2256 if(srcW > VOFW || dstW > VOFW){
2257 av_log(NULL, AV_LOG_ERROR, "swScaler: Compile time max width is "AV_STRINGIFY(VOFW)" change VOF/VOFW and recompile\n"); 2257 av_log(NULL, AV_LOG_ERROR, "swScaler: Compile-time maximum width is "AV_STRINGIFY(VOFW)" change VOF/VOFW and recompile\n");
2258 return NULL; 2258 return NULL;
2259 } 2259 }
2260 2260
2261 if (!dstFilter) dstFilter= &dummyFilter; 2261 if (!dstFilter) dstFilter= &dummyFilter;
2262 if (!srcFilter) srcFilter= &dummyFilter; 2262 if (!srcFilter) srcFilter= &dummyFilter;
2286 if (srcFilter->chrH && srcFilter->chrH->length>1) usesHFilter=1; 2286 if (srcFilter->chrH && srcFilter->chrH->length>1) usesHFilter=1;
2287 2287
2288 getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat); 2288 getSubSampleFactors(&c->chrSrcHSubSample, &c->chrSrcVSubSample, srcFormat);
2289 getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat); 2289 getSubSampleFactors(&c->chrDstHSubSample, &c->chrDstVSubSample, dstFormat);
2290 2290
2291 // reuse chroma for 2 pixles rgb/bgr unless user wants full chroma interpolation 2291 // reuse chroma for 2 pixels RGB/BGR unless user wants full chroma interpolation
2292 if ((isBGR(dstFormat) || isRGB(dstFormat)) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1; 2292 if ((isBGR(dstFormat) || isRGB(dstFormat)) && !(flags&SWS_FULL_CHR_H_INT)) c->chrDstHSubSample=1;
2293 2293
2294 // drop some chroma lines if the user wants it 2294 // drop some chroma lines if the user wants it
2295 c->vChrDrop= (flags&SWS_SRC_V_CHR_DROP_MASK)>>SWS_SRC_V_CHR_DROP_SHIFT; 2295 c->vChrDrop= (flags&SWS_SRC_V_CHR_DROP_MASK)>>SWS_SRC_V_CHR_DROP_SHIFT;
2296 c->chrSrcVSubSample+= c->vChrDrop; 2296 c->chrSrcVSubSample+= c->vChrDrop;
2297 2297
2298 // drop every 2. pixel for chroma calculation unless user wants full chroma 2298 // drop every other pixel for chroma calculation unless user wants full chroma
2299 if ((isBGR(srcFormat) || isRGB(srcFormat)) && !(flags&SWS_FULL_CHR_H_INP) 2299 if ((isBGR(srcFormat) || isRGB(srcFormat)) && !(flags&SWS_FULL_CHR_H_INP)
2300 && srcFormat!=PIX_FMT_RGB8 && srcFormat!=PIX_FMT_BGR8 2300 && srcFormat!=PIX_FMT_RGB8 && srcFormat!=PIX_FMT_BGR8
2301 && srcFormat!=PIX_FMT_RGB4 && srcFormat!=PIX_FMT_BGR4 2301 && srcFormat!=PIX_FMT_RGB4 && srcFormat!=PIX_FMT_BGR4
2302 && srcFormat!=PIX_FMT_RGB4_BYTE && srcFormat!=PIX_FMT_BGR4_BYTE 2302 && srcFormat!=PIX_FMT_RGB4_BYTE && srcFormat!=PIX_FMT_BGR4_BYTE
2303 && ((dstW>>c->chrDstHSubSample) <= (srcW>>1) || (flags&(SWS_FAST_BILINEAR|SWS_POINT)))) 2303 && ((dstW>>c->chrDstHSubSample) <= (srcW>>1) || (flags&(SWS_FAST_BILINEAR|SWS_POINT))))
2320 c->chrDstW= -((-dstW) >> c->chrDstHSubSample); 2320 c->chrDstW= -((-dstW) >> c->chrDstHSubSample);
2321 c->chrDstH= -((-dstH) >> c->chrDstVSubSample); 2321 c->chrDstH= -((-dstH) >> c->chrDstVSubSample);
2322 2322
2323 sws_setColorspaceDetails(c, Inverse_Table_6_9[SWS_CS_DEFAULT], srcRange, Inverse_Table_6_9[SWS_CS_DEFAULT] /* FIXME*/, dstRange, 0, 1<<16, 1<<16); 2323 sws_setColorspaceDetails(c, Inverse_Table_6_9[SWS_CS_DEFAULT], srcRange, Inverse_Table_6_9[SWS_CS_DEFAULT] /* FIXME*/, dstRange, 0, 1<<16, 1<<16);
2324 2324
2325 /* unscaled special Cases */ 2325 /* unscaled special cases */
2326 if (unscaled && !usesHFilter && !usesVFilter && (srcRange == dstRange || isBGR(dstFormat) || isRGB(dstFormat))) 2326 if (unscaled && !usesHFilter && !usesVFilter && (srcRange == dstRange || isBGR(dstFormat) || isRGB(dstFormat)))
2327 { 2327 {
2328 /* yv12_to_nv12 */ 2328 /* yv12_to_nv12 */
2329 if (srcFormat == PIX_FMT_YUV420P && (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21)) 2329 if (srcFormat == PIX_FMT_YUV420P && (dstFormat == PIX_FMT_NV12 || dstFormat == PIX_FMT_NV21))
2330 { 2330 {
2346 2346
2347 /* bgr24toYV12 */ 2347 /* bgr24toYV12 */
2348 if (srcFormat==PIX_FMT_BGR24 && dstFormat==PIX_FMT_YUV420P && !(flags & SWS_ACCURATE_RND)) 2348 if (srcFormat==PIX_FMT_BGR24 && dstFormat==PIX_FMT_YUV420P && !(flags & SWS_ACCURATE_RND))
2349 c->swScale= bgr24toyv12Wrapper; 2349 c->swScale= bgr24toyv12Wrapper;
2350 2350
2351 /* rgb/bgr -> rgb/bgr (no dither needed forms) */ 2351 /* RGB/BGR -> RGB/BGR (no dither needed forms) */
2352 if ( (isBGR(srcFormat) || isRGB(srcFormat)) 2352 if ( (isBGR(srcFormat) || isRGB(srcFormat))
2353 && (isBGR(dstFormat) || isRGB(dstFormat)) 2353 && (isBGR(dstFormat) || isRGB(dstFormat))
2354 && srcFormat != PIX_FMT_BGR8 && dstFormat != PIX_FMT_BGR8 2354 && srcFormat != PIX_FMT_BGR8 && dstFormat != PIX_FMT_BGR8
2355 && srcFormat != PIX_FMT_RGB8 && dstFormat != PIX_FMT_RGB8 2355 && srcFormat != PIX_FMT_RGB8 && dstFormat != PIX_FMT_RGB8
2356 && srcFormat != PIX_FMT_BGR4 && dstFormat != PIX_FMT_BGR4 2356 && srcFormat != PIX_FMT_BGR4 && dstFormat != PIX_FMT_BGR4
2446 { 2446 {
2447 c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0; 2447 c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
2448 if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) 2448 if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR))
2449 { 2449 {
2450 if (flags&SWS_PRINT_INFO) 2450 if (flags&SWS_PRINT_INFO)
2451 av_log(c, AV_LOG_INFO, "output Width is not a multiple of 32 -> no MMX2 scaler\n"); 2451 av_log(c, AV_LOG_INFO, "output width is not a multiple of 32 -> no MMX2 scaler\n");
2452 } 2452 }
2453 if (usesHFilter) c->canMMX2BeUsed=0; 2453 if (usesHFilter) c->canMMX2BeUsed=0;
2454 } 2454 }
2455 else 2455 else
2456 c->canMMX2BeUsed=0; 2456 c->canMMX2BeUsed=0;
2469 if (c->canMMX2BeUsed) 2469 if (c->canMMX2BeUsed)
2470 { 2470 {
2471 c->lumXInc+= 20; 2471 c->lumXInc+= 20;
2472 c->chrXInc+= 20; 2472 c->chrXInc+= 20;
2473 } 2473 }
2474 //we don't use the x86asm scaler if mmx is available 2474 //we don't use the x86 asm scaler if MMX is available
2475 else if (flags & SWS_CPU_CAPS_MMX) 2475 else if (flags & SWS_CPU_CAPS_MMX)
2476 { 2476 {
2477 c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20; 2477 c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
2478 c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20; 2478 c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20;
2479 } 2479 }
2515 2515
2516 initMMX2HScaler( dstW, c->lumXInc, c->funnyYCode , c->lumMmx2Filter, c->lumMmx2FilterPos, 8); 2516 initMMX2HScaler( dstW, c->lumXInc, c->funnyYCode , c->lumMmx2Filter, c->lumMmx2FilterPos, 8);
2517 initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode, c->chrMmx2Filter, c->chrMmx2FilterPos, 4); 2517 initMMX2HScaler(c->chrDstW, c->chrXInc, c->funnyUVCode, c->chrMmx2Filter, c->chrMmx2FilterPos, 4);
2518 } 2518 }
2519 #endif /* defined(COMPILE_MMX2) */ 2519 #endif /* defined(COMPILE_MMX2) */
2520 } // Init Horizontal stuff 2520 } // initialize horizontal stuff
2521 2521
2522 2522
2523 2523
2524 /* precalculate vertical scaler filter coefficients */ 2524 /* precalculate vertical scaler filter coefficients */
2525 { 2525 {
2555 p[j] = c->vChrFilter[i]; 2555 p[j] = c->vChrFilter[i];
2556 } 2556 }
2557 #endif 2557 #endif
2558 } 2558 }
2559 2559
2560 // Calculate Buffer Sizes so that they won't run out while handling these damn slices 2560 // calculate buffer sizes so that they won't run out while handling these damn slices
2561 c->vLumBufSize= c->vLumFilterSize; 2561 c->vLumBufSize= c->vLumFilterSize;
2562 c->vChrBufSize= c->vChrFilterSize; 2562 c->vChrBufSize= c->vChrFilterSize;
2563 for (i=0; i<dstH; i++) 2563 for (i=0; i<dstH; i++)
2564 { 2564 {
2565 int chrI= i*c->chrDstH / dstH; 2565 int chrI= i*c->chrDstH / dstH;
2575 } 2575 }
2576 2576
2577 // allocate pixbufs (we use dynamic allocation because otherwise we would need to 2577 // allocate pixbufs (we use dynamic allocation because otherwise we would need to
2578 c->lumPixBuf= av_malloc(c->vLumBufSize*2*sizeof(int16_t*)); 2578 c->lumPixBuf= av_malloc(c->vLumBufSize*2*sizeof(int16_t*));
2579 c->chrPixBuf= av_malloc(c->vChrBufSize*2*sizeof(int16_t*)); 2579 c->chrPixBuf= av_malloc(c->vChrBufSize*2*sizeof(int16_t*));
2580 //Note we need at least one pixel more at the end because of the mmx code (just in case someone wanna replace the 4000/8000) 2580 //Note we need at least one pixel more at the end because of the MMX code (just in case someone wants to replace the 4000/8000)
2581 /* align at 16 bytes for AltiVec */ 2581 /* align at 16 bytes for AltiVec */
2582 for (i=0; i<c->vLumBufSize; i++) 2582 for (i=0; i<c->vLumBufSize; i++)
2583 c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= av_mallocz(VOF+1); 2583 c->lumPixBuf[i]= c->lumPixBuf[i+c->vLumBufSize]= av_mallocz(VOF+1);
2584 for (i=0; i<c->vChrBufSize; i++) 2584 for (i=0; i<c->vChrBufSize; i++)
2585 c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= av_malloc((VOF+1)*2); 2585 c->chrPixBuf[i]= c->chrPixBuf[i+c->vChrBufSize]= av_malloc((VOF+1)*2);
2666 } 2666 }
2667 } 2667 }
2668 else 2668 else
2669 { 2669 {
2670 #if ARCH_X86 2670 #if ARCH_X86
2671 av_log(c, AV_LOG_VERBOSE, "using X86-Asm scaler for horizontal scaling\n"); 2671 av_log(c, AV_LOG_VERBOSE, "using x86 asm scaler for horizontal scaling\n");
2672 #else 2672 #else
2673 if (flags & SWS_FAST_BILINEAR) 2673 if (flags & SWS_FAST_BILINEAR)
2674 av_log(c, AV_LOG_VERBOSE, "using FAST_BILINEAR C scaler for horizontal scaling\n"); 2674 av_log(c, AV_LOG_VERBOSE, "using FAST_BILINEAR C scaler for horizontal scaling\n");
2675 else 2675 else
2676 av_log(c, AV_LOG_VERBOSE, "using C scaler for horizontal scaling\n"); 2676 av_log(c, AV_LOG_VERBOSE, "using C scaler for horizontal scaling\n");
2693 else 2693 else
2694 av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); 2694 av_log(c, AV_LOG_VERBOSE, "using n-tap %s scaler for vertical scaling (BGR)\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2695 } 2695 }
2696 2696
2697 if (dstFormat==PIX_FMT_BGR24) 2697 if (dstFormat==PIX_FMT_BGR24)
2698 av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR24 Converter\n", 2698 av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR24 converter\n",
2699 (flags & SWS_CPU_CAPS_MMX2) ? "MMX2" : ((flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C")); 2699 (flags & SWS_CPU_CAPS_MMX2) ? "MMX2" : ((flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"));
2700 else if (dstFormat==PIX_FMT_RGB32) 2700 else if (dstFormat==PIX_FMT_RGB32)
2701 av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR32 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); 2701 av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR32 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2702 else if (dstFormat==PIX_FMT_BGR565) 2702 else if (dstFormat==PIX_FMT_BGR565)
2703 av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR16 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); 2703 av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR16 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2704 else if (dstFormat==PIX_FMT_BGR555) 2704 else if (dstFormat==PIX_FMT_BGR555)
2705 av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR15 Converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C"); 2705 av_log(c, AV_LOG_VERBOSE, "using %s YV12->BGR15 converter\n", (flags & SWS_CPU_CAPS_MMX) ? "MMX" : "C");
2706 2706
2707 av_log(c, AV_LOG_VERBOSE, "%dx%d -> %dx%d\n", srcW, srcH, dstW, dstH); 2707 av_log(c, AV_LOG_VERBOSE, "%dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
2708 } 2708 }
2709 if (flags & SWS_PRINT_INFO) 2709 if (flags & SWS_PRINT_INFO)
2710 { 2710 {
2711 av_log(c, AV_LOG_DEBUG, "Lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n", 2711 av_log(c, AV_LOG_DEBUG, "lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
2712 c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc); 2712 c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc);
2713 av_log(c, AV_LOG_DEBUG, "Chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n", 2713 av_log(c, AV_LOG_DEBUG, "chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
2714 c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc); 2714 c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc);
2715 } 2715 }
2716 2716
2717 c->swScale= getSwsFunc(flags); 2717 c->swScale= getSwsFunc(flags);
2718 return c; 2718 return c;
2719 } 2719 }
2720 2720
2721 /** 2721 /**
2722 * swscale wrapper, so we don't need to export the SwsContext. 2722 * swscale wrapper, so we don't need to export the SwsContext.
2723 * assumes planar YUV to be in YUV order instead of YVU 2723 * Assumes planar YUV to be in YUV order instead of YVU.
2724 */ 2724 */
2725 int sws_scale(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, 2725 int sws_scale(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
2726 int srcSliceH, uint8_t* dst[], int dstStride[]){ 2726 int srcSliceH, uint8_t* dst[], int dstStride[]){
2727 int i; 2727 int i;
2728 uint8_t* src2[4]= {src[0], src[1], src[2]}; 2728 uint8_t* src2[4]= {src[0], src[1], src[2]};
2818 return c->swScale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH, srcSliceH, dst2, dstStride2); 2818 return c->swScale(c, src2, srcStride2, c->srcH-srcSliceY-srcSliceH, srcSliceH, dst2, dstStride2);
2819 } 2819 }
2820 } 2820 }
2821 2821
2822 /** 2822 /**
2823 * swscale wrapper, so we don't need to export the SwsContext 2823 * swscale wrapper, so we don't need to export the SwsContext.
2824 */ 2824 */
2825 int sws_scale_ordered(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY, 2825 int sws_scale_ordered(SwsContext *c, uint8_t* src[], int srcStride[], int srcSliceY,
2826 int srcSliceH, uint8_t* dst[], int dstStride[]){ 2826 int srcSliceH, uint8_t* dst[], int dstStride[]){
2827 return sws_scale(c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride); 2827 return sws_scale(c, src, srcStride, srcSliceY, srcSliceH, dst, dstStride);
2828 } 2828 }
2884 2884
2885 return filter; 2885 return filter;
2886 } 2886 }
2887 2887
2888 /** 2888 /**
2889 * returns a normalized gaussian curve used to filter stuff 2889 * Returns a normalized Gaussian curve used to filter stuff
2890 * quality=3 is high quality, lowwer is lowwer quality 2890 * quality=3 is high quality, lower is lower quality.
2891 */ 2891 */
2892 SwsVector *sws_getGaussianVec(double variance, double quality){ 2892 SwsVector *sws_getGaussianVec(double variance, double quality){
2893 const int length= (int)(variance*quality + 0.5) | 1; 2893 const int length= (int)(variance*quality + 0.5) | 1;
2894 int i; 2894 int i;
2895 double *coeff= av_malloc(length*sizeof(double)); 2895 double *coeff= av_malloc(length*sizeof(double));
3166 } 3166 }
3167 3167
3168 /** 3168 /**
3169 * Checks if context is valid or reallocs a new one instead. 3169 * Checks if context is valid or reallocs a new one instead.
3170 * If context is NULL, just calls sws_getContext() to get a new one. 3170 * If context is NULL, just calls sws_getContext() to get a new one.
3171 * Otherwise, checks if the parameters are the same already saved in context. 3171 * Otherwise, checks if the parameters are the ones already saved in context.
3172 * If that is the case, returns the current context. 3172 * If that is the case, returns the current context.
3173 * Otherwise, frees context and gets a new one. 3173 * Otherwise, frees context and gets a new one.
3174 * 3174 *
3175 * Be warned that srcFilter, dstFilter are not checked, they are 3175 * Be warned that srcFilter, dstFilter are not checked, they are
3176 asumed to remain valid. 3176 assumed to remain valid.