# HG changeset patch # User michael # Date 1011651069 0 # Node ID 29fef39822382a5b4061a3885fbc2b12dfb03661 # Parent a20c333d6c09d44590b31773587a4a5de4664be9 15/16 bit dithering in C (5% slower, can be disabled by comenting #define DITHER1XBPP out) x/yInc bugfix (bug was introduced during the global var removial) more emms & no messing with ARCH_X86 (=workaround against not cleared mmx state somewhere) sharpening filter (-ssf cs= & -ssf ls=) diff -r a20c333d6c09 -r 29fef3982238 cfg-common.h --- a/cfg-common.h Mon Jan 21 18:55:51 2002 +0000 +++ b/cfg-common.h Mon Jan 21 22:11:09 2002 +0000 @@ -114,12 +114,16 @@ extern int sws_chr_hshift; extern float sws_chr_gblur; extern float sws_lum_gblur; +extern float sws_chr_sharpen; +extern float sws_lum_sharpen; struct config scaler_filter_conf[]={ {"lgb", &sws_lum_gblur, CONF_TYPE_FLOAT, 0, 0, 100.0, NULL}, {"cgb", &sws_chr_gblur, CONF_TYPE_FLOAT, 0, 0, 100.0, NULL}, {"cvs", &sws_chr_vshift, CONF_TYPE_INT, 0, 0, 0, NULL}, {"chs", &sws_chr_hshift, CONF_TYPE_INT, 0, 0, 0, NULL}, + {"ls", &sws_lum_sharpen, CONF_TYPE_FLOAT, 0, 0, 100.0, NULL}, + {"cs", &sws_chr_sharpen, CONF_TYPE_FLOAT, 0, 0, 100.0, NULL}, {NULL, NULL, 0, 0, 0, 0, NULL} }; diff -r a20c333d6c09 -r 29fef3982238 postproc/swscale.c --- a/postproc/swscale.c Mon Jan 21 18:55:51 2002 +0000 +++ b/postproc/swscale.c Mon Jan 21 22:11:09 2002 +0000 @@ -161,6 +161,8 @@ float sws_chr_gblur= 0.0; int sws_chr_vshift= 0; int sws_chr_hshift= 0; +float sws_chr_sharpen= 0.0; +float sws_lum_sharpen= 0.0; /* cpuCaps combined from cpudetect and whats actually compiled in (if there is no support for something compiled in it wont appear here) */ @@ -298,6 +300,28 @@ else if(dstFormat==IMGFMT_BGR16) { int i; +#ifdef DITHER1XBPP + static int ditherb1=1<<14; + static int ditherg1=1<<13; + static int ditherr1=2<<14; + static int ditherb2=3<<14; + static int ditherg2=3<<13; + static int ditherr2=0<<14; + + ditherb1 ^= (1^2)<<14; + ditherg1 ^= (1^2)<<13; + ditherr1 ^= (1^2)<<14; + ditherb2 ^= (3^0)<<14; + ditherg2 ^= (3^0)<<13; + ditherr2 ^= (3^0)<<14; +#else + const int ditherb1=0; + const int ditherg1=0; + const int ditherr1=0; + const int ditherb2=0; + const int ditherg2=0; + const int ditherr2=0; +#endif for(i=0; i<(dstW>>1); i++){ int j; int Y1=0; @@ -325,19 +349,41 @@ Cr= clip_yuvtab_3343[V+ 256]; ((uint16_t*)dest)[2*i] = - clip_table16b[(Y1 + Cb) >>13] | - clip_table16g[(Y1 + Cg) >>13] | - clip_table16r[(Y1 + Cr) >>13]; + clip_table16b[(Y1 + Cb + ditherb1) >>13] | + clip_table16g[(Y1 + Cg + ditherg1) >>13] | + clip_table16r[(Y1 + Cr + ditherr1) >>13]; ((uint16_t*)dest)[2*i+1] = - clip_table16b[(Y2 + Cb) >>13] | - clip_table16g[(Y2 + Cg) >>13] | - clip_table16r[(Y2 + Cr) >>13]; + clip_table16b[(Y2 + Cb + ditherb2) >>13] | + clip_table16g[(Y2 + Cg + ditherg2) >>13] | + clip_table16r[(Y2 + Cr + ditherr2) >>13]; } } else if(dstFormat==IMGFMT_BGR15) { int i; +#ifdef DITHER1XBPP + static int ditherb1=1<<14; + static int ditherg1=1<<14; + static int ditherr1=2<<14; + static int ditherb2=3<<14; + static int ditherg2=3<<14; + static int ditherr2=0<<14; + + ditherb1 ^= (1^2)<<14; + ditherg1 ^= (1^2)<<14; + ditherr1 ^= (1^2)<<14; + ditherb2 ^= (3^0)<<14; + ditherg2 ^= (3^0)<<14; + ditherr2 ^= (3^0)<<14; +#else + const int ditherb1=0; + const int ditherg1=0; + const int ditherr1=0; + const int ditherb2=0; + const int ditherg2=0; + const int ditherr2=0; +#endif for(i=0; i<(dstW>>1); i++){ int j; int Y1=0; @@ -365,14 +411,14 @@ Cr= clip_yuvtab_3343[V+ 256]; ((uint16_t*)dest)[2*i] = - clip_table15b[(Y1 + Cb) >>13] | - clip_table15g[(Y1 + Cg) >>13] | - clip_table15r[(Y1 + Cr) >>13]; + clip_table15b[(Y1 + Cb + ditherb1) >>13] | + clip_table15g[(Y1 + Cg + ditherg1) >>13] | + clip_table15r[(Y1 + Cr + ditherr1) >>13]; ((uint16_t*)dest)[2*i+1] = - clip_table15b[(Y2 + Cb) >>13] | - clip_table15g[(Y2 + Cg) >>13] | - clip_table15r[(Y2 + Cr) >>13]; + clip_table15b[(Y2 + Cb + ditherb2) >>13] | + clip_table15g[(Y2 + Cg + ditherg2) >>13] | + clip_table15r[(Y2 + Cr + ditherr2) >>13]; } } } @@ -402,13 +448,11 @@ #undef HAVE_MMX #undef HAVE_MMX2 #undef HAVE_3DNOW -#undef ARCH_X86 #ifdef COMPILE_C #undef HAVE_MMX #undef HAVE_MMX2 #undef HAVE_3DNOW -#undef ARCH_X86 #define RENAME(a) a ## _C #include "swscale_template.c" #endif @@ -431,7 +475,6 @@ #define HAVE_MMX #undef HAVE_MMX2 #undef HAVE_3DNOW -#define ARCH_X86 #define RENAME(a) a ## _MMX #include "swscale_template.c" #endif @@ -442,7 +485,6 @@ #define HAVE_MMX #define HAVE_MMX2 #undef HAVE_3DNOW -#define ARCH_X86 #define RENAME(a) a ## _MMX2 #include "swscale_template.c" #endif @@ -453,7 +495,6 @@ #define HAVE_MMX #undef HAVE_MMX2 #define HAVE_3DNOW -#define ARCH_X86 #define RENAME(a) a ## _3DNow #include "swscale_template.c" #endif @@ -477,6 +518,10 @@ if(firstTime) { +#ifdef ARCH_X86 + if(gCpuCaps.hasMMX) + asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS (even for non mmx versions) +#endif flags= SWS_PRINT_INFO; firstTime=0; @@ -501,12 +546,41 @@ src_filter.chrV= getIdentityVec(); } + if(sws_chr_sharpen!=0.0){ + SwsVector *g= getConstVec(-1.0, 3); + SwsVector *id= getConstVec(10.0/sws_chr_sharpen, 1); + g->coeff[1]=2.0; + addVec(id, g); + convVec(src_filter.chrH, id); + convVec(src_filter.chrV, id); + freeVec(g); + freeVec(id); + } + + if(sws_lum_sharpen!=0.0){ + SwsVector *g= getConstVec(-1.0, 3); + SwsVector *id= getConstVec(10.0/sws_lum_sharpen, 1); + g->coeff[1]=2.0; + addVec(id, g); + convVec(src_filter.lumH, id); + convVec(src_filter.lumV, id); + freeVec(g); + freeVec(id); + } + if(sws_chr_hshift) shiftVec(src_filter.chrH, sws_chr_hshift); if(sws_chr_vshift) shiftVec(src_filter.chrV, sws_chr_vshift); + normalizeVec(src_filter.chrH, 1.0); + normalizeVec(src_filter.chrV, 1.0); + normalizeVec(src_filter.lumH, 1.0); + normalizeVec(src_filter.lumV, 1.0); + + if(verbose > 1) printVec(src_filter.chrH); + if(verbose > 1) printVec(src_filter.lumH); } switch(dstbpp) @@ -551,7 +625,6 @@ #endif *filterPos = (int16_t*)memalign(8, dstW*sizeof(int16_t)); - if(ABS(xInc - 0x10000) <10) // unscaled { int i; @@ -1002,8 +1075,8 @@ c->srcH= srcH; c->dstW= dstW; c->dstH= dstH; - c->lumXInc= ((srcW<<16) + (1<<15))/dstW; - c->lumYInc= ((srcH<<16) + (1<<15))/dstH; + c->lumXInc= ((srcW<<16) + (dstW>>1))/dstW; + c->lumYInc= ((srcH<<16) + (dstH>>1))/dstH; c->flags= flags; c->dstFormat= dstFormat; c->srcFormat= srcFormat; @@ -1120,7 +1193,9 @@ if(flags&SWS_PRINT_INFO) { #ifdef DITHER1XBPP - char *dither= cpuCaps.hasMMX ? " dithered" : ""; + char *dither= " dithered"; +#else + char *dither= ""; #endif if(flags&SWS_FAST_BILINEAR) fprintf(stderr, "\nSwScaler: FAST_BILINEAR scaler "); @@ -1244,6 +1319,21 @@ return vec; } +SwsVector *getConstVec(double c, int length){ + int i; + double *coeff= memalign(sizeof(double), length*sizeof(double)); + SwsVector *vec= malloc(sizeof(SwsVector)); + + vec->coeff= coeff; + vec->length= length; + + for(i=0; i>19)]; @@ -1175,19 +1197,41 @@ int Cr= yuvtab_3343[V]; ((uint16_t*)dest)[i] = - clip_table16b[(Y1 + Cb) >>13] | - clip_table16g[(Y1 + Cg) >>13] | - clip_table16r[(Y1 + Cr) >>13]; + clip_table16b[(Y1 + Cb + ditherb1) >>13] | + clip_table16g[(Y1 + Cg + ditherg1) >>13] | + clip_table16r[(Y1 + Cr + ditherr1) >>13]; ((uint16_t*)dest)[i+1] = - clip_table16b[(Y2 + Cb) >>13] | - clip_table16g[(Y2 + Cg) >>13] | - clip_table16r[(Y2 + Cr) >>13]; + clip_table16b[(Y2 + Cb + ditherb2) >>13] | + clip_table16g[(Y2 + Cg + ditherg2) >>13] | + clip_table16r[(Y2 + Cr + ditherr2) >>13]; } } else if(dstFormat==IMGFMT_BGR15) { int i; +#ifdef DITHER1XBPP + static int ditherb1=1<<14; + static int ditherg1=1<<14; + static int ditherr1=2<<14; + static int ditherb2=3<<14; + static int ditherg2=3<<14; + static int ditherr2=0<<14; + + ditherb1 ^= (1^2)<<14; + ditherg1 ^= (1^2)<<14; + ditherr1 ^= (1^2)<<14; + ditherb2 ^= (3^0)<<14; + ditherg2 ^= (3^0)<<14; + ditherr2 ^= (3^0)<<14; +#else + const int ditherb1=0; + const int ditherg1=0; + const int ditherr1=0; + const int ditherb2=0; + const int ditherg2=0; + const int ditherr2=0; +#endif for(i=0; i>19)]; @@ -1200,14 +1244,14 @@ int Cr= yuvtab_3343[V]; ((uint16_t*)dest)[i] = - clip_table15b[(Y1 + Cb) >>13] | - clip_table15g[(Y1 + Cg) >>13] | - clip_table15r[(Y1 + Cr) >>13]; + clip_table15b[(Y1 + Cb + ditherb1) >>13] | + clip_table15g[(Y1 + Cg + ditherg1) >>13] | + clip_table15r[(Y1 + Cr + ditherr1) >>13]; ((uint16_t*)dest)[i+1] = - clip_table15b[(Y2 + Cb) >>13] | - clip_table15g[(Y2 + Cg) >>13] | - clip_table15r[(Y2 + Cr) >>13]; + clip_table15b[(Y2 + Cb + ditherb2) >>13] | + clip_table15g[(Y2 + Cg + ditherg2) >>13] | + clip_table15r[(Y2 + Cr + ditherr2) >>13]; } } #endif @@ -1397,6 +1441,28 @@ else if(dstFormat==IMGFMT_BGR16) { int i; +#ifdef DITHER1XBPP + static int ditherb1=1<<14; + static int ditherg1=1<<13; + static int ditherr1=2<<14; + static int ditherb2=3<<14; + static int ditherg2=3<<13; + static int ditherr2=0<<14; + + ditherb1 ^= (1^2)<<14; + ditherg1 ^= (1^2)<<13; + ditherr1 ^= (1^2)<<14; + ditherb2 ^= (3^0)<<14; + ditherg2 ^= (3^0)<<13; + ditherr2 ^= (3^0)<<14; +#else + const int ditherb1=0; + const int ditherg1=0; + const int ditherr1=0; + const int ditherb2=0; + const int ditherg2=0; + const int ditherr2=0; +#endif for(i=0; i>7]; @@ -1409,19 +1475,41 @@ int Cr= yuvtab_3343[V]; ((uint16_t*)dest)[i] = - clip_table16b[(Y1 + Cb) >>13] | - clip_table16g[(Y1 + Cg) >>13] | - clip_table16r[(Y1 + Cr) >>13]; + clip_table16b[(Y1 + Cb + ditherb1) >>13] | + clip_table16g[(Y1 + Cg + ditherg1) >>13] | + clip_table16r[(Y1 + Cr + ditherr1) >>13]; ((uint16_t*)dest)[i+1] = - clip_table16b[(Y2 + Cb) >>13] | - clip_table16g[(Y2 + Cg) >>13] | - clip_table16r[(Y2 + Cr) >>13]; + clip_table16b[(Y2 + Cb + ditherb2) >>13] | + clip_table16g[(Y2 + Cg + ditherg2) >>13] | + clip_table16r[(Y2 + Cr + ditherr2) >>13]; } } else if(dstFormat==IMGFMT_BGR15) { int i; +#ifdef DITHER1XBPP + static int ditherb1=1<<14; + static int ditherg1=1<<14; + static int ditherr1=2<<14; + static int ditherb2=3<<14; + static int ditherg2=3<<14; + static int ditherr2=0<<14; + + ditherb1 ^= (1^2)<<14; + ditherg1 ^= (1^2)<<14; + ditherr1 ^= (1^2)<<14; + ditherb2 ^= (3^0)<<14; + ditherg2 ^= (3^0)<<14; + ditherr2 ^= (3^0)<<14; +#else + const int ditherb1=0; + const int ditherg1=0; + const int ditherr1=0; + const int ditherb2=0; + const int ditherg2=0; + const int ditherr2=0; +#endif for(i=0; i>7]; @@ -1434,14 +1522,14 @@ int Cr= yuvtab_3343[V]; ((uint16_t*)dest)[i] = - clip_table15b[(Y1 + Cb) >>13] | - clip_table15g[(Y1 + Cg) >>13] | - clip_table15r[(Y1 + Cr) >>13]; + clip_table15b[(Y1 + Cb + ditherb1) >>13] | + clip_table15g[(Y1 + Cg + ditherg1) >>13] | + clip_table15r[(Y1 + Cr + ditherr1) >>13]; ((uint16_t*)dest)[i+1] = - clip_table15b[(Y2 + Cb) >>13] | - clip_table15g[(Y2 + Cg) >>13] | - clip_table15r[(Y2 + Cr) >>13]; + clip_table15b[(Y2 + Cb + ditherb2) >>13] | + clip_table15g[(Y2 + Cg + ditherg2) >>13] | + clip_table15r[(Y2 + Cr + ditherr2) >>13]; } } #endif