changeset 4297:29fef3982238

15/16 bit dithering in C (5% slower, can be disabled by commenting out #define DITHER1XBPP); x/yInc bugfix (bug was introduced during the global variable removal); more emms & no messing with ARCH_X86 (= workaround for MMX state not being cleared somewhere); sharpening filter (-ssf cs=<int> & -ssf ls=<int>)
author michael
date Mon, 21 Jan 2002 22:11:09 +0000
parents a20c333d6c09
children 9eb7a02393a3
files cfg-common.h postproc/swscale.c postproc/swscale.h postproc/swscale_template.c
diffstat 4 files changed, 228 insertions(+), 45 deletions(-) [+]
line wrap: on
line diff
--- a/cfg-common.h	Mon Jan 21 18:55:51 2002 +0000
+++ b/cfg-common.h	Mon Jan 21 22:11:09 2002 +0000
@@ -114,12 +114,16 @@
 extern int sws_chr_hshift;
 extern float sws_chr_gblur;
 extern float sws_lum_gblur;
+extern float sws_chr_sharpen;
+extern float sws_lum_sharpen;
 
 struct config scaler_filter_conf[]={
 	{"lgb", &sws_lum_gblur, CONF_TYPE_FLOAT, 0, 0, 100.0, NULL},
 	{"cgb", &sws_chr_gblur, CONF_TYPE_FLOAT, 0, 0, 100.0, NULL},
 	{"cvs", &sws_chr_vshift, CONF_TYPE_INT, 0, 0, 0, NULL},
 	{"chs", &sws_chr_hshift, CONF_TYPE_INT, 0, 0, 0, NULL},
+	{"ls", &sws_lum_sharpen, CONF_TYPE_FLOAT, 0, 0, 100.0, NULL},
+	{"cs", &sws_chr_sharpen, CONF_TYPE_FLOAT, 0, 0, 100.0, NULL},
 	{NULL, NULL, 0, 0, 0, 0, NULL}
 };
 
--- a/postproc/swscale.c	Mon Jan 21 18:55:51 2002 +0000
+++ b/postproc/swscale.c	Mon Jan 21 22:11:09 2002 +0000
@@ -161,6 +161,8 @@
 float sws_chr_gblur= 0.0;
 int sws_chr_vshift= 0;
 int sws_chr_hshift= 0;
+float sws_chr_sharpen= 0.0;
+float sws_lum_sharpen= 0.0;
 
 /* cpuCaps combined from cpudetect and whats actually compiled in
    (if there is no support for something compiled in it wont appear here) */
@@ -298,6 +300,28 @@
 	else if(dstFormat==IMGFMT_BGR16)
 	{
 		int i;
+#ifdef DITHER1XBPP
+		static int ditherb1=1<<14;
+		static int ditherg1=1<<13;
+		static int ditherr1=2<<14;
+		static int ditherb2=3<<14;
+		static int ditherg2=3<<13;
+		static int ditherr2=0<<14;
+
+		ditherb1 ^= (1^2)<<14;
+		ditherg1 ^= (1^2)<<13;
+		ditherr1 ^= (1^2)<<14;
+		ditherb2 ^= (3^0)<<14;
+		ditherg2 ^= (3^0)<<13;
+		ditherr2 ^= (3^0)<<14;
+#else
+		const int ditherb1=0;
+		const int ditherg1=0;
+		const int ditherr1=0;
+		const int ditherb2=0;
+		const int ditherg2=0;
+		const int ditherr2=0;
+#endif
 		for(i=0; i<(dstW>>1); i++){
 			int j;
 			int Y1=0;
@@ -325,19 +349,41 @@
 			Cr= clip_yuvtab_3343[V+ 256];
 
 			((uint16_t*)dest)[2*i] =
-				clip_table16b[(Y1 + Cb) >>13] |
-				clip_table16g[(Y1 + Cg) >>13] |
-				clip_table16r[(Y1 + Cr) >>13];
+				clip_table16b[(Y1 + Cb + ditherb1) >>13] |
+				clip_table16g[(Y1 + Cg + ditherg1) >>13] |
+				clip_table16r[(Y1 + Cr + ditherr1) >>13];
 
 			((uint16_t*)dest)[2*i+1] =
-				clip_table16b[(Y2 + Cb) >>13] |
-				clip_table16g[(Y2 + Cg) >>13] |
-				clip_table16r[(Y2 + Cr) >>13];
+				clip_table16b[(Y2 + Cb + ditherb2) >>13] |
+				clip_table16g[(Y2 + Cg + ditherg2) >>13] |
+				clip_table16r[(Y2 + Cr + ditherr2) >>13];
 		}
 	}
 	else if(dstFormat==IMGFMT_BGR15)
 	{
 		int i;
+#ifdef DITHER1XBPP
+		static int ditherb1=1<<14;
+		static int ditherg1=1<<14;
+		static int ditherr1=2<<14;
+		static int ditherb2=3<<14;
+		static int ditherg2=3<<14;
+		static int ditherr2=0<<14;
+
+		ditherb1 ^= (1^2)<<14;
+		ditherg1 ^= (1^2)<<14;
+		ditherr1 ^= (1^2)<<14;
+		ditherb2 ^= (3^0)<<14;
+		ditherg2 ^= (3^0)<<14;
+		ditherr2 ^= (3^0)<<14;
+#else
+		const int ditherb1=0;
+		const int ditherg1=0;
+		const int ditherr1=0;
+		const int ditherb2=0;
+		const int ditherg2=0;
+		const int ditherr2=0;
+#endif
 		for(i=0; i<(dstW>>1); i++){
 			int j;
 			int Y1=0;
@@ -365,14 +411,14 @@
 			Cr= clip_yuvtab_3343[V+ 256];
 
 			((uint16_t*)dest)[2*i] =
-				clip_table15b[(Y1 + Cb) >>13] |
-				clip_table15g[(Y1 + Cg) >>13] |
-				clip_table15r[(Y1 + Cr) >>13];
+				clip_table15b[(Y1 + Cb + ditherb1) >>13] |
+				clip_table15g[(Y1 + Cg + ditherg1) >>13] |
+				clip_table15r[(Y1 + Cr + ditherr1) >>13];
 
 			((uint16_t*)dest)[2*i+1] =
-				clip_table15b[(Y2 + Cb) >>13] |
-				clip_table15g[(Y2 + Cg) >>13] |
-				clip_table15r[(Y2 + Cr) >>13];
+				clip_table15b[(Y2 + Cb + ditherb2) >>13] |
+				clip_table15g[(Y2 + Cg + ditherg2) >>13] |
+				clip_table15r[(Y2 + Cr + ditherr2) >>13];
 		}
 	}
 }
@@ -402,13 +448,11 @@
 #undef HAVE_MMX
 #undef HAVE_MMX2
 #undef HAVE_3DNOW
-#undef ARCH_X86
 
 #ifdef COMPILE_C
 #undef HAVE_MMX
 #undef HAVE_MMX2
 #undef HAVE_3DNOW
-#undef ARCH_X86
 #define RENAME(a) a ## _C
 #include "swscale_template.c"
 #endif
@@ -431,7 +475,6 @@
 #define HAVE_MMX
 #undef HAVE_MMX2
 #undef HAVE_3DNOW
-#define ARCH_X86
 #define RENAME(a) a ## _MMX
 #include "swscale_template.c"
 #endif
@@ -442,7 +485,6 @@
 #define HAVE_MMX
 #define HAVE_MMX2
 #undef HAVE_3DNOW
-#define ARCH_X86
 #define RENAME(a) a ## _MMX2
 #include "swscale_template.c"
 #endif
@@ -453,7 +495,6 @@
 #define HAVE_MMX
 #undef HAVE_MMX2
 #define HAVE_3DNOW
-#define ARCH_X86
 #define RENAME(a) a ## _3DNow
 #include "swscale_template.c"
 #endif
@@ -477,6 +518,10 @@
 
 	if(firstTime)
 	{
+#ifdef ARCH_X86
+		if(gCpuCaps.hasMMX)
+			asm volatile("emms\n\t"::: "memory"); //FIXME this shouldnt be required but it IS (even for non mmx versions)
+#endif
 		flags= SWS_PRINT_INFO;
 		firstTime=0;
 
@@ -501,12 +546,41 @@
 			src_filter.chrV= getIdentityVec();
 		}
 
+		if(sws_chr_sharpen!=0.0){
+			SwsVector *g= getConstVec(-1.0, 3);
+			SwsVector *id= getConstVec(10.0/sws_chr_sharpen, 1);
+			g->coeff[1]=2.0;
+			addVec(id, g);
+			convVec(src_filter.chrH, id);
+			convVec(src_filter.chrV, id);
+			freeVec(g);
+			freeVec(id);
+		}
+
+		if(sws_lum_sharpen!=0.0){
+			SwsVector *g= getConstVec(-1.0, 3);
+			SwsVector *id= getConstVec(10.0/sws_lum_sharpen, 1);
+			g->coeff[1]=2.0;
+			addVec(id, g);
+			convVec(src_filter.lumH, id);
+			convVec(src_filter.lumV, id);
+			freeVec(g);
+			freeVec(id);
+		}
+
 		if(sws_chr_hshift)
 			shiftVec(src_filter.chrH, sws_chr_hshift);
 
 		if(sws_chr_vshift)
 			shiftVec(src_filter.chrV, sws_chr_vshift);
 
+		normalizeVec(src_filter.chrH, 1.0);
+		normalizeVec(src_filter.chrV, 1.0);
+		normalizeVec(src_filter.lumH, 1.0);
+		normalizeVec(src_filter.lumV, 1.0);
+
+		if(verbose > 1) printVec(src_filter.chrH);
+		if(verbose > 1) printVec(src_filter.lumH);
 	}
 
 	switch(dstbpp)
@@ -551,7 +625,6 @@
 #endif
 
 	*filterPos = (int16_t*)memalign(8, dstW*sizeof(int16_t));
-
 	if(ABS(xInc - 0x10000) <10) // unscaled
 	{
 		int i;
@@ -1002,8 +1075,8 @@
 	c->srcH= srcH;
 	c->dstW= dstW;
 	c->dstH= dstH;
-	c->lumXInc= ((srcW<<16) + (1<<15))/dstW;
-	c->lumYInc= ((srcH<<16) + (1<<15))/dstH;
+	c->lumXInc= ((srcW<<16) + (dstW>>1))/dstW;
+	c->lumYInc= ((srcH<<16) + (dstH>>1))/dstH;
 	c->flags= flags;
 	c->dstFormat= dstFormat;
 	c->srcFormat= srcFormat;
@@ -1120,7 +1193,9 @@
 	if(flags&SWS_PRINT_INFO)
 	{
 #ifdef DITHER1XBPP
-		char *dither= cpuCaps.hasMMX ? " dithered" : "";
+		char *dither= " dithered";
+#else
+		char *dither= "";
 #endif
 		if(flags&SWS_FAST_BILINEAR)
 			fprintf(stderr, "\nSwScaler: FAST_BILINEAR scaler ");
@@ -1244,6 +1319,21 @@
 	return vec;
 }
 
+SwsVector *getConstVec(double c, int length){ /* Builds a new SwsVector holding `length` coefficients, each set to `c`. */
+	int i;
+	double *coeff= memalign(sizeof(double), length*sizeof(double)); /* coefficient storage, aligned to sizeof(double) */
+	SwsVector *vec= malloc(sizeof(SwsVector)); /* NOTE(review): neither allocation result is checked before use */
+
+	vec->coeff= coeff; /* the returned vector points at the buffer allocated above */
+	vec->length= length;
+
+	for(i=0; i<length; i++) /* no iterations when length <= 0 */
+		coeff[i]= c;
+
+	return vec; /* the sharpen-filter setup in this changeset hands the result to freeVec(); presumably that is the intended release path -- confirm */
+}
+
+
 SwsVector *getIdentityVec(void){
 	double *coeff= memalign(sizeof(double), sizeof(double));
 	SwsVector *vec= malloc(sizeof(SwsVector));
--- a/postproc/swscale.h	Mon Jan 21 18:55:51 2002 +0000
+++ b/postproc/swscale.h	Mon Jan 21 22:11:09 2002 +0000
@@ -108,6 +108,7 @@
              int srcSliceH, uint8_t* dst[], int dstStride[]);
 
 SwsVector *getGaussianVec(double variance, double quality);
+SwsVector *getConstVec(double c, int length);
 SwsVector *getIdentityVec(void);
 void scaleVec(SwsVector *a, double scalar);
 void normalizeVec(SwsVector *a, double height);
--- a/postproc/swscale_template.c	Mon Jan 21 18:55:51 2002 +0000
+++ b/postproc/swscale_template.c	Mon Jan 21 22:11:09 2002 +0000
@@ -1163,6 +1163,28 @@
 		else if(dstFormat==IMGFMT_BGR16)
 		{
 			int i;
+#ifdef DITHER1XBPP
+			static int ditherb1=1<<14;
+			static int ditherg1=1<<13;
+			static int ditherr1=2<<14;
+			static int ditherb2=3<<14;
+			static int ditherg2=3<<13;
+			static int ditherr2=0<<14;
+
+			ditherb1 ^= (1^2)<<14;
+			ditherg1 ^= (1^2)<<13;
+			ditherr1 ^= (1^2)<<14;
+			ditherb2 ^= (3^0)<<14;
+			ditherg2 ^= (3^0)<<13;
+			ditherr2 ^= (3^0)<<14;
+#else
+			const int ditherb1=0;
+			const int ditherg1=0;
+			const int ditherr1=0;
+			const int ditherb2=0;
+			const int ditherg2=0;
+			const int ditherr2=0;
+#endif
 			for(i=0; i<dstW-1; i+=2){
 				// vertical linear interpolation && yuv2rgb in a single step:
 				int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
@@ -1175,19 +1197,41 @@
 				int Cr= yuvtab_3343[V];
 
 				((uint16_t*)dest)[i] =
-					clip_table16b[(Y1 + Cb) >>13] |
-					clip_table16g[(Y1 + Cg) >>13] |
-					clip_table16r[(Y1 + Cr) >>13];
+					clip_table16b[(Y1 + Cb + ditherb1) >>13] |
+					clip_table16g[(Y1 + Cg + ditherg1) >>13] |
+					clip_table16r[(Y1 + Cr + ditherr1) >>13];
 
 				((uint16_t*)dest)[i+1] =
-					clip_table16b[(Y2 + Cb) >>13] |
-					clip_table16g[(Y2 + Cg) >>13] |
-					clip_table16r[(Y2 + Cr) >>13];
+					clip_table16b[(Y2 + Cb + ditherb2) >>13] |
+					clip_table16g[(Y2 + Cg + ditherg2) >>13] |
+					clip_table16r[(Y2 + Cr + ditherr2) >>13];
 			}
 		}
 		else if(dstFormat==IMGFMT_BGR15)
 		{
 			int i;
+#ifdef DITHER1XBPP
+			static int ditherb1=1<<14;
+			static int ditherg1=1<<14;
+			static int ditherr1=2<<14;
+			static int ditherb2=3<<14;
+			static int ditherg2=3<<14;
+			static int ditherr2=0<<14;
+
+			ditherb1 ^= (1^2)<<14;
+			ditherg1 ^= (1^2)<<14;
+			ditherr1 ^= (1^2)<<14;
+			ditherb2 ^= (3^0)<<14;
+			ditherg2 ^= (3^0)<<14;
+			ditherr2 ^= (3^0)<<14;
+#else
+			const int ditherb1=0;
+			const int ditherg1=0;
+			const int ditherr1=0;
+			const int ditherb2=0;
+			const int ditherg2=0;
+			const int ditherr2=0;
+#endif
 			for(i=0; i<dstW-1; i+=2){
 				// vertical linear interpolation && yuv2rgb in a single step:
 				int Y1=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>19)];
@@ -1200,14 +1244,14 @@
 				int Cr= yuvtab_3343[V];
 
 				((uint16_t*)dest)[i] =
-					clip_table15b[(Y1 + Cb) >>13] |
-					clip_table15g[(Y1 + Cg) >>13] |
-					clip_table15r[(Y1 + Cr) >>13];
+					clip_table15b[(Y1 + Cb + ditherb1) >>13] |
+					clip_table15g[(Y1 + Cg + ditherg1) >>13] |
+					clip_table15r[(Y1 + Cr + ditherr1) >>13];
 
 				((uint16_t*)dest)[i+1] =
-					clip_table15b[(Y2 + Cb) >>13] |
-					clip_table15g[(Y2 + Cg) >>13] |
-					clip_table15r[(Y2 + Cr) >>13];
+					clip_table15b[(Y2 + Cb + ditherb2) >>13] |
+					clip_table15g[(Y2 + Cg + ditherg2) >>13] |
+					clip_table15r[(Y2 + Cr + ditherr2) >>13];
 			}
 		}
 #endif
@@ -1397,6 +1441,28 @@
 	else if(dstFormat==IMGFMT_BGR16)
 	{
 		int i;
+#ifdef DITHER1XBPP
+		static int ditherb1=1<<14;
+		static int ditherg1=1<<13;
+		static int ditherr1=2<<14;
+		static int ditherb2=3<<14;
+		static int ditherg2=3<<13;
+		static int ditherr2=0<<14;
+
+		ditherb1 ^= (1^2)<<14;
+		ditherg1 ^= (1^2)<<13;
+		ditherr1 ^= (1^2)<<14;
+		ditherb2 ^= (3^0)<<14;
+		ditherg2 ^= (3^0)<<13;
+		ditherr2 ^= (3^0)<<14;
+#else
+		const int ditherb1=0;
+		const int ditherg1=0;
+		const int ditherr1=0;
+		const int ditherb2=0;
+		const int ditherg2=0;
+		const int ditherr2=0;
+#endif
 		for(i=0; i<dstW-1; i+=2){
 			// vertical linear interpolation && yuv2rgb in a single step:
 			int Y1=yuvtab_2568[buf0[i]>>7];
@@ -1409,19 +1475,41 @@
 			int Cr= yuvtab_3343[V];
 
 			((uint16_t*)dest)[i] =
-				clip_table16b[(Y1 + Cb) >>13] |
-				clip_table16g[(Y1 + Cg) >>13] |
-				clip_table16r[(Y1 + Cr) >>13];
+				clip_table16b[(Y1 + Cb + ditherb1) >>13] |
+				clip_table16g[(Y1 + Cg + ditherg1) >>13] |
+				clip_table16r[(Y1 + Cr + ditherr1) >>13];
 
 			((uint16_t*)dest)[i+1] =
-				clip_table16b[(Y2 + Cb) >>13] |
-				clip_table16g[(Y2 + Cg) >>13] |
-				clip_table16r[(Y2 + Cr) >>13];
+				clip_table16b[(Y2 + Cb + ditherb2) >>13] |
+				clip_table16g[(Y2 + Cg + ditherg2) >>13] |
+				clip_table16r[(Y2 + Cr + ditherr2) >>13];
 		}
 	}
 	else if(dstFormat==IMGFMT_BGR15)
 	{
 		int i;
+#ifdef DITHER1XBPP
+		static int ditherb1=1<<14;
+		static int ditherg1=1<<14;
+		static int ditherr1=2<<14;
+		static int ditherb2=3<<14;
+		static int ditherg2=3<<14;
+		static int ditherr2=0<<14;
+
+		ditherb1 ^= (1^2)<<14;
+		ditherg1 ^= (1^2)<<14;
+		ditherr1 ^= (1^2)<<14;
+		ditherb2 ^= (3^0)<<14;
+		ditherg2 ^= (3^0)<<14;
+		ditherr2 ^= (3^0)<<14;
+#else
+		const int ditherb1=0;
+		const int ditherg1=0;
+		const int ditherr1=0;
+		const int ditherb2=0;
+		const int ditherg2=0;
+		const int ditherr2=0;
+#endif
 		for(i=0; i<dstW-1; i+=2){
 			// vertical linear interpolation && yuv2rgb in a single step:
 			int Y1=yuvtab_2568[buf0[i]>>7];
@@ -1434,14 +1522,14 @@
 			int Cr= yuvtab_3343[V];
 
 			((uint16_t*)dest)[i] =
-				clip_table15b[(Y1 + Cb) >>13] |
-				clip_table15g[(Y1 + Cg) >>13] |
-				clip_table15r[(Y1 + Cr) >>13];
+				clip_table15b[(Y1 + Cb + ditherb1) >>13] |
+				clip_table15g[(Y1 + Cg + ditherg1) >>13] |
+				clip_table15r[(Y1 + Cr + ditherr1) >>13];
 
 			((uint16_t*)dest)[i+1] =
-				clip_table15b[(Y2 + Cb) >>13] |
-				clip_table15g[(Y2 + Cg) >>13] |
-				clip_table15r[(Y2 + Cr) >>13];
+				clip_table15b[(Y2 + Cb + ditherb2) >>13] |
+				clip_table15g[(Y2 + Cg + ditherg2) >>13] |
+				clip_table15r[(Y2 + Cr + ditherr2) >>13];
 		}
 	}
 #endif