changeset 4467:9512d6832b38

YUY2, BGR24, BGR32 input support (no mmx yet)
author michael
date Fri, 01 Feb 2002 19:25:09 +0000
parents bc7b7102c147
children 76fb5d33e6eb
files postproc/swscale.c postproc/swscale.h postproc/swscale_template.c
diffstat 3 files changed, 240 insertions(+), 34 deletions(-) [+]
line wrap: on
line diff
--- a/postproc/swscale.c	Fri Feb 01 18:16:30 2002 +0000
+++ b/postproc/swscale.c	Fri Feb 01 19:25:09 2002 +0000
@@ -17,7 +17,7 @@
 */
 
 /*
-  supported Input formats: YV12, I420, IYUV (grayscale soon too)
+  supported Input formats: YV12, I420, IYUV, YUY2, BGR32, BGR24 (grayscale soon too)
   supported output formats: YV12, I420, IYUV, BGR15, BGR16, BGR24, BGR32 (grayscale soon too)
   BGR15/16 support dithering
 */
@@ -43,7 +43,7 @@
 //#undef ARCH_X86
 #define DITHER1XBPP
 
-#define RET 0xC3 //near return opcode
+#define RET 0xC3 //near return opcode for X86
 
 #ifdef MP_DEBUG
 #define ASSERT(x) if(!(x)) { printf("ASSERT " #x " failed\n"); *((int*)0)=0; }
@@ -58,10 +58,22 @@
 #endif
 
 //FIXME replace this with something faster
-#define isYUV(x)       ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV)
 #define isPlanarYUV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV)
+#define isYUV(x)       ((x)==IMGFMT_YUY2 || isPlanarYUV(x)) // any YUV format: the planar ones plus packed YUY2
 #define isHalfChrV(x)  ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV)
-#define isHalfChrH(x)  ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV)
+#define isHalfChrH(x)  ((x)==IMGFMT_YUY2 || (x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV) // half horizontal chroma resolution; YUY2 is included here but not in isHalfChrV
+#define isPacked(x)    ((x)==IMGFMT_YUY2 || (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24) // formats whose luma and chroma are all read from srcParam[0] in swScale
+
+#define RGB2YUV_SHIFT 8 // fractional bits of the fixed-point RGB->YUV coefficients; XY below = weight of input channel X (B/G/R) in output Y/U/V
+#define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
+#define BV (-(int)( 0.071*(1<<RGB2YUV_SHIFT)+0.5)) // negative weights: round the magnitude, then negate, because (int) truncates toward zero
+#define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
+#define GY ((int)( 0.504*(1<<RGB2YUV_SHIFT)+0.5))
+#define GV (-(int)( 0.368*(1<<RGB2YUV_SHIFT)+0.5)) // with correct rounding RV+GV+BV == 0, so gray input yields V == 128
+#define GU (-(int)( 0.291*(1<<RGB2YUV_SHIFT)+0.5)) // with correct rounding RU+GU+BU == 0, so gray input yields U == 128
+#define RY ((int)( 0.257*(1<<RGB2YUV_SHIFT)+0.5))
+#define RV ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
+#define RU (-(int)( 0.148*(1<<RGB2YUV_SHIFT)+0.5))
 
 extern int verbose; // defined in mplayer.c
 /*
@@ -80,7 +92,7 @@
 add support for packed pixel yuv input & output
 add support for Y8 input & output
 add BGR4 output support
-add BGR32 / BGR24 input support
+write special BGR->BGR scaler
 */
 
 #define ABS(a) ((a) > 0 ? (a) : (-(a)))
@@ -1105,7 +1117,8 @@
 	/* sanity check */
 	if(srcW<4 || srcH<1 || dstW<8 || dstH<1) return NULL; //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code
 	
-	if(srcFormat!=IMGFMT_YV12 && srcFormat!=IMGFMT_I420 && srcFormat!=IMGFMT_IYUV) return NULL;
+//	if(!isSupportedIn(srcFormat)) return NULL;
+//	if(!isSupportedOut(dstFormat)) return NULL;
 
 	if(!dstFilter) dstFilter= &dummyFilter;
 	if(!srcFilter) srcFilter= &dummyFilter;
@@ -1135,6 +1148,30 @@
 	else
 		c->canMMX2BeUsed=0;
 
+
+	/* don't use full vertical UV input/internally if the source doesn't even have it */
+	if(isHalfChrV(srcFormat)) c->flags= flags= flags&(~SWS_FULL_CHR_V);
+	/* don't use full horizontal UV input if the source doesn't even have it */
+	if(isHalfChrH(srcFormat)) c->flags= flags= flags&(~SWS_FULL_CHR_H_INP);
+	/* don't use full horizontal UV internally if the destination doesn't even have it */
+	if(isHalfChrH(dstFormat)) c->flags= flags= flags&(~SWS_FULL_CHR_H_INT);
+
+	if(flags&SWS_FULL_CHR_H_INP)	c->chrSrcW= srcW;
+	else				c->chrSrcW= (srcW+1)>>1;
+
+	if(flags&SWS_FULL_CHR_H_INT)	c->chrDstW= dstW;
+	else				c->chrDstW= (dstW+1)>>1;
+
+	if(flags&SWS_FULL_CHR_V)	c->chrSrcH= srcH;
+	else				c->chrSrcH= (srcH+1)>>1;
+
+	if(isHalfChrV(dstFormat))	c->chrDstH= (dstH+1)>>1;
+	else				c->chrDstH= dstH;
+
+	c->chrXInc= ((c->chrSrcW<<16) + (c->chrDstW>>1))/c->chrDstW;
+	c->chrYInc= ((c->chrSrcH<<16) + (c->chrDstH>>1))/c->chrDstH;
+
+
 	// match pixel 0 of the src to pixel 0 of dst and match pixel n-2 of src to pixel n-2 of dst
 	// but only for the FAST_BILINEAR mode otherwise do correct scaling
 	// n-2 is the last chrominance sample available
@@ -1143,22 +1180,19 @@
 	// first and last pixel
 	if(flags&SWS_FAST_BILINEAR)
 	{
-		if(c->canMMX2BeUsed) 	c->lumXInc+= 20;
+		if(c->canMMX2BeUsed)
+		{
+			c->lumXInc+= 20;
+			c->chrXInc+= 20;
+		}
 		//we dont use the x86asm scaler if mmx is available
-		else if(cpuCaps.hasMMX)	c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
+		else if(cpuCaps.hasMMX)
+		{
+			c->lumXInc = ((srcW-2)<<16)/(dstW-2) - 20;
+			c->chrXInc = ((c->chrSrcW-2)<<16)/(c->chrDstW-2) - 20;
+		}
 	}
 
-	/* set chrXInc & chrDstW */
-	if((flags&SWS_FULL_UV_IPOL) && !isHalfChrH(dstFormat))
-		c->chrXInc= c->lumXInc>>1, c->chrDstW= dstW;
-	else
-		c->chrXInc= c->lumXInc,    c->chrDstW= (dstW+1)>>1;
-
-	/* set chrYInc & chrDstH */
-	if(isHalfChrV(dstFormat))
-		c->chrYInc= c->lumYInc,    c->chrDstH= (dstH+1)>>1;
-	else	c->chrYInc= c->lumYInc>>1, c->chrDstH= dstH;
-
 	/* precalculate horizontal scaler filter coefficients */
 	{
 		const int filterAlign= cpuCaps.hasMMX ? 4 : 1;
@@ -1246,6 +1280,8 @@
 			fprintf(stderr, "\nSwScaler: BILINEAR scaler ");
 		else if(flags&SWS_BICUBIC)
 			fprintf(stderr, "\nSwScaler: BICUBIC scaler ");
+		else if(flags&SWS_X)
+			fprintf(stderr, "\nSwScaler: Experimental scaler ");
 		else if(flags&SWS_POINT)
 			fprintf(stderr, "\nSwScaler: Nearest Neighbor / POINT scaler ");
 		else if(flags&SWS_AREA)
@@ -1344,7 +1380,14 @@
 
 		printf("SwScaler: %dx%d -> %dx%d\n", srcW, srcH, dstW, dstH);
 	}
-
+	if((flags & SWS_PRINT_INFO) && verbose>1)
+	{
+		printf("SwScaler:Lum srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
+			c->srcW, c->srcH, c->dstW, c->dstH, c->lumXInc, c->lumYInc);
+		printf("SwScaler:Chr srcW=%d srcH=%d dstW=%d dstH=%d xInc=%d yInc=%d\n",
+			c->chrSrcW, c->chrSrcH, c->chrDstW, c->chrDstH, c->chrXInc, c->chrYInc);
+	}
+	
 	return c;
 }
 
--- a/postproc/swscale.h	Fri Feb 01 18:16:30 2002 +0000
+++ b/postproc/swscale.h	Fri Feb 01 19:25:09 2002 +0000
@@ -23,7 +23,15 @@
 #define SWS_X        8
 #define SWS_POINT    0x10
 #define SWS_AREA     0x20
-#define SWS_FULL_UV_IPOL 0x100
+
+//the following 4 flags are not completely implemented
+//internal chrominance subsampling info
+#define SWS_FULL_CHR_V		0x100 // chrSrcH = srcH instead of (srcH+1)>>1; cleared at init when the source format is vertically subsampled
+#define SWS_FULL_CHR_H_INT	0x200 // chrDstW = dstW instead of (dstW+1)>>1; cleared at init when the destination format is horizontally subsampled
+//input subsampling info
+#define SWS_FULL_CHR_H_INP	0x400 // chrSrcW = srcW instead of (srcW+1)>>1; cleared at init when the source format is horizontally subsampled
+#define SWS_DIRECT_BGR		0x800 // NOTE(review): no use visible in this changeset - presumably reserved for a direct BGR->BGR path; confirm
+
 #define SWS_PRINT_INFO 0x1000
 
 #define SWS_MAX_REDUCE_CUTOFF 0.002
@@ -31,7 +39,7 @@
 /* this struct should be aligned on at least 32-byte boundary */
 typedef struct{
 	int srcW, srcH, dstW, dstH;
-	int chrDstW, chrDstH;
+	int chrSrcW, chrSrcH, chrDstW, chrDstH;
 	int lumXInc, chrXInc;
 	int lumYInc, chrYInc;
 	int dstFormat, srcFormat;
@@ -50,6 +58,7 @@
 // Contain simply the values from v(Lum|Chr)Filter just nicely packed for mmx
 	int16_t  *lumMmxFilter;
 	int16_t  *chrMmxFilter;
+	uint8_t formatConvBuffer[4000]; //FIXME dynamic alloc, but we have to change alot of code for this to be usefull
 
 	int hLumFilterSize;
 	int hChrFilterSize;
--- a/postproc/swscale_template.c	Fri Feb 01 18:16:30 2002 +0000
+++ b/postproc/swscale_template.c	Fri Feb 01 19:25:09 2002 +0000
@@ -841,7 +841,7 @@
 	int yalpha1=yalpha^4095;
 	int uvalpha1=uvalpha^4095;
 
-	if(flags&SWS_FULL_UV_IPOL)
+	if(flags&SWS_FULL_CHR_H_INT)
 	{
 
 #ifdef HAVE_MMX
@@ -1267,7 +1267,7 @@
 	int uvalpha1=uvalpha^4095;
 	const int yalpha1=0;
 
-	if(flags&SWS_FULL_UV_IPOL)
+	if(flags&SWS_FULL_CHR_H_INT)
 	{
 		RENAME(yuv2rgb2)(buf0, buf0, uvbuf0, uvbuf1, dest, dstW, 0, uvalpha, dstFormat, flags);
 		return;
@@ -1535,6 +1535,96 @@
 #endif
 }
 
+static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, int width) // copies `width` luma bytes out of one packed YUY2 line `src` into `dst`
+{
+#ifdef HAVE_MMXFIXME // this branch is empty: if HAVE_MMXFIXME were defined the function would do nothing
+#else
+	int i;
+	for(i=0; i<width; i++)
+		dst[i]= src[2*i]; // luma is taken from every second byte, starting at offset 0
+#endif
+}
+
+static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) // writes `width` U and V samples, each the average of lines src1 and src2
+{
+#ifdef HAVE_MMXFIXME // this branch is empty: if HAVE_MMXFIXME were defined the function would do nothing
+#else
+	int i;
+	for(i=0; i<width; i++) // each output sample consumes one 4-byte group from both lines
+	{
+		dstU[i]= (src1[4*i + 1] + src2[4*i + 1])>>1; // U is byte 1 of the group; >>1 is a truncating average of the two lines
+		dstV[i]= (src1[4*i + 3] + src2[4*i + 3])>>1; // V is byte 3 of the group
+	}
+#endif
+}
+
+static inline void RENAME(bgr32ToY)(uint8_t *dst, uint8_t *src, int width) // converts `width` 4-byte BGR32 pixels from `src` into luma bytes in `dst`
+{
+#ifdef HAVE_MMXFIXME // this branch is empty: if HAVE_MMXFIXME were defined the function would do nothing
+#else
+	int i;
+	for(i=0; i<width; i++)
+	{
+		int b= src[i*4+0]; // byte order within a pixel is B, G, R; byte 3 is not read
+		int g= src[i*4+1];
+		int r= src[i*4+2];
+
+		dst[i]= ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; // fixed-point weighted sum, scaled back down, plus an offset of 16
+	}
+#endif
+}
+
+static inline void RENAME(bgr32ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) // writes `width` U and V samples, each derived from a 2x2 group of BGR32 pixels on lines src1 and src2
+{
+#ifdef HAVE_MMXFIXME // this branch is empty: if HAVE_MMXFIXME were defined the function would do nothing
+#else
+	int i;
+	for(i=0; i<width; i++) // each output sample consumes 8 bytes (two pixels) from both lines
+	{
+		int b= src1[8*i + 0] + src1[8*i + 4] + src2[8*i + 0] + src2[8*i + 4]; // sum of the four blue values (two adjacent pixels on each line)
+		int g= src1[8*i + 1] + src1[8*i + 5] + src2[8*i + 1] + src2[8*i + 5]; // sum of the four green values
+		int r= src1[8*i + 2] + src1[8*i + 6] + src2[8*i + 2] + src2[8*i + 6]; // sum of the four red values
+
+		dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128; // the extra +2 in the shift divides the 4-pixel sum by 4; 128 is the chroma offset
+		dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; // the weighted sum may be negative before the offset is added
+	}
+#endif
+}
+
+static inline void RENAME(bgr24ToY)(uint8_t *dst, uint8_t *src, int width) // converts `width` 3-byte BGR24 pixels from `src` into luma bytes in `dst`
+{
+#ifdef HAVE_MMXFIXME // this branch is empty: if HAVE_MMXFIXME were defined the function would do nothing
+#else
+	int i;
+	for(i=0; i<width; i++)
+	{
+		int b= src[i*3+0]; // byte order within a pixel is B, G, R
+		int g= src[i*3+1];
+		int r= src[i*3+2];
+
+		dst[i]= ((RY*r + GY*g + BY*b)>>RGB2YUV_SHIFT) + 16; // fixed-point weighted sum, scaled back down, plus an offset of 16
+	}
+#endif
+}
+
+static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width) // writes `width` U and V samples, each derived from a 2x2 group of BGR24 pixels on lines src1 and src2
+{
+#ifdef HAVE_MMXFIXME // this branch is empty: if HAVE_MMXFIXME were defined the function would do nothing
+#else
+	int i;
+	for(i=0; i<width; i++) // each output sample consumes 6 bytes (two pixels) from both lines
+	{
+		int b= src1[6*i + 0] + src1[6*i + 3] + src2[6*i + 0] + src2[6*i + 3]; // sum of the four blue values (two adjacent pixels on each line)
+		int g= src1[6*i + 1] + src1[6*i + 4] + src2[6*i + 1] + src2[6*i + 4]; // sum of the four green values
+		int r= src1[6*i + 2] + src1[6*i + 5] + src2[6*i + 2] + src2[6*i + 5]; // sum of the four red values
+
+		dstU[i]= ((RU*r + GU*g + BU*b)>>(RGB2YUV_SHIFT+2)) + 128; // the extra +2 in the shift divides the 4-pixel sum by 4; 128 is the chroma offset
+		dstV[i]= ((RV*r + GV*g + BV*b)>>(RGB2YUV_SHIFT+2)) + 128; // the weighted sum may be negative before the offset is added
+	}
+#endif
+}
+
+
 // Bilinear / Bicubic scaling
 static inline void RENAME(hScale)(int16_t *dst, int dstW, uint8_t *src, int srcW, int xInc,
 				  int16_t *filter, int16_t *filterPos, int filterSize)
@@ -1699,8 +1789,25 @@
       // *** horizontal scale Y line to temp buffer
 static inline void RENAME(hyscale)(uint16_t *dst, int dstWidth, uint8_t *src, int srcW, int xInc,
 				   int flags, int canMMX2BeUsed, int16_t *hLumFilter,
-				   int16_t *hLumFilterPos, int hLumFilterSize, void *funnyYCode)
+				   int16_t *hLumFilterPos, int hLumFilterSize, void *funnyYCode, 
+				   int srcFormat, uint8_t *formatConvBuffer)
 {
+    if(srcFormat==IMGFMT_YUY2)
+    {
+	RENAME(yuy2ToY)(formatConvBuffer, src, srcW);
+	src= formatConvBuffer;
+    }
+    else if(srcFormat==IMGFMT_BGR32)
+    {
+	RENAME(bgr32ToY)(formatConvBuffer, src, srcW);
+	src= formatConvBuffer;
+    }
+    else if(srcFormat==IMGFMT_BGR24)
+    {
+	RENAME(bgr24ToY)(formatConvBuffer, src, srcW);
+	src= formatConvBuffer;
+    }
+
 #ifdef HAVE_MMX
 	// use the new MMX scaler if th mmx2 cant be used (its faster than the x86asm one)
     if(!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
@@ -1826,8 +1933,28 @@
 
 inline static void RENAME(hcscale)(uint16_t *dst, int dstWidth, uint8_t *src1, uint8_t *src2,
 				   int srcW, int xInc, int flags, int canMMX2BeUsed, int16_t *hChrFilter,
-				   int16_t *hChrFilterPos, int hChrFilterSize, void *funnyUVCode)
+				   int16_t *hChrFilterPos, int hChrFilterSize, void *funnyUVCode,
+				   int srcFormat, uint8_t *formatConvBuffer)
 {
+    if(srcFormat==IMGFMT_YUY2)
+    {
+	RENAME(yuy2ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
+	src1= formatConvBuffer;
+	src2= formatConvBuffer+2048;
+    }
+    else if(srcFormat==IMGFMT_BGR32)
+    {
+	RENAME(bgr32ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
+	src1= formatConvBuffer;
+	src2= formatConvBuffer+2048;
+    }
+    else if(srcFormat==IMGFMT_BGR24)
+    {
+	RENAME(bgr24ToUV)(formatConvBuffer, formatConvBuffer+2048, src1, src2, srcW);
+	src1= formatConvBuffer;
+	src2= formatConvBuffer+2048;
+    }
+
 #ifdef HAVE_MMX
 	// use the new MMX scaler if th mmx2 cant be used (its faster than the x86asm one)
     if(!(flags&SWS_FAST_BILINEAR) || (!canMMX2BeUsed))
@@ -1974,7 +2101,7 @@
    }
 }
 
-static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStride[], int srcSliceY,
+static void RENAME(swScale)(SwsContext *c, uint8_t* srcParam[], int srcStrideParam[], int srcSliceY,
              int srcSliceH, uint8_t* dstParam[], int dstStride[]){
 
 	/* load a few things into local vars to make the code more readable? and faster */
@@ -2007,6 +2134,7 @@
 	const int vChrBufSize= c->vChrBufSize;
 	uint8_t *funnyYCode= c->funnyYCode;
 	uint8_t *funnyUVCode= c->funnyUVCode;
+	uint8_t *formatConvBuffer= c->formatConvBuffer;
 
 	/* vars whch will change and which we need to storw back in the context */
 	int dstY= c->dstY;
@@ -2014,6 +2142,7 @@
 	int chrBufIndex= c->chrBufIndex;
 	int lastInLumBuf= c->lastInLumBuf;
 	int lastInChrBuf= c->lastInChrBuf;
+	int srcStride[3];
 	uint8_t *src[3];
 	uint8_t *dst[3];
 	
@@ -2021,11 +2150,33 @@
 		src[0]= srcParam[0];
 		src[1]= srcParam[2];
 		src[2]= srcParam[1];
-		
-	}else{
+		srcStride[0]= srcStrideParam[0];
+		srcStride[1]= srcStrideParam[2];
+		srcStride[2]= srcStrideParam[1];
+	}
+	else if(c->srcFormat==IMGFMT_YV12){
 		src[0]= srcParam[0];
 		src[1]= srcParam[1];
 		src[2]= srcParam[2];
+		srcStride[0]= srcStrideParam[0];
+		srcStride[1]= srcStrideParam[1];
+		srcStride[2]= srcStrideParam[2];
+	}
+	else if(isPacked(c->srcFormat)){
+		src[0]=
+		src[1]=
+		src[2]= srcParam[0];
+		srcStride[0]= srcStrideParam[0];
+		srcStride[1]=
+		srcStride[2]= srcStrideParam[0]<<1;
+	}
+	else if(c->srcFormat==IMGFMT_Y8){
+		src[0]= srcParam[0];
+		src[1]=
+		src[2]= NULL;
+		srcStride[0]= srcStrideParam[0];
+		srcStride[1]=
+		srcStride[2]= 0;
 	}
 
 	if((c->dstFormat == IMGFMT_IYUV) || (c->dstFormat == IMGFMT_I420)){
@@ -2038,6 +2189,7 @@
 		dst[1]= dstParam[1];
 		dst[2]= dstParam[2];
 	}
+	
 
 	if(dstStride[0]%8 !=0 || dstStride[1]%8 !=0 || dstStride[2]%8 !=0)
 	{
@@ -2050,10 +2202,12 @@
 		}
 	}
 
+	/* Note: the user might start scaling the picture in the middle, in which case this will not get executed.
+	   This is not really intended but currently works, so people might do it. */
 	if(srcSliceY ==0){
 		lumBufIndex=0;
 		chrBufIndex=0;
-		dstY=0;
+		dstY=0;	
 		lastInLumBuf= -1;
 		lastInChrBuf= -1;
 	}
@@ -2091,7 +2245,7 @@
 //				printf("%d %d\n", lumBufIndex, vLumBufSize);
 				RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc,
 						flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
-						funnyYCode);
+						funnyYCode, c->srcFormat, formatConvBuffer);
 				lastInLumBuf++;
 			}
 			while(lastInChrBuf < lastChrSrcY)
@@ -2105,7 +2259,7 @@
 				//FIXME replace parameters through context struct (some at least)
 				RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, (srcW+1)>>1, chrXInc,
 						flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize,
-						funnyUVCode);
+						funnyUVCode, c->srcFormat, formatConvBuffer);
 				lastInChrBuf++;
 			}
 			//wrap buf index around to stay inside the ring buffer
@@ -2129,7 +2283,7 @@
 				ASSERT(lastInLumBuf + 1 - srcSliceY >= 0)
 				RENAME(hyscale)(lumPixBuf[ lumBufIndex ], dstW, s, srcW, lumXInc,
 						flags, canMMX2BeUsed, hLumFilter, hLumFilterPos, hLumFilterSize,
-						funnyYCode);
+						funnyYCode, c->srcFormat, formatConvBuffer);
 				lastInLumBuf++;
 			}
 			while(lastInChrBuf+1 < ((srcSliceY + srcSliceH)>>1))
@@ -2142,7 +2296,7 @@
 				ASSERT(lastInChrBuf + 1 - (srcSliceY>>1) >= 0)
 				RENAME(hcscale)(chrPixBuf[ chrBufIndex ], chrDstW, src1, src2, (srcW+1)>>1, chrXInc,
 						flags, canMMX2BeUsed, hChrFilter, hChrFilterPos, hChrFilterSize,
-						funnyUVCode);
+						funnyUVCode, c->srcFormat, formatConvBuffer);
 				lastInChrBuf++;
 			}
 			//wrap buf index around to stay inside the ring buffer