changeset 4481:b8ec9cc1b2c5

MMX(2)-optimized YUY2 input; avoid duplicate checks for formats by changing them (Y8->Y800, IYUV->I420)
author michael
date Sat, 02 Feb 2002 18:18:58 +0000
parents 0919d2ec5e22
children 9c6ab57cfdde
files postproc/swscale.c postproc/swscale_template.c
diffstat 2 files changed, 71 insertions(+), 13 deletions(-) [+]
line wrap: on
line diff
--- a/postproc/swscale.c	Sat Feb 02 18:09:35 2002 +0000
+++ b/postproc/swscale.c	Sat Feb 02 18:18:58 2002 +0000
@@ -17,7 +17,7 @@
 */
 
 /*
-  supported Input formats: YV12, I420, IYUV, YUY2, BGR32, BGR24 (grayscale soon too)
+  supported Input formats: YV12, I420, IYUV, YUY2, BGR32, BGR24, Y8, Y800
   supported output formats: YV12, I420, IYUV, BGR15, BGR16, BGR24, BGR32 (grayscale soon too)
   BGR15/16 support dithering
 */
@@ -58,13 +58,19 @@
 #endif
 
 //FIXME replace this with something faster
-#define isPlanarYUV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV)
+#define isPlanarYUV(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420)
 #define isYUV(x)       ((x)==IMGFMT_YUY2 || isPlanarYUV(x))
-#define isHalfChrV(x)  ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV)
-#define isHalfChrH(x)  ((x)==IMGFMT_YUY2 || (x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_IYUV)
+#define isHalfChrV(x)  ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420)
+#define isHalfChrH(x)  ((x)==IMGFMT_YUY2 || (x)==IMGFMT_YV12 || (x)==IMGFMT_I420)
 #define isPacked(x)    ((x)==IMGFMT_YUY2 || (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24)
+#define isGray(x)      ((x)==IMGFMT_Y800)
+#define isSupportedIn(x)  ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 || (x)==IMGFMT_YUY2 \
+			|| (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24\
+			|| (x)==IMGFMT_Y800)
+#define isSupportedOut(x) ((x)==IMGFMT_YV12 || (x)==IMGFMT_I420 \
+			|| (x)==IMGFMT_BGR32|| (x)==IMGFMT_BGR24|| (x)==IMGFMT_BGR16|| (x)==IMGFMT_BGR15)
 
-#define RGB2YUV_SHIFT 8
+#define RGB2YUV_SHIFT 16
 #define BY ((int)( 0.098*(1<<RGB2YUV_SHIFT)+0.5))
 #define BV ((int)(-0.071*(1<<RGB2YUV_SHIFT)+0.5))
 #define BU ((int)( 0.439*(1<<RGB2YUV_SHIFT)+0.5))
@@ -90,7 +96,8 @@
 write special vertical cubic upscale version
 Optimize C code (yv12 / minmax)
 add support for packed pixel yuv input & output
-add support for Y8 input & output
+add support for Y8 output
+optimize bgr24 & bgr32
 add BGR4 output support
 write special BGR->BGR scaler
 */
@@ -118,6 +125,7 @@
 static uint64_t __attribute__((aligned(8))) bm00001111=0x00000000FFFFFFFFLL;
 static uint64_t __attribute__((aligned(8))) bm00000111=0x0000000000FFFFFFLL;
 static uint64_t __attribute__((aligned(8))) bm11111000=0xFFFFFFFFFF000000LL;
+static uint64_t __attribute__((aligned(8))) bm01010101=0x00FF00FF00FF00FFLL;
 
 static volatile uint64_t __attribute__((aligned(8))) b5Dither;
 static volatile uint64_t __attribute__((aligned(8))) g5Dither;
@@ -198,7 +206,7 @@
 {
  volatile int i= yCoeff+vrCoeff+ubCoeff+vgCoeff+ugCoeff+bF8+bFC+w400+w80+w10+
  bm00001111+bm00000111+bm11111000+b16Mask+g16Mask+r16Mask+b15Mask+g15Mask+r15Mask+asm_yalpha1+ asm_uvalpha1+
- M24A+M24B+M24C+w02 + b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0];
+ M24A+M24B+M24C+w02 + b5Dither+g5Dither+r5Dither+g6Dither+dither4[0]+dither8[0]+bm01010101;
  if(i) i=0;
 }
 #endif
@@ -1114,11 +1122,15 @@
 
 	if(swScale==NULL) globalInit();
 
+	/* avoid duplicate formats, so we don't need to check too much */
+	if(srcFormat==IMGFMT_IYUV) srcFormat=IMGFMT_I420;
+	if(srcFormat==IMGFMT_Y8)   srcFormat=IMGFMT_Y800;
+	
+	if(!isSupportedIn(srcFormat)) return NULL;
+	if(!isSupportedOut(dstFormat)) return NULL;
+
 	/* sanity check */
 	if(srcW<4 || srcH<1 || dstW<8 || dstH<1) return NULL; //FIXME check if these are enough and try to lowwer them after fixing the relevant parts of the code
-	
-//	if(!isSupportedIn(srcFormat)) return NULL;
-//	if(!isSupportedOut(dstFormat)) return NULL;
 
 	if(!dstFilter) dstFilter= &dummyFilter;
 	if(!srcFilter) srcFilter= &dummyFilter;
--- a/postproc/swscale_template.c	Sat Feb 02 18:09:35 2002 +0000
+++ b/postproc/swscale_template.c	Sat Feb 02 18:18:58 2002 +0000
@@ -1535,9 +1535,26 @@
 #endif
 }
 
+//FIXME yuy2* can read up to 7 samples too many
+
 static inline void RENAME(yuy2ToY)(uint8_t *dst, uint8_t *src, int width)
 {
-#ifdef HAVE_MMXFIXME
+#ifdef HAVE_MMX
+	asm volatile(
+		"movq "MANGLE(bm01010101)", %%mm2\n\t"
+		"movl %0, %%eax			\n\t"
+		"1:				\n\t"
+		"movq (%1, %%eax,2), %%mm0	\n\t"
+		"movq 8(%1, %%eax,2), %%mm1	\n\t"
+		"pand %%mm2, %%mm0		\n\t"
+		"pand %%mm2, %%mm1		\n\t"
+		"packuswb %%mm1, %%mm0		\n\t"
+		"movq %%mm0, (%2, %%eax)	\n\t"
+		"addl $8, %%eax			\n\t"
+		" js 1b				\n\t"
+		: : "g" (-width), "r" (src+width*2), "r" (dst+width)
+		: "%eax"
+	);
 #else
 	int i;
 	for(i=0; i<width; i++)
@@ -1547,7 +1564,32 @@
 
 static inline void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV, uint8_t *src1, uint8_t *src2, int width)
 {
-#ifdef HAVE_MMXFIXME
+#if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
+	asm volatile(
+		"movq "MANGLE(bm01010101)", %%mm4\n\t"
+		"movl %0, %%eax			\n\t"
+		"1:				\n\t"
+		"movq (%1, %%eax,4), %%mm0	\n\t"
+		"movq 8(%1, %%eax,4), %%mm1	\n\t"
+		"movq (%2, %%eax,4), %%mm2	\n\t"
+		"movq 8(%2, %%eax,4), %%mm3	\n\t"
+		PAVGB(%%mm2, %%mm0)
+		PAVGB(%%mm3, %%mm1)
+		"psrlw $8, %%mm0		\n\t"
+		"psrlw $8, %%mm1		\n\t"
+		"packuswb %%mm1, %%mm0		\n\t"
+		"movq %%mm0, %%mm1		\n\t"
+		"psrlw $8, %%mm0		\n\t"
+		"pand %%mm4, %%mm1		\n\t"
+		"packuswb %%mm0, %%mm0		\n\t"
+		"packuswb %%mm1, %%mm1		\n\t"
+		"movd %%mm0, (%4, %%eax)	\n\t"
+		"movd %%mm1, (%3, %%eax)	\n\t"
+		"addl $4, %%eax			\n\t"
+		" js 1b				\n\t"
+		: : "g" (-width), "r" (src1+width*4), "r" (src2+width*4), "r" (dstU+width), "r" (dstV+width)
+		: "%eax"
+	);
 #else
 	int i;
 	for(i=0; i<width; i++)
@@ -1954,6 +1996,10 @@
 	src1= formatConvBuffer;
 	src2= formatConvBuffer+2048;
     }
+    else if(isGray(srcFormat))
+    {
+    	return;
+    }
 
 #ifdef HAVE_MMX
 	// use the new MMX scaler if th mmx2 cant be used (its faster than the x86asm one)
@@ -2170,7 +2216,7 @@
 		srcStride[1]=
 		srcStride[2]= srcStrideParam[0]<<1;
 	}
-	else if(c->srcFormat==IMGFMT_Y8){
+	else if(isGray(c->srcFormat)){
 		src[0]= srcParam[0];
 		src[1]=
 		src[2]= NULL;