changeset 9256:93e8d1655797

yuv9 -> yv12 bugfixes
author michael
date Mon, 03 Feb 2003 18:45:25 +0000
parents c82e31b31194
children ee8fad8f923a
files postproc/rgb2rgb_template.c
diffstat 1 files changed, 52 insertions(+), 39 deletions(-) [+]
line wrap: on
line diff
--- a/postproc/rgb2rgb_template.c	Mon Feb 03 18:41:41 2003 +0000
+++ b/postproc/rgb2rgb_template.c	Mon Feb 03 18:45:25 2003 +0000
@@ -1619,16 +1619,20 @@
 {
 	int x,y;
 	
+	dst[0]= src[0];
+        
 	// first line
-	for(x=0; x<srcWidth; x++){
-		dst[2*x+0]=
-		dst[2*x+1]= src[x];
+	for(x=0; x<srcWidth-1; x++){
+		dst[2*x+1]= (3*src[x] +   src[x+1])>>2;
+		dst[2*x+2]= (  src[x] + 3*src[x+1])>>2;
 	}
-	dst+= dstStride;
+	dst[2*srcWidth-1]= src[srcWidth-1];
+	
+        dst+= dstStride;
 
 	for(y=1; y<srcHeight; y++){
 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
-		const int mmxSize= srcWidth;
+		const int mmxSize= srcWidth&~15;
 		asm volatile(
 			"movl %4, %%eax			\n\t"
 			"1:				\n\t"
@@ -1636,67 +1640,76 @@
 			"movq (%1, %%eax), %%mm1	\n\t"
 			"movq 1(%0, %%eax), %%mm2	\n\t"
 			"movq 1(%1, %%eax), %%mm3	\n\t"
-			"movq %%mm0, %%mm4		\n\t"
-			"movq %%mm1, %%mm5		\n\t"
-			PAVGB" %%mm3, %%mm0		\n\t"
-			PAVGB" %%mm3, %%mm0		\n\t"
-			PAVGB" %%mm4, %%mm3		\n\t"
-			PAVGB" %%mm4, %%mm3		\n\t"
-			PAVGB" %%mm2, %%mm1		\n\t"
-			PAVGB" %%mm2, %%mm1		\n\t"
-			PAVGB" %%mm5, %%mm2		\n\t"
-			PAVGB" %%mm5, %%mm2		\n\t"
-			"movq %%mm3, %%mm4		\n\t"
-			"movq %%mm2, %%mm5		\n\t"
-			"punpcklbw %%mm1, %%mm3		\n\t"
-			"punpckhbw %%mm1, %%mm4		\n\t"
-			"punpcklbw %%mm0, %%mm2		\n\t"
-			"punpckhbw %%mm0, %%mm5		\n\t"
+			"movq -1(%0, %%eax), %%mm4	\n\t"
+			"movq -1(%1, %%eax), %%mm5	\n\t"
+			PAVGB" %%mm0, %%mm5		\n\t"
+			PAVGB" %%mm0, %%mm3		\n\t"
+			PAVGB" %%mm0, %%mm5		\n\t"
+			PAVGB" %%mm0, %%mm3		\n\t"
+			PAVGB" %%mm1, %%mm4		\n\t"
+			PAVGB" %%mm1, %%mm2		\n\t"
+			PAVGB" %%mm1, %%mm4		\n\t"
+			PAVGB" %%mm1, %%mm2		\n\t"
+			"movq %%mm5, %%mm7		\n\t"
+			"movq %%mm4, %%mm6		\n\t"
+			"punpcklbw %%mm3, %%mm5		\n\t"
+			"punpckhbw %%mm3, %%mm7		\n\t"
+			"punpcklbw %%mm2, %%mm4		\n\t"
+			"punpckhbw %%mm2, %%mm6		\n\t"
 #if 1
-			MOVNTQ" %%mm3, (%2, %%eax, 2)	\n\t"
-			MOVNTQ" %%mm4, 8(%2, %%eax, 2)	\n\t"
-			MOVNTQ" %%mm2, (%3, %%eax, 2)	\n\t"
-			MOVNTQ" %%mm5, 8(%3, %%eax, 2)	\n\t"
+			MOVNTQ" %%mm5, (%2, %%eax, 2)	\n\t"
+			MOVNTQ" %%mm7, 8(%2, %%eax, 2)	\n\t"
+			MOVNTQ" %%mm4, (%3, %%eax, 2)	\n\t"
+			MOVNTQ" %%mm6, 8(%3, %%eax, 2)	\n\t"
 #else
-			"movq %%mm3, (%2, %%eax, 2)	\n\t"
-			"movq %%mm4, 8(%2, %%eax, 2)	\n\t"
-			"movq %%mm2, (%3, %%eax, 2)	\n\t"
-			"movq %%mm5, 8(%3, %%eax, 2)	\n\t"
+			"movq %%mm5, (%2, %%eax, 2)	\n\t"
+			"movq %%mm7, 8(%2, %%eax, 2)	\n\t"
+			"movq %%mm4, (%3, %%eax, 2)	\n\t"
+			"movq %%mm6, 8(%3, %%eax, 2)	\n\t"
 #endif
 			"addl $8, %%eax			\n\t"
 			" js 1b				\n\t"
-			:: "r" (src + mmxSize-1), "r" (src + srcStride + mmxSize-1),
+			:: "r" (src + mmxSize  ), "r" (src + srcStride + mmxSize  ),
 			   "r" (dst + mmxSize*2), "r" (dst + dstStride + mmxSize*2),
 			   "g" (-mmxSize)
 			: "%eax"
 
 		);
-		dst[0]= 
-		dst[dstStride]= src[0];
 #else
-		dst[0]= 
-		dst[dstStride]= src[0];
+		const int mmxSize=1;
+#endif
+		dst[0        ]= (3*src[0] +   src[srcStride])>>2;
+		dst[dstStride]= (  src[0] + 3*src[srcStride])>>2;
 
-		for(x=0; x<srcWidth-1; x++){
+		for(x=mmxSize-1; x<srcWidth-1; x++){
 			dst[2*x          +1]= (3*src[x+0] +   src[x+srcStride+1])>>2;
 			dst[2*x+dstStride+2]= (  src[x+0] + 3*src[x+srcStride+1])>>2;
 			dst[2*x+dstStride+1]= (  src[x+1] + 3*src[x+srcStride  ])>>2;
 			dst[2*x          +2]= (3*src[x+1] +   src[x+srcStride  ])>>2;
 		}
-#endif
-		dst[srcWidth*2 -1]= 
-		dst[srcWidth*2 -1 + dstStride]= src[srcWidth-1];
+		dst[srcWidth*2 -1            ]= (3*src[srcWidth-1] +   src[srcWidth-1 + srcStride])>>2;
+		dst[srcWidth*2 -1 + dstStride]= (  src[srcWidth-1] + 3*src[srcWidth-1 + srcStride])>>2;
 
 		dst+=dstStride*2;
 		src+=srcStride;
 	}
-	src-=srcStride;
 	
 	// last line
+#if 1
+	dst[0]= src[0];
+        
+	for(x=0; x<srcWidth-1; x++){
+		dst[2*x+1]= (3*src[x] +   src[x+1])>>2;
+		dst[2*x+2]= (  src[x] + 3*src[x+1])>>2;
+	}
+	dst[2*srcWidth-1]= src[srcWidth-1];
+#else
 	for(x=0; x<srcWidth; x++){
 		dst[2*x+0]=
 		dst[2*x+1]= src[x];
 	}
+#endif
+
 #ifdef HAVE_MMX
 asm volatile(   EMMS" \n\t"
         	SFENCE" \n\t"