changeset 9394:b58dcfbbca5a

-fPIC compilable
author michael
date Tue, 11 Feb 2003 15:06:44 +0000
parents 5f9c97070b56
children 2d651a218031
files postproc/rgb2rgb_template.c
diffstat 1 files changed, 28 insertions(+), 35 deletions(-) [+]
line wrap: on
line diff
--- a/postproc/rgb2rgb_template.c	Tue Feb 11 13:13:16 2003 +0000
+++ b/postproc/rgb2rgb_template.c	Tue Feb 11 15:06:44 2003 +0000
@@ -1409,7 +1409,7 @@
 			"addl $8, %%eax			\n\t"
 			"cmpl %4, %%eax			\n\t"
 			" jb 1b				\n\t"
-			::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "r" (chromWidth)
+			::"r"(dst), "r"(ysrc), "r"(usrc), "r"(vsrc), "g" (chromWidth)
 			: "%eax"
 		);
 #else
@@ -1590,7 +1590,7 @@
 			"addl $8, %%eax			\n\t"
 			"cmpl %4, %%eax			\n\t"
 			" jb 1b				\n\t"
-			::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "r" (chromWidth)
+			::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
 			: "memory", "%eax"
 		);
 
@@ -1620,7 +1620,7 @@
 			"cmpl %4, %%eax			\n\t"
 			" jb 1b				\n\t"
 
-			::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "r" (chromWidth)
+			::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
 			: "memory", "%eax"
 		);
 #else
@@ -1828,7 +1828,7 @@
 			"addl $8, %%eax			\n\t"
 			"cmpl %4, %%eax			\n\t"
 			" jb 1b				\n\t"
-			::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "r" (chromWidth)
+			::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
 			: "memory", "%eax"
 		);
 
@@ -1858,7 +1858,7 @@
 			"cmpl %4, %%eax			\n\t"
 			" jb 1b				\n\t"
 
-			::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "r" (chromWidth)
+			::"r"(src), "r"(ydst), "r"(udst), "r"(vdst), "g" (chromWidth)
 			: "memory", "%eax"
 		);
 #else
@@ -2385,32 +2385,24 @@
 			int srcStride1, int srcStride2,
 			int srcStride3, int dstStride)
 {
-    unsigned y,x,x2,w,h;
+    unsigned y,x,w,h;
     w=width/2; h=height;
-#ifdef HAVE_MMX
-    asm volatile(
-	PREFETCH" %0\n\t"
-	PREFETCH" %1\n\t"
-	PREFETCH" %2\n\t"
-	::"m"(*(src1+srcStride1)),"m"(*(src2+srcStride2)),"m"(*(src3+srcStride3)):"memory");
-#endif
     for(y=0;y<h;y++){
 	const uint8_t* yp=src1+srcStride1*y;
 	const uint8_t* up=src2+srcStride2*(y>>2);
 	const uint8_t* vp=src3+srcStride3*(y>>2);
 	uint8_t* d=dst+dstStride*y;
-	x2=0;
 	x=0;
 #ifdef HAVE_MMX
-	for(;x<w-7;x+=8,x2+=32)
+	for(;x<w-7;x+=8)
 	{
 	    asm volatile(
-		PREFETCH" 32%1\n\t"
-		PREFETCH" 32%2\n\t"
-		PREFETCH" 32%3\n\t"
-		"movq	%1, %%mm0\n\t"       /* Y0Y1Y2Y3Y4Y5Y6Y7 */
-		"movq	%2, %%mm1\n\t"       /* U0U1U2U3U4U5U6U7 */
-		"movq	%3, %%mm2\n\t"	     /* V0V1V2V3V4V5V6V7 */
+		PREFETCH" 32(%1, %0)\n\t"
+		PREFETCH" 32(%2, %0)\n\t"
+		PREFETCH" 32(%3, %0)\n\t"
+		"movq	(%1, %0, 4), %%mm0\n\t"       /* Y0Y1Y2Y3Y4Y5Y6Y7 */
+		"movq	(%2, %0), %%mm1\n\t"       /* U0U1U2U3U4U5U6U7 */
+		"movq	(%3, %0), %%mm2\n\t"	     /* V0V1V2V3V4V5V6V7 */
 		"movq	%%mm0, %%mm3\n\t"    /* Y0Y1Y2Y3Y4Y5Y6Y7 */
 		"movq	%%mm1, %%mm4\n\t"    /* U0U1U2U3U4U5U6U7 */
 		"movq	%%mm2, %%mm5\n\t"    /* V0V1V2V3V4V5V6V7 */
@@ -2423,41 +2415,42 @@
 		"punpcklbw %%mm2, %%mm1\n\t" /* U0V0 U0V0 U1V1 U1V1*/
 		"punpcklbw %%mm1, %%mm0\n\t" /* Y0U0 Y1V0 Y2U0 Y3V0*/
 		"punpckhbw %%mm1, %%mm3\n\t" /* Y4U1 Y5V1 Y6U1 Y7V1*/
-		MOVNTQ"	%%mm0, %0\n\t"
-		MOVNTQ"	%%mm3, 8%0\n\t"
+		MOVNTQ"	%%mm0, (%4, %0, 8)\n\t"
+		MOVNTQ"	%%mm3, 8(%4, %0, 8)\n\t"
 		
 		"punpckhbw %%mm2, %%mm6\n\t" /* U2V2 U2V2 U3V3 U3V3*/
-		"movq	8%1, %%mm0\n\t"
+		"movq	8(%1, %0, 4), %%mm0\n\t"
 		"movq	%%mm0, %%mm3\n\t"
 		"punpcklbw %%mm6, %%mm0\n\t" /* Y U2 Y V2 Y U2 Y V2*/
 		"punpckhbw %%mm6, %%mm3\n\t" /* Y U3 Y V3 Y U3 Y V3*/
-		MOVNTQ"	%%mm0, 16%0\n\t"
-		MOVNTQ"	%%mm3, 24%0\n\t"
+		MOVNTQ"	%%mm0, 16(%4, %0, 8)\n\t"
+		MOVNTQ"	%%mm3, 24(%4, %0, 8)\n\t"
 
 		"movq	%%mm4, %%mm6\n\t"
-		"movq	16%1, %%mm0\n\t"
+		"movq	16(%1, %0, 4), %%mm0\n\t"
 		"movq	%%mm0, %%mm3\n\t"
 		"punpcklbw %%mm5, %%mm4\n\t"
 		"punpcklbw %%mm4, %%mm0\n\t" /* Y U4 Y V4 Y U4 Y V4*/
 		"punpckhbw %%mm4, %%mm3\n\t" /* Y U5 Y V5 Y U5 Y V5*/
-		MOVNTQ"	%%mm0, 32%0\n\t"
-		MOVNTQ"	%%mm3, 40%0\n\t"
+		MOVNTQ"	%%mm0, 32(%4, %0, 8)\n\t"
+		MOVNTQ"	%%mm3, 40(%4, %0, 8)\n\t"
 		
 		"punpckhbw %%mm5, %%mm6\n\t"
-		"movq	24%1, %%mm0\n\t"
+		"movq	24(%1, %0, 4), %%mm0\n\t"
 		"movq	%%mm0, %%mm3\n\t"
 		"punpcklbw %%mm6, %%mm0\n\t" /* Y U6 Y V6 Y U6 Y V6*/
 		"punpckhbw %%mm6, %%mm3\n\t" /* Y U7 Y V7 Y U7 Y V7*/
-		MOVNTQ"	%%mm0, 48%0\n\t"
-		MOVNTQ"	%%mm3, 56%0\n\t"
+		MOVNTQ"	%%mm0, 48(%4, %0, 8)\n\t"
+		MOVNTQ"	%%mm3, 56(%4, %0, 8)\n\t"
 
-		:"=m"(d[8*x])
-		:"m"(yp[x2]),"m"(up[x]),"m"(vp[x])
+		: "+r" (x)
+                : "r"(yp), "r" (up), "r"(vp), "r"(d)
 		:"memory");
 	}
 #endif
-	for(;x<w;x++,x2+=4)
+	for(; x<w; x++)
 	{
+	    const int x2= x<<2;
 	    d[8*x+0]=yp[x2];
 	    d[8*x+1]=up[x];
 	    d[8*x+2]=yp[x2+1];