changeset 3037:3fc9a8b9f178

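1% speedup: the 8-row block copies (SCALED_CPY / SIMPLE_CPY) now address each row with scaled-index operands instead of advancing the src/dst pointers between rows, so src and dst become input-only asm operands and the pushl/popl save/restore is removed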
author michael
date Tue, 20 Nov 2001 20:46:39 +0000
parents 8f689566ac85
children fa8665a91729
files postproc/postprocess.c postproc/postprocess_template.c
diffstat 2 files changed, 74 insertions(+), 92 deletions(-) [+]
line wrap: on
line diff
--- a/postproc/postprocess.c	Tue Nov 20 19:35:03 2001 +0000
+++ b/postproc/postprocess.c	Tue Nov 20 20:46:39 2001 +0000
@@ -3086,24 +3086,24 @@
 	{
 #ifdef HAVE_MMX
 					asm volatile(
-						"leal (%2,%2), %%eax	\n\t"
-						"leal (%3,%3), %%ebx	\n\t"
+						"leal (%0,%2), %%eax	\n\t"
+						"leal (%1,%3), %%ebx	\n\t"
 						"movq packedYOffset, %%mm2	\n\t"
 						"movq packedYScale, %%mm3	\n\t"
 						"pxor %%mm4, %%mm4	\n\t"
 
-#define SCALED_CPY					\
-						"movq (%0), %%mm0	\n\t"\
-						"movq (%0), %%mm5	\n\t"\
+#define SCALED_CPY(src1, src2, dst1, dst2)					\
+						"movq " #src1 ", %%mm0	\n\t"\
+						"movq " #src1 ", %%mm5	\n\t"\
 						"punpcklbw %%mm4, %%mm0 \n\t"\
 						"punpckhbw %%mm4, %%mm5 \n\t"\
 						"psubw %%mm2, %%mm0	\n\t"\
 						"psubw %%mm2, %%mm5	\n\t"\
-						"movq (%0,%2), %%mm1	\n\t"\
+						"movq " #src2 ", %%mm1	\n\t"\
 						"psllw $6, %%mm0	\n\t"\
 						"psllw $6, %%mm5	\n\t"\
 						"pmulhw %%mm3, %%mm0	\n\t"\
-						"movq (%0,%2), %%mm6	\n\t"\
+						"movq " #src2 ", %%mm6	\n\t"\
 						"pmulhw %%mm3, %%mm5	\n\t"\
 						"punpcklbw %%mm4, %%mm1 \n\t"\
 						"punpckhbw %%mm4, %%mm6 \n\t"\
@@ -3113,23 +3113,22 @@
 						"psllw $6, %%mm6	\n\t"\
 						"pmulhw %%mm3, %%mm1	\n\t"\
 						"pmulhw %%mm3, %%mm6	\n\t"\
-						"addl %%eax, %0		\n\t"\
 						"packuswb %%mm5, %%mm0	\n\t"\
 						"packuswb %%mm6, %%mm1	\n\t"\
-						"movq %%mm0, (%1)	\n\t"\
-						"movq %%mm1, (%1, %3)	\n\t"\
-
-SCALED_CPY
-						"addl %%ebx, %1		\n\t"
-SCALED_CPY
-						"addl %%ebx, %1		\n\t"
-SCALED_CPY
-						"addl %%ebx, %1		\n\t"
-SCALED_CPY
-
-						: "+r"(src),
-						"+r"(dst)
-						:"r" (srcStride),
+						"movq %%mm0, " #dst1 "	\n\t"\
+						"movq %%mm1, " #dst2 "	\n\t"\
+
+SCALED_CPY((%0)       , (%0, %2)      , (%1)       , (%1, %3))
+SCALED_CPY((%0, %2, 2), (%%eax, %2, 2), (%1, %3, 2), (%%ebx, %3, 2))
+SCALED_CPY((%0, %2, 4), (%%eax, %2, 4), (%1, %3, 4), (%%ebx, %3, 4))
+						"leal (%%eax,%2,4), %%eax	\n\t"
+						"leal (%%ebx,%3,4), %%ebx	\n\t"
+SCALED_CPY((%%eax, %2), (%%eax, %2, 2), (%%ebx, %3), (%%ebx, %3, 2))
+
+
+						: : "r"(src),
+						"r"(dst),
+						"r" (srcStride),
 						"r" (dstStride)
 						: "%eax", "%ebx"
 					);
@@ -3143,30 +3142,22 @@
 	{
 #ifdef HAVE_MMX
 					asm volatile(
-						"pushl %0 \n\t"
-						"pushl %1 \n\t"
-						"leal (%2,%2), %%eax	\n\t"
-						"leal (%3,%3), %%ebx	\n\t"
-
-#define SIMPLE_CPY					\
-						"movq (%0), %%mm0	\n\t"\
-						"movq (%0,%2), %%mm1	\n\t"\
-						"movq %%mm0, (%1)	\n\t"\
-						"movq %%mm1, (%1, %3)	\n\t"\
-
-SIMPLE_CPY
-						"addl %%eax, %0		\n\t"
-						"addl %%ebx, %1		\n\t"
-SIMPLE_CPY
-						"addl %%eax, %0		\n\t"
-						"addl %%ebx, %1		\n\t"
-SIMPLE_CPY
-						"addl %%eax, %0		\n\t"
-						"addl %%ebx, %1		\n\t"
-SIMPLE_CPY
-
-						"popl %1 \n\t"
-						"popl %0 \n\t"
+						"leal (%0,%2), %%eax	\n\t"
+						"leal (%1,%3), %%ebx	\n\t"
+
+#define SIMPLE_CPY(src1, src2, dst1, dst2)				\
+						"movq " #src1 ", %%mm0	\n\t"\
+						"movq " #src2 ", %%mm1	\n\t"\
+						"movq %%mm0, " #dst1 "	\n\t"\
+						"movq %%mm1, " #dst2 "	\n\t"\
+
+SIMPLE_CPY((%0)       , (%0, %2)      , (%1)       , (%1, %3))
+SIMPLE_CPY((%0, %2, 2), (%%eax, %2, 2), (%1, %3, 2), (%%ebx, %3, 2))
+SIMPLE_CPY((%0, %2, 4), (%%eax, %2, 4), (%1, %3, 4), (%%ebx, %3, 4))
+						"leal (%%eax,%2,4), %%eax	\n\t"
+						"leal (%%ebx,%3,4), %%ebx	\n\t"
+SIMPLE_CPY((%%eax, %2), (%%eax, %2, 2), (%%ebx, %3), (%%ebx, %3, 2))
+
 						: : "r" (src),
 						"r" (dst),
 						"r" (srcStride),
--- a/postproc/postprocess_template.c	Tue Nov 20 19:35:03 2001 +0000
+++ b/postproc/postprocess_template.c	Tue Nov 20 20:46:39 2001 +0000
@@ -3086,24 +3086,24 @@
 	{
 #ifdef HAVE_MMX
 					asm volatile(
-						"leal (%2,%2), %%eax	\n\t"
-						"leal (%3,%3), %%ebx	\n\t"
+						"leal (%0,%2), %%eax	\n\t"
+						"leal (%1,%3), %%ebx	\n\t"
 						"movq packedYOffset, %%mm2	\n\t"
 						"movq packedYScale, %%mm3	\n\t"
 						"pxor %%mm4, %%mm4	\n\t"
 
-#define SCALED_CPY					\
-						"movq (%0), %%mm0	\n\t"\
-						"movq (%0), %%mm5	\n\t"\
+#define SCALED_CPY(src1, src2, dst1, dst2)					\
+						"movq " #src1 ", %%mm0	\n\t"\
+						"movq " #src1 ", %%mm5	\n\t"\
 						"punpcklbw %%mm4, %%mm0 \n\t"\
 						"punpckhbw %%mm4, %%mm5 \n\t"\
 						"psubw %%mm2, %%mm0	\n\t"\
 						"psubw %%mm2, %%mm5	\n\t"\
-						"movq (%0,%2), %%mm1	\n\t"\
+						"movq " #src2 ", %%mm1	\n\t"\
 						"psllw $6, %%mm0	\n\t"\
 						"psllw $6, %%mm5	\n\t"\
 						"pmulhw %%mm3, %%mm0	\n\t"\
-						"movq (%0,%2), %%mm6	\n\t"\
+						"movq " #src2 ", %%mm6	\n\t"\
 						"pmulhw %%mm3, %%mm5	\n\t"\
 						"punpcklbw %%mm4, %%mm1 \n\t"\
 						"punpckhbw %%mm4, %%mm6 \n\t"\
@@ -3113,23 +3113,22 @@
 						"psllw $6, %%mm6	\n\t"\
 						"pmulhw %%mm3, %%mm1	\n\t"\
 						"pmulhw %%mm3, %%mm6	\n\t"\
-						"addl %%eax, %0		\n\t"\
 						"packuswb %%mm5, %%mm0	\n\t"\
 						"packuswb %%mm6, %%mm1	\n\t"\
-						"movq %%mm0, (%1)	\n\t"\
-						"movq %%mm1, (%1, %3)	\n\t"\
-
-SCALED_CPY
-						"addl %%ebx, %1		\n\t"
-SCALED_CPY
-						"addl %%ebx, %1		\n\t"
-SCALED_CPY
-						"addl %%ebx, %1		\n\t"
-SCALED_CPY
-
-						: "+r"(src),
-						"+r"(dst)
-						:"r" (srcStride),
+						"movq %%mm0, " #dst1 "	\n\t"\
+						"movq %%mm1, " #dst2 "	\n\t"\
+
+SCALED_CPY((%0)       , (%0, %2)      , (%1)       , (%1, %3))
+SCALED_CPY((%0, %2, 2), (%%eax, %2, 2), (%1, %3, 2), (%%ebx, %3, 2))
+SCALED_CPY((%0, %2, 4), (%%eax, %2, 4), (%1, %3, 4), (%%ebx, %3, 4))
+						"leal (%%eax,%2,4), %%eax	\n\t"
+						"leal (%%ebx,%3,4), %%ebx	\n\t"
+SCALED_CPY((%%eax, %2), (%%eax, %2, 2), (%%ebx, %3), (%%ebx, %3, 2))
+
+
+						: : "r"(src),
+						"r"(dst),
+						"r" (srcStride),
 						"r" (dstStride)
 						: "%eax", "%ebx"
 					);
@@ -3143,30 +3142,22 @@
 	{
 #ifdef HAVE_MMX
 					asm volatile(
-						"pushl %0 \n\t"
-						"pushl %1 \n\t"
-						"leal (%2,%2), %%eax	\n\t"
-						"leal (%3,%3), %%ebx	\n\t"
-
-#define SIMPLE_CPY					\
-						"movq (%0), %%mm0	\n\t"\
-						"movq (%0,%2), %%mm1	\n\t"\
-						"movq %%mm0, (%1)	\n\t"\
-						"movq %%mm1, (%1, %3)	\n\t"\
-
-SIMPLE_CPY
-						"addl %%eax, %0		\n\t"
-						"addl %%ebx, %1		\n\t"
-SIMPLE_CPY
-						"addl %%eax, %0		\n\t"
-						"addl %%ebx, %1		\n\t"
-SIMPLE_CPY
-						"addl %%eax, %0		\n\t"
-						"addl %%ebx, %1		\n\t"
-SIMPLE_CPY
-
-						"popl %1 \n\t"
-						"popl %0 \n\t"
+						"leal (%0,%2), %%eax	\n\t"
+						"leal (%1,%3), %%ebx	\n\t"
+
+#define SIMPLE_CPY(src1, src2, dst1, dst2)				\
+						"movq " #src1 ", %%mm0	\n\t"\
+						"movq " #src2 ", %%mm1	\n\t"\
+						"movq %%mm0, " #dst1 "	\n\t"\
+						"movq %%mm1, " #dst2 "	\n\t"\
+
+SIMPLE_CPY((%0)       , (%0, %2)      , (%1)       , (%1, %3))
+SIMPLE_CPY((%0, %2, 2), (%%eax, %2, 2), (%1, %3, 2), (%%ebx, %3, 2))
+SIMPLE_CPY((%0, %2, 4), (%%eax, %2, 4), (%1, %3, 4), (%%ebx, %3, 4))
+						"leal (%%eax,%2,4), %%eax	\n\t"
+						"leal (%%ebx,%3,4), %%ebx	\n\t"
+SIMPLE_CPY((%%eax, %2), (%%eax, %2, 2), (%%ebx, %3), (%%ebx, %3, 2))
+
 						: : "r" (src),
 						"r" (dst),
 						"r" (srcStride),