changeset 2570:af43a83122fc

minor speedup and cleanup
author michael
date Tue, 30 Oct 2001 21:14:02 +0000
parents 30b736e7feef
children 13608ff3d1e6
files postproc/postprocess.c postproc/postprocess_template.c
diffstat 2 files changed, 54 insertions(+), 24 deletions(-) [+]
line wrap: on
line diff
--- a/postproc/postprocess.c	Tue Oct 30 21:04:42 2001 +0000
+++ b/postproc/postprocess.c	Tue Oct 30 21:14:02 2001 +0000
@@ -62,6 +62,7 @@
 fix warnings (unused vars, ...)
 noise reduction filters
 border remover
+optimize c versions
 ...
 
 Notes:
@@ -417,7 +418,6 @@
 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
 	src+= stride*3;
 	asm volatile(	//"movv %0 %1 %2\n\t"
-		"pushl %0 \n\t"
 		"movq pQPb, %%mm0				\n\t"  // QP,..., QP
 
 		"movq (%0), %%mm6				\n\t"
@@ -535,7 +535,7 @@
 		PAVGB(%%mm3, %%mm0)				      //      112	/4
 		PAVGB(%%mm0, %%mm5)				      //    112246	/16
 		"movq %%mm5, (%%eax, %1, 4)			\n\t" //        X
-		"popl %0\n\t"
+		"subl %1, %0					\n\t"
 
 		:
 		: "r" (src), "r" (stride)
@@ -1167,7 +1167,21 @@
 
 		"movq temp0, %%mm2				\n\t" // 2L0 - 5L1 + 5L2 - 2L3
 		"movq temp1, %%mm3				\n\t" // 2H0 - 5H1 + 5H2 - 2H3
-//FIXME pxor, psubw, pmax for abs
+
+#ifdef HAVE_MMX2
+		"movq %%mm7, %%mm6				\n\t" // 0
+		"psubw %%mm0, %%mm6				\n\t"
+		"pmaxsw %%mm6, %%mm0				\n\t" // |2L4 - 5L5 + 5L6 - 2L7|
+		"movq %%mm7, %%mm6				\n\t" // 0
+		"psubw %%mm1, %%mm6				\n\t"
+		"pmaxsw %%mm6, %%mm1				\n\t" // |2H4 - 5H5 + 5H6 - 2H7|
+		"movq %%mm7, %%mm6				\n\t" // 0
+		"psubw %%mm2, %%mm6				\n\t"
+		"pmaxsw %%mm6, %%mm2				\n\t" // |2L0 - 5L1 + 5L2 - 2L3|
+		"movq %%mm7, %%mm6				\n\t" // 0
+		"psubw %%mm3, %%mm6				\n\t"
+		"pmaxsw %%mm6, %%mm3				\n\t" // |2H0 - 5H1 + 5H2 - 2H3|
+#else
 		"movq %%mm7, %%mm6				\n\t" // 0
 		"pcmpgtw %%mm0, %%mm6				\n\t"
 		"pxor %%mm6, %%mm0				\n\t"
@@ -1176,7 +1190,6 @@
 		"pcmpgtw %%mm1, %%mm6				\n\t"
 		"pxor %%mm6, %%mm1				\n\t"
 		"psubw %%mm6, %%mm1				\n\t" // |2H4 - 5H5 + 5H6 - 2H7|
-
 		"movq %%mm7, %%mm6				\n\t" // 0
 		"pcmpgtw %%mm2, %%mm6				\n\t"
 		"pxor %%mm6, %%mm2				\n\t"
@@ -1185,6 +1198,7 @@
 		"pcmpgtw %%mm3, %%mm6				\n\t"
 		"pxor %%mm6, %%mm3				\n\t"
 		"psubw %%mm6, %%mm3				\n\t" // |2H0 - 5H1 + 5H2 - 2H3|
+#endif
 
 #ifdef HAVE_MMX2
 		"pminsw %%mm2, %%mm0				\n\t"
@@ -1981,13 +1995,13 @@
 		PAVGB(lx, pplx)					     \
 		"movq " #lx ", temp1				\n\t"\
 		"movq temp0, " #lx "				\n\t"\
-		"psubusb " #lx ", " #t1 "				\n\t"\
-		"psubusb " #lx ", " #t0 "				\n\t"\
-		"psubusb " #lx ", " #sx "				\n\t"\
+		"psubusb " #lx ", " #t1 "			\n\t"\
+		"psubusb " #lx ", " #t0 "			\n\t"\
+		"psubusb " #lx ", " #sx "			\n\t"\
 		"movq b00, " #lx "				\n\t"\
-		"pcmpeqb " #lx ", " #t1 "				\n\t" /* src[-1] > a ? 0 : -1*/\
-		"pcmpeqb " #lx ", " #t0 "				\n\t" /* src[+1] > a ? 0 : -1*/\
-		"pcmpeqb " #lx ", " #sx "				\n\t" /* src[0]  > a ? 0 : -1*/\
+		"pcmpeqb " #lx ", " #t1 "			\n\t" /* src[-1] > a ? 0 : -1*/\
+		"pcmpeqb " #lx ", " #t0 "			\n\t" /* src[+1] > a ? 0 : -1*/\
+		"pcmpeqb " #lx ", " #sx "			\n\t" /* src[0]  > a ? 0 : -1*/\
 		"paddb " #t1 ", " #t0 "				\n\t"\
 		"paddb " #t0 ", " #sx "				\n\t"\
 \
@@ -2002,10 +2016,10 @@
 		"paddb " #psx ", " #ppsx "			\n\t"\
 	"#paddb b02, " #ppsx "				\n\t"\
 		"pand b08, " #ppsx "				\n\t"\
-		"pcmpeqb " #lx ", " #ppsx "				\n\t"\
+		"pcmpeqb " #lx ", " #ppsx "			\n\t"\
 		"pand " #ppsx ", " #pplx "			\n\t"\
 		"pandn " #dst ", " #ppsx "			\n\t"\
-		"por " #pplx ", " #ppsx "				\n\t"\
+		"por " #pplx ", " #ppsx "			\n\t"\
 		"movq " #ppsx ", " #dst "			\n\t"\
 		"movq temp1, " #lx "				\n\t"
 
@@ -2996,6 +3010,7 @@
 	long long memcpyTime=0, vertTime=0, horizTime=0, sumTime;
 	sumTime= rdtsc();
 #endif
+//mode= 0x7F;
 
 	if(tempDst==NULL)
 	{
--- a/postproc/postprocess_template.c	Tue Oct 30 21:04:42 2001 +0000
+++ b/postproc/postprocess_template.c	Tue Oct 30 21:14:02 2001 +0000
@@ -62,6 +62,7 @@
 fix warnings (unused vars, ...)
 noise reduction filters
 border remover
+optimize c versions
 ...
 
 Notes:
@@ -417,7 +418,6 @@
 #if defined (HAVE_MMX2) || defined (HAVE_3DNOW)
 	src+= stride*3;
 	asm volatile(	//"movv %0 %1 %2\n\t"
-		"pushl %0 \n\t"
 		"movq pQPb, %%mm0				\n\t"  // QP,..., QP
 
 		"movq (%0), %%mm6				\n\t"
@@ -535,7 +535,7 @@
 		PAVGB(%%mm3, %%mm0)				      //      112	/4
 		PAVGB(%%mm0, %%mm5)				      //    112246	/16
 		"movq %%mm5, (%%eax, %1, 4)			\n\t" //        X
-		"popl %0\n\t"
+		"subl %1, %0					\n\t"
 
 		:
 		: "r" (src), "r" (stride)
@@ -1167,7 +1167,21 @@
 
 		"movq temp0, %%mm2				\n\t" // 2L0 - 5L1 + 5L2 - 2L3
 		"movq temp1, %%mm3				\n\t" // 2H0 - 5H1 + 5H2 - 2H3
-//FIXME pxor, psubw, pmax for abs
+
+#ifdef HAVE_MMX2
+		"movq %%mm7, %%mm6				\n\t" // 0
+		"psubw %%mm0, %%mm6				\n\t"
+		"pmaxsw %%mm6, %%mm0				\n\t" // |2L4 - 5L5 + 5L6 - 2L7|
+		"movq %%mm7, %%mm6				\n\t" // 0
+		"psubw %%mm1, %%mm6				\n\t"
+		"pmaxsw %%mm6, %%mm1				\n\t" // |2H4 - 5H5 + 5H6 - 2H7|
+		"movq %%mm7, %%mm6				\n\t" // 0
+		"psubw %%mm2, %%mm6				\n\t"
+		"pmaxsw %%mm6, %%mm2				\n\t" // |2L0 - 5L1 + 5L2 - 2L3|
+		"movq %%mm7, %%mm6				\n\t" // 0
+		"psubw %%mm3, %%mm6				\n\t"
+		"pmaxsw %%mm6, %%mm3				\n\t" // |2H0 - 5H1 + 5H2 - 2H3|
+#else
 		"movq %%mm7, %%mm6				\n\t" // 0
 		"pcmpgtw %%mm0, %%mm6				\n\t"
 		"pxor %%mm6, %%mm0				\n\t"
@@ -1176,7 +1190,6 @@
 		"pcmpgtw %%mm1, %%mm6				\n\t"
 		"pxor %%mm6, %%mm1				\n\t"
 		"psubw %%mm6, %%mm1				\n\t" // |2H4 - 5H5 + 5H6 - 2H7|
-
 		"movq %%mm7, %%mm6				\n\t" // 0
 		"pcmpgtw %%mm2, %%mm6				\n\t"
 		"pxor %%mm6, %%mm2				\n\t"
@@ -1185,6 +1198,7 @@
 		"pcmpgtw %%mm3, %%mm6				\n\t"
 		"pxor %%mm6, %%mm3				\n\t"
 		"psubw %%mm6, %%mm3				\n\t" // |2H0 - 5H1 + 5H2 - 2H3|
+#endif
 
 #ifdef HAVE_MMX2
 		"pminsw %%mm2, %%mm0				\n\t"
@@ -1981,13 +1995,13 @@
 		PAVGB(lx, pplx)					     \
 		"movq " #lx ", temp1				\n\t"\
 		"movq temp0, " #lx "				\n\t"\
-		"psubusb " #lx ", " #t1 "				\n\t"\
-		"psubusb " #lx ", " #t0 "				\n\t"\
-		"psubusb " #lx ", " #sx "				\n\t"\
+		"psubusb " #lx ", " #t1 "			\n\t"\
+		"psubusb " #lx ", " #t0 "			\n\t"\
+		"psubusb " #lx ", " #sx "			\n\t"\
 		"movq b00, " #lx "				\n\t"\
-		"pcmpeqb " #lx ", " #t1 "				\n\t" /* src[-1] > a ? 0 : -1*/\
-		"pcmpeqb " #lx ", " #t0 "				\n\t" /* src[+1] > a ? 0 : -1*/\
-		"pcmpeqb " #lx ", " #sx "				\n\t" /* src[0]  > a ? 0 : -1*/\
+		"pcmpeqb " #lx ", " #t1 "			\n\t" /* src[-1] > a ? 0 : -1*/\
+		"pcmpeqb " #lx ", " #t0 "			\n\t" /* src[+1] > a ? 0 : -1*/\
+		"pcmpeqb " #lx ", " #sx "			\n\t" /* src[0]  > a ? 0 : -1*/\
 		"paddb " #t1 ", " #t0 "				\n\t"\
 		"paddb " #t0 ", " #sx "				\n\t"\
 \
@@ -2002,10 +2016,10 @@
 		"paddb " #psx ", " #ppsx "			\n\t"\
 	"#paddb b02, " #ppsx "				\n\t"\
 		"pand b08, " #ppsx "				\n\t"\
-		"pcmpeqb " #lx ", " #ppsx "				\n\t"\
+		"pcmpeqb " #lx ", " #ppsx "			\n\t"\
 		"pand " #ppsx ", " #pplx "			\n\t"\
 		"pandn " #dst ", " #ppsx "			\n\t"\
-		"por " #pplx ", " #ppsx "				\n\t"\
+		"por " #pplx ", " #ppsx "			\n\t"\
 		"movq " #ppsx ", " #dst "			\n\t"\
 		"movq temp1, " #lx "				\n\t"
 
@@ -2996,6 +3010,7 @@
 	long long memcpyTime=0, vertTime=0, horizTime=0, sumTime;
 	sumTime= rdtsc();
 #endif
+//mode= 0x7F;
 
 	if(tempDst==NULL)
 	{