changeset 2892:41315d0120b3 libavcodec

Replace a few movq + psrlq pairs with pshufw. There are more cases which could benefit from this, but they would require us to duplicate some functions. The trick comes from various places (my own code in libpostproc, a patch on the x264 list, ...).
author michael
date Wed, 21 Sep 2005 21:17:09 +0000
parents 4c6eb826e9cb
children 6f8bcb169256
files i386/dsputil_mmx.c i386/mpegvideo_mmx_template.c
diffstat 2 files changed, 17 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
--- a/i386/dsputil_mmx.c	Wed Sep 21 08:14:17 2005 +0000
+++ b/i386/dsputil_mmx.c	Wed Sep 21 21:17:09 2005 +0000
@@ -1621,11 +1621,9 @@
         "movq 64(%1), %%mm1		\n\t"
         MMABS_SUM_MMX2(%%mm1, %%mm7, %%mm0)
         
-        "movq %%mm0, %%mm1		\n\t"
-        "psrlq $32, %%mm0		\n\t"
+        "pshufw $0x0E, %%mm0, %%mm1     \n\t"
         "paddusw %%mm1, %%mm0		\n\t"
-        "movq %%mm0, %%mm1		\n\t"
-        "psrlq $16, %%mm0		\n\t"
+        "pshufw $0x01, %%mm0, %%mm1     \n\t"
         "paddusw %%mm1, %%mm0		\n\t"
         "movd %%mm0, %0			\n\t"
                 
--- a/i386/mpegvideo_mmx_template.c	Wed Sep 21 08:14:17 2005 +0000
+++ b/i386/mpegvideo_mmx_template.c	Wed Sep 21 21:17:09 2005 +0000
@@ -22,7 +22,11 @@
 #ifdef HAVE_MMX2
 #define SPREADW(a) "pshufw $0, " #a ", " #a " \n\t"
 #define PMAXW(a,b) "pmaxsw " #a ", " #b " \n\t"
-
+#define PMAX(a,b) \
+            "pshufw $0x0E," #a ", " #b "		\n\t"\
+	    PMAXW(b, a)\
+            "pshufw $0x01," #a ", " #b "		\n\t"\
+	    PMAXW(b, a)
 #else
 #define SPREADW(a) \
 	"punpcklwd " #a ", " #a " \n\t"\
@@ -30,6 +34,14 @@
 #define PMAXW(a,b) \
 	"psubusw " #a ", " #b " \n\t"\
 	"paddw " #a ", " #b " \n\t"
+#define PMAX(a,b)  \
+            "movq " #a ", " #b "		\n\t"\
+            "psrlq $32, " #a "			\n\t"\
+	    PMAXW(b, a)\
+            "movq " #a ", " #b "		\n\t"\
+            "psrlq $16, " #a "			\n\t"\
+	    PMAXW(b, a)
+
 #endif
 
 static int RENAME(dct_quantize)(MpegEncContext *s,
@@ -119,12 +131,7 @@
 	    PMAXW(%%mm0, %%mm3)
             "add $8, %%"REG_a"			\n\t"
             " js 1b				\n\t"
-            "movq %%mm3, %%mm0			\n\t"
-            "psrlq $32, %%mm3			\n\t"
-	    PMAXW(%%mm0, %%mm3)
-            "movq %%mm3, %%mm0			\n\t"
-            "psrlq $16, %%mm3			\n\t"
-	    PMAXW(%%mm0, %%mm3)
+	    PMAX(%%mm3, %%mm0)
             "movd %%mm3, %%"REG_a"		\n\t"
             "movzb %%al, %%"REG_a"		\n\t" // last_non_zero_p1
 	    : "+a" (last_non_zero_p1)
@@ -170,12 +177,7 @@
 	    PMAXW(%%mm0, %%mm3)
             "add $8, %%"REG_a"			\n\t"
             " js 1b				\n\t"
-            "movq %%mm3, %%mm0			\n\t"
-            "psrlq $32, %%mm3			\n\t"
-	    PMAXW(%%mm0, %%mm3)
-            "movq %%mm3, %%mm0			\n\t"
-            "psrlq $16, %%mm3			\n\t"
-	    PMAXW(%%mm0, %%mm3)
+	    PMAX(%%mm3, %%mm0)
             "movd %%mm3, %%"REG_a"		\n\t"
             "movzb %%al, %%"REG_a"		\n\t" // last_non_zero_p1
 	    : "+a" (last_non_zero_p1)