changeset 18683:d940ecaff286

Moves loop-invariant code (chiefly MMX register initialization) out of the loops and eliminates some memory accesses within the inner loop. Patch by Zuxy Meng < zuxy POIS meng AH gmail POIS com >. Original thread: Date: Mon, 12 Jun 2006 00:31:53 -0700 (PDT); Subject: [MPlayer-dev-eng] [PATCH] Loop invariant motion in libvo/osd_template.c
author gpoirier
date Mon, 12 Jun 2006 11:14:10 +0000
parents bee3186a06f7
children c9de3673e299
files libvo/osd_template.c
diffstat 1 files changed, 43 insertions(+), 21 deletions(-) [+]
line wrap: on
line diff
--- a/libvo/osd_template.c	Sun Jun 11 21:29:00 2006 +0000
+++ b/libvo/osd_template.c	Mon Jun 12 11:14:10 2006 +0000
@@ -32,6 +32,15 @@
 #if defined(FAST_OSD) && !defined(HAVE_MMX)
     w=w>>1;
 #endif
+#ifdef HAVE_MMX
+    asm volatile(
+        "pcmpeqb %%mm5, %%mm5\n\t" // F..F
+        "movq %%mm5, %%mm4\n\t"
+        "movq %%mm5, %%mm7\n\t"
+        "psllw $8, %%mm5\n\t" //FF00FF00FF00
+        "psrlw $8, %%mm4\n\t" //00FF00FF00FF
+        ::);        
+#endif
     for(y=0;y<h;y++){
         register int x;
 #ifdef HAVE_MMX
@@ -39,11 +48,6 @@
 	PREFETCHW" %0\n\t"
 	PREFETCH" %1\n\t"
 	PREFETCH" %2\n\t"
-//	"pxor %%mm7, %%mm7\n\t"
-	"pcmpeqb %%mm5, %%mm5\n\t" // F..F
-	"movq %%mm5, %%mm4\n\t"
-	"psllw $8, %%mm5\n\t" //FF00FF00FF00
-	"psrlw $8, %%mm4\n\t" //00FF00FF00FF
 	::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
     for(x=0;x<w;x+=8){
 	asm volatile(
@@ -58,7 +62,7 @@
 		"pand %%mm4, %%mm0\n\t" 	//0Y0Y0Y0Y
 		"psrlw $8, %%mm1\n\t"		//0Y0Y0Y0Y
 		"movq	%1, %%mm2\n\t" 		//srca HGFEDCBA
-		"paddb	"MANGLE(bFF)", %%mm2\n\t"
+		"paddb	%%mm7, %%mm2\n\t"
 		"movq %%mm2, %%mm3\n\t"
 		"pand %%mm4, %%mm2\n\t" 	//0G0E0C0A
 		"psrlw $8, %%mm3\n\t"		//0H0F0D0B
@@ -98,6 +102,16 @@
 #if defined(FAST_OSD) && !defined(HAVE_MMX)
     w=w>>1;
 #endif
+#ifdef HAVE_MMX
+    asm volatile(
+        "pxor %%mm7, %%mm7\n\t"
+        "pcmpeqb %%mm5, %%mm5\n\t" // F..F
+        "movq %%mm5, %%mm6\n\t"
+        "movq %%mm5, %%mm4\n\t"
+        "psllw $8, %%mm5\n\t" //FF00FF00FF00
+        "psrlw $8, %%mm4\n\t" //00FF00FF00FF
+        ::);        
+#endif
     for(y=0;y<h;y++){
         register int x;
 #ifdef HAVE_MMX
@@ -105,11 +119,6 @@
 	PREFETCHW" %0\n\t"
 	PREFETCH" %1\n\t"
 	PREFETCH" %2\n\t"
-	"pxor %%mm7, %%mm7\n\t"
-	"pcmpeqb %%mm5, %%mm5\n\t" // F..F
-	"movq %%mm5, %%mm4\n\t"
-	"psllw $8, %%mm5\n\t" //FF00FF00FF00
-	"psrlw $8, %%mm4\n\t" //00FF00FF00FF
 	::"m"(*dstbase),"m"(*srca),"m"(*src));
     for(x=0;x<w;x+=4){
 	asm volatile(
@@ -123,7 +132,7 @@
 		"movq	%%mm0, %%mm1\n\t"
 		"pand %%mm4, %%mm0\n\t" 	//0Y0Y0Y0Y
 		"movd	%%eax, %%mm2\n\t"	//srca 0000DCBA
-		"paddb	"MANGLE(bFF)", %%mm2\n\t"
+		"paddb	%%mm6, %%mm2\n\t"
 		"punpcklbw %%mm7, %%mm2\n\t"	//srca 0D0C0B0A
 		"pmullw	%%mm2, %%mm0\n\t"
 		"psrlw	$8, %%mm0\n\t"
@@ -186,6 +195,12 @@
 
 static inline void RENAME(vo_draw_alpha_rgb24)(int w,int h, unsigned char* src, unsigned char *srca, int srcstride, unsigned char* dstbase,int dststride){
     int y;
+#ifdef HAVE_MMX
+    asm volatile(
+        "pxor %%mm7, %%mm7\n\t"
+        "pcmpeqb %%mm6, %%mm6\n\t" // F..F
+        ::);        
+#endif
     for(y=0;y<h;y++){
         register unsigned char *dst = dstbase;
         register int x;
@@ -195,8 +210,6 @@
 	PREFETCHW" %0\n\t"
 	PREFETCH" %1\n\t"
 	PREFETCH" %2\n\t"
-	"pxor %%mm7, %%mm7\n\t"
-	"pcmpeqb %%mm6, %%mm6\n\t" // F..F
 	::"m"(*dst),"m"(*srca),"m"(*src):"memory");
     for(x=0;x<w;x+=2){
      if(srca[x] || srca[x+1])
@@ -293,6 +306,22 @@
 #ifdef WORDS_BIGENDIAN
     dstbase++;
 #endif
+#ifdef HAVE_MMX
+#ifdef HAVE_3DNOW
+    asm volatile(
+        "pxor %%mm7, %%mm7\n\t"
+        "pcmpeqb %%mm6, %%mm6\n\t" // F..F
+        ::);
+#else /* HAVE_3DNOW */
+    asm volatile(
+        "pxor %%mm7, %%mm7\n\t"
+        "pcmpeqb %%mm5, %%mm5\n\t" // F..F
+        "movq %%mm5, %%mm4\n\t"
+        "psllw $8, %%mm5\n\t" //FF00FF00FF00
+        "psrlw $8, %%mm4\n\t" //00FF00FF00FF
+        ::);
+#endif /* HAVE_3DNOW */
+#endif /* HAVE_MMX */
     for(y=0;y<h;y++){
         register int x;
 #if defined(ARCH_X86) || defined(ARCH_X86_64)
@@ -302,8 +331,6 @@
 	PREFETCHW" %0\n\t"
 	PREFETCH" %1\n\t"
 	PREFETCH" %2\n\t"
-	"pxor %%mm7, %%mm7\n\t"
-	"pcmpeqb %%mm6, %%mm6\n\t" // F..F
 	::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
     for(x=0;x<w;x+=2){
      if(srca[x] || srca[x+1])
@@ -339,11 +366,6 @@
 	PREFETCHW" %0\n\t"
 	PREFETCH" %1\n\t"
 	PREFETCH" %2\n\t"
-	"pxor %%mm7, %%mm7\n\t"
-	"pcmpeqb %%mm5, %%mm5\n\t" // F..F
-	"movq %%mm5, %%mm4\n\t"
-	"psllw $8, %%mm5\n\t" //FF00FF00FF00
-	"psrlw $8, %%mm4\n\t" //00FF00FF00FF
 	::"m"(*dstbase),"m"(*srca),"m"(*src):"memory");
     for(x=0;x<w;x+=4){
 	asm volatile(