changeset 11648:57372aa1d655

mmx simplifications
author michael
date Mon, 15 Dec 2003 14:29:09 +0000
parents f9fccf97d588
children 52798caae526
files libmpcodecs/vf_ilpack.c
diffstat 1 files changed, 25 insertions(+), 91 deletions(-) [+]
line wrap: on
line diff
--- a/libmpcodecs/vf_ilpack.c	Mon Dec 15 11:16:41 2003 +0000
+++ b/libmpcodecs/vf_ilpack.c	Mon Dec 15 14:29:09 2003 +0000
@@ -66,33 +66,15 @@
 {
 	int j;
 	asm volatile (""
-		"pxor %%mm0, %%mm0 \n\t"
 		".balign 16 \n\t"
 		"1: \n\t"
 		"movq (%0), %%mm1 \n\t"
 		"movq (%0), %%mm2 \n\t"
-		"punpcklbw %%mm0, %%mm1 \n\t"
-		"punpckhbw %%mm0, %%mm2 \n\t"
-		
-		"movq (%1), %%mm3 \n\t"
-		"movq (%2), %%mm5 \n\t"
-		"punpcklbw %%mm0, %%mm3 \n\t"
-		"punpcklbw %%mm0, %%mm5 \n\t"
-		"movq %%mm3, %%mm4 \n\t"
-		"movq %%mm5, %%mm6 \n\t"
-		"punpcklwd %%mm0, %%mm3 \n\t"
-		"punpckhwd %%mm0, %%mm4 \n\t"
-		"punpcklwd %%mm0, %%mm5 \n\t"
-		"punpckhwd %%mm0, %%mm6 \n\t"
-		"pslld $8, %%mm3 \n\t"
-		"pslld $8, %%mm4 \n\t"
-		"pslld $24, %%mm5 \n\t"
-		"pslld $24, %%mm6 \n\t"
-		
-		"por %%mm3, %%mm1 \n\t"
-		"por %%mm4, %%mm2 \n\t"
-		"por %%mm5, %%mm1 \n\t"
-		"por %%mm6, %%mm2 \n\t"
+		"movq (%1), %%mm4 \n\t"
+		"movq (%2), %%mm6 \n\t"
+		"punpcklbw %%mm6, %%mm4 \n\t"
+		"punpcklbw %%mm4, %%mm1 \n\t"
+		"punpckhbw %%mm4, %%mm2 \n\t"
 		
 		"addl $8, %0 \n\t"
 		"addl $4, %1 \n\t"
@@ -123,8 +105,6 @@
 		".Lli0: \n\t"
 		"movq (%%esi), %%mm1 \n\t"
 		"movq (%%esi), %%mm2 \n\t"
-		"punpcklbw %%mm0, %%mm1 \n\t"
-		"punpckhbw %%mm0, %%mm2 \n\t"
 		
 		"movq (%%eax,%%edx,2), %%mm4 \n\t"
 		"movq (%%ebx,%%ebp,2), %%mm6 \n\t"
@@ -150,29 +130,17 @@
 		"paddw %%mm5, %%mm6 \n\t"
 		"psrlw $3, %%mm4 \n\t"
 		"psrlw $3, %%mm6 \n\t"
-		"movq %%mm4, %%mm3 \n\t"
-		"movq %%mm6, %%mm5 \n\t"
-		"punpcklwd %%mm0, %%mm3 \n\t"
-		"punpckhwd %%mm0, %%mm4 \n\t"
-		"punpcklwd %%mm0, %%mm5 \n\t"
-		"punpckhwd %%mm0, %%mm6 \n\t"
-		"pslld $8, %%mm3 \n\t"
-		"pslld $8, %%mm4 \n\t"
-		"pslld $24, %%mm5 \n\t"
-		"pslld $24, %%mm6 \n\t"
-		
-		"por %%mm3, %%mm1 \n\t"
-		"por %%mm4, %%mm2 \n\t"
-		"por %%mm5, %%mm1 \n\t"
-		"por %%mm6, %%mm2 \n\t"
+		"packuswb %%mm4, %%mm4 \n\t"
+		"packuswb %%mm6, %%mm6 \n\t"
+		"punpcklbw %%mm6, %%mm4 \n\t"
+		"punpcklbw %%mm4, %%mm1 \n\t"
+		"punpckhbw %%mm4, %%mm2 \n\t"
 		
 		"movq %%mm1, (%%edi) \n\t"
 		"movq %%mm2, 8(%%edi) \n\t"
 		
 		"movq 8(%%esi), %%mm1 \n\t"
 		"movq 8(%%esi), %%mm2 \n\t"
-		"punpcklbw %%mm0, %%mm1 \n\t"
-		"punpckhbw %%mm0, %%mm2 \n\t"
 		
 		"movq (%%eax,%%edx,2), %%mm4 \n\t"
 		"movq (%%ebx,%%ebp,2), %%mm6 \n\t"
@@ -198,21 +166,11 @@
 		"paddw %%mm5, %%mm6 \n\t"
 		"psrlw $3, %%mm4 \n\t"
 		"psrlw $3, %%mm6 \n\t"
-		"movq %%mm4, %%mm3 \n\t"
-		"movq %%mm6, %%mm5 \n\t"
-		"punpcklwd %%mm0, %%mm3 \n\t"
-		"punpckhwd %%mm0, %%mm4 \n\t"
-		"punpcklwd %%mm0, %%mm5 \n\t"
-		"punpckhwd %%mm0, %%mm6 \n\t"
-		"pslld $8, %%mm3 \n\t"
-		"pslld $8, %%mm4 \n\t"
-		"pslld $24, %%mm5 \n\t"
-		"pslld $24, %%mm6 \n\t"
-		
-		"por %%mm3, %%mm1 \n\t"
-		"por %%mm4, %%mm2 \n\t"
-		"por %%mm5, %%mm1 \n\t"
-		"por %%mm6, %%mm2 \n\t"
+		"packuswb %%mm4, %%mm4 \n\t"
+		"packuswb %%mm6, %%mm6 \n\t"
+		"punpcklbw %%mm6, %%mm4 \n\t"
+		"punpcklbw %%mm4, %%mm1 \n\t"
+		"punpckhbw %%mm4, %%mm2 \n\t"
 		
 		"addl $16, %%esi \n\t"
 		"addl $8, %%eax \n\t"
@@ -246,8 +204,6 @@
 		".Lli1: \n\t"
 		"movq (%%esi), %%mm1 \n\t"
 		"movq (%%esi), %%mm2 \n\t"
-		"punpcklbw %%mm0, %%mm1 \n\t"
-		"punpckhbw %%mm0, %%mm2 \n\t"
 		
 		"movq (%%eax,%%edx,2), %%mm4 \n\t"
 		"movq (%%ebx,%%ebp,2), %%mm6 \n\t"
@@ -275,29 +231,17 @@
 		"paddw %%mm5, %%mm6 \n\t"
 		"psrlw $3, %%mm4 \n\t"
 		"psrlw $3, %%mm6 \n\t"
-		"movq %%mm4, %%mm3 \n\t"
-		"movq %%mm6, %%mm5 \n\t"
-		"punpcklwd %%mm0, %%mm3 \n\t"
-		"punpckhwd %%mm0, %%mm4 \n\t"
-		"punpcklwd %%mm0, %%mm5 \n\t"
-		"punpckhwd %%mm0, %%mm6 \n\t"
-		"pslld $8, %%mm3 \n\t"
-		"pslld $8, %%mm4 \n\t"
-		"pslld $24, %%mm5 \n\t"
-		"pslld $24, %%mm6 \n\t"
-		
-		"por %%mm3, %%mm1 \n\t"
-		"por %%mm4, %%mm2 \n\t"
-		"por %%mm5, %%mm1 \n\t"
-		"por %%mm6, %%mm2 \n\t"
+		"packuswb %%mm4, %%mm4 \n\t"
+		"packuswb %%mm6, %%mm6 \n\t"
+		"punpcklbw %%mm6, %%mm4 \n\t"
+		"punpcklbw %%mm4, %%mm1 \n\t"
+		"punpckhbw %%mm4, %%mm2 \n\t"
 		
 		"movq %%mm1, (%%edi) \n\t"
 		"movq %%mm2, 8(%%edi) \n\t"
 		
 		"movq 8(%%esi), %%mm1 \n\t"
 		"movq 8(%%esi), %%mm2 \n\t"
-		"punpcklbw %%mm0, %%mm1 \n\t"
-		"punpckhbw %%mm0, %%mm2 \n\t"
 		
 		"movq (%%eax,%%edx,2), %%mm4 \n\t"
 		"movq (%%ebx,%%ebp,2), %%mm6 \n\t"
@@ -325,21 +269,11 @@
 		"paddw %%mm5, %%mm6 \n\t"
 		"psrlw $3, %%mm4 \n\t"
 		"psrlw $3, %%mm6 \n\t"
-		"movq %%mm4, %%mm3 \n\t"
-		"movq %%mm6, %%mm5 \n\t"
-		"punpcklwd %%mm0, %%mm3 \n\t"
-		"punpckhwd %%mm0, %%mm4 \n\t"
-		"punpcklwd %%mm0, %%mm5 \n\t"
-		"punpckhwd %%mm0, %%mm6 \n\t"
-		"pslld $8, %%mm3 \n\t"
-		"pslld $8, %%mm4 \n\t"
-		"pslld $24, %%mm5 \n\t"
-		"pslld $24, %%mm6 \n\t"
-		
-		"por %%mm3, %%mm1 \n\t"
-		"por %%mm4, %%mm2 \n\t"
-		"por %%mm5, %%mm1 \n\t"
-		"por %%mm6, %%mm2 \n\t"
+		"packuswb %%mm4, %%mm4 \n\t"
+		"packuswb %%mm6, %%mm6 \n\t"
+		"punpcklbw %%mm6, %%mm4 \n\t"
+		"punpcklbw %%mm4, %%mm1 \n\t"
+		"punpckhbw %%mm4, %%mm2 \n\t"
 		
 		"addl $16, %%esi \n\t"
 		"addl $8, %%eax \n\t"