changeset 3747:ba1f57431c85 libavcodec

tweak ff_imdct_calc_3dn2
author lorenm
date Thu, 21 Sep 2006 17:42:23 +0000
parents 2ec498208c6a
children 82d22b7e80ff
files i386/fft_3dn2.c
diffstat 1 files changed, 17 insertions(+), 17 deletions(-) [+]
line wrap: on
line diff
--- a/i386/fft_3dn2.c	Thu Sep 21 16:37:39 2006 +0000
+++ b/i386/fft_3dn2.c	Thu Sep 21 17:42:23 2006 +0000
@@ -177,15 +177,13 @@
         );
     }
 
-    z += n8;
+    k = n-8;
     asm volatile("movd %0, %%mm7" ::"r"(1<<31));
-    for(k = 0; k < n8; k++) {
-        asm volatile(
-            "movq         %0, %%mm0 \n\t"
-            "pswapd       %1, %%mm1 \n\t"
-            ::"m"(z[k]), "m"(z[-1-k])
-        );
-        asm volatile(
+    asm volatile(
+            "1: \n\t"
+            "movq    (%4,%0), %%mm0 \n\t" // z[n8+k]
+            "neg %0 \n\t"
+            "pswapd -8(%4,%0), %%mm1 \n\t" // z[n8-1-k]
             "movq      %%mm0, %%mm2 \n\t"
             "pxor      %%mm7, %%mm2 \n\t"
             "punpckldq %%mm1, %%mm2 \n\t"
@@ -194,15 +192,17 @@
             "pswapd    %%mm0, %%mm4 \n\t"
             "pxor      %%mm7, %%mm0 \n\t"
             "pxor      %%mm7, %%mm4 \n\t"
-            "movq      %%mm0, %0    \n\t" // { -z[n8+k].im, z[n8-1-k].re }
-            "movq      %%mm4, %1    \n\t" // { -z[n8-1-k].re, z[n8+k].im }
-            "movq      %%mm2, %2    \n\t" // { -z[n8+k].re, z[n8-1-k].im }
-            "movq      %%mm3, %3    \n\t" // { z[n8-1-k].im, -z[n8+k].re }
-            :"=m"(output[2*k]), "=m"(output[n2-2-2*k]),
-             "=m"(output[n2+2*k]), "=m"(output[n-2-2*k])
-            ::"memory"
-        );
-    }
+            "movq      %%mm3, -8(%3,%0) \n\t" // output[n-2-2*k] = { z[n8-1-k].im, -z[n8+k].re }
+            "movq      %%mm4, -8(%2,%0) \n\t" // output[n2-2-2*k]= { -z[n8-1-k].re, z[n8+k].im }
+            "neg %0 \n\t"
+            "movq      %%mm0, (%1,%0) \n\t"   // output[2*k]     = { -z[n8+k].im, z[n8-1-k].re }
+            "movq      %%mm2, (%2,%0) \n\t"   // output[n2+2*k]  = { -z[n8+k].re, z[n8-1-k].im }
+            "sub $8, %0 \n\t"
+            "jge 1b \n\t"
+        :"+r"(k)
+        :"r"(output), "r"(output+n2), "r"(output+n), "r"(z+n8)
+        :"memory"
+    );
     asm volatile("femms");
 }