changeset 4974:a2e489e40ea3 libavcodec

Tweak the MMX2 SAD (sum of absolute differences) routines: 6% faster on Core 2 and K8, no change on P4.
author lorenm
date Thu, 10 May 2007 22:24:19 +0000
parents 91015dc624ad
children 9a6a0818e93f
files i386/motion_est_mmx.c
diffstat 1 files changed, 21 insertions(+), 33 deletions(-) [+]
line wrap: on
line diff
--- a/i386/motion_est_mmx.c	Thu May 10 18:44:58 2007 +0000
+++ b/i386/motion_est_mmx.c	Thu May 10 22:24:19 2007 +0000
@@ -75,13 +75,11 @@
         ASMALIGN(4)
         "1:                             \n\t"
         "movq (%1, %%"REG_a"), %%mm0    \n\t"
-        "movq (%2, %%"REG_a"), %%mm2    \n\t"
-        "psadbw %%mm2, %%mm0            \n\t"
+        "psadbw (%2, %%"REG_a"), %%mm0  \n\t"
         "add %3, %%"REG_a"              \n\t"
         "movq (%1, %%"REG_a"), %%mm1    \n\t"
-        "movq (%2, %%"REG_a"), %%mm3    \n\t"
-        "psadbw %%mm1, %%mm3            \n\t"
-        "paddw %%mm3, %%mm0             \n\t"
+        "psadbw (%2, %%"REG_a"), %%mm1  \n\t"
+        "paddw %%mm1, %%mm0             \n\t"
         "paddw %%mm0, %%mm6             \n\t"
         "add %3, %%"REG_a"              \n\t"
         " js 1b                         \n\t"
@@ -97,17 +95,13 @@
         ASMALIGN(4)
         "1:                             \n\t"
         "movq (%1, %%"REG_a"), %%mm0    \n\t"
-        "movq (%2, %%"REG_a"), %%mm2    \n\t"
-        "pavgb %%mm2, %%mm0             \n\t"
-        "movq (%3, %%"REG_a"), %%mm2    \n\t"
-        "psadbw %%mm2, %%mm0            \n\t"
+        "pavgb (%2, %%"REG_a"), %%mm0   \n\t"
+        "psadbw (%3, %%"REG_a"), %%mm0  \n\t"
         "add %4, %%"REG_a"              \n\t"
         "movq (%1, %%"REG_a"), %%mm1    \n\t"
-        "movq (%2, %%"REG_a"), %%mm3    \n\t"
-        "pavgb %%mm1, %%mm3             \n\t"
-        "movq (%3, %%"REG_a"), %%mm1    \n\t"
-        "psadbw %%mm1, %%mm3            \n\t"
-        "paddw %%mm3, %%mm0             \n\t"
+        "pavgb (%2, %%"REG_a"), %%mm1   \n\t"
+        "psadbw (%3, %%"REG_a"), %%mm1  \n\t"
+        "paddw %%mm1, %%mm0             \n\t"
         "paddw %%mm0, %%mm6             \n\t"
         "add %4, %%"REG_a"              \n\t"
         " js 1b                         \n\t"
@@ -120,30 +114,24 @@
 { //FIXME reuse src
     long len= -(stride*h);
     asm volatile(
+        "movq "MANGLE(bone)", %%mm5     \n\t"
         ASMALIGN(4)
-        "movq "MANGLE(bone)", %%mm5     \n\t"
         "1:                             \n\t"
         "movq (%1, %%"REG_a"), %%mm0    \n\t"
-        "movq (%2, %%"REG_a"), %%mm2    \n\t"
         "movq 1(%1, %%"REG_a"), %%mm1   \n\t"
-        "movq 1(%2, %%"REG_a"), %%mm3   \n\t"
-        "pavgb %%mm2, %%mm0             \n\t"
-        "pavgb %%mm1, %%mm3             \n\t"
-        "psubusb %%mm5, %%mm3           \n\t"
-        "pavgb %%mm3, %%mm0             \n\t"
-        "movq (%3, %%"REG_a"), %%mm2    \n\t"
-        "psadbw %%mm2, %%mm0            \n\t"
+        "pavgb (%2, %%"REG_a"), %%mm0   \n\t"
+        "pavgb 1(%2, %%"REG_a"), %%mm1  \n\t"
+        "psubusb %%mm5, %%mm1           \n\t"
+        "pavgb %%mm1, %%mm0             \n\t"
+        "psadbw (%3, %%"REG_a"), %%mm0  \n\t"
         "add %4, %%"REG_a"              \n\t"
-        "movq (%1, %%"REG_a"), %%mm1    \n\t"
-        "movq (%2, %%"REG_a"), %%mm3    \n\t"
-        "movq 1(%1, %%"REG_a"), %%mm2   \n\t"
-        "movq 1(%2, %%"REG_a"), %%mm4   \n\t"
-        "pavgb %%mm3, %%mm1             \n\t"
-        "pavgb %%mm4, %%mm2             \n\t"
-        "psubusb %%mm5, %%mm2           \n\t"
-        "pavgb %%mm1, %%mm2             \n\t"
-        "movq (%3, %%"REG_a"), %%mm1    \n\t"
-        "psadbw %%mm1, %%mm2            \n\t"
+        "movq (%1, %%"REG_a"), %%mm2    \n\t"
+        "movq 1(%1, %%"REG_a"), %%mm3   \n\t"
+        "pavgb (%2, %%"REG_a"), %%mm2   \n\t"
+        "pavgb 1(%2, %%"REG_a"), %%mm3  \n\t"
+        "psubusb %%mm5, %%mm3           \n\t"
+        "pavgb %%mm3, %%mm2             \n\t"
+        "psadbw (%3, %%"REG_a"), %%mm2  \n\t"
         "paddw %%mm2, %%mm0             \n\t"
         "paddw %%mm0, %%mm6             \n\t"
         "add %4, %%"REG_a"              \n\t"