changeset 688:894b61908734 libavcodec

pix_sum16_mmx()
author michaelni
date Fri, 20 Sep 2002 10:03:01 +0000
parents 9abb13c21fbe
children efcbfbd18864
files i386/dsputil_mmx.c
diffstat 1 files changed, 39 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/i386/dsputil_mmx.c	Thu Sep 19 22:14:53 2002 +0000
+++ b/i386/dsputil_mmx.c	Fri Sep 20 10:03:01 2002 +0000
@@ -420,6 +420,44 @@
         );
 }
 
+static int pix_sum16_mmx(UINT8 * pix, int line_size){ /* MMX routine: returns the sum of 16 bytes per row over h rows starting at pix, rows line_size bytes apart */
+    const int h=16; /* number of rows covered by the initial offset (16 loop passes when line_size is positive) */
+    int sum;
+    int index= -line_size*h; /* byte offset used as loop counter: starts negative, gains line_size per row */
+
+    __asm __volatile( /* NOTE(review): no clobber list (MMX regs / memory) and no emms in this block -- presumably left to callers; confirm */
+                "pxor %%mm7, %%mm7		\n\t" /* mm7 = 0, zero operand for the byte->word unpacks below */
+                "pxor %%mm6, %%mm6		\n\t" /* mm6 = 0, running total kept as four 16-bit lanes */
+                "1:				\n\t" /* loop head, one pass per row */
+                "movq (%2, %1), %%mm0		\n\t" /* row bytes 0..7 (copy for the low half) */
+                "movq (%2, %1), %%mm1		\n\t" /* row bytes 0..7 again (copy for the high half) */
+                "movq 8(%2, %1), %%mm2		\n\t" /* row bytes 8..15 (copy for the low half) */
+                "movq 8(%2, %1), %%mm3		\n\t" /* row bytes 8..15 again (copy for the high half) */
+                "punpcklbw %%mm7, %%mm0		\n\t" /* bytes 0..3 widened to 16-bit words */
+                "punpckhbw %%mm7, %%mm1		\n\t" /* bytes 4..7 widened to 16-bit words */
+                "punpcklbw %%mm7, %%mm2		\n\t" /* bytes 8..11 widened to 16-bit words */
+                "punpckhbw %%mm7, %%mm3		\n\t" /* bytes 12..15 widened to 16-bit words */
+                "paddw %%mm0, %%mm1		\n\t" /* mm1 = words of bytes 0..3 + 4..7 */
+                "paddw %%mm2, %%mm3		\n\t" /* mm3 = words of bytes 8..11 + 12..15 */
+                "paddw %%mm1, %%mm3		\n\t" /* mm3 = this row folded into four word lanes */
+                "paddw %%mm3, %%mm6		\n\t" /* add the row into the running total */
+                "addl %3, %1			\n\t" /* index += line_size (next row); sets the sign flag tested below */
+                " js 1b				\n\t" /* repeat while index is still negative */
+                "movq %%mm6, %%mm5		\n\t" /* horizontal add, step 1: copy the four lanes */
+                "psrlq $32, %%mm6		\n\t" /* move lanes 2,3 down onto lanes 0,1 */
+                "paddw %%mm5, %%mm6		\n\t" /* lane0 = w0+w2, lane1 = w1+w3 */
+                "movq %%mm6, %%mm5		\n\t" /* horizontal add, step 2: copy again */
+                "psrlq $16, %%mm6		\n\t" /* move lane 1 down onto lane 0 */
+                "paddw %%mm5, %%mm6		\n\t" /* lane0 = w0+w1+w2+w3 (lane 1 now holds a partial sum) */
+                "movd %%mm6, %0			\n\t" /* copy the low 32 bits (lanes 0 and 1) into sum */
+                "andl $0xFFFF, %0		\n\t" /* keep lane 0 only; a 16x16 byte total is at most 256*255 = 65280, so it fits in 16 bits */
+                : "=&r" (sum), "+r" (index) /* %0 = sum (early-clobber output), %1 = index (read and written) */
+                : "r" (pix - index), "r" (line_size) /* %2 = pix - index, so (%2,%1) initially addresses pix; %3 = line_size. NOTE(review): addl and int-sized %1 look 32-bit specific -- confirm before reuse on other targets */
+        );
+
+        return sum;
+}
+
 #if 0
 static void just_return() { return; }
 #endif
@@ -448,6 +486,7 @@
         put_pixels_clamped = put_pixels_clamped_mmx;
         add_pixels_clamped = add_pixels_clamped_mmx;
         clear_blocks= clear_blocks_mmx;
+        pix_sum= pix_sum16_mmx;
 
         pix_abs16x16     = pix_abs16x16_mmx;
         pix_abs16x16_x2  = pix_abs16x16_x2_mmx;