diff x86/dsputil_mmx_avg_template.c @ 11826:11c5a87497d3 libavcodec

Add bitexact versions of put_no_rnd_pixels8 _x2 and _y2 for vp3/theora
author conrad
date Fri, 04 Jun 2010 04:46:26 +0000
parents 36b60aa6bc75
children
line wrap: on
line diff
--- a/x86/dsputil_mmx_avg_template.c	Fri Jun 04 01:15:41 2010 +0000
+++ b/x86/dsputil_mmx_avg_template.c	Fri Jun 04 04:46:26 2010 +0000
@@ -586,6 +586,49 @@
         :"%"REG_a, "memory");
 }
 
+static void DEF(put_no_rnd_pixels8_x2_exact)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+    __asm__ volatile (
+        "pcmpeqb %%mm6, %%mm6           \n\t"
+        "1:                             \n\t"
+        "movq  (%1),     %%mm0          \n\t"
+        "movq  (%1, %3), %%mm2          \n\t"
+        "movq 1(%1),     %%mm1          \n\t"
+        "movq 1(%1, %3), %%mm3          \n\t"
+        "pxor  %%mm6, %%mm0             \n\t"
+        "pxor  %%mm6, %%mm2             \n\t"
+        "pxor  %%mm6, %%mm1             \n\t"
+        "pxor  %%mm6, %%mm3             \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t"
+        PAVGB" %%mm3, %%mm2             \n\t"
+        "pxor  %%mm6, %%mm0             \n\t"
+        "pxor  %%mm6, %%mm2             \n\t"
+        "movq  %%mm0, (%2)              \n\t"
+        "movq  %%mm2, (%2, %3)          \n\t"
+        "movq  (%1, %3,2), %%mm0        \n\t"
+        "movq 1(%1, %3,2), %%mm1        \n\t"
+        "movq  (%1, %4),   %%mm2        \n\t"
+        "movq 1(%1, %4),   %%mm3        \n\t"
+        "pxor  %%mm6, %%mm0             \n\t"
+        "pxor  %%mm6, %%mm1             \n\t"
+        "pxor  %%mm6, %%mm2             \n\t"
+        "pxor  %%mm6, %%mm3             \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t"
+        PAVGB" %%mm3, %%mm2             \n\t"
+        "pxor  %%mm6, %%mm0             \n\t"
+        "pxor  %%mm6, %%mm2             \n\t"
+        "movq  %%mm0, (%2, %3,2)        \n\t"
+        "movq  %%mm2, (%2, %4)          \n\t"
+        "lea   (%1, %3,4), %1           \n\t"
+        "lea   (%2, %3,4), %2           \n\t"
+        "subl  $4, %0                   \n\t"
+        "jg 1b                          \n\t"
+        : "+g"(h), "+r"(pixels), "+r"(block)
+        : "r" ((x86_reg)line_size), "r"((x86_reg)3*line_size)
+        : "memory"
+    );
+}
+
 static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
     __asm__ volatile(
@@ -650,6 +693,44 @@
         :"%"REG_a, "memory");
 }
 
+static void DEF(put_no_rnd_pixels8_y2_exact)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+    __asm__ volatile (
+        "movq     (%1), %%mm0           \n\t"
+        "pcmpeqb %%mm6, %%mm6           \n\t"
+        "add        %3, %1              \n\t"
+        "pxor    %%mm6, %%mm0           \n\t"
+        "1:                             \n\t"
+        "movq  (%1),     %%mm1          \n\t"
+        "movq  (%1, %3), %%mm2          \n\t"
+        "pxor  %%mm6, %%mm1             \n\t"
+        "pxor  %%mm6, %%mm2             \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t"
+        PAVGB" %%mm2, %%mm1             \n\t"
+        "pxor  %%mm6, %%mm0             \n\t"
+        "pxor  %%mm6, %%mm1             \n\t"
+        "movq  %%mm0, (%2)              \n\t"
+        "movq  %%mm1, (%2, %3)          \n\t"
+        "movq  (%1, %3,2), %%mm1        \n\t"
+        "movq  (%1, %4),   %%mm0        \n\t"
+        "pxor  %%mm6, %%mm1             \n\t"
+        "pxor  %%mm6, %%mm0             \n\t"
+        PAVGB" %%mm1, %%mm2             \n\t"
+        PAVGB" %%mm0, %%mm1             \n\t"
+        "pxor  %%mm6, %%mm2             \n\t"
+        "pxor  %%mm6, %%mm1             \n\t"
+        "movq %%mm2, (%2, %3,2)         \n\t"
+        "movq %%mm1, (%2, %4)           \n\t"
+        "lea   (%1, %3,4), %1           \n\t"
+        "lea   (%2, %3,4), %2           \n\t"
+        "subl $4, %0                    \n\t"
+        "jg 1b                          \n\t"
+        :"+g"(h), "+r"(pixels), "+r" (block)
+        :"r" ((x86_reg)line_size), "r"((x86_reg)3*line_size)
+        :"memory"
+    );
+}
+
 static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
     __asm__ volatile(