changeset 11826:11c5a87497d3 libavcodec

Add bitexact versions of put_no_rnd_pixels8 _x2 and _y2 for vp3/theora
author conrad
date Fri, 04 Jun 2010 04:46:26 +0000
parents c6db7667e39b
children e778b61f86f2
files x86/dsputil_mmx.c x86/dsputil_mmx_avg_template.c
diffstat 2 files changed, 93 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/x86/dsputil_mmx.c	Fri Jun 04 01:15:41 2010 +0000
+++ b/x86/dsputil_mmx.c	Fri Jun 04 04:46:26 2010 +0000
@@ -2657,6 +2657,12 @@
                 c->vp3_idct_dc_add = ff_vp3_idct_dc_add_mmx2;
             }
 
+            if (CONFIG_VP3_DECODER
+                && (avctx->codec_id == CODEC_ID_VP3 || avctx->codec_id == CODEC_ID_THEORA)) {
+                c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_mmx2;
+                c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmx2;
+            }
+
 #define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU) \
             c->PFX ## _pixels_tab[IDX][ 0] = PFX ## SIZE ## _mc00_ ## CPU; \
             c->PFX ## _pixels_tab[IDX][ 1] = PFX ## SIZE ## _mc10_ ## CPU; \
@@ -2745,6 +2751,12 @@
                 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow;
             }
 
+            if (CONFIG_VP3_DECODER
+                && (avctx->codec_id == CODEC_ID_VP3 || avctx->codec_id == CODEC_ID_THEORA)) {
+                c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_3dnow;
+                c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_3dnow;
+            }
+
             SET_QPEL_FUNCS(put_qpel, 0, 16, 3dnow);
             SET_QPEL_FUNCS(put_qpel, 1, 8, 3dnow);
             SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, 3dnow);
--- a/x86/dsputil_mmx_avg_template.c	Fri Jun 04 01:15:41 2010 +0000
+++ b/x86/dsputil_mmx_avg_template.c	Fri Jun 04 04:46:26 2010 +0000
@@ -586,6 +586,49 @@
         :"%"REG_a, "memory");
 }
 
+static void DEF(put_no_rnd_pixels8_x2_exact)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{ /* Per row: block[0..7] = NOT(PAVGB(NOT pixels[0..7], NOT pixels[1..8])); reads 9 bytes of each source row, writes 8. */
+    __asm__ volatile ( /* Operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size, %4 = 3*line_size. */
+        "pcmpeqb %%mm6, %%mm6           \n\t" /* mm6 = all ones, used below as the XOR mask for bitwise NOT */
+        "1:                             \n\t" /* loop head; each pass handles 4 rows */
+        "movq  (%1),     %%mm0          \n\t" /* row 0, bytes 0..7 */
+        "movq  (%1, %3), %%mm2          \n\t" /* row 1, bytes 0..7 */
+        "movq 1(%1),     %%mm1          \n\t" /* row 0, bytes 1..8 (right neighbours) */
+        "movq 1(%1, %3), %%mm3          \n\t" /* row 1, bytes 1..8 */
+        "pxor  %%mm6, %%mm0             \n\t" /* invert all four inputs ... */
+        "pxor  %%mm6, %%mm2             \n\t"
+        "pxor  %%mm6, %%mm1             \n\t"
+        "pxor  %%mm6, %%mm3             \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t" /* PAVGB is a macro defined outside this view; presumably the round-up byte average for the target CPU -- confirm */
+        PAVGB" %%mm3, %%mm2             \n\t"
+        "pxor  %%mm6, %%mm0             \n\t" /* ... and invert the results back */
+        "pxor  %%mm6, %%mm2             \n\t"
+        "movq  %%mm0, (%2)              \n\t" /* store output row 0 */
+        "movq  %%mm2, (%2, %3)          \n\t" /* store output row 1 */
+        "movq  (%1, %3,2), %%mm0        \n\t" /* row 2, bytes 0..7 */
+        "movq 1(%1, %3,2), %%mm1        \n\t" /* row 2, bytes 1..8 */
+        "movq  (%1, %4),   %%mm2        \n\t" /* row 3, bytes 0..7 (%4 = 3*line_size) */
+        "movq 1(%1, %4),   %%mm3        \n\t" /* row 3, bytes 1..8 */
+        "pxor  %%mm6, %%mm0             \n\t" /* same invert / PAVGB / invert sequence for rows 2 and 3 */
+        "pxor  %%mm6, %%mm1             \n\t"
+        "pxor  %%mm6, %%mm2             \n\t"
+        "pxor  %%mm6, %%mm3             \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t"
+        PAVGB" %%mm3, %%mm2             \n\t"
+        "pxor  %%mm6, %%mm0             \n\t"
+        "pxor  %%mm6, %%mm2             \n\t"
+        "movq  %%mm0, (%2, %3,2)        \n\t" /* store output row 2 */
+        "movq  %%mm2, (%2, %4)          \n\t" /* store output row 3 */
+        "lea   (%1, %3,4), %1           \n\t" /* pixels += 4*line_size */
+        "lea   (%2, %3,4), %2           \n\t" /* block  += 4*line_size */
+        "subl  $4, %0                   \n\t" /* h -= 4 */
+        "jg 1b                          \n\t" /* repeat while h > 0 (signed); the body always runs once, so h is effectively rounded up to a multiple of 4 */
+        : "+g"(h), "+r"(pixels), "+r"(block) /* read-write: all three are modified by the loop */
+        : "r" ((x86_reg)line_size), "r"((x86_reg)3*line_size) /* the cast binds to the constant 3, so the product is computed in x86_reg */
+        : "memory" /* NOTE(review): mm0-mm3 and mm6 are modified but not listed as clobbers, and no emms is issued here */
+    );
+}
+
 static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
     __asm__ volatile(
@@ -650,6 +693,44 @@
         :"%"REG_a, "memory");
 }
 
+static void DEF(put_no_rnd_pixels8_y2_exact)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{ /* Per output row r: block row r = NOT(PAVGB(NOT source row r, NOT source row r+1)), 8 bytes wide; reads one source row more than it writes. */
+    __asm__ volatile ( /* Operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size, %4 = 3*line_size. */
+        "movq     (%1), %%mm0           \n\t" /* mm0 = source row 0 */
+        "pcmpeqb %%mm6, %%mm6           \n\t" /* mm6 = all ones, used below as the XOR mask for bitwise NOT */
+        "add        %3, %1              \n\t" /* pixels += line_size: inside the loop %1 points one row below the current output row */
+        "pxor    %%mm6, %%mm0           \n\t" /* mm0 = NOT(row 0); mm0 always enters the loop already inverted */
+        "1:                             \n\t" /* loop head; each pass writes 4 rows. Below, rows are numbered relative to the current output row. */
+        "movq  (%1),     %%mm1          \n\t" /* mm1 = source row 1 */
+        "movq  (%1, %3), %%mm2          \n\t" /* mm2 = source row 2 */
+        "pxor  %%mm6, %%mm1             \n\t" /* invert the two new rows */
+        "pxor  %%mm6, %%mm2             \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t" /* mm0 = PAVGB(NOT row 0, NOT row 1); PAVGB is a macro defined outside this view, presumably the round-up byte average -- confirm */
+        PAVGB" %%mm2, %%mm1             \n\t" /* mm1 = PAVGB(NOT row 1, NOT row 2); mm2 keeps NOT row 2 for the second half */
+        "pxor  %%mm6, %%mm0             \n\t" /* invert the results back */
+        "pxor  %%mm6, %%mm1             \n\t"
+        "movq  %%mm0, (%2)              \n\t" /* store output row 0 */
+        "movq  %%mm1, (%2, %3)          \n\t" /* store output row 1 */
+        "movq  (%1, %3,2), %%mm1        \n\t" /* mm1 = source row 3 */
+        "movq  (%1, %4),   %%mm0        \n\t" /* mm0 = source row 4 (%4 = 3*line_size, and %1 is already one row ahead) */
+        "pxor  %%mm6, %%mm1             \n\t" /* invert the two new rows */
+        "pxor  %%mm6, %%mm0             \n\t"
+        PAVGB" %%mm1, %%mm2             \n\t" /* mm2 = PAVGB(NOT row 2, NOT row 3) */
+        PAVGB" %%mm0, %%mm1             \n\t" /* mm1 = PAVGB(NOT row 3, NOT row 4); mm0 stays NOT row 4 and becomes "row 0" of the next pass */
+        "pxor  %%mm6, %%mm2             \n\t" /* invert the results back */
+        "pxor  %%mm6, %%mm1             \n\t"
+        "movq %%mm2, (%2, %3,2)         \n\t" /* store output row 2 */
+        "movq %%mm1, (%2, %4)           \n\t" /* store output row 3 */
+        "lea   (%1, %3,4), %1           \n\t" /* pixels += 4*line_size */
+        "lea   (%2, %3,4), %2           \n\t" /* block  += 4*line_size */
+        "subl $4, %0                    \n\t" /* h -= 4 */
+        "jg 1b                          \n\t" /* repeat while h > 0 (signed); the body always runs once, so h is effectively rounded up to a multiple of 4 */
+        :"+g"(h), "+r"(pixels), "+r" (block) /* read-write: all three are modified by the asm */
+        :"r" ((x86_reg)line_size), "r"((x86_reg)3*line_size) /* the cast binds to the constant 3, so the product is computed in x86_reg */
+        :"memory" /* NOTE(review): mm0-mm2 and mm6 are modified but not listed as clobbers, and no emms is issued here */
+    );
+}
+
 static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
     __asm__ volatile(