changeset 12003:3b761226ea35 libavcodec

Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
author darkshikari
date Mon, 28 Jun 2010 23:53:07 +0000
parents 9c8584f68754
children 2002ea7c06f6
files x86/h264_intrapred.asm x86/h264dsp_mmx.c
diffstat 2 files changed, 84 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/x86/h264_intrapred.asm	Mon Jun 28 23:37:24 2010 +0000
+++ b/x86/h264_intrapred.asm	Mon Jun 28 23:53:07 2010 +0000
@@ -494,3 +494,81 @@
     mov   [r0+r2*1], r3d
     mov   [r0+r2*2], r3d
     RET
+
+;-----------------------------------------------------------------------------
+; void pred4x4_tm_vp8_mmxext(uint8_t *src, const uint8_t *topright, int stride)
+;-----------------------------------------------------------------------------
+
+%macro PRED4x4_TM_MMX 1
+cglobal pred4x4_tm_vp8_%1, 3,6
+    sub        r0, r2
+    pxor      mm7, mm7
+    movd      mm0, [r0]
+    punpcklbw mm0, mm7
+    movzx     r4d, byte [r0-1]
+    mov       r5d, 2
+.loop:
+    movzx     r1d, byte [r0+r2*1-1]
+    movzx     r3d, byte [r0+r2*2-1]
+    sub       r1d, r4d
+    sub       r3d, r4d
+    movd      mm2, r1d
+    movd      mm4, r3d
+%ifidn %1, mmx
+    punpcklwd mm2, mm2
+    punpcklwd mm4, mm4
+    punpckldq mm2, mm2
+    punpckldq mm4, mm4
+%else
+    pshufw    mm2, mm2, 0
+    pshufw    mm4, mm4, 0
+%endif
+    paddw     mm2, mm0
+    paddw     mm4, mm0
+    packuswb  mm2, mm2
+    packuswb  mm4, mm4
+    movd [r0+r2*1], mm2
+    movd [r0+r2*2], mm4
+    lea        r0, [r0+r2*2]
+    dec       r5d
+    jg .loop
+    REP_RET
+%endmacro
+
+PRED4x4_TM_MMX mmx
+PRED4x4_TM_MMX mmxext
+
+cglobal pred4x4_tm_vp8_ssse3, 3,3
+    sub         r0, r2
+    movq       mm6, [tm_shuf]
+    pxor       mm1, mm1
+    movd       mm0, [r0]
+    punpcklbw  mm0, mm1
+    movd       mm7, [r0-4]
+    pshufb     mm7, mm6
+    lea         r1, [r0+r2*2]
+    movd       mm2, [r0+r2*1-4]
+    movd       mm3, [r0+r2*2-4]
+    movd       mm4, [r1+r2*1-4]
+    movd       mm5, [r1+r2*2-4]
+    pshufb     mm2, mm6
+    pshufb     mm3, mm6
+    pshufb     mm4, mm6
+    pshufb     mm5, mm6
+    psubw      mm2, mm7
+    psubw      mm3, mm7
+    psubw      mm4, mm7
+    psubw      mm5, mm7
+    paddw      mm2, mm0
+    paddw      mm3, mm0
+    paddw      mm4, mm0
+    paddw      mm5, mm0
+    packuswb   mm2, mm2
+    packuswb   mm3, mm3
+    packuswb   mm4, mm4
+    packuswb   mm5, mm5
+    movd [r0+r2*1], mm2
+    movd [r0+r2*2], mm3
+    movd [r1+r2*1], mm4
+    movd [r1+r2*2], mm5
+    RET
--- a/x86/h264dsp_mmx.c	Mon Jun 28 23:37:24 2010 +0000
+++ b/x86/h264dsp_mmx.c	Mon Jun 28 23:53:07 2010 +0000
@@ -2345,6 +2345,9 @@
 void ff_pred8x8_tm_vp8_sse2        (uint8_t *src, int stride);
 void ff_pred8x8_tm_vp8_ssse3       (uint8_t *src, int stride);
 void ff_pred4x4_dc_mmxext          (uint8_t *src, const uint8_t *topright, int stride);
+void ff_pred4x4_tm_vp8_mmx         (uint8_t *src, const uint8_t *topright, int stride);
+void ff_pred4x4_tm_vp8_mmxext      (uint8_t *src, const uint8_t *topright, int stride);
+void ff_pred4x4_tm_vp8_ssse3       (uint8_t *src, const uint8_t *topright, int stride);
 
 #if CONFIG_H264DSP
 void ff_h264_pred_init_x86(H264PredContext *h, int codec_id)
@@ -2358,6 +2361,7 @@
         if (codec_id == CODEC_ID_VP8) {
             h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmx;
             h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_mmx;
+            h->pred4x4  [TM_VP8_PRED  ] = ff_pred4x4_tm_vp8_mmx;
         }
     }
 
@@ -2370,6 +2374,7 @@
             h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmxext;
             h->pred8x8  [DC_PRED8x8   ] = ff_pred8x8_dc_rv40_mmxext;
             h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_mmxext;
+            h->pred4x4  [TM_VP8_PRED  ] = ff_pred4x4_tm_vp8_mmxext;
         }
     }
 
@@ -2392,6 +2397,7 @@
         h->pred8x8  [HOR_PRED8x8 ] = ff_pred8x8_horizontal_ssse3;
         if (codec_id == CODEC_ID_VP8) {
             h->pred8x8  [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_ssse3;
+            h->pred4x4  [TM_VP8_PRED  ] = ff_pred4x4_tm_vp8_ssse3;
         }
     }
 #endif