Mercurial > libavcodec.hg
changeset 12003:3b761226ea35 libavcodec
Add mmx/mmxext/ssse3 4x4 TM intra pred functions for vp8
author | darkshikari |
---|---|
date | Mon, 28 Jun 2010 23:53:07 +0000 |
parents | 9c8584f68754 |
children | 2002ea7c06f6 |
files | x86/h264_intrapred.asm x86/h264dsp_mmx.c |
diffstat | 2 files changed, 84 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- a/x86/h264_intrapred.asm Mon Jun 28 23:37:24 2010 +0000 +++ b/x86/h264_intrapred.asm Mon Jun 28 23:53:07 2010 +0000 @@ -494,3 +494,81 @@ mov [r0+r2*1], r3d mov [r0+r2*2], r3d RET + +;----------------------------------------------------------------------------- +; void pred4x4_tm_vp8_mmxext(uint8_t *src, const uint8_t *topright, int stride) +;----------------------------------------------------------------------------- + +%macro PRED4x4_TM_MMX 1 +cglobal pred4x4_tm_vp8_%1, 3,6 + sub r0, r2 + pxor mm7, mm7 + movd mm0, [r0] + punpcklbw mm0, mm7 + movzx r4d, byte [r0-1] + mov r5d, 2 +.loop: + movzx r1d, byte [r0+r2*1-1] + movzx r3d, byte [r0+r2*2-1] + sub r1d, r4d + sub r3d, r4d + movd mm2, r1d + movd mm4, r3d +%ifidn %1, mmx + punpcklwd mm2, mm2 + punpcklwd mm4, mm4 + punpckldq mm2, mm2 + punpckldq mm4, mm4 +%else + pshufw mm2, mm2, 0 + pshufw mm4, mm4, 0 +%endif + paddw mm2, mm0 + paddw mm4, mm0 + packuswb mm2, mm2 + packuswb mm4, mm4 + movd [r0+r2*1], mm2 + movd [r0+r2*2], mm4 + lea r0, [r0+r2*2] + dec r5d + jg .loop + REP_RET +%endmacro + +PRED4x4_TM_MMX mmx +PRED4x4_TM_MMX mmxext + +cglobal pred4x4_tm_vp8_ssse3, 3,3 + sub r0, r2 + movq mm6, [tm_shuf] + pxor mm1, mm1 + movd mm0, [r0] + punpcklbw mm0, mm1 + movd mm7, [r0-4] + pshufb mm7, mm6 + lea r1, [r0+r2*2] + movd mm2, [r0+r2*1-4] + movd mm3, [r0+r2*2-4] + movd mm4, [r1+r2*1-4] + movd mm5, [r1+r2*2-4] + pshufb mm2, mm6 + pshufb mm3, mm6 + pshufb mm4, mm6 + pshufb mm5, mm6 + psubw mm2, mm7 + psubw mm3, mm7 + psubw mm4, mm7 + psubw mm5, mm7 + paddw mm2, mm0 + paddw mm3, mm0 + paddw mm4, mm0 + paddw mm5, mm0 + packuswb mm2, mm2 + packuswb mm3, mm3 + packuswb mm4, mm4 + packuswb mm5, mm5 + movd [r0+r2*1], mm2 + movd [r0+r2*2], mm3 + movd [r1+r2*1], mm4 + movd [r1+r2*2], mm5 + RET
--- a/x86/h264dsp_mmx.c Mon Jun 28 23:37:24 2010 +0000 +++ b/x86/h264dsp_mmx.c Mon Jun 28 23:53:07 2010 +0000 @@ -2345,6 +2345,9 @@ void ff_pred8x8_tm_vp8_sse2 (uint8_t *src, int stride); void ff_pred8x8_tm_vp8_ssse3 (uint8_t *src, int stride); void ff_pred4x4_dc_mmxext (uint8_t *src, const uint8_t *topright, int stride); +void ff_pred4x4_tm_vp8_mmx (uint8_t *src, const uint8_t *topright, int stride); +void ff_pred4x4_tm_vp8_mmxext (uint8_t *src, const uint8_t *topright, int stride); +void ff_pred4x4_tm_vp8_ssse3 (uint8_t *src, const uint8_t *topright, int stride); #if CONFIG_H264DSP void ff_h264_pred_init_x86(H264PredContext *h, int codec_id) @@ -2358,6 +2361,7 @@ if (codec_id == CODEC_ID_VP8) { h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmx; h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_mmx; + h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_mmx; } } @@ -2370,6 +2374,7 @@ h->pred16x16[PLANE_PRED8x8] = ff_pred16x16_tm_vp8_mmxext; h->pred8x8 [DC_PRED8x8 ] = ff_pred8x8_dc_rv40_mmxext; h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_mmxext; + h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_mmxext; } } @@ -2392,6 +2397,7 @@ h->pred8x8 [HOR_PRED8x8 ] = ff_pred8x8_horizontal_ssse3; if (codec_id == CODEC_ID_VP8) { h->pred8x8 [PLANE_PRED8x8] = ff_pred8x8_tm_vp8_ssse3; + h->pred4x4 [TM_VP8_PRED ] = ff_pred4x4_tm_vp8_ssse3; } } #endif