# HG changeset patch
# User conrad
# Date 1275626786 0
# Node ID 11c5a87497d3374482b4ee272eb73cc625d32a34
# Parent  c6db7667e39b6e016e82ef12bd6f1659db6168cc
Add bitexact versions of put_no_rnd_pixels8 _x2 and _y2 for vp3/theora

diff -r c6db7667e39b -r 11c5a87497d3 x86/dsputil_mmx.c
--- a/x86/dsputil_mmx.c	Fri Jun 04 01:15:41 2010 +0000
+++ b/x86/dsputil_mmx.c	Fri Jun 04 04:46:26 2010 +0000
@@ -2657,6 +2657,12 @@
             c->vp3_idct_dc_add = ff_vp3_idct_dc_add_mmx2;
         }
 
+        if (CONFIG_VP3_DECODER
+            && (avctx->codec_id == CODEC_ID_VP3 || avctx->codec_id == CODEC_ID_THEORA)) {
+            c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_mmx2;
+            c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_mmx2;
+        }
+
 #define SET_QPEL_FUNCS(PFX, IDX, SIZE, CPU) \
         c->PFX ## _pixels_tab[IDX][ 0] = PFX ## SIZE ## _mc00_ ## CPU; \
         c->PFX ## _pixels_tab[IDX][ 1] = PFX ## SIZE ## _mc10_ ## CPU; \
@@ -2745,6 +2751,12 @@
             c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow;
         }
 
+        if (CONFIG_VP3_DECODER
+            && (avctx->codec_id == CODEC_ID_VP3 || avctx->codec_id == CODEC_ID_THEORA)) {
+            c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_exact_3dnow;
+            c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_exact_3dnow;
+        }
+
         SET_QPEL_FUNCS(put_qpel, 0, 16, 3dnow);
         SET_QPEL_FUNCS(put_qpel, 1, 8, 3dnow);
         SET_QPEL_FUNCS(put_no_rnd_qpel, 0, 16, 3dnow);
diff -r c6db7667e39b -r 11c5a87497d3 x86/dsputil_mmx_avg_template.c
--- a/x86/dsputil_mmx_avg_template.c	Fri Jun 04 01:15:41 2010 +0000
+++ b/x86/dsputil_mmx_avg_template.c	Fri Jun 04 04:46:26 2010 +0000
@@ -586,6 +586,49 @@
     :"%"REG_a, "memory");
 }
 
+static void DEF(put_no_rnd_pixels8_x2_exact)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+    __asm__ volatile (
+        "pcmpeqb %%mm6, %%mm6           \n\t"
+        "1:                             \n\t"
+        "movq  (%1), %%mm0              \n\t"
+        "movq  (%1, %3), %%mm2          \n\t"
+        "movq 1(%1), %%mm1              \n\t"
+        "movq 1(%1, %3), %%mm3          \n\t"
+        "pxor %%mm6, %%mm0              \n\t"
+        "pxor %%mm6, %%mm2              \n\t"
+        "pxor %%mm6, %%mm1              \n\t"
+        "pxor %%mm6, %%mm3              \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t"
+        PAVGB" %%mm3, %%mm2             \n\t"
+        "pxor %%mm6, %%mm0              \n\t"
+        "pxor %%mm6, %%mm2              \n\t"
+        "movq %%mm0, (%2)               \n\t"
+        "movq %%mm2, (%2, %3)           \n\t"
+        "movq  (%1, %3,2), %%mm0        \n\t"
+        "movq 1(%1, %3,2), %%mm1        \n\t"
+        "movq  (%1, %4), %%mm2          \n\t"
+        "movq 1(%1, %4), %%mm3          \n\t"
+        "pxor %%mm6, %%mm0              \n\t"
+        "pxor %%mm6, %%mm1              \n\t"
+        "pxor %%mm6, %%mm2              \n\t"
+        "pxor %%mm6, %%mm3              \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t"
+        PAVGB" %%mm3, %%mm2             \n\t"
+        "pxor %%mm6, %%mm0              \n\t"
+        "pxor %%mm6, %%mm2              \n\t"
+        "movq %%mm0, (%2, %3,2)         \n\t"
+        "movq %%mm2, (%2, %4)           \n\t"
+        "lea (%1, %3,4), %1             \n\t"
+        "lea (%2, %3,4), %2             \n\t"
+        "subl $4, %0                    \n\t"
+        "jg 1b                          \n\t"
+        : "+g"(h), "+r"(pixels), "+r"(block)
+        : "r" ((x86_reg)line_size), "r"((x86_reg)3*line_size)
+        : "memory"
+    );
+}
+
 static void DEF(put_pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
     __asm__ volatile(
@@ -650,6 +693,44 @@
     :"%"REG_a, "memory");
 }
 
+static void DEF(put_no_rnd_pixels8_y2_exact)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
+{
+    __asm__ volatile (
+        "movq (%1), %%mm0               \n\t"
+        "pcmpeqb %%mm6, %%mm6           \n\t"
+        "add %3, %1                     \n\t"
+        "pxor %%mm6, %%mm0              \n\t"
+        "1:                             \n\t"
+        "movq (%1), %%mm1               \n\t"
+        "movq (%1, %3), %%mm2           \n\t"
+        "pxor %%mm6, %%mm1              \n\t"
+        "pxor %%mm6, %%mm2              \n\t"
+        PAVGB" %%mm1, %%mm0             \n\t"
+        PAVGB" %%mm2, %%mm1             \n\t"
+        "pxor %%mm6, %%mm0              \n\t"
+        "pxor %%mm6, %%mm1              \n\t"
+        "movq %%mm0, (%2)               \n\t"
+        "movq %%mm1, (%2, %3)           \n\t"
+        "movq (%1, %3,2), %%mm1         \n\t"
+        "movq (%1, %4), %%mm0           \n\t"
+        "pxor %%mm6, %%mm1              \n\t"
+        "pxor %%mm6, %%mm0              \n\t"
+        PAVGB" %%mm1, %%mm2             \n\t"
+        PAVGB" %%mm0, %%mm1             \n\t"
+        "pxor %%mm6, %%mm2              \n\t"
+        "pxor %%mm6, %%mm1              \n\t"
+        "movq %%mm2, (%2, %3,2)         \n\t"
+        "movq %%mm1, (%2, %4)           \n\t"
+        "lea (%1, %3,4), %1             \n\t"
+        "lea (%2, %3,4), %2             \n\t"
+        "subl $4, %0                    \n\t"
+        "jg 1b                          \n\t"
+        :"+g"(h), "+r"(pixels), "+r" (block)
+        :"r" ((x86_reg)line_size), "r"((x86_reg)3*line_size)
+        :"memory"
+    );
+}
+
 static void DEF(avg_pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
 {
     __asm__ volatile(