# HG changeset patch # User lorenm # Date 1159737917 0 # Node ID 6a40092eb9e67eb92bf252114044f100dc4a9fa2 # Parent e1986d9ddc2d7f16b76d4b3d59414872821d12aa approximate qpel functions: sacrifice some quality for some decoding speed. enabled on B-frames with -lavdopts fast. diff -r e1986d9ddc2d -r 6a40092eb9e6 dsputil.c --- a/dsputil.c Sun Oct 01 18:19:49 2006 +0000 +++ b/dsputil.c Sun Oct 01 21:25:17 2006 +0000 @@ -4165,6 +4165,9 @@ c->prefetch= just_return; + memset(c->put_2tap_qpel_pixels_tab, 0, sizeof(c->put_2tap_qpel_pixels_tab)); + memset(c->avg_2tap_qpel_pixels_tab, 0, sizeof(c->avg_2tap_qpel_pixels_tab)); + #ifdef HAVE_MMX dsputil_init_mmx(c, avctx); #endif @@ -4193,6 +4196,13 @@ dsputil_init_bfin(c,avctx); #endif + for(i=0; i<64; i++){ + if(!c->put_2tap_qpel_pixels_tab[0][i]) + c->put_2tap_qpel_pixels_tab[0][i]= c->put_h264_qpel_pixels_tab[0][i]; + if(!c->avg_2tap_qpel_pixels_tab[0][i]) + c->avg_2tap_qpel_pixels_tab[0][i]= c->avg_h264_qpel_pixels_tab[0][i]; + } + switch(c->idct_permutation_type){ case FF_NO_IDCT_PERM: for(i=0; i<64; i++) diff -r e1986d9ddc2d -r 6a40092eb9e6 dsputil.h --- a/dsputil.h Sun Oct 01 18:19:49 2006 +0000 +++ b/dsputil.h Sun Oct 01 21:25:17 2006 +0000 @@ -277,6 +277,9 @@ qpel_mc_func put_h264_qpel_pixels_tab[4][16]; qpel_mc_func avg_h264_qpel_pixels_tab[4][16]; + qpel_mc_func put_2tap_qpel_pixels_tab[4][16]; + qpel_mc_func avg_2tap_qpel_pixels_tab[4][16]; + h264_weight_func weight_h264_pixels_tab[10]; h264_biweight_func biweight_h264_pixels_tab[10]; diff -r e1986d9ddc2d -r 6a40092eb9e6 h263dec.c --- a/h263dec.c Sun Oct 01 18:19:49 2006 +0000 +++ b/h263dec.c Sun Oct 01 21:25:17 2006 +0000 @@ -693,6 +693,17 @@ s->next_p_frame_damaged=0; } + if((s->avctx->flags2 & CODEC_FLAG2_FAST) && s->pict_type==B_TYPE){ + s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab; + s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab; + }else if((!s->no_rounding) || s->pict_type==B_TYPE){ /* rounding qpel: B-frames, or no_rounding clear */ + s->me.qpel_put= s->dsp.put_qpel_pixels_tab; + s->me.qpel_avg= 
s->dsp.avg_qpel_pixels_tab; + }else{ /* no_rounding set on a non-B frame */ + s->me.qpel_put= s->dsp.put_no_rnd_qpel_pixels_tab; + s->me.qpel_avg= s->dsp.avg_qpel_pixels_tab; + } + if(MPV_frame_start(s, avctx) < 0) return -1; diff -r e1986d9ddc2d -r 6a40092eb9e6 h264.c --- a/h264.c Sun Oct 01 18:19:49 2006 +0000 +++ b/h264.c Sun Oct 01 21:25:17 2006 +0000 @@ -3782,8 +3782,8 @@ xchg_mb_border(h, dest_y, dest_cb, dest_cr, linesize, uvlinesize, 0); }else if(s->codec_id == CODEC_ID_H264){ hl_motion(h, dest_y, dest_cb, dest_cr, - s->dsp.put_h264_qpel_pixels_tab, s->dsp.put_h264_chroma_pixels_tab, - s->dsp.avg_h264_qpel_pixels_tab, s->dsp.avg_h264_chroma_pixels_tab, + s->me.qpel_put, s->dsp.put_h264_chroma_pixels_tab, + s->me.qpel_avg, s->dsp.avg_h264_chroma_pixels_tab, s->dsp.weight_h264_pixels_tab, s->dsp.biweight_h264_pixels_tab); } @@ -4885,6 +4885,14 @@ ); } + if((s->avctx->flags2 & CODEC_FLAG2_FAST) && !s->current_picture.reference){ + s->me.qpel_put= s->dsp.put_2tap_qpel_pixels_tab; + s->me.qpel_avg= s->dsp.avg_2tap_qpel_pixels_tab; + }else{ + s->me.qpel_put= s->dsp.put_h264_qpel_pixels_tab; + s->me.qpel_avg= s->dsp.avg_h264_qpel_pixels_tab; + } + return 0; } diff -r e1986d9ddc2d -r 6a40092eb9e6 i386/dsputil_mmx.c --- a/i386/dsputil_mmx.c Sun Oct 01 18:19:49 2006 +0000 +++ b/i386/dsputil_mmx.c Sun Oct 01 21:25:17 2006 +0000 @@ -2400,6 +2400,53 @@ QPEL_OP(avg_ , ff_pw_16, _ , AVG_MMX2_OP, mmx2) QPEL_OP(put_no_rnd_, ff_pw_15, _no_rnd_, PUT_OP, mmx2) +/***********************************/ +/* bilinear qpel: not compliant to any spec, only for -lavdopts fast */ + +#define QPEL_2TAP_XY(OPNAME, SIZE, MMX, XY, HPEL)\ +static void OPNAME ## 2tap_qpel ## SIZE ## _mc ## XY ## _ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + OPNAME ## pixels ## SIZE ## HPEL(dst, src, stride, SIZE);\ +} +#define QPEL_2TAP_L3(OPNAME, SIZE, MMX, XY, S0, S1, S2)\ +static void OPNAME ## 2tap_qpel ## SIZE ## _mc ## XY ## _ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + OPNAME ## 2tap_qpel ## SIZE ## _l3_ ## MMX(dst, src+S0, stride, 
SIZE, S1, S2);\ +} + +#define QPEL_2TAP(OPNAME, SIZE, MMX)\ +QPEL_2TAP_XY(OPNAME, SIZE, MMX, 20, _x2_ ## MMX)\ +QPEL_2TAP_XY(OPNAME, SIZE, MMX, 02, _y2_ ## MMX)\ +QPEL_2TAP_XY(OPNAME, SIZE, MMX, 22, _xy2_mmx)\ +static const qpel_mc_func OPNAME ## 2tap_qpel ## SIZE ## _mc00_ ## MMX =\ + OPNAME ## qpel ## SIZE ## _mc00_ ## MMX;\ +static const qpel_mc_func OPNAME ## 2tap_qpel ## SIZE ## _mc21_ ## MMX =\ + OPNAME ## 2tap_qpel ## SIZE ## _mc20_ ## MMX;\ +static const qpel_mc_func OPNAME ## 2tap_qpel ## SIZE ## _mc12_ ## MMX =\ + OPNAME ## 2tap_qpel ## SIZE ## _mc02_ ## MMX;\ +static void OPNAME ## 2tap_qpel ## SIZE ## _mc32_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + OPNAME ## pixels ## SIZE ## _y2_ ## MMX(dst, src+1, stride, SIZE);\ +}\ +static void OPNAME ## 2tap_qpel ## SIZE ## _mc23_ ## MMX(uint8_t *dst, uint8_t *src, int stride){\ + OPNAME ## pixels ## SIZE ## _x2_ ## MMX(dst, src+stride, stride, SIZE);\ +}\ +QPEL_2TAP_L3(OPNAME, SIZE, MMX, 10, 0, 1, 0)\ +QPEL_2TAP_L3(OPNAME, SIZE, MMX, 30, 1, -1, 0)\ +QPEL_2TAP_L3(OPNAME, SIZE, MMX, 01, 0, stride, 0)\ +QPEL_2TAP_L3(OPNAME, SIZE, MMX, 03, stride, -stride, 0)\ +QPEL_2TAP_L3(OPNAME, SIZE, MMX, 11, 0, stride, 1)\ +QPEL_2TAP_L3(OPNAME, SIZE, MMX, 31, 1, stride, -1)\ +QPEL_2TAP_L3(OPNAME, SIZE, MMX, 13, stride, -stride, 1)\ +QPEL_2TAP_L3(OPNAME, SIZE, MMX, 33, stride+1, -stride, -1)\ + +QPEL_2TAP(put_, 16, mmx2) +QPEL_2TAP(avg_, 16, mmx2) +QPEL_2TAP(put_, 8, mmx2) +QPEL_2TAP(avg_, 8, mmx2) +QPEL_2TAP(put_, 16, 3dnow) +QPEL_2TAP(avg_, 16, 3dnow) +QPEL_2TAP(put_, 8, 3dnow) +QPEL_2TAP(avg_, 8, 3dnow) + + #if 0 static void just_return() { return; } #endif @@ -3276,6 +3323,11 @@ dspfunc(avg_h264_qpel, 0, 16); dspfunc(avg_h264_qpel, 1, 8); dspfunc(avg_h264_qpel, 2, 4); + + dspfunc(put_2tap_qpel, 0, 16); + dspfunc(put_2tap_qpel, 1, 8); + dspfunc(avg_2tap_qpel, 0, 16); + dspfunc(avg_2tap_qpel, 1, 8); #undef dspfunc c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_mmx2; @@ -3399,6 +3451,11 @@ dspfunc(avg_h264_qpel, 
1, 8); dspfunc(avg_h264_qpel, 2, 4); + dspfunc(put_2tap_qpel, 0, 16); + dspfunc(put_2tap_qpel, 1, 8); + dspfunc(avg_2tap_qpel, 0, 16); + dspfunc(avg_2tap_qpel, 1, 8); + c->avg_h264_chroma_pixels_tab[0]= avg_h264_chroma_mc8_3dnow; c->avg_h264_chroma_pixels_tab[1]= avg_h264_chroma_mc4_3dnow; } diff -r e1986d9ddc2d -r 6a40092eb9e6 i386/dsputil_mmx_avg.h --- a/i386/dsputil_mmx_avg.h Sun Oct 01 18:19:49 2006 +0000 +++ b/i386/dsputil_mmx_avg.h Sun Oct 01 21:25:17 2006 +0000 @@ -818,3 +818,51 @@ DEF(avg_pixels8_xy2)(block+8, pixels+8, line_size, h); } +#define QPEL_2TAP_L3(OPNAME) \ +static void DEF(OPNAME ## 2tap_qpel16_l3)(uint8_t *dst, uint8_t *src, int stride, int h, int off1, int off2){\ + asm volatile(\ + "1: \n\t"\ + "movq (%1,%2), %%mm0 \n\t"\ + "movq 8(%1,%2), %%mm1 \n\t"\ + PAVGB" (%1,%3), %%mm0 \n\t"\ + PAVGB" 8(%1,%3), %%mm1 \n\t"\ + PAVGB" (%1), %%mm0 \n\t"\ + PAVGB" 8(%1), %%mm1 \n\t"\ + STORE_OP( (%1,%4),%%mm0)\ + STORE_OP(8(%1,%4),%%mm1)\ + "movq %%mm0, (%1,%4) \n\t"\ + "movq %%mm1, 8(%1,%4) \n\t"\ + "add %5, %1 \n\t"\ + "decl %0 \n\t"\ + "jnz 1b \n\t"\ + :"+g"(h), "+r"(src)\ + :"r"((long)off1), "r"((long)off2),\ + "r"((long)(dst-src)), "r"((long)stride)\ + :"memory"\ + );\ +}\ +static void DEF(OPNAME ## 2tap_qpel8_l3)(uint8_t *dst, uint8_t *src, int stride, int h, int off1, int off2){\ + asm volatile(\ + "1: \n\t"\ + "movq (%1,%2), %%mm0 \n\t"\ + PAVGB" (%1,%3), %%mm0 \n\t"\ + PAVGB" (%1), %%mm0 \n\t"\ + STORE_OP((%1,%4),%%mm0)\ + "movq %%mm0, (%1,%4) \n\t"\ + "add %5, %1 \n\t"\ + "decl %0 \n\t"\ + "jnz 1b \n\t"\ + :"+g"(h), "+r"(src)\ + :"r"((long)off1), "r"((long)off2),\ + "r"((long)(dst-src)), "r"((long)stride)\ + :"memory"\ + );\ +} + +#define STORE_OP(a,b) PAVGB" "#a","#b" \n\t" +QPEL_2TAP_L3(avg_) +#undef STORE_OP +#define STORE_OP(a,b) +QPEL_2TAP_L3(put_) +#undef STORE_OP +#undef QPEL_2TAP_L3 diff -r e1986d9ddc2d -r 6a40092eb9e6 mpegvideo.c --- a/mpegvideo.c Sun Oct 01 18:19:49 2006 +0000 +++ b/mpegvideo.c Sun Oct 01 21:25:17 2006 +0000 @@ -3944,17 
+3944,16 @@ MPV_motion_lowres(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix); } }else{ + op_qpix= s->me.qpel_put; if ((!s->no_rounding) || s->pict_type==B_TYPE){ op_pix = s->dsp.put_pixels_tab; - op_qpix= s->dsp.put_qpel_pixels_tab; }else{ op_pix = s->dsp.put_no_rnd_pixels_tab; - op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab; } if (s->mv_dir & MV_DIR_FORWARD) { MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture.data, op_pix, op_qpix); op_pix = s->dsp.avg_pixels_tab; - op_qpix= s->dsp.avg_qpel_pixels_tab; + op_qpix= s->me.qpel_avg; } if (s->mv_dir & MV_DIR_BACKWARD) { MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture.data, op_pix, op_qpix);