# HG changeset patch # User lorenm # Date 1128901132 0 # Node ID 3c79bc9f3aa9edbfb5788f0b94b49c741b298d2d # Parent bf8d87efc31a092050dcf2d090a73eed069cc964 h264 mmx weighted prediction. up to 3% overall speedup. diff -r bf8d87efc31a -r 3c79bc9f3aa9 i386/dsputil_mmx.c --- a/i386/dsputil_mmx.c Thu Oct 06 12:13:25 2005 +0000 +++ b/i386/dsputil_mmx.c Sun Oct 09 23:38:52 2005 +0000 @@ -2832,6 +2832,24 @@ c->h264_v_loop_filter_chroma_intra= h264_v_loop_filter_chroma_intra_mmx2; c->h264_h_loop_filter_chroma_intra= h264_h_loop_filter_chroma_intra_mmx2; + c->weight_h264_pixels_tab[0]= ff_h264_weight_16x16_mmx2; + c->weight_h264_pixels_tab[1]= ff_h264_weight_16x8_mmx2; + c->weight_h264_pixels_tab[2]= ff_h264_weight_8x16_mmx2; + c->weight_h264_pixels_tab[3]= ff_h264_weight_8x8_mmx2; + c->weight_h264_pixels_tab[4]= ff_h264_weight_8x4_mmx2; + c->weight_h264_pixels_tab[5]= ff_h264_weight_4x8_mmx2; + c->weight_h264_pixels_tab[6]= ff_h264_weight_4x4_mmx2; + c->weight_h264_pixels_tab[7]= ff_h264_weight_4x2_mmx2; + + c->biweight_h264_pixels_tab[0]= ff_h264_biweight_16x16_mmx2; + c->biweight_h264_pixels_tab[1]= ff_h264_biweight_16x8_mmx2; + c->biweight_h264_pixels_tab[2]= ff_h264_biweight_8x16_mmx2; + c->biweight_h264_pixels_tab[3]= ff_h264_biweight_8x8_mmx2; + c->biweight_h264_pixels_tab[4]= ff_h264_biweight_8x4_mmx2; + c->biweight_h264_pixels_tab[5]= ff_h264_biweight_4x8_mmx2; + c->biweight_h264_pixels_tab[6]= ff_h264_biweight_4x4_mmx2; + c->biweight_h264_pixels_tab[7]= ff_h264_biweight_4x2_mmx2; + #ifdef CONFIG_ENCODERS c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_mmx2; #endif //CONFIG_ENCODERS diff -r bf8d87efc31a -r 3c79bc9f3aa9 i386/h264dsp_mmx.c --- a/i386/h264dsp_mmx.c Thu Oct 06 12:13:25 2005 +0000 +++ b/i386/h264dsp_mmx.c Sun Oct 09 23:38:52 2005 +0000 @@ -909,3 +909,100 @@ #undef H264_CHROMA_OP #undef H264_CHROMA_MC8_TMPL +/***********************************/ +/* weighted prediction */ + +static inline void ff_h264_weight_WxH_mmx2(uint8_t *dst, int stride, int log2_denom, int weight, int offset, int w, int h) +{ + int x, y; + offset <<= log2_denom; + offset += (1 << log2_denom) >> 1; + asm volatile( + "movd %0, %%mm4 \n\t" + "movd %1, %%mm5 \n\t" + "movd %2, %%mm6 \n\t" + "pshufw $0, %%mm4, %%mm4 \n\t" + "pshufw $0, %%mm5, %%mm5 \n\t" + "pxor %%mm7, %%mm7 \n\t" + :: "g"(weight), "g"(offset), "g"(log2_denom) + ); + for(y=0; y