# HG changeset patch
# User conrad
# Date 1239737193 0
# Node ID 8aa2e86549cde1411791cfef847b47abc86c29e0
# Parent dfed6a243babe6b45533f2b64c22b0bf2681b9f2
VC1: Do qpel when needed for both MVs in a B frame

diff -r dfed6a243bab -r 8aa2e86549cd dsputil.c
--- a/dsputil.c Tue Apr 14 04:07:35 2009 +0000
+++ b/dsputil.c Tue Apr 14 19:26:33 2009 +0000
@@ -2737,6 +2737,9 @@
 void ff_put_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) {
     put_pixels8_c(dst, src, stride, 8);
 }
+void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) {
+    avg_pixels8_c(dst, src, stride, 8);
+}
 #endif /* CONFIG_VC1_DECODER||CONFIG_WMV3_DECODER */
 
 void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);
diff -r dfed6a243bab -r 8aa2e86549cd dsputil.h
--- a/dsputil.h Tue Apr 14 04:07:35 2009 +0000
+++ b/dsputil.h Tue Apr 14 19:26:33 2009 +0000
@@ -485,6 +485,7 @@
      * last argument is actually round value instead of height
      */
     op_pixels_func put_vc1_mspel_pixels_tab[16];
+    op_pixels_func avg_vc1_mspel_pixels_tab[16];
 
     /* intrax8 functions */
     void (*x8_spatial_compensation[12])(uint8_t *src , uint8_t *dst, int linesize);
diff -r dfed6a243bab -r 8aa2e86549cd vc1.c
--- a/vc1.c Tue Apr 14 04:07:35 2009 +0000
+++ b/vc1.c Tue Apr 14 19:26:33 2009 +0000
@@ -1904,11 +1904,21 @@
         srcY += s->mspel * (1 + s->linesize);
     }
 
-    mx >>= 1;
-    my >>= 1;
-    dxy = ((my & 1) << 1) | (mx & 1);
-
-    dsp->avg_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize, 16);
+    if(s->mspel) {
+        dxy = ((my & 3) << 2) | (mx & 3);
+        dsp->avg_vc1_mspel_pixels_tab[dxy](s->dest[0]    , srcY    , s->linesize, v->rnd);
+        dsp->avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8, srcY + 8, s->linesize, v->rnd);
+        srcY += s->linesize * 8;
+        dsp->avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize    , srcY    , s->linesize, v->rnd);
+        dsp->avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize + 8, srcY + 8, s->linesize, v->rnd);
+    } else { // hpel mc
+        dxy = (my & 2) | ((mx & 2) >> 1);
+
+        if(!v->rnd)
+            dsp->avg_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize, 16);
+        else
+            dsp->avg_no_rnd_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize, 16);
+    }
 
     if(s->flags & CODEC_FLAG_GRAY) return;
     /* Chroma MC always uses qpel blilinear */
diff -r dfed6a243bab -r 8aa2e86549cd vc1dsp.c
--- a/vc1dsp.c Tue Apr 14 04:07:35 2009 +0000
+++ b/vc1dsp.c Tue Apr 14 19:26:33 2009 +0000
@@ -348,69 +348,80 @@
 
 /** Function used to do motion compensation with bicubic interpolation
  */
-static void vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride, int hmode, int vmode, int rnd)
-{
-    int i, j;
-
-    if (vmode) { /* Horizontal filter to apply */
-        int r;
-
-        if (hmode) { /* Vertical filter to apply, output to tmp */
-            static const int shift_value[] = { 0, 5, 1, 5 };
-            int shift = (shift_value[hmode]+shift_value[vmode])>>1;
-            int16_t tmp[11*8], *tptr = tmp;
-
-            r = (1<<(shift-1)) + rnd-1;
-
-            src -= 1;
-            for(j = 0; j < 8; j++) {
-                for(i = 0; i < 11; i++)
-                    tptr[i] = (vc1_mspel_ver_filter_16bits(src + i, stride, vmode)+r)>>shift;
-                src += stride;
-                tptr += 11;
-            }
+#define VC1_MSPEL_MC(OP, OPNAME)\
+static void OPNAME ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride, int hmode, int vmode, int rnd)\
+{\
+    int i, j;\
+\
+    if (vmode) { /* Horizontal filter to apply */\
+        int r;\
+\
+        if (hmode) { /* Vertical filter to apply, output to tmp */\
+            static const int shift_value[] = { 0, 5, 1, 5 };\
+            int shift = (shift_value[hmode]+shift_value[vmode])>>1;\
+            int16_t tmp[11*8], *tptr = tmp;\
+\
+            r = (1<<(shift-1)) + rnd-1;\
+\
+            src -= 1;\
+            for(j = 0; j < 8; j++) {\
+                for(i = 0; i < 11; i++)\
+                    tptr[i] = (vc1_mspel_ver_filter_16bits(src + i, stride, vmode)+r)>>shift;\
+                src += stride;\
+                tptr += 11;\
+            }\
+\
+            r = 64-rnd;\
+            tptr = tmp+1;\
+            for(j = 0; j < 8; j++) {\
+                for(i = 0; i < 8; i++)\
+                    OP(dst[i], (vc1_mspel_hor_filter_16bits(tptr + i, 1, hmode)+r)>>7);\
+                dst += stride;\
+                tptr += 11;\
+            }\
+\
+            return;\
+        }\
+        else { /* No horizontal filter, output 8 lines to dst */\
+            r = 1-rnd;\
+\
+            for(j = 0; j < 8; j++) {\
+                for(i = 0; i < 8; i++)\
+                    OP(dst[i], vc1_mspel_filter(src + i, stride, vmode, r));\
+                src += stride;\
+                dst += stride;\
+            }\
+            return;\
+        }\
+    }\
+\
+    /* Horizontal mode with no vertical mode */\
+    for(j = 0; j < 8; j++) {\
+        for(i = 0; i < 8; i++)\
+            OP(dst[i], vc1_mspel_filter(src + i, 1, hmode, rnd));\
+        dst += stride;\
+        src += stride;\
+    }\
+}
 
-            r = 64-rnd;
-            tptr = tmp+1;
-            for(j = 0; j < 8; j++) {
-                for(i = 0; i < 8; i++)
-                    dst[i] = av_clip_uint8((vc1_mspel_hor_filter_16bits(tptr + i, 1, hmode)+r)>>7);
-                dst += stride;
-                tptr += 11;
-            }
-
-            return;
-        }
-        else { /* No horizontal filter, output 8 lines to dst */
-            r = 1-rnd;
+#define op_put(a, b) a = av_clip_uint8(b)
+#define op_avg(a, b) a = (a + av_clip_uint8(b) + 1) >> 1
 
-            for(j = 0; j < 8; j++) {
-                for(i = 0; i < 8; i++)
-                    dst[i] = av_clip_uint8(vc1_mspel_filter(src + i, stride, vmode, r));
-                src += stride;
-                dst += stride;
-            }
-            return;
-        }
-    }
-
-    /* Horizontal mode with no vertical mode */
-    for(j = 0; j < 8; j++) {
-        for(i = 0; i < 8; i++)
-            dst[i] = av_clip_uint8(vc1_mspel_filter(src + i, 1, hmode, rnd));
-        dst += stride;
-        src += stride;
-    }
-}
+VC1_MSPEL_MC(op_put, put_)
+VC1_MSPEL_MC(op_avg, avg_)
 
 /* pixel functions - really are entry points to vc1_mspel_mc */
 
 /* this one is defined in dsputil.c */
 void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd);
+void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd);
 
 #define PUT_VC1_MSPEL(a, b)\
 static void put_vc1_mspel_mc ## a ## b ##_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \
-     vc1_mspel_mc(dst, src, stride, a, b, rnd); \
+     put_vc1_mspel_mc(dst, src, stride, a, b, rnd); \
+}\
+static void avg_vc1_mspel_mc ## a ## b ##_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \
+     avg_vc1_mspel_mc(dst, src, stride, a, b, rnd); \
 }
 
 PUT_VC1_MSPEL(1, 0)
@@ -456,4 +467,21 @@
     dsp->put_vc1_mspel_pixels_tab[13] = put_vc1_mspel_mc13_c;
     dsp->put_vc1_mspel_pixels_tab[14] = put_vc1_mspel_mc23_c;
     dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_c;
+
+    dsp->avg_vc1_mspel_pixels_tab[ 0] = ff_avg_vc1_mspel_mc00_c;
+    dsp->avg_vc1_mspel_pixels_tab[ 1] = avg_vc1_mspel_mc10_c;
+    dsp->avg_vc1_mspel_pixels_tab[ 2] = avg_vc1_mspel_mc20_c;
+    dsp->avg_vc1_mspel_pixels_tab[ 3] = avg_vc1_mspel_mc30_c;
+    dsp->avg_vc1_mspel_pixels_tab[ 4] = avg_vc1_mspel_mc01_c;
+    dsp->avg_vc1_mspel_pixels_tab[ 5] = avg_vc1_mspel_mc11_c;
+    dsp->avg_vc1_mspel_pixels_tab[ 6] = avg_vc1_mspel_mc21_c;
+    dsp->avg_vc1_mspel_pixels_tab[ 7] = avg_vc1_mspel_mc31_c;
+    dsp->avg_vc1_mspel_pixels_tab[ 8] = avg_vc1_mspel_mc02_c;
+    dsp->avg_vc1_mspel_pixels_tab[ 9] = avg_vc1_mspel_mc12_c;
+    dsp->avg_vc1_mspel_pixels_tab[10] = avg_vc1_mspel_mc22_c;
+    dsp->avg_vc1_mspel_pixels_tab[11] = avg_vc1_mspel_mc32_c;
+    dsp->avg_vc1_mspel_pixels_tab[12] = avg_vc1_mspel_mc03_c;
+    dsp->avg_vc1_mspel_pixels_tab[13] = avg_vc1_mspel_mc13_c;
+    dsp->avg_vc1_mspel_pixels_tab[14] = avg_vc1_mspel_mc23_c;
+    dsp->avg_vc1_mspel_pixels_tab[15] = avg_vc1_mspel_mc33_c;
 }