# HG changeset patch
# User conrad
# Date 1239737193 0
# Node ID 8aa2e86549cde1411791cfef847b47abc86c29e0
# Parent  dfed6a243babe6b45533f2b64c22b0bf2681b9f2
VC1: Do qpel when needed for both MVs in a B frame

diff -r dfed6a243bab -r 8aa2e86549cd dsputil.c
--- a/dsputil.c	Tue Apr 14 04:07:35 2009 +0000
+++ b/dsputil.c	Tue Apr 14 19:26:33 2009 +0000
@@ -2737,6 +2737,9 @@
 void ff_put_vc1_mspel_mc00_c(uint8_t *dst, uint8_t *src, int stride, int rnd) {
     put_pixels8_c(dst, src, stride, 8);
 }
+void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) { /* integer-pel case; src is const to match the prototype in vc1dsp.c; rnd is accepted but not used */
+    avg_pixels8_c(dst, src, stride, 8); /* 8 is handed over as avg_pixels8_c's last argument (presumably the row count) */
+}
 #endif /* CONFIG_VC1_DECODER||CONFIG_WMV3_DECODER */
 
 void ff_intrax8dsp_init(DSPContext* c, AVCodecContext *avctx);
diff -r dfed6a243bab -r 8aa2e86549cd dsputil.h
--- a/dsputil.h	Tue Apr 14 04:07:35 2009 +0000
+++ b/dsputil.h	Tue Apr 14 19:26:33 2009 +0000
@@ -485,6 +485,7 @@
      * last argument is actually round value instead of height
      */
     op_pixels_func put_vc1_mspel_pixels_tab[16];
+    op_pixels_func avg_vc1_mspel_pixels_tab[16]; /* averaging counterpart of put_vc1_mspel_pixels_tab, same 16-entry layout */
 
     /* intrax8 functions */
     void (*x8_spatial_compensation[12])(uint8_t *src , uint8_t *dst, int linesize);
diff -r dfed6a243bab -r 8aa2e86549cd vc1.c
--- a/vc1.c	Tue Apr 14 04:07:35 2009 +0000
+++ b/vc1.c	Tue Apr 14 19:26:33 2009 +0000
@@ -1904,11 +1904,21 @@
         srcY += s->mspel * (1 + s->linesize);
     }
 
-    mx >>= 1;
-    my >>= 1;
-    dxy = ((my & 1) << 1) | (mx & 1);
-
-    dsp->avg_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize, 16);
+    if(s->mspel) { /* mspel path: keep both fractional bits of each MV component */
+        dxy = ((my & 3) << 2) | (mx & 3); /* table index: my fraction in bits 3..2, mx fraction in bits 1..0 */
+        dsp->avg_vc1_mspel_pixels_tab[dxy](s->dest[0]    , srcY    , s->linesize, v->rnd); /* top-left 8x8 */
+        dsp->avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8, srcY + 8, s->linesize, v->rnd); /* top-right 8x8 */
+        srcY += s->linesize * 8; /* advance the source by 8 rows for the lower half */
+        dsp->avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize    , srcY    , s->linesize, v->rnd); /* bottom-left 8x8 */
+        dsp->avg_vc1_mspel_pixels_tab[dxy](s->dest[0] + 8 * s->linesize + 8, srcY + 8, s->linesize, v->rnd); /* bottom-right 8x8 */
+    } else { // hpel mc
+        dxy = (my & 2) | ((mx & 2) >> 1); /* bit 1 of my -> index bit 1, bit 1 of mx -> index bit 0 */
+
+        if(!v->rnd) /* rnd == 0 selects the regular avg table, otherwise the no_rnd table */
+            dsp->avg_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize, 16);
+        else
+            dsp->avg_no_rnd_pixels_tab[0][dxy](s->dest[0], srcY, s->linesize, 16);
+    }
 
     if(s->flags & CODEC_FLAG_GRAY) return;
     /* Chroma MC always uses qpel blilinear */
diff -r dfed6a243bab -r 8aa2e86549cd vc1dsp.c
--- a/vc1dsp.c	Tue Apr 14 04:07:35 2009 +0000
+++ b/vc1dsp.c	Tue Apr 14 19:26:33 2009 +0000
@@ -348,69 +348,80 @@
 
 /** Function used to do motion compensation with bicubic interpolation
  */
-static void vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride, int hmode, int vmode, int rnd)
-{
-    int     i, j;
-
-    if (vmode) { /* Horizontal filter to apply */
-        int r;
-
-        if (hmode) { /* Vertical filter to apply, output to tmp */
-            static const int shift_value[] = { 0, 5, 1, 5 };
-            int              shift = (shift_value[hmode]+shift_value[vmode])>>1;
-            int16_t          tmp[11*8], *tptr = tmp;
-
-            r = (1<<(shift-1)) + rnd-1;
-
-            src -= 1;
-            for(j = 0; j < 8; j++) {
-                for(i = 0; i < 11; i++)
-                    tptr[i] = (vc1_mspel_ver_filter_16bits(src + i, stride, vmode)+r)>>shift;
-                src += stride;
-                tptr += 11;
-            }
+#define VC1_MSPEL_MC(OP, OPNAME) /* OP(pixel, value) stores one output pixel; OPNAME prefixes the generated function name */\
+static void OPNAME ## vc1_mspel_mc(uint8_t *dst, const uint8_t *src, int stride, int hmode, int vmode, int rnd)\
+{\
+    int     i, j;\
+\
+    if (vmode) { /* Vertical filter to apply */\
+        int r;\
+\
+        if (hmode) { /* Horizontal filter as well: vertical pass goes to tmp first */\
+            static const int shift_value[] = { 0, 5, 1, 5 };\
+            int              shift = (shift_value[hmode]+shift_value[vmode])>>1;\
+            int16_t          tmp[11*8], *tptr = tmp; /* 8 rows of 11 vertically filtered samples */\
+\
+            r = (1<<(shift-1)) + rnd-1; /* offset added before the >>shift of the first pass */\
+\
+            src -= 1; /* start one column to the left; the first pass fills 11 columns per row */\
+            for(j = 0; j < 8; j++) {\
+                for(i = 0; i < 11; i++)\
+                    tptr[i] = (vc1_mspel_ver_filter_16bits(src + i, stride, vmode)+r)>>shift;\
+                src += stride;\
+                tptr += 11;\
+            }\
+\
+            r = 64-rnd; /* offset added before the final >>7 */\
+            tptr = tmp+1; /* skip the extra left column so tptr + i lines up with dst[i] */\
+            for(j = 0; j < 8; j++) {\
+                for(i = 0; i < 8; i++)\
+                    OP(dst[i], (vc1_mspel_hor_filter_16bits(tptr + i, 1, hmode)+r)>>7);\
+                dst += stride;\
+                tptr += 11;\
+            }\
+\
+            return;\
+        }\
+        else { /* No horizontal filter, output 8 lines to dst */\
+            r = 1-rnd; /* value passed as the last argument of vc1_mspel_filter */\
+\
+            for(j = 0; j < 8; j++) {\
+                for(i = 0; i < 8; i++)\
+                    OP(dst[i], vc1_mspel_filter(src + i, stride, vmode, r));\
+                src += stride;\
+                dst += stride;\
+            }\
+            return;\
+        }\
+    }\
+\
+    /* Horizontal mode with no vertical mode: filter along each row (step 1) */\
+    for(j = 0; j < 8; j++) {\
+        for(i = 0; i < 8; i++)\
+            OP(dst[i], vc1_mspel_filter(src + i, 1, hmode, rnd));\
+        dst += stride;\
+        src += stride;\
+    }\
+}
 
-            r = 64-rnd;
-            tptr = tmp+1;
-            for(j = 0; j < 8; j++) {
-                for(i = 0; i < 8; i++)
-                    dst[i] = av_clip_uint8((vc1_mspel_hor_filter_16bits(tptr + i, 1, hmode)+r)>>7);
-                dst += stride;
-                tptr += 11;
-            }
-
-            return;
-        }
-        else { /* No horizontal filter, output 8 lines to dst */
-            r = 1-rnd;
+#define op_put(px, val) (px) = av_clip_uint8(val) /* store: overwrite px with av_clip_uint8(val) */
+#define op_avg(px, val) (px) = ((px) + av_clip_uint8(val) + 1) >> 1 /* average with the existing px, +1 before the halving shift */
 
-            for(j = 0; j < 8; j++) {
-                for(i = 0; i < 8; i++)
-                    dst[i] = av_clip_uint8(vc1_mspel_filter(src + i, stride, vmode, r));
-                src += stride;
-                dst += stride;
-            }
-            return;
-        }
-    }
-
-    /* Horizontal mode with no vertical mode */
-    for(j = 0; j < 8; j++) {
-        for(i = 0; i < 8; i++)
-            dst[i] = av_clip_uint8(vc1_mspel_filter(src + i, 1, hmode, rnd));
-        dst += stride;
-        src += stride;
-    }
-}
+VC1_MSPEL_MC(op_put, put_) /* defines put_vc1_mspel_mc(), storing through op_put */
+VC1_MSPEL_MC(op_avg, avg_) /* defines avg_vc1_mspel_mc(), storing through op_avg */
 
 /* pixel functions - really are entry points to vc1_mspel_mc */
 
 /* this one is defined in dsputil.c */
 void ff_put_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd);
+void ff_avg_vc1_mspel_mc00_c(uint8_t *dst, const uint8_t *src, int stride, int rnd); /* averaging variant; installed as entry 0 of avg_vc1_mspel_pixels_tab */
 
 #define PUT_VC1_MSPEL(a, b)\
 static void put_vc1_mspel_mc ## a ## b ##_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \
-     vc1_mspel_mc(dst, src, stride, a, b, rnd);                         \
+     put_vc1_mspel_mc(dst, src, stride, a, b, rnd); /* a -> hmode, b -> vmode */ \
+}\
+static void avg_vc1_mspel_mc ## a ## b ##_c(uint8_t *dst, const uint8_t *src, int stride, int rnd) { \
+     avg_vc1_mspel_mc(dst, src, stride, a, b, rnd); /* a -> hmode, b -> vmode */ \
+}
 }
 
 PUT_VC1_MSPEL(1, 0)
@@ -456,4 +467,21 @@
     dsp->put_vc1_mspel_pixels_tab[13] = put_vc1_mspel_mc13_c;
     dsp->put_vc1_mspel_pixels_tab[14] = put_vc1_mspel_mc23_c;
     dsp->put_vc1_mspel_pixels_tab[15] = put_vc1_mspel_mc33_c;
+
+    dsp->avg_vc1_mspel_pixels_tab[ 0] = ff_avg_vc1_mspel_mc00_c; /* index = (vmode << 2) | hmode; in mcXY, X is hmode and Y is vmode */
+    dsp->avg_vc1_mspel_pixels_tab[ 1] = avg_vc1_mspel_mc10_c;
+    dsp->avg_vc1_mspel_pixels_tab[ 2] = avg_vc1_mspel_mc20_c;
+    dsp->avg_vc1_mspel_pixels_tab[ 3] = avg_vc1_mspel_mc30_c;
+    dsp->avg_vc1_mspel_pixels_tab[ 4] = avg_vc1_mspel_mc01_c; /* vmode = 1 row */
+    dsp->avg_vc1_mspel_pixels_tab[ 5] = avg_vc1_mspel_mc11_c;
+    dsp->avg_vc1_mspel_pixels_tab[ 6] = avg_vc1_mspel_mc21_c;
+    dsp->avg_vc1_mspel_pixels_tab[ 7] = avg_vc1_mspel_mc31_c;
+    dsp->avg_vc1_mspel_pixels_tab[ 8] = avg_vc1_mspel_mc02_c; /* vmode = 2 row */
+    dsp->avg_vc1_mspel_pixels_tab[ 9] = avg_vc1_mspel_mc12_c;
+    dsp->avg_vc1_mspel_pixels_tab[10] = avg_vc1_mspel_mc22_c;
+    dsp->avg_vc1_mspel_pixels_tab[11] = avg_vc1_mspel_mc32_c;
+    dsp->avg_vc1_mspel_pixels_tab[12] = avg_vc1_mspel_mc03_c; /* vmode = 3 row */
+    dsp->avg_vc1_mspel_pixels_tab[13] = avg_vc1_mspel_mc13_c;
+    dsp->avg_vc1_mspel_pixels_tab[14] = avg_vc1_mspel_mc23_c;
+    dsp->avg_vc1_mspel_pixels_tab[15] = avg_vc1_mspel_mc33_c;
 }