Mercurial > libavcodec.hg

--- a/h263.c	Thu Nov 27 13:38:00 2003 +0000
+++ b/h263.c	Fri Nov 28 21:31:20 2003 +0000
@@ -222,7 +222,7 @@
         calculation of the current MB to see if we're on the limits */
         put_bits(&s->pb, 1, 0);	/* unrestricted motion vector: off */
         put_bits(&s->pb, 1, 0);	/* SAC: off */
-        put_bits(&s->pb, 1, 0);	/* advanced prediction mode: off */
+        put_bits(&s->pb, 1, s->obmc);	/* advanced prediction mode */
         put_bits(&s->pb, 1, 0);	/* not PB frame */
         put_bits(&s->pb, 5, s->qscale);
         put_bits(&s->pb, 1, 0);	/* Continuous Presence Multipoint mode: off */
@@ -240,7 +240,7 @@
         s->umvplus = s->unrestricted_mv;
         put_bits(&s->pb, 1, s->umvplus); /* Unrestricted Motion Vector */
         put_bits(&s->pb,1,0); /* SAC: off */
-        put_bits(&s->pb,1,0); /* Advanced Prediction Mode: off */
+        put_bits(&s->pb,1,s->obmc); /* Advanced Prediction Mode */
         put_bits(&s->pb,1,s->h263_aic); /* Advanced Intra Coding */
         put_bits(&s->pb,1,0); /* Deblocking Filter: off */
         put_bits(&s->pb,1,0); /* Slice Structured: off */
@@ -602,6 +602,8 @@
     if(s->encoding){ //FIXME encoding MUST be cleaned up
         if (s->mv_type == MV_TYPE_8X8)
             s->current_picture.mb_type[mb_xy]= MB_TYPE_L0 | MB_TYPE_8x8;
+        else if(s->mb_intra)
+            s->current_picture.mb_type[mb_xy]= MB_TYPE_INTRA;
         else
             s->current_picture.mb_type[mb_xy]= MB_TYPE_L0 | MB_TYPE_16x16;
     }
@@ -896,15 +898,15 @@
             }

             put_bits(&s->pb, 1, 0);	/* mb coded */
+            cbpc = cbp & 3;
+            cbpy = cbp >> 2;
+            cbpy ^= 0xf;
             if(s->mv_type==MV_TYPE_16X16){
-                cbpc = cbp & 3;
                 if(s->dquant) cbpc+= 8;
                 put_bits(&s->pb,
                         inter_MCBPC_bits[cbpc],
                         inter_MCBPC_code[cbpc]);

-                cbpy = cbp >> 2;
-                cbpy ^= 0xf;
                 put_bits(pb2, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
                 if(s->dquant)
                     put_bits(pb2, 2, dquant_code[s->dquant+2]);
@@ -925,12 +927,9 @@
                 h263_encode_motion(s, motion_x - pred_x, s->f_code);
                 h263_encode_motion(s, motion_y - pred_y, s->f_code);
             }else{
-                cbpc = (cbp & 3)+16;
                 put_bits(&s->pb,
-                        inter_MCBPC_bits[cbpc],
-                        inter_MCBPC_code[cbpc]);
-                cbpy = cbp >> 2;
-                cbpy ^= 0xf;
+                        inter_MCBPC_bits[cbpc+16],
+                        inter_MCBPC_code[cbpc+16]);
                 put_bits(pb2, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);

                 if(!s->progressive_sequence){
@@ -1070,34 +1069,68 @@
             return;
         }
         put_bits(&s->pb, 1, 0);	/* mb coded */
+
         cbpc = cbp & 3;
         if(s->dquant) cbpc+= 8;
-        put_bits(&s->pb,
-		    inter_MCBPC_bits[cbpc],
-		    inter_MCBPC_code[cbpc]);
         cbpy = cbp >> 2;
         cbpy ^= 0xf;
-        put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
-        if(s->dquant)
-            put_bits(&s->pb, 2, dquant_code[s->dquant+2]);
-
-        if(interleaved_stats){
-            s->misc_bits+= get_bits_diff(s);
-        }
-
-        /* motion vectors: 16x16 mode only now */
-        h263_pred_motion(s, 0, &pred_x, &pred_y);
-
-        if (!s->umvplus) {
-            h263_encode_motion(s, motion_x - pred_x, s->f_code);
-            h263_encode_motion(s, motion_y - pred_y, s->f_code);
-        }
-        else {
-            h263p_encode_umotion(s, motion_x - pred_x);
-            h263p_encode_umotion(s, motion_y - pred_y);
-            if (((motion_x - pred_x) == 1) && ((motion_y - pred_y) == 1))
-                /* To prevent Start Code emulation */
-                put_bits(&s->pb,1,1);
+        if(s->mv_type==MV_TYPE_16X16){
+            put_bits(&s->pb,
+                    inter_MCBPC_bits[cbpc],
+                    inter_MCBPC_code[cbpc]);
+
+            put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+            if(s->dquant)
+                put_bits(&s->pb, 2, dquant_code[s->dquant+2]);
+
+            if(interleaved_stats){
+                s->misc_bits+= get_bits_diff(s);
+            }
+
+            /* motion vectors: 16x16 mode */
+            h263_pred_motion(s, 0, &pred_x, &pred_y);
+
+            if (!s->umvplus) {
+                h263_encode_motion(s, motion_x - pred_x, s->f_code);
+                h263_encode_motion(s, motion_y - pred_y, s->f_code);
+            }
+            else {
+                h263p_encode_umotion(s, motion_x - pred_x);
+                h263p_encode_umotion(s, motion_y - pred_y);
+                if (((motion_x - pred_x) == 1) && ((motion_y - pred_y) == 1))
+                    /* To prevent Start Code emulation */
+                    put_bits(&s->pb,1,1);
+            }
+        }else{
+            put_bits(&s->pb,
+                    inter_MCBPC_bits[cbpc+16],
+                    inter_MCBPC_code[cbpc+16]);
+            put_bits(&s->pb, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]);
+            if(s->dquant)
+                put_bits(&s->pb, 2, dquant_code[s->dquant+2]);
+
+            if(interleaved_stats){
+                s->misc_bits+= get_bits_diff(s);
+            }
+
+            for(i=0; i<4; i++){
+                /* motion vectors: 8x8 mode*/
+                h263_pred_motion(s, i, &pred_x, &pred_y);
+
+                motion_x= s->motion_val[ s->block_index[i] ][0];
+                motion_y= s->motion_val[ s->block_index[i] ][1];
+                if (!s->umvplus) {
+                    h263_encode_motion(s, motion_x - pred_x, s->f_code);
+                    h263_encode_motion(s, motion_y - pred_y, s->f_code);
+                }
+                else {
+                    h263p_encode_umotion(s, motion_x - pred_x);
+                    h263p_encode_umotion(s, motion_y - pred_y);
+                    if (((motion_x - pred_x) == 1) && ((motion_y - pred_y) == 1))
+                        /* To prevent Start Code emulation */
+                        put_bits(&s->pb,1,1);
+                }
+            }
         }

         if(interleaved_stats){
@@ -3301,6 +3334,97 @@
     }
 }

+/**
+ * read the next MVs for OBMC. yes this is a ugly hack, feel free to send a patch :)
+ */
+static void preview_obmc(MpegEncContext *s){
+    GetBitContext gb= s->gb;
+
+    int cbpc, i, pred_x, pred_y, mx, my;
+    int16_t *mot_val;
+    const int xy= s->mb_x + 1 + s->mb_y * s->mb_stride;
+    const int stride= s->block_wrap[0]*2;
+
+    for(i=0; i<4; i++)
+        s->block_index[i]+= 2;
+    for(i=4; i<6; i++)
+        s->block_index[i]+= 1;
+    s->mb_x++;
+
+    assert(s->pict_type == P_TYPE);
+
+    do{
+        if (get_bits1(&s->gb)) {
+            /* skip mb */
+            mot_val = s->motion_val[ s->block_index[0] ];
+            mot_val[0       ]= mot_val[2       ]=
+            mot_val[0+stride]= mot_val[2+stride]= 0;
+            mot_val[1       ]= mot_val[3       ]=
+            mot_val[1+stride]= mot_val[3+stride]= 0;
+
+            s->current_picture.mb_type[xy]= MB_TYPE_SKIP | MB_TYPE_16x16 | MB_TYPE_L0;
+            goto end;
+        }
+        cbpc = get_vlc2(&s->gb, inter_MCBPC_vlc.table, INTER_MCBPC_VLC_BITS, 2);
+    }while(cbpc == 20);
+
+    if(cbpc & 4){
+        s->current_picture.mb_type[xy]= MB_TYPE_INTRA;
+    }else{
+        get_vlc2(&s->gb, cbpy_vlc.table, CBPY_VLC_BITS, 1);
+        if (cbpc & 8) {
+            skip_bits(&s->gb, 2);
+        }
+
+        if ((cbpc & 16) == 0) {
+                s->current_picture.mb_type[xy]= MB_TYPE_16x16 | MB_TYPE_L0;
+                /* 16x16 motion prediction */
+                mot_val= h263_pred_motion(s, 0, &pred_x, &pred_y);
+                if (s->umvplus)
+                   mx = h263p_decode_umotion(s, pred_x);
+                else
+                   mx = h263_decode_motion(s, pred_x, s->f_code);
+
+                if (s->umvplus)
+                   my = h263p_decode_umotion(s, pred_y);
+                else
+                   my = h263_decode_motion(s, pred_y, s->f_code);
+
+                mot_val[0       ]= mot_val[2       ]=
+                mot_val[0+stride]= mot_val[2+stride]= mx;
+                mot_val[1       ]= mot_val[3       ]=
+                mot_val[1+stride]= mot_val[3+stride]= my;
+        } else {
+            s->current_picture.mb_type[xy]= MB_TYPE_8x8 | MB_TYPE_L0;
+            for(i=0;i<4;i++) {
+                mot_val = h263_pred_motion(s, i, &pred_x, &pred_y);
+                if (s->umvplus)
+                  mx = h263p_decode_umotion(s, pred_x);
+                else
+                  mx = h263_decode_motion(s, pred_x, s->f_code);
+
+                if (s->umvplus)
+                  my = h263p_decode_umotion(s, pred_y);
+                else
+                  my = h263_decode_motion(s, pred_y, s->f_code);
+                if (s->umvplus && (mx - pred_x) == 1 && (my - pred_y) == 1)
+                  skip_bits1(&s->gb); /* Bit stuffing to prevent PSC */
+                mot_val[0] = mx;
+                mot_val[1] = my;
+            }
+        }
+    }
+end:
+
+    for(i=0; i<4; i++)
+        s->block_index[i]-= 2;
+    for(i=4; i<6; i++)
+        s->block_index[i]-= 1;
+    s->mb_x--;
+
+    s->gb= gb;
+}
+
 int ff_h263_decode_mb(MpegEncContext *s,
                       DCTELEM block[6][64])
 {
@@ -3330,7 +3454,7 @@
                 s->mcsel=0;
                 s->mv[0][0][0] = 0;
                 s->mv[0][0][1] = 0;
-                s->mb_skiped = 1;
+                s->mb_skiped = !s->obmc;
             }
             goto end;
         }
@@ -3645,6 +3769,10 @@
         }
     }
 end:
+    if(s->obmc){
+        if(s->pict_type == P_TYPE && s->mb_x+1<s->mb_width)
+            preview_obmc(s);
+    }

         /* per-MB end of slice check */
     if(s->codec_id==CODEC_ID_MPEG4){
@@ -4231,16 +4359,14 @@

         s->pict_type = I_TYPE + get_bits1(&s->gb);

-        s->unrestricted_mv = get_bits1(&s->gb);
-        s->h263_long_vectors = s->unrestricted_mv;
+        s->h263_long_vectors = get_bits1(&s->gb);

         if (get_bits1(&s->gb) != 0) {
             av_log(s->avctx, AV_LOG_ERROR, "H263 SAC not supported\n");
             return -1;	/* SAC: off */
         }
-        if (get_bits1(&s->gb) != 0) {
-            s->mv_type = MV_TYPE_8X8; /* Advanced prediction mode */
-        }
+        s->obmc= get_bits1(&s->gb); /* Advanced prediction mode */
+        s->unrestricted_mv = s->h263_long_vectors || s->obmc;

         if (get_bits1(&s->gb) != 0) {
             av_log(s->avctx, AV_LOG_ERROR, "H263 PB frame not supported\n");
@@ -4265,14 +4391,10 @@
             dprintf("ufep=1, format: %d\n", format);
             skip_bits(&s->gb,1); /* Custom PCF */
             s->umvplus = get_bits(&s->gb, 1); /* Unrestricted Motion Vector */
-            s->unrestricted_mv = s->umvplus;
             skip_bits1(&s->gb); /* Syntax-based Arithmetic Coding (SAC) */
-            if (get_bits1(&s->gb) != 0) {
-                s->mv_type = MV_TYPE_8X8; /* Advanced prediction mode */
-            }
-            if (get_bits1(&s->gb) != 0) { /* Advanced Intra Coding (AIC) */
-                s->h263_aic = 1;
-            }
+            s->obmc= get_bits1(&s->gb); /* Advanced prediction mode */
+            s->unrestricted_mv = s->umvplus || s->obmc;
+            s->h263_aic = get_bits1(&s->gb); /* Advanced Intra Coding (AIC) */

             if (get_bits1(&s->gb) != 0) {
                 av_log(s->avctx, AV_LOG_ERROR, "Deblocking Filter not supported\n");
@@ -4303,7 +4425,6 @@

         /* MPPTYPE */
         s->pict_type = get_bits(&s->gb, 3) + I_TYPE;
-        dprintf("pict_type: %d\n", s->pict_type);
 	if (s->pict_type == 8 && s->avctx->codec_tag == ff_get_fourcc("ZYGO"))
 	    s->pict_type = I_TYPE;
         if (s->pict_type != I_TYPE &&
@@ -4311,7 +4432,6 @@
             return -1;
         skip_bits(&s->gb, 2);
         s->no_rounding = get_bits1(&s->gb);
-        dprintf("RTYPE: %d\n", s->no_rounding);
         skip_bits(&s->gb, 4);

         /* Get the picture dimensions */
@@ -4371,13 +4491,14 @@
     }

      if(s->avctx->debug&FF_DEBUG_PICT_INFO){
-         av_log(s->avctx, AV_LOG_DEBUG, "qp:%d %c size:%d rnd:%d %s %s %s %s\n",
+         av_log(s->avctx, AV_LOG_DEBUG, "qp:%d %c size:%d rnd:%d%s%s%s%s%s\n",
          s->qscale, av_get_pict_type_char(s->pict_type),
          s->gb.size_in_bits, 1-s->no_rounding,
-         s->mv_type == MV_TYPE_8X8 ? "ADV" : "",
-         s->umvplus ? "UMV" : "",
-         s->h263_long_vectors ? "LONG" : "",
-         s->h263_plus ? "+" : ""
+         s->obmc ? " AP" : "",
+         s->umvplus ? " UMV" : "",
+         s->h263_long_vectors ? " LONG" : "",
+         s->h263_plus ? " +" : "",
+         s->h263_aic ? " AIC" : ""
          );
      }
 #if 1
@@ -5246,8 +5367,9 @@
         return -1;	/* SAC: off */
     }
     if (get_bits1(&s->gb) != 0) {
+        s->obmc= 1;
         av_log(s->avctx, AV_LOG_ERROR, "Advanced Prediction Mode not supported\n");
-        return -1;	/* advanced prediction mode: off */
+//        return -1;	/* advanced prediction mode: off */
     }
     if (get_bits1(&s->gb) != 0) {
         av_log(s->avctx, AV_LOG_ERROR, "PB frame mode no supported\n");
--- a/motion_est.c	Thu Nov 27 13:38:00 2003 +0000
+++ b/motion_est.c	Fri Nov 28 21:31:20 2003 +0000
@@ -1120,6 +1120,7 @@
         }else
             set_p_mv_tables(s, mx, my, 1);
     }else{
+        int intra_score, i;
         mb_type= MB_TYPE_INTER;

         dmin= s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
@@ -1136,10 +1137,59 @@
                 dmin=dmin4;
             }
         }
-        pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin;
+
+//        pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin;
         set_p_mv_tables(s, mx, my, mb_type!=MB_TYPE_INTER4V);
+
+        /* get intra luma score */
+        if((s->avctx->mb_cmp&0xFF)==FF_CMP_SSE){
+            intra_score= (varc<<8) - 500; //FIXME dont scale it down so we dont have to fix it
+        }else{
+            int mean= (sum+128)>>8;
+            mean*= 0x01010101;
+
+            for(i=0; i<16; i++){
+                *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 0]) = mean;
+                *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 4]) = mean;
+                *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 8]) = mean;
+                *(uint32_t*)(&s->me.scratchpad[i*s->linesize+12]) = mean;
+            }
+
+            intra_score= s->dsp.mb_cmp[0](s, s->me.scratchpad, pix, s->linesize);
+        }
+#if 0 //FIXME
+        /* get chroma score */
+        if(s->avctx->mb_cmp&FF_CMP_CHROMA){
+            for(i=1; i<3; i++){
+                uint8_t *dest_c;
+                int mean;
+
+                if(s->out_format == FMT_H263){
+                    mean= (s->dc_val[i][mb_x + (mb_y+1)*(s->mb_width+2)] + 4)>>3; //FIXME not exact but simple ;)
+                }else{
+                    mean= (s->last_dc[i] + 4)>>3;
+                }
+                dest_c = s->new_picture.data[i] + (mb_y * 8  * (s->uvlinesize)) + mb_x * 8;
+
+                mean*= 0x01010101;
+                for(i=0; i<8; i++){
+                    *(uint32_t*)(&s->me.scratchpad[i*s->uvlinesize+ 0]) = mean;
+                    *(uint32_t*)(&s->me.scratchpad[i*s->uvlinesize+ 4]) = mean;
+                }
+
+                intra_score+= s->dsp.mb_cmp[1](s, s->me.scratchpad, dest_c, s->uvlinesize);
+            }
+        }
+#endif
+        intra_score += s->me.mb_penalty_factor*16;

-        if (vard <= 64 || vard < varc) {
+        if(intra_score < dmin){
+            mb_type= MB_TYPE_INTRA;
+            s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= MB_TYPE_INTRA; //FIXME cleanup
+        }else
+            s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= 0;
+
+        if (vard <= 64 || vard < varc) { //FIXME
             s->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
         }else{
             s->scene_change_score+= s->qscale;
@@ -1566,7 +1616,7 @@
             int x;
             int xy= y*s->mb_stride;
             for(x=0; x<s->mb_width; x++){
-                if(s->mb_type[xy] & type){
+                if((s->mb_type[xy] & type) || (s->mb_type[xy] & MB_TYPE_INTRA)){ //FIXME
                     int fcode= FFMAX(fcode_tab[mv_table[xy][0] + MAX_MV],
                                      fcode_tab[mv_table[xy][1] + MAX_MV]);
                     int j;
@@ -1621,6 +1671,7 @@
                    || s->p_mv_table[xy][1] >=range || s->p_mv_table[xy][1] <-range){
                     s->mb_type[xy] &= ~MB_TYPE_INTER;
                     s->mb_type[xy] |= MB_TYPE_INTRA;
+                    s->current_picture.mb_type[xy]= MB_TYPE_INTRA;
                     s->p_mv_table[xy][0] = 0;
                     s->p_mv_table[xy][1] = 0;
                 }
@@ -1650,6 +1701,7 @@
                            || my >=range || my <-range){
                             s->mb_type[i] &= ~MB_TYPE_INTER4V;
                             s->mb_type[i] |= MB_TYPE_INTRA;
+                            s->current_picture.mb_type[i]= MB_TYPE_INTRA;
                         }
                     }
                 }
--- a/mpegvideo.c	Thu Nov 27 13:38:00 2003 +0000
+++ b/mpegvideo.c	Fri Nov 28 21:31:20 2003 +0000
@@ -297,7 +297,6 @@
             CHECKED_ALLOCZ(pic->mb_var   , mb_array_size * sizeof(int16_t))
             CHECKED_ALLOCZ(pic->mc_mb_var, mb_array_size * sizeof(int16_t))
             CHECKED_ALLOCZ(pic->mb_mean  , mb_array_size * sizeof(int8_t))
-            CHECKED_ALLOCZ(pic->mb_cmp_score, mb_array_size * sizeof(int32_t))
         }

         CHECKED_ALLOCZ(pic->mbskip_table , mb_array_size * sizeof(uint8_t)+2) //the +2 is for the slice end check
@@ -338,7 +337,6 @@
     av_freep(&pic->mb_var);
     av_freep(&pic->mc_mb_var);
     av_freep(&pic->mb_mean);
-    av_freep(&pic->mb_cmp_score);
     av_freep(&pic->mbskip_table);
     av_freep(&pic->qscale_table);
     av_freep(&pic->mb_type_base);
@@ -669,12 +667,21 @@
                        && !s->fixed_qscale;

     s->progressive_sequence= !(avctx->flags & CODEC_FLAG_INTERLACED_DCT);
-
-    if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4){
+
+    s->obmc=    (s->codec_id == CODEC_ID_H263 || s->codec_id == CODEC_ID_H263P)
+             && (s->flags & CODEC_FLAG_4MV);
+
+    if((s->flags & CODEC_FLAG_4MV) && s->codec_id != CODEC_ID_MPEG4
+       && s->codec_id != CODEC_ID_H263 && s->codec_id != CODEC_ID_H263P){
         av_log(avctx, AV_LOG_ERROR, "4MV not supported by codec\n");
         return -1;
     }

+    if(s->obmc && s->avctx->mb_decision != FF_MB_DECISION_SIMPLE){
+        av_log(avctx, AV_LOG_ERROR, "OBMC is only supported with simple mb decission\n");
+        return -1;
+    }
+
     if(s->quarter_sample && s->codec_id != CODEC_ID_MPEG4){
         av_log(avctx, AV_LOG_ERROR, "qpel not supported by codec\n");
         return -1;
@@ -1964,6 +1971,42 @@
     }
 }

+static inline int hpel_motion(MpegEncContext *s,
+                                  uint8_t *dest, uint8_t *src,
+                                  int src_x, int src_y,
+                                  int width, int height, int stride,
+                                  int h_edge_pos, int v_edge_pos,
+                                  int w, int h, op_pixels_func *pix_op,
+                                  int motion_x, int motion_y)
+{
+    int dxy;
+    int emu=0;
+
+    dxy = ((motion_y & 1) << 1) | (motion_x & 1);
+    src_x += motion_x >> 1;
+    src_y += motion_y >> 1;
+
+    /* WARNING: do no forget half pels */
+    src_x = clip(src_x, -16, width); //FIXME unneeded for emu?
+    if (src_x == width)
+        dxy &= ~1;
+    src_y = clip(src_y, -16, height);
+    if (src_y == height)
+        dxy &= ~2;
+    src += src_y * stride + src_x;
+
+    if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
+        if(   (unsigned)src_x > h_edge_pos - (motion_x&1) - w
+           || (unsigned)src_y > v_edge_pos - (motion_y&1) - h){
+            ff_emulated_edge_mc(s->edge_emu_buffer, src, stride, w+1, h+1,
+                             src_x, src_y, h_edge_pos, v_edge_pos);
+            src= s->edge_emu_buffer;
+            emu=1;
+        }
+    }
+    pix_op[dxy](dest, src, stride, h);
+    return emu;
+}

 /* apply one mpeg motion vector to the three components */
 static inline void mpeg_motion(MpegEncContext *s,
@@ -1974,7 +2017,7 @@
                                int motion_x, int motion_y, int h)
 {
     uint8_t *ptr;
-    int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, linesize, uvlinesize;
+    int dxy, offset, mx, my, src_x, src_y, height, v_edge_pos, uvlinesize;
     int emu=0;
 #if 0
 if(s->quarter_sample)
@@ -1983,34 +2026,19 @@
     motion_y>>=1;
 }
 #endif
-    dxy = ((motion_y & 1) << 1) | (motion_x & 1);
-    src_x = s->mb_x * 16 + (motion_x >> 1);
-    src_y = s->mb_y * (16 >> field_based) + (motion_y >> 1);
-
-    /* WARNING: do no forget half pels */
+
     height = s->height >> field_based;
     v_edge_pos = s->v_edge_pos >> field_based;
-    src_x = clip(src_x, -16, s->width);
-    if (src_x == s->width)
-        dxy &= ~1;
-    src_y = clip(src_y, -16, height);
-    if (src_y == height)
-        dxy &= ~2;
-    linesize   = s->current_picture.linesize[0] << field_based;
     uvlinesize = s->current_picture.linesize[1] << field_based;
-    ptr = ref_picture[0] + (src_y * linesize) + (src_x) + src_offset;
-    dest_y += dest_offset;
-
-    if(s->unrestricted_mv && (s->flags&CODEC_FLAG_EMU_EDGE)){
-        if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 16
-           || (unsigned)src_y >    v_edge_pos - (motion_y&1) - h){
-            ff_emulated_edge_mc(s->edge_emu_buffer, ptr - src_offset, s->linesize, 17, 17+field_based,  //FIXME linesize? and uv below
-                             src_x, src_y<<field_based, s->h_edge_pos, s->v_edge_pos);
-            ptr= s->edge_emu_buffer + src_offset;
-            emu=1;
-        }
-    }
-    pix_op[0][dxy](dest_y, ptr, linesize, h);
+
+    emu= hpel_motion(s,
+                dest_y + dest_offset, ref_picture[0] + src_offset,
+                s->mb_x * 16, s->mb_y * (16 >> field_based),
+                s->width, height, s->current_picture.linesize[0] << field_based,
+                s->h_edge_pos, v_edge_pos,
+                16, h, pix_op[0],
+                motion_x, motion_y);
+

     if(s->flags&CODEC_FLAG_GRAY) return;

@@ -2055,6 +2083,87 @@
     }
     pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr, uvlinesize, h >> 1);
 }
+//FIXME move to dsputil, avg variant, 16x16 version
+static inline void put_obmc(uint8_t *dst, uint8_t *src[5], int stride){
+    int x;
+    uint8_t * const top   = src[1];
+    uint8_t * const left  = src[2];
+    uint8_t * const mid   = src[0];
+    uint8_t * const right = src[3];
+    uint8_t * const bottom= src[4];
+#define OBMC_FILTER(x, t, l, m, r, b)\
+    dst[x]= (t*top[x] + l*left[x] + m*mid[x] + r*right[x] + b*bottom[x] + 4)>>3
+#define OBMC_FILTER4(x, t, l, m, r, b)\
+    OBMC_FILTER(x         , t, l, m, r, b);\
+    OBMC_FILTER(x+1       , t, l, m, r, b);\
+    OBMC_FILTER(x  +stride, t, l, m, r, b);\
+    OBMC_FILTER(x+1+stride, t, l, m, r, b);
+
+    x=0;
+    OBMC_FILTER (x  , 2, 2, 4, 0, 0);
+    OBMC_FILTER (x+1, 2, 1, 5, 0, 0);
+    OBMC_FILTER4(x+2, 2, 1, 5, 0, 0);
+    OBMC_FILTER4(x+4, 2, 0, 5, 1, 0);
+    OBMC_FILTER (x+6, 2, 0, 5, 1, 0);
+    OBMC_FILTER (x+7, 2, 0, 4, 2, 0);
+    x+= stride;
+    OBMC_FILTER (x  , 1, 2, 5, 0, 0);
+    OBMC_FILTER (x+1, 1, 2, 5, 0, 0);
+    OBMC_FILTER (x+6, 1, 0, 5, 2, 0);
+    OBMC_FILTER (x+7, 1, 0, 5, 2, 0);
+    x+= stride;
+    OBMC_FILTER4(x  , 1, 2, 5, 0, 0);
+    OBMC_FILTER4(x+2, 1, 1, 6, 0, 0);
+    OBMC_FILTER4(x+4, 1, 0, 6, 1, 0);
+    OBMC_FILTER4(x+6, 1, 0, 5, 2, 0);
+    x+= 2*stride;
+    OBMC_FILTER4(x  , 0, 2, 5, 0, 1);
+    OBMC_FILTER4(x+2, 0, 1, 6, 0, 1);
+    OBMC_FILTER4(x+4, 0, 0, 6, 1, 1);
+    OBMC_FILTER4(x+6, 0, 0, 5, 2, 1);
+    x+= 2*stride;
+    OBMC_FILTER (x  , 0, 2, 5, 0, 1);
+    OBMC_FILTER (x+1, 0, 2, 5, 0, 1);
+    OBMC_FILTER4(x+2, 0, 1, 5, 0, 2);
+    OBMC_FILTER4(x+4, 0, 0, 5, 1, 2);
+    OBMC_FILTER (x+6, 0, 0, 5, 2, 1);
+    OBMC_FILTER (x+7, 0, 0, 5, 2, 1);
+    x+= stride;
+    OBMC_FILTER (x  , 0, 2, 4, 0, 2);
+    OBMC_FILTER (x+1, 0, 1, 5, 0, 2);
+    OBMC_FILTER (x+6, 0, 0, 5, 1, 2);
+    OBMC_FILTER (x+7, 0, 0, 4, 2, 2);
+}
+
+/* obmc for 1 8x8 luma block */
+static inline void obmc_motion(MpegEncContext *s,
+                               uint8_t *dest, uint8_t *src,
+                               int src_x, int src_y,
+                               op_pixels_func *pix_op,
+                               int16_t mv[5][2]/* mid top left right bottom*/)
+#define MID    0
+{
+    int i;
+    uint8_t *ptr[5];
+
+    assert(s->quarter_sample==0);
+
+    for(i=0; i<5; i++){
+        if(i && mv[i][0]==mv[MID][0] && mv[i][1]==mv[MID][1]){
+            ptr[i]= ptr[MID];
+        }else{
+            ptr[i]= s->edge_emu_buffer + 16 + 8*(i&1) + s->linesize*8*(i>>1);
+            hpel_motion(s, ptr[i], src,
+                        src_x, src_y,
+                        s->width, s->height, s->linesize,
+                        s->h_edge_pos, s->v_edge_pos,
+                        8, 8, pix_op,
+                        mv[i][0], mv[i][1]);
+        }
+    }
+
+    put_obmc(dest, ptr, s->linesize);
+}

 static inline void qpel_motion(MpegEncContext *s,
                                uint8_t *dest_y, uint8_t *dest_cb, uint8_t *dest_cr,
@@ -2164,6 +2273,55 @@
 }

 /**
+ * h263 chorma 4mv motion compensation.
+ */
+static inline void chroma_4mv_motion(MpegEncContext *s,
+                                     uint8_t *dest_cb, uint8_t *dest_cr,
+                                     uint8_t **ref_picture,
+                                     op_pixels_func *pix_op,
+                                     int mx, int my){
+    int dxy, emu=0, src_x, src_y, offset;
+    uint8_t *ptr;
+
+    /* In case of 8X8, we construct a single chroma motion vector
+       with a special rounding */
+    mx= ff_h263_round_chroma(mx);
+    my= ff_h263_round_chroma(my);
+
+    dxy = ((my & 1) << 1) | (mx & 1);
+    mx >>= 1;
+    my >>= 1;
+
+    src_x = s->mb_x * 8 + mx;
+    src_y = s->mb_y * 8 + my;
+    src_x = clip(src_x, -8, s->width/2);
+    if (src_x == s->width/2)
+        dxy &= ~1;
+    src_y = clip(src_y, -8, s->height/2);
+    if (src_y == s->height/2)
+        dxy &= ~2;
+
+    offset = (src_y * (s->uvlinesize)) + src_x;
+    ptr = ref_picture[1] + offset;
+    if(s->flags&CODEC_FLAG_EMU_EDGE){
+        if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
+           || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
+            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
+            ptr= s->edge_emu_buffer;
+            emu=1;
+        }
+    }
+    pix_op[dxy](dest_cb, ptr, s->uvlinesize, 8);
+
+    ptr = ref_picture[2] + offset;
+    if(emu){
+        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
+        ptr= s->edge_emu_buffer;
+    }
+    pix_op[dxy](dest_cr, ptr, s->uvlinesize, 8);
+}
+
+/**
  * motion compesation of a single macroblock
  * @param s context
  * @param dest_y luma destination pointer
@@ -2180,14 +2338,74 @@
                               int dir, uint8_t **ref_picture,
                               op_pixels_func (*pix_op)[4], qpel_mc_func (*qpix_op)[16])
 {
-    int dxy, offset, mx, my, src_x, src_y, motion_x, motion_y;
+    int dxy, mx, my, src_x, src_y, motion_x, motion_y;
     int mb_x, mb_y, i;
     uint8_t *ptr, *dest;
-    int emu=0;

     mb_x = s->mb_x;
     mb_y = s->mb_y;

+    if(s->obmc){
+        int16_t mv_cache[4][4][2];
+        const int xy= s->mb_x + s->mb_y*s->mb_stride;
+        const int mot_stride= s->mb_width*2 + 2;
+        const int mot_xy= 1 + mb_x*2 + (mb_y*2 + 1)*mot_stride;
+
+        assert(!s->mb_skiped);
+
+        memcpy(mv_cache[1][1], s->motion_val[mot_xy           ], sizeof(int16_t)*4);
+        memcpy(mv_cache[2][1], s->motion_val[mot_xy+mot_stride], sizeof(int16_t)*4);
+        memcpy(mv_cache[3][1], s->motion_val[mot_xy+mot_stride], sizeof(int16_t)*4);
+
+        if(mb_y==0 || IS_INTRA(s->current_picture.mb_type[xy-s->mb_stride])){
+            memcpy(mv_cache[0][1], mv_cache[1][1], sizeof(int16_t)*4);
+        }else{
+            memcpy(mv_cache[0][1], s->motion_val[mot_xy-mot_stride], sizeof(int16_t)*4);
+        }
+
+        if(mb_x==0 || IS_INTRA(s->current_picture.mb_type[xy-1])){
+            *(int32_t*)mv_cache[1][0]= *(int32_t*)mv_cache[1][1];
+            *(int32_t*)mv_cache[2][0]= *(int32_t*)mv_cache[2][1];
+        }else{
+            *(int32_t*)mv_cache[1][0]= *(int32_t*)s->motion_val[mot_xy-1];
+            *(int32_t*)mv_cache[2][0]= *(int32_t*)s->motion_val[mot_xy-1+mot_stride];
+        }
+
+        if(mb_x+1>=s->mb_width || IS_INTRA(s->current_picture.mb_type[xy+1])){
+            *(int32_t*)mv_cache[1][3]= *(int32_t*)mv_cache[1][2];
+            *(int32_t*)mv_cache[2][3]= *(int32_t*)mv_cache[2][2];
+        }else{
+            *(int32_t*)mv_cache[1][3]= *(int32_t*)s->motion_val[mot_xy+2];
+            *(int32_t*)mv_cache[2][3]= *(int32_t*)s->motion_val[mot_xy+2+mot_stride];
+        }
+
+        mx = 0;
+        my = 0;
+        for(i=0;i<4;i++) {
+            const int x= (i&1)+1;
+            const int y= (i>>1)+1;
+            int16_t mv[5][2]= {
+                {mv_cache[y][x  ][0], mv_cache[y][x  ][1]},
+                {mv_cache[y-1][x][0], mv_cache[y-1][x][1]},
+                {mv_cache[y][x-1][0], mv_cache[y][x-1][1]},
+                {mv_cache[y][x+1][0], mv_cache[y][x+1][1]},
+                {mv_cache[y+1][x][0], mv_cache[y+1][x][1]}};
+            //FIXME cleanup
+            obmc_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
+                        ref_picture[0],
+                        mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
+                        pix_op[1],
+                        mv);
+
+            mx += mv[0][0];
+            my += mv[0][1];
+        }
+        if(!(s->flags&CODEC_FLAG_GRAY))
+            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
+
+        return;
+    }
+
     switch(s->mv_type) {
     case MV_TYPE_16X16:
 #ifdef CONFIG_RISKY
@@ -2253,73 +2471,21 @@
             }
         }else{
             for(i=0;i<4;i++) {
-                motion_x = s->mv[dir][i][0];
-                motion_y = s->mv[dir][i][1];
-
-                dxy = ((motion_y & 1) << 1) | (motion_x & 1);
-                src_x = mb_x * 16 + (motion_x >> 1) + (i & 1) * 8;
-                src_y = mb_y * 16 + (motion_y >> 1) + (i >>1) * 8;
-
-                /* WARNING: do no forget half pels */
-                src_x = clip(src_x, -16, s->width);
-                if (src_x == s->width)
-                    dxy &= ~1;
-                src_y = clip(src_y, -16, s->height);
-                if (src_y == s->height)
-                    dxy &= ~2;
-
-                ptr = ref_picture[0] + (src_y * s->linesize) + (src_x);
-                if(s->flags&CODEC_FLAG_EMU_EDGE){
-                    if(   (unsigned)src_x > s->h_edge_pos - (motion_x&1) - 8
-                       || (unsigned)src_y > s->v_edge_pos - (motion_y&1) - 8){
-                        ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->linesize, 9, 9, src_x, src_y, s->h_edge_pos, s->v_edge_pos);
-                        ptr= s->edge_emu_buffer;
-                    }
-                }
-                dest = dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize;
-                pix_op[1][dxy](dest, ptr, s->linesize, 8);
+                hpel_motion(s, dest_y + ((i & 1) * 8) + (i >> 1) * 8 * s->linesize,
+                            ref_picture[0],
+                            mb_x * 16 + (i & 1) * 8, mb_y * 16 + (i >>1) * 8,
+                            s->width, s->height, s->linesize,
+                            s->h_edge_pos, s->v_edge_pos,
+                            8, 8, pix_op[1],
+                            s->mv[dir][i][0], s->mv[dir][i][1]);

                 mx += s->mv[dir][i][0];
                 my += s->mv[dir][i][1];
             }
         }

-        if(s->flags&CODEC_FLAG_GRAY) break;
-        /* In case of 8X8, we construct a single chroma motion vector
-           with a special rounding */
-        mx= ff_h263_round_chroma(mx);
-        my= ff_h263_round_chroma(my);
-        dxy = ((my & 1) << 1) | (mx & 1);
-        mx >>= 1;
-        my >>= 1;
-
-        src_x = mb_x * 8 + mx;
-        src_y = mb_y * 8 + my;
-        src_x = clip(src_x, -8, s->width/2);
-        if (src_x == s->width/2)
-            dxy &= ~1;
-        src_y = clip(src_y, -8, s->height/2);
-        if (src_y == s->height/2)
-            dxy &= ~2;
-
-        offset = (src_y * (s->uvlinesize)) + src_x;
-        ptr = ref_picture[1] + offset;
-        if(s->flags&CODEC_FLAG_EMU_EDGE){
-                if(   (unsigned)src_x > (s->h_edge_pos>>1) - (dxy &1) - 8
-                   || (unsigned)src_y > (s->v_edge_pos>>1) - (dxy>>1) - 8){
-                    ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
-                    ptr= s->edge_emu_buffer;
-                    emu=1;
-                }
-            }
-        pix_op[1][dxy](dest_cb, ptr, s->uvlinesize, 8);
-
-        ptr = ref_picture[2] + offset;
-        if(emu){
-            ff_emulated_edge_mc(s->edge_emu_buffer, ptr, s->uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
-            ptr= s->edge_emu_buffer;
-        }
-        pix_op[1][dxy](dest_cr, ptr, s->uvlinesize, 8);
+        if(!(s->flags&CODEC_FLAG_GRAY))
+            chroma_4mv_motion(s, dest_cb, dest_cr, ref_picture, pix_op[1], mx, my);
         break;
     case MV_TYPE_FIELD:
         if (s->picture_structure == PICT_FRAME) {
@@ -2459,7 +2625,7 @@
         }
     }
     break;
-
+    default: assert(0);
     }
 }

@@ -3943,90 +4109,6 @@
                     MPV_decode_mb(s, s->block);
             } else {
                 int motion_x, motion_y;
-                int intra_score;
-                int inter_score= s->current_picture.mb_cmp_score[mb_x + mb_y*s->mb_stride];
-
-              if(s->avctx->mb_decision==FF_MB_DECISION_SIMPLE && s->pict_type==P_TYPE){ //FIXME check if the mess is usefull at all
-                /* get luma score */
-                if((s->avctx->mb_cmp&0xFF)==FF_CMP_SSE){
-                    intra_score= (s->current_picture.mb_var[mb_x + mb_y*s->mb_stride]<<8) - 500; //FIXME dont scale it down so we dont have to fix it
-                }else{
-                    uint8_t *dest_y;
-
-                    int mean= s->current_picture.mb_mean[mb_x + mb_y*s->mb_stride]; //FIXME
-                    mean*= 0x01010101;
-
-                    dest_y  = s->new_picture.data[0] + (mb_y * 16 * s->linesize    ) + mb_x * 16;
-
-                    for(i=0; i<16; i++){
-                        *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 0]) = mean;
-                        *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 4]) = mean;
-                        *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 8]) = mean;
-                        *(uint32_t*)(&s->me.scratchpad[i*s->linesize+12]) = mean;
-                    }
-
-                    s->mb_intra=1;
-                    intra_score= s->dsp.mb_cmp[0](s, s->me.scratchpad, dest_y, s->linesize);
-
-/*                    printf("intra:%7d inter:%7d var:%7d mc_var.%7d\n", intra_score>>8, inter_score>>8,
-                        s->current_picture.mb_var[mb_x + mb_y*s->mb_stride],
-                        s->current_picture.mc_mb_var[mb_x + mb_y*s->mb_stride]);*/
-                }
-
-                /* get chroma score */
-                if(s->avctx->mb_cmp&FF_CMP_CHROMA){
-                    int i;
-
-                    s->mb_intra=1;
-                    for(i=1; i<3; i++){
-                        uint8_t *dest_c;
-                        int mean;
-
-                        if(s->out_format == FMT_H263){
-                            mean= (s->dc_val[i][mb_x + (mb_y+1)*(s->mb_width+2)] + 4)>>3; //FIXME not exact but simple ;)
-                        }else{
-                            mean= (s->last_dc[i] + 4)>>3;
-                        }
-                        dest_c = s->new_picture.data[i] + (mb_y * 8  * (s->uvlinesize)) + mb_x * 8;
-
-                        mean*= 0x01010101;
-                        for(i=0; i<8; i++){
-                            *(uint32_t*)(&s->me.scratchpad[i*s->uvlinesize+ 0]) = mean;
-                            *(uint32_t*)(&s->me.scratchpad[i*s->uvlinesize+ 4]) = mean;
-                        }
-
-                        intra_score+= s->dsp.mb_cmp[1](s, s->me.scratchpad, dest_c, s->uvlinesize);
-                    }
-                }
-
-                /* bias */
-                switch(s->avctx->mb_cmp&0xFF){
-                default:
-                case FF_CMP_SAD:
-                    intra_score+= 32*s->qscale;
-                    break;
-                case FF_CMP_SSE:
-                    intra_score+= 24*s->qscale*s->qscale;
-                    break;
-                case FF_CMP_SATD:
-                    intra_score+= 96*s->qscale;
-                    break;
-                case FF_CMP_DCT:
-                    intra_score+= 48*s->qscale;
-                    break;
-                case FF_CMP_BIT:
-                    intra_score+= 16;
-                    break;
-                case FF_CMP_PSNR:
-                case FF_CMP_RD:
-                    intra_score+= (s->qscale*s->qscale*109*8 + 64)>>7;
-                    break;
-                }
-
-                if(intra_score < inter_score)
-                    mb_type= MB_TYPE_INTRA;
-              }
-
                 s->mv_type=MV_TYPE_16X16;
                 // only one MB-Type possible

@@ -4941,4 +5023,3 @@
 };

 #endif //CONFIG_ENCODERS
-