changeset 255:db20b987c32d libavcodec

DivX5 GMC support and q-pel MC support; neither is totally bug-free yet, though :(
author michaelni
date Sat, 09 Mar 2002 13:01:16 +0000
parents b4fed8b24e3a
children 4c1cec7c3c7c
files dsputil.c dsputil.h h263.c mpegvideo.c mpegvideo.h
diffstat 5 files changed, 565 insertions(+), 33 deletions(-) [+]
line wrap: on
line diff
--- a/dsputil.c	Thu Mar 07 13:27:15 2002 +0000
+++ b/dsputil.c	Sat Mar 09 13:01:16 2002 +0000
@@ -27,6 +27,7 @@
 void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
 void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
 void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
+void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder); /* 8 pixel wide GMC interpolation hook (x16/y16 in 1/16 pel); gmc1_c is the C implementation */
 
 op_pixels_abs_func pix_abs16x16;
 op_pixels_abs_func pix_abs16x16_x2;
@@ -344,6 +345,282 @@
 #define avg2(a,b) ((a+b+1)>>1)
 #define avg4(a,b,c,d) ((a+b+c+d+2)>>2)
 
+/* Bilinear interpolation of an 8 pixel wide, h rows high block at a 1/16 pel position.
+ * x16 and y16 are the horizontal and vertical fractions in 1/16 pel units; dst and src
+ * both advance by srcStride per row. Reads 9 pixels from each of h+1 source rows.
+ * rounder is subtracted from the 128 rounding bias (gmc1_motion passes s->no_rounding). */
+static void gmc1_c(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder)
+{
+    /* weights of the 4 neighbouring pixels, they always add up to 256 */
+    const int w_tl= (16-x16)*(16-y16);
+    const int w_tr= (   x16)*(16-y16);
+    const int w_bl= (16-x16)*(   y16);
+    const int w_br= (   x16)*(   y16);
+    const int bias= 128 - rounder;
+    int row, col;
+
+    for(row=0; row<h; row++)
+    {
+        const UINT8 *below= src + srcStride;
+        for(col=0; col<8; col++)
+            dst[col]= (w_tl*src[col] + w_tr*src[col+1] + w_bl*below[col] + w_br*below[col+1] + bias)>>8;
+        dst+= srcStride;
+        src+= srcStride;
+    }
+}
+
+/* Horizontal 8 tap lowpass over an 8 pixel wide block of h rows. Each row reads src[0..8];
+ * taps that would fall outside that range are mirrored back into it. r is the rounding
+ * constant added before the >>8; the result is looked up in cropTbl (presumably a clamp table). */
+static void qpel_h_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int h, int r)
+{
+    UINT8 *cm = cropTbl + MAX_NEG_CROP;
+    int x, y;
+    for(y=0; y<h; y++){
+        for(x=0; x<8; x++){
+            const int l1= x>0 ? x-1 :   0, r2= x<7 ? x+2 :    8; /* 2nd tap pair, mirrored at the edges */
+            const int l2= x>1 ? x-2 : 1-x, r3= x<6 ? x+3 : 14-x; /* 3rd tap pair */
+            const int l3= x>2 ? x-3 : 2-x, r4= x<5 ? x+4 : 13-x; /* 4th tap pair */
+            dst[x]= cm[(((src[x]+src[x+1])*160 - (src[l1]+src[r2])*48 + (src[l2]+src[r3])*24 - (src[l3]+src[r4])*8 + r)>>8)];
+        }
+        dst+=dstStride;
+        src+=srcStride;
+    }
+}
+
+/* Vertical counterpart of qpel_h_lowpass: filters w columns, each producing 8 output rows
+ * from the 9 source rows src[0*srcStride] .. src[8*srcStride]. Taps falling outside those
+ * 9 rows are mirrored back into them. r is the rounding constant added before the >>8;
+ * the result is looked up in cropTbl (presumably a clamp to 0..255 table). */
+static void qpel_v_lowpass(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int w, int r)
+{
+    UINT8 *cm = cropTbl + MAX_NEG_CROP;
+    int x, y;
+    for(x=0; x<w; x++)
+    {
+        int col[9];
+
+        /* fetch the whole column before anything is written */
+        for(y=0; y<9; y++)
+            col[y]= src[y*srcStride];
+
+        for(y=0; y<8; y++)
+        {
+            const int u1= y>0 ? y-1 :   0, d2= y<7 ? y+2 :    8; /* 2nd tap pair, mirrored at the edges */
+            const int u2= y>1 ? y-2 : 1-y, d3= y<6 ? y+3 : 14-y; /* 3rd tap pair */
+            const int u3= y>2 ? y-3 : 2-y, d4= y<5 ? y+4 : 13-y; /* 4th tap pair */
+            dst[y*dstStride]= cm[(((col[y]+col[y+1])*160 - (col[u1]+col[d2])*48 + (col[u2]+col[d3])*24 - (col[u3]+col[d4])*8 + r)>>8)];
+        }
+        dst++;
+        src++;
+    }
+}
+
+/* Copies an 8x8 block of pixels from src to dst.
+ * dst advances by dstStride and src by srcStride after every row. */
+static inline void put_block(UINT8 *dst, UINT8 *src, int dstStride, int srcStride)
+{
+    int row, col;
+
+    for(row=0; row<8; row++)
+    {
+        for(col=0; col<8; col++)
+        {
+            dst[col]= src[col];
+        }
+
+        dst+=dstStride;
+        src+=srcStride;
+    }
+}
+
+/* Averages two 8x8 blocks into dst: dst= (src1 + src2 + r)>>1 for every pixel.
+ * src1 advances by srcStride per row, src2 is always read with a stride of 8 (the
+ * layout of the 8 byte wide temporary buffers filled by the QPEL_MC functions). */
+static inline void avg2_block(UINT8 *dst, UINT8 *src1, UINT8 *src2, int dstStride, int srcStride, int r)
+{
+    int row, col;
+
+    for(row=0; row<8; row++)
+    {
+        for(col=0; col<8; col++)
+            dst[col]= (src1[col] + src2[col] + r)>>1;
+
+        dst+=dstStride;
+        src1+=srcStride;
+        /* src2 is packed, 8 bytes per row */
+        src2+=8;
+    }
+}
+
+/* Averages four 8x8 blocks into dst: dst= (src1 + src2 + src3 + src4 + r)>>2.
+ * Only src1 uses srcStride; src2 and src4 are read with a stride of 8 and src3 with a
+ * stride of 9, which are the strides the QPEL_MC functions use for halfH, halfHV and halfV. */
+static inline void avg4_block(UINT8 *dst, UINT8 *src1, UINT8 *src2, UINT8 *src3, UINT8 *src4, int dstStride, int srcStride, int r)
+{
+    int row, col;
+
+    for(row=0; row<8; row++)
+    {
+        for(col=0; col<8; col++)
+            dst[col]= (src1[col] + src2[col] + src3[col] + src4[col] + r)>>2;
+
+        dst+=dstStride;
+        src1+=srcStride;
+        /* fixed strides of the temporary buffers */
+        src2+=8;
+        src3+=9;
+        src4+=8;
+    }
+}
+
+/* QPEL_MC(r, name) instantiates the 16 quarter-pel 8x8 motion compensation functions
+ * qpel_mcXY_c<name> (X = horizontal, Y = vertical quarter-pel phase, 0..3) and the table
+ * qpel_mc<name>_tab[], which is indexed by X + 4*Y. r is 0 for the rounding and 1 for the
+ * no-rounding variant; it lowers the bias of the lowpass filters and of the averaging.
+ * Temporary buffers: halfH  = h-lowpass of src, 9 rows, stride 8 (row y+1 at halfH+8),
+ *                    halfV  = v-lowpass of src, 9 columns, stride 9 (column x+1 at halfV+1),
+ *                    halfHV = v-lowpass of halfH, 8x8, stride 8.
+ * The 3/4 phases average with the full-pel and half-pel samples on the far side, so
+ * mc31 and mc33 have to use halfV+1 and mc13 and mc33 have to use halfH+8, the same way
+ * mc32 and mc23 already do. Passing the unshifted buffers mixed samples belonging to
+ * different positions. The mx and my arguments are not used by the C implementations.
+ */
+#define QPEL_MC(r, name) \
+static void qpel_mc00_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
+{\
+    put_block(dst, src, dstStride, srcStride);\
+}\
+\
+static void qpel_mc10_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
+{\
+    UINT8 half[64];\
+    qpel_h_lowpass(half, src, 8, srcStride, 8, 128-r);\
+    avg2_block(dst, src, half, dstStride, srcStride, 1-r);\
+}\
+\
+static void qpel_mc20_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
+{\
+    qpel_h_lowpass(dst, src, dstStride, srcStride, 8, 128-r);\
+}\
+\
+static void qpel_mc30_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
+{\
+    UINT8 half[64];\
+    qpel_h_lowpass(half, src, 8, srcStride, 8, 128-r);\
+    avg2_block(dst, src+1, half, dstStride, srcStride, 1-r);\
+}\
+\
+static void qpel_mc01_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
+{\
+    UINT8 half[64];\
+    qpel_v_lowpass(half, src, 8, srcStride, 8, 128-r);\
+    avg2_block(dst, src, half, dstStride, srcStride, 1-r);\
+}\
+\
+static void qpel_mc02_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
+{\
+    qpel_v_lowpass(dst, src, dstStride, srcStride, 8, 128-r);\
+}\
+\
+static void qpel_mc03_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
+{\
+    UINT8 half[64];\
+    qpel_v_lowpass(half, src, 8, srcStride, 8, 128-r);\
+    avg2_block(dst, src+srcStride, half, dstStride, srcStride, 1-r);\
+}\
+static void qpel_mc11_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
+{\
+    UINT8 halfH[72];\
+    UINT8 halfV[72];\
+    UINT8 halfHV[64];\
+    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
+    qpel_v_lowpass(halfV, src, 9, srcStride, 9, 128-r);\
+    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\
+    avg4_block(dst, src, halfH, halfV, halfHV, dstStride, srcStride, 2-r);\
+}\
+static void qpel_mc31_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
+{\
+    UINT8 halfH[72];\
+    UINT8 halfV[72];\
+    UINT8 halfHV[64];\
+    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
+    qpel_v_lowpass(halfV, src, 9, srcStride, 9, 128-r);\
+    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\
+    avg4_block(dst, src+1, halfH, halfV+1, halfHV, dstStride, srcStride, 2-r);\
+}\
+static void qpel_mc13_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
+{\
+    UINT8 halfH[72];\
+    UINT8 halfV[72];\
+    UINT8 halfHV[64];\
+    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
+    qpel_v_lowpass(halfV, src, 9, srcStride, 9, 128-r);\
+    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\
+    avg4_block(dst, src+srcStride, halfH+8, halfV, halfHV, dstStride, srcStride, 2-r);\
+}\
+static void qpel_mc33_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
+{\
+    UINT8 halfH[72];\
+    UINT8 halfV[72];\
+    UINT8 halfHV[64];\
+    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
+    qpel_v_lowpass(halfV, src, 9, srcStride, 9, 128-r);\
+    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\
+    avg4_block(dst, src+srcStride+1, halfH+8, halfV+1, halfHV, dstStride, srcStride, 2-r);\
+}\
+static void qpel_mc21_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
+{\
+    UINT8 halfH[72];\
+    UINT8 halfHV[64];\
+    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
+    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\
+    avg2_block(dst, halfH, halfHV, dstStride, 8, 1-r);\
+}\
+static void qpel_mc23_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
+{\
+    UINT8 halfH[72];\
+    UINT8 halfHV[64];\
+    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
+    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\
+    avg2_block(dst, halfH+8, halfHV, dstStride, 8, 1-r);\
+}\
+static void qpel_mc12_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
+{\
+    UINT8 halfH[72];\
+    UINT8 halfV[72];\
+    UINT8 halfHV[64];\
+    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
+    qpel_v_lowpass(halfV, src, 9, srcStride, 9, 128-r);\
+    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\
+    avg2_block(dst, halfV, halfHV, dstStride, 9, 1-r);\
+}\
+static void qpel_mc32_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
+{\
+    UINT8 halfH[72];\
+    UINT8 halfV[72];\
+    UINT8 halfHV[64];\
+    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
+    qpel_v_lowpass(halfV, src, 9, srcStride, 9, 128-r);\
+    qpel_v_lowpass(halfHV, halfH, 8, 8, 8, 128-r);\
+    avg2_block(dst, halfV+1, halfHV, dstStride, 9, 1-r);\
+}\
+static void qpel_mc22_c ## name (UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my)\
+{\
+    UINT8 halfH[72];\
+    qpel_h_lowpass(halfH, src, 8, srcStride, 9, 128-r);\
+    qpel_v_lowpass(dst, halfH, dstStride, 8, 8, 128-r);\
+}\
+qpel_mc_func qpel_mc ## name ## _tab[16]={ \
+    qpel_mc00_c ## name, qpel_mc10_c ## name, qpel_mc20_c ## name, qpel_mc30_c ## name, \
+    qpel_mc01_c ## name, qpel_mc11_c ## name, qpel_mc21_c ## name, qpel_mc31_c ## name, \
+    qpel_mc02_c ## name, qpel_mc12_c ## name, qpel_mc22_c ## name, qpel_mc32_c ## name, \
+    qpel_mc03_c ## name, qpel_mc13_c ## name, qpel_mc23_c ## name, qpel_mc33_c ## name, \
+};
+
+QPEL_MC(0, _rnd)
+QPEL_MC(1, _no_rnd)
+
 int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size, int h)
 {
     int s, i;
@@ -521,6 +798,7 @@
     get_pixels = get_pixels_c;
     put_pixels_clamped = put_pixels_clamped_c;
     add_pixels_clamped = add_pixels_clamped_c;
+    gmc1= gmc1_c;
 
     pix_abs16x16 = pix_abs16x16_c;
     pix_abs16x16_x2 = pix_abs16x16_x2_c;
--- a/dsputil.h	Thu Mar 07 13:27:15 2002 +0000
+++ b/dsputil.h	Sat Mar 09 13:01:16 2002 +0000
@@ -4,6 +4,7 @@
 #include "common.h"
 #include "avcodec.h"
 
+//#define DEBUG
 /* dct code */
 typedef short DCTELEM;
 
@@ -38,6 +39,8 @@
 extern void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
 extern void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
 extern void (*add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
+extern void (*gmc1)(UINT8 *dst, UINT8 *src, int srcStride, int h, int x16, int y16, int rounder); /* 8 pixel wide GMC interpolation, x16/y16 are fractions in 1/16 pel */
+
 
 void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size);
 void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
@@ -45,11 +48,15 @@
 
 /* add and put pixel (decoding) */
 typedef void (*op_pixels_func)(UINT8 *block, const UINT8 *pixels, int line_size, int h);
+typedef void (*qpel_mc_func)(UINT8 *dst, UINT8 *src, int dstStride, int srcStride, int mx, int my); /* 8x8 quarter-pel MC; qpel_motion passes mx/my = motion_x&3 / motion_y&3 */
 
 extern op_pixels_func put_pixels_tab[4];
 extern op_pixels_func avg_pixels_tab[4];
 extern op_pixels_func put_no_rnd_pixels_tab[4];
 extern op_pixels_func avg_no_rnd_pixels_tab[4];
+extern qpel_mc_func qpel_mc_rnd_tab[16];    /* indexed by ((motion_y&3)<<2) | (motion_x&3) */
+extern qpel_mc_func qpel_mc_no_rnd_tab[16]; /* same layout, selected when s->no_rounding is set */
+
 
 /* sub pixel (encoding) */
 extern void (*sub_pixels_tab[4])(DCTELEM *block, const UINT8 *pixels, int line_size, int h);
--- a/h263.c	Thu Mar 07 13:27:15 2002 +0000
+++ b/h263.c	Sat Mar 09 13:01:16 2002 +0000
@@ -25,6 +25,10 @@
 #include "h263data.h"
 #include "mpeg4data.h"
 
+//rounded division & shift (args are evaluated more than once); RSHIFT: b==0 returns a, b<0 scales a up by 2^-b instead of shifting by a negative count
+#define RDIV(a,b) ((a) > 0 ? ((a)+((b)>>1))/(b) : ((a)-((b)>>1))/(b))
+#define RSHIFT(a,b) ((b) > 0 ? ((a) > 0 ? ((a) + (1<<((b)-1)))>>(b) : ((a) + (1<<((b)-1))-1)>>(b)) : (a)*(1<<-(b)))
+
 static void h263_encode_block(MpegEncContext * s, DCTELEM * block,
 			      int n);
 static void h263_encode_motion(MpegEncContext * s, int val);
@@ -961,9 +965,25 @@
                 s->block_last_index[i] = -1;
             s->mv_dir = MV_DIR_FORWARD;
             s->mv_type = MV_TYPE_16X16;
-            s->mv[0][0][0] = 0;
-            s->mv[0][0][1] = 0;
-            s->mb_skiped = 1;
+            if(s->pict_type==S_TYPE && s->vol_sprite_usage==GMC_SPRITE){
+                const int a= s->sprite_warping_accuracy;
+//                int l = (1 << (s->f_code - 1)) * 32;
+
+                s->mcsel=1;
+                s->mv[0][0][0] = RSHIFT(s->sprite_offset[0][0], a-s->quarter_sample);
+                s->mv[0][0][1] = RSHIFT(s->sprite_offset[0][1], a-s->quarter_sample);
+/*                if (s->mv[0][0][0] < -l) s->mv[0][0][0]= -l;
+                else if (s->mv[0][0][0] >= l) s->mv[0][0][0]= l-1;
+                if (s->mv[0][0][1] < -l) s->mv[0][0][1]= -l;
+                else if (s->mv[0][0][1] >= l) s->mv[0][0][1]= l-1;*/
+
+                s->mb_skiped = 0;
+            }else{
+                s->mcsel=0;
+                s->mv[0][0][0] = 0;
+                s->mv[0][0][1] = 0;
+                s->mb_skiped = 1;
+            }
             return 0;
         }
         cbpc = get_vlc(&s->gb, &inter_MCBPC_vlc);
@@ -1007,7 +1027,13 @@
                mx = h263p_decode_umotion(s, pred_x);
             else if(!s->mcsel)
                mx = h263_decode_motion(s, pred_x);
-            else mx=0;
+            else {
+               const int a= s->sprite_warping_accuracy;
+//        int l = (1 << (s->f_code - 1)) * 32;
+               mx= RSHIFT(s->sprite_offset[0][0], a-s->quarter_sample);
+//        if (mx < -l) mx= -l;
+//        else if (mx >= l) mx= l-1;
+            }
             if (mx >= 0xffff)
                 return -1;
             
@@ -1015,7 +1041,13 @@
                my = h263p_decode_umotion(s, pred_y);
             else if(!s->mcsel)
                my = h263_decode_motion(s, pred_y);
-            else my=0;
+            else{
+               const int a= s->sprite_warping_accuracy;
+//       int l = (1 << (s->f_code - 1)) * 32;
+               my= RSHIFT(s->sprite_offset[0][1], a-s->quarter_sample);
+//       if (my < -l) my= -l;
+//       else if (my >= l) my= l-1;
+            }
             if (my >= 0xffff)
                 return -1;
             s->mv[0][0][0] = mx;
@@ -1510,7 +1542,7 @@
     int alpha=0, beta=0;
     int w= s->width;
     int h= s->height;
-    
+//printf("SP %d\n", s->sprite_warping_accuracy);
     for(i=0; i<s->num_sprite_warping_points; i++){
         int length;
         int x=0, y=0;
@@ -1518,21 +1550,23 @@
         length= get_vlc(&s->gb, &sprite_trajectory);
         if(length){
             x= get_bits(&s->gb, length);
+//printf("lx %d %d\n", length, x);
             if ((x >> (length - 1)) == 0) /* if MSB not set it is negative*/
                 x = - (x ^ ((1 << length) - 1));
         }
-// FIXME the mpeg4 std says that here should be a marker but but divx5 doesnt have one here
-//        skip_bits1(&s->gb); /* marker bit */
+        if(!(s->divx_version==500 && s->divx_build==413)) skip_bits1(&s->gb); /* marker bit */
         
         length= get_vlc(&s->gb, &sprite_trajectory);
         if(length){
             y=get_bits(&s->gb, length);
+//printf("ly %d %d\n", length, y);
             if ((y >> (length - 1)) == 0) /* if MSB not set it is negative*/
                 y = - (y ^ ((1 << length) - 1));
         }
         skip_bits1(&s->gb); /* marker bit */
-//        printf("%d %d\n", x, y);
+//printf("%d %d %d %d\n", x, y, i, s->sprite_warping_accuracy);
 //if(i>0 && (x!=0 || y!=0)) printf("AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA\n");
+//x=y=0;
         d[i][0]= x;
         d[i][1]= y;
     }
@@ -1543,13 +1577,21 @@
     h2= 1<<beta;
 
 // Note, the 4th point isnt used for GMC
-    
+/*
     sprite_ref[0][0]= (a>>1)*(2*vop_ref[0][0] + d[0][0]);
     sprite_ref[0][1]= (a>>1)*(2*vop_ref[0][1] + d[0][1]);
     sprite_ref[1][0]= (a>>1)*(2*vop_ref[1][0] + d[0][0] + d[1][0]);
     sprite_ref[1][1]= (a>>1)*(2*vop_ref[1][1] + d[0][1] + d[1][1]);
     sprite_ref[2][0]= (a>>1)*(2*vop_ref[2][0] + d[0][0] + d[2][0]);
     sprite_ref[2][1]= (a>>1)*(2*vop_ref[2][1] + d[0][1] + d[2][1]);
+*/
+//FIXME DIVX5 vs. mpeg4 ?
+    sprite_ref[0][0]= a*vop_ref[0][0] + d[0][0];
+    sprite_ref[0][1]= a*vop_ref[0][1] + d[0][1];
+    sprite_ref[1][0]= a*vop_ref[1][0] + d[0][0] + d[1][0];
+    sprite_ref[1][1]= a*vop_ref[1][1] + d[0][1] + d[1][1];
+    sprite_ref[2][0]= a*vop_ref[2][0] + d[0][0] + d[2][0];
+    sprite_ref[2][1]= a*vop_ref[2][1] + d[0][1] + d[2][1];
 /*    sprite_ref[3][0]= (a>>1)*(2*vop_ref[3][0] + d[0][0] + d[1][0] + d[2][0] + d[3][0]);
     sprite_ref[3][1]= (a>>1)*(2*vop_ref[3][1] + d[0][1] + d[1][1] + d[2][1] + d[3][1]); */
     
@@ -1557,15 +1599,14 @@
 // perhaps it should be reordered to be more readable ...
 // the idea behind this virtual_ref mess is to be able to use shifts later per pixel instead of divides
 // so the distance between points is converted from w&h based to w2&h2 based which are of the 2^x form
-// FIXME rounding (they should be positive but who knows ...)
     virtual_ref[0][0]= 16*(vop_ref[0][0] + w2) 
-        + ((w - w2)*(r*sprite_ref[0][0] - 16*vop_ref[0][0]) + w2*(r*sprite_ref[1][0] - 16*vop_ref[1][0]) + w/2)/w;
+        + RDIV(((w - w2)*(r*sprite_ref[0][0] - 16*vop_ref[0][0]) + w2*(r*sprite_ref[1][0] - 16*vop_ref[1][0])),w);
     virtual_ref[0][1]= 16*vop_ref[0][1] 
-        + ((w - w2)*(r*sprite_ref[0][1] - 16*vop_ref[0][1]) + w2*(r*sprite_ref[1][1] - 16*vop_ref[1][1]) + w/2)/w;
+        + RDIV(((w - w2)*(r*sprite_ref[0][1] - 16*vop_ref[0][1]) + w2*(r*sprite_ref[1][1] - 16*vop_ref[1][1])),w);
     virtual_ref[1][0]= 16*vop_ref[0][0] 
-        + ((h - h2)*(r*sprite_ref[0][0] - 16*vop_ref[0][0]) + h2*(r*sprite_ref[2][0] - 16*vop_ref[2][0]) + h/2)/h;
+        + RDIV(((h - h2)*(r*sprite_ref[0][0] - 16*vop_ref[0][0]) + h2*(r*sprite_ref[2][0] - 16*vop_ref[2][0])),h);
     virtual_ref[1][1]= 16*(vop_ref[0][1] + h2) 
-        + ((h - h2)*(r*sprite_ref[0][1] - 16*vop_ref[0][1]) + h2*(r*sprite_ref[2][1] - 16*vop_ref[2][1]) + h/2)/h;
+        + RDIV(((h - h2)*(r*sprite_ref[0][1] - 16*vop_ref[0][1]) + h2*(r*sprite_ref[2][1] - 16*vop_ref[2][1])),h);
 
     switch(s->num_sprite_warping_points)
     {
@@ -1635,7 +1676,46 @@
 //        case 3:
             break;
     }
+/*printf("%d %d\n", s->sprite_delta[0][0][0], a<<s->sprite_shift[0][0]);
+printf("%d %d\n", s->sprite_delta[0][0][1], 0);
+printf("%d %d\n", s->sprite_delta[0][1][0], 0);
+printf("%d %d\n", s->sprite_delta[0][1][1], a<<s->sprite_shift[0][1]);
+printf("%d %d\n", s->sprite_delta[1][0][0], a<<s->sprite_shift[1][0]);
+printf("%d %d\n", s->sprite_delta[1][0][1], 0);
+printf("%d %d\n", s->sprite_delta[1][1][0], 0);
+printf("%d %d\n", s->sprite_delta[1][1][1], a<<s->sprite_shift[1][1]);*/
+    /* try to simplify the situation */ 
+    if(   s->sprite_delta[0][0][0] == a<<s->sprite_shift[0][0]
+       && s->sprite_delta[0][0][1] == 0
+       && s->sprite_delta[0][1][0] == 0
+       && s->sprite_delta[0][1][1] == a<<s->sprite_shift[0][1]
+       && s->sprite_delta[1][0][0] == a<<s->sprite_shift[1][0]
+       && s->sprite_delta[1][0][1] == 0
+       && s->sprite_delta[1][1][0] == 0
+       && s->sprite_delta[1][1][1] == a<<s->sprite_shift[1][1])
+    {
+        s->sprite_offset[0][0]>>=s->sprite_shift[0][0];
+        s->sprite_offset[0][1]>>=s->sprite_shift[0][1];
+        s->sprite_offset[1][0]>>=s->sprite_shift[1][0];
+        s->sprite_offset[1][1]>>=s->sprite_shift[1][1];
+        s->sprite_delta[0][0][0]= a;
+        s->sprite_delta[0][0][1]= 0;
+        s->sprite_delta[0][1][0]= 0;
+        s->sprite_delta[0][1][1]= a;
+        s->sprite_delta[1][0][0]= a;
+        s->sprite_delta[1][0][1]= 0;
+        s->sprite_delta[1][1][0]= 0;
+        s->sprite_delta[1][1][1]= a;
+        s->sprite_shift[0][0]= 0;
+        s->sprite_shift[0][1]= 0;
+        s->sprite_shift[1][0]= 0;
+        s->sprite_shift[1][1]= 0;
+        s->real_sprite_warping_points=1;
+    }
+    else
+        s->real_sprite_warping_points= s->num_sprite_warping_points;
 
+//FIXME convert stuff if accuracy != 3
 }
 
 /* decode mpeg4 VOP header */
@@ -1748,7 +1828,6 @@
             if(vo_ver_id != 1)
                  s->quarter_sample= get_bits1(&s->gb);
             else s->quarter_sample=0;
-            if(s->quarter_sample) printf("Quarter sample not supported\n");
 #if 0
             if(get_bits1(&s->gb)) printf("Complexity est disabled\n");
             if(get_bits1(&s->gb)) printf("resync disable\n");
@@ -1786,7 +1865,34 @@
 //printf("end Data %X %d\n", show_bits(&s->gb, 32), get_bits_count(&s->gb)&0x7);
         goto redo;
     } else if (startcode == 0x1b2) { //userdata
+        char buf[256];
+        int i;
+        int e;
+        int ver, build;
+
 //printf("user Data %X\n", show_bits(&s->gb, 32));
+        buf[0]= show_bits(&s->gb, 8);
+        for(i=1; i<256; i++){
+            buf[i]= show_bits(&s->gb, 16)&0xFF;
+            if(buf[i]==0) break;
+            skip_bits(&s->gb, 8);
+        }
+        buf[255]=0;
+        e=sscanf(buf, "DivX%dBuild%d", &ver, &build);
+        if(e==2){
+            s->divx_version= ver;
+            s->divx_build= build;
+            if(s->picture_number==0){
+                printf("This file was encoded with DivX%d Build%d\n", ver, build);
+                if(ver==500 && build==413){ //most likely all version are indeed totally buggy but i dunno for sure ...
+                    printf("WARNING: this version of DivX is not MPEG4 compatible, trying to workaround these bugs...\n");
+                }else{
+                    printf("hmm, i havnt seen that version of divx yet, lets assume they fixed these bugs ...\n"
+                           "using mpeg4 decoder, if it fails contact the developers (of ffmpeg)\n");
+                }
+            }
+        }
+//printf("User Data: %s\n", buf);
         goto redo;
     } else if (startcode != 0x1b6) { //VOP
         goto redo;
@@ -1798,12 +1904,7 @@
         printf("B-VOP\n");
 	return -1;
     }
-    if(s->pict_type == S_TYPE)
-    {
-        printf("S-VOP\n");
-//	return -1;
-    }
-    
+ 
     /* XXX: parse time base */
     time_incr = 0;
     while (get_bits1(&s->gb) != 0) 
@@ -1878,6 +1979,7 @@
          }
      }
 //printf("end Data %X %d\n", show_bits(&s->gb, 32), get_bits_count(&s->gb)&0x7);
+     s->picture_number++; // better than pic number==0 allways ;)
      return 0;
 }
 
--- a/mpegvideo.c	Thu Mar 07 13:27:15 2002 +0000
+++ b/mpegvideo.c	Sat Mar 09 13:01:16 2002 +0000
@@ -541,6 +541,59 @@
         return a;
 }
 
+/* Motion compensation of one macroblock with the global sprite offset, for the case of
+ * a single effective warping point. Luma uses s->sprite_offset[0], chroma
+ * s->sprite_offset[1]: the offset >> (sprite_warping_accuracy+1) is the full-pel part and
+ * the remaining fraction, scaled to 1/16 pel, is passed to gmc1(). h is the luma height. */
+static inline void gmc1_motion(MpegEncContext *s,
+                               UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
+                               int dest_offset,
+                               UINT8 **ref_picture, int src_offset,
+                               int h)
+{
+    UINT8 *ptr;
+    int offset, src_x, src_y, linesize, motion_x, motion_y;
+
+    if(s->real_sprite_warping_points>1) printf("Oops, thats bad, contact the developers\n");
+    motion_x= s->sprite_offset[0][0];
+    motion_y= s->sprite_offset[0][1];
+    src_x = s->mb_x * 16 + (motion_x >> (s->sprite_warping_accuracy+1));
+    src_y = s->mb_y * 16 + (motion_y >> (s->sprite_warping_accuracy+1));
+    motion_x<<=(3-s->sprite_warping_accuracy);
+    motion_y<<=(3-s->sprite_warping_accuracy);
+    src_x = clip(src_x, -16, s->width);
+    if (src_x == s->width)
+        motion_x =0;
+    src_y = clip(src_y, -16, s->height);
+    if (src_y == s->height)
+        motion_y =0;
+
+    linesize = s->linesize;
+    ptr = ref_picture[0] + (src_y * linesize) + src_x + src_offset;
+    dest_y+=dest_offset;
+    gmc1(dest_y  , ptr  , linesize, h, motion_x&15, motion_y&15, s->no_rounding);
+    gmc1(dest_y+8, ptr+8, linesize, h, motion_x&15, motion_y&15, s->no_rounding);
+
+    motion_x= s->sprite_offset[1][0];
+    motion_y= s->sprite_offset[1][1];
+    src_x = s->mb_x * 8 + (motion_x >> (s->sprite_warping_accuracy+1));
+    src_y = s->mb_y * 8 + (motion_y >> (s->sprite_warping_accuracy+1));
+    motion_x<<=(3-s->sprite_warping_accuracy);
+    motion_y<<=(3-s->sprite_warping_accuracy);
+    src_x = clip(src_x, -8, s->width>>1);
+    if (src_x == s->width>>1)
+        motion_x =0;
+    src_y = clip(src_y, -8, s->height>>1);
+    if (src_y == s->height>>1)
+        motion_y =0;
+    /* the chroma planes are addressed with a row stride of linesize>>1, so scale the row by that */
+    offset = (src_y * (linesize>>1)) + src_x + (src_offset>>1);
+    ptr = ref_picture[1] + offset;
+    gmc1(dest_cb + (dest_offset>>1), ptr, linesize>>1, h>>1, motion_x&15, motion_y&15, s->no_rounding);
+    ptr = ref_picture[2] + offset;
+    gmc1(dest_cr + (dest_offset>>1), ptr, linesize>>1, h>>1, motion_x&15, motion_y&15, s->no_rounding);
+}
+
 /* apply one mpeg motion vector to the three components */
 static inline void mpeg_motion(MpegEncContext *s,
                                UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
@@ -551,7 +604,11 @@
 {
     UINT8 *ptr;
     int dxy, offset, mx, my, src_x, src_y, height, linesize;
-    
+if(s->quarter_sample)
+{
+    motion_x>>=1;
+    motion_y>>=1;
+}
     dxy = ((motion_y & 1) << 1) | (motion_x & 1);
     src_x = s->mb_x * 16 + (motion_x >> 1);
     src_y = s->mb_y * (16 >> field_based) + (motion_y >> 1);
@@ -602,10 +659,69 @@
     pix_op[dxy](dest_cr + (dest_offset >> 1), ptr, linesize >> 1, h >> 1);
 }
 
+/* Quarter-pel motion compensation of one macroblock. motion_x/motion_y are in 1/4 pel
+ * units. Luma is predicted as four 8x8 blocks with the qpix_op[] function selected by
+ * the quarter-pel phase; chroma reuses the pix_op[] functions with a vector derived
+ * from the luma one. h is only used, halved, as the chroma height; the luma part always
+ * covers 16 rows. */
+static inline void qpel_motion(MpegEncContext *s,
+                               UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
+                               int dest_offset,
+                               UINT8 **ref_picture, int src_offset,
+                               int field_based, op_pixels_func *pix_op,
+                               qpel_mc_func *qpix_op,
+                               int motion_x, int motion_y, int h)
+{
+    const int frac_x= motion_x & 3;
+    const int frac_y= motion_y & 3;
+    const int height= s->height >> field_based;
+    const int linesize= s->linesize << field_based;
+    UINT8 *ptr;
+    int dxy, offset, mx, my, src_x, src_y, i;
+
+    /* luma: full-pel position, clipped; at the right/bottom limit the fraction is dropped */
+    dxy = (frac_y << 2) | frac_x;
+    src_x = clip(s->mb_x * 16 + (motion_x >> 2), -16, s->width);
+    if (src_x == s->width)
+        dxy &= ~3;
+    src_y = clip(s->mb_y * (16 >> field_based) + (motion_y >> 2), -16, height);
+    if (src_y == height)
+        dxy &= ~12;
+
+    ptr = ref_picture[0] + (src_y * linesize) + src_x + src_offset;
+    dest_y += dest_offset;
+
+    /* the four 8x8 quadrants: top left, top right, bottom left, bottom right */
+    for(i=0; i<4; i++){
+        const int quad= (i>>1)*linesize*8 + (i&1)*8;
+        qpix_op[dxy](dest_y + quad, ptr + quad, linesize, linesize, frac_x, frac_y);
+    }
+
+    /* chroma: halve the vector keeping its lowest bit, then any remaining fraction
+     * selects the half-pel variant of pix_op[] and >>2 gives the full-pel part */
+    mx= (motion_x>>1) | (motion_x&1);
+    my= (motion_y>>1) | (motion_y&1);
+    dxy = ((mx & 3) != 0 ? 1 : 0) | ((my & 3) != 0 ? 2 : 0);
+
+    src_x = clip(s->mb_x * 8 + (mx >> 2), -8, s->width >> 1);
+    if (src_x == (s->width >> 1))
+        dxy &= ~1;
+    src_y = clip(s->mb_y * (8 >> field_based) + (my >> 2), -8, height >> 1);
+    if (src_y == (height >> 1))
+        dxy &= ~2;
+
+    offset = (src_y * (linesize >> 1)) + src_x + (src_offset >> 1);
+    ptr = ref_picture[1] + offset;
+    pix_op[dxy](dest_cb + (dest_offset >> 1), ptr, linesize >> 1, h >> 1);
+    ptr = ref_picture[2] + offset;
+    pix_op[dxy](dest_cr + (dest_offset >> 1), ptr, linesize >> 1, h >> 1);
+}
+
+
 static inline void MPV_motion(MpegEncContext *s, 
                               UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr,
                               int dir, UINT8 **ref_picture, 
-                              op_pixels_func *pix_op)
+                              op_pixels_func *pix_op, qpel_mc_func *qpix_op)
 {
     int dxy, offset, mx, my, src_x, src_y, motion_x, motion_y;
     int mb_x, mb_y, i;
@@ -616,10 +732,30 @@
 
     switch(s->mv_type) {
     case MV_TYPE_16X16:
-        mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
-                    ref_picture, 0,
-                    0, pix_op,
-                    s->mv[dir][0][0], s->mv[dir][0][1], 16);
+        if(s->mcsel){
+#if 0
+            mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
+                        ref_picture, 0,
+                        0, pix_op,
+                        s->sprite_offset[0][0]>>3,
+                        s->sprite_offset[0][1]>>3,
+                        16);
+#else
+            gmc1_motion(s, dest_y, dest_cb, dest_cr, 0,
+                        ref_picture, 0,
+                        16);
+#endif
+        }else if(s->quarter_sample){
+            qpel_motion(s, dest_y, dest_cb, dest_cr, 0,
+                        ref_picture, 0,
+                        0, pix_op, qpix_op,
+                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
+        }else{
+            mpeg_motion(s, dest_y, dest_cb, dest_cr, 0,
+                        ref_picture, 0,
+                        0, pix_op,
+                        s->mv[dir][0][0], s->mv[dir][0][1], 16);
+        }           
         break;
     case MV_TYPE_8X8:
         for(i=0;i<4;i++) {
@@ -740,6 +876,7 @@
     int mb_x, mb_y;
     int dct_linesize, dct_offset;
     op_pixels_func *op_pix;
+    qpel_mc_func *op_qpix;
 
     mb_x = s->mb_x;
     mb_y = s->mb_y;
@@ -851,20 +988,23 @@
 
         if (!s->mb_intra) {
             /* motion handling */
-            if (!s->no_rounding) 
+            if (!s->no_rounding){
                 op_pix = put_pixels_tab;
-            else
+                op_qpix= qpel_mc_rnd_tab;
+            }else{
                 op_pix = put_no_rnd_pixels_tab;
+                op_qpix= qpel_mc_no_rnd_tab;
+            }
 
             if (s->mv_dir & MV_DIR_FORWARD) {
-                MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture, op_pix);
+                MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture, op_pix, op_qpix);
                 if (!s->no_rounding) 
                     op_pix = avg_pixels_tab;
                 else
                     op_pix = avg_no_rnd_pixels_tab;
             }
             if (s->mv_dir & MV_DIR_BACKWARD) {
-                MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture, op_pix);
+                MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture, op_pix, op_qpix);
             }
 
             /* add dct residue */
--- a/mpegvideo.h	Thu Mar 07 13:27:15 2002 +0000
+++ b/mpegvideo.h	Sat Mar 09 13:01:16 2002 +0000
@@ -165,6 +165,7 @@
     int sprite_top;
     int sprite_brightness_change;
     int num_sprite_warping_points;
+    int real_sprite_warping_points;
     int sprite_offset[2][2];
     int sprite_delta[2][2][2];
     int sprite_shift[2][2];
@@ -178,7 +179,11 @@
     int sprite_warping_accuracy;
     int low_latency_sprite;
     int data_partioning;
-    
+
+    /* divx specific, used to workaround (many) bugs in divx5 */
+    int divx_version;
+    int divx_build;
+
     /* RV10 specific */
     int rv10_version; /* RV10 version: 0 or 3 */
     int rv10_first_dc_coded[3];