changeset 853:eacc2dd8fd9d libavcodec

* use DSPContext so that each codec can use its own local (sub)set of CPU extensions
author kabi
date Mon, 11 Nov 2002 09:40:17 +0000
parents c01c98206ee6
children 3034f1816596
files dsputil.c dsputil.h dv.c error_resilience.c h263.c h263dec.c i386/dsputil_mmx.c motion_est.c mpeg12.c mpegvideo.c mpegvideo.h rv10.c svq1.c
diffstat 13 files changed, 376 insertions(+), 324 deletions(-) [+]
line wrap: on
line diff
--- a/dsputil.c	Mon Nov 11 09:37:40 2002 +0000
+++ b/dsputil.c	Mon Nov 11 09:40:17 2002 +0000
@@ -20,7 +20,7 @@
  */
 #include "avcodec.h"
 #include "dsputil.h"
-
+/*
 void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size);
 void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride);
 void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
@@ -41,7 +41,7 @@
 op_pixels_abs_func pix_abs8x8_x2;
 op_pixels_abs_func pix_abs8x8_y2;
 op_pixels_abs_func pix_abs8x8_xy2;
-
+*/
 int ff_bit_exact=0;
 
 UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
@@ -84,7 +84,7 @@
 };
 
 /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */
-UINT32 inverse[256]={
+const UINT32 inverse[256]={
          0, 4294967295U,2147483648U,1431655766, 1073741824,  858993460,  715827883,  613566757, 
  536870912,  477218589,  429496730,  390451573,  357913942,  330382100,  306783379,  286331154, 
  268435456,  252645136,  238609295,  226050911,  214748365,  204522253,  195225787,  186737709, 
@@ -119,7 +119,7 @@
   17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
 };
 
-int pix_sum_c(UINT8 * pix, int line_size)
+static int pix_sum_c(UINT8 * pix, int line_size)
 {
     int s, i, j;
 
@@ -141,7 +141,7 @@
     return s;
 }
 
-int pix_norm1_c(UINT8 * pix, int line_size)
+static int pix_norm1_c(UINT8 * pix, int line_size)
 {
     int s, i, j;
     UINT32 *sq = squareTbl + 256;
@@ -165,7 +165,7 @@
 }
 
 
-void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size)
+static void get_pixels_c(DCTELEM *restrict block, const UINT8 *pixels, int line_size)
 {
     int i;
 
@@ -184,8 +184,8 @@
     }
 }
 
-void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1, const UINT8 *s2,
-		   int stride){
+static void diff_pixels_c(DCTELEM *restrict block, const UINT8 *s1,
+			  const UINT8 *s2, int stride){
     int i;
 
     /* read the pixels */
@@ -205,8 +205,8 @@
 }
 
 
-void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
-                          int line_size)
+static void put_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
+				 int line_size)
 {
     int i;
     UINT8 *cm = cropTbl + MAX_NEG_CROP;
@@ -227,7 +227,7 @@
     }
 }
 
-void add_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
+static void add_pixels_clamped_c(const DCTELEM *block, UINT8 *restrict pixels,
                           int line_size)
 {
     int i;
@@ -1353,7 +1353,7 @@
 #undef op_put
 #undef op_put_no_rnd
 
-int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size)
+static int pix_abs16x16_c(UINT8 *pix1, UINT8 *pix2, int line_size)
 {
     int s, i;
 
@@ -1381,7 +1381,7 @@
     return s;
 }
 
-int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
+static int pix_abs16x16_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
 {
     int s, i;
 
@@ -1409,7 +1409,7 @@
     return s;
 }
 
-int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
+static int pix_abs16x16_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
 {
     int s, i;
     UINT8 *pix3 = pix2 + line_size;
@@ -1439,7 +1439,7 @@
     return s;
 }
 
-int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
+static int pix_abs16x16_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
 {
     int s, i;
     UINT8 *pix3 = pix2 + line_size;
@@ -1469,7 +1469,7 @@
     return s;
 }
 
-int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size)
+static int pix_abs8x8_c(UINT8 *pix1, UINT8 *pix2, int line_size)
 {
     int s, i;
 
@@ -1489,7 +1489,7 @@
     return s;
 }
 
-int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
+static int pix_abs8x8_x2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
 {
     int s, i;
 
@@ -1509,7 +1509,7 @@
     return s;
 }
 
-int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
+static int pix_abs8x8_y2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
 {
     int s, i;
     UINT8 *pix3 = pix2 + line_size;
@@ -1531,7 +1531,7 @@
     return s;
 }
 
-int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
+static int pix_abs8x8_xy2_c(UINT8 *pix1, UINT8 *pix2, int line_size)
 {
     int s, i;
     UINT8 *pix3 = pix2 + line_size;
@@ -1574,12 +1574,12 @@
     }
 }
 
-void clear_blocks_c(DCTELEM *blocks)
+static void clear_blocks_c(DCTELEM *blocks) /* zeroes 6*64 DCTELEM entries starting at blocks */
 {
     memset(blocks, 0, sizeof(DCTELEM)*6*64);
 }
 
-void dsputil_init(void)
+void dsputil_init(DSPContext* c, unsigned mask)
 {
     int i;
 
@@ -1593,42 +1593,82 @@
         squareTbl[i] = (i - 256) * (i - 256);
     }
 
-    get_pixels = get_pixels_c;
-    diff_pixels = diff_pixels_c;
-    put_pixels_clamped = put_pixels_clamped_c;
-    add_pixels_clamped = add_pixels_clamped_c;
-    ff_gmc1= gmc1_c;
-    ff_gmc= gmc_c;
-    clear_blocks= clear_blocks_c;
-    pix_sum= pix_sum_c;
-    pix_norm1= pix_norm1_c;
+    c->get_pixels = get_pixels_c;
+    c->diff_pixels = diff_pixels_c;
+    c->put_pixels_clamped = put_pixels_clamped_c;
+    c->add_pixels_clamped = add_pixels_clamped_c;
+    c->gmc1 = gmc1_c;
+    c->gmc = gmc_c;
+    c->clear_blocks = clear_blocks_c;
+    c->pix_sum = pix_sum_c;
+    c->pix_norm1 = pix_norm1_c;
+
+    c->pix_abs16x16     = pix_abs16x16_c;
+    c->pix_abs16x16_x2  = pix_abs16x16_x2_c;
+    c->pix_abs16x16_y2  = pix_abs16x16_y2_c;
+    c->pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
+    c->pix_abs8x8     = pix_abs8x8_c;
+    c->pix_abs8x8_x2  = pix_abs8x8_x2_c;
+    c->pix_abs8x8_y2  = pix_abs8x8_y2_c;
+    c->pix_abs8x8_xy2 = pix_abs8x8_xy2_c;
+
+    c->put_pixels_tab[0][0] = put_pixels16;
+    c->put_pixels_tab[0][1] = put_pixels16_x2;
+    c->put_pixels_tab[0][2] = put_pixels16_y2;
+    c->put_pixels_tab[0][3] = put_pixels16_xy2;
+
+    c->put_no_rnd_pixels_tab[0][0] = put_pixels16;
+    c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2;
+    c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2;
+    c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2;
 
-    pix_abs16x16     = pix_abs16x16_c;
-    pix_abs16x16_x2  = pix_abs16x16_x2_c;
-    pix_abs16x16_y2  = pix_abs16x16_y2_c;
-    pix_abs16x16_xy2 = pix_abs16x16_xy2_c;
-    pix_abs8x8     = pix_abs8x8_c;
-    pix_abs8x8_x2  = pix_abs8x8_x2_c;
-    pix_abs8x8_y2  = pix_abs8x8_y2_c;
-    pix_abs8x8_xy2 = pix_abs8x8_xy2_c;
+    c->avg_pixels_tab[0][0] = avg_pixels16;
+    c->avg_pixels_tab[0][1] = avg_pixels16_x2;
+    c->avg_pixels_tab[0][2] = avg_pixels16_y2;
+    c->avg_pixels_tab[0][3] = avg_pixels16_xy2;
+
+    c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16;
+    c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2;
+    c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2;
+    c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2;
+
+    c->put_pixels_tab[1][0] = put_pixels8;
+    c->put_pixels_tab[1][1] = put_pixels8_x2;
+    c->put_pixels_tab[1][2] = put_pixels8_y2;
+    c->put_pixels_tab[1][3] = put_pixels8_xy2;
+
+    c->put_no_rnd_pixels_tab[1][0] = put_pixels8;
+    c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2;
+    c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2;
+    c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2;
+
+    c->avg_pixels_tab[1][0] = avg_pixels8;
+    c->avg_pixels_tab[1][1] = avg_pixels8_x2;
+    c->avg_pixels_tab[1][2] = avg_pixels8_y2;
+    c->avg_pixels_tab[1][3] = avg_pixels8_xy2;
+
+    c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8;
+    c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2;
+    c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2;
+    c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2;
 
 #ifdef HAVE_MMX
-    dsputil_init_mmx();
+    dsputil_init_mmx(c, mask);
 #endif
 #ifdef ARCH_ARMV4L
-    dsputil_init_armv4l();
+    dsputil_init_armv4l(c, mask);
 #endif
 #ifdef HAVE_MLIB
-    dsputil_init_mlib();
+    dsputil_init_mlib(c, mask);
 #endif
 #ifdef ARCH_ALPHA
-    dsputil_init_alpha();
+    dsputil_init_alpha(c, mask);
 #endif
 #ifdef ARCH_POWERPC
-    dsputil_init_ppc();
+    dsputil_init_ppc(c, mask);
 #endif
 #ifdef HAVE_MMI
-    dsputil_init_mmi();
+    dsputil_init_mmi(c, mask);
 #endif
 
     for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1;
@@ -1639,7 +1679,8 @@
 {
     ff_bit_exact=1;
 #ifdef HAVE_MMX
-    dsputil_set_bit_exact_mmx();
+#warning FIXME - set_bit_exact
+//    dsputil_set_bit_exact_mmx();
 #endif
 }
 
--- a/dsputil.h	Mon Nov 11 09:37:40 2002 +0000
+++ b/dsputil.h	Mon Nov 11 09:40:17 2002 +0000
@@ -45,10 +45,9 @@
 extern UINT32 squareTbl[512];
 extern UINT8 cropTbl[256 + 2 * MAX_NEG_CROP];
 
-void dsputil_init(void);
 
 /* minimum alignment rules ;)
-if u notice errors in the align stuff, need more alignment for some asm code for some cpu 
+if u notice errors in the align stuff, need more alignment for some asm code for some cpu
 or need to use a function with less aligned data then send a mail to the ffmpeg-dev list, ...
 
 !warning these alignments might not match reallity, (missing attribute((align)) stuff somewhere possible)
@@ -57,39 +56,20 @@
 !future video codecs might need functions with less strict alignment
 */
 
-/* pixel ops : interface with DCT */
-extern void (*get_pixels)(DCTELEM *block/*align 16*/, const UINT8 *pixels/*align 8*/, int line_size);
-extern void (*diff_pixels)(DCTELEM *block/*align 16*/, const UINT8 *s1/*align 8*/, const UINT8 *s2/*align 8*/, int stride);
-extern void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size);
-extern void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size);
-extern void (*ff_gmc1)(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder);
-extern void (*ff_gmc )(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int stride, int h, int ox, int oy, 
-                  int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
-extern void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
-extern int (*pix_sum)(UINT8 * pix, int line_size);
-extern int (*pix_norm1)(UINT8 * pix, int line_size);
-
-
-
+/*
 void get_pixels_c(DCTELEM *block, const UINT8 *pixels, int line_size);
 void diff_pixels_c(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride);
 void put_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
 void add_pixels_clamped_c(const DCTELEM *block, UINT8 *pixels, int line_size);
 void clear_blocks_c(DCTELEM *blocks);
+*/
 
 /* add and put pixel (decoding) */
 // blocksizes for op_pixels_func are 8x4,8x8 16x8 16x16
 typedef void (*op_pixels_func)(UINT8 *block/*align width (8 or 16)*/, const UINT8 *pixels/*align 1*/, int line_size, int h);
 typedef void (*qpel_mc_func)(UINT8 *dst/*align width (8 or 16)*/, UINT8 *src/*align 1*/, int stride);
 
-extern op_pixels_func put_pixels_tab[2][4];
-extern op_pixels_func avg_pixels_tab[2][4];
-extern op_pixels_func put_no_rnd_pixels_tab[2][4];
-extern op_pixels_func avg_no_rnd_pixels_tab[2][4];
-extern qpel_mc_func put_qpel_pixels_tab[2][16];
-extern qpel_mc_func avg_qpel_pixels_tab[2][16];
-extern qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
-extern qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16];
+
 
 #define CALL_2X_PIXELS(a, b, n)\
 static void a(uint8_t *block, const uint8_t *pixels, int line_size, int h){\
@@ -100,20 +80,46 @@
 /* motion estimation */
 
 typedef int (*op_pixels_abs_func)(UINT8 *blk1/*align width (8 or 16)*/, UINT8 *blk2/*align 1*/, int line_size);
-
-extern op_pixels_abs_func pix_abs16x16;
-extern op_pixels_abs_func pix_abs16x16_x2;
-extern op_pixels_abs_func pix_abs16x16_y2;
-extern op_pixels_abs_func pix_abs16x16_xy2;
-extern op_pixels_abs_func pix_abs8x8;
-extern op_pixels_abs_func pix_abs8x8_x2;
-extern op_pixels_abs_func pix_abs8x8_y2;
-extern op_pixels_abs_func pix_abs8x8_xy2;
-
+/*
 int pix_abs16x16_c(UINT8 *blk1, UINT8 *blk2, int lx);
 int pix_abs16x16_x2_c(UINT8 *blk1, UINT8 *blk2, int lx);
 int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx);
 int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx);
+*/
+typedef struct DSPContext { /* table of DSP function pointers; every member set in this struct's users is filled in by dsputil_init() */
+    /* pixel ops : interface with DCT */
+    void (*get_pixels)(DCTELEM *block/*align 16*/, const UINT8 *pixels/*align 8*/, int line_size);
+    void (*diff_pixels)(DCTELEM *block/*align 16*/, const UINT8 *s1/*align 8*/, const UINT8 *s2/*align 8*/, int stride);
+    void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size);
+    void (*add_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size);
+    void (*gmc1)(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int srcStride, int h, int x16, int y16, int rounder);
+    void (*gmc )(UINT8 *dst/*align 8*/, UINT8 *src/*align 1*/, int stride, int h, int ox, int oy,
+		    int dxx, int dxy, int dyx, int dyy, int shift, int r, int width, int height);
+    void (*clear_blocks)(DCTELEM *blocks/*align 16*/);
+    int (*pix_sum)(UINT8 * pix, int line_size);
+    int (*pix_norm1)(UINT8 * pix, int line_size);
+    /* op_pixels_func tables: dsputil_init() stores the *16 functions in [0][k] and the *8 functions in [1][k]; k = 0 plain, 1 _x2, 2 _y2, 3 _xy2 */
+    /* maybe create an array for 16/8 functions */
+    op_pixels_func put_pixels_tab[2][4];
+    op_pixels_func avg_pixels_tab[2][4];
+    op_pixels_func put_no_rnd_pixels_tab[2][4];
+    op_pixels_func avg_no_rnd_pixels_tab[2][4];
+    qpel_mc_func put_qpel_pixels_tab[2][16];
+    qpel_mc_func avg_qpel_pixels_tab[2][16];
+    qpel_mc_func put_no_rnd_qpel_pixels_tab[2][16];
+    qpel_mc_func avg_no_rnd_qpel_pixels_tab[2][16];
+    /* motion estimation functions for 16x16 and 8x8 blocks (plain, _x2, _y2 and _xy2 variants) */
+    op_pixels_abs_func pix_abs16x16;
+    op_pixels_abs_func pix_abs16x16_x2;
+    op_pixels_abs_func pix_abs16x16_y2;
+    op_pixels_abs_func pix_abs16x16_xy2;
+    op_pixels_abs_func pix_abs8x8;
+    op_pixels_abs_func pix_abs8x8_x2;
+    op_pixels_abs_func pix_abs8x8_y2;
+    op_pixels_abs_func pix_abs8x8_xy2;
+} DSPContext;
+
+void dsputil_init(DSPContext* p, unsigned mask);
 
 /**
  * permute block according to permuatation.
@@ -121,8 +127,12 @@
  */
 void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last);
 
+#define emms_c() /* no-op by default; undefined again below when HAVE_MMX is set */
+
 #if defined(HAVE_MMX)
 
+#undef emms_c /* #undef takes only the macro name, without a parameter list */
+
 #define MM_MMX    0x0001 /* standard MMX */
 #define MM_3DNOW  0x0004 /* AMD 3DNOW */
 #define MM_MMXEXT 0x0002 /* SSE integer functions or AMD MMX ext */
@@ -132,6 +142,8 @@
 extern int mm_flags;
 
 int mm_support(void);
+void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size);
+void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size);
 
 static inline void emms(void)
 {
@@ -146,54 +158,44 @@
 
 #define __align8 __attribute__ ((aligned (8)))
 
-void dsputil_init_mmx(void);
-void dsputil_set_bit_exact_mmx(void);
+void dsputil_init_mmx(DSPContext* c, unsigned mask);
+void dsputil_set_bit_exact_mmx(DSPContext* c, unsigned mask);
 
 #elif defined(ARCH_ARMV4L)
 
-#define emms_c()
-
 /* This is to use 4 bytes read to the IDCT pointers for some 'zero'
    line ptimizations */
 #define __align8 __attribute__ ((aligned (4)))
 
-void dsputil_init_armv4l(void);   
+void dsputil_init_armv4l(DSPContext* c, unsigned mask);
 
 #elif defined(HAVE_MLIB)
- 
-#define emms_c()
 
 /* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */
 #define __align8 __attribute__ ((aligned (8)))
 
-void dsputil_init_mlib(void);   
+void dsputil_init_mlib(DSPContext* c, unsigned mask);
 
 #elif defined(ARCH_ALPHA)
 
-#define emms_c()
 #define __align8 __attribute__ ((aligned (8)))
 
-void dsputil_init_alpha(void);
+void dsputil_init_alpha(DSPContext* c, unsigned mask);
 
 #elif defined(ARCH_POWERPC)
 
-#define emms_c()
 #define __align8 __attribute__ ((aligned (16)))
 
-void dsputil_init_ppc(void);
+void dsputil_init_ppc(DSPContext* c, unsigned mask);
 
 #elif defined(HAVE_MMI)
 
-#define emms_c()
-
 #define __align8 __attribute__ ((aligned (16)))
 
-void dsputil_init_mmi(void);   
+void dsputil_init_mmi(DSPContext* c, unsigned mask);
 
 #else
 
-#define emms_c()
-
 #define __align8
 
 #endif
@@ -263,9 +265,9 @@
 } MDCTContext;
 
 int ff_mdct_init(MDCTContext *s, int nbits, int inverse);
-void ff_imdct_calc(MDCTContext *s, FFTSample *output, 
+void ff_imdct_calc(MDCTContext *s, FFTSample *output,
                 const FFTSample *input, FFTSample *tmp);
-void ff_mdct_calc(MDCTContext *s, FFTSample *out, 
+void ff_mdct_calc(MDCTContext *s, FFTSample *out,
                const FFTSample *input, FFTSample *tmp);
 void ff_mdct_end(MDCTContext *s);
 
--- a/dv.c	Mon Nov 11 09:37:40 2002 +0000
+++ b/dv.c	Mon Nov 11 09:40:17 2002 +0000
@@ -114,6 +114,7 @@
     /* XXX: fix it */
     memset(&s2, 0, sizeof(MpegEncContext));
     s2.avctx = avctx;
+    dsputil_init(&s2.dsp, avctx->dsp_mask);
     if (DCT_common_init(&s2) < 0)
        return -1;
 
--- a/error_resilience.c	Mon Nov 11 09:37:40 2002 +0000
+++ b/error_resilience.c	Mon Nov 11 09:40:17 2002 +0000
@@ -331,7 +331,7 @@
                 s->mv_type = MV_TYPE_16X16;
                 s->mb_skiped=0;
 
-                clear_blocks(s->block[0]);
+		s->dsp.clear_blocks(s->block[0]);
 
                 s->mb_x= mb_x;
                 s->mb_y= mb_y;
@@ -458,7 +458,7 @@
                     s->mv_type = MV_TYPE_16X16;
                     s->mb_skiped=0;
 
-                    clear_blocks(s->block[0]);
+		    s->dsp.clear_blocks(s->block[0]);
 
                     s->mb_x= mb_x;
                     s->mb_y= mb_y;
@@ -559,8 +559,8 @@
                 UINT8 *mb_ptr     = s->current_picture[0] + mb_x*16 + mb_y*16*s->linesize;
                 UINT8 *last_mb_ptr= s->last_picture   [0] + mb_x*16 + mb_y*16*s->linesize;
     
-                is_intra_likely += pix_abs16x16(last_mb_ptr, mb_ptr                    , s->linesize);
-                is_intra_likely -= pix_abs16x16(last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize);
+		is_intra_likely += s->dsp.pix_abs16x16(last_mb_ptr, mb_ptr                    , s->linesize);
+                is_intra_likely -= s->dsp.pix_abs16x16(last_mb_ptr, last_mb_ptr+s->linesize*16, s->linesize);
             }else{
                 if(s->mbintra_table[i]) //HACK (this is allways inited but we should use mb_type[])
                    is_intra_likely++;
@@ -738,7 +738,7 @@
                 s->mv[0][0][1] = s->motion_val[ mb_x*2+1 + (mb_y*2+1)*s->block_wrap[0] ][1];
             }
         
-            clear_blocks(s->block[0]);
+	    s->dsp.clear_blocks(s->block[0]);
 
             s->mb_x= mb_x;
             s->mb_y= mb_y;
@@ -778,8 +778,8 @@
                     s->mv[1][0][0]= 0;
                     s->mv[1][0][1]= 0;
                 }
-                                
-                clear_blocks(s->block[0]);
+
+                s->dsp.clear_blocks(s->block[0]);
                 s->mb_x= mb_x;
                 s->mb_y= mb_y;
                 MPV_decode_mb(s, s->block);
--- a/h263.c	Mon Nov 11 09:37:40 2002 +0000
+++ b/h263.c	Mon Nov 11 09:40:17 2002 +0000
@@ -538,7 +538,7 @@
                         if(s->coded_order[i+1].pict_type!=B_TYPE) break;
 
                         b_pic= s->coded_order[i+1].picture[0] + offset;
-                        diff= pix_abs16x16(p_pic, b_pic, s->linesize);
+			diff= s->dsp.pix_abs16x16(p_pic, b_pic, s->linesize);
                         if(diff>s->qscale*70){ //FIXME check that 70 is optimal
                             s->mb_skiped=0;
                             break;
--- a/h263dec.c	Mon Nov 11 09:37:40 2002 +0000
+++ b/h263dec.c	Mon Nov 11 09:40:17 2002 +0000
@@ -195,7 +195,7 @@
             }
 
             /* DCT & quantize */
-            clear_blocks(s->block[0]);
+	    s->dsp.clear_blocks(s->block[0]);
             
             s->mv_dir = MV_DIR_FORWARD;
             s->mv_type = MV_TYPE_16X16;
--- a/i386/dsputil_mmx.c	Mon Nov 11 09:37:40 2002 +0000
+++ b/i386/dsputil_mmx.c	Mon Nov 11 09:40:17 2002 +0000
@@ -22,7 +22,7 @@
 #include "../dsputil.h"
 
 int mm_flags; /* multimedia extension flags */
-
+/* FIXME use them in static form */
 int pix_abs16x16_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
 int pix_abs16x16_x2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
 int pix_abs16x16_y2_mmx(UINT8 *blk1, UINT8 *blk2, int lx);
@@ -242,7 +242,7 @@
     );
 }
 
-static void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size)
+void put_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size)
 {
     const DCTELEM *p;
     UINT8 *pix;
@@ -297,7 +297,7 @@
 	    :"memory");
 }
 
-static void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size)
+void add_pixels_clamped_mmx(const DCTELEM *block, UINT8 *pixels, int line_size)
 {
     const DCTELEM *p;
     UINT8 *pix;
@@ -457,7 +457,7 @@
 static void just_return() { return; }
 #endif
 
-void dsputil_init_mmx(void)
+void dsputil_init_mmx(DSPContext* c, unsigned mask)
 {
     mm_flags = mm_support();
 #if 0
@@ -476,112 +476,112 @@
 #endif
 
     if (mm_flags & MM_MMX) {
-        get_pixels = get_pixels_mmx;
-        diff_pixels = diff_pixels_mmx;
-        put_pixels_clamped = put_pixels_clamped_mmx;
-        add_pixels_clamped = add_pixels_clamped_mmx;
-        clear_blocks= clear_blocks_mmx;
-        pix_sum= pix_sum16_mmx;
+        c->get_pixels = get_pixels_mmx;
+        c->diff_pixels = diff_pixels_mmx;
+        c->put_pixels_clamped = put_pixels_clamped_mmx;
+        c->add_pixels_clamped = add_pixels_clamped_mmx;
+        c->clear_blocks = clear_blocks_mmx;
+        c->pix_sum = pix_sum16_mmx;
 
-        pix_abs16x16     = pix_abs16x16_mmx;
-        pix_abs16x16_x2  = pix_abs16x16_x2_mmx;
-        pix_abs16x16_y2  = pix_abs16x16_y2_mmx;
-        pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx;
-        pix_abs8x8    = pix_abs8x8_mmx;
-        pix_abs8x8_x2 = pix_abs8x8_x2_mmx;
-        pix_abs8x8_y2 = pix_abs8x8_y2_mmx;
-        pix_abs8x8_xy2= pix_abs8x8_xy2_mmx;
+        c->pix_abs16x16     = pix_abs16x16_mmx;
+        c->pix_abs16x16_x2  = pix_abs16x16_x2_mmx;
+        c->pix_abs16x16_y2  = pix_abs16x16_y2_mmx;
+        c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx;
+        c->pix_abs8x8     = pix_abs8x8_mmx;
+        c->pix_abs8x8_x2  = pix_abs8x8_x2_mmx;
+        c->pix_abs8x8_y2  = pix_abs8x8_y2_mmx;
+        c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx;
 
-        put_pixels_tab[0][0] = put_pixels16_mmx;
-        put_pixels_tab[0][1] = put_pixels16_x2_mmx;
-        put_pixels_tab[0][2] = put_pixels16_y2_mmx;
-        put_pixels_tab[0][3] = put_pixels16_xy2_mmx;
+        c->put_pixels_tab[0][0] = put_pixels16_mmx;
+        c->put_pixels_tab[0][1] = put_pixels16_x2_mmx;
+        c->put_pixels_tab[0][2] = put_pixels16_y2_mmx;
+        c->put_pixels_tab[0][3] = put_pixels16_xy2_mmx;
 
-        put_no_rnd_pixels_tab[0][0] = put_pixels16_mmx;
-        put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx;
-        put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx;
-        put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_mmx;
+        c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmx;
+        c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx;
+        c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx;
+        c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_mmx;
 
-        avg_pixels_tab[0][0] = avg_pixels16_mmx;
-        avg_pixels_tab[0][1] = avg_pixels16_x2_mmx;
-        avg_pixels_tab[0][2] = avg_pixels16_y2_mmx;
-        avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx;
+        c->avg_pixels_tab[0][0] = avg_pixels16_mmx;
+        c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx;
+        c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx;
+        c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx;
 
-        avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_mmx;
-        avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_mmx;
-        avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_mmx;
-        avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_mmx;
-        
-        put_pixels_tab[1][0] = put_pixels8_mmx;
-        put_pixels_tab[1][1] = put_pixels8_x2_mmx;
-        put_pixels_tab[1][2] = put_pixels8_y2_mmx;
-        put_pixels_tab[1][3] = put_pixels8_xy2_mmx;
+        c->avg_no_rnd_pixels_tab[0][0] = avg_no_rnd_pixels16_mmx;
+        c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_mmx;
+        c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_mmx;
+        c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_mmx;
+
+        c->put_pixels_tab[1][0] = put_pixels8_mmx;
+        c->put_pixels_tab[1][1] = put_pixels8_x2_mmx;
+        c->put_pixels_tab[1][2] = put_pixels8_y2_mmx;
+        c->put_pixels_tab[1][3] = put_pixels8_xy2_mmx;
 
-        put_no_rnd_pixels_tab[1][0] = put_pixels8_mmx;
-        put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx;
-        put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx;
-        put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_mmx;
+        c->put_no_rnd_pixels_tab[1][0] = put_pixels8_mmx;
+        c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx;
+        c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx;
+        c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_mmx;
 
-        avg_pixels_tab[1][0] = avg_pixels8_mmx;
-        avg_pixels_tab[1][1] = avg_pixels8_x2_mmx;
-        avg_pixels_tab[1][2] = avg_pixels8_y2_mmx;
-        avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx;
+        c->avg_pixels_tab[1][0] = avg_pixels8_mmx;
+        c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx;
+        c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx;
+        c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx;
 
-        avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_mmx;
-        avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_mmx;
-        avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx;
-        avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx;
+        c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_mmx;
+        c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_mmx;
+        c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_mmx;
+        c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_mmx;
 
         if (mm_flags & MM_MMXEXT) {
-            pix_abs16x16    = pix_abs16x16_mmx2;
-            pix_abs16x16_x2 = pix_abs16x16_x2_mmx2;
-            pix_abs16x16_y2 = pix_abs16x16_y2_mmx2;
-            pix_abs16x16_xy2= pix_abs16x16_xy2_mmx2;
+            c->pix_abs16x16     = pix_abs16x16_mmx2;
+            c->pix_abs16x16_x2  = pix_abs16x16_x2_mmx2;
+            c->pix_abs16x16_y2  = pix_abs16x16_y2_mmx2;
+            c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx2;
 
-            pix_abs8x8    = pix_abs8x8_mmx2;
-            pix_abs8x8_x2 = pix_abs8x8_x2_mmx2;
-            pix_abs8x8_y2 = pix_abs8x8_y2_mmx2;
-            pix_abs8x8_xy2= pix_abs8x8_xy2_mmx2;
+            c->pix_abs8x8     = pix_abs8x8_mmx2;
+            c->pix_abs8x8_x2  = pix_abs8x8_x2_mmx2;
+            c->pix_abs8x8_y2  = pix_abs8x8_y2_mmx2;
+            c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx2;
 
-            put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
-            put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
-            put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
-            put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
+            c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
+            c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
+            c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
+            c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
 
-            avg_pixels_tab[0][0] = avg_pixels16_mmx2;
-            avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2;
-            avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2;
-            avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
+            c->avg_pixels_tab[0][0] = avg_pixels16_mmx2;
+            c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2;
+            c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2;
+            c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
 
-            put_pixels_tab[1][1] = put_pixels8_x2_mmx2;
-            put_pixels_tab[1][2] = put_pixels8_y2_mmx2;
-            put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
-            put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
+            c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2;
+            c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2;
+            c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
+            c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
 
-            avg_pixels_tab[1][0] = avg_pixels8_mmx2;
-            avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
-            avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
-            avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
+            c->avg_pixels_tab[1][0] = avg_pixels8_mmx2;
+            c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
+            c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
+            c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
         } else if (mm_flags & MM_3DNOW) {
-            put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
-            put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
-            put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow;
-            put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow;
+            c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
+            c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
+            c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow;
+            c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow;
 
-            avg_pixels_tab[0][0] = avg_pixels16_3dnow;
-            avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow;
-            avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow;
-            avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
-            
-            put_pixels_tab[1][1] = put_pixels8_x2_3dnow;
-            put_pixels_tab[1][2] = put_pixels8_y2_3dnow;
-            put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow;
-            put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow;
+            c->avg_pixels_tab[0][0] = avg_pixels16_3dnow;
+            c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow;
+            c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow;
+            c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
 
-            avg_pixels_tab[1][0] = avg_pixels8_3dnow;
-            avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow;
-            avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow;
-            avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow;
+            c->put_pixels_tab[1][1] = put_pixels8_x2_3dnow;
+            c->put_pixels_tab[1][2] = put_pixels8_y2_3dnow;
+            c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow;
+            c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow;
+
+            c->avg_pixels_tab[1][0] = avg_pixels8_3dnow;
+            c->avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow;
+            c->avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow;
+            c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow;
         }
     }
 
@@ -624,25 +624,24 @@
 /* remove any non bit exact operation (testing purpose). NOTE that
    this function should be kept as small as possible because it is
    always difficult to test automatically non bit exact cases. */
-void dsputil_set_bit_exact_mmx(void)
+void dsputil_set_bit_exact_mmx(DSPContext* c, unsigned mask)
 {
     if (mm_flags & MM_MMX) {
-    
         /* MMX2 & 3DNOW */
-        put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx;
-        put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx;
-        avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx;
-        put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx;
-        put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx;
-        avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx;
+        c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx;
+        c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx;
+        c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx;
+        c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx;
+        c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx;
+        c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx;
 
         if (mm_flags & MM_MMXEXT) {
-            pix_abs16x16_x2  = pix_abs16x16_x2_mmx;
-            pix_abs16x16_y2  = pix_abs16x16_y2_mmx;
-            pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx;
-            pix_abs8x8_x2 = pix_abs8x8_x2_mmx;
-            pix_abs8x8_y2 = pix_abs8x8_y2_mmx;
-            pix_abs8x8_xy2= pix_abs8x8_xy2_mmx;
+            c->pix_abs16x16_x2  = pix_abs16x16_x2_mmx;
+            c->pix_abs16x16_y2  = pix_abs16x16_y2_mmx;
+            c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx;
+            c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx;
+            c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx;
+            c->pix_abs8x8_xy2= pix_abs8x8_xy2_mmx;
         }
     }
 }
--- a/motion_est.c	Mon Nov 11 09:37:40 2002 +0000
+++ b/motion_est.c	Mon Nov 11 09:40:17 2002 +0000
@@ -88,8 +88,8 @@
     return s;
 }
 
-static void no_motion_search(MpegEncContext * s,
-			     int *mx_ptr, int *my_ptr)
+static inline void no_motion_search(MpegEncContext * s,
+				    int *mx_ptr, int *my_ptr)
 {
     *mx_ptr = 16 * s->mb_x;
     *my_ptr = 16 * s->mb_y;
@@ -123,7 +123,7 @@
     my = 0;
     for (y = y1; y <= y2; y++) {
 	for (x = x1; x <= x2; x++) {
-	    d = pix_abs16x16(pix, ref_picture + (y * s->linesize) + x,
+	    d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x,
 			     s->linesize);
 	    if (d < dmin ||
 		(d == dmin &&
@@ -188,7 +188,7 @@
     do {
 	for (y = y1; y <= y2; y += range) {
 	    for (x = x1; x <= x2; x += range) {
-		d = pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
+		d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
 		if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
 		    dmin = d;
 		    mx = x;
@@ -268,7 +268,7 @@
 
 	lastx = x;
 	for (x = x1; x <= x2; x += range) {
-	    d = pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
+	    d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
 	    if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
 		dminx = d;
 		mx = x;
@@ -277,7 +277,7 @@
 
 	x = lastx;
 	for (y = y1; y <= y2; y += range) {
-	    d = pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
+	    d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize);
 	    if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
 		dminy = d;
 		my = y;
@@ -324,7 +324,7 @@
     const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
     const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
     if(map[index]!=key){\
-        d = pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
+        d = s->dsp.pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
         d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
         COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
         map[index]= key;\
@@ -355,7 +355,7 @@
     const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
     const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
     if(map[index]!=key){\
-        d = pix_abs8x8(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
+        d = s->dsp.pix_abs8x8(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
         d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
         COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
         map[index]= key;\
@@ -590,7 +590,7 @@
     
     map_generation= update_map_generation(s);
 
-    dmin = pix_abs16x16(new_pic, old_pic, pic_stride);
+    dmin = s->dsp.pix_abs16x16(new_pic, old_pic, pic_stride);
     map[0]= map_generation;
     score_map[0]= dmin;
 
@@ -644,11 +644,11 @@
     if(s->me_method==ME_EPZS)
         dmin= small_diamond_search(s, best, dmin, new_pic, old_pic, pic_stride, 
                                    pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, 
-                                   shift, map, score_map, map_generation, pix_abs16x16);
+				   shift, map, score_map, map_generation, s->dsp.pix_abs16x16);
     else
         dmin=         cross_search(s, best, dmin, new_pic, old_pic, pic_stride, 
                                    pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, 
-                                   shift, map, score_map, map_generation, pix_abs16x16);
+                                   shift, map, score_map, map_generation, s->dsp.pix_abs16x16);
 //check(best[0],best[1],0, b1)
     *mx_ptr= best[0];
     *my_ptr= best[1];    
@@ -683,7 +683,7 @@
 //printf("%d %d %d %d //",xmin, ymin, xmax, ymax); 
     /* first line */
     if ((s->mb_y == 0 || s->first_slice_line) && block<2) {
-        CHECK_MV4(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
+	CHECK_MV4(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
         CHECK_MV4(P_LAST[0]>>shift, P_LAST[1]>>shift)
         CHECK_MV4(P_MV1[0]>>shift, P_MV1[1]>>shift)
     }else{
@@ -705,11 +705,11 @@
     if(s->me_method==ME_EPZS)
         dmin= small_diamond_search(s, best, dmin, new_pic, old_pic, pic_stride, 
                                    pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, 
-                                   shift, map, score_map, map_generation, pix_abs8x8);
+				   shift, map, score_map, map_generation, s->dsp.pix_abs8x8);
     else
         dmin=         cross_search(s, best, dmin, new_pic, old_pic, pic_stride, 
                                    pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, 
-                                   shift, map, score_map, map_generation, pix_abs8x8);
+                                   shift, map, score_map, map_generation, s->dsp.pix_abs8x8);
 
     *mx_ptr= best[0];
     *my_ptr= best[1];    
@@ -1023,8 +1023,8 @@
         dmin4 = epzs_motion_search4(s, block, &mx4, &my4, P, pred_x4, pred_y4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, ref_picture);
 
         dmin4= fast_halfpel_motion_search(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, 
-                                   pred_x4, pred_y4, ref_picture, pix_abs8x8_x2, 
-                                   pix_abs8x8_y2, pix_abs8x8_xy2, block);
+					  pred_x4, pred_y4, ref_picture, s->dsp.pix_abs8x8_x2,
+					  s->dsp.pix_abs8x8_y2, s->dsp.pix_abs8x8_xy2, block);
  
         s->motion_val[ s->block_index[block] ][0]= mx4;
         s->motion_val[ s->block_index[block] ][1]= my4;
@@ -1133,9 +1133,10 @@
     /* At this point (mx,my) are full-pell and the relative displacement */
     ppix = ref_picture + ((yy+my) * s->linesize) + (xx+mx);
     
-    sum = pix_sum(pix, s->linesize);
+    sum = s->dsp.pix_sum(pix, s->linesize);
     
-    varc = (pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
+    varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
+    // FIXME: MMX OPTIMIZE
     vard = (pix_norm(pix, ppix, s->linesize)+128)>>8;
 
 //printf("%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout);
@@ -1161,13 +1162,13 @@
         if (varc*2 + 200 > vard){
             mb_type|= MB_TYPE_INTER;
             if(s->me_method >= ME_EPZS)
-                fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, 
-                                           pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2, 
-                                           pix_abs16x16_xy2, 0);
+                fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
+					   pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2,
+					   s->dsp.pix_abs16x16_y2, s->dsp.pix_abs16x16_xy2, 0);
             else
-                halfpel_motion_search(     s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, 
-                                           pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2, 
-                                           pix_abs16x16_xy2, 0);                                           
+                halfpel_motion_search(     s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
+				           pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2,
+				           s->dsp.pix_abs16x16_y2, s->dsp.pix_abs16x16_xy2, 0);
         }else{
             mx <<=1;
             my <<=1;
@@ -1186,13 +1187,13 @@
             mb_type|= MB_TYPE_INTER;
             if (s->me_method != ME_ZERO) {
                 if(s->me_method >= ME_EPZS)
-                    dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, 
-                                           pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2, 
-                                           pix_abs16x16_xy2, 0);
+		    dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
+                                           pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, s->dsp.pix_abs16x16_y2,
+                                           s->dsp.pix_abs16x16_xy2, 0);
                 else
-                    dmin= halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, 
-                                           pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2, 
-                                           pix_abs16x16_xy2, 0);
+                    dmin= halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
+                                           pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, s->dsp.pix_abs16x16_y2,
+                                           s->dsp.pix_abs16x16_xy2, 0);
                 if((s->flags&CODEC_FLAG_4MV)
                    && !s->skip_me && varc>50 && vard>10){
                     int dmin4= mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift);
@@ -1303,9 +1304,9 @@
         break;
     }
     
-    dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, 
-                                pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2, 
-                                pix_abs16x16_xy2, 0);
+    dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
+                                pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, s->dsp.pix_abs16x16_y2,
+                                s->dsp.pix_abs16x16_xy2, 0);
 //printf("%d %d %d %d//", s->mb_x, s->mb_y, mx, my);
 //    s->mb_type[mb_y*s->mb_width + mb_x]= mb_type;
     mv_table[mot_xy][0]= mx;
@@ -1343,8 +1344,8 @@
         dxy&= 1;
 
     ptr = s->last_picture[0] + (src_y * s->linesize) + src_x;
-    put_pixels_tab[0][dxy](dest_y    , ptr    , s->linesize, 16);
-    
+    s->dsp.put_pixels_tab[0][dxy](dest_y    , ptr    , s->linesize, 16);
+
     fbmin += (mv_penalty[motion_bx-pred_bx] + mv_penalty[motion_by-pred_by])*s->qscale;
 
     dxy = ((motion_by & 1) << 1) | (motion_bx & 1);
@@ -1356,11 +1357,11 @@
     src_y = clip(src_y, -16, s->height);
     if (src_y == s->height)
         dxy&= 1;
-            
+
     ptr = s->next_picture[0] + (src_y * s->linesize) + src_x;
-    avg_pixels_tab[0][dxy](dest_y    , ptr    , s->linesize, 16);
-    
-    fbmin += pix_abs16x16(s->new_picture[0] + mb_x*16 + mb_y*16*s->linesize, dest_y, s->linesize);
+    s->dsp.avg_pixels_tab[0][dxy](dest_y    , ptr    , s->linesize, 16);
+
+    fbmin += s->dsp.pix_abs16x16(s->new_picture[0] + mb_x*16 + mb_y*16*s->linesize, dest_y, s->linesize);
     return fbmin;
 }
 
@@ -1443,7 +1444,7 @@
             if (src_y == height) dxy &= ~2;
 
             ptr = s->last_picture[0] + (src_y * s->linesize) + src_x;
-            put_pixels_tab[0][dxy](dest_y    , ptr    , s->linesize, 16);
+            s->dsp.put_pixels_tab[0][dxy](dest_y    , ptr    , s->linesize, 16);
 
             dxy = ((motion_by & 1) << 1) | (motion_bx & 1);
             src_x = (mb_x + bx) * 16 + (motion_bx >> 1);
@@ -1453,7 +1454,7 @@
             src_y = clip(src_y, -16, height);
             if (src_y == height) dxy &= ~2;
 
-            avg_pixels_tab[0][dxy](dest_y    , ptr    , s->linesize, 16);
+	    s->dsp.avg_pixels_tab[0][dxy](dest_y    , ptr    , s->linesize, 16);
         }
     }
 
--- a/mpeg12.c	Mon Nov 11 09:37:40 2002 +0000
+++ b/mpeg12.c	Mon Nov 11 09:40:17 2002 +0000
@@ -1623,7 +1623,7 @@
     s->mb_incr= 1;
 
     for(;;) {
-        clear_blocks(s->block[0]);
+	s->dsp.clear_blocks(s->block[0]);
         
         ret = mpeg_decode_mb(s, s->block);
         dprintf("ret=%d\n", ret);
--- a/mpegvideo.c	Mon Nov 11 09:37:40 2002 +0000
+++ b/mpegvideo.c	Mon Nov 11 09:40:17 2002 +0000
@@ -57,7 +57,7 @@
 /* for jpeg fast DCT */
 #define CONST_BITS 14
 
-static const unsigned short aanscales[64] = {
+static const uint16_t aanscales[64] = {
     /* precomputed values scaled up by 14 bits */
     16384, 22725, 21407, 19266, 16384, 12873,  8867,  4520,
     22725, 31521, 29692, 26722, 22725, 17855, 12299,  6270,
@@ -70,7 +70,7 @@
 };
 
 /* Input permutation for the simple_idct_mmx */
-static const UINT8 simple_mmx_permutation[64]={
+static const uint8_t simple_mmx_permutation[64]={
 	0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, 
 	0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, 
 	0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, 
@@ -81,7 +81,7 @@
 	0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
 };
 
-static UINT8 h263_chroma_roundtab[16] = {
+static const uint8_t h263_chroma_roundtab[16] = {
     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
 };
 
@@ -172,16 +172,19 @@
 }
 
 /* XXX: those functions should be suppressed ASAP when all IDCTs are
-   converted */
+ converted */
+// *FIXME* this is an ugly hack using a local static
+static void (*ff_put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
+static void (*ff_add_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size);
 static void ff_jref_idct_put(UINT8 *dest, int line_size, DCTELEM *block)
 {
     j_rev_dct (block);
-    put_pixels_clamped(block, dest, line_size);
+    ff_put_pixels_clamped(block, dest, line_size);
 }
 static void ff_jref_idct_add(UINT8 *dest, int line_size, DCTELEM *block)
 {
     j_rev_dct (block);
-    add_pixels_clamped(block, dest, line_size);
+    ff_add_pixels_clamped(block, dest, line_size);
 }
 
 /* init common dct for both encoder and decoder */
@@ -189,6 +192,9 @@
 {
     int i;
 
+    ff_put_pixels_clamped = s->dsp.put_pixels_clamped;
+    ff_add_pixels_clamped = s->dsp.add_pixels_clamped;
+
     s->dct_unquantize_h263 = dct_unquantize_h263_c;
     s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_c;
     s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_c;
@@ -268,29 +274,30 @@
     UINT8 *pict;
     int y_size, c_size, yc_size, i;
 
+    dsputil_init(&s->dsp, s->avctx->dsp_mask);
     DCT_common_init(s);
-    
+
     s->flags= s->avctx->flags;
 
     s->mb_width = (s->width + 15) / 16;
     s->mb_height = (s->height + 15) / 16;
-    
+
+    /* set default edge pos, will be overridden in decode_header if needed */
+    s->h_edge_pos= s->mb_width*16;
+    s->v_edge_pos= s->mb_height*16;
+
+    s->mb_num = s->mb_width * s->mb_height;
+
     y_size = (2 * s->mb_width + 2) * (2 * s->mb_height + 2);
     c_size = (s->mb_width + 2) * (s->mb_height + 2);
     yc_size = y_size + 2 * c_size;
-    
-    /* set default edge pos, will be overriden in decode_header if needed */
-    s->h_edge_pos= s->mb_width*16;
-    s->v_edge_pos= s->mb_height*16;
-    
+
     /* convert fourcc to upper case */
     s->avctx->fourcc=   toupper( s->avctx->fourcc     &0xFF)          
                      + (toupper((s->avctx->fourcc>>8 )&0xFF)<<8 )
                      + (toupper((s->avctx->fourcc>>16)&0xFF)<<16) 
                      + (toupper((s->avctx->fourcc>>24)&0xFF)<<24);
 
-    s->mb_num = s->mb_width * s->mb_height;
-    
     if(!(s->flags&CODEC_FLAG_DR1)){
       s->linesize   = s->mb_width * 16 + 2 * EDGE_WIDTH;
       s->uvlinesize = s->mb_width * 8  +     EDGE_WIDTH;
@@ -1133,17 +1140,17 @@
     }
     
     if((motion_x|motion_y)&7){
-        ff_gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
-        ff_gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
+        s->dsp.gmc1(dest_y  , ptr  , linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
+        s->dsp.gmc1(dest_y+8, ptr+8, linesize, 16, motion_x&15, motion_y&15, 128 - s->no_rounding);
     }else{
         int dxy;
         
         dxy= ((motion_x>>3)&1) | ((motion_y>>2)&2);
         if (s->no_rounding){
-            put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
+	    s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y, ptr, linesize, 16);
         }else{
-            put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
-        }        
+            s->dsp.put_pixels_tab       [0][dxy](dest_y, ptr, linesize, 16);
+        }
     }
     
     if(s->flags&CODEC_FLAG_GRAY) return;
@@ -1167,14 +1174,14 @@
         emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
         ptr= s->edge_emu_buffer;
     }
-    ff_gmc1(dest_cb + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
+    s->dsp.gmc1(dest_cb + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
     
     ptr = ref_picture[2] + offset;
     if(emu){
         emulated_edge_mc(s, ptr, uvlinesize, 9, 9, src_x, src_y, s->h_edge_pos>>1, s->v_edge_pos>>1);
         ptr= s->edge_emu_buffer;
     }
-    ff_gmc1(dest_cr + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
+    s->dsp.gmc1(dest_cr + (dest_offset>>1), ptr, uvlinesize, 8, motion_x&15, motion_y&15, 128 - s->no_rounding);
     
     return;
 }
@@ -1199,14 +1206,14 @@
     ox= s->sprite_offset[0][0] + s->sprite_delta[0][0]*s->mb_x*16 + s->sprite_delta[0][1]*s->mb_y*16;
     oy= s->sprite_offset[0][1] + s->sprite_delta[1][0]*s->mb_x*16 + s->sprite_delta[1][1]*s->mb_y*16;
 
-    ff_gmc(dest_y, ptr, linesize, 16, 
+    s->dsp.gmc(dest_y, ptr, linesize, 16,
            ox, 
            oy, 
            s->sprite_delta[0][0], s->sprite_delta[0][1],
            s->sprite_delta[1][0], s->sprite_delta[1][1], 
            a+1, (1<<(2*a+1)) - s->no_rounding,
            s->h_edge_pos, s->v_edge_pos);
-    ff_gmc(dest_y+8, ptr, linesize, 16, 
+    s->dsp.gmc(dest_y+8, ptr, linesize, 16,
            ox + s->sprite_delta[0][0]*8, 
            oy + s->sprite_delta[1][0]*8, 
            s->sprite_delta[0][0], s->sprite_delta[0][1],
@@ -1224,7 +1231,7 @@
     oy= s->sprite_offset[1][1] + s->sprite_delta[1][0]*s->mb_x*8 + s->sprite_delta[1][1]*s->mb_y*8;
 
     ptr = ref_picture[1] + (src_offset>>1);
-    ff_gmc(dest_cb, ptr, uvlinesize, 8, 
+    s->dsp.gmc(dest_cb, ptr, uvlinesize, 8,
            ox, 
            oy, 
            s->sprite_delta[0][0], s->sprite_delta[0][1],
@@ -1233,7 +1240,7 @@
            s->h_edge_pos>>1, s->v_edge_pos>>1);
     
     ptr = ref_picture[2] + (src_offset>>1);
-    ff_gmc(dest_cr, ptr, uvlinesize, 8, 
+    s->dsp.gmc(dest_cr, ptr, uvlinesize, 8,
            ox, 
            oy, 
            s->sprite_delta[0][0], s->sprite_delta[0][1],
@@ -1248,7 +1255,7 @@
     int x, y;
     int start_y, start_x, end_y, end_x;
     UINT8 *buf= s->edge_emu_buffer;
-    
+
     if(src_y>= h){
         src+= (h-1-src_y)*linesize;
         src_y=h-1;
@@ -1860,17 +1867,17 @@
             /* decoding or more than one mb_type (MC was allready done otherwise) */
             if((!s->encoding) || (s->mb_type[mb_xy]&(s->mb_type[mb_xy]-1))){
                 if ((!s->no_rounding) || s->pict_type==B_TYPE){                
-                    op_pix = put_pixels_tab;
-                    op_qpix= put_qpel_pixels_tab;
+		    op_pix = s->dsp.put_pixels_tab;
+                    op_qpix= s->dsp.put_qpel_pixels_tab;
                 }else{
-                    op_pix = put_no_rnd_pixels_tab;
-                    op_qpix= put_no_rnd_qpel_pixels_tab;
+                    op_pix = s->dsp.put_no_rnd_pixels_tab;
+                    op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
                 }
 
                 if (s->mv_dir & MV_DIR_FORWARD) {
                     MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture, op_pix, op_qpix);
-                    op_pix = avg_pixels_tab;
-                    op_qpix= avg_qpel_pixels_tab;
+		    op_pix = s->dsp.avg_pixels_tab;
+                    op_qpix= s->dsp.avg_qpel_pixels_tab;
                 }
                 if (s->mv_dir & MV_DIR_BACKWARD) {
                     MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture, op_pix, op_qpix);
@@ -2224,10 +2231,10 @@
                 s->interlaced_dct=0;
         }
         
-        get_pixels(s->block[0], ptr                 , wrap_y);
-        get_pixels(s->block[1], ptr              + 8, wrap_y);
-        get_pixels(s->block[2], ptr + dct_offset    , wrap_y);
-        get_pixels(s->block[3], ptr + dct_offset + 8, wrap_y);
+	s->dsp.get_pixels(s->block[0], ptr                 , wrap_y);
+        s->dsp.get_pixels(s->block[1], ptr              + 8, wrap_y);
+        s->dsp.get_pixels(s->block[2], ptr + dct_offset    , wrap_y);
+        s->dsp.get_pixels(s->block[3], ptr + dct_offset + 8, wrap_y);
 
         if(s->flags&CODEC_FLAG_GRAY){
             skip_dct[4]= 1;
@@ -2239,14 +2246,14 @@
                 emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
                 ptr= s->edge_emu_buffer;
             }
-            get_pixels(s->block[4], ptr, wrap_c);
+	    s->dsp.get_pixels(s->block[4], ptr, wrap_c);
 
             ptr = s->new_picture[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
             if(emu){
                 emulated_edge_mc(s, ptr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
                 ptr= s->edge_emu_buffer;
             }
-            get_pixels(s->block[5], ptr, wrap_c);
+            s->dsp.get_pixels(s->block[5], ptr, wrap_c);
         }
     }else{
         op_pixels_func (*op_pix)[4];
@@ -2266,17 +2273,17 @@
         ptr_cr = s->new_picture[2] + (mb_y * 8 * wrap_c) + mb_x * 8;
 
         if ((!s->no_rounding) || s->pict_type==B_TYPE){
-            op_pix = put_pixels_tab;
-            op_qpix= put_qpel_pixels_tab;
+	    op_pix = s->dsp.put_pixels_tab;
+            op_qpix= s->dsp.put_qpel_pixels_tab;
         }else{
-            op_pix = put_no_rnd_pixels_tab;
-            op_qpix= put_no_rnd_qpel_pixels_tab;
+            op_pix = s->dsp.put_no_rnd_pixels_tab;
+            op_qpix= s->dsp.put_no_rnd_qpel_pixels_tab;
         }
 
         if (s->mv_dir & MV_DIR_FORWARD) {
             MPV_motion(s, dest_y, dest_cb, dest_cr, 0, s->last_picture, op_pix, op_qpix);
-            op_pix = avg_pixels_tab;
-            op_qpix= avg_qpel_pixels_tab;
+            op_pix = s->dsp.avg_pixels_tab;
+            op_qpix= s->dsp.avg_qpel_pixels_tab;
         }
         if (s->mv_dir & MV_DIR_BACKWARD) {
             MPV_motion(s, dest_y, dest_cb, dest_cr, 1, s->next_picture, op_pix, op_qpix);
@@ -2305,10 +2312,10 @@
                 s->interlaced_dct=0;
         }
         
-        diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
-        diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
-        diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
-        diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
+	s->dsp.diff_pixels(s->block[0], ptr_y                 , dest_y                 , wrap_y);
+        s->dsp.diff_pixels(s->block[1], ptr_y              + 8, dest_y              + 8, wrap_y);
+        s->dsp.diff_pixels(s->block[2], ptr_y + dct_offset    , dest_y + dct_offset    , wrap_y);
+        s->dsp.diff_pixels(s->block[3], ptr_y + dct_offset + 8, dest_y + dct_offset + 8, wrap_y);
         
         if(s->flags&CODEC_FLAG_GRAY){
             skip_dct[4]= 1;
@@ -2318,23 +2325,23 @@
                 emulated_edge_mc(s, ptr_cb, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
                 ptr_cb= s->edge_emu_buffer;
             }
-            diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
+            s->dsp.diff_pixels(s->block[4], ptr_cb, dest_cb, wrap_c);
             if(emu){
                 emulated_edge_mc(s, ptr_cr, wrap_c, 8, 8, mb_x*8, mb_y*8, s->width>>1, s->height>>1);
                 ptr_cr= s->edge_emu_buffer;
             }
-            diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
+            s->dsp.diff_pixels(s->block[5], ptr_cr, dest_cr, wrap_c);
         }
 
         /* pre quantization */         
         if(s->mc_mb_var[s->mb_width*mb_y+ mb_x]<2*s->qscale*s->qscale){
             //FIXME optimize
-            if(pix_abs8x8(ptr_y               , dest_y               , wrap_y) < 20*s->qscale) skip_dct[0]= 1;
-            if(pix_abs8x8(ptr_y            + 8, dest_y            + 8, wrap_y) < 20*s->qscale) skip_dct[1]= 1;
-            if(pix_abs8x8(ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y) < 20*s->qscale) skip_dct[2]= 1;
-            if(pix_abs8x8(ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y) < 20*s->qscale) skip_dct[3]= 1;
-            if(pix_abs8x8(ptr_cb              , dest_cb              , wrap_y) < 20*s->qscale) skip_dct[4]= 1;
-            if(pix_abs8x8(ptr_cr              , dest_cr              , wrap_y) < 20*s->qscale) skip_dct[5]= 1;
+	    if(s->dsp.pix_abs8x8(ptr_y               , dest_y               , wrap_y) < 20*s->qscale) skip_dct[0]= 1;
+            if(s->dsp.pix_abs8x8(ptr_y            + 8, dest_y            + 8, wrap_y) < 20*s->qscale) skip_dct[1]= 1;
+            if(s->dsp.pix_abs8x8(ptr_y +dct_offset   , dest_y +dct_offset   , wrap_y) < 20*s->qscale) skip_dct[2]= 1;
+            if(s->dsp.pix_abs8x8(ptr_y +dct_offset+ 8, dest_y +dct_offset+ 8, wrap_y) < 20*s->qscale) skip_dct[3]= 1;
+            if(s->dsp.pix_abs8x8(ptr_cb              , dest_cb              , wrap_y) < 20*s->qscale) skip_dct[4]= 1;
+            if(s->dsp.pix_abs8x8(ptr_cr              , dest_cr              , wrap_y) < 20*s->qscale) skip_dct[5]= 1;
 #if 0
 {
  static int stat[7];
@@ -2601,9 +2608,9 @@
                     int yy = mb_y * 16;
                     uint8_t *pix = s->new_picture[0] + (yy * s->linesize) + xx;
                     int varc;
-                    int sum = pix_sum(pix, s->linesize);
+		    int sum = s->dsp.pix_sum(pix, s->linesize);
     
-                    varc = (pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
+		    varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
 
                     s->mb_var [s->mb_width * mb_y + mb_x] = varc;
                     s->mb_mean[s->mb_width * mb_y + mb_x] = (sum+128)>>8;
--- a/mpegvideo.h	Mon Nov 11 09:37:40 2002 +0000
+++ b/mpegvideo.h	Mon Nov 11 09:40:17 2002 +0000
@@ -221,6 +221,7 @@
     int unrestricted_mv;
     int h263_long_vectors; /* use horrible h263v1 long vector mode */
 
+    DSPContext dsp;             /* pointers for accelerated dsp functions */
     int f_code; /* forward MV resolution */
     int b_code; /* backward MV resolution for B Frames (mpeg4) */
     INT16 (*motion_val)[2];            /* used for MV prediction (4MV per MB) */
--- a/rv10.c	Mon Nov 11 09:37:40 2002 +0000
+++ b/rv10.c	Mon Nov 11 09:40:17 2002 +0000
@@ -447,7 +447,7 @@
         printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y);
 #endif
         
-        clear_blocks(s->block[0]);
+	s->dsp.clear_blocks(s->block[0]);
         s->mv_dir = MV_DIR_FORWARD;
         s->mv_type = MV_TYPE_16X16; 
         if (ff_h263_decode_mb(s, s->block) == SLICE_ERROR) {
--- a/svq1.c	Mon Nov 11 09:37:40 2002 +0000
+++ b/svq1.c	Mon Nov 11 09:40:17 2002 +0000
@@ -804,7 +804,7 @@
   }
 }
 
-static int svq1_motion_inter_block (bit_buffer_t *bitbuf,
+static int svq1_motion_inter_block (MpegEncContext *s, bit_buffer_t *bitbuf,
 			       uint8_t *current, uint8_t *previous, int pitch,
 			       svq1_pmv_t *motion, int x, int y) {
   uint8_t    *src;
@@ -839,12 +839,12 @@
   src = &previous[(x + (mv.x >> 1)) + (y + (mv.y >> 1))*pitch];
   dst = current;
 
-  put_pixels_tab[0][((mv.y & 1) << 1) | (mv.x & 1)](dst,src,pitch,16);
+  s->dsp.put_pixels_tab[0][((mv.y & 1) << 1) | (mv.x & 1)](dst,src,pitch,16);
 
   return 0;
 }
 
-static int svq1_motion_inter_4v_block (bit_buffer_t *bitbuf,
+static int svq1_motion_inter_4v_block (MpegEncContext *s, bit_buffer_t *bitbuf,
 				  uint8_t *current, uint8_t *previous, int pitch,
 				  svq1_pmv_t *motion,int x, int y) {
   uint8_t    *src;
@@ -906,7 +906,7 @@
     src = &previous[(x + (pmv[i]->x >> 1)) + (y + (pmv[i]->y >> 1))*pitch];
     dst = current;
 
-    put_pixels_tab[1][((pmv[i]->y & 1) << 1) | (pmv[i]->x & 1)](dst,src,pitch,8);
+    s->dsp.put_pixels_tab[1][((pmv[i]->y & 1) << 1) | (pmv[i]->x & 1)](dst,src,pitch,8);
 
     /* select next block */
     if (i & 1) {
@@ -921,7 +921,7 @@
   return 0;
 }
 
-static int svq1_decode_delta_block (bit_buffer_t *bitbuf,
+static int svq1_decode_delta_block (MpegEncContext *s, bit_buffer_t *bitbuf,
 			uint8_t *current, uint8_t *previous, int pitch,
 			svq1_pmv_t *motion, int x, int y) {
   uint32_t bit_cache;
@@ -951,7 +951,7 @@
     break;
 
   case SVQ1_BLOCK_INTER:
-    result = svq1_motion_inter_block (bitbuf, current, previous, pitch, motion, x, y);
+    result = svq1_motion_inter_block (s, bitbuf, current, previous, pitch, motion, x, y);
 
     if (result != 0)
     {
@@ -964,7 +964,7 @@
     break;
 
   case SVQ1_BLOCK_INTER_4V:
-    result = svq1_motion_inter_4v_block (bitbuf, current, previous, pitch, motion, x, y);
+    result = svq1_motion_inter_4v_block (s, bitbuf, current, previous, pitch, motion, x, y);
 
     if (result != 0)
     {
@@ -1142,8 +1142,8 @@
 
       for (y=0; y < height; y+=16) {
 	for (x=0; x < width; x+=16) {
-	  result = svq1_decode_delta_block (&s->gb, &current[x], previous,
-				       linesize, pmv, x, y);
+	  result = svq1_decode_delta_block (s, &s->gb, &current[x], previous,
+					    linesize, pmv, x, y);
 	  if (result != 0)
 	  {
 #ifdef DEBUG_SVQ1