# HG changeset patch
# User michaelni
# Date 1046703240 0
# Node ID f59c3f66363be9c76bfa856d37a4043c2a7c6804
# Parent  03df246fb06b214d899152bf566f557412862bf2
MpegEncContext.(i)dct_* -> DspContext.(i)dct_*
bitexact cleanup

diff -r 03df246fb06b -r f59c3f66363b alpha/dsputil_alpha.c
--- a/alpha/dsputil_alpha.c	Sat Mar 01 00:16:00 2003 +0000
+++ b/alpha/dsputil_alpha.c	Mon Mar 03 14:54:00 2003 +0000
@@ -20,6 +20,9 @@
 #include "asm.h"
 #include "../dsputil.h"
 
+extern void simple_idct_put_axp(uint8_t *dest, int line_size, DCTELEM *block);
+extern void simple_idct_add_axp(uint8_t *dest, int line_size, DCTELEM *block);
+
 void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels,
                         int line_size, int h);
 void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels,
@@ -295,7 +298,7 @@
     return pix_abs8x8_mvi(a, b, stride);
 }
 
-void dsputil_init_alpha(DSPContext* c, unsigned mask)
+void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx)
 {
     c->put_pixels_tab[0][0] = put_pixels16_axp_asm;
     c->put_pixels_tab[0][1] = put_pixels16_x2_axp;
@@ -357,4 +360,7 @@
 
     put_pixels_clamped_axp_p = c->put_pixels_clamped;
     add_pixels_clamped_axp_p = c->add_pixels_clamped;
+    
+    c->idct_put = simple_idct_put_axp;
+    c->idct_add = simple_idct_add_axp;
 }
diff -r 03df246fb06b -r f59c3f66363b alpha/mpegvideo_alpha.c
--- a/alpha/mpegvideo_alpha.c	Sat Mar 01 00:16:00 2003 +0000
+++ b/alpha/mpegvideo_alpha.c	Mon Mar 03 14:54:00 2003 +0000
@@ -21,9 +21,6 @@
 #include "../dsputil.h"
 #include "../mpegvideo.h"
 
-extern void simple_idct_put_axp(uint8_t *dest, int line_size, DCTELEM *block);
-extern void simple_idct_add_axp(uint8_t *dest, int line_size, DCTELEM *block);
-  
 static void dct_unquantize_h263_axp(MpegEncContext *s, DCTELEM *block,
                                     int n, int qscale)
 {
@@ -97,6 +94,4 @@
 void MPV_common_init_axp(MpegEncContext *s)
 {
     s->dct_unquantize_h263 = dct_unquantize_h263_axp;
-    s->idct_put = simple_idct_put_axp;
-    s->idct_add = simple_idct_add_axp;
 }
diff -r 03df246fb06b -r f59c3f66363b armv4l/dsputil_arm.c
--- a/armv4l/dsputil_arm.c	Sat Mar 01 00:16:00 2003 +0000
+++ b/armv4l/dsputil_arm.c	Mon Mar 03 14:54:00 2003 +0000
@@ -21,7 +21,33 @@
 
 extern void j_rev_dct_ARM(DCTELEM *data);
 
-void dsputil_init_armv4l(DSPContext* c, unsigned mask)
+/* XXX: local hack */
+static void (*ff_put_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
+static void (*ff_add_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
+
+/* XXX: those functions should be removed ASAP once all IDCTs are
+   converted */
+static void arm_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
+{   /* runs j_rev_dct_ARM on block, then passes block to the put-clamped routine for dest */
+    j_rev_dct_ARM (block);
+    ff_put_pixels_clamped(block, dest, line_size); /* file-scope pointer, assigned in dsputil_init_armv4l */
+}
+static void arm_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
 {
-//  ff_idct = j_rev_dct_ARM;
+    j_rev_dct_ARM (block); /* same transform call as arm_idct_put */
+    ff_add_pixels_clamped(block, dest, line_size); /* add-clamped variant; pointer assigned in dsputil_init_armv4l */
 }
+
+void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx)
+{   /* ARMv4L hook, called from dsputil_init() under ARCH_ARMV4L; may replace the IDCT entries of c */
+    const int idct_algo= avctx->idct_algo;
+
+    ff_put_pixels_clamped = c->put_pixels_clamped; /* cached in file-scope statics used by arm_idct_put/add */
+    ff_add_pixels_clamped = c->add_pixels_clamped;
+
+    if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_ARM){ /* any other idct_algo leaves c's IDCT fields as the caller set them */
+        c->idct_put= arm_idct_put;
+        c->idct_add= arm_idct_add;
+        c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;/* FF_NO_IDCT_PERM */
+    }
+}
diff -r 03df246fb06b -r f59c3f66363b armv4l/mpegvideo_arm.c
--- a/armv4l/mpegvideo_arm.c	Sat Mar 01 00:16:00 2003 +0000
+++ b/armv4l/mpegvideo_arm.c	Mon Mar 03 14:54:00 2003 +0000
@@ -21,35 +21,6 @@
 #include "../mpegvideo.h"
 #include "../avcodec.h"
 
-extern void j_rev_dct_ARM(DCTELEM *data);
-/* XXX: local hack */
-static void (*ff_put_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
-static void (*ff_add_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
-
-/* XXX: those functions should be suppressed ASAP when all IDCTs are
-   converted */
-static void arm_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
-{
-    j_rev_dct_ARM (block);
-    ff_put_pixels_clamped(block, dest, line_size);
-}
-static void arm_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
-{
-    j_rev_dct_ARM (block);
-    ff_add_pixels_clamped(block, dest, line_size);
-}
-
 void MPV_common_init_armv4l(MpegEncContext *s)
 {
-    int i;
-    const int idct_algo= s->avctx->idct_algo;
-
-    ff_put_pixels_clamped = s->dsp.put_pixels_clamped;
-    ff_add_pixels_clamped = s->dsp.add_pixels_clamped;
-
-    if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_ARM){
-        s->idct_put= arm_idct_put;
-        s->idct_add= arm_idct_add;
-        s->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;/* FF_NO_IDCT_PERM */
-    }
 }
diff -r 03df246fb06b -r f59c3f66363b avcodec.h
--- a/avcodec.h	Sat Mar 01 00:16:00 2003 +0000
+++ b/avcodec.h	Mon Mar 03 14:54:00 2003 +0000
@@ -16,8 +16,8 @@
 
 #define LIBAVCODEC_VERSION_INT 0x000406
 #define LIBAVCODEC_VERSION     "0.4.6"
-#define LIBAVCODEC_BUILD       4659
-#define LIBAVCODEC_BUILD_STR   "4659"
+#define LIBAVCODEC_BUILD       4660
+#define LIBAVCODEC_BUILD_STR   "4660"
 
 enum CodecID {
     CODEC_ID_NONE, 
@@ -159,6 +159,7 @@
 #define CODEC_FLAG_ALT_SCAN       0x00100000 /* use alternate scan */
 #define CODEC_FLAG_TRELLIS_QUANT  0x00200000 /* use trellis quantization */
 #define CODEC_FLAG_GLOBAL_HEADER  0x00400000 /* place global headers in extradata instead of every keyframe */
+#define CODEC_FLAG_BITEXACT       0x00800000 /* use only bitexact stuff (except (i)dct) */
 
 /* codec capabilities */
 
@@ -1167,8 +1168,6 @@
 unsigned avcodec_build(void);
 void avcodec_init(void);
 
-void avcodec_set_bit_exact(void);
-
 void register_avcodec(AVCodec *format);
 AVCodec *avcodec_find_encoder(enum CodecID id);
 AVCodec *avcodec_find_encoder_by_name(const char *name);
diff -r 03df246fb06b -r f59c3f66363b dsputil.c
--- a/dsputil.c	Sat Mar 01 00:16:00 2003 +0000
+++ b/dsputil.c	Mon Mar 03 14:54:00 2003 +0000
@@ -21,8 +21,8 @@
 #include "avcodec.h"
 #include "dsputil.h"
 #include "mpegvideo.h"
+#include "simple_idct.h"
 
-int ff_bit_exact=0;
 
 uint8_t cropTbl[256 + 2 * MAX_NEG_CROP];
 uint32_t squareTbl[512];
@@ -99,6 +99,18 @@
   17318417,   17248865,   17179870,   17111424,   17043522,   16976156,   16909321,   16843010,
 };
 
+/* Input permutation for the simple_idct_mmx; dsputil_init() copies it into idct_permutation for FF_SIMPLE_IDCT_PERM */
+static const uint8_t simple_mmx_permutation[64]={
+	0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, 
+	0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, 
+	0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, 
+	0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, 
+	0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, 
+	0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, 
+	0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, 
+	0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
+};
+
 static int pix_sum_c(uint8_t * pix, int line_size)
 {
     int s, i, j;
@@ -1787,7 +1799,7 @@
     int sum=0, i;
 
     s->dsp.diff_pixels(temp, src1, src2, stride);
-    s->fdct(temp);
+    s->dsp.fdct(temp);
 
     for(i=0; i<64; i++)
         sum+= ABS(temp[i]);
@@ -1887,7 +1899,7 @@
         s->dct_unquantize(s, temp, 0, s->qscale);
     }
     
-    s->idct_add(bak, stride, temp);
+    s->dsp.idct_add(bak, stride, temp);
     
     distoration= s->dsp.sse[1](NULL, bak, src1, stride);
 
@@ -1959,7 +1971,20 @@
 WARPER88_1616(rd8x8_c, rd16x16_c)
 WARPER88_1616(bit8x8_c, bit16x16_c)
 
-void dsputil_init(DSPContext* c, unsigned mask)
+/* XXX: those functions should be removed ASAP once all IDCTs are
+ converted */
+static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
+{   /* installed as c->idct_put by dsputil_init() when idct_algo==FF_IDCT_INT */
+    j_rev_dct (block);
+    put_pixels_clamped_c(block, dest, line_size); /* block is handed on after the j_rev_dct call */
+}
+static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
+{   /* installed as c->idct_add by dsputil_init() when idct_algo==FF_IDCT_INT */
+    j_rev_dct (block);
+    add_pixels_clamped_c(block, dest, line_size); /* add-clamped counterpart of ff_jref_idct_put */
+}
+
+void dsputil_init(DSPContext* c, AVCodecContext *avctx)
 {
     static int init_done = 0;
     int i;
@@ -1980,6 +2005,23 @@
 	init_done = 1;
     }
 
+#ifdef CONFIG_ENCODERS
+    if(avctx->dct_algo==FF_DCT_FASTINT)
+        c->fdct = fdct_ifast;
+    else
+        c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
+#endif //CONFIG_ENCODERS
+
+    if(avctx->idct_algo==FF_IDCT_INT){
+        c->idct_put= ff_jref_idct_put;
+        c->idct_add= ff_jref_idct_add;
+        c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
+    }else{ //accurate/default
+        c->idct_put= simple_idct_put;
+        c->idct_add= simple_idct_add;
+        c->idct_permutation_type= FF_NO_IDCT_PERM;
+    }
+
     c->get_pixels = get_pixels_c;
     c->diff_pixels = diff_pixels_c;
     c->put_pixels_clamped = put_pixels_clamped_c;
@@ -2082,37 +2124,43 @@
     c->diff_bytes= diff_bytes_c;
 
 #ifdef HAVE_MMX
-    dsputil_init_mmx(c, mask);
-    if (ff_bit_exact)
-    {
-        /* FIXME - AVCodec context should have flag for bitexact match */
-	/* fprintf(stderr, "\n\n\nff_bit_exact %d\n\n\n\n", ff_bit_exact); */
-	dsputil_set_bit_exact_mmx(c, mask);
-    }
+    dsputil_init_mmx(c, avctx);
 #endif
 #ifdef ARCH_ARMV4L
-    dsputil_init_armv4l(c, mask);
+    dsputil_init_armv4l(c, avctx);
 #endif
 #ifdef HAVE_MLIB
-    dsputil_init_mlib(c, mask);
+    dsputil_init_mlib(c, avctx);
 #endif
 #ifdef ARCH_ALPHA
-    dsputil_init_alpha(c, mask);
+    dsputil_init_alpha(c, avctx);
 #endif
 #ifdef ARCH_POWERPC
-    dsputil_init_ppc(c, mask);
+    dsputil_init_ppc(c, avctx);
 #endif
 #ifdef HAVE_MMI
-    dsputil_init_mmi(c, mask);
+    dsputil_init_mmi(c, avctx);
 #endif
+
+    switch(c->idct_permutation_type){
+    case FF_NO_IDCT_PERM:
+        for(i=0; i<64; i++)
+            c->idct_permutation[i]= i;
+        break;
+    case FF_LIBMPEG2_IDCT_PERM:
+        for(i=0; i<64; i++)
+            c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
+        break;
+    case FF_SIMPLE_IDCT_PERM:
+        for(i=0; i<64; i++)
+            c->idct_permutation[i]= simple_mmx_permutation[i];
+        break;
+    case FF_TRANSPOSE_IDCT_PERM:
+        for(i=0; i<64; i++)
+            c->idct_permutation[i]= ((i&7)<<3) | (i>>3);
+        break;
+    default:
+        fprintf(stderr, "Internal error, IDCT permutation not set\n");
+    }
 }
 
-/* remove any non bit exact operation (testing purpose) */
-void avcodec_set_bit_exact(void)
-{
-    ff_bit_exact=1;
-#ifdef HAVE_MMX
-// FIXME - better set_bit_exact
-//    dsputil_set_bit_exact_mmx();
-#endif
-}
diff -r 03df246fb06b -r f59c3f66363b dsputil.h
--- a/dsputil.h	Sat Mar 01 00:16:00 2003 +0000
+++ b/dsputil.h	Mon Mar 03 14:54:00 2003 +0000
@@ -149,9 +149,21 @@
     /* huffyuv specific */
     void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w);
     void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w);
+    
+    /* (I)DCT */
+    void (*fdct)(DCTELEM *block/* align 16*/);
+    void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
+    void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
+    uint8_t idct_permutation[64];
+    int idct_permutation_type;
+#define FF_NO_IDCT_PERM 1
+#define FF_LIBMPEG2_IDCT_PERM 2
+#define FF_SIMPLE_IDCT_PERM 3
+#define FF_TRANSPOSE_IDCT_PERM 4
+
 } DSPContext;
 
-void dsputil_init(DSPContext* p, unsigned mask);
+void dsputil_init(DSPContext* p, AVCodecContext *avctx);
 
 /**
  * permute block according to permuatation.
@@ -194,11 +206,8 @@
 
 #define __align8 __attribute__ ((aligned (8)))
 
-void dsputil_init_mmx(DSPContext* c, unsigned mask);
-void dsputil_set_bit_exact_mmx(DSPContext* c, unsigned mask);
-
-void dsputil_init_pix_mmx(DSPContext* c, unsigned mask);
-void dsputil_set_bit_exact_pix_mmx(DSPContext* c, unsigned mask);
+void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx);
+void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx);
 
 #elif defined(ARCH_ARMV4L)
 
@@ -206,20 +215,20 @@
    line ptimizations */
 #define __align8 __attribute__ ((aligned (4)))
 
-void dsputil_init_armv4l(DSPContext* c, unsigned mask);
+void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx);
 
 #elif defined(HAVE_MLIB)
 
 /* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */
 #define __align8 __attribute__ ((aligned (8)))
 
-void dsputil_init_mlib(DSPContext* c, unsigned mask);
+void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx);
 
 #elif defined(ARCH_ALPHA)
 
 #define __align8 __attribute__ ((aligned (8)))
 
-void dsputil_init_alpha(DSPContext* c, unsigned mask);
+void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx);
 
 #elif defined(ARCH_POWERPC)
 
@@ -233,13 +242,13 @@
 
 #define __align8 __attribute__ ((aligned (16)))
 
-void dsputil_init_ppc(DSPContext* c, unsigned mask);
+void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx);
 
 #elif defined(HAVE_MMI)
 
 #define __align8 __attribute__ ((aligned (16)))
 
-void dsputil_init_mmi(DSPContext* c, unsigned mask);
+void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx);
 
 #else
 
diff -r 03df246fb06b -r f59c3f66363b dv.c
--- a/dv.c	Sat Mar 01 00:16:00 2003 +0000
+++ b/dv.c	Mon Mar 03 14:54:00 2003 +0000
@@ -115,12 +115,12 @@
     /* XXX: fix it */
     memset(&s2, 0, sizeof(MpegEncContext));
     s2.avctx = avctx;
-    dsputil_init(&s2.dsp, avctx->dsp_mask);
+    dsputil_init(&s2.dsp, avctx);
     if (DCT_common_init(&s2) < 0)
        return -1;
 
-    s->idct_put[0] = s2.idct_put;
-    memcpy(s->idct_permutation, s2.idct_permutation, 64);
+    s->idct_put[0] = s2.dsp.idct_put;
+    memcpy(s->idct_permutation, s2.dsp.idct_permutation, 64);
     memcpy(s->dv_zigzag[0], s2.intra_scantable.permutated, 64);
 
     /* XXX: use MMX also for idct248 */
diff -r 03df246fb06b -r f59c3f66363b h263.c
--- a/h263.c	Sat Mar 01 00:16:00 2003 +0000
+++ b/h263.c	Mon Mar 03 14:54:00 2003 +0000
@@ -297,19 +297,19 @@
             if(s->mb_y==0 || s->qscale == qscale_table[xy] || n==2 || n==3){
                 /* same qscale */
                 for(i=1; i<8; i++){
-                    const int level= block[n][s->idct_permutation[i   ]];
+                    const int level= block[n][s->dsp.idct_permutation[i   ]];
                     score0+= ABS(level);
                     score1+= ABS(level - ac_val[i+8]);
-                    ac_val1[i  ]=    block[n][s->idct_permutation[i<<3]];
+                    ac_val1[i  ]=    block[n][s->dsp.idct_permutation[i<<3]];
                     ac_val1[i+8]= level;
                 }
             }else{
                 /* different qscale, we must rescale */
                 for(i=1; i<8; i++){
-                    const int level= block[n][s->idct_permutation[i   ]];
+                    const int level= block[n][s->dsp.idct_permutation[i   ]];
                     score0+= ABS(level);
                     score1+= ABS(level - ROUNDED_DIV(ac_val[i + 8]*qscale_table[xy], s->qscale));
-                    ac_val1[i  ]=    block[n][s->idct_permutation[i<<3]];
+                    ac_val1[i  ]=    block[n][s->dsp.idct_permutation[i<<3]];
                     ac_val1[i+8]= level;
                 }
             }
@@ -320,20 +320,20 @@
             if(s->mb_x==0 || s->qscale == qscale_table[xy] || n==1 || n==3){
                 /* same qscale */
                 for(i=1; i<8; i++){
-                    const int level= block[n][s->idct_permutation[i<<3]];
+                    const int level= block[n][s->dsp.idct_permutation[i<<3]];
                     score0+= ABS(level);
                     score1+= ABS(level - ac_val[i]);
                     ac_val1[i  ]= level;
-                    ac_val1[i+8]=    block[n][s->idct_permutation[i   ]];
+                    ac_val1[i+8]=    block[n][s->dsp.idct_permutation[i   ]];
                 }
             }else{
                 /* different qscale, we must rescale */
                 for(i=1; i<8; i++){
-                    const int level= block[n][s->idct_permutation[i<<3]];
+                    const int level= block[n][s->dsp.idct_permutation[i<<3]];
                     score0+= ABS(level);
                     score1+= ABS(level - ROUNDED_DIV(ac_val[i]*qscale_table[xy], s->qscale));
                     ac_val1[i  ]= level;
-                    ac_val1[i+8]=    block[n][s->idct_permutation[i   ]];
+                    ac_val1[i+8]=    block[n][s->dsp.idct_permutation[i   ]];
                 }
             }
         }
@@ -831,10 +831,10 @@
 
                 if(dir[i]){
                     for(j=1; j<8; j++) 
-                        block[i][s->idct_permutation[j   ]]= ac_val[j+8];
+                        block[i][s->dsp.idct_permutation[j   ]]= ac_val[j+8];
                 }else{
                     for(j=1; j<8; j++) 
-                        block[i][s->idct_permutation[j<<3]]= ac_val[j  ];
+                        block[i][s->dsp.idct_permutation[j<<3]]= ac_val[j  ];
                 }
                 s->block_last_index[i]= zigzag_last_index[i];
             }
@@ -1059,7 +1059,7 @@
             if (a != 1024) {
                 ac_val -= 16;
                 for(i=1;i<8;i++) {
-                    block[s->idct_permutation[i<<3]] += ac_val[i];
+                    block[s->dsp.idct_permutation[i<<3]] += ac_val[i];
                 }
                 pred_dc = a;
             }
@@ -1068,7 +1068,7 @@
             if (c != 1024) {
                 ac_val -= 16 * wrap;
                 for(i=1;i<8;i++) {
-                    block[s->idct_permutation[i   ]] += ac_val[i + 8];
+                    block[s->dsp.idct_permutation[i   ]] += ac_val[i + 8];
                 }
                 pred_dc = c;
             }
@@ -1096,10 +1096,10 @@
     
     /* left copy */
     for(i=1;i<8;i++)
-        ac_val1[i    ] = block[s->idct_permutation[i<<3]];
+        ac_val1[i    ] = block[s->dsp.idct_permutation[i<<3]];
     /* top copy */
     for(i=1;i<8;i++)
-        ac_val1[8 + i] = block[s->idct_permutation[i   ]];
+        ac_val1[8 + i] = block[s->dsp.idct_permutation[i   ]];
 }
 
 int16_t *h263_pred_motion(MpegEncContext * s, int block, 
@@ -1752,7 +1752,7 @@
     ff_mpeg4_stuffing(&s->pb);
 
     /* user data */
-    if(!ff_bit_exact){
+    if(!(s->flags & CODEC_FLAG_BITEXACT)){
         put_bits(&s->pb, 16, 0);
         put_bits(&s->pb, 16, 0x1B2);	/* user_data */
         sprintf(buf, "FFmpeg%sb%s", FFMPEG_VERSION, LIBAVCODEC_BUILD_STR);
@@ -1926,12 +1926,12 @@
             if(s->mb_x==0 || s->qscale == qscale_table[xy] || n==1 || n==3){
                 /* same qscale */
                 for(i=1;i<8;i++) {
-                    block[s->idct_permutation[i<<3]] += ac_val[i];
+                    block[s->dsp.idct_permutation[i<<3]] += ac_val[i];
                 }
             }else{
                 /* different qscale, we must rescale */
                 for(i=1;i<8;i++) {
-                    block[s->idct_permutation[i<<3]] += ROUNDED_DIV(ac_val[i]*qscale_table[xy], s->qscale);
+                    block[s->dsp.idct_permutation[i<<3]] += ROUNDED_DIV(ac_val[i]*qscale_table[xy], s->qscale);
                 }
             }
         } else {
@@ -1942,23 +1942,23 @@
             if(s->mb_y==0 || s->qscale == qscale_table[xy] || n==2 || n==3){
                 /* same qscale */
                 for(i=1;i<8;i++) {
-                    block[s->idct_permutation[i]] += ac_val[i + 8];
+                    block[s->dsp.idct_permutation[i]] += ac_val[i + 8];
                 }
             }else{
                 /* different qscale, we must rescale */
                 for(i=1;i<8;i++) {
-                    block[s->idct_permutation[i]] += ROUNDED_DIV(ac_val[i + 8]*qscale_table[xy], s->qscale);
+                    block[s->dsp.idct_permutation[i]] += ROUNDED_DIV(ac_val[i + 8]*qscale_table[xy], s->qscale);
                 }
             }
         }
     }
     /* left copy */
     for(i=1;i<8;i++)
-        ac_val1[i    ] = block[s->idct_permutation[i<<3]];
+        ac_val1[i    ] = block[s->dsp.idct_permutation[i<<3]];
 
     /* top copy */
     for(i=1;i<8;i++)
-        ac_val1[8 + i] = block[s->idct_permutation[i   ]];
+        ac_val1[8 + i] = block[s->dsp.idct_permutation[i   ]];
 
 }
 
@@ -1981,12 +1981,12 @@
         if(s->mb_x==0 || s->qscale == qscale_table[xy] || n==1 || n==3){
             /* same qscale */
             for(i=1;i<8;i++) {
-                block[s->idct_permutation[i<<3]] -= ac_val[i];
+                block[s->dsp.idct_permutation[i<<3]] -= ac_val[i];
             }
         }else{
             /* different qscale, we must rescale */
             for(i=1;i<8;i++) {
-                block[s->idct_permutation[i<<3]] -= ROUNDED_DIV(ac_val[i]*qscale_table[xy], s->qscale);
+                block[s->dsp.idct_permutation[i<<3]] -= ROUNDED_DIV(ac_val[i]*qscale_table[xy], s->qscale);
             }
         }
     } else {
@@ -1996,12 +1996,12 @@
         if(s->mb_y==0 || s->qscale == qscale_table[xy] || n==2 || n==3){
             /* same qscale */
             for(i=1;i<8;i++) {
-                block[s->idct_permutation[i]] -= ac_val[i + 8];
+                block[s->dsp.idct_permutation[i]] -= ac_val[i + 8];
             }
         }else{
             /* different qscale, we must rescale */
             for(i=1;i<8;i++) {
-                block[s->idct_permutation[i]] -= ROUNDED_DIV(ac_val[i + 8]*qscale_table[xy], s->qscale);
+                block[s->dsp.idct_permutation[i]] -= ROUNDED_DIV(ac_val[i + 8]*qscale_table[xy], s->qscale);
             }
         }
     }
@@ -4406,7 +4406,7 @@
             
             /* load default matrixes */
             for(i=0; i<64; i++){
-                int j= s->idct_permutation[i];
+                int j= s->dsp.idct_permutation[i];
                 v= ff_mpeg4_default_intra_matrix[i];
                 s->intra_matrix[j]= v;
                 s->chroma_intra_matrix[j]= v;
@@ -4425,14 +4425,14 @@
                     if(v==0) break;
                     
                     last= v;
-                    j= s->idct_permutation[ ff_zigzag_direct[i] ];
+                    j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
                     s->intra_matrix[j]= v;
                     s->chroma_intra_matrix[j]= v;
                 }
 
                 /* replicate last value */
                 for(; i<64; i++){
-		    int j= s->idct_permutation[ ff_zigzag_direct[i] ];
+		    int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
                     s->intra_matrix[j]= v;
                     s->chroma_intra_matrix[j]= v;
                 }
@@ -4447,14 +4447,14 @@
                     if(v==0) break;
 
                     last= v;
-                    j= s->idct_permutation[ ff_zigzag_direct[i] ];
+                    j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
                     s->inter_matrix[j]= v;
                     s->chroma_inter_matrix[j]= v;
                 }
 
                 /* replicate last value */
                 for(; i<64; i++){
-		    int j= s->idct_permutation[ ff_zigzag_direct[i] ];
+		    int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
                     s->inter_matrix[j]= last;
                     s->chroma_inter_matrix[j]= last;
                 }
diff -r 03df246fb06b -r f59c3f66363b i386/dsputil_mmx.c
--- a/i386/dsputil_mmx.c	Sat Mar 01 00:16:00 2003 +0000
+++ b/i386/dsputil_mmx.c	Mon Mar 03 14:54:00 2003 +0000
@@ -20,6 +20,7 @@
  */
 
 #include "../dsputil.h"
+#include "../simple_idct.h"
 
 int mm_flags; /* multimedia extension flags */
 
@@ -1408,8 +1409,35 @@
     c->put_ ## postfix1 = put_ ## postfix2;\
     c->put_no_rnd_ ## postfix1 = put_no_rnd_ ## postfix2;\
     c->avg_ ## postfix1 = avg_ ## postfix2;
+
+/* external functions, from idct_mmx.c */
+void ff_mmx_idct(DCTELEM *block);
+void ff_mmxext_idct(DCTELEM *block);
+
+/* XXX: those functions should be removed ASAP once all IDCTs are
+   converted */
+static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
+{   /* chosen by dsputil_init_mmx for FF_IDCT_LIBMPEG2MMX when MM_MMXEXT is not set */
+    ff_mmx_idct (block);
+    put_pixels_clamped_mmx(block, dest, line_size);
+}
+static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
+{   /* add counterpart of ff_libmpeg2mmx_idct_put (same selection condition) */
+    ff_mmx_idct (block);
+    add_pixels_clamped_mmx(block, dest, line_size);
+}
+static void ff_libmpeg2mmx2_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
+{   /* chosen by dsputil_init_mmx for FF_IDCT_LIBMPEG2MMX when MM_MMXEXT is set */
+    ff_mmxext_idct (block);
+    put_pixels_clamped_mmx(block, dest, line_size); /* same store routine as the plain MMX wrapper */
+}
+static void ff_libmpeg2mmx2_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
+{   /* add counterpart of ff_libmpeg2mmx2_idct_put (same selection condition) */
+    ff_mmxext_idct (block);
+    add_pixels_clamped_mmx(block, dest, line_size); /* same add routine as the plain MMX wrapper */
+}
     
-void dsputil_init_mmx(DSPContext* c, unsigned mask)
+void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx)
 {
     mm_flags = mm_support();
 #if 0
@@ -1428,6 +1456,27 @@
 #endif
 
     if (mm_flags & MM_MMX) {
+        const int dct_algo = avctx->dct_algo;
+        const int idct_algo= avctx->idct_algo;
+
+        if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX)
+            c->fdct = ff_fdct_mmx;
+
+        if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){
+            c->idct_put= ff_simple_idct_put_mmx;
+            c->idct_add= ff_simple_idct_add_mmx;
+            c->idct_permutation_type= FF_SIMPLE_IDCT_PERM;
+        }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){
+            if(mm_flags & MM_MMXEXT){
+                c->idct_put= ff_libmpeg2mmx2_idct_put;
+                c->idct_add= ff_libmpeg2mmx2_idct_add;
+            }else{
+                c->idct_put= ff_libmpeg2mmx_idct_put;
+                c->idct_add= ff_libmpeg2mmx_idct_add;
+            }
+            c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
+        }
+        
         c->get_pixels = get_pixels_mmx;
         c->diff_pixels = diff_pixels_mmx;
         c->put_pixels_clamped = put_pixels_clamped_mmx;
@@ -1487,23 +1536,26 @@
         if (mm_flags & MM_MMXEXT) {
             c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2;
             c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2;
-            c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
-            c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
 
             c->avg_pixels_tab[0][0] = avg_pixels16_mmx2;
             c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2;
             c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2;
-            c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
 
             c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2;
             c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2;
-            c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
-            c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
 
             c->avg_pixels_tab[1][0] = avg_pixels8_mmx2;
             c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2;
             c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2;
-            c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
+
+            if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
+                c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2;
+                c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2;
+                c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2;
+                c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2;
+                c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2;
+                c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2;
+            }
 
 #if 1
             SET_QPEL_FUNC(qpel_pixels_tab[0][ 0], qpel16_mc00_mmx2)
@@ -1542,23 +1594,26 @@
         } else if (mm_flags & MM_3DNOW) {
             c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow;
             c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow;
-            c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow;
-            c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow;
 
             c->avg_pixels_tab[0][0] = avg_pixels16_3dnow;
             c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow;
             c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow;
-            c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
 
             c->put_pixels_tab[1][1] = put_pixels8_x2_3dnow;
             c->put_pixels_tab[1][2] = put_pixels8_y2_3dnow;
-            c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow;
-            c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow;
 
             c->avg_pixels_tab[1][0] = avg_pixels8_3dnow;
             c->avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow;
             c->avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow;
-            c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow;
+
+            if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
+                c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow;
+                c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow;
+                c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow;
+                c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow;
+                c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow;
+                c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow;
+            }
 
             SET_QPEL_FUNC(qpel_pixels_tab[0][ 0], qpel16_mc00_3dnow)
             SET_QPEL_FUNC(qpel_pixels_tab[0][ 1], qpel16_mc10_3dnow)
@@ -1594,7 +1649,8 @@
             SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_3dnow)
         }
     }
-    dsputil_init_pix_mmx(c, mask);
+        
+    dsputil_init_pix_mmx(c, avctx);
 #if 0
     // for speed testing
     get_pixels = just_return;
@@ -1630,20 +1686,3 @@
     //ff_idct = just_return;
 #endif
 }
-
-/* remove any non bit exact operation (testing purpose). NOTE that
-   this function should be kept as small as possible because it is
-   always difficult to test automatically non bit exact cases. */
-void dsputil_set_bit_exact_mmx(DSPContext* c, unsigned mask)
-{
-    if (mm_flags & MM_MMX) {
-        /* MMX2 & 3DNOW */
-        c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx;
-        c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx;
-        c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx;
-        c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx;
-        c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx;
-        c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx;
-    }
-    dsputil_set_bit_exact_pix_mmx(c, mask);
-}
diff -r 03df246fb06b -r f59c3f66363b i386/motion_est_mmx.c
--- a/i386/motion_est_mmx.c	Sat Mar 01 00:16:00 2003 +0000
+++ b/i386/motion_est_mmx.c	Mon Mar 03 14:54:00 2003 +0000
@@ -386,7 +386,7 @@
 PIX_SAD(mmx)
 PIX_SAD(mmx2)
 
-void dsputil_init_pix_mmx(DSPContext* c, unsigned mask)
+void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx)
 {
     if (mm_flags & MM_MMX) {
         c->pix_abs16x16     = pix_abs16x16_mmx;
@@ -403,27 +403,18 @@
     }
     if (mm_flags & MM_MMXEXT) {
 	c->pix_abs16x16     = pix_abs16x16_mmx2;
-	c->pix_abs16x16_x2  = pix_abs16x16_x2_mmx2;
-	c->pix_abs16x16_y2  = pix_abs16x16_y2_mmx2;
-	c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx2;
 	c->pix_abs8x8     = pix_abs8x8_mmx2;
-	c->pix_abs8x8_x2  = pix_abs8x8_x2_mmx2;
-	c->pix_abs8x8_y2  = pix_abs8x8_y2_mmx2;
-	c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx2;
 
 	c->sad[0]= sad16x16_mmx2;
 	c->sad[1]= sad8x8_mmx2;
+        
+        if(!(avctx->flags & CODEC_FLAG_BITEXACT)){
+            c->pix_abs16x16_x2  = pix_abs16x16_x2_mmx2;
+            c->pix_abs16x16_y2  = pix_abs16x16_y2_mmx2;
+            c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx2;
+            c->pix_abs8x8_x2  = pix_abs8x8_x2_mmx2;
+            c->pix_abs8x8_y2  = pix_abs8x8_y2_mmx2;
+            c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx2;
+        }
     }
 }
-
-void dsputil_set_bit_exact_pix_mmx(DSPContext* c, unsigned mask)
-{
-    if (mm_flags & MM_MMXEXT) {
-	c->pix_abs16x16_x2  = pix_abs16x16_x2_mmx;
-	c->pix_abs16x16_y2  = pix_abs16x16_y2_mmx;
-	c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx;
-	c->pix_abs8x8_x2  = pix_abs8x8_x2_mmx;
-	c->pix_abs8x8_y2  = pix_abs8x8_y2_mmx;
-	c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx;
-    }
-}
diff -r 03df246fb06b -r f59c3f66363b i386/mpegvideo_mmx.c
--- a/i386/mpegvideo_mmx.c	Sat Mar 01 00:16:00 2003 +0000
+++ b/i386/mpegvideo_mmx.c	Mon Mar 03 14:54:00 2003 +0000
@@ -23,7 +23,6 @@
 #include "../dsputil.h"
 #include "../mpegvideo.h"
 #include "../avcodec.h"
-#include "../simple_idct.h"
 
 extern uint8_t zigzag_direct_noperm[64];
 extern uint16_t inv_zigzag_direct16[64];
@@ -499,38 +498,10 @@
 #define RENAME(a) a ## _MMX2
 #include "mpegvideo_mmx_template.c"
 
-/* external functions, from idct_mmx.c */
-void ff_mmx_idct(DCTELEM *block);
-void ff_mmxext_idct(DCTELEM *block);
-
-/* XXX: those functions should be suppressed ASAP when all IDCTs are
-   converted */
-static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
-{
-    ff_mmx_idct (block);
-    put_pixels_clamped_mmx(block, dest, line_size);
-}
-static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
-{
-    ff_mmx_idct (block);
-    add_pixels_clamped_mmx(block, dest, line_size);
-}
-static void ff_libmpeg2mmx2_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
-{
-    ff_mmxext_idct (block);
-    put_pixels_clamped_mmx(block, dest, line_size);
-}
-static void ff_libmpeg2mmx2_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
-{
-    ff_mmxext_idct (block);
-    add_pixels_clamped_mmx(block, dest, line_size);
-}
-
 void MPV_common_init_mmx(MpegEncContext *s)
 {
     if (mm_flags & MM_MMX) {
         const int dct_algo = s->avctx->dct_algo;
-        const int idct_algo= s->avctx->idct_algo;
         
         s->dct_unquantize_h263 = dct_unquantize_h263_mmx;
         s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_mmx;
@@ -539,28 +510,11 @@
         draw_edges = draw_edges_mmx;
 
         if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){
-            s->fdct = ff_fdct_mmx;
-
             if(mm_flags & MM_MMXEXT){
                 s->dct_quantize= dct_quantize_MMX2;
             } else {
                 s->dct_quantize= dct_quantize_MMX;
             }
         }
-
-        if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){
-            s->idct_put= ff_simple_idct_put_mmx;
-            s->idct_add= ff_simple_idct_add_mmx;
-            s->idct_permutation_type= FF_SIMPLE_IDCT_PERM;
-        }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){
-            if(mm_flags & MM_MMXEXT){
-                s->idct_put= ff_libmpeg2mmx2_idct_put;
-                s->idct_add= ff_libmpeg2mmx2_idct_add;
-            }else{
-                s->idct_put= ff_libmpeg2mmx_idct_put;
-                s->idct_add= ff_libmpeg2mmx_idct_add;
-            }
-            s->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
-        }
     }
 }
diff -r 03df246fb06b -r f59c3f66363b i386/mpegvideo_mmx_template.c
--- a/i386/mpegvideo_mmx_template.c	Sat Mar 01 00:16:00 2003 +0000
+++ b/i386/mpegvideo_mmx_template.c	Mon Mar 03 14:54:00 2003 +0000
@@ -191,7 +191,7 @@
     if(s->mb_intra) block[0]= level;
     else            block[0]= temp_block[0];
 
-    if(s->idct_permutation[1]==8){
+    if(s->dsp.idct_permutation_type == FF_SIMPLE_IDCT_PERM){
         if(last_non_zero_p1 <= 1) goto end;
         block[0x08] = temp_block[0x01]; block[0x10] = temp_block[0x08]; 
         block[0x20] = temp_block[0x10]; 
@@ -235,7 +235,7 @@
         block[0x3E] = temp_block[0x3D]; block[0x27] = temp_block[0x36]; 
         block[0x3D] = temp_block[0x2F]; block[0x2F] = temp_block[0x37]; 
         block[0x37] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F];
-    }else if(s->idct_permutation[1]==4){
+    }else if(s->dsp.idct_permutation_type == FF_LIBMPEG2_IDCT_PERM){
         if(last_non_zero_p1 <= 1) goto end;
         block[0x04] = temp_block[0x01]; 
         block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10]; 
diff -r 03df246fb06b -r f59c3f66363b mjpeg.c
--- a/mjpeg.c	Sat Mar 01 00:16:00 2003 +0000
+++ b/mjpeg.c	Mon Mar 03 14:54:00 2003 +0000
@@ -386,7 +386,7 @@
     }
 
     /* comment */
-    if(!ff_bit_exact){
+    if(!(s->flags & CODEC_FLAG_BITEXACT)){
         put_marker(p, COM);
         flush_put_bits(p);
         ptr = pbBufPtr(p);
@@ -703,7 +703,7 @@
 
     s->avctx = avctx;
 
-    /* ugly way to get the idct & scantable */
+    /* ugly way to get the idct & scantable FIXME */
     memset(&s2, 0, sizeof(MpegEncContext));
     s2.flags= avctx->flags;
     s2.avctx= avctx;
@@ -713,7 +713,7 @@
     if (MPV_common_init(&s2) < 0)
        return -1;
     s->scantable= s2.intra_scantable;
-    s->idct_put= s2.idct_put;
+    s->idct_put= s2.dsp.idct_put;
     MPV_common_end(&s2);
 
     s->mpeg_enc_ctx_allocated = 0;
diff -r 03df246fb06b -r f59c3f66363b mpeg12.c
--- a/mpeg12.c	Sat Mar 01 00:16:00 2003 +0000
+++ b/mpeg12.c	Mon Mar 03 14:54:00 2003 +0000
@@ -1653,7 +1653,7 @@
     if (get_bits1(&s->gb)) {
         for(i=0;i<64;i++) {
             v = get_bits(&s->gb, 8);
-            j= s->idct_permutation[ ff_zigzag_direct[i] ];
+            j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
             s->intra_matrix[j] = v;
             s->chroma_intra_matrix[j] = v;
         }
@@ -1661,7 +1661,7 @@
     if (get_bits1(&s->gb)) {
         for(i=0;i<64;i++) {
             v = get_bits(&s->gb, 8);
-            j= s->idct_permutation[ ff_zigzag_direct[i] ];
+            j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
             s->inter_matrix[j] = v;
             s->chroma_inter_matrix[j] = v;
         }
@@ -1669,14 +1669,14 @@
     if (get_bits1(&s->gb)) {
         for(i=0;i<64;i++) {
             v = get_bits(&s->gb, 8);
-            j= s->idct_permutation[ ff_zigzag_direct[i] ];
+            j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
             s->chroma_intra_matrix[j] = v;
         }
     }
     if (get_bits1(&s->gb)) {
         for(i=0;i<64;i++) {
             v = get_bits(&s->gb, 8);
-            j= s->idct_permutation[ ff_zigzag_direct[i] ];
+            j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ];
             s->chroma_inter_matrix[j] = v;
         }
     }
@@ -1985,7 +1985,7 @@
 #endif
     } else {
         for(i=0;i<64;i++) {
-            int j= s->idct_permutation[i];
+            int j= s->dsp.idct_permutation[i];
             v = ff_mpeg1_default_intra_matrix[i];
             s->intra_matrix[j] = v;
             s->chroma_intra_matrix[j] = v;
@@ -2006,7 +2006,7 @@
 #endif
     } else {
         for(i=0;i<64;i++) {
-            int j= s->idct_permutation[i];
+            int j= s->dsp.idct_permutation[i];
             v = ff_mpeg1_default_non_intra_matrix[i];
             s->inter_matrix[j] = v;
             s->chroma_inter_matrix[j] = v;
diff -r 03df246fb06b -r f59c3f66363b mpegvideo.c
--- a/mpegvideo.c	Sat Mar 01 00:16:00 2003 +0000
+++ b/mpegvideo.c	Mon Mar 03 14:54:00 2003 +0000
@@ -24,7 +24,6 @@
 #include "avcodec.h"
 #include "dsputil.h"
 #include "mpegvideo.h"
-#include "simple_idct.h"
 
 #ifdef USE_FASTMEMCPY
 #include "fastmemcpy.h"
@@ -72,18 +71,6 @@
     4520 ,  6270,  5906,  5315,  4520,  3552,  2446,  1247
 };
 
-/* Input permutation for the simple_idct_mmx */
-static const uint8_t simple_mmx_permutation[64]={
-	0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, 
-	0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, 
-	0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, 
-	0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, 
-	0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, 
-	0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, 
-	0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, 
-	0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F,
-};
-
 static const uint8_t h263_chroma_roundtab[16] = {
 //  0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15
     0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2,
@@ -102,9 +89,9 @@
 
     for(qscale=qmin; qscale<=qmax; qscale++){
         int i;
-        if (s->fdct == ff_jpeg_fdct_islow) {
+        if (s->dsp.fdct == ff_jpeg_fdct_islow) {
             for(i=0;i<64;i++) {
-                const int j= s->idct_permutation[i];
+                const int j= s->dsp.idct_permutation[i];
                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
@@ -113,9 +100,9 @@
                 qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / 
                                 (qscale * quant_matrix[j]));
             }
-        } else if (s->fdct == fdct_ifast) {
+        } else if (s->dsp.fdct == fdct_ifast) {
             for(i=0;i<64;i++) {
-                const int j= s->idct_permutation[i];
+                const int j= s->dsp.idct_permutation[i];
                 /* 16 <= qscale * quant_matrix[i] <= 7905 */
                 /* 19952         <= aanscales[i] * qscale * quant_matrix[i]           <= 249205026 */
                 /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */
@@ -126,7 +113,7 @@
             }
         } else {
             for(i=0;i<64;i++) {
-                const int j= s->idct_permutation[i];
+                const int j= s->dsp.idct_permutation[i];
                 /* We can safely suppose that 16 <= quant_matrix[i] <= 255
                    So 16           <= qscale * quant_matrix[i]             <= 7905
                    so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905
@@ -163,7 +150,7 @@
     for(i=0; i<64; i++){
         int j;
         j = src_scantable[i];
-        st->permutated[i] = s->idct_permutation[j];
+        st->permutated[i] = s->dsp.idct_permutation[j];
 #ifdef ARCH_POWERPC
         st->inverse[j] = i;
 #endif
@@ -178,51 +165,16 @@
     }
 }
 
-/* XXX: those functions should be suppressed ASAP when all IDCTs are
- converted */
-// *FIXME* this is ugly hack using local static
-static void (*ff_put_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
-static void (*ff_add_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size);
-static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block)
-{
-    j_rev_dct (block);
-    ff_put_pixels_clamped(block, dest, line_size);
-}
-static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block)
-{
-    j_rev_dct (block);
-    ff_add_pixels_clamped(block, dest, line_size);
-}
-
 /* init common dct for both encoder and decoder */
 int DCT_common_init(MpegEncContext *s)
 {
-    int i;
-
-    ff_put_pixels_clamped = s->dsp.put_pixels_clamped;
-    ff_add_pixels_clamped = s->dsp.add_pixels_clamped;
-
     s->dct_unquantize_h263 = dct_unquantize_h263_c;
     s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_c;
     s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_c;
+
 #ifdef CONFIG_ENCODERS
     s->dct_quantize= dct_quantize_c;
-
-    if(s->avctx->dct_algo==FF_DCT_FASTINT)
-        s->fdct = fdct_ifast;
-    else
-        s->fdct = ff_jpeg_fdct_islow; //slow/accurate/default
-#endif //CONFIG_ENCODERS
-
-    if(s->avctx->idct_algo==FF_IDCT_INT){
-        s->idct_put= ff_jref_idct_put;
-        s->idct_add= ff_jref_idct_add;
-        s->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
-    }else{ //accurate/default
-        s->idct_put= simple_idct_put;
-        s->idct_add= simple_idct_add;
-        s->idct_permutation_type= FF_NO_IDCT_PERM;
-    }
+#endif
         
 #ifdef HAVE_MMX
     MPV_common_init_mmx(s);
@@ -252,29 +204,6 @@
 
 #endif //CONFIG_ENCODERS
 
-    switch(s->idct_permutation_type){
-    case FF_NO_IDCT_PERM:
-        for(i=0; i<64; i++)
-            s->idct_permutation[i]= i;
-        break;
-    case FF_LIBMPEG2_IDCT_PERM:
-        for(i=0; i<64; i++)
-            s->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2);
-        break;
-    case FF_SIMPLE_IDCT_PERM:
-        for(i=0; i<64; i++)
-            s->idct_permutation[i]= simple_mmx_permutation[i];
-        break;
-    case FF_TRANSPOSE_IDCT_PERM:
-        for(i=0; i<64; i++)
-            s->idct_permutation[i]= ((i&7)<<3) | (i>>3);
-        break;
-    default:
-        fprintf(stderr, "Internal error, IDCT permutation not set\n");
-        return -1;
-    }
-
-
     /* load & permutate scantables
        note: only wmv uses differnt ones 
     */
@@ -384,7 +313,7 @@
 {
     int y_size, c_size, yc_size, i;
 
-    dsputil_init(&s->dsp, s->avctx->dsp_mask);
+    dsputil_init(&s->dsp, s->avctx);
     DCT_common_init(s);
 
     s->flags= s->avctx->flags;
@@ -768,7 +697,7 @@
 
     /* init default q matrix */
     for(i=0;i<64;i++) {
-        int j= s->idct_permutation[i];
+        int j= s->dsp.idct_permutation[i];
 #ifdef CONFIG_RISKY
         if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){
             s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i];
@@ -1938,7 +1867,7 @@
                            DCTELEM *block, int i, uint8_t *dest, int line_size)
 {
     s->dct_unquantize(s, block, i, s->qscale);
-    s->idct_put (dest, line_size, block);
+    s->dsp.idct_put (dest, line_size, block);
 }
 
 /* add block[] to dest[] */
@@ -1946,7 +1875,7 @@
                            DCTELEM *block, int i, uint8_t *dest, int line_size)
 {
     if (s->block_last_index[i] >= 0) {
-        s->idct_add (dest, line_size, block);
+        s->dsp.idct_add (dest, line_size, block);
     }
 }
 
@@ -1956,7 +1885,7 @@
     if (s->block_last_index[i] >= 0) {
         s->dct_unquantize(s, block, i, s->qscale);
 
-        s->idct_add (dest, line_size, block);
+        s->dsp.idct_add (dest, line_size, block);
     }
 }
 
@@ -2193,14 +2122,14 @@
                     put_dct(s, block[5], 5, dest_cr, s->uvlinesize);
                 }
             }else{
-                s->idct_put(dest_y                 , dct_linesize, block[0]);
-                s->idct_put(dest_y              + 8, dct_linesize, block[1]);
-                s->idct_put(dest_y + dct_offset    , dct_linesize, block[2]);
-                s->idct_put(dest_y + dct_offset + 8, dct_linesize, block[3]);
+                s->dsp.idct_put(dest_y                 , dct_linesize, block[0]);
+                s->dsp.idct_put(dest_y              + 8, dct_linesize, block[1]);
+                s->dsp.idct_put(dest_y + dct_offset    , dct_linesize, block[2]);
+                s->dsp.idct_put(dest_y + dct_offset + 8, dct_linesize, block[3]);
 
                 if(!(s->flags&CODEC_FLAG_GRAY)){
-                    s->idct_put(dest_cb, s->uvlinesize, block[4]);
-                    s->idct_put(dest_cr, s->uvlinesize, block[5]);
+                    s->dsp.idct_put(dest_cb, s->uvlinesize, block[4]);
+                    s->dsp.idct_put(dest_cr, s->uvlinesize, block[5]);
                 }
             }
         }
@@ -3040,7 +2969,7 @@
         /* for mjpeg, we do include qscale in the matrix */
         s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0];
         for(i=1;i<64;i++){
-            int j= s->idct_permutation[i];
+            int j= s->dsp.idct_permutation[i];
 
             s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3);
         }
@@ -3549,7 +3478,7 @@
     int score_limit=0;
     int left_limit= 0;
         
-    s->fdct (block);
+    s->dsp.fdct (block);
 
     qmul= qscale*16;
     qadd= ((qscale-1)|1)*8;
@@ -3648,7 +3577,7 @@
                     unquant_coeff= level*qmul - qadd;
                 }
             }else{ //MPEG1
-                j= s->idct_permutation[ scantable[i + start_i] ]; //FIXME optimize
+                j= s->dsp.idct_permutation[ scantable[i + start_i] ]; //FIXME optimize
                 if(s->mb_intra){
                     if (level < 0) {
                         unquant_coeff = (int)((-level) * qscale * s->intra_matrix[j]) >> 3;
@@ -3760,11 +3689,11 @@
     i= last_i;
     assert(last_level);
 //FIXME use permutated scantable
-    block[ s->idct_permutation[ scantable[last_non_zero] ] ]= last_level;
+    block[ s->dsp.idct_permutation[ scantable[last_non_zero] ] ]= last_level;
     i -= last_run + 1;
     
     for(;i>0 ; i -= run_tab[i] + 1){
-        const int j= s->idct_permutation[ scantable[i - 1 + start_i] ];
+        const int j= s->dsp.idct_permutation[ scantable[i - 1 + start_i] ];
     
         block[j]= level_tab[i];
         assert(block[j]);
@@ -3784,7 +3713,7 @@
     int max=0;
     unsigned int threshold1, threshold2;
 
-    s->fdct (block);
+    s->dsp.fdct (block);
 
     if (s->mb_intra) {
         if (!s->h263_aic) {
@@ -3836,8 +3765,8 @@
     *overflow= s->max_qcoeff < max; //overflow might have happend
     
     /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */
-    if (s->idct_permutation_type != FF_NO_IDCT_PERM)
-	ff_block_permute(block, s->idct_permutation, scantable, last_non_zero);
+    if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)
+	ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero);
 
     return last_non_zero;
 }
diff -r 03df246fb06b -r f59c3f66363b mpegvideo.h
--- a/mpegvideo.h	Sat Mar 01 00:16:00 2003 +0000
+++ b/mpegvideo.h	Mon Mar 03 14:54:00 2003 +0000
@@ -346,12 +346,6 @@
     ScanTable intra_h_scantable;
     ScanTable intra_v_scantable;
     ScanTable inter_scantable; // if inter == intra then intra should be used to reduce tha cache usage
-    uint8_t idct_permutation[64];
-    int idct_permutation_type;
-#define FF_NO_IDCT_PERM 1
-#define FF_LIBMPEG2_IDCT_PERM 2
-#define FF_SIMPLE_IDCT_PERM 3
-#define FF_TRANSPOSE_IDCT_PERM 4
 
     void *opaque; /* private data for the user */
 
@@ -562,10 +556,6 @@
                            DCTELEM *block/*align 16*/, int n, int qscale);
     int (*dct_quantize)(struct MpegEncContext *s, DCTELEM *block/*align 16*/, int n, int qscale, int *overflow);
     int (*fast_dct_quantize)(struct MpegEncContext *s, DCTELEM *block/*align 16*/, int n, int qscale, int *overflow);
-    void (*fdct)(DCTELEM *block/* align 16*/);
-    void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
-    void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/);
-    //FIXME move above funcs into dspContext perhaps
 } MpegEncContext;
 
 
@@ -610,8 +600,6 @@
 
 extern enum PixelFormat ff_yuv420p_list[2];
 
-extern int ff_bit_exact;
-
 static inline void ff_init_block_index(MpegEncContext *s){
     s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1 + s->mb_x*2;
     s->block_index[1]= s->block_wrap[0]*(s->mb_y*2 + 1)     + s->mb_x*2;
diff -r 03df246fb06b -r f59c3f66363b ppc/dsputil_ppc.c
--- a/ppc/dsputil_ppc.c	Sat Mar 01 00:16:00 2003 +0000
+++ b/ppc/dsputil_ppc.c	Mon Mar 03 14:54:00 2003 +0000
@@ -25,6 +25,9 @@
 #include "dsputil_altivec.h"
 #endif
 
+extern void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block);
+extern void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block);
+
 int mm_flags = 0;
 
 int mm_support(void)
@@ -169,7 +172,7 @@
   return count;
 }
 
-void dsputil_init_ppc(DSPContext* c, unsigned mask)
+void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
 {
     // Common optimisations whether Altivec or not
 
@@ -215,6 +218,18 @@
         c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec;
         
 	c->gmc1 = gmc1_altivec;
+
+        if ((avctx->idct_algo == FF_IDCT_AUTO) ||
+                (avctx->idct_algo == FF_IDCT_ALTIVEC))
+        {
+            c->idct_put = idct_put_altivec;
+            c->idct_add = idct_add_altivec;
+#ifndef ALTIVEC_USE_REFERENCE_C_CODE
+            c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
+#else /* ALTIVEC_USE_REFERENCE_C_CODE */
+            c->idct_permutation_type = FF_NO_IDCT_PERM;
+#endif /* ALTIVEC_USE_REFERENCE_C_CODE */
+        }
         
 #ifdef POWERPC_TBL_PERFORMANCE_REPORT
         {
diff -r 03df246fb06b -r f59c3f66363b ppc/mpegvideo_altivec.c
--- a/ppc/mpegvideo_altivec.c	Sat Mar 01 00:16:00 2003 +0000
+++ b/ppc/mpegvideo_altivec.c	Mon Mar 03 14:54:00 2003 +0000
@@ -468,7 +468,7 @@
         // and handle it using the vector unit if we can.  This is the permute used
         // by the altivec idct, so it is common when using the altivec dct.
 
-        if ((lastNonZero > 0) && (s->idct_permutation_type == FF_TRANSPOSE_IDCT_PERM))
+        if ((lastNonZero > 0) && (s->dsp.idct_permutation_type == FF_TRANSPOSE_IDCT_PERM))
         {
             TRANSPOSE8(data0, data1, data2, data3, data4, data5, data6, data7);
         }
@@ -501,10 +501,10 @@
     // We handled the tranpose permutation above and we don't
     // need to permute the "no" permutation case.
     if ((lastNonZero > 0) &&
-        (s->idct_permutation_type != FF_TRANSPOSE_IDCT_PERM) &&
-        (s->idct_permutation_type != FF_NO_IDCT_PERM))
+        (s->dsp.idct_permutation_type != FF_TRANSPOSE_IDCT_PERM) &&
+        (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM))
     {
-        ff_block_permute(data, s->idct_permutation,
+        ff_block_permute(data, s->dsp.idct_permutation,
                 s->intra_scantable.scantable, lastNonZero);
     }
 
diff -r 03df246fb06b -r f59c3f66363b ps2/dsputil_mmi.c
--- a/ps2/dsputil_mmi.c	Sat Mar 01 00:16:00 2003 +0000
+++ b/ps2/dsputil_mmi.c	Mon Mar 03 14:54:00 2003 +0000
@@ -22,6 +22,9 @@
 #include "../dsputil.h"
 #include "mmi.h"
 
+void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block);
+void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block);
+
 
 static void clear_blocks_mmi(DCTELEM * blocks)
 {
@@ -103,8 +106,10 @@
 }
 
 
-void dsputil_init_mmi(DSPContext* c, unsigned mask)
+void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx)
 {
+    const int idct_algo= avctx->idct_algo;
+
     c->clear_blocks = clear_blocks_mmi;
 
     c->put_pixels_tab[1][0] = put_pixels8_mmi;
@@ -114,5 +119,11 @@
     c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmi;
 
     c->get_pixels = get_pixels_mmi;
+       
+    if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_PS2){
+        c->idct_put= ff_mmi_idct_put;
+        c->idct_add= ff_mmi_idct_add;
+        c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
+    }
 }
 
diff -r 03df246fb06b -r f59c3f66363b ps2/mpegvideo_mmi.c
--- a/ps2/mpegvideo_mmi.c	Sat Mar 01 00:16:00 2003 +0000
+++ b/ps2/mpegvideo_mmi.c	Mon Mar 03 14:54:00 2003 +0000
@@ -22,10 +22,6 @@
 #include "../mpegvideo.h"
 #include "../avcodec.h"
 
-void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block);
-void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block);
-
-
 static void dct_unquantize_h263_mmi(MpegEncContext *s, 
                                   DCTELEM *block, int n, int qscale)
 {
@@ -84,14 +80,6 @@
 
 void MPV_common_init_mmi(MpegEncContext *s)
 {
-    int i;
-    const int idct_algo= s->avctx->idct_algo;
-       
-    if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_PS2){
-        s->idct_put= ff_mmi_idct_put;
-        s->idct_add= ff_mmi_idct_add;
-        s->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;
-    }
     s->dct_unquantize_h263 = dct_unquantize_h263_mmi;
 }
 
diff -r 03df246fb06b -r f59c3f66363b wmv2.c
--- a/wmv2.c	Sat Mar 01 00:16:00 2003 +0000
+++ b/wmv2.c	Mon Mar 03 14:54:00 2003 +0000
@@ -328,7 +328,7 @@
     code               = get_bits(&gb, 3);
     
     if(code==0) return -1;
-            
+
     s->slice_height = s->mb_height / code;
 
     if(s->avctx->debug&FF_DEBUG_PICT_INFO){
@@ -582,7 +582,7 @@
     switch(w->abt_type_table[n]){
     case 0:
         if (s->block_last_index[n] >= 0) {
-            s->idct_add (dst, stride, block1);
+            s->dsp.idct_add (dst, stride, block1);
         }
         break;
     case 1: