# HG changeset patch # User michaelni # Date 1046703240 0 # Node ID f59c3f66363be9c76bfa856d37a4043c2a7c6804 # Parent 03df246fb06b214d899152bf566f557412862bf2 MpegEncContext.(i)dct_* -> DspContext.(i)dct_* bitexact cleanup diff -r 03df246fb06b -r f59c3f66363b alpha/dsputil_alpha.c --- a/alpha/dsputil_alpha.c Sat Mar 01 00:16:00 2003 +0000 +++ b/alpha/dsputil_alpha.c Mon Mar 03 14:54:00 2003 +0000 @@ -20,6 +20,9 @@ #include "asm.h" #include "../dsputil.h" +extern void simple_idct_put_axp(uint8_t *dest, int line_size, DCTELEM *block); +extern void simple_idct_add_axp(uint8_t *dest, int line_size, DCTELEM *block); + void put_pixels_axp_asm(uint8_t *block, const uint8_t *pixels, int line_size, int h); void put_pixels_clamped_mvi_asm(const DCTELEM *block, uint8_t *pixels, @@ -295,7 +298,7 @@ return pix_abs8x8_mvi(a, b, stride); } -void dsputil_init_alpha(DSPContext* c, unsigned mask) +void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx) { c->put_pixels_tab[0][0] = put_pixels16_axp_asm; c->put_pixels_tab[0][1] = put_pixels16_x2_axp; @@ -357,4 +360,7 @@ put_pixels_clamped_axp_p = c->put_pixels_clamped; add_pixels_clamped_axp_p = c->add_pixels_clamped; + + c->idct_put = simple_idct_put_axp; + c->idct_add = simple_idct_add_axp; } diff -r 03df246fb06b -r f59c3f66363b alpha/mpegvideo_alpha.c --- a/alpha/mpegvideo_alpha.c Sat Mar 01 00:16:00 2003 +0000 +++ b/alpha/mpegvideo_alpha.c Mon Mar 03 14:54:00 2003 +0000 @@ -21,9 +21,6 @@ #include "../dsputil.h" #include "../mpegvideo.h" -extern void simple_idct_put_axp(uint8_t *dest, int line_size, DCTELEM *block); -extern void simple_idct_add_axp(uint8_t *dest, int line_size, DCTELEM *block); - static void dct_unquantize_h263_axp(MpegEncContext *s, DCTELEM *block, int n, int qscale) { @@ -97,6 +94,4 @@ void MPV_common_init_axp(MpegEncContext *s) { s->dct_unquantize_h263 = dct_unquantize_h263_axp; - s->idct_put = simple_idct_put_axp; - s->idct_add = simple_idct_add_axp; } diff -r 03df246fb06b -r f59c3f66363b 
armv4l/dsputil_arm.c --- a/armv4l/dsputil_arm.c Sat Mar 01 00:16:00 2003 +0000 +++ b/armv4l/dsputil_arm.c Mon Mar 03 14:54:00 2003 +0000 @@ -21,7 +21,33 @@ extern void j_rev_dct_ARM(DCTELEM *data); -void dsputil_init_armv4l(DSPContext* c, unsigned mask) +/* XXX: local hack */ +static void (*ff_put_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size); +static void (*ff_add_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size); + +/* XXX: those functions should be suppressed ASAP when all IDCTs are + converted */ +static void arm_idct_put(uint8_t *dest, int line_size, DCTELEM *block) +{ + j_rev_dct_ARM (block); + ff_put_pixels_clamped(block, dest, line_size); +} +static void arm_idct_add(uint8_t *dest, int line_size, DCTELEM *block) { -// ff_idct = j_rev_dct_ARM; + j_rev_dct_ARM (block); + ff_add_pixels_clamped(block, dest, line_size); } + +void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx) +{ + const int idct_algo= avctx->idct_algo; + + ff_put_pixels_clamped = c->put_pixels_clamped; + ff_add_pixels_clamped = c->add_pixels_clamped; + + if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_ARM){ + c->idct_put= arm_idct_put; + c->idct_add= arm_idct_add; + c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;/* FF_NO_IDCT_PERM */ + } +} diff -r 03df246fb06b -r f59c3f66363b armv4l/mpegvideo_arm.c --- a/armv4l/mpegvideo_arm.c Sat Mar 01 00:16:00 2003 +0000 +++ b/armv4l/mpegvideo_arm.c Mon Mar 03 14:54:00 2003 +0000 @@ -21,35 +21,6 @@ #include "../mpegvideo.h" #include "../avcodec.h" -extern void j_rev_dct_ARM(DCTELEM *data); -/* XXX: local hack */ -static void (*ff_put_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size); -static void (*ff_add_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size); - -/* XXX: those functions should be suppressed ASAP when all IDCTs are - converted */ -static void arm_idct_put(uint8_t *dest, int line_size, DCTELEM *block) -{ - j_rev_dct_ARM (block); - 
ff_put_pixels_clamped(block, dest, line_size); -} -static void arm_idct_add(uint8_t *dest, int line_size, DCTELEM *block) -{ - j_rev_dct_ARM (block); - ff_add_pixels_clamped(block, dest, line_size); -} - void MPV_common_init_armv4l(MpegEncContext *s) { - int i; - const int idct_algo= s->avctx->idct_algo; - - ff_put_pixels_clamped = s->dsp.put_pixels_clamped; - ff_add_pixels_clamped = s->dsp.add_pixels_clamped; - - if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_ARM){ - s->idct_put= arm_idct_put; - s->idct_add= arm_idct_add; - s->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM;/* FF_NO_IDCT_PERM */ - } } diff -r 03df246fb06b -r f59c3f66363b avcodec.h --- a/avcodec.h Sat Mar 01 00:16:00 2003 +0000 +++ b/avcodec.h Mon Mar 03 14:54:00 2003 +0000 @@ -16,8 +16,8 @@ #define LIBAVCODEC_VERSION_INT 0x000406 #define LIBAVCODEC_VERSION "0.4.6" -#define LIBAVCODEC_BUILD 4659 -#define LIBAVCODEC_BUILD_STR "4659" +#define LIBAVCODEC_BUILD 4660 +#define LIBAVCODEC_BUILD_STR "4660" enum CodecID { CODEC_ID_NONE, @@ -159,6 +159,7 @@ #define CODEC_FLAG_ALT_SCAN 0x00100000 /* use alternate scan */ #define CODEC_FLAG_TRELLIS_QUANT 0x00200000 /* use trellis quantization */ #define CODEC_FLAG_GLOBAL_HEADER 0x00400000 /* place global headers in extradata instead of every keyframe */ +#define CODEC_FLAG_BITEXACT 0x00800000 /* use only bitexact stuff (except (i)dct) */ /* codec capabilities */ @@ -1167,8 +1168,6 @@ unsigned avcodec_build(void); void avcodec_init(void); -void avcodec_set_bit_exact(void); - void register_avcodec(AVCodec *format); AVCodec *avcodec_find_encoder(enum CodecID id); AVCodec *avcodec_find_encoder_by_name(const char *name); diff -r 03df246fb06b -r f59c3f66363b dsputil.c --- a/dsputil.c Sat Mar 01 00:16:00 2003 +0000 +++ b/dsputil.c Mon Mar 03 14:54:00 2003 +0000 @@ -21,8 +21,8 @@ #include "avcodec.h" #include "dsputil.h" #include "mpegvideo.h" +#include "simple_idct.h" -int ff_bit_exact=0; uint8_t cropTbl[256 + 2 * MAX_NEG_CROP]; uint32_t squareTbl[512]; @@ -99,6 
+99,18 @@ 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, }; +/* Input permutation for the simple_idct_mmx */ +static const uint8_t simple_mmx_permutation[64]={ + 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, + 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, + 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, + 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, + 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, + 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, + 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, + 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, +}; + static int pix_sum_c(uint8_t * pix, int line_size) { int s, i, j; @@ -1787,7 +1799,7 @@ int sum=0, i; s->dsp.diff_pixels(temp, src1, src2, stride); - s->fdct(temp); + s->dsp.fdct(temp); for(i=0; i<64; i++) sum+= ABS(temp[i]); @@ -1887,7 +1899,7 @@ s->dct_unquantize(s, temp, 0, s->qscale); } - s->idct_add(bak, stride, temp); + s->dsp.idct_add(bak, stride, temp); distoration= s->dsp.sse[1](NULL, bak, src1, stride); @@ -1959,7 +1971,20 @@ WARPER88_1616(rd8x8_c, rd16x16_c) WARPER88_1616(bit8x8_c, bit16x16_c) -void dsputil_init(DSPContext* c, unsigned mask) +/* XXX: those functions should be suppressed ASAP when all IDCTs are + converted */ +static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block) +{ + j_rev_dct (block); + put_pixels_clamped_c(block, dest, line_size); +} +static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block) +{ + j_rev_dct (block); + add_pixels_clamped_c(block, dest, line_size); +} + +void dsputil_init(DSPContext* c, AVCodecContext *avctx) { static int init_done = 0; int i; @@ -1980,6 +2005,23 @@ init_done = 1; } +#ifdef CONFIG_ENCODERS + if(avctx->dct_algo==FF_DCT_FASTINT) + c->fdct = fdct_ifast; + else + c->fdct = ff_jpeg_fdct_islow; //slow/accurate/default +#endif //CONFIG_ENCODERS + + if(avctx->idct_algo==FF_IDCT_INT){ + c->idct_put= ff_jref_idct_put; + c->idct_add= ff_jref_idct_add; + c->idct_permutation_type= 
FF_LIBMPEG2_IDCT_PERM; + }else{ //accurate/default + c->idct_put= simple_idct_put; + c->idct_add= simple_idct_add; + c->idct_permutation_type= FF_NO_IDCT_PERM; + } + c->get_pixels = get_pixels_c; c->diff_pixels = diff_pixels_c; c->put_pixels_clamped = put_pixels_clamped_c; @@ -2082,37 +2124,43 @@ c->diff_bytes= diff_bytes_c; #ifdef HAVE_MMX - dsputil_init_mmx(c, mask); - if (ff_bit_exact) - { - /* FIXME - AVCodec context should have flag for bitexact match */ - /* fprintf(stderr, "\n\n\nff_bit_exact %d\n\n\n\n", ff_bit_exact); */ - dsputil_set_bit_exact_mmx(c, mask); - } + dsputil_init_mmx(c, avctx); #endif #ifdef ARCH_ARMV4L - dsputil_init_armv4l(c, mask); + dsputil_init_armv4l(c, avctx); #endif #ifdef HAVE_MLIB - dsputil_init_mlib(c, mask); + dsputil_init_mlib(c, avctx); #endif #ifdef ARCH_ALPHA - dsputil_init_alpha(c, mask); + dsputil_init_alpha(c, avctx); #endif #ifdef ARCH_POWERPC - dsputil_init_ppc(c, mask); + dsputil_init_ppc(c, avctx); #endif #ifdef HAVE_MMI - dsputil_init_mmi(c, mask); + dsputil_init_mmi(c, avctx); #endif + + switch(c->idct_permutation_type){ + case FF_NO_IDCT_PERM: + for(i=0; i<64; i++) + c->idct_permutation[i]= i; + break; + case FF_LIBMPEG2_IDCT_PERM: + for(i=0; i<64; i++) + c->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); + break; + case FF_SIMPLE_IDCT_PERM: + for(i=0; i<64; i++) + c->idct_permutation[i]= simple_mmx_permutation[i]; + break; + case FF_TRANSPOSE_IDCT_PERM: + for(i=0; i<64; i++) + c->idct_permutation[i]= ((i&7)<<3) | (i>>3); + break; + default: + fprintf(stderr, "Internal error, IDCT permutation not set\n"); + } } -/* remove any non bit exact operation (testing purpose) */ -void avcodec_set_bit_exact(void) -{ - ff_bit_exact=1; -#ifdef HAVE_MMX -// FIXME - better set_bit_exact -// dsputil_set_bit_exact_mmx(); -#endif -} diff -r 03df246fb06b -r f59c3f66363b dsputil.h --- a/dsputil.h Sat Mar 01 00:16:00 2003 +0000 +++ b/dsputil.h Mon Mar 03 14:54:00 2003 +0000 @@ -149,9 +149,21 @@ /* huffyuv specific */ 
void (*add_bytes)(uint8_t *dst/*align 16*/, uint8_t *src/*align 16*/, int w); void (*diff_bytes)(uint8_t *dst/*align 16*/, uint8_t *src1/*align 16*/, uint8_t *src2/*align 1*/,int w); + + /* (I)DCT */ + void (*fdct)(DCTELEM *block/* align 16*/); + void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); + void (*idct_add)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); + uint8_t idct_permutation[64]; + int idct_permutation_type; +#define FF_NO_IDCT_PERM 1 +#define FF_LIBMPEG2_IDCT_PERM 2 +#define FF_SIMPLE_IDCT_PERM 3 +#define FF_TRANSPOSE_IDCT_PERM 4 + } DSPContext; -void dsputil_init(DSPContext* p, unsigned mask); +void dsputil_init(DSPContext* p, AVCodecContext *avctx); /** * permute block according to permuatation. @@ -194,11 +206,8 @@ #define __align8 __attribute__ ((aligned (8))) -void dsputil_init_mmx(DSPContext* c, unsigned mask); -void dsputil_set_bit_exact_mmx(DSPContext* c, unsigned mask); - -void dsputil_init_pix_mmx(DSPContext* c, unsigned mask); -void dsputil_set_bit_exact_pix_mmx(DSPContext* c, unsigned mask); +void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx); +void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx); #elif defined(ARCH_ARMV4L) @@ -206,20 +215,20 @@ line ptimizations */ #define __align8 __attribute__ ((aligned (4))) -void dsputil_init_armv4l(DSPContext* c, unsigned mask); +void dsputil_init_armv4l(DSPContext* c, AVCodecContext *avctx); #elif defined(HAVE_MLIB) /* SPARC/VIS IDCT needs 8-byte aligned DCT blocks */ #define __align8 __attribute__ ((aligned (8))) -void dsputil_init_mlib(DSPContext* c, unsigned mask); +void dsputil_init_mlib(DSPContext* c, AVCodecContext *avctx); #elif defined(ARCH_ALPHA) #define __align8 __attribute__ ((aligned (8))) -void dsputil_init_alpha(DSPContext* c, unsigned mask); +void dsputil_init_alpha(DSPContext* c, AVCodecContext *avctx); #elif defined(ARCH_POWERPC) @@ -233,13 +242,13 @@ #define __align8 __attribute__ ((aligned (16))) -void 
dsputil_init_ppc(DSPContext* c, unsigned mask); +void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx); #elif defined(HAVE_MMI) #define __align8 __attribute__ ((aligned (16))) -void dsputil_init_mmi(DSPContext* c, unsigned mask); +void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx); #else diff -r 03df246fb06b -r f59c3f66363b dv.c --- a/dv.c Sat Mar 01 00:16:00 2003 +0000 +++ b/dv.c Mon Mar 03 14:54:00 2003 +0000 @@ -115,12 +115,12 @@ /* XXX: fix it */ memset(&s2, 0, sizeof(MpegEncContext)); s2.avctx = avctx; - dsputil_init(&s2.dsp, avctx->dsp_mask); + dsputil_init(&s2.dsp, avctx); if (DCT_common_init(&s2) < 0) return -1; - s->idct_put[0] = s2.idct_put; - memcpy(s->idct_permutation, s2.idct_permutation, 64); + s->idct_put[0] = s2.dsp.idct_put; + memcpy(s->idct_permutation, s2.dsp.idct_permutation, 64); memcpy(s->dv_zigzag[0], s2.intra_scantable.permutated, 64); /* XXX: use MMX also for idct248 */ diff -r 03df246fb06b -r f59c3f66363b h263.c --- a/h263.c Sat Mar 01 00:16:00 2003 +0000 +++ b/h263.c Mon Mar 03 14:54:00 2003 +0000 @@ -297,19 +297,19 @@ if(s->mb_y==0 || s->qscale == qscale_table[xy] || n==2 || n==3){ /* same qscale */ for(i=1; i<8; i++){ - const int level= block[n][s->idct_permutation[i ]]; + const int level= block[n][s->dsp.idct_permutation[i ]]; score0+= ABS(level); score1+= ABS(level - ac_val[i+8]); - ac_val1[i ]= block[n][s->idct_permutation[i<<3]]; + ac_val1[i ]= block[n][s->dsp.idct_permutation[i<<3]]; ac_val1[i+8]= level; } }else{ /* different qscale, we must rescale */ for(i=1; i<8; i++){ - const int level= block[n][s->idct_permutation[i ]]; + const int level= block[n][s->dsp.idct_permutation[i ]]; score0+= ABS(level); score1+= ABS(level - ROUNDED_DIV(ac_val[i + 8]*qscale_table[xy], s->qscale)); - ac_val1[i ]= block[n][s->idct_permutation[i<<3]]; + ac_val1[i ]= block[n][s->dsp.idct_permutation[i<<3]]; ac_val1[i+8]= level; } } @@ -320,20 +320,20 @@ if(s->mb_x==0 || s->qscale == qscale_table[xy] || n==1 || n==3){ /* same qscale */ 
for(i=1; i<8; i++){ - const int level= block[n][s->idct_permutation[i<<3]]; + const int level= block[n][s->dsp.idct_permutation[i<<3]]; score0+= ABS(level); score1+= ABS(level - ac_val[i]); ac_val1[i ]= level; - ac_val1[i+8]= block[n][s->idct_permutation[i ]]; + ac_val1[i+8]= block[n][s->dsp.idct_permutation[i ]]; } }else{ /* different qscale, we must rescale */ for(i=1; i<8; i++){ - const int level= block[n][s->idct_permutation[i<<3]]; + const int level= block[n][s->dsp.idct_permutation[i<<3]]; score0+= ABS(level); score1+= ABS(level - ROUNDED_DIV(ac_val[i]*qscale_table[xy], s->qscale)); ac_val1[i ]= level; - ac_val1[i+8]= block[n][s->idct_permutation[i ]]; + ac_val1[i+8]= block[n][s->dsp.idct_permutation[i ]]; } } } @@ -831,10 +831,10 @@ if(dir[i]){ for(j=1; j<8; j++) - block[i][s->idct_permutation[j ]]= ac_val[j+8]; + block[i][s->dsp.idct_permutation[j ]]= ac_val[j+8]; }else{ for(j=1; j<8; j++) - block[i][s->idct_permutation[j<<3]]= ac_val[j ]; + block[i][s->dsp.idct_permutation[j<<3]]= ac_val[j ]; } s->block_last_index[i]= zigzag_last_index[i]; } @@ -1059,7 +1059,7 @@ if (a != 1024) { ac_val -= 16; for(i=1;i<8;i++) { - block[s->idct_permutation[i<<3]] += ac_val[i]; + block[s->dsp.idct_permutation[i<<3]] += ac_val[i]; } pred_dc = a; } @@ -1068,7 +1068,7 @@ if (c != 1024) { ac_val -= 16 * wrap; for(i=1;i<8;i++) { - block[s->idct_permutation[i ]] += ac_val[i + 8]; + block[s->dsp.idct_permutation[i ]] += ac_val[i + 8]; } pred_dc = c; } @@ -1096,10 +1096,10 @@ /* left copy */ for(i=1;i<8;i++) - ac_val1[i ] = block[s->idct_permutation[i<<3]]; + ac_val1[i ] = block[s->dsp.idct_permutation[i<<3]]; /* top copy */ for(i=1;i<8;i++) - ac_val1[8 + i] = block[s->idct_permutation[i ]]; + ac_val1[8 + i] = block[s->dsp.idct_permutation[i ]]; } int16_t *h263_pred_motion(MpegEncContext * s, int block, @@ -1752,7 +1752,7 @@ ff_mpeg4_stuffing(&s->pb); /* user data */ - if(!ff_bit_exact){ + if(!(s->flags & CODEC_FLAG_BITEXACT)){ put_bits(&s->pb, 16, 0); put_bits(&s->pb, 16, 0x1B2); 
/* user_data */ sprintf(buf, "FFmpeg%sb%s", FFMPEG_VERSION, LIBAVCODEC_BUILD_STR); @@ -1926,12 +1926,12 @@ if(s->mb_x==0 || s->qscale == qscale_table[xy] || n==1 || n==3){ /* same qscale */ for(i=1;i<8;i++) { - block[s->idct_permutation[i<<3]] += ac_val[i]; + block[s->dsp.idct_permutation[i<<3]] += ac_val[i]; } }else{ /* different qscale, we must rescale */ for(i=1;i<8;i++) { - block[s->idct_permutation[i<<3]] += ROUNDED_DIV(ac_val[i]*qscale_table[xy], s->qscale); + block[s->dsp.idct_permutation[i<<3]] += ROUNDED_DIV(ac_val[i]*qscale_table[xy], s->qscale); } } } else { @@ -1942,23 +1942,23 @@ if(s->mb_y==0 || s->qscale == qscale_table[xy] || n==2 || n==3){ /* same qscale */ for(i=1;i<8;i++) { - block[s->idct_permutation[i]] += ac_val[i + 8]; + block[s->dsp.idct_permutation[i]] += ac_val[i + 8]; } }else{ /* different qscale, we must rescale */ for(i=1;i<8;i++) { - block[s->idct_permutation[i]] += ROUNDED_DIV(ac_val[i + 8]*qscale_table[xy], s->qscale); + block[s->dsp.idct_permutation[i]] += ROUNDED_DIV(ac_val[i + 8]*qscale_table[xy], s->qscale); } } } } /* left copy */ for(i=1;i<8;i++) - ac_val1[i ] = block[s->idct_permutation[i<<3]]; + ac_val1[i ] = block[s->dsp.idct_permutation[i<<3]]; /* top copy */ for(i=1;i<8;i++) - ac_val1[8 + i] = block[s->idct_permutation[i ]]; + ac_val1[8 + i] = block[s->dsp.idct_permutation[i ]]; } @@ -1981,12 +1981,12 @@ if(s->mb_x==0 || s->qscale == qscale_table[xy] || n==1 || n==3){ /* same qscale */ for(i=1;i<8;i++) { - block[s->idct_permutation[i<<3]] -= ac_val[i]; + block[s->dsp.idct_permutation[i<<3]] -= ac_val[i]; } }else{ /* different qscale, we must rescale */ for(i=1;i<8;i++) { - block[s->idct_permutation[i<<3]] -= ROUNDED_DIV(ac_val[i]*qscale_table[xy], s->qscale); + block[s->dsp.idct_permutation[i<<3]] -= ROUNDED_DIV(ac_val[i]*qscale_table[xy], s->qscale); } } } else { @@ -1996,12 +1996,12 @@ if(s->mb_y==0 || s->qscale == qscale_table[xy] || n==2 || n==3){ /* same qscale */ for(i=1;i<8;i++) { - block[s->idct_permutation[i]] -= 
ac_val[i + 8]; + block[s->dsp.idct_permutation[i]] -= ac_val[i + 8]; } }else{ /* different qscale, we must rescale */ for(i=1;i<8;i++) { - block[s->idct_permutation[i]] -= ROUNDED_DIV(ac_val[i + 8]*qscale_table[xy], s->qscale); + block[s->dsp.idct_permutation[i]] -= ROUNDED_DIV(ac_val[i + 8]*qscale_table[xy], s->qscale); } } } @@ -4406,7 +4406,7 @@ /* load default matrixes */ for(i=0; i<64; i++){ - int j= s->idct_permutation[i]; + int j= s->dsp.idct_permutation[i]; v= ff_mpeg4_default_intra_matrix[i]; s->intra_matrix[j]= v; s->chroma_intra_matrix[j]= v; @@ -4425,14 +4425,14 @@ if(v==0) break; last= v; - j= s->idct_permutation[ ff_zigzag_direct[i] ]; + j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ]; s->intra_matrix[j]= v; s->chroma_intra_matrix[j]= v; } /* replicate last value */ for(; i<64; i++){ - int j= s->idct_permutation[ ff_zigzag_direct[i] ]; + int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ]; s->intra_matrix[j]= v; s->chroma_intra_matrix[j]= v; } @@ -4447,14 +4447,14 @@ if(v==0) break; last= v; - j= s->idct_permutation[ ff_zigzag_direct[i] ]; + j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ]; s->inter_matrix[j]= v; s->chroma_inter_matrix[j]= v; } /* replicate last value */ for(; i<64; i++){ - int j= s->idct_permutation[ ff_zigzag_direct[i] ]; + int j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ]; s->inter_matrix[j]= last; s->chroma_inter_matrix[j]= last; } diff -r 03df246fb06b -r f59c3f66363b i386/dsputil_mmx.c --- a/i386/dsputil_mmx.c Sat Mar 01 00:16:00 2003 +0000 +++ b/i386/dsputil_mmx.c Mon Mar 03 14:54:00 2003 +0000 @@ -20,6 +20,7 @@ */ #include "../dsputil.h" +#include "../simple_idct.h" int mm_flags; /* multimedia extension flags */ @@ -1408,8 +1409,35 @@ c->put_ ## postfix1 = put_ ## postfix2;\ c->put_no_rnd_ ## postfix1 = put_no_rnd_ ## postfix2;\ c->avg_ ## postfix1 = avg_ ## postfix2; + +/* external functions, from idct_mmx.c */ +void ff_mmx_idct(DCTELEM *block); +void ff_mmxext_idct(DCTELEM *block); + +/* XXX: those functions should 
be suppressed ASAP when all IDCTs are + converted */ +static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block) +{ + ff_mmx_idct (block); + put_pixels_clamped_mmx(block, dest, line_size); +} +static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size, DCTELEM *block) +{ + ff_mmx_idct (block); + add_pixels_clamped_mmx(block, dest, line_size); +} +static void ff_libmpeg2mmx2_idct_put(uint8_t *dest, int line_size, DCTELEM *block) +{ + ff_mmxext_idct (block); + put_pixels_clamped_mmx(block, dest, line_size); +} +static void ff_libmpeg2mmx2_idct_add(uint8_t *dest, int line_size, DCTELEM *block) +{ + ff_mmxext_idct (block); + add_pixels_clamped_mmx(block, dest, line_size); +} -void dsputil_init_mmx(DSPContext* c, unsigned mask) +void dsputil_init_mmx(DSPContext* c, AVCodecContext *avctx) { mm_flags = mm_support(); #if 0 @@ -1428,6 +1456,27 @@ #endif if (mm_flags & MM_MMX) { + const int dct_algo = avctx->dct_algo; + const int idct_algo= avctx->idct_algo; + + if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX) + c->fdct = ff_fdct_mmx; + + if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){ + c->idct_put= ff_simple_idct_put_mmx; + c->idct_add= ff_simple_idct_add_mmx; + c->idct_permutation_type= FF_SIMPLE_IDCT_PERM; + }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){ + if(mm_flags & MM_MMXEXT){ + c->idct_put= ff_libmpeg2mmx2_idct_put; + c->idct_add= ff_libmpeg2mmx2_idct_add; + }else{ + c->idct_put= ff_libmpeg2mmx_idct_put; + c->idct_add= ff_libmpeg2mmx_idct_add; + } + c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; + } + c->get_pixels = get_pixels_mmx; c->diff_pixels = diff_pixels_mmx; c->put_pixels_clamped = put_pixels_clamped_mmx; @@ -1487,23 +1536,26 @@ if (mm_flags & MM_MMXEXT) { c->put_pixels_tab[0][1] = put_pixels16_x2_mmx2; c->put_pixels_tab[0][2] = put_pixels16_y2_mmx2; - c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; - c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; c->avg_pixels_tab[0][0] = 
avg_pixels16_mmx2; c->avg_pixels_tab[0][1] = avg_pixels16_x2_mmx2; c->avg_pixels_tab[0][2] = avg_pixels16_y2_mmx2; - c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; c->put_pixels_tab[1][1] = put_pixels8_x2_mmx2; c->put_pixels_tab[1][2] = put_pixels8_y2_mmx2; - c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; - c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2; c->avg_pixels_tab[1][0] = avg_pixels8_mmx2; c->avg_pixels_tab[1][1] = avg_pixels8_x2_mmx2; c->avg_pixels_tab[1][2] = avg_pixels8_y2_mmx2; - c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; + + if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ + c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx2; + c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx2; + c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx2; + c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx2; + c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx2; + c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx2; + } #if 1 SET_QPEL_FUNC(qpel_pixels_tab[0][ 0], qpel16_mc00_mmx2) @@ -1542,23 +1594,26 @@ } else if (mm_flags & MM_3DNOW) { c->put_pixels_tab[0][1] = put_pixels16_x2_3dnow; c->put_pixels_tab[0][2] = put_pixels16_y2_3dnow; - c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow; - c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow; c->avg_pixels_tab[0][0] = avg_pixels16_3dnow; c->avg_pixels_tab[0][1] = avg_pixels16_x2_3dnow; c->avg_pixels_tab[0][2] = avg_pixels16_y2_3dnow; - c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow; c->put_pixels_tab[1][1] = put_pixels8_x2_3dnow; c->put_pixels_tab[1][2] = put_pixels8_y2_3dnow; - c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow; - c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow; c->avg_pixels_tab[1][0] = avg_pixels8_3dnow; c->avg_pixels_tab[1][1] = avg_pixels8_x2_3dnow; c->avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow; - c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow; + + if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ + 
c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_3dnow; + c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_3dnow; + c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_3dnow; + c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_3dnow; + c->avg_pixels_tab[0][3] = avg_pixels16_xy2_3dnow; + c->avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow; + } SET_QPEL_FUNC(qpel_pixels_tab[0][ 0], qpel16_mc00_3dnow) SET_QPEL_FUNC(qpel_pixels_tab[0][ 1], qpel16_mc10_3dnow) @@ -1594,7 +1649,8 @@ SET_QPEL_FUNC(qpel_pixels_tab[1][15], qpel8_mc33_3dnow) } } - dsputil_init_pix_mmx(c, mask); + + dsputil_init_pix_mmx(c, avctx); #if 0 // for speed testing get_pixels = just_return; @@ -1630,20 +1686,3 @@ //ff_idct = just_return; #endif } - -/* remove any non bit exact operation (testing purpose). NOTE that - this function should be kept as small as possible because it is - always difficult to test automatically non bit exact cases. */ -void dsputil_set_bit_exact_mmx(DSPContext* c, unsigned mask) -{ - if (mm_flags & MM_MMX) { - /* MMX2 & 3DNOW */ - c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_mmx; - c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_mmx; - c->avg_pixels_tab[0][3] = avg_pixels16_xy2_mmx; - c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_mmx; - c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_mmx; - c->avg_pixels_tab[1][3] = avg_pixels8_xy2_mmx; - } - dsputil_set_bit_exact_pix_mmx(c, mask); -} diff -r 03df246fb06b -r f59c3f66363b i386/motion_est_mmx.c --- a/i386/motion_est_mmx.c Sat Mar 01 00:16:00 2003 +0000 +++ b/i386/motion_est_mmx.c Mon Mar 03 14:54:00 2003 +0000 @@ -386,7 +386,7 @@ PIX_SAD(mmx) PIX_SAD(mmx2) -void dsputil_init_pix_mmx(DSPContext* c, unsigned mask) +void dsputil_init_pix_mmx(DSPContext* c, AVCodecContext *avctx) { if (mm_flags & MM_MMX) { c->pix_abs16x16 = pix_abs16x16_mmx; @@ -403,27 +403,18 @@ } if (mm_flags & MM_MMXEXT) { c->pix_abs16x16 = pix_abs16x16_mmx2; - c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx2; - 
c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx2; - c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx2; c->pix_abs8x8 = pix_abs8x8_mmx2; - c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx2; - c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx2; - c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx2; c->sad[0]= sad16x16_mmx2; c->sad[1]= sad8x8_mmx2; + + if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ + c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx2; + c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx2; + c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx2; + c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx2; + c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx2; + c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx2; + } } } - -void dsputil_set_bit_exact_pix_mmx(DSPContext* c, unsigned mask) -{ - if (mm_flags & MM_MMXEXT) { - c->pix_abs16x16_x2 = pix_abs16x16_x2_mmx; - c->pix_abs16x16_y2 = pix_abs16x16_y2_mmx; - c->pix_abs16x16_xy2 = pix_abs16x16_xy2_mmx; - c->pix_abs8x8_x2 = pix_abs8x8_x2_mmx; - c->pix_abs8x8_y2 = pix_abs8x8_y2_mmx; - c->pix_abs8x8_xy2 = pix_abs8x8_xy2_mmx; - } -} diff -r 03df246fb06b -r f59c3f66363b i386/mpegvideo_mmx.c --- a/i386/mpegvideo_mmx.c Sat Mar 01 00:16:00 2003 +0000 +++ b/i386/mpegvideo_mmx.c Mon Mar 03 14:54:00 2003 +0000 @@ -23,7 +23,6 @@ #include "../dsputil.h" #include "../mpegvideo.h" #include "../avcodec.h" -#include "../simple_idct.h" extern uint8_t zigzag_direct_noperm[64]; extern uint16_t inv_zigzag_direct16[64]; @@ -499,38 +498,10 @@ #define RENAME(a) a ## _MMX2 #include "mpegvideo_mmx_template.c" -/* external functions, from idct_mmx.c */ -void ff_mmx_idct(DCTELEM *block); -void ff_mmxext_idct(DCTELEM *block); - -/* XXX: those functions should be suppressed ASAP when all IDCTs are - converted */ -static void ff_libmpeg2mmx_idct_put(uint8_t *dest, int line_size, DCTELEM *block) -{ - ff_mmx_idct (block); - put_pixels_clamped_mmx(block, dest, line_size); -} -static void ff_libmpeg2mmx_idct_add(uint8_t *dest, int line_size, DCTELEM *block) -{ - ff_mmx_idct (block); - add_pixels_clamped_mmx(block, dest, line_size); -} -static void 
ff_libmpeg2mmx2_idct_put(uint8_t *dest, int line_size, DCTELEM *block) -{ - ff_mmxext_idct (block); - put_pixels_clamped_mmx(block, dest, line_size); -} -static void ff_libmpeg2mmx2_idct_add(uint8_t *dest, int line_size, DCTELEM *block) -{ - ff_mmxext_idct (block); - add_pixels_clamped_mmx(block, dest, line_size); -} - void MPV_common_init_mmx(MpegEncContext *s) { if (mm_flags & MM_MMX) { const int dct_algo = s->avctx->dct_algo; - const int idct_algo= s->avctx->idct_algo; s->dct_unquantize_h263 = dct_unquantize_h263_mmx; s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_mmx; @@ -539,28 +510,11 @@ draw_edges = draw_edges_mmx; if(dct_algo==FF_DCT_AUTO || dct_algo==FF_DCT_MMX){ - s->fdct = ff_fdct_mmx; - if(mm_flags & MM_MMXEXT){ s->dct_quantize= dct_quantize_MMX2; } else { s->dct_quantize= dct_quantize_MMX; } } - - if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){ - s->idct_put= ff_simple_idct_put_mmx; - s->idct_add= ff_simple_idct_add_mmx; - s->idct_permutation_type= FF_SIMPLE_IDCT_PERM; - }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){ - if(mm_flags & MM_MMXEXT){ - s->idct_put= ff_libmpeg2mmx2_idct_put; - s->idct_add= ff_libmpeg2mmx2_idct_add; - }else{ - s->idct_put= ff_libmpeg2mmx_idct_put; - s->idct_add= ff_libmpeg2mmx_idct_add; - } - s->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; - } } } diff -r 03df246fb06b -r f59c3f66363b i386/mpegvideo_mmx_template.c --- a/i386/mpegvideo_mmx_template.c Sat Mar 01 00:16:00 2003 +0000 +++ b/i386/mpegvideo_mmx_template.c Mon Mar 03 14:54:00 2003 +0000 @@ -191,7 +191,7 @@ if(s->mb_intra) block[0]= level; else block[0]= temp_block[0]; - if(s->idct_permutation[1]==8){ + if(s->dsp.idct_permutation_type == FF_SIMPLE_IDCT_PERM){ if(last_non_zero_p1 <= 1) goto end; block[0x08] = temp_block[0x01]; block[0x10] = temp_block[0x08]; block[0x20] = temp_block[0x10]; @@ -235,7 +235,7 @@ block[0x3E] = temp_block[0x3D]; block[0x27] = temp_block[0x36]; block[0x3D] = temp_block[0x2F]; block[0x2F] = temp_block[0x37]; block[0x37] = 
temp_block[0x3E]; block[0x3F] = temp_block[0x3F]; - }else if(s->idct_permutation[1]==4){ + }else if(s->dsp.idct_permutation_type == FF_LIBMPEG2_IDCT_PERM){ if(last_non_zero_p1 <= 1) goto end; block[0x04] = temp_block[0x01]; block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10]; diff -r 03df246fb06b -r f59c3f66363b mjpeg.c --- a/mjpeg.c Sat Mar 01 00:16:00 2003 +0000 +++ b/mjpeg.c Mon Mar 03 14:54:00 2003 +0000 @@ -386,7 +386,7 @@ } /* comment */ - if(!ff_bit_exact){ + if(!(s->flags & CODEC_FLAG_BITEXACT)){ put_marker(p, COM); flush_put_bits(p); ptr = pbBufPtr(p); @@ -703,7 +703,7 @@ s->avctx = avctx; - /* ugly way to get the idct & scantable */ + /* ugly way to get the idct & scantable FIXME */ memset(&s2, 0, sizeof(MpegEncContext)); s2.flags= avctx->flags; s2.avctx= avctx; @@ -713,7 +713,7 @@ if (MPV_common_init(&s2) < 0) return -1; s->scantable= s2.intra_scantable; - s->idct_put= s2.idct_put; + s->idct_put= s2.dsp.idct_put; MPV_common_end(&s2); s->mpeg_enc_ctx_allocated = 0; diff -r 03df246fb06b -r f59c3f66363b mpeg12.c --- a/mpeg12.c Sat Mar 01 00:16:00 2003 +0000 +++ b/mpeg12.c Mon Mar 03 14:54:00 2003 +0000 @@ -1653,7 +1653,7 @@ if (get_bits1(&s->gb)) { for(i=0;i<64;i++) { v = get_bits(&s->gb, 8); - j= s->idct_permutation[ ff_zigzag_direct[i] ]; + j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ]; s->intra_matrix[j] = v; s->chroma_intra_matrix[j] = v; } @@ -1661,7 +1661,7 @@ if (get_bits1(&s->gb)) { for(i=0;i<64;i++) { v = get_bits(&s->gb, 8); - j= s->idct_permutation[ ff_zigzag_direct[i] ]; + j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ]; s->inter_matrix[j] = v; s->chroma_inter_matrix[j] = v; } @@ -1669,14 +1669,14 @@ if (get_bits1(&s->gb)) { for(i=0;i<64;i++) { v = get_bits(&s->gb, 8); - j= s->idct_permutation[ ff_zigzag_direct[i] ]; + j= s->dsp.idct_permutation[ ff_zigzag_direct[i] ]; s->chroma_intra_matrix[j] = v; } } if (get_bits1(&s->gb)) { for(i=0;i<64;i++) { v = get_bits(&s->gb, 8); - j= s->idct_permutation[ ff_zigzag_direct[i] ]; + j= 
s->dsp.idct_permutation[ ff_zigzag_direct[i] ]; s->chroma_inter_matrix[j] = v; } } @@ -1985,7 +1985,7 @@ #endif } else { for(i=0;i<64;i++) { - int j= s->idct_permutation[i]; + int j= s->dsp.idct_permutation[i]; v = ff_mpeg1_default_intra_matrix[i]; s->intra_matrix[j] = v; s->chroma_intra_matrix[j] = v; @@ -2006,7 +2006,7 @@ #endif } else { for(i=0;i<64;i++) { - int j= s->idct_permutation[i]; + int j= s->dsp.idct_permutation[i]; v = ff_mpeg1_default_non_intra_matrix[i]; s->inter_matrix[j] = v; s->chroma_inter_matrix[j] = v; diff -r 03df246fb06b -r f59c3f66363b mpegvideo.c --- a/mpegvideo.c Sat Mar 01 00:16:00 2003 +0000 +++ b/mpegvideo.c Mon Mar 03 14:54:00 2003 +0000 @@ -24,7 +24,6 @@ #include "avcodec.h" #include "dsputil.h" #include "mpegvideo.h" -#include "simple_idct.h" #ifdef USE_FASTMEMCPY #include "fastmemcpy.h" @@ -72,18 +71,6 @@ 4520 , 6270, 5906, 5315, 4520, 3552, 2446, 1247 }; -/* Input permutation for the simple_idct_mmx */ -static const uint8_t simple_mmx_permutation[64]={ - 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, - 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, - 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, - 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, - 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, - 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, - 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, - 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, -}; - static const uint8_t h263_chroma_roundtab[16] = { // 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, @@ -102,9 +89,9 @@ for(qscale=qmin; qscale<=qmax; qscale++){ int i; - if (s->fdct == ff_jpeg_fdct_islow) { + if (s->dsp.fdct == ff_jpeg_fdct_islow) { for(i=0;i<64;i++) { - const int j= s->idct_permutation[i]; + const int j= s->dsp.idct_permutation[i]; /* 16 <= qscale * quant_matrix[i] <= 7905 */ /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */ /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 
(1<<36)/249205026 */ @@ -113,9 +100,9 @@ qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j])); } - } else if (s->fdct == fdct_ifast) { + } else if (s->dsp.fdct == fdct_ifast) { for(i=0;i<64;i++) { - const int j= s->idct_permutation[i]; + const int j= s->dsp.idct_permutation[i]; /* 16 <= qscale * quant_matrix[i] <= 7905 */ /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */ /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */ @@ -126,7 +113,7 @@ } } else { for(i=0;i<64;i++) { - const int j= s->idct_permutation[i]; + const int j= s->dsp.idct_permutation[i]; /* We can safely suppose that 16 <= quant_matrix[i] <= 255 So 16 <= qscale * quant_matrix[i] <= 7905 so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905 @@ -163,7 +150,7 @@ for(i=0; i<64; i++){ int j; j = src_scantable[i]; - st->permutated[i] = s->idct_permutation[j]; + st->permutated[i] = s->dsp.idct_permutation[j]; #ifdef ARCH_POWERPC st->inverse[j] = i; #endif @@ -178,51 +165,16 @@ } } -/* XXX: those functions should be suppressed ASAP when all IDCTs are - converted */ -// *FIXME* this is ugly hack using local static -static void (*ff_put_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size); -static void (*ff_add_pixels_clamped)(const DCTELEM *block, uint8_t *pixels, int line_size); -static void ff_jref_idct_put(uint8_t *dest, int line_size, DCTELEM *block) -{ - j_rev_dct (block); - ff_put_pixels_clamped(block, dest, line_size); -} -static void ff_jref_idct_add(uint8_t *dest, int line_size, DCTELEM *block) -{ - j_rev_dct (block); - ff_add_pixels_clamped(block, dest, line_size); -} - /* init common dct for both encoder and decoder */ int DCT_common_init(MpegEncContext *s) { - int i; - - ff_put_pixels_clamped = s->dsp.put_pixels_clamped; - ff_add_pixels_clamped = s->dsp.add_pixels_clamped; - s->dct_unquantize_h263 = dct_unquantize_h263_c; s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_c; 
s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_c; + #ifdef CONFIG_ENCODERS s->dct_quantize= dct_quantize_c; - - if(s->avctx->dct_algo==FF_DCT_FASTINT) - s->fdct = fdct_ifast; - else - s->fdct = ff_jpeg_fdct_islow; //slow/accurate/default -#endif //CONFIG_ENCODERS - - if(s->avctx->idct_algo==FF_IDCT_INT){ - s->idct_put= ff_jref_idct_put; - s->idct_add= ff_jref_idct_add; - s->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; - }else{ //accurate/default - s->idct_put= simple_idct_put; - s->idct_add= simple_idct_add; - s->idct_permutation_type= FF_NO_IDCT_PERM; - } +#endif #ifdef HAVE_MMX MPV_common_init_mmx(s); @@ -252,29 +204,6 @@ #endif //CONFIG_ENCODERS - switch(s->idct_permutation_type){ - case FF_NO_IDCT_PERM: - for(i=0; i<64; i++) - s->idct_permutation[i]= i; - break; - case FF_LIBMPEG2_IDCT_PERM: - for(i=0; i<64; i++) - s->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); - break; - case FF_SIMPLE_IDCT_PERM: - for(i=0; i<64; i++) - s->idct_permutation[i]= simple_mmx_permutation[i]; - break; - case FF_TRANSPOSE_IDCT_PERM: - for(i=0; i<64; i++) - s->idct_permutation[i]= ((i&7)<<3) | (i>>3); - break; - default: - fprintf(stderr, "Internal error, IDCT permutation not set\n"); - return -1; - } - - /* load & permutate scantables note: only wmv uses differnt ones */ @@ -384,7 +313,7 @@ { int y_size, c_size, yc_size, i; - dsputil_init(&s->dsp, s->avctx->dsp_mask); + dsputil_init(&s->dsp, s->avctx); DCT_common_init(s); s->flags= s->avctx->flags; @@ -768,7 +697,7 @@ /* init default q matrix */ for(i=0;i<64;i++) { - int j= s->idct_permutation[i]; + int j= s->dsp.idct_permutation[i]; #ifdef CONFIG_RISKY if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){ s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i]; @@ -1938,7 +1867,7 @@ DCTELEM *block, int i, uint8_t *dest, int line_size) { s->dct_unquantize(s, block, i, s->qscale); - s->idct_put (dest, line_size, block); + s->dsp.idct_put (dest, line_size, block); } /* add block[] to dest[] */ @@ -1946,7 +1875,7 @@ 
DCTELEM *block, int i, uint8_t *dest, int line_size) { if (s->block_last_index[i] >= 0) { - s->idct_add (dest, line_size, block); + s->dsp.idct_add (dest, line_size, block); } } @@ -1956,7 +1885,7 @@ if (s->block_last_index[i] >= 0) { s->dct_unquantize(s, block, i, s->qscale); - s->idct_add (dest, line_size, block); + s->dsp.idct_add (dest, line_size, block); } } @@ -2193,14 +2122,14 @@ put_dct(s, block[5], 5, dest_cr, s->uvlinesize); } }else{ - s->idct_put(dest_y , dct_linesize, block[0]); - s->idct_put(dest_y + 8, dct_linesize, block[1]); - s->idct_put(dest_y + dct_offset , dct_linesize, block[2]); - s->idct_put(dest_y + dct_offset + 8, dct_linesize, block[3]); + s->dsp.idct_put(dest_y , dct_linesize, block[0]); + s->dsp.idct_put(dest_y + 8, dct_linesize, block[1]); + s->dsp.idct_put(dest_y + dct_offset , dct_linesize, block[2]); + s->dsp.idct_put(dest_y + dct_offset + 8, dct_linesize, block[3]); if(!(s->flags&CODEC_FLAG_GRAY)){ - s->idct_put(dest_cb, s->uvlinesize, block[4]); - s->idct_put(dest_cr, s->uvlinesize, block[5]); + s->dsp.idct_put(dest_cb, s->uvlinesize, block[4]); + s->dsp.idct_put(dest_cr, s->uvlinesize, block[5]); } } } @@ -3040,7 +2969,7 @@ /* for mjpeg, we do include qscale in the matrix */ s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0]; for(i=1;i<64;i++){ - int j= s->idct_permutation[i]; + int j= s->dsp.idct_permutation[i]; s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3); } @@ -3549,7 +3478,7 @@ int score_limit=0; int left_limit= 0; - s->fdct (block); + s->dsp.fdct (block); qmul= qscale*16; qadd= ((qscale-1)|1)*8; @@ -3648,7 +3577,7 @@ unquant_coeff= level*qmul - qadd; } }else{ //MPEG1 - j= s->idct_permutation[ scantable[i + start_i] ]; //FIXME optimize + j= s->dsp.idct_permutation[ scantable[i + start_i] ]; //FIXME optimize if(s->mb_intra){ if (level < 0) { unquant_coeff = (int)((-level) * qscale * s->intra_matrix[j]) >> 3; @@ -3760,11 +3689,11 @@ i= last_i; assert(last_level); //FIXME use 
permutated scantable - block[ s->idct_permutation[ scantable[last_non_zero] ] ]= last_level; + block[ s->dsp.idct_permutation[ scantable[last_non_zero] ] ]= last_level; i -= last_run + 1; for(;i>0 ; i -= run_tab[i] + 1){ - const int j= s->idct_permutation[ scantable[i - 1 + start_i] ]; + const int j= s->dsp.idct_permutation[ scantable[i - 1 + start_i] ]; block[j]= level_tab[i]; assert(block[j]); @@ -3784,7 +3713,7 @@ int max=0; unsigned int threshold1, threshold2; - s->fdct (block); + s->dsp.fdct (block); if (s->mb_intra) { if (!s->h263_aic) { @@ -3836,8 +3765,8 @@ *overflow= s->max_qcoeff < max; //overflow might have happend /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */ - if (s->idct_permutation_type != FF_NO_IDCT_PERM) - ff_block_permute(block, s->idct_permutation, scantable, last_non_zero); + if (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM) + ff_block_permute(block, s->dsp.idct_permutation, scantable, last_non_zero); return last_non_zero; } diff -r 03df246fb06b -r f59c3f66363b mpegvideo.h --- a/mpegvideo.h Sat Mar 01 00:16:00 2003 +0000 +++ b/mpegvideo.h Mon Mar 03 14:54:00 2003 +0000 @@ -346,12 +346,6 @@ ScanTable intra_h_scantable; ScanTable intra_v_scantable; ScanTable inter_scantable; // if inter == intra then intra should be used to reduce tha cache usage - uint8_t idct_permutation[64]; - int idct_permutation_type; -#define FF_NO_IDCT_PERM 1 -#define FF_LIBMPEG2_IDCT_PERM 2 -#define FF_SIMPLE_IDCT_PERM 3 -#define FF_TRANSPOSE_IDCT_PERM 4 void *opaque; /* private data for the user */ @@ -562,10 +556,6 @@ DCTELEM *block/*align 16*/, int n, int qscale); int (*dct_quantize)(struct MpegEncContext *s, DCTELEM *block/*align 16*/, int n, int qscale, int *overflow); int (*fast_dct_quantize)(struct MpegEncContext *s, DCTELEM *block/*align 16*/, int n, int qscale, int *overflow); - void (*fdct)(DCTELEM *block/* align 16*/); - void (*idct_put)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); - void 
(*idct_add)(uint8_t *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); - //FIXME move above funcs into dspContext perhaps } MpegEncContext; @@ -610,8 +600,6 @@ extern enum PixelFormat ff_yuv420p_list[2]; -extern int ff_bit_exact; - static inline void ff_init_block_index(MpegEncContext *s){ s->block_index[0]= s->block_wrap[0]*(s->mb_y*2 + 1) - 1 + s->mb_x*2; s->block_index[1]= s->block_wrap[0]*(s->mb_y*2 + 1) + s->mb_x*2; diff -r 03df246fb06b -r f59c3f66363b ppc/dsputil_ppc.c --- a/ppc/dsputil_ppc.c Sat Mar 01 00:16:00 2003 +0000 +++ b/ppc/dsputil_ppc.c Mon Mar 03 14:54:00 2003 +0000 @@ -25,6 +25,9 @@ #include "dsputil_altivec.h" #endif +extern void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block); +extern void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block); + int mm_flags = 0; int mm_support(void) @@ -169,7 +172,7 @@ return count; } -void dsputil_init_ppc(DSPContext* c, unsigned mask) +void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx) { // Common optimisations whether Altivec or not @@ -215,6 +218,18 @@ c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec; c->gmc1 = gmc1_altivec; + + if ((avctx->idct_algo == FF_IDCT_AUTO) || + (avctx->idct_algo == FF_IDCT_ALTIVEC)) + { + c->idct_put = idct_put_altivec; + c->idct_add = idct_add_altivec; +#ifndef ALTIVEC_USE_REFERENCE_C_CODE + c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM; +#else /* ALTIVEC_USE_REFERENCE_C_CODE */ + c->idct_permutation_type = FF_NO_IDCT_PERM; +#endif /* ALTIVEC_USE_REFERENCE_C_CODE */ + } #ifdef POWERPC_TBL_PERFORMANCE_REPORT { diff -r 03df246fb06b -r f59c3f66363b ppc/mpegvideo_altivec.c --- a/ppc/mpegvideo_altivec.c Sat Mar 01 00:16:00 2003 +0000 +++ b/ppc/mpegvideo_altivec.c Mon Mar 03 14:54:00 2003 +0000 @@ -468,7 +468,7 @@ // and handle it using the vector unit if we can. This is the permute used // by the altivec idct, so it is common when using the altivec dct. 
- if ((lastNonZero > 0) && (s->idct_permutation_type == FF_TRANSPOSE_IDCT_PERM)) + if ((lastNonZero > 0) && (s->dsp.idct_permutation_type == FF_TRANSPOSE_IDCT_PERM)) { TRANSPOSE8(data0, data1, data2, data3, data4, data5, data6, data7); } @@ -501,10 +501,10 @@ // We handled the tranpose permutation above and we don't // need to permute the "no" permutation case. if ((lastNonZero > 0) && - (s->idct_permutation_type != FF_TRANSPOSE_IDCT_PERM) && - (s->idct_permutation_type != FF_NO_IDCT_PERM)) + (s->dsp.idct_permutation_type != FF_TRANSPOSE_IDCT_PERM) && + (s->dsp.idct_permutation_type != FF_NO_IDCT_PERM)) { - ff_block_permute(data, s->idct_permutation, + ff_block_permute(data, s->dsp.idct_permutation, s->intra_scantable.scantable, lastNonZero); } diff -r 03df246fb06b -r f59c3f66363b ps2/dsputil_mmi.c --- a/ps2/dsputil_mmi.c Sat Mar 01 00:16:00 2003 +0000 +++ b/ps2/dsputil_mmi.c Mon Mar 03 14:54:00 2003 +0000 @@ -22,6 +22,9 @@ #include "../dsputil.h" #include "mmi.h" +void ff_mmi_idct_put(uint8_t *dest, int line_size, DCTELEM *block); +void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block); + static void clear_blocks_mmi(DCTELEM * blocks) { @@ -103,8 +106,10 @@ } -void dsputil_init_mmi(DSPContext* c, unsigned mask) +void dsputil_init_mmi(DSPContext* c, AVCodecContext *avctx) { + const int idct_algo= avctx->idct_algo; + c->clear_blocks = clear_blocks_mmi; c->put_pixels_tab[1][0] = put_pixels8_mmi; @@ -114,5 +119,11 @@ c->put_no_rnd_pixels_tab[0][0] = put_pixels16_mmi; c->get_pixels = get_pixels_mmi; + + if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_PS2){ + c->idct_put= ff_mmi_idct_put; + c->idct_add= ff_mmi_idct_add; + c->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; + } } diff -r 03df246fb06b -r f59c3f66363b ps2/mpegvideo_mmi.c --- a/ps2/mpegvideo_mmi.c Sat Mar 01 00:16:00 2003 +0000 +++ b/ps2/mpegvideo_mmi.c Mon Mar 03 14:54:00 2003 +0000 @@ -22,10 +22,6 @@ #include "../mpegvideo.h" #include "../avcodec.h" -void ff_mmi_idct_put(uint8_t *dest, int
line_size, DCTELEM *block); -void ff_mmi_idct_add(uint8_t *dest, int line_size, DCTELEM *block); - - static void dct_unquantize_h263_mmi(MpegEncContext *s, DCTELEM *block, int n, int qscale) { @@ -84,14 +80,6 @@ void MPV_common_init_mmi(MpegEncContext *s) { - int i; - const int idct_algo= s->avctx->idct_algo; - - if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_PS2){ - s->idct_put= ff_mmi_idct_put; - s->idct_add= ff_mmi_idct_add; - s->idct_permutation_type= FF_LIBMPEG2_IDCT_PERM; - } s->dct_unquantize_h263 = dct_unquantize_h263_mmi; } diff -r 03df246fb06b -r f59c3f66363b wmv2.c --- a/wmv2.c Sat Mar 01 00:16:00 2003 +0000 +++ b/wmv2.c Mon Mar 03 14:54:00 2003 +0000 @@ -328,7 +328,7 @@ code = get_bits(&gb, 3); if(code==0) return -1; - + s->slice_height = s->mb_height / code; if(s->avctx->debug&FF_DEBUG_PICT_INFO){ @@ -582,7 +582,7 @@ switch(w->abt_type_table[n]){ case 0: if (s->block_last_index[n] >= 0) { - s->idct_add (dst, stride, block1); + s->dsp.idct_add (dst, stride, block1); } break; case 1: