# HG changeset patch # User michaelni # Date 1033339462 0 # Node ID e65798d228ea583f520cdb5520707303246abe3a # Parent 107a56aa74f594a45daa1d1b885e0aec4222a1af idct permutation cleanup, idct can be selected per context now fixing some threadunsafe code diff -r 107a56aa74f5 -r e65798d228ea avcodec.h --- a/avcodec.h Sun Sep 29 15:14:28 2002 +0000 +++ b/avcodec.h Sun Sep 29 22:44:22 2002 +0000 @@ -5,8 +5,8 @@ #define LIBAVCODEC_VERSION_INT 0x000406 #define LIBAVCODEC_VERSION "0.4.6" -#define LIBAVCODEC_BUILD 4628 -#define LIBAVCODEC_BUILD_STR "4628" +#define LIBAVCODEC_BUILD 4629 +#define LIBAVCODEC_BUILD_STR "4629" enum CodecID { CODEC_ID_NONE, @@ -684,6 +684,21 @@ */ int fourcc; + /** + * idct algorithm, see FF_IDCT_* below + * encoding: set by user + * decoding: set by user + */ + int idct_algo; +#define FF_IDCT_AUTO 0 +#define FF_IDCT_INT 1 +#define FF_IDCT_SIMPLE 2 +#define FF_IDCT_SIMPLEMMX 3 +#define FF_IDCT_LIBMPEG2MMX 4 +#define FF_IDCT_PS2 5 +#define FF_IDCT_MLIB 6 +#define FF_IDCT_ARM 7 + //FIXME this should be reordered after kabis API is finished ... //TODO kill kabi /* diff -r 107a56aa74f5 -r e65798d228ea common.h --- a/common.h Sun Sep 29 15:14:28 2002 +0000 +++ b/common.h Sun Sep 29 22:44:22 2002 +0000 @@ -932,6 +932,22 @@ return (s[0]) + (s[1]<<8) + (s[2]<<16) + (s[3]<<24); } + +#ifdef ARCH_X86 +#define MASK_ABS(mask, level)\ + asm volatile(\ + "cdq \n\t"\ + "xorl %1, %0 \n\t"\ + "subl %1, %0 \n\t"\ + : "+a" (level), "=&d" (mask)\ + ); +#else +#define MASK_ABS(mask, level)\ + mask= level>>31;\ + level= (level^mask)-mask; +#endif + + #if __CPU__ >= 686 && !defined(RUNTIME_CPUDETECT) #define COPY3_IF_LT(x,y,a,b,c,d)\ asm volatile (\ diff -r 107a56aa74f5 -r e65798d228ea dsputil.c --- a/dsputil.c Sun Sep 29 15:14:28 2002 +0000 +++ b/dsputil.c Sun Sep 29 22:44:22 2002 +0000 @@ -20,11 +20,7 @@ */ #include "avcodec.h" #include "dsputil.h" -#include "simple_idct.h" -void (*ff_idct)(DCTELEM *block); -void (*ff_idct_put)(UINT8 *dest, int line_size, DCTELEM *block); -void (*ff_idct_add)(UINT8 *dest, int line_size, DCTELEM *block); void (*get_pixels)(DCTELEM *block, const UINT8 *pixels, int line_size); void (*diff_pixels)(DCTELEM *block, const UINT8 *s1, const UINT8 *s2, int stride); void (*put_pixels_clamped)(const DCTELEM *block, UINT8 *pixels, int line_size); @@ -49,16 +45,11 @@ UINT8 cropTbl[256 + 2 * MAX_NEG_CROP]; UINT32 squareTbl[512]; -extern INT16 ff_mpeg1_default_intra_matrix[64]; -extern INT16 ff_mpeg1_default_non_intra_matrix[64]; -extern INT16 ff_mpeg4_default_intra_matrix[64]; -extern INT16 ff_mpeg4_default_non_intra_matrix[64]; - -UINT8 zigzag_direct[64] = { - 0, 1, 8, 16, 9, 2, 3, 10, - 17, 24, 32, 25, 18, 11, 4, 5, +const UINT8 ff_zigzag_direct[64] = { + 0, 1, 8, 16, 9, 2, 3, 10, + 17, 24, 32, 25, 18, 11, 4, 5, 12, 19, 26, 33, 40, 48, 41, 34, - 27, 20, 13, 6, 7, 14, 21, 28, + 27, 20, 13, 6, 7, 14, 21, 28, 35, 42, 49, 56, 57, 50, 43, 36, 29, 22, 15, 23, 30, 37, 44, 51, 58, 59, 52, 45, 38, 31, 39, 46, @@ -68,11 +59,8 @@ /* not permutated inverse zigzag_direct + 1 for MMX quantizer */ UINT16 __align8 inv_zigzag_direct16[64]; -/* not permutated zigzag_direct for MMX quantizer */ -UINT8 zigzag_direct_noperm[64]; - -UINT8 ff_alternate_horizontal_scan[64] = { - 0, 1, 2, 3, 8, 9, 16, 17, +const UINT8 ff_alternate_horizontal_scan[64] = { + 0, 1, 2, 3, 8, 9, 16, 17, 10, 11, 4, 5, 6, 7, 15, 14, 13, 12, 19, 18, 24, 25, 32, 33, 26, 27, 20, 21, 22, 23, 28, 29, @@ -82,8 +70,8 @@ 52, 53, 54, 55, 60, 61, 62, 63, }; -UINT8 ff_alternate_vertical_scan[64] = { - 0, 8, 16, 24, 1, 9, 2, 10, +const UINT8 ff_alternate_vertical_scan[64] = { + 0, 8, 16, 24, 1, 9, 2, 10, 17, 25, 32, 40, 48, 56, 57, 49, 41, 33, 26, 18, 3, 11, 4, 12, 19, 27, 34, 42, 50, 58, 35, 43, @@ -93,21 +81,6 @@ 38, 46, 54, 62, 39, 47, 55, 63, }; -#ifdef SIMPLE_IDCT - -/* Input permutation for the simple_idct_mmx */ -static UINT8 simple_mmx_permutation[64]={ - 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, - 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, - 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, - 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, - 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, - 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, - 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, - 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, -}; -#endif - /* a*inverse[b]>>32 == a/b for all 0<=a<=65536 && 2<=b<=255 */ UINT32 inverse[256]={ 0, 4294967295U,2147483648U,1431655766, 1073741824, 858993460, 715827883, 613566757, @@ -144,24 +117,6 @@ 17318417, 17248865, 17179870, 17111424, 17043522, 16976156, 16909321, 16843010, }; -/* used to skip zeros at the end */ -UINT8 zigzag_end[64]; - -UINT8 permutation[64]; -//UINT8 invPermutation[64]; - -static void build_zigzag_end(void) -{ - int lastIndex; - int lastIndexAfterPerm=0; - for(lastIndex=0; lastIndex<64; lastIndex++) - { - if(zigzag_direct[lastIndex] > lastIndexAfterPerm) - lastIndexAfterPerm= zigzag_direct[lastIndex]; - zigzag_end[lastIndex]= lastIndexAfterPerm + 1; - } -} - int pix_sum_c(UINT8 * pix, int line_size) { int s, i, j; @@ -1540,65 +1495,24 @@ /* permute block according so that it corresponds to the MMX idct order */ -#ifdef SIMPLE_IDCT - /* general permutation, but perhaps slightly slower */ -void block_permute(INT16 *block) +void block_permute(INT16 *block, UINT8 *permutation) { int i; INT16 temp[64]; - for(i=0; i<64; i++) temp[ block_permute_op(i) ] = block[i]; + for(i=0; i<64; i++) temp[ permutation[i] ] = block[i]; for(i=0; i<64; i++) block[i] = temp[i]; } -#else - -void block_permute(INT16 *block) -{ - int tmp1, tmp2, tmp3, tmp4, tmp5, tmp6; - int i; - - for(i=0;i<8;i++) { - tmp1 = block[1]; - tmp2 = block[2]; - tmp3 = block[3]; - tmp4 = block[4]; - tmp5 = block[5]; - tmp6 = block[6]; - block[1] = tmp2; - block[2] = tmp4; - block[3] = tmp6; - block[4] = tmp1; - block[5] = tmp3; - block[6] = tmp5; - block += 8; - } -} -#endif void clear_blocks_c(DCTELEM *blocks) { memset(blocks, 0, sizeof(DCTELEM)*6*64); } -/* XXX: those functions should be suppressed ASAP when all IDCTs are - converted */ -void gen_idct_put(UINT8 *dest, int line_size, DCTELEM *block) -{ - ff_idct (block); - put_pixels_clamped(block, dest, line_size); -} - -void gen_idct_add(UINT8 *dest, int line_size, DCTELEM *block) -{ - ff_idct (block); - add_pixels_clamped(block, dest, line_size); -} - void dsputil_init(void) { int i, j; - int use_permuted_idct; for(i=0;i<256;i++) cropTbl[i + MAX_NEG_CROP] = i; for(i=0;i> 1) | ((i & 1) << 2); -#endif - else - for(i=0; i<64; i++) permutation[i]=i; - - for(i=0; i<64; i++) inv_zigzag_direct16[zigzag_direct[i]]= i+1; - for(i=0; i<64; i++) zigzag_direct_noperm[i]= zigzag_direct[i]; - - if (use_permuted_idct) { - /* permute for IDCT */ - for(i=0;i<64;i++) { - j = zigzag_direct[i]; - zigzag_direct[i] = block_permute_op(j); - j = ff_alternate_horizontal_scan[i]; - ff_alternate_horizontal_scan[i] = block_permute_op(j); - j = ff_alternate_vertical_scan[i]; - ff_alternate_vertical_scan[i] = block_permute_op(j); - } - block_permute(ff_mpeg1_default_intra_matrix); - block_permute(ff_mpeg1_default_non_intra_matrix); - block_permute(ff_mpeg4_default_intra_matrix); - block_permute(ff_mpeg4_default_non_intra_matrix); - } - - build_zigzag_end(); + for(i=0; i<64; i++) inv_zigzag_direct16[ff_zigzag_direct[i]]= i+1; } /* remove any non bit exact operation (testing purpose) */ diff -r 107a56aa74f5 -r e65798d228ea dsputil.h --- a/dsputil.h Sun Sep 29 15:14:28 2002 +0000 +++ b/dsputil.h Sun Sep 29 22:44:22 2002 +0000 @@ -34,12 +34,9 @@ void ff_fdct_mmx(DCTELEM *block); /* encoding scans */ -extern UINT8 ff_alternate_horizontal_scan[64]; -extern UINT8 ff_alternate_vertical_scan[64]; -extern UINT8 zigzag_direct[64]; - -/* permutation table */ -extern UINT8 permutation[64]; +extern const UINT8 ff_alternate_horizontal_scan[64]; +extern const UINT8 ff_alternate_vertical_scan[64]; +extern const UINT8 ff_zigzag_direct[64]; /* pixel operations */ #define MAX_NEG_CROP 384 @@ -61,9 +58,6 @@ */ /* pixel ops : interface with DCT */ -extern void (*ff_idct)(DCTELEM *block/*align 16*/); -extern void (*ff_idct_put)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); -extern void (*ff_idct_add)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); extern void (*get_pixels)(DCTELEM *block/*align 16*/, const UINT8 *pixels/*align 8*/, int line_size); extern void (*diff_pixels)(DCTELEM *block/*align 16*/, const UINT8 *s1/*align 8*/, const UINT8 *s2/*align 8*/, int stride); extern void (*put_pixels_clamped)(const DCTELEM *block/*align 16*/, UINT8 *pixels/*align 8*/, int line_size); @@ -119,12 +113,7 @@ int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx); int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx); -static inline int block_permute_op(int j) -{ - return permutation[j]; -} - -void block_permute(INT16 *block); +void block_permute(INT16 *block, UINT8 *permutation); #if defined(HAVE_MMX) diff -r 107a56aa74f5 -r e65798d228ea h263.c --- a/h263.c Sun Sep 29 15:14:28 2002 +0000 +++ b/h263.c Sun Sep 29 22:44:22 2002 +0000 @@ -287,19 +287,19 @@ if(s->mb_y==0 || s->qscale == s->qscale_table[xy] || n==2 || n==3){ /* same qscale */ for(i=1; i<8; i++){ - const int level= block[n][block_permute_op(i )]; + const int level= block[n][s->idct_permutation[i ]]; score0+= ABS(level); score1+= ABS(level - ac_val[i+8]); - ac_val1[i ]= block[n][block_permute_op(i<<3)]; + ac_val1[i ]= block[n][s->idct_permutation[i<<3]]; ac_val1[i+8]= level; } }else{ /* different qscale, we must rescale */ for(i=1; i<8; i++){ - const int level= block[n][block_permute_op(i )]; + const int level= block[n][s->idct_permutation[i ]]; score0+= ABS(level); score1+= ABS(level - ROUNDED_DIV(ac_val[i + 8]*s->qscale_table[xy], s->qscale)); - ac_val1[i ]= block[n][block_permute_op(i<<3)]; + ac_val1[i ]= block[n][s->idct_permutation[i<<3]]; ac_val1[i+8]= level; } } @@ -310,20 +310,20 @@ if(s->mb_x==0 || s->qscale == s->qscale_table[xy] || n==1 || n==3){ /* same qscale */ for(i=1; i<8; i++){ - const int level= block[n][block_permute_op(i<<3)]; + const int level= block[n][s->idct_permutation[i<<3]]; score0+= ABS(level); score1+= ABS(level - ac_val[i]); ac_val1[i ]= level; - ac_val1[i+8]= block[n][block_permute_op(i )]; + ac_val1[i+8]= block[n][s->idct_permutation[i ]]; } }else{ /* different qscale, we must rescale */ for(i=1; i<8; i++){ - const int level= block[n][block_permute_op(i<<3)]; + const int level= block[n][s->idct_permutation[i<<3]]; score0+= ABS(level); score1+= ABS(level - ROUNDED_DIV(ac_val[i]*s->qscale_table[xy], s->qscale)); ac_val1[i ]= level; - ac_val1[i+8]= block[n][block_permute_op(i )]; + ac_val1[i+8]= block[n][s->idct_permutation[i ]]; } } } @@ -519,7 +519,7 @@ /* encode each block */ for (i = 0; i < 6; i++) { - mpeg4_encode_block(s, block[i], i, 0, zigzag_direct, NULL, &s->pb); + mpeg4_encode_block(s, block[i], i, 0, s->intra_scantable.permutated, NULL, &s->pb); } if(interleaved_stats){ @@ -637,7 +637,7 @@ /* encode each block */ for (i = 0; i < 6; i++) { - mpeg4_encode_block(s, block[i], i, 0, zigzag_direct, NULL, tex_pb); + mpeg4_encode_block(s, block[i], i, 0, s->intra_scantable.permutated, NULL, tex_pb); } if(interleaved_stats){ @@ -674,8 +674,8 @@ int last_index; mpeg4_inv_pred_ac(s, block[i], i, dir[i]); - if (dir[i]==0) st = ff_alternate_vertical_scan; /* left */ - else st = ff_alternate_horizontal_scan; /* top */ + if (dir[i]==0) st = s->intra_v_scantable.permutated; /* left */ + else st = s->intra_h_scantable.permutated; /* top */ for(last_index=63; last_index>=0; last_index--) //FIXME optimize if(block[i][st[last_index]]) break; @@ -685,7 +685,7 @@ } }else{ for(i=0; i<6; i++) - scan_table[i]= zigzag_direct; + scan_table[i]= s->intra_scantable.permutated; } /* compute cbp */ @@ -746,10 +746,10 @@ if(dir[i]){ for(j=1; j<8; j++) - block[i][block_permute_op(j )]= ac_val[j+8]; + block[i][s->idct_permutation[j ]]= ac_val[j+8]; }else{ for(j=1; j<8; j++) - block[i][block_permute_op(j<<3)]= ac_val[j ]; + block[i][s->idct_permutation[j<<3]]= ac_val[j ]; } s->block_last_index[i]= zigzag_last_index[i]; } @@ -974,7 +974,7 @@ if (a != 1024) { ac_val -= 16; for(i=1;i<8;i++) { - block[block_permute_op(i*8)] += ac_val[i]; + block[s->idct_permutation[i<<3]] += ac_val[i]; } pred_dc = a; } @@ -983,7 +983,7 @@ if (c != 1024) { ac_val -= 16 * wrap; for(i=1;i<8;i++) { - block[block_permute_op(i)] += ac_val[i + 8]; + block[s->idct_permutation[i ]] += ac_val[i + 8]; } pred_dc = c; } @@ -1011,10 +1011,10 @@ /* left copy */ for(i=1;i<8;i++) - ac_val1[i] = block[block_permute_op(i * 8)]; + ac_val1[i ] = block[s->idct_permutation[i<<3]]; /* top copy */ for(i=1;i<8;i++) - ac_val1[8 + i] = block[block_permute_op(i)]; + ac_val1[8 + i] = block[s->idct_permutation[i ]]; } INT16 *h263_pred_motion(MpegEncContext * s, int block, @@ -1425,7 +1425,7 @@ last_index = s->block_last_index[n]; last_non_zero = i - 1; for (; i <= last_index; i++) { - j = zigzag_direct[i]; + j = s->intra_scantable.permutated[i]; level = block[j]; if (level) { run = i - last_non_zero - 1; @@ -1710,12 +1710,12 @@ if(s->mb_x==0 || s->qscale == s->qscale_table[xy] || n==1 || n==3){ /* same qscale */ for(i=1;i<8;i++) { - block[block_permute_op(i*8)] += ac_val[i]; + block[s->idct_permutation[i<<3]] += ac_val[i]; } }else{ /* different qscale, we must rescale */ for(i=1;i<8;i++) { - block[block_permute_op(i*8)] += ROUNDED_DIV(ac_val[i]*s->qscale_table[xy], s->qscale); + block[s->idct_permutation[i<<3]] += ROUNDED_DIV(ac_val[i]*s->qscale_table[xy], s->qscale); } } } else { @@ -1726,23 +1726,23 @@ if(s->mb_y==0 || s->qscale == s->qscale_table[xy] || n==2 || n==3){ /* same qscale */ for(i=1;i<8;i++) { - block[block_permute_op(i)] += ac_val[i + 8]; + block[s->idct_permutation[i]] += ac_val[i + 8]; } }else{ /* different qscale, we must rescale */ for(i=1;i<8;i++) { - block[block_permute_op(i)] += ROUNDED_DIV(ac_val[i + 8]*s->qscale_table[xy], s->qscale); + block[s->idct_permutation[i]] += ROUNDED_DIV(ac_val[i + 8]*s->qscale_table[xy], s->qscale); } } } } /* left copy */ for(i=1;i<8;i++) - ac_val1[i] = block[block_permute_op(i * 8)]; + ac_val1[i ] = block[s->idct_permutation[i<<3]]; /* top copy */ for(i=1;i<8;i++) - ac_val1[8 + i] = block[block_permute_op(i)]; + ac_val1[8 + i] = block[s->idct_permutation[i ]]; } @@ -1762,12 +1762,12 @@ if(s->mb_x==0 || s->qscale == s->qscale_table[xy] || n==1 || n==3){ /* same qscale */ for(i=1;i<8;i++) { - block[block_permute_op(i*8)] -= ac_val[i]; + block[s->idct_permutation[i<<3]] -= ac_val[i]; } }else{ /* different qscale, we must rescale */ for(i=1;i<8;i++) { - block[block_permute_op(i*8)] -= ROUNDED_DIV(ac_val[i]*s->qscale_table[xy], s->qscale); + block[s->idct_permutation[i<<3]] -= ROUNDED_DIV(ac_val[i]*s->qscale_table[xy], s->qscale); } } } else { @@ -1777,12 +1777,12 @@ if(s->mb_y==0 || s->qscale == s->qscale_table[xy] || n==2 || n==3){ /* same qscale */ for(i=1;i<8;i++) { - block[block_permute_op(i)] -= ac_val[i + 8]; + block[s->idct_permutation[i]] -= ac_val[i + 8]; } }else{ /* different qscale, we must rescale */ for(i=1;i<8;i++) { - block[block_permute_op(i)] -= ROUNDED_DIV(ac_val[i + 8]*s->qscale_table[xy], s->qscale); + block[s->idct_permutation[i]] -= ROUNDED_DIV(ac_val[i + 8]*s->qscale_table[xy], s->qscale); } } } @@ -3192,13 +3192,13 @@ static int h263_decode_motion(MpegEncContext * s, int pred, int f_code) { int code, val, sign, shift, l; - code = get_vlc2(&s->gb, mv_vlc.table, MV_VLC_BITS, 2); if (code < 0) return 0xffff; if (code == 0) return pred; + sign = get_bits1(&s->gb); shift = f_code - 1; val = (code - 1) << shift; @@ -3211,7 +3211,7 @@ /* modulo decoding */ if (!s->h263_long_vectors) { - l = (1 << (f_code - 1)) * 32; + l = 1 << (f_code + 4); if (val < -l) { val += l<<1; } else if (val >= l) { @@ -3261,15 +3261,15 @@ RLTable *rl = &rl_inter; const UINT8 *scan_table; - scan_table = zigzag_direct; + scan_table = s->intra_scantable.permutated; if (s->h263_aic && s->mb_intra) { rl = &rl_intra_aic; i = 0; if (s->ac_pred) { if (s->h263_aic_dir) - scan_table = ff_alternate_vertical_scan; /* left */ + scan_table = s->intra_v_scantable.permutated; /* left */ else - scan_table = ff_alternate_horizontal_scan; /* top */ + scan_table = s->intra_h_scantable.permutated; /* top */ } } else if (s->mb_intra) { /* DC coef */ @@ -3417,14 +3417,14 @@ rl = &rl_intra; rl_vlc = rl_intra.rl_vlc[0]; if(s->alternate_scan) - scan_table = ff_alternate_vertical_scan; /* left */ + scan_table = s->intra_v_scantable.permutated; /* left */ else if (s->ac_pred) { if (dc_pred_dir == 0) - scan_table = ff_alternate_vertical_scan; /* left */ + scan_table = s->intra_v_scantable.permutated; /* left */ else - scan_table = ff_alternate_horizontal_scan; /* top */ + scan_table = s->intra_h_scantable.permutated; /* top */ } else { - scan_table = zigzag_direct; + scan_table = s->intra_scantable.permutated; } qmul=1; qadd=0; @@ -3437,9 +3437,9 @@ rl = &rl_inter; if(s->alternate_scan) - scan_table = ff_alternate_vertical_scan; /* left */ + scan_table = s->intra_v_scantable.permutated; /* left */ else - scan_table = zigzag_direct; + scan_table = s->intra_scantable.permutated; if(s->mpeg_quant){ qmul=1; @@ -4081,13 +4081,14 @@ /* load default matrixes */ for(i=0; i<64; i++){ + int j= s->idct_permutation[i]; v= ff_mpeg4_default_intra_matrix[i]; - s->intra_matrix[i]= v; - s->chroma_intra_matrix[i]= v; + s->intra_matrix[j]= v; + s->chroma_intra_matrix[j]= v; v= ff_mpeg4_default_non_intra_matrix[i]; - s->inter_matrix[i]= v; - s->chroma_inter_matrix[i]= v; + s->inter_matrix[j]= v; + s->chroma_inter_matrix[j]= v; } /* load custom intra matrix */ @@ -4096,7 +4097,7 @@ v= get_bits(&s->gb, 8); if(v==0) break; - j= zigzag_direct[i]; + j= s->intra_scantable.permutated[i]; s->intra_matrix[j]= v; s->chroma_intra_matrix[j]= v; } @@ -4108,14 +4109,14 @@ v= get_bits(&s->gb, 8); if(v==0) break; - j= zigzag_direct[i]; + j= s->intra_scantable.permutated[i]; s->inter_matrix[j]= v; s->chroma_inter_matrix[j]= v; } /* replicate last value */ for(; i<64; i++){ - j= zigzag_direct[i]; + j= s->intra_scantable.permutated[i]; s->inter_matrix[j]= v; s->chroma_inter_matrix[j]= v; } diff -r 107a56aa74f5 -r e65798d228ea i386/dsputil_mmx.c --- a/i386/dsputil_mmx.c Sun Sep 29 15:14:28 2002 +0000 +++ b/i386/dsputil_mmx.c Sun Sep 29 22:44:22 2002 +0000 @@ -20,7 +20,6 @@ */ #include "../dsputil.h" -#include "../simple_idct.h" int mm_flags; /* multimedia extension flags */ @@ -44,10 +43,6 @@ int pix_abs8x8_y2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); int pix_abs8x8_xy2_mmx2(UINT8 *blk1, UINT8 *blk2, int lx); -/* external functions, from idct_mmx.c */ -void ff_mmx_idct(DCTELEM *block); -void ff_mmxext_idct(DCTELEM *block); - /* pixel operations */ static const uint64_t mm_bone __attribute__ ((aligned(8))) = 0x0101010101010101ULL; static const uint64_t mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL; @@ -588,17 +583,6 @@ avg_pixels_tab[1][2] = avg_pixels8_y2_3dnow; avg_pixels_tab[1][3] = avg_pixels8_xy2_3dnow; } - - /* idct */ - if (mm_flags & MM_MMXEXT) { - ff_idct = ff_mmxext_idct; - } else { - ff_idct = ff_mmx_idct; - } -#ifdef SIMPLE_IDCT -// ff_idct = simple_idct; - ff_idct = simple_idct_mmx; -#endif } #if 0 @@ -637,28 +621,6 @@ #endif } -void gen_idct_put(UINT8 *dest, int line_size, DCTELEM *block); - -/** - * this will send coeff matrixes which would have different results for the 16383 type MMX vs C IDCTs to the C IDCT - */ -void bit_exact_idct_put(UINT8 *dest, int line_size, INT16 *block){ - if( block[0]>1022 && block[1]==0 && block[4 ]==0 && block[5 ]==0 - && block[8]==0 && block[9]==0 && block[12]==0 && block[13]==0){ - int16_t tmp[64]; - int i; - - for(i=0; i<64; i++) - tmp[i]= block[i]; - for(i=0; i<64; i++) - block[i]= tmp[block_permute_op(i)]; - - simple_idct_put(dest, line_size, block); - } - else - gen_idct_put(dest, line_size, block); -} - /* remove any non bit exact operation (testing purpose). NOTE that this function should be kept as small as possible because it is always difficult to test automatically non bit exact cases. */ @@ -682,9 +644,5 @@ pix_abs8x8_y2 = pix_abs8x8_y2_mmx; pix_abs8x8_xy2= pix_abs8x8_xy2_mmx; } -#ifdef SIMPLE_IDCT - if(ff_idct_put==gen_idct_put && ff_idct == simple_idct_mmx) - ff_idct_put= bit_exact_idct_put; -#endif } } diff -r 107a56aa74f5 -r e65798d228ea i386/mpegvideo_mmx.c --- a/i386/mpegvideo_mmx.c Sun Sep 29 15:14:28 2002 +0000 +++ b/i386/mpegvideo_mmx.c Sun Sep 29 22:44:22 2002 +0000 @@ -23,53 +23,24 @@ #include "../dsputil.h" #include "../mpegvideo.h" #include "../avcodec.h" +#include "../simple_idct.h" -extern UINT8 zigzag_end[64]; +/* Input permutation for the simple_idct_mmx */ +static UINT8 simple_mmx_permutation[64]={ + 0x00, 0x08, 0x04, 0x09, 0x01, 0x0C, 0x05, 0x0D, + 0x10, 0x18, 0x14, 0x19, 0x11, 0x1C, 0x15, 0x1D, + 0x20, 0x28, 0x24, 0x29, 0x21, 0x2C, 0x25, 0x2D, + 0x12, 0x1A, 0x16, 0x1B, 0x13, 0x1E, 0x17, 0x1F, + 0x02, 0x0A, 0x06, 0x0B, 0x03, 0x0E, 0x07, 0x0F, + 0x30, 0x38, 0x34, 0x39, 0x31, 0x3C, 0x35, 0x3D, + 0x22, 0x2A, 0x26, 0x2B, 0x23, 0x2E, 0x27, 0x2F, + 0x32, 0x3A, 0x36, 0x3B, 0x33, 0x3E, 0x37, 0x3F, +}; extern UINT8 zigzag_direct_noperm[64]; extern UINT16 inv_zigzag_direct16[64]; extern UINT32 inverse[256]; -#if 0 - -/* XXX: GL: I don't understand why this function needs optimization - (it is called only once per frame!), so I disabled it */ - -void MPV_frame_start(MpegEncContext *s) -{ - if (s->pict_type == B_TYPE) { - __asm __volatile( - "movl (%1), %%eax\n\t" - "movl 4(%1), %%edx\n\t" - "movl 8(%1), %%ecx\n\t" - "movl %%eax, (%0)\n\t" - "movl %%edx, 4(%0)\n\t" - "movl %%ecx, 8(%0)\n\t" - : - :"r"(s->current_picture), "r"(s->aux_picture) - :"eax","edx","ecx","memory"); - } else { - /* swap next and last */ - __asm __volatile( - "movl (%1), %%eax\n\t" - "movl 4(%1), %%edx\n\t" - "movl 8(%1), %%ecx\n\t" - "xchgl (%0), %%eax\n\t" - "xchgl 4(%0), %%edx\n\t" - "xchgl 8(%0), %%ecx\n\t" - "movl %%eax, (%1)\n\t" - "movl %%edx, 4(%1)\n\t" - "movl %%ecx, 8(%1)\n\t" - "movl %%eax, (%2)\n\t" - "movl %%edx, 4(%2)\n\t" - "movl %%ecx, 8(%2)\n\t" - : - :"r"(s->last_picture), "r"(s->next_picture), "r"(s->current_picture) - :"eax","edx","ecx","memory"); - } -} -#endif - static const unsigned long long int mm_wabs __attribute__ ((aligned(8))) = 0xffffffffffffffffULL; static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL; @@ -77,36 +48,26 @@ static void dct_unquantize_h263_mmx(MpegEncContext *s, DCTELEM *block, int n, int qscale) { - int i, level, qmul, qadd, nCoeffs; - - qmul = s->qscale << 1; - if (s->h263_aic && s->mb_intra) - qadd = 0; - else - qadd = (s->qscale - 1) | 1; + int level, qmul, qadd, nCoeffs; + qmul = qscale << 1; + qadd = (qscale - 1) | 1; + + assert(s->block_last_index[n]>=0); + if (s->mb_intra) { if (!s->h263_aic) { if (n < 4) - block[0] = block[0] * s->y_dc_scale; + level = block[0] * s->y_dc_scale; else - block[0] = block[0] * s->c_dc_scale; + level = block[0] * s->c_dc_scale; + }else{ + qadd = 0; + level= block[0]; } - for(i=1; i<8; i++) { - level = block[i]; - if (level) { - if (level < 0) { - level = level * qmul - qadd; - } else { - level = level * qmul + qadd; - } - block[i] = level; - } - } - nCoeffs=64; + nCoeffs=63; } else { - i = 0; - nCoeffs= zigzag_end[ s->block_last_index[n] ]; + nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; } //printf("%d %d ", qmul, qadd); asm volatile( @@ -152,10 +113,12 @@ "movq %%mm1, 8(%0, %3) \n\t" "addl $16, %3 \n\t" - "js 1b \n\t" - ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(i-nCoeffs)) + "jng 1b \n\t" + ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs)) : "memory" ); + if(s->mb_intra) + block[0]= level; } @@ -193,9 +156,10 @@ { int nCoeffs; const UINT16 *quant_matrix; - - if(s->alternate_scan) nCoeffs= 64; - else nCoeffs= zigzag_end[ s->block_last_index[n] ]; + + assert(s->block_last_index[n]>=0); + + nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1; if (s->mb_intra) { int block0; @@ -312,6 +276,7 @@ : "%eax", "memory" ); } + } static void dct_unquantize_mpeg2_mmx(MpegEncContext *s, @@ -320,8 +285,10 @@ int nCoeffs; const UINT16 *quant_matrix; - if(s->alternate_scan) nCoeffs= 64; - else nCoeffs= zigzag_end[ s->block_last_index[n] ]; + assert(s->block_last_index[n]>=0); + + if(s->alternate_scan) nCoeffs= 63; //FIXME + else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; if (s->mb_intra) { int block0; @@ -371,7 +338,7 @@ "movq %%mm5, 8(%0, %%eax) \n\t" "addl $16, %%eax \n\t" - "js 1b \n\t" + "jng 1b \n\t" ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) : "%eax", "memory" ); @@ -427,7 +394,7 @@ "movq %%mm5, 8(%0, %%eax) \n\t" "addl $16, %%eax \n\t" - "js 1b \n\t" + "jng 1b \n\t" "movd 124(%0, %3), %%mm0 \n\t" "movq %%mm7, %%mm6 \n\t" "psrlq $32, %%mm7 \n\t" @@ -534,12 +501,6 @@ } } -static volatile int esp_temp; - -void unused_var_warning_killer(){ - esp_temp++; -} - #undef HAVE_MMX2 #define RENAME(a) a ## _MMX #include "mpegvideo_mmx_template.c" @@ -549,10 +510,40 @@ #define RENAME(a) a ## _MMX2 #include "mpegvideo_mmx_template.c" +/* external functions, from idct_mmx.c */ +void ff_mmx_idct(DCTELEM *block); +void ff_mmxext_idct(DCTELEM *block); + +/* XXX: those functions should be suppressed ASAP when all IDCTs are + converted */ +static void ff_libmpeg2mmx_idct_put(UINT8 *dest, int line_size, DCTELEM *block) +{ + ff_mmx_idct (block); + put_pixels_clamped(block, dest, line_size); +} +static void ff_libmpeg2mmx_idct_add(UINT8 *dest, int line_size, DCTELEM *block) +{ + ff_mmx_idct (block); + add_pixels_clamped(block, dest, line_size); +} +static void ff_libmpeg2mmx2_idct_put(UINT8 *dest, int line_size, DCTELEM *block) +{ + ff_mmxext_idct (block); + put_pixels_clamped(block, dest, line_size); +} +static void ff_libmpeg2mmx2_idct_add(UINT8 *dest, int line_size, DCTELEM *block) +{ + ff_mmxext_idct (block); + add_pixels_clamped(block, dest, line_size); +} + void MPV_common_init_mmx(MpegEncContext *s) { if (mm_flags & MM_MMX) { - const int dct_algo= s->avctx->dct_algo; + int i; + const int dct_algo = s->avctx->dct_algo; + const int idct_algo= s->avctx->idct_algo; + s->dct_unquantize_h263 = dct_unquantize_h263_mmx; s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_mmx; s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_mmx; @@ -568,5 +559,22 @@ s->dct_quantize= dct_quantize_MMX; } } + + if(idct_algo==FF_IDCT_AUTO || idct_algo==FF_IDCT_SIMPLEMMX){ + s->idct_put= ff_simple_idct_put_mmx; + s->idct_add= ff_simple_idct_add_mmx; + for(i=0; i<64; i++) + s->idct_permutation[i]= simple_mmx_permutation[i]; + }else if(idct_algo==FF_IDCT_LIBMPEG2MMX){ + if(mm_flags & MM_MMXEXT){ + s->idct_put= ff_libmpeg2mmx2_idct_put; + s->idct_add= ff_libmpeg2mmx2_idct_add; + }else{ + s->idct_put= ff_libmpeg2mmx_idct_put; + s->idct_add= ff_libmpeg2mmx_idct_add; + } + for(i=0; i<64; i++) + s->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); + } } } diff -r 107a56aa74f5 -r e65798d228ea i386/mpegvideo_mmx_template.c --- a/i386/mpegvideo_mmx_template.c Sun Sep 29 15:14:28 2002 +0000 +++ b/i386/mpegvideo_mmx_template.c Sun Sep 29 22:44:22 2002 +0000 @@ -189,31 +189,143 @@ ); } - if(s->mb_intra) temp_block[0]= level; //FIXME move afer permute - -// last_non_zero_p1=64; - /* permute for IDCT */ - asm volatile( - "movl %0, %%eax \n\t" - "pushl %%ebp \n\t" - "movl %%esp, " MANGLE(esp_temp) "\n\t" - "1: \n\t" - "movzbl (%1, %%eax), %%ebx \n\t" - "movzbl 1(%1, %%eax), %%ebp \n\t" - "movw (%2, %%ebx, 2), %%cx \n\t" - "movw (%2, %%ebp, 2), %%sp \n\t" - "movzbl " MANGLE(permutation) "(%%ebx), %%ebx\n\t" - "movzbl " MANGLE(permutation) "(%%ebp), %%ebp\n\t" - "movw %%cx, (%3, %%ebx, 2) \n\t" - "movw %%sp, (%3, %%ebp, 2) \n\t" - "addl $2, %%eax \n\t" - " js 1b \n\t" - "movl " MANGLE(esp_temp) ", %%esp\n\t" - "popl %%ebp \n\t" - : - : "g" (-last_non_zero_p1), "d" (zigzag_direct_noperm+last_non_zero_p1), "S" (temp_block), "D" (block) - : "%eax", "%ebx", "%ecx" - ); + if(s->mb_intra) block[0]= level; + else block[0]= temp_block[0]; + + if(s->idct_permutation[1]==8){ + if(last_non_zero_p1 <= 1) goto end; + block[0x08] = temp_block[0x01]; block[0x10] = temp_block[0x08]; + block[0x20] = temp_block[0x10]; + if(last_non_zero_p1 <= 4) goto end; + block[0x18] = temp_block[0x09]; block[0x04] = temp_block[0x02]; + block[0x09] = temp_block[0x03]; + if(last_non_zero_p1 <= 7) goto end; + block[0x14] = temp_block[0x0A]; block[0x28] = temp_block[0x11]; + block[0x12] = temp_block[0x18]; block[0x02] = temp_block[0x20]; + if(last_non_zero_p1 <= 11) goto end; + block[0x1A] = temp_block[0x19]; block[0x24] = temp_block[0x12]; + block[0x19] = temp_block[0x0B]; block[0x01] = temp_block[0x04]; + block[0x0C] = temp_block[0x05]; + if(last_non_zero_p1 <= 16) goto end; + block[0x11] = temp_block[0x0C]; block[0x29] = temp_block[0x13]; + block[0x16] = temp_block[0x1A]; block[0x0A] = temp_block[0x21]; + block[0x30] = temp_block[0x28]; block[0x22] = temp_block[0x30]; + block[0x38] = temp_block[0x29]; block[0x06] = temp_block[0x22]; + if(last_non_zero_p1 <= 24) goto end; + block[0x1B] = temp_block[0x1B]; block[0x21] = temp_block[0x14]; + block[0x1C] = temp_block[0x0D]; block[0x05] = temp_block[0x06]; + block[0x0D] = temp_block[0x07]; block[0x15] = temp_block[0x0E]; + block[0x2C] = temp_block[0x15]; block[0x13] = temp_block[0x1C]; + if(last_non_zero_p1 <= 32) goto end; + block[0x0B] = temp_block[0x23]; block[0x34] = temp_block[0x2A]; + block[0x2A] = temp_block[0x31]; block[0x32] = temp_block[0x38]; + block[0x3A] = temp_block[0x39]; block[0x26] = temp_block[0x32]; + block[0x39] = temp_block[0x2B]; block[0x03] = temp_block[0x24]; + if(last_non_zero_p1 <= 40) goto end; + block[0x1E] = temp_block[0x1D]; block[0x25] = temp_block[0x16]; + block[0x1D] = temp_block[0x0F]; block[0x2D] = temp_block[0x17]; + block[0x17] = temp_block[0x1E]; block[0x0E] = temp_block[0x25]; + block[0x31] = temp_block[0x2C]; block[0x2B] = temp_block[0x33]; + if(last_non_zero_p1 <= 48) goto end; + block[0x36] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B]; + block[0x23] = temp_block[0x34]; block[0x3C] = temp_block[0x2D]; + block[0x07] = temp_block[0x26]; block[0x1F] = temp_block[0x1F]; + block[0x0F] = temp_block[0x27]; block[0x35] = temp_block[0x2E]; + if(last_non_zero_p1 <= 56) goto end; + block[0x2E] = temp_block[0x35]; block[0x33] = temp_block[0x3C]; + block[0x3E] = temp_block[0x3D]; block[0x27] = temp_block[0x36]; + block[0x3D] = temp_block[0x2F]; block[0x2F] = temp_block[0x37]; + block[0x37] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F]; + }else if(s->idct_permutation[1]==4){ + if(last_non_zero_p1 <= 1) goto end; + block[0x04] = temp_block[0x01]; + block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10]; + if(last_non_zero_p1 <= 4) goto end; + block[0x0C] = temp_block[0x09]; block[0x01] = temp_block[0x02]; + block[0x05] = temp_block[0x03]; + if(last_non_zero_p1 <= 7) goto end; + block[0x09] = temp_block[0x0A]; block[0x14] = temp_block[0x11]; + block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20]; + if(last_non_zero_p1 <= 11) goto end; + block[0x1C] = temp_block[0x19]; + block[0x11] = temp_block[0x12]; block[0x0D] = temp_block[0x0B]; + block[0x02] = temp_block[0x04]; block[0x06] = temp_block[0x05]; + if(last_non_zero_p1 <= 16) goto end; + block[0x0A] = temp_block[0x0C]; block[0x15] = temp_block[0x13]; + block[0x19] = temp_block[0x1A]; block[0x24] = temp_block[0x21]; + block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30]; + block[0x2C] = temp_block[0x29]; block[0x21] = temp_block[0x22]; + if(last_non_zero_p1 <= 24) goto end; + block[0x1D] = temp_block[0x1B]; block[0x12] = temp_block[0x14]; + block[0x0E] = temp_block[0x0D]; block[0x03] = temp_block[0x06]; + block[0x07] = temp_block[0x07]; block[0x0B] = temp_block[0x0E]; + block[0x16] = temp_block[0x15]; block[0x1A] = temp_block[0x1C]; + if(last_non_zero_p1 <= 32) goto end; + block[0x25] = temp_block[0x23]; block[0x29] = temp_block[0x2A]; + block[0x34] = temp_block[0x31]; block[0x38] = temp_block[0x38]; + block[0x3C] = temp_block[0x39]; block[0x31] = temp_block[0x32]; + block[0x2D] = temp_block[0x2B]; block[0x22] = temp_block[0x24]; + if(last_non_zero_p1 <= 40) goto end; + block[0x1E] = temp_block[0x1D]; block[0x13] = temp_block[0x16]; + block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17]; + block[0x1B] = temp_block[0x1E]; block[0x26] = temp_block[0x25]; + block[0x2A] = temp_block[0x2C]; block[0x35] = temp_block[0x33]; + if(last_non_zero_p1 <= 48) goto end; + block[0x39] = temp_block[0x3A]; block[0x3D] = temp_block[0x3B]; + block[0x32] = temp_block[0x34]; block[0x2E] = temp_block[0x2D]; + block[0x23] = temp_block[0x26]; block[0x1F] = temp_block[0x1F]; + block[0x27] = temp_block[0x27]; block[0x2B] = temp_block[0x2E]; + if(last_non_zero_p1 <= 56) goto end; + block[0x36] = temp_block[0x35]; block[0x3A] = temp_block[0x3C]; + block[0x3E] = temp_block[0x3D]; block[0x33] = temp_block[0x36]; + block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37]; + block[0x3B] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F]; + }else{ + if(last_non_zero_p1 <= 1) goto end; + block[0x01] = temp_block[0x01]; + block[0x08] = temp_block[0x08]; block[0x10] = temp_block[0x10]; + if(last_non_zero_p1 <= 4) goto end; + block[0x09] = temp_block[0x09]; block[0x02] = temp_block[0x02]; + block[0x03] = temp_block[0x03]; + if(last_non_zero_p1 <= 7) goto end; + block[0x0A] = temp_block[0x0A]; block[0x11] = temp_block[0x11]; + block[0x18] = temp_block[0x18]; block[0x20] = temp_block[0x20]; + if(last_non_zero_p1 <= 11) goto end; + block[0x19] = temp_block[0x19]; + block[0x12] = temp_block[0x12]; block[0x0B] = temp_block[0x0B]; + block[0x04] = temp_block[0x04]; block[0x05] = temp_block[0x05]; + if(last_non_zero_p1 <= 16) goto end; + block[0x0C] = temp_block[0x0C]; block[0x13] = temp_block[0x13]; + block[0x1A] = temp_block[0x1A]; block[0x21] = temp_block[0x21]; + block[0x28] = temp_block[0x28]; block[0x30] = temp_block[0x30]; + block[0x29] = temp_block[0x29]; block[0x22] = temp_block[0x22]; + if(last_non_zero_p1 <= 24) goto end; + block[0x1B] = temp_block[0x1B]; block[0x14] = temp_block[0x14]; + block[0x0D] = temp_block[0x0D]; block[0x06] = temp_block[0x06]; + block[0x07] = temp_block[0x07]; block[0x0E] = temp_block[0x0E]; + block[0x15] = temp_block[0x15]; block[0x1C] = temp_block[0x1C]; + if(last_non_zero_p1 <= 32) goto end; + block[0x23] = temp_block[0x23]; block[0x2A] = temp_block[0x2A]; + block[0x31] = temp_block[0x31]; block[0x38] = temp_block[0x38]; + block[0x39] = temp_block[0x39]; block[0x32] = temp_block[0x32]; + block[0x2B] = temp_block[0x2B]; block[0x24] = temp_block[0x24]; + if(last_non_zero_p1 <= 40) goto end; + block[0x1D] = temp_block[0x1D]; block[0x16] = temp_block[0x16]; + block[0x0F] = temp_block[0x0F]; block[0x17] = temp_block[0x17]; + block[0x1E] = temp_block[0x1E]; block[0x25] = temp_block[0x25]; + block[0x2C] = temp_block[0x2C]; block[0x33] = temp_block[0x33]; + if(last_non_zero_p1 <= 48) goto end; + block[0x3A] = temp_block[0x3A]; block[0x3B] = temp_block[0x3B]; + block[0x34] = temp_block[0x34]; block[0x2D] = temp_block[0x2D]; + block[0x26] = temp_block[0x26]; block[0x1F] = temp_block[0x1F]; + block[0x27] = temp_block[0x27]; block[0x2E] = temp_block[0x2E]; + if(last_non_zero_p1 <= 56) goto end; + block[0x35] = temp_block[0x35]; block[0x3C] = temp_block[0x3C]; + block[0x3D] = temp_block[0x3D]; block[0x36] = temp_block[0x36]; + block[0x2F] = temp_block[0x2F]; block[0x37] = temp_block[0x37]; + block[0x3E] = temp_block[0x3E]; block[0x3F] = temp_block[0x3F]; + } + end: /* for(i=0; iintra_scantable.permutated[i]; put_bits(p, 8, s->intra_matrix[j]); } #ifdef TWOMATRIXES put_bits(p, 4, 0); /* 8 bit precision */ put_bits(p, 4, 1); /* table 1 */ for(i=0;i<64;i++) { - j = zigzag_direct[i]; + j = s->intra_scantable.permutated[i]; put_bits(p, 8, s->chroma_intra_matrix[j]); } #endif @@ -535,7 +535,7 @@ run = 0; last_index = s->block_last_index[n]; for(i=1;i<=last_index;i++) { - j = zigzag_direct[i]; + j = s->intra_scantable.permutated[i]; val = block[j]; if (val == 0) { run++; @@ -620,6 +620,8 @@ int restart_interval; int restart_count; int interleaved_rows; + ScanTable scantable; + void (*idct_put)(UINT8 *dest/*align 8*/, int line_size, DCTELEM *block/*align 16*/); } MJpegDecodeContext; #define SKIP_REMAINING(gb, len) { \ @@ -645,9 +647,23 @@ static int mjpeg_decode_init(AVCodecContext *avctx) { MJpegDecodeContext *s = avctx->priv_data; + MpegEncContext s2; s->avctx = avctx; + /* ugly way to get the idct & scantable */ + memset(&s2, 0, sizeof(MpegEncContext)); + s2.flags= avctx->flags; + s2.avctx= avctx; +// s2->out_format = FMT_MJPEG; + s2.width = 8; + s2.height = 8; + if (MPV_common_init(&s2) < 0) + return -1; + s->scantable= s2.intra_scantable; + s->idct_put= s2.idct_put; + MPV_common_end(&s2); + s->header_state = 0; s->mpeg_enc_ctx_allocated = 0; s->buffer_size = PICTURE_BUFFER_SIZE - 1; /* minus 1 to take into @@ -657,7 +673,7 @@ s->first_picture = 1; s->org_width = avctx->width; s->org_height = avctx->height; - + build_vlc(&s->vlcs[0][0], bits_dc_luminance, val_dc_luminance, 12); build_vlc(&s->vlcs[0][1], bits_dc_chrominance, val_dc_chrominance, 12); build_vlc(&s->vlcs[1][0], bits_ac_luminance, val_ac_luminance, 251); @@ -694,7 +710,7 @@ dprintf("index=%d\n", index); /* read quant table */ for(i=0;i<64;i++) { - j = zigzag_direct[i]; + j = s->scantable.permutated[i]; s->quant_matrixes[index][j] = get_bits(&s->gb, 8); } len -= 65; @@ -897,7 +913,7 @@ dprintf("error count: %d\n", i); return -1; } - j = zigzag_direct[i]; + j = s->scantable.permutated[i]; block[j] = level * quant_matrix[j]; i++; if (i >= 64) @@ -1021,7 +1037,7 @@ (h * mb_x + x) * 8; if (s->interlaced && s->bottom_field) ptr += s->linesize[c] >> 1; - ff_idct_put(ptr, s->linesize[c], s->block); + s->idct_put(ptr, s->linesize[c], s->block); if (++x == h) { x = 0; y++; diff -r 107a56aa74f5 -r e65798d228ea mpeg12.c --- a/mpeg12.c Sun Sep 29 15:14:28 2002 +0000 +++ b/mpeg12.c Sun Sep 29 22:44:22 2002 +0000 @@ -542,7 +542,7 @@ last_non_zero = i - 1; for(;i<=last_index;i++) { - j = zigzag_direct[i]; + j = s->intra_scantable.permutated[i]; level = block[j]; next_coef: #if 0 @@ -552,26 +552,11 @@ /* encode using VLC */ if (level != 0) { run = i - last_non_zero - 1; -#ifdef ARCH_X86 - asm volatile( - "movl %2, %1 \n\t" - "movl %1, %0 \n\t" - "addl %1, %1 \n\t" - "sbbl %1, %1 \n\t" - "xorl %1, %0 \n\t" - "subl %1, %0 \n\t" - "andl $1, %1 \n\t" - : "=&r" (alevel), "=&r" (sign) - : "g" (level) - ); -#else - sign = 0; - alevel = level; - if (alevel < 0) { - sign = 1; - alevel = -alevel; - } -#endif + + alevel= level; + MASK_ABS(sign, alevel) + sign&=1; + // code = get_rl_index(rl, 0, run, alevel); if (alevel > mpeg1_max_level[0][run]) code= 111; /*rl->n*/ @@ -1040,6 +1025,7 @@ int level, dc, diff, i, j, run; int code, component; RLTable *rl = &rl_mpeg1; + UINT8 * const scantable= s->intra_scantable.permutated; if (s->mb_intra) { /* DC coef */ @@ -1099,7 +1085,7 @@ return -1; add_coef: dprintf("%d: run=%d level=%d\n", n, run, level); - j = zigzag_direct[i]; + j = scantable[i]; block[j] = level; i++; } @@ -1121,9 +1107,9 @@ int mismatch; if (s->alternate_scan) - scan_table = ff_alternate_vertical_scan; + scan_table = s->intra_v_scantable.permutated; else - scan_table = zigzag_direct; + scan_table = s->intra_scantable.permutated; mismatch = 1; { @@ -1140,7 +1126,7 @@ v= SHOW_UBITS(re, &s->gb, 2); if (v & 2) { run = 0; - level = 1 - ((v & 1) << 1); + level = 5 - (v << 1); SKIP_BITS(re, &s->gb, 2); CLOSE_READER(re, &s->gb); goto add_coef; @@ -1191,6 +1177,7 @@ } block[63] ^= (mismatch & 1); s->block_last_index[n] = i; + return 0; } @@ -1206,9 +1193,9 @@ int mismatch; if (s->alternate_scan) - scan_table = ff_alternate_vertical_scan; + scan_table = s->intra_v_scantable.permutated; else - scan_table = zigzag_direct; + scan_table = s->intra_scantable.permutated; /* DC coef */ component = (n <= 3 ? 0 : n - 4 + 1); @@ -1402,7 +1389,7 @@ if (get_bits1(&s->gb)) { for(i=0;i<64;i++) { v = get_bits(&s->gb, 8); - j = zigzag_direct[i]; + j = s->intra_scantable.permutated[i]; s->intra_matrix[j] = v; s->chroma_intra_matrix[j] = v; } @@ -1410,7 +1397,7 @@ if (get_bits1(&s->gb)) { for(i=0;i<64;i++) { v = get_bits(&s->gb, 8); - j = zigzag_direct[i]; + j = s->intra_scantable.permutated[i]; s->inter_matrix[j] = v; s->chroma_inter_matrix[j] = v; } @@ -1418,14 +1405,14 @@ if (get_bits1(&s->gb)) { for(i=0;i<64;i++) { v = get_bits(&s->gb, 8); - j = zigzag_direct[i]; + j = s->intra_scantable.permutated[i]; s->chroma_intra_matrix[j] = v; } } if (get_bits1(&s->gb)) { for(i=0;i<64;i++) { v = get_bits(&s->gb, 8); - j = zigzag_direct[i]; + j = s->intra_scantable.permutated[i]; s->chroma_inter_matrix[j] = v; } } @@ -1636,7 +1623,7 @@ if (get_bits1(&s->gb)) { for(i=0;i<64;i++) { v = get_bits(&s->gb, 8); - j = zigzag_direct[i]; + j = s->intra_scantable.permutated[i]; s->intra_matrix[j] = v; s->chroma_intra_matrix[j] = v; } @@ -1648,15 +1635,16 @@ #endif } else { for(i=0;i<64;i++) { + int j= s->idct_permutation[i]; v = ff_mpeg1_default_intra_matrix[i]; - s->intra_matrix[i] = v; - s->chroma_intra_matrix[i] = v; + s->intra_matrix[j] = v; + s->chroma_intra_matrix[j] = v; } } if (get_bits1(&s->gb)) { for(i=0;i<64;i++) { v = get_bits(&s->gb, 8); - j = zigzag_direct[i]; + j = s->intra_scantable.permutated[i]; s->inter_matrix[j] = v; s->chroma_inter_matrix[j] = v; } @@ -1668,9 +1656,10 @@ #endif } else { for(i=0;i<64;i++) { + int j= s->idct_permutation[i]; v = ff_mpeg1_default_non_intra_matrix[i]; - s->inter_matrix[i] = v; - s->chroma_inter_matrix[i] = v; + s->inter_matrix[j] = v; + s->chroma_inter_matrix[j] = v; } } diff -r 107a56aa74f5 -r e65798d228ea mpeg12data.h --- a/mpeg12data.h Sun Sep 29 15:14:28 2002 +0000 +++ b/mpeg12data.h Sun Sep 29 22:44:22 2002 +0000 @@ -2,7 +2,7 @@ * MPEG1/2 tables */ -INT16 ff_mpeg1_default_intra_matrix[64] = { +const INT16 ff_mpeg1_default_intra_matrix[64] = { 8, 16, 19, 22, 26, 27, 29, 34, 16, 16, 22, 24, 27, 29, 34, 37, 19, 22, 26, 27, 29, 34, 34, 38, @@ -13,7 +13,7 @@ 27, 29, 35, 38, 46, 56, 69, 83 }; -INT16 ff_mpeg1_default_non_intra_matrix[64] = { +const INT16 ff_mpeg1_default_non_intra_matrix[64] = { 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, diff -r 107a56aa74f5 -r e65798d228ea mpeg4data.h --- a/mpeg4data.h Sun Sep 29 15:14:28 2002 +0000 +++ b/mpeg4data.h Sun Sep 29 22:44:22 2002 +0000 @@ -135,7 +135,7 @@ }; /* these matrixes will be permuted for the idct */ -INT16 ff_mpeg4_default_intra_matrix[64] = { +const INT16 ff_mpeg4_default_intra_matrix[64] = { 8, 17, 18, 19, 21, 23, 25, 27, 17, 18, 19, 21, 23, 25, 27, 28, 20, 21, 22, 23, 24, 26, 28, 30, @@ -146,7 +146,7 @@ 27, 28, 30, 32, 35, 38, 41, 45, }; -INT16 ff_mpeg4_default_non_intra_matrix[64] = { +const INT16 ff_mpeg4_default_non_intra_matrix[64] = { 16, 17, 18, 19, 20, 21, 22, 23, 17, 18, 19, 20, 21, 22, 23, 24, 18, 19, 20, 21, 22, 23, 24, 25, diff -r 107a56aa74f5 -r e65798d228ea mpegvideo.c --- a/mpegvideo.c Sun Sep 29 15:14:28 2002 +0000 +++ b/mpegvideo.c Sun Sep 29 22:44:22 2002 +0000 @@ -23,11 +23,15 @@ #include "avcodec.h" #include "dsputil.h" #include "mpegvideo.h" +#include "simple_idct.h" #ifdef USE_FASTMEMCPY #include "fastmemcpy.h" #endif +//#undef NDEBUG +//#include + static void encode_picture(MpegEncContext *s, int picture_number); static void dct_unquantize_mpeg1_c(MpegEncContext *s, DCTELEM *block, int n, int qscale); @@ -72,8 +76,6 @@ static UINT16 default_mv_penalty[MAX_FCODE+1][MAX_MV*2+1]; static UINT8 default_fcode_tab[MAX_MV*2+1]; -extern UINT8 zigzag_end[64]; - /* default motion estimation */ int motion_estimation_method = ME_EPZS; @@ -86,7 +88,7 @@ int i; if (s->fdct == ff_jpeg_fdct_islow) { for(i=0;i<64;i++) { - const int j= block_permute_op(i); + const int j= s->idct_permutation[i]; /* 16 <= qscale * quant_matrix[i] <= 7905 */ /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */ /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */ @@ -97,7 +99,7 @@ } } else if (s->fdct == fdct_ifast) { for(i=0;i<64;i++) { - const int j= block_permute_op(i); + const int j= s->idct_permutation[i]; /* 16 <= qscale * quant_matrix[i] <= 7905 */ /* 19952 <= aanscales[i] * qscale * quant_matrix[i] <= 249205026 */ /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */ @@ -108,13 +110,14 @@ } } else { for(i=0;i<64;i++) { + const int j= s->idct_permutation[i]; /* We can safely suppose that 16 <= quant_matrix[i] <= 255 So 16 <= qscale * quant_matrix[i] <= 7905 so (1<<19) / 16 >= (1<<19) / (qscale * quant_matrix[i]) >= (1<<19) / 7905 so 32768 >= (1<<19) / (qscale * quant_matrix[i]) >= 67 */ qmat [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]); - qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[block_permute_op(i)]); + qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]); if(qmat16[qscale][i]==0 || qmat16[qscale][i]==128*256) qmat16[qscale][i]=128*256-1; qmat16_bias[qscale][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][i]); @@ -131,6 +134,50 @@ goto fail;\ }\ } +/* +static void build_end(void) +{ + int lastIndex; + int lastIndexAfterPerm=0; + for(lastIndex=0; lastIndex<64; lastIndex++) + { + if(ff_zigzag_direct[lastIndex] > lastIndexAfterPerm) + lastIndexAfterPerm= ff_zigzag_direct[lastIndex]; + zigzag_end[lastIndex]= lastIndexAfterPerm + 1; + } +} +*/ +void ff_init_scantable(MpegEncContext *s, ScanTable *st, const UINT8 *src_scantable){ + int i; + int end; + + for(i=0; i<64; i++){ + int j; + j = src_scantable[i]; + st->permutated[i] = s->idct_permutation[j]; + } + + end=-1; + for(i=0; i<64; i++){ + int j; + j = st->permutated[i]; + if(j>end) end=j; + st->raster_end[i]= end; + } +} + +/* XXX: those functions should be suppressed ASAP when all IDCTs are + converted */ +static void ff_jref_idct_put(UINT8 *dest, int line_size, DCTELEM *block) +{ + j_rev_dct (block); + put_pixels_clamped(block, dest, line_size); +} +static void ff_jref_idct_add(UINT8 *dest, int line_size, DCTELEM *block) +{ + j_rev_dct (block); + add_pixels_clamped(block, dest, line_size); +} /* init common structure for both encoder and decoder */ int MPV_common_init(MpegEncContext *s) @@ -146,7 +193,19 @@ if(s->avctx->dct_algo==FF_DCT_FASTINT) s->fdct = fdct_ifast; else - s->fdct = ff_jpeg_fdct_islow; + s->fdct = ff_jpeg_fdct_islow; //slow/accurate/default + + if(s->avctx->idct_algo==FF_IDCT_INT){ + s->idct_put= ff_jref_idct_put; + s->idct_add= ff_jref_idct_add; + for(i=0; i<64; i++) + s->idct_permutation[i]= (i & 0x38) | ((i & 6) >> 1) | ((i & 1) << 2); + }else{ //accurate/default + s->idct_put= simple_idct_put; + s->idct_add= simple_idct_add; + for(i=0; i<64; i++) + s->idct_permutation[i]= i; + } #ifdef HAVE_MMX MPV_common_init_mmx(s); @@ -157,6 +216,15 @@ #ifdef HAVE_MLIB MPV_common_init_mlib(s); #endif + + + /* load & permutate scantables + note: only wmv uses differnt ones + */ + ff_init_scantable(s, &s->inter_scantable , ff_zigzag_direct); + ff_init_scantable(s, &s->intra_scantable , ff_zigzag_direct); + ff_init_scantable(s, &s->intra_h_scantable, ff_alternate_horizontal_scan); + ff_init_scantable(s, &s->intra_v_scantable, ff_alternate_vertical_scan); s->mb_width = (s->width + 15) / 16; s->mb_height = (s->height + 15) / 16; @@ -577,13 +645,6 @@ s->y_dc_scale_table= s->c_dc_scale_table= ff_mpeg1_dc_scale_table; - if (s->out_format == FMT_H263) - h263_encode_init(s); - else if (s->out_format == FMT_MPEG1) - ff_mpeg1_encode_init(s); - if(s->msmpeg4_version) - ff_msmpeg4_encode_init(s); - /* dont use mv_penalty table for crap MV as it would be confused */ if (s->me_method < ME_EPZS) s->mv_penalty = default_mv_penalty; @@ -593,17 +654,25 @@ if (MPV_common_init(s) < 0) return -1; + if (s->out_format == FMT_H263) + h263_encode_init(s); + else if (s->out_format == FMT_MPEG1) + ff_mpeg1_encode_init(s); + if(s->msmpeg4_version) + ff_msmpeg4_encode_init(s); + /* init default q matrix */ for(i=0;i<64;i++) { + int j= s->idct_permutation[i]; if(s->codec_id==CODEC_ID_MPEG4 && s->mpeg_quant){ - s->intra_matrix[i] = ff_mpeg4_default_intra_matrix[i]; - s->inter_matrix[i] = ff_mpeg4_default_non_intra_matrix[i]; + s->intra_matrix[j] = ff_mpeg4_default_intra_matrix[i]; + s->inter_matrix[j] = ff_mpeg4_default_non_intra_matrix[i]; }else if(s->out_format == FMT_H263){ - s->intra_matrix[i] = - s->inter_matrix[i] = ff_mpeg1_default_non_intra_matrix[i]; + s->intra_matrix[j] = + s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i]; }else{ /* mpeg1 */ - s->intra_matrix[i] = ff_mpeg1_default_intra_matrix[i]; - s->inter_matrix[i] = ff_mpeg1_default_non_intra_matrix[i]; + s->intra_matrix[j] = ff_mpeg1_default_intra_matrix[i]; + s->inter_matrix[j] = ff_mpeg1_default_non_intra_matrix[i]; } } @@ -1450,7 +1519,7 @@ { if (!s->mpeg2) s->dct_unquantize(s, block, i, s->qscale); - ff_idct_put (dest, line_size, block); + s->idct_put (dest, line_size, block); } /* add block[] to dest[] */ @@ -1458,7 +1527,7 @@ DCTELEM *block, int i, UINT8 *dest, int line_size) { if (s->block_last_index[i] >= 0) { - ff_idct_add (dest, line_size, block); + s->idct_add (dest, line_size, block); } } @@ -1468,7 +1537,7 @@ if (s->block_last_index[i] >= 0) { s->dct_unquantize(s, block, i, s->qscale); - ff_idct_add (dest, line_size, block); + s->idct_add (dest, line_size, block); } } @@ -1720,7 +1789,7 @@ if(last_index<=skip_dc - 1) return; for(i=0; i<=last_index; i++){ - const int j = zigzag_direct[i]; + const int j = s->intra_scantable.permutated[i]; const int level = ABS(block[j]); if(level==1){ if(skip_dc && i==0) continue; @@ -1734,7 +1803,7 @@ } if(score >= threshold) return; for(i=skip_dc; i<=last_index; i++){ - const int j = zigzag_direct[i]; + const int j = s->intra_scantable.permutated[i]; block[j]=0; } if(block[0]) s->block_last_index[n]= 0; @@ -1746,9 +1815,14 @@ int i; const int maxlevel= s->max_qcoeff; const int minlevel= s->min_qcoeff; - - for(i=0;i<=last_index; i++){ - const int j = zigzag_direct[i]; + + if(s->mb_intra){ + i=1; //skip clipping of intra dc + }else + i=0; + + for(;i<=last_index; i++){ + const int j= s->intra_scantable.permutated[i]; int level = block[j]; if (level>maxlevel) level=maxlevel; @@ -1760,22 +1834,22 @@ static inline void requantize_coeffs(MpegEncContext *s, DCTELEM block[64], int oldq, int newq, int n) { int i; - - if(s->mb_intra){ - //FIXME requantize, note (mpeg1/h263/h263p-aic dont need it,...) - i=1; + + if(s->mb_intra){ + i=1; //skip clipping of intra dc + //FIXME requantize, note (mpeg1/h263/h263p-aic dont need it,...) }else i=0; for(;i<=s->block_last_index[n]; i++){ - const int j = zigzag_direct[i]; + const int j = s->intra_scantable.permutated[i]; int level = block[j]; block[j]= ROUNDED_DIV(level*oldq, newq); } for(i=s->block_last_index[n]; i>=0; i--){ - const int j = zigzag_direct[i]; //FIXME other scantabs + const int j = s->intra_scantable.permutated[i]; if(block[j]) break; } s->block_last_index[n]= i; @@ -1791,11 +1865,14 @@ assert(s->adaptive_quant); for(n=0; n<6; n++){ - if(s->mb_intra) i=1; - else i=0; + if(s->mb_intra){ + i=1; //skip clipping of intra dc + //FIXME requantize, note (mpeg1/h263/h263p-aic dont need it,...) + }else + i=0; for(;i<=s->block_last_index[n]; i++){ - const int j = zigzag_direct[i]; //FIXME other scantabs + const int j = s->intra_scantable.permutated[i]; int level = block[n][j]; if(largest < level) largest = level; if(smallest > level) smallest= level; @@ -2379,8 +2456,11 @@ if (s->out_format == FMT_MJPEG) { /* for mjpeg, we do include qscale in the matrix */ s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0]; - for(i=1;i<64;i++) - s->intra_matrix[i] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3); + for(i=1;i<64;i++){ + int j= s->idct_permutation[i]; + + s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3); + } convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, s->q_intra_matrix16_bias, s->intra_matrix, s->intra_quant_bias); } @@ -2752,7 +2832,7 @@ #ifndef ARCH_ALPHA /* Alpha uses unpermuted matrix */ /* we need this permutation so that we correct the IDCT permutation. will be moved into DCT code */ - block_permute(block); + block_permute(block, s->idct_permutation); //FIXME remove #endif if (s->mb_intra) { @@ -2782,7 +2862,7 @@ threshold2= (threshold1<<1); for(;i<64;i++) { - j = zigzag_direct[i]; + j = s->intra_scantable.permutated[i]; level = block[j]; level = level * qmat[j]; @@ -2813,8 +2893,7 @@ int i, level, nCoeffs; const UINT16 *quant_matrix; - if(s->alternate_scan) nCoeffs= 64; - else nCoeffs= s->block_last_index[n]+1; + nCoeffs= s->block_last_index[n]; if (s->mb_intra) { if (n < 4) @@ -2823,8 +2902,8 @@ block[0] = block[0] * s->c_dc_scale; /* XXX: only mpeg1 */ quant_matrix = s->intra_matrix; - for(i=1;iintra_scantable.permutated[i]; level = block[j]; if (level) { if (level < 0) { @@ -2846,8 +2925,8 @@ } else { i = 0; quant_matrix = s->inter_matrix; - for(;iintra_scantable.permutated[i]; level = block[j]; if (level) { if (level < 0) { @@ -2877,8 +2956,8 @@ int i, level, nCoeffs; const UINT16 *quant_matrix; - if(s->alternate_scan) nCoeffs= 64; - else nCoeffs= s->block_last_index[n]+1; + if(s->alternate_scan) nCoeffs= 63; + else nCoeffs= s->block_last_index[n]; if (s->mb_intra) { if (n < 4) @@ -2886,8 +2965,8 @@ else block[0] = block[0] * s->c_dc_scale; quant_matrix = s->intra_matrix; - for(i=1;iintra_scantable.permutated[i]; level = block[j]; if (level) { if (level < 0) { @@ -2908,8 +2987,8 @@ int sum=-1; i = 0; quant_matrix = s->inter_matrix; - for(;iintra_scantable.permutated[i]; level = block[j]; if (level) { if (level < 0) { @@ -2940,27 +3019,27 @@ int i, level, qmul, qadd; int nCoeffs; + assert(s->block_last_index[n]>=0); + + qadd = (qscale - 1) | 1; + qmul = qscale << 1; + if (s->mb_intra) { if (!s->h263_aic) { if (n < 4) block[0] = block[0] * s->y_dc_scale; else block[0] = block[0] * s->c_dc_scale; - } + }else + qadd = 0; i = 1; - nCoeffs= 64; //does not allways use zigzag table + nCoeffs= 63; //does not allways use zigzag table } else { i = 0; - nCoeffs= zigzag_end[ s->block_last_index[n] ]; + nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; } - qmul = s->qscale << 1; - if (s->h263_aic && s->mb_intra) - qadd = 0; - else - qadd = (s->qscale - 1) | 1; - - for(;imsmpeg4_version==4){ - s->intra_scantable = wmv1_scantable[1]; - s->intra_h_scantable= wmv1_scantable[2]; - s->intra_v_scantable= wmv1_scantable[3]; - s->inter_scantable = wmv1_scantable[0]; - }else{ - s->intra_scantable = zigzag_direct; - s->intra_h_scantable= ff_alternate_horizontal_scan; - s->intra_v_scantable= ff_alternate_vertical_scan; - s->inter_scantable = zigzag_direct; + int i; + ff_init_scantable(s, &s->intra_scantable , wmv1_scantable[1]); + ff_init_scantable(s, &s->intra_h_scantable, wmv1_scantable[2]); + ff_init_scantable(s, &s->intra_v_scantable, wmv1_scantable[3]); + ff_init_scantable(s, &s->inter_scantable , wmv1_scantable[0]); } + //Note the default tables are set in common_init in mpegvideo.c if(!inited){ int i; inited=1; init_h263_dc_for_msmpeg4(); - - /* permute for IDCT */ - for(i=0; irl_chroma_table_index]; } run_diff = 0; - scantable= s->intra_scantable; + scantable= s->intra_scantable.permutated; set_stat(ST_INTRA_AC); } else { i = 0; @@ -945,12 +934,12 @@ run_diff = 0; else run_diff = 1; - scantable= s->inter_scantable; + scantable= s->inter_scantable.permutated; set_stat(ST_INTER_AC); } /* recalculate block_last_index for M$ wmv1 */ - if(scantable!=zigzag_direct && s->block_last_index[n]>0){ + if(s->msmpeg4_version==4 && s->block_last_index[n]>0){ for(last_index=63; last_index>=0; last_index--){ if(block[scantable[last_index]]) break; } @@ -1704,11 +1693,11 @@ } if (s->ac_pred) { if (dc_pred_dir == 0) - scan_table = s->intra_v_scantable; /* left */ + scan_table = s->intra_v_scantable.permutated; /* left */ else - scan_table = s->intra_h_scantable; /* top */ + scan_table = s->intra_h_scantable.permutated; /* top */ } else { - scan_table = s->intra_scantable; + scan_table = s->intra_scantable.permutated; } set_stat(ST_INTRA_AC); rl_vlc= rl->rl_vlc[0]; @@ -1727,7 +1716,7 @@ s->block_last_index[n] = i; return 0; } - scan_table = s->inter_scantable; + scan_table = s->inter_scantable.permutated; set_stat(ST_INTER_AC); rl_vlc= rl->rl_vlc[s->qscale]; } diff -r 107a56aa74f5 -r e65798d228ea msmpeg4data.h --- a/msmpeg4data.h Sun Sep 29 15:14:28 2002 +0000 +++ b/msmpeg4data.h Sun Sep 29 22:44:22 2002 +0000 @@ -1819,7 +1819,7 @@ #define WMV1_SCANTABLE_COUNT 4 -static UINT8 wmv1_scantable00[64]= { +static const UINT8 wmv1_scantable00[64]= { 0x00, 0x08, 0x01, 0x02, 0x09, 0x10, 0x18, 0x11, 0x0A, 0x03, 0x04, 0x0B, 0x12, 0x19, 0x20, 0x28, 0x30, 0x38, 0x29, 0x21, 0x1A, 0x13, 0x0C, 0x05, @@ -1829,7 +1829,7 @@ 0x2C, 0x25, 0x1E, 0x17, 0x1F, 0x26, 0x2D, 0x35, 0x3D, 0x3E, 0x36, 0x2E, 0x27, 0x2F, 0x37, 0x3F, }; -static UINT8 wmv1_scantable01[64]= { +static const UINT8 wmv1_scantable01[64]= { 0x00, 0x08, 0x01, 0x02, 0x09, 0x10, 0x18, 0x11, 0x0A, 0x03, 0x04, 0x0B, 0x12, 0x19, 0x20, 0x28, 0x21, 0x30, 0x1A, 0x13, 0x0C, 0x05, 0x06, 0x0D, @@ -1839,7 +1839,7 @@ 0x1E, 0x17, 0x1F, 0x26, 0x2D, 0x34, 0x3C, 0x35, 0x3D, 0x2E, 0x27, 0x2F, 0x36, 0x3E, 0x37, 0x3F, }; -static UINT8 wmv1_scantable02[64]= { +static const UINT8 wmv1_scantable02[64]= { 0x00, 0x01, 0x08, 0x02, 0x03, 0x09, 0x10, 0x18, 0x11, 0x0A, 0x04, 0x05, 0x0B, 0x12, 0x19, 0x20, 0x28, 0x30, 0x21, 0x1A, 0x13, 0x0C, 0x06, 0x07, @@ -1849,7 +1849,7 @@ 0x17, 0x1F, 0x26, 0x2D, 0x34, 0x3B, 0x3C, 0x35, 0x2E, 0x27, 0x2F, 0x36, 0x3D, 0x3E, 0x37, 0x3F, }; -static UINT8 wmv1_scantable03[64]= { +static const UINT8 wmv1_scantable03[64]= { 0x00, 0x08, 0x10, 0x01, 0x18, 0x20, 0x28, 0x09, 0x02, 0x03, 0x0A, 0x11, 0x19, 0x30, 0x38, 0x29, 0x21, 0x1A, 0x12, 0x0B, 0x04, 0x05, 0x0C, 0x13, @@ -1860,7 +1860,7 @@ 0x2E, 0x27, 0x2F, 0x36, 0x3D, 0x3E, 0x37, 0x3F, }; -static UINT8 *wmv1_scantable[WMV1_SCANTABLE_COUNT+1]={ +static const UINT8 *wmv1_scantable[WMV1_SCANTABLE_COUNT+1]={ wmv1_scantable00, wmv1_scantable01, wmv1_scantable02, diff -r 107a56aa74f5 -r e65798d228ea simple_idct.h --- a/simple_idct.h Sun Sep 29 15:14:28 2002 +0000 +++ b/simple_idct.h Sun Sep 29 22:44:22 2002 +0000 @@ -20,5 +20,7 @@ void simple_idct_put(UINT8 *dest, int line_size, INT16 *block); void simple_idct_add(UINT8 *dest, int line_size, INT16 *block); -void simple_idct_mmx(short *block); +void ff_simple_idct_mmx(short *block); +void ff_simple_idct_add_mmx(UINT8 *dest, int line_size, INT16 *block); +void ff_simple_idct_put_mmx(UINT8 *dest, int line_size, INT16 *block); void simple_idct(short *block);