# HG changeset patch # User michaelni # Date 1035385867 0 # Node ID d4726182dfd2b71257597fda787a79c746c5c120 # Parent 339db642859d74d9fdbb95addefe64df4e9ff8d3 optimize block_permute() optimize dct_quantize_c() dont permute s->q_inter/intra_matrix diff -r 339db642859d -r d4726182dfd2 dsputil.c --- a/dsputil.c Wed Oct 23 08:14:12 2002 +0000 +++ b/dsputil.c Wed Oct 23 15:11:07 2002 +0000 @@ -1553,16 +1553,25 @@ return s; } -/* permute block according so that it corresponds to the MMX idct - order */ -void block_permute(INT16 *block, UINT8 *permutation) +void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last) { - int i; - INT16 temp[64]; + int i; + INT16 temp[64]; + + if(last<=0) return; + if(permutation[1]==1) return; //FIXME it's ok but not clean and might fail for some perms - for(i=0; i<64; i++) temp[ permutation[i] ] = block[i]; - - for(i=0; i<64; i++) block[i] = temp[i]; + for(i=0; i<=last; i++){ + const int j= scantable[i]; + temp[j]= block[j]; + block[j]=0; + } + + for(i=0; i<=last; i++){ + const int j= scantable[i]; + const int perm_j= permutation[j]; + block[perm_j]= temp[j]; + } } void clear_blocks_c(DCTELEM *blocks) diff -r 339db642859d -r d4726182dfd2 dsputil.h --- a/dsputil.h Wed Oct 23 08:14:12 2002 +0000 +++ b/dsputil.h Wed Oct 23 15:11:07 2002 +0000 @@ -115,7 +115,11 @@ int pix_abs16x16_y2_c(UINT8 *blk1, UINT8 *blk2, int lx); int pix_abs16x16_xy2_c(UINT8 *blk1, UINT8 *blk2, int lx); -void block_permute(INT16 *block, UINT8 *permutation); +/** + * permute block according to permutation.
+ * @param last last non zero element in scantable order + */ +void ff_block_permute(INT16 *block, UINT8 *permutation, const UINT8 *scantable, int last); #if defined(HAVE_MMX) diff -r 339db642859d -r d4726182dfd2 mpegvideo.c --- a/mpegvideo.c Wed Oct 23 08:14:12 2002 +0000 +++ b/mpegvideo.c Wed Oct 23 15:11:07 2002 +0000 @@ -94,7 +94,7 @@ /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */ /* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */ - qmat[qscale][j] = (int)((UINT64_C(1) << QMAT_SHIFT) / + qmat[qscale][i] = (int)((UINT64_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j])); } } else if (s->fdct == fdct_ifast) { @@ -105,7 +105,7 @@ /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */ /* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */ - qmat[qscale][j] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / + qmat[qscale][i] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / (aanscales[i] * qscale * quant_matrix[j])); } } else { @@ -138,6 +138,8 @@ void ff_init_scantable(MpegEncContext *s, ScanTable *st, const UINT8 *src_scantable){ int i; int end; + + st->scantable= src_scantable; for(i=0; i<64; i++){ int j; @@ -2968,18 +2970,13 @@ { int i, j, level, last_non_zero, q; const int *qmat; + const UINT8 *scantable= s->intra_scantable.scantable; int bias; int max=0; unsigned int threshold1, threshold2; s->fdct (block); -#ifndef ARCH_ALPHA /* Alpha uses unpermuted matrix */ - /* we need this permutation so that we correct the IDCT - permutation. 
will be moved into DCT code */ - block_permute(block, s->idct_permutation); //FIXME remove -#endif - if (s->mb_intra) { if (!s->h263_aic) { if (n < 4) @@ -3007,7 +3004,7 @@ threshold2= (threshold1<<1); for(;i<64;i++) { - j = s->intra_scantable.permutated[i]; + j = scantable[i]; level = block[j]; level = level * qmat[j]; @@ -3029,6 +3026,9 @@ } *overflow= s->max_qcoeff < max; //overflow might have happend + /* we need this permutation so that we correct the IDCT, we only permute the !=0 elements */ + ff_block_permute(block, s->idct_permutation, scantable, last_non_zero); + return last_non_zero; } diff -r 339db642859d -r d4726182dfd2 mpegvideo.h --- a/mpegvideo.h Wed Oct 23 08:14:12 2002 +0000 +++ b/mpegvideo.h Wed Oct 23 15:11:07 2002 +0000 @@ -100,6 +100,7 @@ } ReorderBuffer; typedef struct ScanTable{ + const UINT8 *scantable; UINT8 permutated[64]; UINT8 raster_end[64]; } ScanTable;