# HG changeset patch # User michael # Date 1066821819 0 # Node ID d736e24bf3033d5e194a522fdd072f143aec3cb9 # Parent 541681146f8322a3c0aac65392c2ff90053386fc move mmx quantizer matrices out of MpegEncContext (23k -> 7k) (no measurable slowdown) diff -r 541681146f83 -r d736e24bf303 i386/mpegvideo_mmx_template.c --- a/i386/mpegvideo_mmx_template.c Wed Oct 22 10:59:39 2003 +0000 +++ b/i386/mpegvideo_mmx_template.c Wed Oct 22 11:23:39 2003 +0000 @@ -76,12 +76,12 @@ block[0]=0; //avoid fake overflow // temp_block[0] = (block[0] + (q >> 1)) / q; last_non_zero_p1 = 1; - bias = s->q_intra_matrix16_bias[qscale]; - qmat = s->q_intra_matrix16[qscale]; + bias = s->q_intra_matrix16[qscale][1]; + qmat = s->q_intra_matrix16[qscale][0]; } else { last_non_zero_p1 = 0; - bias = s->q_inter_matrix16_bias[qscale]; - qmat = s->q_inter_matrix16[qscale]; + bias = s->q_inter_matrix16[qscale][1]; + qmat = s->q_inter_matrix16[qscale][0]; } if(s->out_format == FMT_H263 && s->mpeg_quant==0){ diff -r 541681146f83 -r d736e24bf303 mpegvideo.c --- a/mpegvideo.c Wed Oct 22 10:59:39 2003 +0000 +++ b/mpegvideo.c Wed Oct 22 11:23:39 2003 +0000 @@ -93,7 +93,7 @@ enum PixelFormat ff_yuv420p_list[2]= {PIX_FMT_YUV420P, -1}; -static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16)[64], uint16_t (*qmat16_bias)[64], +static void convert_matrix(MpegEncContext *s, int (*qmat)[64], uint16_t (*qmat16)[2][64], const uint16_t *quant_matrix, int bias, int qmin, int qmax) { int qscale; @@ -132,10 +132,10 @@ */ qmat[qscale][i] = (int)((uint64_t_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j])); // qmat [qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[i]); - qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]); - - if(qmat16[qscale][i]==0 || qmat16[qscale][i]==128*256) qmat16[qscale][i]=128*256-1; - qmat16_bias[qscale][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][i]); + qmat16[qscale][0][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[j]); + + 
if(qmat16[qscale][0][i]==0 || qmat16[qscale][0][i]==128*256) qmat16[qscale][0][i]=128*256-1; + qmat16[qscale][1][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][0][i]); } } } @@ -445,6 +445,8 @@ CHECKED_ALLOCZ(s->q_intra_matrix, 64*32 * sizeof(int)) CHECKED_ALLOCZ(s->q_inter_matrix, 64*32 * sizeof(int)) + CHECKED_ALLOCZ(s->q_intra_matrix16, 64*32*2 * sizeof(uint16_t)) + CHECKED_ALLOCZ(s->q_inter_matrix16, 64*32*2 * sizeof(uint16_t)) } CHECKED_ALLOCZ(s->picture, MAX_PICTURE_COUNT * sizeof(Picture)) @@ -565,6 +567,8 @@ av_freep(&s->lambda_table); av_freep(&s->q_intra_matrix); av_freep(&s->q_inter_matrix); + av_freep(&s->q_intra_matrix16); + av_freep(&s->q_inter_matrix16); for(i=0; i<MAX_PICTURE_COUNT; i++){ free_picture(s, &s->picture[i]); @@ -882,9 +886,9 @@ /* precompute matrix */ /* for mjpeg, we do include qscale in the matrix */ if (s->out_format != FMT_MJPEG) { - convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, s->q_intra_matrix16_bias, + convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, s->intra_matrix, s->intra_quant_bias, 1, 31); - convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16, s->q_inter_matrix16_bias, + convert_matrix(s, s->q_inter_matrix, s->q_inter_matrix16, s->inter_matrix, s->inter_quant_bias, 1, 31); } @@ -3502,7 +3506,7 @@ s->intra_matrix[j] = CLAMP_TO_8BIT((ff_mpeg1_default_intra_matrix[i] * s->qscale) >> 3); } convert_matrix(s, s->q_intra_matrix, s->q_intra_matrix16, - s->q_intra_matrix16_bias, s->intra_matrix, s->intra_quant_bias, 8, 8); + s->intra_matrix, s->intra_quant_bias, 8, 8); } //FIXME var duplication diff -r 541681146f83 -r d736e24bf303 mpegvideo.h --- a/mpegvideo.h Wed Oct 22 10:59:39 2003 +0000 +++ b/mpegvideo.h Wed Oct 22 11:23:39 2003 +0000 @@ -459,11 +459,9 @@ /** precomputed matrix (combine qscale and DCT renorm) */ int (*q_intra_matrix)[64]; int (*q_inter_matrix)[64]; - /** identical to the above but for MMX & these are not permutated */ - uint16_t __align8 q_intra_matrix16[32][64]; - uint16_t __align8 q_inter_matrix16[32][64]; - 
uint16_t __align8 q_intra_matrix16_bias[32][64]; - uint16_t __align8 q_inter_matrix16_bias[32][64]; + /** identical to the above but for MMX & these are not permutated, second 64 entries are bias*/ + uint16_t (*q_intra_matrix16)[2][64]; + uint16_t (*q_inter_matrix16)[2][64]; int block_last_index[6]; ///< last non zero coefficient in block /* scantables */ ScanTable __align8 intra_scantable;