# HG changeset patch # User michaelni # Date 1030899153 0 # Node ID 3e0f62e5eed6c9724f59e16f55daf307ac26b40d # Parent be1cb0e1f27673708654b1c9c5f49dfc960c5565 dct cleanup more accurate mmx dct (dont discard bits for fun) fixing mmx quantizer bug for qscale%2==1 (bias was slightly wrong) diff -r be1cb0e1f276 -r 3e0f62e5eed6 dct-test.c --- a/dct-test.c Sun Sep 01 14:30:55 2002 +0000 +++ b/dct-test.c Sun Sep 01 16:52:33 2002 +0000 @@ -101,8 +101,12 @@ case 0: for(i=0;i<64;i++) block1[i] = (random() % 512) -256; - if (is_idct) + if (is_idct){ fdct(block1); + + for(i=0;i<64;i++) + block1[i]>>=3; + } break; case 1:{ int num= (random()%10)+1; @@ -153,17 +157,11 @@ if (fdct_func == fdct_ifast) { for(i=0; i<64; i++) { - scale = (1 << (AANSCALE_BITS + 11)) / aanscales[i]; + scale = 8*(1 << (AANSCALE_BITS + 11)) / aanscales[i]; block[i] = (block[i] * scale /*+ (1<<(AANSCALE_BITS-1))*/) >> AANSCALE_BITS; } } - if (fdct_func == ff_jpeg_fdct_islow) { - for(i=0; i<64; i++) { - block[i] = (block[i]+3)>>3; - } - } - fdct_ref(block1); blockSumErr=0; @@ -212,8 +210,12 @@ case 0: for(i=0;i<64;i++) block1[i] = (random() % 512) -256; - if (is_idct) + if (is_idct){ fdct(block1); + + for(i=0;i<64;i++) + block1[i]>>=3; + } break; case 1:{ case 2: diff -r be1cb0e1f276 -r 3e0f62e5eed6 fdctref.c --- a/fdctref.c Sun Sep 01 14:30:55 2002 +0000 +++ b/fdctref.c Sun Sep 01 16:52:33 2002 +0000 @@ -103,6 +103,7 @@ s += c[i][5] * tmp[8 * 5 + j]; s += c[i][6] * tmp[8 * 6 + j]; s += c[i][7] * tmp[8 * 7 + j]; + s*=8.0; block[8 * i + j] = (short)floor(s + 0.499999); /* diff -r be1cb0e1f276 -r 3e0f62e5eed6 i386/fdct_mmx.c --- a/i386/fdct_mmx.c Sun Sep 01 14:30:55 2002 +0000 +++ b/i386/fdct_mmx.c Sun Sep 01 16:52:33 2002 +0000 @@ -25,7 +25,7 @@ #define BITS_FRW_ACC 3 //; 2 or 3 for accuracy #define SHIFT_FRW_COL BITS_FRW_ACC -#define SHIFT_FRW_ROW (BITS_FRW_ACC + 17) +#define SHIFT_FRW_ROW (BITS_FRW_ACC + 17 - 3) //#define RND_FRW_ROW (262144 * (BITS_FRW_ACC - 1)) //; 1 << (SHIFT_FRW_ROW-1) #define RND_FRW_ROW (1 << (SHIFT_FRW_ROW-1)) //#define RND_FRW_COL (2 * (BITS_FRW_ACC - 1)) //; 1 << (SHIFT_FRW_COL-1) diff -r be1cb0e1f276 -r 3e0f62e5eed6 i386/mpegvideo_mmx_template.c --- a/i386/mpegvideo_mmx_template.c Sun Sep 01 14:30:55 2002 +0000 +++ b/i386/mpegvideo_mmx_template.c Sun Sep 01 16:52:33 2002 +0000 @@ -46,9 +46,9 @@ if (s->mb_intra) { int dummy; if (n < 4) - q = s->y_dc_scale; + q = s->y_dc_scale<<3; else - q = s->c_dc_scale; + q = s->c_dc_scale<<3; /* note: block[0] is assumed to be positive */ if (!s->h263_aic) { #if 1 @@ -70,7 +70,7 @@ #endif } else /* For AIC we skip quant/dequant of INTRADC */ - level = block[0]; + level = block[0]>>3; block[0]=0; //avoid fake overflow // temp_block[0] = (block[0] + (q >> 1)) / q; diff -r be1cb0e1f276 -r 3e0f62e5eed6 mpegvideo.c --- a/mpegvideo.c Sun Sep 01 14:30:55 2002 +0000 +++ b/mpegvideo.c Sun Sep 01 16:52:33 2002 +0000 @@ -90,7 +90,7 @@ /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */ /* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */ - qmat[qscale][j] = (int)((UINT64_C(1) << (QMAT_SHIFT-3)) / + qmat[qscale][j] = (int)((UINT64_C(1) << QMAT_SHIFT) / (qscale * quant_matrix[j])); } } else if (s->fdct == fdct_ifast) { @@ -101,7 +101,7 @@ /* (1<<36)/19952 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= (1<<36)/249205026 */ /* 3444240 >= (1<<36)/(aanscales[i] * qscale * quant_matrix[i]) >= 275 */ - qmat[qscale][j] = (int)((UINT64_C(1) << (QMAT_SHIFT + 11)) / + qmat[qscale][j] = (int)((UINT64_C(1) << (QMAT_SHIFT + 14)) / (aanscales[i] * qscale * quant_matrix[j])); } } else { @@ -115,7 +115,6 @@ qmat16[qscale][i] = (1 << QMAT_SHIFT_MMX) / (qscale * quant_matrix[block_permute_op(i)]); if(qmat16[qscale][i]==0 || qmat16[qscale][i]==128*256) qmat16[qscale][i]=128*256-1; - qmat16_bias[qscale][i]= ROUNDED_DIV(bias<<(16-QUANT_BIAS_SHIFT), qmat16[qscale][i]); } } @@ -2487,15 +2486,15 @@ i = 1; last_non_zero = 0; qmat = s->q_intra_matrix[qscale]; - bias= s->intra_quant_bias<<(QMAT_SHIFT - 3 - QUANT_BIAS_SHIFT); + bias= s->intra_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT); } else { i = 0; last_non_zero = -1; qmat = s->q_inter_matrix[qscale]; - bias= s->inter_quant_bias<<(QMAT_SHIFT - 3 - QUANT_BIAS_SHIFT); + bias= s->inter_quant_bias<<(QMAT_SHIFT - QUANT_BIAS_SHIFT); } - threshold1= (1<<(QMAT_SHIFT - 3)) - bias - 1; - threshold2= threshold1<<1; + threshold1= (1<= (1<<(QMAT_SHIFT - 3))){ if(((unsigned)(level+threshold1))>threshold2){ if(level>0){ - level= (bias + level)>>(QMAT_SHIFT - 3); + level= (bias + level)>>QMAT_SHIFT; block[j]= level; }else{ - level= (bias - level)>>(QMAT_SHIFT - 3); + level= (bias - level)>>QMAT_SHIFT; block[j]= -level; } max |=level; diff -r be1cb0e1f276 -r 3e0f62e5eed6 mpegvideo.h --- a/mpegvideo.h Sun Sep 01 14:30:55 2002 +0000 +++ b/mpegvideo.h Sun Sep 01 16:52:33 2002 +0000 @@ -30,8 +30,8 @@ #define MPEG_BUF_SIZE (16 * 1024) -#define QMAT_SHIFT_MMX 19 -#define QMAT_SHIFT 25 +#define QMAT_SHIFT_MMX 16 +#define QMAT_SHIFT 22 #define MAX_FCODE 7 #define MAX_MV 2048