# HG changeset patch # User michael # Date 1071370620 0 # Node ID 1a2db2073848359bd6cc6aa5455310c3fb01a895 # Parent 04b759af8bd4d0d778a34436818692f391319823 split intra / inter dequantization diff -r 04b759af8bd4 -r 1a2db2073848 dsputil.c --- a/dsputil.c Sun Dec 14 01:42:00 2003 +0000 +++ b/dsputil.c Sun Dec 14 02:57:00 2003 +0000 @@ -2766,7 +2766,7 @@ memcpy(bak, temp, 64*sizeof(DCTELEM)); s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); - s->dct_unquantize(s, temp, 0, s->qscale); + s->dct_unquantize_inter(s, temp, 0, s->qscale); simple_idct(temp); //FIXME for(i=0; i<64; i++) @@ -2839,7 +2839,10 @@ } if(last>=0){ - s->dct_unquantize(s, temp, 0, s->qscale); + if(s->mb_intra) + s->dct_unquantize_intra(s, temp, 0, s->qscale); + else + s->dct_unquantize_inter(s, temp, 0, s->qscale); } s->dsp.idct_add(bak, stride, temp); diff -r 04b759af8bd4 -r 1a2db2073848 i386/mpegvideo_mmx.c --- a/i386/mpegvideo_mmx.c Sun Dec 14 01:42:00 2003 +0000 +++ b/i386/mpegvideo_mmx.c Sun Dec 14 02:57:00 2003 +0000 @@ -31,31 +31,29 @@ static const unsigned long long int mm_wone __attribute__ ((aligned(8))) = 0x0001000100010001ULL; -static void dct_unquantize_h263_mmx(MpegEncContext *s, +static void dct_unquantize_h263_intra_mmx(MpegEncContext *s, DCTELEM *block, int n, int qscale) { int level, qmul, qadd, nCoeffs; qmul = qscale << 1; - qadd = (qscale - 1) | 1; assert(s->block_last_index[n]>=0 || s->h263_aic); - if (s->mb_intra) { - if (!s->h263_aic) { - if (n < 4) - level = block[0] * s->y_dc_scale; - else - level = block[0] * s->c_dc_scale; - }else{ - qadd = 0; - level= block[0]; - } + if (!s->h263_aic) { + if (n < 4) + level = block[0] * s->y_dc_scale; + else + level = block[0] * s->c_dc_scale; + qadd = (qscale - 1) | 1; + }else{ + qadd = 0; + level= block[0]; + } + if(s->ac_pred) nCoeffs=63; - } else { - nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; - level = 0;/* keep gcc quiet */ - } + else + nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; //printf("%d %d ", qmul, qadd); asm volatile( "movd %1, %%mm6 \n\t" //qmul @@ -104,8 +102,69 @@ ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs)) : "memory" ); - if(s->mb_intra) - block[0]= level; + block[0]= level; +} + + +static void dct_unquantize_h263_inter_mmx(MpegEncContext *s, + DCTELEM *block, int n, int qscale) +{ + int level, qmul, qadd, nCoeffs; + + qmul = qscale << 1; + qadd = (qscale - 1) | 1; + + assert(s->block_last_index[n]>=0 || s->h263_aic); + + nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; +//printf("%d %d ", qmul, qadd); +asm volatile( + "movd %1, %%mm6 \n\t" //qmul + "packssdw %%mm6, %%mm6 \n\t" + "packssdw %%mm6, %%mm6 \n\t" + "movd %2, %%mm5 \n\t" //qadd + "pxor %%mm7, %%mm7 \n\t" + "packssdw %%mm5, %%mm5 \n\t" + "packssdw %%mm5, %%mm5 \n\t" + "psubw %%mm5, %%mm7 \n\t" + "pxor %%mm4, %%mm4 \n\t" + ".balign 16\n\t" + "1: \n\t" + "movq (%0, %3), %%mm0 \n\t" + "movq 8(%0, %3), %%mm1 \n\t" + + "pmullw %%mm6, %%mm0 \n\t" + "pmullw %%mm6, %%mm1 \n\t" + + "movq (%0, %3), %%mm2 \n\t" + "movq 8(%0, %3), %%mm3 \n\t" + + "pcmpgtw %%mm4, %%mm2 \n\t" // block[i] < 0 ? -1 : 0 + "pcmpgtw %%mm4, %%mm3 \n\t" // block[i] < 0 ? -1 : 0 + + "pxor %%mm2, %%mm0 \n\t" + "pxor %%mm3, %%mm1 \n\t" + + "paddw %%mm7, %%mm0 \n\t" + "paddw %%mm7, %%mm1 \n\t" + + "pxor %%mm0, %%mm2 \n\t" + "pxor %%mm1, %%mm3 \n\t" + + "pcmpeqw %%mm7, %%mm0 \n\t" // block[i] == 0 ? -1 : 0 + "pcmpeqw %%mm7, %%mm1 \n\t" // block[i] == 0 ? -1 : 0 + + "pandn %%mm2, %%mm0 \n\t" + "pandn %%mm3, %%mm1 \n\t" + + "movq %%mm0, (%0, %3) \n\t" + "movq %%mm1, 8(%0, %3) \n\t" + + "addl $16, %3 \n\t" + "jng 1b \n\t" + ::"r" (block+nCoeffs), "g"(qmul), "g" (qadd), "r" (2*(-nCoeffs)) + : "memory" + ); } @@ -138,24 +197,23 @@ high3:low3 = low1*low2 high3 += tlow1 */ -static void dct_unquantize_mpeg1_mmx(MpegEncContext *s, +static void dct_unquantize_mpeg1_intra_mmx(MpegEncContext *s, DCTELEM *block, int n, int qscale) { int nCoeffs; const uint16_t *quant_matrix; + int block0; assert(s->block_last_index[n]>=0); nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1; - if (s->mb_intra) { - int block0; - if (n < 4) - block0 = block[0] * s->y_dc_scale; - else - block0 = block[0] * s->c_dc_scale; - /* XXX: only mpeg1 */ - quant_matrix = s->intra_matrix; + if (n < 4) + block0 = block[0] * s->y_dc_scale; + else + block0 = block[0] * s->c_dc_scale; + /* XXX: only mpeg1 */ + quant_matrix = s->intra_matrix; asm volatile( "pcmpeqw %%mm7, %%mm7 \n\t" "psrlw $15, %%mm7 \n\t" @@ -205,9 +263,19 @@ ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) : "%eax", "memory" ); - block[0]= block0; + block[0]= block0; +} - } else { +static void dct_unquantize_mpeg1_inter_mmx(MpegEncContext *s, + DCTELEM *block, int n, int qscale) +{ + int nCoeffs; + const uint16_t *quant_matrix; + + assert(s->block_last_index[n]>=0); + + nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]+1; + quant_matrix = s->inter_matrix; asm volatile( "pcmpeqw %%mm7, %%mm7 \n\t" @@ -262,28 +330,25 @@ ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) : "%eax", "memory" ); - } - } -static void dct_unquantize_mpeg2_mmx(MpegEncContext *s, +static void dct_unquantize_mpeg2_intra_mmx(MpegEncContext *s, DCTELEM *block, int n, int qscale) { int nCoeffs; const uint16_t *quant_matrix; + int block0; assert(s->block_last_index[n]>=0); if(s->alternate_scan) nCoeffs= 63; //FIXME else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; - if (s->mb_intra) { - int block0; - if (n < 4) - block0 = block[0] * s->y_dc_scale; - else - block0 = block[0] * s->c_dc_scale; - quant_matrix = s->intra_matrix; + if (n < 4) + block0 = block[0] * s->y_dc_scale; + else + block0 = block[0] * s->c_dc_scale; + quant_matrix = s->intra_matrix; asm volatile( "pcmpeqw %%mm7, %%mm7 \n\t" "psrlw $15, %%mm7 \n\t" @@ -329,10 +394,21 @@ ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "g" (-2*nCoeffs) : "%eax", "memory" ); - block[0]= block0; + block[0]= block0; //Note, we dont do mismatch control for intra as errors cannot accumulate +} - } else { +static void dct_unquantize_mpeg2_inter_mmx(MpegEncContext *s, + DCTELEM *block, int n, int qscale) +{ + int nCoeffs; + const uint16_t *quant_matrix; + + assert(s->block_last_index[n]>=0); + + if(s->alternate_scan) nCoeffs= 63; //FIXME + else nCoeffs= s->intra_scantable.raster_end[ s->block_last_index[n] ]; + quant_matrix = s->inter_matrix; asm volatile( "pcmpeqw %%mm7, %%mm7 \n\t" @@ -397,7 +473,6 @@ ::"r" (block+nCoeffs), "r"(quant_matrix+nCoeffs), "g" (qscale), "r" (-2*nCoeffs) : "%eax", "memory" ); - } } /* draw the edges of width 'w' of an image of size width, height @@ -505,9 +580,12 @@ if (mm_flags & MM_MMX) { const int dct_algo = s->avctx->dct_algo; - s->dct_unquantize_h263 = dct_unquantize_h263_mmx; - s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_mmx; - s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_mmx; + s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_mmx; + s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_mmx; + s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_mmx; + s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_mmx; + s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_mmx; + s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_mmx; draw_edges = draw_edges_mmx; diff -r 04b759af8bd4 -r 1a2db2073848 mpegvideo.c --- a/mpegvideo.c Sun Dec 14 01:42:00 2003 +0000 +++ b/mpegvideo.c Sun Dec 14 02:57:00 2003 +0000 @@ -40,11 +40,17 @@ #ifdef CONFIG_ENCODERS static void encode_picture(MpegEncContext *s, int picture_number); #endif //CONFIG_ENCODERS -static void dct_unquantize_mpeg1_c(MpegEncContext *s, +static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s, + DCTELEM *block, int n, int qscale); +static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s, + DCTELEM *block, int n, int qscale); +static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s, DCTELEM *block, int n, int qscale); -static void dct_unquantize_mpeg2_c(MpegEncContext *s, +static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s, DCTELEM *block, int n, int qscale); -static void dct_unquantize_h263_c(MpegEncContext *s, +static void dct_unquantize_h263_intra_c(MpegEncContext *s, + DCTELEM *block, int n, int qscale); +static void dct_unquantize_h263_inter_c(MpegEncContext *s, DCTELEM *block, int n, int qscale); static void draw_edges_c(uint8_t *buf, int wrap, int width, int height, int w); #ifdef CONFIG_ENCODERS @@ -204,9 +210,12 @@ /* init common dct for both encoder and decoder */ int DCT_common_init(MpegEncContext *s) { - s->dct_unquantize_h263 = dct_unquantize_h263_c; - s->dct_unquantize_mpeg1 = dct_unquantize_mpeg1_c; - s->dct_unquantize_mpeg2 = dct_unquantize_mpeg2_c; + s->dct_unquantize_h263_intra = dct_unquantize_h263_intra_c; + s->dct_unquantize_h263_inter = dct_unquantize_h263_inter_c; + s->dct_unquantize_mpeg1_intra = dct_unquantize_mpeg1_intra_c; + s->dct_unquantize_mpeg1_inter = dct_unquantize_mpeg1_inter_c; + s->dct_unquantize_mpeg2_intra = dct_unquantize_mpeg2_intra_c; + s->dct_unquantize_mpeg2_inter = dct_unquantize_mpeg2_inter_c; #ifdef CONFIG_ENCODERS s->dct_quantize= dct_quantize_c; @@ -1186,12 +1195,16 @@ /* set dequantizer, we cant do it during init as it might change for mpeg4 and we cant do it in the header decode as init isnt called for mpeg4 there yet */ - if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO) - s->dct_unquantize = s->dct_unquantize_mpeg2; - else if(s->out_format == FMT_H263) - s->dct_unquantize = s->dct_unquantize_h263; - else - s->dct_unquantize = s->dct_unquantize_mpeg1; + if(s->mpeg_quant || s->codec_id == CODEC_ID_MPEG2VIDEO){ + s->dct_unquantize_intra = s->dct_unquantize_mpeg2_intra; + s->dct_unquantize_inter = s->dct_unquantize_mpeg2_inter; + }else if(s->out_format == FMT_H263){ + s->dct_unquantize_intra = s->dct_unquantize_h263_intra; + s->dct_unquantize_inter = s->dct_unquantize_h263_inter; + }else{ + s->dct_unquantize_intra = s->dct_unquantize_mpeg1_intra; + s->dct_unquantize_inter = s->dct_unquantize_mpeg1_inter; + } if(s->dct_error_sum){ assert(s->avctx->noise_reduction && s->encoding); @@ -2746,7 +2759,7 @@ static inline void put_dct(MpegEncContext *s, DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale) { - s->dct_unquantize(s, block, i, qscale); + s->dct_unquantize_intra(s, block, i, qscale); s->dsp.idct_put (dest, line_size, block); } @@ -2763,7 +2776,7 @@ DCTELEM *block, int i, uint8_t *dest, int line_size, int qscale) { if (s->block_last_index[i] >= 0) { - s->dct_unquantize(s, block, i, qscale); + s->dct_unquantize_inter(s, block, i, qscale); s->dsp.idct_add (dest, line_size, block); } @@ -4781,7 +4794,7 @@ #endif //CONFIG_ENCODERS -static void dct_unquantize_mpeg1_c(MpegEncContext *s, +static void dct_unquantize_mpeg1_intra_c(MpegEncContext *s, DCTELEM *block, int n, int qscale) { int i, level, nCoeffs; @@ -4789,62 +4802,60 @@ nCoeffs= s->block_last_index[n]; - if (s->mb_intra) { - if (n < 4) - block[0] = block[0] * s->y_dc_scale; - else - block[0] = block[0] * s->c_dc_scale; - /* XXX: only mpeg1 */ - quant_matrix = s->intra_matrix; - for(i=1;i<=nCoeffs;i++) { - int j= s->intra_scantable.permutated[i]; - level = block[j]; - if (level) { - if (level < 0) { - level = -level; - level = (int)(level * qscale * quant_matrix[j]) >> 3; - level = (level - 1) | 1; - level = -level; - } else { - level = (int)(level * qscale * quant_matrix[j]) >> 3; - level = (level - 1) | 1; - } -#ifdef PARANOID - if (level < -2048 || level > 2047) - fprintf(stderr, "unquant error %d %d\n", i, level); -#endif - block[j] = level; + if (n < 4) + block[0] = block[0] * s->y_dc_scale; + else + block[0] = block[0] * s->c_dc_scale; + /* XXX: only mpeg1 */ + quant_matrix = s->intra_matrix; + for(i=1;i<=nCoeffs;i++) { + int j= s->intra_scantable.permutated[i]; + level = block[j]; + if (level) { + if (level < 0) { + level = -level; + level = (int)(level * qscale * quant_matrix[j]) >> 3; + level = (level - 1) | 1; + level = -level; + } else { + level = (int)(level * qscale * quant_matrix[j]) >> 3; + level = (level - 1) | 1; } - } - } else { - i = 0; - quant_matrix = s->inter_matrix; - for(;i<=nCoeffs;i++) { - int j= s->intra_scantable.permutated[i]; - level = block[j]; - if (level) { - if (level < 0) { - level = -level; - level = (((level << 1) + 1) * qscale * - ((int) (quant_matrix[j]))) >> 4; - level = (level - 1) | 1; - level = -level; - } else { - level = (((level << 1) + 1) * qscale * - ((int) (quant_matrix[j]))) >> 4; - level = (level - 1) | 1; - } -#ifdef PARANOID - if (level < -2048 || level > 2047) - fprintf(stderr, "unquant error %d %d\n", i, level); -#endif - block[j] = level; - } + block[j] = level; } } } -static void dct_unquantize_mpeg2_c(MpegEncContext *s, +static void dct_unquantize_mpeg1_inter_c(MpegEncContext *s, + DCTELEM *block, int n, int qscale) +{ + int i, level, nCoeffs; + const uint16_t *quant_matrix; + + nCoeffs= s->block_last_index[n]; + + quant_matrix = s->inter_matrix; + for(i=0; i<=nCoeffs; i++) { + int j= s->intra_scantable.permutated[i]; + level = block[j]; + if (level) { + if (level < 0) { + level = -level; + level = (((level << 1) + 1) * qscale * + ((int) (quant_matrix[j]))) >> 4; + level = (level - 1) | 1; + level = -level; + } else { + level = (((level << 1) + 1) * qscale * + ((int) (quant_matrix[j]))) >> 4; + level = (level - 1) | 1; + } + block[j] = level; + } + } +} + +static void dct_unquantize_mpeg2_intra_c(MpegEncContext *s, DCTELEM *block, int n, int qscale) { int i, level, nCoeffs; @@ -4853,61 +4864,96 @@ if(s->alternate_scan) nCoeffs= 63; else nCoeffs= s->block_last_index[n]; - if (s->mb_intra) { + if (n < 4) + block[0] = block[0] * s->y_dc_scale; + else + block[0] = block[0] * s->c_dc_scale; + quant_matrix = s->intra_matrix; + for(i=1;i<=nCoeffs;i++) { + int j= s->intra_scantable.permutated[i]; + level = block[j]; + if (level) { + if (level < 0) { + level = -level; + level = (int)(level * qscale * quant_matrix[j]) >> 3; + level = -level; + } else { + level = (int)(level * qscale * quant_matrix[j]) >> 3; + } + block[j] = level; + } + } +} + +static void dct_unquantize_mpeg2_inter_c(MpegEncContext *s, + DCTELEM *block, int n, int qscale) +{ + int i, level, nCoeffs; + const uint16_t *quant_matrix; + int sum=-1; + + if(s->alternate_scan) nCoeffs= 63; + else nCoeffs= s->block_last_index[n]; + + quant_matrix = s->inter_matrix; + for(i=0; i<=nCoeffs; i++) { + int j= s->intra_scantable.permutated[i]; + level = block[j]; + if (level) { + if (level < 0) { + level = -level; + level = (((level << 1) + 1) * qscale * + ((int) (quant_matrix[j]))) >> 4; + level = -level; + } else { + level = (((level << 1) + 1) * qscale * + ((int) (quant_matrix[j]))) >> 4; + } + block[j] = level; + sum+=level; + } + } + block[63]^=sum&1; +} + +static void dct_unquantize_h263_intra_c(MpegEncContext *s, + DCTELEM *block, int n, int qscale) +{ + int i, level, qmul, qadd; + int nCoeffs; + + assert(s->block_last_index[n]>=0); + + qmul = qscale << 1; + + if (!s->h263_aic) { if (n < 4) block[0] = block[0] * s->y_dc_scale; else block[0] = block[0] * s->c_dc_scale; - quant_matrix = s->intra_matrix; - for(i=1;i<=nCoeffs;i++) { - int j= s->intra_scantable.permutated[i]; - level = block[j]; - if (level) { - if (level < 0) { - level = -level; - level = (int)(level * qscale * quant_matrix[j]) >> 3; - level = -level; - } else { - level = (int)(level * qscale * quant_matrix[j]) >> 3; - } -#ifdef PARANOID - if (level < -2048 || level > 2047) - fprintf(stderr, "unquant error %d %d\n", i, level); -#endif - block[j] = level; + qadd = (qscale - 1) | 1; + }else{ + qadd = 0; + } + if(s->ac_pred) + nCoeffs=63; + else + nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; + + for(i=1; i<=nCoeffs; i++) { + level = block[i]; + if (level) { + if (level < 0) { + level = level * qmul - qadd; + } else { + level = level * qmul + qadd; } + block[i] = level; } - } else { - int sum=-1; - i = 0; - quant_matrix = s->inter_matrix; - for(;i<=nCoeffs;i++) { - int j= s->intra_scantable.permutated[i]; - level = block[j]; - if (level) { - if (level < 0) { - level = -level; - level = (((level << 1) + 1) * qscale * - ((int) (quant_matrix[j]))) >> 4; - level = -level; - } else { - level = (((level << 1) + 1) * qscale * - ((int) (quant_matrix[j]))) >> 4; - } -#ifdef PARANOID - if (level < -2048 || level > 2047) - fprintf(stderr, "unquant error %d %d\n", i, level); -#endif - block[j] = level; - sum+=level; - } - } - block[63]^=sum&1; } } - -static void dct_unquantize_h263_c(MpegEncContext *s, +static void dct_unquantize_h263_inter_c(MpegEncContext *s, DCTELEM *block, int n, int qscale) { int i, level, qmul, qadd; @@ -4918,22 +4964,9 @@ qadd = (qscale - 1) | 1; qmul = qscale << 1; - if (s->mb_intra) { - if (!s->h263_aic) { - if (n < 4) - block[0] = block[0] * s->y_dc_scale; - else - block[0] = block[0] * s->c_dc_scale; - }else - qadd = 0; - i = 1; - nCoeffs= 63; //does not allways use zigzag table - } else { - i = 0; - nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; - } - - for(;i<=nCoeffs;i++) { + nCoeffs= s->inter_scantable.raster_end[ s->block_last_index[n] ]; + + for(i=0; i<=nCoeffs; i++) { level = block[i]; if (level) { if (level < 0) { @@ -4941,16 +4974,11 @@ } else { level = level * qmul + qadd; } -#ifdef PARANOID - if (level < -2048 || level > 2047) - fprintf(stderr, "unquant error %d %d\n", i, level); -#endif block[i] = level; } } } - static const AVOption mpeg4_options[] = { AVOPTION_CODEC_INT("bitrate", "desired video bitrate", bit_rate, 4, 240000000, 800000), diff -r 04b759af8bd4 -r 1a2db2073848 mpegvideo.h --- a/mpegvideo.h Sun Dec 14 01:42:00 2003 +0000 +++ b/mpegvideo.h Sun Dec 14 02:57:00 2003 +0000 @@ -645,13 +645,21 @@ #define SLICE_END -2 ///