# HG changeset patch # User michaelni # Date 1032792971 0 # Node ID a1c69cb685b304595fba2e33a44dc4f540f78067 # Parent efcbfbd188649e55d24e493a14c05015f8ff2b9a adaptive quantization (lumi/temporal & spatial complexity masking) diff -r efcbfbd18864 -r a1c69cb685b3 avcodec.h --- a/avcodec.h Mon Sep 23 08:44:24 2002 +0000 +++ b/avcodec.h Mon Sep 23 14:56:11 2002 +0000 @@ -5,8 +5,8 @@ #define LIBAVCODEC_VERSION_INT 0x000406 #define LIBAVCODEC_VERSION "0.4.6" -#define LIBAVCODEC_BUILD 4624 -#define LIBAVCODEC_BUILD_STR "4624" +#define LIBAVCODEC_BUILD 4625 +#define LIBAVCODEC_BUILD_STR "4625" enum CodecID { CODEC_ID_NONE, @@ -123,6 +123,8 @@ #define CODEC_FLAG_DR1 0x8000 /* direct renderig type 1 (store internal frames in external buffers) */ #define CODEC_FLAG_NOT_TRUNCATED 0x00010000 /* input bitstream is not truncated, except before a startcode allows the last part of a frame to be decoded earlier */ +#define CODEC_FLAG_NORMALIZE_AQP 0x00020000 /* normalize adaptive quantization */ + /* codec capabilities */ #define CODEC_CAP_DRAW_HORIZ_BAND 0x0001 /* decoder can use draw_horiz_band callback */ @@ -174,8 +176,8 @@ * some codecs need / can use extra-data like huffman tables * mjpeg: huffman tables * rv10: additional flags - * encoding: set/allocated/freed by user. - * decoding: set/allocated/freed by lavc. (can be NULL) + * encoding: set/allocated/freed by lavc. + * decoding: set/allocated/freed by user. 
*/ void *extradata; int extradata_size; @@ -285,8 +287,13 @@ uint8_t *mbskip_table; /* encoding parameters */ + /** + * quality (between 1 (good) and 31 (bad)) + * encoding: set by user if CODEC_FLAG_QSCALE is set otherwise set by lavc + * decoding: set by lavc + */ int quality; /* quality of the previous encoded frame - (between 1 (good) and 31 (bad)) + this is allso used to set the quality in vbr mode and the per frame quality in CODEC_FLAG_TYPE (second pass mode) */ float qcompress; /* amount of qscale change between easy & hard scenes (0.0-1.0)*/ @@ -632,6 +639,34 @@ * decoding; set by lavc */ long long int pts; + + /** + * luminance masking (0-> disabled) + * encoding: set by user + * decoding: unused + */ + float lumi_masking; + + /** + * temporary complexity masking (0-> disabled) + * encoding: set by user + * decoding: unused + */ + float temporal_cplx_masking; + + /** + * spatial complexity masking (0-> disabled) + * encoding: set by user + * decoding: unused + */ + float spatial_cplx_masking; + + /** + * p block masking (0-> disabled) + * encoding: set by user + * decoding: unused + */ + float p_masking; //FIXME this should be reordered after kabis API is finished ... //TODO kill kabi diff -r efcbfbd18864 -r a1c69cb685b3 h263.c --- a/h263.c Mon Sep 23 08:44:24 2002 +0000 +++ b/h263.c Mon Sep 23 14:56:11 2002 +0000 @@ -31,11 +31,17 @@ #include "h263data.h" #include "mpeg4data.h" +//#undef NDEBUG +//#include <assert.h> + //rounded divison & shift #define RSHIFT(a,b) ((a) > 0 ? ((a) + (1<<((b)-1)))>>(b) : ((a) + (1<<((b)-1))-1)>>(b)) +#if 1 #define PRINT_MB_TYPE(a) {} -//#define PRINT_MB_TYPE(a) printf(a) +#else +#define PRINT_MB_TYPE(a) printf(a) +#endif #define INTRA_MCBPC_VLC_BITS 6 #define INTER_MCBPC_VLC_BITS 6 @@ -298,6 +304,54 @@ return score0 > score1 ? 
1 : 0; } +void ff_clean_mpeg4_qscales(MpegEncContext *s){ + int i; + /* more braindead iso mpeg mess */ + + for(i=1; i<s->mb_num; i++){ + if(s->qscale_table[i] - s->qscale_table[i-1] >2) + s->qscale_table[i]= s->qscale_table[i-1]+2; + } + for(i=s->mb_num-2; i>=0; i--){ + if(s->qscale_table[i] - s->qscale_table[i+1] >2) + s->qscale_table[i]= s->qscale_table[i+1]+2; + } + + for(i=1; i<s->mb_num; i++){ + if(s->qscale_table[i] != s->qscale_table[i-1] && (s->mb_type[i]&MB_TYPE_INTER4V)){ + s->mb_type[i]&= ~MB_TYPE_INTER4V; + s->mb_type[i]|= MB_TYPE_INTER; + } + } + + if(s->pict_type== B_TYPE){ + int odd=0; + /* ok, come on, this isnt funny anymore, theres more code for handling this mpeg4 mess than + for the actual adaptive quantization */ + + for(i=0; i<s->mb_num; i++){ + odd += s->qscale_table[i]&1; + } + + if(2*odd > s->mb_num) odd=1; + else odd=0; + + for(i=0; i<s->mb_num; i++){ + if((s->qscale_table[i]&1) != odd) + s->qscale_table[i]++; + if(s->qscale_table[i] > 31) + s->qscale_table[i]= 31; + } + + for(i=1; i<s->mb_num; i++){ + if(s->qscale_table[i] != s->qscale_table[i-1] && (s->mb_type[i]&MB_TYPE_DIRECT)){ + s->mb_type[i]&= ~MB_TYPE_DIRECT; + s->mb_type[i]|= MB_TYPE_BIDIR; + } + } + } +} + void mpeg4_encode_mb(MpegEncContext * s, DCTELEM block[6][64], int motion_x, int motion_y) @@ -308,6 +362,7 @@ PutBitContext * const tex_pb = s->data_partitioning && s->pict_type!=B_TYPE ? &s->tex_pb : &s->pb; PutBitContext * const dc_pb = s->data_partitioning && s->pict_type!=I_TYPE ? &s->pb2 : &s->pb; const int interleaved_stats= (s->flags&CODEC_FLAG_PASS1) && !s->data_partitioning ? 
1 : 0; + const int dquant_code[5]= {1,0,9,2,3}; // printf("**mb x=%d y=%d\n", s->mb_x, s->mb_y); if (!s->mb_intra) { @@ -328,20 +383,27 @@ s->last_mv[1][0][0]= s->last_mv[1][0][1]= 0; } + + assert(s->dquant>=-2 && s->dquant<=2); + assert((s->dquant&1)==0); + assert(mb_type>=0); /* nothing to do if this MB was skiped in the next P Frame */ - if(s->mbskip_table[s->mb_y * s->mb_width + s->mb_x]){ + if(s->mbskip_table[s->mb_y * s->mb_width + s->mb_x]){ //FIXME avoid DCT & ... s->skip_count++; s->mv[0][0][0]= s->mv[0][0][1]= s->mv[1][0][0]= s->mv[1][0][1]= 0; s->mv_dir= MV_DIR_FORWARD; //doesnt matter + s->qscale -= s->dquant; return; } if ((cbp | motion_x | motion_y | mb_type) ==0) { /* direct MB with MV={0,0} */ + assert(s->dquant==0); + put_bits(&s->pb, 1, 1); /* mb not coded modb1=1 */ if(interleaved_stats){ @@ -356,8 +418,13 @@ put_bits(&s->pb, mb_type+1, 1); // this table is so simple that we dont need it :) if(cbp) put_bits(&s->pb, 6, cbp); - if(cbp && mb_type) - put_bits(&s->pb, 1, 0); /* no q-scale change */ + if(cbp && mb_type){ + if(s->dquant) + put_bits(&s->pb, 2, (s->dquant>>2)+3); + else + put_bits(&s->pb, 1, 0); + }else + s->qscale -= s->dquant; if(interleaved_stats){ bits= get_bit_count(&s->pb); @@ -421,7 +488,7 @@ s->last_bits=bits; } }else{ /* s->pict_type==B_TYPE */ - if ((cbp | motion_x | motion_y) == 0 && s->mv_type==MV_TYPE_16X16) { + if ((cbp | motion_x | motion_y | s->dquant) == 0 && s->mv_type==MV_TYPE_16X16) { /* check if the B frames can skip it too, as we must skip it if we skip here why didnt they just compress the skip-mb bits instead of reusing them ?! 
*/ if(s->max_b_frames>0){ @@ -470,12 +537,16 @@ put_bits(&s->pb, 1, 0); /* mb coded */ if(s->mv_type==MV_TYPE_16X16){ cbpc = cbp & 3; + if(s->dquant) cbpc+= 8; put_bits(&s->pb, inter_MCBPC_bits[cbpc], inter_MCBPC_code[cbpc]); + cbpy = cbp >> 2; cbpy ^= 0xf; put_bits(pb2, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]); + if(s->dquant) + put_bits(pb2, 2, dquant_code[s->dquant+2]); if(interleaved_stats){ bits= get_bit_count(&s->pb); @@ -580,10 +651,12 @@ cbpc = cbp & 3; if (s->pict_type == I_TYPE) { + if(s->dquant) cbpc+=4; put_bits(&s->pb, intra_MCBPC_bits[cbpc], intra_MCBPC_code[cbpc]); } else { + if(s->dquant) cbpc+=8; put_bits(&s->pb, 1, 0); /* mb coded */ put_bits(&s->pb, inter_MCBPC_bits[cbpc + 4], @@ -592,6 +665,8 @@ put_bits(pb2, 1, s->ac_pred); cbpy = cbp >> 2; put_bits(pb2, cbpy_tab[cbpy][1], cbpy_tab[cbpy][0]); + if(s->dquant) + put_bits(dc_pb, 2, dquant_code[s->dquant+2]); if(interleaved_stats){ bits= get_bit_count(&s->pb); @@ -963,6 +1038,7 @@ } else if (val >= l) { val -= m; } + assert(val>=-l && val<l); if (val >= 0) { sign = 0; diff -r efcbfbd18864 -r a1c69cb685b3 motion_est.c --- a/motion_est.c Mon Sep 23 08:44:24 2002 +0000 +++ b/motion_est.c Mon Sep 23 14:56:11 2002 +0000 @@ -1142,6 +1142,7 @@ //printf("%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout); s->mb_var [s->mb_width * mb_y + mb_x] = varc; s->mc_mb_var[s->mb_width * mb_y + mb_x] = vard; + s->mb_mean [s->mb_width * mb_y + mb_x] = (sum+7)>>4; s->mb_var_sum += varc; s->mc_mb_var_sum += vard; //printf("E%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout); diff -r efcbfbd18864 -r a1c69cb685b3 mpegvideo.c --- a/mpegvideo.c Mon Sep 23 08:44:24 2002 +0000 +++ b/mpegvideo.c Mon Sep 23 14:56:11 2002 +0000 @@ -207,6 +207,7 @@ CHECKED_ALLOCZ(s->mb_var , s->mb_num * sizeof(INT16)) CHECKED_ALLOCZ(s->mc_mb_var, s->mb_num * sizeof(INT16)) + CHECKED_ALLOCZ(s->mb_mean , s->mb_num * sizeof(INT8)) /* Allocate MV tables */ 
CHECKED_ALLOCZ(s->p_mv_table , mv_table_size * 2 * sizeof(INT16)) @@ -329,6 +330,7 @@ av_freep(&s->mb_type); av_freep(&s->mb_var); av_freep(&s->mc_mb_var); + av_freep(&s->mb_mean); av_freep(&s->p_mv_table); av_freep(&s->b_forw_mv_table); av_freep(&s->b_back_mv_table); @@ -442,6 +444,12 @@ /* Fixed QSCALE */ s->fixed_qscale = (avctx->flags & CODEC_FLAG_QSCALE); + s->adaptive_quant= ( s->avctx->lumi_masking + || s->avctx->temporal_cplx_masking + || s->avctx->spatial_cplx_masking + || s->avctx->p_masking) + && !s->fixed_qscale; + switch(avctx->codec->id) { case CODEC_ID_MPEG1VIDEO: s->out_format = FMT_MPEG1; @@ -893,7 +901,8 @@ if (s->out_format == FMT_MJPEG) mjpeg_picture_trailer(s); - avctx->quality = s->qscale; + if(!s->fixed_qscale) + avctx->quality = s->qscale; if(s->flags&CODEC_FLAG_PASS1) ff_write_pass1_stats(s); @@ -1753,6 +1762,24 @@ } #endif for(i=0; i<6; i++) skip_dct[i]=0; + + if(s->adaptive_quant){ + s->dquant= s->qscale_table[mb_x + mb_y*s->mb_width] - s->qscale; + if(s->codec_id==CODEC_ID_MPEG4){ + if (s->dquant> 2) s->dquant= 2; + else if(s->dquant<-2) s->dquant=-2; + + if(!s->mb_intra){ + assert(s->dquant==0 || s->mv_type!=MV_TYPE_8X8); + + if(s->mv_dir&MV_DIRECT) + s->dquant=0; + } + } + s->qscale+= s->dquant; + s->y_dc_scale= s->y_dc_scale_table[ s->qscale ]; + s->c_dc_scale= s->c_dc_scale_table[ s->qscale ]; + } if (s->mb_intra) { UINT8 *ptr; @@ -2080,6 +2107,8 @@ ff_set_mpeg4_time(s, s->picture_number); s->scene_change_score=0; + + s->qscale= (int)(s->frame_qscale + 0.5); //FIXME qscale / ... 
stuff for ME ratedistoration /* Estimate motion for every MB */ if(s->pict_type != I_TYPE){ @@ -2125,7 +2154,8 @@ sum= (sum+8)>>4; varc = (pix_norm1(pix, s->linesize) - sum*sum + 500 + 128)>>8; - s->mb_var[s->mb_width * mb_y + mb_x] = varc; + s->mb_var [s->mb_width * mb_y + mb_x] = varc; + s->mb_mean[s->mb_width * mb_y + mb_x] = (sum+7)>>4; s->mb_var_sum += varc; } } @@ -2154,12 +2184,19 @@ ff_fix_long_b_mvs(s, s->b_bidir_back_mv_table, s->b_code, MB_TYPE_BIDIR); } -//printf("f_code %d ///\n", s->f_code); + if (s->fixed_qscale) + s->frame_qscale = s->avctx->quality; + else + s->frame_qscale = ff_rate_estimate_qscale(s); -// printf("%d %d\n", s->avg_mb_var, s->mc_mb_var); - if (!s->fixed_qscale) - s->qscale = ff_rate_estimate_qscale(s); - + if(s->adaptive_quant && s->codec_id==CODEC_ID_MPEG4) + ff_clean_mpeg4_qscales(s); + + if(s->adaptive_quant) + s->qscale= s->qscale_table[0]; + else + s->qscale= (int)(s->frame_qscale + 0.5); + if (s->out_format == FMT_MJPEG) { /* for mjpeg, we do include qscale in the matrix */ s->intra_matrix[0] = ff_mpeg1_default_intra_matrix[0]; diff -r efcbfbd18864 -r a1c69cb685b3 mpegvideo.h --- a/mpegvideo.h Mon Sep 23 08:44:24 2002 +0000 +++ b/mpegvideo.h Mon Sep 23 14:56:11 2002 +0000 @@ -53,7 +53,7 @@ typedef struct RateControlEntry{ int pict_type; - int qscale; + float qscale; int mv_bits; int i_tex_bits; int p_tex_bits; @@ -188,6 +188,9 @@ int input_pict_type; /* pict_type prior to reordering of frames */ int force_type; /* 0= no force, otherwise I_TYPE, P_TYPE, ... */ int qscale; /* QP */ + float frame_qscale; /* qscale from the frame level rc */ + int adaptive_quant; /* use adaptive quantization */ + int dquant; /* qscale difference to prev qscale */ int pict_type; /* I_TYPE, P_TYPE, B_TYPE, ... 
*/ int last_pict_type; int last_non_b_pict_type; /* used for mpeg4 gmc b-frames & ratecontrol */ @@ -241,13 +244,14 @@ int hurry_up; /* when set to 1 during decoding, b frames will be skiped when set to 2 idct/dequant will be skipped too */ - + /* macroblock layer */ int mb_x, mb_y; int mb_incr; int mb_intra; UINT16 *mb_var; /* Table for MB variances */ UINT16 *mc_mb_var; /* Table for motion compensated MB variances */ + UINT8 *mb_mean; /* Table for MB luminance */ UINT8 *mb_type; /* Table for MB type */ #define MB_TYPE_INTRA 0x01 #define MB_TYPE_INTER 0x02 @@ -582,6 +586,7 @@ void ff_mpeg4_init_partitions(MpegEncContext *s); void ff_mpeg4_merge_partitions(MpegEncContext *s); extern inline int ff_mpeg4_pred_dc(MpegEncContext * s, int n, UINT16 **dc_val_ptr, int *dir_ptr); +void ff_clean_mpeg4_qscales(MpegEncContext *s); /* rv10.c */ void rv10_encode_picture_header(MpegEncContext *s, int picture_number); @@ -611,8 +616,7 @@ /* rate control */ int ff_rate_control_init(MpegEncContext *s); -int ff_rate_estimate_qscale(MpegEncContext *s); -int ff_rate_estimate_qscale_pass2(MpegEncContext *s); +float ff_rate_estimate_qscale(MpegEncContext *s); void ff_write_pass1_stats(MpegEncContext *s); void ff_rate_control_uninit(MpegEncContext *s); double ff_eval(char *s, double *const_value, char **const_name, diff -r efcbfbd18864 -r a1c69cb685b3 ratecontrol.c --- a/ratecontrol.c Mon Sep 23 08:44:24 2002 +0000 +++ b/ratecontrol.c Mon Sep 23 14:56:11 2002 +0000 @@ -38,9 +38,9 @@ static double get_qscale(MpegEncContext *s, RateControlEntry *rce, double rate_factor, int frame_num); void ff_write_pass1_stats(MpegEncContext *s){ - sprintf(s->avctx->stats_out, "in:%d out:%d type:%d q:%d itex:%d ptex:%d mv:%d misc:%d fcode:%d bcode:%d mc-var:%d var:%d icount:%d;\n", + sprintf(s->avctx->stats_out, "in:%d out:%d type:%d q:%f itex:%d ptex:%d mv:%d misc:%d fcode:%d bcode:%d mc-var:%d var:%d icount:%d;\n", s->picture_number, s->input_picture_number - s->max_b_frames, s->pict_type, - s->qscale, 
s->i_tex_bits, s->p_tex_bits, s->mv_bits, s->misc_bits, + s->frame_qscale, s->i_tex_bits, s->p_tex_bits, s->mv_bits, s->misc_bits, s->f_code, s->b_code, s->mc_mb_var_sum, s->mb_var_sum, s->i_count); } @@ -105,7 +105,7 @@ assert(picture_number < rcc->num_entries); rce= &rcc->entry[picture_number]; - e+=sscanf(p, " in:%*d out:%*d type:%d q:%d itex:%d ptex:%d mv:%d misc:%d fcode:%d bcode:%d mc-var:%d var:%d icount:%d", + e+=sscanf(p, " in:%*d out:%*d type:%d q:%f itex:%d ptex:%d mv:%d misc:%d fcode:%d bcode:%d mc-var:%d var:%d icount:%d", &rce->pict_type, &rce->qscale, &rce->i_tex_bits, &rce->p_tex_bits, &rce->mv_bits, &rce->misc_bits, &rce->f_code, &rce->b_code, &rce->mc_mb_var_sum, &rce->mb_var_sum, &rce->i_count); if(e!=12){ @@ -433,7 +433,7 @@ q= exp(q); } - + return q; } @@ -462,10 +462,89 @@ p->coeff+= new_coeff; } -int ff_rate_estimate_qscale(MpegEncContext *s) +static void adaptive_quantization(MpegEncContext *s, double q){ + int i; + const float lumi_masking= s->avctx->lumi_masking / (128.0*128.0); + const float temp_cplx_masking= s->avctx->temporal_cplx_masking; + const float spatial_cplx_masking = s->avctx->spatial_cplx_masking; + const float p_masking = s->avctx->p_masking; + float bits_sum= 0.0; + float cplx_sum= 0.0; + float cplx_tab[s->mb_num]; + float bits_tab[s->mb_num]; + const int qmin= 2; //s->avctx->mb_qmin; + const int qmax= 31; //s->avctx->mb_qmax; + + for(i=0; i<s->mb_num; i++){ + float temp_cplx= sqrt(s->mc_mb_var[i]); + float spat_cplx= sqrt(s->mb_var[i]); + const int lumi= s->mb_mean[i]; + float bits, cplx, factor; + + if(spat_cplx < q/3) spat_cplx= q/3; //FIXME finetune + if(temp_cplx < q/3) temp_cplx= q/3; //FIXME finetune + + if((s->mb_type[i]&MB_TYPE_INTRA)){//FIXME hq mode + cplx= spat_cplx; + factor= 1.0 + p_masking; + }else{ + cplx= temp_cplx; + factor= pow(temp_cplx, - temp_cplx_masking); + } + factor*=pow(spat_cplx, - spatial_cplx_masking); + factor*= (1.0 - (lumi-128)*(lumi-128)*lumi_masking); + + if(factor<0.00001) factor= 0.00001; + 
+ bits= cplx*factor; + cplx_sum+= cplx; + bits_sum+= bits; + cplx_tab[i]= cplx; + bits_tab[i]= bits; + } + + /* handle qmin/qmax cliping */ + if(s->flags&CODEC_FLAG_NORMALIZE_AQP){ + for(i=0; i<s->mb_num; i++){ + float newq= q*cplx_tab[i]/bits_tab[i]; + newq*= bits_sum/cplx_sum; + + if (newq > qmax){ + bits_sum -= bits_tab[i]; + cplx_sum -= cplx_tab[i]*q/qmax; + } + else if(newq < qmin){ + bits_sum -= bits_tab[i]; + cplx_sum -= cplx_tab[i]*q/qmin; + } + } + } + + for(i=0; i<s->mb_num; i++){ + float newq= q*cplx_tab[i]/bits_tab[i]; + int intq; + + if(s->flags&CODEC_FLAG_NORMALIZE_AQP){ + newq*= bits_sum/cplx_sum; + } + + if(i && ABS(s->qscale_table[i-1] - newq)<0.75) + intq= s->qscale_table[i-1]; + else + intq= (int)(newq + 0.5); + + if (intq > qmax) intq= qmax; + else if(intq < qmin) intq= qmin; +//if(i%s->mb_width==0) printf("\n"); +//printf("%2d%3d ", intq, ff_sqrt(s->mc_mb_var[i])); + s->qscale_table[i]= intq; + } +} + +float ff_rate_estimate_qscale(MpegEncContext *s) { float q; - int qscale, qmin, qmax; + int qmin, qmax; float br_compensation; double diff; double short_term_q; @@ -581,16 +660,20 @@ // printf("%f %d %d %d\n", q, picture_number, (int)wanted_bits, (int)s->total_bits); //printf("%f %f %f\n", q, br_compensation, short_term_q); - qscale= (int)(q + 0.5); - + //printf("q:%d diff:%d comp:%f st_q:%f last_size:%d type:%d\n", qscale, (int)diff, br_compensation, // short_term_q, s->frame_bits, pict_type); //printf("%d %d\n", s->bit_rate, (int)fps); - rcc->last_qscale= qscale; + if(s->adaptive_quant) + adaptive_quantization(s, q); + else + q= (int)(q + 0.5); + + rcc->last_qscale= q; rcc->last_mc_mb_var_sum= s->mc_mb_var_sum; rcc->last_mb_var_sum= s->mb_var_sum; - return qscale; + return q; } //----------------------------------------------