# HG changeset patch # User michaelni # Date 1042998913 0 # Node ID 5d4c95f323d03fd278b82cf468b15153137cbea4 # Parent 7a5038ec769b6edca22dde4284df9a4a05291e1f finetuneing thresholds/factors nicer mb decission a few minor improvements & fixes diff -r 7a5038ec769b -r 5d4c95f323d0 dsputil.c --- a/dsputil.c Sun Jan 19 12:06:36 2003 +0000 +++ b/dsputil.c Sun Jan 19 17:55:13 2003 +0000 @@ -1807,7 +1807,7 @@ memcpy(bak, temp, 64*sizeof(DCTELEM)); - s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); + s->block_last_index[0/*FIXME*/]= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); s->dct_unquantize(s, temp, 0, s->qscale); simple_idct(temp); //FIXME @@ -1826,19 +1826,7 @@ const int esc_length= s->ac_esc_length; uint8_t * length; uint8_t * last_length; - - s->mb_intra=0; - if (s->mb_intra) { - start_i = 1; - length = s->intra_ac_vlc_length; - last_length= s->intra_ac_vlc_last_length; - } else { - start_i = 0; - length = s->inter_ac_vlc_length; - last_length= s->inter_ac_vlc_last_length; - } - for(i=0; i<8; i++){ ((uint32_t*)(bak + i*stride))[0]= ((uint32_t*)(src2 + i*stride))[0]; ((uint32_t*)(bak + i*stride))[1]= ((uint32_t*)(src2 + i*stride))[1]; @@ -1846,10 +1834,22 @@ s->dsp.diff_pixels(temp, src1, src2, stride); - last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); + s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); + + bits=0; - bits=0; - if(last>=0){ + if (s->mb_intra) { + start_i = 1; + length = s->intra_ac_vlc_length; + last_length= s->intra_ac_vlc_last_length; + bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma + } else { + start_i = 0; + length = s->inter_ac_vlc_length; + last_length= s->inter_ac_vlc_last_length; + } + + if(last>=start_i){ run=0; for(i=start_i; i=0){ s->dct_unquantize(s, temp, 0, s->qscale); } @@ -1883,7 +1886,7 @@ distoration= s->dsp.sse[1](NULL, bak, src1, stride); - return distoration + ((bits*s->qscale*s->qscale*105 + 64)>>7); + return distoration + ((bits*s->qscale*s->qscale*109 + 64)>>7); } static int bit8x8_c(/*MpegEncContext*/ void *c, uint8_t *src1, uint8_t *src2, int stride){ @@ -1894,25 +1897,25 @@ const int esc_length= s->ac_esc_length; uint8_t * length; uint8_t * last_length; + + s->dsp.diff_pixels(temp, src1, src2, stride); - s->mb_intra=0; + s->block_last_index[0/*FIXME*/]= last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); + + bits=0; if (s->mb_intra) { - start_i = 1; + start_i = 1; length = s->intra_ac_vlc_length; last_length= s->intra_ac_vlc_last_length; + bits+= s->luma_dc_vlc_length[temp[0] + 256]; //FIXME chroma } else { start_i = 0; length = s->inter_ac_vlc_length; last_length= s->inter_ac_vlc_last_length; } - - s->dsp.diff_pixels(temp, src1, src2, stride); - - last= s->fast_dct_quantize(s, temp, 0/*FIXME*/, s->qscale, &i); - bits=0; - if(last>=0){ + if(last>=start_i){ run=0; for(i=start_i; iintra_ac_vlc_last_length= uni_mpeg4_intra_rl_len + 128*64; s->inter_ac_vlc_length = uni_mpeg4_inter_rl_len; s->inter_ac_vlc_last_length= uni_mpeg4_inter_rl_len + 128*64; + s->luma_dc_vlc_length= uni_DCtab_lum_len; + s->chroma_dc_vlc_length= uni_DCtab_chrom_len; s->ac_esc_length= 7+2+1+6+1+12+1; break; case CODEC_ID_H263P: @@ -1957,10 +1961,10 @@ level+=256; if (n < 4) { /* luminance */ - put_bits(s, uni_DCtab_lum[level][1], uni_DCtab_lum[level][0]); + put_bits(s, uni_DCtab_lum_len[level], uni_DCtab_lum_bits[level]); } else { /* chrominance */ - put_bits(s, uni_DCtab_chrom[level][1], uni_DCtab_chrom[level][0]); + put_bits(s, uni_DCtab_chrom_len[level], uni_DCtab_chrom_bits[level]); } #else int size, v; diff -r 7a5038ec769b -r 5d4c95f323d0 i386/mpegvideo_mmx_template.c --- a/i386/mpegvideo_mmx_template.c Sun Jan 19 12:06:36 2003 +0000 +++ b/i386/mpegvideo_mmx_template.c Sun Jan 19 17:55:13 2003 +0000 @@ -53,8 +53,7 @@ if (!s->h263_aic) { #if 1 asm volatile ( - "xorl %%edx, %%edx \n\t" - "mul %%ecx \n\t" + "imul %%ecx \n\t" : "=d" (level), "=a"(dummy) : "a" ((block[0]>>2) + q), "c" (inverse[q<<1]) ); diff -r 7a5038ec769b -r 5d4c95f323d0 mem.c --- a/mem.c Sun Jan 19 12:06:36 2003 +0000 +++ b/mem.c Sun Jan 19 17:55:13 2003 +0000 @@ -29,6 +29,9 @@ void *av_malloc(unsigned int size) { void *ptr; + +// if(size==0) return NULL; + #if defined (HAVE_MEMALIGN) ptr = memalign(16,size); /* Why 64? diff -r 7a5038ec769b -r 5d4c95f323d0 motion_est.c --- a/motion_est.c Sun Jan 19 12:06:36 2003 +0000 +++ b/motion_est.c Sun Jan 19 17:55:13 2003 +0000 @@ -305,20 +305,21 @@ }; static inline int get_penalty_factor(MpegEncContext *s, int type){ - - switch(type){ + switch(type&0xFF){ default: case FF_CMP_SAD: - return s->qscale; + return s->qscale*2; case FF_CMP_DCT: + return s->qscale*3; case FF_CMP_SATD: + return s->qscale*6; case FF_CMP_SSE: - case FF_CMP_PSNR: - return s->qscale*8; + return s->qscale*s->qscale*2; case FF_CMP_BIT: return 1; case FF_CMP_RD: - return (s->qscale*s->qscale*105 + 64)>>7; + case FF_CMP_PSNR: + return (s->qscale*s->qscale*185 + 64)>>7; } } @@ -336,7 +337,9 @@ }else{ if(s->avctx->me_sub_cmp&FF_CMP_CHROMA) s->me.sub_motion_search= simple_chroma_hpel_motion_search; - else if(s->avctx->me_sub_cmp == FF_CMP_SAD && s->avctx->me_cmp == FF_CMP_SAD) + else if( s->avctx->me_sub_cmp == FF_CMP_SAD + && s->avctx-> me_cmp == FF_CMP_SAD + && s->avctx-> mb_cmp == FF_CMP_SAD) s->me.sub_motion_search= sad_hpel_motion_search; else s->me.sub_motion_search= simple_hpel_motion_search; @@ -355,6 +358,18 @@ }else{ s->me.pre_motion_search= simple_epzs_motion_search; } + + if(s->flags&CODEC_FLAG_QPEL){ + if(s->avctx->mb_cmp&FF_CMP_CHROMA) + s->me.get_mb_score= simple_chroma_qpel_get_mb_score; + else + s->me.get_mb_score= simple_qpel_get_mb_score; + }else{ + if(s->avctx->mb_cmp&FF_CMP_CHROMA) + s->me.get_mb_score= simple_chroma_hpel_get_mb_score; + else + s->me.get_mb_score= simple_hpel_get_mb_score; + } } static int pix_dev(UINT8 * pix, int line_size, int mean) @@ -788,12 +803,11 @@ } } -static inline int mv4_search(MpegEncContext *s, int xmin, int ymin, int xmax, int ymax, int mx, int my, int shift) +static inline int h263_mv4_search(MpegEncContext *s, int xmin, int ymin, int xmax, int ymax, int mx, int my, int shift) { int block; int P[10][2]; - uint8_t *ref_picture= s->last_picture.data[0]; - int dmin_sum=0; + int dmin_sum=0, mx4_sum=0, my4_sum=0; uint16_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV; for(block=0; block<4; block++){ @@ -838,13 +852,15 @@ P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); - if(s->out_format == FMT_H263){ +// if(s->out_format == FMT_H263){ pred_x4 = P_MEDIAN[0]; pred_y4 = P_MEDIAN[1]; +#if 0 }else { /* mpeg1 at least */ pred_x4= P_LEFT[0]; pred_y4= P_LEFT[1]; } +#endif } P_MV1[0]= mx; P_MV1[1]= my; @@ -855,11 +871,79 @@ dmin4= s->me.sub_motion_search(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, pred_x4, pred_y4, &s->last_picture, block, 1, mv_penalty); + if(s->dsp.me_sub_cmp != s->dsp.mb_cmp){ + int dxy; + const int offset= ((block&1) + (block>>1)*s->linesize)*8; + uint8_t *dest_y = s->me.scratchpad + offset; + + if(s->quarter_sample){ + uint8_t *ref= s->last_picture.data[0] + (s->mb_x*16 + (mx4>>2)) + (s->mb_y*16 + (my4>>2))*s->linesize + offset; + dxy = ((my4 & 3) << 2) | (mx4 & 3); + + if(s->no_rounding) + s->dsp.put_no_rnd_qpel_pixels_tab[0][dxy](dest_y , ref , s->linesize); + else + s->dsp.put_qpel_pixels_tab [0][dxy](dest_y , ref , s->linesize); + }else{ + uint8_t *ref= s->last_picture.data[0] + (s->mb_x*16 + (mx4>>1)) + (s->mb_y*16 + (my4>>1))*s->linesize + offset; + dxy = ((my4 & 1) << 1) | (mx4 & 1); + + if(s->no_rounding) + s->dsp.put_no_rnd_pixels_tab[0][dxy](dest_y , ref , s->linesize, 16); + else + s->dsp.put_pixels_tab [0][dxy](dest_y , ref , s->linesize, 16); + } + dmin_sum+= (mv_penalty[mx4-pred_x4] + mv_penalty[my4-pred_y4])*s->me.mb_penalty_factor; + }else + dmin_sum+= dmin4; + + if(s->quarter_sample){ + mx4_sum+= mx4/2; + my4_sum+= my4/2; + }else{ + mx4_sum+= mx4; + my4_sum+= my4; + } + s->motion_val[ s->block_index[block] ][0]= mx4; s->motion_val[ s->block_index[block] ][1]= my4; - dmin_sum+= dmin4; + } + + if(s->dsp.me_sub_cmp != s->dsp.mb_cmp){ + dmin_sum += s->dsp.mb_cmp[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*16*s->linesize, s->me.scratchpad, s->linesize); } - return dmin_sum; + + if(s->avctx->mb_cmp&FF_CMP_CHROMA){ + int dxy; + int mx, my; + int offset; + + mx= ff_h263_round_chroma(mx4_sum); + my= ff_h263_round_chroma(my4_sum); + dxy = ((my & 1) << 1) | (mx & 1); + + offset= (s->mb_x*8 + (mx>>1)) + (s->mb_y*8 + (my>>1))*s->uvlinesize; + + if(s->no_rounding){ + s->dsp.put_no_rnd_pixels_tab[1][dxy](s->me.scratchpad , s->last_picture.data[1] + offset, s->uvlinesize, 8); + s->dsp.put_no_rnd_pixels_tab[1][dxy](s->me.scratchpad+8 , s->last_picture.data[2] + offset, s->uvlinesize, 8); + }else{ + s->dsp.put_pixels_tab [1][dxy](s->me.scratchpad , s->last_picture.data[1] + offset, s->uvlinesize, 8); + s->dsp.put_pixels_tab [1][dxy](s->me.scratchpad+8 , s->last_picture.data[2] + offset, s->uvlinesize, 8); + } + + dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, s->me.scratchpad , s->uvlinesize); + dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, s->me.scratchpad+8, s->uvlinesize); + } + + switch(s->avctx->mb_cmp&0xFF){ + /*case FF_CMP_SSE: + return dmin_sum+ 32*s->qscale*s->qscale;*/ + case FF_CMP_RD: + return dmin_sum; + default: + return dmin_sum+ 11*s->me.mb_penalty_factor; + } } void ff_estimate_p_frame_motion(MpegEncContext * s, @@ -881,6 +965,7 @@ s->me.penalty_factor = get_penalty_factor(s, s->avctx->me_cmp); s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp); + s->me.mb_penalty_factor = get_penalty_factor(s, s->avctx->mb_cmp); get_limits(s, &range, &xmin, &ymin, &xmax, &ymax, s->f_code); rel_xmin= xmin - mb_x*16; @@ -971,6 +1056,7 @@ pic->mb_var [s->mb_width * mb_y + mb_x] = varc; pic->mc_mb_var[s->mb_width * mb_y + mb_x] = vard; pic->mb_mean [s->mb_width * mb_y + mb_x] = (sum+128)>>8; +// pic->mb_cmp_score[s->mb_width * mb_y + mb_x] = dmin; pic->mb_var_sum += varc; pic->mc_mb_var_sum += vard; //printf("E%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout); @@ -997,44 +1083,36 @@ } if((s->flags&CODEC_FLAG_4MV) && !s->me.skip && varc>50 && vard>10){ - mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift); + h263_mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift); mb_type|=MB_TYPE_INTER4V; set_p_mv_tables(s, mx, my, 0); }else set_p_mv_tables(s, mx, my, 1); }else{ + mb_type= MB_TYPE_INTER; + + dmin= s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, + pred_x, pred_y, &s->last_picture, 0, 0, mv_penalty); + + if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip) + dmin= s->me.get_mb_score(s, mx, my, pred_x, pred_y, &s->last_picture, mv_penalty); + + if((s->flags&CODEC_FLAG_4MV) + && !s->me.skip && varc>50 && vard>10){ + int dmin4= h263_mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift); + if(dmin4 < dmin){ + mb_type= MB_TYPE_INTER4V; + dmin=dmin4; + } + } + pic->mb_cmp_score[s->mb_width * mb_y + mb_x] = dmin; + set_p_mv_tables(s, mx, my, mb_type!=MB_TYPE_INTER4V); + if (vard <= 64 || vard < varc) { -// if (sadP <= 32 || sadP < sadI + 500) { s->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc); - mb_type|= MB_TYPE_INTER; - if (s->me_method != ME_ZERO) { - dmin= s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, - pred_x, pred_y, &s->last_picture, 0, 0, mv_penalty); - if((s->flags&CODEC_FLAG_4MV) - && !s->me.skip && varc>50 && vard>10){ - int dmin4= mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift); - if(dmin4 + 128 scene_change_score+= s->qscale; - mb_type|= MB_TYPE_INTRA; - mx = 0; - my = 0; } } @@ -1117,6 +1195,7 @@ s->me.penalty_factor = get_penalty_factor(s, s->avctx->me_cmp); s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp); + s->me.mb_penalty_factor = get_penalty_factor(s, s->avctx->mb_cmp); get_limits(s, &range, &xmin, &ymin, &xmax, &ymax, f_code); rel_xmin= xmin - mb_x*16; @@ -1186,6 +1265,10 @@ dmin= s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, pred_x, pred_y, picture, 0, 0, mv_penalty); + + if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip) + dmin= s->me.get_mb_score(s, mx, my, pred_x, pred_y, picture, mv_penalty); + //printf("%d %d %d %d//", s->mb_x, s->mb_y, mx, my); // s->mb_type[mb_y*s->mb_width + mb_x]= mb_type; mv_table[mot_xy][0]= mx; @@ -1249,10 +1332,14 @@ s->dsp.avg_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16); } - fbmin = (mv_penalty[motion_fx-pred_fx] + mv_penalty[motion_fy-pred_fy])*s->me.sub_penalty_factor - +(mv_penalty[motion_bx-pred_bx] + mv_penalty[motion_by-pred_by])*s->me.sub_penalty_factor; - + s->dsp.me_sub_cmp[0](s, s->new_picture.data[0] + mb_x*16 + mb_y*16*s->linesize, dest_y, s->linesize); - + fbmin = (mv_penalty[motion_fx-pred_fx] + mv_penalty[motion_fy-pred_fy])*s->me.mb_penalty_factor + +(mv_penalty[motion_bx-pred_bx] + mv_penalty[motion_by-pred_by])*s->me.mb_penalty_factor + + s->dsp.mb_cmp[0](s, s->new_picture.data[0] + mb_x*16 + mb_y*16*s->linesize, dest_y, s->linesize); + + if(s->avctx->mb_cmp&FF_CMP_CHROMA){ + } + //FIXME CHROMA !!! + return fbmin; } @@ -1356,17 +1443,24 @@ P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); } - + + //FIXME direct_search ptr in context!!! (needed for chroma anyway or this will get messy) if(s->flags&CODEC_FLAG_QPEL){ dmin = simple_direct_qpel_epzs_motion_search(s, 0, &mx, &my, P, 0, 0, xmin, ymin, xmax, ymax, &s->last_picture, mv_table, 1<<14, mv_penalty); dmin = simple_direct_qpel_qpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, 0, 0, &s->last_picture, 0, 0, mv_penalty); + + if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip) + dmin= simple_direct_qpel_qpel_get_mb_score(s, mx, my, 0, 0, &s->last_picture, mv_penalty); }else{ dmin = simple_direct_hpel_epzs_motion_search(s, 0, &mx, &my, P, 0, 0, xmin, ymin, xmax, ymax, &s->last_picture, mv_table, 1<<15, mv_penalty); dmin = simple_direct_hpel_hpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, 0, 0, &s->last_picture, 0, 0, mv_penalty); + + if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip) + dmin= simple_direct_hpel_hpel_get_mb_score(s, mx, my, 0, 0, &s->last_picture, mv_penalty); } s->b_direct_mv_table[mot_xy][0]= mx; @@ -1377,18 +1471,18 @@ void ff_estimate_b_frame_motion(MpegEncContext * s, int mb_x, int mb_y) { - const int penalty_factor= s->me.penalty_factor; + const int penalty_factor= s->me.mb_penalty_factor; int fmin, bmin, dmin, fbmin; int type=0; dmin= direct_search(s, mb_x, mb_y); - fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, &s->last_picture, s->f_code); - bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, &s->next_picture, s->b_code) - penalty_factor; + fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, &s->last_picture, s->f_code) + 3*penalty_factor; + bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, &s->next_picture, s->b_code) + 2*penalty_factor; //printf(" %d %d ", s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1]); - fbmin= bidir_refine(s, mb_x, mb_y); - + fbmin= bidir_refine(s, mb_x, mb_y) + penalty_factor; +//printf("%d %d %d %d\n", dmin, fmin, bmin, fbmin); { int score= dmin; type=MB_TYPE_DIRECT; @@ -1405,9 +1499,10 @@ score=fbmin; type= MB_TYPE_BIDIR; } + score= ((unsigned)(score*score + 128*256))>>16; s->current_picture.mc_mb_var_sum += score; - s->current_picture.mc_mb_var[mb_y*s->mb_width + mb_x] = score; //FIXME use SSD + s->current_picture.mc_mb_var[mb_y*s->mb_width + mb_x] = score; //FIXME use SSE } if(s->flags&CODEC_FLAG_HQ){ diff -r 7a5038ec769b -r 5d4c95f323d0 motion_est_template.c --- a/motion_est_template.c Sun Jan 19 12:06:36 2003 +0000 +++ b/motion_est_template.c Sun Jan 19 17:55:13 2003 +0000 @@ -39,7 +39,7 @@ qpel_mc_func (*qpel_put)[16];\ qpel_mc_func (*qpel_avg)[16]= &s->dsp.avg_qpel_pixels_tab[size];\ const __attribute__((unused)) int unu= time_pp + time_pb + (int)src_u + (int)src_v + (int)ref_u + (int)ref_v\ - + (int)ref2_y + (int)hpel_avg + (int)qpel_avg;\ + + (int)ref2_y + (int)hpel_avg + (int)qpel_avg + (int)score_map;\ if(s->no_rounding /*FIXME b_type*/){\ hpel_put= &s->dsp.put_no_rnd_pixels_tab[size];\ chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1];\ @@ -144,6 +144,7 @@ const int my = *my_ptr; const int penalty_factor= s->me.sub_penalty_factor; me_cmp_func cmp_sub, chroma_cmp_sub; + int bx=2*mx, by=2*my; LOAD_COMMON(xx, yy); @@ -166,7 +167,6 @@ if (mx > xmin && mx < xmax && my > ymin && my < ymax) { - int bx=2*mx, by=2*my; int d= dmin; const int index= (my<me.penalty_factor; -#if 0 +#if 1 int key; int map_generation= s->me.map_generation; uint32_t *map= s->me.map; @@ -231,20 +231,50 @@ CHECK_HALF_MV(0, 1, mx , my) } assert(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2); - - *mx_ptr = bx; - *my_ptr = by; - }else{ - *mx_ptr =2*mx; - *my_ptr =2*my; } + *mx_ptr = bx; + *my_ptr = by; + return dmin; } #endif +static int RENAME(hpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pred_x, int pred_y, Picture *ref_picture, + uint16_t * const mv_penalty) +{ +// const int check_luma= s->dsp.me_sub_cmp != s->dsp.mb_cmp; + const int size= 0; + const int xx = 16 * s->mb_x; + const int yy = 16 * s->mb_y; + const int penalty_factor= s->me.mb_penalty_factor; + const int xmin= -256*256, ymin= -256*256, xmax= 256*256, ymax= 256*256; //assume that the caller checked these + const __attribute__((unused)) int unu2= xmin + xmax +ymin + ymax; //no unused warning shit + me_cmp_func cmp_sub, chroma_cmp_sub; + int d; + + LOAD_COMMON(xx, yy); + + //FIXME factorize + + cmp_sub= s->dsp.mb_cmp[size]; + chroma_cmp_sub= s->dsp.mb_cmp[size+1]; + + assert(!s->me.skip); + assert(s->avctx->me_sub_cmp != s->avctx->mb_cmp); + + CMP_HPEL(d, mx&1, my&1, mx>>1, my>>1, size); + //FIXME check cbp before adding penalty for (0,0) vector + if(mx || my || size>0) + d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor; + + return d; +} + #endif /* CMP_HPEL */ + + #ifdef CMP_QPEL #define CHECK_QUARTER_MV(dx, dy, x, y)\ @@ -477,6 +507,37 @@ return dmin; } +static int RENAME(qpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pred_x, int pred_y, Picture *ref_picture, + uint16_t * const mv_penalty) +{ + const int size= 0; + const int xx = 16 * s->mb_x; + const int yy = 16 * s->mb_y; + const int penalty_factor= s->me.mb_penalty_factor; + const int xmin= -256*256, ymin= -256*256, xmax= 256*256, ymax= 256*256; //assume that the caller checked these + const __attribute__((unused)) int unu2= xmin + xmax +ymin + ymax; //no unused warning shit + me_cmp_func cmp_sub, chroma_cmp_sub; + int d; + + LOAD_COMMON(xx, yy); + + //FIXME factorize + + cmp_sub= s->dsp.mb_cmp[size]; + chroma_cmp_sub= s->dsp.mb_cmp[size+1]; + + assert(!s->me.skip); + assert(s->avctx->me_sub_cmp != s->avctx->mb_cmp); + + CMP_QPEL(d, mx&3, my&3, mx>>2, my>>2, size); + //FIXME check cbp before adding penalty for (0,0) vector + if(mx || my || size>0) + d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor; + + return d; +} + + #endif /* CMP_QPEL */ #define CHECK_MV(x,y)\ diff -r 7a5038ec769b -r 5d4c95f323d0 mpegvideo.c --- a/mpegvideo.c Sun Jan 19 12:06:36 2003 +0000 +++ b/mpegvideo.c Sun Jan 19 17:55:13 2003 +0000 @@ -80,6 +80,7 @@ }; static const uint8_t h263_chroma_roundtab[16] = { +// 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, }; @@ -313,6 +314,7 @@ CHECKED_ALLOCZ(pic->mb_var , s->mb_num * sizeof(INT16)) CHECKED_ALLOCZ(pic->mc_mb_var, s->mb_num * sizeof(INT16)) CHECKED_ALLOCZ(pic->mb_mean , s->mb_num * sizeof(INT8)) + CHECKED_ALLOCZ(pic->mb_cmp_score, s->mb_num * sizeof(int32_t)) } CHECKED_ALLOCZ(pic->mbskip_table , s->mb_num * sizeof(UINT8)+1) //the +1 is for the slice end check @@ -338,6 +340,7 @@ av_freep(&pic->mb_var); av_freep(&pic->mc_mb_var); av_freep(&pic->mb_mean); + av_freep(&pic->mb_cmp_score); av_freep(&pic->mbskip_table); av_freep(&pic->qscale_table); @@ -1663,6 +1666,14 @@ pix_op[1][dxy](dest_cr + (dest_offset >> 1), ptr, uvlinesize, h >> 1); } +inline int ff_h263_round_chroma(int x){ + if (x >= 0) + return (h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1)); + else { + x = -x; + return -(h263_chroma_roundtab[x & 0xf] + ((x >> 3) & ~1)); + } +} static inline void MPV_motion(MpegEncContext *s, UINT8 *dest_y, UINT8 *dest_cb, UINT8 *dest_cr, @@ -1773,20 +1784,8 @@ if(s->flags&CODEC_FLAG_GRAY) break; /* In case of 8X8, we construct a single chroma motion vector with a special rounding */ - for(i=0;i<4;i++) { - } - if (mx >= 0) - mx = (h263_chroma_roundtab[mx & 0xf] + ((mx >> 3) & ~1)); - else { - mx = -mx; - mx = -(h263_chroma_roundtab[mx & 0xf] + ((mx >> 3) & ~1)); - } - if (my >= 0) - my = (h263_chroma_roundtab[my & 0xf] + ((my >> 3) & ~1)); - else { - my = -my; - my = -(h263_chroma_roundtab[my & 0xf] + ((my >> 3) & ~1)); - } + mx= ff_h263_round_chroma(mx); + my= ff_h263_round_chroma(my); dxy = ((my & 1) << 1) | (mx & 1); mx >>= 1; my >>= 1; @@ -2796,6 +2795,7 @@ s->no_rounding ^= 1; } /* Estimate motion for every MB */ + s->mb_intra=0; //for the rate distoration & bit compare functions if(s->pict_type != I_TYPE){ if(s->pict_type != B_TYPE){ if((s->avctx->pre_me && s->last_non_b_pict_type==I_TYPE) || s->avctx->pre_me==2){ @@ -2986,7 +2986,7 @@ s->block_index[4]= s->block_wrap[4]*(mb_y + 1) + s->block_wrap[0]*(s->mb_height*2 + 2); s->block_index[5]= s->block_wrap[4]*(mb_y + 1 + s->mb_height + 2) + s->block_wrap[0]*(s->mb_height*2 + 2); for(mb_x=0; mb_x < s->mb_width; mb_x++) { - const int mb_type= s->mb_type[mb_y * s->mb_width + mb_x]; + int mb_type= s->mb_type[mb_y * s->mb_width + mb_x]; const int xy= (mb_y+1) * (s->mb_width+2) + mb_x + 1; // int d; int dmin=10000000; @@ -3152,8 +3152,93 @@ s->last_bits= get_bit_count(&s->pb); } else { int motion_x, motion_y; + int intra_score; + int inter_score= s->current_picture.mb_cmp_score[mb_x + mb_y*s->mb_width]; + + if(!(s->flags&CODEC_FLAG_HQ) && s->pict_type==P_TYPE){ + /* get luma score */ + if((s->avctx->mb_cmp&0xFF)==FF_CMP_SSE){ + intra_score= (s->current_picture.mb_var[mb_x + mb_y*s->mb_width]<<8) - 500; //FIXME dont scale it down so we dont have to fix it + }else{ + uint8_t *dest_y; + + int mean= s->current_picture.mb_mean[mb_x + mb_y*s->mb_width]; //FIXME + mean*= 0x01010101; + + dest_y = s->new_picture.data[0] + (mb_y * 16 * s->linesize ) + mb_x * 16; + + for(i=0; i<16; i++){ + *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 0]) = mean; + *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 4]) = mean; + *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 8]) = mean; + *(uint32_t*)(&s->me.scratchpad[i*s->linesize+12]) = mean; + } + + s->mb_intra=1; + intra_score= s->dsp.mb_cmp[0](s, s->me.scratchpad, dest_y, s->linesize); + +/* printf("intra:%7d inter:%7d var:%7d mc_var.%7d\n", intra_score>>8, inter_score>>8, + s->current_picture.mb_var[mb_x + mb_y*s->mb_width], + s->current_picture.mc_mb_var[mb_x + mb_y*s->mb_width]);*/ + } + + /* get chroma score */ + if(s->avctx->mb_cmp&FF_CMP_CHROMA){ + int i; + + s->mb_intra=1; + for(i=1; i<3; i++){ + uint8_t *dest_c; + int mean; + + if(s->out_format == FMT_H263){ + mean= (s->dc_val[i][mb_x + (mb_y+1)*(s->mb_width+2)] + 4)>>3; //FIXME not exact but simple ;) + }else{ + mean= (s->last_dc[i] + 4)>>3; + } + dest_c = s->new_picture.data[i] + (mb_y * 8 * (s->uvlinesize)) + mb_x * 8; + + mean*= 0x01010101; + for(i=0; i<8; i++){ + *(uint32_t*)(&s->me.scratchpad[i*s->uvlinesize+ 0]) = mean; + *(uint32_t*)(&s->me.scratchpad[i*s->uvlinesize+ 4]) = mean; + } + + intra_score+= s->dsp.mb_cmp[1](s, s->me.scratchpad, dest_c, s->uvlinesize); + } + } + + /* bias */ + switch(s->avctx->mb_cmp&0xFF){ + default: + case FF_CMP_SAD: + intra_score+= 32*s->qscale; + break; + case FF_CMP_SSE: + intra_score+= 24*s->qscale*s->qscale; + break; + case FF_CMP_SATD: + intra_score+= 96*s->qscale; + break; + case FF_CMP_DCT: + intra_score+= 48*s->qscale; + break; + case FF_CMP_BIT: + intra_score+= 16; + break; + case FF_CMP_PSNR: + case FF_CMP_RD: + intra_score+= (s->qscale*s->qscale*109*8 + 64)>>7; + break; + } + + if(intra_score < inter_score) + mb_type= MB_TYPE_INTRA; + } + s->mv_type=MV_TYPE_16X16; // only one MB-Type possible + switch(mb_type){ case MB_TYPE_INTRA: s->mv_dir = MV_DIR_FORWARD; @@ -3383,7 +3468,7 @@ return last_non_zero; } - lambda= (qscale*qscale*64*82 + 50)/100; //FIXME finetune + lambda= (qscale*qscale*64*105 + 64)>>7; //FIXME finetune score_tab[0]= 0; for(i=0; i<=last_non_zero - start_i; i++){ diff -r 7a5038ec769b -r 5d4c95f323d0 mpegvideo.h --- a/mpegvideo.h Sun Jan 19 12:06:36 2003 +0000 +++ b/mpegvideo.h Sun Jan 19 17:55:13 2003 +0000 @@ -98,7 +98,6 @@ int last_non_b_pict_type; }RateControlContext; - typedef struct ScanTable{ const UINT8 *scantable; UINT8 permutated[64]; @@ -117,6 +116,7 @@ uint16_t *mb_var; /* Table for MB variances */ uint16_t *mc_mb_var; /* Table for motion compensated MB variances */ uint8_t *mb_mean; /* Table for MB luminance */ + int32_t *mb_cmp_score; /* Table for MB cmp scores, for mb decission */ int b_frame_score; /* */ } Picture; @@ -142,6 +142,7 @@ int pre_penalty_factor; int penalty_factor; int sub_penalty_factor; + int mb_penalty_factor; int pre_pass; /* = 1 for the pre pass */ int dia_size; UINT16 (*mv_penalty)[MAX_MV*2+1]; /* amount of bits needed to encode a MV */ @@ -160,6 +161,8 @@ int P[10][2], int pred_x, int pred_y, int xmin, int ymin, int xmax, int ymax, Picture *ref_picture, int16_t (*last_mv)[2], int ref_mv_scale, uint16_t * const mv_penalty); + int (*get_mb_score)(struct MpegEncContext * s, int mx, int my, int pred_x, int pred_y, Picture *ref_picture, + uint16_t * const mv_penalty); }MotionEstContext; typedef struct MpegEncContext { @@ -321,6 +324,8 @@ uint8_t *intra_ac_vlc_last_length; uint8_t *inter_ac_vlc_length; uint8_t *inter_ac_vlc_last_length; + uint8_t *luma_dc_vlc_length; + uint8_t *chroma_dc_vlc_length; #define UNI_AC_ENC_INDEX(run,level) ((run)*128 + (level)) /* precomputed matrix (combine qscale and DCT renorm) */ @@ -719,6 +724,7 @@ int ff_h263_resync(MpegEncContext *s); int ff_h263_get_gob_height(MpegEncContext *s); void ff_mpeg4_set_direct_mv(MpegEncContext *s, int mx, int my); +inline int ff_h263_round_chroma(int x); /* rv10.c */ diff -r 7a5038ec769b -r 5d4c95f323d0 msmpeg4data.h --- a/msmpeg4data.h Sun Jan 19 12:06:36 2003 +0000 +++ b/msmpeg4data.h Sun Jan 19 17:55:13 2003 +0000 @@ -1868,7 +1868,10 @@ }; static const uint8_t table_inter_intra[4][2]={ - {0,1},{2,2},{6,3},{7,3} + {0,1} /*Luma-Left Chroma-Left*/, + {2,2} /*Luma-Top Chroma-Left*/, + {6,3} /*luma-Left Chroma-Top */, + {7,3} /*luma-Top Chroma-Top */ }; #define WMV2_INTER_CBP_TABLE_COUNT 4