# HG changeset patch # User michael # Date 1189263073 0 # Node ID ab023c9f03d031ac51550065550cf52c8a2fe7a7 # Parent 2a25f7167c09239c7f1a82a18849eb6d7d0e376b Store halfpel filter coefficients in the header, as well as the flag for diagonal interpolation. The primary reason for this change is that previously MC up to 1/4 pel matched H.264 exactly, and that increases the risk of stumbling over patents. Secondly, this allows 0.10 dB or more quality gain by choosing a longer filter, and the filter could also be chosen optimally for each frame, though that of course would cause speed loss at the decoder and encoder side. ... diff -r 2a25f7167c09 -r ab023c9f03d0 snow.c --- a/snow.c Sat Sep 08 03:16:24 2007 +0000 +++ b/snow.c Sat Sep 08 14:51:13 2007 +0000 @@ -394,7 +394,7 @@ #define LOG2_MB_SIZE 4 #define MB_SIZE (1<>4; b= needs[l] | needs[r]; + if(p && !p->diag_mc) + b= 15; if(b&5){ for(y=0; y < b_h+HTAPS-1; y++){ for(x=0; x < b_w; x++){ - int a_2=src[x + HTAPS/2-5]; int a_1=src[x + HTAPS/2-4]; int a0= src[x + HTAPS/2-3]; int a1= src[x + HTAPS/2-2]; @@ -2206,15 +2216,17 @@ int a4= src[x + HTAPS/2+1]; int a5= src[x + HTAPS/2+2]; int a6= src[x + HTAPS/2+3]; - int a7= src[x + HTAPS/2+4]; -#if HTAPS==6 - int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); -#else - int am= 21*(a2+a3) - 7*(a1+a4) + 3*(a0+a5) - (a_1+a6); -#endif - - tmpI[x]= am; - am= (am+16)>>5; + int am=0; + if(!p || p->fast_mc){ + am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); + tmpI[x]= am; + am= (am+16)>>5; + }else{ + am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6); + tmpI[x]= am; + am= (am+32)>>6; + } + if(am&(~255)) am= ~(am>>31); tmp2[x]= am; } @@ -2230,7 +2242,6 @@ if(b&2){ for(y=0; y < b_h; y++){ for(x=0; x < b_w+1; x++){ - int a_2=src[x + (HTAPS/2-5)*stride]; int a_1=src[x + (HTAPS/2-4)*stride]; int a0= src[x + (HTAPS/2-3)*stride]; int a1= src[x + (HTAPS/2-2)*stride]; @@ -2239,14 +2250,12 @@ int a4= src[x + (HTAPS/2+1)*stride]; int a5= src[x + (HTAPS/2+2)*stride]; int a6= src[x + 
(HTAPS/2+3)*stride]; - int a7= src[x + (HTAPS/2+4)*stride]; -#if HTAPS==6 - int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); -#else - int am= 21*(a2+a3) - 7*(a1+a4) + 3*(a0+a5) - (a_1+a6); -#endif - - am= (am + 16)>>5; + int am=0; + if(!p || p->fast_mc) + am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5; + else + am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6; + if(am&(~255)) am= ~(am>>31); tmp2[x]= am; } @@ -2261,7 +2270,6 @@ if(b&4){ for(y=0; y < b_h; y++){ for(x=0; x < b_w; x++){ - int a_2=tmpI[x + (HTAPS/2-5)*64]; int a_1=tmpI[x + (HTAPS/2-4)*64]; int a0= tmpI[x + (HTAPS/2-3)*64]; int a1= tmpI[x + (HTAPS/2-2)*64]; @@ -2270,13 +2278,11 @@ int a4= tmpI[x + (HTAPS/2+1)*64]; int a5= tmpI[x + (HTAPS/2+2)*64]; int a6= tmpI[x + (HTAPS/2+3)*64]; - int a7= tmpI[x + (HTAPS/2+4)*64]; -#if HTAPS==6 - int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5); -#else - int am= 21*(a2+a3) - 7*(a1+a4) + 3*(a0+a5) - (a_1+a6); -#endif - am= (am + 512)>>10; + int am=0; + if(!p || p->fast_mc) + am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10; + else + am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12; if(am&(~255)) am= ~(am>>31); tmp2[x]= am; } @@ -2336,7 +2342,7 @@ static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\ uint8_t tmp[stride*(b_w+HTAPS-1)];\ assert(h==b_w);\ - mc_block(dst, src-(HTAPS/2-1)-(HTAPS/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\ + mc_block(NULL, dst, src-(HTAPS/2-1)-(HTAPS/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\ } mca( 0, 0,16) @@ -2407,23 +2413,23 @@ // assert(!(b_w&(b_w-1))); assert(b_w>1 && b_h>1); assert(tab_index>=0 && tab_index<4 || b_w==32); - if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || HTAPS != 6) - mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy); + if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || 
!s->plane[plane_index].fast_mc ) + mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy); else if(b_w==32){ int y; for(y=0; y<b_h; y+=16){ - s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride); - s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride); + s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride); + s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride); } }else if(b_w==b_h) - s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride); + s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride); else if(b_w==2*b_h){ - s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 2 + 2*stride,stride); - s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride); + s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst ,src + 3 + 3*stride,stride); + s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride); }else{ assert(2*b_w==b_h); - s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 2 + 2*stride ,stride); - s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride); + s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst ,src + 3 + 3*stride ,stride); + s->dsp.put_h264_qpel_pixels_tab[tab_index ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride); } } } @@ -3514,7 +3520,7 @@ } static void encode_header(SnowContext *s){ - int plane_index, level, orientation; + int plane_index, level, orientation, i; uint8_t kstate[32]; memset(kstate, MID_STATE, sizeof(kstate)); @@ -3527,6 +3533,12 @@ s->last_qbias= s->last_mv_scale= s->last_block_max_depth= 0; + for(plane_index=0; plane_index<2; plane_index++){ + Plane *p= &s->plane[plane_index]; + p->last_htaps=0; + 
p->last_diag_mc=0; + memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff)); + } } if(s->keyframe){ put_symbol(&s->c, s->header_state, s->version, 0); @@ -3550,6 +3562,32 @@ } } } + + if(!s->keyframe){ + int update_mc=0; + for(plane_index=0; plane_index<2; plane_index++){ + Plane *p= &s->plane[plane_index]; + update_mc |= p->last_htaps != p->htaps; + update_mc |= p->last_diag_mc != p->diag_mc; + update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff)); + } + if(!s->always_reset) + put_rac(&s->c, s->header_state, update_mc); + if(update_mc){ + for(plane_index=0; plane_index<2; plane_index++){ + Plane *p= &s->plane[plane_index]; + put_rac(&s->c, s->header_state, p->diag_mc); + put_symbol(&s->c, s->header_state, p->htaps/2-1, 0); + for(i= p->htaps/2; i; i--) + put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0); + + p->last_diag_mc= p->diag_mc; + p->last_htaps= p->htaps; + memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff)); + } + } + } + put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1); put_symbol(&s->c, s->header_state, s->qlog - s->last_qlog , 1); put_symbol(&s->c, s->header_state, s->mv_scale - s->last_mv_scale, 1); @@ -3608,6 +3646,28 @@ } } + if(!s->keyframe){ + if(s->always_reset || get_rac(&s->c, s->header_state)){ + for(plane_index=0; plane_index<2; plane_index++){ + int htaps, i, sum=0, absum=0; + Plane *p= &s->plane[plane_index]; + p->diag_mc= get_rac(&s->c, s->header_state); + htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2; + if((unsigned)htaps > HTAPS || htaps==0) + return -1; + p->htaps= htaps; + for(i= htaps/2; i; i--){ + p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1)); + sum += p->hcoeff[i]; + } + p->hcoeff[0]= 32-sum; + } + s->plane[2].diag_mc= s->plane[1].diag_mc; + s->plane[2].htaps = s->plane[1].htaps; + memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff)); + } + } + s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 
1); if(s->spatial_decomposition_type > 1){ av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type); @@ -3715,6 +3775,14 @@ } s->plane[plane_index].width = w; s->plane[plane_index].height= h; + + s->plane[plane_index].diag_mc= 1; + s->plane[plane_index].htaps= 6; + s->plane[plane_index].hcoeff[0]= 40; + s->plane[plane_index].hcoeff[1]= -10; + s->plane[plane_index].hcoeff[2]= 2; + s->plane[plane_index].fast_mc= 1; + //av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h); for(level=s->spatial_decomposition_count-1; level>=0; level--){ for(orientation=level ? 1 : 0; orientation<4; orientation++){ @@ -4354,6 +4422,14 @@ s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P decode_header(s); + + for(plane_index=0; plane_index<3; plane_index++){ + Plane *p= &s->plane[plane_index]; + p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40 + && p->hcoeff[1]==-10 + && p->hcoeff[2]==2; + } + if(!s->block) alloc_blocks(s); frame_start(s);