changeset 5651:ab023c9f03d0 libavcodec

store halfpel filter coefficients in the header as well as the flag for diagonal interpolation the primary reason for this change is that previously MC up to 1/4 pel matched H.264 exactly and that increases the risk of stumbling over patents secondly this allows 0.10 db or more quality gain by choosing a longer filter and the filter could also be chosen optimally for each frame though that of course would cause speed loss at the decoder and encoder side ...
author michael
date Sat, 08 Sep 2007 14:51:13 +0000
parents 2a25f7167c09
children 941e5deeb2a4
files snow.c
diffstat 1 files changed, 116 insertions(+), 40 deletions(-) [+]
line wrap: on
line diff
--- a/snow.c	Sat Sep 08 03:16:24 2007 +0000
+++ b/snow.c	Sat Sep 08 14:51:13 2007 +0000
@@ -394,7 +394,7 @@
 #define LOG2_MB_SIZE 4
 #define MB_SIZE (1<<LOG2_MB_SIZE)
 #define ENCODER_EXTRA_BITS 4
-#define HTAPS 6
+#define HTAPS 8
 
 typedef struct x_and_coeff{
     int16_t x;
@@ -421,6 +421,15 @@
     int width;
     int height;
     SubBand band[MAX_DECOMPOSITIONS][4];
+
+    int htaps;
+    int8_t hcoeff[HTAPS/2];
+    int diag_mc;
+    int fast_mc;
+
+    int last_htaps;
+    int8_t last_hcoeff[HTAPS/2];
+    int last_diag_mc;
 }Plane;
 
 typedef struct SnowContext{
@@ -2143,7 +2152,7 @@
     }
 }
 
-static void mc_block(uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
+static void mc_block(Plane *p, uint8_t *dst, const uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
     const static uint8_t weight[64]={
     8,7,6,5,4,3,2,1,
     7,7,0,0,0,0,0,1,
@@ -2193,11 +2202,12 @@
     l= brane[dx + 16*dy]>>4;
 
     b= needs[l] | needs[r];
+    if(p && !p->diag_mc)
+        b= 15;
 
     if(b&5){
         for(y=0; y < b_h+HTAPS-1; y++){
             for(x=0; x < b_w; x++){
-                int a_2=src[x + HTAPS/2-5];
                 int a_1=src[x + HTAPS/2-4];
                 int a0= src[x + HTAPS/2-3];
                 int a1= src[x + HTAPS/2-2];
@@ -2206,15 +2216,17 @@
                 int a4= src[x + HTAPS/2+1];
                 int a5= src[x + HTAPS/2+2];
                 int a6= src[x + HTAPS/2+3];
-                int a7= src[x + HTAPS/2+4];
-#if HTAPS==6
-                int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
-#else
-                int am= 21*(a2+a3) - 7*(a1+a4) + 3*(a0+a5) - (a_1+a6);
-#endif
-
-                tmpI[x]= am;
-                am= (am+16)>>5;
+                int am=0;
+                if(!p || p->fast_mc){
+                    am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
+                    tmpI[x]= am;
+                    am= (am+16)>>5;
+                }else{
+                    am= p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6);
+                    tmpI[x]= am;
+                    am= (am+32)>>6;
+                }
+
                 if(am&(~255)) am= ~(am>>31);
                 tmp2[x]= am;
             }
@@ -2230,7 +2242,6 @@
     if(b&2){
         for(y=0; y < b_h; y++){
             for(x=0; x < b_w+1; x++){
-                int a_2=src[x + (HTAPS/2-5)*stride];
                 int a_1=src[x + (HTAPS/2-4)*stride];
                 int a0= src[x + (HTAPS/2-3)*stride];
                 int a1= src[x + (HTAPS/2-2)*stride];
@@ -2239,14 +2250,12 @@
                 int a4= src[x + (HTAPS/2+1)*stride];
                 int a5= src[x + (HTAPS/2+2)*stride];
                 int a6= src[x + (HTAPS/2+3)*stride];
-                int a7= src[x + (HTAPS/2+4)*stride];
-#if HTAPS==6
-                int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
-#else
-                int am= 21*(a2+a3) - 7*(a1+a4) + 3*(a0+a5) - (a_1+a6);
-#endif
-
-                am= (am + 16)>>5;
+                int am=0;
+                if(!p || p->fast_mc)
+                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 16)>>5;
+                else
+                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 32)>>6;
+
                 if(am&(~255)) am= ~(am>>31);
                 tmp2[x]= am;
             }
@@ -2261,7 +2270,6 @@
     if(b&4){
         for(y=0; y < b_h; y++){
             for(x=0; x < b_w; x++){
-                int a_2=tmpI[x + (HTAPS/2-5)*64];
                 int a_1=tmpI[x + (HTAPS/2-4)*64];
                 int a0= tmpI[x + (HTAPS/2-3)*64];
                 int a1= tmpI[x + (HTAPS/2-2)*64];
@@ -2270,13 +2278,11 @@
                 int a4= tmpI[x + (HTAPS/2+1)*64];
                 int a5= tmpI[x + (HTAPS/2+2)*64];
                 int a6= tmpI[x + (HTAPS/2+3)*64];
-                int a7= tmpI[x + (HTAPS/2+4)*64];
-#if HTAPS==6
-                int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
-#else
-                int am= 21*(a2+a3) - 7*(a1+a4) + 3*(a0+a5) - (a_1+a6);
-#endif
-                am= (am + 512)>>10;
+                int am=0;
+                if(!p || p->fast_mc)
+                    am= (20*(a2+a3) - 5*(a1+a4) + (a0+a5) + 512)>>10;
+                else
+                    am= (p->hcoeff[0]*(a2+a3) + p->hcoeff[1]*(a1+a4) + p->hcoeff[2]*(a0+a5) + p->hcoeff[3]*(a_1+a6) + 2048)>>12;
                 if(am&(~255)) am= ~(am>>31);
                 tmp2[x]= am;
             }
@@ -2336,7 +2342,7 @@
 static void mc_block_hpel ## dx ## dy ## b_w(uint8_t *dst, const uint8_t *src, int stride, int h){\
     uint8_t tmp[stride*(b_w+HTAPS-1)];\
     assert(h==b_w);\
-    mc_block(dst, src-(HTAPS/2-1)-(HTAPS/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
+    mc_block(NULL, dst, src-(HTAPS/2-1)-(HTAPS/2-1)*stride, tmp, stride, b_w, b_w, dx, dy);\
 }
 
 mca( 0, 0,16)
@@ -2407,23 +2413,23 @@
 //        assert(!(b_w&(b_w-1)));
         assert(b_w>1 && b_h>1);
         assert(tab_index>=0 && tab_index<4 || b_w==32);
-        if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || HTAPS != 6)
-            mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
+        if((dx&3) || (dy&3) || !(b_w == b_h || 2*b_w == b_h || b_w == 2*b_h) || (b_w&(b_w-1)) || !s->plane[plane_index].fast_mc )
+            mc_block(&s->plane[plane_index], dst, src, tmp, stride, b_w, b_h, dx, dy);
         else if(b_w==32){
             int y;
             for(y=0; y<b_h; y+=16){
-                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 2 + (y+2)*stride,stride);
-                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 18 + (y+2)*stride,stride);
+                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + y*stride, src + 3 + (y+3)*stride,stride);
+                s->dsp.put_h264_qpel_pixels_tab[0][dy+(dx>>2)](dst + 16 + y*stride, src + 19 + (y+3)*stride,stride);
             }
         }else if(b_w==b_h)
-            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
+            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst,src + 3 + 3*stride,stride);
         else if(b_w==2*b_h){
-            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 2       + 2*stride,stride);
-            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 2 + b_h + 2*stride,stride);
+            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst    ,src + 3       + 3*stride,stride);
+            s->dsp.put_h264_qpel_pixels_tab[tab_index+1][dy+(dx>>2)](dst+b_h,src + 3 + b_h + 3*stride,stride);
         }else{
             assert(2*b_w==b_h);
-            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 2 + 2*stride           ,stride);
-            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 2 + 2*stride+b_w*stride,stride);
+            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst           ,src + 3 + 3*stride           ,stride);
+            s->dsp.put_h264_qpel_pixels_tab[tab_index  ][dy+(dx>>2)](dst+b_w*stride,src + 3 + 3*stride+b_w*stride,stride);
         }
     }
 }
@@ -3514,7 +3520,7 @@
 }
 
 static void encode_header(SnowContext *s){
-    int plane_index, level, orientation;
+    int plane_index, level, orientation, i;
     uint8_t kstate[32];
 
     memset(kstate, MID_STATE, sizeof(kstate));
@@ -3527,6 +3533,12 @@
         s->last_qbias=
         s->last_mv_scale=
         s->last_block_max_depth= 0;
+        for(plane_index=0; plane_index<2; plane_index++){
+            Plane *p= &s->plane[plane_index];
+            p->last_htaps=0;
+            p->last_diag_mc=0;
+            memset(p->last_hcoeff, 0, sizeof(p->last_hcoeff));
+        }
     }
     if(s->keyframe){
         put_symbol(&s->c, s->header_state, s->version, 0);
@@ -3550,6 +3562,32 @@
             }
         }
     }
+
+    if(!s->keyframe){
+        int update_mc=0;
+        for(plane_index=0; plane_index<2; plane_index++){
+            Plane *p= &s->plane[plane_index];
+            update_mc |= p->last_htaps   != p->htaps;
+            update_mc |= p->last_diag_mc != p->diag_mc;
+            update_mc |= !!memcmp(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
+        }
+        if(!s->always_reset)
+            put_rac(&s->c, s->header_state, update_mc);
+        if(update_mc){
+            for(plane_index=0; plane_index<2; plane_index++){
+                Plane *p= &s->plane[plane_index];
+                put_rac(&s->c, s->header_state, p->diag_mc);
+                put_symbol(&s->c, s->header_state, p->htaps/2-1, 0);
+                for(i= p->htaps/2; i; i--)
+                    put_symbol(&s->c, s->header_state, FFABS(p->hcoeff[i]), 0);
+
+                p->last_diag_mc= p->diag_mc;
+                p->last_htaps= p->htaps;
+                memcpy(p->last_hcoeff, p->hcoeff, sizeof(p->hcoeff));
+            }
+        }
+    }
+
     put_symbol(&s->c, s->header_state, s->spatial_decomposition_type - s->last_spatial_decomposition_type, 1);
     put_symbol(&s->c, s->header_state, s->qlog            - s->last_qlog    , 1);
     put_symbol(&s->c, s->header_state, s->mv_scale        - s->last_mv_scale, 1);
@@ -3608,6 +3646,28 @@
         }
     }
 
+    if(!s->keyframe){
+        if(s->always_reset || get_rac(&s->c, s->header_state)){
+            for(plane_index=0; plane_index<2; plane_index++){
+                int htaps, i, sum=0, absum=0;
+                Plane *p= &s->plane[plane_index];
+                p->diag_mc= get_rac(&s->c, s->header_state);
+                htaps= get_symbol(&s->c, s->header_state, 0)*2 + 2;
+                if((unsigned)htaps > HTAPS || htaps==0)
+                    return -1;
+                p->htaps= htaps;
+                for(i= htaps/2; i; i--){
+                    p->hcoeff[i]= get_symbol(&s->c, s->header_state, 0) * (1-2*(i&1));
+                    sum += p->hcoeff[i];
+                }
+                p->hcoeff[0]= 32-sum;
+            }
+            s->plane[2].diag_mc= s->plane[1].diag_mc;
+            s->plane[2].htaps  = s->plane[1].htaps;
+            memcpy(s->plane[2].hcoeff, s->plane[1].hcoeff, sizeof(s->plane[1].hcoeff));
+        }
+    }
+
     s->spatial_decomposition_type+= get_symbol(&s->c, s->header_state, 1);
     if(s->spatial_decomposition_type > 1){
         av_log(s->avctx, AV_LOG_ERROR, "spatial_decomposition_type %d not supported", s->spatial_decomposition_type);
@@ -3715,6 +3775,14 @@
         }
         s->plane[plane_index].width = w;
         s->plane[plane_index].height= h;
+
+        s->plane[plane_index].diag_mc= 1;
+        s->plane[plane_index].htaps= 6;
+        s->plane[plane_index].hcoeff[0]=  40;
+        s->plane[plane_index].hcoeff[1]= -10;
+        s->plane[plane_index].hcoeff[2]=   2;
+        s->plane[plane_index].fast_mc= 1;
+
 //av_log(NULL, AV_LOG_DEBUG, "%d %d\n", w, h);
         for(level=s->spatial_decomposition_count-1; level>=0; level--){
             for(orientation=level ? 1 : 0; orientation<4; orientation++){
@@ -4354,6 +4422,14 @@
 
     s->current_picture.pict_type= FF_I_TYPE; //FIXME I vs. P
     decode_header(s);
+
+    for(plane_index=0; plane_index<3; plane_index++){
+        Plane *p= &s->plane[plane_index];
+        p->fast_mc= p->diag_mc && p->htaps==6 && p->hcoeff[0]==40
+                                              && p->hcoeff[1]==-10
+                                              && p->hcoeff[2]==2;
+    }
+
     if(!s->block) alloc_blocks(s);
 
     frame_start(s);