diff h264_cabac.c @ 11277:c12d6c6c027e libavcodec

Change mvd_cache & mvd_table to 8-bit; this is overall a bit faster for high-resolution videos, about 20 cycles faster per MB for cathedral.
author michael
date Wed, 24 Feb 2010 20:43:06 +0000
parents 63d329cd8d80
children 1a349d669184
line wrap: on
line diff
--- a/h264_cabac.c	Wed Feb 24 20:37:58 2010 +0000
+++ b/h264_cabac.c	Wed Feb 24 20:43:06 2010 +0000
@@ -938,8 +938,9 @@
         while( k-- ) {
             mvd += get_cabac_bypass( &h->cabac )<<k;
         }
-    }
-    *mvda=mvd;
+        *mvda=mvd < 70 ? mvd : 70;
+    }else
+        *mvda=mvd;
     return get_cabac_bypass_sign( &h->cabac, -mvd );
 }
 
@@ -1429,7 +1430,7 @@
             for(i=0; i<4; i++){
                 h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ];
                 if(IS_DIRECT(h->sub_mb_type[i])){
-                    fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 4);
+                    fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 2);
                     continue;
                 }
 
@@ -1441,9 +1442,8 @@
                         int mx, my;
                         const int index= 4*i + block_width*j;
                         int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
-                        int16_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
+                        uint8_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
                         pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
-
                         DECODE_CABAC_MB_MVD( h, list, index)
                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
 
@@ -1478,14 +1478,14 @@
                     }
                 }else{
                     fill_rectangle(h->mv_cache [list][ scan8[4*i] ], 2, 2, 8, 0, 4);
-                    fill_rectangle(h->mvd_cache[list][ scan8[4*i] ], 2, 2, 8, 0, 4);
+                    fill_rectangle(h->mvd_cache[list][ scan8[4*i] ], 2, 2, 8, 0, 2);
                 }
             }
         }
     } else if( IS_DIRECT(mb_type) ) {
         ff_h264_pred_direct_motion(h, &mb_type);
-        fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 4);
-        fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 4);
+        fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 2);
+        fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 2);
         dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
     } else {
         int list, i;
@@ -1512,7 +1512,7 @@
                     DECODE_CABAC_MB_MVD( h, list, 0)
                     tprintf(s->avctx, "final mv:%d %d\n", mx, my);
 
-                    fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mpx,mpy), 4);
+                    fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack8to16(mpx,mpy), 2);
                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
                 }else
                     fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, 0, 4);
@@ -1544,10 +1544,10 @@
                         DECODE_CABAC_MB_MVD( h, list, 8*i)
                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
 
-                        fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mpx,mpy), 4);
+                        fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack8to16(mpx,mpy), 2);
                         fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
                     }else{
-                        fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
+                        fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 2);
                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
                     }
                 }
@@ -1579,10 +1579,10 @@
                         DECODE_CABAC_MB_MVD( h, list, 4*i)
 
                         tprintf(s->avctx, "final mv:%d %d\n", mx, my);
-                        fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mpx,mpy), 4);
+                        fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack8to16(mpx,mpy), 2);
                         fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
                     }else{
-                        fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
+                        fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 2);
                         fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
                     }
                 }