changeset 2323:1c39d9786efd libavcodec

optimization
author michael
date Tue, 26 Oct 2004 03:12:21 +0000
parents 4140f2e78ebc
children ba91c976c7aa
files cabac.c cabac.h
diffstat 2 files changed, 108 insertions(+), 43 deletions(-) [+]
line wrap: on
line diff
--- a/cabac.c	Mon Oct 25 15:37:13 2004 +0000
+++ b/cabac.c	Tue Oct 26 03:12:21 2004 +0000
@@ -69,6 +69,25 @@
  36,36,37,37,37,38,38,63,
 };
 
+const uint8_t ff_h264_norm_shift[256]= {
+ 8,7,6,6,5,5,5,5,4,4,4,4,4,4,4,4,
+ 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+ 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 
+};
+
 /**
  *
  * @param buf_size size of buf in bits
@@ -95,10 +114,14 @@
     c->bytestream= buf;
     c->bytestream_end= buf + buf_size;
 
-    c->low= *c->bytestream++;
-    c->low= (c->low<<9) + ((*c->bytestream++)<<1);
-    c->range= 0x1FE00;
-    c->bits_left= 7;
+#if CABAC_BITS == 16
+    c->low =  (*c->bytestream++)<<18;
+    c->low+=  (*c->bytestream++)<<10;
+#else
+    c->low =  (*c->bytestream++)<<10;
+#endif
+    c->low+= ((*c->bytestream++)<<2) + 2;
+    c->range= 0x1FE<<(CABAC_BITS + 1);
 }
 
 void ff_init_cabac_states(CABACContext *c, uint8_t const (*lps_range)[4], 
@@ -107,8 +130,8 @@
     
     for(i=0; i<state_count; i++){
         for(j=0; j<4; j++){ //FIXME check if this is worth the 1 shift we save
-            c->lps_range[2*i+0][j]=
-            c->lps_range[2*i+1][j]= lps_range[i][j];
+            c->lps_range[2*i+0][j+4]=
+            c->lps_range[2*i+1][j+4]= lps_range[i][j];
         }
 
         c->mps_state[2*i+0]= 2*mps_state[i];
--- a/cabac.h	Mon Oct 25 15:37:13 2004 +0000
+++ b/cabac.h	Tue Oct 26 03:12:21 2004 +0000
@@ -27,6 +27,9 @@
 #undef NDEBUG
 #include <assert.h>
 
+#define CABAC_BITS 8
+#define CABAC_MASK ((1<<CABAC_BITS)-1)
+
 typedef struct CABACContext{
     int low;
     int range;
@@ -34,19 +37,20 @@
 #ifdef STRICT_LIMITS
     int symCount;
 #endif
-    uint8_t lps_range[2*64][4];   ///< rangeTabLPS
+    uint8_t lps_range[2*65][4];   ///< rangeTabLPS
     uint8_t lps_state[2*64];      ///< transIdxLPS
     uint8_t mps_state[2*64];      ///< transIdxMPS
     const uint8_t *bytestream_start;
     const uint8_t *bytestream;
     const uint8_t *bytestream_end;
-    int bits_left;                ///<
     PutBitContext pb;
 }CABACContext;
 
 extern const uint8_t ff_h264_lps_range[64][4];
 extern const uint8_t ff_h264_mps_state[64];
 extern const uint8_t ff_h264_lps_state[64];
+extern const uint8_t ff_h264_norm_shift[256];
+
 
 void ff_init_cabac_encoder(CABACContext *c, uint8_t *buf, int buf_size);
 void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size);
@@ -80,7 +84,7 @@
 }
 
 static inline void put_cabac(CABACContext *c, uint8_t * const state, int bit){
-    int RangeLPS= c->lps_range[*state][((c->range)>>6)&3];
+    int RangeLPS= c->lps_range[*state][c->range>>6];
     
     if(bit == ((*state)&1)){
         c->range -= RangeLPS;
@@ -249,63 +253,101 @@
     }
 }
 
+static void refill(CABACContext *c){
+    if(c->bytestream < c->bytestream_end)
+#if CABAC_BITS == 16
+        c->low+= ((c->bytestream[0]<<9) + (c->bytestream[1])<<1);
+#else
+        c->low+= c->bytestream[0]<<1;
+#endif
+    c->low -= CABAC_MASK;
+    c->bytestream+= CABAC_BITS/8;
+}
+
+static void refill2(CABACContext *c){
+    int i, x;
+
+    x= c->low ^ (c->low-1);
+    i= 8 - ff_h264_norm_shift[x>>(CABAC_BITS+1)];
+
+    x= -CABAC_MASK;
+    
+    if(c->bytestream < c->bytestream_end)
+#if CABAC_BITS == 16
+        x+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
+#else
+        x+= c->bytestream[0]<<1;
+#endif
+    
+    c->low += x<<i;
+    c->bytestream+= CABAC_BITS/8;
+}
+
+
 static inline void renorm_cabac_decoder(CABACContext *c){
-    while(c->range < 0x10000){
+    while(c->range < (0x200 << CABAC_BITS)){
         c->range+= c->range;
         c->low+= c->low;
-        if(--c->bits_left == 0){
-            if(c->bytestream < c->bytestream_end)
-                c->low+= *c->bytestream;
-            c->bytestream++;
-            c->bits_left= 8;
-        }
+        if(!(c->low & CABAC_MASK))
+            refill(c);
     }
 }
 
+static inline void renorm_cabac_decoder_once(CABACContext *c){
+    int mask= (c->range - (0x200 << CABAC_BITS))>>31;
+    c->range+= c->range&mask;
+    c->low  += c->low  &mask;
+    if(!(c->low & CABAC_MASK))
+        refill(c);
+}
+
 static inline int get_cabac(CABACContext *c, uint8_t * const state){
-    int RangeLPS= c->lps_range[*state][((c->range)>>14)&3]<<8;
-    int bit;
+    int RangeLPS= c->lps_range[*state][c->range>>(CABAC_BITS+7)]<<(CABAC_BITS+1);
+    int bit, lps_mask;
     
     c->range -= RangeLPS;
+#if 1
     if(c->low < c->range){
         bit= (*state)&1;
         *state= c->mps_state[*state];
+        renorm_cabac_decoder_once(c);
     }else{
+//        int shift= ff_h264_norm_shift[RangeLPS>>17];
         bit= ((*state)&1)^1;
         c->low -= c->range;
+        *state= c->lps_state[*state];
         c->range = RangeLPS;
-        *state= c->lps_state[*state];
+        renorm_cabac_decoder(c);
+/*        c->range = RangeLPS<<shift;
+        c->low <<= shift;
+        if(!(c->low & 0xFFFF)){
+            refill2(c);
+        }*/
     }
-    renorm_cabac_decoder(c);
-    
-    return bit;    
-}
-
-static inline int get_cabac_static(CABACContext *c, int RangeLPS){
-    int bit;
+#else
+    lps_mask= (c->range - c->low)>>31;
     
-    c->range -= RangeLPS;
-    if(c->low < c->range){
-        bit= 0;
-    }else{
-        bit= 1;
-        c->low -= c->range;
-        c->range = RangeLPS;
-    }
-    renorm_cabac_decoder(c);
+    c->low -= c->range & lps_mask;
+    c->range += (RangeLPS - c->range) & lps_mask;
+    
+    bit= ((*state)^lps_mask)&1;
+    *state= c->mps_state[(*state) - (128&lps_mask)];
     
+    lps_mask= ff_h264_norm_shift[c->range>>(CABAC_BITS+2)];
+    c->range<<= lps_mask;
+    c->low  <<= lps_mask;
+    if(!(c->low & CABAC_MASK))
+        refill2(c);
+#endif
+
     return bit;    
 }
 
 static inline int get_cabac_bypass(CABACContext *c){
     c->low += c->low;
 
-    if(--c->bits_left == 0){
-        if(c->bytestream < c->bytestream_end)
-            c->low+= *c->bytestream;
-        c->bytestream++;
-        c->bits_left= 8;
-    }
+    if(!(c->low & CABAC_MASK))
+        refill(c);
     
     if(c->low < c->range){
         return 0;
@@ -320,9 +362,9 @@
  * @return the number of bytes read or 0 if no end
  */
 static inline int get_cabac_terminate(CABACContext *c){
-    c->range -= 2<<8;
+    c->range -= 4<<CABAC_BITS;
     if(c->low < c->range){
-        renorm_cabac_decoder(c);    
+        renorm_cabac_decoder_once(c);
         return 0;
     }else{
         return c->bytestream - c->bytestream_start;