changeset 4008:b636f3d59283 libavcodec

Prevent the macroblock-level ("mb level") get_cabac() calls from being inlined by routing them through get_cabac_noinline(); this makes decode_mb_cabac() about 3% faster on a P3.
author michael
date Thu, 12 Oct 2006 14:49:19 +0000
parents 33d3d4ab9f59
children e5f7797e53c7
files cabac.h h264.c
diffstat 2 files changed, 41 insertions(+), 31 deletions(-) [+]
line wrap: on
line diff
--- a/cabac.h	Thu Oct 12 12:27:02 2006 +0000
+++ b/cabac.h	Thu Oct 12 14:49:19 2006 +0000
@@ -359,7 +359,7 @@
         refill(c);
 }
 
-static int get_cabac(CABACContext *c, uint8_t * const state){
+static int always_inline get_cabac_inline(CABACContext *c, uint8_t * const state){
     //FIXME gcc generates duplicate load/stores for c->low and c->range
 #ifdef ARCH_X86
     int bit;
@@ -563,6 +563,14 @@
     return bit;
 }
 
+static int __attribute((noinline)) get_cabac_noinline(CABACContext *c, uint8_t * const state){
+    return get_cabac_inline(c,state);
+}
+
+static int get_cabac(CABACContext *c, uint8_t * const state){
+    return get_cabac_inline(c,state);
+}
+
 static int get_cabac_bypass(CABACContext *c){
     c->low += c->low;
 
--- a/h264.c	Thu Oct 12 12:27:02 2006 +0000
+++ b/h264.c	Thu Oct 12 14:49:19 2006 +0000
@@ -5633,7 +5633,7 @@
         ctx += 1;
     }
 
-    return get_cabac( &h->cabac, &h->cabac_state[70 + ctx] );
+    return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
 }
 
 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
@@ -5649,11 +5649,11 @@
             ctx++;
         if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
             ctx++;
-        if( get_cabac( &h->cabac, &state[ctx] ) == 0 )
+        if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
             return 0;   /* I4x4 */
         state += 2;
     }else{
-        if( get_cabac( &h->cabac, &state[0] ) == 0 )
+        if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
             return 0;   /* I4x4 */
     }
 
@@ -5661,11 +5661,11 @@
         return 25;  /* PCM */
 
     mb_type = 1; /* I16x16 */
-    mb_type += 12 * get_cabac( &h->cabac, &state[1] ); /* cbp_luma != 0 */
-    if( get_cabac( &h->cabac, &state[2] ) ) /* cbp_chroma */
-        mb_type += 4 + 4 * get_cabac( &h->cabac, &state[2+intra_slice] );
-    mb_type += 2 * get_cabac( &h->cabac, &state[3+intra_slice] );
-    mb_type += 1 * get_cabac( &h->cabac, &state[3+2*intra_slice] );
+    mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
+    if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
+        mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
+    mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
+    mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
     return mb_type;
 }
 
@@ -5675,14 +5675,14 @@
     if( h->slice_type == I_TYPE ) {
         return decode_cabac_intra_mb_type(h, 3, 1);
     } else if( h->slice_type == P_TYPE ) {
-        if( get_cabac( &h->cabac, &h->cabac_state[14] ) == 0 ) {
+        if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
             /* P-type */
-            if( get_cabac( &h->cabac, &h->cabac_state[15] ) == 0 ) {
+            if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
                 /* P_L0_D16x16, P_8x8 */
-                return 3 * get_cabac( &h->cabac, &h->cabac_state[16] );
+                return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
             } else {
                 /* P_L0_D8x16, P_L0_D16x8 */
-                return 2 - get_cabac( &h->cabac, &h->cabac_state[17] );
+                return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
             }
         } else {
             return decode_cabac_intra_mb_type(h, 17, 0) + 5;
@@ -5698,17 +5698,17 @@
         if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
             ctx++;
 
-        if( !get_cabac( &h->cabac, &h->cabac_state[27+ctx] ) )
+        if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
             return 0; /* B_Direct_16x16 */
 
-        if( !get_cabac( &h->cabac, &h->cabac_state[27+3] ) ) {
-            return 1 + get_cabac( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
-        }
-
-        bits = get_cabac( &h->cabac, &h->cabac_state[27+4] ) << 3;
-        bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 2;
-        bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 1;
-        bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] );
+        if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
+            return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
+        }
+
+        bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
+        bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
+        bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
+        bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
         if( bits < 8 )
             return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
         else if( bits == 13 ) {
@@ -5718,7 +5718,7 @@
         else if( bits == 15 )
             return 22; /* B_8x8 */
 
-        bits= ( bits<<1 ) | get_cabac( &h->cabac, &h->cabac_state[27+5] );
+        bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
         return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
     } else {
         /* TODO SI/SP frames? */
@@ -5759,7 +5759,7 @@
 
     if( h->slice_type == B_TYPE )
         ctx += 13;
-    return get_cabac( &h->cabac, &h->cabac_state[11+ctx] );
+    return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
 }
 
 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
@@ -5791,12 +5791,12 @@
     if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
         ctx++;
 
-    if( get_cabac( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
+    if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
         return 0;
 
-    if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
+    if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
         return 1;
-    if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 )
+    if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
         return 2;
     else
         return 3;
@@ -5873,13 +5873,13 @@
     ctx = 0;
     if( cbp_a > 0 ) ctx++;
     if( cbp_b > 0 ) ctx += 2;
-    if( get_cabac( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
+    if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
         return 0;
 
     ctx = 4;
     if( cbp_a == 2 ) ctx++;
     if( cbp_b == 2 ) ctx += 2;
-    return 1 + get_cabac( &h->cabac, &h->cabac_state[77 + ctx] );
+    return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
 }
 static int decode_cabac_mb_dqp( H264Context *h) {
     MpegEncContext * const s = &h->s;
@@ -5895,7 +5895,7 @@
     if( h->last_qscale_diff != 0 )
         ctx++;
 
-    while( get_cabac( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
+    while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
         if( ctx < 2 )
             ctx = 2;
         else
@@ -5937,7 +5937,7 @@
 }
 
 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
-    return get_cabac( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
+    return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
 }
 
 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
@@ -7410,8 +7410,10 @@
         }
 
         for(;;){
+//START_TIMER
             int ret = decode_mb_cabac(h);
             int eos;
+//STOP_TIMER("decode_mb_cabac")
 
             if(ret>=0) hl_decode_mb(h);