changeset 542:d55978a3c369 libavcodec

rl vlc decoding optimizations
author michaelni
date Sat, 13 Jul 2002 14:55:12 +0000
parents 4273be971bf8
children 8f8f4885d874
files common.h h263.c mpegvideo.h msmpeg4.c
diffstat 4 files changed, 153 insertions(+), 69 deletions(-) [+]
line wrap: on
line diff
--- a/common.h	Fri Jul 12 12:41:32 2002 +0000
+++ b/common.h	Sat Jul 13 14:55:12 2002 +0000
@@ -238,6 +238,12 @@
     int table_size, table_allocated;
 } VLC;
 
+typedef struct RL_VLC_ELEM {
+    int16_t level;
+    int8_t len;
+    uint8_t run;
+} RL_VLC_ELEM;
+
 /* used to avoid missaligned exceptions on some archs (alpha, ...) */
 #ifdef ARCH_X86
 #define unaligned32(a) (*(UINT32*)(a))
@@ -755,6 +761,28 @@
     SKIP_BITS(name, gb, n)\
 }
 
+#define GET_RL_VLC(level, run, name, gb, table, bits, max_depth)\
+{\
+    int n, index, nb_bits;\
+\
+    index= SHOW_UBITS(name, gb, bits);\
+    level = table[index].level;\
+    n     = table[index].len;\
+\
+    if(max_depth > 1 && n < 0){\
+        LAST_SKIP_BITS(name, gb, bits)\
+        UPDATE_CACHE(name, gb)\
+\
+        nb_bits = -n;\
+\
+        index= SHOW_UBITS(name, gb, nb_bits) + level;\
+        level = table[index].level;\
+        n     = table[index].len;\
+    }\
+    run= table[index].run;\
+    SKIP_BITS(name, gb, n)\
+}
+
 // deprecated, dont use get_vlc for new code, use get_vlc2 instead or use GET_VLC directly
 static inline int get_vlc(GetBitContext *s, VLC *vlc)
 {
@@ -783,6 +811,7 @@
     return code;
 }
 
+
 /* define it to include statistics code (useful only for optimizing
    codec efficiency */
 //#define STATS
--- a/h263.c	Fri Jul 12 12:41:32 2002 +0000
+++ b/h263.c	Sat Jul 13 14:55:12 2002 +0000
@@ -1622,9 +1622,49 @@
 
 void init_vlc_rl(RLTable *rl)
 {
+    int i, q;
+    
     init_vlc(&rl->vlc, 9, rl->n + 1, 
              &rl->table_vlc[0][1], 4, 2,
              &rl->table_vlc[0][0], 4, 2);
+
+    
+    for(q=0; q<32; q++){
+        int qmul= q*2;
+        int qadd= (q-1)|1;
+        
+        if(q==0){
+            qmul=1;
+            qadd=0;
+        }
+        
+        rl->rl_vlc[q]= av_malloc(rl->vlc.table_size*sizeof(RL_VLC_ELEM));
+        for(i=0; i<rl->vlc.table_size; i++){
+            int code= rl->vlc.table[i][0];
+            int len = rl->vlc.table[i][1];
+            int level, run;
+        
+            if(len==0){ // illegal code
+                run= 65;
+                level= MAX_LEVEL;
+            }else if(len<0){ //more bits needed
+                run= 0;
+                level= code;
+            }else{
+                if(code==rl->n){ //esc
+                    run= 65;
+                    level= 0;
+                }else{
+                    run=   rl->table_run  [code] + 1;
+                    level= rl->table_level[code] * qmul + qadd;
+                    if(code >= rl->last) run+=192;
+                }
+            }
+            rl->rl_vlc[q][i].len= len;
+            rl->rl_vlc[q][i].level= level;
+            rl->rl_vlc[q][i].run= run;
+        }
+    }
 }
 
 /* init vlcs */
--- a/mpegvideo.h	Fri Jul 12 12:41:32 2002 +0000
+++ b/mpegvideo.h	Sat Jul 13 14:55:12 2002 +0000
@@ -482,7 +482,6 @@
 void ff_mpeg1_encode_init(MpegEncContext *s);
 
 /* h263enc.c */
-
 typedef struct RLTable {
     int n; /* number of entries of table_vlc minus 1 */
     int last; /* number of values for last = 0 */
@@ -492,7 +491,8 @@
     UINT8 *index_run[2]; /* encoding only */
     INT8 *max_level[2]; /* encoding & decoding */
     INT8 *max_run[2];   /* encoding & decoding */
-    VLC vlc;            /* decoding only */
+    VLC vlc;            /* decoding only deprected FIXME remove*/
+    RL_VLC_ELEM *rl_vlc[32]; /* decoding only */
 } RLTable;
 
 void init_rl(RLTable *rl);
--- a/msmpeg4.c	Fri Jul 12 12:41:32 2002 +0000
+++ b/msmpeg4.c	Sat Jul 13 14:55:12 2002 +0000
@@ -1629,9 +1629,10 @@
 static inline int msmpeg4_decode_block(MpegEncContext * s, DCTELEM * block,
                               int n, int coded)
 {
-    int code, level, i, j, last, run, run_diff;
+    int level, i, last, run, run_diff;
     int dc_pred_dir;
     RLTable *rl;
+    RL_VLC_ELEM *rl_vlc;
     const UINT8 *scan_table;
     int qmul, qadd;
 
@@ -1671,7 +1672,7 @@
         block[0] = level;
 
         run_diff = 0;
-	i = 1;
+        i = 0;
         if (!coded) {
             goto not_coded;
         }
@@ -1684,10 +1685,11 @@
             scan_table = s->intra_scantable;
         }
         set_stat(ST_INTRA_AC);
+        rl_vlc= rl->rl_vlc[0];
     } else {
         qmul = s->qscale << 1;
         qadd = (s->qscale - 1) | 1;
-	i = 0;
+        i = -1;
         rl = &rl_table[3 + s->rl_table_index];
 
         if(s->msmpeg4_version==2)
@@ -1696,53 +1698,66 @@
             run_diff = 1;
 
         if (!coded) {
-            s->block_last_index[n] = i - 1;
+            s->block_last_index[n] = i;
             return 0;
         }
         scan_table = s->inter_scantable;
         set_stat(ST_INTER_AC);
+        rl_vlc= rl->rl_vlc[s->qscale];
     }
-
+  {
+    OPEN_READER(re, &s->gb);
     for(;;) {
-        code = get_vlc2(&s->gb, rl->vlc.table, TEX_VLC_BITS, 2);
-        if (code < 0){
-            fprintf(stderr, "illegal AC-VLC code at %d %d\n", s->mb_x, s->mb_y);
-            return -1;
-        }
-        if (code == rl->n) {
+        UPDATE_CACHE(re, &s->gb);
+        GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2);
+        if (level==0) {
+            int cache;
+            cache= GET_CACHE(re, &s->gb);
             /* escape */
-            if (s->msmpeg4_version==1 || get_bits1(&s->gb) == 0) {
-                if (s->msmpeg4_version==1 || get_bits1(&s->gb) == 0) {
+            if (s->msmpeg4_version==1 || (cache&0x80000000)==0) {
+                if (s->msmpeg4_version==1 || (cache&0x40000000)==0) {
                     /* third escape */
+                    if(s->msmpeg4_version!=1) LAST_SKIP_BITS(re, &s->gb, 2);
+                    UPDATE_CACHE(re, &s->gb);
                     if(s->msmpeg4_version<=3){
-                        last= get_bits1(&s->gb);
-                        run= get_bits(&s->gb, 6);  
-                        level= get_bits(&s->gb, 8);
-                        level= ((int8_t)level);
-                    }else{
+                        last=  SHOW_UBITS(re, &s->gb, 1); SKIP_CACHE(re, &s->gb, 1);
+                        run=   SHOW_UBITS(re, &s->gb, 6); SKIP_CACHE(re, &s->gb, 6);
+                        level= SHOW_SBITS(re, &s->gb, 8); LAST_SKIP_CACHE(re, &s->gb, 8);
+                        SKIP_COUNTER(re, &s->gb, 1+6+8);
+                    }else{                        
                         int sign;
-                        last= get_bits1(&s->gb);
+                        last=  SHOW_UBITS(re, &s->gb, 1); SKIP_BITS(re, &s->gb, 1);
                         if(!s->esc3_level_length){
                             int ll;
                             //printf("ESC-3 %X at %d %d\n", show_bits(&s->gb, 24), s->mb_x, s->mb_y);
                             if(s->qscale<8){
-                                ll= get_bits(&s->gb, 3);
+                                ll= SHOW_UBITS(re, &s->gb, 3); SKIP_BITS(re, &s->gb, 3);
                                 if(ll==0){
-                                    if(get_bits1(&s->gb)) printf("cool a new vlc code ,contact the ffmpeg developers and upload the file\n");
+                                    if(SHOW_UBITS(re, &s->gb, 1)) printf("cool a new vlc code ,contact the ffmpeg developers and upload the file\n");
+                                    SKIP_BITS(re, &s->gb, 1);
                                     ll=8;
                                 }
                             }else{
                                 ll=2;
-                                while(ll<8 && get_bits1(&s->gb)==0) ll++;
+                                while(ll<8 && SHOW_UBITS(re, &s->gb, 1)==0){
+                                    ll++;
+                                    SKIP_BITS(re, &s->gb, 1);
+                                }
+                                SKIP_BITS(re, &s->gb, 1);
                             }
 
                             s->esc3_level_length= ll;
-                            s->esc3_run_length= get_bits(&s->gb, 2) + 3;
+                            s->esc3_run_length= SHOW_UBITS(re, &s->gb, 2) + 3; SKIP_BITS(re, &s->gb, 2);
 //printf("level length:%d, run length: %d\n", ll, s->esc3_run_length);
                         }
-                        run= get_bits(&s->gb, s->esc3_run_length);  
-                        sign= get_bits1(&s->gb);
-                        level= get_bits(&s->gb, s->esc3_level_length);
+                        run=   SHOW_UBITS(re, &s->gb, s->esc3_run_length); 
+                        SKIP_BITS(re, &s->gb, s->esc3_run_length);
+                        
+                        sign=  SHOW_UBITS(re, &s->gb, 1); 
+                        SKIP_BITS(re, &s->gb, 1);
+                        
+                        level= SHOW_UBITS(re, &s->gb, s->esc3_level_length); 
+                        SKIP_BITS(re, &s->gb, s->esc3_level_length);
                         if(sign) level= -level;
                     }
 //printf("level: %d, run: %d at %d %d\n", level, run, s->mb_x, s->mb_y);
@@ -1775,64 +1790,64 @@
                         return DECODING_AC_LOST;
                     }
 #endif
+                    i+= run + 1;
+                    if(last) i+=192;
                 } else {
                     /* second escape */
-                    code = get_vlc2(&s->gb, rl->vlc.table, TEX_VLC_BITS, 2);
-                    if (code < 0 || code >= rl->n){
-                        fprintf(stderr, "illegal ESC2-VLC code %d at %d %d\n", code, s->mb_x, s->mb_y);
-                        return -1;
-                    }
-                    run = rl->table_run[code];
-                    level = rl->table_level[code];
-                    last = code >= rl->last;
-                    run += rl->max_run[last][level] + run_diff;
-                    level= level * qmul + qadd;
-                    if (get_bits1(&s->gb))
-                        level = -level;
+#if MIN_CACHE_BITS < 23
+                    LAST_SKIP_BITS(re, &s->gb, 2);
+                    UPDATE_CACHE(re, &s->gb);
+#else
+                    SKIP_BITS(re, &s->gb, 2);
+#endif
+                    GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2);
+                    i+= run + rl->max_run[run>>7][level/qmul] + run_diff; //FIXME opt indexing
+                    level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
+                    LAST_SKIP_BITS(re, &s->gb, 1);
                 }
             } else {
                 /* first escape */
-                code = get_vlc2(&s->gb, rl->vlc.table, TEX_VLC_BITS, 2);
-                if (code < 0 || code >= rl->n){
-                    fprintf(stderr, "illegal ESC2-VLC code %d at %d %d\n", code, s->mb_x, s->mb_y);
-                    return -1;
-                }
-                run = rl->table_run[code];
-                level = rl->table_level[code];
-                last = code >= rl->last;
-                level += rl->max_level[last][run];
-                level= level * qmul + qadd;
-                if (get_bits1(&s->gb))
-                    level = -level;
+#if MIN_CACHE_BITS < 22
+                LAST_SKIP_BITS(re, &s->gb, 1);
+                UPDATE_CACHE(re, &s->gb);
+#else
+                SKIP_BITS(re, &s->gb, 1);
+#endif
+                GET_RL_VLC(level, run, re, &s->gb, rl_vlc, TEX_VLC_BITS, 2);
+                i+= run;
+                level = level + rl->max_level[run>>7][(run-1)&63] * qmul;//FIXME opt indexing
+                level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
+                LAST_SKIP_BITS(re, &s->gb, 1);
             }
         } else {
-            run = rl->table_run[code];
-            level = rl->table_level[code] * qmul + qadd;
-            last = code >= rl->last;
-            if (get_bits1(&s->gb))
-                level = -level;
+            i+= run;
+            level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
+            LAST_SKIP_BITS(re, &s->gb, 1);
         }
-        i += run;
-        if (i >= 64){
-            fprintf(stderr, "run too long at %d %d\n", s->mb_x, s->mb_y);
-            return -1;
+        if (i > 62){
+            i-= 192;
+            if(i&(~63)){
+                fprintf(stderr, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
+                return -1;
+            }
+
+            block[scan_table[i]] = level;
+            break;
         }
 
-	j = scan_table[i];
-        block[j] = level;
-        i++;
-        if (last)
-            break;
+        block[scan_table[i]] = level;
     }
+    CLOSE_READER(re, &s->gb);
+  }
  not_coded:
     if (s->mb_intra) {
         mpeg4_pred_ac(s, block, n, dc_pred_dir);
         if (s->ac_pred) {
-            i = 64; /* XXX: not optimal */
+            i = 63; /* XXX: not optimal */
         }
     }
-    if(s->msmpeg4_version==4 && i>1) i=64; //FIXME/XXX optimize
-    s->block_last_index[n] = i - 1;
+    if(s->msmpeg4_version==4 && i>0) i=63; //FIXME/XXX optimize
+    s->block_last_index[n] = i;
     
     return 0;
 }