changeset 520:19a5e2a81e1a libavcodec

new bitstream reader API (the old get_bits()-based one is emulated and will still be supported in the future because it's simpler); minor optimizations to get_vlc
author michaelni
date Tue, 09 Jul 2002 10:35:10 +0000
parents 55b4e2248a45
children 9c66b5183ab3
files common.c common.h mpeg12.c mpegaudiodec.c
diffstat 4 files changed, 344 insertions(+), 525 deletions(-) [+]
line wrap: on
line diff
--- a/common.c	Sun Jul 07 08:34:46 2002 +0000
+++ b/common.c	Tue Jul 09 10:35:10 2002 +0000
@@ -118,101 +118,39 @@
 
 /* bit input functions */
 
-void init_get_bits(GetBitContext *s, 
+void init_get_bits(GetBitContext *s,
                    UINT8 *buffer, int buffer_size)
 {
+    s->buffer= buffer;
+    s->size= buffer_size;
+    s->buffer_end= buffer + buffer_size;
 #ifdef ALT_BITSTREAM_READER
     s->index=0;
-    s->buffer= buffer;
-#else
-    s->buf = buffer;
-    s->buf_ptr = buffer;
-    s->buf_end = buffer + buffer_size;
-    s->bit_cnt = 0;
-    s->bit_buf = 0;
-    while (s->buf_ptr < s->buf_end && 
-           s->bit_cnt < 32) {
-        s->bit_buf |= (*s->buf_ptr++ << (24 - s->bit_cnt));
-        s->bit_cnt += 8;
-    }
+#elif defined LIBMPEG2_BITSTREAM_READER
+    s->buffer_ptr = buffer;
+    s->bit_count = 16;
+    s->cache = 0;
+#elif defined A32_BITSTREAM_READER
+    s->buffer_ptr = (uint32_t*)buffer;
+    s->bit_count = 32;
+    s->cache0 = 0;
+    s->cache1 = 0;
 #endif
-    s->size= buffer_size;
-}
-
-#ifndef ALT_BITSTREAM_READER
-/* n must be >= 1 and <= 32 */
-/* also true: n > s->bit_cnt */
-unsigned int get_bits_long(GetBitContext *s, int n)
-{
-    unsigned int val;
-    int bit_cnt;
-    unsigned int bit_buf;
-
-#ifdef STATS
-    st_bit_counts[st_current_index] += n;
-#endif
-
-    bit_buf = s->bit_buf;
-    bit_cnt = s->bit_cnt - n;
-    
-//    if (bit_cnt >= 0) {
-//        val = bit_buf >> (32 - n);
-//        bit_buf <<= n; 
-//    } else 
     {
-	UINT8 *buf_ptr;
-        val = bit_buf >> (32 - n);
-        buf_ptr = s->buf_ptr;
-        buf_ptr += 4;
-        /* handle common case: we can read everything */
-        if (buf_ptr <= s->buf_end) {
-#ifdef ARCH_X86
-	    bit_buf = bswap_32(*((unsigned long*)(&buf_ptr[-4])));
-#else
-	    bit_buf = (buf_ptr[-4] << 24) |
-		(buf_ptr[-3] << 16) |
-                (buf_ptr[-2] << 8) |
-                (buf_ptr[-1]);	    
+        OPEN_READER(re, s)
+        UPDATE_CACHE(re, s)
+//        UPDATE_CACHE(re, s)
+        CLOSE_READER(re, s)
+    }
+#ifdef A32_BITSTREAM_READER
+    s->cache1 = 0;
 #endif
-            val |= bit_buf >> (32 + bit_cnt);
-            bit_buf <<= - bit_cnt;
-            bit_cnt += 32;
-        } else {
-            buf_ptr -= 4;
-            bit_buf = 0;
-            if (buf_ptr < s->buf_end)
-                bit_buf |= *buf_ptr++ << 24;
-            if (buf_ptr < s->buf_end)
-                bit_buf |= *buf_ptr++ << 16;
-            if (buf_ptr < s->buf_end)
-                bit_buf |= *buf_ptr++ << 8;
-            if (buf_ptr < s->buf_end)
-                bit_buf |= *buf_ptr++;
-
-            val |= bit_buf >> (32 + bit_cnt);
-            bit_buf <<= - bit_cnt;
-            bit_cnt += 8*(buf_ptr - s->buf_ptr);
-            if(bit_cnt<0) bit_cnt=0;
-        }
-        s->buf_ptr = buf_ptr;
-    }
-    s->bit_buf = bit_buf;
-    s->bit_cnt = bit_cnt;
-    return val;
 }
-#endif
 
 void align_get_bits(GetBitContext *s)
 {
-#ifdef ALT_BITSTREAM_READER
-    s->index= (s->index + 7) & (~7); 
-#else
-    int n;
-    n = s->bit_cnt & 7;
-    if (n > 0) {
-        get_bits(s, n);
-    }
-#endif
+    int n= (-get_bits_count(s)) & 7;
+    if(n) skip_bits(s, n);
 }
 
 int check_marker(GetBitContext *s, char *msg)
@@ -223,55 +161,6 @@
     return bit;
 }
 
-#ifndef ALT_BITSTREAM_READER
-/* This function is identical to get_bits_long(), the */
-/* only diference is that it doesn't touch the buffer */
-/* it is usefull to see the buffer.                   */
-
-unsigned int show_bits_long(GetBitContext *s, int n)
-{
-    unsigned int val;
-    int bit_cnt;
-    unsigned int bit_buf;
-	UINT8 *buf_ptr;
-	
-    bit_buf = s->bit_buf;
-    bit_cnt = s->bit_cnt - n;
-
-    val = bit_buf >> (32 - n);
-    buf_ptr = s->buf_ptr;
-    buf_ptr += 4;
-
-    /* handle common case: we can read everything */
-    if (buf_ptr <= s->buf_end) {
-#ifdef ARCH_X86
-        bit_buf = bswap_32(*((unsigned long*)(&buf_ptr[-4])));
-#else
-        bit_buf = (buf_ptr[-4] << 24) |
-            (buf_ptr[-3] << 16) |
-            (buf_ptr[-2] << 8) |
-            (buf_ptr[-1]);	    
-#endif
-    } else {
-        buf_ptr -= 4;
-        bit_buf = 0;
-        if (buf_ptr < s->buf_end)
-            bit_buf |= *buf_ptr++ << 24;
-        if (buf_ptr < s->buf_end)
-            bit_buf |= *buf_ptr++ << 16;
-        if (buf_ptr < s->buf_end)
-            bit_buf |= *buf_ptr++ << 8;
-        if (buf_ptr < s->buf_end)
-            bit_buf |= *buf_ptr++;
-    }
-    val |= bit_buf >> (32 + bit_cnt);
-    bit_buf <<= - bit_cnt;
-    bit_cnt += 32;
-    
-    return val;
-}
-#endif
-
 /* VLC decoding */
 
 //#define DEBUG_VLC
@@ -300,18 +189,15 @@
     vlc->table_size += size;
     if (vlc->table_size > vlc->table_allocated) {
         vlc->table_allocated += (1 << vlc->bits);
-        vlc->table_bits = realloc(vlc->table_bits, 
-                                  sizeof(INT8) * vlc->table_allocated);
-        vlc->table_codes = realloc(vlc->table_codes,
-                                   sizeof(INT16) * vlc->table_allocated);
-        if (!vlc->table_bits ||
-            !vlc->table_codes)
+        vlc->table = realloc(vlc->table,
+                             sizeof(VLC_TYPE) * 2 * vlc->table_allocated);
+        if (!vlc->table)
             return -1;
     }
     return index;
 }
 
-static int build_table(VLC *vlc, int table_nb_bits, 
+static int build_table(VLC *vlc, int table_nb_bits,
                        int nb_codes,
                        const void *bits, int bits_wrap, int bits_size,
                        const void *codes, int codes_wrap, int codes_size,
@@ -319,23 +205,21 @@
 {
     int i, j, k, n, table_size, table_index, nb, n1, index;
     UINT32 code;
-    INT8 *table_bits;
-    INT16 *table_codes;
+    VLC_TYPE (*table)[2];
 
     table_size = 1 << table_nb_bits;
     table_index = alloc_table(vlc, table_size);
 #ifdef DEBUG_VLC
-    printf("new table index=%d size=%d code_prefix=%x n=%d\n", 
+    printf("new table index=%d size=%d code_prefix=%x n=%d\n",
            table_index, table_size, code_prefix, n_prefix);
 #endif
     if (table_index < 0)
         return -1;
-    table_bits = &vlc->table_bits[table_index];
-    table_codes = &vlc->table_codes[table_index];
+    table = &vlc->table[table_index];
 
     for(i=0;i<table_size;i++) {
-        table_bits[i] = 0;
-        table_codes[i] = -1;
+        table[i][1] = 0; //bits
+        table[i][0] = -1; //codes
     }
 
     /* first pass: map codes and compute auxillary table sizes */
@@ -360,12 +244,12 @@
                     printf("%4x: code=%d n=%d\n",
                            j, i, n);
 #endif
-                    if (table_bits[j] != 0) {
+                    if (table[j][1] /*bits*/ != 0) {
                         fprintf(stderr, "incorrect codes\n");
                         exit(1);
                     }
-                    table_bits[j] = n;
-                    table_codes[j] = i;
+                    table[j][1] = n; //bits
+                    table[j][0] = i; //code
                     j++;
                 }
             } else {
@@ -376,22 +260,22 @@
                        j, n);
 #endif
                 /* compute table size */
-                n1 = -table_bits[j];
+                n1 = -table[j][1]; //bits
                 if (n > n1)
                     n1 = n;
-                table_bits[j] = -n1;
+                table[j][1] = -n1; //bits
             }
         }
     }
 
     /* second pass : fill auxillary tables recursively */
     for(i=0;i<table_size;i++) {
-        n = table_bits[i];
+        n = table[i][1]; //bits
         if (n < 0) {
             n = -n;
             if (n > table_nb_bits) {
                 n = table_nb_bits;
-                table_bits[i] = -n;
+                table[i][1] = -n; //bits
             }
             index = build_table(vlc, n, nb_codes,
                                 bits, bits_wrap, bits_size,
@@ -401,9 +285,8 @@
             if (index < 0)
                 return -1;
             /* note: realloc has been done, so reload tables */
-            table_bits = &vlc->table_bits[table_index];
-            table_codes = &vlc->table_codes[table_index];
-            table_codes[i] = index;
+            table = &vlc->table[table_index];
+            table[i][0] = index; //code
         }
     }
     return table_index;
@@ -436,8 +319,7 @@
              const void *codes, int codes_wrap, int codes_size)
 {
     vlc->bits = nb_bits;
-    vlc->table_bits = NULL;
-    vlc->table_codes = NULL;
+    vlc->table = NULL;
     vlc->table_allocated = 0;
     vlc->table_size = 0;
 #ifdef DEBUG_VLC
@@ -448,8 +330,7 @@
                     bits, bits_wrap, bits_size,
                     codes, codes_wrap, codes_size,
                     0, 0) < 0) {
-        av_free(vlc->table_bits);
-        av_free(vlc->table_codes);
+        av_free(vlc->table);
         return -1;
     }
     return 0;
@@ -458,8 +339,7 @@
 
 void free_vlc(VLC *vlc)
 {
-    av_free(vlc->table_bits);
-    av_free(vlc->table_codes);
+    av_free(vlc->table);
 }
 
 int ff_gcd(int a, int b){
--- a/common.h	Sun Jul 07 08:34:46 2002 +0000
+++ b/common.h	Tue Jul 09 10:35:10 2002 +0000
@@ -11,13 +11,17 @@
 //#define ALT_BITSTREAM_WRITER
 //#define ALIGNED_BITSTREAM_WRITER
 
+#define ALT_BITSTREAM_READER
+//#define LIBMPEG2_BITSTREAM_READER
+//#define A32_BITSTREAM_READER
+
 #ifdef ARCH_ALPHA
 #define ALT_BITSTREAM_READER
 #endif
 
 //#define ALIGNED_BITSTREAM
 #define FAST_GET_FIRST_VLC
-//#define DUMP_STREAM // only works with the ALT_BITSTREAM_READER
+//#define DUMP_STREAM
 
 #ifdef HAVE_AV_CONFIG_H
 /* only include the following when compiling package */
@@ -161,6 +165,13 @@
 #define MAX(a,b) ((a) > (b) ? (a) : (b))
 #define MIN(a,b) ((a) > (b) ? (b) : (a))
 
+#ifdef ARCH_X86
+// inverse for shift optimization (gcc should do that ...)
+#define INV32(a) (-a)
+#else
+#define INV32(a) (32-a)
+#endif
+
 /* bit output */
 
 struct PutBitContext;
@@ -195,23 +206,29 @@
 /* bit input */
 
 typedef struct GetBitContext {
+    UINT8 *buffer, *buffer_end;
 #ifdef ALT_BITSTREAM_READER
     int index;
-    UINT8 *buffer;
-#else
-    UINT32 bit_buf;
-    int bit_cnt;
-    UINT8 *buf, *buf_ptr, *buf_end;
+#elif defined LIBMPEG2_BITSTREAM_READER
+    UINT8 *buffer_ptr;
+    UINT32 cache;
+    int bit_count;
+#elif defined A32_BITSTREAM_READER
+    UINT32 *buffer_ptr;
+    UINT32 cache0;
+    UINT32 cache1;
+    int bit_count;
 #endif
     int size;
 } GetBitContext;
 
 static inline int get_bits_count(GetBitContext *s);
 
+#define VLC_TYPE INT16
+
 typedef struct VLC {
     int bits;
-    INT16 *table_codes;
-    INT8 *table_bits;
+    VLC_TYPE (*table)[2]; // code, bits
     int table_size, table_allocated;
 } VLC;
 
@@ -438,64 +455,234 @@
 #endif
 }
 
-void init_get_bits(GetBitContext *s, 
-                   UINT8 *buffer, int buffer_size);
+/* Bitstream reader API docs:
+name
+    arbitrary name which is used as a prefix for the internal variables
+
+gb
+    getbitcontext
+
+OPEN_READER(name, gb)
+    loads gb into local variables
+
+CLOSE_READER(name, gb)
+    stores local vars in gb
+
+UPDATE_CACHE(name, gb)
+    refills the internal cache from the bitstream
+    after this call at least MIN_CACHE_BITS will be available
+
+GET_CACHE(name, gb)
+    will output the contents of the internal cache, next bit is MSB of 32 or 64 bit (FIXME 64bit)
+
+SHOW_UBITS(name, gb, num)
+    will return the next num bits
+
+SHOW_SBITS(name, gb, num)
+    will return the next num bits and do sign extension
+
+SKIP_BITS(name, gb, num)
+    will skip over the next num bits
+    note, this is equivalent to SKIP_CACHE; SKIP_COUNTER
+
+SKIP_CACHE(name, gb, num)
+    will remove the next num bits from the cache (note SKIP_COUNTER MUST be called before UPDATE_CACHE / CLOSE_READER)
+
+SKIP_COUNTER(name, gb, num)
+    will increment the internal bit counter (see SKIP_CACHE & SKIP_BITS)
+
+LAST_SKIP_CACHE(name, gb, num)
+    will remove the next num bits from the cache if it is needed for UPDATE_CACHE otherwise it will do nothing
+
+LAST_SKIP_BITS(name, gb, num)
+    is equivalent to LAST_SKIP_CACHE; SKIP_COUNTER
+
+for examples see get_bits, show_bits, skip_bits, get_vlc
+*/
+
+#ifdef ALT_BITSTREAM_READER
+#   define MIN_CACHE_BITS 25
+
+#   define OPEN_READER(name, gb)\
+        int name##_index= (gb)->index;\
+        int name##_cache= 0;\
+
+#   define CLOSE_READER(name, gb)\
+        (gb)->index= name##_index;\
+
+#   define UPDATE_CACHE(name, gb)\
+        name##_cache= be2me_32( unaligned32( ((uint8_t *)(gb)->buffer)+(name##_index>>3) ) ) << (name##_index&0x07);\
+
+#   define SKIP_CACHE(name, gb, num)\
+        name##_cache <<= (num);\
+
+// FIXME name?
+#   define SKIP_COUNTER(name, gb, num)\
+        name##_index += (num);\
+
+#   define SKIP_BITS(name, gb, num)\
+        {\
+            SKIP_CACHE(name, gb, num)\
+            SKIP_COUNTER(name, gb, num)\
+        }\
+
+#   define LAST_SKIP_BITS(name, gb, num) SKIP_COUNTER(name, gb, num)
+#   define LAST_SKIP_CACHE(name, gb, num) ;
+
+#   define SHOW_UBITS(name, gb, num)\
+        (((uint32_t)name##_cache)>>INV32(num))
+
+#   define SHOW_SBITS(name, gb, num)\
+        (((int32_t)name##_cache)>>INV32(num))
+
+#   define GET_CACHE(name, gb)\
+        ((uint32_t)name##_cache)
+
+static inline int get_bits_count(GetBitContext *s){
+    return s->index;
+}
+#elif defined LIBMPEG2_BITSTREAM_READER
+//libmpeg2 like reader
+
+#   define MIN_CACHE_BITS 16
+
+#   define OPEN_READER(name, gb)\
+        int name##_bit_count=(gb)->bit_count;\
+        int name##_cache= (gb)->cache;\
+        uint8_t * name##_buffer_ptr=(gb)->buffer_ptr;\
+
+#   define CLOSE_READER(name, gb)\
+        (gb)->bit_count= name##_bit_count;\
+        (gb)->cache= name##_cache;\
+        (gb)->buffer_ptr= name##_buffer_ptr;\
 
-#ifndef ALT_BITSTREAM_READER
-unsigned int get_bits_long(GetBitContext *s, int n);
-unsigned int show_bits_long(GetBitContext *s, int n);
+#   define UPDATE_CACHE(name, gb)\
+    if(name##_bit_count > 0){\
+        name##_cache+= ((name##_buffer_ptr[0]<<8) + name##_buffer_ptr[1]) << name##_bit_count;\
+        name##_buffer_ptr+=2;\
+        name##_bit_count-= 16;\
+    }\
+
+#   define SKIP_CACHE(name, gb, num)\
+        name##_cache <<= (num);\
+
+#   define SKIP_COUNTER(name, gb, num)\
+        name##_bit_count += (num);\
+
+#   define SKIP_BITS(name, gb, num)\
+        {\
+            SKIP_CACHE(name, gb, num)\
+            SKIP_COUNTER(name, gb, num)\
+        }\
+
+#   define LAST_SKIP_BITS(name, gb, num) SKIP_BITS(name, gb, num)
+#   define LAST_SKIP_CACHE(name, gb, num) SKIP_CACHE(name, gb, num)
+
+#   define SHOW_UBITS(name, gb, num)\
+        (((uint32_t)name##_cache)>>INV32(num))
+
+#   define SHOW_SBITS(name, gb, num)\
+        (((int32_t)name##_cache)>>INV32(num))
+
+#   define GET_CACHE(name, gb)\
+        ((uint32_t)name##_cache)
+
+static inline int get_bits_count(GetBitContext *s){
+    return (s->buffer_ptr - s->buffer)*8 - 16 + s->bit_count;
+}
+
+#elif defined A32_BITSTREAM_READER
+
+#   define MIN_CACHE_BITS 32
+
+#   define OPEN_READER(name, gb)\
+        int name##_bit_count=(gb)->bit_count;\
+        uint32_t name##_cache0= (gb)->cache0;\
+        uint32_t name##_cache1= (gb)->cache1;\
+        uint32_t * name##_buffer_ptr=(gb)->buffer_ptr;\
+
+#   define CLOSE_READER(name, gb)\
+        (gb)->bit_count= name##_bit_count;\
+        (gb)->cache0= name##_cache0;\
+        (gb)->cache1= name##_cache1;\
+        (gb)->buffer_ptr= name##_buffer_ptr;\
+
+#   define UPDATE_CACHE(name, gb)\
+    if(name##_bit_count > 0){\
+        const uint32_t next= be2me_32( *name##_buffer_ptr );\
+        name##_cache0 |= next>>INV32(name##_bit_count);\
+        name##_cache1 |= next<<name##_bit_count;\
+        name##_buffer_ptr++;\
+        name##_bit_count-= 32;\
+    }\
+
+#ifdef ARCH_X86
+#   define SKIP_CACHE(name, gb, num)\
+        asm(\
+            "shldl %2, %1, %0		\n\t"\
+            "shll %2, %1		\n\t"\
+            : "+r" (name##_cache0), "+r" (name##_cache1)\
+            : "Ic" ((uint8_t)num)\
+           );
+#else
+#   define SKIP_CACHE(name, gb, num)\
+        name##_cache0 <<= (num);\
+        name##_cache0 |= name##_cache1 >>INV32(num);\
+        name##_cache1 <<= (num);
+#endif
+
+#   define SKIP_COUNTER(name, gb, num)\
+        name##_bit_count += (num);\
+
+#   define SKIP_BITS(name, gb, num)\
+        {\
+            SKIP_CACHE(name, gb, num)\
+            SKIP_COUNTER(name, gb, num)\
+        }\
+
+#   define LAST_SKIP_BITS(name, gb, num) SKIP_BITS(name, gb, num)
+#   define LAST_SKIP_CACHE(name, gb, num) SKIP_CACHE(name, gb, num)
+
+#   define SHOW_UBITS(name, gb, num)\
+        (((uint32_t)name##_cache0)>>INV32(num))
+
+#   define SHOW_SBITS(name, gb, num)\
+        (((int32_t)name##_cache0)>>INV32(num))
+
+#   define GET_CACHE(name, gb)\
+        (name##_cache0)
+
+static inline int get_bits_count(GetBitContext *s){
+    return ((uint8_t*)s->buffer_ptr - s->buffer)*8 - 32 + s->bit_count;
+}
+
 #endif
 
 static inline unsigned int get_bits(GetBitContext *s, int n){
-#ifdef ALT_BITSTREAM_READER
-#ifdef ALIGNED_BITSTREAM
-    int index= s->index;
-    uint32_t result1= be2me_32( ((uint32_t *)s->buffer)[index>>5] );
-    uint32_t result2= be2me_32( ((uint32_t *)s->buffer)[(index>>5) + 1] );
-#ifdef ARCH_X86
-    asm ("shldl %%cl, %2, %0\n\t"
-         : "=r" (result1)
-	 : "0" (result1), "r" (result2), "c" (index));
-#else
-    result1<<= (index&0x1F);
-    result2= (result2>>1) >> (31-(index&0x1F));
-    result1|= result2;
-#endif
-    result1>>= 32 - n;
-    index+= n;
-    s->index= index;
-    
-    return result1;
-#else //ALIGNED_BITSTREAM
-    int index= s->index;
-    uint32_t result= be2me_32( unaligned32( ((uint8_t *)s->buffer)+(index>>3) ) );
+    register int tmp;
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    tmp= SHOW_UBITS(re, s, n);
+    LAST_SKIP_BITS(re, s, n)
+    CLOSE_READER(re, s)
+    return tmp;
+}
 
-    result<<= (index&0x07);
-    result>>= 32 - n;
-    index+= n;
-    s->index= index;
-#ifdef DUMP_STREAM
-    while(n){
-        printf("%d", (result>>(n-1))&1);
-        n--;
-    }
-    printf(" ");
-#endif
-    return result;
-#endif //!ALIGNED_BITSTREAM
-#else //ALT_BITSTREAM_READER
-    if(s->bit_cnt>=n){
-        /* most common case here */
-        unsigned int val = s->bit_buf >> (32 - n);
-        s->bit_buf <<= n;
-	s->bit_cnt -= n;
-#ifdef STATS
-	st_bit_counts[st_current_index] += n;
-#endif
-	return val;
-    }
-    return get_bits_long(s,n);
-#endif //!ALT_BITSTREAM_READER
+static inline unsigned int show_bits(GetBitContext *s, int n){
+    register int tmp;
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    tmp= SHOW_UBITS(re, s, n);
+//    CLOSE_READER(re, s)
+    return tmp;
+}
+
+static inline void skip_bits(GetBitContext *s, int n){
+ //Note gcc seems to optimize this to s->index+=n for the ALT_READER :))
+    OPEN_READER(re, s)
+    UPDATE_CACHE(re, s)
+    LAST_SKIP_BITS(re, s, n)
+    CLOSE_READER(re, s)
 }
 
 static inline unsigned int get_bits1(GetBitContext *s){
@@ -506,158 +693,23 @@
     result>>= 8 - 1;
     index++;
     s->index= index;
-    
-#ifdef DUMP_STREAM
-    printf("%d ", result);
-#endif
+
     return result;
 #else
-    if(s->bit_cnt>0){
-        /* most common case here */
-        unsigned int val = s->bit_buf >> 31;
-        s->bit_buf <<= 1;
-	s->bit_cnt--;
-#ifdef STATS
-	st_bit_counts[st_current_index]++;
-#endif
-	return val;
-    }
-    return get_bits_long(s,1);
+    return get_bits(s, 1);
 #endif
 }
 
-/* This function is identical to get_bits(), the only */
-/* diference is that it doesn't touch the buffer      */
-/* it is usefull to see the buffer.                   */
-static inline unsigned int show_bits(GetBitContext *s, int n)
-{
-#ifdef ALT_BITSTREAM_READER
-#ifdef ALIGNED_BITSTREAM
-    int index= s->index;
-    uint32_t result1= be2me_32( ((uint32_t *)s->buffer)[index>>5] );
-    uint32_t result2= be2me_32( ((uint32_t *)s->buffer)[(index>>5) + 1] );
-#ifdef ARCH_X86
-    asm ("shldl %%cl, %2, %0\n\t"
-         : "=r" (result1)
-	 : "0" (result1), "r" (result2), "c" (index));
-#else
-    result1<<= (index&0x1F);
-    result2= (result2>>1) >> (31-(index&0x1F));
-    result1|= result2;
-#endif
-    result1>>= 32 - n;
-    
-    return result1;
-#else //ALIGNED_BITSTREAM
-    int index= s->index;
-    uint32_t result= be2me_32( unaligned32( ((uint8_t *)s->buffer)+(index>>3) ) );
-
-    result<<= (index&0x07);
-    result>>= 32 - n;
-    
-    return result;
-#endif //!ALIGNED_BITSTREAM
-#else //ALT_BITSTREAM_READER
-    if(s->bit_cnt>=n) {
-        /* most common case here */
-        unsigned int val = s->bit_buf >> (32 - n);
-        return val;
-    }
-    return show_bits_long(s,n);
-#endif //!ALT_BITSTREAM_READER
-}
-
-static inline int show_aligned_bits(GetBitContext *s, int offset, int n)
-{
-#ifdef ALT_BITSTREAM_READER
-#ifdef ALIGNED_BITSTREAM
-    int index= (s->index + offset + 7)&(~7);
-    uint32_t result1= be2me_32( ((uint32_t *)s->buffer)[index>>5] );
-    uint32_t result2= be2me_32( ((uint32_t *)s->buffer)[(index>>5) + 1] );
-#ifdef ARCH_X86
-    asm ("shldl %%cl, %2, %0\n\t"
-         : "=r" (result1)
-	 : "0" (result1), "r" (result2), "c" (index));
-#else
-    result1<<= (index&0x1F);
-    result2= (result2>>1) >> (31-(index&0x1F));
-    result1|= result2;
-#endif
-    result1>>= 32 - n;
-    
-    return result1;
-#else //ALIGNED_BITSTREAM
-    int index= (s->index + offset + 7)>>3;
-    uint32_t result= be2me_32( unaligned32( ((uint8_t *)s->buffer)+index ) );
-
-    result>>= 32 - n;
-    
-    return result;
-#endif //!ALIGNED_BITSTREAM
-#else //ALT_BITSTREAM_READER
-    int index= (get_bits_count(s) + offset + 7)>>3;
-    uint32_t result= be2me_32( unaligned32( ((uint8_t *)s->buf)+index ) );
-
-    result>>= 32 - n;
-//printf(" %X %X %d \n", (int)(((uint8_t *)s->buf)+index ), (int)s->buf_ptr, s->bit_cnt);    
-    return result;
-#endif //!ALT_BITSTREAM_READER
-}
-
-static inline void skip_bits(GetBitContext *s, int n){
-#ifdef ALT_BITSTREAM_READER
-    s->index+= n;
-#ifdef DUMP_STREAM
-    {
-        int result;
-        s->index-= n;
-        result= get_bits(s, n);
-    }
-#endif
-
-#else
-    if(s->bit_cnt>=n){
-        /* most common case here */
-        s->bit_buf <<= n;
-	s->bit_cnt -= n;
-#ifdef STATS
-	st_bit_counts[st_current_index] += n;
-#endif
-    } else {
-	get_bits_long(s,n);
-    }
-#endif
+static inline unsigned int show_bits1(GetBitContext *s){
+    return show_bits(s, 1);
 }
 
 static inline void skip_bits1(GetBitContext *s){
-#ifdef ALT_BITSTREAM_READER
-    s->index++;
-#ifdef DUMP_STREAM
-    s->index--;
-    printf("%d ", get_bits1(s));
-#endif
-#else
-    if(s->bit_cnt>0){
-        /* most common case here */
-        s->bit_buf <<= 1;
-	s->bit_cnt--;
-#ifdef STATS
-	st_bit_counts[st_current_index]++;
-#endif
-    } else {
-	get_bits_long(s,1);
-    }
-#endif
+    skip_bits(s, 1);
 }
 
-static inline int get_bits_count(GetBitContext *s)
-{
-#ifdef ALT_BITSTREAM_READER
-    return s->index;
-#else
-    return (s->buf_ptr - s->buf) * 8 - s->bit_cnt;
-#endif
-}
+void init_get_bits(GetBitContext *s,
+                   UINT8 *buffer, int buffer_size);
 
 int check_marker(GetBitContext *s, char *msg);
 void align_get_bits(GetBitContext *s);
@@ -666,126 +718,43 @@
              const void *codes, int codes_wrap, int codes_size);
 void free_vlc(VLC *vlc);
 
-#ifdef ALT_BITSTREAM_READER
-#ifdef ALIGNED_BITSTREAM
-#ifdef ARCH_X86
-#define SHOW_BITS(s, val, n) \
-    val= be2me_32( ((uint32_t *)(s)->buffer)[bit_cnt>>5] );\
-    {uint32_t result2= be2me_32( ((uint32_t *)(s)->buffer)[(bit_cnt>>5) + 1] );\
-    asm ("shldl %%cl, %2, %0\n\t"\
-         : "=r" (val)\
-         : "0" (val), "r" (result2), "c" (bit_cnt));\
-    ((uint32_t)val)>>= 32 - n;}
-#else //ARCH_X86
-#define SHOW_BITS(s, val, n) \
-    val= be2me_32( ((uint32_t *)(s)->buffer)[bit_cnt>>5] );\
-    {uint32_t result2= be2me_32( ((uint32_t *)(s)->buffer)[(bit_cnt>>5) + 1] );\
-    val<<= (bit_cnt&0x1F);\
-    result2= (result2>>1) >> (31-(bit_cnt&0x1F));\
-    val|= result2;\
-    ((uint32_t)val)>>= 32 - n;}
-#endif //!ARCH_X86
-#else //ALIGNED_BITSTREAM
-#define SHOW_BITS(s, val, n) \
-    val= be2me_32( unaligned32( ((uint8_t *)(s)->buffer)+(bit_cnt>>3) ) );\
-    val<<= (bit_cnt&0x07);\
-    ((uint32_t)val)>>= 32 - n;
-#endif // !ALIGNED_BITSTREAM
-#define FLUSH_BITS(n) bit_cnt+=n; 
-#define SAVE_BITS(s) bit_cnt= (s)->index;
-#define RESTORE_BITS(s) (s)->index= bit_cnt;
-#else
-
-/* macro to go faster */
-/* n must be <= 24 */
-/* XXX: optimize buffer end test */
-#define SHOW_BITS(s, val, n)\
-{\
-    if (bit_cnt < n && buf_ptr < (s)->buf_end) {\
-        bit_buf |= *buf_ptr++ << (24 - bit_cnt);\
-        bit_cnt += 8;\
-        if (bit_cnt < n && buf_ptr < (s)->buf_end) {\
-            bit_buf |= *buf_ptr++ << (24 - bit_cnt);\
-            bit_cnt += 8;\
-            if (bit_cnt < n && buf_ptr < (s)->buf_end) {\
-                bit_buf |= *buf_ptr++ << (24 - bit_cnt);\
-                bit_cnt += 8;\
-            }\
-        }\
-    }\
-    val = bit_buf >> (32 - n);\
-}
-
-/* SHOW_BITS with n1 >= n must be been done before */
-#define FLUSH_BITS(n)\
-{\
-    bit_buf <<= n;\
-    bit_cnt -= n;\
-}
-
-#define SAVE_BITS(s) \
-{\
-    bit_cnt = (s)->bit_cnt;\
-    bit_buf = (s)->bit_buf;\
-    buf_ptr = (s)->buf_ptr;\
-}
-
-#define RESTORE_BITS(s) \
-{\
-    (s)->buf_ptr = buf_ptr;\
-    (s)->bit_buf = bit_buf;\
-    (s)->bit_cnt = bit_cnt;\
-}
-#endif // !ALT_BITSTREAM_READER
-
 static inline int get_vlc(GetBitContext *s, VLC *vlc)
 {
     int code, n, nb_bits, index;
-    INT16 *table_codes;
-    INT8 *table_bits;
-    int bit_cnt;
-#ifndef ALT_BITSTREAM_READER
-    UINT32 bit_buf;
-    UINT8 *buf_ptr;
-#endif
+    VLC_TYPE (*table)[2];
+    OPEN_READER(re, s)
 
-    SAVE_BITS(s);
+    UPDATE_CACHE(re, s)
+
     nb_bits = vlc->bits;
-    table_codes = vlc->table_codes;
-    table_bits = vlc->table_bits;
+    table = vlc->table;
 
 #ifdef FAST_GET_FIRST_VLC
-    SHOW_BITS(s, index, nb_bits);
-    code = table_codes[index];
-    n = table_bits[index];
+    index= SHOW_UBITS(re, s, nb_bits);
+    code = table[index][0];
+    n = table[index][1];
     if (n > 0) {
         /* most common case (90%)*/
-        FLUSH_BITS(n);
-#ifdef DUMP_STREAM
-        {
-            int n= bit_cnt - s->index;
-            skip_bits(s, n);
-            RESTORE_BITS(s);
-        }
-#endif
-        RESTORE_BITS(s);
+        LAST_SKIP_BITS(re, s, n)
+        CLOSE_READER(re, s)
         return code;
     } else if (n == 0) {
         return -1;
     } else {
-        FLUSH_BITS(nb_bits);
+        LAST_SKIP_BITS(re, s, nb_bits)
+        UPDATE_CACHE(re, s) //this isnt needed but its faster if its here
+
         nb_bits = -n;
-        table_codes = vlc->table_codes + code;
-        table_bits = vlc->table_bits + code;
+        table = vlc->table + code;
     }
 #endif
     for(;;) {
-        SHOW_BITS(s, index, nb_bits);
-        code = table_codes[index];
-        n = table_bits[index];
+        index= SHOW_UBITS(re, s, nb_bits);
+        code = table[index][0];
+        n = table[index][1];
         if (n > 0) {
             /* most common case */
-            FLUSH_BITS(n);
+            SKIP_BITS(re, s, n)
 #ifdef STATS
             st_bit_counts[st_current_index] += n;
 #endif
@@ -793,23 +762,16 @@
         } else if (n == 0) {
             return -1;
         } else {
-            FLUSH_BITS(nb_bits);
+            LAST_SKIP_BITS(re, s, nb_bits)
+            UPDATE_CACHE(re, s)
 #ifdef STATS
             st_bit_counts[st_current_index] += nb_bits;
 #endif
             nb_bits = -n;
-            table_codes = vlc->table_codes + code;
-            table_bits = vlc->table_bits + code;
+            table = vlc->table + code;
         }
     }
-#ifdef DUMP_STREAM
-    {
-        int n= bit_cnt - s->index;
-        skip_bits(s, n);
-        RESTORE_BITS(s);
-    }
-#endif
-    RESTORE_BITS(s);
+    CLOSE_READER(re, s)
     return code;
 }
 
--- a/mpeg12.c	Sun Jul 07 08:34:46 2002 +0000
+++ b/mpeg12.c	Tue Jul 09 10:35:10 2002 +0000
@@ -961,21 +961,20 @@
         dprintf("dc=%d diff=%d\n", dc, diff);
         i = 1;
     } else {
-        int bit_cnt, v;
-        UINT32 bit_buf;
-        UINT8 *buf_ptr;
+        int v;
+        OPEN_READER(re, &s->gb);
         i = 0;
         /* special case for the first coef. no need to add a second vlc table */
-        SAVE_BITS(&s->gb);
-        SHOW_BITS(&s->gb, v, 2);
+        UPDATE_CACHE(re, &s->gb);
+        v= SHOW_UBITS(re, &s->gb, 2);
         if (v & 2) {
             run = 0;
             level = 1 - ((v & 1) << 1);
-            FLUSH_BITS(2);
-            RESTORE_BITS(&s->gb);
+            SKIP_BITS(re, &s->gb, 2);
+            CLOSE_READER(re, &s->gb);
             goto add_coef;
         }
-        RESTORE_BITS(&s->gb);
+        CLOSE_READER(re, &s->gb);
     }
 
     /* now quantify & encode AC coefs */
@@ -1035,26 +1034,25 @@
     mismatch = 1;
 
     {
-        int bit_cnt, v;
-        UINT32 bit_buf;
-        UINT8 *buf_ptr;
+        int v;
+        OPEN_READER(re, &s->gb);
         i = 0;
-        if (n < 4) 
+        if (n < 4)
             matrix = s->inter_matrix;
         else
             matrix = s->chroma_inter_matrix;
-            
+
         /* special case for the first coef. no need to add a second vlc table */
-        SAVE_BITS(&s->gb);
-        SHOW_BITS(&s->gb, v, 2);
+        UPDATE_CACHE(re, &s->gb);
+        v= SHOW_UBITS(re, &s->gb, 2);
         if (v & 2) {
             run = 0;
             level = 1 - ((v & 1) << 1);
-            FLUSH_BITS(2);
-            RESTORE_BITS(&s->gb);
+            SKIP_BITS(re, &s->gb, 2);
+            CLOSE_READER(re, &s->gb);
             goto add_coef;
         }
-        RESTORE_BITS(&s->gb);
+        CLOSE_READER(re, &s->gb);
     }
 
     /* now quantify & encode AC coefs */
--- a/mpegaudiodec.c	Sun Jul 07 08:34:46 2002 +0000
+++ b/mpegaudiodec.c	Tue Jul 09 10:35:10 2002 +0000
@@ -1457,11 +1457,8 @@
     UINT8 *ptr;
 
     /* compute current position in stream */
-#ifdef ALT_BITSTREAM_READER
-    ptr = s->gb.buffer + (s->gb.index>>3);
-#else
-    ptr = s->gb.buf_ptr - (s->gb.bit_cnt >> 3);
-#endif    
+    ptr = s->gb.buffer + (get_bits_count(&s->gb)>>3);
+
     /* copy old data before current one */
     ptr -= backstep;
     memcpy(ptr, s->inbuf1[s->inbuf_index ^ 1] + 
@@ -1547,9 +1544,7 @@
 {
     int s_index;
     int linbits, code, x, y, l, v, i, j, k, pos;
-    UINT8 *last_buf_ptr;
-    UINT32 last_bit_buf;
-    int last_bit_cnt;
+    GetBitContext last_gb;
     VLC *vlc;
     UINT8 *code_table;
 
@@ -1608,36 +1603,20 @@
             
     /* high frequencies */
     vlc = &huff_quad_vlc[g->count1table_select];
-    last_buf_ptr = NULL;
-    last_bit_buf = 0;
-    last_bit_cnt = 0;
+    last_gb.buffer = NULL;
     while (s_index <= 572) {
         pos = get_bits_count(&s->gb);
         if (pos >= end_pos) {
-            if (pos > end_pos && last_buf_ptr != NULL) {
+            if (pos > end_pos && last_gb.buffer != NULL) {
                 /* some encoders generate an incorrect size for this
                    part. We must go back into the data */
                 s_index -= 4;
-#ifdef ALT_BITSTREAM_READER
-                s->gb.buffer = last_buf_ptr;
-                s->gb.index = last_bit_cnt;
-#else
-                s->gb.buf_ptr = last_buf_ptr;
-                s->gb.bit_buf = last_bit_buf;
-                s->gb.bit_cnt = last_bit_cnt;
-#endif            
+                s->gb = last_gb;
             }
             break;
         }
-#ifdef ALT_BITSTREAM_READER
-        last_buf_ptr = s->gb.buffer;
-        last_bit_cnt = s->gb.index;
-#else
-        last_buf_ptr = s->gb.buf_ptr;
-        last_bit_buf = s->gb.bit_buf;
-        last_bit_cnt = s->gb.bit_cnt;
-#endif
-        
+        last_gb= s->gb;
+
         code = get_vlc(&s->gb, vlc);
         dprintf("t=%d code=%d\n", g->count1table_select, code);
         if (code < 0)