changeset 1250:fa181d095027 libavcodec

optimizations
author michaelni
date Tue, 13 May 2003 00:46:42 +0000
parents 7ac0a77e5973
children afdd177080c9
files golomb.c golomb.h h264.c svq3.c
diffstat 4 files changed, 140 insertions(+), 39 deletions(-) [+]
line wrap: on
line diff
--- a/golomb.c	Mon May 12 23:03:00 2003 +0000
+++ b/golomb.c	Tue May 13 00:46:42 2003 +0000
@@ -95,3 +95,60 @@
 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,
 15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,17,
 };
+
+const uint8_t ff_interleaved_golomb_vlc_len[256]={ /* number of bits to skip for a code, indexed by the top 8 bits of the 32-bit cache; only looked up when (cache & 0xAA800000) != 0 */
+9,9,7,7,9,9,7,7,5,5,5,5,5,5,5,5,
+9,9,7,7,9,9,7,7,5,5,5,5,5,5,5,5,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+9,9,7,7,9,9,7,7,5,5,5,5,5,5,5,5,
+9,9,7,7,9,9,7,7,5,5,5,5,5,5,5,5,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, /* indices 128..255 (top bit of the index set): length 1 */
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
+};
+
+const uint8_t ff_interleaved_ue_golomb_vlc_code[256]={ /* value returned by svq3_get_ue_golomb() on its table path; indexed by the top 8 bits of the cache, like ff_interleaved_golomb_vlc_len */
+ 15,16,7, 7, 17,18,8, 8, 3, 3, 3, 3, 3, 3, 3, 3,
+ 19,20,9, 9, 21,22,10,10,4, 4, 4, 4, 4, 4, 4, 4, 
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+ 23,24,11,11,25,26,12,12,5, 5, 5, 5, 5, 5, 5, 5, 
+ 27,28,13,13,29,30,14,14,6, 6, 6, 6, 6, 6, 6, 6,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* indices 128..255 all map to value 0 */
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+};
+
+const int8_t ff_interleaved_se_golomb_vlc_code[256]={ /* signed value returned on the table path of the signed reader in golomb.h; indexed by the top 8 bits of the cache, like ff_interleaved_golomb_vlc_len */
+  8, -8,  4,  4,  9, -9, -4, -4,  2,  2,  2,  2,  2,  2,  2,  2,
+ 10,-10,  5,  5, 11,-11, -5, -5, -2, -2, -2, -2, -2, -2, -2, -2,
+  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,
+ 12,-12,  6,  6, 13,-13, -6, -6,  3,  3,  3,  3,  3,  3,  3,  3,
+ 14,-14,  7,  7, 15,-15, -7, -7, -3, -3, -3, -3, -3, -3, -3, -3,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0, /* indices 128..255 all map to value 0 */
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
+};
--- a/golomb.h	Mon May 12 23:03:00 2003 +0000
+++ b/golomb.h	Tue May 13 00:46:42 2003 +0000
@@ -32,6 +32,10 @@
 extern const  int8_t ff_se_golomb_vlc_code[512];
 extern const uint8_t ff_ue_golomb_len[256];
 
+extern const uint8_t ff_interleaved_golomb_vlc_len[256];
+extern const uint8_t ff_interleaved_ue_golomb_vlc_code[256];
+extern const  int8_t ff_interleaved_se_golomb_vlc_code[256];
+
  
  /**
  * read unsigned exp golomb code.
@@ -62,24 +66,33 @@
 }
 
 static inline int svq3_get_ue_golomb(GetBitContext *gb){
-    unsigned int buf;
+    uint32_t buf; /* snapshot of the bit cache; the 0xAA800000 mask and the >> (32 - 8) below rely on a 32-bit width */
     int log;
 
     OPEN_READER(re, gb);
     UPDATE_CACHE(re, gb);
-    buf=GET_CACHE(re, gb)|1;
-
-    if((buf & 0xAAAAAAAA) == 0)
-        return INVALID_VLC;
+    buf=GET_CACHE(re, gb);
+    /* Table path: taken when any of cache bits 31/29/27/25/23 is set; the top 8 bits then index the length and value tables. */
+    if(buf&0xAA800000){
+        buf >>= 32 - 8; /* keep only the top 8 bits as the table index (0..255) */
+        LAST_SKIP_BITS(re, gb, ff_interleaved_golomb_vlc_len[buf]);
+        CLOSE_READER(re, gb);
+        
+        return ff_interleaved_ue_golomb_vlc_code[buf];
+    }else{ /* otherwise fall back to the pre-existing shift-loop decoder, re-indented, with the |1 now applied here */
+        buf|=1;
+        if((buf & 0xAAAAAAAA) == 0)
+            return INVALID_VLC;
 
-    for(log=31; (buf & 0x80000000) == 0; log--){
-        buf = (buf << 2) - ((buf << log) >> (log - 1)) + (buf >> 30);
-    }
+        for(log=31; (buf & 0x80000000) == 0; log--){
+            buf = (buf << 2) - ((buf << log) >> (log - 1)) + (buf >> 30);
+        }
 
-    LAST_SKIP_BITS(re, gb, 63 - 2*log);
-    CLOSE_READER(re, gb);
+        LAST_SKIP_BITS(re, gb, 63 - 2*log);
+        CLOSE_READER(re, gb);
 
-    return ((buf << log) >> log) - 1;
+        return ((buf << log) >> log) - 1;
+    }
 }
 
 /**
@@ -141,19 +154,28 @@
 
     OPEN_READER(re, gb);
     UPDATE_CACHE(re, gb);
-    buf=GET_CACHE(re, gb)|1;
-
-    if((buf & 0xAAAAAAAA) == 0)
-        return INVALID_VLC;
+    buf=GET_CACHE(re, gb);
 
-    for(log=31; (buf & 0x80000000) == 0; log--){
-        buf = (buf << 2) - ((buf << log) >> (log - 1)) + (buf >> 30);
-    }
+    if(buf&0xAA800000){
+        buf >>= 32 - 8;
+        LAST_SKIP_BITS(re, gb, ff_interleaved_golomb_vlc_len[buf]);
+        CLOSE_READER(re, gb);
+        
+        return ff_interleaved_se_golomb_vlc_code[buf];
+    }else{
+        buf |=1;
+        if((buf & 0xAAAAAAAA) == 0)
+            return INVALID_VLC;
 
-    LAST_SKIP_BITS(re, gb, 63 - 2*log);
-    CLOSE_READER(re, gb);
+        for(log=31; (buf & 0x80000000) == 0; log--){
+            buf = (buf << 2) - ((buf << log) >> (log - 1)) + (buf >> 30);
+        }
 
-    return (signed) (((((buf << log) >> log) - 1) ^ -(buf & 0x1)) + 1) >> 1;
+        LAST_SKIP_BITS(re, gb, 63 - 2*log);
+        CLOSE_READER(re, gb);
+
+        return (signed) (((((buf << log) >> log) - 1) ^ -(buf & 0x1)) + 1) >> 1;
+    }
 }
 
 #ifdef TRACE
--- a/h264.c	Mon May 12 23:03:00 2003 +0000
+++ b/h264.c	Tue May 13 00:46:42 2003 +0000
@@ -2278,13 +2278,19 @@
 
 
     if(!IS_INTRA4x4(mb_type)){
-        for(i=0; i<16; i++){
-            if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
-                uint8_t * const ptr= dest_y + h->block_offset[i];
-                if(s->codec_id == CODEC_ID_H264)
+        if(s->codec_id == CODEC_ID_H264){
+            for(i=0; i<16; i++){
+                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
+                    uint8_t * const ptr= dest_y + h->block_offset[i];
                     h264_add_idct_c(ptr, h->mb + i*16, linesize);
-                else
+                }
+            }
+        }else{
+            for(i=0; i<16; i++){
+                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){ //FIXME benchmark weird rule, & below
+                    uint8_t * const ptr= dest_y + h->block_offset[i];
                     svq3_add_idct_c(ptr, h->mb + i*16, linesize, s->qscale, IS_INTRA(mb_type) ? 1 : 0);
+                }
             }
         }
     }
@@ -2292,22 +2298,31 @@
     if(!(s->flags&CODEC_FLAG_GRAY)){
         chroma_dc_dequant_idct_c(h->mb + 16*16, h->chroma_qp);
         chroma_dc_dequant_idct_c(h->mb + 16*16+4*16, h->chroma_qp);
-        for(i=16; i<16+4; i++){
-            if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
-                uint8_t * const ptr= dest_cb + h->block_offset[i];
-                if(s->codec_id == CODEC_ID_H264)
+        if(s->codec_id == CODEC_ID_H264){
+            for(i=16; i<16+4; i++){
+                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
+                    uint8_t * const ptr= dest_cb + h->block_offset[i];
                     h264_add_idct_c(ptr, h->mb + i*16, uvlinesize);
-                else
-                    svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
+                }
+            }
+            for(i=20; i<20+4; i++){
+                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
+                    uint8_t * const ptr= dest_cr + h->block_offset[i];
+                    h264_add_idct_c(ptr, h->mb + i*16, uvlinesize);
+                }
             }
-        }
-        for(i=20; i<20+4; i++){
-            if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
-                uint8_t * const ptr= dest_cr + h->block_offset[i];
-                if(s->codec_id == CODEC_ID_H264)
-                    h264_add_idct_c(ptr, h->mb + i*16, uvlinesize);
-                else
+        }else{
+            for(i=16; i<16+4; i++){
+                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
+                    uint8_t * const ptr= dest_cb + h->block_offset[i];
                     svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
+                }
+            }
+            for(i=20; i<20+4; i++){
+                if(h->non_zero_count_cache[ scan8[i] ] || h->mb[i*16]){
+                    uint8_t * const ptr= dest_cr + h->block_offset[i];
+                    svq3_add_idct_c(ptr, h->mb + i*16, uvlinesize, chroma_qp[s->qscale + 12] - 12, 2);
+                }
             }
         }
     }
--- a/svq3.c	Mon May 12 23:03:00 2003 +0000
+++ b/svq3.c	Tue May 13 00:46:42 2003 +0000
@@ -729,6 +729,13 @@
   while (get_bits (&s->gb, 1)) {
     get_bits (&s->gb, 8);
   }
+  
+  if(avctx->debug&FF_DEBUG_PICT_INFO){
+      printf("%c hpel:%d, tpel:%d aqp:%d qp:%d\n", 
+      ff_get_pict_type_char(s->pict_type), h->halfpel_flag, h->thirdpel_flag,
+      s->adaptive_quant, s->qscale
+      );
+  }
 
   /* B-frames are not supported */
   if (s->pict_type == B_TYPE/* && avctx->hurry_up*/)