changeset 711:dcbcf9676c9f libavcodec

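Optimize mpeg1_decode_block(): split it into separate intra and inter variants that dequantize the coefficients while decoding them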
author michaelni
date Mon, 30 Sep 2002 16:14:14 +0000
parents 97377ab86647
children e55b91623e09
files mpeg12.c mpegvideo.c
diffstat 2 files changed, 174 insertions(+), 77 deletions(-) [+]
line wrap: on
line diff
--- a/mpeg12.c	Mon Sep 30 10:06:17 2002 +0000
+++ b/mpeg12.c	Mon Sep 30 16:14:14 2002 +0000
@@ -52,7 +52,10 @@
                          int component);
 static void mpeg1_encode_motion(MpegEncContext *s, int val);
 static void mpeg1_skip_picture(MpegEncContext *s, int pict_num);
-static int mpeg1_decode_block(MpegEncContext *s, 
+static inline int mpeg1_decode_block_inter(MpegEncContext *s, 
+                              DCTELEM *block, 
+                              int n);
+static inline int mpeg1_decode_block_intra(MpegEncContext *s, 
                               DCTELEM *block, 
                               int n);
 static int mpeg2_decode_block_non_intra(MpegEncContext *s, 
@@ -92,8 +95,8 @@
                 run= 65;
                 level= 0;
             }else if(code==rl->n+1){ //eob
-                run= 192;
-                level= 1;
+                run= 0;
+                level= 127;
             }else{
                 run=   rl->table_run  [code] + 1;
                 level= rl->table_level[code];
@@ -932,30 +935,35 @@
     if (s->mpeg2) {
         if (s->mb_intra) {
             for(i=0;i<6;i++) {
-                if (cbp & (1 << (5 - i))) {
-                    if (mpeg2_decode_block_intra(s, block[i], i) < 0)
-                        return -1;
-                } else {
-                    s->block_last_index[i] = -1;
-                }
+                if (mpeg2_decode_block_intra(s, block[i], i) < 0)
+                    return -1;
             }
         } else {
             for(i=0;i<6;i++) {
-                if (cbp & (1 << (5 - i))) {
+                if (cbp & 32) {
                     if (mpeg2_decode_block_non_intra(s, block[i], i) < 0)
                         return -1;
                 } else {
                     s->block_last_index[i] = -1;
                 }
+                cbp+=cbp;
             }
         }
     } else {
-        for(i=0;i<6;i++) {
-            if (cbp & (1 << (5 - i))) {
-                if (mpeg1_decode_block(s, block[i], i) < 0)
+        if (s->mb_intra) {
+            for(i=0;i<6;i++) {
+                if (mpeg1_decode_block_intra(s, block[i], i) < 0)
                     return -1;
-            } else {
-                s->block_last_index[i] = -1;
+            }
+        }else{
+            for(i=0;i<6;i++) {
+                if (cbp & 32) {
+                    if (mpeg1_decode_block_inter(s, block[i], i) < 0)
+                        return -1;
+                } else {
+                    s->block_last_index[i] = -1;
+                }
+                cbp+=cbp;
             }
         }
     }
@@ -1018,78 +1026,155 @@
     return diff;
 }
 
-static int mpeg1_decode_block(MpegEncContext *s, 
+static inline int mpeg1_decode_block_intra(MpegEncContext *s, 
                                DCTELEM *block, 
                                int n)
 {
     int level, dc, diff, i, j, run;
-    int code, component;
+    int component;
     RLTable *rl = &rl_mpeg1;
     UINT8 * const scantable= s->intra_scantable.permutated;
+    const UINT16 *quant_matrix= s->intra_matrix;
+    const int qscale= s->qscale;
 
-    if (s->mb_intra) {
-        /* DC coef */
-        component = (n <= 3 ? 0 : n - 4 + 1);
-        diff = decode_dc(s, component);
-        if (diff >= 0xffff)
-            return -1;
-        dc = s->last_dc[component];
-        dc += diff;
-        s->last_dc[component] = dc;
-        block[0] = dc;
-        dprintf("dc=%d diff=%d\n", dc, diff);
-        i = 1;
-    } else {
+    /* DC coef */
+    component = (n <= 3 ? 0 : n - 4 + 1);
+    diff = decode_dc(s, component);
+    if (diff >= 0xffff)
+        return -1;
+    dc = s->last_dc[component];
+    dc += diff;
+    s->last_dc[component] = dc;
+    block[0] = dc<<3;
+    dprintf("dc=%d diff=%d\n", dc, diff);
+    i = 0;
+    {
+        OPEN_READER(re, &s->gb);    
+        /* now quantify & encode AC coefs */
+        for(;;) {
+            UPDATE_CACHE(re, &s->gb);
+            GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2);
+            
+            if(level == 127){
+                break;
+            } else if(level != 0) {
+                i += run;
+                j = scantable[i];
+                level= (level*qscale*quant_matrix[j])>>3;
+                level= (level-1)|1;
+                level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
+                LAST_SKIP_BITS(re, &s->gb, 1);
+            } else {
+                /* escape */
+                run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS(re, &s->gb, 6);
+                UPDATE_CACHE(re, &s->gb);
+                level = SHOW_SBITS(re, &s->gb, 8); SKIP_BITS(re, &s->gb, 8);
+                if (level == -128) {
+                    level = SHOW_UBITS(re, &s->gb, 8) - 256; LAST_SKIP_BITS(re, &s->gb, 8);
+                } else if (level == 0) {
+                    level = SHOW_UBITS(re, &s->gb, 8)      ; LAST_SKIP_BITS(re, &s->gb, 8);
+                }
+                i += run;
+                j = scantable[i];
+                if(level<0){
+                    level= -level;
+                    level= (level*qscale*quant_matrix[j])>>3;
+                    level= (level-1)|1;
+                    level= -level;
+                }else{
+                    level= (level*qscale*quant_matrix[j])>>3;
+                    level= (level-1)|1;
+                }
+            }
+            if (i > 63){
+                fprintf(stderr, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
+                return -1;
+            }
+
+            block[j] = level;
+        }
+        CLOSE_READER(re, &s->gb);
+    }
+    s->block_last_index[n] = i;
+   return 0;
+}
+
+static inline int mpeg1_decode_block_inter(MpegEncContext *s, 
+                               DCTELEM *block, 
+                               int n)
+{
+    int level, i, j, run;
+    RLTable *rl = &rl_mpeg1;
+    UINT8 * const scantable= s->intra_scantable.permutated;
+    const UINT16 *quant_matrix= s->inter_matrix;
+    const int qscale= s->qscale;
+
+    {
         int v;
         OPEN_READER(re, &s->gb);
-        i = 0;
+        i = -1;
         /* special case for the first coef. no need to add a second vlc table */
         UPDATE_CACHE(re, &s->gb);
         v= SHOW_UBITS(re, &s->gb, 2);
         if (v & 2) {
-            run = 0;
-            level = 1 - ((v & 1) << 1);
             SKIP_BITS(re, &s->gb, 2);
-            CLOSE_READER(re, &s->gb);
-            goto add_coef;
+            level= (3*qscale*quant_matrix[0])>>4;
+            level= (level-1)|1;
+            if(v&1)
+                level= -level;
+            block[ scantable[0] ] = level;
+            i++;
         }
         CLOSE_READER(re, &s->gb);
     }
+    {
+        OPEN_READER(re, &s->gb);    
+        /* now quantify & encode AC coefs */
+        for(;;) {
+            UPDATE_CACHE(re, &s->gb);
+            GET_RL_VLC(level, run, re, &s->gb, rl->rl_vlc[0], TEX_VLC_BITS, 2);
+            
+            if(level == 127){
+                break;
+            } else if(level != 0) {
+                i += run;
+                j = scantable[i];
+                level= ((level*2+1)*qscale*quant_matrix[j])>>4;
+                level= (level-1)|1;
+                level = (level ^ SHOW_SBITS(re, &s->gb, 1)) - SHOW_SBITS(re, &s->gb, 1);
+                LAST_SKIP_BITS(re, &s->gb, 1);
+            } else {
+                /* escape */
+                run = SHOW_UBITS(re, &s->gb, 6)+1; LAST_SKIP_BITS(re, &s->gb, 6);
+                UPDATE_CACHE(re, &s->gb);
+                level = SHOW_SBITS(re, &s->gb, 8); SKIP_BITS(re, &s->gb, 8);
+                if (level == -128) {
+                    level = SHOW_UBITS(re, &s->gb, 8) - 256; LAST_SKIP_BITS(re, &s->gb, 8);
+                } else if (level == 0) {
+                    level = SHOW_UBITS(re, &s->gb, 8)      ; LAST_SKIP_BITS(re, &s->gb, 8);
+                }
+                i += run;
+                j = scantable[i];
+                if(level<0){
+                    level= -level;
+                    level= ((level*2+1)*qscale*quant_matrix[j])>>4;
+                    level= (level-1)|1;
+                    level= -level;
+                }else{
+                    level= ((level*2+1)*qscale*quant_matrix[j])>>4;
+                    level= (level-1)|1;
+                }
+            }
+            if (i > 63){
+                fprintf(stderr, "ac-tex damaged at %d %d\n", s->mb_x, s->mb_y);
+                return -1;
+            }
 
-    /* now quantify & encode AC coefs */
-    for(;;) {
-        code = get_vlc2(&s->gb, rl->vlc.table, TEX_VLC_BITS, 2);
-        if (code < 0) {
-            return -1;
+            block[j] = level;
         }
-        if (code == 112) {
-            break;
-        } else if (code == 111) {
-            /* escape */
-            run = get_bits(&s->gb, 6);
-            level = get_bits(&s->gb, 8);
-            level= (level + ((-1)<<7)) ^ ((-1)<<7); //sign extension
-            if (level == -128) {
-                level = get_bits(&s->gb, 8) - 256;
-            } else if (level == 0) {
-                level = get_bits(&s->gb, 8);
-            }
-        } else {
-            run = rl->table_run[code];
-            level = rl->table_level[code];
-            if (get_bits1(&s->gb))
-                level = -level;
-        }
-        i += run;
-        if (i >= 64)
-            return -1;
-    add_coef:
-        dprintf("%d: run=%d level=%d\n", n, run, level);
-	j = scantable[i];
-        block[j] = level;
-        i++;
+        CLOSE_READER(re, &s->gb);
     }
-    s->block_last_index[n] = i-1;
+    s->block_last_index[n] = i;
     return 0;
 }
 
--- a/mpegvideo.c	Mon Sep 30 10:06:17 2002 +0000
+++ b/mpegvideo.c	Mon Sep 30 16:14:14 2002 +0000
@@ -1517,8 +1517,7 @@
 static inline void put_dct(MpegEncContext *s, 
                            DCTELEM *block, int i, UINT8 *dest, int line_size)
 {
-    if (!s->mpeg2)
-        s->dct_unquantize(s, block, i, s->qscale);
+    s->dct_unquantize(s, block, i, s->qscale);
     s->idct_put (dest, line_size, block);
 }
 
@@ -1723,7 +1722,8 @@
             if(s->hurry_up>1) goto the_end;
 
             /* add dct residue */
-            if(s->encoding || !(s->mpeg2 || s->h263_msmpeg4 || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
+            if(s->encoding || !(   s->mpeg2 || s->h263_msmpeg4 || s->codec_id==CODEC_ID_MPEG1VIDEO 
+                                || (s->codec_id==CODEC_ID_MPEG4 && !s->mpeg_quant))){
                 add_dequant_dct(s, block[0], 0, dest_y, dct_linesize);
                 add_dequant_dct(s, block[1], 1, dest_y + 8, dct_linesize);
                 add_dequant_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
@@ -1746,14 +1746,26 @@
             }
         } else {
             /* dct only in intra block */
-            put_dct(s, block[0], 0, dest_y, dct_linesize);
-            put_dct(s, block[1], 1, dest_y + 8, dct_linesize);
-            put_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
-            put_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
+            if(s->encoding || !(s->mpeg2 || s->codec_id==CODEC_ID_MPEG1VIDEO)){
+                put_dct(s, block[0], 0, dest_y, dct_linesize);
+                put_dct(s, block[1], 1, dest_y + 8, dct_linesize);
+                put_dct(s, block[2], 2, dest_y + dct_offset, dct_linesize);
+                put_dct(s, block[3], 3, dest_y + dct_offset + 8, dct_linesize);
 
-            if(!(s->flags&CODEC_FLAG_GRAY)){
-                put_dct(s, block[4], 4, dest_cb, s->uvlinesize);
-                put_dct(s, block[5], 5, dest_cr, s->uvlinesize);
+                if(!(s->flags&CODEC_FLAG_GRAY)){
+                    put_dct(s, block[4], 4, dest_cb, s->uvlinesize);
+                    put_dct(s, block[5], 5, dest_cr, s->uvlinesize);
+                }
+            }else{
+                s->idct_put(dest_y                 , dct_linesize, block[0]);
+                s->idct_put(dest_y              + 8, dct_linesize, block[1]);
+                s->idct_put(dest_y + dct_offset    , dct_linesize, block[2]);
+                s->idct_put(dest_y + dct_offset + 8, dct_linesize, block[3]);
+
+                if(!(s->flags&CODEC_FLAG_GRAY)){
+                    s->idct_put(dest_cb, s->uvlinesize, block[4]);
+                    s->idct_put(dest_cr, s->uvlinesize, block[5]);
+                }
             }
         }
     }