changeset 10619:d930f99edbf9 libavcodec

Use a list to track which fragments coded in this frame still have outstanding coefficients yet to be decoded from the bitstream. Once a fragment reaches end-of-block, remove it from this new list. This change makes the VP3/Theora entropy decode process dramatically faster due to not having to iterate incessantly over fragments which have already been fully decoded.
author melanson
date Wed, 02 Dec 2009 02:30:15 +0000
parents 9cea4112ffaf
children 972b17631531
files vp3.c
diffstat 1 files changed, 70 insertions(+), 21 deletions(-) [+]
line wrap: on
line diff
--- a/vp3.c	Wed Dec 02 00:37:39 2009 +0000
+++ b/vp3.c	Wed Dec 02 02:30:15 2009 +0000
@@ -179,6 +179,14 @@
     int coded_fragment_list_index;
     int pixel_addresses_initialized;
 
+    /* track which fragments have already been decoded; called 'fast'
+     * because this data structure avoids having to iterate through every
+     * fragment in coded_fragment_list; once a fragment has been fully
+     * decoded, it is removed from this list */
+    int *fast_fragment_list;
+    int fragment_list_y_head;
+    int fragment_list_c_head;
+
     VLC dc_vlc[16];
     VLC ac_vlc_1[16];
     VLC ac_vlc_2[16];
@@ -723,6 +731,25 @@
         /* end the list of coded C fragments */
         s->last_coded_c_fragment = s->coded_fragment_list_index - 1;
 
+    for (i = 0; i < s->fragment_count - 1; i++) {
+        s->fast_fragment_list[i] = i + 1;
+    }
+    s->fast_fragment_list[s->fragment_count - 1] = -1;
+
+    if (s->last_coded_y_fragment == -1)
+        s->fragment_list_y_head = -1;
+    else {
+        s->fragment_list_y_head = s->first_coded_y_fragment;
+        s->fast_fragment_list[s->last_coded_y_fragment] = -1;
+    }
+
+    if (s->last_coded_c_fragment == -1)
+        s->fragment_list_c_head = -1;
+    else {
+        s->fragment_list_c_head = s->first_coded_c_fragment;
+        s->fast_fragment_list[s->last_coded_c_fragment] = -1;
+    }
+
     return 0;
 }
 
@@ -1029,7 +1056,7 @@
  */
 static int unpack_vlcs(Vp3DecodeContext *s, GetBitContext *gb,
                         VLC *table, int coeff_index,
-                        int first_fragment, int last_fragment,
+                        int y_plane,
                         int eob_run)
 {
     int i;
@@ -1038,6 +1065,10 @@
     DCTELEM coeff = 0;
     Vp3Fragment *fragment;
     int bits_to_get;
+    int next_fragment;
+    int previous_fragment;
+    int fragment_num;
+    int *list_head;
 
     /* local references to structure members to avoid repeated deferences */
     uint8_t *perm= s->scantable.permutated;
@@ -1045,20 +1076,26 @@
     Vp3Fragment *all_fragments = s->all_fragments;
     uint8_t *coeff_counts = s->coeff_counts;
     VLC_TYPE (*vlc_table)[2] = table->table;
-
-    if ((first_fragment >= s->fragment_count) ||
-        (last_fragment >= s->fragment_count)) {
+    int *fast_fragment_list = s->fast_fragment_list;
 
-        av_log(s->avctx, AV_LOG_ERROR, "  vp3:unpack_vlcs(): bad fragment number (%d -> %d ?)\n",
-            first_fragment, last_fragment);
-        return 0;
+    if (y_plane) {
+        next_fragment = s->fragment_list_y_head;
+        list_head = &s->fragment_list_y_head;
+    } else {
+        next_fragment = s->fragment_list_c_head;
+        list_head = &s->fragment_list_c_head;
     }
 
-    for (i = first_fragment; i <= last_fragment; i++) {
-        int fragment_num = coded_fragment_list[i];
+    i = next_fragment;
+    previous_fragment = -1;  /* this indicates that the previous fragment is actually the list head */
+    while (i != -1) {
+        fragment_num = coded_fragment_list[i];
 
-        if (coeff_counts[fragment_num] > coeff_index)
+        if (coeff_counts[fragment_num] > coeff_index) {
+            previous_fragment = i;
+            i = fast_fragment_list[i];
             continue;
+        }
         fragment = &all_fragments[fragment_num];
 
         if (!eob_run) {
@@ -1091,10 +1128,20 @@
                 s->next_coeff->next=NULL;
                 fragment->next_coeff= s->next_coeff++;
             }
+            /* previous fragment is now this fragment */
+            previous_fragment = i;
         } else {
             coeff_counts[fragment_num] |= 128;
             eob_run--;
+            /* remove this fragment from the list */
+            if (previous_fragment != -1)
+                fast_fragment_list[previous_fragment] = fast_fragment_list[i];
+            else
+                *list_head = fast_fragment_list[i];
+            /* previous fragment remains unchanged */
         }
+
+        i = fast_fragment_list[i];
     }
 
     return eob_run;
@@ -1123,14 +1170,14 @@
 
     /* unpack the Y plane DC coefficients */
     residual_eob_run = unpack_vlcs(s, gb, &s->dc_vlc[dc_y_table], 0,
-        s->first_coded_y_fragment, s->last_coded_y_fragment, residual_eob_run);
+        1, residual_eob_run);
 
     /* reverse prediction of the Y-plane DC coefficients */
     reverse_dc_prediction(s, 0, s->fragment_width, s->fragment_height);
 
     /* unpack the C plane DC coefficients */
     residual_eob_run = unpack_vlcs(s, gb, &s->dc_vlc[dc_c_table], 0,
-        s->first_coded_c_fragment, s->last_coded_c_fragment, residual_eob_run);
+        0, residual_eob_run);
 
     /* reverse prediction of the C-plane DC coefficients */
     if (!(s->avctx->flags & CODEC_FLAG_GRAY))
@@ -1148,37 +1195,37 @@
     /* unpack the group 1 AC coefficients (coeffs 1-5) */
     for (i = 1; i <= 5; i++) {
         residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_1[ac_y_table], i,
-            s->first_coded_y_fragment, s->last_coded_y_fragment, residual_eob_run);
+            1, residual_eob_run);
 
         residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_1[ac_c_table], i,
-            s->first_coded_c_fragment, s->last_coded_c_fragment, residual_eob_run);
+            0, residual_eob_run);
     }
 
     /* unpack the group 2 AC coefficients (coeffs 6-14) */
     for (i = 6; i <= 14; i++) {
         residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_2[ac_y_table], i,
-            s->first_coded_y_fragment, s->last_coded_y_fragment, residual_eob_run);
+            1, residual_eob_run);
 
         residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_2[ac_c_table], i,
-            s->first_coded_c_fragment, s->last_coded_c_fragment, residual_eob_run);
+            0, residual_eob_run);
     }
 
     /* unpack the group 3 AC coefficients (coeffs 15-27) */
     for (i = 15; i <= 27; i++) {
         residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_3[ac_y_table], i,
-            s->first_coded_y_fragment, s->last_coded_y_fragment, residual_eob_run);
+            1, residual_eob_run);
 
         residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_3[ac_c_table], i,
-            s->first_coded_c_fragment, s->last_coded_c_fragment, residual_eob_run);
+            0, residual_eob_run);
     }
 
     /* unpack the group 4 AC coefficients (coeffs 28-63) */
     for (i = 28; i <= 63; i++) {
         residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_4[ac_y_table], i,
-            s->first_coded_y_fragment, s->last_coded_y_fragment, residual_eob_run);
+            1, residual_eob_run);
 
         residual_eob_run = unpack_vlcs(s, gb, &s->ac_vlc_4[ac_c_table], i,
-            s->first_coded_c_fragment, s->last_coded_c_fragment, residual_eob_run);
+            0, residual_eob_run);
     }
 
     return 0;
@@ -1756,9 +1803,10 @@
     s->coeff_counts = av_malloc(s->fragment_count * sizeof(*s->coeff_counts));
     s->coeffs = av_malloc(s->fragment_count * sizeof(Coeff) * 65);
     s->coded_fragment_list = av_malloc(s->fragment_count * sizeof(int));
+    s->fast_fragment_list = av_malloc(s->fragment_count * sizeof(int));
     s->pixel_addresses_initialized = 0;
     if (!s->superblock_coding || !s->all_fragments || !s->coeff_counts ||
-        !s->coeffs || !s->coded_fragment_list) {
+        !s->coeffs || !s->coded_fragment_list || !s->fast_fragment_list) {
         vp3_decode_end(avctx);
         return -1;
     }
@@ -2057,6 +2105,7 @@
     av_free(s->coeff_counts);
     av_free(s->coeffs);
     av_free(s->coded_fragment_list);
+    av_free(s->fast_fragment_list);
     av_free(s->superblock_fragments);
     av_free(s->superblock_macroblocks);
     av_free(s->macroblock_fragments);