changeset 12383:6a59d8a22c57 libavcodec

remove b4_stride/mb_stride. correct mb_xy to use mb_width. tighten allocations. reduce the number of zeroing.
author skal
date Wed, 11 Aug 2010 08:27:38 +0000
parents a0c84084fa2f
children e1ef713061ce
files vp8.c
diffstat 1 files changed, 10 insertions(+), 19 deletions(-) [+]
line wrap: on
line diff
--- a/vp8.c	Wed Aug 11 05:24:19 2010 +0000
+++ b/vp8.c	Wed Aug 11 08:27:38 2010 +0000
@@ -87,12 +87,10 @@
     VP8Macroblock *macroblocks;
     VP8Macroblock *macroblocks_base;
     VP8FilterStrength *filter_strength;
-    int mb_stride;
 
     uint8_t *intra4x4_pred_mode_top;
     uint8_t intra4x4_pred_mode_left[4];
     uint8_t *segmentation_map;
-    int b4_stride;
 
     /**
      * Cache of the top row needed for intra prediction
@@ -235,17 +233,12 @@
     s->mb_width  = (s->avctx->coded_width +15) / 16;
     s->mb_height = (s->avctx->coded_height+15) / 16;
 
-    // we allocate a border around the top/left of intra4x4 modes
-    // this is 4 blocks for intra4x4 to keep 4-byte alignment for fill_rectangle
-    s->mb_stride = s->mb_width+1;
-    s->b4_stride = 4*s->mb_stride;
-
-    s->macroblocks_base        = av_mallocz((s->mb_stride+s->mb_height*2+2)*sizeof(*s->macroblocks));
-    s->filter_strength         = av_mallocz(s->mb_stride*sizeof(*s->filter_strength));
+    s->macroblocks_base        = av_mallocz((s->mb_width+s->mb_height*2+1)*sizeof(*s->macroblocks));
+    s->filter_strength         = av_mallocz(s->mb_width*sizeof(*s->filter_strength));
     s->intra4x4_pred_mode_top  = av_mallocz(s->mb_width*4);
     s->top_nnz                 = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
     s->top_border              = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
-    s->segmentation_map        = av_mallocz(s->mb_stride*s->mb_height);
+    s->segmentation_map        = av_mallocz(s->mb_width*s->mb_height);
 
     if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_top ||
         !s->top_nnz || !s->top_border || !s->segmentation_map)
@@ -535,7 +528,7 @@
 }
 
 static av_always_inline
-void find_near_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y,
+void find_near_mvs(VP8Context *s, VP8Macroblock *mb,
                    VP56mv near[2], VP56mv *best, uint8_t cnt[4])
 {
     VP8Macroblock *mb_edge[3] = { mb + 2 /* top */,
@@ -767,7 +760,7 @@
         s->ref_count[mb->ref_frame-1]++;
 
         // motion vectors, 16.3
-        find_near_mvs(s, mb, mb_x, mb_y, near, &best, cnt);
+        find_near_mvs(s, mb, near, &best, cnt);
         if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[0]][0])) {
             if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[1]][1])) {
                 if (vp56_rac_get_prob_branchy(c, vp8_mode_contexts[cnt[2]][2])) {
@@ -1536,8 +1529,8 @@
 
     memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
 
-    /* Zero macroblock structures for top/left prediction from outside the frame. */
-    memset(s->macroblocks, 0, (s->mb_width + s->mb_height*2)*sizeof(*s->macroblocks));
+    /* Zero macroblock structures for top/top-left prediction from outside the frame. */
+    memset(s->macroblocks + s->mb_height*2 - 1, 0, (s->mb_width+1)*sizeof(*s->macroblocks));
 
     // top edge of 127 for intra prediction
     memset(s->top_border, 127, (s->mb_width+1)*sizeof(*s->top_border));
@@ -1548,14 +1541,14 @@
     for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
         VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
         VP8Macroblock *mb = s->macroblocks + (s->mb_height - mb_y - 1)*2;
-        uint8_t *segment_map = s->segmentation_map + mb_y*s->mb_stride;
-        int mb_xy = mb_y * s->mb_stride;
+        int mb_xy = mb_y*s->mb_width;
         uint8_t *dst[3] = {
             curframe->data[0] + 16*mb_y*s->linesize,
             curframe->data[1] +  8*mb_y*s->uvlinesize,
             curframe->data[2] +  8*mb_y*s->uvlinesize
         };
 
+        memset(mb - 1, 0, sizeof(*mb));   // zero left macroblock
         memset(s->left_nnz, 0, sizeof(s->left_nnz));
         AV_WN32A(s->intra4x4_pred_mode_left, DC_PRED*0x01010101);
 
@@ -1568,13 +1561,11 @@
             memset(s->top_border, 129, sizeof(*s->top_border));
 
         for (mb_x = 0; mb_x < s->mb_width; mb_x++, mb_xy++, mb++) {
-            uint8_t *segment_mb = segment_map+mb_x;
-
             /* Prefetch the current frame, 4 MBs ahead */
             s->dsp.prefetch(dst[0] + (mb_x&3)*4*s->linesize + 64, s->linesize, 4);
             s->dsp.prefetch(dst[1] + (mb_x&7)*s->uvlinesize + 64, dst[2] - dst[1], 2);
 
-            decode_mb_mode(s, mb, mb_x, mb_y, segment_mb);
+            decode_mb_mode(s, mb, mb_x, mb_y, s->segmentation_map + mb_xy);
 
             prefetch_motion(s, mb, mb_x, mb_y, mb_xy, VP56_FRAME_PREVIOUS);