changeset 12170:6f0db2eeaf70 libavcodec

vp8: Save mb border needed for intra prediction so that loop filter can run immediately after a mb row is decoded
author conrad
date Fri, 16 Jul 2010 07:20:35 +0000
parents 7501f327cfd1
children 77b51328fc59
files vp8.c
diffstat 1 files changed, 72 insertions(+), 16 deletions(-) [+]
line wrap: on
line diff
--- a/vp8.c	Fri Jul 16 07:20:31 2010 +0000
+++ b/vp8.c	Fri Jul 16 07:20:35 2010 +0000
@@ -62,6 +62,7 @@
     int update_last;    ///< update VP56_FRAME_PREVIOUS with the current one
     int update_golden;  ///< VP56_FRAME_NONE if not updated, or which frame to copy if so
     int update_altref;
+    int deblock_filter;
 
     /**
      * If this flag is not set, all the probability updates
@@ -85,6 +86,12 @@
     int b4_stride;
 
     /**
+     * Cache of the top row needed for intra prediction
+     * 16 for luma, 8 for each chroma plane
+     */
+    uint8_t (*top_border)[16+8+8];
+
+    /**
      * For coeff decode, we need to know whether the above block had non-zero
      * coefficients. This means for each macroblock, we need data for 4 luma
      * blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9
@@ -197,6 +204,7 @@
     av_freep(&s->intra4x4_pred_mode_base);
     av_freep(&s->top_nnz);
     av_freep(&s->edge_emu_buffer);
+    av_freep(&s->top_border);
 
     s->macroblocks        = NULL;
     s->intra4x4_pred_mode = NULL;
@@ -224,8 +232,9 @@
     s->macroblocks_base        = av_mallocz(s->mb_stride*(s->mb_height+1)*sizeof(*s->macroblocks));
     s->intra4x4_pred_mode_base = av_mallocz(s->b4_stride*(4*s->mb_height+1));
     s->top_nnz                 = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
+    s->top_border              = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
 
-    if (!s->macroblocks_base || !s->intra4x4_pred_mode_base || !s->top_nnz)
+    if (!s->macroblocks_base || !s->intra4x4_pred_mode_base || !s->top_nnz || !s->top_border)
         return AVERROR(ENOMEM);
 
     s->macroblocks        = s->macroblocks_base        + 1 + s->mb_stride;
@@ -852,6 +861,47 @@
         mb->skip = 1;
 }
 
+static av_always_inline // save a macroblock's bottom pixel row into its 32-byte top_border entry
+void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
+                      int linesize, int uvlinesize, int simple)
+{
+    AV_COPY128(top_border, src_y + linesize*15); // luma: row 15 of the block -> bytes 0..15
+    if (simple) // chroma is not backed up in this mode; the simple-filter caller passes NULL chroma
+        return;
+    AV_COPY64(top_border+16, src_cb + uvlinesize*7); // cb: row 7 -> bytes 16..23
+    AV_COPY64(top_border+24, src_cr + uvlinesize*7); // cr: row 7 -> bytes 24..31
+}
+
+static av_always_inline // exchange saved top_border bytes with the pixel row directly above a macroblock
+void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr,
+                    int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width,
+                    int simple, int xchg) // xchg selects AV_SWAP64 vs AV_COPY64 for spans that pass it on; literal-1 spans always swap
+{
+    uint8_t *top_border_m1 = top_border-32;     // for TL prediction
+    src_y  -=   linesize; // from here on the three plane pointers address the row above the macroblock
+    src_cb -= uvlinesize;
+    src_cr -= uvlinesize;
+
+// do/while(0) makes every use a single statement, so it is safe as the body of an unbraced if/else
+#define XCHG(a,b,xchg) do {\
+    if (xchg) AV_SWAP64(b,a);\
+    else      AV_COPY64(b,a);\
+} while (0)
+    XCHG(top_border_m1+8, src_y-8, xchg);
+    XCHG(top_border,      src_y,   xchg);
+    XCHG(top_border+8,    src_y+8, 1);
+    if (mb_x < mb_width-1) // skipped for the last macroblock of the row: there is no following top_border entry
+        XCHG(top_border+32, src_y+16, 1);
+
+    // only copy chroma for normal loop filter or to initialize the top row to 127
+    if (!simple || !mb_y) {
+        XCHG(top_border_m1+16, src_cb-8, xchg);
+        XCHG(top_border_m1+24, src_cr-8, xchg);
+        XCHG(top_border+16,    src_cb, 1);
+        XCHG(top_border+24,    src_cr, 1);
+    }
+}
+
 static int check_intra_pred_mode(int mode, int mb_x, int mb_y)
 {
     if (mode == DC_PRED8x8) {
@@ -870,6 +920,13 @@
 {
     int x, y, mode, nnz, tr;
 
+    // for the first row, we need to run xchg_mb_border to init the top edge to 127
+    // otherwise, skip it if we aren't going to deblock
+    if (s->deblock_filter || !mb_y)
+        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
+                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
+                       s->filter.simple, 1);
+
     if (mb->mode < MODE_I4x4) {
         mode = check_intra_pred_mode(mb->mode, mb_x, mb_y);
         s->hpc.pred16x16[mode](dst[0], s->linesize);
@@ -913,6 +970,11 @@
     mode = check_intra_pred_mode(s->chroma_pred_mode, mb_x, mb_y);
     s->hpc.pred8x8[mode](dst[1], s->uvlinesize);
     s->hpc.pred8x8[mode](dst[2], s->uvlinesize);
+
+    if (s->deblock_filter || !mb_y)
+        xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2],
+                       s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width,
+                       s->filter.simple, 0);
 }
 
 /**
@@ -1171,7 +1233,6 @@
     }
 }
 
-// TODO: look at backup_mb_border / xchg_mb_border in h264.c
 static void filter_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, int mb_x, int mb_y)
 {
     int filter_level, inner_limit, hev_thresh, mbedge_lim, bedge_lim;
@@ -1251,6 +1312,7 @@
     int mb_x;
 
     for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
+        backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
         filter_mb(s, dst, mb++, mb_x, mb_y);
         dst[0] += 16;
         dst[1] += 8;
@@ -1265,6 +1327,7 @@
     int mb_x;
 
     for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
+        backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1);
         filter_mb_simple(s, dst, mb++, mb_x, mb_y);
         dst += 16;
     }
@@ -1291,6 +1354,7 @@
         s->invisible = 1;
         goto skip_decode;
     }
+    s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh;
 
     for (i = 0; i < 4; i++)
         if (&s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] &&
@@ -1329,11 +1393,7 @@
     memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz));
 
     // top edge of 127 for intra prediction
-    if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) {
-        memset(curframe->data[0] - s->linesize  -1, 127, s->linesize  +1);
-        memset(curframe->data[1] - s->uvlinesize-1, 127, s->uvlinesize+1);
-        memset(curframe->data[2] - s->uvlinesize-1, 127, s->uvlinesize+1);
-    }
+    memset(s->top_border, 127, (s->mb_width+1)*sizeof(*s->top_border));
 
     for (mb_y = 0; mb_y < s->mb_height; mb_y++) {
         VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)];
@@ -1352,6 +1412,8 @@
             for (i = 0; i < 3; i++)
                 for (y = 0; y < 16>>!!i; y++)
                     dst[i][y*curframe->linesize[i]-1] = 129;
+        if (mb_y)
+            memset(s->top_border, 129, sizeof(*s->top_border));
 
         for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
             decode_mb_mode(s, mb, mb_x, mb_y, intra4x4 + 4*mb_x);
@@ -1388,19 +1450,13 @@
             dst[2] += 8;
             mb++;
         }
-        if (mb_y && s->filter.level && avctx->skip_loop_filter < skip_thresh) {
+        if (s->deblock_filter) {
             if (s->filter.simple)
-                filter_mb_row_simple(s, mb_y-1);
+                filter_mb_row_simple(s, mb_y);
             else
-                filter_mb_row(s, mb_y-1);
+                filter_mb_row(s, mb_y);
         }
     }
-    if (s->filter.level && avctx->skip_loop_filter < skip_thresh) {
-        if (s->filter.simple)
-            filter_mb_row_simple(s, mb_y-1);
-        else
-            filter_mb_row(s, mb_y-1);
-    }
 
 skip_decode:
     // if future frames don't use the updated probabilities,