changeset 12222:7acdbfd2a222 libavcodec

Calculate deblock strength per-MB instead of per-row Gives better cache locality, since the VP8Macroblock structs are still in cache. Inspired by the way x264 does it.
author darkshikari
date Thu, 22 Jul 2010 07:24:22 +0000
parents 45852dac8338
children 93e27a5401de
files vp8.c
diffstat 1 files changed, 38 insertions(+), 27 deletions(-) [+]
line wrap: on
line diff
--- a/vp8.c	Thu Jul 22 07:04:45 2010 +0000
+++ b/vp8.c	Thu Jul 22 07:24:22 2010 +0000
@@ -29,6 +29,11 @@
 #include "rectangle.h"
 
 typedef struct {
+    uint8_t filter_level;
+    uint8_t inner_limit;
+} VP8FilterStrength;
+
+typedef struct {
     uint8_t segment;
     uint8_t skip;
     // todo: make it possible to check for at least (i4x4 or split_mv)
@@ -79,6 +84,7 @@
 
     VP8Macroblock *macroblocks;
     VP8Macroblock *macroblocks_base;
+    VP8FilterStrength *filter_strength;
     int mb_stride;
 
     uint8_t *intra4x4_pred_mode;
@@ -231,11 +237,12 @@
     s->b4_stride = 4*s->mb_stride;
 
     s->macroblocks_base        = av_mallocz(s->mb_stride*(s->mb_height+1)*sizeof(*s->macroblocks));
+    s->filter_strength         = av_mallocz(s->mb_stride*sizeof(*s->filter_strength));
     s->intra4x4_pred_mode_base = av_mallocz(s->b4_stride*(4*s->mb_height+1));
     s->top_nnz                 = av_mallocz(s->mb_width*sizeof(*s->top_nnz));
     s->top_border              = av_mallocz((s->mb_width+1)*sizeof(*s->top_border));
 
-    if (!s->macroblocks_base || !s->intra4x4_pred_mode_base || !s->top_nnz || !s->top_border)
+    if (!s->macroblocks_base || !s->filter_strength || !s->intra4x4_pred_mode_base || !s->top_nnz || !s->top_border)
         return AVERROR(ENOMEM);
 
     s->macroblocks        = s->macroblocks_base        + 1 + s->mb_stride;
@@ -1212,7 +1219,7 @@
     }
 }
 
-static void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, int *level, int *inner, int *hev_thresh)
+static void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, VP8FilterStrength *f )
 {
     int interior_limit, filter_level;
 
@@ -1247,34 +1254,32 @@
     }
     interior_limit = FFMAX(interior_limit, 1);
 
-    *level = filter_level;
-    *inner = interior_limit;
-
-    if (hev_thresh) {
-        *hev_thresh = filter_level >= 15;
-
-        if (s->keyframe) {
-            if (filter_level >= 40)
-                *hev_thresh = 2;
-        } else {
-            if (filter_level >= 40)
-                *hev_thresh = 3;
-            else if (filter_level >= 20)
-                *hev_thresh = 2;
-        }
-    }
+    f->filter_level = filter_level;
+    f->inner_limit = interior_limit;
 }
 
-static void filter_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, int mb_x, int mb_y)
+static void filter_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, VP8FilterStrength *f, int mb_x, int mb_y)
 {
-    int filter_level, inner_limit, hev_thresh, mbedge_lim, bedge_lim;
+    int mbedge_lim, bedge_lim, hev_thresh;
+    int filter_level = f->filter_level;
+    int inner_limit = f->inner_limit;
 
-    filter_level_for_mb(s, mb, &filter_level, &inner_limit, &hev_thresh);
     if (!filter_level)
         return;
 
     mbedge_lim = 2*(filter_level+2) + inner_limit;
      bedge_lim = 2* filter_level    + inner_limit;
+    hev_thresh = filter_level >= 15;
+
+    if (s->keyframe) {
+        if (filter_level >= 40)
+            hev_thresh = 2;
+    } else {
+        if (filter_level >= 40)
+            hev_thresh = 3;
+        else if (filter_level >= 20)
+            hev_thresh = 2;
+    }
 
     if (mb_x) {
         s->vp8dsp.vp8_h_loop_filter16y(dst[0],     s->linesize,
@@ -1319,11 +1324,12 @@
     }
 }
 
-static void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8Macroblock *mb, int mb_x, int mb_y)
+static void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8Macroblock *mb, VP8FilterStrength *f, int mb_x, int mb_y)
 {
-    int filter_level, inner_limit, mbedge_lim, bedge_lim;
+    int mbedge_lim, bedge_lim;
+    int filter_level = f->filter_level;
+    int inner_limit = f->inner_limit;
 
-    filter_level_for_mb(s, mb, &filter_level, &inner_limit, NULL);
     if (!filter_level)
         return;
 
@@ -1349,6 +1355,7 @@
 
 static void filter_mb_row(VP8Context *s, int mb_y)
 {
+    VP8FilterStrength *f = s->filter_strength;
     VP8Macroblock *mb = s->macroblocks + mb_y*s->mb_stride;
     uint8_t *dst[3] = {
         s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize,
@@ -1359,7 +1366,7 @@
 
     for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
         backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0);
-        filter_mb(s, dst, mb++, mb_x, mb_y);
+        filter_mb(s, dst, mb++, f++, mb_x, mb_y);
         dst[0] += 16;
         dst[1] += 8;
         dst[2] += 8;
@@ -1368,13 +1375,14 @@
 
 static void filter_mb_row_simple(VP8Context *s, int mb_y)
 {
+    VP8FilterStrength *f = s->filter_strength;
+    VP8Macroblock *mb = s->macroblocks + mb_y*s->mb_stride;
     uint8_t *dst = s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize;
-    VP8Macroblock *mb = s->macroblocks + mb_y*s->mb_stride;
     int mb_x;
 
     for (mb_x = 0; mb_x < s->mb_width; mb_x++) {
         backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1);
-        filter_mb_simple(s, dst, mb++, mb_x, mb_y);
+        filter_mb_simple(s, dst, mb++, f++, mb_x, mb_y);
         dst += 16;
     }
 }
@@ -1497,6 +1505,9 @@
                 }
             }
 
+            if (s->deblock_filter)
+                filter_level_for_mb(s, mb, &s->filter_strength[mb_x]);
+
             dst[0] += 16;
             dst[1] += 8;
             dst[2] += 8;