# HG changeset patch # User conrad # Date 1279264835 0 # Node ID 6f0db2eeaf707bc371f6f384da47e8bf3968ad7c # Parent 7501f327cfd1968e08dccb28890d76e233666889 vp8: Save mb border needed for intra prediction so that loop filter can run immediately after a mb row is decoded diff -r 7501f327cfd1 -r 6f0db2eeaf70 vp8.c --- a/vp8.c Fri Jul 16 07:20:31 2010 +0000 +++ b/vp8.c Fri Jul 16 07:20:35 2010 +0000 @@ -62,6 +62,7 @@ int update_last; ///< update VP56_FRAME_PREVIOUS with the current one int update_golden; ///< VP56_FRAME_NONE if not updated, or which frame to copy if so int update_altref; + int deblock_filter; /** * If this flag is not set, all the probability updates @@ -85,6 +86,12 @@ int b4_stride; /** + * Cache of the top row needed for intra prediction + * 16 for luma, 8 for each chroma plane + */ + uint8_t (*top_border)[16+8+8]; + + /** * For coeff decode, we need to know whether the above block had non-zero * coefficients. This means for each macroblock, we need data for 4 luma * blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9 @@ -197,6 +204,7 @@ av_freep(&s->intra4x4_pred_mode_base); av_freep(&s->top_nnz); av_freep(&s->edge_emu_buffer); + av_freep(&s->top_border); s->macroblocks = NULL; s->intra4x4_pred_mode = NULL; @@ -224,8 +232,9 @@ s->macroblocks_base = av_mallocz(s->mb_stride*(s->mb_height+1)*sizeof(*s->macroblocks)); s->intra4x4_pred_mode_base = av_mallocz(s->b4_stride*(4*s->mb_height+1)); s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz)); + s->top_border = av_mallocz((s->mb_width+1)*sizeof(*s->top_border)); - if (!s->macroblocks_base || !s->intra4x4_pred_mode_base || !s->top_nnz) + if (!s->macroblocks_base || !s->intra4x4_pred_mode_base || !s->top_nnz || !s->top_border) return AVERROR(ENOMEM); s->macroblocks = s->macroblocks_base + 1 + s->mb_stride; @@ -852,6 +861,47 @@ mb->skip = 1; } +static av_always_inline +void backup_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, + int linesize, int uvlinesize, int simple) +{ + AV_COPY128(top_border, src_y + 15*linesize); + if (!simple) { + AV_COPY64(top_border+16, src_cb + 7*uvlinesize); + AV_COPY64(top_border+24, src_cr + 7*uvlinesize); + } +} + +static av_always_inline +void xchg_mb_border(uint8_t *top_border, uint8_t *src_y, uint8_t *src_cb, uint8_t *src_cr, + int linesize, int uvlinesize, int mb_x, int mb_y, int mb_width, + int simple, int xchg) +{ + uint8_t *top_border_m1 = top_border-32; // for TL prediction + src_y -= linesize; + src_cb -= uvlinesize; + src_cr -= uvlinesize; + +#define XCHG(a,b,xchg)\ +if (xchg) AV_SWAP64(b,a);\ +else AV_COPY64(b,a); + + XCHG(top_border_m1+8, src_y-8, xchg); + XCHG(top_border, src_y, xchg); + XCHG(top_border+8, src_y+8, 1); + if (mb_x < mb_width-1) + XCHG(top_border+32, src_y+16, 1); + + // only copy chroma for normal loop filter + // or to initialize the top row to 127 + if (!simple || !mb_y) { + XCHG(top_border_m1+16, src_cb-8, xchg); + XCHG(top_border_m1+24, src_cr-8, xchg); + XCHG(top_border+16, src_cb, 1); + XCHG(top_border+24, src_cr, 1); + } +} + static int check_intra_pred_mode(int mode, int mb_x, int mb_y) { if (mode == DC_PRED8x8) { @@ -870,6 +920,13 @@ { int x, y, mode, nnz, tr; + // for the first row, we need to run xchg_mb_border to init the top edge to 127 + // otherwise, skip it if we aren't going to deblock + if (s->deblock_filter || !mb_y) + xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], + s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width, + s->filter.simple, 1); + if (mb->mode < MODE_I4x4) { mode = check_intra_pred_mode(mb->mode, mb_x, mb_y); s->hpc.pred16x16[mode](dst[0], s->linesize); @@ -913,6 +970,11 @@ mode = check_intra_pred_mode(s->chroma_pred_mode, mb_x, mb_y); s->hpc.pred8x8[mode](dst[1], s->uvlinesize); s->hpc.pred8x8[mode](dst[2], s->uvlinesize); + + if (s->deblock_filter || !mb_y) + xchg_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], + s->linesize, s->uvlinesize, mb_x, mb_y, s->mb_width, + s->filter.simple, 0); } /** @@ -1171,7 +1233,6 @@ } } -// TODO: look at backup_mb_border / xchg_mb_border in h264.c static void filter_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, int mb_x, int mb_y) { int filter_level, inner_limit, hev_thresh, mbedge_lim, bedge_lim; @@ -1251,6 +1312,7 @@ int mb_x; for (mb_x = 0; mb_x < s->mb_width; mb_x++) { + backup_mb_border(s->top_border[mb_x+1], dst[0], dst[1], dst[2], s->linesize, s->uvlinesize, 0); filter_mb(s, dst, mb++, mb_x, mb_y); dst[0] += 16; dst[1] += 8; @@ -1265,6 +1327,7 @@ int mb_x; for (mb_x = 0; mb_x < s->mb_width; mb_x++) { + backup_mb_border(s->top_border[mb_x+1], dst, NULL, NULL, s->linesize, 0, 1); filter_mb_simple(s, dst, mb++, mb_x, mb_y); dst += 16; } @@ -1291,6 +1354,7 @@ s->invisible = 1; goto skip_decode; } + s->deblock_filter = s->filter.level && avctx->skip_loop_filter < skip_thresh; for (i = 0; i < 4; i++) if (&s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] && @@ -1329,11 +1393,7 @@ memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz)); // top edge of 127 for intra prediction - if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) { - memset(curframe->data[0] - s->linesize -1, 127, s->linesize +1); - memset(curframe->data[1] - s->uvlinesize-1, 127, s->uvlinesize+1); - memset(curframe->data[2] - s->uvlinesize-1, 127, s->uvlinesize+1); - } + memset(s->top_border, 127, (s->mb_width+1)*sizeof(*s->top_border)); for (mb_y = 0; mb_y < s->mb_height; mb_y++) { VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)]; @@ -1352,6 +1412,8 @@ for (i = 0; i < 3; i++) for (y = 0; y < 16>>!!i; y++) dst[i][y*curframe->linesize[i]-1] = 129; + if (mb_y) + memset(s->top_border, 129, sizeof(*s->top_border)); for (mb_x = 0; mb_x < s->mb_width; mb_x++) { decode_mb_mode(s, mb, mb_x, mb_y, intra4x4 + 4*mb_x); @@ -1388,19 +1450,13 @@ dst[2] += 8; mb++; } - if (mb_y && s->filter.level && avctx->skip_loop_filter < skip_thresh) { + if (s->deblock_filter) { if (s->filter.simple) - filter_mb_row_simple(s, mb_y-1); + filter_mb_row_simple(s, mb_y); else - filter_mb_row(s, mb_y-1); + filter_mb_row(s, mb_y); } } - if (s->filter.level && avctx->skip_loop_filter < skip_thresh) { - if (s->filter.simple) - filter_mb_row_simple(s, mb_y-1); - else - filter_mb_row(s, mb_y-1); - } skip_decode: // if future frames don't use the updated probabilities,