# HG changeset patch # User rbultje # Date 1277234649 0 # Node ID f2007d7c3f1dfa00c84dbdb1b5a6c263e4398d8d # Parent 7d04a6cec75f613a79b0fc2c24af78c1192e55a0 Native VP8 decoder. Patch by David Conrad and myself. diff -r 7d04a6cec75f -r f2007d7c3f1d Makefile --- a/Makefile Tue Jun 22 19:19:13 2010 +0000 +++ b/Makefile Tue Jun 22 19:24:09 2010 +0000 @@ -375,6 +375,8 @@ vp3dsp.o OBJS-$(CONFIG_VP6_DECODER) += vp6.o vp56.o vp56data.o vp56dsp.o \ vp3dsp.o vp6dsp.o huffman.o +OBJS-$(CONFIG_VP8_DECODER) += vp8.o vp8dsp.o vp56.o vp56data.o \ + h264pred.o OBJS-$(CONFIG_VQA_DECODER) += vqavideo.o OBJS-$(CONFIG_WAVPACK_DECODER) += wavpack.o OBJS-$(CONFIG_WMAPRO_DECODER) += wmaprodec.o wma.o diff -r 7d04a6cec75f -r f2007d7c3f1d allcodecs.c --- a/allcodecs.c Tue Jun 22 19:19:13 2010 +0000 +++ b/allcodecs.c Tue Jun 22 19:24:09 2010 +0000 @@ -201,6 +201,7 @@ REGISTER_DECODER (VP6, vp6); REGISTER_DECODER (VP6A, vp6a); REGISTER_DECODER (VP6F, vp6f); + REGISTER_DECODER (VP8, vp8); REGISTER_DECODER (VQA, vqa); REGISTER_ENCDEC (WMV1, wmv1); REGISTER_ENCDEC (WMV2, wmv2); diff -r 7d04a6cec75f -r f2007d7c3f1d avcodec.h --- a/avcodec.h Tue Jun 22 19:19:13 2010 +0000 +++ b/avcodec.h Tue Jun 22 19:24:09 2010 +0000 @@ -30,8 +30,8 @@ #include "libavutil/avutil.h" #define LIBAVCODEC_VERSION_MAJOR 52 -#define LIBAVCODEC_VERSION_MINOR 77 -#define LIBAVCODEC_VERSION_MICRO 1 +#define LIBAVCODEC_VERSION_MINOR 78 +#define LIBAVCODEC_VERSION_MICRO 0 #define LIBAVCODEC_VERSION_INT AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \ LIBAVCODEC_VERSION_MINOR, \ diff -r 7d04a6cec75f -r f2007d7c3f1d dsputil.c --- a/dsputil.c Tue Jun 22 19:19:13 2010 +0000 +++ b/dsputil.c Tue Jun 22 19:24:09 2010 +0000 @@ -39,6 +39,7 @@ #include "ac3dec.h" #include "vorbis.h" #include "png.h" +#include "vp8dsp.h" uint8_t ff_cropTbl[256 + 2 * MAX_NEG_CROP] = {0, }; uint32_t ff_squareTbl[512] = {0, }; @@ -2656,6 +2657,18 @@ } #endif /* CONFIG_RV40_DECODER */ +#if CONFIG_VP8_DECODER +void ff_put_vp8_pixels16_c(uint8_t *dst, uint8_t 
*src, int stride, int h, int x, int y) { + put_pixels16_c(dst, src, stride, h); +} +void ff_put_vp8_pixels8_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) { + put_pixels8_c(dst, src, stride, h); +} +void ff_put_vp8_pixels4_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y) { + put_pixels4_c(dst, src, stride, h); +} +#endif + static void wmv2_mspel8_v_lowpass(uint8_t *dst, uint8_t *src, int dstStride, int srcStride, int w){ uint8_t *cm = ff_cropTbl + MAX_NEG_CROP; int i; diff -r 7d04a6cec75f -r f2007d7c3f1d vp56.h --- a/vp56.h Tue Jun 22 19:19:13 2010 +0000 +++ b/vp56.h Tue Jun 22 19:24:09 2010 +0000 @@ -237,6 +237,12 @@ return bit; } +// rounding is different than vp56_rac_get, is vp56_rac_get wrong? +static inline int vp8_rac_get(VP56RangeCoder *c) +{ + return vp56_rac_get_prob(c, 128); +} + static inline int vp56_rac_gets(VP56RangeCoder *c, int bits) { int value = 0; @@ -248,12 +254,46 @@ return value; } +static inline int vp8_rac_get_uint(VP56RangeCoder *c, int bits) +{ + int value = 0; + + while (bits--) { + value = (value << 1) | vp8_rac_get(c); + } + + return value; +} + +// fixme: add 1 bit to all the calls to this? 
+static inline int vp8_rac_get_sint(VP56RangeCoder *c, int bits) +{ + int v; + + if (!vp8_rac_get(c)) + return 0; + + v = vp8_rac_get_uint(c, bits); + + if (vp8_rac_get(c)) + v = -v; + + return v; +} + +// P(7) static inline int vp56_rac_gets_nn(VP56RangeCoder *c, int bits) { int v = vp56_rac_gets(c, 7) << 1; return v + !v; } +static inline int vp8_rac_get_nn(VP56RangeCoder *c) +{ + int v = vp8_rac_get_uint(c, 7) << 1; + return v + !v; +} + static inline int vp56_rac_get_tree(VP56RangeCoder *c, const VP56Tree *tree, const uint8_t *probs) @@ -267,4 +307,39 @@ return -tree->val; } +/** + * This is identical to vp8_rac_get_tree except for the possibility of starting + * on a node other than the root node, needed for coeff decode where this is + * used to save a bit after a 0 token (by disallowing EOB to immediately follow.) + */ +static inline int vp8_rac_get_tree_with_offset(VP56RangeCoder *c, const int8_t (*tree)[2], + const uint8_t *probs, int i) +{ + do { + i = tree[i][vp56_rac_get_prob(c, probs[i])]; + } while (i > 0); + + return -i; +} + +// how probabilities are associated with decisions is different I think +// well, the new scheme fits in the old but this way has one fewer branches per decision +static inline int vp8_rac_get_tree(VP56RangeCoder *c, const int8_t (*tree)[2], + const uint8_t *probs) +{ + return vp8_rac_get_tree_with_offset(c, tree, probs, 0); +} + +// DCTextra +static inline int vp8_rac_get_coeff(VP56RangeCoder *c, const uint8_t *prob) +{ + int v = 0; + + do { + v = (v<<1) + vp56_rac_get_prob(c, *prob++); + } while (*prob); + + return v; +} + #endif /* AVCODEC_VP56_H */ diff -r 7d04a6cec75f -r f2007d7c3f1d vp8.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/vp8.c Tue Jun 22 19:24:09 2010 +0000 @@ -0,0 +1,1412 @@ +/** + * VP8 compatible video decoder + * + * Copyright (C) 2010 David Conrad + * Copyright (C) 2010 Ronald S. Bultje + * + * This file is part of FFmpeg. 
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "avcodec.h" +#include "vp56.h" +#include "vp8data.h" +#include "vp8dsp.h" +#include "h264pred.h" +#include "rectangle.h" + +typedef struct { + uint8_t segment; + uint8_t skip; + // todo: make it possible to check for at least (i4x4 or split_mv) + // in one op. are others needed? + uint8_t mode; + uint8_t ref_frame; + uint8_t partitioning; + VP56mv mv; + VP56mv bmv[16]; +} VP8Macroblock; + +typedef struct { + AVCodecContext *avctx; + DSPContext dsp; + VP8DSPContext vp8dsp; + H264PredContext hpc; + AVFrame frames[4]; + AVFrame *framep[4]; + uint8_t *edge_emu_buffer; + VP56RangeCoder c; ///< header context, includes mb modes and motion vectors + int profile; + + int mb_width; /* number of horizontal MB */ + int mb_height; /* number of vertical MB */ + int linesize; + int uvlinesize; + + int keyframe; + int invisible; + int update_last; ///< update VP56_FRAME_PREVIOUS with the current one + int update_golden; ///< VP56_FRAME_NONE if not updated, or which frame to copy if so + int update_altref; + + /** + * If this flag is not set, all the probability updates + * are discarded after this frame is decoded. + */ + int update_probabilities; + + /** + * All coefficients are contained in separate arith coding contexts. 
+ * There can be 1, 2, 4, or 8 of these after the header context. + */ + int num_coeff_partitions; + VP56RangeCoder coeff_partition[8]; + + VP8Macroblock *macroblocks; + VP8Macroblock *macroblocks_base; + int mb_stride; + + uint8_t *intra4x4_pred_mode; + uint8_t *intra4x4_pred_mode_base; + int b4_stride; + + /** + * For coeff decode, we need to know whether the above block had non-zero + * coefficients. This means for each macroblock, we need data for 4 luma + * blocks, 2 u blocks, 2 v blocks, and the luma dc block, for a total of 9 + * per macroblock. We keep the last row in top_nnz. + */ + uint8_t (*top_nnz)[9]; + DECLARE_ALIGNED(8, uint8_t, left_nnz)[9]; + + /** + * This is the index plus one of the last non-zero coeff + * for each of the blocks in the current macroblock. + * So, 0 -> no coeffs + * 1 -> dc-only (special transform) + * 2+-> full transform + */ + DECLARE_ALIGNED(16, uint8_t, non_zero_count_cache)[6][4]; + DECLARE_ALIGNED(16, DCTELEM, block)[6][4][16]; + + int chroma_pred_mode; ///< 8x8c pred mode of the current macroblock + + int mbskip_enabled; + int sign_bias[4]; ///< one state [0, 1] per ref frame type + + /** + * Base parameters for segmentation, i.e. per-macroblock parameters. + * These must be kept unchanged even if segmentation is not used for + * a frame, since the values persist between interframes. + */ + struct { + int enabled; + int absolute_vals; + int update_map; + int8_t base_quant[4]; + int8_t filter_level[4]; ///< base loop filter level + } segmentation; + + /** + * Macroblocks can have one of 4 different quants in a frame when + * segmentation is enabled. + * If segmentation is disabled, only the first segment's values are used. 
+ */ + struct { + // [0] - DC qmul [1] - AC qmul + int16_t luma_qmul[2]; + int16_t luma_dc_qmul[2]; ///< luma dc-only block quant + int16_t chroma_qmul[2]; + } qmat[4]; + + struct { + int simple; + int level; + int sharpness; + } filter; + + struct { + int enabled; ///< whether each mb can have a different strength based on mode/ref + + /** + * filter strength adjustment for the following macroblock modes: + * [0] - i4x4 + * [1] - zero mv + * [2] - inter modes except for zero or split mv + * [3] - split mv + * i16x16 modes never have any adjustment + */ + int8_t mode[4]; + + /** + * filter strength adjustment for macroblocks that reference: + * [0] - intra / VP56_FRAME_CURRENT + * [1] - VP56_FRAME_PREVIOUS + * [2] - VP56_FRAME_GOLDEN + * [3] - altref / VP56_FRAME_GOLDEN2 + */ + int8_t ref[4]; + } lf_delta; + + /** + * These are all of the updatable probabilities for binary decisions. + * They are only implicitly reset on keyframes, making it quite likely + * for an interframe to desync if a prior frame's header was corrupt + * or missing outright! 
+ */ + struct { + uint8_t segmentid[3]; + uint8_t mbskip; + uint8_t intra; + uint8_t last; + uint8_t golden; + uint8_t pred16x16[4]; + uint8_t pred8x8c[3]; + uint8_t token[4][8][3][NUM_DCT_TOKENS-1]; + uint8_t mvc[2][19]; + } prob[2]; +} VP8Context; + +#define RL24(p) (AV_RL16(p) + ((p)[2] << 16)) + +static void vp8_decode_flush(AVCodecContext *avctx) +{ + VP8Context *s = avctx->priv_data; + int i; + + for (i = 0; i < 4; i++) + if (s->frames[i].data[0]) + avctx->release_buffer(avctx, &s->frames[i]); + memset(s->framep, 0, sizeof(s->framep)); + + av_freep(&s->macroblocks_base); + av_freep(&s->intra4x4_pred_mode_base); + av_freep(&s->top_nnz); + av_freep(&s->edge_emu_buffer); + + s->macroblocks = NULL; + s->intra4x4_pred_mode = NULL; +} + +static int update_dimensions(VP8Context *s, int width, int height) +{ + int i; + + if (avcodec_check_dimensions(s->avctx, width, height)) + return AVERROR_INVALIDDATA; + + vp8_decode_flush(s->avctx); + + avcodec_set_dimensions(s->avctx, width, height); + + s->mb_width = (s->avctx->coded_width +15) / 16; + s->mb_height = (s->avctx->coded_height+15) / 16; + + // we allocate a border around the top/left of intra4x4 modes + // this is 4 blocks for intra4x4 to keep 4-byte alignment for fill_rectangle + s->mb_stride = s->mb_width+1; + s->b4_stride = 4*s->mb_stride; + + s->macroblocks_base = av_mallocz(s->mb_stride*(s->mb_height+1)*sizeof(*s->macroblocks)); + s->intra4x4_pred_mode_base = av_mallocz(s->b4_stride*(4*s->mb_height+1)); + s->top_nnz = av_mallocz(s->mb_width*sizeof(*s->top_nnz)); + + s->macroblocks = s->macroblocks_base + 1 + s->mb_stride; + s->intra4x4_pred_mode = s->intra4x4_pred_mode_base + 4 + s->b4_stride; + + memset(s->intra4x4_pred_mode_base, DC_PRED, s->b4_stride); + for (i = 0; i < 4*s->mb_height; i++) + s->intra4x4_pred_mode[i*s->b4_stride-1] = DC_PRED; + + return 0; +} + +static void parse_segment_info(VP8Context *s) +{ + VP56RangeCoder *c = &s->c; + int i; + + s->segmentation.update_map = vp8_rac_get(c); + + if 
(vp8_rac_get(c)) { // update segment feature data + s->segmentation.absolute_vals = vp8_rac_get(c); + + for (i = 0; i < 4; i++) + s->segmentation.base_quant[i] = vp8_rac_get_sint(c, 7); + + for (i = 0; i < 4; i++) + s->segmentation.filter_level[i] = vp8_rac_get_sint(c, 6); + } + if (s->segmentation.update_map) + for (i = 0; i < 3; i++) + s->prob->segmentid[i] = vp8_rac_get(c) ? vp8_rac_get_uint(c, 8) : 255; +} + +static void update_lf_deltas(VP8Context *s) +{ + VP56RangeCoder *c = &s->c; + int i; + + for (i = 0; i < 4; i++) + s->lf_delta.ref[i] = vp8_rac_get_sint(c, 6); + + for (i = 0; i < 4; i++) + s->lf_delta.mode[i] = vp8_rac_get_sint(c, 6); +} + +static int setup_partitions(VP8Context *s, const uint8_t *buf, int buf_size) +{ + const uint8_t *sizes = buf; + int i; + + s->num_coeff_partitions = 1 << vp8_rac_get_uint(&s->c, 2); + + buf += 3*(s->num_coeff_partitions-1); + buf_size -= 3*(s->num_coeff_partitions-1); + if (buf_size < 0) + return -1; + + for (i = 0; i < s->num_coeff_partitions-1; i++) { + int size = RL24(sizes + 3*i); + if (buf_size - size < 0) + return -1; + + vp56_init_range_decoder(&s->coeff_partition[i], buf, size); + buf += size; + buf_size -= size; + } + vp56_init_range_decoder(&s->coeff_partition[i], buf, buf_size); + + return 0; +} + +static void get_quants(VP8Context *s) +{ + VP56RangeCoder *c = &s->c; + int i, base_qi; + + int yac_qi = vp8_rac_get_uint(c, 7); + int ydc_delta = vp8_rac_get_sint(c, 4); + int y2dc_delta = vp8_rac_get_sint(c, 4); + int y2ac_delta = vp8_rac_get_sint(c, 4); + int uvdc_delta = vp8_rac_get_sint(c, 4); + int uvac_delta = vp8_rac_get_sint(c, 4); + + for (i = 0; i < 4; i++) { + if (s->segmentation.enabled) { + base_qi = s->segmentation.base_quant[i]; + if (!s->segmentation.absolute_vals) + base_qi += yac_qi; + } else + base_qi = yac_qi; + + s->qmat[i].luma_qmul[0] = vp8_dc_qlookup[av_clip(base_qi + ydc_delta , 0, 127)]; + s->qmat[i].luma_qmul[1] = vp8_ac_qlookup[av_clip(base_qi , 0, 127)]; + s->qmat[i].luma_dc_qmul[0] = 
2 * vp8_dc_qlookup[av_clip(base_qi + y2dc_delta, 0, 127)]; + s->qmat[i].luma_dc_qmul[1] = 155 * vp8_ac_qlookup[av_clip(base_qi + y2ac_delta, 0, 127)] / 100; + s->qmat[i].chroma_qmul[0] = vp8_dc_qlookup[av_clip(base_qi + uvdc_delta, 0, 127)]; + s->qmat[i].chroma_qmul[1] = vp8_ac_qlookup[av_clip(base_qi + uvac_delta, 0, 127)]; + + s->qmat[i].luma_dc_qmul[1] = FFMAX(s->qmat[i].luma_dc_qmul[1], 8); + s->qmat[i].chroma_qmul[0] = FFMIN(s->qmat[i].chroma_qmul[0], 132); + } +} + +/** + * Determine which buffers golden and altref should be updated with after this frame. + * The spec isn't clear here, so I'm going by my understanding of what libvpx does + * + * Intra frames update all 3 references + * Inter frames update VP56_FRAME_PREVIOUS if the update_last flag is set + * If the update (golden|altref) flag is set, it's updated with the current frame + * if update_last is set, and VP56_FRAME_PREVIOUS otherwise. + * If the flag is not set, the number read means: + * 0: no update + * 1: VP56_FRAME_PREVIOUS + * 2: update golden with altref, or update altref with golden + */ +static VP56Frame ref_to_update(VP8Context *s, int update, VP56Frame ref) +{ + VP56RangeCoder *c = &s->c; + + if (update) + return VP56_FRAME_CURRENT; + + switch (vp8_rac_get_uint(c, 2)) { + case 1: + return VP56_FRAME_PREVIOUS; + case 2: + return (ref == VP56_FRAME_GOLDEN) ? 
VP56_FRAME_GOLDEN2 : VP56_FRAME_GOLDEN; + } + return VP56_FRAME_NONE; +} + +static void update_refs(VP8Context *s) +{ + VP56RangeCoder *c = &s->c; + + int update_golden = vp8_rac_get(c); + int update_altref = vp8_rac_get(c); + + s->update_golden = ref_to_update(s, update_golden, VP56_FRAME_GOLDEN); + s->update_altref = ref_to_update(s, update_altref, VP56_FRAME_GOLDEN2); +} + +static int decode_frame_header(VP8Context *s, const uint8_t *buf, int buf_size) +{ + VP56RangeCoder *c = &s->c; + int header_size, hscale, vscale, i, j, k, l, ret; + int width = s->avctx->width; + int height = s->avctx->height; + + s->keyframe = !(buf[0] & 1); + s->profile = (buf[0]>>1) & 7; + s->invisible = !(buf[0] & 0x10); + header_size = RL24(buf) >> 5; + buf += 3; + buf_size -= 3; + + if (s->profile) + av_log(s->avctx, AV_LOG_WARNING, "Profile %d not fully handled\n", s->profile); + + if (header_size > buf_size - 7*s->keyframe) { + av_log(s->avctx, AV_LOG_ERROR, "Header size larger than data provided\n"); + return AVERROR_INVALIDDATA; + } + + if (s->keyframe) { + if (RL24(buf) != 0x2a019d) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid start code 0x%x\n", RL24(buf)); + return AVERROR_INVALIDDATA; + } + width = AV_RL16(buf+3) & 0x3fff; + height = AV_RL16(buf+5) & 0x3fff; + hscale = buf[4] >> 6; + vscale = buf[6] >> 6; + buf += 7; + buf_size -= 7; + + s->update_golden = s->update_altref = VP56_FRAME_CURRENT; + memcpy(s->prob->token , vp8_token_default_probs , sizeof(s->prob->token)); + memcpy(s->prob->pred16x16, vp8_pred16x16_prob_inter, sizeof(s->prob->pred16x16)); + memcpy(s->prob->pred8x8c , vp8_pred8x8c_prob_inter , sizeof(s->prob->pred8x8c)); + memcpy(s->prob->mvc , vp8_mv_default_prob , sizeof(s->prob->mvc)); + memset(&s->segmentation, 0, sizeof(s->segmentation)); + } + + if (!s->macroblocks_base || /* first frame */ + width != s->avctx->width || height != s->avctx->height) { + if ((ret = update_dimensions(s, width, height) < 0)) + return ret; + } + + vp56_init_range_decoder(c, buf, 
header_size); + buf += header_size; + buf_size -= header_size; + + if (s->keyframe) { + if (vp8_rac_get(c)) + av_log(s->avctx, AV_LOG_WARNING, "Unspecified colorspace\n"); + vp8_rac_get(c); // whether we can skip clamping in dsp functions + } + + if ((s->segmentation.enabled = vp8_rac_get(c))) + parse_segment_info(s); + else + s->segmentation.update_map = 0; // FIXME: move this to some init function? + + s->filter.simple = vp8_rac_get(c); + s->filter.level = vp8_rac_get_uint(c, 6); + s->filter.sharpness = vp8_rac_get_uint(c, 3); + + if ((s->lf_delta.enabled = vp8_rac_get(c))) + if (vp8_rac_get(c)) + update_lf_deltas(s); + + if (setup_partitions(s, buf, buf_size)) { + av_log(s->avctx, AV_LOG_ERROR, "Invalid partitions\n"); + return AVERROR_INVALIDDATA; + } + + get_quants(s); + + if (!s->keyframe) { + update_refs(s); + s->sign_bias[VP56_FRAME_GOLDEN] = vp8_rac_get(c); + s->sign_bias[VP56_FRAME_GOLDEN2 /* altref */] = vp8_rac_get(c); + } + + // if we aren't saving this frame's probabilities for future frames, + // make a copy of the current probabilities + if (!(s->update_probabilities = vp8_rac_get(c))) + s->prob[1] = s->prob[0]; + + s->update_last = s->keyframe || vp8_rac_get(c); + + for (i = 0; i < 4; i++) + for (j = 0; j < 8; j++) + for (k = 0; k < 3; k++) + for (l = 0; l < NUM_DCT_TOKENS-1; l++) + if (vp56_rac_get_prob(c, vp8_token_update_probs[i][j][k][l])) + s->prob->token[i][j][k][l] = vp8_rac_get_uint(c, 8); + + if ((s->mbskip_enabled = vp8_rac_get(c))) + s->prob->mbskip = vp8_rac_get_uint(c, 8); + + if (!s->keyframe) { + s->prob->intra = vp8_rac_get_uint(c, 8); + s->prob->last = vp8_rac_get_uint(c, 8); + s->prob->golden = vp8_rac_get_uint(c, 8); + + if (vp8_rac_get(c)) + for (i = 0; i < 4; i++) + s->prob->pred16x16[i] = vp8_rac_get_uint(c, 8); + if (vp8_rac_get(c)) + for (i = 0; i < 3; i++) + s->prob->pred8x8c[i] = vp8_rac_get_uint(c, 8); + + // 17.2 MV probability update + for (i = 0; i < 2; i++) + for (j = 0; j < 19; j++) + if (vp56_rac_get_prob(c, 
vp8_mv_update_prob[i][j])) + s->prob->mvc[i][j] = vp8_rac_get_nn(c); + } + + return 0; +} + +static inline void clamp_mv(VP8Context *s, VP56mv *dst, const VP56mv *src, + int mb_x, int mb_y) +{ +#define MARGIN (16 << 2) + dst->x = av_clip(src->x, -((mb_x << 6) + MARGIN), + ((s->mb_width - 1 - mb_x) << 6) + MARGIN); + dst->y = av_clip(src->y, -((mb_y << 6) + MARGIN), + ((s->mb_height - 1 - mb_y) << 6) + MARGIN); +} + +static void find_near_mvs(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, + VP56mv near[2], VP56mv *best, int cnt[4]) +{ + VP8Macroblock *mb_edge[3] = { mb - s->mb_stride /* top */, + mb - 1 /* left */, + mb - s->mb_stride - 1 /* top-left */ }; + enum { EDGE_TOP, EDGE_LEFT, EDGE_TOPLEFT }; + VP56mv near_mv[4] = {{ 0 }}; + enum { CNT_ZERO, CNT_NEAREST, CNT_NEAR, CNT_SPLITMV }; + int idx = CNT_ZERO, n; + int best_idx = CNT_ZERO; + + /* Process MB on top, left and top-left */ + for (n = 0; n < 3; n++) { + VP8Macroblock *edge = mb_edge[n]; + if (edge->ref_frame != VP56_FRAME_CURRENT) { + if (edge->mv.x | edge->mv.y) { + VP56mv tmp = edge->mv; + if (s->sign_bias[mb->ref_frame] != s->sign_bias[edge->ref_frame]) { + tmp.x *= -1; + tmp.y *= -1; + } + if ((tmp.x ^ near_mv[idx].x) | (tmp.y ^ near_mv[idx].y)) + near_mv[++idx] = tmp; + cnt[idx] += 1 + (n != 2); + } else + cnt[CNT_ZERO] += 1 + (n != 2); + } + } + + /* If we have three distinct MV's, merge first and last if they're the same */ + if (cnt[CNT_SPLITMV] && + !((near_mv[1+EDGE_TOP].x ^ near_mv[1+EDGE_TOPLEFT].x) | + (near_mv[1+EDGE_TOP].y ^ near_mv[1+EDGE_TOPLEFT].y))) + cnt[CNT_NEAREST] += 1; + + cnt[CNT_SPLITMV] = ((mb_edge[EDGE_LEFT]->mode == VP8_MVMODE_SPLIT) + + (mb_edge[EDGE_TOP]->mode == VP8_MVMODE_SPLIT)) * 2 + + (mb_edge[EDGE_TOPLEFT]->mode == VP8_MVMODE_SPLIT); + + /* Swap near and nearest if necessary */ + if (cnt[CNT_NEAR] > cnt[CNT_NEAREST]) { + FFSWAP(int, cnt[CNT_NEAREST], cnt[CNT_NEAR]); + FFSWAP(VP56mv, near_mv[CNT_NEAREST], near_mv[CNT_NEAR]); + } + + /* Choose the best mv out of 
0,0 and the nearest mv */ + if (cnt[CNT_NEAREST] >= cnt[CNT_ZERO]) + best_idx = CNT_NEAREST; + + clamp_mv(s, best, &near_mv[best_idx], mb_x, mb_y); + near[0] = near_mv[CNT_NEAREST]; + near[1] = near_mv[CNT_NEAR]; +} + +/** + * Motion vector coding, 17.1. + */ +static int read_mv_component(VP56RangeCoder *c, const uint8_t *p) +{ + int x = 0; + + if (vp56_rac_get_prob(c, p[0])) { + int i; + + for (i = 0; i < 3; i++) + x += vp56_rac_get_prob(c, p[9 + i]) << i; + for (i = 9; i > 3; i--) + x += vp56_rac_get_prob(c, p[9 + i]) << i; + if (!(x & 0xFFF0) || vp56_rac_get_prob(c, p[12])) + x += 8; + } else + x = vp8_rac_get_tree(c, vp8_small_mvtree, &p[2]); + + return (x && vp56_rac_get_prob(c, p[1])) ? -x : x; +} + +static const uint8_t *get_submv_prob(const VP56mv *left, const VP56mv *top) +{ + int l_is_zero = !(left->x | left->y); + int t_is_zero = !(top->x | top->y); + int equal = !((left->x ^ top->x) | (left->y ^ top->y)); + + if (equal) + return l_is_zero ? vp8_submv_prob[4] : vp8_submv_prob[3]; + if (t_is_zero) + return vp8_submv_prob[2]; + return l_is_zero ? vp8_submv_prob[1] : vp8_submv_prob[0]; +} + +/** + * Split motion vector prediction, 16.4. + */ +static void decode_splitmvs(VP8Context *s, VP56RangeCoder *c, + VP8Macroblock *mb, VP56mv *base_mv) +{ + int part_idx = mb->partitioning = + vp8_rac_get_tree(c, vp8_mbsplit_tree, vp8_mbsplit_prob); + int n, num = vp8_mbsplit_count[part_idx]; + VP56mv part_mv[16]; + + for (n = 0; n < num; n++) { + int k = vp8_mbfirstidx[part_idx][n]; + const VP56mv *left = (k & 3) ? &mb->bmv[k - 1] : &mb[-1].bmv[k + 3], + *above = (k > 3) ? 
&mb->bmv[k - 4] : &mb[-s->mb_stride].bmv[k + 12]; + const uint8_t *submv_prob = get_submv_prob(left, above); + + switch (vp8_rac_get_tree(c, vp8_submv_ref_tree, submv_prob)) { + case VP8_SUBMVMODE_NEW4X4: + part_mv[n].y = base_mv->y + read_mv_component(c, s->prob->mvc[0]); + part_mv[n].x = base_mv->x + read_mv_component(c, s->prob->mvc[1]); + break; + case VP8_SUBMVMODE_ZERO4X4: + part_mv[n].x = 0; + part_mv[n].y = 0; + break; + case VP8_SUBMVMODE_LEFT4X4: + part_mv[n] = *left; + break; + case VP8_SUBMVMODE_TOP4X4: + part_mv[n] = *above; + break; + } + + /* fill out over the 4x4 blocks in MB */ + for (k = 0; k < 16; k++) + if (vp8_mbsplits[part_idx][k] == n) { + mb->bmv[k] = part_mv[n]; + } + } +} + +static inline void decode_intra4x4_modes(VP56RangeCoder *c, uint8_t *intra4x4, + int stride, int keyframe) +{ + int x, y, t, l; + const uint8_t *ctx = vp8_pred4x4_prob_inter; + + for (y = 0; y < 4; y++) { + for (x = 0; x < 4; x++) { + if (keyframe) { + t = intra4x4[x - stride]; + l = intra4x4[x - 1]; + ctx = vp8_pred4x4_prob_intra[t][l]; + } + intra4x4[x] = vp8_rac_get_tree(c, vp8_pred4x4_tree, ctx); + } + intra4x4 += stride; + } +} + +static void decode_mb_mode(VP8Context *s, VP8Macroblock *mb, int mb_x, int mb_y, + uint8_t *intra4x4) +{ + VP56RangeCoder *c = &s->c; + int n; + + if (s->segmentation.update_map) + mb->segment = vp8_rac_get_tree(c, vp8_segmentid_tree, s->prob->segmentid); + + mb->skip = s->mbskip_enabled ? 
vp56_rac_get_prob(c, s->prob->mbskip) : 0; + + if (s->keyframe) { + mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_intra, vp8_pred16x16_prob_intra); + + if (mb->mode == MODE_I4x4) { + decode_intra4x4_modes(c, intra4x4, s->b4_stride, 1); + } else + fill_rectangle(intra4x4, 4, 4, s->b4_stride, vp8_pred4x4_mode[mb->mode], 1); + + s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, vp8_pred8x8c_prob_intra); + mb->ref_frame = VP56_FRAME_CURRENT; + } else if (vp56_rac_get_prob(c, s->prob->intra)) { + VP56mv near[2], best; + int cnt[4] = { 0 }; + uint8_t p[4]; + + // inter MB, 16.2 + if (vp56_rac_get_prob(c, s->prob->last)) + mb->ref_frame = vp56_rac_get_prob(c, s->prob->golden) ? + VP56_FRAME_GOLDEN2 /* altref */ : VP56_FRAME_GOLDEN; + else + mb->ref_frame = VP56_FRAME_PREVIOUS; + + // motion vectors, 16.3 + find_near_mvs(s, mb, mb_x, mb_y, near, &best, cnt); + for (n = 0; n < 4; n++) + p[n] = vp8_mode_contexts[cnt[n]][n]; + mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_mvinter, p); + switch (mb->mode) { + case VP8_MVMODE_SPLIT: + decode_splitmvs(s, c, mb, &best); + mb->mv = mb->bmv[15]; + break; + case VP8_MVMODE_ZERO: + mb->mv.x = 0; + mb->mv.y = 0; + break; + case VP8_MVMODE_NEAREST: + clamp_mv(s, &mb->mv, &near[0], mb_x, mb_y); + break; + case VP8_MVMODE_NEAR: + clamp_mv(s, &mb->mv, &near[1], mb_x, mb_y); + break; + case VP8_MVMODE_NEW: + mb->mv.y = best.y + read_mv_component(c, s->prob->mvc[0]); + mb->mv.x = best.x + read_mv_component(c, s->prob->mvc[1]); + break; + } + if (mb->mode != VP8_MVMODE_SPLIT) { + for (n = 0; n < 16; n++) + mb->bmv[n] = mb->mv; + } + } else { + // intra MB, 16.1 + mb->mode = vp8_rac_get_tree(c, vp8_pred16x16_tree_inter, s->prob->pred16x16); + + if (mb->mode == MODE_I4x4) { + decode_intra4x4_modes(c, intra4x4, s->b4_stride, 0); + } else + fill_rectangle(intra4x4, 4, 4, s->b4_stride, vp8_pred4x4_mode[mb->mode], 1); + + s->chroma_pred_mode = vp8_rac_get_tree(c, vp8_pred8x8c_tree, s->prob->pred8x8c); + mb->ref_frame = 
VP56_FRAME_CURRENT; + } +} + +/** + * @param i initial coeff index, 0 unless a separate DC block is coded + * @param zero_nhood the initial prediction context for number of surrounding + * all-zero blocks (only left/top, so 0-2) + * @param qmul[0] dc dequant factor + * @param qmul[1] ac dequant factor + * @return 0 if no coeffs were decoded + * otherwise, the index of the last coeff decoded plus one + */ +static int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16], + uint8_t probs[8][3][NUM_DCT_TOKENS-1], + int i, int zero_nhood, int16_t qmul[2]) +{ + int token, nonzero = 0; + int offset = 0; + + for (; i < 16; i++) { + token = vp8_rac_get_tree_with_offset(c, vp8_coeff_tree, probs[vp8_coeff_band[i]][zero_nhood], offset); + + if (token == DCT_EOB) + break; + else if (token >= DCT_CAT1) { + int cat = token-DCT_CAT1; + token = vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]); + token += vp8_dct_cat_offset[cat]; + } + + // after the first token, the non-zero prediction context becomes + // based on the last decoded coeff + if (!token) { + zero_nhood = 0; + offset = 1; + continue; + } else if (token == 1) + zero_nhood = 1; + else + zero_nhood = 2; + + // todo: full [16] qmat? load into register? + block[zigzag_scan[i]] = (vp8_rac_get(c) ? -token : token) * qmul[!!i]; + nonzero = i+1; + offset = 0; + } + return nonzero; +} + +static void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, + uint8_t t_nnz[9], uint8_t l_nnz[9]) +{ + LOCAL_ALIGNED_16(DCTELEM, dc,[16]); + int i, x, y, luma_start = 0, luma_ctx = 3; + int nnz_pred, nnz, nnz_total = 0; + int segment = s->segmentation.enabled ? 
mb->segment : 0; + + s->dsp.clear_blocks((DCTELEM *)s->block); + + if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) { + AV_ZERO128(dc); + AV_ZERO128(dc+8); + nnz_pred = t_nnz[8] + l_nnz[8]; + + // decode DC values and do hadamard + nnz = decode_block_coeffs(c, dc, s->prob->token[1], 0, nnz_pred, + s->qmat[segment].luma_dc_qmul); + l_nnz[8] = t_nnz[8] = !!nnz; + nnz_total += nnz; + s->vp8dsp.vp8_luma_dc_wht(s->block, dc); + luma_start = 1; + luma_ctx = 0; + } + + // luma blocks + for (y = 0; y < 4; y++) + for (x = 0; x < 4; x++) { + nnz_pred = l_nnz[y] + t_nnz[x]; + nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start, + nnz_pred, s->qmat[segment].luma_qmul); + // nnz+luma_start may be one more than the actual last index, but we don't care + s->non_zero_count_cache[y][x] = nnz + luma_start; + t_nnz[x] = l_nnz[y] = !!nnz; + nnz_total += nnz; + } + + // chroma blocks + // TODO: what to do about dimensions? 2nd dim for luma is x, + // but for chroma it's (y<<1)|x + for (i = 4; i < 6; i++) + for (y = 0; y < 2; y++) + for (x = 0; x < 2; x++) { + nnz_pred = l_nnz[i+2*y] + t_nnz[i+2*x]; + nnz = decode_block_coeffs(c, s->block[i][(y<<1)+x], s->prob->token[2], 0, + nnz_pred, s->qmat[segment].chroma_qmul); + s->non_zero_count_cache[i][(y<<1)+x] = nnz; + t_nnz[i+2*x] = l_nnz[i+2*y] = !!nnz; + nnz_total += nnz; + } + + // if there were no coded coeffs despite the macroblock not being marked skip, + // we MUST not do the inner loop filter and should not do IDCT + // Since skip isn't used for bitstream prediction, just manually set it. 
+ if (!nnz_total) + mb->skip = 1; +} + +static int check_intra_pred_mode(int mode, int mb_x, int mb_y) +{ + if (mode == DC_PRED8x8) { + if (!(mb_x|mb_y)) + mode = DC_128_PRED8x8; + else if (!mb_y) + mode = LEFT_DC_PRED8x8; + else if (!mb_x) + mode = TOP_DC_PRED8x8; + } + return mode; +} + +static void intra_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, + uint8_t *bmode, int mb_x, int mb_y) +{ + int x, y, mode, nnz, tr; + + if (mb->mode < MODE_I4x4) { + mode = check_intra_pred_mode(mb->mode, mb_x, mb_y); + s->hpc.pred16x16[mode](dst[0], s->linesize); + } else { + uint8_t *ptr = dst[0]; + + // all blocks on the right edge of the macroblock use the bottom edge of + // the top macroblock for their topright edge + uint8_t *tr_right = ptr - s->linesize + 16; + + // if we're on the right edge of the frame, said edge is extended + // from the top macroblock + if (mb_x == s->mb_width-1) { + tr = tr_right[-1]*0x01010101u; /* unsigned multiply: replicating a byte >= 0x80 would overflow a signed int */ + tr_right = (uint8_t *)&tr; + } + + for (y = 0; y < 4; y++) { + uint8_t *topright = ptr + 4 - s->linesize; + for (x = 0; x < 4; x++) { + if (x == 3) + topright = tr_right; + + s->hpc.pred4x4[bmode[x]](ptr+4*x, topright, s->linesize); + + nnz = s->non_zero_count_cache[y][x]; + if (nnz) { + if (nnz == 1) + s->vp8dsp.vp8_idct_dc_add(ptr+4*x, s->block[y][x], s->linesize); + else + s->vp8dsp.vp8_idct_add(ptr+4*x, s->block[y][x], s->linesize); + } + topright += 4; + } + + ptr += 4*s->linesize; + bmode += s->b4_stride; + } + } + + mode = check_intra_pred_mode(s->chroma_pred_mode, mb_x, mb_y); + s->hpc.pred8x8[mode](dst[1], s->uvlinesize); + s->hpc.pred8x8[mode](dst[2], s->uvlinesize); +} + +/** + * Generic MC function. 
+ * + * @param s VP8 decoding context + * @param luma 1 for luma (Y) planes, 0 for chroma (Cb/Cr) planes + * @param dst target buffer for block data at block position + * @param src reference picture buffer at origin (0, 0) + * @param mv motion vector (relative to block position) to get pixel data from + * @param x_off horizontal position of block from origin (0, 0) + * @param y_off vertical position of block from origin (0, 0) + * @param block_w width of block (16, 8 or 4) + * @param block_h height of block (always same as block_w) + * @param width width of src/dst plane data + * @param height height of src/dst plane data + * @param linesize size of a single line of plane data, including padding + */ +static inline void vp8_mc(VP8Context *s, int luma, + uint8_t *dst, uint8_t *src, const VP56mv *mv, + int x_off, int y_off, int block_w, int block_h, + int width, int height, int linesize, + h264_chroma_mc_func mc_func[3][3]) +{ + static const uint8_t idx[8] = { 0, 1, 2, 1, 2, 1, 2, 1 }; + int mx = (mv->x << luma)&7, mx_idx = idx[mx]; + int my = (mv->y << luma)&7, my_idx = idx[my]; + + x_off += mv->x >> (3 - luma); + y_off += mv->y >> (3 - luma); + + // edge emulation + src += y_off * linesize + x_off; + if (x_off < 2 || x_off >= width - block_w - 3 || + y_off < 2 || y_off >= height - block_h - 3) { + ff_emulated_edge_mc(s->edge_emu_buffer, src - 2 * linesize - 2, linesize, + block_w + 5, block_h + 5, + x_off - 2, y_off - 2, width, height); + src = s->edge_emu_buffer + 2 + linesize * 2; + } + + mc_func[my_idx][mx_idx](dst, src, linesize, block_h, mx, my); +} + +/** + * Apply motion vectors to prediction buffer, chapter 18. 
+ */ +static void inter_predict(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, + int mb_x, int mb_y) +{ + int x_off = mb_x << 4, y_off = mb_y << 4; + int width = 16*s->mb_width, height = 16*s->mb_height; + VP56mv uvmv; + + if (mb->mode < VP8_MVMODE_SPLIT) { + /* Y */ + vp8_mc(s, 1, dst[0], s->framep[mb->ref_frame]->data[0], &mb->mv, + x_off, y_off, 16, 16, width, height, s->linesize, + s->vp8dsp.put_vp8_epel_pixels_tab[0]); + + /* U/V */ + uvmv = mb->mv; + if (s->profile == 3) { + uvmv.x &= ~7; + uvmv.y &= ~7; + } + x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1; + vp8_mc(s, 0, dst[1], s->framep[mb->ref_frame]->data[1], &uvmv, + x_off, y_off, 8, 8, width, height, s->uvlinesize, + s->vp8dsp.put_vp8_epel_pixels_tab[1]); + vp8_mc(s, 0, dst[2], s->framep[mb->ref_frame]->data[2], &uvmv, + x_off, y_off, 8, 8, width, height, s->uvlinesize, + s->vp8dsp.put_vp8_epel_pixels_tab[1]); + } else { + int x, y; + + /* Y */ + for (y = 0; y < 4; y++) { + for (x = 0; x < 4; x++) { + vp8_mc(s, 1, dst[0] + 4*y*s->linesize + x*4, + s->framep[mb->ref_frame]->data[0], &mb->bmv[4*y + x], + 4*x + x_off, 4*y + y_off, 4, 4, + width, height, s->linesize, + s->vp8dsp.put_vp8_epel_pixels_tab[2]); + } + } + + /* U/V */ + x_off >>= 1; y_off >>= 1; width >>= 1; height >>= 1; + for (y = 0; y < 2; y++) { + for (x = 0; x < 2; x++) { + uvmv.x = mb->bmv[ 2*y * 4 + 2*x ].x + + mb->bmv[ 2*y * 4 + 2*x+1].x + + mb->bmv[(2*y+1) * 4 + 2*x ].x + + mb->bmv[(2*y+1) * 4 + 2*x+1].x; + uvmv.y = mb->bmv[ 2*y * 4 + 2*x ].y + + mb->bmv[ 2*y * 4 + 2*x+1].y + + mb->bmv[(2*y+1) * 4 + 2*x ].y + + mb->bmv[(2*y+1) * 4 + 2*x+1].y; + uvmv.x = (uvmv.x + (uvmv.x < 0 ? -2 : 2)) / 4; + uvmv.y = (uvmv.y + (uvmv.y < 0 ? 
-2 : 2)) / 4; + if (s->profile == 3) { + uvmv.x &= ~7; + uvmv.y &= ~7; + } + vp8_mc(s, 0, dst[1] + 4*y*s->uvlinesize + x*4, + s->framep[mb->ref_frame]->data[1], &uvmv, + 4*x + x_off, 4*y + y_off, 4, 4, + width, height, s->uvlinesize, + s->vp8dsp.put_vp8_epel_pixels_tab[2]); + vp8_mc(s, 0, dst[2] + 4*y*s->uvlinesize + x*4, + s->framep[mb->ref_frame]->data[2], &uvmv, + 4*x + x_off, 4*y + y_off, 4, 4, + width, height, s->uvlinesize, + s->vp8dsp.put_vp8_epel_pixels_tab[2]); + } + } + } +} + +static void idct_mb(VP8Context *s, uint8_t *y_dst, uint8_t *u_dst, uint8_t *v_dst, + VP8Macroblock *mb) +{ + int x, y, nnz; + + if (mb->mode != MODE_I4x4) + for (y = 0; y < 4; y++) { + for (x = 0; x < 4; x++) { + nnz = s->non_zero_count_cache[y][x]; + if (nnz) { + if (nnz == 1) + s->vp8dsp.vp8_idct_dc_add(y_dst+4*x, s->block[y][x], s->linesize); + else + s->vp8dsp.vp8_idct_add(y_dst+4*x, s->block[y][x], s->linesize); + } + } + y_dst += 4*s->linesize; + } + + for (y = 0; y < 2; y++) { + for (x = 0; x < 2; x++) { + nnz = s->non_zero_count_cache[4][(y<<1)+x]; + if (nnz) { + if (nnz == 1) + s->vp8dsp.vp8_idct_dc_add(u_dst+4*x, s->block[4][(y<<1)+x], s->uvlinesize); + else + s->vp8dsp.vp8_idct_add(u_dst+4*x, s->block[4][(y<<1)+x], s->uvlinesize); + } + + nnz = s->non_zero_count_cache[5][(y<<1)+x]; + if (nnz) { + if (nnz == 1) + s->vp8dsp.vp8_idct_dc_add(v_dst+4*x, s->block[5][(y<<1)+x], s->uvlinesize); + else + s->vp8dsp.vp8_idct_add(v_dst+4*x, s->block[5][(y<<1)+x], s->uvlinesize); + } + } + u_dst += 4*s->uvlinesize; + v_dst += 4*s->uvlinesize; + } +} + +static void filter_level_for_mb(VP8Context *s, VP8Macroblock *mb, int *level, int *inner, int *hev_thresh) +{ + int interior_limit, filter_level; + + if (s->segmentation.enabled) { + filter_level = s->segmentation.filter_level[mb->segment]; + if (!s->segmentation.absolute_vals) + filter_level += s->filter.level; + } else + filter_level = s->filter.level; + + if (s->lf_delta.enabled) { + filter_level += s->lf_delta.ref[mb->ref_frame]; + 
+ if (mb->ref_frame == VP56_FRAME_CURRENT) { + if (mb->mode == MODE_I4x4) + filter_level += s->lf_delta.mode[0]; + } else { + if (mb->mode == VP8_MVMODE_ZERO) + filter_level += s->lf_delta.mode[1]; + else if (mb->mode == VP8_MVMODE_SPLIT) + filter_level += s->lf_delta.mode[3]; + else + filter_level += s->lf_delta.mode[2]; + } + } + filter_level = av_clip(filter_level, 0, 63); + + interior_limit = filter_level; + if (s->filter.sharpness) { + interior_limit >>= s->filter.sharpness > 4 ? 2 : 1; + interior_limit = FFMIN(interior_limit, 9 - s->filter.sharpness); + } + interior_limit = FFMAX(interior_limit, 1); + + *level = filter_level; + *inner = interior_limit; + + if (hev_thresh) { + *hev_thresh = filter_level >= 15; + + if (s->keyframe) { + if (filter_level >= 40) + *hev_thresh = 2; + } else { + if (filter_level >= 40) + *hev_thresh = 3; + else if (filter_level >= 20) + *hev_thresh = 2; + } + } +} + +// TODO: look at backup_mb_border / xchg_mb_border in h264.c +static void filter_mb(VP8Context *s, uint8_t *dst[3], VP8Macroblock *mb, int mb_x, int mb_y) +{ + int filter_level, inner_limit, hev_thresh; + + filter_level_for_mb(s, mb, &filter_level, &inner_limit, &hev_thresh); + if (!filter_level) + return; + + if (mb_x) { + s->vp8dsp.vp8_h_loop_filter16(dst[0], s->linesize, filter_level+2, inner_limit, hev_thresh); + s->vp8dsp.vp8_h_loop_filter8 (dst[1], s->uvlinesize, filter_level+2, inner_limit, hev_thresh); + s->vp8dsp.vp8_h_loop_filter8 (dst[2], s->uvlinesize, filter_level+2, inner_limit, hev_thresh); + } + + if (!mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT) { + s->vp8dsp.vp8_h_loop_filter16_inner(dst[0]+ 4, s->linesize, filter_level, inner_limit, hev_thresh); + s->vp8dsp.vp8_h_loop_filter16_inner(dst[0]+ 8, s->linesize, filter_level, inner_limit, hev_thresh); + s->vp8dsp.vp8_h_loop_filter16_inner(dst[0]+12, s->linesize, filter_level, inner_limit, hev_thresh); + s->vp8dsp.vp8_h_loop_filter8_inner (dst[1]+ 4, s->uvlinesize, filter_level, 
inner_limit, hev_thresh); + s->vp8dsp.vp8_h_loop_filter8_inner (dst[2]+ 4, s->uvlinesize, filter_level, inner_limit, hev_thresh); + } + + if (mb_y) { + s->vp8dsp.vp8_v_loop_filter16(dst[0], s->linesize, filter_level+2, inner_limit, hev_thresh); + s->vp8dsp.vp8_v_loop_filter8 (dst[1], s->uvlinesize, filter_level+2, inner_limit, hev_thresh); + s->vp8dsp.vp8_v_loop_filter8 (dst[2], s->uvlinesize, filter_level+2, inner_limit, hev_thresh); + } + + if (!mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT) { + s->vp8dsp.vp8_v_loop_filter16_inner(dst[0]+ 4*s->linesize, s->linesize, filter_level, inner_limit, hev_thresh); + s->vp8dsp.vp8_v_loop_filter16_inner(dst[0]+ 8*s->linesize, s->linesize, filter_level, inner_limit, hev_thresh); + s->vp8dsp.vp8_v_loop_filter16_inner(dst[0]+12*s->linesize, s->linesize, filter_level, inner_limit, hev_thresh); + s->vp8dsp.vp8_v_loop_filter8_inner (dst[1]+ 4*s->uvlinesize, s->uvlinesize, filter_level, inner_limit, hev_thresh); + s->vp8dsp.vp8_v_loop_filter8_inner (dst[2]+ 4*s->uvlinesize, s->uvlinesize, filter_level, inner_limit, hev_thresh); + } +} + +static void filter_mb_simple(VP8Context *s, uint8_t *dst, VP8Macroblock *mb, int mb_x, int mb_y) +{ + int filter_level, inner_limit, mbedge_lim, bedge_lim; + + filter_level_for_mb(s, mb, &filter_level, &inner_limit, NULL); + if (!filter_level) + return; + + mbedge_lim = 2*(filter_level+2) + inner_limit; + bedge_lim = 2* filter_level + inner_limit; + + if (mb_x) + s->vp8dsp.vp8_h_loop_filter_simple(dst, s->linesize, mbedge_lim); + if (!mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT) { + s->vp8dsp.vp8_h_loop_filter_simple(dst+ 4, s->linesize, bedge_lim); + s->vp8dsp.vp8_h_loop_filter_simple(dst+ 8, s->linesize, bedge_lim); + s->vp8dsp.vp8_h_loop_filter_simple(dst+12, s->linesize, bedge_lim); + } + + if (mb_y) + s->vp8dsp.vp8_v_loop_filter_simple(dst, s->linesize, mbedge_lim); + if (!mb->skip || mb->mode == MODE_I4x4 || mb->mode == VP8_MVMODE_SPLIT) { + 
s->vp8dsp.vp8_v_loop_filter_simple(dst+ 4*s->linesize, s->linesize, bedge_lim); + s->vp8dsp.vp8_v_loop_filter_simple(dst+ 8*s->linesize, s->linesize, bedge_lim); + s->vp8dsp.vp8_v_loop_filter_simple(dst+12*s->linesize, s->linesize, bedge_lim); + } +} + +static void filter_mb_row(VP8Context *s, int mb_y) +{ + VP8Macroblock *mb = s->macroblocks + mb_y*s->mb_stride; + uint8_t *dst[3] = { + s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize, + s->framep[VP56_FRAME_CURRENT]->data[1] + 8*mb_y*s->uvlinesize, + s->framep[VP56_FRAME_CURRENT]->data[2] + 8*mb_y*s->uvlinesize + }; + int mb_x; + + for (mb_x = 0; mb_x < s->mb_width; mb_x++) { + filter_mb(s, dst, mb++, mb_x, mb_y); + dst[0] += 16; + dst[1] += 8; + dst[2] += 8; + } +} + +static void filter_mb_row_simple(VP8Context *s, int mb_y) +{ + uint8_t *dst = s->framep[VP56_FRAME_CURRENT]->data[0] + 16*mb_y*s->linesize; + VP8Macroblock *mb = s->macroblocks + mb_y*s->mb_stride; + int mb_x; + + for (mb_x = 0; mb_x < s->mb_width; mb_x++) { + filter_mb_simple(s, dst, mb++, mb_x, mb_y); + dst += 16; + } +} + +static int vp8_decode_frame(AVCodecContext *avctx, void *data, int *data_size, + AVPacket *avpkt) +{ + VP8Context *s = avctx->priv_data; + int ret, mb_x, mb_y, i, y, referenced; + enum AVDiscard skip_thresh; + AVFrame *curframe; + + if ((ret = decode_frame_header(s, avpkt->data, avpkt->size)) < 0) + return ret; + + referenced = s->update_last || s->update_golden == VP56_FRAME_CURRENT + || s->update_altref == VP56_FRAME_CURRENT; + + skip_thresh = !referenced ? AVDISCARD_NONREF : + !s->keyframe ? 
AVDISCARD_NONKEY : AVDISCARD_ALL; + + if (avctx->skip_frame >= skip_thresh) { + s->invisible = 1; + goto skip_decode; + } + + for (i = 0; i < 4; i++) + if (&s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] && + &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] && + &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) { + curframe = s->framep[VP56_FRAME_CURRENT] = &s->frames[i]; + break; + } + if (curframe->data[0]) + avctx->release_buffer(avctx, curframe); + + curframe->key_frame = s->keyframe; + curframe->pict_type = s->keyframe ? FF_I_TYPE : FF_P_TYPE; + curframe->reference = referenced ? 3 : 0; + if ((ret = avctx->get_buffer(avctx, curframe))) { + av_log(avctx, AV_LOG_ERROR, "get_buffer() failed!\n"); + return ret; + } + + // Given that arithmetic probabilities are updated every frame, it's quite likely + // that the values we have on a random interframe are complete junk if we didn't + // start decode on a keyframe. So just don't display anything rather than junk. + if (!s->keyframe && (!s->framep[VP56_FRAME_PREVIOUS] || + !s->framep[VP56_FRAME_GOLDEN] || + !s->framep[VP56_FRAME_GOLDEN2])) { + av_log(avctx, AV_LOG_WARNING, "Discarding interframe without a prior keyframe!\n"); + return AVERROR_INVALIDDATA; + } + + s->linesize = curframe->linesize[0]; + s->uvlinesize = curframe->linesize[1]; + + if (!s->edge_emu_buffer) + s->edge_emu_buffer = av_malloc(21*s->linesize); + + memset(s->top_nnz, 0, s->mb_width*sizeof(*s->top_nnz)); + + // top edge of 127 for intra prediction + if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) { + memset(curframe->data[0] - s->linesize -1, 127, s->linesize +1); + memset(curframe->data[1] - s->uvlinesize-1, 127, s->uvlinesize+1); + memset(curframe->data[2] - s->uvlinesize-1, 127, s->uvlinesize+1); + } + + for (mb_y = 0; mb_y < s->mb_height; mb_y++) { + VP56RangeCoder *c = &s->coeff_partition[mb_y & (s->num_coeff_partitions-1)]; + VP8Macroblock *mb = s->macroblocks + mb_y*s->mb_stride; + uint8_t *intra4x4 = s->intra4x4_pred_mode + 4*mb_y*s->b4_stride; + 
uint8_t *dst[3] = { + curframe->data[0] + 16*mb_y*s->linesize, + curframe->data[1] + 8*mb_y*s->uvlinesize, + curframe->data[2] + 8*mb_y*s->uvlinesize + }; + + memset(s->left_nnz, 0, sizeof(s->left_nnz)); + + // left edge of 129 for intra prediction + if (!(avctx->flags & CODEC_FLAG_EMU_EDGE)) + for (i = 0; i < 3; i++) + for (y = 0; y < 16>>!!i; y++) + dst[i][y*curframe->linesize[i]-1] = 129; + + for (mb_x = 0; mb_x < s->mb_width; mb_x++) { + decode_mb_mode(s, mb, mb_x, mb_y, intra4x4 + 4*mb_x); + + if (!mb->skip) + decode_mb_coeffs(s, c, mb, s->top_nnz[mb_x], s->left_nnz); + else { + AV_ZERO128(s->non_zero_count_cache); // luma + AV_ZERO64(s->non_zero_count_cache[4]); // chroma + } + + if (mb->mode <= MODE_I4x4) { + intra_predict(s, dst, mb, intra4x4 + 4*mb_x, mb_x, mb_y); + memset(mb->bmv, 0, sizeof(mb->bmv)); + } else { + inter_predict(s, dst, mb, mb_x, mb_y); + } + + if (!mb->skip) { + idct_mb(s, dst[0], dst[1], dst[2], mb); + } else { + AV_ZERO64(s->left_nnz); + AV_WN64(s->top_nnz[mb_x], 0); // array of 9, so unaligned + + // Reset DC block predictors if they would exist if the mb had coefficients + if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) { + s->left_nnz[8] = 0; + s->top_nnz[mb_x][8] = 0; + } + } + + dst[0] += 16; + dst[1] += 8; + dst[2] += 8; + mb++; + } + if (mb_y && s->filter.level && avctx->skip_loop_filter < skip_thresh) { + if (s->filter.simple) + filter_mb_row_simple(s, mb_y-1); + else + filter_mb_row(s, mb_y-1); + } + } + if (s->filter.level && avctx->skip_loop_filter < skip_thresh) { + if (s->filter.simple) + filter_mb_row_simple(s, mb_y-1); + else + filter_mb_row(s, mb_y-1); + } + +skip_decode: + // if future frames don't use the updated probabilities, + // reset them to the values we saved + if (!s->update_probabilities) + s->prob[0] = s->prob[1]; + + // check if golden and altref are swapped + if (s->update_altref == VP56_FRAME_GOLDEN && + s->update_golden == VP56_FRAME_GOLDEN2) + FFSWAP(AVFrame *, s->framep[VP56_FRAME_GOLDEN], 
s->framep[VP56_FRAME_GOLDEN2]); + else { + if (s->update_altref != VP56_FRAME_NONE) + s->framep[VP56_FRAME_GOLDEN2] = s->framep[s->update_altref]; + + if (s->update_golden != VP56_FRAME_NONE) + s->framep[VP56_FRAME_GOLDEN] = s->framep[s->update_golden]; + } + + if (s->update_last) // move cur->prev + s->framep[VP56_FRAME_PREVIOUS] = s->framep[VP56_FRAME_CURRENT]; + + // release no longer referenced frames + for (i = 0; i < 4; i++) + if (s->frames[i].data[0] && + &s->frames[i] != s->framep[VP56_FRAME_CURRENT] && + &s->frames[i] != s->framep[VP56_FRAME_PREVIOUS] && + &s->frames[i] != s->framep[VP56_FRAME_GOLDEN] && + &s->frames[i] != s->framep[VP56_FRAME_GOLDEN2]) + avctx->release_buffer(avctx, &s->frames[i]); + + if (!s->invisible) { + *(AVFrame*)data = *s->framep[VP56_FRAME_CURRENT]; + *data_size = sizeof(AVFrame); + } + + return avpkt->size; +} + +static av_cold int vp8_decode_init(AVCodecContext *avctx) +{ + VP8Context *s = avctx->priv_data; + + s->avctx = avctx; + avctx->pix_fmt = PIX_FMT_YUV420P; + + dsputil_init(&s->dsp, avctx); + ff_h264_pred_init(&s->hpc, CODEC_ID_VP8); + ff_vp8dsp_init(&s->vp8dsp); + + // intra pred needs edge emulation among other things + if (avctx->flags&CODEC_FLAG_EMU_EDGE) { + av_log(avctx, AV_LOG_ERROR, "Edge emulation not supproted\n"); + return AVERROR_PATCHWELCOME; + } + + return 0; +} + +static av_cold int vp8_decode_free(AVCodecContext *avctx) +{ + vp8_decode_flush(avctx); + return 0; +} + +AVCodec vp8_decoder = { + "vp8", + AVMEDIA_TYPE_VIDEO, + CODEC_ID_VP8, + sizeof(VP8Context), + vp8_decode_init, + NULL, + vp8_decode_free, + vp8_decode_frame, + CODEC_CAP_DR1, + .flush = vp8_decode_flush, + .long_name = NULL_IF_CONFIG_SMALL("On2 VP8"), +}; diff -r 7d04a6cec75f -r f2007d7c3f1d vp8data.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/vp8data.h Tue Jun 22 19:24:09 2010 +0000 @@ -0,0 +1,752 @@ +/** + * VP8 compatible video decoder + * + * Copyright (C) 2010 David Conrad + * Copyright (C) 2010 Ronald S. 
Bultje
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+// TODO: move these #define and enum to a better header...
+
+#define VP8_MAX_QUANT 127
+
+// DCT coefficient tokens; NUM_DCT_TOKENS is the number of tokens above it.
+enum dct_token {
+    DCT_0,
+    DCT_1,
+    DCT_2,
+    DCT_3,
+    DCT_4,
+    DCT_CAT1,
+    DCT_CAT2,
+    DCT_CAT3,
+    DCT_CAT4,
+    DCT_CAT5,
+    DCT_CAT6,
+    DCT_EOB,
+
+    NUM_DCT_TOKENS
+};
+
+#include "h264pred.h"
+
+// used to signal 4x4 intra pred in luma MBs
+#define MODE_I4x4 4
+
+// Inter prediction modes; numbered right after MODE_I4x4 so that they share
+// one number space with the intra modes.
+enum inter_mvmode {
+    VP8_MVMODE_NEAREST = MODE_I4x4 + 1,
+    VP8_MVMODE_NEAR,
+    VP8_MVMODE_ZERO,
+    VP8_MVMODE_NEW,
+    VP8_MVMODE_SPLIT
+};
+
+// Motion vector modes of the individual partitions of a split macroblock.
+enum inter_submvmode {
+    VP8_SUBMVMODE_LEFT4X4,
+    VP8_SUBMVMODE_TOP4X4,
+    VP8_SUBMVMODE_ZERO4X4,
+    VP8_SUBMVMODE_NEW4X4
+};
+
+// Maps an 8x8/16x16 prediction mode constant to the matching 4x4 mode.
+static const uint8_t vp8_pred4x4_mode[] =
+{
+    [DC_PRED8x8]    = DC_PRED,
+    [VERT_PRED8x8]  = VERT_PRED,
+    [HOR_PRED8x8]   = HOR_PRED,
+    [PLANE_PRED8x8] = TM_VP8_PRED,
+};
+
+// Code trees for the 16x16 luma mode. NOTE(review): entries look like
+// "positive = index of the next node, negated = decoded value", with the
+// trailing comments giving the bit string of each leaf - confirm against the
+// tree reader.
+static const int8_t vp8_pred16x16_tree_intra[4][2] =
+{
+    { -MODE_I4x4, 1 },                  // '0'
+    { 2, 3 },
+    { -DC_PRED8x8, -VERT_PRED8x8 },     // '100', '101'
+    { -HOR_PRED8x8, -PLANE_PRED8x8 },   // '110', '111'
+};
+
+static const int8_t vp8_pred16x16_tree_inter[4][2] =
+{
+    { -DC_PRED8x8, 1 },                 // '0'
+    { 2, 3 },
+    { -VERT_PRED8x8, -HOR_PRED8x8 },    // '100', '101'
+    { -PLANE_PRED8x8, -MODE_I4x4 },     // '110', '111'
+};
+
+static
const int vp8_mode_contexts[6][4] = { + { 7, 1, 1, 143 }, + { 14, 18, 14, 107 }, + { 135, 64, 57, 68 }, + { 60, 56, 128, 65 }, + { 159, 134, 128, 34 }, + { 234, 188, 128, 28 }, +}; + +static const int8_t vp8_pred16x16_tree_mvinter[4][2] = { + { -VP8_MVMODE_ZERO, 1 }, // '0' + { -VP8_MVMODE_NEAREST, 2 }, // '10' + { -VP8_MVMODE_NEAR, 3 }, // '110' + { -VP8_MVMODE_NEW, -VP8_MVMODE_SPLIT } // '1110', '1111' +}; + +static const int8_t vp8_small_mvtree[7][2] = { + { 1, 4 }, + { 2, 3 }, + { -0, -1 }, // '000', '001' + { -2, -3 }, // '010', '011' + { 5, 6 }, + { -4, -5 }, // '100', '101' + { -6, -7 } // '110', '111' +}; + +static const uint8_t vp8_mbsplits[4][16] = { + { 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1 }, + { 0, 0, 1, 1, 0, 0, 1, 1, + 0, 0, 1, 1, 0, 0, 1, 1 }, + { 0, 0, 1, 1, 0, 0, 1, 1, + 2, 2, 3, 3, 2, 2, 3, 3 }, + { 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15 } +}; + +static const uint8_t vp8_mbfirstidx[4][16] = { + { 0, 8 }, { 0, 2 }, { 0, 2, 8, 10 }, + { 0, 1, 2, 3, 4, 5, 6, 7, + 8, 9, 10, 11, 12, 13, 14, 15 } +}; + +static const int8_t vp8_mbsplit_tree[3][2] = { + { -3, 1 }, // '0' - 16 individual MVs + { -2, 2 }, // '10' - quarter-based MVs + { -0, -1 } // '110' - top/bottom MVs, + // '111' - left/right MVs +}; +static const uint8_t vp8_mbsplit_count[4] = { 2, 2, 4, 16 }; +static const uint8_t vp8_mbsplit_prob[3] = { 110, 111, 150 }; + +static const uint8_t vp8_submv_prob[5][3] = { + { 147, 136, 18 }, + { 106, 145, 1 }, + { 179, 121, 1 }, + { 223, 1, 34 }, + { 208, 1, 1 } +}; + +static const int8_t vp8_submv_ref_tree[3][2] = { + { -VP8_SUBMVMODE_LEFT4X4, 1 }, // '0' + { -VP8_SUBMVMODE_TOP4X4, 2 }, // '10' + { -VP8_SUBMVMODE_ZERO4X4, -VP8_SUBMVMODE_NEW4X4 } // '110', '111' +}; + +static const uint8_t vp8_pred16x16_prob_intra[4] = { 145, 156, 163, 128 }; +static const uint8_t vp8_pred16x16_prob_inter[4] = { 112, 86, 140, 37 }; + +static const int8_t vp8_pred4x4_tree[9][2] = +{ + { -DC_PRED, 1 }, // '0' + { -TM_VP8_PRED, 2 }, // '10' + { 
-VERT_PRED, 3 }, // '110' + { 4, 6 }, + { -HOR_PRED, 5 }, // '11100' + { -DIAG_DOWN_RIGHT_PRED, -VERT_RIGHT_PRED }, // '111010', '111011' + { -DIAG_DOWN_LEFT_PRED, 7 }, // '11110' + { -VERT_LEFT_PRED, 8 }, // '111110' + { -HOR_DOWN_PRED, -HOR_UP_PRED }, // '1111110', '1111111' +}; + +static const int8_t vp8_pred8x8c_tree[3][2] = +{ + { -DC_PRED8x8, 1 }, // '0' + { -VERT_PRED8x8, 2 }, // '10 + { -HOR_PRED8x8, -PLANE_PRED8x8 }, // '110', '111' +}; + +static const uint8_t vp8_pred8x8c_prob_intra[3] = { 142, 114, 183 }; +static const uint8_t vp8_pred8x8c_prob_inter[3] = { 162, 101, 204 }; + +static const uint8_t vp8_pred4x4_prob_inter[9] = +{ + 120, 90, 79, 133, 87, 85, 80, 111, 151 +}; + +static const uint8_t vp8_pred4x4_prob_intra[10][10][9] = +{ + { + { 39, 53, 200, 87, 26, 21, 43, 232, 171 }, + { 56, 34, 51, 104, 114, 102, 29, 93, 77 }, + { 88, 88, 147, 150, 42, 46, 45, 196, 205 }, + { 107, 54, 32, 26, 51, 1, 81, 43, 31 }, + { 39, 28, 85, 171, 58, 165, 90, 98, 64 }, + { 34, 22, 116, 206, 23, 34, 43, 166, 73 }, + { 34, 19, 21, 102, 132, 188, 16, 76, 124 }, + { 68, 25, 106, 22, 64, 171, 36, 225, 114 }, + { 62, 18, 78, 95, 85, 57, 50, 48, 51 }, + { 43, 97, 183, 117, 85, 38, 35, 179, 61 }, + }, + { + { 112, 113, 77, 85, 179, 255, 38, 120, 114 }, + { 40, 42, 1, 196, 245, 209, 10, 25, 109 }, + { 193, 101, 35, 159, 215, 111, 89, 46, 111 }, + { 100, 80, 8, 43, 154, 1, 51, 26, 71 }, + { 88, 43, 29, 140, 166, 213, 37, 43, 154 }, + { 61, 63, 30, 155, 67, 45, 68, 1, 209 }, + { 41, 40, 5, 102, 211, 183, 4, 1, 221 }, + { 142, 78, 78, 16, 255, 128, 34, 197, 171 }, + { 51, 50, 17, 168, 209, 192, 23, 25, 82 }, + { 60, 148, 31, 172, 219, 228, 21, 18, 111 }, + }, + { + { 175, 69, 143, 80, 85, 82, 72, 155, 103 }, + { 56, 58, 10, 171, 218, 189, 17, 13, 152 }, + { 231, 120, 48, 89, 115, 113, 120, 152, 112 }, + { 144, 71, 10, 38, 171, 213, 144, 34, 26 }, + { 114, 26, 17, 163, 44, 195, 21, 10, 173 }, + { 121, 24, 80, 195, 26, 62, 44, 64, 85 }, + { 63, 20, 8, 114, 114, 208, 12, 9, 226 }, + 
{ 170, 46, 55, 19, 136, 160, 33, 206, 71 }, + { 81, 40, 11, 96, 182, 84, 29, 16, 36 }, + { 152, 179, 64, 126, 170, 118, 46, 70, 95 }, + }, + { + { 75, 79, 123, 47, 51, 128, 81, 171, 1 }, + { 57, 17, 5, 71, 102, 57, 53, 41, 49 }, + { 125, 98, 42, 88, 104, 85, 117, 175, 82 }, + { 115, 21, 2, 10, 102, 255, 166, 23, 6 }, + { 38, 33, 13, 121, 57, 73, 26, 1, 85 }, + { 41, 10, 67, 138, 77, 110, 90, 47, 114 }, + { 57, 18, 10, 102, 102, 213, 34, 20, 43 }, + { 101, 29, 16, 10, 85, 128, 101, 196, 26 }, + { 117, 20, 15, 36, 163, 128, 68, 1, 26 }, + { 95, 84, 53, 89, 128, 100, 113, 101, 45 }, + }, + { + { 63, 59, 90, 180, 59, 166, 93, 73, 154 }, + { 40, 40, 21, 116, 143, 209, 34, 39, 175 }, + { 138, 31, 36, 171, 27, 166, 38, 44, 229 }, + { 57, 46, 22, 24, 128, 1, 54, 17, 37 }, + { 47, 15, 16, 183, 34, 223, 49, 45, 183 }, + { 46, 17, 33, 183, 6, 98, 15, 32, 183 }, + { 40, 3, 9, 115, 51, 192, 18, 6, 223 }, + { 65, 32, 73, 115, 28, 128, 23, 128, 205 }, + { 87, 37, 9, 115, 59, 77, 64, 21, 47 }, + { 67, 87, 58, 169, 82, 115, 26, 59, 179 }, + }, + { + { 54, 57, 112, 184, 5, 41, 38, 166, 213 }, + { 30, 34, 26, 133, 152, 116, 10, 32, 134 }, + { 104, 55, 44, 218, 9, 54, 53, 130, 226 }, + { 75, 32, 12, 51, 192, 255, 160, 43, 51 }, + { 39, 19, 53, 221, 26, 114, 32, 73, 255 }, + { 31, 9, 65, 234, 2, 15, 1, 118, 73 }, + { 56, 21, 23, 111, 59, 205, 45, 37, 192 }, + { 88, 31, 35, 67, 102, 85, 55, 186, 85 }, + { 55, 38, 70, 124, 73, 102, 1, 34, 98 }, + { 64, 90, 70, 205, 40, 41, 23, 26, 57 }, + }, + { + { 86, 40, 64, 135, 148, 224, 45, 183, 128 }, + { 22, 26, 17, 131, 240, 154, 14, 1, 209 }, + { 164, 50, 31, 137, 154, 133, 25, 35, 218 }, + { 83, 12, 13, 54, 192, 255, 68, 47, 28 }, + { 45, 16, 21, 91, 64, 222, 7, 1, 197 }, + { 56, 21, 39, 155, 60, 138, 23, 102, 213 }, + { 18, 11, 7, 63, 144, 171, 4, 4, 246 }, + { 85, 26, 85, 85, 128, 128, 32, 146, 171 }, + { 35, 27, 10, 146, 174, 171, 12, 26, 128 }, + { 51, 103, 44, 131, 131, 123, 31, 6, 158 }, + }, + { + { 68, 45, 128, 34, 1, 47, 11, 245, 171 
}, + { 62, 17, 19, 70, 146, 85, 55, 62, 70 }, + { 102, 61, 71, 37, 34, 53, 31, 243, 192 }, + { 75, 15, 9, 9, 64, 255, 184, 119, 16 }, + { 37, 43, 37, 154, 100, 163, 85, 160, 1 }, + { 63, 9, 92, 136, 28, 64, 32, 201, 85 }, + { 56, 8, 17, 132, 137, 255, 55, 116, 128 }, + { 86, 6, 28, 5, 64, 255, 25, 248, 1 }, + { 58, 15, 20, 82, 135, 57, 26, 121, 40 }, + { 69, 60, 71, 38, 73, 119, 28, 222, 37 }, + }, + { + { 101, 75, 128, 139, 118, 146, 116, 128, 85 }, + { 56, 41, 15, 176, 236, 85, 37, 9, 62 }, + { 190, 80, 35, 99, 180, 80, 126, 54, 45 }, + { 146, 36, 19, 30, 171, 255, 97, 27, 20 }, + { 71, 30, 17, 119, 118, 255, 17, 18, 138 }, + { 101, 38, 60, 138, 55, 70, 43, 26, 142 }, + { 32, 41, 20, 117, 151, 142, 20, 21, 163 }, + { 138, 45, 61, 62, 219, 1, 81, 188, 64 }, + { 112, 19, 12, 61, 195, 128, 48, 4, 24 }, + { 85, 126, 47, 87, 176, 51, 41, 20, 32 }, + }, + { + { 66, 102, 167, 99, 74, 62, 40, 234, 128 }, + { 41, 53, 9, 178, 241, 141, 26, 8, 107 }, + { 134, 183, 89, 137, 98, 101, 106, 165, 148 }, + { 104, 79, 12, 27, 217, 255, 87, 17, 7 }, + { 74, 43, 26, 146, 73, 166, 49, 23, 157 }, + { 65, 38, 105, 160, 51, 52, 31, 115, 128 }, + { 47, 41, 14, 110, 182, 183, 21, 17, 194 }, + { 87, 68, 71, 44, 114, 51, 15, 186, 23 }, + { 66, 45, 25, 102, 197, 189, 23, 18, 22 }, + { 72, 187, 100, 130, 157, 111, 32, 75, 80 }, + }, +}; + +static const int8_t vp8_segmentid_tree[][2] = +{ + { 1, 2 }, + { -0, -1 }, // '00', '01' + { -2, -3 }, // '10', '11' +}; + +static const uint8_t vp8_coeff_band[16] = +{ + 0, 1, 2, 3, 6, 4, 5, 6, 6, 6, 6, 6, 6, 6, 6, 7 +}; + +static const int8_t vp8_coeff_tree[NUM_DCT_TOKENS-1][2] = +{ + { -DCT_EOB, 1 }, // '0' + { -DCT_0, 2 }, // '10' + { -DCT_1, 3 }, // '110' + { 4, 6 }, + { -DCT_2, 5 }, // '11100' + { -DCT_3, -DCT_4 }, // '111010', '111011' + { 7, 8 }, + { -DCT_CAT1, -DCT_CAT2 }, // '111100', '111101' + { 9, 10 }, + { -DCT_CAT3, -DCT_CAT4 }, // '1111100', '1111101' + { -DCT_CAT5, -DCT_CAT6 }, // '1111110', '1111111' +}; + +static const uint8_t 
vp8_dct_cat1_prob[] = { 159, 0 }; +static const uint8_t vp8_dct_cat2_prob[] = { 165, 145, 0 }; +static const uint8_t vp8_dct_cat3_prob[] = { 173, 148, 140, 0 }; +static const uint8_t vp8_dct_cat4_prob[] = { 176, 155, 140, 135, 0 }; +static const uint8_t vp8_dct_cat5_prob[] = { 180, 157, 141, 134, 130, 0 }; +static const uint8_t vp8_dct_cat6_prob[] = { 254, 254, 243, 230, 196, 177, 153, 140, 133, 130, 129, 0 }; + +static const uint8_t * const vp8_dct_cat_prob[6] = +{ + vp8_dct_cat1_prob, + vp8_dct_cat2_prob, + vp8_dct_cat3_prob, + vp8_dct_cat4_prob, + vp8_dct_cat5_prob, + vp8_dct_cat6_prob, +}; + +static const uint8_t vp8_dct_cat_offset[6] = { 5, 7, 11, 19, 35, 67 }; + +static const uint8_t vp8_token_default_probs[4][8][3][NUM_DCT_TOKENS-1] = +{ + { + { + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + }, + { + { 253, 136, 254, 255, 228, 219, 128, 128, 128, 128, 128 }, + { 189, 129, 242, 255, 227, 213, 255, 219, 128, 128, 128 }, + { 106, 126, 227, 252, 214, 209, 255, 255, 128, 128, 128 }, + }, + { + { 1, 98, 248, 255, 236, 226, 255, 255, 128, 128, 128 }, + { 181, 133, 238, 254, 221, 234, 255, 154, 128, 128, 128 }, + { 78, 134, 202, 247, 198, 180, 255, 219, 128, 128, 128 }, + }, + { + { 1, 185, 249, 255, 243, 255, 128, 128, 128, 128, 128 }, + { 184, 150, 247, 255, 236, 224, 128, 128, 128, 128, 128 }, + { 77, 110, 216, 255, 236, 230, 128, 128, 128, 128, 128 }, + }, + { + { 1, 101, 251, 255, 241, 255, 128, 128, 128, 128, 128 }, + { 170, 139, 241, 252, 236, 209, 255, 255, 128, 128, 128 }, + { 37, 116, 196, 243, 228, 255, 255, 255, 128, 128, 128 }, + }, + { + { 1, 204, 254, 255, 245, 255, 128, 128, 128, 128, 128 }, + { 207, 160, 250, 255, 238, 128, 128, 128, 128, 128, 128 }, + { 102, 103, 231, 255, 211, 171, 128, 128, 128, 128, 128 }, + }, + { + { 1, 152, 252, 255, 240, 255, 128, 128, 128, 128, 128 }, + { 177, 135, 243, 255, 234, 225, 
128, 128, 128, 128, 128 }, + { 80, 129, 211, 255, 194, 224, 128, 128, 128, 128, 128 }, + }, + { + { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 246, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 255, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + }, + }, + { + { + { 198, 35, 237, 223, 193, 187, 162, 160, 145, 155, 62 }, + { 131, 45, 198, 221, 172, 176, 220, 157, 252, 221, 1 }, + { 68, 47, 146, 208, 149, 167, 221, 162, 255, 223, 128 }, + }, + { + { 1, 149, 241, 255, 221, 224, 255, 255, 128, 128, 128 }, + { 184, 141, 234, 253, 222, 220, 255, 199, 128, 128, 128 }, + { 81, 99, 181, 242, 176, 190, 249, 202, 255, 255, 128 }, + }, + { + { 1, 129, 232, 253, 214, 197, 242, 196, 255, 255, 128 }, + { 99, 121, 210, 250, 201, 198, 255, 202, 128, 128, 128 }, + { 23, 91, 163, 242, 170, 187, 247, 210, 255, 255, 128 }, + }, + { + { 1, 200, 246, 255, 234, 255, 128, 128, 128, 128, 128 }, + { 109, 178, 241, 255, 231, 245, 255, 255, 128, 128, 128 }, + { 44, 130, 201, 253, 205, 192, 255, 255, 128, 128, 128 }, + }, + { + { 1, 132, 239, 251, 219, 209, 255, 165, 128, 128, 128 }, + { 94, 136, 225, 251, 218, 190, 255, 255, 128, 128, 128 }, + { 22, 100, 174, 245, 186, 161, 255, 199, 128, 128, 128 }, + }, + { + { 1, 182, 249, 255, 232, 235, 128, 128, 128, 128, 128 }, + { 124, 143, 241, 255, 227, 234, 128, 128, 128, 128, 128 }, + { 35, 77, 181, 251, 193, 211, 255, 205, 128, 128, 128 }, + }, + { + { 1, 157, 247, 255, 236, 231, 255, 255, 128, 128, 128 }, + { 121, 141, 235, 255, 225, 227, 255, 255, 128, 128, 128 }, + { 45, 99, 188, 251, 195, 217, 255, 224, 128, 128, 128 }, + }, + { + { 1, 1, 251, 255, 213, 255, 128, 128, 128, 128, 128 }, + { 203, 1, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 137, 1, 177, 255, 224, 255, 128, 128, 128, 128, 128 }, + }, + }, + { + { + { 253, 9, 248, 251, 207, 208, 255, 192, 128, 128, 128 }, + { 175, 13, 224, 243, 193, 185, 249, 198, 255, 255, 128 }, + { 73, 17, 171, 221, 161, 179, 236, 167, 255, 234, 128 }, + }, + { + { 1, 95, 247, 
253, 212, 183, 255, 255, 128, 128, 128 }, + { 239, 90, 244, 250, 211, 209, 255, 255, 128, 128, 128 }, + { 155, 77, 195, 248, 188, 195, 255, 255, 128, 128, 128 }, + }, + { + { 1, 24, 239, 251, 218, 219, 255, 205, 128, 128, 128 }, + { 201, 51, 219, 255, 196, 186, 128, 128, 128, 128, 128 }, + { 69, 46, 190, 239, 201, 218, 255, 228, 128, 128, 128 }, + }, + { + { 1, 191, 251, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 223, 165, 249, 255, 213, 255, 128, 128, 128, 128, 128 }, + { 141, 124, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, + }, + { + { 1, 16, 248, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 190, 36, 230, 255, 236, 255, 128, 128, 128, 128, 128 }, + { 149, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + }, + { + { 1, 226, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 247, 192, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 240, 128, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + }, + { + { 1, 134, 252, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 213, 62, 250, 255, 255, 128, 128, 128, 128, 128, 128 }, + { 55, 93, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + }, + { + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 128, 128, 128, 128, 128, 128, 128, 128, 128, 128, 128 }, + }, + }, + { + { + { 202, 24, 213, 235, 186, 191, 220, 160, 240, 175, 255 }, + { 126, 38, 182, 232, 169, 184, 228, 174, 255, 187, 128 }, + { 61, 46, 138, 219, 151, 178, 240, 170, 255, 216, 128 }, + }, + { + { 1, 112, 230, 250, 199, 191, 247, 159, 255, 255, 128 }, + { 166, 109, 228, 252, 211, 215, 255, 174, 128, 128, 128 }, + { 39, 77, 162, 232, 172, 180, 245, 178, 255, 255, 128 }, + }, + { + { 1, 52, 220, 246, 198, 199, 249, 220, 255, 255, 128 }, + { 124, 74, 191, 243, 183, 193, 250, 221, 255, 255, 128 }, + { 24, 71, 130, 219, 154, 170, 243, 182, 255, 255, 128 }, + }, + { + { 1, 182, 225, 249, 219, 240, 255, 224, 128, 128, 128 }, + { 149, 150, 226, 252, 216, 205, 255, 171, 128, 128, 128 }, + { 
28, 108, 170, 242, 183, 194, 254, 223, 255, 255, 128 }, + }, + { + { 1, 81, 230, 252, 204, 203, 255, 192, 128, 128, 128 }, + { 123, 102, 209, 247, 188, 196, 255, 233, 128, 128, 128 }, + { 20, 95, 153, 243, 164, 173, 255, 203, 128, 128, 128 }, + }, + { + { 1, 222, 248, 255, 216, 213, 128, 128, 128, 128, 128 }, + { 168, 175, 246, 252, 235, 205, 255, 255, 128, 128, 128 }, + { 47, 116, 215, 255, 211, 212, 255, 255, 128, 128, 128 }, + }, + { + { 1, 121, 236, 253, 212, 214, 255, 255, 128, 128, 128 }, + { 141, 84, 213, 252, 201, 202, 255, 219, 128, 128, 128 }, + { 42, 80, 160, 240, 162, 185, 255, 205, 128, 128, 128 }, + }, + { + { 1, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 244, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + { 238, 1, 255, 128, 128, 128, 128, 128, 128, 128, 128 }, + }, + }, +}; + +static const uint8_t vp8_token_update_probs[4][8][3][NUM_DCT_TOKENS-1] = +{ + { + { + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 176, 246, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 223, 241, 252, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 249, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 244, 252, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 234, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 246, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 239, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 254, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 251, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 251, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 254, 255, 
254, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 254, 253, 255, 254, 255, 255, 255, 255, 255, 255 }, + { 250, 255, 254, 255, 254, 255, 255, 255, 255, 255, 255 }, + { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + }, + { + { + { 217, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 225, 252, 241, 253, 255, 255, 254, 255, 255, 255, 255 }, + { 234, 250, 241, 250, 253, 255, 253, 254, 255, 255, 255 }, + }, + { + { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 223, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 238, 253, 254, 254, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 248, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 249, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 253, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 247, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 252, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 253, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 254, 253, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + }, + { + { + { 
186, 251, 250, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 234, 251, 244, 254, 255, 255, 255, 255, 255, 255, 255 }, + { 251, 251, 243, 253, 254, 255, 254, 255, 255, 255, 255 }, + }, + { + { 255, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 236, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 251, 253, 253, 254, 254, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 254, 254, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 254, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + }, + { + { + { 248, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 250, 254, 252, 254, 255, 255, 255, 255, 255, 255, 255 }, + { 248, 254, 249, 253, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 246, 253, 253, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 252, 254, 251, 254, 254, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 254, 252, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 248, 254, 
253, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 253, 255, 254, 254, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 245, 251, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 253, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 251, 253, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 252, 253, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 254, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 252, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 249, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 254, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 255, 253, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 250, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + { + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 254, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + { 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255 }, + }, + }, +}; + +// fixme: copied from h264data.h +static const uint8_t zigzag_scan[16]={ + 0+0*4, 1+0*4, 0+1*4, 0+2*4, + 1+1*4, 2+0*4, 3+0*4, 2+1*4, + 1+2*4, 0+3*4, 1+3*4, 2+2*4, + 3+1*4, 3+2*4, 2+3*4, 3+3*4, +}; + +static const uint8_t vp8_dc_qlookup[VP8_MAX_QUANT+1] = +{ + 4, 5, 6, 7, 8, 9, 10, 10, 11, 12, 13, 14, 15, 16, 17, 17, + 18, 19, 20, 20, 21, 21, 22, 22, 23, 23, 24, 25, 25, 26, 27, 28, + 29, 30, 31, 32, 33, 34, 35, 36, 37, 37, 38, 39, 40, 41, 42, 43, + 44, 45, 46, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, + 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, + 75, 76, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, + 91, 93, 95, 96, 98, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118, + 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 143, 145, 148, 151, 154, 157, +}; + +static const uint16_t vp8_ac_qlookup[VP8_MAX_QUANT+1] = +{ + 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 
15, 16, 17, 18, 19, + 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, + 52, 53, 54, 55, 56, 57, 58, 60, 62, 64, 66, 68, 70, 72, 74, 76, + 78, 80, 82, 84, 86, 88, 90, 92, 94, 96, 98, 100, 102, 104, 106, 108, + 110, 112, 114, 116, 119, 122, 125, 128, 131, 134, 137, 140, 143, 146, 149, 152, + 155, 158, 161, 164, 167, 170, 173, 177, 181, 185, 189, 193, 197, 201, 205, 209, + 213, 217, 221, 225, 229, 234, 239, 245, 249, 254, 259, 264, 269, 274, 279, 284, +}; + +static const uint8_t vp8_mv_update_prob[2][19] = { + { 237, + 246, + 253, 253, 254, 254, 254, 254, 254, + 254, 254, 254, 254, 254, 250, 250, 252, 254, 254 }, + { 231, + 243, + 245, 253, 254, 254, 254, 254, 254, + 254, 254, 254, 254, 254, 251, 251, 254, 254, 254 } +}; + +static const uint8_t vp8_mv_default_prob[2][19] = { + { 162, + 128, + 225, 146, 172, 147, 214, 39, 156, + 128, 129, 132, 75, 145, 178, 206, 239, 254, 254 }, + { 164, + 128, + 204, 170, 119, 235, 140, 230, 228, + 128, 130, 130, 74, 148, 180, 203, 236, 254, 254 } +}; diff -r 7d04a6cec75f -r f2007d7c3f1d vp8dsp.c --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/vp8dsp.c Tue Jun 22 19:24:09 2010 +0000 @@ -0,0 +1,373 @@ +/** + * VP8 compatible video decoder + * + * Copyright (C) 2010 David Conrad + * Copyright (C) 2010 Ronald S. Bultje + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "dsputil.h" +#include "vp8dsp.h" +/** Two-pass 4x4 butterfly transform of dc[]: pass one combines the four entries of each column in place, pass two combines the four entries of each row, adds 3 and shifts right by 3, storing the 16 results as element 0 of block[i][0..3]. dc[] is overwritten by pass one. */ +// TODO: Maybe add dequant +static void vp8_luma_dc_wht_c(DCTELEM block[4][4][16], DCTELEM dc[16]) +{ + int i, t0, t1, t2, t3; + + for (i = 0; i < 4; i++) { + t0 = dc[0*4+i] + dc[3*4+i]; + t1 = dc[1*4+i] + dc[2*4+i]; + t2 = dc[1*4+i] - dc[2*4+i]; + t3 = dc[0*4+i] - dc[3*4+i]; + + dc[0*4+i] = t0 + t1; + dc[1*4+i] = t3 + t2; + dc[2*4+i] = t0 - t1; + dc[3*4+i] = t3 - t2; + } + + for (i = 0; i < 4; i++) { + t0 = dc[i*4+0] + dc[i*4+3] + 3; // rounding + t1 = dc[i*4+1] + dc[i*4+2]; + t2 = dc[i*4+1] - dc[i*4+2]; + t3 = dc[i*4+0] - dc[i*4+3] + 3; // rounding + + *block[i][0] = (t0 + t1) >> 3; + *block[i][1] = (t3 + t2) >> 3; + *block[i][2] = (t0 - t1) >> 3; + *block[i][3] = (t3 - t2) >> 3; + } +} + +/** Fixed-point multipliers with 16 fractional bits: MUL_20091(a) is a + ((a*20091) >> 16), MUL_35468(a) is (a*35468) >> 16. */ +#define MUL_20091(a) ((((a)*20091) >> 16) + (a)) +#define MUL_35468(a) (((a)*35468) >> 16) +/** Two-pass 4x4 inverse transform of block[], added to the 4x4 pixels at dst. Pass one reads column i of block[] and writes row i of tmp[]; pass two reads column i of tmp[], rounds with (x + 4) >> 3 and adds the result to pixel row i, storing through av_clip_uint8(). stride is the distance between pixel rows. */ +static void vp8_idct_add_c(uint8_t *dst, DCTELEM block[16], int stride) +{ + int i, t0, t1, t2, t3; + DCTELEM tmp[16]; + + for (i = 0; i < 4; i++) { + t0 = block[0*4+i] + block[2*4+i]; + t1 = block[0*4+i] - block[2*4+i]; + t2 = MUL_35468(block[1*4+i]) - MUL_20091(block[3*4+i]); + t3 = MUL_20091(block[1*4+i]) + MUL_35468(block[3*4+i]); + + tmp[i*4+0] = t0 + t3; + tmp[i*4+1] = t1 + t2; + tmp[i*4+2] = t1 - t2; + tmp[i*4+3] = t0 - t3; + } + + for (i = 0; i < 4; i++) { + t0 = tmp[0*4+i] + tmp[2*4+i]; + t1 = tmp[0*4+i] - tmp[2*4+i]; + t2 = MUL_35468(tmp[1*4+i]) - MUL_20091(tmp[3*4+i]); + t3 = MUL_20091(tmp[1*4+i]) + MUL_35468(tmp[3*4+i]); + + dst[0] = av_clip_uint8(dst[0] + ((t0 + t3 + 4) >> 3)); + dst[1] = av_clip_uint8(dst[1] + ((t1 + t2 + 4) >> 3)); + dst[2] = av_clip_uint8(dst[2] + ((t1 - t2 + 4) >> 3)); + dst[3] = av_clip_uint8(dst[3] + ((t0 - t3 + 4) >> 3)); + dst += stride; + } +} +/** DC-only variant of vp8_idct_add_c: adds (block[0] + 4) >> 3 to each of the 4x4 pixels at dst, storing through av_clip_uint8(). Only block[0] is read. */ +static void
vp8_idct_dc_add_c(uint8_t *dst, DCTELEM block[16], int stride) +{ + int i, dc = (block[0] + 4) >> 3; + + for (i = 0; i < 4; i++) { + dst[0] = av_clip_uint8(dst[0] + dc); + dst[1] = av_clip_uint8(dst[1] + dc); + dst[2] = av_clip_uint8(dst[2] + dc); + dst[3] = av_clip_uint8(dst[3] + dc); + dst += stride; + } +} + +/** LOAD_PIXELS declares locals p3..p0 and q0..q3 holding the eight samples p[-4*stride]..p[3*stride]; it needs p and stride in scope where it is expanded. The locals are av_unused because most users read only some of them. */ +// because I like only having two parameters to pass functions... +#define LOAD_PIXELS\ + int av_unused p3 = p[-4*stride];\ + int av_unused p2 = p[-3*stride];\ + int av_unused p1 = p[-2*stride];\ + int av_unused p0 = p[-1*stride];\ + int av_unused q0 = p[ 0*stride];\ + int av_unused q1 = p[ 1*stride];\ + int av_unused q2 = p[ 2*stride];\ + int av_unused q3 = p[ 3*stride]; +/** Core edge filter. a = clip_int8(3*(q0-p0) + t), where t is clip_int8(p1-q1) if is4tap is set and 0 otherwise; f1 = min(a+4, 127) >> 3 and f2 = min(a+3, 127) >> 3. p0 becomes p0+f2 and q0 becomes q0-f1; when is4tap is 0, p1 and q1 are also moved by (f1+1) >> 1. All stores go through av_clip_uint8(). */ +static av_always_inline void filter_common(uint8_t *p, int stride, int is4tap) +{ + LOAD_PIXELS + int a, f1, f2; + + a = 3*(q0 - p0); + + if (is4tap) + a += av_clip_int8(p1 - q1); + + a = av_clip_int8(a); + + // We deviate from the spec here with c(a+3) >> 3 + // since that's what libvpx does. + f1 = FFMIN(a+4, 127) >> 3; + f2 = FFMIN(a+3, 127) >> 3; + + // Despite what the spec says, we do need to clamp here to + // be bitexact with libvpx.
+ p[-1*stride] = av_clip_uint8(p0 + f2); + p[ 0*stride] = av_clip_uint8(q0 - f1); + + // only used for _inner on blocks without high edge variance + if (!is4tap) { + a = (f1+1)>>1; + p[-2*stride] = av_clip_uint8(p1 + a); + p[ 1*stride] = av_clip_uint8(q1 - a); + } +} +/** Nonzero when 2*|p0-q0| + (|p1-q1| >> 1) <= flim. */ +static av_always_inline int simple_limit(uint8_t *p, int stride, int flim) +{ + LOAD_PIXELS + return 2*FFABS(p0-q0) + (FFABS(p1-q1) >> 1) <= flim; +} + +/** Nonzero when simple_limit() passes with flim = 2*E+I and all six adjacent-sample differences (within p3..p0 and within q0..q3) are <= I. + * E - limit at the macroblock edge + * I - limit for interior difference + */ +static av_always_inline int normal_limit(uint8_t *p, int stride, int E, int I) +{ + LOAD_PIXELS + return simple_limit(p, stride, 2*E+I) + && FFABS(p3-p2) <= I && FFABS(p2-p1) <= I && FFABS(p1-p0) <= I + && FFABS(q3-q2) <= I && FFABS(q2-q1) <= I && FFABS(q1-q0) <= I; +} +/** Nonzero when |p1-p0| or |q1-q0| exceeds thresh. */ +// high edge variance +static av_always_inline int hev(uint8_t *p, int stride, int thresh) +{ + LOAD_PIXELS + return FFABS(p1-p0) > thresh || FFABS(q1-q0) > thresh; +} +/** Filter used by LOOP_FILTER on macroblock-edge positions where hev() is not set. w = clip_int8(clip_int8(p1-q1) + 3*(q0-p0)); p0 and q0 move by (27*w+63) >> 7, p1 and q1 by (18*w+63) >> 7, p2 and q2 by (9*w+63) >> 7 (added on the p side, subtracted on the q side), each stored through av_clip_uint8(). */ +static av_always_inline void filter_mbedge(uint8_t *p, int stride) +{ + int a0, a1, a2, w; + + LOAD_PIXELS + + w = av_clip_int8(p1-q1); + w = av_clip_int8(w + 3*(q0-p0)); + + a0 = (27*w + 63) >> 7; + a1 = (18*w + 63) >> 7; + a2 = ( 9*w + 63) >> 7; + + p[-3*stride] = av_clip_uint8(p2 + a2); + p[-2*stride] = av_clip_uint8(p1 + a1); + p[-1*stride] = av_clip_uint8(p0 + a0); + p[ 0*stride] = av_clip_uint8(q0 - a0); + p[ 1*stride] = av_clip_uint8(q1 - a1); + p[ 2*stride] = av_clip_uint8(q2 - a2); +} +/** Generates vp8_<dir>_loop_filter<size>_c and vp8_<dir>_loop_filter<size>_inner_c. Both visit size positions, stridea apart, and filter across the edge with sample spacing strideb wherever normal_limit() passes. The first applies filter_common(..., 1) where hev() is set and filter_mbedge() elsewhere; the _inner one always applies filter_common() with is4tap = hev(). */ +#define LOOP_FILTER(dir, size, stridea, strideb) \ +static void vp8_ ## dir ## _loop_filter ## size ## _c(uint8_t *dst, int stride,\ + int flim_E, int flim_I, int hev_thresh)\ +{\ + int i;\ +\ + for (i = 0; i < size; i++)\ + if (normal_limit(dst+i*stridea, strideb, flim_E, flim_I)) {\ + if (hev(dst+i*stridea, strideb, hev_thresh))\ + filter_common(dst+i*stridea, strideb, 1);\ + else\ + filter_mbedge(dst+i*stridea, strideb);\ + }\ +}\ +\ +static void vp8_ ## dir ## _loop_filter ## size ## _inner_c(uint8_t *dst, int stride,\ + int flim_E,
int flim_I, int hev_thresh)\ +{\ + int i, hv;\ +\ + for (i = 0; i < size; i++)\ + if (normal_limit(dst+i*stridea, strideb, flim_E, flim_I)) {\ + hv = hev(dst+i*stridea, strideb, hev_thresh);\ + filter_common(dst+i*stridea, strideb, hv);\ + }\ +} +/* v: positions are adjacent bytes and samples across the edge are stride apart; h: positions are stride apart and samples across the edge are adjacent bytes. */ +LOOP_FILTER(v, 16, 1, stride) +LOOP_FILTER(h, 16, stride, 1) +LOOP_FILTER(v, 8, 1, stride) +LOOP_FILTER(h, 8, stride, 1) +/** Simple filter, v direction: for each of 16 adjacent bytes, applies filter_common(..., 1) with sample spacing stride where simple_limit() passes. */ +static void vp8_v_loop_filter_simple_c(uint8_t *dst, int stride, int flim) +{ + int i; + + for (i = 0; i < 16; i++) + if (simple_limit(dst+i, stride, flim)) + filter_common(dst+i, stride, 1); +} +/** Simple filter, h direction: same test and filter on 16 consecutive rows, with sample spacing 1. */ +static void vp8_h_loop_filter_simple_c(uint8_t *dst, int stride, int flim) +{ + int i; + + for (i = 0; i < 16; i++) + if (simple_limit(dst+i*stride, 1, flim)) + filter_common(dst+i*stride, 1, 1); +} +/** Tap magnitudes for the seven fractional positions, indexed by mx-1 or my-1. FILTER_6TAP and FILTER_4TAP subtract taps 1 and 4 and add the others; with those signs every row sums to 128. Rows 0, 2, 4 and 6 have zero outer taps. */ +static const uint8_t subpel_filters[7][6] = { + { 0, 6, 123, 12, 1, 0 }, + { 2, 11, 108, 36, 8, 1 }, + { 0, 9, 93, 50, 6, 0 }, + { 3, 16, 77, 77, 16, 3 }, + { 0, 6, 50, 93, 9, 0 }, + { 1, 8, 36, 108, 11, 2 }, + { 0, 1, 12, 123, 6, 0 }, +}; + +/** Apply filter F around src[x] with taps stride apart, round with +64, shift right by 7 and store through av_clip_uint8(). Both macros use a variable x from the expansion site. FILTER_4TAP skips taps 0 and 5. */ +#define FILTER_6TAP(src, F, stride) \ + av_clip_uint8((F[2]*src[x+0*stride] - F[1]*src[x-1*stride] + F[0]*src[x-2*stride] + \ + F[3]*src[x+1*stride] - F[4]*src[x+2*stride] + F[5]*src[x+3*stride] + 64) >> 7) + +#define FILTER_4TAP(src, F, stride) \ + av_clip_uint8((F[2]*src[x+0*stride] - F[1]*src[x-1*stride] + \ + F[3]*src[x+1*stride] - F[4]*src[x+2*stride] + 64) >> 7) +/** Generators for put_vp8_epel<SIZE>_<FILTERNAME>_c(dst, src, stride, h, mx, my); dst and src share stride. VP8_EPEL_H filters each of the h rows horizontally with subpel_filters[mx-1]; VP8_EPEL_V filters vertically with subpel_filters[my-1]; VP8_EPEL_HV first filters h+5 rows horizontally (starting two rows above src) into a stack buffer of (2*SIZE+5) rows, then filters that buffer vertically, so it needs h <= 2*SIZE. The mx or my used as index must be in 1..7. */ +#define VP8_EPEL_H(SIZE, FILTER, FILTERNAME) \ +static void put_vp8_epel ## SIZE ## _ ## FILTERNAME ## _c(uint8_t *dst, uint8_t *src, int stride, int h, int mx, int my) \ +{ \ + const uint8_t *filter = subpel_filters[mx-1]; \ + int x, y; \ +\ + for (y = 0; y < h; y++) { \ + for (x = 0; x < SIZE; x++) \ + dst[x] = FILTER(src, filter, 1); \ + dst += stride; \ + src += stride; \ + } \ +} +#define VP8_EPEL_V(SIZE, FILTER, FILTERNAME) \ +static void put_vp8_epel ## SIZE ## _ ## FILTERNAME ## _c(uint8_t *dst, uint8_t *src, int stride, int h, int mx, int my) \ +{ \ + const uint8_t *filter =
subpel_filters[my-1]; \ + int x, y; \ +\ + for (y = 0; y < h; y++) { \ + for (x = 0; x < SIZE; x++) \ + dst[x] = FILTER(src, filter, stride); \ + dst += stride; \ + src += stride; \ + } \ +} +#define VP8_EPEL_HV(SIZE, FILTERX, FILTERY, FILTERNAME) \ +static void put_vp8_epel ## SIZE ## _ ## FILTERNAME ## _c(uint8_t *dst, uint8_t *src, int stride, int h, int mx, int my) \ +{ \ + const uint8_t *filter = subpel_filters[mx-1]; \ + int x, y; \ + uint8_t tmp_array[(2*SIZE+5)*SIZE]; \ + uint8_t *tmp = tmp_array; \ + src -= 2*stride; \ +\ + for (y = 0; y < h+5; y++) { \ + for (x = 0; x < SIZE; x++) \ + tmp[x] = FILTERX(src, filter, 1); \ + tmp += SIZE; \ + src += stride; \ + } \ +\ + tmp = tmp_array + 2*SIZE; \ + filter = subpel_filters[my-1]; \ +\ + for (y = 0; y < h; y++) { \ + for (x = 0; x < SIZE; x++) \ + dst[x] = FILTERY(tmp, filter, SIZE); \ + dst += stride; \ + tmp += SIZE; \ + } \ +} +/* Instantiate every horizontal, vertical and combined filter variant for widths 16, 8 and 4. */ +VP8_EPEL_H(16, FILTER_4TAP, h4) +VP8_EPEL_H(8, FILTER_4TAP, h4) +VP8_EPEL_H(4, FILTER_4TAP, h4) +VP8_EPEL_H(16, FILTER_6TAP, h6) +VP8_EPEL_H(8, FILTER_6TAP, h6) +VP8_EPEL_H(4, FILTER_6TAP, h6) +VP8_EPEL_V(16, FILTER_4TAP, v4) +VP8_EPEL_V(8, FILTER_4TAP, v4) +VP8_EPEL_V(4, FILTER_4TAP, v4) +VP8_EPEL_V(16, FILTER_6TAP, v6) +VP8_EPEL_V(8, FILTER_6TAP, v6) +VP8_EPEL_V(4, FILTER_6TAP, v6) +VP8_EPEL_HV(16, FILTER_4TAP, FILTER_4TAP, h4v4) +VP8_EPEL_HV(8, FILTER_4TAP, FILTER_4TAP, h4v4) +VP8_EPEL_HV(4, FILTER_4TAP, FILTER_4TAP, h4v4) +VP8_EPEL_HV(16, FILTER_4TAP, FILTER_6TAP, h4v6) +VP8_EPEL_HV(8, FILTER_4TAP, FILTER_6TAP, h4v6) +VP8_EPEL_HV(4, FILTER_4TAP, FILTER_6TAP, h4v6) +VP8_EPEL_HV(16, FILTER_6TAP, FILTER_4TAP, h6v4) +VP8_EPEL_HV(8, FILTER_6TAP, FILTER_4TAP, h6v4) +VP8_EPEL_HV(4, FILTER_6TAP, FILTER_4TAP, h6v4) +VP8_EPEL_HV(16, FILTER_6TAP, FILTER_6TAP, h6v6) +VP8_EPEL_HV(8, FILTER_6TAP, FILTER_6TAP, h6v6) +VP8_EPEL_HV(4, FILTER_6TAP, FILTER_6TAP, h6v6) +/** Fills put_vp8_epel_pixels_tab[IDX][v][h] for block width SIZE: v and h select the vertical and horizontal filter, 0 = none, 1 = 4-tap, 2 = 6-tap; entry [0][0] is the plain copy ff_put_vp8_pixels<SIZE>_c. Expects a variable dsp in scope. */ +#define VP8_MC_FUNC(IDX, SIZE) \ + dsp->put_vp8_epel_pixels_tab[IDX][0][0] = ff_put_vp8_pixels ## SIZE ## _c; \ +
dsp->put_vp8_epel_pixels_tab[IDX][0][1] = put_vp8_epel ## SIZE ## _h4_c; \ + dsp->put_vp8_epel_pixels_tab[IDX][0][2] = put_vp8_epel ## SIZE ## _h6_c; \ + dsp->put_vp8_epel_pixels_tab[IDX][1][0] = put_vp8_epel ## SIZE ## _v4_c; \ + dsp->put_vp8_epel_pixels_tab[IDX][1][1] = put_vp8_epel ## SIZE ## _h4v4_c; \ + dsp->put_vp8_epel_pixels_tab[IDX][1][2] = put_vp8_epel ## SIZE ## _h6v4_c; \ + dsp->put_vp8_epel_pixels_tab[IDX][2][0] = put_vp8_epel ## SIZE ## _v6_c; \ + dsp->put_vp8_epel_pixels_tab[IDX][2][1] = put_vp8_epel ## SIZE ## _h4v6_c; \ + dsp->put_vp8_epel_pixels_tab[IDX][2][2] = put_vp8_epel ## SIZE ## _h6v6_c +/** Points every member of *dsp at the C implementations in this file; table index 0, 1, 2 receives the 16-, 8- and 4-wide functions. */ +av_cold void ff_vp8dsp_init(VP8DSPContext *dsp) +{ + dsp->vp8_luma_dc_wht = vp8_luma_dc_wht_c; + dsp->vp8_idct_add = vp8_idct_add_c; + dsp->vp8_idct_dc_add = vp8_idct_dc_add_c; + + dsp->vp8_v_loop_filter16 = vp8_v_loop_filter16_c; + dsp->vp8_h_loop_filter16 = vp8_h_loop_filter16_c; + dsp->vp8_v_loop_filter8 = vp8_v_loop_filter8_c; + dsp->vp8_h_loop_filter8 = vp8_h_loop_filter8_c; + + dsp->vp8_v_loop_filter16_inner = vp8_v_loop_filter16_inner_c; + dsp->vp8_h_loop_filter16_inner = vp8_h_loop_filter16_inner_c; + dsp->vp8_v_loop_filter8_inner = vp8_v_loop_filter8_inner_c; + dsp->vp8_h_loop_filter8_inner = vp8_h_loop_filter8_inner_c; + + dsp->vp8_v_loop_filter_simple = vp8_v_loop_filter_simple_c; + dsp->vp8_h_loop_filter_simple = vp8_h_loop_filter_simple_c; + + VP8_MC_FUNC(0, 16); + VP8_MC_FUNC(1, 8); + VP8_MC_FUNC(2, 4); +} diff -r 7d04a6cec75f -r f2007d7c3f1d vp8dsp.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/vp8dsp.h Tue Jun 22 19:24:09 2010 +0000 @@ -0,0 +1,67 @@ +/** + * VP8 compatible video decoder + * + * Copyright (C) 2010 David Conrad + * Copyright (C) 2010 Ronald S. Bultje + * + * This file is part of FFmpeg.
+ * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + + +#ifndef AVCODEC_VP8DSP_H +#define AVCODEC_VP8DSP_H + +#include "dsputil.h" +/** Function-pointer table for the VP8 DSP routines; ff_vp8dsp_init() fills every member. */ +typedef struct VP8DSPContext { + void (*vp8_luma_dc_wht)(DCTELEM block[4][4][16], DCTELEM dc[16]); + void (*vp8_idct_add)(uint8_t *dst, DCTELEM block[16], int stride); + void (*vp8_idct_dc_add)(uint8_t *dst, DCTELEM block[16], int stride); + + // loop filter applied to edges between macroblocks + void (*vp8_v_loop_filter16)(uint8_t *dst, int stride, int flim_E, int flim_I, int hev_thresh); + void (*vp8_h_loop_filter16)(uint8_t *dst, int stride, int flim_E, int flim_I, int hev_thresh); + void (*vp8_v_loop_filter8)(uint8_t *dst, int stride, int flim_E, int flim_I, int hev_thresh); + void (*vp8_h_loop_filter8)(uint8_t *dst, int stride, int flim_E, int flim_I, int hev_thresh); + + // loop filter applied to inner macroblock edges + void (*vp8_v_loop_filter16_inner)(uint8_t *dst, int stride, int flim_E, int flim_I, int hev_thresh); + void (*vp8_h_loop_filter16_inner)(uint8_t *dst, int stride, int flim_E, int flim_I, int hev_thresh); + void (*vp8_v_loop_filter8_inner)(uint8_t *dst, int stride, int flim_E, int flim_I, int hev_thresh); + void (*vp8_h_loop_filter8_inner)(uint8_t *dst, int stride, int flim_E, int flim_I, int hev_thresh); +/* simple loop filter: takes a single limit flim and no hev threshold */ + void
(*vp8_v_loop_filter_simple)(uint8_t *dst, int stride, int flim); + void (*vp8_h_loop_filter_simple)(uint8_t *dst, int stride, int flim); + + /** + * first dimension: block width, 0 for 16, 1 for 8, 2 for 4 (4-log2(width)); the height is passed separately as h + * second dimension: 0 if no vertical interpolation is needed; + * 1 4-tap vertical interpolation filter (my & 1) + * 2 6-tap vertical interpolation filter (!(my & 1)) + * third dimension: same as second dimension, for horizontal interpolation + * so something like put_vp8_epel_pixels_tab[4-log2(width)][2*!!my-(my&1)][2*!!mx-(mx&1)](..., mx, my) + */ + h264_chroma_mc_func put_vp8_epel_pixels_tab[3][3][3]; +} VP8DSPContext; +/* Plain block copies installed as the [0][0] entries of put_vp8_epel_pixels_tab; defined in dsputil.c, where x and y are unused. */ +void ff_put_vp8_pixels16_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y); +void ff_put_vp8_pixels8_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y); +void ff_put_vp8_pixels4_c(uint8_t *dst, uint8_t *src, int stride, int h, int x, int y); +/** Fills *c with the C implementations from vp8dsp.c. */ +void ff_vp8dsp_init(VP8DSPContext *c); + +#endif /* AVCODEC_VP8DSP_H */