Mercurial > libavcodec.hg
diff vp8.c @ 12253:112b3a0db187 libavcodec
Decode DCT tokens by branching to a different code path for each branch
of the Huffman tree, instead of traversing the tree in a while loop.
Based on the similar optimization in libvpx's detokenize.c.
10% faster at normal bitrates, and 30% faster for high-bitrate intra-only content.
author | conrad |
---|---|
date | Fri, 23 Jul 2010 21:46:17 +0000 |
parents | 35ee666e4496 |
children | 17c151e1280a |
line wrap: on
line diff
--- a/vp8.c Fri Jul 23 21:46:14 2010 +0000 +++ b/vp8.c Fri Jul 23 21:46:17 2010 +0000 @@ -800,36 +800,61 @@ uint8_t probs[8][3][NUM_DCT_TOKENS-1], int i, int zero_nhood, int16_t qmul[2]) { - int token, nonzero = 0; - int offset = 0; + uint8_t *token_prob; + int nonzero = 0; + int coeff; - for (; i < 16; i++) { - token = vp8_rac_get_tree_with_offset(c, vp8_coeff_tree, probs[vp8_coeff_band[i]][zero_nhood], offset); + do { + token_prob = probs[vp8_coeff_band[i]][zero_nhood]; - if (token == DCT_EOB) - break; - else if (token >= DCT_CAT1) { - int cat = token-DCT_CAT1; - token = vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]); - token += 3 + (2<<cat); + if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB + return nonzero; + +skip_eob: + if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0 + zero_nhood = 0; + token_prob = probs[vp8_coeff_band[++i]][0]; + if (i < 16) + goto skip_eob; + return nonzero; // invalid input; blocks should end with EOB } - // after the first token, the non-zero prediction context becomes - // based on the last decoded coeff - if (!token) { - zero_nhood = 0; - offset = 1; - continue; - } else if (token == 1) + if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1 + coeff = 1; zero_nhood = 1; - else + } else { zero_nhood = 2; + if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4 + coeff = vp56_rac_get_prob(c, token_prob[4]); + if (coeff) + coeff += vp56_rac_get_prob(c, token_prob[5]); + coeff += 2; + } else { + // DCT_CAT* + if (!vp56_rac_get_prob_branchy(c, token_prob[6])) { + if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1 + coeff = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]); + } else { // DCT_CAT2 + coeff = 7; + coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1; + coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]); + } + } else { // DCT_CAT3 and up + int a = vp56_rac_get_prob(c, token_prob[8]); + int b = vp56_rac_get_prob(c, token_prob[9+a]); + int cat = (a<<1) + b; + coeff = 3 + 
(8<<cat); + coeff += vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]); + } + } + } + // todo: full [16] qmat? load into register? - block[zigzag_scan[i]] = (vp8_rac_get(c) ? -token : token) * qmul[!!i]; - nonzero = i+1; - offset = 0; - } + block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i]; + nonzero = ++i; + } while (i < 16); + return nonzero; }