Mercurial > libavcodec.hg
comparison vp8.c @ 12253:112b3a0db187 libavcodec
Decode DCT tokens by branching to a different code path for each branch
of the Huffman tree, instead of traversing the tree in a while loop.
Based on the similar optimization in libvpx's detokenize.c.
10% faster at normal bitrates, and 30% faster for high-bitrate intra-only streams.
author | conrad |
---|---|
date | Fri, 23 Jul 2010 21:46:17 +0000 |
parents | 35ee666e4496 |
children | 17c151e1280a |
comparison
equal
deleted
inserted
replaced
12252:b8211cda076d | 12253:112b3a0db187 |
---|---|
798 */ | 798 */ |
799 static int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16], | 799 static int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16], |
800 uint8_t probs[8][3][NUM_DCT_TOKENS-1], | 800 uint8_t probs[8][3][NUM_DCT_TOKENS-1], |
801 int i, int zero_nhood, int16_t qmul[2]) | 801 int i, int zero_nhood, int16_t qmul[2]) |
802 { | 802 { |
803 int token, nonzero = 0; | 803 uint8_t *token_prob; |
804 int offset = 0; | 804 int nonzero = 0; |
805 | 805 int coeff; |
806 for (; i < 16; i++) { | 806 |
807 token = vp8_rac_get_tree_with_offset(c, vp8_coeff_tree, probs[vp8_coeff_band[i]][zero_nhood], offset); | 807 do { |
808 | 808 token_prob = probs[vp8_coeff_band[i]][zero_nhood]; |
809 if (token == DCT_EOB) | 809 |
810 break; | 810 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB |
811 else if (token >= DCT_CAT1) { | 811 return nonzero; |
812 int cat = token-DCT_CAT1; | 812 |
813 token = vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]); | 813 skip_eob: |
814 token += 3 + (2<<cat); | 814 if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0 |
815 } | |
816 | |
817 // after the first token, the non-zero prediction context becomes | |
818 // based on the last decoded coeff | |
819 if (!token) { | |
820 zero_nhood = 0; | 815 zero_nhood = 0; |
821 offset = 1; | 816 token_prob = probs[vp8_coeff_band[++i]][0]; |
822 continue; | 817 if (i < 16) |
823 } else if (token == 1) | 818 goto skip_eob; |
819 return nonzero; // invalid input; blocks should end with EOB | |
820 } | |
821 | |
822 if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1 | |
823 coeff = 1; | |
824 zero_nhood = 1; | 824 zero_nhood = 1; |
825 else | 825 } else { |
826 zero_nhood = 2; | 826 zero_nhood = 2; |
827 | 827 |
828 if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4 | |
829 coeff = vp56_rac_get_prob(c, token_prob[4]); | |
830 if (coeff) | |
831 coeff += vp56_rac_get_prob(c, token_prob[5]); | |
832 coeff += 2; | |
833 } else { | |
834 // DCT_CAT* | |
835 if (!vp56_rac_get_prob_branchy(c, token_prob[6])) { | |
836 if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1 | |
837 coeff = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]); | |
838 } else { // DCT_CAT2 | |
839 coeff = 7; | |
840 coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1; | |
841 coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]); | |
842 } | |
843 } else { // DCT_CAT3 and up | |
844 int a = vp56_rac_get_prob(c, token_prob[8]); | |
845 int b = vp56_rac_get_prob(c, token_prob[9+a]); | |
846 int cat = (a<<1) + b; | |
847 coeff = 3 + (8<<cat); | |
848 coeff += vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]); | |
849 } | |
850 } | |
851 } | |
852 | |
828 // todo: full [16] qmat? load into register? | 853 // todo: full [16] qmat? load into register? |
829 block[zigzag_scan[i]] = (vp8_rac_get(c) ? -token : token) * qmul[!!i]; | 854 block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i]; |
830 nonzero = i+1; | 855 nonzero = ++i; |
831 offset = 0; | 856 } while (i < 16); |
832 } | 857 |
833 return nonzero; | 858 return nonzero; |
834 } | 859 } |
835 | 860 |
836 static av_always_inline | 861 static av_always_inline |
837 void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, | 862 void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, |