comparison vp8.c @ 12253:112b3a0db187 libavcodec

Decode DCT tokens by branching to a different code path for each branch on the Huffman tree, instead of traversing the tree in a while loop. Based on the similar optimization in libvpx's detokenize.c. 10% faster at normal bitrates, and 30% faster for high-bitrate intra-only.
author conrad
date Fri, 23 Jul 2010 21:46:17 +0000
parents 35ee666e4496
children 17c151e1280a
comparison
equal deleted inserted replaced
12252:b8211cda076d 12253:112b3a0db187
798 */ 798 */
799 static int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16], 799 static int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
800 uint8_t probs[8][3][NUM_DCT_TOKENS-1], 800 uint8_t probs[8][3][NUM_DCT_TOKENS-1],
801 int i, int zero_nhood, int16_t qmul[2]) 801 int i, int zero_nhood, int16_t qmul[2])
802 { 802 {
803 int token, nonzero = 0; 803 uint8_t *token_prob;
804 int offset = 0; 804 int nonzero = 0;
805 805 int coeff;
806 for (; i < 16; i++) { 806
807 token = vp8_rac_get_tree_with_offset(c, vp8_coeff_tree, probs[vp8_coeff_band[i]][zero_nhood], offset); 807 do {
808 808 token_prob = probs[vp8_coeff_band[i]][zero_nhood];
809 if (token == DCT_EOB) 809
810 break; 810 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
811 else if (token >= DCT_CAT1) { 811 return nonzero;
812 int cat = token-DCT_CAT1; 812
813 token = vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]); 813 skip_eob:
814 token += 3 + (2<<cat); 814 if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0
815 }
816
817 // after the first token, the non-zero prediction context becomes
818 // based on the last decoded coeff
819 if (!token) {
820 zero_nhood = 0; 815 zero_nhood = 0;
821 offset = 1; 816 token_prob = probs[vp8_coeff_band[++i]][0];
822 continue; 817 if (i < 16)
823 } else if (token == 1) 818 goto skip_eob;
819 return nonzero; // invalid input; blocks should end with EOB
820 }
821
822 if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1
823 coeff = 1;
824 zero_nhood = 1; 824 zero_nhood = 1;
825 else 825 } else {
826 zero_nhood = 2; 826 zero_nhood = 2;
827 827
828 if (!vp56_rac_get_prob_branchy(c, token_prob[3])) { // DCT 2,3,4
829 coeff = vp56_rac_get_prob(c, token_prob[4]);
830 if (coeff)
831 coeff += vp56_rac_get_prob(c, token_prob[5]);
832 coeff += 2;
833 } else {
834 // DCT_CAT*
835 if (!vp56_rac_get_prob_branchy(c, token_prob[6])) {
836 if (!vp56_rac_get_prob_branchy(c, token_prob[7])) { // DCT_CAT1
837 coeff = 5 + vp56_rac_get_prob(c, vp8_dct_cat1_prob[0]);
838 } else { // DCT_CAT2
839 coeff = 7;
840 coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[0]) << 1;
841 coeff += vp56_rac_get_prob(c, vp8_dct_cat2_prob[1]);
842 }
843 } else { // DCT_CAT3 and up
844 int a = vp56_rac_get_prob(c, token_prob[8]);
845 int b = vp56_rac_get_prob(c, token_prob[9+a]);
846 int cat = (a<<1) + b;
847 coeff = 3 + (8<<cat);
848 coeff += vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]);
849 }
850 }
851 }
852
828 // todo: full [16] qmat? load into register? 853 // todo: full [16] qmat? load into register?
829 block[zigzag_scan[i]] = (vp8_rac_get(c) ? -token : token) * qmul[!!i]; 854 block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i];
830 nonzero = i+1; 855 nonzero = ++i;
831 offset = 0; 856 } while (i < 16);
832 } 857
833 return nonzero; 858 return nonzero;
834 } 859 }
835 860
836 static av_always_inline 861 static av_always_inline
837 void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, 862 void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,