comparison vp8.c @ 12360:18117b5bb7dc libavcodec

VP8: simplify decode_block_coeffs to avoid having to track nonzero coeffs. Slightly faster.
author darkshikari
date Wed, 04 Aug 2010 01:38:08 +0000
parents d596749eb0bc
children a66d6456df90
comparison
equal deleted inserted replaced
12359:565ccbf26015 12360:18117b5bb7dc
824 static int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16], 824 static int decode_block_coeffs(VP56RangeCoder *c, DCTELEM block[16],
825 uint8_t probs[8][3][NUM_DCT_TOKENS-1], 825 uint8_t probs[8][3][NUM_DCT_TOKENS-1],
826 int i, int zero_nhood, int16_t qmul[2]) 826 int i, int zero_nhood, int16_t qmul[2])
827 { 827 {
828 uint8_t *token_prob = probs[i][zero_nhood]; 828 uint8_t *token_prob = probs[i][zero_nhood];
829 int nonzero = 0;
830 int coeff; 829 int coeff;
830
831 if (!vp56_rac_get_prob_branchy(c, token_prob[0]))
832 return 0;
833 goto skip_eob;
831 834
832 do { 835 do {
833 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB 836 if (!vp56_rac_get_prob_branchy(c, token_prob[0])) // DCT_EOB
834 return nonzero; 837 return i;
835 838
836 skip_eob: 839 skip_eob:
837 if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0 840 if (!vp56_rac_get_prob_branchy(c, token_prob[1])) { // DCT_0
838 if (++i == 16) 841 if (++i == 16)
839 return nonzero; // invalid input; blocks should end with EOB 842 return i; // invalid input; blocks should end with EOB
840 token_prob = probs[i][0]; 843 token_prob = probs[i][0];
841 goto skip_eob; 844 goto skip_eob;
842 } 845 }
843 846
844 if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1 847 if (!vp56_rac_get_prob_branchy(c, token_prob[2])) { // DCT_1
868 coeff += vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]); 871 coeff += vp8_rac_get_coeff(c, vp8_dct_cat_prob[cat]);
869 } 872 }
870 } 873 }
871 token_prob = probs[i+1][2]; 874 token_prob = probs[i+1][2];
872 } 875 }
873
874 // todo: full [16] qmat? load into register?
875 block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i]; 876 block[zigzag_scan[i]] = (vp8_rac_get(c) ? -coeff : coeff) * qmul[!!i];
876 nonzero = ++i; 877 } while (++i < 16);
877 } while (i < 16); 878
878 879 return i;
879 return nonzero;
880 } 880 }
881 881
882 static av_always_inline 882 static av_always_inline
883 void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb, 883 void decode_mb_coeffs(VP8Context *s, VP56RangeCoder *c, VP8Macroblock *mb,
884 uint8_t t_nnz[9], uint8_t l_nnz[9]) 884 uint8_t t_nnz[9], uint8_t l_nnz[9])
908 } 908 }
909 909
910 // luma blocks 910 // luma blocks
911 for (y = 0; y < 4; y++) 911 for (y = 0; y < 4; y++)
912 for (x = 0; x < 4; x++) { 912 for (x = 0; x < 4; x++) {
913 nnz_pred = l_nnz[y] + t_nnz[x]; 913 nnz_pred = l_nnz[y] + t_nnz[x];START_TIMER;
914 nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start, 914 nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start,
915 nnz_pred, s->qmat[segment].luma_qmul); 915 nnz_pred, s->qmat[segment].luma_qmul);STOP_TIMER("test");
916 // nnz+block_dc may be one more than the actual last index, but we don't care 916 // nnz+block_dc may be one more than the actual last index, but we don't care
917 s->non_zero_count_cache[y][x] = nnz + block_dc; 917 s->non_zero_count_cache[y][x] = nnz + block_dc;
918 t_nnz[x] = l_nnz[y] = !!nnz; 918 t_nnz[x] = l_nnz[y] = !!nnz;
919 nnz_total += nnz; 919 nnz_total += nnz;
920 } 920 }