Mercurial > libavcodec.hg
comparison vp8.c @ 12342:b4c63ffd959b libavcodec
VP8: much faster DC transform handling
A lot of the time the DC block is empty: don't do the WHT in this case.
A lot of the rest of the time, there's only one coefficient: make a special
DC-only transform for that case.
When the DC block is empty, don't incorrectly mark luma DCT blocks as having DC
coefficients.
author | darkshikari |
---|---|
date | Mon, 02 Aug 2010 20:57:03 +0000 |
parents | 2d15f62f4f8a |
children | a18ab740d2db |
comparison
equal
deleted
inserted
replaced
12341:ad24cca213ae | 12342:b4c63ffd959b |
---|---|
866 uint8_t t_nnz[9], uint8_t l_nnz[9]) | 866 uint8_t t_nnz[9], uint8_t l_nnz[9]) |
867 { | 867 { |
868 int i, x, y, luma_start = 0, luma_ctx = 3; | 868 int i, x, y, luma_start = 0, luma_ctx = 3; |
869 int nnz_pred, nnz, nnz_total = 0; | 869 int nnz_pred, nnz, nnz_total = 0; |
870 int segment = s->segment; | 870 int segment = s->segment; |
| | 871 int block_dc = 0; |
871 | 872 |
872 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) { | 873 if (mb->mode != MODE_I4x4 && mb->mode != VP8_MVMODE_SPLIT) { |
873 nnz_pred = t_nnz[8] + l_nnz[8]; | 874 nnz_pred = t_nnz[8] + l_nnz[8]; |
874 | 875 |
875 // decode DC values and do hadamard | 876 // decode DC values and do hadamard |
876 nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred, | 877 nnz = decode_block_coeffs(c, s->block_dc, s->prob->token[1], 0, nnz_pred, |
877 s->qmat[segment].luma_dc_qmul); | 878 s->qmat[segment].luma_dc_qmul); |
878 l_nnz[8] = t_nnz[8] = !!nnz; | 879 l_nnz[8] = t_nnz[8] = !!nnz; |
879 nnz_total += nnz; | 880 if (nnz) { |
880 s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc); | 881 nnz_total += nnz; |
| | 882 block_dc = 1; |
| | 883 if (nnz == 1) |
| | 884 s->vp8dsp.vp8_luma_dc_wht_dc(s->block, s->block_dc); |
| | 885 else |
| | 886 s->vp8dsp.vp8_luma_dc_wht(s->block, s->block_dc); |
| | 887 } |
881 luma_start = 1; | 888 luma_start = 1; |
882 luma_ctx = 0; | 889 luma_ctx = 0; |
883 } | 890 } |
884 | 891 |
885 // luma blocks | 892 // luma blocks |
886 for (y = 0; y < 4; y++) | 893 for (y = 0; y < 4; y++) |
887 for (x = 0; x < 4; x++) { | 894 for (x = 0; x < 4; x++) { |
888 nnz_pred = l_nnz[y] + t_nnz[x]; | 895 nnz_pred = l_nnz[y] + t_nnz[x]; |
889 nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start, | 896 nnz = decode_block_coeffs(c, s->block[y][x], s->prob->token[luma_ctx], luma_start, |
890 nnz_pred, s->qmat[segment].luma_qmul); | 897 nnz_pred, s->qmat[segment].luma_qmul); |
891 // nnz+luma_start may be one more than the actual last index, but we don't care | 898 // nnz+block_dc may be one more than the actual last index, but we don't care |
892 s->non_zero_count_cache[y][x] = nnz + luma_start; | 899 s->non_zero_count_cache[y][x] = nnz + block_dc; |
893 t_nnz[x] = l_nnz[y] = !!nnz; | 900 t_nnz[x] = l_nnz[y] = !!nnz; |
894 nnz_total += nnz; | 901 nnz_total += nnz; |
895 } | 902 } |
896 | 903 |
897 // chroma blocks | 904 // chroma blocks |