# HG changeset patch # User mru # Date 1263564172 0 # Node ID 966a8afdd9aa0c06dbab8124b42aca815ab9169b # Parent e3e466b6bc5ef8a301d47af99b2741884f146c1e AAC: optimise bitstream reading in decode_spectrum_and_dequant() Using the low-level macros directly avoids redundant open/update/close cycles. 2-3% faster on ARM, PPC, and Core i7. diff -r e3e466b6bc5e -r 966a8afdd9aa aac.c --- a/aac.c Fri Jan 15 11:03:40 2010 +0000 +++ b/aac.c Fri Jan 15 14:02:52 2010 +0000 @@ -993,6 +993,7 @@ const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1]; VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table; const int cb_size = ff_aac_spectral_sizes[cbt_m1]; + OPEN_READER(re, gb); switch (cbt_m1 >> 1) { case 0: @@ -1001,15 +1002,18 @@ int len = off_len; do { - const int index = get_vlc2(gb, vlc_tab, 8, 2); + int code; unsigned cb_idx; - if (index >= cb_size) { - err_idx = index; + UPDATE_CACHE(re, gb); + GET_VLC(code, re, gb, vlc_tab, 8, 2); + + if (code >= cb_size) { + err_idx = code; goto err_cb_overflow; } - cb_idx = cb_vector_idx[index]; + cb_idx = cb_vector_idx[code]; cf = VMUL4(cf, vq, cb_idx, sf + idx); } while (len -= 4); } @@ -1021,19 +1025,26 @@ int len = off_len; do { - const int index = get_vlc2(gb, vlc_tab, 8, 2); + int code; unsigned nnz; unsigned cb_idx; uint32_t bits; - if (index >= cb_size) { - err_idx = index; + UPDATE_CACHE(re, gb); + GET_VLC(code, re, gb, vlc_tab, 8, 2); + + if (code >= cb_size) { + err_idx = code; goto err_cb_overflow; } - cb_idx = cb_vector_idx[index]; +#if MIN_CACHE_BITS < 20 + UPDATE_CACHE(re, gb); +#endif + cb_idx = cb_vector_idx[code]; nnz = cb_idx >> 8 & 15; - bits = get_bits(gb, nnz) << (32-nnz); + bits = SHOW_UBITS(re, gb, nnz) << (32-nnz); + LAST_SKIP_BITS(re, gb, nnz); cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx); } while (len -= 4); } @@ -1045,15 +1056,18 @@ int len = off_len; do { - const int index = get_vlc2(gb, vlc_tab, 8, 2); + int code; unsigned cb_idx; - if (index >= cb_size) { - err_idx = index; + UPDATE_CACHE(re, gb); + GET_VLC(code, re, gb, vlc_tab, 8, 2); + + if (code >= cb_size) { + err_idx = code; goto err_cb_overflow; } - cb_idx = cb_vector_idx[index]; + cb_idx = cb_vector_idx[code]; cf = VMUL2(cf, vq, cb_idx, sf + idx); } while (len -= 2); } @@ -1066,19 +1080,23 @@ int len = off_len; do { - const int index = get_vlc2(gb, vlc_tab, 8, 2); + int code; unsigned nnz; unsigned cb_idx; unsigned sign; - if (index >= cb_size) { - err_idx = index; + UPDATE_CACHE(re, gb); + GET_VLC(code, re, gb, vlc_tab, 8, 2); + + if (code >= cb_size) { + err_idx = code; goto err_cb_overflow; } - cb_idx = cb_vector_idx[index]; + cb_idx = cb_vector_idx[code]; nnz = cb_idx >> 8 & 15; - sign = get_bits(gb, nnz) << (cb_idx >> 12); + sign = SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12); + LAST_SKIP_BITS(re, gb, nnz); cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx); } while (len -= 2); } @@ -1091,39 +1109,56 @@ int len = off_len; do { - const int index = get_vlc2(gb, vlc_tab, 8, 2); + int code; unsigned nzt, nnz; unsigned cb_idx; uint32_t bits; int j; - if (!index) { + UPDATE_CACHE(re, gb); + GET_VLC(code, re, gb, vlc_tab, 8, 2); + + if (!code) { *icf++ = 0; *icf++ = 0; continue; } - if (index >= cb_size) { - err_idx = index; + if (code >= cb_size) { + err_idx = code; goto err_cb_overflow; } - cb_idx = cb_vector_idx[index]; + cb_idx = cb_vector_idx[code]; nnz = cb_idx >> 12; nzt = cb_idx >> 8; - bits = get_bits(gb, nnz) << (32-nnz); + bits = SHOW_UBITS(re, gb, nnz) << (32-nnz); + LAST_SKIP_BITS(re, gb, nnz); for (j = 0; j < 2; j++) { if (nzt & 1< 8) { av_log(ac->avccontext, AV_LOG_ERROR, "error in spectral data, ESC overflow\n"); return -1; } - n = (1 << n) + get_bits(gb, n); + +#if MIN_CACHE_BITS < 21 + LAST_SKIP_BITS(re, gb, b + 1); + UPDATE_CACHE(re, gb); +#else + SKIP_BITS(re, gb, b + 1); +#endif + b += 4; + n = (1 << b) + SHOW_UBITS(re, gb, b); + LAST_SKIP_BITS(re, gb, b); *icf++ = cbrt_tab[n] | (bits & 1<<31); bits <<= 1; } else { @@ -1138,6 +1173,8 @@ ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len); } } + + CLOSE_READER(re, gb); } } coef += g_len << 7;