Mercurial > libavcodec.hg
comparison aac.c @ 10886:966a8afdd9aa libavcodec
AAC: optimise bitstream reading in decode_spectrum_and_dequant()
Using the low-level macros directly avoids redundant open/update/close
cycles.
2-3% faster on ARM, PPC, and Core i7.
author | mru |
---|---|
date | Fri, 15 Jan 2010 14:02:52 +0000 |
parents | 798c62217427 |
children | 134644e36859 |
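comparison
Legend (row types in the side-by-side table below; left column is the old revision, right column is the new revision):
equal
deleted
inserted
replaced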
10885:e3e466b6bc5e | 10886:966a8afdd9aa |
---|---|
991 } else { | 991 } else { |
992 const float *vq = ff_aac_codebook_vector_vals[cbt_m1]; | 992 const float *vq = ff_aac_codebook_vector_vals[cbt_m1]; |
993 const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1]; | 993 const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1]; |
994 VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table; | 994 VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table; |
995 const int cb_size = ff_aac_spectral_sizes[cbt_m1]; | 995 const int cb_size = ff_aac_spectral_sizes[cbt_m1]; |
996 OPEN_READER(re, gb); | |
996 | 997 |
997 switch (cbt_m1 >> 1) { | 998 switch (cbt_m1 >> 1) { |
998 case 0: | 999 case 0: |
999 for (group = 0; group < g_len; group++, cfo+=128) { | 1000 for (group = 0; group < g_len; group++, cfo+=128) { |
1000 float *cf = cfo; | 1001 float *cf = cfo; |
1001 int len = off_len; | 1002 int len = off_len; |
1002 | 1003 |
1003 do { | 1004 do { |
1004 const int index = get_vlc2(gb, vlc_tab, 8, 2); | 1005 int code; |
1005 unsigned cb_idx; | 1006 unsigned cb_idx; |
1006 | 1007 |
1007 if (index >= cb_size) { | 1008 UPDATE_CACHE(re, gb); |
1008 err_idx = index; | 1009 GET_VLC(code, re, gb, vlc_tab, 8, 2); |
1010 | |
1011 if (code >= cb_size) { | |
1012 err_idx = code; | |
1009 goto err_cb_overflow; | 1013 goto err_cb_overflow; |
1010 } | 1014 } |
1011 | 1015 |
1012 cb_idx = cb_vector_idx[index]; | 1016 cb_idx = cb_vector_idx[code]; |
1013 cf = VMUL4(cf, vq, cb_idx, sf + idx); | 1017 cf = VMUL4(cf, vq, cb_idx, sf + idx); |
1014 } while (len -= 4); | 1018 } while (len -= 4); |
1015 } | 1019 } |
1016 break; | 1020 break; |
1017 | 1021 |
1019 for (group = 0; group < g_len; group++, cfo+=128) { | 1023 for (group = 0; group < g_len; group++, cfo+=128) { |
1020 float *cf = cfo; | 1024 float *cf = cfo; |
1021 int len = off_len; | 1025 int len = off_len; |
1022 | 1026 |
1023 do { | 1027 do { |
1024 const int index = get_vlc2(gb, vlc_tab, 8, 2); | 1028 int code; |
1025 unsigned nnz; | 1029 unsigned nnz; |
1026 unsigned cb_idx; | 1030 unsigned cb_idx; |
1027 uint32_t bits; | 1031 uint32_t bits; |
1028 | 1032 |
1029 if (index >= cb_size) { | 1033 UPDATE_CACHE(re, gb); |
1030 err_idx = index; | 1034 GET_VLC(code, re, gb, vlc_tab, 8, 2); |
1035 | |
1036 if (code >= cb_size) { | |
1037 err_idx = code; | |
1031 goto err_cb_overflow; | 1038 goto err_cb_overflow; |
1032 } | 1039 } |
1033 | 1040 |
1034 cb_idx = cb_vector_idx[index]; | 1041 #if MIN_CACHE_BITS < 20 |
1042 UPDATE_CACHE(re, gb); | |
1043 #endif | |
1044 cb_idx = cb_vector_idx[code]; | |
1035 nnz = cb_idx >> 8 & 15; | 1045 nnz = cb_idx >> 8 & 15; |
1036 bits = get_bits(gb, nnz) << (32-nnz); | 1046 bits = SHOW_UBITS(re, gb, nnz) << (32-nnz); |
1047 LAST_SKIP_BITS(re, gb, nnz); | |
1037 cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx); | 1048 cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx); |
1038 } while (len -= 4); | 1049 } while (len -= 4); |
1039 } | 1050 } |
1040 break; | 1051 break; |
1041 | 1052 |
1043 for (group = 0; group < g_len; group++, cfo+=128) { | 1054 for (group = 0; group < g_len; group++, cfo+=128) { |
1044 float *cf = cfo; | 1055 float *cf = cfo; |
1045 int len = off_len; | 1056 int len = off_len; |
1046 | 1057 |
1047 do { | 1058 do { |
1048 const int index = get_vlc2(gb, vlc_tab, 8, 2); | 1059 int code; |
1049 unsigned cb_idx; | 1060 unsigned cb_idx; |
1050 | 1061 |
1051 if (index >= cb_size) { | 1062 UPDATE_CACHE(re, gb); |
1052 err_idx = index; | 1063 GET_VLC(code, re, gb, vlc_tab, 8, 2); |
1064 | |
1065 if (code >= cb_size) { | |
1066 err_idx = code; | |
1053 goto err_cb_overflow; | 1067 goto err_cb_overflow; |
1054 } | 1068 } |
1055 | 1069 |
1056 cb_idx = cb_vector_idx[index]; | 1070 cb_idx = cb_vector_idx[code]; |
1057 cf = VMUL2(cf, vq, cb_idx, sf + idx); | 1071 cf = VMUL2(cf, vq, cb_idx, sf + idx); |
1058 } while (len -= 2); | 1072 } while (len -= 2); |
1059 } | 1073 } |
1060 break; | 1074 break; |
1061 | 1075 |
1064 for (group = 0; group < g_len; group++, cfo+=128) { | 1078 for (group = 0; group < g_len; group++, cfo+=128) { |
1065 float *cf = cfo; | 1079 float *cf = cfo; |
1066 int len = off_len; | 1080 int len = off_len; |
1067 | 1081 |
1068 do { | 1082 do { |
1069 const int index = get_vlc2(gb, vlc_tab, 8, 2); | 1083 int code; |
1070 unsigned nnz; | 1084 unsigned nnz; |
1071 unsigned cb_idx; | 1085 unsigned cb_idx; |
1072 unsigned sign; | 1086 unsigned sign; |
1073 | 1087 |
1074 if (index >= cb_size) { | 1088 UPDATE_CACHE(re, gb); |
1075 err_idx = index; | 1089 GET_VLC(code, re, gb, vlc_tab, 8, 2); |
1090 | |
1091 if (code >= cb_size) { | |
1092 err_idx = code; | |
1076 goto err_cb_overflow; | 1093 goto err_cb_overflow; |
1077 } | 1094 } |
1078 | 1095 |
1079 cb_idx = cb_vector_idx[index]; | 1096 cb_idx = cb_vector_idx[code]; |
1080 nnz = cb_idx >> 8 & 15; | 1097 nnz = cb_idx >> 8 & 15; |
1081 sign = get_bits(gb, nnz) << (cb_idx >> 12); | 1098 sign = SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12); |
1099 LAST_SKIP_BITS(re, gb, nnz); | |
1082 cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx); | 1100 cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx); |
1083 } while (len -= 2); | 1101 } while (len -= 2); |
1084 } | 1102 } |
1085 break; | 1103 break; |
1086 | 1104 |
1089 float *cf = cfo; | 1107 float *cf = cfo; |
1090 uint32_t *icf = (uint32_t *) cf; | 1108 uint32_t *icf = (uint32_t *) cf; |
1091 int len = off_len; | 1109 int len = off_len; |
1092 | 1110 |
1093 do { | 1111 do { |
1094 const int index = get_vlc2(gb, vlc_tab, 8, 2); | 1112 int code; |
1095 unsigned nzt, nnz; | 1113 unsigned nzt, nnz; |
1096 unsigned cb_idx; | 1114 unsigned cb_idx; |
1097 uint32_t bits; | 1115 uint32_t bits; |
1098 int j; | 1116 int j; |
1099 | 1117 |
1100 if (!index) { | 1118 UPDATE_CACHE(re, gb); |
1119 GET_VLC(code, re, gb, vlc_tab, 8, 2); | |
1120 | |
1121 if (!code) { | |
1101 *icf++ = 0; | 1122 *icf++ = 0; |
1102 *icf++ = 0; | 1123 *icf++ = 0; |
1103 continue; | 1124 continue; |
1104 } | 1125 } |
1105 | 1126 |
1106 if (index >= cb_size) { | 1127 if (code >= cb_size) { |
1107 err_idx = index; | 1128 err_idx = code; |
1108 goto err_cb_overflow; | 1129 goto err_cb_overflow; |
1109 } | 1130 } |
1110 | 1131 |
1111 cb_idx = cb_vector_idx[index]; | 1132 cb_idx = cb_vector_idx[code]; |
1112 nnz = cb_idx >> 12; | 1133 nnz = cb_idx >> 12; |
1113 nzt = cb_idx >> 8; | 1134 nzt = cb_idx >> 8; |
1114 bits = get_bits(gb, nnz) << (32-nnz); | 1135 bits = SHOW_UBITS(re, gb, nnz) << (32-nnz); |
1136 LAST_SKIP_BITS(re, gb, nnz); | |
1115 | 1137 |
1116 for (j = 0; j < 2; j++) { | 1138 for (j = 0; j < 2; j++) { |
1117 if (nzt & 1<<j) { | 1139 if (nzt & 1<<j) { |
1118 int n = 4; | 1140 uint32_t b; |
1141 int n; | |
1119 /* The total length of escape_sequence must be < 22 bits according | 1142 /* The total length of escape_sequence must be < 22 bits according |
1120 to the specification (i.e. max is 111111110xxxxxxxxxxxx). */ | 1143 to the specification (i.e. max is 111111110xxxxxxxxxxxx). */ |
1121 while (get_bits1(gb) && n < 13) n++; | 1144 UPDATE_CACHE(re, gb); |
1122 if (n == 13) { | 1145 b = GET_CACHE(re, gb); |
1146 b = 31 - av_log2(~b); | |
1147 | |
1148 if (b > 8) { | |
1123 av_log(ac->avccontext, AV_LOG_ERROR, "error in spectral data, ESC overflow\n"); | 1149 av_log(ac->avccontext, AV_LOG_ERROR, "error in spectral data, ESC overflow\n"); |
1124 return -1; | 1150 return -1; |
1125 } | 1151 } |
1126 n = (1 << n) + get_bits(gb, n); | 1152 |
1153 #if MIN_CACHE_BITS < 21 | |
1154 LAST_SKIP_BITS(re, gb, b + 1); | |
1155 UPDATE_CACHE(re, gb); | |
1156 #else | |
1157 SKIP_BITS(re, gb, b + 1); | |
1158 #endif | |
1159 b += 4; | |
1160 n = (1 << b) + SHOW_UBITS(re, gb, b); | |
1161 LAST_SKIP_BITS(re, gb, b); | |
1127 *icf++ = cbrt_tab[n] | (bits & 1<<31); | 1162 *icf++ = cbrt_tab[n] | (bits & 1<<31); |
1128 bits <<= 1; | 1163 bits <<= 1; |
1129 } else { | 1164 } else { |
1130 unsigned v = ((const uint32_t*)vq)[cb_idx & 15]; | 1165 unsigned v = ((const uint32_t*)vq)[cb_idx & 15]; |
1131 *icf++ = (bits & 1<<31) | v; | 1166 *icf++ = (bits & 1<<31) | v; |
1136 } while (len -= 2); | 1171 } while (len -= 2); |
1137 | 1172 |
1138 ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len); | 1173 ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len); |
1139 } | 1174 } |
1140 } | 1175 } |
1176 | |
1177 CLOSE_READER(re, gb); | |
1141 } | 1178 } |
1142 } | 1179 } |
1143 coef += g_len << 7; | 1180 coef += g_len << 7; |
1144 } | 1181 } |
1145 | 1182 |