comparison aac.c @ 10886:966a8afdd9aa libavcodec

AAC: optimise bitstream reading in decode_spectrum_and_dequant(). Using the low-level macros directly avoids redundant open/update/close cycles. 2-3% faster on ARM, PPC, and Core i7.
author mru
date Fri, 15 Jan 2010 14:02:52 +0000
parents 798c62217427
children 134644e36859
comparison
equal deleted inserted replaced
10885:e3e466b6bc5e 10886:966a8afdd9aa
991 } else { 991 } else {
992 const float *vq = ff_aac_codebook_vector_vals[cbt_m1]; 992 const float *vq = ff_aac_codebook_vector_vals[cbt_m1];
993 const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1]; 993 const uint16_t *cb_vector_idx = ff_aac_codebook_vector_idx[cbt_m1];
994 VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table; 994 VLC_TYPE (*vlc_tab)[2] = vlc_spectral[cbt_m1].table;
995 const int cb_size = ff_aac_spectral_sizes[cbt_m1]; 995 const int cb_size = ff_aac_spectral_sizes[cbt_m1];
996 OPEN_READER(re, gb);
996 997
997 switch (cbt_m1 >> 1) { 998 switch (cbt_m1 >> 1) {
998 case 0: 999 case 0:
999 for (group = 0; group < g_len; group++, cfo+=128) { 1000 for (group = 0; group < g_len; group++, cfo+=128) {
1000 float *cf = cfo; 1001 float *cf = cfo;
1001 int len = off_len; 1002 int len = off_len;
1002 1003
1003 do { 1004 do {
1004 const int index = get_vlc2(gb, vlc_tab, 8, 2); 1005 int code;
1005 unsigned cb_idx; 1006 unsigned cb_idx;
1006 1007
1007 if (index >= cb_size) { 1008 UPDATE_CACHE(re, gb);
1008 err_idx = index; 1009 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1010
1011 if (code >= cb_size) {
1012 err_idx = code;
1009 goto err_cb_overflow; 1013 goto err_cb_overflow;
1010 } 1014 }
1011 1015
1012 cb_idx = cb_vector_idx[index]; 1016 cb_idx = cb_vector_idx[code];
1013 cf = VMUL4(cf, vq, cb_idx, sf + idx); 1017 cf = VMUL4(cf, vq, cb_idx, sf + idx);
1014 } while (len -= 4); 1018 } while (len -= 4);
1015 } 1019 }
1016 break; 1020 break;
1017 1021
1019 for (group = 0; group < g_len; group++, cfo+=128) { 1023 for (group = 0; group < g_len; group++, cfo+=128) {
1020 float *cf = cfo; 1024 float *cf = cfo;
1021 int len = off_len; 1025 int len = off_len;
1022 1026
1023 do { 1027 do {
1024 const int index = get_vlc2(gb, vlc_tab, 8, 2); 1028 int code;
1025 unsigned nnz; 1029 unsigned nnz;
1026 unsigned cb_idx; 1030 unsigned cb_idx;
1027 uint32_t bits; 1031 uint32_t bits;
1028 1032
1029 if (index >= cb_size) { 1033 UPDATE_CACHE(re, gb);
1030 err_idx = index; 1034 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1035
1036 if (code >= cb_size) {
1037 err_idx = code;
1031 goto err_cb_overflow; 1038 goto err_cb_overflow;
1032 } 1039 }
1033 1040
1034 cb_idx = cb_vector_idx[index]; 1041 #if MIN_CACHE_BITS < 20
1042 UPDATE_CACHE(re, gb);
1043 #endif
1044 cb_idx = cb_vector_idx[code];
1035 nnz = cb_idx >> 8 & 15; 1045 nnz = cb_idx >> 8 & 15;
1036 bits = get_bits(gb, nnz) << (32-nnz); 1046 bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1047 LAST_SKIP_BITS(re, gb, nnz);
1037 cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx); 1048 cf = VMUL4S(cf, vq, cb_idx, bits, sf + idx);
1038 } while (len -= 4); 1049 } while (len -= 4);
1039 } 1050 }
1040 break; 1051 break;
1041 1052
1043 for (group = 0; group < g_len; group++, cfo+=128) { 1054 for (group = 0; group < g_len; group++, cfo+=128) {
1044 float *cf = cfo; 1055 float *cf = cfo;
1045 int len = off_len; 1056 int len = off_len;
1046 1057
1047 do { 1058 do {
1048 const int index = get_vlc2(gb, vlc_tab, 8, 2); 1059 int code;
1049 unsigned cb_idx; 1060 unsigned cb_idx;
1050 1061
1051 if (index >= cb_size) { 1062 UPDATE_CACHE(re, gb);
1052 err_idx = index; 1063 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1064
1065 if (code >= cb_size) {
1066 err_idx = code;
1053 goto err_cb_overflow; 1067 goto err_cb_overflow;
1054 } 1068 }
1055 1069
1056 cb_idx = cb_vector_idx[index]; 1070 cb_idx = cb_vector_idx[code];
1057 cf = VMUL2(cf, vq, cb_idx, sf + idx); 1071 cf = VMUL2(cf, vq, cb_idx, sf + idx);
1058 } while (len -= 2); 1072 } while (len -= 2);
1059 } 1073 }
1060 break; 1074 break;
1061 1075
1064 for (group = 0; group < g_len; group++, cfo+=128) { 1078 for (group = 0; group < g_len; group++, cfo+=128) {
1065 float *cf = cfo; 1079 float *cf = cfo;
1066 int len = off_len; 1080 int len = off_len;
1067 1081
1068 do { 1082 do {
1069 const int index = get_vlc2(gb, vlc_tab, 8, 2); 1083 int code;
1070 unsigned nnz; 1084 unsigned nnz;
1071 unsigned cb_idx; 1085 unsigned cb_idx;
1072 unsigned sign; 1086 unsigned sign;
1073 1087
1074 if (index >= cb_size) { 1088 UPDATE_CACHE(re, gb);
1075 err_idx = index; 1089 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1090
1091 if (code >= cb_size) {
1092 err_idx = code;
1076 goto err_cb_overflow; 1093 goto err_cb_overflow;
1077 } 1094 }
1078 1095
1079 cb_idx = cb_vector_idx[index]; 1096 cb_idx = cb_vector_idx[code];
1080 nnz = cb_idx >> 8 & 15; 1097 nnz = cb_idx >> 8 & 15;
1081 sign = get_bits(gb, nnz) << (cb_idx >> 12); 1098 sign = SHOW_UBITS(re, gb, nnz) << (cb_idx >> 12);
1099 LAST_SKIP_BITS(re, gb, nnz);
1082 cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx); 1100 cf = VMUL2S(cf, vq, cb_idx, sign, sf + idx);
1083 } while (len -= 2); 1101 } while (len -= 2);
1084 } 1102 }
1085 break; 1103 break;
1086 1104
1089 float *cf = cfo; 1107 float *cf = cfo;
1090 uint32_t *icf = (uint32_t *) cf; 1108 uint32_t *icf = (uint32_t *) cf;
1091 int len = off_len; 1109 int len = off_len;
1092 1110
1093 do { 1111 do {
1094 const int index = get_vlc2(gb, vlc_tab, 8, 2); 1112 int code;
1095 unsigned nzt, nnz; 1113 unsigned nzt, nnz;
1096 unsigned cb_idx; 1114 unsigned cb_idx;
1097 uint32_t bits; 1115 uint32_t bits;
1098 int j; 1116 int j;
1099 1117
1100 if (!index) { 1118 UPDATE_CACHE(re, gb);
1119 GET_VLC(code, re, gb, vlc_tab, 8, 2);
1120
1121 if (!code) {
1101 *icf++ = 0; 1122 *icf++ = 0;
1102 *icf++ = 0; 1123 *icf++ = 0;
1103 continue; 1124 continue;
1104 } 1125 }
1105 1126
1106 if (index >= cb_size) { 1127 if (code >= cb_size) {
1107 err_idx = index; 1128 err_idx = code;
1108 goto err_cb_overflow; 1129 goto err_cb_overflow;
1109 } 1130 }
1110 1131
1111 cb_idx = cb_vector_idx[index]; 1132 cb_idx = cb_vector_idx[code];
1112 nnz = cb_idx >> 12; 1133 nnz = cb_idx >> 12;
1113 nzt = cb_idx >> 8; 1134 nzt = cb_idx >> 8;
1114 bits = get_bits(gb, nnz) << (32-nnz); 1135 bits = SHOW_UBITS(re, gb, nnz) << (32-nnz);
1136 LAST_SKIP_BITS(re, gb, nnz);
1115 1137
1116 for (j = 0; j < 2; j++) { 1138 for (j = 0; j < 2; j++) {
1117 if (nzt & 1<<j) { 1139 if (nzt & 1<<j) {
1118 int n = 4; 1140 uint32_t b;
1141 int n;
1119 /* The total length of escape_sequence must be < 22 bits according 1142 /* The total length of escape_sequence must be < 22 bits according
1120 to the specification (i.e. max is 111111110xxxxxxxxxxxx). */ 1143 to the specification (i.e. max is 111111110xxxxxxxxxxxx). */
1121 while (get_bits1(gb) && n < 13) n++; 1144 UPDATE_CACHE(re, gb);
1122 if (n == 13) { 1145 b = GET_CACHE(re, gb);
1146 b = 31 - av_log2(~b);
1147
1148 if (b > 8) {
1123 av_log(ac->avccontext, AV_LOG_ERROR, "error in spectral data, ESC overflow\n"); 1149 av_log(ac->avccontext, AV_LOG_ERROR, "error in spectral data, ESC overflow\n");
1124 return -1; 1150 return -1;
1125 } 1151 }
1126 n = (1 << n) + get_bits(gb, n); 1152
1153 #if MIN_CACHE_BITS < 21
1154 LAST_SKIP_BITS(re, gb, b + 1);
1155 UPDATE_CACHE(re, gb);
1156 #else
1157 SKIP_BITS(re, gb, b + 1);
1158 #endif
1159 b += 4;
1160 n = (1 << b) + SHOW_UBITS(re, gb, b);
1161 LAST_SKIP_BITS(re, gb, b);
1127 *icf++ = cbrt_tab[n] | (bits & 1<<31); 1162 *icf++ = cbrt_tab[n] | (bits & 1<<31);
1128 bits <<= 1; 1163 bits <<= 1;
1129 } else { 1164 } else {
1130 unsigned v = ((const uint32_t*)vq)[cb_idx & 15]; 1165 unsigned v = ((const uint32_t*)vq)[cb_idx & 15];
1131 *icf++ = (bits & 1<<31) | v; 1166 *icf++ = (bits & 1<<31) | v;
1136 } while (len -= 2); 1171 } while (len -= 2);
1137 1172
1138 ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len); 1173 ac->dsp.vector_fmul_scalar(cfo, cfo, sf[idx], off_len);
1139 } 1174 }
1140 } 1175 }
1176
1177 CLOSE_READER(re, gb);
1141 } 1178 }
1142 } 1179 }
1143 coef += g_len << 7; 1180 coef += g_len << 7;
1144 } 1181 }
1145 1182