comparison aac.c @ 7663:66fb1f3937fa libavcodec

Use ff_imdct_half() and vector_fmul_window() for IMDCT and windowing. Reduce buffer sizes accordingly. This produces a ~10% overall decoding performance improvement.
author superdump
date Fri, 22 Aug 2008 18:21:22 +0000
parents dea70eae1e49
children 5d93ceaa7a2c
comparison
equal deleted inserted replaced
7662:dea70eae1e49 7663:66fb1f3937fa
1164 static void imdct_and_windowing(AACContext * ac, SingleChannelElement * sce) { 1164 static void imdct_and_windowing(AACContext * ac, SingleChannelElement * sce) {
1165 IndividualChannelStream * ics = &sce->ics; 1165 IndividualChannelStream * ics = &sce->ics;
1166 float * in = sce->coeffs; 1166 float * in = sce->coeffs;
1167 float * out = sce->ret; 1167 float * out = sce->ret;
1168 float * saved = sce->saved; 1168 float * saved = sce->saved;
1169 const float * lwindow = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024;
1170 const float * swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128; 1169 const float * swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128;
1171 const float * lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024; 1170 const float * lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024;
1172 const float * swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128; 1171 const float * swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128;
1173 float * buf = ac->buf_mdct; 1172 float * buf = ac->buf_mdct;
1173 DECLARE_ALIGNED(16, float, temp[128]);
1174 int i; 1174 int i;
1175 1175
1176 // imdct 1176 // imdct
1177 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { 1177 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1178 if (ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) 1178 if (ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE)
1179 av_log(ac->avccontext, AV_LOG_WARNING, 1179 av_log(ac->avccontext, AV_LOG_WARNING,
1180 "Transition from an ONLY_LONG or LONG_STOP to an EIGHT_SHORT sequence detected. " 1180 "Transition from an ONLY_LONG or LONG_STOP to an EIGHT_SHORT sequence detected. "
1181 "If you heard an audible artifact, please submit the sample to the FFmpeg developers.\n"); 1181 "If you heard an audible artifact, please submit the sample to the FFmpeg developers.\n");
1182 for (i = 0; i < 2048; i += 256) { 1182 for (i = 0; i < 1024; i += 128)
1183 ff_imdct_calc(&ac->mdct_small, buf + i, in + i/2); 1183 ff_imdct_half(&ac->mdct_small, buf + i, in + i);
1184 ac->dsp.vector_fmul_reverse(ac->revers + i/2, buf + i + 128, swindow, 128);
1185 }
1186 } else 1184 } else
1187 ff_imdct_calc(&ac->mdct, buf, in); 1185 ff_imdct_half(&ac->mdct, buf, in);
1188 1186
1189 /* window overlapping 1187 /* window overlapping
1190 * NOTE: To simplify the overlapping code, all 'meaningless' short to long 1188 * NOTE: To simplify the overlapping code, all 'meaningless' short to long
1191 * and long to short transitions are considered to be short to short 1189 * and long to short transitions are considered to be short to short
1192 * transitions. This leaves just two cases (long to long and short to short) 1190 * transitions. This leaves just two cases (long to long and short to short)
1193 * with a little special sauce for EIGHT_SHORT_SEQUENCE. 1191 * with a little special sauce for EIGHT_SHORT_SEQUENCE.
1194 */ 1192 */
1195 if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) && 1193 if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) &&
1196 (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) { 1194 (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) {
1197 ac->dsp.vector_fmul_add_add(out, buf, lwindow_prev, saved, ac->add_bias, 1024, 1); 1195 ac->dsp.vector_fmul_window( out, saved, buf, lwindow_prev, ac->add_bias, 512);
1198 } else { 1196 } else {
1199 for (i = 0; i < 448; i++) 1197 for (i = 0; i < 448; i++)
1200 out[i] = saved[i] + ac->add_bias; 1198 out[i] = saved[i] + ac->add_bias;
1201 1199
1202 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { 1200 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1203 ac->dsp.vector_fmul_add_add(out + 448 + 0*128, buf + 0*128, swindow_prev, saved + 448 , ac->add_bias, 128, 1); 1201 ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, ac->add_bias, 64);
1204 ac->dsp.vector_fmul_add_add(out + 448 + 1*128, buf + 2*128, swindow, ac->revers + 0*128, ac->add_bias, 128, 1); 1202 ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, ac->add_bias, 64);
1205 ac->dsp.vector_fmul_add_add(out + 448 + 2*128, buf + 4*128, swindow, ac->revers + 1*128, ac->add_bias, 128, 1); 1203 ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, ac->add_bias, 64);
1206 ac->dsp.vector_fmul_add_add(out + 448 + 3*128, buf + 6*128, swindow, ac->revers + 2*128, ac->add_bias, 128, 1); 1204 ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, ac->add_bias, 64);
1207 ac->dsp.vector_fmul_add_add(out + 448 + 4*128, buf + 8*128, swindow, ac->revers + 3*128, ac->add_bias, 64, 1); 1205 ac->dsp.vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, ac->add_bias, 64);
1206 memcpy( out + 448 + 4*128, temp, 64 * sizeof(float));
1208 } else { 1207 } else {
1209 ac->dsp.vector_fmul_add_add(out + 448, buf + 448, swindow_prev, saved + 448, ac->add_bias, 128, 1); 1208 ac->dsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, ac->add_bias, 64);
1210 for (i = 576; i < 1024; i++) 1209 for (i = 576; i < 1024; i++)
1211 out[i] = buf[i] + saved[i] + ac->add_bias; 1210 out[i] = buf[i-512] + ac->add_bias;
1212 } 1211 }
1213 } 1212 }
1214 1213
1215 // buffer update 1214 // buffer update
1216 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { 1215 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) {
1217 ac->dsp.vector_fmul_add_add(saved, buf + 1024 + 64, swindow + 64, ac->revers + 3*128+64, 0, 64, 1); 1216 for (i = 0; i < 64; i++)
1218 ac->dsp.vector_fmul_add_add(saved + 64, buf + 1024 + 2*128, swindow, ac->revers + 4*128, 0, 128, 1); 1217 saved[i] = temp[64 + i] - ac->add_bias;
1219 ac->dsp.vector_fmul_add_add(saved + 192, buf + 1024 + 4*128, swindow, ac->revers + 5*128, 0, 128, 1); 1218 ac->dsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 0, 64);
1220 ac->dsp.vector_fmul_add_add(saved + 320, buf + 1024 + 6*128, swindow, ac->revers + 6*128, 0, 128, 1); 1219 ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 0, 64);
1221 memcpy( saved + 448, ac->revers + 7*128, 128 * sizeof(float)); 1220 ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 0, 64);
1222 memset( saved + 576, 0, 448 * sizeof(float)); 1221 memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float));
1223 } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) { 1222 } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) {
1224 memcpy(saved, buf + 1024, 448 * sizeof(float)); 1223 memcpy( saved, buf + 512, 448 * sizeof(float));
1225 ac->dsp.vector_fmul_reverse(saved + 448, buf + 1024 + 448, swindow, 128); 1224 memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float));
1226 memset(saved + 576, 0, 448 * sizeof(float));
1227 } else { // LONG_STOP or ONLY_LONG 1225 } else { // LONG_STOP or ONLY_LONG
1228 ac->dsp.vector_fmul_reverse(saved, buf + 1024, lwindow, 1024); 1226 memcpy( saved, buf + 512, 512 * sizeof(float));
1229 } 1227 }
1230 } 1228 }
1231 1229
1232 /** 1230 /**
1233 * Apply dependent channel coupling (applied before IMDCT). 1231 * Apply dependent channel coupling (applied before IMDCT).