Mercurial > libavcodec.hg
comparison aac.c @ 7663:66fb1f3937fa libavcodec
Use ff_imdct_half() and vector_fmul_window() for IMDCT and windowing. Reduce
buffer sizes accordingly. This produces a ~10% overall decoding performance
improvement.
author | superdump |
---|---|
date | Fri, 22 Aug 2008 18:21:22 +0000 |
parents | dea70eae1e49 |
children | 5d93ceaa7a2c |
comparison
equal
deleted
inserted
replaced
7662:dea70eae1e49 | 7663:66fb1f3937fa |
---|---|
1164 static void imdct_and_windowing(AACContext * ac, SingleChannelElement * sce) { | 1164 static void imdct_and_windowing(AACContext * ac, SingleChannelElement * sce) { |
1165 IndividualChannelStream * ics = &sce->ics; | 1165 IndividualChannelStream * ics = &sce->ics; |
1166 float * in = sce->coeffs; | 1166 float * in = sce->coeffs; |
1167 float * out = sce->ret; | 1167 float * out = sce->ret; |
1168 float * saved = sce->saved; | 1168 float * saved = sce->saved; |
1169 const float * lwindow = ics->use_kb_window[0] ? ff_aac_kbd_long_1024 : ff_sine_1024; | |
1170 const float * swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128; | 1169 const float * swindow = ics->use_kb_window[0] ? ff_aac_kbd_short_128 : ff_sine_128; |
1171 const float * lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024; | 1170 const float * lwindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_long_1024 : ff_sine_1024; |
1172 const float * swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128; | 1171 const float * swindow_prev = ics->use_kb_window[1] ? ff_aac_kbd_short_128 : ff_sine_128; |
1173 float * buf = ac->buf_mdct; | 1172 float * buf = ac->buf_mdct; |
| 1173 DECLARE_ALIGNED(16, float, temp[128]); |
1174 int i; | 1174 int i; |
1175 | 1175 |
1176 // imdct | 1176 // imdct |
1177 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { | 1177 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { |
1178 if (ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) | 1178 if (ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) |
1179 av_log(ac->avccontext, AV_LOG_WARNING, | 1179 av_log(ac->avccontext, AV_LOG_WARNING, |
1180 "Transition from an ONLY_LONG or LONG_STOP to an EIGHT_SHORT sequence detected. " | 1180 "Transition from an ONLY_LONG or LONG_STOP to an EIGHT_SHORT sequence detected. " |
1181 "If you heard an audible artifact, please submit the sample to the FFmpeg developers.\n"); | 1181 "If you heard an audible artifact, please submit the sample to the FFmpeg developers.\n"); |
1182 for (i = 0; i < 2048; i += 256) { | 1182 for (i = 0; i < 1024; i += 128) |
1183 ff_imdct_calc(&ac->mdct_small, buf + i, in + i/2); | 1183 ff_imdct_half(&ac->mdct_small, buf + i, in + i); |
1184 ac->dsp.vector_fmul_reverse(ac->revers + i/2, buf + i + 128, swindow, 128); | |
1185 } | |
1186 } else | 1184 } else |
1187 ff_imdct_calc(&ac->mdct, buf, in); | 1185 ff_imdct_half(&ac->mdct, buf, in); |
1188 | 1186 |
1189 /* window overlapping | 1187 /* window overlapping |
1190 * NOTE: To simplify the overlapping code, all 'meaningless' short to long | 1188 * NOTE: To simplify the overlapping code, all 'meaningless' short to long |
1191 * and long to short transitions are considered to be short to short | 1189 * and long to short transitions are considered to be short to short |
1192 * transitions. This leaves just two cases (long to long and short to short) | 1190 * transitions. This leaves just two cases (long to long and short to short) |
1193 * with a little special sauce for EIGHT_SHORT_SEQUENCE. | 1191 * with a little special sauce for EIGHT_SHORT_SEQUENCE. |
1194 */ | 1192 */ |
1195 if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) && | 1193 if ((ics->window_sequence[1] == ONLY_LONG_SEQUENCE || ics->window_sequence[1] == LONG_STOP_SEQUENCE) && |
1196 (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) { | 1194 (ics->window_sequence[0] == ONLY_LONG_SEQUENCE || ics->window_sequence[0] == LONG_START_SEQUENCE)) { |
1197 ac->dsp.vector_fmul_add_add(out, buf, lwindow_prev, saved, ac->add_bias, 1024, 1); | 1195 ac->dsp.vector_fmul_window( out, saved, buf, lwindow_prev, ac->add_bias, 512); |
1198 } else { | 1196 } else { |
1199 for (i = 0; i < 448; i++) | 1197 for (i = 0; i < 448; i++) |
1200 out[i] = saved[i] + ac->add_bias; | 1198 out[i] = saved[i] + ac->add_bias; |
1201 | 1199 |
1202 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { | 1200 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { |
1203 ac->dsp.vector_fmul_add_add(out + 448 + 0*128, buf + 0*128, swindow_prev, saved + 448 , ac->add_bias, 128, 1); | 1201 ac->dsp.vector_fmul_window(out + 448 + 0*128, saved + 448, buf + 0*128, swindow_prev, ac->add_bias, 64); |
1204 ac->dsp.vector_fmul_add_add(out + 448 + 1*128, buf + 2*128, swindow, ac->revers + 0*128, ac->add_bias, 128, 1); | 1202 ac->dsp.vector_fmul_window(out + 448 + 1*128, buf + 0*128 + 64, buf + 1*128, swindow, ac->add_bias, 64); |
1205 ac->dsp.vector_fmul_add_add(out + 448 + 2*128, buf + 4*128, swindow, ac->revers + 1*128, ac->add_bias, 128, 1); | 1203 ac->dsp.vector_fmul_window(out + 448 + 2*128, buf + 1*128 + 64, buf + 2*128, swindow, ac->add_bias, 64); |
1206 ac->dsp.vector_fmul_add_add(out + 448 + 3*128, buf + 6*128, swindow, ac->revers + 2*128, ac->add_bias, 128, 1); | 1204 ac->dsp.vector_fmul_window(out + 448 + 3*128, buf + 2*128 + 64, buf + 3*128, swindow, ac->add_bias, 64); |
1207 ac->dsp.vector_fmul_add_add(out + 448 + 4*128, buf + 8*128, swindow, ac->revers + 3*128, ac->add_bias, 64, 1); | 1205 ac->dsp.vector_fmul_window(temp, buf + 3*128 + 64, buf + 4*128, swindow, ac->add_bias, 64); |
| 1206 memcpy( out + 448 + 4*128, temp, 64 * sizeof(float)); |
1208 } else { | 1207 } else { |
1209 ac->dsp.vector_fmul_add_add(out + 448, buf + 448, swindow_prev, saved + 448, ac->add_bias, 128, 1); | 1208 ac->dsp.vector_fmul_window(out + 448, saved + 448, buf, swindow_prev, ac->add_bias, 64); |
1210 for (i = 576; i < 1024; i++) | 1209 for (i = 576; i < 1024; i++) |
1211 out[i] = buf[i] + saved[i] + ac->add_bias; | 1210 out[i] = buf[i-512] + ac->add_bias; |
1212 } | 1211 } |
1213 } | 1212 } |
1214 | 1213 |
1215 // buffer update | 1214 // buffer update |
1216 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { | 1215 if (ics->window_sequence[0] == EIGHT_SHORT_SEQUENCE) { |
1217 ac->dsp.vector_fmul_add_add(saved, buf + 1024 + 64, swindow + 64, ac->revers + 3*128+64, 0, 64, 1); | 1216 for (i = 0; i < 64; i++) |
1218 ac->dsp.vector_fmul_add_add(saved + 64, buf + 1024 + 2*128, swindow, ac->revers + 4*128, 0, 128, 1); | 1217 saved[i] = temp[64 + i] - ac->add_bias; |
1219 ac->dsp.vector_fmul_add_add(saved + 192, buf + 1024 + 4*128, swindow, ac->revers + 5*128, 0, 128, 1); | 1218 ac->dsp.vector_fmul_window(saved + 64, buf + 4*128 + 64, buf + 5*128, swindow, 0, 64); |
1220 ac->dsp.vector_fmul_add_add(saved + 320, buf + 1024 + 6*128, swindow, ac->revers + 6*128, 0, 128, 1); | 1219 ac->dsp.vector_fmul_window(saved + 192, buf + 5*128 + 64, buf + 6*128, swindow, 0, 64); |
1221 memcpy( saved + 448, ac->revers + 7*128, 128 * sizeof(float)); | 1220 ac->dsp.vector_fmul_window(saved + 320, buf + 6*128 + 64, buf + 7*128, swindow, 0, 64); |
1222 memset( saved + 576, 0, 448 * sizeof(float)); | 1221 memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float)); |
1223 } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) { | 1222 } else if (ics->window_sequence[0] == LONG_START_SEQUENCE) { |
1224 memcpy(saved, buf + 1024, 448 * sizeof(float)); | 1223 memcpy( saved, buf + 512, 448 * sizeof(float)); |
1225 ac->dsp.vector_fmul_reverse(saved + 448, buf + 1024 + 448, swindow, 128); | 1224 memcpy( saved + 448, buf + 7*128 + 64, 64 * sizeof(float)); |
1226 memset(saved + 576, 0, 448 * sizeof(float)); | |
1227 } else { // LONG_STOP or ONLY_LONG | 1225 } else { // LONG_STOP or ONLY_LONG |
1228 ac->dsp.vector_fmul_reverse(saved, buf + 1024, lwindow, 1024); | 1226 memcpy( saved, buf + 512, 512 * sizeof(float)); |
1229 } | 1227 } |
1230 } | 1228 } |
1231 | 1229 |
1232 /** | 1230 /** |
1233 * Apply dependent channel coupling (applied before IMDCT). | 1231 * Apply dependent channel coupling (applied before IMDCT). |