comparison wmadec.c @ 3592:6a358dccf2ab libavcodec

SIMD vector optimizations. 3% faster overall decoding.
author banan
date Sat, 19 Aug 2006 08:46:52 +0000
parents 5ea82888103e
children e28285ddde8d
comparison
equal deleted inserted replaced
3591:088be7d7c4fd 3592:6a358dccf2ab
128 /* lsp_to_curve tables */ 128 /* lsp_to_curve tables */
129 float lsp_cos_table[BLOCK_MAX_SIZE]; 129 float lsp_cos_table[BLOCK_MAX_SIZE];
130 float lsp_pow_e_table[256]; 130 float lsp_pow_e_table[256];
131 float lsp_pow_m_table1[(1 << LSP_POW_BITS)]; 131 float lsp_pow_m_table1[(1 << LSP_POW_BITS)];
132 float lsp_pow_m_table2[(1 << LSP_POW_BITS)]; 132 float lsp_pow_m_table2[(1 << LSP_POW_BITS)];
133 DSPContext dsp;
133 134
134 #ifdef TRACE 135 #ifdef TRACE
135 int frame_count; 136 int frame_count;
136 #endif 137 #endif
137 } WMADecodeContext; 138 } WMADecodeContext;
225 226
226 s->sample_rate = avctx->sample_rate; 227 s->sample_rate = avctx->sample_rate;
227 s->nb_channels = avctx->channels; 228 s->nb_channels = avctx->channels;
228 s->bit_rate = avctx->bit_rate; 229 s->bit_rate = avctx->bit_rate;
229 s->block_align = avctx->block_align; 230 s->block_align = avctx->block_align;
231
232 dsputil_init(&s->dsp, avctx);
230 233
231 if (avctx->codec->id == CODEC_ID_WMAV1) { 234 if (avctx->codec->id == CODEC_ID_WMAV1) {
232 s->version = 1; 235 s->version = 1;
233 } else { 236 } else {
234 s->version = 2; 237 s->version = 2;
1107 1110
1108 for(ch = 0; ch < s->nb_channels; ch++) { 1111 for(ch = 0; ch < s->nb_channels; ch++) {
1109 if (s->channel_coded[ch]) { 1112 if (s->channel_coded[ch]) {
1110 DECLARE_ALIGNED_16(FFTSample, output[BLOCK_MAX_SIZE * 2]); 1113 DECLARE_ALIGNED_16(FFTSample, output[BLOCK_MAX_SIZE * 2]);
1111 float *ptr; 1114 float *ptr;
1112 int i, n4, index, n; 1115 int n4, index, n;
1113 1116
1114 n = s->block_len; 1117 n = s->block_len;
1115 n4 = s->block_len / 2; 1118 n4 = s->block_len / 2;
1116 s->mdct_ctx[bsize].fft.imdct_calc(&s->mdct_ctx[bsize], 1119 s->mdct_ctx[bsize].fft.imdct_calc(&s->mdct_ctx[bsize],
1117 output, s->coefs[ch], s->mdct_tmp); 1120 output, s->coefs[ch], s->mdct_tmp);
1118 1121
1119 /* XXX: optimize all that by building the window and 1122 /* XXX: optimize all that by building the window and
1120 multiplying/adding at the same time */ 1123 multiplying/adding at the same time */
1121 /* multiply by the window */ 1124
1122 for(i=0;i<n * 2;i++) { 1125 /* multiply by the window and add in the frame */
1123 output[i] *= window[i];
1124 }
1125
1126 /* add in the frame */
1127 index = (s->frame_len / 2) + s->block_pos - n4; 1126 index = (s->frame_len / 2) + s->block_pos - n4;
1128 ptr = &s->frame_out[ch][index]; 1127 ptr = &s->frame_out[ch][index];
1129 for(i=0;i<n * 2;i++) { 1128 s->dsp.vector_fmul_add_add(ptr,window,output,ptr,0,2*n,1);
1130 *ptr += output[i];
1131 ptr++;
1132 }
1133 1129
1134 /* specific fast case for ms-stereo : add to second 1130 /* specific fast case for ms-stereo : add to second
1135 channel if it is not coded */ 1131 channel if it is not coded */
1136 if (s->ms_stereo && !s->channel_coded[1]) { 1132 if (s->ms_stereo && !s->channel_coded[1]) {
1137 ptr = &s->frame_out[1][index]; 1133 ptr = &s->frame_out[1][index];
1138 for(i=0;i<n * 2;i++) { 1134 s->dsp.vector_fmul_add_add(ptr,window,output,ptr,0,2*n,1);
1139 *ptr += output[i];
1140 ptr++;
1141 }
1142 } 1135 }
1143 } 1136 }
1144 } 1137 }
1145 next: 1138 next:
1146 /* update block number */ 1139 /* update block number */