Mercurial > libavcodec.hg
comparison wmadec.c @ 4737:99d9dd34903b libavcodec
Optimize by building the mdct window and multiplying/adding at the same time.
Patch by Ian Braithwaite ian .. braithwaite . dk
[Ffmpeg-devel] WMA decoder speedup 2007-03-22 22:56
author | banan |
---|---|
date | Mon, 26 Mar 2007 10:03:57 +0000 |
parents | 7b9ce6f729ae |
children | 4ae9ab738aec |
comparison
equal
deleted
inserted
replaced
4736:59649ebd5ed8 | 4737:99d9dd34903b |
---|---|
313 } while (--n); | 313 } while (--n); |
314 } | 314 } |
315 s->max_exponent[ch] = max_scale; | 315 s->max_exponent[ch] = max_scale; |
316 return 0; | 316 return 0; |
317 } | 317 } |
318 | |
319 | |
320 /** | |
321 * Apply MDCT window and add into output. | |
322 * | |
323 * We ensure that when the windows overlap their squared sum | |
324 * is always 1 (MDCT reconstruction rule). | |
325 */ | |
326 static void wma_window(WMACodecContext *s, float *out) | |
327 { | |
328 float *in = s->output; | |
329 int block_len, bsize, n; | |
330 | |
331 /* left part */ | |
332 if (s->block_len_bits <= s->prev_block_len_bits) { | |
333 block_len = s->block_len; | |
334 bsize = s->frame_len_bits - s->block_len_bits; | |
335 | |
336 s->dsp.vector_fmul_add_add(out, in, s->windows[bsize], | |
337 out, 0, block_len, 1); | |
338 | |
339 } else { | |
340 block_len = 1 << s->prev_block_len_bits; | |
341 n = (s->block_len - block_len) / 2; | |
342 bsize = s->frame_len_bits - s->prev_block_len_bits; | |
343 | |
344 s->dsp.vector_fmul_add_add(out+n, in+n, s->windows[bsize], | |
345 out+n, 0, block_len, 1); | |
346 | |
347 memcpy(out+n+block_len, in+n+block_len, n*sizeof(float)); | |
348 } | |
349 | |
350 out += s->block_len; | |
351 in += s->block_len; | |
352 | |
353 /* right part */ | |
354 if (s->block_len_bits <= s->next_block_len_bits) { | |
355 block_len = s->block_len; | |
356 bsize = s->frame_len_bits - s->block_len_bits; | |
357 | |
358 s->dsp.vector_fmul_reverse(out, in, s->windows[bsize], block_len); | |
359 | |
360 } else { | |
361 block_len = 1 << s->next_block_len_bits; | |
362 n = (s->block_len - block_len) / 2; | |
363 bsize = s->frame_len_bits - s->next_block_len_bits; | |
364 | |
365 memcpy(out, in, n*sizeof(float)); | |
366 | |
367 s->dsp.vector_fmul_reverse(out+n, in+n, s->windows[bsize], block_len); | |
368 | |
369 memset(out+n+block_len, 0, n*sizeof(float)); | |
370 } | |
371 } | |
372 | |
318 | 373 |
319 /** | 374 /** |
320 * @return 0 if OK. 1 if last block of frame. return -1 if | 375 * @return 0 if OK. 1 if last block of frame. return -1 if |
321 * unrecoverable error. | 376 * unrecoverable error. |
322 */ | 377 */ |
655 s->coefs[0][i] = a + b; | 710 s->coefs[0][i] = a + b; |
656 s->coefs[1][i] = a - b; | 711 s->coefs[1][i] = a - b; |
657 } | 712 } |
658 } | 713 } |
659 | 714 |
660 /* build the window : we ensure that when the windows overlap | |
661 their squared sum is always 1 (MDCT reconstruction rule) */ | |
662 /* XXX: merge with output */ | |
663 { | |
664 int i, next_block_len, block_len, prev_block_len, n; | |
665 float *wptr; | |
666 | |
667 block_len = s->block_len; | |
668 prev_block_len = 1 << s->prev_block_len_bits; | |
669 next_block_len = 1 << s->next_block_len_bits; | |
670 | |
671 /* right part */ | |
672 wptr = s->window + block_len; | |
673 if (block_len <= next_block_len) { | |
674 for(i=0;i<block_len;i++) | |
675 *wptr++ = s->windows[bsize][i]; | |
676 } else { | |
677 /* overlap */ | |
678 n = (block_len / 2) - (next_block_len / 2); | |
679 for(i=0;i<n;i++) | |
680 *wptr++ = 1.0; | |
681 for(i=0;i<next_block_len;i++) | |
682 *wptr++ = s->windows[s->frame_len_bits - s->next_block_len_bits][i]; | |
683 for(i=0;i<n;i++) | |
684 *wptr++ = 0.0; | |
685 } | |
686 | |
687 /* left part */ | |
688 wptr = s->window + block_len; | |
689 if (block_len <= prev_block_len) { | |
690 for(i=0;i<block_len;i++) | |
691 *--wptr = s->windows[bsize][i]; | |
692 } else { | |
693 /* overlap */ | |
694 n = (block_len / 2) - (prev_block_len / 2); | |
695 for(i=0;i<n;i++) | |
696 *--wptr = 1.0; | |
697 for(i=0;i<prev_block_len;i++) | |
698 *--wptr = s->windows[s->frame_len_bits - s->prev_block_len_bits][i]; | |
699 for(i=0;i<n;i++) | |
700 *--wptr = 0.0; | |
701 } | |
702 } | |
703 | |
704 | |
705 for(ch = 0; ch < s->nb_channels; ch++) { | 715 for(ch = 0; ch < s->nb_channels; ch++) { |
706 if (s->channel_coded[ch]) { | 716 if (s->channel_coded[ch]) { |
707 float *ptr; | |
708 int n4, index, n; | 717 int n4, index, n; |
709 | 718 |
710 n = s->block_len; | 719 n = s->block_len; |
711 n4 = s->block_len / 2; | 720 n4 = s->block_len / 2; |
712 s->mdct_ctx[bsize].fft.imdct_calc(&s->mdct_ctx[bsize], | 721 s->mdct_ctx[bsize].fft.imdct_calc(&s->mdct_ctx[bsize], |
713 s->output, s->coefs[ch], s->mdct_tmp); | 722 s->output, s->coefs[ch], s->mdct_tmp); |
714 | 723 |
715 /* XXX: optimize all that by build the window and | |
716 multipying/adding at the same time */ | |
717 | |
718 /* multiply by the window and add in the frame */ | 724 /* multiply by the window and add in the frame */ |
719 index = (s->frame_len / 2) + s->block_pos - n4; | 725 index = (s->frame_len / 2) + s->block_pos - n4; |
720 ptr = &s->frame_out[ch][index]; | 726 wma_window(s, &s->frame_out[ch][index]); |
721 s->dsp.vector_fmul_add_add(ptr,s->window,s->output,ptr,0,2*n,1); | |
722 | 727 |
723 /* specific fast case for ms-stereo : add to second | 728 /* specific fast case for ms-stereo : add to second |
724 channel if it is not coded */ | 729 channel if it is not coded */ |
725 if (s->ms_stereo && !s->channel_coded[1]) { | 730 if (s->ms_stereo && !s->channel_coded[1]) { |
726 ptr = &s->frame_out[1][index]; | 731 wma_window(s, &s->frame_out[1][index]); |
727 s->dsp.vector_fmul_add_add(ptr,s->window,s->output,ptr,0,2*n,1); | |
728 } | 732 } |
729 } | 733 } |
730 } | 734 } |
731 next: | 735 next: |
732 /* update block number */ | 736 /* update block number */ |
777 ptr += incr; | 781 ptr += incr; |
778 } | 782 } |
779 /* prepare for next block */ | 783 /* prepare for next block */ |
780 memmove(&s->frame_out[ch][0], &s->frame_out[ch][s->frame_len], | 784 memmove(&s->frame_out[ch][0], &s->frame_out[ch][s->frame_len], |
781 s->frame_len * sizeof(float)); | 785 s->frame_len * sizeof(float)); |
782 /* XXX: suppress this */ | |
783 memset(&s->frame_out[ch][s->frame_len], 0, | |
784 s->frame_len * sizeof(float)); | |
785 } | 786 } |
786 | 787 |
787 #ifdef TRACE | 788 #ifdef TRACE |
788 dump_shorts(s, "samples", samples, n * s->nb_channels); | 789 dump_shorts(s, "samples", samples, n * s->nb_channels); |
789 #endif | 790 #endif |