comparison wmadec.c @ 4737:99d9dd34903b libavcodec

Optimize by building the mdct window and multiplying/adding at the same time. Patch by Ian Braithwaite ian .. braithwaite . dk [Ffmpeg-devel] WMA decoder speedup 2007-03-22 22:56
author banan
date Mon, 26 Mar 2007 10:03:57 +0000
parents 7b9ce6f729ae
children 4ae9ab738aec
comparison
equal deleted inserted replaced
4736:59649ebd5ed8 4737:99d9dd34903b
313 } while (--n); 313 } while (--n);
314 } 314 }
315 s->max_exponent[ch] = max_scale; 315 s->max_exponent[ch] = max_scale;
316 return 0; 316 return 0;
317 } 317 }
318
319
320 /**
321 * Apply MDCT window and add into output.
322 *
323 * We ensure that when the windows overlap their squared sum
324 * is always 1 (MDCT reconstruction rule).
325 */
326 static void wma_window(WMACodecContext *s, float *out)
327 {
328 float *in = s->output;
329 int block_len, bsize, n;
330
331 /* left part */
332 if (s->block_len_bits <= s->prev_block_len_bits) {
333 block_len = s->block_len;
334 bsize = s->frame_len_bits - s->block_len_bits;
335
336 s->dsp.vector_fmul_add_add(out, in, s->windows[bsize],
337 out, 0, block_len, 1);
338
339 } else {
340 block_len = 1 << s->prev_block_len_bits;
341 n = (s->block_len - block_len) / 2;
342 bsize = s->frame_len_bits - s->prev_block_len_bits;
343
344 s->dsp.vector_fmul_add_add(out+n, in+n, s->windows[bsize],
345 out+n, 0, block_len, 1);
346
347 memcpy(out+n+block_len, in+n+block_len, n*sizeof(float));
348 }
349
350 out += s->block_len;
351 in += s->block_len;
352
353 /* right part */
354 if (s->block_len_bits <= s->next_block_len_bits) {
355 block_len = s->block_len;
356 bsize = s->frame_len_bits - s->block_len_bits;
357
358 s->dsp.vector_fmul_reverse(out, in, s->windows[bsize], block_len);
359
360 } else {
361 block_len = 1 << s->next_block_len_bits;
362 n = (s->block_len - block_len) / 2;
363 bsize = s->frame_len_bits - s->next_block_len_bits;
364
365 memcpy(out, in, n*sizeof(float));
366
367 s->dsp.vector_fmul_reverse(out+n, in+n, s->windows[bsize], block_len);
368
369 memset(out+n+block_len, 0, n*sizeof(float));
370 }
371 }
372
318 373
319 /** 374 /**
320 * @return 0 if OK. 1 if last block of frame. return -1 if 375 * @return 0 if OK. 1 if last block of frame. return -1 if
321 * unrecoverable error. 376 * unrecoverable error.
322 */ 377 */
655 s->coefs[0][i] = a + b; 710 s->coefs[0][i] = a + b;
656 s->coefs[1][i] = a - b; 711 s->coefs[1][i] = a - b;
657 } 712 }
658 } 713 }
659 714
660 /* build the window : we ensure that when the windows overlap
661 their squared sum is always 1 (MDCT reconstruction rule) */
662 /* XXX: merge with output */
663 {
664 int i, next_block_len, block_len, prev_block_len, n;
665 float *wptr;
666
667 block_len = s->block_len;
668 prev_block_len = 1 << s->prev_block_len_bits;
669 next_block_len = 1 << s->next_block_len_bits;
670
671 /* right part */
672 wptr = s->window + block_len;
673 if (block_len <= next_block_len) {
674 for(i=0;i<block_len;i++)
675 *wptr++ = s->windows[bsize][i];
676 } else {
677 /* overlap */
678 n = (block_len / 2) - (next_block_len / 2);
679 for(i=0;i<n;i++)
680 *wptr++ = 1.0;
681 for(i=0;i<next_block_len;i++)
682 *wptr++ = s->windows[s->frame_len_bits - s->next_block_len_bits][i];
683 for(i=0;i<n;i++)
684 *wptr++ = 0.0;
685 }
686
687 /* left part */
688 wptr = s->window + block_len;
689 if (block_len <= prev_block_len) {
690 for(i=0;i<block_len;i++)
691 *--wptr = s->windows[bsize][i];
692 } else {
693 /* overlap */
694 n = (block_len / 2) - (prev_block_len / 2);
695 for(i=0;i<n;i++)
696 *--wptr = 1.0;
697 for(i=0;i<prev_block_len;i++)
698 *--wptr = s->windows[s->frame_len_bits - s->prev_block_len_bits][i];
699 for(i=0;i<n;i++)
700 *--wptr = 0.0;
701 }
702 }
703
704
705 for(ch = 0; ch < s->nb_channels; ch++) { 715 for(ch = 0; ch < s->nb_channels; ch++) {
706 if (s->channel_coded[ch]) { 716 if (s->channel_coded[ch]) {
707 float *ptr;
708 int n4, index, n; 717 int n4, index, n;
709 718
710 n = s->block_len; 719 n = s->block_len;
711 n4 = s->block_len / 2; 720 n4 = s->block_len / 2;
712 s->mdct_ctx[bsize].fft.imdct_calc(&s->mdct_ctx[bsize], 721 s->mdct_ctx[bsize].fft.imdct_calc(&s->mdct_ctx[bsize],
713 s->output, s->coefs[ch], s->mdct_tmp); 722 s->output, s->coefs[ch], s->mdct_tmp);
714 723
715 /* XXX: optimize all that by build the window and
716 multipying/adding at the same time */
717
718 /* multiply by the window and add in the frame */ 724 /* multiply by the window and add in the frame */
719 index = (s->frame_len / 2) + s->block_pos - n4; 725 index = (s->frame_len / 2) + s->block_pos - n4;
720 ptr = &s->frame_out[ch][index]; 726 wma_window(s, &s->frame_out[ch][index]);
721 s->dsp.vector_fmul_add_add(ptr,s->window,s->output,ptr,0,2*n,1);
722 727
723 /* specific fast case for ms-stereo : add to second 728 /* specific fast case for ms-stereo : add to second
724 channel if it is not coded */ 729 channel if it is not coded */
725 if (s->ms_stereo && !s->channel_coded[1]) { 730 if (s->ms_stereo && !s->channel_coded[1]) {
726 ptr = &s->frame_out[1][index]; 731 wma_window(s, &s->frame_out[1][index]);
727 s->dsp.vector_fmul_add_add(ptr,s->window,s->output,ptr,0,2*n,1);
728 } 732 }
729 } 733 }
730 } 734 }
731 next: 735 next:
732 /* update block number */ 736 /* update block number */
777 ptr += incr; 781 ptr += incr;
778 } 782 }
779 /* prepare for next block */ 783 /* prepare for next block */
780 memmove(&s->frame_out[ch][0], &s->frame_out[ch][s->frame_len], 784 memmove(&s->frame_out[ch][0], &s->frame_out[ch][s->frame_len],
781 s->frame_len * sizeof(float)); 785 s->frame_len * sizeof(float));
782 /* XXX: suppress this */
783 memset(&s->frame_out[ch][s->frame_len], 0,
784 s->frame_len * sizeof(float));
785 } 786 }
786 787
787 #ifdef TRACE 788 #ifdef TRACE
788 dump_shorts(s, "samples", samples, n * s->nb_channels); 789 dump_shorts(s, "samples", samples, n * s->nb_channels);
789 #endif 790 #endif