Mercurial > libavcodec.hg
comparison vorbis_dec.c @ 7263:fc843d00867c libavcodec
Exploit MDCT symmetry.
2% faster Vorbis on Conroe and K8; 7% on Celeron.
author | lorenm |
---|---|
date | Sun, 13 Jul 2008 15:03:58 +0000 |
parents | 032a49f033e8 |
children | a40ae5adf74c |
comparison
legend: equal | deleted | inserted | replaced
7262:e3822c61f2e4 | 7263:fc843d00867c |
---|---|
897 return 2; | 897 return 2; |
898 } | 898 } |
899 | 899 |
900 vc->channel_residues= av_malloc((vc->blocksize[1]/2)*vc->audio_channels * sizeof(float)); | 900 vc->channel_residues= av_malloc((vc->blocksize[1]/2)*vc->audio_channels * sizeof(float)); |
901 vc->channel_floors = av_malloc((vc->blocksize[1]/2)*vc->audio_channels * sizeof(float)); | 901 vc->channel_floors = av_malloc((vc->blocksize[1]/2)*vc->audio_channels * sizeof(float)); |
902 vc->saved = av_mallocz((vc->blocksize[1]/2)*vc->audio_channels * sizeof(float)); | 902 vc->saved = av_mallocz((vc->blocksize[1]/4)*vc->audio_channels * sizeof(float)); |
903 vc->ret = av_malloc((vc->blocksize[1]/2)*vc->audio_channels * sizeof(float)); | 903 vc->ret = av_malloc((vc->blocksize[1]/2)*vc->audio_channels * sizeof(float)); |
904 vc->buf = av_malloc( vc->blocksize[1] * sizeof(float)); | 904 vc->buf = av_malloc( vc->blocksize[1]/2 * sizeof(float)); |
905 vc->buf_tmp = av_malloc( vc->blocksize[1] * sizeof(float)); | 905 vc->buf_tmp = av_malloc( vc->blocksize[1]/2 * sizeof(float)); |
906 vc->previous_window=0; | 906 vc->previous_window=0; |
907 | 907 |
908 ff_mdct_init(&vc->mdct[0], bl0, 1); | 908 ff_mdct_init(&vc->mdct[0], bl0, 1); |
909 ff_mdct_init(&vc->mdct[1], bl1, 1); | 909 ff_mdct_init(&vc->mdct[1], bl1, 1); |
910 | 910 |
1518 | 1518 |
1519 retlen = (blocksize + vc->blocksize[previous_window])/4; | 1519 retlen = (blocksize + vc->blocksize[previous_window])/4; |
1520 for(j=0;j<vc->audio_channels;++j) { | 1520 for(j=0;j<vc->audio_channels;++j) { |
1521 uint_fast16_t bs0=vc->blocksize[0]; | 1521 uint_fast16_t bs0=vc->blocksize[0]; |
1522 uint_fast16_t bs1=vc->blocksize[1]; | 1522 uint_fast16_t bs1=vc->blocksize[1]; |
1523 float *saved=vc->saved+j*bs1/2; | 1523 float *saved=vc->saved+j*bs1/4; |
1524 float *ret=vc->ret+j*retlen; | 1524 float *ret=vc->ret+j*retlen; |
1525 float *buf=vc->buf; | 1525 float *buf=vc->buf; |
1526 const float *win=vc->win[blockflag&previous_window]; | 1526 const float *win=vc->win[blockflag&previous_window]; |
1527 | 1527 |
1528 vc->mdct[0].fft.imdct_calc(&vc->mdct[blockflag], buf, vc->channel_floors+j*blocksize/2, vc->buf_tmp); | 1528 vc->mdct[0].fft.imdct_half(&vc->mdct[blockflag], buf, vc->channel_floors+j*blocksize/2, vc->buf_tmp); |
1529 | 1529 |
1530 if(blockflag == previous_window) { | 1530 if(blockflag == previous_window) { |
1531 vc->dsp.vector_fmul_window(ret, saved, buf, win, fadd_bias, blocksize/2); | 1531 vc->dsp.vector_fmul_window(ret, saved, buf, win, fadd_bias, blocksize/4); |
1532 } else if(blockflag > previous_window) { | 1532 } else if(blockflag > previous_window) { |
1533 vc->dsp.vector_fmul_window(ret, saved, buf+(bs1-bs0)/4, win, fadd_bias, bs0/2); | 1533 vc->dsp.vector_fmul_window(ret, saved, buf, win, fadd_bias, bs0/4); |
1534 copy_normalize(ret+bs0/2, buf+(bs1+bs0)/4, (bs1-bs0)/4, vc->exp_bias, fadd_bias); | 1534 copy_normalize(ret+bs0/2, buf+bs0/4, (bs1-bs0)/4, vc->exp_bias, fadd_bias); |
1535 } else { | 1535 } else { |
1536 copy_normalize(ret, saved, (bs1-bs0)/4, vc->exp_bias, fadd_bias); | 1536 copy_normalize(ret, saved, (bs1-bs0)/4, vc->exp_bias, fadd_bias); |
1537 vc->dsp.vector_fmul_window(ret+(bs1-bs0)/4, saved+(bs1-bs0)/4, buf, win, fadd_bias, bs0/2); | 1537 vc->dsp.vector_fmul_window(ret+(bs1-bs0)/4, saved+(bs1-bs0)/4, buf, win, fadd_bias, bs0/4); |
1538 } | 1538 } |
1539 memcpy(saved, buf+blocksize/2, blocksize/2*sizeof(float)); | 1539 memcpy(saved, buf+blocksize/4, blocksize/4*sizeof(float)); |
1540 } | 1540 } |
1541 | 1541 |
1542 vc->previous_window = blockflag; | 1542 vc->previous_window = blockflag; |
1543 return retlen; | 1543 return retlen; |
1544 } | 1544 } |