comparison vorbis_dec.c @ 7263:fc843d00867c libavcodec

Exploit MDCT symmetry: 2% faster Vorbis decoding on Conroe and K8, 7% faster on Celeron.
author lorenm
date Sun, 13 Jul 2008 15:03:58 +0000
parents 032a49f033e8
children a40ae5adf74c
comparison
equal deleted inserted replaced
7262:e3822c61f2e4 7263:fc843d00867c
897 return 2; 897 return 2;
898 } 898 }
899 899
900 vc->channel_residues= av_malloc((vc->blocksize[1]/2)*vc->audio_channels * sizeof(float)); 900 vc->channel_residues= av_malloc((vc->blocksize[1]/2)*vc->audio_channels * sizeof(float));
901 vc->channel_floors = av_malloc((vc->blocksize[1]/2)*vc->audio_channels * sizeof(float)); 901 vc->channel_floors = av_malloc((vc->blocksize[1]/2)*vc->audio_channels * sizeof(float));
902 vc->saved = av_mallocz((vc->blocksize[1]/2)*vc->audio_channels * sizeof(float)); 902 vc->saved = av_mallocz((vc->blocksize[1]/4)*vc->audio_channels * sizeof(float));
903 vc->ret = av_malloc((vc->blocksize[1]/2)*vc->audio_channels * sizeof(float)); 903 vc->ret = av_malloc((vc->blocksize[1]/2)*vc->audio_channels * sizeof(float));
904 vc->buf = av_malloc( vc->blocksize[1] * sizeof(float)); 904 vc->buf = av_malloc( vc->blocksize[1]/2 * sizeof(float));
905 vc->buf_tmp = av_malloc( vc->blocksize[1] * sizeof(float)); 905 vc->buf_tmp = av_malloc( vc->blocksize[1]/2 * sizeof(float));
906 vc->previous_window=0; 906 vc->previous_window=0;
907 907
908 ff_mdct_init(&vc->mdct[0], bl0, 1); 908 ff_mdct_init(&vc->mdct[0], bl0, 1);
909 ff_mdct_init(&vc->mdct[1], bl1, 1); 909 ff_mdct_init(&vc->mdct[1], bl1, 1);
910 910
1518 1518
1519 retlen = (blocksize + vc->blocksize[previous_window])/4; 1519 retlen = (blocksize + vc->blocksize[previous_window])/4;
1520 for(j=0;j<vc->audio_channels;++j) { 1520 for(j=0;j<vc->audio_channels;++j) {
1521 uint_fast16_t bs0=vc->blocksize[0]; 1521 uint_fast16_t bs0=vc->blocksize[0];
1522 uint_fast16_t bs1=vc->blocksize[1]; 1522 uint_fast16_t bs1=vc->blocksize[1];
1523 float *saved=vc->saved+j*bs1/2; 1523 float *saved=vc->saved+j*bs1/4;
1524 float *ret=vc->ret+j*retlen; 1524 float *ret=vc->ret+j*retlen;
1525 float *buf=vc->buf; 1525 float *buf=vc->buf;
1526 const float *win=vc->win[blockflag&previous_window]; 1526 const float *win=vc->win[blockflag&previous_window];
1527 1527
1528 vc->mdct[0].fft.imdct_calc(&vc->mdct[blockflag], buf, vc->channel_floors+j*blocksize/2, vc->buf_tmp); 1528 vc->mdct[0].fft.imdct_half(&vc->mdct[blockflag], buf, vc->channel_floors+j*blocksize/2, vc->buf_tmp);
1529 1529
1530 if(blockflag == previous_window) { 1530 if(blockflag == previous_window) {
1531 vc->dsp.vector_fmul_window(ret, saved, buf, win, fadd_bias, blocksize/2); 1531 vc->dsp.vector_fmul_window(ret, saved, buf, win, fadd_bias, blocksize/4);
1532 } else if(blockflag > previous_window) { 1532 } else if(blockflag > previous_window) {
1533 vc->dsp.vector_fmul_window(ret, saved, buf+(bs1-bs0)/4, win, fadd_bias, bs0/2); 1533 vc->dsp.vector_fmul_window(ret, saved, buf, win, fadd_bias, bs0/4);
1534 copy_normalize(ret+bs0/2, buf+(bs1+bs0)/4, (bs1-bs0)/4, vc->exp_bias, fadd_bias); 1534 copy_normalize(ret+bs0/2, buf+bs0/4, (bs1-bs0)/4, vc->exp_bias, fadd_bias);
1535 } else { 1535 } else {
1536 copy_normalize(ret, saved, (bs1-bs0)/4, vc->exp_bias, fadd_bias); 1536 copy_normalize(ret, saved, (bs1-bs0)/4, vc->exp_bias, fadd_bias);
1537 vc->dsp.vector_fmul_window(ret+(bs1-bs0)/4, saved+(bs1-bs0)/4, buf, win, fadd_bias, bs0/2); 1537 vc->dsp.vector_fmul_window(ret+(bs1-bs0)/4, saved+(bs1-bs0)/4, buf, win, fadd_bias, bs0/4);
1538 } 1538 }
1539 memcpy(saved, buf+blocksize/2, blocksize/2*sizeof(float)); 1539 memcpy(saved, buf+blocksize/4, blocksize/4*sizeof(float));
1540 } 1540 }
1541 1541
1542 vc->previous_window = blockflag; 1542 vc->previous_window = blockflag;
1543 return retlen; 1543 return retlen;
1544 } 1544 }