changeset 7962:c0745c5b1bb4 libavcodec

Interleaved forward/backward channels processing in order to increase chances of stepping on already cached data for the cores with extremely small data cache. Patch by Siarhei Siamashka: name dot surname at gmail
author benoit
date Wed, 01 Oct 2008 07:28:34 +0000
parents ef0616549c16
children 474794ff86df
files vorbis_dec.c
diffstat 1 files changed, 7 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/vorbis_dec.c	Tue Sep 30 19:51:02 2008 +0000
+++ b/vorbis_dec.c	Wed Oct 01 07:28:34 2008 +0000
@@ -1426,7 +1426,7 @@
     uint_fast8_t mode_number;
     uint_fast8_t blockflag;
     uint_fast16_t blocksize;
-    int_fast32_t i,j,dir;
+    int_fast32_t i,j;
     uint_fast8_t no_residue[vc->audio_channels];
     uint_fast8_t do_not_decode[vc->audio_channels];
     vorbis_mapping *mapping;
@@ -1518,29 +1518,27 @@
         vc->dsp.vorbis_inverse_coupling(mag, ang, blocksize/2);
     }
 
-// Dotproduct
+// Dotproduct, MDCT
 
-    for(j=0, ch_floor_ptr=vc->channel_floors;j<vc->audio_channels;++j,ch_floor_ptr+=blocksize/2) {
+    for(j=vc->audio_channels-1;j>=0;j--) {
+        ch_floor_ptr=vc->channel_floors+j*blocksize/2;
         ch_res_ptr=vc->channel_residues+res_chan[j]*blocksize/2;
         vc->dsp.vector_fmul(ch_floor_ptr, ch_res_ptr, blocksize/2);
+        ff_imdct_half(&vc->mdct[blockflag], ch_res_ptr, ch_floor_ptr);
     }
 
-// MDCT, overlap/add, save data for next overlapping  FPMATH
+// Overlap/add, save data for next overlapping  FPMATH
 
     retlen = (blocksize + vc->blocksize[previous_window])/4;
-    dir = retlen <= blocksize/2; // pick an order so that ret[] can reuse floors[] without stepping on any data we need
-    for(j=dir?0:vc->audio_channels-1; (unsigned)j<vc->audio_channels; j+=dir*2-1) {
+    for(j=0;j<vc->audio_channels;j++) {
         uint_fast16_t bs0=vc->blocksize[0];
         uint_fast16_t bs1=vc->blocksize[1];
         float *residue=vc->channel_residues+res_chan[j]*blocksize/2;
-        float *floor=vc->channel_floors+j*blocksize/2;
         float *saved=vc->saved+j*bs1/4;
         float *ret=vc->channel_floors+j*retlen;
         float *buf=residue;
         const float *win=vc->win[blockflag&previous_window];
 
-        ff_imdct_half(&vc->mdct[blockflag], buf, floor);
-
         if(blockflag == previous_window) {
             vc->dsp.vector_fmul_window(ret, saved, buf, win, fadd_bias, blocksize/4);
         } else if(blockflag > previous_window) {