changeset 4737:99d9dd34903b libavcodec

Optimize by building the MDCT window and multiplying/adding at the same time. Patch by Ian Braithwaite ian .. braithwaite . dk [Ffmpeg-devel] WMA decoder speedup 2007-03-22 22:56
author banan
date Mon, 26 Mar 2007 10:03:57 +0000
parents 59649ebd5ed8
children 7bec6dd03317
files wma.c wma.h wmadec.c wmaenc.c
diffstat 4 files changed, 60 insertions(+), 60 deletions(-) [+]
line wrap: on
line diff
--- a/wma.c	Mon Mar 26 00:54:29 2007 +0000
+++ b/wma.c	Mon Mar 26 10:03:57 2007 +0000
@@ -302,7 +302,7 @@
         window = av_malloc(sizeof(float) * n);
         alpha = M_PI / (2.0 * n);
         for(j=0;j<n;j++) {
-            window[n - j - 1] = sin((j + 0.5) * alpha);
+            window[j] = sin((j + 0.5) * alpha);
         }
         s->windows[i] = window;
     }
--- a/wma.h	Mon Mar 26 00:54:29 2007 +0000
+++ b/wma.h	Mon Mar 26 10:03:57 2007 +0000
@@ -112,7 +112,6 @@
     int16_t coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE];
     DECLARE_ALIGNED_16(float, coefs[MAX_CHANNELS][BLOCK_MAX_SIZE]);
     DECLARE_ALIGNED_16(FFTSample, output[BLOCK_MAX_SIZE * 2]);
-    DECLARE_ALIGNED_16(float, window[BLOCK_MAX_SIZE * 2]);
     MDCTContext mdct_ctx[BLOCK_NB_SIZES];
     float *windows[BLOCK_NB_SIZES];
     DECLARE_ALIGNED_16(FFTSample, mdct_tmp[BLOCK_MAX_SIZE]); ///< temporary storage for imdct
--- a/wmadec.c	Mon Mar 26 00:54:29 2007 +0000
+++ b/wmadec.c	Mon Mar 26 10:03:57 2007 +0000
@@ -316,6 +316,61 @@
     return 0;
 }
 
+
+/**
+ * Window the block held in s->output and merge it into out (spans 2 * s->block_len samples).
+ * The left half is multiplied by the window and added to what out already holds; the right half
+ * is stored into out (NOTE(review): reversed-window multiply inferred from the helper name -- confirm).
+ * We ensure that when the windows overlap their squared sum is always 1 (MDCT reconstruction rule).
+ */
+static void wma_window(WMACodecContext *s, float *out)
+{
+    float *in = s->output; /* samples to be windowed */
+    int block_len, bsize, n;
+
+    /* left part: overlap with the previous block */
+    if (s->block_len_bits <= s->prev_block_len_bits) { /* previous block is at least as long */
+        block_len = s->block_len;
+        bsize = s->frame_len_bits - s->block_len_bits; /* index of this block size in s->windows */
+
+        s->dsp.vector_fmul_add_add(out, in, s->windows[bsize],
+                                   out, 0, block_len, 1);
+
+    } else { /* previous block is shorter: only its length overlaps, centered in this half */
+        block_len = 1 << s->prev_block_len_bits;
+        n = (s->block_len - block_len) / 2; /* samples on each side of the overlap region */
+        bsize = s->frame_len_bits - s->prev_block_len_bits;
+
+        s->dsp.vector_fmul_add_add(out+n, in+n, s->windows[bsize],
+                                   out+n, 0, block_len, 1); /* first n samples of out are left untouched */
+
+        memcpy(out+n+block_len, in+n+block_len, n*sizeof(float)); /* after the overlap: plain copy (window of 1) */
+    }
+
+    out += s->block_len; /* advance both pointers to the second half */
+    in += s->block_len;
+
+    /* right part: overlap with the next block */
+    if (s->block_len_bits <= s->next_block_len_bits) { /* next block is at least as long */
+        block_len = s->block_len;
+        bsize = s->frame_len_bits - s->block_len_bits;
+
+        s->dsp.vector_fmul_reverse(out, in, s->windows[bsize], block_len);
+
+    } else { /* next block is shorter: only its length overlaps, centered in this half */
+        block_len = 1 << s->next_block_len_bits;
+        n = (s->block_len - block_len) / 2;
+        bsize = s->frame_len_bits - s->next_block_len_bits;
+
+        memcpy(out, in, n*sizeof(float)); /* before the overlap: plain copy (window of 1) */
+
+        s->dsp.vector_fmul_reverse(out+n, in+n, s->windows[bsize], block_len);
+
+        memset(out+n+block_len, 0, n*sizeof(float)); /* after the overlap: cleared (window of 0) */
+    }
+}
+
+
 /**
  * @return 0 if OK. 1 if last block of frame. return -1 if
  * unrecorrable error.
@@ -657,54 +712,8 @@
         }
     }
 
-    /* build the window : we ensure that when the windows overlap
-       their squared sum is always 1 (MDCT reconstruction rule) */
-    /* XXX: merge with output */
-    {
-        int i, next_block_len, block_len, prev_block_len, n;
-        float *wptr;
-
-        block_len = s->block_len;
-        prev_block_len = 1 << s->prev_block_len_bits;
-        next_block_len = 1 << s->next_block_len_bits;
-
-        /* right part */
-        wptr = s->window + block_len;
-        if (block_len <= next_block_len) {
-            for(i=0;i<block_len;i++)
-                *wptr++ = s->windows[bsize][i];
-        } else {
-            /* overlap */
-            n = (block_len / 2) - (next_block_len / 2);
-            for(i=0;i<n;i++)
-                *wptr++ = 1.0;
-            for(i=0;i<next_block_len;i++)
-                *wptr++ = s->windows[s->frame_len_bits - s->next_block_len_bits][i];
-            for(i=0;i<n;i++)
-                *wptr++ = 0.0;
-        }
-
-        /* left part */
-        wptr = s->window + block_len;
-        if (block_len <= prev_block_len) {
-            for(i=0;i<block_len;i++)
-                *--wptr = s->windows[bsize][i];
-        } else {
-            /* overlap */
-            n = (block_len / 2) - (prev_block_len / 2);
-            for(i=0;i<n;i++)
-                *--wptr = 1.0;
-            for(i=0;i<prev_block_len;i++)
-                *--wptr = s->windows[s->frame_len_bits - s->prev_block_len_bits][i];
-            for(i=0;i<n;i++)
-                *--wptr = 0.0;
-        }
-    }
-
-
     for(ch = 0; ch < s->nb_channels; ch++) {
         if (s->channel_coded[ch]) {
-            float *ptr;
             int n4, index, n;
 
             n = s->block_len;
@@ -712,19 +721,14 @@
             s->mdct_ctx[bsize].fft.imdct_calc(&s->mdct_ctx[bsize],
                           s->output, s->coefs[ch], s->mdct_tmp);
 
-            /* XXX: optimize all that by build the window and
-               multipying/adding at the same time */
-
             /* multiply by the window and add in the frame */
             index = (s->frame_len / 2) + s->block_pos - n4;
-            ptr = &s->frame_out[ch][index];
-            s->dsp.vector_fmul_add_add(ptr,s->window,s->output,ptr,0,2*n,1);
+            wma_window(s, &s->frame_out[ch][index]);
 
             /* specific fast case for ms-stereo : add to second
                channel if it is not coded */
             if (s->ms_stereo && !s->channel_coded[1]) {
-                ptr = &s->frame_out[1][index];
-                s->dsp.vector_fmul_add_add(ptr,s->window,s->output,ptr,0,2*n,1);
+                wma_window(s, &s->frame_out[1][index]);
             }
         }
     }
@@ -779,9 +783,6 @@
         /* prepare for next block */
         memmove(&s->frame_out[ch][0], &s->frame_out[ch][s->frame_len],
                 s->frame_len * sizeof(float));
-        /* XXX: suppress this */
-        memset(&s->frame_out[ch][s->frame_len], 0,
-               s->frame_len * sizeof(float));
     }
 
 #ifdef TRACE
--- a/wmaenc.c	Mon Mar 26 00:54:29 2007 +0000
+++ b/wmaenc.c	Mon Mar 26 10:03:57 2007 +0000
@@ -92,8 +92,8 @@
         memcpy(s->output, s->frame_out[channel], sizeof(float)*window_len);
         j = channel;
         for (i = 0; i < len; i++, j += avctx->channels){
-            s->output[i+window_len]  = audio[j] / n * win[i];
-            s->frame_out[channel][i] = audio[j] / n * win[window_len - i - 1];
+            s->output[i+window_len]  = audio[j] / n * win[window_len - i - 1];
+            s->frame_out[channel][i] = audio[j] / n * win[i];
         }
         ff_mdct_calc(&s->mdct_ctx[window_index], s->coefs[channel], s->output, s->mdct_tmp);
     }