changeset 3592:6a358dccf2ab libavcodec

SIMD vector optimizations. 3% faster overall decoding.
author banan
date Sat, 19 Aug 2006 08:46:52 +0000
parents 088be7d7c4fd
children 3da79b0751b1
files wmadec.c
diffstat 1 files changed, 7 insertions(+), 14 deletions(-) [+]
line wrap: on
line diff
--- a/wmadec.c	Sat Aug 19 02:42:20 2006 +0000
+++ b/wmadec.c	Sat Aug 19 08:46:52 2006 +0000
@@ -130,6 +130,7 @@
     float lsp_pow_e_table[256];
     float lsp_pow_m_table1[(1 << LSP_POW_BITS)];
     float lsp_pow_m_table2[(1 << LSP_POW_BITS)];
+    DSPContext dsp;
 
 #ifdef TRACE
     int frame_count;
@@ -228,6 +229,8 @@
     s->bit_rate = avctx->bit_rate;
     s->block_align = avctx->block_align;
 
+    dsputil_init(&s->dsp, avctx);
+
     if (avctx->codec->id == CODEC_ID_WMAV1) {
         s->version = 1;
     } else {
@@ -1109,7 +1112,7 @@
         if (s->channel_coded[ch]) {
             DECLARE_ALIGNED_16(FFTSample, output[BLOCK_MAX_SIZE * 2]);
             float *ptr;
-            int i, n4, index, n;
+            int n4, index, n;
 
             n = s->block_len;
             n4 = s->block_len / 2;
@@ -1118,27 +1121,17 @@
 
             /* XXX: optimize all that by building the window and
                multiplying/adding at the same time */
-            /* multiply by the window */
-            for(i=0;i<n * 2;i++) {
-                output[i] *= window[i];
-            }
 
-            /* add in the frame */
+            /* multiply by the window and add in the frame */
             index = (s->frame_len / 2) + s->block_pos - n4;
             ptr = &s->frame_out[ch][index];
-            for(i=0;i<n * 2;i++) {
-                *ptr += output[i];
-                ptr++;
-            }
+            s->dsp.vector_fmul_add_add(ptr,window,output,ptr,0,2*n,1);
 
             /* specific fast case for ms-stereo : add to second
                channel if it is not coded */
             if (s->ms_stereo && !s->channel_coded[1]) {
                 ptr = &s->frame_out[1][index];
-                for(i=0;i<n * 2;i++) {
-                    *ptr += output[i];
-                    ptr++;
-                }
+                s->dsp.vector_fmul_add_add(ptr,window,output,ptr,0,2*n,1);
             }
         }
     }