changeset 10424:94595d0e617c libavcodec

Move autocorrelation function from flacenc.c to lpc.c. Also rename the corresponding dsputil functions and remove their dependency on the FLAC encoder. Fixes Issue1486.
author jbr
date Sat, 17 Oct 2009 21:00:39 +0000
parents 2e4967487e59
children 2eed7e18fc5d
files Makefile dsputil.c dsputil.h flacenc.c lpc.c x86/dsputilenc_mmx.c x86/flacdsp_mmx.c x86/lpc_mmx.c
diffstat 8 files changed, 209 insertions(+), 214 deletions(-) [+]
line wrap: on
line diff
--- a/Makefile	Sat Oct 17 19:35:47 2009 +0000
+++ b/Makefile	Sat Oct 17 21:00:39 2009 +0000
@@ -456,7 +456,6 @@
 
 MMX-OBJS-$(CONFIG_CAVS_DECODER)        += x86/cavsdsp_mmx.o
 MMX-OBJS-$(CONFIG_ENCODERS)            += x86/dsputilenc_mmx.o
-MMX-OBJS-$(CONFIG_FLAC_ENCODER)        += x86/flacdsp_mmx.o
 MMX-OBJS-$(CONFIG_GPL)                 += x86/idct_mmx.o
 MMX-OBJS-$(CONFIG_SNOW_DECODER)        += x86/snowdsp_mmx.o
 MMX-OBJS-$(CONFIG_VC1_DECODER)         += x86/vc1dsp_mmx.o
@@ -474,6 +473,7 @@
                                           x86/fft.o                     \
                                           x86/idct_mmx_xvid.o           \
                                           x86/idct_sse2_xvid.o          \
+                                          x86/lpc_mmx.o                 \
                                           x86/motion_est_mmx.o          \
                                           x86/mpegvideo_mmx.o           \
                                           x86/simple_idct_mmx.o         \
--- a/dsputil.c	Sat Oct 17 19:35:47 2009 +0000
+++ b/dsputil.c	Sat Oct 17 21:00:39 2009 +0000
@@ -45,8 +45,8 @@
 /* ac3dec.c */
 void ff_ac3_downmix_c(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len);
 
-/* flacenc.c */
-void ff_flac_compute_autocorr(const int32_t *data, int len, int lag, double *autoc);
+/* lpc.c */
+void ff_lpc_compute_autocorr(const int32_t *data, int len, int lag, double *autoc);
 
 /* pngdec.c */
 void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp);
@@ -4837,9 +4837,7 @@
 #if CONFIG_AC3_DECODER
     c->ac3_downmix = ff_ac3_downmix_c;
 #endif
-#if CONFIG_FLAC_ENCODER
-    c->flac_compute_autocorr = ff_flac_compute_autocorr;
-#endif
+    c->lpc_compute_autocorr = ff_lpc_compute_autocorr;
     c->vector_fmul = vector_fmul_c;
     c->vector_fmul_reverse = vector_fmul_reverse_c;
     c->vector_fmul_add = vector_fmul_add_c;
--- a/dsputil.h	Sat Oct 17 19:35:47 2009 +0000
+++ b/dsputil.h	Sat Oct 17 21:00:39 2009 +0000
@@ -386,7 +386,7 @@
     void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize);
     void (*ac3_downmix)(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len);
     /* no alignment needed */
-    void (*flac_compute_autocorr)(const int32_t *data, int len, int lag, double *autoc);
+    void (*lpc_compute_autocorr)(const int32_t *data, int len, int lag, double *autoc);
     /* assume len is a multiple of 8, and arrays are 16-byte aligned */
     void (*vector_fmul)(float *dst, const float *src, int len);
     void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len);
--- a/flacenc.c	Sat Oct 17 19:35:47 2009 +0000
+++ b/flacenc.c	Sat Oct 17 21:00:39 2009 +0000
@@ -552,69 +552,6 @@
     return bits;
 }
 
-/**
- * Apply Welch window function to audio block
- */
-static void apply_welch_window(const int32_t *data, int len, double *w_data)
-{
-    int i, n2;
-    double w;
-    double c;
-
-    assert(!(len&1)); //the optimization in r11881 does not support odd len
-                      //if someone wants odd len extend the change in r11881
-
-    n2 = (len >> 1);
-    c = 2.0 / (len - 1.0);
-
-    w_data+=n2;
-      data+=n2;
-    for(i=0; i<n2; i++) {
-        w = c - n2 + i;
-        w = 1.0 - (w * w);
-        w_data[-i-1] = data[-i-1] * w;
-        w_data[+i  ] = data[+i  ] * w;
-    }
-}
-
-/**
- * Calculates autocorrelation data from audio samples
- * A Welch window function is applied before calculation.
- */
-void ff_flac_compute_autocorr(const int32_t *data, int len, int lag,
-                              double *autoc)
-{
-    int i, j;
-    double tmp[len + lag + 1];
-    double *data1= tmp + lag;
-
-    apply_welch_window(data, len, data1);
-
-    for(j=0; j<lag; j++)
-        data1[j-lag]= 0.0;
-    data1[len] = 0.0;
-
-    for(j=0; j<lag; j+=2){
-        double sum0 = 1.0, sum1 = 1.0;
-        for(i=j; i<len; i++){
-            sum0 += data1[i] * data1[i-j];
-            sum1 += data1[i] * data1[i-j-1];
-        }
-        autoc[j  ] = sum0;
-        autoc[j+1] = sum1;
-    }
-
-    if(j==lag){
-        double sum = 1.0;
-        for(i=j-1; i<len; i+=2){
-            sum += data1[i  ] * data1[i-j  ]
-                 + data1[i+1] * data1[i-j+1];
-        }
-        autoc[j] = sum;
-    }
-}
-
-
 static void encode_residual_verbatim(int32_t *res, int32_t *smp, int n)
 {
     assert(n > 0);
--- a/lpc.c	Sat Oct 17 19:35:47 2009 +0000
+++ b/lpc.c	Sat Oct 17 21:00:39 2009 +0000
@@ -27,6 +27,68 @@
 
 
 /**
+ * Apply Welch window function to audio block
+ */
+static void apply_welch_window(const int32_t *data, int len, double *w_data)
+{   // stores data[k] * w into w_data[k] for all len samples; len must be even
+    int i, n2;
+    double w;
+    double c;
+
+    assert(!(len&1)); //the optimization in r11881 does not support odd len
+                      //if someone wants odd len extend the change in r11881
+
+    n2 = (len >> 1);       // half of the block length
+    c = 2.0 / (len - 1.0);
+
+    w_data+=n2;            // move both pointers to the middle of the block; the loop
+      data+=n2;            // then walks outwards using indices -i-1 and +i
+    for(i=0; i<n2; i++) {
+        w = c - n2 + i;
+        w = 1.0 - (w * w); // one weight, applied to both positions written below
+        w_data[-i-1] = data[-i-1] * w;
+        w_data[+i  ] = data[+i  ] * w;
+    }
+}
+
+/**
+ * Calculate autocorrelation data from audio samples and store it in autoc[0..lag].
+ * A Welch window function is applied to the samples before the calculation.
+ */
+void ff_lpc_compute_autocorr(const int32_t *data, int len, int lag,
+                             double *autoc)
+{
+    int i, j;
+    double tmp[len + lag + 1]; // VLA: lag leading zeros, len windowed samples, 1 trailing zero
+    double *data1= tmp + lag;  // data1[0..len-1] holds the windowed samples
+
+    apply_welch_window(data, len, data1);
+
+    for(j=0; j<lag; j++)
+        data1[j-lag]= 0.0;     // zero data1[-lag..-1] so negative indices below read 0
+    data1[len] = 0.0;          // zero the element read as data1[i+1] in the final loop
+
+    for(j=0; j<lag; j+=2){     // two lags (j and j+1) per pass
+        double sum0 = 1.0, sum1 = 1.0; // both sums are seeded with 1.0, not 0.0
+        for(i=j; i<len; i++){
+            sum0 += data1[i] * data1[i-j];
+            sum1 += data1[i] * data1[i-j-1];
+        }
+        autoc[j  ] = sum0;
+        autoc[j+1] = sum1;
+    }
+
+    if(j==lag){                // true only for even lag: autoc[lag] is still missing
+        double sum = 1.0;
+        for(i=j-1; i<len; i+=2){ // two products per step; may touch data1[-1] and data1[len]
+            sum += data1[i  ] * data1[i-j  ]
+                 + data1[i+1] * data1[i-j+1];
+        }
+        autoc[j] = sum;
+    }
+}
+
+/**
  * Quantize LPC coefficients
  */
 static void quantize_lpc_coefs(double *lpc_in, int order, int precision,
@@ -115,7 +177,7 @@
     assert(max_order >= MIN_LPC_ORDER && max_order <= MAX_LPC_ORDER && use_lpc > 0);
 
     if(use_lpc == 1){
-        s->flac_compute_autocorr(samples, blocksize, max_order, autoc);
+        s->lpc_compute_autocorr(samples, blocksize, max_order, autoc);
 
         compute_lpc_coefs(autoc, max_order, &lpc[0][0], MAX_LPC_ORDER, 0, 1);
 
--- a/x86/dsputilenc_mmx.c	Sat Oct 17 19:35:47 2009 +0000
+++ b/x86/dsputilenc_mmx.c	Sat Oct 17 21:00:39 2009 +0000
@@ -1348,8 +1348,7 @@
 #endif //HAVE_SSSE3
 
 
-/* FLAC specific */
-void ff_flac_compute_autocorr_sse2(const int32_t *data, int len, int lag,
+void ff_lpc_compute_autocorr_sse2(const int32_t *data, int len, int lag,
                                    double *autoc);
 
 
@@ -1414,8 +1413,7 @@
             c->sum_abs_dctelem= sum_abs_dctelem_sse2;
             c->hadamard8_diff[0]= hadamard8_diff16_sse2;
             c->hadamard8_diff[1]= hadamard8_diff_sse2;
-            if (CONFIG_FLAC_ENCODER)
-                c->flac_compute_autocorr = ff_flac_compute_autocorr_sse2;
+            c->lpc_compute_autocorr = ff_lpc_compute_autocorr_sse2;
         }
 
 #if HAVE_SSSE3
--- a/x86/flacdsp_mmx.c	Sat Oct 17 19:35:47 2009 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,139 +0,0 @@
-/*
- * MMX optimized FLAC DSP utils
- * Copyright (c) 2007 Loren Merritt
- *
- * This file is part of FFmpeg.
- *
- * FFmpeg is free software; you can redistribute it and/or
- * modify it under the terms of the GNU Lesser General Public
- * License as published by the Free Software Foundation; either
- * version 2.1 of the License, or (at your option) any later version.
- *
- * FFmpeg is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
- * Lesser General Public License for more details.
- *
- * You should have received a copy of the GNU Lesser General Public
- * License along with FFmpeg; if not, write to the Free Software
- * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
- */
-
-#include "libavutil/x86_cpu.h"
-#include "dsputil_mmx.h"
-
-static void apply_welch_window_sse2(const int32_t *data, int len, double *w_data)
-{
-    double c = 2.0 / (len-1.0);
-    int n2 = len>>1;
-    x86_reg i = -n2*sizeof(int32_t);
-    x86_reg j =  n2*sizeof(int32_t);
-    __asm__ volatile(
-        "movsd   %0,     %%xmm7                \n\t"
-        "movapd  "MANGLE(ff_pd_1)", %%xmm6     \n\t"
-        "movapd  "MANGLE(ff_pd_2)", %%xmm5     \n\t"
-        "movlhps %%xmm7, %%xmm7                \n\t"
-        "subpd   %%xmm5, %%xmm7                \n\t"
-        "addsd   %%xmm6, %%xmm7                \n\t"
-        ::"m"(c)
-    );
-#define WELCH(MOVPD, offset)\
-    __asm__ volatile(\
-        "1:                                    \n\t"\
-        "movapd   %%xmm7,  %%xmm1              \n\t"\
-        "mulpd    %%xmm1,  %%xmm1              \n\t"\
-        "movapd   %%xmm6,  %%xmm0              \n\t"\
-        "subpd    %%xmm1,  %%xmm0              \n\t"\
-        "pshufd   $0x4e,   %%xmm0, %%xmm1      \n\t"\
-        "cvtpi2pd (%3,%0), %%xmm2              \n\t"\
-        "cvtpi2pd "#offset"*4(%3,%1), %%xmm3   \n\t"\
-        "mulpd    %%xmm0,  %%xmm2              \n\t"\
-        "mulpd    %%xmm1,  %%xmm3              \n\t"\
-        "movapd   %%xmm2, (%2,%0,2)            \n\t"\
-        MOVPD"    %%xmm3, "#offset"*8(%2,%1,2) \n\t"\
-        "subpd    %%xmm5,  %%xmm7              \n\t"\
-        "sub      $8,      %1                  \n\t"\
-        "add      $8,      %0                  \n\t"\
-        "jl 1b                                 \n\t"\
-        :"+&r"(i), "+&r"(j)\
-        :"r"(w_data+n2), "r"(data+n2)\
-    );
-    if(len&1)
-        WELCH("movupd", -1)
-    else
-        WELCH("movapd", -2)
-#undef WELCH
-}
-
-void ff_flac_compute_autocorr_sse2(const int32_t *data, int len, int lag,
-                                   double *autoc)
-{
-    double tmp[len + lag + 2];
-    double *data1 = tmp + lag;
-    int j;
-
-    if((x86_reg)data1 & 15)
-        data1++;
-
-    apply_welch_window_sse2(data, len, data1);
-
-    for(j=0; j<lag; j++)
-        data1[j-lag]= 0.0;
-    data1[len] = 0.0;
-
-    for(j=0; j<lag; j+=2){
-        x86_reg i = -len*sizeof(double);
-        if(j == lag-2) {
-            __asm__ volatile(
-                "movsd    "MANGLE(ff_pd_1)", %%xmm0 \n\t"
-                "movsd    "MANGLE(ff_pd_1)", %%xmm1 \n\t"
-                "movsd    "MANGLE(ff_pd_1)", %%xmm2 \n\t"
-                "1:                                 \n\t"
-                "movapd   (%4,%0), %%xmm3           \n\t"
-                "movupd -8(%5,%0), %%xmm4           \n\t"
-                "movapd   (%5,%0), %%xmm5           \n\t"
-                "mulpd     %%xmm3, %%xmm4           \n\t"
-                "mulpd     %%xmm3, %%xmm5           \n\t"
-                "mulpd -16(%5,%0), %%xmm3           \n\t"
-                "addpd     %%xmm4, %%xmm1           \n\t"
-                "addpd     %%xmm5, %%xmm0           \n\t"
-                "addpd     %%xmm3, %%xmm2           \n\t"
-                "add       $16,    %0               \n\t"
-                "jl 1b                              \n\t"
-                "movhlps   %%xmm0, %%xmm3           \n\t"
-                "movhlps   %%xmm1, %%xmm4           \n\t"
-                "movhlps   %%xmm2, %%xmm5           \n\t"
-                "addsd     %%xmm3, %%xmm0           \n\t"
-                "addsd     %%xmm4, %%xmm1           \n\t"
-                "addsd     %%xmm5, %%xmm2           \n\t"
-                "movsd     %%xmm0, %1               \n\t"
-                "movsd     %%xmm1, %2               \n\t"
-                "movsd     %%xmm2, %3               \n\t"
-                :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1]), "=m"(autoc[j+2])
-                :"r"(data1+len), "r"(data1+len-j)
-            );
-        } else {
-            __asm__ volatile(
-                "movsd    "MANGLE(ff_pd_1)", %%xmm0 \n\t"
-                "movsd    "MANGLE(ff_pd_1)", %%xmm1 \n\t"
-                "1:                                 \n\t"
-                "movapd   (%3,%0), %%xmm3           \n\t"
-                "movupd -8(%4,%0), %%xmm4           \n\t"
-                "mulpd     %%xmm3, %%xmm4           \n\t"
-                "mulpd    (%4,%0), %%xmm3           \n\t"
-                "addpd     %%xmm4, %%xmm1           \n\t"
-                "addpd     %%xmm3, %%xmm0           \n\t"
-                "add       $16,    %0               \n\t"
-                "jl 1b                              \n\t"
-                "movhlps   %%xmm0, %%xmm3           \n\t"
-                "movhlps   %%xmm1, %%xmm4           \n\t"
-                "addsd     %%xmm3, %%xmm0           \n\t"
-                "addsd     %%xmm4, %%xmm1           \n\t"
-                "movsd     %%xmm0, %1               \n\t"
-                "movsd     %%xmm1, %2               \n\t"
-                :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1])
-                :"r"(data1+len), "r"(data1+len-j)
-            );
-        }
-    }
-}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/x86/lpc_mmx.c	Sat Oct 17 21:00:39 2009 +0000
@@ -0,0 +1,139 @@
+/*
+ * MMX optimized LPC DSP utils
+ * Copyright (c) 2007 Loren Merritt
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/x86_cpu.h"
+#include "dsputil_mmx.h"
+
+static void apply_welch_window_sse2(const int32_t *data, int len, double *w_data)
+{   // SSE2 inline-asm routine: writes windowed copies of data[] as doubles to w_data[]
+    double c = 2.0 / (len-1.0);
+    int n2 = len>>1;
+    x86_reg i = -n2*sizeof(int32_t); // byte offset from data+n2, stepped up by 8 per iteration
+    x86_reg j =  n2*sizeof(int32_t); // byte offset from data+n2, stepped down by 8 per iteration
+    __asm__ volatile(
+        "movsd   %0,     %%xmm7                \n\t" // low half of xmm7 = c
+        "movapd  "MANGLE(ff_pd_1)", %%xmm6     \n\t" // presumably {1.0, 1.0}; defined outside this file
+        "movapd  "MANGLE(ff_pd_2)", %%xmm5     \n\t" // presumably {2.0, 2.0}; defined outside this file
+        "movlhps %%xmm7, %%xmm7                \n\t" // copy low half to high half: xmm7 = {c, c}
+        "subpd   %%xmm5, %%xmm7                \n\t"
+        "addsd   %%xmm6, %%xmm7                \n\t" // adds to the low element only
+        ::"m"(c) // NOTE(review): no clobbers declared; xmm5-xmm7 are expected to stay live for WELCH below
+    );
+#define WELCH(MOVPD, offset)\
+    __asm__ volatile(\
+        "1:                                    \n\t"\
+        "movapd   %%xmm7,  %%xmm1              \n\t"\
+        "mulpd    %%xmm1,  %%xmm1              \n\t"\
+        "movapd   %%xmm6,  %%xmm0              \n\t"\
+        "subpd    %%xmm1,  %%xmm0              \n\t"\
+        "pshufd   $0x4e,   %%xmm0, %%xmm1      \n\t"\
+        "cvtpi2pd (%3,%0), %%xmm2              \n\t"\
+        "cvtpi2pd "#offset"*4(%3,%1), %%xmm3   \n\t"\
+        "mulpd    %%xmm0,  %%xmm2              \n\t"\
+        "mulpd    %%xmm1,  %%xmm3              \n\t"\
+        "movapd   %%xmm2, (%2,%0,2)            \n\t"\
+        MOVPD"    %%xmm3, "#offset"*8(%2,%1,2) \n\t"\
+        "subpd    %%xmm5,  %%xmm7              \n\t"\
+        "sub      $8,      %1                  \n\t"\
+        "add      $8,      %0                  \n\t"\
+        "jl 1b                                 \n\t"\
+        :"+&r"(i), "+&r"(j)\
+        :"r"(w_data+n2), "r"(data+n2)\
+    );
+    if(len&1)                // odd len: upper-half store uses movupd (unaligned) at offset -1
+        WELCH("movupd", -1)
+    else                     // even len: upper-half store uses movapd (aligned) at offset -2
+        WELCH("movapd", -2)
+#undef WELCH
+}
+
+void ff_lpc_compute_autocorr_sse2(const int32_t *data, int len, int lag,
+                                   double *autoc)
+{   // SSE2 inline-asm routine: windows data[] and writes autoc[0..lag]
+    double tmp[len + lag + 2]; // VLA with one spare element for the alignment bump below
+    double *data1 = tmp + lag;
+    int j;
+
+    if((x86_reg)data1 & 15)    // not 16-byte aligned: advance by one double
+        data1++;
+
+    apply_welch_window_sse2(data, len, data1);
+
+    for(j=0; j<lag; j++)
+        data1[j-lag]= 0.0;     // zero data1[-lag..-1], read by the lagged loads below
+    data1[len] = 0.0;
+
+    for(j=0; j<lag; j+=2){
+        x86_reg i = -len*sizeof(double); // negative byte offset from data1+len, counts up to 0
+        if(j == lag-2) {       // only possible for even lag: this pass also produces autoc[j+2]
+            __asm__ volatile(
+                "movsd    "MANGLE(ff_pd_1)", %%xmm0 \n\t" // three accumulators, low element
+                "movsd    "MANGLE(ff_pd_1)", %%xmm1 \n\t" // loaded from ff_pd_1 (presumably 1.0),
+                "movsd    "MANGLE(ff_pd_1)", %%xmm2 \n\t" // high element zeroed by movsd-from-memory
+                "1:                                 \n\t"
+                "movapd   (%4,%0), %%xmm3           \n\t" // two samples from data1
+                "movupd -8(%5,%0), %%xmm4           \n\t" // samples lagged by j+1 (unaligned)
+                "movapd   (%5,%0), %%xmm5           \n\t" // samples lagged by j
+                "mulpd     %%xmm3, %%xmm4           \n\t"
+                "mulpd     %%xmm3, %%xmm5           \n\t"
+                "mulpd -16(%5,%0), %%xmm3           \n\t" // samples lagged by j+2
+                "addpd     %%xmm4, %%xmm1           \n\t"
+                "addpd     %%xmm5, %%xmm0           \n\t"
+                "addpd     %%xmm3, %%xmm2           \n\t"
+                "add       $16,    %0               \n\t" // advance by two doubles
+                "jl 1b                              \n\t" // loop while the offset is negative
+                "movhlps   %%xmm0, %%xmm3           \n\t" // horizontal add: high + low element
+                "movhlps   %%xmm1, %%xmm4           \n\t"
+                "movhlps   %%xmm2, %%xmm5           \n\t"
+                "addsd     %%xmm3, %%xmm0           \n\t"
+                "addsd     %%xmm4, %%xmm1           \n\t"
+                "addsd     %%xmm5, %%xmm2           \n\t"
+                "movsd     %%xmm0, %1               \n\t"
+                "movsd     %%xmm1, %2               \n\t"
+                "movsd     %%xmm2, %3               \n\t"
+                :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1]), "=m"(autoc[j+2])
+                :"r"(data1+len), "r"(data1+len-j) // NOTE(review): no xmm clobbers declared
+            );
+        } else {               // regular pass: produces autoc[j] and autoc[j+1]
+            __asm__ volatile(
+                "movsd    "MANGLE(ff_pd_1)", %%xmm0 \n\t" // two accumulators seeded from ff_pd_1
+                "movsd    "MANGLE(ff_pd_1)", %%xmm1 \n\t"
+                "1:                                 \n\t"
+                "movapd   (%3,%0), %%xmm3           \n\t" // two samples from data1
+                "movupd -8(%4,%0), %%xmm4           \n\t" // samples lagged by j+1 (unaligned)
+                "mulpd     %%xmm3, %%xmm4           \n\t"
+                "mulpd    (%4,%0), %%xmm3           \n\t" // samples lagged by j
+                "addpd     %%xmm4, %%xmm1           \n\t"
+                "addpd     %%xmm3, %%xmm0           \n\t"
+                "add       $16,    %0               \n\t" // advance by two doubles
+                "jl 1b                              \n\t" // loop while the offset is negative
+                "movhlps   %%xmm0, %%xmm3           \n\t" // horizontal add: high + low element
+                "movhlps   %%xmm1, %%xmm4           \n\t"
+                "addsd     %%xmm3, %%xmm0           \n\t"
+                "addsd     %%xmm4, %%xmm1           \n\t"
+                "movsd     %%xmm0, %1               \n\t"
+                "movsd     %%xmm1, %2               \n\t"
+                :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1])
+                :"r"(data1+len), "r"(data1+len-j) // NOTE(review): no xmm clobbers declared
+            );
+        }
+    }
+}