Mercurial > libavcodec.hg
changeset 10424:94595d0e617c libavcodec
Move autocorrelation function from flacenc.c to lpc.c. Also rename the
corresponding dsputil functions and remove their dependency on the FLAC
encoder.
Fixes Issue1486.
author | jbr |
---|---|
date | Sat, 17 Oct 2009 21:00:39 +0000 |
parents | 2e4967487e59 |
children | 2eed7e18fc5d |
files | Makefile dsputil.c dsputil.h flacenc.c lpc.c x86/dsputilenc_mmx.c x86/flacdsp_mmx.c x86/lpc_mmx.c |
diffstat | 8 files changed, 209 insertions(+), 214 deletions(-) [+] |
line wrap: on
line diff
--- a/Makefile Sat Oct 17 19:35:47 2009 +0000 +++ b/Makefile Sat Oct 17 21:00:39 2009 +0000 @@ -456,7 +456,6 @@ MMX-OBJS-$(CONFIG_CAVS_DECODER) += x86/cavsdsp_mmx.o MMX-OBJS-$(CONFIG_ENCODERS) += x86/dsputilenc_mmx.o -MMX-OBJS-$(CONFIG_FLAC_ENCODER) += x86/flacdsp_mmx.o MMX-OBJS-$(CONFIG_GPL) += x86/idct_mmx.o MMX-OBJS-$(CONFIG_SNOW_DECODER) += x86/snowdsp_mmx.o MMX-OBJS-$(CONFIG_VC1_DECODER) += x86/vc1dsp_mmx.o @@ -474,6 +473,7 @@ x86/fft.o \ x86/idct_mmx_xvid.o \ x86/idct_sse2_xvid.o \ + x86/lpc_mmx.o \ x86/motion_est_mmx.o \ x86/mpegvideo_mmx.o \ x86/simple_idct_mmx.o \
--- a/dsputil.c Sat Oct 17 19:35:47 2009 +0000 +++ b/dsputil.c Sat Oct 17 21:00:39 2009 +0000 @@ -45,8 +45,8 @@ /* ac3dec.c */ void ff_ac3_downmix_c(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len); -/* flacenc.c */ -void ff_flac_compute_autocorr(const int32_t *data, int len, int lag, double *autoc); +/* lpc.c */ +void ff_lpc_compute_autocorr(const int32_t *data, int len, int lag, double *autoc); /* pngdec.c */ void ff_add_png_paeth_prediction(uint8_t *dst, uint8_t *src, uint8_t *top, int w, int bpp); @@ -4837,9 +4837,7 @@ #if CONFIG_AC3_DECODER c->ac3_downmix = ff_ac3_downmix_c; #endif -#if CONFIG_FLAC_ENCODER - c->flac_compute_autocorr = ff_flac_compute_autocorr; -#endif + c->lpc_compute_autocorr = ff_lpc_compute_autocorr; c->vector_fmul = vector_fmul_c; c->vector_fmul_reverse = vector_fmul_reverse_c; c->vector_fmul_add = vector_fmul_add_c;
--- a/dsputil.h Sat Oct 17 19:35:47 2009 +0000 +++ b/dsputil.h Sat Oct 17 21:00:39 2009 +0000 @@ -386,7 +386,7 @@ void (*vorbis_inverse_coupling)(float *mag, float *ang, int blocksize); void (*ac3_downmix)(float (*samples)[256], float (*matrix)[2], int out_ch, int in_ch, int len); /* no alignment needed */ - void (*flac_compute_autocorr)(const int32_t *data, int len, int lag, double *autoc); + void (*lpc_compute_autocorr)(const int32_t *data, int len, int lag, double *autoc); /* assume len is a multiple of 8, and arrays are 16-byte aligned */ void (*vector_fmul)(float *dst, const float *src, int len); void (*vector_fmul_reverse)(float *dst, const float *src0, const float *src1, int len);
--- a/flacenc.c Sat Oct 17 19:35:47 2009 +0000 +++ b/flacenc.c Sat Oct 17 21:00:39 2009 +0000 @@ -552,69 +552,6 @@ return bits; } -/** - * Apply Welch window function to audio block - */ -static void apply_welch_window(const int32_t *data, int len, double *w_data) -{ - int i, n2; - double w; - double c; - - assert(!(len&1)); //the optimization in r11881 does not support odd len - //if someone wants odd len extend the change in r11881 - - n2 = (len >> 1); - c = 2.0 / (len - 1.0); - - w_data+=n2; - data+=n2; - for(i=0; i<n2; i++) { - w = c - n2 + i; - w = 1.0 - (w * w); - w_data[-i-1] = data[-i-1] * w; - w_data[+i ] = data[+i ] * w; - } -} - -/** - * Calculates autocorrelation data from audio samples - * A Welch window function is applied before calculation. - */ -void ff_flac_compute_autocorr(const int32_t *data, int len, int lag, - double *autoc) -{ - int i, j; - double tmp[len + lag + 1]; - double *data1= tmp + lag; - - apply_welch_window(data, len, data1); - - for(j=0; j<lag; j++) - data1[j-lag]= 0.0; - data1[len] = 0.0; - - for(j=0; j<lag; j+=2){ - double sum0 = 1.0, sum1 = 1.0; - for(i=j; i<len; i++){ - sum0 += data1[i] * data1[i-j]; - sum1 += data1[i] * data1[i-j-1]; - } - autoc[j ] = sum0; - autoc[j+1] = sum1; - } - - if(j==lag){ - double sum = 1.0; - for(i=j-1; i<len; i+=2){ - sum += data1[i ] * data1[i-j ] - + data1[i+1] * data1[i-j+1]; - } - autoc[j] = sum; - } -} - - static void encode_residual_verbatim(int32_t *res, int32_t *smp, int n) { assert(n > 0);
--- a/lpc.c Sat Oct 17 19:35:47 2009 +0000 +++ b/lpc.c Sat Oct 17 21:00:39 2009 +0000 @@ -27,6 +27,68 @@ /** + * Apply Welch window function to audio block + */ +static void apply_welch_window(const int32_t *data, int len, double *w_data) +{ + int i, n2; + double w; + double c; + + assert(!(len&1)); //the optimization in r11881 does not support odd len + //if someone wants odd len extend the change in r11881 + + n2 = (len >> 1); + c = 2.0 / (len - 1.0); + + w_data+=n2; + data+=n2; + for(i=0; i<n2; i++) { + w = c - n2 + i; + w = 1.0 - (w * w); + w_data[-i-1] = data[-i-1] * w; + w_data[+i ] = data[+i ] * w; + } +} + +/** + * Calculates autocorrelation data from audio samples + * A Welch window function is applied before calculation. + */ +void ff_lpc_compute_autocorr(const int32_t *data, int len, int lag, + double *autoc) +{ + int i, j; + double tmp[len + lag + 1]; + double *data1= tmp + lag; + + apply_welch_window(data, len, data1); + + for(j=0; j<lag; j++) + data1[j-lag]= 0.0; + data1[len] = 0.0; + + for(j=0; j<lag; j+=2){ + double sum0 = 1.0, sum1 = 1.0; + for(i=j; i<len; i++){ + sum0 += data1[i] * data1[i-j]; + sum1 += data1[i] * data1[i-j-1]; + } + autoc[j ] = sum0; + autoc[j+1] = sum1; + } + + if(j==lag){ + double sum = 1.0; + for(i=j-1; i<len; i+=2){ + sum += data1[i ] * data1[i-j ] + + data1[i+1] * data1[i-j+1]; + } + autoc[j] = sum; + } +} + +/** * Quantize LPC coefficients */ static void quantize_lpc_coefs(double *lpc_in, int order, int precision, @@ -115,7 +177,7 @@ assert(max_order >= MIN_LPC_ORDER && max_order <= MAX_LPC_ORDER && use_lpc > 0); if(use_lpc == 1){ - s->flac_compute_autocorr(samples, blocksize, max_order, autoc); + s->lpc_compute_autocorr(samples, blocksize, max_order, autoc); compute_lpc_coefs(autoc, max_order, &lpc[0][0], MAX_LPC_ORDER, 0, 1);
--- a/x86/dsputilenc_mmx.c Sat Oct 17 19:35:47 2009 +0000 +++ b/x86/dsputilenc_mmx.c Sat Oct 17 21:00:39 2009 +0000 @@ -1348,8 +1348,7 @@ #endif //HAVE_SSSE3 -/* FLAC specific */ -void ff_flac_compute_autocorr_sse2(const int32_t *data, int len, int lag, +void ff_lpc_compute_autocorr_sse2(const int32_t *data, int len, int lag, double *autoc); @@ -1414,8 +1413,7 @@ c->sum_abs_dctelem= sum_abs_dctelem_sse2; c->hadamard8_diff[0]= hadamard8_diff16_sse2; c->hadamard8_diff[1]= hadamard8_diff_sse2; - if (CONFIG_FLAC_ENCODER) - c->flac_compute_autocorr = ff_flac_compute_autocorr_sse2; + c->lpc_compute_autocorr = ff_lpc_compute_autocorr_sse2; } #if HAVE_SSSE3
--- a/x86/flacdsp_mmx.c Sat Oct 17 19:35:47 2009 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,139 +0,0 @@ -/* - * MMX optimized FLAC DSP utils - * Copyright (c) 2007 Loren Merritt - * - * This file is part of FFmpeg. - * - * FFmpeg is free software; you can redistribute it and/or - * modify it under the terms of the GNU Lesser General Public - * License as published by the Free Software Foundation; either - * version 2.1 of the License, or (at your option) any later version. - * - * FFmpeg is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * Lesser General Public License for more details. - * - * You should have received a copy of the GNU Lesser General Public - * License along with FFmpeg; if not, write to the Free Software - * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA - */ - -#include "libavutil/x86_cpu.h" -#include "dsputil_mmx.h" - -static void apply_welch_window_sse2(const int32_t *data, int len, double *w_data) -{ - double c = 2.0 / (len-1.0); - int n2 = len>>1; - x86_reg i = -n2*sizeof(int32_t); - x86_reg j = n2*sizeof(int32_t); - __asm__ volatile( - "movsd %0, %%xmm7 \n\t" - "movapd "MANGLE(ff_pd_1)", %%xmm6 \n\t" - "movapd "MANGLE(ff_pd_2)", %%xmm5 \n\t" - "movlhps %%xmm7, %%xmm7 \n\t" - "subpd %%xmm5, %%xmm7 \n\t" - "addsd %%xmm6, %%xmm7 \n\t" - ::"m"(c) - ); -#define WELCH(MOVPD, offset)\ - __asm__ volatile(\ - "1: \n\t"\ - "movapd %%xmm7, %%xmm1 \n\t"\ - "mulpd %%xmm1, %%xmm1 \n\t"\ - "movapd %%xmm6, %%xmm0 \n\t"\ - "subpd %%xmm1, %%xmm0 \n\t"\ - "pshufd $0x4e, %%xmm0, %%xmm1 \n\t"\ - "cvtpi2pd (%3,%0), %%xmm2 \n\t"\ - "cvtpi2pd "#offset"*4(%3,%1), %%xmm3 \n\t"\ - "mulpd %%xmm0, %%xmm2 \n\t"\ - "mulpd %%xmm1, %%xmm3 \n\t"\ - "movapd %%xmm2, (%2,%0,2) \n\t"\ - MOVPD" %%xmm3, "#offset"*8(%2,%1,2) \n\t"\ - "subpd %%xmm5, %%xmm7 \n\t"\ - "sub $8, %1 \n\t"\ - "add $8, %0 \n\t"\ - "jl 1b \n\t"\ - :"+&r"(i), "+&r"(j)\ - :"r"(w_data+n2), "r"(data+n2)\ - ); - if(len&1) - WELCH("movupd", -1) - else - WELCH("movapd", -2) -#undef WELCH -} - -void ff_flac_compute_autocorr_sse2(const int32_t *data, int len, int lag, - double *autoc) -{ - double tmp[len + lag + 2]; - double *data1 = tmp + lag; - int j; - - if((x86_reg)data1 & 15) - data1++; - - apply_welch_window_sse2(data, len, data1); - - for(j=0; j<lag; j++) - data1[j-lag]= 0.0; - data1[len] = 0.0; - - for(j=0; j<lag; j+=2){ - x86_reg i = -len*sizeof(double); - if(j == lag-2) { - __asm__ volatile( - "movsd "MANGLE(ff_pd_1)", %%xmm0 \n\t" - "movsd "MANGLE(ff_pd_1)", %%xmm1 \n\t" - "movsd "MANGLE(ff_pd_1)", %%xmm2 \n\t" - "1: \n\t" - "movapd (%4,%0), %%xmm3 \n\t" - "movupd -8(%5,%0), %%xmm4 \n\t" - "movapd (%5,%0), %%xmm5 \n\t" - "mulpd %%xmm3, %%xmm4 \n\t" - "mulpd %%xmm3, %%xmm5 \n\t" - "mulpd -16(%5,%0), %%xmm3 \n\t" - "addpd %%xmm4, %%xmm1 \n\t" - "addpd %%xmm5, %%xmm0 \n\t" - "addpd %%xmm3, %%xmm2 \n\t" - "add $16, %0 \n\t" - "jl 1b \n\t" - "movhlps %%xmm0, %%xmm3 \n\t" - "movhlps %%xmm1, %%xmm4 \n\t" - "movhlps %%xmm2, %%xmm5 \n\t" - "addsd %%xmm3, %%xmm0 \n\t" - "addsd %%xmm4, %%xmm1 \n\t" - "addsd %%xmm5, %%xmm2 \n\t" - "movsd %%xmm0, %1 \n\t" - "movsd %%xmm1, %2 \n\t" - "movsd %%xmm2, %3 \n\t" - :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1]), "=m"(autoc[j+2]) - :"r"(data1+len), "r"(data1+len-j) - ); - } else { - __asm__ volatile( - "movsd "MANGLE(ff_pd_1)", %%xmm0 \n\t" - "movsd "MANGLE(ff_pd_1)", %%xmm1 \n\t" - "1: \n\t" - "movapd (%3,%0), %%xmm3 \n\t" - "movupd -8(%4,%0), %%xmm4 \n\t" - "mulpd %%xmm3, %%xmm4 \n\t" - "mulpd (%4,%0), %%xmm3 \n\t" - "addpd %%xmm4, %%xmm1 \n\t" - "addpd %%xmm3, %%xmm0 \n\t" - "add $16, %0 \n\t" - "jl 1b \n\t" - "movhlps %%xmm0, %%xmm3 \n\t" - "movhlps %%xmm1, %%xmm4 \n\t" - "addsd %%xmm3, %%xmm0 \n\t" - "addsd %%xmm4, %%xmm1 \n\t" - "movsd %%xmm0, %1 \n\t" - "movsd %%xmm1, %2 \n\t" - :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1]) - :"r"(data1+len), "r"(data1+len-j) - ); - } - } -}
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/x86/lpc_mmx.c Sat Oct 17 21:00:39 2009 +0000 @@ -0,0 +1,139 @@ +/* + * MMX optimized LPC DSP utils + * Copyright (c) 2007 Loren Merritt + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include "libavutil/x86_cpu.h" +#include "dsputil_mmx.h" + +static void apply_welch_window_sse2(const int32_t *data, int len, double *w_data) +{ + double c = 2.0 / (len-1.0); + int n2 = len>>1; + x86_reg i = -n2*sizeof(int32_t); + x86_reg j = n2*sizeof(int32_t); + __asm__ volatile( + "movsd %0, %%xmm7 \n\t" + "movapd "MANGLE(ff_pd_1)", %%xmm6 \n\t" + "movapd "MANGLE(ff_pd_2)", %%xmm5 \n\t" + "movlhps %%xmm7, %%xmm7 \n\t" + "subpd %%xmm5, %%xmm7 \n\t" + "addsd %%xmm6, %%xmm7 \n\t" + ::"m"(c) + ); +#define WELCH(MOVPD, offset)\ + __asm__ volatile(\ + "1: \n\t"\ + "movapd %%xmm7, %%xmm1 \n\t"\ + "mulpd %%xmm1, %%xmm1 \n\t"\ + "movapd %%xmm6, %%xmm0 \n\t"\ + "subpd %%xmm1, %%xmm0 \n\t"\ + "pshufd $0x4e, %%xmm0, %%xmm1 \n\t"\ + "cvtpi2pd (%3,%0), %%xmm2 \n\t"\ + "cvtpi2pd "#offset"*4(%3,%1), %%xmm3 \n\t"\ + "mulpd %%xmm0, %%xmm2 \n\t"\ + "mulpd %%xmm1, %%xmm3 \n\t"\ + "movapd %%xmm2, (%2,%0,2) \n\t"\ + MOVPD" %%xmm3, "#offset"*8(%2,%1,2) \n\t"\ + "subpd %%xmm5, %%xmm7 \n\t"\ + "sub $8, %1 \n\t"\ + "add $8, %0 \n\t"\ + "jl 1b \n\t"\ + :"+&r"(i), "+&r"(j)\ + :"r"(w_data+n2), "r"(data+n2)\ + ); + if(len&1) + WELCH("movupd", -1) + else + WELCH("movapd", -2) +#undef WELCH +} + +void ff_lpc_compute_autocorr_sse2(const int32_t *data, int len, int lag, + double *autoc) +{ + double tmp[len + lag + 2]; + double *data1 = tmp + lag; + int j; + + if((x86_reg)data1 & 15) + data1++; + + apply_welch_window_sse2(data, len, data1); + + for(j=0; j<lag; j++) + data1[j-lag]= 0.0; + data1[len] = 0.0; + + for(j=0; j<lag; j+=2){ + x86_reg i = -len*sizeof(double); + if(j == lag-2) { + __asm__ volatile( + "movsd "MANGLE(ff_pd_1)", %%xmm0 \n\t" + "movsd "MANGLE(ff_pd_1)", %%xmm1 \n\t" + "movsd "MANGLE(ff_pd_1)", %%xmm2 \n\t" + "1: \n\t" + "movapd (%4,%0), %%xmm3 \n\t" + "movupd -8(%5,%0), %%xmm4 \n\t" + "movapd (%5,%0), %%xmm5 \n\t" + "mulpd %%xmm3, %%xmm4 \n\t" + "mulpd %%xmm3, %%xmm5 \n\t" + "mulpd -16(%5,%0), %%xmm3 \n\t" + "addpd %%xmm4, %%xmm1 \n\t" + "addpd %%xmm5, %%xmm0 \n\t" + "addpd %%xmm3, %%xmm2 \n\t" + "add $16, %0 \n\t" + "jl 1b \n\t" + "movhlps %%xmm0, %%xmm3 \n\t" + "movhlps %%xmm1, %%xmm4 \n\t" + "movhlps %%xmm2, %%xmm5 \n\t" + "addsd %%xmm3, %%xmm0 \n\t" + "addsd %%xmm4, %%xmm1 \n\t" + "addsd %%xmm5, %%xmm2 \n\t" + "movsd %%xmm0, %1 \n\t" + "movsd %%xmm1, %2 \n\t" + "movsd %%xmm2, %3 \n\t" + :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1]), "=m"(autoc[j+2]) + :"r"(data1+len), "r"(data1+len-j) + ); + } else { + __asm__ volatile( + "movsd "MANGLE(ff_pd_1)", %%xmm0 \n\t" + "movsd "MANGLE(ff_pd_1)", %%xmm1 \n\t" + "1: \n\t" + "movapd (%3,%0), %%xmm3 \n\t" + "movupd -8(%4,%0), %%xmm4 \n\t" + "mulpd %%xmm3, %%xmm4 \n\t" + "mulpd (%4,%0), %%xmm3 \n\t" + "addpd %%xmm4, %%xmm1 \n\t" + "addpd %%xmm3, %%xmm0 \n\t" + "add $16, %0 \n\t" + "jl 1b \n\t" + "movhlps %%xmm0, %%xmm3 \n\t" + "movhlps %%xmm1, %%xmm4 \n\t" + "addsd %%xmm3, %%xmm0 \n\t" + "addsd %%xmm4, %%xmm1 \n\t" + "movsd %%xmm0, %1 \n\t" + "movsd %%xmm1, %2 \n\t" + :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1]) + :"r"(data1+len), "r"(data1+len-j) + ); + } + } +}