Mercurial > libavcodec.hg
changeset 6030:fb99890ee609 libavcodec
move FLAC mmx dsp to its own file
author | aurel |
---|---|
date | Sun, 16 Dec 2007 22:20:47 +0000 |
parents | fc51a6ffa64f |
children | 9d3f52380cb3 |
files | Makefile i386/dsputil_mmx.c i386/flacdsp_mmx.c |
diffstat | 3 files changed, 146 insertions(+), 120 deletions(-) [+] |
line wrap: on
line diff
--- a/Makefile Sun Dec 16 17:22:09 2007 +0000
+++ b/Makefile Sun Dec 16 22:20:47 2007 +0000
@@ -363,6 +363,7 @@
 OBJS-$(CONFIG_GPL) += i386/idct_mmx.o
 
 OBJS-$(CONFIG_CAVS_DECODER) += i386/cavsdsp_mmx.o
+OBJS-$(CONFIG_FLAC_ENCODER) += i386/flacdsp_mmx.o
 OBJS-$(CONFIG_SNOW_DECODER) += i386/snowdsp_mmx.o
 OBJS-$(CONFIG_VC1_DECODER) += i386/vc1dsp_mmx.o
 OBJS-$(CONFIG_VP3_DECODER) += i386/vp3dsp_mmx.o i386/vp3dsp_sse2.o
--- a/i386/dsputil_mmx.c Sun Dec 16 17:22:09 2007 +0000
+++ b/i386/dsputil_mmx.c Sun Dec 16 22:20:47 2007 +0000
@@ -2857,6 +2857,10 @@
     avg_pixels16_mmx(dst, src, stride, 16);
 }
 
+/* FLAC specific */
+void ff_flac_compute_autocorr_sse2(const int32_t *data, int len, int lag,
+                                   double *autoc);
+
 /* VC1 specific */
 void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx);
 
@@ -2971,125 +2975,6 @@
     }
 }
 
-#ifdef CONFIG_ENCODERS
-static void apply_welch_window_sse2(const int32_t *data, int len, double *w_data)
-{
-    double c = 2.0 / (len-1.0);
-    int n2 = len>>1;
-    long i = -n2*sizeof(int32_t);
-    long j = n2*sizeof(int32_t);
-    asm volatile(
-        "movsd %0, %%xmm7 \n\t"
-        "movapd %1, %%xmm6 \n\t"
-        "movapd %2, %%xmm5 \n\t"
-        "movlhps %%xmm7, %%xmm7 \n\t"
-        "subpd %%xmm5, %%xmm7 \n\t"
-        "addsd %%xmm6, %%xmm7 \n\t"
-        ::"m"(c), "m"(*ff_pd_1), "m"(*ff_pd_2)
-    );
-#define WELCH(MOVPD)\
-    asm volatile(\
-        "1: \n\t"\
-        "movapd %%xmm7, %%xmm1 \n\t"\
-        "mulpd %%xmm1, %%xmm1 \n\t"\
-        "movapd %%xmm6, %%xmm0 \n\t"\
-        "subpd %%xmm1, %%xmm0 \n\t"\
-        "pshufd $0x4e, %%xmm0, %%xmm1 \n\t"\
-        "cvtpi2pd (%4,%0), %%xmm2 \n\t"\
-        "cvtpi2pd (%5,%1), %%xmm3 \n\t"\
-        "mulpd %%xmm0, %%xmm2 \n\t"\
-        "mulpd %%xmm1, %%xmm3 \n\t"\
-        "movapd %%xmm2, (%2,%0,2) \n\t"\
-        MOVPD" %%xmm3, (%3,%1,2) \n\t"\
-        "subpd %%xmm5, %%xmm7 \n\t"\
-        "sub $8, %1 \n\t"\
-        "add $8, %0 \n\t"\
-        "jl 1b \n\t"\
-        :"+&r"(i), "+&r"(j)\
-        :"r"(w_data+n2), "r"(w_data+len-2-n2),\
-         "r"(data+n2), "r"(data+len-2-n2)\
-    );
-    if(len&1)
-        WELCH("movupd")
-    else
-        WELCH("movapd")
-#undef WELCH
-}
-
-static void flac_compute_autocorr_sse2(const int32_t *data, int len, int lag,
-                                       double *autoc)
-{
-    double tmp[len + lag + 2];
-    double *data1 = tmp + lag;
-    int j;
-
-    if((long)data1 & 15)
-        data1++;
-
-    apply_welch_window_sse2(data, len, data1);
-
-    for(j=0; j<lag; j++)
-        data1[j-lag]= 0.0;
-    data1[len] = 0.0;
-
-    for(j=0; j<lag; j+=2){
-        long i = -len*sizeof(double);
-        if(j == lag-2) {
-            asm volatile(
-                "movsd %6, %%xmm0 \n\t"
-                "movsd %6, %%xmm1 \n\t"
-                "movsd %6, %%xmm2 \n\t"
-                "1: \n\t"
-                "movapd (%4,%0), %%xmm3 \n\t"
-                "movupd -8(%5,%0), %%xmm4 \n\t"
-                "movapd (%5,%0), %%xmm5 \n\t"
-                "mulpd %%xmm3, %%xmm4 \n\t"
-                "mulpd %%xmm3, %%xmm5 \n\t"
-                "mulpd -16(%5,%0), %%xmm3 \n\t"
-                "addpd %%xmm4, %%xmm1 \n\t"
-                "addpd %%xmm5, %%xmm0 \n\t"
-                "addpd %%xmm3, %%xmm2 \n\t"
-                "add $16, %0 \n\t"
-                "jl 1b \n\t"
-                "movhlps %%xmm0, %%xmm3 \n\t"
-                "movhlps %%xmm1, %%xmm4 \n\t"
-                "movhlps %%xmm2, %%xmm5 \n\t"
-                "addsd %%xmm3, %%xmm0 \n\t"
-                "addsd %%xmm4, %%xmm1 \n\t"
-                "addsd %%xmm5, %%xmm2 \n\t"
-                "movsd %%xmm0, %1 \n\t"
-                "movsd %%xmm1, %2 \n\t"
-                "movsd %%xmm2, %3 \n\t"
-                :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1]), "=m"(autoc[j+2])
-                :"r"(data1+len), "r"(data1+len-j), "m"(*ff_pd_1)
-            );
-        } else {
-            asm volatile(
-                "movsd %5, %%xmm0 \n\t"
-                "movsd %5, %%xmm1 \n\t"
-                "1: \n\t"
-                "movapd (%3,%0), %%xmm3 \n\t"
-                "movupd -8(%4,%0), %%xmm4 \n\t"
-                "mulpd %%xmm3, %%xmm4 \n\t"
-                "mulpd (%4,%0), %%xmm3 \n\t"
-                "addpd %%xmm4, %%xmm1 \n\t"
-                "addpd %%xmm3, %%xmm0 \n\t"
-                "add $16, %0 \n\t"
-                "jl 1b \n\t"
-                "movhlps %%xmm0, %%xmm3 \n\t"
-                "movhlps %%xmm1, %%xmm4 \n\t"
-                "addsd %%xmm3, %%xmm0 \n\t"
-                "addsd %%xmm4, %%xmm1 \n\t"
-                "movsd %%xmm0, %1 \n\t"
-                "movsd %%xmm1, %2 \n\t"
-                :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1])
-                :"r"(data1+len), "r"(data1+len-j), "m"(*ff_pd_1)
-            );
-        }
-    }
-}
-#endif // CONFIG_ENCODERS
-
 static void vector_fmul_3dnow(float *dst, const float *src, int len){
     long i = (len-4)*4;
     asm volatile(
@@ -3737,7 +3622,8 @@
             c->sum_abs_dctelem= sum_abs_dctelem_sse2;
             c->hadamard8_diff[0]= hadamard8_diff16_sse2;
             c->hadamard8_diff[1]= hadamard8_diff_sse2;
-            c->flac_compute_autocorr = flac_compute_autocorr_sse2;
+            if (ENABLE_FLAC_ENCODER)
+                c->flac_compute_autocorr = ff_flac_compute_autocorr_sse2;
         }
 
 #ifdef HAVE_SSSE3
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/i386/flacdsp_mmx.c Sun Dec 16 22:20:47 2007 +0000
@@ -0,0 +1,139 @@
+/*
+ * MMX optimized FLAC DSP utils
+ * Copyright (c) 2007 Loren Merritt
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "dsputil_mmx.h"
+
+static void apply_welch_window_sse2(const int32_t *data, int len, double *w_data)
+{
+    double c = 2.0 / (len-1.0);
+    int n2 = len>>1;
+    long i = -n2*sizeof(int32_t);
+    long j = n2*sizeof(int32_t);
+    asm volatile(
+        "movsd %0, %%xmm7 \n\t"
+        "movapd %1, %%xmm6 \n\t"
+        "movapd %2, %%xmm5 \n\t"
+        "movlhps %%xmm7, %%xmm7 \n\t"
+        "subpd %%xmm5, %%xmm7 \n\t"
+        "addsd %%xmm6, %%xmm7 \n\t"
+        ::"m"(c), "m"(*ff_pd_1), "m"(*ff_pd_2)
+    );
+#define WELCH(MOVPD)\
+    asm volatile(\
+        "1: \n\t"\
+        "movapd %%xmm7, %%xmm1 \n\t"\
+        "mulpd %%xmm1, %%xmm1 \n\t"\
+        "movapd %%xmm6, %%xmm0 \n\t"\
+        "subpd %%xmm1, %%xmm0 \n\t"\
+        "pshufd $0x4e, %%xmm0, %%xmm1 \n\t"\
+        "cvtpi2pd (%4,%0), %%xmm2 \n\t"\
+        "cvtpi2pd (%5,%1), %%xmm3 \n\t"\
+        "mulpd %%xmm0, %%xmm2 \n\t"\
+        "mulpd %%xmm1, %%xmm3 \n\t"\
+        "movapd %%xmm2, (%2,%0,2) \n\t"\
+        MOVPD" %%xmm3, (%3,%1,2) \n\t"\
+        "subpd %%xmm5, %%xmm7 \n\t"\
+        "sub $8, %1 \n\t"\
+        "add $8, %0 \n\t"\
+        "jl 1b \n\t"\
+        :"+&r"(i), "+&r"(j)\
+        :"r"(w_data+n2), "r"(w_data+len-2-n2),\
+         "r"(data+n2), "r"(data+len-2-n2)\
+    );
+    if(len&1)
+        WELCH("movupd")
+    else
+        WELCH("movapd")
+#undef WELCH
+}
+
+void ff_flac_compute_autocorr_sse2(const int32_t *data, int len, int lag,
+                                   double *autoc)
+{
+    double tmp[len + lag + 2];
+    double *data1 = tmp + lag;
+    int j;
+
+    if((long)data1 & 15)
+        data1++;
+
+    apply_welch_window_sse2(data, len, data1);
+
+    for(j=0; j<lag; j++)
+        data1[j-lag]= 0.0;
+    data1[len] = 0.0;
+
+    for(j=0; j<lag; j+=2){
+        long i = -len*sizeof(double);
+        if(j == lag-2) {
+            asm volatile(
+                "movsd %6, %%xmm0 \n\t"
+                "movsd %6, %%xmm1 \n\t"
+                "movsd %6, %%xmm2 \n\t"
+                "1: \n\t"
+                "movapd (%4,%0), %%xmm3 \n\t"
+                "movupd -8(%5,%0), %%xmm4 \n\t"
+                "movapd (%5,%0), %%xmm5 \n\t"
+                "mulpd %%xmm3, %%xmm4 \n\t"
+                "mulpd %%xmm3, %%xmm5 \n\t"
+                "mulpd -16(%5,%0), %%xmm3 \n\t"
+                "addpd %%xmm4, %%xmm1 \n\t"
+                "addpd %%xmm5, %%xmm0 \n\t"
+                "addpd %%xmm3, %%xmm2 \n\t"
+                "add $16, %0 \n\t"
+                "jl 1b \n\t"
+                "movhlps %%xmm0, %%xmm3 \n\t"
+                "movhlps %%xmm1, %%xmm4 \n\t"
+                "movhlps %%xmm2, %%xmm5 \n\t"
+                "addsd %%xmm3, %%xmm0 \n\t"
+                "addsd %%xmm4, %%xmm1 \n\t"
+                "addsd %%xmm5, %%xmm2 \n\t"
+                "movsd %%xmm0, %1 \n\t"
+                "movsd %%xmm1, %2 \n\t"
+                "movsd %%xmm2, %3 \n\t"
+                :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1]), "=m"(autoc[j+2])
+                :"r"(data1+len), "r"(data1+len-j), "m"(*ff_pd_1)
+            );
+        } else {
+            asm volatile(
+                "movsd %5, %%xmm0 \n\t"
+                "movsd %5, %%xmm1 \n\t"
+                "1: \n\t"
+                "movapd (%3,%0), %%xmm3 \n\t"
+                "movupd -8(%4,%0), %%xmm4 \n\t"
+                "mulpd %%xmm3, %%xmm4 \n\t"
+                "mulpd (%4,%0), %%xmm3 \n\t"
+                "addpd %%xmm4, %%xmm1 \n\t"
+                "addpd %%xmm3, %%xmm0 \n\t"
+                "add $16, %0 \n\t"
+                "jl 1b \n\t"
+                "movhlps %%xmm0, %%xmm3 \n\t"
+                "movhlps %%xmm1, %%xmm4 \n\t"
+                "addsd %%xmm3, %%xmm0 \n\t"
+                "addsd %%xmm4, %%xmm1 \n\t"
+                "movsd %%xmm0, %1 \n\t"
+                "movsd %%xmm1, %2 \n\t"
+                :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1])
+                :"r"(data1+len), "r"(data1+len-j), "m"(*ff_pd_1)
+            );
+        }
+    }
+}