comparison i386/dsputil_mmx.c @ 6030:fb99890ee609 libavcodec

move FLAC mmx dsp to its own file
author aurel
date Sun, 16 Dec 2007 22:20:47 +0000
parents ecfdc0bfb233
children 558c1fd0ee72
comparison
equal deleted inserted replaced
6029:fc51a6ffa64f 6030:fb99890ee609
2855 } 2855 }
/* CAVS qpel MC00 case (no sub-pel offset): a plain 16x16 average of src
 * into dst, delegated to the shared MMX averaging primitive. */
void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) {
    avg_pixels16_mmx(dst, src, stride, 16);
}
2859 2859
2860 /* FLAC specific */
2861 void ff_flac_compute_autocorr_sse2(const int32_t *data, int len, int lag,
2862 double *autoc);
2863
2860 /* VC1 specific */ 2864 /* VC1 specific */
2861 void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx); 2865 void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx);
2862 2866
2863 void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) { 2867 void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) {
2864 put_pixels8_mmx(dst, src, stride, 8); 2868 put_pixels8_mmx(dst, src, stride, 8);
2968 :"+m"(mag[i]), "+m"(ang[i]) 2972 :"+m"(mag[i]), "+m"(ang[i])
2969 ::"memory" 2973 ::"memory"
2970 ); 2974 );
2971 } 2975 }
2972 } 2976 }
2973
2974 #ifdef CONFIG_ENCODERS
/**
 * Apply a Welch window to len int32 samples, writing the result as doubles.
 * The window is w(x) = 1 - x^2 with x stepping by c = 2/(len-1) across the
 * sample range; the SSE2 loop fills both halves of the output at once,
 * walking from the two ends of the window toward the centre.
 *
 * NOTE(review): the loop asm stores through the pointer operands but
 * declares no "memory" clobber; presumably safe because the caller reads
 * w_data only after the asm statement — verify against compiler behavior.
 * NOTE(review): cvtpi2pd touches MMX state (normally requires emms before
 * subsequent x87 use) — confirm the surrounding code accounts for this.
 */
static void apply_welch_window_sse2(const int32_t *data, int len, double *w_data)
{
    double c = 2.0 / (len-1.0);
    int n2 = len>>1;                /* samples per half-window */
    long i = -n2*sizeof(int32_t);   /* byte offset, first half, counts up to 0 */
    long j = n2*sizeof(int32_t);    /* byte offset, second half, counts down */
    /* Seed registers kept live into the WELCH loop below:
     *   xmm7 = initial abscissae {c-1, c-2}: movlhps duplicates c into both
     *          lanes, 2.0 is subtracted from both, 1.0 added to the low lane;
     *   xmm6 = {1.0, 1.0} (ff_pd_1), xmm5 = {2.0, 2.0} (per-iteration step).
     */
    asm volatile(
        "movsd %0, %%xmm7 \n\t"
        "movapd %1, %%xmm6 \n\t"
        "movapd %2, %%xmm5 \n\t"
        "movlhps %%xmm7, %%xmm7 \n\t"
        "subpd %%xmm5, %%xmm7 \n\t"
        "addsd %%xmm6, %%xmm7 \n\t"
        ::"m"(c), "m"(*ff_pd_1), "m"(*ff_pd_2)
    );
    /* Loop body, instantiated twice: per iteration, xmm1 = x^2 for the
     * low-side abscissa pair, xmm0 = 1 - x^2 (the window weights), and
     * pshufd swaps the pair for the mirrored high side.  Two int32 samples
     * are fetched from each end (cvtpi2pd converts int32 pair -> double
     * pair), scaled, and stored to the matching ends of w_data.  xmm7 then
     * steps down by 2c; the loop exits when offset i reaches 0.  MOVPD is
     * the mirrored-side store opcode: the high-side destination is 16-byte
     * aligned only for even len, so odd len must use movupd. */
#define WELCH(MOVPD)\
    asm volatile(\
        "1: \n\t"\
        "movapd %%xmm7, %%xmm1 \n\t"\
        "mulpd %%xmm1, %%xmm1 \n\t"\
        "movapd %%xmm6, %%xmm0 \n\t"\
        "subpd %%xmm1, %%xmm0 \n\t"\
        "pshufd $0x4e, %%xmm0, %%xmm1 \n\t"\
        "cvtpi2pd (%4,%0), %%xmm2 \n\t"\
        "cvtpi2pd (%5,%1), %%xmm3 \n\t"\
        "mulpd %%xmm0, %%xmm2 \n\t"\
        "mulpd %%xmm1, %%xmm3 \n\t"\
        "movapd %%xmm2, (%2,%0,2) \n\t"\
        MOVPD" %%xmm3, (%3,%1,2) \n\t"\
        "subpd %%xmm5, %%xmm7 \n\t"\
        "sub $8, %1 \n\t"\
        "add $8, %0 \n\t"\
        "jl 1b \n\t"\
        :"+&r"(i), "+&r"(j)\
        :"r"(w_data+n2), "r"(w_data+len-2-n2),\
         "r"(data+n2), "r"(data+len-2-n2)\
    );
    if(len&1)
        WELCH("movupd")   /* odd len: mirrored store is unaligned */
    else
        WELCH("movapd")
#undef WELCH
}
3018
/**
 * Compute autocorrelation coefficients (lags 0..lag) of the Welch-windowed
 * input, for the FLAC encoder's LPC analysis.
 *
 * data:  len int32 input samples
 * lag:   maximum lag
 * autoc: output, receives lag+1 coefficients
 *
 * The windowed samples are staged in a stack buffer with `lag` zeros in
 * front and one zero behind, so the inner loops may read one element past
 * either end without bounds checks.
 */
static void flac_compute_autocorr_sse2(const int32_t *data, int len, int lag,
                                       double *autoc)
{
    double tmp[len + lag + 2];
    double *data1 = tmp + lag;
    int j;

    /* Bump by one double (8 bytes) if needed so data1 is 16-byte aligned
     * for the movapd loads below; the +2 in the VLA size leaves room for
     * both this bump and the trailing zero. */
    if((long)data1 & 15)
        data1++;

    apply_welch_window_sse2(data, len, data1);

    /* Zero the lead-in read by the lagged loads and one trailing element
     * read by the unaligned -8() load on the final vector pair. */
    for(j=0; j<lag; j++)
        data1[j-lag]= 0.0;
    data1[len] = 0.0;

    /* Two lags per pass; the final pass (j == lag-2, reachable only for
     * even lag) computes three so that autoc[0..lag] are all produced.
     * NOTE(review): the accumulators are seeded from ff_pd_1 (1.0), not
     * zero — presumably to match the scalar reference implementation's
     * bias; verify against the C flac_compute_autocorr. */
    for(j=0; j<lag; j+=2){
        long i = -len*sizeof(double);   /* byte offset, counts up to 0 */
        if(j == lag-2) {
            /* Three-lag tail: per iteration xmm3 holds a sample pair;
             * it is multiplied by the same pair shifted by j (aligned),
             * j+1 (unaligned -8) and j+2 (-16), accumulating into
             * xmm0/xmm1/xmm2.  After the loop each accumulator is
             * horizontally summed (movhlps+addsd) and stored. */
            asm volatile(
                "movsd %6, %%xmm0 \n\t"
                "movsd %6, %%xmm1 \n\t"
                "movsd %6, %%xmm2 \n\t"
                "1: \n\t"
                "movapd (%4,%0), %%xmm3 \n\t"
                "movupd -8(%5,%0), %%xmm4 \n\t"
                "movapd (%5,%0), %%xmm5 \n\t"
                "mulpd %%xmm3, %%xmm4 \n\t"
                "mulpd %%xmm3, %%xmm5 \n\t"
                "mulpd -16(%5,%0), %%xmm3 \n\t"
                "addpd %%xmm4, %%xmm1 \n\t"
                "addpd %%xmm5, %%xmm0 \n\t"
                "addpd %%xmm3, %%xmm2 \n\t"
                "add $16, %0 \n\t"
                "jl 1b \n\t"
                "movhlps %%xmm0, %%xmm3 \n\t"
                "movhlps %%xmm1, %%xmm4 \n\t"
                "movhlps %%xmm2, %%xmm5 \n\t"
                "addsd %%xmm3, %%xmm0 \n\t"
                "addsd %%xmm4, %%xmm1 \n\t"
                "addsd %%xmm5, %%xmm2 \n\t"
                "movsd %%xmm0, %1 \n\t"
                "movsd %%xmm1, %2 \n\t"
                "movsd %%xmm2, %3 \n\t"
                :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1]), "=m"(autoc[j+2])
                :"r"(data1+len), "r"(data1+len-j), "m"(*ff_pd_1)
            );
        } else {
            /* Two-lag body: same scheme restricted to lags j (aligned
             * load) and j+1 (unaligned -8 load). */
            asm volatile(
                "movsd %5, %%xmm0 \n\t"
                "movsd %5, %%xmm1 \n\t"
                "1: \n\t"
                "movapd (%3,%0), %%xmm3 \n\t"
                "movupd -8(%4,%0), %%xmm4 \n\t"
                "mulpd %%xmm3, %%xmm4 \n\t"
                "mulpd (%4,%0), %%xmm3 \n\t"
                "addpd %%xmm4, %%xmm1 \n\t"
                "addpd %%xmm3, %%xmm0 \n\t"
                "add $16, %0 \n\t"
                "jl 1b \n\t"
                "movhlps %%xmm0, %%xmm3 \n\t"
                "movhlps %%xmm1, %%xmm4 \n\t"
                "addsd %%xmm3, %%xmm0 \n\t"
                "addsd %%xmm4, %%xmm1 \n\t"
                "movsd %%xmm0, %1 \n\t"
                "movsd %%xmm1, %2 \n\t"
                :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1])
                :"r"(data1+len), "r"(data1+len-j), "m"(*ff_pd_1)
            );
        }
    }
}
3091 #endif // CONFIG_ENCODERS
3092 2977
3093 static void vector_fmul_3dnow(float *dst, const float *src, int len){ 2978 static void vector_fmul_3dnow(float *dst, const float *src, int len){
3094 long i = (len-4)*4; 2979 long i = (len-4)*4;
3095 asm volatile( 2980 asm volatile(
3096 "1: \n\t" 2981 "1: \n\t"
3735 #ifdef CONFIG_ENCODERS 3620 #ifdef CONFIG_ENCODERS
3736 if(mm_flags & MM_SSE2){ 3621 if(mm_flags & MM_SSE2){
3737 c->sum_abs_dctelem= sum_abs_dctelem_sse2; 3622 c->sum_abs_dctelem= sum_abs_dctelem_sse2;
3738 c->hadamard8_diff[0]= hadamard8_diff16_sse2; 3623 c->hadamard8_diff[0]= hadamard8_diff16_sse2;
3739 c->hadamard8_diff[1]= hadamard8_diff_sse2; 3624 c->hadamard8_diff[1]= hadamard8_diff_sse2;
3740 c->flac_compute_autocorr = flac_compute_autocorr_sse2; 3625 if (ENABLE_FLAC_ENCODER)
3626 c->flac_compute_autocorr = ff_flac_compute_autocorr_sse2;
3741 } 3627 }
3742 3628
3743 #ifdef HAVE_SSSE3 3629 #ifdef HAVE_SSSE3
3744 if(mm_flags & MM_SSSE3){ 3630 if(mm_flags & MM_SSSE3){
3745 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ 3631 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){