Mercurial > libavcodec.hg
comparison i386/dsputil_mmx.c @ 6030:fb99890ee609 libavcodec
move FLAC mmx dsp to its own file
author | aurel |
---|---|
date | Sun, 16 Dec 2007 22:20:47 +0000 |
parents | ecfdc0bfb233 |
children | 558c1fd0ee72 |
comparison legend:
- equal
- deleted
- inserted
- replaced
6029:fc51a6ffa64f | 6030:fb99890ee609 |
---|---|
2855 } | 2855 } |
2856 void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { | 2856 void ff_avg_cavs_qpel16_mc00_mmx2(uint8_t *dst, uint8_t *src, int stride) { |
2857 avg_pixels16_mmx(dst, src, stride, 16); | 2857 avg_pixels16_mmx(dst, src, stride, 16); |
2858 } | 2858 } |
2859 | 2859 |
2860 /* FLAC specific */ | |
2861 void ff_flac_compute_autocorr_sse2(const int32_t *data, int len, int lag, | |
2862 double *autoc); | |
2863 | |
2860 /* VC1 specific */ | 2864 /* VC1 specific */ |
2861 void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx); | 2865 void ff_vc1dsp_init_mmx(DSPContext* dsp, AVCodecContext *avctx); |
2862 | 2866 |
2863 void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) { | 2867 void ff_put_vc1_mspel_mc00_mmx(uint8_t *dst, const uint8_t *src, int stride, int rnd) { |
2864 put_pixels8_mmx(dst, src, stride, 8); | 2868 put_pixels8_mmx(dst, src, stride, 8); |
2968 :"+m"(mag[i]), "+m"(ang[i]) | 2972 :"+m"(mag[i]), "+m"(ang[i]) |
2969 ::"memory" | 2973 ::"memory" |
2970 ); | 2974 ); |
2971 } | 2975 } |
2972 } | 2976 } |
2973 | |
2974 #ifdef CONFIG_ENCODERS | |
2975 static void apply_welch_window_sse2(const int32_t *data, int len, double *w_data) | |
2976 { | |
2977 double c = 2.0 / (len-1.0); | |
2978 int n2 = len>>1; | |
2979 long i = -n2*sizeof(int32_t); | |
2980 long j = n2*sizeof(int32_t); | |
2981 asm volatile( | |
2982 "movsd %0, %%xmm7 \n\t" | |
2983 "movapd %1, %%xmm6 \n\t" | |
2984 "movapd %2, %%xmm5 \n\t" | |
2985 "movlhps %%xmm7, %%xmm7 \n\t" | |
2986 "subpd %%xmm5, %%xmm7 \n\t" | |
2987 "addsd %%xmm6, %%xmm7 \n\t" | |
2988 ::"m"(c), "m"(*ff_pd_1), "m"(*ff_pd_2) | |
2989 ); | |
2990 #define WELCH(MOVPD)\ | |
2991 asm volatile(\ | |
2992 "1: \n\t"\ | |
2993 "movapd %%xmm7, %%xmm1 \n\t"\ | |
2994 "mulpd %%xmm1, %%xmm1 \n\t"\ | |
2995 "movapd %%xmm6, %%xmm0 \n\t"\ | |
2996 "subpd %%xmm1, %%xmm0 \n\t"\ | |
2997 "pshufd $0x4e, %%xmm0, %%xmm1 \n\t"\ | |
2998 "cvtpi2pd (%4,%0), %%xmm2 \n\t"\ | |
2999 "cvtpi2pd (%5,%1), %%xmm3 \n\t"\ | |
3000 "mulpd %%xmm0, %%xmm2 \n\t"\ | |
3001 "mulpd %%xmm1, %%xmm3 \n\t"\ | |
3002 "movapd %%xmm2, (%2,%0,2) \n\t"\ | |
3003 MOVPD" %%xmm3, (%3,%1,2) \n\t"\ | |
3004 "subpd %%xmm5, %%xmm7 \n\t"\ | |
3005 "sub $8, %1 \n\t"\ | |
3006 "add $8, %0 \n\t"\ | |
3007 "jl 1b \n\t"\ | |
3008 :"+&r"(i), "+&r"(j)\ | |
3009 :"r"(w_data+n2), "r"(w_data+len-2-n2),\ | |
3010 "r"(data+n2), "r"(data+len-2-n2)\ | |
3011 ); | |
3012 if(len&1) | |
3013 WELCH("movupd") | |
3014 else | |
3015 WELCH("movapd") | |
3016 #undef WELCH | |
3017 } | |
3018 | |
3019 static void flac_compute_autocorr_sse2(const int32_t *data, int len, int lag, | |
3020 double *autoc) | |
3021 { | |
3022 double tmp[len + lag + 2]; | |
3023 double *data1 = tmp + lag; | |
3024 int j; | |
3025 | |
3026 if((long)data1 & 15) | |
3027 data1++; | |
3028 | |
3029 apply_welch_window_sse2(data, len, data1); | |
3030 | |
3031 for(j=0; j<lag; j++) | |
3032 data1[j-lag]= 0.0; | |
3033 data1[len] = 0.0; | |
3034 | |
3035 for(j=0; j<lag; j+=2){ | |
3036 long i = -len*sizeof(double); | |
3037 if(j == lag-2) { | |
3038 asm volatile( | |
3039 "movsd %6, %%xmm0 \n\t" | |
3040 "movsd %6, %%xmm1 \n\t" | |
3041 "movsd %6, %%xmm2 \n\t" | |
3042 "1: \n\t" | |
3043 "movapd (%4,%0), %%xmm3 \n\t" | |
3044 "movupd -8(%5,%0), %%xmm4 \n\t" | |
3045 "movapd (%5,%0), %%xmm5 \n\t" | |
3046 "mulpd %%xmm3, %%xmm4 \n\t" | |
3047 "mulpd %%xmm3, %%xmm5 \n\t" | |
3048 "mulpd -16(%5,%0), %%xmm3 \n\t" | |
3049 "addpd %%xmm4, %%xmm1 \n\t" | |
3050 "addpd %%xmm5, %%xmm0 \n\t" | |
3051 "addpd %%xmm3, %%xmm2 \n\t" | |
3052 "add $16, %0 \n\t" | |
3053 "jl 1b \n\t" | |
3054 "movhlps %%xmm0, %%xmm3 \n\t" | |
3055 "movhlps %%xmm1, %%xmm4 \n\t" | |
3056 "movhlps %%xmm2, %%xmm5 \n\t" | |
3057 "addsd %%xmm3, %%xmm0 \n\t" | |
3058 "addsd %%xmm4, %%xmm1 \n\t" | |
3059 "addsd %%xmm5, %%xmm2 \n\t" | |
3060 "movsd %%xmm0, %1 \n\t" | |
3061 "movsd %%xmm1, %2 \n\t" | |
3062 "movsd %%xmm2, %3 \n\t" | |
3063 :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1]), "=m"(autoc[j+2]) | |
3064 :"r"(data1+len), "r"(data1+len-j), "m"(*ff_pd_1) | |
3065 ); | |
3066 } else { | |
3067 asm volatile( | |
3068 "movsd %5, %%xmm0 \n\t" | |
3069 "movsd %5, %%xmm1 \n\t" | |
3070 "1: \n\t" | |
3071 "movapd (%3,%0), %%xmm3 \n\t" | |
3072 "movupd -8(%4,%0), %%xmm4 \n\t" | |
3073 "mulpd %%xmm3, %%xmm4 \n\t" | |
3074 "mulpd (%4,%0), %%xmm3 \n\t" | |
3075 "addpd %%xmm4, %%xmm1 \n\t" | |
3076 "addpd %%xmm3, %%xmm0 \n\t" | |
3077 "add $16, %0 \n\t" | |
3078 "jl 1b \n\t" | |
3079 "movhlps %%xmm0, %%xmm3 \n\t" | |
3080 "movhlps %%xmm1, %%xmm4 \n\t" | |
3081 "addsd %%xmm3, %%xmm0 \n\t" | |
3082 "addsd %%xmm4, %%xmm1 \n\t" | |
3083 "movsd %%xmm0, %1 \n\t" | |
3084 "movsd %%xmm1, %2 \n\t" | |
3085 :"+&r"(i), "=m"(autoc[j]), "=m"(autoc[j+1]) | |
3086 :"r"(data1+len), "r"(data1+len-j), "m"(*ff_pd_1) | |
3087 ); | |
3088 } | |
3089 } | |
3090 } | |
3091 #endif // CONFIG_ENCODERS | |
3092 | 2977 |
3093 static void vector_fmul_3dnow(float *dst, const float *src, int len){ | 2978 static void vector_fmul_3dnow(float *dst, const float *src, int len){ |
3094 long i = (len-4)*4; | 2979 long i = (len-4)*4; |
3095 asm volatile( | 2980 asm volatile( |
3096 "1: \n\t" | 2981 "1: \n\t" |
3735 #ifdef CONFIG_ENCODERS | 3620 #ifdef CONFIG_ENCODERS |
3736 if(mm_flags & MM_SSE2){ | 3621 if(mm_flags & MM_SSE2){ |
3737 c->sum_abs_dctelem= sum_abs_dctelem_sse2; | 3622 c->sum_abs_dctelem= sum_abs_dctelem_sse2; |
3738 c->hadamard8_diff[0]= hadamard8_diff16_sse2; | 3623 c->hadamard8_diff[0]= hadamard8_diff16_sse2; |
3739 c->hadamard8_diff[1]= hadamard8_diff_sse2; | 3624 c->hadamard8_diff[1]= hadamard8_diff_sse2; |
3740 c->flac_compute_autocorr = flac_compute_autocorr_sse2; | 3625 if (ENABLE_FLAC_ENCODER) |
3626 c->flac_compute_autocorr = ff_flac_compute_autocorr_sse2; | |
3741 } | 3627 } |
3742 | 3628 |
3743 #ifdef HAVE_SSSE3 | 3629 #ifdef HAVE_SSSE3 |
3744 if(mm_flags & MM_SSSE3){ | 3630 if(mm_flags & MM_SSSE3){ |
3745 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ | 3631 if(!(avctx->flags & CODEC_FLAG_BITEXACT)){ |