Mercurial > libavcodec.hg
diff i386/dsputil_mmx.c @ 3536:545a15c19c91 libavcodec
sse & sse2 implementations of vorbis channel coupling.
9% faster vorbis (on a K8).
author | lorenm |
---|---|
date | Thu, 03 Aug 2006 03:18:47 +0000 |
parents | 419409926166 |
children | 3fbddeb13686 |
line wrap: on
line diff
--- a/i386/dsputil_mmx.c Thu Aug 03 02:18:07 2006 +0000
+++ b/i386/dsputil_mmx.c Thu Aug 03 03:18:47 2006 +0000
@@ -2711,6 +2711,59 @@
 }
 #endif
 
+static void vorbis_inverse_coupling_sse(float *mag, float *ang, int blocksize) /* in-place update of mag[] and ang[]; NOTE(review): despite the _sse name the body uses MMX registers and 3DNow! ops (pfcmpge/pfadd/pfsub) */
+{
+    int i;
+    asm volatile("pxor %%mm7, %%mm7":); // mm7 = 0, the 0.0 comparand read by the loop body below
+    for(i=0; i<blocksize; i+=2) { // one 64-bit MMX register holds 2 floats, so 2 elements per pass
+        asm volatile(
+            "movq %0, %%mm0 \n\t" // mm0 = m (from mag[i])
+            "movq %1, %%mm1 \n\t" // mm1 = a (from ang[i])
+            "movq %%mm0, %%mm2 \n\t"
+            "movq %%mm1, %%mm3 \n\t"
+            "pfcmpge %%mm7, %%mm2 \n\t" // mm2 = all-ones where m >= 0.0 (AT&T order: destination is the last operand)
+            "pfcmpge %%mm7, %%mm3 \n\t" // mm3 = all-ones where a >= 0.0
+            "pslld $31, %%mm2 \n\t" // keep only the sign bit: flip = 0x80000000 where m >= 0.0
+            "pxor %%mm2, %%mm1 \n\t" // a ^= flip, i.e. negate a where m >= 0.0
+            "movq %%mm3, %%mm4 \n\t" // second copy of the (a >= 0.0) mask
+            "pand %%mm1, %%mm3 \n\t" // mm3 = (a>=0) & (a ^ flip)
+            "pandn %%mm1, %%mm4 \n\t" // mm4 = (a<0) & (a ^ flip)
+            "pfadd %%mm0, %%mm3 \n\t" // new a = m + ((a>=0) & (a ^ flip))
+            "pfsub %%mm4, %%mm0 \n\t" // new m = m - ((a<0) & (a ^ flip))
+            "movq %%mm3, %1 \n\t" // store new a to ang[i]
+            "movq %%mm0, %0 \n\t" // store new m to mag[i]
+            :"+m"(mag[i]), "+m"(ang[i])
+            ::"memory"
+        );
+    }
+    asm volatile("emms"); // clear MMX state so x87 floating point can be used afterwards
+}
+static void vorbis_inverse_coupling_sse2(float *mag, float *ang, int blocksize) /* same computation on xmm registers, 4 floats per pass; movaps faults unless mag+i and ang+i are 16-byte aligned */
+{
+    int i;
+    for(i=0; i<blocksize; i+=4) { // one 128-bit xmm register holds 4 floats
+        asm volatile(
+            "movaps %0, %%xmm0 \n\t" // xmm0 = m (from mag[i])
+            "movaps %1, %%xmm1 \n\t" // xmm1 = a (from ang[i])
+            "pxor %%xmm2, %%xmm2 \n\t" // xmm2 = 0.0
+            "pxor %%xmm3, %%xmm3 \n\t" // xmm3 = 0.0
+            "cmpleps %%xmm0, %%xmm2 \n\t" // xmm2 = all-ones where 0.0 <= m (AT&T order: destination is the last operand)
+            "cmpleps %%xmm1, %%xmm3 \n\t" // xmm3 = all-ones where 0.0 <= a
+            "pslld $31, %%xmm2 \n\t" // keep only the sign bit: flip = 0x80000000 where m >= 0.0
+            "pxor %%xmm2, %%xmm1 \n\t" // a ^= flip, i.e. negate a where m >= 0.0
+            "movaps %%xmm3, %%xmm4 \n\t" // second copy of the (a >= 0.0) mask
+            "pand %%xmm1, %%xmm3 \n\t" // xmm3 = (a>=0) & (a ^ flip)
+            "pandn %%xmm1, %%xmm4 \n\t" // xmm4 = (a<0) & (a ^ flip)
+            "addps %%xmm0, %%xmm3 \n\t" // new a = m + ((a>=0) & (a ^ flip))
+            "subps %%xmm4, %%xmm0 \n\t" // new m = m - ((a<0) & (a ^ flip))
+            "movaps %%xmm3, %1 \n\t" // store new a to ang[i]
+            "movaps %%xmm0, %0 \n\t" // store new m to mag[i]
+            :"+m"(mag[i]), "+m"(ang[i])
+            ::"memory"
+        );
+    }
+}
+
 #ifdef CONFIG_SNOW_ENCODER
 extern void ff_snow_horizontal_compose97i_sse2(DWTELEM *b, int width);
 extern void ff_snow_horizontal_compose97i_mmx(DWTELEM *b, int width);
@@ -3137,6 +3190,11 @@
         c->inner_add_yblock = ff_snow_inner_add_yblock_mmx;
     }
 #endif
+
+    if(mm_flags & MM_SSE2) // prefer the xmm-register version when the SSE2 flag is set
+        c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse2;
+    else if(mm_flags & MM_SSE) // NOTE(review): the routine selected here executes 3DNow! instructions; confirm MM_SSE is the right CPU flag to gate it
+        c->vorbis_inverse_coupling = vorbis_inverse_coupling_sse;
 }
 
 #ifdef CONFIG_ENCODERS