Mercurial > libavcodec.hg
changeset 3560:f1a16d793fc5 libavcodec
gcc 2.95 and 3.4.x on 32-bit x86 without -fomit-frame-pointer can't even find 5 registers for asm input, so the asm blocks are split.
Vorbis is about 0.5% slower as a result.
author | lorenm |
---|---|
date | Wed, 09 Aug 2006 06:33:49 +0000 |
parents | c02459cd0d31 |
children | 97325fecd35a |
files | i386/fft_3dn2.c |
diffstat | 1 files changed, 16 insertions(+), 10 deletions(-) [+] |
line wrap: on
line diff
--- a/i386/fft_3dn2.c Tue Aug 08 21:47:11 2006 +0000 +++ b/i386/fft_3dn2.c Wed Aug 09 06:33:49 2006 +0000 @@ -154,20 +154,23 @@ in1 = input; in2 = input + n2 - 1; for(k = 0; k < n4; k++) { + // FIXME a single block is faster, but gcc 2.95 and 3.4.x on 32bit can't compile it asm volatile( - "movd %1, %%mm0 \n\t" - "movd %3, %%mm1 \n\t" - "punpckldq %2, %%mm0 \n\t" - "punpckldq %4, %%mm1 \n\t" + "movd %0, %%mm0 \n\t" + "movd %2, %%mm1 \n\t" + "punpckldq %1, %%mm0 \n\t" + "punpckldq %3, %%mm1 \n\t" "movq %%mm0, %%mm2 \n\t" "pfmul %%mm1, %%mm0 \n\t" "pswapd %%mm1, %%mm1 \n\t" "pfmul %%mm1, %%mm2 \n\t" "pfpnacc %%mm2, %%mm0 \n\t" + ::"m"(in2[-2*k]), "m"(in1[2*k]), + "m"(tcos[k]), "m"(tsin[k]) + ); + asm volatile( "movq %%mm0, %0 \n\t" :"=m"(z[revtab[k]]) - :"m"(in2[-2*k]), "m"(in1[2*k]), - "m"(tcos[k]), "m"(tsin[k]) ); } @@ -190,11 +193,15 @@ ); } + z += n8; asm volatile("movd %0, %%mm7" ::"r"(1<<31)); for(k = 0; k < n8; k++) { asm volatile( - "movq %4, %%mm0 \n\t" - "pswapd %5, %%mm1 \n\t" + "movq %0, %%mm0 \n\t" + "pswapd %1, %%mm1 \n\t" + ::"m"(z[k]), "m"(z[-1-k]) + ); + asm volatile( "movq %%mm0, %%mm2 \n\t" "pxor %%mm7, %%mm2 \n\t" "punpckldq %%mm1, %%mm2 \n\t" @@ -209,8 +216,7 @@ "movq %%mm3, %3 \n\t" // { z[n8-1-k].im, -z[n8+k].re } :"=m"(output[2*k]), "=m"(output[n2-2-2*k]), "=m"(output[n2+2*k]), "=m"(output[n-2-2*k]) - :"m"(z[n8+k]), "m"(z[n8-1-k]) - :"memory" + ::"memory" ); } asm volatile("emms");