# HG changeset patch # User nickols_k # Date 990891834 0 # Node ID cb432deedb920d58d79b6e2d08916d42e102c007 # Parent cec1562ccf8ace3b19bcf6152c6ed8d5daa8c3c9 Improvements diff -r cec1562ccf8a -r cb432deedb92 liba52/srfftp_3dnow.h --- a/liba52/srfftp_3dnow.h Sat May 26 10:29:41 2001 +0000 +++ b/liba52/srfftp_3dnow.h Sat May 26 15:43:54 2001 +0000 @@ -33,6 +33,8 @@ #ifndef SRFFTP_3DNOW_H__ #define SRFFTP_3DNOW_H__ +static float HSQRT2_3DNOW = 0.707106781188; + #ifdef HAVE_3DNOWEX #define TRANS_FILL_MM6_MM7_3DNOW()\ asm(\ @@ -59,10 +61,10 @@ #endif #ifdef HAVE_3DNOWEX -#define PSWAP_MM(mm_base,mm_hlp) "pswapd "##mm_base","##mm_base" \n\t" +#define PSWAP_MM(mm_base,mm_hlp) "pswapd "##mm_base","##mm_base"\n\t" #else #define PSWAP_MM(mm_base,mm_hlp)\ - "movq "##mm_base","##mm_hlp" \n\t"\ + "movq "##mm_base","##mm_hlp"\n\t"\ "psrlq $32, "##mm_base"\n\t"\ "punpckldq "##mm_hlp","##mm_base"\n\t" #endif @@ -82,16 +84,16 @@ "movq %%mm0, %%mm4\n\t"/*v.re =-wTB[0].im + wTB[k*2].im;*/\ PSWAP_MM("%%mm4","%%mm2")/* mm4 = v*/\ "movq %6, %%mm0\n\t" /* a1 = A0;*/\ + "movq %7, %%mm2\n\t" /* a1 = A4;*/\ "movq %%mm0, %%mm1\n\t"\ + "movq %%mm2, %%mm3\n\t"\ "pfadd %%mm5, %%mm0\n\t" /*A0 = a1 + u;*/\ + "pfadd %%mm4, %%mm2\n\t" /*A12 = a1 + v;*/\ "pfsub %%mm5, %%mm1\n\t" /*A1 = a1 - u;*/\ + "pfsub %%mm4, %%mm3\n\t" /*A4 = a1 - v;*/\ "movq %%mm0, %0\n\t"\ + "movq %%mm2, %3\n\t"\ "movq %%mm1, %1\n\t"\ - "movq %7, %%mm2\n\t" /* a1 = A4;*/\ - "movq %%mm2, %%mm3\n\t"\ - "pfadd %%mm4, %%mm2\n\t" /*A12 = a1 + v;*/\ - "pfsub %%mm4, %%mm3\n\t" /*A4 = a1 - v;*/\ - "movq %%mm2, %3\n\t"\ "movq %%mm3, %2"\ :"=m"(A0), "=m"(A8), "=m"(A4), "=m"(A12)\ :"m"(wTB[0]), "m"(wTB[k*2]), "0"(A0), "2"(A4)\ @@ -99,4 +101,49 @@ asm volatile("femms":::"memory");\ } +#define TRANSHALF_16_3DNOW(A2,A6,A10,A14)\ +{\ + asm volatile("femms":::"memory");\ + TRANS_FILL_MM6_MM7_3DNOW()\ + asm(\ + "movq %4, %%mm0\n\t"/*u.re = wTB[2].im + wTB[2].re;*/\ + "movq %%mm0, %%mm1\n\t"\ + "pfmul %%mm7, %%mm1\n\t"\ + "pfacc %%mm1, 
%%mm0\n\t"/*u.im = wTB[2].im - wTB[2].re; mm0 = u*/\ + "movq %5, %%mm1\n\t" /*a.re = wTB[6].im - wTB[6].re; */\ + "movq %%mm1, %%mm2\n\t"\ + "pfmul %%mm7, %%mm1\n\t"\ + "pfacc %%mm2, %%mm1\n\t"/*a.im = wTB[6].im + wTB[6].re; mm1 = a*/\ + "movq %%mm1, %%mm2\n\t"\ + "pfmul %%mm7, %%mm2\n\t"/*v.im = u.re - a.re;*/\ + "movq %%mm0, %%mm3\n\t"/*v.re = u.im + a.im;*/\ + "pfadd %%mm2, %%mm3\n\t"\ + PSWAP_MM("%%mm3","%%mm2")/*mm3 = v*/\ + "pfmul %%mm6, %%mm1\n\t"/*u.re = u.re + a.re;*/\ + "pfadd %%mm1, %%mm0\n\t"/*u.im = u.im - a.im; mm0 = u*/\ + "movd %8, %%mm2\n\t"\ + "punpckldq %8, %%mm2\n\t"\ + "pfmul %%mm2, %%mm3\n\t" /* v *= HSQRT2_3DNOW; */\ + "pfmul %%mm2, %%mm0\n\t" /* u *= HSQRT2_3DNOW; */\ + "movq %6, %%mm1\n\t" /* a1 = A2;*/\ + "movq %%mm1, %%mm2\n\t"\ + "pfadd %%mm0, %%mm1\n\t" /*A2 = a1 + u;*/\ + "pfsub %%mm0, %%mm2\n\t" /*A10 = a1 - u;*/\ + "movq %%mm1, %0\n\t"\ + "movq %%mm2, %1\n\t"\ + "movq %7, %%mm1\n\t" /* a1 = A6;*/\ + "movq %%mm1, %%mm2\n\t"\ + "movq %%mm3, %%mm4\n\t"\ + "pfmul %%mm6, %%mm4\n\t"/*A6.re = a1.re + v.re;*/\ + "pfadd %%mm4, %%mm1\n\t"/*A6.im = a1.im - v.im;*/\ + "pfmul %%mm7, %%mm3\n\t"/*A14.re = a1.re - v.re;*/\ + "pfadd %%mm3, %%mm2\n\t"/*A14.im = a1.im + v.im;*/\ + "movq %%mm1, %2\n\t"\ + "movq %%mm2, %3"\ + :"=m"(A2), "=m"(A10), "=m"(A6), "=m"(A14)\ + :"m"(wTB[2]), "m"(wTB[6]), "0"(A2), "2"(A6), "m"(HSQRT2_3DNOW)\ + :"memory");\ + asm volatile("femms":::"memory");\ +} + #endif