Mercurial > mplayer.hg
changeset 3584:7c4046c04be3
removing unnecessary sse sin/cos LUT
author | michael |
---|---|
date | Tue, 18 Dec 2001 17:29:27 +0000 |
parents | 66e418645b67 |
children | cb985ea11ed5 |
files | liba52/imdct.c |
diffstat | 1 files changed, 12 insertions(+), 16 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/liba52/imdct.c Tue Dec 18 17:07:21 2001 +0000
+++ b/liba52/imdct.c Tue Dec 18 17:29:27 2001 +0000
@@ -79,8 +79,6 @@
 // NOTE: SSE needs 16byte alignment or it will segfault
 //
 static complex_t __attribute__((aligned(16))) buf[128];
-static float __attribute__((aligned(16))) sseSinCos1a[256];
-static float __attribute__((aligned(16))) sseSinCos1b[256];
 static float __attribute__((aligned(16))) sseSinCos1c[256];
 static float __attribute__((aligned(16))) sseSinCos1d[256];
 static float __attribute__((aligned(16))) ps111_1[4]={1,1,1,-1};
@@ -388,17 +386,20 @@
  "pushl %%ebp \n\t" //use ebp without telling gcc
  ".balign 16 \n\t"
  "1: \n\t"
- "movaps (%0, %%esi), %%xmm0 \n\t"
- "movaps (%0, %%edi), %%xmm1 \n\t"
- "shufps $0xA0, %%xmm0, %%xmm0 \n\t"
- "shufps $0x5F, %%xmm1, %%xmm1 \n\t"
- "mulps sseSinCos1a(%%esi), %%xmm0 \n\t"
- "mulps sseSinCos1b(%%esi), %%xmm1 \n\t"
- "addps %%xmm1, %%xmm0 \n\t"
+ "movlps (%0, %%esi), %%xmm0 \n\t" // XXXI
+ "movhps 8(%0, %%edi), %%xmm0 \n\t" // RXXI
+ "movlps 8(%0, %%esi), %%xmm1 \n\t" // XXXi
+ "movhps (%0, %%edi), %%xmm1 \n\t" // rXXi
+ "shufps $0x33, %%xmm1, %%xmm0 \n\t" // irIR
+ "movaps sseSinCos1c(%%esi), %%xmm2 \n\t"
+ "mulps %%xmm0, %%xmm2 \n\t"
+ "shufps $0xB1, %%xmm0, %%xmm0 \n\t" // riRI
+ "mulps sseSinCos1d(%%esi), %%xmm0 \n\t"
+ "subps %%xmm0, %%xmm2 \n\t"
  "movzbl (%%eax), %%edx \n\t"
  "movzbl 1(%%eax), %%ebp \n\t"
- "movlps %%xmm0, (%1, %%edx,8) \n\t"
- "movhps %%xmm0, (%1, %%ebp,8) \n\t"
+ "movlps %%xmm2, (%1, %%edx,8) \n\t"
+ "movhps %%xmm2, (%1, %%ebp,8) \n\t"
  "addl $16, %%esi \n\t"
  "addl $2, %%eax \n\t" // avoid complex addressing for P4 crap
  "subl $16, %%edi \n\t"
@@ -831,11 +832,6 @@
 }
 #ifdef ARCH_X86
  for (i = 0; i < 128; i++) {
- sseSinCos1a[2*i+0]= -xsin1[i];
- sseSinCos1a[2*i+1]= -xcos1[i];
- sseSinCos1b[2*i+0]= xcos1[i];
- sseSinCos1b[2*i+1]= -xsin1[i];
-
  sseSinCos1c[2*i+0]= xcos1[i];
  sseSinCos1c[2*i+1]= -xcos1[i];
  sseSinCos1d[2*i+0]= xsin1[i];
```