Mercurial > mplayer.hg
comparison liba52/imdct.c @ 3584:7c4046c04be3
removing unnecessary sse sin/cos LUT
author | michael |
---|---|
date | Tue, 18 Dec 2001 17:29:27 +0000 |
parents | 8ddf654c4871 |
children | 3f1c2c06d0d8 |
comparison
legend: equal | deleted | inserted | replaced
3583:66e418645b67 | 3584:7c4046c04be3 |
---|---|
77 | 77 |
78 #ifdef ARCH_X86 | 78 #ifdef ARCH_X86 |
79 // NOTE: SSE needs 16byte alignment or it will segfault | 79 // NOTE: SSE needs 16byte alignment or it will segfault |
80 // | 80 // |
81 static complex_t __attribute__((aligned(16))) buf[128]; | 81 static complex_t __attribute__((aligned(16))) buf[128]; |
82 static float __attribute__((aligned(16))) sseSinCos1a[256]; | |
83 static float __attribute__((aligned(16))) sseSinCos1b[256]; | |
84 static float __attribute__((aligned(16))) sseSinCos1c[256]; | 82 static float __attribute__((aligned(16))) sseSinCos1c[256]; |
85 static float __attribute__((aligned(16))) sseSinCos1d[256]; | 83 static float __attribute__((aligned(16))) sseSinCos1d[256]; |
86 static float __attribute__((aligned(16))) ps111_1[4]={1,1,1,-1}; | 84 static float __attribute__((aligned(16))) ps111_1[4]={1,1,1,-1}; |
87 //static float __attribute__((aligned(16))) sseW0[4]; | 85 //static float __attribute__((aligned(16))) sseW0[4]; |
88 static float __attribute__((aligned(16))) sseW1[8]; | 86 static float __attribute__((aligned(16))) sseW1[8]; |
386 "leal bit_reverse_512, %%eax \n\t" | 384 "leal bit_reverse_512, %%eax \n\t" |
387 "movl $1008, %%edi \n\t" | 385 "movl $1008, %%edi \n\t" |
388 "pushl %%ebp \n\t" //use ebp without telling gcc | 386 "pushl %%ebp \n\t" //use ebp without telling gcc |
389 ".balign 16 \n\t" | 387 ".balign 16 \n\t" |
390 "1: \n\t" | 388 "1: \n\t" |
391 "movaps (%0, %%esi), %%xmm0 \n\t" | 389 "movlps (%0, %%esi), %%xmm0 \n\t" // XXXI |
392 "movaps (%0, %%edi), %%xmm1 \n\t" | 390 "movhps 8(%0, %%edi), %%xmm0 \n\t" // RXXI |
393 "shufps $0xA0, %%xmm0, %%xmm0 \n\t" | 391 "movlps 8(%0, %%esi), %%xmm1 \n\t" // XXXi |
394 "shufps $0x5F, %%xmm1, %%xmm1 \n\t" | 392 "movhps (%0, %%edi), %%xmm1 \n\t" // rXXi |
395 "mulps sseSinCos1a(%%esi), %%xmm0 \n\t" | 393 "shufps $0x33, %%xmm1, %%xmm0 \n\t" // irIR |
396 "mulps sseSinCos1b(%%esi), %%xmm1 \n\t" | 394 "movaps sseSinCos1c(%%esi), %%xmm2 \n\t" |
397 "addps %%xmm1, %%xmm0 \n\t" | 395 "mulps %%xmm0, %%xmm2 \n\t" |
396 "shufps $0xB1, %%xmm0, %%xmm0 \n\t" // riRI | |
397 "mulps sseSinCos1d(%%esi), %%xmm0 \n\t" | |
398 "subps %%xmm0, %%xmm2 \n\t" | |
398 "movzbl (%%eax), %%edx \n\t" | 399 "movzbl (%%eax), %%edx \n\t" |
399 "movzbl 1(%%eax), %%ebp \n\t" | 400 "movzbl 1(%%eax), %%ebp \n\t" |
400 "movlps %%xmm0, (%1, %%edx,8) \n\t" | 401 "movlps %%xmm2, (%1, %%edx,8) \n\t" |
401 "movhps %%xmm0, (%1, %%ebp,8) \n\t" | 402 "movhps %%xmm2, (%1, %%ebp,8) \n\t" |
402 "addl $16, %%esi \n\t" | 403 "addl $16, %%esi \n\t" |
403 "addl $2, %%eax \n\t" // avoid complex addressing for P4 crap | 404 "addl $2, %%eax \n\t" // avoid complex addressing for P4 crap |
404 "subl $16, %%edi \n\t" | 405 "subl $16, %%edi \n\t" |
405 " jnc 1b \n\t" | 406 " jnc 1b \n\t" |
406 "popl %%ebp \n\t"//no we didnt touch ebp *g* | 407 "popl %%ebp \n\t"//no we didnt touch ebp *g* |
829 xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1)); | 830 xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1)); |
830 xsin1[i] = -sin ((M_PI / 2048) * (8 * i + 1)); | 831 xsin1[i] = -sin ((M_PI / 2048) * (8 * i + 1)); |
831 } | 832 } |
832 #ifdef ARCH_X86 | 833 #ifdef ARCH_X86 |
833 for (i = 0; i < 128; i++) { | 834 for (i = 0; i < 128; i++) { |
834 sseSinCos1a[2*i+0]= -xsin1[i]; | |
835 sseSinCos1a[2*i+1]= -xcos1[i]; | |
836 sseSinCos1b[2*i+0]= xcos1[i]; | |
837 sseSinCos1b[2*i+1]= -xsin1[i]; | |
838 | |
839 sseSinCos1c[2*i+0]= xcos1[i]; | 835 sseSinCos1c[2*i+0]= xcos1[i]; |
840 sseSinCos1c[2*i+1]= -xcos1[i]; | 836 sseSinCos1c[2*i+1]= -xcos1[i]; |
841 sseSinCos1d[2*i+0]= xsin1[i]; | 837 sseSinCos1d[2*i+0]= xsin1[i]; |
842 sseSinCos1d[2*i+1]= xsin1[i]; | 838 sseSinCos1d[2*i+1]= xsin1[i]; |
843 } | 839 } |