Mercurial > mplayer.hg
comparison liba52/imdct.c @ 4247:2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
author | atmos4 |
---|---|
date | Sat, 19 Jan 2002 05:12:34 +0000 |
parents | 0cc94b1eec0f |
children | d3aedd7db02c |
comparison
equal
deleted
inserted
replaced
4246:3f677202418b | 4247:2dbd637ffe05 |
---|---|
35 #include <inttypes.h> | 35 #include <inttypes.h> |
36 | 36 |
37 #include "a52.h" | 37 #include "a52.h" |
38 #include "a52_internal.h" | 38 #include "a52_internal.h" |
39 #include "mm_accel.h" | 39 #include "mm_accel.h" |
 | 40 #include "mangle.h" |
40 | 41 |
41 #ifdef RUNTIME_CPUDETECT | 42 #ifdef RUNTIME_CPUDETECT |
42 #undef HAVE_3DNOWEX | 43 #undef HAVE_3DNOWEX |
43 #endif | 44 #endif |
44 | 45 |
658 | 659 |
659 /* Pre IFFT complex multiply plus IFFT cmplx conjugate */ | 660 /* Pre IFFT complex multiply plus IFFT cmplx conjugate */ |
660 /* Bit reversed shuffling */ | 661 /* Bit reversed shuffling */ |
661 asm volatile( | 662 asm volatile( |
662 "xorl %%esi, %%esi \n\t" | 663 "xorl %%esi, %%esi \n\t" |
663 "leal bit_reverse_512, %%eax \n\t" | 664 "leal "MANGLE(bit_reverse_512)", %%eax \n\t" |
664 "movl $1008, %%edi \n\t" | 665 "movl $1008, %%edi \n\t" |
665 "pushl %%ebp \n\t" //use ebp without telling gcc | 666 "pushl %%ebp \n\t" //use ebp without telling gcc |
666 ".balign 16 \n\t" | 667 ".balign 16 \n\t" |
667 "1: \n\t" | 668 "1: \n\t" |
668 "movlps (%0, %%esi), %%xmm0 \n\t" // XXXI | 669 "movlps (%0, %%esi), %%xmm0 \n\t" // XXXI |
669 "movhps 8(%0, %%edi), %%xmm0 \n\t" // RXXI | 670 "movhps 8(%0, %%edi), %%xmm0 \n\t" // RXXI |
670 "movlps 8(%0, %%esi), %%xmm1 \n\t" // XXXi | 671 "movlps 8(%0, %%esi), %%xmm1 \n\t" // XXXi |
671 "movhps (%0, %%edi), %%xmm1 \n\t" // rXXi | 672 "movhps (%0, %%edi), %%xmm1 \n\t" // rXXi |
672 "shufps $0x33, %%xmm1, %%xmm0 \n\t" // irIR | 673 "shufps $0x33, %%xmm1, %%xmm0 \n\t" // irIR |
673 "movaps sseSinCos1c(%%esi), %%xmm2 \n\t" | 674 "movaps "MANGLE(sseSinCos1c)"(%%esi), %%xmm2\n\t" |
674 "mulps %%xmm0, %%xmm2 \n\t" | 675 "mulps %%xmm0, %%xmm2 \n\t" |
675 "shufps $0xB1, %%xmm0, %%xmm0 \n\t" // riRI | 676 "shufps $0xB1, %%xmm0, %%xmm0 \n\t" // riRI |
676 "mulps sseSinCos1d(%%esi), %%xmm0 \n\t" | 677 "mulps "MANGLE(sseSinCos1d)"(%%esi), %%xmm0\n\t" |
677 "subps %%xmm0, %%xmm2 \n\t" | 678 "subps %%xmm0, %%xmm2 \n\t" |
678 "movzbl (%%eax), %%edx \n\t" | 679 "movzbl (%%eax), %%edx \n\t" |
679 "movzbl 1(%%eax), %%ebp \n\t" | 680 "movzbl 1(%%eax), %%ebp \n\t" |
680 "movlps %%xmm2, (%1, %%edx,8) \n\t" | 681 "movlps %%xmm2, (%1, %%edx,8) \n\t" |
681 "movhps %%xmm2, (%1, %%ebp,8) \n\t" | 682 "movhps %%xmm2, (%1, %%ebp,8) \n\t" |
739 ); | 740 ); |
740 | 741 |
741 /* 2. iteration */ | 742 /* 2. iteration */ |
742 // Note w[1]={{1,0}, {0,-1}} | 743 // Note w[1]={{1,0}, {0,-1}} |
743 asm volatile( | 744 asm volatile( |
744 "movaps ps111_1, %%xmm7 \n\t" // 1,1,1,-1 | 745 "movaps "MANGLE(ps111_1)", %%xmm7\n\t" // 1,1,1,-1 |
745 "movl %0, %%esi \n\t" | 746 "movl %0, %%esi \n\t" |
746 ".balign 16 \n\t" | 747 ".balign 16 \n\t" |
747 "1: \n\t" | 748 "1: \n\t" |
748 "movaps 16(%%esi), %%xmm2 \n\t" //r2,i2,r3,i3 | 749 "movaps 16(%%esi), %%xmm2 \n\t" //r2,i2,r3,i3 |
749 "shufps $0xB4, %%xmm2, %%xmm2 \n\t" //r2,i2,i3,r3 | 750 "shufps $0xB4, %%xmm2, %%xmm2 \n\t" //r2,i2,i3,r3 |
767 Note sseW2+16={0,0,sqrt(2),-sqrt(2)) | 768 Note sseW2+16={0,0,sqrt(2),-sqrt(2)) |
768 Note sseW2+32={0,0,-sqrt(2),-sqrt(2)) | 769 Note sseW2+32={0,0,-sqrt(2),-sqrt(2)) |
769 Note sseW2+48={1,-1,sqrt(2),-sqrt(2)) | 770 Note sseW2+48={1,-1,sqrt(2),-sqrt(2)) |
770 */ | 771 */ |
771 asm volatile( | 772 asm volatile( |
772 "movaps 48+sseW2, %%xmm6 \n\t" | 773 "movaps 48+"MANGLE(sseW2)", %%xmm6\n\t" |
773 "movaps 16+sseW2, %%xmm7 \n\t" | 774 "movaps 16+"MANGLE(sseW2)", %%xmm7\n\t" |
774 "xorps %%xmm5, %%xmm5 \n\t" | 775 "xorps %%xmm5, %%xmm5 \n\t" |
775 "xorps %%xmm2, %%xmm2 \n\t" | 776 "xorps %%xmm2, %%xmm2 \n\t" |
776 "movl %0, %%esi \n\t" | 777 "movl %0, %%esi \n\t" |
777 ".balign 16 \n\t" | 778 ".balign 16 \n\t" |
778 "1: \n\t" | 779 "1: \n\t" |
779 "movaps 32(%%esi), %%xmm2 \n\t" //r4,i4,r5,i5 | 780 "movaps 32(%%esi), %%xmm2 \n\t" //r4,i4,r5,i5 |
780 "movaps 48(%%esi), %%xmm3 \n\t" //r6,i6,r7,i7 | 781 "movaps 48(%%esi), %%xmm3 \n\t" //r6,i6,r7,i7 |
781 "movaps sseW2, %%xmm4 \n\t" //r4,i4,r5,i5 | 782 "movaps "MANGLE(sseW2)", %%xmm4 \n\t" //r4,i4,r5,i5 |
782 "movaps 32+sseW2, %%xmm5 \n\t" //r6,i6,r7,i7 | 783 "movaps 32+"MANGLE(sseW2)", %%xmm5\n\t" //r6,i6,r7,i7 |
783 "mulps %%xmm2, %%xmm4 \n\t" | 784 "mulps %%xmm2, %%xmm4 \n\t" |
784 "mulps %%xmm3, %%xmm5 \n\t" | 785 "mulps %%xmm3, %%xmm5 \n\t" |
785 "shufps $0xB1, %%xmm2, %%xmm2 \n\t" //i4,r4,i5,r5 | 786 "shufps $0xB1, %%xmm2, %%xmm2 \n\t" //i4,r4,i5,r5 |
786 "shufps $0xB1, %%xmm3, %%xmm3 \n\t" //i6,r6,i7,r7 | 787 "shufps $0xB1, %%xmm3, %%xmm3 \n\t" //i6,r6,i7,r7 |
787 "mulps %%xmm6, %%xmm3 \n\t" | 788 "mulps %%xmm6, %%xmm3 \n\t" |
842 ); | 843 ); |
843 } | 844 } |
844 | 845 |
845 /* Post IFFT complex multiply plus IFFT complex conjugate*/ | 846 /* Post IFFT complex multiply plus IFFT complex conjugate*/ |
846 asm volatile( | 847 asm volatile( |
847 "movl $-1024, %%esi \n\t" | 848 "movl $-1024, %%esi \n\t" |
848 ".balign 16 \n\t" | 849 ".balign 16 \n\t" |
849 "1: \n\t" | 850 "1: \n\t" |
850 "movaps (%0, %%esi), %%xmm0 \n\t" | 851 "movaps (%0, %%esi), %%xmm0 \n\t" |
851 "movaps (%0, %%esi), %%xmm1 \n\t" | 852 "movaps (%0, %%esi), %%xmm1 \n\t" |
852 "shufps $0xB1, %%xmm0, %%xmm0 \n\t" | 853 "shufps $0xB1, %%xmm0, %%xmm0 \n\t" |
853 "mulps 1024+sseSinCos1c(%%esi), %%xmm1 \n\t" | 854 "mulps 1024+"MANGLE(sseSinCos1c)"(%%esi), %%xmm1\n\t" |
854 "mulps 1024+sseSinCos1d(%%esi), %%xmm0 \n\t" | 855 "mulps 1024+"MANGLE(sseSinCos1d)"(%%esi), %%xmm0\n\t" |
855 "addps %%xmm1, %%xmm0 \n\t" | 856 "addps %%xmm1, %%xmm0 \n\t" |
856 "movaps %%xmm0, (%0, %%esi) \n\t" | 857 "movaps %%xmm0, (%0, %%esi) \n\t" |
857 "addl $16, %%esi \n\t" | 858 "addl $16, %%esi \n\t" |
858 " jnz 1b \n\t" | 859 " jnz 1b \n\t" |
859 :: "r" (buf+128) | 860 :: "r" (buf+128) |
876 "movlps (%0, %%esi), %%xmm0 \n\t" // ? ? A ? | 877 "movlps (%0, %%esi), %%xmm0 \n\t" // ? ? A ? |
877 "movlps 8(%0, %%esi), %%xmm1 \n\t" // ? ? C ? | 878 "movlps 8(%0, %%esi), %%xmm1 \n\t" // ? ? C ? |
878 "movhps -16(%0, %%edi), %%xmm1 \n\t" // ? D C ? | 879 "movhps -16(%0, %%edi), %%xmm1 \n\t" // ? D C ? |
879 "movhps -8(%0, %%edi), %%xmm0 \n\t" // ? B A ? | 880 "movhps -8(%0, %%edi), %%xmm0 \n\t" // ? B A ? |
880 "shufps $0x99, %%xmm1, %%xmm0 \n\t" // D C B A | 881 "shufps $0x99, %%xmm1, %%xmm0 \n\t" // D C B A |
881 "mulps sseWindow(%%esi), %%xmm0 \n\t" | 882 "mulps "MANGLE(sseWindow)"(%%esi), %%xmm0\n\t" |
882 "addps (%2, %%esi), %%xmm0 \n\t" | 883 "addps (%2, %%esi), %%xmm0 \n\t" |
883 "addps %%xmm2, %%xmm0 \n\t" | 884 "addps %%xmm2, %%xmm0 \n\t" |
884 "movaps %%xmm0, (%1, %%esi) \n\t" | 885 "movaps %%xmm0, (%1, %%esi) \n\t" |
885 "addl $16, %%esi \n\t" | 886 "addl $16, %%esi \n\t" |
886 "subl $16, %%edi \n\t" | 887 "subl $16, %%edi \n\t" |
903 "movlps (%0, %%esi), %%xmm0 \n\t" // ? ? ? A | 904 "movlps (%0, %%esi), %%xmm0 \n\t" // ? ? ? A |
904 "movlps 8(%0, %%esi), %%xmm1 \n\t" // ? ? ? C | 905 "movlps 8(%0, %%esi), %%xmm1 \n\t" // ? ? ? C |
905 "movhps -16(%0, %%edi), %%xmm1 \n\t" // D ? ? C | 906 "movhps -16(%0, %%edi), %%xmm1 \n\t" // D ? ? C |
906 "movhps -8(%0, %%edi), %%xmm0 \n\t" // B ? ? A | 907 "movhps -8(%0, %%edi), %%xmm0 \n\t" // B ? ? A |
907 "shufps $0xCC, %%xmm1, %%xmm0 \n\t" // D C B A | 908 "shufps $0xCC, %%xmm1, %%xmm0 \n\t" // D C B A |
908 "mulps 512+sseWindow(%%esi), %%xmm0 \n\t" | 909 "mulps 512+"MANGLE(sseWindow)"(%%esi), %%xmm0\n\t" |
909 "addps (%2, %%esi), %%xmm0 \n\t" | 910 "addps (%2, %%esi), %%xmm0 \n\t" |
910 "addps %%xmm2, %%xmm0 \n\t" | 911 "addps %%xmm2, %%xmm0 \n\t" |
911 "movaps %%xmm0, (%1, %%esi) \n\t" | 912 "movaps %%xmm0, (%1, %%esi) \n\t" |
912 "addl $16, %%esi \n\t" | 913 "addl $16, %%esi \n\t" |
913 "subl $16, %%edi \n\t" | 914 "subl $16, %%edi \n\t" |
930 "movlps (%0, %%esi), %%xmm0 \n\t" // ? ? ? A | 931 "movlps (%0, %%esi), %%xmm0 \n\t" // ? ? ? A |
931 "movlps 8(%0, %%esi), %%xmm1 \n\t" // ? ? ? C | 932 "movlps 8(%0, %%esi), %%xmm1 \n\t" // ? ? ? C |
932 "movhps -16(%0, %%edi), %%xmm1 \n\t" // D ? ? C | 933 "movhps -16(%0, %%edi), %%xmm1 \n\t" // D ? ? C |
933 "movhps -8(%0, %%edi), %%xmm0 \n\t" // B ? ? A | 934 "movhps -8(%0, %%edi), %%xmm0 \n\t" // B ? ? A |
934 "shufps $0xCC, %%xmm1, %%xmm0 \n\t" // D C B A | 935 "shufps $0xCC, %%xmm1, %%xmm0 \n\t" // D C B A |
935 "mulps 1024+sseWindow(%%esi), %%xmm0 \n\t" | 936 "mulps 1024+"MANGLE(sseWindow)"(%%esi), %%xmm0\n\t" |
936 "movaps %%xmm0, (%1, %%esi) \n\t" | 937 "movaps %%xmm0, (%1, %%esi) \n\t" |
937 "addl $16, %%esi \n\t" | 938 "addl $16, %%esi \n\t" |
938 "subl $16, %%edi \n\t" | 939 "subl $16, %%edi \n\t" |
939 "cmpl $512, %%esi \n\t" | 940 "cmpl $512, %%esi \n\t" |
940 " jb 1b \n\t" | 941 " jb 1b \n\t" |
952 "movlps (%0, %%esi), %%xmm0 \n\t" // ? ? A ? | 953 "movlps (%0, %%esi), %%xmm0 \n\t" // ? ? A ? |
953 "movlps 8(%0, %%esi), %%xmm1 \n\t" // ? ? C ? | 954 "movlps 8(%0, %%esi), %%xmm1 \n\t" // ? ? C ? |
954 "movhps -16(%0, %%edi), %%xmm1 \n\t" // ? D C ? | 955 "movhps -16(%0, %%edi), %%xmm1 \n\t" // ? D C ? |
955 "movhps -8(%0, %%edi), %%xmm0 \n\t" // ? B A ? | 956 "movhps -8(%0, %%edi), %%xmm0 \n\t" // ? B A ? |
956 "shufps $0x99, %%xmm1, %%xmm0 \n\t" // D C B A | 957 "shufps $0x99, %%xmm1, %%xmm0 \n\t" // D C B A |
957 "mulps 1536+sseWindow(%%esi), %%xmm0 \n\t" | 958 "mulps 1536+"MANGLE(sseWindow)"(%%esi), %%xmm0\n\t" |
958 "movaps %%xmm0, (%1, %%esi) \n\t" | 959 "movaps %%xmm0, (%1, %%esi) \n\t" |
959 "addl $16, %%esi \n\t" | 960 "addl $16, %%esi \n\t" |
960 "subl $16, %%edi \n\t" | 961 "subl $16, %%edi \n\t" |
961 "cmpl $512, %%esi \n\t" | 962 "cmpl $512, %%esi \n\t" |
962 " jb 1b \n\t" | 963 " jb 1b \n\t" |