comparison liba52/imdct.c @ 4247:2dbd637ffe05

mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
author atmos4
date Sat, 19 Jan 2002 05:12:34 +0000
parents 0cc94b1eec0f
children d3aedd7db02c
comparison
equal deleted inserted replaced
4246:3f677202418b 4247:2dbd637ffe05
35 #include <inttypes.h> 35 #include <inttypes.h>
36 36
37 #include "a52.h" 37 #include "a52.h"
38 #include "a52_internal.h" 38 #include "a52_internal.h"
39 #include "mm_accel.h" 39 #include "mm_accel.h"
40 #include "mangle.h"
40 41
41 #ifdef RUNTIME_CPUDETECT 42 #ifdef RUNTIME_CPUDETECT
42 #undef HAVE_3DNOWEX 43 #undef HAVE_3DNOWEX
43 #endif 44 #endif
44 45
658 659
659 /* Pre IFFT complex multiply plus IFFT cmplx conjugate */ 660 /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
660 /* Bit reversed shuffling */ 661 /* Bit reversed shuffling */
661 asm volatile( 662 asm volatile(
662 "xorl %%esi, %%esi \n\t" 663 "xorl %%esi, %%esi \n\t"
663 "leal bit_reverse_512, %%eax \n\t" 664 "leal "MANGLE(bit_reverse_512)", %%eax \n\t"
664 "movl $1008, %%edi \n\t" 665 "movl $1008, %%edi \n\t"
665 "pushl %%ebp \n\t" //use ebp without telling gcc 666 "pushl %%ebp \n\t" //use ebp without telling gcc
666 ".balign 16 \n\t" 667 ".balign 16 \n\t"
667 "1: \n\t" 668 "1: \n\t"
668 "movlps (%0, %%esi), %%xmm0 \n\t" // XXXI 669 "movlps (%0, %%esi), %%xmm0 \n\t" // XXXI
669 "movhps 8(%0, %%edi), %%xmm0 \n\t" // RXXI 670 "movhps 8(%0, %%edi), %%xmm0 \n\t" // RXXI
670 "movlps 8(%0, %%esi), %%xmm1 \n\t" // XXXi 671 "movlps 8(%0, %%esi), %%xmm1 \n\t" // XXXi
671 "movhps (%0, %%edi), %%xmm1 \n\t" // rXXi 672 "movhps (%0, %%edi), %%xmm1 \n\t" // rXXi
672 "shufps $0x33, %%xmm1, %%xmm0 \n\t" // irIR 673 "shufps $0x33, %%xmm1, %%xmm0 \n\t" // irIR
673 "movaps sseSinCos1c(%%esi), %%xmm2 \n\t" 674 "movaps "MANGLE(sseSinCos1c)"(%%esi), %%xmm2\n\t"
674 "mulps %%xmm0, %%xmm2 \n\t" 675 "mulps %%xmm0, %%xmm2 \n\t"
675 "shufps $0xB1, %%xmm0, %%xmm0 \n\t" // riRI 676 "shufps $0xB1, %%xmm0, %%xmm0 \n\t" // riRI
676 "mulps sseSinCos1d(%%esi), %%xmm0 \n\t" 677 "mulps "MANGLE(sseSinCos1d)"(%%esi), %%xmm0\n\t"
677 "subps %%xmm0, %%xmm2 \n\t" 678 "subps %%xmm0, %%xmm2 \n\t"
678 "movzbl (%%eax), %%edx \n\t" 679 "movzbl (%%eax), %%edx \n\t"
679 "movzbl 1(%%eax), %%ebp \n\t" 680 "movzbl 1(%%eax), %%ebp \n\t"
680 "movlps %%xmm2, (%1, %%edx,8) \n\t" 681 "movlps %%xmm2, (%1, %%edx,8) \n\t"
681 "movhps %%xmm2, (%1, %%ebp,8) \n\t" 682 "movhps %%xmm2, (%1, %%ebp,8) \n\t"
739 ); 740 );
740 741
741 /* 2. iteration */ 742 /* 2. iteration */
742 // Note w[1]={{1,0}, {0,-1}} 743 // Note w[1]={{1,0}, {0,-1}}
743 asm volatile( 744 asm volatile(
744 "movaps ps111_1, %%xmm7 \n\t" // 1,1,1,-1 745 "movaps "MANGLE(ps111_1)", %%xmm7\n\t" // 1,1,1,-1
745 "movl %0, %%esi \n\t" 746 "movl %0, %%esi \n\t"
746 ".balign 16 \n\t" 747 ".balign 16 \n\t"
747 "1: \n\t" 748 "1: \n\t"
748 "movaps 16(%%esi), %%xmm2 \n\t" //r2,i2,r3,i3 749 "movaps 16(%%esi), %%xmm2 \n\t" //r2,i2,r3,i3
749 "shufps $0xB4, %%xmm2, %%xmm2 \n\t" //r2,i2,i3,r3 750 "shufps $0xB4, %%xmm2, %%xmm2 \n\t" //r2,i2,i3,r3
767 Note sseW2+16={0,0,sqrt(2),-sqrt(2)} 768 Note sseW2+16={0,0,sqrt(2),-sqrt(2)}
768 Note sseW2+32={0,0,-sqrt(2),-sqrt(2)} 769 Note sseW2+32={0,0,-sqrt(2),-sqrt(2)}
769 Note sseW2+48={1,-1,sqrt(2),-sqrt(2)} 770 Note sseW2+48={1,-1,sqrt(2),-sqrt(2)}
770 */ 771 */
771 asm volatile( 772 asm volatile(
772 "movaps 48+sseW2, %%xmm6 \n\t" 773 "movaps 48+"MANGLE(sseW2)", %%xmm6\n\t"
773 "movaps 16+sseW2, %%xmm7 \n\t" 774 "movaps 16+"MANGLE(sseW2)", %%xmm7\n\t"
774 "xorps %%xmm5, %%xmm5 \n\t" 775 "xorps %%xmm5, %%xmm5 \n\t"
775 "xorps %%xmm2, %%xmm2 \n\t" 776 "xorps %%xmm2, %%xmm2 \n\t"
776 "movl %0, %%esi \n\t" 777 "movl %0, %%esi \n\t"
777 ".balign 16 \n\t" 778 ".balign 16 \n\t"
778 "1: \n\t" 779 "1: \n\t"
779 "movaps 32(%%esi), %%xmm2 \n\t" //r4,i4,r5,i5 780 "movaps 32(%%esi), %%xmm2 \n\t" //r4,i4,r5,i5
780 "movaps 48(%%esi), %%xmm3 \n\t" //r6,i6,r7,i7 781 "movaps 48(%%esi), %%xmm3 \n\t" //r6,i6,r7,i7
781 "movaps sseW2, %%xmm4 \n\t" //r4,i4,r5,i5 782 "movaps "MANGLE(sseW2)", %%xmm4 \n\t" //r4,i4,r5,i5
782 "movaps 32+sseW2, %%xmm5 \n\t" //r6,i6,r7,i7 783 "movaps 32+"MANGLE(sseW2)", %%xmm5\n\t" //r6,i6,r7,i7
783 "mulps %%xmm2, %%xmm4 \n\t" 784 "mulps %%xmm2, %%xmm4 \n\t"
784 "mulps %%xmm3, %%xmm5 \n\t" 785 "mulps %%xmm3, %%xmm5 \n\t"
785 "shufps $0xB1, %%xmm2, %%xmm2 \n\t" //i4,r4,i5,r5 786 "shufps $0xB1, %%xmm2, %%xmm2 \n\t" //i4,r4,i5,r5
786 "shufps $0xB1, %%xmm3, %%xmm3 \n\t" //i6,r6,i7,r7 787 "shufps $0xB1, %%xmm3, %%xmm3 \n\t" //i6,r6,i7,r7
787 "mulps %%xmm6, %%xmm3 \n\t" 788 "mulps %%xmm6, %%xmm3 \n\t"
842 ); 843 );
843 } 844 }
844 845
845 /* Post IFFT complex multiply plus IFFT complex conjugate*/ 846 /* Post IFFT complex multiply plus IFFT complex conjugate*/
846 asm volatile( 847 asm volatile(
847 "movl $-1024, %%esi \n\t" 848 "movl $-1024, %%esi \n\t"
848 ".balign 16 \n\t" 849 ".balign 16 \n\t"
849 "1: \n\t" 850 "1: \n\t"
850 "movaps (%0, %%esi), %%xmm0 \n\t" 851 "movaps (%0, %%esi), %%xmm0 \n\t"
851 "movaps (%0, %%esi), %%xmm1 \n\t" 852 "movaps (%0, %%esi), %%xmm1 \n\t"
852 "shufps $0xB1, %%xmm0, %%xmm0 \n\t" 853 "shufps $0xB1, %%xmm0, %%xmm0 \n\t"
853 "mulps 1024+sseSinCos1c(%%esi), %%xmm1 \n\t" 854 "mulps 1024+"MANGLE(sseSinCos1c)"(%%esi), %%xmm1\n\t"
854 "mulps 1024+sseSinCos1d(%%esi), %%xmm0 \n\t" 855 "mulps 1024+"MANGLE(sseSinCos1d)"(%%esi), %%xmm0\n\t"
855 "addps %%xmm1, %%xmm0 \n\t" 856 "addps %%xmm1, %%xmm0 \n\t"
856 "movaps %%xmm0, (%0, %%esi) \n\t" 857 "movaps %%xmm0, (%0, %%esi) \n\t"
857 "addl $16, %%esi \n\t" 858 "addl $16, %%esi \n\t"
858 " jnz 1b \n\t" 859 " jnz 1b \n\t"
859 :: "r" (buf+128) 860 :: "r" (buf+128)
876 "movlps (%0, %%esi), %%xmm0 \n\t" // ? ? A ? 877 "movlps (%0, %%esi), %%xmm0 \n\t" // ? ? A ?
877 "movlps 8(%0, %%esi), %%xmm1 \n\t" // ? ? C ? 878 "movlps 8(%0, %%esi), %%xmm1 \n\t" // ? ? C ?
878 "movhps -16(%0, %%edi), %%xmm1 \n\t" // ? D C ? 879 "movhps -16(%0, %%edi), %%xmm1 \n\t" // ? D C ?
879 "movhps -8(%0, %%edi), %%xmm0 \n\t" // ? B A ? 880 "movhps -8(%0, %%edi), %%xmm0 \n\t" // ? B A ?
880 "shufps $0x99, %%xmm1, %%xmm0 \n\t" // D C B A 881 "shufps $0x99, %%xmm1, %%xmm0 \n\t" // D C B A
881 "mulps sseWindow(%%esi), %%xmm0 \n\t" 882 "mulps "MANGLE(sseWindow)"(%%esi), %%xmm0\n\t"
882 "addps (%2, %%esi), %%xmm0 \n\t" 883 "addps (%2, %%esi), %%xmm0 \n\t"
883 "addps %%xmm2, %%xmm0 \n\t" 884 "addps %%xmm2, %%xmm0 \n\t"
884 "movaps %%xmm0, (%1, %%esi) \n\t" 885 "movaps %%xmm0, (%1, %%esi) \n\t"
885 "addl $16, %%esi \n\t" 886 "addl $16, %%esi \n\t"
886 "subl $16, %%edi \n\t" 887 "subl $16, %%edi \n\t"
903 "movlps (%0, %%esi), %%xmm0 \n\t" // ? ? ? A 904 "movlps (%0, %%esi), %%xmm0 \n\t" // ? ? ? A
904 "movlps 8(%0, %%esi), %%xmm1 \n\t" // ? ? ? C 905 "movlps 8(%0, %%esi), %%xmm1 \n\t" // ? ? ? C
905 "movhps -16(%0, %%edi), %%xmm1 \n\t" // D ? ? C 906 "movhps -16(%0, %%edi), %%xmm1 \n\t" // D ? ? C
906 "movhps -8(%0, %%edi), %%xmm0 \n\t" // B ? ? A 907 "movhps -8(%0, %%edi), %%xmm0 \n\t" // B ? ? A
907 "shufps $0xCC, %%xmm1, %%xmm0 \n\t" // D C B A 908 "shufps $0xCC, %%xmm1, %%xmm0 \n\t" // D C B A
908 "mulps 512+sseWindow(%%esi), %%xmm0 \n\t" 909 "mulps 512+"MANGLE(sseWindow)"(%%esi), %%xmm0\n\t"
909 "addps (%2, %%esi), %%xmm0 \n\t" 910 "addps (%2, %%esi), %%xmm0 \n\t"
910 "addps %%xmm2, %%xmm0 \n\t" 911 "addps %%xmm2, %%xmm0 \n\t"
911 "movaps %%xmm0, (%1, %%esi) \n\t" 912 "movaps %%xmm0, (%1, %%esi) \n\t"
912 "addl $16, %%esi \n\t" 913 "addl $16, %%esi \n\t"
913 "subl $16, %%edi \n\t" 914 "subl $16, %%edi \n\t"
930 "movlps (%0, %%esi), %%xmm0 \n\t" // ? ? ? A 931 "movlps (%0, %%esi), %%xmm0 \n\t" // ? ? ? A
931 "movlps 8(%0, %%esi), %%xmm1 \n\t" // ? ? ? C 932 "movlps 8(%0, %%esi), %%xmm1 \n\t" // ? ? ? C
932 "movhps -16(%0, %%edi), %%xmm1 \n\t" // D ? ? C 933 "movhps -16(%0, %%edi), %%xmm1 \n\t" // D ? ? C
933 "movhps -8(%0, %%edi), %%xmm0 \n\t" // B ? ? A 934 "movhps -8(%0, %%edi), %%xmm0 \n\t" // B ? ? A
934 "shufps $0xCC, %%xmm1, %%xmm0 \n\t" // D C B A 935 "shufps $0xCC, %%xmm1, %%xmm0 \n\t" // D C B A
935 "mulps 1024+sseWindow(%%esi), %%xmm0 \n\t" 936 "mulps 1024+"MANGLE(sseWindow)"(%%esi), %%xmm0\n\t"
936 "movaps %%xmm0, (%1, %%esi) \n\t" 937 "movaps %%xmm0, (%1, %%esi) \n\t"
937 "addl $16, %%esi \n\t" 938 "addl $16, %%esi \n\t"
938 "subl $16, %%edi \n\t" 939 "subl $16, %%edi \n\t"
939 "cmpl $512, %%esi \n\t" 940 "cmpl $512, %%esi \n\t"
940 " jb 1b \n\t" 941 " jb 1b \n\t"
952 "movlps (%0, %%esi), %%xmm0 \n\t" // ? ? A ? 953 "movlps (%0, %%esi), %%xmm0 \n\t" // ? ? A ?
953 "movlps 8(%0, %%esi), %%xmm1 \n\t" // ? ? C ? 954 "movlps 8(%0, %%esi), %%xmm1 \n\t" // ? ? C ?
954 "movhps -16(%0, %%edi), %%xmm1 \n\t" // ? D C ? 955 "movhps -16(%0, %%edi), %%xmm1 \n\t" // ? D C ?
955 "movhps -8(%0, %%edi), %%xmm0 \n\t" // ? B A ? 956 "movhps -8(%0, %%edi), %%xmm0 \n\t" // ? B A ?
956 "shufps $0x99, %%xmm1, %%xmm0 \n\t" // D C B A 957 "shufps $0x99, %%xmm1, %%xmm0 \n\t" // D C B A
957 "mulps 1536+sseWindow(%%esi), %%xmm0 \n\t" 958 "mulps 1536+"MANGLE(sseWindow)"(%%esi), %%xmm0\n\t"
958 "movaps %%xmm0, (%1, %%esi) \n\t" 959 "movaps %%xmm0, (%1, %%esi) \n\t"
959 "addl $16, %%esi \n\t" 960 "addl $16, %%esi \n\t"
960 "subl $16, %%edi \n\t" 961 "subl $16, %%edi \n\t"
961 "cmpl $512, %%esi \n\t" 962 "cmpl $512, %%esi \n\t"
962 " jb 1b \n\t" 963 " jb 1b \n\t"