Mercurial > mplayer.hg
comparison liba52/imdct.c @ 18104:7b408d60de9e
Add support for Intel Mac. mp3lib is not fixed yet.
author | nplourde |
---|---|
date | Sat, 15 Apr 2006 20:46:54 +0000 |
parents | 72764c0dad8a |
children | 4bad7f00556e |
comparison
equal
deleted
inserted
replaced
18103:26ea12332e67 | 18104:7b408d60de9e |
---|---|
29 * michael did port them from libac3 (untested, perhaps totally broken) | 29 * michael did port them from libac3 (untested, perhaps totally broken) |
30 * AltiVec optimizations from Romain Dolbeau (romain@dolbeau.org) | 30 * AltiVec optimizations from Romain Dolbeau (romain@dolbeau.org) |
31 */ | 31 */ |
32 | 32 |
33 #include "config.h" | 33 #include "config.h" |
 | 34 #include "asmalign.h" |
34 | 35 |
35 #include <math.h> | 36 #include <math.h> |
36 #include <stdio.h> | 37 #include <stdio.h> |
37 #ifndef M_PI | 38 #ifndef M_PI |
38 #define M_PI 3.1415926535897932384626433832795029 | 39 #define M_PI 3.1415926535897932384626433832795029 |
790 asm volatile( | 791 asm volatile( |
791 "xor %%"REG_S", %%"REG_S" \n\t" | 792 "xor %%"REG_S", %%"REG_S" \n\t" |
792 "lea "MANGLE(bit_reverse_512)", %%"REG_a"\n\t" | 793 "lea "MANGLE(bit_reverse_512)", %%"REG_a"\n\t" |
793 "mov $1008, %%"REG_D" \n\t" | 794 "mov $1008, %%"REG_D" \n\t" |
794 "push %%"REG_BP" \n\t" //use ebp without telling gcc | 795 "push %%"REG_BP" \n\t" //use ebp without telling gcc |
795 ".balign 16 \n\t" | 796 ASMALIGN16 |
796 "1: \n\t" | 797 "1: \n\t" |
797 "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // XXXI | 798 "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // XXXI |
798 "movhps 8(%0, %%"REG_D"), %%xmm0 \n\t" // RXXI | 799 "movhps 8(%0, %%"REG_D"), %%xmm0 \n\t" // RXXI |
799 "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // XXXi | 800 "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // XXXi |
800 "movhps (%0, %%"REG_D"), %%xmm1 \n\t" // rXXi | 801 "movhps (%0, %%"REG_D"), %%xmm1 \n\t" // rXXi |
849 // Note w[0][0]={1,0} | 850 // Note w[0][0]={1,0} |
850 asm volatile( | 851 asm volatile( |
851 "xorps %%xmm1, %%xmm1 \n\t" | 852 "xorps %%xmm1, %%xmm1 \n\t" |
852 "xorps %%xmm2, %%xmm2 \n\t" | 853 "xorps %%xmm2, %%xmm2 \n\t" |
853 "mov %0, %%"REG_S" \n\t" | 854 "mov %0, %%"REG_S" \n\t" |
854 ".balign 16 \n\t" | 855 ASMALIGN16 |
855 "1: \n\t" | 856 "1: \n\t" |
856 "movlps (%%"REG_S"), %%xmm0\n\t" //buf[p] | 857 "movlps (%%"REG_S"), %%xmm0\n\t" //buf[p] |
857 "movlps 8(%%"REG_S"), %%xmm1\n\t" //buf[q] | 858 "movlps 8(%%"REG_S"), %%xmm1\n\t" //buf[q] |
858 "movhps (%%"REG_S"), %%xmm0\n\t" //buf[p] | 859 "movhps (%%"REG_S"), %%xmm0\n\t" //buf[p] |
859 "movhps 8(%%"REG_S"), %%xmm2\n\t" //buf[q] | 860 "movhps 8(%%"REG_S"), %%xmm2\n\t" //buf[q] |
870 /* 2. iteration */ | 871 /* 2. iteration */ |
871 // Note w[1]={{1,0}, {0,-1}} | 872 // Note w[1]={{1,0}, {0,-1}} |
872 asm volatile( | 873 asm volatile( |
873 "movaps "MANGLE(ps111_1)", %%xmm7\n\t" // 1,1,1,-1 | 874 "movaps "MANGLE(ps111_1)", %%xmm7\n\t" // 1,1,1,-1 |
874 "mov %0, %%"REG_S" \n\t" | 875 "mov %0, %%"REG_S" \n\t" |
875 ".balign 16 \n\t" | 876 ASMALIGN16 |
876 "1: \n\t" | 877 "1: \n\t" |
877 "movaps 16(%%"REG_S"), %%xmm2 \n\t" //r2,i2,r3,i3 | 878 "movaps 16(%%"REG_S"), %%xmm2 \n\t" //r2,i2,r3,i3 |
878 "shufps $0xB4, %%xmm2, %%xmm2 \n\t" //r2,i2,i3,r3 | 879 "shufps $0xB4, %%xmm2, %%xmm2 \n\t" //r2,i2,i3,r3 |
879 "mulps %%xmm7, %%xmm2 \n\t" //r2,i2,i3,-r3 | 880 "mulps %%xmm7, %%xmm2 \n\t" //r2,i2,i3,-r3 |
880 "movaps (%%"REG_S"), %%xmm0 \n\t" //r0,i0,r1,i1 | 881 "movaps (%%"REG_S"), %%xmm0 \n\t" //r0,i0,r1,i1 |
901 "movaps 48+"MANGLE(sseW2)", %%xmm6\n\t" | 902 "movaps 48+"MANGLE(sseW2)", %%xmm6\n\t" |
902 "movaps 16+"MANGLE(sseW2)", %%xmm7\n\t" | 903 "movaps 16+"MANGLE(sseW2)", %%xmm7\n\t" |
903 "xorps %%xmm5, %%xmm5 \n\t" | 904 "xorps %%xmm5, %%xmm5 \n\t" |
904 "xorps %%xmm2, %%xmm2 \n\t" | 905 "xorps %%xmm2, %%xmm2 \n\t" |
905 "mov %0, %%"REG_S" \n\t" | 906 "mov %0, %%"REG_S" \n\t" |
906 ".balign 16 \n\t" | 907 ASMALIGN16 |
907 "1: \n\t" | 908 "1: \n\t" |
908 "movaps 32(%%"REG_S"), %%xmm2 \n\t" //r4,i4,r5,i5 | 909 "movaps 32(%%"REG_S"), %%xmm2 \n\t" //r4,i4,r5,i5 |
909 "movaps 48(%%"REG_S"), %%xmm3 \n\t" //r6,i6,r7,i7 | 910 "movaps 48(%%"REG_S"), %%xmm3 \n\t" //r6,i6,r7,i7 |
910 "movaps "MANGLE(sseW2)", %%xmm4 \n\t" //r4,i4,r5,i5 | 911 "movaps "MANGLE(sseW2)", %%xmm4 \n\t" //r4,i4,r5,i5 |
911 "movaps 32+"MANGLE(sseW2)", %%xmm5\n\t" //r6,i6,r7,i7 | 912 "movaps 32+"MANGLE(sseW2)", %%xmm5\n\t" //r6,i6,r7,i7 |
942 two_m_plus_one = two_m<<1; | 943 two_m_plus_one = two_m<<1; |
943 two_m_plus_one_shl3 = (two_m_plus_one<<3); | 944 two_m_plus_one_shl3 = (two_m_plus_one<<3); |
944 buf_offset = buf+128; | 945 buf_offset = buf+128; |
945 asm volatile( | 946 asm volatile( |
946 "mov %0, %%"REG_S" \n\t" | 947 "mov %0, %%"REG_S" \n\t" |
947 ".balign 16 \n\t" | 948 ASMALIGN16 |
948 "1: \n\t" | 949 "1: \n\t" |
949 "xor %%"REG_D", %%"REG_D" \n\t" // k | 950 "xor %%"REG_D", %%"REG_D" \n\t" // k |
950 "lea (%%"REG_S", %3), %%"REG_d" \n\t" | 951 "lea (%%"REG_S", %3), %%"REG_d" \n\t" |
951 "2: \n\t" | 952 "2: \n\t" |
952 "movaps (%%"REG_d", %%"REG_D"), %%xmm1 \n\t" | 953 "movaps (%%"REG_d", %%"REG_D"), %%xmm1 \n\t" |
974 } | 975 } |
975 | 976 |
976 /* Post IFFT complex multiply plus IFFT complex conjugate*/ | 977 /* Post IFFT complex multiply plus IFFT complex conjugate*/ |
977 asm volatile( | 978 asm volatile( |
978 "mov $-1024, %%"REG_S" \n\t" | 979 "mov $-1024, %%"REG_S" \n\t" |
979 ".balign 16 \n\t" | 980 ASMALIGN16 |
980 "1: \n\t" | 981 "1: \n\t" |
981 "movaps (%0, %%"REG_S"), %%xmm0 \n\t" | 982 "movaps (%0, %%"REG_S"), %%xmm0 \n\t" |
982 "movaps (%0, %%"REG_S"), %%xmm1 \n\t" | 983 "movaps (%0, %%"REG_S"), %%xmm1 \n\t" |
983 "shufps $0xB1, %%xmm0, %%xmm0 \n\t" | 984 "shufps $0xB1, %%xmm0, %%xmm0 \n\t" |
984 "mulps 1024+"MANGLE(sseSinCos1c)"(%%"REG_S"), %%xmm1\n\t" | 985 "mulps 1024+"MANGLE(sseSinCos1c)"(%%"REG_S"), %%xmm1\n\t" |
1000 asm volatile( | 1001 asm volatile( |
1001 "xor %%"REG_D", %%"REG_D" \n\t" // 0 | 1002 "xor %%"REG_D", %%"REG_D" \n\t" // 0 |
1002 "xor %%"REG_S", %%"REG_S" \n\t" // 0 | 1003 "xor %%"REG_S", %%"REG_S" \n\t" // 0 |
1003 "movss %3, %%xmm2 \n\t" // bias | 1004 "movss %3, %%xmm2 \n\t" // bias |
1004 "shufps $0x00, %%xmm2, %%xmm2 \n\t" // bias, bias, ... | 1005 "shufps $0x00, %%xmm2, %%xmm2 \n\t" // bias, bias, ... |
1005 ".balign 16 \n\t" | 1006 ASMALIGN16 |
1006 "1: \n\t" | 1007 "1: \n\t" |
1007 "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? A ? | 1008 "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? A ? |
1008 "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? C ? | 1009 "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? C ? |
1009 "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // ? D C ? | 1010 "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // ? D C ? |
1010 "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // ? B A ? | 1011 "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // ? B A ? |
1027 asm volatile( | 1028 asm volatile( |
1028 "mov $1024, %%"REG_D" \n\t" // 512 | 1029 "mov $1024, %%"REG_D" \n\t" // 512 |
1029 "xor %%"REG_S", %%"REG_S" \n\t" // 0 | 1030 "xor %%"REG_S", %%"REG_S" \n\t" // 0 |
1030 "movss %3, %%xmm2 \n\t" // bias | 1031 "movss %3, %%xmm2 \n\t" // bias |
1031 "shufps $0x00, %%xmm2, %%xmm2 \n\t" // bias, bias, ... | 1032 "shufps $0x00, %%xmm2, %%xmm2 \n\t" // bias, bias, ... |
1032 ".balign 16 \n\t" | 1033 ASMALIGN16 |
1033 "1: \n\t" | 1034 "1: \n\t" |
1034 "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? ? A | 1035 "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? ? A |
1035 "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? ? C | 1036 "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? ? C |
1036 "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // D ? ? C | 1037 "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // D ? ? C |
1037 "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // B ? ? A | 1038 "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // B ? ? A |
1054 delay_ptr = delay; | 1055 delay_ptr = delay; |
1055 | 1056 |
1056 asm volatile( | 1057 asm volatile( |
1057 "xor %%"REG_D", %%"REG_D" \n\t" // 0 | 1058 "xor %%"REG_D", %%"REG_D" \n\t" // 0 |
1058 "xor %%"REG_S", %%"REG_S" \n\t" // 0 | 1059 "xor %%"REG_S", %%"REG_S" \n\t" // 0 |
1059 ".balign 16 \n\t" | 1060 ASMALIGN16 |
1060 "1: \n\t" | 1061 "1: \n\t" |
1061 "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? ? A | 1062 "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? ? A |
1062 "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? ? C | 1063 "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? ? C |
1063 "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // D ? ? C | 1064 "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // D ? ? C |
1064 "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // B ? ? A | 1065 "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // B ? ? A |
1076 // window_ptr-=128; | 1077 // window_ptr-=128; |
1077 | 1078 |
1078 asm volatile( | 1079 asm volatile( |
1079 "mov $1024, %%"REG_D" \n\t" // 1024 | 1080 "mov $1024, %%"REG_D" \n\t" // 1024 |
1080 "xor %%"REG_S", %%"REG_S" \n\t" // 0 | 1081 "xor %%"REG_S", %%"REG_S" \n\t" // 0 |
1081 ".balign 16 \n\t" | 1082 ASMALIGN16 |
1082 "1: \n\t" | 1083 "1: \n\t" |
1083 "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? A ? | 1084 "movlps (%0, %%"REG_S"), %%xmm0 \n\t" // ? ? A ? |
1084 "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? C ? | 1085 "movlps 8(%0, %%"REG_S"), %%xmm1 \n\t" // ? ? C ? |
1085 "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // ? D C ? | 1086 "movhps -16(%0, %%"REG_D"), %%xmm1 \n\t" // ? D C ? |
1086 "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // ? B A ? | 1087 "movhps -8(%0, %%"REG_D"), %%xmm0 \n\t" // ? B A ? |