Mercurial > mplayer.hg
annotate mp3lib/decode_MMX.c @ 13864:96259a2f2142
enable mmx support on x86_64 in libmpeg2
author | aurel |
---|---|
date | Wed, 03 Nov 2004 17:12:01 +0000 |
parents | 114f3d149324 |
children | e7a129082fda |
rev | line source |
---|---|
4142 | 1 /* |
2 * this code comes under GPL | |
3 * This code was taken from http://www.mpg123.org | |
4 * See ChangeLog of mpg123-0.59s-pre.1 for detail | |
5 * Applied to mplayer by Nick Kurshev <nickols_k@mail.ru> | |
6 * | |
7 * Local ChangeLog: | |
8 * - Partial loops unrolling and removing MOVW insn from loops | |
9 */ | |
12292 | 10 #include "../config.h" |
4246
3f677202418b
mangling in mp3lib + stdcall undefined fix with cygwin
atmos4
parents:
4142
diff
changeset
|
11 #include "../mangle.h" |
4142 | 12 #define real float /* ugly - but only way */ |
13 | |
12292 | 14 static unsigned long long attribute_used __attribute__((aligned(8))) null_one = 0x0000ffff0000ffffULL; |
15 static unsigned long long attribute_used __attribute__((aligned(8))) one_null = 0xffff0000ffff0000ULL; | |
/*
 * Constant table with 31 entries, 8-byte aligned and exported (non-static).
 * The entries form ascending runs of 16, 8, 4, 2 and 1 values.
 *
 * NOTE(review): the values look like IEEE-754 single-precision bit patterns
 * stored as integers (presumably coefficients for the MMX dct64 code) --
 * confirm against the code that reads this table.
 */
unsigned long __attribute__((aligned(8))) costab_mmx[] =
{
    /* run of 16 */
    1056974725, 1057056395, 1057223771, 1057485416,
    1057855544, 1058356026, 1059019886, 1059897405,
    1061067246, 1062657950, 1064892987, 1066774581,
    1069414683, 1073984175, 1079645762, 1092815430,

    /* run of 8 */
    1057005197, 1057342072, 1058087743, 1059427869,
    1061799040, 1065862217, 1071413542, 1084439708,

    /* run of 4 */
    1057128951, 1058664893, 1063675095, 1076102863,

    /* run of 2 */
    1057655764, 1067924853,

    /* run of 1 */
    1060439283,
};
50 | |
11266
6beea1683618
reverting my last change as buggy gcc 3.x fails with it :(
alex
parents:
11244
diff
changeset
|
51 static int temp; // buggy gcc 3.x fails if this is moved into the function :( |
4142 | 52 void synth_1to1_MMX_s(real *bandPtr, int channel, short *samples, |
53 short *buffs, int *bo) | |
54 { | |
11244 | 55 |
4142 | 56 __asm __volatile( |
57 "movl %1,%%ecx\n\t" | |
58 "movl %2,%%edi\n\t" | |
59 "movl $15,%%ebx\n\t" | |
60 "movl %4,%%edx\n\t" | |
61 "leal (%%edi,%%ecx,2),%%edi\n\t" | |
62 "decl %%ecx\n\t" | |
63 "movl %3,%%esi\n\t" | |
64 "movl (%%edx),%%eax\n\t" | |
4608 | 65 "jecxz .L01\n\t" |
4142 | 66 "decl %%eax\n\t" |
67 "andl %%ebx,%%eax\n\t" | |
68 "leal 1088(%%esi),%%esi\n\t" | |
69 "movl %%eax,(%%edx)\n\t" | |
4608 | 70 ".L01:\n\t" |
4142 | 71 "leal (%%esi,%%eax,2),%%edx\n\t" |
4322 | 72 "movl %%eax,%5\n\t" |
4142 | 73 "incl %%eax\n\t" |
74 "andl %%ebx,%%eax\n\t" | |
75 "leal 544(%%esi,%%eax,2),%%ecx\n\t" | |
76 "incl %%ebx\n\t" | |
77 "testl $1, %%eax\n\t" | |
4608 | 78 "jnz .L02\n\t" |
4142 | 79 "xchgl %%edx,%%ecx\n\t" |
4322 | 80 "incl %5\n\t" |
4142 | 81 "leal 544(%%esi),%%esi\n\t" |
4608 | 82 ".L02:\n\t" |
4142 | 83 "emms\n\t" |
11240 | 84 "pushl %0\n\t" |
4142 | 85 "pushl %%edx\n\t" |
86 "pushl %%ecx\n\t" | |
4246
3f677202418b
mangling in mp3lib + stdcall undefined fix with cygwin
atmos4
parents:
4142
diff
changeset
|
87 "call *"MANGLE(dct64_MMX_func)"\n\t" |
11240 | 88 "addl $12, %%esp\n\t" |
4142 | 89 "leal 1(%%ebx), %%ecx\n\t" |
4322 | 90 "subl %5,%%ebx\n\t" |
4142 | 91 "pushl %%ecx\n\t" |
8560
1320f1b3229d
fixing that f*cking linker 'bug' e.g. naming config with libmp3lame
alex
parents:
5290
diff
changeset
|
92 "leal "MANGLE(mp3lib_decwins)"(%%ebx,%%ebx,1), %%edx\n\t" |
4142 | 93 "shrl $1, %%ecx\n\t" |
5290 | 94 ".balign 16\n\t" |
4608 | 95 ".L03:\n\t" |
4142 | 96 "movq (%%edx),%%mm0\n\t" |
97 "movq 64(%%edx),%%mm4\n\t" | |
98 "pmaddwd (%%esi),%%mm0\n\t" | |
99 "pmaddwd 32(%%esi),%%mm4\n\t" | |
100 "movq 8(%%edx),%%mm1\n\t" | |
101 "movq 72(%%edx),%%mm5\n\t" | |
102 "pmaddwd 8(%%esi),%%mm1\n\t" | |
103 "pmaddwd 40(%%esi),%%mm5\n\t" | |
104 "movq 16(%%edx),%%mm2\n\t" | |
105 "movq 80(%%edx),%%mm6\n\t" | |
106 "pmaddwd 16(%%esi),%%mm2\n\t" | |
107 "pmaddwd 48(%%esi),%%mm6\n\t" | |
108 "movq 24(%%edx),%%mm3\n\t" | |
109 "movq 88(%%edx),%%mm7\n\t" | |
110 "pmaddwd 24(%%esi),%%mm3\n\t" | |
111 "pmaddwd 56(%%esi),%%mm7\n\t" | |
112 "paddd %%mm1,%%mm0\n\t" | |
113 "paddd %%mm5,%%mm4\n\t" | |
114 "paddd %%mm2,%%mm0\n\t" | |
115 "paddd %%mm6,%%mm4\n\t" | |
116 "paddd %%mm3,%%mm0\n\t" | |
117 "paddd %%mm7,%%mm4\n\t" | |
118 "movq %%mm0,%%mm1\n\t" | |
119 "movq %%mm4,%%mm5\n\t" | |
120 "psrlq $32,%%mm1\n\t" | |
121 "psrlq $32,%%mm5\n\t" | |
122 "paddd %%mm1,%%mm0\n\t" | |
123 "paddd %%mm5,%%mm4\n\t" | |
124 "psrad $13,%%mm0\n\t" | |
125 "psrad $13,%%mm4\n\t" | |
126 "packssdw %%mm0,%%mm0\n\t" | |
127 "packssdw %%mm4,%%mm4\n\t" | |
128 | |
129 "movq (%%edi), %%mm1\n\t" | |
130 "punpckldq %%mm4, %%mm0\n\t" | |
4246
3f677202418b
mangling in mp3lib + stdcall undefined fix with cygwin
atmos4
parents:
4142
diff
changeset
|
131 "pand "MANGLE(one_null)", %%mm1\n\t" |
3f677202418b
mangling in mp3lib + stdcall undefined fix with cygwin
atmos4
parents:
4142
diff
changeset
|
132 "pand "MANGLE(null_one)", %%mm0\n\t" |
4142 | 133 "por %%mm0, %%mm1\n\t" |
134 "movq %%mm1,(%%edi)\n\t" | |
135 | |
136 "leal 64(%%esi),%%esi\n\t" | |
137 "leal 128(%%edx),%%edx\n\t" | |
138 "leal 8(%%edi),%%edi\n\t" | |
139 | |
140 "decl %%ecx\n\t" | |
4608 | 141 "jnz .L03\n\t" |
4142 | 142 |
143 "popl %%ecx\n\t" | |
144 "andl $1, %%ecx\n\t" | |
145 "jecxz .next_loop\n\t" | |
146 | |
147 "movq (%%edx),%%mm0\n\t" | |
148 "pmaddwd (%%esi),%%mm0\n\t" | |
149 "movq 8(%%edx),%%mm1\n\t" | |
150 "pmaddwd 8(%%esi),%%mm1\n\t" | |
151 "movq 16(%%edx),%%mm2\n\t" | |
152 "pmaddwd 16(%%esi),%%mm2\n\t" | |
153 "movq 24(%%edx),%%mm3\n\t" | |
154 "pmaddwd 24(%%esi),%%mm3\n\t" | |
155 "paddd %%mm1,%%mm0\n\t" | |
156 "paddd %%mm2,%%mm0\n\t" | |
157 "paddd %%mm3,%%mm0\n\t" | |
158 "movq %%mm0,%%mm1\n\t" | |
159 "psrlq $32,%%mm1\n\t" | |
160 "paddd %%mm1,%%mm0\n\t" | |
161 "psrad $13,%%mm0\n\t" | |
162 "packssdw %%mm0,%%mm0\n\t" | |
163 "movd %%mm0,%%eax\n\t" | |
164 "movw %%ax, (%%edi)\n\t" | |
165 "leal 32(%%esi),%%esi\n\t" | |
166 "leal 64(%%edx),%%edx\n\t" | |
167 "leal 4(%%edi),%%edi\n\t" | |
168 | |
169 ".next_loop:\n\t" | |
170 "subl $64,%%esi\n\t" | |
171 "movl $7,%%ecx\n\t" | |
5290 | 172 ".balign 16\n\t" |
4608 | 173 ".L04:\n\t" |
4142 | 174 "movq (%%edx),%%mm0\n\t" |
175 "movq 64(%%edx),%%mm4\n\t" | |
176 "pmaddwd (%%esi),%%mm0\n\t" | |
177 "pmaddwd -32(%%esi),%%mm4\n\t" | |
178 "movq 8(%%edx),%%mm1\n\t" | |
179 "movq 72(%%edx),%%mm5\n\t" | |
180 "pmaddwd 8(%%esi),%%mm1\n\t" | |
181 "pmaddwd -24(%%esi),%%mm5\n\t" | |
182 "movq 16(%%edx),%%mm2\n\t" | |
183 "movq 80(%%edx),%%mm6\n\t" | |
184 "pmaddwd 16(%%esi),%%mm2\n\t" | |
185 "pmaddwd -16(%%esi),%%mm6\n\t" | |
186 "movq 24(%%edx),%%mm3\n\t" | |
187 "movq 88(%%edx),%%mm7\n\t" | |
188 "pmaddwd 24(%%esi),%%mm3\n\t" | |
189 "pmaddwd -8(%%esi),%%mm7\n\t" | |
190 "paddd %%mm1,%%mm0\n\t" | |
191 "paddd %%mm5,%%mm4\n\t" | |
192 "paddd %%mm2,%%mm0\n\t" | |
193 "paddd %%mm6,%%mm4\n\t" | |
194 "paddd %%mm3,%%mm0\n\t" | |
195 "paddd %%mm7,%%mm4\n\t" | |
196 "movq %%mm0,%%mm1\n\t" | |
197 "movq %%mm4,%%mm5\n\t" | |
198 "psrlq $32,%%mm1\n\t" | |
199 "psrlq $32,%%mm5\n\t" | |
200 "paddd %%mm0,%%mm1\n\t" | |
201 "paddd %%mm4,%%mm5\n\t" | |
202 "psrad $13,%%mm1\n\t" | |
203 "psrad $13,%%mm5\n\t" | |
204 "packssdw %%mm1,%%mm1\n\t" | |
205 "packssdw %%mm5,%%mm5\n\t" | |
206 "psubd %%mm0,%%mm0\n\t" | |
207 "psubd %%mm4,%%mm4\n\t" | |
208 "psubsw %%mm1,%%mm0\n\t" | |
209 "psubsw %%mm5,%%mm4\n\t" | |
210 | |
211 "movq (%%edi), %%mm1\n\t" | |
212 "punpckldq %%mm4, %%mm0\n\t" | |
4246
3f677202418b
mangling in mp3lib + stdcall undefined fix with cygwin
atmos4
parents:
4142
diff
changeset
|
213 "pand "MANGLE(one_null)", %%mm1\n\t" |
3f677202418b
mangling in mp3lib + stdcall undefined fix with cygwin
atmos4
parents:
4142
diff
changeset
|
214 "pand "MANGLE(null_one)", %%mm0\n\t" |
4142 | 215 "por %%mm0, %%mm1\n\t" |
216 "movq %%mm1,(%%edi)\n\t" | |
217 | |
218 "subl $64,%%esi\n\t" | |
219 "addl $128,%%edx\n\t" | |
220 "leal 8(%%edi),%%edi\n\t" | |
221 "decl %%ecx\n\t" | |
4608 | 222 "jnz .L04\n\t" |
4142 | 223 |
224 "movq (%%edx),%%mm0\n\t" | |
225 "pmaddwd (%%esi),%%mm0\n\t" | |
226 "movq 8(%%edx),%%mm1\n\t" | |
227 "pmaddwd 8(%%esi),%%mm1\n\t" | |
228 "movq 16(%%edx),%%mm2\n\t" | |
229 "pmaddwd 16(%%esi),%%mm2\n\t" | |
230 "movq 24(%%edx),%%mm3\n\t" | |
231 "pmaddwd 24(%%esi),%%mm3\n\t" | |
232 "paddd %%mm1,%%mm0\n\t" | |
233 "paddd %%mm2,%%mm0\n\t" | |
234 "paddd %%mm3,%%mm0\n\t" | |
235 "movq %%mm0,%%mm1\n\t" | |
236 "psrlq $32,%%mm1\n\t" | |
237 "paddd %%mm0,%%mm1\n\t" | |
238 "psrad $13,%%mm1\n\t" | |
239 "packssdw %%mm1,%%mm1\n\t" | |
240 "psubd %%mm0,%%mm0\n\t" | |
241 "psubsw %%mm1,%%mm0\n\t" | |
242 "movd %%mm0,%%eax\n\t" | |
243 "movw %%ax,(%%edi)\n\t" | |
244 "emms\n\t" | |
245 : | |
4322 | 246 :"m"(bandPtr),"m"(channel),"m"(samples),"m"(buffs),"m"(bo), "m"(temp) |
11240 | 247 :"memory","%edi","%esi","%ebx","%esp"); |
4142 | 248 } |