4147
|
1 /*
|
|
2 * mpg123_synth_1to1 works the same way as the c version of this
|
|
3 * file. only two types of changes have been made:
|
|
4 * - reordered floating point instructions to
|
|
5 * prevent pipeline stalls
|
|
6 * - made WRITE_SAMPLE use integer instead of
|
|
7 * (slower) floating point
|
|
8 * all kinds of x86 processors should benefit from these
|
|
9 * modifications.
|
|
10 *
|
|
11 * useful sources of information on optimizing x86 code include:
|
|
12 *
|
|
13 * Intel Architecture Optimization Manual
|
|
14 * http://www.intel.com/design/pentium/manuals/242816.htm
|
|
15 *
|
|
16 * Cyrix 6x86 Instruction Set Summary
|
|
17 * ftp://ftp.cyrix.com/6x86/6x-dbch6.pdf
|
|
18 *
|
|
19 * AMD-K5 Processor Software Development
|
|
20 * http://www.amd.com/products/cpg/techdocs/appnotes/20007e.pdf
|
|
21 *
|
|
22 * Stefan Bieschewski <stb@acm.org>
|
|
23 *
|
|
24 * $Id$
|
|
25 */
|
4246
|
26 #include "../mangle.h"
|
4147
|
#define real float /* ugly - but only way */

/*
 * Static working buffer for synth_1to1_pent().  The inline assembly takes
 * byte offset 0 as its base when channel == 0 and byte offset 2176
 * otherwise, and reads the contents as 32-bit floats (flds/fmuls) even
 * though the array is declared as long.
 */
static long buffs[1088];
/*
 * Position counter loaded by synth_1to1_pent(); when channel == 0 it is
 * decremented, masked with 15 and stored back before use.
 */
static long bo=1;
|
|
31
|
|
32 int synth_1to1_pent(real *bandPtr, int channel, short *samples)
|
|
33 {
|
|
34 real tmp[3];
|
|
35 register int retval;
|
|
36 __asm __volatile(
|
|
37 " movl %1,%%eax\n\t"/*bandPtr*/
|
|
38 " movl %3,%%esi\n\t"
|
|
39 " xorl %%edi,%%edi\n\t"
|
4246
|
40 " movl "MANGLE(bo)",%%ebp\n\t"
|
4147
|
41 " cmpl %%edi,%2\n\t"
|
|
42 " jne .L48\n\t"
|
|
43 " decl %%ebp\n\t"
|
|
44 " andl $15,%%ebp\n\t"
|
4246
|
45 " movl %%ebp,"MANGLE(bo)"\n\t"
|
|
46 " movl $"MANGLE(buffs)",%%ecx\n\t"
|
4147
|
47 " jmp .L49\n\t"
|
|
48 ".L48:\n\t"
|
|
49 " addl $2,%%esi\n\t"
|
4246
|
50 " movl $"MANGLE(buffs)"+2176,%%ecx\n\t"
|
4147
|
51 ".L49:\n\t"
|
|
52 " testl $1,%%ebp\n\t"
|
|
53 " je .L50\n\t"
|
|
54 " movl %%ecx,%%ebx\n\t"
|
|
55 " movl %%ebp,%4\n\t"
|
|
56 " pushl %%eax\n\t"
|
|
57 " movl 4+%4,%%edx\n\t"
|
|
58 " leal (%%ebx,%%edx,4),%%eax\n\t"
|
|
59 " pushl %%eax\n\t"
|
|
60 " movl 8+%4,%%eax\n\t"
|
|
61 " incl %%eax\n\t"
|
|
62 " andl $15,%%eax\n\t"
|
|
63 " leal 1088(,%%eax,4),%%eax\n\t"
|
|
64 " addl %%ebx,%%eax\n\t"
|
|
65 " jmp .L74\n\t"
|
|
66 ".L50:\n\t"
|
|
67 " leal 1088(%%ecx),%%ebx\n\t"
|
|
68 " leal 1(%%ebp),%%edx\n\t"
|
|
69 " movl %%edx,%4\n\t"
|
|
70 " pushl %%eax\n\t"
|
|
71 " leal 1092(%%ecx,%%ebp,4),%%eax\n\t"
|
|
72 " pushl %%eax\n\t"
|
|
73 " leal (%%ecx,%%ebp,4),%%eax\n\t"
|
|
74 ".L74:\n\t"
|
|
75 " pushl %%eax\n\t"
|
4246
|
76 " call "MANGLE(dct64)"\n\t"
|
4147
|
77 " addl $12,%%esp\n\t"
|
|
78 " movl %4,%%edx\n\t"
|
|
79 " leal 0(,%%edx,4),%%edx\n\t"
|
4246
|
80 " movl $"MANGLE(decwin)"+64,%%eax\n\t"
|
4147
|
81 " movl %%eax,%%ecx\n\t"
|
|
82 " subl %%edx,%%ecx\n\t"
|
|
83 " movl $16,%%ebp\n\t"
|
|
84 ".L55:\n\t"
|
|
85 " flds (%%ecx)\n\t"
|
|
86 " fmuls (%%ebx)\n\t"
|
|
87 " flds 4(%%ecx)\n\t"
|
|
88 " fmuls 4(%%ebx)\n\t"
|
|
89 " fxch %%st(1)\n\t"
|
|
90 " flds 8(%%ecx)\n\t"
|
|
91 " fmuls 8(%%ebx)\n\t"
|
|
92 " fxch %%st(2)\n\t"
|
|
93 " fsubrp %%st,%%st(1)\n\t"
|
|
94 " flds 12(%%ecx)\n\t"
|
|
95 " fmuls 12(%%ebx)\n\t"
|
|
96 " fxch %%st(2)\n\t"
|
|
97 " faddp %%st,%%st(1)\n\t"
|
|
98 " flds 16(%%ecx)\n\t"
|
|
99 " fmuls 16(%%ebx)\n\t"
|
|
100 " fxch %%st(2)\n\t"
|
|
101 " fsubrp %%st,%%st(1)\n\t"
|
|
102 " flds 20(%%ecx)\n\t"
|
|
103 " fmuls 20(%%ebx)\n\t"
|
|
104 " fxch %%st(2)\n\t"
|
|
105 " faddp %%st,%%st(1)\n\t"
|
|
106 " flds 24(%%ecx)\n\t"
|
|
107 " fmuls 24(%%ebx)\n\t"
|
|
108 " fxch %%st(2)\n\t"
|
|
109 " fsubrp %%st,%%st(1)\n\t"
|
|
110 " flds 28(%%ecx)\n\t"
|
|
111 " fmuls 28(%%ebx)\n\t"
|
|
112 " fxch %%st(2)\n\t"
|
|
113 " faddp %%st,%%st(1)\n\t"
|
|
114 " flds 32(%%ecx)\n\t"
|
|
115 " fmuls 32(%%ebx)\n\t"
|
|
116 " fxch %%st(2)\n\t"
|
|
117 " fsubrp %%st,%%st(1)\n\t"
|
|
118 " flds 36(%%ecx)\n\t"
|
|
119 " fmuls 36(%%ebx)\n\t"
|
|
120 " fxch %%st(2)\n\t"
|
|
121 " faddp %%st,%%st(1)\n\t"
|
|
122 " flds 40(%%ecx)\n\t"
|
|
123 " fmuls 40(%%ebx)\n\t"
|
|
124 " fxch %%st(2)\n\t"
|
|
125 " fsubrp %%st,%%st(1)\n\t"
|
|
126 " flds 44(%%ecx)\n\t"
|
|
127 " fmuls 44(%%ebx)\n\t"
|
|
128 " fxch %%st(2)\n\t"
|
|
129 " faddp %%st,%%st(1)\n\t"
|
|
130 " flds 48(%%ecx)\n\t"
|
|
131 " fmuls 48(%%ebx)\n\t"
|
|
132 " fxch %%st(2)\n\t"
|
|
133 " fsubrp %%st,%%st(1)\n\t"
|
|
134 " flds 52(%%ecx)\n\t"
|
|
135 " fmuls 52(%%ebx)\n\t"
|
|
136 " fxch %%st(2)\n\t"
|
|
137 " faddp %%st,%%st(1)\n\t"
|
|
138 " flds 56(%%ecx)\n\t"
|
|
139 " fmuls 56(%%ebx)\n\t"
|
|
140 " fxch %%st(2)\n\t"
|
|
141 " fsubrp %%st,%%st(1)\n\t"
|
|
142 " flds 60(%%ecx)\n\t"
|
|
143 " fmuls 60(%%ebx)\n\t"
|
|
144 " fxch %%st(2)\n\t"
|
|
145 " subl $4,%%esp\n\t"
|
|
146 " faddp %%st,%%st(1)\n\t"
|
|
147 " fxch %%st(1)\n\t"
|
|
148 " fsubrp %%st,%%st(1)\n\t"
|
|
149 " fistpl (%%esp)\n\t"
|
|
150 " popl %%eax\n\t"
|
|
151 " cmpl $32767,%%eax\n\t"
|
|
152 " jg 1f\n\t"
|
|
153 " cmpl $-32768,%%eax\n\t"
|
|
154 " jl 2f\n\t"
|
|
155 " movw %%ax,(%%esi)\n\t"
|
|
156 " jmp 4f\n\t"
|
|
157 "1: movw $32767,(%%esi)\n\t"
|
|
158 " jmp 3f\n\t"
|
|
159 "2: movw $-32768,(%%esi)\n\t"
|
|
160 "3: incl %%edi\n\t"
|
|
161 "4:\n\t"
|
|
162 ".L54:\n\t"
|
|
163 " addl $64,%%ebx\n\t"
|
|
164 " subl $-128,%%ecx\n\t"
|
|
165 " addl $4,%%esi\n\t"
|
|
166 " decl %%ebp\n\t"
|
|
167 " jnz .L55\n\t"
|
|
168 " flds (%%ecx)\n\t"
|
|
169 " fmuls (%%ebx)\n\t"
|
|
170 " flds 8(%%ecx)\n\t"
|
|
171 " fmuls 8(%%ebx)\n\t"
|
|
172 " flds 16(%%ecx)\n\t"
|
|
173 " fmuls 16(%%ebx)\n\t"
|
|
174 " fxch %%st(2)\n\t"
|
|
175 " faddp %%st,%%st(1)\n\t"
|
|
176 " flds 24(%%ecx)\n\t"
|
|
177 " fmuls 24(%%ebx)\n\t"
|
|
178 " fxch %%st(2)\n\t"
|
|
179 " faddp %%st,%%st(1)\n\t"
|
|
180 " flds 32(%%ecx)\n\t"
|
|
181 " fmuls 32(%%ebx)\n\t"
|
|
182 " fxch %%st(2)\n\t"
|
|
183 " faddp %%st,%%st(1)\n\t"
|
|
184 " flds 40(%%ecx)\n\t"
|
|
185 " fmuls 40(%%ebx)\n\t"
|
|
186 " fxch %%st(2)\n\t"
|
|
187 " faddp %%st,%%st(1)\n\t"
|
|
188 " flds 48(%%ecx)\n\t"
|
|
189 " fmuls 48(%%ebx)\n\t"
|
|
190 " fxch %%st(2)\n\t"
|
|
191 " faddp %%st,%%st(1)\n\t"
|
|
192 " flds 56(%%ecx)\n\t"
|
|
193 " fmuls 56(%%ebx)\n\t"
|
|
194 " fxch %%st(2)\n\t"
|
|
195 " subl $4,%%esp\n\t"
|
|
196 " faddp %%st,%%st(1)\n\t"
|
|
197 " fxch %%st(1)\n\t"
|
|
198 " faddp %%st,%%st(1)\n\t"
|
|
199 " fistpl (%%esp)\n\t"
|
|
200 " popl %%eax\n\t"
|
|
201 " cmpl $32767,%%eax\n\t"
|
|
202 " jg 1f\n\t"
|
|
203 " cmpl $-32768,%%eax\n\t"
|
|
204 " jl 2f\n\t"
|
|
205 " movw %%ax,(%%esi)\n\t"
|
|
206 " jmp 4f\n\t"
|
|
207 "1: movw $32767,(%%esi)\n\t"
|
|
208 " jmp 3f\n\t"
|
|
209 "2: movw $-32768,(%%esi)\n\t"
|
|
210 "3: incl %%edi\n\t"
|
|
211 "4:\n\t"
|
|
212 ".L62:\n\t"
|
|
213 " addl $-64,%%ebx\n\t"
|
|
214 " addl $4,%%esi\n\t"
|
|
215 " movl %4,%%edx\n\t"
|
|
216 " leal -128(%%ecx,%%edx,8),%%ecx\n\t"
|
|
217 " movl $15,%%ebp\n\t"
|
|
218 ".L68:\n\t"
|
|
219 " flds -4(%%ecx)\n\t"
|
|
220 " fchs\n\t"
|
|
221 " fmuls (%%ebx)\n\t"
|
|
222 " flds -8(%%ecx)\n\t"
|
|
223 " fmuls 4(%%ebx)\n\t"
|
|
224 " fxch %%st(1)\n\t"
|
|
225 " flds -12(%%ecx)\n\t"
|
|
226 " fmuls 8(%%ebx)\n\t"
|
|
227 " fxch %%st(2)\n\t"
|
|
228 " fsubrp %%st,%%st(1)\n\t"
|
|
229 " flds -16(%%ecx)\n\t"
|
|
230 " fmuls 12(%%ebx)\n\t"
|
|
231 " fxch %%st(2)\n\t"
|
|
232 " fsubrp %%st,%%st(1)\n\t"
|
|
233 " flds -20(%%ecx)\n\t"
|
|
234 " fmuls 16(%%ebx)\n\t"
|
|
235 " fxch %%st(2)\n\t"
|
|
236 " fsubrp %%st,%%st(1)\n\t"
|
|
237 " flds -24(%%ecx)\n\t"
|
|
238 " fmuls 20(%%ebx)\n\t"
|
|
239 " fxch %%st(2)\n\t"
|
|
240 " fsubrp %%st,%%st(1)\n\t"
|
|
241 " flds -28(%%ecx)\n\t"
|
|
242 " fmuls 24(%%ebx)\n\t"
|
|
243 " fxch %%st(2)\n\t"
|
|
244 " fsubrp %%st,%%st(1)\n\t"
|
|
245 " flds -32(%%ecx)\n\t"
|
|
246 " fmuls 28(%%ebx)\n\t"
|
|
247 " fxch %%st(2)\n\t"
|
|
248 " fsubrp %%st,%%st(1)\n\t"
|
|
249 " flds -36(%%ecx)\n\t"
|
|
250 " fmuls 32(%%ebx)\n\t"
|
|
251 " fxch %%st(2)\n\t"
|
|
252 " fsubrp %%st,%%st(1)\n\t"
|
|
253 " flds -40(%%ecx)\n\t"
|
|
254 " fmuls 36(%%ebx)\n\t"
|
|
255 " fxch %%st(2)\n\t"
|
|
256 " fsubrp %%st,%%st(1)\n\t"
|
|
257 " flds -44(%%ecx)\n\t"
|
|
258 " fmuls 40(%%ebx)\n\t"
|
|
259 " fxch %%st(2)\n\t"
|
|
260 " fsubrp %%st,%%st(1)\n\t"
|
|
261 " flds -48(%%ecx)\n\t"
|
|
262 " fmuls 44(%%ebx)\n\t"
|
|
263 " fxch %%st(2)\n\t"
|
|
264 " fsubrp %%st,%%st(1)\n\t"
|
|
265 " flds -52(%%ecx)\n\t"
|
|
266 " fmuls 48(%%ebx)\n\t"
|
|
267 " fxch %%st(2)\n\t"
|
|
268 " fsubrp %%st,%%st(1)\n\t"
|
|
269 " flds -56(%%ecx)\n\t"
|
|
270 " fmuls 52(%%ebx)\n\t"
|
|
271 " fxch %%st(2)\n\t"
|
|
272 " fsubrp %%st,%%st(1)\n\t"
|
|
273 " flds -60(%%ecx)\n\t"
|
|
274 " fmuls 56(%%ebx)\n\t"
|
|
275 " fxch %%st(2)\n\t"
|
|
276 " fsubrp %%st,%%st(1)\n\t"
|
|
277 " flds (%%ecx)\n\t"
|
|
278 " fmuls 60(%%ebx)\n\t"
|
|
279 " fxch %%st(2)\n\t"
|
|
280 " subl $4,%%esp\n\t"
|
|
281 " fsubrp %%st,%%st(1)\n\t"
|
|
282 " fxch %%st(1)\n\t"
|
|
283 " fsubrp %%st,%%st(1)\n\t"
|
|
284 " fistpl (%%esp)\n\t"
|
|
285 " popl %%eax\n\t"
|
|
286 " cmpl $32767,%%eax\n\t"
|
|
287 " jg 1f\n\t"
|
|
288 " cmpl $-32768,%%eax\n\t"
|
|
289 " jl 2f\n\t"
|
|
290 " movw %%ax,(%%esi)\n\t"
|
|
291 " jmp 4f\n\t"
|
|
292 "1: movw $32767,(%%esi)\n\t"
|
|
293 " jmp 3f\n\t"
|
|
294 "2: movw $-32768,(%%esi)\n\t"
|
|
295 "3: incl %%edi\n\t"
|
|
296 "4:\n\t"
|
|
297 ".L67:\n\t"
|
|
298 " addl $-64,%%ebx\n\t"
|
|
299 " addl $-128,%%ecx\n\t"
|
|
300 " addl $4,%%esi\n\t"
|
|
301 " decl %%ebp\n\t"
|
|
302 " jnz .L68\n\t"
|
|
303 " movl %%edi,%%eax\n\t"
|
|
304 :"=a"(retval)
|
|
305 :"m"(bandPtr),"m"(channel),"m"(samples),"m"(tmp[0])
|
|
306 :"memory","%ebp","%edi","%esi","%ebx");
|
|
307 return retval;
|
|
308 }
|