Mercurial > audlegacy
annotate Plugins/Input/mpg123/decode_i586.s @ 130:7dc049844b73 trunk
[svn] Fix libmikmod test.
author | nenolod |
---|---|
date | Wed, 09 Nov 2005 19:57:44 -0800 |
parents | 05d824e30afd |
children |
rev | line source |
---|---|
127
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
1 # |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
2 # synth_1to1 works the same way as the c version of this |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
3 # file. only two types of changes have been made: |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
4 # - reordered floating point instructions to |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
5 # prevent pipeline stalls |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
6 # - made WRITE_SAMPLE use integer instead of |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
7 # (slower) floating point |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
8 # all kinds of x86 processors should benefit from these |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
9 # modifications. |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
10 # |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
11 # useful sources of information on optimizing x86 code include: |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
12 # |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
13 # Intel Architecture Optimization Manual |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
14 # http://www.intel.com/design/pentium/manuals/242816.htm |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
15 # |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
16 # Cyrix 6x86 Instruction Set Summary |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
17 # ftp://ftp.cyrix.com/6x86/6x-dbch6.pdf |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
18 # |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
19 # AMD-K5 Processor Software Development |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
20 # http://www.amd.com/products/cpg/techdocs/appnotes/20007e.pdf |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
21 # |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
22 # Stefan Bieschewski <stb@acm.org> |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
23 # |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
24 # You can use this part under GPL. |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
25 # |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
26 # $Id: decode_i586.s,v 1.3 2000/10/25 11:05:23 hippm Exp $ |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
27 # |
61 | 28 .bss |
# buffs: 4352-byte common block with 4-byte alignment.  The routine below
# addresses it as two 2176-byte regions (buffs and buffs+2176), and each
# region as two 1088-byte halves (offset 0 and offset 1088).  Every access
# is a 32-bit float load (flds/fmuls), i.e. 272 floats per half.
29 .comm buffs,4352,4 | |
30 .data | |
31 .align 4 | |
# bo: position counter that persists between calls.  It starts at 1 and is
# replaced by (bo - 1) & 15 each time mpg123_synth_1to1_pent is entered
# with its second argument equal to zero.
32 bo: | |
33 .long 1 | |
34 .section .rodata | |
35 .align 8 | |
# .LC0: the two 32-bit words (low word first) of the IEEE-754 double
# 0x40dfffc000000000 = 32767.0.  No instruction in this listing references
# it; presumably a leftover from a floating-point clamp -- TODO confirm
# before removing.
36 .LC0: | |
37 .long 0x0,0x40dfffc0 | |
38 .align 8 | |
# .LC1: IEEE-754 double 0xc0e0000000000000 = -32768.0.  Also unreferenced
# in this listing.
39 .LC1: | |
40 .long 0x0,0xc0e00000 | |
41 .align 8 | |
42 .text | |
# ----------------------------------------------------------------------
# mpg123_synth_1to1_pent
#
# Calls mpg123_dct64 to refresh part of buffs, then forms 32 windowed
# sums of products (mpg123_decwin coefficients times buffs values),
# converts each sum to an integer, clamps it to [-32768, 32767] and
# stores it as a 16-bit value.  Successive stores are 4 bytes apart.
#
# Arguments (on the stack; offsets are valid once the prologue has run,
# i.e. after the 12-byte reservation and the four register pushes):
#   32(%esp)  arg1  handed unchanged to mpg123_dct64 as its third
#                   argument (presumably the sub-band sample vector --
#                   confirm against the C prototype)
#   36(%esp)  arg2  zero: step `bo` and use buffs;
#                   non-zero: leave `bo` alone, use buffs+2176 and start
#                   the output 2 bytes later (presumably the channel
#                   index of interleaved 16-bit stereo -- confirm)
#   40(%esp)  arg3  output pointer
#
# Returns:  %eax = number of samples that had to be clamped.
#
# Registers: %ebp/%edi/%esi/%ebx are saved and restored here;
#            %eax/%ecx/%edx are scratch.  %ebx, %esi and %edi are kept
#            live across the call to mpg123_dct64, so that routine must
#            preserve them.
# Roles after the call: %ebx = read cursor in buffs, %ecx = read cursor
#            in mpg123_decwin, %esi = output cursor, %edi = clamp count,
#            %ebp = loop counter.
# Local:     16(%esp) holds the window phase (bo when bo is odd,
#            bo + 1 when bo is even).
#
# Note on fsubrp: with GNU as in AT&T mode, `fsubrp %st,%st(1)` leaves
# st(1) - st(0) on the stack (AT&T's historical operand reversal for
# non-commutative x87 register forms), so it subtracts the newer product
# from the running sum.
# ----------------------------------------------------------------------
43 .globl mpg123_synth_1to1_pent | |
44 mpg123_synth_1to1_pent: | |
# Prologue: 12 bytes of locals, then save the callee-saved registers.
45 subl $12,%esp | |
46 pushl %ebp | |
47 pushl %edi | |
48 pushl %esi | |
49 pushl %ebx | |
# %eax = arg1 (kept untouched until it is pushed for mpg123_dct64).
50 movl 32(%esp),%eax | |
# %esi = arg3, the output cursor.
51 movl 40(%esp),%esi | |
# %edi = clamp counter, starts at 0 (also serves as the zero for the
# comparison against arg2 below).
52 xorl %edi,%edi | |
53 movl bo,%ebp | |
# arg2 == 0 ?
54 cmpl %edi,36(%esp) | |
55 jne .L48 | |
# arg2 == 0: bo = (bo - 1) & 15, written back; work on the first region.
56 decl %ebp | |
57 andl $15,%ebp | |
58 movl %ebp,bo | |
59 movl $buffs,%ecx | |
60 jmp .L49 | |
61 .L48: | |
# arg2 != 0: output starts 2 bytes later; work on the second region.
# bo is used as loaded, without being updated.
62 addl $2,%esi | |
63 movl $buffs+2176,%ecx | |
64 .L49: | |
# Branch on the parity of bo; the two paths swap which half of the
# region is read by the window loops and which addresses go to dct64.
65 testl $1,%ebp | |
66 je .L50 | |
# --- bo odd ---
# %ebx = first half of the region; phase local = bo.
67 movl %ecx,%ebx | |
68 movl %ebp,16(%esp) | |
# Third dct64 argument: arg1.
69 pushl %eax | |
# The phase local is now at 20(%esp) because of the push above.
70 movl 20(%esp),%edx | |
# Second dct64 argument: first half + bo*4.
71 leal (%ebx,%edx,4),%eax | |
72 pushl %eax | |
# Phase local is at 24(%esp) after the second push.
73 movl 24(%esp),%eax | |
# First dct64 argument: second half (offset 1088) + ((bo + 1) & 15)*4.
74 incl %eax | |
75 andl $15,%eax | |
76 leal 1088(,%eax,4),%eax | |
77 addl %ebx,%eax | |
78 jmp .L74 | |
79 .L50: | |
# --- bo even ---
# %ebx = second half of the region; phase local = bo + 1.
80 leal 1088(%ecx),%ebx | |
81 leal 1(%ebp),%edx | |
82 movl %edx,16(%esp) | |
# Third dct64 argument: arg1.
83 pushl %eax | |
# Second dct64 argument: second half + (bo + 1)*4  (1092 = 1088 + 4).
84 leal 1092(%ecx,%ebp,4),%eax | |
85 pushl %eax | |
# First dct64 argument: first half + bo*4.
86 leal (%ecx,%ebp,4),%eax | |
87 .L74: | |
# Common tail: push the first argument, call, then drop the three
# arguments (12 bytes).
88 pushl %eax | |
89 call mpg123_dct64 | |
90 addl $12,%esp | |
# %ecx = mpg123_decwin + 64 - phase*4, i.e. float index 16 - phase.
91 movl 16(%esp),%edx | |
92 leal 0(,%edx,4),%edx | |
93 movl $mpg123_decwin+64,%eax | |
94 movl %eax,%ecx | |
95 subl %edx,%ecx | |
# ------------------------------------------------------------------
# Phase 1: 16 samples.  For each sample, with w = %ecx and b = %ebx as
# float arrays:
#   sum = w[0]*b[0] - w[1]*b[1] + w[2]*b[2] - ... + w[14]*b[14] - w[15]*b[15]
# The loads/multiplies of the next product are interleaved with the
# add/subtract of the previous one (fxch keeps the running sum in
# st(1)), which is the instruction reordering the file header mentions.
# ------------------------------------------------------------------
96 movl $16,%ebp | |
97 .L55: | |
98 flds (%ecx) | |
99 fmuls (%ebx) | |
100 flds 4(%ecx) | |
101 fmuls 4(%ebx) | |
102 fxch %st(1) | |
103 flds 8(%ecx) | |
104 fmuls 8(%ebx) | |
105 fxch %st(2) | |
106 fsubrp %st,%st(1) | |
107 flds 12(%ecx) | |
108 fmuls 12(%ebx) | |
109 fxch %st(2) | |
110 faddp %st,%st(1) | |
111 flds 16(%ecx) | |
112 fmuls 16(%ebx) | |
113 fxch %st(2) | |
114 fsubrp %st,%st(1) | |
115 flds 20(%ecx) | |
116 fmuls 20(%ebx) | |
117 fxch %st(2) | |
118 faddp %st,%st(1) | |
119 flds 24(%ecx) | |
120 fmuls 24(%ebx) | |
121 fxch %st(2) | |
122 fsubrp %st,%st(1) | |
123 flds 28(%ecx) | |
124 fmuls 28(%ebx) | |
125 fxch %st(2) | |
126 faddp %st,%st(1) | |
127 flds 32(%ecx) | |
128 fmuls 32(%ebx) | |
129 fxch %st(2) | |
130 fsubrp %st,%st(1) | |
131 flds 36(%ecx) | |
132 fmuls 36(%ebx) | |
133 fxch %st(2) | |
134 faddp %st,%st(1) | |
135 flds 40(%ecx) | |
136 fmuls 40(%ebx) | |
137 fxch %st(2) | |
138 fsubrp %st,%st(1) | |
139 flds 44(%ecx) | |
140 fmuls 44(%ebx) | |
141 fxch %st(2) | |
142 faddp %st,%st(1) | |
143 flds 48(%ecx) | |
144 fmuls 48(%ebx) | |
145 fxch %st(2) | |
146 fsubrp %st,%st(1) | |
147 flds 52(%ecx) | |
148 fmuls 52(%ebx) | |
149 fxch %st(2) | |
150 faddp %st,%st(1) | |
151 flds 56(%ecx) | |
152 fmuls 56(%ebx) | |
153 fxch %st(2) | |
154 fsubrp %st,%st(1) | |
155 flds 60(%ecx) | |
156 fmuls 60(%ebx) | |
157 fxch %st(2) | |
# Reserve a 4-byte stack slot for the integer conversion; placed between
# FPU instructions rather than immediately before the store.
158 subl $4,%esp | |
159 faddp %st,%st(1) | |
160 fxch %st(1) | |
161 fsubrp %st,%st(1) | |
# Convert the sum to a 32-bit integer (using the FPU's current rounding
# mode), pop it from the x87 stack, and fetch it into %eax.
162 fistpl (%esp) | |
163 popl %eax | |
# Integer clamp to the signed 16-bit range.  In range: store the low
# 16 bits and skip the counter.  Out of range: store the limit and fall
# into label 3 to count the clamp.
164 cmpl $32767,%eax | |
165 jg 1f | |
166 cmpl $-32768,%eax | |
167 jl 2f | |
168 movw %ax,(%esi) | |
169 jmp 4f | |
170 1: movw $32767,(%esi) | |
171 jmp 3f | |
172 2: movw $-32768,(%esi) | |
173 3: incl %edi | |
174 4: | |
175 .L54: | |
# Next sample: buffs cursor +64 bytes (16 floats), window cursor +128
# bytes (32 floats; written as "subtract -128", presumably so the
# constant fits a signed 8-bit immediate), output +4 bytes.
176 addl $64,%ebx | |
177 subl $-128,%ecx | |
178 addl $4,%esi | |
179 decl %ebp | |
180 jnz .L55 | |
# ------------------------------------------------------------------
# Phase 2: one sample built from the even-indexed taps only, all added:
#   sum = w[0]*b[0] + w[2]*b[2] + w[4]*b[4] + ... + w[14]*b[14]
# ------------------------------------------------------------------
181 flds (%ecx) | |
182 fmuls (%ebx) | |
183 flds 8(%ecx) | |
184 fmuls 8(%ebx) | |
185 flds 16(%ecx) | |
186 fmuls 16(%ebx) | |
187 fxch %st(2) | |
188 faddp %st,%st(1) | |
189 flds 24(%ecx) | |
190 fmuls 24(%ebx) | |
191 fxch %st(2) | |
192 faddp %st,%st(1) | |
193 flds 32(%ecx) | |
194 fmuls 32(%ebx) | |
195 fxch %st(2) | |
196 faddp %st,%st(1) | |
197 flds 40(%ecx) | |
198 fmuls 40(%ebx) | |
199 fxch %st(2) | |
200 faddp %st,%st(1) | |
201 flds 48(%ecx) | |
202 fmuls 48(%ebx) | |
203 fxch %st(2) | |
204 faddp %st,%st(1) | |
205 flds 56(%ecx) | |
206 fmuls 56(%ebx) | |
207 fxch %st(2) | |
# Stack slot for the integer conversion (same pattern as phase 1).
208 subl $4,%esp | |
209 faddp %st,%st(1) | |
210 fxch %st(1) | |
211 faddp %st,%st(1) | |
212 fistpl (%esp) | |
213 popl %eax | |
# Clamp, store and count exactly as in phase 1.
214 cmpl $32767,%eax | |
215 jg 1f | |
216 cmpl $-32768,%eax | |
217 jl 2f | |
218 movw %ax,(%esi) | |
219 jmp 4f | |
220 1: movw $32767,(%esi) | |
221 jmp 3f | |
222 2: movw $-32768,(%esi) | |
223 3: incl %edi | |
224 4: | |
225 .L62: | |
# Set up phase 3: step the buffs cursor back 64 bytes, advance the
# output, and move the window cursor to (current - 128 + phase*8) bytes.
226 addl $-64,%ebx | |
227 addl $4,%esi | |
228 movl 16(%esp),%edx | |
229 leal -128(%ecx,%edx,8),%ecx | |
# ------------------------------------------------------------------
# Phase 3: 15 samples.  The window is read backwards and every product
# is subtracted:
#   sum = -w[-1]*b[0] - w[-2]*b[1] - ... - w[-15]*b[14] - w[0]*b[15]
# The first product is negated with fchs; the remaining ones are
# subtracted with fsubrp.
# ------------------------------------------------------------------
230 movl $15,%ebp | |
231 .L68: | |
232 flds -4(%ecx) | |
233 fchs | |
234 fmuls (%ebx) | |
235 flds -8(%ecx) | |
236 fmuls 4(%ebx) | |
237 fxch %st(1) | |
238 flds -12(%ecx) | |
239 fmuls 8(%ebx) | |
240 fxch %st(2) | |
241 fsubrp %st,%st(1) | |
242 flds -16(%ecx) | |
243 fmuls 12(%ebx) | |
244 fxch %st(2) | |
245 fsubrp %st,%st(1) | |
246 flds -20(%ecx) | |
247 fmuls 16(%ebx) | |
248 fxch %st(2) | |
249 fsubrp %st,%st(1) | |
250 flds -24(%ecx) | |
251 fmuls 20(%ebx) | |
252 fxch %st(2) | |
253 fsubrp %st,%st(1) | |
254 flds -28(%ecx) | |
255 fmuls 24(%ebx) | |
256 fxch %st(2) | |
257 fsubrp %st,%st(1) | |
258 flds -32(%ecx) | |
259 fmuls 28(%ebx) | |
260 fxch %st(2) | |
261 fsubrp %st,%st(1) | |
262 flds -36(%ecx) | |
263 fmuls 32(%ebx) | |
264 fxch %st(2) | |
265 fsubrp %st,%st(1) | |
266 flds -40(%ecx) | |
267 fmuls 36(%ebx) | |
268 fxch %st(2) | |
269 fsubrp %st,%st(1) | |
270 flds -44(%ecx) | |
271 fmuls 40(%ebx) | |
272 fxch %st(2) | |
273 fsubrp %st,%st(1) | |
274 flds -48(%ecx) | |
275 fmuls 44(%ebx) | |
276 fxch %st(2) | |
277 fsubrp %st,%st(1) | |
278 flds -52(%ecx) | |
279 fmuls 48(%ebx) | |
280 fxch %st(2) | |
281 fsubrp %st,%st(1) | |
282 flds -56(%ecx) | |
283 fmuls 52(%ebx) | |
284 fxch %st(2) | |
285 fsubrp %st,%st(1) | |
286 flds -60(%ecx) | |
287 fmuls 56(%ebx) | |
288 fxch %st(2) | |
289 fsubrp %st,%st(1) | |
# Last tap uses the window entry at offset 0, not -64.
290 flds (%ecx) | |
291 fmuls 60(%ebx) | |
292 fxch %st(2) | |
# Stack slot for the integer conversion (same pattern as phase 1).
293 subl $4,%esp | |
294 fsubrp %st,%st(1) | |
295 fxch %st(1) | |
296 fsubrp %st,%st(1) | |
297 fistpl (%esp) | |
298 popl %eax | |
# Clamp, store and count exactly as in phase 1.
299 cmpl $32767,%eax | |
300 jg 1f | |
301 cmpl $-32768,%eax | |
302 jl 2f | |
303 movw %ax,(%esi) | |
304 jmp 4f | |
305 1: movw $32767,(%esi) | |
306 jmp 3f | |
307 2: movw $-32768,(%esi) | |
308 3: incl %edi | |
309 4: | |
310 .L67: | |
# Next sample: both read cursors move backwards (buffs -64 bytes,
# window -128 bytes); the output still moves forward by 4 bytes.
311 addl $-64,%ebx | |
312 addl $-128,%ecx | |
313 addl $4,%esi | |
314 decl %ebp | |
315 jnz .L68 | |
# Epilogue: return the clamp count, restore the saved registers in
# reverse push order and release the 12 bytes of locals.
316 movl %edi,%eax | |
317 popl %ebx | |
318 popl %esi | |
319 popl %edi | |
320 popl %ebp | |
321 addl $12,%esp | |
322 ret | |
323 |