Mercurial > audlegacy
annotate Plugins/Input/mpg123/decode_3dnow.s @ 130:7dc049844b73 trunk
[svn] Fix libmikmod test.
author | nenolod |
---|---|
date | Wed, 09 Nov 2005 19:57:44 -0800 |
parents | 05d824e30afd |
children |
rev | line source |
---|---|
127
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
1 # |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
2 # decode_3dnow.s - 3DNow! optimized synth_1to1() |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
3 # |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
4 # This code is based on 'decode_3dnow.s' by Syuuhei Kashiyama |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
5 # <squash@mb.kcom.ne.jp>; only three types of changes have been made: |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
6 # |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
7 # - remove PREFETCH instruction for speedup |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
8 # - change function name to support automatic 3DNow! detection |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
9 # - femms moved to before 'call dct64_3dnow' |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
10 # |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
11 # You can find Kashiyama's original 3dnow! support patch |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
12 # (for mpg123-0.59o) at |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
13 # http://user.ecc.u-tokyo.ac.jp/~g810370/linux-simd/ (Japanese). |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
14 # |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
15 # by KIMURA Takuhiro <kim@hannah.ipc.miyakyo-u.ac.jp> - until 31.Mar.1999 |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
16 # <kim@comtec.co.jp> - after 1.Apr.1999 |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
17 # |
61 | 18 |
127
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
19 ##/ |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
20 ##/ Replacement of synth_1to1() with AMD's 3DNow! SIMD operations support |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
21 ##/ |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
22 ##/ Syuuhei Kashiyama <squash@mb.kcom.ne.jp> |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
23 ##/ |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
24 ##/ The author of this program disclaim whole expressed or implied |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
25 ##/ warranties with regard to this program, and in no event shall the |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
26 ##/ author of this program liable to whatever resulted from the use of |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
27 ##/ this program. Use it at your own risk. |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
28 ##/ |
61 | 29 |
# Static storage used by mpg123_synth_1to1_3dnow.
#
# buffs.40: file-local (.local) common block of 4352 bytes, requested with an
#           alignment of 32. The function addresses it as two halves,
#           buffs.40 and buffs.40+2176.
# bo.42:    4-byte object in .data, initialised to 1. The function steps it
#           as (bo - 1) & 15 and uses its low bit to choose a code path.
30 .local buffs.40 | |
31 .comm buffs.40,4352,32 | |
32 .data | |
33 .align 4 | |
34 .type bo.42,@object | |
35 .size bo.42,4 | |
36 bo.42: | |
37 .long 1 | |
# mpg123_synth_1to1_3dnow - 3DNow! replacement of synth_1to1() (per the file
# header). 32-bit x86, AT&T syntax.
#
# Stack frame: `subl $24` plus four pushes leave 40 bytes between %esp and the
# return address, so the caller's stack arguments start at 44(%esp):
#   44(%esp)  forwarded unchanged as the last argument of mpg123_dct64_3dnow
#   48(%esp)  zero / non-zero selector (see .L25)
#   52(%esp)  base address of the output area
#   56(%esp)  pointer to a 32-bit byte offset: read on entry, advanced by 128
#             on exit
# (presumably bandPtr, channel, out, pnt of the C synth_1to1 - TODO confirm
#  against the caller)
#
# Locals (offsets valid while no call arguments are pushed):
#   16(%esp)  current output pointer
#   20(%esp)  byte offset subtracted from mpg123_decwin+64
#   36(%esp)  index used to reposition the window pointer for the second loop
#
# Output: 32 values are written as 16-bit stores, 4 bytes apart.
# Returns %eax = %ebp (see the note before the epilogue).
38 .text | |
39 .globl mpg123_synth_1to1_3dnow | |
40 .type mpg123_synth_1to1_3dnow,@function | |
41 mpg123_synth_1to1_3dnow: | |
42 subl $24,%esp | |
43 pushl %ebp | |
44 pushl %edi | |
45 xorl %ebp,%ebp | |
46 pushl %esi | |
47 pushl %ebx | |
# output pointer = 52(%esp) + *(56(%esp)); kept in 16(%esp).
# %ebx = selector argument 48(%esp).
48 movl 56(%esp),%esi | |
49 movl 52(%esp),%edi | |
50 movl 0(%esi),%esi | |
51 movl 48(%esp),%ebx | |
52 addl %edi,%esi | |
53 movl %esi,16(%esp) | |
54 | |
# Issued before the call to mpg123_dct64_3dnow (see the change list in the
# file header).
55 femms | |
56 | |
# Selector == 0: bo = (bo - 1) & 15 and work in the first half of buffs.40.
# Selector != 0: bo is left alone, the output pointer moves 2 bytes forward
#                and the second half (buffs.40+2176) is used.
# No branch to .L25 is visible in this file.
127
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
57 .L25: |
61 | 58 testl %ebx,%ebx |
59 jne .L26 | |
60 decl bo.42 | |
61 movl $buffs.40,%ecx | |
62 andl $15,bo.42 | |
63 jmp .L27 | |
64 .L26: | |
65 addl $2,16(%esp) | |
66 movl $buffs.40+2176,%ecx | |
67 .L27: | |
# %edx = bo, %ecx = base of the selected half. Branch on the low bit of bo.
68 movl bo.42,%edx | |
69 testb $1,%dl | |
70 je .L28 | |
# bo odd: 36(%esp) = bo, %ebx = base, local 20 = 4*bo.
# Arguments pushed last-to-first: 44(%esp), base + 4*bo,
# base + 1088 + 4*((bo + 1) & 15).
71 movl %edx,36(%esp) | |
72 movl %ecx,%ebx | |
73 movl 44(%esp),%esi | |
74 movl %edx,%edi | |
75 pushl %esi | |
76 sall $2,%edi | |
77 movl %ebx,%eax | |
# One argument is already pushed, so 24(%esp) here is the local that is
# addressed as 20(%esp) once the arguments are popped.
78 movl %edi,24(%esp) | |
79 addl %edi,%eax | |
80 pushl %eax | |
81 movl %edx,%eax | |
82 incl %eax | |
83 andl $15,%eax | |
84 leal 1088(,%eax,4),%eax | |
85 addl %ebx,%eax | |
86 pushl %eax | |
87 call mpg123_dct64_3dnow | |
88 addl $12,%esp | |
89 jmp .L29 | |
90 .L28: | |
# bo even: 36(%esp) = bo + 1, %ebx = base + 1088, local 20 = 4*(bo + 1).
# Arguments pushed last-to-first: 44(%esp), base + 1092 + 4*bo, base + 4*bo.
# %esi and %ebx are read after the call, so this relies on
# mpg123_dct64_3dnow preserving them (callee-saved in the usual i386
# convention - TODO confirm).
91 leal 1(%edx),%esi | |
92 movl 44(%esp),%edi | |
93 movl %esi,36(%esp) | |
94 leal 1092(%ecx,%edx,4),%eax | |
95 pushl %edi | |
96 leal 1088(%ecx),%ebx | |
97 pushl %eax | |
98 sall $2,%esi | |
99 leal (%ecx,%edx,4),%eax | |
100 pushl %eax | |
101 call mpg123_dct64_3dnow | |
102 addl $12,%esp | |
103 movl %esi,20(%esp) | |
104 .L29: | |
# %edx = mpg123_decwin + 64 - local 20 (window pointer), %ebx = buffer
# pointer, %edi = output pointer, %ecx = 16 iterations.
105 movl $mpg123_decwin+64,%edx | |
106 movl $16,%ecx | |
107 subl 20(%esp),%edx | |
108 movl 16(%esp),%edi | |
109 | |
# Preload the first window/buffer pair; the loop reloads them for the next
# iteration after advancing the pointers.
110 movq (%edx),%mm0 | |
111 movq (%ebx),%mm1 | |
112 .align 32 | |
113 .L33: | |
# First loop, 16 outputs. Each iteration multiplies 16 floats at (%edx) with
# 16 floats at (%ebx), two per MMX register, and accumulates the packed
# products. The high lane is then subtracted from the low lane, giving
#   sum(even-indexed products) - sum(odd-indexed products).
# The result is converted to an integer (pf2id), saturated to 16 bits
# (packssdw) and stored at (%edi).
# Per iteration: %ebx += 64, %edx += 128, %edi += 4.
114 movq 8(%edx),%mm3 | |
115 pfmul %mm1,%mm0 | |
116 movq 8(%ebx),%mm4 | |
117 movq 16(%edx),%mm5 | |
118 pfmul %mm4,%mm3 | |
119 movq 16(%ebx),%mm6 | |
120 pfadd %mm3,%mm0 | |
121 movq 24(%edx),%mm1 | |
122 pfmul %mm6,%mm5 | |
123 movq 24(%ebx),%mm2 | |
124 pfadd %mm5,%mm0 | |
125 movq 32(%edx),%mm3 | |
126 pfmul %mm2,%mm1 | |
127 movq 32(%ebx),%mm4 | |
128 pfadd %mm1,%mm0 | |
129 movq 40(%edx),%mm5 | |
130 pfmul %mm4,%mm3 | |
131 movq 40(%ebx),%mm6 | |
132 pfadd %mm3,%mm0 | |
133 movq 48(%edx),%mm1 | |
134 pfmul %mm6,%mm5 | |
135 movq 48(%ebx),%mm2 | |
136 pfadd %mm0,%mm5 | |
137 movq 56(%edx),%mm3 | |
138 pfmul %mm1,%mm2 | |
139 movq 56(%ebx),%mm4 | |
140 pfadd %mm5,%mm2 | |
141 addl $64,%ebx | |
# Adds 128 to %edx (presumably written as a subtraction of -128 so the
# immediate fits in a signed byte).
142 subl $-128,%edx | |
143 movq (%edx),%mm0 | |
144 pfmul %mm4,%mm3 | |
145 movq (%ebx),%mm1 | |
146 pfadd %mm3,%mm2 | |
# low lane = low lane - high lane
147 movq %mm2,%mm3 | |
148 psrlq $32,%mm3 | |
149 pfsub %mm3,%mm2 | |
150 incl %ebp | |
151 pf2id %mm2,%mm2 | |
152 packssdw %mm2,%mm2 | |
153 movd %mm2,%eax | |
154 movw %ax,0(%edi) | |
155 addl $4,%edi | |
156 decl %ecx | |
157 jnz .L33 | |
158 | |
# 17th output: only the floats at byte offsets 0, 8, ..., 56 (every other
# element) of both arrays are multiplied; the eight products are summed
# (pfacc adds the two lanes), converted, saturated and stored.
159 movd (%ebx),%mm0 | |
160 movd (%edx),%mm1 | |
161 punpckldq 8(%ebx),%mm0 | |
162 punpckldq 8(%edx),%mm1 | |
163 movd 16(%ebx),%mm3 | |
164 movd 16(%edx),%mm4 | |
165 pfmul %mm1,%mm0 | |
166 punpckldq 24(%ebx),%mm3 | |
167 punpckldq 24(%edx),%mm4 | |
168 movd 32(%ebx),%mm5 | |
169 movd 32(%edx),%mm6 | |
170 pfmul %mm4,%mm3 | |
171 punpckldq 40(%ebx),%mm5 | |
172 punpckldq 40(%edx),%mm6 | |
173 pfadd %mm3,%mm0 | |
174 movd 48(%ebx),%mm1 | |
175 movd 48(%edx),%mm2 | |
176 pfmul %mm6,%mm5 | |
177 punpckldq 56(%ebx),%mm1 | |
178 punpckldq 56(%edx),%mm2 | |
179 pfadd %mm5,%mm0 | |
180 pfmul %mm2,%mm1 | |
181 pfadd %mm1,%mm0 | |
182 pfacc %mm1,%mm0 | |
183 pf2id %mm0,%mm0 | |
184 packssdw %mm0,%mm0 | |
185 movd %mm0,%eax | |
186 movw %ax,0(%edi) | |
# Set up the second loop: %ebx steps back 64 bytes and
# %edx = %edx - 128 + 8 * 36(%esp).
# NOTE(review): `movl $15,%ebp` discards every increment made to %ebp so
# far. With the 15 increments in the second loop the function always
# returns 30. Confirm whether a running count was intended.
187 incl %ebp | |
188 movl 36(%esp),%esi | |
189 addl $-64,%ebx | |
190 movl $15,%ebp | |
191 addl $4,%edi | |
192 leal -128(%edx,%esi,8),%edx | |
193 | |
# Second loop, 15 outputs. The buffer is read forwards from (%ebx) and the
# window backwards from -4(%edx). The first pair is preloaded here and
# reloaded inside the loop for the next iteration.
194 movl $15,%ecx | |
195 movd (%ebx),%mm0 | |
196 movd -4(%edx),%mm1 | |
197 punpckldq 4(%ebx),%mm0 | |
198 punpckldq -8(%edx),%mm1 | |
199 .align 32 | |
200 .L46: | |
# Each iteration pairs buffer floats at +0..+60 with window floats at
# -4, -8, ..., -60 and finally 0(%edx), sums the 16 products, converts the
# sum to an integer, negates it, saturates to 16 bits and stores it.
# Per iteration: %ebx -= 64, %edx -= 128, %edi += 4.
201 movd 8(%ebx),%mm3 | |
202 movd -12(%edx),%mm4 | |
203 pfmul %mm1,%mm0 | |
204 punpckldq 12(%ebx),%mm3 | |
205 punpckldq -16(%edx),%mm4 | |
206 movd 16(%ebx),%mm5 | |
207 movd -20(%edx),%mm6 | |
208 pfmul %mm4,%mm3 | |
209 punpckldq 20(%ebx),%mm5 | |
210 punpckldq -24(%edx),%mm6 | |
211 pfadd %mm3,%mm0 | |
127
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
212 movd 24(%ebx),%mm1 |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
213 movd -28(%edx),%mm2 |
61 | 214 pfmul %mm6,%mm5 |
215 punpckldq 28(%ebx),%mm1 | |
216 punpckldq -32(%edx),%mm2 | |
217 pfadd %mm5,%mm0 | |
127
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
218 movd 32(%ebx),%mm3 |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
219 movd -36(%edx),%mm4 |
61 | 220 pfmul %mm2,%mm1 |
221 punpckldq 36(%ebx),%mm3 | |
222 punpckldq -40(%edx),%mm4 | |
127
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
223 pfadd %mm1,%mm0 |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
224 movd 40(%ebx),%mm5 |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
225 movd -44(%edx),%mm6 |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
226 pfmul %mm4,%mm3 |
61 | 227 punpckldq 44(%ebx),%mm5 |
228 punpckldq -48(%edx),%mm6 | |
127
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
229 pfadd %mm3,%mm0 |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
230 movd 48(%ebx),%mm1 |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
231 movd -52(%edx),%mm2 |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
232 pfmul %mm6,%mm5 |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
233 punpckldq 52(%ebx),%mm1 |
61 | 234 punpckldq -56(%edx),%mm2 |
235 pfadd %mm0,%mm5 | |
127
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
236 movd 56(%ebx),%mm3 |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
237 movd -60(%edx),%mm4 |
61 | 238 pfmul %mm2,%mm1 |
239 punpckldq 60(%ebx),%mm3 | |
# The last window element of the pair is read at offset 0, not -64.
240 punpckldq (%edx),%mm4 | |
127
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
241 pfadd %mm1,%mm5 |
05d824e30afd
[svn] Synchronize mpg123 code with mpg123-0.59s.mc3. This brings us superior accuracy to libMAD, apparently.
nenolod
parents:
61
diff
changeset
|
242 addl $-128,%edx |
61 | 243 addl $-64,%ebx |
244 movd (%ebx),%mm0 | |
245 movd -4(%edx),%mm1 | |
246 pfmul %mm4,%mm3 | |
247 punpckldq 4(%ebx),%mm0 | |
248 punpckldq -8(%edx),%mm1 | |
249 pfadd %mm5,%mm3 | |
250 pfacc %mm3,%mm3 | |
251 incl %ebp | |
252 pf2id %mm3,%mm3 | |
# The integer result is negated in %eax before being saturated and stored.
253 movd %mm3,%eax | |
254 negl %eax | |
255 movd %eax,%mm3 | |
256 packssdw %mm3,%mm3 | |
257 movd %mm3,%eax | |
258 movw %ax,(%edi) | |
259 addl $4,%edi | |
260 decl %ecx | |
261 jnz .L46 | |
262 | |
# Epilogue: leave MMX/3DNow! state, advance *(56(%esp)) by 128 bytes
# (32 outputs at a 4-byte stride), return %ebp in %eax, then restore the
# saved registers and the 24-byte local area.
263 femms | |
264 movl 56(%esp),%esi | |
265 movl %ebp,%eax | |
266 subl $-128,0(%esi) | |
267 popl %ebx | |
268 popl %esi | |
269 popl %edi | |
270 popl %ebp | |
271 addl $24,%esp | |
272 ret |