Mercurial > mplayer.hg
annotate mp3lib/dct64_k7.c @ 26981:f3f553c03044
No need to set LC_ALL=C for individual shell commands,
it is already set from config.mak.
author | diego |
---|---|
date | Sat, 07 Jun 2008 12:01:29 +0000 |
parents | 1b739c2dc613 |
children | 08d18fe9da52 |
rev | line source |
---|---|
4148 | 1 /* |
2 * This code was taken from http://www.mpg123.org | |
3 * See ChangeLog of mpg123-0.59s-pre.1 for detail | |
4 * Applied to mplayer by Nick Kurshev <nickols_k@mail.ru> | |
5 * Partial 3dnowex-DSP! optimization by Nick Kurshev | |
6 * | |
7 * TODO: optimize scalar 3dnow! code | |
8 * Warning: Phases 7 & 8 are untested and are currently compiled out (#if 0) | |
9 */ | |
10 #define real float /* ugly - but only way */ | |
11 | |
16989 | 12 #include "config.h" |
13 #include "mangle.h" | |
5291 | 14 |
/*
 * 64-bit mask with only bit 63 set, i.e. the sign bit of the upper packed
 * float. The asm below loads it into mm7 and pxor's it into a register to
 * negate the high float while leaving the low float unchanged ("+1 | -1").
 * Referenced only from the inline-asm text through MANGLE(); attribute_used
 * presumably keeps the compiler from discarding it - TODO confirm (the macro
 * is defined outside this file).
 */
13918 | 15 static unsigned long long int attribute_used __attribute__((aligned(8))) x_plus_minus_3dnow = 0x8000000000000000ULL; |
/*
 * Constant 1.0f, loaded with movd into the low half of mm6; the high half is
 * then filled from 120(%ebx) (costab_mmx) by punpckldq.
 */
12292 | 16 static float attribute_used plus_1f = 1.0; |
4148 | 17 |
/*
 * dct64_MMX_3dnowex - dct64 kernel of mp3lib implemented as a single GCC
 * inline-asm statement for 32-bit x86 using MMX, 3DNow! and the extended
 * 3DNow! instructions pswapd and pf2iw.
 *
 * Parameters (all passed to the asm as memory operands and loaded into
 * registers by the first instructions):
 *   a  (%0 -> esi)  output; 16-bit results are stored with movw at byte
 *                   offsets 0, 32, 64, ..., 512 from a.
 *   b  (%1 -> edi)  output; 16-bit results are stored with movw at byte
 *                   offsets 32, 64, ..., 480 from b, and the final movsw
 *                   writes the word at offset 0.
 *   c  (%2 -> eax)  input; Phase 1 reads 128 bytes (32 floats) from it.
 *
 * Working storage:
 *   tmp[256] is scratch split in two 128-byte halves: ecx = &tmp[0],
 *   edx = &tmp[128]. ebx holds the address of the external table
 *   costab_mmx, which is read at byte offsets 0..120.
 *
 * Data flow between the phases, as visible in the loads/stores below:
 *   Phase 1      c (eax)          -> edx half
 *   Phase 2, 3   edx half         -> ecx half
 *   Phase 4      ecx half         -> edx half
 *   Phase 5      edx half         -> ecx half
 *   Phase 6      ecx+32..ecx+124  -> edx+32..edx+124 (scalar-style code)
 *   Phase 7, 8   disabled by #if 0
 *   Phase 9      ecx+0..ecx+28    -> 16-bit stores to a / b
 *   Phase 10     edx+32..edx+124  -> 16-bit stores to a / b
 *
 * In Phases 1-5 each step adds a pair of packed floats to a swapped
 * (pswapd) partner, stores the sum, and stores the difference (pfsub or
 * pfsubr) multiplied by a costab_mmx entry, swapped again.
 *
 * The statement declares eax, ebx, ecx, edx, esi, edi and "memory" as
 * clobbered; the MMX registers it uses are not listed. It ends with femms.
 * NOTE(review): ebx is used and clobbered here, which is commonly a problem
 * for position-independent builds on x86-32 - confirm build flags.
 */
23441 | 18 void dct64_MMX_3dnowex(short *a,short *b,real *c)
4148 | 19 {
20 char tmp[256]; | |
21 __asm __volatile( | |
/* Register setup: eax = c, edx = tmp + 128, esi = a, edi = b,
   ebx = address of costab_mmx, ecx = tmp. */
22 " movl %2,%%eax\n\t" | |
23 | |
24 " leal 128+%3,%%edx\n\t" | |
25 " movl %0,%%esi\n\t" | |
26 " movl %1,%%edi\n\t" | |
5291 | 27 " movl $"MANGLE(costab_mmx)",%%ebx\n\t" |
4148 | 28 " leal %3,%%ecx\n\t" |
29 | |
30 /* Phase 1*/ | |
31 " movq (%%eax), %%mm0\n\t" | |
32 " movq 8(%%eax), %%mm4\n\t" | |
33 " movq %%mm0, %%mm3\n\t" | |
34 " movq %%mm4, %%mm7\n\t" | |
18834
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
35 " pswapd 120(%%eax), %%mm1\n\t" |
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
36 " pswapd 112(%%eax), %%mm5\n\t" |
4148 | 37 " pfadd %%mm1, %%mm0\n\t" |
38 " pfadd %%mm5, %%mm4\n\t" | |
39 " movq %%mm0, (%%edx)\n\t" | |
40 " movq %%mm4, 8(%%edx)\n\t" | |
41 " pfsub %%mm1, %%mm3\n\t" | |
42 " pfsub %%mm5, %%mm7\n\t" | |
43 " pfmul (%%ebx), %%mm3\n\t" | |
44 " pfmul 8(%%ebx), %%mm7\n\t" | |
45 " pswapd %%mm3, %%mm3\n\t" | |
46 " pswapd %%mm7, %%mm7\n\t" | |
47 " movq %%mm3, 120(%%edx)\n\t" | |
48 " movq %%mm7, 112(%%edx)\n\t" | |
49 | |
50 " movq 16(%%eax), %%mm0\n\t" | |
51 " movq 24(%%eax), %%mm4\n\t" | |
52 " movq %%mm0, %%mm3\n\t" | |
53 " movq %%mm4, %%mm7\n\t" | |
18834
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
54 " pswapd 104(%%eax), %%mm1\n\t" |
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
55 " pswapd 96(%%eax), %%mm5\n\t" |
4148 | 56 " pfadd %%mm1, %%mm0\n\t" |
57 " pfadd %%mm5, %%mm4\n\t" | |
58 " movq %%mm0, 16(%%edx)\n\t" | |
59 " movq %%mm4, 24(%%edx)\n\t" | |
60 " pfsub %%mm1, %%mm3\n\t" | |
61 " pfsub %%mm5, %%mm7\n\t" | |
62 " pfmul 16(%%ebx), %%mm3\n\t" | |
63 " pfmul 24(%%ebx), %%mm7\n\t" | |
64 " pswapd %%mm3, %%mm3\n\t" | |
65 " pswapd %%mm7, %%mm7\n\t" | |
66 " movq %%mm3, 104(%%edx)\n\t" | |
67 " movq %%mm7, 96(%%edx)\n\t" | |
68 | |
69 " movq 32(%%eax), %%mm0\n\t" | |
70 " movq 40(%%eax), %%mm4\n\t" | |
71 " movq %%mm0, %%mm3\n\t" | |
72 " movq %%mm4, %%mm7\n\t" | |
18834
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
73 " pswapd 88(%%eax), %%mm1\n\t" |
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
74 " pswapd 80(%%eax), %%mm5\n\t" |
4148 | 75 " pfadd %%mm1, %%mm0\n\t" |
76 " pfadd %%mm5, %%mm4\n\t" | |
77 " movq %%mm0, 32(%%edx)\n\t" | |
78 " movq %%mm4, 40(%%edx)\n\t" | |
79 " pfsub %%mm1, %%mm3\n\t" | |
80 " pfsub %%mm5, %%mm7\n\t" | |
81 " pfmul 32(%%ebx), %%mm3\n\t" | |
82 " pfmul 40(%%ebx), %%mm7\n\t" | |
83 " pswapd %%mm3, %%mm3\n\t" | |
84 " pswapd %%mm7, %%mm7\n\t" | |
85 " movq %%mm3, 88(%%edx)\n\t" | |
86 " movq %%mm7, 80(%%edx)\n\t" | |
87 | |
88 " movq 48(%%eax), %%mm0\n\t" | |
89 " movq 56(%%eax), %%mm4\n\t" | |
90 " movq %%mm0, %%mm3\n\t" | |
91 " movq %%mm4, %%mm7\n\t" | |
18834
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
92 " pswapd 72(%%eax), %%mm1\n\t" |
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
93 " pswapd 64(%%eax), %%mm5\n\t" |
4148 | 94 " pfadd %%mm1, %%mm0\n\t" |
95 " pfadd %%mm5, %%mm4\n\t" | |
96 " movq %%mm0, 48(%%edx)\n\t" | |
97 " movq %%mm4, 56(%%edx)\n\t" | |
98 " pfsub %%mm1, %%mm3\n\t" | |
99 " pfsub %%mm5, %%mm7\n\t" | |
100 " pfmul 48(%%ebx), %%mm3\n\t" | |
101 " pfmul 56(%%ebx), %%mm7\n\t" | |
102 " pswapd %%mm3, %%mm3\n\t" | |
103 " pswapd %%mm7, %%mm7\n\t" | |
104 " movq %%mm3, 72(%%edx)\n\t" | |
105 " movq %%mm7, 64(%%edx)\n\t" | |
106 | |
107 /* Phase 2*/ | |
108 | |
109 " movq (%%edx), %%mm0\n\t" | |
110 " movq 8(%%edx), %%mm4\n\t" | |
111 " movq %%mm0, %%mm3\n\t" | |
112 " movq %%mm4, %%mm7\n\t" | |
18834
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
113 " pswapd 56(%%edx), %%mm1\n\t" |
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
114 " pswapd 48(%%edx), %%mm5\n\t" |
4148 | 115 " pfadd %%mm1, %%mm0\n\t" |
116 " pfadd %%mm5, %%mm4\n\t" | |
117 " movq %%mm0, (%%ecx)\n\t" | |
118 " movq %%mm4, 8(%%ecx)\n\t" | |
119 " pfsub %%mm1, %%mm3\n\t" | |
120 " pfsub %%mm5, %%mm7\n\t" | |
121 " pfmul 64(%%ebx), %%mm3\n\t" | |
122 " pfmul 72(%%ebx), %%mm7\n\t" | |
123 " pswapd %%mm3, %%mm3\n\t" | |
124 " pswapd %%mm7, %%mm7\n\t" | |
125 " movq %%mm3, 56(%%ecx)\n\t" | |
126 " movq %%mm7, 48(%%ecx)\n\t" | |
127 | |
128 " movq 16(%%edx), %%mm0\n\t" | |
129 " movq 24(%%edx), %%mm4\n\t" | |
130 " movq %%mm0, %%mm3\n\t" | |
131 " movq %%mm4, %%mm7\n\t" | |
18834
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
132 " pswapd 40(%%edx), %%mm1\n\t" |
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
133 " pswapd 32(%%edx), %%mm5\n\t" |
4148 | 134 " pfadd %%mm1, %%mm0\n\t" |
135 " pfadd %%mm5, %%mm4\n\t" | |
136 " movq %%mm0, 16(%%ecx)\n\t" | |
137 " movq %%mm4, 24(%%ecx)\n\t" | |
138 " pfsub %%mm1, %%mm3\n\t" | |
139 " pfsub %%mm5, %%mm7\n\t" | |
140 " pfmul 80(%%ebx), %%mm3\n\t" | |
141 " pfmul 88(%%ebx), %%mm7\n\t" | |
142 " pswapd %%mm3, %%mm3\n\t" | |
143 " pswapd %%mm7, %%mm7\n\t" | |
144 " movq %%mm3, 40(%%ecx)\n\t" | |
145 " movq %%mm7, 32(%%ecx)\n\t" | |
146 | |
147 /* Phase 3*/ | |
148 | |
149 " movq 64(%%edx), %%mm0\n\t" | |
150 " movq 72(%%edx), %%mm4\n\t" | |
151 " movq %%mm0, %%mm3\n\t" | |
152 " movq %%mm4, %%mm7\n\t" | |
18834
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
153 " pswapd 120(%%edx), %%mm1\n\t" |
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
154 " pswapd 112(%%edx), %%mm5\n\t" |
4148 | 155 " pfadd %%mm1, %%mm0\n\t" |
156 " pfadd %%mm5, %%mm4\n\t" | |
157 " movq %%mm0, 64(%%ecx)\n\t" | |
158 " movq %%mm4, 72(%%ecx)\n\t" | |
159 " pfsubr %%mm1, %%mm3\n\t" | |
160 " pfsubr %%mm5, %%mm7\n\t" | |
161 " pfmul 64(%%ebx), %%mm3\n\t" | |
162 " pfmul 72(%%ebx), %%mm7\n\t" | |
163 " pswapd %%mm3, %%mm3\n\t" | |
164 " pswapd %%mm7, %%mm7\n\t" | |
165 " movq %%mm3, 120(%%ecx)\n\t" | |
166 " movq %%mm7, 112(%%ecx)\n\t" | |
167 | |
168 " movq 80(%%edx), %%mm0\n\t" | |
169 " movq 88(%%edx), %%mm4\n\t" | |
170 " movq %%mm0, %%mm3\n\t" | |
171 " movq %%mm4, %%mm7\n\t" | |
18834
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
172 " pswapd 104(%%edx), %%mm1\n\t" |
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
173 " pswapd 96(%%edx), %%mm5\n\t" |
4148 | 174 " pfadd %%mm1, %%mm0\n\t" |
175 " pfadd %%mm5, %%mm4\n\t" | |
176 " movq %%mm0, 80(%%ecx)\n\t" | |
177 " movq %%mm4, 88(%%ecx)\n\t" | |
178 " pfsubr %%mm1, %%mm3\n\t" | |
179 " pfsubr %%mm5, %%mm7\n\t" | |
180 " pfmul 80(%%ebx), %%mm3\n\t" | |
181 " pfmul 88(%%ebx), %%mm7\n\t" | |
182 " pswapd %%mm3, %%mm3\n\t" | |
183 " pswapd %%mm7, %%mm7\n\t" | |
184 " movq %%mm3, 104(%%ecx)\n\t" | |
185 " movq %%mm7, 96(%%ecx)\n\t" | |
186 | |
187 /* Phase 4*/ | |
188 | |
189 " movq 96(%%ebx), %%mm2\n\t" | |
190 " movq 104(%%ebx), %%mm6\n\t" | |
191 | |
192 " movq (%%ecx), %%mm0\n\t" | |
193 " movq 8(%%ecx), %%mm4\n\t" | |
194 " movq %%mm0, %%mm3\n\t" | |
195 " movq %%mm4, %%mm7\n\t" | |
18834
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
196 " pswapd 24(%%ecx), %%mm1\n\t" |
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
197 " pswapd 16(%%ecx), %%mm5\n\t" |
4148 | 198 " pfadd %%mm1, %%mm0\n\t" |
199 " pfadd %%mm5, %%mm4\n\t" | |
200 " movq %%mm0, (%%edx)\n\t" | |
201 " movq %%mm4, 8(%%edx)\n\t" | |
202 " pfsub %%mm1, %%mm3\n\t" | |
203 " pfsub %%mm5, %%mm7\n\t" | |
204 " pfmul %%mm2, %%mm3\n\t" | |
205 " pfmul %%mm6, %%mm7\n\t" | |
206 " pswapd %%mm3, %%mm3\n\t" | |
207 " pswapd %%mm7, %%mm7\n\t" | |
208 " movq %%mm3, 24(%%edx)\n\t" | |
209 " movq %%mm7, 16(%%edx)\n\t" | |
210 | |
211 " movq 32(%%ecx), %%mm0\n\t" | |
212 " movq 40(%%ecx), %%mm4\n\t" | |
213 " movq %%mm0, %%mm3\n\t" | |
214 " movq %%mm4, %%mm7\n\t" | |
18834
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
215 " pswapd 56(%%ecx), %%mm1\n\t" |
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
216 " pswapd 48(%%ecx), %%mm5\n\t" |
4148 | 217 " pfadd %%mm1, %%mm0\n\t" |
218 " pfadd %%mm5, %%mm4\n\t" | |
219 " movq %%mm0, 32(%%edx)\n\t" | |
220 " movq %%mm4, 40(%%edx)\n\t" | |
221 " pfsubr %%mm1, %%mm3\n\t" | |
222 " pfsubr %%mm5, %%mm7\n\t" | |
223 " pfmul %%mm2, %%mm3\n\t" | |
224 " pfmul %%mm6, %%mm7\n\t" | |
225 " pswapd %%mm3, %%mm3\n\t" | |
226 " pswapd %%mm7, %%mm7\n\t" | |
227 " movq %%mm3, 56(%%edx)\n\t" | |
228 " movq %%mm7, 48(%%edx)\n\t" | |
229 | |
230 " movq 64(%%ecx), %%mm0\n\t" | |
231 " movq 72(%%ecx), %%mm4\n\t" | |
232 " movq %%mm0, %%mm3\n\t" | |
233 " movq %%mm4, %%mm7\n\t" | |
18834
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
234 " pswapd 88(%%ecx), %%mm1\n\t" |
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
235 " pswapd 80(%%ecx), %%mm5\n\t" |
4148 | 236 " pfadd %%mm1, %%mm0\n\t" |
237 " pfadd %%mm5, %%mm4\n\t" | |
238 " movq %%mm0, 64(%%edx)\n\t" | |
239 " movq %%mm4, 72(%%edx)\n\t" | |
240 " pfsub %%mm1, %%mm3\n\t" | |
241 " pfsub %%mm5, %%mm7\n\t" | |
242 " pfmul %%mm2, %%mm3\n\t" | |
243 " pfmul %%mm6, %%mm7\n\t" | |
244 " pswapd %%mm3, %%mm3\n\t" | |
245 " pswapd %%mm7, %%mm7\n\t" | |
246 " movq %%mm3, 88(%%edx)\n\t" | |
247 " movq %%mm7, 80(%%edx)\n\t" | |
248 | |
249 " movq 96(%%ecx), %%mm0\n\t" | |
250 " movq 104(%%ecx), %%mm4\n\t" | |
251 " movq %%mm0, %%mm3\n\t" | |
252 " movq %%mm4, %%mm7\n\t" | |
18834
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
253 " pswapd 120(%%ecx), %%mm1\n\t" |
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
254 " pswapd 112(%%ecx), %%mm5\n\t" |
4148 | 255 " pfadd %%mm1, %%mm0\n\t" |
256 " pfadd %%mm5, %%mm4\n\t" | |
257 " movq %%mm0, 96(%%edx)\n\t" | |
258 " movq %%mm4, 104(%%edx)\n\t" | |
259 " pfsubr %%mm1, %%mm3\n\t" | |
260 " pfsubr %%mm5, %%mm7\n\t" | |
261 " pfmul %%mm2, %%mm3\n\t" | |
262 " pfmul %%mm6, %%mm7\n\t" | |
263 " pswapd %%mm3, %%mm3\n\t" | |
264 " pswapd %%mm7, %%mm7\n\t" | |
265 " movq %%mm3, 120(%%edx)\n\t" | |
266 " movq %%mm7, 112(%%edx)\n\t" | |
267 | |
268 /* Phase 5 */ | |
269 | |
270 " movq 112(%%ebx), %%mm2\n\t" | |
271 | |
272 " movq (%%edx), %%mm0\n\t" | |
273 " movq 16(%%edx), %%mm4\n\t" | |
274 " movq %%mm0, %%mm3\n\t" | |
275 " movq %%mm4, %%mm7\n\t" | |
18834
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
276 " pswapd 8(%%edx), %%mm1\n\t" |
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
277 " pswapd 24(%%edx), %%mm5\n\t" |
4148 | 278 " pfadd %%mm1, %%mm0\n\t" |
279 " pfadd %%mm5, %%mm4\n\t" | |
280 " movq %%mm0, (%%ecx)\n\t" | |
281 " movq %%mm4, 16(%%ecx)\n\t" | |
282 " pfsub %%mm1, %%mm3\n\t" | |
283 " pfsubr %%mm5, %%mm7\n\t" | |
284 " pfmul %%mm2, %%mm3\n\t" | |
285 " pfmul %%mm2, %%mm7\n\t" | |
286 " pswapd %%mm3, %%mm3\n\t" | |
287 " pswapd %%mm7, %%mm7\n\t" | |
288 " movq %%mm3, 8(%%ecx)\n\t" | |
289 " movq %%mm7, 24(%%ecx)\n\t" | |
290 | |
291 " movq 32(%%edx), %%mm0\n\t" | |
292 " movq 48(%%edx), %%mm4\n\t" | |
293 " movq %%mm0, %%mm3\n\t" | |
294 " movq %%mm4, %%mm7\n\t" | |
18834
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
295 " pswapd 40(%%edx), %%mm1\n\t" |
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
296 " pswapd 56(%%edx), %%mm5\n\t" |
4148 | 297 " pfadd %%mm1, %%mm0\n\t" |
298 " pfadd %%mm5, %%mm4\n\t" | |
299 " movq %%mm0, 32(%%ecx)\n\t" | |
300 " movq %%mm4, 48(%%ecx)\n\t" | |
301 " pfsub %%mm1, %%mm3\n\t" | |
302 " pfsubr %%mm5, %%mm7\n\t" | |
303 " pfmul %%mm2, %%mm3\n\t" | |
304 " pfmul %%mm2, %%mm7\n\t" | |
305 " pswapd %%mm3, %%mm3\n\t" | |
306 " pswapd %%mm7, %%mm7\n\t" | |
307 " movq %%mm3, 40(%%ecx)\n\t" | |
308 " movq %%mm7, 56(%%ecx)\n\t" | |
309 | |
310 " movq 64(%%edx), %%mm0\n\t" | |
311 " movq 80(%%edx), %%mm4\n\t" | |
312 " movq %%mm0, %%mm3\n\t" | |
313 " movq %%mm4, %%mm7\n\t" | |
18834
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
314 " pswapd 72(%%edx), %%mm1\n\t" |
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
315 " pswapd 88(%%edx), %%mm5\n\t" |
4148 | 316 " pfadd %%mm1, %%mm0\n\t" |
317 " pfadd %%mm5, %%mm4\n\t" | |
318 " movq %%mm0, 64(%%ecx)\n\t" | |
319 " movq %%mm4, 80(%%ecx)\n\t" | |
320 " pfsub %%mm1, %%mm3\n\t" | |
321 " pfsubr %%mm5, %%mm7\n\t" | |
322 " pfmul %%mm2, %%mm3\n\t" | |
323 " pfmul %%mm2, %%mm7\n\t" | |
324 " pswapd %%mm3, %%mm3\n\t" | |
325 " pswapd %%mm7, %%mm7\n\t" | |
326 " movq %%mm3, 72(%%ecx)\n\t" | |
327 " movq %%mm7, 88(%%ecx)\n\t" | |
328 | |
329 " movq 96(%%edx), %%mm0\n\t" | |
330 " movq 112(%%edx), %%mm4\n\t" | |
331 " movq %%mm0, %%mm3\n\t" | |
332 " movq %%mm4, %%mm7\n\t" | |
18834
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
333 " pswapd 104(%%edx), %%mm1\n\t" |
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
334 " pswapd 120(%%edx), %%mm5\n\t" |
4148 | 335 " pfadd %%mm1, %%mm0\n\t" |
336 " pfadd %%mm5, %%mm4\n\t" | |
337 " movq %%mm0, 96(%%ecx)\n\t" | |
338 " movq %%mm4, 112(%%ecx)\n\t" | |
339 " pfsub %%mm1, %%mm3\n\t" | |
340 " pfsubr %%mm5, %%mm7\n\t" | |
341 " pfmul %%mm2, %%mm3\n\t" | |
342 " pfmul %%mm2, %%mm7\n\t" | |
343 " pswapd %%mm3, %%mm3\n\t" | |
344 " pswapd %%mm7, %%mm7\n\t" | |
345 " movq %%mm3, 104(%%ecx)\n\t" | |
346 " movq %%mm7, 120(%%ecx)\n\t" | |
347 | |
348 | |
349 /* Phase 6. This is the end of easy road. */ | |
350 /* Code below is coded in scalar mode. Should be optimized */ | |
351 | |
5291 | 352 " movd "MANGLE(plus_1f)", %%mm6\n\t" |
4148 | 353 " punpckldq 120(%%ebx), %%mm6\n\t" /* mm6 = 1.0 | 120(%%ebx)*/ |
8236
7e2ca93330d0
forgotten mangle patch by (Bj«Órn Sandell <biorn at dce dot chalmers dot se>)
michael
parents:
7307
diff
changeset
|
354 " movq "MANGLE(x_plus_minus_3dnow)", %%mm7\n\t" /* mm7 = +1 | -1 */ |
4148 | 355 |
356 " movq 32(%%ecx), %%mm0\n\t" | |
357 " movq 64(%%ecx), %%mm2\n\t" | |
358 " movq %%mm0, %%mm1\n\t" | |
359 " movq %%mm2, %%mm3\n\t" | |
360 " pxor %%mm7, %%mm1\n\t" | |
361 " pxor %%mm7, %%mm3\n\t" | |
362 " pfacc %%mm1, %%mm0\n\t" | |
363 " pfacc %%mm3, %%mm2\n\t" | |
364 " pfmul %%mm6, %%mm0\n\t" | |
365 " pfmul %%mm6, %%mm2\n\t" | |
366 " movq %%mm0, 32(%%edx)\n\t" | |
367 " movq %%mm2, 64(%%edx)\n\t" | |
368 | |
369 " movd 44(%%ecx), %%mm0\n\t" | |
370 " movd 40(%%ecx), %%mm2\n\t" | |
371 " movd 120(%%ebx), %%mm3\n\t" | |
372 " punpckldq 76(%%ecx), %%mm0\n\t" | |
373 " punpckldq 72(%%ecx), %%mm2\n\t" | |
374 " punpckldq %%mm3, %%mm3\n\t" | |
375 " movq %%mm0, %%mm4\n\t" | |
376 " movq %%mm2, %%mm5\n\t" | |
377 " pfsub %%mm2, %%mm0\n\t" | |
378 " pfmul %%mm3, %%mm0\n\t" | |
379 " movq %%mm0, %%mm1\n\t" | |
380 " pfadd %%mm5, %%mm0\n\t" | |
381 " pfadd %%mm4, %%mm0\n\t" | |
382 " movq %%mm0, %%mm2\n\t" | |
383 " punpckldq %%mm1, %%mm0\n\t" | |
384 " punpckhdq %%mm1, %%mm2\n\t" | |
385 " movq %%mm0, 40(%%edx)\n\t" | |
386 " movq %%mm2, 72(%%edx)\n\t" | |
387 | |
388 " movd 48(%%ecx), %%mm3\n\t" | |
389 " movd 60(%%ecx), %%mm2\n\t" | |
390 " pfsub 52(%%ecx), %%mm3\n\t" | |
391 " pfsub 56(%%ecx), %%mm2\n\t" | |
392 " pfmul 120(%%ebx), %%mm3\n\t" | |
393 " pfmul 120(%%ebx), %%mm2\n\t" | |
394 " movq %%mm2, %%mm1\n\t" | |
395 | |
396 " pfadd 56(%%ecx), %%mm1\n\t" | |
397 " pfadd 60(%%ecx), %%mm1\n\t" | |
398 " movq %%mm1, %%mm0\n\t" | |
399 | |
400 " pfadd 48(%%ecx), %%mm0\n\t" | |
401 " pfadd 52(%%ecx), %%mm0\n\t" | |
402 " pfadd %%mm3, %%mm1\n\t" | |
403 " punpckldq %%mm2, %%mm1\n\t" | |
404 " pfadd %%mm3, %%mm2\n\t" | |
405 " punpckldq %%mm2, %%mm0\n\t" | |
406 " movq %%mm1, 56(%%edx)\n\t" | |
407 " movq %%mm0, 48(%%edx)\n\t" | |
408 | |
409 /*---*/ | |
410 | |
411 " movd 92(%%ecx), %%mm1\n\t" | |
412 " pfsub 88(%%ecx), %%mm1\n\t" | |
413 " pfmul 120(%%ebx), %%mm1\n\t" | |
414 " movd %%mm1, 92(%%edx)\n\t" | |
415 " pfadd 92(%%ecx), %%mm1\n\t" | |
416 " pfadd 88(%%ecx), %%mm1\n\t" | |
417 " movq %%mm1, %%mm0\n\t" | |
418 | |
419 " pfadd 80(%%ecx), %%mm0\n\t" | |
420 " pfadd 84(%%ecx), %%mm0\n\t" | |
421 " movd %%mm0, 80(%%edx)\n\t" | |
422 | |
423 " movd 80(%%ecx), %%mm0\n\t" | |
424 " pfsub 84(%%ecx), %%mm0\n\t" | |
425 " pfmul 120(%%ebx), %%mm0\n\t" | |
426 " pfadd %%mm0, %%mm1\n\t" | |
427 " pfadd 92(%%edx), %%mm0\n\t" | |
428 " punpckldq %%mm1, %%mm0\n\t" | |
429 " movq %%mm0, 84(%%edx)\n\t" | |
430 | |
431 " movq 96(%%ecx), %%mm0\n\t" | |
432 " movq %%mm0, %%mm1\n\t" | |
433 " pxor %%mm7, %%mm1\n\t" | |
434 " pfacc %%mm1, %%mm0\n\t" | |
435 " pfmul %%mm6, %%mm0\n\t" | |
436 " movq %%mm0, 96(%%edx)\n\t" | |
437 | |
438 " movd 108(%%ecx), %%mm0\n\t" | |
439 " pfsub 104(%%ecx), %%mm0\n\t" | |
440 " pfmul 120(%%ebx), %%mm0\n\t" | |
441 " movd %%mm0, 108(%%edx)\n\t" | |
442 " pfadd 104(%%ecx), %%mm0\n\t" | |
443 " pfadd 108(%%ecx), %%mm0\n\t" | |
444 " movd %%mm0, 104(%%edx)\n\t" | |
445 | |
446 " movd 124(%%ecx), %%mm1\n\t" | |
447 " pfsub 120(%%ecx), %%mm1\n\t" | |
448 " pfmul 120(%%ebx), %%mm1\n\t" | |
449 " movd %%mm1, 124(%%edx)\n\t" | |
450 " pfadd 120(%%ecx), %%mm1\n\t" | |
451 " pfadd 124(%%ecx), %%mm1\n\t" | |
452 " movq %%mm1, %%mm0\n\t" | |
453 | |
454 " pfadd 112(%%ecx), %%mm0\n\t" | |
455 " pfadd 116(%%ecx), %%mm0\n\t" | |
456 " movd %%mm0, 112(%%edx)\n\t" | |
457 | |
458 " movd 112(%%ecx), %%mm0\n\t" | |
459 " pfsub 116(%%ecx), %%mm0\n\t" | |
460 " pfmul 120(%%ebx), %%mm0\n\t" | |
461 " pfadd %%mm0,%%mm1\n\t" | |
462 " pfadd 124(%%edx), %%mm0\n\t" | |
463 " punpckldq %%mm1, %%mm0\n\t" | |
464 " movq %%mm0, 116(%%edx)\n\t" | |
465 | |
20504
27fb949fffa9
disable nonworking/broken code for now till I find out what it is supposed to do.
reimar
parents:
18834
diff
changeset
|
466 // this code is broken, there is nothing modifying the z flag above. |
27fb949fffa9
disable nonworking/broken code for now till I find out what it is supposed to do.
reimar
parents:
18834
diff
changeset
|
467 #if 0 |
4148 | 468 " jnz .L01\n\t" |
469 | |
470 /* Phase 7*/ | |
471 /* Code below is coded in scalar mode. Should be optimized */ | |
472 | |
473 " movd (%%ecx), %%mm0\n\t" | |
474 " pfadd 4(%%ecx), %%mm0\n\t" | |
475 " movd %%mm0, 1024(%%esi)\n\t" | |
476 | |
477 " movd (%%ecx), %%mm0\n\t" | |
478 " pfsub 4(%%ecx), %%mm0\n\t" | |
479 " pfmul 120(%%ebx), %%mm0\n\t" | |
480 " movd %%mm0, (%%esi)\n\t" | |
481 " movd %%mm0, (%%edi)\n\t" | |
482 | |
483 " movd 12(%%ecx), %%mm0\n\t" | |
484 " pfsub 8(%%ecx), %%mm0\n\t" | |
485 " pfmul 120(%%ebx), %%mm0\n\t" | |
486 " movd %%mm0, 512(%%edi)\n\t" | |
487 " pfadd 12(%%ecx), %%mm0\n\t" | |
488 " pfadd 8(%%ecx), %%mm0\n\t" | |
489 " movd %%mm0, 512(%%esi)\n\t" | |
490 | |
491 " movd 16(%%ecx), %%mm0\n\t" | |
492 " pfsub 20(%%ecx), %%mm0\n\t" | |
493 " pfmul 120(%%ebx), %%mm0\n\t" | |
494 " movq %%mm0, %%mm3\n\t" | |
495 | |
496 " movd 28(%%ecx), %%mm0\n\t" | |
497 " pfsub 24(%%ecx), %%mm0\n\t" | |
498 " pfmul 120(%%ebx), %%mm0\n\t" | |
499 " movd %%mm0, 768(%%edi)\n\t" | |
500 " movq %%mm0, %%mm2\n\t" | |
501 | |
502 " pfadd 24(%%ecx), %%mm0\n\t" | |
503 " pfadd 28(%%ecx), %%mm0\n\t" | |
504 " movq %%mm0, %%mm1\n\t" | |
505 | |
506 " pfadd 16(%%ecx), %%mm0\n\t" | |
507 " pfadd 20(%%ecx), %%mm0\n\t" | |
508 " movd %%mm0, 768(%%esi)\n\t" | |
509 " pfadd %%mm3, %%mm1\n\t" | |
510 " movd %%mm1, 256(%%esi)\n\t" | |
511 " pfadd %%mm3, %%mm2\n\t" | |
512 " movd %%mm2, 256(%%edi)\n\t" | |
513 | |
514 /* Phase 8*/ | |
515 | |
516 " movq 32(%%edx), %%mm0\n\t" | |
517 " movq 48(%%edx), %%mm1\n\t" | |
518 " pfadd 48(%%edx), %%mm0\n\t" | |
519 " pfadd 40(%%edx), %%mm1\n\t" | |
520 " movd %%mm0, 896(%%esi)\n\t" | |
521 " movd %%mm1, 640(%%esi)\n\t" | |
522 " psrlq $32, %%mm0\n\t" | |
523 " psrlq $32, %%mm1\n\t" | |
524 " movd %%mm0, 128(%%edi)\n\t" | |
525 " movd %%mm1, 384(%%edi)\n\t" | |
526 | |
527 " movd 40(%%edx), %%mm0\n\t" | |
528 " pfadd 56(%%edx), %%mm0\n\t" | |
529 " movd %%mm0, 384(%%esi)\n\t" | |
530 | |
531 " movd 56(%%edx), %%mm0\n\t" | |
532 " pfadd 36(%%edx), %%mm0\n\t" | |
533 " movd %%mm0, 128(%%esi)\n\t" | |
534 | |
535 " movd 60(%%edx), %%mm0\n\t" | |
536 " movd %%mm0, 896(%%edi)\n\t" | |
537 " pfadd 44(%%edx), %%mm0\n\t" | |
538 " movd %%mm0, 640(%%edi)\n\t" | |
539 | |
540 " movq 96(%%edx), %%mm0\n\t" | |
541 " movq 112(%%edx), %%mm2\n\t" | |
542 " movq 104(%%edx), %%mm4\n\t" | |
543 " pfadd 112(%%edx), %%mm0\n\t" | |
544 " pfadd 104(%%edx), %%mm2\n\t" | |
545 " pfadd 120(%%edx), %%mm4\n\t" | |
546 " movq %%mm0, %%mm1\n\t" | |
547 " movq %%mm2, %%mm3\n\t" | |
548 " movq %%mm4, %%mm5\n\t" | |
549 " pfadd 64(%%edx), %%mm0\n\t" | |
550 " pfadd 80(%%edx), %%mm2\n\t" | |
551 " pfadd 72(%%edx), %%mm4\n\t" | |
552 " movd %%mm0, 960(%%esi)\n\t" | |
553 " movd %%mm2, 704(%%esi)\n\t" | |
554 " movd %%mm4, 448(%%esi)\n\t" | |
555 " psrlq $32, %%mm0\n\t" | |
556 " psrlq $32, %%mm2\n\t" | |
557 " psrlq $32, %%mm4\n\t" | |
558 " movd %%mm0, 64(%%edi)\n\t" | |
559 " movd %%mm2, 320(%%edi)\n\t" | |
560 " movd %%mm4, 576(%%edi)\n\t" | |
561 " pfadd 80(%%edx), %%mm1\n\t" | |
562 " pfadd 72(%%edx), %%mm3\n\t" | |
563 " pfadd 88(%%edx), %%mm5\n\t" | |
564 " movd %%mm1, 832(%%esi)\n\t" | |
565 " movd %%mm3, 576(%%esi)\n\t" | |
566 " movd %%mm5, 320(%%esi)\n\t" | |
567 " psrlq $32, %%mm1\n\t" | |
568 " psrlq $32, %%mm3\n\t" | |
569 " psrlq $32, %%mm5\n\t" | |
570 " movd %%mm1, 192(%%edi)\n\t" | |
571 " movd %%mm3, 448(%%edi)\n\t" | |
572 " movd %%mm5, 704(%%edi)\n\t" | |
573 | |
574 " movd 120(%%edx), %%mm0\n\t" | |
575 " pfadd 100(%%edx), %%mm0\n\t" | |
576 " movq %%mm0, %%mm1\n\t" | |
577 " pfadd 88(%%edx), %%mm0\n\t" | |
578 " movd %%mm0, 192(%%esi)\n\t" | |
579 " pfadd 68(%%edx), %%mm1\n\t" | |
580 " movd %%mm1, 64(%%esi)\n\t" | |
581 | |
582 " movd 124(%%edx), %%mm0\n\t" | |
583 " movd %%mm0, 960(%%edi)\n\t" | |
584 " pfadd 92(%%edx), %%mm0\n\t" | |
585 " movd %%mm0, 832(%%edi)\n\t" | |
586 | |
587 " jmp .L_bye\n\t" | |
588 ".L01: \n\t" | |
20504
27fb949fffa9
disable nonworking/broken code for now till I find out what it is supposed to do.
reimar
parents:
18834
diff
changeset
|
589 #endif |
4148 | 590 /* Phase 9*/ |
/* Consumes the first 32 bytes of the ecx half (left untouched by Phase 6).
   Each result is converted with pf2iw, moved to eax, and its low 16 bits
   are stored to a (esi) or b (edi) with movw. */
591 | |
592 " movq (%%ecx), %%mm0\n\t" | |
593 " movq %%mm0, %%mm1\n\t" | |
594 " pxor %%mm7, %%mm1\n\t" | |
595 " pfacc %%mm1, %%mm0\n\t" | |
596 " pfmul %%mm6, %%mm0\n\t" | |
18833
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
597 " pf2iw %%mm0, %%mm0\n\t" |
4148 | 598 " movd %%mm0, %%eax\n\t" |
599 " movw %%ax, 512(%%esi)\n\t" | |
600 " psrlq $32, %%mm0\n\t" | |
601 " movd %%mm0, %%eax\n\t" | |
602 " movw %%ax, (%%esi)\n\t" | |
603 | |
604 " movd 12(%%ecx), %%mm0\n\t" | |
605 " pfsub 8(%%ecx), %%mm0\n\t" | |
606 " pfmul 120(%%ebx), %%mm0\n\t" | |
18833
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
607 " pf2iw %%mm0, %%mm7\n\t" |
4148 | 608 " movd %%mm7, %%eax\n\t" |
609 " movw %%ax, 256(%%edi)\n\t" | |
610 " pfadd 12(%%ecx), %%mm0\n\t" | |
611 " pfadd 8(%%ecx), %%mm0\n\t" | |
18833
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
612 " pf2iw %%mm0, %%mm0\n\t" |
4148 | 613 " movd %%mm0, %%eax\n\t" |
614 " movw %%ax, 256(%%esi)\n\t" | |
615 | |
616 " movd 16(%%ecx), %%mm3\n\t" | |
617 " pfsub 20(%%ecx), %%mm3\n\t" | |
618 " pfmul 120(%%ebx), %%mm3\n\t" | |
/* NOTE(review): the copy into mm2 on the next line is dead - mm2 is
   overwritten by the movd that immediately follows it. */
619 " movq %%mm3, %%mm2\n\t" | |
620 | |
621 " movd 28(%%ecx), %%mm2\n\t" | |
622 " pfsub 24(%%ecx), %%mm2\n\t" | |
623 " pfmul 120(%%ebx), %%mm2\n\t" | |
624 " movq %%mm2, %%mm1\n\t" | |
625 | |
18833
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
626 " pf2iw %%mm2, %%mm7\n\t" |
4148 | 627 " movd %%mm7, %%eax\n\t" |
628 " movw %%ax, 384(%%edi)\n\t" | |
629 | |
630 " pfadd 24(%%ecx), %%mm1\n\t" | |
631 " pfadd 28(%%ecx), %%mm1\n\t" | |
632 " movq %%mm1, %%mm0\n\t" | |
633 | |
634 " pfadd 16(%%ecx), %%mm0\n\t" | |
635 " pfadd 20(%%ecx), %%mm0\n\t" | |
18833
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
636 " pf2iw %%mm0, %%mm0\n\t" |
4148 | 637 " movd %%mm0, %%eax\n\t" |
638 " movw %%ax, 384(%%esi)\n\t" | |
639 " pfadd %%mm3, %%mm1\n\t" | |
18833
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
640 " pf2iw %%mm1, %%mm1\n\t" |
4148 | 641 " movd %%mm1, %%eax\n\t" |
642 " movw %%ax, 128(%%esi)\n\t" | |
643 " pfadd %%mm3, %%mm2\n\t" | |
18833
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
644 " pf2iw %%mm2, %%mm2\n\t" |
4148 | 645 " movd %%mm2, %%eax\n\t" |
646 " movw %%ax, 128(%%edi)\n\t" | |
647 | |
648 /* Phase 10*/ | |
/* Reads its inputs only through edx (the Phase 6 results). From here on
   ecx and ebx no longer address tmp / costab_mmx: together with eax they
   are reused as integer scratch for the 16-bit stores, and mm6 is reused
   as a data register. */
649 | |
650 " movq 32(%%edx), %%mm0\n\t" | |
651 " movq 48(%%edx), %%mm1\n\t" | |
652 " pfadd 48(%%edx), %%mm0\n\t" | |
653 " pfadd 40(%%edx), %%mm1\n\t" | |
18833
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
654 " pf2iw %%mm0, %%mm0\n\t" |
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
655 " pf2iw %%mm1, %%mm1\n\t" |
4148 | 656 " movd %%mm0, %%eax\n\t" |
657 " movd %%mm1, %%ecx\n\t" | |
658 " movw %%ax, 448(%%esi)\n\t" | |
659 " movw %%cx, 320(%%esi)\n\t" | |
660 " psrlq $32, %%mm0\n\t" | |
661 " psrlq $32, %%mm1\n\t" | |
662 " movd %%mm0, %%eax\n\t" | |
663 " movd %%mm1, %%ecx\n\t" | |
664 " movw %%ax, 64(%%edi)\n\t" | |
665 " movw %%cx, 192(%%edi)\n\t" | |
666 | |
667 " movd 40(%%edx), %%mm3\n\t" | |
668 " movd 56(%%edx), %%mm4\n\t" | |
669 " movd 60(%%edx), %%mm0\n\t" | |
670 " movd 44(%%edx), %%mm2\n\t" | |
671 " movd 120(%%edx), %%mm5\n\t" | |
672 " punpckldq %%mm4, %%mm3\n\t" | |
673 " punpckldq 124(%%edx), %%mm0\n\t" | |
674 " pfadd 100(%%edx), %%mm5\n\t" | |
675 " punpckldq 36(%%edx), %%mm4\n\t" | |
676 " punpckldq 92(%%edx), %%mm2\n\t" | |
677 " movq %%mm5, %%mm6\n\t" | |
678 " pfadd %%mm4, %%mm3\n\t" | |
18833
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
679 " pf2iw %%mm0, %%mm1\n\t" |
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
680 " pf2iw %%mm3, %%mm3\n\t" |
4148 | 681 " pfadd 88(%%edx), %%mm5\n\t" |
682 " movd %%mm1, %%eax\n\t" | |
683 " movd %%mm3, %%ecx\n\t" | |
684 " movw %%ax, 448(%%edi)\n\t" | |
685 " movw %%cx, 192(%%esi)\n\t" | |
18833
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
686 " pf2iw %%mm5, %%mm5\n\t" |
4148 | 687 " psrlq $32, %%mm1\n\t" |
688 " psrlq $32, %%mm3\n\t" | |
689 " movd %%mm5, %%ebx\n\t" | |
690 " movd %%mm1, %%eax\n\t" | |
691 " movd %%mm3, %%ecx\n\t" | |
692 " movw %%bx, 96(%%esi)\n\t" | |
693 " movw %%ax, 480(%%edi)\n\t" | |
694 " movw %%cx, 64(%%esi)\n\t" | |
695 " pfadd %%mm2, %%mm0\n\t" | |
18833
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
696 " pf2iw %%mm0, %%mm0\n\t" |
4148 | 697 " movd %%mm0, %%eax\n\t" |
698 " pfadd 68(%%edx), %%mm6\n\t" | |
699 " movw %%ax, 320(%%edi)\n\t" | |
700 " psrlq $32, %%mm0\n\t" | |
18833
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
701 " pf2iw %%mm6, %%mm6\n\t" |
4148 | 702 " movd %%mm0, %%eax\n\t" |
703 " movd %%mm6, %%ebx\n\t" | |
704 " movw %%ax, 416(%%edi)\n\t" | |
705 " movw %%bx, 32(%%esi)\n\t" | |
706 | |
707 " movq 96(%%edx), %%mm0\n\t" | |
708 " movq 112(%%edx), %%mm2\n\t" | |
709 " movq 104(%%edx), %%mm4\n\t" | |
710 " pfadd %%mm2, %%mm0\n\t" | |
711 " pfadd %%mm4, %%mm2\n\t" | |
712 " pfadd 120(%%edx), %%mm4\n\t" | |
713 " movq %%mm0, %%mm1\n\t" | |
714 " movq %%mm2, %%mm3\n\t" | |
715 " movq %%mm4, %%mm5\n\t" | |
716 " pfadd 64(%%edx), %%mm0\n\t" | |
717 " pfadd 80(%%edx), %%mm2\n\t" | |
718 " pfadd 72(%%edx), %%mm4\n\t" | |
18833
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
719 " pf2iw %%mm0, %%mm0\n\t" |
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
720 " pf2iw %%mm2, %%mm2\n\t" |
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
721 " pf2iw %%mm4, %%mm4\n\t" |
4148 | 722 " movd %%mm0, %%eax\n\t" |
723 " movd %%mm2, %%ecx\n\t" | |
724 " movd %%mm4, %%ebx\n\t" | |
725 " movw %%ax, 480(%%esi)\n\t" | |
726 " movw %%cx, 352(%%esi)\n\t" | |
727 " movw %%bx, 224(%%esi)\n\t" | |
728 " psrlq $32, %%mm0\n\t" | |
729 " psrlq $32, %%mm2\n\t" | |
730 " psrlq $32, %%mm4\n\t" | |
731 " movd %%mm0, %%eax\n\t" | |
732 " movd %%mm2, %%ecx\n\t" | |
733 " movd %%mm4, %%ebx\n\t" | |
734 " movw %%ax, 32(%%edi)\n\t" | |
735 " movw %%cx, 160(%%edi)\n\t" | |
736 " movw %%bx, 288(%%edi)\n\t" | |
737 " pfadd 80(%%edx), %%mm1\n\t" | |
738 " pfadd 72(%%edx), %%mm3\n\t" | |
739 " pfadd 88(%%edx), %%mm5\n\t" | |
18833
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
740 " pf2iw %%mm1, %%mm1\n\t" |
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
741 " pf2iw %%mm3, %%mm3\n\t" |
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
742 " pf2iw %%mm5, %%mm5\n\t" |
4148 | 743 " movd %%mm1, %%eax\n\t" |
744 " movd %%mm3, %%ecx\n\t" | |
745 " movd %%mm5, %%ebx\n\t" | |
746 " movw %%ax, 416(%%esi)\n\t" | |
747 " movw %%cx, 288(%%esi)\n\t" | |
748 " movw %%bx, 160(%%esi)\n\t" | |
749 " psrlq $32, %%mm1\n\t" | |
750 " psrlq $32, %%mm3\n\t" | |
751 " psrlq $32, %%mm5\n\t" | |
752 " movd %%mm1, %%eax\n\t" | |
753 " movd %%mm3, %%ecx\n\t" | |
754 " movd %%mm5, %%ebx\n\t" | |
755 " movw %%ax, 96(%%edi)\n\t" | |
756 " movw %%cx, 224(%%edi)\n\t" | |
757 " movw %%bx, 352(%%edi)\n\t" | |
758 | |
/* esi and edi still hold a and b here, so movsw copies the 16-bit word at
   a[0] (stored in Phase 9) to b[0]. */
759 " movsw\n\t" | |
760 | |
/* .L_bye is only targeted by the jmp inside the disabled #if 0 region;
   femms leaves the MMX/3DNow! state before returning to C code. */
761 ".L_bye:\n\t" | |
762 " femms\n\t" | |
763 : | |
764 :"m"(a),"m"(b),"m"(c),"m"(tmp[0]) | |
9039 | 765 :"memory","%eax","%ebx","%ecx","%edx","%esi","%edi"); |
5842
d6eab895c742
Avoid stdcall on cygwin, it causes undefined ref, code needs testing as I have no athlon or k6-2.
atmos4
parents:
5291
diff
changeset
|
766 } |