Mercurial > mplayer.hg
annotate mp3lib/dct64_3dnow.c @ 31007:26537f75c1dd
Tell x264 that we aren't going to give it timestamps.
Fixes some warnings starting in x264-r1480.
author | lorenm |
---|---|
date | Tue, 20 Apr 2010 09:14:54 +0000 |
parents | 0ad2da052b2e |
children | d0f70692a140 |
rev | line source |
---|---|
4148 | 1 /* |
2 * This code was taken from http://www.mpg123.org | |
3 * See ChangeLog of mpg123-0.59s-pre.1 for detail | |
4 * Applied to mplayer by Nick Kurshev <nickols_k@mail.ru> | |
5 * Partial 3dnow! optimization by Nick Kurshev | |
6 * | |
7 * TODO: optimize scalar 3dnow! code | |
8 * Warning: Phases 7 & 8 are untested and are currently compiled out (#if 0) | |
9 */ | |
10 | |
16989 | 11 #include "config.h" |
12 #include "mangle.h" | |
30167
347d152a5cfa
Refactor real --> float #define to a typedef in a common header.
diego
parents:
27757
diff
changeset
|
13 #include "mpg123.h" |
5291 | 14 |
13918 | 15 static unsigned long long int attribute_used __attribute__((aligned(8))) x_plus_minus_3dnow = 0x8000000000000000ULL; |
12292 | 16 static float attribute_used plus_1f = 1.0; |
4148 | 17 |
23441 | 18 void dct64_MMX_3dnow(short *a,short *b,real *c) |
4148 | 19 { |
20 char tmp[256]; | |
27757
b5a46071062a
Replace all occurrences of '__volatile__' and '__volatile' by plain 'volatile'.
diego
parents:
27754
diff
changeset
|
21 __asm__ volatile( |
30990 | 22 " movl %2,%%eax\n\t" |
4148 | 23 |
30990 | 24 " leal 128+%3,%%edx\n\t" |
25 " movl %0,%%esi\n\t" | |
26 " movl %1,%%edi\n\t" | |
27 " movl $"MANGLE(costab_mmx)",%%ebx\n\t" | |
28 " leal %3,%%ecx\n\t" | |
4148 | 29 |
30 /* Phase 1*/ | |
30990 | 31 " movq (%%eax), %%mm0\n\t" |
32 " movq 8(%%eax), %%mm4\n\t" | |
33 " movq %%mm0, %%mm3\n\t" | |
34 " movq %%mm4, %%mm7\n\t" | |
35 " movq 120(%%eax), %%mm1\n\t" | |
36 " movq 112(%%eax), %%mm5\n\t" | |
4148 | 37 /* n.b.: the following six instructions emulate pswapd on mm1 and mm5 (swap the two 32-bit halves) */ |
30990 | 38 " movq %%mm1, %%mm2\n\t" |
39 " movq %%mm5, %%mm6\n\t" | |
40 " psrlq $32, %%mm1\n\t" | |
41 " psrlq $32, %%mm5\n\t" | |
42 " punpckldq %%mm2, %%mm1\n\t" | |
43 " punpckldq %%mm6, %%mm5\n\t" | |
4148 | 44 /**/ |
30990 | 45 " pfadd %%mm1, %%mm0\n\t" |
46 " pfadd %%mm5, %%mm4\n\t" | |
47 " movq %%mm0, (%%edx)\n\t" | |
48 " movq %%mm4, 8(%%edx)\n\t" | |
49 " pfsub %%mm1, %%mm3\n\t" | |
50 " pfsub %%mm5, %%mm7\n\t" | |
51 " pfmul (%%ebx), %%mm3\n\t" | |
52 " pfmul 8(%%ebx), %%mm7\n\t" | |
53 " movd %%mm3, 124(%%edx)\n\t" | |
54 " movd %%mm7, 116(%%edx)\n\t" | |
55 " psrlq $32, %%mm3\n\t" | |
56 " psrlq $32, %%mm7\n\t" | |
57 " movd %%mm3, 120(%%edx)\n\t" | |
58 " movd %%mm7, 112(%%edx)\n\t" | |
4148 | 59 |
30990 | 60 " movq 16(%%eax), %%mm0\n\t" |
61 " movq 24(%%eax), %%mm4\n\t" | |
62 " movq %%mm0, %%mm3\n\t" | |
63 " movq %%mm4, %%mm7\n\t" | |
64 " movq 104(%%eax), %%mm1\n\t" | |
65 " movq 96(%%eax), %%mm5\n\t" | |
4148 | 66 /* n.b.: pswapd*/ |
30990 | 67 " movq %%mm1, %%mm2\n\t" |
68 " movq %%mm5, %%mm6\n\t" | |
69 " psrlq $32, %%mm1\n\t" | |
70 " psrlq $32, %%mm5\n\t" | |
71 " punpckldq %%mm2, %%mm1\n\t" | |
72 " punpckldq %%mm6, %%mm5\n\t" | |
4148 | 73 /**/ |
30990 | 74 " pfadd %%mm1, %%mm0\n\t" |
75 " pfadd %%mm5, %%mm4\n\t" | |
76 " movq %%mm0, 16(%%edx)\n\t" | |
77 " movq %%mm4, 24(%%edx)\n\t" | |
78 " pfsub %%mm1, %%mm3\n\t" | |
79 " pfsub %%mm5, %%mm7\n\t" | |
80 " pfmul 16(%%ebx), %%mm3\n\t" | |
81 " pfmul 24(%%ebx), %%mm7\n\t" | |
82 " movd %%mm3, 108(%%edx)\n\t" | |
83 " movd %%mm7, 100(%%edx)\n\t" | |
84 " psrlq $32, %%mm3\n\t" | |
85 " psrlq $32, %%mm7\n\t" | |
86 " movd %%mm3, 104(%%edx)\n\t" | |
87 " movd %%mm7, 96(%%edx)\n\t" | |
4148 | 88 |
30990 | 89 " movq 32(%%eax), %%mm0\n\t" |
90 " movq 40(%%eax), %%mm4\n\t" | |
91 " movq %%mm0, %%mm3\n\t" | |
92 " movq %%mm4, %%mm7\n\t" | |
93 " movq 88(%%eax), %%mm1\n\t" | |
94 " movq 80(%%eax), %%mm5\n\t" | |
4148 | 95 /* n.b.: pswapd*/ |
30990 | 96 " movq %%mm1, %%mm2\n\t" |
97 " movq %%mm5, %%mm6\n\t" | |
98 " psrlq $32, %%mm1\n\t" | |
99 " psrlq $32, %%mm5\n\t" | |
100 " punpckldq %%mm2, %%mm1\n\t" | |
101 " punpckldq %%mm6, %%mm5\n\t" | |
4148 | 102 /**/ |
30990 | 103 " pfadd %%mm1, %%mm0\n\t" |
104 " pfadd %%mm5, %%mm4\n\t" | |
105 " movq %%mm0, 32(%%edx)\n\t" | |
106 " movq %%mm4, 40(%%edx)\n\t" | |
107 " pfsub %%mm1, %%mm3\n\t" | |
108 " pfsub %%mm5, %%mm7\n\t" | |
109 " pfmul 32(%%ebx), %%mm3\n\t" | |
110 " pfmul 40(%%ebx), %%mm7\n\t" | |
111 " movd %%mm3, 92(%%edx)\n\t" | |
112 " movd %%mm7, 84(%%edx)\n\t" | |
113 " psrlq $32, %%mm3\n\t" | |
114 " psrlq $32, %%mm7\n\t" | |
115 " movd %%mm3, 88(%%edx)\n\t" | |
116 " movd %%mm7, 80(%%edx)\n\t" | |
4148 | 117 |
30990 | 118 " movq 48(%%eax), %%mm0\n\t" |
119 " movq 56(%%eax), %%mm4\n\t" | |
120 " movq %%mm0, %%mm3\n\t" | |
121 " movq %%mm4, %%mm7\n\t" | |
122 " movq 72(%%eax), %%mm1\n\t" | |
123 " movq 64(%%eax), %%mm5\n\t" | |
4148 | 124 /* n.b.: pswapd*/ |
30990 | 125 " movq %%mm1, %%mm2\n\t" |
126 " movq %%mm5, %%mm6\n\t" | |
127 " psrlq $32, %%mm1\n\t" | |
128 " psrlq $32, %%mm5\n\t" | |
129 " punpckldq %%mm2, %%mm1\n\t" | |
130 " punpckldq %%mm6, %%mm5\n\t" | |
4148 | 131 /**/ |
30990 | 132 " pfadd %%mm1, %%mm0\n\t" |
133 " pfadd %%mm5, %%mm4\n\t" | |
134 " movq %%mm0, 48(%%edx)\n\t" | |
135 " movq %%mm4, 56(%%edx)\n\t" | |
136 " pfsub %%mm1, %%mm3\n\t" | |
137 " pfsub %%mm5, %%mm7\n\t" | |
138 " pfmul 48(%%ebx), %%mm3\n\t" | |
139 " pfmul 56(%%ebx), %%mm7\n\t" | |
140 " movd %%mm3, 76(%%edx)\n\t" | |
141 " movd %%mm7, 68(%%edx)\n\t" | |
142 " psrlq $32, %%mm3\n\t" | |
143 " psrlq $32, %%mm7\n\t" | |
144 " movd %%mm3, 72(%%edx)\n\t" | |
145 " movd %%mm7, 64(%%edx)\n\t" | |
4148 | 146 |
147 /* Phase 2*/ | |
148 | |
30990 | 149 " movq (%%edx), %%mm0\n\t" |
150 " movq 8(%%edx), %%mm4\n\t" | |
151 " movq %%mm0, %%mm3\n\t" | |
152 " movq %%mm4, %%mm7\n\t" | |
153 " movq 56(%%edx), %%mm1\n\t" | |
154 " movq 48(%%edx), %%mm5\n\t" | |
4148 | 155 /* n.b.: pswapd*/ |
30990 | 156 " movq %%mm1, %%mm2\n\t" |
157 " movq %%mm5, %%mm6\n\t" | |
158 " psrlq $32, %%mm1\n\t" | |
159 " psrlq $32, %%mm5\n\t" | |
160 " punpckldq %%mm2, %%mm1\n\t" | |
161 " punpckldq %%mm6, %%mm5\n\t" | |
4148 | 162 /**/ |
30990 | 163 " pfadd %%mm1, %%mm0\n\t" |
164 " pfadd %%mm5, %%mm4\n\t" | |
165 " movq %%mm0, (%%ecx)\n\t" | |
166 " movq %%mm4, 8(%%ecx)\n\t" | |
167 " pfsub %%mm1, %%mm3\n\t" | |
168 " pfsub %%mm5, %%mm7\n\t" | |
169 " pfmul 64(%%ebx), %%mm3\n\t" | |
170 " pfmul 72(%%ebx), %%mm7\n\t" | |
171 " movd %%mm3, 60(%%ecx)\n\t" | |
172 " movd %%mm7, 52(%%ecx)\n\t" | |
173 " psrlq $32, %%mm3\n\t" | |
174 " psrlq $32, %%mm7\n\t" | |
175 " movd %%mm3, 56(%%ecx)\n\t" | |
176 " movd %%mm7, 48(%%ecx)\n\t" | |
4148 | 177 |
30990 | 178 " movq 16(%%edx), %%mm0\n\t" |
179 " movq 24(%%edx), %%mm4\n\t" | |
180 " movq %%mm0, %%mm3\n\t" | |
181 " movq %%mm4, %%mm7\n\t" | |
182 " movq 40(%%edx), %%mm1\n\t" | |
183 " movq 32(%%edx), %%mm5\n\t" | |
4148 | 184 /* n.b.: pswapd*/ |
30990 | 185 " movq %%mm1, %%mm2\n\t" |
186 " movq %%mm5, %%mm6\n\t" | |
187 " psrlq $32, %%mm1\n\t" | |
188 " psrlq $32, %%mm5\n\t" | |
189 " punpckldq %%mm2, %%mm1\n\t" | |
190 " punpckldq %%mm6, %%mm5\n\t" | |
4148 | 191 /**/ |
30990 | 192 " pfadd %%mm1, %%mm0\n\t" |
193 " pfadd %%mm5, %%mm4\n\t" | |
194 " movq %%mm0, 16(%%ecx)\n\t" | |
195 " movq %%mm4, 24(%%ecx)\n\t" | |
196 " pfsub %%mm1, %%mm3\n\t" | |
197 " pfsub %%mm5, %%mm7\n\t" | |
198 " pfmul 80(%%ebx), %%mm3\n\t" | |
199 " pfmul 88(%%ebx), %%mm7\n\t" | |
200 " movd %%mm3, 44(%%ecx)\n\t" | |
201 " movd %%mm7, 36(%%ecx)\n\t" | |
202 " psrlq $32, %%mm3\n\t" | |
203 " psrlq $32, %%mm7\n\t" | |
204 " movd %%mm3, 40(%%ecx)\n\t" | |
205 " movd %%mm7, 32(%%ecx)\n\t" | |
4148 | 206 |
207 /* Phase 3*/ | |
208 | |
30990 | 209 " movq 64(%%edx), %%mm0\n\t" |
210 " movq 72(%%edx), %%mm4\n\t" | |
211 " movq %%mm0, %%mm3\n\t" | |
212 " movq %%mm4, %%mm7\n\t" | |
213 " movq 120(%%edx), %%mm1\n\t" | |
214 " movq 112(%%edx), %%mm5\n\t" | |
4148 | 215 /* n.b.: pswapd*/ |
30990 | 216 " movq %%mm1, %%mm2\n\t" |
217 " movq %%mm5, %%mm6\n\t" | |
218 " psrlq $32, %%mm1\n\t" | |
219 " psrlq $32, %%mm5\n\t" | |
220 " punpckldq %%mm2, %%mm1\n\t" | |
221 " punpckldq %%mm6, %%mm5\n\t" | |
4148 | 222 /**/ |
30990 | 223 " pfadd %%mm1, %%mm0\n\t" |
224 " pfadd %%mm5, %%mm4\n\t" | |
225 " movq %%mm0, 64(%%ecx)\n\t" | |
226 " movq %%mm4, 72(%%ecx)\n\t" | |
227 " pfsubr %%mm1, %%mm3\n\t" | |
228 " pfsubr %%mm5, %%mm7\n\t" | |
229 " pfmul 64(%%ebx), %%mm3\n\t" | |
230 " pfmul 72(%%ebx), %%mm7\n\t" | |
231 " movd %%mm3, 124(%%ecx)\n\t" | |
232 " movd %%mm7, 116(%%ecx)\n\t" | |
233 " psrlq $32, %%mm3\n\t" | |
234 " psrlq $32, %%mm7\n\t" | |
235 " movd %%mm3, 120(%%ecx)\n\t" | |
236 " movd %%mm7, 112(%%ecx)\n\t" | |
4148 | 237 |
30990 | 238 " movq 80(%%edx), %%mm0\n\t" |
239 " movq 88(%%edx), %%mm4\n\t" | |
240 " movq %%mm0, %%mm3\n\t" | |
241 " movq %%mm4, %%mm7\n\t" | |
242 " movq 104(%%edx), %%mm1\n\t" | |
243 " movq 96(%%edx), %%mm5\n\t" | |
4148 | 244 /* n.b.: pswapd*/ |
30990 | 245 " movq %%mm1, %%mm2\n\t" |
246 " movq %%mm5, %%mm6\n\t" | |
247 " psrlq $32, %%mm1\n\t" | |
248 " psrlq $32, %%mm5\n\t" | |
249 " punpckldq %%mm2, %%mm1\n\t" | |
250 " punpckldq %%mm6, %%mm5\n\t" | |
4148 | 251 /**/ |
30990 | 252 " pfadd %%mm1, %%mm0\n\t" |
253 " pfadd %%mm5, %%mm4\n\t" | |
254 " movq %%mm0, 80(%%ecx)\n\t" | |
255 " movq %%mm4, 88(%%ecx)\n\t" | |
256 " pfsubr %%mm1, %%mm3\n\t" | |
257 " pfsubr %%mm5, %%mm7\n\t" | |
258 " pfmul 80(%%ebx), %%mm3\n\t" | |
259 " pfmul 88(%%ebx), %%mm7\n\t" | |
260 " movd %%mm3, 108(%%ecx)\n\t" | |
261 " movd %%mm7, 100(%%ecx)\n\t" | |
262 " psrlq $32, %%mm3\n\t" | |
263 " psrlq $32, %%mm7\n\t" | |
264 " movd %%mm3, 104(%%ecx)\n\t" | |
265 " movd %%mm7, 96(%%ecx)\n\t" | |
4148 | 266 |
267 /* Phase 4*/ | |
268 | |
30990 | 269 " movq (%%ecx), %%mm0\n\t" |
270 " movq 8(%%ecx), %%mm4\n\t" | |
271 " movq %%mm0, %%mm3\n\t" | |
272 " movq %%mm4, %%mm7\n\t" | |
273 " movq 24(%%ecx), %%mm1\n\t" | |
274 " movq 16(%%ecx), %%mm5\n\t" | |
4148 | 275 /* n.b.: pswapd*/ |
30990 | 276 " movq %%mm1, %%mm2\n\t" |
277 " movq %%mm5, %%mm6\n\t" | |
278 " psrlq $32, %%mm1\n\t" | |
279 " psrlq $32, %%mm5\n\t" | |
280 " punpckldq %%mm2, %%mm1\n\t" | |
281 " punpckldq %%mm6, %%mm5\n\t" | |
4148 | 282 /**/ |
30990 | 283 " pfadd %%mm1, %%mm0\n\t" |
284 " pfadd %%mm5, %%mm4\n\t" | |
285 " movq %%mm0, (%%edx)\n\t" | |
286 " movq %%mm4, 8(%%edx)\n\t" | |
287 " pfsub %%mm1, %%mm3\n\t" | |
288 " pfsub %%mm5, %%mm7\n\t" | |
289 " pfmul 96(%%ebx), %%mm3\n\t" | |
290 " pfmul 104(%%ebx), %%mm7\n\t" | |
291 " movd %%mm3, 28(%%edx)\n\t" | |
292 " movd %%mm7, 20(%%edx)\n\t" | |
293 " psrlq $32, %%mm3\n\t" | |
294 " psrlq $32, %%mm7\n\t" | |
295 " movd %%mm3, 24(%%edx)\n\t" | |
296 " movd %%mm7, 16(%%edx)\n\t" | |
4148 | 297 |
30990 | 298 " movq 32(%%ecx), %%mm0\n\t" |
299 " movq 40(%%ecx), %%mm4\n\t" | |
300 " movq %%mm0, %%mm3\n\t" | |
301 " movq %%mm4, %%mm7\n\t" | |
302 " movq 56(%%ecx), %%mm1\n\t" | |
303 " movq 48(%%ecx), %%mm5\n\t" | |
4148 | 304 /* n.b.: pswapd*/ |
30990 | 305 " movq %%mm1, %%mm2\n\t" |
306 " movq %%mm5, %%mm6\n\t" | |
307 " psrlq $32, %%mm1\n\t" | |
308 " psrlq $32, %%mm5\n\t" | |
309 " punpckldq %%mm2, %%mm1\n\t" | |
310 " punpckldq %%mm6, %%mm5\n\t" | |
4148 | 311 /**/ |
30990 | 312 " pfadd %%mm1, %%mm0\n\t" |
313 " pfadd %%mm5, %%mm4\n\t" | |
314 " movq %%mm0, 32(%%edx)\n\t" | |
315 " movq %%mm4, 40(%%edx)\n\t" | |
316 " pfsubr %%mm1, %%mm3\n\t" | |
317 " pfsubr %%mm5, %%mm7\n\t" | |
318 " pfmul 96(%%ebx), %%mm3\n\t" | |
319 " pfmul 104(%%ebx), %%mm7\n\t" | |
320 " movd %%mm3, 60(%%edx)\n\t" | |
321 " movd %%mm7, 52(%%edx)\n\t" | |
322 " psrlq $32, %%mm3\n\t" | |
323 " psrlq $32, %%mm7\n\t" | |
324 " movd %%mm3, 56(%%edx)\n\t" | |
325 " movd %%mm7, 48(%%edx)\n\t" | |
4148 | 326 |
30990 | 327 " movq 64(%%ecx), %%mm0\n\t" |
328 " movq 72(%%ecx), %%mm4\n\t" | |
329 " movq %%mm0, %%mm3\n\t" | |
330 " movq %%mm4, %%mm7\n\t" | |
331 " movq 88(%%ecx), %%mm1\n\t" | |
332 " movq 80(%%ecx), %%mm5\n\t" | |
4148 | 333 /* n.b.: pswapd*/ |
30990 | 334 " movq %%mm1, %%mm2\n\t" |
335 " movq %%mm5, %%mm6\n\t" | |
336 " psrlq $32, %%mm1\n\t" | |
337 " psrlq $32, %%mm5\n\t" | |
338 " punpckldq %%mm2, %%mm1\n\t" | |
339 " punpckldq %%mm6, %%mm5\n\t" | |
4148 | 340 /**/ |
30990 | 341 " pfadd %%mm1, %%mm0\n\t" |
342 " pfadd %%mm5, %%mm4\n\t" | |
343 " movq %%mm0, 64(%%edx)\n\t" | |
344 " movq %%mm4, 72(%%edx)\n\t" | |
345 " pfsub %%mm1, %%mm3\n\t" | |
346 " pfsub %%mm5, %%mm7\n\t" | |
347 " pfmul 96(%%ebx), %%mm3\n\t" | |
348 " pfmul 104(%%ebx), %%mm7\n\t" | |
349 " movd %%mm3, 92(%%edx)\n\t" | |
350 " movd %%mm7, 84(%%edx)\n\t" | |
351 " psrlq $32, %%mm3\n\t" | |
352 " psrlq $32, %%mm7\n\t" | |
353 " movd %%mm3, 88(%%edx)\n\t" | |
354 " movd %%mm7, 80(%%edx)\n\t" | |
4148 | 355 |
30990 | 356 " movq 96(%%ecx), %%mm0\n\t" |
357 " movq 104(%%ecx), %%mm4\n\t" | |
358 " movq %%mm0, %%mm3\n\t" | |
359 " movq %%mm4, %%mm7\n\t" | |
360 " movq 120(%%ecx), %%mm1\n\t" | |
361 " movq 112(%%ecx), %%mm5\n\t" | |
4148 | 362 /* n.b.: pswapd*/ |
30990 | 363 " movq %%mm1, %%mm2\n\t" |
364 " movq %%mm5, %%mm6\n\t" | |
365 " psrlq $32, %%mm1\n\t" | |
366 " psrlq $32, %%mm5\n\t" | |
367 " punpckldq %%mm2, %%mm1\n\t" | |
368 " punpckldq %%mm6, %%mm5\n\t" | |
4148 | 369 /**/ |
30990 | 370 " pfadd %%mm1, %%mm0\n\t" |
371 " pfadd %%mm5, %%mm4\n\t" | |
372 " movq %%mm0, 96(%%edx)\n\t" | |
373 " movq %%mm4, 104(%%edx)\n\t" | |
374 " pfsubr %%mm1, %%mm3\n\t" | |
375 " pfsubr %%mm5, %%mm7\n\t" | |
376 " pfmul 96(%%ebx), %%mm3\n\t" | |
377 " pfmul 104(%%ebx), %%mm7\n\t" | |
378 " movd %%mm3, 124(%%edx)\n\t" | |
379 " movd %%mm7, 116(%%edx)\n\t" | |
380 " psrlq $32, %%mm3\n\t" | |
381 " psrlq $32, %%mm7\n\t" | |
382 " movd %%mm3, 120(%%edx)\n\t" | |
383 " movd %%mm7, 112(%%edx)\n\t" | |
4148 | 384 |
385 /* Phase 5 */ | |
386 | |
30990 | 387 " movq (%%edx), %%mm0\n\t" |
388 " movq 16(%%edx), %%mm4\n\t" | |
389 " movq %%mm0, %%mm3\n\t" | |
390 " movq %%mm4, %%mm7\n\t" | |
391 " movq 8(%%edx), %%mm1\n\t" | |
392 " movq 24(%%edx), %%mm5\n\t" | |
4148 | 393 /* n.b.: pswapd*/ |
30990 | 394 " movq %%mm1, %%mm2\n\t" |
395 " movq %%mm5, %%mm6\n\t" | |
396 " psrlq $32, %%mm1\n\t" | |
397 " psrlq $32, %%mm5\n\t" | |
398 " punpckldq %%mm2, %%mm1\n\t" | |
399 " punpckldq %%mm6, %%mm5\n\t" | |
4148 | 400 /**/ |
30990 | 401 " pfadd %%mm1, %%mm0\n\t" |
402 " pfadd %%mm5, %%mm4\n\t" | |
403 " movq %%mm0, (%%ecx)\n\t" | |
404 " movq %%mm4, 16(%%ecx)\n\t" | |
405 " pfsub %%mm1, %%mm3\n\t" | |
406 " pfsubr %%mm5, %%mm7\n\t" | |
407 " pfmul 112(%%ebx), %%mm3\n\t" | |
408 " pfmul 112(%%ebx), %%mm7\n\t" | |
409 " movd %%mm3, 12(%%ecx)\n\t" | |
410 " movd %%mm7, 28(%%ecx)\n\t" | |
411 " psrlq $32, %%mm3\n\t" | |
412 " psrlq $32, %%mm7\n\t" | |
413 " movd %%mm3, 8(%%ecx)\n\t" | |
414 " movd %%mm7, 24(%%ecx)\n\t" | |
4148 | 415 |
30990 | 416 " movq 32(%%edx), %%mm0\n\t" |
417 " movq 48(%%edx), %%mm4\n\t" | |
418 " movq %%mm0, %%mm3\n\t" | |
419 " movq %%mm4, %%mm7\n\t" | |
420 " movq 40(%%edx), %%mm1\n\t" | |
421 " movq 56(%%edx), %%mm5\n\t" | |
4148 | 422 /* n.b.: pswapd*/ |
30990 | 423 " movq %%mm1, %%mm2\n\t" |
424 " movq %%mm5, %%mm6\n\t" | |
425 " psrlq $32, %%mm1\n\t" | |
426 " psrlq $32, %%mm5\n\t" | |
427 " punpckldq %%mm2, %%mm1\n\t" | |
428 " punpckldq %%mm6, %%mm5\n\t" | |
4148 | 429 /**/ |
30990 | 430 " pfadd %%mm1, %%mm0\n\t" |
431 " pfadd %%mm5, %%mm4\n\t" | |
432 " movq %%mm0, 32(%%ecx)\n\t" | |
433 " movq %%mm4, 48(%%ecx)\n\t" | |
434 " pfsub %%mm1, %%mm3\n\t" | |
435 " pfsubr %%mm5, %%mm7\n\t" | |
436 " pfmul 112(%%ebx), %%mm3\n\t" | |
437 " pfmul 112(%%ebx), %%mm7\n\t" | |
438 " movd %%mm3, 44(%%ecx)\n\t" | |
439 " movd %%mm7, 60(%%ecx)\n\t" | |
440 " psrlq $32, %%mm3\n\t" | |
441 " psrlq $32, %%mm7\n\t" | |
442 " movd %%mm3, 40(%%ecx)\n\t" | |
443 " movd %%mm7, 56(%%ecx)\n\t" | |
4148 | 444 |
30990 | 445 " movq 64(%%edx), %%mm0\n\t" |
446 " movq 80(%%edx), %%mm4\n\t" | |
447 " movq %%mm0, %%mm3\n\t" | |
448 " movq %%mm4, %%mm7\n\t" | |
449 " movq 72(%%edx), %%mm1\n\t" | |
450 " movq 88(%%edx), %%mm5\n\t" | |
4148 | 451 /* n.b.: pswapd*/ |
30990 | 452 " movq %%mm1, %%mm2\n\t" |
453 " movq %%mm5, %%mm6\n\t" | |
454 " psrlq $32, %%mm1\n\t" | |
455 " psrlq $32, %%mm5\n\t" | |
456 " punpckldq %%mm2, %%mm1\n\t" | |
457 " punpckldq %%mm6, %%mm5\n\t" | |
4148 | 458 /**/ |
30990 | 459 " pfadd %%mm1, %%mm0\n\t" |
460 " pfadd %%mm5, %%mm4\n\t" | |
461 " movq %%mm0, 64(%%ecx)\n\t" | |
462 " movq %%mm4, 80(%%ecx)\n\t" | |
463 " pfsub %%mm1, %%mm3\n\t" | |
464 " pfsubr %%mm5, %%mm7\n\t" | |
465 " pfmul 112(%%ebx), %%mm3\n\t" | |
466 " pfmul 112(%%ebx), %%mm7\n\t" | |
467 " movd %%mm3, 76(%%ecx)\n\t" | |
468 " movd %%mm7, 92(%%ecx)\n\t" | |
469 " psrlq $32, %%mm3\n\t" | |
470 " psrlq $32, %%mm7\n\t" | |
471 " movd %%mm3, 72(%%ecx)\n\t" | |
472 " movd %%mm7, 88(%%ecx)\n\t" | |
4148 | 473 |
30990 | 474 " movq 96(%%edx), %%mm0\n\t" |
475 " movq 112(%%edx), %%mm4\n\t" | |
476 " movq %%mm0, %%mm3\n\t" | |
477 " movq %%mm4, %%mm7\n\t" | |
478 " movq 104(%%edx), %%mm1\n\t" | |
479 " movq 120(%%edx), %%mm5\n\t" | |
4148 | 480 /* n.b.: pswapd*/ |
30990 | 481 " movq %%mm1, %%mm2\n\t" |
482 " movq %%mm5, %%mm6\n\t" | |
483 " psrlq $32, %%mm1\n\t" | |
484 " psrlq $32, %%mm5\n\t" | |
485 " punpckldq %%mm2, %%mm1\n\t" | |
486 " punpckldq %%mm6, %%mm5\n\t" | |
4148 | 487 /**/ |
30990 | 488 " pfadd %%mm1, %%mm0\n\t" |
489 " pfadd %%mm5, %%mm4\n\t" | |
490 " movq %%mm0, 96(%%ecx)\n\t" | |
491 " movq %%mm4, 112(%%ecx)\n\t" | |
492 " pfsub %%mm1, %%mm3\n\t" | |
493 " pfsubr %%mm5, %%mm7\n\t" | |
494 " pfmul 112(%%ebx), %%mm3\n\t" | |
495 " pfmul 112(%%ebx), %%mm7\n\t" | |
496 " movd %%mm3, 108(%%ecx)\n\t" | |
497 " movd %%mm7, 124(%%ecx)\n\t" | |
498 " psrlq $32, %%mm3\n\t" | |
499 " psrlq $32, %%mm7\n\t" | |
500 " movd %%mm3, 104(%%ecx)\n\t" | |
501 " movd %%mm7, 120(%%ecx)\n\t" | |
4148 | 502 |
503 /* Phase 6. This is the end of the easy road. */ | |
504 /* The code below is written in scalar mode and should be optimized. */ | |
505 | |
30990 | 506 " movd "MANGLE(plus_1f)", %%mm6\n\t" |
507 " punpckldq 120(%%ebx), %%mm6\n\t" /* mm6 = 1.0 | 120(%%ebx)*/ | |
508 " movq "MANGLE(x_plus_minus_3dnow)", %%mm7\n\t" /* mm7 = +1 | -1 */ | |
4148 | 509 |
30990 | 510 " movq 32(%%ecx), %%mm0\n\t" |
511 " movq 64(%%ecx), %%mm2\n\t" | |
512 " movq %%mm0, %%mm1\n\t" | |
513 " movq %%mm2, %%mm3\n\t" | |
514 " pxor %%mm7, %%mm1\n\t" | |
515 " pxor %%mm7, %%mm3\n\t" | |
516 " pfacc %%mm1, %%mm0\n\t" | |
517 " pfacc %%mm3, %%mm2\n\t" | |
518 " pfmul %%mm6, %%mm0\n\t" | |
519 " pfmul %%mm6, %%mm2\n\t" | |
520 " movq %%mm0, 32(%%edx)\n\t" | |
521 " movq %%mm2, 64(%%edx)\n\t" | |
4148 | 522 |
30990 | 523 " movd 44(%%ecx), %%mm0\n\t" |
524 " movd 40(%%ecx), %%mm2\n\t" | |
525 " movd 120(%%ebx), %%mm3\n\t" | |
526 " punpckldq 76(%%ecx), %%mm0\n\t" | |
527 " punpckldq 72(%%ecx), %%mm2\n\t" | |
528 " punpckldq %%mm3, %%mm3\n\t" | |
529 " movq %%mm0, %%mm4\n\t" | |
530 " movq %%mm2, %%mm5\n\t" | |
531 " pfsub %%mm2, %%mm0\n\t" | |
532 " pfmul %%mm3, %%mm0\n\t" | |
533 " movq %%mm0, %%mm1\n\t" | |
534 " pfadd %%mm5, %%mm0\n\t" | |
535 " pfadd %%mm4, %%mm0\n\t" | |
536 " movq %%mm0, %%mm2\n\t" | |
537 " punpckldq %%mm1, %%mm0\n\t" | |
538 " punpckhdq %%mm1, %%mm2\n\t" | |
539 " movq %%mm0, 40(%%edx)\n\t" | |
540 " movq %%mm2, 72(%%edx)\n\t" | |
4148 | 541 |
30990 | 542 " movd 48(%%ecx), %%mm3\n\t" |
543 " movd 60(%%ecx), %%mm2\n\t" | |
544 " pfsub 52(%%ecx), %%mm3\n\t" | |
545 " pfsub 56(%%ecx), %%mm2\n\t" | |
546 " pfmul 120(%%ebx), %%mm3\n\t" | |
547 " pfmul 120(%%ebx), %%mm2\n\t" | |
548 " movq %%mm2, %%mm1\n\t" | |
4148 | 549 |
30990 | 550 " pfadd 56(%%ecx), %%mm1\n\t" |
551 " pfadd 60(%%ecx), %%mm1\n\t" | |
552 " movq %%mm1, %%mm0\n\t" | |
4148 | 553 |
30990 | 554 " pfadd 48(%%ecx), %%mm0\n\t" |
555 " pfadd 52(%%ecx), %%mm0\n\t" | |
556 " pfadd %%mm3, %%mm1\n\t" | |
557 " punpckldq %%mm2, %%mm1\n\t" | |
558 " pfadd %%mm3, %%mm2\n\t" | |
559 " punpckldq %%mm2, %%mm0\n\t" | |
560 " movq %%mm1, 56(%%edx)\n\t" | |
561 " movq %%mm0, 48(%%edx)\n\t" | |
4148 | 562 |
563 /*---*/ | |
564 | |
30990 | 565 " movd 92(%%ecx), %%mm1\n\t" |
566 " pfsub 88(%%ecx), %%mm1\n\t" | |
567 " pfmul 120(%%ebx), %%mm1\n\t" | |
568 " movd %%mm1, 92(%%edx)\n\t" | |
569 " pfadd 92(%%ecx), %%mm1\n\t" | |
570 " pfadd 88(%%ecx), %%mm1\n\t" | |
571 " movq %%mm1, %%mm0\n\t" | |
4148 | 572 |
30990 | 573 " pfadd 80(%%ecx), %%mm0\n\t" |
574 " pfadd 84(%%ecx), %%mm0\n\t" | |
575 " movd %%mm0, 80(%%edx)\n\t" | |
4148 | 576 |
30990 | 577 " movd 80(%%ecx), %%mm0\n\t" |
578 " pfsub 84(%%ecx), %%mm0\n\t" | |
579 " pfmul 120(%%ebx), %%mm0\n\t" | |
580 " pfadd %%mm0, %%mm1\n\t" | |
581 " pfadd 92(%%edx), %%mm0\n\t" | |
582 " punpckldq %%mm1, %%mm0\n\t" | |
583 " movq %%mm0, 84(%%edx)\n\t" | |
4148 | 584 |
30990 | 585 " movq 96(%%ecx), %%mm0\n\t" |
586 " movq %%mm0, %%mm1\n\t" | |
587 " pxor %%mm7, %%mm1\n\t" | |
588 " pfacc %%mm1, %%mm0\n\t" | |
589 " pfmul %%mm6, %%mm0\n\t" | |
590 " movq %%mm0, 96(%%edx)\n\t" | |
4148 | 591 |
30990 | 592 " movd 108(%%ecx), %%mm0\n\t" |
593 " pfsub 104(%%ecx), %%mm0\n\t" | |
594 " pfmul 120(%%ebx), %%mm0\n\t" | |
595 " movd %%mm0, 108(%%edx)\n\t" | |
596 " pfadd 104(%%ecx), %%mm0\n\t" | |
597 " pfadd 108(%%ecx), %%mm0\n\t" | |
598 " movd %%mm0, 104(%%edx)\n\t" | |
4148 | 599 |
30990 | 600 " movd 124(%%ecx), %%mm1\n\t" |
601 " pfsub 120(%%ecx), %%mm1\n\t" | |
602 " pfmul 120(%%ebx), %%mm1\n\t" | |
603 " movd %%mm1, 124(%%edx)\n\t" | |
604 " pfadd 120(%%ecx), %%mm1\n\t" | |
605 " pfadd 124(%%ecx), %%mm1\n\t" | |
606 " movq %%mm1, %%mm0\n\t" | |
4148 | 607 |
30990 | 608 " pfadd 112(%%ecx), %%mm0\n\t" |
609 " pfadd 116(%%ecx), %%mm0\n\t" | |
610 " movd %%mm0, 112(%%edx)\n\t" | |
4148 | 611 |
30990 | 612 " movd 112(%%ecx), %%mm0\n\t" |
613 " pfsub 116(%%ecx), %%mm0\n\t" | |
614 " pfmul 120(%%ebx), %%mm0\n\t" | |
615 " pfadd %%mm0,%%mm1\n\t" | |
616 " pfadd 124(%%edx), %%mm0\n\t" | |
617 " punpckldq %%mm1, %%mm0\n\t" | |
618 " movq %%mm0, 116(%%edx)\n\t" | |
4148 | 619 |
20504
27fb949fffa9
disable nonworking/broken code for now till I find out what it is supposed to do.
reimar
parents:
18941
diff
changeset
|
620 // This code is broken: nothing above modifies the Z flag that the jnz below tests. |
27fb949fffa9
disable nonworking/broken code for now till I find out what it is supposed to do.
reimar
parents:
18941
diff
changeset
|
621 #if 0 |
30990 | 622 " jnz .L01\n\t" |
4148 | 623 |
624 /* Phase 7*/ | |
625 /* The code below is written in scalar mode and should be optimized. */ | |
626 | |
30990 | 627 " movd (%%ecx), %%mm0\n\t" |
628 " pfadd 4(%%ecx), %%mm0\n\t" | |
629 " movd %%mm0, 1024(%%esi)\n\t" | |
4148 | 630 |
30990 | 631 " movd (%%ecx), %%mm0\n\t" |
632 " pfsub 4(%%ecx), %%mm0\n\t" | |
633 " pfmul 120(%%ebx), %%mm0\n\t" | |
634 " movd %%mm0, (%%esi)\n\t" | |
635 " movd %%mm0, (%%edi)\n\t" | |
4148 | 636 |
30990 | 637 " movd 12(%%ecx), %%mm0\n\t" |
638 " pfsub 8(%%ecx), %%mm0\n\t" | |
639 " pfmul 120(%%ebx), %%mm0\n\t" | |
640 " movd %%mm0, 512(%%edi)\n\t" | |
641 " pfadd 12(%%ecx), %%mm0\n\t" | |
642 " pfadd 8(%%ecx), %%mm0\n\t" | |
643 " movd %%mm0, 512(%%esi)\n\t" | |
4148 | 644 |
30990 | 645 " movd 16(%%ecx), %%mm0\n\t" |
646 " pfsub 20(%%ecx), %%mm0\n\t" | |
647 " pfmul 120(%%ebx), %%mm0\n\t" | |
648 " movq %%mm0, %%mm3\n\t" | |
4148 | 649 |
30990 | 650 " movd 28(%%ecx), %%mm0\n\t" |
651 " pfsub 24(%%ecx), %%mm0\n\t" | |
652 " pfmul 120(%%ebx), %%mm0\n\t" | |
653 " movd %%mm0, 768(%%edi)\n\t" | |
654 " movq %%mm0, %%mm2\n\t" | |
4148 | 655 |
30990 | 656 " pfadd 24(%%ecx), %%mm0\n\t" |
657 " pfadd 28(%%ecx), %%mm0\n\t" | |
658 " movq %%mm0, %%mm1\n\t" | |
4148 | 659 |
30990 | 660 " pfadd 16(%%ecx), %%mm0\n\t" |
661 " pfadd 20(%%ecx), %%mm0\n\t" | |
662 " movd %%mm0, 768(%%esi)\n\t" | |
663 " pfadd %%mm3, %%mm1\n\t" | |
664 " movd %%mm1, 256(%%esi)\n\t" | |
665 " pfadd %%mm3, %%mm2\n\t" | |
666 " movd %%mm2, 256(%%edi)\n\t" | |
4148 | 667 |
668 /* Phase 8*/ | |
669 | |
30990 | 670 " movq 32(%%edx), %%mm0\n\t" |
671 " movq 48(%%edx), %%mm1\n\t" | |
672 " pfadd 48(%%edx), %%mm0\n\t" | |
673 " pfadd 40(%%edx), %%mm1\n\t" | |
674 " movd %%mm0, 896(%%esi)\n\t" | |
675 " movd %%mm1, 640(%%esi)\n\t" | |
676 " psrlq $32, %%mm0\n\t" | |
677 " psrlq $32, %%mm1\n\t" | |
678 " movd %%mm0, 128(%%edi)\n\t" | |
679 " movd %%mm1, 384(%%edi)\n\t" | |
4148 | 680 |
30990 | 681 " movd 40(%%edx), %%mm0\n\t" |
682 " pfadd 56(%%edx), %%mm0\n\t" | |
683 " movd %%mm0, 384(%%esi)\n\t" | |
4148 | 684 |
30990 | 685 " movd 56(%%edx), %%mm0\n\t" |
686 " pfadd 36(%%edx), %%mm0\n\t" | |
687 " movd %%mm0, 128(%%esi)\n\t" | |
4148 | 688 |
30990 | 689 " movd 60(%%edx), %%mm0\n\t" |
690 " movd %%mm0, 896(%%edi)\n\t" | |
691 " pfadd 44(%%edx), %%mm0\n\t" | |
692 " movd %%mm0, 640(%%edi)\n\t" | |
4148 | 693 |
30990 | 694 " movq 96(%%edx), %%mm0\n\t" |
695 " movq 112(%%edx), %%mm2\n\t" | |
696 " movq 104(%%edx), %%mm4\n\t" | |
697 " pfadd 112(%%edx), %%mm0\n\t" | |
698 " pfadd 104(%%edx), %%mm2\n\t" | |
699 " pfadd 120(%%edx), %%mm4\n\t" | |
700 " movq %%mm0, %%mm1\n\t" | |
701 " movq %%mm2, %%mm3\n\t" | |
702 " movq %%mm4, %%mm5\n\t" | |
703 " pfadd 64(%%edx), %%mm0\n\t" | |
704 " pfadd 80(%%edx), %%mm2\n\t" | |
705 " pfadd 72(%%edx), %%mm4\n\t" | |
706 " movd %%mm0, 960(%%esi)\n\t" | |
707 " movd %%mm2, 704(%%esi)\n\t" | |
708 " movd %%mm4, 448(%%esi)\n\t" | |
709 " psrlq $32, %%mm0\n\t" | |
710 " psrlq $32, %%mm2\n\t" | |
711 " psrlq $32, %%mm4\n\t" | |
712 " movd %%mm0, 64(%%edi)\n\t" | |
713 " movd %%mm2, 320(%%edi)\n\t" | |
714 " movd %%mm4, 576(%%edi)\n\t" | |
715 " pfadd 80(%%edx), %%mm1\n\t" | |
716 " pfadd 72(%%edx), %%mm3\n\t" | |
717 " pfadd 88(%%edx), %%mm5\n\t" | |
718 " movd %%mm1, 832(%%esi)\n\t" | |
719 " movd %%mm3, 576(%%esi)\n\t" | |
720 " movd %%mm5, 320(%%esi)\n\t" | |
721 " psrlq $32, %%mm1\n\t" | |
722 " psrlq $32, %%mm3\n\t" | |
723 " psrlq $32, %%mm5\n\t" | |
724 " movd %%mm1, 192(%%edi)\n\t" | |
725 " movd %%mm3, 448(%%edi)\n\t" | |
726 " movd %%mm5, 704(%%edi)\n\t" | |
4148 | 727 |
30990 | 728 " movd 120(%%edx), %%mm0\n\t" |
729 " pfadd 100(%%edx), %%mm0\n\t" | |
730 " movq %%mm0, %%mm1\n\t" | |
731 " pfadd 88(%%edx), %%mm0\n\t" | |
732 " movd %%mm0, 192(%%esi)\n\t" | |
733 " pfadd 68(%%edx), %%mm1\n\t" | |
734 " movd %%mm1, 64(%%esi)\n\t" | |
4148 | 735 |
30990 | 736 " movd 124(%%edx), %%mm0\n\t" |
737 " movd %%mm0, 960(%%edi)\n\t" | |
738 " pfadd 92(%%edx), %%mm0\n\t" | |
739 " movd %%mm0, 832(%%edi)\n\t" | |
4148 | 740 |
30990 | 741 " jmp .L_bye\n\t" |
4148 | 742 ".L01:\n\t" |
20504
27fb949fffa9
disable nonworking/broken code for now till I find out what it is supposed to do.
reimar
parents:
18941
diff
changeset
|
743 #endif |
4148 | 744 /* Phase 9*/ |
745 | |
30990 | 746 " movq (%%ecx), %%mm0\n\t" |
747 " movq %%mm0, %%mm1\n\t" | |
748 " pxor %%mm7, %%mm1\n\t" | |
749 " pfacc %%mm1, %%mm0\n\t" | |
750 " pfmul %%mm6, %%mm0\n\t" | |
751 " pf2id %%mm0, %%mm0\n\t" | |
752 " packssdw %%mm0, %%mm0\n\t" | |
753 " movd %%mm0, %%eax\n\t" | |
754 " movw %%ax, 512(%%esi)\n\t" | |
755 " shrl $16, %%eax\n\t" | |
756 " movw %%ax, (%%esi)\n\t" | |
4148 | 757 |
30990 | 758 " movd 12(%%ecx), %%mm0\n\t" |
759 " pfsub 8(%%ecx), %%mm0\n\t" | |
760 " pfmul 120(%%ebx), %%mm0\n\t" | |
761 " pf2id %%mm0, %%mm7\n\t" | |
762 " packssdw %%mm7, %%mm7\n\t" | |
763 " movd %%mm7, %%eax\n\t" | |
764 " movw %%ax, 256(%%edi)\n\t" | |
765 " pfadd 12(%%ecx), %%mm0\n\t" | |
766 " pfadd 8(%%ecx), %%mm0\n\t" | |
767 " pf2id %%mm0, %%mm0\n\t" | |
768 " packssdw %%mm0, %%mm0\n\t" | |
769 " movd %%mm0, %%eax\n\t" | |
770 " movw %%ax, 256(%%esi)\n\t" | |
4148 | 771 |
30990 | 772 " movd 16(%%ecx), %%mm3\n\t" |
773 " pfsub 20(%%ecx), %%mm3\n\t" | |
774 " pfmul 120(%%ebx), %%mm3\n\t" | |
775 " movq %%mm3, %%mm2\n\t" | |
4148 | 776 |
30990 | 777 " movd 28(%%ecx), %%mm2\n\t" |
778 " pfsub 24(%%ecx), %%mm2\n\t" | |
779 " pfmul 120(%%ebx), %%mm2\n\t" | |
780 " movq %%mm2, %%mm1\n\t" | |
4148 | 781 |
30990 | 782 " pf2id %%mm2, %%mm7\n\t" |
783 " packssdw %%mm7, %%mm7\n\t" | |
784 " movd %%mm7, %%eax\n\t" | |
785 " movw %%ax, 384(%%edi)\n\t" | |
4148 | 786 |
30990 | 787 " pfadd 24(%%ecx), %%mm1\n\t" |
788 " pfadd 28(%%ecx), %%mm1\n\t" | |
789 " movq %%mm1, %%mm0\n\t" | |
4148 | 790 |
30990 | 791 " pfadd 16(%%ecx), %%mm0\n\t" |
792 " pfadd 20(%%ecx), %%mm0\n\t" | |
793 " pf2id %%mm0, %%mm0\n\t" | |
794 " packssdw %%mm0, %%mm0\n\t" | |
795 " movd %%mm0, %%eax\n\t" | |
796 " movw %%ax, 384(%%esi)\n\t" | |
797 " pfadd %%mm3, %%mm1\n\t" | |
798 " pf2id %%mm1, %%mm1\n\t" | |
799 " packssdw %%mm1, %%mm1\n\t" | |
800 " movd %%mm1, %%eax\n\t" | |
801 " movw %%ax, 128(%%esi)\n\t" | |
802 " pfadd %%mm3, %%mm2\n\t" | |
803 " pf2id %%mm2, %%mm2\n\t" | |
804 " packssdw %%mm2, %%mm2\n\t" | |
805 " movd %%mm2, %%eax\n\t" | |
806 " movw %%ax, 128(%%edi)\n\t" | |
4148 | 807 |
808 /* Phase 10*/ | |
809 | |
30990 | 810 " movq 32(%%edx), %%mm0\n\t" |
811 " movq 48(%%edx), %%mm1\n\t" | |
812 " pfadd 48(%%edx), %%mm0\n\t" | |
813 " pfadd 40(%%edx), %%mm1\n\t" | |
814 " pf2id %%mm0, %%mm0\n\t" | |
815 " pf2id %%mm1, %%mm1\n\t" | |
816 " packssdw %%mm0, %%mm0\n\t" | |
817 " packssdw %%mm1, %%mm1\n\t" | |
818 " movd %%mm0, %%eax\n\t" | |
819 " movd %%mm1, %%ecx\n\t" | |
820 " movw %%ax, 448(%%esi)\n\t" | |
821 " movw %%cx, 320(%%esi)\n\t" | |
822 " shrl $16, %%eax\n\t" | |
823 " shrl $16, %%ecx\n\t" | |
824 " movw %%ax, 64(%%edi)\n\t" | |
825 " movw %%cx, 192(%%edi)\n\t" | |
4148 | 826 |
30990 | 827 " movd 40(%%edx), %%mm3\n\t" |
828 " movd 56(%%edx), %%mm4\n\t" | |
829 " movd 60(%%edx), %%mm0\n\t" | |
830 " movd 44(%%edx), %%mm2\n\t" | |
831 " movd 120(%%edx), %%mm5\n\t" | |
832 " punpckldq %%mm4, %%mm3\n\t" | |
833 " punpckldq 124(%%edx), %%mm0\n\t" | |
834 " pfadd 100(%%edx), %%mm5\n\t" | |
835 " punpckldq 36(%%edx), %%mm4\n\t" | |
836 " punpckldq 92(%%edx), %%mm2\n\t" | |
837 " movq %%mm5, %%mm6\n\t" | |
838 " pfadd %%mm4, %%mm3\n\t" | |
839 " pf2id %%mm0, %%mm1\n\t" | |
840 " pf2id %%mm3, %%mm3\n\t" | |
841 " packssdw %%mm1, %%mm1\n\t" | |
842 " packssdw %%mm3, %%mm3\n\t" | |
843 " pfadd 88(%%edx), %%mm5\n\t" | |
844 " movd %%mm1, %%eax\n\t" | |
845 " movd %%mm3, %%ecx\n\t" | |
846 " movw %%ax, 448(%%edi)\n\t" | |
847 " movw %%cx, 192(%%esi)\n\t" | |
848 " pf2id %%mm5, %%mm5\n\t" | |
849 " packssdw %%mm5, %%mm5\n\t" | |
850 " shrl $16, %%eax\n\t" | |
851 " shrl $16, %%ecx\n\t" | |
852 " movd %%mm5, %%ebx\n\t" | |
853 " movw %%bx, 96(%%esi)\n\t" | |
854 " movw %%ax, 480(%%edi)\n\t" | |
855 " movw %%cx, 64(%%esi)\n\t" | |
856 " pfadd %%mm2, %%mm0\n\t" | |
857 " pf2id %%mm0, %%mm0\n\t" | |
858 " packssdw %%mm0, %%mm0\n\t" | |
859 " movd %%mm0, %%eax\n\t" | |
860 " pfadd 68(%%edx), %%mm6\n\t" | |
861 " movw %%ax, 320(%%edi)\n\t" | |
862 " shr $16, %%eax\n\t" | |
863 " pf2id %%mm6, %%mm6\n\t" | |
864 " packssdw %%mm6, %%mm6\n\t" | |
865 " movd %%mm6, %%ebx\n\t" | |
866 " movw %%ax, 416(%%edi)\n\t" | |
867 " movw %%bx, 32(%%esi)\n\t" | |
4148 | 868 |
30990 | 869 " movq 96(%%edx), %%mm0\n\t" |
870 " movq 112(%%edx), %%mm2\n\t" | |
871 " movq 104(%%edx), %%mm4\n\t" | |
872 " pfadd %%mm2, %%mm0\n\t" | |
873 " pfadd %%mm4, %%mm2\n\t" | |
874 " pfadd 120(%%edx), %%mm4\n\t" | |
875 " movq %%mm0, %%mm1\n\t" | |
876 " movq %%mm2, %%mm3\n\t" | |
877 " movq %%mm4, %%mm5\n\t" | |
878 " pfadd 64(%%edx), %%mm0\n\t" | |
879 " pfadd 80(%%edx), %%mm2\n\t" | |
880 " pfadd 72(%%edx), %%mm4\n\t" | |
881 " pf2id %%mm0, %%mm0\n\t" | |
882 " pf2id %%mm2, %%mm2\n\t" | |
883 " pf2id %%mm4, %%mm4\n\t" | |
884 " packssdw %%mm0, %%mm0\n\t" | |
885 " packssdw %%mm2, %%mm2\n\t" | |
886 " packssdw %%mm4, %%mm4\n\t" | |
887 " movd %%mm0, %%eax\n\t" | |
888 " movd %%mm2, %%ecx\n\t" | |
889 " movd %%mm4, %%ebx\n\t" | |
890 " movw %%ax, 480(%%esi)\n\t" | |
891 " movw %%cx, 352(%%esi)\n\t" | |
892 " movw %%bx, 224(%%esi)\n\t" | |
893 " shrl $16, %%eax\n\t" | |
894 " shrl $16, %%ecx\n\t" | |
895 " shrl $16, %%ebx\n\t" | |
896 " movw %%ax, 32(%%edi)\n\t" | |
897 " movw %%cx, 160(%%edi)\n\t" | |
898 " movw %%bx, 288(%%edi)\n\t" | |
899 " pfadd 80(%%edx), %%mm1\n\t" | |
900 " pfadd 72(%%edx), %%mm3\n\t" | |
901 " pfadd 88(%%edx), %%mm5\n\t" | |
902 " pf2id %%mm1, %%mm1\n\t" | |
903 " pf2id %%mm3, %%mm3\n\t" | |
904 " pf2id %%mm5, %%mm5\n\t" | |
905 " packssdw %%mm1, %%mm1\n\t" | |
906 " packssdw %%mm3, %%mm3\n\t" | |
907 " packssdw %%mm5, %%mm5\n\t" | |
908 " movd %%mm1, %%eax\n\t" | |
909 " movd %%mm3, %%ecx\n\t" | |
910 " movd %%mm5, %%ebx\n\t" | |
911 " movw %%ax, 416(%%esi)\n\t" | |
912 " movw %%cx, 288(%%esi)\n\t" | |
913 " movw %%bx, 160(%%esi)\n\t" | |
914 " shrl $16, %%eax\n\t" | |
915 " shrl $16, %%ecx\n\t" | |
916 " shrl $16, %%ebx\n\t" | |
917 " movw %%ax, 96(%%edi)\n\t" | |
918 " movw %%cx, 224(%%edi)\n\t" | |
919 " movw %%bx, 352(%%edi)\n\t" | |
4148 | 920 |
30990 | 921 " movsw\n\t" |
4148 | 922 |
923 ".L_bye:\n\t" | |
30990 | 924 " femms\n\t" |
925 : | |
926 :"m"(a),"m"(b),"m"(c),"m"(tmp[0]) | |
927 :"memory","%eax","%ebx","%ecx","%edx","%esi","%edi"); | |
5842
d6eab895c742
Avoid stdcall on cygwin, it causes undefined ref, code needs testing as I have no athlon or k6-2.
atmos4
parents:
5291
diff
changeset
|
928 } |