Mercurial > mplayer.hg
comparison mp3lib/dct64_3dnow.c @ 4148:3b29772a4fb2
S->C
author | nick |
---|---|
date | Mon, 14 Jan 2002 10:34:38 +0000 |
parents | |
children | 421969d55d5f |
comparison
equal
deleted
inserted
replaced
4147:4bbdda22003d | 4148:3b29772a4fb2 |
---|---|
1 /* | |
2 * This code was taken from http://www.mpg123.org | |
3 * See ChangeLog of mpg123-0.59s-pre.1 for detail | |
4 * Applied to mplayer by Nick Kurshev <nickols_k@mail.ru> | |
5 * Partial 3dnow! optimization by Nick Kurshev | |
6 * | |
7 * TODO: optimize scalar 3dnow! code | |
8 * Warning: Phases 7 & 8 are not tested | |
9 */ | |
#define real float /* ugly - but only way */

/*
 * Constants used by dct64_MMX_3dnow().  They are referenced only by symbol
 * name from inside the inline assembly string, so the compiler sees no
 * C-level use of them.  The "used" attribute keeps them from being
 * discarded as unreferenced statics, which would leave the assembly with
 * undefined symbols.
 *
 * x_plus_minus_3dnow: only bit 63 is set, i.e. the sign bit of the upper
 *                     32-bit half of a 64-bit MMX register.  XOR-ing a
 *                     packed pair of floats with it negates the upper
 *                     float and leaves the lower one unchanged.  Aligned
 *                     to 8 bytes because it is loaded with movq.
 * plus_1f:            the float constant 1.0, loaded with movd.
 */
static unsigned long long int __attribute__((aligned(8), used)) x_plus_minus_3dnow = 0x8000000000000000ULL;
static float __attribute__((used)) plus_1f = 1.0f;
14 | |
15 void __attribute__ (( __stdcall__ )) dct64_MMX_3dnow(real *a,real *b,real *c) | |
16 { | |
17 char tmp[256]; | |
18 __asm __volatile( | |
19 " movl %2,%%eax\n\t" | |
20 | |
21 " leal 128+%3,%%edx\n\t" | |
22 " movl %0,%%esi\n\t" | |
23 " movl %1,%%edi\n\t" | |
24 " movl $costab_mmx,%%ebx\n\t" | |
25 " leal %3,%%ecx\n\t" | |
26 | |
27 /* Phase 1*/ | |
28 " movq (%%eax), %%mm0\n\t" | |
29 " movq 8(%%eax), %%mm4\n\t" | |
30 " movq %%mm0, %%mm3\n\t" | |
31 " movq %%mm4, %%mm7\n\t" | |
32 " movq 120(%%eax), %%mm1\n\t" | |
33 " movq 112(%%eax), %%mm5\n\t" | |
34 /* n.b.: pswapd*/ | |
35 " movq %%mm1, %%mm2\n\t" | |
36 " movq %%mm5, %%mm6\n\t" | |
37 " psrlq $32, %%mm1\n\t" | |
38 " psrlq $32, %%mm5\n\t" | |
39 " punpckldq %%mm2, %%mm1\n\t" | |
40 " punpckldq %%mm6, %%mm5\n\t" | |
41 /**/ | |
42 " pfadd %%mm1, %%mm0\n\t" | |
43 " pfadd %%mm5, %%mm4\n\t" | |
44 " movq %%mm0, (%%edx)\n\t" | |
45 " movq %%mm4, 8(%%edx)\n\t" | |
46 " pfsub %%mm1, %%mm3\n\t" | |
47 " pfsub %%mm5, %%mm7\n\t" | |
48 " pfmul (%%ebx), %%mm3\n\t" | |
49 " pfmul 8(%%ebx), %%mm7\n\t" | |
50 " movd %%mm3, 124(%%edx)\n\t" | |
51 " movd %%mm7, 116(%%edx)\n\t" | |
52 " psrlq $32, %%mm3\n\t" | |
53 " psrlq $32, %%mm7\n\t" | |
54 " movd %%mm3, 120(%%edx)\n\t" | |
55 " movd %%mm7, 112(%%edx)\n\t" | |
56 | |
57 " movq 16(%%eax), %%mm0\n\t" | |
58 " movq 24(%%eax), %%mm4\n\t" | |
59 " movq %%mm0, %%mm3\n\t" | |
60 " movq %%mm4, %%mm7\n\t" | |
61 " movq 104(%%eax), %%mm1\n\t" | |
62 " movq 96(%%eax), %%mm5\n\t" | |
63 /* n.b.: pswapd*/ | |
64 " movq %%mm1, %%mm2\n\t" | |
65 " movq %%mm5, %%mm6\n\t" | |
66 " psrlq $32, %%mm1\n\t" | |
67 " psrlq $32, %%mm5\n\t" | |
68 " punpckldq %%mm2, %%mm1\n\t" | |
69 " punpckldq %%mm6, %%mm5\n\t" | |
70 /**/ | |
71 " pfadd %%mm1, %%mm0\n\t" | |
72 " pfadd %%mm5, %%mm4\n\t" | |
73 " movq %%mm0, 16(%%edx)\n\t" | |
74 " movq %%mm4, 24(%%edx)\n\t" | |
75 " pfsub %%mm1, %%mm3\n\t" | |
76 " pfsub %%mm5, %%mm7\n\t" | |
77 " pfmul 16(%%ebx), %%mm3\n\t" | |
78 " pfmul 24(%%ebx), %%mm7\n\t" | |
79 " movd %%mm3, 108(%%edx)\n\t" | |
80 " movd %%mm7, 100(%%edx)\n\t" | |
81 " psrlq $32, %%mm3\n\t" | |
82 " psrlq $32, %%mm7\n\t" | |
83 " movd %%mm3, 104(%%edx)\n\t" | |
84 " movd %%mm7, 96(%%edx)\n\t" | |
85 | |
86 " movq 32(%%eax), %%mm0\n\t" | |
87 " movq 40(%%eax), %%mm4\n\t" | |
88 " movq %%mm0, %%mm3\n\t" | |
89 " movq %%mm4, %%mm7\n\t" | |
90 " movq 88(%%eax), %%mm1\n\t" | |
91 " movq 80(%%eax), %%mm5\n\t" | |
92 /* n.b.: pswapd*/ | |
93 " movq %%mm1, %%mm2\n\t" | |
94 " movq %%mm5, %%mm6\n\t" | |
95 " psrlq $32, %%mm1\n\t" | |
96 " psrlq $32, %%mm5\n\t" | |
97 " punpckldq %%mm2, %%mm1\n\t" | |
98 " punpckldq %%mm6, %%mm5\n\t" | |
99 /**/ | |
100 " pfadd %%mm1, %%mm0\n\t" | |
101 " pfadd %%mm5, %%mm4\n\t" | |
102 " movq %%mm0, 32(%%edx)\n\t" | |
103 " movq %%mm4, 40(%%edx)\n\t" | |
104 " pfsub %%mm1, %%mm3\n\t" | |
105 " pfsub %%mm5, %%mm7\n\t" | |
106 " pfmul 32(%%ebx), %%mm3\n\t" | |
107 " pfmul 40(%%ebx), %%mm7\n\t" | |
108 " movd %%mm3, 92(%%edx)\n\t" | |
109 " movd %%mm7, 84(%%edx)\n\t" | |
110 " psrlq $32, %%mm3\n\t" | |
111 " psrlq $32, %%mm7\n\t" | |
112 " movd %%mm3, 88(%%edx)\n\t" | |
113 " movd %%mm7, 80(%%edx)\n\t" | |
114 | |
115 " movq 48(%%eax), %%mm0\n\t" | |
116 " movq 56(%%eax), %%mm4\n\t" | |
117 " movq %%mm0, %%mm3\n\t" | |
118 " movq %%mm4, %%mm7\n\t" | |
119 " movq 72(%%eax), %%mm1\n\t" | |
120 " movq 64(%%eax), %%mm5\n\t" | |
121 /* n.b.: pswapd*/ | |
122 " movq %%mm1, %%mm2\n\t" | |
123 " movq %%mm5, %%mm6\n\t" | |
124 " psrlq $32, %%mm1\n\t" | |
125 " psrlq $32, %%mm5\n\t" | |
126 " punpckldq %%mm2, %%mm1\n\t" | |
127 " punpckldq %%mm6, %%mm5\n\t" | |
128 /**/ | |
129 " pfadd %%mm1, %%mm0\n\t" | |
130 " pfadd %%mm5, %%mm4\n\t" | |
131 " movq %%mm0, 48(%%edx)\n\t" | |
132 " movq %%mm4, 56(%%edx)\n\t" | |
133 " pfsub %%mm1, %%mm3\n\t" | |
134 " pfsub %%mm5, %%mm7\n\t" | |
135 " pfmul 48(%%ebx), %%mm3\n\t" | |
136 " pfmul 56(%%ebx), %%mm7\n\t" | |
137 " movd %%mm3, 76(%%edx)\n\t" | |
138 " movd %%mm7, 68(%%edx)\n\t" | |
139 " psrlq $32, %%mm3\n\t" | |
140 " psrlq $32, %%mm7\n\t" | |
141 " movd %%mm3, 72(%%edx)\n\t" | |
142 " movd %%mm7, 64(%%edx)\n\t" | |
143 | |
144 /* Phase 2*/ | |
145 | |
146 " movq (%%edx), %%mm0\n\t" | |
147 " movq 8(%%edx), %%mm4\n\t" | |
148 " movq %%mm0, %%mm3\n\t" | |
149 " movq %%mm4, %%mm7\n\t" | |
150 " movq 56(%%edx), %%mm1\n\t" | |
151 " movq 48(%%edx), %%mm5\n\t" | |
152 /* n.b.: pswapd*/ | |
153 " movq %%mm1, %%mm2\n\t" | |
154 " movq %%mm5, %%mm6\n\t" | |
155 " psrlq $32, %%mm1\n\t" | |
156 " psrlq $32, %%mm5\n\t" | |
157 " punpckldq %%mm2, %%mm1\n\t" | |
158 " punpckldq %%mm6, %%mm5\n\t" | |
159 /**/ | |
160 " pfadd %%mm1, %%mm0\n\t" | |
161 " pfadd %%mm5, %%mm4\n\t" | |
162 " movq %%mm0, (%%ecx)\n\t" | |
163 " movq %%mm4, 8(%%ecx)\n\t" | |
164 " pfsub %%mm1, %%mm3\n\t" | |
165 " pfsub %%mm5, %%mm7\n\t" | |
166 " pfmul 64(%%ebx), %%mm3\n\t" | |
167 " pfmul 72(%%ebx), %%mm7\n\t" | |
168 " movd %%mm3, 60(%%ecx)\n\t" | |
169 " movd %%mm7, 52(%%ecx)\n\t" | |
170 " psrlq $32, %%mm3\n\t" | |
171 " psrlq $32, %%mm7\n\t" | |
172 " movd %%mm3, 56(%%ecx)\n\t" | |
173 " movd %%mm7, 48(%%ecx)\n\t" | |
174 | |
175 " movq 16(%%edx), %%mm0\n\t" | |
176 " movq 24(%%edx), %%mm4\n\t" | |
177 " movq %%mm0, %%mm3\n\t" | |
178 " movq %%mm4, %%mm7\n\t" | |
179 " movq 40(%%edx), %%mm1\n\t" | |
180 " movq 32(%%edx), %%mm5\n\t" | |
181 /* n.b.: pswapd*/ | |
182 " movq %%mm1, %%mm2\n\t" | |
183 " movq %%mm5, %%mm6\n\t" | |
184 " psrlq $32, %%mm1\n\t" | |
185 " psrlq $32, %%mm5\n\t" | |
186 " punpckldq %%mm2, %%mm1\n\t" | |
187 " punpckldq %%mm6, %%mm5\n\t" | |
188 /**/ | |
189 " pfadd %%mm1, %%mm0\n\t" | |
190 " pfadd %%mm5, %%mm4\n\t" | |
191 " movq %%mm0, 16(%%ecx)\n\t" | |
192 " movq %%mm4, 24(%%ecx)\n\t" | |
193 " pfsub %%mm1, %%mm3\n\t" | |
194 " pfsub %%mm5, %%mm7\n\t" | |
195 " pfmul 80(%%ebx), %%mm3\n\t" | |
196 " pfmul 88(%%ebx), %%mm7\n\t" | |
197 " movd %%mm3, 44(%%ecx)\n\t" | |
198 " movd %%mm7, 36(%%ecx)\n\t" | |
199 " psrlq $32, %%mm3\n\t" | |
200 " psrlq $32, %%mm7\n\t" | |
201 " movd %%mm3, 40(%%ecx)\n\t" | |
202 " movd %%mm7, 32(%%ecx)\n\t" | |
203 | |
204 /* Phase 3*/ | |
205 | |
206 " movq 64(%%edx), %%mm0\n\t" | |
207 " movq 72(%%edx), %%mm4\n\t" | |
208 " movq %%mm0, %%mm3\n\t" | |
209 " movq %%mm4, %%mm7\n\t" | |
210 " movq 120(%%edx), %%mm1\n\t" | |
211 " movq 112(%%edx), %%mm5\n\t" | |
212 /* n.b.: pswapd*/ | |
213 " movq %%mm1, %%mm2\n\t" | |
214 " movq %%mm5, %%mm6\n\t" | |
215 " psrlq $32, %%mm1\n\t" | |
216 " psrlq $32, %%mm5\n\t" | |
217 " punpckldq %%mm2, %%mm1\n\t" | |
218 " punpckldq %%mm6, %%mm5\n\t" | |
219 /**/ | |
220 " pfadd %%mm1, %%mm0\n\t" | |
221 " pfadd %%mm5, %%mm4\n\t" | |
222 " movq %%mm0, 64(%%ecx)\n\t" | |
223 " movq %%mm4, 72(%%ecx)\n\t" | |
224 " pfsubr %%mm1, %%mm3\n\t" | |
225 " pfsubr %%mm5, %%mm7\n\t" | |
226 " pfmul 64(%%ebx), %%mm3\n\t" | |
227 " pfmul 72(%%ebx), %%mm7\n\t" | |
228 " movd %%mm3, 124(%%ecx)\n\t" | |
229 " movd %%mm7, 116(%%ecx)\n\t" | |
230 " psrlq $32, %%mm3\n\t" | |
231 " psrlq $32, %%mm7\n\t" | |
232 " movd %%mm3, 120(%%ecx)\n\t" | |
233 " movd %%mm7, 112(%%ecx)\n\t" | |
234 | |
235 " movq 80(%%edx), %%mm0\n\t" | |
236 " movq 88(%%edx), %%mm4\n\t" | |
237 " movq %%mm0, %%mm3\n\t" | |
238 " movq %%mm4, %%mm7\n\t" | |
239 " movq 104(%%edx), %%mm1\n\t" | |
240 " movq 96(%%edx), %%mm5\n\t" | |
241 /* n.b.: pswapd*/ | |
242 " movq %%mm1, %%mm2\n\t" | |
243 " movq %%mm5, %%mm6\n\t" | |
244 " psrlq $32, %%mm1\n\t" | |
245 " psrlq $32, %%mm5\n\t" | |
246 " punpckldq %%mm2, %%mm1\n\t" | |
247 " punpckldq %%mm6, %%mm5\n\t" | |
248 /**/ | |
249 " pfadd %%mm1, %%mm0\n\t" | |
250 " pfadd %%mm5, %%mm4\n\t" | |
251 " movq %%mm0, 80(%%ecx)\n\t" | |
252 " movq %%mm4, 88(%%ecx)\n\t" | |
253 " pfsubr %%mm1, %%mm3\n\t" | |
254 " pfsubr %%mm5, %%mm7\n\t" | |
255 " pfmul 80(%%ebx), %%mm3\n\t" | |
256 " pfmul 88(%%ebx), %%mm7\n\t" | |
257 " movd %%mm3, 108(%%ecx)\n\t" | |
258 " movd %%mm7, 100(%%ecx)\n\t" | |
259 " psrlq $32, %%mm3\n\t" | |
260 " psrlq $32, %%mm7\n\t" | |
261 " movd %%mm3, 104(%%ecx)\n\t" | |
262 " movd %%mm7, 96(%%ecx)\n\t" | |
263 | |
264 /* Phase 4*/ | |
265 | |
266 " movq (%%ecx), %%mm0\n\t" | |
267 " movq 8(%%ecx), %%mm4\n\t" | |
268 " movq %%mm0, %%mm3\n\t" | |
269 " movq %%mm4, %%mm7\n\t" | |
270 " movq 24(%%ecx), %%mm1\n\t" | |
271 " movq 16(%%ecx), %%mm5\n\t" | |
272 /* n.b.: pswapd*/ | |
273 " movq %%mm1, %%mm2\n\t" | |
274 " movq %%mm5, %%mm6\n\t" | |
275 " psrlq $32, %%mm1\n\t" | |
276 " psrlq $32, %%mm5\n\t" | |
277 " punpckldq %%mm2, %%mm1\n\t" | |
278 " punpckldq %%mm6, %%mm5\n\t" | |
279 /**/ | |
280 " pfadd %%mm1, %%mm0\n\t" | |
281 " pfadd %%mm5, %%mm4\n\t" | |
282 " movq %%mm0, (%%edx)\n\t" | |
283 " movq %%mm4, 8(%%edx)\n\t" | |
284 " pfsub %%mm1, %%mm3\n\t" | |
285 " pfsub %%mm5, %%mm7\n\t" | |
286 " pfmul 96(%%ebx), %%mm3\n\t" | |
287 " pfmul 104(%%ebx), %%mm7\n\t" | |
288 " movd %%mm3, 28(%%edx)\n\t" | |
289 " movd %%mm7, 20(%%edx)\n\t" | |
290 " psrlq $32, %%mm3\n\t" | |
291 " psrlq $32, %%mm7\n\t" | |
292 " movd %%mm3, 24(%%edx)\n\t" | |
293 " movd %%mm7, 16(%%edx)\n\t" | |
294 | |
295 " movq 32(%%ecx), %%mm0\n\t" | |
296 " movq 40(%%ecx), %%mm4\n\t" | |
297 " movq %%mm0, %%mm3\n\t" | |
298 " movq %%mm4, %%mm7\n\t" | |
299 " movq 56(%%ecx), %%mm1\n\t" | |
300 " movq 48(%%ecx), %%mm5\n\t" | |
301 /* n.b.: pswapd*/ | |
302 " movq %%mm1, %%mm2\n\t" | |
303 " movq %%mm5, %%mm6\n\t" | |
304 " psrlq $32, %%mm1\n\t" | |
305 " psrlq $32, %%mm5\n\t" | |
306 " punpckldq %%mm2, %%mm1\n\t" | |
307 " punpckldq %%mm6, %%mm5\n\t" | |
308 /**/ | |
309 " pfadd %%mm1, %%mm0\n\t" | |
310 " pfadd %%mm5, %%mm4\n\t" | |
311 " movq %%mm0, 32(%%edx)\n\t" | |
312 " movq %%mm4, 40(%%edx)\n\t" | |
313 " pfsubr %%mm1, %%mm3\n\t" | |
314 " pfsubr %%mm5, %%mm7\n\t" | |
315 " pfmul 96(%%ebx), %%mm3\n\t" | |
316 " pfmul 104(%%ebx), %%mm7\n\t" | |
317 " movd %%mm3, 60(%%edx)\n\t" | |
318 " movd %%mm7, 52(%%edx)\n\t" | |
319 " psrlq $32, %%mm3\n\t" | |
320 " psrlq $32, %%mm7\n\t" | |
321 " movd %%mm3, 56(%%edx)\n\t" | |
322 " movd %%mm7, 48(%%edx)\n\t" | |
323 | |
324 " movq 64(%%ecx), %%mm0\n\t" | |
325 " movq 72(%%ecx), %%mm4\n\t" | |
326 " movq %%mm0, %%mm3\n\t" | |
327 " movq %%mm4, %%mm7\n\t" | |
328 " movq 88(%%ecx), %%mm1\n\t" | |
329 " movq 80(%%ecx), %%mm5\n\t" | |
330 /* n.b.: pswapd*/ | |
331 " movq %%mm1, %%mm2\n\t" | |
332 " movq %%mm5, %%mm6\n\t" | |
333 " psrlq $32, %%mm1\n\t" | |
334 " psrlq $32, %%mm5\n\t" | |
335 " punpckldq %%mm2, %%mm1\n\t" | |
336 " punpckldq %%mm6, %%mm5\n\t" | |
337 /**/ | |
338 " pfadd %%mm1, %%mm0\n\t" | |
339 " pfadd %%mm5, %%mm4\n\t" | |
340 " movq %%mm0, 64(%%edx)\n\t" | |
341 " movq %%mm4, 72(%%edx)\n\t" | |
342 " pfsub %%mm1, %%mm3\n\t" | |
343 " pfsub %%mm5, %%mm7\n\t" | |
344 " pfmul 96(%%ebx), %%mm3\n\t" | |
345 " pfmul 104(%%ebx), %%mm7\n\t" | |
346 " movd %%mm3, 92(%%edx)\n\t" | |
347 " movd %%mm7, 84(%%edx)\n\t" | |
348 " psrlq $32, %%mm3\n\t" | |
349 " psrlq $32, %%mm7\n\t" | |
350 " movd %%mm3, 88(%%edx)\n\t" | |
351 " movd %%mm7, 80(%%edx)\n\t" | |
352 | |
353 " movq 96(%%ecx), %%mm0\n\t" | |
354 " movq 104(%%ecx), %%mm4\n\t" | |
355 " movq %%mm0, %%mm3\n\t" | |
356 " movq %%mm4, %%mm7\n\t" | |
357 " movq 120(%%ecx), %%mm1\n\t" | |
358 " movq 112(%%ecx), %%mm5\n\t" | |
359 /* n.b.: pswapd*/ | |
360 " movq %%mm1, %%mm2\n\t" | |
361 " movq %%mm5, %%mm6\n\t" | |
362 " psrlq $32, %%mm1\n\t" | |
363 " psrlq $32, %%mm5\n\t" | |
364 " punpckldq %%mm2, %%mm1\n\t" | |
365 " punpckldq %%mm6, %%mm5\n\t" | |
366 /**/ | |
367 " pfadd %%mm1, %%mm0\n\t" | |
368 " pfadd %%mm5, %%mm4\n\t" | |
369 " movq %%mm0, 96(%%edx)\n\t" | |
370 " movq %%mm4, 104(%%edx)\n\t" | |
371 " pfsubr %%mm1, %%mm3\n\t" | |
372 " pfsubr %%mm5, %%mm7\n\t" | |
373 " pfmul 96(%%ebx), %%mm3\n\t" | |
374 " pfmul 104(%%ebx), %%mm7\n\t" | |
375 " movd %%mm3, 124(%%edx)\n\t" | |
376 " movd %%mm7, 116(%%edx)\n\t" | |
377 " psrlq $32, %%mm3\n\t" | |
378 " psrlq $32, %%mm7\n\t" | |
379 " movd %%mm3, 120(%%edx)\n\t" | |
380 " movd %%mm7, 112(%%edx)\n\t" | |
381 | |
382 /* Phase 5 */ | |
383 | |
384 " movq (%%edx), %%mm0\n\t" | |
385 " movq 16(%%edx), %%mm4\n\t" | |
386 " movq %%mm0, %%mm3\n\t" | |
387 " movq %%mm4, %%mm7\n\t" | |
388 " movq 8(%%edx), %%mm1\n\t" | |
389 " movq 24(%%edx), %%mm5\n\t" | |
390 /* n.b.: pswapd*/ | |
391 " movq %%mm1, %%mm2\n\t" | |
392 " movq %%mm5, %%mm6\n\t" | |
393 " psrlq $32, %%mm1\n\t" | |
394 " psrlq $32, %%mm5\n\t" | |
395 " punpckldq %%mm2, %%mm1\n\t" | |
396 " punpckldq %%mm6, %%mm5\n\t" | |
397 /**/ | |
398 " pfadd %%mm1, %%mm0\n\t" | |
399 " pfadd %%mm5, %%mm4\n\t" | |
400 " movq %%mm0, (%%ecx)\n\t" | |
401 " movq %%mm4, 16(%%ecx)\n\t" | |
402 " pfsub %%mm1, %%mm3\n\t" | |
403 " pfsubr %%mm5, %%mm7\n\t" | |
404 " pfmul 112(%%ebx), %%mm3\n\t" | |
405 " pfmul 112(%%ebx), %%mm7\n\t" | |
406 " movd %%mm3, 12(%%ecx)\n\t" | |
407 " movd %%mm7, 28(%%ecx)\n\t" | |
408 " psrlq $32, %%mm3\n\t" | |
409 " psrlq $32, %%mm7\n\t" | |
410 " movd %%mm3, 8(%%ecx)\n\t" | |
411 " movd %%mm7, 24(%%ecx)\n\t" | |
412 | |
413 " movq 32(%%edx), %%mm0\n\t" | |
414 " movq 48(%%edx), %%mm4\n\t" | |
415 " movq %%mm0, %%mm3\n\t" | |
416 " movq %%mm4, %%mm7\n\t" | |
417 " movq 40(%%edx), %%mm1\n\t" | |
418 " movq 56(%%edx), %%mm5\n\t" | |
419 /* n.b.: pswapd*/ | |
420 " movq %%mm1, %%mm2\n\t" | |
421 " movq %%mm5, %%mm6\n\t" | |
422 " psrlq $32, %%mm1\n\t" | |
423 " psrlq $32, %%mm5\n\t" | |
424 " punpckldq %%mm2, %%mm1\n\t" | |
425 " punpckldq %%mm6, %%mm5\n\t" | |
426 /**/ | |
427 " pfadd %%mm1, %%mm0\n\t" | |
428 " pfadd %%mm5, %%mm4\n\t" | |
429 " movq %%mm0, 32(%%ecx)\n\t" | |
430 " movq %%mm4, 48(%%ecx)\n\t" | |
431 " pfsub %%mm1, %%mm3\n\t" | |
432 " pfsubr %%mm5, %%mm7\n\t" | |
433 " pfmul 112(%%ebx), %%mm3\n\t" | |
434 " pfmul 112(%%ebx), %%mm7\n\t" | |
435 " movd %%mm3, 44(%%ecx)\n\t" | |
436 " movd %%mm7, 60(%%ecx)\n\t" | |
437 " psrlq $32, %%mm3\n\t" | |
438 " psrlq $32, %%mm7\n\t" | |
439 " movd %%mm3, 40(%%ecx)\n\t" | |
440 " movd %%mm7, 56(%%ecx)\n\t" | |
441 | |
442 " movq 64(%%edx), %%mm0\n\t" | |
443 " movq 80(%%edx), %%mm4\n\t" | |
444 " movq %%mm0, %%mm3\n\t" | |
445 " movq %%mm4, %%mm7\n\t" | |
446 " movq 72(%%edx), %%mm1\n\t" | |
447 " movq 88(%%edx), %%mm5\n\t" | |
448 /* n.b.: pswapd*/ | |
449 " movq %%mm1, %%mm2\n\t" | |
450 " movq %%mm5, %%mm6\n\t" | |
451 " psrlq $32, %%mm1\n\t" | |
452 " psrlq $32, %%mm5\n\t" | |
453 " punpckldq %%mm2, %%mm1\n\t" | |
454 " punpckldq %%mm6, %%mm5\n\t" | |
455 /**/ | |
456 " pfadd %%mm1, %%mm0\n\t" | |
457 " pfadd %%mm5, %%mm4\n\t" | |
458 " movq %%mm0, 64(%%ecx)\n\t" | |
459 " movq %%mm4, 80(%%ecx)\n\t" | |
460 " pfsub %%mm1, %%mm3\n\t" | |
461 " pfsubr %%mm5, %%mm7\n\t" | |
462 " pfmul 112(%%ebx), %%mm3\n\t" | |
463 " pfmul 112(%%ebx), %%mm7\n\t" | |
464 " movd %%mm3, 76(%%ecx)\n\t" | |
465 " movd %%mm7, 92(%%ecx)\n\t" | |
466 " psrlq $32, %%mm3\n\t" | |
467 " psrlq $32, %%mm7\n\t" | |
468 " movd %%mm3, 72(%%ecx)\n\t" | |
469 " movd %%mm7, 88(%%ecx)\n\t" | |
470 | |
471 " movq 96(%%edx), %%mm0\n\t" | |
472 " movq 112(%%edx), %%mm4\n\t" | |
473 " movq %%mm0, %%mm3\n\t" | |
474 " movq %%mm4, %%mm7\n\t" | |
475 " movq 104(%%edx), %%mm1\n\t" | |
476 " movq 120(%%edx), %%mm5\n\t" | |
477 /* n.b.: pswapd*/ | |
478 " movq %%mm1, %%mm2\n\t" | |
479 " movq %%mm5, %%mm6\n\t" | |
480 " psrlq $32, %%mm1\n\t" | |
481 " psrlq $32, %%mm5\n\t" | |
482 " punpckldq %%mm2, %%mm1\n\t" | |
483 " punpckldq %%mm6, %%mm5\n\t" | |
484 /**/ | |
485 " pfadd %%mm1, %%mm0\n\t" | |
486 " pfadd %%mm5, %%mm4\n\t" | |
487 " movq %%mm0, 96(%%ecx)\n\t" | |
488 " movq %%mm4, 112(%%ecx)\n\t" | |
489 " pfsub %%mm1, %%mm3\n\t" | |
490 " pfsubr %%mm5, %%mm7\n\t" | |
491 " pfmul 112(%%ebx), %%mm3\n\t" | |
492 " pfmul 112(%%ebx), %%mm7\n\t" | |
493 " movd %%mm3, 108(%%ecx)\n\t" | |
494 " movd %%mm7, 124(%%ecx)\n\t" | |
495 " psrlq $32, %%mm3\n\t" | |
496 " psrlq $32, %%mm7\n\t" | |
497 " movd %%mm3, 104(%%ecx)\n\t" | |
498 " movd %%mm7, 120(%%ecx)\n\t" | |
499 | |
500 /* Phase 6. This is the end of easy road. */ | |
501 /* Code below is coded in scalar mode. Should be optimized */ | |
502 | |
503 " movd plus_1f, %%mm6\n\t" | |
504 " punpckldq 120(%%ebx), %%mm6\n\t" /* mm6 = 1.0 | 120(%%ebx)*/ | |
505 " movq x_plus_minus_3dnow, %%mm7\n\t" /* mm7 = +1 | -1 */ | |
506 | |
507 " movq 32(%%ecx), %%mm0\n\t" | |
508 " movq 64(%%ecx), %%mm2\n\t" | |
509 " movq %%mm0, %%mm1\n\t" | |
510 " movq %%mm2, %%mm3\n\t" | |
511 " pxor %%mm7, %%mm1\n\t" | |
512 " pxor %%mm7, %%mm3\n\t" | |
513 " pfacc %%mm1, %%mm0\n\t" | |
514 " pfacc %%mm3, %%mm2\n\t" | |
515 " pfmul %%mm6, %%mm0\n\t" | |
516 " pfmul %%mm6, %%mm2\n\t" | |
517 " movq %%mm0, 32(%%edx)\n\t" | |
518 " movq %%mm2, 64(%%edx)\n\t" | |
519 | |
520 " movd 44(%%ecx), %%mm0\n\t" | |
521 " movd 40(%%ecx), %%mm2\n\t" | |
522 " movd 120(%%ebx), %%mm3\n\t" | |
523 " punpckldq 76(%%ecx), %%mm0\n\t" | |
524 " punpckldq 72(%%ecx), %%mm2\n\t" | |
525 " punpckldq %%mm3, %%mm3\n\t" | |
526 " movq %%mm0, %%mm4\n\t" | |
527 " movq %%mm2, %%mm5\n\t" | |
528 " pfsub %%mm2, %%mm0\n\t" | |
529 " pfmul %%mm3, %%mm0\n\t" | |
530 " movq %%mm0, %%mm1\n\t" | |
531 " pfadd %%mm5, %%mm0\n\t" | |
532 " pfadd %%mm4, %%mm0\n\t" | |
533 " movq %%mm0, %%mm2\n\t" | |
534 " punpckldq %%mm1, %%mm0\n\t" | |
535 " punpckhdq %%mm1, %%mm2\n\t" | |
536 " movq %%mm0, 40(%%edx)\n\t" | |
537 " movq %%mm2, 72(%%edx)\n\t" | |
538 | |
539 " movd 48(%%ecx), %%mm3\n\t" | |
540 " movd 60(%%ecx), %%mm2\n\t" | |
541 " pfsub 52(%%ecx), %%mm3\n\t" | |
542 " pfsub 56(%%ecx), %%mm2\n\t" | |
543 " pfmul 120(%%ebx), %%mm3\n\t" | |
544 " pfmul 120(%%ebx), %%mm2\n\t" | |
545 " movq %%mm2, %%mm1\n\t" | |
546 | |
547 " pfadd 56(%%ecx), %%mm1\n\t" | |
548 " pfadd 60(%%ecx), %%mm1\n\t" | |
549 " movq %%mm1, %%mm0\n\t" | |
550 | |
551 " pfadd 48(%%ecx), %%mm0\n\t" | |
552 " pfadd 52(%%ecx), %%mm0\n\t" | |
553 " pfadd %%mm3, %%mm1\n\t" | |
554 " punpckldq %%mm2, %%mm1\n\t" | |
555 " pfadd %%mm3, %%mm2\n\t" | |
556 " punpckldq %%mm2, %%mm0\n\t" | |
557 " movq %%mm1, 56(%%edx)\n\t" | |
558 " movq %%mm0, 48(%%edx)\n\t" | |
559 | |
560 /*---*/ | |
561 | |
562 " movd 92(%%ecx), %%mm1\n\t" | |
563 " pfsub 88(%%ecx), %%mm1\n\t" | |
564 " pfmul 120(%%ebx), %%mm1\n\t" | |
565 " movd %%mm1, 92(%%edx)\n\t" | |
566 " pfadd 92(%%ecx), %%mm1\n\t" | |
567 " pfadd 88(%%ecx), %%mm1\n\t" | |
568 " movq %%mm1, %%mm0\n\t" | |
569 | |
570 " pfadd 80(%%ecx), %%mm0\n\t" | |
571 " pfadd 84(%%ecx), %%mm0\n\t" | |
572 " movd %%mm0, 80(%%edx)\n\t" | |
573 | |
574 " movd 80(%%ecx), %%mm0\n\t" | |
575 " pfsub 84(%%ecx), %%mm0\n\t" | |
576 " pfmul 120(%%ebx), %%mm0\n\t" | |
577 " pfadd %%mm0, %%mm1\n\t" | |
578 " pfadd 92(%%edx), %%mm0\n\t" | |
579 " punpckldq %%mm1, %%mm0\n\t" | |
580 " movq %%mm0, 84(%%edx)\n\t" | |
581 | |
582 " movq 96(%%ecx), %%mm0\n\t" | |
583 " movq %%mm0, %%mm1\n\t" | |
584 " pxor %%mm7, %%mm1\n\t" | |
585 " pfacc %%mm1, %%mm0\n\t" | |
586 " pfmul %%mm6, %%mm0\n\t" | |
587 " movq %%mm0, 96(%%edx)\n\t" | |
588 | |
589 " movd 108(%%ecx), %%mm0\n\t" | |
590 " pfsub 104(%%ecx), %%mm0\n\t" | |
591 " pfmul 120(%%ebx), %%mm0\n\t" | |
592 " movd %%mm0, 108(%%edx)\n\t" | |
593 " pfadd 104(%%ecx), %%mm0\n\t" | |
594 " pfadd 108(%%ecx), %%mm0\n\t" | |
595 " movd %%mm0, 104(%%edx)\n\t" | |
596 | |
597 " movd 124(%%ecx), %%mm1\n\t" | |
598 " pfsub 120(%%ecx), %%mm1\n\t" | |
599 " pfmul 120(%%ebx), %%mm1\n\t" | |
600 " movd %%mm1, 124(%%edx)\n\t" | |
601 " pfadd 120(%%ecx), %%mm1\n\t" | |
602 " pfadd 124(%%ecx), %%mm1\n\t" | |
603 " movq %%mm1, %%mm0\n\t" | |
604 | |
605 " pfadd 112(%%ecx), %%mm0\n\t" | |
606 " pfadd 116(%%ecx), %%mm0\n\t" | |
607 " movd %%mm0, 112(%%edx)\n\t" | |
608 | |
609 " movd 112(%%ecx), %%mm0\n\t" | |
610 " pfsub 116(%%ecx), %%mm0\n\t" | |
611 " pfmul 120(%%ebx), %%mm0\n\t" | |
612 " pfadd %%mm0,%%mm1\n\t" | |
613 " pfadd 124(%%edx), %%mm0\n\t" | |
614 " punpckldq %%mm1, %%mm0\n\t" | |
615 " movq %%mm0, 116(%%edx)\n\t" | |
616 | |
617 " jnz .L01\n\t" | |
618 | |
619 /* Phase 7*/ | |
620 /* Code below is coded in scalar mode. Should be optimized */ | |
621 | |
622 " movd (%%ecx), %%mm0\n\t" | |
623 " pfadd 4(%%ecx), %%mm0\n\t" | |
624 " movd %%mm0, 1024(%%esi)\n\t" | |
625 | |
626 " movd (%%ecx), %%mm0\n\t" | |
627 " pfsub 4(%%ecx), %%mm0\n\t" | |
628 " pfmul 120(%%ebx), %%mm0\n\t" | |
629 " movd %%mm0, (%%esi)\n\t" | |
630 " movd %%mm0, (%%edi)\n\t" | |
631 | |
632 " movd 12(%%ecx), %%mm0\n\t" | |
633 " pfsub 8(%%ecx), %%mm0\n\t" | |
634 " pfmul 120(%%ebx), %%mm0\n\t" | |
635 " movd %%mm0, 512(%%edi)\n\t" | |
636 " pfadd 12(%%ecx), %%mm0\n\t" | |
637 " pfadd 8(%%ecx), %%mm0\n\t" | |
638 " movd %%mm0, 512(%%esi)\n\t" | |
639 | |
640 " movd 16(%%ecx), %%mm0\n\t" | |
641 " pfsub 20(%%ecx), %%mm0\n\t" | |
642 " pfmul 120(%%ebx), %%mm0\n\t" | |
643 " movq %%mm0, %%mm3\n\t" | |
644 | |
645 " movd 28(%%ecx), %%mm0\n\t" | |
646 " pfsub 24(%%ecx), %%mm0\n\t" | |
647 " pfmul 120(%%ebx), %%mm0\n\t" | |
648 " movd %%mm0, 768(%%edi)\n\t" | |
649 " movq %%mm0, %%mm2\n\t" | |
650 | |
651 " pfadd 24(%%ecx), %%mm0\n\t" | |
652 " pfadd 28(%%ecx), %%mm0\n\t" | |
653 " movq %%mm0, %%mm1\n\t" | |
654 | |
655 " pfadd 16(%%ecx), %%mm0\n\t" | |
656 " pfadd 20(%%ecx), %%mm0\n\t" | |
657 " movd %%mm0, 768(%%esi)\n\t" | |
658 " pfadd %%mm3, %%mm1\n\t" | |
659 " movd %%mm1, 256(%%esi)\n\t" | |
660 " pfadd %%mm3, %%mm2\n\t" | |
661 " movd %%mm2, 256(%%edi)\n\t" | |
662 | |
663 /* Phase 8*/ | |
664 | |
665 " movq 32(%%edx), %%mm0\n\t" | |
666 " movq 48(%%edx), %%mm1\n\t" | |
667 " pfadd 48(%%edx), %%mm0\n\t" | |
668 " pfadd 40(%%edx), %%mm1\n\t" | |
669 " movd %%mm0, 896(%%esi)\n\t" | |
670 " movd %%mm1, 640(%%esi)\n\t" | |
671 " psrlq $32, %%mm0\n\t" | |
672 " psrlq $32, %%mm1\n\t" | |
673 " movd %%mm0, 128(%%edi)\n\t" | |
674 " movd %%mm1, 384(%%edi)\n\t" | |
675 | |
676 " movd 40(%%edx), %%mm0\n\t" | |
677 " pfadd 56(%%edx), %%mm0\n\t" | |
678 " movd %%mm0, 384(%%esi)\n\t" | |
679 | |
680 " movd 56(%%edx), %%mm0\n\t" | |
681 " pfadd 36(%%edx), %%mm0\n\t" | |
682 " movd %%mm0, 128(%%esi)\n\t" | |
683 | |
684 " movd 60(%%edx), %%mm0\n\t" | |
685 " movd %%mm0, 896(%%edi)\n\t" | |
686 " pfadd 44(%%edx), %%mm0\n\t" | |
687 " movd %%mm0, 640(%%edi)\n\t" | |
688 | |
689 " movq 96(%%edx), %%mm0\n\t" | |
690 " movq 112(%%edx), %%mm2\n\t" | |
691 " movq 104(%%edx), %%mm4\n\t" | |
692 " pfadd 112(%%edx), %%mm0\n\t" | |
693 " pfadd 104(%%edx), %%mm2\n\t" | |
694 " pfadd 120(%%edx), %%mm4\n\t" | |
695 " movq %%mm0, %%mm1\n\t" | |
696 " movq %%mm2, %%mm3\n\t" | |
697 " movq %%mm4, %%mm5\n\t" | |
698 " pfadd 64(%%edx), %%mm0\n\t" | |
699 " pfadd 80(%%edx), %%mm2\n\t" | |
700 " pfadd 72(%%edx), %%mm4\n\t" | |
701 " movd %%mm0, 960(%%esi)\n\t" | |
702 " movd %%mm2, 704(%%esi)\n\t" | |
703 " movd %%mm4, 448(%%esi)\n\t" | |
704 " psrlq $32, %%mm0\n\t" | |
705 " psrlq $32, %%mm2\n\t" | |
706 " psrlq $32, %%mm4\n\t" | |
707 " movd %%mm0, 64(%%edi)\n\t" | |
708 " movd %%mm2, 320(%%edi)\n\t" | |
709 " movd %%mm4, 576(%%edi)\n\t" | |
710 " pfadd 80(%%edx), %%mm1\n\t" | |
711 " pfadd 72(%%edx), %%mm3\n\t" | |
712 " pfadd 88(%%edx), %%mm5\n\t" | |
713 " movd %%mm1, 832(%%esi)\n\t" | |
714 " movd %%mm3, 576(%%esi)\n\t" | |
715 " movd %%mm5, 320(%%esi)\n\t" | |
716 " psrlq $32, %%mm1\n\t" | |
717 " psrlq $32, %%mm3\n\t" | |
718 " psrlq $32, %%mm5\n\t" | |
719 " movd %%mm1, 192(%%edi)\n\t" | |
720 " movd %%mm3, 448(%%edi)\n\t" | |
721 " movd %%mm5, 704(%%edi)\n\t" | |
722 | |
723 " movd 120(%%edx), %%mm0\n\t" | |
724 " pfadd 100(%%edx), %%mm0\n\t" | |
725 " movq %%mm0, %%mm1\n\t" | |
726 " pfadd 88(%%edx), %%mm0\n\t" | |
727 " movd %%mm0, 192(%%esi)\n\t" | |
728 " pfadd 68(%%edx), %%mm1\n\t" | |
729 " movd %%mm1, 64(%%esi)\n\t" | |
730 | |
731 " movd 124(%%edx), %%mm0\n\t" | |
732 " movd %%mm0, 960(%%edi)\n\t" | |
733 " pfadd 92(%%edx), %%mm0\n\t" | |
734 " movd %%mm0, 832(%%edi)\n\t" | |
735 | |
736 " jmp .L_bye\n\t" | |
737 ".L01:\n\t" | |
738 /* Phase 9*/ | |
739 | |
740 " movq (%%ecx), %%mm0\n\t" | |
741 " movq %%mm0, %%mm1\n\t" | |
742 " pxor %%mm7, %%mm1\n\t" | |
743 " pfacc %%mm1, %%mm0\n\t" | |
744 " pfmul %%mm6, %%mm0\n\t" | |
745 " pf2id %%mm0, %%mm0\n\t" | |
746 " movd %%mm0, %%eax\n\t" | |
747 " movw %%ax, 512(%%esi)\n\t" | |
748 " psrlq $32, %%mm0\n\t" | |
749 " movd %%mm0, %%eax\n\t" | |
750 " movw %%ax, (%%esi)\n\t" | |
751 | |
752 " movd 12(%%ecx), %%mm0\n\t" | |
753 " pfsub 8(%%ecx), %%mm0\n\t" | |
754 " pfmul 120(%%ebx), %%mm0\n\t" | |
755 " pf2id %%mm0, %%mm7\n\t" | |
756 " movd %%mm7, %%eax\n\t" | |
757 " movw %%ax, 256(%%edi)\n\t" | |
758 " pfadd 12(%%ecx), %%mm0\n\t" | |
759 " pfadd 8(%%ecx), %%mm0\n\t" | |
760 " pf2id %%mm0, %%mm0\n\t" | |
761 " movd %%mm0, %%eax\n\t" | |
762 " movw %%ax, 256(%%esi)\n\t" | |
763 | |
764 " movd 16(%%ecx), %%mm3\n\t" | |
765 " pfsub 20(%%ecx), %%mm3\n\t" | |
766 " pfmul 120(%%ebx), %%mm3\n\t" | |
767 " movq %%mm3, %%mm2\n\t" | |
768 | |
769 " movd 28(%%ecx), %%mm2\n\t" | |
770 " pfsub 24(%%ecx), %%mm2\n\t" | |
771 " pfmul 120(%%ebx), %%mm2\n\t" | |
772 " movq %%mm2, %%mm1\n\t" | |
773 | |
774 " pf2id %%mm2, %%mm7\n\t" | |
775 " movd %%mm7, %%eax\n\t" | |
776 " movw %%ax, 384(%%edi)\n\t" | |
777 | |
778 " pfadd 24(%%ecx), %%mm1\n\t" | |
779 " pfadd 28(%%ecx), %%mm1\n\t" | |
780 " movq %%mm1, %%mm0\n\t" | |
781 | |
782 " pfadd 16(%%ecx), %%mm0\n\t" | |
783 " pfadd 20(%%ecx), %%mm0\n\t" | |
784 " pf2id %%mm0, %%mm0\n\t" | |
785 " movd %%mm0, %%eax\n\t" | |
786 " movw %%ax, 384(%%esi)\n\t" | |
787 " pfadd %%mm3, %%mm1\n\t" | |
788 " pf2id %%mm1, %%mm1\n\t" | |
789 " movd %%mm1, %%eax\n\t" | |
790 " movw %%ax, 128(%%esi)\n\t" | |
791 " pfadd %%mm3, %%mm2\n\t" | |
792 " pf2id %%mm2, %%mm2\n\t" | |
793 " movd %%mm2, %%eax\n\t" | |
794 " movw %%ax, 128(%%edi)\n\t" | |
795 | |
796 /* Phase 10*/ | |
797 | |
798 " movq 32(%%edx), %%mm0\n\t" | |
799 " movq 48(%%edx), %%mm1\n\t" | |
800 " pfadd 48(%%edx), %%mm0\n\t" | |
801 " pfadd 40(%%edx), %%mm1\n\t" | |
802 " pf2id %%mm0, %%mm0\n\t" | |
803 " pf2id %%mm1, %%mm1\n\t" | |
804 " movd %%mm0, %%eax\n\t" | |
805 " movd %%mm1, %%ecx\n\t" | |
806 " movw %%ax, 448(%%esi)\n\t" | |
807 " movw %%cx, 320(%%esi)\n\t" | |
808 " psrlq $32, %%mm0\n\t" | |
809 " psrlq $32, %%mm1\n\t" | |
810 " movd %%mm0, %%eax\n\t" | |
811 " movd %%mm1, %%ecx\n\t" | |
812 " movw %%ax, 64(%%edi)\n\t" | |
813 " movw %%cx, 192(%%edi)\n\t" | |
814 | |
815 " movd 40(%%edx), %%mm3\n\t" | |
816 " movd 56(%%edx), %%mm4\n\t" | |
817 " movd 60(%%edx), %%mm0\n\t" | |
818 " movd 44(%%edx), %%mm2\n\t" | |
819 " movd 120(%%edx), %%mm5\n\t" | |
820 " punpckldq %%mm4, %%mm3\n\t" | |
821 " punpckldq 124(%%edx), %%mm0\n\t" | |
822 " pfadd 100(%%edx), %%mm5\n\t" | |
823 " punpckldq 36(%%edx), %%mm4\n\t" | |
824 " punpckldq 92(%%edx), %%mm2\n\t" | |
825 " movq %%mm5, %%mm6\n\t" | |
826 " pfadd %%mm4, %%mm3\n\t" | |
827 " pf2id %%mm0, %%mm1\n\t" | |
828 " pf2id %%mm3, %%mm3\n\t" | |
829 " pfadd 88(%%edx), %%mm5\n\t" | |
830 " movd %%mm1, %%eax\n\t" | |
831 " movd %%mm3, %%ecx\n\t" | |
832 " movw %%ax, 448(%%edi)\n\t" | |
833 " movw %%cx, 192(%%esi)\n\t" | |
834 " pf2id %%mm5, %%mm5\n\t" | |
835 " psrlq $32, %%mm1\n\t" | |
836 " psrlq $32, %%mm3\n\t" | |
837 " movd %%mm5, %%ebx\n\t" | |
838 " movd %%mm1, %%eax\n\t" | |
839 " movd %%mm3, %%ecx\n\t" | |
840 " movw %%bx, 96(%%esi)\n\t" | |
841 " movw %%ax, 480(%%edi)\n\t" | |
842 " movw %%cx, 64(%%esi)\n\t" | |
843 " pfadd %%mm2, %%mm0\n\t" | |
844 " pf2id %%mm0, %%mm0\n\t" | |
845 " movd %%mm0, %%eax\n\t" | |
846 " pfadd 68(%%edx), %%mm6\n\t" | |
847 " movw %%ax, 320(%%edi)\n\t" | |
848 " psrlq $32, %%mm0\n\t" | |
849 " pf2id %%mm6, %%mm6\n\t" | |
850 " movd %%mm0, %%eax\n\t" | |
851 " movd %%mm6, %%ebx\n\t" | |
852 " movw %%ax, 416(%%edi)\n\t" | |
853 " movw %%bx, 32(%%esi)\n\t" | |
854 | |
855 " movq 96(%%edx), %%mm0\n\t" | |
856 " movq 112(%%edx), %%mm2\n\t" | |
857 " movq 104(%%edx), %%mm4\n\t" | |
858 " pfadd %%mm2, %%mm0\n\t" | |
859 " pfadd %%mm4, %%mm2\n\t" | |
860 " pfadd 120(%%edx), %%mm4\n\t" | |
861 " movq %%mm0, %%mm1\n\t" | |
862 " movq %%mm2, %%mm3\n\t" | |
863 " movq %%mm4, %%mm5\n\t" | |
864 " pfadd 64(%%edx), %%mm0\n\t" | |
865 " pfadd 80(%%edx), %%mm2\n\t" | |
866 " pfadd 72(%%edx), %%mm4\n\t" | |
867 " pf2id %%mm0, %%mm0\n\t" | |
868 " pf2id %%mm2, %%mm2\n\t" | |
869 " pf2id %%mm4, %%mm4\n\t" | |
870 " movd %%mm0, %%eax\n\t" | |
871 " movd %%mm2, %%ecx\n\t" | |
872 " movd %%mm4, %%ebx\n\t" | |
873 " movw %%ax, 480(%%esi)\n\t" | |
874 " movw %%cx, 352(%%esi)\n\t" | |
875 " movw %%bx, 224(%%esi)\n\t" | |
876 " psrlq $32, %%mm0\n\t" | |
877 " psrlq $32, %%mm2\n\t" | |
878 " psrlq $32, %%mm4\n\t" | |
879 " movd %%mm0, %%eax\n\t" | |
880 " movd %%mm2, %%ecx\n\t" | |
881 " movd %%mm4, %%ebx\n\t" | |
882 " movw %%ax, 32(%%edi)\n\t" | |
883 " movw %%cx, 160(%%edi)\n\t" | |
884 " movw %%bx, 288(%%edi)\n\t" | |
885 " pfadd 80(%%edx), %%mm1\n\t" | |
886 " pfadd 72(%%edx), %%mm3\n\t" | |
887 " pfadd 88(%%edx), %%mm5\n\t" | |
888 " pf2id %%mm1, %%mm1\n\t" | |
889 " pf2id %%mm3, %%mm3\n\t" | |
890 " pf2id %%mm5, %%mm5\n\t" | |
891 " movd %%mm1, %%eax\n\t" | |
892 " movd %%mm3, %%ecx\n\t" | |
893 " movd %%mm5, %%ebx\n\t" | |
894 " movw %%ax, 416(%%esi)\n\t" | |
895 " movw %%cx, 288(%%esi)\n\t" | |
896 " movw %%bx, 160(%%esi)\n\t" | |
897 " psrlq $32, %%mm1\n\t" | |
898 " psrlq $32, %%mm3\n\t" | |
899 " psrlq $32, %%mm5\n\t" | |
900 " movd %%mm1, %%eax\n\t" | |
901 " movd %%mm3, %%ecx\n\t" | |
902 " movd %%mm5, %%ebx\n\t" | |
903 " movw %%ax, 96(%%edi)\n\t" | |
904 " movw %%cx, 224(%%edi)\n\t" | |
905 " movw %%bx, 352(%%edi)\n\t" | |
906 | |
907 " movsw\n\t" | |
908 | |
909 ".L_bye:\n\t" | |
910 " femms\n\t" | |
911 : | |
912 :"m"(a),"m"(b),"m"(c),"m"(tmp[0]) | |
913 :"memory","%ebx","%esi","%edi"); | |
914 } |