Mercurial > mplayer.hg
annotate mp3lib/dct64_k7.c @ 29779:282ea4fbe87d
ad_ffmpeg: Fix channel layout for ffvorbis and ffaac
Patch submitted by Nicolas George, nicolas.george normalesup org
The layout exceptions removed by this patch were rendered unnecessary by
changes in ffmpeg which normalize channel layout for aac (r20067) and vorbis
(r20148).
author | tack |
---|---|
date | Wed, 04 Nov 2009 00:54:46 +0000 |
parents | b5a46071062a |
children | 347d152a5cfa |
rev | line source |
---|---|
4148 | 1 /* |
2 * This code was taken from http://www.mpg123.org | |
3 * See ChangeLog of mpg123-0.59s-pre.1 for details | |
4 * Applied to mplayer by Nick Kurshev <nickols_k@mail.ru> | |
5 * Partial 3dnowex-DSP! optimization by Nick Kurshev | |
6 * | |
7 * TODO: optimize scalar 3dnow! code | |
8 * Warning: Phases 7 & 8 are not tested | |
9 */ | |
10 #define real float /* ugly - but only way */ | |
11 | |
16989 | 12 #include "config.h" |
13 #include "mangle.h" | |
5291 | 14 |
13918 | 15 static unsigned long long int attribute_used __attribute__((aligned(8))) x_plus_minus_3dnow = 0x8000000000000000ULL; |
12292 | 16 static float attribute_used plus_1f = 1.0; |
4148 | 17 |
23441 | 18 void dct64_MMX_3dnowex(short *a,short *b,real *c) |
4148 | 19 { |
20 char tmp[256]; | |
27757
b5a46071062a
Replace all occurrences of '__volatile__' and '__volatile' by plain 'volatile'.
diego
parents:
27754
diff
changeset
|
21 __asm__ volatile( |
4148 | 22 " movl %2,%%eax\n\t" |
23 | |
24 " leal 128+%3,%%edx\n\t" | |
25 " movl %0,%%esi\n\t" | |
26 " movl %1,%%edi\n\t" | |
5291 | 27 " movl $"MANGLE(costab_mmx)",%%ebx\n\t" |
4148 | 28 " leal %3,%%ecx\n\t" |
29 | |
30 /* Phase 1*/ | |
31 " movq (%%eax), %%mm0\n\t" | |
32 " movq 8(%%eax), %%mm4\n\t" | |
33 " movq %%mm0, %%mm3\n\t" | |
34 " movq %%mm4, %%mm7\n\t" | |
18834
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
35 " pswapd 120(%%eax), %%mm1\n\t" |
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
36 " pswapd 112(%%eax), %%mm5\n\t" |
4148 | 37 " pfadd %%mm1, %%mm0\n\t" |
38 " pfadd %%mm5, %%mm4\n\t" | |
39 " movq %%mm0, (%%edx)\n\t" | |
40 " movq %%mm4, 8(%%edx)\n\t" | |
41 " pfsub %%mm1, %%mm3\n\t" | |
42 " pfsub %%mm5, %%mm7\n\t" | |
43 " pfmul (%%ebx), %%mm3\n\t" | |
44 " pfmul 8(%%ebx), %%mm7\n\t" | |
45 " pswapd %%mm3, %%mm3\n\t" | |
46 " pswapd %%mm7, %%mm7\n\t" | |
47 " movq %%mm3, 120(%%edx)\n\t" | |
48 " movq %%mm7, 112(%%edx)\n\t" | |
49 | |
50 " movq 16(%%eax), %%mm0\n\t" | |
51 " movq 24(%%eax), %%mm4\n\t" | |
52 " movq %%mm0, %%mm3\n\t" | |
53 " movq %%mm4, %%mm7\n\t" | |
18834
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
54 " pswapd 104(%%eax), %%mm1\n\t" |
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
55 " pswapd 96(%%eax), %%mm5\n\t" |
4148 | 56 " pfadd %%mm1, %%mm0\n\t" |
57 " pfadd %%mm5, %%mm4\n\t" | |
58 " movq %%mm0, 16(%%edx)\n\t" | |
59 " movq %%mm4, 24(%%edx)\n\t" | |
60 " pfsub %%mm1, %%mm3\n\t" | |
61 " pfsub %%mm5, %%mm7\n\t" | |
62 " pfmul 16(%%ebx), %%mm3\n\t" | |
63 " pfmul 24(%%ebx), %%mm7\n\t" | |
64 " pswapd %%mm3, %%mm3\n\t" | |
65 " pswapd %%mm7, %%mm7\n\t" | |
66 " movq %%mm3, 104(%%edx)\n\t" | |
67 " movq %%mm7, 96(%%edx)\n\t" | |
68 | |
69 " movq 32(%%eax), %%mm0\n\t" | |
70 " movq 40(%%eax), %%mm4\n\t" | |
71 " movq %%mm0, %%mm3\n\t" | |
72 " movq %%mm4, %%mm7\n\t" | |
18834
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
73 " pswapd 88(%%eax), %%mm1\n\t" |
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
74 " pswapd 80(%%eax), %%mm5\n\t" |
4148 | 75 " pfadd %%mm1, %%mm0\n\t" |
76 " pfadd %%mm5, %%mm4\n\t" | |
77 " movq %%mm0, 32(%%edx)\n\t" | |
78 " movq %%mm4, 40(%%edx)\n\t" | |
79 " pfsub %%mm1, %%mm3\n\t" | |
80 " pfsub %%mm5, %%mm7\n\t" | |
81 " pfmul 32(%%ebx), %%mm3\n\t" | |
82 " pfmul 40(%%ebx), %%mm7\n\t" | |
83 " pswapd %%mm3, %%mm3\n\t" | |
84 " pswapd %%mm7, %%mm7\n\t" | |
85 " movq %%mm3, 88(%%edx)\n\t" | |
86 " movq %%mm7, 80(%%edx)\n\t" | |
87 | |
88 " movq 48(%%eax), %%mm0\n\t" | |
89 " movq 56(%%eax), %%mm4\n\t" | |
90 " movq %%mm0, %%mm3\n\t" | |
91 " movq %%mm4, %%mm7\n\t" | |
18834
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
92 " pswapd 72(%%eax), %%mm1\n\t" |
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
93 " pswapd 64(%%eax), %%mm5\n\t" |
4148 | 94 " pfadd %%mm1, %%mm0\n\t" |
95 " pfadd %%mm5, %%mm4\n\t" | |
96 " movq %%mm0, 48(%%edx)\n\t" | |
97 " movq %%mm4, 56(%%edx)\n\t" | |
98 " pfsub %%mm1, %%mm3\n\t" | |
99 " pfsub %%mm5, %%mm7\n\t" | |
100 " pfmul 48(%%ebx), %%mm3\n\t" | |
101 " pfmul 56(%%ebx), %%mm7\n\t" | |
102 " pswapd %%mm3, %%mm3\n\t" | |
103 " pswapd %%mm7, %%mm7\n\t" | |
104 " movq %%mm3, 72(%%edx)\n\t" | |
105 " movq %%mm7, 64(%%edx)\n\t" | |
106 | |
107 /* Phase 2*/ | |
108 | |
109 " movq (%%edx), %%mm0\n\t" | |
110 " movq 8(%%edx), %%mm4\n\t" | |
111 " movq %%mm0, %%mm3\n\t" | |
112 " movq %%mm4, %%mm7\n\t" | |
18834
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
113 " pswapd 56(%%edx), %%mm1\n\t" |
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
114 " pswapd 48(%%edx), %%mm5\n\t" |
4148 | 115 " pfadd %%mm1, %%mm0\n\t" |
116 " pfadd %%mm5, %%mm4\n\t" | |
117 " movq %%mm0, (%%ecx)\n\t" | |
118 " movq %%mm4, 8(%%ecx)\n\t" | |
119 " pfsub %%mm1, %%mm3\n\t" | |
120 " pfsub %%mm5, %%mm7\n\t" | |
121 " pfmul 64(%%ebx), %%mm3\n\t" | |
122 " pfmul 72(%%ebx), %%mm7\n\t" | |
123 " pswapd %%mm3, %%mm3\n\t" | |
124 " pswapd %%mm7, %%mm7\n\t" | |
125 " movq %%mm3, 56(%%ecx)\n\t" | |
126 " movq %%mm7, 48(%%ecx)\n\t" | |
127 | |
128 " movq 16(%%edx), %%mm0\n\t" | |
129 " movq 24(%%edx), %%mm4\n\t" | |
130 " movq %%mm0, %%mm3\n\t" | |
131 " movq %%mm4, %%mm7\n\t" | |
18834
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
132 " pswapd 40(%%edx), %%mm1\n\t" |
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
133 " pswapd 32(%%edx), %%mm5\n\t" |
4148 | 134 " pfadd %%mm1, %%mm0\n\t" |
135 " pfadd %%mm5, %%mm4\n\t" | |
136 " movq %%mm0, 16(%%ecx)\n\t" | |
137 " movq %%mm4, 24(%%ecx)\n\t" | |
138 " pfsub %%mm1, %%mm3\n\t" | |
139 " pfsub %%mm5, %%mm7\n\t" | |
140 " pfmul 80(%%ebx), %%mm3\n\t" | |
141 " pfmul 88(%%ebx), %%mm7\n\t" | |
142 " pswapd %%mm3, %%mm3\n\t" | |
143 " pswapd %%mm7, %%mm7\n\t" | |
144 " movq %%mm3, 40(%%ecx)\n\t" | |
145 " movq %%mm7, 32(%%ecx)\n\t" | |
146 | |
147 /* Phase 3*/ | |
148 | |
149 " movq 64(%%edx), %%mm0\n\t" | |
150 " movq 72(%%edx), %%mm4\n\t" | |
151 " movq %%mm0, %%mm3\n\t" | |
152 " movq %%mm4, %%mm7\n\t" | |
18834
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
153 " pswapd 120(%%edx), %%mm1\n\t" |
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
154 " pswapd 112(%%edx), %%mm5\n\t" |
4148 | 155 " pfadd %%mm1, %%mm0\n\t" |
156 " pfadd %%mm5, %%mm4\n\t" | |
157 " movq %%mm0, 64(%%ecx)\n\t" | |
158 " movq %%mm4, 72(%%ecx)\n\t" | |
159 " pfsubr %%mm1, %%mm3\n\t" | |
160 " pfsubr %%mm5, %%mm7\n\t" | |
161 " pfmul 64(%%ebx), %%mm3\n\t" | |
162 " pfmul 72(%%ebx), %%mm7\n\t" | |
163 " pswapd %%mm3, %%mm3\n\t" | |
164 " pswapd %%mm7, %%mm7\n\t" | |
165 " movq %%mm3, 120(%%ecx)\n\t" | |
166 " movq %%mm7, 112(%%ecx)\n\t" | |
167 | |
168 " movq 80(%%edx), %%mm0\n\t" | |
169 " movq 88(%%edx), %%mm4\n\t" | |
170 " movq %%mm0, %%mm3\n\t" | |
171 " movq %%mm4, %%mm7\n\t" | |
18834
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
172 " pswapd 104(%%edx), %%mm1\n\t" |
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
173 " pswapd 96(%%edx), %%mm5\n\t" |
4148 | 174 " pfadd %%mm1, %%mm0\n\t" |
175 " pfadd %%mm5, %%mm4\n\t" | |
176 " movq %%mm0, 80(%%ecx)\n\t" | |
177 " movq %%mm4, 88(%%ecx)\n\t" | |
178 " pfsubr %%mm1, %%mm3\n\t" | |
179 " pfsubr %%mm5, %%mm7\n\t" | |
180 " pfmul 80(%%ebx), %%mm3\n\t" | |
181 " pfmul 88(%%ebx), %%mm7\n\t" | |
182 " pswapd %%mm3, %%mm3\n\t" | |
183 " pswapd %%mm7, %%mm7\n\t" | |
184 " movq %%mm3, 104(%%ecx)\n\t" | |
185 " movq %%mm7, 96(%%ecx)\n\t" | |
186 | |
187 /* Phase 4*/ | |
188 | |
189 " movq 96(%%ebx), %%mm2\n\t" | |
190 " movq 104(%%ebx), %%mm6\n\t" | |
191 | |
192 " movq (%%ecx), %%mm0\n\t" | |
193 " movq 8(%%ecx), %%mm4\n\t" | |
194 " movq %%mm0, %%mm3\n\t" | |
195 " movq %%mm4, %%mm7\n\t" | |
18834
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
196 " pswapd 24(%%ecx), %%mm1\n\t" |
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
197 " pswapd 16(%%ecx), %%mm5\n\t" |
4148 | 198 " pfadd %%mm1, %%mm0\n\t" |
199 " pfadd %%mm5, %%mm4\n\t" | |
200 " movq %%mm0, (%%edx)\n\t" | |
201 " movq %%mm4, 8(%%edx)\n\t" | |
202 " pfsub %%mm1, %%mm3\n\t" | |
203 " pfsub %%mm5, %%mm7\n\t" | |
204 " pfmul %%mm2, %%mm3\n\t" | |
205 " pfmul %%mm6, %%mm7\n\t" | |
206 " pswapd %%mm3, %%mm3\n\t" | |
207 " pswapd %%mm7, %%mm7\n\t" | |
208 " movq %%mm3, 24(%%edx)\n\t" | |
209 " movq %%mm7, 16(%%edx)\n\t" | |
210 | |
211 " movq 32(%%ecx), %%mm0\n\t" | |
212 " movq 40(%%ecx), %%mm4\n\t" | |
213 " movq %%mm0, %%mm3\n\t" | |
214 " movq %%mm4, %%mm7\n\t" | |
18834
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
215 " pswapd 56(%%ecx), %%mm1\n\t" |
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
216 " pswapd 48(%%ecx), %%mm5\n\t" |
4148 | 217 " pfadd %%mm1, %%mm0\n\t" |
218 " pfadd %%mm5, %%mm4\n\t" | |
219 " movq %%mm0, 32(%%edx)\n\t" | |
220 " movq %%mm4, 40(%%edx)\n\t" | |
221 " pfsubr %%mm1, %%mm3\n\t" | |
222 " pfsubr %%mm5, %%mm7\n\t" | |
223 " pfmul %%mm2, %%mm3\n\t" | |
224 " pfmul %%mm6, %%mm7\n\t" | |
225 " pswapd %%mm3, %%mm3\n\t" | |
226 " pswapd %%mm7, %%mm7\n\t" | |
227 " movq %%mm3, 56(%%edx)\n\t" | |
228 " movq %%mm7, 48(%%edx)\n\t" | |
229 | |
230 " movq 64(%%ecx), %%mm0\n\t" | |
231 " movq 72(%%ecx), %%mm4\n\t" | |
232 " movq %%mm0, %%mm3\n\t" | |
233 " movq %%mm4, %%mm7\n\t" | |
18834
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
234 " pswapd 88(%%ecx), %%mm1\n\t" |
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
235 " pswapd 80(%%ecx), %%mm5\n\t" |
4148 | 236 " pfadd %%mm1, %%mm0\n\t" |
237 " pfadd %%mm5, %%mm4\n\t" | |
238 " movq %%mm0, 64(%%edx)\n\t" | |
239 " movq %%mm4, 72(%%edx)\n\t" | |
240 " pfsub %%mm1, %%mm3\n\t" | |
241 " pfsub %%mm5, %%mm7\n\t" | |
242 " pfmul %%mm2, %%mm3\n\t" | |
243 " pfmul %%mm6, %%mm7\n\t" | |
244 " pswapd %%mm3, %%mm3\n\t" | |
245 " pswapd %%mm7, %%mm7\n\t" | |
246 " movq %%mm3, 88(%%edx)\n\t" | |
247 " movq %%mm7, 80(%%edx)\n\t" | |
248 | |
249 " movq 96(%%ecx), %%mm0\n\t" | |
250 " movq 104(%%ecx), %%mm4\n\t" | |
251 " movq %%mm0, %%mm3\n\t" | |
252 " movq %%mm4, %%mm7\n\t" | |
18834
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
253 " pswapd 120(%%ecx), %%mm1\n\t" |
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
254 " pswapd 112(%%ecx), %%mm5\n\t" |
4148 | 255 " pfadd %%mm1, %%mm0\n\t" |
256 " pfadd %%mm5, %%mm4\n\t" | |
257 " movq %%mm0, 96(%%edx)\n\t" | |
258 " movq %%mm4, 104(%%edx)\n\t" | |
259 " pfsubr %%mm1, %%mm3\n\t" | |
260 " pfsubr %%mm5, %%mm7\n\t" | |
261 " pfmul %%mm2, %%mm3\n\t" | |
262 " pfmul %%mm6, %%mm7\n\t" | |
263 " pswapd %%mm3, %%mm3\n\t" | |
264 " pswapd %%mm7, %%mm7\n\t" | |
265 " movq %%mm3, 120(%%edx)\n\t" | |
266 " movq %%mm7, 112(%%edx)\n\t" | |
267 | |
268 /* Phase 5 */ | |
269 | |
270 " movq 112(%%ebx), %%mm2\n\t" | |
271 | |
272 " movq (%%edx), %%mm0\n\t" | |
273 " movq 16(%%edx), %%mm4\n\t" | |
274 " movq %%mm0, %%mm3\n\t" | |
275 " movq %%mm4, %%mm7\n\t" | |
18834
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
276 " pswapd 8(%%edx), %%mm1\n\t" |
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
277 " pswapd 24(%%edx), %%mm5\n\t" |
4148 | 278 " pfadd %%mm1, %%mm0\n\t" |
279 " pfadd %%mm5, %%mm4\n\t" | |
280 " movq %%mm0, (%%ecx)\n\t" | |
281 " movq %%mm4, 16(%%ecx)\n\t" | |
282 " pfsub %%mm1, %%mm3\n\t" | |
283 " pfsubr %%mm5, %%mm7\n\t" | |
284 " pfmul %%mm2, %%mm3\n\t" | |
285 " pfmul %%mm2, %%mm7\n\t" | |
286 " pswapd %%mm3, %%mm3\n\t" | |
287 " pswapd %%mm7, %%mm7\n\t" | |
288 " movq %%mm3, 8(%%ecx)\n\t" | |
289 " movq %%mm7, 24(%%ecx)\n\t" | |
290 | |
291 " movq 32(%%edx), %%mm0\n\t" | |
292 " movq 48(%%edx), %%mm4\n\t" | |
293 " movq %%mm0, %%mm3\n\t" | |
294 " movq %%mm4, %%mm7\n\t" | |
18834
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
295 " pswapd 40(%%edx), %%mm1\n\t" |
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
296 " pswapd 56(%%edx), %%mm5\n\t" |
4148 | 297 " pfadd %%mm1, %%mm0\n\t" |
298 " pfadd %%mm5, %%mm4\n\t" | |
299 " movq %%mm0, 32(%%ecx)\n\t" | |
300 " movq %%mm4, 48(%%ecx)\n\t" | |
301 " pfsub %%mm1, %%mm3\n\t" | |
302 " pfsubr %%mm5, %%mm7\n\t" | |
303 " pfmul %%mm2, %%mm3\n\t" | |
304 " pfmul %%mm2, %%mm7\n\t" | |
305 " pswapd %%mm3, %%mm3\n\t" | |
306 " pswapd %%mm7, %%mm7\n\t" | |
307 " movq %%mm3, 40(%%ecx)\n\t" | |
308 " movq %%mm7, 56(%%ecx)\n\t" | |
309 | |
310 " movq 64(%%edx), %%mm0\n\t" | |
311 " movq 80(%%edx), %%mm4\n\t" | |
312 " movq %%mm0, %%mm3\n\t" | |
313 " movq %%mm4, %%mm7\n\t" | |
18834
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
314 " pswapd 72(%%edx), %%mm1\n\t" |
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
315 " pswapd 88(%%edx), %%mm5\n\t" |
4148 | 316 " pfadd %%mm1, %%mm0\n\t" |
317 " pfadd %%mm5, %%mm4\n\t" | |
318 " movq %%mm0, 64(%%ecx)\n\t" | |
319 " movq %%mm4, 80(%%ecx)\n\t" | |
320 " pfsub %%mm1, %%mm3\n\t" | |
321 " pfsubr %%mm5, %%mm7\n\t" | |
322 " pfmul %%mm2, %%mm3\n\t" | |
323 " pfmul %%mm2, %%mm7\n\t" | |
324 " pswapd %%mm3, %%mm3\n\t" | |
325 " pswapd %%mm7, %%mm7\n\t" | |
326 " movq %%mm3, 72(%%ecx)\n\t" | |
327 " movq %%mm7, 88(%%ecx)\n\t" | |
328 | |
329 " movq 96(%%edx), %%mm0\n\t" | |
330 " movq 112(%%edx), %%mm4\n\t" | |
331 " movq %%mm0, %%mm3\n\t" | |
332 " movq %%mm4, %%mm7\n\t" | |
18834
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
333 " pswapd 104(%%edx), %%mm1\n\t" |
306982c83e8e
remove MMX2 dependency (replace pshufw $78 with pswapd) so it can
gpoirier
parents:
18833
diff
changeset
|
334 " pswapd 120(%%edx), %%mm5\n\t" |
4148 | 335 " pfadd %%mm1, %%mm0\n\t" |
336 " pfadd %%mm5, %%mm4\n\t" | |
337 " movq %%mm0, 96(%%ecx)\n\t" | |
338 " movq %%mm4, 112(%%ecx)\n\t" | |
339 " pfsub %%mm1, %%mm3\n\t" | |
340 " pfsubr %%mm5, %%mm7\n\t" | |
341 " pfmul %%mm2, %%mm3\n\t" | |
342 " pfmul %%mm2, %%mm7\n\t" | |
343 " pswapd %%mm3, %%mm3\n\t" | |
344 " pswapd %%mm7, %%mm7\n\t" | |
345 " movq %%mm3, 104(%%ecx)\n\t" | |
346 " movq %%mm7, 120(%%ecx)\n\t" | |
347 | |
348 | |
349 /* Phase 6. This is the end of easy road. */ | |
350 /* Code below is coded in scalar mode. Should be optimized */ | |
351 | |
5291 | 352 " movd "MANGLE(plus_1f)", %%mm6\n\t" |
4148 | 353 " punpckldq 120(%%ebx), %%mm6\n\t" /* mm6 = 1.0 | 120(%%ebx)*/ |
8236
7e2ca93330d0
forgotten mangle patch by (Bj«Órn Sandell <biorn at dce dot chalmers dot se>)
michael
parents:
7307
diff
changeset
|
354 " movq "MANGLE(x_plus_minus_3dnow)", %%mm7\n\t" /* mm7 = +1 | -1 */ |
4148 | 355 |
356 " movq 32(%%ecx), %%mm0\n\t" | |
357 " movq 64(%%ecx), %%mm2\n\t" | |
358 " movq %%mm0, %%mm1\n\t" | |
359 " movq %%mm2, %%mm3\n\t" | |
360 " pxor %%mm7, %%mm1\n\t" | |
361 " pxor %%mm7, %%mm3\n\t" | |
362 " pfacc %%mm1, %%mm0\n\t" | |
363 " pfacc %%mm3, %%mm2\n\t" | |
364 " pfmul %%mm6, %%mm0\n\t" | |
365 " pfmul %%mm6, %%mm2\n\t" | |
366 " movq %%mm0, 32(%%edx)\n\t" | |
367 " movq %%mm2, 64(%%edx)\n\t" | |
368 | |
369 " movd 44(%%ecx), %%mm0\n\t" | |
370 " movd 40(%%ecx), %%mm2\n\t" | |
371 " movd 120(%%ebx), %%mm3\n\t" | |
372 " punpckldq 76(%%ecx), %%mm0\n\t" | |
373 " punpckldq 72(%%ecx), %%mm2\n\t" | |
374 " punpckldq %%mm3, %%mm3\n\t" | |
375 " movq %%mm0, %%mm4\n\t" | |
376 " movq %%mm2, %%mm5\n\t" | |
377 " pfsub %%mm2, %%mm0\n\t" | |
378 " pfmul %%mm3, %%mm0\n\t" | |
379 " movq %%mm0, %%mm1\n\t" | |
380 " pfadd %%mm5, %%mm0\n\t" | |
381 " pfadd %%mm4, %%mm0\n\t" | |
382 " movq %%mm0, %%mm2\n\t" | |
383 " punpckldq %%mm1, %%mm0\n\t" | |
384 " punpckhdq %%mm1, %%mm2\n\t" | |
385 " movq %%mm0, 40(%%edx)\n\t" | |
386 " movq %%mm2, 72(%%edx)\n\t" | |
387 | |
388 " movd 48(%%ecx), %%mm3\n\t" | |
389 " movd 60(%%ecx), %%mm2\n\t" | |
390 " pfsub 52(%%ecx), %%mm3\n\t" | |
391 " pfsub 56(%%ecx), %%mm2\n\t" | |
392 " pfmul 120(%%ebx), %%mm3\n\t" | |
393 " pfmul 120(%%ebx), %%mm2\n\t" | |
394 " movq %%mm2, %%mm1\n\t" | |
395 | |
396 " pfadd 56(%%ecx), %%mm1\n\t" | |
397 " pfadd 60(%%ecx), %%mm1\n\t" | |
398 " movq %%mm1, %%mm0\n\t" | |
399 | |
400 " pfadd 48(%%ecx), %%mm0\n\t" | |
401 " pfadd 52(%%ecx), %%mm0\n\t" | |
402 " pfadd %%mm3, %%mm1\n\t" | |
403 " punpckldq %%mm2, %%mm1\n\t" | |
404 " pfadd %%mm3, %%mm2\n\t" | |
405 " punpckldq %%mm2, %%mm0\n\t" | |
406 " movq %%mm1, 56(%%edx)\n\t" | |
407 " movq %%mm0, 48(%%edx)\n\t" | |
408 | |
409 /*---*/ | |
410 | |
411 " movd 92(%%ecx), %%mm1\n\t" | |
412 " pfsub 88(%%ecx), %%mm1\n\t" | |
413 " pfmul 120(%%ebx), %%mm1\n\t" | |
414 " movd %%mm1, 92(%%edx)\n\t" | |
415 " pfadd 92(%%ecx), %%mm1\n\t" | |
416 " pfadd 88(%%ecx), %%mm1\n\t" | |
417 " movq %%mm1, %%mm0\n\t" | |
418 | |
419 " pfadd 80(%%ecx), %%mm0\n\t" | |
420 " pfadd 84(%%ecx), %%mm0\n\t" | |
421 " movd %%mm0, 80(%%edx)\n\t" | |
422 | |
423 " movd 80(%%ecx), %%mm0\n\t" | |
424 " pfsub 84(%%ecx), %%mm0\n\t" | |
425 " pfmul 120(%%ebx), %%mm0\n\t" | |
426 " pfadd %%mm0, %%mm1\n\t" | |
427 " pfadd 92(%%edx), %%mm0\n\t" | |
428 " punpckldq %%mm1, %%mm0\n\t" | |
429 " movq %%mm0, 84(%%edx)\n\t" | |
430 | |
431 " movq 96(%%ecx), %%mm0\n\t" | |
432 " movq %%mm0, %%mm1\n\t" | |
433 " pxor %%mm7, %%mm1\n\t" | |
434 " pfacc %%mm1, %%mm0\n\t" | |
435 " pfmul %%mm6, %%mm0\n\t" | |
436 " movq %%mm0, 96(%%edx)\n\t" | |
437 | |
438 " movd 108(%%ecx), %%mm0\n\t" | |
439 " pfsub 104(%%ecx), %%mm0\n\t" | |
440 " pfmul 120(%%ebx), %%mm0\n\t" | |
441 " movd %%mm0, 108(%%edx)\n\t" | |
442 " pfadd 104(%%ecx), %%mm0\n\t" | |
443 " pfadd 108(%%ecx), %%mm0\n\t" | |
444 " movd %%mm0, 104(%%edx)\n\t" | |
445 | |
446 " movd 124(%%ecx), %%mm1\n\t" | |
447 " pfsub 120(%%ecx), %%mm1\n\t" | |
448 " pfmul 120(%%ebx), %%mm1\n\t" | |
449 " movd %%mm1, 124(%%edx)\n\t" | |
450 " pfadd 120(%%ecx), %%mm1\n\t" | |
451 " pfadd 124(%%ecx), %%mm1\n\t" | |
452 " movq %%mm1, %%mm0\n\t" | |
453 | |
454 " pfadd 112(%%ecx), %%mm0\n\t" | |
455 " pfadd 116(%%ecx), %%mm0\n\t" | |
456 " movd %%mm0, 112(%%edx)\n\t" | |
457 | |
458 " movd 112(%%ecx), %%mm0\n\t" | |
459 " pfsub 116(%%ecx), %%mm0\n\t" | |
460 " pfmul 120(%%ebx), %%mm0\n\t" | |
461 " pfadd %%mm0,%%mm1\n\t" | |
462 " pfadd 124(%%edx), %%mm0\n\t" | |
463 " punpckldq %%mm1, %%mm0\n\t" | |
464 " movq %%mm0, 116(%%edx)\n\t" | |
465 | |
20504
27fb949fffa9
disable nonworking/broken code for now till I find out what it is supposed to do.
reimar
parents:
18834
diff
changeset
|
466 // this code is broken, there is nothing modifying the z flag above. |
27fb949fffa9
disable nonworking/broken code for now till I find out what it is supposed to do.
reimar
parents:
18834
diff
changeset
|
467 #if 0 |
4148 | 468 " jnz .L01\n\t" |
469 | |
470 /* Phase 7*/ | |
471 /* Code below is coded in scalar mode. Should be optimized */ | |
472 | |
473 " movd (%%ecx), %%mm0\n\t" | |
474 " pfadd 4(%%ecx), %%mm0\n\t" | |
475 " movd %%mm0, 1024(%%esi)\n\t" | |
476 | |
477 " movd (%%ecx), %%mm0\n\t" | |
478 " pfsub 4(%%ecx), %%mm0\n\t" | |
479 " pfmul 120(%%ebx), %%mm0\n\t" | |
480 " movd %%mm0, (%%esi)\n\t" | |
481 " movd %%mm0, (%%edi)\n\t" | |
482 | |
483 " movd 12(%%ecx), %%mm0\n\t" | |
484 " pfsub 8(%%ecx), %%mm0\n\t" | |
485 " pfmul 120(%%ebx), %%mm0\n\t" | |
486 " movd %%mm0, 512(%%edi)\n\t" | |
487 " pfadd 12(%%ecx), %%mm0\n\t" | |
488 " pfadd 8(%%ecx), %%mm0\n\t" | |
489 " movd %%mm0, 512(%%esi)\n\t" | |
490 | |
491 " movd 16(%%ecx), %%mm0\n\t" | |
492 " pfsub 20(%%ecx), %%mm0\n\t" | |
493 " pfmul 120(%%ebx), %%mm0\n\t" | |
494 " movq %%mm0, %%mm3\n\t" | |
495 | |
496 " movd 28(%%ecx), %%mm0\n\t" | |
497 " pfsub 24(%%ecx), %%mm0\n\t" | |
498 " pfmul 120(%%ebx), %%mm0\n\t" | |
499 " movd %%mm0, 768(%%edi)\n\t" | |
500 " movq %%mm0, %%mm2\n\t" | |
501 | |
502 " pfadd 24(%%ecx), %%mm0\n\t" | |
503 " pfadd 28(%%ecx), %%mm0\n\t" | |
504 " movq %%mm0, %%mm1\n\t" | |
505 | |
506 " pfadd 16(%%ecx), %%mm0\n\t" | |
507 " pfadd 20(%%ecx), %%mm0\n\t" | |
508 " movd %%mm0, 768(%%esi)\n\t" | |
509 " pfadd %%mm3, %%mm1\n\t" | |
510 " movd %%mm1, 256(%%esi)\n\t" | |
511 " pfadd %%mm3, %%mm2\n\t" | |
512 " movd %%mm2, 256(%%edi)\n\t" | |
513 | |
514 /* Phase 8*/ | |
515 | |
516 " movq 32(%%edx), %%mm0\n\t" | |
517 " movq 48(%%edx), %%mm1\n\t" | |
518 " pfadd 48(%%edx), %%mm0\n\t" | |
519 " pfadd 40(%%edx), %%mm1\n\t" | |
520 " movd %%mm0, 896(%%esi)\n\t" | |
521 " movd %%mm1, 640(%%esi)\n\t" | |
522 " psrlq $32, %%mm0\n\t" | |
523 " psrlq $32, %%mm1\n\t" | |
524 " movd %%mm0, 128(%%edi)\n\t" | |
525 " movd %%mm1, 384(%%edi)\n\t" | |
526 | |
527 " movd 40(%%edx), %%mm0\n\t" | |
528 " pfadd 56(%%edx), %%mm0\n\t" | |
529 " movd %%mm0, 384(%%esi)\n\t" | |
530 | |
531 " movd 56(%%edx), %%mm0\n\t" | |
532 " pfadd 36(%%edx), %%mm0\n\t" | |
533 " movd %%mm0, 128(%%esi)\n\t" | |
534 | |
535 " movd 60(%%edx), %%mm0\n\t" | |
536 " movd %%mm0, 896(%%edi)\n\t" | |
537 " pfadd 44(%%edx), %%mm0\n\t" | |
538 " movd %%mm0, 640(%%edi)\n\t" | |
539 | |
540 " movq 96(%%edx), %%mm0\n\t" | |
541 " movq 112(%%edx), %%mm2\n\t" | |
542 " movq 104(%%edx), %%mm4\n\t" | |
543 " pfadd 112(%%edx), %%mm0\n\t" | |
544 " pfadd 104(%%edx), %%mm2\n\t" | |
545 " pfadd 120(%%edx), %%mm4\n\t" | |
546 " movq %%mm0, %%mm1\n\t" | |
547 " movq %%mm2, %%mm3\n\t" | |
548 " movq %%mm4, %%mm5\n\t" | |
549 " pfadd 64(%%edx), %%mm0\n\t" | |
550 " pfadd 80(%%edx), %%mm2\n\t" | |
551 " pfadd 72(%%edx), %%mm4\n\t" | |
552 " movd %%mm0, 960(%%esi)\n\t" | |
553 " movd %%mm2, 704(%%esi)\n\t" | |
554 " movd %%mm4, 448(%%esi)\n\t" | |
555 " psrlq $32, %%mm0\n\t" | |
556 " psrlq $32, %%mm2\n\t" | |
557 " psrlq $32, %%mm4\n\t" | |
558 " movd %%mm0, 64(%%edi)\n\t" | |
559 " movd %%mm2, 320(%%edi)\n\t" | |
560 " movd %%mm4, 576(%%edi)\n\t" | |
561 " pfadd 80(%%edx), %%mm1\n\t" | |
562 " pfadd 72(%%edx), %%mm3\n\t" | |
563 " pfadd 88(%%edx), %%mm5\n\t" | |
564 " movd %%mm1, 832(%%esi)\n\t" | |
565 " movd %%mm3, 576(%%esi)\n\t" | |
566 " movd %%mm5, 320(%%esi)\n\t" | |
567 " psrlq $32, %%mm1\n\t" | |
568 " psrlq $32, %%mm3\n\t" | |
569 " psrlq $32, %%mm5\n\t" | |
570 " movd %%mm1, 192(%%edi)\n\t" | |
571 " movd %%mm3, 448(%%edi)\n\t" | |
572 " movd %%mm5, 704(%%edi)\n\t" | |
573 | |
574 " movd 120(%%edx), %%mm0\n\t" | |
575 " pfadd 100(%%edx), %%mm0\n\t" | |
576 " movq %%mm0, %%mm1\n\t" | |
577 " pfadd 88(%%edx), %%mm0\n\t" | |
578 " movd %%mm0, 192(%%esi)\n\t" | |
579 " pfadd 68(%%edx), %%mm1\n\t" | |
580 " movd %%mm1, 64(%%esi)\n\t" | |
581 | |
582 " movd 124(%%edx), %%mm0\n\t" | |
583 " movd %%mm0, 960(%%edi)\n\t" | |
584 " pfadd 92(%%edx), %%mm0\n\t" | |
585 " movd %%mm0, 832(%%edi)\n\t" | |
586 | |
587 " jmp .L_bye\n\t" | |
588 ".L01: \n\t" | |
20504
27fb949fffa9
disable nonworking/broken code for now till I find out what it is supposed to do.
reimar
parents:
18834
diff
changeset
|
589 #endif |
4148 | 590 /* Phase 9*/ |
591 | |
592 " movq (%%ecx), %%mm0\n\t" | |
593 " movq %%mm0, %%mm1\n\t" | |
594 " pxor %%mm7, %%mm1\n\t" | |
595 " pfacc %%mm1, %%mm0\n\t" | |
596 " pfmul %%mm6, %%mm0\n\t" | |
18833
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
597 " pf2iw %%mm0, %%mm0\n\t" |
4148 | 598 " movd %%mm0, %%eax\n\t" |
599 " movw %%ax, 512(%%esi)\n\t" | |
600 " psrlq $32, %%mm0\n\t" | |
601 " movd %%mm0, %%eax\n\t" | |
602 " movw %%ax, (%%esi)\n\t" | |
603 | |
604 " movd 12(%%ecx), %%mm0\n\t" | |
605 " pfsub 8(%%ecx), %%mm0\n\t" | |
606 " pfmul 120(%%ebx), %%mm0\n\t" | |
18833
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
607 " pf2iw %%mm0, %%mm7\n\t" |
4148 | 608 " movd %%mm7, %%eax\n\t" |
609 " movw %%ax, 256(%%edi)\n\t" | |
610 " pfadd 12(%%ecx), %%mm0\n\t" | |
611 " pfadd 8(%%ecx), %%mm0\n\t" | |
18833
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
612 " pf2iw %%mm0, %%mm0\n\t" |
4148 | 613 " movd %%mm0, %%eax\n\t" |
614 " movw %%ax, 256(%%esi)\n\t" | |
615 | |
616 " movd 16(%%ecx), %%mm3\n\t" | |
617 " pfsub 20(%%ecx), %%mm3\n\t" | |
618 " pfmul 120(%%ebx), %%mm3\n\t" | |
619 " movq %%mm3, %%mm2\n\t" | |
620 | |
621 " movd 28(%%ecx), %%mm2\n\t" | |
622 " pfsub 24(%%ecx), %%mm2\n\t" | |
623 " pfmul 120(%%ebx), %%mm2\n\t" | |
624 " movq %%mm2, %%mm1\n\t" | |
625 | |
18833
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
626 " pf2iw %%mm2, %%mm7\n\t" |
4148 | 627 " movd %%mm7, %%eax\n\t" |
628 " movw %%ax, 384(%%edi)\n\t" | |
629 | |
630 " pfadd 24(%%ecx), %%mm1\n\t" | |
631 " pfadd 28(%%ecx), %%mm1\n\t" | |
632 " movq %%mm1, %%mm0\n\t" | |
633 | |
634 " pfadd 16(%%ecx), %%mm0\n\t" | |
635 " pfadd 20(%%ecx), %%mm0\n\t" | |
18833
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
636 " pf2iw %%mm0, %%mm0\n\t" |
4148 | 637 " movd %%mm0, %%eax\n\t" |
638 " movw %%ax, 384(%%esi)\n\t" | |
639 " pfadd %%mm3, %%mm1\n\t" | |
18833
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
640 " pf2iw %%mm1, %%mm1\n\t" |
4148 | 641 " movd %%mm1, %%eax\n\t" |
642 " movw %%ax, 128(%%esi)\n\t" | |
643 " pfadd %%mm3, %%mm2\n\t" | |
18833
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
644 " pf2iw %%mm2, %%mm2\n\t" |
4148 | 645 " movd %%mm2, %%eax\n\t" |
646 " movw %%ax, 128(%%edi)\n\t" | |
647 | |
648 /* Phase 10*/ | |
649 | |
650 " movq 32(%%edx), %%mm0\n\t" | |
651 " movq 48(%%edx), %%mm1\n\t" | |
652 " pfadd 48(%%edx), %%mm0\n\t" | |
653 " pfadd 40(%%edx), %%mm1\n\t" | |
18833
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
654 " pf2iw %%mm0, %%mm0\n\t" |
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
655 " pf2iw %%mm1, %%mm1\n\t" |
4148 | 656 " movd %%mm0, %%eax\n\t" |
657 " movd %%mm1, %%ecx\n\t" | |
658 " movw %%ax, 448(%%esi)\n\t" | |
659 " movw %%cx, 320(%%esi)\n\t" | |
660 " psrlq $32, %%mm0\n\t" | |
661 " psrlq $32, %%mm1\n\t" | |
662 " movd %%mm0, %%eax\n\t" | |
663 " movd %%mm1, %%ecx\n\t" | |
664 " movw %%ax, 64(%%edi)\n\t" | |
665 " movw %%cx, 192(%%edi)\n\t" | |
666 | |
667 " movd 40(%%edx), %%mm3\n\t" | |
668 " movd 56(%%edx), %%mm4\n\t" | |
669 " movd 60(%%edx), %%mm0\n\t" | |
670 " movd 44(%%edx), %%mm2\n\t" | |
671 " movd 120(%%edx), %%mm5\n\t" | |
672 " punpckldq %%mm4, %%mm3\n\t" | |
673 " punpckldq 124(%%edx), %%mm0\n\t" | |
674 " pfadd 100(%%edx), %%mm5\n\t" | |
675 " punpckldq 36(%%edx), %%mm4\n\t" | |
676 " punpckldq 92(%%edx), %%mm2\n\t" | |
677 " movq %%mm5, %%mm6\n\t" | |
678 " pfadd %%mm4, %%mm3\n\t" | |
18833
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
679 " pf2iw %%mm0, %%mm1\n\t" |
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
680 " pf2iw %%mm3, %%mm3\n\t" |
4148 | 681 " pfadd 88(%%edx), %%mm5\n\t" |
682 " movd %%mm1, %%eax\n\t" | |
683 " movd %%mm3, %%ecx\n\t" | |
684 " movw %%ax, 448(%%edi)\n\t" | |
685 " movw %%cx, 192(%%esi)\n\t" | |
18833
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
686 " pf2iw %%mm5, %%mm5\n\t" |
4148 | 687 " psrlq $32, %%mm1\n\t" |
688 " psrlq $32, %%mm3\n\t" | |
689 " movd %%mm5, %%ebx\n\t" | |
690 " movd %%mm1, %%eax\n\t" | |
691 " movd %%mm3, %%ecx\n\t" | |
692 " movw %%bx, 96(%%esi)\n\t" | |
693 " movw %%ax, 480(%%edi)\n\t" | |
694 " movw %%cx, 64(%%esi)\n\t" | |
695 " pfadd %%mm2, %%mm0\n\t" | |
18833
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
696 " pf2iw %%mm0, %%mm0\n\t" |
4148 | 697 " movd %%mm0, %%eax\n\t" |
698 " pfadd 68(%%edx), %%mm6\n\t" | |
699 " movw %%ax, 320(%%edi)\n\t" | |
700 " psrlq $32, %%mm0\n\t" | |
18833
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
701 " pf2iw %%mm6, %%mm6\n\t" |
4148 | 702 " movd %%mm0, %%eax\n\t" |
703 " movd %%mm6, %%ebx\n\t" | |
704 " movw %%ax, 416(%%edi)\n\t" | |
705 " movw %%bx, 32(%%esi)\n\t" | |
706 | |
707 " movq 96(%%edx), %%mm0\n\t" | |
708 " movq 112(%%edx), %%mm2\n\t" | |
709 " movq 104(%%edx), %%mm4\n\t" | |
710 " pfadd %%mm2, %%mm0\n\t" | |
711 " pfadd %%mm4, %%mm2\n\t" | |
712 " pfadd 120(%%edx), %%mm4\n\t" | |
713 " movq %%mm0, %%mm1\n\t" | |
714 " movq %%mm2, %%mm3\n\t" | |
715 " movq %%mm4, %%mm5\n\t" | |
716 " pfadd 64(%%edx), %%mm0\n\t" | |
717 " pfadd 80(%%edx), %%mm2\n\t" | |
718 " pfadd 72(%%edx), %%mm4\n\t" | |
18833
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
719 " pf2iw %%mm0, %%mm0\n\t" |
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
720 " pf2iw %%mm2, %%mm2\n\t" |
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
721 " pf2iw %%mm4, %%mm4\n\t" |
4148 | 722 " movd %%mm0, %%eax\n\t" |
723 " movd %%mm2, %%ecx\n\t" | |
724 " movd %%mm4, %%ebx\n\t" | |
725 " movw %%ax, 480(%%esi)\n\t" | |
726 " movw %%cx, 352(%%esi)\n\t" | |
727 " movw %%bx, 224(%%esi)\n\t" | |
728 " psrlq $32, %%mm0\n\t" | |
729 " psrlq $32, %%mm2\n\t" | |
730 " psrlq $32, %%mm4\n\t" | |
731 " movd %%mm0, %%eax\n\t" | |
732 " movd %%mm2, %%ecx\n\t" | |
733 " movd %%mm4, %%ebx\n\t" | |
734 " movw %%ax, 32(%%edi)\n\t" | |
735 " movw %%cx, 160(%%edi)\n\t" | |
736 " movw %%bx, 288(%%edi)\n\t" | |
737 " pfadd 80(%%edx), %%mm1\n\t" | |
738 " pfadd 72(%%edx), %%mm3\n\t" | |
739 " pfadd 88(%%edx), %%mm5\n\t" | |
18833
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
740 " pf2iw %%mm1, %%mm1\n\t" |
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
741 " pf2iw %%mm3, %%mm3\n\t" |
c452bd0d6ede
fix conversion float to int to use saturated ops,
gpoirier
parents:
16989
diff
changeset
|
742 " pf2iw %%mm5, %%mm5\n\t" |
4148 | 743 " movd %%mm1, %%eax\n\t" |
744 " movd %%mm3, %%ecx\n\t" | |
745 " movd %%mm5, %%ebx\n\t" | |
746 " movw %%ax, 416(%%esi)\n\t" | |
747 " movw %%cx, 288(%%esi)\n\t" | |
748 " movw %%bx, 160(%%esi)\n\t" | |
749 " psrlq $32, %%mm1\n\t" | |
750 " psrlq $32, %%mm3\n\t" | |
751 " psrlq $32, %%mm5\n\t" | |
752 " movd %%mm1, %%eax\n\t" | |
753 " movd %%mm3, %%ecx\n\t" | |
754 " movd %%mm5, %%ebx\n\t" | |
755 " movw %%ax, 96(%%edi)\n\t" | |
756 " movw %%cx, 224(%%edi)\n\t" | |
757 " movw %%bx, 352(%%edi)\n\t" | |
758 | |
759 " movsw\n\t" | |
760 | |
761 ".L_bye:\n\t" | |
762 " femms\n\t" | |
763 : | |
764 :"m"(a),"m"(b),"m"(c),"m"(tmp[0]) | |
9039 | 765 :"memory","%eax","%ebx","%ecx","%edx","%esi","%edi"); |
5842
d6eab895c742
Avoid stdcall on cygwin, it causes undefined ref, code needs testing as I have no athlon or k6-2.
atmos4
parents:
5291
diff
changeset
|
766 } |