comparison mp3lib/dct64_k7.c @ 31215:d0f70692a140

Make the 3DNow! and MMX code compile for 64-bit (a purely mechanical search-and-replace). It is still not compiled/enabled on 64-bit, though, because it is vastly slower and yet would be selected by default.
author reimar
date Sun, 30 May 2010 10:01:40 +0000
parents 0ad2da052b2e
children
comparison
equal deleted inserted replaced
31214:0bdd15feba42 31215:d0f70692a140
9 */ 9 */
10 10
11 #include "config.h" 11 #include "config.h"
12 #include "mangle.h" 12 #include "mangle.h"
13 #include "mpg123.h" 13 #include "mpg123.h"
14 #include "libavutil/x86_cpu.h"
14 15
15 static unsigned long long int attribute_used __attribute__((aligned(8))) x_plus_minus_3dnow = 0x8000000000000000ULL; 16 static unsigned long long int attribute_used __attribute__((aligned(8))) x_plus_minus_3dnow = 0x8000000000000000ULL;
16 static float attribute_used plus_1f = 1.0; 17 static float attribute_used plus_1f = 1.0;
17 18
18 void dct64_MMX_3dnowex(short *a,short *b,real *c) 19 void dct64_MMX_3dnowex(short *a,short *b,real *c)
19 { 20 {
20 char tmp[256]; 21 char tmp[256];
21 __asm__ volatile( 22 __asm__ volatile(
22 " movl %2,%%eax\n\t" 23 " mov %2,%%"REG_a"\n\t"
23 24
24 " leal 128+%3,%%edx\n\t" 25 " lea 128+%3,%%"REG_d"\n\t"
25 " movl %0,%%esi\n\t" 26 " mov %0,%%"REG_S"\n\t"
26 " movl %1,%%edi\n\t" 27 " mov %1,%%"REG_D"\n\t"
27 " movl $"MANGLE(costab_mmx)",%%ebx\n\t" 28 " mov $"MANGLE(costab_mmx)",%%"REG_b"\n\t"
28 " leal %3,%%ecx\n\t" 29 " lea %3,%%"REG_c"\n\t"
29 30
30 /* Phase 1*/ 31 /* Phase 1*/
31 " movq (%%eax), %%mm0\n\t" 32 " movq (%%"REG_a"), %%mm0\n\t"
32 " movq 8(%%eax), %%mm4\n\t" 33 " movq 8(%%"REG_a"), %%mm4\n\t"
33 " movq %%mm0, %%mm3\n\t" 34 " movq %%mm0, %%mm3\n\t"
34 " movq %%mm4, %%mm7\n\t" 35 " movq %%mm4, %%mm7\n\t"
35 " pswapd 120(%%eax), %%mm1\n\t" 36 " pswapd 120(%%"REG_a"), %%mm1\n\t"
36 " pswapd 112(%%eax), %%mm5\n\t" 37 " pswapd 112(%%"REG_a"), %%mm5\n\t"
37 " pfadd %%mm1, %%mm0\n\t" 38 " pfadd %%mm1, %%mm0\n\t"
38 " pfadd %%mm5, %%mm4\n\t" 39 " pfadd %%mm5, %%mm4\n\t"
39 " movq %%mm0, (%%edx)\n\t" 40 " movq %%mm0, (%%"REG_d")\n\t"
40 " movq %%mm4, 8(%%edx)\n\t" 41 " movq %%mm4, 8(%%"REG_d")\n\t"
41 " pfsub %%mm1, %%mm3\n\t" 42 " pfsub %%mm1, %%mm3\n\t"
42 " pfsub %%mm5, %%mm7\n\t" 43 " pfsub %%mm5, %%mm7\n\t"
43 " pfmul (%%ebx), %%mm3\n\t" 44 " pfmul (%%"REG_b"), %%mm3\n\t"
44 " pfmul 8(%%ebx), %%mm7\n\t" 45 " pfmul 8(%%"REG_b"), %%mm7\n\t"
45 " pswapd %%mm3, %%mm3\n\t" 46 " pswapd %%mm3, %%mm3\n\t"
46 " pswapd %%mm7, %%mm7\n\t" 47 " pswapd %%mm7, %%mm7\n\t"
47 " movq %%mm3, 120(%%edx)\n\t" 48 " movq %%mm3, 120(%%"REG_d")\n\t"
48 " movq %%mm7, 112(%%edx)\n\t" 49 " movq %%mm7, 112(%%"REG_d")\n\t"
49 50
50 " movq 16(%%eax), %%mm0\n\t" 51 " movq 16(%%"REG_a"), %%mm0\n\t"
51 " movq 24(%%eax), %%mm4\n\t" 52 " movq 24(%%"REG_a"), %%mm4\n\t"
52 " movq %%mm0, %%mm3\n\t" 53 " movq %%mm0, %%mm3\n\t"
53 " movq %%mm4, %%mm7\n\t" 54 " movq %%mm4, %%mm7\n\t"
54 " pswapd 104(%%eax), %%mm1\n\t" 55 " pswapd 104(%%"REG_a"), %%mm1\n\t"
55 " pswapd 96(%%eax), %%mm5\n\t" 56 " pswapd 96(%%"REG_a"), %%mm5\n\t"
56 " pfadd %%mm1, %%mm0\n\t" 57 " pfadd %%mm1, %%mm0\n\t"
57 " pfadd %%mm5, %%mm4\n\t" 58 " pfadd %%mm5, %%mm4\n\t"
58 " movq %%mm0, 16(%%edx)\n\t" 59 " movq %%mm0, 16(%%"REG_d")\n\t"
59 " movq %%mm4, 24(%%edx)\n\t" 60 " movq %%mm4, 24(%%"REG_d")\n\t"
60 " pfsub %%mm1, %%mm3\n\t" 61 " pfsub %%mm1, %%mm3\n\t"
61 " pfsub %%mm5, %%mm7\n\t" 62 " pfsub %%mm5, %%mm7\n\t"
62 " pfmul 16(%%ebx), %%mm3\n\t" 63 " pfmul 16(%%"REG_b"), %%mm3\n\t"
63 " pfmul 24(%%ebx), %%mm7\n\t" 64 " pfmul 24(%%"REG_b"), %%mm7\n\t"
64 " pswapd %%mm3, %%mm3\n\t" 65 " pswapd %%mm3, %%mm3\n\t"
65 " pswapd %%mm7, %%mm7\n\t" 66 " pswapd %%mm7, %%mm7\n\t"
66 " movq %%mm3, 104(%%edx)\n\t" 67 " movq %%mm3, 104(%%"REG_d")\n\t"
67 " movq %%mm7, 96(%%edx)\n\t" 68 " movq %%mm7, 96(%%"REG_d")\n\t"
68 69
69 " movq 32(%%eax), %%mm0\n\t" 70 " movq 32(%%"REG_a"), %%mm0\n\t"
70 " movq 40(%%eax), %%mm4\n\t" 71 " movq 40(%%"REG_a"), %%mm4\n\t"
71 " movq %%mm0, %%mm3\n\t" 72 " movq %%mm0, %%mm3\n\t"
72 " movq %%mm4, %%mm7\n\t" 73 " movq %%mm4, %%mm7\n\t"
73 " pswapd 88(%%eax), %%mm1\n\t" 74 " pswapd 88(%%"REG_a"), %%mm1\n\t"
74 " pswapd 80(%%eax), %%mm5\n\t" 75 " pswapd 80(%%"REG_a"), %%mm5\n\t"
75 " pfadd %%mm1, %%mm0\n\t" 76 " pfadd %%mm1, %%mm0\n\t"
76 " pfadd %%mm5, %%mm4\n\t" 77 " pfadd %%mm5, %%mm4\n\t"
77 " movq %%mm0, 32(%%edx)\n\t" 78 " movq %%mm0, 32(%%"REG_d")\n\t"
78 " movq %%mm4, 40(%%edx)\n\t" 79 " movq %%mm4, 40(%%"REG_d")\n\t"
79 " pfsub %%mm1, %%mm3\n\t" 80 " pfsub %%mm1, %%mm3\n\t"
80 " pfsub %%mm5, %%mm7\n\t" 81 " pfsub %%mm5, %%mm7\n\t"
81 " pfmul 32(%%ebx), %%mm3\n\t" 82 " pfmul 32(%%"REG_b"), %%mm3\n\t"
82 " pfmul 40(%%ebx), %%mm7\n\t" 83 " pfmul 40(%%"REG_b"), %%mm7\n\t"
83 " pswapd %%mm3, %%mm3\n\t" 84 " pswapd %%mm3, %%mm3\n\t"
84 " pswapd %%mm7, %%mm7\n\t" 85 " pswapd %%mm7, %%mm7\n\t"
85 " movq %%mm3, 88(%%edx)\n\t" 86 " movq %%mm3, 88(%%"REG_d")\n\t"
86 " movq %%mm7, 80(%%edx)\n\t" 87 " movq %%mm7, 80(%%"REG_d")\n\t"
87 88
88 " movq 48(%%eax), %%mm0\n\t" 89 " movq 48(%%"REG_a"), %%mm0\n\t"
89 " movq 56(%%eax), %%mm4\n\t" 90 " movq 56(%%"REG_a"), %%mm4\n\t"
90 " movq %%mm0, %%mm3\n\t" 91 " movq %%mm0, %%mm3\n\t"
91 " movq %%mm4, %%mm7\n\t" 92 " movq %%mm4, %%mm7\n\t"
92 " pswapd 72(%%eax), %%mm1\n\t" 93 " pswapd 72(%%"REG_a"), %%mm1\n\t"
93 " pswapd 64(%%eax), %%mm5\n\t" 94 " pswapd 64(%%"REG_a"), %%mm5\n\t"
94 " pfadd %%mm1, %%mm0\n\t" 95 " pfadd %%mm1, %%mm0\n\t"
95 " pfadd %%mm5, %%mm4\n\t" 96 " pfadd %%mm5, %%mm4\n\t"
96 " movq %%mm0, 48(%%edx)\n\t" 97 " movq %%mm0, 48(%%"REG_d")\n\t"
97 " movq %%mm4, 56(%%edx)\n\t" 98 " movq %%mm4, 56(%%"REG_d")\n\t"
98 " pfsub %%mm1, %%mm3\n\t" 99 " pfsub %%mm1, %%mm3\n\t"
99 " pfsub %%mm5, %%mm7\n\t" 100 " pfsub %%mm5, %%mm7\n\t"
100 " pfmul 48(%%ebx), %%mm3\n\t" 101 " pfmul 48(%%"REG_b"), %%mm3\n\t"
101 " pfmul 56(%%ebx), %%mm7\n\t" 102 " pfmul 56(%%"REG_b"), %%mm7\n\t"
102 " pswapd %%mm3, %%mm3\n\t" 103 " pswapd %%mm3, %%mm3\n\t"
103 " pswapd %%mm7, %%mm7\n\t" 104 " pswapd %%mm7, %%mm7\n\t"
104 " movq %%mm3, 72(%%edx)\n\t" 105 " movq %%mm3, 72(%%"REG_d")\n\t"
105 " movq %%mm7, 64(%%edx)\n\t" 106 " movq %%mm7, 64(%%"REG_d")\n\t"
106 107
107 /* Phase 2*/ 108 /* Phase 2*/
108 109
109 " movq (%%edx), %%mm0\n\t" 110 " movq (%%"REG_d"), %%mm0\n\t"
110 " movq 8(%%edx), %%mm4\n\t" 111 " movq 8(%%"REG_d"), %%mm4\n\t"
111 " movq %%mm0, %%mm3\n\t" 112 " movq %%mm0, %%mm3\n\t"
112 " movq %%mm4, %%mm7\n\t" 113 " movq %%mm4, %%mm7\n\t"
113 " pswapd 56(%%edx), %%mm1\n\t" 114 " pswapd 56(%%"REG_d"), %%mm1\n\t"
114 " pswapd 48(%%edx), %%mm5\n\t" 115 " pswapd 48(%%"REG_d"), %%mm5\n\t"
115 " pfadd %%mm1, %%mm0\n\t" 116 " pfadd %%mm1, %%mm0\n\t"
116 " pfadd %%mm5, %%mm4\n\t" 117 " pfadd %%mm5, %%mm4\n\t"
117 " movq %%mm0, (%%ecx)\n\t" 118 " movq %%mm0, (%%"REG_c")\n\t"
118 " movq %%mm4, 8(%%ecx)\n\t" 119 " movq %%mm4, 8(%%"REG_c")\n\t"
119 " pfsub %%mm1, %%mm3\n\t" 120 " pfsub %%mm1, %%mm3\n\t"
120 " pfsub %%mm5, %%mm7\n\t" 121 " pfsub %%mm5, %%mm7\n\t"
121 " pfmul 64(%%ebx), %%mm3\n\t" 122 " pfmul 64(%%"REG_b"), %%mm3\n\t"
122 " pfmul 72(%%ebx), %%mm7\n\t" 123 " pfmul 72(%%"REG_b"), %%mm7\n\t"
123 " pswapd %%mm3, %%mm3\n\t" 124 " pswapd %%mm3, %%mm3\n\t"
124 " pswapd %%mm7, %%mm7\n\t" 125 " pswapd %%mm7, %%mm7\n\t"
125 " movq %%mm3, 56(%%ecx)\n\t" 126 " movq %%mm3, 56(%%"REG_c")\n\t"
126 " movq %%mm7, 48(%%ecx)\n\t" 127 " movq %%mm7, 48(%%"REG_c")\n\t"
127 128
128 " movq 16(%%edx), %%mm0\n\t" 129 " movq 16(%%"REG_d"), %%mm0\n\t"
129 " movq 24(%%edx), %%mm4\n\t" 130 " movq 24(%%"REG_d"), %%mm4\n\t"
130 " movq %%mm0, %%mm3\n\t" 131 " movq %%mm0, %%mm3\n\t"
131 " movq %%mm4, %%mm7\n\t" 132 " movq %%mm4, %%mm7\n\t"
132 " pswapd 40(%%edx), %%mm1\n\t" 133 " pswapd 40(%%"REG_d"), %%mm1\n\t"
133 " pswapd 32(%%edx), %%mm5\n\t" 134 " pswapd 32(%%"REG_d"), %%mm5\n\t"
134 " pfadd %%mm1, %%mm0\n\t" 135 " pfadd %%mm1, %%mm0\n\t"
135 " pfadd %%mm5, %%mm4\n\t" 136 " pfadd %%mm5, %%mm4\n\t"
136 " movq %%mm0, 16(%%ecx)\n\t" 137 " movq %%mm0, 16(%%"REG_c")\n\t"
137 " movq %%mm4, 24(%%ecx)\n\t" 138 " movq %%mm4, 24(%%"REG_c")\n\t"
138 " pfsub %%mm1, %%mm3\n\t" 139 " pfsub %%mm1, %%mm3\n\t"
139 " pfsub %%mm5, %%mm7\n\t" 140 " pfsub %%mm5, %%mm7\n\t"
140 " pfmul 80(%%ebx), %%mm3\n\t" 141 " pfmul 80(%%"REG_b"), %%mm3\n\t"
141 " pfmul 88(%%ebx), %%mm7\n\t" 142 " pfmul 88(%%"REG_b"), %%mm7\n\t"
142 " pswapd %%mm3, %%mm3\n\t" 143 " pswapd %%mm3, %%mm3\n\t"
143 " pswapd %%mm7, %%mm7\n\t" 144 " pswapd %%mm7, %%mm7\n\t"
144 " movq %%mm3, 40(%%ecx)\n\t" 145 " movq %%mm3, 40(%%"REG_c")\n\t"
145 " movq %%mm7, 32(%%ecx)\n\t" 146 " movq %%mm7, 32(%%"REG_c")\n\t"
146 147
147 /* Phase 3*/ 148 /* Phase 3*/
148 149
149 " movq 64(%%edx), %%mm0\n\t" 150 " movq 64(%%"REG_d"), %%mm0\n\t"
150 " movq 72(%%edx), %%mm4\n\t" 151 " movq 72(%%"REG_d"), %%mm4\n\t"
151 " movq %%mm0, %%mm3\n\t" 152 " movq %%mm0, %%mm3\n\t"
152 " movq %%mm4, %%mm7\n\t" 153 " movq %%mm4, %%mm7\n\t"
153 " pswapd 120(%%edx), %%mm1\n\t" 154 " pswapd 120(%%"REG_d"), %%mm1\n\t"
154 " pswapd 112(%%edx), %%mm5\n\t" 155 " pswapd 112(%%"REG_d"), %%mm5\n\t"
155 " pfadd %%mm1, %%mm0\n\t" 156 " pfadd %%mm1, %%mm0\n\t"
156 " pfadd %%mm5, %%mm4\n\t" 157 " pfadd %%mm5, %%mm4\n\t"
157 " movq %%mm0, 64(%%ecx)\n\t" 158 " movq %%mm0, 64(%%"REG_c")\n\t"
158 " movq %%mm4, 72(%%ecx)\n\t" 159 " movq %%mm4, 72(%%"REG_c")\n\t"
159 " pfsubr %%mm1, %%mm3\n\t" 160 " pfsubr %%mm1, %%mm3\n\t"
160 " pfsubr %%mm5, %%mm7\n\t" 161 " pfsubr %%mm5, %%mm7\n\t"
161 " pfmul 64(%%ebx), %%mm3\n\t" 162 " pfmul 64(%%"REG_b"), %%mm3\n\t"
162 " pfmul 72(%%ebx), %%mm7\n\t" 163 " pfmul 72(%%"REG_b"), %%mm7\n\t"
163 " pswapd %%mm3, %%mm3\n\t" 164 " pswapd %%mm3, %%mm3\n\t"
164 " pswapd %%mm7, %%mm7\n\t" 165 " pswapd %%mm7, %%mm7\n\t"
165 " movq %%mm3, 120(%%ecx)\n\t" 166 " movq %%mm3, 120(%%"REG_c")\n\t"
166 " movq %%mm7, 112(%%ecx)\n\t" 167 " movq %%mm7, 112(%%"REG_c")\n\t"
167 168
168 " movq 80(%%edx), %%mm0\n\t" 169 " movq 80(%%"REG_d"), %%mm0\n\t"
169 " movq 88(%%edx), %%mm4\n\t" 170 " movq 88(%%"REG_d"), %%mm4\n\t"
170 " movq %%mm0, %%mm3\n\t" 171 " movq %%mm0, %%mm3\n\t"
171 " movq %%mm4, %%mm7\n\t" 172 " movq %%mm4, %%mm7\n\t"
172 " pswapd 104(%%edx), %%mm1\n\t" 173 " pswapd 104(%%"REG_d"), %%mm1\n\t"
173 " pswapd 96(%%edx), %%mm5\n\t" 174 " pswapd 96(%%"REG_d"), %%mm5\n\t"
174 " pfadd %%mm1, %%mm0\n\t" 175 " pfadd %%mm1, %%mm0\n\t"
175 " pfadd %%mm5, %%mm4\n\t" 176 " pfadd %%mm5, %%mm4\n\t"
176 " movq %%mm0, 80(%%ecx)\n\t" 177 " movq %%mm0, 80(%%"REG_c")\n\t"
177 " movq %%mm4, 88(%%ecx)\n\t" 178 " movq %%mm4, 88(%%"REG_c")\n\t"
178 " pfsubr %%mm1, %%mm3\n\t" 179 " pfsubr %%mm1, %%mm3\n\t"
179 " pfsubr %%mm5, %%mm7\n\t" 180 " pfsubr %%mm5, %%mm7\n\t"
180 " pfmul 80(%%ebx), %%mm3\n\t" 181 " pfmul 80(%%"REG_b"), %%mm3\n\t"
181 " pfmul 88(%%ebx), %%mm7\n\t" 182 " pfmul 88(%%"REG_b"), %%mm7\n\t"
182 " pswapd %%mm3, %%mm3\n\t" 183 " pswapd %%mm3, %%mm3\n\t"
183 " pswapd %%mm7, %%mm7\n\t" 184 " pswapd %%mm7, %%mm7\n\t"
184 " movq %%mm3, 104(%%ecx)\n\t" 185 " movq %%mm3, 104(%%"REG_c")\n\t"
185 " movq %%mm7, 96(%%ecx)\n\t" 186 " movq %%mm7, 96(%%"REG_c")\n\t"
186 187
187 /* Phase 4*/ 188 /* Phase 4*/
188 189
189 " movq 96(%%ebx), %%mm2\n\t" 190 " movq 96(%%"REG_b"), %%mm2\n\t"
190 " movq 104(%%ebx), %%mm6\n\t" 191 " movq 104(%%"REG_b"), %%mm6\n\t"
191 192
192 " movq (%%ecx), %%mm0\n\t" 193 " movq (%%"REG_c"), %%mm0\n\t"
193 " movq 8(%%ecx), %%mm4\n\t" 194 " movq 8(%%"REG_c"), %%mm4\n\t"
194 " movq %%mm0, %%mm3\n\t" 195 " movq %%mm0, %%mm3\n\t"
195 " movq %%mm4, %%mm7\n\t" 196 " movq %%mm4, %%mm7\n\t"
196 " pswapd 24(%%ecx), %%mm1\n\t" 197 " pswapd 24(%%"REG_c"), %%mm1\n\t"
197 " pswapd 16(%%ecx), %%mm5\n\t" 198 " pswapd 16(%%"REG_c"), %%mm5\n\t"
198 " pfadd %%mm1, %%mm0\n\t" 199 " pfadd %%mm1, %%mm0\n\t"
199 " pfadd %%mm5, %%mm4\n\t" 200 " pfadd %%mm5, %%mm4\n\t"
200 " movq %%mm0, (%%edx)\n\t" 201 " movq %%mm0, (%%"REG_d")\n\t"
201 " movq %%mm4, 8(%%edx)\n\t" 202 " movq %%mm4, 8(%%"REG_d")\n\t"
202 " pfsub %%mm1, %%mm3\n\t" 203 " pfsub %%mm1, %%mm3\n\t"
203 " pfsub %%mm5, %%mm7\n\t" 204 " pfsub %%mm5, %%mm7\n\t"
204 " pfmul %%mm2, %%mm3\n\t" 205 " pfmul %%mm2, %%mm3\n\t"
205 " pfmul %%mm6, %%mm7\n\t" 206 " pfmul %%mm6, %%mm7\n\t"
206 " pswapd %%mm3, %%mm3\n\t" 207 " pswapd %%mm3, %%mm3\n\t"
207 " pswapd %%mm7, %%mm7\n\t" 208 " pswapd %%mm7, %%mm7\n\t"
208 " movq %%mm3, 24(%%edx)\n\t" 209 " movq %%mm3, 24(%%"REG_d")\n\t"
209 " movq %%mm7, 16(%%edx)\n\t" 210 " movq %%mm7, 16(%%"REG_d")\n\t"
210 211
211 " movq 32(%%ecx), %%mm0\n\t" 212 " movq 32(%%"REG_c"), %%mm0\n\t"
212 " movq 40(%%ecx), %%mm4\n\t" 213 " movq 40(%%"REG_c"), %%mm4\n\t"
213 " movq %%mm0, %%mm3\n\t" 214 " movq %%mm0, %%mm3\n\t"
214 " movq %%mm4, %%mm7\n\t" 215 " movq %%mm4, %%mm7\n\t"
215 " pswapd 56(%%ecx), %%mm1\n\t" 216 " pswapd 56(%%"REG_c"), %%mm1\n\t"
216 " pswapd 48(%%ecx), %%mm5\n\t" 217 " pswapd 48(%%"REG_c"), %%mm5\n\t"
217 " pfadd %%mm1, %%mm0\n\t" 218 " pfadd %%mm1, %%mm0\n\t"
218 " pfadd %%mm5, %%mm4\n\t" 219 " pfadd %%mm5, %%mm4\n\t"
219 " movq %%mm0, 32(%%edx)\n\t" 220 " movq %%mm0, 32(%%"REG_d")\n\t"
220 " movq %%mm4, 40(%%edx)\n\t" 221 " movq %%mm4, 40(%%"REG_d")\n\t"
221 " pfsubr %%mm1, %%mm3\n\t" 222 " pfsubr %%mm1, %%mm3\n\t"
222 " pfsubr %%mm5, %%mm7\n\t" 223 " pfsubr %%mm5, %%mm7\n\t"
223 " pfmul %%mm2, %%mm3\n\t" 224 " pfmul %%mm2, %%mm3\n\t"
224 " pfmul %%mm6, %%mm7\n\t" 225 " pfmul %%mm6, %%mm7\n\t"
225 " pswapd %%mm3, %%mm3\n\t" 226 " pswapd %%mm3, %%mm3\n\t"
226 " pswapd %%mm7, %%mm7\n\t" 227 " pswapd %%mm7, %%mm7\n\t"
227 " movq %%mm3, 56(%%edx)\n\t" 228 " movq %%mm3, 56(%%"REG_d")\n\t"
228 " movq %%mm7, 48(%%edx)\n\t" 229 " movq %%mm7, 48(%%"REG_d")\n\t"
229 230
230 " movq 64(%%ecx), %%mm0\n\t" 231 " movq 64(%%"REG_c"), %%mm0\n\t"
231 " movq 72(%%ecx), %%mm4\n\t" 232 " movq 72(%%"REG_c"), %%mm4\n\t"
232 " movq %%mm0, %%mm3\n\t" 233 " movq %%mm0, %%mm3\n\t"
233 " movq %%mm4, %%mm7\n\t" 234 " movq %%mm4, %%mm7\n\t"
234 " pswapd 88(%%ecx), %%mm1\n\t" 235 " pswapd 88(%%"REG_c"), %%mm1\n\t"
235 " pswapd 80(%%ecx), %%mm5\n\t" 236 " pswapd 80(%%"REG_c"), %%mm5\n\t"
236 " pfadd %%mm1, %%mm0\n\t" 237 " pfadd %%mm1, %%mm0\n\t"
237 " pfadd %%mm5, %%mm4\n\t" 238 " pfadd %%mm5, %%mm4\n\t"
238 " movq %%mm0, 64(%%edx)\n\t" 239 " movq %%mm0, 64(%%"REG_d")\n\t"
239 " movq %%mm4, 72(%%edx)\n\t" 240 " movq %%mm4, 72(%%"REG_d")\n\t"
240 " pfsub %%mm1, %%mm3\n\t" 241 " pfsub %%mm1, %%mm3\n\t"
241 " pfsub %%mm5, %%mm7\n\t" 242 " pfsub %%mm5, %%mm7\n\t"
242 " pfmul %%mm2, %%mm3\n\t" 243 " pfmul %%mm2, %%mm3\n\t"
243 " pfmul %%mm6, %%mm7\n\t" 244 " pfmul %%mm6, %%mm7\n\t"
244 " pswapd %%mm3, %%mm3\n\t" 245 " pswapd %%mm3, %%mm3\n\t"
245 " pswapd %%mm7, %%mm7\n\t" 246 " pswapd %%mm7, %%mm7\n\t"
246 " movq %%mm3, 88(%%edx)\n\t" 247 " movq %%mm3, 88(%%"REG_d")\n\t"
247 " movq %%mm7, 80(%%edx)\n\t" 248 " movq %%mm7, 80(%%"REG_d")\n\t"
248 249
249 " movq 96(%%ecx), %%mm0\n\t" 250 " movq 96(%%"REG_c"), %%mm0\n\t"
250 " movq 104(%%ecx), %%mm4\n\t" 251 " movq 104(%%"REG_c"), %%mm4\n\t"
251 " movq %%mm0, %%mm3\n\t" 252 " movq %%mm0, %%mm3\n\t"
252 " movq %%mm4, %%mm7\n\t" 253 " movq %%mm4, %%mm7\n\t"
253 " pswapd 120(%%ecx), %%mm1\n\t" 254 " pswapd 120(%%"REG_c"), %%mm1\n\t"
254 " pswapd 112(%%ecx), %%mm5\n\t" 255 " pswapd 112(%%"REG_c"), %%mm5\n\t"
255 " pfadd %%mm1, %%mm0\n\t" 256 " pfadd %%mm1, %%mm0\n\t"
256 " pfadd %%mm5, %%mm4\n\t" 257 " pfadd %%mm5, %%mm4\n\t"
257 " movq %%mm0, 96(%%edx)\n\t" 258 " movq %%mm0, 96(%%"REG_d")\n\t"
258 " movq %%mm4, 104(%%edx)\n\t" 259 " movq %%mm4, 104(%%"REG_d")\n\t"
259 " pfsubr %%mm1, %%mm3\n\t" 260 " pfsubr %%mm1, %%mm3\n\t"
260 " pfsubr %%mm5, %%mm7\n\t" 261 " pfsubr %%mm5, %%mm7\n\t"
261 " pfmul %%mm2, %%mm3\n\t" 262 " pfmul %%mm2, %%mm3\n\t"
262 " pfmul %%mm6, %%mm7\n\t" 263 " pfmul %%mm6, %%mm7\n\t"
263 " pswapd %%mm3, %%mm3\n\t" 264 " pswapd %%mm3, %%mm3\n\t"
264 " pswapd %%mm7, %%mm7\n\t" 265 " pswapd %%mm7, %%mm7\n\t"
265 " movq %%mm3, 120(%%edx)\n\t" 266 " movq %%mm3, 120(%%"REG_d")\n\t"
266 " movq %%mm7, 112(%%edx)\n\t" 267 " movq %%mm7, 112(%%"REG_d")\n\t"
267 268
268 /* Phase 5 */ 269 /* Phase 5 */
269 270
270 " movq 112(%%ebx), %%mm2\n\t" 271 " movq 112(%%"REG_b"), %%mm2\n\t"
271 272
272 " movq (%%edx), %%mm0\n\t" 273 " movq (%%"REG_d"), %%mm0\n\t"
273 " movq 16(%%edx), %%mm4\n\t" 274 " movq 16(%%"REG_d"), %%mm4\n\t"
274 " movq %%mm0, %%mm3\n\t" 275 " movq %%mm0, %%mm3\n\t"
275 " movq %%mm4, %%mm7\n\t" 276 " movq %%mm4, %%mm7\n\t"
276 " pswapd 8(%%edx), %%mm1\n\t" 277 " pswapd 8(%%"REG_d"), %%mm1\n\t"
277 " pswapd 24(%%edx), %%mm5\n\t" 278 " pswapd 24(%%"REG_d"), %%mm5\n\t"
278 " pfadd %%mm1, %%mm0\n\t" 279 " pfadd %%mm1, %%mm0\n\t"
279 " pfadd %%mm5, %%mm4\n\t" 280 " pfadd %%mm5, %%mm4\n\t"
280 " movq %%mm0, (%%ecx)\n\t" 281 " movq %%mm0, (%%"REG_c")\n\t"
281 " movq %%mm4, 16(%%ecx)\n\t" 282 " movq %%mm4, 16(%%"REG_c")\n\t"
282 " pfsub %%mm1, %%mm3\n\t" 283 " pfsub %%mm1, %%mm3\n\t"
283 " pfsubr %%mm5, %%mm7\n\t" 284 " pfsubr %%mm5, %%mm7\n\t"
284 " pfmul %%mm2, %%mm3\n\t" 285 " pfmul %%mm2, %%mm3\n\t"
285 " pfmul %%mm2, %%mm7\n\t" 286 " pfmul %%mm2, %%mm7\n\t"
286 " pswapd %%mm3, %%mm3\n\t" 287 " pswapd %%mm3, %%mm3\n\t"
287 " pswapd %%mm7, %%mm7\n\t" 288 " pswapd %%mm7, %%mm7\n\t"
288 " movq %%mm3, 8(%%ecx)\n\t" 289 " movq %%mm3, 8(%%"REG_c")\n\t"
289 " movq %%mm7, 24(%%ecx)\n\t" 290 " movq %%mm7, 24(%%"REG_c")\n\t"
290 291
291 " movq 32(%%edx), %%mm0\n\t" 292 " movq 32(%%"REG_d"), %%mm0\n\t"
292 " movq 48(%%edx), %%mm4\n\t" 293 " movq 48(%%"REG_d"), %%mm4\n\t"
293 " movq %%mm0, %%mm3\n\t" 294 " movq %%mm0, %%mm3\n\t"
294 " movq %%mm4, %%mm7\n\t" 295 " movq %%mm4, %%mm7\n\t"
295 " pswapd 40(%%edx), %%mm1\n\t" 296 " pswapd 40(%%"REG_d"), %%mm1\n\t"
296 " pswapd 56(%%edx), %%mm5\n\t" 297 " pswapd 56(%%"REG_d"), %%mm5\n\t"
297 " pfadd %%mm1, %%mm0\n\t" 298 " pfadd %%mm1, %%mm0\n\t"
298 " pfadd %%mm5, %%mm4\n\t" 299 " pfadd %%mm5, %%mm4\n\t"
299 " movq %%mm0, 32(%%ecx)\n\t" 300 " movq %%mm0, 32(%%"REG_c")\n\t"
300 " movq %%mm4, 48(%%ecx)\n\t" 301 " movq %%mm4, 48(%%"REG_c")\n\t"
301 " pfsub %%mm1, %%mm3\n\t" 302 " pfsub %%mm1, %%mm3\n\t"
302 " pfsubr %%mm5, %%mm7\n\t" 303 " pfsubr %%mm5, %%mm7\n\t"
303 " pfmul %%mm2, %%mm3\n\t" 304 " pfmul %%mm2, %%mm3\n\t"
304 " pfmul %%mm2, %%mm7\n\t" 305 " pfmul %%mm2, %%mm7\n\t"
305 " pswapd %%mm3, %%mm3\n\t" 306 " pswapd %%mm3, %%mm3\n\t"
306 " pswapd %%mm7, %%mm7\n\t" 307 " pswapd %%mm7, %%mm7\n\t"
307 " movq %%mm3, 40(%%ecx)\n\t" 308 " movq %%mm3, 40(%%"REG_c")\n\t"
308 " movq %%mm7, 56(%%ecx)\n\t" 309 " movq %%mm7, 56(%%"REG_c")\n\t"
309 310
310 " movq 64(%%edx), %%mm0\n\t" 311 " movq 64(%%"REG_d"), %%mm0\n\t"
311 " movq 80(%%edx), %%mm4\n\t" 312 " movq 80(%%"REG_d"), %%mm4\n\t"
312 " movq %%mm0, %%mm3\n\t" 313 " movq %%mm0, %%mm3\n\t"
313 " movq %%mm4, %%mm7\n\t" 314 " movq %%mm4, %%mm7\n\t"
314 " pswapd 72(%%edx), %%mm1\n\t" 315 " pswapd 72(%%"REG_d"), %%mm1\n\t"
315 " pswapd 88(%%edx), %%mm5\n\t" 316 " pswapd 88(%%"REG_d"), %%mm5\n\t"
316 " pfadd %%mm1, %%mm0\n\t" 317 " pfadd %%mm1, %%mm0\n\t"
317 " pfadd %%mm5, %%mm4\n\t" 318 " pfadd %%mm5, %%mm4\n\t"
318 " movq %%mm0, 64(%%ecx)\n\t" 319 " movq %%mm0, 64(%%"REG_c")\n\t"
319 " movq %%mm4, 80(%%ecx)\n\t" 320 " movq %%mm4, 80(%%"REG_c")\n\t"
320 " pfsub %%mm1, %%mm3\n\t" 321 " pfsub %%mm1, %%mm3\n\t"
321 " pfsubr %%mm5, %%mm7\n\t" 322 " pfsubr %%mm5, %%mm7\n\t"
322 " pfmul %%mm2, %%mm3\n\t" 323 " pfmul %%mm2, %%mm3\n\t"
323 " pfmul %%mm2, %%mm7\n\t" 324 " pfmul %%mm2, %%mm7\n\t"
324 " pswapd %%mm3, %%mm3\n\t" 325 " pswapd %%mm3, %%mm3\n\t"
325 " pswapd %%mm7, %%mm7\n\t" 326 " pswapd %%mm7, %%mm7\n\t"
326 " movq %%mm3, 72(%%ecx)\n\t" 327 " movq %%mm3, 72(%%"REG_c")\n\t"
327 " movq %%mm7, 88(%%ecx)\n\t" 328 " movq %%mm7, 88(%%"REG_c")\n\t"
328 329
329 " movq 96(%%edx), %%mm0\n\t" 330 " movq 96(%%"REG_d"), %%mm0\n\t"
330 " movq 112(%%edx), %%mm4\n\t" 331 " movq 112(%%"REG_d"), %%mm4\n\t"
331 " movq %%mm0, %%mm3\n\t" 332 " movq %%mm0, %%mm3\n\t"
332 " movq %%mm4, %%mm7\n\t" 333 " movq %%mm4, %%mm7\n\t"
333 " pswapd 104(%%edx), %%mm1\n\t" 334 " pswapd 104(%%"REG_d"), %%mm1\n\t"
334 " pswapd 120(%%edx), %%mm5\n\t" 335 " pswapd 120(%%"REG_d"), %%mm5\n\t"
335 " pfadd %%mm1, %%mm0\n\t" 336 " pfadd %%mm1, %%mm0\n\t"
336 " pfadd %%mm5, %%mm4\n\t" 337 " pfadd %%mm5, %%mm4\n\t"
337 " movq %%mm0, 96(%%ecx)\n\t" 338 " movq %%mm0, 96(%%"REG_c")\n\t"
338 " movq %%mm4, 112(%%ecx)\n\t" 339 " movq %%mm4, 112(%%"REG_c")\n\t"
339 " pfsub %%mm1, %%mm3\n\t" 340 " pfsub %%mm1, %%mm3\n\t"
340 " pfsubr %%mm5, %%mm7\n\t" 341 " pfsubr %%mm5, %%mm7\n\t"
341 " pfmul %%mm2, %%mm3\n\t" 342 " pfmul %%mm2, %%mm3\n\t"
342 " pfmul %%mm2, %%mm7\n\t" 343 " pfmul %%mm2, %%mm7\n\t"
343 " pswapd %%mm3, %%mm3\n\t" 344 " pswapd %%mm3, %%mm3\n\t"
344 " pswapd %%mm7, %%mm7\n\t" 345 " pswapd %%mm7, %%mm7\n\t"
345 " movq %%mm3, 104(%%ecx)\n\t" 346 " movq %%mm3, 104(%%"REG_c")\n\t"
346 " movq %%mm7, 120(%%ecx)\n\t" 347 " movq %%mm7, 120(%%"REG_c")\n\t"
347 348
348 349
349 /* Phase 6. This is the end of easy road. */ 350 /* Phase 6. This is the end of easy road. */
350 /* Code below is coded in scalar mode. Should be optimized */ 351 /* Code below is coded in scalar mode. Should be optimized */
351 352
352 " movd "MANGLE(plus_1f)", %%mm6\n\t" 353 " movd "MANGLE(plus_1f)", %%mm6\n\t"
353 " punpckldq 120(%%ebx), %%mm6\n\t" /* mm6 = 1.0 | 120(%%ebx)*/ 354 " punpckldq 120(%%"REG_b"), %%mm6\n\t" /* mm6 = 1.0 | 120(%%"REG_b")*/
354 " movq "MANGLE(x_plus_minus_3dnow)", %%mm7\n\t" /* mm7 = +1 | -1 */ 355 " movq "MANGLE(x_plus_minus_3dnow)", %%mm7\n\t" /* mm7 = +1 | -1 */
355 356
356 " movq 32(%%ecx), %%mm0\n\t" 357 " movq 32(%%"REG_c"), %%mm0\n\t"
357 " movq 64(%%ecx), %%mm2\n\t" 358 " movq 64(%%"REG_c"), %%mm2\n\t"
358 " movq %%mm0, %%mm1\n\t" 359 " movq %%mm0, %%mm1\n\t"
359 " movq %%mm2, %%mm3\n\t" 360 " movq %%mm2, %%mm3\n\t"
360 " pxor %%mm7, %%mm1\n\t" 361 " pxor %%mm7, %%mm1\n\t"
361 " pxor %%mm7, %%mm3\n\t" 362 " pxor %%mm7, %%mm3\n\t"
362 " pfacc %%mm1, %%mm0\n\t" 363 " pfacc %%mm1, %%mm0\n\t"
363 " pfacc %%mm3, %%mm2\n\t" 364 " pfacc %%mm3, %%mm2\n\t"
364 " pfmul %%mm6, %%mm0\n\t" 365 " pfmul %%mm6, %%mm0\n\t"
365 " pfmul %%mm6, %%mm2\n\t" 366 " pfmul %%mm6, %%mm2\n\t"
366 " movq %%mm0, 32(%%edx)\n\t" 367 " movq %%mm0, 32(%%"REG_d")\n\t"
367 " movq %%mm2, 64(%%edx)\n\t" 368 " movq %%mm2, 64(%%"REG_d")\n\t"
368 369
369 " movd 44(%%ecx), %%mm0\n\t" 370 " movd 44(%%"REG_c"), %%mm0\n\t"
370 " movd 40(%%ecx), %%mm2\n\t" 371 " movd 40(%%"REG_c"), %%mm2\n\t"
371 " movd 120(%%ebx), %%mm3\n\t" 372 " movd 120(%%"REG_b"), %%mm3\n\t"
372 " punpckldq 76(%%ecx), %%mm0\n\t" 373 " punpckldq 76(%%"REG_c"), %%mm0\n\t"
373 " punpckldq 72(%%ecx), %%mm2\n\t" 374 " punpckldq 72(%%"REG_c"), %%mm2\n\t"
374 " punpckldq %%mm3, %%mm3\n\t" 375 " punpckldq %%mm3, %%mm3\n\t"
375 " movq %%mm0, %%mm4\n\t" 376 " movq %%mm0, %%mm4\n\t"
376 " movq %%mm2, %%mm5\n\t" 377 " movq %%mm2, %%mm5\n\t"
377 " pfsub %%mm2, %%mm0\n\t" 378 " pfsub %%mm2, %%mm0\n\t"
378 " pfmul %%mm3, %%mm0\n\t" 379 " pfmul %%mm3, %%mm0\n\t"
380 " pfadd %%mm5, %%mm0\n\t" 381 " pfadd %%mm5, %%mm0\n\t"
381 " pfadd %%mm4, %%mm0\n\t" 382 " pfadd %%mm4, %%mm0\n\t"
382 " movq %%mm0, %%mm2\n\t" 383 " movq %%mm0, %%mm2\n\t"
383 " punpckldq %%mm1, %%mm0\n\t" 384 " punpckldq %%mm1, %%mm0\n\t"
384 " punpckhdq %%mm1, %%mm2\n\t" 385 " punpckhdq %%mm1, %%mm2\n\t"
385 " movq %%mm0, 40(%%edx)\n\t" 386 " movq %%mm0, 40(%%"REG_d")\n\t"
386 " movq %%mm2, 72(%%edx)\n\t" 387 " movq %%mm2, 72(%%"REG_d")\n\t"
387 388
388 " movd 48(%%ecx), %%mm3\n\t" 389 " movd 48(%%"REG_c"), %%mm3\n\t"
389 " movd 60(%%ecx), %%mm2\n\t" 390 " movd 60(%%"REG_c"), %%mm2\n\t"
390 " pfsub 52(%%ecx), %%mm3\n\t" 391 " pfsub 52(%%"REG_c"), %%mm3\n\t"
391 " pfsub 56(%%ecx), %%mm2\n\t" 392 " pfsub 56(%%"REG_c"), %%mm2\n\t"
392 " pfmul 120(%%ebx), %%mm3\n\t" 393 " pfmul 120(%%"REG_b"), %%mm3\n\t"
393 " pfmul 120(%%ebx), %%mm2\n\t" 394 " pfmul 120(%%"REG_b"), %%mm2\n\t"
394 " movq %%mm2, %%mm1\n\t" 395 " movq %%mm2, %%mm1\n\t"
395 396
396 " pfadd 56(%%ecx), %%mm1\n\t" 397 " pfadd 56(%%"REG_c"), %%mm1\n\t"
397 " pfadd 60(%%ecx), %%mm1\n\t" 398 " pfadd 60(%%"REG_c"), %%mm1\n\t"
398 " movq %%mm1, %%mm0\n\t" 399 " movq %%mm1, %%mm0\n\t"
399 400
400 " pfadd 48(%%ecx), %%mm0\n\t" 401 " pfadd 48(%%"REG_c"), %%mm0\n\t"
401 " pfadd 52(%%ecx), %%mm0\n\t" 402 " pfadd 52(%%"REG_c"), %%mm0\n\t"
402 " pfadd %%mm3, %%mm1\n\t" 403 " pfadd %%mm3, %%mm1\n\t"
403 " punpckldq %%mm2, %%mm1\n\t" 404 " punpckldq %%mm2, %%mm1\n\t"
404 " pfadd %%mm3, %%mm2\n\t" 405 " pfadd %%mm3, %%mm2\n\t"
405 " punpckldq %%mm2, %%mm0\n\t" 406 " punpckldq %%mm2, %%mm0\n\t"
406 " movq %%mm1, 56(%%edx)\n\t" 407 " movq %%mm1, 56(%%"REG_d")\n\t"
407 " movq %%mm0, 48(%%edx)\n\t" 408 " movq %%mm0, 48(%%"REG_d")\n\t"
408 409
409 /*---*/ 410 /*---*/
410 411
411 " movd 92(%%ecx), %%mm1\n\t" 412 " movd 92(%%"REG_c"), %%mm1\n\t"
412 " pfsub 88(%%ecx), %%mm1\n\t" 413 " pfsub 88(%%"REG_c"), %%mm1\n\t"
413 " pfmul 120(%%ebx), %%mm1\n\t" 414 " pfmul 120(%%"REG_b"), %%mm1\n\t"
414 " movd %%mm1, 92(%%edx)\n\t" 415 " movd %%mm1, 92(%%"REG_d")\n\t"
415 " pfadd 92(%%ecx), %%mm1\n\t" 416 " pfadd 92(%%"REG_c"), %%mm1\n\t"
416 " pfadd 88(%%ecx), %%mm1\n\t" 417 " pfadd 88(%%"REG_c"), %%mm1\n\t"
417 " movq %%mm1, %%mm0\n\t" 418 " movq %%mm1, %%mm0\n\t"
418 419
419 " pfadd 80(%%ecx), %%mm0\n\t" 420 " pfadd 80(%%"REG_c"), %%mm0\n\t"
420 " pfadd 84(%%ecx), %%mm0\n\t" 421 " pfadd 84(%%"REG_c"), %%mm0\n\t"
421 " movd %%mm0, 80(%%edx)\n\t" 422 " movd %%mm0, 80(%%"REG_d")\n\t"
422 423
423 " movd 80(%%ecx), %%mm0\n\t" 424 " movd 80(%%"REG_c"), %%mm0\n\t"
424 " pfsub 84(%%ecx), %%mm0\n\t" 425 " pfsub 84(%%"REG_c"), %%mm0\n\t"
425 " pfmul 120(%%ebx), %%mm0\n\t" 426 " pfmul 120(%%"REG_b"), %%mm0\n\t"
426 " pfadd %%mm0, %%mm1\n\t" 427 " pfadd %%mm0, %%mm1\n\t"
427 " pfadd 92(%%edx), %%mm0\n\t" 428 " pfadd 92(%%"REG_d"), %%mm0\n\t"
428 " punpckldq %%mm1, %%mm0\n\t" 429 " punpckldq %%mm1, %%mm0\n\t"
429 " movq %%mm0, 84(%%edx)\n\t" 430 " movq %%mm0, 84(%%"REG_d")\n\t"
430 431
431 " movq 96(%%ecx), %%mm0\n\t" 432 " movq 96(%%"REG_c"), %%mm0\n\t"
432 " movq %%mm0, %%mm1\n\t" 433 " movq %%mm0, %%mm1\n\t"
433 " pxor %%mm7, %%mm1\n\t" 434 " pxor %%mm7, %%mm1\n\t"
434 " pfacc %%mm1, %%mm0\n\t" 435 " pfacc %%mm1, %%mm0\n\t"
435 " pfmul %%mm6, %%mm0\n\t" 436 " pfmul %%mm6, %%mm0\n\t"
436 " movq %%mm0, 96(%%edx)\n\t" 437 " movq %%mm0, 96(%%"REG_d")\n\t"
437 438
438 " movd 108(%%ecx), %%mm0\n\t" 439 " movd 108(%%"REG_c"), %%mm0\n\t"
439 " pfsub 104(%%ecx), %%mm0\n\t" 440 " pfsub 104(%%"REG_c"), %%mm0\n\t"
440 " pfmul 120(%%ebx), %%mm0\n\t" 441 " pfmul 120(%%"REG_b"), %%mm0\n\t"
441 " movd %%mm0, 108(%%edx)\n\t" 442 " movd %%mm0, 108(%%"REG_d")\n\t"
442 " pfadd 104(%%ecx), %%mm0\n\t" 443 " pfadd 104(%%"REG_c"), %%mm0\n\t"
443 " pfadd 108(%%ecx), %%mm0\n\t" 444 " pfadd 108(%%"REG_c"), %%mm0\n\t"
444 " movd %%mm0, 104(%%edx)\n\t" 445 " movd %%mm0, 104(%%"REG_d")\n\t"
445 446
446 " movd 124(%%ecx), %%mm1\n\t" 447 " movd 124(%%"REG_c"), %%mm1\n\t"
447 " pfsub 120(%%ecx), %%mm1\n\t" 448 " pfsub 120(%%"REG_c"), %%mm1\n\t"
448 " pfmul 120(%%ebx), %%mm1\n\t" 449 " pfmul 120(%%"REG_b"), %%mm1\n\t"
449 " movd %%mm1, 124(%%edx)\n\t" 450 " movd %%mm1, 124(%%"REG_d")\n\t"
450 " pfadd 120(%%ecx), %%mm1\n\t" 451 " pfadd 120(%%"REG_c"), %%mm1\n\t"
451 " pfadd 124(%%ecx), %%mm1\n\t" 452 " pfadd 124(%%"REG_c"), %%mm1\n\t"
452 " movq %%mm1, %%mm0\n\t" 453 " movq %%mm1, %%mm0\n\t"
453 454
454 " pfadd 112(%%ecx), %%mm0\n\t" 455 " pfadd 112(%%"REG_c"), %%mm0\n\t"
455 " pfadd 116(%%ecx), %%mm0\n\t" 456 " pfadd 116(%%"REG_c"), %%mm0\n\t"
456 " movd %%mm0, 112(%%edx)\n\t" 457 " movd %%mm0, 112(%%"REG_d")\n\t"
457 458
458 " movd 112(%%ecx), %%mm0\n\t" 459 " movd 112(%%"REG_c"), %%mm0\n\t"
459 " pfsub 116(%%ecx), %%mm0\n\t" 460 " pfsub 116(%%"REG_c"), %%mm0\n\t"
460 " pfmul 120(%%ebx), %%mm0\n\t" 461 " pfmul 120(%%"REG_b"), %%mm0\n\t"
461 " pfadd %%mm0,%%mm1\n\t" 462 " pfadd %%mm0,%%mm1\n\t"
462 " pfadd 124(%%edx), %%mm0\n\t" 463 " pfadd 124(%%"REG_d"), %%mm0\n\t"
463 " punpckldq %%mm1, %%mm0\n\t" 464 " punpckldq %%mm1, %%mm0\n\t"
464 " movq %%mm0, 116(%%edx)\n\t" 465 " movq %%mm0, 116(%%"REG_d")\n\t"
465 466
466 // this code is broken, there is nothing modifying the z flag above. 467 // this code is broken, there is nothing modifying the z flag above.
467 #if 0 468 #if 0
468 " jnz .L01\n\t" 469 " jnz .L01\n\t"
469 470
470 /* Phase 7*/ 471 /* Phase 7*/
471 /* Code below is coded in scalar mode. Should be optimized */ 472 /* Code below is coded in scalar mode. Should be optimized */
472 473
473 " movd (%%ecx), %%mm0\n\t" 474 " movd (%%"REG_c"), %%mm0\n\t"
474 " pfadd 4(%%ecx), %%mm0\n\t" 475 " pfadd 4(%%"REG_c"), %%mm0\n\t"
475 " movd %%mm0, 1024(%%esi)\n\t" 476 " movd %%mm0, 1024(%%"REG_S")\n\t"
476 477
477 " movd (%%ecx), %%mm0\n\t" 478 " movd (%%"REG_c"), %%mm0\n\t"
478 " pfsub 4(%%ecx), %%mm0\n\t" 479 " pfsub 4(%%"REG_c"), %%mm0\n\t"
479 " pfmul 120(%%ebx), %%mm0\n\t" 480 " pfmul 120(%%"REG_b"), %%mm0\n\t"
480 " movd %%mm0, (%%esi)\n\t" 481 " movd %%mm0, (%%"REG_S")\n\t"
481 " movd %%mm0, (%%edi)\n\t" 482 " movd %%mm0, (%%"REG_D")\n\t"
482 483
483 " movd 12(%%ecx), %%mm0\n\t" 484 " movd 12(%%"REG_c"), %%mm0\n\t"
484 " pfsub 8(%%ecx), %%mm0\n\t" 485 " pfsub 8(%%"REG_c"), %%mm0\n\t"
485 " pfmul 120(%%ebx), %%mm0\n\t" 486 " pfmul 120(%%"REG_b"), %%mm0\n\t"
486 " movd %%mm0, 512(%%edi)\n\t" 487 " movd %%mm0, 512(%%"REG_D")\n\t"
487 " pfadd 12(%%ecx), %%mm0\n\t" 488 " pfadd 12(%%"REG_c"), %%mm0\n\t"
488 " pfadd 8(%%ecx), %%mm0\n\t" 489 " pfadd 8(%%"REG_c"), %%mm0\n\t"
489 " movd %%mm0, 512(%%esi)\n\t" 490 " movd %%mm0, 512(%%"REG_S")\n\t"
490 491
491 " movd 16(%%ecx), %%mm0\n\t" 492 " movd 16(%%"REG_c"), %%mm0\n\t"
492 " pfsub 20(%%ecx), %%mm0\n\t" 493 " pfsub 20(%%"REG_c"), %%mm0\n\t"
493 " pfmul 120(%%ebx), %%mm0\n\t" 494 " pfmul 120(%%"REG_b"), %%mm0\n\t"
494 " movq %%mm0, %%mm3\n\t" 495 " movq %%mm0, %%mm3\n\t"
495 496
496 " movd 28(%%ecx), %%mm0\n\t" 497 " movd 28(%%"REG_c"), %%mm0\n\t"
497 " pfsub 24(%%ecx), %%mm0\n\t" 498 " pfsub 24(%%"REG_c"), %%mm0\n\t"
498 " pfmul 120(%%ebx), %%mm0\n\t" 499 " pfmul 120(%%"REG_b"), %%mm0\n\t"
499 " movd %%mm0, 768(%%edi)\n\t" 500 " movd %%mm0, 768(%%"REG_D")\n\t"
500 " movq %%mm0, %%mm2\n\t" 501 " movq %%mm0, %%mm2\n\t"
501 502
502 " pfadd 24(%%ecx), %%mm0\n\t" 503 " pfadd 24(%%"REG_c"), %%mm0\n\t"
503 " pfadd 28(%%ecx), %%mm0\n\t" 504 " pfadd 28(%%"REG_c"), %%mm0\n\t"
504 " movq %%mm0, %%mm1\n\t" 505 " movq %%mm0, %%mm1\n\t"
505 506
506 " pfadd 16(%%ecx), %%mm0\n\t" 507 " pfadd 16(%%"REG_c"), %%mm0\n\t"
507 " pfadd 20(%%ecx), %%mm0\n\t" 508 " pfadd 20(%%"REG_c"), %%mm0\n\t"
508 " movd %%mm0, 768(%%esi)\n\t" 509 " movd %%mm0, 768(%%"REG_S")\n\t"
509 " pfadd %%mm3, %%mm1\n\t" 510 " pfadd %%mm3, %%mm1\n\t"
510 " movd %%mm1, 256(%%esi)\n\t" 511 " movd %%mm1, 256(%%"REG_S")\n\t"
511 " pfadd %%mm3, %%mm2\n\t" 512 " pfadd %%mm3, %%mm2\n\t"
512 " movd %%mm2, 256(%%edi)\n\t" 513 " movd %%mm2, 256(%%"REG_D")\n\t"
513 514
514 /* Phase 8*/ 515 /* Phase 8*/
515 516
516 " movq 32(%%edx), %%mm0\n\t" 517 " movq 32(%%"REG_d"), %%mm0\n\t"
517 " movq 48(%%edx), %%mm1\n\t" 518 " movq 48(%%"REG_d"), %%mm1\n\t"
518 " pfadd 48(%%edx), %%mm0\n\t" 519 " pfadd 48(%%"REG_d"), %%mm0\n\t"
519 " pfadd 40(%%edx), %%mm1\n\t" 520 " pfadd 40(%%"REG_d"), %%mm1\n\t"
520 " movd %%mm0, 896(%%esi)\n\t" 521 " movd %%mm0, 896(%%"REG_S")\n\t"
521 " movd %%mm1, 640(%%esi)\n\t" 522 " movd %%mm1, 640(%%"REG_S")\n\t"
522 " psrlq $32, %%mm0\n\t" 523 " psrlq $32, %%mm0\n\t"
523 " psrlq $32, %%mm1\n\t" 524 " psrlq $32, %%mm1\n\t"
524 " movd %%mm0, 128(%%edi)\n\t" 525 " movd %%mm0, 128(%%"REG_D")\n\t"
525 " movd %%mm1, 384(%%edi)\n\t" 526 " movd %%mm1, 384(%%"REG_D")\n\t"
526 527
527 " movd 40(%%edx), %%mm0\n\t" 528 " movd 40(%%"REG_d"), %%mm0\n\t"
528 " pfadd 56(%%edx), %%mm0\n\t" 529 " pfadd 56(%%"REG_d"), %%mm0\n\t"
529 " movd %%mm0, 384(%%esi)\n\t" 530 " movd %%mm0, 384(%%"REG_S")\n\t"
530 531
531 " movd 56(%%edx), %%mm0\n\t" 532 " movd 56(%%"REG_d"), %%mm0\n\t"
532 " pfadd 36(%%edx), %%mm0\n\t" 533 " pfadd 36(%%"REG_d"), %%mm0\n\t"
533 " movd %%mm0, 128(%%esi)\n\t" 534 " movd %%mm0, 128(%%"REG_S")\n\t"
534 535
535 " movd 60(%%edx), %%mm0\n\t" 536 " movd 60(%%"REG_d"), %%mm0\n\t"
536 " movd %%mm0, 896(%%edi)\n\t" 537 " movd %%mm0, 896(%%"REG_D")\n\t"
537 " pfadd 44(%%edx), %%mm0\n\t" 538 " pfadd 44(%%"REG_d"), %%mm0\n\t"
538 " movd %%mm0, 640(%%edi)\n\t" 539 " movd %%mm0, 640(%%"REG_D")\n\t"
539 540
540 " movq 96(%%edx), %%mm0\n\t" 541 " movq 96(%%"REG_d"), %%mm0\n\t"
541 " movq 112(%%edx), %%mm2\n\t" 542 " movq 112(%%"REG_d"), %%mm2\n\t"
542 " movq 104(%%edx), %%mm4\n\t" 543 " movq 104(%%"REG_d"), %%mm4\n\t"
543 " pfadd 112(%%edx), %%mm0\n\t" 544 " pfadd 112(%%"REG_d"), %%mm0\n\t"
544 " pfadd 104(%%edx), %%mm2\n\t" 545 " pfadd 104(%%"REG_d"), %%mm2\n\t"
545 " pfadd 120(%%edx), %%mm4\n\t" 546 " pfadd 120(%%"REG_d"), %%mm4\n\t"
546 " movq %%mm0, %%mm1\n\t" 547 " movq %%mm0, %%mm1\n\t"
547 " movq %%mm2, %%mm3\n\t" 548 " movq %%mm2, %%mm3\n\t"
548 " movq %%mm4, %%mm5\n\t" 549 " movq %%mm4, %%mm5\n\t"
549 " pfadd 64(%%edx), %%mm0\n\t" 550 " pfadd 64(%%"REG_d"), %%mm0\n\t"
550 " pfadd 80(%%edx), %%mm2\n\t" 551 " pfadd 80(%%"REG_d"), %%mm2\n\t"
551 " pfadd 72(%%edx), %%mm4\n\t" 552 " pfadd 72(%%"REG_d"), %%mm4\n\t"
552 " movd %%mm0, 960(%%esi)\n\t" 553 " movd %%mm0, 960(%%"REG_S")\n\t"
553 " movd %%mm2, 704(%%esi)\n\t" 554 " movd %%mm2, 704(%%"REG_S")\n\t"
554 " movd %%mm4, 448(%%esi)\n\t" 555 " movd %%mm4, 448(%%"REG_S")\n\t"
555 " psrlq $32, %%mm0\n\t" 556 " psrlq $32, %%mm0\n\t"
556 " psrlq $32, %%mm2\n\t" 557 " psrlq $32, %%mm2\n\t"
557 " psrlq $32, %%mm4\n\t" 558 " psrlq $32, %%mm4\n\t"
558 " movd %%mm0, 64(%%edi)\n\t" 559 " movd %%mm0, 64(%%"REG_D")\n\t"
559 " movd %%mm2, 320(%%edi)\n\t" 560 " movd %%mm2, 320(%%"REG_D")\n\t"
560 " movd %%mm4, 576(%%edi)\n\t" 561 " movd %%mm4, 576(%%"REG_D")\n\t"
561 " pfadd 80(%%edx), %%mm1\n\t" 562 " pfadd 80(%%"REG_d"), %%mm1\n\t"
562 " pfadd 72(%%edx), %%mm3\n\t" 563 " pfadd 72(%%"REG_d"), %%mm3\n\t"
563 " pfadd 88(%%edx), %%mm5\n\t" 564 " pfadd 88(%%"REG_d"), %%mm5\n\t"
564 " movd %%mm1, 832(%%esi)\n\t" 565 " movd %%mm1, 832(%%"REG_S")\n\t"
565 " movd %%mm3, 576(%%esi)\n\t" 566 " movd %%mm3, 576(%%"REG_S")\n\t"
566 " movd %%mm5, 320(%%esi)\n\t" 567 " movd %%mm5, 320(%%"REG_S")\n\t"
567 " psrlq $32, %%mm1\n\t" 568 " psrlq $32, %%mm1\n\t"
568 " psrlq $32, %%mm3\n\t" 569 " psrlq $32, %%mm3\n\t"
569 " psrlq $32, %%mm5\n\t" 570 " psrlq $32, %%mm5\n\t"
570 " movd %%mm1, 192(%%edi)\n\t" 571 " movd %%mm1, 192(%%"REG_D")\n\t"
571 " movd %%mm3, 448(%%edi)\n\t" 572 " movd %%mm3, 448(%%"REG_D")\n\t"
572 " movd %%mm5, 704(%%edi)\n\t" 573 " movd %%mm5, 704(%%"REG_D")\n\t"
573 574
574 " movd 120(%%edx), %%mm0\n\t" 575 " movd 120(%%"REG_d"), %%mm0\n\t"
575 " pfadd 100(%%edx), %%mm0\n\t" 576 " pfadd 100(%%"REG_d"), %%mm0\n\t"
576 " movq %%mm0, %%mm1\n\t" 577 " movq %%mm0, %%mm1\n\t"
577 " pfadd 88(%%edx), %%mm0\n\t" 578 " pfadd 88(%%"REG_d"), %%mm0\n\t"
578 " movd %%mm0, 192(%%esi)\n\t" 579 " movd %%mm0, 192(%%"REG_S")\n\t"
579 " pfadd 68(%%edx), %%mm1\n\t" 580 " pfadd 68(%%"REG_d"), %%mm1\n\t"
580 " movd %%mm1, 64(%%esi)\n\t" 581 " movd %%mm1, 64(%%"REG_S")\n\t"
581 582
582 " movd 124(%%edx), %%mm0\n\t" 583 " movd 124(%%"REG_d"), %%mm0\n\t"
583 " movd %%mm0, 960(%%edi)\n\t" 584 " movd %%mm0, 960(%%"REG_D")\n\t"
584 " pfadd 92(%%edx), %%mm0\n\t" 585 " pfadd 92(%%"REG_d"), %%mm0\n\t"
585 " movd %%mm0, 832(%%edi)\n\t" 586 " movd %%mm0, 832(%%"REG_D")\n\t"
586 587
587 " jmp .L_bye\n\t" 588 " jmp .L_bye\n\t"
588 ".L01: \n\t" 589 ".L01: \n\t"
589 #endif 590 #endif
590 /* Phase 9*/ 591 /* Phase 9*/
591 592
592 " movq (%%ecx), %%mm0\n\t" 593 " movq (%%"REG_c"), %%mm0\n\t"
593 " movq %%mm0, %%mm1\n\t" 594 " movq %%mm0, %%mm1\n\t"
594 " pxor %%mm7, %%mm1\n\t" 595 " pxor %%mm7, %%mm1\n\t"
595 " pfacc %%mm1, %%mm0\n\t" 596 " pfacc %%mm1, %%mm0\n\t"
596 " pfmul %%mm6, %%mm0\n\t" 597 " pfmul %%mm6, %%mm0\n\t"
597 " pf2iw %%mm0, %%mm0\n\t" 598 " pf2iw %%mm0, %%mm0\n\t"
598 " movd %%mm0, %%eax\n\t" 599 " movd %%mm0, %%"REG_a"\n\t"
599 " movw %%ax, 512(%%esi)\n\t" 600 " movw %%ax, 512(%%"REG_S")\n\t"
600 " psrlq $32, %%mm0\n\t" 601 " psrlq $32, %%mm0\n\t"
601 " movd %%mm0, %%eax\n\t" 602 " movd %%mm0, %%"REG_a"\n\t"
602 " movw %%ax, (%%esi)\n\t" 603 " movw %%ax, (%%"REG_S")\n\t"
603 604
604 " movd 12(%%ecx), %%mm0\n\t" 605 " movd 12(%%"REG_c"), %%mm0\n\t"
605 " pfsub 8(%%ecx), %%mm0\n\t" 606 " pfsub 8(%%"REG_c"), %%mm0\n\t"
606 " pfmul 120(%%ebx), %%mm0\n\t" 607 " pfmul 120(%%"REG_b"), %%mm0\n\t"
607 " pf2iw %%mm0, %%mm7\n\t" 608 " pf2iw %%mm0, %%mm7\n\t"
608 " movd %%mm7, %%eax\n\t" 609 " movd %%mm7, %%"REG_a"\n\t"
609 " movw %%ax, 256(%%edi)\n\t" 610 " movw %%ax, 256(%%"REG_D")\n\t"
610 " pfadd 12(%%ecx), %%mm0\n\t" 611 " pfadd 12(%%"REG_c"), %%mm0\n\t"
611 " pfadd 8(%%ecx), %%mm0\n\t" 612 " pfadd 8(%%"REG_c"), %%mm0\n\t"
612 " pf2iw %%mm0, %%mm0\n\t" 613 " pf2iw %%mm0, %%mm0\n\t"
613 " movd %%mm0, %%eax\n\t" 614 " movd %%mm0, %%"REG_a"\n\t"
614 " movw %%ax, 256(%%esi)\n\t" 615 " movw %%ax, 256(%%"REG_S")\n\t"
615 616
616 " movd 16(%%ecx), %%mm3\n\t" 617 " movd 16(%%"REG_c"), %%mm3\n\t"
617 " pfsub 20(%%ecx), %%mm3\n\t" 618 " pfsub 20(%%"REG_c"), %%mm3\n\t"
618 " pfmul 120(%%ebx), %%mm3\n\t" 619 " pfmul 120(%%"REG_b"), %%mm3\n\t"
619 " movq %%mm3, %%mm2\n\t" 620 " movq %%mm3, %%mm2\n\t"
620 621
621 " movd 28(%%ecx), %%mm2\n\t" 622 " movd 28(%%"REG_c"), %%mm2\n\t"
622 " pfsub 24(%%ecx), %%mm2\n\t" 623 " pfsub 24(%%"REG_c"), %%mm2\n\t"
623 " pfmul 120(%%ebx), %%mm2\n\t" 624 " pfmul 120(%%"REG_b"), %%mm2\n\t"
624 " movq %%mm2, %%mm1\n\t" 625 " movq %%mm2, %%mm1\n\t"
625 626
626 " pf2iw %%mm2, %%mm7\n\t" 627 " pf2iw %%mm2, %%mm7\n\t"
627 " movd %%mm7, %%eax\n\t" 628 " movd %%mm7, %%"REG_a"\n\t"
628 " movw %%ax, 384(%%edi)\n\t" 629 " movw %%ax, 384(%%"REG_D")\n\t"
629 630
630 " pfadd 24(%%ecx), %%mm1\n\t" 631 " pfadd 24(%%"REG_c"), %%mm1\n\t"
631 " pfadd 28(%%ecx), %%mm1\n\t" 632 " pfadd 28(%%"REG_c"), %%mm1\n\t"
632 " movq %%mm1, %%mm0\n\t" 633 " movq %%mm1, %%mm0\n\t"
633 634
634 " pfadd 16(%%ecx), %%mm0\n\t" 635 " pfadd 16(%%"REG_c"), %%mm0\n\t"
635 " pfadd 20(%%ecx), %%mm0\n\t" 636 " pfadd 20(%%"REG_c"), %%mm0\n\t"
636 " pf2iw %%mm0, %%mm0\n\t" 637 " pf2iw %%mm0, %%mm0\n\t"
637 " movd %%mm0, %%eax\n\t" 638 " movd %%mm0, %%"REG_a"\n\t"
638 " movw %%ax, 384(%%esi)\n\t" 639 " movw %%ax, 384(%%"REG_S")\n\t"
639 " pfadd %%mm3, %%mm1\n\t" 640 " pfadd %%mm3, %%mm1\n\t"
640 " pf2iw %%mm1, %%mm1\n\t" 641 " pf2iw %%mm1, %%mm1\n\t"
641 " movd %%mm1, %%eax\n\t" 642 " movd %%mm1, %%"REG_a"\n\t"
642 " movw %%ax, 128(%%esi)\n\t" 643 " movw %%ax, 128(%%"REG_S")\n\t"
643 " pfadd %%mm3, %%mm2\n\t" 644 " pfadd %%mm3, %%mm2\n\t"
644 " pf2iw %%mm2, %%mm2\n\t" 645 " pf2iw %%mm2, %%mm2\n\t"
645 " movd %%mm2, %%eax\n\t" 646 " movd %%mm2, %%"REG_a"\n\t"
646 " movw %%ax, 128(%%edi)\n\t" 647 " movw %%ax, 128(%%"REG_D")\n\t"
647 648
648 /* Phase 10*/ 649 /* Phase 10*/
649 650
650 " movq 32(%%edx), %%mm0\n\t" 651 " movq 32(%%"REG_d"), %%mm0\n\t"
651 " movq 48(%%edx), %%mm1\n\t" 652 " movq 48(%%"REG_d"), %%mm1\n\t"
652 " pfadd 48(%%edx), %%mm0\n\t" 653 " pfadd 48(%%"REG_d"), %%mm0\n\t"
653 " pfadd 40(%%edx), %%mm1\n\t" 654 " pfadd 40(%%"REG_d"), %%mm1\n\t"
654 " pf2iw %%mm0, %%mm0\n\t" 655 " pf2iw %%mm0, %%mm0\n\t"
655 " pf2iw %%mm1, %%mm1\n\t" 656 " pf2iw %%mm1, %%mm1\n\t"
656 " movd %%mm0, %%eax\n\t" 657 " movd %%mm0, %%"REG_a"\n\t"
657 " movd %%mm1, %%ecx\n\t" 658 " movd %%mm1, %%"REG_c"\n\t"
658 " movw %%ax, 448(%%esi)\n\t" 659 " movw %%ax, 448(%%"REG_S")\n\t"
659 " movw %%cx, 320(%%esi)\n\t" 660 " movw %%cx, 320(%%"REG_S")\n\t"
660 " psrlq $32, %%mm0\n\t" 661 " psrlq $32, %%mm0\n\t"
661 " psrlq $32, %%mm1\n\t" 662 " psrlq $32, %%mm1\n\t"
662 " movd %%mm0, %%eax\n\t" 663 " movd %%mm0, %%"REG_a"\n\t"
663 " movd %%mm1, %%ecx\n\t" 664 " movd %%mm1, %%"REG_c"\n\t"
664 " movw %%ax, 64(%%edi)\n\t" 665 " movw %%ax, 64(%%"REG_D")\n\t"
665 " movw %%cx, 192(%%edi)\n\t" 666 " movw %%cx, 192(%%"REG_D")\n\t"
666 667
667 " movd 40(%%edx), %%mm3\n\t" 668 " movd 40(%%"REG_d"), %%mm3\n\t"
668 " movd 56(%%edx), %%mm4\n\t" 669 " movd 56(%%"REG_d"), %%mm4\n\t"
669 " movd 60(%%edx), %%mm0\n\t" 670 " movd 60(%%"REG_d"), %%mm0\n\t"
670 " movd 44(%%edx), %%mm2\n\t" 671 " movd 44(%%"REG_d"), %%mm2\n\t"
671 " movd 120(%%edx), %%mm5\n\t" 672 " movd 120(%%"REG_d"), %%mm5\n\t"
672 " punpckldq %%mm4, %%mm3\n\t" 673 " punpckldq %%mm4, %%mm3\n\t"
673 " punpckldq 124(%%edx), %%mm0\n\t" 674 " punpckldq 124(%%"REG_d"), %%mm0\n\t"
674 " pfadd 100(%%edx), %%mm5\n\t" 675 " pfadd 100(%%"REG_d"), %%mm5\n\t"
675 " punpckldq 36(%%edx), %%mm4\n\t" 676 " punpckldq 36(%%"REG_d"), %%mm4\n\t"
676 " punpckldq 92(%%edx), %%mm2\n\t" 677 " punpckldq 92(%%"REG_d"), %%mm2\n\t"
677 " movq %%mm5, %%mm6\n\t" 678 " movq %%mm5, %%mm6\n\t"
678 " pfadd %%mm4, %%mm3\n\t" 679 " pfadd %%mm4, %%mm3\n\t"
679 " pf2iw %%mm0, %%mm1\n\t" 680 " pf2iw %%mm0, %%mm1\n\t"
680 " pf2iw %%mm3, %%mm3\n\t" 681 " pf2iw %%mm3, %%mm3\n\t"
681 " pfadd 88(%%edx), %%mm5\n\t" 682 " pfadd 88(%%"REG_d"), %%mm5\n\t"
682 " movd %%mm1, %%eax\n\t" 683 " movd %%mm1, %%"REG_a"\n\t"
683 " movd %%mm3, %%ecx\n\t" 684 " movd %%mm3, %%"REG_c"\n\t"
684 " movw %%ax, 448(%%edi)\n\t" 685 " movw %%ax, 448(%%"REG_D")\n\t"
685 " movw %%cx, 192(%%esi)\n\t" 686 " movw %%cx, 192(%%"REG_S")\n\t"
686 " pf2iw %%mm5, %%mm5\n\t" 687 " pf2iw %%mm5, %%mm5\n\t"
687 " psrlq $32, %%mm1\n\t" 688 " psrlq $32, %%mm1\n\t"
688 " psrlq $32, %%mm3\n\t" 689 " psrlq $32, %%mm3\n\t"
689 " movd %%mm5, %%ebx\n\t" 690 " movd %%mm5, %%"REG_b"\n\t"
690 " movd %%mm1, %%eax\n\t" 691 " movd %%mm1, %%"REG_a"\n\t"
691 " movd %%mm3, %%ecx\n\t" 692 " movd %%mm3, %%"REG_c"\n\t"
692 " movw %%bx, 96(%%esi)\n\t" 693 " movw %%bx, 96(%%"REG_S")\n\t"
693 " movw %%ax, 480(%%edi)\n\t" 694 " movw %%ax, 480(%%"REG_D")\n\t"
694 " movw %%cx, 64(%%esi)\n\t" 695 " movw %%cx, 64(%%"REG_S")\n\t"
695 " pfadd %%mm2, %%mm0\n\t" 696 " pfadd %%mm2, %%mm0\n\t"
696 " pf2iw %%mm0, %%mm0\n\t" 697 " pf2iw %%mm0, %%mm0\n\t"
697 " movd %%mm0, %%eax\n\t" 698 " movd %%mm0, %%"REG_a"\n\t"
698 " pfadd 68(%%edx), %%mm6\n\t" 699 " pfadd 68(%%"REG_d"), %%mm6\n\t"
699 " movw %%ax, 320(%%edi)\n\t" 700 " movw %%ax, 320(%%"REG_D")\n\t"
700 " psrlq $32, %%mm0\n\t" 701 " psrlq $32, %%mm0\n\t"
701 " pf2iw %%mm6, %%mm6\n\t" 702 " pf2iw %%mm6, %%mm6\n\t"
702 " movd %%mm0, %%eax\n\t" 703 " movd %%mm0, %%"REG_a"\n\t"
703 " movd %%mm6, %%ebx\n\t" 704 " movd %%mm6, %%"REG_b"\n\t"
704 " movw %%ax, 416(%%edi)\n\t" 705 " movw %%ax, 416(%%"REG_D")\n\t"
705 " movw %%bx, 32(%%esi)\n\t" 706 " movw %%bx, 32(%%"REG_S")\n\t"
706 707
707 " movq 96(%%edx), %%mm0\n\t" 708 " movq 96(%%"REG_d"), %%mm0\n\t"
708 " movq 112(%%edx), %%mm2\n\t" 709 " movq 112(%%"REG_d"), %%mm2\n\t"
709 " movq 104(%%edx), %%mm4\n\t" 710 " movq 104(%%"REG_d"), %%mm4\n\t"
710 " pfadd %%mm2, %%mm0\n\t" 711 " pfadd %%mm2, %%mm0\n\t"
711 " pfadd %%mm4, %%mm2\n\t" 712 " pfadd %%mm4, %%mm2\n\t"
712 " pfadd 120(%%edx), %%mm4\n\t" 713 " pfadd 120(%%"REG_d"), %%mm4\n\t"
713 " movq %%mm0, %%mm1\n\t" 714 " movq %%mm0, %%mm1\n\t"
714 " movq %%mm2, %%mm3\n\t" 715 " movq %%mm2, %%mm3\n\t"
715 " movq %%mm4, %%mm5\n\t" 716 " movq %%mm4, %%mm5\n\t"
716 " pfadd 64(%%edx), %%mm0\n\t" 717 " pfadd 64(%%"REG_d"), %%mm0\n\t"
717 " pfadd 80(%%edx), %%mm2\n\t" 718 " pfadd 80(%%"REG_d"), %%mm2\n\t"
718 " pfadd 72(%%edx), %%mm4\n\t" 719 " pfadd 72(%%"REG_d"), %%mm4\n\t"
719 " pf2iw %%mm0, %%mm0\n\t" 720 " pf2iw %%mm0, %%mm0\n\t"
720 " pf2iw %%mm2, %%mm2\n\t" 721 " pf2iw %%mm2, %%mm2\n\t"
721 " pf2iw %%mm4, %%mm4\n\t" 722 " pf2iw %%mm4, %%mm4\n\t"
722 " movd %%mm0, %%eax\n\t" 723 " movd %%mm0, %%"REG_a"\n\t"
723 " movd %%mm2, %%ecx\n\t" 724 " movd %%mm2, %%"REG_c"\n\t"
724 " movd %%mm4, %%ebx\n\t" 725 " movd %%mm4, %%"REG_b"\n\t"
725 " movw %%ax, 480(%%esi)\n\t" 726 " movw %%ax, 480(%%"REG_S")\n\t"
726 " movw %%cx, 352(%%esi)\n\t" 727 " movw %%cx, 352(%%"REG_S")\n\t"
727 " movw %%bx, 224(%%esi)\n\t" 728 " movw %%bx, 224(%%"REG_S")\n\t"
728 " psrlq $32, %%mm0\n\t" 729 " psrlq $32, %%mm0\n\t"
729 " psrlq $32, %%mm2\n\t" 730 " psrlq $32, %%mm2\n\t"
730 " psrlq $32, %%mm4\n\t" 731 " psrlq $32, %%mm4\n\t"
731 " movd %%mm0, %%eax\n\t" 732 " movd %%mm0, %%"REG_a"\n\t"
732 " movd %%mm2, %%ecx\n\t" 733 " movd %%mm2, %%"REG_c"\n\t"
733 " movd %%mm4, %%ebx\n\t" 734 " movd %%mm4, %%"REG_b"\n\t"
734 " movw %%ax, 32(%%edi)\n\t" 735 " movw %%ax, 32(%%"REG_D")\n\t"
735 " movw %%cx, 160(%%edi)\n\t" 736 " movw %%cx, 160(%%"REG_D")\n\t"
736 " movw %%bx, 288(%%edi)\n\t" 737 " movw %%bx, 288(%%"REG_D")\n\t"
737 " pfadd 80(%%edx), %%mm1\n\t" 738 " pfadd 80(%%"REG_d"), %%mm1\n\t"
738 " pfadd 72(%%edx), %%mm3\n\t" 739 " pfadd 72(%%"REG_d"), %%mm3\n\t"
739 " pfadd 88(%%edx), %%mm5\n\t" 740 " pfadd 88(%%"REG_d"), %%mm5\n\t"
740 " pf2iw %%mm1, %%mm1\n\t" 741 " pf2iw %%mm1, %%mm1\n\t"
741 " pf2iw %%mm3, %%mm3\n\t" 742 " pf2iw %%mm3, %%mm3\n\t"
742 " pf2iw %%mm5, %%mm5\n\t" 743 " pf2iw %%mm5, %%mm5\n\t"
743 " movd %%mm1, %%eax\n\t" 744 " movd %%mm1, %%"REG_a"\n\t"
744 " movd %%mm3, %%ecx\n\t" 745 " movd %%mm3, %%"REG_c"\n\t"
745 " movd %%mm5, %%ebx\n\t" 746 " movd %%mm5, %%"REG_b"\n\t"
746 " movw %%ax, 416(%%esi)\n\t" 747 " movw %%ax, 416(%%"REG_S")\n\t"
747 " movw %%cx, 288(%%esi)\n\t" 748 " movw %%cx, 288(%%"REG_S")\n\t"
748 " movw %%bx, 160(%%esi)\n\t" 749 " movw %%bx, 160(%%"REG_S")\n\t"
749 " psrlq $32, %%mm1\n\t" 750 " psrlq $32, %%mm1\n\t"
750 " psrlq $32, %%mm3\n\t" 751 " psrlq $32, %%mm3\n\t"
751 " psrlq $32, %%mm5\n\t" 752 " psrlq $32, %%mm5\n\t"
752 " movd %%mm1, %%eax\n\t" 753 " movd %%mm1, %%"REG_a"\n\t"
753 " movd %%mm3, %%ecx\n\t" 754 " movd %%mm3, %%"REG_c"\n\t"
754 " movd %%mm5, %%ebx\n\t" 755 " movd %%mm5, %%"REG_b"\n\t"
755 " movw %%ax, 96(%%edi)\n\t" 756 " movw %%ax, 96(%%"REG_D")\n\t"
756 " movw %%cx, 224(%%edi)\n\t" 757 " movw %%cx, 224(%%"REG_D")\n\t"
757 " movw %%bx, 352(%%edi)\n\t" 758 " movw %%bx, 352(%%"REG_D")\n\t"
758 759
759 " movsw\n\t" 760 " movsw\n\t"
760 761
761 ".L_bye:\n\t" 762 ".L_bye:\n\t"
762 " femms\n\t" 763 " femms\n\t"