Mercurial > mplayer.hg
comparison mp3lib/dct64_k7.c @ 31215:d0f70692a140
Make 3dnow and MMX code compile for 64 bit (just stupid search-and-replace).
They are not compiled/enabled on 64 bit though, since they are vastly slower
but would still be selected by default.
author | reimar |
---|---|
date | Sun, 30 May 2010 10:01:40 +0000 |
parents | 0ad2da052b2e |
children |
comparison
equal
deleted
inserted
replaced
31214:0bdd15feba42 | 31215:d0f70692a140 |
---|---|
9 */ | 9 */ |
10 | 10 |
11 #include "config.h" | 11 #include "config.h" |
12 #include "mangle.h" | 12 #include "mangle.h" |
13 #include "mpg123.h" | 13 #include "mpg123.h" |
| | 14 #include "libavutil/x86_cpu.h" |
14 | 15 |
15 static unsigned long long int attribute_used __attribute__((aligned(8))) x_plus_minus_3dnow = 0x8000000000000000ULL; | 16 static unsigned long long int attribute_used __attribute__((aligned(8))) x_plus_minus_3dnow = 0x8000000000000000ULL; |
16 static float attribute_used plus_1f = 1.0; | 17 static float attribute_used plus_1f = 1.0; |
17 | 18 |
18 void dct64_MMX_3dnowex(short *a,short *b,real *c) | 19 void dct64_MMX_3dnowex(short *a,short *b,real *c) |
19 { | 20 { |
20 char tmp[256]; | 21 char tmp[256]; |
21 __asm__ volatile( | 22 __asm__ volatile( |
22 " movl %2,%%eax\n\t" | 23 " mov %2,%%"REG_a"\n\t" |
23 | 24 |
24 " leal 128+%3,%%edx\n\t" | 25 " lea 128+%3,%%"REG_d"\n\t" |
25 " movl %0,%%esi\n\t" | 26 " mov %0,%%"REG_S"\n\t" |
26 " movl %1,%%edi\n\t" | 27 " mov %1,%%"REG_D"\n\t" |
27 " movl $"MANGLE(costab_mmx)",%%ebx\n\t" | 28 " mov $"MANGLE(costab_mmx)",%%"REG_b"\n\t" |
28 " leal %3,%%ecx\n\t" | 29 " lea %3,%%"REG_c"\n\t" |
29 | 30 |
30 /* Phase 1*/ | 31 /* Phase 1*/ |
31 " movq (%%eax), %%mm0\n\t" | 32 " movq (%%"REG_a"), %%mm0\n\t" |
32 " movq 8(%%eax), %%mm4\n\t" | 33 " movq 8(%%"REG_a"), %%mm4\n\t" |
33 " movq %%mm0, %%mm3\n\t" | 34 " movq %%mm0, %%mm3\n\t" |
34 " movq %%mm4, %%mm7\n\t" | 35 " movq %%mm4, %%mm7\n\t" |
35 " pswapd 120(%%eax), %%mm1\n\t" | 36 " pswapd 120(%%"REG_a"), %%mm1\n\t" |
36 " pswapd 112(%%eax), %%mm5\n\t" | 37 " pswapd 112(%%"REG_a"), %%mm5\n\t" |
37 " pfadd %%mm1, %%mm0\n\t" | 38 " pfadd %%mm1, %%mm0\n\t" |
38 " pfadd %%mm5, %%mm4\n\t" | 39 " pfadd %%mm5, %%mm4\n\t" |
39 " movq %%mm0, (%%edx)\n\t" | 40 " movq %%mm0, (%%"REG_d")\n\t" |
40 " movq %%mm4, 8(%%edx)\n\t" | 41 " movq %%mm4, 8(%%"REG_d")\n\t" |
41 " pfsub %%mm1, %%mm3\n\t" | 42 " pfsub %%mm1, %%mm3\n\t" |
42 " pfsub %%mm5, %%mm7\n\t" | 43 " pfsub %%mm5, %%mm7\n\t" |
43 " pfmul (%%ebx), %%mm3\n\t" | 44 " pfmul (%%"REG_b"), %%mm3\n\t" |
44 " pfmul 8(%%ebx), %%mm7\n\t" | 45 " pfmul 8(%%"REG_b"), %%mm7\n\t" |
45 " pswapd %%mm3, %%mm3\n\t" | 46 " pswapd %%mm3, %%mm3\n\t" |
46 " pswapd %%mm7, %%mm7\n\t" | 47 " pswapd %%mm7, %%mm7\n\t" |
47 " movq %%mm3, 120(%%edx)\n\t" | 48 " movq %%mm3, 120(%%"REG_d")\n\t" |
48 " movq %%mm7, 112(%%edx)\n\t" | 49 " movq %%mm7, 112(%%"REG_d")\n\t" |
49 | 50 |
50 " movq 16(%%eax), %%mm0\n\t" | 51 " movq 16(%%"REG_a"), %%mm0\n\t" |
51 " movq 24(%%eax), %%mm4\n\t" | 52 " movq 24(%%"REG_a"), %%mm4\n\t" |
52 " movq %%mm0, %%mm3\n\t" | 53 " movq %%mm0, %%mm3\n\t" |
53 " movq %%mm4, %%mm7\n\t" | 54 " movq %%mm4, %%mm7\n\t" |
54 " pswapd 104(%%eax), %%mm1\n\t" | 55 " pswapd 104(%%"REG_a"), %%mm1\n\t" |
55 " pswapd 96(%%eax), %%mm5\n\t" | 56 " pswapd 96(%%"REG_a"), %%mm5\n\t" |
56 " pfadd %%mm1, %%mm0\n\t" | 57 " pfadd %%mm1, %%mm0\n\t" |
57 " pfadd %%mm5, %%mm4\n\t" | 58 " pfadd %%mm5, %%mm4\n\t" |
58 " movq %%mm0, 16(%%edx)\n\t" | 59 " movq %%mm0, 16(%%"REG_d")\n\t" |
59 " movq %%mm4, 24(%%edx)\n\t" | 60 " movq %%mm4, 24(%%"REG_d")\n\t" |
60 " pfsub %%mm1, %%mm3\n\t" | 61 " pfsub %%mm1, %%mm3\n\t" |
61 " pfsub %%mm5, %%mm7\n\t" | 62 " pfsub %%mm5, %%mm7\n\t" |
62 " pfmul 16(%%ebx), %%mm3\n\t" | 63 " pfmul 16(%%"REG_b"), %%mm3\n\t" |
63 " pfmul 24(%%ebx), %%mm7\n\t" | 64 " pfmul 24(%%"REG_b"), %%mm7\n\t" |
64 " pswapd %%mm3, %%mm3\n\t" | 65 " pswapd %%mm3, %%mm3\n\t" |
65 " pswapd %%mm7, %%mm7\n\t" | 66 " pswapd %%mm7, %%mm7\n\t" |
66 " movq %%mm3, 104(%%edx)\n\t" | 67 " movq %%mm3, 104(%%"REG_d")\n\t" |
67 " movq %%mm7, 96(%%edx)\n\t" | 68 " movq %%mm7, 96(%%"REG_d")\n\t" |
68 | 69 |
69 " movq 32(%%eax), %%mm0\n\t" | 70 " movq 32(%%"REG_a"), %%mm0\n\t" |
70 " movq 40(%%eax), %%mm4\n\t" | 71 " movq 40(%%"REG_a"), %%mm4\n\t" |
71 " movq %%mm0, %%mm3\n\t" | 72 " movq %%mm0, %%mm3\n\t" |
72 " movq %%mm4, %%mm7\n\t" | 73 " movq %%mm4, %%mm7\n\t" |
73 " pswapd 88(%%eax), %%mm1\n\t" | 74 " pswapd 88(%%"REG_a"), %%mm1\n\t" |
74 " pswapd 80(%%eax), %%mm5\n\t" | 75 " pswapd 80(%%"REG_a"), %%mm5\n\t" |
75 " pfadd %%mm1, %%mm0\n\t" | 76 " pfadd %%mm1, %%mm0\n\t" |
76 " pfadd %%mm5, %%mm4\n\t" | 77 " pfadd %%mm5, %%mm4\n\t" |
77 " movq %%mm0, 32(%%edx)\n\t" | 78 " movq %%mm0, 32(%%"REG_d")\n\t" |
78 " movq %%mm4, 40(%%edx)\n\t" | 79 " movq %%mm4, 40(%%"REG_d")\n\t" |
79 " pfsub %%mm1, %%mm3\n\t" | 80 " pfsub %%mm1, %%mm3\n\t" |
80 " pfsub %%mm5, %%mm7\n\t" | 81 " pfsub %%mm5, %%mm7\n\t" |
81 " pfmul 32(%%ebx), %%mm3\n\t" | 82 " pfmul 32(%%"REG_b"), %%mm3\n\t" |
82 " pfmul 40(%%ebx), %%mm7\n\t" | 83 " pfmul 40(%%"REG_b"), %%mm7\n\t" |
83 " pswapd %%mm3, %%mm3\n\t" | 84 " pswapd %%mm3, %%mm3\n\t" |
84 " pswapd %%mm7, %%mm7\n\t" | 85 " pswapd %%mm7, %%mm7\n\t" |
85 " movq %%mm3, 88(%%edx)\n\t" | 86 " movq %%mm3, 88(%%"REG_d")\n\t" |
86 " movq %%mm7, 80(%%edx)\n\t" | 87 " movq %%mm7, 80(%%"REG_d")\n\t" |
87 | 88 |
88 " movq 48(%%eax), %%mm0\n\t" | 89 " movq 48(%%"REG_a"), %%mm0\n\t" |
89 " movq 56(%%eax), %%mm4\n\t" | 90 " movq 56(%%"REG_a"), %%mm4\n\t" |
90 " movq %%mm0, %%mm3\n\t" | 91 " movq %%mm0, %%mm3\n\t" |
91 " movq %%mm4, %%mm7\n\t" | 92 " movq %%mm4, %%mm7\n\t" |
92 " pswapd 72(%%eax), %%mm1\n\t" | 93 " pswapd 72(%%"REG_a"), %%mm1\n\t" |
93 " pswapd 64(%%eax), %%mm5\n\t" | 94 " pswapd 64(%%"REG_a"), %%mm5\n\t" |
94 " pfadd %%mm1, %%mm0\n\t" | 95 " pfadd %%mm1, %%mm0\n\t" |
95 " pfadd %%mm5, %%mm4\n\t" | 96 " pfadd %%mm5, %%mm4\n\t" |
96 " movq %%mm0, 48(%%edx)\n\t" | 97 " movq %%mm0, 48(%%"REG_d")\n\t" |
97 " movq %%mm4, 56(%%edx)\n\t" | 98 " movq %%mm4, 56(%%"REG_d")\n\t" |
98 " pfsub %%mm1, %%mm3\n\t" | 99 " pfsub %%mm1, %%mm3\n\t" |
99 " pfsub %%mm5, %%mm7\n\t" | 100 " pfsub %%mm5, %%mm7\n\t" |
100 " pfmul 48(%%ebx), %%mm3\n\t" | 101 " pfmul 48(%%"REG_b"), %%mm3\n\t" |
101 " pfmul 56(%%ebx), %%mm7\n\t" | 102 " pfmul 56(%%"REG_b"), %%mm7\n\t" |
102 " pswapd %%mm3, %%mm3\n\t" | 103 " pswapd %%mm3, %%mm3\n\t" |
103 " pswapd %%mm7, %%mm7\n\t" | 104 " pswapd %%mm7, %%mm7\n\t" |
104 " movq %%mm3, 72(%%edx)\n\t" | 105 " movq %%mm3, 72(%%"REG_d")\n\t" |
105 " movq %%mm7, 64(%%edx)\n\t" | 106 " movq %%mm7, 64(%%"REG_d")\n\t" |
106 | 107 |
107 /* Phase 2*/ | 108 /* Phase 2*/ |
108 | 109 |
109 " movq (%%edx), %%mm0\n\t" | 110 " movq (%%"REG_d"), %%mm0\n\t" |
110 " movq 8(%%edx), %%mm4\n\t" | 111 " movq 8(%%"REG_d"), %%mm4\n\t" |
111 " movq %%mm0, %%mm3\n\t" | 112 " movq %%mm0, %%mm3\n\t" |
112 " movq %%mm4, %%mm7\n\t" | 113 " movq %%mm4, %%mm7\n\t" |
113 " pswapd 56(%%edx), %%mm1\n\t" | 114 " pswapd 56(%%"REG_d"), %%mm1\n\t" |
114 " pswapd 48(%%edx), %%mm5\n\t" | 115 " pswapd 48(%%"REG_d"), %%mm5\n\t" |
115 " pfadd %%mm1, %%mm0\n\t" | 116 " pfadd %%mm1, %%mm0\n\t" |
116 " pfadd %%mm5, %%mm4\n\t" | 117 " pfadd %%mm5, %%mm4\n\t" |
117 " movq %%mm0, (%%ecx)\n\t" | 118 " movq %%mm0, (%%"REG_c")\n\t" |
118 " movq %%mm4, 8(%%ecx)\n\t" | 119 " movq %%mm4, 8(%%"REG_c")\n\t" |
119 " pfsub %%mm1, %%mm3\n\t" | 120 " pfsub %%mm1, %%mm3\n\t" |
120 " pfsub %%mm5, %%mm7\n\t" | 121 " pfsub %%mm5, %%mm7\n\t" |
121 " pfmul 64(%%ebx), %%mm3\n\t" | 122 " pfmul 64(%%"REG_b"), %%mm3\n\t" |
122 " pfmul 72(%%ebx), %%mm7\n\t" | 123 " pfmul 72(%%"REG_b"), %%mm7\n\t" |
123 " pswapd %%mm3, %%mm3\n\t" | 124 " pswapd %%mm3, %%mm3\n\t" |
124 " pswapd %%mm7, %%mm7\n\t" | 125 " pswapd %%mm7, %%mm7\n\t" |
125 " movq %%mm3, 56(%%ecx)\n\t" | 126 " movq %%mm3, 56(%%"REG_c")\n\t" |
126 " movq %%mm7, 48(%%ecx)\n\t" | 127 " movq %%mm7, 48(%%"REG_c")\n\t" |
127 | 128 |
128 " movq 16(%%edx), %%mm0\n\t" | 129 " movq 16(%%"REG_d"), %%mm0\n\t" |
129 " movq 24(%%edx), %%mm4\n\t" | 130 " movq 24(%%"REG_d"), %%mm4\n\t" |
130 " movq %%mm0, %%mm3\n\t" | 131 " movq %%mm0, %%mm3\n\t" |
131 " movq %%mm4, %%mm7\n\t" | 132 " movq %%mm4, %%mm7\n\t" |
132 " pswapd 40(%%edx), %%mm1\n\t" | 133 " pswapd 40(%%"REG_d"), %%mm1\n\t" |
133 " pswapd 32(%%edx), %%mm5\n\t" | 134 " pswapd 32(%%"REG_d"), %%mm5\n\t" |
134 " pfadd %%mm1, %%mm0\n\t" | 135 " pfadd %%mm1, %%mm0\n\t" |
135 " pfadd %%mm5, %%mm4\n\t" | 136 " pfadd %%mm5, %%mm4\n\t" |
136 " movq %%mm0, 16(%%ecx)\n\t" | 137 " movq %%mm0, 16(%%"REG_c")\n\t" |
137 " movq %%mm4, 24(%%ecx)\n\t" | 138 " movq %%mm4, 24(%%"REG_c")\n\t" |
138 " pfsub %%mm1, %%mm3\n\t" | 139 " pfsub %%mm1, %%mm3\n\t" |
139 " pfsub %%mm5, %%mm7\n\t" | 140 " pfsub %%mm5, %%mm7\n\t" |
140 " pfmul 80(%%ebx), %%mm3\n\t" | 141 " pfmul 80(%%"REG_b"), %%mm3\n\t" |
141 " pfmul 88(%%ebx), %%mm7\n\t" | 142 " pfmul 88(%%"REG_b"), %%mm7\n\t" |
142 " pswapd %%mm3, %%mm3\n\t" | 143 " pswapd %%mm3, %%mm3\n\t" |
143 " pswapd %%mm7, %%mm7\n\t" | 144 " pswapd %%mm7, %%mm7\n\t" |
144 " movq %%mm3, 40(%%ecx)\n\t" | 145 " movq %%mm3, 40(%%"REG_c")\n\t" |
145 " movq %%mm7, 32(%%ecx)\n\t" | 146 " movq %%mm7, 32(%%"REG_c")\n\t" |
146 | 147 |
147 /* Phase 3*/ | 148 /* Phase 3*/ |
148 | 149 |
149 " movq 64(%%edx), %%mm0\n\t" | 150 " movq 64(%%"REG_d"), %%mm0\n\t" |
150 " movq 72(%%edx), %%mm4\n\t" | 151 " movq 72(%%"REG_d"), %%mm4\n\t" |
151 " movq %%mm0, %%mm3\n\t" | 152 " movq %%mm0, %%mm3\n\t" |
152 " movq %%mm4, %%mm7\n\t" | 153 " movq %%mm4, %%mm7\n\t" |
153 " pswapd 120(%%edx), %%mm1\n\t" | 154 " pswapd 120(%%"REG_d"), %%mm1\n\t" |
154 " pswapd 112(%%edx), %%mm5\n\t" | 155 " pswapd 112(%%"REG_d"), %%mm5\n\t" |
155 " pfadd %%mm1, %%mm0\n\t" | 156 " pfadd %%mm1, %%mm0\n\t" |
156 " pfadd %%mm5, %%mm4\n\t" | 157 " pfadd %%mm5, %%mm4\n\t" |
157 " movq %%mm0, 64(%%ecx)\n\t" | 158 " movq %%mm0, 64(%%"REG_c")\n\t" |
158 " movq %%mm4, 72(%%ecx)\n\t" | 159 " movq %%mm4, 72(%%"REG_c")\n\t" |
159 " pfsubr %%mm1, %%mm3\n\t" | 160 " pfsubr %%mm1, %%mm3\n\t" |
160 " pfsubr %%mm5, %%mm7\n\t" | 161 " pfsubr %%mm5, %%mm7\n\t" |
161 " pfmul 64(%%ebx), %%mm3\n\t" | 162 " pfmul 64(%%"REG_b"), %%mm3\n\t" |
162 " pfmul 72(%%ebx), %%mm7\n\t" | 163 " pfmul 72(%%"REG_b"), %%mm7\n\t" |
163 " pswapd %%mm3, %%mm3\n\t" | 164 " pswapd %%mm3, %%mm3\n\t" |
164 " pswapd %%mm7, %%mm7\n\t" | 165 " pswapd %%mm7, %%mm7\n\t" |
165 " movq %%mm3, 120(%%ecx)\n\t" | 166 " movq %%mm3, 120(%%"REG_c")\n\t" |
166 " movq %%mm7, 112(%%ecx)\n\t" | 167 " movq %%mm7, 112(%%"REG_c")\n\t" |
167 | 168 |
168 " movq 80(%%edx), %%mm0\n\t" | 169 " movq 80(%%"REG_d"), %%mm0\n\t" |
169 " movq 88(%%edx), %%mm4\n\t" | 170 " movq 88(%%"REG_d"), %%mm4\n\t" |
170 " movq %%mm0, %%mm3\n\t" | 171 " movq %%mm0, %%mm3\n\t" |
171 " movq %%mm4, %%mm7\n\t" | 172 " movq %%mm4, %%mm7\n\t" |
172 " pswapd 104(%%edx), %%mm1\n\t" | 173 " pswapd 104(%%"REG_d"), %%mm1\n\t" |
173 " pswapd 96(%%edx), %%mm5\n\t" | 174 " pswapd 96(%%"REG_d"), %%mm5\n\t" |
174 " pfadd %%mm1, %%mm0\n\t" | 175 " pfadd %%mm1, %%mm0\n\t" |
175 " pfadd %%mm5, %%mm4\n\t" | 176 " pfadd %%mm5, %%mm4\n\t" |
176 " movq %%mm0, 80(%%ecx)\n\t" | 177 " movq %%mm0, 80(%%"REG_c")\n\t" |
177 " movq %%mm4, 88(%%ecx)\n\t" | 178 " movq %%mm4, 88(%%"REG_c")\n\t" |
178 " pfsubr %%mm1, %%mm3\n\t" | 179 " pfsubr %%mm1, %%mm3\n\t" |
179 " pfsubr %%mm5, %%mm7\n\t" | 180 " pfsubr %%mm5, %%mm7\n\t" |
180 " pfmul 80(%%ebx), %%mm3\n\t" | 181 " pfmul 80(%%"REG_b"), %%mm3\n\t" |
181 " pfmul 88(%%ebx), %%mm7\n\t" | 182 " pfmul 88(%%"REG_b"), %%mm7\n\t" |
182 " pswapd %%mm3, %%mm3\n\t" | 183 " pswapd %%mm3, %%mm3\n\t" |
183 " pswapd %%mm7, %%mm7\n\t" | 184 " pswapd %%mm7, %%mm7\n\t" |
184 " movq %%mm3, 104(%%ecx)\n\t" | 185 " movq %%mm3, 104(%%"REG_c")\n\t" |
185 " movq %%mm7, 96(%%ecx)\n\t" | 186 " movq %%mm7, 96(%%"REG_c")\n\t" |
186 | 187 |
187 /* Phase 4*/ | 188 /* Phase 4*/ |
188 | 189 |
189 " movq 96(%%ebx), %%mm2\n\t" | 190 " movq 96(%%"REG_b"), %%mm2\n\t" |
190 " movq 104(%%ebx), %%mm6\n\t" | 191 " movq 104(%%"REG_b"), %%mm6\n\t" |
191 | 192 |
192 " movq (%%ecx), %%mm0\n\t" | 193 " movq (%%"REG_c"), %%mm0\n\t" |
193 " movq 8(%%ecx), %%mm4\n\t" | 194 " movq 8(%%"REG_c"), %%mm4\n\t" |
194 " movq %%mm0, %%mm3\n\t" | 195 " movq %%mm0, %%mm3\n\t" |
195 " movq %%mm4, %%mm7\n\t" | 196 " movq %%mm4, %%mm7\n\t" |
196 " pswapd 24(%%ecx), %%mm1\n\t" | 197 " pswapd 24(%%"REG_c"), %%mm1\n\t" |
197 " pswapd 16(%%ecx), %%mm5\n\t" | 198 " pswapd 16(%%"REG_c"), %%mm5\n\t" |
198 " pfadd %%mm1, %%mm0\n\t" | 199 " pfadd %%mm1, %%mm0\n\t" |
199 " pfadd %%mm5, %%mm4\n\t" | 200 " pfadd %%mm5, %%mm4\n\t" |
200 " movq %%mm0, (%%edx)\n\t" | 201 " movq %%mm0, (%%"REG_d")\n\t" |
201 " movq %%mm4, 8(%%edx)\n\t" | 202 " movq %%mm4, 8(%%"REG_d")\n\t" |
202 " pfsub %%mm1, %%mm3\n\t" | 203 " pfsub %%mm1, %%mm3\n\t" |
203 " pfsub %%mm5, %%mm7\n\t" | 204 " pfsub %%mm5, %%mm7\n\t" |
204 " pfmul %%mm2, %%mm3\n\t" | 205 " pfmul %%mm2, %%mm3\n\t" |
205 " pfmul %%mm6, %%mm7\n\t" | 206 " pfmul %%mm6, %%mm7\n\t" |
206 " pswapd %%mm3, %%mm3\n\t" | 207 " pswapd %%mm3, %%mm3\n\t" |
207 " pswapd %%mm7, %%mm7\n\t" | 208 " pswapd %%mm7, %%mm7\n\t" |
208 " movq %%mm3, 24(%%edx)\n\t" | 209 " movq %%mm3, 24(%%"REG_d")\n\t" |
209 " movq %%mm7, 16(%%edx)\n\t" | 210 " movq %%mm7, 16(%%"REG_d")\n\t" |
210 | 211 |
211 " movq 32(%%ecx), %%mm0\n\t" | 212 " movq 32(%%"REG_c"), %%mm0\n\t" |
212 " movq 40(%%ecx), %%mm4\n\t" | 213 " movq 40(%%"REG_c"), %%mm4\n\t" |
213 " movq %%mm0, %%mm3\n\t" | 214 " movq %%mm0, %%mm3\n\t" |
214 " movq %%mm4, %%mm7\n\t" | 215 " movq %%mm4, %%mm7\n\t" |
215 " pswapd 56(%%ecx), %%mm1\n\t" | 216 " pswapd 56(%%"REG_c"), %%mm1\n\t" |
216 " pswapd 48(%%ecx), %%mm5\n\t" | 217 " pswapd 48(%%"REG_c"), %%mm5\n\t" |
217 " pfadd %%mm1, %%mm0\n\t" | 218 " pfadd %%mm1, %%mm0\n\t" |
218 " pfadd %%mm5, %%mm4\n\t" | 219 " pfadd %%mm5, %%mm4\n\t" |
219 " movq %%mm0, 32(%%edx)\n\t" | 220 " movq %%mm0, 32(%%"REG_d")\n\t" |
220 " movq %%mm4, 40(%%edx)\n\t" | 221 " movq %%mm4, 40(%%"REG_d")\n\t" |
221 " pfsubr %%mm1, %%mm3\n\t" | 222 " pfsubr %%mm1, %%mm3\n\t" |
222 " pfsubr %%mm5, %%mm7\n\t" | 223 " pfsubr %%mm5, %%mm7\n\t" |
223 " pfmul %%mm2, %%mm3\n\t" | 224 " pfmul %%mm2, %%mm3\n\t" |
224 " pfmul %%mm6, %%mm7\n\t" | 225 " pfmul %%mm6, %%mm7\n\t" |
225 " pswapd %%mm3, %%mm3\n\t" | 226 " pswapd %%mm3, %%mm3\n\t" |
226 " pswapd %%mm7, %%mm7\n\t" | 227 " pswapd %%mm7, %%mm7\n\t" |
227 " movq %%mm3, 56(%%edx)\n\t" | 228 " movq %%mm3, 56(%%"REG_d")\n\t" |
228 " movq %%mm7, 48(%%edx)\n\t" | 229 " movq %%mm7, 48(%%"REG_d")\n\t" |
229 | 230 |
230 " movq 64(%%ecx), %%mm0\n\t" | 231 " movq 64(%%"REG_c"), %%mm0\n\t" |
231 " movq 72(%%ecx), %%mm4\n\t" | 232 " movq 72(%%"REG_c"), %%mm4\n\t" |
232 " movq %%mm0, %%mm3\n\t" | 233 " movq %%mm0, %%mm3\n\t" |
233 " movq %%mm4, %%mm7\n\t" | 234 " movq %%mm4, %%mm7\n\t" |
234 " pswapd 88(%%ecx), %%mm1\n\t" | 235 " pswapd 88(%%"REG_c"), %%mm1\n\t" |
235 " pswapd 80(%%ecx), %%mm5\n\t" | 236 " pswapd 80(%%"REG_c"), %%mm5\n\t" |
236 " pfadd %%mm1, %%mm0\n\t" | 237 " pfadd %%mm1, %%mm0\n\t" |
237 " pfadd %%mm5, %%mm4\n\t" | 238 " pfadd %%mm5, %%mm4\n\t" |
238 " movq %%mm0, 64(%%edx)\n\t" | 239 " movq %%mm0, 64(%%"REG_d")\n\t" |
239 " movq %%mm4, 72(%%edx)\n\t" | 240 " movq %%mm4, 72(%%"REG_d")\n\t" |
240 " pfsub %%mm1, %%mm3\n\t" | 241 " pfsub %%mm1, %%mm3\n\t" |
241 " pfsub %%mm5, %%mm7\n\t" | 242 " pfsub %%mm5, %%mm7\n\t" |
242 " pfmul %%mm2, %%mm3\n\t" | 243 " pfmul %%mm2, %%mm3\n\t" |
243 " pfmul %%mm6, %%mm7\n\t" | 244 " pfmul %%mm6, %%mm7\n\t" |
244 " pswapd %%mm3, %%mm3\n\t" | 245 " pswapd %%mm3, %%mm3\n\t" |
245 " pswapd %%mm7, %%mm7\n\t" | 246 " pswapd %%mm7, %%mm7\n\t" |
246 " movq %%mm3, 88(%%edx)\n\t" | 247 " movq %%mm3, 88(%%"REG_d")\n\t" |
247 " movq %%mm7, 80(%%edx)\n\t" | 248 " movq %%mm7, 80(%%"REG_d")\n\t" |
248 | 249 |
249 " movq 96(%%ecx), %%mm0\n\t" | 250 " movq 96(%%"REG_c"), %%mm0\n\t" |
250 " movq 104(%%ecx), %%mm4\n\t" | 251 " movq 104(%%"REG_c"), %%mm4\n\t" |
251 " movq %%mm0, %%mm3\n\t" | 252 " movq %%mm0, %%mm3\n\t" |
252 " movq %%mm4, %%mm7\n\t" | 253 " movq %%mm4, %%mm7\n\t" |
253 " pswapd 120(%%ecx), %%mm1\n\t" | 254 " pswapd 120(%%"REG_c"), %%mm1\n\t" |
254 " pswapd 112(%%ecx), %%mm5\n\t" | 255 " pswapd 112(%%"REG_c"), %%mm5\n\t" |
255 " pfadd %%mm1, %%mm0\n\t" | 256 " pfadd %%mm1, %%mm0\n\t" |
256 " pfadd %%mm5, %%mm4\n\t" | 257 " pfadd %%mm5, %%mm4\n\t" |
257 " movq %%mm0, 96(%%edx)\n\t" | 258 " movq %%mm0, 96(%%"REG_d")\n\t" |
258 " movq %%mm4, 104(%%edx)\n\t" | 259 " movq %%mm4, 104(%%"REG_d")\n\t" |
259 " pfsubr %%mm1, %%mm3\n\t" | 260 " pfsubr %%mm1, %%mm3\n\t" |
260 " pfsubr %%mm5, %%mm7\n\t" | 261 " pfsubr %%mm5, %%mm7\n\t" |
261 " pfmul %%mm2, %%mm3\n\t" | 262 " pfmul %%mm2, %%mm3\n\t" |
262 " pfmul %%mm6, %%mm7\n\t" | 263 " pfmul %%mm6, %%mm7\n\t" |
263 " pswapd %%mm3, %%mm3\n\t" | 264 " pswapd %%mm3, %%mm3\n\t" |
264 " pswapd %%mm7, %%mm7\n\t" | 265 " pswapd %%mm7, %%mm7\n\t" |
265 " movq %%mm3, 120(%%edx)\n\t" | 266 " movq %%mm3, 120(%%"REG_d")\n\t" |
266 " movq %%mm7, 112(%%edx)\n\t" | 267 " movq %%mm7, 112(%%"REG_d")\n\t" |
267 | 268 |
268 /* Phase 5 */ | 269 /* Phase 5 */ |
269 | 270 |
270 " movq 112(%%ebx), %%mm2\n\t" | 271 " movq 112(%%"REG_b"), %%mm2\n\t" |
271 | 272 |
272 " movq (%%edx), %%mm0\n\t" | 273 " movq (%%"REG_d"), %%mm0\n\t" |
273 " movq 16(%%edx), %%mm4\n\t" | 274 " movq 16(%%"REG_d"), %%mm4\n\t" |
274 " movq %%mm0, %%mm3\n\t" | 275 " movq %%mm0, %%mm3\n\t" |
275 " movq %%mm4, %%mm7\n\t" | 276 " movq %%mm4, %%mm7\n\t" |
276 " pswapd 8(%%edx), %%mm1\n\t" | 277 " pswapd 8(%%"REG_d"), %%mm1\n\t" |
277 " pswapd 24(%%edx), %%mm5\n\t" | 278 " pswapd 24(%%"REG_d"), %%mm5\n\t" |
278 " pfadd %%mm1, %%mm0\n\t" | 279 " pfadd %%mm1, %%mm0\n\t" |
279 " pfadd %%mm5, %%mm4\n\t" | 280 " pfadd %%mm5, %%mm4\n\t" |
280 " movq %%mm0, (%%ecx)\n\t" | 281 " movq %%mm0, (%%"REG_c")\n\t" |
281 " movq %%mm4, 16(%%ecx)\n\t" | 282 " movq %%mm4, 16(%%"REG_c")\n\t" |
282 " pfsub %%mm1, %%mm3\n\t" | 283 " pfsub %%mm1, %%mm3\n\t" |
283 " pfsubr %%mm5, %%mm7\n\t" | 284 " pfsubr %%mm5, %%mm7\n\t" |
284 " pfmul %%mm2, %%mm3\n\t" | 285 " pfmul %%mm2, %%mm3\n\t" |
285 " pfmul %%mm2, %%mm7\n\t" | 286 " pfmul %%mm2, %%mm7\n\t" |
286 " pswapd %%mm3, %%mm3\n\t" | 287 " pswapd %%mm3, %%mm3\n\t" |
287 " pswapd %%mm7, %%mm7\n\t" | 288 " pswapd %%mm7, %%mm7\n\t" |
288 " movq %%mm3, 8(%%ecx)\n\t" | 289 " movq %%mm3, 8(%%"REG_c")\n\t" |
289 " movq %%mm7, 24(%%ecx)\n\t" | 290 " movq %%mm7, 24(%%"REG_c")\n\t" |
290 | 291 |
291 " movq 32(%%edx), %%mm0\n\t" | 292 " movq 32(%%"REG_d"), %%mm0\n\t" |
292 " movq 48(%%edx), %%mm4\n\t" | 293 " movq 48(%%"REG_d"), %%mm4\n\t" |
293 " movq %%mm0, %%mm3\n\t" | 294 " movq %%mm0, %%mm3\n\t" |
294 " movq %%mm4, %%mm7\n\t" | 295 " movq %%mm4, %%mm7\n\t" |
295 " pswapd 40(%%edx), %%mm1\n\t" | 296 " pswapd 40(%%"REG_d"), %%mm1\n\t" |
296 " pswapd 56(%%edx), %%mm5\n\t" | 297 " pswapd 56(%%"REG_d"), %%mm5\n\t" |
297 " pfadd %%mm1, %%mm0\n\t" | 298 " pfadd %%mm1, %%mm0\n\t" |
298 " pfadd %%mm5, %%mm4\n\t" | 299 " pfadd %%mm5, %%mm4\n\t" |
299 " movq %%mm0, 32(%%ecx)\n\t" | 300 " movq %%mm0, 32(%%"REG_c")\n\t" |
300 " movq %%mm4, 48(%%ecx)\n\t" | 301 " movq %%mm4, 48(%%"REG_c")\n\t" |
301 " pfsub %%mm1, %%mm3\n\t" | 302 " pfsub %%mm1, %%mm3\n\t" |
302 " pfsubr %%mm5, %%mm7\n\t" | 303 " pfsubr %%mm5, %%mm7\n\t" |
303 " pfmul %%mm2, %%mm3\n\t" | 304 " pfmul %%mm2, %%mm3\n\t" |
304 " pfmul %%mm2, %%mm7\n\t" | 305 " pfmul %%mm2, %%mm7\n\t" |
305 " pswapd %%mm3, %%mm3\n\t" | 306 " pswapd %%mm3, %%mm3\n\t" |
306 " pswapd %%mm7, %%mm7\n\t" | 307 " pswapd %%mm7, %%mm7\n\t" |
307 " movq %%mm3, 40(%%ecx)\n\t" | 308 " movq %%mm3, 40(%%"REG_c")\n\t" |
308 " movq %%mm7, 56(%%ecx)\n\t" | 309 " movq %%mm7, 56(%%"REG_c")\n\t" |
309 | 310 |
310 " movq 64(%%edx), %%mm0\n\t" | 311 " movq 64(%%"REG_d"), %%mm0\n\t" |
311 " movq 80(%%edx), %%mm4\n\t" | 312 " movq 80(%%"REG_d"), %%mm4\n\t" |
312 " movq %%mm0, %%mm3\n\t" | 313 " movq %%mm0, %%mm3\n\t" |
313 " movq %%mm4, %%mm7\n\t" | 314 " movq %%mm4, %%mm7\n\t" |
314 " pswapd 72(%%edx), %%mm1\n\t" | 315 " pswapd 72(%%"REG_d"), %%mm1\n\t" |
315 " pswapd 88(%%edx), %%mm5\n\t" | 316 " pswapd 88(%%"REG_d"), %%mm5\n\t" |
316 " pfadd %%mm1, %%mm0\n\t" | 317 " pfadd %%mm1, %%mm0\n\t" |
317 " pfadd %%mm5, %%mm4\n\t" | 318 " pfadd %%mm5, %%mm4\n\t" |
318 " movq %%mm0, 64(%%ecx)\n\t" | 319 " movq %%mm0, 64(%%"REG_c")\n\t" |
319 " movq %%mm4, 80(%%ecx)\n\t" | 320 " movq %%mm4, 80(%%"REG_c")\n\t" |
320 " pfsub %%mm1, %%mm3\n\t" | 321 " pfsub %%mm1, %%mm3\n\t" |
321 " pfsubr %%mm5, %%mm7\n\t" | 322 " pfsubr %%mm5, %%mm7\n\t" |
322 " pfmul %%mm2, %%mm3\n\t" | 323 " pfmul %%mm2, %%mm3\n\t" |
323 " pfmul %%mm2, %%mm7\n\t" | 324 " pfmul %%mm2, %%mm7\n\t" |
324 " pswapd %%mm3, %%mm3\n\t" | 325 " pswapd %%mm3, %%mm3\n\t" |
325 " pswapd %%mm7, %%mm7\n\t" | 326 " pswapd %%mm7, %%mm7\n\t" |
326 " movq %%mm3, 72(%%ecx)\n\t" | 327 " movq %%mm3, 72(%%"REG_c")\n\t" |
327 " movq %%mm7, 88(%%ecx)\n\t" | 328 " movq %%mm7, 88(%%"REG_c")\n\t" |
328 | 329 |
329 " movq 96(%%edx), %%mm0\n\t" | 330 " movq 96(%%"REG_d"), %%mm0\n\t" |
330 " movq 112(%%edx), %%mm4\n\t" | 331 " movq 112(%%"REG_d"), %%mm4\n\t" |
331 " movq %%mm0, %%mm3\n\t" | 332 " movq %%mm0, %%mm3\n\t" |
332 " movq %%mm4, %%mm7\n\t" | 333 " movq %%mm4, %%mm7\n\t" |
333 " pswapd 104(%%edx), %%mm1\n\t" | 334 " pswapd 104(%%"REG_d"), %%mm1\n\t" |
334 " pswapd 120(%%edx), %%mm5\n\t" | 335 " pswapd 120(%%"REG_d"), %%mm5\n\t" |
335 " pfadd %%mm1, %%mm0\n\t" | 336 " pfadd %%mm1, %%mm0\n\t" |
336 " pfadd %%mm5, %%mm4\n\t" | 337 " pfadd %%mm5, %%mm4\n\t" |
337 " movq %%mm0, 96(%%ecx)\n\t" | 338 " movq %%mm0, 96(%%"REG_c")\n\t" |
338 " movq %%mm4, 112(%%ecx)\n\t" | 339 " movq %%mm4, 112(%%"REG_c")\n\t" |
339 " pfsub %%mm1, %%mm3\n\t" | 340 " pfsub %%mm1, %%mm3\n\t" |
340 " pfsubr %%mm5, %%mm7\n\t" | 341 " pfsubr %%mm5, %%mm7\n\t" |
341 " pfmul %%mm2, %%mm3\n\t" | 342 " pfmul %%mm2, %%mm3\n\t" |
342 " pfmul %%mm2, %%mm7\n\t" | 343 " pfmul %%mm2, %%mm7\n\t" |
343 " pswapd %%mm3, %%mm3\n\t" | 344 " pswapd %%mm3, %%mm3\n\t" |
344 " pswapd %%mm7, %%mm7\n\t" | 345 " pswapd %%mm7, %%mm7\n\t" |
345 " movq %%mm3, 104(%%ecx)\n\t" | 346 " movq %%mm3, 104(%%"REG_c")\n\t" |
346 " movq %%mm7, 120(%%ecx)\n\t" | 347 " movq %%mm7, 120(%%"REG_c")\n\t" |
347 | 348 |
348 | 349 |
349 /* Phase 6. This is the end of easy road. */ | 350 /* Phase 6. This is the end of easy road. */ |
350 /* Code below is coded in scalar mode. Should be optimized */ | 351 /* Code below is coded in scalar mode. Should be optimized */ |
351 | 352 |
352 " movd "MANGLE(plus_1f)", %%mm6\n\t" | 353 " movd "MANGLE(plus_1f)", %%mm6\n\t" |
353 " punpckldq 120(%%ebx), %%mm6\n\t" /* mm6 = 1.0 | 120(%%ebx)*/ | 354 " punpckldq 120(%%"REG_b"), %%mm6\n\t" /* mm6 = 1.0 | 120(%%"REG_b")*/ |
354 " movq "MANGLE(x_plus_minus_3dnow)", %%mm7\n\t" /* mm7 = +1 | -1 */ | 355 " movq "MANGLE(x_plus_minus_3dnow)", %%mm7\n\t" /* mm7 = +1 | -1 */ |
355 | 356 |
356 " movq 32(%%ecx), %%mm0\n\t" | 357 " movq 32(%%"REG_c"), %%mm0\n\t" |
357 " movq 64(%%ecx), %%mm2\n\t" | 358 " movq 64(%%"REG_c"), %%mm2\n\t" |
358 " movq %%mm0, %%mm1\n\t" | 359 " movq %%mm0, %%mm1\n\t" |
359 " movq %%mm2, %%mm3\n\t" | 360 " movq %%mm2, %%mm3\n\t" |
360 " pxor %%mm7, %%mm1\n\t" | 361 " pxor %%mm7, %%mm1\n\t" |
361 " pxor %%mm7, %%mm3\n\t" | 362 " pxor %%mm7, %%mm3\n\t" |
362 " pfacc %%mm1, %%mm0\n\t" | 363 " pfacc %%mm1, %%mm0\n\t" |
363 " pfacc %%mm3, %%mm2\n\t" | 364 " pfacc %%mm3, %%mm2\n\t" |
364 " pfmul %%mm6, %%mm0\n\t" | 365 " pfmul %%mm6, %%mm0\n\t" |
365 " pfmul %%mm6, %%mm2\n\t" | 366 " pfmul %%mm6, %%mm2\n\t" |
366 " movq %%mm0, 32(%%edx)\n\t" | 367 " movq %%mm0, 32(%%"REG_d")\n\t" |
367 " movq %%mm2, 64(%%edx)\n\t" | 368 " movq %%mm2, 64(%%"REG_d")\n\t" |
368 | 369 |
369 " movd 44(%%ecx), %%mm0\n\t" | 370 " movd 44(%%"REG_c"), %%mm0\n\t" |
370 " movd 40(%%ecx), %%mm2\n\t" | 371 " movd 40(%%"REG_c"), %%mm2\n\t" |
371 " movd 120(%%ebx), %%mm3\n\t" | 372 " movd 120(%%"REG_b"), %%mm3\n\t" |
372 " punpckldq 76(%%ecx), %%mm0\n\t" | 373 " punpckldq 76(%%"REG_c"), %%mm0\n\t" |
373 " punpckldq 72(%%ecx), %%mm2\n\t" | 374 " punpckldq 72(%%"REG_c"), %%mm2\n\t" |
374 " punpckldq %%mm3, %%mm3\n\t" | 375 " punpckldq %%mm3, %%mm3\n\t" |
375 " movq %%mm0, %%mm4\n\t" | 376 " movq %%mm0, %%mm4\n\t" |
376 " movq %%mm2, %%mm5\n\t" | 377 " movq %%mm2, %%mm5\n\t" |
377 " pfsub %%mm2, %%mm0\n\t" | 378 " pfsub %%mm2, %%mm0\n\t" |
378 " pfmul %%mm3, %%mm0\n\t" | 379 " pfmul %%mm3, %%mm0\n\t" |
380 " pfadd %%mm5, %%mm0\n\t" | 381 " pfadd %%mm5, %%mm0\n\t" |
381 " pfadd %%mm4, %%mm0\n\t" | 382 " pfadd %%mm4, %%mm0\n\t" |
382 " movq %%mm0, %%mm2\n\t" | 383 " movq %%mm0, %%mm2\n\t" |
383 " punpckldq %%mm1, %%mm0\n\t" | 384 " punpckldq %%mm1, %%mm0\n\t" |
384 " punpckhdq %%mm1, %%mm2\n\t" | 385 " punpckhdq %%mm1, %%mm2\n\t" |
385 " movq %%mm0, 40(%%edx)\n\t" | 386 " movq %%mm0, 40(%%"REG_d")\n\t" |
386 " movq %%mm2, 72(%%edx)\n\t" | 387 " movq %%mm2, 72(%%"REG_d")\n\t" |
387 | 388 |
388 " movd 48(%%ecx), %%mm3\n\t" | 389 " movd 48(%%"REG_c"), %%mm3\n\t" |
389 " movd 60(%%ecx), %%mm2\n\t" | 390 " movd 60(%%"REG_c"), %%mm2\n\t" |
390 " pfsub 52(%%ecx), %%mm3\n\t" | 391 " pfsub 52(%%"REG_c"), %%mm3\n\t" |
391 " pfsub 56(%%ecx), %%mm2\n\t" | 392 " pfsub 56(%%"REG_c"), %%mm2\n\t" |
392 " pfmul 120(%%ebx), %%mm3\n\t" | 393 " pfmul 120(%%"REG_b"), %%mm3\n\t" |
393 " pfmul 120(%%ebx), %%mm2\n\t" | 394 " pfmul 120(%%"REG_b"), %%mm2\n\t" |
394 " movq %%mm2, %%mm1\n\t" | 395 " movq %%mm2, %%mm1\n\t" |
395 | 396 |
396 " pfadd 56(%%ecx), %%mm1\n\t" | 397 " pfadd 56(%%"REG_c"), %%mm1\n\t" |
397 " pfadd 60(%%ecx), %%mm1\n\t" | 398 " pfadd 60(%%"REG_c"), %%mm1\n\t" |
398 " movq %%mm1, %%mm0\n\t" | 399 " movq %%mm1, %%mm0\n\t" |
399 | 400 |
400 " pfadd 48(%%ecx), %%mm0\n\t" | 401 " pfadd 48(%%"REG_c"), %%mm0\n\t" |
401 " pfadd 52(%%ecx), %%mm0\n\t" | 402 " pfadd 52(%%"REG_c"), %%mm0\n\t" |
402 " pfadd %%mm3, %%mm1\n\t" | 403 " pfadd %%mm3, %%mm1\n\t" |
403 " punpckldq %%mm2, %%mm1\n\t" | 404 " punpckldq %%mm2, %%mm1\n\t" |
404 " pfadd %%mm3, %%mm2\n\t" | 405 " pfadd %%mm3, %%mm2\n\t" |
405 " punpckldq %%mm2, %%mm0\n\t" | 406 " punpckldq %%mm2, %%mm0\n\t" |
406 " movq %%mm1, 56(%%edx)\n\t" | 407 " movq %%mm1, 56(%%"REG_d")\n\t" |
407 " movq %%mm0, 48(%%edx)\n\t" | 408 " movq %%mm0, 48(%%"REG_d")\n\t" |
408 | 409 |
409 /*---*/ | 410 /*---*/ |
410 | 411 |
411 " movd 92(%%ecx), %%mm1\n\t" | 412 " movd 92(%%"REG_c"), %%mm1\n\t" |
412 " pfsub 88(%%ecx), %%mm1\n\t" | 413 " pfsub 88(%%"REG_c"), %%mm1\n\t" |
413 " pfmul 120(%%ebx), %%mm1\n\t" | 414 " pfmul 120(%%"REG_b"), %%mm1\n\t" |
414 " movd %%mm1, 92(%%edx)\n\t" | 415 " movd %%mm1, 92(%%"REG_d")\n\t" |
415 " pfadd 92(%%ecx), %%mm1\n\t" | 416 " pfadd 92(%%"REG_c"), %%mm1\n\t" |
416 " pfadd 88(%%ecx), %%mm1\n\t" | 417 " pfadd 88(%%"REG_c"), %%mm1\n\t" |
417 " movq %%mm1, %%mm0\n\t" | 418 " movq %%mm1, %%mm0\n\t" |
418 | 419 |
419 " pfadd 80(%%ecx), %%mm0\n\t" | 420 " pfadd 80(%%"REG_c"), %%mm0\n\t" |
420 " pfadd 84(%%ecx), %%mm0\n\t" | 421 " pfadd 84(%%"REG_c"), %%mm0\n\t" |
421 " movd %%mm0, 80(%%edx)\n\t" | 422 " movd %%mm0, 80(%%"REG_d")\n\t" |
422 | 423 |
423 " movd 80(%%ecx), %%mm0\n\t" | 424 " movd 80(%%"REG_c"), %%mm0\n\t" |
424 " pfsub 84(%%ecx), %%mm0\n\t" | 425 " pfsub 84(%%"REG_c"), %%mm0\n\t" |
425 " pfmul 120(%%ebx), %%mm0\n\t" | 426 " pfmul 120(%%"REG_b"), %%mm0\n\t" |
426 " pfadd %%mm0, %%mm1\n\t" | 427 " pfadd %%mm0, %%mm1\n\t" |
427 " pfadd 92(%%edx), %%mm0\n\t" | 428 " pfadd 92(%%"REG_d"), %%mm0\n\t" |
428 " punpckldq %%mm1, %%mm0\n\t" | 429 " punpckldq %%mm1, %%mm0\n\t" |
429 " movq %%mm0, 84(%%edx)\n\t" | 430 " movq %%mm0, 84(%%"REG_d")\n\t" |
430 | 431 |
431 " movq 96(%%ecx), %%mm0\n\t" | 432 " movq 96(%%"REG_c"), %%mm0\n\t" |
432 " movq %%mm0, %%mm1\n\t" | 433 " movq %%mm0, %%mm1\n\t" |
433 " pxor %%mm7, %%mm1\n\t" | 434 " pxor %%mm7, %%mm1\n\t" |
434 " pfacc %%mm1, %%mm0\n\t" | 435 " pfacc %%mm1, %%mm0\n\t" |
435 " pfmul %%mm6, %%mm0\n\t" | 436 " pfmul %%mm6, %%mm0\n\t" |
436 " movq %%mm0, 96(%%edx)\n\t" | 437 " movq %%mm0, 96(%%"REG_d")\n\t" |
437 | 438 |
438 " movd 108(%%ecx), %%mm0\n\t" | 439 " movd 108(%%"REG_c"), %%mm0\n\t" |
439 " pfsub 104(%%ecx), %%mm0\n\t" | 440 " pfsub 104(%%"REG_c"), %%mm0\n\t" |
440 " pfmul 120(%%ebx), %%mm0\n\t" | 441 " pfmul 120(%%"REG_b"), %%mm0\n\t" |
441 " movd %%mm0, 108(%%edx)\n\t" | 442 " movd %%mm0, 108(%%"REG_d")\n\t" |
442 " pfadd 104(%%ecx), %%mm0\n\t" | 443 " pfadd 104(%%"REG_c"), %%mm0\n\t" |
443 " pfadd 108(%%ecx), %%mm0\n\t" | 444 " pfadd 108(%%"REG_c"), %%mm0\n\t" |
444 " movd %%mm0, 104(%%edx)\n\t" | 445 " movd %%mm0, 104(%%"REG_d")\n\t" |
445 | 446 |
446 " movd 124(%%ecx), %%mm1\n\t" | 447 " movd 124(%%"REG_c"), %%mm1\n\t" |
447 " pfsub 120(%%ecx), %%mm1\n\t" | 448 " pfsub 120(%%"REG_c"), %%mm1\n\t" |
448 " pfmul 120(%%ebx), %%mm1\n\t" | 449 " pfmul 120(%%"REG_b"), %%mm1\n\t" |
449 " movd %%mm1, 124(%%edx)\n\t" | 450 " movd %%mm1, 124(%%"REG_d")\n\t" |
450 " pfadd 120(%%ecx), %%mm1\n\t" | 451 " pfadd 120(%%"REG_c"), %%mm1\n\t" |
451 " pfadd 124(%%ecx), %%mm1\n\t" | 452 " pfadd 124(%%"REG_c"), %%mm1\n\t" |
452 " movq %%mm1, %%mm0\n\t" | 453 " movq %%mm1, %%mm0\n\t" |
453 | 454 |
454 " pfadd 112(%%ecx), %%mm0\n\t" | 455 " pfadd 112(%%"REG_c"), %%mm0\n\t" |
455 " pfadd 116(%%ecx), %%mm0\n\t" | 456 " pfadd 116(%%"REG_c"), %%mm0\n\t" |
456 " movd %%mm0, 112(%%edx)\n\t" | 457 " movd %%mm0, 112(%%"REG_d")\n\t" |
457 | 458 |
458 " movd 112(%%ecx), %%mm0\n\t" | 459 " movd 112(%%"REG_c"), %%mm0\n\t" |
459 " pfsub 116(%%ecx), %%mm0\n\t" | 460 " pfsub 116(%%"REG_c"), %%mm0\n\t" |
460 " pfmul 120(%%ebx), %%mm0\n\t" | 461 " pfmul 120(%%"REG_b"), %%mm0\n\t" |
461 " pfadd %%mm0,%%mm1\n\t" | 462 " pfadd %%mm0,%%mm1\n\t" |
462 " pfadd 124(%%edx), %%mm0\n\t" | 463 " pfadd 124(%%"REG_d"), %%mm0\n\t" |
463 " punpckldq %%mm1, %%mm0\n\t" | 464 " punpckldq %%mm1, %%mm0\n\t" |
464 " movq %%mm0, 116(%%edx)\n\t" | 465 " movq %%mm0, 116(%%"REG_d")\n\t" |
465 | 466 |
466 // this code is broken, there is nothing modifying the z flag above. | 467 // this code is broken, there is nothing modifying the z flag above. |
467 #if 0 | 468 #if 0 |
468 " jnz .L01\n\t" | 469 " jnz .L01\n\t" |
469 | 470 |
470 /* Phase 7*/ | 471 /* Phase 7*/ |
471 /* Code below is coded in scalar mode. Should be optimized */ | 472 /* Code below is coded in scalar mode. Should be optimized */ |
472 | 473 |
473 " movd (%%ecx), %%mm0\n\t" | 474 " movd (%%"REG_c"), %%mm0\n\t" |
474 " pfadd 4(%%ecx), %%mm0\n\t" | 475 " pfadd 4(%%"REG_c"), %%mm0\n\t" |
475 " movd %%mm0, 1024(%%esi)\n\t" | 476 " movd %%mm0, 1024(%%"REG_S")\n\t" |
476 | 477 |
477 " movd (%%ecx), %%mm0\n\t" | 478 " movd (%%"REG_c"), %%mm0\n\t" |
478 " pfsub 4(%%ecx), %%mm0\n\t" | 479 " pfsub 4(%%"REG_c"), %%mm0\n\t" |
479 " pfmul 120(%%ebx), %%mm0\n\t" | 480 " pfmul 120(%%"REG_b"), %%mm0\n\t" |
480 " movd %%mm0, (%%esi)\n\t" | 481 " movd %%mm0, (%%"REG_S")\n\t" |
481 " movd %%mm0, (%%edi)\n\t" | 482 " movd %%mm0, (%%"REG_D")\n\t" |
482 | 483 |
483 " movd 12(%%ecx), %%mm0\n\t" | 484 " movd 12(%%"REG_c"), %%mm0\n\t" |
484 " pfsub 8(%%ecx), %%mm0\n\t" | 485 " pfsub 8(%%"REG_c"), %%mm0\n\t" |
485 " pfmul 120(%%ebx), %%mm0\n\t" | 486 " pfmul 120(%%"REG_b"), %%mm0\n\t" |
486 " movd %%mm0, 512(%%edi)\n\t" | 487 " movd %%mm0, 512(%%"REG_D")\n\t" |
487 " pfadd 12(%%ecx), %%mm0\n\t" | 488 " pfadd 12(%%"REG_c"), %%mm0\n\t" |
488 " pfadd 8(%%ecx), %%mm0\n\t" | 489 " pfadd 8(%%"REG_c"), %%mm0\n\t" |
489 " movd %%mm0, 512(%%esi)\n\t" | 490 " movd %%mm0, 512(%%"REG_S")\n\t" |
490 | 491 |
491 " movd 16(%%ecx), %%mm0\n\t" | 492 " movd 16(%%"REG_c"), %%mm0\n\t" |
492 " pfsub 20(%%ecx), %%mm0\n\t" | 493 " pfsub 20(%%"REG_c"), %%mm0\n\t" |
493 " pfmul 120(%%ebx), %%mm0\n\t" | 494 " pfmul 120(%%"REG_b"), %%mm0\n\t" |
494 " movq %%mm0, %%mm3\n\t" | 495 " movq %%mm0, %%mm3\n\t" |
495 | 496 |
496 " movd 28(%%ecx), %%mm0\n\t" | 497 " movd 28(%%"REG_c"), %%mm0\n\t" |
497 " pfsub 24(%%ecx), %%mm0\n\t" | 498 " pfsub 24(%%"REG_c"), %%mm0\n\t" |
498 " pfmul 120(%%ebx), %%mm0\n\t" | 499 " pfmul 120(%%"REG_b"), %%mm0\n\t" |
499 " movd %%mm0, 768(%%edi)\n\t" | 500 " movd %%mm0, 768(%%"REG_D")\n\t" |
500 " movq %%mm0, %%mm2\n\t" | 501 " movq %%mm0, %%mm2\n\t" |
501 | 502 |
502 " pfadd 24(%%ecx), %%mm0\n\t" | 503 " pfadd 24(%%"REG_c"), %%mm0\n\t" |
503 " pfadd 28(%%ecx), %%mm0\n\t" | 504 " pfadd 28(%%"REG_c"), %%mm0\n\t" |
504 " movq %%mm0, %%mm1\n\t" | 505 " movq %%mm0, %%mm1\n\t" |
505 | 506 |
506 " pfadd 16(%%ecx), %%mm0\n\t" | 507 " pfadd 16(%%"REG_c"), %%mm0\n\t" |
507 " pfadd 20(%%ecx), %%mm0\n\t" | 508 " pfadd 20(%%"REG_c"), %%mm0\n\t" |
508 " movd %%mm0, 768(%%esi)\n\t" | 509 " movd %%mm0, 768(%%"REG_S")\n\t" |
509 " pfadd %%mm3, %%mm1\n\t" | 510 " pfadd %%mm3, %%mm1\n\t" |
510 " movd %%mm1, 256(%%esi)\n\t" | 511 " movd %%mm1, 256(%%"REG_S")\n\t" |
511 " pfadd %%mm3, %%mm2\n\t" | 512 " pfadd %%mm3, %%mm2\n\t" |
512 " movd %%mm2, 256(%%edi)\n\t" | 513 " movd %%mm2, 256(%%"REG_D")\n\t" |
513 | 514 |
514 /* Phase 8*/ | 515 /* Phase 8*/ |
515 | 516 |
516 " movq 32(%%edx), %%mm0\n\t" | 517 " movq 32(%%"REG_d"), %%mm0\n\t" |
517 " movq 48(%%edx), %%mm1\n\t" | 518 " movq 48(%%"REG_d"), %%mm1\n\t" |
518 " pfadd 48(%%edx), %%mm0\n\t" | 519 " pfadd 48(%%"REG_d"), %%mm0\n\t" |
519 " pfadd 40(%%edx), %%mm1\n\t" | 520 " pfadd 40(%%"REG_d"), %%mm1\n\t" |
520 " movd %%mm0, 896(%%esi)\n\t" | 521 " movd %%mm0, 896(%%"REG_S")\n\t" |
521 " movd %%mm1, 640(%%esi)\n\t" | 522 " movd %%mm1, 640(%%"REG_S")\n\t" |
522 " psrlq $32, %%mm0\n\t" | 523 " psrlq $32, %%mm0\n\t" |
523 " psrlq $32, %%mm1\n\t" | 524 " psrlq $32, %%mm1\n\t" |
524 " movd %%mm0, 128(%%edi)\n\t" | 525 " movd %%mm0, 128(%%"REG_D")\n\t" |
525 " movd %%mm1, 384(%%edi)\n\t" | 526 " movd %%mm1, 384(%%"REG_D")\n\t" |
526 | 527 |
527 " movd 40(%%edx), %%mm0\n\t" | 528 " movd 40(%%"REG_d"), %%mm0\n\t" |
528 " pfadd 56(%%edx), %%mm0\n\t" | 529 " pfadd 56(%%"REG_d"), %%mm0\n\t" |
529 " movd %%mm0, 384(%%esi)\n\t" | 530 " movd %%mm0, 384(%%"REG_S")\n\t" |
530 | 531 |
531 " movd 56(%%edx), %%mm0\n\t" | 532 " movd 56(%%"REG_d"), %%mm0\n\t" |
532 " pfadd 36(%%edx), %%mm0\n\t" | 533 " pfadd 36(%%"REG_d"), %%mm0\n\t" |
533 " movd %%mm0, 128(%%esi)\n\t" | 534 " movd %%mm0, 128(%%"REG_S")\n\t" |
534 | 535 |
535 " movd 60(%%edx), %%mm0\n\t" | 536 " movd 60(%%"REG_d"), %%mm0\n\t" |
536 " movd %%mm0, 896(%%edi)\n\t" | 537 " movd %%mm0, 896(%%"REG_D")\n\t" |
537 " pfadd 44(%%edx), %%mm0\n\t" | 538 " pfadd 44(%%"REG_d"), %%mm0\n\t" |
538 " movd %%mm0, 640(%%edi)\n\t" | 539 " movd %%mm0, 640(%%"REG_D")\n\t" |
539 | 540 |
540 " movq 96(%%edx), %%mm0\n\t" | 541 " movq 96(%%"REG_d"), %%mm0\n\t" |
541 " movq 112(%%edx), %%mm2\n\t" | 542 " movq 112(%%"REG_d"), %%mm2\n\t" |
542 " movq 104(%%edx), %%mm4\n\t" | 543 " movq 104(%%"REG_d"), %%mm4\n\t" |
543 " pfadd 112(%%edx), %%mm0\n\t" | 544 " pfadd 112(%%"REG_d"), %%mm0\n\t" |
544 " pfadd 104(%%edx), %%mm2\n\t" | 545 " pfadd 104(%%"REG_d"), %%mm2\n\t" |
545 " pfadd 120(%%edx), %%mm4\n\t" | 546 " pfadd 120(%%"REG_d"), %%mm4\n\t" |
546 " movq %%mm0, %%mm1\n\t" | 547 " movq %%mm0, %%mm1\n\t" |
547 " movq %%mm2, %%mm3\n\t" | 548 " movq %%mm2, %%mm3\n\t" |
548 " movq %%mm4, %%mm5\n\t" | 549 " movq %%mm4, %%mm5\n\t" |
549 " pfadd 64(%%edx), %%mm0\n\t" | 550 " pfadd 64(%%"REG_d"), %%mm0\n\t" |
550 " pfadd 80(%%edx), %%mm2\n\t" | 551 " pfadd 80(%%"REG_d"), %%mm2\n\t" |
551 " pfadd 72(%%edx), %%mm4\n\t" | 552 " pfadd 72(%%"REG_d"), %%mm4\n\t" |
552 " movd %%mm0, 960(%%esi)\n\t" | 553 " movd %%mm0, 960(%%"REG_S")\n\t" |
553 " movd %%mm2, 704(%%esi)\n\t" | 554 " movd %%mm2, 704(%%"REG_S")\n\t" |
554 " movd %%mm4, 448(%%esi)\n\t" | 555 " movd %%mm4, 448(%%"REG_S")\n\t" |
555 " psrlq $32, %%mm0\n\t" | 556 " psrlq $32, %%mm0\n\t" |
556 " psrlq $32, %%mm2\n\t" | 557 " psrlq $32, %%mm2\n\t" |
557 " psrlq $32, %%mm4\n\t" | 558 " psrlq $32, %%mm4\n\t" |
558 " movd %%mm0, 64(%%edi)\n\t" | 559 " movd %%mm0, 64(%%"REG_D")\n\t" |
559 " movd %%mm2, 320(%%edi)\n\t" | 560 " movd %%mm2, 320(%%"REG_D")\n\t" |
560 " movd %%mm4, 576(%%edi)\n\t" | 561 " movd %%mm4, 576(%%"REG_D")\n\t" |
561 " pfadd 80(%%edx), %%mm1\n\t" | 562 " pfadd 80(%%"REG_d"), %%mm1\n\t" |
562 " pfadd 72(%%edx), %%mm3\n\t" | 563 " pfadd 72(%%"REG_d"), %%mm3\n\t" |
563 " pfadd 88(%%edx), %%mm5\n\t" | 564 " pfadd 88(%%"REG_d"), %%mm5\n\t" |
564 " movd %%mm1, 832(%%esi)\n\t" | 565 " movd %%mm1, 832(%%"REG_S")\n\t" |
565 " movd %%mm3, 576(%%esi)\n\t" | 566 " movd %%mm3, 576(%%"REG_S")\n\t" |
566 " movd %%mm5, 320(%%esi)\n\t" | 567 " movd %%mm5, 320(%%"REG_S")\n\t" |
567 " psrlq $32, %%mm1\n\t" | 568 " psrlq $32, %%mm1\n\t" |
568 " psrlq $32, %%mm3\n\t" | 569 " psrlq $32, %%mm3\n\t" |
569 " psrlq $32, %%mm5\n\t" | 570 " psrlq $32, %%mm5\n\t" |
570 " movd %%mm1, 192(%%edi)\n\t" | 571 " movd %%mm1, 192(%%"REG_D")\n\t" |
571 " movd %%mm3, 448(%%edi)\n\t" | 572 " movd %%mm3, 448(%%"REG_D")\n\t" |
572 " movd %%mm5, 704(%%edi)\n\t" | 573 " movd %%mm5, 704(%%"REG_D")\n\t" |
573 | 574 |
574 " movd 120(%%edx), %%mm0\n\t" | 575 " movd 120(%%"REG_d"), %%mm0\n\t" |
575 " pfadd 100(%%edx), %%mm0\n\t" | 576 " pfadd 100(%%"REG_d"), %%mm0\n\t" |
576 " movq %%mm0, %%mm1\n\t" | 577 " movq %%mm0, %%mm1\n\t" |
577 " pfadd 88(%%edx), %%mm0\n\t" | 578 " pfadd 88(%%"REG_d"), %%mm0\n\t" |
578 " movd %%mm0, 192(%%esi)\n\t" | 579 " movd %%mm0, 192(%%"REG_S")\n\t" |
579 " pfadd 68(%%edx), %%mm1\n\t" | 580 " pfadd 68(%%"REG_d"), %%mm1\n\t" |
580 " movd %%mm1, 64(%%esi)\n\t" | 581 " movd %%mm1, 64(%%"REG_S")\n\t" |
581 | 582 |
582 " movd 124(%%edx), %%mm0\n\t" | 583 " movd 124(%%"REG_d"), %%mm0\n\t" |
583 " movd %%mm0, 960(%%edi)\n\t" | 584 " movd %%mm0, 960(%%"REG_D")\n\t" |
584 " pfadd 92(%%edx), %%mm0\n\t" | 585 " pfadd 92(%%"REG_d"), %%mm0\n\t" |
585 " movd %%mm0, 832(%%edi)\n\t" | 586 " movd %%mm0, 832(%%"REG_D")\n\t" |
586 | 587 |
587 " jmp .L_bye\n\t" | 588 " jmp .L_bye\n\t" |
588 ".L01: \n\t" | 589 ".L01: \n\t" |
589 #endif | 590 #endif |
590 /* Phase 9*/ | 591 /* Phase 9*/ |
591 | 592 |
592 " movq (%%ecx), %%mm0\n\t" | 593 " movq (%%"REG_c"), %%mm0\n\t" |
593 " movq %%mm0, %%mm1\n\t" | 594 " movq %%mm0, %%mm1\n\t" |
594 " pxor %%mm7, %%mm1\n\t" | 595 " pxor %%mm7, %%mm1\n\t" |
595 " pfacc %%mm1, %%mm0\n\t" | 596 " pfacc %%mm1, %%mm0\n\t" |
596 " pfmul %%mm6, %%mm0\n\t" | 597 " pfmul %%mm6, %%mm0\n\t" |
597 " pf2iw %%mm0, %%mm0\n\t" | 598 " pf2iw %%mm0, %%mm0\n\t" |
598 " movd %%mm0, %%eax\n\t" | 599 " movd %%mm0, %%"REG_a"\n\t" |
599 " movw %%ax, 512(%%esi)\n\t" | 600 " movw %%ax, 512(%%"REG_S")\n\t" |
600 " psrlq $32, %%mm0\n\t" | 601 " psrlq $32, %%mm0\n\t" |
601 " movd %%mm0, %%eax\n\t" | 602 " movd %%mm0, %%"REG_a"\n\t" |
602 " movw %%ax, (%%esi)\n\t" | 603 " movw %%ax, (%%"REG_S")\n\t" |
603 | 604 |
604 " movd 12(%%ecx), %%mm0\n\t" | 605 " movd 12(%%"REG_c"), %%mm0\n\t" |
605 " pfsub 8(%%ecx), %%mm0\n\t" | 606 " pfsub 8(%%"REG_c"), %%mm0\n\t" |
606 " pfmul 120(%%ebx), %%mm0\n\t" | 607 " pfmul 120(%%"REG_b"), %%mm0\n\t" |
607 " pf2iw %%mm0, %%mm7\n\t" | 608 " pf2iw %%mm0, %%mm7\n\t" |
608 " movd %%mm7, %%eax\n\t" | 609 " movd %%mm7, %%"REG_a"\n\t" |
609 " movw %%ax, 256(%%edi)\n\t" | 610 " movw %%ax, 256(%%"REG_D")\n\t" |
610 " pfadd 12(%%ecx), %%mm0\n\t" | 611 " pfadd 12(%%"REG_c"), %%mm0\n\t" |
611 " pfadd 8(%%ecx), %%mm0\n\t" | 612 " pfadd 8(%%"REG_c"), %%mm0\n\t" |
612 " pf2iw %%mm0, %%mm0\n\t" | 613 " pf2iw %%mm0, %%mm0\n\t" |
613 " movd %%mm0, %%eax\n\t" | 614 " movd %%mm0, %%"REG_a"\n\t" |
614 " movw %%ax, 256(%%esi)\n\t" | 615 " movw %%ax, 256(%%"REG_S")\n\t" |
615 | 616 |
616 " movd 16(%%ecx), %%mm3\n\t" | 617 " movd 16(%%"REG_c"), %%mm3\n\t" |
617 " pfsub 20(%%ecx), %%mm3\n\t" | 618 " pfsub 20(%%"REG_c"), %%mm3\n\t" |
618 " pfmul 120(%%ebx), %%mm3\n\t" | 619 " pfmul 120(%%"REG_b"), %%mm3\n\t" |
619 " movq %%mm3, %%mm2\n\t" | 620 " movq %%mm3, %%mm2\n\t" |
620 | 621 |
621 " movd 28(%%ecx), %%mm2\n\t" | 622 " movd 28(%%"REG_c"), %%mm2\n\t" |
622 " pfsub 24(%%ecx), %%mm2\n\t" | 623 " pfsub 24(%%"REG_c"), %%mm2\n\t" |
623 " pfmul 120(%%ebx), %%mm2\n\t" | 624 " pfmul 120(%%"REG_b"), %%mm2\n\t" |
624 " movq %%mm2, %%mm1\n\t" | 625 " movq %%mm2, %%mm1\n\t" |
625 | 626 |
626 " pf2iw %%mm2, %%mm7\n\t" | 627 " pf2iw %%mm2, %%mm7\n\t" |
627 " movd %%mm7, %%eax\n\t" | 628 " movd %%mm7, %%"REG_a"\n\t" |
628 " movw %%ax, 384(%%edi)\n\t" | 629 " movw %%ax, 384(%%"REG_D")\n\t" |
629 | 630 |
630 " pfadd 24(%%ecx), %%mm1\n\t" | 631 " pfadd 24(%%"REG_c"), %%mm1\n\t" |
631 " pfadd 28(%%ecx), %%mm1\n\t" | 632 " pfadd 28(%%"REG_c"), %%mm1\n\t" |
632 " movq %%mm1, %%mm0\n\t" | 633 " movq %%mm1, %%mm0\n\t" |
633 | 634 |
634 " pfadd 16(%%ecx), %%mm0\n\t" | 635 " pfadd 16(%%"REG_c"), %%mm0\n\t" |
635 " pfadd 20(%%ecx), %%mm0\n\t" | 636 " pfadd 20(%%"REG_c"), %%mm0\n\t" |
636 " pf2iw %%mm0, %%mm0\n\t" | 637 " pf2iw %%mm0, %%mm0\n\t" |
637 " movd %%mm0, %%eax\n\t" | 638 " movd %%mm0, %%"REG_a"\n\t" |
638 " movw %%ax, 384(%%esi)\n\t" | 639 " movw %%ax, 384(%%"REG_S")\n\t" |
639 " pfadd %%mm3, %%mm1\n\t" | 640 " pfadd %%mm3, %%mm1\n\t" |
640 " pf2iw %%mm1, %%mm1\n\t" | 641 " pf2iw %%mm1, %%mm1\n\t" |
641 " movd %%mm1, %%eax\n\t" | 642 " movd %%mm1, %%"REG_a"\n\t" |
642 " movw %%ax, 128(%%esi)\n\t" | 643 " movw %%ax, 128(%%"REG_S")\n\t" |
643 " pfadd %%mm3, %%mm2\n\t" | 644 " pfadd %%mm3, %%mm2\n\t" |
644 " pf2iw %%mm2, %%mm2\n\t" | 645 " pf2iw %%mm2, %%mm2\n\t" |
645 " movd %%mm2, %%eax\n\t" | 646 " movd %%mm2, %%"REG_a"\n\t" |
646 " movw %%ax, 128(%%edi)\n\t" | 647 " movw %%ax, 128(%%"REG_D")\n\t" |
647 | 648 |
648 /* Phase 10*/ | 649 /* Phase 10*/ |
649 | 650 |
650 " movq 32(%%edx), %%mm0\n\t" | 651 " movq 32(%%"REG_d"), %%mm0\n\t" |
651 " movq 48(%%edx), %%mm1\n\t" | 652 " movq 48(%%"REG_d"), %%mm1\n\t" |
652 " pfadd 48(%%edx), %%mm0\n\t" | 653 " pfadd 48(%%"REG_d"), %%mm0\n\t" |
653 " pfadd 40(%%edx), %%mm1\n\t" | 654 " pfadd 40(%%"REG_d"), %%mm1\n\t" |
654 " pf2iw %%mm0, %%mm0\n\t" | 655 " pf2iw %%mm0, %%mm0\n\t" |
655 " pf2iw %%mm1, %%mm1\n\t" | 656 " pf2iw %%mm1, %%mm1\n\t" |
656 " movd %%mm0, %%eax\n\t" | 657 " movd %%mm0, %%"REG_a"\n\t" |
657 " movd %%mm1, %%ecx\n\t" | 658 " movd %%mm1, %%"REG_c"\n\t" |
658 " movw %%ax, 448(%%esi)\n\t" | 659 " movw %%ax, 448(%%"REG_S")\n\t" |
659 " movw %%cx, 320(%%esi)\n\t" | 660 " movw %%cx, 320(%%"REG_S")\n\t" |
660 " psrlq $32, %%mm0\n\t" | 661 " psrlq $32, %%mm0\n\t" |
661 " psrlq $32, %%mm1\n\t" | 662 " psrlq $32, %%mm1\n\t" |
662 " movd %%mm0, %%eax\n\t" | 663 " movd %%mm0, %%"REG_a"\n\t" |
663 " movd %%mm1, %%ecx\n\t" | 664 " movd %%mm1, %%"REG_c"\n\t" |
664 " movw %%ax, 64(%%edi)\n\t" | 665 " movw %%ax, 64(%%"REG_D")\n\t" |
665 " movw %%cx, 192(%%edi)\n\t" | 666 " movw %%cx, 192(%%"REG_D")\n\t" |
666 | 667 |
667 " movd 40(%%edx), %%mm3\n\t" | 668 " movd 40(%%"REG_d"), %%mm3\n\t" |
668 " movd 56(%%edx), %%mm4\n\t" | 669 " movd 56(%%"REG_d"), %%mm4\n\t" |
669 " movd 60(%%edx), %%mm0\n\t" | 670 " movd 60(%%"REG_d"), %%mm0\n\t" |
670 " movd 44(%%edx), %%mm2\n\t" | 671 " movd 44(%%"REG_d"), %%mm2\n\t" |
671 " movd 120(%%edx), %%mm5\n\t" | 672 " movd 120(%%"REG_d"), %%mm5\n\t" |
672 " punpckldq %%mm4, %%mm3\n\t" | 673 " punpckldq %%mm4, %%mm3\n\t" |
673 " punpckldq 124(%%edx), %%mm0\n\t" | 674 " punpckldq 124(%%"REG_d"), %%mm0\n\t" |
674 " pfadd 100(%%edx), %%mm5\n\t" | 675 " pfadd 100(%%"REG_d"), %%mm5\n\t" |
675 " punpckldq 36(%%edx), %%mm4\n\t" | 676 " punpckldq 36(%%"REG_d"), %%mm4\n\t" |
676 " punpckldq 92(%%edx), %%mm2\n\t" | 677 " punpckldq 92(%%"REG_d"), %%mm2\n\t" |
677 " movq %%mm5, %%mm6\n\t" | 678 " movq %%mm5, %%mm6\n\t" |
678 " pfadd %%mm4, %%mm3\n\t" | 679 " pfadd %%mm4, %%mm3\n\t" |
679 " pf2iw %%mm0, %%mm1\n\t" | 680 " pf2iw %%mm0, %%mm1\n\t" |
680 " pf2iw %%mm3, %%mm3\n\t" | 681 " pf2iw %%mm3, %%mm3\n\t" |
681 " pfadd 88(%%edx), %%mm5\n\t" | 682 " pfadd 88(%%"REG_d"), %%mm5\n\t" |
682 " movd %%mm1, %%eax\n\t" | 683 " movd %%mm1, %%"REG_a"\n\t" |
683 " movd %%mm3, %%ecx\n\t" | 684 " movd %%mm3, %%"REG_c"\n\t" |
684 " movw %%ax, 448(%%edi)\n\t" | 685 " movw %%ax, 448(%%"REG_D")\n\t" |
685 " movw %%cx, 192(%%esi)\n\t" | 686 " movw %%cx, 192(%%"REG_S")\n\t" |
686 " pf2iw %%mm5, %%mm5\n\t" | 687 " pf2iw %%mm5, %%mm5\n\t" |
687 " psrlq $32, %%mm1\n\t" | 688 " psrlq $32, %%mm1\n\t" |
688 " psrlq $32, %%mm3\n\t" | 689 " psrlq $32, %%mm3\n\t" |
689 " movd %%mm5, %%ebx\n\t" | 690 " movd %%mm5, %%"REG_b"\n\t" |
690 " movd %%mm1, %%eax\n\t" | 691 " movd %%mm1, %%"REG_a"\n\t" |
691 " movd %%mm3, %%ecx\n\t" | 692 " movd %%mm3, %%"REG_c"\n\t" |
692 " movw %%bx, 96(%%esi)\n\t" | 693 " movw %%bx, 96(%%"REG_S")\n\t" |
693 " movw %%ax, 480(%%edi)\n\t" | 694 " movw %%ax, 480(%%"REG_D")\n\t" |
694 " movw %%cx, 64(%%esi)\n\t" | 695 " movw %%cx, 64(%%"REG_S")\n\t" |
695 " pfadd %%mm2, %%mm0\n\t" | 696 " pfadd %%mm2, %%mm0\n\t" |
696 " pf2iw %%mm0, %%mm0\n\t" | 697 " pf2iw %%mm0, %%mm0\n\t" |
697 " movd %%mm0, %%eax\n\t" | 698 " movd %%mm0, %%"REG_a"\n\t" |
698 " pfadd 68(%%edx), %%mm6\n\t" | 699 " pfadd 68(%%"REG_d"), %%mm6\n\t" |
699 " movw %%ax, 320(%%edi)\n\t" | 700 " movw %%ax, 320(%%"REG_D")\n\t" |
700 " psrlq $32, %%mm0\n\t" | 701 " psrlq $32, %%mm0\n\t" |
701 " pf2iw %%mm6, %%mm6\n\t" | 702 " pf2iw %%mm6, %%mm6\n\t" |
702 " movd %%mm0, %%eax\n\t" | 703 " movd %%mm0, %%"REG_a"\n\t" |
703 " movd %%mm6, %%ebx\n\t" | 704 " movd %%mm6, %%"REG_b"\n\t" |
704 " movw %%ax, 416(%%edi)\n\t" | 705 " movw %%ax, 416(%%"REG_D")\n\t" |
705 " movw %%bx, 32(%%esi)\n\t" | 706 " movw %%bx, 32(%%"REG_S")\n\t" |
706 | 707 |
707 " movq 96(%%edx), %%mm0\n\t" | 708 " movq 96(%%"REG_d"), %%mm0\n\t" |
708 " movq 112(%%edx), %%mm2\n\t" | 709 " movq 112(%%"REG_d"), %%mm2\n\t" |
709 " movq 104(%%edx), %%mm4\n\t" | 710 " movq 104(%%"REG_d"), %%mm4\n\t" |
710 " pfadd %%mm2, %%mm0\n\t" | 711 " pfadd %%mm2, %%mm0\n\t" |
711 " pfadd %%mm4, %%mm2\n\t" | 712 " pfadd %%mm4, %%mm2\n\t" |
712 " pfadd 120(%%edx), %%mm4\n\t" | 713 " pfadd 120(%%"REG_d"), %%mm4\n\t" |
713 " movq %%mm0, %%mm1\n\t" | 714 " movq %%mm0, %%mm1\n\t" |
714 " movq %%mm2, %%mm3\n\t" | 715 " movq %%mm2, %%mm3\n\t" |
715 " movq %%mm4, %%mm5\n\t" | 716 " movq %%mm4, %%mm5\n\t" |
716 " pfadd 64(%%edx), %%mm0\n\t" | 717 " pfadd 64(%%"REG_d"), %%mm0\n\t" |
717 " pfadd 80(%%edx), %%mm2\n\t" | 718 " pfadd 80(%%"REG_d"), %%mm2\n\t" |
718 " pfadd 72(%%edx), %%mm4\n\t" | 719 " pfadd 72(%%"REG_d"), %%mm4\n\t" |
719 " pf2iw %%mm0, %%mm0\n\t" | 720 " pf2iw %%mm0, %%mm0\n\t" |
720 " pf2iw %%mm2, %%mm2\n\t" | 721 " pf2iw %%mm2, %%mm2\n\t" |
721 " pf2iw %%mm4, %%mm4\n\t" | 722 " pf2iw %%mm4, %%mm4\n\t" |
722 " movd %%mm0, %%eax\n\t" | 723 " movd %%mm0, %%"REG_a"\n\t" |
723 " movd %%mm2, %%ecx\n\t" | 724 " movd %%mm2, %%"REG_c"\n\t" |
724 " movd %%mm4, %%ebx\n\t" | 725 " movd %%mm4, %%"REG_b"\n\t" |
725 " movw %%ax, 480(%%esi)\n\t" | 726 " movw %%ax, 480(%%"REG_S")\n\t" |
726 " movw %%cx, 352(%%esi)\n\t" | 727 " movw %%cx, 352(%%"REG_S")\n\t" |
727 " movw %%bx, 224(%%esi)\n\t" | 728 " movw %%bx, 224(%%"REG_S")\n\t" |
728 " psrlq $32, %%mm0\n\t" | 729 " psrlq $32, %%mm0\n\t" |
729 " psrlq $32, %%mm2\n\t" | 730 " psrlq $32, %%mm2\n\t" |
730 " psrlq $32, %%mm4\n\t" | 731 " psrlq $32, %%mm4\n\t" |
731 " movd %%mm0, %%eax\n\t" | 732 " movd %%mm0, %%"REG_a"\n\t" |
732 " movd %%mm2, %%ecx\n\t" | 733 " movd %%mm2, %%"REG_c"\n\t" |
733 " movd %%mm4, %%ebx\n\t" | 734 " movd %%mm4, %%"REG_b"\n\t" |
734 " movw %%ax, 32(%%edi)\n\t" | 735 " movw %%ax, 32(%%"REG_D")\n\t" |
735 " movw %%cx, 160(%%edi)\n\t" | 736 " movw %%cx, 160(%%"REG_D")\n\t" |
736 " movw %%bx, 288(%%edi)\n\t" | 737 " movw %%bx, 288(%%"REG_D")\n\t" |
737 " pfadd 80(%%edx), %%mm1\n\t" | 738 " pfadd 80(%%"REG_d"), %%mm1\n\t" |
738 " pfadd 72(%%edx), %%mm3\n\t" | 739 " pfadd 72(%%"REG_d"), %%mm3\n\t" |
739 " pfadd 88(%%edx), %%mm5\n\t" | 740 " pfadd 88(%%"REG_d"), %%mm5\n\t" |
740 " pf2iw %%mm1, %%mm1\n\t" | 741 " pf2iw %%mm1, %%mm1\n\t" |
741 " pf2iw %%mm3, %%mm3\n\t" | 742 " pf2iw %%mm3, %%mm3\n\t" |
742 " pf2iw %%mm5, %%mm5\n\t" | 743 " pf2iw %%mm5, %%mm5\n\t" |
743 " movd %%mm1, %%eax\n\t" | 744 " movd %%mm1, %%"REG_a"\n\t" |
744 " movd %%mm3, %%ecx\n\t" | 745 " movd %%mm3, %%"REG_c"\n\t" |
745 " movd %%mm5, %%ebx\n\t" | 746 " movd %%mm5, %%"REG_b"\n\t" |
746 " movw %%ax, 416(%%esi)\n\t" | 747 " movw %%ax, 416(%%"REG_S")\n\t" |
747 " movw %%cx, 288(%%esi)\n\t" | 748 " movw %%cx, 288(%%"REG_S")\n\t" |
748 " movw %%bx, 160(%%esi)\n\t" | 749 " movw %%bx, 160(%%"REG_S")\n\t" |
749 " psrlq $32, %%mm1\n\t" | 750 " psrlq $32, %%mm1\n\t" |
750 " psrlq $32, %%mm3\n\t" | 751 " psrlq $32, %%mm3\n\t" |
751 " psrlq $32, %%mm5\n\t" | 752 " psrlq $32, %%mm5\n\t" |
752 " movd %%mm1, %%eax\n\t" | 753 " movd %%mm1, %%"REG_a"\n\t" |
753 " movd %%mm3, %%ecx\n\t" | 754 " movd %%mm3, %%"REG_c"\n\t" |
754 " movd %%mm5, %%ebx\n\t" | 755 " movd %%mm5, %%"REG_b"\n\t" |
755 " movw %%ax, 96(%%edi)\n\t" | 756 " movw %%ax, 96(%%"REG_D")\n\t" |
756 " movw %%cx, 224(%%edi)\n\t" | 757 " movw %%cx, 224(%%"REG_D")\n\t" |
757 " movw %%bx, 352(%%edi)\n\t" | 758 " movw %%bx, 352(%%"REG_D")\n\t" |
758 | 759 |
759 " movsw\n\t" | 760 " movsw\n\t" |
760 | 761 |
761 ".L_bye:\n\t" | 762 ".L_bye:\n\t" |
762 " femms\n\t" | 763 " femms\n\t" |