Mercurial > mplayer.hg
annotate libfaad2/cfft.c @ 13030:99d386a5c122
credits for Robert Kesterson
author | diego |
---|---|
date | Mon, 16 Aug 2004 00:18:30 +0000 |
parents | d81145997036 |
children | 6d50ef45a058 |
rev | line source |
---|---|
10725 | 1 /* |
2 ** FAAD2 - Freeware Advanced Audio (AAC) Decoder including SBR decoding | |
12527 | 3 ** Copyright (C) 2003-2004 M. Bakker, Ahead Software AG, http://www.nero.com |
4 ** | |
10725 | 5 ** This program is free software; you can redistribute it and/or modify |
6 ** it under the terms of the GNU General Public License as published by | |
7 ** the Free Software Foundation; either version 2 of the License, or | |
8 ** (at your option) any later version. | |
12527 | 9 ** |
10725 | 10 ** This program is distributed in the hope that it will be useful, |
11 ** but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 ** GNU General Public License for more details. | |
12527 | 14 ** |
10725 | 15 ** You should have received a copy of the GNU General Public License |
12527 | 16 ** along with this program; if not, write to the Free Software |
10725 | 17 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
18 ** | |
19 ** Any non-GPL usage of this software or parts of this software is strictly | |
20 ** forbidden. | |
21 ** | |
22 ** Commercial non-GPL licensing of this software is possible. | |
23 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com. | |
24 ** | |
12625
d81145997036
More information about modifications to comply more closely with GPL 2a.
diego
parents:
12527
diff
changeset
|
25 ** Initially modified for use with MPlayer by Arpad Gereöffy on 2003/08/30 |
d81145997036
More information about modifications to comply more closely with GPL 2a.
diego
parents:
12527
diff
changeset
|
26 ** $Id: cfft.c,v 1.3 2004/06/02 22:59:02 diego Exp $ |
d81145997036
More information about modifications to comply more closely with GPL 2a.
diego
parents:
12527
diff
changeset
|
27 ** detailed CVS changelog at http://www.mplayerhq.hu/cgi-bin/cvsweb.cgi/main/ |
10725 | 28 **/ |
29 | |
30 /* | |
31 * Algorithmically based on Fortran-77 FFTPACK | |
32 * by Paul N. Swarztrauber(Version 4, 1985). | |
33 * | |
34 * Does even sized fft only | |
35 */ | |
36 | |
37 /* isign is +1 for backward and -1 for forward transforms */ | |
38 | |
39 #include "common.h" | |
40 #include "structs.h" | |
41 | |
42 #include <stdlib.h> | |
43 | |
44 #include "cfft.h" | |
45 #include "cfft_tab.h" | |
46 | |
47 | |
12527 | 48 /* static function declarations */ |
49 #ifdef USE_SSE | |
50 static void passf2pos_sse(const uint16_t l1, const complex_t *cc, | |
51 complex_t *ch, const complex_t *wa); | |
52 static void passf2pos_sse_ido(const uint16_t ido, const uint16_t l1, const complex_t *cc, | |
53 complex_t *ch, const complex_t *wa); | |
54 static void passf4pos_sse_ido(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, | |
55 const complex_t *wa1, const complex_t *wa2, const complex_t *wa3); | |
56 #endif | |
57 static void passf2pos(const uint16_t ido, const uint16_t l1, const complex_t *cc, | |
58 complex_t *ch, const complex_t *wa); | |
59 static void passf2neg(const uint16_t ido, const uint16_t l1, const complex_t *cc, | |
60 complex_t *ch, const complex_t *wa); | |
61 static void passf3(const uint16_t ido, const uint16_t l1, const complex_t *cc, | |
62 complex_t *ch, const complex_t *wa1, const complex_t *wa2, const int8_t isign); | |
63 static void passf4pos(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, | |
64 const complex_t *wa1, const complex_t *wa2, const complex_t *wa3); | |
65 static void passf4neg(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, | |
66 const complex_t *wa1, const complex_t *wa2, const complex_t *wa3); | |
67 static void passf5(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, | |
68 const complex_t *wa1, const complex_t *wa2, const complex_t *wa3, | |
69 const complex_t *wa4, const int8_t isign); | |
70 INLINE void cfftf1(uint16_t n, complex_t *c, complex_t *ch, | |
71 const uint16_t *ifac, const complex_t *wa, const int8_t isign); | |
72 static void cffti1(uint16_t n, complex_t *wa, uint16_t *ifac); | |
73 | |
74 | |
10725 | 75 /*---------------------------------------------------------------------- |
76 passf2, passf3, passf4, passf5. Complex FFT passes fwd and bwd. | |
77 ----------------------------------------------------------------------*/ | |
78 | |
12527 | 79 #if 0 //def USE_SSE |
80 static void passf2pos_sse(const uint16_t l1, const complex_t *cc, | |
81 complex_t *ch, const complex_t *wa) | |
82 { | |
83 uint16_t k, ah, ac; | |
84 | |
85 for (k = 0; k < l1; k++) | |
86 { | |
87 ah = 2*k; | |
88 ac = 4*k; | |
89 | |
90 RE(ch[ah]) = RE(cc[ac]) + RE(cc[ac+1]); | |
91 IM(ch[ah]) = IM(cc[ac]) + IM(cc[ac+1]); | |
92 | |
93 RE(ch[ah+l1]) = RE(cc[ac]) - RE(cc[ac+1]); | |
94 IM(ch[ah+l1]) = IM(cc[ac]) - IM(cc[ac+1]); | |
95 } | |
96 } | |
97 | |
98 static void passf2pos_sse_ido(const uint16_t ido, const uint16_t l1, const complex_t *cc, | |
99 complex_t *ch, const complex_t *wa) | |
100 { | |
101 uint16_t i, k, ah, ac; | |
102 | |
103 for (k = 0; k < l1; k++) | |
104 { | |
105 ah = k*ido; | |
106 ac = 2*k*ido; | |
107 | |
108 for (i = 0; i < ido; i+=4) | |
109 { | |
110 __m128 m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12, m13, m14; | |
111 __m128 m15, m16, m17, m18, m19, m20, m21, m22, m23, m24; | |
112 __m128 w1, w2, w3, w4; | |
113 | |
114 m1 = _mm_load_ps(&RE(cc[ac+i])); | |
115 m2 = _mm_load_ps(&RE(cc[ac+ido+i])); | |
116 m5 = _mm_load_ps(&RE(cc[ac+i+2])); | |
117 m6 = _mm_load_ps(&RE(cc[ac+ido+i+2])); | |
118 w1 = _mm_load_ps(&RE(wa[i])); | |
119 w3 = _mm_load_ps(&RE(wa[i+2])); | |
120 | |
121 m3 = _mm_add_ps(m1, m2); | |
122 m15 = _mm_add_ps(m5, m6); | |
123 | |
124 m4 = _mm_sub_ps(m1, m2); | |
125 m16 = _mm_sub_ps(m5, m6); | |
126 | |
127 _mm_store_ps(&RE(ch[ah+i]), m3); | |
128 _mm_store_ps(&RE(ch[ah+i+2]), m15); | |
129 | |
130 | |
131 w2 = _mm_shuffle_ps(w1, w1, _MM_SHUFFLE(2, 3, 0, 1)); | |
132 w4 = _mm_shuffle_ps(w3, w3, _MM_SHUFFLE(2, 3, 0, 1)); | |
133 | |
134 m7 = _mm_mul_ps(m4, w1); | |
135 m17 = _mm_mul_ps(m16, w3); | |
136 m8 = _mm_mul_ps(m4, w2); | |
137 m18 = _mm_mul_ps(m16, w4); | |
138 | |
139 m9 = _mm_shuffle_ps(m7, m8, _MM_SHUFFLE(2, 0, 2, 0)); | |
140 m19 = _mm_shuffle_ps(m17, m18, _MM_SHUFFLE(2, 0, 2, 0)); | |
141 m10 = _mm_shuffle_ps(m7, m8, _MM_SHUFFLE(3, 1, 3, 1)); | |
142 m20 = _mm_shuffle_ps(m17, m18, _MM_SHUFFLE(3, 1, 3, 1)); | |
143 | |
144 m11 = _mm_add_ps(m9, m10); | |
145 m21 = _mm_add_ps(m19, m20); | |
146 m12 = _mm_sub_ps(m9, m10); | |
147 m22 = _mm_sub_ps(m19, m20); | |
148 | |
149 m13 = _mm_shuffle_ps(m11, m11, _MM_SHUFFLE(0, 0, 3, 2)); | |
150 m23 = _mm_shuffle_ps(m21, m21, _MM_SHUFFLE(0, 0, 3, 2)); | |
151 | |
152 m14 = _mm_unpacklo_ps(m12, m13); | |
153 m24 = _mm_unpacklo_ps(m22, m23); | |
154 | |
155 _mm_store_ps(&RE(ch[ah+i+l1*ido]), m14); | |
156 _mm_store_ps(&RE(ch[ah+i+2+l1*ido]), m24); | |
157 } | |
158 } | |
159 } | |
160 #endif | |
161 | |
162 static void passf2pos(const uint16_t ido, const uint16_t l1, const complex_t *cc, | |
163 complex_t *ch, const complex_t *wa) | |
10725 | 164 { |
165 uint16_t i, k, ah, ac; | |
166 | |
167 if (ido == 1) | |
168 { | |
169 for (k = 0; k < l1; k++) | |
170 { | |
171 ah = 2*k; | |
172 ac = 4*k; | |
173 | |
12527 | 174 RE(ch[ah]) = RE(cc[ac]) + RE(cc[ac+1]); |
175 RE(ch[ah+l1]) = RE(cc[ac]) - RE(cc[ac+1]); | |
176 IM(ch[ah]) = IM(cc[ac]) + IM(cc[ac+1]); | |
177 IM(ch[ah+l1]) = IM(cc[ac]) - IM(cc[ac+1]); | |
178 } | |
179 } else { | |
180 for (k = 0; k < l1; k++) | |
181 { | |
182 ah = k*ido; | |
183 ac = 2*k*ido; | |
184 | |
185 for (i = 0; i < ido; i++) | |
186 { | |
187 complex_t t2; | |
188 | |
189 RE(ch[ah+i]) = RE(cc[ac+i]) + RE(cc[ac+i+ido]); | |
190 RE(t2) = RE(cc[ac+i]) - RE(cc[ac+i+ido]); | |
191 | |
192 IM(ch[ah+i]) = IM(cc[ac+i]) + IM(cc[ac+i+ido]); | |
193 IM(t2) = IM(cc[ac+i]) - IM(cc[ac+i+ido]); | |
194 | |
195 #if 1 | |
196 ComplexMult(&IM(ch[ah+i+l1*ido]), &RE(ch[ah+i+l1*ido]), | |
197 IM(t2), RE(t2), RE(wa[i]), IM(wa[i])); | |
198 #else | |
199 ComplexMult(&RE(ch[ah+i+l1*ido]), &IM(ch[ah+i+l1*ido]), | |
200 RE(t2), IM(t2), RE(wa[i]), IM(wa[i])); | |
201 #endif | |
202 } | |
203 } | |
204 } | |
205 } | |
206 | |
207 static void passf2neg(const uint16_t ido, const uint16_t l1, const complex_t *cc, | |
208 complex_t *ch, const complex_t *wa) | |
209 { | |
210 uint16_t i, k, ah, ac; | |
211 | |
212 if (ido == 1) | |
213 { | |
214 for (k = 0; k < l1; k++) | |
215 { | |
216 ah = 2*k; | |
217 ac = 4*k; | |
218 | |
219 RE(ch[ah]) = RE(cc[ac]) + RE(cc[ac+1]); | |
10725 | 220 RE(ch[ah+l1]) = RE(cc[ac]) - RE(cc[ac+1]); |
10989 | 221 IM(ch[ah]) = IM(cc[ac]) + IM(cc[ac+1]); |
10725 | 222 IM(ch[ah+l1]) = IM(cc[ac]) - IM(cc[ac+1]); |
223 } | |
224 } else { | |
225 for (k = 0; k < l1; k++) | |
226 { | |
227 ah = k*ido; | |
228 ac = 2*k*ido; | |
229 | |
230 for (i = 0; i < ido; i++) | |
231 { | |
232 complex_t t2; | |
233 | |
10989 | 234 RE(ch[ah+i]) = RE(cc[ac+i]) + RE(cc[ac+i+ido]); |
235 RE(t2) = RE(cc[ac+i]) - RE(cc[ac+i+ido]); | |
10725 | 236 |
10989 | 237 IM(ch[ah+i]) = IM(cc[ac+i]) + IM(cc[ac+i+ido]); |
238 IM(t2) = IM(cc[ac+i]) - IM(cc[ac+i+ido]); | |
10725 | 239 |
12527 | 240 #if 1 |
241 ComplexMult(&RE(ch[ah+i+l1*ido]), &IM(ch[ah+i+l1*ido]), | |
242 RE(t2), IM(t2), RE(wa[i]), IM(wa[i])); | |
243 #else | |
244 ComplexMult(&IM(ch[ah+i+l1*ido]), &RE(ch[ah+i+l1*ido]), | |
245 IM(t2), RE(t2), RE(wa[i]), IM(wa[i])); | |
246 #endif | |
10725 | 247 } |
248 } | |
249 } | |
250 } | |
251 | |
252 | |
12527 | 253 static void passf3(const uint16_t ido, const uint16_t l1, const complex_t *cc, |
254 complex_t *ch, const complex_t *wa1, const complex_t *wa2, | |
255 const int8_t isign) | |
10725 | 256 { |
12527 | 257 static real_t taur = FRAC_CONST(-0.5); |
258 static real_t taui = FRAC_CONST(0.866025403784439); | |
10725 | 259 uint16_t i, k, ac, ah; |
260 complex_t c2, c3, d2, d3, t2; | |
261 | |
262 if (ido == 1) | |
263 { | |
12527 | 264 if (isign == 1) |
10725 | 265 { |
12527 | 266 for (k = 0; k < l1; k++) |
267 { | |
268 ac = 3*k+1; | |
269 ah = k; | |
10725 | 270 |
12527 | 271 RE(t2) = RE(cc[ac]) + RE(cc[ac+1]); |
272 IM(t2) = IM(cc[ac]) + IM(cc[ac+1]); | |
273 RE(c2) = RE(cc[ac-1]) + MUL_F(RE(t2),taur); | |
274 IM(c2) = IM(cc[ac-1]) + MUL_F(IM(t2),taur); | |
275 | |
276 RE(ch[ah]) = RE(cc[ac-1]) + RE(t2); | |
277 IM(ch[ah]) = IM(cc[ac-1]) + IM(t2); | |
278 | |
279 RE(c3) = MUL_F((RE(cc[ac]) - RE(cc[ac+1])), taui); | |
280 IM(c3) = MUL_F((IM(cc[ac]) - IM(cc[ac+1])), taui); | |
10725 | 281 |
12527 | 282 RE(ch[ah+l1]) = RE(c2) - IM(c3); |
283 IM(ch[ah+l1]) = IM(c2) + RE(c3); | |
284 RE(ch[ah+2*l1]) = RE(c2) + IM(c3); | |
285 IM(ch[ah+2*l1]) = IM(c2) - RE(c3); | |
286 } | |
287 } else { | |
288 for (k = 0; k < l1; k++) | |
289 { | |
290 ac = 3*k+1; | |
291 ah = k; | |
10725 | 292 |
12527 | 293 RE(t2) = RE(cc[ac]) + RE(cc[ac+1]); |
294 IM(t2) = IM(cc[ac]) + IM(cc[ac+1]); | |
295 RE(c2) = RE(cc[ac-1]) + MUL_F(RE(t2),taur); | |
296 IM(c2) = IM(cc[ac-1]) + MUL_F(IM(t2),taur); | |
297 | |
298 RE(ch[ah]) = RE(cc[ac-1]) + RE(t2); | |
299 IM(ch[ah]) = IM(cc[ac-1]) + IM(t2); | |
10725 | 300 |
12527 | 301 RE(c3) = MUL_F((RE(cc[ac]) - RE(cc[ac+1])), taui); |
302 IM(c3) = MUL_F((IM(cc[ac]) - IM(cc[ac+1])), taui); | |
303 | |
304 RE(ch[ah+l1]) = RE(c2) + IM(c3); | |
305 IM(ch[ah+l1]) = IM(c2) - RE(c3); | |
306 RE(ch[ah+2*l1]) = RE(c2) - IM(c3); | |
307 IM(ch[ah+2*l1]) = IM(c2) + RE(c3); | |
308 } | |
10725 | 309 } |
310 } else { | |
12527 | 311 if (isign == 1) |
10725 | 312 { |
12527 | 313 for (k = 0; k < l1; k++) |
10725 | 314 { |
12527 | 315 for (i = 0; i < ido; i++) |
316 { | |
317 ac = i + (3*k+1)*ido; | |
318 ah = i + k * ido; | |
319 | |
320 RE(t2) = RE(cc[ac]) + RE(cc[ac+ido]); | |
321 RE(c2) = RE(cc[ac-ido]) + MUL_F(RE(t2),taur); | |
322 IM(t2) = IM(cc[ac]) + IM(cc[ac+ido]); | |
323 IM(c2) = IM(cc[ac-ido]) + MUL_F(IM(t2),taur); | |
10725 | 324 |
12527 | 325 RE(ch[ah]) = RE(cc[ac-ido]) + RE(t2); |
326 IM(ch[ah]) = IM(cc[ac-ido]) + IM(t2); | |
327 | |
328 RE(c3) = MUL_F((RE(cc[ac]) - RE(cc[ac+ido])), taui); | |
329 IM(c3) = MUL_F((IM(cc[ac]) - IM(cc[ac+ido])), taui); | |
330 | |
331 RE(d2) = RE(c2) - IM(c3); | |
332 IM(d3) = IM(c2) - RE(c3); | |
333 RE(d3) = RE(c2) + IM(c3); | |
334 IM(d2) = IM(c2) + RE(c3); | |
10725 | 335 |
12527 | 336 #if 1 |
337 ComplexMult(&IM(ch[ah+l1*ido]), &RE(ch[ah+l1*ido]), | |
338 IM(d2), RE(d2), RE(wa1[i]), IM(wa1[i])); | |
339 ComplexMult(&IM(ch[ah+2*l1*ido]), &RE(ch[ah+2*l1*ido]), | |
340 IM(d3), RE(d3), RE(wa2[i]), IM(wa2[i])); | |
341 #else | |
342 ComplexMult(&RE(ch[ah+l1*ido]), &IM(ch[ah+l1*ido]), | |
343 RE(d2), IM(d2), RE(wa1[i]), IM(wa1[i])); | |
344 ComplexMult(&RE(ch[ah+2*l1*ido]), &IM(ch[ah+2*l1*ido]), | |
345 RE(d3), IM(d3), RE(wa2[i]), IM(wa2[i])); | |
346 #endif | |
347 } | |
348 } | |
349 } else { | |
350 for (k = 0; k < l1; k++) | |
351 { | |
352 for (i = 0; i < ido; i++) | |
353 { | |
354 ac = i + (3*k+1)*ido; | |
355 ah = i + k * ido; | |
10725 | 356 |
12527 | 357 RE(t2) = RE(cc[ac]) + RE(cc[ac+ido]); |
358 RE(c2) = RE(cc[ac-ido]) + MUL_F(RE(t2),taur); | |
359 IM(t2) = IM(cc[ac]) + IM(cc[ac+ido]); | |
360 IM(c2) = IM(cc[ac-ido]) + MUL_F(IM(t2),taur); | |
361 | |
362 RE(ch[ah]) = RE(cc[ac-ido]) + RE(t2); | |
363 IM(ch[ah]) = IM(cc[ac-ido]) + IM(t2); | |
364 | |
365 RE(c3) = MUL_F((RE(cc[ac]) - RE(cc[ac+ido])), taui); | |
366 IM(c3) = MUL_F((IM(cc[ac]) - IM(cc[ac+ido])), taui); | |
10725 | 367 |
12527 | 368 RE(d2) = RE(c2) + IM(c3); |
369 IM(d3) = IM(c2) + RE(c3); | |
370 RE(d3) = RE(c2) - IM(c3); | |
371 IM(d2) = IM(c2) - RE(c3); | |
372 | |
373 #if 1 | |
374 ComplexMult(&RE(ch[ah+l1*ido]), &IM(ch[ah+l1*ido]), | |
375 RE(d2), IM(d2), RE(wa1[i]), IM(wa1[i])); | |
376 ComplexMult(&RE(ch[ah+2*l1*ido]), &IM(ch[ah+2*l1*ido]), | |
377 RE(d3), IM(d3), RE(wa2[i]), IM(wa2[i])); | |
378 #else | |
379 ComplexMult(&IM(ch[ah+l1*ido]), &RE(ch[ah+l1*ido]), | |
380 IM(d2), RE(d2), RE(wa1[i]), IM(wa1[i])); | |
381 ComplexMult(&IM(ch[ah+2*l1*ido]), &RE(ch[ah+2*l1*ido]), | |
382 IM(d3), RE(d3), RE(wa2[i]), IM(wa2[i])); | |
383 #endif | |
384 } | |
10725 | 385 } |
386 } | |
387 } | |
388 } | |
389 | |
12527 | 390 #ifdef USE_SSE |
391 ALIGN static const int32_t negate[4] = { 0x0, 0x0, 0x0, 0x80000000 }; | |
10725 | 392 |
12527 | 393 __declspec(naked) static void passf4pos_sse(const uint16_t l1, const complex_t *cc, |
394 complex_t *ch, const complex_t *wa1, const complex_t *wa2, | |
395 const complex_t *wa3) | |
396 { | |
397 __asm { | |
398 push ebx | |
399 mov ebx, esp | |
400 and esp, -16 | |
401 push edi | |
402 push esi | |
403 sub esp, 8 | |
404 movzx edi, WORD PTR [ebx+8] | |
405 | |
406 movaps xmm1, XMMWORD PTR negate | |
407 | |
408 test edi, edi | |
409 jle l1_is_zero | |
410 | |
411 lea esi, DWORD PTR [edi+edi] | |
412 add esi, esi | |
413 sub esi, edi | |
414 add esi, esi | |
415 add esi, esi | |
416 add esi, esi | |
417 mov eax, DWORD PTR [ebx+16] | |
418 add esi, eax | |
419 lea ecx, DWORD PTR [edi+edi] | |
420 add ecx, ecx | |
421 add ecx, ecx | |
422 add ecx, ecx | |
423 add ecx, eax | |
424 lea edx, DWORD PTR [edi+edi] | |
425 add edx, edx | |
426 add edx, edx | |
427 add edx, eax | |
428 xor eax, eax | |
429 mov DWORD PTR [esp], ebp | |
430 mov ebp, DWORD PTR [ebx+12] | |
431 | |
432 fftloop: | |
433 lea edi, DWORD PTR [eax+eax] | |
434 add edi, edi | |
435 movaps xmm2, XMMWORD PTR [ebp+edi*8] | |
436 movaps xmm0, XMMWORD PTR [ebp+edi*8+16] | |
437 movaps xmm7, XMMWORD PTR [ebp+edi*8+32] | |
438 movaps xmm5, XMMWORD PTR [ebp+edi*8+48] | |
439 movaps xmm6, xmm2 | |
440 addps xmm6, xmm0 | |
441 movaps xmm4, xmm1 | |
442 xorps xmm4, xmm7 | |
443 movaps xmm3, xmm1 | |
444 xorps xmm3, xmm5 | |
445 xorps xmm2, xmm1 | |
446 xorps xmm0, xmm1 | |
447 addps xmm7, xmm5 | |
448 subps xmm2, xmm0 | |
449 movaps xmm0, xmm6 | |
450 shufps xmm0, xmm7, 68 | |
451 subps xmm4, xmm3 | |
452 shufps xmm6, xmm7, 238 | |
453 movaps xmm5, xmm2 | |
454 shufps xmm5, xmm4, 68 | |
455 movaps xmm3, xmm0 | |
456 addps xmm3, xmm6 | |
457 shufps xmm2, xmm4, 187 | |
458 subps xmm0, xmm6 | |
459 movaps xmm4, xmm5 | |
460 addps xmm4, xmm2 | |
461 mov edi, DWORD PTR [ebx+16] | |
462 movaps XMMWORD PTR [edi+eax*8], xmm3 | |
463 subps xmm5, xmm2 | |
464 movaps XMMWORD PTR [edx+eax*8], xmm4 | |
465 movaps XMMWORD PTR [ecx+eax*8], xmm0 | |
466 movaps XMMWORD PTR [esi+eax*8], xmm5 | |
467 add eax, 2 | |
468 movzx eax, ax | |
469 movzx edi, WORD PTR [ebx+8] | |
470 cmp eax, edi | |
471 jl fftloop | |
472 | |
473 mov ebp, DWORD PTR [esp] | |
474 | |
475 l1_is_zero: | |
476 | |
477 add esp, 8 | |
478 pop esi | |
479 pop edi | |
480 mov esp, ebx | |
481 pop ebx | |
482 ret | |
483 } | |
484 } | |
485 #endif | |
486 | |
487 #if 0 | |
488 static void passf4pos_sse_ido(const uint16_t ido, const uint16_t l1, const complex_t *cc, | |
489 complex_t *ch, const complex_t *wa1, const complex_t *wa2, | |
490 const complex_t *wa3) | |
491 { | |
492 uint16_t i, k, ac, ah; | |
493 | |
494 for (k = 0; k < l1; k++) | |
495 { | |
496 ac = 4*k*ido; | |
497 ah = k*ido; | |
498 | |
499 for (i = 0; i < ido; i+=2) | |
500 { | |
501 __m128 m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12, m13, m14, m15, m16; | |
502 __m128 n1, n2, n3, n4, n5, n6, n7, n8, n9, m17, m18, m19, m20, m21, m22, m23; | |
503 __m128 w1, w2, w3, w4, w5, w6, m24, m25, m26, m27, m28, m29, m30; | |
504 __m128 neg1 = _mm_set_ps(-1.0, 1.0, -1.0, 1.0); | |
505 | |
506 m1 = _mm_load_ps(&RE(cc[ac+i])); | |
507 m2 = _mm_load_ps(&RE(cc[ac+i+2*ido])); | |
508 m3 = _mm_add_ps(m1, m2); | |
509 m4 = _mm_sub_ps(m1, m2); | |
510 | |
511 n1 = _mm_load_ps(&RE(cc[ac+i+ido])); | |
512 n2 = _mm_load_ps(&RE(cc[ac+i+3*ido])); | |
513 n3 = _mm_add_ps(n1, n2); | |
514 | |
515 n4 = _mm_mul_ps(neg1, n1); | |
516 n5 = _mm_mul_ps(neg1, n2); | |
517 n6 = _mm_sub_ps(n4, n5); | |
518 | |
519 m5 = _mm_add_ps(m3, n3); | |
520 | |
521 n7 = _mm_shuffle_ps(n6, n6, _MM_SHUFFLE(2, 3, 0, 1)); | |
522 n8 = _mm_add_ps(m4, n7); | |
523 | |
524 m6 = _mm_sub_ps(m3, n3); | |
525 n9 = _mm_sub_ps(m4, n7); | |
526 | |
527 _mm_store_ps(&RE(ch[ah+i]), m5); | |
528 | |
529 #if 0 | |
530 static INLINE void ComplexMult(real_t *y1, real_t *y2, | |
531 real_t x1, real_t x2, real_t c1, real_t c2) | |
532 { | |
533 *y1 = MUL_F(x1, c1) + MUL_F(x2, c2); | |
534 *y2 = MUL_F(x2, c1) - MUL_F(x1, c2); | |
535 } | |
536 | |
537 m7.0 = RE(c2)*RE(wa1[i]) | |
538 m7.1 = IM(c2)*IM(wa1[i]) | |
539 m7.2 = RE(c6)*RE(wa1[i+1]) | |
540 m7.3 = IM(c6)*IM(wa1[i+1]) | |
541 | |
542 m8.0 = RE(c2)*IM(wa1[i]) | |
543 m8.1 = IM(c2)*RE(wa1[i]) | |
544 m8.2 = RE(c6)*IM(wa1[i+1]) | |
545 m8.3 = IM(c6)*RE(wa1[i+1]) | |
546 | |
547 RE(0) = m7.0 - m7.1 | |
548 IM(0) = m8.0 + m8.1 | |
549 RE(1) = m7.2 - m7.3 | |
550 IM(1) = m8.2 + m8.3 | |
551 | |
552 //// | |
553 RE(0) = RE(c2)*RE(wa1[i]) - IM(c2)*IM(wa1[i]) | |
554 IM(0) = RE(c2)*IM(wa1[i]) + IM(c2)*RE(wa1[i]) | |
555 RE(1) = RE(c6)*RE(wa1[i+1]) - IM(c6)*IM(wa1[i+1]) | |
556 IM(1) = RE(c6)*IM(wa1[i+1]) + IM(c6)*RE(wa1[i+1]) | |
557 #endif | |
558 | |
559 w1 = _mm_load_ps(&RE(wa1[i])); | |
560 w3 = _mm_load_ps(&RE(wa2[i])); | |
561 w5 = _mm_load_ps(&RE(wa3[i])); | |
562 | |
563 w2 = _mm_shuffle_ps(w1, w1, _MM_SHUFFLE(2, 3, 0, 1)); | |
564 w4 = _mm_shuffle_ps(w3, w3, _MM_SHUFFLE(2, 3, 0, 1)); | |
565 w6 = _mm_shuffle_ps(w5, w5, _MM_SHUFFLE(2, 3, 0, 1)); | |
566 | |
567 m7 = _mm_mul_ps(n8, w1); | |
568 m15 = _mm_mul_ps(m6, w3); | |
569 m23 = _mm_mul_ps(n9, w5); | |
570 m8 = _mm_mul_ps(n8, w2); | |
571 m16 = _mm_mul_ps(m6, w4); | |
572 m24 = _mm_mul_ps(n9, w6); | |
573 | |
574 m9 = _mm_shuffle_ps(m7, m8, _MM_SHUFFLE(2, 0, 2, 0)); | |
575 m17 = _mm_shuffle_ps(m15, m16, _MM_SHUFFLE(2, 0, 2, 0)); | |
576 m25 = _mm_shuffle_ps(m23, m24, _MM_SHUFFLE(2, 0, 2, 0)); | |
577 m10 = _mm_shuffle_ps(m7, m8, _MM_SHUFFLE(3, 1, 3, 1)); | |
578 m18 = _mm_shuffle_ps(m15, m16, _MM_SHUFFLE(3, 1, 3, 1)); | |
579 m26 = _mm_shuffle_ps(m23, m24, _MM_SHUFFLE(3, 1, 3, 1)); | |
580 | |
581 m11 = _mm_add_ps(m9, m10); | |
582 m19 = _mm_add_ps(m17, m18); | |
583 m27 = _mm_add_ps(m25, m26); | |
584 m12 = _mm_sub_ps(m9, m10); | |
585 m20 = _mm_sub_ps(m17, m18); | |
586 m28 = _mm_sub_ps(m25, m26); | |
587 | |
588 m13 = _mm_shuffle_ps(m11, m11, _MM_SHUFFLE(0, 0, 3, 2)); | |
589 m21 = _mm_shuffle_ps(m19, m19, _MM_SHUFFLE(0, 0, 3, 2)); | |
590 m29 = _mm_shuffle_ps(m27, m27, _MM_SHUFFLE(0, 0, 3, 2)); | |
591 m14 = _mm_unpacklo_ps(m12, m13); | |
592 m22 = _mm_unpacklo_ps(m20, m21); | |
593 m30 = _mm_unpacklo_ps(m28, m29); | |
594 | |
595 _mm_store_ps(&RE(ch[ah+i+l1*ido]), m14); | |
596 _mm_store_ps(&RE(ch[ah+i+2*l1*ido]), m22); | |
597 _mm_store_ps(&RE(ch[ah+i+3*l1*ido]), m30); | |
598 } | |
599 } | |
600 } | |
601 #endif | |
602 | |
603 static void passf4pos(const uint16_t ido, const uint16_t l1, const complex_t *cc, | |
604 complex_t *ch, const complex_t *wa1, const complex_t *wa2, | |
605 const complex_t *wa3) | |
10725 | 606 { |
607 uint16_t i, k, ac, ah; | |
608 | |
609 if (ido == 1) | |
610 { | |
611 for (k = 0; k < l1; k++) | |
612 { | |
10989 | 613 complex_t t1, t2, t3, t4; |
614 | |
10725 | 615 ac = 4*k; |
616 ah = k; | |
617 | |
12527 | 618 RE(t2) = RE(cc[ac]) + RE(cc[ac+2]); |
619 RE(t1) = RE(cc[ac]) - RE(cc[ac+2]); | |
10989 | 620 IM(t2) = IM(cc[ac]) + IM(cc[ac+2]); |
12527 | 621 IM(t1) = IM(cc[ac]) - IM(cc[ac+2]); |
10725 | 622 RE(t3) = RE(cc[ac+1]) + RE(cc[ac+3]); |
10989 | 623 IM(t4) = RE(cc[ac+1]) - RE(cc[ac+3]); |
624 IM(t3) = IM(cc[ac+3]) + IM(cc[ac+1]); | |
10725 | 625 RE(t4) = IM(cc[ac+3]) - IM(cc[ac+1]); |
626 | |
12527 | 627 RE(ch[ah]) = RE(t2) + RE(t3); |
10725 | 628 RE(ch[ah+2*l1]) = RE(t2) - RE(t3); |
10989 | 629 |
630 IM(ch[ah]) = IM(t2) + IM(t3); | |
10725 | 631 IM(ch[ah+2*l1]) = IM(t2) - IM(t3); |
10989 | 632 |
12527 | 633 RE(ch[ah+l1]) = RE(t1) + RE(t4); |
634 RE(ch[ah+3*l1]) = RE(t1) - RE(t4); | |
10989 | 635 |
12527 | 636 IM(ch[ah+l1]) = IM(t1) + IM(t4); |
637 IM(ch[ah+3*l1]) = IM(t1) - IM(t4); | |
10725 | 638 } |
639 } else { | |
640 for (k = 0; k < l1; k++) | |
641 { | |
10989 | 642 ac = 4*k*ido; |
643 ah = k*ido; | |
644 | |
10725 | 645 for (i = 0; i < ido; i++) |
646 { | |
10989 | 647 complex_t c2, c3, c4, t1, t2, t3, t4; |
10725 | 648 |
10989 | 649 RE(t2) = RE(cc[ac+i]) + RE(cc[ac+i+2*ido]); |
650 RE(t1) = RE(cc[ac+i]) - RE(cc[ac+i+2*ido]); | |
651 IM(t2) = IM(cc[ac+i]) + IM(cc[ac+i+2*ido]); | |
652 IM(t1) = IM(cc[ac+i]) - IM(cc[ac+i+2*ido]); | |
653 RE(t3) = RE(cc[ac+i+ido]) + RE(cc[ac+i+3*ido]); | |
654 IM(t4) = RE(cc[ac+i+ido]) - RE(cc[ac+i+3*ido]); | |
655 IM(t3) = IM(cc[ac+i+3*ido]) + IM(cc[ac+i+ido]); | |
656 RE(t4) = IM(cc[ac+i+3*ido]) - IM(cc[ac+i+ido]); | |
10725 | 657 |
12527 | 658 RE(c2) = RE(t1) + RE(t4); |
659 RE(c4) = RE(t1) - RE(t4); | |
10989 | 660 |
12527 | 661 IM(c2) = IM(t1) + IM(t4); |
662 IM(c4) = IM(t1) - IM(t4); | |
10725 | 663 |
10989 | 664 RE(ch[ah+i]) = RE(t2) + RE(t3); |
12527 | 665 RE(c3) = RE(t2) - RE(t3); |
10989 | 666 |
667 IM(ch[ah+i]) = IM(t2) + IM(t3); | |
12527 | 668 IM(c3) = IM(t2) - IM(t3); |
10989 | 669 |
12527 | 670 #if 1 |
671 ComplexMult(&IM(ch[ah+i+l1*ido]), &RE(ch[ah+i+l1*ido]), | |
672 IM(c2), RE(c2), RE(wa1[i]), IM(wa1[i])); | |
673 ComplexMult(&IM(ch[ah+i+2*l1*ido]), &RE(ch[ah+i+2*l1*ido]), | |
674 IM(c3), RE(c3), RE(wa2[i]), IM(wa2[i])); | |
675 ComplexMult(&IM(ch[ah+i+3*l1*ido]), &RE(ch[ah+i+3*l1*ido]), | |
676 IM(c4), RE(c4), RE(wa3[i]), IM(wa3[i])); | |
677 #else | |
678 ComplexMult(&RE(ch[ah+i+l1*ido]), &IM(ch[ah+i+l1*ido]), | |
679 RE(c2), IM(c2), RE(wa1[i]), IM(wa1[i])); | |
680 ComplexMult(&RE(ch[ah+i+2*l1*ido]), &IM(ch[ah+i+2*l1*ido]), | |
681 RE(c3), IM(c3), RE(wa2[i]), IM(wa2[i])); | |
682 ComplexMult(&RE(ch[ah+i+3*l1*ido]), &IM(ch[ah+i+3*l1*ido]), | |
683 RE(c4), IM(c4), RE(wa3[i]), IM(wa3[i])); | |
684 #endif | |
10725 | 685 } |
686 } | |
687 } | |
688 } | |
689 | |
12527 | 690 static void passf4neg(const uint16_t ido, const uint16_t l1, const complex_t *cc, |
691 complex_t *ch, const complex_t *wa1, const complex_t *wa2, | |
692 const complex_t *wa3) | |
10725 | 693 { |
694 uint16_t i, k, ac, ah; | |
695 | |
696 if (ido == 1) | |
697 { | |
698 for (k = 0; k < l1; k++) | |
699 { | |
12527 | 700 complex_t t1, t2, t3, t4; |
701 | |
702 ac = 4*k; | |
10725 | 703 ah = k; |
704 | |
12527 | 705 RE(t2) = RE(cc[ac]) + RE(cc[ac+2]); |
706 RE(t1) = RE(cc[ac]) - RE(cc[ac+2]); | |
707 IM(t2) = IM(cc[ac]) + IM(cc[ac+2]); | |
708 IM(t1) = IM(cc[ac]) - IM(cc[ac+2]); | |
709 RE(t3) = RE(cc[ac+1]) + RE(cc[ac+3]); | |
710 IM(t4) = RE(cc[ac+1]) - RE(cc[ac+3]); | |
711 IM(t3) = IM(cc[ac+3]) + IM(cc[ac+1]); | |
712 RE(t4) = IM(cc[ac+3]) - IM(cc[ac+1]); | |
10725 | 713 |
12527 | 714 RE(ch[ah]) = RE(t2) + RE(t3); |
715 RE(ch[ah+2*l1]) = RE(t2) - RE(t3); | |
716 | |
717 IM(ch[ah]) = IM(t2) + IM(t3); | |
718 IM(ch[ah+2*l1]) = IM(t2) - IM(t3); | |
10725 | 719 |
12527 | 720 RE(ch[ah+l1]) = RE(t1) - RE(t4); |
721 RE(ch[ah+3*l1]) = RE(t1) + RE(t4); | |
722 | |
723 IM(ch[ah+l1]) = IM(t1) - IM(t4); | |
724 IM(ch[ah+3*l1]) = IM(t1) + IM(t4); | |
10725 | 725 } |
726 } else { | |
727 for (k = 0; k < l1; k++) | |
728 { | |
12527 | 729 ac = 4*k*ido; |
730 ah = k*ido; | |
731 | |
10725 | 732 for (i = 0; i < ido; i++) |
733 { | |
12527 | 734 complex_t c2, c3, c4, t1, t2, t3, t4; |
735 | |
736 RE(t2) = RE(cc[ac+i]) + RE(cc[ac+i+2*ido]); | |
737 RE(t1) = RE(cc[ac+i]) - RE(cc[ac+i+2*ido]); | |
738 IM(t2) = IM(cc[ac+i]) + IM(cc[ac+i+2*ido]); | |
739 IM(t1) = IM(cc[ac+i]) - IM(cc[ac+i+2*ido]); | |
740 RE(t3) = RE(cc[ac+i+ido]) + RE(cc[ac+i+3*ido]); | |
741 IM(t4) = RE(cc[ac+i+ido]) - RE(cc[ac+i+3*ido]); | |
742 IM(t3) = IM(cc[ac+i+3*ido]) + IM(cc[ac+i+ido]); | |
743 RE(t4) = IM(cc[ac+i+3*ido]) - IM(cc[ac+i+ido]); | |
744 | |
745 RE(c2) = RE(t1) - RE(t4); | |
746 RE(c4) = RE(t1) + RE(t4); | |
747 | |
748 IM(c2) = IM(t1) - IM(t4); | |
749 IM(c4) = IM(t1) + IM(t4); | |
750 | |
751 RE(ch[ah+i]) = RE(t2) + RE(t3); | |
752 RE(c3) = RE(t2) - RE(t3); | |
753 | |
754 IM(ch[ah+i]) = IM(t2) + IM(t3); | |
755 IM(c3) = IM(t2) - IM(t3); | |
756 | |
757 #if 1 | |
758 ComplexMult(&RE(ch[ah+i+l1*ido]), &IM(ch[ah+i+l1*ido]), | |
759 RE(c2), IM(c2), RE(wa1[i]), IM(wa1[i])); | |
760 ComplexMult(&RE(ch[ah+i+2*l1*ido]), &IM(ch[ah+i+2*l1*ido]), | |
761 RE(c3), IM(c3), RE(wa2[i]), IM(wa2[i])); | |
762 ComplexMult(&RE(ch[ah+i+3*l1*ido]), &IM(ch[ah+i+3*l1*ido]), | |
763 RE(c4), IM(c4), RE(wa3[i]), IM(wa3[i])); | |
764 #else | |
765 ComplexMult(&IM(ch[ah+i+l1*ido]), &RE(ch[ah+i+l1*ido]), | |
766 IM(c2), RE(c2), RE(wa1[i]), IM(wa1[i])); | |
767 ComplexMult(&IM(ch[ah+i+2*l1*ido]), &RE(ch[ah+i+2*l1*ido]), | |
768 IM(c3), RE(c3), RE(wa2[i]), IM(wa2[i])); | |
769 ComplexMult(&IM(ch[ah+i+3*l1*ido]), &RE(ch[ah+i+3*l1*ido]), | |
770 IM(c4), RE(c4), RE(wa3[i]), IM(wa3[i])); | |
771 #endif | |
772 } | |
773 } | |
774 } | |
775 } | |
776 | |
777 static void passf5(const uint16_t ido, const uint16_t l1, const complex_t *cc, | |
778 complex_t *ch, const complex_t *wa1, const complex_t *wa2, const complex_t *wa3, | |
779 const complex_t *wa4, const int8_t isign) | |
780 { | |
781 static real_t tr11 = FRAC_CONST(0.309016994374947); | |
782 static real_t ti11 = FRAC_CONST(0.951056516295154); | |
783 static real_t tr12 = FRAC_CONST(-0.809016994374947); | |
784 static real_t ti12 = FRAC_CONST(0.587785252292473); | |
785 uint16_t i, k, ac, ah; | |
786 complex_t c2, c3, c4, c5, d3, d4, d5, d2, t2, t3, t4, t5; | |
10725 | 787 |
12527 | 788 if (ido == 1) |
789 { | |
790 if (isign == 1) | |
791 { | |
792 for (k = 0; k < l1; k++) | |
793 { | |
794 ac = 5*k + 1; | |
795 ah = k; | |
796 | |
797 RE(t2) = RE(cc[ac]) + RE(cc[ac+3]); | |
798 IM(t2) = IM(cc[ac]) + IM(cc[ac+3]); | |
799 RE(t3) = RE(cc[ac+1]) + RE(cc[ac+2]); | |
800 IM(t3) = IM(cc[ac+1]) + IM(cc[ac+2]); | |
801 RE(t4) = RE(cc[ac+1]) - RE(cc[ac+2]); | |
802 IM(t4) = IM(cc[ac+1]) - IM(cc[ac+2]); | |
803 RE(t5) = RE(cc[ac]) - RE(cc[ac+3]); | |
804 IM(t5) = IM(cc[ac]) - IM(cc[ac+3]); | |
805 | |
806 RE(ch[ah]) = RE(cc[ac-1]) + RE(t2) + RE(t3); | |
807 IM(ch[ah]) = IM(cc[ac-1]) + IM(t2) + IM(t3); | |
808 | |
809 RE(c2) = RE(cc[ac-1]) + MUL_F(RE(t2),tr11) + MUL_F(RE(t3),tr12); | |
810 IM(c2) = IM(cc[ac-1]) + MUL_F(IM(t2),tr11) + MUL_F(IM(t3),tr12); | |
811 RE(c3) = RE(cc[ac-1]) + MUL_F(RE(t2),tr12) + MUL_F(RE(t3),tr11); | |
812 IM(c3) = IM(cc[ac-1]) + MUL_F(IM(t2),tr12) + MUL_F(IM(t3),tr11); | |
813 | |
814 ComplexMult(&RE(c5), &RE(c4), | |
815 ti11, ti12, RE(t5), RE(t4)); | |
816 ComplexMult(&IM(c5), &IM(c4), | |
817 ti11, ti12, IM(t5), IM(t4)); | |
10725 | 818 |
12527 | 819 RE(ch[ah+l1]) = RE(c2) - IM(c5); |
820 IM(ch[ah+l1]) = IM(c2) + RE(c5); | |
821 RE(ch[ah+2*l1]) = RE(c3) - IM(c4); | |
822 IM(ch[ah+2*l1]) = IM(c3) + RE(c4); | |
823 RE(ch[ah+3*l1]) = RE(c3) + IM(c4); | |
824 IM(ch[ah+3*l1]) = IM(c3) - RE(c4); | |
825 RE(ch[ah+4*l1]) = RE(c2) + IM(c5); | |
826 IM(ch[ah+4*l1]) = IM(c2) - RE(c5); | |
827 } | |
828 } else { | |
829 for (k = 0; k < l1; k++) | |
830 { | |
831 ac = 5*k + 1; | |
832 ah = k; | |
833 | |
834 RE(t2) = RE(cc[ac]) + RE(cc[ac+3]); | |
835 IM(t2) = IM(cc[ac]) + IM(cc[ac+3]); | |
836 RE(t3) = RE(cc[ac+1]) + RE(cc[ac+2]); | |
837 IM(t3) = IM(cc[ac+1]) + IM(cc[ac+2]); | |
838 RE(t4) = RE(cc[ac+1]) - RE(cc[ac+2]); | |
839 IM(t4) = IM(cc[ac+1]) - IM(cc[ac+2]); | |
840 RE(t5) = RE(cc[ac]) - RE(cc[ac+3]); | |
841 IM(t5) = IM(cc[ac]) - IM(cc[ac+3]); | |
842 | |
843 RE(ch[ah]) = RE(cc[ac-1]) + RE(t2) + RE(t3); | |
844 IM(ch[ah]) = IM(cc[ac-1]) + IM(t2) + IM(t3); | |
845 | |
846 RE(c2) = RE(cc[ac-1]) + MUL_F(RE(t2),tr11) + MUL_F(RE(t3),tr12); | |
847 IM(c2) = IM(cc[ac-1]) + MUL_F(IM(t2),tr11) + MUL_F(IM(t3),tr12); | |
848 RE(c3) = RE(cc[ac-1]) + MUL_F(RE(t2),tr12) + MUL_F(RE(t3),tr11); | |
849 IM(c3) = IM(cc[ac-1]) + MUL_F(IM(t2),tr12) + MUL_F(IM(t3),tr11); | |
850 | |
851 ComplexMult(&RE(c4), &RE(c5), | |
852 ti12, ti11, RE(t5), RE(t4)); | |
853 ComplexMult(&IM(c4), &IM(c5), | |
854 ti12, ti12, IM(t5), IM(t4)); | |
10725 | 855 |
12527 | 856 RE(ch[ah+l1]) = RE(c2) + IM(c5); |
857 IM(ch[ah+l1]) = IM(c2) - RE(c5); | |
858 RE(ch[ah+2*l1]) = RE(c3) + IM(c4); | |
859 IM(ch[ah+2*l1]) = IM(c3) - RE(c4); | |
860 RE(ch[ah+3*l1]) = RE(c3) - IM(c4); | |
861 IM(ch[ah+3*l1]) = IM(c3) + RE(c4); | |
862 RE(ch[ah+4*l1]) = RE(c2) - IM(c5); | |
863 IM(ch[ah+4*l1]) = IM(c2) + RE(c5); | |
864 } | |
865 } | |
866 } else { | |
867 if (isign == 1) | |
868 { | |
869 for (k = 0; k < l1; k++) | |
870 { | |
871 for (i = 0; i < ido; i++) | |
872 { | |
873 ac = i + (k*5 + 1) * ido; | |
874 ah = i + k * ido; | |
875 | |
876 RE(t2) = RE(cc[ac]) + RE(cc[ac+3*ido]); | |
877 IM(t2) = IM(cc[ac]) + IM(cc[ac+3*ido]); | |
878 RE(t3) = RE(cc[ac+ido]) + RE(cc[ac+2*ido]); | |
879 IM(t3) = IM(cc[ac+ido]) + IM(cc[ac+2*ido]); | |
880 RE(t4) = RE(cc[ac+ido]) - RE(cc[ac+2*ido]); | |
881 IM(t4) = IM(cc[ac+ido]) - IM(cc[ac+2*ido]); | |
882 RE(t5) = RE(cc[ac]) - RE(cc[ac+3*ido]); | |
883 IM(t5) = IM(cc[ac]) - IM(cc[ac+3*ido]); | |
884 | |
885 RE(ch[ah]) = RE(cc[ac-ido]) + RE(t2) + RE(t3); | |
886 IM(ch[ah]) = IM(cc[ac-ido]) + IM(t2) + IM(t3); | |
887 | |
888 RE(c2) = RE(cc[ac-ido]) + MUL_F(RE(t2),tr11) + MUL_F(RE(t3),tr12); | |
889 IM(c2) = IM(cc[ac-ido]) + MUL_F(IM(t2),tr11) + MUL_F(IM(t3),tr12); | |
890 RE(c3) = RE(cc[ac-ido]) + MUL_F(RE(t2),tr12) + MUL_F(RE(t3),tr11); | |
891 IM(c3) = IM(cc[ac-ido]) + MUL_F(IM(t2),tr12) + MUL_F(IM(t3),tr11); | |
892 | |
893 ComplexMult(&RE(c5), &RE(c4), | |
894 ti11, ti12, RE(t5), RE(t4)); | |
895 ComplexMult(&IM(c5), &IM(c4), | |
896 ti11, ti12, IM(t5), IM(t4)); | |
897 | |
898 IM(d2) = IM(c2) + RE(c5); | |
899 IM(d3) = IM(c3) + RE(c4); | |
900 RE(d4) = RE(c3) + IM(c4); | |
901 RE(d5) = RE(c2) + IM(c5); | |
902 RE(d2) = RE(c2) - IM(c5); | |
903 IM(d5) = IM(c2) - RE(c5); | |
904 RE(d3) = RE(c3) - IM(c4); | |
905 IM(d4) = IM(c3) - RE(c4); | |
10725 | 906 |
12527 | 907 #if 1 |
908 ComplexMult(&IM(ch[ah+l1*ido]), &RE(ch[ah+l1*ido]), | |
909 IM(d2), RE(d2), RE(wa1[i]), IM(wa1[i])); | |
910 ComplexMult(&IM(ch[ah+2*l1*ido]), &RE(ch[ah+2*l1*ido]), | |
911 IM(d3), RE(d3), RE(wa2[i]), IM(wa2[i])); | |
912 ComplexMult(&IM(ch[ah+3*l1*ido]), &RE(ch[ah+3*l1*ido]), | |
913 IM(d4), RE(d4), RE(wa3[i]), IM(wa3[i])); | |
914 ComplexMult(&IM(ch[ah+4*l1*ido]), &RE(ch[ah+4*l1*ido]), | |
915 IM(d5), RE(d5), RE(wa4[i]), IM(wa4[i])); | |
916 #else | |
917 ComplexMult(&RE(ch[ah+l1*ido]), &IM(ch[ah+l1*ido]), | |
918 RE(d2), IM(d2), RE(wa1[i]), IM(wa1[i])); | |
919 ComplexMult(&RE(ch[ah+2*l1*ido]), &IM(ch[ah+2*l1*ido]), | |
920 RE(d3), IM(d3), RE(wa2[i]), IM(wa2[i])); | |
921 ComplexMult(&RE(ch[ah+3*l1*ido]), &IM(ch[ah+3*l1*ido]), | |
922 RE(d4), IM(d4), RE(wa3[i]), IM(wa3[i])); | |
923 ComplexMult(&RE(ch[ah+4*l1*ido]), &IM(ch[ah+4*l1*ido]), | |
924 RE(d5), IM(d5), RE(wa4[i]), IM(wa4[i])); | |
925 #endif | |
926 } | |
927 } | |
928 } else { | |
929 for (k = 0; k < l1; k++) | |
930 { | |
931 for (i = 0; i < ido; i++) | |
932 { | |
933 ac = i + (k*5 + 1) * ido; | |
934 ah = i + k * ido; | |
935 | |
936 RE(t2) = RE(cc[ac]) + RE(cc[ac+3*ido]); | |
937 IM(t2) = IM(cc[ac]) + IM(cc[ac+3*ido]); | |
938 RE(t3) = RE(cc[ac+ido]) + RE(cc[ac+2*ido]); | |
939 IM(t3) = IM(cc[ac+ido]) + IM(cc[ac+2*ido]); | |
940 RE(t4) = RE(cc[ac+ido]) - RE(cc[ac+2*ido]); | |
941 IM(t4) = IM(cc[ac+ido]) - IM(cc[ac+2*ido]); | |
942 RE(t5) = RE(cc[ac]) - RE(cc[ac+3*ido]); | |
943 IM(t5) = IM(cc[ac]) - IM(cc[ac+3*ido]); | |
10725 | 944 |
12527 | 945 RE(ch[ah]) = RE(cc[ac-ido]) + RE(t2) + RE(t3); |
946 IM(ch[ah]) = IM(cc[ac-ido]) + IM(t2) + IM(t3); | |
947 | |
948 RE(c2) = RE(cc[ac-ido]) + MUL_F(RE(t2),tr11) + MUL_F(RE(t3),tr12); | |
949 IM(c2) = IM(cc[ac-ido]) + MUL_F(IM(t2),tr11) + MUL_F(IM(t3),tr12); | |
950 RE(c3) = RE(cc[ac-ido]) + MUL_F(RE(t2),tr12) + MUL_F(RE(t3),tr11); | |
951 IM(c3) = IM(cc[ac-ido]) + MUL_F(IM(t2),tr12) + MUL_F(IM(t3),tr11); | |
952 | |
953 ComplexMult(&RE(c4), &RE(c5), | |
954 ti12, ti11, RE(t5), RE(t4)); | |
955 ComplexMult(&IM(c4), &IM(c5), | |
956 ti12, ti12, IM(t5), IM(t4)); | |
957 | |
958 IM(d2) = IM(c2) - RE(c5); | |
959 IM(d3) = IM(c3) - RE(c4); | |
960 RE(d4) = RE(c3) - IM(c4); | |
961 RE(d5) = RE(c2) - IM(c5); | |
962 RE(d2) = RE(c2) + IM(c5); | |
963 IM(d5) = IM(c2) + RE(c5); | |
964 RE(d3) = RE(c3) + IM(c4); | |
965 IM(d4) = IM(c3) + RE(c4); | |
966 | |
967 #if 1 | |
968 ComplexMult(&RE(ch[ah+l1*ido]), &IM(ch[ah+l1*ido]), | |
969 RE(d2), IM(d2), RE(wa1[i]), IM(wa1[i])); | |
970 ComplexMult(&RE(ch[ah+2*l1*ido]), &IM(ch[ah+2*l1*ido]), | |
971 RE(d3), IM(d3), RE(wa2[i]), IM(wa2[i])); | |
972 ComplexMult(&RE(ch[ah+3*l1*ido]), &IM(ch[ah+3*l1*ido]), | |
973 RE(d4), IM(d4), RE(wa3[i]), IM(wa3[i])); | |
974 ComplexMult(&RE(ch[ah+4*l1*ido]), &IM(ch[ah+4*l1*ido]), | |
975 RE(d5), IM(d5), RE(wa4[i]), IM(wa4[i])); | |
976 #else | |
977 ComplexMult(&IM(ch[ah+l1*ido]), &RE(ch[ah+l1*ido]), | |
978 IM(d2), RE(d2), RE(wa1[i]), IM(wa1[i])); | |
979 ComplexMult(&IM(ch[ah+2*l1*ido]), &RE(ch[ah+2*l1*ido]), | |
980 IM(d3), RE(d3), RE(wa2[i]), IM(wa2[i])); | |
981 ComplexMult(&IM(ch[ah+3*l1*ido]), &RE(ch[ah+3*l1*ido]), | |
982 IM(d4), RE(d4), RE(wa3[i]), IM(wa3[i])); | |
983 ComplexMult(&IM(ch[ah+4*l1*ido]), &RE(ch[ah+4*l1*ido]), | |
984 IM(d5), RE(d5), RE(wa4[i]), IM(wa4[i])); | |
985 #endif | |
986 } | |
10725 | 987 } |
988 } | |
989 } | |
990 } | |
991 | |
992 | |
993 /*---------------------------------------------------------------------- | |
994 cfftf1, cfftf, cfftb, cffti1, cffti. Complex FFTs. | |
995 ----------------------------------------------------------------------*/ | |
996 | |
12527 | 997 #ifdef USE_SSE |
998 | |
999 #define CONV(A,B,C) ( (A<<2) | ((B & 0x1)<<1) | ((C==1)&0x1) ) | |
1000 | |
1001 static INLINE void cfftf1pos_sse(uint16_t n, complex_t *c, complex_t *ch, | |
1002 const uint16_t *ifac, const complex_t *wa, | |
1003 const int8_t isign) | |
1004 { | |
1005 uint16_t i; | |
1006 uint16_t k1, l1, l2; | |
1007 uint16_t na, nf, ip, iw, ix2, ix3, ix4, ido, idl1; | |
1008 | |
1009 nf = ifac[1]; | |
1010 na = 0; | |
1011 l1 = 1; | |
1012 iw = 0; | |
1013 | |
1014 for (k1 = 2; k1 <= nf+1; k1++) | |
1015 { | |
1016 ip = ifac[k1]; | |
1017 l2 = ip*l1; | |
1018 ido = n / l2; | |
1019 idl1 = ido*l1; | |
1020 | |
1021 ix2 = iw + ido; | |
1022 ix3 = ix2 + ido; | |
1023 ix4 = ix3 + ido; | |
1024 | |
1025 switch (CONV(ip,na,ido)) | |
1026 { | |
1027 case CONV(4,0,0): | |
1028 //passf4pos_sse_ido((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3]); | |
1029 passf4pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3]); | |
1030 break; | |
1031 case CONV(4,0,1): | |
1032 passf4pos_sse((const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3]); | |
1033 break; | |
1034 case CONV(4,1,0): | |
1035 passf4pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3]); | |
1036 //passf4pos_sse_ido((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3]); | |
1037 break; | |
1038 case CONV(4,1,1): | |
1039 passf4pos_sse((const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3]); | |
1040 break; | |
1041 case CONV(2,0,0): | |
1042 passf2pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw]); | |
1043 //passf2pos_sse_ido((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw]); | |
1044 break; | |
1045 case CONV(2,0,1): | |
1046 passf2pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw]); | |
1047 //passf2pos_sse((const uint16_t)l1, (const complex_t*)c, ch, &wa[iw]); | |
1048 break; | |
1049 case CONV(2,1,0): | |
1050 passf2pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw]); | |
1051 //passf2pos_sse_ido((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw]); | |
1052 break; | |
1053 case CONV(2,1,1): | |
1054 passf2pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw]); | |
1055 //passf2pos_sse((const uint16_t)l1, (const complex_t*)ch, c, &wa[iw]); | |
1056 break; | |
1057 case CONV(3,0,0): | |
1058 case CONV(3,0,1): | |
1059 passf3((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], isign); | |
1060 break; | |
1061 case CONV(3,1,0): | |
1062 case CONV(3,1,1): | |
1063 passf3((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], isign); | |
1064 break; | |
1065 case CONV(5,0,0): | |
1066 case CONV(5,0,1): | |
1067 passf5((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign); | |
1068 break; | |
1069 case CONV(5,1,0): | |
1070 case CONV(5,1,1): | |
1071 passf5((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign); | |
1072 break; | |
1073 } | |
1074 | |
1075 na = 1 - na; | |
1076 | |
1077 l1 = l2; | |
1078 iw += (ip-1) * ido; | |
1079 } | |
1080 | |
1081 if (na == 0) | |
1082 return; | |
1083 | |
1084 for (i = 0; i < n; i++) | |
1085 { | |
1086 RE(c[i]) = RE(ch[i]); | |
1087 IM(c[i]) = IM(ch[i]); | |
1088 } | |
1089 } | |
1090 #endif | |
1091 | |
1092 static INLINE void cfftf1pos(uint16_t n, complex_t *c, complex_t *ch, | |
1093 const uint16_t *ifac, const complex_t *wa, | |
1094 const int8_t isign) | |
10725 | 1095 { |
1096 uint16_t i; | |
1097 uint16_t k1, l1, l2; | |
1098 uint16_t na, nf, ip, iw, ix2, ix3, ix4, ido, idl1; | |
1099 | |
1100 nf = ifac[1]; | |
1101 na = 0; | |
1102 l1 = 1; | |
1103 iw = 0; | |
1104 | |
1105 for (k1 = 2; k1 <= nf+1; k1++) | |
1106 { | |
1107 ip = ifac[k1]; | |
1108 l2 = ip*l1; | |
1109 ido = n / l2; | |
1110 idl1 = ido*l1; | |
1111 | |
1112 switch (ip) | |
1113 { | |
10989 | 1114 case 4: |
1115 ix2 = iw + ido; | |
1116 ix3 = ix2 + ido; | |
1117 | |
1118 if (na == 0) | |
12527 | 1119 passf4pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3]); |
10989 | 1120 else |
12527 | 1121 passf4pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3]); |
10989 | 1122 |
1123 na = 1 - na; | |
1124 break; | |
10725 | 1125 case 2: |
1126 if (na == 0) | |
12527 | 1127 passf2pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw]); |
10725 | 1128 else |
12527 | 1129 passf2pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw]); |
10725 | 1130 |
1131 na = 1 - na; | |
1132 break; | |
1133 case 3: | |
1134 ix2 = iw + ido; | |
1135 | |
1136 if (na == 0) | |
12527 | 1137 passf3((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], isign); |
10725 | 1138 else |
12527 | 1139 passf3((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], isign); |
10725 | 1140 |
1141 na = 1 - na; | |
1142 break; | |
1143 case 5: | |
1144 ix2 = iw + ido; | |
1145 ix3 = ix2 + ido; | |
1146 ix4 = ix3 + ido; | |
1147 | |
1148 if (na == 0) | |
12527 | 1149 passf5((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign); |
10725 | 1150 else |
12527 | 1151 passf5((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign); |
1152 | |
1153 na = 1 - na; | |
1154 break; | |
1155 } | |
1156 | |
1157 l1 = l2; | |
1158 iw += (ip-1) * ido; | |
1159 } | |
1160 | |
1161 if (na == 0) | |
1162 return; | |
1163 | |
1164 for (i = 0; i < n; i++) | |
1165 { | |
1166 RE(c[i]) = RE(ch[i]); | |
1167 IM(c[i]) = IM(ch[i]); | |
1168 } | |
1169 } | |
1170 | |
1171 static INLINE void cfftf1neg(uint16_t n, complex_t *c, complex_t *ch, | |
1172 const uint16_t *ifac, const complex_t *wa, | |
1173 const int8_t isign) | |
1174 { | |
1175 uint16_t i; | |
1176 uint16_t k1, l1, l2; | |
1177 uint16_t na, nf, ip, iw, ix2, ix3, ix4, ido, idl1; | |
1178 | |
1179 nf = ifac[1]; | |
1180 na = 0; | |
1181 l1 = 1; | |
1182 iw = 0; | |
1183 | |
1184 for (k1 = 2; k1 <= nf+1; k1++) | |
1185 { | |
1186 ip = ifac[k1]; | |
1187 l2 = ip*l1; | |
1188 ido = n / l2; | |
1189 idl1 = ido*l1; | |
1190 | |
1191 switch (ip) | |
1192 { | |
1193 case 4: | |
1194 ix2 = iw + ido; | |
1195 ix3 = ix2 + ido; | |
1196 | |
1197 if (na == 0) | |
1198 passf4neg((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3]); | |
1199 else | |
1200 passf4neg((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3]); | |
1201 | |
1202 na = 1 - na; | |
1203 break; | |
1204 case 2: | |
1205 if (na == 0) | |
1206 passf2neg((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw]); | |
1207 else | |
1208 passf2neg((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw]); | |
1209 | |
1210 na = 1 - na; | |
1211 break; | |
1212 case 3: | |
1213 ix2 = iw + ido; | |
1214 | |
1215 if (na == 0) | |
1216 passf3((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], isign); | |
1217 else | |
1218 passf3((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], isign); | |
1219 | |
1220 na = 1 - na; | |
1221 break; | |
1222 case 5: | |
1223 ix2 = iw + ido; | |
1224 ix3 = ix2 + ido; | |
1225 ix4 = ix3 + ido; | |
1226 | |
1227 if (na == 0) | |
1228 passf5((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign); | |
1229 else | |
1230 passf5((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign); | |
10725 | 1231 |
1232 na = 1 - na; | |
1233 break; | |
1234 } | |
1235 | |
1236 l1 = l2; | |
1237 iw += (ip-1) * ido; | |
1238 } | |
1239 | |
1240 if (na == 0) | |
1241 return; | |
1242 | |
1243 for (i = 0; i < n; i++) | |
1244 { | |
1245 RE(c[i]) = RE(ch[i]); | |
1246 IM(c[i]) = IM(ch[i]); | |
1247 } | |
1248 } | |
1249 | |
1250 void cfftf(cfft_info *cfft, complex_t *c) | |
1251 { | |
12527 | 1252 cfftf1neg(cfft->n, c, cfft->work, (const uint16_t*)cfft->ifac, (const complex_t*)cfft->tab, -1); |
10725 | 1253 } |
1254 | |
1255 void cfftb(cfft_info *cfft, complex_t *c) | |
1256 { | |
12527 | 1257 cfftf1pos(cfft->n, c, cfft->work, (const uint16_t*)cfft->ifac, (const complex_t*)cfft->tab, +1); |
10725 | 1258 } |
1259 | |
12527 | 1260 #ifdef USE_SSE |
1261 void cfftb_sse(cfft_info *cfft, complex_t *c) | |
1262 { | |
1263 cfftf1pos_sse(cfft->n, c, cfft->work, (const uint16_t*)cfft->ifac, (const complex_t*)cfft->tab, +1); | |
1264 } | |
1265 #endif | |
1266 | |
10725 | 1267 static void cffti1(uint16_t n, complex_t *wa, uint16_t *ifac) |
1268 { | |
1269 static uint16_t ntryh[4] = {3, 4, 2, 5}; | |
1270 #ifndef FIXED_POINT | |
1271 real_t arg, argh, argld, fi; | |
1272 uint16_t ido, ipm; | |
1273 uint16_t i1, k1, l1, l2; | |
1274 uint16_t ld, ii, ip; | |
1275 #endif | |
12527 | 1276 uint16_t ntry = 0, i, j; |
10725 | 1277 uint16_t ib; |
1278 uint16_t nf, nl, nq, nr; | |
1279 | |
1280 nl = n; | |
1281 nf = 0; | |
1282 j = 0; | |
1283 | |
1284 startloop: | |
1285 j++; | |
1286 | |
1287 if (j <= 4) | |
1288 ntry = ntryh[j-1]; | |
1289 else | |
1290 ntry += 2; | |
1291 | |
1292 do | |
1293 { | |
1294 nq = nl / ntry; | |
1295 nr = nl - ntry*nq; | |
1296 | |
1297 if (nr != 0) | |
1298 goto startloop; | |
1299 | |
1300 nf++; | |
1301 ifac[nf+1] = ntry; | |
1302 nl = nq; | |
1303 | |
1304 if (ntry == 2 && nf != 1) | |
1305 { | |
1306 for (i = 2; i <= nf; i++) | |
1307 { | |
1308 ib = nf - i + 2; | |
1309 ifac[ib+1] = ifac[ib]; | |
1310 } | |
1311 ifac[2] = 2; | |
1312 } | |
1313 } while (nl != 1); | |
1314 | |
1315 ifac[0] = n; | |
1316 ifac[1] = nf; | |
1317 | |
1318 #ifndef FIXED_POINT | |
12527 | 1319 argh = (real_t)2.0*(real_t)M_PI / (real_t)n; |
10725 | 1320 i = 0; |
1321 l1 = 1; | |
1322 | |
1323 for (k1 = 1; k1 <= nf; k1++) | |
1324 { | |
1325 ip = ifac[k1+1]; | |
1326 ld = 0; | |
1327 l2 = l1*ip; | |
1328 ido = n / l2; | |
1329 ipm = ip - 1; | |
1330 | |
1331 for (j = 0; j < ipm; j++) | |
1332 { | |
1333 i1 = i; | |
1334 RE(wa[i]) = 1.0; | |
1335 IM(wa[i]) = 0.0; | |
1336 ld += l1; | |
1337 fi = 0; | |
1338 argld = ld*argh; | |
1339 | |
1340 for (ii = 0; ii < ido; ii++) | |
1341 { | |
1342 i++; | |
1343 fi++; | |
1344 arg = fi * argld; | |
10989 | 1345 RE(wa[i]) = (real_t)cos(arg); |
12527 | 1346 #if 1 |
10989 | 1347 IM(wa[i]) = (real_t)sin(arg); |
12527 | 1348 #else |
1349 IM(wa[i]) = (real_t)-sin(arg); | |
1350 #endif | |
10725 | 1351 } |
1352 | |
1353 if (ip > 5) | |
1354 { | |
1355 RE(wa[i1]) = RE(wa[i]); | |
1356 IM(wa[i1]) = IM(wa[i]); | |
1357 } | |
1358 } | |
1359 l1 = l2; | |
1360 } | |
1361 #endif | |
1362 } | |
1363 | |
1364 cfft_info *cffti(uint16_t n) | |
1365 { | |
12527 | 1366 cfft_info *cfft = (cfft_info*)faad_malloc(sizeof(cfft_info)); |
10725 | 1367 |
1368 cfft->n = n; | |
12527 | 1369 cfft->work = (complex_t*)faad_malloc(n*sizeof(complex_t)); |
10725 | 1370 |
1371 #ifndef FIXED_POINT | |
12527 | 1372 cfft->tab = (complex_t*)faad_malloc(n*sizeof(complex_t)); |
10725 | 1373 |
1374 cffti1(n, cfft->tab, cfft->ifac); | |
1375 #else | |
1376 cffti1(n, NULL, cfft->ifac); | |
1377 | |
1378 switch (n) | |
1379 { | |
1380 case 64: cfft->tab = cfft_tab_64; break; | |
1381 case 512: cfft->tab = cfft_tab_512; break; | |
1382 #ifdef LD_DEC | |
12527 | 1383 case 256: cfft->tab = cfft_tab_256; break; |
1384 #endif | |
1385 | |
1386 #ifdef ALLOW_SMALL_FRAMELENGTH | |
1387 case 60: cfft->tab = cfft_tab_60; break; | |
1388 case 480: cfft->tab = cfft_tab_480; break; | |
1389 #ifdef LD_DEC | |
10725 | 1390 case 240: cfft->tab = cfft_tab_240; break; |
12527 | 1391 #endif |
10725 | 1392 #endif |
1393 } | |
1394 #endif | |
1395 | |
1396 return cfft; | |
1397 } | |
1398 | |
1399 void cfftu(cfft_info *cfft) | |
1400 { | |
12527 | 1401 if (cfft->work) faad_free(cfft->work); |
10725 | 1402 #ifndef FIXED_POINT |
12527 | 1403 if (cfft->tab) faad_free(cfft->tab); |
10725 | 1404 #endif |
1405 | |
12527 | 1406 if (cfft) faad_free(cfft); |
10989 | 1407 } |
12527 | 1408 |