Mercurial > mplayer.hg
annotate libfaad2/cfft.c @ 16045:aea67dddbdb5
small fixes.
author | ods15 |
---|---|
date | Fri, 22 Jul 2005 08:50:49 +0000 |
parents | 2ae5ab4331ca |
children | 59b6fa5b4201 |
rev | line source |
---|---|
10725 | 1 /* |
2 ** FAAD2 - Freeware Advanced Audio (AAC) Decoder including SBR decoding | |
12527 | 3 ** Copyright (C) 2003-2004 M. Bakker, Ahead Software AG, http://www.nero.com |
4 ** | |
10725 | 5 ** This program is free software; you can redistribute it and/or modify |
6 ** it under the terms of the GNU General Public License as published by | |
7 ** the Free Software Foundation; either version 2 of the License, or | |
8 ** (at your option) any later version. | |
12527 | 9 ** |
10725 | 10 ** This program is distributed in the hope that it will be useful, |
11 ** but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 ** GNU General Public License for more details. | |
12527 | 14 ** |
10725 | 15 ** You should have received a copy of the GNU General Public License |
12527 | 16 ** along with this program; if not, write to the Free Software |
10725 | 17 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
18 ** | |
19 ** Any non-GPL usage of this software or parts of this software is strictly | |
20 ** forbidden. | |
21 ** | |
22 ** Commercial non-GPL licensing of this software is possible. | |
23 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com. | |
24 ** | |
14727
2ae5ab4331ca
Remove modification notice from files that have not been locally modified.
diego
parents:
13453
diff
changeset
|
25 ** $Id: cfft.c,v 1.27 2004/06/30 12:45:55 menno Exp $ |
10725 | 26 **/ |
27 | |
28 /* | |
29 * Algorithmically based on Fortran-77 FFTPACK | |
30 * by Paul N. Swarztrauber(Version 4, 1985). | |
31 * | |
32 * Does even sized fft only | |
33 */ | |
34 | |
35 /* isign is +1 for backward and -1 for forward transforms */ | |
36 | |
37 #include "common.h" | |
38 #include "structs.h" | |
39 | |
40 #include <stdlib.h> | |
41 | |
42 #include "cfft.h" | |
43 #include "cfft_tab.h" | |
44 | |
45 | |
12527 | 46 /* static function declarations */ |
47 #ifdef USE_SSE | |
48 static void passf2pos_sse(const uint16_t l1, const complex_t *cc, | |
49 complex_t *ch, const complex_t *wa); | |
50 static void passf2pos_sse_ido(const uint16_t ido, const uint16_t l1, const complex_t *cc, | |
51 complex_t *ch, const complex_t *wa); | |
52 static void passf4pos_sse_ido(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, | |
53 const complex_t *wa1, const complex_t *wa2, const complex_t *wa3); | |
54 #endif | |
55 static void passf2pos(const uint16_t ido, const uint16_t l1, const complex_t *cc, | |
56 complex_t *ch, const complex_t *wa); | |
57 static void passf2neg(const uint16_t ido, const uint16_t l1, const complex_t *cc, | |
58 complex_t *ch, const complex_t *wa); | |
59 static void passf3(const uint16_t ido, const uint16_t l1, const complex_t *cc, | |
60 complex_t *ch, const complex_t *wa1, const complex_t *wa2, const int8_t isign); | |
61 static void passf4pos(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, | |
62 const complex_t *wa1, const complex_t *wa2, const complex_t *wa3); | |
63 static void passf4neg(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, | |
64 const complex_t *wa1, const complex_t *wa2, const complex_t *wa3); | |
65 static void passf5(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, | |
66 const complex_t *wa1, const complex_t *wa2, const complex_t *wa3, | |
67 const complex_t *wa4, const int8_t isign); | |
68 INLINE void cfftf1(uint16_t n, complex_t *c, complex_t *ch, | |
69 const uint16_t *ifac, const complex_t *wa, const int8_t isign); | |
70 static void cffti1(uint16_t n, complex_t *wa, uint16_t *ifac); | |
71 | |
72 | |
10725 | 73 /*---------------------------------------------------------------------- |
74 passf2, passf3, passf4, passf5. Complex FFT passes fwd and bwd. | |
75 ----------------------------------------------------------------------*/ | |
76 | |
12527 | 77 #if 0 //def USE_SSE |
78 static void passf2pos_sse(const uint16_t l1, const complex_t *cc, | |
79 complex_t *ch, const complex_t *wa) | |
80 { | |
81 uint16_t k, ah, ac; | |
82 | |
83 for (k = 0; k < l1; k++) | |
84 { | |
85 ah = 2*k; | |
86 ac = 4*k; | |
87 | |
88 RE(ch[ah]) = RE(cc[ac]) + RE(cc[ac+1]); | |
89 IM(ch[ah]) = IM(cc[ac]) + IM(cc[ac+1]); | |
90 | |
91 RE(ch[ah+l1]) = RE(cc[ac]) - RE(cc[ac+1]); | |
92 IM(ch[ah+l1]) = IM(cc[ac]) - IM(cc[ac+1]); | |
93 } | |
94 } | |
95 | |
96 static void passf2pos_sse_ido(const uint16_t ido, const uint16_t l1, const complex_t *cc, | |
97 complex_t *ch, const complex_t *wa) | |
98 { | |
99 uint16_t i, k, ah, ac; | |
100 | |
101 for (k = 0; k < l1; k++) | |
102 { | |
103 ah = k*ido; | |
104 ac = 2*k*ido; | |
105 | |
106 for (i = 0; i < ido; i+=4) | |
107 { | |
108 __m128 m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12, m13, m14; | |
109 __m128 m15, m16, m17, m18, m19, m20, m21, m22, m23, m24; | |
110 __m128 w1, w2, w3, w4; | |
111 | |
112 m1 = _mm_load_ps(&RE(cc[ac+i])); | |
113 m2 = _mm_load_ps(&RE(cc[ac+ido+i])); | |
114 m5 = _mm_load_ps(&RE(cc[ac+i+2])); | |
115 m6 = _mm_load_ps(&RE(cc[ac+ido+i+2])); | |
116 w1 = _mm_load_ps(&RE(wa[i])); | |
117 w3 = _mm_load_ps(&RE(wa[i+2])); | |
118 | |
119 m3 = _mm_add_ps(m1, m2); | |
120 m15 = _mm_add_ps(m5, m6); | |
121 | |
122 m4 = _mm_sub_ps(m1, m2); | |
123 m16 = _mm_sub_ps(m5, m6); | |
124 | |
125 _mm_store_ps(&RE(ch[ah+i]), m3); | |
126 _mm_store_ps(&RE(ch[ah+i+2]), m15); | |
127 | |
128 | |
129 w2 = _mm_shuffle_ps(w1, w1, _MM_SHUFFLE(2, 3, 0, 1)); | |
130 w4 = _mm_shuffle_ps(w3, w3, _MM_SHUFFLE(2, 3, 0, 1)); | |
131 | |
132 m7 = _mm_mul_ps(m4, w1); | |
133 m17 = _mm_mul_ps(m16, w3); | |
134 m8 = _mm_mul_ps(m4, w2); | |
135 m18 = _mm_mul_ps(m16, w4); | |
136 | |
137 m9 = _mm_shuffle_ps(m7, m8, _MM_SHUFFLE(2, 0, 2, 0)); | |
138 m19 = _mm_shuffle_ps(m17, m18, _MM_SHUFFLE(2, 0, 2, 0)); | |
139 m10 = _mm_shuffle_ps(m7, m8, _MM_SHUFFLE(3, 1, 3, 1)); | |
140 m20 = _mm_shuffle_ps(m17, m18, _MM_SHUFFLE(3, 1, 3, 1)); | |
141 | |
142 m11 = _mm_add_ps(m9, m10); | |
143 m21 = _mm_add_ps(m19, m20); | |
144 m12 = _mm_sub_ps(m9, m10); | |
145 m22 = _mm_sub_ps(m19, m20); | |
146 | |
147 m13 = _mm_shuffle_ps(m11, m11, _MM_SHUFFLE(0, 0, 3, 2)); | |
148 m23 = _mm_shuffle_ps(m21, m21, _MM_SHUFFLE(0, 0, 3, 2)); | |
149 | |
150 m14 = _mm_unpacklo_ps(m12, m13); | |
151 m24 = _mm_unpacklo_ps(m22, m23); | |
152 | |
153 _mm_store_ps(&RE(ch[ah+i+l1*ido]), m14); | |
154 _mm_store_ps(&RE(ch[ah+i+2+l1*ido]), m24); | |
155 } | |
156 } | |
157 } | |
158 #endif | |
159 | |
160 static void passf2pos(const uint16_t ido, const uint16_t l1, const complex_t *cc, | |
161 complex_t *ch, const complex_t *wa) | |
10725 | 162 { |
163 uint16_t i, k, ah, ac; | |
164 | |
165 if (ido == 1) | |
166 { | |
167 for (k = 0; k < l1; k++) | |
168 { | |
169 ah = 2*k; | |
170 ac = 4*k; | |
171 | |
12527 | 172 RE(ch[ah]) = RE(cc[ac]) + RE(cc[ac+1]); |
173 RE(ch[ah+l1]) = RE(cc[ac]) - RE(cc[ac+1]); | |
174 IM(ch[ah]) = IM(cc[ac]) + IM(cc[ac+1]); | |
175 IM(ch[ah+l1]) = IM(cc[ac]) - IM(cc[ac+1]); | |
176 } | |
177 } else { | |
178 for (k = 0; k < l1; k++) | |
179 { | |
180 ah = k*ido; | |
181 ac = 2*k*ido; | |
182 | |
183 for (i = 0; i < ido; i++) | |
184 { | |
185 complex_t t2; | |
186 | |
187 RE(ch[ah+i]) = RE(cc[ac+i]) + RE(cc[ac+i+ido]); | |
188 RE(t2) = RE(cc[ac+i]) - RE(cc[ac+i+ido]); | |
189 | |
190 IM(ch[ah+i]) = IM(cc[ac+i]) + IM(cc[ac+i+ido]); | |
191 IM(t2) = IM(cc[ac+i]) - IM(cc[ac+i+ido]); | |
192 | |
193 #if 1 | |
194 ComplexMult(&IM(ch[ah+i+l1*ido]), &RE(ch[ah+i+l1*ido]), | |
195 IM(t2), RE(t2), RE(wa[i]), IM(wa[i])); | |
196 #else | |
197 ComplexMult(&RE(ch[ah+i+l1*ido]), &IM(ch[ah+i+l1*ido]), | |
198 RE(t2), IM(t2), RE(wa[i]), IM(wa[i])); | |
199 #endif | |
200 } | |
201 } | |
202 } | |
203 } | |
204 | |
205 static void passf2neg(const uint16_t ido, const uint16_t l1, const complex_t *cc, | |
206 complex_t *ch, const complex_t *wa) | |
207 { | |
208 uint16_t i, k, ah, ac; | |
209 | |
210 if (ido == 1) | |
211 { | |
212 for (k = 0; k < l1; k++) | |
213 { | |
214 ah = 2*k; | |
215 ac = 4*k; | |
216 | |
217 RE(ch[ah]) = RE(cc[ac]) + RE(cc[ac+1]); | |
10725 | 218 RE(ch[ah+l1]) = RE(cc[ac]) - RE(cc[ac+1]); |
10989 | 219 IM(ch[ah]) = IM(cc[ac]) + IM(cc[ac+1]); |
10725 | 220 IM(ch[ah+l1]) = IM(cc[ac]) - IM(cc[ac+1]); |
221 } | |
222 } else { | |
223 for (k = 0; k < l1; k++) | |
224 { | |
225 ah = k*ido; | |
226 ac = 2*k*ido; | |
227 | |
228 for (i = 0; i < ido; i++) | |
229 { | |
230 complex_t t2; | |
231 | |
10989 | 232 RE(ch[ah+i]) = RE(cc[ac+i]) + RE(cc[ac+i+ido]); |
233 RE(t2) = RE(cc[ac+i]) - RE(cc[ac+i+ido]); | |
10725 | 234 |
10989 | 235 IM(ch[ah+i]) = IM(cc[ac+i]) + IM(cc[ac+i+ido]); |
236 IM(t2) = IM(cc[ac+i]) - IM(cc[ac+i+ido]); | |
10725 | 237 |
12527 | 238 #if 1 |
239 ComplexMult(&RE(ch[ah+i+l1*ido]), &IM(ch[ah+i+l1*ido]), | |
240 RE(t2), IM(t2), RE(wa[i]), IM(wa[i])); | |
241 #else | |
242 ComplexMult(&IM(ch[ah+i+l1*ido]), &RE(ch[ah+i+l1*ido]), | |
243 IM(t2), RE(t2), RE(wa[i]), IM(wa[i])); | |
244 #endif | |
10725 | 245 } |
246 } | |
247 } | |
248 } | |
249 | |
250 | |
12527 | 251 static void passf3(const uint16_t ido, const uint16_t l1, const complex_t *cc, |
252 complex_t *ch, const complex_t *wa1, const complex_t *wa2, | |
253 const int8_t isign) | |
10725 | 254 { |
12527 | 255 static real_t taur = FRAC_CONST(-0.5); |
256 static real_t taui = FRAC_CONST(0.866025403784439); | |
10725 | 257 uint16_t i, k, ac, ah; |
258 complex_t c2, c3, d2, d3, t2; | |
259 | |
260 if (ido == 1) | |
261 { | |
12527 | 262 if (isign == 1) |
10725 | 263 { |
12527 | 264 for (k = 0; k < l1; k++) |
265 { | |
266 ac = 3*k+1; | |
267 ah = k; | |
10725 | 268 |
12527 | 269 RE(t2) = RE(cc[ac]) + RE(cc[ac+1]); |
270 IM(t2) = IM(cc[ac]) + IM(cc[ac+1]); | |
271 RE(c2) = RE(cc[ac-1]) + MUL_F(RE(t2),taur); | |
272 IM(c2) = IM(cc[ac-1]) + MUL_F(IM(t2),taur); | |
273 | |
274 RE(ch[ah]) = RE(cc[ac-1]) + RE(t2); | |
275 IM(ch[ah]) = IM(cc[ac-1]) + IM(t2); | |
276 | |
277 RE(c3) = MUL_F((RE(cc[ac]) - RE(cc[ac+1])), taui); | |
278 IM(c3) = MUL_F((IM(cc[ac]) - IM(cc[ac+1])), taui); | |
10725 | 279 |
12527 | 280 RE(ch[ah+l1]) = RE(c2) - IM(c3); |
281 IM(ch[ah+l1]) = IM(c2) + RE(c3); | |
282 RE(ch[ah+2*l1]) = RE(c2) + IM(c3); | |
283 IM(ch[ah+2*l1]) = IM(c2) - RE(c3); | |
284 } | |
285 } else { | |
286 for (k = 0; k < l1; k++) | |
287 { | |
288 ac = 3*k+1; | |
289 ah = k; | |
10725 | 290 |
12527 | 291 RE(t2) = RE(cc[ac]) + RE(cc[ac+1]); |
292 IM(t2) = IM(cc[ac]) + IM(cc[ac+1]); | |
293 RE(c2) = RE(cc[ac-1]) + MUL_F(RE(t2),taur); | |
294 IM(c2) = IM(cc[ac-1]) + MUL_F(IM(t2),taur); | |
295 | |
296 RE(ch[ah]) = RE(cc[ac-1]) + RE(t2); | |
297 IM(ch[ah]) = IM(cc[ac-1]) + IM(t2); | |
10725 | 298 |
12527 | 299 RE(c3) = MUL_F((RE(cc[ac]) - RE(cc[ac+1])), taui); |
300 IM(c3) = MUL_F((IM(cc[ac]) - IM(cc[ac+1])), taui); | |
301 | |
302 RE(ch[ah+l1]) = RE(c2) + IM(c3); | |
303 IM(ch[ah+l1]) = IM(c2) - RE(c3); | |
304 RE(ch[ah+2*l1]) = RE(c2) - IM(c3); | |
305 IM(ch[ah+2*l1]) = IM(c2) + RE(c3); | |
306 } | |
10725 | 307 } |
308 } else { | |
12527 | 309 if (isign == 1) |
10725 | 310 { |
12527 | 311 for (k = 0; k < l1; k++) |
10725 | 312 { |
12527 | 313 for (i = 0; i < ido; i++) |
314 { | |
315 ac = i + (3*k+1)*ido; | |
316 ah = i + k * ido; | |
317 | |
318 RE(t2) = RE(cc[ac]) + RE(cc[ac+ido]); | |
319 RE(c2) = RE(cc[ac-ido]) + MUL_F(RE(t2),taur); | |
320 IM(t2) = IM(cc[ac]) + IM(cc[ac+ido]); | |
321 IM(c2) = IM(cc[ac-ido]) + MUL_F(IM(t2),taur); | |
10725 | 322 |
12527 | 323 RE(ch[ah]) = RE(cc[ac-ido]) + RE(t2); |
324 IM(ch[ah]) = IM(cc[ac-ido]) + IM(t2); | |
325 | |
326 RE(c3) = MUL_F((RE(cc[ac]) - RE(cc[ac+ido])), taui); | |
327 IM(c3) = MUL_F((IM(cc[ac]) - IM(cc[ac+ido])), taui); | |
328 | |
329 RE(d2) = RE(c2) - IM(c3); | |
330 IM(d3) = IM(c2) - RE(c3); | |
331 RE(d3) = RE(c2) + IM(c3); | |
332 IM(d2) = IM(c2) + RE(c3); | |
10725 | 333 |
12527 | 334 #if 1 |
335 ComplexMult(&IM(ch[ah+l1*ido]), &RE(ch[ah+l1*ido]), | |
336 IM(d2), RE(d2), RE(wa1[i]), IM(wa1[i])); | |
337 ComplexMult(&IM(ch[ah+2*l1*ido]), &RE(ch[ah+2*l1*ido]), | |
338 IM(d3), RE(d3), RE(wa2[i]), IM(wa2[i])); | |
339 #else | |
340 ComplexMult(&RE(ch[ah+l1*ido]), &IM(ch[ah+l1*ido]), | |
341 RE(d2), IM(d2), RE(wa1[i]), IM(wa1[i])); | |
342 ComplexMult(&RE(ch[ah+2*l1*ido]), &IM(ch[ah+2*l1*ido]), | |
343 RE(d3), IM(d3), RE(wa2[i]), IM(wa2[i])); | |
344 #endif | |
345 } | |
346 } | |
347 } else { | |
348 for (k = 0; k < l1; k++) | |
349 { | |
350 for (i = 0; i < ido; i++) | |
351 { | |
352 ac = i + (3*k+1)*ido; | |
353 ah = i + k * ido; | |
10725 | 354 |
12527 | 355 RE(t2) = RE(cc[ac]) + RE(cc[ac+ido]); |
356 RE(c2) = RE(cc[ac-ido]) + MUL_F(RE(t2),taur); | |
357 IM(t2) = IM(cc[ac]) + IM(cc[ac+ido]); | |
358 IM(c2) = IM(cc[ac-ido]) + MUL_F(IM(t2),taur); | |
359 | |
360 RE(ch[ah]) = RE(cc[ac-ido]) + RE(t2); | |
361 IM(ch[ah]) = IM(cc[ac-ido]) + IM(t2); | |
362 | |
363 RE(c3) = MUL_F((RE(cc[ac]) - RE(cc[ac+ido])), taui); | |
364 IM(c3) = MUL_F((IM(cc[ac]) - IM(cc[ac+ido])), taui); | |
10725 | 365 |
12527 | 366 RE(d2) = RE(c2) + IM(c3); |
367 IM(d3) = IM(c2) + RE(c3); | |
368 RE(d3) = RE(c2) - IM(c3); | |
369 IM(d2) = IM(c2) - RE(c3); | |
370 | |
371 #if 1 | |
372 ComplexMult(&RE(ch[ah+l1*ido]), &IM(ch[ah+l1*ido]), | |
373 RE(d2), IM(d2), RE(wa1[i]), IM(wa1[i])); | |
374 ComplexMult(&RE(ch[ah+2*l1*ido]), &IM(ch[ah+2*l1*ido]), | |
375 RE(d3), IM(d3), RE(wa2[i]), IM(wa2[i])); | |
376 #else | |
377 ComplexMult(&IM(ch[ah+l1*ido]), &RE(ch[ah+l1*ido]), | |
378 IM(d2), RE(d2), RE(wa1[i]), IM(wa1[i])); | |
379 ComplexMult(&IM(ch[ah+2*l1*ido]), &RE(ch[ah+2*l1*ido]), | |
380 IM(d3), RE(d3), RE(wa2[i]), IM(wa2[i])); | |
381 #endif | |
382 } | |
10725 | 383 } |
384 } | |
385 } | |
386 } | |
387 | |
12527 | 388 #ifdef USE_SSE |
389 ALIGN static const int32_t negate[4] = { 0x0, 0x0, 0x0, 0x80000000 }; | |
10725 | 390 |
12527 | 391 __declspec(naked) static void passf4pos_sse(const uint16_t l1, const complex_t *cc, |
392 complex_t *ch, const complex_t *wa1, const complex_t *wa2, | |
393 const complex_t *wa3) | |
394 { | |
395 __asm { | |
396 push ebx | |
397 mov ebx, esp | |
398 and esp, -16 | |
399 push edi | |
400 push esi | |
401 sub esp, 8 | |
402 movzx edi, WORD PTR [ebx+8] | |
403 | |
404 movaps xmm1, XMMWORD PTR negate | |
405 | |
406 test edi, edi | |
407 jle l1_is_zero | |
408 | |
409 lea esi, DWORD PTR [edi+edi] | |
410 add esi, esi | |
411 sub esi, edi | |
412 add esi, esi | |
413 add esi, esi | |
414 add esi, esi | |
415 mov eax, DWORD PTR [ebx+16] | |
416 add esi, eax | |
417 lea ecx, DWORD PTR [edi+edi] | |
418 add ecx, ecx | |
419 add ecx, ecx | |
420 add ecx, ecx | |
421 add ecx, eax | |
422 lea edx, DWORD PTR [edi+edi] | |
423 add edx, edx | |
424 add edx, edx | |
425 add edx, eax | |
426 xor eax, eax | |
427 mov DWORD PTR [esp], ebp | |
428 mov ebp, DWORD PTR [ebx+12] | |
429 | |
430 fftloop: | |
431 lea edi, DWORD PTR [eax+eax] | |
432 add edi, edi | |
433 movaps xmm2, XMMWORD PTR [ebp+edi*8] | |
434 movaps xmm0, XMMWORD PTR [ebp+edi*8+16] | |
435 movaps xmm7, XMMWORD PTR [ebp+edi*8+32] | |
436 movaps xmm5, XMMWORD PTR [ebp+edi*8+48] | |
437 movaps xmm6, xmm2 | |
438 addps xmm6, xmm0 | |
439 movaps xmm4, xmm1 | |
440 xorps xmm4, xmm7 | |
441 movaps xmm3, xmm1 | |
442 xorps xmm3, xmm5 | |
443 xorps xmm2, xmm1 | |
444 xorps xmm0, xmm1 | |
445 addps xmm7, xmm5 | |
446 subps xmm2, xmm0 | |
447 movaps xmm0, xmm6 | |
448 shufps xmm0, xmm7, 68 | |
449 subps xmm4, xmm3 | |
450 shufps xmm6, xmm7, 238 | |
451 movaps xmm5, xmm2 | |
452 shufps xmm5, xmm4, 68 | |
453 movaps xmm3, xmm0 | |
454 addps xmm3, xmm6 | |
455 shufps xmm2, xmm4, 187 | |
456 subps xmm0, xmm6 | |
457 movaps xmm4, xmm5 | |
458 addps xmm4, xmm2 | |
459 mov edi, DWORD PTR [ebx+16] | |
460 movaps XMMWORD PTR [edi+eax*8], xmm3 | |
461 subps xmm5, xmm2 | |
462 movaps XMMWORD PTR [edx+eax*8], xmm4 | |
463 movaps XMMWORD PTR [ecx+eax*8], xmm0 | |
464 movaps XMMWORD PTR [esi+eax*8], xmm5 | |
465 add eax, 2 | |
466 movzx eax, ax | |
467 movzx edi, WORD PTR [ebx+8] | |
468 cmp eax, edi | |
469 jl fftloop | |
470 | |
471 mov ebp, DWORD PTR [esp] | |
472 | |
473 l1_is_zero: | |
474 | |
475 add esp, 8 | |
476 pop esi | |
477 pop edi | |
478 mov esp, ebx | |
479 pop ebx | |
480 ret | |
481 } | |
482 } | |
483 #endif | |
484 | |
485 #if 0 | |
486 static void passf4pos_sse_ido(const uint16_t ido, const uint16_t l1, const complex_t *cc, | |
487 complex_t *ch, const complex_t *wa1, const complex_t *wa2, | |
488 const complex_t *wa3) | |
489 { | |
490 uint16_t i, k, ac, ah; | |
491 | |
492 for (k = 0; k < l1; k++) | |
493 { | |
494 ac = 4*k*ido; | |
495 ah = k*ido; | |
496 | |
497 for (i = 0; i < ido; i+=2) | |
498 { | |
499 __m128 m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12, m13, m14, m15, m16; | |
500 __m128 n1, n2, n3, n4, n5, n6, n7, n8, n9, m17, m18, m19, m20, m21, m22, m23; | |
501 __m128 w1, w2, w3, w4, w5, w6, m24, m25, m26, m27, m28, m29, m30; | |
502 __m128 neg1 = _mm_set_ps(-1.0, 1.0, -1.0, 1.0); | |
503 | |
504 m1 = _mm_load_ps(&RE(cc[ac+i])); | |
505 m2 = _mm_load_ps(&RE(cc[ac+i+2*ido])); | |
506 m3 = _mm_add_ps(m1, m2); | |
507 m4 = _mm_sub_ps(m1, m2); | |
508 | |
509 n1 = _mm_load_ps(&RE(cc[ac+i+ido])); | |
510 n2 = _mm_load_ps(&RE(cc[ac+i+3*ido])); | |
511 n3 = _mm_add_ps(n1, n2); | |
512 | |
513 n4 = _mm_mul_ps(neg1, n1); | |
514 n5 = _mm_mul_ps(neg1, n2); | |
515 n6 = _mm_sub_ps(n4, n5); | |
516 | |
517 m5 = _mm_add_ps(m3, n3); | |
518 | |
519 n7 = _mm_shuffle_ps(n6, n6, _MM_SHUFFLE(2, 3, 0, 1)); | |
520 n8 = _mm_add_ps(m4, n7); | |
521 | |
522 m6 = _mm_sub_ps(m3, n3); | |
523 n9 = _mm_sub_ps(m4, n7); | |
524 | |
525 _mm_store_ps(&RE(ch[ah+i]), m5); | |
526 | |
527 #if 0 | |
528 static INLINE void ComplexMult(real_t *y1, real_t *y2, | |
529 real_t x1, real_t x2, real_t c1, real_t c2) | |
530 { | |
531 *y1 = MUL_F(x1, c1) + MUL_F(x2, c2); | |
532 *y2 = MUL_F(x2, c1) - MUL_F(x1, c2); | |
533 } | |
534 | |
535 m7.0 = RE(c2)*RE(wa1[i]) | |
536 m7.1 = IM(c2)*IM(wa1[i]) | |
537 m7.2 = RE(c6)*RE(wa1[i+1]) | |
538 m7.3 = IM(c6)*IM(wa1[i+1]) | |
539 | |
540 m8.0 = RE(c2)*IM(wa1[i]) | |
541 m8.1 = IM(c2)*RE(wa1[i]) | |
542 m8.2 = RE(c6)*IM(wa1[i+1]) | |
543 m8.3 = IM(c6)*RE(wa1[i+1]) | |
544 | |
545 RE(0) = m7.0 - m7.1 | |
546 IM(0) = m8.0 + m8.1 | |
547 RE(1) = m7.2 - m7.3 | |
548 IM(1) = m8.2 + m8.3 | |
549 | |
550 //// | |
551 RE(0) = RE(c2)*RE(wa1[i]) - IM(c2)*IM(wa1[i]) | |
552 IM(0) = RE(c2)*IM(wa1[i]) + IM(c2)*RE(wa1[i]) | |
553 RE(1) = RE(c6)*RE(wa1[i+1]) - IM(c6)*IM(wa1[i+1]) | |
554 IM(1) = RE(c6)*IM(wa1[i+1]) + IM(c6)*RE(wa1[i+1]) | |
555 #endif | |
556 | |
557 w1 = _mm_load_ps(&RE(wa1[i])); | |
558 w3 = _mm_load_ps(&RE(wa2[i])); | |
559 w5 = _mm_load_ps(&RE(wa3[i])); | |
560 | |
561 w2 = _mm_shuffle_ps(w1, w1, _MM_SHUFFLE(2, 3, 0, 1)); | |
562 w4 = _mm_shuffle_ps(w3, w3, _MM_SHUFFLE(2, 3, 0, 1)); | |
563 w6 = _mm_shuffle_ps(w5, w5, _MM_SHUFFLE(2, 3, 0, 1)); | |
564 | |
565 m7 = _mm_mul_ps(n8, w1); | |
566 m15 = _mm_mul_ps(m6, w3); | |
567 m23 = _mm_mul_ps(n9, w5); | |
568 m8 = _mm_mul_ps(n8, w2); | |
569 m16 = _mm_mul_ps(m6, w4); | |
570 m24 = _mm_mul_ps(n9, w6); | |
571 | |
572 m9 = _mm_shuffle_ps(m7, m8, _MM_SHUFFLE(2, 0, 2, 0)); | |
573 m17 = _mm_shuffle_ps(m15, m16, _MM_SHUFFLE(2, 0, 2, 0)); | |
574 m25 = _mm_shuffle_ps(m23, m24, _MM_SHUFFLE(2, 0, 2, 0)); | |
575 m10 = _mm_shuffle_ps(m7, m8, _MM_SHUFFLE(3, 1, 3, 1)); | |
576 m18 = _mm_shuffle_ps(m15, m16, _MM_SHUFFLE(3, 1, 3, 1)); | |
577 m26 = _mm_shuffle_ps(m23, m24, _MM_SHUFFLE(3, 1, 3, 1)); | |
578 | |
579 m11 = _mm_add_ps(m9, m10); | |
580 m19 = _mm_add_ps(m17, m18); | |
581 m27 = _mm_add_ps(m25, m26); | |
582 m12 = _mm_sub_ps(m9, m10); | |
583 m20 = _mm_sub_ps(m17, m18); | |
584 m28 = _mm_sub_ps(m25, m26); | |
585 | |
586 m13 = _mm_shuffle_ps(m11, m11, _MM_SHUFFLE(0, 0, 3, 2)); | |
587 m21 = _mm_shuffle_ps(m19, m19, _MM_SHUFFLE(0, 0, 3, 2)); | |
588 m29 = _mm_shuffle_ps(m27, m27, _MM_SHUFFLE(0, 0, 3, 2)); | |
589 m14 = _mm_unpacklo_ps(m12, m13); | |
590 m22 = _mm_unpacklo_ps(m20, m21); | |
591 m30 = _mm_unpacklo_ps(m28, m29); | |
592 | |
593 _mm_store_ps(&RE(ch[ah+i+l1*ido]), m14); | |
594 _mm_store_ps(&RE(ch[ah+i+2*l1*ido]), m22); | |
595 _mm_store_ps(&RE(ch[ah+i+3*l1*ido]), m30); | |
596 } | |
597 } | |
598 } | |
599 #endif | |
600 | |
601 static void passf4pos(const uint16_t ido, const uint16_t l1, const complex_t *cc, | |
602 complex_t *ch, const complex_t *wa1, const complex_t *wa2, | |
603 const complex_t *wa3) | |
10725 | 604 { |
605 uint16_t i, k, ac, ah; | |
606 | |
607 if (ido == 1) | |
608 { | |
609 for (k = 0; k < l1; k++) | |
610 { | |
10989 | 611 complex_t t1, t2, t3, t4; |
612 | |
10725 | 613 ac = 4*k; |
614 ah = k; | |
615 | |
12527 | 616 RE(t2) = RE(cc[ac]) + RE(cc[ac+2]); |
617 RE(t1) = RE(cc[ac]) - RE(cc[ac+2]); | |
10989 | 618 IM(t2) = IM(cc[ac]) + IM(cc[ac+2]); |
12527 | 619 IM(t1) = IM(cc[ac]) - IM(cc[ac+2]); |
10725 | 620 RE(t3) = RE(cc[ac+1]) + RE(cc[ac+3]); |
10989 | 621 IM(t4) = RE(cc[ac+1]) - RE(cc[ac+3]); |
622 IM(t3) = IM(cc[ac+3]) + IM(cc[ac+1]); | |
10725 | 623 RE(t4) = IM(cc[ac+3]) - IM(cc[ac+1]); |
624 | |
12527 | 625 RE(ch[ah]) = RE(t2) + RE(t3); |
10725 | 626 RE(ch[ah+2*l1]) = RE(t2) - RE(t3); |
10989 | 627 |
628 IM(ch[ah]) = IM(t2) + IM(t3); | |
10725 | 629 IM(ch[ah+2*l1]) = IM(t2) - IM(t3); |
10989 | 630 |
12527 | 631 RE(ch[ah+l1]) = RE(t1) + RE(t4); |
632 RE(ch[ah+3*l1]) = RE(t1) - RE(t4); | |
10989 | 633 |
12527 | 634 IM(ch[ah+l1]) = IM(t1) + IM(t4); |
635 IM(ch[ah+3*l1]) = IM(t1) - IM(t4); | |
10725 | 636 } |
637 } else { | |
638 for (k = 0; k < l1; k++) | |
639 { | |
10989 | 640 ac = 4*k*ido; |
641 ah = k*ido; | |
642 | |
10725 | 643 for (i = 0; i < ido; i++) |
644 { | |
10989 | 645 complex_t c2, c3, c4, t1, t2, t3, t4; |
10725 | 646 |
10989 | 647 RE(t2) = RE(cc[ac+i]) + RE(cc[ac+i+2*ido]); |
648 RE(t1) = RE(cc[ac+i]) - RE(cc[ac+i+2*ido]); | |
649 IM(t2) = IM(cc[ac+i]) + IM(cc[ac+i+2*ido]); | |
650 IM(t1) = IM(cc[ac+i]) - IM(cc[ac+i+2*ido]); | |
651 RE(t3) = RE(cc[ac+i+ido]) + RE(cc[ac+i+3*ido]); | |
652 IM(t4) = RE(cc[ac+i+ido]) - RE(cc[ac+i+3*ido]); | |
653 IM(t3) = IM(cc[ac+i+3*ido]) + IM(cc[ac+i+ido]); | |
654 RE(t4) = IM(cc[ac+i+3*ido]) - IM(cc[ac+i+ido]); | |
10725 | 655 |
12527 | 656 RE(c2) = RE(t1) + RE(t4); |
657 RE(c4) = RE(t1) - RE(t4); | |
10989 | 658 |
12527 | 659 IM(c2) = IM(t1) + IM(t4); |
660 IM(c4) = IM(t1) - IM(t4); | |
10725 | 661 |
10989 | 662 RE(ch[ah+i]) = RE(t2) + RE(t3); |
12527 | 663 RE(c3) = RE(t2) - RE(t3); |
10989 | 664 |
665 IM(ch[ah+i]) = IM(t2) + IM(t3); | |
12527 | 666 IM(c3) = IM(t2) - IM(t3); |
10989 | 667 |
12527 | 668 #if 1 |
669 ComplexMult(&IM(ch[ah+i+l1*ido]), &RE(ch[ah+i+l1*ido]), | |
670 IM(c2), RE(c2), RE(wa1[i]), IM(wa1[i])); | |
671 ComplexMult(&IM(ch[ah+i+2*l1*ido]), &RE(ch[ah+i+2*l1*ido]), | |
672 IM(c3), RE(c3), RE(wa2[i]), IM(wa2[i])); | |
673 ComplexMult(&IM(ch[ah+i+3*l1*ido]), &RE(ch[ah+i+3*l1*ido]), | |
674 IM(c4), RE(c4), RE(wa3[i]), IM(wa3[i])); | |
675 #else | |
676 ComplexMult(&RE(ch[ah+i+l1*ido]), &IM(ch[ah+i+l1*ido]), | |
677 RE(c2), IM(c2), RE(wa1[i]), IM(wa1[i])); | |
678 ComplexMult(&RE(ch[ah+i+2*l1*ido]), &IM(ch[ah+i+2*l1*ido]), | |
679 RE(c3), IM(c3), RE(wa2[i]), IM(wa2[i])); | |
680 ComplexMult(&RE(ch[ah+i+3*l1*ido]), &IM(ch[ah+i+3*l1*ido]), | |
681 RE(c4), IM(c4), RE(wa3[i]), IM(wa3[i])); | |
682 #endif | |
10725 | 683 } |
684 } | |
685 } | |
686 } | |
687 | |
12527 | 688 static void passf4neg(const uint16_t ido, const uint16_t l1, const complex_t *cc, |
689 complex_t *ch, const complex_t *wa1, const complex_t *wa2, | |
690 const complex_t *wa3) | |
10725 | 691 { |
692 uint16_t i, k, ac, ah; | |
693 | |
694 if (ido == 1) | |
695 { | |
696 for (k = 0; k < l1; k++) | |
697 { | |
12527 | 698 complex_t t1, t2, t3, t4; |
699 | |
700 ac = 4*k; | |
10725 | 701 ah = k; |
702 | |
12527 | 703 RE(t2) = RE(cc[ac]) + RE(cc[ac+2]); |
704 RE(t1) = RE(cc[ac]) - RE(cc[ac+2]); | |
705 IM(t2) = IM(cc[ac]) + IM(cc[ac+2]); | |
706 IM(t1) = IM(cc[ac]) - IM(cc[ac+2]); | |
707 RE(t3) = RE(cc[ac+1]) + RE(cc[ac+3]); | |
708 IM(t4) = RE(cc[ac+1]) - RE(cc[ac+3]); | |
709 IM(t3) = IM(cc[ac+3]) + IM(cc[ac+1]); | |
710 RE(t4) = IM(cc[ac+3]) - IM(cc[ac+1]); | |
10725 | 711 |
12527 | 712 RE(ch[ah]) = RE(t2) + RE(t3); |
713 RE(ch[ah+2*l1]) = RE(t2) - RE(t3); | |
714 | |
715 IM(ch[ah]) = IM(t2) + IM(t3); | |
716 IM(ch[ah+2*l1]) = IM(t2) - IM(t3); | |
10725 | 717 |
12527 | 718 RE(ch[ah+l1]) = RE(t1) - RE(t4); |
719 RE(ch[ah+3*l1]) = RE(t1) + RE(t4); | |
720 | |
721 IM(ch[ah+l1]) = IM(t1) - IM(t4); | |
722 IM(ch[ah+3*l1]) = IM(t1) + IM(t4); | |
10725 | 723 } |
724 } else { | |
725 for (k = 0; k < l1; k++) | |
726 { | |
12527 | 727 ac = 4*k*ido; |
728 ah = k*ido; | |
729 | |
10725 | 730 for (i = 0; i < ido; i++) |
731 { | |
12527 | 732 complex_t c2, c3, c4, t1, t2, t3, t4; |
733 | |
734 RE(t2) = RE(cc[ac+i]) + RE(cc[ac+i+2*ido]); | |
735 RE(t1) = RE(cc[ac+i]) - RE(cc[ac+i+2*ido]); | |
736 IM(t2) = IM(cc[ac+i]) + IM(cc[ac+i+2*ido]); | |
737 IM(t1) = IM(cc[ac+i]) - IM(cc[ac+i+2*ido]); | |
738 RE(t3) = RE(cc[ac+i+ido]) + RE(cc[ac+i+3*ido]); | |
739 IM(t4) = RE(cc[ac+i+ido]) - RE(cc[ac+i+3*ido]); | |
740 IM(t3) = IM(cc[ac+i+3*ido]) + IM(cc[ac+i+ido]); | |
741 RE(t4) = IM(cc[ac+i+3*ido]) - IM(cc[ac+i+ido]); | |
742 | |
743 RE(c2) = RE(t1) - RE(t4); | |
744 RE(c4) = RE(t1) + RE(t4); | |
745 | |
746 IM(c2) = IM(t1) - IM(t4); | |
747 IM(c4) = IM(t1) + IM(t4); | |
748 | |
749 RE(ch[ah+i]) = RE(t2) + RE(t3); | |
750 RE(c3) = RE(t2) - RE(t3); | |
751 | |
752 IM(ch[ah+i]) = IM(t2) + IM(t3); | |
753 IM(c3) = IM(t2) - IM(t3); | |
754 | |
755 #if 1 | |
756 ComplexMult(&RE(ch[ah+i+l1*ido]), &IM(ch[ah+i+l1*ido]), | |
757 RE(c2), IM(c2), RE(wa1[i]), IM(wa1[i])); | |
758 ComplexMult(&RE(ch[ah+i+2*l1*ido]), &IM(ch[ah+i+2*l1*ido]), | |
759 RE(c3), IM(c3), RE(wa2[i]), IM(wa2[i])); | |
760 ComplexMult(&RE(ch[ah+i+3*l1*ido]), &IM(ch[ah+i+3*l1*ido]), | |
761 RE(c4), IM(c4), RE(wa3[i]), IM(wa3[i])); | |
762 #else | |
763 ComplexMult(&IM(ch[ah+i+l1*ido]), &RE(ch[ah+i+l1*ido]), | |
764 IM(c2), RE(c2), RE(wa1[i]), IM(wa1[i])); | |
765 ComplexMult(&IM(ch[ah+i+2*l1*ido]), &RE(ch[ah+i+2*l1*ido]), | |
766 IM(c3), RE(c3), RE(wa2[i]), IM(wa2[i])); | |
767 ComplexMult(&IM(ch[ah+i+3*l1*ido]), &RE(ch[ah+i+3*l1*ido]), | |
768 IM(c4), RE(c4), RE(wa3[i]), IM(wa3[i])); | |
769 #endif | |
770 } | |
771 } | |
772 } | |
773 } | |
774 | |
775 static void passf5(const uint16_t ido, const uint16_t l1, const complex_t *cc, | |
776 complex_t *ch, const complex_t *wa1, const complex_t *wa2, const complex_t *wa3, | |
777 const complex_t *wa4, const int8_t isign) | |
778 { | |
779 static real_t tr11 = FRAC_CONST(0.309016994374947); | |
780 static real_t ti11 = FRAC_CONST(0.951056516295154); | |
781 static real_t tr12 = FRAC_CONST(-0.809016994374947); | |
782 static real_t ti12 = FRAC_CONST(0.587785252292473); | |
783 uint16_t i, k, ac, ah; | |
784 complex_t c2, c3, c4, c5, d3, d4, d5, d2, t2, t3, t4, t5; | |
10725 | 785 |
12527 | 786 if (ido == 1) |
787 { | |
788 if (isign == 1) | |
789 { | |
790 for (k = 0; k < l1; k++) | |
791 { | |
792 ac = 5*k + 1; | |
793 ah = k; | |
794 | |
795 RE(t2) = RE(cc[ac]) + RE(cc[ac+3]); | |
796 IM(t2) = IM(cc[ac]) + IM(cc[ac+3]); | |
797 RE(t3) = RE(cc[ac+1]) + RE(cc[ac+2]); | |
798 IM(t3) = IM(cc[ac+1]) + IM(cc[ac+2]); | |
799 RE(t4) = RE(cc[ac+1]) - RE(cc[ac+2]); | |
800 IM(t4) = IM(cc[ac+1]) - IM(cc[ac+2]); | |
801 RE(t5) = RE(cc[ac]) - RE(cc[ac+3]); | |
802 IM(t5) = IM(cc[ac]) - IM(cc[ac+3]); | |
803 | |
804 RE(ch[ah]) = RE(cc[ac-1]) + RE(t2) + RE(t3); | |
805 IM(ch[ah]) = IM(cc[ac-1]) + IM(t2) + IM(t3); | |
806 | |
807 RE(c2) = RE(cc[ac-1]) + MUL_F(RE(t2),tr11) + MUL_F(RE(t3),tr12); | |
808 IM(c2) = IM(cc[ac-1]) + MUL_F(IM(t2),tr11) + MUL_F(IM(t3),tr12); | |
809 RE(c3) = RE(cc[ac-1]) + MUL_F(RE(t2),tr12) + MUL_F(RE(t3),tr11); | |
810 IM(c3) = IM(cc[ac-1]) + MUL_F(IM(t2),tr12) + MUL_F(IM(t3),tr11); | |
811 | |
812 ComplexMult(&RE(c5), &RE(c4), | |
813 ti11, ti12, RE(t5), RE(t4)); | |
814 ComplexMult(&IM(c5), &IM(c4), | |
815 ti11, ti12, IM(t5), IM(t4)); | |
10725 | 816 |
12527 | 817 RE(ch[ah+l1]) = RE(c2) - IM(c5); |
818 IM(ch[ah+l1]) = IM(c2) + RE(c5); | |
819 RE(ch[ah+2*l1]) = RE(c3) - IM(c4); | |
820 IM(ch[ah+2*l1]) = IM(c3) + RE(c4); | |
821 RE(ch[ah+3*l1]) = RE(c3) + IM(c4); | |
822 IM(ch[ah+3*l1]) = IM(c3) - RE(c4); | |
823 RE(ch[ah+4*l1]) = RE(c2) + IM(c5); | |
824 IM(ch[ah+4*l1]) = IM(c2) - RE(c5); | |
825 } | |
826 } else { | |
827 for (k = 0; k < l1; k++) | |
828 { | |
829 ac = 5*k + 1; | |
830 ah = k; | |
831 | |
832 RE(t2) = RE(cc[ac]) + RE(cc[ac+3]); | |
833 IM(t2) = IM(cc[ac]) + IM(cc[ac+3]); | |
834 RE(t3) = RE(cc[ac+1]) + RE(cc[ac+2]); | |
835 IM(t3) = IM(cc[ac+1]) + IM(cc[ac+2]); | |
836 RE(t4) = RE(cc[ac+1]) - RE(cc[ac+2]); | |
837 IM(t4) = IM(cc[ac+1]) - IM(cc[ac+2]); | |
838 RE(t5) = RE(cc[ac]) - RE(cc[ac+3]); | |
839 IM(t5) = IM(cc[ac]) - IM(cc[ac+3]); | |
840 | |
841 RE(ch[ah]) = RE(cc[ac-1]) + RE(t2) + RE(t3); | |
842 IM(ch[ah]) = IM(cc[ac-1]) + IM(t2) + IM(t3); | |
843 | |
844 RE(c2) = RE(cc[ac-1]) + MUL_F(RE(t2),tr11) + MUL_F(RE(t3),tr12); | |
845 IM(c2) = IM(cc[ac-1]) + MUL_F(IM(t2),tr11) + MUL_F(IM(t3),tr12); | |
846 RE(c3) = RE(cc[ac-1]) + MUL_F(RE(t2),tr12) + MUL_F(RE(t3),tr11); | |
847 IM(c3) = IM(cc[ac-1]) + MUL_F(IM(t2),tr12) + MUL_F(IM(t3),tr11); | |
848 | |
849 ComplexMult(&RE(c4), &RE(c5), | |
850 ti12, ti11, RE(t5), RE(t4)); | |
851 ComplexMult(&IM(c4), &IM(c5), | |
852 ti12, ti12, IM(t5), IM(t4)); | |
10725 | 853 |
12527 | 854 RE(ch[ah+l1]) = RE(c2) + IM(c5); |
855 IM(ch[ah+l1]) = IM(c2) - RE(c5); | |
856 RE(ch[ah+2*l1]) = RE(c3) + IM(c4); | |
857 IM(ch[ah+2*l1]) = IM(c3) - RE(c4); | |
858 RE(ch[ah+3*l1]) = RE(c3) - IM(c4); | |
859 IM(ch[ah+3*l1]) = IM(c3) + RE(c4); | |
860 RE(ch[ah+4*l1]) = RE(c2) - IM(c5); | |
861 IM(ch[ah+4*l1]) = IM(c2) + RE(c5); | |
862 } | |
863 } | |
864 } else { | |
865 if (isign == 1) | |
866 { | |
867 for (k = 0; k < l1; k++) | |
868 { | |
869 for (i = 0; i < ido; i++) | |
870 { | |
871 ac = i + (k*5 + 1) * ido; | |
872 ah = i + k * ido; | |
873 | |
874 RE(t2) = RE(cc[ac]) + RE(cc[ac+3*ido]); | |
875 IM(t2) = IM(cc[ac]) + IM(cc[ac+3*ido]); | |
876 RE(t3) = RE(cc[ac+ido]) + RE(cc[ac+2*ido]); | |
877 IM(t3) = IM(cc[ac+ido]) + IM(cc[ac+2*ido]); | |
878 RE(t4) = RE(cc[ac+ido]) - RE(cc[ac+2*ido]); | |
879 IM(t4) = IM(cc[ac+ido]) - IM(cc[ac+2*ido]); | |
880 RE(t5) = RE(cc[ac]) - RE(cc[ac+3*ido]); | |
881 IM(t5) = IM(cc[ac]) - IM(cc[ac+3*ido]); | |
882 | |
883 RE(ch[ah]) = RE(cc[ac-ido]) + RE(t2) + RE(t3); | |
884 IM(ch[ah]) = IM(cc[ac-ido]) + IM(t2) + IM(t3); | |
885 | |
886 RE(c2) = RE(cc[ac-ido]) + MUL_F(RE(t2),tr11) + MUL_F(RE(t3),tr12); | |
887 IM(c2) = IM(cc[ac-ido]) + MUL_F(IM(t2),tr11) + MUL_F(IM(t3),tr12); | |
888 RE(c3) = RE(cc[ac-ido]) + MUL_F(RE(t2),tr12) + MUL_F(RE(t3),tr11); | |
889 IM(c3) = IM(cc[ac-ido]) + MUL_F(IM(t2),tr12) + MUL_F(IM(t3),tr11); | |
890 | |
891 ComplexMult(&RE(c5), &RE(c4), | |
892 ti11, ti12, RE(t5), RE(t4)); | |
893 ComplexMult(&IM(c5), &IM(c4), | |
894 ti11, ti12, IM(t5), IM(t4)); | |
895 | |
896 IM(d2) = IM(c2) + RE(c5); | |
897 IM(d3) = IM(c3) + RE(c4); | |
898 RE(d4) = RE(c3) + IM(c4); | |
899 RE(d5) = RE(c2) + IM(c5); | |
900 RE(d2) = RE(c2) - IM(c5); | |
901 IM(d5) = IM(c2) - RE(c5); | |
902 RE(d3) = RE(c3) - IM(c4); | |
903 IM(d4) = IM(c3) - RE(c4); | |
10725 | 904 |
12527 | 905 #if 1 |
906 ComplexMult(&IM(ch[ah+l1*ido]), &RE(ch[ah+l1*ido]), | |
907 IM(d2), RE(d2), RE(wa1[i]), IM(wa1[i])); | |
908 ComplexMult(&IM(ch[ah+2*l1*ido]), &RE(ch[ah+2*l1*ido]), | |
909 IM(d3), RE(d3), RE(wa2[i]), IM(wa2[i])); | |
910 ComplexMult(&IM(ch[ah+3*l1*ido]), &RE(ch[ah+3*l1*ido]), | |
911 IM(d4), RE(d4), RE(wa3[i]), IM(wa3[i])); | |
912 ComplexMult(&IM(ch[ah+4*l1*ido]), &RE(ch[ah+4*l1*ido]), | |
913 IM(d5), RE(d5), RE(wa4[i]), IM(wa4[i])); | |
914 #else | |
915 ComplexMult(&RE(ch[ah+l1*ido]), &IM(ch[ah+l1*ido]), | |
916 RE(d2), IM(d2), RE(wa1[i]), IM(wa1[i])); | |
917 ComplexMult(&RE(ch[ah+2*l1*ido]), &IM(ch[ah+2*l1*ido]), | |
918 RE(d3), IM(d3), RE(wa2[i]), IM(wa2[i])); | |
919 ComplexMult(&RE(ch[ah+3*l1*ido]), &IM(ch[ah+3*l1*ido]), | |
920 RE(d4), IM(d4), RE(wa3[i]), IM(wa3[i])); | |
921 ComplexMult(&RE(ch[ah+4*l1*ido]), &IM(ch[ah+4*l1*ido]), | |
922 RE(d5), IM(d5), RE(wa4[i]), IM(wa4[i])); | |
923 #endif | |
924 } | |
925 } | |
926 } else { | |
927 for (k = 0; k < l1; k++) | |
928 { | |
929 for (i = 0; i < ido; i++) | |
930 { | |
931 ac = i + (k*5 + 1) * ido; | |
932 ah = i + k * ido; | |
933 | |
934 RE(t2) = RE(cc[ac]) + RE(cc[ac+3*ido]); | |
935 IM(t2) = IM(cc[ac]) + IM(cc[ac+3*ido]); | |
936 RE(t3) = RE(cc[ac+ido]) + RE(cc[ac+2*ido]); | |
937 IM(t3) = IM(cc[ac+ido]) + IM(cc[ac+2*ido]); | |
938 RE(t4) = RE(cc[ac+ido]) - RE(cc[ac+2*ido]); | |
939 IM(t4) = IM(cc[ac+ido]) - IM(cc[ac+2*ido]); | |
940 RE(t5) = RE(cc[ac]) - RE(cc[ac+3*ido]); | |
941 IM(t5) = IM(cc[ac]) - IM(cc[ac+3*ido]); | |
10725 | 942 |
12527 | 943 RE(ch[ah]) = RE(cc[ac-ido]) + RE(t2) + RE(t3); |
944 IM(ch[ah]) = IM(cc[ac-ido]) + IM(t2) + IM(t3); | |
945 | |
946 RE(c2) = RE(cc[ac-ido]) + MUL_F(RE(t2),tr11) + MUL_F(RE(t3),tr12); | |
947 IM(c2) = IM(cc[ac-ido]) + MUL_F(IM(t2),tr11) + MUL_F(IM(t3),tr12); | |
948 RE(c3) = RE(cc[ac-ido]) + MUL_F(RE(t2),tr12) + MUL_F(RE(t3),tr11); | |
949 IM(c3) = IM(cc[ac-ido]) + MUL_F(IM(t2),tr12) + MUL_F(IM(t3),tr11); | |
950 | |
951 ComplexMult(&RE(c4), &RE(c5), | |
952 ti12, ti11, RE(t5), RE(t4)); | |
953 ComplexMult(&IM(c4), &IM(c5), | |
954 ti12, ti12, IM(t5), IM(t4)); | |
955 | |
956 IM(d2) = IM(c2) - RE(c5); | |
957 IM(d3) = IM(c3) - RE(c4); | |
958 RE(d4) = RE(c3) - IM(c4); | |
959 RE(d5) = RE(c2) - IM(c5); | |
960 RE(d2) = RE(c2) + IM(c5); | |
961 IM(d5) = IM(c2) + RE(c5); | |
962 RE(d3) = RE(c3) + IM(c4); | |
963 IM(d4) = IM(c3) + RE(c4); | |
964 | |
965 #if 1 | |
966 ComplexMult(&RE(ch[ah+l1*ido]), &IM(ch[ah+l1*ido]), | |
967 RE(d2), IM(d2), RE(wa1[i]), IM(wa1[i])); | |
968 ComplexMult(&RE(ch[ah+2*l1*ido]), &IM(ch[ah+2*l1*ido]), | |
969 RE(d3), IM(d3), RE(wa2[i]), IM(wa2[i])); | |
970 ComplexMult(&RE(ch[ah+3*l1*ido]), &IM(ch[ah+3*l1*ido]), | |
971 RE(d4), IM(d4), RE(wa3[i]), IM(wa3[i])); | |
972 ComplexMult(&RE(ch[ah+4*l1*ido]), &IM(ch[ah+4*l1*ido]), | |
973 RE(d5), IM(d5), RE(wa4[i]), IM(wa4[i])); | |
974 #else | |
975 ComplexMult(&IM(ch[ah+l1*ido]), &RE(ch[ah+l1*ido]), | |
976 IM(d2), RE(d2), RE(wa1[i]), IM(wa1[i])); | |
977 ComplexMult(&IM(ch[ah+2*l1*ido]), &RE(ch[ah+2*l1*ido]), | |
978 IM(d3), RE(d3), RE(wa2[i]), IM(wa2[i])); | |
979 ComplexMult(&IM(ch[ah+3*l1*ido]), &RE(ch[ah+3*l1*ido]), | |
980 IM(d4), RE(d4), RE(wa3[i]), IM(wa3[i])); | |
981 ComplexMult(&IM(ch[ah+4*l1*ido]), &RE(ch[ah+4*l1*ido]), | |
982 IM(d5), RE(d5), RE(wa4[i]), IM(wa4[i])); | |
983 #endif | |
984 } | |
10725 | 985 } |
986 } | |
987 } | |
988 } | |
989 | |
990 | |
991 /*---------------------------------------------------------------------- | |
992 cfftf1, cfftf, cfftb, cffti1, cffti. Complex FFTs. | |
993 ----------------------------------------------------------------------*/ | |
994 | |
12527 | 995 #ifdef USE_SSE |
996 | |
997 #define CONV(A,B,C) ( (A<<2) | ((B & 0x1)<<1) | ((C==1)&0x1) ) | |
998 | |
999 static INLINE void cfftf1pos_sse(uint16_t n, complex_t *c, complex_t *ch, | |
1000 const uint16_t *ifac, const complex_t *wa, | |
1001 const int8_t isign) | |
1002 { | |
1003 uint16_t i; | |
1004 uint16_t k1, l1, l2; | |
1005 uint16_t na, nf, ip, iw, ix2, ix3, ix4, ido, idl1; | |
1006 | |
1007 nf = ifac[1]; | |
1008 na = 0; | |
1009 l1 = 1; | |
1010 iw = 0; | |
1011 | |
1012 for (k1 = 2; k1 <= nf+1; k1++) | |
1013 { | |
1014 ip = ifac[k1]; | |
1015 l2 = ip*l1; | |
1016 ido = n / l2; | |
1017 idl1 = ido*l1; | |
1018 | |
1019 ix2 = iw + ido; | |
1020 ix3 = ix2 + ido; | |
1021 ix4 = ix3 + ido; | |
1022 | |
1023 switch (CONV(ip,na,ido)) | |
1024 { | |
1025 case CONV(4,0,0): | |
1026 //passf4pos_sse_ido((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3]); | |
1027 passf4pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3]); | |
1028 break; | |
1029 case CONV(4,0,1): | |
1030 passf4pos_sse((const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3]); | |
1031 break; | |
1032 case CONV(4,1,0): | |
1033 passf4pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3]); | |
1034 //passf4pos_sse_ido((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3]); | |
1035 break; | |
1036 case CONV(4,1,1): | |
1037 passf4pos_sse((const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3]); | |
1038 break; | |
1039 case CONV(2,0,0): | |
1040 passf2pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw]); | |
1041 //passf2pos_sse_ido((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw]); | |
1042 break; | |
1043 case CONV(2,0,1): | |
1044 passf2pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw]); | |
1045 //passf2pos_sse((const uint16_t)l1, (const complex_t*)c, ch, &wa[iw]); | |
1046 break; | |
1047 case CONV(2,1,0): | |
1048 passf2pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw]); | |
1049 //passf2pos_sse_ido((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw]); | |
1050 break; | |
1051 case CONV(2,1,1): | |
1052 passf2pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw]); | |
1053 //passf2pos_sse((const uint16_t)l1, (const complex_t*)ch, c, &wa[iw]); | |
1054 break; | |
1055 case CONV(3,0,0): | |
1056 case CONV(3,0,1): | |
1057 passf3((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], isign); | |
1058 break; | |
1059 case CONV(3,1,0): | |
1060 case CONV(3,1,1): | |
1061 passf3((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], isign); | |
1062 break; | |
1063 case CONV(5,0,0): | |
1064 case CONV(5,0,1): | |
1065 passf5((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign); | |
1066 break; | |
1067 case CONV(5,1,0): | |
1068 case CONV(5,1,1): | |
1069 passf5((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign); | |
1070 break; | |
1071 } | |
1072 | |
1073 na = 1 - na; | |
1074 | |
1075 l1 = l2; | |
1076 iw += (ip-1) * ido; | |
1077 } | |
1078 | |
1079 if (na == 0) | |
1080 return; | |
1081 | |
1082 for (i = 0; i < n; i++) | |
1083 { | |
1084 RE(c[i]) = RE(ch[i]); | |
1085 IM(c[i]) = IM(ch[i]); | |
1086 } | |
1087 } | |
1088 #endif | |
1089 | |
1090 static INLINE void cfftf1pos(uint16_t n, complex_t *c, complex_t *ch, | |
1091 const uint16_t *ifac, const complex_t *wa, | |
1092 const int8_t isign) | |
10725 | 1093 { |
1094 uint16_t i; | |
1095 uint16_t k1, l1, l2; | |
1096 uint16_t na, nf, ip, iw, ix2, ix3, ix4, ido, idl1; | |
1097 | |
1098 nf = ifac[1]; | |
1099 na = 0; | |
1100 l1 = 1; | |
1101 iw = 0; | |
1102 | |
1103 for (k1 = 2; k1 <= nf+1; k1++) | |
1104 { | |
1105 ip = ifac[k1]; | |
1106 l2 = ip*l1; | |
1107 ido = n / l2; | |
1108 idl1 = ido*l1; | |
1109 | |
1110 switch (ip) | |
1111 { | |
10989 | 1112 case 4: |
1113 ix2 = iw + ido; | |
1114 ix3 = ix2 + ido; | |
1115 | |
1116 if (na == 0) | |
12527 | 1117 passf4pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3]); |
10989 | 1118 else |
12527 | 1119 passf4pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3]); |
10989 | 1120 |
1121 na = 1 - na; | |
1122 break; | |
10725 | 1123 case 2: |
1124 if (na == 0) | |
12527 | 1125 passf2pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw]); |
10725 | 1126 else |
12527 | 1127 passf2pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw]); |
10725 | 1128 |
1129 na = 1 - na; | |
1130 break; | |
1131 case 3: | |
1132 ix2 = iw + ido; | |
1133 | |
1134 if (na == 0) | |
12527 | 1135 passf3((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], isign); |
10725 | 1136 else |
12527 | 1137 passf3((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], isign); |
10725 | 1138 |
1139 na = 1 - na; | |
1140 break; | |
1141 case 5: | |
1142 ix2 = iw + ido; | |
1143 ix3 = ix2 + ido; | |
1144 ix4 = ix3 + ido; | |
1145 | |
1146 if (na == 0) | |
12527 | 1147 passf5((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign); |
10725 | 1148 else |
12527 | 1149 passf5((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign); |
1150 | |
1151 na = 1 - na; | |
1152 break; | |
1153 } | |
1154 | |
1155 l1 = l2; | |
1156 iw += (ip-1) * ido; | |
1157 } | |
1158 | |
1159 if (na == 0) | |
1160 return; | |
1161 | |
1162 for (i = 0; i < n; i++) | |
1163 { | |
1164 RE(c[i]) = RE(ch[i]); | |
1165 IM(c[i]) = IM(ch[i]); | |
1166 } | |
1167 } | |
1168 | |
1169 static INLINE void cfftf1neg(uint16_t n, complex_t *c, complex_t *ch, | |
1170 const uint16_t *ifac, const complex_t *wa, | |
1171 const int8_t isign) | |
1172 { | |
1173 uint16_t i; | |
1174 uint16_t k1, l1, l2; | |
1175 uint16_t na, nf, ip, iw, ix2, ix3, ix4, ido, idl1; | |
1176 | |
1177 nf = ifac[1]; | |
1178 na = 0; | |
1179 l1 = 1; | |
1180 iw = 0; | |
1181 | |
1182 for (k1 = 2; k1 <= nf+1; k1++) | |
1183 { | |
1184 ip = ifac[k1]; | |
1185 l2 = ip*l1; | |
1186 ido = n / l2; | |
1187 idl1 = ido*l1; | |
1188 | |
1189 switch (ip) | |
1190 { | |
1191 case 4: | |
1192 ix2 = iw + ido; | |
1193 ix3 = ix2 + ido; | |
1194 | |
1195 if (na == 0) | |
1196 passf4neg((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3]); | |
1197 else | |
1198 passf4neg((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3]); | |
1199 | |
1200 na = 1 - na; | |
1201 break; | |
1202 case 2: | |
1203 if (na == 0) | |
1204 passf2neg((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw]); | |
1205 else | |
1206 passf2neg((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw]); | |
1207 | |
1208 na = 1 - na; | |
1209 break; | |
1210 case 3: | |
1211 ix2 = iw + ido; | |
1212 | |
1213 if (na == 0) | |
1214 passf3((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], isign); | |
1215 else | |
1216 passf3((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], isign); | |
1217 | |
1218 na = 1 - na; | |
1219 break; | |
1220 case 5: | |
1221 ix2 = iw + ido; | |
1222 ix3 = ix2 + ido; | |
1223 ix4 = ix3 + ido; | |
1224 | |
1225 if (na == 0) | |
1226 passf5((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign); | |
1227 else | |
1228 passf5((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign); | |
10725 | 1229 |
1230 na = 1 - na; | |
1231 break; | |
1232 } | |
1233 | |
1234 l1 = l2; | |
1235 iw += (ip-1) * ido; | |
1236 } | |
1237 | |
1238 if (na == 0) | |
1239 return; | |
1240 | |
1241 for (i = 0; i < n; i++) | |
1242 { | |
1243 RE(c[i]) = RE(ch[i]); | |
1244 IM(c[i]) = IM(ch[i]); | |
1245 } | |
1246 } | |
1247 | |
1248 void cfftf(cfft_info *cfft, complex_t *c) | |
1249 { | |
12527 | 1250 cfftf1neg(cfft->n, c, cfft->work, (const uint16_t*)cfft->ifac, (const complex_t*)cfft->tab, -1); |
10725 | 1251 } |
1252 | |
1253 void cfftb(cfft_info *cfft, complex_t *c) | |
1254 { | |
12527 | 1255 cfftf1pos(cfft->n, c, cfft->work, (const uint16_t*)cfft->ifac, (const complex_t*)cfft->tab, +1); |
10725 | 1256 } |
1257 | |
12527 | 1258 #ifdef USE_SSE |
1259 void cfftb_sse(cfft_info *cfft, complex_t *c) | |
1260 { | |
1261 cfftf1pos_sse(cfft->n, c, cfft->work, (const uint16_t*)cfft->ifac, (const complex_t*)cfft->tab, +1); | |
1262 } | |
1263 #endif | |
1264 | |
10725 | 1265 static void cffti1(uint16_t n, complex_t *wa, uint16_t *ifac) |
1266 { | |
1267 static uint16_t ntryh[4] = {3, 4, 2, 5}; | |
1268 #ifndef FIXED_POINT | |
1269 real_t arg, argh, argld, fi; | |
1270 uint16_t ido, ipm; | |
1271 uint16_t i1, k1, l1, l2; | |
1272 uint16_t ld, ii, ip; | |
1273 #endif | |
12527 | 1274 uint16_t ntry = 0, i, j; |
10725 | 1275 uint16_t ib; |
1276 uint16_t nf, nl, nq, nr; | |
1277 | |
1278 nl = n; | |
1279 nf = 0; | |
1280 j = 0; | |
1281 | |
1282 startloop: | |
1283 j++; | |
1284 | |
1285 if (j <= 4) | |
1286 ntry = ntryh[j-1]; | |
1287 else | |
1288 ntry += 2; | |
1289 | |
1290 do | |
1291 { | |
1292 nq = nl / ntry; | |
1293 nr = nl - ntry*nq; | |
1294 | |
1295 if (nr != 0) | |
1296 goto startloop; | |
1297 | |
1298 nf++; | |
1299 ifac[nf+1] = ntry; | |
1300 nl = nq; | |
1301 | |
1302 if (ntry == 2 && nf != 1) | |
1303 { | |
1304 for (i = 2; i <= nf; i++) | |
1305 { | |
1306 ib = nf - i + 2; | |
1307 ifac[ib+1] = ifac[ib]; | |
1308 } | |
1309 ifac[2] = 2; | |
1310 } | |
1311 } while (nl != 1); | |
1312 | |
1313 ifac[0] = n; | |
1314 ifac[1] = nf; | |
1315 | |
1316 #ifndef FIXED_POINT | |
12527 | 1317 argh = (real_t)2.0*(real_t)M_PI / (real_t)n; |
10725 | 1318 i = 0; |
1319 l1 = 1; | |
1320 | |
1321 for (k1 = 1; k1 <= nf; k1++) | |
1322 { | |
1323 ip = ifac[k1+1]; | |
1324 ld = 0; | |
1325 l2 = l1*ip; | |
1326 ido = n / l2; | |
1327 ipm = ip - 1; | |
1328 | |
1329 for (j = 0; j < ipm; j++) | |
1330 { | |
1331 i1 = i; | |
1332 RE(wa[i]) = 1.0; | |
1333 IM(wa[i]) = 0.0; | |
1334 ld += l1; | |
1335 fi = 0; | |
1336 argld = ld*argh; | |
1337 | |
1338 for (ii = 0; ii < ido; ii++) | |
1339 { | |
1340 i++; | |
1341 fi++; | |
1342 arg = fi * argld; | |
10989 | 1343 RE(wa[i]) = (real_t)cos(arg); |
12527 | 1344 #if 1 |
10989 | 1345 IM(wa[i]) = (real_t)sin(arg); |
12527 | 1346 #else |
1347 IM(wa[i]) = (real_t)-sin(arg); | |
1348 #endif | |
10725 | 1349 } |
1350 | |
1351 if (ip > 5) | |
1352 { | |
1353 RE(wa[i1]) = RE(wa[i]); | |
1354 IM(wa[i1]) = IM(wa[i]); | |
1355 } | |
1356 } | |
1357 l1 = l2; | |
1358 } | |
1359 #endif | |
1360 } | |
1361 | |
1362 cfft_info *cffti(uint16_t n) | |
1363 { | |
12527 | 1364 cfft_info *cfft = (cfft_info*)faad_malloc(sizeof(cfft_info)); |
10725 | 1365 |
1366 cfft->n = n; | |
12527 | 1367 cfft->work = (complex_t*)faad_malloc(n*sizeof(complex_t)); |
10725 | 1368 |
1369 #ifndef FIXED_POINT | |
12527 | 1370 cfft->tab = (complex_t*)faad_malloc(n*sizeof(complex_t)); |
10725 | 1371 |
1372 cffti1(n, cfft->tab, cfft->ifac); | |
1373 #else | |
1374 cffti1(n, NULL, cfft->ifac); | |
1375 | |
1376 switch (n) | |
1377 { | |
13453
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
1378 case 64: cfft->tab = (complex_t*)cfft_tab_64; break; |
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
1379 case 512: cfft->tab = (complex_t*)cfft_tab_512; break; |
10725 | 1380 #ifdef LD_DEC |
13453
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
1381 case 256: cfft->tab = (complex_t*)cfft_tab_256; break; |
12527 | 1382 #endif |
1383 | |
1384 #ifdef ALLOW_SMALL_FRAMELENGTH | |
13453
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
1385 case 60: cfft->tab = (complex_t*)cfft_tab_60; break; |
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
1386 case 480: cfft->tab = (complex_t*)cfft_tab_480; break; |
12527 | 1387 #ifdef LD_DEC |
13453
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
1388 case 240: cfft->tab = (complex_t*)cfft_tab_240; break; |
12527 | 1389 #endif |
10725 | 1390 #endif |
1391 } | |
1392 #endif | |
1393 | |
1394 return cfft; | |
1395 } | |
1396 | |
1397 void cfftu(cfft_info *cfft) | |
1398 { | |
12527 | 1399 if (cfft->work) faad_free(cfft->work); |
10725 | 1400 #ifndef FIXED_POINT |
12527 | 1401 if (cfft->tab) faad_free(cfft->tab); |
10725 | 1402 #endif |
1403 | |
12527 | 1404 if (cfft) faad_free(cfft); |
10989 | 1405 } |
12527 | 1406 |