Mercurial > mplayer.hg
comparison libfaad2/cfft.c @ 18141:59b6fa5b4201
Update to faad2 cvs 20040915+MPlayer fixes
Patch by me and Emanuele Giaquinta
author | rtognimp |
---|---|
date | Tue, 18 Apr 2006 19:39:34 +0000 |
parents | 2ae5ab4331ca |
children |
comparison
equal
deleted
inserted
replaced
18140:e371c7e18402 | 18141:59b6fa5b4201 |
---|---|
20 ** forbidden. | 20 ** forbidden. |
21 ** | 21 ** |
22 ** Commercial non-GPL licensing of this software is possible. | 22 ** Commercial non-GPL licensing of this software is possible. |
23 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com. | 23 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com. |
24 ** | 24 ** |
25 ** $Id: cfft.c,v 1.27 2004/06/30 12:45:55 menno Exp $ | 25 ** $Id: cfft.c,v 1.30 2004/09/08 09:43:11 gcp Exp $ |
26 **/ | 26 **/ |
27 | 27 |
28 /* | 28 /* |
29 * Algorithmically based on Fortran-77 FFTPACK | 29 * Algorithmically based on Fortran-77 FFTPACK |
30 * by Paul N. Swarztrauber(Version 4, 1985). | 30 * by Paul N. Swarztrauber(Version 4, 1985). |
42 #include "cfft.h" | 42 #include "cfft.h" |
43 #include "cfft_tab.h" | 43 #include "cfft_tab.h" |
44 | 44 |
45 | 45 |
46 /* static function declarations */ | 46 /* static function declarations */ |
47 #ifdef USE_SSE | |
48 static void passf2pos_sse(const uint16_t l1, const complex_t *cc, | |
49 complex_t *ch, const complex_t *wa); | |
50 static void passf2pos_sse_ido(const uint16_t ido, const uint16_t l1, const complex_t *cc, | |
51 complex_t *ch, const complex_t *wa); | |
52 static void passf4pos_sse_ido(const uint16_t ido, const uint16_t l1, const complex_t *cc, complex_t *ch, | |
53 const complex_t *wa1, const complex_t *wa2, const complex_t *wa3); | |
54 #endif | |
55 static void passf2pos(const uint16_t ido, const uint16_t l1, const complex_t *cc, | 47 static void passf2pos(const uint16_t ido, const uint16_t l1, const complex_t *cc, |
56 complex_t *ch, const complex_t *wa); | 48 complex_t *ch, const complex_t *wa); |
57 static void passf2neg(const uint16_t ido, const uint16_t l1, const complex_t *cc, | 49 static void passf2neg(const uint16_t ido, const uint16_t l1, const complex_t *cc, |
58 complex_t *ch, const complex_t *wa); | 50 complex_t *ch, const complex_t *wa); |
59 static void passf3(const uint16_t ido, const uint16_t l1, const complex_t *cc, | 51 static void passf3(const uint16_t ido, const uint16_t l1, const complex_t *cc, |
72 | 64 |
73 /*---------------------------------------------------------------------- | 65 /*---------------------------------------------------------------------- |
74 passf2, passf3, passf4, passf5. Complex FFT passes fwd and bwd. | 66 passf2, passf3, passf4, passf5. Complex FFT passes fwd and bwd. |
75 ----------------------------------------------------------------------*/ | 67 ----------------------------------------------------------------------*/ |
76 | 68 |
77 #if 0 //def USE_SSE | |
78 static void passf2pos_sse(const uint16_t l1, const complex_t *cc, | |
79 complex_t *ch, const complex_t *wa) | |
80 { | |
81 uint16_t k, ah, ac; | |
82 | |
83 for (k = 0; k < l1; k++) | |
84 { | |
85 ah = 2*k; | |
86 ac = 4*k; | |
87 | |
88 RE(ch[ah]) = RE(cc[ac]) + RE(cc[ac+1]); | |
89 IM(ch[ah]) = IM(cc[ac]) + IM(cc[ac+1]); | |
90 | |
91 RE(ch[ah+l1]) = RE(cc[ac]) - RE(cc[ac+1]); | |
92 IM(ch[ah+l1]) = IM(cc[ac]) - IM(cc[ac+1]); | |
93 } | |
94 } | |
95 | |
96 static void passf2pos_sse_ido(const uint16_t ido, const uint16_t l1, const complex_t *cc, | |
97 complex_t *ch, const complex_t *wa) | |
98 { | |
99 uint16_t i, k, ah, ac; | |
100 | |
101 for (k = 0; k < l1; k++) | |
102 { | |
103 ah = k*ido; | |
104 ac = 2*k*ido; | |
105 | |
106 for (i = 0; i < ido; i+=4) | |
107 { | |
108 __m128 m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12, m13, m14; | |
109 __m128 m15, m16, m17, m18, m19, m20, m21, m22, m23, m24; | |
110 __m128 w1, w2, w3, w4; | |
111 | |
112 m1 = _mm_load_ps(&RE(cc[ac+i])); | |
113 m2 = _mm_load_ps(&RE(cc[ac+ido+i])); | |
114 m5 = _mm_load_ps(&RE(cc[ac+i+2])); | |
115 m6 = _mm_load_ps(&RE(cc[ac+ido+i+2])); | |
116 w1 = _mm_load_ps(&RE(wa[i])); | |
117 w3 = _mm_load_ps(&RE(wa[i+2])); | |
118 | |
119 m3 = _mm_add_ps(m1, m2); | |
120 m15 = _mm_add_ps(m5, m6); | |
121 | |
122 m4 = _mm_sub_ps(m1, m2); | |
123 m16 = _mm_sub_ps(m5, m6); | |
124 | |
125 _mm_store_ps(&RE(ch[ah+i]), m3); | |
126 _mm_store_ps(&RE(ch[ah+i+2]), m15); | |
127 | |
128 | |
129 w2 = _mm_shuffle_ps(w1, w1, _MM_SHUFFLE(2, 3, 0, 1)); | |
130 w4 = _mm_shuffle_ps(w3, w3, _MM_SHUFFLE(2, 3, 0, 1)); | |
131 | |
132 m7 = _mm_mul_ps(m4, w1); | |
133 m17 = _mm_mul_ps(m16, w3); | |
134 m8 = _mm_mul_ps(m4, w2); | |
135 m18 = _mm_mul_ps(m16, w4); | |
136 | |
137 m9 = _mm_shuffle_ps(m7, m8, _MM_SHUFFLE(2, 0, 2, 0)); | |
138 m19 = _mm_shuffle_ps(m17, m18, _MM_SHUFFLE(2, 0, 2, 0)); | |
139 m10 = _mm_shuffle_ps(m7, m8, _MM_SHUFFLE(3, 1, 3, 1)); | |
140 m20 = _mm_shuffle_ps(m17, m18, _MM_SHUFFLE(3, 1, 3, 1)); | |
141 | |
142 m11 = _mm_add_ps(m9, m10); | |
143 m21 = _mm_add_ps(m19, m20); | |
144 m12 = _mm_sub_ps(m9, m10); | |
145 m22 = _mm_sub_ps(m19, m20); | |
146 | |
147 m13 = _mm_shuffle_ps(m11, m11, _MM_SHUFFLE(0, 0, 3, 2)); | |
148 m23 = _mm_shuffle_ps(m21, m21, _MM_SHUFFLE(0, 0, 3, 2)); | |
149 | |
150 m14 = _mm_unpacklo_ps(m12, m13); | |
151 m24 = _mm_unpacklo_ps(m22, m23); | |
152 | |
153 _mm_store_ps(&RE(ch[ah+i+l1*ido]), m14); | |
154 _mm_store_ps(&RE(ch[ah+i+2+l1*ido]), m24); | |
155 } | |
156 } | |
157 } | |
158 #endif | |
159 | |
160 static void passf2pos(const uint16_t ido, const uint16_t l1, const complex_t *cc, | 69 static void passf2pos(const uint16_t ido, const uint16_t l1, const complex_t *cc, |
161 complex_t *ch, const complex_t *wa) | 70 complex_t *ch, const complex_t *wa) |
162 { | 71 { |
163 uint16_t i, k, ah, ac; | 72 uint16_t i, k, ah, ac; |
164 | 73 |
383 } | 292 } |
384 } | 293 } |
385 } | 294 } |
386 } | 295 } |
387 | 296 |
388 #ifdef USE_SSE | |
389 ALIGN static const int32_t negate[4] = { 0x0, 0x0, 0x0, 0x80000000 }; | |
390 | |
391 __declspec(naked) static void passf4pos_sse(const uint16_t l1, const complex_t *cc, | |
392 complex_t *ch, const complex_t *wa1, const complex_t *wa2, | |
393 const complex_t *wa3) | |
394 { | |
395 __asm { | |
396 push ebx | |
397 mov ebx, esp | |
398 and esp, -16 | |
399 push edi | |
400 push esi | |
401 sub esp, 8 | |
402 movzx edi, WORD PTR [ebx+8] | |
403 | |
404 movaps xmm1, XMMWORD PTR negate | |
405 | |
406 test edi, edi | |
407 jle l1_is_zero | |
408 | |
409 lea esi, DWORD PTR [edi+edi] | |
410 add esi, esi | |
411 sub esi, edi | |
412 add esi, esi | |
413 add esi, esi | |
414 add esi, esi | |
415 mov eax, DWORD PTR [ebx+16] | |
416 add esi, eax | |
417 lea ecx, DWORD PTR [edi+edi] | |
418 add ecx, ecx | |
419 add ecx, ecx | |
420 add ecx, ecx | |
421 add ecx, eax | |
422 lea edx, DWORD PTR [edi+edi] | |
423 add edx, edx | |
424 add edx, edx | |
425 add edx, eax | |
426 xor eax, eax | |
427 mov DWORD PTR [esp], ebp | |
428 mov ebp, DWORD PTR [ebx+12] | |
429 | |
430 fftloop: | |
431 lea edi, DWORD PTR [eax+eax] | |
432 add edi, edi | |
433 movaps xmm2, XMMWORD PTR [ebp+edi*8] | |
434 movaps xmm0, XMMWORD PTR [ebp+edi*8+16] | |
435 movaps xmm7, XMMWORD PTR [ebp+edi*8+32] | |
436 movaps xmm5, XMMWORD PTR [ebp+edi*8+48] | |
437 movaps xmm6, xmm2 | |
438 addps xmm6, xmm0 | |
439 movaps xmm4, xmm1 | |
440 xorps xmm4, xmm7 | |
441 movaps xmm3, xmm1 | |
442 xorps xmm3, xmm5 | |
443 xorps xmm2, xmm1 | |
444 xorps xmm0, xmm1 | |
445 addps xmm7, xmm5 | |
446 subps xmm2, xmm0 | |
447 movaps xmm0, xmm6 | |
448 shufps xmm0, xmm7, 68 | |
449 subps xmm4, xmm3 | |
450 shufps xmm6, xmm7, 238 | |
451 movaps xmm5, xmm2 | |
452 shufps xmm5, xmm4, 68 | |
453 movaps xmm3, xmm0 | |
454 addps xmm3, xmm6 | |
455 shufps xmm2, xmm4, 187 | |
456 subps xmm0, xmm6 | |
457 movaps xmm4, xmm5 | |
458 addps xmm4, xmm2 | |
459 mov edi, DWORD PTR [ebx+16] | |
460 movaps XMMWORD PTR [edi+eax*8], xmm3 | |
461 subps xmm5, xmm2 | |
462 movaps XMMWORD PTR [edx+eax*8], xmm4 | |
463 movaps XMMWORD PTR [ecx+eax*8], xmm0 | |
464 movaps XMMWORD PTR [esi+eax*8], xmm5 | |
465 add eax, 2 | |
466 movzx eax, ax | |
467 movzx edi, WORD PTR [ebx+8] | |
468 cmp eax, edi | |
469 jl fftloop | |
470 | |
471 mov ebp, DWORD PTR [esp] | |
472 | |
473 l1_is_zero: | |
474 | |
475 add esp, 8 | |
476 pop esi | |
477 pop edi | |
478 mov esp, ebx | |
479 pop ebx | |
480 ret | |
481 } | |
482 } | |
483 #endif | |
484 | |
485 #if 0 | |
486 static void passf4pos_sse_ido(const uint16_t ido, const uint16_t l1, const complex_t *cc, | |
487 complex_t *ch, const complex_t *wa1, const complex_t *wa2, | |
488 const complex_t *wa3) | |
489 { | |
490 uint16_t i, k, ac, ah; | |
491 | |
492 for (k = 0; k < l1; k++) | |
493 { | |
494 ac = 4*k*ido; | |
495 ah = k*ido; | |
496 | |
497 for (i = 0; i < ido; i+=2) | |
498 { | |
499 __m128 m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m12, m13, m14, m15, m16; | |
500 __m128 n1, n2, n3, n4, n5, n6, n7, n8, n9, m17, m18, m19, m20, m21, m22, m23; | |
501 __m128 w1, w2, w3, w4, w5, w6, m24, m25, m26, m27, m28, m29, m30; | |
502 __m128 neg1 = _mm_set_ps(-1.0, 1.0, -1.0, 1.0); | |
503 | |
504 m1 = _mm_load_ps(&RE(cc[ac+i])); | |
505 m2 = _mm_load_ps(&RE(cc[ac+i+2*ido])); | |
506 m3 = _mm_add_ps(m1, m2); | |
507 m4 = _mm_sub_ps(m1, m2); | |
508 | |
509 n1 = _mm_load_ps(&RE(cc[ac+i+ido])); | |
510 n2 = _mm_load_ps(&RE(cc[ac+i+3*ido])); | |
511 n3 = _mm_add_ps(n1, n2); | |
512 | |
513 n4 = _mm_mul_ps(neg1, n1); | |
514 n5 = _mm_mul_ps(neg1, n2); | |
515 n6 = _mm_sub_ps(n4, n5); | |
516 | |
517 m5 = _mm_add_ps(m3, n3); | |
518 | |
519 n7 = _mm_shuffle_ps(n6, n6, _MM_SHUFFLE(2, 3, 0, 1)); | |
520 n8 = _mm_add_ps(m4, n7); | |
521 | |
522 m6 = _mm_sub_ps(m3, n3); | |
523 n9 = _mm_sub_ps(m4, n7); | |
524 | |
525 _mm_store_ps(&RE(ch[ah+i]), m5); | |
526 | |
527 #if 0 | |
528 static INLINE void ComplexMult(real_t *y1, real_t *y2, | |
529 real_t x1, real_t x2, real_t c1, real_t c2) | |
530 { | |
531 *y1 = MUL_F(x1, c1) + MUL_F(x2, c2); | |
532 *y2 = MUL_F(x2, c1) - MUL_F(x1, c2); | |
533 } | |
534 | |
535 m7.0 = RE(c2)*RE(wa1[i]) | |
536 m7.1 = IM(c2)*IM(wa1[i]) | |
537 m7.2 = RE(c6)*RE(wa1[i+1]) | |
538 m7.3 = IM(c6)*IM(wa1[i+1]) | |
539 | |
540 m8.0 = RE(c2)*IM(wa1[i]) | |
541 m8.1 = IM(c2)*RE(wa1[i]) | |
542 m8.2 = RE(c6)*IM(wa1[i+1]) | |
543 m8.3 = IM(c6)*RE(wa1[i+1]) | |
544 | |
545 RE(0) = m7.0 - m7.1 | |
546 IM(0) = m8.0 + m8.1 | |
547 RE(1) = m7.2 - m7.3 | |
548 IM(1) = m8.2 + m8.3 | |
549 | |
550 //// | |
551 RE(0) = RE(c2)*RE(wa1[i]) - IM(c2)*IM(wa1[i]) | |
552 IM(0) = RE(c2)*IM(wa1[i]) + IM(c2)*RE(wa1[i]) | |
553 RE(1) = RE(c6)*RE(wa1[i+1]) - IM(c6)*IM(wa1[i+1]) | |
554 IM(1) = RE(c6)*IM(wa1[i+1]) + IM(c6)*RE(wa1[i+1]) | |
555 #endif | |
556 | |
557 w1 = _mm_load_ps(&RE(wa1[i])); | |
558 w3 = _mm_load_ps(&RE(wa2[i])); | |
559 w5 = _mm_load_ps(&RE(wa3[i])); | |
560 | |
561 w2 = _mm_shuffle_ps(w1, w1, _MM_SHUFFLE(2, 3, 0, 1)); | |
562 w4 = _mm_shuffle_ps(w3, w3, _MM_SHUFFLE(2, 3, 0, 1)); | |
563 w6 = _mm_shuffle_ps(w5, w5, _MM_SHUFFLE(2, 3, 0, 1)); | |
564 | |
565 m7 = _mm_mul_ps(n8, w1); | |
566 m15 = _mm_mul_ps(m6, w3); | |
567 m23 = _mm_mul_ps(n9, w5); | |
568 m8 = _mm_mul_ps(n8, w2); | |
569 m16 = _mm_mul_ps(m6, w4); | |
570 m24 = _mm_mul_ps(n9, w6); | |
571 | |
572 m9 = _mm_shuffle_ps(m7, m8, _MM_SHUFFLE(2, 0, 2, 0)); | |
573 m17 = _mm_shuffle_ps(m15, m16, _MM_SHUFFLE(2, 0, 2, 0)); | |
574 m25 = _mm_shuffle_ps(m23, m24, _MM_SHUFFLE(2, 0, 2, 0)); | |
575 m10 = _mm_shuffle_ps(m7, m8, _MM_SHUFFLE(3, 1, 3, 1)); | |
576 m18 = _mm_shuffle_ps(m15, m16, _MM_SHUFFLE(3, 1, 3, 1)); | |
577 m26 = _mm_shuffle_ps(m23, m24, _MM_SHUFFLE(3, 1, 3, 1)); | |
578 | |
579 m11 = _mm_add_ps(m9, m10); | |
580 m19 = _mm_add_ps(m17, m18); | |
581 m27 = _mm_add_ps(m25, m26); | |
582 m12 = _mm_sub_ps(m9, m10); | |
583 m20 = _mm_sub_ps(m17, m18); | |
584 m28 = _mm_sub_ps(m25, m26); | |
585 | |
586 m13 = _mm_shuffle_ps(m11, m11, _MM_SHUFFLE(0, 0, 3, 2)); | |
587 m21 = _mm_shuffle_ps(m19, m19, _MM_SHUFFLE(0, 0, 3, 2)); | |
588 m29 = _mm_shuffle_ps(m27, m27, _MM_SHUFFLE(0, 0, 3, 2)); | |
589 m14 = _mm_unpacklo_ps(m12, m13); | |
590 m22 = _mm_unpacklo_ps(m20, m21); | |
591 m30 = _mm_unpacklo_ps(m28, m29); | |
592 | |
593 _mm_store_ps(&RE(ch[ah+i+l1*ido]), m14); | |
594 _mm_store_ps(&RE(ch[ah+i+2*l1*ido]), m22); | |
595 _mm_store_ps(&RE(ch[ah+i+3*l1*ido]), m30); | |
596 } | |
597 } | |
598 } | |
599 #endif | |
600 | 297 |
601 static void passf4pos(const uint16_t ido, const uint16_t l1, const complex_t *cc, | 298 static void passf4pos(const uint16_t ido, const uint16_t l1, const complex_t *cc, |
602 complex_t *ch, const complex_t *wa1, const complex_t *wa2, | 299 complex_t *ch, const complex_t *wa1, const complex_t *wa2, |
603 const complex_t *wa3) | 300 const complex_t *wa3) |
604 { | 301 { |
990 | 687 |
991 /*---------------------------------------------------------------------- | 688 /*---------------------------------------------------------------------- |
992 cfftf1, cfftf, cfftb, cffti1, cffti. Complex FFTs. | 689 cfftf1, cfftf, cfftb, cffti1, cffti. Complex FFTs. |
993 ----------------------------------------------------------------------*/ | 690 ----------------------------------------------------------------------*/ |
994 | 691 |
995 #ifdef USE_SSE | |
996 | |
997 #define CONV(A,B,C) ( (A<<2) | ((B & 0x1)<<1) | ((C==1)&0x1) ) | |
998 | |
999 static INLINE void cfftf1pos_sse(uint16_t n, complex_t *c, complex_t *ch, | |
1000 const uint16_t *ifac, const complex_t *wa, | |
1001 const int8_t isign) | |
1002 { | |
1003 uint16_t i; | |
1004 uint16_t k1, l1, l2; | |
1005 uint16_t na, nf, ip, iw, ix2, ix3, ix4, ido, idl1; | |
1006 | |
1007 nf = ifac[1]; | |
1008 na = 0; | |
1009 l1 = 1; | |
1010 iw = 0; | |
1011 | |
1012 for (k1 = 2; k1 <= nf+1; k1++) | |
1013 { | |
1014 ip = ifac[k1]; | |
1015 l2 = ip*l1; | |
1016 ido = n / l2; | |
1017 idl1 = ido*l1; | |
1018 | |
1019 ix2 = iw + ido; | |
1020 ix3 = ix2 + ido; | |
1021 ix4 = ix3 + ido; | |
1022 | |
1023 switch (CONV(ip,na,ido)) | |
1024 { | |
1025 case CONV(4,0,0): | |
1026 //passf4pos_sse_ido((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3]); | |
1027 passf4pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3]); | |
1028 break; | |
1029 case CONV(4,0,1): | |
1030 passf4pos_sse((const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3]); | |
1031 break; | |
1032 case CONV(4,1,0): | |
1033 passf4pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3]); | |
1034 //passf4pos_sse_ido((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3]); | |
1035 break; | |
1036 case CONV(4,1,1): | |
1037 passf4pos_sse((const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3]); | |
1038 break; | |
1039 case CONV(2,0,0): | |
1040 passf2pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw]); | |
1041 //passf2pos_sse_ido((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw]); | |
1042 break; | |
1043 case CONV(2,0,1): | |
1044 passf2pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw]); | |
1045 //passf2pos_sse((const uint16_t)l1, (const complex_t*)c, ch, &wa[iw]); | |
1046 break; | |
1047 case CONV(2,1,0): | |
1048 passf2pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw]); | |
1049 //passf2pos_sse_ido((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw]); | |
1050 break; | |
1051 case CONV(2,1,1): | |
1052 passf2pos((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw]); | |
1053 //passf2pos_sse((const uint16_t)l1, (const complex_t*)ch, c, &wa[iw]); | |
1054 break; | |
1055 case CONV(3,0,0): | |
1056 case CONV(3,0,1): | |
1057 passf3((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], isign); | |
1058 break; | |
1059 case CONV(3,1,0): | |
1060 case CONV(3,1,1): | |
1061 passf3((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], isign); | |
1062 break; | |
1063 case CONV(5,0,0): | |
1064 case CONV(5,0,1): | |
1065 passf5((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)c, ch, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign); | |
1066 break; | |
1067 case CONV(5,1,0): | |
1068 case CONV(5,1,1): | |
1069 passf5((const uint16_t)ido, (const uint16_t)l1, (const complex_t*)ch, c, &wa[iw], &wa[ix2], &wa[ix3], &wa[ix4], isign); | |
1070 break; | |
1071 } | |
1072 | |
1073 na = 1 - na; | |
1074 | |
1075 l1 = l2; | |
1076 iw += (ip-1) * ido; | |
1077 } | |
1078 | |
1079 if (na == 0) | |
1080 return; | |
1081 | |
1082 for (i = 0; i < n; i++) | |
1083 { | |
1084 RE(c[i]) = RE(ch[i]); | |
1085 IM(c[i]) = IM(ch[i]); | |
1086 } | |
1087 } | |
1088 #endif | |
1089 | |
1090 static INLINE void cfftf1pos(uint16_t n, complex_t *c, complex_t *ch, | 692 static INLINE void cfftf1pos(uint16_t n, complex_t *c, complex_t *ch, |
1091 const uint16_t *ifac, const complex_t *wa, | 693 const uint16_t *ifac, const complex_t *wa, |
1092 const int8_t isign) | 694 const int8_t isign) |
1093 { | 695 { |
1094 uint16_t i; | 696 uint16_t i; |
1252 | 854 |
1253 void cfftb(cfft_info *cfft, complex_t *c) | 855 void cfftb(cfft_info *cfft, complex_t *c) |
1254 { | 856 { |
1255 cfftf1pos(cfft->n, c, cfft->work, (const uint16_t*)cfft->ifac, (const complex_t*)cfft->tab, +1); | 857 cfftf1pos(cfft->n, c, cfft->work, (const uint16_t*)cfft->ifac, (const complex_t*)cfft->tab, +1); |
1256 } | 858 } |
1257 | |
1258 #ifdef USE_SSE | |
1259 void cfftb_sse(cfft_info *cfft, complex_t *c) | |
1260 { | |
1261 cfftf1pos_sse(cfft->n, c, cfft->work, (const uint16_t*)cfft->ifac, (const complex_t*)cfft->tab, +1); | |
1262 } | |
1263 #endif | |
1264 | 859 |
1265 static void cffti1(uint16_t n, complex_t *wa, uint16_t *ifac) | 860 static void cffti1(uint16_t n, complex_t *wa, uint16_t *ifac) |
1266 { | 861 { |
1267 static uint16_t ntryh[4] = {3, 4, 2, 5}; | 862 static uint16_t ntryh[4] = {3, 4, 2, 5}; |
1268 #ifndef FIXED_POINT | 863 #ifndef FIXED_POINT |
1386 case 480: cfft->tab = (complex_t*)cfft_tab_480; break; | 981 case 480: cfft->tab = (complex_t*)cfft_tab_480; break; |
1387 #ifdef LD_DEC | 982 #ifdef LD_DEC |
1388 case 240: cfft->tab = (complex_t*)cfft_tab_240; break; | 983 case 240: cfft->tab = (complex_t*)cfft_tab_240; break; |
1389 #endif | 984 #endif |
1390 #endif | 985 #endif |
986 case 128: cfft->tab = (complex_t*)cfft_tab_128; break; | |
1391 } | 987 } |
1392 #endif | 988 #endif |
1393 | 989 |
1394 return cfft; | 990 return cfft; |
1395 } | 991 } |