comparison src/wma/libffwma/fft.c @ 12:3da1b8942b8b trunk

[svn] - remove src/Input src/Output src/Effect src/General src/Visualization src/Container
author nenolod
date Mon, 18 Sep 2006 03:14:20 -0700
parents src/Input/wma/libffwma/fft.c@13389e613d67
children 69f3260e5ca7
comparison
equal deleted inserted replaced
11:cff1d04026ae 12:3da1b8942b8b
1 /*
2 * FFT/IFFT transforms
3 * Copyright (c) 2002 Fabrice Bellard.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19
20 /**
21 * @file fft.c
22 * FFT/IFFT transforms.
23 */
24
25 #include "dsputil.h"
26
27 #ifdef HAVE_ALTIVEC
28
29 #ifdef HAVE_ALTIVEC_H
30 #include <altivec.h>
31 #endif
32
33 #ifdef CONFIG_DARWIN
34 #include <sys/sysctl.h>
35 #else /* CONFIG_DARWIN */
36 #include <signal.h>
37 #include <setjmp.h>
38
39 static sigjmp_buf jmpbuf;
40 static volatile sig_atomic_t canjump = 0;
41
42 static void sigill_handler (int sig)
43 {
44 if (!canjump) {
45 signal (sig, SIG_DFL);
46 raise (sig);
47 }
48
49 canjump = 0;
50 siglongjmp (jmpbuf, 1);
51 }
52 #endif /* CONFIG_DARWIN */
53
54
/*
 * Byte-index groups used to build vec_perm() control vectors.  Each WORD_x
 * expands to the four byte indices of one 32-bit word: indices 0x00-0x0F
 * (WORD_0..WORD_3) and 0x10-0x1F (WORD_s0..WORD_s3).
 */
#define WORD_0   0x00, 0x01, 0x02, 0x03
#define WORD_1   0x04, 0x05, 0x06, 0x07
#define WORD_2   0x08, 0x09, 0x0A, 0x0B
#define WORD_3   0x0C, 0x0D, 0x0E, 0x0F
#define WORD_s0  0x10, 0x11, 0x12, 0x13
#define WORD_s1  0x14, 0x15, 0x16, 0x17
#define WORD_s2  0x18, 0x19, 0x1A, 0x1B
#define WORD_s3  0x1C, 0x1D, 0x1E, 0x1F

/* Build a constant permute vector from four word selectors.  The Darwin
 * toolchain takes parenthesized vector literals, the others take braces. */
#if defined(CONFIG_DARWIN)
#define vcprm(a, b, c, d) \
    (const vector unsigned char)(WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d)
#else
#define vcprm(a, b, c, d) \
    (const vector unsigned char){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d}
#endif

/* vcprmle keeps the same indices as the SSE version of this code: it is
 * vcprm with the selector order reversed ('le' stands for Little Endian). */
#define vcprmle(a, b, c, d) vcprm(d, c, b, a)

/* Lane values for the sign vectors built by vcii:
 * n is _n_egative (-1), p is _p_ositive (+1). */
#define FLOAT_n -1.0
#define FLOAT_p 1.0

/* Build a constant vector of +1/-1 lanes from four p/n selectors. */
#if defined(CONFIG_DARWIN)
#define vcii(a, b, c, d) \
    (const vector float)(FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d)
#else
#define vcii(a, b, c, d) \
    (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d}
#endif
86
87 int has_altivec(void)
88 {
89 #ifdef CONFIG_DARWIN
90 int sels[2] = {CTL_HW, HW_VECTORUNIT};
91 int has_vu = 0;
92 size_t len = sizeof(has_vu);
93 int err;
94
95 err = sysctl(sels, 2, &has_vu, &len, NULL, 0);
96
97 if (err == 0) return (has_vu != 0);
98 #else /* CONFIG_DARWIN */
99 /* no Darwin, do it the brute-force way */
100 /* this is borrowed from the libmpeg2 library */
101 {
102 signal (SIGILL, sigill_handler);
103 if (sigsetjmp (jmpbuf, 1)) {
104 signal (SIGILL, SIG_DFL);
105 } else {
106 canjump = 1;
107
108 asm volatile ("mtspr 256, %0\n\t"
109 "vand %%v0, %%v0, %%v0"
110 :
111 : "r" (-1));
112
113 signal (SIGILL, SIG_DFL);
114 return 1;
115 }
116 }
117 #endif /* CONFIG_DARWIN */
118 return 0;
119 }
120
121
122 void fft_calc_altivec(FFTContext *s, FFTComplex *z)
123 {
124 #ifdef CONFIG_DARWIN
125 register const vector float vczero = (const vector float)(0.);
126 #else
127 register const vector float vczero = (const vector float){0.,0.,0.,0.};
128 #endif
129
130 int ln = s->nbits;
131 int j, np, np2;
132 int nblocks, nloops;
133 register FFTComplex *p, *q;
134 FFTComplex *cptr, *cptr1;
135 int k;
136
137 np = 1 << ln;
138
139 {
140 vector float *r, a, b, a1, c1, c2;
141
142 r = (vector float *)&z[0];
143
144 c1 = vcii(p,p,n,n);
145
146 if (s->inverse)
147 {
148 c2 = vcii(p,p,n,p);
149 }
150 else
151 {
152 c2 = vcii(p,p,p,n);
153 }
154
155 j = (np >> 2);
156 do {
157 a = vec_ld(0, r);
158 a1 = vec_ld(sizeof(vector float), r);
159
160 b = vec_perm(a,a,vcprmle(1,0,3,2));
161 a = vec_madd(a,c1,b);
162 /* do the pass 0 butterfly */
163
164 b = vec_perm(a1,a1,vcprmle(1,0,3,2));
165 b = vec_madd(a1,c1,b);
166 /* do the pass 0 butterfly */
167
168 /* multiply third by -i */
169 b = vec_perm(b,b,vcprmle(2,3,1,0));
170
171 /* do the pass 1 butterfly */
172 vec_st(vec_madd(b,c2,a), 0, r);
173 vec_st(vec_nmsub(b,c2,a), sizeof(vector float), r);
174
175 r += 2;
176 } while (--j != 0);
177 }
178 /* pass 2 .. ln-1 */
179
180 nblocks = np >> 3;
181 nloops = 1 << 2;
182 np2 = np >> 1;
183
184 cptr1 = s->exptab1;
185 do {
186 p = z;
187 q = z + nloops;
188 j = nblocks;
189 do {
190 cptr = cptr1;
191 k = nloops >> 1;
192 do {
193 vector float a,b,c,t1;
194
195 a = vec_ld(0, (float*)p);
196 b = vec_ld(0, (float*)q);
197
198 /* complex mul */
199 c = vec_ld(0, (float*)cptr);
200 /* cre*re cim*re */
201 t1 = vec_madd(c, vec_perm(b,b,vcprmle(2,2,0,0)),vczero);
202 c = vec_ld(sizeof(vector float), (float*)cptr);
203 /* -cim*im cre*im */
204 b = vec_madd(c, vec_perm(b,b,vcprmle(3,3,1,1)),t1);
205
206 /* butterfly */
207 vec_st(vec_add(a,b), 0, (float*)p);
208 vec_st(vec_sub(a,b), 0, (float*)q);
209
210 p += 2;
211 q += 2;
212 cptr += 4;
213 } while (--k);
214
215 p += nloops;
216 q += nloops;
217 } while (--j);
218 cptr1 += nloops * 2;
219 nblocks = nblocks >> 1;
220 nloops = nloops << 1;
221 } while (nblocks != 0);
222 }
223
224 #endif
225
226 /**
227 * The size of the FFT is 2^nbits. If inverse is TRUE, inverse FFT is
228 * done
229 */
230 int fft_inits(FFTContext *s, int nbits, int inverse)
231 {
232 int i, j, m, n;
233 float alpha, c1, s1, s2;
234
235 s->nbits = nbits;
236 n = 1 << nbits;
237
238 s->exptab = av_malloc((n / 2) * sizeof(FFTComplex));
239 if (!s->exptab)
240 goto fail;
241 s->revtab = av_malloc(n * sizeof(uint16_t));
242 if (!s->revtab)
243 goto fail;
244 s->inverse = inverse;
245
246 s2 = inverse ? 1.0 : -1.0;
247
248 for(i=0;i<(n/2);i++) {
249 alpha = 2 * M_PI * (float)i / (float)n;
250 c1 = cos(alpha);
251 s1 = sin(alpha) * s2;
252 s->exptab[i].re = c1;
253 s->exptab[i].im = s1;
254 }
255 s->fft_calc = fft_calc_c;
256 s->exptab1 = NULL;
257 /* compute constant table for HAVE_SSE version */
258 #if (defined(HAVE_ALTIVEC))
259 {
260 int has_vectors = 0;
261
262 #if defined(HAVE_ALTIVEC)
263 has_vectors = has_altivec();
264 #endif
265 if (has_vectors) {
266 int np, nblocks, np2, l;
267 FFTComplex *q;
268
269 np = 1 << nbits;
270 nblocks = np >> 3;
271 np2 = np >> 1;
272 s->exptab1 = av_malloc(np * 2 * sizeof(FFTComplex));
273 if (!s->exptab1)
274 goto fail;
275 q = s->exptab1;
276 do {
277 for(l = 0; l < np2; l += 2 * nblocks) {
278 *q++ = s->exptab[l];
279 *q++ = s->exptab[l + nblocks];
280
281 q->re = -s->exptab[l].im;
282 q->im = s->exptab[l].re;
283 q++;
284 q->re = -s->exptab[l + nblocks].im;
285 q->im = s->exptab[l + nblocks].re;
286 q++;
287 }
288 nblocks = nblocks >> 1;
289 } while (nblocks != 0);
290 av_freep(&s->exptab);
291 s->fft_calc = fft_calc_altivec;
292 }
293 }
294 #endif
295
296 /* compute bit reverse table */
297
298 for(i=0;i<n;i++) {
299 m=0;
300 for(j=0;j<nbits;j++) {
301 m |= ((i >> j) & 1) << (nbits-j-1);
302 }
303 s->revtab[i]=m;
304 }
305 return 0;
306 fail:
307 av_freep(&s->revtab);
308 av_freep(&s->exptab);
309 av_freep(&s->exptab1);
310 return -1;
311 }
312
/*
 * Butterfly operation:
 *   (pre, pim) = (pre1, pim1) + (qre1, qim1)
 *   (qre, qim) = (pre1, pim1) - (qre1, qim1)
 *
 * All four inputs are copied to temporaries before any output is written,
 * so the outputs may name the same objects as the inputs.  The body is
 * wrapped in do { } while (0) so that "BF(...);" is a single statement and
 * is safe inside an unbraced if/else.
 */
#define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \
do {\
    FFTSample bf_px_, bf_py_, bf_qx_, bf_qy_;\
    bf_px_ = (pre1);\
    bf_py_ = (pim1);\
    bf_qx_ = (qre1);\
    bf_qy_ = (qim1);\
    (pre) = (bf_px_ + bf_qx_);\
    (pim) = (bf_py_ + bf_qy_);\
    (qre) = (bf_px_ - bf_qx_);\
    (qim) = (bf_py_ - bf_qy_);\
} while (0)
326
/* Plain product of two samples. */
#define MUL16(a,b) ((a) * (b))

/*
 * Complex multiplication: (pre, pim) = (are + i*aim) * (bre + i*bim).
 *
 * pre is written before pim is computed, so the outputs must not name the
 * same objects as the inputs.  The body is wrapped in do { } while (0) so
 * that "CMUL(...);" is a single statement and is safe inside an unbraced
 * if/else.
 */
#define CMUL(pre, pim, are, aim, bre, bim) \
do {\
    (pre) = (MUL16(are, bre) - MUL16(aim, bim));\
    (pim) = (MUL16(are, bim) + MUL16(bre, aim));\
} while (0)
334
335 /**
336 * Do a complex FFT with the parameters defined in fft_init(). The
337 * input data must be permuted before with s->revtab table. No
338 * 1.0/sqrt(n) normalization is done.
339 */
340 void fft_calc_c(FFTContext *s, FFTComplex *z)
341 {
342 int ln = s->nbits;
343 int j, np, np2;
344 int nblocks, nloops;
345 register FFTComplex *p, *q;
346 FFTComplex *exptab = s->exptab;
347 int l;
348 FFTSample tmp_re, tmp_im;
349
350 np = 1 << ln;
351
352 /* pass 0 */
353
354 p=&z[0];
355 j=(np >> 1);
356 do {
357 BF(p[0].re, p[0].im, p[1].re, p[1].im,
358 p[0].re, p[0].im, p[1].re, p[1].im);
359 p+=2;
360 } while (--j != 0);
361
362 /* pass 1 */
363
364
365 p=&z[0];
366 j=np >> 2;
367 if (s->inverse) {
368 do {
369 BF(p[0].re, p[0].im, p[2].re, p[2].im,
370 p[0].re, p[0].im, p[2].re, p[2].im);
371 BF(p[1].re, p[1].im, p[3].re, p[3].im,
372 p[1].re, p[1].im, -p[3].im, p[3].re);
373 p+=4;
374 } while (--j != 0);
375 } else {
376 do {
377 BF(p[0].re, p[0].im, p[2].re, p[2].im,
378 p[0].re, p[0].im, p[2].re, p[2].im);
379 BF(p[1].re, p[1].im, p[3].re, p[3].im,
380 p[1].re, p[1].im, p[3].im, -p[3].re);
381 p+=4;
382 } while (--j != 0);
383 }
384 /* pass 2 .. ln-1 */
385
386 nblocks = np >> 3;
387 nloops = 1 << 2;
388 np2 = np >> 1;
389 do {
390 p = z;
391 q = z + nloops;
392 for (j = 0; j < nblocks; ++j) {
393 BF(p->re, p->im, q->re, q->im,
394 p->re, p->im, q->re, q->im);
395
396 p++;
397 q++;
398 for(l = nblocks; l < np2; l += nblocks) {
399 CMUL(tmp_re, tmp_im, exptab[l].re, exptab[l].im, q->re, q->im);
400 BF(p->re, p->im, q->re, q->im,
401 p->re, p->im, tmp_re, tmp_im);
402 p++;
403 q++;
404 }
405
406 p += nloops;
407 q += nloops;
408 }
409 nblocks = nblocks >> 1;
410 nloops = nloops << 1;
411 } while (nblocks != 0);
412 }
413
414 /**
415 * Do the permutation needed BEFORE calling fft_calc()
416 */
417 void fft_permute(FFTContext *s, FFTComplex *z)
418 {
419 int j, k, np;
420 FFTComplex tmp;
421 const uint16_t *revtab = s->revtab;
422
423 /* reverse */
424 np = 1 << s->nbits;
425 for(j=0;j<np;j++) {
426 k = revtab[j];
427 if (k < j) {
428 tmp = z[k];
429 z[k] = z[j];
430 z[j] = tmp;
431 }
432 }
433 }
434
435 void fft_end(FFTContext *s)
436 {
437 av_freep(&s->revtab);
438 av_freep(&s->exptab);
439 av_freep(&s->exptab1);
440 }
441