Mercurial > audlegacy-plugins
annotate src/wma/libffwma/fft.c @ 3058:2e649bf16ebc
Robust media change handling written by John Wehle, closes bug #46.
author | Tony Vroon <chainsaw@gentoo.org> |
---|---|
date | Fri, 24 Apr 2009 09:23:20 +0100 |
parents | 6b427677621f |
children |
rev | line source |
---|---|
878 | 1 /* |
2 * FFT/IFFT transforms | |
3 * Copyright (c) 2002 Fabrice Bellard. | |
4 * | |
5 * This library is free software; you can redistribute it and/or | |
6 * modify it under the terms of the GNU Lesser General Public | |
7 * License as published by the Free Software Foundation; either | |
8 * version 2 of the License, or (at your option) any later version. | |
9 * | |
10 * This library is distributed in the hope that it will be useful, | |
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 * Lesser General Public License for more details. | |
14 * | |
15 * You should have received a copy of the GNU Lesser General Public | |
16 * License along with this library; if not, write to the Free Software | |
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
18 */ | |
19 | |
20 /** | |
21 * @file fft.c | |
22 * FFT/IFFT transforms. | |
23 */ | |
24 | |
25 #include "dsputil.h" | |
26 | |
27 #ifdef HAVE_ALTIVEC | |
28 #include <altivec.h> | |
29 | |
30 #ifdef CONFIG_DARWIN | |
31 #include <sys/sysctl.h> | |
32 #else /* CONFIG_DARWIN */ | |
33 #include <signal.h> | |
34 #include <setjmp.h> | |
35 | |
36 static sigjmp_buf jmpbuf; | |
37 static volatile sig_atomic_t canjump = 0; | |
38 | |
39 static void sigill_handler (int sig) | |
40 { | |
41 if (!canjump) { | |
42 signal (sig, SIG_DFL); | |
43 raise (sig); | |
44 } | |
45 | |
46 canjump = 0; | |
47 siglongjmp (jmpbuf, 1); | |
48 } | |
49 #endif /* CONFIG_DARWIN */ | |
50 | |
51 | |
52 #define WORD_0 0x00,0x01,0x02,0x03 | |
53 #define WORD_1 0x04,0x05,0x06,0x07 | |
54 #define WORD_2 0x08,0x09,0x0a,0x0b | |
55 #define WORD_3 0x0c,0x0d,0x0e,0x0f | |
56 #define WORD_s0 0x10,0x11,0x12,0x13 | |
57 #define WORD_s1 0x14,0x15,0x16,0x17 | |
58 #define WORD_s2 0x18,0x19,0x1a,0x1b | |
59 #define WORD_s3 0x1c,0x1d,0x1e,0x1f | |
60 | |
61 #ifdef CONFIG_DARWIN | |
62 #define vcprm(a,b,c,d) (const vector unsigned char)(WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d) | |
63 #else | |
64 #define vcprm(a,b,c,d) (const vector unsigned char){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d} | |
65 #endif | |
66 | |
67 // vcprmle is used to keep the same index as in the SSE version. | |
68 // it's the same as vcprm, with the index inversed | |
69 // ('le' is Little Endian) | |
70 #define vcprmle(a,b,c,d) vcprm(d,c,b,a) | |
71 | |
72 // used to build inverse/identity vectors (vcii) | |
73 // n is _n_egative, p is _p_ositive | |
74 #define FLOAT_n -1. | |
75 #define FLOAT_p 1. | |
76 | |
77 | |
78 #ifdef CONFIG_DARWIN | |
79 #define vcii(a,b,c,d) (const vector float)(FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d) | |
80 #else | |
81 #define vcii(a,b,c,d) (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d} | |
82 #endif | |
83 | |
84 int has_altivec(void) | |
85 { | |
86 #ifdef CONFIG_DARWIN | |
87 int sels[2] = {CTL_HW, HW_VECTORUNIT}; | |
88 int has_vu = 0; | |
89 size_t len = sizeof(has_vu); | |
90 int err; | |
91 | |
92 err = sysctl(sels, 2, &has_vu, &len, NULL, 0); | |
93 | |
94 if (err == 0) return (has_vu != 0); | |
95 #else /* CONFIG_DARWIN */ | |
96 /* no Darwin, do it the brute-force way */ | |
97 /* this is borrowed from the libmpeg2 library */ | |
98 { | |
99 signal (SIGILL, sigill_handler); | |
100 if (sigsetjmp (jmpbuf, 1)) { | |
101 signal (SIGILL, SIG_DFL); | |
102 } else { | |
103 canjump = 1; | |
104 | |
105 __asm__ volatile ("mtspr 256, %0\n\t" | |
106 "vand %%v0, %%v0, %%v0" | |
107 : | |
108 : "r" (-1)); | |
109 | |
110 signal (SIGILL, SIG_DFL); | |
111 return 1; | |
112 } | |
113 } | |
114 #endif /* CONFIG_DARWIN */ | |
115 return 0; | |
116 } | |
117 | |
118 | |
119 void fft_calc_altivec(FFTContext *s, FFTComplex *z) | |
120 { | |
121 #ifdef CONFIG_DARWIN | |
122 register const vector float vczero = (const vector float)(0.); | |
123 #else | |
124 register const vector float vczero = (const vector float){0.,0.,0.,0.}; | |
125 #endif | |
126 | |
127 int ln = s->nbits; | |
128 int j, np, np2; | |
129 int nblocks, nloops; | |
130 register FFTComplex *p, *q; | |
131 FFTComplex *cptr, *cptr1; | |
132 int k; | |
133 | |
134 np = 1 << ln; | |
135 | |
136 { | |
137 vector float *r, a, b, a1, c1, c2; | |
138 | |
139 r = (vector float *)&z[0]; | |
140 | |
141 c1 = vcii(p,p,n,n); | |
142 | |
143 if (s->inverse) | |
144 { | |
145 c2 = vcii(p,p,n,p); | |
146 } | |
147 else | |
148 { | |
149 c2 = vcii(p,p,p,n); | |
150 } | |
151 | |
152 j = (np >> 2); | |
153 do { | |
154 a = vec_ld(0, r); | |
155 a1 = vec_ld(sizeof(vector float), r); | |
156 | |
157 b = vec_perm(a,a,vcprmle(1,0,3,2)); | |
158 a = vec_madd(a,c1,b); | |
159 /* do the pass 0 butterfly */ | |
160 | |
161 b = vec_perm(a1,a1,vcprmle(1,0,3,2)); | |
162 b = vec_madd(a1,c1,b); | |
163 /* do the pass 0 butterfly */ | |
164 | |
165 /* multiply third by -i */ | |
166 b = vec_perm(b,b,vcprmle(2,3,1,0)); | |
167 | |
168 /* do the pass 1 butterfly */ | |
169 vec_st(vec_madd(b,c2,a), 0, r); | |
170 vec_st(vec_nmsub(b,c2,a), sizeof(vector float), r); | |
171 | |
172 r += 2; | |
173 } while (--j != 0); | |
174 } | |
175 /* pass 2 .. ln-1 */ | |
176 | |
177 nblocks = np >> 3; | |
178 nloops = 1 << 2; | |
179 np2 = np >> 1; | |
180 | |
181 cptr1 = s->exptab1; | |
182 do { | |
183 p = z; | |
184 q = z + nloops; | |
185 j = nblocks; | |
186 do { | |
187 cptr = cptr1; | |
188 k = nloops >> 1; | |
189 do { | |
190 vector float a,b,c,t1; | |
191 | |
192 a = vec_ld(0, (float*)p); | |
193 b = vec_ld(0, (float*)q); | |
194 | |
195 /* complex mul */ | |
196 c = vec_ld(0, (float*)cptr); | |
197 /* cre*re cim*re */ | |
198 t1 = vec_madd(c, vec_perm(b,b,vcprmle(2,2,0,0)),vczero); | |
199 c = vec_ld(sizeof(vector float), (float*)cptr); | |
200 /* -cim*im cre*im */ | |
201 b = vec_madd(c, vec_perm(b,b,vcprmle(3,3,1,1)),t1); | |
202 | |
203 /* butterfly */ | |
204 vec_st(vec_add(a,b), 0, (float*)p); | |
205 vec_st(vec_sub(a,b), 0, (float*)q); | |
206 | |
207 p += 2; | |
208 q += 2; | |
209 cptr += 4; | |
210 } while (--k); | |
211 | |
212 p += nloops; | |
213 q += nloops; | |
214 } while (--j); | |
215 cptr1 += nloops * 2; | |
216 nblocks = nblocks >> 1; | |
217 nloops = nloops << 1; | |
218 } while (nblocks != 0); | |
219 } | |
220 | |
221 #endif | |
222 | |
223 /** | |
224 * The size of the FFT is 2^nbits. If inverse is TRUE, inverse FFT is | |
225 * done | |
226 */ | |
227 int fft_inits(FFTContext *s, int nbits, int inverse) | |
228 { | |
229 int i, j, m, n; | |
230 float alpha, c1, s1, s2; | |
231 | |
232 s->nbits = nbits; | |
233 n = 1 << nbits; | |
234 | |
235 s->exptab = av_malloc((n / 2) * sizeof(FFTComplex)); | |
236 if (!s->exptab) | |
237 goto fail; | |
238 s->revtab = av_malloc(n * sizeof(uint16_t)); | |
239 if (!s->revtab) | |
240 goto fail; | |
241 s->inverse = inverse; | |
242 | |
243 s2 = inverse ? 1.0 : -1.0; | |
244 | |
245 for(i=0;i<(n/2);i++) { | |
246 alpha = 2 * M_PI * (float)i / (float)n; | |
247 c1 = cos(alpha); | |
248 s1 = sin(alpha) * s2; | |
249 s->exptab[i].re = c1; | |
250 s->exptab[i].im = s1; | |
251 } | |
252 s->fft_calc = fft_calc_c; | |
253 s->exptab1 = NULL; | |
254 /* compute constant table for HAVE_SSE version */ | |
255 #if (defined(HAVE_ALTIVEC)) | |
256 { | |
257 int has_vectors = 0; | |
258 | |
259 #if defined(HAVE_ALTIVEC) | |
260 has_vectors = has_altivec(); | |
261 #endif | |
262 if (has_vectors) { | |
263 int np, nblocks, np2, l; | |
264 FFTComplex *q; | |
265 | |
266 np = 1 << nbits; | |
267 nblocks = np >> 3; | |
268 np2 = np >> 1; | |
269 s->exptab1 = av_malloc(np * 2 * sizeof(FFTComplex)); | |
270 if (!s->exptab1) | |
271 goto fail; | |
272 q = s->exptab1; | |
273 do { | |
274 for(l = 0; l < np2; l += 2 * nblocks) { | |
275 *q++ = s->exptab[l]; | |
276 *q++ = s->exptab[l + nblocks]; | |
277 | |
278 q->re = -s->exptab[l].im; | |
279 q->im = s->exptab[l].re; | |
280 q++; | |
281 q->re = -s->exptab[l + nblocks].im; | |
282 q->im = s->exptab[l + nblocks].re; | |
283 q++; | |
284 } | |
285 nblocks = nblocks >> 1; | |
286 } while (nblocks != 0); | |
287 av_freep(&s->exptab); | |
288 s->fft_calc = fft_calc_altivec; | |
289 } | |
290 } | |
291 #endif | |
292 | |
293 /* compute bit reverse table */ | |
294 | |
295 for(i=0;i<n;i++) { | |
296 m=0; | |
297 for(j=0;j<nbits;j++) { | |
298 m |= ((i >> j) & 1) << (nbits-j-1); | |
299 } | |
300 s->revtab[i]=m; | |
301 } | |
302 return 0; | |
303 fail: | |
2321
6b427677621f
killed a few warnings
Cristi Magherusan <majeru@atheme.org>
parents:
1951
diff
changeset
|
304 av_freep((void*)&s->revtab); |
6b427677621f
killed a few warnings
Cristi Magherusan <majeru@atheme.org>
parents:
1951
diff
changeset
|
305 av_freep((void*)&s->exptab); |
6b427677621f
killed a few warnings
Cristi Magherusan <majeru@atheme.org>
parents:
1951
diff
changeset
|
306 av_freep((void*)&s->exptab1); |
878 | 307 return -1; |
308 } | |
309 | |
310 /* butter fly op */ | |
311 #define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \ | |
312 {\ | |
313 FFTSample ax, ay, bx, by;\ | |
314 bx=pre1;\ | |
315 by=pim1;\ | |
316 ax=qre1;\ | |
317 ay=qim1;\ | |
318 pre = (bx + ax);\ | |
319 pim = (by + ay);\ | |
320 qre = (bx - ax);\ | |
321 qim = (by - ay);\ | |
322 } | |
323 | |
324 #define MUL16(a,b) ((a) * (b)) | |
325 | |
326 #define CMUL(pre, pim, are, aim, bre, bim) \ | |
327 {\ | |
328 pre = (MUL16(are, bre) - MUL16(aim, bim));\ | |
329 pim = (MUL16(are, bim) + MUL16(bre, aim));\ | |
330 } | |
331 | |
332 /** | |
333 * Do a complex FFT with the parameters defined in fft_init(). The | |
334 * input data must be permuted before with s->revtab table. No | |
335 * 1.0/sqrt(n) normalization is done. | |
336 */ | |
337 void fft_calc_c(FFTContext *s, FFTComplex *z) | |
338 { | |
339 int ln = s->nbits; | |
340 int j, np, np2; | |
341 int nblocks, nloops; | |
342 register FFTComplex *p, *q; | |
343 FFTComplex *exptab = s->exptab; | |
344 int l; | |
345 FFTSample tmp_re, tmp_im; | |
346 | |
347 np = 1 << ln; | |
348 | |
349 /* pass 0 */ | |
350 | |
351 p=&z[0]; | |
352 j=(np >> 1); | |
353 do { | |
354 BF(p[0].re, p[0].im, p[1].re, p[1].im, | |
355 p[0].re, p[0].im, p[1].re, p[1].im); | |
356 p+=2; | |
357 } while (--j != 0); | |
358 | |
359 /* pass 1 */ | |
360 | |
361 | |
362 p=&z[0]; | |
363 j=np >> 2; | |
364 if (s->inverse) { | |
365 do { | |
366 BF(p[0].re, p[0].im, p[2].re, p[2].im, | |
367 p[0].re, p[0].im, p[2].re, p[2].im); | |
368 BF(p[1].re, p[1].im, p[3].re, p[3].im, | |
369 p[1].re, p[1].im, -p[3].im, p[3].re); | |
370 p+=4; | |
371 } while (--j != 0); | |
372 } else { | |
373 do { | |
374 BF(p[0].re, p[0].im, p[2].re, p[2].im, | |
375 p[0].re, p[0].im, p[2].re, p[2].im); | |
376 BF(p[1].re, p[1].im, p[3].re, p[3].im, | |
377 p[1].re, p[1].im, p[3].im, -p[3].re); | |
378 p+=4; | |
379 } while (--j != 0); | |
380 } | |
381 /* pass 2 .. ln-1 */ | |
382 | |
383 nblocks = np >> 3; | |
384 nloops = 1 << 2; | |
385 np2 = np >> 1; | |
386 do { | |
387 p = z; | |
388 q = z + nloops; | |
389 for (j = 0; j < nblocks; ++j) { | |
390 BF(p->re, p->im, q->re, q->im, | |
391 p->re, p->im, q->re, q->im); | |
392 | |
393 p++; | |
394 q++; | |
395 for(l = nblocks; l < np2; l += nblocks) { | |
396 CMUL(tmp_re, tmp_im, exptab[l].re, exptab[l].im, q->re, q->im); | |
397 BF(p->re, p->im, q->re, q->im, | |
398 p->re, p->im, tmp_re, tmp_im); | |
399 p++; | |
400 q++; | |
401 } | |
402 | |
403 p += nloops; | |
404 q += nloops; | |
405 } | |
406 nblocks = nblocks >> 1; | |
407 nloops = nloops << 1; | |
408 } while (nblocks != 0); | |
409 } | |
410 | |
411 /** | |
412 * Do the permutation needed BEFORE calling fft_calc() | |
413 */ | |
414 void fft_permute(FFTContext *s, FFTComplex *z) | |
415 { | |
416 int j, k, np; | |
417 FFTComplex tmp; | |
418 const uint16_t *revtab = s->revtab; | |
419 | |
420 /* reverse */ | |
421 np = 1 << s->nbits; | |
422 for(j=0;j<np;j++) { | |
423 k = revtab[j]; | |
424 if (k < j) { | |
425 tmp = z[k]; | |
426 z[k] = z[j]; | |
427 z[j] = tmp; | |
428 } | |
429 } | |
430 } | |
431 | |
432 void fft_end(FFTContext *s) | |
433 { | |
2321
6b427677621f
killed a few warnings
Cristi Magherusan <majeru@atheme.org>
parents:
1951
diff
changeset
|
434 av_freep((void*)&s->revtab); |
6b427677621f
killed a few warnings
Cristi Magherusan <majeru@atheme.org>
parents:
1951
diff
changeset
|
435 av_freep((void*)&s->exptab); |
6b427677621f
killed a few warnings
Cristi Magherusan <majeru@atheme.org>
parents:
1951
diff
changeset
|
436 av_freep((void*)&s->exptab1); |
878 | 437 } |
438 |