Mercurial > libavcodec.hg
annotate fft.c @ 11864:7204cb7dd601 libavcodec
Quant changes only once per MB so move the corresponding scale factor assignment
out of the block decoding loop. Indeo4 doesn't use any scale table but the quant
level itself as scale. Therefore access scale table only if its pointer != NULL.
author | maxim |
---|---|
date | Thu, 10 Jun 2010 17:31:12 +0000 |
parents | 7dd2a45249a9 |
children | c80c7a717156 |
rev | line source |
---|---|
781 | 1 /* |
2 * FFT/IFFT transforms | |
7542 | 3 * Copyright (c) 2008 Loren Merritt |
8629
04423b2f6e0b
cosmetics: Remove pointless period after copyright statement non-sentences.
diego
parents:
8590
diff
changeset
|
4 * Copyright (c) 2002 Fabrice Bellard |
7542 | 5 * Partly based on libdjbfft by D. J. Bernstein |
781 | 6 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3746
diff
changeset
|
7 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3746
diff
changeset
|
8 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3746
diff
changeset
|
9 * FFmpeg is free software; you can redistribute it and/or |
781 | 10 * modify it under the terms of the GNU Lesser General Public |
11 * License as published by the Free Software Foundation; either | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3746
diff
changeset
|
12 * version 2.1 of the License, or (at your option) any later version. |
781 | 13 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3746
diff
changeset
|
14 * FFmpeg is distributed in the hope that it will be useful, |
781 | 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
17 * Lesser General Public License for more details. | |
18 * | |
19 * You should have received a copy of the GNU Lesser General Public | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3746
diff
changeset
|
20 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2979
diff
changeset
|
21 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
781 | 22 */ |
1106 | 23 |
24 /** | |
11644
7dd2a45249a9
Remove explicit filename from Doxygen @file commands.
diego
parents:
11444
diff
changeset
|
25 * @file |
1106 | 26 * FFT/IFFT transforms. |
27 */ | |
28 | |
11444
6f1697664bf2
Replace many includes of libavutil/common.h with what is actually needed
mru
parents:
11370
diff
changeset
|
29 #include <stdlib.h> |
6f1697664bf2
Replace many includes of libavutil/common.h with what is actually needed
mru
parents:
11370
diff
changeset
|
30 #include <string.h> |
11370 | 31 #include "libavutil/mathematics.h" |
32 #include "fft.h" | |
781 | 33 |
10407
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10400
diff
changeset
|
34 /* cos(2*pi*x/n) for 0<=x<=n/4, followed by its reverse */ |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10400
diff
changeset
|
35 #if !CONFIG_HARDCODED_TABLES |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10400
diff
changeset
|
36 COSTABLE(16); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10400
diff
changeset
|
37 COSTABLE(32); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10400
diff
changeset
|
38 COSTABLE(64); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10400
diff
changeset
|
39 COSTABLE(128); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10400
diff
changeset
|
40 COSTABLE(256); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10400
diff
changeset
|
41 COSTABLE(512); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10400
diff
changeset
|
42 COSTABLE(1024); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10400
diff
changeset
|
43 COSTABLE(2048); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10400
diff
changeset
|
44 COSTABLE(4096); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10400
diff
changeset
|
45 COSTABLE(8192); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10400
diff
changeset
|
46 COSTABLE(16384); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10400
diff
changeset
|
47 COSTABLE(32768); |
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10400
diff
changeset
|
48 COSTABLE(65536); |
10400
866dffa620d1
Use hardcoded instead of runtime-calculated ff_cos_* tables if
reimar
parents:
10391
diff
changeset
|
49 #endif |
10407
57acce8b1380
Move/add COSTABLE/SINTABLE macros to dsputil to add extern definitions
reimar
parents:
10400
diff
changeset
|
50 COSTABLE_CONST FFTSample * const ff_cos_tabs[] = { |
10492
63910f7ba293
Pad ff_cos_tabs and ff_sin_tabs so that index n points to the table for n bits.
reimar
parents:
10407
diff
changeset
|
51 NULL, NULL, NULL, NULL, |
7542 | 52 ff_cos_16, ff_cos_32, ff_cos_64, ff_cos_128, ff_cos_256, ff_cos_512, ff_cos_1024, |
53 ff_cos_2048, ff_cos_4096, ff_cos_8192, ff_cos_16384, ff_cos_32768, ff_cos_65536, | |
54 }; | |
55 | |
56 static int split_radix_permutation(int i, int n, int inverse) | |
57 { | |
58 int m; | |
59 if(n <= 2) return i&1; | |
60 m = n >> 1; | |
61 if(!(i&m)) return split_radix_permutation(i, m, inverse)*2; | |
62 m >>= 1; | |
63 if(inverse == !(i&m)) return split_radix_permutation(i, m, inverse)*4 + 1; | |
64 else return split_radix_permutation(i, m, inverse)*4 - 1; | |
65 } | |
66 | |
10496
74b0c1a0851e
Add ff_init_ff_cos_tabs function and use it in rdft.c to ensure that the
reimar
parents:
10492
diff
changeset
|
67 av_cold void ff_init_ff_cos_tabs(int index) |
74b0c1a0851e
Add ff_init_ff_cos_tabs function and use it in rdft.c to ensure that the
reimar
parents:
10492
diff
changeset
|
68 { |
74b0c1a0851e
Add ff_init_ff_cos_tabs function and use it in rdft.c to ensure that the
reimar
parents:
10492
diff
changeset
|
69 #if !CONFIG_HARDCODED_TABLES |
74b0c1a0851e
Add ff_init_ff_cos_tabs function and use it in rdft.c to ensure that the
reimar
parents:
10492
diff
changeset
|
70 int i; |
74b0c1a0851e
Add ff_init_ff_cos_tabs function and use it in rdft.c to ensure that the
reimar
parents:
10492
diff
changeset
|
71 int m = 1<<index; |
74b0c1a0851e
Add ff_init_ff_cos_tabs function and use it in rdft.c to ensure that the
reimar
parents:
10492
diff
changeset
|
72 double freq = 2*M_PI/m; |
74b0c1a0851e
Add ff_init_ff_cos_tabs function and use it in rdft.c to ensure that the
reimar
parents:
10492
diff
changeset
|
73 FFTSample *tab = ff_cos_tabs[index]; |
74b0c1a0851e
Add ff_init_ff_cos_tabs function and use it in rdft.c to ensure that the
reimar
parents:
10492
diff
changeset
|
74 for(i=0; i<=m/4; i++) |
74b0c1a0851e
Add ff_init_ff_cos_tabs function and use it in rdft.c to ensure that the
reimar
parents:
10492
diff
changeset
|
75 tab[i] = cos(i*freq); |
74b0c1a0851e
Add ff_init_ff_cos_tabs function and use it in rdft.c to ensure that the
reimar
parents:
10492
diff
changeset
|
76 for(i=1; i<m/4; i++) |
74b0c1a0851e
Add ff_init_ff_cos_tabs function and use it in rdft.c to ensure that the
reimar
parents:
10492
diff
changeset
|
77 tab[m/2-i] = tab[i]; |
74b0c1a0851e
Add ff_init_ff_cos_tabs function and use it in rdft.c to ensure that the
reimar
parents:
10492
diff
changeset
|
78 #endif |
74b0c1a0851e
Add ff_init_ff_cos_tabs function and use it in rdft.c to ensure that the
reimar
parents:
10492
diff
changeset
|
79 } |
74b0c1a0851e
Add ff_init_ff_cos_tabs function and use it in rdft.c to ensure that the
reimar
parents:
10492
diff
changeset
|
80 |
8637 | 81 av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse) |
781 | 82 { |
83 int i, j, m, n; | |
84 float alpha, c1, s1, s2; | |
6503 | 85 int av_unused has_vectors; |
2967 | 86 |
7542 | 87 if (nbits < 2 || nbits > 16) |
88 goto fail; | |
781 | 89 s->nbits = nbits; |
90 n = 1 << nbits; | |
91 | |
7542 | 92 s->tmp_buf = NULL; |
8974 | 93 s->exptab = av_malloc((n / 2) * sizeof(FFTComplex)); |
781 | 94 if (!s->exptab) |
95 goto fail; | |
96 s->revtab = av_malloc(n * sizeof(uint16_t)); | |
97 if (!s->revtab) | |
98 goto fail; | |
99 s->inverse = inverse; | |
100 | |
101 s2 = inverse ? 1.0 : -1.0; | |
2967 | 102 |
7542 | 103 s->fft_permute = ff_fft_permute_c; |
8974 | 104 s->fft_calc = ff_fft_calc_c; |
11122 | 105 #if CONFIG_MDCT |
8974 | 106 s->imdct_calc = ff_imdct_calc_c; |
107 s->imdct_half = ff_imdct_half_c; | |
10161 | 108 s->mdct_calc = ff_mdct_calc_c; |
11122 | 109 #endif |
8974 | 110 s->exptab1 = NULL; |
10175
5cf49858179a
Move per-arch fft init bits into the corresponding subdirs
mru
parents:
10172
diff
changeset
|
111 s->split_radix = 1; |
781 | 112 |
10175
5cf49858179a
Move per-arch fft init bits into the corresponding subdirs
mru
parents:
10172
diff
changeset
|
113 if (ARCH_ARM) ff_fft_init_arm(s); |
5cf49858179a
Move per-arch fft init bits into the corresponding subdirs
mru
parents:
10172
diff
changeset
|
114 if (HAVE_ALTIVEC) ff_fft_init_altivec(s); |
5cf49858179a
Move per-arch fft init bits into the corresponding subdirs
mru
parents:
10172
diff
changeset
|
115 if (HAVE_MMX) ff_fft_init_mmx(s); |
781 | 116 |
10175
5cf49858179a
Move per-arch fft init bits into the corresponding subdirs
mru
parents:
10172
diff
changeset
|
117 if (s->split_radix) { |
7542 | 118 for(j=4; j<=nbits; j++) { |
10496
74b0c1a0851e
Add ff_init_ff_cos_tabs function and use it in rdft.c to ensure that the
reimar
parents:
10492
diff
changeset
|
119 ff_init_ff_cos_tabs(j); |
7542 | 120 } |
121 for(i=0; i<n; i++) | |
10172 | 122 s->revtab[-split_radix_permutation(i, n, s->inverse) & (n-1)] = i; |
7542 | 123 s->tmp_buf = av_malloc(n * sizeof(FFTComplex)); |
124 } else { | |
6504 | 125 int np, nblocks, np2, l; |
126 FFTComplex *q; | |
2967 | 127 |
7542 | 128 for(i=0; i<(n/2); i++) { |
129 alpha = 2 * M_PI * (float)i / (float)n; | |
130 c1 = cos(alpha); | |
131 s1 = sin(alpha) * s2; | |
132 s->exptab[i].re = c1; | |
133 s->exptab[i].im = s1; | |
134 } | |
135 | |
6504 | 136 np = 1 << nbits; |
137 nblocks = np >> 3; | |
138 np2 = np >> 1; | |
139 s->exptab1 = av_malloc(np * 2 * sizeof(FFTComplex)); | |
140 if (!s->exptab1) | |
141 goto fail; | |
142 q = s->exptab1; | |
143 do { | |
144 for(l = 0; l < np2; l += 2 * nblocks) { | |
145 *q++ = s->exptab[l]; | |
146 *q++ = s->exptab[l + nblocks]; | |
975
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
147 |
6504 | 148 q->re = -s->exptab[l].im; |
149 q->im = s->exptab[l].re; | |
150 q++; | |
151 q->re = -s->exptab[l + nblocks].im; | |
152 q->im = s->exptab[l + nblocks].re; | |
153 q++; | |
154 } | |
155 nblocks = nblocks >> 1; | |
156 } while (nblocks != 0); | |
157 av_freep(&s->exptab); | |
781 | 158 |
7543 | 159 /* compute bit reverse table */ |
160 for(i=0;i<n;i++) { | |
161 m=0; | |
162 for(j=0;j<nbits;j++) { | |
163 m |= ((i >> j) & 1) << (nbits-j-1); | |
164 } | |
165 s->revtab[i]=m; | |
781 | 166 } |
7542 | 167 } |
168 | |
781 | 169 return 0; |
170 fail: | |
171 av_freep(&s->revtab); | |
172 av_freep(&s->exptab); | |
173 av_freep(&s->exptab1); | |
7542 | 174 av_freep(&s->tmp_buf); |
781 | 175 return -1; |
176 } | |
177 | |
7542 | 178 void ff_fft_permute_c(FFTContext *s, FFTComplex *z) |
781 | 179 { |
180 int j, k, np; | |
181 FFTComplex tmp; | |
182 const uint16_t *revtab = s->revtab; | |
7542 | 183 np = 1 << s->nbits; |
184 | |
185 if (s->tmp_buf) { | |
186 /* TODO: handle split-radix permute in a more optimal way, probably in-place */ | |
187 for(j=0;j<np;j++) s->tmp_buf[revtab[j]] = z[j]; | |
188 memcpy(z, s->tmp_buf, np * sizeof(FFTComplex)); | |
189 return; | |
190 } | |
2967 | 191 |
781 | 192 /* reverse */ |
193 for(j=0;j<np;j++) { | |
194 k = revtab[j]; | |
195 if (k < j) { | |
196 tmp = z[k]; | |
197 z[k] = z[j]; | |
198 z[j] = tmp; | |
199 } | |
200 } | |
201 } | |
202 | |
8687 | 203 av_cold void ff_fft_end(FFTContext *s) |
781 | 204 { |
205 av_freep(&s->revtab); | |
206 av_freep(&s->exptab); | |
207 av_freep(&s->exptab1); | |
7542 | 208 av_freep(&s->tmp_buf); |
209 } | |
210 | |
211 #define sqrthalf (float)M_SQRT1_2 | |
212 | |
213 #define BF(x,y,a,b) {\ | |
214 x = a - b;\ | |
215 y = a + b;\ | |
216 } | |
217 | |
218 #define BUTTERFLIES(a0,a1,a2,a3) {\ | |
219 BF(t3, t5, t5, t1);\ | |
220 BF(a2.re, a0.re, a0.re, t5);\ | |
221 BF(a3.im, a1.im, a1.im, t3);\ | |
222 BF(t4, t6, t2, t6);\ | |
223 BF(a3.re, a1.re, a1.re, t4);\ | |
224 BF(a2.im, a0.im, a0.im, t6);\ | |
225 } | |
226 | |
227 // force loading all the inputs before storing any. | |
228 // this is slightly slower for small data, but avoids store->load aliasing | |
229 // for addresses separated by large powers of 2. | |
230 #define BUTTERFLIES_BIG(a0,a1,a2,a3) {\ | |
231 FFTSample r0=a0.re, i0=a0.im, r1=a1.re, i1=a1.im;\ | |
232 BF(t3, t5, t5, t1);\ | |
233 BF(a2.re, a0.re, r0, t5);\ | |
234 BF(a3.im, a1.im, i1, t3);\ | |
235 BF(t4, t6, t2, t6);\ | |
236 BF(a3.re, a1.re, r1, t4);\ | |
237 BF(a2.im, a0.im, i0, t6);\ | |
238 } | |
239 | |
240 #define TRANSFORM(a0,a1,a2,a3,wre,wim) {\ | |
241 t1 = a2.re * wre + a2.im * wim;\ | |
242 t2 = a2.im * wre - a2.re * wim;\ | |
243 t5 = a3.re * wre - a3.im * wim;\ | |
244 t6 = a3.im * wre + a3.re * wim;\ | |
245 BUTTERFLIES(a0,a1,a2,a3)\ | |
246 } | |
247 | |
248 #define TRANSFORM_ZERO(a0,a1,a2,a3) {\ | |
249 t1 = a2.re;\ | |
250 t2 = a2.im;\ | |
251 t5 = a3.re;\ | |
252 t6 = a3.im;\ | |
253 BUTTERFLIES(a0,a1,a2,a3)\ | |
254 } | |
255 | |
256 /* z[0...8n-1], w[1...2n-1] */ | |
257 #define PASS(name)\ | |
258 static void name(FFTComplex *z, const FFTSample *wre, unsigned int n)\ | |
259 {\ | |
260 FFTSample t1, t2, t3, t4, t5, t6;\ | |
261 int o1 = 2*n;\ | |
262 int o2 = 4*n;\ | |
263 int o3 = 6*n;\ | |
264 const FFTSample *wim = wre+o1;\ | |
265 n--;\ | |
266 \ | |
267 TRANSFORM_ZERO(z[0],z[o1],z[o2],z[o3]);\ | |
268 TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\ | |
269 do {\ | |
270 z += 2;\ | |
271 wre += 2;\ | |
272 wim -= 2;\ | |
273 TRANSFORM(z[0],z[o1],z[o2],z[o3],wre[0],wim[0]);\ | |
274 TRANSFORM(z[1],z[o1+1],z[o2+1],z[o3+1],wre[1],wim[-1]);\ | |
275 } while(--n);\ | |
781 | 276 } |
277 | |
7542 | 278 PASS(pass) |
279 #undef BUTTERFLIES | |
280 #define BUTTERFLIES BUTTERFLIES_BIG | |
281 PASS(pass_big) | |
282 | |
283 #define DECL_FFT(n,n2,n4)\ | |
284 static void fft##n(FFTComplex *z)\ | |
285 {\ | |
286 fft##n2(z);\ | |
287 fft##n4(z+n4*2);\ | |
288 fft##n4(z+n4*3);\ | |
289 pass(z,ff_cos_##n,n4/2);\ | |
290 } | |
291 | |
292 static void fft4(FFTComplex *z) | |
293 { | |
294 FFTSample t1, t2, t3, t4, t5, t6, t7, t8; | |
295 | |
296 BF(t3, t1, z[0].re, z[1].re); | |
297 BF(t8, t6, z[3].re, z[2].re); | |
298 BF(z[2].re, z[0].re, t1, t6); | |
299 BF(t4, t2, z[0].im, z[1].im); | |
300 BF(t7, t5, z[2].im, z[3].im); | |
301 BF(z[3].im, z[1].im, t4, t8); | |
302 BF(z[3].re, z[1].re, t3, t7); | |
303 BF(z[2].im, z[0].im, t2, t5); | |
304 } | |
305 | |
306 static void fft8(FFTComplex *z) | |
307 { | |
308 FFTSample t1, t2, t3, t4, t5, t6, t7, t8; | |
309 | |
310 fft4(z); | |
311 | |
312 BF(t1, z[5].re, z[4].re, -z[5].re); | |
313 BF(t2, z[5].im, z[4].im, -z[5].im); | |
314 BF(t3, z[7].re, z[6].re, -z[7].re); | |
315 BF(t4, z[7].im, z[6].im, -z[7].im); | |
316 BF(t8, t1, t3, t1); | |
317 BF(t7, t2, t2, t4); | |
318 BF(z[4].re, z[0].re, z[0].re, t1); | |
319 BF(z[4].im, z[0].im, z[0].im, t2); | |
320 BF(z[6].re, z[2].re, z[2].re, t7); | |
321 BF(z[6].im, z[2].im, z[2].im, t8); | |
322 | |
323 TRANSFORM(z[1],z[3],z[5],z[7],sqrthalf,sqrthalf); | |
324 } | |
325 | |
8590 | 326 #if !CONFIG_SMALL |
7542 | 327 static void fft16(FFTComplex *z) |
328 { | |
329 FFTSample t1, t2, t3, t4, t5, t6; | |
330 | |
331 fft8(z); | |
332 fft4(z+8); | |
333 fft4(z+12); | |
334 | |
335 TRANSFORM_ZERO(z[0],z[4],z[8],z[12]); | |
336 TRANSFORM(z[2],z[6],z[10],z[14],sqrthalf,sqrthalf); | |
337 TRANSFORM(z[1],z[5],z[9],z[13],ff_cos_16[1],ff_cos_16[3]); | |
338 TRANSFORM(z[3],z[7],z[11],z[15],ff_cos_16[3],ff_cos_16[1]); | |
339 } | |
340 #else | |
341 DECL_FFT(16,8,4) | |
342 #endif | |
343 DECL_FFT(32,16,8) | |
344 DECL_FFT(64,32,16) | |
345 DECL_FFT(128,64,32) | |
346 DECL_FFT(256,128,64) | |
347 DECL_FFT(512,256,128) | |
8590 | 348 #if !CONFIG_SMALL |
7542 | 349 #define pass pass_big |
350 #endif | |
351 DECL_FFT(1024,512,256) | |
352 DECL_FFT(2048,1024,512) | |
353 DECL_FFT(4096,2048,1024) | |
354 DECL_FFT(8192,4096,2048) | |
355 DECL_FFT(16384,8192,4096) | |
356 DECL_FFT(32768,16384,8192) | |
357 DECL_FFT(65536,32768,16384) | |
358 | |
10391 | 359 static void (* const fft_dispatch[])(FFTComplex*) = { |
7542 | 360 fft4, fft8, fft16, fft32, fft64, fft128, fft256, fft512, fft1024, |
361 fft2048, fft4096, fft8192, fft16384, fft32768, fft65536, | |
362 }; | |
363 | |
364 void ff_fft_calc_c(FFTContext *s, FFTComplex *z) | |
365 { | |
366 fft_dispatch[s->nbits-2](z); | |
367 } | |
368 |