Mercurial > libavcodec.hg
annotate fft.c @ 2892:41315d0120b3 libavcodec
replace a few mov + psrlq with pshufw, there are more cases which could benefit from this but they would require us to duplicate some functions ...
the trick is from various places (my own code in libpostproc, a patch on the x264 list, ...)
author | michael |
---|---|
date | Wed, 21 Sep 2005 21:17:09 +0000 |
parents | dd63cb7e5080 |
children | ef2149182f1c |
rev | line source |
---|---|
781 | 1 /* |
2 * FFT/IFFT transforms | |
3 * Copyright (c) 2002 Fabrice Bellard. | |
4 * | |
5 * This library is free software; you can redistribute it and/or | |
6 * modify it under the terms of the GNU Lesser General Public | |
7 * License as published by the Free Software Foundation; either | |
8 * version 2 of the License, or (at your option) any later version. | |
9 * | |
10 * This library is distributed in the hope that it will be useful, | |
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 * Lesser General Public License for more details. | |
14 * | |
15 * You should have received a copy of the GNU Lesser General Public | |
16 * License along with this library; if not, write to the Free Software | |
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
18 */ | |
1106 | 19 |
20 /** | |
21 * @file fft.c | |
22 * FFT/IFFT transforms. | |
23 */ | |
24 | |
781 | 25 #include "dsputil.h" |
26 | |
27 /** | |
28 * The size of the FFT is 2^nbits. If inverse is TRUE, inverse FFT is | |
29 * done | |
30 */ | |
1879
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1106
diff
changeset
|
31 int ff_fft_init(FFTContext *s, int nbits, int inverse) |
781 | 32 { |
33 int i, j, m, n; | |
34 float alpha, c1, s1, s2; | |
35 | |
36 s->nbits = nbits; | |
37 n = 1 << nbits; | |
38 | |
39 s->exptab = av_malloc((n / 2) * sizeof(FFTComplex)); | |
40 if (!s->exptab) | |
41 goto fail; | |
42 s->revtab = av_malloc(n * sizeof(uint16_t)); | |
43 if (!s->revtab) | |
44 goto fail; | |
45 s->inverse = inverse; | |
46 | |
47 s2 = inverse ? 1.0 : -1.0; | |
48 | |
49 for(i=0;i<(n/2);i++) { | |
50 alpha = 2 * M_PI * (float)i / (float)n; | |
51 c1 = cos(alpha); | |
52 s1 = sin(alpha) * s2; | |
53 s->exptab[i].re = c1; | |
54 s->exptab[i].im = s1; | |
55 } | |
1879
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1106
diff
changeset
|
56 s->fft_calc = ff_fft_calc_c; |
781 | 57 s->exptab1 = NULL; |
58 | |
59 /* compute constant table for HAVE_SSE version */ | |
975
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
60 #if (defined(HAVE_MMX) && defined(HAVE_BUILTIN_VECTOR)) || defined(HAVE_ALTIVEC) |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
61 { |
1009
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
62 int has_vectors = 0; |
781 | 63 |
975
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
64 #if defined(HAVE_MMX) |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
65 has_vectors = mm_support() & MM_SSE; |
995
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
975
diff
changeset
|
66 #endif |
1009
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
67 #if defined(HAVE_ALTIVEC) && !defined(ALTIVEC_USE_REFERENCE_C_CODE) |
995
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
975
diff
changeset
|
68 has_vectors = mm_support() & MM_ALTIVEC; |
975
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
69 #endif |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
70 if (has_vectors) { |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
71 int np, nblocks, np2, l; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
72 FFTComplex *q; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
73 |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
74 np = 1 << nbits; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
75 nblocks = np >> 3; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
76 np2 = np >> 1; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
77 s->exptab1 = av_malloc(np * 2 * sizeof(FFTComplex)); |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
78 if (!s->exptab1) |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
79 goto fail; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
80 q = s->exptab1; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
81 do { |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
82 for(l = 0; l < np2; l += 2 * nblocks) { |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
83 *q++ = s->exptab[l]; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
84 *q++ = s->exptab[l + nblocks]; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
85 |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
86 q->re = -s->exptab[l].im; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
87 q->im = s->exptab[l].re; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
88 q++; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
89 q->re = -s->exptab[l + nblocks].im; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
90 q->im = s->exptab[l + nblocks].re; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
91 q++; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
92 } |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
93 nblocks = nblocks >> 1; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
94 } while (nblocks != 0); |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
95 av_freep(&s->exptab); |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
96 #if defined(HAVE_MMX) |
1879
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1106
diff
changeset
|
97 s->fft_calc = ff_fft_calc_sse; |
975
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
98 #else |
1879
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1106
diff
changeset
|
99 s->fft_calc = ff_fft_calc_altivec; |
975
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
100 #endif |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
971
diff
changeset
|
101 } |
781 | 102 } |
103 #endif | |
104 | |
105 /* compute bit reverse table */ | |
106 | |
107 for(i=0;i<n;i++) { | |
108 m=0; | |
109 for(j=0;j<nbits;j++) { | |
110 m |= ((i >> j) & 1) << (nbits-j-1); | |
111 } | |
112 s->revtab[i]=m; | |
113 } | |
114 return 0; | |
115 fail: | |
116 av_freep(&s->revtab); | |
117 av_freep(&s->exptab); | |
118 av_freep(&s->exptab1); | |
119 return -1; | |
120 } | |
121 | |
122 /* butter fly op */ | |
123 #define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \ | |
124 {\ | |
125 FFTSample ax, ay, bx, by;\ | |
126 bx=pre1;\ | |
127 by=pim1;\ | |
128 ax=qre1;\ | |
129 ay=qim1;\ | |
130 pre = (bx + ax);\ | |
131 pim = (by + ay);\ | |
132 qre = (bx - ax);\ | |
133 qim = (by - ay);\ | |
134 } | |
135 | |
136 #define MUL16(a,b) ((a) * (b)) | |
137 | |
138 #define CMUL(pre, pim, are, aim, bre, bim) \ | |
139 {\ | |
140 pre = (MUL16(are, bre) - MUL16(aim, bim));\ | |
141 pim = (MUL16(are, bim) + MUL16(bre, aim));\ | |
142 } | |
143 | |
144 /** | |
1879
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1106
diff
changeset
|
145 * Do a complex FFT with the parameters defined in ff_fft_init(). The |
781 | 146 * input data must be permuted before with s->revtab table. No |
147 * 1.0/sqrt(n) normalization is done. | |
148 */ | |
1879
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1106
diff
changeset
|
149 void ff_fft_calc_c(FFTContext *s, FFTComplex *z) |
781 | 150 { |
151 int ln = s->nbits; | |
152 int j, np, np2; | |
153 int nblocks, nloops; | |
154 register FFTComplex *p, *q; | |
155 FFTComplex *exptab = s->exptab; | |
156 int l; | |
157 FFTSample tmp_re, tmp_im; | |
158 | |
159 np = 1 << ln; | |
160 | |
161 /* pass 0 */ | |
162 | |
163 p=&z[0]; | |
164 j=(np >> 1); | |
165 do { | |
166 BF(p[0].re, p[0].im, p[1].re, p[1].im, | |
167 p[0].re, p[0].im, p[1].re, p[1].im); | |
168 p+=2; | |
169 } while (--j != 0); | |
170 | |
171 /* pass 1 */ | |
172 | |
173 | |
174 p=&z[0]; | |
175 j=np >> 2; | |
176 if (s->inverse) { | |
177 do { | |
178 BF(p[0].re, p[0].im, p[2].re, p[2].im, | |
179 p[0].re, p[0].im, p[2].re, p[2].im); | |
180 BF(p[1].re, p[1].im, p[3].re, p[3].im, | |
181 p[1].re, p[1].im, -p[3].im, p[3].re); | |
182 p+=4; | |
183 } while (--j != 0); | |
184 } else { | |
185 do { | |
186 BF(p[0].re, p[0].im, p[2].re, p[2].im, | |
187 p[0].re, p[0].im, p[2].re, p[2].im); | |
188 BF(p[1].re, p[1].im, p[3].re, p[3].im, | |
189 p[1].re, p[1].im, p[3].im, -p[3].re); | |
190 p+=4; | |
191 } while (--j != 0); | |
192 } | |
193 /* pass 2 .. ln-1 */ | |
194 | |
195 nblocks = np >> 3; | |
196 nloops = 1 << 2; | |
197 np2 = np >> 1; | |
198 do { | |
199 p = z; | |
200 q = z + nloops; | |
201 for (j = 0; j < nblocks; ++j) { | |
202 BF(p->re, p->im, q->re, q->im, | |
203 p->re, p->im, q->re, q->im); | |
204 | |
205 p++; | |
206 q++; | |
207 for(l = nblocks; l < np2; l += nblocks) { | |
208 CMUL(tmp_re, tmp_im, exptab[l].re, exptab[l].im, q->re, q->im); | |
209 BF(p->re, p->im, q->re, q->im, | |
210 p->re, p->im, tmp_re, tmp_im); | |
211 p++; | |
212 q++; | |
213 } | |
214 | |
215 p += nloops; | |
216 q += nloops; | |
217 } | |
218 nblocks = nblocks >> 1; | |
219 nloops = nloops << 1; | |
220 } while (nblocks != 0); | |
221 } | |
222 | |
223 /** | |
1879
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1106
diff
changeset
|
224 * Do the permutation needed BEFORE calling ff_fft_calc() |
781 | 225 */ |
1879
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1106
diff
changeset
|
226 void ff_fft_permute(FFTContext *s, FFTComplex *z) |
781 | 227 { |
228 int j, k, np; | |
229 FFTComplex tmp; | |
230 const uint16_t *revtab = s->revtab; | |
231 | |
232 /* reverse */ | |
233 np = 1 << s->nbits; | |
234 for(j=0;j<np;j++) { | |
235 k = revtab[j]; | |
236 if (k < j) { | |
237 tmp = z[k]; | |
238 z[k] = z[j]; | |
239 z[j] = tmp; | |
240 } | |
241 } | |
242 } | |
243 | |
1879
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1106
diff
changeset
|
244 void ff_fft_end(FFTContext *s) |
781 | 245 { |
246 av_freep(&s->revtab); | |
247 av_freep(&s->exptab); | |
248 av_freep(&s->exptab1); | |
249 } | |
250 |