Mercurial > libavcodec.hg
annotate fft-test.c @ 3980:5afe4253a220 libavcodec
replace a few and/sub/... by cmov
this is faster on P3, should be faster on AMD, and should be slower on P4
its disabled by default (benchmarks welcome so we know when to enable it)
author | michael |
---|---|
date | Tue, 10 Oct 2006 01:08:39 +0000 |
parents | c8c591fe26f8 |
children | e82ceaa9c386 |
rev | line source |
---|---|
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
2967
diff
changeset
|
1 /* |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
2967
diff
changeset
|
2 * (c) 2002 Fabrice Bellard |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
2967
diff
changeset
|
3 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
4 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
5 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
6 * FFmpeg is free software; you can redistribute it and/or |
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
2967
diff
changeset
|
7 * modify it under the terms of the GNU Lesser General Public |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
2967
diff
changeset
|
8 * License as published by the Free Software Foundation; either |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
9 * version 2.1 of the License, or (at your option) any later version. |
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
2967
diff
changeset
|
10 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
11 * FFmpeg is distributed in the hope that it will be useful, |
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
2967
diff
changeset
|
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
2967
diff
changeset
|
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
2967
diff
changeset
|
14 * Lesser General Public License for more details. |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
2967
diff
changeset
|
15 * |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
2967
diff
changeset
|
16 * You should have received a copy of the GNU Lesser General Public |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3699
diff
changeset
|
17 * License along with FFmpeg; if not, write to the Free Software |
3699
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
2967
diff
changeset
|
18 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
2967
diff
changeset
|
19 */ |
c537a97eec66
Add official LGPL license headers to the files that were missing them.
diego
parents:
2967
diff
changeset
|
20 |
1106 | 21 /** |
22 * @file fft-test.c | |
23 * FFT and MDCT tests. | |
24 */ | |
25 | |
781 | 26 #include "dsputil.h" |
27 #include <math.h> | |
980 | 28 #include <unistd.h> |
781 | 29 #include <sys/time.h> |
30 | |
31 int mm_flags; | |
32 | |
33 /* reference fft */ | |
34 | |
35 #define MUL16(a,b) ((a) * (b)) | |
36 | |
37 #define CMAC(pre, pim, are, aim, bre, bim) \ | |
38 {\ | |
39 pre += (MUL16(are, bre) - MUL16(aim, bim));\ | |
40 pim += (MUL16(are, bim) + MUL16(bre, aim));\ | |
41 } | |
42 | |
43 FFTComplex *exptab; | |
44 | |
45 void fft_ref_init(int nbits, int inverse) | |
46 { | |
47 int n, i; | |
48 float c1, s1, alpha; | |
49 | |
50 n = 1 << nbits; | |
51 exptab = av_malloc((n / 2) * sizeof(FFTComplex)); | |
52 | |
53 for(i=0;i<(n/2);i++) { | |
54 alpha = 2 * M_PI * (float)i / (float)n; | |
55 c1 = cos(alpha); | |
56 s1 = sin(alpha); | |
57 if (!inverse) | |
58 s1 = -s1; | |
59 exptab[i].re = c1; | |
60 exptab[i].im = s1; | |
61 } | |
62 } | |
63 | |
64 void fft_ref(FFTComplex *tabr, FFTComplex *tab, int nbits) | |
65 { | |
66 int n, i, j, k, n2; | |
67 float tmp_re, tmp_im, s, c; | |
68 FFTComplex *q; | |
69 | |
70 n = 1 << nbits; | |
71 n2 = n >> 1; | |
72 for(i=0;i<n;i++) { | |
73 tmp_re = 0; | |
74 tmp_im = 0; | |
75 q = tab; | |
76 for(j=0;j<n;j++) { | |
77 k = (i * j) & (n - 1); | |
78 if (k >= n2) { | |
79 c = -exptab[k - n2].re; | |
80 s = -exptab[k - n2].im; | |
81 } else { | |
82 c = exptab[k].re; | |
83 s = exptab[k].im; | |
84 } | |
85 CMAC(tmp_re, tmp_im, c, s, q->re, q->im); | |
86 q++; | |
87 } | |
88 tabr[i].re = tmp_re; | |
89 tabr[i].im = tmp_im; | |
90 } | |
91 } | |
92 | |
93 void imdct_ref(float *out, float *in, int n) | |
94 { | |
95 int k, i, a; | |
96 float sum, f; | |
97 | |
98 for(i=0;i<n;i++) { | |
99 sum = 0; | |
100 for(k=0;k<n/2;k++) { | |
101 a = (2 * i + 1 + (n / 2)) * (2 * k + 1); | |
102 f = cos(M_PI * a / (double)(2 * n)); | |
103 sum += f * in[k]; | |
104 } | |
105 out[i] = -sum; | |
106 } | |
107 } | |
108 | |
109 /* NOTE: no normalisation by 1 / N is done */ | |
110 void mdct_ref(float *output, float *input, int n) | |
111 { | |
112 int k, i; | |
113 float a, s; | |
114 | |
115 /* do it by hand */ | |
116 for(k=0;k<n/2;k++) { | |
117 s = 0; | |
118 for(i=0;i<n;i++) { | |
119 a = (2*M_PI*(2*i+1+n/2)*(2*k+1) / (4 * n)); | |
120 s += input[i] * cos(a); | |
121 } | |
122 output[k] = s; | |
123 } | |
124 } | |
125 | |
126 | |
127 float frandom(void) | |
128 { | |
129 return (float)((random() & 0xffff) - 32768) / 32768.0; | |
130 } | |
131 | |
1064 | 132 int64_t gettime(void) |
781 | 133 { |
134 struct timeval tv; | |
135 gettimeofday(&tv,NULL); | |
1064 | 136 return (int64_t)tv.tv_sec * 1000000 + tv.tv_usec; |
781 | 137 } |
138 | |
139 void check_diff(float *tab1, float *tab2, int n) | |
140 { | |
141 int i; | |
142 | |
143 for(i=0;i<n;i++) { | |
144 if (fabsf(tab1[i] - tab2[i]) >= 1e-3) { | |
2967 | 145 av_log(NULL, AV_LOG_ERROR, "ERROR %d: %f %f\n", |
781 | 146 i, tab1[i], tab2[i]); |
147 } | |
148 } | |
149 } | |
150 | |
151 | |
152 void help(void) | |
153 { | |
2593
786ccf72ccd5
printf -> av_log patch by (Benjamin Larsson <>banan student.ltu se)
michael
parents:
1879
diff
changeset
|
154 av_log(NULL, AV_LOG_INFO,"usage: fft-test [-h] [-s] [-i] [-n b]\n" |
781 | 155 "-h print this help\n" |
156 "-s speed test\n" | |
157 "-m (I)MDCT test\n" | |
158 "-i inverse transform test\n" | |
159 "-n b set the transform size to 2^b\n" | |
160 ); | |
161 exit(1); | |
162 } | |
163 | |
164 | |
165 | |
166 int main(int argc, char **argv) | |
167 { | |
168 FFTComplex *tab, *tab1, *tab_ref; | |
169 FFTSample *tabtmp, *tab2; | |
170 int it, i, c; | |
171 int do_speed = 0; | |
172 int do_mdct = 0; | |
173 int do_inverse = 0; | |
174 FFTContext s1, *s = &s1; | |
175 MDCTContext m1, *m = &m1; | |
176 int fft_nbits, fft_size; | |
177 | |
178 mm_flags = 0; | |
179 fft_nbits = 9; | |
180 for(;;) { | |
181 c = getopt(argc, argv, "hsimn:"); | |
182 if (c == -1) | |
183 break; | |
184 switch(c) { | |
185 case 'h': | |
186 help(); | |
187 break; | |
188 case 's': | |
189 do_speed = 1; | |
190 break; | |
191 case 'i': | |
192 do_inverse = 1; | |
193 break; | |
194 case 'm': | |
195 do_mdct = 1; | |
196 break; | |
197 case 'n': | |
198 fft_nbits = atoi(optarg); | |
199 break; | |
200 } | |
201 } | |
202 | |
203 fft_size = 1 << fft_nbits; | |
204 tab = av_malloc(fft_size * sizeof(FFTComplex)); | |
205 tab1 = av_malloc(fft_size * sizeof(FFTComplex)); | |
206 tab_ref = av_malloc(fft_size * sizeof(FFTComplex)); | |
207 tabtmp = av_malloc(fft_size / 2 * sizeof(FFTSample)); | |
208 tab2 = av_malloc(fft_size * sizeof(FFTSample)); | |
209 | |
210 if (do_mdct) { | |
211 if (do_inverse) | |
2593
786ccf72ccd5
printf -> av_log patch by (Benjamin Larsson <>banan student.ltu se)
michael
parents:
1879
diff
changeset
|
212 av_log(NULL, AV_LOG_INFO,"IMDCT"); |
781 | 213 else |
2593
786ccf72ccd5
printf -> av_log patch by (Benjamin Larsson <>banan student.ltu se)
michael
parents:
1879
diff
changeset
|
214 av_log(NULL, AV_LOG_INFO,"MDCT"); |
969 | 215 ff_mdct_init(m, fft_nbits, do_inverse); |
781 | 216 } else { |
217 if (do_inverse) | |
2593
786ccf72ccd5
printf -> av_log patch by (Benjamin Larsson <>banan student.ltu se)
michael
parents:
1879
diff
changeset
|
218 av_log(NULL, AV_LOG_INFO,"IFFT"); |
781 | 219 else |
2593
786ccf72ccd5
printf -> av_log patch by (Benjamin Larsson <>banan student.ltu se)
michael
parents:
1879
diff
changeset
|
220 av_log(NULL, AV_LOG_INFO,"FFT"); |
1879
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1106
diff
changeset
|
221 ff_fft_init(s, fft_nbits, do_inverse); |
781 | 222 fft_ref_init(fft_nbits, do_inverse); |
223 } | |
2593
786ccf72ccd5
printf -> av_log patch by (Benjamin Larsson <>banan student.ltu se)
michael
parents:
1879
diff
changeset
|
224 av_log(NULL, AV_LOG_INFO," %d test\n", fft_size); |
781 | 225 |
226 /* generate random data */ | |
227 | |
228 for(i=0;i<fft_size;i++) { | |
229 tab1[i].re = frandom(); | |
230 tab1[i].im = frandom(); | |
231 } | |
232 | |
233 /* checking result */ | |
2593
786ccf72ccd5
printf -> av_log patch by (Benjamin Larsson <>banan student.ltu se)
michael
parents:
1879
diff
changeset
|
234 av_log(NULL, AV_LOG_INFO,"Checking...\n"); |
781 | 235 |
236 if (do_mdct) { | |
237 if (do_inverse) { | |
238 imdct_ref((float *)tab_ref, (float *)tab1, fft_size); | |
969 | 239 ff_imdct_calc(m, tab2, (float *)tab1, tabtmp); |
781 | 240 check_diff((float *)tab_ref, tab2, fft_size); |
241 } else { | |
242 mdct_ref((float *)tab_ref, (float *)tab1, fft_size); | |
2967 | 243 |
969 | 244 ff_mdct_calc(m, tab2, (float *)tab1, tabtmp); |
781 | 245 |
246 check_diff((float *)tab_ref, tab2, fft_size / 2); | |
247 } | |
248 } else { | |
249 memcpy(tab, tab1, fft_size * sizeof(FFTComplex)); | |
1879
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1106
diff
changeset
|
250 ff_fft_permute(s, tab); |
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1106
diff
changeset
|
251 ff_fft_calc(s, tab); |
2967 | 252 |
781 | 253 fft_ref(tab_ref, tab1, fft_nbits); |
254 check_diff((float *)tab_ref, (float *)tab, fft_size * 2); | |
255 } | |
256 | |
257 /* do a speed test */ | |
258 | |
259 if (do_speed) { | |
1064 | 260 int64_t time_start, duration; |
781 | 261 int nb_its; |
262 | |
2593
786ccf72ccd5
printf -> av_log patch by (Benjamin Larsson <>banan student.ltu se)
michael
parents:
1879
diff
changeset
|
263 av_log(NULL, AV_LOG_INFO,"Speed test...\n"); |
781 | 264 /* we measure during about 1 seconds */ |
265 nb_its = 1; | |
266 for(;;) { | |
267 time_start = gettime(); | |
268 for(it=0;it<nb_its;it++) { | |
269 if (do_mdct) { | |
270 if (do_inverse) { | |
969 | 271 ff_imdct_calc(m, (float *)tab, (float *)tab1, tabtmp); |
781 | 272 } else { |
969 | 273 ff_mdct_calc(m, (float *)tab, (float *)tab1, tabtmp); |
781 | 274 } |
275 } else { | |
276 memcpy(tab, tab1, fft_size * sizeof(FFTComplex)); | |
1879
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1106
diff
changeset
|
277 ff_fft_calc(s, tab); |
781 | 278 } |
279 } | |
280 duration = gettime() - time_start; | |
281 if (duration >= 1000000) | |
282 break; | |
283 nb_its *= 2; | |
284 } | |
2967 | 285 av_log(NULL, AV_LOG_INFO,"time: %0.1f us/transform [total time=%0.2f s its=%d]\n", |
286 (double)duration / nb_its, | |
781 | 287 (double)duration / 1000000.0, |
288 nb_its); | |
289 } | |
2967 | 290 |
781 | 291 if (do_mdct) { |
969 | 292 ff_mdct_end(m); |
781 | 293 } else { |
1879
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1106
diff
changeset
|
294 ff_fft_end(s); |
781 | 295 } |
296 return 0; | |
297 } |