Mercurial > mplayer.hg
annotate libfaad2/mdct.c @ 13150:a7542243d695
some more segfault fixes
author | faust3 |
---|---|
date | Thu, 26 Aug 2004 10:34:20 +0000 |
parents | d81145997036 |
children | 6d50ef45a058 |
rev | line source |
---|---|
10725 | 1 /* |
2 ** FAAD2 - Freeware Advanced Audio (AAC) Decoder including SBR decoding | |
12527 | 3 ** Copyright (C) 2003-2004 M. Bakker, Ahead Software AG, http://www.nero.com |
10725 | 4 ** |
5 ** This program is free software; you can redistribute it and/or modify | |
6 ** it under the terms of the GNU General Public License as published by | |
7 ** the Free Software Foundation; either version 2 of the License, or | |
8 ** (at your option) any later version. | |
9 ** | |
10 ** This program is distributed in the hope that it will be useful, | |
11 ** but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 ** GNU General Public License for more details. | |
14 ** | |
15 ** You should have received a copy of the GNU General Public License | |
16 ** along with this program; if not, write to the Free Software | |
17 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
18 ** | |
19 ** Any non-GPL usage of this software or parts of this software is strictly | |
20 ** forbidden. | |
21 ** | |
22 ** Commercial non-GPL licensing of this software is possible. | |
23 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com. | |
24 ** | |
12625
d81145997036
More information about modifications to comply more closely with GPL 2a.
diego
parents:
12527
diff
changeset
|
25 ** Initially modified for use with MPlayer by Arpad Gereöffy on 2003/08/30 |
d81145997036
More information about modifications to comply more closely with GPL 2a.
diego
parents:
12527
diff
changeset
|
26 ** $Id: mdct.c,v 1.3 2004/06/02 22:59:03 diego Exp $ |
d81145997036
More information about modifications to comply more closely with GPL 2a.
diego
parents:
12527
diff
changeset
|
27 ** detailed CVS changelog at http://www.mplayerhq.hu/cgi-bin/cvsweb.cgi/main/ |
10725 | 28 **/ |
29 | |
30 /* | |
31 * Fast (I)MDCT Implementation using (I)FFT ((Inverse) Fast Fourier Transform) | |
32 * and consists of three steps: pre-(I)FFT complex multiplication, complex | |
33 * (I)FFT, post-(I)FFT complex multiplication, | |
34 * | |
35 * As described in: | |
36 * P. Duhamel, Y. Mahieux, and J.P. Petit, "A Fast Algorithm for the | |
37 * Implementation of Filter Banks Based on 'Time Domain Aliasing | |
38 * Cancellation’," IEEE Proc. on ICASSP‘91, 1991, pp. 2209-2212. | |
39 * | |
40 * | |
41 * As of April 6th 2002 completely rewritten. | |
42 * This (I)MDCT can now be used for any data size n, where n is divisible by 8. | |
43 * | |
44 */ | |
45 | |
46 #include "common.h" | |
47 #include "structs.h" | |
48 | |
49 #include <stdlib.h> | |
50 #ifdef _WIN32_WCE | |
51 #define assert(x) | |
52 #else | |
53 #include <assert.h> | |
54 #endif | |
55 | |
56 #include "cfft.h" | |
57 #include "mdct.h" | |
58 | |
59 /* const_tab[]: | |
60 0: sqrt(2 / N) | |
61 1: cos(2 * PI / N) | |
62 2: sin(2 * PI / N) | |
63 3: cos(2 * PI * (1/8) / N) | |
64 4: sin(2 * PI * (1/8) / N) | |
65 */ | |
12527 | 66 #ifdef FIXED_POINT |
10725 | 67 real_t const_tab[][5] = |
68 { | |
12527 | 69 { /* 2048 */ |
70 COEF_CONST(1), | |
71 FRAC_CONST(0.99999529380957619), | |
72 FRAC_CONST(0.0030679567629659761), | |
73 FRAC_CONST(0.99999992646571789), | |
74 FRAC_CONST(0.00038349518757139556) | |
75 }, { /* 1920 */ | |
76 COEF_CONST(/* sqrt(1024/960) */ 1.0327955589886444), | |
77 FRAC_CONST(0.99999464540169647), | |
78 FRAC_CONST(0.0032724865065266251), | |
79 FRAC_CONST(0.99999991633432805), | |
80 FRAC_CONST(0.00040906153202803459) | |
81 }, { /* 1024 */ | |
82 COEF_CONST(1), | |
83 FRAC_CONST(0.99998117528260111), | |
84 FRAC_CONST(0.0061358846491544753), | |
85 FRAC_CONST(0.99999970586288223), | |
86 FRAC_CONST(0.00076699031874270449) | |
87 }, { /* 960 */ | |
88 COEF_CONST(/* sqrt(512/480) */ 1.0327955589886444), | |
89 FRAC_CONST(0.99997858166412923), | |
90 FRAC_CONST(0.0065449379673518581), | |
91 FRAC_CONST(0.99999966533732598), | |
92 FRAC_CONST(0.00081812299560725323) | |
93 }, { /* 256 */ | |
94 COEF_CONST(1), | |
95 FRAC_CONST(0.99969881869620425), | |
96 FRAC_CONST(0.024541228522912288), | |
97 FRAC_CONST(0.99999529380957619), | |
98 FRAC_CONST(0.0030679567629659761) | |
99 }, { /* 240 */ | |
100 COEF_CONST(/* sqrt(256/240) */ 1.0327955589886444), | |
101 FRAC_CONST(0.99965732497555726), | |
102 FRAC_CONST(0.026176948307873149), | |
103 FRAC_CONST(0.99999464540169647), | |
104 FRAC_CONST(0.0032724865065266251) | |
105 } | |
10725 | 106 #ifdef SSR_DEC |
12527 | 107 ,{ /* 512 */ |
108 COEF_CONST(1), | |
109 FRAC_CONST(0.9999247018391445), | |
110 FRAC_CONST(0.012271538285719925), | |
111 FRAC_CONST(0.99999882345170188), | |
112 FRAC_CONST(0.0015339801862847655) | |
113 }, { /* 64 */ | |
114 COEF_CONST(1), | |
115 FRAC_CONST(0.99518472667219693), | |
116 FRAC_CONST(0.098017140329560604), | |
117 FRAC_CONST(0.9999247018391445), | |
118 FRAC_CONST(0.012271538285719925) | |
119 } | |
10725 | 120 #endif |
121 }; | |
122 #endif | |
123 | |
12527 | 124 #ifdef FIXED_POINT |
125 static uint8_t map_N_to_idx(uint16_t N) | |
10725 | 126 { |
10989 | 127 /* gives an index into const_tab above */ |
128 /* for normal AAC deocding (eg. no scalable profile) only */ | |
129 /* index 0 and 4 will be used */ | |
10725 | 130 switch(N) |
131 { | |
132 case 2048: return 0; | |
133 case 1920: return 1; | |
134 case 1024: return 2; | |
135 case 960: return 3; | |
136 case 256: return 4; | |
137 case 240: return 5; | |
138 #ifdef SSR_DEC | |
139 case 512: return 6; | |
140 case 64: return 7; | |
141 #endif | |
142 } | |
143 return 0; | |
144 } | |
12527 | 145 #endif |
10725 | 146 |
147 mdct_info *faad_mdct_init(uint16_t N) | |
148 { | |
12527 | 149 uint16_t k; |
150 #ifdef FIXED_POINT | |
151 uint16_t N_idx; | |
10725 | 152 real_t cangle, sangle, c, s, cold; |
12527 | 153 #endif |
10725 | 154 real_t scale; |
155 | |
12527 | 156 mdct_info *mdct = (mdct_info*)faad_malloc(sizeof(mdct_info)); |
10725 | 157 |
158 assert(N % 8 == 0); | |
159 | |
160 mdct->N = N; | |
12527 | 161 mdct->sincos = (complex_t*)faad_malloc(N/4*sizeof(complex_t)); |
10725 | 162 |
12527 | 163 #ifdef FIXED_POINT |
10725 | 164 N_idx = map_N_to_idx(N); |
165 | |
166 scale = const_tab[N_idx][0]; | |
167 cangle = const_tab[N_idx][1]; | |
168 sangle = const_tab[N_idx][2]; | |
169 c = const_tab[N_idx][3]; | |
170 s = const_tab[N_idx][4]; | |
12527 | 171 #else |
172 scale = (real_t)sqrt(2.0 / (real_t)N); | |
173 #endif | |
10725 | 174 |
10989 | 175 /* (co)sine table build using recurrence relations */ |
176 /* this can also be done using static table lookup or */ | |
177 /* some form of interpolation */ | |
10725 | 178 for (k = 0; k < N/4; k++) |
179 { | |
12527 | 180 #ifdef FIXED_POINT |
181 RE(mdct->sincos[k]) = c; //MUL_C_C(c,scale); | |
182 IM(mdct->sincos[k]) = s; //MUL_C_C(s,scale); | |
10725 | 183 |
184 cold = c; | |
12527 | 185 c = MUL_F(c,cangle) - MUL_F(s,sangle); |
186 s = MUL_F(s,cangle) + MUL_F(cold,sangle); | |
10989 | 187 #else |
188 /* no recurrence, just sines */ | |
12527 | 189 RE(mdct->sincos[k]) = scale*(real_t)(cos(2.0*M_PI*(k+1./8.) / (real_t)N)); |
190 IM(mdct->sincos[k]) = scale*(real_t)(sin(2.0*M_PI*(k+1./8.) / (real_t)N)); | |
10989 | 191 #endif |
10725 | 192 } |
193 | |
194 /* initialise fft */ | |
195 mdct->cfft = cffti(N/4); | |
196 | |
12527 | 197 #ifdef PROFILE |
198 mdct->cycles = 0; | |
199 mdct->fft_cycles = 0; | |
200 #endif | |
201 | |
10725 | 202 return mdct; |
203 } | |
204 | |
205 void faad_mdct_end(mdct_info *mdct) | |
206 { | |
207 if (mdct != NULL) | |
208 { | |
12527 | 209 #ifdef PROFILE |
210 printf("MDCT[%.4d]: %I64d cycles\n", mdct->N, mdct->cycles); | |
211 printf("CFFT[%.4d]: %I64d cycles\n", mdct->N/4, mdct->fft_cycles); | |
212 #endif | |
213 | |
10725 | 214 cfftu(mdct->cfft); |
215 | |
12527 | 216 if (mdct->sincos) faad_free(mdct->sincos); |
10725 | 217 |
12527 | 218 faad_free(mdct); |
10725 | 219 } |
220 } | |
221 | |
222 void faad_imdct(mdct_info *mdct, real_t *X_in, real_t *X_out) | |
223 { | |
224 uint16_t k; | |
225 | |
226 complex_t x; | |
12527 | 227 ALIGN complex_t Z1[512]; |
228 complex_t *sincos = mdct->sincos; | |
229 | |
230 uint16_t N = mdct->N; | |
231 uint16_t N2 = N >> 1; | |
232 uint16_t N4 = N >> 2; | |
233 uint16_t N8 = N >> 3; | |
234 | |
235 #ifdef PROFILE | |
236 int64_t count1, count2 = faad_get_ts(); | |
237 #endif | |
238 | |
239 /* pre-IFFT complex multiplication */ | |
240 for (k = 0; k < N4; k++) | |
241 { | |
242 ComplexMult(&IM(Z1[k]), &RE(Z1[k]), | |
243 X_in[2*k], X_in[N2 - 1 - 2*k], RE(sincos[k]), IM(sincos[k])); | |
244 } | |
245 | |
246 #ifdef PROFILE | |
247 count1 = faad_get_ts(); | |
248 #endif | |
249 | |
250 /* complex IFFT, any non-scaling FFT can be used here */ | |
251 cfftb(mdct->cfft, Z1); | |
252 | |
253 #ifdef PROFILE | |
254 count1 = faad_get_ts() - count1; | |
255 #endif | |
256 | |
257 /* post-IFFT complex multiplication */ | |
258 for (k = 0; k < N4; k++) | |
259 { | |
260 RE(x) = RE(Z1[k]); | |
261 IM(x) = IM(Z1[k]); | |
262 ComplexMult(&IM(Z1[k]), &RE(Z1[k]), | |
263 IM(x), RE(x), RE(sincos[k]), IM(sincos[k])); | |
264 } | |
265 | |
266 /* reordering */ | |
267 for (k = 0; k < N8; k+=2) | |
268 { | |
269 X_out[ 2*k] = IM(Z1[N8 + k]); | |
270 X_out[ 2 + 2*k] = IM(Z1[N8 + 1 + k]); | |
271 | |
272 X_out[ 1 + 2*k] = -RE(Z1[N8 - 1 - k]); | |
273 X_out[ 3 + 2*k] = -RE(Z1[N8 - 2 - k]); | |
274 | |
275 X_out[N4 + 2*k] = RE(Z1[ k]); | |
276 X_out[N4 + + 2 + 2*k] = RE(Z1[ 1 + k]); | |
277 | |
278 X_out[N4 + 1 + 2*k] = -IM(Z1[N4 - 1 - k]); | |
279 X_out[N4 + 3 + 2*k] = -IM(Z1[N4 - 2 - k]); | |
280 | |
281 X_out[N2 + 2*k] = RE(Z1[N8 + k]); | |
282 X_out[N2 + + 2 + 2*k] = RE(Z1[N8 + 1 + k]); | |
283 | |
284 X_out[N2 + 1 + 2*k] = -IM(Z1[N8 - 1 - k]); | |
285 X_out[N2 + 3 + 2*k] = -IM(Z1[N8 - 2 - k]); | |
286 | |
287 X_out[N2 + N4 + 2*k] = -IM(Z1[ k]); | |
288 X_out[N2 + N4 + 2 + 2*k] = -IM(Z1[ 1 + k]); | |
289 | |
290 X_out[N2 + N4 + 1 + 2*k] = RE(Z1[N4 - 1 - k]); | |
291 X_out[N2 + N4 + 3 + 2*k] = RE(Z1[N4 - 2 - k]); | |
292 } | |
293 | |
294 #ifdef PROFILE | |
295 count2 = faad_get_ts() - count2; | |
296 mdct->fft_cycles += count1; | |
297 mdct->cycles += (count2 - count1); | |
298 #endif | |
299 } | |
300 | |
301 #ifdef USE_SSE | |
302 void faad_imdct_sse(mdct_info *mdct, real_t *X_in, real_t *X_out) | |
303 { | |
304 uint16_t k; | |
305 | |
306 ALIGN complex_t Z1[512]; | |
10725 | 307 complex_t *sincos = mdct->sincos; |
308 | |
309 uint16_t N = mdct->N; | |
310 uint16_t N2 = N >> 1; | |
311 uint16_t N4 = N >> 2; | |
312 uint16_t N8 = N >> 3; | |
313 | |
12527 | 314 #ifdef PROFILE |
315 int64_t count1, count2 = faad_get_ts(); | |
316 #endif | |
317 | |
10725 | 318 /* pre-IFFT complex multiplication */ |
12527 | 319 for (k = 0; k < N4; k+=4) |
10725 | 320 { |
12527 | 321 __m128 m12, m13, m14, m0, m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11; |
322 __m128 n12, n13, n14, n0, n1, n2, n3, n4, n5, n6, n7, n8, n9, n10, n11; | |
323 n12 = _mm_load_ps(&X_in[N2 - 2*k - 8]); | |
324 m12 = _mm_load_ps(&X_in[N2 - 2*k - 4]); | |
325 m13 = _mm_load_ps(&X_in[2*k]); | |
326 n13 = _mm_load_ps(&X_in[2*k + 4]); | |
327 m1 = _mm_load_ps(&RE(sincos[k])); | |
328 n1 = _mm_load_ps(&RE(sincos[k+2])); | |
329 | |
330 m0 = _mm_shuffle_ps(m12, m13, _MM_SHUFFLE(2,0,1,3)); | |
331 m2 = _mm_shuffle_ps(m1, m1, _MM_SHUFFLE(2,3,0,1)); | |
332 m14 = _mm_shuffle_ps(m0, m0, _MM_SHUFFLE(3,1,2,0)); | |
333 n0 = _mm_shuffle_ps(n12, n13, _MM_SHUFFLE(2,0,1,3)); | |
334 n2 = _mm_shuffle_ps(n1, n1, _MM_SHUFFLE(2,3,0,1)); | |
335 n14 = _mm_shuffle_ps(n0, n0, _MM_SHUFFLE(3,1,2,0)); | |
336 | |
337 m3 = _mm_mul_ps(m14, m1); | |
338 n3 = _mm_mul_ps(n14, n1); | |
339 m4 = _mm_mul_ps(m14, m2); | |
340 n4 = _mm_mul_ps(n14, n2); | |
341 | |
342 m5 = _mm_shuffle_ps(m3, m4, _MM_SHUFFLE(2,0,2,0)); | |
343 n5 = _mm_shuffle_ps(n3, n4, _MM_SHUFFLE(2,0,2,0)); | |
344 m6 = _mm_shuffle_ps(m3, m4, _MM_SHUFFLE(3,1,3,1)); | |
345 n6 = _mm_shuffle_ps(n3, n4, _MM_SHUFFLE(3,1,3,1)); | |
346 | |
347 m7 = _mm_add_ps(m5, m6); | |
348 n7 = _mm_add_ps(n5, n6); | |
349 m8 = _mm_sub_ps(m5, m6); | |
350 n8 = _mm_sub_ps(n5, n6); | |
351 | |
352 m9 = _mm_shuffle_ps(m7, m7, _MM_SHUFFLE(3,2,3,2)); | |
353 n9 = _mm_shuffle_ps(n7, n7, _MM_SHUFFLE(3,2,3,2)); | |
354 m10 = _mm_shuffle_ps(m8, m8, _MM_SHUFFLE(1,0,1,0)); | |
355 n10 = _mm_shuffle_ps(n8, n8, _MM_SHUFFLE(1,0,1,0)); | |
356 | |
357 m11 = _mm_unpacklo_ps(m10, m9); | |
358 n11 = _mm_unpacklo_ps(n10, n9); | |
359 | |
360 _mm_store_ps(&RE(Z1[k]), m11); | |
361 _mm_store_ps(&RE(Z1[k+2]), n11); | |
10725 | 362 } |
363 | |
12527 | 364 #ifdef PROFILE |
365 count1 = faad_get_ts(); | |
366 #endif | |
367 | |
10989 | 368 /* complex IFFT, any non-scaling FFT can be used here */ |
12527 | 369 cfftb_sse(mdct->cfft, Z1); |
370 | |
371 #ifdef PROFILE | |
372 count1 = faad_get_ts() - count1; | |
373 #endif | |
10725 | 374 |
375 /* post-IFFT complex multiplication */ | |
12527 | 376 for (k = 0; k < N4; k+=4) |
10725 | 377 { |
12527 | 378 __m128 m0, m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11; |
379 __m128 n0, n1, n2, n3, n4, n5, n6, n7, n8, n9, n10, n11; | |
380 m0 = _mm_load_ps(&RE(Z1[k])); | |
381 n0 = _mm_load_ps(&RE(Z1[k+2])); | |
382 m1 = _mm_load_ps(&RE(sincos[k])); | |
383 n1 = _mm_load_ps(&RE(sincos[k+2])); | |
384 | |
385 m2 = _mm_shuffle_ps(m1, m1, _MM_SHUFFLE(2,3,0,1)); | |
386 n2 = _mm_shuffle_ps(n1, n1, _MM_SHUFFLE(2,3,0,1)); | |
387 | |
388 m3 = _mm_mul_ps(m0, m1); | |
389 n3 = _mm_mul_ps(n0, n1); | |
390 m4 = _mm_mul_ps(m0, m2); | |
391 n4 = _mm_mul_ps(n0, n2); | |
10725 | 392 |
12527 | 393 m5 = _mm_shuffle_ps(m3, m4, _MM_SHUFFLE(2,0,2,0)); |
394 n5 = _mm_shuffle_ps(n3, n4, _MM_SHUFFLE(2,0,2,0)); | |
395 m6 = _mm_shuffle_ps(m3, m4, _MM_SHUFFLE(3,1,3,1)); | |
396 n6 = _mm_shuffle_ps(n3, n4, _MM_SHUFFLE(3,1,3,1)); | |
397 | |
398 m7 = _mm_add_ps(m5, m6); | |
399 n7 = _mm_add_ps(n5, n6); | |
400 m8 = _mm_sub_ps(m5, m6); | |
401 n8 = _mm_sub_ps(n5, n6); | |
402 | |
403 m9 = _mm_shuffle_ps(m7, m7, _MM_SHUFFLE(3,2,3,2)); | |
404 n9 = _mm_shuffle_ps(n7, n7, _MM_SHUFFLE(3,2,3,2)); | |
405 m10 = _mm_shuffle_ps(m8, m8, _MM_SHUFFLE(1,0,1,0)); | |
406 n10 = _mm_shuffle_ps(n8, n8, _MM_SHUFFLE(1,0,1,0)); | |
407 | |
408 m11 = _mm_unpacklo_ps(m10, m9); | |
409 n11 = _mm_unpacklo_ps(n10, n9); | |
410 | |
411 _mm_store_ps(&RE(Z1[k]), m11); | |
412 _mm_store_ps(&RE(Z1[k+2]), n11); | |
10725 | 413 } |
414 | |
415 /* reordering */ | |
12527 | 416 for (k = 0; k < N8; k+=2) |
10725 | 417 { |
12527 | 418 __m128 m0, m1, m2, m3, m4, m5, m6, m7, m8, m9, m10, m11, m13; |
419 __m128 n4, n5, n6, n7, n8, n9; | |
420 __m128 neg1 = _mm_set_ps(-1.0, 1.0, -1.0, 1.0); | |
421 __m128 neg2 = _mm_set_ps(-1.0, -1.0, -1.0, -1.0); | |
422 | |
423 m0 = _mm_load_ps(&RE(Z1[k])); | |
424 m1 = _mm_load_ps(&RE(Z1[N8 - 2 - k])); | |
425 m2 = _mm_load_ps(&RE(Z1[N8 + k])); | |
426 m3 = _mm_load_ps(&RE(Z1[N4 - 2 - k])); | |
427 | |
428 m10 = _mm_mul_ps(m0, neg1); | |
429 m11 = _mm_mul_ps(m1, neg2); | |
430 m13 = _mm_mul_ps(m3, neg1); | |
431 | |
432 m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(3,1,2,0)); | |
433 n4 = _mm_shuffle_ps(m10, m10, _MM_SHUFFLE(3,1,2,0)); | |
434 m4 = _mm_shuffle_ps(m11, m11, _MM_SHUFFLE(3,1,2,0)); | |
435 n5 = _mm_shuffle_ps(m13, m13, _MM_SHUFFLE(3,1,2,0)); | |
436 | |
437 m6 = _mm_shuffle_ps(m4, m5, _MM_SHUFFLE(3,2,1,0)); | |
438 n6 = _mm_shuffle_ps(n4, n5, _MM_SHUFFLE(3,2,1,0)); | |
439 m7 = _mm_shuffle_ps(m5, m4, _MM_SHUFFLE(3,2,1,0)); | |
440 n7 = _mm_shuffle_ps(n5, n4, _MM_SHUFFLE(3,2,1,0)); | |
441 | |
442 m8 = _mm_shuffle_ps(m6, m6, _MM_SHUFFLE(0,3,1,2)); | |
443 n8 = _mm_shuffle_ps(n6, n6, _MM_SHUFFLE(2,1,3,0)); | |
444 m9 = _mm_shuffle_ps(m7, m7, _MM_SHUFFLE(2,1,3,0)); | |
445 n9 = _mm_shuffle_ps(n7, n7, _MM_SHUFFLE(0,3,1,2)); | |
446 | |
447 _mm_store_ps(&X_out[2*k], m8); | |
448 _mm_store_ps(&X_out[N4 + 2*k], n8); | |
449 _mm_store_ps(&X_out[N2 + 2*k], m9); | |
450 _mm_store_ps(&X_out[N2 + N4 + 2*k], n9); | |
10725 | 451 } |
12527 | 452 |
453 #ifdef PROFILE | |
454 count2 = faad_get_ts() - count2; | |
455 mdct->fft_cycles += count1; | |
456 mdct->cycles += (count2 - count1); | |
457 #endif | |
10725 | 458 } |
12527 | 459 #endif |
10725 | 460 |
461 #ifdef LTP_DEC | |
462 void faad_mdct(mdct_info *mdct, real_t *X_in, real_t *X_out) | |
463 { | |
464 uint16_t k; | |
465 | |
466 complex_t x; | |
12527 | 467 ALIGN complex_t Z1[512]; |
10725 | 468 complex_t *sincos = mdct->sincos; |
469 | |
470 uint16_t N = mdct->N; | |
471 uint16_t N2 = N >> 1; | |
472 uint16_t N4 = N >> 2; | |
473 uint16_t N8 = N >> 3; | |
474 | |
12527 | 475 #ifndef FIXED_POINT |
10725 | 476 real_t scale = REAL_CONST(N); |
12527 | 477 #else |
478 real_t scale = REAL_CONST(4.0/N); | |
479 #endif | |
10725 | 480 |
481 /* pre-FFT complex multiplication */ | |
482 for (k = 0; k < N8; k++) | |
483 { | |
484 uint16_t n = k << 1; | |
485 RE(x) = X_in[N - N4 - 1 - n] + X_in[N - N4 + n]; | |
486 IM(x) = X_in[ N4 + n] - X_in[ N4 - 1 - n]; | |
487 | |
12527 | 488 ComplexMult(&RE(Z1[k]), &IM(Z1[k]), |
489 RE(x), IM(x), RE(sincos[k]), IM(sincos[k])); | |
490 | |
491 RE(Z1[k]) = MUL_R(RE(Z1[k]), scale); | |
492 IM(Z1[k]) = MUL_R(IM(Z1[k]), scale); | |
10725 | 493 |
494 RE(x) = X_in[N2 - 1 - n] - X_in[ n]; | |
495 IM(x) = X_in[N2 + n] + X_in[N - 1 - n]; | |
496 | |
12527 | 497 ComplexMult(&RE(Z1[k + N8]), &IM(Z1[k + N8]), |
498 RE(x), IM(x), RE(sincos[k + N8]), IM(sincos[k + N8])); | |
499 | |
500 RE(Z1[k + N8]) = MUL_R(RE(Z1[k + N8]), scale); | |
501 IM(Z1[k + N8]) = MUL_R(IM(Z1[k + N8]), scale); | |
10725 | 502 } |
503 | |
10989 | 504 /* complex FFT, any non-scaling FFT can be used here */ |
10725 | 505 cfftf(mdct->cfft, Z1); |
506 | |
507 /* post-FFT complex multiplication */ | |
508 for (k = 0; k < N4; k++) | |
509 { | |
510 uint16_t n = k << 1; | |
12527 | 511 ComplexMult(&RE(x), &IM(x), |
512 RE(Z1[k]), IM(Z1[k]), RE(sincos[k]), IM(sincos[k])); | |
10725 | 513 |
12527 | 514 X_out[ n] = -RE(x); |
515 X_out[N2 - 1 - n] = IM(x); | |
516 X_out[N2 + n] = -IM(x); | |
517 X_out[N - 1 - n] = RE(x); | |
10725 | 518 } |
519 } | |
520 #endif |