comparison libac3/imdct.c @ 0:986e461dc072 libavcodec

Initial revision
author glantau
date Sun, 22 Jul 2001 14:18:56 +0000
parents
children 5aa6292a1660
comparison
equal deleted inserted replaced
-1:000000000000 0:986e461dc072
1 /*
2 * imdct.c
3 *
4 * Copyright (C) Aaron Holtzman - May 1999
5 *
6 * This file is part of ac3dec, a free Dolby AC-3 stream decoder.
7 *
8 * ac3dec is free software; you can redistribute it and/or modify
9 * it under the terms of the GNU General Public License as published by
10 * the Free Software Foundation; either version 2, or (at your option)
11 * any later version.
12 *
13 * ac3dec is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 * GNU General Public License for more details.
17 *
18 * You should have received a copy of the GNU General Public License
19 * along with GNU Make; see the file COPYING. If not, write to
20 * the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
21 *
22 *
23 */
24
25 //#include "config.h"
26
27 #include <inttypes.h>
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <math.h>
31 #include "ac3.h"
32 #include "ac3_internal.h"
33
34 void (* imdct_256) (float data[], float delay[]);
35 void (* imdct_512) (float data[], float delay[]);
36
37 typedef struct complex_s
38 {
39 float real;
40 float imag;
41 } complex_t;
42
43
44 /* 128 point bit-reverse LUT */
45 static uint8_t bit_reverse_512[] = {
46 0x00, 0x40, 0x20, 0x60, 0x10, 0x50, 0x30, 0x70,
47 0x08, 0x48, 0x28, 0x68, 0x18, 0x58, 0x38, 0x78,
48 0x04, 0x44, 0x24, 0x64, 0x14, 0x54, 0x34, 0x74,
49 0x0c, 0x4c, 0x2c, 0x6c, 0x1c, 0x5c, 0x3c, 0x7c,
50 0x02, 0x42, 0x22, 0x62, 0x12, 0x52, 0x32, 0x72,
51 0x0a, 0x4a, 0x2a, 0x6a, 0x1a, 0x5a, 0x3a, 0x7a,
52 0x06, 0x46, 0x26, 0x66, 0x16, 0x56, 0x36, 0x76,
53 0x0e, 0x4e, 0x2e, 0x6e, 0x1e, 0x5e, 0x3e, 0x7e,
54 0x01, 0x41, 0x21, 0x61, 0x11, 0x51, 0x31, 0x71,
55 0x09, 0x49, 0x29, 0x69, 0x19, 0x59, 0x39, 0x79,
56 0x05, 0x45, 0x25, 0x65, 0x15, 0x55, 0x35, 0x75,
57 0x0d, 0x4d, 0x2d, 0x6d, 0x1d, 0x5d, 0x3d, 0x7d,
58 0x03, 0x43, 0x23, 0x63, 0x13, 0x53, 0x33, 0x73,
59 0x0b, 0x4b, 0x2b, 0x6b, 0x1b, 0x5b, 0x3b, 0x7b,
60 0x07, 0x47, 0x27, 0x67, 0x17, 0x57, 0x37, 0x77,
61 0x0f, 0x4f, 0x2f, 0x6f, 0x1f, 0x5f, 0x3f, 0x7f};
62
63 static uint8_t bit_reverse_256[] = {
64 0x00, 0x20, 0x10, 0x30, 0x08, 0x28, 0x18, 0x38,
65 0x04, 0x24, 0x14, 0x34, 0x0c, 0x2c, 0x1c, 0x3c,
66 0x02, 0x22, 0x12, 0x32, 0x0a, 0x2a, 0x1a, 0x3a,
67 0x06, 0x26, 0x16, 0x36, 0x0e, 0x2e, 0x1e, 0x3e,
68 0x01, 0x21, 0x11, 0x31, 0x09, 0x29, 0x19, 0x39,
69 0x05, 0x25, 0x15, 0x35, 0x0d, 0x2d, 0x1d, 0x3d,
70 0x03, 0x23, 0x13, 0x33, 0x0b, 0x2b, 0x1b, 0x3b,
71 0x07, 0x27, 0x17, 0x37, 0x0f, 0x2f, 0x1f, 0x3f};
72
73 static complex_t buf[128];
74
75 /* Twiddle factor LUT */
76 static complex_t w_1[1];
77 static complex_t w_2[2];
78 static complex_t w_4[4];
79 static complex_t w_8[8];
80 static complex_t w_16[16];
81 static complex_t w_32[32];
82 static complex_t w_64[64];
83 static complex_t * w[7] = {w_1, w_2, w_4, w_8, w_16, w_32, w_64};
84
85 /* Twiddle factors for IMDCT */
86 static float xcos1[128];
87 static float xsin1[128];
88 static float xcos2[64];
89 static float xsin2[64];
90
91 /* Windowing function for Modified DCT - Thank you acroread */
92 float imdct_window[] = {
93 0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130,
94 0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443,
95 0.00501, 0.00564, 0.00632, 0.00706, 0.00785, 0.00871, 0.00962, 0.01061,
96 0.01166, 0.01279, 0.01399, 0.01526, 0.01662, 0.01806, 0.01959, 0.02121,
97 0.02292, 0.02472, 0.02662, 0.02863, 0.03073, 0.03294, 0.03527, 0.03770,
98 0.04025, 0.04292, 0.04571, 0.04862, 0.05165, 0.05481, 0.05810, 0.06153,
99 0.06508, 0.06878, 0.07261, 0.07658, 0.08069, 0.08495, 0.08935, 0.09389,
100 0.09859, 0.10343, 0.10842, 0.11356, 0.11885, 0.12429, 0.12988, 0.13563,
101 0.14152, 0.14757, 0.15376, 0.16011, 0.16661, 0.17325, 0.18005, 0.18699,
102 0.19407, 0.20130, 0.20867, 0.21618, 0.22382, 0.23161, 0.23952, 0.24757,
103 0.25574, 0.26404, 0.27246, 0.28100, 0.28965, 0.29841, 0.30729, 0.31626,
104 0.32533, 0.33450, 0.34376, 0.35311, 0.36253, 0.37204, 0.38161, 0.39126,
105 0.40096, 0.41072, 0.42054, 0.43040, 0.44030, 0.45023, 0.46020, 0.47019,
106 0.48020, 0.49022, 0.50025, 0.51028, 0.52031, 0.53033, 0.54033, 0.55031,
107 0.56026, 0.57019, 0.58007, 0.58991, 0.59970, 0.60944, 0.61912, 0.62873,
108 0.63827, 0.64774, 0.65713, 0.66643, 0.67564, 0.68476, 0.69377, 0.70269,
109 0.71150, 0.72019, 0.72877, 0.73723, 0.74557, 0.75378, 0.76186, 0.76981,
110 0.77762, 0.78530, 0.79283, 0.80022, 0.80747, 0.81457, 0.82151, 0.82831,
111 0.83496, 0.84145, 0.84779, 0.85398, 0.86001, 0.86588, 0.87160, 0.87716,
112 0.88257, 0.88782, 0.89291, 0.89785, 0.90264, 0.90728, 0.91176, 0.91610,
113 0.92028, 0.92432, 0.92822, 0.93197, 0.93558, 0.93906, 0.94240, 0.94560,
114 0.94867, 0.95162, 0.95444, 0.95713, 0.95971, 0.96217, 0.96451, 0.96674,
115 0.96887, 0.97089, 0.97281, 0.97463, 0.97635, 0.97799, 0.97953, 0.98099,
116 0.98236, 0.98366, 0.98488, 0.98602, 0.98710, 0.98811, 0.98905, 0.98994,
117 0.99076, 0.99153, 0.99225, 0.99291, 0.99353, 0.99411, 0.99464, 0.99513,
118 0.99558, 0.99600, 0.99639, 0.99674, 0.99706, 0.99736, 0.99763, 0.99788,
119 0.99811, 0.99831, 0.99850, 0.99867, 0.99882, 0.99895, 0.99908, 0.99919,
120 0.99929, 0.99938, 0.99946, 0.99953, 0.99959, 0.99965, 0.99969, 0.99974,
121 0.99978, 0.99981, 0.99984, 0.99986, 0.99988, 0.99990, 0.99992, 0.99993,
122 0.99994, 0.99995, 0.99996, 0.99997, 0.99998, 0.99998, 0.99998, 0.99999,
123 0.99999, 0.99999, 0.99999, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000,
124 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000 };
125
126
127 static inline void swap_cmplx(complex_t *a, complex_t *b)
128 {
129 complex_t tmp;
130
131 tmp = *a;
132 *a = *b;
133 *b = tmp;
134 }
135
136
137
138 static inline complex_t cmplx_mult(complex_t a, complex_t b)
139 {
140 complex_t ret;
141
142 ret.real = a.real * b.real - a.imag * b.imag;
143 ret.imag = a.real * b.imag + a.imag * b.real;
144
145 return ret;
146 }
147
148 void
149 imdct_do_512(float data[],float delay[])
150 {
151 int i,k;
152 int p,q;
153 int m;
154 int two_m;
155 int two_m_plus_one;
156
157 float tmp_a_i;
158 float tmp_a_r;
159 float tmp_b_i;
160 float tmp_b_r;
161
162 float *data_ptr;
163 float *delay_ptr;
164 float *window_ptr;
165
166 //
167 // 512 IMDCT with source and dest data in 'data'
168 //
169
170 // Pre IFFT complex multiply plus IFFT cmplx conjugate
171 for( i=0; i < 128; i++) {
172 /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */
173 buf[i].real = (data[256-2*i-1] * xcos1[i]) - (data[2*i] * xsin1[i]);
174 buf[i].imag = -1.0 * ((data[2*i] * xcos1[i]) + (data[256-2*i-1] * xsin1[i]));
175 }
176
177 //Bit reversed shuffling
178 for(i=0; i<128; i++) {
179 k = bit_reverse_512[i];
180 if (k < i)
181 swap_cmplx(&buf[i],&buf[k]);
182 }
183
184 /* FFT Merge */
185 for (m=0; m < 7; m++) {
186 if(m)
187 two_m = (1 << m);
188 else
189 two_m = 1;
190
191 two_m_plus_one = (1 << (m+1));
192
193 for(k = 0; k < two_m; k++) {
194 for(i = 0; i < 128; i += two_m_plus_one) {
195 p = k + i;
196 q = p + two_m;
197 tmp_a_r = buf[p].real;
198 tmp_a_i = buf[p].imag;
199 tmp_b_r = buf[q].real * w[m][k].real - buf[q].imag * w[m][k].imag;
200 tmp_b_i = buf[q].imag * w[m][k].real + buf[q].real * w[m][k].imag;
201 buf[p].real = tmp_a_r + tmp_b_r;
202 buf[p].imag = tmp_a_i + tmp_b_i;
203 buf[q].real = tmp_a_r - tmp_b_r;
204 buf[q].imag = tmp_a_i - tmp_b_i;
205 }
206 }
207 }
208
209 /* Post IFFT complex multiply plus IFFT complex conjugate*/
210 for( i=0; i < 128; i++) {
211 /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */
212 tmp_a_r = buf[i].real;
213 tmp_a_i = -1.0 * buf[i].imag;
214 buf[i].real =(tmp_a_r * xcos1[i]) - (tmp_a_i * xsin1[i]);
215 buf[i].imag =(tmp_a_r * xsin1[i]) + (tmp_a_i * xcos1[i]);
216 }
217
218 data_ptr = data;
219 delay_ptr = delay;
220 window_ptr = imdct_window;
221
222 /* Window and convert to real valued signal */
223 for(i=0; i< 64; i++) {
224 *data_ptr++ = 2.0f * (-buf[64+i].imag * *window_ptr++ + *delay_ptr++);
225 *data_ptr++ = 2.0f * ( buf[64-i-1].real * *window_ptr++ + *delay_ptr++);
226 }
227
228 for(i=0; i< 64; i++) {
229 *data_ptr++ = 2.0f * (-buf[i].real * *window_ptr++ + *delay_ptr++);
230 *data_ptr++ = 2.0f * ( buf[128-i-1].imag * *window_ptr++ + *delay_ptr++);
231 }
232
233 /* The trailing edge of the window goes into the delay line */
234 delay_ptr = delay;
235
236 for(i=0; i< 64; i++) {
237 *delay_ptr++ = -buf[64+i].real * *--window_ptr;
238 *delay_ptr++ = buf[64-i-1].imag * *--window_ptr;
239 }
240
241 for(i=0; i<64; i++) {
242 *delay_ptr++ = buf[i].imag * *--window_ptr;
243 *delay_ptr++ = -buf[128-i-1].real * *--window_ptr;
244 }
245 }
246
247 void
248 imdct_do_256(float data[],float delay[])
249 {
250 int i,k;
251 int p,q;
252 int m;
253 int two_m;
254 int two_m_plus_one;
255
256 float tmp_a_i;
257 float tmp_a_r;
258 float tmp_b_i;
259 float tmp_b_r;
260
261 float *data_ptr;
262 float *delay_ptr;
263 float *window_ptr;
264
265 complex_t *buf_1, *buf_2;
266
267 buf_1 = &buf[0];
268 buf_2 = &buf[64];
269
270 /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
271 for(k=0; k<64; k++) {
272 /* X1[k] = X[2*k] */
273 /* X2[k] = X[2*k+1] */
274
275 p = 2 * (128-2*k-1);
276 q = 2 * (2 * k);
277
278 /* Z1[k] = (X1[128-2*k-1] + j * X1[2*k]) * (xcos2[k] + j * xsin2[k]); */
279 buf_1[k].real = data[p] * xcos2[k] - data[q] * xsin2[k];
280 buf_1[k].imag = -1.0f * (data[q] * xcos2[k] + data[p] * xsin2[k]);
281 /* Z2[k] = (X2[128-2*k-1] + j * X2[2*k]) * (xcos2[k] + j * xsin2[k]); */
282 buf_2[k].real = data[p + 1] * xcos2[k] - data[q + 1] * xsin2[k];
283 buf_2[k].imag = -1.0f * ( data[q + 1] * xcos2[k] + data[p + 1] * xsin2[k]);
284 }
285
286 //IFFT Bit reversed shuffling
287 for(i=0; i<64; i++) {
288 k = bit_reverse_256[i];
289 if (k < i) {
290 swap_cmplx(&buf_1[i],&buf_1[k]);
291 swap_cmplx(&buf_2[i],&buf_2[k]);
292 }
293 }
294
295 /* FFT Merge */
296 for (m=0; m < 6; m++) {
297 two_m = (1 << m);
298 two_m_plus_one = (1 << (m+1));
299
300 //FIXME
301 if(m)
302 two_m = (1 << m);
303 else
304 two_m = 1;
305
306 for(k = 0; k < two_m; k++) {
307 for(i = 0; i < 64; i += two_m_plus_one) {
308 p = k + i;
309 q = p + two_m;
310 //Do block 1
311 tmp_a_r = buf_1[p].real;
312 tmp_a_i = buf_1[p].imag;
313 tmp_b_r = buf_1[q].real * w[m][k].real - buf_1[q].imag * w[m][k].imag;
314 tmp_b_i = buf_1[q].imag * w[m][k].real + buf_1[q].real * w[m][k].imag;
315 buf_1[p].real = tmp_a_r + tmp_b_r;
316 buf_1[p].imag = tmp_a_i + tmp_b_i;
317 buf_1[q].real = tmp_a_r - tmp_b_r;
318 buf_1[q].imag = tmp_a_i - tmp_b_i;
319
320 //Do block 2
321 tmp_a_r = buf_2[p].real;
322 tmp_a_i = buf_2[p].imag;
323 tmp_b_r = buf_2[q].real * w[m][k].real - buf_2[q].imag * w[m][k].imag;
324 tmp_b_i = buf_2[q].imag * w[m][k].real + buf_2[q].real * w[m][k].imag;
325 buf_2[p].real = tmp_a_r + tmp_b_r;
326 buf_2[p].imag = tmp_a_i + tmp_b_i;
327 buf_2[q].real = tmp_a_r - tmp_b_r;
328 buf_2[q].imag = tmp_a_i - tmp_b_i;
329 }
330 }
331 }
332
333 /* Post IFFT complex multiply */
334 for( i=0; i < 64; i++) {
335 /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */
336 tmp_a_r = buf_1[i].real;
337 tmp_a_i = -buf_1[i].imag;
338 buf_1[i].real =(tmp_a_r * xcos2[i]) - (tmp_a_i * xsin2[i]);
339 buf_1[i].imag =(tmp_a_r * xsin2[i]) + (tmp_a_i * xcos2[i]);
340 /* y2[n] = z2[n] * (xcos2[n] + j * xsin2[n]) ; */
341 tmp_a_r = buf_2[i].real;
342 tmp_a_i = -buf_2[i].imag;
343 buf_2[i].real =(tmp_a_r * xcos2[i]) - (tmp_a_i * xsin2[i]);
344 buf_2[i].imag =(tmp_a_r * xsin2[i]) + (tmp_a_i * xcos2[i]);
345 }
346
347 data_ptr = data;
348 delay_ptr = delay;
349 window_ptr = imdct_window;
350
351 /* Window and convert to real valued signal */
352 for(i=0; i< 64; i++) {
353 *data_ptr++ = 2.0f * (-buf_1[i].imag * *window_ptr++ + *delay_ptr++);
354 *data_ptr++ = 2.0f * ( buf_1[64-i-1].real * *window_ptr++ + *delay_ptr++);
355 }
356
357 for(i=0; i< 64; i++) {
358 *data_ptr++ = 2.0f * (-buf_1[i].real * *window_ptr++ + *delay_ptr++);
359 *data_ptr++ = 2.0f * ( buf_1[64-i-1].imag * *window_ptr++ + *delay_ptr++);
360 }
361
362 delay_ptr = delay;
363
364 for(i=0; i< 64; i++) {
365 *delay_ptr++ = -buf_2[i].real * *--window_ptr;
366 *delay_ptr++ = buf_2[64-i-1].imag * *--window_ptr;
367 }
368
369 for(i=0; i< 64; i++) {
370 *delay_ptr++ = buf_2[i].imag * *--window_ptr;
371 *delay_ptr++ = -buf_2[64-i-1].real * *--window_ptr;
372 }
373 }
374
375 void imdct_init (void)
376 {
377 #ifdef LIBAC3_MLIB
378 void imdct_do_256_mlib(float data[],float delay[]);
379 void imdct_do_512_mlib(float data[],float delay[]);
380
381 imdct_512 = imdct_do_512_mlib;
382 imdct_256 = imdct_do_256_mlib;
383 #else
384 int i, j, k;
385
386 /* Twiddle factors to turn IFFT into IMDCT */
387 for (i = 0; i < 128; i++) {
388 xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1));
389 xsin1[i] = -sin ((M_PI / 2048) * (8 * i + 1));
390 }
391
392 /* More twiddle factors to turn IFFT into IMDCT */
393 for (i = 0; i < 64; i++) {
394 xcos2[i] = -cos ((M_PI / 1024) * (8 * i + 1));
395 xsin2[i] = -sin ((M_PI / 1024) * (8 * i + 1));
396 }
397
398 for (i = 0; i < 7; i++) {
399 j = 1 << i;
400 for (k = 0; k < j; k++) {
401 w[i][k].real = cos (-M_PI * k / j);
402 w[i][k].imag = sin (-M_PI * k / j);
403 }
404 }
405 imdct_512 = imdct_do_512;
406 imdct_256 = imdct_do_256;
407 #endif
408 }