comparison liba52/imdct.c @ 3394:35b18ed357c2

imported from liba52 CVS
author arpi
date Sun, 09 Dec 2001 15:28:44 +0000
parents
children b5220cf63fc3
comparison
equal deleted inserted replaced
3393:3624cd351618 3394:35b18ed357c2
1 /*
2 * imdct.c
3 * Copyright (C) 2000-2001 Michel Lespinasse <walken@zoy.org>
4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
5 *
6 * This file is part of a52dec, a free ATSC A-52 stream decoder.
7 * See http://liba52.sourceforge.net/ for updates.
8 *
9 * a52dec is free software; you can redistribute it and/or modify
10 * it under the terms of the GNU General Public License as published by
11 * the Free Software Foundation; either version 2 of the License, or
12 * (at your option) any later version.
13 *
14 * a52dec is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 * GNU General Public License for more details.
18 *
19 * You should have received a copy of the GNU General Public License
20 * along with this program; if not, write to the Free Software
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22 */
23
24 #include "config.h"
25
26 #include <math.h>
27 #include <stdio.h>
28 #ifndef M_PI
29 #define M_PI 3.1415926535897932384626433832795029
30 #endif
31 #include <inttypes.h>
32
33 #include "a52.h"
34 #include "a52_internal.h"
35 #include "mm_accel.h"
36
37 void (* imdct_256) (sample_t data[], sample_t delay[], sample_t bias);
38 void (* imdct_512) (sample_t data[], sample_t delay[], sample_t bias);
39
40 typedef struct complex_s {
41 sample_t real;
42 sample_t imag;
43 } complex_t;
44
45
46 /* 128 point bit-reverse LUT */
47 static uint8_t bit_reverse_512[] = {
48 0x00, 0x40, 0x20, 0x60, 0x10, 0x50, 0x30, 0x70,
49 0x08, 0x48, 0x28, 0x68, 0x18, 0x58, 0x38, 0x78,
50 0x04, 0x44, 0x24, 0x64, 0x14, 0x54, 0x34, 0x74,
51 0x0c, 0x4c, 0x2c, 0x6c, 0x1c, 0x5c, 0x3c, 0x7c,
52 0x02, 0x42, 0x22, 0x62, 0x12, 0x52, 0x32, 0x72,
53 0x0a, 0x4a, 0x2a, 0x6a, 0x1a, 0x5a, 0x3a, 0x7a,
54 0x06, 0x46, 0x26, 0x66, 0x16, 0x56, 0x36, 0x76,
55 0x0e, 0x4e, 0x2e, 0x6e, 0x1e, 0x5e, 0x3e, 0x7e,
56 0x01, 0x41, 0x21, 0x61, 0x11, 0x51, 0x31, 0x71,
57 0x09, 0x49, 0x29, 0x69, 0x19, 0x59, 0x39, 0x79,
58 0x05, 0x45, 0x25, 0x65, 0x15, 0x55, 0x35, 0x75,
59 0x0d, 0x4d, 0x2d, 0x6d, 0x1d, 0x5d, 0x3d, 0x7d,
60 0x03, 0x43, 0x23, 0x63, 0x13, 0x53, 0x33, 0x73,
61 0x0b, 0x4b, 0x2b, 0x6b, 0x1b, 0x5b, 0x3b, 0x7b,
62 0x07, 0x47, 0x27, 0x67, 0x17, 0x57, 0x37, 0x77,
63 0x0f, 0x4f, 0x2f, 0x6f, 0x1f, 0x5f, 0x3f, 0x7f};
64
65 static uint8_t bit_reverse_256[] = {
66 0x00, 0x20, 0x10, 0x30, 0x08, 0x28, 0x18, 0x38,
67 0x04, 0x24, 0x14, 0x34, 0x0c, 0x2c, 0x1c, 0x3c,
68 0x02, 0x22, 0x12, 0x32, 0x0a, 0x2a, 0x1a, 0x3a,
69 0x06, 0x26, 0x16, 0x36, 0x0e, 0x2e, 0x1e, 0x3e,
70 0x01, 0x21, 0x11, 0x31, 0x09, 0x29, 0x19, 0x39,
71 0x05, 0x25, 0x15, 0x35, 0x0d, 0x2d, 0x1d, 0x3d,
72 0x03, 0x23, 0x13, 0x33, 0x0b, 0x2b, 0x1b, 0x3b,
73 0x07, 0x27, 0x17, 0x37, 0x0f, 0x2f, 0x1f, 0x3f};
74
75 static complex_t buf[128];
76
77 /* Twiddle factor LUT */
78 static complex_t w_1[1];
79 static complex_t w_2[2];
80 static complex_t w_4[4];
81 static complex_t w_8[8];
82 static complex_t w_16[16];
83 static complex_t w_32[32];
84 static complex_t w_64[64];
85 static complex_t * w[7] = {w_1, w_2, w_4, w_8, w_16, w_32, w_64};
86
87 /* Twiddle factors for IMDCT */
88 static sample_t xcos1[128];
89 static sample_t xsin1[128];
90 static sample_t xcos2[64];
91 static sample_t xsin2[64];
92
93 /* Windowing function for Modified DCT - Thank you acroread */
94 sample_t imdct_window[] = {
95 0.00014, 0.00024, 0.00037, 0.00051, 0.00067, 0.00086, 0.00107, 0.00130,
96 0.00157, 0.00187, 0.00220, 0.00256, 0.00297, 0.00341, 0.00390, 0.00443,
97 0.00501, 0.00564, 0.00632, 0.00706, 0.00785, 0.00871, 0.00962, 0.01061,
98 0.01166, 0.01279, 0.01399, 0.01526, 0.01662, 0.01806, 0.01959, 0.02121,
99 0.02292, 0.02472, 0.02662, 0.02863, 0.03073, 0.03294, 0.03527, 0.03770,
100 0.04025, 0.04292, 0.04571, 0.04862, 0.05165, 0.05481, 0.05810, 0.06153,
101 0.06508, 0.06878, 0.07261, 0.07658, 0.08069, 0.08495, 0.08935, 0.09389,
102 0.09859, 0.10343, 0.10842, 0.11356, 0.11885, 0.12429, 0.12988, 0.13563,
103 0.14152, 0.14757, 0.15376, 0.16011, 0.16661, 0.17325, 0.18005, 0.18699,
104 0.19407, 0.20130, 0.20867, 0.21618, 0.22382, 0.23161, 0.23952, 0.24757,
105 0.25574, 0.26404, 0.27246, 0.28100, 0.28965, 0.29841, 0.30729, 0.31626,
106 0.32533, 0.33450, 0.34376, 0.35311, 0.36253, 0.37204, 0.38161, 0.39126,
107 0.40096, 0.41072, 0.42054, 0.43040, 0.44030, 0.45023, 0.46020, 0.47019,
108 0.48020, 0.49022, 0.50025, 0.51028, 0.52031, 0.53033, 0.54033, 0.55031,
109 0.56026, 0.57019, 0.58007, 0.58991, 0.59970, 0.60944, 0.61912, 0.62873,
110 0.63827, 0.64774, 0.65713, 0.66643, 0.67564, 0.68476, 0.69377, 0.70269,
111 0.71150, 0.72019, 0.72877, 0.73723, 0.74557, 0.75378, 0.76186, 0.76981,
112 0.77762, 0.78530, 0.79283, 0.80022, 0.80747, 0.81457, 0.82151, 0.82831,
113 0.83496, 0.84145, 0.84779, 0.85398, 0.86001, 0.86588, 0.87160, 0.87716,
114 0.88257, 0.88782, 0.89291, 0.89785, 0.90264, 0.90728, 0.91176, 0.91610,
115 0.92028, 0.92432, 0.92822, 0.93197, 0.93558, 0.93906, 0.94240, 0.94560,
116 0.94867, 0.95162, 0.95444, 0.95713, 0.95971, 0.96217, 0.96451, 0.96674,
117 0.96887, 0.97089, 0.97281, 0.97463, 0.97635, 0.97799, 0.97953, 0.98099,
118 0.98236, 0.98366, 0.98488, 0.98602, 0.98710, 0.98811, 0.98905, 0.98994,
119 0.99076, 0.99153, 0.99225, 0.99291, 0.99353, 0.99411, 0.99464, 0.99513,
120 0.99558, 0.99600, 0.99639, 0.99674, 0.99706, 0.99736, 0.99763, 0.99788,
121 0.99811, 0.99831, 0.99850, 0.99867, 0.99882, 0.99895, 0.99908, 0.99919,
122 0.99929, 0.99938, 0.99946, 0.99953, 0.99959, 0.99965, 0.99969, 0.99974,
123 0.99978, 0.99981, 0.99984, 0.99986, 0.99988, 0.99990, 0.99992, 0.99993,
124 0.99994, 0.99995, 0.99996, 0.99997, 0.99998, 0.99998, 0.99998, 0.99999,
125 0.99999, 0.99999, 0.99999, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000,
126 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000, 1.00000 };
127
128
129 static inline void swap_cmplx(complex_t *a, complex_t *b)
130 {
131 complex_t tmp;
132
133 tmp = *a;
134 *a = *b;
135 *b = tmp;
136 }
137
138
139
140 static inline complex_t cmplx_mult(complex_t a, complex_t b)
141 {
142 complex_t ret;
143
144 ret.real = a.real * b.real - a.imag * b.imag;
145 ret.imag = a.real * b.imag + a.imag * b.real;
146
147 return ret;
148 }
149
150 void
151 imdct_do_512(sample_t data[],sample_t delay[], sample_t bias)
152 {
153 int i,k;
154 int p,q;
155 int m;
156 int two_m;
157 int two_m_plus_one;
158
159 sample_t tmp_a_i;
160 sample_t tmp_a_r;
161 sample_t tmp_b_i;
162 sample_t tmp_b_r;
163
164 sample_t *data_ptr;
165 sample_t *delay_ptr;
166 sample_t *window_ptr;
167
168 /* 512 IMDCT with source and dest data in 'data' */
169
170 /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
171 for( i=0; i < 128; i++) {
172 /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */
173 buf[i].real = (data[256-2*i-1] * xcos1[i]) - (data[2*i] * xsin1[i]);
174 buf[i].imag = -1.0 * ((data[2*i] * xcos1[i]) + (data[256-2*i-1] * xsin1[i]));
175 }
176
177 /* Bit reversed shuffling */
178 for(i=0; i<128; i++) {
179 k = bit_reverse_512[i];
180 if (k < i)
181 swap_cmplx(&buf[i],&buf[k]);
182 }
183
184 /* FFT Merge */
185 for (m=0; m < 7; m++) {
186 if(m)
187 two_m = (1 << m);
188 else
189 two_m = 1;
190
191 two_m_plus_one = (1 << (m+1));
192
193 for(k = 0; k < two_m; k++) {
194 for(i = 0; i < 128; i += two_m_plus_one) {
195 p = k + i;
196 q = p + two_m;
197 tmp_a_r = buf[p].real;
198 tmp_a_i = buf[p].imag;
199 tmp_b_r = buf[q].real * w[m][k].real - buf[q].imag * w[m][k].imag;
200 tmp_b_i = buf[q].imag * w[m][k].real + buf[q].real * w[m][k].imag;
201 buf[p].real = tmp_a_r + tmp_b_r;
202 buf[p].imag = tmp_a_i + tmp_b_i;
203 buf[q].real = tmp_a_r - tmp_b_r;
204 buf[q].imag = tmp_a_i - tmp_b_i;
205 }
206 }
207 }
208
209 /* Post IFFT complex multiply plus IFFT complex conjugate*/
210 for( i=0; i < 128; i++) {
211 /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */
212 tmp_a_r = buf[i].real;
213 tmp_a_i = -1.0 * buf[i].imag;
214 buf[i].real =(tmp_a_r * xcos1[i]) - (tmp_a_i * xsin1[i]);
215 buf[i].imag =(tmp_a_r * xsin1[i]) + (tmp_a_i * xcos1[i]);
216 }
217
218 data_ptr = data;
219 delay_ptr = delay;
220 window_ptr = imdct_window;
221
222 /* Window and convert to real valued signal */
223 for(i=0; i< 64; i++) {
224 *data_ptr++ = -buf[64+i].imag * *window_ptr++ + *delay_ptr++ + bias;
225 *data_ptr++ = buf[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias;
226 }
227
228 for(i=0; i< 64; i++) {
229 *data_ptr++ = -buf[i].real * *window_ptr++ + *delay_ptr++ + bias;
230 *data_ptr++ = buf[128-i-1].imag * *window_ptr++ + *delay_ptr++ + bias;
231 }
232
233 /* The trailing edge of the window goes into the delay line */
234 delay_ptr = delay;
235
236 for(i=0; i< 64; i++) {
237 *delay_ptr++ = -buf[64+i].real * *--window_ptr;
238 *delay_ptr++ = buf[64-i-1].imag * *--window_ptr;
239 }
240
241 for(i=0; i<64; i++) {
242 *delay_ptr++ = buf[i].imag * *--window_ptr;
243 *delay_ptr++ = -buf[128-i-1].real * *--window_ptr;
244 }
245 }
246
247 void
248 imdct_do_256(sample_t data[],sample_t delay[],sample_t bias)
249 {
250 int i,k;
251 int p,q;
252 int m;
253 int two_m;
254 int two_m_plus_one;
255
256 sample_t tmp_a_i;
257 sample_t tmp_a_r;
258 sample_t tmp_b_i;
259 sample_t tmp_b_r;
260
261 sample_t *data_ptr;
262 sample_t *delay_ptr;
263 sample_t *window_ptr;
264
265 complex_t *buf_1, *buf_2;
266
267 buf_1 = &buf[0];
268 buf_2 = &buf[64];
269
270 /* Pre IFFT complex multiply plus IFFT cmplx conjugate */
271 for(k=0; k<64; k++) {
272 /* X1[k] = X[2*k] */
273 /* X2[k] = X[2*k+1] */
274
275 p = 2 * (128-2*k-1);
276 q = 2 * (2 * k);
277
278 /* Z1[k] = (X1[128-2*k-1] + j * X1[2*k]) * (xcos2[k] + j * xsin2[k]); */
279 buf_1[k].real = data[p] * xcos2[k] - data[q] * xsin2[k];
280 buf_1[k].imag = -1.0f * (data[q] * xcos2[k] + data[p] * xsin2[k]);
281 /* Z2[k] = (X2[128-2*k-1] + j * X2[2*k]) * (xcos2[k] + j * xsin2[k]); */
282 buf_2[k].real = data[p + 1] * xcos2[k] - data[q + 1] * xsin2[k];
283 buf_2[k].imag = -1.0f * ( data[q + 1] * xcos2[k] + data[p + 1] * xsin2[k]);
284 }
285
286 /* IFFT Bit reversed shuffling */
287 for(i=0; i<64; i++) {
288 k = bit_reverse_256[i];
289 if (k < i) {
290 swap_cmplx(&buf_1[i],&buf_1[k]);
291 swap_cmplx(&buf_2[i],&buf_2[k]);
292 }
293 }
294
295 /* FFT Merge */
296 for (m=0; m < 6; m++) {
297 two_m = (1 << m);
298 two_m_plus_one = (1 << (m+1));
299
300 /* FIXME */
301 if(m)
302 two_m = (1 << m);
303 else
304 two_m = 1;
305
306 for(k = 0; k < two_m; k++) {
307 for(i = 0; i < 64; i += two_m_plus_one) {
308 p = k + i;
309 q = p + two_m;
310 /* Do block 1 */
311 tmp_a_r = buf_1[p].real;
312 tmp_a_i = buf_1[p].imag;
313 tmp_b_r = buf_1[q].real * w[m][k].real - buf_1[q].imag * w[m][k].imag;
314 tmp_b_i = buf_1[q].imag * w[m][k].real + buf_1[q].real * w[m][k].imag;
315 buf_1[p].real = tmp_a_r + tmp_b_r;
316 buf_1[p].imag = tmp_a_i + tmp_b_i;
317 buf_1[q].real = tmp_a_r - tmp_b_r;
318 buf_1[q].imag = tmp_a_i - tmp_b_i;
319
320 /* Do block 2 */
321 tmp_a_r = buf_2[p].real;
322 tmp_a_i = buf_2[p].imag;
323 tmp_b_r = buf_2[q].real * w[m][k].real - buf_2[q].imag * w[m][k].imag;
324 tmp_b_i = buf_2[q].imag * w[m][k].real + buf_2[q].real * w[m][k].imag;
325 buf_2[p].real = tmp_a_r + tmp_b_r;
326 buf_2[p].imag = tmp_a_i + tmp_b_i;
327 buf_2[q].real = tmp_a_r - tmp_b_r;
328 buf_2[q].imag = tmp_a_i - tmp_b_i;
329 }
330 }
331 }
332
333 /* Post IFFT complex multiply */
334 for( i=0; i < 64; i++) {
335 /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */
336 tmp_a_r = buf_1[i].real;
337 tmp_a_i = -buf_1[i].imag;
338 buf_1[i].real =(tmp_a_r * xcos2[i]) - (tmp_a_i * xsin2[i]);
339 buf_1[i].imag =(tmp_a_r * xsin2[i]) + (tmp_a_i * xcos2[i]);
340 /* y2[n] = z2[n] * (xcos2[n] + j * xsin2[n]) ; */
341 tmp_a_r = buf_2[i].real;
342 tmp_a_i = -buf_2[i].imag;
343 buf_2[i].real =(tmp_a_r * xcos2[i]) - (tmp_a_i * xsin2[i]);
344 buf_2[i].imag =(tmp_a_r * xsin2[i]) + (tmp_a_i * xcos2[i]);
345 }
346
347 data_ptr = data;
348 delay_ptr = delay;
349 window_ptr = imdct_window;
350
351 /* Window and convert to real valued signal */
352 for(i=0; i< 64; i++) {
353 *data_ptr++ = -buf_1[i].imag * *window_ptr++ + *delay_ptr++ + bias;
354 *data_ptr++ = buf_1[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias;
355 }
356
357 for(i=0; i< 64; i++) {
358 *data_ptr++ = -buf_1[i].real * *window_ptr++ + *delay_ptr++ + bias;
359 *data_ptr++ = buf_1[64-i-1].imag * *window_ptr++ + *delay_ptr++ + bias;
360 }
361
362 delay_ptr = delay;
363
364 for(i=0; i< 64; i++) {
365 *delay_ptr++ = -buf_2[i].real * *--window_ptr;
366 *delay_ptr++ = buf_2[64-i-1].imag * *--window_ptr;
367 }
368
369 for(i=0; i< 64; i++) {
370 *delay_ptr++ = buf_2[i].imag * *--window_ptr;
371 *delay_ptr++ = -buf_2[64-i-1].real * *--window_ptr;
372 }
373 }
374
375 void imdct_init (uint32_t mm_accel)
376 {
377 #ifdef LIBA52_MLIB
378 if (mm_accel & MM_ACCEL_MLIB) {
379 fprintf (stderr, "Using mlib for IMDCT transform\n");
380 imdct_512 = imdct_do_512_mlib;
381 imdct_256 = imdct_do_256_mlib;
382 } else
383 #endif
384 {
385 int i, j, k;
386
387 fprintf (stderr, "No accelerated IMDCT transform found\n");
388
389 /* Twiddle factors to turn IFFT into IMDCT */
390 for (i = 0; i < 128; i++) {
391 xcos1[i] = -cos ((M_PI / 2048) * (8 * i + 1));
392 xsin1[i] = -sin ((M_PI / 2048) * (8 * i + 1));
393 }
394
395 /* More twiddle factors to turn IFFT into IMDCT */
396 for (i = 0; i < 64; i++) {
397 xcos2[i] = -cos ((M_PI / 1024) * (8 * i + 1));
398 xsin2[i] = -sin ((M_PI / 1024) * (8 * i + 1));
399 }
400
401 for (i = 0; i < 7; i++) {
402 j = 1 << i;
403 for (k = 0; k < j; k++) {
404 w[i][k].real = cos (-M_PI * k / j);
405 w[i][k].imag = sin (-M_PI * k / j);
406 }
407 }
408 imdct_512 = imdct_do_512;
409 imdct_256 = imdct_do_256;
410 }
411 }