Mercurial > libavcodec.hg
comparison liba52/imdct.c @ 1072:68d0a38bd802 libavcodec
* sync with main liba52 sources
author | kabi |
---|---|
date | Tue, 18 Feb 2003 11:48:57 +0000 |
parents | dd4f4c3d7171 |
children | e101d1cffec6 |
comparison
equal
deleted
inserted
replaced
1071:0a48dd404167 | 1072:68d0a38bd802 |
---|---|
1 /* | 1 /* |
2 * imdct.c | 2 * imdct.c |
3 * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> | 3 * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> |
4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> | 4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> |
5 * | 5 * |
6 * The ifft algorithms in this file have been largely inspired by Dan | 6 * The ifft algorithms in this file have been largely inspired by Dan |
7 * Bernstein's work, djbfft, available at http://cr.yp.to/djbfft.html | 7 * Bernstein's work, djbfft, available at http://cr.yp.to/djbfft.html |
8 * | 8 * |
22 * You should have received a copy of the GNU General Public License | 22 * You should have received a copy of the GNU General Public License |
23 * along with this program; if not, write to the Free Software | 23 * along with this program; if not, write to the Free Software |
24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 24 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
25 */ | 25 */ |
26 | 26 |
27 #include "config.h" | |
28 | |
29 #include <math.h> | |
30 #include <stdio.h> | |
31 #ifdef LIBA52_DJBFFT | |
32 #include <fftc4.h> | |
33 #endif | |
34 #ifndef M_PI | |
35 #define M_PI 3.1415926535897932384626433832795029 | |
36 #endif | |
37 #include <inttypes.h> | |
38 | |
27 #include "a52.h" | 39 #include "a52.h" |
28 #include "a52_internal.h" | 40 #include "a52_internal.h" |
29 #include "mm_accel.h" | 41 #include "mm_accel.h" |
30 | 42 |
31 typedef struct complex_s { | 43 typedef struct complex_s { |
32 sample_t real; | 44 sample_t real; |
33 sample_t imag; | 45 sample_t imag; |
34 } complex_t; | 46 } complex_t; |
35 | |
36 static complex_t buf[128]; | |
37 | 47 |
38 static uint8_t fftorder[] = { | 48 static uint8_t fftorder[] = { |
39 0,128, 64,192, 32,160,224, 96, 16,144, 80,208,240,112, 48,176, | 49 0,128, 64,192, 32,160,224, 96, 16,144, 80,208,240,112, 48,176, |
40 8,136, 72,200, 40,168,232,104,248,120, 56,184, 24,152,216, 88, | 50 8,136, 72,200, 40,168,232,104,248,120, 56,184, 24,152,216, 88, |
41 4,132, 68,196, 36,164,228,100, 20,148, 84,212,244,116, 52,180, | 51 4,132, 68,196, 36,164,228,100, 20,148, 84,212,244,116, 52,180, |
63 static void (* ifft128) (complex_t * buf); | 73 static void (* ifft128) (complex_t * buf); |
64 static void (* ifft64) (complex_t * buf); | 74 static void (* ifft64) (complex_t * buf); |
65 | 75 |
66 static inline void ifft2 (complex_t * buf) | 76 static inline void ifft2 (complex_t * buf) |
67 { | 77 { |
68 double r, i; | 78 sample_t r, i; |
69 | 79 |
70 r = buf[0].real; | 80 r = buf[0].real; |
71 i = buf[0].imag; | 81 i = buf[0].imag; |
72 buf[0].real += buf[1].real; | 82 buf[0].real += buf[1].real; |
73 buf[0].imag += buf[1].imag; | 83 buf[0].imag += buf[1].imag; |
75 buf[1].imag = i - buf[1].imag; | 85 buf[1].imag = i - buf[1].imag; |
76 } | 86 } |
77 | 87 |
78 static inline void ifft4 (complex_t * buf) | 88 static inline void ifft4 (complex_t * buf) |
79 { | 89 { |
80 double tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; | 90 sample_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; |
81 | 91 |
82 tmp1 = buf[0].real + buf[1].real; | 92 tmp1 = buf[0].real + buf[1].real; |
83 tmp2 = buf[3].real + buf[2].real; | 93 tmp2 = buf[3].real + buf[2].real; |
84 tmp3 = buf[0].imag + buf[1].imag; | 94 tmp3 = buf[0].imag + buf[1].imag; |
85 tmp4 = buf[2].imag + buf[3].imag; | 95 tmp4 = buf[2].imag + buf[3].imag; |
96 buf[1].imag = tmp6 + tmp8; | 106 buf[1].imag = tmp6 + tmp8; |
97 buf[3].real = tmp5 - tmp7; | 107 buf[3].real = tmp5 - tmp7; |
98 buf[3].imag = tmp6 - tmp8; | 108 buf[3].imag = tmp6 - tmp8; |
99 } | 109 } |
100 | 110 |
111 /* basic radix-2 ifft butterfly */ | |
112 | |
113 #define BUTTERFLY_0(t0,t1,W0,W1,d0,d1) do { \ | |
114 t0 = MUL (W1, d1) + MUL (W0, d0); \ | |
115 t1 = MUL (W0, d1) - MUL (W1, d0); \ | |
116 } while (0) | |
117 | |
118 /* radix-2 ifft butterfly with bias */ | |
119 | |
120 #define BUTTERFLY_B(t0,t1,W0,W1,d0,d1) do { \ | |
121 t0 = BIAS (MUL (d1, W1) + MUL (d0, W0)); \ | |
122 t1 = BIAS (MUL (d1, W0) - MUL (d0, W1)); \ | |
123 } while (0) | |
124 | |
101 /* the basic split-radix ifft butterfly */ | 125 /* the basic split-radix ifft butterfly */ |
102 | 126 |
103 #define BUTTERFLY(a0,a1,a2,a3,wr,wi) do { \ | 127 #define BUTTERFLY(a0,a1,a2,a3,wr,wi) do { \ |
104 tmp5 = a2.real * wr + a2.imag * wi; \ | 128 BUTTERFLY_0 (tmp5, tmp6, wr, wi, a2.real, a2.imag); \ |
105 tmp6 = a2.imag * wr - a2.real * wi; \ | 129 BUTTERFLY_0 (tmp8, tmp7, wr, wi, a3.imag, a3.real); \ |
106 tmp7 = a3.real * wr - a3.imag * wi; \ | 130 tmp1 = tmp5 + tmp7; \ |
107 tmp8 = a3.imag * wr + a3.real * wi; \ | 131 tmp2 = tmp6 + tmp8; \ |
108 tmp1 = tmp5 + tmp7; \ | 132 tmp3 = tmp6 - tmp8; \ |
109 tmp2 = tmp6 + tmp8; \ | 133 tmp4 = tmp7 - tmp5; \ |
110 tmp3 = tmp6 - tmp8; \ | 134 a2.real = a0.real - tmp1; \ |
111 tmp4 = tmp7 - tmp5; \ | 135 a2.imag = a0.imag - tmp2; \ |
112 a2.real = a0.real - tmp1; \ | 136 a3.real = a1.real - tmp3; \ |
113 a2.imag = a0.imag - tmp2; \ | 137 a3.imag = a1.imag - tmp4; \ |
114 a3.real = a1.real - tmp3; \ | 138 a0.real += tmp1; \ |
115 a3.imag = a1.imag - tmp4; \ | 139 a0.imag += tmp2; \ |
116 a0.real += tmp1; \ | 140 a1.real += tmp3; \ |
117 a0.imag += tmp2; \ | 141 a1.imag += tmp4; \ |
118 a1.real += tmp3; \ | |
119 a1.imag += tmp4; \ | |
120 } while (0) | 142 } while (0) |
121 | 143 |
122 /* split-radix ifft butterfly, specialized for wr=1 wi=0 */ | 144 /* split-radix ifft butterfly, specialized for wr=1 wi=0 */ |
123 | 145 |
124 #define BUTTERFLY_ZERO(a0,a1,a2,a3) do { \ | 146 #define BUTTERFLY_ZERO(a0,a1,a2,a3) do { \ |
137 } while (0) | 159 } while (0) |
138 | 160 |
139 /* split-radix ifft butterfly, specialized for wr=wi */ | 161 /* split-radix ifft butterfly, specialized for wr=wi */ |
140 | 162 |
141 #define BUTTERFLY_HALF(a0,a1,a2,a3,w) do { \ | 163 #define BUTTERFLY_HALF(a0,a1,a2,a3,w) do { \ |
142 tmp5 = (a2.real + a2.imag) * w; \ | 164 tmp5 = MUL (a2.real + a2.imag, w); \ |
143 tmp6 = (a2.imag - a2.real) * w; \ | 165 tmp6 = MUL (a2.imag - a2.real, w); \ |
144 tmp7 = (a3.real - a3.imag) * w; \ | 166 tmp7 = MUL (a3.real - a3.imag, w); \ |
145 tmp8 = (a3.imag + a3.real) * w; \ | 167 tmp8 = MUL (a3.imag + a3.real, w); \ |
146 tmp1 = tmp5 + tmp7; \ | 168 tmp1 = tmp5 + tmp7; \ |
147 tmp2 = tmp6 + tmp8; \ | 169 tmp2 = tmp6 + tmp8; \ |
148 tmp3 = tmp6 - tmp8; \ | 170 tmp3 = tmp6 - tmp8; \ |
149 tmp4 = tmp7 - tmp5; \ | 171 tmp4 = tmp7 - tmp5; \ |
150 a2.real = a0.real - tmp1; \ | 172 a2.real = a0.real - tmp1; \ |
157 a1.imag += tmp4; \ | 179 a1.imag += tmp4; \ |
158 } while (0) | 180 } while (0) |
159 | 181 |
160 static inline void ifft8 (complex_t * buf) | 182 static inline void ifft8 (complex_t * buf) |
161 { | 183 { |
162 double tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; | 184 sample_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; |
163 | 185 |
164 ifft4 (buf); | 186 ifft4 (buf); |
165 ifft2 (buf + 4); | 187 ifft2 (buf + 4); |
166 ifft2 (buf + 6); | 188 ifft2 (buf + 6); |
167 BUTTERFLY_ZERO (buf[0], buf[2], buf[4], buf[6]); | 189 BUTTERFLY_ZERO (buf[0], buf[2], buf[4], buf[6]); |
171 static void ifft_pass (complex_t * buf, sample_t * weight, int n) | 193 static void ifft_pass (complex_t * buf, sample_t * weight, int n) |
172 { | 194 { |
173 complex_t * buf1; | 195 complex_t * buf1; |
174 complex_t * buf2; | 196 complex_t * buf2; |
175 complex_t * buf3; | 197 complex_t * buf3; |
176 double tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; | 198 sample_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; |
177 int i; | 199 int i; |
178 | 200 |
179 buf++; | 201 buf++; |
180 buf1 = buf + n; | 202 buf1 = buf + n; |
181 buf2 = buf + 2 * n; | 203 buf2 = buf + 2 * n; |
184 BUTTERFLY_ZERO (buf[-1], buf1[-1], buf2[-1], buf3[-1]); | 206 BUTTERFLY_ZERO (buf[-1], buf1[-1], buf2[-1], buf3[-1]); |
185 | 207 |
186 i = n - 1; | 208 i = n - 1; |
187 | 209 |
188 do { | 210 do { |
189 BUTTERFLY (buf[0], buf1[0], buf2[0], buf3[0], weight[n], weight[2*i]); | 211 BUTTERFLY (buf[0], buf1[0], buf2[0], buf3[0], |
212 weight[0], weight[2*i-n]); | |
190 buf++; | 213 buf++; |
191 buf1++; | 214 buf1++; |
192 buf2++; | 215 buf2++; |
193 buf3++; | 216 buf3++; |
194 weight++; | 217 weight++; |
198 static void ifft16 (complex_t * buf) | 221 static void ifft16 (complex_t * buf) |
199 { | 222 { |
200 ifft8 (buf); | 223 ifft8 (buf); |
201 ifft4 (buf + 8); | 224 ifft4 (buf + 8); |
202 ifft4 (buf + 12); | 225 ifft4 (buf + 12); |
203 ifft_pass (buf, roots16 - 4, 4); | 226 ifft_pass (buf, roots16, 4); |
204 } | 227 } |
205 | 228 |
206 static void ifft32 (complex_t * buf) | 229 static void ifft32 (complex_t * buf) |
207 { | 230 { |
208 ifft16 (buf); | 231 ifft16 (buf); |
209 ifft8 (buf + 16); | 232 ifft8 (buf + 16); |
210 ifft8 (buf + 24); | 233 ifft8 (buf + 24); |
211 ifft_pass (buf, roots32 - 8, 8); | 234 ifft_pass (buf, roots32, 8); |
212 } | 235 } |
213 | 236 |
214 static void ifft64_c (complex_t * buf) | 237 static void ifft64_c (complex_t * buf) |
215 { | 238 { |
216 ifft32 (buf); | 239 ifft32 (buf); |
217 ifft16 (buf + 32); | 240 ifft16 (buf + 32); |
218 ifft16 (buf + 48); | 241 ifft16 (buf + 48); |
219 ifft_pass (buf, roots64 - 16, 16); | 242 ifft_pass (buf, roots64, 16); |
220 } | 243 } |
221 | 244 |
222 static void ifft128_c (complex_t * buf) | 245 static void ifft128_c (complex_t * buf) |
223 { | 246 { |
224 ifft32 (buf); | 247 ifft32 (buf); |
225 ifft16 (buf + 32); | 248 ifft16 (buf + 32); |
226 ifft16 (buf + 48); | 249 ifft16 (buf + 48); |
227 ifft_pass (buf, roots64 - 16, 16); | 250 ifft_pass (buf, roots64, 16); |
228 | 251 |
229 ifft32 (buf + 64); | 252 ifft32 (buf + 64); |
230 ifft32 (buf + 96); | 253 ifft32 (buf + 96); |
231 ifft_pass (buf, roots128 - 32, 32); | 254 ifft_pass (buf, roots128, 32); |
232 } | 255 } |
233 | 256 |
234 void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias) | 257 void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias) |
235 { | 258 { |
236 int i, k; | 259 int i, k; |
237 sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2; | 260 sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2; |
238 const sample_t * window = a52_imdct_window; | 261 const sample_t * window = a52_imdct_window; |
262 complex_t buf[128]; | |
239 | 263 |
240 for (i = 0; i < 128; i++) { | 264 for (i = 0; i < 128; i++) { |
241 k = fftorder[i]; | 265 k = fftorder[i]; |
242 t_r = pre1[i].real; | 266 t_r = pre1[i].real; |
243 t_i = pre1[i].imag; | 267 t_i = pre1[i].imag; |
244 | 268 BUTTERFLY_0 (buf[i].real, buf[i].imag, t_r, t_i, data[k], data[255-k]); |
245 buf[i].real = t_i * data[255-k] + t_r * data[k]; | |
246 buf[i].imag = t_r * data[255-k] - t_i * data[k]; | |
247 } | 269 } |
248 | 270 |
249 ifft128 (buf); | 271 ifft128 (buf); |
250 | 272 |
251 /* Post IFFT complex multiply plus IFFT complex conjugate*/ | 273 /* Post IFFT complex multiply plus IFFT complex conjugate*/ |
252 /* Window and convert to real valued signal */ | 274 /* Window and convert to real valued signal */ |
253 for (i = 0; i < 64; i++) { | 275 for (i = 0; i < 64; i++) { |
254 /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */ | 276 /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */ |
255 t_r = post1[i].real; | 277 t_r = post1[i].real; |
256 t_i = post1[i].imag; | 278 t_i = post1[i].imag; |
257 | 279 BUTTERFLY_0 (a_r, a_i, t_i, t_r, buf[i].imag, buf[i].real); |
258 a_r = t_r * buf[i].real + t_i * buf[i].imag; | 280 BUTTERFLY_0 (b_r, b_i, t_r, t_i, buf[127-i].imag, buf[127-i].real); |
259 a_i = t_i * buf[i].real - t_r * buf[i].imag; | |
260 b_r = t_i * buf[127-i].real + t_r * buf[127-i].imag; | |
261 b_i = t_r * buf[127-i].real - t_i * buf[127-i].imag; | |
262 | 281 |
263 w_1 = window[2*i]; | 282 w_1 = window[2*i]; |
264 w_2 = window[255-2*i]; | 283 w_2 = window[255-2*i]; |
265 data[2*i] = delay[2*i] * w_2 - a_r * w_1 + bias; | 284 BUTTERFLY_B (data[255-2*i], data[2*i], w_2, w_1, a_r, delay[2*i]); |
266 data[255-2*i] = delay[2*i] * w_1 + a_r * w_2 + bias; | |
267 delay[2*i] = a_i; | 285 delay[2*i] = a_i; |
268 | 286 |
269 w_1 = window[2*i+1]; | 287 w_1 = window[2*i+1]; |
270 w_2 = window[254-2*i]; | 288 w_2 = window[254-2*i]; |
271 data[2*i+1] = delay[2*i+1] * w_2 + b_r * w_1 + bias; | 289 BUTTERFLY_B (data[2*i+1], data[254-2*i], w_1, w_2, b_r, delay[2*i+1]); |
272 data[254-2*i] = delay[2*i+1] * w_1 - b_r * w_2 + bias; | |
273 delay[2*i+1] = b_i; | 290 delay[2*i+1] = b_i; |
274 } | 291 } |
275 } | 292 } |
276 | 293 |
277 void a52_imdct_256(sample_t data[],sample_t delay[],sample_t bias) | 294 void a52_imdct_256 (sample_t * data, sample_t * delay, sample_t bias) |
278 { | 295 { |
279 int i, k; | 296 int i, k; |
280 sample_t t_r, t_i, a_r, a_i, b_r, b_i, c_r, c_i, d_r, d_i, w_1, w_2; | 297 sample_t t_r, t_i, a_r, a_i, b_r, b_i, c_r, c_i, d_r, d_i, w_1, w_2; |
281 complex_t * buf1, * buf2; | |
282 const sample_t * window = a52_imdct_window; | 298 const sample_t * window = a52_imdct_window; |
283 | 299 complex_t buf1[64], buf2[64]; |
284 buf1 = &buf[0]; | |
285 buf2 = &buf[64]; | |
286 | 300 |
287 /* Pre IFFT complex multiply plus IFFT cmplx conjugate */ | 301 /* Pre IFFT complex multiply plus IFFT cmplx conjugate */ |
288 for (i = 0; i < 64; i++) { | 302 for (i = 0; i < 64; i++) { |
289 k = fftorder[i]; | 303 k = fftorder[i]; |
290 t_r = pre2[i].real; | 304 t_r = pre2[i].real; |
291 t_i = pre2[i].imag; | 305 t_i = pre2[i].imag; |
292 | 306 BUTTERFLY_0 (buf1[i].real, buf1[i].imag, t_r, t_i, data[k], data[254-k]); |
293 buf1[i].real = t_i * data[254-k] + t_r * data[k]; | 307 BUTTERFLY_0 (buf2[i].real, buf2[i].imag, t_r, t_i, data[k+1], data[255-k]); |
294 buf1[i].imag = t_r * data[254-k] - t_i * data[k]; | |
295 | |
296 buf2[i].real = t_i * data[255-k] + t_r * data[k+1]; | |
297 buf2[i].imag = t_r * data[255-k] - t_i * data[k+1]; | |
298 } | 308 } |
299 | 309 |
300 ifft64 (buf1); | 310 ifft64 (buf1); |
301 ifft64 (buf2); | 311 ifft64 (buf2); |
302 | 312 |
304 /* Window and convert to real valued signal */ | 314 /* Window and convert to real valued signal */ |
305 for (i = 0; i < 32; i++) { | 315 for (i = 0; i < 32; i++) { |
306 /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */ | 316 /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */ |
307 t_r = post2[i].real; | 317 t_r = post2[i].real; |
308 t_i = post2[i].imag; | 318 t_i = post2[i].imag; |
309 | 319 BUTTERFLY_0 (a_r, a_i, t_i, t_r, buf1[i].imag, buf1[i].real); |
310 a_r = t_r * buf1[i].real + t_i * buf1[i].imag; | 320 BUTTERFLY_0 (b_r, b_i, t_r, t_i, buf1[63-i].imag, buf1[63-i].real); |
311 a_i = t_i * buf1[i].real - t_r * buf1[i].imag; | 321 BUTTERFLY_0 (c_r, c_i, t_i, t_r, buf2[i].imag, buf2[i].real); |
312 b_r = t_i * buf1[63-i].real + t_r * buf1[63-i].imag; | 322 BUTTERFLY_0 (d_r, d_i, t_r, t_i, buf2[63-i].imag, buf2[63-i].real); |
313 b_i = t_r * buf1[63-i].real - t_i * buf1[63-i].imag; | |
314 | |
315 c_r = t_r * buf2[i].real + t_i * buf2[i].imag; | |
316 c_i = t_i * buf2[i].real - t_r * buf2[i].imag; | |
317 d_r = t_i * buf2[63-i].real + t_r * buf2[63-i].imag; | |
318 d_i = t_r * buf2[63-i].real - t_i * buf2[63-i].imag; | |
319 | 323 |
320 w_1 = window[2*i]; | 324 w_1 = window[2*i]; |
321 w_2 = window[255-2*i]; | 325 w_2 = window[255-2*i]; |
322 data[2*i] = delay[2*i] * w_2 - a_r * w_1 + bias; | 326 BUTTERFLY_B (data[255-2*i], data[2*i], w_2, w_1, a_r, delay[2*i]); |
323 data[255-2*i] = delay[2*i] * w_1 + a_r * w_2 + bias; | |
324 delay[2*i] = c_i; | 327 delay[2*i] = c_i; |
325 | 328 |
326 w_1 = window[128+2*i]; | 329 w_1 = window[128+2*i]; |
327 w_2 = window[127-2*i]; | 330 w_2 = window[127-2*i]; |
328 data[128+2*i] = delay[127-2*i] * w_2 + a_i * w_1 + bias; | 331 BUTTERFLY_B (data[128+2*i], data[127-2*i], w_1, w_2, a_i, delay[127-2*i]); |
329 data[127-2*i] = delay[127-2*i] * w_1 - a_i * w_2 + bias; | |
330 delay[127-2*i] = c_r; | 332 delay[127-2*i] = c_r; |
331 | 333 |
332 w_1 = window[2*i+1]; | 334 w_1 = window[2*i+1]; |
333 w_2 = window[254-2*i]; | 335 w_2 = window[254-2*i]; |
334 data[2*i+1] = delay[2*i+1] * w_2 - b_i * w_1 + bias; | 336 BUTTERFLY_B (data[254-2*i], data[2*i+1], w_2, w_1, b_i, delay[2*i+1]); |
335 data[254-2*i] = delay[2*i+1] * w_1 + b_i * w_2 + bias; | |
336 delay[2*i+1] = d_r; | 337 delay[2*i+1] = d_r; |
337 | 338 |
338 w_1 = window[129+2*i]; | 339 w_1 = window[129+2*i]; |
339 w_2 = window[126-2*i]; | 340 w_2 = window[126-2*i]; |
340 data[129+2*i] = delay[126-2*i] * w_2 + b_r * w_1 + bias; | 341 BUTTERFLY_B (data[129+2*i], data[126-2*i], w_1, w_2, b_r, delay[126-2*i]); |
341 data[126-2*i] = delay[126-2*i] * w_1 - b_r * w_2 + bias; | |
342 delay[126-2*i] = d_i; | 342 delay[126-2*i] = d_i; |
343 } | 343 } |
344 } | 344 } |
345 | 345 |
346 static double besselI0 (double x) | 346 static double besselI0 (double x) |
356 | 356 |
357 void a52_imdct_init (uint32_t mm_accel) | 357 void a52_imdct_init (uint32_t mm_accel) |
358 { | 358 { |
359 int i, k; | 359 int i, k; |
360 double sum; | 360 double sum; |
361 double local_imdct_window[256]; | |
361 | 362 |
362 /* compute imdct window - kaiser-bessel derived window, alpha = 5.0 */ | 363 /* compute imdct window - kaiser-bessel derived window, alpha = 5.0 */ |
363 sum = 0; | 364 sum = 0; |
364 for (i = 0; i < 256; i++) { | 365 for (i = 0; i < 256; i++) { |
365 sum += besselI0 (i * (256 - i) * (5 * M_PI / 256) * (5 * M_PI / 256)); | 366 sum += besselI0 (i * (256 - i) * (5 * M_PI / 256) * (5 * M_PI / 256)); |
366 a52_imdct_window[i] = sum; | 367 local_imdct_window[i] = sum; |
367 } | 368 } |
368 sum++; | 369 sum++; |
369 for (i = 0; i < 256; i++) | 370 for (i = 0; i < 256; i++) |
370 a52_imdct_window[i] = sqrt (a52_imdct_window[i] / sum); | 371 a52_imdct_window[i] = SAMPLE (sqrt (local_imdct_window[i] / sum)); |
371 | 372 |
372 for (i = 0; i < 3; i++) | 373 for (i = 0; i < 3; i++) |
373 roots16[i] = cos ((M_PI / 8) * (i + 1)); | 374 roots16[i] = SAMPLE (cos ((M_PI / 8) * (i + 1))); |
374 | 375 |
375 for (i = 0; i < 7; i++) | 376 for (i = 0; i < 7; i++) |
376 roots32[i] = cos ((M_PI / 16) * (i + 1)); | 377 roots32[i] = SAMPLE (cos ((M_PI / 16) * (i + 1))); |
377 | 378 |
378 for (i = 0; i < 15; i++) | 379 for (i = 0; i < 15; i++) |
379 roots64[i] = cos ((M_PI / 32) * (i + 1)); | 380 roots64[i] = SAMPLE (cos ((M_PI / 32) * (i + 1))); |
380 | 381 |
381 for (i = 0; i < 31; i++) | 382 for (i = 0; i < 31; i++) |
382 roots128[i] = cos ((M_PI / 64) * (i + 1)); | 383 roots128[i] = SAMPLE (cos ((M_PI / 64) * (i + 1))); |
383 | 384 |
384 for (i = 0; i < 64; i++) { | 385 for (i = 0; i < 64; i++) { |
385 k = fftorder[i] / 2 + 64; | 386 k = fftorder[i] / 2 + 64; |
386 pre1[i].real = cos ((M_PI / 256) * (k - 0.25)); | 387 pre1[i].real = SAMPLE (cos ((M_PI / 256) * (k - 0.25))); |
387 pre1[i].imag = sin ((M_PI / 256) * (k - 0.25)); | 388 pre1[i].imag = SAMPLE (sin ((M_PI / 256) * (k - 0.25))); |
388 } | 389 } |
389 | 390 |
390 for (i = 64; i < 128; i++) { | 391 for (i = 64; i < 128; i++) { |
391 k = fftorder[i] / 2 + 64; | 392 k = fftorder[i] / 2 + 64; |
392 pre1[i].real = -cos ((M_PI / 256) * (k - 0.25)); | 393 pre1[i].real = SAMPLE (-cos ((M_PI / 256) * (k - 0.25))); |
393 pre1[i].imag = -sin ((M_PI / 256) * (k - 0.25)); | 394 pre1[i].imag = SAMPLE (-sin ((M_PI / 256) * (k - 0.25))); |
394 } | 395 } |
395 | 396 |
396 for (i = 0; i < 64; i++) { | 397 for (i = 0; i < 64; i++) { |
397 post1[i].real = cos ((M_PI / 256) * (i + 0.5)); | 398 post1[i].real = SAMPLE (cos ((M_PI / 256) * (i + 0.5))); |
398 post1[i].imag = sin ((M_PI / 256) * (i + 0.5)); | 399 post1[i].imag = SAMPLE (sin ((M_PI / 256) * (i + 0.5))); |
399 } | 400 } |
400 | 401 |
401 for (i = 0; i < 64; i++) { | 402 for (i = 0; i < 64; i++) { |
402 k = fftorder[i] / 4; | 403 k = fftorder[i] / 4; |
403 pre2[i].real = cos ((M_PI / 128) * (k - 0.25)); | 404 pre2[i].real = SAMPLE (cos ((M_PI / 128) * (k - 0.25))); |
404 pre2[i].imag = sin ((M_PI / 128) * (k - 0.25)); | 405 pre2[i].imag = SAMPLE (sin ((M_PI / 128) * (k - 0.25))); |
405 } | 406 } |
406 | 407 |
407 for (i = 0; i < 32; i++) { | 408 for (i = 0; i < 32; i++) { |
408 post2[i].real = cos ((M_PI / 128) * (i + 0.5)); | 409 post2[i].real = SAMPLE (cos ((M_PI / 128) * (i + 0.5))); |
409 post2[i].imag = sin ((M_PI / 128) * (i + 0.5)); | 410 post2[i].imag = SAMPLE (sin ((M_PI / 128) * (i + 0.5))); |
410 } | 411 } |
411 | 412 |
412 #ifdef LIBA52_DJBFFT | 413 #ifdef LIBA52_DJBFFT |
413 if (mm_accel & MM_ACCEL_DJBFFT) { | 414 if (mm_accel & MM_ACCEL_DJBFFT) { |
414 fprintf (stderr, "Using djbfft for IMDCT transform\n"); | |
415 ifft128 = (void (*) (complex_t *)) fftc4_un128; | 415 ifft128 = (void (*) (complex_t *)) fftc4_un128; |
416 ifft64 = (void (*) (complex_t *)) fftc4_un64; | 416 ifft64 = (void (*) (complex_t *)) fftc4_un64; |
417 } else | 417 } else |
418 #endif | 418 #endif |
419 { | 419 { |
420 fprintf (stderr, "No accelerated IMDCT transform found\n"); | |
421 ifft128 = ifft128_c; | 420 ifft128 = ifft128_c; |
422 ifft64 = ifft64_c; | 421 ifft64 = ifft64_c; |
423 } | 422 } |
424 } | 423 } |