Mercurial > libavcodec.hg
annotate liba52/imdct.c @ 3656:1cc5bdadd487 libavcodec
New qpel MC functions conforming to VC-1 standard.
Existing DSPUtil functions cause chroma artifacts on some files.
author | kostya |
---|---|
date | Thu, 31 Aug 2006 04:44:54 +0000 |
parents | 0b546eab515d |
children |
rev | line source |
---|---|
332 | 1 /* |
2 * imdct.c | |
1072 | 3 * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> |
332 | 4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> |
5 * | |
6 * The ifft algorithms in this file have been largely inspired by Dan | |
7 * Bernstein's work, djbfft, available at http://cr.yp.to/djbfft.html | |
8 * | |
9 * This file is part of a52dec, a free ATSC A-52 stream decoder. | |
10 * See http://liba52.sourceforge.net/ for updates. | |
11 * | |
12 * a52dec is free software; you can redistribute it and/or modify | |
13 * it under the terms of the GNU General Public License as published by | |
14 * the Free Software Foundation; either version 2 of the License, or | |
15 * (at your option) any later version. | |
16 * | |
17 * a52dec is distributed in the hope that it will be useful, | |
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
20 * GNU General Public License for more details. | |
21 * | |
22 * You should have received a copy of the GNU General Public License | |
23 * along with this program; if not, write to the Free Software | |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2967
diff
changeset
|
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
332 | 25 */ |
26 | |
27 #include "a52.h" | |
28 #include "a52_internal.h" | |
29 #include "mm_accel.h" | |
30 | |
31 typedef struct complex_s { | |
32 sample_t real; | |
33 sample_t imag; | |
34 } complex_t; | |
35 | |
/*
 * Input permutation table.  In the pre-twiddle loops of a52_imdct_512
 * and a52_imdct_256, entry i gives the index into the coefficient array
 * that feeds slot i of the IFFT input buffer; a52_imdct_init also uses
 * it to derive the matching pre-twiddle angles.  The table is only ever
 * read, hence const.
 */
static const uint8_t fftorder[] = {
      0,128, 64,192, 32,160,224, 96, 16,144, 80,208,240,112, 48,176,
      8,136, 72,200, 40,168,232,104,248,120, 56,184, 24,152,216, 88,
      4,132, 68,196, 36,164,228,100, 20,148, 84,212,244,116, 52,180,
    252,124, 60,188, 28,156,220, 92, 12,140, 76,204,236,108, 44,172,
      2,130, 66,194, 34,162,226, 98, 18,146, 82,210,242,114, 50,178,
     10,138, 74,202, 42,170,234,106,250,122, 58,186, 26,154,218, 90,
    254,126, 62,190, 30,158,222, 94, 14,142, 78,206,238,110, 46,174,
      6,134, 70,198, 38,166,230,102,246,118, 54,182, 22,150,214, 86
};
46 | |
47 /* Root values for IFFT */ | |
48 static sample_t roots16[3]; | |
49 static sample_t roots32[7]; | |
50 static sample_t roots64[15]; | |
51 static sample_t roots128[31]; | |
52 | |
53 /* Twiddle factors for IMDCT */ | |
54 static complex_t pre1[128]; | |
55 static complex_t post1[64]; | |
56 static complex_t pre2[64]; | |
57 static complex_t post2[32]; | |
58 | |
59 static sample_t a52_imdct_window[256]; | |
60 | |
61 static void (* ifft128) (complex_t * buf); | |
62 static void (* ifft64) (complex_t * buf); | |
63 | |
64 static inline void ifft2 (complex_t * buf) | |
65 { | |
1072 | 66 sample_t r, i; |
332 | 67 |
68 r = buf[0].real; | |
69 i = buf[0].imag; | |
70 buf[0].real += buf[1].real; | |
71 buf[0].imag += buf[1].imag; | |
72 buf[1].real = r - buf[1].real; | |
73 buf[1].imag = i - buf[1].imag; | |
74 } | |
75 | |
76 static inline void ifft4 (complex_t * buf) | |
77 { | |
1072 | 78 sample_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; |
332 | 79 |
80 tmp1 = buf[0].real + buf[1].real; | |
81 tmp2 = buf[3].real + buf[2].real; | |
82 tmp3 = buf[0].imag + buf[1].imag; | |
83 tmp4 = buf[2].imag + buf[3].imag; | |
84 tmp5 = buf[0].real - buf[1].real; | |
85 tmp6 = buf[0].imag - buf[1].imag; | |
86 tmp7 = buf[2].imag - buf[3].imag; | |
87 tmp8 = buf[3].real - buf[2].real; | |
88 | |
89 buf[0].real = tmp1 + tmp2; | |
90 buf[0].imag = tmp3 + tmp4; | |
91 buf[2].real = tmp1 - tmp2; | |
92 buf[2].imag = tmp3 - tmp4; | |
93 buf[1].real = tmp5 + tmp7; | |
94 buf[1].imag = tmp6 + tmp8; | |
95 buf[3].real = tmp5 - tmp7; | |
96 buf[3].imag = tmp6 - tmp8; | |
97 } | |
98 | |
/* basic radix-2 ifft butterfly:
 *   t0 = W1*d1 + W0*d0
 *   t1 = W0*d1 - W1*d0
 * t0 and t1 must be assignable; products go through MUL. */

#define BUTTERFLY_0(t0,t1,W0,W1,d0,d1) do { \
    t0 = MUL (W1, d1) + MUL (W0, d0); \
    t1 = MUL (W0, d1) - MUL (W1, d0); \
} while (0)
105 | |
/* radix-2 ifft butterfly with bias: same sums as BUTTERFLY_0, with each
 * result passed through BIAS before being stored.
 * NOTE(review): BIAS is defined outside this file; it presumably uses the
 * caller's `bias` variable -- confirm against a52_internal.h. */

#define BUTTERFLY_B(t0,t1,W0,W1,d0,d1) do { \
    t0 = BIAS (MUL (d1, W1) + MUL (d0, W0)); \
    t1 = BIAS (MUL (d1, W0) - MUL (d0, W1)); \
} while (0)
112 | |
/* the basic split-radix ifft butterfly
 *
 * Updates the four complex lvalues a0..a3 in place using the weight pair
 * (wr, wi).  Relies on sample_t variables tmp1..tmp8 being declared in
 * the enclosing scope. */

#define BUTTERFLY(a0,a1,a2,a3,wr,wi) do { \
    BUTTERFLY_0 (tmp5, tmp6, wr, wi, a2.real, a2.imag); \
    BUTTERFLY_0 (tmp8, tmp7, wr, wi, a3.imag, a3.real); \
    tmp1 = tmp5 + tmp7; \
    tmp2 = tmp6 + tmp8; \
    tmp3 = tmp6 - tmp8; \
    tmp4 = tmp7 - tmp5; \
    a2.real = a0.real - tmp1; \
    a2.imag = a0.imag - tmp2; \
    a3.real = a1.real - tmp3; \
    a3.imag = a1.imag - tmp4; \
    a0.real += tmp1; \
    a0.imag += tmp2; \
    a1.real += tmp3; \
    a1.imag += tmp4; \
} while (0)
131 | |
/* split-radix ifft butterfly, specialized for wr=1 wi=0
 *
 * No multiplications are needed in this case.  Relies on sample_t
 * variables tmp1..tmp4 being declared in the enclosing scope. */

#define BUTTERFLY_ZERO(a0,a1,a2,a3) do { \
    tmp1 = a2.real + a3.real; \
    tmp2 = a2.imag + a3.imag; \
    tmp3 = a2.imag - a3.imag; \
    tmp4 = a3.real - a2.real; \
    a2.real = a0.real - tmp1; \
    a2.imag = a0.imag - tmp2; \
    a3.real = a1.real - tmp3; \
    a3.imag = a1.imag - tmp4; \
    a0.real += tmp1; \
    a0.imag += tmp2; \
    a1.real += tmp3; \
    a1.imag += tmp4; \
} while (0)
148 | |
/* split-radix ifft butterfly, specialized for wr=wi
 *
 * Takes the single shared weight w, so only four MUL calls are needed.
 * Relies on sample_t variables tmp1..tmp8 being declared in the
 * enclosing scope. */

#define BUTTERFLY_HALF(a0,a1,a2,a3,w) do { \
    tmp5 = MUL (a2.real + a2.imag, w); \
    tmp6 = MUL (a2.imag - a2.real, w); \
    tmp7 = MUL (a3.real - a3.imag, w); \
    tmp8 = MUL (a3.imag + a3.real, w); \
    tmp1 = tmp5 + tmp7; \
    tmp2 = tmp6 + tmp8; \
    tmp3 = tmp6 - tmp8; \
    tmp4 = tmp7 - tmp5; \
    a2.real = a0.real - tmp1; \
    a2.imag = a0.imag - tmp2; \
    a3.real = a1.real - tmp3; \
    a3.imag = a1.imag - tmp4; \
    a0.real += tmp1; \
    a0.imag += tmp2; \
    a1.real += tmp3; \
    a1.imag += tmp4; \
} while (0)
169 | |
170 static inline void ifft8 (complex_t * buf) | |
171 { | |
1072 | 172 sample_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; |
332 | 173 |
174 ifft4 (buf); | |
175 ifft2 (buf + 4); | |
176 ifft2 (buf + 6); | |
177 BUTTERFLY_ZERO (buf[0], buf[2], buf[4], buf[6]); | |
178 BUTTERFLY_HALF (buf[1], buf[3], buf[5], buf[7], roots16[1]); | |
179 } | |
180 | |
181 static void ifft_pass (complex_t * buf, sample_t * weight, int n) | |
182 { | |
183 complex_t * buf1; | |
184 complex_t * buf2; | |
185 complex_t * buf3; | |
1072 | 186 sample_t tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7, tmp8; |
332 | 187 int i; |
188 | |
189 buf++; | |
190 buf1 = buf + n; | |
191 buf2 = buf + 2 * n; | |
192 buf3 = buf + 3 * n; | |
193 | |
194 BUTTERFLY_ZERO (buf[-1], buf1[-1], buf2[-1], buf3[-1]); | |
195 | |
196 i = n - 1; | |
197 | |
198 do { | |
1072 | 199 BUTTERFLY (buf[0], buf1[0], buf2[0], buf3[0], |
200 weight[0], weight[2*i-n]); | |
332 | 201 buf++; |
202 buf1++; | |
203 buf2++; | |
204 buf3++; | |
205 weight++; | |
206 } while (--i); | |
207 } | |
208 | |
209 static void ifft16 (complex_t * buf) | |
210 { | |
211 ifft8 (buf); | |
212 ifft4 (buf + 8); | |
213 ifft4 (buf + 12); | |
1072 | 214 ifft_pass (buf, roots16, 4); |
332 | 215 } |
216 | |
217 static void ifft32 (complex_t * buf) | |
218 { | |
219 ifft16 (buf); | |
220 ifft8 (buf + 16); | |
221 ifft8 (buf + 24); | |
1072 | 222 ifft_pass (buf, roots32, 8); |
332 | 223 } |
224 | |
225 static void ifft64_c (complex_t * buf) | |
226 { | |
227 ifft32 (buf); | |
228 ifft16 (buf + 32); | |
229 ifft16 (buf + 48); | |
1072 | 230 ifft_pass (buf, roots64, 16); |
332 | 231 } |
232 | |
233 static void ifft128_c (complex_t * buf) | |
234 { | |
235 ifft32 (buf); | |
236 ifft16 (buf + 32); | |
237 ifft16 (buf + 48); | |
1072 | 238 ifft_pass (buf, roots64, 16); |
332 | 239 |
240 ifft32 (buf + 64); | |
241 ifft32 (buf + 96); | |
1072 | 242 ifft_pass (buf, roots128, 32); |
332 | 243 } |
244 | |
245 void a52_imdct_512 (sample_t * data, sample_t * delay, sample_t bias) | |
246 { | |
247 int i, k; | |
248 sample_t t_r, t_i, a_r, a_i, b_r, b_i, w_1, w_2; | |
249 const sample_t * window = a52_imdct_window; | |
1072 | 250 complex_t buf[128]; |
2967 | 251 |
332 | 252 for (i = 0; i < 128; i++) { |
253 k = fftorder[i]; | |
254 t_r = pre1[i].real; | |
255 t_i = pre1[i].imag; | |
1072 | 256 BUTTERFLY_0 (buf[i].real, buf[i].imag, t_r, t_i, data[k], data[255-k]); |
332 | 257 } |
258 | |
259 ifft128 (buf); | |
260 | |
261 /* Post IFFT complex multiply plus IFFT complex conjugate*/ | |
262 /* Window and convert to real valued signal */ | |
263 for (i = 0; i < 64; i++) { | |
264 /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */ | |
265 t_r = post1[i].real; | |
266 t_i = post1[i].imag; | |
1072 | 267 BUTTERFLY_0 (a_r, a_i, t_i, t_r, buf[i].imag, buf[i].real); |
268 BUTTERFLY_0 (b_r, b_i, t_r, t_i, buf[127-i].imag, buf[127-i].real); | |
332 | 269 |
270 w_1 = window[2*i]; | |
271 w_2 = window[255-2*i]; | |
1072 | 272 BUTTERFLY_B (data[255-2*i], data[2*i], w_2, w_1, a_r, delay[2*i]); |
332 | 273 delay[2*i] = a_i; |
274 | |
275 w_1 = window[2*i+1]; | |
276 w_2 = window[254-2*i]; | |
1072 | 277 BUTTERFLY_B (data[2*i+1], data[254-2*i], w_1, w_2, b_r, delay[2*i+1]); |
332 | 278 delay[2*i+1] = b_i; |
279 } | |
280 } | |
281 | |
1072 | 282 void a52_imdct_256 (sample_t * data, sample_t * delay, sample_t bias) |
332 | 283 { |
284 int i, k; | |
285 sample_t t_r, t_i, a_r, a_i, b_r, b_i, c_r, c_i, d_r, d_i, w_1, w_2; | |
286 const sample_t * window = a52_imdct_window; | |
1072 | 287 complex_t buf1[64], buf2[64]; |
332 | 288 |
289 /* Pre IFFT complex multiply plus IFFT cmplx conjugate */ | |
290 for (i = 0; i < 64; i++) { | |
291 k = fftorder[i]; | |
292 t_r = pre2[i].real; | |
293 t_i = pre2[i].imag; | |
1072 | 294 BUTTERFLY_0 (buf1[i].real, buf1[i].imag, t_r, t_i, data[k], data[254-k]); |
295 BUTTERFLY_0 (buf2[i].real, buf2[i].imag, t_r, t_i, data[k+1], data[255-k]); | |
332 | 296 } |
297 | |
298 ifft64 (buf1); | |
299 ifft64 (buf2); | |
300 | |
301 /* Post IFFT complex multiply */ | |
302 /* Window and convert to real valued signal */ | |
303 for (i = 0; i < 32; i++) { | |
2967 | 304 /* y1[n] = z1[n] * (xcos2[n] + j * xs in2[n]) ; */ |
332 | 305 t_r = post2[i].real; |
306 t_i = post2[i].imag; | |
1072 | 307 BUTTERFLY_0 (a_r, a_i, t_i, t_r, buf1[i].imag, buf1[i].real); |
308 BUTTERFLY_0 (b_r, b_i, t_r, t_i, buf1[63-i].imag, buf1[63-i].real); | |
309 BUTTERFLY_0 (c_r, c_i, t_i, t_r, buf2[i].imag, buf2[i].real); | |
310 BUTTERFLY_0 (d_r, d_i, t_r, t_i, buf2[63-i].imag, buf2[63-i].real); | |
332 | 311 |
312 w_1 = window[2*i]; | |
313 w_2 = window[255-2*i]; | |
1072 | 314 BUTTERFLY_B (data[255-2*i], data[2*i], w_2, w_1, a_r, delay[2*i]); |
332 | 315 delay[2*i] = c_i; |
316 | |
317 w_1 = window[128+2*i]; | |
318 w_2 = window[127-2*i]; | |
1072 | 319 BUTTERFLY_B (data[128+2*i], data[127-2*i], w_1, w_2, a_i, delay[127-2*i]); |
332 | 320 delay[127-2*i] = c_r; |
321 | |
322 w_1 = window[2*i+1]; | |
323 w_2 = window[254-2*i]; | |
1072 | 324 BUTTERFLY_B (data[254-2*i], data[2*i+1], w_2, w_1, b_i, delay[2*i+1]); |
332 | 325 delay[2*i+1] = d_r; |
326 | |
327 w_1 = window[129+2*i]; | |
328 w_2 = window[126-2*i]; | |
1072 | 329 BUTTERFLY_B (data[129+2*i], data[126-2*i], w_1, w_2, b_r, delay[126-2*i]); |
332 | 330 delay[126-2*i] = d_i; |
331 } | |
332 } | |
333 | |
/*
 * Evaluates the truncated power series
 *     sum_{k=0}^{100} x^k / (k!)^2
 * in nested (Horner) form, starting from the innermost term (i = 100)
 * and working outwards to i = 1.  Returns exactly 1 for x == 0.
 */
static double besselI0 (double x)
{
    double bessel = 1;
    int i = 100;

    do {
        bessel = bessel * x / (i * i) + 1;
    } while (--i);
    return bessel;
}
344 | |
345 void a52_imdct_init (uint32_t mm_accel) | |
346 { | |
347 int i, k; | |
348 double sum; | |
1072 | 349 double local_imdct_window[256]; |
332 | 350 |
351 /* compute imdct window - kaiser-bessel derived window, alpha = 5.0 */ | |
352 sum = 0; | |
353 for (i = 0; i < 256; i++) { | |
354 sum += besselI0 (i * (256 - i) * (5 * M_PI / 256) * (5 * M_PI / 256)); | |
1072 | 355 local_imdct_window[i] = sum; |
332 | 356 } |
357 sum++; | |
358 for (i = 0; i < 256; i++) | |
1072 | 359 a52_imdct_window[i] = SAMPLE (sqrt (local_imdct_window[i] / sum)); |
332 | 360 |
361 for (i = 0; i < 3; i++) | |
1072 | 362 roots16[i] = SAMPLE (cos ((M_PI / 8) * (i + 1))); |
332 | 363 |
364 for (i = 0; i < 7; i++) | |
1072 | 365 roots32[i] = SAMPLE (cos ((M_PI / 16) * (i + 1))); |
332 | 366 |
367 for (i = 0; i < 15; i++) | |
1072 | 368 roots64[i] = SAMPLE (cos ((M_PI / 32) * (i + 1))); |
332 | 369 |
370 for (i = 0; i < 31; i++) | |
1072 | 371 roots128[i] = SAMPLE (cos ((M_PI / 64) * (i + 1))); |
332 | 372 |
373 for (i = 0; i < 64; i++) { | |
374 k = fftorder[i] / 2 + 64; | |
1072 | 375 pre1[i].real = SAMPLE (cos ((M_PI / 256) * (k - 0.25))); |
376 pre1[i].imag = SAMPLE (sin ((M_PI / 256) * (k - 0.25))); | |
332 | 377 } |
378 | |
379 for (i = 64; i < 128; i++) { | |
380 k = fftorder[i] / 2 + 64; | |
1072 | 381 pre1[i].real = SAMPLE (-cos ((M_PI / 256) * (k - 0.25))); |
382 pre1[i].imag = SAMPLE (-sin ((M_PI / 256) * (k - 0.25))); | |
332 | 383 } |
384 | |
385 for (i = 0; i < 64; i++) { | |
1072 | 386 post1[i].real = SAMPLE (cos ((M_PI / 256) * (i + 0.5))); |
387 post1[i].imag = SAMPLE (sin ((M_PI / 256) * (i + 0.5))); | |
332 | 388 } |
389 | |
390 for (i = 0; i < 64; i++) { | |
391 k = fftorder[i] / 4; | |
1072 | 392 pre2[i].real = SAMPLE (cos ((M_PI / 128) * (k - 0.25))); |
393 pre2[i].imag = SAMPLE (sin ((M_PI / 128) * (k - 0.25))); | |
332 | 394 } |
395 | |
396 for (i = 0; i < 32; i++) { | |
1072 | 397 post2[i].real = SAMPLE (cos ((M_PI / 128) * (i + 0.5))); |
398 post2[i].imag = SAMPLE (sin ((M_PI / 128) * (i + 0.5))); | |
332 | 399 } |
400 | |
401 #ifdef LIBA52_DJBFFT | |
402 if (mm_accel & MM_ACCEL_DJBFFT) { | |
403 ifft128 = (void (*) (complex_t *)) fftc4_un128; | |
404 ifft64 = (void (*) (complex_t *)) fftc4_un64; | |
405 } else | |
406 #endif | |
407 { | |
408 ifft128 = ifft128_c; | |
409 ifft64 = ifft64_c; | |
410 } | |
411 } |