Mercurial > libavcodec.hg
annotate mdct.c @ 12266:48d6738904a9 libavcodec
Fix SPLATB_REG mess. Used to be a if/elseif/elseif/elseif spaghetti, so this
splits it into small optimization-specific macros which are selected for each
DSP function. The advantage of this approach is that the sse4 functions now
use the ssse3 codepath also without needing an explicit sse4 codepath.
author | rbultje |
---|---|
date | Sat, 24 Jul 2010 19:33:05 +0000 |
parents | 052b9c58ccc4 |
children |
rev | line source |
---|---|
781 | 1 /* |
2 * MDCT/IMDCT transforms | |
8629
04423b2f6e0b
cosmetics: Remove pointless period after copyright statement non-sentences.
diego
parents:
8567
diff
changeset
|
3 * Copyright (c) 2002 Fabrice Bellard |
781 | 4 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
5 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
6 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
7 * FFmpeg is free software; you can redistribute it and/or |
781 | 8 * modify it under the terms of the GNU Lesser General Public |
9 * License as published by the Free Software Foundation; either | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
10 * version 2.1 of the License, or (at your option) any later version. |
781 | 11 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
12 * FFmpeg is distributed in the hope that it will be useful, |
781 | 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 * Lesser General Public License for more details. | |
16 * | |
17 * You should have received a copy of the GNU Lesser General Public | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
18 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2967
diff
changeset
|
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
781 | 20 */ |
11370 | 21 |
11444
6f1697664bf2
Replace many includes of libavutil/common.h with what is actually needed
mru
parents:
11370
diff
changeset
|
22 #include <stdlib.h> |
6f1697664bf2
Replace many includes of libavutil/common.h with what is actually needed
mru
parents:
11370
diff
changeset
|
23 #include <string.h> |
6f1697664bf2
Replace many includes of libavutil/common.h with what is actually needed
mru
parents:
11370
diff
changeset
|
24 #include "libavutil/common.h" |
11370 | 25 #include "libavutil/mathematics.h" |
26 #include "fft.h" | |
781 | 27 |
1106 | 28 /** |
11644
7dd2a45249a9
Remove explicit filename from Doxygen @file commands.
diego
parents:
11444
diff
changeset
|
29 * @file |
1106 | 30 * MDCT/IMDCT transforms. |
31 */ | |
32 | |
6139
5077d1562573
Make the Kaiser-Bessel window generator a common function
andoma
parents:
3947
diff
changeset
|
33 // Generate a Kaiser-Bessel Derived Window. |
6142
a35b838ab955
Add variable alpha and size of half window for Kaiser-Bessel Derived window
superdump
parents:
6139
diff
changeset
|
34 #define BESSEL_I0_ITER 50 // default: 50 iterations of Bessel I0 approximation |
8737
eeca2fc122f8
Add av_cold attributes to *_init and *_end functions.
alexc
parents:
8718
diff
changeset
|
35 av_cold void ff_kbd_window_init(float *window, float alpha, int n) |
6139
5077d1562573
Make the Kaiser-Bessel window generator a common function
andoma
parents:
3947
diff
changeset
|
36 { |
5077d1562573
Make the Kaiser-Bessel window generator a common function
andoma
parents:
3947
diff
changeset
|
37 int i, j; |
5077d1562573
Make the Kaiser-Bessel window generator a common function
andoma
parents:
3947
diff
changeset
|
38 double sum = 0.0, bessel, tmp; |
11944
052b9c58ccc4
Remove VLA in ff_kbd_window_init, limit window size to 1024
mru
parents:
11644
diff
changeset
|
39 double local_window[FF_KBD_WINDOW_MAX]; |
6142
a35b838ab955
Add variable alpha and size of half window for Kaiser-Bessel Derived window
superdump
parents:
6139
diff
changeset
|
40 double alpha2 = (alpha * M_PI / n) * (alpha * M_PI / n); |
6139
5077d1562573
Make the Kaiser-Bessel window generator a common function
andoma
parents:
3947
diff
changeset
|
41 |
11944
052b9c58ccc4
Remove VLA in ff_kbd_window_init, limit window size to 1024
mru
parents:
11644
diff
changeset
|
42 assert(n <= FF_KBD_WINDOW_MAX); |
052b9c58ccc4
Remove VLA in ff_kbd_window_init, limit window size to 1024
mru
parents:
11644
diff
changeset
|
43 |
6142
a35b838ab955
Add variable alpha and size of half window for Kaiser-Bessel Derived window
superdump
parents:
6139
diff
changeset
|
44 for (i = 0; i < n; i++) { |
a35b838ab955
Add variable alpha and size of half window for Kaiser-Bessel Derived window
superdump
parents:
6139
diff
changeset
|
45 tmp = i * (n - i) * alpha2; |
6139
5077d1562573
Make the Kaiser-Bessel window generator a common function
andoma
parents:
3947
diff
changeset
|
46 bessel = 1.0; |
6142
a35b838ab955
Add variable alpha and size of half window for Kaiser-Bessel Derived window
superdump
parents:
6139
diff
changeset
|
47 for (j = BESSEL_I0_ITER; j > 0; j--) |
6139
5077d1562573
Make the Kaiser-Bessel window generator a common function
andoma
parents:
3947
diff
changeset
|
48 bessel = bessel * tmp / (j * j) + 1; |
5077d1562573
Make the Kaiser-Bessel window generator a common function
andoma
parents:
3947
diff
changeset
|
49 sum += bessel; |
5077d1562573
Make the Kaiser-Bessel window generator a common function
andoma
parents:
3947
diff
changeset
|
50 local_window[i] = sum; |
5077d1562573
Make the Kaiser-Bessel window generator a common function
andoma
parents:
3947
diff
changeset
|
51 } |
5077d1562573
Make the Kaiser-Bessel window generator a common function
andoma
parents:
3947
diff
changeset
|
52 |
5077d1562573
Make the Kaiser-Bessel window generator a common function
andoma
parents:
3947
diff
changeset
|
53 sum++; |
6142
a35b838ab955
Add variable alpha and size of half window for Kaiser-Bessel Derived window
superdump
parents:
6139
diff
changeset
|
54 for (i = 0; i < n; i++) |
6139
5077d1562573
Make the Kaiser-Bessel window generator a common function
andoma
parents:
3947
diff
changeset
|
55 window[i] = sqrt(local_window[i] / sum); |
5077d1562573
Make the Kaiser-Bessel window generator a common function
andoma
parents:
3947
diff
changeset
|
56 } |
5077d1562573
Make the Kaiser-Bessel window generator a common function
andoma
parents:
3947
diff
changeset
|
57 |
10827
3d011a01a6a0
Add support for hard-coded MDCT-related ff_sine_windows tables.
reimar
parents:
10204
diff
changeset
|
58 #include "mdct_tablegen.h" |
7094
b0820b8bd4dd
Add generic ff_sine_window_init function and implement in codecs appropriately
superdump
parents:
6498
diff
changeset
|
59 |
1106 | 60 /** |
61 * init MDCT or IMDCT computation. | |
781 | 62 */ |
10199 | 63 av_cold int ff_mdct_init(FFTContext *s, int nbits, int inverse, double scale) |
781 | 64 { |
65 int n, n4, i; | |
9658
67a20f0eb42c
Support for getting (i)MDCT output multiplied by a constant scaling factor.
serge
parents:
9480
diff
changeset
|
66 double alpha, theta; |
10204
db033d1fbf44
Allow arch-specific mdct code to request interleaving of cos/sin tables
mru
parents:
10199
diff
changeset
|
67 int tstep; |
781 | 68 |
69 memset(s, 0, sizeof(*s)); | |
70 n = 1 << nbits; | |
10199 | 71 s->mdct_bits = nbits; |
72 s->mdct_size = n; | |
781 | 73 n4 = n >> 2; |
10204
db033d1fbf44
Allow arch-specific mdct code to request interleaving of cos/sin tables
mru
parents:
10199
diff
changeset
|
74 s->permutation = FF_MDCT_PERM_NONE; |
db033d1fbf44
Allow arch-specific mdct code to request interleaving of cos/sin tables
mru
parents:
10199
diff
changeset
|
75 |
db033d1fbf44
Allow arch-specific mdct code to request interleaving of cos/sin tables
mru
parents:
10199
diff
changeset
|
76 if (ff_fft_init(s, s->mdct_bits - 2, inverse) < 0) |
db033d1fbf44
Allow arch-specific mdct code to request interleaving of cos/sin tables
mru
parents:
10199
diff
changeset
|
77 goto fail; |
db033d1fbf44
Allow arch-specific mdct code to request interleaving of cos/sin tables
mru
parents:
10199
diff
changeset
|
78 |
db033d1fbf44
Allow arch-specific mdct code to request interleaving of cos/sin tables
mru
parents:
10199
diff
changeset
|
79 s->tcos = av_malloc(n/2 * sizeof(FFTSample)); |
781 | 80 if (!s->tcos) |
81 goto fail; | |
10204
db033d1fbf44
Allow arch-specific mdct code to request interleaving of cos/sin tables
mru
parents:
10199
diff
changeset
|
82 |
db033d1fbf44
Allow arch-specific mdct code to request interleaving of cos/sin tables
mru
parents:
10199
diff
changeset
|
83 switch (s->permutation) { |
db033d1fbf44
Allow arch-specific mdct code to request interleaving of cos/sin tables
mru
parents:
10199
diff
changeset
|
84 case FF_MDCT_PERM_NONE: |
db033d1fbf44
Allow arch-specific mdct code to request interleaving of cos/sin tables
mru
parents:
10199
diff
changeset
|
85 s->tsin = s->tcos + n4; |
db033d1fbf44
Allow arch-specific mdct code to request interleaving of cos/sin tables
mru
parents:
10199
diff
changeset
|
86 tstep = 1; |
db033d1fbf44
Allow arch-specific mdct code to request interleaving of cos/sin tables
mru
parents:
10199
diff
changeset
|
87 break; |
db033d1fbf44
Allow arch-specific mdct code to request interleaving of cos/sin tables
mru
parents:
10199
diff
changeset
|
88 case FF_MDCT_PERM_INTERLEAVE: |
db033d1fbf44
Allow arch-specific mdct code to request interleaving of cos/sin tables
mru
parents:
10199
diff
changeset
|
89 s->tsin = s->tcos + 1; |
db033d1fbf44
Allow arch-specific mdct code to request interleaving of cos/sin tables
mru
parents:
10199
diff
changeset
|
90 tstep = 2; |
db033d1fbf44
Allow arch-specific mdct code to request interleaving of cos/sin tables
mru
parents:
10199
diff
changeset
|
91 break; |
db033d1fbf44
Allow arch-specific mdct code to request interleaving of cos/sin tables
mru
parents:
10199
diff
changeset
|
92 default: |
781 | 93 goto fail; |
10204
db033d1fbf44
Allow arch-specific mdct code to request interleaving of cos/sin tables
mru
parents:
10199
diff
changeset
|
94 } |
781 | 95 |
9658
67a20f0eb42c
Support for getting (i)MDCT output multiplied by a constant scaling factor.
serge
parents:
9480
diff
changeset
|
96 theta = 1.0 / 8.0 + (scale < 0 ? n4 : 0); |
67a20f0eb42c
Support for getting (i)MDCT output multiplied by a constant scaling factor.
serge
parents:
9480
diff
changeset
|
97 scale = sqrt(fabs(scale)); |
781 | 98 for(i=0;i<n4;i++) { |
9658
67a20f0eb42c
Support for getting (i)MDCT output multiplied by a constant scaling factor.
serge
parents:
9480
diff
changeset
|
99 alpha = 2 * M_PI * (i + theta) / n; |
10204
db033d1fbf44
Allow arch-specific mdct code to request interleaving of cos/sin tables
mru
parents:
10199
diff
changeset
|
100 s->tcos[i*tstep] = -cos(alpha) * scale; |
db033d1fbf44
Allow arch-specific mdct code to request interleaving of cos/sin tables
mru
parents:
10199
diff
changeset
|
101 s->tsin[i*tstep] = -sin(alpha) * scale; |
781 | 102 } |
103 return 0; | |
104 fail: | |
10204
db033d1fbf44
Allow arch-specific mdct code to request interleaving of cos/sin tables
mru
parents:
10199
diff
changeset
|
105 ff_mdct_end(s); |
781 | 106 return -1; |
107 } | |
108 | |
109 /* complex multiplication: p = a * b */ | |
110 #define CMUL(pre, pim, are, aim, bre, bim) \ | |
111 {\ | |
7545 | 112 FFTSample _are = (are);\ |
113 FFTSample _aim = (aim);\ | |
114 FFTSample _bre = (bre);\ | |
115 FFTSample _bim = (bim);\ | |
781 | 116 (pre) = _are * _bre - _aim * _bim;\ |
117 (pim) = _are * _bim + _aim * _bre;\ | |
118 } | |
119 | |
7544 | 120 /** |
121 * Compute the middle half of the inverse MDCT of size N = 2^nbits, | |
122 * thus excluding the parts that can be derived by symmetry | |
123 * @param output N/2 samples | |
124 * @param input N/2 samples | |
125 */ | |
10199 | 126 void ff_imdct_half_c(FFTContext *s, FFTSample *output, const FFTSample *input) |
781 | 127 { |
7544 | 128 int k, n8, n4, n2, n, j; |
10199 | 129 const uint16_t *revtab = s->revtab; |
781 | 130 const FFTSample *tcos = s->tcos; |
131 const FFTSample *tsin = s->tsin; | |
132 const FFTSample *in1, *in2; | |
7544 | 133 FFTComplex *z = (FFTComplex *)output; |
781 | 134 |
10199 | 135 n = 1 << s->mdct_bits; |
781 | 136 n2 = n >> 1; |
137 n4 = n >> 2; | |
7544 | 138 n8 = n >> 3; |
781 | 139 |
140 /* pre rotation */ | |
141 in1 = input; | |
142 in2 = input + n2 - 1; | |
143 for(k = 0; k < n4; k++) { | |
144 j=revtab[k]; | |
145 CMUL(z[j].re, z[j].im, *in2, *in1, tcos[k], tsin[k]); | |
146 in1 += 2; | |
147 in2 -= 2; | |
148 } | |
10199 | 149 ff_fft_calc(s, z); |
781 | 150 |
151 /* post rotation + reordering */ | |
7544 | 152 for(k = 0; k < n8; k++) { |
153 FFTSample r0, i0, r1, i1; | |
154 CMUL(r0, i1, z[n8-k-1].im, z[n8-k-1].re, tsin[n8-k-1], tcos[n8-k-1]); | |
155 CMUL(r1, i0, z[n8+k ].im, z[n8+k ].re, tsin[n8+k ], tcos[n8+k ]); | |
156 z[n8-k-1].re = r0; | |
157 z[n8-k-1].im = i0; | |
158 z[n8+k ].re = r1; | |
159 z[n8+k ].im = i1; | |
781 | 160 } |
7263 | 161 } |
162 | |
163 /** | |
164 * Compute inverse MDCT of size N = 2^nbits | |
165 * @param output N samples | |
166 * @param input N/2 samples | |
167 */ | |
10199 | 168 void ff_imdct_calc_c(FFTContext *s, FFTSample *output, const FFTSample *input) |
7263 | 169 { |
7544 | 170 int k; |
10199 | 171 int n = 1 << s->mdct_bits; |
7544 | 172 int n2 = n >> 1; |
173 int n4 = n >> 2; | |
781 | 174 |
7547 | 175 ff_imdct_half_c(s, output+n4, input); |
7263 | 176 |
7544 | 177 for(k = 0; k < n4; k++) { |
178 output[k] = -output[n2-k-1]; | |
179 output[n-k-1] = output[n2+k]; | |
7263 | 180 } |
181 } | |
182 | |
183 /** | |
781 | 184 * Compute MDCT of size N = 2^nbits |
185 * @param input N samples | |
186 * @param out N/2 samples | |
187 */ | |
10199 | 188 void ff_mdct_calc_c(FFTContext *s, FFTSample *out, const FFTSample *input) |
781 | 189 { |
190 int i, j, n, n8, n4, n2, n3; | |
7546 | 191 FFTSample re, im; |
10199 | 192 const uint16_t *revtab = s->revtab; |
781 | 193 const FFTSample *tcos = s->tcos; |
194 const FFTSample *tsin = s->tsin; | |
7544 | 195 FFTComplex *x = (FFTComplex *)out; |
781 | 196 |
10199 | 197 n = 1 << s->mdct_bits; |
781 | 198 n2 = n >> 1; |
199 n4 = n >> 2; | |
200 n8 = n >> 3; | |
201 n3 = 3 * n4; | |
202 | |
203 /* pre rotation */ | |
204 for(i=0;i<n8;i++) { | |
205 re = -input[2*i+3*n4] - input[n3-1-2*i]; | |
206 im = -input[n4+2*i] + input[n4-1-2*i]; | |
207 j = revtab[i]; | |
208 CMUL(x[j].re, x[j].im, re, im, -tcos[i], tsin[i]); | |
209 | |
210 re = input[2*i] - input[n2-1-2*i]; | |
211 im = -(input[n2+2*i] + input[n-1-2*i]); | |
212 j = revtab[n8 + i]; | |
213 CMUL(x[j].re, x[j].im, re, im, -tcos[n8 + i], tsin[n8 + i]); | |
214 } | |
215 | |
10199 | 216 ff_fft_calc(s, x); |
2967 | 217 |
781 | 218 /* post rotation */ |
7544 | 219 for(i=0;i<n8;i++) { |
220 FFTSample r0, i0, r1, i1; | |
221 CMUL(i1, r0, x[n8-i-1].re, x[n8-i-1].im, -tsin[n8-i-1], -tcos[n8-i-1]); | |
222 CMUL(i0, r1, x[n8+i ].re, x[n8+i ].im, -tsin[n8+i ], -tcos[n8+i ]); | |
223 x[n8-i-1].re = r0; | |
224 x[n8-i-1].im = i0; | |
225 x[n8+i ].re = r1; | |
226 x[n8+i ].im = i1; | |
781 | 227 } |
228 } | |
229 | |
10199 | 230 av_cold void ff_mdct_end(FFTContext *s) |
781 | 231 { |
232 av_freep(&s->tcos); | |
10199 | 233 ff_fft_end(s); |
781 | 234 } |