Mercurial > libavcodec.hg
annotate mdct.c @ 5729:81fff8db79cc libavcodec
2.5x faster compute_autocorr()
overall flac encoding: 15-50% faster on core2, 8-30% on k8, 2-20% on p4 (depending on compression_level)
author | lorenm |
---|---|
date | Fri, 28 Sep 2007 06:06:18 +0000 |
parents | c8c591fe26f8 |
children | 5077d1562573 |
rev | line source |
---|---|
781 | 1 /* |
2 * MDCT/IMDCT transforms | |
3 * Copyright (c) 2002 Fabrice Bellard. | |
4 * | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
5 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
6 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
7 * FFmpeg is free software; you can redistribute it and/or |
781 | 8 * modify it under the terms of the GNU Lesser General Public |
9 * License as published by the Free Software Foundation; either | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
10 * version 2.1 of the License, or (at your option) any later version. |
781 | 11 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
12 * FFmpeg is distributed in the hope that it will be useful, |
781 | 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 * Lesser General Public License for more details. | |
16 * | |
17 * You should have received a copy of the GNU Lesser General Public | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3036
diff
changeset
|
18 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2967
diff
changeset
|
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
781 | 20 */ |
21 #include "dsputil.h" | |
22 | |
1106 | 23 /** |
24 * @file mdct.c | |
25 * MDCT/IMDCT transforms. | |
26 */ | |
27 | |
28 /** | |
29 * init MDCT or IMDCT computation. | |
781 | 30 */ |
794 | 31 int ff_mdct_init(MDCTContext *s, int nbits, int inverse) |
781 | 32 { |
33 int n, n4, i; | |
34 float alpha; | |
35 | |
36 memset(s, 0, sizeof(*s)); | |
37 n = 1 << nbits; | |
38 s->nbits = nbits; | |
39 s->n = n; | |
40 n4 = n >> 2; | |
970 | 41 s->tcos = av_malloc(n4 * sizeof(FFTSample)); |
781 | 42 if (!s->tcos) |
43 goto fail; | |
970 | 44 s->tsin = av_malloc(n4 * sizeof(FFTSample)); |
781 | 45 if (!s->tsin) |
46 goto fail; | |
47 | |
48 for(i=0;i<n4;i++) { | |
49 alpha = 2 * M_PI * (i + 1.0 / 8.0) / n; | |
50 s->tcos[i] = -cos(alpha); | |
51 s->tsin[i] = -sin(alpha); | |
52 } | |
1879
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1106
diff
changeset
|
53 if (ff_fft_init(&s->fft, s->nbits - 2, inverse) < 0) |
781 | 54 goto fail; |
55 return 0; | |
56 fail: | |
57 av_freep(&s->tcos); | |
58 av_freep(&s->tsin); | |
59 return -1; | |
60 } | |
61 | |
/**
 * Complex multiplication: (pre + i*pim) = (are + i*aim) * (bre + i*bim).
 *
 * The four inputs are copied into float temporaries before either output
 * is assigned, so an output may be the same lvalue as an input.
 *
 * The body is wrapped in do { } while (0) so that "CMUL(...);" expands to
 * exactly one statement and can be used as the unbraced body of an
 * if/else without breaking the else branch.
 */
#define CMUL(pre, pim, are, aim, bre, bim) \
do {\
    float _are = (are);\
    float _aim = (aim);\
    float _bre = (bre);\
    float _bim = (bim);\
    (pre) = _are * _bre - _aim * _bim;\
    (pim) = _are * _bim + _aim * _bre;\
} while (0)
72 | |
73 /** | |
74 * Compute inverse MDCT of size N = 2^nbits | |
75 * @param output N samples | |
76 * @param input N/2 samples | |
77 * @param tmp N/2 samples | |
78 */ | |
2967 | 79 void ff_imdct_calc(MDCTContext *s, FFTSample *output, |
794 | 80 const FFTSample *input, FFTSample *tmp) |
781 | 81 { |
82 int k, n8, n4, n2, n, j; | |
83 const uint16_t *revtab = s->fft.revtab; | |
84 const FFTSample *tcos = s->tcos; | |
85 const FFTSample *tsin = s->tsin; | |
86 const FFTSample *in1, *in2; | |
87 FFTComplex *z = (FFTComplex *)tmp; | |
88 | |
89 n = 1 << s->nbits; | |
90 n2 = n >> 1; | |
91 n4 = n >> 2; | |
92 n8 = n >> 3; | |
93 | |
94 /* pre rotation */ | |
95 in1 = input; | |
96 in2 = input + n2 - 1; | |
97 for(k = 0; k < n4; k++) { | |
98 j=revtab[k]; | |
99 CMUL(z[j].re, z[j].im, *in2, *in1, tcos[k], tsin[k]); | |
100 in1 += 2; | |
101 in2 -= 2; | |
102 } | |
1879
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1106
diff
changeset
|
103 ff_fft_calc(&s->fft, z); |
781 | 104 |
105 /* post rotation + reordering */ | |
106 /* XXX: optimize */ | |
107 for(k = 0; k < n4; k++) { | |
108 CMUL(z[k].re, z[k].im, z[k].re, z[k].im, tcos[k], tsin[k]); | |
109 } | |
110 for(k = 0; k < n8; k++) { | |
111 output[2*k] = -z[n8 + k].im; | |
112 output[n2-1-2*k] = z[n8 + k].im; | |
113 | |
114 output[2*k+1] = z[n8-1-k].re; | |
115 output[n2-1-2*k-1] = -z[n8-1-k].re; | |
116 | |
117 output[n2 + 2*k]=-z[k+n8].re; | |
118 output[n-1- 2*k]=-z[k+n8].re; | |
119 | |
120 output[n2 + 2*k+1]=z[n8-k-1].im; | |
121 output[n-2 - 2 * k] = z[n8-k-1].im; | |
122 } | |
123 } | |
124 | |
125 /** | |
126 * Compute MDCT of size N = 2^nbits | |
127 * @param input N samples | |
128 * @param out N/2 samples | |
129 * @param tmp temporary storage of N/2 samples | |
130 */ | |
2967 | 131 void ff_mdct_calc(MDCTContext *s, FFTSample *out, |
794 | 132 const FFTSample *input, FFTSample *tmp) |
781 | 133 { |
134 int i, j, n, n8, n4, n2, n3; | |
135 FFTSample re, im, re1, im1; | |
136 const uint16_t *revtab = s->fft.revtab; | |
137 const FFTSample *tcos = s->tcos; | |
138 const FFTSample *tsin = s->tsin; | |
139 FFTComplex *x = (FFTComplex *)tmp; | |
140 | |
141 n = 1 << s->nbits; | |
142 n2 = n >> 1; | |
143 n4 = n >> 2; | |
144 n8 = n >> 3; | |
145 n3 = 3 * n4; | |
146 | |
147 /* pre rotation */ | |
148 for(i=0;i<n8;i++) { | |
149 re = -input[2*i+3*n4] - input[n3-1-2*i]; | |
150 im = -input[n4+2*i] + input[n4-1-2*i]; | |
151 j = revtab[i]; | |
152 CMUL(x[j].re, x[j].im, re, im, -tcos[i], tsin[i]); | |
153 | |
154 re = input[2*i] - input[n2-1-2*i]; | |
155 im = -(input[n2+2*i] + input[n-1-2*i]); | |
156 j = revtab[n8 + i]; | |
157 CMUL(x[j].re, x[j].im, re, im, -tcos[n8 + i], tsin[n8 + i]); | |
158 } | |
159 | |
1879
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1106
diff
changeset
|
160 ff_fft_calc(&s->fft, x); |
2967 | 161 |
781 | 162 /* post rotation */ |
163 for(i=0;i<n4;i++) { | |
164 re = x[i].re; | |
165 im = x[i].im; | |
166 CMUL(re1, im1, re, im, -tsin[i], -tcos[i]); | |
167 out[2*i] = im1; | |
168 out[n2-1-2*i] = re1; | |
169 } | |
170 } | |
171 | |
794 | 172 void ff_mdct_end(MDCTContext *s) |
781 | 173 { |
174 av_freep(&s->tcos); | |
175 av_freep(&s->tsin); | |
1879
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1106
diff
changeset
|
176 ff_fft_end(&s->fft); |
781 | 177 } |