Mercurial > libavcodec.hg
comparison mdct.c @ 7263:fc843d00867c libavcodec
Exploit MDCT symmetry
2% faster Vorbis on Conroe and K8; 7% faster on Celeron.
author | lorenm |
---|---|
date | Sun, 13 Jul 2008 15:03:58 +0000 |
parents | b0820b8bd4dd |
children | ee1cb5ab9f99 |
comparison
equal
deleted
inserted
replaced
7262:e3822c61f2e4 | 7263:fc843d00867c |
---|---|
98 double _bim = (bim);\ | 98 double _bim = (bim);\ |
99 (pre) = _are * _bre - _aim * _bim;\ | 99 (pre) = _are * _bre - _aim * _bim;\ |
100 (pim) = _are * _bim + _aim * _bre;\ | 100 (pim) = _are * _bim + _aim * _bre;\ |
101 } | 101 } |
102 | 102 |
103 /** | 103 static void imdct_c(MDCTContext *s, const FFTSample *input, FFTSample *tmp) |
104 * Compute inverse MDCT of size N = 2^nbits | 104 { |
105 * @param output N samples | 105 int k, n4, n2, n, j; |
106 * @param input N/2 samples | |
107 * @param tmp N/2 samples | |
108 */ | |
109 void ff_imdct_calc(MDCTContext *s, FFTSample *output, | |
110 const FFTSample *input, FFTSample *tmp) | |
111 { | |
112 int k, n8, n4, n2, n, j; | |
113 const uint16_t *revtab = s->fft.revtab; | 106 const uint16_t *revtab = s->fft.revtab; |
114 const FFTSample *tcos = s->tcos; | 107 const FFTSample *tcos = s->tcos; |
115 const FFTSample *tsin = s->tsin; | 108 const FFTSample *tsin = s->tsin; |
116 const FFTSample *in1, *in2; | 109 const FFTSample *in1, *in2; |
117 FFTComplex *z = (FFTComplex *)tmp; | 110 FFTComplex *z = (FFTComplex *)tmp; |
118 | 111 |
119 n = 1 << s->nbits; | 112 n = 1 << s->nbits; |
120 n2 = n >> 1; | 113 n2 = n >> 1; |
121 n4 = n >> 2; | 114 n4 = n >> 2; |
122 n8 = n >> 3; | |
123 | 115 |
124 /* pre rotation */ | 116 /* pre rotation */ |
125 in1 = input; | 117 in1 = input; |
126 in2 = input + n2 - 1; | 118 in2 = input + n2 - 1; |
127 for(k = 0; k < n4; k++) { | 119 for(k = 0; k < n4; k++) { |
135 /* post rotation + reordering */ | 127 /* post rotation + reordering */ |
136 /* XXX: optimize */ | 128 /* XXX: optimize */ |
137 for(k = 0; k < n4; k++) { | 129 for(k = 0; k < n4; k++) { |
138 CMUL(z[k].re, z[k].im, z[k].re, z[k].im, tcos[k], tsin[k]); | 130 CMUL(z[k].re, z[k].im, z[k].re, z[k].im, tcos[k], tsin[k]); |
139 } | 131 } |
132 } | |
133 | |
134 /** | |
135 * Compute inverse MDCT of size N = 2^nbits | |
136 * @param output N samples | |
137 * @param input N/2 samples | |
138 * @param tmp N/2 samples | |
139 */ | |
140 void ff_imdct_calc(MDCTContext *s, FFTSample *output, | |
141 const FFTSample *input, FFTSample *tmp) | |
142 { | |
143 int k, n8, n2, n; | |
144 FFTComplex *z = (FFTComplex *)tmp; | |
145 n = 1 << s->nbits; | |
146 n2 = n >> 1; | |
147 n8 = n >> 3; | |
148 | |
149 imdct_c(s, input, tmp); | |
150 | |
140 for(k = 0; k < n8; k++) { | 151 for(k = 0; k < n8; k++) { |
141 output[2*k] = -z[n8 + k].im; | 152 output[2*k] = -z[n8 + k].im; |
142 output[n2-1-2*k] = z[n8 + k].im; | 153 output[n2-1-2*k] = z[n8 + k].im; |
143 | 154 |
144 output[2*k+1] = z[n8-1-k].re; | 155 output[2*k+1] = z[n8-1-k].re; |
147 output[n2 + 2*k]=-z[k+n8].re; | 158 output[n2 + 2*k]=-z[k+n8].re; |
148 output[n-1- 2*k]=-z[k+n8].re; | 159 output[n-1- 2*k]=-z[k+n8].re; |
149 | 160 |
150 output[n2 + 2*k+1]=z[n8-k-1].im; | 161 output[n2 + 2*k+1]=z[n8-k-1].im; |
151 output[n-2 - 2 * k] = z[n8-k-1].im; | 162 output[n-2 - 2 * k] = z[n8-k-1].im; |
163 } | |
164 } | |
165 | |
166 /** | |
167 * Compute the middle half of the inverse MDCT of size N = 2^nbits, | |
168 * thus excluding the parts that can be derived by symmetry | |
169 * @param output N/2 samples | |
170 * @param input N/2 samples | |
171 * @param tmp N/2 samples | |
172 */ | |
173 void ff_imdct_half(MDCTContext *s, FFTSample *output, | |
174 const FFTSample *input, FFTSample *tmp) | |
175 { | |
176 int k, n8, n4, n; | |
177 FFTComplex *z = (FFTComplex *)tmp; | |
178 n = 1 << s->nbits; | |
179 n4 = n >> 2; | |
180 n8 = n >> 3; | |
181 | |
182 imdct_c(s, input, tmp); | |
183 | |
184 for(k = 0; k < n8; k++) { | |
185 output[n4-1-2*k] = z[n8+k].im; | |
186 output[n4-1-2*k-1] = -z[n8-k-1].re; | |
187 output[n4 + 2*k] = -z[n8+k].re; | |
188 output[n4 + 2*k+1] = z[n8-k-1].im; | |
152 } | 189 } |
153 } | 190 } |
154 | 191 |
155 /** | 192 /** |
156 * Compute MDCT of size N = 2^nbits | 193 * Compute MDCT of size N = 2^nbits |