comparison mdct.c @ 7263:fc843d00867c libavcodec

exploit mdct symmetry. 2% faster vorbis on conroe, k8. 7% on celeron.
author lorenm
date Sun, 13 Jul 2008 15:03:58 +0000
parents b0820b8bd4dd
children ee1cb5ab9f99
comparison
equal deleted inserted replaced
7262:e3822c61f2e4 7263:fc843d00867c
98 double _bim = (bim);\ 98 double _bim = (bim);\
99 (pre) = _are * _bre - _aim * _bim;\ 99 (pre) = _are * _bre - _aim * _bim;\
100 (pim) = _are * _bim + _aim * _bre;\ 100 (pim) = _are * _bim + _aim * _bre;\
101 } 101 }
102 102
103 /** 103 static void imdct_c(MDCTContext *s, const FFTSample *input, FFTSample *tmp)
104 * Compute inverse MDCT of size N = 2^nbits 104 {
105 * @param output N samples 105 int k, n4, n2, n, j;
106 * @param input N/2 samples
107 * @param tmp N/2 samples
108 */
109 void ff_imdct_calc(MDCTContext *s, FFTSample *output,
110 const FFTSample *input, FFTSample *tmp)
111 {
112 int k, n8, n4, n2, n, j;
113 const uint16_t *revtab = s->fft.revtab; 106 const uint16_t *revtab = s->fft.revtab;
114 const FFTSample *tcos = s->tcos; 107 const FFTSample *tcos = s->tcos;
115 const FFTSample *tsin = s->tsin; 108 const FFTSample *tsin = s->tsin;
116 const FFTSample *in1, *in2; 109 const FFTSample *in1, *in2;
117 FFTComplex *z = (FFTComplex *)tmp; 110 FFTComplex *z = (FFTComplex *)tmp;
118 111
119 n = 1 << s->nbits; 112 n = 1 << s->nbits;
120 n2 = n >> 1; 113 n2 = n >> 1;
121 n4 = n >> 2; 114 n4 = n >> 2;
122 n8 = n >> 3;
123 115
124 /* pre rotation */ 116 /* pre rotation */
125 in1 = input; 117 in1 = input;
126 in2 = input + n2 - 1; 118 in2 = input + n2 - 1;
127 for(k = 0; k < n4; k++) { 119 for(k = 0; k < n4; k++) {
135 /* post rotation + reordering */ 127 /* post rotation + reordering */
136 /* XXX: optimize */ 128 /* XXX: optimize */
137 for(k = 0; k < n4; k++) { 129 for(k = 0; k < n4; k++) {
138 CMUL(z[k].re, z[k].im, z[k].re, z[k].im, tcos[k], tsin[k]); 130 CMUL(z[k].re, z[k].im, z[k].re, z[k].im, tcos[k], tsin[k]);
139 } 131 }
132 }
133
134 /**
135 * Compute inverse MDCT of size N = 2^nbits
136 * @param output N samples
137 * @param input N/2 samples
138 * @param tmp N/2 samples
139 */
140 void ff_imdct_calc(MDCTContext *s, FFTSample *output,
141 const FFTSample *input, FFTSample *tmp)
142 {
143 int k, n8, n2, n;
144 FFTComplex *z = (FFTComplex *)tmp;
145 n = 1 << s->nbits;
146 n2 = n >> 1;
147 n8 = n >> 3;
148
149 imdct_c(s, input, tmp);
150
140 for(k = 0; k < n8; k++) { 151 for(k = 0; k < n8; k++) {
141 output[2*k] = -z[n8 + k].im; 152 output[2*k] = -z[n8 + k].im;
142 output[n2-1-2*k] = z[n8 + k].im; 153 output[n2-1-2*k] = z[n8 + k].im;
143 154
144 output[2*k+1] = z[n8-1-k].re; 155 output[2*k+1] = z[n8-1-k].re;
147 output[n2 + 2*k]=-z[k+n8].re; 158 output[n2 + 2*k]=-z[k+n8].re;
148 output[n-1- 2*k]=-z[k+n8].re; 159 output[n-1- 2*k]=-z[k+n8].re;
149 160
150 output[n2 + 2*k+1]=z[n8-k-1].im; 161 output[n2 + 2*k+1]=z[n8-k-1].im;
151 output[n-2 - 2 * k] = z[n8-k-1].im; 162 output[n-2 - 2 * k] = z[n8-k-1].im;
163 }
164 }
165
166 /**
167 * Compute the middle half of the inverse MDCT of size N = 2^nbits,
168 * thus excluding the parts that can be derived by symmetry
169 * @param output N/2 samples
170 * @param input N/2 samples
171 * @param tmp N/2 samples
172 */
173 void ff_imdct_half(MDCTContext *s, FFTSample *output,
174 const FFTSample *input, FFTSample *tmp)
175 {
176 int k, n8, n4, n;
177 FFTComplex *z = (FFTComplex *)tmp;
178 n = 1 << s->nbits;
179 n4 = n >> 2;
180 n8 = n >> 3;
181
182 imdct_c(s, input, tmp);
183
184 for(k = 0; k < n8; k++) {
185 output[n4-1-2*k] = z[n8+k].im;
186 output[n4-1-2*k-1] = -z[n8-k-1].re;
187 output[n4 + 2*k] = -z[n8+k].re;
188 output[n4 + 2*k+1] = z[n8-k-1].im;
152 } 189 }
153 } 190 }
154 191
155 /** 192 /**
156 * Compute MDCT of size N = 2^nbits 193 * Compute MDCT of size N = 2^nbits