Mercurial > mplayer.hg
annotate libfaad2/sbr_qmf.c @ 13394:455a5056801f
New generic 'portable anymap' video output driver. It supports portable
pixmaps and graymaps in both raw and ASCII mode. Besides PPM and PGM, it
can also output PGMYUV files which are PGM files with the U and V plane
appended to the bottom of the Y image (bottom left and bottom right). All
files can be written to the current directory, to a specified output directory
or to multiple subdirectories if the filesystem can't handle the amount of
files in one directory anymore.
Note: This driver is not yet activated and will not be compiled and linked
to libvo. A separate patch will take care of that. This is just for adding
the file to the repository.
author | ivo |
---|---|
date | Mon, 20 Sep 2004 00:54:57 +0000 |
parents | d81145997036 |
children | 6d50ef45a058 |
rev | line source |
---|---|
10725 | 1 /* |
2 ** FAAD2 - Freeware Advanced Audio (AAC) Decoder including SBR decoding | |
12527 | 3 ** Copyright (C) 2003-2004 M. Bakker, Ahead Software AG, http://www.nero.com |
10725 | 4 ** |
5 ** This program is free software; you can redistribute it and/or modify | |
6 ** it under the terms of the GNU General Public License as published by | |
7 ** the Free Software Foundation; either version 2 of the License, or | |
8 ** (at your option) any later version. | |
9 ** | |
10 ** This program is distributed in the hope that it will be useful, | |
11 ** but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 ** GNU General Public License for more details. | |
14 ** | |
15 ** You should have received a copy of the GNU General Public License | |
16 ** along with this program; if not, write to the Free Software | |
17 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
18 ** | |
19 ** Any non-GPL usage of this software or parts of this software is strictly | |
20 ** forbidden. | |
21 ** | |
22 ** Commercial non-GPL licensing of this software is possible. | |
23 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com. | |
24 ** | |
12625
d81145997036
More information about modifications to comply more closely with GPL 2a.
diego
parents:
12527
diff
changeset
|
25 ** Initially modified for use with MPlayer by Arpad Gereöffy on 2003/08/30 |
d81145997036
More information about modifications to comply more closely with GPL 2a.
diego
parents:
12527
diff
changeset
|
26 ** $Id: sbr_qmf.c,v 1.3 2004/06/02 22:59:03 diego Exp $ |
d81145997036
More information about modifications to comply more closely with GPL 2a.
diego
parents:
12527
diff
changeset
|
27 ** detailed CVS changelog at http://www.mplayerhq.hu/cgi-bin/cvsweb.cgi/main/ |
10725 | 28 **/ |
29 | |
30 #include "common.h" | |
31 #include "structs.h" | |
32 | |
33 #ifdef SBR_DEC | |
34 | |
35 | |
36 #include <stdlib.h> | |
37 #include <string.h> | |
38 #include "sbr_dct.h" | |
39 #include "sbr_qmf.h" | |
10989 | 40 #include "sbr_qmf_c.h" |
10725 | 41 #include "sbr_syntax.h" |
42 | |
43 | |
44 qmfa_info *qmfa_init(uint8_t channels) | |
45 { | |
12527 | 46 qmfa_info *qmfa = (qmfa_info*)faad_malloc(sizeof(qmfa_info)); |
47 qmfa->x = (real_t*)faad_malloc(channels * 10 * sizeof(real_t)); | |
10725 | 48 memset(qmfa->x, 0, channels * 10 * sizeof(real_t)); |
49 | |
50 qmfa->channels = channels; | |
51 | |
52 return qmfa; | |
53 } | |
54 | |
55 void qmfa_end(qmfa_info *qmfa) | |
56 { | |
57 if (qmfa) | |
58 { | |
12527 | 59 if (qmfa->x) faad_free(qmfa->x); |
60 faad_free(qmfa); | |
10725 | 61 } |
62 } | |
63 | |
10989 | 64 void sbr_qmf_analysis_32(sbr_info *sbr, qmfa_info *qmfa, const real_t *input, |
12527 | 65 qmf_t X[MAX_NTSRHFG][32], uint8_t offset, uint8_t kx) |
10725 | 66 { |
12527 | 67 ALIGN real_t u[64]; |
10725 | 68 #ifndef SBR_LOW_POWER |
12527 | 69 ALIGN real_t x[64], y[64]; |
10725 | 70 #else |
12527 | 71 ALIGN real_t y[32]; |
10725 | 72 #endif |
12527 | 73 uint16_t in = 0; |
74 uint8_t l; | |
10725 | 75 |
76 /* qmf subsample l */ | |
10989 | 77 for (l = 0; l < sbr->numTimeSlotsRate; l++) |
10725 | 78 { |
79 int16_t n; | |
80 | |
81 /* shift input buffer x */ | |
82 memmove(qmfa->x + 32, qmfa->x, (320-32)*sizeof(real_t)); | |
83 | |
84 /* add new samples to input buffer x */ | |
85 for (n = 32 - 1; n >= 0; n--) | |
86 { | |
87 #ifdef FIXED_POINT | |
12527 | 88 qmfa->x[n] = (input[in++]) >> 5; |
10725 | 89 #else |
12527 | 90 qmfa->x[n] = input[in++]; |
10725 | 91 #endif |
92 } | |
93 | |
94 /* window and summation to create array u */ | |
95 for (n = 0; n < 64; n++) | |
96 { | |
12527 | 97 u[n] = MUL_F(qmfa->x[n], qmf_c[2*n]) + |
98 MUL_F(qmfa->x[n + 64], qmf_c[2*(n + 64)]) + | |
99 MUL_F(qmfa->x[n + 128], qmf_c[2*(n + 128)]) + | |
100 MUL_F(qmfa->x[n + 192], qmf_c[2*(n + 192)]) + | |
101 MUL_F(qmfa->x[n + 256], qmf_c[2*(n + 256)]); | |
10725 | 102 } |
103 | |
104 /* calculate 32 subband samples by introducing X */ | |
105 #ifdef SBR_LOW_POWER | |
106 y[0] = u[48]; | |
107 for (n = 1; n < 16; n++) | |
108 y[n] = u[n+48] + u[48-n]; | |
109 for (n = 16; n < 32; n++) | |
110 y[n] = -u[n-16] + u[48-n]; | |
111 | |
112 DCT3_32_unscaled(u, y); | |
113 | |
114 for (n = 0; n < 32; n++) | |
115 { | |
10989 | 116 if (n < kx) |
117 { | |
10725 | 118 #ifdef FIXED_POINT |
12527 | 119 QMF_RE(X[l + offset][n]) = u[n] << 1; |
10725 | 120 #else |
12527 | 121 QMF_RE(X[l + offset][n]) = 2. * u[n]; |
10725 | 122 #endif |
10989 | 123 } else { |
12527 | 124 QMF_RE(X[l + offset][n]) = 0; |
10725 | 125 } |
126 } | |
127 #else | |
128 x[0] = u[0]; | |
12527 | 129 for (n = 0; n < 31; n++) |
10725 | 130 { |
12527 | 131 x[2*n+1] = u[n+1] + u[63-n]; |
132 x[2*n+2] = u[n+1] - u[63-n]; | |
10725 | 133 } |
12527 | 134 x[63] = u[32]; |
10725 | 135 |
12527 | 136 DCT4_64_kernel(y, x); |
10725 | 137 |
138 for (n = 0; n < 32; n++) | |
139 { | |
10989 | 140 if (n < kx) |
141 { | |
10725 | 142 #ifdef FIXED_POINT |
12527 | 143 QMF_RE(X[l + offset][n]) = y[n] << 1; |
144 QMF_IM(X[l + offset][n]) = -y[63-n] << 1; | |
10725 | 145 #else |
12527 | 146 QMF_RE(X[l + offset][n]) = 2. * y[n]; |
147 QMF_IM(X[l + offset][n]) = -2. * y[63-n]; | |
10725 | 148 #endif |
10989 | 149 } else { |
12527 | 150 QMF_RE(X[l + offset][n]) = 0; |
151 QMF_IM(X[l + offset][n]) = 0; | |
10725 | 152 } |
153 } | |
154 #endif | |
155 } | |
156 } | |
157 | |
158 qmfs_info *qmfs_init(uint8_t channels) | |
159 { | |
12527 | 160 qmfs_info *qmfs = (qmfs_info*)faad_malloc(sizeof(qmfs_info)); |
10989 | 161 |
12527 | 162 #ifndef SBR_LOW_POWER |
163 qmfs->v[0] = (real_t*)faad_malloc(channels * 10 * sizeof(real_t)); | |
10989 | 164 memset(qmfs->v[0], 0, channels * 10 * sizeof(real_t)); |
12527 | 165 qmfs->v[1] = (real_t*)faad_malloc(channels * 10 * sizeof(real_t)); |
10989 | 166 memset(qmfs->v[1], 0, channels * 10 * sizeof(real_t)); |
12527 | 167 #else |
168 qmfs->v[0] = (real_t*)faad_malloc(channels * 20 * sizeof(real_t)); | |
169 memset(qmfs->v[0], 0, channels * 20 * sizeof(real_t)); | |
170 qmfs->v[1] = NULL; | |
171 #endif | |
10989 | 172 |
173 qmfs->v_index = 0; | |
10725 | 174 |
175 qmfs->channels = channels; | |
176 | |
12527 | 177 #ifdef USE_SSE |
178 if (cpu_has_sse()) | |
179 { | |
180 qmfs->qmf_func = sbr_qmf_synthesis_64_sse; | |
181 } else { | |
182 qmfs->qmf_func = sbr_qmf_synthesis_64; | |
183 } | |
184 #endif | |
185 | |
10725 | 186 return qmfs; |
187 } | |
188 | |
189 void qmfs_end(qmfs_info *qmfs) | |
190 { | |
191 if (qmfs) | |
192 { | |
12527 | 193 if (qmfs->v[0]) faad_free(qmfs->v[0]); |
194 #ifndef SBR_LOW_POWER | |
195 if (qmfs->v[1]) faad_free(qmfs->v[1]); | |
196 #endif | |
197 faad_free(qmfs); | |
10725 | 198 } |
199 } | |
200 | |
10989 | 201 #ifdef SBR_LOW_POWER |
12527 | 202 void sbr_qmf_synthesis_64(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64], |
10725 | 203 real_t *output) |
204 { | |
12527 | 205 ALIGN real_t x[64]; |
206 ALIGN real_t y[64]; | |
207 int16_t n, k, out = 0; | |
10725 | 208 uint8_t l; |
209 | |
210 | |
211 /* qmf subsample l */ | |
10989 | 212 for (l = 0; l < sbr->numTimeSlotsRate; l++) |
12527 | 213 { |
214 //real_t *v0, *v1; | |
10989 | 215 |
216 /* shift buffers */ | |
12527 | 217 //memmove(qmfs->v[0] + 64, qmfs->v[0], (640-64)*sizeof(real_t)); |
218 //memmove(qmfs->v[1] + 64, qmfs->v[1], (640-64)*sizeof(real_t)); | |
219 memmove(qmfs->v[0] + 128, qmfs->v[0], (1280-128)*sizeof(real_t)); | |
10989 | 220 |
12527 | 221 //v0 = qmfs->v[qmfs->v_index]; |
222 //v1 = qmfs->v[(qmfs->v_index + 1) & 0x1]; | |
223 //qmfs->v_index = (qmfs->v_index + 1) & 0x1; | |
10725 | 224 |
225 /* calculate 128 samples */ | |
226 for (k = 0; k < 64; k++) | |
227 { | |
228 #ifdef FIXED_POINT | |
12527 | 229 x[k] = QMF_RE(X[l][k]); |
10725 | 230 #else |
12527 | 231 x[k] = QMF_RE(X[l][k]) / 32.; |
10725 | 232 #endif |
233 } | |
234 | |
12527 | 235 for (n = 0; n < 32; n++) |
236 { | |
237 y[2*n] = -x[2*n]; | |
238 y[2*n+1] = x[2*n+1]; | |
239 } | |
240 | |
10725 | 241 DCT2_64_unscaled(x, x); |
242 | |
12527 | 243 for (n = 0; n < 64; n++) |
244 { | |
245 qmfs->v[0][n+32] = x[n]; | |
246 } | |
10989 | 247 for (n = 0; n < 32; n++) |
10725 | 248 { |
12527 | 249 qmfs->v[0][31 - n] = x[n + 1]; |
10725 | 250 } |
12527 | 251 DST2_64_unscaled(x, y); |
252 qmfs->v[0][96] = 0; | |
10989 | 253 for (n = 1; n < 32; n++) |
12527 | 254 { |
255 qmfs->v[0][n + 96] = x[n-1]; | |
256 } | |
10725 | 257 |
258 /* calculate 64 output samples and window */ | |
259 for (k = 0; k < 64; k++) | |
260 { | |
12527 | 261 #if 1 |
262 output[out++] = MUL_F(qmfs->v[0][k], qmf_c[k]) + | |
263 MUL_F(qmfs->v[0][192 + k], qmf_c[64 + k]) + | |
264 MUL_F(qmfs->v[0][256 + k], qmf_c[128 + k]) + | |
265 MUL_F(qmfs->v[0][256 + 192 + k], qmf_c[128 + 64 + k]) + | |
266 MUL_F(qmfs->v[0][512 + k], qmf_c[256 + k]) + | |
267 MUL_F(qmfs->v[0][512 + 192 + k], qmf_c[256 + 64 + k]) + | |
268 MUL_F(qmfs->v[0][768 + k], qmf_c[384 + k]) + | |
269 MUL_F(qmfs->v[0][768 + 192 + k], qmf_c[384 + 64 + k]) + | |
270 MUL_F(qmfs->v[0][1024 + k], qmf_c[512 + k]) + | |
271 MUL_F(qmfs->v[0][1024 + 192 + k], qmf_c[512 + 64 + k]); | |
272 #else | |
273 output[out++] = MUL_F(v0[k], qmf_c[k]) + | |
274 MUL_F(v0[64 + k], qmf_c[64 + k]) + | |
275 MUL_F(v0[128 + k], qmf_c[128 + k]) + | |
276 MUL_F(v0[192 + k], qmf_c[192 + k]) + | |
277 MUL_F(v0[256 + k], qmf_c[256 + k]) + | |
278 MUL_F(v0[320 + k], qmf_c[320 + k]) + | |
279 MUL_F(v0[384 + k], qmf_c[384 + k]) + | |
280 MUL_F(v0[448 + k], qmf_c[448 + k]) + | |
281 MUL_F(v0[512 + k], qmf_c[512 + k]) + | |
282 MUL_F(v0[576 + k], qmf_c[576 + k]); | |
283 #endif | |
284 } | |
285 } | |
286 } | |
287 | |
288 void sbr_qmf_synthesis_64_sse(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64], | |
289 real_t *output) | |
290 { | |
291 ALIGN real_t x[64]; | |
292 ALIGN real_t y[64]; | |
293 ALIGN real_t y2[64]; | |
294 int16_t n, k, out = 0; | |
295 uint8_t l; | |
296 | |
297 /* qmf subsample l */ | |
298 for (l = 0; l < sbr->numTimeSlotsRate; l++) | |
299 { | |
300 //real_t *v0, *v1; | |
301 | |
302 /* shift buffers */ | |
303 //memmove(qmfs->v[0] + 64, qmfs->v[0], (640-64)*sizeof(real_t)); | |
304 //memmove(qmfs->v[1] + 64, qmfs->v[1], (640-64)*sizeof(real_t)); | |
305 memmove(qmfs->v[0] + 128, qmfs->v[0], (1280-128)*sizeof(real_t)); | |
306 | |
307 //v0 = qmfs->v[qmfs->v_index]; | |
308 //v1 = qmfs->v[(qmfs->v_index + 1) & 0x1]; | |
309 //qmfs->v_index = (qmfs->v_index + 1) & 0x1; | |
310 | |
311 /* calculate 128 samples */ | |
312 for (k = 0; k < 64; k++) | |
313 { | |
314 #ifdef FIXED_POINT | |
315 x[k] = QMF_RE(X[l][k]); | |
316 #else | |
317 x[k] = QMF_RE(X[l][k]) / 32.; | |
318 #endif | |
319 } | |
320 | |
321 for (n = 0; n < 32; n++) | |
322 { | |
323 y[2*n] = -x[2*n]; | |
324 y[2*n+1] = x[2*n+1]; | |
325 } | |
326 | |
327 DCT2_64_unscaled(x, x); | |
328 | |
329 for (n = 0; n < 64; n++) | |
330 { | |
331 qmfs->v[0][n+32] = x[n]; | |
332 } | |
333 for (n = 0; n < 32; n++) | |
334 { | |
335 qmfs->v[0][31 - n] = x[n + 1]; | |
336 } | |
337 | |
338 DST2_64_unscaled(x, y); | |
339 qmfs->v[0][96] = 0; | |
340 for (n = 1; n < 32; n++) | |
341 { | |
342 qmfs->v[0][n + 96] = x[n-1]; | |
343 } | |
344 | |
345 /* calculate 64 output samples and window */ | |
346 for (k = 0; k < 64; k++) | |
347 { | |
348 #if 1 | |
349 output[out++] = MUL_F(qmfs->v[0][k], qmf_c[k]) + | |
350 MUL_F(qmfs->v[0][192 + k], qmf_c[64 + k]) + | |
351 MUL_F(qmfs->v[0][256 + k], qmf_c[128 + k]) + | |
352 MUL_F(qmfs->v[0][256 + 192 + k], qmf_c[128 + 64 + k]) + | |
353 MUL_F(qmfs->v[0][512 + k], qmf_c[256 + k]) + | |
354 MUL_F(qmfs->v[0][512 + 192 + k], qmf_c[256 + 64 + k]) + | |
355 MUL_F(qmfs->v[0][768 + k], qmf_c[384 + k]) + | |
356 MUL_F(qmfs->v[0][768 + 192 + k], qmf_c[384 + 64 + k]) + | |
357 MUL_F(qmfs->v[0][1024 + k], qmf_c[512 + k]) + | |
358 MUL_F(qmfs->v[0][1024 + 192 + k], qmf_c[512 + 64 + k]); | |
359 #else | |
360 output[out++] = MUL_F(v0[k], qmf_c[k]) + | |
361 MUL_F(v0[64 + k], qmf_c[64 + k]) + | |
362 MUL_F(v0[128 + k], qmf_c[128 + k]) + | |
363 MUL_F(v0[192 + k], qmf_c[192 + k]) + | |
364 MUL_F(v0[256 + k], qmf_c[256 + k]) + | |
365 MUL_F(v0[320 + k], qmf_c[320 + k]) + | |
366 MUL_F(v0[384 + k], qmf_c[384 + k]) + | |
367 MUL_F(v0[448 + k], qmf_c[448 + k]) + | |
368 MUL_F(v0[512 + k], qmf_c[512 + k]) + | |
369 MUL_F(v0[576 + k], qmf_c[576 + k]); | |
370 #endif | |
10725 | 371 } |
372 } | |
373 } | |
10989 | 374 #else |
12527 | 375 void sbr_qmf_synthesis_64(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64], |
10989 | 376 real_t *output) |
377 { | |
12527 | 378 ALIGN real_t x1[64], x2[64]; |
379 real_t scale = 1.f/64.f; | |
380 int16_t n, k, out = 0; | |
10989 | 381 uint8_t l; |
382 | |
383 | |
384 /* qmf subsample l */ | |
385 for (l = 0; l < sbr->numTimeSlotsRate; l++) | |
386 { | |
387 real_t *v0, *v1; | |
388 | |
389 /* shift buffers */ | |
390 memmove(qmfs->v[0] + 64, qmfs->v[0], (640-64)*sizeof(real_t)); | |
391 memmove(qmfs->v[1] + 64, qmfs->v[1], (640-64)*sizeof(real_t)); | |
392 | |
393 v0 = qmfs->v[qmfs->v_index]; | |
394 v1 = qmfs->v[(qmfs->v_index + 1) & 0x1]; | |
395 qmfs->v_index = (qmfs->v_index + 1) & 0x1; | |
396 | |
397 /* calculate 128 samples */ | |
12527 | 398 x1[0] = scale*QMF_RE(X[l][0]); |
399 x2[63] = scale*QMF_IM(X[l][0]); | |
400 for (k = 0; k < 31; k++) | |
10989 | 401 { |
12527 | 402 x1[2*k+1] = scale*(QMF_RE(X[l][2*k+1]) - QMF_RE(X[l][2*k+2])); |
403 x1[2*k+2] = scale*(QMF_RE(X[l][2*k+1]) + QMF_RE(X[l][2*k+2])); | |
10989 | 404 |
12527 | 405 x2[61 - 2*k] = scale*(QMF_IM(X[l][2*k+2]) - QMF_IM(X[l][2*k+1])); |
406 x2[62 - 2*k] = scale*(QMF_IM(X[l][2*k+2]) + QMF_IM(X[l][2*k+1])); | |
407 } | |
408 x1[63] = scale*QMF_RE(X[l][63]); | |
409 x2[0] = scale*QMF_IM(X[l][63]); | |
10989 | 410 |
12527 | 411 DCT4_64_kernel(x1, x1); |
412 DCT4_64_kernel(x2, x2); | |
413 | |
414 for (n = 0; n < 32; n++) | |
10989 | 415 { |
12527 | 416 v0[ 2*n] = x2[2*n] - x1[2*n]; |
417 v1[63-2*n] = x2[2*n] + x1[2*n]; | |
418 v0[ 2*n+1] = -x2[2*n+1] - x1[2*n+1]; | |
419 v1[62-2*n] = -x2[2*n+1] + x1[2*n+1]; | |
10989 | 420 } |
421 | |
422 /* calculate 64 output samples and window */ | |
423 for (k = 0; k < 64; k++) | |
424 { | |
12527 | 425 output[out++] = MUL_F(v0[k], qmf_c[k]) + |
426 MUL_F(v0[64 + k], qmf_c[64 + k]) + | |
427 MUL_F(v0[128 + k], qmf_c[128 + k]) + | |
428 MUL_F(v0[192 + k], qmf_c[192 + k]) + | |
429 MUL_F(v0[256 + k], qmf_c[256 + k]) + | |
430 MUL_F(v0[320 + k], qmf_c[320 + k]) + | |
431 MUL_F(v0[384 + k], qmf_c[384 + k]) + | |
432 MUL_F(v0[448 + k], qmf_c[448 + k]) + | |
433 MUL_F(v0[512 + k], qmf_c[512 + k]) + | |
434 MUL_F(v0[576 + k], qmf_c[576 + k]); | |
435 } | |
436 } | |
437 } | |
438 | |
439 #ifdef USE_SSE | |
440 void memmove_sse_576(real_t *out, const real_t *in) | |
441 { | |
442 __m128 m[144]; | |
443 uint16_t i; | |
444 | |
445 for (i = 0; i < 144; i++) | |
446 { | |
447 m[i] = _mm_load_ps(&in[i*4]); | |
448 } | |
449 for (i = 0; i < 144; i++) | |
450 { | |
451 _mm_store_ps(&out[i*4], m[i]); | |
452 } | |
453 } | |
454 | |
455 void sbr_qmf_synthesis_64_sse(sbr_info *sbr, qmfs_info *qmfs, qmf_t X[MAX_NTSRHFG][64], | |
456 real_t *output) | |
457 { | |
458 ALIGN real_t x1[64], x2[64]; | |
459 real_t scale = 1.f/64.f; | |
460 int16_t n, k, out = 0; | |
461 uint8_t l; | |
462 | |
463 | |
464 /* qmf subsample l */ | |
465 for (l = 0; l < sbr->numTimeSlotsRate; l++) | |
466 { | |
467 real_t *v0, *v1; | |
468 | |
469 /* shift buffers */ | |
470 memmove_sse_576(qmfs->v[0] + 64, qmfs->v[0]); | |
471 memmove_sse_576(qmfs->v[1] + 64, qmfs->v[1]); | |
472 | |
473 v0 = qmfs->v[qmfs->v_index]; | |
474 v1 = qmfs->v[(qmfs->v_index + 1) & 0x1]; | |
475 qmfs->v_index = (qmfs->v_index + 1) & 0x1; | |
476 | |
477 /* calculate 128 samples */ | |
478 x1[0] = scale*QMF_RE(X[l][0]); | |
479 x2[63] = scale*QMF_IM(X[l][0]); | |
480 for (k = 0; k < 31; k++) | |
481 { | |
482 x1[2*k+1] = scale*(QMF_RE(X[l][2*k+1]) - QMF_RE(X[l][2*k+2])); | |
483 x1[2*k+2] = scale*(QMF_RE(X[l][2*k+1]) + QMF_RE(X[l][2*k+2])); | |
484 | |
485 x2[61 - 2*k] = scale*(QMF_IM(X[l][2*k+2]) - QMF_IM(X[l][2*k+1])); | |
486 x2[62 - 2*k] = scale*(QMF_IM(X[l][2*k+2]) + QMF_IM(X[l][2*k+1])); | |
487 } | |
488 x1[63] = scale*QMF_RE(X[l][63]); | |
489 x2[0] = scale*QMF_IM(X[l][63]); | |
490 | |
491 DCT4_64_kernel(x1, x1); | |
492 DCT4_64_kernel(x2, x2); | |
493 | |
494 for (n = 0; n < 32; n++) | |
495 { | |
496 v0[ 2*n ] = x2[2*n] - x1[2*n]; | |
497 v1[63- 2*n ] = x2[2*n] + x1[2*n]; | |
498 v0[ 2*n+1 ] = -x2[2*n+1] - x1[2*n+1]; | |
499 v1[63-(2*n+1)] = -x2[2*n+1] + x1[2*n+1]; | |
500 } | |
501 | |
502 /* calculate 64 output samples and window */ | |
503 for (k = 0; k < 64; k+=4) | |
504 { | |
505 __m128 m0, m1, m2, m3, m4, m5, m6, m7, m8, m9; | |
506 __m128 c0, c1, c2, c3, c4, c5, c6, c7, c8, c9; | |
507 __m128 s1, s2, s3, s4, s5, s6, s7, s8, s9; | |
508 | |
509 m0 = _mm_load_ps(&v0[k]); | |
510 m1 = _mm_load_ps(&v0[k + 64]); | |
511 m2 = _mm_load_ps(&v0[k + 128]); | |
512 m3 = _mm_load_ps(&v0[k + 192]); | |
513 m4 = _mm_load_ps(&v0[k + 256]); | |
514 c0 = _mm_load_ps(&qmf_c[k]); | |
515 c1 = _mm_load_ps(&qmf_c[k + 64]); | |
516 c2 = _mm_load_ps(&qmf_c[k + 128]); | |
517 c3 = _mm_load_ps(&qmf_c[k + 192]); | |
518 c4 = _mm_load_ps(&qmf_c[k + 256]); | |
519 | |
520 m0 = _mm_mul_ps(m0, c0); | |
521 m1 = _mm_mul_ps(m1, c1); | |
522 m2 = _mm_mul_ps(m2, c2); | |
523 m3 = _mm_mul_ps(m3, c3); | |
524 m4 = _mm_mul_ps(m4, c4); | |
525 | |
526 s1 = _mm_add_ps(m0, m1); | |
527 s2 = _mm_add_ps(m2, m3); | |
528 s6 = _mm_add_ps(s1, s2); | |
529 | |
530 m5 = _mm_load_ps(&v0[k + 320]); | |
531 m6 = _mm_load_ps(&v0[k + 384]); | |
532 m7 = _mm_load_ps(&v0[k + 448]); | |
533 m8 = _mm_load_ps(&v0[k + 512]); | |
534 m9 = _mm_load_ps(&v0[k + 576]); | |
535 c5 = _mm_load_ps(&qmf_c[k + 320]); | |
536 c6 = _mm_load_ps(&qmf_c[k + 384]); | |
537 c7 = _mm_load_ps(&qmf_c[k + 448]); | |
538 c8 = _mm_load_ps(&qmf_c[k + 512]); | |
539 c9 = _mm_load_ps(&qmf_c[k + 576]); | |
540 | |
541 m5 = _mm_mul_ps(m5, c5); | |
542 m6 = _mm_mul_ps(m6, c6); | |
543 m7 = _mm_mul_ps(m7, c7); | |
544 m8 = _mm_mul_ps(m8, c8); | |
545 m9 = _mm_mul_ps(m9, c9); | |
546 | |
547 s3 = _mm_add_ps(m4, m5); | |
548 s4 = _mm_add_ps(m6, m7); | |
549 s5 = _mm_add_ps(m8, m9); | |
550 s7 = _mm_add_ps(s3, s4); | |
551 s8 = _mm_add_ps(s5, s6); | |
552 s9 = _mm_add_ps(s7, s8); | |
553 | |
554 _mm_store_ps(&output[out], s9); | |
555 out += 4; | |
10989 | 556 } |
557 } | |
558 } | |
559 #endif | |
12527 | 560 #endif |
10725 | 561 |
562 #endif |