Mercurial > mplayer.hg
annotate libfaad2/filtbank.c @ 14396:0a22a046f0d3
license issues clarified
author | henry |
---|---|
date | Thu, 06 Jan 2005 13:14:30 +0000 |
parents | 6d50ef45a058 |
children | 2ae5ab4331ca |
rev | line source |
---|---|
10725 | 1 /* |
2 ** FAAD2 - Freeware Advanced Audio (AAC) Decoder including SBR decoding | |
12527 | 3 ** Copyright (C) 2003-2004 M. Bakker, Ahead Software AG, http://www.nero.com |
10725 | 4 ** |
5 ** This program is free software; you can redistribute it and/or modify | |
6 ** it under the terms of the GNU General Public License as published by | |
7 ** the Free Software Foundation; either version 2 of the License, or | |
8 ** (at your option) any later version. | |
9 ** | |
10 ** This program is distributed in the hope that it will be useful, | |
11 ** but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 ** MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
13 ** GNU General Public License for more details. | |
14 ** | |
15 ** You should have received a copy of the GNU General Public License | |
16 ** along with this program; if not, write to the Free Software | |
17 ** Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. | |
18 ** | |
19 ** Any non-GPL usage of this software or parts of this software is strictly | |
20 ** forbidden. | |
21 ** | |
22 ** Commercial non-GPL licensing of this software is possible. | |
23 ** For more info contact Ahead Software through Mpeg4AAClicense@nero.com. | |
24 ** | |
12625
d81145997036
More information about modifications to comply more closely with GPL 2a.
diego
parents:
12527
diff
changeset
|
25 ** Initially modified for use with MPlayer by Arpad Gereöffy on 2003/08/30 |
13453
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
26 ** $Id: filtbank.c,v 1.4 2004/06/23 13:50:49 diego Exp $ |
12625
d81145997036
More information about modifications to comply more closely with GPL 2a.
diego
parents:
12527
diff
changeset
|
27 ** detailed CVS changelog at http://www.mplayerhq.hu/cgi-bin/cvsweb.cgi/main/ |
10725 | 28 **/ |
29 | |
30 #include "common.h" | |
31 #include "structs.h" | |
32 | |
33 #include <stdlib.h> | |
34 #include <string.h> | |
35 #ifdef _WIN32_WCE | |
36 #define assert(x) | |
37 #else | |
38 #include <assert.h> | |
39 #endif | |
40 | |
41 #include "filtbank.h" | |
42 #include "decoder.h" | |
43 #include "syntax.h" | |
44 #include "kbd_win.h" | |
45 #include "sine_win.h" | |
46 #include "mdct.h" | |
47 | |
48 | |
49 fb_info *filter_bank_init(uint16_t frame_len) | |
50 { | |
51 uint16_t nshort = frame_len/8; | |
52 #ifdef LD_DEC | |
53 uint16_t frame_len_ld = frame_len/2; | |
54 #endif | |
55 | |
12527 | 56 fb_info *fb = (fb_info*)faad_malloc(sizeof(fb_info)); |
10725 | 57 memset(fb, 0, sizeof(fb_info)); |
58 | |
59 /* normal */ | |
60 fb->mdct256 = faad_mdct_init(2*nshort); | |
61 fb->mdct2048 = faad_mdct_init(2*frame_len); | |
62 #ifdef LD_DEC | |
63 /* LD */ | |
64 fb->mdct1024 = faad_mdct_init(2*frame_len_ld); | |
65 #endif | |
66 | |
12527 | 67 #ifdef ALLOW_SMALL_FRAMELENGTH |
10725 | 68 if (frame_len == 1024) |
69 { | |
12527 | 70 #endif |
10725 | 71 fb->long_window[0] = sine_long_1024; |
72 fb->short_window[0] = sine_short_128; | |
73 fb->long_window[1] = kbd_long_1024; | |
74 fb->short_window[1] = kbd_short_128; | |
75 #ifdef LD_DEC | |
76 fb->ld_window[0] = sine_mid_512; | |
77 fb->ld_window[1] = ld_mid_512; | |
78 #endif | |
12527 | 79 #ifdef ALLOW_SMALL_FRAMELENGTH |
10725 | 80 } else /* (frame_len == 960) */ { |
81 fb->long_window[0] = sine_long_960; | |
82 fb->short_window[0] = sine_short_120; | |
83 fb->long_window[1] = kbd_long_960; | |
84 fb->short_window[1] = kbd_short_120; | |
85 #ifdef LD_DEC | |
86 fb->ld_window[0] = sine_mid_480; | |
87 fb->ld_window[1] = ld_mid_480; | |
88 #endif | |
89 } | |
12527 | 90 #endif |
91 | |
92 #ifdef USE_SSE | |
93 if (cpu_has_sse()) | |
94 { | |
95 fb->if_func = ifilter_bank_sse; | |
96 } else { | |
97 fb->if_func = ifilter_bank; | |
98 } | |
99 #endif | |
10725 | 100 |
101 return fb; | |
102 } | |
103 | |
104 void filter_bank_end(fb_info *fb) | |
105 { | |
106 if (fb != NULL) | |
107 { | |
12527 | 108 #ifdef PROFILE |
109 printf("FB: %I64d cycles\n", fb->cycles); | |
110 #endif | |
111 | |
10725 | 112 faad_mdct_end(fb->mdct256); |
113 faad_mdct_end(fb->mdct2048); | |
114 #ifdef LD_DEC | |
115 faad_mdct_end(fb->mdct1024); | |
116 #endif | |
117 | |
12527 | 118 faad_free(fb); |
10725 | 119 } |
120 } | |
121 | |
12527 | 122 static INLINE void imdct_long(fb_info *fb, real_t *in_data, real_t *out_data, uint16_t len) |
10725 | 123 { |
12527 | 124 #ifdef LD_DEC |
125 mdct_info *mdct = NULL; | |
10725 | 126 |
127 switch (len) | |
128 { | |
129 case 2048: | |
130 case 1920: | |
131 mdct = fb->mdct2048; | |
132 break; | |
133 case 1024: | |
134 case 960: | |
135 mdct = fb->mdct1024; | |
136 break; | |
137 } | |
138 | |
139 faad_imdct(mdct, in_data, out_data); | |
12527 | 140 #else |
141 faad_imdct(fb->mdct2048, in_data, out_data); | |
142 #endif | |
10725 | 143 } |
144 | |
12527 | 145 #ifdef USE_SSE |
146 static INLINE void imdct_long_sse(fb_info *fb, real_t *in_data, real_t *out_data, uint16_t len) | |
147 { | |
148 #ifdef LD_DEC | |
149 mdct_info *mdct = NULL; | |
150 | |
151 switch (len) | |
152 { | |
153 case 2048: | |
154 case 1920: | |
155 mdct = fb->mdct2048; | |
156 break; | |
157 case 1024: | |
158 case 960: | |
159 mdct = fb->mdct1024; | |
160 break; | |
161 } | |
162 | |
163 faad_imdct_sse(mdct, in_data, out_data); | |
164 #else | |
165 faad_imdct_sse(fb->mdct2048, in_data, out_data); | |
166 #endif | |
167 } | |
168 #endif | |
169 | |
10725 | 170 #ifdef LTP_DEC |
171 static INLINE void mdct(fb_info *fb, real_t *in_data, real_t *out_data, uint16_t len) | |
172 { | |
12527 | 173 mdct_info *mdct = NULL; |
10725 | 174 |
175 switch (len) | |
176 { | |
177 case 2048: | |
178 case 1920: | |
179 mdct = fb->mdct2048; | |
180 break; | |
181 case 256: | |
182 case 240: | |
183 mdct = fb->mdct256; | |
184 break; | |
185 #ifdef LD_DEC | |
186 case 1024: | |
187 case 960: | |
188 mdct = fb->mdct1024; | |
189 break; | |
190 #endif | |
191 } | |
192 | |
193 faad_mdct(mdct, in_data, out_data); | |
194 } | |
195 #endif | |
196 | |
197 void ifilter_bank(fb_info *fb, uint8_t window_sequence, uint8_t window_shape, | |
198 uint8_t window_shape_prev, real_t *freq_in, | |
12527 | 199 real_t *time_out, real_t *overlap, |
200 uint8_t object_type, uint16_t frame_len) | |
10725 | 201 { |
202 int16_t i; | |
12527 | 203 ALIGN real_t transf_buf[2*1024] = {0}; |
10725 | 204 |
12527 | 205 const real_t *window_long = NULL; |
206 const real_t *window_long_prev = NULL; | |
207 const real_t *window_short = NULL; | |
208 const real_t *window_short_prev = NULL; | |
10725 | 209 |
210 uint16_t nlong = frame_len; | |
211 uint16_t nshort = frame_len/8; | |
212 uint16_t trans = nshort/2; | |
213 | |
214 uint16_t nflat_ls = (nlong-nshort)/2; | |
215 | |
12527 | 216 #ifdef PROFILE |
217 int64_t count = faad_get_ts(); | |
218 #endif | |
219 | |
13453
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
220 /* select windows of current frame and previous frame (Sine or KBD) */ |
12527 | 221 #ifdef LD_DEC |
222 if (object_type == LD) | |
223 { | |
224 window_long = fb->ld_window[window_shape]; | |
225 window_long_prev = fb->ld_window[window_shape_prev]; | |
226 } else { | |
227 #endif | |
228 window_long = fb->long_window[window_shape]; | |
229 window_long_prev = fb->long_window[window_shape_prev]; | |
230 window_short = fb->short_window[window_shape]; | |
231 window_short_prev = fb->short_window[window_shape_prev]; | |
232 #ifdef LD_DEC | |
233 } | |
234 #endif | |
235 | |
13453
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
236 #if 0 |
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
237 for (i = 0; i < 1024; i++) |
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
238 { |
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
239 printf("%d\n", freq_in[i]); |
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
240 } |
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
241 #endif |
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
242 |
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
243 #if 0 |
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
244 printf("%d %d\n", window_sequence, window_shape); |
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
245 #endif |
12527 | 246 |
247 switch (window_sequence) | |
248 { | |
249 case ONLY_LONG_SEQUENCE: | |
13453
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
250 /* perform iMDCT */ |
12527 | 251 imdct_long(fb, freq_in, transf_buf, 2*nlong); |
13453
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
252 |
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
253 /* add second half output of previous frame to windowed output of current frame */ |
12527 | 254 for (i = 0; i < nlong; i+=4) |
255 { | |
256 time_out[i] = overlap[i] + MUL_F(transf_buf[i],window_long_prev[i]); | |
257 time_out[i+1] = overlap[i+1] + MUL_F(transf_buf[i+1],window_long_prev[i+1]); | |
258 time_out[i+2] = overlap[i+2] + MUL_F(transf_buf[i+2],window_long_prev[i+2]); | |
259 time_out[i+3] = overlap[i+3] + MUL_F(transf_buf[i+3],window_long_prev[i+3]); | |
260 } | |
13453
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
261 |
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
262 /* window the second half and save as overlap for next frame */ |
12527 | 263 for (i = 0; i < nlong; i+=4) |
264 { | |
265 overlap[i] = MUL_F(transf_buf[nlong+i],window_long[nlong-1-i]); | |
266 overlap[i+1] = MUL_F(transf_buf[nlong+i+1],window_long[nlong-2-i]); | |
267 overlap[i+2] = MUL_F(transf_buf[nlong+i+2],window_long[nlong-3-i]); | |
268 overlap[i+3] = MUL_F(transf_buf[nlong+i+3],window_long[nlong-4-i]); | |
269 } | |
270 break; | |
271 | |
272 case LONG_START_SEQUENCE: | |
13453
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
273 /* perform iMDCT */ |
12527 | 274 imdct_long(fb, freq_in, transf_buf, 2*nlong); |
13453
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
275 |
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
276 /* add second half output of previous frame to windowed output of current frame */ |
12527 | 277 for (i = 0; i < nlong; i+=4) |
278 { | |
279 time_out[i] = overlap[i] + MUL_F(transf_buf[i],window_long_prev[i]); | |
280 time_out[i+1] = overlap[i+1] + MUL_F(transf_buf[i+1],window_long_prev[i+1]); | |
281 time_out[i+2] = overlap[i+2] + MUL_F(transf_buf[i+2],window_long_prev[i+2]); | |
282 time_out[i+3] = overlap[i+3] + MUL_F(transf_buf[i+3],window_long_prev[i+3]); | |
283 } | |
13453
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
284 |
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
285 /* window the second half and save as overlap for next frame */ |
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
286 /* construct second half window using padding with 1's and 0's */ |
12527 | 287 for (i = 0; i < nflat_ls; i++) |
288 overlap[i] = transf_buf[nlong+i]; | |
289 for (i = 0; i < nshort; i++) | |
290 overlap[nflat_ls+i] = MUL_F(transf_buf[nlong+nflat_ls+i],window_short[nshort-i-1]); | |
291 for (i = 0; i < nflat_ls; i++) | |
292 overlap[nflat_ls+nshort+i] = 0; | |
293 break; | |
294 | |
295 case EIGHT_SHORT_SEQUENCE: | |
13453
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
296 /* perform iMDCT for each short block */ |
12527 | 297 faad_imdct(fb->mdct256, freq_in+0*nshort, transf_buf+2*nshort*0); |
298 faad_imdct(fb->mdct256, freq_in+1*nshort, transf_buf+2*nshort*1); | |
299 faad_imdct(fb->mdct256, freq_in+2*nshort, transf_buf+2*nshort*2); | |
300 faad_imdct(fb->mdct256, freq_in+3*nshort, transf_buf+2*nshort*3); | |
301 faad_imdct(fb->mdct256, freq_in+4*nshort, transf_buf+2*nshort*4); | |
302 faad_imdct(fb->mdct256, freq_in+5*nshort, transf_buf+2*nshort*5); | |
303 faad_imdct(fb->mdct256, freq_in+6*nshort, transf_buf+2*nshort*6); | |
304 faad_imdct(fb->mdct256, freq_in+7*nshort, transf_buf+2*nshort*7); | |
13453
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
305 |
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
306 /* add second half output of previous frame to windowed output of current frame */ |
12527 | 307 for (i = 0; i < nflat_ls; i++) |
308 time_out[i] = overlap[i]; | |
309 for(i = 0; i < nshort; i++) | |
310 { | |
311 time_out[nflat_ls+ i] = overlap[nflat_ls+ i] + MUL_F(transf_buf[nshort*0+i],window_short_prev[i]); | |
312 time_out[nflat_ls+1*nshort+i] = overlap[nflat_ls+nshort*1+i] + MUL_F(transf_buf[nshort*1+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*2+i],window_short[i]); | |
313 time_out[nflat_ls+2*nshort+i] = overlap[nflat_ls+nshort*2+i] + MUL_F(transf_buf[nshort*3+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*4+i],window_short[i]); | |
314 time_out[nflat_ls+3*nshort+i] = overlap[nflat_ls+nshort*3+i] + MUL_F(transf_buf[nshort*5+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*6+i],window_short[i]); | |
315 if (i < trans) | |
316 time_out[nflat_ls+4*nshort+i] = overlap[nflat_ls+nshort*4+i] + MUL_F(transf_buf[nshort*7+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*8+i],window_short[i]); | |
317 } | |
13453
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
318 |
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
319 /* window the second half and save as overlap for next frame */ |
12527 | 320 for(i = 0; i < nshort; i++) |
321 { | |
322 if (i >= trans) | |
323 overlap[nflat_ls+4*nshort+i-nlong] = MUL_F(transf_buf[nshort*7+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*8+i],window_short[i]); | |
324 overlap[nflat_ls+5*nshort+i-nlong] = MUL_F(transf_buf[nshort*9+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*10+i],window_short[i]); | |
325 overlap[nflat_ls+6*nshort+i-nlong] = MUL_F(transf_buf[nshort*11+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*12+i],window_short[i]); | |
326 overlap[nflat_ls+7*nshort+i-nlong] = MUL_F(transf_buf[nshort*13+i],window_short[nshort-1-i]) + MUL_F(transf_buf[nshort*14+i],window_short[i]); | |
327 overlap[nflat_ls+8*nshort+i-nlong] = MUL_F(transf_buf[nshort*15+i],window_short[nshort-1-i]); | |
328 } | |
329 for (i = 0; i < nflat_ls; i++) | |
330 overlap[nflat_ls+nshort+i] = 0; | |
331 break; | |
332 | |
333 case LONG_STOP_SEQUENCE: | |
13453
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
334 /* perform iMDCT */ |
12527 | 335 imdct_long(fb, freq_in, transf_buf, 2*nlong); |
13453
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
336 |
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
337 /* add second half output of previous frame to windowed output of current frame */ |
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
338 /* construct first half window using padding with 1's and 0's */ |
12527 | 339 for (i = 0; i < nflat_ls; i++) |
340 time_out[i] = overlap[i]; | |
341 for (i = 0; i < nshort; i++) | |
342 time_out[nflat_ls+i] = overlap[nflat_ls+i] + MUL_F(transf_buf[nflat_ls+i],window_short_prev[i]); | |
343 for (i = 0; i < nflat_ls; i++) | |
344 time_out[nflat_ls+nshort+i] = overlap[nflat_ls+nshort+i] + transf_buf[nflat_ls+nshort+i]; | |
13453
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
345 |
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
346 /* window the second half and save as overlap for next frame */ |
12527 | 347 for (i = 0; i < nlong; i++) |
348 overlap[i] = MUL_F(transf_buf[nlong+i],window_long[nlong-1-i]); | |
349 break; | |
350 } | |
351 | |
13453
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
352 #if 0 |
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
353 for (i = 0; i < 1024; i++) |
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
354 { |
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
355 //printf("%d\n", time_out[i]); |
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
356 printf("0x%.8X\n", time_out[i]); |
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
357 } |
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
358 #endif |
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
359 |
6d50ef45a058
Update FAAD to a 2.1 beta CVS snapshot from 2004.07.12.
diego
parents:
12625
diff
changeset
|
360 |
12527 | 361 #ifdef PROFILE |
362 count = faad_get_ts() - count; | |
363 fb->cycles += count; | |
364 #endif | |
365 } | |
366 | |
367 #ifdef USE_SSE | |
368 void ifilter_bank_sse(fb_info *fb, uint8_t window_sequence, uint8_t window_shape, | |
369 uint8_t window_shape_prev, real_t *freq_in, | |
370 real_t *time_out, uint8_t object_type, uint16_t frame_len) | |
371 { | |
372 int16_t i; | |
373 ALIGN real_t transf_buf[2*1024] = {0}; | |
374 | |
375 const real_t *window_long = NULL; | |
376 const real_t *window_long_prev = NULL; | |
377 const real_t *window_short = NULL; | |
378 const real_t *window_short_prev = NULL; | |
379 | |
380 uint16_t nlong = frame_len; | |
381 uint16_t nshort = frame_len/8; | |
382 uint16_t trans = nshort/2; | |
383 | |
384 uint16_t nflat_ls = (nlong-nshort)/2; | |
385 | |
386 #ifdef PROFILE | |
387 int64_t count = faad_get_ts(); | |
388 #endif | |
10725 | 389 |
390 #ifdef LD_DEC | |
391 if (object_type == LD) | |
392 { | |
393 window_long = fb->ld_window[window_shape]; | |
394 window_long_prev = fb->ld_window[window_shape_prev]; | |
395 } else { | |
396 #endif | |
397 window_long = fb->long_window[window_shape]; | |
398 window_long_prev = fb->long_window[window_shape_prev]; | |
399 window_short = fb->short_window[window_shape]; | |
400 window_short_prev = fb->short_window[window_shape_prev]; | |
401 #ifdef LD_DEC | |
402 } | |
403 #endif | |
404 | |
405 switch (window_sequence) | |
406 { | |
407 case ONLY_LONG_SEQUENCE: | |
12527 | 408 imdct_long_sse(fb, freq_in, transf_buf, 2*nlong); |
10989 | 409 for (i = 0; i < nlong; i+=4) |
10725 | 410 { |
12527 | 411 __m128 m1, m2, m3, m4, m5, m6, m7, m8; |
412 | |
413 m1 = _mm_load_ps(&transf_buf[i]); | |
414 m2 = _mm_load_ps(&window_long_prev[i]); | |
415 m6 = _mm_load_ps(&window_long[nlong-4-i]); | |
416 m3 = _mm_load_ps(&time_out[nlong+i]); | |
417 m5 = _mm_load_ps(&transf_buf[nlong+i]); | |
418 | |
419 m4 = _mm_mul_ps(m1, m2); | |
420 m7 = _mm_shuffle_ps(m6, m6, _MM_SHUFFLE(0, 1, 2, 3)); | |
421 | |
422 m4 = _mm_add_ps(m4, m3); | |
423 m8 = _mm_mul_ps(m5, m7); | |
424 | |
425 _mm_store_ps(&time_out[i], m4); | |
426 _mm_store_ps(&time_out[nlong+i], m8); | |
10725 | 427 } |
428 break; | |
429 | |
430 case LONG_START_SEQUENCE: | |
12527 | 431 imdct_long_sse(fb, freq_in, transf_buf, 2*nlong); |
10989 | 432 for (i = 0; i < nlong; i+=4) |
433 { | |
12527 | 434 __m128 m1 = _mm_load_ps(&transf_buf[i]); |
435 __m128 m2 = _mm_load_ps(&window_long_prev[i]); | |
436 __m128 m3 = _mm_load_ps(&time_out[nlong+i]); | |
437 | |
438 __m128 m4 = _mm_mul_ps(m1, m2); | |
439 m4 = _mm_add_ps(m4, m3); | |
440 | |
441 _mm_store_ps(&time_out[i], m4); | |
442 } | |
443 for (i = 0; i < nflat_ls; i+=4) | |
444 { | |
445 __m128 m1 = _mm_load_ps(&transf_buf[nlong+i]); | |
446 _mm_store_ps(&time_out[nlong+i], m1); | |
10989 | 447 } |
12527 | 448 for (i = 0; i < nshort; i+=4) |
449 { | |
450 __m128 m1 = _mm_load_ps(&transf_buf[nlong+nflat_ls+i]); | |
451 __m128 m2 = _mm_load_ps(&window_short[nshort-4-i]); | |
452 __m128 m3, m4; | |
453 | |
454 m3 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3)); | |
455 | |
456 m4 = _mm_mul_ps(m1, m3); | |
457 | |
458 _mm_store_ps(&time_out[nlong+nflat_ls+i], m4); | |
459 } | |
460 for (i = 0; i < nflat_ls; i+=4) | |
461 { | |
462 __m128 m1 = _mm_setzero_ps(); | |
463 _mm_store_ps(&time_out[nlong+nflat_ls+nshort+i], m1); | |
464 } | |
10725 | 465 break; |
466 | |
467 case EIGHT_SHORT_SEQUENCE: | |
12527 | 468 faad_imdct_sse(fb->mdct256, &freq_in[0*nshort], &transf_buf[2*nshort*0]); |
469 faad_imdct_sse(fb->mdct256, &freq_in[1*nshort], &transf_buf[2*nshort*1]); | |
470 faad_imdct_sse(fb->mdct256, &freq_in[2*nshort], &transf_buf[2*nshort*2]); | |
471 faad_imdct_sse(fb->mdct256, &freq_in[3*nshort], &transf_buf[2*nshort*3]); | |
472 faad_imdct_sse(fb->mdct256, &freq_in[4*nshort], &transf_buf[2*nshort*4]); | |
473 faad_imdct_sse(fb->mdct256, &freq_in[5*nshort], &transf_buf[2*nshort*5]); | |
474 faad_imdct_sse(fb->mdct256, &freq_in[6*nshort], &transf_buf[2*nshort*6]); | |
475 faad_imdct_sse(fb->mdct256, &freq_in[7*nshort], &transf_buf[2*nshort*7]); | |
476 for (i = 0; i < nflat_ls; i+=4) | |
477 { | |
478 __m128 m1 = _mm_load_ps(&time_out[nlong+i]); | |
479 _mm_store_ps(&time_out[i], m1); | |
480 } | |
481 for (i = 0; i < nshort; i+=4) | |
482 { | |
483 __m128 m1 = _mm_load_ps(&transf_buf[nshort*0+i]); | |
484 __m128 m2 = _mm_load_ps(&window_short_prev[i]); | |
485 __m128 m3 = _mm_load_ps(&time_out[nlong+nflat_ls+i]); | |
486 | |
487 __m128 m4 = _mm_mul_ps(m1, m2); | |
488 m4 = _mm_add_ps(m4, m3); | |
489 | |
490 _mm_store_ps(&time_out[nflat_ls+i], m4); | |
491 } | |
492 for (i = 0; i < nshort; i+=4) | |
493 { | |
494 __m128 m1, m2, m3, m4, m5, m6, m7, m8; | |
495 m1 = _mm_load_ps(&transf_buf[nshort*1+i]); | |
496 m2 = _mm_load_ps(&window_short[nshort-4-i]); | |
497 m3 = _mm_load_ps(&time_out[nlong+nflat_ls+nshort*1+i]); | |
498 m6 = _mm_load_ps(&transf_buf[nshort*2+i]); | |
499 m7 = _mm_load_ps(&window_short[i]); | |
500 | |
501 m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3)); | |
502 | |
503 m4 = _mm_mul_ps(m1, m5); | |
504 m8 = _mm_mul_ps(m6, m7); | |
505 m4 = _mm_add_ps(m4, m3); | |
506 m4 = _mm_add_ps(m4, m8); | |
507 | |
508 _mm_store_ps(&time_out[nflat_ls+1*nshort+i], m4); | |
509 } | |
510 for (i = 0; i < nshort; i+=4) | |
511 { | |
512 __m128 m1, m2, m3, m4, m5, m6, m7, m8; | |
513 m1 = _mm_load_ps(&transf_buf[nshort*3+i]); | |
514 m2 = _mm_load_ps(&window_short[nshort-4-i]); | |
515 m3 = _mm_load_ps(&time_out[nlong+nflat_ls+nshort*2+i]); | |
516 m6 = _mm_load_ps(&transf_buf[nshort*4+i]); | |
517 m7 = _mm_load_ps(&window_short[i]); | |
518 | |
519 m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3)); | |
520 | |
521 m4 = _mm_mul_ps(m1, m5); | |
522 m8 = _mm_mul_ps(m6, m7); | |
523 m4 = _mm_add_ps(m4, m3); | |
524 m4 = _mm_add_ps(m4, m8); | |
525 | |
526 _mm_store_ps(&time_out[nflat_ls+2*nshort+i], m4); | |
527 } | |
528 for (i = 0; i < nshort; i+=4) | |
529 { | |
530 __m128 m1, m2, m3, m4, m5, m6, m7, m8; | |
531 m1 = _mm_load_ps(&transf_buf[nshort*5+i]); | |
532 m2 = _mm_load_ps(&window_short[nshort-4-i]); | |
533 m3 = _mm_load_ps(&time_out[nlong+nflat_ls+nshort*3+i]); | |
534 m6 = _mm_load_ps(&transf_buf[nshort*6+i]); | |
535 m7 = _mm_load_ps(&window_short[i]); | |
536 | |
537 m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3)); | |
538 | |
539 m4 = _mm_mul_ps(m1, m5); | |
540 m8 = _mm_mul_ps(m6, m7); | |
541 m4 = _mm_add_ps(m4, m3); | |
542 m4 = _mm_add_ps(m4, m8); | |
543 | |
544 _mm_store_ps(&time_out[nflat_ls+3*nshort+i], m4); | |
545 } | |
546 for(i = 0; i < trans; i+=4) | |
10725 | 547 { |
12527 | 548 __m128 m1, m2, m3, m4, m5, m6, m7, m8; |
549 m1 = _mm_load_ps(&transf_buf[nshort*7+i]); | |
550 m2 = _mm_load_ps(&window_short[nshort-4-i]); | |
551 m3 = _mm_load_ps(&time_out[nlong+nflat_ls+nshort*4+i]); | |
552 m6 = _mm_load_ps(&transf_buf[nshort*8+i]); | |
553 m7 = _mm_load_ps(&window_short[i]); | |
554 | |
555 m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3)); | |
556 | |
557 m4 = _mm_mul_ps(m1, m5); | |
558 m8 = _mm_mul_ps(m6, m7); | |
559 m4 = _mm_add_ps(m4, m3); | |
560 m4 = _mm_add_ps(m4, m8); | |
561 | |
562 _mm_store_ps(&time_out[nflat_ls+4*nshort+i], m4); | |
563 } | |
564 for (i = trans; i < nshort; i+=4) | |
565 { | |
566 __m128 m1, m2, m3, m4, m5, m6, m7, m8; | |
567 m1 = _mm_load_ps(&transf_buf[nshort*7+i]); | |
568 m2 = _mm_load_ps(&window_short[nshort-4-i]); | |
569 m6 = _mm_load_ps(&transf_buf[nshort*8+i]); | |
570 m7 = _mm_load_ps(&window_short[i]); | |
571 | |
572 m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3)); | |
573 | |
574 m4 = _mm_mul_ps(m1, m5); | |
575 m8 = _mm_mul_ps(m6, m7); | |
576 m3 = _mm_add_ps(m4, m8); | |
577 | |
578 _mm_store_ps(&time_out[nflat_ls+4*nshort+i], m3); | |
579 } | |
580 for (i = 0; i < nshort; i+=4) | |
581 { | |
582 __m128 m1, m2, m3, m4, m5, m6, m7, m8; | |
583 m1 = _mm_load_ps(&transf_buf[nshort*9+i]); | |
584 m2 = _mm_load_ps(&window_short[nshort-4-i]); | |
585 m6 = _mm_load_ps(&transf_buf[nshort*10+i]); | |
586 m7 = _mm_load_ps(&window_short[i]); | |
587 | |
588 m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3)); | |
589 | |
590 m4 = _mm_mul_ps(m1, m5); | |
591 m8 = _mm_mul_ps(m6, m7); | |
592 m3 = _mm_add_ps(m4, m8); | |
593 | |
594 _mm_store_ps(&time_out[nflat_ls+5*nshort+i], m3); | |
10725 | 595 } |
12527 | 596 for (i = 0; i < nshort; i+=4) |
597 { | |
598 __m128 m1, m2, m3, m4, m5, m6, m7, m8; | |
599 m1 = _mm_load_ps(&transf_buf[nshort*11+i]); | |
600 m2 = _mm_load_ps(&window_short[nshort-4-i]); | |
601 m6 = _mm_load_ps(&transf_buf[nshort*12+i]); | |
602 m7 = _mm_load_ps(&window_short[i]); | |
603 | |
604 m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3)); | |
605 | |
606 m4 = _mm_mul_ps(m1, m5); | |
607 m8 = _mm_mul_ps(m6, m7); | |
608 m3 = _mm_add_ps(m4, m8); | |
609 | |
610 _mm_store_ps(&time_out[nflat_ls+6*nshort+i], m3); | |
611 } | |
612 for (i = 0; i < nshort; i+=4) | |
613 { | |
614 __m128 m1, m2, m3, m4, m5, m6, m7, m8; | |
615 m1 = _mm_load_ps(&transf_buf[nshort*13+i]); | |
616 m2 = _mm_load_ps(&window_short[nshort-4-i]); | |
617 m6 = _mm_load_ps(&transf_buf[nshort*14+i]); | |
618 m7 = _mm_load_ps(&window_short[i]); | |
619 | |
620 m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3)); | |
621 | |
622 m4 = _mm_mul_ps(m1, m5); | |
623 m8 = _mm_mul_ps(m6, m7); | |
624 m3 = _mm_add_ps(m4, m8); | |
625 | |
626 _mm_store_ps(&time_out[nflat_ls+7*nshort+i], m3); | |
627 } | |
628 for (i = 0; i < nshort; i+=4) | |
629 { | |
630 __m128 m1, m2, m3, m5; | |
631 m1 = _mm_load_ps(&transf_buf[nshort*15+i]); | |
632 m2 = _mm_load_ps(&window_short[nshort-4-i]); | |
633 | |
634 m5 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3)); | |
635 | |
636 m3 = _mm_mul_ps(m1, m5); | |
637 | |
638 _mm_store_ps(&time_out[nflat_ls+8*nshort+i], m3); | |
639 } | |
640 for (i = 0; i < nflat_ls; i+=4) | |
641 { | |
642 __m128 m1 = _mm_setzero_ps(); | |
643 _mm_store_ps(&time_out[nlong+nflat_ls+nshort+i], m1); | |
644 } | |
10725 | 645 break; |
646 | |
647 case LONG_STOP_SEQUENCE: | |
12527 | 648 imdct_long_sse(fb, freq_in, transf_buf, 2*nlong); |
649 for (i = 0; i < nflat_ls; i+=4) | |
650 { | |
651 __m128 m1 = _mm_load_ps(&time_out[nlong+i]); | |
652 _mm_store_ps(&time_out[i], m1); | |
653 } | |
654 for (i = 0; i < nshort; i+=4) | |
655 { | |
656 __m128 m1 = _mm_load_ps(&transf_buf[nflat_ls+i]); | |
657 __m128 m2 = _mm_load_ps(&window_short_prev[i]); | |
658 __m128 m3 = _mm_load_ps(&time_out[nlong+nflat_ls+i]); | |
659 | |
660 __m128 m4 = _mm_mul_ps(m1, m2); | |
661 m4 = _mm_add_ps(m4, m3); | |
662 | |
663 _mm_store_ps(&time_out[nflat_ls+i], m4); | |
664 } | |
665 for (i = 0; i < nflat_ls; i+=4) | |
666 { | |
667 __m128 m1 = _mm_load_ps(&transf_buf[nflat_ls+nshort+i]); | |
668 __m128 m2 = _mm_load_ps(&time_out[nlong+nflat_ls+nshort+i]); | |
669 | |
670 __m128 m3 = _mm_add_ps(m1, m2); | |
671 | |
672 _mm_store_ps(&time_out[nflat_ls+nshort+i], m3); | |
673 } | |
674 for (i = 0; i < nlong; i+=4) | |
675 { | |
676 __m128 m1 = _mm_load_ps(&transf_buf[nlong+i]); | |
677 __m128 m2 = _mm_load_ps(&window_long[nlong-4-i]); | |
678 __m128 m3, m4; | |
679 | |
680 m3 = _mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0, 1, 2, 3)); | |
681 | |
682 m4 = _mm_mul_ps(m1, m3); | |
683 | |
684 _mm_store_ps(&time_out[nlong+i], m4); | |
685 } | |
10725 | 686 break; |
687 } | |
688 | |
12527 | 689 #ifdef PROFILE |
690 count = faad_get_ts() - count; | |
691 fb->cycles += count; | |
692 #endif | |
10725 | 693 } |
12527 | 694 #endif |
10725 | 695 |
696 #ifdef LTP_DEC | |
697 /* only works for LTP -> no overlapping, no short blocks */ | |
698 void filter_bank_ltp(fb_info *fb, uint8_t window_sequence, uint8_t window_shape, | |
699 uint8_t window_shape_prev, real_t *in_data, real_t *out_mdct, | |
700 uint8_t object_type, uint16_t frame_len) | |
701 { | |
702 int16_t i; | |
12527 | 703 ALIGN real_t windowed_buf[2*1024] = {0}; |
10725 | 704 |
12527 | 705 const real_t *window_long = NULL; |
706 const real_t *window_long_prev = NULL; | |
707 const real_t *window_short = NULL; | |
708 const real_t *window_short_prev = NULL; | |
10725 | 709 |
710 uint16_t nlong = frame_len; | |
711 uint16_t nshort = frame_len/8; | |
712 uint16_t nflat_ls = (nlong-nshort)/2; | |
713 | |
714 assert(window_sequence != EIGHT_SHORT_SEQUENCE); | |
715 | |
716 #ifdef LD_DEC | |
717 if (object_type == LD) | |
718 { | |
719 window_long = fb->ld_window[window_shape]; | |
720 window_long_prev = fb->ld_window[window_shape_prev]; | |
721 } else { | |
722 #endif | |
723 window_long = fb->long_window[window_shape]; | |
724 window_long_prev = fb->long_window[window_shape_prev]; | |
725 window_short = fb->short_window[window_shape]; | |
726 window_short_prev = fb->short_window[window_shape_prev]; | |
727 #ifdef LD_DEC | |
728 } | |
729 #endif | |
730 | |
731 switch(window_sequence) | |
732 { | |
733 case ONLY_LONG_SEQUENCE: | |
734 for (i = nlong-1; i >= 0; i--) | |
735 { | |
12527 | 736 windowed_buf[i] = MUL_F(in_data[i], window_long_prev[i]); |
737 windowed_buf[i+nlong] = MUL_F(in_data[i+nlong], window_long[nlong-1-i]); | |
10725 | 738 } |
739 mdct(fb, windowed_buf, out_mdct, 2*nlong); | |
740 break; | |
741 | |
742 case LONG_START_SEQUENCE: | |
743 for (i = 0; i < nlong; i++) | |
12527 | 744 windowed_buf[i] = MUL_F(in_data[i], window_long_prev[i]); |
10725 | 745 for (i = 0; i < nflat_ls; i++) |
746 windowed_buf[i+nlong] = in_data[i+nlong]; | |
747 for (i = 0; i < nshort; i++) | |
12527 | 748 windowed_buf[i+nlong+nflat_ls] = MUL_F(in_data[i+nlong+nflat_ls], window_short[nshort-1-i]); |
10725 | 749 for (i = 0; i < nflat_ls; i++) |
750 windowed_buf[i+nlong+nflat_ls+nshort] = 0; | |
751 mdct(fb, windowed_buf, out_mdct, 2*nlong); | |
752 break; | |
753 | |
754 case LONG_STOP_SEQUENCE: | |
755 for (i = 0; i < nflat_ls; i++) | |
756 windowed_buf[i] = 0; | |
757 for (i = 0; i < nshort; i++) | |
12527 | 758 windowed_buf[i+nflat_ls] = MUL_F(in_data[i+nflat_ls], window_short_prev[i]); |
10725 | 759 for (i = 0; i < nflat_ls; i++) |
760 windowed_buf[i+nflat_ls+nshort] = in_data[i+nflat_ls+nshort]; | |
761 for (i = 0; i < nlong; i++) | |
12527 | 762 windowed_buf[i+nlong] = MUL_F(in_data[i+nlong], window_long[nlong-1-i]); |
10725 | 763 mdct(fb, windowed_buf, out_mdct, 2*nlong); |
764 break; | |
765 } | |
766 } | |
767 #endif |