Mercurial > libavcodec.hg
annotate wmadec.c @ 3604:dad0296d4522 libavcodec
replace MULL with asm too, no significant speedup but it's probably better to not take any chances, some versions of gcc will almost certainly mess it up too if they can
author | michael |
---|---|
date | Tue, 22 Aug 2006 12:07:02 +0000 |
parents | 6a358dccf2ab |
children | e28285ddde8d |
rev | line source |
---|---|
783 | 1 /* |
2 * WMA compatible decoder | |
3 * Copyright (c) 2002 The FFmpeg Project. | |
4 * | |
5 * This library is free software; you can redistribute it and/or | |
6 * modify it under the terms of the GNU Lesser General Public | |
7 * License as published by the Free Software Foundation; either | |
8 * version 2 of the License, or (at your option) any later version. | |
9 * | |
10 * This library is distributed in the hope that it will be useful, | |
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 * Lesser General Public License for more details. | |
14 * | |
15 * You should have received a copy of the GNU Lesser General Public | |
16 * License along with this library; if not, write to the Free Software | |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
3022
diff
changeset
|
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
783 | 18 */ |
1106 | 19 |
20 /** | |
21 * @file wmadec.c | |
22 * WMA compatible decoder. | |
1967
2b0fc6b25ab8
add the minimal documentation to make this decoder useful
melanson
parents:
1750
diff
changeset
|
23 * This decoder handles Microsoft Windows Media Audio data, versions 1 & 2. |
2967 | 24 * WMA v1 is identified by audio format 0x160 in Microsoft media files |
1967
2b0fc6b25ab8
add the minimal documentation to make this decoder useful
melanson
parents:
1750
diff
changeset
|
25 * (ASF/AVI/WAV). WMA v2 is identified by audio format 0x161. |
2b0fc6b25ab8
add the minimal documentation to make this decoder useful
melanson
parents:
1750
diff
changeset
|
26 * |
2b0fc6b25ab8
add the minimal documentation to make this decoder useful
melanson
parents:
1750
diff
changeset
|
27 * To use this decoder, a calling application must supply the extra data |
2b0fc6b25ab8
add the minimal documentation to make this decoder useful
melanson
parents:
1750
diff
changeset
|
28 * bytes provided with the WMA data. These are the extra, codec-specific |
2967 | 29 * bytes at the end of a WAVEFORMATEX data structure. Transmit these bytes |
30 * to the decoder using the extradata[_size] fields in AVCodecContext. There | |
1967
2b0fc6b25ab8
add the minimal documentation to make this decoder useful
melanson
parents:
1750
diff
changeset
|
31 * should be 4 extra bytes for v1 data and 6 extra bytes for v2 data. |
1106 | 32 */ |
33 | |
783 | 34 #include "avcodec.h" |
2398
582e635cfa08
common.c -> bitstream.c (and the single non bitstream func -> utils.c)
michael
parents:
2370
diff
changeset
|
35 #include "bitstream.h" |
783 | 36 #include "dsputil.h" |
37 | |
38 /* size of blocks */ | |
39 #define BLOCK_MIN_BITS 7 | |
40 #define BLOCK_MAX_BITS 11 | |
41 #define BLOCK_MAX_SIZE (1 << BLOCK_MAX_BITS) | |
42 | |
43 #define BLOCK_NB_SIZES (BLOCK_MAX_BITS - BLOCK_MIN_BITS + 1) | |
44 | |
45 /* XXX: find exact max size */ | |
46 #define HIGH_BAND_MAX_SIZE 16 | |
47 | |
48 #define NB_LSP_COEFS 10 | |
49 | |
817 | 50 /* XXX: is it a suitable value ? */ |
2775
f3cdd51c9e16
WMA MAX_CODED_SUPERFRAME_SIZE too small patch by (Mark Weaver: mark-clist, npsl co uk)
michael
parents:
2398
diff
changeset
|
51 #define MAX_CODED_SUPERFRAME_SIZE 16384 |
783 | 52 |
53 #define MAX_CHANNELS 2 | |
54 | |
55 #define NOISE_TAB_SIZE 8192 | |
56 | |
57 #define LSP_POW_BITS 7 | |
58 | |
3022 | 59 #define VLCBITS 9 |
3113 | 60 #define VLCMAX ((22+VLCBITS-1)/VLCBITS) |
61 | |
62 #define EXPVLCBITS 8 | |
63 #define EXPMAX ((19+EXPVLCBITS-1)/EXPVLCBITS) | |
64 | |
65 #define HGAINVLCBITS 9 | |
66 #define HGAINMAX ((13+HGAINVLCBITS-1)/HGAINVLCBITS) | |
3022 | 67 |
/**
 * Private decoder state, stored in AVCodecContext.priv_data.
 *
 * Holds the stream parameters copied from the AVCodecContext, the tables
 * derived from them in wma_decode_init(), and the per-frame / per-block
 * working buffers used while decoding.
 */
typedef struct WMADecodeContext {
    GetBitContext gb;           /* bit reader used by get_bits()/get_vlc2() */
    int sample_rate;            /* copied from avctx->sample_rate */
    int nb_channels;            /* copied from avctx->channels */
    int bit_rate;               /* copied from avctx->bit_rate */
    int version; /* 1 = 0x160 (WMAV1), 2 = 0x161 (WMAV2) */
    int block_align;            /* copied from avctx->block_align */
    int use_bit_reservoir;      /* extradata flags2 & 0x0002 */
    int use_variable_block_len; /* extradata flags2 & 0x0004 */
    int use_exp_vlc;  /* exponent coding: 0 = lsp, 1 = vlc + delta */
    int use_noise_coding; /* true if perceptual noise is added */
    int byte_offset_bits;       /* derived from bits per sample and frame_len at init */
    VLC exp_vlc;                /* exponent VLC, built only when use_exp_vlc is set */
    int exponent_sizes[BLOCK_NB_SIZES];           /* number of exponent bands per block size */
    uint16_t exponent_bands[BLOCK_NB_SIZES][25];  /* width (in coefs) of each exponent band */
    int high_band_start[BLOCK_NB_SIZES]; /* index of first coef in high band */
    int coefs_start;               /* first coded coef */
    int coefs_end[BLOCK_NB_SIZES]; /* max number of coded coefficients */
    int exponent_high_sizes[BLOCK_NB_SIZES];      /* number of high bands per block size */
    int exponent_high_bands[BLOCK_NB_SIZES][HIGH_BAND_MAX_SIZE]; /* width of each high band,
                                                                    clipped to [high_band_start, coefs_end] */
    VLC hgain_vlc;              /* high band gain VLC, built only when use_noise_coding is set */

    /* coded values in high bands */
    int high_band_coded[MAX_CHANNELS][HIGH_BAND_MAX_SIZE];
    int high_band_values[MAX_CHANNELS][HIGH_BAND_MAX_SIZE];

    /* there are two possible tables for spectral coefficients */
    VLC coef_vlc[2];
    uint16_t *run_table[2];     /* allocated by init_coef_vlc() */
    uint16_t *level_table[2];   /* allocated by init_coef_vlc() */
    /* frame info */
    int frame_len;       /* frame length in samples */
    int frame_len_bits;  /* frame_len = 1 << frame_len_bits */
    int nb_block_sizes;  /* number of block sizes */
    /* block info */
    int reset_block_lengths;    /* set to 1 at init; when set, the block decoder
                                   re-reads prev/current block lengths from the stream */
    int block_len_bits; /* log2 of current block length */
    int next_block_len_bits; /* log2 of next block length */
    int prev_block_len_bits; /* log2 of prev block length */
    int block_len; /* block length in samples */
    int block_num; /* block number in current frame */
    int block_pos; /* current position in frame */
    uint8_t ms_stereo; /* true if mid/side stereo mode */
    uint8_t channel_coded[MAX_CHANNELS]; /* true if channel is coded */
    DECLARE_ALIGNED_16(float, exponents[MAX_CHANNELS][BLOCK_MAX_SIZE]); /* per-coef scale values
                                                                           written by decode_exp_lsp/vlc */
    float max_exponent[MAX_CHANNELS]; /* largest value stored in exponents[ch] */
    int16_t coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE];
    DECLARE_ALIGNED_16(float, coefs[MAX_CHANNELS][BLOCK_MAX_SIZE]);
    MDCTContext mdct_ctx[BLOCK_NB_SIZES]; /* one inverse MDCT context per block size */
    float *windows[BLOCK_NB_SIZES];       /* one sine window per block size, allocated at init */
    DECLARE_ALIGNED_16(FFTSample, mdct_tmp[BLOCK_MAX_SIZE]); /* temporary storage for imdct */
    /* output buffer for one frame and the last for IMDCT windowing */
    DECLARE_ALIGNED_16(float, frame_out[MAX_CHANNELS][BLOCK_MAX_SIZE * 2]);
    /* last frame info */
    uint8_t last_superframe[MAX_CODED_SUPERFRAME_SIZE + 4]; /* padding added */
    int last_bitoffset;
    int last_superframe_len;
    float noise_table[NOISE_TAB_SIZE]; /* pseudo-random values already scaled by noise_mult */
    int noise_index;
    float noise_mult; /* XXX: suppress that and integrate it in the noise array */
    /* lsp_to_curve tables */
    float lsp_cos_table[BLOCK_MAX_SIZE];          /* 2 * cos(pi * i / frame_len) */
    float lsp_pow_e_table[256];                   /* exponent part of the x^-0.25 approximation */
    float lsp_pow_m_table1[(1 << LSP_POW_BITS)];  /* mantissa interpolation: constant term */
    float lsp_pow_m_table2[(1 << LSP_POW_BITS)];  /* mantissa interpolation: slope term */
    DSPContext dsp;

#ifdef TRACE
    int frame_count;            /* only present in TRACE builds; used in trace output */
#endif
} WMADecodeContext;
139 | |
/**
 * Static description of one spectral coefficient VLC.
 *
 * init_coef_vlc() turns an entry of this type into a VLC plus the matching
 * run and level lookup tables.
 */
typedef struct CoefVLCTable {
    int n; /* total number of codes */
    const uint32_t *huffcodes; /* VLC bit values */
    const uint8_t *huffbits;   /* VLC bit size */
    const uint16_t *levels; /* table to build run/level tables */
} CoefVLCTable;
146 | |
147 static void wma_lsp_to_curve_init(WMADecodeContext *s, int frame_len); | |
148 | |
149 #include "wmadata.h" | |
150 | |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
151 #ifdef TRACE |
/**
 * Trace helper: print a table of 16-bit values, eight per row.
 *
 * Each row starts with the index of its first element. A partially filled
 * last row is terminated with a newline so that following trace output
 * starts on a fresh line, matching what dump_floats() does.
 *
 * @param name label printed in the header line
 * @param tab  values to print
 * @param n    number of values in tab
 */
static void dump_shorts(const char *name, const short *tab, int n)
{
    int i;

    tprintf("%s[%d]:\n", name, n);
    for(i=0;i<n;i++) {
        if ((i & 7) == 0)
            tprintf("%4d: ", i);
        tprintf(" %5d.0", tab[i]);
        if ((i & 7) == 7)
            tprintf("\n");
    }
    /* close an incomplete final row */
    if ((i & 7) != 0)
        tprintf("\n");
}
165 | |
/**
 * Trace helper: print a table of floats, eight per row.
 *
 * @param name label printed in the header line
 * @param prec number of digits after the decimal point
 * @param tab  values to print
 * @param n    number of values in tab
 */
static void dump_floats(const char *name, int prec, const float *tab, int n)
{
    int idx = 0;

    tprintf("%s[%d]:\n", name, n);
    while (idx < n) {
        const int column = idx & 7;

        /* each row is prefixed with the index of its first element */
        if (column == 0)
            tprintf("%4d: ", idx);
        tprintf(" %8.*f", prec, tab[idx]);
        if (column == 7)
            tprintf("\n");
        idx++;
    }
    /* close an incomplete final row */
    if (idx & 7)
        tprintf("\n");
}
181 #endif | |
182 | |
183 /* XXX: use same run/length optimization as mpeg decoders */ | |
2967 | 184 static void init_coef_vlc(VLC *vlc, |
783 | 185 uint16_t **prun_table, uint16_t **plevel_table, |
186 const CoefVLCTable *vlc_table) | |
187 { | |
188 int n = vlc_table->n; | |
189 const uint8_t *table_bits = vlc_table->huffbits; | |
190 const uint32_t *table_codes = vlc_table->huffcodes; | |
191 const uint16_t *levels_table = vlc_table->levels; | |
192 uint16_t *run_table, *level_table; | |
193 const uint16_t *p; | |
194 int i, l, j, level; | |
195 | |
3113 | 196 init_vlc(vlc, VLCBITS, n, table_bits, 1, 1, table_codes, 4, 4, 0); |
783 | 197 |
1031
19de1445beb2
use av_malloc() functions - added av_strdup and av_realloc()
bellard
parents:
1025
diff
changeset
|
198 run_table = av_malloc(n * sizeof(uint16_t)); |
19de1445beb2
use av_malloc() functions - added av_strdup and av_realloc()
bellard
parents:
1025
diff
changeset
|
199 level_table = av_malloc(n * sizeof(uint16_t)); |
783 | 200 p = levels_table; |
201 i = 2; | |
202 level = 1; | |
203 while (i < n) { | |
204 l = *p++; | |
205 for(j=0;j<l;j++) { | |
206 run_table[i] = j; | |
207 level_table[i] = level; | |
208 i++; | |
209 } | |
210 level++; | |
211 } | |
212 *prun_table = run_table; | |
213 *plevel_table = level_table; | |
214 } | |
215 | |
216 static int wma_decode_init(AVCodecContext * avctx) | |
217 { | |
218 WMADecodeContext *s = avctx->priv_data; | |
219 int i, flags1, flags2; | |
220 float *window; | |
221 uint8_t *extradata; | |
3235 | 222 float bps1, high_freq; |
223 volatile float bps; | |
783 | 224 int sample_rate1; |
225 int coef_vlc_table; | |
2967 | 226 |
783 | 227 s->sample_rate = avctx->sample_rate; |
228 s->nb_channels = avctx->channels; | |
229 s->bit_rate = avctx->bit_rate; | |
230 s->block_align = avctx->block_align; | |
231 | |
3592
6a358dccf2ab
SIMD vector optimizations. 3% faster overall decoding.
banan
parents:
3555
diff
changeset
|
232 dsputil_init(&s->dsp, avctx); |
6a358dccf2ab
SIMD vector optimizations. 3% faster overall decoding.
banan
parents:
3555
diff
changeset
|
233 |
808
e9bfaabcf07d
fixed nb_block_sizes detection - fixed codec_id test (avctx->codec_id does not need to be initialized)
bellard
parents:
797
diff
changeset
|
234 if (avctx->codec->id == CODEC_ID_WMAV1) { |
783 | 235 s->version = 1; |
236 } else { | |
237 s->version = 2; | |
238 } | |
2967 | 239 |
783 | 240 /* extract flag infos */ |
241 flags1 = 0; | |
242 flags2 = 0; | |
243 extradata = avctx->extradata; | |
244 if (s->version == 1 && avctx->extradata_size >= 4) { | |
245 flags1 = extradata[0] | (extradata[1] << 8); | |
246 flags2 = extradata[2] | (extradata[3] << 8); | |
247 } else if (s->version == 2 && avctx->extradata_size >= 6) { | |
2967 | 248 flags1 = extradata[0] | (extradata[1] << 8) | |
783 | 249 (extradata[2] << 16) | (extradata[3] << 24); |
250 flags2 = extradata[4] | (extradata[5] << 8); | |
251 } | |
252 s->use_exp_vlc = flags2 & 0x0001; | |
253 s->use_bit_reservoir = flags2 & 0x0002; | |
254 s->use_variable_block_len = flags2 & 0x0004; | |
255 | |
256 /* compute MDCT block size */ | |
257 if (s->sample_rate <= 16000) { | |
258 s->frame_len_bits = 9; | |
2967 | 259 } else if (s->sample_rate <= 22050 || |
795
55add0e7eafb
avoid name clash - fixed again block size selection
bellard
parents:
785
diff
changeset
|
260 (s->sample_rate <= 32000 && s->version == 1)) { |
783 | 261 s->frame_len_bits = 10; |
262 } else { | |
263 s->frame_len_bits = 11; | |
264 } | |
265 s->frame_len = 1 << s->frame_len_bits; | |
266 if (s->use_variable_block_len) { | |
808
e9bfaabcf07d
fixed nb_block_sizes detection - fixed codec_id test (avctx->codec_id does not need to be initialized)
bellard
parents:
797
diff
changeset
|
267 int nb_max, nb; |
e9bfaabcf07d
fixed nb_block_sizes detection - fixed codec_id test (avctx->codec_id does not need to be initialized)
bellard
parents:
797
diff
changeset
|
268 nb = ((flags2 >> 3) & 3) + 1; |
e9bfaabcf07d
fixed nb_block_sizes detection - fixed codec_id test (avctx->codec_id does not need to be initialized)
bellard
parents:
797
diff
changeset
|
269 if ((s->bit_rate / s->nb_channels) >= 32000) |
e9bfaabcf07d
fixed nb_block_sizes detection - fixed codec_id test (avctx->codec_id does not need to be initialized)
bellard
parents:
797
diff
changeset
|
270 nb += 2; |
e9bfaabcf07d
fixed nb_block_sizes detection - fixed codec_id test (avctx->codec_id does not need to be initialized)
bellard
parents:
797
diff
changeset
|
271 nb_max = s->frame_len_bits - BLOCK_MIN_BITS; |
e9bfaabcf07d
fixed nb_block_sizes detection - fixed codec_id test (avctx->codec_id does not need to be initialized)
bellard
parents:
797
diff
changeset
|
272 if (nb > nb_max) |
e9bfaabcf07d
fixed nb_block_sizes detection - fixed codec_id test (avctx->codec_id does not need to be initialized)
bellard
parents:
797
diff
changeset
|
273 nb = nb_max; |
e9bfaabcf07d
fixed nb_block_sizes detection - fixed codec_id test (avctx->codec_id does not need to be initialized)
bellard
parents:
797
diff
changeset
|
274 s->nb_block_sizes = nb + 1; |
783 | 275 } else { |
276 s->nb_block_sizes = 1; | |
277 } | |
278 | |
279 /* init rate dependant parameters */ | |
280 s->use_noise_coding = 1; | |
3235 | 281 high_freq = s->sample_rate * 0.5; |
783 | 282 |
283 /* if version 2, then the rates are normalized */ | |
284 sample_rate1 = s->sample_rate; | |
285 if (s->version == 2) { | |
2967 | 286 if (sample_rate1 >= 44100) |
783 | 287 sample_rate1 = 44100; |
2967 | 288 else if (sample_rate1 >= 22050) |
783 | 289 sample_rate1 = 22050; |
2967 | 290 else if (sample_rate1 >= 16000) |
783 | 291 sample_rate1 = 16000; |
2967 | 292 else if (sample_rate1 >= 11025) |
783 | 293 sample_rate1 = 11025; |
2967 | 294 else if (sample_rate1 >= 8000) |
783 | 295 sample_rate1 = 8000; |
296 } | |
297 | |
298 bps = (float)s->bit_rate / (float)(s->nb_channels * s->sample_rate); | |
2992 | 299 s->byte_offset_bits = av_log2((int)(bps * s->frame_len / 8.0 + 0.5)) + 2; |
783 | 300 |
301 /* compute high frequency value and choose if noise coding should | |
302 be activated */ | |
303 bps1 = bps; | |
304 if (s->nb_channels == 2) | |
305 bps1 = bps * 1.6; | |
306 if (sample_rate1 == 44100) { | |
307 if (bps1 >= 0.61) | |
308 s->use_noise_coding = 0; | |
309 else | |
3235 | 310 high_freq = high_freq * 0.4; |
783 | 311 } else if (sample_rate1 == 22050) { |
312 if (bps1 >= 1.16) | |
313 s->use_noise_coding = 0; | |
2967 | 314 else if (bps1 >= 0.72) |
3235 | 315 high_freq = high_freq * 0.7; |
783 | 316 else |
3235 | 317 high_freq = high_freq * 0.6; |
783 | 318 } else if (sample_rate1 == 16000) { |
319 if (bps > 0.5) | |
3235 | 320 high_freq = high_freq * 0.5; |
783 | 321 else |
3235 | 322 high_freq = high_freq * 0.3; |
783 | 323 } else if (sample_rate1 == 11025) { |
3235 | 324 high_freq = high_freq * 0.7; |
783 | 325 } else if (sample_rate1 == 8000) { |
326 if (bps <= 0.625) { | |
3235 | 327 high_freq = high_freq * 0.5; |
783 | 328 } else if (bps > 0.75) { |
329 s->use_noise_coding = 0; | |
330 } else { | |
3235 | 331 high_freq = high_freq * 0.65; |
783 | 332 } |
333 } else { | |
334 if (bps >= 0.8) { | |
3235 | 335 high_freq = high_freq * 0.75; |
783 | 336 } else if (bps >= 0.6) { |
3235 | 337 high_freq = high_freq * 0.6; |
783 | 338 } else { |
3235 | 339 high_freq = high_freq * 0.5; |
783 | 340 } |
341 } | |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
342 dprintf("flags1=0x%x flags2=0x%x\n", flags1, flags2); |
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
343 dprintf("version=%d channels=%d sample_rate=%d bitrate=%d block_align=%d\n", |
2967 | 344 s->version, s->nb_channels, s->sample_rate, s->bit_rate, |
783 | 345 s->block_align); |
3235 | 346 dprintf("bps=%f bps1=%f high_freq=%f bitoffset=%d\n", |
347 bps, bps1, high_freq, s->byte_offset_bits); | |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
348 dprintf("use_noise_coding=%d use_exp_vlc=%d nb_block_sizes=%d\n", |
808
e9bfaabcf07d
fixed nb_block_sizes detection - fixed codec_id test (avctx->codec_id does not need to be initialized)
bellard
parents:
797
diff
changeset
|
349 s->use_noise_coding, s->use_exp_vlc, s->nb_block_sizes); |
783 | 350 |
351 /* compute the scale factor band sizes for each MDCT block size */ | |
352 { | |
353 int a, b, pos, lpos, k, block_len, i, j, n; | |
354 const uint8_t *table; | |
2967 | 355 |
783 | 356 if (s->version == 1) { |
357 s->coefs_start = 3; | |
358 } else { | |
359 s->coefs_start = 0; | |
360 } | |
361 for(k = 0; k < s->nb_block_sizes; k++) { | |
362 block_len = s->frame_len >> k; | |
363 | |
364 if (s->version == 1) { | |
365 lpos = 0; | |
366 for(i=0;i<25;i++) { | |
367 a = wma_critical_freqs[i]; | |
368 b = s->sample_rate; | |
369 pos = ((block_len * 2 * a) + (b >> 1)) / b; | |
2967 | 370 if (pos > block_len) |
783 | 371 pos = block_len; |
372 s->exponent_bands[0][i] = pos - lpos; | |
373 if (pos >= block_len) { | |
374 i++; | |
375 break; | |
376 } | |
377 lpos = pos; | |
378 } | |
379 s->exponent_sizes[0] = i; | |
380 } else { | |
381 /* hardcoded tables */ | |
382 table = NULL; | |
383 a = s->frame_len_bits - BLOCK_MIN_BITS - k; | |
384 if (a < 3) { | |
385 if (s->sample_rate >= 44100) | |
386 table = exponent_band_44100[a]; | |
387 else if (s->sample_rate >= 32000) | |
388 table = exponent_band_32000[a]; | |
389 else if (s->sample_rate >= 22050) | |
390 table = exponent_band_22050[a]; | |
391 } | |
392 if (table) { | |
393 n = *table++; | |
394 for(i=0;i<n;i++) | |
395 s->exponent_bands[k][i] = table[i]; | |
396 s->exponent_sizes[k] = n; | |
397 } else { | |
398 j = 0; | |
399 lpos = 0; | |
400 for(i=0;i<25;i++) { | |
401 a = wma_critical_freqs[i]; | |
402 b = s->sample_rate; | |
403 pos = ((block_len * 2 * a) + (b << 1)) / (4 * b); | |
404 pos <<= 2; | |
2967 | 405 if (pos > block_len) |
783 | 406 pos = block_len; |
407 if (pos > lpos) | |
408 s->exponent_bands[k][j++] = pos - lpos; | |
409 if (pos >= block_len) | |
410 break; | |
411 lpos = pos; | |
412 } | |
413 s->exponent_sizes[k] = j; | |
414 } | |
415 } | |
416 | |
417 /* max number of coefs */ | |
418 s->coefs_end[k] = (s->frame_len - ((s->frame_len * 9) / 100)) >> k; | |
419 /* high freq computation */ | |
3235 | 420 s->high_band_start[k] = (int)((block_len * 2 * high_freq) / |
421 s->sample_rate + 0.5); | |
783 | 422 n = s->exponent_sizes[k]; |
423 j = 0; | |
424 pos = 0; | |
425 for(i=0;i<n;i++) { | |
426 int start, end; | |
427 start = pos; | |
428 pos += s->exponent_bands[k][i]; | |
429 end = pos; | |
430 if (start < s->high_band_start[k]) | |
431 start = s->high_band_start[k]; | |
432 if (end > s->coefs_end[k]) | |
433 end = s->coefs_end[k]; | |
434 if (end > start) | |
435 s->exponent_high_bands[k][j++] = end - start; | |
436 } | |
437 s->exponent_high_sizes[k] = j; | |
438 #if 0 | |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
439 tprintf("%5d: coefs_end=%d high_band_start=%d nb_high_bands=%d: ", |
2967 | 440 s->frame_len >> k, |
783 | 441 s->coefs_end[k], |
442 s->high_band_start[k], | |
443 s->exponent_high_sizes[k]); | |
444 for(j=0;j<s->exponent_high_sizes[k];j++) | |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
445 tprintf(" %d", s->exponent_high_bands[k][j]); |
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
446 tprintf("\n"); |
783 | 447 #endif |
448 } | |
449 } | |
450 | |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
451 #ifdef TRACE |
783 | 452 { |
453 int i, j; | |
454 for(i = 0; i < s->nb_block_sizes; i++) { | |
2967 | 455 tprintf("%5d: n=%2d:", |
456 s->frame_len >> i, | |
783 | 457 s->exponent_sizes[i]); |
458 for(j=0;j<s->exponent_sizes[i];j++) | |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
459 tprintf(" %d", s->exponent_bands[i][j]); |
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
460 tprintf("\n"); |
783 | 461 } |
462 } | |
463 #endif | |
464 | |
465 /* init MDCT */ | |
466 for(i = 0; i < s->nb_block_sizes; i++) | |
795
55add0e7eafb
avoid name clash - fixed again block size selection
bellard
parents:
785
diff
changeset
|
467 ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 1); |
2967 | 468 |
783 | 469 /* init MDCT windows : simple sinus window */ |
470 for(i = 0; i < s->nb_block_sizes; i++) { | |
471 int n, j; | |
472 float alpha; | |
473 n = 1 << (s->frame_len_bits - i); | |
474 window = av_malloc(sizeof(float) * n); | |
475 alpha = M_PI / (2.0 * n); | |
476 for(j=0;j<n;j++) { | |
477 window[n - j - 1] = sin((j + 0.5) * alpha); | |
478 } | |
479 s->windows[i] = window; | |
480 } | |
481 | |
482 s->reset_block_lengths = 1; | |
2967 | 483 |
783 | 484 if (s->use_noise_coding) { |
485 | |
486 /* init the noise generator */ | |
487 if (s->use_exp_vlc) | |
488 s->noise_mult = 0.02; | |
489 else | |
490 s->noise_mult = 0.04; | |
2967 | 491 |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
492 #ifdef TRACE |
783 | 493 for(i=0;i<NOISE_TAB_SIZE;i++) |
494 s->noise_table[i] = 1.0 * s->noise_mult; | |
495 #else | |
496 { | |
497 unsigned int seed; | |
498 float norm; | |
499 seed = 1; | |
500 norm = (1.0 / (float)(1LL << 31)) * sqrt(3) * s->noise_mult; | |
501 for(i=0;i<NOISE_TAB_SIZE;i++) { | |
502 seed = seed * 314159 + 1; | |
503 s->noise_table[i] = (float)((int)seed) * norm; | |
504 } | |
505 } | |
506 #endif | |
3113 | 507 init_vlc(&s->hgain_vlc, HGAINVLCBITS, sizeof(hgain_huffbits), |
783 | 508 hgain_huffbits, 1, 1, |
2370
26560d4fdb1f
Memory leak fix patch by (Burkhard Plaum <plaum >at< ipf.uni-stuttgart )dot( de>)
michael
parents:
2180
diff
changeset
|
509 hgain_huffcodes, 2, 2, 0); |
783 | 510 } |
511 | |
512 if (s->use_exp_vlc) { | |
3113 | 513 init_vlc(&s->exp_vlc, EXPVLCBITS, sizeof(scale_huffbits), |
783 | 514 scale_huffbits, 1, 1, |
2370
26560d4fdb1f
Memory leak fix patch by (Burkhard Plaum <plaum >at< ipf.uni-stuttgart )dot( de>)
michael
parents:
2180
diff
changeset
|
515 scale_huffcodes, 4, 4, 0); |
783 | 516 } else { |
517 wma_lsp_to_curve_init(s, s->frame_len); | |
518 } | |
519 | |
520 /* choose the VLC tables for the coefficients */ | |
521 coef_vlc_table = 2; | |
522 if (s->sample_rate >= 32000) { | |
523 if (bps1 < 0.72) | |
524 coef_vlc_table = 0; | |
525 else if (bps1 < 1.16) | |
526 coef_vlc_table = 1; | |
527 } | |
528 | |
529 init_coef_vlc(&s->coef_vlc[0], &s->run_table[0], &s->level_table[0], | |
530 &coef_vlcs[coef_vlc_table * 2]); | |
531 init_coef_vlc(&s->coef_vlc[1], &s->run_table[1], &s->level_table[1], | |
532 &coef_vlcs[coef_vlc_table * 2 + 1]); | |
533 return 0; | |
534 } | |
535 | |
/**
 * Resample an array in place to a bigger or smaller block size.
 *
 * One size must be an integer multiple of the other. When growing, every
 * source value is repeated new_size / old_size times; the copy runs from the
 * end of the array so that source values are read before they are
 * overwritten. When shrinking, every (old_size / new_size)-th value is kept.
 * Equal sizes, or a non-positive size, leave the array untouched.
 *
 * @param scale    array holding old_size values on entry; must have room for
 *                 max(old_size, new_size) values
 * @param old_size number of valid values on entry
 * @param new_size number of values wanted on return
 */
static void interpolate_array(float *scale, int old_size, int new_size)
{
    int i, j, jincr, k;
    float v;

    /* an empty source or destination would make the ratio computations
       below divide by zero */
    if (old_size <= 0 || new_size <= 0)
        return;

    if (new_size > old_size) {
        jincr = new_size / old_size;
        j = new_size;
        for(i = old_size - 1; i >=0; i--) {
            v = scale[i];
            k = jincr;
            do {
                scale[--j] = v;
            } while (--k);
        }
    } else if (new_size < old_size) {
        j = 0;
        jincr = old_size / new_size;
        for(i = 0; i < new_size; i++) {
            scale[i] = scale[j];
            j += jincr;
        }
    }
}
562 | |
563 /* compute x^-0.25 with an exponent and mantissa table. We use linear | |
564 interpolation to reduce the mantissa table size at a small speed | |
565 expense (linear interpolation approximately doubles the number of | |
566 bits of precision). */ | |
567 static inline float pow_m1_4(WMADecodeContext *s, float x) | |
568 { | |
569 union { | |
570 float f; | |
571 unsigned int v; | |
572 } u, t; | |
573 unsigned int e, m; | |
574 float a, b; | |
575 | |
576 u.f = x; | |
577 e = u.v >> 23; | |
578 m = (u.v >> (23 - LSP_POW_BITS)) & ((1 << LSP_POW_BITS) - 1); | |
579 /* build interpolation scale: 1 <= t < 2. */ | |
580 t.v = ((u.v << LSP_POW_BITS) & ((1 << 23) - 1)) | (127 << 23); | |
581 a = s->lsp_pow_m_table1[m]; | |
582 b = s->lsp_pow_m_table2[m]; | |
583 return s->lsp_pow_e_table[e] * (a + b * t.f); | |
584 } | |
585 | |
586 static void wma_lsp_to_curve_init(WMADecodeContext *s, int frame_len) | |
2967 | 587 { |
783 | 588 float wdel, a, b; |
589 int i, e, m; | |
590 | |
591 wdel = M_PI / frame_len; | |
592 for(i=0;i<frame_len;i++) | |
593 s->lsp_cos_table[i] = 2.0f * cos(wdel * i); | |
594 | |
595 /* tables for x^-0.25 computation */ | |
596 for(i=0;i<256;i++) { | |
597 e = i - 126; | |
598 s->lsp_pow_e_table[i] = pow(2.0, e * -0.25); | |
599 } | |
600 | |
601 /* NOTE: these two tables are needed to avoid two operations in | |
602 pow_m1_4 */ | |
603 b = 1.0; | |
604 for(i=(1 << LSP_POW_BITS) - 1;i>=0;i--) { | |
605 m = (1 << LSP_POW_BITS) + i; | |
606 a = (float)m * (0.5 / (1 << LSP_POW_BITS)); | |
607 a = pow(a, -0.25); | |
608 s->lsp_pow_m_table1[i] = 2 * a - b; | |
609 s->lsp_pow_m_table2[i] = b - a; | |
610 b = a; | |
611 } | |
612 #if 0 | |
613 for(i=1;i<20;i++) { | |
614 float v, r1, r2; | |
615 v = 5.0 / i; | |
616 r1 = pow_m1_4(s, v); | |
617 r2 = pow(v,-0.25); | |
618 printf("%f^-0.25=%f e=%f\n", v, r1, r2 - r1); | |
619 } | |
620 #endif | |
621 } | |
622 | |
623 /* NOTE: We use the same code as Vorbis here */ | |
624 /* XXX: optimize it further with SSE/3Dnow */ | |
2967 | 625 static void wma_lsp_to_curve(WMADecodeContext *s, |
626 float *out, float *val_max_ptr, | |
783 | 627 int n, float *lsp) |
628 { | |
629 int i, j; | |
630 float p, q, w, v, val_max; | |
631 | |
632 val_max = 0; | |
633 for(i=0;i<n;i++) { | |
634 p = 0.5f; | |
635 q = 0.5f; | |
636 w = s->lsp_cos_table[i]; | |
637 for(j=1;j<NB_LSP_COEFS;j+=2){ | |
638 q *= w - lsp[j - 1]; | |
639 p *= w - lsp[j]; | |
640 } | |
641 p *= p * (2.0f - w); | |
642 q *= q * (2.0f + w); | |
643 v = p + q; | |
644 v = pow_m1_4(s, v); | |
645 if (v > val_max) | |
646 val_max = v; | |
647 out[i] = v; | |
648 } | |
649 *val_max_ptr = val_max; | |
650 } | |
651 | |
652 /* decode exponents coded with LSP coefficients (same idea as Vorbis) */ | |
653 static void decode_exp_lsp(WMADecodeContext *s, int ch) | |
654 { | |
655 float lsp_coefs[NB_LSP_COEFS]; | |
656 int val, i; | |
657 | |
658 for(i = 0; i < NB_LSP_COEFS; i++) { | |
659 if (i == 0 || i >= 8) | |
660 val = get_bits(&s->gb, 3); | |
661 else | |
662 val = get_bits(&s->gb, 4); | |
663 lsp_coefs[i] = lsp_codebook[i][val]; | |
664 } | |
665 | |
666 wma_lsp_to_curve(s, s->exponents[ch], &s->max_exponent[ch], | |
667 s->block_len, lsp_coefs); | |
668 } | |
669 | |
670 /* decode exponents coded with VLC codes */ | |
671 static int decode_exp_vlc(WMADecodeContext *s, int ch) | |
672 { | |
673 int last_exp, n, code; | |
674 const uint16_t *ptr, *band_ptr; | |
675 float v, *q, max_scale, *q_end; | |
2967 | 676 |
783 | 677 band_ptr = s->exponent_bands[s->frame_len_bits - s->block_len_bits]; |
678 ptr = band_ptr; | |
679 q = s->exponents[ch]; | |
680 q_end = q + s->block_len; | |
681 max_scale = 0; | |
682 if (s->version == 1) { | |
683 last_exp = get_bits(&s->gb, 5) + 10; | |
3235 | 684 /* XXX: use a table */ |
685 v = pow(10, last_exp * (1.0 / 16.0)); | |
783 | 686 max_scale = v; |
687 n = *ptr++; | |
688 do { | |
689 *q++ = v; | |
690 } while (--n); | |
691 } | |
692 last_exp = 36; | |
693 while (q < q_end) { | |
3113 | 694 code = get_vlc2(&s->gb, s->exp_vlc.table, EXPVLCBITS, EXPMAX); |
783 | 695 if (code < 0) |
696 return -1; | |
697 /* NOTE: this offset is the same as MPEG4 AAC ! */ | |
698 last_exp += code - 60; | |
3235 | 699 /* XXX: use a table */ |
700 v = pow(10, last_exp * (1.0 / 16.0)); | |
783 | 701 if (v > max_scale) |
702 max_scale = v; | |
703 n = *ptr++; | |
704 do { | |
705 *q++ = v; | |
706 } while (--n); | |
707 } | |
708 s->max_exponent[ch] = max_scale; | |
709 return 0; | |
710 } | |
711 | |
712 /* return 0 if OK. return 1 if last block of frame. return -1 if | |
713 unrecoverable error. */ | |
714 static int wma_decode_block(WMADecodeContext *s) | |
715 { | |
716 int n, v, a, ch, code, bsize; | |
717 int coef_nb_bits, total_gain, parse_exponents; | |
718 float window[BLOCK_MAX_SIZE * 2]; | |
2101 | 719 // XXX: FIXME!! there's a bug somewhere which makes this mandatory under altivec |
720 #ifdef HAVE_ALTIVEC | |
721 volatile int nb_coefs[MAX_CHANNELS] __attribute__((aligned(16))); | |
722 #else | |
783 | 723 int nb_coefs[MAX_CHANNELS]; |
2101 | 724 #endif |
783 | 725 float mdct_norm; |
726 | |
1343 | 727 #ifdef TRACE |
728 tprintf("***decode_block: %d:%d\n", s->frame_count - 1, s->block_num); | |
729 #endif | |
783 | 730 |
731 /* compute current block length */ | |
732 if (s->use_variable_block_len) { | |
733 n = av_log2(s->nb_block_sizes - 1) + 1; | |
2967 | 734 |
783 | 735 if (s->reset_block_lengths) { |
736 s->reset_block_lengths = 0; | |
737 v = get_bits(&s->gb, n); | |
738 if (v >= s->nb_block_sizes) | |
739 return -1; | |
740 s->prev_block_len_bits = s->frame_len_bits - v; | |
741 v = get_bits(&s->gb, n); | |
742 if (v >= s->nb_block_sizes) | |
743 return -1; | |
744 s->block_len_bits = s->frame_len_bits - v; | |
745 } else { | |
746 /* update block lengths */ | |
747 s->prev_block_len_bits = s->block_len_bits; | |
748 s->block_len_bits = s->next_block_len_bits; | |
749 } | |
750 v = get_bits(&s->gb, n); | |
751 if (v >= s->nb_block_sizes) | |
752 return -1; | |
753 s->next_block_len_bits = s->frame_len_bits - v; | |
754 } else { | |
755 /* fixed block len */ | |
756 s->next_block_len_bits = s->frame_len_bits; | |
757 s->prev_block_len_bits = s->frame_len_bits; | |
758 s->block_len_bits = s->frame_len_bits; | |
759 } | |
760 | |
761 /* now check if the block length is coherent with the frame length */ | |
762 s->block_len = 1 << s->block_len_bits; | |
763 if ((s->block_pos + s->block_len) > s->frame_len) | |
764 return -1; | |
765 | |
766 if (s->nb_channels == 2) { | |
767 s->ms_stereo = get_bits(&s->gb, 1); | |
768 } | |
769 v = 0; | |
770 for(ch = 0; ch < s->nb_channels; ch++) { | |
771 a = get_bits(&s->gb, 1); | |
772 s->channel_coded[ch] = a; | |
773 v |= a; | |
774 } | |
775 /* if no channel coded, no need to go further */ | |
776 /* XXX: fix potential framing problems */ | |
777 if (!v) | |
778 goto next; | |
779 | |
780 bsize = s->frame_len_bits - s->block_len_bits; | |
781 | |
782 /* read total gain and extract corresponding number of bits for | |
783 coef escape coding */ | |
784 total_gain = 1; | |
785 for(;;) { | |
786 a = get_bits(&s->gb, 7); | |
787 total_gain += a; | |
788 if (a != 127) | |
789 break; | |
790 } | |
2967 | 791 |
783 | 792 if (total_gain < 15) |
793 coef_nb_bits = 13; | |
794 else if (total_gain < 32) | |
795 coef_nb_bits = 12; | |
796 else if (total_gain < 40) | |
797 coef_nb_bits = 11; | |
798 else if (total_gain < 45) | |
799 coef_nb_bits = 10; | |
800 else | |
801 coef_nb_bits = 9; | |
802 | |
803 /* compute number of coefficients */ | |
804 n = s->coefs_end[bsize] - s->coefs_start; | |
805 for(ch = 0; ch < s->nb_channels; ch++) | |
806 nb_coefs[ch] = n; | |
807 | |
808 /* complex coding */ | |
809 if (s->use_noise_coding) { | |
810 | |
811 for(ch = 0; ch < s->nb_channels; ch++) { | |
812 if (s->channel_coded[ch]) { | |
813 int i, n, a; | |
814 n = s->exponent_high_sizes[bsize]; | |
815 for(i=0;i<n;i++) { | |
816 a = get_bits(&s->gb, 1); | |
817 s->high_band_coded[ch][i] = a; | |
818 /* if noise coding, the coefficients are not transmitted */ | |
819 if (a) | |
820 nb_coefs[ch] -= s->exponent_high_bands[bsize][i]; | |
821 } | |
822 } | |
823 } | |
824 for(ch = 0; ch < s->nb_channels; ch++) { | |
825 if (s->channel_coded[ch]) { | |
826 int i, n, val, code; | |
827 | |
828 n = s->exponent_high_sizes[bsize]; | |
829 val = (int)0x80000000; | |
830 for(i=0;i<n;i++) { | |
831 if (s->high_band_coded[ch][i]) { | |
832 if (val == (int)0x80000000) { | |
833 val = get_bits(&s->gb, 7) - 19; | |
834 } else { | |
3113 | 835 code = get_vlc2(&s->gb, s->hgain_vlc.table, HGAINVLCBITS, HGAINMAX); |
783 | 836 if (code < 0) |
837 return -1; | |
838 val += code - 18; | |
839 } | |
840 s->high_band_values[ch][i] = val; | |
841 } | |
842 } | |
843 } | |
844 } | |
845 } | |
2967 | 846 |
783 | 847 /* exposant can be interpolated in short blocks. */ |
848 parse_exponents = 1; | |
849 if (s->block_len_bits != s->frame_len_bits) { | |
850 parse_exponents = get_bits(&s->gb, 1); | |
851 } | |
2967 | 852 |
783 | 853 if (parse_exponents) { |
854 for(ch = 0; ch < s->nb_channels; ch++) { | |
855 if (s->channel_coded[ch]) { | |
856 if (s->use_exp_vlc) { | |
857 if (decode_exp_vlc(s, ch) < 0) | |
858 return -1; | |
859 } else { | |
860 decode_exp_lsp(s, ch); | |
861 } | |
862 } | |
863 } | |
864 } else { | |
865 for(ch = 0; ch < s->nb_channels; ch++) { | |
866 if (s->channel_coded[ch]) { | |
2967 | 867 interpolate_array(s->exponents[ch], 1 << s->prev_block_len_bits, |
783 | 868 s->block_len); |
869 } | |
870 } | |
871 } | |
872 | |
873 /* parse spectral coefficients : just RLE encoding */ | |
874 for(ch = 0; ch < s->nb_channels; ch++) { | |
875 if (s->channel_coded[ch]) { | |
876 VLC *coef_vlc; | |
877 int level, run, sign, tindex; | |
878 int16_t *ptr, *eptr; | |
879 const int16_t *level_table, *run_table; | |
880 | |
881 /* special VLC tables are used for ms stereo because | |
882 there is potentially less energy there */ | |
883 tindex = (ch == 1 && s->ms_stereo); | |
884 coef_vlc = &s->coef_vlc[tindex]; | |
885 run_table = s->run_table[tindex]; | |
886 level_table = s->level_table[tindex]; | |
887 /* XXX: optimize */ | |
888 ptr = &s->coefs1[ch][0]; | |
889 eptr = ptr + nb_coefs[ch]; | |
890 memset(ptr, 0, s->block_len * sizeof(int16_t)); | |
891 for(;;) { | |
3113 | 892 code = get_vlc2(&s->gb, coef_vlc->table, VLCBITS, VLCMAX); |
783 | 893 if (code < 0) |
894 return -1; | |
895 if (code == 1) { | |
896 /* EOB */ | |
897 break; | |
898 } else if (code == 0) { | |
899 /* escape */ | |
900 level = get_bits(&s->gb, coef_nb_bits); | |
901 /* NOTE: this is rather suboptimal. reading | |
902 block_len_bits would be better */ | |
903 run = get_bits(&s->gb, s->frame_len_bits); | |
904 } else { | |
905 /* normal code */ | |
906 run = run_table[code]; | |
907 level = level_table[code]; | |
908 } | |
909 sign = get_bits(&s->gb, 1); | |
910 if (!sign) | |
911 level = -level; | |
912 ptr += run; | |
913 if (ptr >= eptr) | |
3361 | 914 { |
915 av_log(NULL, AV_LOG_ERROR, "overflow in spectral RLE, ignoring\n"); | |
916 break; | |
917 } | |
783 | 918 *ptr++ = level; |
919 /* NOTE: EOB can be omitted */ | |
920 if (ptr >= eptr) | |
921 break; | |
922 } | |
923 } | |
924 if (s->version == 1 && s->nb_channels >= 2) { | |
925 align_get_bits(&s->gb); | |
926 } | |
927 } | |
2967 | 928 |
783 | 929 /* normalize */ |
930 { | |
931 int n4 = s->block_len / 2; | |
932 mdct_norm = 1.0 / (float)n4; | |
933 if (s->version == 1) { | |
934 mdct_norm *= sqrt(n4); | |
935 } | |
936 } | |
937 | |
938 /* finally compute the MDCT coefficients */ | |
939 for(ch = 0; ch < s->nb_channels; ch++) { | |
940 if (s->channel_coded[ch]) { | |
941 int16_t *coefs1; | |
942 float *coefs, *exponents, mult, mult1, noise, *exp_ptr; | |
943 int i, j, n, n1, last_high_band; | |
944 float exp_power[HIGH_BAND_MAX_SIZE]; | |
945 | |
946 coefs1 = s->coefs1[ch]; | |
947 exponents = s->exponents[ch]; | |
3235 | 948 mult = pow(10, total_gain * 0.05) / s->max_exponent[ch]; |
783 | 949 mult *= mdct_norm; |
950 coefs = s->coefs[ch]; | |
951 if (s->use_noise_coding) { | |
952 mult1 = mult; | |
953 /* very low freqs : noise */ | |
954 for(i = 0;i < s->coefs_start; i++) { | |
955 *coefs++ = s->noise_table[s->noise_index] * (*exponents++) * mult1; | |
956 s->noise_index = (s->noise_index + 1) & (NOISE_TAB_SIZE - 1); | |
957 } | |
2967 | 958 |
783 | 959 n1 = s->exponent_high_sizes[bsize]; |
960 | |
961 /* compute power of high bands */ | |
2967 | 962 exp_ptr = exponents + |
963 s->high_band_start[bsize] - | |
783 | 964 s->coefs_start; |
965 last_high_band = 0; /* avoid warning */ | |
966 for(j=0;j<n1;j++) { | |
2967 | 967 n = s->exponent_high_bands[s->frame_len_bits - |
783 | 968 s->block_len_bits][j]; |
969 if (s->high_band_coded[ch][j]) { | |
970 float e2, v; | |
971 e2 = 0; | |
972 for(i = 0;i < n; i++) { | |
973 v = exp_ptr[i]; | |
974 e2 += v * v; | |
975 } | |
976 exp_power[j] = e2 / n; | |
977 last_high_band = j; | |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
978 tprintf("%d: power=%f (%d)\n", j, exp_power[j], n); |
783 | 979 } |
980 exp_ptr += n; | |
981 } | |
982 | |
983 /* main freqs and high freqs */ | |
984 for(j=-1;j<n1;j++) { | |
985 if (j < 0) { | |
2967 | 986 n = s->high_band_start[bsize] - |
783 | 987 s->coefs_start; |
988 } else { | |
2967 | 989 n = s->exponent_high_bands[s->frame_len_bits - |
783 | 990 s->block_len_bits][j]; |
991 } | |
992 if (j >= 0 && s->high_band_coded[ch][j]) { | |
993 /* use noise with specified power */ | |
994 mult1 = sqrt(exp_power[j] / exp_power[last_high_band]); | |
3235 | 995 /* XXX: use a table */ |
996 mult1 = mult1 * pow(10, s->high_band_values[ch][j] * 0.05); | |
783 | 997 mult1 = mult1 / (s->max_exponent[ch] * s->noise_mult); |
998 mult1 *= mdct_norm; | |
999 for(i = 0;i < n; i++) { | |
1000 noise = s->noise_table[s->noise_index]; | |
1001 s->noise_index = (s->noise_index + 1) & (NOISE_TAB_SIZE - 1); | |
1002 *coefs++ = (*exponents++) * noise * mult1; | |
1003 } | |
1004 } else { | |
1005 /* coded values + small noise */ | |
1006 for(i = 0;i < n; i++) { | |
1007 noise = s->noise_table[s->noise_index]; | |
1008 s->noise_index = (s->noise_index + 1) & (NOISE_TAB_SIZE - 1); | |
1009 *coefs++ = ((*coefs1++) + noise) * (*exponents++) * mult; | |
1010 } | |
1011 } | |
1012 } | |
1013 | |
1014 /* very high freqs : noise */ | |
1015 n = s->block_len - s->coefs_end[bsize]; | |
1016 mult1 = mult * exponents[-1]; | |
1017 for(i = 0; i < n; i++) { | |
1018 *coefs++ = s->noise_table[s->noise_index] * mult1; | |
1019 s->noise_index = (s->noise_index + 1) & (NOISE_TAB_SIZE - 1); | |
1020 } | |
1021 } else { | |
1022 /* XXX: optimize more */ | |
1023 for(i = 0;i < s->coefs_start; i++) | |
1024 *coefs++ = 0.0; | |
1025 n = nb_coefs[ch]; | |
1026 for(i = 0;i < n; i++) { | |
1027 *coefs++ = coefs1[i] * exponents[i] * mult; | |
1028 } | |
1029 n = s->block_len - s->coefs_end[bsize]; | |
1030 for(i = 0;i < n; i++) | |
1031 *coefs++ = 0.0; | |
1032 } | |
1033 } | |
1034 } | |
1035 | |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
1036 #ifdef TRACE |
783 | 1037 for(ch = 0; ch < s->nb_channels; ch++) { |
1038 if (s->channel_coded[ch]) { | |
1039 dump_floats("exponents", 3, s->exponents[ch], s->block_len); | |
1040 dump_floats("coefs", 1, s->coefs[ch], s->block_len); | |
1041 } | |
1042 } | |
1043 #endif | |
2967 | 1044 |
783 | 1045 if (s->ms_stereo && s->channel_coded[1]) { |
1046 float a, b; | |
1047 int i; | |
1048 | |
1049 /* nominal case for ms stereo: we do it before mdct */ | |
1050 /* no need to optimize this case because it should almost | |
1051 never happen */ | |
1052 if (!s->channel_coded[0]) { | |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
1053 tprintf("rare ms-stereo case happened\n"); |
783 | 1054 memset(s->coefs[0], 0, sizeof(float) * s->block_len); |
1055 s->channel_coded[0] = 1; | |
1056 } | |
2967 | 1057 |
783 | 1058 for(i = 0; i < s->block_len; i++) { |
1059 a = s->coefs[0][i]; | |
1060 b = s->coefs[1][i]; | |
1061 s->coefs[0][i] = a + b; | |
1062 s->coefs[1][i] = a - b; | |
1063 } | |
1064 } | |
1065 | |
1066 /* build the window : we ensure that when the windows overlap | |
1067 their squared sum is always 1 (MDCT reconstruction rule) */ | |
1068 /* XXX: merge with output */ | |
1069 { | |
1070 int i, next_block_len, block_len, prev_block_len, n; | |
1071 float *wptr; | |
1072 | |
1073 block_len = s->block_len; | |
1074 prev_block_len = 1 << s->prev_block_len_bits; | |
1075 next_block_len = 1 << s->next_block_len_bits; | |
1076 | |
1077 /* right part */ | |
1078 wptr = window + block_len; | |
1079 if (block_len <= next_block_len) { | |
1080 for(i=0;i<block_len;i++) | |
1081 *wptr++ = s->windows[bsize][i]; | |
1082 } else { | |
1083 /* overlap */ | |
1084 n = (block_len / 2) - (next_block_len / 2); | |
1085 for(i=0;i<n;i++) | |
1086 *wptr++ = 1.0; | |
1087 for(i=0;i<next_block_len;i++) | |
1088 *wptr++ = s->windows[s->frame_len_bits - s->next_block_len_bits][i]; | |
1089 for(i=0;i<n;i++) | |
1090 *wptr++ = 0.0; | |
1091 } | |
1092 | |
1093 /* left part */ | |
1094 wptr = window + block_len; | |
1095 if (block_len <= prev_block_len) { | |
1096 for(i=0;i<block_len;i++) | |
1097 *--wptr = s->windows[bsize][i]; | |
1098 } else { | |
1099 /* overlap */ | |
1100 n = (block_len / 2) - (prev_block_len / 2); | |
1101 for(i=0;i<n;i++) | |
1102 *--wptr = 1.0; | |
1103 for(i=0;i<prev_block_len;i++) | |
1104 *--wptr = s->windows[s->frame_len_bits - s->prev_block_len_bits][i]; | |
1105 for(i=0;i<n;i++) | |
1106 *--wptr = 0.0; | |
1107 } | |
1108 } | |
1109 | |
2967 | 1110 |
783 | 1111 for(ch = 0; ch < s->nb_channels; ch++) { |
1112 if (s->channel_coded[ch]) { | |
3089 | 1113 DECLARE_ALIGNED_16(FFTSample, output[BLOCK_MAX_SIZE * 2]); |
783 | 1114 float *ptr; |
3592
6a358dccf2ab
SIMD vector optimizations. 3% faster overall decoding.
banan
parents:
3555
diff
changeset
|
1115 int n4, index, n; |
783 | 1116 |
1117 n = s->block_len; | |
1118 n4 = s->block_len / 2; | |
3555 | 1119 s->mdct_ctx[bsize].fft.imdct_calc(&s->mdct_ctx[bsize], |
795
55add0e7eafb
avoid name clash - fixed again block size selection
bellard
parents:
785
diff
changeset
|
1120 output, s->coefs[ch], s->mdct_tmp); |
783 | 1121 |
1122 /* XXX: optimize all that by build the window and | |
1123 multipying/adding at the same time */ | |
1124 | |
3592
6a358dccf2ab
SIMD vector optimizations. 3% faster overall decoding.
banan
parents:
3555
diff
changeset
|
1125 /* multiply by the window and add in the frame */ |
783 | 1126 index = (s->frame_len / 2) + s->block_pos - n4; |
1127 ptr = &s->frame_out[ch][index]; | |
3592
6a358dccf2ab
SIMD vector optimizations. 3% faster overall decoding.
banan
parents:
3555
diff
changeset
|
1128 s->dsp.vector_fmul_add_add(ptr,window,output,ptr,0,2*n,1); |
783 | 1129 |
1130 /* specific fast case for ms-stereo : add to second | |
1131 channel if it is not coded */ | |
1132 if (s->ms_stereo && !s->channel_coded[1]) { | |
1133 ptr = &s->frame_out[1][index]; | |
3592
6a358dccf2ab
SIMD vector optimizations. 3% faster overall decoding.
banan
parents:
3555
diff
changeset
|
1134 s->dsp.vector_fmul_add_add(ptr,window,output,ptr,0,2*n,1); |
783 | 1135 } |
1136 } | |
1137 } | |
1138 next: | |
1139 /* update block number */ | |
1140 s->block_num++; | |
1141 s->block_pos += s->block_len; | |
1142 if (s->block_pos >= s->frame_len) | |
1143 return 1; | |
1144 else | |
1145 return 0; | |
1146 } | |
1147 | |
1148 /* decode a frame of frame_len samples */ | |
1149 static int wma_decode_frame(WMADecodeContext *s, int16_t *samples) | |
1150 { | |
1151 int ret, i, n, a, ch, incr; | |
1152 int16_t *ptr; | |
1153 float *iptr; | |
1154 | |
1343 | 1155 #ifdef TRACE |
1156 tprintf("***decode_frame: %d size=%d\n", s->frame_count++, s->frame_len); | |
1157 #endif | |
783 | 1158 |
1159 /* read each block */ | |
1160 s->block_num = 0; | |
1161 s->block_pos = 0; | |
1162 for(;;) { | |
1163 ret = wma_decode_block(s); | |
2967 | 1164 if (ret < 0) |
783 | 1165 return -1; |
1166 if (ret) | |
1167 break; | |
1168 } | |
1169 | |
1170 /* convert frame to integer */ | |
1171 n = s->frame_len; | |
1172 incr = s->nb_channels; | |
1173 for(ch = 0; ch < s->nb_channels; ch++) { | |
1174 ptr = samples + ch; | |
1175 iptr = s->frame_out[ch]; | |
1176 | |
1177 for(i=0;i<n;i++) { | |
797 | 1178 a = lrintf(*iptr++); |
783 | 1179 if (a > 32767) |
1180 a = 32767; | |
1181 else if (a < -32768) | |
1182 a = -32768; | |
1183 *ptr = a; | |
1184 ptr += incr; | |
1185 } | |
1186 /* prepare for next block */ | |
1187 memmove(&s->frame_out[ch][0], &s->frame_out[ch][s->frame_len], | |
1188 s->frame_len * sizeof(float)); | |
1189 /* XXX: suppress this */ | |
2967 | 1190 memset(&s->frame_out[ch][s->frame_len], 0, |
783 | 1191 s->frame_len * sizeof(float)); |
1192 } | |
1193 | |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
1194 #ifdef TRACE |
783 | 1195 dump_shorts("samples", samples, n * s->nb_channels); |
1196 #endif | |
1197 return 0; | |
1198 } | |
1199 | |
2967 | 1200 static int wma_decode_superframe(AVCodecContext *avctx, |
783 | 1201 void *data, int *data_size, |
1064 | 1202 uint8_t *buf, int buf_size) |
783 | 1203 { |
1204 WMADecodeContext *s = avctx->priv_data; | |
1205 int nb_frames, bit_offset, i, pos, len; | |
1206 uint8_t *q; | |
1207 int16_t *samples; | |
2967 | 1208 |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
1209 tprintf("***decode_superframe:\n"); |
783 | 1210 |
1750 | 1211 if(buf_size==0){ |
1212 s->last_superframe_len = 0; | |
1213 return 0; | |
1214 } | |
2967 | 1215 |
783 | 1216 samples = data; |
1217 | |
1025
1f9afd8b9131
GetBitContext.size is allways multiplied by 8 -> use size_in_bits to avoid useless *8 in a few inner loops
michaelni
parents:
972
diff
changeset
|
1218 init_get_bits(&s->gb, buf, buf_size*8); |
2967 | 1219 |
783 | 1220 if (s->use_bit_reservoir) { |
1221 /* read super frame header */ | |
1222 get_bits(&s->gb, 4); /* super frame index */ | |
1223 nb_frames = get_bits(&s->gb, 4) - 1; | |
1224 | |
1225 bit_offset = get_bits(&s->gb, s->byte_offset_bits + 3); | |
1226 | |
1227 if (s->last_superframe_len > 0) { | |
1228 // printf("skip=%d\n", s->last_bitoffset); | |
1229 /* add bit_offset bits to last frame */ | |
2967 | 1230 if ((s->last_superframe_len + ((bit_offset + 7) >> 3)) > |
783 | 1231 MAX_CODED_SUPERFRAME_SIZE) |
964
6e6773512288
oops : better error resilience - should fix most wma decoding problems
bellard
parents:
819
diff
changeset
|
1232 goto fail; |
783 | 1233 q = s->last_superframe + s->last_superframe_len; |
1234 len = bit_offset; | |
3362
c43fcf831f7c
Do not read full byte when less than 8 bits are still to be read.
reimar
parents:
3361
diff
changeset
|
1235 while (len > 7) { |
783 | 1236 *q++ = (get_bits)(&s->gb, 8); |
1237 len -= 8; | |
1238 } | |
1239 if (len > 0) { | |
1240 *q++ = (get_bits)(&s->gb, len) << (8 - len); | |
1241 } | |
2967 | 1242 |
783 | 1243 /* XXX: bit_offset bits into last frame */ |
1025
1f9afd8b9131
GetBitContext.size is allways multiplied by 8 -> use size_in_bits to avoid useless *8 in a few inner loops
michaelni
parents:
972
diff
changeset
|
1244 init_get_bits(&s->gb, s->last_superframe, MAX_CODED_SUPERFRAME_SIZE*8); |
783 | 1245 /* skip unused bits */ |
1246 if (s->last_bitoffset > 0) | |
1247 skip_bits(&s->gb, s->last_bitoffset); | |
1248 /* this frame is stored in the last superframe and in the | |
1249 current one */ | |
1250 if (wma_decode_frame(s, samples) < 0) | |
964
6e6773512288
oops : better error resilience - should fix most wma decoding problems
bellard
parents:
819
diff
changeset
|
1251 goto fail; |
783 | 1252 samples += s->nb_channels * s->frame_len; |
1253 } | |
1254 | |
1255 /* read each frame starting from bit_offset */ | |
1256 pos = bit_offset + 4 + 4 + s->byte_offset_bits + 3; | |
1025
1f9afd8b9131
GetBitContext.size is allways multiplied by 8 -> use size_in_bits to avoid useless *8 in a few inner loops
michaelni
parents:
972
diff
changeset
|
1257 init_get_bits(&s->gb, buf + (pos >> 3), (MAX_CODED_SUPERFRAME_SIZE - (pos >> 3))*8); |
783 | 1258 len = pos & 7; |
1259 if (len > 0) | |
1260 skip_bits(&s->gb, len); | |
2967 | 1261 |
783 | 1262 s->reset_block_lengths = 1; |
1263 for(i=0;i<nb_frames;i++) { | |
1264 if (wma_decode_frame(s, samples) < 0) | |
964
6e6773512288
oops : better error resilience - should fix most wma decoding problems
bellard
parents:
819
diff
changeset
|
1265 goto fail; |
783 | 1266 samples += s->nb_channels * s->frame_len; |
1267 } | |
1268 | |
1269 /* we copy the end of the frame in the last frame buffer */ | |
1270 pos = get_bits_count(&s->gb) + ((bit_offset + 4 + 4 + s->byte_offset_bits + 3) & ~7); | |
1271 s->last_bitoffset = pos & 7; | |
1272 pos >>= 3; | |
1273 len = buf_size - pos; | |
819 | 1274 if (len > MAX_CODED_SUPERFRAME_SIZE || len < 0) { |
964
6e6773512288
oops : better error resilience - should fix most wma decoding problems
bellard
parents:
819
diff
changeset
|
1275 goto fail; |
783 | 1276 } |
1277 s->last_superframe_len = len; | |
1278 memcpy(s->last_superframe, buf + pos, len); | |
1279 } else { | |
1280 /* single frame decode */ | |
1281 if (wma_decode_frame(s, samples) < 0) | |
964
6e6773512288
oops : better error resilience - should fix most wma decoding problems
bellard
parents:
819
diff
changeset
|
1282 goto fail; |
783 | 1283 samples += s->nb_channels * s->frame_len; |
1284 } | |
1285 *data_size = (int8_t *)samples - (int8_t *)data; | |
1286 return s->block_align; | |
964
6e6773512288
oops : better error resilience - should fix most wma decoding problems
bellard
parents:
819
diff
changeset
|
1287 fail: |
6e6773512288
oops : better error resilience - should fix most wma decoding problems
bellard
parents:
819
diff
changeset
|
1288 /* when error, we reset the bit reservoir */ |
6e6773512288
oops : better error resilience - should fix most wma decoding problems
bellard
parents:
819
diff
changeset
|
1289 s->last_superframe_len = 0; |
6e6773512288
oops : better error resilience - should fix most wma decoding problems
bellard
parents:
819
diff
changeset
|
1290 return -1; |
783 | 1291 } |
1292 | |
1293 static int wma_decode_end(AVCodecContext *avctx) | |
1294 { | |
1295 WMADecodeContext *s = avctx->priv_data; | |
1296 int i; | |
1297 | |
1298 for(i = 0; i < s->nb_block_sizes; i++) | |
795
55add0e7eafb
avoid name clash - fixed again block size selection
bellard
parents:
785
diff
changeset
|
1299 ff_mdct_end(&s->mdct_ctx[i]); |
783 | 1300 for(i = 0; i < s->nb_block_sizes; i++) |
1301 av_free(s->windows[i]); | |
1302 | |
1303 if (s->use_exp_vlc) { | |
1304 free_vlc(&s->exp_vlc); | |
1305 } | |
1306 if (s->use_noise_coding) { | |
1307 free_vlc(&s->hgain_vlc); | |
1308 } | |
1309 for(i = 0;i < 2; i++) { | |
1310 free_vlc(&s->coef_vlc[i]); | |
1311 av_free(s->run_table[i]); | |
1312 av_free(s->level_table[i]); | |
1313 } | |
2967 | 1314 |
783 | 1315 return 0; |
1316 } | |
1317 | |
/* Codec descriptor for the "wmav1" decoder (CODEC_ID_WMAV1).
   The initializer is positional; it shares its init, end and decode
   functions with wmav2_decoder.
   NOTE(review): the NULL slot is presumably the encode callback --
   confirm against the AVCodec declaration. */
AVCodec wmav1_decoder =
{
    "wmav1",
    CODEC_TYPE_AUDIO,
    CODEC_ID_WMAV1,
    sizeof(WMADecodeContext),
    wma_decode_init,
    NULL,
    wma_decode_end,
    wma_decode_superframe,
};
1329 | |
/* Codec descriptor for the "wmav2" decoder (CODEC_ID_WMAV2).
   The initializer is positional; it shares its init, end and decode
   functions with wmav1_decoder.
   NOTE(review): the NULL slot is presumably the encode callback --
   confirm against the AVCodec declaration. */
AVCodec wmav2_decoder =
{
    "wmav2",
    CODEC_TYPE_AUDIO,
    CODEC_ID_WMAV2,
    sizeof(WMADecodeContext),
    wma_decode_init,
    NULL,
    wma_decode_end,
    wma_decode_superframe,
};