Mercurial > libavcodec.hg
annotate wmadec.c @ 4301:b43bd0c56eaa libavcodec
Bug fix for crashes when SSE is used on unaligned arrays.
No measurable change in speed. This gave random crashes on Win32
and BeOS. The cause for this bug is that gcc doesn't align the
stackframe. Linux and glibc always ensure this to be true thus
this never affected Linux.
author | banan |
---|---|
date | Thu, 14 Dec 2006 17:50:23 +0000 |
parents | c8c591fe26f8 |
children | 0efc832d9102 |
rev | line source |
---|---|
783 | 1 /* |
2 * WMA compatible decoder | |
3 * Copyright (c) 2002 The FFmpeg Project. | |
4 * | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3776
diff
changeset
|
5 * This file is part of FFmpeg. |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3776
diff
changeset
|
6 * |
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3776
diff
changeset
|
7 * FFmpeg is free software; you can redistribute it and/or |
783 | 8 * modify it under the terms of the GNU Lesser General Public |
9 * License as published by the Free Software Foundation; either | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3776
diff
changeset
|
10 * version 2.1 of the License, or (at your option) any later version. |
783 | 11 * |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3776
diff
changeset
|
12 * FFmpeg is distributed in the hope that it will be useful, |
783 | 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
15 * Lesser General Public License for more details. | |
16 * | |
17 * You should have received a copy of the GNU Lesser General Public | |
3947
c8c591fe26f8
Change license headers to say 'FFmpeg' instead of 'this program/this library'
diego
parents:
3776
diff
changeset
|
18 * License along with FFmpeg; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
3022
diff
changeset
|
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
783 | 20 */ |
1106 | 21 |
22 /** | |
23 * @file wmadec.c | |
24 * WMA compatible decoder. | |
1967
2b0fc6b25ab8
add the minimal documentation to make this decoder useful
melanson
parents:
1750
diff
changeset
|
25 * This decoder handles Microsoft Windows Media Audio data, versions 1 & 2. |
2967 | 26 * WMA v1 is identified by audio format 0x160 in Microsoft media files |
1967
2b0fc6b25ab8
add the minimal documentation to make this decoder useful
melanson
parents:
1750
diff
changeset
|
27 * (ASF/AVI/WAV). WMA v2 is identified by audio format 0x161. |
2b0fc6b25ab8
add the minimal documentation to make this decoder useful
melanson
parents:
1750
diff
changeset
|
28 * |
2b0fc6b25ab8
add the minimal documentation to make this decoder useful
melanson
parents:
1750
diff
changeset
|
29 * To use this decoder, a calling application must supply the extra data |
2b0fc6b25ab8
add the minimal documentation to make this decoder useful
melanson
parents:
1750
diff
changeset
|
30 * bytes provided with the WMA data. These are the extra, codec-specific |
2967 | 31 * bytes at the end of a WAVEFORMATEX data structure. Transmit these bytes |
32 * to the decoder using the extradata[_size] fields in AVCodecContext. There | |
1967
2b0fc6b25ab8
add the minimal documentation to make this decoder useful
melanson
parents:
1750
diff
changeset
|
33 * should be 4 extra bytes for v1 data and 6 extra bytes for v2 data. |
1106 | 34 */ |
35 | |
783 | 36 #include "avcodec.h" |
2398
582e635cfa08
common.c -> bitstream.c (and the single non bitstream func -> utils.c)
michael
parents:
2370
diff
changeset
|
37 #include "bitstream.h" |
783 | 38 #include "dsputil.h" |
39 | |
/* MDCT block sizes, expressed as log2 of the number of samples */
#define BLOCK_MIN_BITS 7
#define BLOCK_MAX_BITS 11
#define BLOCK_MAX_SIZE (1 << BLOCK_MAX_BITS)

/* number of distinct block sizes between the smallest and the largest */
#define BLOCK_NB_SIZES (BLOCK_MAX_BITS - BLOCK_MIN_BITS + 1)

/* XXX: find exact max size */
#define HIGH_BAND_MAX_SIZE 16

/* number of LSP coefficients read per channel when exponents are LSP coded */
#define NB_LSP_COEFS 10

/* XXX: is it a suitable value ? */
#define MAX_CODED_SUPERFRAME_SIZE 16384

#define MAX_CHANNELS 2

/* number of entries in the precomputed noise table */
#define NOISE_TAB_SIZE 8192

/* log2 of the size of the mantissa tables used by pow_m1_4() */
#define LSP_POW_BITS 7

/*
 * VLC lookup parameters: first-level table width in bits, and the number
 * of table levels, computed as ceil(L / width).
 * NOTE(review): L (22, 19, 13) is presumably the longest code length of
 * the corresponding Huffman table -- confirm against wmadata.h.
 */
#define VLCBITS 9
#define VLCMAX ((22+VLCBITS-1)/VLCBITS)

#define EXPVLCBITS 8
#define EXPMAX ((19+EXPVLCBITS-1)/EXPVLCBITS)

#define HGAINVLCBITS 9
#define HGAINMAX ((13+HGAINVLCBITS-1)/HGAINVLCBITS)
3022 | 69 |
783 | 70 typedef struct WMADecodeContext { |
71 GetBitContext gb; | |
72 int sample_rate; | |
73 int nb_channels; | |
74 int bit_rate; | |
75 int version; /* 1 = 0x160 (WMAV1), 2 = 0x161 (WMAV2) */ | |
76 int block_align; | |
77 int use_bit_reservoir; | |
78 int use_variable_block_len; | |
79 int use_exp_vlc; /* exponent coding: 0 = lsp, 1 = vlc + delta */ | |
80 int use_noise_coding; /* true if perceptual noise is added */ | |
81 int byte_offset_bits; | |
82 VLC exp_vlc; | |
83 int exponent_sizes[BLOCK_NB_SIZES]; | |
84 uint16_t exponent_bands[BLOCK_NB_SIZES][25]; | |
85 int high_band_start[BLOCK_NB_SIZES]; /* index of first coef in high band */ | |
86 int coefs_start; /* first coded coef */ | |
87 int coefs_end[BLOCK_NB_SIZES]; /* max number of coded coefficients */ | |
88 int exponent_high_sizes[BLOCK_NB_SIZES]; | |
2967 | 89 int exponent_high_bands[BLOCK_NB_SIZES][HIGH_BAND_MAX_SIZE]; |
783 | 90 VLC hgain_vlc; |
2967 | 91 |
783 | 92 /* coded values in high bands */ |
93 int high_band_coded[MAX_CHANNELS][HIGH_BAND_MAX_SIZE]; | |
94 int high_band_values[MAX_CHANNELS][HIGH_BAND_MAX_SIZE]; | |
95 | |
96 /* there are two possible tables for spectral coefficients */ | |
97 VLC coef_vlc[2]; | |
98 uint16_t *run_table[2]; | |
99 uint16_t *level_table[2]; | |
100 /* frame info */ | |
101 int frame_len; /* frame length in samples */ | |
102 int frame_len_bits; /* frame_len = 1 << frame_len_bits */ | |
103 int nb_block_sizes; /* number of block sizes */ | |
104 /* block info */ | |
105 int reset_block_lengths; | |
106 int block_len_bits; /* log2 of current block length */ | |
107 int next_block_len_bits; /* log2 of next block length */ | |
108 int prev_block_len_bits; /* log2 of prev block length */ | |
109 int block_len; /* block length in samples */ | |
110 int block_num; /* block number in current frame */ | |
111 int block_pos; /* current position in frame */ | |
112 uint8_t ms_stereo; /* true if mid/side stereo mode */ | |
113 uint8_t channel_coded[MAX_CHANNELS]; /* true if channel is coded */ | |
3089 | 114 DECLARE_ALIGNED_16(float, exponents[MAX_CHANNELS][BLOCK_MAX_SIZE]); |
783 | 115 float max_exponent[MAX_CHANNELS]; |
116 int16_t coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE]; | |
3089 | 117 DECLARE_ALIGNED_16(float, coefs[MAX_CHANNELS][BLOCK_MAX_SIZE]); |
4301
b43bd0c56eaa
Bug fix for crashes when SSE is used on unaligned arrays.
banan
parents:
3947
diff
changeset
|
118 DECLARE_ALIGNED_16(FFTSample, output[BLOCK_MAX_SIZE * 2]); |
b43bd0c56eaa
Bug fix for crashes when SSE is used on unaligned arrays.
banan
parents:
3947
diff
changeset
|
119 DECLARE_ALIGNED_16(float, window[BLOCK_MAX_SIZE * 2]); |
783 | 120 MDCTContext mdct_ctx[BLOCK_NB_SIZES]; |
1031
19de1445beb2
use av_malloc() functions - added av_strdup and av_realloc()
bellard
parents:
1025
diff
changeset
|
121 float *windows[BLOCK_NB_SIZES]; |
3089 | 122 DECLARE_ALIGNED_16(FFTSample, mdct_tmp[BLOCK_MAX_SIZE]); /* temporary storage for imdct */ |
783 | 123 /* output buffer for one frame and the last for IMDCT windowing */ |
3089 | 124 DECLARE_ALIGNED_16(float, frame_out[MAX_CHANNELS][BLOCK_MAX_SIZE * 2]); |
783 | 125 /* last frame info */ |
126 uint8_t last_superframe[MAX_CODED_SUPERFRAME_SIZE + 4]; /* padding added */ | |
127 int last_bitoffset; | |
128 int last_superframe_len; | |
129 float noise_table[NOISE_TAB_SIZE]; | |
130 int noise_index; | |
131 float noise_mult; /* XXX: suppress that and integrate it in the noise array */ | |
132 /* lsp_to_curve tables */ | |
133 float lsp_cos_table[BLOCK_MAX_SIZE]; | |
134 float lsp_pow_e_table[256]; | |
135 float lsp_pow_m_table1[(1 << LSP_POW_BITS)]; | |
136 float lsp_pow_m_table2[(1 << LSP_POW_BITS)]; | |
3592
6a358dccf2ab
SIMD vector optimizations. 3% faster overall decoding.
banan
parents:
3555
diff
changeset
|
137 DSPContext dsp; |
1343 | 138 |
139 #ifdef TRACE | |
140 int frame_count; | |
141 #endif | |
783 | 142 } WMADecodeContext; |
143 | |
/**
 * Static description of one spectral coefficient Huffman table,
 * consumed by init_coef_vlc().
 */
typedef struct CoefVLCTable {
    /* total number of codes */
    int n;
    /* VLC bit values */
    const uint32_t *huffcodes;
    /* VLC bit size */
    const uint8_t *huffbits;
    /* table to build run/level tables */
    const uint16_t *levels;
} CoefVLCTable;
150 | |
151 static void wma_lsp_to_curve_init(WMADecodeContext *s, int frame_len); | |
152 | |
153 #include "wmadata.h" | |
154 | |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
155 #ifdef TRACE |
/**
 * Trace helper: print an array of shorts, eight values per row, each row
 * prefixed with the index of its first element.
 *
 * @param name label printed in the header line
 * @param tab  values to print
 * @param n    number of values in tab
 */
static void dump_shorts(const char *name, const short *tab, int n)
{
    int i;

    tprintf("%s[%d]:\n", name, n);
    for(i=0;i<n;i++) {
        if ((i & 7) == 0)
            tprintf("%4d: ", i);
        tprintf(" %5d.0", tab[i]);
        if ((i & 7) == 7)
            tprintf("\n");
    }
    /* terminate a partially filled last row, as dump_floats() does, so the
       next trace line does not get appended to it */
    if ((i & 7) != 0)
        tprintf("\n");
}
169 | |
/**
 * Trace helper: print an array of floats, eight values per row, each row
 * prefixed with the index of its first element.
 *
 * @param name label printed in the header line
 * @param prec number of digits printed after the decimal point
 * @param tab  values to print
 * @param n    number of values in tab
 */
static void dump_floats(const char *name, int prec, const float *tab, int n)
{
    int idx = 0;

    tprintf("%s[%d]:\n", name, n);
    while (idx < n) {
        const int column = idx & 7;

        if (column == 0)
            tprintf("%4d: ", idx);
        tprintf(" %8.*f", prec, tab[idx]);
        if (column == 7)
            tprintf("\n");
        idx++;
    }
    /* close the last row when it holds fewer than eight values */
    if ((idx & 7) != 0)
        tprintf("\n");
}
185 #endif | |
186 | |
187 /* XXX: use same run/length optimization as mpeg decoders */ | |
2967 | 188 static void init_coef_vlc(VLC *vlc, |
783 | 189 uint16_t **prun_table, uint16_t **plevel_table, |
190 const CoefVLCTable *vlc_table) | |
191 { | |
192 int n = vlc_table->n; | |
193 const uint8_t *table_bits = vlc_table->huffbits; | |
194 const uint32_t *table_codes = vlc_table->huffcodes; | |
195 const uint16_t *levels_table = vlc_table->levels; | |
196 uint16_t *run_table, *level_table; | |
197 const uint16_t *p; | |
198 int i, l, j, level; | |
199 | |
3113 | 200 init_vlc(vlc, VLCBITS, n, table_bits, 1, 1, table_codes, 4, 4, 0); |
783 | 201 |
1031
19de1445beb2
use av_malloc() functions - added av_strdup and av_realloc()
bellard
parents:
1025
diff
changeset
|
202 run_table = av_malloc(n * sizeof(uint16_t)); |
19de1445beb2
use av_malloc() functions - added av_strdup and av_realloc()
bellard
parents:
1025
diff
changeset
|
203 level_table = av_malloc(n * sizeof(uint16_t)); |
783 | 204 p = levels_table; |
205 i = 2; | |
206 level = 1; | |
207 while (i < n) { | |
208 l = *p++; | |
209 for(j=0;j<l;j++) { | |
210 run_table[i] = j; | |
211 level_table[i] = level; | |
212 i++; | |
213 } | |
214 level++; | |
215 } | |
216 *prun_table = run_table; | |
217 *plevel_table = level_table; | |
218 } | |
219 | |
220 static int wma_decode_init(AVCodecContext * avctx) | |
221 { | |
222 WMADecodeContext *s = avctx->priv_data; | |
223 int i, flags1, flags2; | |
224 float *window; | |
225 uint8_t *extradata; | |
3235 | 226 float bps1, high_freq; |
227 volatile float bps; | |
783 | 228 int sample_rate1; |
229 int coef_vlc_table; | |
2967 | 230 |
783 | 231 s->sample_rate = avctx->sample_rate; |
232 s->nb_channels = avctx->channels; | |
233 s->bit_rate = avctx->bit_rate; | |
234 s->block_align = avctx->block_align; | |
235 | |
3592
6a358dccf2ab
SIMD vector optimizations. 3% faster overall decoding.
banan
parents:
3555
diff
changeset
|
236 dsputil_init(&s->dsp, avctx); |
6a358dccf2ab
SIMD vector optimizations. 3% faster overall decoding.
banan
parents:
3555
diff
changeset
|
237 |
808
e9bfaabcf07d
fixed nb_block_sizes detection - fixed codec_id test (avctx->codec_id does not need to be initialized)
bellard
parents:
797
diff
changeset
|
238 if (avctx->codec->id == CODEC_ID_WMAV1) { |
783 | 239 s->version = 1; |
240 } else { | |
241 s->version = 2; | |
242 } | |
2967 | 243 |
783 | 244 /* extract flag infos */ |
245 flags1 = 0; | |
246 flags2 = 0; | |
247 extradata = avctx->extradata; | |
248 if (s->version == 1 && avctx->extradata_size >= 4) { | |
249 flags1 = extradata[0] | (extradata[1] << 8); | |
250 flags2 = extradata[2] | (extradata[3] << 8); | |
251 } else if (s->version == 2 && avctx->extradata_size >= 6) { | |
2967 | 252 flags1 = extradata[0] | (extradata[1] << 8) | |
783 | 253 (extradata[2] << 16) | (extradata[3] << 24); |
254 flags2 = extradata[4] | (extradata[5] << 8); | |
255 } | |
256 s->use_exp_vlc = flags2 & 0x0001; | |
257 s->use_bit_reservoir = flags2 & 0x0002; | |
258 s->use_variable_block_len = flags2 & 0x0004; | |
259 | |
260 /* compute MDCT block size */ | |
261 if (s->sample_rate <= 16000) { | |
262 s->frame_len_bits = 9; | |
2967 | 263 } else if (s->sample_rate <= 22050 || |
795
55add0e7eafb
avoid name clash - fixed again block size selection
bellard
parents:
785
diff
changeset
|
264 (s->sample_rate <= 32000 && s->version == 1)) { |
783 | 265 s->frame_len_bits = 10; |
266 } else { | |
267 s->frame_len_bits = 11; | |
268 } | |
269 s->frame_len = 1 << s->frame_len_bits; | |
270 if (s->use_variable_block_len) { | |
808
e9bfaabcf07d
fixed nb_block_sizes detection - fixed codec_id test (avctx->codec_id does not need to be initialized)
bellard
parents:
797
diff
changeset
|
271 int nb_max, nb; |
e9bfaabcf07d
fixed nb_block_sizes detection - fixed codec_id test (avctx->codec_id does not need to be initialized)
bellard
parents:
797
diff
changeset
|
272 nb = ((flags2 >> 3) & 3) + 1; |
e9bfaabcf07d
fixed nb_block_sizes detection - fixed codec_id test (avctx->codec_id does not need to be initialized)
bellard
parents:
797
diff
changeset
|
273 if ((s->bit_rate / s->nb_channels) >= 32000) |
e9bfaabcf07d
fixed nb_block_sizes detection - fixed codec_id test (avctx->codec_id does not need to be initialized)
bellard
parents:
797
diff
changeset
|
274 nb += 2; |
e9bfaabcf07d
fixed nb_block_sizes detection - fixed codec_id test (avctx->codec_id does not need to be initialized)
bellard
parents:
797
diff
changeset
|
275 nb_max = s->frame_len_bits - BLOCK_MIN_BITS; |
e9bfaabcf07d
fixed nb_block_sizes detection - fixed codec_id test (avctx->codec_id does not need to be initialized)
bellard
parents:
797
diff
changeset
|
276 if (nb > nb_max) |
e9bfaabcf07d
fixed nb_block_sizes detection - fixed codec_id test (avctx->codec_id does not need to be initialized)
bellard
parents:
797
diff
changeset
|
277 nb = nb_max; |
e9bfaabcf07d
fixed nb_block_sizes detection - fixed codec_id test (avctx->codec_id does not need to be initialized)
bellard
parents:
797
diff
changeset
|
278 s->nb_block_sizes = nb + 1; |
783 | 279 } else { |
280 s->nb_block_sizes = 1; | |
281 } | |
282 | |
283 /* init rate dependant parameters */ | |
284 s->use_noise_coding = 1; | |
3235 | 285 high_freq = s->sample_rate * 0.5; |
783 | 286 |
287 /* if version 2, then the rates are normalized */ | |
288 sample_rate1 = s->sample_rate; | |
289 if (s->version == 2) { | |
2967 | 290 if (sample_rate1 >= 44100) |
783 | 291 sample_rate1 = 44100; |
2967 | 292 else if (sample_rate1 >= 22050) |
783 | 293 sample_rate1 = 22050; |
2967 | 294 else if (sample_rate1 >= 16000) |
783 | 295 sample_rate1 = 16000; |
2967 | 296 else if (sample_rate1 >= 11025) |
783 | 297 sample_rate1 = 11025; |
2967 | 298 else if (sample_rate1 >= 8000) |
783 | 299 sample_rate1 = 8000; |
300 } | |
301 | |
302 bps = (float)s->bit_rate / (float)(s->nb_channels * s->sample_rate); | |
2992 | 303 s->byte_offset_bits = av_log2((int)(bps * s->frame_len / 8.0 + 0.5)) + 2; |
783 | 304 |
305 /* compute high frequency value and choose if noise coding should | |
306 be activated */ | |
307 bps1 = bps; | |
308 if (s->nb_channels == 2) | |
309 bps1 = bps * 1.6; | |
310 if (sample_rate1 == 44100) { | |
311 if (bps1 >= 0.61) | |
312 s->use_noise_coding = 0; | |
313 else | |
3235 | 314 high_freq = high_freq * 0.4; |
783 | 315 } else if (sample_rate1 == 22050) { |
316 if (bps1 >= 1.16) | |
317 s->use_noise_coding = 0; | |
2967 | 318 else if (bps1 >= 0.72) |
3235 | 319 high_freq = high_freq * 0.7; |
783 | 320 else |
3235 | 321 high_freq = high_freq * 0.6; |
783 | 322 } else if (sample_rate1 == 16000) { |
323 if (bps > 0.5) | |
3235 | 324 high_freq = high_freq * 0.5; |
783 | 325 else |
3235 | 326 high_freq = high_freq * 0.3; |
783 | 327 } else if (sample_rate1 == 11025) { |
3235 | 328 high_freq = high_freq * 0.7; |
783 | 329 } else if (sample_rate1 == 8000) { |
330 if (bps <= 0.625) { | |
3235 | 331 high_freq = high_freq * 0.5; |
783 | 332 } else if (bps > 0.75) { |
333 s->use_noise_coding = 0; | |
334 } else { | |
3235 | 335 high_freq = high_freq * 0.65; |
783 | 336 } |
337 } else { | |
338 if (bps >= 0.8) { | |
3235 | 339 high_freq = high_freq * 0.75; |
783 | 340 } else if (bps >= 0.6) { |
3235 | 341 high_freq = high_freq * 0.6; |
783 | 342 } else { |
3235 | 343 high_freq = high_freq * 0.5; |
783 | 344 } |
345 } | |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
346 dprintf("flags1=0x%x flags2=0x%x\n", flags1, flags2); |
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
347 dprintf("version=%d channels=%d sample_rate=%d bitrate=%d block_align=%d\n", |
2967 | 348 s->version, s->nb_channels, s->sample_rate, s->bit_rate, |
783 | 349 s->block_align); |
3235 | 350 dprintf("bps=%f bps1=%f high_freq=%f bitoffset=%d\n", |
351 bps, bps1, high_freq, s->byte_offset_bits); | |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
352 dprintf("use_noise_coding=%d use_exp_vlc=%d nb_block_sizes=%d\n", |
808
e9bfaabcf07d
fixed nb_block_sizes detection - fixed codec_id test (avctx->codec_id does not need to be initialized)
bellard
parents:
797
diff
changeset
|
353 s->use_noise_coding, s->use_exp_vlc, s->nb_block_sizes); |
783 | 354 |
355 /* compute the scale factor band sizes for each MDCT block size */ | |
356 { | |
357 int a, b, pos, lpos, k, block_len, i, j, n; | |
358 const uint8_t *table; | |
2967 | 359 |
783 | 360 if (s->version == 1) { |
361 s->coefs_start = 3; | |
362 } else { | |
363 s->coefs_start = 0; | |
364 } | |
365 for(k = 0; k < s->nb_block_sizes; k++) { | |
366 block_len = s->frame_len >> k; | |
367 | |
368 if (s->version == 1) { | |
369 lpos = 0; | |
370 for(i=0;i<25;i++) { | |
371 a = wma_critical_freqs[i]; | |
372 b = s->sample_rate; | |
373 pos = ((block_len * 2 * a) + (b >> 1)) / b; | |
2967 | 374 if (pos > block_len) |
783 | 375 pos = block_len; |
376 s->exponent_bands[0][i] = pos - lpos; | |
377 if (pos >= block_len) { | |
378 i++; | |
379 break; | |
380 } | |
381 lpos = pos; | |
382 } | |
383 s->exponent_sizes[0] = i; | |
384 } else { | |
385 /* hardcoded tables */ | |
386 table = NULL; | |
387 a = s->frame_len_bits - BLOCK_MIN_BITS - k; | |
388 if (a < 3) { | |
389 if (s->sample_rate >= 44100) | |
390 table = exponent_band_44100[a]; | |
391 else if (s->sample_rate >= 32000) | |
392 table = exponent_band_32000[a]; | |
393 else if (s->sample_rate >= 22050) | |
394 table = exponent_band_22050[a]; | |
395 } | |
396 if (table) { | |
397 n = *table++; | |
398 for(i=0;i<n;i++) | |
399 s->exponent_bands[k][i] = table[i]; | |
400 s->exponent_sizes[k] = n; | |
401 } else { | |
402 j = 0; | |
403 lpos = 0; | |
404 for(i=0;i<25;i++) { | |
405 a = wma_critical_freqs[i]; | |
406 b = s->sample_rate; | |
407 pos = ((block_len * 2 * a) + (b << 1)) / (4 * b); | |
408 pos <<= 2; | |
2967 | 409 if (pos > block_len) |
783 | 410 pos = block_len; |
411 if (pos > lpos) | |
412 s->exponent_bands[k][j++] = pos - lpos; | |
413 if (pos >= block_len) | |
414 break; | |
415 lpos = pos; | |
416 } | |
417 s->exponent_sizes[k] = j; | |
418 } | |
419 } | |
420 | |
421 /* max number of coefs */ | |
422 s->coefs_end[k] = (s->frame_len - ((s->frame_len * 9) / 100)) >> k; | |
423 /* high freq computation */ | |
3235 | 424 s->high_band_start[k] = (int)((block_len * 2 * high_freq) / |
425 s->sample_rate + 0.5); | |
783 | 426 n = s->exponent_sizes[k]; |
427 j = 0; | |
428 pos = 0; | |
429 for(i=0;i<n;i++) { | |
430 int start, end; | |
431 start = pos; | |
432 pos += s->exponent_bands[k][i]; | |
433 end = pos; | |
434 if (start < s->high_band_start[k]) | |
435 start = s->high_band_start[k]; | |
436 if (end > s->coefs_end[k]) | |
437 end = s->coefs_end[k]; | |
438 if (end > start) | |
439 s->exponent_high_bands[k][j++] = end - start; | |
440 } | |
441 s->exponent_high_sizes[k] = j; | |
442 #if 0 | |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
443 tprintf("%5d: coefs_end=%d high_band_start=%d nb_high_bands=%d: ", |
2967 | 444 s->frame_len >> k, |
783 | 445 s->coefs_end[k], |
446 s->high_band_start[k], | |
447 s->exponent_high_sizes[k]); | |
448 for(j=0;j<s->exponent_high_sizes[k];j++) | |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
449 tprintf(" %d", s->exponent_high_bands[k][j]); |
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
450 tprintf("\n"); |
783 | 451 #endif |
452 } | |
453 } | |
454 | |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
455 #ifdef TRACE |
783 | 456 { |
457 int i, j; | |
458 for(i = 0; i < s->nb_block_sizes; i++) { | |
2967 | 459 tprintf("%5d: n=%2d:", |
460 s->frame_len >> i, | |
783 | 461 s->exponent_sizes[i]); |
462 for(j=0;j<s->exponent_sizes[i];j++) | |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
463 tprintf(" %d", s->exponent_bands[i][j]); |
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
464 tprintf("\n"); |
783 | 465 } |
466 } | |
467 #endif | |
468 | |
469 /* init MDCT */ | |
470 for(i = 0; i < s->nb_block_sizes; i++) | |
795
55add0e7eafb
avoid name clash - fixed again block size selection
bellard
parents:
785
diff
changeset
|
471 ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 1); |
2967 | 472 |
783 | 473 /* init MDCT windows : simple sinus window */ |
474 for(i = 0; i < s->nb_block_sizes; i++) { | |
475 int n, j; | |
476 float alpha; | |
477 n = 1 << (s->frame_len_bits - i); | |
478 window = av_malloc(sizeof(float) * n); | |
479 alpha = M_PI / (2.0 * n); | |
480 for(j=0;j<n;j++) { | |
481 window[n - j - 1] = sin((j + 0.5) * alpha); | |
482 } | |
483 s->windows[i] = window; | |
484 } | |
485 | |
486 s->reset_block_lengths = 1; | |
2967 | 487 |
783 | 488 if (s->use_noise_coding) { |
489 | |
490 /* init the noise generator */ | |
491 if (s->use_exp_vlc) | |
492 s->noise_mult = 0.02; | |
493 else | |
494 s->noise_mult = 0.04; | |
2967 | 495 |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
496 #ifdef TRACE |
783 | 497 for(i=0;i<NOISE_TAB_SIZE;i++) |
498 s->noise_table[i] = 1.0 * s->noise_mult; | |
499 #else | |
500 { | |
501 unsigned int seed; | |
502 float norm; | |
503 seed = 1; | |
504 norm = (1.0 / (float)(1LL << 31)) * sqrt(3) * s->noise_mult; | |
505 for(i=0;i<NOISE_TAB_SIZE;i++) { | |
506 seed = seed * 314159 + 1; | |
507 s->noise_table[i] = (float)((int)seed) * norm; | |
508 } | |
509 } | |
510 #endif | |
3113 | 511 init_vlc(&s->hgain_vlc, HGAINVLCBITS, sizeof(hgain_huffbits), |
783 | 512 hgain_huffbits, 1, 1, |
2370
26560d4fdb1f
Memory leak fix patch by (Burkhard Plaum <plaum >at< ipf.uni-stuttgart )dot( de>)
michael
parents:
2180
diff
changeset
|
513 hgain_huffcodes, 2, 2, 0); |
783 | 514 } |
515 | |
516 if (s->use_exp_vlc) { | |
3113 | 517 init_vlc(&s->exp_vlc, EXPVLCBITS, sizeof(scale_huffbits), |
783 | 518 scale_huffbits, 1, 1, |
2370
26560d4fdb1f
Memory leak fix patch by (Burkhard Plaum <plaum >at< ipf.uni-stuttgart )dot( de>)
michael
parents:
2180
diff
changeset
|
519 scale_huffcodes, 4, 4, 0); |
783 | 520 } else { |
521 wma_lsp_to_curve_init(s, s->frame_len); | |
522 } | |
523 | |
524 /* choose the VLC tables for the coefficients */ | |
525 coef_vlc_table = 2; | |
526 if (s->sample_rate >= 32000) { | |
527 if (bps1 < 0.72) | |
528 coef_vlc_table = 0; | |
529 else if (bps1 < 1.16) | |
530 coef_vlc_table = 1; | |
531 } | |
532 | |
533 init_coef_vlc(&s->coef_vlc[0], &s->run_table[0], &s->level_table[0], | |
534 &coef_vlcs[coef_vlc_table * 2]); | |
535 init_coef_vlc(&s->coef_vlc[1], &s->run_table[1], &s->level_table[1], | |
536 &coef_vlcs[coef_vlc_table * 2 + 1]); | |
537 return 0; | |
538 } | |
539 | |
/**
 * Resize an array of scale values in place, for a bigger or a smaller
 * block. One size must be a multiple of the other.
 *
 * Growing repeats every value new_size / old_size times; shrinking keeps
 * every (old_size / new_size)-th value. Equal sizes leave the array
 * untouched.
 *
 * @param scale    array to resize; must have room for the larger size
 * @param old_size number of valid entries on input
 * @param new_size number of valid entries on output
 */
static void interpolate_array(float *scale, int old_size, int new_size)
{
    if (new_size == old_size)
        return;

    if (new_size > old_size) {
        const int repeat = new_size / old_size;
        float *dst = scale + new_size;
        int src, r;

        /* walk backwards so that source values are read before the
           expanded data overwrites them */
        for (src = old_size - 1; src >= 0; src--) {
            const float value = scale[src];

            for (r = repeat; r > 0; r--)
                *--dst = value;
        }
    } else {
        const int step = old_size / new_size;
        int dst;

        for (dst = 0; dst < new_size; dst++)
            scale[dst] = scale[dst * step];
    }
}
566 | |
567 /* compute x^-0.25 with an exponent and mantissa table. We use linear | |
568 interpolation to reduce the mantissa table size at a small speed | |
569 expense (linear interpolation approximately doubles the number of | |
570 bits of precision). */ | |
571 static inline float pow_m1_4(WMADecodeContext *s, float x) | |
572 { | |
573 union { | |
574 float f; | |
575 unsigned int v; | |
576 } u, t; | |
577 unsigned int e, m; | |
578 float a, b; | |
579 | |
580 u.f = x; | |
581 e = u.v >> 23; | |
582 m = (u.v >> (23 - LSP_POW_BITS)) & ((1 << LSP_POW_BITS) - 1); | |
583 /* build interpolation scale: 1 <= t < 2. */ | |
584 t.v = ((u.v << LSP_POW_BITS) & ((1 << 23) - 1)) | (127 << 23); | |
585 a = s->lsp_pow_m_table1[m]; | |
586 b = s->lsp_pow_m_table2[m]; | |
587 return s->lsp_pow_e_table[e] * (a + b * t.f); | |
588 } | |
589 | |
590 static void wma_lsp_to_curve_init(WMADecodeContext *s, int frame_len) | |
2967 | 591 { |
783 | 592 float wdel, a, b; |
593 int i, e, m; | |
594 | |
595 wdel = M_PI / frame_len; | |
596 for(i=0;i<frame_len;i++) | |
597 s->lsp_cos_table[i] = 2.0f * cos(wdel * i); | |
598 | |
599 /* tables for x^-0.25 computation */ | |
600 for(i=0;i<256;i++) { | |
601 e = i - 126; | |
602 s->lsp_pow_e_table[i] = pow(2.0, e * -0.25); | |
603 } | |
604 | |
605 /* NOTE: these two tables are needed to avoid two operations in | |
606 pow_m1_4 */ | |
607 b = 1.0; | |
608 for(i=(1 << LSP_POW_BITS) - 1;i>=0;i--) { | |
609 m = (1 << LSP_POW_BITS) + i; | |
610 a = (float)m * (0.5 / (1 << LSP_POW_BITS)); | |
611 a = pow(a, -0.25); | |
612 s->lsp_pow_m_table1[i] = 2 * a - b; | |
613 s->lsp_pow_m_table2[i] = b - a; | |
614 b = a; | |
615 } | |
616 #if 0 | |
617 for(i=1;i<20;i++) { | |
618 float v, r1, r2; | |
619 v = 5.0 / i; | |
620 r1 = pow_m1_4(s, v); | |
621 r2 = pow(v,-0.25); | |
622 printf("%f^-0.25=%f e=%f\n", v, r1, r2 - r1); | |
623 } | |
624 #endif | |
625 } | |
626 | |
627 /* NOTE: We use the same code as Vorbis here */ | |
628 /* XXX: optimize it further with SSE/3Dnow */ | |
2967 | 629 static void wma_lsp_to_curve(WMADecodeContext *s, |
630 float *out, float *val_max_ptr, | |
783 | 631 int n, float *lsp) |
632 { | |
633 int i, j; | |
634 float p, q, w, v, val_max; | |
635 | |
636 val_max = 0; | |
637 for(i=0;i<n;i++) { | |
638 p = 0.5f; | |
639 q = 0.5f; | |
640 w = s->lsp_cos_table[i]; | |
641 for(j=1;j<NB_LSP_COEFS;j+=2){ | |
642 q *= w - lsp[j - 1]; | |
643 p *= w - lsp[j]; | |
644 } | |
645 p *= p * (2.0f - w); | |
646 q *= q * (2.0f + w); | |
647 v = p + q; | |
648 v = pow_m1_4(s, v); | |
649 if (v > val_max) | |
650 val_max = v; | |
651 out[i] = v; | |
652 } | |
653 *val_max_ptr = val_max; | |
654 } | |
655 | |
656 /* decode exponents coded with LSP coefficients (same idea as Vorbis) */ | |
657 static void decode_exp_lsp(WMADecodeContext *s, int ch) | |
658 { | |
659 float lsp_coefs[NB_LSP_COEFS]; | |
660 int val, i; | |
661 | |
662 for(i = 0; i < NB_LSP_COEFS; i++) { | |
663 if (i == 0 || i >= 8) | |
664 val = get_bits(&s->gb, 3); | |
665 else | |
666 val = get_bits(&s->gb, 4); | |
667 lsp_coefs[i] = lsp_codebook[i][val]; | |
668 } | |
669 | |
670 wma_lsp_to_curve(s, s->exponents[ch], &s->max_exponent[ch], | |
671 s->block_len, lsp_coefs); | |
672 } | |
673 | |
674 /* decode exponents coded with VLC codes */ | |
675 static int decode_exp_vlc(WMADecodeContext *s, int ch) | |
676 { | |
677 int last_exp, n, code; | |
678 const uint16_t *ptr, *band_ptr; | |
679 float v, *q, max_scale, *q_end; | |
2967 | 680 |
783 | 681 band_ptr = s->exponent_bands[s->frame_len_bits - s->block_len_bits]; |
682 ptr = band_ptr; | |
683 q = s->exponents[ch]; | |
684 q_end = q + s->block_len; | |
685 max_scale = 0; | |
686 if (s->version == 1) { | |
687 last_exp = get_bits(&s->gb, 5) + 10; | |
3235 | 688 /* XXX: use a table */ |
689 v = pow(10, last_exp * (1.0 / 16.0)); | |
783 | 690 max_scale = v; |
691 n = *ptr++; | |
692 do { | |
693 *q++ = v; | |
694 } while (--n); | |
695 } | |
696 last_exp = 36; | |
697 while (q < q_end) { | |
3113 | 698 code = get_vlc2(&s->gb, s->exp_vlc.table, EXPVLCBITS, EXPMAX); |
783 | 699 if (code < 0) |
700 return -1; | |
701 /* NOTE: this offset is the same as MPEG4 AAC ! */ | |
702 last_exp += code - 60; | |
3235 | 703 /* XXX: use a table */ |
704 v = pow(10, last_exp * (1.0 / 16.0)); | |
783 | 705 if (v > max_scale) |
706 max_scale = v; | |
707 n = *ptr++; | |
708 do { | |
709 *q++ = v; | |
710 } while (--n); | |
711 } | |
712 s->max_exponent[ch] = max_scale; | |
713 return 0; | |
714 } | |
715 | |
716 /* return 0 if OK. return 1 if last block of frame. return -1 if | |
717 unrecoverable error. */ | |
718 static int wma_decode_block(WMADecodeContext *s) | |
719 { | |
720 int n, v, a, ch, code, bsize; | |
721 int coef_nb_bits, total_gain, parse_exponents; | |
722 int nb_coefs[MAX_CHANNELS]; | |
723 float mdct_norm; | |
724 | |
1343 | 725 #ifdef TRACE |
726 tprintf("***decode_block: %d:%d\n", s->frame_count - 1, s->block_num); | |
727 #endif | |
783 | 728 |
729 /* compute current block length */ | |
730 if (s->use_variable_block_len) { | |
731 n = av_log2(s->nb_block_sizes - 1) + 1; | |
2967 | 732 |
783 | 733 if (s->reset_block_lengths) { |
734 s->reset_block_lengths = 0; | |
735 v = get_bits(&s->gb, n); | |
736 if (v >= s->nb_block_sizes) | |
737 return -1; | |
738 s->prev_block_len_bits = s->frame_len_bits - v; | |
739 v = get_bits(&s->gb, n); | |
740 if (v >= s->nb_block_sizes) | |
741 return -1; | |
742 s->block_len_bits = s->frame_len_bits - v; | |
743 } else { | |
744 /* update block lengths */ | |
745 s->prev_block_len_bits = s->block_len_bits; | |
746 s->block_len_bits = s->next_block_len_bits; | |
747 } | |
748 v = get_bits(&s->gb, n); | |
749 if (v >= s->nb_block_sizes) | |
750 return -1; | |
751 s->next_block_len_bits = s->frame_len_bits - v; | |
752 } else { | |
753 /* fixed block len */ | |
754 s->next_block_len_bits = s->frame_len_bits; | |
755 s->prev_block_len_bits = s->frame_len_bits; | |
756 s->block_len_bits = s->frame_len_bits; | |
757 } | |
758 | |
759 /* now check if the block length is coherent with the frame length */ | |
760 s->block_len = 1 << s->block_len_bits; | |
761 if ((s->block_pos + s->block_len) > s->frame_len) | |
762 return -1; | |
763 | |
764 if (s->nb_channels == 2) { | |
765 s->ms_stereo = get_bits(&s->gb, 1); | |
766 } | |
767 v = 0; | |
768 for(ch = 0; ch < s->nb_channels; ch++) { | |
769 a = get_bits(&s->gb, 1); | |
770 s->channel_coded[ch] = a; | |
771 v |= a; | |
772 } | |
773 /* if no channel coded, no need to go further */ | |
774 /* XXX: fix potential framing problems */ | |
775 if (!v) | |
776 goto next; | |
777 | |
778 bsize = s->frame_len_bits - s->block_len_bits; | |
779 | |
780 /* read total gain and extract corresponding number of bits for | |
781 coef escape coding */ | |
782 total_gain = 1; | |
783 for(;;) { | |
784 a = get_bits(&s->gb, 7); | |
785 total_gain += a; | |
786 if (a != 127) | |
787 break; | |
788 } | |
2967 | 789 |
783 | 790 if (total_gain < 15) |
791 coef_nb_bits = 13; | |
792 else if (total_gain < 32) | |
793 coef_nb_bits = 12; | |
794 else if (total_gain < 40) | |
795 coef_nb_bits = 11; | |
796 else if (total_gain < 45) | |
797 coef_nb_bits = 10; | |
798 else | |
799 coef_nb_bits = 9; | |
800 | |
801 /* compute number of coefficients */ | |
802 n = s->coefs_end[bsize] - s->coefs_start; | |
803 for(ch = 0; ch < s->nb_channels; ch++) | |
804 nb_coefs[ch] = n; | |
805 | |
806 /* complex coding */ | |
807 if (s->use_noise_coding) { | |
808 | |
809 for(ch = 0; ch < s->nb_channels; ch++) { | |
810 if (s->channel_coded[ch]) { | |
811 int i, n, a; | |
812 n = s->exponent_high_sizes[bsize]; | |
813 for(i=0;i<n;i++) { | |
814 a = get_bits(&s->gb, 1); | |
815 s->high_band_coded[ch][i] = a; | |
816 /* if noise coding, the coefficients are not transmitted */ | |
817 if (a) | |
818 nb_coefs[ch] -= s->exponent_high_bands[bsize][i]; | |
819 } | |
820 } | |
821 } | |
822 for(ch = 0; ch < s->nb_channels; ch++) { | |
823 if (s->channel_coded[ch]) { | |
824 int i, n, val, code; | |
825 | |
826 n = s->exponent_high_sizes[bsize]; | |
827 val = (int)0x80000000; | |
828 for(i=0;i<n;i++) { | |
829 if (s->high_band_coded[ch][i]) { | |
830 if (val == (int)0x80000000) { | |
831 val = get_bits(&s->gb, 7) - 19; | |
832 } else { | |
3113 | 833 code = get_vlc2(&s->gb, s->hgain_vlc.table, HGAINVLCBITS, HGAINMAX); |
783 | 834 if (code < 0) |
835 return -1; | |
836 val += code - 18; | |
837 } | |
838 s->high_band_values[ch][i] = val; | |
839 } | |
840 } | |
841 } | |
842 } | |
843 } | |
2967 | 844 |
783 | 845 /* exposant can be interpolated in short blocks. */ |
846 parse_exponents = 1; | |
847 if (s->block_len_bits != s->frame_len_bits) { | |
848 parse_exponents = get_bits(&s->gb, 1); | |
849 } | |
2967 | 850 |
783 | 851 if (parse_exponents) { |
852 for(ch = 0; ch < s->nb_channels; ch++) { | |
853 if (s->channel_coded[ch]) { | |
854 if (s->use_exp_vlc) { | |
855 if (decode_exp_vlc(s, ch) < 0) | |
856 return -1; | |
857 } else { | |
858 decode_exp_lsp(s, ch); | |
859 } | |
860 } | |
861 } | |
862 } else { | |
863 for(ch = 0; ch < s->nb_channels; ch++) { | |
864 if (s->channel_coded[ch]) { | |
2967 | 865 interpolate_array(s->exponents[ch], 1 << s->prev_block_len_bits, |
783 | 866 s->block_len); |
867 } | |
868 } | |
869 } | |
870 | |
871 /* parse spectral coefficients : just RLE encoding */ | |
872 for(ch = 0; ch < s->nb_channels; ch++) { | |
873 if (s->channel_coded[ch]) { | |
874 VLC *coef_vlc; | |
875 int level, run, sign, tindex; | |
876 int16_t *ptr, *eptr; | |
3776 | 877 const uint16_t *level_table, *run_table; |
783 | 878 |
879 /* special VLC tables are used for ms stereo because | |
880 there is potentially less energy there */ | |
881 tindex = (ch == 1 && s->ms_stereo); | |
882 coef_vlc = &s->coef_vlc[tindex]; | |
883 run_table = s->run_table[tindex]; | |
884 level_table = s->level_table[tindex]; | |
885 /* XXX: optimize */ | |
886 ptr = &s->coefs1[ch][0]; | |
887 eptr = ptr + nb_coefs[ch]; | |
888 memset(ptr, 0, s->block_len * sizeof(int16_t)); | |
889 for(;;) { | |
3113 | 890 code = get_vlc2(&s->gb, coef_vlc->table, VLCBITS, VLCMAX); |
783 | 891 if (code < 0) |
892 return -1; | |
893 if (code == 1) { | |
894 /* EOB */ | |
895 break; | |
896 } else if (code == 0) { | |
897 /* escape */ | |
898 level = get_bits(&s->gb, coef_nb_bits); | |
899 /* NOTE: this is rather suboptimal. reading | |
900 block_len_bits would be better */ | |
901 run = get_bits(&s->gb, s->frame_len_bits); | |
902 } else { | |
903 /* normal code */ | |
904 run = run_table[code]; | |
905 level = level_table[code]; | |
906 } | |
907 sign = get_bits(&s->gb, 1); | |
908 if (!sign) | |
909 level = -level; | |
910 ptr += run; | |
911 if (ptr >= eptr) | |
3361 | 912 { |
913 av_log(NULL, AV_LOG_ERROR, "overflow in spectral RLE, ignoring\n"); | |
914 break; | |
915 } | |
783 | 916 *ptr++ = level; |
917 /* NOTE: EOB can be omitted */ | |
918 if (ptr >= eptr) | |
919 break; | |
920 } | |
921 } | |
922 if (s->version == 1 && s->nb_channels >= 2) { | |
923 align_get_bits(&s->gb); | |
924 } | |
925 } | |
2967 | 926 |
783 | 927 /* normalize */ |
928 { | |
929 int n4 = s->block_len / 2; | |
930 mdct_norm = 1.0 / (float)n4; | |
931 if (s->version == 1) { | |
932 mdct_norm *= sqrt(n4); | |
933 } | |
934 } | |
935 | |
936 /* finally compute the MDCT coefficients */ | |
937 for(ch = 0; ch < s->nb_channels; ch++) { | |
938 if (s->channel_coded[ch]) { | |
939 int16_t *coefs1; | |
940 float *coefs, *exponents, mult, mult1, noise, *exp_ptr; | |
941 int i, j, n, n1, last_high_band; | |
942 float exp_power[HIGH_BAND_MAX_SIZE]; | |
943 | |
944 coefs1 = s->coefs1[ch]; | |
945 exponents = s->exponents[ch]; | |
3235 | 946 mult = pow(10, total_gain * 0.05) / s->max_exponent[ch]; |
783 | 947 mult *= mdct_norm; |
948 coefs = s->coefs[ch]; | |
949 if (s->use_noise_coding) { | |
950 mult1 = mult; | |
951 /* very low freqs : noise */ | |
952 for(i = 0;i < s->coefs_start; i++) { | |
953 *coefs++ = s->noise_table[s->noise_index] * (*exponents++) * mult1; | |
954 s->noise_index = (s->noise_index + 1) & (NOISE_TAB_SIZE - 1); | |
955 } | |
2967 | 956 |
783 | 957 n1 = s->exponent_high_sizes[bsize]; |
958 | |
959 /* compute power of high bands */ | |
2967 | 960 exp_ptr = exponents + |
961 s->high_band_start[bsize] - | |
783 | 962 s->coefs_start; |
963 last_high_band = 0; /* avoid warning */ | |
964 for(j=0;j<n1;j++) { | |
2967 | 965 n = s->exponent_high_bands[s->frame_len_bits - |
783 | 966 s->block_len_bits][j]; |
967 if (s->high_band_coded[ch][j]) { | |
968 float e2, v; | |
969 e2 = 0; | |
970 for(i = 0;i < n; i++) { | |
971 v = exp_ptr[i]; | |
972 e2 += v * v; | |
973 } | |
974 exp_power[j] = e2 / n; | |
975 last_high_band = j; | |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
976 tprintf("%d: power=%f (%d)\n", j, exp_power[j], n); |
783 | 977 } |
978 exp_ptr += n; | |
979 } | |
980 | |
981 /* main freqs and high freqs */ | |
982 for(j=-1;j<n1;j++) { | |
983 if (j < 0) { | |
2967 | 984 n = s->high_band_start[bsize] - |
783 | 985 s->coefs_start; |
986 } else { | |
2967 | 987 n = s->exponent_high_bands[s->frame_len_bits - |
783 | 988 s->block_len_bits][j]; |
989 } | |
990 if (j >= 0 && s->high_band_coded[ch][j]) { | |
991 /* use noise with specified power */ | |
992 mult1 = sqrt(exp_power[j] / exp_power[last_high_band]); | |
3235 | 993 /* XXX: use a table */ |
994 mult1 = mult1 * pow(10, s->high_band_values[ch][j] * 0.05); | |
783 | 995 mult1 = mult1 / (s->max_exponent[ch] * s->noise_mult); |
996 mult1 *= mdct_norm; | |
997 for(i = 0;i < n; i++) { | |
998 noise = s->noise_table[s->noise_index]; | |
999 s->noise_index = (s->noise_index + 1) & (NOISE_TAB_SIZE - 1); | |
1000 *coefs++ = (*exponents++) * noise * mult1; | |
1001 } | |
1002 } else { | |
1003 /* coded values + small noise */ | |
1004 for(i = 0;i < n; i++) { | |
1005 noise = s->noise_table[s->noise_index]; | |
1006 s->noise_index = (s->noise_index + 1) & (NOISE_TAB_SIZE - 1); | |
1007 *coefs++ = ((*coefs1++) + noise) * (*exponents++) * mult; | |
1008 } | |
1009 } | |
1010 } | |
1011 | |
1012 /* very high freqs : noise */ | |
1013 n = s->block_len - s->coefs_end[bsize]; | |
1014 mult1 = mult * exponents[-1]; | |
1015 for(i = 0; i < n; i++) { | |
1016 *coefs++ = s->noise_table[s->noise_index] * mult1; | |
1017 s->noise_index = (s->noise_index + 1) & (NOISE_TAB_SIZE - 1); | |
1018 } | |
1019 } else { | |
1020 /* XXX: optimize more */ | |
1021 for(i = 0;i < s->coefs_start; i++) | |
1022 *coefs++ = 0.0; | |
1023 n = nb_coefs[ch]; | |
1024 for(i = 0;i < n; i++) { | |
1025 *coefs++ = coefs1[i] * exponents[i] * mult; | |
1026 } | |
1027 n = s->block_len - s->coefs_end[bsize]; | |
1028 for(i = 0;i < n; i++) | |
1029 *coefs++ = 0.0; | |
1030 } | |
1031 } | |
1032 } | |
1033 | |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
1034 #ifdef TRACE |
783 | 1035 for(ch = 0; ch < s->nb_channels; ch++) { |
1036 if (s->channel_coded[ch]) { | |
1037 dump_floats("exponents", 3, s->exponents[ch], s->block_len); | |
1038 dump_floats("coefs", 1, s->coefs[ch], s->block_len); | |
1039 } | |
1040 } | |
1041 #endif | |
2967 | 1042 |
783 | 1043 if (s->ms_stereo && s->channel_coded[1]) { |
1044 float a, b; | |
1045 int i; | |
1046 | |
1047 /* nominal case for ms stereo: we do it before mdct */ | |
1048 /* no need to optimize this case because it should almost | |
1049 never happen */ | |
1050 if (!s->channel_coded[0]) { | |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
1051 tprintf("rare ms-stereo case happened\n"); |
783 | 1052 memset(s->coefs[0], 0, sizeof(float) * s->block_len); |
1053 s->channel_coded[0] = 1; | |
1054 } | |
2967 | 1055 |
783 | 1056 for(i = 0; i < s->block_len; i++) { |
1057 a = s->coefs[0][i]; | |
1058 b = s->coefs[1][i]; | |
1059 s->coefs[0][i] = a + b; | |
1060 s->coefs[1][i] = a - b; | |
1061 } | |
1062 } | |
1063 | |
1064 /* build the window : we ensure that when the windows overlap | |
1065 their squared sum is always 1 (MDCT reconstruction rule) */ | |
1066 /* XXX: merge with output */ | |
1067 { | |
1068 int i, next_block_len, block_len, prev_block_len, n; | |
1069 float *wptr; | |
1070 | |
1071 block_len = s->block_len; | |
1072 prev_block_len = 1 << s->prev_block_len_bits; | |
1073 next_block_len = 1 << s->next_block_len_bits; | |
1074 | |
1075 /* right part */ | |
4301
b43bd0c56eaa
Bug fix for crashes when SSE is used on unaligned arrays.
banan
parents:
3947
diff
changeset
|
1076 wptr = s->window + block_len; |
783 | 1077 if (block_len <= next_block_len) { |
1078 for(i=0;i<block_len;i++) | |
1079 *wptr++ = s->windows[bsize][i]; | |
1080 } else { | |
1081 /* overlap */ | |
1082 n = (block_len / 2) - (next_block_len / 2); | |
1083 for(i=0;i<n;i++) | |
1084 *wptr++ = 1.0; | |
1085 for(i=0;i<next_block_len;i++) | |
1086 *wptr++ = s->windows[s->frame_len_bits - s->next_block_len_bits][i]; | |
1087 for(i=0;i<n;i++) | |
1088 *wptr++ = 0.0; | |
1089 } | |
1090 | |
1091 /* left part */ | |
4301
b43bd0c56eaa
Bug fix for crashes when SSE is used on unaligned arrays.
banan
parents:
3947
diff
changeset
|
1092 wptr = s->window + block_len; |
783 | 1093 if (block_len <= prev_block_len) { |
1094 for(i=0;i<block_len;i++) | |
1095 *--wptr = s->windows[bsize][i]; | |
1096 } else { | |
1097 /* overlap */ | |
1098 n = (block_len / 2) - (prev_block_len / 2); | |
1099 for(i=0;i<n;i++) | |
1100 *--wptr = 1.0; | |
1101 for(i=0;i<prev_block_len;i++) | |
1102 *--wptr = s->windows[s->frame_len_bits - s->prev_block_len_bits][i]; | |
1103 for(i=0;i<n;i++) | |
1104 *--wptr = 0.0; | |
1105 } | |
1106 } | |
1107 | |
2967 | 1108 |
783 | 1109 for(ch = 0; ch < s->nb_channels; ch++) { |
1110 if (s->channel_coded[ch]) { | |
1111 float *ptr; | |
3592
6a358dccf2ab
SIMD vector optimizations. 3% faster overall decoding.
banan
parents:
3555
diff
changeset
|
1112 int n4, index, n; |
783 | 1113 |
1114 n = s->block_len; | |
1115 n4 = s->block_len / 2; | |
3555 | 1116 s->mdct_ctx[bsize].fft.imdct_calc(&s->mdct_ctx[bsize], |
4301
b43bd0c56eaa
Bug fix for crashes when SSE is used on unaligned arrays.
banan
parents:
3947
diff
changeset
|
1117 s->output, s->coefs[ch], s->mdct_tmp); |
783 | 1118 |
1119 /* XXX: optimize all that by build the window and | |
1120 multipying/adding at the same time */ | |
1121 | |
3592
6a358dccf2ab
SIMD vector optimizations. 3% faster overall decoding.
banan
parents:
3555
diff
changeset
|
1122 /* multiply by the window and add in the frame */ |
783 | 1123 index = (s->frame_len / 2) + s->block_pos - n4; |
1124 ptr = &s->frame_out[ch][index]; | |
4301
b43bd0c56eaa
Bug fix for crashes when SSE is used on unaligned arrays.
banan
parents:
3947
diff
changeset
|
1125 s->dsp.vector_fmul_add_add(ptr,s->window,s->output,ptr,0,2*n,1); |
783 | 1126 |
1127 /* specific fast case for ms-stereo : add to second | |
1128 channel if it is not coded */ | |
1129 if (s->ms_stereo && !s->channel_coded[1]) { | |
1130 ptr = &s->frame_out[1][index]; | |
4301
b43bd0c56eaa
Bug fix for crashes when SSE is used on unaligned arrays.
banan
parents:
3947
diff
changeset
|
1131 s->dsp.vector_fmul_add_add(ptr,s->window,s->output,ptr,0,2*n,1); |
783 | 1132 } |
1133 } | |
1134 } | |
1135 next: | |
1136 /* update block number */ | |
1137 s->block_num++; | |
1138 s->block_pos += s->block_len; | |
1139 if (s->block_pos >= s->frame_len) | |
1140 return 1; | |
1141 else | |
1142 return 0; | |
1143 } | |
1144 | |
1145 /* decode a frame of frame_len samples */ | |
1146 static int wma_decode_frame(WMADecodeContext *s, int16_t *samples) | |
1147 { | |
1148 int ret, i, n, a, ch, incr; | |
1149 int16_t *ptr; | |
1150 float *iptr; | |
1151 | |
1343 | 1152 #ifdef TRACE |
1153 tprintf("***decode_frame: %d size=%d\n", s->frame_count++, s->frame_len); | |
1154 #endif | |
783 | 1155 |
1156 /* read each block */ | |
1157 s->block_num = 0; | |
1158 s->block_pos = 0; | |
1159 for(;;) { | |
1160 ret = wma_decode_block(s); | |
2967 | 1161 if (ret < 0) |
783 | 1162 return -1; |
1163 if (ret) | |
1164 break; | |
1165 } | |
1166 | |
1167 /* convert frame to integer */ | |
1168 n = s->frame_len; | |
1169 incr = s->nb_channels; | |
1170 for(ch = 0; ch < s->nb_channels; ch++) { | |
1171 ptr = samples + ch; | |
1172 iptr = s->frame_out[ch]; | |
1173 | |
1174 for(i=0;i<n;i++) { | |
797 | 1175 a = lrintf(*iptr++); |
783 | 1176 if (a > 32767) |
1177 a = 32767; | |
1178 else if (a < -32768) | |
1179 a = -32768; | |
1180 *ptr = a; | |
1181 ptr += incr; | |
1182 } | |
1183 /* prepare for next block */ | |
1184 memmove(&s->frame_out[ch][0], &s->frame_out[ch][s->frame_len], | |
1185 s->frame_len * sizeof(float)); | |
1186 /* XXX: suppress this */ | |
2967 | 1187 memset(&s->frame_out[ch][s->frame_len], 0, |
783 | 1188 s->frame_len * sizeof(float)); |
1189 } | |
1190 | |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
1191 #ifdef TRACE |
783 | 1192 dump_shorts("samples", samples, n * s->nb_channels); |
1193 #endif | |
1194 return 0; | |
1195 } | |
1196 | |
2967 | 1197 static int wma_decode_superframe(AVCodecContext *avctx, |
783 | 1198 void *data, int *data_size, |
1064 | 1199 uint8_t *buf, int buf_size) |
783 | 1200 { |
1201 WMADecodeContext *s = avctx->priv_data; | |
1202 int nb_frames, bit_offset, i, pos, len; | |
1203 uint8_t *q; | |
1204 int16_t *samples; | |
2967 | 1205 |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
1206 tprintf("***decode_superframe:\n"); |
783 | 1207 |
1750 | 1208 if(buf_size==0){ |
1209 s->last_superframe_len = 0; | |
1210 return 0; | |
1211 } | |
2967 | 1212 |
783 | 1213 samples = data; |
1214 | |
1025
1f9afd8b9131
GetBitContext.size is allways multiplied by 8 -> use size_in_bits to avoid useless *8 in a few inner loops
michaelni
parents:
972
diff
changeset
|
1215 init_get_bits(&s->gb, buf, buf_size*8); |
2967 | 1216 |
783 | 1217 if (s->use_bit_reservoir) { |
1218 /* read super frame header */ | |
1219 get_bits(&s->gb, 4); /* super frame index */ | |
1220 nb_frames = get_bits(&s->gb, 4) - 1; | |
1221 | |
1222 bit_offset = get_bits(&s->gb, s->byte_offset_bits + 3); | |
1223 | |
1224 if (s->last_superframe_len > 0) { | |
1225 // printf("skip=%d\n", s->last_bitoffset); | |
1226 /* add bit_offset bits to last frame */ | |
2967 | 1227 if ((s->last_superframe_len + ((bit_offset + 7) >> 3)) > |
783 | 1228 MAX_CODED_SUPERFRAME_SIZE) |
964
6e6773512288
oops : better error resilience - should fix most wma decoding problems
bellard
parents:
819
diff
changeset
|
1229 goto fail; |
783 | 1230 q = s->last_superframe + s->last_superframe_len; |
1231 len = bit_offset; | |
3362
c43fcf831f7c
Do not read full byte when less than 8 bits are still to be read.
reimar
parents:
3361
diff
changeset
|
1232 while (len > 7) { |
783 | 1233 *q++ = (get_bits)(&s->gb, 8); |
1234 len -= 8; | |
1235 } | |
1236 if (len > 0) { | |
1237 *q++ = (get_bits)(&s->gb, len) << (8 - len); | |
1238 } | |
2967 | 1239 |
783 | 1240 /* XXX: bit_offset bits into last frame */ |
1025
1f9afd8b9131
GetBitContext.size is allways multiplied by 8 -> use size_in_bits to avoid useless *8 in a few inner loops
michaelni
parents:
972
diff
changeset
|
1241 init_get_bits(&s->gb, s->last_superframe, MAX_CODED_SUPERFRAME_SIZE*8); |
783 | 1242 /* skip unused bits */ |
1243 if (s->last_bitoffset > 0) | |
1244 skip_bits(&s->gb, s->last_bitoffset); | |
1245 /* this frame is stored in the last superframe and in the | |
1246 current one */ | |
1247 if (wma_decode_frame(s, samples) < 0) | |
964
6e6773512288
oops : better error resilience - should fix most wma decoding problems
bellard
parents:
819
diff
changeset
|
1248 goto fail; |
783 | 1249 samples += s->nb_channels * s->frame_len; |
1250 } | |
1251 | |
1252 /* read each frame starting from bit_offset */ | |
1253 pos = bit_offset + 4 + 4 + s->byte_offset_bits + 3; | |
1025
1f9afd8b9131
GetBitContext.size is allways multiplied by 8 -> use size_in_bits to avoid useless *8 in a few inner loops
michaelni
parents:
972
diff
changeset
|
1254 init_get_bits(&s->gb, buf + (pos >> 3), (MAX_CODED_SUPERFRAME_SIZE - (pos >> 3))*8); |
783 | 1255 len = pos & 7; |
1256 if (len > 0) | |
1257 skip_bits(&s->gb, len); | |
2967 | 1258 |
783 | 1259 s->reset_block_lengths = 1; |
1260 for(i=0;i<nb_frames;i++) { | |
1261 if (wma_decode_frame(s, samples) < 0) | |
964
6e6773512288
oops : better error resilience - should fix most wma decoding problems
bellard
parents:
819
diff
changeset
|
1262 goto fail; |
783 | 1263 samples += s->nb_channels * s->frame_len; |
1264 } | |
1265 | |
1266 /* we copy the end of the frame in the last frame buffer */ | |
1267 pos = get_bits_count(&s->gb) + ((bit_offset + 4 + 4 + s->byte_offset_bits + 3) & ~7); | |
1268 s->last_bitoffset = pos & 7; | |
1269 pos >>= 3; | |
1270 len = buf_size - pos; | |
819 | 1271 if (len > MAX_CODED_SUPERFRAME_SIZE || len < 0) { |
964
6e6773512288
oops : better error resilience - should fix most wma decoding problems
bellard
parents:
819
diff
changeset
|
1272 goto fail; |
783 | 1273 } |
1274 s->last_superframe_len = len; | |
1275 memcpy(s->last_superframe, buf + pos, len); | |
1276 } else { | |
1277 /* single frame decode */ | |
1278 if (wma_decode_frame(s, samples) < 0) | |
964
6e6773512288
oops : better error resilience - should fix most wma decoding problems
bellard
parents:
819
diff
changeset
|
1279 goto fail; |
783 | 1280 samples += s->nb_channels * s->frame_len; |
1281 } | |
1282 *data_size = (int8_t *)samples - (int8_t *)data; | |
1283 return s->block_align; | |
964
6e6773512288
oops : better error resilience - should fix most wma decoding problems
bellard
parents:
819
diff
changeset
|
1284 fail: |
6e6773512288
oops : better error resilience - should fix most wma decoding problems
bellard
parents:
819
diff
changeset
|
1285 /* when error, we reset the bit reservoir */ |
6e6773512288
oops : better error resilience - should fix most wma decoding problems
bellard
parents:
819
diff
changeset
|
1286 s->last_superframe_len = 0; |
6e6773512288
oops : better error resilience - should fix most wma decoding problems
bellard
parents:
819
diff
changeset
|
1287 return -1; |
783 | 1288 } |
1289 | |
1290 static int wma_decode_end(AVCodecContext *avctx) | |
1291 { | |
1292 WMADecodeContext *s = avctx->priv_data; | |
1293 int i; | |
1294 | |
1295 for(i = 0; i < s->nb_block_sizes; i++) | |
795
55add0e7eafb
avoid name clash - fixed again block size selection
bellard
parents:
785
diff
changeset
|
1296 ff_mdct_end(&s->mdct_ctx[i]); |
783 | 1297 for(i = 0; i < s->nb_block_sizes; i++) |
1298 av_free(s->windows[i]); | |
1299 | |
1300 if (s->use_exp_vlc) { | |
1301 free_vlc(&s->exp_vlc); | |
1302 } | |
1303 if (s->use_noise_coding) { | |
1304 free_vlc(&s->hgain_vlc); | |
1305 } | |
1306 for(i = 0;i < 2; i++) { | |
1307 free_vlc(&s->coef_vlc[i]); | |
1308 av_free(s->run_table[i]); | |
1309 av_free(s->level_table[i]); | |
1310 } | |
2967 | 1311 |
783 | 1312 return 0; |
1313 } | |
1314 | |
1315 AVCodec wmav1_decoder = | |
1316 { | |
1317 "wmav1", | |
1318 CODEC_TYPE_AUDIO, | |
1319 CODEC_ID_WMAV1, | |
1320 sizeof(WMADecodeContext), | |
1321 wma_decode_init, | |
1322 NULL, | |
1323 wma_decode_end, | |
1324 wma_decode_superframe, | |
1325 }; | |
1326 | |
1327 AVCodec wmav2_decoder = | |
1328 { | |
1329 "wmav2", | |
1330 CODEC_TYPE_AUDIO, | |
1331 CODEC_ID_WMAV2, | |
1332 sizeof(WMADecodeContext), | |
1333 wma_decode_init, | |
1334 NULL, | |
1335 wma_decode_end, | |
1336 wma_decode_superframe, | |
1337 }; |