Mercurial > libavcodec.hg
annotate wmadec.c @ 3198:6b9f0c4fbdbe libavcodec
First part of a series of speed-enhancing patches.
This one sets up a snow.h and makes snow use the dsputil function pointer
framework to access the three functions that will be implemented in asm
in the other parts of the patchset.
Patch by Robert Edele < yartrebo AH earthlink POIS net>
Original thread:
Subject: [Ffmpeg-devel] [PATCH] Snow mmx+sse2 asm optimizations
Date: Sun, 05 Feb 2006 12:47:14 -0500
author | gpoirier |
---|---|
date | Thu, 16 Mar 2006 19:18:18 +0000 |
parents | 10cda832bd0f |
children | d3c05c7dabcd |
rev | line source |
---|---|
783 | 1 /* |
2 * WMA compatible decoder | |
3 * Copyright (c) 2002 The FFmpeg Project. | |
4 * | |
5 * This library is free software; you can redistribute it and/or | |
6 * modify it under the terms of the GNU Lesser General Public | |
7 * License as published by the Free Software Foundation; either | |
8 * version 2 of the License, or (at your option) any later version. | |
9 * | |
10 * This library is distributed in the hope that it will be useful, | |
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 * Lesser General Public License for more details. | |
14 * | |
15 * You should have received a copy of the GNU Lesser General Public | |
16 * License along with this library; if not, write to the Free Software | |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
3022
diff
changeset
|
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
783 | 18 */ |
1106 | 19 |
20 /** | |
21 * @file wmadec.c | |
22 * WMA compatible decoder. | |
1967
2b0fc6b25ab8
add the minimal documentation to make this decoder useful
melanson
parents:
1750
diff
changeset
|
23 * This decoder handles Microsoft Windows Media Audio data, versions 1 & 2. |
2967 | 24 * WMA v1 is identified by audio format 0x160 in Microsoft media files |
1967
2b0fc6b25ab8
add the minimal documentation to make this decoder useful
melanson
parents:
1750
diff
changeset
|
25 * (ASF/AVI/WAV). WMA v2 is identified by audio format 0x161. |
2b0fc6b25ab8
add the minimal documentation to make this decoder useful
melanson
parents:
1750
diff
changeset
|
26 * |
2b0fc6b25ab8
add the minimal documentation to make this decoder useful
melanson
parents:
1750
diff
changeset
|
27 * To use this decoder, a calling application must supply the extra data |
2b0fc6b25ab8
add the minimal documentation to make this decoder useful
melanson
parents:
1750
diff
changeset
|
28 * bytes provided with the WMA data. These are the extra, codec-specific |
2967 | 29 * bytes at the end of a WAVEFORMATEX data structure. Transmit these bytes |
30 * to the decoder using the extradata[_size] fields in AVCodecContext. There | |
1967
2b0fc6b25ab8
add the minimal documentation to make this decoder useful
melanson
parents:
1750
diff
changeset
|
31 * should be 4 extra bytes for v1 data and 6 extra bytes for v2 data. |
1106 | 32 */ |
33 | |
783 | 34 #include "avcodec.h" |
2398
582e635cfa08
common.c -> bitstream.c (and the single non bitstream func -> utils.c)
michael
parents:
2370
diff
changeset
|
35 #include "bitstream.h" |
783 | 36 #include "dsputil.h" |
37 | |
38 /* size of blocks */ | |
39 #define BLOCK_MIN_BITS 7 | |
40 #define BLOCK_MAX_BITS 11 | |
41 #define BLOCK_MAX_SIZE (1 << BLOCK_MAX_BITS) | |
42 | |
43 #define BLOCK_NB_SIZES (BLOCK_MAX_BITS - BLOCK_MIN_BITS + 1) | |
44 | |
45 /* XXX: find exact max size */ | |
46 #define HIGH_BAND_MAX_SIZE 16 | |
47 | |
48 #define NB_LSP_COEFS 10 | |
49 | |
817 | 50 /* XXX: is it a suitable value ? */ |
2775
f3cdd51c9e16
WMA MAX_CODED_SUPERFRAME_SIZE too small patch by (Mark Weaver: mark-clist, npsl co uk)
michael
parents:
2398
diff
changeset
|
51 #define MAX_CODED_SUPERFRAME_SIZE 16384 |
783 | 52 |
53 #define MAX_CHANNELS 2 | |
54 | |
55 #define NOISE_TAB_SIZE 8192 | |
56 | |
57 #define LSP_POW_BITS 7 | |
58 | |
3022 | 59 #define VLCBITS 9 |
3113 | 60 #define VLCMAX ((22+VLCBITS-1)/VLCBITS) |
61 | |
62 #define EXPVLCBITS 8 | |
63 #define EXPMAX ((19+EXPVLCBITS-1)/EXPVLCBITS) | |
64 | |
65 #define HGAINVLCBITS 9 | |
66 #define HGAINMAX ((13+HGAINVLCBITS-1)/HGAINVLCBITS) | |
3022 | 67 |
/**
 * Private decoder state for one WMA v1/v2 stream.
 *
 * The stream parameters, band layouts, VLC tables, MDCT contexts and
 * windows are filled in by wma_decode_init().
 */
typedef struct WMADecodeContext {
    GetBitContext gb;
    int sample_rate;            /* copied from AVCodecContext.sample_rate at init */
    int nb_channels;            /* copied from AVCodecContext.channels at init */
    int bit_rate;               /* copied from AVCodecContext.bit_rate at init */
    int version; /* 1 = 0x160 (WMAV1), 2 = 0x161 (WMAV2) */
    int block_align;            /* copied from AVCodecContext.block_align at init */
    int use_bit_reservoir;      /* extradata flags2 & 0x0002 */
    int use_variable_block_len; /* extradata flags2 & 0x0004 */
    int use_exp_vlc;  /* exponent coding: 0 = lsp, 1 = vlc + delta */
    int use_noise_coding; /* true if perceptual noise is added */
    int byte_offset_bits;       /* derived at init from bits per sample and frame_len */
    VLC exp_vlc;                /* built from scale_huffbits/scale_huffcodes when use_exp_vlc is set */
    int exponent_sizes[BLOCK_NB_SIZES];         /* number of entries used in exponent_bands[k] */
    uint16_t exponent_bands[BLOCK_NB_SIZES][25]; /* scale factor band widths for each MDCT block size */
    int high_band_start[BLOCK_NB_SIZES]; /* index of first coef in high band */
    int coefs_start;               /* first coded coef */
    int coefs_end[BLOCK_NB_SIZES]; /* max number of coded coefficients */
    int exponent_high_sizes[BLOCK_NB_SIZES];    /* number of entries used in exponent_high_bands[k] */
    int exponent_high_bands[BLOCK_NB_SIZES][HIGH_BAND_MAX_SIZE]; /* band widths clipped to [high_band_start, coefs_end) */
    VLC hgain_vlc;              /* built from hgain_huffbits/hgain_huffcodes when noise coding is used */

    /* coded values in high bands */
    int high_band_coded[MAX_CHANNELS][HIGH_BAND_MAX_SIZE];
    int high_band_values[MAX_CHANNELS][HIGH_BAND_MAX_SIZE];

    /* there are two possible tables for spectral coefficients */
    VLC coef_vlc[2];
    uint16_t *run_table[2];     /* allocated by init_coef_vlc(), one entry per code */
    uint16_t *level_table[2];   /* allocated by init_coef_vlc(), one entry per code */
    /* frame info */
    int frame_len;       /* frame length in samples */
    int frame_len_bits;  /* frame_len = 1 << frame_len_bits */
    int nb_block_sizes;  /* number of block sizes */
    /* block info */
    int reset_block_lengths;    /* set to 1 at the end of init */
    int block_len_bits; /* log2 of current block length */
    int next_block_len_bits; /* log2 of next block length */
    int prev_block_len_bits; /* log2 of prev block length */
    int block_len; /* block length in samples */
    int block_num; /* block number in current frame */
    int block_pos; /* current position in frame */
    uint8_t ms_stereo; /* true if mid/side stereo mode */
    uint8_t channel_coded[MAX_CHANNELS]; /* true if channel is coded */
    DECLARE_ALIGNED_16(float, exponents[MAX_CHANNELS][BLOCK_MAX_SIZE]);
    float max_exponent[MAX_CHANNELS];   /* largest value written to exponents[ch] by the exponent decoders */
    int16_t coefs1[MAX_CHANNELS][BLOCK_MAX_SIZE];
    DECLARE_ALIGNED_16(float, coefs[MAX_CHANNELS][BLOCK_MAX_SIZE]);
    MDCTContext mdct_ctx[BLOCK_NB_SIZES];   /* one context per block size, set up with ff_mdct_init() */
    float *windows[BLOCK_NB_SIZES];         /* sine windows, one per block size, allocated at init */
    DECLARE_ALIGNED_16(FFTSample, mdct_tmp[BLOCK_MAX_SIZE]); /* temporary storage for imdct */
    /* output buffer for one frame and the last for IMDCT windowing */
    DECLARE_ALIGNED_16(float, frame_out[MAX_CHANNELS][BLOCK_MAX_SIZE * 2]);
    /* last frame info */
    uint8_t last_superframe[MAX_CODED_SUPERFRAME_SIZE + 4]; /* padding added */
    int last_bitoffset;
    int last_superframe_len;
    float noise_table[NOISE_TAB_SIZE];  /* pseudo-random values pre-scaled by noise_mult, filled at init */
    int noise_index;
    float noise_mult; /* XXX: suppress that and integrate it in the noise array */
    /* lsp_to_curve tables */
    float lsp_cos_table[BLOCK_MAX_SIZE];            /* 2 * cos(pi * i / frame_len) */
    float lsp_pow_e_table[256];                     /* 2^(-(i - 126) / 4), indexed by float exponent field */
    float lsp_pow_m_table1[(1 << LSP_POW_BITS)];    /* mantissa interpolation offsets used by pow_m1_4() */
    float lsp_pow_m_table2[(1 << LSP_POW_BITS)];    /* mantissa interpolation slopes used by pow_m1_4() */
    /* pow tables */
    float pow_005_10[121];      /* 10^(i * 0.05) */
    float pow_00625_10[121];    /* 10^(i / 16) */

#ifdef TRACE
    int frame_count;
#endif
} WMADecodeContext;
141 | |
/**
 * Static description of one spectral-coefficient code set, consumed by
 * init_coef_vlc() to build a VLC plus its run/level lookup tables.
 */
typedef struct CoefVLCTable {
    int n; /* total number of codes */
    const uint32_t *huffcodes; /* VLC bit values */
    const uint8_t *huffbits;   /* VLC bit size */
    const uint16_t *levels; /* table to build run/level tables: levels[k] is the number of runs at level k + 1 */
} CoefVLCTable;
148 | |
149 static void wma_lsp_to_curve_init(WMADecodeContext *s, int frame_len); | |
150 | |
151 #include "wmadata.h" | |
152 | |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
153 #ifdef TRACE |
/**
 * Trace helper: print an array of shorts, eight values per row.
 *
 * Each row starts with the index of its first element. A partially
 * filled last row is terminated with a newline, matching dump_floats().
 *
 * @param name label printed in the header line
 * @param tab  values to print
 * @param n    number of values in tab
 */
static void dump_shorts(const char *name, const short *tab, int n)
{
    int i;

    tprintf("%s[%d]:\n", name, n);
    for(i=0;i<n;i++) {
        if ((i & 7) == 0)
            tprintf("%4d: ", i);
        tprintf(" %5d.0", tab[i]);
        if ((i & 7) == 7)
            tprintf("\n");
    }
    /* close the last row when it holds fewer than eight values */
    if ((i & 7) != 0)
        tprintf("\n");
}
167 | |
/**
 * Trace helper: print an array of floats, eight values per row.
 *
 * @param name label printed in the header line
 * @param prec number of digits after the decimal point
 * @param tab  values to print
 * @param n    number of values in tab
 */
static void dump_floats(const char *name, int prec, const float *tab, int n)
{
    int idx = 0;

    tprintf("%s[%d]:\n", name, n);
    while (idx < n) {
        const int column = idx & 7;

        /* each row is prefixed with the index of its first value */
        if (column == 0) {
            tprintf("%4d: ", idx);
        }
        tprintf(" %8.*f", prec, tab[idx]);
        if (column == 7) {
            tprintf("\n");
        }
        idx++;
    }
    /* terminate a partially filled last row */
    if (idx & 7) {
        tprintf("\n");
    }
}
183 #endif | |
184 | |
185 /* XXX: use same run/length optimization as mpeg decoders */ | |
2967 | 186 static void init_coef_vlc(VLC *vlc, |
783 | 187 uint16_t **prun_table, uint16_t **plevel_table, |
188 const CoefVLCTable *vlc_table) | |
189 { | |
190 int n = vlc_table->n; | |
191 const uint8_t *table_bits = vlc_table->huffbits; | |
192 const uint32_t *table_codes = vlc_table->huffcodes; | |
193 const uint16_t *levels_table = vlc_table->levels; | |
194 uint16_t *run_table, *level_table; | |
195 const uint16_t *p; | |
196 int i, l, j, level; | |
197 | |
3113 | 198 init_vlc(vlc, VLCBITS, n, table_bits, 1, 1, table_codes, 4, 4, 0); |
783 | 199 |
1031
19de1445beb2
use av_malloc() functions - added av_strdup and av_realloc()
bellard
parents:
1025
diff
changeset
|
200 run_table = av_malloc(n * sizeof(uint16_t)); |
19de1445beb2
use av_malloc() functions - added av_strdup and av_realloc()
bellard
parents:
1025
diff
changeset
|
201 level_table = av_malloc(n * sizeof(uint16_t)); |
783 | 202 p = levels_table; |
203 i = 2; | |
204 level = 1; | |
205 while (i < n) { | |
206 l = *p++; | |
207 for(j=0;j<l;j++) { | |
208 run_table[i] = j; | |
209 level_table[i] = level; | |
210 i++; | |
211 } | |
212 level++; | |
213 } | |
214 *prun_table = run_table; | |
215 *plevel_table = level_table; | |
216 } | |
217 | |
218 static int wma_decode_init(AVCodecContext * avctx) | |
219 { | |
220 WMADecodeContext *s = avctx->priv_data; | |
221 int i, flags1, flags2; | |
222 float *window; | |
223 uint8_t *extradata; | |
3176
babf844e1308
Init simplification and 2% faster wma_decode_block on amd64 with tables use
banan
parents:
3113
diff
changeset
|
224 float bps, bps1; |
3195
10cda832bd0f
fix coverity warning CID: 255 (uninitalized variable used to build tables which arent used, well there is a slight change of a FPU exception maybe ...)
michael
parents:
3176
diff
changeset
|
225 volatile float high_freq_factor= 0; //initial value should not matter as the tables build from this are unused if !use_noise_coding |
783 | 226 int sample_rate1; |
227 int coef_vlc_table; | |
2967 | 228 |
783 | 229 s->sample_rate = avctx->sample_rate; |
230 s->nb_channels = avctx->channels; | |
231 s->bit_rate = avctx->bit_rate; | |
232 s->block_align = avctx->block_align; | |
233 | |
808
e9bfaabcf07d
fixed nb_block_sizes detection - fixed codec_id test (avctx->codec_id does not need to be initialized)
bellard
parents:
797
diff
changeset
|
234 if (avctx->codec->id == CODEC_ID_WMAV1) { |
783 | 235 s->version = 1; |
236 } else { | |
237 s->version = 2; | |
238 } | |
2967 | 239 |
783 | 240 /* extract flag infos */ |
241 flags1 = 0; | |
242 flags2 = 0; | |
243 extradata = avctx->extradata; | |
244 if (s->version == 1 && avctx->extradata_size >= 4) { | |
245 flags1 = extradata[0] | (extradata[1] << 8); | |
246 flags2 = extradata[2] | (extradata[3] << 8); | |
247 } else if (s->version == 2 && avctx->extradata_size >= 6) { | |
2967 | 248 flags1 = extradata[0] | (extradata[1] << 8) | |
783 | 249 (extradata[2] << 16) | (extradata[3] << 24); |
250 flags2 = extradata[4] | (extradata[5] << 8); | |
251 } | |
252 s->use_exp_vlc = flags2 & 0x0001; | |
253 s->use_bit_reservoir = flags2 & 0x0002; | |
254 s->use_variable_block_len = flags2 & 0x0004; | |
255 | |
256 /* compute MDCT block size */ | |
257 if (s->sample_rate <= 16000) { | |
258 s->frame_len_bits = 9; | |
2967 | 259 } else if (s->sample_rate <= 22050 || |
795
55add0e7eafb
avoid name clash - fixed again block size selection
bellard
parents:
785
diff
changeset
|
260 (s->sample_rate <= 32000 && s->version == 1)) { |
783 | 261 s->frame_len_bits = 10; |
262 } else { | |
263 s->frame_len_bits = 11; | |
264 } | |
265 s->frame_len = 1 << s->frame_len_bits; | |
266 if (s->use_variable_block_len) { | |
808
e9bfaabcf07d
fixed nb_block_sizes detection - fixed codec_id test (avctx->codec_id does not need to be initialized)
bellard
parents:
797
diff
changeset
|
267 int nb_max, nb; |
e9bfaabcf07d
fixed nb_block_sizes detection - fixed codec_id test (avctx->codec_id does not need to be initialized)
bellard
parents:
797
diff
changeset
|
268 nb = ((flags2 >> 3) & 3) + 1; |
e9bfaabcf07d
fixed nb_block_sizes detection - fixed codec_id test (avctx->codec_id does not need to be initialized)
bellard
parents:
797
diff
changeset
|
269 if ((s->bit_rate / s->nb_channels) >= 32000) |
e9bfaabcf07d
fixed nb_block_sizes detection - fixed codec_id test (avctx->codec_id does not need to be initialized)
bellard
parents:
797
diff
changeset
|
270 nb += 2; |
e9bfaabcf07d
fixed nb_block_sizes detection - fixed codec_id test (avctx->codec_id does not need to be initialized)
bellard
parents:
797
diff
changeset
|
271 nb_max = s->frame_len_bits - BLOCK_MIN_BITS; |
e9bfaabcf07d
fixed nb_block_sizes detection - fixed codec_id test (avctx->codec_id does not need to be initialized)
bellard
parents:
797
diff
changeset
|
272 if (nb > nb_max) |
e9bfaabcf07d
fixed nb_block_sizes detection - fixed codec_id test (avctx->codec_id does not need to be initialized)
bellard
parents:
797
diff
changeset
|
273 nb = nb_max; |
e9bfaabcf07d
fixed nb_block_sizes detection - fixed codec_id test (avctx->codec_id does not need to be initialized)
bellard
parents:
797
diff
changeset
|
274 s->nb_block_sizes = nb + 1; |
783 | 275 } else { |
276 s->nb_block_sizes = 1; | |
277 } | |
278 | |
279 /* init rate dependant parameters */ | |
280 s->use_noise_coding = 1; | |
281 | |
282 /* if version 2, then the rates are normalized */ | |
283 sample_rate1 = s->sample_rate; | |
284 if (s->version == 2) { | |
2967 | 285 if (sample_rate1 >= 44100) |
783 | 286 sample_rate1 = 44100; |
2967 | 287 else if (sample_rate1 >= 22050) |
783 | 288 sample_rate1 = 22050; |
2967 | 289 else if (sample_rate1 >= 16000) |
783 | 290 sample_rate1 = 16000; |
2967 | 291 else if (sample_rate1 >= 11025) |
783 | 292 sample_rate1 = 11025; |
2967 | 293 else if (sample_rate1 >= 8000) |
783 | 294 sample_rate1 = 8000; |
295 } | |
296 | |
297 bps = (float)s->bit_rate / (float)(s->nb_channels * s->sample_rate); | |
2992 | 298 s->byte_offset_bits = av_log2((int)(bps * s->frame_len / 8.0 + 0.5)) + 2; |
783 | 299 |
300 /* compute high frequency value and choose if noise coding should | |
301 be activated */ | |
302 bps1 = bps; | |
303 if (s->nb_channels == 2) | |
304 bps1 = bps * 1.6; | |
305 if (sample_rate1 == 44100) { | |
306 if (bps1 >= 0.61) | |
307 s->use_noise_coding = 0; | |
308 else | |
3176
babf844e1308
Init simplification and 2% faster wma_decode_block on amd64 with tables use
banan
parents:
3113
diff
changeset
|
309 high_freq_factor = 0.4; |
783 | 310 } else if (sample_rate1 == 22050) { |
311 if (bps1 >= 1.16) | |
312 s->use_noise_coding = 0; | |
2967 | 313 else if (bps1 >= 0.72) |
3176
babf844e1308
Init simplification and 2% faster wma_decode_block on amd64 with tables use
banan
parents:
3113
diff
changeset
|
314 high_freq_factor = 0.7; |
783 | 315 else |
3176
babf844e1308
Init simplification and 2% faster wma_decode_block on amd64 with tables use
banan
parents:
3113
diff
changeset
|
316 high_freq_factor = 0.6; |
783 | 317 } else if (sample_rate1 == 16000) { |
318 if (bps > 0.5) | |
3176
babf844e1308
Init simplification and 2% faster wma_decode_block on amd64 with tables use
banan
parents:
3113
diff
changeset
|
319 high_freq_factor = 0.5; |
783 | 320 else |
3176
babf844e1308
Init simplification and 2% faster wma_decode_block on amd64 with tables use
banan
parents:
3113
diff
changeset
|
321 high_freq_factor = 0.3; |
783 | 322 } else if (sample_rate1 == 11025) { |
3176
babf844e1308
Init simplification and 2% faster wma_decode_block on amd64 with tables use
banan
parents:
3113
diff
changeset
|
323 high_freq_factor = 0.7; |
783 | 324 } else if (sample_rate1 == 8000) { |
325 if (bps <= 0.625) { | |
3176
babf844e1308
Init simplification and 2% faster wma_decode_block on amd64 with tables use
banan
parents:
3113
diff
changeset
|
326 high_freq_factor = 0.5; |
783 | 327 } else if (bps > 0.75) { |
328 s->use_noise_coding = 0; | |
329 } else { | |
3176
babf844e1308
Init simplification and 2% faster wma_decode_block on amd64 with tables use
banan
parents:
3113
diff
changeset
|
330 high_freq_factor = 0.65; |
783 | 331 } |
332 } else { | |
333 if (bps >= 0.8) { | |
3176
babf844e1308
Init simplification and 2% faster wma_decode_block on amd64 with tables use
banan
parents:
3113
diff
changeset
|
334 high_freq_factor = 0.75; |
783 | 335 } else if (bps >= 0.6) { |
3176
babf844e1308
Init simplification and 2% faster wma_decode_block on amd64 with tables use
banan
parents:
3113
diff
changeset
|
336 high_freq_factor = 0.6; |
783 | 337 } else { |
3176
babf844e1308
Init simplification and 2% faster wma_decode_block on amd64 with tables use
banan
parents:
3113
diff
changeset
|
338 high_freq_factor = 0.5; |
783 | 339 } |
340 } | |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
341 dprintf("flags1=0x%x flags2=0x%x\n", flags1, flags2); |
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
342 dprintf("version=%d channels=%d sample_rate=%d bitrate=%d block_align=%d\n", |
2967 | 343 s->version, s->nb_channels, s->sample_rate, s->bit_rate, |
783 | 344 s->block_align); |
3176
babf844e1308
Init simplification and 2% faster wma_decode_block on amd64 with tables use
banan
parents:
3113
diff
changeset
|
345 dprintf("bps=%f bps1=%f bitoffset=%d\n", |
babf844e1308
Init simplification and 2% faster wma_decode_block on amd64 with tables use
banan
parents:
3113
diff
changeset
|
346 bps, bps1, s->byte_offset_bits); |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
347 dprintf("use_noise_coding=%d use_exp_vlc=%d nb_block_sizes=%d\n", |
808
e9bfaabcf07d
fixed nb_block_sizes detection - fixed codec_id test (avctx->codec_id does not need to be initialized)
bellard
parents:
797
diff
changeset
|
348 s->use_noise_coding, s->use_exp_vlc, s->nb_block_sizes); |
783 | 349 |
350 /* compute the scale factor band sizes for each MDCT block size */ | |
351 { | |
352 int a, b, pos, lpos, k, block_len, i, j, n; | |
353 const uint8_t *table; | |
2967 | 354 |
783 | 355 if (s->version == 1) { |
356 s->coefs_start = 3; | |
357 } else { | |
358 s->coefs_start = 0; | |
359 } | |
360 for(k = 0; k < s->nb_block_sizes; k++) { | |
361 block_len = s->frame_len >> k; | |
362 | |
363 if (s->version == 1) { | |
364 lpos = 0; | |
365 for(i=0;i<25;i++) { | |
366 a = wma_critical_freqs[i]; | |
367 b = s->sample_rate; | |
368 pos = ((block_len * 2 * a) + (b >> 1)) / b; | |
2967 | 369 if (pos > block_len) |
783 | 370 pos = block_len; |
371 s->exponent_bands[0][i] = pos - lpos; | |
372 if (pos >= block_len) { | |
373 i++; | |
374 break; | |
375 } | |
376 lpos = pos; | |
377 } | |
378 s->exponent_sizes[0] = i; | |
379 } else { | |
380 /* hardcoded tables */ | |
381 table = NULL; | |
382 a = s->frame_len_bits - BLOCK_MIN_BITS - k; | |
383 if (a < 3) { | |
384 if (s->sample_rate >= 44100) | |
385 table = exponent_band_44100[a]; | |
386 else if (s->sample_rate >= 32000) | |
387 table = exponent_band_32000[a]; | |
388 else if (s->sample_rate >= 22050) | |
389 table = exponent_band_22050[a]; | |
390 } | |
391 if (table) { | |
392 n = *table++; | |
393 for(i=0;i<n;i++) | |
394 s->exponent_bands[k][i] = table[i]; | |
395 s->exponent_sizes[k] = n; | |
396 } else { | |
397 j = 0; | |
398 lpos = 0; | |
399 for(i=0;i<25;i++) { | |
400 a = wma_critical_freqs[i]; | |
401 b = s->sample_rate; | |
402 pos = ((block_len * 2 * a) + (b << 1)) / (4 * b); | |
403 pos <<= 2; | |
2967 | 404 if (pos > block_len) |
783 | 405 pos = block_len; |
406 if (pos > lpos) | |
407 s->exponent_bands[k][j++] = pos - lpos; | |
408 if (pos >= block_len) | |
409 break; | |
410 lpos = pos; | |
411 } | |
412 s->exponent_sizes[k] = j; | |
413 } | |
414 } | |
415 | |
416 /* max number of coefs */ | |
417 s->coefs_end[k] = (s->frame_len - ((s->frame_len * 9) / 100)) >> k; | |
418 /* high freq computation */ | |
3176
babf844e1308
Init simplification and 2% faster wma_decode_block on amd64 with tables use
banan
parents:
3113
diff
changeset
|
419 s->high_band_start[k] = (int)((block_len * high_freq_factor) + 0.5); |
babf844e1308
Init simplification and 2% faster wma_decode_block on amd64 with tables use
banan
parents:
3113
diff
changeset
|
420 |
783 | 421 n = s->exponent_sizes[k]; |
422 j = 0; | |
423 pos = 0; | |
424 for(i=0;i<n;i++) { | |
425 int start, end; | |
426 start = pos; | |
427 pos += s->exponent_bands[k][i]; | |
428 end = pos; | |
429 if (start < s->high_band_start[k]) | |
430 start = s->high_band_start[k]; | |
431 if (end > s->coefs_end[k]) | |
432 end = s->coefs_end[k]; | |
433 if (end > start) | |
434 s->exponent_high_bands[k][j++] = end - start; | |
435 } | |
436 s->exponent_high_sizes[k] = j; | |
437 #if 0 | |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
438 tprintf("%5d: coefs_end=%d high_band_start=%d nb_high_bands=%d: ", |
2967 | 439 s->frame_len >> k, |
783 | 440 s->coefs_end[k], |
441 s->high_band_start[k], | |
442 s->exponent_high_sizes[k]); | |
443 for(j=0;j<s->exponent_high_sizes[k];j++) | |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
444 tprintf(" %d", s->exponent_high_bands[k][j]); |
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
445 tprintf("\n"); |
783 | 446 #endif |
447 } | |
448 } | |
449 | |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
450 #ifdef TRACE |
783 | 451 { |
452 int i, j; | |
453 for(i = 0; i < s->nb_block_sizes; i++) { | |
2967 | 454 tprintf("%5d: n=%2d:", |
455 s->frame_len >> i, | |
783 | 456 s->exponent_sizes[i]); |
457 for(j=0;j<s->exponent_sizes[i];j++) | |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
458 tprintf(" %d", s->exponent_bands[i][j]); |
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
459 tprintf("\n"); |
783 | 460 } |
461 } | |
462 #endif | |
463 | |
464 /* init MDCT */ | |
465 for(i = 0; i < s->nb_block_sizes; i++) | |
795
55add0e7eafb
avoid name clash - fixed again block size selection
bellard
parents:
785
diff
changeset
|
466 ff_mdct_init(&s->mdct_ctx[i], s->frame_len_bits - i + 1, 1); |
2967 | 467 |
783 | 468 /* init MDCT windows : simple sinus window */ |
469 for(i = 0; i < s->nb_block_sizes; i++) { | |
470 int n, j; | |
471 float alpha; | |
472 n = 1 << (s->frame_len_bits - i); | |
473 window = av_malloc(sizeof(float) * n); | |
474 alpha = M_PI / (2.0 * n); | |
475 for(j=0;j<n;j++) { | |
476 window[n - j - 1] = sin((j + 0.5) * alpha); | |
477 } | |
478 s->windows[i] = window; | |
479 } | |
480 | |
481 s->reset_block_lengths = 1; | |
2967 | 482 |
783 | 483 if (s->use_noise_coding) { |
484 | |
485 /* init the noise generator */ | |
486 if (s->use_exp_vlc) | |
487 s->noise_mult = 0.02; | |
488 else | |
489 s->noise_mult = 0.04; | |
2967 | 490 |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
491 #ifdef TRACE |
783 | 492 for(i=0;i<NOISE_TAB_SIZE;i++) |
493 s->noise_table[i] = 1.0 * s->noise_mult; | |
494 #else | |
495 { | |
496 unsigned int seed; | |
497 float norm; | |
498 seed = 1; | |
499 norm = (1.0 / (float)(1LL << 31)) * sqrt(3) * s->noise_mult; | |
500 for(i=0;i<NOISE_TAB_SIZE;i++) { | |
501 seed = seed * 314159 + 1; | |
502 s->noise_table[i] = (float)((int)seed) * norm; | |
503 } | |
504 } | |
505 #endif | |
3113 | 506 init_vlc(&s->hgain_vlc, HGAINVLCBITS, sizeof(hgain_huffbits), |
783 | 507 hgain_huffbits, 1, 1, |
2370
26560d4fdb1f
Memory leak fix patch by (Burkhard Plaum <plaum >at< ipf.uni-stuttgart )dot( de>)
michael
parents:
2180
diff
changeset
|
508 hgain_huffcodes, 2, 2, 0); |
783 | 509 } |
510 | |
511 if (s->use_exp_vlc) { | |
3113 | 512 init_vlc(&s->exp_vlc, EXPVLCBITS, sizeof(scale_huffbits), |
783 | 513 scale_huffbits, 1, 1, |
2370
26560d4fdb1f
Memory leak fix patch by (Burkhard Plaum <plaum >at< ipf.uni-stuttgart )dot( de>)
michael
parents:
2180
diff
changeset
|
514 scale_huffcodes, 4, 4, 0); |
783 | 515 } else { |
516 wma_lsp_to_curve_init(s, s->frame_len); | |
517 } | |
518 | |
519 /* choose the VLC tables for the coefficients */ | |
520 coef_vlc_table = 2; | |
521 if (s->sample_rate >= 32000) { | |
522 if (bps1 < 0.72) | |
523 coef_vlc_table = 0; | |
524 else if (bps1 < 1.16) | |
525 coef_vlc_table = 1; | |
526 } | |
527 | |
528 init_coef_vlc(&s->coef_vlc[0], &s->run_table[0], &s->level_table[0], | |
529 &coef_vlcs[coef_vlc_table * 2]); | |
530 init_coef_vlc(&s->coef_vlc[1], &s->run_table[1], &s->level_table[1], | |
531 &coef_vlcs[coef_vlc_table * 2 + 1]); | |
3176
babf844e1308
Init simplification and 2% faster wma_decode_block on amd64 with tables use
banan
parents:
3113
diff
changeset
|
532 |
babf844e1308
Init simplification and 2% faster wma_decode_block on amd64 with tables use
banan
parents:
3113
diff
changeset
|
533 /* init pow tables */ |
babf844e1308
Init simplification and 2% faster wma_decode_block on amd64 with tables use
banan
parents:
3113
diff
changeset
|
534 for (i=0 ; i<121 ; i++) { |
babf844e1308
Init simplification and 2% faster wma_decode_block on amd64 with tables use
banan
parents:
3113
diff
changeset
|
535 s->pow_005_10[i] = pow(10, i * 0.05); |
babf844e1308
Init simplification and 2% faster wma_decode_block on amd64 with tables use
banan
parents:
3113
diff
changeset
|
536 s->pow_00625_10[i] = pow(10, i * (1.0 / 16.0)); |
babf844e1308
Init simplification and 2% faster wma_decode_block on amd64 with tables use
banan
parents:
3113
diff
changeset
|
537 } |
babf844e1308
Init simplification and 2% faster wma_decode_block on amd64 with tables use
banan
parents:
3113
diff
changeset
|
538 |
783 | 539 return 0; |
540 } | |
541 | |
/**
 * Resample an array in place to a bigger or smaller block size.
 *
 * When growing, every source value is repeated new_size / old_size times;
 * when shrinking, every (old_size / new_size)-th value is kept. One size
 * must be a multiple of the other. Equal or non-positive sizes leave the
 * array untouched.
 *
 * @param scale    array holding old_size values on entry; it must have room
 *                 for max(old_size, new_size) values
 * @param old_size number of valid values on entry
 * @param new_size number of values wanted on return
 */
static void interpolate_array(float *scale, int old_size, int new_size)
{
    int i, j, jincr, k;
    float v;

    /* both sizes are used as divisors below */
    if (old_size <= 0 || new_size <= 0)
        return;

    if (new_size > old_size) {
        jincr = new_size / old_size;
        j = new_size;
        /* walk from the top down so that source values are read before
           the expanded output overwrites them */
        for(i = old_size - 1; i >=0; i--) {
            v = scale[i];
            k = jincr;
            do {
                scale[--j] = v;
            } while (--k);
        }
    } else if (new_size < old_size) {
        j = 0;
        jincr = old_size / new_size;
        for(i = 0; i < new_size; i++) {
            scale[i] = scale[j];
            j += jincr;
        }
    }
}
568 | |
569 /* compute x^-0.25 with an exponent and mantissa table. We use linear | |
570 interpolation to reduce the mantissa table size at a small speed | |
571 expense (linear interpolation approximately doubles the number of | |
572 bits of precision). */ | |
573 static inline float pow_m1_4(WMADecodeContext *s, float x) | |
574 { | |
575 union { | |
576 float f; | |
577 unsigned int v; | |
578 } u, t; | |
579 unsigned int e, m; | |
580 float a, b; | |
581 | |
582 u.f = x; | |
583 e = u.v >> 23; | |
584 m = (u.v >> (23 - LSP_POW_BITS)) & ((1 << LSP_POW_BITS) - 1); | |
585 /* build interpolation scale: 1 <= t < 2. */ | |
586 t.v = ((u.v << LSP_POW_BITS) & ((1 << 23) - 1)) | (127 << 23); | |
587 a = s->lsp_pow_m_table1[m]; | |
588 b = s->lsp_pow_m_table2[m]; | |
589 return s->lsp_pow_e_table[e] * (a + b * t.f); | |
590 } | |
591 | |
592 static void wma_lsp_to_curve_init(WMADecodeContext *s, int frame_len) | |
2967 | 593 { |
783 | 594 float wdel, a, b; |
595 int i, e, m; | |
596 | |
597 wdel = M_PI / frame_len; | |
598 for(i=0;i<frame_len;i++) | |
599 s->lsp_cos_table[i] = 2.0f * cos(wdel * i); | |
600 | |
601 /* tables for x^-0.25 computation */ | |
602 for(i=0;i<256;i++) { | |
603 e = i - 126; | |
604 s->lsp_pow_e_table[i] = pow(2.0, e * -0.25); | |
605 } | |
606 | |
607 /* NOTE: these two tables are needed to avoid two operations in | |
608 pow_m1_4 */ | |
609 b = 1.0; | |
610 for(i=(1 << LSP_POW_BITS) - 1;i>=0;i--) { | |
611 m = (1 << LSP_POW_BITS) + i; | |
612 a = (float)m * (0.5 / (1 << LSP_POW_BITS)); | |
613 a = pow(a, -0.25); | |
614 s->lsp_pow_m_table1[i] = 2 * a - b; | |
615 s->lsp_pow_m_table2[i] = b - a; | |
616 b = a; | |
617 } | |
618 #if 0 | |
619 for(i=1;i<20;i++) { | |
620 float v, r1, r2; | |
621 v = 5.0 / i; | |
622 r1 = pow_m1_4(s, v); | |
623 r2 = pow(v,-0.25); | |
624 printf("%f^-0.25=%f e=%f\n", v, r1, r2 - r1); | |
625 } | |
626 #endif | |
627 } | |
628 | |
629 /* NOTE: We use the same code as Vorbis here */ | |
630 /* XXX: optimize it further with SSE/3Dnow */ | |
2967 | 631 static void wma_lsp_to_curve(WMADecodeContext *s, |
632 float *out, float *val_max_ptr, | |
783 | 633 int n, float *lsp) |
634 { | |
635 int i, j; | |
636 float p, q, w, v, val_max; | |
637 | |
638 val_max = 0; | |
639 for(i=0;i<n;i++) { | |
640 p = 0.5f; | |
641 q = 0.5f; | |
642 w = s->lsp_cos_table[i]; | |
643 for(j=1;j<NB_LSP_COEFS;j+=2){ | |
644 q *= w - lsp[j - 1]; | |
645 p *= w - lsp[j]; | |
646 } | |
647 p *= p * (2.0f - w); | |
648 q *= q * (2.0f + w); | |
649 v = p + q; | |
650 v = pow_m1_4(s, v); | |
651 if (v > val_max) | |
652 val_max = v; | |
653 out[i] = v; | |
654 } | |
655 *val_max_ptr = val_max; | |
656 } | |
657 | |
658 /* decode exponents coded with LSP coefficients (same idea as Vorbis) */ | |
659 static void decode_exp_lsp(WMADecodeContext *s, int ch) | |
660 { | |
661 float lsp_coefs[NB_LSP_COEFS]; | |
662 int val, i; | |
663 | |
664 for(i = 0; i < NB_LSP_COEFS; i++) { | |
665 if (i == 0 || i >= 8) | |
666 val = get_bits(&s->gb, 3); | |
667 else | |
668 val = get_bits(&s->gb, 4); | |
669 lsp_coefs[i] = lsp_codebook[i][val]; | |
670 } | |
671 | |
672 wma_lsp_to_curve(s, s->exponents[ch], &s->max_exponent[ch], | |
673 s->block_len, lsp_coefs); | |
674 } | |
675 | |
676 /* decode exponents coded with VLC codes */ | |
677 static int decode_exp_vlc(WMADecodeContext *s, int ch) | |
678 { | |
679 int last_exp, n, code; | |
680 const uint16_t *ptr, *band_ptr; | |
681 float v, *q, max_scale, *q_end; | |
2967 | 682 |
783 | 683 band_ptr = s->exponent_bands[s->frame_len_bits - s->block_len_bits]; |
684 ptr = band_ptr; | |
685 q = s->exponents[ch]; | |
686 q_end = q + s->block_len; | |
687 max_scale = 0; | |
688 if (s->version == 1) { | |
689 last_exp = get_bits(&s->gb, 5) + 10; | |
3176
babf844e1308
Init simplification and 2% faster wma_decode_block on amd64 with tables use
banan
parents:
3113
diff
changeset
|
690 v = s->pow_00625_10[last_exp]; |
783 | 691 max_scale = v; |
692 n = *ptr++; | |
693 do { | |
694 *q++ = v; | |
695 } while (--n); | |
696 } | |
697 last_exp = 36; | |
698 while (q < q_end) { | |
3113 | 699 code = get_vlc2(&s->gb, s->exp_vlc.table, EXPVLCBITS, EXPMAX); |
783 | 700 if (code < 0) |
701 return -1; | |
702 /* NOTE: this offset is the same as MPEG4 AAC ! */ | |
703 last_exp += code - 60; | |
3176
babf844e1308
Init simplification and 2% faster wma_decode_block on amd64 with tables use
banan
parents:
3113
diff
changeset
|
704 v = s->pow_00625_10[last_exp]; |
783 | 705 if (v > max_scale) |
706 max_scale = v; | |
707 n = *ptr++; | |
708 do { | |
709 *q++ = v; | |
710 } while (--n); | |
711 } | |
712 s->max_exponent[ch] = max_scale; | |
713 return 0; | |
714 } | |
715 | |
/*
 * Decode one block of the current frame from the bit reader s->gb.
 *
 * In order, the function: determines the block length, reads the
 * mid/side flag and the per-channel "coded" flags, reads the total gain,
 * reads the optional noise-coding side information, decodes or
 * interpolates the exponents, parses the run/level coded coefficients,
 * scales them into s->coefs, undoes mid/side coding, builds the window,
 * and finally runs the inverse MDCT and accumulates the windowed result
 * into s->frame_out.  s->block_num and s->block_pos are advanced before
 * returning 0 or 1.
 */
716 /* return 0 if OK. return 1 if last block of frame. return -1 if | |
717 unrecoverable error. */ | |
718 static int wma_decode_block(WMADecodeContext *s) | |
719 { | |
720 int n, v, a, ch, code, bsize; | |
721 int coef_nb_bits, total_gain, parse_exponents; | |
722 float window[BLOCK_MAX_SIZE * 2]; | |
2101 | 723 // XXX: FIXME!! there's a bug somewhere which makes this mandatory under altivec | |
724 #ifdef HAVE_ALTIVEC | |
725 volatile int nb_coefs[MAX_CHANNELS] __attribute__((aligned(16))); | |
726 #else | |
783 | 727 int nb_coefs[MAX_CHANNELS]; |
2101 | 728 #endif |
783 | 729 float mdct_norm; |
730 | |
1343 | 731 #ifdef TRACE |
732 tprintf("***decode_block: %d:%d\n", s->frame_count - 1, s->block_num); | |
733 #endif | |
783 | 734 |
735 /* compute current block length */ | |
736 if (s->use_variable_block_len) { | |
/* n = number of bits used to transmit one block-size index */
737 n = av_log2(s->nb_block_sizes - 1) + 1; | |
2967 | 738 |
/* after a reset, the previous and the current block sizes are both
   read explicitly; otherwise they are carried over from the last block */
783 | 739 if (s->reset_block_lengths) { |
740 s->reset_block_lengths = 0; | |
741 v = get_bits(&s->gb, n); | |
742 if (v >= s->nb_block_sizes) | |
743 return -1; | |
744 s->prev_block_len_bits = s->frame_len_bits - v; | |
745 v = get_bits(&s->gb, n); | |
746 if (v >= s->nb_block_sizes) | |
747 return -1; | |
748 s->block_len_bits = s->frame_len_bits - v; | |
749 } else { | |
750 /* update block lengths */ | |
751 s->prev_block_len_bits = s->block_len_bits; | |
752 s->block_len_bits = s->next_block_len_bits; | |
753 } | |
/* the size of the next block is always transmitted */
754 v = get_bits(&s->gb, n); | |
755 if (v >= s->nb_block_sizes) | |
756 return -1; | |
757 s->next_block_len_bits = s->frame_len_bits - v; | |
758 } else { | |
759 /* fixed block len */ | |
760 s->next_block_len_bits = s->frame_len_bits; | |
761 s->prev_block_len_bits = s->frame_len_bits; | |
762 s->block_len_bits = s->frame_len_bits; | |
763 } | |
764 | |
765 /* now check if the block length is coherent with the frame length */ | |
766 s->block_len = 1 << s->block_len_bits; | |
767 if ((s->block_pos + s->block_len) > s->frame_len) | |
768 return -1; | |
769 | |
/* the mid/side flag is only read for two-channel streams */
770 if (s->nb_channels == 2) { | |
771 s->ms_stereo = get_bits(&s->gb, 1); | |
772 } | |
/* one "coded" flag per channel; v ends up non-zero if any channel is coded */
773 v = 0; | |
774 for(ch = 0; ch < s->nb_channels; ch++) { | |
775 a = get_bits(&s->gb, 1); | |
776 s->channel_coded[ch] = a; | |
777 v |= a; | |
778 } | |
779 /* if no channel coded, no need to go further */ | |
780 /* XXX: fix potential framing problems */ | |
781 if (!v) | |
782 goto next; | |
783 | |
/* bsize indexes the per-block-size tables (0 = full frame length) */
784 bsize = s->frame_len_bits - s->block_len_bits; | |
785 | |
786 /* read total gain and extract corresponding number of bits for | |
787 coef escape coding */ | |
788 total_gain = 1; | |
/* the gain is sent as 7-bit chunks that are summed; a chunk equal to
   127 means that another chunk follows */
789 for(;;) { | |
790 a = get_bits(&s->gb, 7); | |
791 total_gain += a; | |
792 if (a != 127) | |
793 break; | |
794 } | |
2967 | 795 |
783 | 796 if (total_gain < 15) |
797 coef_nb_bits = 13; | |
798 else if (total_gain < 32) | |
799 coef_nb_bits = 12; | |
800 else if (total_gain < 40) | |
801 coef_nb_bits = 11; | |
802 else if (total_gain < 45) | |
803 coef_nb_bits = 10; | |
804 else | |
805 coef_nb_bits = 9; | |
806 | |
807 /* compute number of coefficients */ | |
808 n = s->coefs_end[bsize] - s->coefs_start; | |
809 for(ch = 0; ch < s->nb_channels; ch++) | |
810 nb_coefs[ch] = n; | |
811 | |
812 /* complex coding */ | |
813 if (s->use_noise_coding) { | |
814 | |
/* first pass: one flag per high band telling whether the band is
   replaced by noise */
815 for(ch = 0; ch < s->nb_channels; ch++) { | |
816 if (s->channel_coded[ch]) { | |
817 int i, n, a; | |
818 n = s->exponent_high_sizes[bsize]; | |
819 for(i=0;i<n;i++) { | |
820 a = get_bits(&s->gb, 1); | |
821 s->high_band_coded[ch][i] = a; | |
822 /* if noise coding, the coefficients are not transmitted */ | |
823 if (a) | |
824 nb_coefs[ch] -= s->exponent_high_bands[bsize][i]; | |
825 } | |
826 } | |
827 } | |
/* second pass: gain of every noise-coded band; the first one is sent
   as 7 bits minus 19, the following ones as VLC-coded differences */
828 for(ch = 0; ch < s->nb_channels; ch++) { | |
829 if (s->channel_coded[ch]) { | |
830 int i, n, val, code; | |
831 | |
832 n = s->exponent_high_sizes[bsize]; | |
/* sentinel meaning "no gain decoded yet for this channel" */
833 val = (int)0x80000000; | |
834 for(i=0;i<n;i++) { | |
835 if (s->high_band_coded[ch][i]) { | |
836 if (val == (int)0x80000000) { | |
837 val = get_bits(&s->gb, 7) - 19; | |
838 } else { | |
3113 | 839 code = get_vlc2(&s->gb, s->hgain_vlc.table, HGAINVLCBITS, HGAINMAX); |
783 | 840 if (code < 0) |
841 return -1; | |
842 val += code - 18; | |
843 } | |
844 s->high_band_values[ch][i] = val; | |
845 } | |
846 } | |
847 } | |
848 } | |
849 } | |
2967 | 850 |
783 | 851 /* exponents can be interpolated in short blocks. */ |
852 parse_exponents = 1; | |
/* only blocks shorter than the frame carry the "parse exponents" flag */
853 if (s->block_len_bits != s->frame_len_bits) { | |
854 parse_exponents = get_bits(&s->gb, 1); | |
855 } | |
2967 | 856 |
783 | 857 if (parse_exponents) { |
858 for(ch = 0; ch < s->nb_channels; ch++) { | |
859 if (s->channel_coded[ch]) { | |
860 if (s->use_exp_vlc) { | |
861 if (decode_exp_vlc(s, ch) < 0) | |
862 return -1; | |
863 } else { | |
864 decode_exp_lsp(s, ch); | |
865 } | |
866 } | |
867 } | |
868 } else { | |
/* no new exponents: resize the ones of the previous block */
869 for(ch = 0; ch < s->nb_channels; ch++) { | |
870 if (s->channel_coded[ch]) { | |
2967 | 871 interpolate_array(s->exponents[ch], 1 << s->prev_block_len_bits, |
783 | 872 s->block_len); |
873 } | |
874 } | |
875 } | |
876 | |
877 /* parse spectral coefficients : just RLE encoding */ | |
878 for(ch = 0; ch < s->nb_channels; ch++) { | |
879 if (s->channel_coded[ch]) { | |
880 VLC *coef_vlc; | |
881 int level, run, sign, tindex; | |
882 int16_t *ptr, *eptr; | |
883 const int16_t *level_table, *run_table; | |
884 | |
885 /* special VLC tables are used for ms stereo because | |
886 there is potentially less energy there */ | |
887 tindex = (ch == 1 && s->ms_stereo); | |
888 coef_vlc = &s->coef_vlc[tindex]; | |
889 run_table = s->run_table[tindex]; | |
890 level_table = s->level_table[tindex]; | |
891 /* XXX: optimize */ | |
892 ptr = &s->coefs1[ch][0]; | |
893 eptr = ptr + nb_coefs[ch]; | |
894 memset(ptr, 0, s->block_len * sizeof(int16_t)); | |
/* each symbol gives a (run of zeros, level) pair and is followed by
   one sign bit; code 1 ends the block and code 0 escapes to raw fields */
895 for(;;) { | |
3113 | 896 code = get_vlc2(&s->gb, coef_vlc->table, VLCBITS, VLCMAX); |
783 | 897 if (code < 0) |
898 return -1; | |
899 if (code == 1) { | |
900 /* EOB */ | |
901 break; | |
902 } else if (code == 0) { | |
903 /* escape */ | |
904 level = get_bits(&s->gb, coef_nb_bits); | |
905 /* NOTE: this is rather suboptimal. reading | |
906 block_len_bits would be better */ | |
907 run = get_bits(&s->gb, s->frame_len_bits); | |
908 } else { | |
909 /* normal code */ | |
910 run = run_table[code]; | |
911 level = level_table[code]; | |
912 } | |
/* a cleared sign bit means the level is negative */
913 sign = get_bits(&s->gb, 1); | |
914 if (!sign) | |
915 level = -level; | |
/* skip "run" zero coefficients; landing at or past eptr is an error */
916 ptr += run; | |
917 if (ptr >= eptr) | |
918 return -1; | |
919 *ptr++ = level; | |
920 /* NOTE: EOB can be omitted */ | |
921 if (ptr >= eptr) | |
922 break; | |
923 } | |
924 } | |
/* version 1 streams with two or more channels call align_get_bits()
   after each channel */
925 if (s->version == 1 && s->nb_channels >= 2) { | |
926 align_get_bits(&s->gb); | |
927 } | |
928 } | |
2967 | 929 |
783 | 930 /* normalize */ |
931 { | |
/* scale factor is 1 / (block_len / 2); version 1 additionally
   multiplies it by sqrt(block_len / 2) */
932 int n4 = s->block_len / 2; | |
933 mdct_norm = 1.0 / (float)n4; | |
934 if (s->version == 1) { | |
935 mdct_norm *= sqrt(n4); | |
936 } | |
937 } | |
938 | |
939 /* finally compute the MDCT coefficients */ | |
940 for(ch = 0; ch < s->nb_channels; ch++) { | |
941 if (s->channel_coded[ch]) { | |
942 int16_t *coefs1; | |
943 float *coefs, *exponents, mult, mult1, noise, *exp_ptr; | |
944 int i, j, n, n1, last_high_band; | |
945 float exp_power[HIGH_BAND_MAX_SIZE]; | |
946 | |
947 coefs1 = s->coefs1[ch]; | |
948 exponents = s->exponents[ch]; | |
3176
babf844e1308
Init simplification and 2% faster wma_decode_block on amd64 with tables use
banan
parents:
3113
diff
changeset
|
949 mult = s->pow_005_10[total_gain] / s->max_exponent[ch]; |
783 | 950 mult *= mdct_norm; |
951 coefs = s->coefs[ch]; | |
952 if (s->use_noise_coding) { | |
953 mult1 = mult; | |
954 /* very low freqs : noise */ | |
955 for(i = 0;i < s->coefs_start; i++) { | |
956 *coefs++ = s->noise_table[s->noise_index] * (*exponents++) * mult1; | |
957 s->noise_index = (s->noise_index + 1) & (NOISE_TAB_SIZE - 1); | |
958 } | |
2967 | 959 |
783 | 960 n1 = s->exponent_high_sizes[bsize]; |
961 | |
962 /* compute power of high bands */ | |
2967 | 963 exp_ptr = exponents + |
964 s->high_band_start[bsize] - | |
783 | 965 s->coefs_start; |
966 last_high_band = 0; /* avoid warning */ | |
967 for(j=0;j<n1;j++) { | |
2967 | 968 n = s->exponent_high_bands[s->frame_len_bits - |
783 | 969 s->block_len_bits][j]; |
970 if (s->high_band_coded[ch][j]) { | |
971 float e2, v; | |
972 e2 = 0; | |
973 for(i = 0;i < n; i++) { | |
974 v = exp_ptr[i]; | |
975 e2 += v * v; | |
976 } | |
977 exp_power[j] = e2 / n; | |
978 last_high_band = j; | |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
979 tprintf("%d: power=%f (%d)\n", j, exp_power[j], n); |
783 | 980 } |
981 exp_ptr += n; | |
982 } | |
983 | |
984 /* main freqs and high freqs */ | |
/* j == -1 covers the coefficients located below the first high band */
985 for(j=-1;j<n1;j++) { | |
986 if (j < 0) { | |
2967 | 987 n = s->high_band_start[bsize] - |
783 | 988 s->coefs_start; |
989 } else { | |
2967 | 990 n = s->exponent_high_bands[s->frame_len_bits - |
783 | 991 s->block_len_bits][j]; |
992 } | |
993 if (j >= 0 && s->high_band_coded[ch][j]) { | |
994 /* use noise with specified power */ | |
995 mult1 = sqrt(exp_power[j] / exp_power[last_high_band]); | |
3176
babf844e1308
Init simplification and 2% faster wma_decode_block on amd64 with tables use
banan
parents:
3113
diff
changeset
|
996 mult1 = mult1 * s->pow_005_10[s->high_band_values[ch][j]]; |
783 | 997 mult1 = mult1 / (s->max_exponent[ch] * s->noise_mult); |
998 mult1 *= mdct_norm; | |
999 for(i = 0;i < n; i++) { | |
1000 noise = s->noise_table[s->noise_index]; | |
1001 s->noise_index = (s->noise_index + 1) & (NOISE_TAB_SIZE - 1); | |
1002 *coefs++ = (*exponents++) * noise * mult1; | |
1003 } | |
1004 } else { | |
1005 /* coded values + small noise */ | |
1006 for(i = 0;i < n; i++) { | |
1007 noise = s->noise_table[s->noise_index]; | |
1008 s->noise_index = (s->noise_index + 1) & (NOISE_TAB_SIZE - 1); | |
1009 *coefs++ = ((*coefs1++) + noise) * (*exponents++) * mult; | |
1010 } | |
1011 } | |
1012 } | |
1013 | |
1014 /* very high freqs : noise */ | |
1015 n = s->block_len - s->coefs_end[bsize]; | |
/* the tail reuses the last exponent consumed by the loop above */
1016 mult1 = mult * exponents[-1]; | |
1017 for(i = 0; i < n; i++) { | |
1018 *coefs++ = s->noise_table[s->noise_index] * mult1; | |
1019 s->noise_index = (s->noise_index + 1) & (NOISE_TAB_SIZE - 1); | |
1020 } | |
1021 } else { | |
1022 /* XXX: optimize more */ | |
/* without noise coding: zeros below coefs_start, scaled coded values,
   then zeros up to the end of the block */
1023 for(i = 0;i < s->coefs_start; i++) | |
1024 *coefs++ = 0.0; | |
1025 n = nb_coefs[ch]; | |
1026 for(i = 0;i < n; i++) { | |
1027 *coefs++ = coefs1[i] * exponents[i] * mult; | |
1028 } | |
1029 n = s->block_len - s->coefs_end[bsize]; | |
1030 for(i = 0;i < n; i++) | |
1031 *coefs++ = 0.0; | |
1032 } | |
1033 } | |
1034 } | |
1035 | |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
1036 #ifdef TRACE |
783 | 1037 for(ch = 0; ch < s->nb_channels; ch++) { |
1038 if (s->channel_coded[ch]) { | |
1039 dump_floats("exponents", 3, s->exponents[ch], s->block_len); | |
1040 dump_floats("coefs", 1, s->coefs[ch], s->block_len); | |
1041 } | |
1042 } | |
1043 #endif | |
2967 | 1044 |
783 | 1045 if (s->ms_stereo && s->channel_coded[1]) { |
1046 float a, b; | |
1047 int i; | |
1048 | |
1049 /* nominal case for ms stereo: we do it before mdct */ | |
1050 /* no need to optimize this case because it should almost | |
1051 never happen */ | |
1052 if (!s->channel_coded[0]) { | |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
1053 tprintf("rare ms-stereo case happened\n"); |
783 | 1054 memset(s->coefs[0], 0, sizeof(float) * s->block_len); |
1055 s->channel_coded[0] = 1; | |
1056 } | |
2967 | 1057 |
/* undo mid/side coding: channel 0 = a + b, channel 1 = a - b */
783 | 1058 for(i = 0; i < s->block_len; i++) { |
1059 a = s->coefs[0][i]; | |
1060 b = s->coefs[1][i]; | |
1061 s->coefs[0][i] = a + b; | |
1062 s->coefs[1][i] = a - b; | |
1063 } | |
1064 } | |
1065 | |
1066 /* build the window : we ensure that when the windows overlap | |
1067 their squared sum is always 1 (MDCT reconstruction rule) */ | |
1068 /* XXX: merge with output */ | |
1069 { | |
1070 int i, next_block_len, block_len, prev_block_len, n; | |
1071 float *wptr; | |
1072 | |
1073 block_len = s->block_len; | |
1074 prev_block_len = 1 << s->prev_block_len_bits; | |
1075 next_block_len = 1 << s->next_block_len_bits; | |
1076 | |
1077 /* right part */ | |
/* filled forwards from the middle of window[]; when the next block is
   shorter its window is centred between a run of ones and a run of zeros */
1078 wptr = window + block_len; | |
1079 if (block_len <= next_block_len) { | |
1080 for(i=0;i<block_len;i++) | |
1081 *wptr++ = s->windows[bsize][i]; | |
1082 } else { | |
1083 /* overlap */ | |
1084 n = (block_len / 2) - (next_block_len / 2); | |
1085 for(i=0;i<n;i++) | |
1086 *wptr++ = 1.0; | |
1087 for(i=0;i<next_block_len;i++) | |
1088 *wptr++ = s->windows[s->frame_len_bits - s->next_block_len_bits][i]; | |
1089 for(i=0;i<n;i++) | |
1090 *wptr++ = 0.0; | |
1091 } | |
1092 | |
1093 /* left part */ | |
/* same construction, written backwards from the middle of window[] */
1094 wptr = window + block_len; | |
1095 if (block_len <= prev_block_len) { | |
1096 for(i=0;i<block_len;i++) | |
1097 *--wptr = s->windows[bsize][i]; | |
1098 } else { | |
1099 /* overlap */ | |
1100 n = (block_len / 2) - (prev_block_len / 2); | |
1101 for(i=0;i<n;i++) | |
1102 *--wptr = 1.0; | |
1103 for(i=0;i<prev_block_len;i++) | |
1104 *--wptr = s->windows[s->frame_len_bits - s->prev_block_len_bits][i]; | |
1105 for(i=0;i<n;i++) | |
1106 *--wptr = 0.0; | |
1107 } | |
1108 } | |
1109 | |
2967 | 1110 |
/* inverse MDCT of every coded channel, then windowing and accumulation
   into the frame output buffer */
783 | 1111 for(ch = 0; ch < s->nb_channels; ch++) { |
1112 if (s->channel_coded[ch]) { | |
3089 | 1113 DECLARE_ALIGNED_16(FFTSample, output[BLOCK_MAX_SIZE * 2]); |
783 | 1114 float *ptr; |
1115 int i, n4, index, n; | |
1116 | |
1117 n = s->block_len; | |
1118 n4 = s->block_len / 2; | |
2967 | 1119 ff_imdct_calc(&s->mdct_ctx[bsize], |
795
55add0e7eafb
avoid name clash - fixed again block size selection
bellard
parents:
785
diff
changeset
|
1120 output, s->coefs[ch], s->mdct_tmp); |
783 | 1121 |
1122 /* XXX: optimize all that by building the window and | |
1123 multiplying/adding at the same time */ | |
1124 /* multiply by the window */ | |
1125 for(i=0;i<n * 2;i++) { | |
1126 output[i] *= window[i]; | |
1127 } | |
1128 | |
1129 /* add in the frame */ | |
1130 index = (s->frame_len / 2) + s->block_pos - n4; | |
1131 ptr = &s->frame_out[ch][index]; | |
1132 for(i=0;i<n * 2;i++) { | |
1133 *ptr += output[i]; | |
1134 ptr++; | |
1135 } | |
1136 | |
1137 /* specific fast case for ms-stereo : add to second | |
1138 channel if it is not coded */ | |
1139 if (s->ms_stereo && !s->channel_coded[1]) { | |
1140 ptr = &s->frame_out[1][index]; | |
1141 for(i=0;i<n * 2;i++) { | |
1142 *ptr += output[i]; | |
1143 ptr++; | |
1144 } | |
1145 } | |
1146 } | |
1147 } | |
1148 next: | |
1149 /* update block number */ | |
1150 s->block_num++; | |
1151 s->block_pos += s->block_len; | |
/* the frame is complete once the blocks cover frame_len samples */
1152 if (s->block_pos >= s->frame_len) | |
1153 return 1; | |
1154 else | |
1155 return 0; | |
1156 } | |
1157 | |
1158 /* decode a frame of frame_len samples */ | |
1159 static int wma_decode_frame(WMADecodeContext *s, int16_t *samples) | |
1160 { | |
1161 int ret, i, n, a, ch, incr; | |
1162 int16_t *ptr; | |
1163 float *iptr; | |
1164 | |
1343 | 1165 #ifdef TRACE |
1166 tprintf("***decode_frame: %d size=%d\n", s->frame_count++, s->frame_len); | |
1167 #endif | |
783 | 1168 |
1169 /* read each block */ | |
1170 s->block_num = 0; | |
1171 s->block_pos = 0; | |
1172 for(;;) { | |
1173 ret = wma_decode_block(s); | |
2967 | 1174 if (ret < 0) |
783 | 1175 return -1; |
1176 if (ret) | |
1177 break; | |
1178 } | |
1179 | |
1180 /* convert frame to integer */ | |
1181 n = s->frame_len; | |
1182 incr = s->nb_channels; | |
1183 for(ch = 0; ch < s->nb_channels; ch++) { | |
1184 ptr = samples + ch; | |
1185 iptr = s->frame_out[ch]; | |
1186 | |
1187 for(i=0;i<n;i++) { | |
797 | 1188 a = lrintf(*iptr++); |
783 | 1189 if (a > 32767) |
1190 a = 32767; | |
1191 else if (a < -32768) | |
1192 a = -32768; | |
1193 *ptr = a; | |
1194 ptr += incr; | |
1195 } | |
1196 /* prepare for next block */ | |
1197 memmove(&s->frame_out[ch][0], &s->frame_out[ch][s->frame_len], | |
1198 s->frame_len * sizeof(float)); | |
1199 /* XXX: suppress this */ | |
2967 | 1200 memset(&s->frame_out[ch][s->frame_len], 0, |
783 | 1201 s->frame_len * sizeof(float)); |
1202 } | |
1203 | |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
1204 #ifdef TRACE |
783 | 1205 dump_shorts("samples", samples, n * s->nb_channels); |
1206 #endif | |
1207 return 0; | |
1208 } | |
1209 | |
2967 | 1210 static int wma_decode_superframe(AVCodecContext *avctx, |
783 | 1211 void *data, int *data_size, |
1064 | 1212 uint8_t *buf, int buf_size) |
783 | 1213 { |
1214 WMADecodeContext *s = avctx->priv_data; | |
1215 int nb_frames, bit_offset, i, pos, len; | |
1216 uint8_t *q; | |
1217 int16_t *samples; | |
2967 | 1218 |
1342
f574934c4219
uniformization (now it uses the same trace functions as h264, defined in common.h)
al3x
parents:
1303
diff
changeset
|
1219 tprintf("***decode_superframe:\n"); |
783 | 1220 |
1750 | 1221 if(buf_size==0){ |
1222 s->last_superframe_len = 0; | |
1223 return 0; | |
1224 } | |
2967 | 1225 |
783 | 1226 samples = data; |
1227 | |
1025
1f9afd8b9131
GetBitContext.size is allways multiplied by 8 -> use size_in_bits to avoid useless *8 in a few inner loops
michaelni
parents:
972
diff
changeset
|
1228 init_get_bits(&s->gb, buf, buf_size*8); |
2967 | 1229 |
783 | 1230 if (s->use_bit_reservoir) { |
1231 /* read super frame header */ | |
1232 get_bits(&s->gb, 4); /* super frame index */ | |
1233 nb_frames = get_bits(&s->gb, 4) - 1; | |
1234 | |
1235 bit_offset = get_bits(&s->gb, s->byte_offset_bits + 3); | |
1236 | |
1237 if (s->last_superframe_len > 0) { | |
1238 // printf("skip=%d\n", s->last_bitoffset); | |
1239 /* add bit_offset bits to last frame */ | |
2967 | 1240 if ((s->last_superframe_len + ((bit_offset + 7) >> 3)) > |
783 | 1241 MAX_CODED_SUPERFRAME_SIZE) |
964
6e6773512288
oops : better error resilience - should fix most wma decoding problems
bellard
parents:
819
diff
changeset
|
1242 goto fail; |
783 | 1243 q = s->last_superframe + s->last_superframe_len; |
1244 len = bit_offset; | |
1245 while (len > 0) { | |
1246 *q++ = (get_bits)(&s->gb, 8); | |
1247 len -= 8; | |
1248 } | |
1249 if (len > 0) { | |
1250 *q++ = (get_bits)(&s->gb, len) << (8 - len); | |
1251 } | |
2967 | 1252 |
783 | 1253 /* XXX: bit_offset bits into last frame */ |
1025
1f9afd8b9131
GetBitContext.size is allways multiplied by 8 -> use size_in_bits to avoid useless *8 in a few inner loops
michaelni
parents:
972
diff
changeset
|
1254 init_get_bits(&s->gb, s->last_superframe, MAX_CODED_SUPERFRAME_SIZE*8); |
783 | 1255 /* skip unused bits */ |
1256 if (s->last_bitoffset > 0) | |
1257 skip_bits(&s->gb, s->last_bitoffset); | |
1258 /* this frame is stored in the last superframe and in the | |
1259 current one */ | |
1260 if (wma_decode_frame(s, samples) < 0) | |
964
6e6773512288
oops : better error resilience - should fix most wma decoding problems
bellard
parents:
819
diff
changeset
|
1261 goto fail; |
783 | 1262 samples += s->nb_channels * s->frame_len; |
1263 } | |
1264 | |
1265 /* read each frame starting from bit_offset */ | |
1266 pos = bit_offset + 4 + 4 + s->byte_offset_bits + 3; | |
1025
1f9afd8b9131
GetBitContext.size is allways multiplied by 8 -> use size_in_bits to avoid useless *8 in a few inner loops
michaelni
parents:
972
diff
changeset
|
1267 init_get_bits(&s->gb, buf + (pos >> 3), (MAX_CODED_SUPERFRAME_SIZE - (pos >> 3))*8); |
783 | 1268 len = pos & 7; |
1269 if (len > 0) | |
1270 skip_bits(&s->gb, len); | |
2967 | 1271 |
783 | 1272 s->reset_block_lengths = 1; |
1273 for(i=0;i<nb_frames;i++) { | |
1274 if (wma_decode_frame(s, samples) < 0) | |
964
6e6773512288
oops : better error resilience - should fix most wma decoding problems
bellard
parents:
819
diff
changeset
|
1275 goto fail; |
783 | 1276 samples += s->nb_channels * s->frame_len; |
1277 } | |
1278 | |
1279 /* we copy the end of the frame in the last frame buffer */ | |
1280 pos = get_bits_count(&s->gb) + ((bit_offset + 4 + 4 + s->byte_offset_bits + 3) & ~7); | |
1281 s->last_bitoffset = pos & 7; | |
1282 pos >>= 3; | |
1283 len = buf_size - pos; | |
819 | 1284 if (len > MAX_CODED_SUPERFRAME_SIZE || len < 0) { |
964
6e6773512288
oops : better error resilience - should fix most wma decoding problems
bellard
parents:
819
diff
changeset
|
1285 goto fail; |
783 | 1286 } |
1287 s->last_superframe_len = len; | |
1288 memcpy(s->last_superframe, buf + pos, len); | |
1289 } else { | |
1290 /* single frame decode */ | |
1291 if (wma_decode_frame(s, samples) < 0) | |
964
6e6773512288
oops : better error resilience - should fix most wma decoding problems
bellard
parents:
819
diff
changeset
|
1292 goto fail; |
783 | 1293 samples += s->nb_channels * s->frame_len; |
1294 } | |
1295 *data_size = (int8_t *)samples - (int8_t *)data; | |
1296 return s->block_align; | |
964
6e6773512288
oops : better error resilience - should fix most wma decoding problems
bellard
parents:
819
diff
changeset
|
1297 fail: |
6e6773512288
oops : better error resilience - should fix most wma decoding problems
bellard
parents:
819
diff
changeset
|
1298 /* when error, we reset the bit reservoir */ |
6e6773512288
oops : better error resilience - should fix most wma decoding problems
bellard
parents:
819
diff
changeset
|
1299 s->last_superframe_len = 0; |
6e6773512288
oops : better error resilience - should fix most wma decoding problems
bellard
parents:
819
diff
changeset
|
1300 return -1; |
783 | 1301 } |
1302 | |
1303 static int wma_decode_end(AVCodecContext *avctx) | |
1304 { | |
1305 WMADecodeContext *s = avctx->priv_data; | |
1306 int i; | |
1307 | |
1308 for(i = 0; i < s->nb_block_sizes; i++) | |
795
55add0e7eafb
avoid name clash - fixed again block size selection
bellard
parents:
785
diff
changeset
|
1309 ff_mdct_end(&s->mdct_ctx[i]); |
783 | 1310 for(i = 0; i < s->nb_block_sizes; i++) |
1311 av_free(s->windows[i]); | |
1312 | |
1313 if (s->use_exp_vlc) { | |
1314 free_vlc(&s->exp_vlc); | |
1315 } | |
1316 if (s->use_noise_coding) { | |
1317 free_vlc(&s->hgain_vlc); | |
1318 } | |
1319 for(i = 0;i < 2; i++) { | |
1320 free_vlc(&s->coef_vlc[i]); | |
1321 av_free(s->run_table[i]); | |
1322 av_free(s->level_table[i]); | |
1323 } | |
2967 | 1324 |
783 | 1325 return 0; |
1326 } | |
1327 | |
/*
 * Codec descriptor for the "wmav1" audio decoder (CODEC_ID_WMAV1).
 * It uses the same init, end and superframe-decode functions as
 * wmav2_decoder; only the name and the codec id differ.
 * NOTE(review): the initializer is positional, so the meaning of each slot
 * (in particular the NULL entry) depends on the member order of AVCodec,
 * which is declared outside this file - confirm against avcodec.h.
 */
1328 AVCodec wmav1_decoder = | |
1329 { | |
1330 "wmav1", | |
1331 CODEC_TYPE_AUDIO, | |
1332 CODEC_ID_WMAV1, | |
1333 sizeof(WMADecodeContext), | |
1334 wma_decode_init, | |
1335 NULL, | |
1336 wma_decode_end, | |
1337 wma_decode_superframe, | |
1338 }; | |
1339 | |
/*
 * Codec descriptor for the "wmav2" audio decoder (CODEC_ID_WMAV2).
 * It uses the same init, end and superframe-decode functions as
 * wmav1_decoder; only the name and the codec id differ.
 * NOTE(review): the initializer is positional, so the meaning of each slot
 * (in particular the NULL entry) depends on the member order of AVCodec,
 * which is declared outside this file - confirm against avcodec.h.
 */
1340 AVCodec wmav2_decoder = | |
1341 { | |
1342 "wmav2", | |
1343 CODEC_TYPE_AUDIO, | |
1344 CODEC_ID_WMAV2, | |
1345 sizeof(WMADecodeContext), | |
1346 wma_decode_init, | |
1347 NULL, | |
1348 wma_decode_end, | |
1349 wma_decode_superframe, | |
1350 }; |