Mercurial > libavcodec.hg
annotate amrnbdec.c @ 12483:0159a19bfff7 libavcodec
aacdec: Rework channel mapping compatibility hacks.
For a PCE based configuration map the channels solely based on tags.
For an indexed configuration map the channels solely based on position.
This works with all known exotic samples including al17, elem_id0, bad_concat,
and lfe_is_sce.
author | alexc |
---|---|
date | Fri, 10 Sep 2010 18:01:48 +0000 |
parents | 2dd67ed2f947 |
children | b42e02e9bf2b |
rev | line source |
---|---|
11235 | 1 /* |
2 * AMR narrowband decoder | |
3 * Copyright (c) 2006-2007 Robert Swain | |
4 * Copyright (c) 2009 Colin McQuillan | |
5 * | |
6 * This file is part of FFmpeg. | |
7 * | |
8 * FFmpeg is free software; you can redistribute it and/or | |
9 * modify it under the terms of the GNU Lesser General Public | |
10 * License as published by the Free Software Foundation; either | |
11 * version 2.1 of the License, or (at your option) any later version. | |
12 * | |
13 * FFmpeg is distributed in the hope that it will be useful, | |
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 * Lesser General Public License for more details. | |
17 * | |
18 * You should have received a copy of the GNU Lesser General Public | |
19 * License along with FFmpeg; if not, write to the Free Software | |
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
21 */ | |
22 | |
23 | |
24 /** | |
11644
7dd2a45249a9
Remove explicit filename from Doxygen @file commands.
diego
parents:
11560
diff
changeset
|
25 * @file |
11235 | 26 * AMR narrowband decoder |
27 * | |
28 * This decoder uses floats for simplicity and so is not bit-exact. One | |
29 * difference is that differences in phase can accumulate. The test sequences | |
30 * in 3GPP TS 26.074 can still be useful. | |
31 * | |
32 * - Comparing this file's output to the output of the ref decoder gives a | |
33 * PSNR of 30 to 80. Plotting the output samples shows a difference in | |
34 * phase in some areas. | |
35 * | |
36 * - Comparing both decoders against their input, this decoder gives a similar | |
37 * PSNR. If the test sequence homing frames are removed (this decoder does | |
38 * not detect them), the PSNR is at least as good as the reference on 140 | |
39 * out of 169 tests. | |
40 */ | |
41 | |
42 | |
43 #include <string.h> | |
44 #include <math.h> | |
45 | |
46 #include "avcodec.h" | |
47 #include "get_bits.h" | |
48 #include "libavutil/common.h" | |
49 #include "celp_math.h" | |
50 #include "celp_filters.h" | |
51 #include "acelp_filters.h" | |
52 #include "acelp_vectors.h" | |
53 #include "acelp_pitch_delay.h" | |
54 #include "lsp.h" | |
55 | |
56 #include "amrnbdata.h" | |
57 | |
58 #define AMR_BLOCK_SIZE 160 ///< samples per frame | |
59 #define AMR_SAMPLE_BOUND 32768.0 ///< threshold for synthesis overflow | |
60 | |
61 /** | |
62 * Scale from constructed speech to [-1,1] | |
63 * | |
64 * AMR is designed to produce 16-bit PCM samples (3GPP TS 26.090 4.2) but | |
65 * upscales by two (section 6.2.2). | |
66 * | |
67 * Fundamentally, this scale is determined by energy_mean through | |
68 * the fixed vector contribution to the excitation vector. | |
69 */ | |
70 #define AMR_SAMPLE_SCALE (2.0 / 32768.0) | |
71 | |
72 /** Prediction factor for 12.2kbit/s mode */ | |
73 #define PRED_FAC_MODE_12k2 0.65 | |
74 | |
75 #define LSF_R_FAC (8000.0 / 32768.0) ///< LSF residual tables to Hertz | |
76 #define MIN_LSF_SPACING (50.0488 / 8000.0) ///< Ensures stability of LPC filter | |
77 #define PITCH_LAG_MIN_MODE_12k2 18 ///< Lower bound on decoded lag search in 12.2kbit/s mode | |
78 | |
79 /** Initial energy in dB. Also used for bad frames (unimplemented). */ | |
80 #define MIN_ENERGY -14.0 | |
81 | |
82 /** Maximum sharpening factor | |
83 * | |
84 * The specification says 0.8, which should be 13107, but the reference C code | |
85 * uses 13017 instead. (Amusingly the same applies to SHARP_MAX in g729dec.c.) | |
86 */ | |
87 #define SHARP_MAX 0.79449462890625 | |
88 | |
89 /** Number of impulse response coefficients used for tilt factor */ | |
90 #define AMR_TILT_RESPONSE 22 | |
91 /** Tilt factor = 1st reflection coefficient * gamma_t */ | |
92 #define AMR_TILT_GAMMA_T 0.8 | |
93 /** Adaptive gain control factor used in post-filter */ | |
94 #define AMR_AGC_ALPHA 0.9 | |
95 | |
96 typedef struct AMRContext { | |
97 AMRNBFrame frame; ///< decoded AMR parameters (lsf coefficients, codebook indexes, etc) | |
98 uint8_t bad_frame_indicator; ///< bad frame ? 1 : 0 | |
99 enum Mode cur_frame_mode; | |
100 | |
101 int16_t prev_lsf_r[LP_FILTER_ORDER]; ///< residual LSF vector from previous subframe | |
102 double lsp[4][LP_FILTER_ORDER]; ///< lsp vectors from current frame | |
103 double prev_lsp_sub4[LP_FILTER_ORDER]; ///< lsp vector for the 4th subframe of the previous frame | |
104 | |
105 float lsf_q[4][LP_FILTER_ORDER]; ///< Interpolated LSF vector for fixed gain smoothing | |
106 float lsf_avg[LP_FILTER_ORDER]; ///< vector of averaged lsf vector | |
107 | |
108 float lpc[4][LP_FILTER_ORDER]; ///< lpc coefficient vectors for 4 subframes | |
109 | |
110 uint8_t pitch_lag_int; ///< integer part of pitch lag from current subframe | |
111 | |
112 float excitation_buf[PITCH_DELAY_MAX + LP_FILTER_ORDER + 1 + AMR_SUBFRAME_SIZE]; ///< current excitation and all necessary excitation history | |
113 float *excitation; ///< pointer to the current excitation vector in excitation_buf | |
114 | |
115 float pitch_vector[AMR_SUBFRAME_SIZE]; ///< adaptive code book (pitch) vector | |
116 float fixed_vector[AMR_SUBFRAME_SIZE]; ///< algebraic codebook (fixed) vector (must be kept zero between frames) | |
117 | |
118 float prediction_error[4]; ///< quantified prediction errors {20log10(^gamma_gc)} for previous four subframes | |
119 float pitch_gain[5]; ///< quantified pitch gains for the current and previous four subframes | |
120 float fixed_gain[5]; ///< quantified fixed gains for the current and previous four subframes | |
121 | |
122 float beta; ///< previous pitch_gain, bounded by [0.0,SHARP_MAX] | |
123 uint8_t diff_count; ///< the number of subframes for which diff has been above 0.65 | |
124 uint8_t hang_count; ///< the number of subframes since a hangover period started | |
125 | |
126 float prev_sparse_fixed_gain; ///< previous fixed gain; used by anti-sparseness processing to determine "onset" | |
127 uint8_t prev_ir_filter_nr; ///< previous impulse response filter "impNr": 0 - strong, 1 - medium, 2 - none | |
128 uint8_t ir_filter_onset; ///< flag for impulse response filter strength | |
129 | |
130 float postfilter_mem[10]; ///< previous intermediate values in the formant filter | |
131 float tilt_mem; ///< previous input to tilt compensation filter | |
132 float postfilter_agc; ///< previous factor used for adaptive gain control | |
133 float high_pass_mem[2]; ///< previous intermediate values in the high-pass filter | |
134 | |
135 float samples_in[LP_FILTER_ORDER + AMR_SUBFRAME_SIZE]; ///< floating point samples | |
136 | |
137 } AMRContext; | |
138 | |
/**
 * Double-precision version of ff_weighted_vector_sumf().
 *
 * Stores weight_coeff_a * in_a[k] + weight_coeff_b * in_b[k] into out[k]
 * for each k in [0, length).
 *
 * @param out            destination vector
 * @param in_a           first input vector
 * @param in_b           second input vector
 * @param weight_coeff_a weight applied to in_a
 * @param weight_coeff_b weight applied to in_b
 * @param length         number of elements to process
 */
static void weighted_vector_sumd(double *out, const double *in_a,
                                 const double *in_b, double weight_coeff_a,
                                 double weight_coeff_b, int length)
{
    int remaining;

    for (remaining = length; remaining > 0; remaining--) {
        *out = weight_coeff_a * *in_a
             + weight_coeff_b * *in_b;
        out++;
        in_a++;
        in_b++;
    }
}
150 | |
151 static av_cold int amrnb_decode_init(AVCodecContext *avctx) | |
152 { | |
153 AMRContext *p = avctx->priv_data; | |
154 int i; | |
155 | |
156 avctx->sample_fmt = SAMPLE_FMT_FLT; | |
157 | |
158 // p->excitation always points to the same position in p->excitation_buf | |
159 p->excitation = &p->excitation_buf[PITCH_DELAY_MAX + LP_FILTER_ORDER + 1]; | |
160 | |
161 for (i = 0; i < LP_FILTER_ORDER; i++) { | |
162 p->prev_lsp_sub4[i] = lsp_sub4_init[i] * 1000 / (float)(1 << 15); | |
163 p->lsf_avg[i] = p->lsf_q[3][i] = lsp_avg_init[i] / (float)(1 << 15); | |
164 } | |
165 | |
166 for (i = 0; i < 4; i++) | |
167 p->prediction_error[i] = MIN_ENERGY; | |
168 | |
169 return 0; | |
170 } | |
171 | |
172 | |
173 /** | |
174 * Unpack an RFC4867 speech frame into the AMR frame mode and parameters. | |
175 * | |
176 * The order of speech bits is specified by 3GPP TS 26.101. | |
177 * | |
178 * @param p the context | |
179 * @param buf pointer to the input buffer | |
180 * @param buf_size size of the input buffer | |
181 * | |
182 * @return the frame mode | |
183 */ | |
184 static enum Mode unpack_bitstream(AMRContext *p, const uint8_t *buf, | |
185 int buf_size) | |
186 { | |
187 GetBitContext gb; | |
188 enum Mode mode; | |
189 | |
190 init_get_bits(&gb, buf, buf_size * 8); | |
191 | |
192 // Decode the first octet. | |
193 skip_bits(&gb, 1); // padding bit | |
194 mode = get_bits(&gb, 4); // frame type | |
195 p->bad_frame_indicator = !get_bits1(&gb); // quality bit | |
196 skip_bits(&gb, 2); // two padding bits | |
197 | |
11645
69131e8f6614
10l: do not try to unpack DTX frames in AMR-NB decoder
vitor
parents:
11644
diff
changeset
|
198 if (mode < MODE_DTX) { |
11235 | 199 uint16_t *data = (uint16_t *)&p->frame; |
200 const uint8_t *order = amr_unpacking_bitmaps_per_mode[mode]; | |
201 int field_size; | |
202 | |
203 memset(&p->frame, 0, sizeof(AMRNBFrame)); | |
204 buf++; | |
205 while ((field_size = *order++)) { | |
206 int field = 0; | |
207 int field_offset = *order++; | |
208 while (field_size--) { | |
209 int bit = *order++; | |
210 field <<= 1; | |
211 field |= buf[bit >> 3] >> (bit & 7) & 1; | |
212 } | |
213 data[field_offset] = field; | |
214 } | |
215 } | |
216 | |
217 return mode; | |
218 } | |
219 | |
220 | |
221 /// @defgroup amr_lpc_decoding AMR pitch LPC coefficient decoding functions | |
222 /// @{ | |
223 | |
224 /** | |
225 * Interpolate the LSF vector (used for fixed gain smoothing). | |
226 * The interpolation is done over all four subframes even in MODE_12k2. | |
227 * | |
228 * @param[in,out] lsf_q LSFs in [0,1] for each subframe | |
229 * @param[in] lsf_new New LSFs in [0,1] for subframe 4 | |
230 */ | |
231 static void interpolate_lsf(float lsf_q[4][LP_FILTER_ORDER], float *lsf_new) | |
232 { | |
233 int i; | |
234 | |
235 for (i = 0; i < 4; i++) | |
236 ff_weighted_vector_sumf(lsf_q[i], lsf_q[3], lsf_new, | |
237 0.25 * (3 - i), 0.25 * (i + 1), | |
238 LP_FILTER_ORDER); | |
239 } | |
240 | |
241 /** | |
242 * Decode a set of 5 split-matrix quantized lsf indexes into an lsp vector. | |
243 * | |
244 * @param p the context | |
245 * @param lsp output LSP vector | |
246 * @param lsf_no_r LSF vector without the residual vector added | |
247 * @param lsf_quantizer pointers to LSF dictionary tables | |
248 * @param quantizer_offset offset in tables | |
249 * @param sign for the 3 dictionary table | |
250 * @param update store data for computing the next frame's LSFs | |
251 */ | |
252 static void lsf2lsp_for_mode12k2(AMRContext *p, double lsp[LP_FILTER_ORDER], | |
253 const float lsf_no_r[LP_FILTER_ORDER], | |
254 const int16_t *lsf_quantizer[5], | |
255 const int quantizer_offset, | |
256 const int sign, const int update) | |
257 { | |
258 int16_t lsf_r[LP_FILTER_ORDER]; // residual LSF vector | |
259 float lsf_q[LP_FILTER_ORDER]; // quantified LSF vector | |
260 int i; | |
261 | |
262 for (i = 0; i < LP_FILTER_ORDER >> 1; i++) | |
263 memcpy(&lsf_r[i << 1], &lsf_quantizer[i][quantizer_offset], | |
264 2 * sizeof(*lsf_r)); | |
265 | |
266 if (sign) { | |
267 lsf_r[4] *= -1; | |
268 lsf_r[5] *= -1; | |
269 } | |
270 | |
271 if (update) | |
272 memcpy(p->prev_lsf_r, lsf_r, LP_FILTER_ORDER * sizeof(float)); | |
273 | |
274 for (i = 0; i < LP_FILTER_ORDER; i++) | |
275 lsf_q[i] = lsf_r[i] * (LSF_R_FAC / 8000.0) + lsf_no_r[i] * (1.0 / 8000.0); | |
276 | |
277 ff_set_min_dist_lsf(lsf_q, MIN_LSF_SPACING, LP_FILTER_ORDER); | |
278 | |
279 if (update) | |
280 interpolate_lsf(p->lsf_q, lsf_q); | |
281 | |
12464
2dd67ed2f947
Move AMRNB lsf2lsp() function to common code for using in future AMRWB decoder.
vitor
parents:
11676
diff
changeset
|
282 ff_acelp_lsf2lspd(lsp, lsf_q, LP_FILTER_ORDER); |
11235 | 283 } |
284 | |
285 /** | |
286 * Decode a set of 5 split-matrix quantized lsf indexes into 2 lsp vectors. | |
287 * | |
288 * @param p pointer to the AMRContext | |
289 */ | |
290 static void lsf2lsp_5(AMRContext *p) | |
291 { | |
292 const uint16_t *lsf_param = p->frame.lsf; | |
293 float lsf_no_r[LP_FILTER_ORDER]; // LSFs without the residual vector | |
294 const int16_t *lsf_quantizer[5]; | |
295 int i; | |
296 | |
297 lsf_quantizer[0] = lsf_5_1[lsf_param[0]]; | |
298 lsf_quantizer[1] = lsf_5_2[lsf_param[1]]; | |
299 lsf_quantizer[2] = lsf_5_3[lsf_param[2] >> 1]; | |
300 lsf_quantizer[3] = lsf_5_4[lsf_param[3]]; | |
301 lsf_quantizer[4] = lsf_5_5[lsf_param[4]]; | |
302 | |
303 for (i = 0; i < LP_FILTER_ORDER; i++) | |
304 lsf_no_r[i] = p->prev_lsf_r[i] * LSF_R_FAC * PRED_FAC_MODE_12k2 + lsf_5_mean[i]; | |
305 | |
306 lsf2lsp_for_mode12k2(p, p->lsp[1], lsf_no_r, lsf_quantizer, 0, lsf_param[2] & 1, 0); | |
307 lsf2lsp_for_mode12k2(p, p->lsp[3], lsf_no_r, lsf_quantizer, 2, lsf_param[2] & 1, 1); | |
308 | |
309 // interpolate LSP vectors at subframes 1 and 3 | |
310 weighted_vector_sumd(p->lsp[0], p->prev_lsp_sub4, p->lsp[1], 0.5, 0.5, LP_FILTER_ORDER); | |
311 weighted_vector_sumd(p->lsp[2], p->lsp[1] , p->lsp[3], 0.5, 0.5, LP_FILTER_ORDER); | |
312 } | |
313 | |
314 /** | |
315 * Decode a set of 3 split-matrix quantized lsf indexes into an lsp vector. | |
316 * | |
317 * @param p pointer to the AMRContext | |
318 */ | |
319 static void lsf2lsp_3(AMRContext *p) | |
320 { | |
321 const uint16_t *lsf_param = p->frame.lsf; | |
322 int16_t lsf_r[LP_FILTER_ORDER]; // residual LSF vector | |
323 float lsf_q[LP_FILTER_ORDER]; // quantified LSF vector | |
324 const int16_t *lsf_quantizer; | |
325 int i, j; | |
326 | |
327 lsf_quantizer = (p->cur_frame_mode == MODE_7k95 ? lsf_3_1_MODE_7k95 : lsf_3_1)[lsf_param[0]]; | |
328 memcpy(lsf_r, lsf_quantizer, 3 * sizeof(*lsf_r)); | |
329 | |
330 lsf_quantizer = lsf_3_2[lsf_param[1] << (p->cur_frame_mode <= MODE_5k15)]; | |
331 memcpy(lsf_r + 3, lsf_quantizer, 3 * sizeof(*lsf_r)); | |
332 | |
333 lsf_quantizer = (p->cur_frame_mode <= MODE_5k15 ? lsf_3_3_MODE_5k15 : lsf_3_3)[lsf_param[2]]; | |
334 memcpy(lsf_r + 6, lsf_quantizer, 4 * sizeof(*lsf_r)); | |
335 | |
336 // calculate mean-removed LSF vector and add mean | |
337 for (i = 0; i < LP_FILTER_ORDER; i++) | |
338 lsf_q[i] = (lsf_r[i] + p->prev_lsf_r[i] * pred_fac[i]) * (LSF_R_FAC / 8000.0) + lsf_3_mean[i] * (1.0 / 8000.0); | |
339 | |
340 ff_set_min_dist_lsf(lsf_q, MIN_LSF_SPACING, LP_FILTER_ORDER); | |
341 | |
342 // store data for computing the next frame's LSFs | |
343 interpolate_lsf(p->lsf_q, lsf_q); | |
344 memcpy(p->prev_lsf_r, lsf_r, LP_FILTER_ORDER * sizeof(*lsf_r)); | |
345 | |
12464
2dd67ed2f947
Move AMRNB lsf2lsp() function to common code for using in future AMRWB decoder.
vitor
parents:
11676
diff
changeset
|
346 ff_acelp_lsf2lspd(p->lsp[3], lsf_q, LP_FILTER_ORDER); |
11235 | 347 |
348 // interpolate LSP vectors at subframes 1, 2 and 3 | |
349 for (i = 1; i <= 3; i++) | |
350 for(j = 0; j < LP_FILTER_ORDER; j++) | |
351 p->lsp[i-1][j] = p->prev_lsp_sub4[j] + | |
352 (p->lsp[3][j] - p->prev_lsp_sub4[j]) * 0.25 * i; | |
353 } | |
354 | |
355 /// @} | |
356 | |
357 | |
358 /// @defgroup amr_pitch_vector_decoding AMR pitch vector decoding functions | |
359 /// @{ | |
360 | |
361 /** | |
362 * Like ff_decode_pitch_lag(), but with 1/6 resolution | |
363 */ | |
364 static void decode_pitch_lag_1_6(int *lag_int, int *lag_frac, int pitch_index, | |
365 const int prev_lag_int, const int subframe) | |
366 { | |
367 if (subframe == 0 || subframe == 2) { | |
368 if (pitch_index < 463) { | |
369 *lag_int = (pitch_index + 107) * 10923 >> 16; | |
370 *lag_frac = pitch_index - *lag_int * 6 + 105; | |
371 } else { | |
372 *lag_int = pitch_index - 368; | |
373 *lag_frac = 0; | |
374 } | |
375 } else { | |
376 *lag_int = ((pitch_index + 5) * 10923 >> 16) - 1; | |
377 *lag_frac = pitch_index - *lag_int * 6 - 3; | |
378 *lag_int += av_clip(prev_lag_int - 5, PITCH_LAG_MIN_MODE_12k2, | |
379 PITCH_DELAY_MAX - 9); | |
380 } | |
381 } | |
382 | |
383 static void decode_pitch_vector(AMRContext *p, | |
384 const AMRNBSubframe *amr_subframe, | |
385 const int subframe) | |
386 { | |
387 int pitch_lag_int, pitch_lag_frac; | |
388 enum Mode mode = p->cur_frame_mode; | |
389 | |
390 if (p->cur_frame_mode == MODE_12k2) { | |
391 decode_pitch_lag_1_6(&pitch_lag_int, &pitch_lag_frac, | |
392 amr_subframe->p_lag, p->pitch_lag_int, | |
393 subframe); | |
394 } else | |
395 ff_decode_pitch_lag(&pitch_lag_int, &pitch_lag_frac, | |
396 amr_subframe->p_lag, | |
397 p->pitch_lag_int, subframe, | |
398 mode != MODE_4k75 && mode != MODE_5k15, | |
399 mode <= MODE_6k7 ? 4 : (mode == MODE_7k95 ? 5 : 6)); | |
400 | |
401 p->pitch_lag_int = pitch_lag_int; // store previous lag in a uint8_t | |
402 | |
403 pitch_lag_frac <<= (p->cur_frame_mode != MODE_12k2); | |
404 | |
405 pitch_lag_int += pitch_lag_frac > 0; | |
406 | |
407 /* Calculate the pitch vector by interpolating the past excitation at the | |
408 pitch lag using a b60 hamming windowed sinc function. */ | |
409 ff_acelp_interpolatef(p->excitation, p->excitation + 1 - pitch_lag_int, | |
410 ff_b60_sinc, 6, | |
411 pitch_lag_frac + 6 - 6*(pitch_lag_frac > 0), | |
412 10, AMR_SUBFRAME_SIZE); | |
413 | |
414 memcpy(p->pitch_vector, p->excitation, AMR_SUBFRAME_SIZE * sizeof(float)); | |
415 } | |
416 | |
417 /// @} | |
418 | |
419 | |
420 /// @defgroup amr_algebraic_code_book AMR algebraic code book (fixed) vector decoding functions | |
421 /// @{ | |
422 | |
423 /** | |
424 * Decode a 10-bit algebraic codebook index from a 10.2 kbit/s frame. | |
425 */ | |
426 static void decode_10bit_pulse(int code, int pulse_position[8], | |
427 int i1, int i2, int i3) | |
428 { | |
429 // coded using 7+3 bits with the 3 LSBs being, individually, the LSB of 1 of | |
430 // the 3 pulses and the upper 7 bits being coded in base 5 | |
431 const uint8_t *positions = base_five_table[code >> 3]; | |
432 pulse_position[i1] = (positions[2] << 1) + ( code & 1); | |
433 pulse_position[i2] = (positions[1] << 1) + ((code >> 1) & 1); | |
434 pulse_position[i3] = (positions[0] << 1) + ((code >> 2) & 1); | |
435 } | |
436 | |
437 /** | |
438 * Decode the algebraic codebook index to pulse positions and signs and | |
439 * construct the algebraic codebook vector for MODE_10k2. | |
440 * | |
441 * @param fixed_index positions of the eight pulses | |
442 * @param fixed_sparse pointer to the algebraic codebook vector | |
443 */ | |
444 static void decode_8_pulses_31bits(const int16_t *fixed_index, | |
445 AMRFixed *fixed_sparse) | |
446 { | |
447 int pulse_position[8]; | |
448 int i, temp; | |
449 | |
450 decode_10bit_pulse(fixed_index[4], pulse_position, 0, 4, 1); | |
451 decode_10bit_pulse(fixed_index[5], pulse_position, 2, 6, 5); | |
452 | |
453 // coded using 5+2 bits with the 2 LSBs being, individually, the LSB of 1 of | |
454 // the 2 pulses and the upper 5 bits being coded in base 5 | |
455 temp = ((fixed_index[6] >> 2) * 25 + 12) >> 5; | |
456 pulse_position[3] = temp % 5; | |
457 pulse_position[7] = temp / 5; | |
458 if (pulse_position[7] & 1) | |
459 pulse_position[3] = 4 - pulse_position[3]; | |
460 pulse_position[3] = (pulse_position[3] << 1) + ( fixed_index[6] & 1); | |
461 pulse_position[7] = (pulse_position[7] << 1) + ((fixed_index[6] >> 1) & 1); | |
462 | |
463 fixed_sparse->n = 8; | |
464 for (i = 0; i < 4; i++) { | |
465 const int pos1 = (pulse_position[i] << 2) + i; | |
466 const int pos2 = (pulse_position[i + 4] << 2) + i; | |
467 const float sign = fixed_index[i] ? -1.0 : 1.0; | |
468 fixed_sparse->x[i ] = pos1; | |
469 fixed_sparse->x[i + 4] = pos2; | |
470 fixed_sparse->y[i ] = sign; | |
471 fixed_sparse->y[i + 4] = pos2 < pos1 ? -sign : sign; | |
472 } | |
473 } | |
474 | |
475 /** | |
476 * Decode the algebraic codebook index to pulse positions and signs, | |
477 * then construct the algebraic codebook vector. | |
478 * | |
479 * nb of pulses | bits encoding pulses | |
480 * For MODE_4k75 or MODE_5k15, 2 | 1-3, 4-6, 7 | |
481 * MODE_5k9, 2 | 1, 2-4, 5-6, 7-9 | |
482 * MODE_6k7, 3 | 1-3, 4, 5-7, 8, 9-11 | |
483 * MODE_7k4 or MODE_7k95, 4 | 1-3, 4-6, 7-9, 10, 11-13 | |
484 * | |
485 * @param fixed_sparse pointer to the algebraic codebook vector | |
486 * @param pulses algebraic codebook indexes | |
487 * @param mode mode of the current frame | |
488 * @param subframe current subframe number | |
489 */ | |
490 static void decode_fixed_sparse(AMRFixed *fixed_sparse, const uint16_t *pulses, | |
491 const enum Mode mode, const int subframe) | |
492 { | |
493 assert(MODE_4k75 <= mode && mode <= MODE_12k2); | |
494 | |
495 if (mode == MODE_12k2) { | |
496 ff_decode_10_pulses_35bits(pulses, fixed_sparse, gray_decode, 5, 3); | |
497 } else if (mode == MODE_10k2) { | |
498 decode_8_pulses_31bits(pulses, fixed_sparse); | |
499 } else { | |
500 int *pulse_position = fixed_sparse->x; | |
501 int i, pulse_subset; | |
502 const int fixed_index = pulses[0]; | |
503 | |
504 if (mode <= MODE_5k15) { | |
505 pulse_subset = ((fixed_index >> 3) & 8) + (subframe << 1); | |
506 pulse_position[0] = ( fixed_index & 7) * 5 + track_position[pulse_subset]; | |
507 pulse_position[1] = ((fixed_index >> 3) & 7) * 5 + track_position[pulse_subset + 1]; | |
508 fixed_sparse->n = 2; | |
509 } else if (mode == MODE_5k9) { | |
510 pulse_subset = ((fixed_index & 1) << 1) + 1; | |
511 pulse_position[0] = ((fixed_index >> 1) & 7) * 5 + pulse_subset; | |
512 pulse_subset = (fixed_index >> 4) & 3; | |
513 pulse_position[1] = ((fixed_index >> 6) & 7) * 5 + pulse_subset + (pulse_subset == 3 ? 1 : 0); | |
514 fixed_sparse->n = pulse_position[0] == pulse_position[1] ? 1 : 2; | |
515 } else if (mode == MODE_6k7) { | |
516 pulse_position[0] = (fixed_index & 7) * 5; | |
517 pulse_subset = (fixed_index >> 2) & 2; | |
518 pulse_position[1] = ((fixed_index >> 4) & 7) * 5 + pulse_subset + 1; | |
519 pulse_subset = (fixed_index >> 6) & 2; | |
520 pulse_position[2] = ((fixed_index >> 8) & 7) * 5 + pulse_subset + 2; | |
521 fixed_sparse->n = 3; | |
522 } else { // mode <= MODE_7k95 | |
523 pulse_position[0] = gray_decode[ fixed_index & 7]; | |
524 pulse_position[1] = gray_decode[(fixed_index >> 3) & 7] + 1; | |
525 pulse_position[2] = gray_decode[(fixed_index >> 6) & 7] + 2; | |
526 pulse_subset = (fixed_index >> 9) & 1; | |
527 pulse_position[3] = gray_decode[(fixed_index >> 10) & 7] + pulse_subset + 3; | |
528 fixed_sparse->n = 4; | |
529 } | |
530 for (i = 0; i < fixed_sparse->n; i++) | |
531 fixed_sparse->y[i] = (pulses[1] >> i) & 1 ? 1.0 : -1.0; | |
532 } | |
533 } | |
534 | |
535 /** | |
536 * Apply pitch lag to obtain the sharpened fixed vector (section 6.1.2) | |
537 * | |
538 * @param p the context | |
539 * @param subframe unpacked amr subframe | |
540 * @param mode mode of the current frame | |
541 * @param fixed_sparse sparse respresentation of the fixed vector | |
542 */ | |
543 static void pitch_sharpening(AMRContext *p, int subframe, enum Mode mode, | |
544 AMRFixed *fixed_sparse) | |
545 { | |
546 // The spec suggests the current pitch gain is always used, but in other | |
547 // modes the pitch and codebook gains are joinly quantized (sec 5.8.2) | |
548 // so the codebook gain cannot depend on the quantized pitch gain. | |
549 if (mode == MODE_12k2) | |
550 p->beta = FFMIN(p->pitch_gain[4], 1.0); | |
551 | |
552 fixed_sparse->pitch_lag = p->pitch_lag_int; | |
553 fixed_sparse->pitch_fac = p->beta; | |
554 | |
555 // Save pitch sharpening factor for the next subframe | |
556 // MODE_4k75 only updates on the 2nd and 4th subframes - this follows from | |
557 // the fact that the gains for two subframes are jointly quantized. | |
558 if (mode != MODE_4k75 || subframe & 1) | |
559 p->beta = av_clipf(p->pitch_gain[4], 0.0, SHARP_MAX); | |
560 } | |
561 /// @} | |
562 | |
563 | |
564 /// @defgroup amr_gain_decoding AMR gain decoding functions | |
565 /// @{ | |
566 | |
567 /** | |
568 * fixed gain smoothing | |
569 * Note that where the spec specifies the "spectrum in the q domain" | |
570 * in section 6.1.4, in fact frequencies should be used. | |
571 * | |
572 * @param p the context | |
573 * @param lsf LSFs for the current subframe, in the range [0,1] | |
574 * @param lsf_avg averaged LSFs | |
575 * @param mode mode of the current frame | |
576 * | |
577 * @return fixed gain smoothed | |
578 */ | |
579 static float fixed_gain_smooth(AMRContext *p , const float *lsf, | |
580 const float *lsf_avg, const enum Mode mode) | |
581 { | |
582 float diff = 0.0; | |
583 int i; | |
584 | |
585 for (i = 0; i < LP_FILTER_ORDER; i++) | |
586 diff += fabs(lsf_avg[i] - lsf[i]) / lsf_avg[i]; | |
587 | |
588 // If diff is large for ten subframes, disable smoothing for a 40-subframe | |
589 // hangover period. | |
590 p->diff_count++; | |
591 if (diff <= 0.65) | |
592 p->diff_count = 0; | |
593 | |
594 if (p->diff_count > 10) { | |
595 p->hang_count = 0; | |
596 p->diff_count--; // don't let diff_count overflow | |
597 } | |
598 | |
599 if (p->hang_count < 40) { | |
600 p->hang_count++; | |
601 } else if (mode < MODE_7k4 || mode == MODE_10k2) { | |
602 const float smoothing_factor = av_clipf(4.0 * diff - 1.6, 0.0, 1.0); | |
603 const float fixed_gain_mean = (p->fixed_gain[0] + p->fixed_gain[1] + | |
604 p->fixed_gain[2] + p->fixed_gain[3] + | |
605 p->fixed_gain[4]) * 0.2; | |
606 return smoothing_factor * p->fixed_gain[4] + | |
607 (1.0 - smoothing_factor) * fixed_gain_mean; | |
608 } | |
609 return p->fixed_gain[4]; | |
610 } | |
611 | |
612 /** | |
613 * Decode pitch gain and fixed gain factor (part of section 6.1.3). | |
614 * | |
615 * @param p the context | |
616 * @param amr_subframe unpacked amr subframe | |
617 * @param mode mode of the current frame | |
618 * @param subframe current subframe number | |
619 * @param fixed_gain_factor decoded gain correction factor | |
620 */ | |
621 static void decode_gains(AMRContext *p, const AMRNBSubframe *amr_subframe, | |
622 const enum Mode mode, const int subframe, | |
623 float *fixed_gain_factor) | |
624 { | |
625 if (mode == MODE_12k2 || mode == MODE_7k95) { | |
626 p->pitch_gain[4] = qua_gain_pit [amr_subframe->p_gain ] | |
627 * (1.0 / 16384.0); | |
628 *fixed_gain_factor = qua_gain_code[amr_subframe->fixed_gain] | |
629 * (1.0 / 2048.0); | |
630 } else { | |
631 const uint16_t *gains; | |
632 | |
633 if (mode >= MODE_6k7) { | |
634 gains = gains_high[amr_subframe->p_gain]; | |
635 } else if (mode >= MODE_5k15) { | |
636 gains = gains_low [amr_subframe->p_gain]; | |
637 } else { | |
638 // gain index is only coded in subframes 0,2 for MODE_4k75 | |
639 gains = gains_MODE_4k75[(p->frame.subframe[subframe & 2].p_gain << 1) + (subframe & 1)]; | |
640 } | |
641 | |
642 p->pitch_gain[4] = gains[0] * (1.0 / 16384.0); | |
643 *fixed_gain_factor = gains[1] * (1.0 / 4096.0); | |
644 } | |
645 } | |
646 | |
647 /// @} | |
648 | |
649 | |
650 /// @defgroup amr_pre_processing AMR pre-processing functions | |
651 /// @{ | |
652 | |
653 /** | |
654 * Circularly convolve a sparse fixed vector with a phase dispersion impulse | |
655 * response filter (D.6.2 of G.729 and 6.1.5 of AMR). | |
656 * | |
657 * @param out vector with filter applied | |
658 * @param in source vector | |
659 * @param filter phase filter coefficients | |
660 * | |
661 * out[n] = sum(i,0,len-1){ in[i] * filter[(len + n - i)%len] } | |
662 */ | |
663 static void apply_ir_filter(float *out, const AMRFixed *in, | |
664 const float *filter) | |
665 { | |
666 float filter1[AMR_SUBFRAME_SIZE], //!< filters at pitch lag*1 and *2 | |
667 filter2[AMR_SUBFRAME_SIZE]; | |
668 int lag = in->pitch_lag; | |
669 float fac = in->pitch_fac; | |
670 int i; | |
671 | |
672 if (lag < AMR_SUBFRAME_SIZE) { | |
673 ff_celp_circ_addf(filter1, filter, filter, lag, fac, | |
674 AMR_SUBFRAME_SIZE); | |
675 | |
676 if (lag < AMR_SUBFRAME_SIZE >> 1) | |
677 ff_celp_circ_addf(filter2, filter, filter1, lag, fac, | |
678 AMR_SUBFRAME_SIZE); | |
679 } | |
680 | |
681 memset(out, 0, sizeof(float) * AMR_SUBFRAME_SIZE); | |
682 for (i = 0; i < in->n; i++) { | |
683 int x = in->x[i]; | |
684 float y = in->y[i]; | |
685 const float *filterp; | |
686 | |
687 if (x >= AMR_SUBFRAME_SIZE - lag) { | |
688 filterp = filter; | |
689 } else if (x >= AMR_SUBFRAME_SIZE - (lag << 1)) { | |
690 filterp = filter1; | |
691 } else | |
692 filterp = filter2; | |
693 | |
694 ff_celp_circ_addf(out, out, filterp, x, y, AMR_SUBFRAME_SIZE); | |
695 } | |
696 } | |
697 | |
698 /** | |
699 * Reduce fixed vector sparseness by smoothing with one of three IR filters. | |
700 * Also know as "adaptive phase dispersion". | |
701 * | |
702 * This implements 3GPP TS 26.090 section 6.1(5). | |
703 * | |
704 * @param p the context | |
705 * @param fixed_sparse algebraic codebook vector | |
706 * @param fixed_vector unfiltered fixed vector | |
707 * @param fixed_gain smoothed gain | |
708 * @param out space for modified vector if necessary | |
709 */ | |
710 static const float *anti_sparseness(AMRContext *p, AMRFixed *fixed_sparse, | |
711 const float *fixed_vector, | |
712 float fixed_gain, float *out) | |
713 { | |
714 int ir_filter_nr; | |
715 | |
716 if (p->pitch_gain[4] < 0.6) { | |
717 ir_filter_nr = 0; // strong filtering | |
718 } else if (p->pitch_gain[4] < 0.9) { | |
719 ir_filter_nr = 1; // medium filtering | |
720 } else | |
721 ir_filter_nr = 2; // no filtering | |
722 | |
723 // detect 'onset' | |
724 if (fixed_gain > 2.0 * p->prev_sparse_fixed_gain) { | |
725 p->ir_filter_onset = 2; | |
726 } else if (p->ir_filter_onset) | |
727 p->ir_filter_onset--; | |
728 | |
729 if (!p->ir_filter_onset) { | |
730 int i, count = 0; | |
731 | |
732 for (i = 0; i < 5; i++) | |
733 if (p->pitch_gain[i] < 0.6) | |
734 count++; | |
735 if (count > 2) | |
736 ir_filter_nr = 0; | |
737 | |
738 if (ir_filter_nr > p->prev_ir_filter_nr + 1) | |
739 ir_filter_nr--; | |
740 } else if (ir_filter_nr < 2) | |
741 ir_filter_nr++; | |
742 | |
743 // Disable filtering for very low level of fixed_gain. | |
744 // Note this step is not specified in the technical description but is in | |
745 // the reference source in the function Ph_disp. | |
746 if (fixed_gain < 5.0) | |
747 ir_filter_nr = 2; | |
748 | |
749 if (p->cur_frame_mode != MODE_7k4 && p->cur_frame_mode < MODE_10k2 | |
750 && ir_filter_nr < 2) { | |
751 apply_ir_filter(out, fixed_sparse, | |
752 (p->cur_frame_mode == MODE_7k95 ? | |
753 ir_filters_lookup_MODE_7k95 : | |
754 ir_filters_lookup)[ir_filter_nr]); | |
755 fixed_vector = out; | |
756 } | |
757 | |
758 // update ir filter strength history | |
759 p->prev_ir_filter_nr = ir_filter_nr; | |
760 p->prev_sparse_fixed_gain = fixed_gain; | |
761 | |
762 return fixed_vector; | |
763 } | |
764 | |
765 /// @} | |
766 | |
767 | |
768 /// @defgroup amr_synthesis AMR synthesis functions | |
769 /// @{ | |
770 | |
771 /** | |
772 * Conduct 10th order linear predictive coding synthesis. | |
773 * | |
774 * @param p pointer to the AMRContext | |
775 * @param lpc pointer to the LPC coefficients | |
776 * @param fixed_gain fixed codebook gain for synthesis | |
777 * @param fixed_vector algebraic codebook vector | |
778 * @param samples pointer to the output speech samples | |
779 * @param overflow 16-bit overflow flag | |
780 */ | |
781 static int synthesis(AMRContext *p, float *lpc, | |
782 float fixed_gain, const float *fixed_vector, | |
783 float *samples, uint8_t overflow) | |
784 { | |
11652
8b6f3d3b55cb
Move clipping of audio samples (for those codecs outputting float) from decoder
rbultje
parents:
11648
diff
changeset
|
785 int i; |
11235 | 786 float excitation[AMR_SUBFRAME_SIZE]; |
787 | |
788 // if an overflow has been detected, the pitch vector is scaled down by a | |
789 // factor of 4 | |
790 if (overflow) | |
791 for (i = 0; i < AMR_SUBFRAME_SIZE; i++) | |
792 p->pitch_vector[i] *= 0.25; | |
793 | |
794 ff_weighted_vector_sumf(excitation, p->pitch_vector, fixed_vector, | |
795 p->pitch_gain[4], fixed_gain, AMR_SUBFRAME_SIZE); | |
796 | |
797 // emphasize pitch vector contribution | |
798 if (p->pitch_gain[4] > 0.5 && !overflow) { | |
799 float energy = ff_dot_productf(excitation, excitation, | |
800 AMR_SUBFRAME_SIZE); | |
801 float pitch_factor = | |
802 p->pitch_gain[4] * | |
803 (p->cur_frame_mode == MODE_12k2 ? | |
804 0.25 * FFMIN(p->pitch_gain[4], 1.0) : | |
805 0.5 * FFMIN(p->pitch_gain[4], SHARP_MAX)); | |
806 | |
807 for (i = 0; i < AMR_SUBFRAME_SIZE; i++) | |
808 excitation[i] += pitch_factor * p->pitch_vector[i]; | |
809 | |
810 ff_scale_vector_to_given_sum_of_squares(excitation, excitation, energy, | |
811 AMR_SUBFRAME_SIZE); | |
812 } | |
813 | |
814 ff_celp_lp_synthesis_filterf(samples, lpc, excitation, AMR_SUBFRAME_SIZE, | |
815 LP_FILTER_ORDER); | |
816 | |
817 // detect overflow | |
818 for (i = 0; i < AMR_SUBFRAME_SIZE; i++) | |
819 if (fabsf(samples[i]) > AMR_SAMPLE_BOUND) { | |
11652
8b6f3d3b55cb
Move clipping of audio samples (for those codecs outputting float) from decoder
rbultje
parents:
11648
diff
changeset
|
820 return 1; |
11235 | 821 } |
822 | |
11652
8b6f3d3b55cb
Move clipping of audio samples (for those codecs outputting float) from decoder
rbultje
parents:
11648
diff
changeset
|
823 return 0; |
11235 | 824 } |
825 | |
826 /// @} | |
827 | |
828 | |
829 /// @defgroup amr_update AMR update functions | |
830 /// @{ | |
831 | |
832 /** | |
833 * Update buffers and history at the end of decoding a subframe. | |
834 * | |
835 * @param p pointer to the AMRContext | |
836 */ | |
837 static void update_state(AMRContext *p) | |
838 { | |
839 memcpy(p->prev_lsp_sub4, p->lsp[3], LP_FILTER_ORDER * sizeof(p->lsp[3][0])); | |
840 | |
841 memmove(&p->excitation_buf[0], &p->excitation_buf[AMR_SUBFRAME_SIZE], | |
842 (PITCH_DELAY_MAX + LP_FILTER_ORDER + 1) * sizeof(float)); | |
843 | |
844 memmove(&p->pitch_gain[0], &p->pitch_gain[1], 4 * sizeof(float)); | |
845 memmove(&p->fixed_gain[0], &p->fixed_gain[1], 4 * sizeof(float)); | |
846 | |
847 memmove(&p->samples_in[0], &p->samples_in[AMR_SUBFRAME_SIZE], | |
848 LP_FILTER_ORDER * sizeof(float)); | |
849 } | |
850 | |
851 /// @} | |
852 | |
853 | |
854 /// @defgroup amr_postproc AMR Post processing functions | |
855 /// @{ | |
856 | |
857 /** | |
858 * Get the tilt factor of a formant filter from its transfer function | |
859 * | |
860 * @param lpc_n LP_FILTER_ORDER coefficients of the numerator | |
861 * @param lpc_d LP_FILTER_ORDER coefficients of the denominator | |
862 */ | |
863 static float tilt_factor(float *lpc_n, float *lpc_d) | |
864 { | |
865 float rh0, rh1; // autocorrelation at lag 0 and 1 | |
866 | |
867 // LP_FILTER_ORDER prior zeros are needed for ff_celp_lp_synthesis_filterf | |
868 float impulse_buffer[LP_FILTER_ORDER + AMR_TILT_RESPONSE] = { 0 }; | |
869 float *hf = impulse_buffer + LP_FILTER_ORDER; // start of impulse response | |
870 | |
871 hf[0] = 1.0; | |
872 memcpy(hf + 1, lpc_n, sizeof(float) * LP_FILTER_ORDER); | |
873 ff_celp_lp_synthesis_filterf(hf, lpc_d, hf, AMR_TILT_RESPONSE, | |
874 LP_FILTER_ORDER); | |
875 | |
876 rh0 = ff_dot_productf(hf, hf, AMR_TILT_RESPONSE); | |
877 rh1 = ff_dot_productf(hf, hf + 1, AMR_TILT_RESPONSE - 1); | |
878 | |
879 // The spec only specifies this check for 12.2 and 10.2 kbit/s | |
880 // modes. But in the ref source the tilt is always non-negative. | |
881 return rh1 >= 0.0 ? rh1 / rh0 * AMR_TILT_GAMMA_T : 0.0; | |
882 } | |
883 | |
884 /** | |
885 * Perform adaptive post-filtering to enhance the quality of the speech. | |
886 * See section 6.2.1. | |
887 * | |
888 * @param p pointer to the AMRContext | |
889 * @param lpc interpolated LP coefficients for this subframe | |
890 * @param buf_out output of the filter | |
891 */ | |
892 static void postfilter(AMRContext *p, float *lpc, float *buf_out) | |
893 { | |
894 int i; | |
895 float *samples = p->samples_in + LP_FILTER_ORDER; // Start of input | |
896 | |
897 float speech_gain = ff_dot_productf(samples, samples, | |
898 AMR_SUBFRAME_SIZE); | |
899 | |
900 float pole_out[AMR_SUBFRAME_SIZE + LP_FILTER_ORDER]; // Output of pole filter | |
901 const float *gamma_n, *gamma_d; // Formant filter factor table | |
902 float lpc_n[LP_FILTER_ORDER], lpc_d[LP_FILTER_ORDER]; // Transfer function coefficients | |
903 | |
904 if (p->cur_frame_mode == MODE_12k2 || p->cur_frame_mode == MODE_10k2) { | |
905 gamma_n = ff_pow_0_7; | |
906 gamma_d = ff_pow_0_75; | |
907 } else { | |
908 gamma_n = ff_pow_0_55; | |
909 gamma_d = ff_pow_0_7; | |
910 } | |
911 | |
912 for (i = 0; i < LP_FILTER_ORDER; i++) { | |
913 lpc_n[i] = lpc[i] * gamma_n[i]; | |
914 lpc_d[i] = lpc[i] * gamma_d[i]; | |
915 } | |
916 | |
917 memcpy(pole_out, p->postfilter_mem, sizeof(float) * LP_FILTER_ORDER); | |
918 ff_celp_lp_synthesis_filterf(pole_out + LP_FILTER_ORDER, lpc_d, samples, | |
919 AMR_SUBFRAME_SIZE, LP_FILTER_ORDER); | |
920 memcpy(p->postfilter_mem, pole_out + AMR_SUBFRAME_SIZE, | |
921 sizeof(float) * LP_FILTER_ORDER); | |
922 | |
923 ff_celp_lp_zero_synthesis_filterf(buf_out, lpc_n, | |
924 pole_out + LP_FILTER_ORDER, | |
925 AMR_SUBFRAME_SIZE, LP_FILTER_ORDER); | |
926 | |
927 ff_tilt_compensation(&p->tilt_mem, tilt_factor(lpc_n, lpc_d), buf_out, | |
928 AMR_SUBFRAME_SIZE); | |
929 | |
11647
26aabf52f578
Split the input/output data arguments to ff_adaptive_gain_control().
rbultje
parents:
11645
diff
changeset
|
930 ff_adaptive_gain_control(buf_out, buf_out, speech_gain, AMR_SUBFRAME_SIZE, |
11462 | 931 AMR_AGC_ALPHA, &p->postfilter_agc); |
11235 | 932 } |
933 | |
934 /// @} | |
935 | |
936 static int amrnb_decode_frame(AVCodecContext *avctx, void *data, int *data_size, | |
937 AVPacket *avpkt) | |
938 { | |
939 | |
940 AMRContext *p = avctx->priv_data; // pointer to private data | |
941 const uint8_t *buf = avpkt->data; | |
942 int buf_size = avpkt->size; | |
943 float *buf_out = data; // pointer to the output data buffer | |
944 int i, subframe; | |
945 float fixed_gain_factor; | |
946 AMRFixed fixed_sparse = {0}; // fixed vector up to anti-sparseness processing | |
947 float spare_vector[AMR_SUBFRAME_SIZE]; // extra stack space to hold result from anti-sparseness processing | |
948 float synth_fixed_gain; // the fixed gain that synthesis should use | |
949 const float *synth_fixed_vector; // pointer to the fixed vector that synthesis should use | |
950 | |
951 p->cur_frame_mode = unpack_bitstream(p, buf, buf_size); | |
952 if (p->cur_frame_mode == MODE_DTX) { | |
953 av_log_missing_feature(avctx, "dtx mode", 1); | |
954 return -1; | |
955 } | |
956 | |
957 if (p->cur_frame_mode == MODE_12k2) { | |
958 lsf2lsp_5(p); | |
959 } else | |
960 lsf2lsp_3(p); | |
961 | |
962 for (i = 0; i < 4; i++) | |
963 ff_acelp_lspd2lpc(p->lsp[i], p->lpc[i], 5); | |
964 | |
965 for (subframe = 0; subframe < 4; subframe++) { | |
966 const AMRNBSubframe *amr_subframe = &p->frame.subframe[subframe]; | |
967 | |
968 decode_pitch_vector(p, amr_subframe, subframe); | |
969 | |
970 decode_fixed_sparse(&fixed_sparse, amr_subframe->pulses, | |
971 p->cur_frame_mode, subframe); | |
972 | |
973 // The fixed gain (section 6.1.3) depends on the fixed vector | |
974 // (section 6.1.2), but the fixed vector calculation uses | |
975 // pitch sharpening based on the on the pitch gain (section 6.1.3). | |
976 // So the correct order is: pitch gain, pitch sharpening, fixed gain. | |
977 decode_gains(p, amr_subframe, p->cur_frame_mode, subframe, | |
978 &fixed_gain_factor); | |
979 | |
980 pitch_sharpening(p, subframe, p->cur_frame_mode, &fixed_sparse); | |
981 | |
982 ff_set_fixed_vector(p->fixed_vector, &fixed_sparse, 1.0, | |
983 AMR_SUBFRAME_SIZE); | |
984 | |
985 p->fixed_gain[4] = | |
986 ff_amr_set_fixed_gain(fixed_gain_factor, | |
987 ff_dot_productf(p->fixed_vector, p->fixed_vector, | |
988 AMR_SUBFRAME_SIZE)/AMR_SUBFRAME_SIZE, | |
989 p->prediction_error, | |
990 energy_mean[p->cur_frame_mode], energy_pred_fac); | |
991 | |
992 // The excitation feedback is calculated without any processing such | |
993 // as fixed gain smoothing. This isn't mentioned in the specification. | |
994 for (i = 0; i < AMR_SUBFRAME_SIZE; i++) | |
995 p->excitation[i] *= p->pitch_gain[4]; | |
996 ff_set_fixed_vector(p->excitation, &fixed_sparse, p->fixed_gain[4], | |
997 AMR_SUBFRAME_SIZE); | |
998 | |
999 // In the ref decoder, excitation is stored with no fractional bits. | |
1000 // This step prevents buzz in silent periods. The ref encoder can | |
1001 // emit long sequences with pitch factor greater than one. This | |
1002 // creates unwanted feedback if the excitation vector is nonzero. | |
1003 // (e.g. test sequence T19_795.COD in 3GPP TS 26.074) | |
1004 for (i = 0; i < AMR_SUBFRAME_SIZE; i++) | |
1005 p->excitation[i] = truncf(p->excitation[i]); | |
1006 | |
1007 // Smooth fixed gain. | |
1008 // The specification is ambiguous, but in the reference source, the | |
1009 // smoothed value is NOT fed back into later fixed gain smoothing. | |
1010 synth_fixed_gain = fixed_gain_smooth(p, p->lsf_q[subframe], | |
1011 p->lsf_avg, p->cur_frame_mode); | |
1012 | |
1013 synth_fixed_vector = anti_sparseness(p, &fixed_sparse, p->fixed_vector, | |
1014 synth_fixed_gain, spare_vector); | |
1015 | |
1016 if (synthesis(p, p->lpc[subframe], synth_fixed_gain, | |
1017 synth_fixed_vector, &p->samples_in[LP_FILTER_ORDER], 0)) | |
1018 // overflow detected -> rerun synthesis scaling pitch vector down | |
1019 // by a factor of 4, skipping pitch vector contribution emphasis | |
1020 // and adaptive gain control | |
1021 synthesis(p, p->lpc[subframe], synth_fixed_gain, | |
1022 synth_fixed_vector, &p->samples_in[LP_FILTER_ORDER], 1); | |
1023 | |
1024 postfilter(p, p->lpc[subframe], buf_out + subframe * AMR_SUBFRAME_SIZE); | |
1025 | |
1026 // update buffers and history | |
1027 ff_clear_fixed_vector(p->fixed_vector, &fixed_sparse, AMR_SUBFRAME_SIZE); | |
1028 update_state(p); | |
1029 } | |
1030 | |
11648
0516f4062307
Split input/output data arguments to ff_acelp_apply_order_2_transfer_function().
rbultje
parents:
11647
diff
changeset
|
1031 ff_acelp_apply_order_2_transfer_function(buf_out, buf_out, highpass_zeros, |
11676
ceec2fb08b8e
amrnbdec: Apply AMR_SAMPLE_SCALE when finishing the decoder output
mstorsjo
parents:
11652
diff
changeset
|
1032 highpass_poles, |
ceec2fb08b8e
amrnbdec: Apply AMR_SAMPLE_SCALE when finishing the decoder output
mstorsjo
parents:
11652
diff
changeset
|
1033 highpass_gain * AMR_SAMPLE_SCALE, |
11235 | 1034 p->high_pass_mem, AMR_BLOCK_SIZE); |
1035 | |
1036 /* Update averaged lsf vector (used for fixed gain smoothing). | |
1037 * | |
1038 * Note that lsf_avg should not incorporate the current frame's LSFs | |
1039 * for fixed_gain_smooth. | |
1040 * The specification has an incorrect formula: the reference decoder uses | |
1041 * qbar(n-1) rather than qbar(n) in section 6.1(4) equation 71. */ | |
1042 ff_weighted_vector_sumf(p->lsf_avg, p->lsf_avg, p->lsf_q[3], | |
1043 0.84, 0.16, LP_FILTER_ORDER); | |
1044 | |
1045 /* report how many samples we got */ | |
1046 *data_size = AMR_BLOCK_SIZE * sizeof(float); | |
1047 | |
1048 /* return the amount of bytes consumed if everything was OK */ | |
1049 return frame_sizes_nb[p->cur_frame_mode] + 1; // +7 for rounding and +8 for TOC | |
1050 } | |
1051 | |
1052 | |
1053 AVCodec amrnb_decoder = { | |
1054 .name = "amrnb", | |
11560
8a4984c5cacc
Define AVMediaType enum, and use it instead of enum CodecType, which
stefano
parents:
11462
diff
changeset
|
1055 .type = AVMEDIA_TYPE_AUDIO, |
11235 | 1056 .id = CODEC_ID_AMR_NB, |
1057 .priv_data_size = sizeof(AMRContext), | |
1058 .init = amrnb_decode_init, | |
1059 .decode = amrnb_decode_frame, | |
1060 .long_name = NULL_IF_CONFIG_SMALL("Adaptive Multi-Rate NarrowBand"), | |
1061 .sample_fmts = (enum SampleFormat[]){SAMPLE_FMT_FLT,SAMPLE_FMT_NONE}, | |
1062 }; |