Mercurial > libavcodec.hg
annotate amrnbdec.c @ 12494:94eaea836bf4 libavcodec
Check avctx width/height more thoroughly (e.g. all values 0 except width would
have been accepted before).
Also do not fail if they are invalid but instead override them to 0.
This allows decoding e.g. MPEG video when only the container values are corrupted.
For encoding a value of 0,0 of course makes no sense, but was allowed
through before and will be caught by an extra check in the encode function.
author | reimar |
---|---|
date | Wed, 15 Sep 2010 04:46:55 +0000 |
parents | b42e02e9bf2b |
children |
rev | line source |
---|---|
11235 | 1 /* |
2 * AMR narrowband decoder | |
3 * Copyright (c) 2006-2007 Robert Swain | |
4 * Copyright (c) 2009 Colin McQuillan | |
5 * | |
6 * This file is part of FFmpeg. | |
7 * | |
8 * FFmpeg is free software; you can redistribute it and/or | |
9 * modify it under the terms of the GNU Lesser General Public | |
10 * License as published by the Free Software Foundation; either | |
11 * version 2.1 of the License, or (at your option) any later version. | |
12 * | |
13 * FFmpeg is distributed in the hope that it will be useful, | |
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 * Lesser General Public License for more details. | |
17 * | |
18 * You should have received a copy of the GNU Lesser General Public | |
19 * License along with FFmpeg; if not, write to the Free Software | |
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
21 */ | |
22 | |
23 | |
24 /** | |
11644
7dd2a45249a9
Remove explicit filename from Doxygen @file commands.
diego
parents:
11560
diff
changeset
|
25 * @file |
11235 | 26 * AMR narrowband decoder |
27 * | |
28 * This decoder uses floats for simplicity and so is not bit-exact. One | |
29 * difference is that differences in phase can accumulate. The test sequences | |
30 * in 3GPP TS 26.074 can still be useful. | |
31 * | |
32 * - Comparing this file's output to the output of the ref decoder gives a | |
33 * PSNR of 30 to 80. Plotting the output samples shows a difference in | |
34 * phase in some areas. | |
35 * | |
36 * - Comparing both decoders against their input, this decoder gives a similar | |
37 * PSNR. If the test sequence homing frames are removed (this decoder does | |
38 * not detect them), the PSNR is at least as good as the reference on 140 | |
39 * out of 169 tests. | |
40 */ | |
41 | |
42 | |
43 #include <string.h> | |
44 #include <math.h> | |
45 | |
46 #include "avcodec.h" | |
47 #include "get_bits.h" | |
48 #include "libavutil/common.h" | |
49 #include "celp_math.h" | |
50 #include "celp_filters.h" | |
51 #include "acelp_filters.h" | |
52 #include "acelp_vectors.h" | |
53 #include "acelp_pitch_delay.h" | |
54 #include "lsp.h" | |
12485
b42e02e9bf2b
Move AMR-NB frame unpacking code to a common file so it can be reused in
vitor
parents:
12464
diff
changeset
|
55 #include "amr.h" |
11235 | 56 |
57 #include "amrnbdata.h" | |
58 | |
#define AMR_BLOCK_SIZE              160   ///< samples per frame
#define AMR_SAMPLE_BOUND        32768.0   ///< threshold for synthesis overflow

/**
 * Scale from constructed speech to [-1,1]
 *
 * AMR is designed to produce 16-bit PCM samples (3GPP TS 26.090 4.2) but
 * upscales by two (section 6.2.2).
 *
 * Fundamentally, this scale is determined by energy_mean through
 * the fixed vector contribution to the excitation vector.
 */
#define AMR_SAMPLE_SCALE  (2.0 / 32768.0)

/** Prediction factor for 12.2kbit/s mode */
#define PRED_FAC_MODE_12k2             0.65

#define LSF_R_FAC          (8000.0 / 32768.0) ///< LSF residual tables to Hertz
#define MIN_LSF_SPACING    (50.0488 / 8000.0) ///< Ensures stability of LPC filter
#define PITCH_LAG_MIN_MODE_12k2          18   ///< Lower bound on decoded lag search in 12.2kbit/s mode

/** Initial energy in dB. Also used for bad frames (unimplemented). */
#define MIN_ENERGY -14.0

/**
 * Maximum sharpening factor
 *
 * The specification says 0.8, which should be 13107, but the reference C code
 * uses 13017 instead; the value below is 13017 / 16384.
 * (Amusingly the same applies to SHARP_MAX in g729dec.c.)
 */
#define SHARP_MAX 0.79449462890625

/** Number of impulse response coefficients used for tilt factor */
#define AMR_TILT_RESPONSE   22
/** Tilt factor = 1st reflection coefficient * gamma_t */
#define AMR_TILT_GAMMA_T   0.8
/** Adaptive gain control factor used in post-filter */
#define AMR_AGC_ALPHA      0.9
96 | |
/**
 * Private decoder state (stored in AVCodecContext.priv_data).
 *
 * Holds the unpacked parameters of the current frame together with the
 * filter memories and gain/LSF histories that the subframe decoders read
 * and update.
 */
typedef struct AMRContext {
    AMRNBFrame                         frame; ///< decoded AMR parameters (lsf coefficients, codebook indexes, etc)
    uint8_t             bad_frame_indicator; ///< bad frame ? 1 : 0
    enum Mode                cur_frame_mode; ///< mode of the frame currently being decoded

    int16_t     prev_lsf_r[LP_FILTER_ORDER]; ///< residual LSF vector from previous subframe
    double          lsp[4][LP_FILTER_ORDER]; ///< lsp vectors from current frame
    double   prev_lsp_sub4[LP_FILTER_ORDER]; ///< lsp vector for the 4th subframe of the previous frame

    float         lsf_q[4][LP_FILTER_ORDER]; ///< Interpolated LSF vector for fixed gain smoothing
    float          lsf_avg[LP_FILTER_ORDER]; ///< vector of averaged lsf vector

    float           lpc[4][LP_FILTER_ORDER]; ///< lpc coefficient vectors for 4 subframes

    uint8_t                   pitch_lag_int; ///< integer part of pitch lag from current subframe

    float excitation_buf[PITCH_DELAY_MAX + LP_FILTER_ORDER + 1 + AMR_SUBFRAME_SIZE]; ///< current excitation and all necessary excitation history
    float                       *excitation; ///< pointer to the current excitation vector in excitation_buf

    float   pitch_vector[AMR_SUBFRAME_SIZE]; ///< adaptive code book (pitch) vector
    float   fixed_vector[AMR_SUBFRAME_SIZE]; ///< algebraic codebook (fixed) vector (must be kept zero between frames)

    float               prediction_error[4]; ///< quantified prediction errors {20log10(^gamma_gc)} for previous four subframes
    float                     pitch_gain[5]; ///< quantified pitch gains for the current and previous four subframes
    float                     fixed_gain[5]; ///< quantified fixed gains for the current and previous four subframes

    float                              beta; ///< previous pitch_gain, bounded by [0.0,SHARP_MAX]
    uint8_t                      diff_count; ///< the number of subframes for which diff has been above 0.65
    uint8_t                      hang_count; ///< the number of subframes since a hangover period started

    float            prev_sparse_fixed_gain; ///< previous fixed gain; used by anti-sparseness processing to determine "onset"
    uint8_t               prev_ir_filter_nr; ///< previous impulse response filter "impNr": 0 - strong, 1 - medium, 2 - none
    uint8_t                 ir_filter_onset; ///< flag for impulse response filter strength

    float                postfilter_mem[10]; ///< previous intermediate values in the formant filter
    float                          tilt_mem; ///< previous input to tilt compensation filter
    float                    postfilter_agc; ///< previous factor used for adaptive gain control
    float                  high_pass_mem[2]; ///< previous intermediate values in the high-pass filter

    float samples_in[LP_FILTER_ORDER + AMR_SUBFRAME_SIZE]; ///< floating point samples

} AMRContext;
139 | |
/**
 * Weighted sum of two vectors; double version of ff_weighted_vector_sumf().
 *
 * Computes out[n] = weight_coeff_a * in_a[n] + weight_coeff_b * in_b[n]
 * for every n in [0, length).
 *
 * @param out            result vector
 * @param in_a           first input vector
 * @param in_b           second input vector
 * @param weight_coeff_a weight applied to in_a
 * @param weight_coeff_b weight applied to in_b
 * @param length         number of elements to process
 */
static void weighted_vector_sumd(double *out, const double *in_a,
                                 const double *in_b, double weight_coeff_a,
                                 double weight_coeff_b, int length)
{
    int n = 0;

    while (n < length) {
        out[n] = weight_coeff_a * in_a[n]
               + weight_coeff_b * in_b[n];
        n++;
    }
}
151 | |
152 static av_cold int amrnb_decode_init(AVCodecContext *avctx) | |
153 { | |
154 AMRContext *p = avctx->priv_data; | |
155 int i; | |
156 | |
157 avctx->sample_fmt = SAMPLE_FMT_FLT; | |
158 | |
159 // p->excitation always points to the same position in p->excitation_buf | |
160 p->excitation = &p->excitation_buf[PITCH_DELAY_MAX + LP_FILTER_ORDER + 1]; | |
161 | |
162 for (i = 0; i < LP_FILTER_ORDER; i++) { | |
163 p->prev_lsp_sub4[i] = lsp_sub4_init[i] * 1000 / (float)(1 << 15); | |
164 p->lsf_avg[i] = p->lsf_q[3][i] = lsp_avg_init[i] / (float)(1 << 15); | |
165 } | |
166 | |
167 for (i = 0; i < 4; i++) | |
168 p->prediction_error[i] = MIN_ENERGY; | |
169 | |
170 return 0; | |
171 } | |
172 | |
173 | |
174 /** | |
175 * Unpack an RFC4867 speech frame into the AMR frame mode and parameters. | |
176 * | |
177 * The order of speech bits is specified by 3GPP TS 26.101. | |
178 * | |
179 * @param p the context | |
180 * @param buf pointer to the input buffer | |
181 * @param buf_size size of the input buffer | |
182 * | |
183 * @return the frame mode | |
184 */ | |
185 static enum Mode unpack_bitstream(AMRContext *p, const uint8_t *buf, | |
186 int buf_size) | |
187 { | |
188 GetBitContext gb; | |
189 enum Mode mode; | |
190 | |
191 init_get_bits(&gb, buf, buf_size * 8); | |
192 | |
193 // Decode the first octet. | |
194 skip_bits(&gb, 1); // padding bit | |
195 mode = get_bits(&gb, 4); // frame type | |
196 p->bad_frame_indicator = !get_bits1(&gb); // quality bit | |
197 skip_bits(&gb, 2); // two padding bits | |
198 | |
12485
b42e02e9bf2b
Move AMR-NB frame unpacking code to a common file so it can be reused in
vitor
parents:
12464
diff
changeset
|
199 if (mode < MODE_DTX) |
b42e02e9bf2b
Move AMR-NB frame unpacking code to a common file so it can be reused in
vitor
parents:
12464
diff
changeset
|
200 ff_amr_bit_reorder((uint16_t *) &p->frame, sizeof(AMRNBFrame), buf + 1, |
b42e02e9bf2b
Move AMR-NB frame unpacking code to a common file so it can be reused in
vitor
parents:
12464
diff
changeset
|
201 amr_unpacking_bitmaps_per_mode[mode]); |
11235 | 202 |
203 return mode; | |
204 } | |
205 | |
206 | |
207 /// @defgroup amr_lpc_decoding AMR pitch LPC coefficient decoding functions | |
208 /// @{ | |
209 | |
210 /** | |
211 * Interpolate the LSF vector (used for fixed gain smoothing). | |
212 * The interpolation is done over all four subframes even in MODE_12k2. | |
213 * | |
214 * @param[in,out] lsf_q LSFs in [0,1] for each subframe | |
215 * @param[in] lsf_new New LSFs in [0,1] for subframe 4 | |
216 */ | |
217 static void interpolate_lsf(float lsf_q[4][LP_FILTER_ORDER], float *lsf_new) | |
218 { | |
219 int i; | |
220 | |
221 for (i = 0; i < 4; i++) | |
222 ff_weighted_vector_sumf(lsf_q[i], lsf_q[3], lsf_new, | |
223 0.25 * (3 - i), 0.25 * (i + 1), | |
224 LP_FILTER_ORDER); | |
225 } | |
226 | |
227 /** | |
228 * Decode a set of 5 split-matrix quantized lsf indexes into an lsp vector. | |
229 * | |
230 * @param p the context | |
231 * @param lsp output LSP vector | |
232 * @param lsf_no_r LSF vector without the residual vector added | |
233 * @param lsf_quantizer pointers to LSF dictionary tables | |
234 * @param quantizer_offset offset in tables | |
235 * @param sign for the 3 dictionary table | |
236 * @param update store data for computing the next frame's LSFs | |
237 */ | |
238 static void lsf2lsp_for_mode12k2(AMRContext *p, double lsp[LP_FILTER_ORDER], | |
239 const float lsf_no_r[LP_FILTER_ORDER], | |
240 const int16_t *lsf_quantizer[5], | |
241 const int quantizer_offset, | |
242 const int sign, const int update) | |
243 { | |
244 int16_t lsf_r[LP_FILTER_ORDER]; // residual LSF vector | |
245 float lsf_q[LP_FILTER_ORDER]; // quantified LSF vector | |
246 int i; | |
247 | |
248 for (i = 0; i < LP_FILTER_ORDER >> 1; i++) | |
249 memcpy(&lsf_r[i << 1], &lsf_quantizer[i][quantizer_offset], | |
250 2 * sizeof(*lsf_r)); | |
251 | |
252 if (sign) { | |
253 lsf_r[4] *= -1; | |
254 lsf_r[5] *= -1; | |
255 } | |
256 | |
257 if (update) | |
258 memcpy(p->prev_lsf_r, lsf_r, LP_FILTER_ORDER * sizeof(float)); | |
259 | |
260 for (i = 0; i < LP_FILTER_ORDER; i++) | |
261 lsf_q[i] = lsf_r[i] * (LSF_R_FAC / 8000.0) + lsf_no_r[i] * (1.0 / 8000.0); | |
262 | |
263 ff_set_min_dist_lsf(lsf_q, MIN_LSF_SPACING, LP_FILTER_ORDER); | |
264 | |
265 if (update) | |
266 interpolate_lsf(p->lsf_q, lsf_q); | |
267 | |
12464
2dd67ed2f947
Move AMRNB lsf2lsp() function to common code for using in future AMRWB decoder.
vitor
parents:
11676
diff
changeset
|
268 ff_acelp_lsf2lspd(lsp, lsf_q, LP_FILTER_ORDER); |
11235 | 269 } |
270 | |
271 /** | |
272 * Decode a set of 5 split-matrix quantized lsf indexes into 2 lsp vectors. | |
273 * | |
274 * @param p pointer to the AMRContext | |
275 */ | |
276 static void lsf2lsp_5(AMRContext *p) | |
277 { | |
278 const uint16_t *lsf_param = p->frame.lsf; | |
279 float lsf_no_r[LP_FILTER_ORDER]; // LSFs without the residual vector | |
280 const int16_t *lsf_quantizer[5]; | |
281 int i; | |
282 | |
283 lsf_quantizer[0] = lsf_5_1[lsf_param[0]]; | |
284 lsf_quantizer[1] = lsf_5_2[lsf_param[1]]; | |
285 lsf_quantizer[2] = lsf_5_3[lsf_param[2] >> 1]; | |
286 lsf_quantizer[3] = lsf_5_4[lsf_param[3]]; | |
287 lsf_quantizer[4] = lsf_5_5[lsf_param[4]]; | |
288 | |
289 for (i = 0; i < LP_FILTER_ORDER; i++) | |
290 lsf_no_r[i] = p->prev_lsf_r[i] * LSF_R_FAC * PRED_FAC_MODE_12k2 + lsf_5_mean[i]; | |
291 | |
292 lsf2lsp_for_mode12k2(p, p->lsp[1], lsf_no_r, lsf_quantizer, 0, lsf_param[2] & 1, 0); | |
293 lsf2lsp_for_mode12k2(p, p->lsp[3], lsf_no_r, lsf_quantizer, 2, lsf_param[2] & 1, 1); | |
294 | |
295 // interpolate LSP vectors at subframes 1 and 3 | |
296 weighted_vector_sumd(p->lsp[0], p->prev_lsp_sub4, p->lsp[1], 0.5, 0.5, LP_FILTER_ORDER); | |
297 weighted_vector_sumd(p->lsp[2], p->lsp[1] , p->lsp[3], 0.5, 0.5, LP_FILTER_ORDER); | |
298 } | |
299 | |
300 /** | |
301 * Decode a set of 3 split-matrix quantized lsf indexes into an lsp vector. | |
302 * | |
303 * @param p pointer to the AMRContext | |
304 */ | |
305 static void lsf2lsp_3(AMRContext *p) | |
306 { | |
307 const uint16_t *lsf_param = p->frame.lsf; | |
308 int16_t lsf_r[LP_FILTER_ORDER]; // residual LSF vector | |
309 float lsf_q[LP_FILTER_ORDER]; // quantified LSF vector | |
310 const int16_t *lsf_quantizer; | |
311 int i, j; | |
312 | |
313 lsf_quantizer = (p->cur_frame_mode == MODE_7k95 ? lsf_3_1_MODE_7k95 : lsf_3_1)[lsf_param[0]]; | |
314 memcpy(lsf_r, lsf_quantizer, 3 * sizeof(*lsf_r)); | |
315 | |
316 lsf_quantizer = lsf_3_2[lsf_param[1] << (p->cur_frame_mode <= MODE_5k15)]; | |
317 memcpy(lsf_r + 3, lsf_quantizer, 3 * sizeof(*lsf_r)); | |
318 | |
319 lsf_quantizer = (p->cur_frame_mode <= MODE_5k15 ? lsf_3_3_MODE_5k15 : lsf_3_3)[lsf_param[2]]; | |
320 memcpy(lsf_r + 6, lsf_quantizer, 4 * sizeof(*lsf_r)); | |
321 | |
322 // calculate mean-removed LSF vector and add mean | |
323 for (i = 0; i < LP_FILTER_ORDER; i++) | |
324 lsf_q[i] = (lsf_r[i] + p->prev_lsf_r[i] * pred_fac[i]) * (LSF_R_FAC / 8000.0) + lsf_3_mean[i] * (1.0 / 8000.0); | |
325 | |
326 ff_set_min_dist_lsf(lsf_q, MIN_LSF_SPACING, LP_FILTER_ORDER); | |
327 | |
328 // store data for computing the next frame's LSFs | |
329 interpolate_lsf(p->lsf_q, lsf_q); | |
330 memcpy(p->prev_lsf_r, lsf_r, LP_FILTER_ORDER * sizeof(*lsf_r)); | |
331 | |
12464
2dd67ed2f947
Move AMRNB lsf2lsp() function to common code for using in future AMRWB decoder.
vitor
parents:
11676
diff
changeset
|
332 ff_acelp_lsf2lspd(p->lsp[3], lsf_q, LP_FILTER_ORDER); |
11235 | 333 |
334 // interpolate LSP vectors at subframes 1, 2 and 3 | |
335 for (i = 1; i <= 3; i++) | |
336 for(j = 0; j < LP_FILTER_ORDER; j++) | |
337 p->lsp[i-1][j] = p->prev_lsp_sub4[j] + | |
338 (p->lsp[3][j] - p->prev_lsp_sub4[j]) * 0.25 * i; | |
339 } | |
340 | |
341 /// @} | |
342 | |
343 | |
344 /// @defgroup amr_pitch_vector_decoding AMR pitch vector decoding functions | |
345 /// @{ | |
346 | |
347 /** | |
348 * Like ff_decode_pitch_lag(), but with 1/6 resolution | |
349 */ | |
350 static void decode_pitch_lag_1_6(int *lag_int, int *lag_frac, int pitch_index, | |
351 const int prev_lag_int, const int subframe) | |
352 { | |
353 if (subframe == 0 || subframe == 2) { | |
354 if (pitch_index < 463) { | |
355 *lag_int = (pitch_index + 107) * 10923 >> 16; | |
356 *lag_frac = pitch_index - *lag_int * 6 + 105; | |
357 } else { | |
358 *lag_int = pitch_index - 368; | |
359 *lag_frac = 0; | |
360 } | |
361 } else { | |
362 *lag_int = ((pitch_index + 5) * 10923 >> 16) - 1; | |
363 *lag_frac = pitch_index - *lag_int * 6 - 3; | |
364 *lag_int += av_clip(prev_lag_int - 5, PITCH_LAG_MIN_MODE_12k2, | |
365 PITCH_DELAY_MAX - 9); | |
366 } | |
367 } | |
368 | |
369 static void decode_pitch_vector(AMRContext *p, | |
370 const AMRNBSubframe *amr_subframe, | |
371 const int subframe) | |
372 { | |
373 int pitch_lag_int, pitch_lag_frac; | |
374 enum Mode mode = p->cur_frame_mode; | |
375 | |
376 if (p->cur_frame_mode == MODE_12k2) { | |
377 decode_pitch_lag_1_6(&pitch_lag_int, &pitch_lag_frac, | |
378 amr_subframe->p_lag, p->pitch_lag_int, | |
379 subframe); | |
380 } else | |
381 ff_decode_pitch_lag(&pitch_lag_int, &pitch_lag_frac, | |
382 amr_subframe->p_lag, | |
383 p->pitch_lag_int, subframe, | |
384 mode != MODE_4k75 && mode != MODE_5k15, | |
385 mode <= MODE_6k7 ? 4 : (mode == MODE_7k95 ? 5 : 6)); | |
386 | |
387 p->pitch_lag_int = pitch_lag_int; // store previous lag in a uint8_t | |
388 | |
389 pitch_lag_frac <<= (p->cur_frame_mode != MODE_12k2); | |
390 | |
391 pitch_lag_int += pitch_lag_frac > 0; | |
392 | |
393 /* Calculate the pitch vector by interpolating the past excitation at the | |
394 pitch lag using a b60 hamming windowed sinc function. */ | |
395 ff_acelp_interpolatef(p->excitation, p->excitation + 1 - pitch_lag_int, | |
396 ff_b60_sinc, 6, | |
397 pitch_lag_frac + 6 - 6*(pitch_lag_frac > 0), | |
398 10, AMR_SUBFRAME_SIZE); | |
399 | |
400 memcpy(p->pitch_vector, p->excitation, AMR_SUBFRAME_SIZE * sizeof(float)); | |
401 } | |
402 | |
403 /// @} | |
404 | |
405 | |
406 /// @defgroup amr_algebraic_code_book AMR algebraic code book (fixed) vector decoding functions | |
407 /// @{ | |
408 | |
409 /** | |
410 * Decode a 10-bit algebraic codebook index from a 10.2 kbit/s frame. | |
411 */ | |
412 static void decode_10bit_pulse(int code, int pulse_position[8], | |
413 int i1, int i2, int i3) | |
414 { | |
415 // coded using 7+3 bits with the 3 LSBs being, individually, the LSB of 1 of | |
416 // the 3 pulses and the upper 7 bits being coded in base 5 | |
417 const uint8_t *positions = base_five_table[code >> 3]; | |
418 pulse_position[i1] = (positions[2] << 1) + ( code & 1); | |
419 pulse_position[i2] = (positions[1] << 1) + ((code >> 1) & 1); | |
420 pulse_position[i3] = (positions[0] << 1) + ((code >> 2) & 1); | |
421 } | |
422 | |
423 /** | |
424 * Decode the algebraic codebook index to pulse positions and signs and | |
425 * construct the algebraic codebook vector for MODE_10k2. | |
426 * | |
427 * @param fixed_index positions of the eight pulses | |
428 * @param fixed_sparse pointer to the algebraic codebook vector | |
429 */ | |
430 static void decode_8_pulses_31bits(const int16_t *fixed_index, | |
431 AMRFixed *fixed_sparse) | |
432 { | |
433 int pulse_position[8]; | |
434 int i, temp; | |
435 | |
436 decode_10bit_pulse(fixed_index[4], pulse_position, 0, 4, 1); | |
437 decode_10bit_pulse(fixed_index[5], pulse_position, 2, 6, 5); | |
438 | |
439 // coded using 5+2 bits with the 2 LSBs being, individually, the LSB of 1 of | |
440 // the 2 pulses and the upper 5 bits being coded in base 5 | |
441 temp = ((fixed_index[6] >> 2) * 25 + 12) >> 5; | |
442 pulse_position[3] = temp % 5; | |
443 pulse_position[7] = temp / 5; | |
444 if (pulse_position[7] & 1) | |
445 pulse_position[3] = 4 - pulse_position[3]; | |
446 pulse_position[3] = (pulse_position[3] << 1) + ( fixed_index[6] & 1); | |
447 pulse_position[7] = (pulse_position[7] << 1) + ((fixed_index[6] >> 1) & 1); | |
448 | |
449 fixed_sparse->n = 8; | |
450 for (i = 0; i < 4; i++) { | |
451 const int pos1 = (pulse_position[i] << 2) + i; | |
452 const int pos2 = (pulse_position[i + 4] << 2) + i; | |
453 const float sign = fixed_index[i] ? -1.0 : 1.0; | |
454 fixed_sparse->x[i ] = pos1; | |
455 fixed_sparse->x[i + 4] = pos2; | |
456 fixed_sparse->y[i ] = sign; | |
457 fixed_sparse->y[i + 4] = pos2 < pos1 ? -sign : sign; | |
458 } | |
459 } | |
460 | |
461 /** | |
462 * Decode the algebraic codebook index to pulse positions and signs, | |
463 * then construct the algebraic codebook vector. | |
464 * | |
465 * nb of pulses | bits encoding pulses | |
466 * For MODE_4k75 or MODE_5k15, 2 | 1-3, 4-6, 7 | |
467 * MODE_5k9, 2 | 1, 2-4, 5-6, 7-9 | |
468 * MODE_6k7, 3 | 1-3, 4, 5-7, 8, 9-11 | |
469 * MODE_7k4 or MODE_7k95, 4 | 1-3, 4-6, 7-9, 10, 11-13 | |
470 * | |
471 * @param fixed_sparse pointer to the algebraic codebook vector | |
472 * @param pulses algebraic codebook indexes | |
473 * @param mode mode of the current frame | |
474 * @param subframe current subframe number | |
475 */ | |
476 static void decode_fixed_sparse(AMRFixed *fixed_sparse, const uint16_t *pulses, | |
477 const enum Mode mode, const int subframe) | |
478 { | |
479 assert(MODE_4k75 <= mode && mode <= MODE_12k2); | |
480 | |
481 if (mode == MODE_12k2) { | |
482 ff_decode_10_pulses_35bits(pulses, fixed_sparse, gray_decode, 5, 3); | |
483 } else if (mode == MODE_10k2) { | |
484 decode_8_pulses_31bits(pulses, fixed_sparse); | |
485 } else { | |
486 int *pulse_position = fixed_sparse->x; | |
487 int i, pulse_subset; | |
488 const int fixed_index = pulses[0]; | |
489 | |
490 if (mode <= MODE_5k15) { | |
491 pulse_subset = ((fixed_index >> 3) & 8) + (subframe << 1); | |
492 pulse_position[0] = ( fixed_index & 7) * 5 + track_position[pulse_subset]; | |
493 pulse_position[1] = ((fixed_index >> 3) & 7) * 5 + track_position[pulse_subset + 1]; | |
494 fixed_sparse->n = 2; | |
495 } else if (mode == MODE_5k9) { | |
496 pulse_subset = ((fixed_index & 1) << 1) + 1; | |
497 pulse_position[0] = ((fixed_index >> 1) & 7) * 5 + pulse_subset; | |
498 pulse_subset = (fixed_index >> 4) & 3; | |
499 pulse_position[1] = ((fixed_index >> 6) & 7) * 5 + pulse_subset + (pulse_subset == 3 ? 1 : 0); | |
500 fixed_sparse->n = pulse_position[0] == pulse_position[1] ? 1 : 2; | |
501 } else if (mode == MODE_6k7) { | |
502 pulse_position[0] = (fixed_index & 7) * 5; | |
503 pulse_subset = (fixed_index >> 2) & 2; | |
504 pulse_position[1] = ((fixed_index >> 4) & 7) * 5 + pulse_subset + 1; | |
505 pulse_subset = (fixed_index >> 6) & 2; | |
506 pulse_position[2] = ((fixed_index >> 8) & 7) * 5 + pulse_subset + 2; | |
507 fixed_sparse->n = 3; | |
508 } else { // mode <= MODE_7k95 | |
509 pulse_position[0] = gray_decode[ fixed_index & 7]; | |
510 pulse_position[1] = gray_decode[(fixed_index >> 3) & 7] + 1; | |
511 pulse_position[2] = gray_decode[(fixed_index >> 6) & 7] + 2; | |
512 pulse_subset = (fixed_index >> 9) & 1; | |
513 pulse_position[3] = gray_decode[(fixed_index >> 10) & 7] + pulse_subset + 3; | |
514 fixed_sparse->n = 4; | |
515 } | |
516 for (i = 0; i < fixed_sparse->n; i++) | |
517 fixed_sparse->y[i] = (pulses[1] >> i) & 1 ? 1.0 : -1.0; | |
518 } | |
519 } | |
520 | |
521 /** | |
522 * Apply pitch lag to obtain the sharpened fixed vector (section 6.1.2) | |
523 * | |
524 * @param p the context | |
525 * @param subframe unpacked amr subframe | |
526 * @param mode mode of the current frame | |
527 * @param fixed_sparse sparse respresentation of the fixed vector | |
528 */ | |
529 static void pitch_sharpening(AMRContext *p, int subframe, enum Mode mode, | |
530 AMRFixed *fixed_sparse) | |
531 { | |
532 // The spec suggests the current pitch gain is always used, but in other | |
533 // modes the pitch and codebook gains are joinly quantized (sec 5.8.2) | |
534 // so the codebook gain cannot depend on the quantized pitch gain. | |
535 if (mode == MODE_12k2) | |
536 p->beta = FFMIN(p->pitch_gain[4], 1.0); | |
537 | |
538 fixed_sparse->pitch_lag = p->pitch_lag_int; | |
539 fixed_sparse->pitch_fac = p->beta; | |
540 | |
541 // Save pitch sharpening factor for the next subframe | |
542 // MODE_4k75 only updates on the 2nd and 4th subframes - this follows from | |
543 // the fact that the gains for two subframes are jointly quantized. | |
544 if (mode != MODE_4k75 || subframe & 1) | |
545 p->beta = av_clipf(p->pitch_gain[4], 0.0, SHARP_MAX); | |
546 } | |
547 /// @} | |
548 | |
549 | |
550 /// @defgroup amr_gain_decoding AMR gain decoding functions | |
551 /// @{ | |
552 | |
553 /** | |
554 * fixed gain smoothing | |
555 * Note that where the spec specifies the "spectrum in the q domain" | |
556 * in section 6.1.4, in fact frequencies should be used. | |
557 * | |
558 * @param p the context | |
559 * @param lsf LSFs for the current subframe, in the range [0,1] | |
560 * @param lsf_avg averaged LSFs | |
561 * @param mode mode of the current frame | |
562 * | |
563 * @return fixed gain smoothed | |
564 */ | |
565 static float fixed_gain_smooth(AMRContext *p , const float *lsf, | |
566 const float *lsf_avg, const enum Mode mode) | |
567 { | |
568 float diff = 0.0; | |
569 int i; | |
570 | |
571 for (i = 0; i < LP_FILTER_ORDER; i++) | |
572 diff += fabs(lsf_avg[i] - lsf[i]) / lsf_avg[i]; | |
573 | |
574 // If diff is large for ten subframes, disable smoothing for a 40-subframe | |
575 // hangover period. | |
576 p->diff_count++; | |
577 if (diff <= 0.65) | |
578 p->diff_count = 0; | |
579 | |
580 if (p->diff_count > 10) { | |
581 p->hang_count = 0; | |
582 p->diff_count--; // don't let diff_count overflow | |
583 } | |
584 | |
585 if (p->hang_count < 40) { | |
586 p->hang_count++; | |
587 } else if (mode < MODE_7k4 || mode == MODE_10k2) { | |
588 const float smoothing_factor = av_clipf(4.0 * diff - 1.6, 0.0, 1.0); | |
589 const float fixed_gain_mean = (p->fixed_gain[0] + p->fixed_gain[1] + | |
590 p->fixed_gain[2] + p->fixed_gain[3] + | |
591 p->fixed_gain[4]) * 0.2; | |
592 return smoothing_factor * p->fixed_gain[4] + | |
593 (1.0 - smoothing_factor) * fixed_gain_mean; | |
594 } | |
595 return p->fixed_gain[4]; | |
596 } | |
597 | |
598 /** | |
599 * Decode pitch gain and fixed gain factor (part of section 6.1.3). | |
600 * | |
601 * @param p the context | |
602 * @param amr_subframe unpacked amr subframe | |
603 * @param mode mode of the current frame | |
604 * @param subframe current subframe number | |
605 * @param fixed_gain_factor decoded gain correction factor | |
606 */ | |
607 static void decode_gains(AMRContext *p, const AMRNBSubframe *amr_subframe, | |
608 const enum Mode mode, const int subframe, | |
609 float *fixed_gain_factor) | |
610 { | |
611 if (mode == MODE_12k2 || mode == MODE_7k95) { | |
612 p->pitch_gain[4] = qua_gain_pit [amr_subframe->p_gain ] | |
613 * (1.0 / 16384.0); | |
614 *fixed_gain_factor = qua_gain_code[amr_subframe->fixed_gain] | |
615 * (1.0 / 2048.0); | |
616 } else { | |
617 const uint16_t *gains; | |
618 | |
619 if (mode >= MODE_6k7) { | |
620 gains = gains_high[amr_subframe->p_gain]; | |
621 } else if (mode >= MODE_5k15) { | |
622 gains = gains_low [amr_subframe->p_gain]; | |
623 } else { | |
624 // gain index is only coded in subframes 0,2 for MODE_4k75 | |
625 gains = gains_MODE_4k75[(p->frame.subframe[subframe & 2].p_gain << 1) + (subframe & 1)]; | |
626 } | |
627 | |
628 p->pitch_gain[4] = gains[0] * (1.0 / 16384.0); | |
629 *fixed_gain_factor = gains[1] * (1.0 / 4096.0); | |
630 } | |
631 } | |
632 | |
633 /// @} | |
634 | |
635 | |
636 /// @defgroup amr_pre_processing AMR pre-processing functions | |
637 /// @{ | |
638 | |
639 /** | |
640 * Circularly convolve a sparse fixed vector with a phase dispersion impulse | |
641 * response filter (D.6.2 of G.729 and 6.1.5 of AMR). | |
642 * | |
643 * @param out vector with filter applied | |
644 * @param in source vector | |
645 * @param filter phase filter coefficients | |
646 * | |
647 * out[n] = sum(i,0,len-1){ in[i] * filter[(len + n - i)%len] } | |
648 */ | |
649 static void apply_ir_filter(float *out, const AMRFixed *in, | |
650 const float *filter) | |
651 { | |
652 float filter1[AMR_SUBFRAME_SIZE], //!< filters at pitch lag*1 and *2 | |
653 filter2[AMR_SUBFRAME_SIZE]; | |
654 int lag = in->pitch_lag; | |
655 float fac = in->pitch_fac; | |
656 int i; | |
657 | |
658 if (lag < AMR_SUBFRAME_SIZE) { | |
659 ff_celp_circ_addf(filter1, filter, filter, lag, fac, | |
660 AMR_SUBFRAME_SIZE); | |
661 | |
662 if (lag < AMR_SUBFRAME_SIZE >> 1) | |
663 ff_celp_circ_addf(filter2, filter, filter1, lag, fac, | |
664 AMR_SUBFRAME_SIZE); | |
665 } | |
666 | |
667 memset(out, 0, sizeof(float) * AMR_SUBFRAME_SIZE); | |
668 for (i = 0; i < in->n; i++) { | |
669 int x = in->x[i]; | |
670 float y = in->y[i]; | |
671 const float *filterp; | |
672 | |
673 if (x >= AMR_SUBFRAME_SIZE - lag) { | |
674 filterp = filter; | |
675 } else if (x >= AMR_SUBFRAME_SIZE - (lag << 1)) { | |
676 filterp = filter1; | |
677 } else | |
678 filterp = filter2; | |
679 | |
680 ff_celp_circ_addf(out, out, filterp, x, y, AMR_SUBFRAME_SIZE); | |
681 } | |
682 } | |
683 | |
684 /** | |
685 * Reduce fixed vector sparseness by smoothing with one of three IR filters. | |
686 * Also know as "adaptive phase dispersion". | |
687 * | |
688 * This implements 3GPP TS 26.090 section 6.1(5). | |
689 * | |
690 * @param p the context | |
691 * @param fixed_sparse algebraic codebook vector | |
692 * @param fixed_vector unfiltered fixed vector | |
693 * @param fixed_gain smoothed gain | |
694 * @param out space for modified vector if necessary | |
695 */ | |
696 static const float *anti_sparseness(AMRContext *p, AMRFixed *fixed_sparse, | |
697 const float *fixed_vector, | |
698 float fixed_gain, float *out) | |
699 { | |
700 int ir_filter_nr; | |
701 | |
702 if (p->pitch_gain[4] < 0.6) { | |
703 ir_filter_nr = 0; // strong filtering | |
704 } else if (p->pitch_gain[4] < 0.9) { | |
705 ir_filter_nr = 1; // medium filtering | |
706 } else | |
707 ir_filter_nr = 2; // no filtering | |
708 | |
709 // detect 'onset' | |
710 if (fixed_gain > 2.0 * p->prev_sparse_fixed_gain) { | |
711 p->ir_filter_onset = 2; | |
712 } else if (p->ir_filter_onset) | |
713 p->ir_filter_onset--; | |
714 | |
715 if (!p->ir_filter_onset) { | |
716 int i, count = 0; | |
717 | |
718 for (i = 0; i < 5; i++) | |
719 if (p->pitch_gain[i] < 0.6) | |
720 count++; | |
721 if (count > 2) | |
722 ir_filter_nr = 0; | |
723 | |
724 if (ir_filter_nr > p->prev_ir_filter_nr + 1) | |
725 ir_filter_nr--; | |
726 } else if (ir_filter_nr < 2) | |
727 ir_filter_nr++; | |
728 | |
729 // Disable filtering for very low level of fixed_gain. | |
730 // Note this step is not specified in the technical description but is in | |
731 // the reference source in the function Ph_disp. | |
732 if (fixed_gain < 5.0) | |
733 ir_filter_nr = 2; | |
734 | |
735 if (p->cur_frame_mode != MODE_7k4 && p->cur_frame_mode < MODE_10k2 | |
736 && ir_filter_nr < 2) { | |
737 apply_ir_filter(out, fixed_sparse, | |
738 (p->cur_frame_mode == MODE_7k95 ? | |
739 ir_filters_lookup_MODE_7k95 : | |
740 ir_filters_lookup)[ir_filter_nr]); | |
741 fixed_vector = out; | |
742 } | |
743 | |
744 // update ir filter strength history | |
745 p->prev_ir_filter_nr = ir_filter_nr; | |
746 p->prev_sparse_fixed_gain = fixed_gain; | |
747 | |
748 return fixed_vector; | |
749 } | |
750 | |
751 /// @} | |
752 | |
753 | |
754 /// @defgroup amr_synthesis AMR synthesis functions | |
755 /// @{ | |
756 | |
757 /** | |
758 * Conduct 10th order linear predictive coding synthesis. | |
759 * | |
760 * @param p pointer to the AMRContext | |
761 * @param lpc pointer to the LPC coefficients | |
762 * @param fixed_gain fixed codebook gain for synthesis | |
763 * @param fixed_vector algebraic codebook vector | |
764 * @param samples pointer to the output speech samples | |
765 * @param overflow 16-bit overflow flag | |
766 */ | |
767 static int synthesis(AMRContext *p, float *lpc, | |
768 float fixed_gain, const float *fixed_vector, | |
769 float *samples, uint8_t overflow) | |
770 { | |
11652
8b6f3d3b55cb
Move clipping of audio samples (for those codecs outputting float) from decoder
rbultje
parents:
11648
diff
changeset
|
771 int i; |
11235 | 772 float excitation[AMR_SUBFRAME_SIZE]; |
773 | |
774 // if an overflow has been detected, the pitch vector is scaled down by a | |
775 // factor of 4 | |
776 if (overflow) | |
777 for (i = 0; i < AMR_SUBFRAME_SIZE; i++) | |
778 p->pitch_vector[i] *= 0.25; | |
779 | |
780 ff_weighted_vector_sumf(excitation, p->pitch_vector, fixed_vector, | |
781 p->pitch_gain[4], fixed_gain, AMR_SUBFRAME_SIZE); | |
782 | |
783 // emphasize pitch vector contribution | |
784 if (p->pitch_gain[4] > 0.5 && !overflow) { | |
785 float energy = ff_dot_productf(excitation, excitation, | |
786 AMR_SUBFRAME_SIZE); | |
787 float pitch_factor = | |
788 p->pitch_gain[4] * | |
789 (p->cur_frame_mode == MODE_12k2 ? | |
790 0.25 * FFMIN(p->pitch_gain[4], 1.0) : | |
791 0.5 * FFMIN(p->pitch_gain[4], SHARP_MAX)); | |
792 | |
793 for (i = 0; i < AMR_SUBFRAME_SIZE; i++) | |
794 excitation[i] += pitch_factor * p->pitch_vector[i]; | |
795 | |
796 ff_scale_vector_to_given_sum_of_squares(excitation, excitation, energy, | |
797 AMR_SUBFRAME_SIZE); | |
798 } | |
799 | |
800 ff_celp_lp_synthesis_filterf(samples, lpc, excitation, AMR_SUBFRAME_SIZE, | |
801 LP_FILTER_ORDER); | |
802 | |
803 // detect overflow | |
804 for (i = 0; i < AMR_SUBFRAME_SIZE; i++) | |
805 if (fabsf(samples[i]) > AMR_SAMPLE_BOUND) { | |
11652
8b6f3d3b55cb
Move clipping of audio samples (for those codecs outputting float) from decoder
rbultje
parents:
11648
diff
changeset
|
806 return 1; |
11235 | 807 } |
808 | |
11652
8b6f3d3b55cb
Move clipping of audio samples (for those codecs outputting float) from decoder
rbultje
parents:
11648
diff
changeset
|
809 return 0; |
11235 | 810 } |
811 | |
812 /// @} | |
813 | |
814 | |
815 /// @defgroup amr_update AMR update functions | |
816 /// @{ | |
817 | |
818 /** | |
819 * Update buffers and history at the end of decoding a subframe. | |
820 * | |
821 * @param p pointer to the AMRContext | |
822 */ | |
823 static void update_state(AMRContext *p) | |
824 { | |
825 memcpy(p->prev_lsp_sub4, p->lsp[3], LP_FILTER_ORDER * sizeof(p->lsp[3][0])); | |
826 | |
827 memmove(&p->excitation_buf[0], &p->excitation_buf[AMR_SUBFRAME_SIZE], | |
828 (PITCH_DELAY_MAX + LP_FILTER_ORDER + 1) * sizeof(float)); | |
829 | |
830 memmove(&p->pitch_gain[0], &p->pitch_gain[1], 4 * sizeof(float)); | |
831 memmove(&p->fixed_gain[0], &p->fixed_gain[1], 4 * sizeof(float)); | |
832 | |
833 memmove(&p->samples_in[0], &p->samples_in[AMR_SUBFRAME_SIZE], | |
834 LP_FILTER_ORDER * sizeof(float)); | |
835 } | |
836 | |
837 /// @} | |
838 | |
839 | |
840 /// @defgroup amr_postproc AMR Post processing functions | |
841 /// @{ | |
842 | |
843 /** | |
844 * Get the tilt factor of a formant filter from its transfer function | |
845 * | |
846 * @param lpc_n LP_FILTER_ORDER coefficients of the numerator | |
847 * @param lpc_d LP_FILTER_ORDER coefficients of the denominator | |
848 */ | |
849 static float tilt_factor(float *lpc_n, float *lpc_d) | |
850 { | |
851 float rh0, rh1; // autocorrelation at lag 0 and 1 | |
852 | |
853 // LP_FILTER_ORDER prior zeros are needed for ff_celp_lp_synthesis_filterf | |
854 float impulse_buffer[LP_FILTER_ORDER + AMR_TILT_RESPONSE] = { 0 }; | |
855 float *hf = impulse_buffer + LP_FILTER_ORDER; // start of impulse response | |
856 | |
857 hf[0] = 1.0; | |
858 memcpy(hf + 1, lpc_n, sizeof(float) * LP_FILTER_ORDER); | |
859 ff_celp_lp_synthesis_filterf(hf, lpc_d, hf, AMR_TILT_RESPONSE, | |
860 LP_FILTER_ORDER); | |
861 | |
862 rh0 = ff_dot_productf(hf, hf, AMR_TILT_RESPONSE); | |
863 rh1 = ff_dot_productf(hf, hf + 1, AMR_TILT_RESPONSE - 1); | |
864 | |
865 // The spec only specifies this check for 12.2 and 10.2 kbit/s | |
866 // modes. But in the ref source the tilt is always non-negative. | |
867 return rh1 >= 0.0 ? rh1 / rh0 * AMR_TILT_GAMMA_T : 0.0; | |
868 } | |
869 | |
870 /** | |
871 * Perform adaptive post-filtering to enhance the quality of the speech. | |
872 * See section 6.2.1. | |
873 * | |
874 * @param p pointer to the AMRContext | |
875 * @param lpc interpolated LP coefficients for this subframe | |
876 * @param buf_out output of the filter | |
877 */ | |
878 static void postfilter(AMRContext *p, float *lpc, float *buf_out) | |
879 { | |
880 int i; | |
881 float *samples = p->samples_in + LP_FILTER_ORDER; // Start of input | |
882 | |
883 float speech_gain = ff_dot_productf(samples, samples, | |
884 AMR_SUBFRAME_SIZE); | |
885 | |
886 float pole_out[AMR_SUBFRAME_SIZE + LP_FILTER_ORDER]; // Output of pole filter | |
887 const float *gamma_n, *gamma_d; // Formant filter factor table | |
888 float lpc_n[LP_FILTER_ORDER], lpc_d[LP_FILTER_ORDER]; // Transfer function coefficients | |
889 | |
890 if (p->cur_frame_mode == MODE_12k2 || p->cur_frame_mode == MODE_10k2) { | |
891 gamma_n = ff_pow_0_7; | |
892 gamma_d = ff_pow_0_75; | |
893 } else { | |
894 gamma_n = ff_pow_0_55; | |
895 gamma_d = ff_pow_0_7; | |
896 } | |
897 | |
898 for (i = 0; i < LP_FILTER_ORDER; i++) { | |
899 lpc_n[i] = lpc[i] * gamma_n[i]; | |
900 lpc_d[i] = lpc[i] * gamma_d[i]; | |
901 } | |
902 | |
903 memcpy(pole_out, p->postfilter_mem, sizeof(float) * LP_FILTER_ORDER); | |
904 ff_celp_lp_synthesis_filterf(pole_out + LP_FILTER_ORDER, lpc_d, samples, | |
905 AMR_SUBFRAME_SIZE, LP_FILTER_ORDER); | |
906 memcpy(p->postfilter_mem, pole_out + AMR_SUBFRAME_SIZE, | |
907 sizeof(float) * LP_FILTER_ORDER); | |
908 | |
909 ff_celp_lp_zero_synthesis_filterf(buf_out, lpc_n, | |
910 pole_out + LP_FILTER_ORDER, | |
911 AMR_SUBFRAME_SIZE, LP_FILTER_ORDER); | |
912 | |
913 ff_tilt_compensation(&p->tilt_mem, tilt_factor(lpc_n, lpc_d), buf_out, | |
914 AMR_SUBFRAME_SIZE); | |
915 | |
11647
26aabf52f578
Split the input/output data arguments to ff_adaptive_gain_control().
rbultje
parents:
11645
diff
changeset
|
916 ff_adaptive_gain_control(buf_out, buf_out, speech_gain, AMR_SUBFRAME_SIZE, |
11462 | 917 AMR_AGC_ALPHA, &p->postfilter_agc); |
11235 | 918 } |
919 | |
920 /// @} | |
921 | |
922 static int amrnb_decode_frame(AVCodecContext *avctx, void *data, int *data_size, | |
923 AVPacket *avpkt) | |
924 { | |
925 | |
926 AMRContext *p = avctx->priv_data; // pointer to private data | |
927 const uint8_t *buf = avpkt->data; | |
928 int buf_size = avpkt->size; | |
929 float *buf_out = data; // pointer to the output data buffer | |
930 int i, subframe; | |
931 float fixed_gain_factor; | |
932 AMRFixed fixed_sparse = {0}; // fixed vector up to anti-sparseness processing | |
933 float spare_vector[AMR_SUBFRAME_SIZE]; // extra stack space to hold result from anti-sparseness processing | |
934 float synth_fixed_gain; // the fixed gain that synthesis should use | |
935 const float *synth_fixed_vector; // pointer to the fixed vector that synthesis should use | |
936 | |
937 p->cur_frame_mode = unpack_bitstream(p, buf, buf_size); | |
938 if (p->cur_frame_mode == MODE_DTX) { | |
939 av_log_missing_feature(avctx, "dtx mode", 1); | |
940 return -1; | |
941 } | |
942 | |
943 if (p->cur_frame_mode == MODE_12k2) { | |
944 lsf2lsp_5(p); | |
945 } else | |
946 lsf2lsp_3(p); | |
947 | |
948 for (i = 0; i < 4; i++) | |
949 ff_acelp_lspd2lpc(p->lsp[i], p->lpc[i], 5); | |
950 | |
951 for (subframe = 0; subframe < 4; subframe++) { | |
952 const AMRNBSubframe *amr_subframe = &p->frame.subframe[subframe]; | |
953 | |
954 decode_pitch_vector(p, amr_subframe, subframe); | |
955 | |
956 decode_fixed_sparse(&fixed_sparse, amr_subframe->pulses, | |
957 p->cur_frame_mode, subframe); | |
958 | |
959 // The fixed gain (section 6.1.3) depends on the fixed vector | |
960 // (section 6.1.2), but the fixed vector calculation uses | |
961 // pitch sharpening based on the on the pitch gain (section 6.1.3). | |
962 // So the correct order is: pitch gain, pitch sharpening, fixed gain. | |
963 decode_gains(p, amr_subframe, p->cur_frame_mode, subframe, | |
964 &fixed_gain_factor); | |
965 | |
966 pitch_sharpening(p, subframe, p->cur_frame_mode, &fixed_sparse); | |
967 | |
968 ff_set_fixed_vector(p->fixed_vector, &fixed_sparse, 1.0, | |
969 AMR_SUBFRAME_SIZE); | |
970 | |
971 p->fixed_gain[4] = | |
972 ff_amr_set_fixed_gain(fixed_gain_factor, | |
973 ff_dot_productf(p->fixed_vector, p->fixed_vector, | |
974 AMR_SUBFRAME_SIZE)/AMR_SUBFRAME_SIZE, | |
975 p->prediction_error, | |
976 energy_mean[p->cur_frame_mode], energy_pred_fac); | |
977 | |
978 // The excitation feedback is calculated without any processing such | |
979 // as fixed gain smoothing. This isn't mentioned in the specification. | |
980 for (i = 0; i < AMR_SUBFRAME_SIZE; i++) | |
981 p->excitation[i] *= p->pitch_gain[4]; | |
982 ff_set_fixed_vector(p->excitation, &fixed_sparse, p->fixed_gain[4], | |
983 AMR_SUBFRAME_SIZE); | |
984 | |
985 // In the ref decoder, excitation is stored with no fractional bits. | |
986 // This step prevents buzz in silent periods. The ref encoder can | |
987 // emit long sequences with pitch factor greater than one. This | |
988 // creates unwanted feedback if the excitation vector is nonzero. | |
989 // (e.g. test sequence T19_795.COD in 3GPP TS 26.074) | |
990 for (i = 0; i < AMR_SUBFRAME_SIZE; i++) | |
991 p->excitation[i] = truncf(p->excitation[i]); | |
992 | |
993 // Smooth fixed gain. | |
994 // The specification is ambiguous, but in the reference source, the | |
995 // smoothed value is NOT fed back into later fixed gain smoothing. | |
996 synth_fixed_gain = fixed_gain_smooth(p, p->lsf_q[subframe], | |
997 p->lsf_avg, p->cur_frame_mode); | |
998 | |
999 synth_fixed_vector = anti_sparseness(p, &fixed_sparse, p->fixed_vector, | |
1000 synth_fixed_gain, spare_vector); | |
1001 | |
1002 if (synthesis(p, p->lpc[subframe], synth_fixed_gain, | |
1003 synth_fixed_vector, &p->samples_in[LP_FILTER_ORDER], 0)) | |
1004 // overflow detected -> rerun synthesis scaling pitch vector down | |
1005 // by a factor of 4, skipping pitch vector contribution emphasis | |
1006 // and adaptive gain control | |
1007 synthesis(p, p->lpc[subframe], synth_fixed_gain, | |
1008 synth_fixed_vector, &p->samples_in[LP_FILTER_ORDER], 1); | |
1009 | |
1010 postfilter(p, p->lpc[subframe], buf_out + subframe * AMR_SUBFRAME_SIZE); | |
1011 | |
1012 // update buffers and history | |
1013 ff_clear_fixed_vector(p->fixed_vector, &fixed_sparse, AMR_SUBFRAME_SIZE); | |
1014 update_state(p); | |
1015 } | |
1016 | |
11648
0516f4062307
Split input/output data arguments to ff_acelp_apply_order_2_transfer_function().
rbultje
parents:
11647
diff
changeset
|
1017 ff_acelp_apply_order_2_transfer_function(buf_out, buf_out, highpass_zeros, |
11676
ceec2fb08b8e
amrnbdec: Apply AMR_SAMPLE_SCALE when finishing the decoder output
mstorsjo
parents:
11652
diff
changeset
|
1018 highpass_poles, |
ceec2fb08b8e
amrnbdec: Apply AMR_SAMPLE_SCALE when finishing the decoder output
mstorsjo
parents:
11652
diff
changeset
|
1019 highpass_gain * AMR_SAMPLE_SCALE, |
11235 | 1020 p->high_pass_mem, AMR_BLOCK_SIZE); |
1021 | |
1022 /* Update averaged lsf vector (used for fixed gain smoothing). | |
1023 * | |
1024 * Note that lsf_avg should not incorporate the current frame's LSFs | |
1025 * for fixed_gain_smooth. | |
1026 * The specification has an incorrect formula: the reference decoder uses | |
1027 * qbar(n-1) rather than qbar(n) in section 6.1(4) equation 71. */ | |
1028 ff_weighted_vector_sumf(p->lsf_avg, p->lsf_avg, p->lsf_q[3], | |
1029 0.84, 0.16, LP_FILTER_ORDER); | |
1030 | |
1031 /* report how many samples we got */ | |
1032 *data_size = AMR_BLOCK_SIZE * sizeof(float); | |
1033 | |
1034 /* return the amount of bytes consumed if everything was OK */ | |
1035 return frame_sizes_nb[p->cur_frame_mode] + 1; // +7 for rounding and +8 for TOC | |
1036 } | |
1037 | |
1038 | |
1039 AVCodec amrnb_decoder = { | |
1040 .name = "amrnb", | |
11560
8a4984c5cacc
Define AVMediaType enum, and use it instead of enum CodecType, which
stefano
parents:
11462
diff
changeset
|
1041 .type = AVMEDIA_TYPE_AUDIO, |
11235 | 1042 .id = CODEC_ID_AMR_NB, |
1043 .priv_data_size = sizeof(AMRContext), | |
1044 .init = amrnb_decode_init, | |
1045 .decode = amrnb_decode_frame, | |
1046 .long_name = NULL_IF_CONFIG_SMALL("Adaptive Multi-Rate NarrowBand"), | |
1047 .sample_fmts = (enum SampleFormat[]){SAMPLE_FMT_FLT,SAMPLE_FMT_NONE}, | |
1048 }; |