Mercurial > libavcodec.hg
annotate amrnbdec.c @ 11816:7c2369ec6faa libavcodec
ARM: check struct offsets only when they are used
The offsets differ depending on configuration, so only check them when
they will actually be used. Presently, this is when NEON is enabled.
author | mru |
---|---|
date | Wed, 02 Jun 2010 22:05:25 +0000 |
parents | ceec2fb08b8e |
children | 2dd67ed2f947 |
rev | line source |
---|---|
11235 | 1 /* |
2 * AMR narrowband decoder | |
3 * Copyright (c) 2006-2007 Robert Swain | |
4 * Copyright (c) 2009 Colin McQuillan | |
5 * | |
6 * This file is part of FFmpeg. | |
7 * | |
8 * FFmpeg is free software; you can redistribute it and/or | |
9 * modify it under the terms of the GNU Lesser General Public | |
10 * License as published by the Free Software Foundation; either | |
11 * version 2.1 of the License, or (at your option) any later version. | |
12 * | |
13 * FFmpeg is distributed in the hope that it will be useful, | |
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
16 * Lesser General Public License for more details. | |
17 * | |
18 * You should have received a copy of the GNU Lesser General Public | |
19 * License along with FFmpeg; if not, write to the Free Software | |
20 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
21 */ | |
22 | |
23 | |
24 /** | |
11644
7dd2a45249a9
Remove explicit filename from Doxygen @file commands.
diego
parents:
11560
diff
changeset
|
25 * @file |
11235 | 26 * AMR narrowband decoder |
27 * | |
28 * This decoder uses floats for simplicity and so is not bit-exact. One | |
29 * difference is that differences in phase can accumulate. The test sequences | |
30 * in 3GPP TS 26.074 can still be useful. | |
31 * | |
32 * - Comparing this file's output to the output of the ref decoder gives a | |
33 * PSNR of 30 to 80. Plotting the output samples shows a difference in | |
34 * phase in some areas. | |
35 * | |
36 * - Comparing both decoders against their input, this decoder gives a similar | |
37 * PSNR. If the test sequence homing frames are removed (this decoder does | |
38 * not detect them), the PSNR is at least as good as the reference on 140 | |
39 * out of 169 tests. | |
40 */ | |
41 | |
42 | |
43 #include <string.h> | |
44 #include <math.h> | |
45 | |
46 #include "avcodec.h" | |
47 #include "get_bits.h" | |
48 #include "libavutil/common.h" | |
49 #include "celp_math.h" | |
50 #include "celp_filters.h" | |
51 #include "acelp_filters.h" | |
52 #include "acelp_vectors.h" | |
53 #include "acelp_pitch_delay.h" | |
54 #include "lsp.h" | |
55 | |
56 #include "amrnbdata.h" | |
57 | |
58 #define AMR_BLOCK_SIZE 160 ///< samples per frame | |
59 #define AMR_SAMPLE_BOUND 32768.0 ///< synthesis() reports overflow when a sample magnitude exceeds this | |
60 | |
61 /** | |
62 * Scale from constructed speech to [-1,1] | |
63 * | |
64 * AMR is designed to produce 16-bit PCM samples (3GPP TS 26.090 4.2) but | |
65 * upscales by two (section 6.2.2). | |
66 * | |
67 * Fundamentally, this scale is determined by energy_mean through | |
68 * the fixed vector contribution to the excitation vector. | |
69 */ | |
70 #define AMR_SAMPLE_SCALE (2.0 / 32768.0) | |
71 | |
72 /** Prediction factor for 12.2kbit/s mode; weights the previous LSF residual in lsf2lsp_5() */ | |
73 #define PRED_FAC_MODE_12k2 0.65 | |
74 | |
75 #define LSF_R_FAC (8000.0 / 32768.0) ///< LSF residual tables to Hertz | |
76 #define MIN_LSF_SPACING (50.0488 / 8000.0) ///< Minimum LSF distance passed to ff_set_min_dist_lsf(); ensures stability of LPC filter | |
77 #define PITCH_LAG_MIN_MODE_12k2 18 ///< Lower bound on decoded lag search in 12.2kbit/s mode (clip minimum in decode_pitch_lag_1_6()) | |
78 | |
79 /** Initial energy in dB. Also used for bad frames (unimplemented). */ | |
80 #define MIN_ENERGY -14.0 | |
81 | |
82 /** Maximum sharpening factor | |
83 * | |
84 * The specification says 0.8, which should be 13107, but the reference C code | |
85 * uses 13017 instead. (Amusingly the same applies to SHARP_MAX in g729dec.c.) | |
 * The value below is exactly 13017 / 16384.
86 */ | |
87 #define SHARP_MAX 0.79449462890625 | |
88 | |
89 /** Number of impulse response coefficients used for tilt factor */ | |
90 #define AMR_TILT_RESPONSE 22 | |
91 /** Tilt factor = 1st reflection coefficient * gamma_t */ | |
92 #define AMR_TILT_GAMMA_T 0.8 | |
93 /** Adaptive gain control factor used in post-filter */ | |
94 #define AMR_AGC_ALPHA 0.9 | |
95 | |
96 typedef struct AMRContext { | |
97 AMRNBFrame frame; ///< decoded AMR parameters (lsf coefficients, codebook indexes, etc); zeroed and refilled by unpack_bitstream() | |
98 uint8_t bad_frame_indicator; ///< 1 when the frame header's quality bit is clear (bad frame), 0 otherwise | |
99 enum Mode cur_frame_mode; | |
100 | |
101 int16_t prev_lsf_r[LP_FILTER_ORDER]; ///< residual LSF vector saved for computing the next frame's LSFs | |
102 double lsp[4][LP_FILTER_ORDER]; ///< lsp vectors from current frame, one per subframe | |
103 double prev_lsp_sub4[LP_FILTER_ORDER]; ///< lsp vector for the 4th subframe of the previous frame | |
104 | |
105 float lsf_q[4][LP_FILTER_ORDER]; ///< Interpolated LSF vector for fixed gain smoothing | |
106 float lsf_avg[LP_FILTER_ORDER]; ///< vector of averaged lsf vector | |
107 | |
108 float lpc[4][LP_FILTER_ORDER]; ///< lpc coefficient vectors for 4 subframes | |
109 | |
110 uint8_t pitch_lag_int; ///< integer part of pitch lag from current subframe | |
111 | |
112 float excitation_buf[PITCH_DELAY_MAX + LP_FILTER_ORDER + 1 + AMR_SUBFRAME_SIZE]; ///< current excitation and all necessary excitation history | |
113 float *excitation; ///< pointer to the current excitation vector in excitation_buf (set once in amrnb_decode_init(), just past the history part) | |
114 | |
115 float pitch_vector[AMR_SUBFRAME_SIZE]; ///< adaptive code book (pitch) vector | |
116 float fixed_vector[AMR_SUBFRAME_SIZE]; ///< algebraic codebook (fixed) vector (must be kept zero between frames) | |
117 | |
118 float prediction_error[4]; ///< quantized prediction errors {20log10(^gamma_gc)} for previous four subframes | |
119 float pitch_gain[5]; ///< quantized pitch gains for the current and previous four subframes (index 4 is the current one) | |
120 float fixed_gain[5]; ///< quantized fixed gains for the current and previous four subframes (index 4 is the current one) | |
121 | |
122 float beta; ///< previous pitch_gain, bounded by [0.0,SHARP_MAX] | |
123 uint8_t diff_count; ///< the number of subframes for which diff has been above 0.65 | |
124 uint8_t hang_count; ///< the number of subframes since a hangover period started | |
125 | |
126 float prev_sparse_fixed_gain; ///< previous fixed gain; used by anti-sparseness processing to determine "onset" | |
127 uint8_t prev_ir_filter_nr; ///< previous impulse response filter "impNr": 0 - strong, 1 - medium, 2 - none | |
128 uint8_t ir_filter_onset; ///< onset countdown: set to 2 when an onset is detected, then decremented each subframe | |
129 | |
130 float postfilter_mem[10]; ///< previous intermediate values in the formant filter | |
131 float tilt_mem; ///< previous input to tilt compensation filter | |
132 float postfilter_agc; ///< previous factor used for adaptive gain control | |
133 float high_pass_mem[2]; ///< previous intermediate values in the high-pass filter | |
134 | |
135 float samples_in[LP_FILTER_ORDER + AMR_SUBFRAME_SIZE]; ///< floating point samples | |
136 | |
137 } AMRContext; | |
138 | |
/**
 * Double-precision counterpart of ff_weighted_vector_sumf().
 *
 * Computes out[k] = weight_coeff_a * in_a[k] + weight_coeff_b * in_b[k]
 * for k in [0, length).
 *
 * @param out            destination vector
 * @param in_a           first input vector
 * @param in_b           second input vector
 * @param weight_coeff_a weight applied to in_a
 * @param weight_coeff_b weight applied to in_b
 * @param length         number of elements to process (nothing is written
 *                       when it is zero or negative)
 */
static void weighted_vector_sumd(double *out, const double *in_a,
                                 const double *in_b, double weight_coeff_a,
                                 double weight_coeff_b, int length)
{
    int remaining = length;

    while (remaining-- > 0) {
        const double a = *in_a++;
        const double b = *in_b++;

        *out++ = weight_coeff_a * a
               + weight_coeff_b * b;
    }
}
150 | |
151 static av_cold int amrnb_decode_init(AVCodecContext *avctx) | |
152 { | |
153 AMRContext *p = avctx->priv_data; | |
154 int i; | |
155 | |
156 avctx->sample_fmt = SAMPLE_FMT_FLT; | |
157 | |
158 // p->excitation always points to the same position in p->excitation_buf | |
159 p->excitation = &p->excitation_buf[PITCH_DELAY_MAX + LP_FILTER_ORDER + 1]; | |
160 | |
161 for (i = 0; i < LP_FILTER_ORDER; i++) { | |
162 p->prev_lsp_sub4[i] = lsp_sub4_init[i] * 1000 / (float)(1 << 15); | |
163 p->lsf_avg[i] = p->lsf_q[3][i] = lsp_avg_init[i] / (float)(1 << 15); | |
164 } | |
165 | |
166 for (i = 0; i < 4; i++) | |
167 p->prediction_error[i] = MIN_ENERGY; | |
168 | |
169 return 0; | |
170 } | |
171 | |
172 | |
173 /** | |
174 * Unpack an RFC4867 speech frame into the AMR frame mode and parameters. | |
175 * | |
176 * The order of speech bits is specified by 3GPP TS 26.101. | |
177 * | |
178 * @param p the context | |
179 * @param buf pointer to the input buffer | |
180 * @param buf_size size of the input buffer | |
181 * | |
182 * @return the frame mode | |
183 */ | |
184 static enum Mode unpack_bitstream(AMRContext *p, const uint8_t *buf, | |
185 int buf_size) | |
186 { | |
187 GetBitContext gb; | |
188 enum Mode mode; | |
189 | |
190 init_get_bits(&gb, buf, buf_size * 8); | |
191 | |
192 // Decode the first octet. | |
193 skip_bits(&gb, 1); // padding bit | |
194 mode = get_bits(&gb, 4); // frame type | |
195 p->bad_frame_indicator = !get_bits1(&gb); // quality bit | |
196 skip_bits(&gb, 2); // two padding bits | |
197 | |
11645
69131e8f6614
10l: do not try to unpack DTX frames in AMR-NB decoder
vitor
parents:
11644
diff
changeset
|
198 if (mode < MODE_DTX) { |
11235 | 199 uint16_t *data = (uint16_t *)&p->frame; |
200 const uint8_t *order = amr_unpacking_bitmaps_per_mode[mode]; | |
201 int field_size; | |
202 | |
203 memset(&p->frame, 0, sizeof(AMRNBFrame)); | |
204 buf++; | |
205 while ((field_size = *order++)) { | |
206 int field = 0; | |
207 int field_offset = *order++; | |
208 while (field_size--) { | |
209 int bit = *order++; | |
210 field <<= 1; | |
211 field |= buf[bit >> 3] >> (bit & 7) & 1; | |
212 } | |
213 data[field_offset] = field; | |
214 } | |
215 } | |
216 | |
217 return mode; | |
218 } | |
219 | |
220 | |
221 /// @defgroup amr_lpc_decoding AMR pitch LPC coefficient decoding functions | |
222 /// @{ | |
223 | |
224 /** | |
225 * Convert an lsf vector into an lsp vector. | |
226 * | |
227 * @param lsf input lsf vector | |
228 * @param lsp output lsp vector | |
229 */ | |
230 static void lsf2lsp(const float *lsf, double *lsp) | |
231 { | |
232 int i; | |
233 | |
234 for (i = 0; i < LP_FILTER_ORDER; i++) | |
235 lsp[i] = cos(2.0 * M_PI * lsf[i]); | |
236 } | |
237 | |
238 /** | |
239 * Interpolate the LSF vector (used for fixed gain smoothing). | |
240 * The interpolation is done over all four subframes even in MODE_12k2. | |
241 * | |
242 * @param[in,out] lsf_q LSFs in [0,1] for each subframe | |
243 * @param[in] lsf_new New LSFs in [0,1] for subframe 4 | |
244 */ | |
245 static void interpolate_lsf(float lsf_q[4][LP_FILTER_ORDER], float *lsf_new) | |
246 { | |
247 int i; | |
248 | |
249 for (i = 0; i < 4; i++) | |
250 ff_weighted_vector_sumf(lsf_q[i], lsf_q[3], lsf_new, | |
251 0.25 * (3 - i), 0.25 * (i + 1), | |
252 LP_FILTER_ORDER); | |
253 } | |
254 | |
255 /** | |
256 * Decode a set of 5 split-matrix quantized lsf indexes into an lsp vector. | |
257 * | |
258 * @param p the context | |
259 * @param lsp output LSP vector | |
260 * @param lsf_no_r LSF vector without the residual vector added | |
261 * @param lsf_quantizer pointers to LSF dictionary tables | |
262 * @param quantizer_offset offset in tables | |
263 * @param sign for the 3 dictionary table | |
264 * @param update store data for computing the next frame's LSFs | |
265 */ | |
266 static void lsf2lsp_for_mode12k2(AMRContext *p, double lsp[LP_FILTER_ORDER], | |
267 const float lsf_no_r[LP_FILTER_ORDER], | |
268 const int16_t *lsf_quantizer[5], | |
269 const int quantizer_offset, | |
270 const int sign, const int update) | |
271 { | |
272 int16_t lsf_r[LP_FILTER_ORDER]; // residual LSF vector | |
273 float lsf_q[LP_FILTER_ORDER]; // quantified LSF vector | |
274 int i; | |
275 | |
276 for (i = 0; i < LP_FILTER_ORDER >> 1; i++) | |
277 memcpy(&lsf_r[i << 1], &lsf_quantizer[i][quantizer_offset], | |
278 2 * sizeof(*lsf_r)); | |
279 | |
280 if (sign) { | |
281 lsf_r[4] *= -1; | |
282 lsf_r[5] *= -1; | |
283 } | |
284 | |
285 if (update) | |
286 memcpy(p->prev_lsf_r, lsf_r, LP_FILTER_ORDER * sizeof(float)); | |
287 | |
288 for (i = 0; i < LP_FILTER_ORDER; i++) | |
289 lsf_q[i] = lsf_r[i] * (LSF_R_FAC / 8000.0) + lsf_no_r[i] * (1.0 / 8000.0); | |
290 | |
291 ff_set_min_dist_lsf(lsf_q, MIN_LSF_SPACING, LP_FILTER_ORDER); | |
292 | |
293 if (update) | |
294 interpolate_lsf(p->lsf_q, lsf_q); | |
295 | |
296 lsf2lsp(lsf_q, lsp); | |
297 } | |
298 | |
299 /** | |
300 * Decode a set of 5 split-matrix quantized lsf indexes into 2 lsp vectors. | |
301 * | |
302 * @param p pointer to the AMRContext | |
303 */ | |
304 static void lsf2lsp_5(AMRContext *p) | |
305 { | |
306 const uint16_t *lsf_param = p->frame.lsf; | |
307 float lsf_no_r[LP_FILTER_ORDER]; // LSFs without the residual vector | |
308 const int16_t *lsf_quantizer[5]; | |
309 int i; | |
310 | |
311 lsf_quantizer[0] = lsf_5_1[lsf_param[0]]; | |
312 lsf_quantizer[1] = lsf_5_2[lsf_param[1]]; | |
313 lsf_quantizer[2] = lsf_5_3[lsf_param[2] >> 1]; | |
314 lsf_quantizer[3] = lsf_5_4[lsf_param[3]]; | |
315 lsf_quantizer[4] = lsf_5_5[lsf_param[4]]; | |
316 | |
317 for (i = 0; i < LP_FILTER_ORDER; i++) | |
318 lsf_no_r[i] = p->prev_lsf_r[i] * LSF_R_FAC * PRED_FAC_MODE_12k2 + lsf_5_mean[i]; | |
319 | |
320 lsf2lsp_for_mode12k2(p, p->lsp[1], lsf_no_r, lsf_quantizer, 0, lsf_param[2] & 1, 0); | |
321 lsf2lsp_for_mode12k2(p, p->lsp[3], lsf_no_r, lsf_quantizer, 2, lsf_param[2] & 1, 1); | |
322 | |
323 // interpolate LSP vectors at subframes 1 and 3 | |
324 weighted_vector_sumd(p->lsp[0], p->prev_lsp_sub4, p->lsp[1], 0.5, 0.5, LP_FILTER_ORDER); | |
325 weighted_vector_sumd(p->lsp[2], p->lsp[1] , p->lsp[3], 0.5, 0.5, LP_FILTER_ORDER); | |
326 } | |
327 | |
328 /** | |
329 * Decode a set of 3 split-matrix quantized lsf indexes into an lsp vector. | |
330 * | |
331 * @param p pointer to the AMRContext | |
332 */ | |
333 static void lsf2lsp_3(AMRContext *p) | |
334 { | |
335 const uint16_t *lsf_param = p->frame.lsf; | |
336 int16_t lsf_r[LP_FILTER_ORDER]; // residual LSF vector | |
337 float lsf_q[LP_FILTER_ORDER]; // quantified LSF vector | |
338 const int16_t *lsf_quantizer; | |
339 int i, j; | |
340 | |
341 lsf_quantizer = (p->cur_frame_mode == MODE_7k95 ? lsf_3_1_MODE_7k95 : lsf_3_1)[lsf_param[0]]; | |
342 memcpy(lsf_r, lsf_quantizer, 3 * sizeof(*lsf_r)); | |
343 | |
344 lsf_quantizer = lsf_3_2[lsf_param[1] << (p->cur_frame_mode <= MODE_5k15)]; | |
345 memcpy(lsf_r + 3, lsf_quantizer, 3 * sizeof(*lsf_r)); | |
346 | |
347 lsf_quantizer = (p->cur_frame_mode <= MODE_5k15 ? lsf_3_3_MODE_5k15 : lsf_3_3)[lsf_param[2]]; | |
348 memcpy(lsf_r + 6, lsf_quantizer, 4 * sizeof(*lsf_r)); | |
349 | |
350 // calculate mean-removed LSF vector and add mean | |
351 for (i = 0; i < LP_FILTER_ORDER; i++) | |
352 lsf_q[i] = (lsf_r[i] + p->prev_lsf_r[i] * pred_fac[i]) * (LSF_R_FAC / 8000.0) + lsf_3_mean[i] * (1.0 / 8000.0); | |
353 | |
354 ff_set_min_dist_lsf(lsf_q, MIN_LSF_SPACING, LP_FILTER_ORDER); | |
355 | |
356 // store data for computing the next frame's LSFs | |
357 interpolate_lsf(p->lsf_q, lsf_q); | |
358 memcpy(p->prev_lsf_r, lsf_r, LP_FILTER_ORDER * sizeof(*lsf_r)); | |
359 | |
360 lsf2lsp(lsf_q, p->lsp[3]); | |
361 | |
362 // interpolate LSP vectors at subframes 1, 2 and 3 | |
363 for (i = 1; i <= 3; i++) | |
364 for(j = 0; j < LP_FILTER_ORDER; j++) | |
365 p->lsp[i-1][j] = p->prev_lsp_sub4[j] + | |
366 (p->lsp[3][j] - p->prev_lsp_sub4[j]) * 0.25 * i; | |
367 } | |
368 | |
369 /// @} | |
370 | |
371 | |
372 /// @defgroup amr_pitch_vector_decoding AMR pitch vector decoding functions | |
373 /// @{ | |
374 | |
375 /** | |
376 * Like ff_decode_pitch_lag(), but with 1/6 resolution | |
377 */ | |
378 static void decode_pitch_lag_1_6(int *lag_int, int *lag_frac, int pitch_index, | |
379 const int prev_lag_int, const int subframe) | |
380 { | |
381 if (subframe == 0 || subframe == 2) { | |
382 if (pitch_index < 463) { | |
383 *lag_int = (pitch_index + 107) * 10923 >> 16; | |
384 *lag_frac = pitch_index - *lag_int * 6 + 105; | |
385 } else { | |
386 *lag_int = pitch_index - 368; | |
387 *lag_frac = 0; | |
388 } | |
389 } else { | |
390 *lag_int = ((pitch_index + 5) * 10923 >> 16) - 1; | |
391 *lag_frac = pitch_index - *lag_int * 6 - 3; | |
392 *lag_int += av_clip(prev_lag_int - 5, PITCH_LAG_MIN_MODE_12k2, | |
393 PITCH_DELAY_MAX - 9); | |
394 } | |
395 } | |
396 | |
397 static void decode_pitch_vector(AMRContext *p, | |
398 const AMRNBSubframe *amr_subframe, | |
399 const int subframe) | |
400 { | |
401 int pitch_lag_int, pitch_lag_frac; | |
402 enum Mode mode = p->cur_frame_mode; | |
403 | |
404 if (p->cur_frame_mode == MODE_12k2) { | |
405 decode_pitch_lag_1_6(&pitch_lag_int, &pitch_lag_frac, | |
406 amr_subframe->p_lag, p->pitch_lag_int, | |
407 subframe); | |
408 } else | |
409 ff_decode_pitch_lag(&pitch_lag_int, &pitch_lag_frac, | |
410 amr_subframe->p_lag, | |
411 p->pitch_lag_int, subframe, | |
412 mode != MODE_4k75 && mode != MODE_5k15, | |
413 mode <= MODE_6k7 ? 4 : (mode == MODE_7k95 ? 5 : 6)); | |
414 | |
415 p->pitch_lag_int = pitch_lag_int; // store previous lag in a uint8_t | |
416 | |
417 pitch_lag_frac <<= (p->cur_frame_mode != MODE_12k2); | |
418 | |
419 pitch_lag_int += pitch_lag_frac > 0; | |
420 | |
421 /* Calculate the pitch vector by interpolating the past excitation at the | |
422 pitch lag using a b60 hamming windowed sinc function. */ | |
423 ff_acelp_interpolatef(p->excitation, p->excitation + 1 - pitch_lag_int, | |
424 ff_b60_sinc, 6, | |
425 pitch_lag_frac + 6 - 6*(pitch_lag_frac > 0), | |
426 10, AMR_SUBFRAME_SIZE); | |
427 | |
428 memcpy(p->pitch_vector, p->excitation, AMR_SUBFRAME_SIZE * sizeof(float)); | |
429 } | |
430 | |
431 /// @} | |
432 | |
433 | |
434 /// @defgroup amr_algebraic_code_book AMR algebraic code book (fixed) vector decoding functions | |
435 /// @{ | |
436 | |
437 /** | |
438 * Decode a 10-bit algebraic codebook index from a 10.2 kbit/s frame. | |
439 */ | |
440 static void decode_10bit_pulse(int code, int pulse_position[8], | |
441 int i1, int i2, int i3) | |
442 { | |
443 // coded using 7+3 bits with the 3 LSBs being, individually, the LSB of 1 of | |
444 // the 3 pulses and the upper 7 bits being coded in base 5 | |
445 const uint8_t *positions = base_five_table[code >> 3]; | |
446 pulse_position[i1] = (positions[2] << 1) + ( code & 1); | |
447 pulse_position[i2] = (positions[1] << 1) + ((code >> 1) & 1); | |
448 pulse_position[i3] = (positions[0] << 1) + ((code >> 2) & 1); | |
449 } | |
450 | |
451 /** | |
452 * Decode the algebraic codebook index to pulse positions and signs and | |
453 * construct the algebraic codebook vector for MODE_10k2. | |
454 * | |
455 * @param fixed_index positions of the eight pulses | |
456 * @param fixed_sparse pointer to the algebraic codebook vector | |
457 */ | |
458 static void decode_8_pulses_31bits(const int16_t *fixed_index, | |
459 AMRFixed *fixed_sparse) | |
460 { | |
461 int pulse_position[8]; | |
462 int i, temp; | |
463 | |
464 decode_10bit_pulse(fixed_index[4], pulse_position, 0, 4, 1); | |
465 decode_10bit_pulse(fixed_index[5], pulse_position, 2, 6, 5); | |
466 | |
467 // coded using 5+2 bits with the 2 LSBs being, individually, the LSB of 1 of | |
468 // the 2 pulses and the upper 5 bits being coded in base 5 | |
469 temp = ((fixed_index[6] >> 2) * 25 + 12) >> 5; | |
470 pulse_position[3] = temp % 5; | |
471 pulse_position[7] = temp / 5; | |
472 if (pulse_position[7] & 1) | |
473 pulse_position[3] = 4 - pulse_position[3]; | |
474 pulse_position[3] = (pulse_position[3] << 1) + ( fixed_index[6] & 1); | |
475 pulse_position[7] = (pulse_position[7] << 1) + ((fixed_index[6] >> 1) & 1); | |
476 | |
477 fixed_sparse->n = 8; | |
478 for (i = 0; i < 4; i++) { | |
479 const int pos1 = (pulse_position[i] << 2) + i; | |
480 const int pos2 = (pulse_position[i + 4] << 2) + i; | |
481 const float sign = fixed_index[i] ? -1.0 : 1.0; | |
482 fixed_sparse->x[i ] = pos1; | |
483 fixed_sparse->x[i + 4] = pos2; | |
484 fixed_sparse->y[i ] = sign; | |
485 fixed_sparse->y[i + 4] = pos2 < pos1 ? -sign : sign; | |
486 } | |
487 } | |
488 | |
489 /** | |
490 * Decode the algebraic codebook index to pulse positions and signs, | |
491 * then construct the algebraic codebook vector. | |
492 * | |
493 * nb of pulses | bits encoding pulses | |
494 * For MODE_4k75 or MODE_5k15, 2 | 1-3, 4-6, 7 | |
495 * MODE_5k9, 2 | 1, 2-4, 5-6, 7-9 | |
496 * MODE_6k7, 3 | 1-3, 4, 5-7, 8, 9-11 | |
497 * MODE_7k4 or MODE_7k95, 4 | 1-3, 4-6, 7-9, 10, 11-13 | |
498 * | |
499 * @param fixed_sparse pointer to the algebraic codebook vector | |
500 * @param pulses algebraic codebook indexes | |
501 * @param mode mode of the current frame | |
502 * @param subframe current subframe number | |
503 */ | |
504 static void decode_fixed_sparse(AMRFixed *fixed_sparse, const uint16_t *pulses, | |
505 const enum Mode mode, const int subframe) | |
506 { | |
507 assert(MODE_4k75 <= mode && mode <= MODE_12k2); | |
508 | |
509 if (mode == MODE_12k2) { | |
510 ff_decode_10_pulses_35bits(pulses, fixed_sparse, gray_decode, 5, 3); | |
511 } else if (mode == MODE_10k2) { | |
512 decode_8_pulses_31bits(pulses, fixed_sparse); | |
513 } else { | |
514 int *pulse_position = fixed_sparse->x; | |
515 int i, pulse_subset; | |
516 const int fixed_index = pulses[0]; | |
517 | |
518 if (mode <= MODE_5k15) { | |
519 pulse_subset = ((fixed_index >> 3) & 8) + (subframe << 1); | |
520 pulse_position[0] = ( fixed_index & 7) * 5 + track_position[pulse_subset]; | |
521 pulse_position[1] = ((fixed_index >> 3) & 7) * 5 + track_position[pulse_subset + 1]; | |
522 fixed_sparse->n = 2; | |
523 } else if (mode == MODE_5k9) { | |
524 pulse_subset = ((fixed_index & 1) << 1) + 1; | |
525 pulse_position[0] = ((fixed_index >> 1) & 7) * 5 + pulse_subset; | |
526 pulse_subset = (fixed_index >> 4) & 3; | |
527 pulse_position[1] = ((fixed_index >> 6) & 7) * 5 + pulse_subset + (pulse_subset == 3 ? 1 : 0); | |
528 fixed_sparse->n = pulse_position[0] == pulse_position[1] ? 1 : 2; | |
529 } else if (mode == MODE_6k7) { | |
530 pulse_position[0] = (fixed_index & 7) * 5; | |
531 pulse_subset = (fixed_index >> 2) & 2; | |
532 pulse_position[1] = ((fixed_index >> 4) & 7) * 5 + pulse_subset + 1; | |
533 pulse_subset = (fixed_index >> 6) & 2; | |
534 pulse_position[2] = ((fixed_index >> 8) & 7) * 5 + pulse_subset + 2; | |
535 fixed_sparse->n = 3; | |
536 } else { // mode <= MODE_7k95 | |
537 pulse_position[0] = gray_decode[ fixed_index & 7]; | |
538 pulse_position[1] = gray_decode[(fixed_index >> 3) & 7] + 1; | |
539 pulse_position[2] = gray_decode[(fixed_index >> 6) & 7] + 2; | |
540 pulse_subset = (fixed_index >> 9) & 1; | |
541 pulse_position[3] = gray_decode[(fixed_index >> 10) & 7] + pulse_subset + 3; | |
542 fixed_sparse->n = 4; | |
543 } | |
544 for (i = 0; i < fixed_sparse->n; i++) | |
545 fixed_sparse->y[i] = (pulses[1] >> i) & 1 ? 1.0 : -1.0; | |
546 } | |
547 } | |
548 | |
549 /** | |
550 * Apply pitch lag to obtain the sharpened fixed vector (section 6.1.2) | |
551 * | |
552 * @param p the context | |
553 * @param subframe unpacked amr subframe | |
554 * @param mode mode of the current frame | |
555 * @param fixed_sparse sparse respresentation of the fixed vector | |
556 */ | |
557 static void pitch_sharpening(AMRContext *p, int subframe, enum Mode mode, | |
558 AMRFixed *fixed_sparse) | |
559 { | |
560 // The spec suggests the current pitch gain is always used, but in other | |
561 // modes the pitch and codebook gains are joinly quantized (sec 5.8.2) | |
562 // so the codebook gain cannot depend on the quantized pitch gain. | |
563 if (mode == MODE_12k2) | |
564 p->beta = FFMIN(p->pitch_gain[4], 1.0); | |
565 | |
566 fixed_sparse->pitch_lag = p->pitch_lag_int; | |
567 fixed_sparse->pitch_fac = p->beta; | |
568 | |
569 // Save pitch sharpening factor for the next subframe | |
570 // MODE_4k75 only updates on the 2nd and 4th subframes - this follows from | |
571 // the fact that the gains for two subframes are jointly quantized. | |
572 if (mode != MODE_4k75 || subframe & 1) | |
573 p->beta = av_clipf(p->pitch_gain[4], 0.0, SHARP_MAX); | |
574 } | |
575 /// @} | |
576 | |
577 | |
578 /// @defgroup amr_gain_decoding AMR gain decoding functions | |
579 /// @{ | |
580 | |
581 /** | |
582 * fixed gain smoothing | |
583 * Note that where the spec specifies the "spectrum in the q domain" | |
584 * in section 6.1.4, in fact frequencies should be used. | |
585 * | |
586 * @param p the context | |
587 * @param lsf LSFs for the current subframe, in the range [0,1] | |
588 * @param lsf_avg averaged LSFs | |
589 * @param mode mode of the current frame | |
590 * | |
591 * @return fixed gain smoothed | |
592 */ | |
593 static float fixed_gain_smooth(AMRContext *p , const float *lsf, | |
594 const float *lsf_avg, const enum Mode mode) | |
595 { | |
596 float diff = 0.0; | |
597 int i; | |
598 | |
599 for (i = 0; i < LP_FILTER_ORDER; i++) | |
600 diff += fabs(lsf_avg[i] - lsf[i]) / lsf_avg[i]; | |
601 | |
602 // If diff is large for ten subframes, disable smoothing for a 40-subframe | |
603 // hangover period. | |
604 p->diff_count++; | |
605 if (diff <= 0.65) | |
606 p->diff_count = 0; | |
607 | |
608 if (p->diff_count > 10) { | |
609 p->hang_count = 0; | |
610 p->diff_count--; // don't let diff_count overflow | |
611 } | |
612 | |
613 if (p->hang_count < 40) { | |
614 p->hang_count++; | |
615 } else if (mode < MODE_7k4 || mode == MODE_10k2) { | |
616 const float smoothing_factor = av_clipf(4.0 * diff - 1.6, 0.0, 1.0); | |
617 const float fixed_gain_mean = (p->fixed_gain[0] + p->fixed_gain[1] + | |
618 p->fixed_gain[2] + p->fixed_gain[3] + | |
619 p->fixed_gain[4]) * 0.2; | |
620 return smoothing_factor * p->fixed_gain[4] + | |
621 (1.0 - smoothing_factor) * fixed_gain_mean; | |
622 } | |
623 return p->fixed_gain[4]; | |
624 } | |
625 | |
626 /** | |
627 * Decode pitch gain and fixed gain factor (part of section 6.1.3). | |
628 * | |
629 * @param p the context | |
630 * @param amr_subframe unpacked amr subframe | |
631 * @param mode mode of the current frame | |
632 * @param subframe current subframe number | |
633 * @param fixed_gain_factor decoded gain correction factor | |
634 */ | |
635 static void decode_gains(AMRContext *p, const AMRNBSubframe *amr_subframe, | |
636 const enum Mode mode, const int subframe, | |
637 float *fixed_gain_factor) | |
638 { | |
639 if (mode == MODE_12k2 || mode == MODE_7k95) { | |
640 p->pitch_gain[4] = qua_gain_pit [amr_subframe->p_gain ] | |
641 * (1.0 / 16384.0); | |
642 *fixed_gain_factor = qua_gain_code[amr_subframe->fixed_gain] | |
643 * (1.0 / 2048.0); | |
644 } else { | |
645 const uint16_t *gains; | |
646 | |
647 if (mode >= MODE_6k7) { | |
648 gains = gains_high[amr_subframe->p_gain]; | |
649 } else if (mode >= MODE_5k15) { | |
650 gains = gains_low [amr_subframe->p_gain]; | |
651 } else { | |
652 // gain index is only coded in subframes 0,2 for MODE_4k75 | |
653 gains = gains_MODE_4k75[(p->frame.subframe[subframe & 2].p_gain << 1) + (subframe & 1)]; | |
654 } | |
655 | |
656 p->pitch_gain[4] = gains[0] * (1.0 / 16384.0); | |
657 *fixed_gain_factor = gains[1] * (1.0 / 4096.0); | |
658 } | |
659 } | |
660 | |
661 /// @} | |
662 | |
663 | |
664 /// @defgroup amr_pre_processing AMR pre-processing functions | |
665 /// @{ | |
666 | |
667 /** | |
668 * Circularly convolve a sparse fixed vector with a phase dispersion impulse | |
669 * response filter (D.6.2 of G.729 and 6.1.5 of AMR). | |
670 * | |
671 * @param out vector with filter applied | |
672 * @param in source vector | |
673 * @param filter phase filter coefficients | |
674 * | |
675 * out[n] = sum(i,0,len-1){ in[i] * filter[(len + n - i)%len] } | |
676 */ | |
677 static void apply_ir_filter(float *out, const AMRFixed *in, | |
678 const float *filter) | |
679 { | |
680 float filter1[AMR_SUBFRAME_SIZE], //!< filters at pitch lag*1 and *2 | |
681 filter2[AMR_SUBFRAME_SIZE]; | |
682 int lag = in->pitch_lag; | |
683 float fac = in->pitch_fac; | |
684 int i; | |
685 | |
686 if (lag < AMR_SUBFRAME_SIZE) { | |
687 ff_celp_circ_addf(filter1, filter, filter, lag, fac, | |
688 AMR_SUBFRAME_SIZE); | |
689 | |
690 if (lag < AMR_SUBFRAME_SIZE >> 1) | |
691 ff_celp_circ_addf(filter2, filter, filter1, lag, fac, | |
692 AMR_SUBFRAME_SIZE); | |
693 } | |
694 | |
695 memset(out, 0, sizeof(float) * AMR_SUBFRAME_SIZE); | |
696 for (i = 0; i < in->n; i++) { | |
697 int x = in->x[i]; | |
698 float y = in->y[i]; | |
699 const float *filterp; | |
700 | |
701 if (x >= AMR_SUBFRAME_SIZE - lag) { | |
702 filterp = filter; | |
703 } else if (x >= AMR_SUBFRAME_SIZE - (lag << 1)) { | |
704 filterp = filter1; | |
705 } else | |
706 filterp = filter2; | |
707 | |
708 ff_celp_circ_addf(out, out, filterp, x, y, AMR_SUBFRAME_SIZE); | |
709 } | |
710 } | |
711 | |
712 /** | |
713 * Reduce fixed vector sparseness by smoothing with one of three IR filters. | |
714 * Also know as "adaptive phase dispersion". | |
715 * | |
716 * This implements 3GPP TS 26.090 section 6.1(5). | |
717 * | |
718 * @param p the context | |
719 * @param fixed_sparse algebraic codebook vector | |
720 * @param fixed_vector unfiltered fixed vector | |
721 * @param fixed_gain smoothed gain | |
722 * @param out space for modified vector if necessary | |
723 */ | |
724 static const float *anti_sparseness(AMRContext *p, AMRFixed *fixed_sparse, | |
725 const float *fixed_vector, | |
726 float fixed_gain, float *out) | |
727 { | |
728 int ir_filter_nr; | |
729 | |
730 if (p->pitch_gain[4] < 0.6) { | |
731 ir_filter_nr = 0; // strong filtering | |
732 } else if (p->pitch_gain[4] < 0.9) { | |
733 ir_filter_nr = 1; // medium filtering | |
734 } else | |
735 ir_filter_nr = 2; // no filtering | |
736 | |
737 // detect 'onset' | |
738 if (fixed_gain > 2.0 * p->prev_sparse_fixed_gain) { | |
739 p->ir_filter_onset = 2; | |
740 } else if (p->ir_filter_onset) | |
741 p->ir_filter_onset--; | |
742 | |
743 if (!p->ir_filter_onset) { | |
744 int i, count = 0; | |
745 | |
746 for (i = 0; i < 5; i++) | |
747 if (p->pitch_gain[i] < 0.6) | |
748 count++; | |
749 if (count > 2) | |
750 ir_filter_nr = 0; | |
751 | |
752 if (ir_filter_nr > p->prev_ir_filter_nr + 1) | |
753 ir_filter_nr--; | |
754 } else if (ir_filter_nr < 2) | |
755 ir_filter_nr++; | |
756 | |
757 // Disable filtering for very low level of fixed_gain. | |
758 // Note this step is not specified in the technical description but is in | |
759 // the reference source in the function Ph_disp. | |
760 if (fixed_gain < 5.0) | |
761 ir_filter_nr = 2; | |
762 | |
763 if (p->cur_frame_mode != MODE_7k4 && p->cur_frame_mode < MODE_10k2 | |
764 && ir_filter_nr < 2) { | |
765 apply_ir_filter(out, fixed_sparse, | |
766 (p->cur_frame_mode == MODE_7k95 ? | |
767 ir_filters_lookup_MODE_7k95 : | |
768 ir_filters_lookup)[ir_filter_nr]); | |
769 fixed_vector = out; | |
770 } | |
771 | |
772 // update ir filter strength history | |
773 p->prev_ir_filter_nr = ir_filter_nr; | |
774 p->prev_sparse_fixed_gain = fixed_gain; | |
775 | |
776 return fixed_vector; | |
777 } | |
778 | |
779 /// @} | |
780 | |
781 | |
782 /// @defgroup amr_synthesis AMR synthesis functions | |
783 /// @{ | |
784 | |
785 /** | |
786 * Conduct 10th order linear predictive coding synthesis. | |
787 * | |
788 * @param p pointer to the AMRContext | |
789 * @param lpc pointer to the LPC coefficients | |
790 * @param fixed_gain fixed codebook gain for synthesis | |
791 * @param fixed_vector algebraic codebook vector | |
792 * @param samples pointer to the output speech samples | |
793 * @param overflow 16-bit overflow flag | |
794 */ | |
795 static int synthesis(AMRContext *p, float *lpc, | |
796 float fixed_gain, const float *fixed_vector, | |
797 float *samples, uint8_t overflow) | |
798 { | |
11652
8b6f3d3b55cb
Move clipping of audio samples (for those codecs outputting float) from decoder
rbultje
parents:
11648
diff
changeset
|
799 int i; |
11235 | 800 float excitation[AMR_SUBFRAME_SIZE]; |
801 | |
802 // if an overflow has been detected, the pitch vector is scaled down by a | |
803 // factor of 4 | |
804 if (overflow) | |
805 for (i = 0; i < AMR_SUBFRAME_SIZE; i++) | |
806 p->pitch_vector[i] *= 0.25; | |
807 | |
808 ff_weighted_vector_sumf(excitation, p->pitch_vector, fixed_vector, | |
809 p->pitch_gain[4], fixed_gain, AMR_SUBFRAME_SIZE); | |
810 | |
811 // emphasize pitch vector contribution | |
812 if (p->pitch_gain[4] > 0.5 && !overflow) { | |
813 float energy = ff_dot_productf(excitation, excitation, | |
814 AMR_SUBFRAME_SIZE); | |
815 float pitch_factor = | |
816 p->pitch_gain[4] * | |
817 (p->cur_frame_mode == MODE_12k2 ? | |
818 0.25 * FFMIN(p->pitch_gain[4], 1.0) : | |
819 0.5 * FFMIN(p->pitch_gain[4], SHARP_MAX)); | |
820 | |
821 for (i = 0; i < AMR_SUBFRAME_SIZE; i++) | |
822 excitation[i] += pitch_factor * p->pitch_vector[i]; | |
823 | |
824 ff_scale_vector_to_given_sum_of_squares(excitation, excitation, energy, | |
825 AMR_SUBFRAME_SIZE); | |
826 } | |
827 | |
828 ff_celp_lp_synthesis_filterf(samples, lpc, excitation, AMR_SUBFRAME_SIZE, | |
829 LP_FILTER_ORDER); | |
830 | |
831 // detect overflow | |
832 for (i = 0; i < AMR_SUBFRAME_SIZE; i++) | |
833 if (fabsf(samples[i]) > AMR_SAMPLE_BOUND) { | |
11652
8b6f3d3b55cb
Move clipping of audio samples (for those codecs outputting float) from decoder
rbultje
parents:
11648
diff
changeset
|
834 return 1; |
11235 | 835 } |
836 | |
11652
8b6f3d3b55cb
Move clipping of audio samples (for those codecs outputting float) from decoder
rbultje
parents:
11648
diff
changeset
|
837 return 0; |
11235 | 838 } |
839 | |
840 /// @} | |
841 | |
842 | |
843 /// @defgroup amr_update AMR update functions | |
844 /// @{ | |
845 | |
846 /** | |
847 * Update buffers and history at the end of decoding a subframe. | |
848 * | |
849 * @param p pointer to the AMRContext | |
850 */ | |
851 static void update_state(AMRContext *p) | |
852 { | |
853 memcpy(p->prev_lsp_sub4, p->lsp[3], LP_FILTER_ORDER * sizeof(p->lsp[3][0])); | |
854 | |
855 memmove(&p->excitation_buf[0], &p->excitation_buf[AMR_SUBFRAME_SIZE], | |
856 (PITCH_DELAY_MAX + LP_FILTER_ORDER + 1) * sizeof(float)); | |
857 | |
858 memmove(&p->pitch_gain[0], &p->pitch_gain[1], 4 * sizeof(float)); | |
859 memmove(&p->fixed_gain[0], &p->fixed_gain[1], 4 * sizeof(float)); | |
860 | |
861 memmove(&p->samples_in[0], &p->samples_in[AMR_SUBFRAME_SIZE], | |
862 LP_FILTER_ORDER * sizeof(float)); | |
863 } | |
864 | |
865 /// @} | |
866 | |
867 | |
868 /// @defgroup amr_postproc AMR Post processing functions | |
869 /// @{ | |
870 | |
871 /** | |
872 * Get the tilt factor of a formant filter from its transfer function | |
873 * | |
874 * @param lpc_n LP_FILTER_ORDER coefficients of the numerator | |
875 * @param lpc_d LP_FILTER_ORDER coefficients of the denominator | |
876 */ | |
877 static float tilt_factor(float *lpc_n, float *lpc_d) | |
878 { | |
879 float rh0, rh1; // autocorrelation at lag 0 and 1 | |
880 | |
881 // LP_FILTER_ORDER prior zeros are needed for ff_celp_lp_synthesis_filterf | |
882 float impulse_buffer[LP_FILTER_ORDER + AMR_TILT_RESPONSE] = { 0 }; | |
883 float *hf = impulse_buffer + LP_FILTER_ORDER; // start of impulse response | |
884 | |
885 hf[0] = 1.0; | |
886 memcpy(hf + 1, lpc_n, sizeof(float) * LP_FILTER_ORDER); | |
887 ff_celp_lp_synthesis_filterf(hf, lpc_d, hf, AMR_TILT_RESPONSE, | |
888 LP_FILTER_ORDER); | |
889 | |
890 rh0 = ff_dot_productf(hf, hf, AMR_TILT_RESPONSE); | |
891 rh1 = ff_dot_productf(hf, hf + 1, AMR_TILT_RESPONSE - 1); | |
892 | |
893 // The spec only specifies this check for 12.2 and 10.2 kbit/s | |
894 // modes. But in the ref source the tilt is always non-negative. | |
895 return rh1 >= 0.0 ? rh1 / rh0 * AMR_TILT_GAMMA_T : 0.0; | |
896 } | |
897 | |
898 /** | |
899 * Perform adaptive post-filtering to enhance the quality of the speech. | |
900 * See section 6.2.1. | |
901 * | |
902 * @param p pointer to the AMRContext | |
903 * @param lpc interpolated LP coefficients for this subframe | |
904 * @param buf_out output of the filter | |
905 */ | |
906 static void postfilter(AMRContext *p, float *lpc, float *buf_out) | |
907 { | |
908 int i; | |
909 float *samples = p->samples_in + LP_FILTER_ORDER; // Start of input | |
910 | |
911 float speech_gain = ff_dot_productf(samples, samples, | |
912 AMR_SUBFRAME_SIZE); | |
913 | |
914 float pole_out[AMR_SUBFRAME_SIZE + LP_FILTER_ORDER]; // Output of pole filter | |
915 const float *gamma_n, *gamma_d; // Formant filter factor table | |
916 float lpc_n[LP_FILTER_ORDER], lpc_d[LP_FILTER_ORDER]; // Transfer function coefficients | |
917 | |
918 if (p->cur_frame_mode == MODE_12k2 || p->cur_frame_mode == MODE_10k2) { | |
919 gamma_n = ff_pow_0_7; | |
920 gamma_d = ff_pow_0_75; | |
921 } else { | |
922 gamma_n = ff_pow_0_55; | |
923 gamma_d = ff_pow_0_7; | |
924 } | |
925 | |
926 for (i = 0; i < LP_FILTER_ORDER; i++) { | |
927 lpc_n[i] = lpc[i] * gamma_n[i]; | |
928 lpc_d[i] = lpc[i] * gamma_d[i]; | |
929 } | |
930 | |
931 memcpy(pole_out, p->postfilter_mem, sizeof(float) * LP_FILTER_ORDER); | |
932 ff_celp_lp_synthesis_filterf(pole_out + LP_FILTER_ORDER, lpc_d, samples, | |
933 AMR_SUBFRAME_SIZE, LP_FILTER_ORDER); | |
934 memcpy(p->postfilter_mem, pole_out + AMR_SUBFRAME_SIZE, | |
935 sizeof(float) * LP_FILTER_ORDER); | |
936 | |
937 ff_celp_lp_zero_synthesis_filterf(buf_out, lpc_n, | |
938 pole_out + LP_FILTER_ORDER, | |
939 AMR_SUBFRAME_SIZE, LP_FILTER_ORDER); | |
940 | |
941 ff_tilt_compensation(&p->tilt_mem, tilt_factor(lpc_n, lpc_d), buf_out, | |
942 AMR_SUBFRAME_SIZE); | |
943 | |
11647
26aabf52f578
Split the input/output data arguments to ff_adaptive_gain_control().
rbultje
parents:
11645
diff
changeset
|
944 ff_adaptive_gain_control(buf_out, buf_out, speech_gain, AMR_SUBFRAME_SIZE, |
11462 | 945 AMR_AGC_ALPHA, &p->postfilter_agc); |
11235 | 946 } |
947 | |
948 /// @} | |
949 | |
950 static int amrnb_decode_frame(AVCodecContext *avctx, void *data, int *data_size, | |
951 AVPacket *avpkt) | |
952 { | |
953 | |
954 AMRContext *p = avctx->priv_data; // pointer to private data | |
955 const uint8_t *buf = avpkt->data; | |
956 int buf_size = avpkt->size; | |
957 float *buf_out = data; // pointer to the output data buffer | |
958 int i, subframe; | |
959 float fixed_gain_factor; | |
960 AMRFixed fixed_sparse = {0}; // fixed vector up to anti-sparseness processing | |
961 float spare_vector[AMR_SUBFRAME_SIZE]; // extra stack space to hold result from anti-sparseness processing | |
962 float synth_fixed_gain; // the fixed gain that synthesis should use | |
963 const float *synth_fixed_vector; // pointer to the fixed vector that synthesis should use | |
964 | |
965 p->cur_frame_mode = unpack_bitstream(p, buf, buf_size); | |
966 if (p->cur_frame_mode == MODE_DTX) { | |
967 av_log_missing_feature(avctx, "dtx mode", 1); | |
968 return -1; | |
969 } | |
970 | |
971 if (p->cur_frame_mode == MODE_12k2) { | |
972 lsf2lsp_5(p); | |
973 } else | |
974 lsf2lsp_3(p); | |
975 | |
976 for (i = 0; i < 4; i++) | |
977 ff_acelp_lspd2lpc(p->lsp[i], p->lpc[i], 5); | |
978 | |
979 for (subframe = 0; subframe < 4; subframe++) { | |
980 const AMRNBSubframe *amr_subframe = &p->frame.subframe[subframe]; | |
981 | |
982 decode_pitch_vector(p, amr_subframe, subframe); | |
983 | |
984 decode_fixed_sparse(&fixed_sparse, amr_subframe->pulses, | |
985 p->cur_frame_mode, subframe); | |
986 | |
987 // The fixed gain (section 6.1.3) depends on the fixed vector | |
988 // (section 6.1.2), but the fixed vector calculation uses | |
989 // pitch sharpening based on the on the pitch gain (section 6.1.3). | |
990 // So the correct order is: pitch gain, pitch sharpening, fixed gain. | |
991 decode_gains(p, amr_subframe, p->cur_frame_mode, subframe, | |
992 &fixed_gain_factor); | |
993 | |
994 pitch_sharpening(p, subframe, p->cur_frame_mode, &fixed_sparse); | |
995 | |
996 ff_set_fixed_vector(p->fixed_vector, &fixed_sparse, 1.0, | |
997 AMR_SUBFRAME_SIZE); | |
998 | |
999 p->fixed_gain[4] = | |
1000 ff_amr_set_fixed_gain(fixed_gain_factor, | |
1001 ff_dot_productf(p->fixed_vector, p->fixed_vector, | |
1002 AMR_SUBFRAME_SIZE)/AMR_SUBFRAME_SIZE, | |
1003 p->prediction_error, | |
1004 energy_mean[p->cur_frame_mode], energy_pred_fac); | |
1005 | |
1006 // The excitation feedback is calculated without any processing such | |
1007 // as fixed gain smoothing. This isn't mentioned in the specification. | |
1008 for (i = 0; i < AMR_SUBFRAME_SIZE; i++) | |
1009 p->excitation[i] *= p->pitch_gain[4]; | |
1010 ff_set_fixed_vector(p->excitation, &fixed_sparse, p->fixed_gain[4], | |
1011 AMR_SUBFRAME_SIZE); | |
1012 | |
1013 // In the ref decoder, excitation is stored with no fractional bits. | |
1014 // This step prevents buzz in silent periods. The ref encoder can | |
1015 // emit long sequences with pitch factor greater than one. This | |
1016 // creates unwanted feedback if the excitation vector is nonzero. | |
1017 // (e.g. test sequence T19_795.COD in 3GPP TS 26.074) | |
1018 for (i = 0; i < AMR_SUBFRAME_SIZE; i++) | |
1019 p->excitation[i] = truncf(p->excitation[i]); | |
1020 | |
1021 // Smooth fixed gain. | |
1022 // The specification is ambiguous, but in the reference source, the | |
1023 // smoothed value is NOT fed back into later fixed gain smoothing. | |
1024 synth_fixed_gain = fixed_gain_smooth(p, p->lsf_q[subframe], | |
1025 p->lsf_avg, p->cur_frame_mode); | |
1026 | |
1027 synth_fixed_vector = anti_sparseness(p, &fixed_sparse, p->fixed_vector, | |
1028 synth_fixed_gain, spare_vector); | |
1029 | |
1030 if (synthesis(p, p->lpc[subframe], synth_fixed_gain, | |
1031 synth_fixed_vector, &p->samples_in[LP_FILTER_ORDER], 0)) | |
1032 // overflow detected -> rerun synthesis scaling pitch vector down | |
1033 // by a factor of 4, skipping pitch vector contribution emphasis | |
1034 // and adaptive gain control | |
1035 synthesis(p, p->lpc[subframe], synth_fixed_gain, | |
1036 synth_fixed_vector, &p->samples_in[LP_FILTER_ORDER], 1); | |
1037 | |
1038 postfilter(p, p->lpc[subframe], buf_out + subframe * AMR_SUBFRAME_SIZE); | |
1039 | |
1040 // update buffers and history | |
1041 ff_clear_fixed_vector(p->fixed_vector, &fixed_sparse, AMR_SUBFRAME_SIZE); | |
1042 update_state(p); | |
1043 } | |
1044 | |
11648
0516f4062307
Split input/output data arguments to ff_acelp_apply_order_2_transfer_function().
rbultje
parents:
11647
diff
changeset
|
1045 ff_acelp_apply_order_2_transfer_function(buf_out, buf_out, highpass_zeros, |
11676
ceec2fb08b8e
amrnbdec: Apply AMR_SAMPLE_SCALE when finishing the decoder output
mstorsjo
parents:
11652
diff
changeset
|
1046 highpass_poles, |
ceec2fb08b8e
amrnbdec: Apply AMR_SAMPLE_SCALE when finishing the decoder output
mstorsjo
parents:
11652
diff
changeset
|
1047 highpass_gain * AMR_SAMPLE_SCALE, |
11235 | 1048 p->high_pass_mem, AMR_BLOCK_SIZE); |
1049 | |
1050 /* Update averaged lsf vector (used for fixed gain smoothing). | |
1051 * | |
1052 * Note that lsf_avg should not incorporate the current frame's LSFs | |
1053 * for fixed_gain_smooth. | |
1054 * The specification has an incorrect formula: the reference decoder uses | |
1055 * qbar(n-1) rather than qbar(n) in section 6.1(4) equation 71. */ | |
1056 ff_weighted_vector_sumf(p->lsf_avg, p->lsf_avg, p->lsf_q[3], | |
1057 0.84, 0.16, LP_FILTER_ORDER); | |
1058 | |
1059 /* report how many samples we got */ | |
1060 *data_size = AMR_BLOCK_SIZE * sizeof(float); | |
1061 | |
1062 /* return the amount of bytes consumed if everything was OK */ | |
1063 return frame_sizes_nb[p->cur_frame_mode] + 1; // +7 for rounding and +8 for TOC | |
1064 } | |
1065 | |
1066 | |
1067 AVCodec amrnb_decoder = { | |
1068 .name = "amrnb", | |
11560
8a4984c5cacc
Define AVMediaType enum, and use it instead of enum CodecType, which
stefano
parents:
11462
diff
changeset
|
1069 .type = AVMEDIA_TYPE_AUDIO, |
11235 | 1070 .id = CODEC_ID_AMR_NB, |
1071 .priv_data_size = sizeof(AMRContext), | |
1072 .init = amrnb_decode_init, | |
1073 .decode = amrnb_decode_frame, | |
1074 .long_name = NULL_IF_CONFIG_SMALL("Adaptive Multi-Rate NarrowBand"), | |
1075 .sample_fmts = (enum SampleFormat[]){SAMPLE_FMT_FLT,SAMPLE_FMT_NONE}, | |
1076 }; |