Mercurial > libavcodec.hg
comparison aac.c @ 8199:8a8d40fe77d9 libavcodec
AAC: Frequency domain prediction and hence Main profile support
Patch by Alex Converse ( alex converse gmail com )
author | superdump |
---|---|
date | Mon, 24 Nov 2008 00:13:50 +0000 |
parents | 7baa25290231 |
children | 843d82925adf |
comparison
equal
deleted
inserted
replaced
8198:de344498875e | 8199:8a8d40fe77d9 |
---|---|
39 * N (code in SoC repo) filterbank - Scalable Sample Rate | 39 * N (code in SoC repo) filterbank - Scalable Sample Rate |
40 * Y Temporal Noise Shaping | 40 * Y Temporal Noise Shaping |
41 * N (code in SoC repo) Long Term Prediction | 41 * N (code in SoC repo) Long Term Prediction |
42 * Y intensity stereo | 42 * Y intensity stereo |
43 * Y channel coupling | 43 * Y channel coupling |
44 * N frequency domain prediction | 44 * Y frequency domain prediction |
45 * Y Perceptual Noise Substitution | 45 * Y Perceptual Noise Substitution |
46 * Y Mid/Side stereo | 46 * Y Mid/Side stereo |
47 * N Scalable Inverse AAC Quantization | 47 * N Scalable Inverse AAC Quantization |
48 * N Frequency Selective Switch | 48 * N Frequency Selective Switch |
49 * N upsampling filter | 49 * N upsampling filter |
329 } | 329 } |
330 | 330 |
331 skip_bits_long(&gb, i); | 331 skip_bits_long(&gb, i); |
332 | 332 |
333 switch (ac->m4ac.object_type) { | 333 switch (ac->m4ac.object_type) { |
| | 334 case AOT_AAC_MAIN: |
334 case AOT_AAC_LC: | 335 case AOT_AAC_LC: |
335 if (decode_ga_specific_config(ac, &gb, ac->m4ac.chan_config)) | 336 if (decode_ga_specific_config(ac, &gb, ac->m4ac.chan_config)) |
336 return -1; | 337 return -1; |
337 break; | 338 break; |
338 default: | 339 default: |
350 * | 351 * |
351 * @return Returns a 32-bit pseudorandom integer | 352 * @return Returns a 32-bit pseudorandom integer |
352 */ | 353 */ |
353 static av_always_inline int lcg_random(int previous_val) { | 354 static av_always_inline int lcg_random(int previous_val) { |
354 return previous_val * 1664525 + 1013904223; | 355 return previous_val * 1664525 + 1013904223; |
| | 356 } |
| | 357 |
| | 358 static void reset_predict_state(PredictorState * ps) { |
| | 359 ps->r0 = 0.0f; |
| | 360 ps->r1 = 0.0f; |
| | 361 ps->cor0 = 0.0f; |
| | 362 ps->cor1 = 0.0f; |
| | 363 ps->var0 = 1.0f; |
| | 364 ps->var1 = 1.0f; |
| | 365 } |
| | 366 |
| | 367 static void reset_all_predictors(PredictorState * ps) { |
| | 368 int i; |
| | 369 for (i = 0; i < MAX_PREDICTORS; i++) |
| | 370 reset_predict_state(&ps[i]); |
| | 371 } |
| | 372 |
| | 373 static void reset_predictor_group(PredictorState * ps, int group_num) { |
| | 374 int i; |
| | 375 for (i = group_num-1; i < MAX_PREDICTORS; i+=30) |
| | 376 reset_predict_state(&ps[i]); |
355 } | 377 } |
356 | 378 |
357 static av_cold int aac_decode_init(AVCodecContext * avccontext) { | 379 static av_cold int aac_decode_init(AVCodecContext * avccontext) { |
358 AACContext * ac = avccontext->priv_data; | 380 AACContext * ac = avccontext->priv_data; |
359 int i; | 381 int i; |
428 if (count == 255) | 450 if (count == 255) |
429 count += get_bits(gb, 8); | 451 count += get_bits(gb, 8); |
430 if (byte_align) | 452 if (byte_align) |
431 align_get_bits(gb); | 453 align_get_bits(gb); |
432 skip_bits_long(gb, 8 * count); | 454 skip_bits_long(gb, 8 * count); |
| | 455 } |
| | 456 |
| | 457 static int decode_prediction(AACContext * ac, IndividualChannelStream * ics, GetBitContext * gb) { |
| | 458 int sfb; |
| | 459 if (get_bits1(gb)) { |
| | 460 ics->predictor_reset_group = get_bits(gb, 5); |
| | 461 if (ics->predictor_reset_group == 0 || ics->predictor_reset_group > 30) { |
| | 462 av_log(ac->avccontext, AV_LOG_ERROR, "Invalid Predictor Reset Group.\n"); |
| | 463 return -1; |
| | 464 } |
| | 465 } |
| | 466 for (sfb = 0; sfb < FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->m4ac.sampling_index]); sfb++) { |
| | 467 ics->prediction_used[sfb] = get_bits1(gb); |
| | 468 } |
| | 469 return 0; |
433 } | 470 } |
434 | 471 |
435 /** | 472 /** |
436 * Decode Individual Channel Stream info; reference: table 4.6. | 473 * Decode Individual Channel Stream info; reference: table 4.6. |
437 * | 474 * |
462 } | 499 } |
463 ics->num_windows = 8; | 500 ics->num_windows = 8; |
464 ics->swb_offset = swb_offset_128[ac->m4ac.sampling_index]; | 501 ics->swb_offset = swb_offset_128[ac->m4ac.sampling_index]; |
465 ics->num_swb = ff_aac_num_swb_128[ac->m4ac.sampling_index]; | 502 ics->num_swb = ff_aac_num_swb_128[ac->m4ac.sampling_index]; |
466 ics->tns_max_bands = tns_max_bands_128[ac->m4ac.sampling_index]; | 503 ics->tns_max_bands = tns_max_bands_128[ac->m4ac.sampling_index]; |
| | 504 ics->predictor_present = 0; |
467 } else { | 505 } else { |
468 ics->max_sfb = get_bits(gb, 6); | 506 ics->max_sfb = get_bits(gb, 6); |
469 ics->num_windows = 1; | 507 ics->num_windows = 1; |
470 ics->swb_offset = swb_offset_1024[ac->m4ac.sampling_index]; | 508 ics->swb_offset = swb_offset_1024[ac->m4ac.sampling_index]; |
471 ics->num_swb = ff_aac_num_swb_1024[ac->m4ac.sampling_index]; | 509 ics->num_swb = ff_aac_num_swb_1024[ac->m4ac.sampling_index]; |
472 ics->tns_max_bands = tns_max_bands_1024[ac->m4ac.sampling_index]; | 510 ics->tns_max_bands = tns_max_bands_1024[ac->m4ac.sampling_index]; |
473 if (get_bits1(gb)) { | 511 ics->predictor_present = get_bits1(gb); |
| | 512 ics->predictor_reset_group = 0; |
| | 513 if (ics->predictor_present) { |
| | 514 if (ac->m4ac.object_type == AOT_AAC_MAIN) { |
| | 515 if (decode_prediction(ac, ics, gb)) { |
| | 516 memset(ics, 0, sizeof(IndividualChannelStream)); |
| | 517 return -1; |
| | 518 } |
| | 519 } else if (ac->m4ac.object_type == AOT_AAC_LC) { |
| | 520 av_log(ac->avccontext, AV_LOG_ERROR, "Prediction is not allowed in AAC-LC.\n"); |
| | 521 memset(ics, 0, sizeof(IndividualChannelStream)); |
| | 522 return -1; |
| | 523 } else { |
474 av_log_missing_feature(ac->avccontext, "Predictor bit set but LTP is", 1); | 524 av_log_missing_feature(ac->avccontext, "Predictor bit set but LTP is", 1); |
475 memset(ics, 0, sizeof(IndividualChannelStream)); | 525 memset(ics, 0, sizeof(IndividualChannelStream)); |
476 return -1; | 526 return -1; |
| | 527 } |
477 } | 528 } |
478 } | 529 } |
479 | 530 |
480 if(ics->max_sfb > ics->num_swb) { | 531 if(ics->max_sfb > ics->num_swb) { |
481 av_log(ac->avccontext, AV_LOG_ERROR, | 532 av_log(ac->avccontext, AV_LOG_ERROR, |
784 } | 835 } |
785 } | 836 } |
786 return 0; | 837 return 0; |
787 } | 838 } |
788 | 839 |
| | 840 static av_always_inline float flt16_round(float pf) { |
| | 841 int exp; |
| | 842 pf = frexpf(pf, &exp); |
| | 843 pf = ldexpf(roundf(ldexpf(pf, 8)), exp-8); |
| | 844 return pf; |
| | 845 } |
| | 846 |
| | 847 static av_always_inline float flt16_even(float pf) { |
| | 848 int exp; |
| | 849 pf = frexpf(pf, &exp); |
| | 850 pf = ldexpf(rintf(ldexpf(pf, 8)), exp-8); |
| | 851 return pf; |
| | 852 } |
| | 853 |
| | 854 static av_always_inline float flt16_trunc(float pf) { |
| | 855 int exp; |
| | 856 pf = frexpf(pf, &exp); |
| | 857 pf = ldexpf(truncf(ldexpf(pf, 8)), exp-8); |
| | 858 return pf; |
| | 859 } |
| | 860 |
| | 861 static void predict(AACContext * ac, PredictorState * ps, float* coef, int output_enable) { |
| | 862 const float a = 0.953125; // 61.0/64 |
| | 863 const float alpha = 0.90625; // 29.0/32 |
| | 864 float e0, e1; |
| | 865 float pv; |
| | 866 float k1, k2; |
| | 867 |
| | 868 k1 = ps->var0 > 1 ? ps->cor0 * flt16_even(a / ps->var0) : 0; |
| | 869 k2 = ps->var1 > 1 ? ps->cor1 * flt16_even(a / ps->var1) : 0; |
| | 870 |
| | 871 pv = flt16_round(k1 * ps->r0 + k2 * ps->r1); |
| | 872 if (output_enable) |
| | 873 *coef += pv * ac->sf_scale; |
| | 874 |
| | 875 e0 = *coef / ac->sf_scale; |
| | 876 e1 = e0 - k1 * ps->r0; |
| | 877 |
| | 878 ps->cor1 = flt16_trunc(alpha * ps->cor1 + ps->r1 * e1); |
| | 879 ps->var1 = flt16_trunc(alpha * ps->var1 + 0.5 * (ps->r1 * ps->r1 + e1 * e1)); |
| | 880 ps->cor0 = flt16_trunc(alpha * ps->cor0 + ps->r0 * e0); |
| | 881 ps->var0 = flt16_trunc(alpha * ps->var0 + 0.5 * (ps->r0 * ps->r0 + e0 * e0)); |
| | 882 |
| | 883 ps->r1 = flt16_trunc(a * (ps->r0 - k1 * e0)); |
| | 884 ps->r0 = flt16_trunc(a * e0); |
| | 885 } |
| | 886 |
| | 887 /** |
| | 888 * Apply AAC-Main style frequency domain prediction. |
| | 889 */ |
| | 890 static void apply_prediction(AACContext * ac, SingleChannelElement * sce) { |
| | 891 int sfb, k; |
| | 892 |
| | 893 if (!sce->ics.predictor_initialized) { |
| | 894 reset_all_predictors(sce->ics.predictor_state); |
| | 895 sce->ics.predictor_initialized = 1; |
| | 896 } |
| | 897 |
| | 898 if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) { |
| | 899 for (sfb = 0; sfb < ff_aac_pred_sfb_max[ac->m4ac.sampling_index]; sfb++) { |
| | 900 for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) { |
| | 901 predict(ac, &sce->ics.predictor_state[k], &sce->coeffs[k], |
| | 902 sce->ics.predictor_present && sce->ics.prediction_used[sfb]); |
| | 903 } |
| | 904 } |
| | 905 if (sce->ics.predictor_reset_group) |
| | 906 reset_predictor_group(sce->ics.predictor_state, sce->ics.predictor_reset_group); |
| | 907 } else |
| | 908 reset_all_predictors(sce->ics.predictor_state); |
| | 909 } |
| | 910 |
789 /** | 911 /** |
790 * Decode an individual_channel_stream payload; reference: table 4.44. | 912 * Decode an individual_channel_stream payload; reference: table 4.44. |
791 * | 913 * |
792 * @param common_window Channels have independent [0], or shared [1], Individual Channel Stream information. | 914 * @param common_window Channels have independent [0], or shared [1], Individual Channel Stream information. |
793 * @param scale_flag scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.) | 915 * @param scale_flag scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.) |
838 } | 960 } |
839 } | 961 } |
840 | 962 |
841 if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present, &pulse, ics, sce->band_type) < 0) | 963 if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present, &pulse, ics, sce->band_type) < 0) |
842 return -1; | 964 return -1; |
| | 965 |
| | 966 if(ac->m4ac.object_type == AOT_AAC_MAIN) |
| | 967 apply_prediction(ac, sce); |
| | 968 |
843 return 0; | 969 return 0; |
844 } | 970 } |
845 | 971 |
846 /** | 972 /** |
847 * Mid/Side stereo decoding; reference: 4.6.8.1.3. | 973 * Mid/Side stereo decoding; reference: 4.6.8.1.3. |