comparison aac.c @ 8199:8a8d40fe77d9 libavcodec

AAC: Frequency domain prediction and hence Main profile support. Patch by Alex Converse ( alex converse gmail com )
author superdump
date Mon, 24 Nov 2008 00:13:50 +0000
parents 7baa25290231
children 843d82925adf
comparison
equal deleted inserted replaced
8198:de344498875e 8199:8a8d40fe77d9
39 * N (code in SoC repo) filterbank - Scalable Sample Rate 39 * N (code in SoC repo) filterbank - Scalable Sample Rate
40 * Y Temporal Noise Shaping 40 * Y Temporal Noise Shaping
41 * N (code in SoC repo) Long Term Prediction 41 * N (code in SoC repo) Long Term Prediction
42 * Y intensity stereo 42 * Y intensity stereo
43 * Y channel coupling 43 * Y channel coupling
44 * N frequency domain prediction 44 * Y frequency domain prediction
45 * Y Perceptual Noise Substitution 45 * Y Perceptual Noise Substitution
46 * Y Mid/Side stereo 46 * Y Mid/Side stereo
47 * N Scalable Inverse AAC Quantization 47 * N Scalable Inverse AAC Quantization
48 * N Frequency Selective Switch 48 * N Frequency Selective Switch
49 * N upsampling filter 49 * N upsampling filter
329 } 329 }
330 330
331 skip_bits_long(&gb, i); 331 skip_bits_long(&gb, i);
332 332
333 switch (ac->m4ac.object_type) { 333 switch (ac->m4ac.object_type) {
334 case AOT_AAC_MAIN:
334 case AOT_AAC_LC: 335 case AOT_AAC_LC:
335 if (decode_ga_specific_config(ac, &gb, ac->m4ac.chan_config)) 336 if (decode_ga_specific_config(ac, &gb, ac->m4ac.chan_config))
336 return -1; 337 return -1;
337 break; 338 break;
338 default: 339 default:
350 * 351 *
351 * @return Returns a 32-bit pseudorandom integer 352 * @return Returns a 32-bit pseudorandom integer
352 */ 353 */
353 static av_always_inline int lcg_random(int previous_val) { 354 static av_always_inline int lcg_random(int previous_val) {
354 return previous_val * 1664525 + 1013904223; 355 return previous_val * 1664525 + 1013904223;
356 }
357
/**
 * Put a single predictor state back into its initial condition:
 * r0, r1, cor0 and cor1 are zeroed, var0 and var1 are set to 1.
 *
 * @param ps predictor state to reset
 */
358 static void reset_predict_state(PredictorState * ps) {
359 ps->r0 = 0.0f;
360 ps->r1 = 0.0f;
361 ps->cor0 = 0.0f;
362 ps->cor1 = 0.0f;
363 ps->var0 = 1.0f;
364 ps->var1 = 1.0f;
365 }
366
/**
 * Reset every entry of a predictor state array.
 *
 * @param ps first element of an array holding MAX_PREDICTORS predictor states
 */
367 static void reset_all_predictors(PredictorState * ps) {
368 int i;
369 for (i = 0; i < MAX_PREDICTORS; i++)
370 reset_predict_state(&ps[i]);
371 }
372
/**
 * Reset one group of predictors: the entries at indices group_num-1,
 * group_num-1+30, group_num-1+60, ... below MAX_PREDICTORS.
 *
 * @param ps        first element of an array holding MAX_PREDICTORS predictor states
 * @param group_num 1-based group number.
 *                  NOTE(review): 0 would start the loop at index -1; the callers
 *                  visible here only pass non-zero values that were validated to
 *                  lie in 1..30 -- confirm for any other caller.
 */
373 static void reset_predictor_group(PredictorState * ps, int group_num) {
374 int i;
/* stride of 30: every 30th predictor belongs to the same group */
375 for (i = group_num-1; i < MAX_PREDICTORS; i+=30)
376 reset_predict_state(&ps[i]);
355 } 377 }
356 378
357 static av_cold int aac_decode_init(AVCodecContext * avccontext) { 379 static av_cold int aac_decode_init(AVCodecContext * avccontext) {
358 AACContext * ac = avccontext->priv_data; 380 AACContext * ac = avccontext->priv_data;
359 int i; 381 int i;
428 if (count == 255) 450 if (count == 255)
429 count += get_bits(gb, 8); 451 count += get_bits(gb, 8);
430 if (byte_align) 452 if (byte_align)
431 align_get_bits(gb); 453 align_get_bits(gb);
432 skip_bits_long(gb, 8 * count); 454 skip_bits_long(gb, 8 * count);
455 }
456
/**
 * Read the prediction side information of an individual channel stream.
 *
 * Reads one flag bit; when it is set, a 5-bit predictor reset group follows
 * and is rejected unless it lies in 1..30. Then one prediction_used flag is
 * read per scalefactor band, for the first
 * min(ics->max_sfb, ff_aac_pred_sfb_max[sampling_index]) bands.
 * When the flag bit is clear, ics->predictor_reset_group is left untouched.
 *
 * @param ac  decoder context (used for the sampling index and for logging)
 * @param ics channel stream info receiving the decoded values
 * @param gb  bitstream reader
 *
 * @return 0 on success, -1 if the predictor reset group is out of range
 */
457 static int decode_prediction(AACContext * ac, IndividualChannelStream * ics, GetBitContext * gb) {
458 int sfb;
/* a set bit announces that a reset group number follows */
459 if (get_bits1(gb)) {
460 ics->predictor_reset_group = get_bits(gb, 5);
/* 5 bits can encode 0..31, but only 1..30 are accepted */
461 if (ics->predictor_reset_group == 0 || ics->predictor_reset_group > 30) {
462 av_log(ac->avccontext, AV_LOG_ERROR, "Invalid Predictor Reset Group.\n");
463 return -1;
464 }
465 }
/* bands at or above the limit keep whatever prediction_used value they had */
466 for (sfb = 0; sfb < FFMIN(ics->max_sfb, ff_aac_pred_sfb_max[ac->m4ac.sampling_index]); sfb++) {
467 ics->prediction_used[sfb] = get_bits1(gb);
468 }
469 return 0;
433 } 470 }
434 471
435 /** 472 /**
436 * Decode Individual Channel Stream info; reference: table 4.6. 473 * Decode Individual Channel Stream info; reference: table 4.6.
437 * 474 *
462 } 499 }
463 ics->num_windows = 8; 500 ics->num_windows = 8;
464 ics->swb_offset = swb_offset_128[ac->m4ac.sampling_index]; 501 ics->swb_offset = swb_offset_128[ac->m4ac.sampling_index];
465 ics->num_swb = ff_aac_num_swb_128[ac->m4ac.sampling_index]; 502 ics->num_swb = ff_aac_num_swb_128[ac->m4ac.sampling_index];
466 ics->tns_max_bands = tns_max_bands_128[ac->m4ac.sampling_index]; 503 ics->tns_max_bands = tns_max_bands_128[ac->m4ac.sampling_index];
504 ics->predictor_present = 0;
467 } else { 505 } else {
468 ics->max_sfb = get_bits(gb, 6); 506 ics->max_sfb = get_bits(gb, 6);
469 ics->num_windows = 1; 507 ics->num_windows = 1;
470 ics->swb_offset = swb_offset_1024[ac->m4ac.sampling_index]; 508 ics->swb_offset = swb_offset_1024[ac->m4ac.sampling_index];
471 ics->num_swb = ff_aac_num_swb_1024[ac->m4ac.sampling_index]; 509 ics->num_swb = ff_aac_num_swb_1024[ac->m4ac.sampling_index];
472 ics->tns_max_bands = tns_max_bands_1024[ac->m4ac.sampling_index]; 510 ics->tns_max_bands = tns_max_bands_1024[ac->m4ac.sampling_index];
473 if (get_bits1(gb)) { 511 ics->predictor_present = get_bits1(gb);
512 ics->predictor_reset_group = 0;
513 if (ics->predictor_present) {
514 if (ac->m4ac.object_type == AOT_AAC_MAIN) {
515 if (decode_prediction(ac, ics, gb)) {
516 memset(ics, 0, sizeof(IndividualChannelStream));
517 return -1;
518 }
519 } else if (ac->m4ac.object_type == AOT_AAC_LC) {
520 av_log(ac->avccontext, AV_LOG_ERROR, "Prediction is not allowed in AAC-LC.\n");
521 memset(ics, 0, sizeof(IndividualChannelStream));
522 return -1;
523 } else {
474 av_log_missing_feature(ac->avccontext, "Predictor bit set but LTP is", 1); 524 av_log_missing_feature(ac->avccontext, "Predictor bit set but LTP is", 1);
475 memset(ics, 0, sizeof(IndividualChannelStream)); 525 memset(ics, 0, sizeof(IndividualChannelStream));
476 return -1; 526 return -1;
527 }
477 } 528 }
478 } 529 }
479 530
480 if(ics->max_sfb > ics->num_swb) { 531 if(ics->max_sfb > ics->num_swb) {
481 av_log(ac->avccontext, AV_LOG_ERROR, 532 av_log(ac->avccontext, AV_LOG_ERROR,
784 } 835 }
785 } 836 }
786 return 0; 837 return 0;
787 } 838 }
788 839
/**
 * Reduce a float to 8 significant mantissa bits using roundf()
 * (halfway cases round away from zero).
 *
 * The value is split into mantissa and exponent with frexpf(), the mantissa
 * is scaled by 2^8, rounded to an integer, and the result is rebuilt with
 * the exponent lowered by 8 to undo the scaling.
 *
 * @param pf value to reduce
 * @return the reduced-precision value
 */
840 static av_always_inline float flt16_round(float pf) {
841 int exp;
842 pf = frexpf(pf, &exp);
843 pf = ldexpf(roundf(ldexpf(pf, 8)), exp-8);
844 return pf;
845 }
846
/**
 * Reduce a float to 8 significant mantissa bits using rintf(), which rounds
 * according to the current floating-point rounding mode (halfway cases go
 * to the even neighbour in the default round-to-nearest mode).
 *
 * The value is split into mantissa and exponent with frexpf(), the mantissa
 * is scaled by 2^8, rounded to an integer, and the result is rebuilt with
 * the exponent lowered by 8 to undo the scaling.
 *
 * @param pf value to reduce
 * @return the reduced-precision value
 */
847 static av_always_inline float flt16_even(float pf) {
848 int exp;
849 pf = frexpf(pf, &exp);
850 pf = ldexpf(rintf(ldexpf(pf, 8)), exp-8);
851 return pf;
852 }
853
/**
 * Reduce a float to 8 significant mantissa bits using truncf()
 * (the excess bits are dropped, i.e. rounding is toward zero).
 *
 * The value is split into mantissa and exponent with frexpf(), the mantissa
 * is scaled by 2^8, truncated to an integer, and the result is rebuilt with
 * the exponent lowered by 8 to undo the scaling.
 *
 * @param pf value to reduce
 * @return the reduced-precision value
 */
854 static av_always_inline float flt16_trunc(float pf) {
855 int exp;
856 pf = frexpf(pf, &exp);
857 pf = ldexpf(truncf(ldexpf(pf, 8)), exp-8);
858 return pf;
859 }
860
/**
 * Run one predictor step for a single spectral coefficient and update the
 * predictor state.
 *
 * The state is always updated; the predicted value is only added to the
 * coefficient when output_enable is non-zero.
 *
 * @param ac            decoder context; only sf_scale is read here
 * @param ps            predictor state for this coefficient, updated in place
 * @param coef          coefficient to predict; modified in place when
 *                      output_enable is non-zero
 * @param output_enable non-zero to add the prediction to *coef
 */
861 static void predict(AACContext * ac, PredictorState * ps, float* coef, int output_enable) {
862 const float a = 0.953125; // 61.0/64
863 const float alpha = 0.90625; // 29.0/32
864 float e0, e1;
865 float pv;
866 float k1, k2;
867
/* each gain is forced to 0 unless the matching variance exceeds 1 */
868 k1 = ps->var0 > 1 ? ps->cor0 * flt16_even(a / ps->var0) : 0;
869 k2 = ps->var1 > 1 ? ps->cor1 * flt16_even(a / ps->var1) : 0;
870
/* predicted value, computed from the state left by the previous call */
871 pv = flt16_round(k1 * ps->r0 + k2 * ps->r1);
872 if (output_enable)
873 *coef += pv * ac->sf_scale;
874
/* work on the coefficient with sf_scale divided back out */
875 e0 = *coef / ac->sf_scale;
876 e1 = e0 - k1 * ps->r0;
877
/* cor/var updates read the old r0/r1, so they must precede the r0/r1 updates below */
878 ps->cor1 = flt16_trunc(alpha * ps->cor1 + ps->r1 * e1);
879 ps->var1 = flt16_trunc(alpha * ps->var1 + 0.5 * (ps->r1 * ps->r1 + e1 * e1));
880 ps->cor0 = flt16_trunc(alpha * ps->cor0 + ps->r0 * e0);
881 ps->var0 = flt16_trunc(alpha * ps->var0 + 0.5 * (ps->r0 * ps->r0 + e0 * e0));
882
/* r1 is derived from the old r0, so it is written before r0 is replaced */
883 ps->r1 = flt16_trunc(a * (ps->r0 - k1 * e0));
884 ps->r0 = flt16_trunc(a * e0);
885 }
886
887 /**
888 * Apply AAC-Main style frequency domain prediction.
 *
 * On the first call for a channel all predictor states are reset.
 * Unless the window sequence is EIGHT_SHORT_SEQUENCE, predict() is run on
 * every coefficient of the scalefactor bands below
 * ff_aac_pred_sfb_max[sampling_index]; the prediction is only added to the
 * coefficient when predictor_present and the band's prediction_used flag
 * are both set. Afterwards the predictor group named by
 * predictor_reset_group is reset if that field is non-zero.
 * For EIGHT_SHORT_SEQUENCE all predictor states are reset instead.
 *
 * @param ac  decoder context
 * @param sce channel element whose coeffs and predictor state are updated
889 */
890 static void apply_prediction(AACContext * ac, SingleChannelElement * sce) {
891 int sfb, k;
892
/* lazy one-time initialisation of the predictor states */
893 if (!sce->ics.predictor_initialized) {
894 reset_all_predictors(sce->ics.predictor_state);
895 sce->ics.predictor_initialized = 1;
896 }
897
898 if (sce->ics.window_sequence[0] != EIGHT_SHORT_SEQUENCE) {
899 for (sfb = 0; sfb < ff_aac_pred_sfb_max[ac->m4ac.sampling_index]; sfb++) {
900 for (k = sce->ics.swb_offset[sfb]; k < sce->ics.swb_offset[sfb + 1]; k++) {
/* predict() is called for every coefficient so its state keeps tracking; the last argument only gates the output */
901 predict(ac, &sce->ics.predictor_state[k], &sce->coeffs[k],
902 sce->ics.predictor_present && sce->ics.prediction_used[sfb]);
903 }
904 }
/* 0 means no reset group was signalled */
905 if (sce->ics.predictor_reset_group)
906 reset_predictor_group(sce->ics.predictor_state, sce->ics.predictor_reset_group);
907 } else
908 reset_all_predictors(sce->ics.predictor_state);
909 }
910
789 /** 911 /**
790 * Decode an individual_channel_stream payload; reference: table 4.44. 912 * Decode an individual_channel_stream payload; reference: table 4.44.
791 * 913 *
792 * @param common_window Channels have independent [0], or shared [1], Individual Channel Stream information. 914 * @param common_window Channels have independent [0], or shared [1], Individual Channel Stream information.
793 * @param scale_flag scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.) 915 * @param scale_flag scalable [1] or non-scalable [0] AAC (Unused until scalable AAC is implemented.)
838 } 960 }
839 } 961 }
840 962
841 if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present, &pulse, ics, sce->band_type) < 0) 963 if (decode_spectrum_and_dequant(ac, out, gb, sce->sf, pulse_present, &pulse, ics, sce->band_type) < 0)
842 return -1; 964 return -1;
965
966 if(ac->m4ac.object_type == AOT_AAC_MAIN)
967 apply_prediction(ac, sce);
968
843 return 0; 969 return 0;
844 } 970 }
845 971
846 /** 972 /**
847 * Mid/Side stereo decoding; reference: 4.6.8.1.3. 973 * Mid/Side stereo decoding; reference: 4.6.8.1.3.