changeset 11559:444f4b594fdb libavcodec

Add spectral extension to the E-AC-3 decoder. Original patch by Justin, updated and resubmitted by Christophe Gisquet, christophe D gisquet A gmail
author cehoyos
date Tue, 30 Mar 2010 22:09:14 +0000
parents 44c5c540722c
children 8a4984c5cacc
files ac3dec.c ac3dec.h ac3dec_data.c ac3dec_data.h avcodec.h eac3dec.c eac3dec_data.c eac3dec_data.h
diffstat 8 files changed, 299 insertions(+), 21 deletions(-) [+]
line wrap: on
line diff
--- a/ac3dec.c	Tue Mar 30 21:05:11 2010 +0000
+++ b/ac3dec.c	Tue Mar 30 22:09:14 2010 +0000
@@ -815,14 +815,105 @@
 
     /* spectral extension strategy */
     if (s->eac3 && (!blk || get_bits1(gbc))) {
-        if (get_bits1(gbc)) {
-            av_log_missing_feature(s->avctx, "Spectral extension", 1);
-            return -1;
+        s->spx_in_use = get_bits1(gbc);
+        if (s->spx_in_use) {
+            int dst_start_freq, dst_end_freq, src_start_freq,
+                start_subband, end_subband;
+
+            /* determine which channels use spx */
+            if (s->channel_mode == AC3_CHMODE_MONO) {
+                s->channel_uses_spx[1] = 1;
+            } else {
+                for (ch = 1; ch <= fbw_channels; ch++)
+                    s->channel_uses_spx[ch] = get_bits1(gbc);
+            }
+
+            /* get the frequency bins of the spx copy region and the spx start
+               and end subbands */
+            dst_start_freq = get_bits(gbc, 2);
+            start_subband  = get_bits(gbc, 3) + 2;
+            if (start_subband > 7)
+                start_subband += start_subband - 7;
+            end_subband    = get_bits(gbc, 3) + 5;
+            if (end_subband   > 7)
+                end_subband   += end_subband   - 7;
+            dst_start_freq = dst_start_freq * 12 + 25;
+            src_start_freq = start_subband  * 12 + 25;
+            dst_end_freq   = end_subband    * 12 + 25;
+
+            /* check validity of spx ranges */
+            if (start_subband >= end_subband) {
+                av_log(s->avctx, AV_LOG_ERROR, "invalid spectral extension "
+                       "range (%d >= %d)\n", start_subband, end_subband);
+                return -1;
+            }
+            if (dst_start_freq >= src_start_freq) {
+                av_log(s->avctx, AV_LOG_ERROR, "invalid spectral extension "
+                       "copy start bin (%d >= %d)\n", dst_start_freq, src_start_freq);
+                return -1;
+            }
+
+            s->spx_dst_start_freq = dst_start_freq;
+            s->spx_src_start_freq = src_start_freq;
+            s->spx_dst_end_freq   = dst_end_freq;
+
+            decode_band_structure(gbc, blk, s->eac3, 0,
+                                  start_subband, end_subband,
+                                  ff_eac3_default_spx_band_struct,
+                                  &s->num_spx_bands,
+                                  s->spx_band_sizes);
+        } else {
+            for (ch = 1; ch <= fbw_channels; ch++) {
+                s->channel_uses_spx[ch] = 0;
+                s->first_spx_coords[ch] = 1;
+            }
         }
-        /* TODO: parse spectral extension strategy info */
     }
 
-    /* TODO: spectral extension coordinates */
+    /* spectral extension coordinates */
+    if (s->spx_in_use) {
+        for (ch = 1; ch <= fbw_channels; ch++) {
+            if (s->channel_uses_spx[ch]) {
+                if (s->first_spx_coords[ch] || get_bits1(gbc)) {
+                    float spx_blend;
+                    int bin, master_spx_coord;
+
+                    s->first_spx_coords[ch] = 0;
+                    spx_blend = get_bits(gbc, 5) * (1.0f/32);
+                    master_spx_coord = get_bits(gbc, 2) * 3;
+
+                    bin = s->spx_src_start_freq;
+                    for (bnd = 0; bnd < s->num_spx_bands; bnd++) {
+                        int bandsize;
+                        int spx_coord_exp, spx_coord_mant;
+                        float nratio, sblend, nblend, spx_coord;
+
+                        /* calculate blending factors */
+                        bandsize = s->spx_band_sizes[bnd];
+                        nratio = ((float)((bin + (bandsize >> 1))) / s->spx_dst_end_freq) - spx_blend;
+                        nratio = av_clipf(nratio, 0.0f, 1.0f);
+                        nblend = sqrtf(3.0f * nratio); // noise is scaled by sqrt(3) to give unity variance
+                        sblend = sqrtf(1.0f - nratio);
+                        bin += bandsize;
+
+                        /* decode spx coordinates */
+                        spx_coord_exp  = get_bits(gbc, 4);
+                        spx_coord_mant = get_bits(gbc, 2);
+                        if (spx_coord_exp == 15) spx_coord_mant <<= 1;
+                        else                     spx_coord_mant += 4;
+                        spx_coord_mant <<= (25 - spx_coord_exp - master_spx_coord);
+                        spx_coord = spx_coord_mant * (1.0f/(1<<23));
+
+                        /* multiply noise and signal blending factors by spx coordinate */
+                        s->spx_noise_blend [ch][bnd] = nblend * spx_coord;
+                        s->spx_signal_blend[ch][bnd] = sblend * spx_coord;
+                    }
+                }
+            } else {
+                s->first_spx_coords[ch] = 1;
+            }
+        }
+    }
 
     /* coupling strategy */
     if (s->eac3 ? s->cpl_strategy_exists[blk] : get_bits1(gbc)) {
@@ -859,9 +950,9 @@
                 s->phase_flags_in_use = get_bits1(gbc);
 
             /* coupling frequency range */
-            /* TODO: modify coupling end freq if spectral extension is used */
             cpl_start_subband = get_bits(gbc, 4);
-            cpl_end_subband   = get_bits(gbc, 4) + 3;
+            cpl_end_subband = s->spx_in_use ? (s->spx_src_start_freq - 37) / 12 :
+                                              get_bits(gbc, 4) + 3;
             if (cpl_start_subband >= cpl_end_subband) {
                 av_log(s->avctx, AV_LOG_ERROR, "invalid coupling range (%d >= %d)\n",
                        cpl_start_subband, cpl_end_subband);
@@ -934,8 +1025,11 @@
     if (channel_mode == AC3_CHMODE_STEREO) {
         if ((s->eac3 && !blk) || get_bits1(gbc)) {
             s->num_rematrixing_bands = 4;
-            if(cpl_in_use && s->start_freq[CPL_CH] <= 61)
+            if (cpl_in_use && s->start_freq[CPL_CH] <= 61) {
                 s->num_rematrixing_bands -= 1 + (s->start_freq[CPL_CH] == 37);
+            } else if (s->spx_in_use && s->spx_src_start_freq <= 61) {
+                s->num_rematrixing_bands--;
+            }
             for(bnd=0; bnd<s->num_rematrixing_bands; bnd++)
                 s->rematrixing_flags[bnd] = get_bits1(gbc);
         } else if (!blk) {
@@ -960,6 +1054,8 @@
             int prev = s->end_freq[ch];
             if (s->channel_in_cpl[ch])
                 s->end_freq[ch] = s->start_freq[CPL_CH];
+            else if (s->channel_uses_spx[ch])
+                s->end_freq[ch] = s->spx_src_start_freq;
             else {
                 int bandwidth_code = get_bits(gbc, 6);
                 if (bandwidth_code > 60) {
@@ -1156,8 +1252,6 @@
 
     /* TODO: generate enhanced coupling coordinates and uncouple */
 
-    /* TODO: apply spectral extension */
-
     /* recover coefficients if rematrixing is in use */
     if(s->channel_mode == AC3_CHMODE_STEREO)
         do_rematrixing(s);
@@ -1173,6 +1267,11 @@
         s->dsp.int32_to_float_fmul_scalar(s->transform_coeffs[ch], s->fixed_coeffs[ch], gain, 256);
     }
 
+    /* apply spectral extension to high frequency bins */
+    if (s->spx_in_use) {
+        ff_eac3_apply_spectral_extension(s);
+    }
+
     /* downmix and MDCT. order depends on whether block switching is used for
        any channel in this block. this is because coefficients for the long
        and short transforms cannot be mixed. */
--- a/ac3dec.h	Tue Mar 30 21:05:11 2010 +0000
+++ b/ac3dec.h	Tue Mar 30 22:09:14 2010 +0000
@@ -22,6 +22,29 @@
 /**
  * @file libavcodec/ac3.h
  * Common code between the AC-3 and E-AC-3 decoders.
+ *
+ * Summary of MDCT Coefficient Grouping:
+ * The individual MDCT coefficient indices are often referred to in the
+ * (E-)AC-3 specification as frequency bins.  These bins are grouped together
+ * into subbands of 12 coefficients each.  The subbands are grouped together
+ * into bands as defined in the bitstream by the band structures, which
+ * determine the number of bands and the size of each band.  The full spectrum
+ * of 256 frequency bins is divided into 1 DC bin + 21 subbands = 253 bins.
+ * This system of grouping coefficients is used for channel bandwidth, stereo
+ * rematrixing, channel coupling, enhanced coupling, and spectral extension.
+ *
+ * +-+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+-+
+ * |1|  |12|  |  [12|12|12|12]  |  |  |  |  |  |  |  |  |  |  |  |  |3|
+ * +-+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+-+
+ * ~~~  ~~~~     ~~~~~~~~~~~~~                                      ~~~
+ *  |     |            |                                             |
+ *  |     |            |                    3 unused frequency bins--+
+ *  |     |            |
+ *  |     |            +--1 band containing 4 subbands
+ *  |     |
+ *  |     +--1 subband of 12 frequency bins
+ *  |
+ *  +--DC frequency bin
  */
 
 #ifndef AVCODEC_AC3DEC_H
@@ -43,6 +66,7 @@
 #define AC3_MAX_COEFS   256
 #define AC3_BLOCK_SIZE  256
 #define MAX_BLOCKS        6
+#define SPX_MAX_BANDS    17
 
 typedef struct {
     AVCodecContext *avctx;                  ///< parent context
@@ -89,6 +113,22 @@
     int cpl_coords[AC3_MAX_CHANNELS][18];   ///< coupling coordinates                   (cplco)
 ///@}
 
+///@defgroup spx spectral extension
+///@{
+    int spx_in_use;                             ///< spectral extension in use              (spxinu)
+    uint8_t channel_uses_spx[AC3_MAX_CHANNELS]; ///< channel uses spectral extension        (chinspx)
+    int8_t spx_atten_code[AC3_MAX_CHANNELS];    ///< spx attenuation code                   (spxattencod)
+    int spx_src_start_freq;                     ///< spx start frequency bin
+    int spx_dst_end_freq;                       ///< spx end frequency bin
+    int spx_dst_start_freq;                     ///< spx starting frequency bin for copying (copystartmant)
+                                                ///< the copy region ends at the start of the spx region.
+    int num_spx_bands;                          ///< number of spx bands                    (nspxbnds)
+    uint8_t spx_band_sizes[SPX_MAX_BANDS];      ///< number of bins in each spx band
+    uint8_t first_spx_coords[AC3_MAX_CHANNELS]; ///< first spx coordinates states           (firstspxcos)
+    float spx_noise_blend[AC3_MAX_CHANNELS][SPX_MAX_BANDS]; ///< spx noise blending factor  (nblendfact)
+    float spx_signal_blend[AC3_MAX_CHANNELS][SPX_MAX_BANDS];///< spx signal blending factor (sblendfact)
+///@}
+
 ///@defgroup aht adaptive hybrid transform
     int channel_uses_aht[AC3_MAX_CHANNELS];                         ///< channel AHT in use (chahtinu)
     int pre_mantissa[AC3_MAX_CHANNELS][AC3_MAX_COEFS][MAX_BLOCKS];  ///< pre-IDCT mantissas
@@ -182,4 +222,11 @@
 void ff_ac3_downmix_c(float (*samples)[256], float (*matrix)[2],
                       int out_ch, int in_ch, int len);
 
+/**
+ * Apply spectral extension to each channel by copying lower frequency
+ * coefficients to higher frequency bins and applying side information to
+ * approximate the original high frequency signal.
+ */
+void ff_eac3_apply_spectral_extension(AC3DecodeContext *s);
+
 #endif /* AVCODEC_AC3DEC_H */
--- a/ac3dec_data.c	Tue Mar 30 21:05:11 2010 +0000
+++ b/ac3dec_data.c	Tue Mar 30 22:09:14 2010 +0000
@@ -64,3 +64,9 @@
  */
 const uint8_t ff_eac3_default_cpl_band_struct[18] =
 { 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1 };
+
+/**
+ * Table E2.15 Default Spectral Extension Banding Structure
+ */
+const uint8_t ff_eac3_default_spx_band_struct[17] =
+{ 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1 };
--- a/ac3dec_data.h	Tue Mar 30 21:05:11 2010 +0000
+++ b/ac3dec_data.h	Tue Mar 30 22:09:14 2010 +0000
@@ -29,5 +29,6 @@
 
 extern const uint8_t ff_eac3_hebap_tab[64];
 extern const uint8_t ff_eac3_default_cpl_band_struct[18];
+extern const uint8_t ff_eac3_default_spx_band_struct[17];
 
 #endif /* AVCODEC_AC3DEC_DATA_H */
--- a/avcodec.h	Tue Mar 30 21:05:11 2010 +0000
+++ b/avcodec.h	Tue Mar 30 22:09:14 2010 +0000
@@ -30,7 +30,7 @@
 #include "libavutil/avutil.h"
 
 #define LIBAVCODEC_VERSION_MAJOR 52
-#define LIBAVCODEC_VERSION_MINOR 62
+#define LIBAVCODEC_VERSION_MINOR 63
 #define LIBAVCODEC_VERSION_MICRO  0
 
 #define LIBAVCODEC_VERSION_INT  AV_VERSION_INT(LIBAVCODEC_VERSION_MAJOR, \
--- a/eac3dec.c	Tue Mar 30 21:05:11 2010 +0000
+++ b/eac3dec.c	Tue Mar 30 22:09:14 2010 +0000
@@ -23,10 +23,6 @@
 /*
  * There are several features of E-AC-3 that this decoder does not yet support.
  *
- * Spectral Extension
- *     There is a patch to get this working for the two samples we have that
- *     use it, but it needs some minor changes in order to be accepted.
- *
  * Enhanced Coupling
  *     No known samples exist.  If any ever surface, this feature should not be
  *     too difficult to implement.
@@ -67,6 +63,95 @@
 
 #define EAC3_SR_CODE_REDUCED  3
 
+void ff_eac3_apply_spectral_extension(AC3DecodeContext *s)
+{
+    int bin, bnd, ch, i;
+    uint8_t wrapflag[SPX_MAX_BANDS]={1,0,}, num_copy_sections, copy_sizes[SPX_MAX_BANDS];
+    float rms_energy[SPX_MAX_BANDS];
+
+    /* Set copy index mapping table. Set wrap flags to apply a notch filter at
+       wrap points later on. */
+    bin = s->spx_dst_start_freq;
+    num_copy_sections = 0;
+    for (bnd = 0; bnd < s->num_spx_bands; bnd++) {
+        int copysize;
+        int bandsize = s->spx_band_sizes[bnd];
+        if (bin + bandsize > s->spx_src_start_freq) {
+            copy_sizes[num_copy_sections++] = bin - s->spx_dst_start_freq;
+            bin = s->spx_dst_start_freq;
+            wrapflag[bnd] = 1;
+        }
+        for (i = 0; i < bandsize; i += copysize) {
+            if (bin == s->spx_src_start_freq) {
+                copy_sizes[num_copy_sections++] = bin - s->spx_dst_start_freq;
+                bin = s->spx_dst_start_freq;
+            }
+            copysize = FFMIN(bandsize - i, s->spx_src_start_freq - bin);
+            bin += copysize;
+        }
+    }
+    copy_sizes[num_copy_sections++] = bin - s->spx_dst_start_freq;
+
+    for (ch = 1; ch <= s->fbw_channels; ch++) {
+        if (!s->channel_uses_spx[ch])
+            continue;
+
+        /* Copy coeffs from normal bands to extension bands */
+        bin = s->spx_src_start_freq;
+        for (i = 0; i < num_copy_sections; i++) {
+            memcpy(&s->transform_coeffs[ch][bin],
+                   &s->transform_coeffs[ch][s->spx_dst_start_freq],
+                   copy_sizes[i]*sizeof(float));
+            bin += copy_sizes[i];
+        }
+
+        /* Calculate RMS energy for each SPX band. */
+        bin = s->spx_src_start_freq;
+        for (bnd = 0; bnd < s->num_spx_bands; bnd++) {
+            int bandsize = s->spx_band_sizes[bnd];
+            float accum = 0.0f;
+            for (i = 0; i < bandsize; i++) {
+                float coeff = s->transform_coeffs[ch][bin++];
+                accum += coeff * coeff;
+            }
+            rms_energy[bnd] = sqrtf(accum / bandsize);
+        }
+
+        /* Apply a notch filter at transitions between normal and extension
+           bands and at all wrap points. */
+        if (s->spx_atten_code[ch] >= 0) {
+            const float *atten_tab = ff_eac3_spx_atten_tab[s->spx_atten_code[ch]];
+            bin = s->spx_src_start_freq - 2;
+            for (bnd = 0; bnd < s->num_spx_bands; bnd++) {
+                if (wrapflag[bnd]) {
+                    float *coeffs = &s->transform_coeffs[ch][bin];
+                    coeffs[0] *= atten_tab[0];
+                    coeffs[1] *= atten_tab[1];
+                    coeffs[2] *= atten_tab[2];
+                    coeffs[3] *= atten_tab[1];
+                    coeffs[4] *= atten_tab[0];
+                }
+                bin += s->spx_band_sizes[bnd];
+            }
+        }
+
+        /* Apply noise-blended coefficient scaling based on previously
+           calculated RMS energy, blending factors, and SPX coordinates for
+           each band. */
+        bin = s->spx_src_start_freq;
+        for (bnd = 0; bnd < s->num_spx_bands; bnd++) {
+            float nscale = s->spx_noise_blend[ch][bnd] * rms_energy[bnd] * (1.0f/(1<<31));
+            float sscale = s->spx_signal_blend[ch][bnd];
+            for (i = 0; i < s->spx_band_sizes[bnd]; i++) {
+                float noise  = nscale * (int32_t)av_lfg_get(&s->dith_state);
+                s->transform_coeffs[ch][bin]   *= sscale;
+                s->transform_coeffs[ch][bin++] += noise;
+            }
+        }
+    }
+}
+
+
 /** lrint(M_SQRT2*cos(2*M_PI/12)*(1<<23)) */
 #define COEFF_0 10273905LL
 
@@ -492,12 +577,11 @@
     }
 
     /* spectral extension attenuation data */
-    if (parse_spx_atten_data) {
-        av_log_missing_feature(s->avctx, "Spectral extension attenuation", 1);
-        for (ch = 1; ch <= s->fbw_channels; ch++) {
-            if (get_bits1(gbc)) { // channel has spx attenuation
-                skip_bits(gbc, 5); // skip spx attenuation code
-            }
+    for (ch = 1; ch <= s->fbw_channels; ch++) {
+        if (parse_spx_atten_data && get_bits1(gbc)) {
+            s->spx_atten_code[ch] = get_bits(gbc, 5);
+        } else {
+            s->spx_atten_code[ch] = -1;
         }
     }
 
@@ -514,6 +598,7 @@
 
     /* syntax state initialization */
     for (ch = 1; ch <= s->fbw_channels; ch++) {
+        s->first_spx_coords[ch] = 1;
         s->first_cpl_coords[ch] = 1;
     }
     s->first_cpl_leak = 1;
--- a/eac3dec_data.c	Tue Mar 30 21:05:11 2010 +0000
+++ b/eac3dec_data.c	Tue Mar 30 22:09:14 2010 +0000
@@ -1093,3 +1093,42 @@
 {    EXP_D45,    EXP_D45,    EXP_D45,    EXP_D45,    EXP_D25,  EXP_REUSE},
 {    EXP_D45,    EXP_D45,    EXP_D45,    EXP_D45,    EXP_D45,    EXP_D45},
 };
+
+/**
+ * Table E.25: Spectral Extension Attenuation Table
+ * ff_eac3_spx_atten_tab[code][bin]=pow(2.0,(bin+1)*(code+1)/-15.0);
+ */
+const float ff_eac3_spx_atten_tab[32][3] = {
+    { 0.954841603910416503f, 0.911722488558216804f, 0.870550563296124125f },
+    { 0.911722488558216804f, 0.831237896142787758f, 0.757858283255198995f },
+    { 0.870550563296124125f, 0.757858283255198995f, 0.659753955386447100f },
+    { 0.831237896142787758f, 0.690956439983888004f, 0.574349177498517438f },
+    { 0.793700525984099792f, 0.629960524947436595f, 0.500000000000000000f },
+    { 0.757858283255198995f, 0.574349177498517438f, 0.435275281648062062f },
+    { 0.723634618720189082f, 0.523647061410313364f, 0.378929141627599553f },
+    { 0.690956439983888004f, 0.477420801955208307f, 0.329876977693223550f },
+    { 0.659753955386447100f, 0.435275281648062062f, 0.287174588749258719f },
+    { 0.629960524947436595f, 0.396850262992049896f, 0.250000000000000000f },
+    { 0.601512518041058319f, 0.361817309360094541f, 0.217637640824031003f },
+    { 0.574349177498517438f, 0.329876977693223550f, 0.189464570813799776f },
+    { 0.548412489847312945f, 0.300756259020529160f, 0.164938488846611775f },
+    { 0.523647061410313364f, 0.274206244923656473f, 0.143587294374629387f },
+    { 0.500000000000000000f, 0.250000000000000000f, 0.125000000000000000f },
+    { 0.477420801955208307f, 0.227930622139554201f, 0.108818820412015502f },
+    { 0.455861244279108402f, 0.207809474035696939f, 0.094732285406899888f },
+    { 0.435275281648062062f, 0.189464570813799776f, 0.082469244423305887f },
+    { 0.415618948071393879f, 0.172739109995972029f, 0.071793647187314694f },
+    { 0.396850262992049896f, 0.157490131236859149f, 0.062500000000000000f },
+    { 0.378929141627599553f, 0.143587294374629387f, 0.054409410206007751f },
+    { 0.361817309360094541f, 0.130911765352578369f, 0.047366142703449930f },
+    { 0.345478219991944002f, 0.119355200488802049f, 0.041234622211652958f },
+    { 0.329876977693223550f, 0.108818820412015502f, 0.035896823593657347f },
+    { 0.314980262473718298f, 0.099212565748012460f, 0.031250000000000000f },
+    { 0.300756259020529160f, 0.090454327340023621f, 0.027204705103003875f },
+    { 0.287174588749258719f, 0.082469244423305887f, 0.023683071351724965f },
+    { 0.274206244923656473f, 0.075189064755132290f, 0.020617311105826479f },
+    { 0.261823530705156682f, 0.068551561230914118f, 0.017948411796828673f },
+    { 0.250000000000000000f, 0.062500000000000000f, 0.015625000000000000f },
+    { 0.238710400977604098f, 0.056982655534888536f, 0.013602352551501938f },
+    { 0.227930622139554201f, 0.051952368508924235f, 0.011841535675862483f }
+};
--- a/eac3dec_data.h	Tue Mar 30 21:05:11 2010 +0000
+++ b/eac3dec_data.h	Tue Mar 30 22:09:14 2010 +0000
@@ -31,5 +31,6 @@
 
 extern const int16_t (* const ff_eac3_mantissa_vq[8])[6];
 extern const uint8_t ff_eac3_frm_expstr[32][6];
+extern const float   ff_eac3_spx_atten_tab[32][3];
 
 #endif /* AVCODEC_EAC3DEC_DATA_H */