Mercurial > libavcodec.hg
comparison dca.c @ 11617:bb17732c00ef libavcodec
DCA: break out lfe_interpolation_fir() inner loops to a function
This enables SIMD optimisations of this function.
author | mru |
---|---|
date | Mon, 12 Apr 2010 20:45:25 +0000 |
parents | a207cc043de8 |
children | 1492bdc1d9d0 |
comparison
equal
deleted
inserted
replaced
11616:1461e6044153 | 11617:bb17732c00ef |
---|---|
39 #include "put_bits.h" | 39 #include "put_bits.h" |
40 #include "dcadata.h" | 40 #include "dcadata.h" |
41 #include "dcahuff.h" | 41 #include "dcahuff.h" |
42 #include "dca.h" | 42 #include "dca.h" |
43 #include "synth_filter.h" | 43 #include "synth_filter.h" |
| 44 #include "dcadsp.h" |
44 | 45 |
45 //#define TRACE | 46 //#define TRACE |
46 | 47 |
47 #define DCA_PRIM_CHANNELS_MAX (5) | 48 #define DCA_PRIM_CHANNELS_MAX (5) |
48 #define DCA_SUBBANDS (32) | 49 #define DCA_SUBBANDS (32) |
254 | 255 |
255 int debug_flag; ///< used for suppressing repeated error messages output | 256 int debug_flag; ///< used for suppressing repeated error messages output |
256 DSPContext dsp; | 257 DSPContext dsp; |
257 FFTContext imdct; | 258 FFTContext imdct; |
258 SynthFilterContext synth; | 259 SynthFilterContext synth; |
| 260 DCADSPContext dcadsp; |
259 } DCAContext; | 261 } DCAContext; |
260 | 262 |
261 static const uint16_t dca_vlc_offs[] = { | 263 static const uint16_t dca_vlc_offs[] = { |
262 0, 512, 640, 768, 1282, 1794, 2436, 3080, 3770, 4454, 5364, | 264 0, 512, 640, 768, 1282, 1794, 2436, 3080, 3770, 4454, 5364, |
263 5372, 5380, 5388, 5392, 5396, 5412, 5420, 5428, 5460, 5492, 5508, | 265 5372, 5380, 5388, 5392, 5396, 5412, 5420, 5428, 5460, 5492, 5508, |
786 samples_out+= 32; | 788 samples_out+= 32; |
787 | 789 |
788 } | 790 } |
789 } | 791 } |
790 | 792 |
791 static void lfe_interpolation_fir(int decimation_select, | 793 static void lfe_interpolation_fir(DCAContext *s, int decimation_select, |
792 int num_deci_sample, float *samples_in, | 794 int num_deci_sample, float *samples_in, |
793 float *samples_out, float scale, | 795 float *samples_out, float scale, |
794 float bias) | 796 float bias) |
795 { | 797 { |
796 /* samples_in: An array holding decimated samples. | 798 /* samples_in: An array holding decimated samples. |
799 * from last subframe as history. | 801 * from last subframe as history. |
800 * | 802 * |
801 * samples_out: An array holding interpolated samples | 803 * samples_out: An array holding interpolated samples |
802 */ | 804 */ |
803 | 805 |
804 int decifactor, k, j; | 806 int decifactor; |
805 const float *prCoeff; | 807 const float *prCoeff; |
806 int deciindex; | 808 int deciindex; |
807 | 809 |
808 /* Select decimation filter */ | 810 /* Select decimation filter */ |
809 if (decimation_select == 1) { | 811 if (decimation_select == 1) { |
813 decifactor = 32; | 815 decifactor = 32; |
814 prCoeff = lfe_fir_64; | 816 prCoeff = lfe_fir_64; |
815 } | 817 } |
816 /* Interpolation */ | 818 /* Interpolation */ |
817 for (deciindex = 0; deciindex < num_deci_sample; deciindex++) { | 819 for (deciindex = 0; deciindex < num_deci_sample; deciindex++) { |
818 float *samples_out2 = samples_out + decifactor; | 820 s->dcadsp.lfe_fir(samples_out, samples_in, prCoeff, decifactor, |
819 const float *cf0 = prCoeff; | 821 scale, bias); |
820 const float *cf1 = prCoeff + 256; | |
821 | |
822 /* One decimated sample generates 2*decifactor interpolated ones */ | |
823 for (k = 0; k < decifactor; k++) { | |
824 float v0 = 0.0; | |
825 float v1 = 0.0; | |
826 for (j = 0; j < 256 / decifactor; j++) { | |
827 float s = samples_in[-j]; | |
828 v0 += s * *cf0++; | |
829 v1 += s * *--cf1; | |
830 } | |
831 *samples_out++ = (v0 * scale) + bias; | |
832 *samples_out2++ = (v1 * scale) + bias; | |
833 } | |
834 | |
835 samples_in++; | 822 samples_in++; |
836 samples_out += decifactor; | 823 samples_out += 2 * decifactor; |
837 } | 824 } |
838 } | 825 } |
839 | 826 |
840 /* downmixing routines */ | 827 /* downmixing routines */ |
841 #define MIX_REAR1(samples, si1, rs, coef) \ | 828 #define MIX_REAR1(samples, si1, rs, coef) \ |
1081 | 1068 |
1082 /* Generate LFE samples for this subsubframe FIXME!!! */ | 1069 /* Generate LFE samples for this subsubframe FIXME!!! */ |
1083 if (s->output & DCA_LFE) { | 1070 if (s->output & DCA_LFE) { |
1084 int lfe_samples = 2 * s->lfe * s->subsubframes; | 1071 int lfe_samples = 2 * s->lfe * s->subsubframes; |
1085 | 1072 |
1086 lfe_interpolation_fir(s->lfe, 2 * s->lfe, | 1073 lfe_interpolation_fir(s, s->lfe, 2 * s->lfe, |
1087 s->lfe_data + lfe_samples + | 1074 s->lfe_data + lfe_samples + |
1088 2 * s->lfe * subsubframe, | 1075 2 * s->lfe * subsubframe, |
1089 &s->samples[256 * dca_lfe_index[s->amode]], | 1076 &s->samples[256 * dca_lfe_index[s->amode]], |
1090 (1.0/256.0)*s->scale_bias, s->add_bias); | 1077 (1.0/256.0)*s->scale_bias, s->add_bias); |
1091 /* Outputs 20bits pcm samples */ | 1078 /* Outputs 20bits pcm samples */ |
1311 dca_init_vlcs(); | 1298 dca_init_vlcs(); |
1312 | 1299 |
1313 dsputil_init(&s->dsp, avctx); | 1300 dsputil_init(&s->dsp, avctx); |
1314 ff_mdct_init(&s->imdct, 6, 1, 1.0); | 1301 ff_mdct_init(&s->imdct, 6, 1, 1.0); |
1315 ff_synth_filter_init(&s->synth); | 1302 ff_synth_filter_init(&s->synth); |
| 1303 ff_dcadsp_init(&s->dcadsp); |
1316 | 1304 |
1317 for(i = 0; i < 6; i++) | 1305 for(i = 0; i < 6; i++) |
1318 s->samples_chanptr[i] = s->samples + i * 256; | 1306 s->samples_chanptr[i] = s->samples + i * 256; |
1319 avctx->sample_fmt = SAMPLE_FMT_S16; | 1307 avctx->sample_fmt = SAMPLE_FMT_S16; |
1320 | 1308 |