comparison dca.c @ 11617:bb17732c00ef libavcodec

DCA: break out lfe_interpolation_fir() inner loops to a function. This enables SIMD optimisations of this function.
author mru
date Mon, 12 Apr 2010 20:45:25 +0000
parents a207cc043de8
children 1492bdc1d9d0
comparison
equal deleted inserted replaced
11616:1461e6044153 11617:bb17732c00ef
39 #include "put_bits.h" 39 #include "put_bits.h"
40 #include "dcadata.h" 40 #include "dcadata.h"
41 #include "dcahuff.h" 41 #include "dcahuff.h"
42 #include "dca.h" 42 #include "dca.h"
43 #include "synth_filter.h" 43 #include "synth_filter.h"
44 #include "dcadsp.h"
44 45
45 //#define TRACE 46 //#define TRACE
46 47
47 #define DCA_PRIM_CHANNELS_MAX (5) 48 #define DCA_PRIM_CHANNELS_MAX (5)
48 #define DCA_SUBBANDS (32) 49 #define DCA_SUBBANDS (32)
254 255
255 int debug_flag; ///< used for suppressing repeated error messages output 256 int debug_flag; ///< used for suppressing repeated error messages output
256 DSPContext dsp; 257 DSPContext dsp;
257 FFTContext imdct; 258 FFTContext imdct;
258 SynthFilterContext synth; 259 SynthFilterContext synth;
260 DCADSPContext dcadsp;
259 } DCAContext; 261 } DCAContext;
260 262
261 static const uint16_t dca_vlc_offs[] = { 263 static const uint16_t dca_vlc_offs[] = {
262 0, 512, 640, 768, 1282, 1794, 2436, 3080, 3770, 4454, 5364, 264 0, 512, 640, 768, 1282, 1794, 2436, 3080, 3770, 4454, 5364,
263 5372, 5380, 5388, 5392, 5396, 5412, 5420, 5428, 5460, 5492, 5508, 265 5372, 5380, 5388, 5392, 5396, 5412, 5420, 5428, 5460, 5492, 5508,
786 samples_out+= 32; 788 samples_out+= 32;
787 789
788 } 790 }
789 } 791 }
790 792
791 static void lfe_interpolation_fir(int decimation_select, 793 static void lfe_interpolation_fir(DCAContext *s, int decimation_select,
792 int num_deci_sample, float *samples_in, 794 int num_deci_sample, float *samples_in,
793 float *samples_out, float scale, 795 float *samples_out, float scale,
794 float bias) 796 float bias)
795 { 797 {
796 /* samples_in: An array holding decimated samples. 798 /* samples_in: An array holding decimated samples.
799 * from last subframe as history. 801 * from last subframe as history.
800 * 802 *
801 * samples_out: An array holding interpolated samples 803 * samples_out: An array holding interpolated samples
802 */ 804 */
803 805
804 int decifactor, k, j; 806 int decifactor;
805 const float *prCoeff; 807 const float *prCoeff;
806 int deciindex; 808 int deciindex;
807 809
808 /* Select decimation filter */ 810 /* Select decimation filter */
809 if (decimation_select == 1) { 811 if (decimation_select == 1) {
813 decifactor = 32; 815 decifactor = 32;
814 prCoeff = lfe_fir_64; 816 prCoeff = lfe_fir_64;
815 } 817 }
816 /* Interpolation */ 818 /* Interpolation */
817 for (deciindex = 0; deciindex < num_deci_sample; deciindex++) { 819 for (deciindex = 0; deciindex < num_deci_sample; deciindex++) {
818 float *samples_out2 = samples_out + decifactor; 820 s->dcadsp.lfe_fir(samples_out, samples_in, prCoeff, decifactor,
819 const float *cf0 = prCoeff; 821 scale, bias);
820 const float *cf1 = prCoeff + 256;
821
822 /* One decimated sample generates 2*decifactor interpolated ones */
823 for (k = 0; k < decifactor; k++) {
824 float v0 = 0.0;
825 float v1 = 0.0;
826 for (j = 0; j < 256 / decifactor; j++) {
827 float s = samples_in[-j];
828 v0 += s * *cf0++;
829 v1 += s * *--cf1;
830 }
831 *samples_out++ = (v0 * scale) + bias;
832 *samples_out2++ = (v1 * scale) + bias;
833 }
834
835 samples_in++; 822 samples_in++;
836 samples_out += decifactor; 823 samples_out += 2 * decifactor;
837 } 824 }
838 } 825 }
839 826
840 /* downmixing routines */ 827 /* downmixing routines */
841 #define MIX_REAR1(samples, si1, rs, coef) \ 828 #define MIX_REAR1(samples, si1, rs, coef) \
1081 1068
1082 /* Generate LFE samples for this subsubframe FIXME!!! */ 1069 /* Generate LFE samples for this subsubframe FIXME!!! */
1083 if (s->output & DCA_LFE) { 1070 if (s->output & DCA_LFE) {
1084 int lfe_samples = 2 * s->lfe * s->subsubframes; 1071 int lfe_samples = 2 * s->lfe * s->subsubframes;
1085 1072
1086 lfe_interpolation_fir(s->lfe, 2 * s->lfe, 1073 lfe_interpolation_fir(s, s->lfe, 2 * s->lfe,
1087 s->lfe_data + lfe_samples + 1074 s->lfe_data + lfe_samples +
1088 2 * s->lfe * subsubframe, 1075 2 * s->lfe * subsubframe,
1089 &s->samples[256 * dca_lfe_index[s->amode]], 1076 &s->samples[256 * dca_lfe_index[s->amode]],
1090 (1.0/256.0)*s->scale_bias, s->add_bias); 1077 (1.0/256.0)*s->scale_bias, s->add_bias);
1091 /* Outputs 20bits pcm samples */ 1078 /* Outputs 20bits pcm samples */
1311 dca_init_vlcs(); 1298 dca_init_vlcs();
1312 1299
1313 dsputil_init(&s->dsp, avctx); 1300 dsputil_init(&s->dsp, avctx);
1314 ff_mdct_init(&s->imdct, 6, 1, 1.0); 1301 ff_mdct_init(&s->imdct, 6, 1, 1.0);
1315 ff_synth_filter_init(&s->synth); 1302 ff_synth_filter_init(&s->synth);
1303 ff_dcadsp_init(&s->dcadsp);
1316 1304
1317 for(i = 0; i < 6; i++) 1305 for(i = 0; i < 6; i++)
1318 s->samples_chanptr[i] = s->samples + i * 256; 1306 s->samples_chanptr[i] = s->samples + i * 256;
1319 avctx->sample_fmt = SAMPLE_FMT_S16; 1307 avctx->sample_fmt = SAMPLE_FMT_S16;
1320 1308