comparison flacenc.c @ 5737:efa3c1f9259a libavcodec

sse2 version of compute_autocorr(). 4x faster than c (somehow, even though doubles only allow 2x simd). overall flac encoding: 15-50% faster on core2, 4-11% on k8, 3-13% on p4.
author lorenm
date Sat, 29 Sep 2007 22:31:18 +0000
parents c2f88af57c16
children c1a4aae5adb4
comparison
equal deleted inserted replaced
5736:810067f2c33d 5737:efa3c1f9259a
20 */ 20 */
21 21
22 #include "avcodec.h" 22 #include "avcodec.h"
23 #include "bitstream.h" 23 #include "bitstream.h"
24 #include "crc.h" 24 #include "crc.h"
25 #include "dsputil.h"
25 #include "golomb.h" 26 #include "golomb.h"
26 #include "lls.h" 27 #include "lls.h"
27 28
28 #define FLAC_MAX_CH 8 29 #define FLAC_MAX_CH 8
29 #define FLAC_MIN_BLOCKSIZE 16 30 #define FLAC_MIN_BLOCKSIZE 16
105 int max_framesize; 106 int max_framesize;
106 uint32_t frame_count; 107 uint32_t frame_count;
107 FlacFrame frame; 108 FlacFrame frame;
108 CompressionOptions options; 109 CompressionOptions options;
109 AVCodecContext *avctx; 110 AVCodecContext *avctx;
111 DSPContext dsp;
110 } FlacEncodeContext; 112 } FlacEncodeContext;
111 113
112 static const int flac_samplerates[16] = { 114 static const int flac_samplerates[16] = {
113 0, 0, 0, 0, 115 0, 0, 0, 0,
114 8000, 16000, 22050, 24000, 32000, 44100, 48000, 96000, 116 8000, 16000, 22050, 24000, 32000, 44100, 48000, 96000,
174 FlacEncodeContext *s = avctx->priv_data; 176 FlacEncodeContext *s = avctx->priv_data;
175 int i, level; 177 int i, level;
176 uint8_t *streaminfo; 178 uint8_t *streaminfo;
177 179
178 s->avctx = avctx; 180 s->avctx = avctx;
181
182 dsputil_init(&s->dsp, avctx);
179 183
180 if(avctx->sample_fmt != SAMPLE_FMT_S16) { 184 if(avctx->sample_fmt != SAMPLE_FMT_S16) {
181 return -1; 185 return -1;
182 } 186 }
183 187
602 606
603 /** 607 /**
604 * Calculates autocorrelation data from audio samples 608 * Calculates autocorrelation data from audio samples
605 * A Welch window function is applied before calculation. 609 * A Welch window function is applied before calculation.
606 */ 610 */
607 static void compute_autocorr(const int32_t *data, int len, int lag, 611 void ff_flac_compute_autocorr(const int32_t *data, int len, int lag,
608 double *autoc) 612 double *autoc)
609 { 613 {
610 int i, j; 614 int i, j;
611 double tmp[len + lag + 1]; 615 double tmp[len + lag + 1];
612 double *data1= tmp + lag; 616 double *data1= tmp + lag;
613 617
745 } 749 }
746 750
747 /** 751 /**
748 * Calculate LPC coefficients for multiple orders 752 * Calculate LPC coefficients for multiple orders
749 */ 753 */
750 static int lpc_calc_coefs(const int32_t *samples, int blocksize, int max_order, 754 static int lpc_calc_coefs(FlacEncodeContext *s,
755 const int32_t *samples, int blocksize, int max_order,
751 int precision, int32_t coefs[][MAX_LPC_ORDER], 756 int precision, int32_t coefs[][MAX_LPC_ORDER],
752 int *shift, int use_lpc, int omethod) 757 int *shift, int use_lpc, int omethod)
753 { 758 {
754 double autoc[MAX_LPC_ORDER+1]; 759 double autoc[MAX_LPC_ORDER+1];
755 double ref[MAX_LPC_ORDER]; 760 double ref[MAX_LPC_ORDER];
758 int opt_order; 763 int opt_order;
759 764
760 assert(max_order >= MIN_LPC_ORDER && max_order <= MAX_LPC_ORDER); 765 assert(max_order >= MIN_LPC_ORDER && max_order <= MAX_LPC_ORDER);
761 766
762 if(use_lpc == 1){ 767 if(use_lpc == 1){
763 compute_autocorr(samples, blocksize, max_order, autoc); 768 s->dsp.flac_compute_autocorr(samples, blocksize, max_order, autoc);
764 769
765 compute_lpc_coefs(autoc, max_order, lpc, ref); 770 compute_lpc_coefs(autoc, max_order, lpc, ref);
766 }else{ 771 }else{
767 LLSModel m[2]; 772 LLSModel m[2];
768 double var[MAX_LPC_ORDER+1], eval, weight; 773 double var[MAX_LPC_ORDER+1], eval, weight;
1015 } 1020 }
1016 return bits[sub->order]; 1021 return bits[sub->order];
1017 } 1022 }
1018 1023
1019 /* LPC */ 1024 /* LPC */
1020 opt_order = lpc_calc_coefs(smp, n, max_order, precision, coefs, shift, ctx->options.use_lpc, omethod); 1025 opt_order = lpc_calc_coefs(ctx, smp, n, max_order, precision, coefs, shift, ctx->options.use_lpc, omethod);
1021 1026
1022 if(omethod == ORDER_METHOD_2LEVEL || 1027 if(omethod == ORDER_METHOD_2LEVEL ||
1023 omethod == ORDER_METHOD_4LEVEL || 1028 omethod == ORDER_METHOD_4LEVEL ||
1024 omethod == ORDER_METHOD_8LEVEL) { 1029 omethod == ORDER_METHOD_8LEVEL) {
1025 int levels = 1 << omethod; 1030 int levels = 1 << omethod;