Mercurial > libavcodec.hg
changeset 5724:96d1b6c30aad libavcodec
Unroll encode_residual_lpc(). Speedup varies between 1.2x and 1.8x depending on LPC order.
author | lorenm |
---|---|
date | Thu, 27 Sep 2007 02:42:00 +0000 |
parents | 49a5d44423ef |
children | 2ec1ec2d1eae |
files | flacenc.c utils.c |
diffstat | 2 files changed, 84 insertions(+), 3 deletions(-) [+] |
line wrap: on
line diff
--- a/flacenc.c Wed Sep 26 13:28:13 2007 +0000
+++ b/flacenc.c Thu Sep 27 02:42:00 2007 +0000
@@ -834,15 +834,83 @@
     }
 }
 
+#define LPC1(x) {\
+    int s = smp[i-(x)+1];\
+    p1 += c*s;\
+    c = coefs[(x)-2];\
+    p0 += c*s;\
+}
+
+static av_always_inline void encode_residual_lpc_unrolled(
+    int32_t *res, const int32_t *smp, int n,
+    int order, const int32_t *coefs, int shift, int big)
+{
+    int i;
+    for(i=order; i<n; i+=2) {
+        int c = coefs[order-1];
+        int p0 = c * smp[i-order];
+        int p1 = 0;
+        if(big) {
+            switch(order) {
+                case 32: LPC1(32)
+                case 31: LPC1(31)
+                case 30: LPC1(30)
+                case 29: LPC1(29)
+                case 28: LPC1(28)
+                case 27: LPC1(27)
+                case 26: LPC1(26)
+                case 25: LPC1(25)
+                case 24: LPC1(24)
+                case 23: LPC1(23)
+                case 22: LPC1(22)
+                case 21: LPC1(21)
+                case 20: LPC1(20)
+                case 19: LPC1(19)
+                case 18: LPC1(18)
+                case 17: LPC1(17)
+                case 16: LPC1(16)
+                case 15: LPC1(15)
+                case 14: LPC1(14)
+                case 13: LPC1(13)
+                case 12: LPC1(12)
+                case 11: LPC1(11)
+                case 10: LPC1(10)
+                case  9: LPC1( 9)
+                         LPC1( 8)
+                         LPC1( 7)
+                         LPC1( 6)
+                         LPC1( 5)
+                         LPC1( 4)
+                         LPC1( 3)
+                         LPC1( 2)
+            }
+        } else {
+            switch(order) {
+                case  8: LPC1( 8)
+                case  7: LPC1( 7)
+                case  6: LPC1( 6)
+                case  5: LPC1( 5)
+                case  4: LPC1( 4)
+                case  3: LPC1( 3)
+                case  2: LPC1( 2)
+            }
+        }
+        p1 += c * smp[i];
+        res[i  ] = smp[i  ] - (p0 >> shift);
+        res[i+1] = smp[i+1] - (p1 >> shift);
+    }
+}
+
 static void encode_residual_lpc(int32_t *res, const int32_t *smp, int n,
                                 int order, const int32_t *coefs, int shift)
 {
-    int i, j;
-
+    int i;
     for(i=0; i<order; i++) {
         res[i] = smp[i];
     }
+#ifdef CONFIG_SMALL
     for(i=order; i<n; i+=2) {
+        int j;
         int32_t c = coefs[0];
         int32_t p0 = 0, p1 = c*smp[i];
         for(j=1; j<order; j++) {
@@ -855,6 +923,19 @@
         res[i+0] = smp[i+0] - (p0 >> shift);
         res[i+1] = smp[i+1] - (p1 >> shift);
     }
+#else
+    switch(order) {
+        case 1: encode_residual_lpc_unrolled(res, smp, n, 1, coefs, shift, 0); break;
+        case 2: encode_residual_lpc_unrolled(res, smp, n, 2, coefs, shift, 0); break;
+        case 3: encode_residual_lpc_unrolled(res, smp, n, 3, coefs, shift, 0); break;
+        case 4: encode_residual_lpc_unrolled(res, smp, n, 4, coefs, shift, 0); break;
+        case 5: encode_residual_lpc_unrolled(res, smp, n, 5, coefs, shift, 0); break;
+        case 6: encode_residual_lpc_unrolled(res, smp, n, 6, coefs, shift, 0); break;
+        case 7: encode_residual_lpc_unrolled(res, smp, n, 7, coefs, shift, 0); break;
+        case 8: encode_residual_lpc_unrolled(res, smp, n, 8, coefs, shift, 0); break;
+        default: encode_residual_lpc_unrolled(res, smp, n, order, coefs, shift, 1); break;
+    }
+#endif
 }
 
 static int encode_residual(FlacEncodeContext *ctx, int ch)
--- a/utils.c Wed Sep 26 13:28:13 2007 +0000
+++ b/utils.c Thu Sep 27 02:42:00 2007 +0000
@@ -670,7 +670,7 @@
 {"context", "context model", OFFSET(context_model), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E},
 {"slice_flags", NULL, OFFSET(slice_flags), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
 {"xvmc_acceleration", NULL, OFFSET(xvmc_acceleration), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX},
-{"mbd", "macroblock decision algorithm (high quality mode)", OFFSET(mb_decision), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|E, "mbd"},
+{"mbd", "macroblock decision algorithm (high quality mode)", OFFSET(mb_decision), FF_OPT_TYPE_INT, DEFAULT, INT_MIN, INT_MAX, V|A|E, "mbd"},
 {"simple", "use mbcmp (default)", 0, FF_OPT_TYPE_CONST, FF_MB_DECISION_SIMPLE, INT_MIN, INT_MAX, V|E, "mbd"},
 {"bits", "use fewest bits", 0, FF_OPT_TYPE_CONST, FF_MB_DECISION_BITS, INT_MIN, INT_MAX, V|E, "mbd"},
 {"rd", "use best rate distortion", 0, FF_OPT_TYPE_CONST, FF_MB_DECISION_RD, INT_MIN, INT_MAX, V|E, "mbd"},