# HG changeset patch # User vitor # Date 1260983373 0 # Node ID d124d9b688d0a5eeff007beb4f17ae6aa1587e11 # Parent 750102456f00837569ef12b848e2fee9960fa72f Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests. diff -r 750102456f00 -r d124d9b688d0 celp_filters.c --- a/celp_filters.c Wed Dec 16 11:39:14 2009 +0000 +++ b/celp_filters.c Wed Dec 16 17:09:33 2009 +0000 @@ -93,7 +93,102 @@ { int i,n; - for (n = 0; n < buffer_length; n++) { + float out0, out1, out2, out3; + float old_out0, old_out1, old_out2, old_out3; + float a,b,c; + + a = filter_coeffs[0]; + b = filter_coeffs[1]; + c = filter_coeffs[2]; + b -= filter_coeffs[0] * filter_coeffs[0]; + c -= filter_coeffs[1] * filter_coeffs[0]; + c -= filter_coeffs[0] * b; + + old_out0 = out[-4]; + old_out1 = out[-3]; + old_out2 = out[-2]; + old_out3 = out[-1]; + for (n = 0; n <= buffer_length - 4; n+=4) { + float tmp0,tmp1,tmp2,tmp3; + float val; + + out0 = in[0]; + out1 = in[1]; + out2 = in[2]; + out3 = in[3]; + + out0 -= filter_coeffs[2] * old_out1; + out1 -= filter_coeffs[2] * old_out2; + out2 -= filter_coeffs[2] * old_out3; + + out0 -= filter_coeffs[1] * old_out2; + out1 -= filter_coeffs[1] * old_out3; + + out0 -= filter_coeffs[0] * old_out3; + + val = filter_coeffs[3]; + + out0 -= val * old_out0; + out1 -= val * old_out1; + out2 -= val * old_out2; + out3 -= val * old_out3; + + old_out3 = out[-5]; + + for (i = 5; i <= filter_length; i += 2) { + val = filter_coeffs[i-1]; + + out0 -= val * old_out3; + out1 -= val * old_out0; + out2 -= val * old_out1; + out3 -= val * old_out2; + + old_out2 = out[-i-1]; + + val = filter_coeffs[i]; + + out0 -= val * old_out2; + out1 -= val * old_out3; + out2 -= val * old_out0; + out3 -= val * old_out1; + + FFSWAP(float, old_out0, old_out2); + old_out1 = old_out3; + old_out3 = out[-i-2]; + } + + tmp0 = out0; + tmp1 = out1; + tmp2 = out2; + tmp3 = out3; + + out3 -= a * tmp2; + out2 -= a * tmp1; + out1 -= a * tmp0; + + out3 -= b * tmp1; + out2 -= b * tmp0; + + out3 -= c * tmp0; + + + out[0] = out0; + out[1] = out1; + out[2] = out2; + out[3] = out3; + + old_out0 = out0; + old_out1 = out1; + old_out2 = out2; + old_out3 = out3; + + out += 4; + in += 4; + } + + out -= n; + in -= n; + for (; n < buffer_length; n++) { out[n] = in[n]; for (i = 1; i <= filter_length; i++) out[n] -= filter_coeffs[i-1] * out[n-i]; diff -r 750102456f00 -r d124d9b688d0 celp_filters.h --- a/celp_filters.h Wed Dec 16 11:39:14 2009 +0000 +++ b/celp_filters.h Wed Dec 16 17:09:33 2009 +0000 @@ -90,7 +90,8 @@ * @param filter_coeffs filter coefficients. * @param in input signal * @param buffer_length amount of data to process - * @param filter_length filter length (10 for 10th order LP filter) + * @param filter_length filter length (10 for 10th order LP filter). Must be + * greater than 4 and even. * * @note Output buffer must contain filter_length samples of past * speech data before pointer.