changeset 10689:d124d9b688d0 libavcodec

Optimize ff_celp_lp_synthesis_filterf(). 50% faster in my tests.
author vitor
date Wed, 16 Dec 2009 17:09:33 +0000
parents 750102456f00
children 63451af5f8f9
files celp_filters.c celp_filters.h
diffstat 2 files changed, 98 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/celp_filters.c	Wed Dec 16 11:39:14 2009 +0000
+++ b/celp_filters.c	Wed Dec 16 17:09:33 2009 +0000
@@ -93,7 +93,102 @@
 {
     int i,n;
 
-    for (n = 0; n < buffer_length; n++) {
+    float out0, out1, out2, out3;
+    float old_out0, old_out1, old_out2, old_out3;
+    float a,b,c;
+
+    a = filter_coeffs[0];
+    b = filter_coeffs[1];
+    c = filter_coeffs[2];
+    b -= filter_coeffs[0] * filter_coeffs[0];
+    c -= filter_coeffs[1] * filter_coeffs[0];
+    c -= filter_coeffs[0] * b;
+
+    old_out0 = out[-4];
+    old_out1 = out[-3];
+    old_out2 = out[-2];
+    old_out3 = out[-1];
+    for (n = 0; n <= buffer_length - 4; n+=4) {
+        float tmp0,tmp1,tmp2,tmp3;
+        float val;
+
+        out0 = in[0];
+        out1 = in[1];
+        out2 = in[2];
+        out3 = in[3];
+
+        out0 -= filter_coeffs[2] * old_out1;
+        out1 -= filter_coeffs[2] * old_out2;
+        out2 -= filter_coeffs[2] * old_out3;
+
+        out0 -= filter_coeffs[1] * old_out2;
+        out1 -= filter_coeffs[1] * old_out3;
+
+        out0 -= filter_coeffs[0] * old_out3;
+
+        val = filter_coeffs[3];
+
+        out0 -= val * old_out0;
+        out1 -= val * old_out1;
+        out2 -= val * old_out2;
+        out3 -= val * old_out3;
+
+        old_out3 = out[-5];
+
+        for (i = 5; i <= filter_length; i += 2) {
+            val = filter_coeffs[i-1];
+
+            out0 -= val * old_out3;
+            out1 -= val * old_out0;
+            out2 -= val * old_out1;
+            out3 -= val * old_out2;
+
+            old_out2 = out[-i-1];
+
+            val = filter_coeffs[i];
+
+            out0 -= val * old_out2;
+            out1 -= val * old_out3;
+            out2 -= val * old_out0;
+            out3 -= val * old_out1;
+
+            FFSWAP(float, old_out0, old_out2);
+            old_out1 = old_out3;
+            old_out3 = out[-i-2];
+        }
+
+        tmp0 = out0;
+        tmp1 = out1;
+        tmp2 = out2;
+        tmp3 = out3;
+
+        out3 -= a * tmp2;
+        out2 -= a * tmp1;
+        out1 -= a * tmp0;
+
+        out3 -= b * tmp1;
+        out2 -= b * tmp0;
+
+        out3 -= c * tmp0;
+
+
+        out[0] = out0;
+        out[1] = out1;
+        out[2] = out2;
+        out[3] = out3;
+
+        old_out0 = out0;
+        old_out1 = out1;
+        old_out2 = out2;
+        old_out3 = out3;
+
+        out += 4;
+        in  += 4;
+    }
+
+    out -= n;
+    in -= n;
+    for (; n < buffer_length; n++) {
         out[n] = in[n];
         for (i = 1; i <= filter_length; i++)
             out[n] -= filter_coeffs[i-1] * out[n-i];
--- a/celp_filters.h	Wed Dec 16 11:39:14 2009 +0000
+++ b/celp_filters.h	Wed Dec 16 17:09:33 2009 +0000
@@ -90,7 +90,8 @@
  * @param filter_coeffs filter coefficients.
  * @param in input signal
  * @param buffer_length amount of data to process
- * @param filter_length filter length (10 for 10th order LP filter)
+ * @param filter_length filter length (10 for 10th order LP filter). Must be
+ *                      even and greater than 4 (i.e. at least 6).
  *
  * @note Output buffer must contain filter_length samples of past
  *       speech data before pointer.