changeset 10102:76eeb9e3599b libavcodec

1.5x faster ff_vorbis_floor1_render_list, giving 5% faster Vorbis decoding on Core2 (1.3x and 3%, respectively, on G4). I think only part of this speedup is due to my optimizations per se, though; some of it is that I got a better roll on the GCC random code generator. Trivial reorderings of this function have a disproportionate effect on speed.
author lorenm
date Thu, 27 Aug 2009 13:41:11 +0000
parents 254f8c7d5e4d
children 2066cbe806ef
files vorbis.c
diffstat 1 files changed, 32 insertions(+), 6 deletions(-) [+]
line wrap: on
line diff
--- a/vorbis.c	Wed Aug 26 23:12:41 2009 +0000
+++ b/vorbis.c	Thu Aug 27 13:41:11 2009 +0000
@@ -146,25 +146,51 @@
     }
 }
 
+static inline void render_line_unrolled(intptr_t x, intptr_t y, int x1, intptr_t sy, int ady, int adx, float * buf) {
+    int err = -adx;
+    int tmp;
+    x -= x1-1;
+    buf += x1-1;
+    while (++x < 0) {
+        err += ady;
+        if (err >= 0) {
+            err += ady - adx;
+            y += sy;
+            buf[x++] = ff_vorbis_floor1_inverse_db_table[y];
+        }
+        buf[x] = ff_vorbis_floor1_inverse_db_table[y];
+    }
+    if (x <= 0) {
+        if (err + ady >= 0)
+            y += sy;
+        buf[x] = ff_vorbis_floor1_inverse_db_table[y];
+    }
+}
+
 static void render_line(int x0, int y0, int x1, int y1, float * buf) {
     int dy = y1 - y0;
     int adx = x1 - x0;
+    int ady = FFABS(dy);
+    int sy = dy<0 ? -1 : 1;
+    buf[x0] = ff_vorbis_floor1_inverse_db_table[y0];
+    if(ady*2<=adx) { // optimized common case
+        render_line_unrolled(x0, y0, x1, sy, ady, adx, buf);
+    } else {
     int base = dy / adx;
-    int ady = FFABS(dy) - FFABS(base) * adx;
     int x = x0;
     int y = y0;
-    int err = 0;
-    int sy = dy<0 ? -1 : 1;
-    buf[x] = ff_vorbis_floor1_inverse_db_table[y];
+    int err = -adx;
+    ady -= FFABS(base) * adx;
     while (++x < x1) {
+        y += base;
         err += ady;
-        if (err >= adx) {
+        if (err >= 0) {
             err -= adx;
             y += sy;
         }
-        y += base;
         buf[x] = ff_vorbis_floor1_inverse_db_table[y];
     }
+    }
 }
 
 void ff_vorbis_floor1_render_list(vorbis_floor1_entry * list, int values, uint_fast16_t * y_list, int * flag, int multiplier, float * out, int samples) {