changeset 8373:5961d2deb35f libavcodec

1.6x faster pred*_vertical_add_c
author lorenm
date Thu, 18 Dec 2008 01:05:06 +0000
parents 5917f39590e3
children 9000fd7c166e
files h264pred.c
diffstat 1 files changed, 34 insertions(+), 28 deletions(-) [+]
line wrap: on
line diff
--- a/h264pred.c	Thu Dec 18 00:46:54 2008 +0000
+++ b/h264pred.c	Thu Dec 18 01:05:06 2008 +0000
@@ -989,23 +989,26 @@
 
 static void pred4x4_vertical_add_c(uint8_t *pix, const DCTELEM *block, int stride){
     int i;
+    pix -= stride;
     for(i=0; i<4; i++){
-        pix[0]= pix[0-stride] + block[0];
-        pix[1]= pix[1-stride] + block[1];
-        pix[2]= pix[2-stride] + block[2];
-        pix[3]= pix[3-stride] + block[3];
-        pix+= stride;
-        block+= 4;
+        uint8_t v = pix[0];
+        pix[1*stride]= v += block[0];
+        pix[2*stride]= v += block[4];
+        pix[3*stride]= v += block[8];
+        pix[4*stride]= v += block[12];
+        pix++;
+        block++;
     }
 }
 
 static void pred4x4_horizontal_add_c(uint8_t *pix, const DCTELEM *block, int stride){
     int i;
     for(i=0; i<4; i++){
-        pix[0]= pix[-1] + block[0];
-        pix[1]= pix[ 0] + block[1];
-        pix[2]= pix[ 1] + block[2];
-        pix[3]= pix[ 2] + block[3];
+        uint8_t v = pix[-1];
+        pix[0]= v += block[0];
+        pix[1]= v += block[1];
+        pix[2]= v += block[2];
+        pix[3]= v += block[3];
         pix+= stride;
         block+= 4;
     }
@@ -1013,31 +1016,34 @@
 
 static void pred8x8l_vertical_add_c(uint8_t *pix, const DCTELEM *block, int stride){
     int i;
+    pix -= stride;
     for(i=0; i<8; i++){
-        pix[0]= pix[0-stride] + block[0];
-        pix[1]= pix[1-stride] + block[1];
-        pix[2]= pix[2-stride] + block[2];
-        pix[3]= pix[3-stride] + block[3];
-        pix[4]= pix[4-stride] + block[4];
-        pix[5]= pix[5-stride] + block[5];
-        pix[6]= pix[6-stride] + block[6];
-        pix[7]= pix[7-stride] + block[7];
-        pix+= stride;
-        block+= 8;
+        uint8_t v = pix[0];
+        pix[1*stride]= v += block[0];
+        pix[2*stride]= v += block[8];
+        pix[3*stride]= v += block[16];
+        pix[4*stride]= v += block[24];
+        pix[5*stride]= v += block[32];
+        pix[6*stride]= v += block[40];
+        pix[7*stride]= v += block[48];
+        pix[8*stride]= v += block[56];
+        pix++;
+        block++;
     }
 }
 
 static void pred8x8l_horizontal_add_c(uint8_t *pix, const DCTELEM *block, int stride){
     int i;
     for(i=0; i<8; i++){
-        pix[0]= pix[-1] + block[0];
-        pix[1]= pix[ 0] + block[1];
-        pix[2]= pix[ 1] + block[2];
-        pix[3]= pix[ 2] + block[3];
-        pix[4]= pix[ 3] + block[4];
-        pix[5]= pix[ 4] + block[5];
-        pix[6]= pix[ 5] + block[6];
-        pix[7]= pix[ 6] + block[7];
+        uint8_t v = pix[-1];
+        pix[0]= v += block[0];
+        pix[1]= v += block[1];
+        pix[2]= v += block[2];
+        pix[3]= v += block[3];
+        pix[4]= v += block[4];
+        pix[5]= v += block[5];
+        pix[6]= v += block[6];
+        pix[7]= v += block[7];
         pix+= stride;
         block+= 8;
     }