changeset 2221:3543987dccad libavcodec

Use pointer arithmetic in mc_block() (advance src/tmp/dst by stride per row instead of recomputing y*stride for every sample); 25% faster.
author michael
date Sun, 12 Sep 2004 10:51:02 +0000
parents 21947e176d4d
children ef568cc0972c
files snow.c
diffstat 1 files changed, 24 insertions(+), 16 deletions(-) [+]
line wrap: on
line diff
--- a/snow.c	Fri Sep 10 19:40:55 2004 +0000
+++ b/snow.c	Sun Sep 12 10:51:02 2004 +0000
@@ -1929,14 +1929,15 @@
 
 static void mc_block(uint8_t *dst, uint8_t *src, uint8_t *tmp, int stride, int b_w, int b_h, int dx, int dy){
     int x, y;
+START_TIMER
     for(y=0; y < b_h+5; y++){
         for(x=0; x < b_w; x++){
-            int a0= src[x     + y*stride];
-            int a1= src[x + 1 + y*stride];
-            int a2= src[x + 2 + y*stride];
-            int a3= src[x + 3 + y*stride];
-            int a4= src[x + 4 + y*stride];
-            int a5= src[x + 5 + y*stride];
+            int a0= src[x    ];
+            int a1= src[x + 1];
+            int a2= src[x + 2];
+            int a3= src[x + 3];
+            int a4= src[x + 4];
+            int a5= src[x + 5];
 //            int am= 9*(a1+a2) - (a0+a3);
             int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
 //            int am= 18*(a2+a3) - 2*(a1+a4);
@@ -1945,23 +1946,27 @@
 
 //            if(b_w==16) am= 8*(a1+a2);
 
-            if(dx<8) tmp[x + y*stride]= (32*a2*( 8-dx) +    am* dx    + 128)>>8;
-            else     tmp[x + y*stride]= (   am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
+            if(dx<8) tmp[x]= (32*a2*( 8-dx) +    am* dx    + 128)>>8;
+            else     tmp[x]= (   am*(16-dx) + 32*a3*(dx-8) + 128)>>8;
 
 /*            if     (dx< 4) tmp[x + y*stride]= (16*a1*( 4-dx) +    aL* dx     + 32)>>6;
             else if(dx< 8) tmp[x + y*stride]= (   aL*( 8-dx) +    am*(dx- 4) + 32)>>6;
             else if(dx<12) tmp[x + y*stride]= (   am*(12-dx) +    aR*(dx- 8) + 32)>>6;
             else           tmp[x + y*stride]= (   aR*(16-dx) + 16*a2*(dx-12) + 32)>>6;*/
         }
+        tmp += stride;
+        src += stride;
     }
+    tmp -= (b_h+5)*stride;
+    
     for(y=0; y < b_h; y++){
         for(x=0; x < b_w; x++){
-            int a0= tmp[x +  y     *stride];
-            int a1= tmp[x + (y + 1)*stride];
-            int a2= tmp[x + (y + 2)*stride];
-            int a3= tmp[x + (y + 3)*stride];
-            int a4= tmp[x + (y + 4)*stride];
-            int a5= tmp[x + (y + 5)*stride];
+            int a0= tmp[x + 0*stride];
+            int a1= tmp[x + 1*stride];
+            int a2= tmp[x + 2*stride];
+            int a3= tmp[x + 3*stride];
+            int a4= tmp[x + 4*stride];
+            int a5= tmp[x + 5*stride];
             int am= 20*(a2+a3) - 5*(a1+a4) + (a0+a5);
 //            int am= 18*(a2+a3) - 2*(a1+a4);
 /*            int aL= (-7*a0 + 105*a1 + 35*a2 - 5*a3)>>3;
@@ -1969,15 +1974,18 @@
             
 //            if(b_w==16) am= 8*(a1+a2);
 
-            if(dy<8) dst[x + y*stride]= (32*a2*( 8-dy) +    am* dy    + 128)>>8;
-            else     dst[x + y*stride]= (   am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
+            if(dy<8) dst[x]= (32*a2*( 8-dy) +    am* dy    + 128)>>8;
+            else     dst[x]= (   am*(16-dy) + 32*a3*(dy-8) + 128)>>8;
 
 /*            if     (dy< 4) tmp[x + y*stride]= (16*a1*( 4-dy) +    aL* dy     + 32)>>6;
             else if(dy< 8) tmp[x + y*stride]= (   aL*( 8-dy) +    am*(dy- 4) + 32)>>6;
             else if(dy<12) tmp[x + y*stride]= (   am*(12-dy) +    aR*(dy- 8) + 32)>>6;
             else           tmp[x + y*stride]= (   aR*(16-dy) + 16*a2*(dy-12) + 32)>>6;*/
         }
+        dst += stride;
+        tmp += stride;
     }
+STOP_TIMER("mc_block")
 }
 
 #define mcb(dx,dy,b_w)\