changeset 2223:b26474e72d6d libavcodec

use h264 MC code if possible; 50% faster predict_plane() if mmx2/3dnow is available; 0.1% bitrate increase
author michael
date Sun, 12 Sep 2004 21:32:36 +0000
parents ef568cc0972c
children 11d54cb7ac4e
files snow.c
diffstat 1 files changed, 7 insertions(+), 25 deletions(-) [+]
line wrap: on
line diff
--- a/snow.c	Sun Sep 12 17:35:54 2004 +0000
+++ b/snow.c	Sun Sep 12 21:32:36 2004 +0000
@@ -1988,29 +1988,6 @@
 STOP_TIMER("mc_block")
 }
 
-#define mcb(dx,dy,b_w)\
-static void mc_block ## dx ## dy(uint8_t *dst, uint8_t *src, int stride){\
-    uint8_t tmp[stride*(b_w+5)];\
-    mc_block(dst, src-2-2*stride, tmp, stride, b_w, b_w, dx, dy);\
-}
-
-mcb( 0, 0,16)
-mcb( 4, 0,16)
-mcb( 8, 0,16)
-mcb(12, 0,16)
-mcb( 0, 4,16)
-mcb( 4, 4,16)
-mcb( 8, 4,16)
-mcb(12, 4,16)
-mcb( 0, 8,16)
-mcb( 4, 8,16)
-mcb( 8, 8,16)
-mcb(12, 8,16)
-mcb( 0,12,16)
-mcb( 4,12,16)
-mcb( 8,12,16)
-mcb(12,12,16)
-
 #define mca(dx,dy,b_w)\
 static void mc_block_hpel ## dx ## dy(uint8_t *dst, uint8_t *src, int stride, int h){\
     uint8_t tmp[stride*(b_w+5)];\
@@ -2036,6 +2013,8 @@
         const int scale= plane_index ?  s->mv_scale : 2*s->mv_scale;
         int mx= block->mx*scale;
         int my= block->my*scale;
+        const int dx= mx&15;
+        const int dy= my&15;
         sx += (mx>>4) - 2;
         sy += (my>>4) - 2;
         src += sx + sy*stride;
@@ -2044,7 +2023,10 @@
             ff_emulated_edge_mc(tmp + MB_SIZE, src, stride, b_w+5, b_h+5, sx, sy, w, h);
             src= tmp + MB_SIZE;
         }
-        mc_block(dst, src, tmp, stride, b_w, b_h, mx&15, my&15);
+        if((dx&3) || (dy&3) || b_w!=b_h || (b_w!=4 && b_w!=8 && b_w!=16))
+            mc_block(dst, src, tmp, stride, b_w, b_h, dx, dy);
+        else
+            s->dsp.put_h264_qpel_pixels_tab[2-(b_w>>3)][dy+(dx>>2)](dst,src + 2 + 2*stride,stride);
     }
 }
 
@@ -2464,7 +2446,7 @@
 #define mcf(dx,dy)\
     s->dsp.put_qpel_pixels_tab       [0][dy+dx/4]=\
     s->dsp.put_no_rnd_qpel_pixels_tab[0][dy+dx/4]=\
-        mc_block ## dx ## dy;
+        s->dsp.put_h264_qpel_pixels_tab[0][dy+dx/4];
 
     mcf( 0, 0)
     mcf( 4, 0)