changeset 3216:4186eb8d29a1 libavcodec

Prefetch pixels for future motion compensation. 4-10% faster MPEG-1/2/4 decoding (on an Athlon XP).
author lorenm
date Thu, 23 Mar 2006 21:54:46 +0000
parents 06f98047ff26
children d9eceb8313c2
files mpegvideo.c
diffstat 1 files changed, 14 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/mpegvideo.c	Thu Mar 23 20:16:36 2006 +0000
+++ b/mpegvideo.c	Thu Mar 23 21:54:46 2006 +0000
@@ -3359,6 +3359,18 @@
     pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
 }
 
+static inline void prefetch_motion(MpegEncContext *s, uint8_t **pix, int dir){
+    /* Prefetch reference pixels for upcoming motion compensation: the current mv (direction dir) is reused as the estimate and the fetch address is moved 64 bytes to the right, i.e. 4 luma macroblocks ahead.
+     * The row selection below is tuned for 64-byte cache lines; pix[0] is used as luma, pix[1]/pix[2] as the chroma planes. */
+    const int shift = s->quarter_sample ? 2 : 1; /* presumably converts qpel (>>2) or hpel (>>1) mv units to full pixels -- confirm against the mv conventions */
+    const int mx= (s->mv[dir][0][0]>>shift) + 16*s->mb_x + 8; /* x position: shifted mv x + left edge of this macroblock (16*mb_x) + 8 */
+    const int my= (s->mv[dir][0][1]>>shift) + 16*s->mb_y; /* y position: shifted mv y + top edge of this macroblock (16*mb_y) */
+    int off= mx + (my + (s->mb_x&3)*4)*s->linesize + 64; /* luma offset: start row is my + 0/4/8/12 depending on mb_x&3, so 4 consecutive macroblocks start at different rows; +64 looks ahead in x */
+    s->dsp.prefetch(pix[0]+off, s->linesize, 4); /* NOTE(review): presumably prefetch(ptr, stride, count) touches count lines stride bytes apart -- confirm in dsputil */
+    off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64; /* chroma offset: halved coordinates, start row advanced by mb_x&7 (cycles every 8 macroblocks); +64 is applied unscaled here */
+    s->dsp.prefetch(pix[1]+off, pix[2]-pix[1], 2); /* stride is the distance between pix[1] and pix[2], so the 2 fetches presumably hit the same offset in each chroma plane */
+}
+
 /**
  * motion compensation of a single macroblock
  * @param s context
@@ -3383,6 +3395,8 @@
     mb_x = s->mb_x;
     mb_y = s->mb_y;
 
+    prefetch_motion(s, ref_picture, dir);
+
     if(s->obmc && s->pict_type != B_TYPE){
         int16_t mv_cache[4][4][2];
         const int xy= s->mb_x + s->mb_y*s->mb_stride;