comparison mpegvideo.c @ 3216:4186eb8d29a1 libavcodec

Prefetch pixels for future motion compensation. 4-10% faster MPEG-1/2/4 decoding (on an Athlon XP).
author lorenm
date Thu, 23 Mar 2006 21:54:46 +0000
parents ede5c3c0a0eb
children c2c29be6282e
comparison
equal deleted inserted replaced
3215:06f98047ff26 3216:4186eb8d29a1
3357 ptr= s->edge_emu_buffer; 3357 ptr= s->edge_emu_buffer;
3358 } 3358 }
3359 pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy); 3359 pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy);
3360 } 3360 }
3361 3361
3362 static inline void prefetch_motion(MpegEncContext *s, uint8_t **pix, int dir){ /* issue prefetch hints on the planes in pix[] at a position derived from the current mv for direction dir */
3363 /* Fetch pixels for the estimated mv 4 macroblocks ahead (the +64 below is presumably 4 * 16 luma pixels);
3364 * optimized for 64-byte cache lines. The estimate is built from s->mv[dir][0], s->mb_x and s->mb_y. */
3365 const int shift = s->quarter_sample ? 2 : 1; /* mv is right-shifted by 2 when quarter_sample is set, else by 1 -- presumably quarter-/half-pel units to whole pixels */
3366 const int mx= (s->mv[dir][0][0]>>shift) + 16*s->mb_x + 8; /* x: shifted mv plus 16*mb_x + 8 (presumably the horizontal centre of a 16-pixel-wide macroblock) */
3367 const int my= (s->mv[dir][0][1]>>shift) + 16*s->mb_y; /* y: shifted mv plus 16*mb_y */
3368 int off= mx + (my + (s->mb_x&3)*4)*s->linesize + 64; /* row offset (mb_x&3)*4 cycles through 0,4,8,12, so 4 consecutive macroblocks each target a different 4-row band */
3369 s->dsp.prefetch(pix[0]+off, s->linesize, 4); /* arguments passed: address, stride, count; presumably touches 4 rows linesize apart -- confirm against dsp.prefetch */
3370 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64; /* same position with x and y halved, using uvlinesize; the row advances by mb_x&7 (0..7) */
3371 s->dsp.prefetch(pix[1]+off, pix[2]-pix[1], 2); /* stride is the distance from pix[1] to pix[2], count 2 -- presumably one line in each of the two chroma planes */
3372 }
3373
3362 /** 3374 /**
3363 * motion compensation of a single macroblock 3375 * motion compensation of a single macroblock
3364 * @param s context 3376 * @param s context
3365 * @param dest_y luma destination pointer 3377 * @param dest_y luma destination pointer
3366 * @param dest_cb chroma cb/u destination pointer 3378 * @param dest_cb chroma cb/u destination pointer
3380 int mb_x, mb_y, i; 3392 int mb_x, mb_y, i;
3381 uint8_t *ptr, *dest; 3393 uint8_t *ptr, *dest;
3382 3394
3383 mb_x = s->mb_x; 3395 mb_x = s->mb_x;
3384 mb_y = s->mb_y; 3396 mb_y = s->mb_y;
3397
3398 prefetch_motion(s, ref_picture, dir);
3385 3399
3386 if(s->obmc && s->pict_type != B_TYPE){ 3400 if(s->obmc && s->pict_type != B_TYPE){
3387 int16_t mv_cache[4][4][2]; 3401 int16_t mv_cache[4][4][2];
3388 const int xy= s->mb_x + s->mb_y*s->mb_stride; 3402 const int xy= s->mb_x + s->mb_y*s->mb_stride;
3389 const int mot_stride= s->b8_stride; 3403 const int mot_stride= s->b8_stride;