Mercurial > libavcodec.hg
comparison mpegvideo.c @ 3216:4186eb8d29a1 libavcodec
Prefetch pixels for future motion compensation. 4-10% faster MPEG-1/2/4 decoding (on an Athlon XP).
author | lorenm |
---|---|
date | Thu, 23 Mar 2006 21:54:46 +0000 |
parents | ede5c3c0a0eb |
children | c2c29be6282e |
comparison
equal
deleted
inserted
replaced
3215:06f98047ff26 | 3216:4186eb8d29a1 |
---|---|
3357 ptr= s->edge_emu_buffer; | 3357 ptr= s->edge_emu_buffer; |
3358 } | 3358 } |
3359 pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy); | 3359 pix_op[lowres](dest_cr, ptr, s->uvlinesize, block_s, sx, sy); |
3360 } | 3360 } |
3361 | 3361 |
3362 static inline void prefetch_motion(MpegEncContext *s, uint8_t **pix, int dir){ | |
3363 /* Prefetch reference pixels for an estimated motion vector 4 macroblocks ahead: the first motion vector stored for direction dir (s->mv[dir][0]) serves as the estimate, and the luma offset adds 64 bytes (4 * 16 pixels) to the current macroblock position. pix[0] is addressed with s->linesize, pix[1] and pix[2] with s->uvlinesize (presumably the luma and the two chroma planes of the reference picture). Only s->dsp.prefetch is called here. | |
3364 * Optimized for 64-byte cache lines. */ | |
3365 const int shift = s->quarter_sample ? 2 : 1; /* presumably converts the vector from quarter- or half-sample units to whole pixels */ | |
3366 const int mx= (s->mv[dir][0][0]>>shift) + 16*s->mb_x + 8; /* horizontal target: vector + left edge of the macroblock + 8 */ | |
3367 const int my= (s->mv[dir][0][1]>>shift) + 16*s->mb_y; /* vertical target: vector + top row of the macroblock */ | |
3368 int off= mx + (my + (s->mb_x&3)*4)*s->linesize + 64; /* extra row offset cycles through 0,4,8,12 as mb_x advances */ | |
3369 s->dsp.prefetch(pix[0]+off, s->linesize, 4); /* presumably 4 rows, s->linesize bytes apart -- confirm against the dsp.prefetch prototype */ | |
3370 off= (mx>>1) + ((my>>1) + (s->mb_x&7))*s->uvlinesize + 64; /* halved coordinates for pix[1]/pix[2]; extra row offset cycles through 0..7 */ | |
3371 s->dsp.prefetch(pix[1]+off, pix[2]-pix[1], 2); /* second argument is the distance from pix[1] to pix[2], presumably so the 2 prefetches hit the same offset in both planes */ | |
3372 } | |
3373 | |
3362 /** | 3374 /** |
3363 * motion compensation of a single macroblock | 3375 * motion compensation of a single macroblock |
3364 * @param s context | 3376 * @param s context |
3365 * @param dest_y luma destination pointer | 3377 * @param dest_y luma destination pointer |
3366 * @param dest_cb chroma cb/u destination pointer | 3378 * @param dest_cb chroma cb/u destination pointer |
3380 int mb_x, mb_y, i; | 3392 int mb_x, mb_y, i; |
3381 uint8_t *ptr, *dest; | 3393 uint8_t *ptr, *dest; |
3382 | 3394 |
3383 mb_x = s->mb_x; | 3395 mb_x = s->mb_x; |
3384 mb_y = s->mb_y; | 3396 mb_y = s->mb_y; |
3397 | |
3398 prefetch_motion(s, ref_picture, dir); | |
3385 | 3399 |
3386 if(s->obmc && s->pict_type != B_TYPE){ | 3400 if(s->obmc && s->pict_type != B_TYPE){ |
3387 int16_t mv_cache[4][4][2]; | 3401 int16_t mv_cache[4][4][2]; |
3388 const int xy= s->mb_x + s->mb_y*s->mb_stride; | 3402 const int xy= s->mb_x + s->mb_y*s->mb_stride; |
3389 const int mot_stride= s->b8_stride; | 3403 const int mot_stride= s->b8_stride; |