# HG changeset patch # User michael # Date 1267106559 0 # Node ID 613370892df26b53510e151b105cd03b7a0f53c3 # Parent aaca4b58880ff42add9eed21f7724f99f0ff658c Store intra4x4_pred_mode per row only. about 5 cpu cycles slower in the local code but should be overall faster due to reduced cache use. (my sample though has too few intra4x4 blocks for this to be measurable easily either way) diff -r aaca4b58880f -r 613370892df2 h264.c --- a/h264.c Thu Feb 25 12:51:32 2010 +0000 +++ b/h264.c Thu Feb 25 14:02:39 2010 +0000 @@ -52,15 +52,15 @@ }; void ff_h264_write_back_intra_pred_mode(H264Context *h){ - const int mb_xy= h->mb_xy; - - h->intra4x4_pred_mode[mb_xy][0]= h->intra4x4_pred_mode_cache[7+8*1]; - h->intra4x4_pred_mode[mb_xy][1]= h->intra4x4_pred_mode_cache[7+8*2]; - h->intra4x4_pred_mode[mb_xy][2]= h->intra4x4_pred_mode_cache[7+8*3]; - h->intra4x4_pred_mode[mb_xy][3]= h->intra4x4_pred_mode_cache[7+8*4]; - h->intra4x4_pred_mode[mb_xy][4]= h->intra4x4_pred_mode_cache[4+8*4]; - h->intra4x4_pred_mode[mb_xy][5]= h->intra4x4_pred_mode_cache[5+8*4]; - h->intra4x4_pred_mode[mb_xy][6]= h->intra4x4_pred_mode_cache[6+8*4]; + int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[h->mb_xy]; + + mode[0]= h->intra4x4_pred_mode_cache[7+8*1]; + mode[1]= h->intra4x4_pred_mode_cache[7+8*2]; + mode[2]= h->intra4x4_pred_mode_cache[7+8*3]; + mode[3]= h->intra4x4_pred_mode_cache[7+8*4]; + mode[4]= h->intra4x4_pred_mode_cache[4+8*4]; + mode[5]= h->intra4x4_pred_mode_cache[5+8*4]; + mode[6]= h->intra4x4_pred_mode_cache[6+8*4]; } /** diff -r aaca4b58880f -r 613370892df2 h264.h --- a/h264.h Thu Feb 25 12:51:32 2010 +0000 +++ b/h264.h Thu Feb 25 14:02:39 2010 +0000 @@ -298,7 +298,7 @@ int topleft_partition; int8_t intra4x4_pred_mode_cache[5*8]; - int8_t (*intra4x4_pred_mode)[8]; + int8_t (*intra4x4_pred_mode); H264PredContext hpc; unsigned int topleft_samples_available; unsigned int top_samples_available; @@ -886,10 +886,11 @@ if(IS_INTRA4x4(mb_type)){ if(IS_INTRA4x4(top_type)){ - 
h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4]; - h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5]; - h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6]; - h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3]; + int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[top_xy]; + h->intra4x4_pred_mode_cache[4+8*0]= mode[4]; + h->intra4x4_pred_mode_cache[5+8*0]= mode[5]; + h->intra4x4_pred_mode_cache[6+8*0]= mode[6]; + h->intra4x4_pred_mode_cache[7+8*0]= mode[3]; }else{ int pred; if(!(top_type & type_mask)) @@ -904,8 +905,9 @@ } for(i=0; i<2; i++){ if(IS_INTRA4x4(left_type[i])){ - h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]]; - h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]]; + int8_t *mode= h->intra4x4_pred_mode + h->mb2br_xy[left_xy[i]]; + h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= mode[left_block[0+2*i]]; + h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= mode[left_block[1+2*i]]; }else{ int pred; if(!(left_type[i] & type_mask))