Mercurial > libavcodec.hg
diff h264.h @ 11277:c12d6c6c027e libavcodec
Change mvd_cache & mvd_table to 8 bit; this is overall a bit faster
for high resolution videos.
About 20 cycles faster per MB for cathedral.
author | michael |
---|---|
date | Wed, 24 Feb 2010 20:43:06 +0000 |
parents | e817a3c2ec2e |
children | cf41a3e8e14e |
line wrap: on
line diff
--- a/h264.h Wed Feb 24 20:37:58 2010 +0000 +++ b/h264.h Wed Feb 24 20:43:06 2010 +0000 @@ -486,8 +486,8 @@ /* chroma_pred_mode for i4x4 or i16x16, else 0 */ uint8_t *chroma_pred_mode_table; int last_qscale_diff; - int16_t (*mvd_table[2])[2]; - DECLARE_ALIGNED_16(int16_t, mvd_cache)[2][5*8][2]; + uint8_t (*mvd_table[2])[2]; + DECLARE_ALIGNED_16(uint8_t, mvd_cache)[2][5*8][2]; uint8_t *direct_table; uint8_t direct_cache[5*8]; @@ -732,6 +732,14 @@ #endif } +static av_always_inline uint16_t pack8to16(int a, int b){ +#if HAVE_BIGENDIAN + return (b&0xFF) + (a<<8); +#else + return (a&0xFF) + (b<<8); +#endif +} + /** * gets the chroma qp. */ @@ -1060,32 +1068,31 @@ /* XXX beurk, Load mvd */ if(USES_LIST(top_type, list)){ const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; - AV_COPY128(h->mvd_cache[list][scan8[0] + 0 - 1*8], h->mvd_table[list][b_xy + 0]); + AV_COPY64(h->mvd_cache[list][scan8[0] + 0 - 1*8], h->mvd_table[list][b_xy + 0]); }else{ - AV_ZERO128(h->mvd_cache[list][scan8[0] + 0 - 1*8]); + AV_ZERO64(h->mvd_cache[list][scan8[0] + 0 - 1*8]); } if(USES_LIST(left_type[0], list)){ const int b_xy= h->mb2b_xy[left_xy[0]] + 3; - AV_COPY32(h->mvd_cache[list][scan8[0] - 1 + 0*8], h->mvd_table[list][b_xy + h->b_stride*left_block[0]]); - AV_COPY32(h->mvd_cache[list][scan8[0] - 1 + 1*8], h->mvd_table[list][b_xy + h->b_stride*left_block[1]]); + AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 0*8], h->mvd_table[list][b_xy + h->b_stride*left_block[0]]); + AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 1*8], h->mvd_table[list][b_xy + h->b_stride*left_block[1]]); }else{ - AV_ZERO32(h->mvd_cache [list][scan8[0] - 1 + 0*8]); - AV_ZERO32(h->mvd_cache [list][scan8[0] - 1 + 1*8]); + AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 0*8]); + AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 1*8]); } if(USES_LIST(left_type[1], list)){ const int b_xy= h->mb2b_xy[left_xy[1]] + 3; - AV_COPY32(h->mvd_cache[list][scan8[0] - 1 + 2*8], h->mvd_table[list][b_xy + h->b_stride*left_block[2]]); - 
AV_COPY32(h->mvd_cache[list][scan8[0] - 1 + 3*8], h->mvd_table[list][b_xy + h->b_stride*left_block[3]]); + AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 2*8], h->mvd_table[list][b_xy + h->b_stride*left_block[2]]); + AV_COPY16(h->mvd_cache[list][scan8[0] - 1 + 3*8], h->mvd_table[list][b_xy + h->b_stride*left_block[3]]); }else{ - AV_ZERO32(h->mvd_cache [list][scan8[0] - 1 + 2*8]); - AV_ZERO32(h->mvd_cache [list][scan8[0] - 1 + 3*8]); + AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 2*8]); + AV_ZERO16(h->mvd_cache [list][scan8[0] - 1 + 3*8]); } - AV_ZERO32(h->mvd_cache [list][scan8[5 ]+1]); - AV_ZERO32(h->mvd_cache [list][scan8[7 ]+1]); - AV_ZERO32(h->mvd_cache [list][scan8[13]+1]); //FIXME remove past 3 (init somewhere else) - AV_ZERO32(h->mvd_cache [list][scan8[4 ]]); - AV_ZERO32(h->mvd_cache [list][scan8[12]]); - + AV_ZERO16(h->mvd_cache [list][scan8[5 ]+1]); + AV_ZERO16(h->mvd_cache [list][scan8[7 ]+1]); + AV_ZERO16(h->mvd_cache [list][scan8[13]+1]); //FIXME remove past 3 (init somewhere else) + AV_ZERO16(h->mvd_cache [list][scan8[4 ]]); + AV_ZERO16(h->mvd_cache [list][scan8[12]]); if(h->slice_type_nos == FF_B_TYPE){ fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, MB_TYPE_16x16>>1, 1); @@ -1414,13 +1421,13 @@ AV_COPY128(mv_dst + y*b_stride, mv_src + 8*y); } if( CABAC ) { - int16_t (*mvd_dst)[2] = &h->mvd_table[list][b_xy]; - int16_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]]; + uint8_t (*mvd_dst)[2] = &h->mvd_table[list][b_xy]; + uint8_t (*mvd_src)[2] = &h->mvd_cache[list][scan8[0]]; if(IS_SKIP(mb_type)) - fill_rectangle(mvd_dst, 4, 4, h->b_stride, 0, 4); + fill_rectangle(mvd_dst, 4, 4, h->b_stride, 0, 2); else for(y=0; y<4; y++){ - AV_COPY128(mvd_dst + y*b_stride, mvd_src + 8*y); + AV_COPY64(mvd_dst + y*b_stride, mvd_src + 8*y); } }