comparison h264_loopfilter.c @ 11025:cd1f5f6a2e45 libavcodec

Use a table to speed up access to non_zero_count in MBAFF with differing interlacing. ~4 CPU cycles speedup.
author michael
date Wed, 27 Jan 2010 11:14:29 +0000
parents 2bc05f2fc993
children f5678fb91140
comparison
equal deleted inserted replaced
11024:5ab861519e79 11025:cd1f5f6a2e45
630 630
631 if( IS_INTRA(mb_type) ) 631 if( IS_INTRA(mb_type) )
632 *(uint64_t*)&bS[0]= 632 *(uint64_t*)&bS[0]=
633 *(uint64_t*)&bS[4]= 0x0004000400040004ULL; 633 *(uint64_t*)&bS[4]= 0x0004000400040004ULL;
634 else { 634 else {
635 static const uint8_t offset[2][2][8]={
636 {
637 {7+8*0, 7+8*0, 7+8*0, 7+8*0, 7+8*1, 7+8*1, 7+8*1, 7+8*1},
638 {7+8*2, 7+8*2, 7+8*2, 7+8*2, 7+8*3, 7+8*3, 7+8*3, 7+8*3},
639 },{
640 {7+8*0, 7+8*1, 7+8*2, 7+8*3, 7+8*0, 7+8*1, 7+8*2, 7+8*3},
641 {7+8*0, 7+8*1, 7+8*2, 7+8*3, 7+8*0, 7+8*1, 7+8*2, 7+8*3},
642 }
643 };
644 const uint8_t *off= offset[MB_FIELD][mb_y&1];
635 for( i = 0; i < 8; i++ ) { 645 for( i = 0; i < 8; i++ ) {
636 int j= MB_FIELD ? i>>2 : i&1; 646 int j= MB_FIELD ? i>>2 : i&1;
637 int mbn_xy = h->left_mb_xy[j]; 647 int mbn_xy = h->left_mb_xy[j];
638 int mbn_type= h->left_type[j]; 648 int mbn_type= h->left_type[j];
639 649
642 else{ 652 else{
643 bS[i] = 1 + !!(h->non_zero_count_cache[12+8*(i>>1)] | 653 bS[i] = 1 + !!(h->non_zero_count_cache[12+8*(i>>1)] |
644 ((!h->pps.cabac && IS_8x8DCT(mbn_type)) ? 654 ((!h->pps.cabac && IS_8x8DCT(mbn_type)) ?
645 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2)) 655 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
646 : 656 :
647 h->non_zero_count[mbn_xy][7+(MB_FIELD ? (i&3) : (i>>2)+(mb_y&1)*2)*8])); 657 h->non_zero_count[mbn_xy][ off[i] ]));
648 } 658 }
649 } 659 }
650 } 660 }
651 661
652 mb_qp = s->current_picture.qscale_table[mb_xy]; 662 mb_qp = s->current_picture.qscale_table[mb_xy];