comparison h264.c @ 4008:b636f3d59283 libavcodec

Prevent "mb level" get_cabac() calls from being inlined by using get_cabac_noinline() instead (makes decode_mb_cabac() 3% faster on P3).
author michael
date Thu, 12 Oct 2006 14:49:19 +0000
parents 33d3d4ab9f59
children e5f7797e53c7
comparison
equal deleted inserted replaced
4007:33d3d4ab9f59 4008:b636f3d59283
5631 } 5631 }
5632 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) { 5632 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) {
5633 ctx += 1; 5633 ctx += 1;
5634 } 5634 }
5635 5635
5636 return get_cabac( &h->cabac, &h->cabac_state[70 + ctx] ); 5636 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] );
5637 } 5637 }
5638 5638
5639 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) { 5639 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
5640 uint8_t *state= &h->cabac_state[ctx_base]; 5640 uint8_t *state= &h->cabac_state[ctx_base];
5641 int mb_type; 5641 int mb_type;
5647 int ctx=0; 5647 int ctx=0;
5648 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) ) 5648 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) )
5649 ctx++; 5649 ctx++;
5650 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) ) 5650 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) )
5651 ctx++; 5651 ctx++;
5652 if( get_cabac( &h->cabac, &state[ctx] ) == 0 ) 5652 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 )
5653 return 0; /* I4x4 */ 5653 return 0; /* I4x4 */
5654 state += 2; 5654 state += 2;
5655 }else{ 5655 }else{
5656 if( get_cabac( &h->cabac, &state[0] ) == 0 ) 5656 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 )
5657 return 0; /* I4x4 */ 5657 return 0; /* I4x4 */
5658 } 5658 }
5659 5659
5660 if( get_cabac_terminate( &h->cabac ) ) 5660 if( get_cabac_terminate( &h->cabac ) )
5661 return 25; /* PCM */ 5661 return 25; /* PCM */
5662 5662
5663 mb_type = 1; /* I16x16 */ 5663 mb_type = 1; /* I16x16 */
5664 mb_type += 12 * get_cabac( &h->cabac, &state[1] ); /* cbp_luma != 0 */ 5664 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */
5665 if( get_cabac( &h->cabac, &state[2] ) ) /* cbp_chroma */ 5665 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */
5666 mb_type += 4 + 4 * get_cabac( &h->cabac, &state[2+intra_slice] ); 5666 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] );
5667 mb_type += 2 * get_cabac( &h->cabac, &state[3+intra_slice] ); 5667 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] );
5668 mb_type += 1 * get_cabac( &h->cabac, &state[3+2*intra_slice] ); 5668 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] );
5669 return mb_type; 5669 return mb_type;
5670 } 5670 }
5671 5671
5672 static int decode_cabac_mb_type( H264Context *h ) { 5672 static int decode_cabac_mb_type( H264Context *h ) {
5673 MpegEncContext * const s = &h->s; 5673 MpegEncContext * const s = &h->s;
5674 5674
5675 if( h->slice_type == I_TYPE ) { 5675 if( h->slice_type == I_TYPE ) {
5676 return decode_cabac_intra_mb_type(h, 3, 1); 5676 return decode_cabac_intra_mb_type(h, 3, 1);
5677 } else if( h->slice_type == P_TYPE ) { 5677 } else if( h->slice_type == P_TYPE ) {
5678 if( get_cabac( &h->cabac, &h->cabac_state[14] ) == 0 ) { 5678 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
5679 /* P-type */ 5679 /* P-type */
5680 if( get_cabac( &h->cabac, &h->cabac_state[15] ) == 0 ) { 5680 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
5681 /* P_L0_D16x16, P_8x8 */ 5681 /* P_L0_D16x16, P_8x8 */
5682 return 3 * get_cabac( &h->cabac, &h->cabac_state[16] ); 5682 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
5683 } else { 5683 } else {
5684 /* P_L0_D8x16, P_L0_D16x8 */ 5684 /* P_L0_D8x16, P_L0_D16x8 */
5685 return 2 - get_cabac( &h->cabac, &h->cabac_state[17] ); 5685 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
5686 } 5686 }
5687 } else { 5687 } else {
5688 return decode_cabac_intra_mb_type(h, 17, 0) + 5; 5688 return decode_cabac_intra_mb_type(h, 17, 0) + 5;
5689 } 5689 }
5690 } else if( h->slice_type == B_TYPE ) { 5690 } else if( h->slice_type == B_TYPE ) {
5696 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) ) 5696 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) )
5697 ctx++; 5697 ctx++;
5698 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) ) 5698 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) )
5699 ctx++; 5699 ctx++;
5700 5700
5701 if( !get_cabac( &h->cabac, &h->cabac_state[27+ctx] ) ) 5701 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) )
5702 return 0; /* B_Direct_16x16 */ 5702 return 0; /* B_Direct_16x16 */
5703 5703
5704 if( !get_cabac( &h->cabac, &h->cabac_state[27+3] ) ) { 5704 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
5705 return 1 + get_cabac( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */ 5705 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
5706 } 5706 }
5707 5707
5708 bits = get_cabac( &h->cabac, &h->cabac_state[27+4] ) << 3; 5708 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
5709 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 2; 5709 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
5710 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 1; 5710 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
5711 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ); 5711 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5712 if( bits < 8 ) 5712 if( bits < 8 )
5713 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */ 5713 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
5714 else if( bits == 13 ) { 5714 else if( bits == 13 ) {
5715 return decode_cabac_intra_mb_type(h, 32, 0) + 23; 5715 return decode_cabac_intra_mb_type(h, 32, 0) + 23;
5716 } else if( bits == 14 ) 5716 } else if( bits == 14 )
5717 return 11; /* B_L1_L0_8x16 */ 5717 return 11; /* B_L1_L0_8x16 */
5718 else if( bits == 15 ) 5718 else if( bits == 15 )
5719 return 22; /* B_8x8 */ 5719 return 22; /* B_8x8 */
5720 5720
5721 bits= ( bits<<1 ) | get_cabac( &h->cabac, &h->cabac_state[27+5] ); 5721 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
5722 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */ 5722 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
5723 } else { 5723 } else {
5724 /* TODO SI/SP frames? */ 5724 /* TODO SI/SP frames? */
5725 return -1; 5725 return -1;
5726 } 5726 }
5757 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] )) 5757 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] ))
5758 ctx++; 5758 ctx++;
5759 5759
5760 if( h->slice_type == B_TYPE ) 5760 if( h->slice_type == B_TYPE )
5761 ctx += 13; 5761 ctx += 13;
5762 return get_cabac( &h->cabac, &h->cabac_state[11+ctx] ); 5762 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
5763 } 5763 }
5764 5764
5765 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) { 5765 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
5766 int mode = 0; 5766 int mode = 0;
5767 5767
5789 ctx++; 5789 ctx++;
5790 5790
5791 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 ) 5791 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 )
5792 ctx++; 5792 ctx++;
5793 5793
5794 if( get_cabac( &h->cabac, &h->cabac_state[64+ctx] ) == 0 ) 5794 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 )
5795 return 0; 5795 return 0;
5796 5796
5797 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 ) 5797 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5798 return 1; 5798 return 1;
5799 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 ) 5799 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 )
5800 return 2; 5800 return 2;
5801 else 5801 else
5802 return 3; 5802 return 3;
5803 } 5803 }
5804 5804
5871 cbp_b = (h-> top_cbp>>4)&0x03; 5871 cbp_b = (h-> top_cbp>>4)&0x03;
5872 5872
5873 ctx = 0; 5873 ctx = 0;
5874 if( cbp_a > 0 ) ctx++; 5874 if( cbp_a > 0 ) ctx++;
5875 if( cbp_b > 0 ) ctx += 2; 5875 if( cbp_b > 0 ) ctx += 2;
5876 if( get_cabac( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 ) 5876 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 )
5877 return 0; 5877 return 0;
5878 5878
5879 ctx = 4; 5879 ctx = 4;
5880 if( cbp_a == 2 ) ctx++; 5880 if( cbp_a == 2 ) ctx++;
5881 if( cbp_b == 2 ) ctx += 2; 5881 if( cbp_b == 2 ) ctx += 2;
5882 return 1 + get_cabac( &h->cabac, &h->cabac_state[77 + ctx] ); 5882 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] );
5883 } 5883 }
5884 static int decode_cabac_mb_dqp( H264Context *h) { 5884 static int decode_cabac_mb_dqp( H264Context *h) {
5885 MpegEncContext * const s = &h->s; 5885 MpegEncContext * const s = &h->s;
5886 int mbn_xy; 5886 int mbn_xy;
5887 int ctx = 0; 5887 int ctx = 0;
5893 mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride; 5893 mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride;
5894 5894
5895 if( h->last_qscale_diff != 0 ) 5895 if( h->last_qscale_diff != 0 )
5896 ctx++; 5896 ctx++;
5897 5897
5898 while( get_cabac( &h->cabac, &h->cabac_state[60 + ctx] ) ) { 5898 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
5899 if( ctx < 2 ) 5899 if( ctx < 2 )
5900 ctx = 2; 5900 ctx = 2;
5901 else 5901 else
5902 ctx = 3; 5902 ctx = 3;
5903 val++; 5903 val++;
5935 type += get_cabac( &h->cabac, &h->cabac_state[39] ); 5935 type += get_cabac( &h->cabac, &h->cabac_state[39] );
5936 return type; 5936 return type;
5937 } 5937 }
5938 5938
5939 static inline int decode_cabac_mb_transform_size( H264Context *h ) { 5939 static inline int decode_cabac_mb_transform_size( H264Context *h ) {
5940 return get_cabac( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] ); 5940 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
5941 } 5941 }
5942 5942
5943 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) { 5943 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
5944 int refa = h->ref_cache[list][scan8[n] - 1]; 5944 int refa = h->ref_cache[list][scan8[n] - 1];
5945 int refb = h->ref_cache[list][scan8[n] - 8]; 5945 int refb = h->ref_cache[list][scan8[n] - 8];
7408 else 7408 else
7409 h->cabac_state[i] = 2 * ( pre - 64 ) + 1; 7409 h->cabac_state[i] = 2 * ( pre - 64 ) + 1;
7410 } 7410 }
7411 7411
7412 for(;;){ 7412 for(;;){
7413 //START_TIMER
7413 int ret = decode_mb_cabac(h); 7414 int ret = decode_mb_cabac(h);
7414 int eos; 7415 int eos;
7416 //STOP_TIMER("decode_mb_cabac")
7415 7417
7416 if(ret>=0) hl_decode_mb(h); 7418 if(ret>=0) hl_decode_mb(h);
7417 7419
7418 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ? 7420 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ?
7419 s->mb_y++; 7421 s->mb_y++;