Mercurial > libavcodec.hg
comparison h264.c @ 4008:b636f3d59283 libavcodec
prevent "mb level" get_cabac() calls from being inlined (3% faster decode_mb_cabac() on P3)
author | michael |
---|---|
date | Thu, 12 Oct 2006 14:49:19 +0000 |
parents | 33d3d4ab9f59 |
children | e5f7797e53c7 |
comparison
equal
deleted
inserted
replaced
4007:33d3d4ab9f59 | 4008:b636f3d59283 |
---|---|
5631 } | 5631 } |
5632 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) { | 5632 if( h->slice_table[mbb_xy] == h->slice_num && IS_INTERLACED( s->current_picture.mb_type[mbb_xy] ) ) { |
5633 ctx += 1; | 5633 ctx += 1; |
5634 } | 5634 } |
5635 | 5635 |
5636 return get_cabac( &h->cabac, &h->cabac_state[70 + ctx] ); | 5636 return get_cabac_noinline( &h->cabac, &h->cabac_state[70 + ctx] ); |
5637 } | 5637 } |
5638 | 5638 |
5639 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) { | 5639 static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) { |
5640 uint8_t *state= &h->cabac_state[ctx_base]; | 5640 uint8_t *state= &h->cabac_state[ctx_base]; |
5641 int mb_type; | 5641 int mb_type; |
5647 int ctx=0; | 5647 int ctx=0; |
5648 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) ) | 5648 if( h->slice_table[mba_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mba_xy] ) ) |
5649 ctx++; | 5649 ctx++; |
5650 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) ) | 5650 if( h->slice_table[mbb_xy] == h->slice_num && !IS_INTRA4x4( s->current_picture.mb_type[mbb_xy] ) ) |
5651 ctx++; | 5651 ctx++; |
5652 if( get_cabac( &h->cabac, &state[ctx] ) == 0 ) | 5652 if( get_cabac_noinline( &h->cabac, &state[ctx] ) == 0 ) |
5653 return 0; /* I4x4 */ | 5653 return 0; /* I4x4 */ |
5654 state += 2; | 5654 state += 2; |
5655 }else{ | 5655 }else{ |
5656 if( get_cabac( &h->cabac, &state[0] ) == 0 ) | 5656 if( get_cabac_noinline( &h->cabac, &state[0] ) == 0 ) |
5657 return 0; /* I4x4 */ | 5657 return 0; /* I4x4 */ |
5658 } | 5658 } |
5659 | 5659 |
5660 if( get_cabac_terminate( &h->cabac ) ) | 5660 if( get_cabac_terminate( &h->cabac ) ) |
5661 return 25; /* PCM */ | 5661 return 25; /* PCM */ |
5662 | 5662 |
5663 mb_type = 1; /* I16x16 */ | 5663 mb_type = 1; /* I16x16 */ |
5664 mb_type += 12 * get_cabac( &h->cabac, &state[1] ); /* cbp_luma != 0 */ | 5664 mb_type += 12 * get_cabac_noinline( &h->cabac, &state[1] ); /* cbp_luma != 0 */ |
5665 if( get_cabac( &h->cabac, &state[2] ) ) /* cbp_chroma */ | 5665 if( get_cabac_noinline( &h->cabac, &state[2] ) ) /* cbp_chroma */ |
5666 mb_type += 4 + 4 * get_cabac( &h->cabac, &state[2+intra_slice] ); | 5666 mb_type += 4 + 4 * get_cabac_noinline( &h->cabac, &state[2+intra_slice] ); |
5667 mb_type += 2 * get_cabac( &h->cabac, &state[3+intra_slice] ); | 5667 mb_type += 2 * get_cabac_noinline( &h->cabac, &state[3+intra_slice] ); |
5668 mb_type += 1 * get_cabac( &h->cabac, &state[3+2*intra_slice] ); | 5668 mb_type += 1 * get_cabac_noinline( &h->cabac, &state[3+2*intra_slice] ); |
5669 return mb_type; | 5669 return mb_type; |
5670 } | 5670 } |
5671 | 5671 |
5672 static int decode_cabac_mb_type( H264Context *h ) { | 5672 static int decode_cabac_mb_type( H264Context *h ) { |
5673 MpegEncContext * const s = &h->s; | 5673 MpegEncContext * const s = &h->s; |
5674 | 5674 |
5675 if( h->slice_type == I_TYPE ) { | 5675 if( h->slice_type == I_TYPE ) { |
5676 return decode_cabac_intra_mb_type(h, 3, 1); | 5676 return decode_cabac_intra_mb_type(h, 3, 1); |
5677 } else if( h->slice_type == P_TYPE ) { | 5677 } else if( h->slice_type == P_TYPE ) { |
5678 if( get_cabac( &h->cabac, &h->cabac_state[14] ) == 0 ) { | 5678 if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) { |
5679 /* P-type */ | 5679 /* P-type */ |
5680 if( get_cabac( &h->cabac, &h->cabac_state[15] ) == 0 ) { | 5680 if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) { |
5681 /* P_L0_D16x16, P_8x8 */ | 5681 /* P_L0_D16x16, P_8x8 */ |
5682 return 3 * get_cabac( &h->cabac, &h->cabac_state[16] ); | 5682 return 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] ); |
5683 } else { | 5683 } else { |
5684 /* P_L0_D8x16, P_L0_D16x8 */ | 5684 /* P_L0_D8x16, P_L0_D16x8 */ |
5685 return 2 - get_cabac( &h->cabac, &h->cabac_state[17] ); | 5685 return 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] ); |
5686 } | 5686 } |
5687 } else { | 5687 } else { |
5688 return decode_cabac_intra_mb_type(h, 17, 0) + 5; | 5688 return decode_cabac_intra_mb_type(h, 17, 0) + 5; |
5689 } | 5689 } |
5690 } else if( h->slice_type == B_TYPE ) { | 5690 } else if( h->slice_type == B_TYPE ) { |
5696 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) ) | 5696 if( h->slice_table[mba_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mba_xy] ) ) |
5697 ctx++; | 5697 ctx++; |
5698 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) ) | 5698 if( h->slice_table[mbb_xy] == h->slice_num && !IS_DIRECT( s->current_picture.mb_type[mbb_xy] ) ) |
5699 ctx++; | 5699 ctx++; |
5700 | 5700 |
5701 if( !get_cabac( &h->cabac, &h->cabac_state[27+ctx] ) ) | 5701 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) ) |
5702 return 0; /* B_Direct_16x16 */ | 5702 return 0; /* B_Direct_16x16 */ |
5703 | 5703 |
5704 if( !get_cabac( &h->cabac, &h->cabac_state[27+3] ) ) { | 5704 if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) { |
5705 return 1 + get_cabac( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */ | 5705 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */ |
5706 } | 5706 } |
5707 | 5707 |
5708 bits = get_cabac( &h->cabac, &h->cabac_state[27+4] ) << 3; | 5708 bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3; |
5709 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 2; | 5709 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2; |
5710 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ) << 1; | 5710 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1; |
5711 bits|= get_cabac( &h->cabac, &h->cabac_state[27+5] ); | 5711 bits|= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); |
5712 if( bits < 8 ) | 5712 if( bits < 8 ) |
5713 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */ | 5713 return bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */ |
5714 else if( bits == 13 ) { | 5714 else if( bits == 13 ) { |
5715 return decode_cabac_intra_mb_type(h, 32, 0) + 23; | 5715 return decode_cabac_intra_mb_type(h, 32, 0) + 23; |
5716 } else if( bits == 14 ) | 5716 } else if( bits == 14 ) |
5717 return 11; /* B_L1_L0_8x16 */ | 5717 return 11; /* B_L1_L0_8x16 */ |
5718 else if( bits == 15 ) | 5718 else if( bits == 15 ) |
5719 return 22; /* B_8x8 */ | 5719 return 22; /* B_8x8 */ |
5720 | 5720 |
5721 bits= ( bits<<1 ) | get_cabac( &h->cabac, &h->cabac_state[27+5] ); | 5721 bits= ( bits<<1 ) | get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); |
5722 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */ | 5722 return bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */ |
5723 } else { | 5723 } else { |
5724 /* TODO SI/SP frames? */ | 5724 /* TODO SI/SP frames? */ |
5725 return -1; | 5725 return -1; |
5726 } | 5726 } |
5757 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] )) | 5757 if( h->slice_table[mbb_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[mbb_xy] )) |
5758 ctx++; | 5758 ctx++; |
5759 | 5759 |
5760 if( h->slice_type == B_TYPE ) | 5760 if( h->slice_type == B_TYPE ) |
5761 ctx += 13; | 5761 ctx += 13; |
5762 return get_cabac( &h->cabac, &h->cabac_state[11+ctx] ); | 5762 return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] ); |
5763 } | 5763 } |
5764 | 5764 |
5765 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) { | 5765 static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) { |
5766 int mode = 0; | 5766 int mode = 0; |
5767 | 5767 |
5789 ctx++; | 5789 ctx++; |
5790 | 5790 |
5791 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 ) | 5791 if( h->slice_table[mbb_xy] == h->slice_num && h->chroma_pred_mode_table[mbb_xy] != 0 ) |
5792 ctx++; | 5792 ctx++; |
5793 | 5793 |
5794 if( get_cabac( &h->cabac, &h->cabac_state[64+ctx] ) == 0 ) | 5794 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+ctx] ) == 0 ) |
5795 return 0; | 5795 return 0; |
5796 | 5796 |
5797 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 ) | 5797 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 ) |
5798 return 1; | 5798 return 1; |
5799 if( get_cabac( &h->cabac, &h->cabac_state[64+3] ) == 0 ) | 5799 if( get_cabac_noinline( &h->cabac, &h->cabac_state[64+3] ) == 0 ) |
5800 return 2; | 5800 return 2; |
5801 else | 5801 else |
5802 return 3; | 5802 return 3; |
5803 } | 5803 } |
5804 | 5804 |
5871 cbp_b = (h-> top_cbp>>4)&0x03; | 5871 cbp_b = (h-> top_cbp>>4)&0x03; |
5872 | 5872 |
5873 ctx = 0; | 5873 ctx = 0; |
5874 if( cbp_a > 0 ) ctx++; | 5874 if( cbp_a > 0 ) ctx++; |
5875 if( cbp_b > 0 ) ctx += 2; | 5875 if( cbp_b > 0 ) ctx += 2; |
5876 if( get_cabac( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 ) | 5876 if( get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ) == 0 ) |
5877 return 0; | 5877 return 0; |
5878 | 5878 |
5879 ctx = 4; | 5879 ctx = 4; |
5880 if( cbp_a == 2 ) ctx++; | 5880 if( cbp_a == 2 ) ctx++; |
5881 if( cbp_b == 2 ) ctx += 2; | 5881 if( cbp_b == 2 ) ctx += 2; |
5882 return 1 + get_cabac( &h->cabac, &h->cabac_state[77 + ctx] ); | 5882 return 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[77 + ctx] ); |
5883 } | 5883 } |
5884 static int decode_cabac_mb_dqp( H264Context *h) { | 5884 static int decode_cabac_mb_dqp( H264Context *h) { |
5885 MpegEncContext * const s = &h->s; | 5885 MpegEncContext * const s = &h->s; |
5886 int mbn_xy; | 5886 int mbn_xy; |
5887 int ctx = 0; | 5887 int ctx = 0; |
5893 mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride; | 5893 mbn_xy = s->mb_width - 1 + (s->mb_y-1)*s->mb_stride; |
5894 | 5894 |
5895 if( h->last_qscale_diff != 0 ) | 5895 if( h->last_qscale_diff != 0 ) |
5896 ctx++; | 5896 ctx++; |
5897 | 5897 |
5898 while( get_cabac( &h->cabac, &h->cabac_state[60 + ctx] ) ) { | 5898 while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) { |
5899 if( ctx < 2 ) | 5899 if( ctx < 2 ) |
5900 ctx = 2; | 5900 ctx = 2; |
5901 else | 5901 else |
5902 ctx = 3; | 5902 ctx = 3; |
5903 val++; | 5903 val++; |
5935 type += get_cabac( &h->cabac, &h->cabac_state[39] ); | 5935 type += get_cabac( &h->cabac, &h->cabac_state[39] ); |
5936 return type; | 5936 return type; |
5937 } | 5937 } |
5938 | 5938 |
5939 static inline int decode_cabac_mb_transform_size( H264Context *h ) { | 5939 static inline int decode_cabac_mb_transform_size( H264Context *h ) { |
5940 return get_cabac( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] ); | 5940 return get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] ); |
5941 } | 5941 } |
5942 | 5942 |
5943 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) { | 5943 static int decode_cabac_mb_ref( H264Context *h, int list, int n ) { |
5944 int refa = h->ref_cache[list][scan8[n] - 1]; | 5944 int refa = h->ref_cache[list][scan8[n] - 1]; |
5945 int refb = h->ref_cache[list][scan8[n] - 8]; | 5945 int refb = h->ref_cache[list][scan8[n] - 8]; |
7408 else | 7408 else |
7409 h->cabac_state[i] = 2 * ( pre - 64 ) + 1; | 7409 h->cabac_state[i] = 2 * ( pre - 64 ) + 1; |
7410 } | 7410 } |
7411 | 7411 |
7412 for(;;){ | 7412 for(;;){ |
 | 7413 //START_TIMER |
7413 int ret = decode_mb_cabac(h); | 7414 int ret = decode_mb_cabac(h); |
7414 int eos; | 7415 int eos; |
 | 7416 //STOP_TIMER("decode_mb_cabac") |
7415 | 7417 |
7416 if(ret>=0) hl_decode_mb(h); | 7418 if(ret>=0) hl_decode_mb(h); |
7417 | 7419 |
7418 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ? | 7420 if( ret >= 0 && FRAME_MBAFF ) { //FIXME optimal? or let mb_decode decode 16x32 ? |
7419 s->mb_y++; | 7421 s->mb_y++; |