comparison cabac.c @ 4024:d550343b5dac libavcodec

shift CABACContext.range right; this reduces the number of shifts needed in get_cabac() and is slightly faster on P3 (and should be much faster on P4, because the P4, except for its more recent variants, lacks an integer shifter, so shifts have ~10 times longer latency than simple operations like adds)
author michael
date Sun, 15 Oct 2006 20:40:50 +0000
parents b2582438effe
children 866a83726985
comparison
equal deleted inserted replaced
4023:508f089d0b28 4024:d550343b5dac
49 { 8, 9, 11, 13}, { 7, 9, 11, 12}, { 7, 9, 10, 12}, { 7, 8, 10, 11}, 49 { 8, 9, 11, 13}, { 7, 9, 11, 12}, { 7, 9, 10, 12}, { 7, 8, 10, 11},
50 { 6, 8, 9, 11}, { 6, 7, 9, 10}, { 6, 7, 8, 9}, { 2, 2, 2, 2}, 50 { 6, 8, 9, 11}, { 6, 7, 9, 10}, { 6, 7, 8, 9}, { 2, 2, 2, 2},
51 }; 51 };
52 52
53 uint8_t ff_h264_mlps_state[4*64]; 53 uint8_t ff_h264_mlps_state[4*64];
54 uint8_t ff_h264_lps_range[2*65][4]; 54 uint8_t ff_h264_lps_range[4][2*64];
55 uint8_t ff_h264_lps_state[2*64]; 55 uint8_t ff_h264_lps_state[2*64];
56 uint8_t ff_h264_mps_state[2*64]; 56 uint8_t ff_h264_mps_state[2*64];
57 57
58 static const uint8_t mps_state[64]= { 58 static const uint8_t mps_state[64]= {
59 1, 2, 3, 4, 5, 6, 7, 8, 59 1, 2, 3, 4, 5, 6, 7, 8,
74 24,25,26,26,27,27,28,29, 74 24,25,26,26,27,27,28,29,
75 29,30,30,30,31,32,32,33, 75 29,30,30,30,31,32,32,33,
76 33,33,34,34,35,35,35,36, 76 33,33,34,34,35,35,35,36,
77 36,36,37,37,37,38,38,63, 77 36,36,37,37,37,38,38,63,
78 }; 78 };
79 79 #if 0
80 const uint8_t ff_h264_norm_shift[128]= { 80 const uint8_t ff_h264_norm_shift_old[128]= {
81 7,6,5,5,4,4,4,4,3,3,3,3,3,3,3,3, 81 7,6,5,5,4,4,4,4,3,3,3,3,3,3,3,3,
82 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2, 82 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
83 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 83 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
84 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1, 84 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
85 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 85 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
86 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 86 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
87 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 87 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
88 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0, 88 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
89 };
90 #endif
91 const uint8_t ff_h264_norm_shift[512]= {
92 9,8,7,7,6,6,6,6,5,5,5,5,5,5,5,5,
93 4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,
94 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
95 3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
96 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
97 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
98 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
99 2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
100 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
101 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
102 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
103 1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
104 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
105 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
106 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
107 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
108 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
109 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
110 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
111 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
89 }; 112 };
90 113
91 /** 114 /**
92 * 115 *
93 * @param buf_size size of buf in bits 116 * @param buf_size size of buf in bits
119 c->low+= (*c->bytestream++)<<10; 142 c->low+= (*c->bytestream++)<<10;
120 #else 143 #else
121 c->low = (*c->bytestream++)<<10; 144 c->low = (*c->bytestream++)<<10;
122 #endif 145 #endif
123 c->low+= ((*c->bytestream++)<<2) + 2; 146 c->low+= ((*c->bytestream++)<<2) + 2;
124 c->range= 0x1FE<<(CABAC_BITS + 1); 147 c->range= 0x1FE;
125 } 148 }
126 149
127 void ff_init_cabac_states(CABACContext *c){ 150 void ff_init_cabac_states(CABACContext *c){
128 int i, j; 151 int i, j;
129 152
130 for(i=0; i<64; i++){ 153 for(i=0; i<64; i++){
131 for(j=0; j<4; j++){ //FIXME check if this is worth the 1 shift we save 154 for(j=0; j<4; j++){ //FIXME check if this is worth the 1 shift we save
132 ff_h264_lps_range[2*i+0][j+4]= 155 ff_h264_lps_range[j][2*i+0]=
133 ff_h264_lps_range[2*i+1][j+4]= lps_range[i][j]; 156 ff_h264_lps_range[j][2*i+1]= lps_range[i][j];
134 } 157 }
135 158
136 ff_h264_mlps_state[128+2*i+0]= 159 ff_h264_mlps_state[128+2*i+0]=
137 ff_h264_mps_state[2*i+0]= 2*mps_state[i]+0; 160 ff_h264_mps_state[2*i+0]= 2*mps_state[i]+0;
138 ff_h264_mlps_state[128+2*i+1]= 161 ff_h264_mlps_state[128+2*i+1]=