Mercurial > libavcodec.hg
annotate cabac.h @ 3928:987fffdf6ae7 libavcodec
don't try to inline cabac functions. gcc ignored the hint anyway, and forcing it would make h264 slower.
author | lorenm |
---|---|
date | Wed, 04 Oct 2006 07:16:10 +0000 |
parents | 0efda682253c |
children | 811a9b0d9f32 |
rev | line source |
---|---|
1287 | 1 /* |
2 * H.26L/H.264/AVC/JVT/14496-10/... encoder/decoder | |
3 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at> | |
4 * | |
5 * This library is free software; you can redistribute it and/or | |
6 * modify it under the terms of the GNU Lesser General Public | |
7 * License as published by the Free Software Foundation; either | |
8 * version 2 of the License, or (at your option) any later version. | |
9 * | |
10 * This library is distributed in the hope that it will be useful, | |
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 * Lesser General Public License for more details. | |
14 * | |
15 * You should have received a copy of the GNU Lesser General Public | |
16 * License along with this library; if not, write to the Free Software | |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2967
diff
changeset
|
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
1287 | 18 * |
19 */ | |
2967 | 20 |
1287 | 21 /** |
22 * @file cabac.h | |
23 * Context Adaptive Binary Arithmetic Coder. | |
24 */ | |
25 | |
26 | |
3284
a224d9752912
don't force asserts in release builds. 2% faster h264.
lorenm
parents:
3036
diff
changeset
|
27 //#undef NDEBUG |
1287 | 28 #include <assert.h> |
29 | |
/** Number of bitstream bits the decoder consumes per refill (refill() handles 8 and 16). */
#define CABAC_BITS 8
/** Mask selecting the low CABAC_BITS bits; the decoder refills when these bits of `low` are all zero. */
#define CABAC_MASK ((1 << CABAC_BITS) - 1)
32 | |
1287 | 33 typedef struct CABACContext{ |
34 int low; | |
35 int range; | |
36 int outstanding_count; | |
37 #ifdef STRICT_LIMITS | |
38 int symCount; | |
39 #endif | |
2323 | 40 uint8_t lps_range[2*65][4]; ///< rangeTabLPS |
1287 | 41 uint8_t lps_state[2*64]; ///< transIdxLPS |
42 uint8_t mps_state[2*64]; ///< transIdxMPS | |
2024
f65d87bfdd5a
some of the warning fixes by (Michael Roitzsch <mroi at users dot sourceforge dot net>)
michael
parents:
1787
diff
changeset
|
43 const uint8_t *bytestream_start; |
f65d87bfdd5a
some of the warning fixes by (Michael Roitzsch <mroi at users dot sourceforge dot net>)
michael
parents:
1787
diff
changeset
|
44 const uint8_t *bytestream; |
2116 | 45 const uint8_t *bytestream_end; |
1287 | 46 PutBitContext pb; |
47 }CABACContext; | |
48 | |
/* Read-only H.264 CABAC tables; only declared here, defined elsewhere. */
extern const uint8_t ff_h264_lps_range[64][4];
extern const uint8_t ff_h264_mps_state[64];
extern const uint8_t ff_h264_lps_state[64];
/* Indexed by a shifted range/low value in the (currently disabled) table-driven renormalisation. */
extern const uint8_t ff_h264_norm_shift[256];
53 | |
1287 | 54 |
55 void ff_init_cabac_encoder(CABACContext *c, uint8_t *buf, int buf_size); | |
2024
f65d87bfdd5a
some of the warning fixes by (Michael Roitzsch <mroi at users dot sourceforge dot net>)
michael
parents:
1787
diff
changeset
|
56 void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size); |
2967 | 57 void ff_init_cabac_states(CABACContext *c, uint8_t const (*lps_range)[4], |
1287 | 58 uint8_t const *mps_state, uint8_t const *lps_state, int state_count); |
59 | |
60 | |
61 static inline void put_cabac_bit(CABACContext *c, int b){ | |
2967 | 62 put_bits(&c->pb, 1, b); |
63 for(;c->outstanding_count; c->outstanding_count--){ | |
1287 | 64 put_bits(&c->pb, 1, 1-b); |
65 } | |
66 } | |
67 | |
68 static inline void renorm_cabac_encoder(CABACContext *c){ | |
69 while(c->range < 0x100){ | |
70 //FIXME optimize | |
71 if(c->low<0x100){ | |
72 put_cabac_bit(c, 0); | |
73 }else if(c->low<0x200){ | |
74 c->outstanding_count++; | |
75 c->low -= 0x100; | |
76 }else{ | |
77 put_cabac_bit(c, 1); | |
78 c->low -= 0x200; | |
79 } | |
2967 | 80 |
1287 | 81 c->range+= c->range; |
82 c->low += c->low; | |
83 } | |
84 } | |
85 | |
3928
987fffdf6ae7
don't try to inline cabac functions. gcc ignored the hint anyway, and forcing it would make h264 slower.
lorenm
parents:
3642
diff
changeset
|
86 static void put_cabac(CABACContext *c, uint8_t * const state, int bit){ |
2323 | 87 int RangeLPS= c->lps_range[*state][c->range>>6]; |
2967 | 88 |
1287 | 89 if(bit == ((*state)&1)){ |
90 c->range -= RangeLPS; | |
91 *state= c->mps_state[*state]; | |
92 }else{ | |
93 c->low += c->range - RangeLPS; | |
94 c->range = RangeLPS; | |
95 *state= c->lps_state[*state]; | |
96 } | |
2967 | 97 |
1287 | 98 renorm_cabac_encoder(c); |
99 | |
100 #ifdef STRICT_LIMITS | |
101 c->symCount++; | |
102 #endif | |
103 } | |
104 | |
3928
987fffdf6ae7
don't try to inline cabac functions. gcc ignored the hint anyway, and forcing it would make h264 slower.
lorenm
parents:
3642
diff
changeset
|
105 static void put_cabac_static(CABACContext *c, int RangeLPS, int bit){ |
1287 | 106 assert(c->range > RangeLPS); |
107 | |
108 if(!bit){ | |
109 c->range -= RangeLPS; | |
110 }else{ | |
111 c->low += c->range - RangeLPS; | |
112 c->range = RangeLPS; | |
113 } | |
114 | |
115 renorm_cabac_encoder(c); | |
116 | |
117 #ifdef STRICT_LIMITS | |
118 c->symCount++; | |
119 #endif | |
120 } | |
121 | |
1290 | 122 /** |
123 * @param bit 0 -> write zero bit, !=0 write one bit | |
124 */ | |
3928
987fffdf6ae7
don't try to inline cabac functions. gcc ignored the hint anyway, and forcing it would make h264 slower.
lorenm
parents:
3642
diff
changeset
|
125 static void put_cabac_bypass(CABACContext *c, int bit){ |
1287 | 126 c->low += c->low; |
127 | |
128 if(bit){ | |
129 c->low += c->range; | |
130 } | |
131 //FIXME optimize | |
132 if(c->low<0x200){ | |
133 put_cabac_bit(c, 0); | |
134 }else if(c->low<0x400){ | |
135 c->outstanding_count++; | |
136 c->low -= 0x200; | |
137 }else{ | |
138 put_cabac_bit(c, 1); | |
139 c->low -= 0x400; | |
140 } | |
2967 | 141 |
1287 | 142 #ifdef STRICT_LIMITS |
143 c->symCount++; | |
144 #endif | |
145 } | |
146 | |
1300
e18667d1e94d
FFV1 codec (our very simple lossless intra only codec, compresses much better than huffyuv)
michaelni
parents:
1298
diff
changeset
|
147 /** |
e18667d1e94d
FFV1 codec (our very simple lossless intra only codec, compresses much better then huffyuv)
michaelni
parents:
1298
diff
changeset
|
148 * |
e18667d1e94d
FFV1 codec (our very simple lossless intra only codec, compresses much better then huffyuv)
michaelni
parents:
1298
diff
changeset
|
149 * @return the number of bytes written |
e18667d1e94d
FFV1 codec (our very simple lossless intra only codec, compresses much better then huffyuv)
michaelni
parents:
1298
diff
changeset
|
150 */ |
3928
987fffdf6ae7
don't try to inline cabac functions. gcc ignored the hint anyway, and forcing it would make h264 slower.
lorenm
parents:
3642
diff
changeset
|
151 static int put_cabac_terminate(CABACContext *c, int bit){ |
1287 | 152 c->range -= 2; |
153 | |
154 if(!bit){ | |
155 renorm_cabac_encoder(c); | |
156 }else{ | |
157 c->low += c->range; | |
158 c->range= 2; | |
2967 | 159 |
1287 | 160 renorm_cabac_encoder(c); |
161 | |
162 assert(c->low <= 0x1FF); | |
163 put_cabac_bit(c, c->low>>9); | |
164 put_bits(&c->pb, 2, ((c->low>>7)&3)|1); | |
2967 | 165 |
1287 | 166 flush_put_bits(&c->pb); //FIXME FIXME FIXME XXX wrong |
167 } | |
2967 | 168 |
1287 | 169 #ifdef STRICT_LIMITS |
170 c->symCount++; | |
171 #endif | |
1300
e18667d1e94d
FFV1 codec (our very simple lossless intra only codec, compresses much better then huffyuv)
michaelni
parents:
1298
diff
changeset
|
172 |
1787 | 173 return (put_bits_count(&c->pb)+7)>>3; |
1287 | 174 } |
175 | |
1290 | 176 /** |
177 * put (truncated) unary binarization. | |
178 */ | |
3928
987fffdf6ae7
don't try to inline cabac functions. gcc ignored the hint anyway, and forcing it would make h264 slower.
lorenm
parents:
3642
diff
changeset
|
179 static void put_cabac_u(CABACContext *c, uint8_t * state, int v, int max, int max_index, int truncated){ |
1290 | 180 int i; |
2967 | 181 |
1290 | 182 assert(v <= max); |
2967 | 183 |
1290 | 184 #if 1 |
185 for(i=0; i<v; i++){ | |
186 put_cabac(c, state, 1); | |
187 if(i < max_index) state++; | |
188 } | |
189 if(truncated==0 || v<max) | |
190 put_cabac(c, state, 0); | |
191 #else | |
192 if(v <= max_index){ | |
193 for(i=0; i<v; i++){ | |
194 put_cabac(c, state+i, 1); | |
195 } | |
196 if(truncated==0 || v<max) | |
197 put_cabac(c, state+i, 0); | |
198 }else{ | |
199 for(i=0; i<=max_index; i++){ | |
200 put_cabac(c, state+i, 1); | |
201 } | |
202 for(; i<v; i++){ | |
203 put_cabac(c, state+max_index, 1); | |
204 } | |
205 if(truncated==0 || v<max) | |
206 put_cabac(c, state+max_index, 0); | |
207 } | |
208 #endif | |
209 } | |
210 | |
211 /** | |
212 * put unary exp golomb k-th order binarization. | |
213 */ | |
3928
987fffdf6ae7
don't try to inline cabac functions. gcc ignored the hint anyway, and forcing it would make h264 slower.
lorenm
parents:
3642
diff
changeset
|
214 static void put_cabac_ueg(CABACContext *c, uint8_t * state, int v, int max, int is_signed, int k, int max_index){ |
1290 | 215 int i; |
2967 | 216 |
1290 | 217 if(v==0) |
218 put_cabac(c, state, 0); | |
219 else{ | |
1298 | 220 const int sign= v < 0; |
2967 | 221 |
1298 | 222 if(is_signed) v= ABS(v); |
2967 | 223 |
1290 | 224 if(v<max){ |
225 for(i=0; i<v; i++){ | |
226 put_cabac(c, state, 1); | |
227 if(i < max_index) state++; | |
228 } | |
229 | |
230 put_cabac(c, state, 0); | |
231 }else{ | |
232 int m= 1<<k; | |
233 | |
234 for(i=0; i<max; i++){ | |
235 put_cabac(c, state, 1); | |
236 if(i < max_index) state++; | |
237 } | |
238 | |
239 v -= max; | |
240 while(v >= m){ //FIXME optimize | |
241 put_cabac_bypass(c, 1); | |
242 v-= m; | |
243 m+= m; | |
244 } | |
245 put_cabac_bypass(c, 0); | |
246 while(m>>=1){ | |
247 put_cabac_bypass(c, v&m); | |
248 } | |
249 } | |
250 | |
251 if(is_signed) | |
252 put_cabac_bypass(c, sign); | |
253 } | |
254 } | |
255 | |
2323 | 256 static void refill(CABACContext *c){ |
2736 | 257 if(c->bytestream <= c->bytestream_end) |
2323 | 258 #if CABAC_BITS == 16 |
259 c->low+= ((c->bytestream[0]<<9) + (c->bytestream[1])<<1); | |
260 #else | |
261 c->low+= c->bytestream[0]<<1; | |
262 #endif | |
263 c->low -= CABAC_MASK; | |
264 c->bytestream+= CABAC_BITS/8; | |
265 } | |
266 | |
2522
e25782262d7d
kill warnings patch by (Måns Rullgård <mru inprovide com>)
michael
parents:
2323
diff
changeset
|
#if 0 /* all use commented */
/**
 * Refill variant for the table-driven renormalisation (currently disabled).
 *
 * Unlike refill(), the new bits are not added at a fixed position: they are
 * shifted by an amount derived from the lowest set bit of low via
 * ff_h264_norm_shift[].
 */
static void refill2(CABACContext *c){
    int shift, bits;

    bits  = c->low ^ (c->low-1);
    shift = 8 - ff_h264_norm_shift[bits>>(CABAC_BITS+1)];

    bits = -CABAC_MASK;

    if(c->bytestream < c->bytestream_end){
#if CABAC_BITS == 16
        bits += (c->bytestream[0]<<9) + (c->bytestream[1]<<1);
#else
        bits += c->bytestream[0]<<1;
#endif
    }

    c->low += bits<<shift;
    c->bytestream += CABAC_BITS/8;
}
#endif
2323 | 287 |
1287 | 288 static inline void renorm_cabac_decoder(CABACContext *c){ |
2323 | 289 while(c->range < (0x200 << CABAC_BITS)){ |
1287 | 290 c->range+= c->range; |
291 c->low+= c->low; | |
2323 | 292 if(!(c->low & CABAC_MASK)) |
293 refill(c); | |
1287 | 294 } |
295 } | |
296 | |
2323 | 297 static inline void renorm_cabac_decoder_once(CABACContext *c){ |
3642 | 298 int shift= (uint32_t)(c->range - (0x200 << CABAC_BITS))>>31; |
299 c->range<<= shift; | |
300 c->low <<= shift; | |
2323 | 301 if(!(c->low & CABAC_MASK)) |
302 refill(c); | |
303 } | |
304 | |
3928
987fffdf6ae7
don't try to inline cabac functions. gcc ignored the hint anyway, and forcing it would make h264 slower.
lorenm
parents:
3642
diff
changeset
|
305 static int get_cabac(CABACContext *c, uint8_t * const state){ |
3642 | 306 //FIXME gcc generates duplicate load/stores for c->low and c->range |
307 int s = *state; | |
308 int RangeLPS= c->lps_range[s][c->range>>(CABAC_BITS+7)]<<(CABAC_BITS+1); | |
2522
e25782262d7d
kill warnings patch by (M«©ns Rullg«©rd <mru inprovide com>)
michael
parents:
2323
diff
changeset
|
309 int bit, lps_mask attribute_unused; |
2967 | 310 |
1287 | 311 c->range -= RangeLPS; |
2323 | 312 #if 1 |
1287 | 313 if(c->low < c->range){ |
3642 | 314 bit= s&1; |
315 *state= c->mps_state[s]; | |
2323 | 316 renorm_cabac_decoder_once(c); |
1287 | 317 }else{ |
2323 | 318 // int shift= ff_h264_norm_shift[RangeLPS>>17]; |
3642 | 319 bit= (s&1)^1; |
1287 | 320 c->low -= c->range; |
3642 | 321 *state= c->lps_state[s]; |
1287 | 322 c->range = RangeLPS; |
2323 | 323 renorm_cabac_decoder(c); |
324 /* c->range = RangeLPS<<shift; | |
325 c->low <<= shift; | |
326 if(!(c->low & 0xFFFF)){ | |
327 refill2(c); | |
328 }*/ | |
1287 | 329 } |
2323 | 330 #else |
331 lps_mask= (c->range - c->low)>>31; | |
2967 | 332 |
2323 | 333 c->low -= c->range & lps_mask; |
334 c->range += (RangeLPS - c->range) & lps_mask; | |
2967 | 335 |
3642 | 336 bit= (s^lps_mask)&1; |
337 *state= c->mps_state[s - (128&lps_mask)]; | |
2967 | 338 |
2323 | 339 lps_mask= ff_h264_norm_shift[c->range>>(CABAC_BITS+2)]; |
340 c->range<<= lps_mask; | |
341 c->low <<= lps_mask; | |
342 if(!(c->low & CABAC_MASK)) | |
343 refill2(c); | |
344 #endif | |
345 | |
2967 | 346 return bit; |
1287 | 347 } |
348 | |
3928
987fffdf6ae7
don't try to inline cabac functions. gcc ignored the hint anyway, and forcing it would make h264 slower.
lorenm
parents:
3642
diff
changeset
|
349 static int get_cabac_bypass(CABACContext *c){ |
1287 | 350 c->low += c->low; |
351 | |
2323 | 352 if(!(c->low & CABAC_MASK)) |
353 refill(c); | |
2967 | 354 |
1287 | 355 if(c->low < c->range){ |
356 return 0; | |
357 }else{ | |
358 c->low -= c->range; | |
359 return 1; | |
360 } | |
361 } | |
362 | |
1300
e18667d1e94d
FFV1 codec (our very simple lossless intra only codec, compresses much better then huffyuv)
michaelni
parents:
1298
diff
changeset
|
363 /** |
e18667d1e94d
FFV1 codec (our very simple lossless intra only codec, compresses much better then huffyuv)
michaelni
parents:
1298
diff
changeset
|
364 * |
e18667d1e94d
FFV1 codec (our very simple lossless intra only codec, compresses much better then huffyuv)
michaelni
parents:
1298
diff
changeset
|
365 * @return the number of bytes read or 0 if no end |
e18667d1e94d
FFV1 codec (our very simple lossless intra only codec, compresses much better then huffyuv)
michaelni
parents:
1298
diff
changeset
|
366 */ |
3928
987fffdf6ae7
don't try to inline cabac functions. gcc ignored the hint anyway, and forcing it would make h264 slower.
lorenm
parents:
3642
diff
changeset
|
367 static int get_cabac_terminate(CABACContext *c){ |
2323 | 368 c->range -= 4<<CABAC_BITS; |
1287 | 369 if(c->low < c->range){ |
2323 | 370 renorm_cabac_decoder_once(c); |
1287 | 371 return 0; |
372 }else{ | |
1300
e18667d1e94d
FFV1 codec (our very simple lossless intra only codec, compresses much better then huffyuv)
michaelni
parents:
1298
diff
changeset
|
373 return c->bytestream - c->bytestream_start; |
2967 | 374 } |
1287 | 375 } |
376 | |
1290 | 377 /** |
378 * get (truncated) unnary binarization. | |
379 */ | |
3928
987fffdf6ae7
don't try to inline cabac functions. gcc ignored the hint anyway, and forcing it would make h264 slower.
lorenm
parents:
3642
diff
changeset
|
380 static int get_cabac_u(CABACContext *c, uint8_t * state, int max, int max_index, int truncated){ |
1290 | 381 int i; |
2967 | 382 |
383 for(i=0; i<max; i++){ | |
1290 | 384 if(get_cabac(c, state)==0) |
385 return i; | |
2967 | 386 |
1290 | 387 if(i< max_index) state++; |
388 } | |
389 | |
390 return truncated ? max : -1; | |
391 } | |
392 | |
393 /** | |
394 * get unary exp golomb k-th order binarization. | |
395 */ | |
3928
987fffdf6ae7
don't try to inline cabac functions. gcc ignored the hint anyway, and forcing it would make h264 slower.
lorenm
parents:
3642
diff
changeset
|
396 static int get_cabac_ueg(CABACContext *c, uint8_t * state, int max, int is_signed, int k, int max_index){ |
1290 | 397 int i, v; |
398 int m= 1<<k; | |
2967 | 399 |
400 if(get_cabac(c, state)==0) | |
1290 | 401 return 0; |
2967 | 402 |
1290 | 403 if(0 < max_index) state++; |
2967 | 404 |
405 for(i=1; i<max; i++){ | |
1290 | 406 if(get_cabac(c, state)==0){ |
407 if(is_signed && get_cabac_bypass(c)){ | |
408 return -i; | |
409 }else | |
410 return i; | |
411 } | |
412 | |
413 if(i < max_index) state++; | |
414 } | |
2967 | 415 |
1290 | 416 while(get_cabac_bypass(c)){ |
417 i+= m; | |
418 m+= m; | |
419 } | |
2967 | 420 |
1290 | 421 v=0; |
422 while(m>>=1){ | |
423 v+= v + get_cabac_bypass(c); | |
424 } | |
425 i += v; | |
426 | |
427 if(is_signed && get_cabac_bypass(c)){ | |
428 return -i; | |
429 }else | |
430 return i; | |
431 } |