Mercurial > libavcodec.hg
comparison cabac.h @ 4024:d550343b5dac libavcodec
Shift CABACContext.range right; this reduces the number of shifts needed in get_cabac() and is slightly faster on P3 (and should be much faster on P4, because the P4, except for its more recent variants, lacks an integer shifter, so shifts have ~10 times longer latency than simple operations like adds).
author | michael |
---|---|
date | Sun, 15 Oct 2006 20:40:50 +0000 |
parents | b2582438effe |
children | 76ca770d2f6b |
comparison
equal
deleted
inserted
replaced
4023:508f089d0b28 | 4024:d550343b5dac |
---|---|
46 const uint8_t *bytestream_end; | 46 const uint8_t *bytestream_end; |
47 PutBitContext pb; | 47 PutBitContext pb; |
48 }CABACContext; | 48 }CABACContext; |
49 | 49 |
50 extern uint8_t ff_h264_mlps_state[4*64]; | 50 extern uint8_t ff_h264_mlps_state[4*64]; |
51 extern uint8_t ff_h264_lps_range[2*65][4]; ///< rangeTabLPS | 51 extern uint8_t ff_h264_lps_range[4][2*64]; ///< rangeTabLPS |
52 extern uint8_t ff_h264_mps_state[2*64]; ///< transIdxMPS | 52 extern uint8_t ff_h264_mps_state[2*64]; ///< transIdxMPS |
53 extern uint8_t ff_h264_lps_state[2*64]; ///< transIdxLPS | 53 extern uint8_t ff_h264_lps_state[2*64]; ///< transIdxLPS |
54 extern const uint8_t ff_h264_norm_shift[128]; | 54 extern const uint8_t ff_h264_norm_shift[512]; |
55 | 55 |
56 | 56 |
57 void ff_init_cabac_encoder(CABACContext *c, uint8_t *buf, int buf_size); | 57 void ff_init_cabac_encoder(CABACContext *c, uint8_t *buf, int buf_size); |
58 void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size); | 58 void ff_init_cabac_decoder(CABACContext *c, const uint8_t *buf, int buf_size); |
59 void ff_init_cabac_states(CABACContext *c); | 59 void ff_init_cabac_states(CABACContext *c); |
83 c->low += c->low; | 83 c->low += c->low; |
84 } | 84 } |
85 } | 85 } |
86 | 86 |
87 static void put_cabac(CABACContext *c, uint8_t * const state, int bit){ | 87 static void put_cabac(CABACContext *c, uint8_t * const state, int bit){ |
88 int RangeLPS= ff_h264_lps_range[*state][c->range>>6]; | 88 int RangeLPS= ff_h264_lps_range[2*(c->range&0xC0) + *state]; |
89 | 89 |
90 if(bit == ((*state)&1)){ | 90 if(bit == ((*state)&1)){ |
91 c->range -= RangeLPS; | 91 c->range -= RangeLPS; |
92 *state= ff_h264_mps_state[*state]; | 92 *state= ff_h264_mps_state[*state]; |
93 }else{ | 93 }else{ |
266 | 266 |
267 static void refill2(CABACContext *c){ | 267 static void refill2(CABACContext *c){ |
268 int i, x; | 268 int i, x; |
269 | 269 |
270 x= c->low ^ (c->low-1); | 270 x= c->low ^ (c->low-1); |
271 i= 7 - ff_h264_norm_shift[x>>(CABAC_BITS+1)]; | 271 i= 7 - ff_h264_norm_shift[x>>(CABAC_BITS-1)]; |
272 | 272 |
273 x= -CABAC_MASK; | 273 x= -CABAC_MASK; |
274 | 274 |
275 #if CABAC_BITS == 16 | 275 #if CABAC_BITS == 16 |
276 x+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1); | 276 x+= (c->bytestream[0]<<9) + (c->bytestream[1]<<1); |
281 c->low += x<<i; | 281 c->low += x<<i; |
282 c->bytestream+= CABAC_BITS/8; | 282 c->bytestream+= CABAC_BITS/8; |
283 } | 283 } |
284 | 284 |
285 static inline void renorm_cabac_decoder(CABACContext *c){ | 285 static inline void renorm_cabac_decoder(CABACContext *c){ |
286 while(c->range < (0x200 << CABAC_BITS)){ | 286 while(c->range < 0x100){ |
287 c->range+= c->range; | 287 c->range+= c->range; |
288 c->low+= c->low; | 288 c->low+= c->low; |
289 if(!(c->low & CABAC_MASK)) | 289 if(!(c->low & CABAC_MASK)) |
290 refill(c); | 290 refill(c); |
291 } | 291 } |
295 #ifdef ARCH_X86_DISABLED | 295 #ifdef ARCH_X86_DISABLED |
296 int temp; | 296 int temp; |
297 #if 0 | 297 #if 0 |
298 //P3:683 athlon:475 | 298 //P3:683 athlon:475 |
299 asm( | 299 asm( |
300 "lea -0x2000000(%0), %2 \n\t" | 300 "lea -0x100(%0), %2 \n\t" |
301 "shr $31, %2 \n\t" //FIXME 31->63 for x86-64 | 301 "shr $31, %2 \n\t" //FIXME 31->63 for x86-64 |
302 "shl %%cl, %0 \n\t" | 302 "shl %%cl, %0 \n\t" |
303 "shl %%cl, %1 \n\t" | 303 "shl %%cl, %1 \n\t" |
304 : "+r"(c->range), "+r"(c->low), "+c"(temp) | 304 : "+r"(c->range), "+r"(c->low), "+c"(temp) |
305 ); | 305 ); |
306 #elif 0 | 306 #elif 0 |
307 //P3:680 athlon:474 | 307 //P3:680 athlon:474 |
308 asm( | 308 asm( |
309 "cmp $0x2000000, %0 \n\t" | 309 "cmp $0x100, %0 \n\t" |
310 "setb %%cl \n\t" //FIXME 31->63 for x86-64 | 310 "setb %%cl \n\t" //FIXME 31->63 for x86-64 |
311 "shl %%cl, %0 \n\t" | 311 "shl %%cl, %0 \n\t" |
312 "shl %%cl, %1 \n\t" | 312 "shl %%cl, %1 \n\t" |
313 : "+r"(c->range), "+r"(c->low), "+c"(temp) | 313 : "+r"(c->range), "+r"(c->low), "+c"(temp) |
314 ); | 314 ); |
315 #elif 1 | 315 #elif 1 |
316 int temp2; | 316 int temp2; |
317 //P3:665 athlon:517 | 317 //P3:665 athlon:517 |
318 asm( | 318 asm( |
319 "lea -0x2000000(%0), %%eax \n\t" | 319 "lea -0x100(%0), %%eax \n\t" |
320 "cdq \n\t" | 320 "cdq \n\t" |
321 "mov %0, %%eax \n\t" | 321 "mov %0, %%eax \n\t" |
322 "and %%edx, %0 \n\t" | 322 "and %%edx, %0 \n\t" |
323 "and %1, %%edx \n\t" | 323 "and %1, %%edx \n\t" |
324 "add %%eax, %0 \n\t" | 324 "add %%eax, %0 \n\t" |
327 ); | 327 ); |
328 #elif 0 | 328 #elif 0 |
329 int temp2; | 329 int temp2; |
330 //P3:673 athlon:509 | 330 //P3:673 athlon:509 |
331 asm( | 331 asm( |
332 "cmp $0x2000000, %0 \n\t" | 332 "cmp $0x100, %0 \n\t" |
333 "sbb %%edx, %%edx \n\t" | 333 "sbb %%edx, %%edx \n\t" |
334 "mov %0, %%eax \n\t" | 334 "mov %0, %%eax \n\t" |
335 "and %%edx, %0 \n\t" | 335 "and %%edx, %0 \n\t" |
336 "and %1, %%edx \n\t" | 336 "and %1, %%edx \n\t" |
337 "add %%eax, %0 \n\t" | 337 "add %%eax, %0 \n\t" |
340 ); | 340 ); |
341 #else | 341 #else |
342 int temp2; | 342 int temp2; |
343 //P3:677 athlon:511 | 343 //P3:677 athlon:511 |
344 asm( | 344 asm( |
345 "cmp $0x2000000, %0 \n\t" | 345 "cmp $0x100, %0 \n\t" |
346 "lea (%0, %0), %%eax \n\t" | 346 "lea (%0, %0), %%eax \n\t" |
347 "lea (%1, %1), %%edx \n\t" | 347 "lea (%1, %1), %%edx \n\t" |
348 "cmovb %%eax, %0 \n\t" | 348 "cmovb %%eax, %0 \n\t" |
349 "cmovb %%edx, %1 \n\t" | 349 "cmovb %%edx, %1 \n\t" |
350 : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2) | 350 : "+r"(c->range), "+r"(c->low), "+a"(temp), "+d"(temp2) |
351 ); | 351 ); |
352 #endif | 352 #endif |
353 #else | 353 #else |
354 //P3:675 athlon:476 | 354 //P3:675 athlon:476 |
355 int shift= (uint32_t)(c->range - (0x200 << CABAC_BITS))>>31; | 355 int shift= (uint32_t)(c->range - 0x100)>>31; |
356 c->range<<= shift; | 356 c->range<<= shift; |
357 c->low <<= shift; | 357 c->low <<= shift; |
358 #endif | 358 #endif |
359 if(!(c->low & CABAC_MASK)) | 359 if(!(c->low & CABAC_MASK)) |
360 refill(c); | 360 refill(c); |
373 #ifndef BRANCHLESS_CABAC_DECODER | 373 #ifndef BRANCHLESS_CABAC_DECODER |
374 asm volatile( | 374 asm volatile( |
375 "movzbl (%1), %%eax \n\t" | 375 "movzbl (%1), %%eax \n\t" |
376 "movl "RANGE "(%2), %%ebx \n\t" | 376 "movl "RANGE "(%2), %%ebx \n\t" |
377 "movl "RANGE "(%2), %%edx \n\t" | 377 "movl "RANGE "(%2), %%edx \n\t" |
378 "shrl $23, %%ebx \n\t" | 378 "andl $0xC0, %%ebx \n\t" |
379 "movzbl "MANGLE(ff_h264_lps_range)"(%%ebx, %%eax, 4), %%esi\n\t" | 379 "movzbl "MANGLE(ff_h264_lps_range)"(%%eax, %%ebx, 2), %%esi\n\t" |
380 "shll $17, %%esi \n\t" | |
381 "movl "LOW "(%2), %%ebx \n\t" | 380 "movl "LOW "(%2), %%ebx \n\t" |
382 //eax:state ebx:low, edx:range, esi:RangeLPS | 381 //eax:state ebx:low, edx:range, esi:RangeLPS |
383 "subl %%esi, %%edx \n\t" | 382 "subl %%esi, %%edx \n\t" |
384 "cmpl %%edx, %%ebx \n\t" | 383 "movl %%edx, %%ecx \n\t" |
384 "shll $17, %%ecx \n\t" | |
385 "cmpl %%ecx, %%ebx \n\t" | |
385 " ja 1f \n\t" | 386 " ja 1f \n\t" |
386 | 387 |
387 #if 1 | 388 #if 1 |
388 //athlon:4067 P3:4110 | 389 //athlon:4067 P3:4110 |
389 "lea -0x2000000(%%edx), %%ecx \n\t" | 390 "lea -0x100(%%edx), %%ecx \n\t" |
390 "shr $31, %%ecx \n\t" | 391 "shr $31, %%ecx \n\t" |
391 "shl %%cl, %%edx \n\t" | 392 "shl %%cl, %%edx \n\t" |
392 "shl %%cl, %%ebx \n\t" | 393 "shl %%cl, %%ebx \n\t" |
393 #else | 394 #else |
394 //athlon:4057 P3:4130 | 395 //athlon:4057 P3:4130 |
395 "cmp $0x2000000, %%edx \n\t" //FIXME avoidable | 396 "cmp $0x100, %%edx \n\t" //FIXME avoidable |
396 "setb %%cl \n\t" | 397 "setb %%cl \n\t" |
397 "shl %%cl, %%edx \n\t" | 398 "shl %%cl, %%edx \n\t" |
398 "shl %%cl, %%ebx \n\t" | 399 "shl %%cl, %%ebx \n\t" |
399 #endif | 400 #endif |
400 "movzbl "MANGLE(ff_h264_mps_state)"(%%eax), %%ecx \n\t" | 401 "movzbl "MANGLE(ff_h264_mps_state)"(%%eax), %%ecx \n\t" |
411 "addl %%ecx, %%ebx \n\t" | 412 "addl %%ecx, %%ebx \n\t" |
412 "movl %%esi, "BYTE "(%2) \n\t" | 413 "movl %%esi, "BYTE "(%2) \n\t" |
413 "jmp 2f \n\t" | 414 "jmp 2f \n\t" |
414 "1: \n\t" | 415 "1: \n\t" |
415 //eax:state ebx:low, edx:range, esi:RangeLPS | 416 //eax:state ebx:low, edx:range, esi:RangeLPS |
416 "subl %%edx, %%ebx \n\t" | 417 "subl %%ecx, %%ebx \n\t" |
417 "movl %%esi, %%edx \n\t" | 418 "movl %%esi, %%edx \n\t" |
418 "shr $19, %%esi \n\t" | |
419 "movzbl "MANGLE(ff_h264_lps_state)"(%%eax), %%ecx \n\t" | |
420 "movb %%cl, (%1) \n\t" | |
421 "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx \n\t" | 419 "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx \n\t" |
422 "shll %%cl, %%ebx \n\t" | 420 "shll %%cl, %%ebx \n\t" |
423 "shll %%cl, %%edx \n\t" | 421 "shll %%cl, %%edx \n\t" |
422 "movzbl "MANGLE(ff_h264_lps_state)"(%%eax), %%ecx \n\t" | |
423 "movb %%cl, (%1) \n\t" | |
424 "addl $1, %%eax \n\t" | 424 "addl $1, %%eax \n\t" |
425 "test %%bx, %%bx \n\t" | 425 "test %%bx, %%bx \n\t" |
426 " jnz 2f \n\t" | 426 " jnz 2f \n\t" |
427 | 427 |
428 "movl "BYTE "(%2), %%ecx \n\t" | 428 "movl "BYTE "(%2), %%ecx \n\t" |
433 "addl $2, %%ecx \n\t" | 433 "addl $2, %%ecx \n\t" |
434 "movl %%ecx, "BYTE "(%2) \n\t" | 434 "movl %%ecx, "BYTE "(%2) \n\t" |
435 | 435 |
436 "leal -1(%%ebx), %%ecx \n\t" | 436 "leal -1(%%ebx), %%ecx \n\t" |
437 "xorl %%ebx, %%ecx \n\t" | 437 "xorl %%ebx, %%ecx \n\t" |
438 "shrl $17, %%ecx \n\t" | 438 "shrl $15, %%ecx \n\t" |
439 "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t" | 439 "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t" |
440 "neg %%ecx \n\t" | 440 "neg %%ecx \n\t" |
441 "add $7, %%ecx \n\t" | 441 "add $7, %%ecx \n\t" |
442 | 442 |
443 "shll %%cl , %%esi \n\t" | 443 "shll %%cl , %%esi \n\t" |
453 #else /* BRANCHLESS_CABAC_DECODER */ | 453 #else /* BRANCHLESS_CABAC_DECODER */ |
454 asm volatile( | 454 asm volatile( |
455 "movzbl (%1), %%eax \n\t" | 455 "movzbl (%1), %%eax \n\t" |
456 "movl "RANGE "(%2), %%ebx \n\t" | 456 "movl "RANGE "(%2), %%ebx \n\t" |
457 "movl "RANGE "(%2), %%edx \n\t" | 457 "movl "RANGE "(%2), %%edx \n\t" |
458 "shrl $23, %%ebx \n\t" | 458 "andl $0xC0, %%ebx \n\t" |
459 "movzbl "MANGLE(ff_h264_lps_range)"(%%ebx, %%eax, 4), %%esi\n\t" | 459 "movzbl "MANGLE(ff_h264_lps_range)"(%%eax, %%ebx, 2), %%esi\n\t" |
460 "shll $17, %%esi \n\t" | |
461 "movl "LOW "(%2), %%ebx \n\t" | 460 "movl "LOW "(%2), %%ebx \n\t" |
462 //eax:state ebx:low, edx:range, esi:RangeLPS | 461 //eax:state ebx:low, edx:range, esi:RangeLPS |
463 "subl %%esi, %%edx \n\t" | 462 "subl %%esi, %%edx \n\t" |
464 #ifdef CMOV_IS_FAST //FIXME actually define this somewhere | 463 #ifdef CMOV_IS_FAST |
464 "movl %%edx, %%ecx \n\t" | |
465 "shl $17, %%edx \n\t" | |
465 "cmpl %%ebx, %%edx \n\t" | 466 "cmpl %%ebx, %%edx \n\t" |
466 "cmova %%edx, %%esi \n\t" | 467 "cmova %%ecx, %%esi \n\t" |
467 "sbbl %%ecx, %%ecx \n\t" | 468 "sbbl %%ecx, %%ecx \n\t" |
468 "andl %%ecx, %%edx \n\t" | 469 "andl %%ecx, %%edx \n\t" |
469 "subl %%edx, %%ebx \n\t" | 470 "subl %%edx, %%ebx \n\t" |
470 "xorl %%ecx, %%eax \n\t" | 471 "xorl %%ecx, %%eax \n\t" |
471 #else /* CMOV_IS_FAST */ | 472 #else /* CMOV_IS_FAST */ |
473 FIXTHIS | |
472 "movl %%edx, %%ecx \n\t" | 474 "movl %%edx, %%ecx \n\t" |
473 "subl %%ebx, %%edx \n\t" | 475 "subl %%ebx, %%edx \n\t" |
474 "sarl $31, %%edx \n\t" //lps_mask | 476 "sarl $31, %%edx \n\t" //lps_mask |
475 "subl %%ecx, %%esi \n\t" //RangeLPS - range | 477 "subl %%ecx, %%esi \n\t" //RangeLPS - range |
476 "andl %%edx, %%esi \n\t" //(RangeLPS - range)&lps_mask | 478 "andl %%edx, %%esi \n\t" //(RangeLPS - range)&lps_mask |
479 "subl %%ecx, %%ebx \n\t" | 481 "subl %%ecx, %%ebx \n\t" |
480 "xorl %%edx, %%eax \n\t" | 482 "xorl %%edx, %%eax \n\t" |
481 #endif /* CMOV_IS_FAST */ | 483 #endif /* CMOV_IS_FAST */ |
482 | 484 |
483 //eax:state ebx:low edx:mask esi:range | 485 //eax:state ebx:low edx:mask esi:range |
484 "movzbl "MANGLE(ff_h264_mlps_state)"+128(%%eax), %%ecx \n\t" | 486 |
485 "movb %%cl, (%1) \n\t" | 487 //eax:bit ebx:low esi:range |
486 | 488 |
487 "movl %%esi, %%edx \n\t" | |
488 //eax:bit ebx:low edx:range esi:range | |
489 | |
490 "shr $19, %%esi \n\t" | |
491 "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx \n\t" | 489 "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx \n\t" |
492 "shll %%cl, %%edx \n\t" | 490 "shll %%cl, %%esi \n\t" |
493 "movl %%edx, "RANGE "(%2) \n\t" | 491 "movzbl "MANGLE(ff_h264_mlps_state)"+128(%%eax), %%edx \n\t" |
492 "movb %%dl, (%1) \n\t" | |
493 "movl %%esi, "RANGE "(%2) \n\t" | |
494 "shll %%cl, %%ebx \n\t" | 494 "shll %%cl, %%ebx \n\t" |
495 "movl %%ebx, "LOW "(%2) \n\t" | 495 "movl %%ebx, "LOW "(%2) \n\t" |
496 "test %%bx, %%bx \n\t" | 496 "test %%bx, %%bx \n\t" |
497 " jnz 1f \n\t" | 497 " jnz 1f \n\t" |
498 | 498 |
504 "addl $2, %%ecx \n\t" | 504 "addl $2, %%ecx \n\t" |
505 "movl %%ecx, "BYTE "(%2) \n\t" | 505 "movl %%ecx, "BYTE "(%2) \n\t" |
506 | 506 |
507 "leal -1(%%ebx), %%ecx \n\t" | 507 "leal -1(%%ebx), %%ecx \n\t" |
508 "xorl %%ebx, %%ecx \n\t" | 508 "xorl %%ebx, %%ecx \n\t" |
509 "shrl $17, %%ecx \n\t" | 509 "shrl $15, %%ecx \n\t" |
510 "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t" | 510 "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t" |
511 "neg %%ecx \n\t" | 511 "neg %%ecx \n\t" |
512 "add $7, %%ecx \n\t" | 512 "add $7, %%ecx \n\t" |
513 | 513 |
514 "shll %%cl , %%esi \n\t" | 514 "shll %%cl , %%esi \n\t" |
521 ); | 521 ); |
522 bit&=1; | 522 bit&=1; |
523 #endif /* BRANCHLESS_CABAC_DECODER */ | 523 #endif /* BRANCHLESS_CABAC_DECODER */ |
524 #else /* ARCH_X86 */ | 524 #else /* ARCH_X86 */ |
525 int s = *state; | 525 int s = *state; |
526 int RangeLPS= ff_h264_lps_range[s][c->range>>(CABAC_BITS+7)]<<(CABAC_BITS+1); | 526 int RangeLPS= ff_h264_lps_range[0][2*(c->range&0xC0) + s]; |
527 int bit, lps_mask attribute_unused; | 527 int bit, lps_mask attribute_unused; |
528 | 528 |
529 c->range -= RangeLPS; | 529 c->range -= RangeLPS; |
530 #ifndef BRANCHLESS_CABAC_DECODER | 530 #ifndef BRANCHLESS_CABAC_DECODER |
531 if(c->low < c->range){ | 531 if(c->low < (c->range<<17)){ |
532 bit= s&1; | 532 bit= s&1; |
533 *state= ff_h264_mps_state[s]; | 533 *state= ff_h264_mps_state[s]; |
534 renorm_cabac_decoder_once(c); | 534 renorm_cabac_decoder_once(c); |
535 }else{ | 535 }else{ |
536 bit= ff_h264_norm_shift[RangeLPS>>19]; | 536 bit= ff_h264_norm_shift[RangeLPS]; |
537 c->low -= c->range; | 537 c->low -= (c->range<<17); |
538 *state= ff_h264_lps_state[s]; | 538 *state= ff_h264_lps_state[s]; |
539 c->range = RangeLPS<<bit; | 539 c->range = RangeLPS<<bit; |
540 c->low <<= bit; | 540 c->low <<= bit; |
541 bit= (s&1)^1; | 541 bit= (s&1)^1; |
542 | 542 |
543 if(!(c->low & 0xFFFF)){ | 543 if(!(c->low & 0xFFFF)){ |
544 refill2(c); | 544 refill2(c); |
545 } | 545 } |
546 } | 546 } |
547 #else /* BRANCHLESS_CABAC_DECODER */ | 547 #else /* BRANCHLESS_CABAC_DECODER */ |
548 lps_mask= (c->range - c->low)>>31; | 548 lps_mask= ((c->range<<17) - c->low)>>31; |
549 | 549 |
550 c->low -= c->range & lps_mask; | 550 c->low -= (c->range<<17) & lps_mask; |
551 c->range += (RangeLPS - c->range) & lps_mask; | 551 c->range += (RangeLPS - c->range) & lps_mask; |
552 | 552 |
553 s^=lps_mask; | 553 s^=lps_mask; |
554 *state= (ff_h264_mlps_state+128)[s]; | 554 *state= (ff_h264_mlps_state+128)[s]; |
555 bit= s&1; | 555 bit= s&1; |
556 | 556 |
557 lps_mask= ff_h264_norm_shift[c->range>>(CABAC_BITS+3)]; | 557 lps_mask= ff_h264_norm_shift[c->range]; |
558 c->range<<= lps_mask; | 558 c->range<<= lps_mask; |
559 c->low <<= lps_mask; | 559 c->low <<= lps_mask; |
560 if(!(c->low & CABAC_MASK)) | 560 if(!(c->low & CABAC_MASK)) |
561 refill2(c); | 561 refill2(c); |
562 #endif /* BRANCHLESS_CABAC_DECODER */ | 562 #endif /* BRANCHLESS_CABAC_DECODER */ |
571 static int get_cabac(CABACContext *c, uint8_t * const state){ | 571 static int get_cabac(CABACContext *c, uint8_t * const state){ |
572 return get_cabac_inline(c,state); | 572 return get_cabac_inline(c,state); |
573 } | 573 } |
574 | 574 |
575 static int get_cabac_bypass(CABACContext *c){ | 575 static int get_cabac_bypass(CABACContext *c){ |
576 int range; | |
576 c->low += c->low; | 577 c->low += c->low; |
577 | 578 |
578 if(!(c->low & CABAC_MASK)) | 579 if(!(c->low & CABAC_MASK)) |
579 refill(c); | 580 refill(c); |
580 | 581 |
581 if(c->low < c->range){ | 582 range= c->range<<17; |
583 if(c->low < range){ | |
582 return 0; | 584 return 0; |
583 }else{ | 585 }else{ |
584 c->low -= c->range; | 586 c->low -= range; |
585 return 1; | 587 return 1; |
586 } | 588 } |
587 } | 589 } |
588 | 590 |
589 /** | 591 /** |
590 * | 592 * |
591 * @return the number of bytes read or 0 if no end | 593 * @return the number of bytes read or 0 if no end |
592 */ | 594 */ |
593 static int get_cabac_terminate(CABACContext *c){ | 595 static int get_cabac_terminate(CABACContext *c){ |
594 c->range -= 4<<CABAC_BITS; | 596 c->range -= 2; |
595 if(c->low < c->range){ | 597 if(c->low < c->range<<17){ |
596 renorm_cabac_decoder_once(c); | 598 renorm_cabac_decoder_once(c); |
597 return 0; | 599 return 0; |
598 }else{ | 600 }else{ |
599 return c->bytestream - c->bytestream_start; | 601 return c->bytestream - c->bytestream_start; |
600 } | 602 } |