comparison cabac.h @ 3975:6cc9eb5ee5e3 libavcodec

x86 branchless CABAC decoder (slightly faster on P3)
author michael
date Mon, 09 Oct 2006 20:51:33 +0000
parents 93746612bc78
children 27e90123b346
comparison
equal deleted inserted replaced
3974:93746612bc78 3975:6cc9eb5ee5e3
29 //#undef NDEBUG 29 //#undef NDEBUG
30 #include <assert.h> 30 #include <assert.h>
31 31
32 #define CABAC_BITS 16 32 #define CABAC_BITS 16
33 #define CABAC_MASK ((1<<CABAC_BITS)-1) 33 #define CABAC_MASK ((1<<CABAC_BITS)-1)
34 #define BRANCHLESS_CABAD 1
34 35
35 typedef struct CABACContext{ 36 typedef struct CABACContext{
36 int low; 37 int low;
37 int range; 38 int range;
38 int outstanding_count; 39 int outstanding_count;
372 #define LPS_STATE "12+2*66*4" 373 #define LPS_STATE "12+2*66*4"
373 #define MPS_STATE "12+2*66*4+2*65" 374 #define MPS_STATE "12+2*66*4+2*65"
374 #define BYTESTART "12+2*66*4+4*65" 375 #define BYTESTART "12+2*66*4+4*65"
375 #define BYTE "16+2*66*4+4*65" 376 #define BYTE "16+2*66*4+4*65"
376 #define BYTEEND "20+2*66*4+4*65" 377 #define BYTEEND "20+2*66*4+4*65"
377 378 #ifndef BRANCHLESS_CABAD
378 asm volatile( 379 asm volatile(
379 "movzbl (%1), %%eax \n\t" 380 "movzbl (%1), %%eax \n\t"
380 "movl "RANGE "(%2), %%ebx \n\t" 381 "movl "RANGE "(%2), %%ebx \n\t"
381 "movl "RANGE "(%2), %%edx \n\t" 382 "movl "RANGE "(%2), %%edx \n\t"
382 "shrl $23, %%ebx \n\t" 383 "shrl $23, %%ebx \n\t"
445 :"=&a"(bit) //FIXME this is fragile: gcc either runs out of registers or miscompiles it (for example if "+a"(bit) or "+m"(*state) is used) 446 :"=&a"(bit) //FIXME this is fragile: gcc either runs out of registers or miscompiles it (for example if "+a"(bit) or "+m"(*state) is used)
446 :"r"(state), "r"(c) 447 :"r"(state), "r"(c)
447 : "%ecx", "%ebx", "%edx", "%esi" 448 : "%ecx", "%ebx", "%edx", "%esi"
448 ); 449 );
449 #else 450 #else
451 asm volatile(
452 "movzbl (%1), %%eax \n\t"
453 "movl "RANGE "(%2), %%ebx \n\t"
454 "movl "RANGE "(%2), %%edx \n\t"
455 "shrl $23, %%ebx \n\t"
456 "leal "LPS_RANGE"(%2, %%eax, 4), %%esi \n\t"
457 "movzbl (%%ebx, %%esi), %%esi \n\t"
458 "shll $17, %%esi \n\t"
459 "movl "LOW "(%2), %%ebx \n\t"
460 //eax:state ebx:low, edx:range, esi:RangeLPS
461 "subl %%esi, %%edx \n\t"
462 "movl %%edx, %%ecx \n\t"
463 "subl %%ebx, %%edx \n\t"
464 "sarl $31, %%edx \n\t" //lps_mask
465 "subl %%ecx, %%esi \n\t" //RangeLPS - range
466 "andl %%edx, %%esi \n\t" //(RangeLPS - range)&lps_mask
467 "addl %%ecx, %%esi \n\t" //new range
468 "andl %%edx, %%ecx \n\t"
469 "subl %%ecx, %%ebx \n\t"
470
471 //eax:state ebx:low edx:mask esi:range
472 "movl $-130, %%ecx \n\t"
473 "andl %%edx, %%ecx \n\t"
474 "addl %%eax, %%ecx \n\t"
475
476 "xorl %%edx, %%eax \n\t"
477 "movb "MPS_STATE"(%2, %%eax), %%cl \n\t"
478 "movb %%cl, (%1) \n\t"
479
480 "movl %%esi, %%edx \n\t"
481 //eax:bit ebx:low edx:range esi:range
482
483 "shr $19, %%esi \n\t"
484 "movb " MANGLE(ff_h264_norm_shift) "(%%esi), %%cl \n\t"
485 "shll %%cl, %%ebx \n\t"
486 "shll %%cl, %%edx \n\t"
487 "test %%bx, %%bx \n\t"
488 " jnz 1f \n\t"
489
490 "movl "BYTE "(%2), %%ecx \n\t"
491 "movzwl (%%ecx), %%esi \n\t"
492 "bswap %%esi \n\t"
493 "shrl $15, %%esi \n\t"
494 "subl $0xFFFF, %%esi \n\t"
495 "addl $2, %%ecx \n\t"
496 "movl %%ecx, "BYTE "(%2) \n\t"
497
498 "leal -1(%%ebx), %%ecx \n\t"
499 "xorl %%ebx, %%ecx \n\t"
500 "shrl $17, %%ecx \n\t"
501 "movb " MANGLE(ff_h264_norm_shift) "(%%ecx), %%cl \n\t"
502 "neg %%cl \n\t"
503 "add $7, %%cl \n\t"
504
505 "shll %%cl , %%esi \n\t"
506 "addl %%esi, %%ebx \n\t"
507 "1: \n\t"
508 "movl %%edx, "RANGE "(%2) \n\t"
509 "movl %%ebx, "LOW "(%2) \n\t"
510 "andl $1, %%eax \n\t"
511 :"=&a"(bit)
512 :"r"(state), "r"(c)
513 : "%ecx", "%ebx", "%edx", "%esi"
514 );
515 #endif
516 #else
450 int s = *state; 517 int s = *state;
451 int RangeLPS= c->lps_range[s][c->range>>(CABAC_BITS+7)]<<(CABAC_BITS+1); 518 int RangeLPS= c->lps_range[s][c->range>>(CABAC_BITS+7)]<<(CABAC_BITS+1);
452 int bit, lps_mask attribute_unused; 519 int bit, lps_mask attribute_unused;
453 520
454 c->range -= RangeLPS; 521 c->range -= RangeLPS;