Mercurial > libavcodec.hg
comparison cabac.h @ 3975:6cc9eb5ee5e3 libavcodec
x86 branchless cabac decoder
slightly faster on P3
author | michael |
---|---|
date | Mon, 09 Oct 2006 20:51:33 +0000 |
parents | 93746612bc78 |
children | 27e90123b346 |
comparison
equal
deleted
inserted
replaced
3974:93746612bc78 | 3975:6cc9eb5ee5e3 |
---|---|
29 //#undef NDEBUG | 29 //#undef NDEBUG |
30 #include <assert.h> | 30 #include <assert.h> |
31 | 31 |
32 #define CABAC_BITS 16 | 32 #define CABAC_BITS 16 |
33 #define CABAC_MASK ((1<<CABAC_BITS)-1) | 33 #define CABAC_MASK ((1<<CABAC_BITS)-1) |
34 #define BRANCHLESS_CABAD 1 | |
34 | 35 |
35 typedef struct CABACContext{ | 36 typedef struct CABACContext{ |
36 int low; | 37 int low; |
37 int range; | 38 int range; |
38 int outstanding_count; | 39 int outstanding_count; |
372 #define LPS_STATE "12+2*66*4" | 373 #define LPS_STATE "12+2*66*4" |
373 #define MPS_STATE "12+2*66*4+2*65" | 374 #define MPS_STATE "12+2*66*4+2*65" |
374 #define BYTESTART "12+2*66*4+4*65" | 375 #define BYTESTART "12+2*66*4+4*65" |
375 #define BYTE "16+2*66*4+4*65" | 376 #define BYTE "16+2*66*4+4*65" |
376 #define BYTEEND "20+2*66*4+4*65" | 377 #define BYTEEND "20+2*66*4+4*65" |
377 | 378 #ifndef BRANCHLESS_CABAD |
378 asm volatile( | 379 asm volatile( |
379 "movzbl (%1), %%eax \n\t" | 380 "movzbl (%1), %%eax \n\t" |
380 "movl "RANGE "(%2), %%ebx \n\t" | 381 "movl "RANGE "(%2), %%ebx \n\t" |
381 "movl "RANGE "(%2), %%edx \n\t" | 382 "movl "RANGE "(%2), %%edx \n\t" |
382 "shrl $23, %%ebx \n\t" | 383 "shrl $23, %%ebx \n\t" |
445 :"=&a"(bit) //FIXME this is fragile: gcc either runs out of registers or miscompiles it (for example if "+a"(bit) or "+m"(*state) is used) | 446 :"=&a"(bit) //FIXME this is fragile: gcc either runs out of registers or miscompiles it (for example if "+a"(bit) or "+m"(*state) is used) |
446 :"r"(state), "r"(c) | 447 :"r"(state), "r"(c) |
447 : "%ecx", "%ebx", "%edx", "%esi" | 448 : "%ecx", "%ebx", "%edx", "%esi" |
448 ); | 449 ); |
449 #else | 450 #else |
451 asm volatile( | |
452 "movzbl (%1), %%eax \n\t" | |
453 "movl "RANGE "(%2), %%ebx \n\t" | |
454 "movl "RANGE "(%2), %%edx \n\t" | |
455 "shrl $23, %%ebx \n\t" | |
456 "leal "LPS_RANGE"(%2, %%eax, 4), %%esi \n\t" | |
457 "movzbl (%%ebx, %%esi), %%esi \n\t" | |
458 "shll $17, %%esi \n\t" | |
459 "movl "LOW "(%2), %%ebx \n\t" | |
460 //eax:state ebx:low, edx:range, esi:RangeLPS | |
461 "subl %%esi, %%edx \n\t" | |
462 "movl %%edx, %%ecx \n\t" | |
463 "subl %%ebx, %%edx \n\t" | |
464 "sarl $31, %%edx \n\t" //lps_mask | |
465 "subl %%ecx, %%esi \n\t" //RangeLPS - range | |
466 "andl %%edx, %%esi \n\t" //(RangeLPS - range)&lps_mask | |
467 "addl %%ecx, %%esi \n\t" //new range | |
468 "andl %%edx, %%ecx \n\t" | |
469 "subl %%ecx, %%ebx \n\t" | |
470 | |
471 //eax:state ebx:low edx:mask esi:range | |
472 "movl $-130, %%ecx \n\t" | |
473 "andl %%edx, %%ecx \n\t" | |
474 "addl %%eax, %%ecx \n\t" | |
475 | |
476 "xorl %%edx, %%eax \n\t" | |
477 "movb "MPS_STATE"(%2, %%eax), %%cl \n\t" | |
478 "movb %%cl, (%1) \n\t" | |
479 | |
480 "movl %%esi, %%edx \n\t" | |
481 //eax:bit ebx:low edx:range esi:range | |
482 | |
483 "shr $19, %%esi \n\t" | |
484 "movb " MANGLE(ff_h264_norm_shift) "(%%esi), %%cl \n\t" | |
485 "shll %%cl, %%ebx \n\t" | |
486 "shll %%cl, %%edx \n\t" | |
487 "test %%bx, %%bx \n\t" | |
488 " jnz 1f \n\t" | |
489 | |
490 "movl "BYTE "(%2), %%ecx \n\t" | |
491 "movzwl (%%ecx), %%esi \n\t" | |
492 "bswap %%esi \n\t" | |
493 "shrl $15, %%esi \n\t" | |
494 "subl $0xFFFF, %%esi \n\t" | |
495 "addl $2, %%ecx \n\t" | |
496 "movl %%ecx, "BYTE "(%2) \n\t" | |
497 | |
498 "leal -1(%%ebx), %%ecx \n\t" | |
499 "xorl %%ebx, %%ecx \n\t" | |
500 "shrl $17, %%ecx \n\t" | |
501 "movb " MANGLE(ff_h264_norm_shift) "(%%ecx), %%cl \n\t" | |
502 "neg %%cl \n\t" | |
503 "add $7, %%cl \n\t" | |
504 | |
505 "shll %%cl , %%esi \n\t" | |
506 "addl %%esi, %%ebx \n\t" | |
507 "1: \n\t" | |
508 "movl %%edx, "RANGE "(%2) \n\t" | |
509 "movl %%ebx, "LOW "(%2) \n\t" | |
510 "andl $1, %%eax \n\t" | |
511 :"=&a"(bit) | |
512 :"r"(state), "r"(c) | |
513 : "%ecx", "%ebx", "%edx", "%esi" | |
514 ); | |
515 #endif | |
516 #else | |
450 int s = *state; | 517 int s = *state; |
451 int RangeLPS= c->lps_range[s][c->range>>(CABAC_BITS+7)]<<(CABAC_BITS+1); | 518 int RangeLPS= c->lps_range[s][c->range>>(CABAC_BITS+7)]<<(CABAC_BITS+1); |
452 int bit, lps_mask attribute_unused; | 519 int bit, lps_mask attribute_unused; |
453 | 520 |
454 c->range -= RangeLPS; | 521 c->range -= RangeLPS; |