Mercurial > libavcodec.hg
comparison cabac.h @ 4040:9eaea06c5ba6 libavcodec
optimize sign decoding code in decode_residual()
x86 is 4% faster on P3
C sign stuff + x86 code for everything else is also faster than before (sorry, forgot to test pure C)
... and if I replace the second occurrence of the sign decoding in decode_residual() with the asm too, then everything gets slower. I am starting to think that it might be best to write the whole function in asm; playing this avoid-random-deoptimizations game with gcc is not fun at all.
author | michael |
---|---|
date | Thu, 19 Oct 2006 01:19:03 +0000 |
parents | 866a83726985 |
children | 87694a28120c |
comparison
equal
deleted
inserted
replaced
4039:866a83726985 | 4040:9eaea06c5ba6 |
---|---|
573 static int get_cabac(CABACContext *c, uint8_t * const state){ | 573 static int get_cabac(CABACContext *c, uint8_t * const state){ |
574 return get_cabac_inline(c,state); | 574 return get_cabac_inline(c,state); |
575 } | 575 } |
576 | 576 |
577 static int get_cabac_bypass(CABACContext *c){ | 577 static int get_cabac_bypass(CABACContext *c){ |
578 #if 0 //not faster | |
579 int bit; | |
580 asm volatile( | |
581 "movl "RANGE "(%1), %%ebx \n\t" | |
582 "movl "LOW "(%1), %%eax \n\t" | |
583 "shl $17, %%ebx \n\t" | |
584 "add %%eax, %%eax \n\t" | |
585 "sub %%ebx, %%eax \n\t" | |
586 "cdq \n\t" | |
587 "and %%edx, %%ebx \n\t" | |
588 "add %%ebx, %%eax \n\t" | |
589 "test %%ax, %%ax \n\t" | |
590 " jnz 1f \n\t" | |
591 "movl "BYTE "(%1), %%ebx \n\t" | |
592 "subl $0xFFFF, %%eax \n\t" | |
593 "movzwl (%%ebx), %%ecx \n\t" | |
594 "bswap %%ecx \n\t" | |
595 "shrl $15, %%ecx \n\t" | |
596 "addl $2, %%ebx \n\t" | |
597 "addl %%ecx, %%eax \n\t" | |
598 "movl %%ebx, "BYTE "(%1) \n\t" | |
599 "1: \n\t" | |
600 "movl %%eax, "LOW "(%1) \n\t" | |
601 | |
602 :"=&d"(bit) | |
603 :"r"(c) | |
604 : "%eax", "%ebx", "%ecx", "memory" | |
605 ); | |
606 return bit+1; | |
607 #else | |
578 int range; | 608 int range; |
579 c->low += c->low; | 609 c->low += c->low; |
580 | 610 |
581 if(!(c->low & CABAC_MASK)) | 611 if(!(c->low & CABAC_MASK)) |
582 refill(c); | 612 refill(c); |
586 return 0; | 616 return 0; |
587 }else{ | 617 }else{ |
588 c->low -= range; | 618 c->low -= range; |
589 return 1; | 619 return 1; |
590 } | 620 } |
591 } | 621 #endif |
622 } | |
623 | |
624 | |
625 static always_inline int get_cabac_bypass_sign(CABACContext *c, int val){ | |
626 #ifdef ARCH_X86 | |
627 int bit; | |
628 asm volatile( | |
629 "movl "RANGE "(%1), %%ebx \n\t" | |
630 "movl "LOW "(%1), %%eax \n\t" | |
631 "shl $17, %%ebx \n\t" | |
632 "add %%eax, %%eax \n\t" | |
633 "sub %%ebx, %%eax \n\t" | |
634 "cdq \n\t" | |
635 "and %%edx, %%ebx \n\t" | |
636 "add %%ebx, %%eax \n\t" | |
637 "xor %%edx, %%ecx \n\t" | |
638 "sub %%edx, %%ecx \n\t" | |
639 "test %%ax, %%ax \n\t" | |
640 " jnz 1f \n\t" | |
641 "movl "BYTE "(%1), %%ebx \n\t" | |
642 "subl $0xFFFF, %%eax \n\t" | |
643 "movzwl (%%ebx), %%edx \n\t" | |
644 "bswap %%edx \n\t" | |
645 "shrl $15, %%edx \n\t" | |
646 "addl $2, %%ebx \n\t" | |
647 "addl %%edx, %%eax \n\t" | |
648 "movl %%ebx, "BYTE "(%1) \n\t" | |
649 "1: \n\t" | |
650 "movl %%eax, "LOW "(%1) \n\t" | |
651 | |
652 :"+c"(val) | |
653 :"r"(c) | |
654 : "%eax", "%ebx", "%edx", "memory" | |
655 ); | |
656 return val; | |
657 #else | |
658 int range, mask; | |
659 c->low += c->low; | |
660 | |
661 if(!(c->low & CABAC_MASK)) | |
662 refill(c); | |
663 | |
664 range= c->range<<17; | |
665 c->low -= range; | |
666 mask= c->low >> 31; | |
667 range &= mask; | |
668 c->low += range; | |
669 return (val^mask)-mask; | |
670 #endif | |
671 } | |
672 | |
592 //FIXME the x86 code from this file should be moved into i386/h264 or cabac something.c/h (note ill kill you if you move my code away from under my fingers before iam finished with it!) | 673 //FIXME the x86 code from this file should be moved into i386/h264 or cabac something.c/h (note ill kill you if you move my code away from under my fingers before iam finished with it!) |
593 //FIXME use some macros to avoid duplicatin get_cabac (cant be done yet as that would make optimization work hard) | 674 //FIXME use some macros to avoid duplicatin get_cabac (cant be done yet as that would make optimization work hard) |
594 #ifdef ARCH_X86 | 675 #ifdef ARCH_X86 |
595 static int decode_significance_x86(CABACContext *c, int max_coeff, uint8_t *significant_coeff_ctx_base, int *index){ | 676 static int decode_significance_x86(CABACContext *c, int max_coeff, uint8_t *significant_coeff_ctx_base, int *index){ |
596 void *end= significant_coeff_ctx_base + max_coeff - 1; | 677 void *end= significant_coeff_ctx_base + max_coeff - 1; |