Mercurial > libavcodec.hg
comparison cabac.h @ 4037:53be304c7f54 libavcodec
x86 asm version of the decode significance loop (not 8x8) of decode_residual(); decode_residual() is 5% faster on P3
author | michael |
---|---|
date | Tue, 17 Oct 2006 22:18:29 +0000 |
parents | b7f31a32bb30 |
children | 866a83726985 |
comparison
equal
deleted
inserted
replaced
4036:207c22206d53 | 4037:53be304c7f54 |
---|---|
586 }else{ | 586 }else{ |
587 c->low -= range; | 587 c->low -= range; |
588 return 1; | 588 return 1; |
589 } | 589 } |
590 } | 590 } |
591 //FIXME the x86 code from this file should be moved into i386/h264 or cabac something.c/h (note: I'll kill you if you move my code away from under my fingers before I am finished with it!) | |
592 //FIXME use some macros to avoid duplicating get_cabac (can't be done yet, as that would make optimization work hard) | |
593 #ifdef ARCH_X86 | |
/**
 * x86 inline-asm version of the "decode significance" loop.
 *
 * Starting at significant_coeff_ctx_base, one bin is decoded per context
 * byte. Whenever that bin is 1, the byte offset of the current context from
 * the start of the table is appended to index[], and a second bin is decoded
 * with the context byte located 61 bytes after the current one (presumably
 * the matching "last coefficient" context -- confirm against the caller).
 * If that second bin is 1 the loop stops. If instead the context pointer
 * reaches significant_coeff_ctx_base + max_coeff - 1, that final offset is
 * appended unconditionally before returning.
 *
 * Side effects: every context byte that was read is overwritten with its
 * updated state, and the fields of c addressed through the RANGE, LOW and
 * BYTE macros (defined outside this block) are written back.
 *
 * NOTE(review): pointers are cast to int and index is advanced by 4 bytes
 * per entry, so this code assumes 32-bit pointers and a 4-byte int.
 *
 * @param c                          CABAC decoder context
 * @param max_coeff                  the loop ends at ctx_base + max_coeff - 1
 * @param significant_coeff_ctx_base first context state byte to use
 * @param index                      output array receiving the offsets
 * @return number of entries written to index[]
 */
594 static int decode_significance_x86(CABACContext *c, int max_coeff, uint8_t *significant_coeff_ctx_base, int *index){ | |
595 void *end= significant_coeff_ctx_base + max_coeff - 1; | |
/* Negated start addresses: adding them to a current pointer inside the asm
   yields "current - start" without needing an extra register. */
596 int minusstart= -(int)significant_coeff_ctx_base; | |
597 int minusindex= -(int)index; | |
598 int coeff_count; | |
/* Operands: %0 coeff_count (forced into eax, used as scratch for the state),
   %1 current context pointer, %2 index (in memory), %3 c,
   %4 minusstart, %5 end, %6 minusindex. */
599 asm volatile( | |
600 "movl "RANGE "(%3), %%esi \n\t" | |
601 "movl "LOW "(%3), %%ebx \n\t" | |
602 | |
/* Loop head: decode one bin using the context byte at (%1). */
603 "2: \n\t" | |
604 | |
605 "movzbl (%1), %0 \n\t" | |
606 "movl %%esi, %%edx \n\t" | |
607 "andl $0xC0, %%esi \n\t" | |
608 "movzbl "MANGLE(ff_h264_lps_range)"(%0, %%esi, 2), %%esi\n\t" | |
609 /*eax:state ebx:low, edx:range, esi:RangeLPS*/ | |
610 "subl %%esi, %%edx \n\t" | |
611 | |
/* Branch-free choice between the two sub-intervals: build an all-ones or
   all-zeros mask from comparing (range<<17) with low, then use it to pick
   the new range, adjust low and flip the state. */
612 #if (defined CMOV_IS_FAST && __CPU__ >= 686) | |
613 "movl %%edx, %%ecx \n\t" | |
614 "shl $17, %%edx \n\t" | |
615 "cmpl %%ebx, %%edx \n\t" | |
616 "cmova %%ecx, %%esi \n\t" | |
617 "sbbl %%ecx, %%ecx \n\t" | |
618 "andl %%ecx, %%edx \n\t" | |
619 "subl %%edx, %%ebx \n\t" | |
620 "xorl %%ecx, %0 \n\t" | |
621 #else /* CMOV_IS_FAST */ | |
622 "movl %%edx, %%ecx \n\t" | |
623 "shl $17, %%edx \n\t" | |
624 "subl %%ebx, %%edx \n\t" | |
625 "sarl $31, %%edx \n\t" //lps_mask | |
626 "subl %%ecx, %%esi \n\t" //RangeLPS - range | |
627 "andl %%edx, %%esi \n\t" //(RangeLPS - range)&lps_mask | |
628 "addl %%ecx, %%esi \n\t" //new range | |
629 "shl $17, %%ecx \n\t" | |
630 "andl %%edx, %%ecx \n\t" | |
631 "subl %%ecx, %%ebx \n\t" | |
632 "xorl %%edx, %0 \n\t" | |
633 #endif /* CMOV_IS_FAST */ | |
634 | |
/* Renormalize: shift range and low by the table-provided amount and store
   the next state (looked up in ff_h264_mlps_state) back into the context. */
635 "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx \n\t" | |
636 "shll %%cl, %%esi \n\t" | |
637 "movzbl "MANGLE(ff_h264_mlps_state)"+128(%0), %%edx \n\t" | |
638 "movb %%dl, (%1) \n\t" | |
639 "shll %%cl, %%ebx \n\t" | |
/* Refill is only needed when the low 16 bits of low are all zero. */
640 "test %%bx, %%bx \n\t" | |
641 " jnz 1f \n\t" | |
642 | |
/* Refill: read 2 bytes through the pointer stored at BYTE(%3); bswap plus
   shrl 15 turns them into (big-endian 16-bit value)<<1. The pointer is
   advanced by 2 and stored back. */
643 "movl "BYTE "(%3), %%ecx \n\t" | |
644 "movzwl (%%ecx), %%edx \n\t" | |
645 "bswap %%edx \n\t" | |
646 "shrl $15, %%edx \n\t" | |
647 "subl $0xFFFF, %%edx \n\t" | |
648 "addl $2, %%ecx \n\t" | |
649 "movl %%ecx, "BYTE "(%3) \n\t" | |
650 | |
/* Shift for the new bits: 7 - ff_h264_norm_shift[((low-1)^low)>>15]. */
651 "leal -1(%%ebx), %%ecx \n\t" | |
652 "xorl %%ebx, %%ecx \n\t" | |
653 "shrl $15, %%ecx \n\t" | |
654 "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t" | |
655 "neg %%ecx \n\t" | |
656 "add $7, %%ecx \n\t" | |
657 | |
658 "shll %%cl , %%edx \n\t" | |
659 "addl %%edx, %%ebx \n\t" | |
660 "1: \n\t" | |
661 | |
/* Bit 0 of the state register is the decoded bin; 0 means skip to the next
   context byte. */
662 "test $1, %0 \n\t" | |
663 " jz 3f \n\t" | |
664 | |
/* Bin was 1: *index++ = current context pointer - start of table. */
665 "movl %2, %%eax \n\t" | |
666 "movl %4, %%ecx \n\t" | |
667 "addl %1, %%ecx \n\t" | |
668 "movl %%ecx, (%%eax) \n\t" | |
669 "addl $4, %%eax \n\t" | |
670 "movl %%eax, %2 \n\t" | |
671 | |
/* Decode the second bin; same sequence as above, but using the context byte
   at offset 61 from the current pointer. */
672 "movzbl 61(%1), %0 \n\t" | |
673 "movl %%esi, %%edx \n\t" | |
674 "andl $0xC0, %%esi \n\t" | |
675 "movzbl "MANGLE(ff_h264_lps_range)"(%0, %%esi, 2), %%esi\n\t" | |
676 /*eax:state ebx:low, edx:range, esi:RangeLPS*/ | |
677 "subl %%esi, %%edx \n\t" | |
678 | |
679 #if (defined CMOV_IS_FAST && __CPU__ >= 686) | |
680 "movl %%edx, %%ecx \n\t" | |
681 "shl $17, %%edx \n\t" | |
682 "cmpl %%ebx, %%edx \n\t" | |
683 "cmova %%ecx, %%esi \n\t" | |
684 "sbbl %%ecx, %%ecx \n\t" | |
685 "andl %%ecx, %%edx \n\t" | |
686 "subl %%edx, %%ebx \n\t" | |
687 "xorl %%ecx, %0 \n\t" | |
688 #else /* CMOV_IS_FAST */ | |
689 "movl %%edx, %%ecx \n\t" | |
690 "shl $17, %%edx \n\t" | |
691 "subl %%ebx, %%edx \n\t" | |
692 "sarl $31, %%edx \n\t" //lps_mask | |
693 "subl %%ecx, %%esi \n\t" //RangeLPS - range | |
694 "andl %%edx, %%esi \n\t" //(RangeLPS - range)&lps_mask | |
695 "addl %%ecx, %%esi \n\t" //new range | |
696 "shl $17, %%ecx \n\t" | |
697 "andl %%edx, %%ecx \n\t" | |
698 "subl %%ecx, %%ebx \n\t" | |
699 "xorl %%edx, %0 \n\t" | |
700 #endif /* CMOV_IS_FAST */ | |
701 | |
702 "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx \n\t" | |
703 "shll %%cl, %%esi \n\t" | |
704 "movzbl "MANGLE(ff_h264_mlps_state)"+128(%0), %%edx \n\t" | |
705 "movb %%dl, 61(%1) \n\t" | |
706 "shll %%cl, %%ebx \n\t" | |
707 "test %%bx, %%bx \n\t" | |
708 " jnz 1f \n\t" | |
709 | |
710 "movl "BYTE "(%3), %%ecx \n\t" | |
711 "movzwl (%%ecx), %%edx \n\t" | |
712 "bswap %%edx \n\t" | |
713 "shrl $15, %%edx \n\t" | |
714 "subl $0xFFFF, %%edx \n\t" | |
715 "addl $2, %%ecx \n\t" | |
716 "movl %%ecx, "BYTE "(%3) \n\t" | |
717 | |
718 "leal -1(%%ebx), %%ecx \n\t" | |
719 "xorl %%ebx, %%ecx \n\t" | |
720 "shrl $15, %%ecx \n\t" | |
721 "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t" | |
722 "neg %%ecx \n\t" | |
723 "add $7, %%ecx \n\t" | |
724 | |
725 "shll %%cl , %%edx \n\t" | |
726 "addl %%edx, %%ebx \n\t" | |
727 "1: \n\t" | |
728 | |
/* Second bin set: leave the loop without appending another entry. */
729 "test $1, %%eax \n\t" | |
730 " jnz 4f \n\t" | |
731 | |
/* Advance to the next context byte; loop while pointer < end (unsigned). */
732 "3: \n\t" | |
733 "addl $1, %1 \n\t" | |
734 "cmpl %5, %1 \n\t" | |
735 " jb 2b \n\t" | |
/* End reached without an early exit: append the final offset. */
736 "movl %2, %%eax \n\t" | |
737 "movl %4, %%ecx \n\t" | |
738 "addl %1, %%ecx \n\t" | |
739 "movl %%ecx, (%%eax) \n\t" | |
740 "addl $4, %%eax \n\t" | |
741 "movl %%eax, %2 \n\t" | |
/* Result = (index now - index at entry) / 4 = number of entries written. */
742 "4: \n\t" | |
743 "movl %2, %%eax \n\t" | |
744 "addl %6, %%eax \n\t" | |
745 "shr $2, %%eax \n\t" | |
746 | |
/* Write the working copies of range and low back into the context. */
747 "movl %%esi, "RANGE "(%3) \n\t" | |
748 "movl %%ebx, "LOW "(%3) \n\t" | |
749 :"=&a"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index)\ | |
750 :"r"(c), "m"(minusstart), "m"(end), "m"(minusindex)\ | |
751 : "%ecx", "%ebx", "%edx", "%esi", "memory"\ | |
752 ); | |
753 return coeff_count; | |
754 } | |
755 #endif | |
591 | 756 |
592 /** | 757 /** |
593 * | 758 * |
594 * @return the number of bytes read or 0 if no end | 759 * @return the number of bytes read or 0 if no end |
595 */ | 760 */ |