comparison cabac.h @ 4037:53be304c7f54 libavcodec

x86 asm version of the decode significance loop (not 8x8) of decode_residual() 5% faster decode_residual() on P3
author michael
date Tue, 17 Oct 2006 22:18:29 +0000
parents b7f31a32bb30
children 866a83726985
comparison
equal deleted inserted replaced
4036:207c22206d53 4037:53be304c7f54
586 }else{ 586 }else{
587 c->low -= range; 587 c->low -= range;
588 return 1; 588 return 1;
589 } 589 }
590 } 590 }
591 //FIXME the x86 code from this file should be moved into i386/h264 or cabac something.c/h (note: I'll kill you if you move my code away from under my fingers before I am finished with it!)
592 //FIXME use some macros to avoid duplicating get_cabac (can't be done yet as that would make optimization work hard)
593 #ifdef ARCH_X86
/**
 * Decodes a significance map with hand-written x86 inline assembly.
 *
 * The scan pointer starts at significant_coeff_ctx_base and moves forward one
 * context state byte at a time. At each position one CABAC bit is decoded
 * with the state byte under the scan pointer. If that bit is 1, the position
 * (scan pointer minus the initial significant_coeff_ctx_base) is appended to
 * index[] and a second bit is decoded with the state byte 61 bytes after the
 * scan pointer; if the second bit is 1 the scan stops. If the scan pointer
 * reaches base + max_coeff - 1 without stopping, that final position is
 * appended to index[] without decoding anything for it.
 *
 * @param c CABAC context; the values at RANGE(c) and LOW(c) are loaded on
 *          entry and written back on exit, and the pointer at BYTE(c) is
 *          advanced by 2 on every refill
 *          (NOTE(review): RANGE/LOW/BYTE are macros defined outside this
 *          block, presumably field offsets of CABACContext - confirm)
 * @param max_coeff number of positions covered by the scan
 * @param significant_coeff_ctx_base first context state byte; state bytes are
 *          updated in place
 * @param index output array that receives the recorded positions
 * @return number of entries written to index[]
 *
 * NOTE(review): pointers are cast to int and handled in 32-bit registers, so
 * this presumably requires a 32-bit target - confirm what ARCH_X86 covers.
 */
594 static int decode_significance_x86(CABACContext *c, int max_coeff, uint8_t *significant_coeff_ctx_base, int *index){
/* Address of the last position; the loop below continues while the scan pointer is below it. */
595 void *end= significant_coeff_ctx_base + max_coeff - 1;
/* Negated start address: adding the current scan pointer yields the position relative to the start. */
596 int minusstart= -(int)significant_coeff_ctx_base;
/* Negated address of index[0]: adding the final write pointer yields the number of bytes written. */
597 int minusindex= -(int)index;
598 int coeff_count;
/* Operand map: %0 = coeff_count (eax, early-clobber output), %1 = significant_coeff_ctx_base
   (register, used as the scan pointer), %2 = index (memory, used as the write pointer),
   %3 = c (register), %4 = minusstart, %5 = end, %6 = minusindex (all memory).
   esi carries range and ebx carries low across the whole loop. */
599 asm volatile(
/* Load range and low from the context. */
600 "movl "RANGE "(%3), %%esi \n\t"
601 "movl "LOW "(%3), %%ebx \n\t"
602
/* Label 2: loop head, one iteration per position. */
603 "2: \n\t"
604
/* First decode: state byte under the scan pointer; RangeLPS is looked up in
   ff_h264_lps_range at index state + 2*(range & 0xC0). */
605 "movzbl (%1), %0 \n\t"
606 "movl %%esi, %%edx \n\t"
607 "andl $0xC0, %%esi \n\t"
608 "movzbl "MANGLE(ff_h264_lps_range)"(%0, %%esi, 2), %%esi\n\t"
609 /*eax:state ebx:low, edx:range, esi:RangeLPS*/
/* range -= RangeLPS */
610 "subl %%esi, %%edx \n\t"
611
/* Both variants compare range<<17 against low without branching. They produce a
   mask in a register (ecx in the cmov variant, edx otherwise), select the new
   range into esi, subtract (range<<17) & mask from low and XOR the mask into the state. */
612 #if (defined CMOV_IS_FAST && __CPU__ >= 686)
613 "movl %%edx, %%ecx \n\t"
614 "shl $17, %%edx \n\t"
615 "cmpl %%ebx, %%edx \n\t"
616 "cmova %%ecx, %%esi \n\t"
617 "sbbl %%ecx, %%ecx \n\t"
618 "andl %%ecx, %%edx \n\t"
619 "subl %%edx, %%ebx \n\t"
620 "xorl %%ecx, %0 \n\t"
621 #else /* CMOV_IS_FAST */
622 "movl %%edx, %%ecx \n\t"
623 "shl $17, %%edx \n\t"
624 "subl %%ebx, %%edx \n\t"
625 "sarl $31, %%edx \n\t" //lps_mask
626 "subl %%ecx, %%esi \n\t" //RangeLPS - range
627 "andl %%edx, %%esi \n\t" //(RangeLPS - range)&lps_mask
628 "addl %%ecx, %%esi \n\t" //new range
629 "shl $17, %%ecx \n\t"
630 "andl %%edx, %%ecx \n\t"
631 "subl %%ecx, %%ebx \n\t"
632 "xorl %%edx, %0 \n\t"
633 #endif /* CMOV_IS_FAST */
634
/* Renormalize: shift range and low left by ff_h264_norm_shift[range], and store the
   next state, read from ff_h264_mlps_state[128 + (state ^ mask)], back under the scan pointer. */
635 "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx \n\t"
636 "shll %%cl, %%esi \n\t"
637 "movzbl "MANGLE(ff_h264_mlps_state)"+128(%0), %%edx \n\t"
638 "movb %%dl, (%1) \n\t"
639 "shll %%cl, %%ebx \n\t"
/* A refill is needed only when the low 16 bits of low are all zero. */
640 "test %%bx, %%bx \n\t"
641 " jnz 1f \n\t"
642
/* Refill: read two bytes through the pointer stored at BYTE(c), turn them into
   (big-endian 16-bit value << 1) - 0xFFFF, and advance the stored pointer by 2. */
643 "movl "BYTE "(%3), %%ecx \n\t"
644 "movzwl (%%ecx), %%edx \n\t"
645 "bswap %%edx \n\t"
646 "shrl $15, %%edx \n\t"
647 "subl $0xFFFF, %%edx \n\t"
648 "addl $2, %%ecx \n\t"
649 "movl %%ecx, "BYTE "(%3) \n\t"
650
/* Shift amount = 7 - ff_h264_norm_shift[((low - 1) ^ low) >> 15]; the shifted value is added to low. */
651 "leal -1(%%ebx), %%ecx \n\t"
652 "xorl %%ebx, %%ecx \n\t"
653 "shrl $15, %%ecx \n\t"
654 "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"
655 "neg %%ecx \n\t"
656 "add $7, %%ecx \n\t"
657
658 "shll %%cl , %%edx \n\t"
659 "addl %%edx, %%ebx \n\t"
660 "1: \n\t"
661
/* Bit 0 of eax (state XOR mask) is the decoded bit; when it is 0 nothing is
   recorded for this position and control jumps to label 3. */
662 "test $1, %0 \n\t"
663 " jz 3f \n\t"
664
/* Decoded bit was 1: append (scan pointer + minusstart) to index[] and advance the write pointer by 4 bytes. */
665 "movl %2, %%eax \n\t"
666 "movl %4, %%ecx \n\t"
667 "addl %1, %%ecx \n\t"
668 "movl %%ecx, (%%eax) \n\t"
669 "addl $4, %%eax \n\t"
670 "movl %%eax, %2 \n\t"
671
/* Second decode: identical sequence, but using the state byte at offset 61 from the scan pointer. */
672 "movzbl 61(%1), %0 \n\t"
673 "movl %%esi, %%edx \n\t"
674 "andl $0xC0, %%esi \n\t"
675 "movzbl "MANGLE(ff_h264_lps_range)"(%0, %%esi, 2), %%esi\n\t"
676 /*eax:state ebx:low, edx:range, esi:RangeLPS*/
677 "subl %%esi, %%edx \n\t"
678
679 #if (defined CMOV_IS_FAST && __CPU__ >= 686)
680 "movl %%edx, %%ecx \n\t"
681 "shl $17, %%edx \n\t"
682 "cmpl %%ebx, %%edx \n\t"
683 "cmova %%ecx, %%esi \n\t"
684 "sbbl %%ecx, %%ecx \n\t"
685 "andl %%ecx, %%edx \n\t"
686 "subl %%edx, %%ebx \n\t"
687 "xorl %%ecx, %0 \n\t"
688 #else /* CMOV_IS_FAST */
689 "movl %%edx, %%ecx \n\t"
690 "shl $17, %%edx \n\t"
691 "subl %%ebx, %%edx \n\t"
692 "sarl $31, %%edx \n\t" //lps_mask
693 "subl %%ecx, %%esi \n\t" //RangeLPS - range
694 "andl %%edx, %%esi \n\t" //(RangeLPS - range)&lps_mask
695 "addl %%ecx, %%esi \n\t" //new range
696 "shl $17, %%ecx \n\t"
697 "andl %%edx, %%ecx \n\t"
698 "subl %%ecx, %%ebx \n\t"
699 "xorl %%edx, %0 \n\t"
700 #endif /* CMOV_IS_FAST */
701
702 "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx \n\t"
703 "shll %%cl, %%esi \n\t"
704 "movzbl "MANGLE(ff_h264_mlps_state)"+128(%0), %%edx \n\t"
/* Updated state goes back to the byte at offset 61 from the scan pointer. */
705 "movb %%dl, 61(%1) \n\t"
706 "shll %%cl, %%ebx \n\t"
707 "test %%bx, %%bx \n\t"
708 " jnz 1f \n\t"
709
/* Same refill sequence as in the first decode. */
710 "movl "BYTE "(%3), %%ecx \n\t"
711 "movzwl (%%ecx), %%edx \n\t"
712 "bswap %%edx \n\t"
713 "shrl $15, %%edx \n\t"
714 "subl $0xFFFF, %%edx \n\t"
715 "addl $2, %%ecx \n\t"
716 "movl %%ecx, "BYTE "(%3) \n\t"
717
718 "leal -1(%%ebx), %%ecx \n\t"
719 "xorl %%ebx, %%ecx \n\t"
720 "shrl $15, %%ecx \n\t"
721 "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"
722 "neg %%ecx \n\t"
723 "add $7, %%ecx \n\t"
724
725 "shll %%cl , %%edx \n\t"
726 "addl %%edx, %%ebx \n\t"
727 "1: \n\t"
728
/* Second decoded bit set: leave the loop through label 4, skipping the unconditional append below. */
729 "test $1, %%eax \n\t"
730 " jnz 4f \n\t"
731
/* Label 3: advance the scan pointer and loop while it is below end (unsigned compare). */
732 "3: \n\t"
733 "addl $1, %1 \n\t"
734 "cmpl %5, %1 \n\t"
735 " jb 2b \n\t"
/* Scan pointer reached end: append the final position to index[] without decoding a bit for it. */
736 "movl %2, %%eax \n\t"
737 "movl %4, %%ecx \n\t"
738 "addl %1, %%ecx \n\t"
739 "movl %%ecx, (%%eax) \n\t"
740 "addl $4, %%eax \n\t"
741 "movl %%eax, %2 \n\t"
/* Label 4: coeff_count = (write pointer + minusindex) >> 2, i.e. bytes written divided by 4. */
742 "4: \n\t"
743 "movl %2, %%eax \n\t"
744 "addl %6, %%eax \n\t"
745 "shr $2, %%eax \n\t"
746
/* Write the updated range and low back into the context. */
747 "movl %%esi, "RANGE "(%3) \n\t"
748 "movl %%ebx, "LOW "(%3) \n\t"
749 :"=&a"(coeff_count), "+r"(significant_coeff_ctx_base), "+m"(index)\
750 :"r"(c), "m"(minusstart), "m"(end), "m"(minusindex)\
751 : "%ecx", "%ebx", "%edx", "%esi", "memory"\
752 );
753 return coeff_count;
754 }
755 #endif
591 756
592 /** 757 /**
593 * 758 *
594 * @return the number of bytes read or 0 if no end 759 * @return the number of bytes read or 0 if no end
595 */ 760 */