comparison cabac.h @ 4046:8bbc695c9603 libavcodec

factorize get_cabac asm (0.5% slower but it's much cleaner)
author michael
date Fri, 20 Oct 2006 00:35:54 +0000
parents 5ccdefd60f61
children 61a4e7218a45
comparison
equal deleted inserted replaced
4045:1bf7bc44430f 4046:8bbc695c9603
450 :"r"(state), "r"(c) 450 :"r"(state), "r"(c)
451 : "%ecx", "%ebx", "%edx", "%esi", "memory" 451 : "%ecx", "%ebx", "%edx", "%esi", "memory"
452 ); 452 );
453 bit&=1; 453 bit&=1;
454 #else /* BRANCHLESS_CABAC_DECODER */ 454 #else /* BRANCHLESS_CABAC_DECODER */
455 /* BRANCHLESS_GET_CABAC_UPDATE: asm string fragment (AT&T operand order: source first, destination last); uses %ecx as scratch. */
456 /* As expanded inside BRANCHLESS_GET_CABAC: on entry ret = state byte, range = RangeLPS table value, tmp = old range - RangeLPS. cabac, statep, lowword and tmpbyte are accepted but not referenced here. */
457 #if (defined CMOV_IS_FAST && __CPU__ >= 686)
458 #define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
459 "mov "tmp" , %%ecx \n\t" /*ecx = tmp (old range - RangeLPS)*/\
460 "shl $17 , "tmp" \n\t" /*tmp <<= 17*/\
461 "cmp "low" , "tmp" \n\t" /*set flags from tmp - low*/\
462 "cmova %%ecx , "range" \n\t" /*if tmp > low (unsigned): range = ecx, else range stays RangeLPS*/\
463 "sbb %%ecx , %%ecx \n\t" /*ecx = lps_mask: -1 if tmp < low (unsigned), else 0. NOTE(review): tmp == low keeps RangeLPS above but gives mask 0 here; presumably cannot occur, confirm*/\
464 "and %%ecx , "tmp" \n\t" /*tmp &= lps_mask*/\
465 "sub "tmp" , "low" \n\t" /*low -= tmp (only changes low when lps_mask is set)*/\
466 "xor %%ecx , "ret" \n\t" /*ret ^= lps_mask*/
467 #else /* CMOV_IS_FAST */
468 #define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
469 "mov "tmp" , %%ecx \n\t" /*ecx = tmp (old range - RangeLPS)*/\
470 "shl $17 , "tmp" \n\t" /*tmp <<= 17*/\
471 "sub "low" , "tmp" \n\t" /*tmp -= low*/\
472 "sar $31 , "tmp" \n\t" /*lps_mask*/\
473 "sub %%ecx , "range" \n\t" /*RangeLPS - range*/\
474 "and "tmp" , "range" \n\t" /*(RangeLPS - range)&lps_mask*/\
475 "add %%ecx , "range" \n\t" /*new range*/\
476 "shl $17 , %%ecx \n\t" /*ecx <<= 17*/\
477 "and "tmp" , %%ecx \n\t" /*ecx &= lps_mask*/\
478 "sub %%ecx , "low" \n\t" /*low -= ecx (only changes low when lps_mask is set)*/\
479 "xor "tmp" , "ret" \n\t" /*ret ^= lps_mask*/
480 #endif /* CMOV_IS_FAST */
481 /* BRANCHLESS_GET_CABAC: asm string fragment (AT&T operand order) that reads the state byte at statep, updates range and low, writes a new state byte back to statep and leaves the result in ret (the visible callers test only bit 0 of it). */
482 /* Uses %ecx as scratch and the numeric local label 1; cabac is the base register used with the BYTE offset. */
483 #define BRANCHLESS_GET_CABAC(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
484 "movzbl "statep" , "ret" \n\t" /*ret = state byte, zero-extended*/\
485 "mov "range" , "tmp" \n\t" /*tmp = range*/\
486 "and $0xC0 , "range" \n\t" /*range &= 0xC0*/\
487 "movzbl "MANGLE(ff_h264_lps_range)"("ret", "range", 2), "range" \n\t" /*range = byte at ff_h264_lps_range + ret + 2*range (RangeLPS)*/\
488 "sub "range" , "tmp" \n\t" /*tmp = old range - RangeLPS*/\
489 BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\
490 "movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx \n\t" /*ecx = ff_h264_norm_shift[range]*/\
491 "shl %%cl , "range" \n\t" /*range <<= cl*/\
492 "movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp" \n\t" /*tmp = byte at ff_h264_mlps_state + 128 + ret*/\
493 "mov "tmpbyte" , "statep" \n\t" /*store it as the new state byte*/\
494 "shl %%cl , "low" \n\t" /*low <<= cl (same shift count as range)*/\
495 "test "lowword" , "lowword" \n\t" /*check the low 16 bits of low*/\
496 " jnz 1f \n\t" /*nonzero: skip the two-byte read below*/\
497 "mov "BYTE"("cabac"), %%ecx \n\t" /*ecx = pointer stored at BYTE(cabac)*/\
498 "movzwl (%%ecx) , "tmp" \n\t" /*tmp = 16-bit load from that pointer*/\
499 "bswap "tmp" \n\t" /*the two loaded bytes are now the top 16 bits, byte-swapped*/\
500 "shr $15 , "tmp" \n\t" /*tmp = (byte-swapped 16-bit value) << 1*/\
501 "sub $0xFFFF , "tmp" \n\t" /*tmp -= 0xFFFF*/\
502 "add $2 , %%ecx \n\t" /*advance the pointer by the two bytes read*/\
503 "mov %%ecx , "BYTE "("cabac") \n\t" /*store the advanced pointer back*/\
504 "lea -1("low") , %%ecx \n\t" /*ecx = low - 1*/\
505 "xor "low" , %%ecx \n\t" /*ecx = low ^ (low - 1): lowest set bit of low and every bit below it*/\
506 "shr $15 , %%ecx \n\t" /*ecx >>= 15*/\
507 "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t" /*ecx = ff_h264_norm_shift[ecx]*/\
508 "neg %%ecx \n\t" /*ecx = -ecx*/\
509 "add $7 , %%ecx \n\t" /*ecx = 7 - table value*/\
510 "shl %%cl , "tmp" \n\t" /*tmp <<= cl*/\
511 "add "tmp" , "low" \n\t" /*low += tmp*/\
512 "1: \n\t" /*target of the jnz above*/
513
455 asm volatile( 514 asm volatile(
456 "movzbl (%1), %0 \n\t" 515 "movl "RANGE "(%2), %%esi \n\t"
457 "movl "RANGE "(%2), %%ebx \n\t"
458 "movl "RANGE "(%2), %%edx \n\t"
459 "andl $0xC0, %%ebx \n\t"
460 "movzbl "MANGLE(ff_h264_lps_range)"(%0, %%ebx, 2), %%esi\n\t"
461 "movl "LOW "(%2), %%ebx \n\t" 516 "movl "LOW "(%2), %%ebx \n\t"
462 //eax:state ebx:low, edx:range, esi:RangeLPS 517 BRANCHLESS_GET_CABAC("%0", "%2", "(%1)", "%%ebx", "%%bx", "%%esi", "%%edx", "%%dl")
463 "subl %%esi, %%edx \n\t"
464 #if (defined CMOV_IS_FAST && __CPU__ >= 686)
465 "movl %%edx, %%ecx \n\t"
466 "shl $17, %%edx \n\t"
467 "cmpl %%ebx, %%edx \n\t"
468 "cmova %%ecx, %%esi \n\t"
469 "sbbl %%ecx, %%ecx \n\t"
470 "andl %%ecx, %%edx \n\t"
471 "subl %%edx, %%ebx \n\t"
472 "xorl %%ecx, %0 \n\t"
473 #else /* CMOV_IS_FAST */
474 "movl %%edx, %%ecx \n\t"
475 "shl $17, %%edx \n\t"
476 "subl %%ebx, %%edx \n\t"
477 "sarl $31, %%edx \n\t" //lps_mask
478 "subl %%ecx, %%esi \n\t" //RangeLPS - range
479 "andl %%edx, %%esi \n\t" //(RangeLPS - range)&lps_mask
480 "addl %%ecx, %%esi \n\t" //new range
481 "shl $17, %%ecx \n\t"
482 "andl %%edx, %%ecx \n\t"
483 "subl %%ecx, %%ebx \n\t"
484 "xorl %%edx, %0 \n\t"
485 #endif /* CMOV_IS_FAST */
486
487 //eax:state ebx:low edx:mask esi:range
488
489 //eax:bit ebx:low esi:range
490
491 "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx \n\t"
492 "shll %%cl, %%esi \n\t"
493 "movzbl "MANGLE(ff_h264_mlps_state)"+128(%0), %%edx \n\t"
494 "movb %%dl, (%1) \n\t"
495 "movl %%esi, "RANGE "(%2) \n\t" 518 "movl %%esi, "RANGE "(%2) \n\t"
496 "shll %%cl, %%ebx \n\t"
497 "movl %%ebx, "LOW "(%2) \n\t" 519 "movl %%ebx, "LOW "(%2) \n\t"
498 "test %%bx, %%bx \n\t" 520
499 " jnz 1f \n\t"
500
501 "movl "BYTE "(%2), %%ecx \n\t"
502 "movzwl (%%ecx), %%esi \n\t"
503 "bswap %%esi \n\t"
504 "shrl $15, %%esi \n\t"
505 "subl $0xFFFF, %%esi \n\t"
506 "addl $2, %%ecx \n\t"
507 "movl %%ecx, "BYTE "(%2) \n\t"
508
509 "leal -1(%%ebx), %%ecx \n\t"
510 "xorl %%ebx, %%ecx \n\t"
511 "shrl $15, %%ecx \n\t"
512 "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"
513 "neg %%ecx \n\t"
514 "add $7, %%ecx \n\t"
515
516 "shll %%cl , %%esi \n\t"
517 "addl %%esi, %%ebx \n\t"
518 "movl %%ebx, "LOW "(%2) \n\t"
519 "1: \n\t"
520 :"=&a"(bit) 521 :"=&a"(bit)
521 :"r"(state), "r"(c) 522 :"r"(state), "r"(c)
522 : "%ecx", "%ebx", "%edx", "%esi", "memory" 523 : "%ecx", "%ebx", "%edx", "%esi", "memory"
523 ); 524 );
524 bit&=1; 525 bit&=1;
681 "movl "RANGE "(%3), %%esi \n\t" 682 "movl "RANGE "(%3), %%esi \n\t"
682 "movl "LOW "(%3), %%ebx \n\t" 683 "movl "LOW "(%3), %%ebx \n\t"
683 684
684 "2: \n\t" 685 "2: \n\t"
685 686
686 "movzbl (%1), %0 \n\t" 687 BRANCHLESS_GET_CABAC("%0", "%3", "(%1)", "%%ebx", "%%bx", "%%esi", "%%edx", "%%dl")
687 "movl %%esi, %%edx \n\t"
688 "andl $0xC0, %%esi \n\t"
689 "movzbl "MANGLE(ff_h264_lps_range)"(%0, %%esi, 2), %%esi\n\t"
690 /*eax:state ebx:low, edx:range, esi:RangeLPS*/
691 "subl %%esi, %%edx \n\t"
692
693 #if (defined CMOV_IS_FAST && __CPU__ >= 686)
694 "movl %%edx, %%ecx \n\t"
695 "shl $17, %%edx \n\t"
696 "cmpl %%ebx, %%edx \n\t"
697 "cmova %%ecx, %%esi \n\t"
698 "sbbl %%ecx, %%ecx \n\t"
699 "andl %%ecx, %%edx \n\t"
700 "subl %%edx, %%ebx \n\t"
701 "xorl %%ecx, %0 \n\t"
702 #else /* CMOV_IS_FAST */
703 "movl %%edx, %%ecx \n\t"
704 "shl $17, %%edx \n\t"
705 "subl %%ebx, %%edx \n\t"
706 "sarl $31, %%edx \n\t" //lps_mask
707 "subl %%ecx, %%esi \n\t" //RangeLPS - range
708 "andl %%edx, %%esi \n\t" //(RangeLPS - range)&lps_mask
709 "addl %%ecx, %%esi \n\t" //new range
710 "shl $17, %%ecx \n\t"
711 "andl %%edx, %%ecx \n\t"
712 "subl %%ecx, %%ebx \n\t"
713 "xorl %%edx, %0 \n\t"
714 #endif /* CMOV_IS_FAST */
715
716 "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx \n\t"
717 "shll %%cl, %%esi \n\t"
718 "movzbl "MANGLE(ff_h264_mlps_state)"+128(%0), %%edx \n\t"
719 "movb %%dl, (%1) \n\t"
720 "shll %%cl, %%ebx \n\t"
721 "test %%bx, %%bx \n\t"
722 " jnz 1f \n\t"
723
724 "movl "BYTE "(%3), %%ecx \n\t"
725 "movzwl (%%ecx), %%edx \n\t"
726 "bswap %%edx \n\t"
727 "shrl $15, %%edx \n\t"
728 "subl $0xFFFF, %%edx \n\t"
729 "addl $2, %%ecx \n\t"
730 "movl %%ecx, "BYTE "(%3) \n\t"
731
732 "leal -1(%%ebx), %%ecx \n\t"
733 "xorl %%ebx, %%ecx \n\t"
734 "shrl $15, %%ecx \n\t"
735 "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"
736 "neg %%ecx \n\t"
737 "add $7, %%ecx \n\t"
738
739 "shll %%cl , %%edx \n\t"
740 "addl %%edx, %%ebx \n\t"
741 "1: \n\t"
742 688
743 "test $1, %0 \n\t" 689 "test $1, %0 \n\t"
744 " jz 3f \n\t" 690 " jz 3f \n\t"
745 691
746 "movl %2, %%eax \n\t" 692 "movl %2, %%eax \n\t"
748 "addl %1, %%ecx \n\t" 694 "addl %1, %%ecx \n\t"
749 "movl %%ecx, (%%eax) \n\t" 695 "movl %%ecx, (%%eax) \n\t"
750 "addl $4, %%eax \n\t" 696 "addl $4, %%eax \n\t"
751 "movl %%eax, %2 \n\t" 697 "movl %%eax, %2 \n\t"
752 698
753 "movzbl 61(%1), %0 \n\t" 699 BRANCHLESS_GET_CABAC("%0", "%3", "61(%1)", "%%ebx", "%%bx", "%%esi", "%%edx", "%%dl")
754 "movl %%esi, %%edx \n\t"
755 "andl $0xC0, %%esi \n\t"
756 "movzbl "MANGLE(ff_h264_lps_range)"(%0, %%esi, 2), %%esi\n\t"
757 /*eax:state ebx:low, edx:range, esi:RangeLPS*/
758 "subl %%esi, %%edx \n\t"
759
760 #if (defined CMOV_IS_FAST && __CPU__ >= 686)
761 "movl %%edx, %%ecx \n\t"
762 "shl $17, %%edx \n\t"
763 "cmpl %%ebx, %%edx \n\t"
764 "cmova %%ecx, %%esi \n\t"
765 "sbbl %%ecx, %%ecx \n\t"
766 "andl %%ecx, %%edx \n\t"
767 "subl %%edx, %%ebx \n\t"
768 "xorl %%ecx, %0 \n\t"
769 #else /* CMOV_IS_FAST */
770 "movl %%edx, %%ecx \n\t"
771 "shl $17, %%edx \n\t"
772 "subl %%ebx, %%edx \n\t"
773 "sarl $31, %%edx \n\t" //lps_mask
774 "subl %%ecx, %%esi \n\t" //RangeLPS - range
775 "andl %%edx, %%esi \n\t" //(RangeLPS - range)&lps_mask
776 "addl %%ecx, %%esi \n\t" //new range
777 "shl $17, %%ecx \n\t"
778 "andl %%edx, %%ecx \n\t"
779 "subl %%ecx, %%ebx \n\t"
780 "xorl %%edx, %0 \n\t"
781 #endif /* CMOV_IS_FAST */
782
783 "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx \n\t"
784 "shll %%cl, %%esi \n\t"
785 "movzbl "MANGLE(ff_h264_mlps_state)"+128(%0), %%edx \n\t"
786 "movb %%dl, 61(%1) \n\t"
787 "shll %%cl, %%ebx \n\t"
788 "test %%bx, %%bx \n\t"
789 " jnz 1f \n\t"
790
791 "movl "BYTE "(%3), %%ecx \n\t"
792 "movzwl (%%ecx), %%edx \n\t"
793 "bswap %%edx \n\t"
794 "shrl $15, %%edx \n\t"
795 "subl $0xFFFF, %%edx \n\t"
796 "addl $2, %%ecx \n\t"
797 "movl %%ecx, "BYTE "(%3) \n\t"
798
799 "leal -1(%%ebx), %%ecx \n\t"
800 "xorl %%ebx, %%ecx \n\t"
801 "shrl $15, %%ecx \n\t"
802 "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"
803 "neg %%ecx \n\t"
804 "add $7, %%ecx \n\t"
805
806 "shll %%cl , %%edx \n\t"
807 "addl %%edx, %%ebx \n\t"
808 "1: \n\t"
809 700
810 "test $1, %%eax \n\t" 701 "test $1, %%eax \n\t"
811 " jnz 4f \n\t" 702 " jnz 4f \n\t"
812 703
813 "3: \n\t" 704 "3: \n\t"