Mercurial > libavcodec.hg
comparison cabac.h @ 4046:8bbc695c9603 libavcodec
factorize get_cabac asm (0.5% slower but it's much cleaner)
author | michael |
---|---|
date | Fri, 20 Oct 2006 00:35:54 +0000 |
parents | 5ccdefd60f61 |
children | 61a4e7218a45 |
comparison
equal
deleted
inserted
replaced
4045:1bf7bc44430f | 4046:8bbc695c9603 |
---|---|
450 :"r"(state), "r"(c) | 450 :"r"(state), "r"(c) |
451 : "%ecx", "%ebx", "%edx", "%esi", "memory" | 451 : "%ecx", "%ebx", "%edx", "%esi", "memory" |
452 ); | 452 ); |
453 bit&=1; | 453 bit&=1; |
454 #else /* BRANCHLESS_CABAC_DECODER */ | 454 #else /* BRANCHLESS_CABAC_DECODER */ |
455 | |
456 | |
457 #if (defined CMOV_IS_FAST && __CPU__ >= 686) | |
458 #define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\ | |
459 "mov "tmp" , %%ecx \n\t"\ | |
460 "shl $17 , "tmp" \n\t"\ | |
461 "cmp "low" , "tmp" \n\t"\ | |
462 "cmova %%ecx , "range" \n\t"\ | |
463 "sbb %%ecx , %%ecx \n\t"\ | |
464 "and %%ecx , "tmp" \n\t"\ | |
465 "sub "tmp" , "low" \n\t"\ | |
466 "xor %%ecx , "ret" \n\t" | |
467 #else /* CMOV_IS_FAST */ | |
468 #define BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\ | |
469 "mov "tmp" , %%ecx \n\t"\ | |
470 "shl $17 , "tmp" \n\t"\ | |
471 "sub "low" , "tmp" \n\t"\ | |
472 "sar $31 , "tmp" \n\t" /*lps_mask*/\ | |
473 "sub %%ecx , "range" \n\t" /*RangeLPS - range*/\ | |
474 "and "tmp" , "range" \n\t" /*(RangeLPS - range)&lps_mask*/\ | |
475 "add %%ecx , "range" \n\t" /*new range*/\ | |
476 "shl $17 , %%ecx \n\t"\ | |
477 "and "tmp" , %%ecx \n\t"\ | |
478 "sub %%ecx , "low" \n\t"\ | |
479 "xor "tmp" , "ret" \n\t" | |
480 #endif /* CMOV_IS_FAST */ | |
481 | |
482 | |
483 #define BRANCHLESS_GET_CABAC(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\ | |
484 "movzbl "statep" , "ret" \n\t"\ | |
485 "mov "range" , "tmp" \n\t"\ | |
486 "and $0xC0 , "range" \n\t"\ | |
487 "movzbl "MANGLE(ff_h264_lps_range)"("ret", "range", 2), "range" \n\t"\ | |
488 "sub "range" , "tmp" \n\t"\ | |
489 BRANCHLESS_GET_CABAC_UPDATE(ret, cabac, statep, low, lowword, range, tmp, tmpbyte)\ | |
490 "movzbl " MANGLE(ff_h264_norm_shift) "("range"), %%ecx \n\t"\ | |
491 "shl %%cl , "range" \n\t"\ | |
492 "movzbl "MANGLE(ff_h264_mlps_state)"+128("ret"), "tmp" \n\t"\ | |
493 "mov "tmpbyte" , "statep" \n\t"\ | |
494 "shl %%cl , "low" \n\t"\ | |
495 "test "lowword" , "lowword" \n\t"\ | |
496 " jnz 1f \n\t"\ | |
497 "mov "BYTE"("cabac"), %%ecx \n\t"\ | |
498 "movzwl (%%ecx) , "tmp" \n\t"\ | |
499 "bswap "tmp" \n\t"\ | |
500 "shr $15 , "tmp" \n\t"\ | |
501 "sub $0xFFFF , "tmp" \n\t"\ | |
502 "add $2 , %%ecx \n\t"\ | |
503 "mov %%ecx , "BYTE "("cabac") \n\t"\ | |
504 "lea -1("low") , %%ecx \n\t"\ | |
505 "xor "low" , %%ecx \n\t"\ | |
506 "shr $15 , %%ecx \n\t"\ | |
507 "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t"\ | |
508 "neg %%ecx \n\t"\ | |
509 "add $7 , %%ecx \n\t"\ | |
510 "shl %%cl , "tmp" \n\t"\ | |
511 "add "tmp" , "low" \n\t"\ | |
512 "1: \n\t" | |
513 | |
455 asm volatile( | 514 asm volatile( |
456 "movzbl (%1), %0 \n\t" | 515 "movl "RANGE "(%2), %%esi \n\t" |
457 "movl "RANGE "(%2), %%ebx \n\t" | |
458 "movl "RANGE "(%2), %%edx \n\t" | |
459 "andl $0xC0, %%ebx \n\t" | |
460 "movzbl "MANGLE(ff_h264_lps_range)"(%0, %%ebx, 2), %%esi\n\t" | |
461 "movl "LOW "(%2), %%ebx \n\t" | 516 "movl "LOW "(%2), %%ebx \n\t" |
462 //eax:state ebx:low, edx:range, esi:RangeLPS | 517 BRANCHLESS_GET_CABAC("%0", "%2", "(%1)", "%%ebx", "%%bx", "%%esi", "%%edx", "%%dl") |
463 "subl %%esi, %%edx \n\t" | |
464 #if (defined CMOV_IS_FAST && __CPU__ >= 686) | |
465 "movl %%edx, %%ecx \n\t" | |
466 "shl $17, %%edx \n\t" | |
467 "cmpl %%ebx, %%edx \n\t" | |
468 "cmova %%ecx, %%esi \n\t" | |
469 "sbbl %%ecx, %%ecx \n\t" | |
470 "andl %%ecx, %%edx \n\t" | |
471 "subl %%edx, %%ebx \n\t" | |
472 "xorl %%ecx, %0 \n\t" | |
473 #else /* CMOV_IS_FAST */ | |
474 "movl %%edx, %%ecx \n\t" | |
475 "shl $17, %%edx \n\t" | |
476 "subl %%ebx, %%edx \n\t" | |
477 "sarl $31, %%edx \n\t" //lps_mask | |
478 "subl %%ecx, %%esi \n\t" //RangeLPS - range | |
479 "andl %%edx, %%esi \n\t" //(RangeLPS - range)&lps_mask | |
480 "addl %%ecx, %%esi \n\t" //new range | |
481 "shl $17, %%ecx \n\t" | |
482 "andl %%edx, %%ecx \n\t" | |
483 "subl %%ecx, %%ebx \n\t" | |
484 "xorl %%edx, %0 \n\t" | |
485 #endif /* CMOV_IS_FAST */ | |
486 | |
487 //eax:state ebx:low edx:mask esi:range | |
488 | |
489 //eax:bit ebx:low esi:range | |
490 | |
491 "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx \n\t" | |
492 "shll %%cl, %%esi \n\t" | |
493 "movzbl "MANGLE(ff_h264_mlps_state)"+128(%0), %%edx \n\t" | |
494 "movb %%dl, (%1) \n\t" | |
495 "movl %%esi, "RANGE "(%2) \n\t" | 518 "movl %%esi, "RANGE "(%2) \n\t" |
496 "shll %%cl, %%ebx \n\t" | |
497 "movl %%ebx, "LOW "(%2) \n\t" | 519 "movl %%ebx, "LOW "(%2) \n\t" |
498 "test %%bx, %%bx \n\t" | 520 |
499 " jnz 1f \n\t" | |
500 | |
501 "movl "BYTE "(%2), %%ecx \n\t" | |
502 "movzwl (%%ecx), %%esi \n\t" | |
503 "bswap %%esi \n\t" | |
504 "shrl $15, %%esi \n\t" | |
505 "subl $0xFFFF, %%esi \n\t" | |
506 "addl $2, %%ecx \n\t" | |
507 "movl %%ecx, "BYTE "(%2) \n\t" | |
508 | |
509 "leal -1(%%ebx), %%ecx \n\t" | |
510 "xorl %%ebx, %%ecx \n\t" | |
511 "shrl $15, %%ecx \n\t" | |
512 "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t" | |
513 "neg %%ecx \n\t" | |
514 "add $7, %%ecx \n\t" | |
515 | |
516 "shll %%cl , %%esi \n\t" | |
517 "addl %%esi, %%ebx \n\t" | |
518 "movl %%ebx, "LOW "(%2) \n\t" | |
519 "1: \n\t" | |
520 :"=&a"(bit) | 521 :"=&a"(bit) |
521 :"r"(state), "r"(c) | 522 :"r"(state), "r"(c) |
522 : "%ecx", "%ebx", "%edx", "%esi", "memory" | 523 : "%ecx", "%ebx", "%edx", "%esi", "memory" |
523 ); | 524 ); |
524 bit&=1; | 525 bit&=1; |
681 "movl "RANGE "(%3), %%esi \n\t" | 682 "movl "RANGE "(%3), %%esi \n\t" |
682 "movl "LOW "(%3), %%ebx \n\t" | 683 "movl "LOW "(%3), %%ebx \n\t" |
683 | 684 |
684 "2: \n\t" | 685 "2: \n\t" |
685 | 686 |
686 "movzbl (%1), %0 \n\t" | 687 BRANCHLESS_GET_CABAC("%0", "%3", "(%1)", "%%ebx", "%%bx", "%%esi", "%%edx", "%%dl") |
687 "movl %%esi, %%edx \n\t" | |
688 "andl $0xC0, %%esi \n\t" | |
689 "movzbl "MANGLE(ff_h264_lps_range)"(%0, %%esi, 2), %%esi\n\t" | |
690 /*eax:state ebx:low, edx:range, esi:RangeLPS*/ | |
691 "subl %%esi, %%edx \n\t" | |
692 | |
693 #if (defined CMOV_IS_FAST && __CPU__ >= 686) | |
694 "movl %%edx, %%ecx \n\t" | |
695 "shl $17, %%edx \n\t" | |
696 "cmpl %%ebx, %%edx \n\t" | |
697 "cmova %%ecx, %%esi \n\t" | |
698 "sbbl %%ecx, %%ecx \n\t" | |
699 "andl %%ecx, %%edx \n\t" | |
700 "subl %%edx, %%ebx \n\t" | |
701 "xorl %%ecx, %0 \n\t" | |
702 #else /* CMOV_IS_FAST */ | |
703 "movl %%edx, %%ecx \n\t" | |
704 "shl $17, %%edx \n\t" | |
705 "subl %%ebx, %%edx \n\t" | |
706 "sarl $31, %%edx \n\t" //lps_mask | |
707 "subl %%ecx, %%esi \n\t" //RangeLPS - range | |
708 "andl %%edx, %%esi \n\t" //(RangeLPS - range)&lps_mask | |
709 "addl %%ecx, %%esi \n\t" //new range | |
710 "shl $17, %%ecx \n\t" | |
711 "andl %%edx, %%ecx \n\t" | |
712 "subl %%ecx, %%ebx \n\t" | |
713 "xorl %%edx, %0 \n\t" | |
714 #endif /* CMOV_IS_FAST */ | |
715 | |
716 "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx \n\t" | |
717 "shll %%cl, %%esi \n\t" | |
718 "movzbl "MANGLE(ff_h264_mlps_state)"+128(%0), %%edx \n\t" | |
719 "movb %%dl, (%1) \n\t" | |
720 "shll %%cl, %%ebx \n\t" | |
721 "test %%bx, %%bx \n\t" | |
722 " jnz 1f \n\t" | |
723 | |
724 "movl "BYTE "(%3), %%ecx \n\t" | |
725 "movzwl (%%ecx), %%edx \n\t" | |
726 "bswap %%edx \n\t" | |
727 "shrl $15, %%edx \n\t" | |
728 "subl $0xFFFF, %%edx \n\t" | |
729 "addl $2, %%ecx \n\t" | |
730 "movl %%ecx, "BYTE "(%3) \n\t" | |
731 | |
732 "leal -1(%%ebx), %%ecx \n\t" | |
733 "xorl %%ebx, %%ecx \n\t" | |
734 "shrl $15, %%ecx \n\t" | |
735 "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t" | |
736 "neg %%ecx \n\t" | |
737 "add $7, %%ecx \n\t" | |
738 | |
739 "shll %%cl , %%edx \n\t" | |
740 "addl %%edx, %%ebx \n\t" | |
741 "1: \n\t" | |
742 | 688 |
743 "test $1, %0 \n\t" | 689 "test $1, %0 \n\t" |
744 " jz 3f \n\t" | 690 " jz 3f \n\t" |
745 | 691 |
746 "movl %2, %%eax \n\t" | 692 "movl %2, %%eax \n\t" |
748 "addl %1, %%ecx \n\t" | 694 "addl %1, %%ecx \n\t" |
749 "movl %%ecx, (%%eax) \n\t" | 695 "movl %%ecx, (%%eax) \n\t" |
750 "addl $4, %%eax \n\t" | 696 "addl $4, %%eax \n\t" |
751 "movl %%eax, %2 \n\t" | 697 "movl %%eax, %2 \n\t" |
752 | 698 |
753 "movzbl 61(%1), %0 \n\t" | 699 BRANCHLESS_GET_CABAC("%0", "%3", "61(%1)", "%%ebx", "%%bx", "%%esi", "%%edx", "%%dl") |
754 "movl %%esi, %%edx \n\t" | |
755 "andl $0xC0, %%esi \n\t" | |
756 "movzbl "MANGLE(ff_h264_lps_range)"(%0, %%esi, 2), %%esi\n\t" | |
757 /*eax:state ebx:low, edx:range, esi:RangeLPS*/ | |
758 "subl %%esi, %%edx \n\t" | |
759 | |
760 #if (defined CMOV_IS_FAST && __CPU__ >= 686) | |
761 "movl %%edx, %%ecx \n\t" | |
762 "shl $17, %%edx \n\t" | |
763 "cmpl %%ebx, %%edx \n\t" | |
764 "cmova %%ecx, %%esi \n\t" | |
765 "sbbl %%ecx, %%ecx \n\t" | |
766 "andl %%ecx, %%edx \n\t" | |
767 "subl %%edx, %%ebx \n\t" | |
768 "xorl %%ecx, %0 \n\t" | |
769 #else /* CMOV_IS_FAST */ | |
770 "movl %%edx, %%ecx \n\t" | |
771 "shl $17, %%edx \n\t" | |
772 "subl %%ebx, %%edx \n\t" | |
773 "sarl $31, %%edx \n\t" //lps_mask | |
774 "subl %%ecx, %%esi \n\t" //RangeLPS - range | |
775 "andl %%edx, %%esi \n\t" //(RangeLPS - range)&lps_mask | |
776 "addl %%ecx, %%esi \n\t" //new range | |
777 "shl $17, %%ecx \n\t" | |
778 "andl %%edx, %%ecx \n\t" | |
779 "subl %%ecx, %%ebx \n\t" | |
780 "xorl %%edx, %0 \n\t" | |
781 #endif /* CMOV_IS_FAST */ | |
782 | |
783 "movzbl " MANGLE(ff_h264_norm_shift) "(%%esi), %%ecx \n\t" | |
784 "shll %%cl, %%esi \n\t" | |
785 "movzbl "MANGLE(ff_h264_mlps_state)"+128(%0), %%edx \n\t" | |
786 "movb %%dl, 61(%1) \n\t" | |
787 "shll %%cl, %%ebx \n\t" | |
788 "test %%bx, %%bx \n\t" | |
789 " jnz 1f \n\t" | |
790 | |
791 "movl "BYTE "(%3), %%ecx \n\t" | |
792 "movzwl (%%ecx), %%edx \n\t" | |
793 "bswap %%edx \n\t" | |
794 "shrl $15, %%edx \n\t" | |
795 "subl $0xFFFF, %%edx \n\t" | |
796 "addl $2, %%ecx \n\t" | |
797 "movl %%ecx, "BYTE "(%3) \n\t" | |
798 | |
799 "leal -1(%%ebx), %%ecx \n\t" | |
800 "xorl %%ebx, %%ecx \n\t" | |
801 "shrl $15, %%ecx \n\t" | |
802 "movzbl " MANGLE(ff_h264_norm_shift) "(%%ecx), %%ecx \n\t" | |
803 "neg %%ecx \n\t" | |
804 "add $7, %%ecx \n\t" | |
805 | |
806 "shll %%cl , %%edx \n\t" | |
807 "addl %%edx, %%ebx \n\t" | |
808 "1: \n\t" | |
809 | 700 |
810 "test $1, %%eax \n\t" | 701 "test $1, %%eax \n\t" |
811 " jnz 4f \n\t" | 702 " jnz 4f \n\t" |
812 | 703 |
813 "3: \n\t" | 704 "3: \n\t" |