# HG changeset patch # User michael # Date 1160442519 0 # Node ID 5afe4253a220588f94480b917285da41bd633868 # Parent ce16f66a48adbb7138ab99d9e9a4f082657582c6 replace a few and/sub/... by cmov; this is faster on P3, should be faster on AMD, and should be slower on P4. It's disabled by default (benchmarks welcome so we know when to enable it) diff -r ce16f66a48ad -r 5afe4253a220 cabac.h --- a/cabac.h Mon Oct 09 21:57:10 2006 +0000 +++ b/cabac.h Tue Oct 10 01:08:39 2006 +0000 @@ -459,6 +459,14 @@ "movl "LOW "(%2), %%ebx \n\t" //eax:state ebx:low, edx:range, esi:RangeLPS "subl %%esi, %%edx \n\t" +#ifdef CMOV_IS_FAST //FIXME actually define this somewhere + "cmpl %%ebx, %%edx \n\t" + "cmova %%edx, %%esi \n\t" + "sbbl %%ecx, %%ecx \n\t" + "andl %%ecx, %%edx \n\t" + "subl %%edx, %%ebx \n\t" + "xorl %%ecx, %%eax \n\t" +#else "movl %%edx, %%ecx \n\t" "subl %%ebx, %%edx \n\t" "sarl $31, %%edx \n\t" //lps_mask @@ -467,9 +475,10 @@ "addl %%ecx, %%esi \n\t" //new range "andl %%edx, %%ecx \n\t" "subl %%ecx, %%ebx \n\t" + "xorl %%edx, %%eax \n\t" +#endif //eax:state ebx:low edx:mask esi:range - "xorl %%edx, %%eax \n\t" "movzbl "MPS_STATE"(%2, %%eax), %%ecx \n\t" "movb %%cl, (%1) \n\t"