changeset 3980:5afe4253a220 libavcodec

Replace a few and/sub/... by cmov. This is faster on P3, should be faster on AMD, and should be slower on P4. It's disabled by default (benchmarks welcome so we know when to enable it).
author michael
date Tue, 10 Oct 2006 01:08:39 +0000
parents ce16f66a48ad
children 9854f686ba79
files cabac.h
diffstat 1 files changed, 10 insertions(+), 1 deletions(-) [+]
line wrap: on
line diff
--- a/cabac.h	Mon Oct 09 21:57:10 2006 +0000
+++ b/cabac.h	Tue Oct 10 01:08:39 2006 +0000
@@ -459,6 +459,14 @@
         "movl "LOW      "(%2), %%ebx            \n\t"
 //eax:state ebx:low, edx:range, esi:RangeLPS
         "subl %%esi, %%edx                      \n\t"
+#ifdef CMOV_IS_FAST //FIXME actually define this somewhere
+        "cmpl %%ebx, %%edx                      \n\t"
+        "cmova %%edx, %%esi                     \n\t"
+        "sbbl %%ecx, %%ecx                      \n\t"
+        "andl %%ecx, %%edx                      \n\t"
+        "subl %%edx, %%ebx                      \n\t"
+        "xorl %%ecx, %%eax                      \n\t"
+#else
         "movl %%edx, %%ecx                      \n\t"
         "subl %%ebx, %%edx                      \n\t"
         "sarl $31, %%edx                        \n\t" //lps_mask
@@ -467,9 +475,10 @@
         "addl %%ecx, %%esi                      \n\t" //new range
         "andl %%edx, %%ecx                      \n\t"
         "subl %%ecx, %%ebx                      \n\t"
+        "xorl %%edx, %%eax                      \n\t"
+#endif
 
 //eax:state ebx:low edx:mask esi:range
-        "xorl %%edx, %%eax                      \n\t"
         "movzbl "MPS_STATE"(%2, %%eax), %%ecx   \n\t"
         "movb %%cl, (%1)                        \n\t"