comparison cabac.h @ 3980:5afe4253a220 libavcodec

Replace a few and/sub/... by cmov. This is faster on P3, should be faster on AMD, and should be slower on P4. It's disabled by default (benchmarks welcome so we know when to enable it).
author michael
date Tue, 10 Oct 2006 01:08:39 +0000
parents ce16f66a48ad
children 9854f686ba79
comparison
equal deleted inserted replaced
3979:ce16f66a48ad 3980:5afe4253a220
457 "movzbl (%%ebx, %%esi), %%esi \n\t" 457 "movzbl (%%ebx, %%esi), %%esi \n\t"
458 "shll $17, %%esi \n\t" 458 "shll $17, %%esi \n\t"
459 "movl "LOW "(%2), %%ebx \n\t" 459 "movl "LOW "(%2), %%ebx \n\t"
460 //eax:state ebx:low, edx:range, esi:RangeLPS 460 //eax:state ebx:low, edx:range, esi:RangeLPS
461 "subl %%esi, %%edx \n\t" 461 "subl %%esi, %%edx \n\t"
462 #ifdef CMOV_IS_FAST //FIXME actually define this somewhere
463 "cmpl %%ebx, %%edx \n\t"
464 "cmova %%edx, %%esi \n\t"
465 "sbbl %%ecx, %%ecx \n\t"
466 "andl %%ecx, %%edx \n\t"
467 "subl %%edx, %%ebx \n\t"
468 "xorl %%ecx, %%eax \n\t"
469 #else
462 "movl %%edx, %%ecx \n\t" 470 "movl %%edx, %%ecx \n\t"
463 "subl %%ebx, %%edx \n\t" 471 "subl %%ebx, %%edx \n\t"
464 "sarl $31, %%edx \n\t" //lps_mask 472 "sarl $31, %%edx \n\t" //lps_mask
465 "subl %%ecx, %%esi \n\t" //RangeLPS - range 473 "subl %%ecx, %%esi \n\t" //RangeLPS - range
466 "andl %%edx, %%esi \n\t" //(RangeLPS - range)&lps_mask 474 "andl %%edx, %%esi \n\t" //(RangeLPS - range)&lps_mask
467 "addl %%ecx, %%esi \n\t" //new range 475 "addl %%ecx, %%esi \n\t" //new range
468 "andl %%edx, %%ecx \n\t" 476 "andl %%edx, %%ecx \n\t"
469 "subl %%ecx, %%ebx \n\t" 477 "subl %%ecx, %%ebx \n\t"
478 "xorl %%edx, %%eax \n\t"
479 #endif
470 480
471 //eax:state ebx:low edx:mask esi:range 481 //eax:state ebx:low edx:mask esi:range
472 "xorl %%edx, %%eax \n\t"
473 "movzbl "MPS_STATE"(%2, %%eax), %%ecx \n\t" 482 "movzbl "MPS_STATE"(%2, %%eax), %%ecx \n\t"
474 "movb %%cl, (%1) \n\t" 483 "movb %%cl, (%1) \n\t"
475 484
476 "movl %%esi, %%edx \n\t" 485 "movl %%esi, %%edx \n\t"
477 //eax:bit ebx:low edx:range esi:range 486 //eax:bit ebx:low edx:range esi:range