Mercurial > libavcodec.hg
changeset 3975:6cc9eb5ee5e3 libavcodec
x86 branchless cabac decoder
slightly faster on P3
author | michael |
---|---|
date | Mon, 09 Oct 2006 20:51:33 +0000 |
parents | 93746612bc78 |
children | 27e90123b346 |
files | cabac.h |
diffstat | 1 files changed, 68 insertions(+), 1 deletions(-) [+] |
line wrap: on
line diff
--- a/cabac.h Mon Oct 09 20:44:11 2006 +0000 +++ b/cabac.h Mon Oct 09 20:51:33 2006 +0000 @@ -31,6 +31,7 @@ #define CABAC_BITS 16 #define CABAC_MASK ((1<<CABAC_BITS)-1) +#define BRANCHLESS_CABAD 1 typedef struct CABACContext{ int low; @@ -374,7 +375,7 @@ #define BYTESTART "12+2*66*4+4*65" #define BYTE "16+2*66*4+4*65" #define BYTEEND "20+2*66*4+4*65" - +#ifndef BRANCHLESS_CABAD asm volatile( "movzbl (%1), %%eax \n\t" "movl "RANGE "(%2), %%ebx \n\t" @@ -447,6 +448,72 @@ : "%ecx", "%ebx", "%edx", "%esi" ); #else + asm volatile( + "movzbl (%1), %%eax \n\t" + "movl "RANGE "(%2), %%ebx \n\t" + "movl "RANGE "(%2), %%edx \n\t" + "shrl $23, %%ebx \n\t" + "leal "LPS_RANGE"(%2, %%eax, 4), %%esi \n\t" + "movzbl (%%ebx, %%esi), %%esi \n\t" + "shll $17, %%esi \n\t" + "movl "LOW "(%2), %%ebx \n\t" +//eax:state ebx:low, edx:range, esi:RangeLPS + "subl %%esi, %%edx \n\t" + "movl %%edx, %%ecx \n\t" + "subl %%ebx, %%edx \n\t" + "sarl $31, %%edx \n\t" //lps_mask + "subl %%ecx, %%esi \n\t" //RangeLPS - range + "andl %%edx, %%esi \n\t" //(RangeLPS - range)&lps_mask + "addl %%ecx, %%esi \n\t" //new range + "andl %%edx, %%ecx \n\t" + "subl %%ecx, %%ebx \n\t" + +//eax:state ebx:low edx:mask esi:range + "movl $-130, %%ecx \n\t" + "andl %%edx, %%ecx \n\t" + "addl %%eax, %%ecx \n\t" + + "xorl %%edx, %%eax \n\t" + "movb "MPS_STATE"(%2, %%eax), %%cl \n\t" + "movb %%cl, (%1) \n\t" + + "movl %%esi, %%edx \n\t" +//eax:bit ebx:low edx:range esi:range + + "shr $19, %%esi \n\t" + "movb " MANGLE(ff_h264_norm_shift) "(%%esi), %%cl \n\t" + "shll %%cl, %%ebx \n\t" + "shll %%cl, %%edx \n\t" + "test %%bx, %%bx \n\t" + " jnz 1f \n\t" + + "movl "BYTE "(%2), %%ecx \n\t" + "movzwl (%%ecx), %%esi \n\t" + "bswap %%esi \n\t" + "shrl $15, %%esi \n\t" + "subl $0xFFFF, %%esi \n\t" + "addl $2, %%ecx \n\t" + "movl %%ecx, "BYTE "(%2) \n\t" + + "leal -1(%%ebx), %%ecx \n\t" + "xorl %%ebx, %%ecx \n\t" + "shrl $17, %%ecx \n\t" + "movb " MANGLE(ff_h264_norm_shift) "(%%ecx), %%cl \n\t" + "neg %%cl \n\t" + "add $7, %%cl \n\t" + + "shll %%cl , %%esi \n\t" + "addl %%esi, %%ebx \n\t" + "1: \n\t" + "movl %%edx, "RANGE "(%2) \n\t" + "movl %%ebx, "LOW "(%2) \n\t" + "andl $1, %%eax \n\t" + :"=&a"(bit) + :"r"(state), "r"(c) + : "%ecx", "%ebx", "%edx", "%esi" + ); +#endif +#else int s = *state; int RangeLPS= c->lps_range[s][c->range>>(CABAC_BITS+7)]<<(CABAC_BITS+1); int bit, lps_mask attribute_unused;