# HG changeset patch # User conrad # Date 1279921590 0 # Node ID 6e6c92d36c4b9bbf58c016cd66694def8bea8d2c # Parent 7db147ea02c4ee3de8849a9e23db7527a88732ad Inline asm for VP56 arith coder This is a lot more reliable to get cmov rather than trying to trick gcc into generating it, useful since it's 2% faster overall. Patch by Eli Friedman diff -r 7db147ea02c4 -r 6e6c92d36c4b vp56.h --- a/vp56.h Fri Jul 23 21:46:25 2010 +0000 +++ b/vp56.h Fri Jul 23 21:46:30 2010 +0000 @@ -208,23 +208,25 @@ return code_word; } +#if ARCH_X86 +#include "x86/vp56_arith.h" +#endif + +#ifndef vp56_rac_get_prob +#define vp56_rac_get_prob vp56_rac_get_prob static inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob) { - /* Don't put c->high in a local variable; if we do that, gcc gets - * the stupids and turns the code below into a branch again. */ unsigned int code_word = vp56_rac_renorm(c); unsigned int low = 1 + (((c->high - 1) * prob) >> 8); unsigned int low_shift = low << 8; int bit = code_word >= low_shift; - /* Incantation to convince GCC to turn these into conditional moves - * instead of branches -- faster, as this branch is basically - * unpredictable. */ c->high = bit ? c->high - low : low; c->code_word = bit ? code_word - low_shift : code_word; return bit; } +#endif // branchy variant, to be used where there's a branch based on the bit decoded static av_always_inline int vp56_rac_get_prob_branchy(VP56RangeCoder *c, int prob) diff -r 7db147ea02c4 -r 6e6c92d36c4b x86/vp56_arith.h --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/x86/vp56_arith.h Fri Jul 23 21:46:30 2010 +0000 @@ -0,0 +1,54 @@ +/** + * VP5 and VP6 compatible video decoder (arith decoder) + * + * Copyright (C) 2006 Aurelien Jacobs + * Copyright (C) 2010 Eli Friedman + * + * This file is part of FFmpeg. 
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_X86_VP56_ARITH_H
+#define AVCODEC_X86_VP56_ARITH_H
+
+#if HAVE_FAST_CMOV
+#define vp56_rac_get_prob vp56_rac_get_prob
+/** Decode one bit from c, splitting at low = 1 + (((c->high - 1) * prob) >> 8).
+ *  @return 1 if code_word >= low << 8 (high -= low, code_word -= low << 8), else 0 (high = low). */
+static inline int vp56_rac_get_prob(VP56RangeCoder *c, uint8_t prob)
+{
+    unsigned int code_word = vp56_rac_renorm(c);
+    unsigned int high      = c->high;
+    unsigned int low       = 1 + (((high - 1) * prob) >> 8);
+    unsigned int low_shift = low << 8;
+    int bit = 0;                  /* setae writes only the low byte of %0 */
+    __asm__(
+        "subl %4, %1 \n\t"        /* high      -= low                                 */
+        "subl %3, %2 \n\t"        /* code_word -= low_shift; CF set on borrow         */
+        "leal (%2, %3), %3 \n\t"  /* %3 = original code_word; lea leaves flags alone  */
+        "setae %b0 \n\t"          /* bit = !CF, i.e. code_word >= low_shift           */
+        "cmovb %4, %1 \n\t"       /* on borrow: high = low                            */
+        "cmovb %3, %2 \n\t"       /* on borrow: code_word = original code_word        */
+        : "+&q"(bit), "+&r"(high), "+&r"(code_word), "+&r"(low_shift)
+        : "r"(low) /* read again by cmovb after the outputs are written, hence "&" above */
+    );
+    c->high      = high;
+    c->code_word = code_word;
+    return bit;
+}
+#endif
+
+#endif /* AVCODEC_X86_VP56_ARITH_H */