# HG changeset patch # User michael # Date 1161380042 0 # Node ID 19f07b651d79b66a4ffbf906029e452a74e896fd # Parent 2c79a8281cb69aa472c3891e211a2b1a0683842e decode_significance_8x8_x86() 8% faster decode_cabac_residual() (8x8 case only) on P3 diff -r 2c79a8281cb6 -r 19f07b651d79 cabac.h --- a/cabac.h Fri Oct 20 17:53:19 2006 +0000 +++ b/cabac.h Fri Oct 20 21:34:02 2006 +0000 @@ -721,6 +721,62 @@ ); return coeff_count; } + /* decode_significance_8x8_x86: inline-asm scan (32-bit x86 registers) over positions 0..62. For each position one bin is decoded with BRANCHLESS_GET_CABAC using the state byte at significant_coeff_ctx_base + sig_off[pos]. When bit 0 of that result is set, pos is stored through index and a second bin is decoded using the state byte at significant_coeff_ctx_base + last_coeff_flag_offset_8x8[pos] + 15; a set bit 0 there ends the scan. If the loop runs to completion, 63 is stored as the final entry. Returns the number of entries written to index[]. BRANCHLESS_GET_CABAC, RANGE, LOW and MANGLE are defined outside this patch. */ +static int decode_significance_8x8_x86(CABACContext *c, uint8_t *significant_coeff_ctx_base, int *index, uint8_t *sig_off){ + int minusindex= 4-(int)index; /* 4 minus the start address of index[]; added back at label 4 to turn the write pointer into an entry count. The cast assumes a pointer fits in int. */ + int coeff_count; + int last=0; /* current scan position, kept in memory operand %1. Operand map: %0 coeff_count (eax), %1 last, %2 index, %3 c, %4 minusindex, %5 significant_coeff_ctx_base, %6 sig_off. */ + asm volatile( + "movl "RANGE "(%3), %%esi \n\t" + "movl "LOW "(%3), %%ebx \n\t" + /* esi and ebx now hold the words at RANGE(c) and LOW(c), presumably the range and low fields of CABACContext - confirm against the macro definitions */ + "mov %1, %%edi \n\t" + "2: \n\t" + /* label 2, loop head: edi = sig_off[pos] + significant_coeff_ctx_base, the address of the state byte for the first decode */ + "mov %6, %%eax \n\t" + "movzbl (%%eax, %%edi), %%edi \n\t" + "add %5, %%edi \n\t" + + BRANCHLESS_GET_CABAC("%%edx", "%3", "(%%edi)", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al") + /* reload pos; if bit 0 of the result in edx is clear, skip to label 3 (next position) */ + "mov %1, %%edi \n\t" + "test $1, %%edx \n\t" + " jz 3f \n\t" + /* second decode: state byte at 15 + significant_coeff_ctx_base + last_coeff_flag_offset_8x8[pos] */ + "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%%edi), %%edi\n\t" + "add %5, %%edi \n\t" + + BRANCHLESS_GET_CABAC("%%edx", "%3", "15(%%edi)", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al") + /* store the current position through the index write pointer */ + "movl %2, %%eax \n\t" + "mov %1, %%edi \n\t" + "movl %%edi, (%%eax) \n\t" + /* if bit 0 of the second result is set, finish at label 4 with eax still pointing at the entry just written */ + "test $1, %%edx \n\t" + " jnz 4f \n\t" + /* otherwise advance the index write pointer by one int (4 bytes) */ + "addl $4, %%eax \n\t" + "movl %%eax, %2 \n\t" + /* label 3: increment pos and loop while pos is below 63; on fall-through pos (63) is stored as the final entry */ + "3: \n\t" + "addl $1, %%edi \n\t" + "mov %%edi, %1 \n\t" + "cmpl $63, %%edi \n\t" + " jb 2b \n\t" + "movl %2, %%eax \n\t" + "movl %%edi, (%%eax) \n\t" + "4: \n\t" + "addl %4, %%eax \n\t" + "shr $2, %%eax \n\t" + /* above: eax = (write pointer + 4 - start of index[]) / 4, the number of entries written; below: write esi and ebx back to RANGE(c) and LOW(c) */ + "movl %%esi, "RANGE "(%3) \n\t" + "movl %%ebx, "LOW "(%3) \n\t" + :"=&a"(coeff_count),"+m"(last), "+m"(index)\ + :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off)\ + : "%ecx", "%ebx", "%edx", "%esi", "%edi", "memory"\ + ); + return coeff_count; +} #endif /** diff -r 2c79a8281cb6 -r 19f07b651d79 h264.c --- a/h264.c Fri Oct 20 17:53:19 2006 +0000 +++ b/h264.c Fri Oct 20 21:34:02 2006 +0000 @@ -6034,6 +6034,13 @@ return 
ctx + 4 * cat; } +static const __attribute((used)) uint8_t last_coeff_flag_offset_8x8[63] = { + 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, + 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8 +}; + static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) { const int mb_xy = h->s.mb_x + h->s.mb_y*h->s.mb_stride; static const int significant_coeff_flag_offset[2][6] = { @@ -6057,12 +6064,6 @@ 9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9, 9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 } }; - static const uint8_t last_coeff_flag_offset_8x8[63] = { - 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, - 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8 - }; int index[64]; @@ -6138,11 +6139,13 @@ index[coeff_count++] = last;\ } const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD]; +#ifdef ARCH_X86 + coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off); + } else { + coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index); +#else DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] ); } else { -#ifdef ARCH_X86 - coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index); -#else DECODE_SIGNIFICANCE( max_coeff - 1, last, last ); #endif }