changeset 4051:19f07b651d79 libavcodec

decode_significance_8x8_x86() 8% faster decode_cabac_residual() (8x8 case only) on P3
author michael
date Fri, 20 Oct 2006 21:34:02 +0000
parents 2c79a8281cb6
children 6a05e683e7c4
files cabac.h h264.c
diffstat 2 files changed, 68 insertions(+), 9 deletions(-) [+]
line wrap: on
line diff
--- a/cabac.h	Fri Oct 20 17:53:19 2006 +0000
+++ b/cabac.h	Fri Oct 20 21:34:02 2006 +0000
@@ -721,6 +721,62 @@
     );
     return coeff_count;
 }
+
+static int decode_significance_8x8_x86(CABACContext *c, uint8_t *significant_coeff_ctx_base, int *index, uint8_t *sig_off){
+    int minusindex= 4-(int)index;
+    int coeff_count;
+    int last=0;
+    asm volatile(
+        "movl "RANGE    "(%3), %%esi            \n\t"
+        "movl "LOW      "(%3), %%ebx            \n\t"
+
+        "mov %1, %%edi                          \n\t"
+        "2:                                     \n\t"
+
+        "mov %6, %%eax                          \n\t"
+        "movzbl (%%eax, %%edi), %%edi           \n\t"
+        "add %5, %%edi                          \n\t"
+
+        BRANCHLESS_GET_CABAC("%%edx", "%3", "(%%edi)", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")
+
+        "mov %1, %%edi                          \n\t"
+        "test $1, %%edx                         \n\t"
+        " jz 3f                                 \n\t"
+
+        "movzbl "MANGLE(last_coeff_flag_offset_8x8)"(%%edi), %%edi\n\t"
+        "add %5, %%edi                          \n\t"
+
+        BRANCHLESS_GET_CABAC("%%edx", "%3", "15(%%edi)", "%%ebx", "%%bx", "%%esi", "%%eax", "%%al")
+
+        "movl %2, %%eax                         \n\t"
+        "mov %1, %%edi                          \n\t"
+        "movl %%edi, (%%eax)                    \n\t"
+
+        "test $1, %%edx                         \n\t"
+        " jnz 4f                                \n\t"
+
+        "addl $4, %%eax                         \n\t"
+        "movl %%eax, %2                         \n\t"
+
+        "3:                                     \n\t"
+        "addl $1, %%edi                         \n\t"
+        "mov %%edi, %1                          \n\t"
+        "cmpl $63, %%edi                        \n\t"
+        " jb 2b                                 \n\t"
+        "movl %2, %%eax                         \n\t"
+        "movl %%edi, (%%eax)                    \n\t"
+        "4:                                     \n\t"
+        "addl %4, %%eax                         \n\t"
+        "shr $2, %%eax                          \n\t"
+
+        "movl %%esi, "RANGE    "(%3)            \n\t"
+        "movl %%ebx, "LOW      "(%3)            \n\t"
+        :"=&a"(coeff_count),"+m"(last), "+m"(index)\
+        :"r"(c), "m"(minusindex), "m"(significant_coeff_ctx_base), "m"(sig_off)\
+        : "%ecx", "%ebx", "%edx", "%esi", "%edi", "memory"\
+    );
+    return coeff_count;
+}
 #endif
 
 /**
--- a/h264.c	Fri Oct 20 17:53:19 2006 +0000
+++ b/h264.c	Fri Oct 20 21:34:02 2006 +0000
@@ -6034,6 +6034,13 @@
     return ctx + 4 * cat;
 }
 
+static const __attribute((used)) uint8_t last_coeff_flag_offset_8x8[63] = {
+    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+    3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
+    5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
+};
+
 static int decode_cabac_residual( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff) {
     const int mb_xy  = h->s.mb_x + h->s.mb_y*h->s.mb_stride;
     static const int significant_coeff_flag_offset[2][6] = {
@@ -6057,12 +6064,6 @@
         9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
         9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
     };
-    static const uint8_t last_coeff_flag_offset_8x8[63] = {
-        0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
-        2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
-        3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
-        5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
-    };
 
     int index[64];
 
@@ -6138,11 +6139,13 @@
             index[coeff_count++] = last;\
         }
         const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
+#ifdef ARCH_X86
+        coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
+    } else {
+        coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
+#else
         DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
     } else {
-#ifdef ARCH_X86
-        coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
-#else
         DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
 #endif
     }