view h264_cabac.c @ 12272:dd90555c98fd libavcodec

Split pextrw macro-spaghetti into several opt-specific macros, this will make future new optimizations (imagine a sse5) much easier. Also fix a bug where we used the direction (%2) rather than optimization (%1) to enable this, which means it wasn't ever actually used...
author rbultje
date Mon, 26 Jul 2010 13:50:59 +0000
parents 7dd2a45249a9
children
line wrap: on
line source

/*
 * H.26L/H.264/AVC/JVT/14496-10/... cabac decoding
 * Copyright (c) 2003 Michael Niedermayer <michaelni@gmx.at>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file
 * H.264 / AVC / MPEG4 part10 cabac decoding.
 * @author Michael Niedermayer <michaelni@gmx.at>
 */

#define CABAC 1

#include "internal.h"
#include "dsputil.h"
#include "avcodec.h"
#include "h264.h"
#include "h264data.h"
#include "h264_mvpred.h"
#include "golomb.h"

#include "cabac.h"
#if ARCH_X86
#include "x86/h264_i386.h"
#endif

//#undef NDEBUG
#include <assert.h>

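/* CABAC context initialization tables: one { m, n } pair per context (460
 * contexts), combined with the slice qscale in ff_h264_init_cabac_states()
 * to derive the initial state of each context. */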

static const int8_t cabac_context_init_I[460][2] =
{
    /* 0 - 10 */
    { 20, -15 }, {  2, 54 },  {  3,  74 }, { 20, -15 },
    {  2,  54 }, {  3, 74 },  { -28,127 }, { -23, 104 },
    { -6,  53 }, { -1, 54 },  {  7,  51 },

    /* 11 - 23 unused for I */
    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
    { 0, 0 },

    /* 24- 39 */
    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },

    /* 40 - 53 */
    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
    { 0, 0 },    { 0, 0 },

    /* 54 - 59 */
    { 0, 0 },    { 0, 0 },    { 0, 0 },      { 0, 0 },
    { 0, 0 },    { 0, 0 },

    /* 60 - 69 */
    { 0, 41 },   { 0, 63 },   { 0, 63 },     { 0, 63 },
    { -9, 83 },  { 4, 86 },   { 0, 97 },     { -7, 72 },
    { 13, 41 },  { 3, 62 },

    /* 70 -> 87 */
    { 0, 11 },   { 1, 55 },   { 0, 69 },     { -17, 127 },
    { -13, 102 },{ 0, 82 },   { -7, 74 },    { -21, 107 },
    { -27, 127 },{ -31, 127 },{ -24, 127 },  { -18, 95 },
    { -27, 127 },{ -21, 114 },{ -30, 127 },  { -17, 123 },
    { -12, 115 },{ -16, 122 },

    /* 88 -> 104 */
    { -11, 115 },{ -12, 63 }, { -2, 68 },    { -15, 84 },
    { -13, 104 },{ -3, 70 },  { -8, 93 },    { -10, 90 },
    { -30, 127 },{ -1, 74 },  { -6, 97 },    { -7, 91 },
    { -20, 127 },{ -4, 56 },  { -5, 82 },    { -7, 76 },
    { -22, 125 },

    /* 105 -> 135 */
    { -7, 93 },  { -11, 87 }, { -3, 77 },    { -5, 71 },
    { -4, 63 },  { -4, 68 },  { -12, 84 },   { -7, 62 },
    { -7, 65 },  { 8, 61 },   { 5, 56 },     { -2, 66 },
    { 1, 64 },   { 0, 61 },   { -2, 78 },    { 1, 50 },
    { 7, 52 },   { 10, 35 },  { 0, 44 },     { 11, 38 },
    { 1, 45 },   { 0, 46 },   { 5, 44 },     { 31, 17 },
    { 1, 51 },   { 7, 50 },   { 28, 19 },    { 16, 33 },
    { 14, 62 },  { -13, 108 },{ -15, 100 },

    /* 136 -> 165 */
    { -13, 101 },{ -13, 91 }, { -12, 94 },   { -10, 88 },
    { -16, 84 }, { -10, 86 }, { -7, 83 },    { -13, 87 },
    { -19, 94 }, { 1, 70 },   { 0, 72 },     { -5, 74 },
    { 18, 59 },  { -8, 102 }, { -15, 100 },  { 0, 95 },
    { -4, 75 },  { 2, 72 },   { -11, 75 },   { -3, 71 },
    { 15, 46 },  { -13, 69 }, { 0, 62 },     { 0, 65 },
    { 21, 37 },  { -15, 72 }, { 9, 57 },     { 16, 54 },
    { 0, 62 },   { 12, 72 },

    /* 166 -> 196 */
    { 24, 0 },   { 15, 9 },   { 8, 25 },     { 13, 18 },
    { 15, 9 },   { 13, 19 },  { 10, 37 },    { 12, 18 },
    { 6, 29 },   { 20, 33 },  { 15, 30 },    { 4, 45 },
    { 1, 58 },   { 0, 62 },   { 7, 61 },     { 12, 38 },
    { 11, 45 },  { 15, 39 },  { 11, 42 },    { 13, 44 },
    { 16, 45 },  { 12, 41 },  { 10, 49 },    { 30, 34 },
    { 18, 42 },  { 10, 55 },  { 17, 51 },    { 17, 46 },
    { 0, 89 },   { 26, -19 }, { 22, -17 },

    /* 197 -> 226 */
    { 26, -17 }, { 30, -25 }, { 28, -20 },   { 33, -23 },
    { 37, -27 }, { 33, -23 }, { 40, -28 },   { 38, -17 },
    { 33, -11 }, { 40, -15 }, { 41, -6 },    { 38, 1 },
    { 41, 17 },  { 30, -6 },  { 27, 3 },     { 26, 22 },
    { 37, -16 }, { 35, -4 },  { 38, -8 },    { 38, -3 },
    { 37, 3 },   { 38, 5 },   { 42, 0 },     { 35, 16 },
    { 39, 22 },  { 14, 48 },  { 27, 37 },    { 21, 60 },
    { 12, 68 },  { 2, 97 },

    /* 227 -> 251 */
    { -3, 71 },  { -6, 42 },  { -5, 50 },    { -3, 54 },
    { -2, 62 },  { 0, 58 },   { 1, 63 },     { -2, 72 },
    { -1, 74 },  { -9, 91 },  { -5, 67 },    { -5, 27 },
    { -3, 39 },  { -2, 44 },  { 0, 46 },     { -16, 64 },
    { -8, 68 },  { -10, 78 }, { -6, 77 },    { -10, 86 },
    { -12, 92 }, { -15, 55 }, { -10, 60 },   { -6, 62 },
    { -4, 65 },

    /* 252 -> 275 */
    { -12, 73 }, { -8, 76 },  { -7, 80 },    { -9, 88 },
    { -17, 110 },{ -11, 97 }, { -20, 84 },   { -11, 79 },
    { -6, 73 },  { -4, 74 },  { -13, 86 },   { -13, 96 },
    { -11, 97 }, { -19, 117 },{ -8, 78 },    { -5, 33 },
    { -4, 48 },  { -2, 53 },  { -3, 62 },    { -13, 71 },
    { -10, 79 }, { -12, 86 }, { -13, 90 },   { -14, 97 },

    /* 276 a bit special (not used, bypass is used instead) */
    { 0, 0 },

    /* 277 -> 307 */
    { -6, 93 },  { -6, 84 },  { -8, 79 },    { 0, 66 },
    { -1, 71 },  { 0, 62 },   { -2, 60 },    { -2, 59 },
    { -5, 75 },  { -3, 62 },  { -4, 58 },    { -9, 66 },
    { -1, 79 },  { 0, 71 },   { 3, 68 },     { 10, 44 },
    { -7, 62 },  { 15, 36 },  { 14, 40 },    { 16, 27 },
    { 12, 29 },  { 1, 44 },   { 20, 36 },    { 18, 32 },
    { 5, 42 },   { 1, 48 },   { 10, 62 },    { 17, 46 },
    { 9, 64 },   { -12, 104 },{ -11, 97 },

    /* 308 -> 337 */
    { -16, 96 }, { -7, 88 },  { -8, 85 },    { -7, 85 },
    { -9, 85 },  { -13, 88 }, { 4, 66 },     { -3, 77 },
    { -3, 76 },  { -6, 76 },  { 10, 58 },    { -1, 76 },
    { -1, 83 },  { -7, 99 },  { -14, 95 },   { 2, 95 },
    { 0, 76 },   { -5, 74 },  { 0, 70 },     { -11, 75 },
    { 1, 68 },   { 0, 65 },   { -14, 73 },   { 3, 62 },
    { 4, 62 },   { -1, 68 },  { -13, 75 },   { 11, 55 },
    { 5, 64 },   { 12, 70 },

    /* 338 -> 368 */
    { 15, 6 },   { 6, 19 },   { 7, 16 },     { 12, 14 },
    { 18, 13 },  { 13, 11 },  { 13, 15 },    { 15, 16 },
    { 12, 23 },  { 13, 23 },  { 15, 20 },    { 14, 26 },
    { 14, 44 },  { 17, 40 },  { 17, 47 },    { 24, 17 },
    { 21, 21 },  { 25, 22 },  { 31, 27 },    { 22, 29 },
    { 19, 35 },  { 14, 50 },  { 10, 57 },    { 7, 63 },
    { -2, 77 },  { -4, 82 },  { -3, 94 },    { 9, 69 },
    { -12, 109 },{ 36, -35 }, { 36, -34 },

    /* 369 -> 398 */
    { 32, -26 }, { 37, -30 }, { 44, -32 },   { 34, -18 },
    { 34, -15 }, { 40, -15 }, { 33, -7 },    { 35, -5 },
    { 33, 0 },   { 38, 2 },   { 33, 13 },    { 23, 35 },
    { 13, 58 },  { 29, -3 },  { 26, 0 },     { 22, 30 },
    { 31, -7 },  { 35, -15 }, { 34, -3 },    { 34, 3 },
    { 36, -1 },  { 34, 5 },   { 32, 11 },    { 35, 5 },
    { 34, 12 },  { 39, 11 },  { 30, 29 },    { 34, 26 },
    { 29, 39 },  { 19, 66 },

    /* 399 -> 435 */
    {  31,  21 }, {  31,  31 }, {  25,  50 },
    { -17, 120 }, { -20, 112 }, { -18, 114 }, { -11,  85 },
    { -15,  92 }, { -14,  89 }, { -26,  71 }, { -15,  81 },
    { -14,  80 }, {   0,  68 }, { -14,  70 }, { -24,  56 },
    { -23,  68 }, { -24,  50 }, { -11,  74 }, {  23, -13 },
    {  26, -13 }, {  40, -15 }, {  49, -14 }, {  44,   3 },
    {  45,   6 }, {  44,  34 }, {  33,  54 }, {  19,  82 },
    {  -3,  75 }, {  -1,  23 }, {   1,  34 }, {   1,  43 },
    {   0,  54 }, {  -2,  55 }, {   0,  61 }, {   1,  64 },
    {   0,  68 }, {  -9,  92 },

    /* 436 -> 459 */
    { -14, 106 }, { -13,  97 }, { -15,  90 }, { -12,  90 },
    { -18,  88 }, { -10,  73 }, {  -9,  79 }, { -14,  86 },
    { -10,  73 }, { -10,  70 }, { -10,  69 }, {  -5,  66 },
    {  -9,  64 }, {  -5,  58 }, {   2,  59 }, {  21, -10 },
    {  24, -11 }, {  28,  -8 }, {  28,  -1 }, {  29,   3 },
    {  29,   9 }, {  35,  20 }, {  29,  36 }, {  14,  67 }
};

static const int8_t cabac_context_init_PB[3][460][2] =
{
    /* i_cabac_init_idc == 0 */
    {
        /* 0 - 10 */
        {  20, -15 }, {   2,  54 }, {   3,  74 }, {  20, -15 },
        {   2,  54 }, {   3,  74 }, { -28, 127 }, { -23, 104 },
        {  -6,  53 }, {  -1,  54 }, {   7,  51 },

        /* 11 - 23 */
        {  23,  33 }, {  23,   2 }, {  21,   0 }, {   1,   9 },
        {   0,  49 }, { -37, 118 }, {   5,  57 }, { -13,  78 },
        { -11,  65 }, {   1,  62 }, {  12,  49 }, {  -4,  73 },
        {  17,  50 },

        /* 24 - 39 */
        {  18,  64 }, {   9,  43 }, {  29,   0 }, {  26,  67 },
        {  16,  90 }, {   9, 104 }, { -46, 127 }, { -20, 104 },
        {   1,  67 }, { -13,  78 }, { -11,  65 }, {   1,  62 },
        {  -6,  86 }, { -17,  95 }, {  -6,  61 }, {   9,  45 },

        /* 40 - 53 */
        {  -3,  69 }, {  -6,  81 }, { -11,  96 }, {   6,  55 },
        {   7,  67 }, {  -5,  86 }, {   2,  88 }, {   0,  58 },
        {  -3,  76 }, { -10,  94 }, {   5,  54 }, {   4,  69 },
        {  -3,  81 }, {   0,  88 },

        /* 54 - 59 */
        {  -7,  67 }, {  -5,  74 }, {  -4,  74 }, {  -5,  80 },
        {  -7,  72 }, {   1,  58 },

        /* 60 - 69 */
        {   0,  41 }, {   0,  63 }, {   0,  63 }, { 0, 63 },
        {  -9,  83 }, {   4,  86 }, {   0,  97 }, { -7, 72 },
        {  13,  41 }, {   3,  62 },

        /* 70 - 104 */
        {   0,  45 }, {  -4,  78 }, {  -3,  96 }, { -27,  126 },
        { -28,  98 }, { -25, 101 }, { -23,  67 }, { -28,  82 },
        { -20,  94 }, { -16,  83 }, { -22, 110 }, { -21,  91 },
        { -18, 102 }, { -13,  93 }, { -29, 127 }, {  -7,  92 },
        {  -5,  89 }, {  -7,  96 }, { -13, 108 }, {  -3,  46 },
        {  -1,  65 }, {  -1,  57 }, {  -9,  93 }, {  -3,  74 },
        {  -9,  92 }, {  -8,  87 }, { -23, 126 }, {   5,  54 },
        {   6,  60 }, {   6,  59 }, {   6,  69 }, {  -1,  48 },
        {   0,  68 }, {  -4,  69 }, {  -8,  88 },

        /* 105 -> 165 */
        {  -2,  85 }, {  -6,  78 }, {  -1,  75 }, {  -7,  77 },
        {   2,  54 }, {   5,  50 }, {  -3,  68 }, {   1,  50 },
        {   6,  42 }, {  -4,  81 }, {   1,  63 }, {  -4,  70 },
        {   0,  67 }, {   2,  57 }, {  -2,  76 }, {  11,  35 },
        {   4,  64 }, {   1,  61 }, {  11,  35 }, {  18,  25 },
        {  12,  24 }, {  13,  29 }, {  13,  36 }, { -10,  93 },
        {  -7,  73 }, {  -2,  73 }, {  13,  46 }, {   9,  49 },
        {  -7, 100 }, {   9,  53 }, {   2,  53 }, {   5,  53 },
        {  -2,  61 }, {   0,  56 }, {   0,  56 }, { -13,  63 },
        {  -5,  60 }, {  -1,  62 }, {   4,  57 }, {  -6,  69 },
        {   4,  57 }, {  14,  39 }, {   4,  51 }, {  13,  68 },
        {   3,  64 }, {   1,  61 }, {   9,  63 }, {   7,  50 },
        {  16,  39 }, {   5,  44 }, {   4,  52 }, {  11,  48 },
        {  -5,  60 }, {  -1,  59 }, {   0,  59 }, {  22,  33 },
        {   5,  44 }, {  14,  43 }, {  -1,  78 }, {   0,  60 },
        {   9,  69 },

        /* 166 - 226 */
        {  11,  28 }, {   2,  40 }, {   3,  44 }, {   0,  49 },
        {   0,  46 }, {   2,  44 }, {   2,  51 }, {   0,  47 },
        {   4,  39 }, {   2,  62 }, {   6,  46 }, {   0,  54 },
        {   3,  54 }, {   2,  58 }, {   4,  63 }, {   6,  51 },
        {   6,  57 }, {   7,  53 }, {   6,  52 }, {   6,  55 },
        {  11,  45 }, {  14,  36 }, {   8,  53 }, {  -1,  82 },
        {   7,  55 }, {  -3,  78 }, {  15,  46 }, {  22,  31 },
        {  -1,  84 }, {  25,   7 }, {  30,  -7 }, {  28,   3 },
        {  28,   4 }, {  32,   0 }, {  34,  -1 }, {  30,   6 },
        {  30,   6 }, {  32,   9 }, {  31,  19 }, {  26,  27 },
        {  26,  30 }, {  37,  20 }, {  28,  34 }, {  17,  70 },
        {   1,  67 }, {   5,  59 }, {   9,  67 }, {  16,  30 },
        {  18,  32 }, {  18,  35 }, {  22,  29 }, {  24,  31 },
        {  23,  38 }, {  18,  43 }, {  20,  41 }, {  11,  63 },
        {   9,  59 }, {   9,  64 }, {  -1,  94 }, {  -2,  89 },
        {  -9, 108 },

        /* 227 - 275 */
        {  -6,  76 }, {  -2,  44 }, {   0,  45 }, {   0,  52 },
        {  -3,  64 }, {  -2,  59 }, {  -4,  70 }, {  -4,  75 },
        {  -8,  82 }, { -17, 102 }, {  -9,  77 }, {   3,  24 },
        {   0,  42 }, {   0,  48 }, {   0,  55 }, {  -6,  59 },
        {  -7,  71 }, { -12,  83 }, { -11,  87 }, { -30, 119 },
        {   1,  58 }, {  -3,  29 }, {  -1,  36 }, {   1,  38 },
        {   2,  43 }, {  -6,  55 }, {   0,  58 }, {   0,  64 },
        {  -3,  74 }, { -10,  90 }, {   0,  70 }, {  -4,  29 },
        {   5,  31 }, {   7,  42 }, {   1,  59 }, {  -2,  58 },
        {  -3,  72 }, {  -3,  81 }, { -11,  97 }, {   0,  58 },
        {   8,   5 }, {  10,  14 }, {  14,  18 }, {  13,  27 },
        {   2,  40 }, {   0,  58 }, {  -3,  70 }, {  -6,  79 },
        {  -8,  85 },

        /* 276 a bit special (not used, bypass is used instead) */
        { 0, 0 },

        /* 277 - 337 */
        { -13, 106 }, { -16, 106 }, { -10,  87 }, { -21, 114 },
        { -18, 110 }, { -14,  98 }, { -22, 110 }, { -21, 106 },
        { -18, 103 }, { -21, 107 }, { -23, 108 }, { -26, 112 },
        { -10,  96 }, { -12,  95 }, {  -5,  91 }, {  -9,  93 },
        { -22,  94 }, {  -5,  86 }, {   9,  67 }, {  -4,  80 },
        { -10,  85 }, {  -1,  70 }, {   7,  60 }, {   9,  58 },
        {   5,  61 }, {  12,  50 }, {  15,  50 }, {  18,  49 },
        {  17,  54 }, {  10,  41 }, {   7,  46 }, {  -1,  51 },
        {   7,  49 }, {   8,  52 }, {   9,  41 }, {   6,  47 },
        {   2,  55 }, {  13,  41 }, {  10,  44 }, {   6,  50 },
        {   5,  53 }, {  13,  49 }, {   4,  63 }, {   6,  64 },
        {  -2,  69 }, {  -2,  59 }, {   6,  70 }, {  10,  44 },
        {   9,  31 }, {  12,  43 }, {   3,  53 }, {  14,  34 },
        {  10,  38 }, {  -3,  52 }, {  13,  40 }, {  17,  32 },
        {   7,  44 }, {   7,  38 }, {  13,  50 }, {  10,  57 },
        {  26,  43 },

        /* 338 - 398 */
        {  14,  11 }, {  11,  14 }, {   9,  11 }, {  18,  11 },
        {  21,   9 }, {  23,  -2 }, {  32, -15 }, {  32, -15 },
        {  34, -21 }, {  39, -23 }, {  42, -33 }, {  41, -31 },
        {  46, -28 }, {  38, -12 }, {  21,  29 }, {  45, -24 },
        {  53, -45 }, {  48, -26 }, {  65, -43 }, {  43, -19 },
        {  39, -10 }, {  30,   9 }, {  18,  26 }, {  20,  27 },
        {   0,  57 }, { -14,  82 }, {  -5,  75 }, { -19,  97 },
        { -35, 125 }, {  27,   0 }, {  28,   0 }, {  31,  -4 },
        {  27,   6 }, {  34,   8 }, {  30,  10 }, {  24,  22 },
        {  33,  19 }, {  22,  32 }, {  26,  31 }, {  21,  41 },
        {  26,  44 }, {  23,  47 }, {  16,  65 }, {  14,  71 },
        {   8,  60 }, {   6,  63 }, {  17,  65 }, {  21,  24 },
        {  23,  20 }, {  26,  23 }, {  27,  32 }, {  28,  23 },
        {  28,  24 }, {  23,  40 }, {  24,  32 }, {  28,  29 },
        {  23,  42 }, {  19,  57 }, {  22,  53 }, {  22,  61 },
        {  11,  86 },

        /* 399 - 435 */
        {  12,  40 }, {  11,  51 }, {  14,  59 },
        {  -4,  79 }, {  -7,  71 }, {  -5,  69 }, {  -9,  70 },
        {  -8,  66 }, { -10,  68 }, { -19,  73 }, { -12,  69 },
        { -16,  70 }, { -15,  67 }, { -20,  62 }, { -19,  70 },
        { -16,  66 }, { -22,  65 }, { -20,  63 }, {   9,  -2 },
        {  26,  -9 }, {  33,  -9 }, {  39,  -7 }, {  41,  -2 },
        {  45,   3 }, {  49,   9 }, {  45,  27 }, {  36,  59 },
        {  -6,  66 }, {  -7,  35 }, {  -7,  42 }, {  -8,  45 },
        {  -5,  48 }, { -12,  56 }, {  -6,  60 }, {  -5,  62 },
        {  -8,  66 }, {  -8,  76 },

        /* 436 - 459 */
        {  -5,  85 }, {  -6,  81 }, { -10,  77 }, {  -7,  81 },
        { -17,  80 }, { -18,  73 }, {  -4,  74 }, { -10,  83 },
        {  -9,  71 }, {  -9,  67 }, {  -1,  61 }, {  -8,  66 },
        { -14,  66 }, {   0,  59 }, {   2,  59 }, {  21, -13 },
        {  33, -14 }, {  39,  -7 }, {  46,  -2 }, {  51,   2 },
        {  60,   6 }, {  61,  17 }, {  55,  34 }, {  42,  62 },
    },

    /* i_cabac_init_idc == 1 */
    {
        /* 0 - 10 */
        {  20, -15 }, {   2,  54 }, {   3,  74 }, {  20, -15 },
        {   2,  54 }, {   3,  74 }, { -28, 127 }, { -23, 104 },
        {  -6,  53 }, {  -1,  54 }, {   7,  51 },

        /* 11 - 23 */
        {  22,  25 }, {  34,   0 }, {  16,   0 }, {  -2,   9 },
        {   4,  41 }, { -29, 118 }, {   2,  65 }, {  -6,  71 },
        { -13,  79 }, {   5,  52 }, {   9,  50 }, {  -3,  70 },
        {  10,  54 },

        /* 24 - 39 */
        {  26,  34 }, {  19,  22 }, {  40,   0 }, {  57,   2 },
        {  41,  36 }, {  26,  69 }, { -45, 127 }, { -15, 101 },
        {  -4,  76 }, {  -6,  71 }, { -13,  79 }, {   5,  52 },
        {   6,  69 }, { -13,  90 }, {   0,  52 }, {   8,  43 },

        /* 40 - 53 */
        {  -2,  69 },{  -5,  82 },{ -10,  96 },{   2,  59 },
        {   2,  75 },{  -3,  87 },{  -3,  100 },{   1,  56 },
        {  -3,  74 },{  -6,  85 },{   0,  59 },{  -3,  81 },
        {  -7,  86 },{  -5,  95 },

        /* 54 - 59 */
        {  -1,  66 },{  -1,  77 },{   1,  70 },{  -2,  86 },
        {  -5,  72 },{   0,  61 },

        /* 60 - 69 */
        { 0, 41 },   { 0, 63 },   { 0, 63 },     { 0, 63 },
        { -9, 83 },  { 4, 86 },   { 0, 97 },     { -7, 72 },
        { 13, 41 },  { 3, 62 },

        /* 70 - 104 */
        {  13,  15 }, {   7,  51 }, {   2,  80 }, { -39, 127 },
        { -18,  91 }, { -17,  96 }, { -26,  81 }, { -35,  98 },
        { -24, 102 }, { -23,  97 }, { -27, 119 }, { -24,  99 },
        { -21, 110 }, { -18, 102 }, { -36, 127 }, {   0,  80 },
        {  -5,  89 }, {  -7,  94 }, {  -4,  92 }, {   0,  39 },
        {   0,  65 }, { -15,  84 }, { -35, 127 }, {  -2,  73 },
        { -12, 104 }, {  -9,  91 }, { -31, 127 }, {   3,  55 },
        {   7,  56 }, {   7,  55 }, {   8,  61 }, {  -3,  53 },
        {   0,  68 }, {  -7,  74 }, {  -9,  88 },

        /* 105 -> 165 */
        { -13, 103 }, { -13,  91 }, {  -9,  89 }, { -14,  92 },
        {  -8,  76 }, { -12,  87 }, { -23, 110 }, { -24, 105 },
        { -10,  78 }, { -20, 112 }, { -17,  99 }, { -78, 127 },
        { -70, 127 }, { -50, 127 }, { -46, 127 }, {  -4,  66 },
        {  -5,  78 }, {  -4,  71 }, {  -8,  72 }, {   2,  59 },
        {  -1,  55 }, {  -7,  70 }, {  -6,  75 }, {  -8,  89 },
        { -34, 119 }, {  -3,  75 }, {  32,  20 }, {  30,  22 },
        { -44, 127 }, {   0,  54 }, {  -5,  61 }, {   0,  58 },
        {  -1,  60 }, {  -3,  61 }, {  -8,  67 }, { -25,  84 },
        { -14,  74 }, {  -5,  65 }, {   5,  52 }, {   2,  57 },
        {   0,  61 }, {  -9,  69 }, { -11,  70 }, {  18,  55 },
        {  -4,  71 }, {   0,  58 }, {   7,  61 }, {   9,  41 },
        {  18,  25 }, {   9,  32 }, {   5,  43 }, {   9,  47 },
        {   0,  44 }, {   0,  51 }, {   2,  46 }, {  19,  38 },
        {  -4,  66 }, {  15,  38 }, {  12,  42 }, {   9,  34 },
        {   0,  89 },

        /* 166 - 226 */
        {   4,  45 }, {  10,  28 }, {  10,  31 }, {  33, -11 },
        {  52, -43 }, {  18,  15 }, {  28,   0 }, {  35, -22 },
        {  38, -25 }, {  34,   0 }, {  39, -18 }, {  32, -12 },
        { 102, -94 }, {   0,   0 }, {  56, -15 }, {  33,  -4 },
        {  29,  10 }, {  37,  -5 }, {  51, -29 }, {  39,  -9 },
        {  52, -34 }, {  69, -58 }, {  67, -63 }, {  44,  -5 },
        {  32,   7 }, {  55, -29 }, {  32,   1 }, {   0,   0 },
        {  27,  36 }, {  33, -25 }, {  34, -30 }, {  36, -28 },
        {  38, -28 }, {  38, -27 }, {  34, -18 }, {  35, -16 },
        {  34, -14 }, {  32,  -8 }, {  37,  -6 }, {  35,   0 },
        {  30,  10 }, {  28,  18 }, {  26,  25 }, {  29,  41 },
        {   0,  75 }, {   2,  72 }, {   8,  77 }, {  14,  35 },
        {  18,  31 }, {  17,  35 }, {  21,  30 }, {  17,  45 },
        {  20,  42 }, {  18,  45 }, {  27,  26 }, {  16,  54 },
        {   7,  66 }, {  16,  56 }, {  11,  73 }, {  10,  67 },
        { -10, 116 },

        /* 227 - 275 */
        { -23, 112 }, { -15,  71 }, {  -7,  61 }, {   0,  53 },
        {  -5,  66 }, { -11,  77 }, {  -9,  80 }, {  -9,  84 },
        { -10,  87 }, { -34, 127 }, { -21, 101 }, {  -3,  39 },
        {  -5,  53 }, {  -7,  61 }, { -11,  75 }, { -15,  77 },
        { -17,  91 }, { -25, 107 }, { -25, 111 }, { -28, 122 },
        { -11,  76 }, { -10,  44 }, { -10,  52 }, { -10,  57 },
        {  -9,  58 }, { -16,  72 }, {  -7,  69 }, {  -4,  69 },
        {  -5,  74 }, {  -9,  86 }, {   2,  66 }, {  -9,  34 },
        {   1,  32 }, {  11,  31 }, {   5,  52 }, {  -2,  55 },
        {  -2,  67 }, {   0,  73 }, {  -8,  89 }, {   3,  52 },
        {   7,   4 }, {  10,   8 }, {  17,   8 }, {  16,  19 },
        {   3,  37 }, {  -1,  61 }, {  -5,  73 }, {  -1,  70 },
        {  -4,  78 },

        /* 276 a bit special (not used, bypass is used instead) */
        { 0, 0 },

        /* 277 - 337 */
        { -21, 126 }, { -23, 124 }, { -20, 110 }, { -26, 126 },
        { -25, 124 }, { -17, 105 }, { -27, 121 }, { -27, 117 },
        { -17, 102 }, { -26, 117 }, { -27, 116 }, { -33, 122 },
        { -10,  95 }, { -14, 100 }, {  -8,  95 }, { -17, 111 },
        { -28, 114 }, {  -6,  89 }, {  -2,  80 }, {  -4,  82 },
        {  -9,  85 }, {  -8,  81 }, {  -1,  72 }, {   5,  64 },
        {   1,  67 }, {   9,  56 }, {   0,  69 }, {   1,  69 },
        {   7,  69 }, {  -7,  69 }, {  -6,  67 }, { -16,  77 },
        {  -2,  64 }, {   2,  61 }, {  -6,  67 }, {  -3,  64 },
        {   2,  57 }, {  -3,  65 }, {  -3,  66 }, {   0,  62 },
        {   9,  51 }, {  -1,  66 }, {  -2,  71 }, {  -2,  75 },
        {  -1,  70 }, {  -9,  72 }, {  14,  60 }, {  16,  37 },
        {   0,  47 }, {  18,  35 }, {  11,  37 }, {  12,  41 },
        {  10,  41 }, {   2,  48 }, {  12,  41 }, {  13,  41 },
        {   0,  59 }, {   3,  50 }, {  19,  40 }, {   3,  66 },
        {  18,  50 },

        /* 338 - 398 */
        {  19,  -6 }, {  18,  -6 }, {  14,   0 }, {  26, -12 },
        {  31, -16 }, {  33, -25 }, {  33, -22 }, {  37, -28 },
        {  39, -30 }, {  42, -30 }, {  47, -42 }, {  45, -36 },
        {  49, -34 }, {  41, -17 }, {  32,   9 }, {  69, -71 },
        {  63, -63 }, {  66, -64 }, {  77, -74 }, {  54, -39 },
        {  52, -35 }, {  41, -10 }, {  36,   0 }, {  40,  -1 },
        {  30,  14 }, {  28,  26 }, {  23,  37 }, {  12,  55 },
        {  11,  65 }, {  37, -33 }, {  39, -36 }, {  40, -37 },
        {  38, -30 }, {  46, -33 }, {  42, -30 }, {  40, -24 },
        {  49, -29 }, {  38, -12 }, {  40, -10 }, {  38,  -3 },
        {  46,  -5 }, {  31,  20 }, {  29,  30 }, {  25,  44 },
        {  12,  48 }, {  11,  49 }, {  26,  45 }, {  22,  22 },
        {  23,  22 }, {  27,  21 }, {  33,  20 }, {  26,  28 },
        {  30,  24 }, {  27,  34 }, {  18,  42 }, {  25,  39 },
        {  18,  50 }, {  12,  70 }, {  21,  54 }, {  14,  71 },
        {  11,  83 },

        /* 399 - 435 */
        {  25,  32 }, {  21,  49 }, {  21,  54 },
        {  -5,  85 }, {  -6,  81 }, { -10,  77 }, {  -7,  81 },
        { -17,  80 }, { -18,  73 }, {  -4,  74 }, { -10,  83 },
        {  -9,  71 }, {  -9,  67 }, {  -1,  61 }, {  -8,  66 },
        { -14,  66 }, {   0,  59 }, {   2,  59 }, {  17, -10 },
        {  32, -13 }, {  42,  -9 }, {  49,  -5 }, {  53,   0 },
        {  64,   3 }, {  68,  10 }, {  66,  27 }, {  47,  57 },
        {  -5,  71 }, {   0,  24 }, {  -1,  36 }, {  -2,  42 },
        {  -2,  52 }, {  -9,  57 }, {  -6,  63 }, {  -4,  65 },
        {  -4,  67 }, {  -7,  82 },

        /* 436 - 459 */
        {  -3,  81 }, {  -3,  76 }, {  -7,  72 }, {  -6,  78 },
        { -12,  72 }, { -14,  68 }, {  -3,  70 }, {  -6,  76 },
        {  -5,  66 }, {  -5,  62 }, {   0,  57 }, {  -4,  61 },
        {  -9,  60 }, {   1,  54 }, {   2,  58 }, {  17, -10 },
        {  32, -13 }, {  42,  -9 }, {  49,  -5 }, {  53,   0 },
        {  64,   3 }, {  68,  10 }, {  66,  27 }, {  47,  57 },
    },

    /* i_cabac_init_idc == 2 */
    {
        /* 0 - 10 */
        {  20, -15 }, {   2,  54 }, {   3,  74 }, {  20, -15 },
        {   2,  54 }, {   3,  74 }, { -28, 127 }, { -23, 104 },
        {  -6,  53 }, {  -1,  54 }, {   7,  51 },

        /* 11 - 23 */
        {  29,  16 }, {  25,   0 }, {  14,   0 }, { -10,  51 },
        {  -3,  62 }, { -27,  99 }, {  26,  16 }, {  -4,  85 },
        { -24, 102 }, {   5,  57 }, {   6,  57 }, { -17,  73 },
        {  14,  57 },

        /* 24 - 39 */
        {  20,  40 }, {  20,  10 }, {  29,   0 }, {  54,   0 },
        {  37,  42 }, {  12,  97 }, { -32, 127 }, { -22, 117 },
        {  -2,  74 }, {  -4,  85 }, { -24, 102 }, {   5,  57 },
        {  -6,  93 }, { -14,  88 }, {  -6,  44 }, {   4,  55 },

        /* 40 - 53 */
        { -11,  89 },{ -15,  103 },{ -21,  116 },{  19,  57 },
        {  20,  58 },{   4,  84 },{   6,  96 },{   1,  63 },
        {  -5,  85 },{ -13,  106 },{   5,  63 },{   6,  75 },
        {  -3,  90 },{  -1,  101 },

        /* 54 - 59 */
        {   3,  55 },{  -4,  79 },{  -2,  75 },{ -12,  97 },
        {  -7,  50 },{   1,  60 },

        /* 60 - 69 */
        { 0, 41 },   { 0, 63 },   { 0, 63 },     { 0, 63 },
        { -9, 83 },  { 4, 86 },   { 0, 97 },     { -7, 72 },
        { 13, 41 },  { 3, 62 },

        /* 70 - 104 */
        {   7,  34 }, {  -9,  88 }, { -20, 127 }, { -36, 127 },
        { -17,  91 }, { -14,  95 }, { -25,  84 }, { -25,  86 },
        { -12,  89 }, { -17,  91 }, { -31, 127 }, { -14,  76 },
        { -18, 103 }, { -13,  90 }, { -37, 127 }, {  11,  80 },
        {   5,  76 }, {   2,  84 }, {   5,  78 }, {  -6,  55 },
        {   4,  61 }, { -14,  83 }, { -37, 127 }, {  -5,  79 },
        { -11, 104 }, { -11,  91 }, { -30, 127 }, {   0,  65 },
        {  -2,  79 }, {   0,  72 }, {  -4,  92 }, {  -6,  56 },
        {   3,  68 }, {  -8,  71 }, { -13,  98 },

        /* 105 -> 165 */
        {  -4,  86 }, { -12,  88 }, {  -5,  82 }, {  -3,  72 },
        {  -4,  67 }, {  -8,  72 }, { -16,  89 }, {  -9,  69 },
        {  -1,  59 }, {   5,  66 }, {   4,  57 }, {  -4,  71 },
        {  -2,  71 }, {   2,  58 }, {  -1,  74 }, {  -4,  44 },
        {  -1,  69 }, {   0,  62 }, {  -7,  51 }, {  -4,  47 },
        {  -6,  42 }, {  -3,  41 }, {  -6,  53 }, {   8,  76 },
        {  -9,  78 }, { -11,  83 }, {   9,  52 }, {   0,  67 },
        {  -5,  90 }, {   1,  67 }, { -15,  72 }, {  -5,  75 },
        {  -8,  80 }, { -21,  83 }, { -21,  64 }, { -13,  31 },
        { -25,  64 }, { -29,  94 }, {   9,  75 }, {  17,  63 },
        {  -8,  74 }, {  -5,  35 }, {  -2,  27 }, {  13,  91 },
        {   3,  65 }, {  -7,  69 }, {   8,  77 }, { -10,  66 },
        {   3,  62 }, {  -3,  68 }, { -20,  81 }, {   0,  30 },
        {   1,   7 }, {  -3,  23 }, { -21,  74 }, {  16,  66 },
        { -23, 124 }, {  17,  37 }, {  44, -18 }, {  50, -34 },
        { -22, 127 },

        /* 166 - 226 */
        {   4,  39 }, {   0,  42 }, {   7,  34 }, {  11,  29 },
        {   8,  31 }, {   6,  37 }, {   7,  42 }, {   3,  40 },
        {   8,  33 }, {  13,  43 }, {  13,  36 }, {   4,  47 },
        {   3,  55 }, {   2,  58 }, {   6,  60 }, {   8,  44 },
        {  11,  44 }, {  14,  42 }, {   7,  48 }, {   4,  56 },
        {   4,  52 }, {  13,  37 }, {   9,  49 }, {  19,  58 },
        {  10,  48 }, {  12,  45 }, {   0,  69 }, {  20,  33 },
        {   8,  63 }, {  35, -18 }, {  33, -25 }, {  28,  -3 },
        {  24,  10 }, {  27,   0 }, {  34, -14 }, {  52, -44 },
        {  39, -24 }, {  19,  17 }, {  31,  25 }, {  36,  29 },
        {  24,  33 }, {  34,  15 }, {  30,  20 }, {  22,  73 },
        {  20,  34 }, {  19,  31 }, {  27,  44 }, {  19,  16 },
        {  15,  36 }, {  15,  36 }, {  21,  28 }, {  25,  21 },
        {  30,  20 }, {  31,  12 }, {  27,  16 }, {  24,  42 },
        {   0,  93 }, {  14,  56 }, {  15,  57 }, {  26,  38 },
        { -24, 127 },

        /* 227 - 275 */
        { -24, 115 }, { -22,  82 }, {  -9,  62 }, {   0,  53 },
        {   0,  59 }, { -14,  85 }, { -13,  89 }, { -13,  94 },
        { -11,  92 }, { -29, 127 }, { -21, 100 }, { -14,  57 },
        { -12,  67 }, { -11,  71 }, { -10,  77 }, { -21,  85 },
        { -16,  88 }, { -23, 104 }, { -15,  98 }, { -37, 127 },
        { -10,  82 }, {  -8,  48 }, {  -8,  61 }, {  -8,  66 },
        {  -7,  70 }, { -14,  75 }, { -10,  79 }, {  -9,  83 },
        { -12,  92 }, { -18, 108 }, {  -4,  79 }, { -22,  69 },
        { -16,  75 }, {  -2,  58 }, {   1,  58 }, { -13,  78 },
        {  -9,  83 }, {  -4,  81 }, { -13,  99 }, { -13,  81 },
        {  -6,  38 }, { -13,  62 }, {  -6,  58 }, {  -2,  59 },
        { -16,  73 }, { -10,  76 }, { -13,  86 }, {  -9,  83 },
        { -10,  87 },

        /* 276 a bit special (not used, bypass is used instead) */
        { 0, 0 },

        /* 277 - 337 */
        { -22, 127 }, { -25, 127 }, { -25, 120 }, { -27, 127 },
        { -19, 114 }, { -23, 117 }, { -25, 118 }, { -26, 117 },
        { -24, 113 }, { -28, 118 }, { -31, 120 }, { -37, 124 },
        { -10,  94 }, { -15, 102 }, { -10,  99 }, { -13, 106 },
        { -50, 127 }, {  -5,  92 }, {  17,  57 }, {  -5,  86 },
        { -13,  94 }, { -12,  91 }, {  -2,  77 }, {   0,  71 },
        {  -1,  73 }, {   4,  64 }, {  -7,  81 }, {   5,  64 },
        {  15,  57 }, {   1,  67 }, {   0,  68 }, { -10,  67 },
        {   1,  68 }, {   0,  77 }, {   2,  64 }, {   0,  68 },
        {  -5,  78 }, {   7,  55 }, {   5,  59 }, {   2,  65 },
        {  14,  54 }, {  15,  44 }, {   5,  60 }, {   2,  70 },
        {  -2,  76 }, { -18,  86 }, {  12,  70 }, {   5,  64 },
        { -12,  70 }, {  11,  55 }, {   5,  56 }, {   0,  69 },
        {   2,  65 }, {  -6,  74 }, {   5,  54 }, {   7,  54 },
        {  -6,  76 }, { -11,  82 }, {  -2,  77 }, {  -2,  77 },
        {  25,  42 },

        /* 338 - 398 */
        {  17, -13 }, {  16,  -9 }, {  17, -12 }, {  27, -21 },
        {  37, -30 }, {  41, -40 }, {  42, -41 }, {  48, -47 },
        {  39, -32 }, {  46, -40 }, {  52, -51 }, {  46, -41 },
        {  52, -39 }, {  43, -19 }, {  32,  11 }, {  61, -55 },
        {  56, -46 }, {  62, -50 }, {  81, -67 }, {  45, -20 },
        {  35,  -2 }, {  28,  15 }, {  34,   1 }, {  39,   1 },
        {  30,  17 }, {  20,  38 }, {  18,  45 }, {  15,  54 },
        {   0,  79 }, {  36, -16 }, {  37, -14 }, {  37, -17 },
        {  32,   1 }, {  34,  15 }, {  29,  15 }, {  24,  25 },
        {  34,  22 }, {  31,  16 }, {  35,  18 }, {  31,  28 },
        {  33,  41 }, {  36,  28 }, {  27,  47 }, {  21,  62 },
        {  18,  31 }, {  19,  26 }, {  36,  24 }, {  24,  23 },
        {  27,  16 }, {  24,  30 }, {  31,  29 }, {  22,  41 },
        {  22,  42 }, {  16,  60 }, {  15,  52 }, {  14,  60 },
        {   3,  78 }, { -16, 123 }, {  21,  53 }, {  22,  56 },
        {  25,  61 },

        /* 399 - 435 */
        {  21,  33 }, {  19,  50 }, {  17,  61 },
        {  -3,  78 }, {  -8,  74 }, {  -9,  72 }, { -10,  72 },
        { -18,  75 }, { -12,  71 }, { -11,  63 }, {  -5,  70 },
        { -17,  75 }, { -14,  72 }, { -16,  67 }, {  -8,  53 },
        { -14,  59 }, {  -9,  52 }, { -11,  68 }, {   9,  -2 },
        {  30, -10 }, {  31,  -4 }, {  33,  -1 }, {  33,   7 },
        {  31,  12 }, {  37,  23 }, {  31,  38 }, {  20,  64 },
        {  -9,  71 }, {  -7,  37 }, {  -8,  44 }, { -11,  49 },
        { -10,  56 }, { -12,  59 }, {  -8,  63 }, {  -9,  67 },
        {  -6,  68 }, { -10,  79 },

        /* 436 - 459 */
        {  -3,  78 }, {  -8,  74 }, {  -9,  72 }, { -10,  72 },
        { -18,  75 }, { -12,  71 }, { -11,  63 }, {  -5,  70 },
        { -17,  75 }, { -14,  72 }, { -16,  67 }, {  -8,  53 },
        { -14,  59 }, {  -9,  52 }, { -11,  68 }, {   9,  -2 },
        {  30, -10 }, {  31,  -4 }, {  33,  -1 }, {  33,   7 },
        {  31,  12 }, {  37,  23 }, {  31,  38 }, {  20,  64 },
    }
};

/**
 * Initialise the 460 CABAC context states of h for the current slice.
 *
 * The init table is cabac_context_init_I for I slices and otherwise the
 * cabac_context_init_PB table selected by h->cabac_init_idc. Each entry
 * holds a pair {m, n} that is combined with the current qscale.
 */
void ff_h264_init_cabac_states(H264Context *h) {
    const int qscale = h->s.qscale;
    const int8_t (*init)[2] = h->slice_type_nos == FF_I_TYPE
                            ? cabac_context_init_I
                            : cabac_context_init_PB[h->cabac_init_idc];
    int idx;

    for( idx = 0; idx < 460; idx++ ) {
        const int m = init[idx][0];
        const int n = init[idx][1];
        int state   = 2*(((m * qscale) >> 4) + n) - 127;

        /* negative values are replaced by their bitwise complement
         * (state>>31 is all ones for a negative int) */
        state ^= state >> 31;

        /* saturate at 124/125 while keeping the lowest bit */
        if( state > 124 )
            state = 124 + (state & 1);

        h->cabac_state[idx] = state;
    }
}

/**
 * Decode the field decoding flag of the current macroblock pair.
 *
 * The context (cabac_state[70..72]) is the sum of two neighbour terms:
 * the current field decoding flag when mb_x is nonzero (standing in for
 * the left neighbour), and bit 7 of the mb_type two rows above when that
 * macroblock belongs to the current slice.
 *
 * @return the decoded bin
 */
static int decode_cabac_field_decoding_flag(H264Context *h) {
    MpegEncContext * const s = &h->s;
    const long above_xy = h->mb_xy - 2L*s->mb_stride;

    /* for FMO this would have to be:
     * (s->current_picture.mb_type[mba_xy]>>7)&(h->slice_table[mba_xy] == h->slice_num) */
    const unsigned long left_term  = h->mb_field_decoding_flag & !!s->mb_x;
    const unsigned long above_term = (s->current_picture.mb_type[above_xy]>>7)
                                   & (h->slice_table[above_xy] == h->slice_num);

    return get_cabac_noinline( &h->cabac, h->cabac_state + 70 + (left_term + above_term) );
}

/**
 * Decode an intra macroblock type.
 *
 * @param ctx_base    index of the first context in h->cabac_state to use
 * @param intra_slice nonzero when called for an intra slice; the first bin
 *                    then picks one of three contexts from the left/top
 *                    neighbour types, and later bins use shifted contexts
 * @return 0 for I4x4, 25 for PCM, otherwise an I16x16 type in 1..24
 */
static int decode_cabac_intra_mb_type(H264Context *h, int ctx_base, int intra_slice) {
    uint8_t *state = h->cabac_state + ctx_base;
    int luma_bit, mb_type;

    if( !intra_slice ) {
        if( !get_cabac_noinline( &h->cabac, state ) )
            return 0;   /* I4x4 */
    } else {
        const int left_flag = !!( h->left_type[0] & (MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM) );
        const int top_flag  = !!( h->top_type     & (MB_TYPE_INTRA16x16|MB_TYPE_INTRA_PCM) );

        if( !get_cabac_noinline( &h->cabac, state + left_flag + top_flag ) )
            return 0;   /* I4x4 */
        state += 2;
    }

    if( get_cabac_terminate( &h->cabac ) )
        return 25;      /* PCM */

    /* I16x16: the type number is assembled from the remaining bins */
    luma_bit = get_cabac_noinline( &h->cabac, state + 1 );      /* cbp_luma != 0 */
    mb_type  = 1 + 12 * luma_bit;

    if( get_cabac_noinline( &h->cabac, state + 2 ) ) {          /* cbp_chroma */
        const int chroma_bit = get_cabac_noinline( &h->cabac, state + 2 + intra_slice );
        mb_type += 4 + 4 * chroma_bit;
    }

    mb_type += 2 * get_cabac_noinline( &h->cabac, state + 3 + intra_slice );
    mb_type += 1 * get_cabac_noinline( &h->cabac, state + 3 + 2*intra_slice );
    return mb_type;
}

/**
 * Decode the skip flag of the macroblock at (mb_x, mb_y).
 *
 * The context counts the left and top neighbours that lie in the current
 * slice and are not skipped; B slices use contexts 13 entries further on.
 * In MBAFF frames the neighbour addresses are derived from mb_x/mb_y and
 * the field state of the macroblocks involved; otherwise they are derived
 * from h->mb_xy.
 *
 * @return the decoded bin
 */
static int decode_cabac_mb_skip( H264Context *h, int mb_x, int mb_y ) {
    MpegEncContext * const s = &h->s;
    int left_xy, top_xy;
    int ctx;

    if( !FRAME_MBAFF ){
        left_xy = h->mb_xy - 1;
        top_xy  = h->mb_xy - (s->mb_stride << FIELD_PICTURE);
    }else{ //FIXME merge with the stuff in fill_caches?
        const int pair_xy   = mb_x + (mb_y&~1)*s->mb_stride;
        const int is_bottom = mb_y & 1;

        /* left neighbour: start at the top row of the pair, move to the
         * bottom row when the conditions below hold */
        left_xy = pair_xy - 1;
        if( is_bottom
            && h->slice_table[left_xy] == h->slice_num
            && MB_FIELD == !!IS_INTERLACED( s->current_picture.mb_type[left_xy] ) )
            left_xy += s->mb_stride;

        if( !MB_FIELD ){
            top_xy = mb_x + (mb_y-1)*s->mb_stride;
        }else{
            top_xy = pair_xy - s->mb_stride;
            if( !is_bottom
                && h->slice_table[top_xy] == h->slice_num
                && IS_INTERLACED( s->current_picture.mb_type[top_xy] ) )
                top_xy -= s->mb_stride;
        }
    }

    ctx  = h->slice_table[left_xy] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[left_xy] );
    ctx += h->slice_table[top_xy ] == h->slice_num && !IS_SKIP( s->current_picture.mb_type[top_xy ] );

    if( h->slice_type_nos == FF_B_TYPE )
        ctx += 13;

    return get_cabac_noinline( &h->cabac, &h->cabac_state[11+ctx] );
}

/**
 * Decode an intra 4x4 prediction mode.
 *
 * @param pred_mode the predicted mode
 * @return pred_mode if the first bin (context 68) is set; otherwise a 3-bit
 *         value read LSB first with context 69, incremented by one when it
 *         is greater than or equal to pred_mode
 */
static int decode_cabac_mb_intra4x4_pred_mode( H264Context *h, int pred_mode ) {
    int rem = 0;
    int bit;

    if( get_cabac( &h->cabac, &h->cabac_state[68] ) )
        return pred_mode;

    for( bit = 0; bit < 3; bit++ )
        rem += get_cabac( &h->cabac, &h->cabac_state[69] ) << bit;

    /* the remaining modes skip over the predicted one */
    if( rem >= pred_mode )
        rem++;
    return rem;
}

/**
 * Decode the intra chroma prediction mode.
 *
 * The first bin uses context 64 plus the number of available neighbours
 * (left, top) whose chroma_pred_mode_table entry is nonzero. Up to two
 * further bins, both with context 67, extend the value in unary fashion.
 *
 * @return the decoded mode, 0..3
 */
static int decode_cabac_mb_chroma_pre_mode( H264Context *h) {
    uint8_t * const state = h->cabac_state + 64;
    int mode;

    /* No need to test for IS_INTRA4x4 and IS_INTRA16x16, as we set chroma_pred_mode_table to 0 */
    const int left_set = h->left_type[0] && h->chroma_pred_mode_table[ h->left_mb_xy[0] ] != 0;
    const int top_set  = h->top_type     && h->chroma_pred_mode_table[ h->top_mb_xy     ] != 0;

    if( !get_cabac_noinline( &h->cabac, state + left_set + top_set ) )
        return 0;

    /* count further set bins, stopping at the first zero or at mode 3 */
    for( mode = 1; mode < 3; mode++ ) {
        if( !get_cabac_noinline( &h->cabac, state + 3 ) )
            break;
    }
    return mode;
}

/**
 * Decode the four luma bits of the coded block pattern.
 *
 * Each bit is decoded with context 73 + a + 2*b, where a and b are 1 when
 * the corresponding left/top neighbouring bit is clear. Neighbouring bits
 * come from h->left_cbp / h->top_cbp or from bits decoded earlier in this
 * call.
 *
 * @return the 4-bit luma cbp
 */
static int decode_cabac_mb_cbp_luma( H264Context *h) {
    const int left = h->left_cbp;
    const int top  = h->top_cbp;
    uint8_t * const state = h->cabac_state + 73;
    int b0, b1, b2, b3;

    b0 = get_cabac_noinline(&h->cabac, state + !(left & 0x02) + 2 * !(top & 0x04));
    b1 = get_cabac_noinline(&h->cabac, state + !b0            + 2 * !(top & 0x08));
    b2 = get_cabac_noinline(&h->cabac, state + !(left & 0x08) + 2 * !b0);
    b3 = get_cabac_noinline(&h->cabac, state + !b2            + 2 * !b1);

    return b0 + (b1 << 1) + (b2 << 2) + (b3 << 3);
}
/**
 * Decode the chroma part of the coded block pattern.
 *
 * The neighbour chroma cbp values are bits 4-5 of h->left_cbp / h->top_cbp.
 * The first bin uses contexts 77..80 (neighbour value > 0), the second
 * uses contexts 81..84 (neighbour value == 2).
 *
 * @return 0, 1 or 2
 */
static int decode_cabac_mb_cbp_chroma( H264Context *h) {
    const int left = (h->left_cbp>>4)&0x03;
    const int top  = (h-> top_cbp>>4)&0x03;
    uint8_t * const state = h->cabac_state + 77;

    if( !get_cabac_noinline( &h->cabac, state + (left > 0) + 2*(top > 0) ) )
        return 0;

    return 1 + get_cabac_noinline( &h->cabac, state + 4 + (left == 2) + 2*(top == 2) );
}

/**
 * Decode a P sub-macroblock type using contexts 21..23.
 *
 * @return 0 (8x8), 1 (8x4), 2 (4x8) or 3 (4x4)
 */
static int decode_cabac_p_mb_sub_type( H264Context *h ) {
    uint8_t * const state = h->cabac_state;

    if( get_cabac( &h->cabac, state + 21 ) )
        return 0;                                           /* 8x8 */
    if( !get_cabac( &h->cabac, state + 22 ) )
        return 1;                                           /* 8x4 */
    return get_cabac( &h->cabac, state + 23 ) ? 2 : 3;      /* 4x8 : 4x4 */
}
/**
 * Decode a B sub-macroblock type using contexts 36..39.
 *
 * @return the sub type number, 0..12
 */
static int decode_cabac_b_mb_sub_type( H264Context *h ) {
    uint8_t * const state = h->cabac_state;
    int base, hi, lo;

    if( !get_cabac( &h->cabac, state + 36 ) )
        return 0;   /* B_Direct_8x8 */
    if( !get_cabac( &h->cabac, state + 37 ) )
        return 1 + get_cabac( &h->cabac, state + 39 ); /* B_L0_8x8, B_L1_8x8 */

    if( !get_cabac( &h->cabac, state + 38 ) ) {
        base = 3;
    } else if( get_cabac( &h->cabac, state + 39 ) ) {
        return 11 + get_cabac( &h->cabac, state + 39 ); /* B_L1_4x4, B_Bi_4x4 */
    } else {
        base = 7;
    }

    /* two more bins select one of four types starting at base */
    hi = get_cabac( &h->cabac, state + 39 );
    lo = get_cabac( &h->cabac, state + 39 );
    return base + 2*hi + lo;
}

/**
 * Decode a reference index for block n of the given list.
 *
 * The first context (54 + ctx) depends on whether the left and top
 * neighbours in ref_cache have a reference index > 0; in B slices a
 * neighbour is additionally ignored when its direct_cache entry has
 * the (MB_TYPE_DIRECT2>>1) bit set.
 *
 * @return the reference index, or -1 once 32 set bins have been read
 */
static int decode_cabac_mb_ref( H264Context *h, int list, int n ) {
    const int pos = scan8[n];
    int left_used = h->ref_cache[list][pos - 1] > 0;
    int top_used  = h->ref_cache[list][pos - 8] > 0;
    int ctx, ref;

    if( h->slice_type_nos == FF_B_TYPE ) {
        left_used = left_used && !(h->direct_cache[pos - 1]&(MB_TYPE_DIRECT2>>1));
        top_used  = top_used  && !(h->direct_cache[pos - 8]&(MB_TYPE_DIRECT2>>1));
    }
    ctx = left_used + 2*top_used;

    /* unary code: every set bin increments the index */
    for( ref = 0; get_cabac( &h->cabac, &h->cabac_state[54+ctx] ); ) {
        if( ++ref >= 32 /*h->ref_list[list]*/ )
            return -1;
        ctx = (ctx>>2)+4;
    }
    return ref;
}

/**
 * Decode one component of a motion vector difference.
 *
 * @param ctxbase index of the first context in h->cabac_state
 * @param amvd    sum of the neighbouring absolute differences, used to
 *                select the context of the first bin
 * @param mvda    receives the decoded absolute value, clipped to 70 when
 *                the escape code was used; not written on overflow
 * @return the signed difference, or INT_MIN if the escape prefix is too long
 */
static int decode_cabac_mb_mvd( H264Context *h, int ctxbase, int amvd, int *mvda) {
    /* branch-free form of (amvd>2) + (amvd>32): each shifted term is
     * -1 when the comparison fails and 0 otherwise */
    const int first_ctx = ((amvd-3)>>(INT_BIT-1)) + ((amvd-33)>>(INT_BIT-1)) + 2;
    uint8_t *state = h->cabac_state + ctxbase;
    int abs_mvd;

    if( !get_cabac( &h->cabac, state + first_ctx ) ) {
        *mvda = 0;
        return 0;
    }

    /* unary part, up to 9; the context advances for the first bins only */
    state += 3;
    for( abs_mvd = 1; abs_mvd < 9 && get_cabac( &h->cabac, state ); abs_mvd++ ) {
        if( abs_mvd < 4 )
            state++;
    }

    if( abs_mvd < 9 ) {
        *mvda = abs_mvd;
    } else {
        /* escape: bypass-coded prefix followed by k suffix bits */
        int k = 3;

        while( get_cabac_bypass( &h->cabac ) ) {
            abs_mvd += 1 << k;
            k++;
            if( k > 24 ) {
                av_log(h->s.avctx, AV_LOG_ERROR, "overflow in decode_cabac_mb_mvd\n");
                return INT_MIN;
            }
        }
        while( k-- )
            abs_mvd += get_cabac_bypass( &h->cabac ) << k;

        *mvda = abs_mvd < 70 ? abs_mvd : 70;
    }

    return get_cabac_bypass_sign( &h->cabac, -abs_mvd );
}

/**
 * Decode the x and y motion vector differences of block n in the given
 * list and add them to the prediction.
 *
 * Expects int variables mx, my (prediction in, final vector out) and
 * mpx, mpy (receive the absolute differences) in the enclosing scope.
 * The neighbouring absolute differences from mvd_cache (left and top)
 * are summed to select the contexts.
 *
 * decode_cabac_mb_mvd() signals an over-long escape code by returning
 * INT_MIN and leaves its output argument unwritten in that case. The
 * macro therefore makes the enclosing function return -1 instead of
 * adding the sentinel to the vector and letting the caller store the
 * unset mpx/mpy. It must only be expanded inside a function returning int.
 */
#define DECODE_CABAC_MB_MVD( h,  list,  n )\
{\
    int amvd0 = h->mvd_cache[list][scan8[n] - 1][0] +\
                h->mvd_cache[list][scan8[n] - 8][0];\
    int amvd1 = h->mvd_cache[list][scan8[n] - 1][1] +\
                h->mvd_cache[list][scan8[n] - 8][1];\
    int mxd = decode_cabac_mb_mvd( h, 40, amvd0, &mpx );\
    int myd = decode_cabac_mb_mvd( h, 47, amvd1, &mpy );\
\
    if( mxd == INT_MIN || myd == INT_MIN )\
        return -1;\
    mx += mxd;\
    my += myd;\
}

/**
 * Compute the context increment for a coded block flag.
 *
 * For DC blocks the neighbour information comes from h->left_cbp and
 * h->top_cbp (bit 8 for cat 0, bit 6+idx otherwise); for all other blocks
 * it comes from the left and top entries of non_zero_count_cache.
 *
 * @return 4*cat, plus 1 if the left value is > 0, plus 2 if the top value is > 0
 */
static av_always_inline int get_cabac_cbf_ctx( H264Context *h, int cat, int idx, int is_dc ) {
    int left_nz, top_nz;

    if( !is_dc ) {
        assert(cat == 1 || cat == 2 || cat == 4);
        left_nz = h->non_zero_count_cache[scan8[idx] - 1];
        top_nz  = h->non_zero_count_cache[scan8[idx] - 8];
    } else if( cat == 0 ) {
        left_nz = h->left_cbp&0x100;
        top_nz  = h-> top_cbp&0x100;
    } else {
        left_nz = (h->left_cbp>>(6+idx))&0x01;
        top_nz  = (h-> top_cbp>>(6+idx))&0x01;
    }

    return 4 * cat + (left_nz > 0) + 2 * (top_nz > 0);
}

/**
 * Context offset of the "last coefficient" flag for each of the first 63
 * scan positions of an 8x8 block. The C significance decoder adds the
 * entry for the current position to the last-coefficient context base.
 * NOTE(review): declared with DECLARE_ASM_CONST, so presumably also
 * referenced from the x86 assembly significance decoder - confirm.
 */
DECLARE_ASM_CONST(1, uint8_t, last_coeff_flag_offset_8x8)[63] = {
    0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
    2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
    3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4,
    5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8
};

/**
 * Decode the residual coefficients of one block.
 *
 * The block is decoded in three steps: the coded block flag (skipped for
 * non-DC cat 5 blocks), the significance map, which collects the scan
 * positions of the nonzero coefficients in index[], and finally the levels,
 * which are read in reverse scan order and written to block[].
 *
 * @param h         decoder context; CABAC state, context table, cbp_table
 *                  and non_zero_count_cache are read and updated
 * @param block     destination, written at block[scantable[pos]]
 * @param cat       block category, see the table inside the function
 * @param n         block index, its meaning depends on cat
 * @param scantable maps a scan position to an index into block
 * @param qmul      dequantisation factors indexed like block; not read
 *                  when is_dc is set
 * @param max_coeff number of coefficients in the block
 * @param is_dc     nonzero for DC blocks, whose levels are stored without
 *                  dequantisation
 */
static av_always_inline void decode_cabac_residual_internal( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff, int is_dc ) {
    /* context offsets into h->cabac_state, indexed by [MB_FIELD][cat] */
    static const int significant_coeff_flag_offset[2][6] = {
      { 105+0, 105+15, 105+29, 105+44, 105+47, 402 },
      { 277+0, 277+15, 277+29, 277+44, 277+47, 436 }
    };
    static const int last_coeff_flag_offset[2][6] = {
      { 166+0, 166+15, 166+29, 166+44, 166+47, 417 },
      { 338+0, 338+15, 338+29, 338+44, 338+47, 451 }
    };
    /* context offsets into h->cabac_state, indexed by cat */
    static const int coeff_abs_level_m1_offset[6] = {
        227+0, 227+10, 227+20, 227+30, 227+39, 426
    };
    /* per scan position significance context offset for 8x8 blocks,
     * indexed by [MB_FIELD][position] */
    static const uint8_t significant_coeff_flag_offset_8x8[2][63] = {
      { 0, 1, 2, 3, 4, 5, 5, 4, 4, 3, 3, 4, 4, 4, 5, 5,
        4, 4, 4, 4, 3, 3, 6, 7, 7, 7, 8, 9,10, 9, 8, 7,
        7, 6,11,12,13,11, 6, 7, 8, 9,14,10, 9, 8, 6,11,
       12,13,11, 6, 9,14,10, 9,11,12,13,11,14,10,12 },
      { 0, 1, 1, 2, 2, 3, 3, 4, 5, 6, 7, 7, 7, 8, 4, 5,
        6, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,11,12,11,
        9, 9,10,10, 8,11,12,11, 9, 9,10,10, 8,13,13, 9,
        9,10,10, 8,13,13, 9, 9,10,10,14,14,14,14,14 }
    };
    /* node ctx: 0..3: abslevel1 (with abslevelgt1 == 0).
     * 4..7: abslevelgt1 + 3 (and abslevel1 doesn't matter).
     * map node ctx => cabac ctx for level=1 */
    static const uint8_t coeff_abs_level1_ctx[8] = { 1, 2, 3, 4, 0, 0, 0, 0 };
    /* map node ctx => cabac ctx for level>1 */
    static const uint8_t coeff_abs_levelgt1_ctx[8] = { 5, 5, 5, 5, 6, 7, 8, 9 };
    static const uint8_t coeff_abs_level_transition[2][8] = {
    /* update node ctx after decoding a level=1 */
        { 1, 2, 3, 3, 4, 5, 6, 7 },
    /* update node ctx after decoding a level>1 */
        { 4, 4, 4, 4, 5, 6, 7, 7 }
    };

    /* scan positions of the significant coefficients, in scan order */
    int index[64];

    int av_unused last;
    int coeff_count = 0;
    int node_ctx = 0;

    uint8_t *significant_coeff_ctx_base;
    uint8_t *last_coeff_ctx_base;
    uint8_t *abs_level_m1_ctx_base;

    /* On non-x86 targets the decoder works on a local copy of the CABAC
     * state (range, low, bytestream), which is written back to h->cabac
     * before every return. */
#if !ARCH_X86
#define CABAC_ON_STACK
#endif
#ifdef CABAC_ON_STACK
#define CC &cc
    CABACContext cc;
    cc.range     = h->cabac.range;
    cc.low       = h->cabac.low;
    cc.bytestream= h->cabac.bytestream;
#else
#define CC &h->cabac
#endif


    /* cat: 0-> DC 16x16  n = 0
     *      1-> AC 16x16  n = luma4x4idx
     *      2-> Luma4x4   n = luma4x4idx
     *      3-> DC Chroma n = iCbCr
     *      4-> AC Chroma n = 16 + 4 * iCbCr + chroma4x4idx
     *      5-> Luma8x8   n = 4 * luma8x8idx
     */

    /* read coded block flag */
    if( is_dc || cat != 5 ) {
        if( get_cabac( CC, &h->cabac_state[85 + get_cabac_cbf_ctx( h, cat, n, is_dc ) ] ) == 0 ) {
            if( !is_dc )
                h->non_zero_count_cache[scan8[n]] = 0;

#ifdef CABAC_ON_STACK
            h->cabac.range     = cc.range     ;
            h->cabac.low       = cc.low       ;
            h->cabac.bytestream= cc.bytestream;
#endif
            return;
        }
    }

    significant_coeff_ctx_base = h->cabac_state
        + significant_coeff_flag_offset[MB_FIELD][cat];
    last_coeff_ctx_base = h->cabac_state
        + last_coeff_flag_offset[MB_FIELD][cat];
    abs_level_m1_ctx_base = h->cabac_state
        + coeff_abs_level_m1_offset[cat];

    /* Significance map. For each of the first `coefs` scan positions a
     * significance flag is read; a significant position is appended to
     * index[] and followed by a "last" flag that ends the scan. If the scan
     * runs to the end without a "last" flag, the final position
     * (max_coeff - 1) is appended as well without reading a flag for it.
     * Note that the if/else below is closed differently in the two
     * preprocessor branches. */
    if( !is_dc && cat == 5 ) {
#define DECODE_SIGNIFICANCE( coefs, sig_off, last_off ) \
        for(last= 0; last < coefs; last++) { \
            uint8_t *sig_ctx = significant_coeff_ctx_base + sig_off; \
            if( get_cabac( CC, sig_ctx )) { \
                uint8_t *last_ctx = last_coeff_ctx_base + last_off; \
                index[coeff_count++] = last; \
                if( get_cabac( CC, last_ctx ) ) { \
                    last= max_coeff; \
                    break; \
                } \
            } \
        }\
        if( last == max_coeff -1 ) {\
            index[coeff_count++] = last;\
        }
        const uint8_t *sig_off = significant_coeff_flag_offset_8x8[MB_FIELD];
#if ARCH_X86 && HAVE_7REGS && HAVE_EBX_AVAILABLE && !defined(BROKEN_RELOCATIONS)
        coeff_count= decode_significance_8x8_x86(CC, significant_coeff_ctx_base, index, sig_off);
    } else {
        coeff_count= decode_significance_x86(CC, max_coeff, significant_coeff_ctx_base, index);
#else
        DECODE_SIGNIFICANCE( 63, sig_off[last], last_coeff_flag_offset_8x8[last] );
    } else {
        DECODE_SIGNIFICANCE( max_coeff - 1, last, last );
#endif
    }
    assert(coeff_count > 0);

    /* record that this block has coefficients */
    if( is_dc ) {
        if( cat == 0 )
            h->cbp_table[h->mb_xy] |= 0x100;
        else
            h->cbp_table[h->mb_xy] |= 0x40 << n;
    } else {
        if( cat == 5 )
            fill_rectangle(&h->non_zero_count_cache[scan8[n]], 2, 2, 8, coeff_count, 1);
        else {
            assert( cat == 1 || cat == 2 || cat == 4 );
            h->non_zero_count_cache[scan8[n]] = coeff_count;
        }
    }

    /* levels, decoded from the last significant coefficient backwards */
    do {
        uint8_t *ctx = coeff_abs_level1_ctx[node_ctx] + abs_level_m1_ctx_base;

        int j= scantable[index[--coeff_count]];

        if( get_cabac( CC, ctx ) == 0 ) {
            /* |level| == 1 */
            node_ctx = coeff_abs_level_transition[0][node_ctx];
            if( is_dc ) {
                block[j] = get_cabac_bypass_sign( CC, -1);
            }else{
                block[j] = (get_cabac_bypass_sign( CC, -qmul[j]) + 32) >> 6;
            }
        } else {
            /* |level| >= 2: unary part up to 15 with a single context */
            int coeff_abs = 2;
            ctx = coeff_abs_levelgt1_ctx[node_ctx] + abs_level_m1_ctx_base;
            node_ctx = coeff_abs_level_transition[1][node_ctx];

            while( coeff_abs < 15 && get_cabac( CC, ctx ) ) {
                coeff_abs++;
            }

            if( coeff_abs >= 15 ) {
                /* Escape: a bypass-coded unary prefix gives the number of
                 * suffix bits. The prefix length is capped so that a damaged
                 * stream cannot make the suffix loop below overflow
                 * coeff_abs; with at most 23 suffix bits the value stays
                 * below 2^24 + 14. */
                int prefix = 0;
                while( get_cabac_bypass( CC ) && prefix < 16+7 ) {
                    prefix++;
                }

                coeff_abs=1;
                while( prefix-- ) {
                    coeff_abs += coeff_abs + get_cabac_bypass( CC );
                }
                coeff_abs+= 14;
            }

            if( is_dc ) {
                block[j] = get_cabac_bypass_sign( CC, -coeff_abs );
            }else{
                block[j] = (get_cabac_bypass_sign( CC, -coeff_abs ) * qmul[j] + 32) >> 6;
            }
        }
    } while( coeff_count );
#ifdef CABAC_ON_STACK
    h->cabac.range     = cc.range     ;
    h->cabac.low       = cc.low       ;
    h->cabac.bytestream= cc.bytestream;
#endif

}

/**
 * Decode a DC residual block.
 * Calls decode_cabac_residual_internal() with is_dc = 1 and no
 * dequantisation table (qmul = NULL).
 */
static void decode_cabac_residual_dc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, int max_coeff ) {
    decode_cabac_residual_internal(h, block, cat, n, scantable, NULL, max_coeff, 1);
}

/**
 * Decode a non-DC residual block.
 * Calls decode_cabac_residual_internal() with is_dc = 0 and the given
 * dequantisation table qmul.
 */
static void decode_cabac_residual_nondc( H264Context *h, DCTELEM *block, int cat, int n, const uint8_t *scantable, const uint32_t *qmul, int max_coeff ) {
    decode_cabac_residual_internal(h, block, cat, n, scantable, qmul, max_coeff, 0);
}

/**
 * decodes a macroblock
 * @return 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
 */
int ff_h264_decode_mb_cabac(H264Context *h) {
    MpegEncContext * const s = &h->s;
    int mb_xy;
    int mb_type, partition_count, cbp = 0;
    int dct8x8_allowed= h->pps.transform_8x8_mode;

    mb_xy = h->mb_xy = s->mb_x + s->mb_y*s->mb_stride;

    tprintf(s->avctx, "pic:%d mb:%d/%d\n", h->frame_num, s->mb_x, s->mb_y);
    if( h->slice_type_nos != FF_I_TYPE ) {
        int skip;
        /* a skipped mb needs the aff flag from the following mb */
        if( FRAME_MBAFF && (s->mb_y&1)==1 && h->prev_mb_skipped )
            skip = h->next_mb_skipped;
        else
            skip = decode_cabac_mb_skip( h, s->mb_x, s->mb_y );
        /* read skip flags */
        if( skip ) {
            if( FRAME_MBAFF && (s->mb_y&1)==0 ){
                s->current_picture.mb_type[mb_xy] = MB_TYPE_SKIP;
                h->next_mb_skipped = decode_cabac_mb_skip( h, s->mb_x, s->mb_y+1 );
                if(!h->next_mb_skipped)
                    h->mb_mbaff = h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
            }

            decode_mb_skip(h);

            h->cbp_table[mb_xy] = 0;
            h->chroma_pred_mode_table[mb_xy] = 0;
            h->last_qscale_diff = 0;

            return 0;

        }
    }
    if(FRAME_MBAFF){
        if( (s->mb_y&1) == 0 )
            h->mb_mbaff =
            h->mb_field_decoding_flag = decode_cabac_field_decoding_flag(h);
    }

    h->prev_mb_skipped = 0;

    fill_decode_neighbors(h, -(MB_FIELD));

    if( h->slice_type_nos == FF_B_TYPE ) {
        int ctx = 0;
        assert(h->slice_type_nos == FF_B_TYPE);

        if( !IS_DIRECT( h->left_type[0]-1 ) )
            ctx++;
        if( !IS_DIRECT( h->top_type-1 ) )
            ctx++;

        if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+ctx] ) ){
            mb_type= 0; /* B_Direct_16x16 */
        }else if( !get_cabac_noinline( &h->cabac, &h->cabac_state[27+3] ) ) {
            mb_type= 1 + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ); /* B_L[01]_16x16 */
        }else{
            int bits;
            bits = get_cabac_noinline( &h->cabac, &h->cabac_state[27+4] ) << 3;
            bits+= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 2;
            bits+= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] ) << 1;
            bits+= get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
            if( bits < 8 ){
                mb_type= bits + 3; /* B_Bi_16x16 through B_L1_L0_16x8 */
            }else if( bits == 13 ){
                mb_type= decode_cabac_intra_mb_type(h, 32, 0);
                goto decode_intra_mb;
            }else if( bits == 14 ){
                mb_type= 11; /* B_L1_L0_8x16 */
            }else if( bits == 15 ){
                mb_type= 22; /* B_8x8 */
            }else{
                bits= ( bits<<1 ) + get_cabac_noinline( &h->cabac, &h->cabac_state[27+5] );
                mb_type= bits - 4; /* B_L0_Bi_* through B_Bi_Bi_* */
            }
        }
            partition_count= b_mb_type_info[mb_type].partition_count;
            mb_type=         b_mb_type_info[mb_type].type;
    } else if( h->slice_type_nos == FF_P_TYPE ) {
        if( get_cabac_noinline( &h->cabac, &h->cabac_state[14] ) == 0 ) {
            /* P-type */
            if( get_cabac_noinline( &h->cabac, &h->cabac_state[15] ) == 0 ) {
                /* P_L0_D16x16, P_8x8 */
                mb_type= 3 * get_cabac_noinline( &h->cabac, &h->cabac_state[16] );
            } else {
                /* P_L0_D8x16, P_L0_D16x8 */
                mb_type= 2 - get_cabac_noinline( &h->cabac, &h->cabac_state[17] );
            }
            partition_count= p_mb_type_info[mb_type].partition_count;
            mb_type=         p_mb_type_info[mb_type].type;
        } else {
            mb_type= decode_cabac_intra_mb_type(h, 17, 0);
            goto decode_intra_mb;
        }
    } else {
        mb_type= decode_cabac_intra_mb_type(h, 3, 1);
        if(h->slice_type == FF_SI_TYPE && mb_type)
            mb_type--;
        assert(h->slice_type_nos == FF_I_TYPE);
decode_intra_mb:
        partition_count = 0;
        cbp= i_mb_type_info[mb_type].cbp;
        h->intra16x16_pred_mode= i_mb_type_info[mb_type].pred_mode;
        mb_type= i_mb_type_info[mb_type].type;
    }
    if(MB_FIELD)
        mb_type |= MB_TYPE_INTERLACED;

    h->slice_table[ mb_xy ]= h->slice_num;

    if(IS_INTRA_PCM(mb_type)) {
        const uint8_t *ptr;

        // We assume these blocks are very rare so we do not optimize it.
        // FIXME The two following lines get the bitstream position in the cabac
        // decode, I think it should be done by a function in cabac.h (or cabac.c).
        ptr= h->cabac.bytestream;
        if(h->cabac.low&0x1) ptr--;
        if(CABAC_BITS==16){
            if(h->cabac.low&0x1FF) ptr--;
        }

        // The pixels are stored in the same order as levels in h->mb array.
        memcpy(h->mb, ptr, 256); ptr+=256;
        if(CHROMA){
            memcpy(h->mb+128, ptr, 128); ptr+=128;
        }

        ff_init_cabac_decoder(&h->cabac, ptr, h->cabac.bytestream_end - ptr);

        // All blocks are present
        h->cbp_table[mb_xy] = 0x1ef;
        h->chroma_pred_mode_table[mb_xy] = 0;
        // In deblocking, the quantizer is 0
        s->current_picture.qscale_table[mb_xy]= 0;
        // All coeffs are present
        memset(h->non_zero_count[mb_xy], 16, 32);
        s->current_picture.mb_type[mb_xy]= mb_type;
        h->last_qscale_diff = 0;
        return 0;
    }

    if(MB_MBAFF){
        h->ref_count[0] <<= 1;
        h->ref_count[1] <<= 1;
    }

    fill_decode_caches(h, mb_type);

    if( IS_INTRA( mb_type ) ) {
        int i, pred_mode;
        if( IS_INTRA4x4( mb_type ) ) {
            if( dct8x8_allowed && get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] ) ) {
                mb_type |= MB_TYPE_8x8DCT;
                for( i = 0; i < 16; i+=4 ) {
                    int pred = pred_intra_mode( h, i );
                    int mode = decode_cabac_mb_intra4x4_pred_mode( h, pred );
                    fill_rectangle( &h->intra4x4_pred_mode_cache[ scan8[i] ], 2, 2, 8, mode, 1 );
                }
            } else {
                for( i = 0; i < 16; i++ ) {
                    int pred = pred_intra_mode( h, i );
                    h->intra4x4_pred_mode_cache[ scan8[i] ] = decode_cabac_mb_intra4x4_pred_mode( h, pred );

                //av_log( s->avctx, AV_LOG_ERROR, "i4x4 pred=%d mode=%d\n", pred, h->intra4x4_pred_mode_cache[ scan8[i] ] );
                }
            }
            ff_h264_write_back_intra_pred_mode(h);
            if( ff_h264_check_intra4x4_pred_mode(h) < 0 ) return -1;
        } else {
            h->intra16x16_pred_mode= ff_h264_check_intra_pred_mode( h, h->intra16x16_pred_mode );
            if( h->intra16x16_pred_mode < 0 ) return -1;
        }
        if(CHROMA){
            h->chroma_pred_mode_table[mb_xy] =
            pred_mode                        = decode_cabac_mb_chroma_pre_mode( h );

            pred_mode= ff_h264_check_intra_pred_mode( h, pred_mode );
            if( pred_mode < 0 ) return -1;
            h->chroma_pred_mode= pred_mode;
        }
    } else if( partition_count == 4 ) {
        int i, j, sub_partition_count[4], list, ref[2][4];

        if( h->slice_type_nos == FF_B_TYPE ) {
            for( i = 0; i < 4; i++ ) {
                h->sub_mb_type[i] = decode_cabac_b_mb_sub_type( h );
                sub_partition_count[i]= b_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
                h->sub_mb_type[i]=      b_sub_mb_type_info[ h->sub_mb_type[i] ].type;
            }
            if( IS_DIRECT(h->sub_mb_type[0] | h->sub_mb_type[1] |
                          h->sub_mb_type[2] | h->sub_mb_type[3]) ) {
                ff_h264_pred_direct_motion(h, &mb_type);
                h->ref_cache[0][scan8[4]] =
                h->ref_cache[1][scan8[4]] =
                h->ref_cache[0][scan8[12]] =
                h->ref_cache[1][scan8[12]] = PART_NOT_AVAILABLE;
                    for( i = 0; i < 4; i++ )
                        fill_rectangle( &h->direct_cache[scan8[4*i]], 2, 2, 8, (h->sub_mb_type[i]>>1)&0xFF, 1 );
            }
        } else {
            for( i = 0; i < 4; i++ ) {
                h->sub_mb_type[i] = decode_cabac_p_mb_sub_type( h );
                sub_partition_count[i]= p_sub_mb_type_info[ h->sub_mb_type[i] ].partition_count;
                h->sub_mb_type[i]=      p_sub_mb_type_info[ h->sub_mb_type[i] ].type;
            }
        }

        for( list = 0; list < h->list_count; list++ ) {
                for( i = 0; i < 4; i++ ) {
                    if(IS_DIRECT(h->sub_mb_type[i])) continue;
                    if(IS_DIR(h->sub_mb_type[i], 0, list)){
                        if( h->ref_count[list] > 1 ){
                            ref[list][i] = decode_cabac_mb_ref( h, list, 4*i );
                            if(ref[list][i] >= (unsigned)h->ref_count[list]){
                                av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref[list][i], h->ref_count[list]);
                                return -1;
                            }
                        }else
                            ref[list][i] = 0;
                    } else {
                        ref[list][i] = -1;
                    }
                                                       h->ref_cache[list][ scan8[4*i]+1 ]=
                    h->ref_cache[list][ scan8[4*i]+8 ]=h->ref_cache[list][ scan8[4*i]+9 ]= ref[list][i];
                }
        }

        if(dct8x8_allowed)
            dct8x8_allowed = get_dct8x8_allowed(h);

        for(list=0; list<h->list_count; list++){
            for(i=0; i<4; i++){
                h->ref_cache[list][ scan8[4*i]   ]=h->ref_cache[list][ scan8[4*i]+1 ];
                if(IS_DIRECT(h->sub_mb_type[i])){
                    fill_rectangle(h->mvd_cache[list][scan8[4*i]], 2, 2, 8, 0, 2);
                    continue;
                }

                if(IS_DIR(h->sub_mb_type[i], 0, list) && !IS_DIRECT(h->sub_mb_type[i])){
                    const int sub_mb_type= h->sub_mb_type[i];
                    const int block_width= (sub_mb_type & (MB_TYPE_16x16|MB_TYPE_16x8)) ? 2 : 1;
                    for(j=0; j<sub_partition_count[i]; j++){
                        int mpx, mpy;
                        int mx, my;
                        const int index= 4*i + block_width*j;
                        int16_t (* mv_cache)[2]= &h->mv_cache[list][ scan8[index] ];
                        uint8_t (* mvd_cache)[2]= &h->mvd_cache[list][ scan8[index] ];
                        pred_motion(h, index, block_width, list, h->ref_cache[list][ scan8[index] ], &mx, &my);
                        DECODE_CABAC_MB_MVD( h, list, index)
                        tprintf(s->avctx, "final mv:%d %d\n", mx, my);

                        if(IS_SUB_8X8(sub_mb_type)){
                            mv_cache[ 1 ][0]=
                            mv_cache[ 8 ][0]= mv_cache[ 9 ][0]= mx;
                            mv_cache[ 1 ][1]=
                            mv_cache[ 8 ][1]= mv_cache[ 9 ][1]= my;

                            mvd_cache[ 1 ][0]=
                            mvd_cache[ 8 ][0]= mvd_cache[ 9 ][0]= mpx;
                            mvd_cache[ 1 ][1]=
                            mvd_cache[ 8 ][1]= mvd_cache[ 9 ][1]= mpy;
                        }else if(IS_SUB_8X4(sub_mb_type)){
                            mv_cache[ 1 ][0]= mx;
                            mv_cache[ 1 ][1]= my;

                            mvd_cache[ 1 ][0]=  mpx;
                            mvd_cache[ 1 ][1]= mpy;
                        }else if(IS_SUB_4X8(sub_mb_type)){
                            mv_cache[ 8 ][0]= mx;
                            mv_cache[ 8 ][1]= my;

                            mvd_cache[ 8 ][0]= mpx;
                            mvd_cache[ 8 ][1]= mpy;
                        }
                        mv_cache[ 0 ][0]= mx;
                        mv_cache[ 0 ][1]= my;

                        mvd_cache[ 0 ][0]= mpx;
                        mvd_cache[ 0 ][1]= mpy;
                    }
                }else{
                    fill_rectangle(h->mv_cache [list][ scan8[4*i] ], 2, 2, 8, 0, 4);
                    fill_rectangle(h->mvd_cache[list][ scan8[4*i] ], 2, 2, 8, 0, 2);
                }
            }
        }
    } else if( IS_DIRECT(mb_type) ) {
        ff_h264_pred_direct_motion(h, &mb_type);
        fill_rectangle(h->mvd_cache[0][scan8[0]], 4, 4, 8, 0, 2);
        fill_rectangle(h->mvd_cache[1][scan8[0]], 4, 4, 8, 0, 2);
        dct8x8_allowed &= h->sps.direct_8x8_inference_flag;
    } else {
        int list, i;
        if(IS_16X16(mb_type)){
            for(list=0; list<h->list_count; list++){
                if(IS_DIR(mb_type, 0, list)){
                    int ref;
                    if(h->ref_count[list] > 1){
                        ref= decode_cabac_mb_ref(h, list, 0);
                        if(ref >= (unsigned)h->ref_count[list]){
                            av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
                            return -1;
                        }
                    }else
                        ref=0;
                        fill_rectangle(&h->ref_cache[list][ scan8[0] ], 4, 4, 8, ref, 1);
                }
            }
            for(list=0; list<h->list_count; list++){
                if(IS_DIR(mb_type, 0, list)){
                    int mx,my,mpx,mpy;
                    pred_motion(h, 0, 4, list, h->ref_cache[list][ scan8[0] ], &mx, &my);
                    DECODE_CABAC_MB_MVD( h, list, 0)
                    tprintf(s->avctx, "final mv:%d %d\n", mx, my);

                    fill_rectangle(h->mvd_cache[list][ scan8[0] ], 4, 4, 8, pack8to16(mpx,mpy), 2);
                    fill_rectangle(h->mv_cache[list][ scan8[0] ], 4, 4, 8, pack16to32(mx,my), 4);
                }
            }
        }
        else if(IS_16X8(mb_type)){
            for(list=0; list<h->list_count; list++){
                    for(i=0; i<2; i++){
                        if(IS_DIR(mb_type, i, list)){
                            int ref;
                            if(h->ref_count[list] > 1){
                                ref= decode_cabac_mb_ref( h, list, 8*i );
                                if(ref >= (unsigned)h->ref_count[list]){
                                    av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
                                    return -1;
                                }
                            }else
                                ref=0;
                            fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, ref, 1);
                        }else
                            fill_rectangle(&h->ref_cache[list][ scan8[0] + 16*i ], 4, 2, 8, (LIST_NOT_USED&0xFF), 1);
                    }
            }
            for(list=0; list<h->list_count; list++){
                for(i=0; i<2; i++){
                    if(IS_DIR(mb_type, i, list)){
                        int mx,my,mpx,mpy;
                        pred_16x8_motion(h, 8*i, list, h->ref_cache[list][scan8[0] + 16*i], &mx, &my);
                        DECODE_CABAC_MB_MVD( h, list, 8*i)
                        tprintf(s->avctx, "final mv:%d %d\n", mx, my);

                        fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack8to16(mpx,mpy), 2);
                        fill_rectangle(h->mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, pack16to32(mx,my), 4);
                    }else{
                        fill_rectangle(h->mvd_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 2);
                        fill_rectangle(h-> mv_cache[list][ scan8[0] + 16*i ], 4, 2, 8, 0, 4);
                    }
                }
            }
        }else{
            assert(IS_8X16(mb_type));
            for(list=0; list<h->list_count; list++){
                    for(i=0; i<2; i++){
                        if(IS_DIR(mb_type, i, list)){ //FIXME optimize
                            int ref;
                            if(h->ref_count[list] > 1){
                                ref= decode_cabac_mb_ref( h, list, 4*i );
                                if(ref >= (unsigned)h->ref_count[list]){
                                    av_log(s->avctx, AV_LOG_ERROR, "Reference %d >= %d\n", ref, h->ref_count[list]);
                                    return -1;
                                }
                            }else
                                ref=0;
                            fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, ref, 1);
                        }else
                            fill_rectangle(&h->ref_cache[list][ scan8[0] + 2*i ], 2, 4, 8, (LIST_NOT_USED&0xFF), 1);
                    }
            }
            for(list=0; list<h->list_count; list++){
                for(i=0; i<2; i++){
                    if(IS_DIR(mb_type, i, list)){
                        int mx,my,mpx,mpy;
                        pred_8x16_motion(h, i*4, list, h->ref_cache[list][ scan8[0] + 2*i ], &mx, &my);
                        DECODE_CABAC_MB_MVD( h, list, 4*i)

                        tprintf(s->avctx, "final mv:%d %d\n", mx, my);
                        fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack8to16(mpx,mpy), 2);
                        fill_rectangle(h->mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, pack16to32(mx,my), 4);
                    }else{
                        fill_rectangle(h->mvd_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 2);
                        fill_rectangle(h-> mv_cache[list][ scan8[0] + 2*i ], 2, 4, 8, 0, 4);
                    }
                }
            }
        }
    }

   if( IS_INTER( mb_type ) ) {
        h->chroma_pred_mode_table[mb_xy] = 0;
        write_back_motion( h, mb_type );
   }

    if( !IS_INTRA16x16( mb_type ) ) {
        cbp  = decode_cabac_mb_cbp_luma( h );
        if(CHROMA)
            cbp |= decode_cabac_mb_cbp_chroma( h ) << 4;
    }

    h->cbp_table[mb_xy] = h->cbp = cbp;

    if( dct8x8_allowed && (cbp&15) && !IS_INTRA( mb_type ) ) {
        mb_type |= MB_TYPE_8x8DCT * get_cabac_noinline( &h->cabac, &h->cabac_state[399 + h->neighbor_transform_size] );
    }
    s->current_picture.mb_type[mb_xy]= mb_type;

    if( cbp || IS_INTRA16x16( mb_type ) ) {
        const uint8_t *scan, *scan8x8, *dc_scan;
        const uint32_t *qmul;

        if(IS_INTERLACED(mb_type)){
            scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0;
            scan= s->qscale ? h->field_scan : h->field_scan_q0;
            dc_scan= luma_dc_field_scan;
        }else{
            scan8x8= s->qscale ? h->zigzag_scan8x8 : h->zigzag_scan8x8_q0;
            scan= s->qscale ? h->zigzag_scan : h->zigzag_scan_q0;
            dc_scan= luma_dc_zigzag_scan;
        }

        // decode_cabac_mb_dqp
        if(get_cabac_noinline( &h->cabac, &h->cabac_state[60 + (h->last_qscale_diff != 0)])){
            int val = 1;
            int ctx= 2;

            while( get_cabac_noinline( &h->cabac, &h->cabac_state[60 + ctx] ) ) {
                ctx= 3;
                val++;
                if(val > 102){ //prevent infinite loop
                    av_log(h->s.avctx, AV_LOG_ERROR, "cabac decode of qscale diff failed at %d %d\n", s->mb_x, s->mb_y);
                    return -1;
                }
            }

            if( val&0x01 )
                val=   (val + 1)>>1 ;
            else
                val= -((val + 1)>>1);
            h->last_qscale_diff = val;
            s->qscale += val;
            if(((unsigned)s->qscale) > 51){
                if(s->qscale<0) s->qscale+= 52;
                else            s->qscale-= 52;
            }
            h->chroma_qp[0] = get_chroma_qp(h, 0, s->qscale);
            h->chroma_qp[1] = get_chroma_qp(h, 1, s->qscale);
        }else
            h->last_qscale_diff=0;

        if( IS_INTRA16x16( mb_type ) ) {
            int i;
            //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" );
            decode_cabac_residual_dc( h, h->mb, 0, 0, dc_scan, 16);

            if( cbp&15 ) {
                qmul = h->dequant4_coeff[0][s->qscale];
                for( i = 0; i < 16; i++ ) {
                    //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i );
                    decode_cabac_residual_nondc(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15);
                }
            } else {
                fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1);
            }
        } else {
            int i8x8, i4x4;
            for( i8x8 = 0; i8x8 < 4; i8x8++ ) {
                if( cbp & (1<<i8x8) ) {
                    if( IS_8x8DCT(mb_type) ) {
                        decode_cabac_residual_nondc(h, h->mb + 64*i8x8, 5, 4*i8x8,
                            scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64);
                    } else {
                        qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale];
                        for( i4x4 = 0; i4x4 < 4; i4x4++ ) {
                            const int index = 4*i8x8 + i4x4;
                            //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index );
//START_TIMER
                            decode_cabac_residual_nondc(h, h->mb + 16*index, 2, index, scan, qmul, 16);
//STOP_TIMER("decode_residual")
                        }
                    }
                } else {
                    uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ];
                    nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0;
                }
            }
        }

        if( cbp&0x30 ){
            int c;
            for( c = 0; c < 2; c++ ) {
                //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-DC\n",c );
                decode_cabac_residual_dc(h, h->mb + 256 + 16*4*c, 3, c, chroma_dc_scan, 4);
            }
        }

        if( cbp&0x20 ) {
            int c, i;
            for( c = 0; c < 2; c++ ) {
                qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]];
                for( i = 0; i < 4; i++ ) {
                    const int index = 16 + 4 * c + i;
                    //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 );
                    decode_cabac_residual_nondc(h, h->mb + 16*index, 4, index, scan + 1, qmul, 15);
                }
            }
        } else {
            uint8_t * const nnz= &h->non_zero_count_cache[0];
            nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
            nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
        }
    } else {
        uint8_t * const nnz= &h->non_zero_count_cache[0];
        fill_rectangle(&nnz[scan8[0]], 4, 4, 8, 0, 1);
        nnz[ scan8[16]+0 ] = nnz[ scan8[16]+1 ] =nnz[ scan8[16]+8 ] =nnz[ scan8[16]+9 ] =
        nnz[ scan8[20]+0 ] = nnz[ scan8[20]+1 ] =nnz[ scan8[20]+8 ] =nnz[ scan8[20]+9 ] = 0;
        h->last_qscale_diff = 0;
    }

    s->current_picture.qscale_table[mb_xy]= s->qscale;
    write_back_non_zero_count(h);

    if(MB_MBAFF){
        h->ref_count[0] >>= 1;
        h->ref_count[1] >>= 1;
    }

    return 0;
}