view 4xm.c @ 3990:746a60ba3177 libavcodec

enable CMOV_IS_FAST as its faster or equal speed on every cpu (duron, athlon, PM, P3) from which ive seen benchmarks, it might be slower on P4 but noone has posted benchmarks ...
author michael
date Wed, 11 Oct 2006 12:23:40 +0000
parents c8c591fe26f8
children 0022b81fc643
line wrap: on
line source

/*
 * 4XM codec
 * Copyright (c) 2003 Michael Niedermayer
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

/**
 * @file 4xm.c
 * 4XM codec.
 */

#include "avcodec.h"
#include "dsputil.h"
#include "mpegvideo.h"

//#undef NDEBUG
//#include <assert.h>

#define BLOCK_TYPE_VLC_BITS 5
#define ACDC_VLC_BITS 9

#define CFRAME_BUFFER_COUNT 100

static const uint8_t block_type_tab[4][8][2]={
  {   //{8,4,2}x{8,4,2}
    { 0,1}, { 2,2}, { 6,3}, {14,4}, {30,5}, {31,5}, { 0,0}
  },{ //{8,4}x1
    { 0,1}, { 0,0}, { 2,2}, { 6,3}, {14,4}, {15,4}, { 0,0}
  },{ //1x{8,4}
    { 0,1}, { 2,2}, { 0,0}, { 6,3}, {14,4}, {15,4}, { 0,0}
  },{ //1x2, 2x1
    { 0,1}, { 0,0}, { 0,0}, { 2,2}, { 6,3}, {14,4}, {15,4}
  }
};

static const uint8_t size2index[4][4]={
  {-1, 3, 1, 1},
  { 3, 0, 0, 0},
  { 2, 0, 0, 0},
  { 2, 0, 0, 0},
};

static const int8_t mv[256][2]={
{  0,  0},{  0, -1},{ -1,  0},{  1,  0},{  0,  1},{ -1, -1},{  1, -1},{ -1,  1},
{  1,  1},{  0, -2},{ -2,  0},{  2,  0},{  0,  2},{ -1, -2},{  1, -2},{ -2, -1},
{  2, -1},{ -2,  1},{  2,  1},{ -1,  2},{  1,  2},{ -2, -2},{  2, -2},{ -2,  2},
{  2,  2},{  0, -3},{ -3,  0},{  3,  0},{  0,  3},{ -1, -3},{  1, -3},{ -3, -1},
{  3, -1},{ -3,  1},{  3,  1},{ -1,  3},{  1,  3},{ -2, -3},{  2, -3},{ -3, -2},
{  3, -2},{ -3,  2},{  3,  2},{ -2,  3},{  2,  3},{  0, -4},{ -4,  0},{  4,  0},
{  0,  4},{ -1, -4},{  1, -4},{ -4, -1},{  4, -1},{  4,  1},{ -1,  4},{  1,  4},
{ -3, -3},{ -3,  3},{  3,  3},{ -2, -4},{ -4, -2},{  4, -2},{ -4,  2},{ -2,  4},
{  2,  4},{ -3, -4},{  3, -4},{  4, -3},{ -5,  0},{ -4,  3},{ -3,  4},{  3,  4},
{ -1, -5},{ -5, -1},{ -5,  1},{ -1,  5},{ -2, -5},{  2, -5},{  5, -2},{  5,  2},
{ -4, -4},{ -4,  4},{ -3, -5},{ -5, -3},{ -5,  3},{  3,  5},{ -6,  0},{  0,  6},
{ -6, -1},{ -6,  1},{  1,  6},{  2, -6},{ -6,  2},{  2,  6},{ -5, -4},{  5,  4},
{  4,  5},{ -6, -3},{  6,  3},{ -7,  0},{ -1, -7},{  5, -5},{ -7,  1},{ -1,  7},
{  4, -6},{  6,  4},{ -2, -7},{ -7,  2},{ -3, -7},{  7, -3},{  3,  7},{  6, -5},
{  0, -8},{ -1, -8},{ -7, -4},{ -8,  1},{  4,  7},{  2, -8},{ -2,  8},{  6,  6},
{ -8,  3},{  5, -7},{ -5,  7},{  8, -4},{  0, -9},{ -9, -1},{  1,  9},{  7, -6},
{ -7,  6},{ -5, -8},{ -5,  8},{ -9,  3},{  9, -4},{  7, -7},{  8, -6},{  6,  8},
{ 10,  1},{-10,  2},{  9, -5},{ 10, -3},{ -8, -7},{-10, -4},{  6, -9},{-11,  0},
{ 11,  1},{-11, -2},{ -2, 11},{  7, -9},{ -7,  9},{ 10,  6},{ -4, 11},{  8, -9},
{  8,  9},{  5, 11},{  7,-10},{ 12, -3},{ 11,  6},{ -9, -9},{  8, 10},{  5, 12},
{-11,  7},{ 13,  2},{  6,-12},{ 10,  9},{-11,  8},{ -7, 12},{  0, 14},{ 14, -2},
{ -9, 11},{ -6, 13},{-14, -4},{ -5,-14},{  5, 14},{-15, -1},{-14, -6},{  3,-15},
{ 11,-11},{ -7, 14},{ -5, 15},{  8,-14},{ 15,  6},{  3, 16},{  7,-15},{-16,  5},
{  0, 17},{-16, -6},{-10, 14},{-16,  7},{ 12, 13},{-16,  8},{-17,  6},{-18,  3},
{ -7, 17},{ 15, 11},{ 16, 10},{  2,-19},{  3,-19},{-11,-16},{-18,  8},{-19, -6},
{  2,-20},{-17,-11},{-10,-18},{  8, 19},{-21, -1},{-20,  7},{ -4, 21},{ 21,  5},
{ 15, 16},{  2,-22},{-10,-20},{-22,  5},{ 20,-11},{ -7,-22},{-12, 20},{ 23, -5},
{ 13,-20},{ 24, -2},{-15, 19},{-11, 22},{ 16, 19},{ 23,-10},{-18,-18},{ -9,-24},
{ 24,-10},{ -3, 26},{-23, 13},{-18,-20},{ 17, 21},{ -4, 27},{ 27,  6},{  1,-28},
{-11, 26},{-17,-23},{  7, 28},{ 11,-27},{ 29,  5},{-23,-19},{-28,-11},{-21, 22},
{-30,  7},{-17, 26},{-27, 16},{ 13, 29},{ 19,-26},{ 10,-31},{-14,-30},{ 20,-27},
{-29, 18},{-16,-31},{-28,-22},{ 21,-30},{-25, 28},{ 26,-29},{ 25,-32},{-32,-32}
};

// this is simply the scaled down elementwise product of the standard jpeg quantizer table and the AAN premul table
static const uint8_t dequant_table[64]={
 16, 15, 13, 19, 24, 31, 28, 17,
 17, 23, 25, 31, 36, 63, 45, 21,
 18, 24, 27, 37, 52, 59, 49, 20,
 16, 28, 34, 40, 60, 80, 51, 20,
 18, 31, 48, 66, 68, 86, 56, 21,
 19, 38, 56, 59, 64, 64, 48, 20,
 27, 48, 55, 55, 56, 51, 35, 15,
 20, 35, 34, 32, 31, 22, 15,  8,
};

static VLC block_type_vlc[4];


typedef struct CFrameBuffer{
    unsigned int allocated_size;
    unsigned int size;
    int id;
    uint8_t *data;
}CFrameBuffer;

typedef struct FourXContext{
    AVCodecContext *avctx;
    DSPContext dsp;
    AVFrame current_picture, last_picture;
    GetBitContext pre_gb;          ///< ac/dc prefix
    GetBitContext gb;
    uint8_t *bytestream;
    uint16_t *wordstream;
    int mv[256];
    VLC pre_vlc;
    int last_dc;
    DECLARE_ALIGNED_8(DCTELEM, block[6][64]);
    uint8_t *bitstream_buffer;
    unsigned int bitstream_buffer_size;
    CFrameBuffer cfrm[CFRAME_BUFFER_COUNT];
} FourXContext;


#define FIX_1_082392200  70936
#define FIX_1_414213562  92682
#define FIX_1_847759065 121095
#define FIX_2_613125930 171254

#define MULTIPLY(var,const)  (((var)*(const)) >> 16)

static void idct(DCTELEM block[64]){
    int tmp0, tmp1, tmp2, tmp3, tmp4, tmp5, tmp6, tmp7;
    int tmp10, tmp11, tmp12, tmp13;
    int z5, z10, z11, z12, z13;
    int i;
    int temp[64];

    for(i=0; i<8; i++){
        tmp10 = block[8*0 + i] + block[8*4 + i];
        tmp11 = block[8*0 + i] - block[8*4 + i];

        tmp13 =          block[8*2 + i] + block[8*6 + i];
        tmp12 = MULTIPLY(block[8*2 + i] - block[8*6 + i], FIX_1_414213562) - tmp13;

        tmp0 = tmp10 + tmp13;
        tmp3 = tmp10 - tmp13;
        tmp1 = tmp11 + tmp12;
        tmp2 = tmp11 - tmp12;

        z13 = block[8*5 + i] + block[8*3 + i];
        z10 = block[8*5 + i] - block[8*3 + i];
        z11 = block[8*1 + i] + block[8*7 + i];
        z12 = block[8*1 + i] - block[8*7 + i];

        tmp7  =          z11 + z13;
        tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);

        z5    = MULTIPLY(z10 + z12, FIX_1_847759065);
        tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5;
        tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5;

        tmp6 = tmp12 - tmp7;
        tmp5 = tmp11 - tmp6;
        tmp4 = tmp10 + tmp5;

        temp[8*0 + i] = tmp0 + tmp7;
        temp[8*7 + i] = tmp0 - tmp7;
        temp[8*1 + i] = tmp1 + tmp6;
        temp[8*6 + i] = tmp1 - tmp6;
        temp[8*2 + i] = tmp2 + tmp5;
        temp[8*5 + i] = tmp2 - tmp5;
        temp[8*4 + i] = tmp3 + tmp4;
        temp[8*3 + i] = tmp3 - tmp4;
    }

    for(i=0; i<8*8; i+=8){
        tmp10 = temp[0 + i] + temp[4 + i];
        tmp11 = temp[0 + i] - temp[4 + i];

        tmp13 = temp[2 + i] + temp[6 + i];
        tmp12 = MULTIPLY(temp[2 + i] - temp[6 + i], FIX_1_414213562) - tmp13;

        tmp0 = tmp10 + tmp13;
        tmp3 = tmp10 - tmp13;
        tmp1 = tmp11 + tmp12;
        tmp2 = tmp11 - tmp12;

        z13 = temp[5 + i] + temp[3 + i];
        z10 = temp[5 + i] - temp[3 + i];
        z11 = temp[1 + i] + temp[7 + i];
        z12 = temp[1 + i] - temp[7 + i];

        tmp7 = z11 + z13;
        tmp11 = MULTIPLY(z11 - z13, FIX_1_414213562);

        z5 = MULTIPLY(z10 + z12, FIX_1_847759065);
        tmp10 = MULTIPLY(z12, FIX_1_082392200) - z5;
        tmp12 = MULTIPLY(z10, - FIX_2_613125930) + z5;

        tmp6 = tmp12 - tmp7;
        tmp5 = tmp11 - tmp6;
        tmp4 = tmp10 + tmp5;

        block[0 + i] = (tmp0 + tmp7)>>6;
        block[7 + i] = (tmp0 - tmp7)>>6;
        block[1 + i] = (tmp1 + tmp6)>>6;
        block[6 + i] = (tmp1 - tmp6)>>6;
        block[2 + i] = (tmp2 + tmp5)>>6;
        block[5 + i] = (tmp2 - tmp5)>>6;
        block[4 + i] = (tmp3 + tmp4)>>6;
        block[3 + i] = (tmp3 - tmp4)>>6;
    }
}

static void init_vlcs(FourXContext *f){
    int i;

    for(i=0; i<4; i++){
        init_vlc(&block_type_vlc[i], BLOCK_TYPE_VLC_BITS, 7,
                 &block_type_tab[i][0][1], 2, 1,
                 &block_type_tab[i][0][0], 2, 1, 1);
    }
}

static void init_mv(FourXContext *f){
    int i;

    for(i=0; i<256; i++){
        f->mv[i] = mv[i][0] + mv[i][1]*f->current_picture.linesize[0]/2;
    }
}

static inline void mcdc(uint16_t *dst, uint16_t *src, int log2w, int h, int stride, int scale, int dc){
   int i;
   dc*= 0x10001;

   switch(log2w){
   case 0:
        for(i=0; i<h; i++){
            dst[0] = scale*src[0] + dc;
            if(scale) src += stride;
            dst += stride;
        }
        break;
    case 1:
        for(i=0; i<h; i++){
            ((uint32_t*)dst)[0] = scale*((uint32_t*)src)[0] + dc;
            if(scale) src += stride;
            dst += stride;
        }
        break;
    case 2:
        for(i=0; i<h; i++){
            ((uint32_t*)dst)[0] = scale*((uint32_t*)src)[0] + dc;
            ((uint32_t*)dst)[1] = scale*((uint32_t*)src)[1] + dc;
            if(scale) src += stride;
            dst += stride;
        }
        break;
    case 3:
        for(i=0; i<h; i++){
            ((uint32_t*)dst)[0] = scale*((uint32_t*)src)[0] + dc;
            ((uint32_t*)dst)[1] = scale*((uint32_t*)src)[1] + dc;
            ((uint32_t*)dst)[2] = scale*((uint32_t*)src)[2] + dc;
            ((uint32_t*)dst)[3] = scale*((uint32_t*)src)[3] + dc;
            if(scale) src += stride;
            dst += stride;
        }
        break;
    default: assert(0);
    }
}

static void decode_p_block(FourXContext *f, uint16_t *dst, uint16_t *src, int log2w, int log2h, int stride){
    const int index= size2index[log2h][log2w];
    const int h= 1<<log2h;
    int code= get_vlc2(&f->gb, block_type_vlc[index].table, BLOCK_TYPE_VLC_BITS, 1);

    assert(code>=0 && code<=6);

    if(code == 0){
        src += f->mv[ *f->bytestream++ ];
        mcdc(dst, src, log2w, h, stride, 1, 0);
    }else if(code == 1){
        log2h--;
        decode_p_block(f, dst                  , src                  , log2w, log2h, stride);
        decode_p_block(f, dst + (stride<<log2h), src + (stride<<log2h), log2w, log2h, stride);
    }else if(code == 2){
        log2w--;
        decode_p_block(f, dst             , src             , log2w, log2h, stride);
        decode_p_block(f, dst + (1<<log2w), src + (1<<log2w), log2w, log2h, stride);
    }else if(code == 4){
        src += f->mv[ *f->bytestream++ ];
        mcdc(dst, src, log2w, h, stride, 1, le2me_16(*f->wordstream++));
    }else if(code == 5){
        mcdc(dst, src, log2w, h, stride, 0, le2me_16(*f->wordstream++));
    }else if(code == 6){
        if(log2w){
            dst[0] = le2me_16(*f->wordstream++);
            dst[1] = le2me_16(*f->wordstream++);
        }else{
            dst[0     ] = le2me_16(*f->wordstream++);
            dst[stride] = le2me_16(*f->wordstream++);
        }
    }
}

static int get32(void *p){
    return le2me_32(*(uint32_t*)p);
}

static int decode_p_frame(FourXContext *f, uint8_t *buf, int length){
    int x, y;
    const int width= f->avctx->width;
    const int height= f->avctx->height;
    uint16_t *src= (uint16_t*)f->last_picture.data[0];
    uint16_t *dst= (uint16_t*)f->current_picture.data[0];
    const int stride= f->current_picture.linesize[0]>>1;
    const unsigned int bitstream_size= get32(buf+8);
    const unsigned int bytestream_size= get32(buf+16);
    const unsigned int wordstream_size= get32(buf+12);

    if(bitstream_size+ bytestream_size+ wordstream_size + 20 != length
       || bitstream_size  > (1<<26)
       || bytestream_size > (1<<26)
       || wordstream_size > (1<<26)
       ){
        av_log(f->avctx, AV_LOG_ERROR, "lengths %d %d %d %d\n", bitstream_size, bytestream_size, wordstream_size,
        bitstream_size+ bytestream_size+ wordstream_size - length);
        return -1;
    }

    f->bitstream_buffer= av_fast_realloc(f->bitstream_buffer, &f->bitstream_buffer_size, bitstream_size + FF_INPUT_BUFFER_PADDING_SIZE);
    f->dsp.bswap_buf((uint32_t*)f->bitstream_buffer, (uint32_t*)(buf + 20), bitstream_size/4);
    init_get_bits(&f->gb, f->bitstream_buffer, 8*bitstream_size);

    f->wordstream= (uint16_t*)(buf + 20 + bitstream_size);
    f->bytestream= buf + 20 + bitstream_size + wordstream_size;

    init_mv(f);

    for(y=0; y<height; y+=8){
        for(x=0; x<width; x+=8){
            decode_p_block(f, dst + x, src + x, 3, 3, stride);
        }
        src += 8*stride;
        dst += 8*stride;
    }

    if(bitstream_size != (get_bits_count(&f->gb)+31)/32*4)
        av_log(f->avctx, AV_LOG_ERROR, " %d %td %td bytes left\n",
            bitstream_size - (get_bits_count(&f->gb)+31)/32*4,
            bytestream_size - (f->bytestream - (buf + 20 + bitstream_size + wordstream_size)),
            wordstream_size - (((uint8_t*)f->wordstream) - (buf + 20 + bitstream_size))
        );

    return 0;
}

/**
 * decode block and dequantize.
 * Note this is allmost identical to mjpeg
 */
static int decode_i_block(FourXContext *f, DCTELEM *block){
    int code, i, j, level, val;

    /* DC coef */
    val = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);
    if (val>>4){
        av_log(f->avctx, AV_LOG_ERROR, "error dc run != 0\n");
    }

    if(val)
        val = get_xbits(&f->gb, val);

    val = val * dequant_table[0] + f->last_dc;
    f->last_dc =
    block[0] = val;
    /* AC coefs */
    i = 1;
    for(;;) {
        code = get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3);

        /* EOB */
        if (code == 0)
            break;
        if (code == 0xf0) {
            i += 16;
        } else {
            level = get_xbits(&f->gb, code & 0xf);
            i += code >> 4;
            if (i >= 64) {
                av_log(f->avctx, AV_LOG_ERROR, "run %d oveflow\n", i);
                return 0;
            }

            j= ff_zigzag_direct[i];
            block[j] = level * dequant_table[j];
            i++;
            if (i >= 64)
                break;
        }
    }

    return 0;
}

static inline void idct_put(FourXContext *f, int x, int y){
    DCTELEM (*block)[64]= f->block;
    int stride= f->current_picture.linesize[0]>>1;
    int i;
    uint16_t *dst = ((uint16_t*)f->current_picture.data[0]) + y * stride + x;

    for(i=0; i<4; i++){
        block[i][0] += 0x80*8*8;
        idct(block[i]);
    }

    if(!(f->avctx->flags&CODEC_FLAG_GRAY)){
        for(i=4; i<6; i++) idct(block[i]);
    }

/* Note transform is:
y= ( 1b + 4g + 2r)/14
cb=( 3b - 2g - 1r)/14
cr=(-1b - 4g + 5r)/14
*/
    for(y=0; y<8; y++){
        for(x=0; x<8; x++){
            DCTELEM *temp= block[(x>>2) + 2*(y>>2)] + 2*(x&3) + 2*8*(y&3); //FIXME optimize
            int cb= block[4][x + 8*y];
            int cr= block[5][x + 8*y];
            int cg= (cb + cr)>>1;
            int y;

            cb+=cb;

            y = temp[0];
            dst[0       ]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8);
            y = temp[1];
            dst[1       ]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8);
            y = temp[8];
            dst[  stride]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8);
            y = temp[9];
            dst[1+stride]= ((y+cb)>>3) + (((y-cg)&0xFC)<<3) + (((y+cr)&0xF8)<<8);
            dst += 2;
        }
        dst += 2*stride - 2*8;
    }
}

static int decode_i_mb(FourXContext *f){
    int i;

    f->dsp.clear_blocks(f->block[0]);

    for(i=0; i<6; i++){
        if(decode_i_block(f, f->block[i]) < 0)
            return -1;
    }

    return 0;
}

static uint8_t *read_huffman_tables(FourXContext *f, uint8_t * const buf){
    int frequency[512];
    uint8_t flag[512];
    int up[512];
    uint8_t len_tab[257];
    int bits_tab[257];
    int start, end;
    uint8_t *ptr= buf;
    int j;

    memset(frequency, 0, sizeof(frequency));
    memset(up, -1, sizeof(up));

    start= *ptr++;
    end= *ptr++;
    for(;;){
        int i;

        for(i=start; i<=end; i++){
            frequency[i]= *ptr++;
//            printf("%d %d %d\n", start, end, frequency[i]);
        }
        start= *ptr++;
        if(start==0) break;

        end= *ptr++;
    }
    frequency[256]=1;

    while((ptr - buf)&3) ptr++; // 4byte align

//    for(j=0; j<16; j++)
//        printf("%2X", ptr[j]);

    for(j=257; j<512; j++){
        int min_freq[2]= {256*256, 256*256};
        int smallest[2]= {0, 0};
        int i;
        for(i=0; i<j; i++){
            if(frequency[i] == 0) continue;
            if(frequency[i] < min_freq[1]){
                if(frequency[i] < min_freq[0]){
                    min_freq[1]= min_freq[0]; smallest[1]= smallest[0];
                    min_freq[0]= frequency[i];smallest[0]= i;
                }else{
                    min_freq[1]= frequency[i];smallest[1]= i;
                }
            }
        }
        if(min_freq[1] == 256*256) break;

        frequency[j]= min_freq[0] + min_freq[1];
        flag[ smallest[0] ]= 0;
        flag[ smallest[1] ]= 1;
        up[ smallest[0] ]=
        up[ smallest[1] ]= j;
        frequency[ smallest[0] ]= frequency[ smallest[1] ]= 0;
    }

    for(j=0; j<257; j++){
        int node;
        int len=0;
        int bits=0;

        for(node= j; up[node] != -1; node= up[node]){
            bits += flag[node]<<len;
            len++;
            if(len > 31) av_log(f->avctx, AV_LOG_ERROR, "vlc length overflow\n"); //can this happen at all ?
        }

        bits_tab[j]= bits;
        len_tab[j]= len;
    }

    init_vlc(&f->pre_vlc, ACDC_VLC_BITS, 257,
             len_tab , 1, 1,
             bits_tab, 4, 4, 0);

    return ptr;
}

static int decode_i_frame(FourXContext *f, uint8_t *buf, int length){
    int x, y;
    const int width= f->avctx->width;
    const int height= f->avctx->height;
    uint16_t *dst= (uint16_t*)f->current_picture.data[0];
    const int stride= f->current_picture.linesize[0]>>1;
    const unsigned int bitstream_size= get32(buf);
    const int token_count __attribute__((unused)) = get32(buf + bitstream_size + 8);
    unsigned int prestream_size= 4*get32(buf + bitstream_size + 4);
    uint8_t *prestream= buf + bitstream_size + 12;

    if(prestream_size + bitstream_size + 12 != length
       || bitstream_size > (1<<26)
       || prestream_size > (1<<26)){
        av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d %d\n", prestream_size, bitstream_size, length);
        return -1;
    }

    prestream= read_huffman_tables(f, prestream);

    init_get_bits(&f->gb, buf + 4, 8*bitstream_size);

    prestream_size= length + buf - prestream;

    f->bitstream_buffer= av_fast_realloc(f->bitstream_buffer, &f->bitstream_buffer_size, prestream_size + FF_INPUT_BUFFER_PADDING_SIZE);
    f->dsp.bswap_buf((uint32_t*)f->bitstream_buffer, (uint32_t*)prestream, prestream_size/4);
    init_get_bits(&f->pre_gb, f->bitstream_buffer, 8*prestream_size);

    f->last_dc= 0*128*8*8;

    for(y=0; y<height; y+=16){
        for(x=0; x<width; x+=16){
            if(decode_i_mb(f) < 0)
                return -1;

            idct_put(f, x, y);
        }
        dst += 16*stride;
    }

    if(get_vlc2(&f->pre_gb, f->pre_vlc.table, ACDC_VLC_BITS, 3) != 256)
        av_log(f->avctx, AV_LOG_ERROR, "end mismatch\n");

    return 0;
}

static int decode_frame(AVCodecContext *avctx,
                        void *data, int *data_size,
                        uint8_t *buf, int buf_size)
{
    FourXContext * const f = avctx->priv_data;
    AVFrame *picture = data;
    AVFrame *p, temp;
    int i, frame_4cc, frame_size;

    frame_4cc= get32(buf);
    if(buf_size != get32(buf+4)+8 || buf_size < 20){
        av_log(f->avctx, AV_LOG_ERROR, "size mismatch %d %d\n", buf_size, get32(buf+4));
    }

    if(frame_4cc == ff_get_fourcc("cfrm")){
        int free_index=-1;
        const int data_size= buf_size - 20;
        const int id= get32(buf+12);
        const int whole_size= get32(buf+16);
        CFrameBuffer *cfrm;

        for(i=0; i<CFRAME_BUFFER_COUNT; i++){
            if(f->cfrm[i].id && f->cfrm[i].id < avctx->frame_number)
                av_log(f->avctx, AV_LOG_ERROR, "lost c frame %d\n", f->cfrm[i].id);
        }

        for(i=0; i<CFRAME_BUFFER_COUNT; i++){
            if(f->cfrm[i].id   == id) break;
            if(f->cfrm[i].size == 0 ) free_index= i;
        }

        if(i>=CFRAME_BUFFER_COUNT){
            i= free_index;
            f->cfrm[i].id= id;
        }
        cfrm= &f->cfrm[i];

        cfrm->data= av_fast_realloc(cfrm->data, &cfrm->allocated_size, cfrm->size + data_size + FF_INPUT_BUFFER_PADDING_SIZE);
        if(!cfrm->data){ //explicit check needed as memcpy below might not catch a NULL
            av_log(f->avctx, AV_LOG_ERROR, "realloc falure");
            return -1;
        }

        memcpy(cfrm->data + cfrm->size, buf+20, data_size);
        cfrm->size += data_size;

        if(cfrm->size >= whole_size){
            buf= cfrm->data;
            frame_size= cfrm->size;

            if(id != avctx->frame_number){
                av_log(f->avctx, AV_LOG_ERROR, "cframe id mismatch %d %d\n", id, avctx->frame_number);
            }

            cfrm->size= cfrm->id= 0;
            frame_4cc= ff_get_fourcc("pfrm");
        }else
            return buf_size;
    }else{
        buf= buf + 12;
        frame_size= buf_size - 12;
    }

    temp= f->current_picture;
    f->current_picture= f->last_picture;
    f->last_picture= temp;

    p= &f->current_picture;
    avctx->coded_frame= p;

    avctx->flags |= CODEC_FLAG_EMU_EDGE; // alternatively we would have to use our own buffer management

    if(p->data[0])
        avctx->release_buffer(avctx, p);

    p->reference= 1;
    if(avctx->get_buffer(avctx, p) < 0){
        av_log(avctx, AV_LOG_ERROR, "get_buffer() failed\n");
        return -1;
    }

    if(frame_4cc == ff_get_fourcc("ifrm")){
        p->pict_type= I_TYPE;
        if(decode_i_frame(f, buf, frame_size) < 0)
            return -1;
    }else if(frame_4cc == ff_get_fourcc("pfrm")){
        p->pict_type= P_TYPE;
        if(decode_p_frame(f, buf, frame_size) < 0)
            return -1;
    }else if(frame_4cc == ff_get_fourcc("snd_")){
        av_log(avctx, AV_LOG_ERROR, "ignoring snd_ chunk length:%d\n", buf_size);
    }else{
        av_log(avctx, AV_LOG_ERROR, "ignoring unknown chunk length:%d\n", buf_size);
    }

#if 0
for(i=0; i<20; i++){
    printf("%2X %c ", buf[i], clip(buf[i],16,126));
}
#endif

    p->key_frame= p->pict_type == I_TYPE;

    *picture= *p;
    *data_size = sizeof(AVPicture);

    emms_c();

    return buf_size;
}


static void common_init(AVCodecContext *avctx){
    FourXContext * const f = avctx->priv_data;

    dsputil_init(&f->dsp, avctx);

    f->avctx= avctx;
}

static int decode_init(AVCodecContext *avctx){
    FourXContext * const f = avctx->priv_data;

    common_init(avctx);
    init_vlcs(f);

    avctx->pix_fmt= PIX_FMT_RGB565;

    return 0;
}


static int decode_end(AVCodecContext *avctx){
    FourXContext * const f = avctx->priv_data;
    int i;

    av_freep(&f->bitstream_buffer);
    f->bitstream_buffer_size=0;
    for(i=0; i<CFRAME_BUFFER_COUNT; i++){
        av_freep(&f->cfrm[i].data);
        f->cfrm[i].allocated_size= 0;
    }
    free_vlc(&f->pre_vlc);

    return 0;
}

AVCodec fourxm_decoder = {
    "4xm",
    CODEC_TYPE_VIDEO,
    CODEC_ID_4XM,
    sizeof(FourXContext),
    decode_init,
    NULL,
    decode_end,
    decode_frame,
    /*CODEC_CAP_DR1,*/
};