view roqvideo.c @ 2207:22b768f1261a libavcodec

10000l fix and use more mmx2/3dnow code for mpeg4 qpel which has been written and commited long time ago but appearently never used, qpel motion compensation is 5% faster now
author michael
date Mon, 06 Sep 2004 03:17:31 +0000
parents 141a9539e270
children 2aae25679885
line wrap: on
line source

/*
 * Copyright (C) 2003 the ffmpeg project
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 */

/**
 * @file roqvideo.c
 * Id RoQ Video Decoder by Dr. Tim Ferguson
 * For more information about the Id RoQ format, visit:
 *   http://www.csse.monash.edu.au/~timf/
 */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "common.h"
#include "avcodec.h"
#include "dsputil.h"

typedef struct {
  unsigned char y0, y1, y2, y3, u, v;
} roq_cell;

typedef struct {
  int idx[4];
} roq_qcell;

static int uiclip[1024], *uiclp;  /* clipping table */
#define avg2(a,b) uiclp[(((int)(a)+(int)(b)+1)>>1)]
#define avg4(a,b,c,d) uiclp[(((int)(a)+(int)(b)+(int)(c)+(int)(d)+2)>>2)]

typedef struct RoqContext {

    AVCodecContext *avctx;
    DSPContext dsp;
    AVFrame last_frame;
    AVFrame current_frame;
    int first_frame;
    int y_stride;
    int c_stride;

    roq_cell cells[256];
    roq_qcell qcells[256];

    unsigned char *buf;
    int size;

} RoqContext;

#define RoQ_INFO              0x1001
#define RoQ_QUAD_CODEBOOK     0x1002
#define RoQ_QUAD_VQ           0x1011
#define RoQ_SOUND_MONO        0x1020
#define RoQ_SOUND_STEREO      0x1021

#define RoQ_ID_MOT              0x00
#define RoQ_ID_FCC              0x01
#define RoQ_ID_SLD              0x02
#define RoQ_ID_CCC              0x03

#define get_byte(in_buffer) *(in_buffer++)
#define get_word(in_buffer) ((unsigned short)(in_buffer += 2, \
  (in_buffer[-1] << 8 | in_buffer[-2])))
#define get_long(in_buffer) ((unsigned long)(in_buffer += 4, \
  (in_buffer[-1] << 24 | in_buffer[-2] << 16 | in_buffer[-3] << 8 | in_buffer[-4])))


static void apply_vector_2x2(RoqContext *ri, int x, int y, roq_cell *cell)
{
    unsigned char *yptr;

    yptr = ri->current_frame.data[0] + (y * ri->y_stride) + x;
    *yptr++ = cell->y0;
    *yptr++ = cell->y1;
    yptr += (ri->y_stride - 2);
    *yptr++ = cell->y2;
    *yptr++ = cell->y3;
    ri->current_frame.data[1][(y/2) * (ri->c_stride) + x/2] = cell->u;
    ri->current_frame.data[2][(y/2) * (ri->c_stride) + x/2] = cell->v;
}

static void apply_vector_4x4(RoqContext *ri, int x, int y, roq_cell *cell)
{
    unsigned long row_inc, c_row_inc;
    register unsigned char y0, y1, u, v;
    unsigned char *yptr, *uptr, *vptr;

    yptr = ri->current_frame.data[0] + (y * ri->y_stride) + x;
    uptr = ri->current_frame.data[1] + (y/2) * (ri->c_stride) + x/2;
    vptr = ri->current_frame.data[2] + (y/2) * (ri->c_stride) + x/2;

    row_inc = ri->y_stride - 4;
    c_row_inc = (ri->c_stride) - 2;
    *yptr++ = y0 = cell->y0; *uptr++ = u = cell->u; *vptr++ = v = cell->v;
    *yptr++ = y0;
    *yptr++ = y1 = cell->y1; *uptr++ = u; *vptr++ = v;
    *yptr++ = y1;

    yptr += row_inc;

    *yptr++ = y0;
    *yptr++ = y0;
    *yptr++ = y1;
    *yptr++ = y1;

    yptr += row_inc; uptr += c_row_inc; vptr += c_row_inc;

    *yptr++ = y0 = cell->y2; *uptr++ = u; *vptr++ = v;
    *yptr++ = y0;
    *yptr++ = y1 = cell->y3; *uptr++ = u; *vptr++ = v;
    *yptr++ = y1;

    yptr += row_inc;

    *yptr++ = y0;
    *yptr++ = y0;
    *yptr++ = y1;
    *yptr++ = y1;
}

static void apply_motion_4x4(RoqContext *ri, int x, int y, unsigned char mv,
    signed char mean_x, signed char mean_y)
{
    int i, hw, mx, my;
    unsigned char *pa, *pb;

    mx = x + 8 - (mv >> 4) - mean_x;
    my = y + 8 - (mv & 0xf) - mean_y;

    pa = ri->current_frame.data[0] + (y * ri->y_stride) + x;
    pb = ri->last_frame.data[0] + (my * ri->y_stride) + mx;
    for(i = 0; i < 4; i++) {
        pa[0] = pb[0];
        pa[1] = pb[1];
        pa[2] = pb[2];
        pa[3] = pb[3];
        pa += ri->y_stride;
        pb += ri->y_stride;
    }

#if 0
    pa = ri->current_frame.data[1] + (y/2) * (ri->c_stride) + x/2;
    pb = ri->last_frame.data[1] + (my/2) * (ri->c_stride) + (mx + 1)/2;
    for(i = 0; i < 2; i++) {
        pa[0] = pb[0];
        pa[1] = pb[1];
        pa += ri->c_stride;
        pb += ri->c_stride;
    }

    pa = ri->current_frame.data[2] + (y/2) * (ri->c_stride) + x/2;
    pb = ri->last_frame.data[2] + (my/2) * (ri->c_stride) + (mx + 1)/2;
    for(i = 0; i < 2; i++) {
        pa[0] = pb[0];
        pa[1] = pb[1];
        pa += ri->c_stride;
        pb += ri->c_stride;
    }
#else
    hw = ri->y_stride/2;
    pa = ri->current_frame.data[1] + (y * ri->y_stride)/4 + x/2;
    pb = ri->last_frame.data[1] + (my/2) * (ri->y_stride/2) + (mx + 1)/2;

    for(i = 0; i < 2; i++) {
        switch(((my & 0x01) << 1) | (mx & 0x01)) {

        case 0:
            pa[0] = pb[0];
            pa[1] = pb[1];
            pa[hw] = pb[hw];
            pa[hw+1] = pb[hw+1];
            break;

        case 1:
            pa[0] = avg2(pb[0], pb[1]);
            pa[1] = avg2(pb[1], pb[2]);
            pa[hw] = avg2(pb[hw], pb[hw+1]);
            pa[hw+1] = avg2(pb[hw+1], pb[hw+2]);
            break;

        case 2:
            pa[0] = avg2(pb[0], pb[hw]);
            pa[1] = avg2(pb[1], pb[hw+1]);
            pa[hw] = avg2(pb[hw], pb[hw*2]);
            pa[hw+1] = avg2(pb[hw+1], pb[(hw*2)+1]);
            break;

        case 3:
            pa[0] = avg4(pb[0], pb[1], pb[hw], pb[hw+1]);
            pa[1] = avg4(pb[1], pb[2], pb[hw+1], pb[hw+2]);
            pa[hw] = avg4(pb[hw], pb[hw+1], pb[hw*2], pb[(hw*2)+1]);
            pa[hw+1] = avg4(pb[hw+1], pb[hw+2], pb[(hw*2)+1], pb[(hw*2)+1]);
            break;
        }

        pa = ri->current_frame.data[2] + (y * ri->y_stride)/4 + x/2;
        pb = ri->last_frame.data[2] + (my/2) * (ri->y_stride/2) + (mx + 1)/2;
    }
#endif
}

static void apply_motion_8x8(RoqContext *ri, int x, int y,
    unsigned char mv, signed char mean_x, signed char mean_y)
{
    int mx, my, i, j, hw;
    unsigned char *pa, *pb;

    mx = x + 8 - (mv >> 4) - mean_x;
    my = y + 8 - (mv & 0xf) - mean_y;

    pa = ri->current_frame.data[0] + (y * ri->y_stride) + x;
    pb = ri->last_frame.data[0] + (my * ri->y_stride) + mx;
    for(i = 0; i < 8; i++) {
        pa[0] = pb[0];
        pa[1] = pb[1];
        pa[2] = pb[2];
        pa[3] = pb[3];
        pa[4] = pb[4];
        pa[5] = pb[5];
        pa[6] = pb[6];
        pa[7] = pb[7];
        pa += ri->y_stride;
        pb += ri->y_stride;
    }

#if 0
    pa = ri->current_frame.data[1] + (y/2) * (ri->c_stride) + x/2;
    pb = ri->last_frame.data[1] + (my/2) * (ri->c_stride) + (mx + 1)/2;
    for(i = 0; i < 4; i++) {
        pa[0] = pb[0];
        pa[1] = pb[1];
        pa[2] = pb[2];
        pa[3] = pb[3];
        pa += ri->c_stride;
        pb += ri->c_stride;
    }

    pa = ri->current_frame.data[2] + (y/2) * (ri->c_stride) + x/2;
    pb = ri->last_frame.data[2] + (my/2) * (ri->c_stride) + (mx + 1)/2;
    for(i = 0; i < 4; i++) {
        pa[0] = pb[0];
        pa[1] = pb[1];
        pa[2] = pb[2];
        pa[3] = pb[3];
        pa += ri->c_stride;
        pb += ri->c_stride;
    }
#else
    hw = ri->c_stride;
    pa = ri->current_frame.data[1] + (y * ri->y_stride)/4 + x/2;
    pb = ri->last_frame.data[1] + (my/2) * (ri->y_stride/2) + (mx + 1)/2;
    for(j = 0; j < 2; j++) {
        for(i = 0; i < 4; i++) {
            switch(((my & 0x01) << 1) | (mx & 0x01)) {

            case 0:
                pa[0] = pb[0];
                pa[1] = pb[1];
                pa[2] = pb[2];
                pa[3] = pb[3];
                break;

            case 1:
                pa[0] = avg2(pb[0], pb[1]);
                pa[1] = avg2(pb[1], pb[2]);
                pa[2] = avg2(pb[2], pb[3]);
                pa[3] = avg2(pb[3], pb[4]);
                break;
 
            case 2:
                pa[0] = avg2(pb[0], pb[hw]);
                pa[1] = avg2(pb[1], pb[hw+1]);
                pa[2] = avg2(pb[2], pb[hw+2]);
                pa[3] = avg2(pb[3], pb[hw+3]);
                break;

            case 3:
                pa[0] = avg4(pb[0], pb[1], pb[hw], pb[hw+1]);
                pa[1] = avg4(pb[1], pb[2], pb[hw+1], pb[hw+2]);
                pa[2] = avg4(pb[2], pb[3], pb[hw+2], pb[hw+3]);
                pa[3] = avg4(pb[3], pb[4], pb[hw+3], pb[hw+4]);
                break;
            }
            pa += ri->c_stride;
            pb += ri->c_stride;
        }

        pa = ri->current_frame.data[2] + (y * ri->y_stride)/4 + x/2;
        pb = ri->last_frame.data[2] + (my/2) * (ri->y_stride/2) + (mx + 1)/2;
    }
#endif
}

static void roqvideo_decode_frame(RoqContext *ri)
{
    unsigned int chunk_id = 0, chunk_arg = 0;
    unsigned long chunk_size = 0;
    int i, j, k, nv1, nv2, vqflg = 0, vqflg_pos = -1;
    int vqid, bpos, xpos, ypos, xp, yp, x, y;
    int frame_stats[2][4] = {{0},{0}};
    roq_qcell *qcell;
    unsigned char *buf = ri->buf;
    unsigned char *buf_end = ri->buf + ri->size;

    while (buf < buf_end) {
        chunk_id = get_word(buf);
        chunk_size = get_long(buf);
        chunk_arg = get_word(buf);

        if(chunk_id == RoQ_QUAD_VQ)
            break;
        if(chunk_id == RoQ_QUAD_CODEBOOK) {
            if((nv1 = chunk_arg >> 8) == 0)
                nv1 = 256;
            if((nv2 = chunk_arg & 0xff) == 0 && nv1 * 6 < chunk_size)
                nv2 = 256;
            for(i = 0; i < nv1; i++) {
                ri->cells[i].y0 = get_byte(buf);
                ri->cells[i].y1 = get_byte(buf);
                ri->cells[i].y2 = get_byte(buf);
                ri->cells[i].y3 = get_byte(buf);
                ri->cells[i].u = get_byte(buf);
                ri->cells[i].v = get_byte(buf);
            }
            for(i = 0; i < nv2; i++)
                for(j = 0; j < 4; j++)
                    ri->qcells[i].idx[j] = get_byte(buf);
        }
    }

    bpos = xpos = ypos = 0;
    while(bpos < chunk_size) {
        for (yp = ypos; yp < ypos + 16; yp += 8)
            for (xp = xpos; xp < xpos + 16; xp += 8) {
                if (vqflg_pos < 0) {
                    vqflg = buf[bpos++]; vqflg |= (buf[bpos++] << 8);
                    vqflg_pos = 7;
                }
                vqid = (vqflg >> (vqflg_pos * 2)) & 0x3;
                frame_stats[0][vqid]++;
                vqflg_pos--;

                switch(vqid) {
                case RoQ_ID_MOT:
                    apply_motion_8x8(ri, xp, yp, 0, 8, 8);
                    break;
                case RoQ_ID_FCC:
                    apply_motion_8x8(ri, xp, yp, buf[bpos++], chunk_arg >> 8,
                        chunk_arg & 0xff);
                    break;
                case RoQ_ID_SLD:
                    qcell = ri->qcells + buf[bpos++];
                    apply_vector_4x4(ri, xp, yp, ri->cells + qcell->idx[0]);
                    apply_vector_4x4(ri, xp+4, yp, ri->cells + qcell->idx[1]);
                    apply_vector_4x4(ri, xp, yp+4, ri->cells + qcell->idx[2]);
                    apply_vector_4x4(ri, xp+4, yp+4, ri->cells + qcell->idx[3]);
                    break;
                case RoQ_ID_CCC:
                    for (k = 0; k < 4; k++) {
                        x = xp; y = yp;
                        if(k & 0x01) x += 4;
                        if(k & 0x02) y += 4;

                        if (vqflg_pos < 0) {
                            vqflg = buf[bpos++];
                            vqflg |= (buf[bpos++] << 8);
                            vqflg_pos = 7;
                        }
                        vqid = (vqflg >> (vqflg_pos * 2)) & 0x3;
                        frame_stats[1][vqid]++;
                        vqflg_pos--;
                        switch(vqid) {
                        case RoQ_ID_MOT:
                            apply_motion_4x4(ri, x, y, 0, 8, 8);
                            break;
                        case RoQ_ID_FCC:
                            apply_motion_4x4(ri, x, y, buf[bpos++], 
                                chunk_arg >> 8, chunk_arg & 0xff);
                            break;
                        case RoQ_ID_SLD:
                            qcell = ri->qcells + buf[bpos++];
                            apply_vector_2x2(ri, x, y, ri->cells + qcell->idx[0]);
                            apply_vector_2x2(ri, x+2, y, ri->cells + qcell->idx[1]);
                            apply_vector_2x2(ri, x, y+2, ri->cells + qcell->idx[2]);
                            apply_vector_2x2(ri, x+2, y+2, ri->cells + qcell->idx[3]);
                            break;
                        case RoQ_ID_CCC:
                            apply_vector_2x2(ri, x, y, ri->cells + buf[bpos]);
                            apply_vector_2x2(ri, x+2, y, ri->cells + buf[bpos+1]);
                            apply_vector_2x2(ri, x, y+2, ri->cells + buf[bpos+2]);
                            apply_vector_2x2(ri, x+2, y+2, ri->cells + buf[bpos+3]);
                            bpos += 4;
                            break;
                        }
                    }
                    break;
                default:
                    av_log(ri->avctx, AV_LOG_ERROR, "Unknown vq code: %d\n", vqid);
            }
        }

        xpos += 16;
        if (xpos >= ri->avctx->width) {
            xpos -= ri->avctx->width;
            ypos += 16;
        }
        if(ypos >= ri->avctx->height)
            break;
    }
}


static int roq_decode_init(AVCodecContext *avctx)
{
    RoqContext *s = avctx->priv_data;
    int i;

    s->avctx = avctx;
    s->first_frame = 1;
    avctx->pix_fmt = PIX_FMT_YUV420P;
    avctx->has_b_frames = 0;
    dsputil_init(&s->dsp, avctx);

    uiclp = uiclip+512;
    for(i = -512; i < 512; i++)
        uiclp[i] = (i < 0 ? 0 : (i > 255 ? 255 : i));

    return 0;
}

static int roq_decode_frame(AVCodecContext *avctx,
                            void *data, int *data_size,
                            uint8_t *buf, int buf_size)
{
    RoqContext *s = avctx->priv_data;

    if (avctx->get_buffer(avctx, &s->current_frame)) {
        av_log(avctx, AV_LOG_ERROR, "  RoQ: get_buffer() failed\n");
        return -1;
    }
    s->y_stride = s->current_frame.linesize[0];
    s->c_stride = s->current_frame.linesize[1];

    s->buf = buf;
    s->size = buf_size;
    roqvideo_decode_frame(s);

    /* release the last frame if it is allocated */
    if (s->first_frame)
        s->first_frame = 0;
    else
        avctx->release_buffer(avctx, &s->last_frame);

    /* shuffle frames */
    s->last_frame = s->current_frame;

    *data_size = sizeof(AVFrame);
    *(AVFrame*)data = s->current_frame;

    return buf_size;
}

static int roq_decode_end(AVCodecContext *avctx)
{
    RoqContext *s = avctx->priv_data;

    /* release the last frame */
    avctx->release_buffer(avctx, &s->last_frame);

    return 0;
}

AVCodec roq_decoder = {
    "roqvideo",
    CODEC_TYPE_VIDEO,
    CODEC_ID_ROQ,
    sizeof(RoqContext),
    roq_decode_init,
    NULL,
    roq_decode_end,
    roq_decode_frame,
    CODEC_CAP_DR1,
};