view asv1.c @ 1352:e8ff4783f188 libavcodec

1) remove TBL support in PPC performance. It's much more useful to use the PMCs, and with Apple's CHUD it's fairly easy too. No reason to keep useless code around 2) make the PPC perf stuff a configure option 3) make put_pixels16_altivec a bit faster by unrolling the loop by 4 patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
author michaelni
date Wed, 09 Jul 2003 20:18:13 +0000
parents 1cbc2380d172
children 46d3fa8501cd
line wrap: on
line source

/*
 * ASUS V1 codec
 * Copyright (c) 2003 Michael Niedermayer
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 */
 
/**
 * @file asv1.c
 * ASUS V1 codec.
 */
 
#include "avcodec.h"
#include "dsputil.h"
#include "mpegvideo.h"

//#undef NDEBUG
//#include <assert.h>

#define VLC_BITS 5

typedef struct ASV1Context{
    AVCodecContext *avctx;
    DSPContext dsp;
    AVFrame picture;
    PutBitContext pb;
    GetBitContext gb;
    ScanTable scantable;
    int inv_qscale;
    int mb_width;
    int mb_height;
    int mb_width2;
    int mb_height2;
    DCTELEM __align8 block[6][64];
    uint16_t __align8 intra_matrix[64];
    int __align8 q_intra_matrix[64];
    uint8_t *bitstream_buffer;
    int bitstream_buffer_size;
} ASV1Context;

static const uint8_t scantab[64]={
    0x00,0x08,0x01,0x09,0x10,0x18,0x11,0x19,
    0x02,0x0A,0x03,0x0B,0x12,0x1A,0x13,0x1B,
    0x04,0x0C,0x05,0x0D,0x20,0x28,0x21,0x29,
    0x06,0x0E,0x07,0x0F,0x14,0x1C,0x15,0x1D,
    0x22,0x2A,0x23,0x2B,0x30,0x38,0x31,0x39,
};

static const uint8_t ccp_tab[17][2]={
    {0x2,2}, {0xE,5}, {0xD,5}, {0xC,5},
    {0xB,5}, {0xA,5}, {0x9,5}, {0x8,5},
    {0x7,5}, {0x6,5}, {0x5,5}, {0x4,5},
    {0x3,5}, {0x2,5}, {0x1,5}, {0x3,2},
    {0xF,5}, //EOB
};

static const uint8_t level_tab[7][2]={
    {3,4}, {3,3}, {3,2}, {0,3}, {2,2}, {2,3}, {2,4}
};

static VLC ccp_vlc;
static VLC level_vlc;

static void init_vlcs(ASV1Context *a){
    static int done = 0;

    if (!done) {
        done = 1;

        init_vlc(&ccp_vlc, VLC_BITS, 17, 
                 &ccp_tab[0][1], 2, 1,
                 &ccp_tab[0][0], 2, 1);
        init_vlc(&level_vlc,  VLC_BITS, 7, 
                 &level_tab[0][1], 2, 1,
                 &level_tab[0][0], 2, 1);
    }
}

static inline int get_level(GetBitContext *gb){
    int code= get_vlc2(gb, level_vlc.table, VLC_BITS, 1);

    if(code==3) return get_sbits(gb, 8);
    else        return code - 3;
}

static inline void put_level(PutBitContext *pb, int level){
    unsigned int index= level + 3;

    if(index <= 6) put_bits(pb, level_tab[index][1], level_tab[index][0]);
    else{
        put_bits(pb, level_tab[3][1], level_tab[3][0]);
        put_bits(pb, 8, level&0xFF);
    }
}

static inline int decode_block(ASV1Context *a, DCTELEM block[64]){
    int i;

    block[0]= 8*get_bits(&a->gb, 8);
    
    for(i=0; i<11; i++){
        const int ccp= get_vlc2(&a->gb, ccp_vlc.table, VLC_BITS, 1);

        if(ccp){
            if(ccp == 16) break;
            if(ccp < 0 || i>=10){
                printf("coded coeff pattern damaged\n");
                return -1;
            }

            if(ccp&1) block[a->scantable.permutated[4*i+0]]= (get_level(&a->gb) * a->intra_matrix[4*i+0])>>4;
            if(ccp&2) block[a->scantable.permutated[4*i+1]]= (get_level(&a->gb) * a->intra_matrix[4*i+1])>>4;
            if(ccp&4) block[a->scantable.permutated[4*i+2]]= (get_level(&a->gb) * a->intra_matrix[4*i+2])>>4;
            if(ccp&8) block[a->scantable.permutated[4*i+3]]= (get_level(&a->gb) * a->intra_matrix[4*i+3])>>4;
        }
    }

    return 0;
}

static inline void encode_block(ASV1Context *a, DCTELEM block[64]){
    int i;
    int nc_count=0;
    
    put_bits(&a->pb, 8, (block[0] + 32)>>6);
    block[0]= 0;
    
    for(i=0; i<10; i++){
        const int index= scantab[4*i];
        int ccp=0;

        if( (block[index + 0] = (block[index + 0]*a->q_intra_matrix[index + 0] + (1<<15))>>16) ) ccp |= 1;
        if( (block[index + 8] = (block[index + 8]*a->q_intra_matrix[index + 8] + (1<<15))>>16) ) ccp |= 2;
        if( (block[index + 1] = (block[index + 1]*a->q_intra_matrix[index + 1] + (1<<15))>>16) ) ccp |= 4;
        if( (block[index + 9] = (block[index + 9]*a->q_intra_matrix[index + 9] + (1<<15))>>16) ) ccp |= 8;

        if(ccp){
            for(;nc_count; nc_count--) 
                put_bits(&a->pb, ccp_tab[0][1], ccp_tab[0][0]);

            put_bits(&a->pb, ccp_tab[ccp][1], ccp_tab[ccp][0]);
            
            if(ccp&1) put_level(&a->pb, block[index + 0]);
            if(ccp&2) put_level(&a->pb, block[index + 8]);
            if(ccp&4) put_level(&a->pb, block[index + 1]);
            if(ccp&8) put_level(&a->pb, block[index + 9]);
        }else{
            nc_count++;
        }
    }
    put_bits(&a->pb, ccp_tab[16][1], ccp_tab[16][0]);
}

static inline int decode_mb(ASV1Context *a, DCTELEM block[6][64]){
    int i;

    a->dsp.clear_blocks(block[0]);
    
    for(i=0; i<6; i++){
        if( decode_block(a, block[i]) < 0) 
            return -1;
    }
    return 0;
}

static inline void encode_mb(ASV1Context *a, DCTELEM block[6][64]){
    int i;

    for(i=0; i<6; i++){
        encode_block(a, block[i]);
    }
}

static inline void idct_put(ASV1Context *a, int mb_x, int mb_y){
    DCTELEM (*block)[64]= a->block;
    int linesize= a->picture.linesize[0];
    
    uint8_t *dest_y  = a->picture.data[0] + (mb_y * 16* linesize              ) + mb_x * 16;
    uint8_t *dest_cb = a->picture.data[1] + (mb_y * 8 * a->picture.linesize[1]) + mb_x * 8;
    uint8_t *dest_cr = a->picture.data[2] + (mb_y * 8 * a->picture.linesize[2]) + mb_x * 8;

    a->dsp.idct_put(dest_y                 , linesize, block[0]);
    a->dsp.idct_put(dest_y              + 8, linesize, block[1]);
    a->dsp.idct_put(dest_y + 8*linesize    , linesize, block[2]);
    a->dsp.idct_put(dest_y + 8*linesize + 8, linesize, block[3]);

    if(!(a->avctx->flags&CODEC_FLAG_GRAY)){
        a->dsp.idct_put(dest_cb, a->picture.linesize[1], block[4]);
        a->dsp.idct_put(dest_cr, a->picture.linesize[2], block[5]);
    }
}

static inline void dct_get(ASV1Context *a, int mb_x, int mb_y){
    DCTELEM (*block)[64]= a->block;
    int linesize= a->picture.linesize[0];
    int i;
    
    uint8_t *ptr_y  = a->picture.data[0] + (mb_y * 16* linesize              ) + mb_x * 16;
    uint8_t *ptr_cb = a->picture.data[1] + (mb_y * 8 * a->picture.linesize[1]) + mb_x * 8;
    uint8_t *ptr_cr = a->picture.data[2] + (mb_y * 8 * a->picture.linesize[2]) + mb_x * 8;

    a->dsp.get_pixels(block[0], ptr_y                 , linesize);
    a->dsp.get_pixels(block[1], ptr_y              + 8, linesize);
    a->dsp.get_pixels(block[2], ptr_y + 8*linesize    , linesize);
    a->dsp.get_pixels(block[3], ptr_y + 8*linesize + 8, linesize);
    for(i=0; i<4; i++)
        a->dsp.fdct(block[i]);
    
    if(!(a->avctx->flags&CODEC_FLAG_GRAY)){
        a->dsp.get_pixels(block[4], ptr_cb, a->picture.linesize[1]);
        a->dsp.get_pixels(block[5], ptr_cr, a->picture.linesize[2]);
        for(i=4; i<6; i++)
            a->dsp.fdct(block[i]);
    }
}

static int decode_frame(AVCodecContext *avctx, 
                        void *data, int *data_size,
                        uint8_t *buf, int buf_size)
{
    ASV1Context * const a = avctx->priv_data;
    AVFrame *picture = data;
    AVFrame * const p= (AVFrame*)&a->picture;
    int mb_x, mb_y;

    *data_size = 0;

    /* special case for last picture */
    if (buf_size == 0) {
        return 0;
    }

    if(p->data[0])
        avctx->release_buffer(avctx, p);

    p->reference= 0;
    if(avctx->get_buffer(avctx, p) < 0){
        fprintf(stderr, "get_buffer() failed\n");
        return -1;
    }
    p->pict_type= I_TYPE;
    p->key_frame= 1;

    a->bitstream_buffer= av_fast_realloc(a->bitstream_buffer, &a->bitstream_buffer_size, buf_size + FF_INPUT_BUFFER_PADDING_SIZE);
    a->dsp.bswap_buf((uint32_t*)a->bitstream_buffer, (uint32_t*)buf, buf_size/4);
    init_get_bits(&a->gb, a->bitstream_buffer, buf_size*8);

    for(mb_y=0; mb_y<a->mb_height2; mb_y++){
        for(mb_x=0; mb_x<a->mb_width2; mb_x++){
            if( decode_mb(a, a->block) <0)
                return -1;
             
            idct_put(a, mb_x, mb_y);
        }
    }

    if(a->mb_width2 != a->mb_width){
        mb_x= a->mb_width2;
        for(mb_y=0; mb_y<a->mb_height2; mb_y++){
            if( decode_mb(a, a->block) <0)
                return -1;
             
            idct_put(a, mb_x, mb_y);
        }
    }

    if(a->mb_height2 != a->mb_height){
        mb_y= a->mb_height2;
        for(mb_x=0; mb_x<a->mb_width; mb_x++){
            if( decode_mb(a, a->block) <0)
                return -1;
             
            idct_put(a, mb_x, mb_y);
        }
    }
#if 0    
int i;
printf("%d %d\n", 8*buf_size, get_bits_count(&a->gb));
for(i=get_bits_count(&a->gb); i<8*buf_size; i++){
    printf("%d", get_bits1(&a->gb));
}

for(i=0; i<s->avctx->extradata_size; i++){
    printf("%c\n", ((uint8_t*)s->avctx->extradata)[i]);
}
#endif

    p->quality= (32 + a->inv_qscale/2)/a->inv_qscale;
    memset(p->qscale_table, p->quality, p->qstride*a->mb_height);
    
    *picture= *(AVFrame*)&a->picture;
    *data_size = sizeof(AVPicture);

    emms_c();
    
    return (get_bits_count(&a->gb)+31)/32*4;
}

static int encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data){
    ASV1Context * const a = avctx->priv_data;
    AVFrame *pict = data;
    AVFrame * const p= (AVFrame*)&a->picture;
    int size;
    int mb_x, mb_y;

    init_put_bits(&a->pb, buf, buf_size, NULL, NULL);
    
    *p = *pict;
    p->pict_type= I_TYPE;
    p->key_frame= 1;

    for(mb_y=0; mb_y<a->mb_height2; mb_y++){
        for(mb_x=0; mb_x<a->mb_width2; mb_x++){
            dct_get(a, mb_x, mb_y);
            encode_mb(a, a->block);
        }
    }

    if(a->mb_width2 != a->mb_width){
        mb_x= a->mb_width2;
        for(mb_y=0; mb_y<a->mb_height2; mb_y++){
            dct_get(a, mb_x, mb_y);
            encode_mb(a, a->block);
        }
    }

    if(a->mb_height2 != a->mb_height){
        mb_y= a->mb_height2;
        for(mb_x=0; mb_x<a->mb_width; mb_x++){
            dct_get(a, mb_x, mb_y);
            encode_mb(a, a->block);
        }
    }
    emms_c();
    
    align_put_bits(&a->pb);
    while(get_bit_count(&a->pb)&31)
        put_bits(&a->pb, 8, 0);
    
    size= get_bit_count(&a->pb)/32;
    
    a->dsp.bswap_buf((uint32_t*)buf, (uint32_t*)buf, size);
    
    return size*4;
}

static void common_init(AVCodecContext *avctx){
    ASV1Context * const a = avctx->priv_data;

    dsputil_init(&a->dsp, avctx);

    a->mb_width   = (avctx->width  + 15) / 16;
    a->mb_height  = (avctx->height + 15) / 16;
    a->mb_width2  = (avctx->width  + 0) / 16;
    a->mb_height2 = (avctx->height + 0) / 16;

    avctx->coded_frame= (AVFrame*)&a->picture;
    a->avctx= avctx;
}

static int decode_init(AVCodecContext *avctx){
    ASV1Context * const a = avctx->priv_data;
    AVFrame *p= (AVFrame*)&a->picture;
    int i;
 
    common_init(avctx);
    init_vlcs(a);
    ff_init_scantable(a->dsp.idct_permutation, &a->scantable, scantab);

    a->inv_qscale= le2me_32(((uint32_t*)avctx->extradata)[0]);
    if(a->inv_qscale == 0){
        printf("illegal qscale 0\n");
        a->inv_qscale= 6;
    }

    for(i=0; i<64; i++){
        int index= scantab[i];
        a->intra_matrix[i]= 64*ff_mpeg1_default_intra_matrix[index] / a->inv_qscale;
    }

    p->qstride= a->mb_width;
    p->qscale_table= av_mallocz( p->qstride * a->mb_height);

    return 0;
}

static int encode_init(AVCodecContext *avctx){
    ASV1Context * const a = avctx->priv_data;
    int i;
 
    common_init(avctx);
    
    if(avctx->global_quality == 0) avctx->global_quality= 4*FF_QUALITY_SCALE;

    a->inv_qscale= (32*FF_QUALITY_SCALE +  avctx->global_quality/2) / avctx->global_quality;
    
    avctx->extradata= av_mallocz(8);
    avctx->extradata_size=8;
    ((uint32_t*)avctx->extradata)[0]= le2me_32(a->inv_qscale);
    ((uint32_t*)avctx->extradata)[1]= le2me_32(ff_get_fourcc("ASUS"));
    
    for(i=0; i<64; i++){
        int q= 32*ff_mpeg1_default_intra_matrix[i];
        a->q_intra_matrix[i]= ((a->inv_qscale<<16) + q/2) / q;
    }

    return 0;
}

static int decode_end(AVCodecContext *avctx){
    ASV1Context * const a = avctx->priv_data;

    av_freep(&a->bitstream_buffer);
    av_freep(&a->picture.qscale_table);
    a->bitstream_buffer_size=0;
    
    avcodec_default_free_buffers(avctx);

    return 0;
}

AVCodec asv1_decoder = {
    "asv1",
    CODEC_TYPE_VIDEO,
    CODEC_ID_ASV1,
    sizeof(ASV1Context),
    decode_init,
    NULL,
    decode_end,
    decode_frame,
    CODEC_CAP_DR1,
};

#ifdef CONFIG_ENCODERS

AVCodec asv1_encoder = {
    "asv1",
    CODEC_TYPE_VIDEO,
    CODEC_ID_ASV1,
    sizeof(ASV1Context),
    encode_init,
    encode_frame,
    //encode_end,
};

#endif //CONFIG_ENCODERS