Mercurial > libavcodec.hg
view svq1enc.c @ 6920:d02af7474bff libavcodec
Prevent 128*1<<trellis from becoming 0 and creating 0 sized arrays.
fixes CID84 RUN2
CID85 RUN2
CID86 RUN2
CID87 RUN2
CID88 RUN2
CID89 RUN2
CID90 RUN2
CID91 RUN2
CID92 RUN2
CID93 RUN2
CID94 RUN2
CID95 RUN2
CID96 RUN2
CID97 RUN2
CID98 RUN2
CID99 RUN2
CID100 RUN2
CID101 RUN2
CID102 RUN2
CID103 RUN2
CID104 RUN2
CID105 RUN2
CID106 RUN2
author | michael |
---|---|
date | Wed, 28 May 2008 11:59:41 +0000 |
parents | e1302edb0f69 |
children | e943e1409077 |
line wrap: on
line source
/* * SVQ1 Encoder * Copyright (C) 2004 Mike Melanson <melanson@pcisys.net> * * This file is part of FFmpeg. * * FFmpeg is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * FFmpeg is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with FFmpeg; if not, write to the Free Software * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA */ /** * @file svq1enc.c * Sorenson Vector Quantizer #1 (SVQ1) video codec. * For more information of the SVQ1 algorithm, visit: * http://www.pcisys.net/~melanson/codecs/ */ #include "avcodec.h" #include "dsputil.h" #include "mpegvideo.h" #include "svq1.h" #include "svq1enc_cb.h" #undef NDEBUG #include <assert.h> typedef struct SVQ1Context { MpegEncContext m; // needed for motion estimation, should not be used for anything else, the idea is to make the motion estimation eventually independent of MpegEncContext, so this will be removed then (FIXME/XXX) AVCodecContext *avctx; DSPContext dsp; AVFrame picture; AVFrame current_picture; AVFrame last_picture; PutBitContext pb; GetBitContext gb; PutBitContext reorder_pb[6]; //why ooh why this sick breadth first order, everything is slower and more complex int frame_width; int frame_height; /* Y plane block dimensions */ int y_block_width; int y_block_height; /* U & V plane (C planes) block dimensions */ int c_block_width; int c_block_height; uint16_t *mb_type; uint32_t *dummy; int16_t (*motion_val8[3])[2]; int16_t (*motion_val16[3])[2]; int64_t rd_total; } SVQ1Context; static void svq1_write_header(SVQ1Context *s, int frame_type) { int i; /* frame code */ put_bits(&s->pb, 22, 0x20); /* temporal reference (sure hope this is a "don't care") */ put_bits(&s->pb, 8, 0x00); /* frame type */ put_bits(&s->pb, 2, frame_type - 1); if (frame_type == FF_I_TYPE) { /* no checksum since frame code is 0x20 */ /* no embedded string either */ /* output 5 unknown bits (2 + 2 + 1) */ put_bits(&s->pb, 5, 2); /* 2 needed by quicktime decoder */ for (i = 0; i < 7; i++) { if ((ff_svq1_frame_size_table[i].width == s->frame_width) && (ff_svq1_frame_size_table[i].height == s->frame_height)) { put_bits(&s->pb, 3, i); break; } } if (i == 7) { put_bits(&s->pb, 3, 7); put_bits(&s->pb, 12, s->frame_width); put_bits(&s->pb, 12, s->frame_height); } } /* no checksum or extra data (next 2 bits get 0) */ put_bits(&s->pb, 2, 0); } #define QUALITY_THRESHOLD 100 #define THRESHOLD_MULTIPLIER 0.6 #if defined(HAVE_ALTIVEC) #undef vector #endif static int encode_block(SVQ1Context *s, uint8_t *src, uint8_t *ref, uint8_t *decoded, int stride, int level, int threshold, int lambda, int intra){ int count, y, x, i, j, split, best_mean, best_score, best_count; int best_vector[6]; int block_sum[7]= {0, 0, 0, 0, 0, 0}; int w= 2<<((level+2)>>1); int h= 2<<((level+1)>>1); int size=w*h; int16_t block[7][256]; const int8_t *codebook_sum, *codebook; const uint16_t (*mean_vlc)[2]; const uint8_t (*multistage_vlc)[2]; best_score=0; //FIXME optimize, this doenst need to be done multiple times if(intra){ codebook_sum= svq1_intra_codebook_sum[level]; codebook= ff_svq1_intra_codebooks[level]; mean_vlc= ff_svq1_intra_mean_vlc; multistage_vlc= ff_svq1_intra_multistage_vlc[level]; for(y=0; y<h; y++){ for(x=0; x<w; x++){ int v= src[x + y*stride]; block[0][x + w*y]= v; best_score += v*v; block_sum[0] += v; } } }else{ codebook_sum= svq1_inter_codebook_sum[level]; codebook= ff_svq1_inter_codebooks[level]; mean_vlc= ff_svq1_inter_mean_vlc + 256; multistage_vlc= ff_svq1_inter_multistage_vlc[level]; for(y=0; y<h; y++){ for(x=0; x<w; x++){ int v= src[x + y*stride] - ref[x + y*stride]; block[0][x + w*y]= v; best_score += v*v; block_sum[0] += v; } } } best_count=0; best_score -= ((block_sum[0]*block_sum[0])>>(level+3)); best_mean= (block_sum[0] + (size>>1)) >> (level+3); if(level<4){ for(count=1; count<7; count++){ int best_vector_score= INT_MAX; int best_vector_sum=-999, best_vector_mean=-999; const int stage= count-1; const int8_t *vector; for(i=0; i<16; i++){ int sum= codebook_sum[stage*16 + i]; int sqr, diff, score; vector = codebook + stage*size*16 + i*size; sqr = s->dsp.ssd_int8_vs_int16(vector, block[stage], size); diff= block_sum[stage] - sum; score= sqr - ((diff*(int64_t)diff)>>(level+3)); //FIXME 64bit slooow if(score < best_vector_score){ int mean= (diff + (size>>1)) >> (level+3); assert(mean >-300 && mean<300); mean= av_clip(mean, intra?0:-256, 255); best_vector_score= score; best_vector[stage]= i; best_vector_sum= sum; best_vector_mean= mean; } } assert(best_vector_mean != -999); vector= codebook + stage*size*16 + best_vector[stage]*size; for(j=0; j<size; j++){ block[stage+1][j] = block[stage][j] - vector[j]; } block_sum[stage+1]= block_sum[stage] - best_vector_sum; best_vector_score += lambda*(+ 1 + 4*count + multistage_vlc[1+count][1] + mean_vlc[best_vector_mean][1]); if(best_vector_score < best_score){ best_score= best_vector_score; best_count= count; best_mean= best_vector_mean; } } } split=0; if(best_score > threshold && level){ int score=0; int offset= (level&1) ? stride*h/2 : w/2; PutBitContext backup[6]; for(i=level-1; i>=0; i--){ backup[i]= s->reorder_pb[i]; } score += encode_block(s, src , ref , decoded , stride, level-1, threshold>>1, lambda, intra); score += encode_block(s, src + offset, ref + offset, decoded + offset, stride, level-1, threshold>>1, lambda, intra); score += lambda; if(score < best_score){ best_score= score; split=1; }else{ for(i=level-1; i>=0; i--){ s->reorder_pb[i]= backup[i]; } } } if (level > 0) put_bits(&s->reorder_pb[level], 1, split); if(!split){ assert((best_mean >= 0 && best_mean<256) || !intra); assert(best_mean >= -256 && best_mean<256); assert(best_count >=0 && best_count<7); assert(level<4 || best_count==0); /* output the encoding */ put_bits(&s->reorder_pb[level], multistage_vlc[1 + best_count][1], multistage_vlc[1 + best_count][0]); put_bits(&s->reorder_pb[level], mean_vlc[best_mean][1], mean_vlc[best_mean][0]); for (i = 0; i < best_count; i++){ assert(best_vector[i]>=0 && best_vector[i]<16); put_bits(&s->reorder_pb[level], 4, best_vector[i]); } for(y=0; y<h; y++){ for(x=0; x<w; x++){ decoded[x + y*stride]= src[x + y*stride] - block[best_count][x + w*y] + best_mean; } } } return best_score; } static int svq1_encode_plane(SVQ1Context *s, int plane, unsigned char *src_plane, unsigned char *ref_plane, unsigned char *decoded_plane, int width, int height, int src_stride, int stride) { int x, y; int i; int block_width, block_height; int level; int threshold[6]; const int lambda= (s->picture.quality*s->picture.quality) >> (2*FF_LAMBDA_SHIFT); /* figure out the acceptable level thresholds in advance */ threshold[5] = QUALITY_THRESHOLD; for (level = 4; level >= 0; level--) threshold[level] = threshold[level + 1] * THRESHOLD_MULTIPLIER; block_width = (width + 15) / 16; block_height = (height + 15) / 16; if(s->picture.pict_type == FF_P_TYPE){ s->m.avctx= s->avctx; s->m.current_picture_ptr= &s->m.current_picture; s->m.last_picture_ptr = &s->m.last_picture; s->m.last_picture.data[0]= ref_plane; s->m.linesize= s->m.last_picture.linesize[0]= s->m.new_picture.linesize[0]= s->m.current_picture.linesize[0]= stride; s->m.width= width; s->m.height= height; s->m.mb_width= block_width; s->m.mb_height= block_height; s->m.mb_stride= s->m.mb_width+1; s->m.b8_stride= 2*s->m.mb_width+1; s->m.f_code=1; s->m.pict_type= s->picture.pict_type; s->m.me_method= s->avctx->me_method; s->m.me.scene_change_score=0; s->m.flags= s->avctx->flags; // s->m.out_format = FMT_H263; // s->m.unrestricted_mv= 1; s->m.lambda= s->picture.quality; s->m.qscale= (s->m.lambda*139 + FF_LAMBDA_SCALE*64) >> (FF_LAMBDA_SHIFT + 7); s->m.lambda2= (s->m.lambda*s->m.lambda + FF_LAMBDA_SCALE/2) >> FF_LAMBDA_SHIFT; if(!s->motion_val8[plane]){ s->motion_val8 [plane]= av_mallocz((s->m.b8_stride*block_height*2 + 2)*2*sizeof(int16_t)); s->motion_val16[plane]= av_mallocz((s->m.mb_stride*(block_height + 2) + 1)*2*sizeof(int16_t)); } s->m.mb_type= s->mb_type; //dummies, to avoid segfaults s->m.current_picture.mb_mean= (uint8_t *)s->dummy; s->m.current_picture.mb_var= (uint16_t*)s->dummy; s->m.current_picture.mc_mb_var= (uint16_t*)s->dummy; s->m.current_picture.mb_type= s->dummy; s->m.current_picture.motion_val[0]= s->motion_val8[plane] + 2; s->m.p_mv_table= s->motion_val16[plane] + s->m.mb_stride + 1; s->m.dsp= s->dsp; //move ff_init_me(&s->m); s->m.me.dia_size= s->avctx->dia_size; s->m.first_slice_line=1; for (y = 0; y < block_height; y++) { uint8_t src[stride*16]; s->m.new_picture.data[0]= src - y*16*stride; //ugly s->m.mb_y= y; for(i=0; i<16 && i + 16*y<height; i++){ memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width); for(x=width; x<16*block_width; x++) src[i*stride+x]= src[i*stride+x-1]; } for(; i<16 && i + 16*y<16*block_height; i++) memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width); for (x = 0; x < block_width; x++) { s->m.mb_x= x; ff_init_block_index(&s->m); ff_update_block_index(&s->m); ff_estimate_p_frame_motion(&s->m, x, y); } s->m.first_slice_line=0; } ff_fix_long_p_mvs(&s->m); ff_fix_long_mvs(&s->m, NULL, 0, s->m.p_mv_table, s->m.f_code, CANDIDATE_MB_TYPE_INTER, 0); } s->m.first_slice_line=1; for (y = 0; y < block_height; y++) { uint8_t src[stride*16]; for(i=0; i<16 && i + 16*y<height; i++){ memcpy(&src[i*stride], &src_plane[(i+16*y)*src_stride], width); for(x=width; x<16*block_width; x++) src[i*stride+x]= src[i*stride+x-1]; } for(; i<16 && i + 16*y<16*block_height; i++) memcpy(&src[i*stride], &src[(i-1)*stride], 16*block_width); s->m.mb_y= y; for (x = 0; x < block_width; x++) { uint8_t reorder_buffer[3][6][7*32]; int count[3][6]; int offset = y * 16 * stride + x * 16; uint8_t *decoded= decoded_plane + offset; uint8_t *ref= ref_plane + offset; int score[4]={0,0,0,0}, best; uint8_t temp[16*stride]; if(s->pb.buf_end - s->pb.buf - (put_bits_count(&s->pb)>>3) < 3000){ //FIXME check size av_log(s->avctx, AV_LOG_ERROR, "encoded frame too large\n"); return -1; } s->m.mb_x= x; ff_init_block_index(&s->m); ff_update_block_index(&s->m); if(s->picture.pict_type == FF_I_TYPE || (s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTRA)){ for(i=0; i<6; i++){ init_put_bits(&s->reorder_pb[i], reorder_buffer[0][i], 7*32); } if(s->picture.pict_type == FF_P_TYPE){ const uint8_t *vlc= ff_svq1_block_type_vlc[SVQ1_BLOCK_INTRA]; put_bits(&s->reorder_pb[5], vlc[1], vlc[0]); score[0]= vlc[1]*lambda; } score[0]+= encode_block(s, src+16*x, NULL, temp, stride, 5, 64, lambda, 1); for(i=0; i<6; i++){ count[0][i]= put_bits_count(&s->reorder_pb[i]); flush_put_bits(&s->reorder_pb[i]); } }else score[0]= INT_MAX; best=0; if(s->picture.pict_type == FF_P_TYPE){ const uint8_t *vlc= ff_svq1_block_type_vlc[SVQ1_BLOCK_INTER]; int mx, my, pred_x, pred_y, dxy; int16_t *motion_ptr; motion_ptr= h263_pred_motion(&s->m, 0, 0, &pred_x, &pred_y); if(s->m.mb_type[x + y*s->m.mb_stride]&CANDIDATE_MB_TYPE_INTER){ for(i=0; i<6; i++) init_put_bits(&s->reorder_pb[i], reorder_buffer[1][i], 7*32); put_bits(&s->reorder_pb[5], vlc[1], vlc[0]); s->m.pb= s->reorder_pb[5]; mx= motion_ptr[0]; my= motion_ptr[1]; assert(mx>=-32 && mx<=31); assert(my>=-32 && my<=31); assert(pred_x>=-32 && pred_x<=31); assert(pred_y>=-32 && pred_y<=31); ff_h263_encode_motion(&s->m, mx - pred_x, 1); ff_h263_encode_motion(&s->m, my - pred_y, 1); s->reorder_pb[5]= s->m.pb; score[1] += lambda*put_bits_count(&s->reorder_pb[5]); dxy= (mx&1) + 2*(my&1); s->dsp.put_pixels_tab[0][dxy](temp+16, ref + (mx>>1) + stride*(my>>1), stride, 16); score[1]+= encode_block(s, src+16*x, temp+16, decoded, stride, 5, 64, lambda, 0); best= score[1] <= score[0]; vlc= ff_svq1_block_type_vlc[SVQ1_BLOCK_SKIP]; score[2]= s->dsp.sse[0](NULL, src+16*x, ref, stride, 16); score[2]+= vlc[1]*lambda; if(score[2] < score[best] && mx==0 && my==0){ best=2; s->dsp.put_pixels_tab[0][0](decoded, ref, stride, 16); for(i=0; i<6; i++){ count[2][i]=0; } put_bits(&s->pb, vlc[1], vlc[0]); } } if(best==1){ for(i=0; i<6; i++){ count[1][i]= put_bits_count(&s->reorder_pb[i]); flush_put_bits(&s->reorder_pb[i]); } }else{ motion_ptr[0 ] = motion_ptr[1 ]= motion_ptr[2 ] = motion_ptr[3 ]= motion_ptr[0+2*s->m.b8_stride] = motion_ptr[1+2*s->m.b8_stride]= motion_ptr[2+2*s->m.b8_stride] = motion_ptr[3+2*s->m.b8_stride]=0; } } s->rd_total += score[best]; for(i=5; i>=0; i--){ ff_copy_bits(&s->pb, reorder_buffer[best][i], count[best][i]); } if(best==0){ s->dsp.put_pixels_tab[0][0](decoded, temp, stride, 16); } } s->m.first_slice_line=0; } return 0; } static av_cold int svq1_encode_init(AVCodecContext *avctx) { SVQ1Context * const s = avctx->priv_data; dsputil_init(&s->dsp, avctx); avctx->coded_frame= (AVFrame*)&s->picture; s->frame_width = avctx->width; s->frame_height = avctx->height; s->y_block_width = (s->frame_width + 15) / 16; s->y_block_height = (s->frame_height + 15) / 16; s->c_block_width = (s->frame_width / 4 + 15) / 16; s->c_block_height = (s->frame_height / 4 + 15) / 16; s->avctx= avctx; s->m.avctx= avctx; s->m.me.scratchpad= av_mallocz((avctx->width+64)*2*16*2*sizeof(uint8_t)); s->m.me.map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); s->m.me.score_map = av_mallocz(ME_MAP_SIZE*sizeof(uint32_t)); s->mb_type = av_mallocz((s->y_block_width+1)*s->y_block_height*sizeof(int16_t)); s->dummy = av_mallocz((s->y_block_width+1)*s->y_block_height*sizeof(int32_t)); h263_encode_init(&s->m); //mv_penalty return 0; } static int svq1_encode_frame(AVCodecContext *avctx, unsigned char *buf, int buf_size, void *data) { SVQ1Context * const s = avctx->priv_data; AVFrame *pict = data; AVFrame * const p= (AVFrame*)&s->picture; AVFrame temp; int i; if(avctx->pix_fmt != PIX_FMT_YUV410P){ av_log(avctx, AV_LOG_ERROR, "unsupported pixel format\n"); return -1; } if(!s->current_picture.data[0]){ avctx->get_buffer(avctx, &s->current_picture); avctx->get_buffer(avctx, &s->last_picture); } temp= s->current_picture; s->current_picture= s->last_picture; s->last_picture= temp; init_put_bits(&s->pb, buf, buf_size); *p = *pict; p->pict_type = avctx->gop_size && avctx->frame_number % avctx->gop_size ? FF_P_TYPE : FF_I_TYPE; p->key_frame = p->pict_type == FF_I_TYPE; svq1_write_header(s, p->pict_type); for(i=0; i<3; i++){ if(svq1_encode_plane(s, i, s->picture.data[i], s->last_picture.data[i], s->current_picture.data[i], s->frame_width / (i?4:1), s->frame_height / (i?4:1), s->picture.linesize[i], s->current_picture.linesize[i]) < 0) return -1; } // align_put_bits(&s->pb); while(put_bits_count(&s->pb) & 31) put_bits(&s->pb, 1, 0); flush_put_bits(&s->pb); return put_bits_count(&s->pb) / 8; } static av_cold int svq1_encode_end(AVCodecContext *avctx) { SVQ1Context * const s = avctx->priv_data; int i; av_log(avctx, AV_LOG_DEBUG, "RD: %f\n", s->rd_total/(double)(avctx->width*avctx->height*avctx->frame_number)); av_freep(&s->m.me.scratchpad); av_freep(&s->m.me.map); av_freep(&s->m.me.score_map); av_freep(&s->mb_type); av_freep(&s->dummy); for(i=0; i<3; i++){ av_freep(&s->motion_val8[i]); av_freep(&s->motion_val16[i]); } return 0; } AVCodec svq1_encoder = { "svq1", CODEC_TYPE_VIDEO, CODEC_ID_SVQ1, sizeof(SVQ1Context), svq1_encode_init, svq1_encode_frame, svq1_encode_end, .pix_fmts= (enum PixelFormat[]){PIX_FMT_YUV410P, PIX_FMT_NONE}, .long_name= "Sorenson Vector Quantizer 1", };