Mercurial > libavcodec.hg
changeset 2249:2b1a5e1fd449 libavcodec
merge predict_plane() with DWTELEM->8bit conversation (21% faster)
author | michael |
---|---|
date | Thu, 23 Sep 2004 12:20:41 +0000 |
parents | e4e1b4f31db6 |
children | 902caf560c43 |
files | snow.c |
diffstat | 1 files changed, 36 insertions(+), 31 deletions(-) [+] |
line wrap: on
line diff
--- a/snow.c Thu Sep 23 09:42:25 2004 +0000 +++ b/snow.c Thu Sep 23 12:20:41 2004 +0000 @@ -160,7 +160,8 @@ -4,-4,-4,-4,-4,-4,-4,-4,-4,-3,-3,-3,-3,-2,-2,-1, }; -#define OBMC_MAX 64 +#define LOG2_OBMC_MAX 6 +#define OBMC_MAX (1<<(LOG2_OBMC_MAX)) #if 0 //64*cubic static const uint8_t obmc32[1024]={ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, @@ -2031,7 +2032,7 @@ } //FIXME name clenup (b_w, block_w, b_width stuff) -static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){ +static always_inline void add_yblock(SnowContext *s, DWTELEM *dst, uint8_t *dst8, uint8_t *src, uint8_t *obmc, int src_x, int src_y, int b_w, int b_h, int w, int h, int dst_stride, int src_stride, int obmc_stride, int b_x, int b_y, int add, int plane_index){ const int b_width = s->b_width << s->block_max_depth; const int b_height= s->b_height << s->block_max_depth; const int b_stride= b_width; @@ -2077,6 +2078,7 @@ assert(src_stride > 7*MB_SIZE); dst += src_x + src_y*dst_stride; + dst8+= src_x + src_y*src_stride; // src += src_x + src_y*src_stride; block[0]= tmp+3*MB_SIZE; @@ -2153,14 +2155,20 @@ +obmc2[x] * block[2][x + y*src_stride] +obmc3[x] * block[1][x + y*src_stride] +obmc4[x] * block[0][x + y*src_stride]; - - v *= 256/OBMC_MAX; + + v <<= 8 - LOG2_OBMC_MAX; if(FRAC_BITS != 8){ v += 1<<(7 - FRAC_BITS); v >>= 8 - FRAC_BITS; } - if(add) dst[x + y*dst_stride] += v; - else dst[x + y*dst_stride] -= v; + if(add){ + v += dst[x + y*dst_stride]; + v = (v + (1<<(FRAC_BITS-1))) >> FRAC_BITS; + if(v&(~255)) v= ~(v>>31); + dst8[x + y*src_stride] = v; + }else{ + dst[x + y*dst_stride] -= v; + } } } #endif @@ -2175,17 +2183,28 @@ int block_w = plane_index ? block_size/2 : block_size; const uint8_t *obmc = plane_index ? obmc_tab[s->block_max_depth+1] : obmc_tab[s->block_max_depth]; int obmc_stride= plane_index ? block_size : 2*block_size; - int ref_stride= s->last_picture.linesize[plane_index]; + int ref_stride= s->current_picture.linesize[plane_index]; uint8_t *ref = s->last_picture.data[plane_index]; + uint8_t *dst8= s->current_picture.data[plane_index]; int w= p->width; int h= p->height; START_TIMER if(s->keyframe || (s->avctx->debug&512)){ - for(y=0; y<h; y++){ - for(x=0; x<w; x++){ - if(add) buf[x + y*w]+= 128<<FRAC_BITS; - else buf[x + y*w]-= 128<<FRAC_BITS; + if(add){ + for(y=0; y<h; y++){ + for(x=0; x<w; x++){ + int v= buf[x + y*w] + (128<<FRAC_BITS) + (1<<(FRAC_BITS-1)); + v >>= FRAC_BITS; + if(v&(~255)) v= ~(v>>31); + dst8[x + y*ref_stride]= v; + } + } + }else{ + for(y=0; y<h; y++){ + for(x=0; x<w; x++){ + buf[x + y*w]-= 128<<FRAC_BITS; + } } } @@ -2196,7 +2215,7 @@ for(mb_x=0; mb_x<=mb_w; mb_x++){ START_TIMER - add_yblock(s, buf, ref, obmc, + add_yblock(s, buf, dst8, ref, obmc, block_w*mb_x - block_w/2, block_w*mb_y - block_w/2, block_w, block_w, @@ -2791,15 +2810,9 @@ } } } +{START_TIMER predict_plane(s, s->spatial_dwt_buffer, plane_index, 1); - //FIXME optimize - for(y=0; y<h; y++){ - for(x=0; x<w; x++){ - int v= (s->spatial_dwt_buffer[y*w + x]+(1<<(FRAC_BITS-1)))>>FRAC_BITS; - if(v&(~255)) v= ~(v>>31); - s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= v; - } - } +STOP_TIMER("pred-conv")} if(s->avctx->flags&CODEC_FLAG_PSNR){ int64_t error= 0; @@ -2902,8 +2915,7 @@ for(y=0; y<h; y++){ for(x=0; x<w; x++){ - int v= (s->spatial_dwt_buffer[y*w + x]+(1<<(FRAC_BITS-1)))>>FRAC_BITS; - if(v&(~255)) v= ~(v>>31); + int v= s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]; s->mconly_picture.data[plane_index][y*s->mconly_picture.linesize[plane_index] + x]= v; } } @@ -2929,16 +2941,9 @@ } } } +{START_TIMER predict_plane(s, s->spatial_dwt_buffer, plane_index, 1); - - //FIXME optimize - for(y=0; y<h; y++){ - for(x=0; x<w; x++){ - int v= (s->spatial_dwt_buffer[y*w + x]+(1<<(FRAC_BITS-1)))>>FRAC_BITS; - if(v&(~255)) v= ~(v>>31); - s->current_picture.data[plane_index][y*s->current_picture.linesize[plane_index] + x]= v; - } - } +STOP_TIMER("predict_plane conv2")} } emms_c();