Mercurial > libavcodec.hg
changeset 2184:3378d0677903 libavcodec
4x4 SSE compare function
wavelet based compare functions
make epzs_motion_search() more flexible so it can be used for a wider range of block sizes
make get_penalty_factor() independent of MpegEncContext
author | michael |
---|---|
date | Sun, 22 Aug 2004 17:16:03 +0000 |
parents | 6d40885b03ad |
children | 784c38a3ffc2 |
files | avcodec.h dsputil.c dsputil.h motion_est.c motion_est_template.c |
diffstat | 5 files changed, 175 insertions(+), 44 deletions(-) [+] |
line wrap: on
line diff
--- a/avcodec.h Sun Aug 22 12:40:57 2004 +0000 +++ b/avcodec.h Sun Aug 22 17:16:03 2004 +0000 @@ -1262,6 +1262,8 @@ #define FF_CMP_VSAD 8 #define FF_CMP_VSSE 9 #define FF_CMP_NSSE 10 +#define FF_CMP_W53 11 +#define FF_CMP_W97 12 #define FF_CMP_CHROMA 256 /**
--- a/dsputil.c Sun Aug 22 12:40:57 2004 +0000 +++ b/dsputil.c Sun Aug 22 17:16:03 2004 +0000 @@ -219,6 +219,23 @@ } } +static int sse4_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) +{ + int s, i; + uint32_t *sq = squareTbl + 256; + + s = 0; + for (i = 0; i < h; i++) { + s += sq[pix1[0] - pix2[0]]; + s += sq[pix1[1] - pix2[1]]; + s += sq[pix1[2] - pix2[2]]; + s += sq[pix1[3] - pix2[3]]; + pix1 += line_size; + pix2 += line_size; + } + return s; +} + static int sse8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h) { int s, i; @@ -270,6 +287,103 @@ return s; } + +static inline int w_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int w, int h, int type){ + int s, i, j; + const int dec_count= w==8 ? 3 : 4; + int tmp[16*16]; +#if 0 + int level, ori; + static const int scale[2][2][4][4]={ + { + { + //8x8 dec=3 + {268, 239, 239, 213}, + { 0, 224, 224, 152}, + { 0, 135, 135, 110}, + },{ + //16x16 dec=4 + {344, 310, 310, 280}, + { 0, 320, 320, 228}, + { 0, 175, 175, 136}, + { 0, 129, 129, 102}, + } + },{ + {//FIXME 5/3 + //8x8 dec=3 + {275, 245, 245, 218}, + { 0, 230, 230, 156}, + { 0, 138, 138, 113}, + },{ + //16x16 dec=4 + {352, 317, 317, 286}, + { 0, 328, 328, 233}, + { 0, 180, 180, 140}, + { 0, 132, 132, 105}, + } + } + }; +#endif + + for (i = 0; i < h; i++) { + for (j = 0; j < w; j+=4) { + tmp[16*i+j+0] = (pix1[j+0] - pix2[j+0])<<4; + tmp[16*i+j+1] = (pix1[j+1] - pix2[j+1])<<4; + tmp[16*i+j+2] = (pix1[j+2] - pix2[j+2])<<4; + tmp[16*i+j+3] = (pix1[j+3] - pix2[j+3])<<4; + } + pix1 += line_size; + pix2 += line_size; + } + ff_spatial_dwt(tmp, w, h, 16, type, dec_count); + + s=0; +#if 0 + for(level=0; level<dec_count; level++){ + for(ori= level ? 1 : 0; ori<4; ori++){ + int sx= (ori&1) ? 1<<level: 0; + int stride= 16<<(dec_count-level); + int sy= (ori&2) ? 
stride>>1 : 0; + int size= 1<<level; + + for(i=0; i<size; i++){ + for(j=0; j<size; j++){ + int v= tmp[sx + sy + i*stride + j] * scale[type][dec_count-3][level][ori]; + s += ABS(v); + } + } + } + } +#endif + for (i = 0; i < h; i++) { + for (j = 0; j < w; j+=4) { + s+= ABS(tmp[16*i+j+0]); + s+= ABS(tmp[16*i+j+1]); + s+= ABS(tmp[16*i+j+2]); + s+= ABS(tmp[16*i+j+3]); + } + } + assert(s>=0); + + return s>>2; +} + +static int w53_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ + return w_c(v, pix1, pix2, line_size, 8, h, 1); +} + +static int w97_8_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ + return w_c(v, pix1, pix2, line_size, 8, h, 0); +} + +static int w53_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ + return w_c(v, pix1, pix2, line_size, 16, h, 1); +} + +static int w97_16_c(void *v, uint8_t * pix1, uint8_t * pix2, int line_size, int h){ + return w_c(v, pix1, pix2, line_size, 16, h, 0); +} + static void get_pixels_c(DCTELEM *restrict block, const uint8_t *pixels, int line_size) { int i; @@ -2733,6 +2847,12 @@ case FF_CMP_NSSE: cmp[i]= c->nsse[i]; break; + case FF_CMP_W53: + cmp[i]= c->w53[i]; + break; + case FF_CMP_W97: + cmp[i]= c->w97[i]; + break; default: av_log(NULL, AV_LOG_ERROR,"internal error in cmp function selection\n"); } @@ -3359,6 +3479,7 @@ c->sad[1]= pix_abs8_c; c->sse[0]= sse16_c; c->sse[1]= sse8_c; + c->sse[2]= sse4_c; SET_CMP_FUNC(quant_psnr) SET_CMP_FUNC(rd) SET_CMP_FUNC(bit) @@ -3368,7 +3489,11 @@ c->vsse[4]= vsse_intra16_c; c->nsse[0]= nsse16_c; c->nsse[1]= nsse8_c; - + c->w53[0]= w53_16_c; + c->w53[1]= w53_8_c; + c->w97[0]= w97_16_c; + c->w97[1]= w97_8_c; + c->add_bytes= add_bytes_c; c->diff_bytes= diff_bytes_c; c->sub_hfyu_median_prediction= sub_hfyu_median_prediction_c;
--- a/dsputil.h Sun Aug 22 12:40:57 2004 +0000 +++ b/dsputil.h Sun Aug 22 17:16:03 2004 +0000 @@ -163,6 +163,8 @@ me_cmp_func vsad[5]; me_cmp_func vsse[5]; me_cmp_func nsse[5]; + me_cmp_func w53[5]; + me_cmp_func w97[5]; me_cmp_func me_pre_cmp[5]; me_cmp_func me_cmp[5]; @@ -351,6 +353,29 @@ return (a & b) + (((a ^ b) & ~BYTE_VEC32(0x01)) >> 1); } +static inline int get_penalty_factor(int lambda, int lambda2, int type){ + switch(type&0xFF){ + default: + case FF_CMP_SAD: + return lambda>>FF_LAMBDA_SHIFT; + case FF_CMP_DCT: + return (3*lambda)>>(FF_LAMBDA_SHIFT+1); + case FF_CMP_W53: + return (4*lambda)>>(FF_LAMBDA_SHIFT); + case FF_CMP_W97: + return (2*lambda)>>(FF_LAMBDA_SHIFT); + case FF_CMP_SATD: + return (2*lambda)>>FF_LAMBDA_SHIFT; + case FF_CMP_RD: + case FF_CMP_PSNR: + case FF_CMP_SSE: + case FF_CMP_NSSE: + return lambda2>>FF_LAMBDA_SHIFT; + case FF_CMP_BIT: + return 1; + } +} + /** * Empty mmx state. * this must be called between any dsp function and float/double code.
--- a/motion_est.c Sun Aug 22 12:40:57 2004 +0000 +++ b/motion_est.c Sun Aug 22 17:16:03 2004 +0000 @@ -219,25 +219,6 @@ #include "motion_est_template.c" -static inline int get_penalty_factor(MpegEncContext *s, int type){ - switch(type&0xFF){ - default: - case FF_CMP_SAD: - return s->lambda>>FF_LAMBDA_SHIFT; - case FF_CMP_DCT: - return (3*s->lambda)>>(FF_LAMBDA_SHIFT+1); - case FF_CMP_SATD: - return (2*s->lambda)>>FF_LAMBDA_SHIFT; - case FF_CMP_RD: - case FF_CMP_PSNR: - case FF_CMP_SSE: - case FF_CMP_NSSE: - return s->lambda2>>FF_LAMBDA_SHIFT; - case FF_CMP_BIT: - return 1; - } -} - static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){ return 0; } @@ -1161,9 +1142,9 @@ assert(s->linesize == c->stride); assert(s->uvlinesize == c->uvstride); - c->penalty_factor = get_penalty_factor(s, c->avctx->me_cmp); - c->sub_penalty_factor= get_penalty_factor(s, c->avctx->me_sub_cmp); - c->mb_penalty_factor = get_penalty_factor(s, c->avctx->mb_cmp); + c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp); + c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp); + c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp); c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV; get_limits(s, 16*mb_x, 16*mb_y); @@ -1256,7 +1237,7 @@ } } - dmin = epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift); + dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift, 0, 16); break; } @@ -1424,7 +1405,7 @@ assert(s->quarter_sample==0 || s->quarter_sample==1); - c->pre_penalty_factor = get_penalty_factor(s, c->avctx->me_pre_cmp); + c->pre_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_pre_cmp); c->current_mv_penalty= c->mv_penalty[s->f_code] + MAX_MV; get_limits(s, 16*mb_x, 16*mb_y); @@ -1457,7 +1438,7 @@ c->pred_y = P_MEDIAN[1]; } - dmin = epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift); + dmin = 
ff_epzs_motion_search(s, &mx, &my, P, 0, 0, s->p_mv_table, (1<<16)>>shift, 0, 16); s->p_mv_table[xy][0] = mx<<shift; s->p_mv_table[xy][1] = my<<shift; @@ -1477,9 +1458,9 @@ uint8_t * const mv_penalty= c->mv_penalty[f_code] + MAX_MV; int mv_scale; - c->penalty_factor = get_penalty_factor(s, c->avctx->me_cmp); - c->sub_penalty_factor= get_penalty_factor(s, c->avctx->me_sub_cmp); - c->mb_penalty_factor = get_penalty_factor(s, c->avctx->mb_cmp); + c->penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_cmp); + c->sub_penalty_factor= get_penalty_factor(s->lambda, s->lambda2, c->avctx->me_sub_cmp); + c->mb_penalty_factor = get_penalty_factor(s->lambda, s->lambda2, c->avctx->mb_cmp); c->current_mv_penalty= mv_penalty; get_limits(s, 16*mb_x, 16*mb_y); @@ -1540,7 +1521,7 @@ mv_scale= ((s->pb_time - s->pp_time)<<16) / (s->pp_time<<shift); } - dmin = epzs_motion_search(s, &mx, &my, P, 0, ref_index, s->p_mv_table, mv_scale); + dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, ref_index, s->p_mv_table, mv_scale, 0, 16); break; } @@ -1731,7 +1712,7 @@ P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); } - dmin = epzs_motion_search(s, &mx, &my, P, 0, 0, mv_table, 1<<(16-shift)); + dmin = ff_epzs_motion_search(s, &mx, &my, P, 0, 0, mv_table, 1<<(16-shift), 0, 16); if(c->sub_flags&FLAG_QPEL) dmin = qpel_motion_search(s, &mx, &my, dmin, 0, 0, 0, 16); else
--- a/motion_est_template.c Sun Aug 22 12:40:57 2004 +0000 +++ b/motion_est_template.c Sun Aug 22 17:16:03 2004 +0000 @@ -851,15 +851,13 @@ static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr, int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2], - int ref_mv_scale, int flags) + int ref_mv_scale, int flags, int size, int h) { MotionEstContext * const c= &s->me; int best[2]={0, 0}; int d, dmin; int map_generation; const int penalty_factor= c->penalty_factor; - const int size=0; - const int h=16; const int ref_mv_stride= s->mb_stride; //pass as arg FIXME const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv beforepassing FIXME me_cmp_func cmpf, chroma_cmpf; @@ -872,6 +870,7 @@ map_generation= update_map_generation(c); + assert(cmpf); dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags); map[0]= map_generation; score_map[0]= dmin; @@ -882,7 +881,7 @@ CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) }else{ - if(dmin<256 && ( P_LEFT[0] |P_LEFT[1] + if(dmin<h*h && ( P_LEFT[0] |P_LEFT[1] |P_TOP[0] |P_TOP[1] |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){ *mx_ptr= 0; @@ -891,7 +890,7 @@ return dmin; } CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift) - if(dmin>256*2){ + if(dmin>h*h*2){ CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) CHECK_MV(P_LEFT[0] >>shift, P_LEFT[1] >>shift) @@ -899,7 +898,7 @@ CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift) } } - if(dmin>256*4){ + if(dmin>h*h*4){ if(c->pre_pass){ CHECK_CLIPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16, (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16) @@ -948,19 +947,18 @@ } //this function is dedicated to the braindamaged gcc -static inline int epzs_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr, +inline int ff_epzs_motion_search(MpegEncContext * s, int 
*mx_ptr, int *my_ptr, int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2], - int ref_mv_scale) + int ref_mv_scale, int size, int h) { MotionEstContext * const c= &s->me; //FIXME convert other functions in the same way if faster - switch(c->flags){ - case 0: - return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0); + if(c->flags==0 && h==16 && size==0){ + return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0, 0, 16); // case FLAG_QPEL: // return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL); - default: - return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags); + }else{ + return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, c->flags, size, h); } }