# HG changeset patch # User michaelni # Date 1023020298 0 # Node ID 7f2d2be2aee9f0024991a789b8b1194a2745aeae # Parent eda22d628b2d358ab69c06a8f291f375c69a5617 dont double check vectors more correct EPZS implementation much faster halfpel ME simplifications / reducing duplicate code 4MV in LQ mode diff -r eda22d628b2d -r 7f2d2be2aee9 motion_est.c --- a/motion_est.c Sun Jun 02 12:16:28 2002 +0000 +++ b/motion_est.c Sun Jun 02 12:18:18 2002 +0000 @@ -1,6 +1,7 @@ /* * Motion estimation * Copyright (c) 2000,2001 Fabrice Bellard. + * Copyright (c) 2002 Michael Niedermayer * * * This library is free software; you can redistribute it and/or @@ -25,14 +26,20 @@ #include "dsputil.h" #include "mpegvideo.h" -//#define ABS(a) ((a)>0 ? (a) : -(a)) -#define MAX(a,b) ((a) > (b) ? (a) : (b)) +#define SQ(a) ((a)*(a)) #define INTER_BIAS 257 -static int halfpel_motion_search(MpegEncContext * s, - int *mx_ptr, int *my_ptr, int dmin, - int xmin, int ymin, int xmax, int ymax, - int pred_x, int pred_y, uint8_t *ref_picture); +#define P_LAST P[0] +#define P_LEFT P[1] +#define P_TOP P[2] +#define P_TOPRIGHT P[3] +#define P_MEDIAN P[4] +#define P_LAST_LEFT P[5] +#define P_LAST_RIGHT P[6] +#define P_LAST_TOP P[7] +#define P_LAST_BOTTOM P[8] +#define P_MV1 P[9] + static int pix_sum(UINT8 * pix, int line_size) { @@ -359,62 +366,61 @@ #define CHECK_MV(x,y)\ {\ - d = pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\ - d += (mv_penalty[((x)<mb_x, s->mb_y);\ -if( (x)>(xmax<<(S)) ) printf("%d %d %d %d xmax" #v, (x), (y), s->mb_x, s->mb_y);\ -if( (y)<(ymin<<(S)) ) printf("%d %d %d %d ymin" #v, (x), (y), s->mb_x, s->mb_y);\ -if( (y)>(ymax<<(S)) ) printf("%d %d %d %d ymax" #v, (x), (y), s->mb_x, s->mb_y);\ +if( (x)<(xmin<<(S)) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\ +if( (x)>(xmax<<(S)) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\ +if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\ +if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\ static inline int small_diamond_search(MpegEncContext * s, int *best, int dmin, UINT8 *new_pic, UINT8 *old_pic, int pic_stride, int pred_x, int pred_y, UINT16 *mv_penalty, int quant, - int xmin, int ymin, int xmax, int ymax, int shift) + int xmin, int ymin, int xmax, int ymax, int shift, + uint32_t *map, uint16_t *score_map, int map_generation, + op_pixels_abs_func pix_abs) { int next_dir=-1; @@ -462,36 +468,19 @@ */ } -static inline int small_diamond_search4MV(MpegEncContext * s, int *best, int dmin, - UINT8 *new_pic, UINT8 *old_pic, int pic_stride, - int pred_x, int pred_y, UINT16 *mv_penalty, int quant, - int xmin, int ymin, int xmax, int ymax, int shift) -{ - int next_dir=-1; - - for(;;){ - int d; - const int dir= next_dir; - const int x= best[0]; - const int y= best[1]; - next_dir=-1; - -//printf("%d", dir); - if(dir!=2 && x>xmin) CHECK_MV4_DIR(x-1, y , 0) - if(dir!=3 && y>ymin) CHECK_MV4_DIR(x , y-1, 1) - if(dir!=0 && x=xmin && x<=xmax && y>=ymin && y<=ymax){ - d = pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride); - d += (mv_penalty[((x)<=3) return dmin; + if(fails>=SNAKE_1+1) return dmin; }else{ - c= -c; + if(dir&1) dir-= c*3; + else c= -c; +// c= -c; } - dir+=c*2; + dir+=c*SNAKE_2; fails++; } dir&=7; } } +static inline int cross_search(MpegEncContext * s, int *best, int dmin, + UINT8 *new_pic, UINT8 *old_pic, int pic_stride, + int pred_x, int pred_y, UINT16 *mv_penalty, int quant, + int xmin, int ymin, int xmax, int ymax, int shift, + uint32_t *map, uint16_t *score_map,int map_generation, + op_pixels_abs_func pix_abs) +{ + static int x_dir[4]= {-1, 0, 1, 0}; + static int y_dir[4]= { 0,-1, 0, 1}; + int improvement[2]={100000, 100000}; + int dirs[2]={2, 3}; + int dir; + int last_dir= -1; + + for(;;){ + dir= dirs[ improvement[0] > improvement[1] ? 0 : 1 ]; + if(improvement[dir&1]==-1) return dmin; + + { + const int x= best[0] + x_dir[dir]; + const int y= best[1] + y_dir[dir]; + const int key= (y<=xmin && x<=xmax && y>=ymin && y<=ymax){ + if(map[index]!=key){ + d = pix_abs(new_pic, old_pic + x + y*pic_stride, pic_stride); + d += (mv_penalty[(x<me_map_generation+= 1<<(ME_MAP_MV_BITS*2); + if(s->me_map_generation==0){ + s->me_map_generation= 1<<(ME_MAP_MV_BITS*2); + memset(s->me_map, 0, sizeof(uint32_t)*ME_MAP_SIZE); + } + return s->me_map_generation; +} + static int epzs_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr, - int P[5][2], int pred_x, int pred_y, + int P[10][2], int pred_x, int pred_y, int xmin, int ymin, int xmax, int ymax, uint8_t * ref_picture) { int best[2]={0, 0}; @@ -561,42 +626,74 @@ UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame int quant= s->qscale; // qscale of the prev frame const int shift= 1+s->quarter_sample; + uint32_t *map= s->me_map; + uint16_t *score_map= s->me_score_map; + int map_generation; new_pic = s->new_picture[0] + pic_xy; old_pic = ref_picture + pic_xy; - + + map_generation= update_map_generation(s); + dmin = pix_abs16x16(new_pic, old_pic, pic_stride); - if(dminmb_y == 0 || s->first_slice_line || s->first_gob_line)) { - CHECK_MV(P[1][0]>>shift, P[1][1]>>shift) + if ((s->mb_y == 0 || s->first_slice_line)) { + CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) + CHECK_MV(P_LAST[0]>>shift, P_LAST[1]>>shift) }else{ - CHECK_MV(P[4][0]>>shift, P[4][1]>>shift) - if(dmin>shift; - *my_ptr= P[4][1]>>shift; -//printf("M\n"); + if(dmin<256 && ( P_LEFT[0] |P_LEFT[1] + |P_TOP[0] |P_TOP[1] + |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){ + *mx_ptr= 0; + *my_ptr= 0; + s->skip_me=1; return dmin; } - CHECK_MV(P[1][0]>>shift, P[1][1]>>shift) - CHECK_MV(P[2][0]>>shift, P[2][1]>>shift) - CHECK_MV(P[3][0]>>shift, P[3][1]>>shift) + CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift) + if(dmin>256*2){ + CHECK_MV(P_LAST[0] >>shift, P_LAST[1] >>shift) + CHECK_MV(P_LEFT[0] >>shift, P_LEFT[1] >>shift) + CHECK_MV(P_TOP[0] >>shift, P_TOP[1] >>shift) + CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift) + } + } + if(dmin>256*4){ + CHECK_MV(P_LAST_RIGHT[0] >>shift, P_LAST_RIGHT[1] >>shift) + CHECK_MV(P_LAST_BOTTOM[0]>>shift, P_LAST_BOTTOM[1]>>shift) } - CHECK_MV(P[0][0]>>shift, P[0][1]>>shift) +#if 0 //doest only slow things down + if(dmin>512*3){ + int step; + dmin= score_map[0]; + best[0]= best[1]=0; + for(step=128; step>0; step>>=1){ + const int step2= step; + int y; + for(y=-step2+best[1]; y<=step2+best[1]; y+=step){ + int x; + if(yymax) continue; + for(x=-step2+best[0]; x<=step2+best[0]; x+=step){ + if(xxmax) continue; + if(x==best[0] && y==best[1]) continue; + CHECK_MV(x,y) + } + } + } + } +#endif //check(best[0],best[1],0, b0) if(s->me_method==ME_EPZS) dmin= small_diamond_search(s, best, dmin, new_pic, old_pic, pic_stride, - pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, shift); + pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, + shift, map, score_map, map_generation, pix_abs16x16); else - dmin= snake_search(s, best, dmin, new_pic, old_pic, pic_stride, - pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, shift); + dmin= cross_search(s, best, dmin, new_pic, old_pic, pic_stride, + pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, + shift, map, score_map, map_generation, pix_abs16x16); //check(best[0],best[1],0, b1) *mx_ptr= best[0]; *my_ptr= best[1]; @@ -607,7 +704,7 @@ static int epzs_motion_search4(MpegEncContext * s, int block, int *mx_ptr, int *my_ptr, - int P[6][2], int pred_x, int pred_y, + int P[10][2], int pred_x, int pred_y, int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture) { int best[2]={0, 0}; @@ -618,34 +715,47 @@ UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame int quant= s->qscale; // qscale of the prev frame const int shift= 1+s->quarter_sample; + uint32_t *map= s->me_map; + uint16_t *score_map= s->me_score_map; + int map_generation; new_pic = s->new_picture[0] + pic_xy; old_pic = ref_picture + pic_xy; - - dmin = pix_abs8x8(new_pic, old_pic, pic_stride); + + map_generation= update_map_generation(s); + dmin = 1000000; +//printf("%d %d %d %d //",xmin, ymin, xmax, ymax); /* first line */ - if ((s->mb_y == 0 || s->first_slice_line || s->first_gob_line) && block<2) { - CHECK_MV4(P[1][0]>>shift, P[1][1]>>shift) + if ((s->mb_y == 0 || s->first_slice_line) && block<2) { + CHECK_MV4(P_LEFT[0]>>shift, P_LEFT[1]>>shift) + CHECK_MV4(P_LAST[0]>>shift, P_LAST[1]>>shift) + CHECK_MV4(P_MV1[0]>>shift, P_MV1[1]>>shift) }else{ - CHECK_MV4(P[4][0]>>shift, P[4][1]>>shift) - if(dmin>shift; - *my_ptr= P[4][1]>>shift; -//printf("M\n"); - return dmin; + CHECK_MV4(P_MV1[0]>>shift, P_MV1[1]>>shift) + //FIXME try some early stop + if(dmin>64*2){ + CHECK_MV4(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift) + CHECK_MV4(P_LEFT[0]>>shift, P_LEFT[1]>>shift) + CHECK_MV4(P_TOP[0]>>shift, P_TOP[1]>>shift) + CHECK_MV4(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift) + CHECK_MV4(P_LAST[0]>>shift, P_LAST[1]>>shift) } - CHECK_MV4(P[1][0]>>shift, P[1][1]>>shift) - CHECK_MV4(P[2][0]>>shift, P[2][1]>>shift) - CHECK_MV4(P[3][0]>>shift, P[3][1]>>shift) + } + if(dmin>64*4){ + CHECK_MV4(P_LAST_RIGHT[0]>>shift, P_LAST_RIGHT[1]>>shift) + CHECK_MV4(P_LAST_BOTTOM[0]>>shift, P_LAST_BOTTOM[1]>>shift) } - CHECK_MV4(P[0][0]>>shift, P[0][1]>>shift) - CHECK_MV4(P[5][0]>>shift, P[5][1]>>shift) -//check(best[0],best[1],0, b0) - dmin= small_diamond_search4MV(s, best, dmin, new_pic, old_pic, pic_stride, - pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, shift); -//check(best[0],best[1],0, b1) + if(s->me_method==ME_EPZS) + dmin= small_diamond_search(s, best, dmin, new_pic, old_pic, pic_stride, + pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, + shift, map, score_map, map_generation, pix_abs8x8); + else + dmin= cross_search(s, best, dmin, new_pic, old_pic, pic_stride, + pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax, + shift, map, score_map, map_generation, pix_abs8x8); + *mx_ptr= best[0]; *my_ptr= best[1]; @@ -654,56 +764,50 @@ } #define CHECK_HALF_MV(suffix, x, y) \ - d= pix_abs16x16_ ## suffix(pix, ptr+((x)>>1), s->linesize);\ +{\ + d= pix_abs_ ## suffix(pix, ptr+((x)>>1), s->linesize);\ d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*quant;\ - if(d>1), s->linesize);\ - d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*quant;\ - if(dmv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame const int quant= s->qscale; - int pen_x, pen_y; - int mx, my, mx1, my1, d, xx, yy, dminh; + int mx, my, xx, yy, dminh; UINT8 *pix, *ptr; + if(s->skip_me){ + *mx_ptr = 0; + *my_ptr = 0; + return dmin; + }else + + xx = 16 * s->mb_x + 8*(n&1); + yy = 16 * s->mb_y + 8*(n>>1); + pix = s->new_picture[0] + (yy * s->linesize) + xx; + mx = *mx_ptr; my = *my_ptr; - ptr = ref_picture + (my * s->linesize) + mx; - - xx = 16 * s->mb_x; - yy = 16 * s->mb_y; - pix = s->new_picture[0] + (yy * s->linesize) + xx; + ptr = ref_picture + ((yy + my) * s->linesize) + (xx + mx); dminh = dmin; if (mx > xmin && mx < xmax && my > ymin && my < ymax) { + int dx=0, dy=0; + int d, pen_x, pen_y; - mx= mx1= 2*(mx - xx); - my= my1= 2*(my - yy); - if(dmin < Z_THRESHOLD && mx==0 && my==0){ - *mx_ptr = 0; - *my_ptr = 0; - return dmin; - } + mx<<=1; + my<<=1; pen_x= pred_x + mx; pen_y= pred_y + my; @@ -720,9 +824,11 @@ CHECK_HALF_MV(y2 , 0, +1) CHECK_HALF_MV(xy2, +1, +1) + mx+=dx; + my+=dy; }else{ - mx= 2*(mx - xx); - my= 2*(my - yy); + mx<<=1; + my<<=1; } *mx_ptr = mx; @@ -730,64 +836,115 @@ return dminh; } -static inline void halfpel_motion_search4(MpegEncContext * s, +static inline int fast_halfpel_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr, int dmin, int xmin, int ymin, int xmax, int ymax, - int pred_x, int pred_y, int block_x, int block_y, - uint8_t *ref_picture) + int pred_x, int pred_y, uint8_t *ref_picture, + op_pixels_abs_func pix_abs_x2, + op_pixels_abs_func pix_abs_y2, op_pixels_abs_func pix_abs_xy2, int n) { UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame + uint16_t *score_map= s->me_score_map; const int quant= s->qscale; - int pen_x, pen_y; - int mx, my, mx1, my1, d, xx, yy, dminh; + int mx, my, xx, yy, dminh; UINT8 *pix, *ptr; - xx = 8 * block_x; - yy = 8 * block_y; + if(s->skip_me){ +// printf("S"); + *mx_ptr = 0; + *my_ptr = 0; + return dmin; + } +// printf("N"); + + xx = 16 * s->mb_x + 8*(n&1); + yy = 16 * s->mb_y + 8*(n>>1); pix = s->new_picture[0] + (yy * s->linesize) + xx; - + mx = *mx_ptr; my = *my_ptr; - ptr = ref_picture + ((yy+my) * s->linesize) + xx + mx; - + ptr = ref_picture + ((yy + my) * s->linesize) + (xx + mx); + dminh = dmin; if (mx > xmin && mx < xmax && my > ymin && my < ymax) { + int dx=0, dy=0; + int d, pen_x, pen_y; + const int index= (my<linesize; - CHECK_HALF_MV4(xy2, -1, -1) - CHECK_HALF_MV4(y2 , 0, -1) - CHECK_HALF_MV4(xy2, +1, -1) - - ptr+= s->linesize; - CHECK_HALF_MV4(x2 , -1, 0) - CHECK_HALF_MV4(x2 , +1, 0) - CHECK_HALF_MV4(xy2, -1, +1) - CHECK_HALF_MV4(y2 , 0, +1) - CHECK_HALF_MV4(xy2, +1, +1) + if(t<=b){ + CHECK_HALF_MV(y2 , 0, -1) + if(l<=r){ + CHECK_HALF_MV(xy2, -1, -1) + if(t+r<=b+l){ + CHECK_HALF_MV(xy2, +1, -1) + ptr+= s->linesize; + }else{ + ptr+= s->linesize; + CHECK_HALF_MV(xy2, -1, +1) + } + CHECK_HALF_MV(x2 , -1, 0) + }else{ + CHECK_HALF_MV(xy2, +1, -1) + if(t+l<=b+r){ + CHECK_HALF_MV(xy2, -1, -1) + ptr+= s->linesize; + }else{ + ptr+= s->linesize; + CHECK_HALF_MV(xy2, +1, +1) + } + CHECK_HALF_MV(x2 , +1, 0) + } + }else{ + if(l<=r){ + if(t+l<=b+r){ + CHECK_HALF_MV(xy2, -1, -1) + ptr+= s->linesize; + }else{ + ptr+= s->linesize; + CHECK_HALF_MV(xy2, +1, +1) + } + CHECK_HALF_MV(x2 , -1, 0) + CHECK_HALF_MV(xy2, -1, +1) + }else{ + if(t+r<=b+l){ + CHECK_HALF_MV(xy2, +1, -1) + ptr+= s->linesize; + }else{ + ptr+= s->linesize; + CHECK_HALF_MV(xy2, -1, +1) + } + CHECK_HALF_MV(x2 , +1, 0) + CHECK_HALF_MV(xy2, +1, +1) + } + CHECK_HALF_MV(y2 , 0, +1) + } + mx+=dx; + my+=dy; }else{ - mx*=2; - my*=2; + mx<<=1; + my<<=1; } *mx_ptr = mx; *my_ptr = my; + return dminh; } -static inline void set_p_mv_tables(MpegEncContext * s, int mx, int my) +static inline void set_p_mv_tables(MpegEncContext * s, int mx, int my, int mv4) { const int xy= s->mb_x + 1 + (s->mb_y + 1)*(s->mb_width + 2); @@ -795,7 +952,7 @@ s->p_mv_table[xy][1] = my; /* has allready been set to the 4 MV if 4MV is done */ - if(!(s->flags&CODEC_FLAG_4MV)){ + if(mv4){ int mot_xy= s->block_index[0]; s->motion_val[mot_xy ][0]= mx; @@ -840,6 +997,87 @@ } } +static inline int mv4_search(MpegEncContext *s, int xmin, int ymin, int xmax, int ymax, int mx, int my, int shift) +{ + int block; + int P[10][2]; + uint8_t *ref_picture= s->last_picture[0]; + int dmin_sum=0; + + for(block=0; block<4; block++){ + int mx4, my4; + int pred_x4, pred_y4; + int dmin4; + static const int off[4]= {2, 1, 1, -1}; + const int mot_stride = s->block_wrap[0]; + const int mot_xy = s->block_index[block]; +// const int block_x= (block&1); +// const int block_y= (block>>1); +#if 1 // this saves us a bit of cliping work and shouldnt affect compression in a negative way + const int rel_xmin4= xmin; + const int rel_xmax4= xmax; + const int rel_ymin4= ymin; + const int rel_ymax4= ymax; +#else + const int rel_xmin4= xmin - block_x*8; + const int rel_xmax4= xmax - block_x*8 + 8; + const int rel_ymin4= ymin - block_y*8; + const int rel_ymax4= ymax - block_y*8 + 8; +#endif + P_LAST[0] = s->motion_val[mot_xy ][0]; + P_LAST[1] = s->motion_val[mot_xy ][1]; + P_LEFT[0] = s->motion_val[mot_xy - 1][0]; + P_LEFT[1] = s->motion_val[mot_xy - 1][1]; + P_LAST_RIGHT[0] = s->motion_val[mot_xy + 1][0]; + P_LAST_RIGHT[1] = s->motion_val[mot_xy + 1][1]; + P_LAST_BOTTOM[0]= s->motion_val[mot_xy + 1*mot_stride][0]; + P_LAST_BOTTOM[1]= s->motion_val[mot_xy + 1*mot_stride][1]; + + if(P_LEFT[0] > (rel_xmax4<mb_y == 0 || s->first_slice_line) && block<2) { + pred_x4= P_LEFT[0]; + pred_y4= P_LEFT[1]; + } else { + P_TOP[0] = s->motion_val[mot_xy - mot_stride ][0]; + P_TOP[1] = s->motion_val[mot_xy - mot_stride ][1]; + P_TOPRIGHT[0] = s->motion_val[mot_xy - mot_stride + off[block]][0]; + P_TOPRIGHT[1] = s->motion_val[mot_xy - mot_stride + off[block]][1]; + if(P_TOP[1] > (rel_ymax4< (rel_xmax4< (rel_ymax4<out_format == FMT_H263){ + pred_x4 = P_MEDIAN[0]; + pred_y4 = P_MEDIAN[1]; + }else { /* mpeg1 at least */ + pred_x4= P_LEFT[0]; + pred_y4= P_LEFT[1]; + } + } + P_MV1[0]= mx; + P_MV1[1]= my; + + dmin4 = epzs_motion_search4(s, block, &mx4, &my4, P, pred_x4, pred_y4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, ref_picture); + + dmin4= fast_halfpel_motion_search(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, + pred_x4, pred_y4, ref_picture, pix_abs8x8_x2, + pix_abs8x8_y2, pix_abs8x8_xy2, block); + + s->motion_val[ s->block_index[block] ][0]= mx4; + s->motion_val[ s->block_index[block] ][1]= my4; + dmin_sum+= dmin4; + } + return dmin_sum; +} + void ff_estimate_p_frame_motion(MpegEncContext * s, int mb_x, int mb_y) { @@ -848,27 +1086,40 @@ int xmin, ymin, xmax, ymax; int rel_xmin, rel_ymin, rel_xmax, rel_ymax; int pred_x=0, pred_y=0; - int P[6][2]; + int P[10][2]; const int shift= 1+s->quarter_sample; int mb_type=0; uint8_t *ref_picture= s->last_picture[0]; get_limits(s, &range, &xmin, &ymin, &xmax, &ymax, s->f_code); + rel_xmin= xmin - mb_x*16; + rel_xmax= xmax - mb_x*16; + rel_ymin= ymin - mb_y*16; + rel_ymax= ymax - mb_y*16; + s->skip_me=0; switch(s->me_method) { case ME_ZERO: default: no_motion_search(s, &mx, &my); + mx-= mb_x*16; + my-= mb_y*16; dmin = 0; break; case ME_FULL: dmin = full_motion_search(s, &mx, &my, range, xmin, ymin, xmax, ymax, ref_picture); + mx-= mb_x*16; + my-= mb_y*16; break; case ME_LOG: dmin = log_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture); + mx-= mb_x*16; + my-= mb_y*16; break; case ME_PHODS: dmin = phods_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture); + mx-= mb_x*16; + my-= mb_y*16; break; case ME_X1: case ME_EPZS: @@ -876,133 +1127,68 @@ const int mot_stride = s->block_wrap[0]; const int mot_xy = s->block_index[0]; - rel_xmin= xmin - mb_x*16; - rel_xmax= xmax - mb_x*16; - rel_ymin= ymin - mb_y*16; - rel_ymax= ymax - mb_y*16; + P_LAST[0] = s->motion_val[mot_xy ][0]; + P_LAST[1] = s->motion_val[mot_xy ][1]; + P_LEFT[0] = s->motion_val[mot_xy - 1][0]; + P_LEFT[1] = s->motion_val[mot_xy - 1][1]; + P_LAST_RIGHT[0] = s->motion_val[mot_xy + 2][0]; + P_LAST_RIGHT[1] = s->motion_val[mot_xy + 2][1]; + P_LAST_BOTTOM[0]= s->motion_val[mot_xy + 2*mot_stride][0]; + P_LAST_BOTTOM[1]= s->motion_val[mot_xy + 2*mot_stride][1]; - P[0][0] = s->motion_val[mot_xy ][0]; - P[0][1] = s->motion_val[mot_xy ][1]; - P[1][0] = s->motion_val[mot_xy - 1][0]; - P[1][1] = s->motion_val[mot_xy - 1][1]; - if(P[1][0] > (rel_xmax< (rel_xmax<first_slice_line || s->first_gob_line)) { - P[4][0] = P[1][0]; - P[4][1] = P[1][1]; + if ((mb_y == 0 || s->first_slice_line)) { + pred_x= P_LEFT[0]; + pred_y= P_LEFT[1]; } else { - P[2][0] = s->motion_val[mot_xy - mot_stride ][0]; - P[2][1] = s->motion_val[mot_xy - mot_stride ][1]; - P[3][0] = s->motion_val[mot_xy - mot_stride + 2 ][0]; - P[3][1] = s->motion_val[mot_xy - mot_stride + 2 ][1]; - if(P[2][1] > (rel_ymax< (rel_ymax<motion_val[mot_xy - mot_stride ][0]; + P_TOP[1] = s->motion_val[mot_xy - mot_stride ][1]; + P_TOPRIGHT[0] = s->motion_val[mot_xy - mot_stride + 2][0]; + P_TOPRIGHT[1] = s->motion_val[mot_xy - mot_stride + 2][1]; + if(P_TOP[1] > (rel_ymax< (rel_ymax<out_format == FMT_H263){ - pred_x = P[4][0]; - pred_y = P[4][1]; - }else { /* mpeg1 at least */ - pred_x= P[1][0]; - pred_y= P[1][1]; + P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); + P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); + + if(s->out_format == FMT_H263){ + pred_x = P_MEDIAN[0]; + pred_y = P_MEDIAN[1]; + }else { /* mpeg1 at least */ + pred_x= P_LEFT[0]; + pred_y= P_LEFT[1]; + } } } dmin = epzs_motion_search(s, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax, ref_picture); - mx+= mb_x*16; - my+= mb_y*16; break; } - - if(s->flags&CODEC_FLAG_4MV){ - int block; - - mb_type|= MB_TYPE_INTER4V; - - for(block=0; block<4; block++){ - int mx4, my4; - int pred_x4, pred_y4; - int dmin4; - static const int off[4]= {2, 1, 1, -1}; - const int mot_stride = s->block_wrap[0]; - const int mot_xy = s->block_index[block]; - const int block_x= mb_x*2 + (block&1); - const int block_y= mb_y*2 + (block>>1); - - const int rel_xmin4= xmin - block_x*8; - const int rel_xmax4= xmax - block_x*8 + 8; - const int rel_ymin4= ymin - block_y*8; - const int rel_ymax4= ymax - block_y*8 + 8; - - P[0][0] = s->motion_val[mot_xy ][0]; - P[0][1] = s->motion_val[mot_xy ][1]; - P[1][0] = s->motion_val[mot_xy - 1][0]; - P[1][1] = s->motion_val[mot_xy - 1][1]; - if(P[1][0] > (rel_xmax4<first_slice_line || s->first_gob_line) && block<2) { - P[4][0] = P[1][0]; - P[4][1] = P[1][1]; - } else { - P[2][0] = s->motion_val[mot_xy - mot_stride ][0]; - P[2][1] = s->motion_val[mot_xy - mot_stride ][1]; - P[3][0] = s->motion_val[mot_xy - mot_stride + off[block]][0]; - P[3][1] = s->motion_val[mot_xy - mot_stride + off[block]][1]; - if(P[2][1] > (rel_ymax4< (rel_xmax4< (rel_ymax4<out_format == FMT_H263){ - pred_x4 = P[4][0]; - pred_y4 = P[4][1]; - }else { /* mpeg1 at least */ - pred_x4= P[1][0]; - pred_y4= P[1][1]; - } - P[5][0]= mx - mb_x*16; - P[5][1]= my - mb_y*16; - - dmin4 = epzs_motion_search4(s, block, &mx4, &my4, P, pred_x4, pred_y4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, ref_picture); - - halfpel_motion_search4(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, - pred_x4, pred_y4, block_x, block_y, ref_picture); - - s->motion_val[ s->block_index[block] ][0]= mx4; - s->motion_val[ s->block_index[block] ][1]= my4; - } - } /* intra / predictive decision */ xx = mb_x * 16; yy = mb_y * 16; pix = s->new_picture[0] + (yy * s->linesize) + xx; - /* At this point (mx,my) are full-pell and the absolute displacement */ - ppix = ref_picture + (my * s->linesize) + mx; + /* At this point (mx,my) are full-pell and the relative displacement */ + ppix = ref_picture + ((yy+my) * s->linesize) + (xx+mx); sum = pix_sum(pix, s->linesize); -#if 0 - varc = pix_dev(pix, s->linesize, (sum+128)>>8) + INTER_BIAS; - vard = pix_abs16x16(pix, ppix, s->linesize); -#else + sum= (sum+8)>>4; - varc = ((pix_norm1(pix, s->linesize) - sum*sum + 128 + 500)>>8); + varc = (pix_norm1(pix, s->linesize) - sum*sum + 500 + 128)>>8; vard = (pix_norm(pix, ppix, s->linesize)+128)>>8; -#endif - - s->mb_var[s->mb_width * mb_y + mb_x] = varc; - s->avg_mb_var+= varc; - s->mc_mb_var += vard; - +//printf("%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout); + s->mb_var [s->mb_width * mb_y + mb_x] = varc; + s->mc_mb_var[s->mb_width * mb_y + mb_x] = vard; + s->mb_var_sum += varc; + s->mc_mb_var_sum += vard; +//printf("E%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout); #if 0 printf("varc=%4d avg_var=%4d (sum=%4d) vard=%4d mx=%2d my=%2d\n", @@ -1013,19 +1199,49 @@ mb_type|= MB_TYPE_INTRA; if (varc*2 + 200 > vard){ mb_type|= MB_TYPE_INTER; - halfpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, pred_x, pred_y, ref_picture); + if(s->me_method >= ME_EPZS) + fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, + pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2, + pix_abs16x16_xy2, 0); + else + halfpel_motion_search( s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, + pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2, + pix_abs16x16_xy2, 0); }else{ - mx = mx*2 - mb_x*32; - my = my*2 - mb_y*32; + mx <<=1; + my <<=1; } + if((s->flags&CODEC_FLAG_4MV) + && !s->skip_me && varc>50 && vard>10){ + mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift); + mb_type|=MB_TYPE_INTER4V; + + set_p_mv_tables(s, mx, my, 0); + }else + set_p_mv_tables(s, mx, my, 1); }else{ if (vard <= 64 || vard < varc) { mb_type|= MB_TYPE_INTER; if (s->me_method != ME_ZERO) { - halfpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, pred_x, pred_y, ref_picture); + if(s->me_method >= ME_EPZS) + dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, + pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2, + pix_abs16x16_xy2, 0); + else + dmin= halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, + pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2, + pix_abs16x16_xy2, 0); + if((s->flags&CODEC_FLAG_4MV) + && !s->skip_me && varc>50 && vard>10){ + int dmin4= mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift); + if(dmin4 + 128 mb_type[mb_y*s->mb_width + mb_x]= mb_type; - set_p_mv_tables(s, mx, my); } int ff_estimate_motion_b(MpegEncContext * s, @@ -1052,79 +1267,83 @@ int xmin, ymin, xmax, ymax; int rel_xmin, rel_ymin, rel_xmax, rel_ymax; int pred_x=0, pred_y=0; - int P[6][2]; + int P[10][2]; const int shift= 1+s->quarter_sample; const int mot_stride = s->mb_width + 2; const int mot_xy = (mb_y + 1)*mot_stride + mb_x + 1; get_limits(s, &range, &xmin, &ymin, &xmax, &ymax, f_code); + rel_xmin= xmin - mb_x*16; + rel_xmax= xmax - mb_x*16; + rel_ymin= ymin - mb_y*16; + rel_ymax= ymax - mb_y*16; switch(s->me_method) { case ME_ZERO: default: no_motion_search(s, &mx, &my); dmin = 0; + mx-= mb_x*16; + my-= mb_y*16; break; case ME_FULL: dmin = full_motion_search(s, &mx, &my, range, xmin, ymin, xmax, ymax, ref_picture); + mx-= mb_x*16; + my-= mb_y*16; break; case ME_LOG: dmin = log_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture); + mx-= mb_x*16; + my-= mb_y*16; break; case ME_PHODS: dmin = phods_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture); + mx-= mb_x*16; + my-= mb_y*16; break; case ME_X1: case ME_EPZS: { - rel_xmin= xmin - mb_x*16; - rel_xmax= xmax - mb_x*16; - rel_ymin= ymin - mb_y*16; - rel_ymax= ymax - mb_y*16; + P_LAST[0] = mv_table[mot_xy ][0]; + P_LAST[1] = mv_table[mot_xy ][1]; + P_LEFT[0] = mv_table[mot_xy - 1][0]; + P_LEFT[1] = mv_table[mot_xy - 1][1]; + P_LAST_RIGHT[0] = mv_table[mot_xy + 1][0]; + P_LAST_RIGHT[1] = mv_table[mot_xy + 1][1]; + P_LAST_BOTTOM[0] = mv_table[mot_xy + mot_stride][0]; + P_LAST_BOTTOM[1] = mv_table[mot_xy + mot_stride][1]; - P[0][0] = mv_table[mot_xy ][0]; - P[0][1] = mv_table[mot_xy ][1]; - P[1][0] = mv_table[mot_xy - 1][0]; - P[1][1] = mv_table[mot_xy - 1][1]; - if(P[1][0] > (rel_xmax< (rel_xmax<first_slice_line || s->first_gob_line)) { - P[4][0] = P[1][0]; - P[4][1] = P[1][1]; + if ((mb_y == 0 || s->first_slice_line)) { } else { - P[2][0] = mv_table[mot_xy - mot_stride ][0]; - P[2][1] = mv_table[mot_xy - mot_stride ][1]; - P[3][0] = mv_table[mot_xy - mot_stride + 1 ][0]; - P[3][1] = mv_table[mot_xy - mot_stride + 1 ][1]; - if(P[2][1] > (rel_ymax< (rel_ymax< (rel_ymax< (rel_ymax<new_picture[0] + (yy * s->linesize) + xx; - /* At this point (mx,my) are full-pell and the absolute displacement */ -// ppix = ref_picture + (my * s->linesize) + mx; - - dmin= halfpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, pred_x, pred_y, ref_picture); - + dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, + pred_x, pred_y, ref_picture, pix_abs16x16_x2, pix_abs16x16_y2, + pix_abs16x16_xy2, 0); +//printf("%d %d %d %d//", s->mb_x, s->mb_y, mx, my); // s->mb_type[mb_y*s->mb_width + mb_x]= mb_type; mv_table[mot_xy][0]= mx; mv_table[mot_xy][1]= my; @@ -1140,6 +1359,7 @@ int pred_bx, int pred_by) { //FIXME optimize? + //FIXME direct mode penalty UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame uint8_t *dest_y = s->me_scratchpad; uint8_t *ptr; @@ -1201,7 +1421,7 @@ static inline int direct_search(MpegEncContext * s, int mb_x, int mb_y) { - int P[6][2]; + int P[10][2]; const int mot_stride = s->mb_width + 2; const int mot_xy = (mb_y + 1)*mot_stride + mb_x + 1; int dmin, dmin2; @@ -1266,23 +1486,29 @@ } } - P[0][0] = mv_table[mot_xy ][0]; - P[0][1] = mv_table[mot_xy ][1]; - P[1][0] = mv_table[mot_xy - 1][0]; - P[1][1] = mv_table[mot_xy - 1][1]; - + P_LAST[0] = mv_table[mot_xy ][0]; + P_LAST[1] = mv_table[mot_xy ][1]; + P_LEFT[0] = mv_table[mot_xy - 1][0]; + P_LEFT[1] = mv_table[mot_xy - 1][1]; + P_LAST_RIGHT[0] = mv_table[mot_xy + 1][0]; + P_LAST_RIGHT[1] = mv_table[mot_xy + 1][1]; + P_LAST_BOTTOM[0] = mv_table[mot_xy + mot_stride][0]; + P_LAST_BOTTOM[1] = mv_table[mot_xy + mot_stride][1]; +/* + if(P_LEFT[0] > (rel_xmax<first_slice_line || s->first_gob_line)) { - P[4][0] = P[1][0]; - P[4][1] = P[1][1]; + if ((mb_y == 0 || s->first_slice_line)) { } else { - P[2][0] = mv_table[mot_xy - mot_stride ][0]; - P[2][1] = mv_table[mot_xy - mot_stride ][1]; - P[3][0] = mv_table[mot_xy - mot_stride + 1 ][0]; - P[3][1] = mv_table[mot_xy - mot_stride + 1 ][1]; + P_TOP[0] = mv_table[mot_xy - mot_stride ][0]; + P_TOP[1] = mv_table[mot_xy - mot_stride ][1]; + P_TOPRIGHT[0] = mv_table[mot_xy - mot_stride + 1 ][0]; + P_TOPRIGHT[1] = mv_table[mot_xy - mot_stride + 1 ][1]; - P[4][0]= mid_pred(P[1][0], P[2][0], P[3][0]); - P[4][1]= mid_pred(P[1][1], P[2][1], P[3][1]); + P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); + P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); } dmin = epzs_motion_search(s, &mx, &my, P, 0, 0, -16, -16, 15, 15, ref_picture); if(mx==0 && my==0) dmin=99999999; // not representable, due to rounding stuff @@ -1332,12 +1558,9 @@ void ff_estimate_b_frame_motion(MpegEncContext * s, int mb_x, int mb_y) { - const int mot_stride = s->mb_width + 2; - const int xy = (mb_y + 1)*mot_stride + mb_x + 1; const int quant= s->qscale; int fmin, bmin, dmin, fbmin; int type=0; - int motion_fx, motion_fy, motion_bx, motion_by; dmin= direct_search(s, mb_x, mb_y); @@ -1365,24 +1588,36 @@ score=fbmin; type= MB_TYPE_BIDIR; } - s->mc_mb_var += score; + s->mc_mb_var_sum += score; + s->mc_mb_var[mb_y*s->mb_width + mb_x] = score; } +/* +{ +static int count=0; +static int sum=0; +if(type==MB_TYPE_DIRECT){ + int diff= ABS(s->b_forw_mv_table) +} +}*/ s->mb_type[mb_y*s->mb_width + mb_x]= type; +/* if(mb_y==0 && mb_x==0) printf("\n"); + if(mb_x==0) printf("\n"); + printf("%d", av_log2(type)); +*/ } /* find best f_code for ME which do unlimited searches */ int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type) { - int f_code; - if(s->me_method>=ME_EPZS){ - int mv_num[8]; + int score[8]; int i, y; - int loose=0; UINT8 * fcode_tab= s->fcode_tab; + int best_fcode=-1; + int best_score=-10000000; - for(i=0; i<8; i++) mv_num[i]=0; + for(i=0; i<8; i++) score[i]= s->mb_num*(8-i); //FIXME *2 and all other too so its the same but nicer for(y=0; ymb_height; y++){ int x; @@ -1390,25 +1625,30 @@ i= y*s->mb_width; for(x=0; xmb_width; x++){ if(s->mb_type[i] & type){ - mv_num[ fcode_tab[mv_table[xy][0] + MAX_MV] ]++; - mv_num[ fcode_tab[mv_table[xy][1] + MAX_MV] ]++; -//printf("%d %d %d\n", s->mv_table[0][i], fcode_tab[s->mv_table[0][i] + MAX_MV], i); + int fcode= MAX(fcode_tab[mv_table[xy][0] + MAX_MV], + fcode_tab[mv_table[xy][1] + MAX_MV]); + int j; + + for(j=0; jpict_type==B_TYPE || s->mc_mb_var[i] < s->mb_var[i]) + score[j]-= 170; + } } i++; xy++; } } - - for(i=MAX_FCODE; i>1; i--){ - int threshold; - loose+= mv_num[i]; + + for(i=1; i<8; i++){ + if(score[i] > best_score){ + best_score= score[i]; + best_fcode= i; + } +// printf("%d %d\n", i, score[i]); + } - if(s->pict_type==B_TYPE) threshold= 0; - else threshold= s->mb_num/20; //FIXME - if(loose > threshold) break; - } // printf("fcode: %d type: %d\n", i, s->pict_type); - return i; + return best_fcode; /* for(i=0; i<=MAX_FCODE; i++){ printf("%d ", mv_num[i]); } @@ -1423,7 +1663,8 @@ const int f_code= s->f_code; int y; UINT8 * fcode_tab= s->fcode_tab; - +//int clip=0; +//int noclip=0; /* clip / convert to intra 16x16 type MVs */ for(y=0; ymb_height; y++){ int x; @@ -1439,13 +1680,16 @@ s->mb_type[i] |= MB_TYPE_INTRA; s->p_mv_table[xy][0] = 0; s->p_mv_table[xy][1] = 0; +//clip++; } +//else +// noclip++; } xy++; i++; } } - +//printf("%d no:%d %d//\n", clip, noclip, f_code); if(s->flags&CODEC_FLAG_4MV){ const int wrap= 2+ s->mb_width*2;