comparison motion_est_template.c @ 1950:a3c60fa850dc libavcodec

Motion estimation cleanup: replace ugly macros with always_inline functions. That way it's much more readable and flexible, as always_inline can simply be removed while the macros couldn't be. About 0.5% speedup with default parameters.
author michael
date Thu, 22 Apr 2004 03:31:29 +0000
parents e039d79185c2
children 15c885db82a8
comparison
equal deleted inserted replaced
1949:66215baae7b9 1950:a3c60fa850dc
20 20
21 /** 21 /**
22 * @file motion_est_template.c 22 * @file motion_est_template.c
23 * Motion estimation template. 23 * Motion estimation template.
24 */ 24 */
25 //FIXME ref2_y next_pic? 25
26 //lets hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...) 26 //lets hope gcc will remove the unused vars ...(gcc 3.2.2 seems to do it ...)
27 //Note, the last line is there to kill these ugly unused var warnings
28 #define LOAD_COMMON\ 27 #define LOAD_COMMON\
29 uint32_t * const score_map= s->me.score_map;\ 28 uint32_t * const score_map= s->me.score_map;\
30 const int time_pp= s->pp_time;\
31 const int time_pb= s->pb_time;\
32 const int xmin= s->me.xmin;\ 29 const int xmin= s->me.xmin;\
33 const int ymin= s->me.ymin;\ 30 const int ymin= s->me.ymin;\
34 const int xmax= s->me.xmax;\ 31 const int xmax= s->me.xmax;\
35 const int ymax= s->me.ymax;\ 32 const int ymax= s->me.ymax;\
36 uint8_t * const src_y= src_data[0];\ 33 uint8_t *mv_penalty= s->me.current_mv_penalty;\
37 uint8_t * const src_u= src_data[1];\ 34 const int pred_x= s->me.pred_x;\
38 uint8_t * const src_v= src_data[2];\ 35 const int pred_y= s->me.pred_y;\
39 uint8_t * const ref_y= ref_data[0];\ 36
40 uint8_t * const ref_u= ref_data[1];\
41 uint8_t * const ref_v= ref_data[2];\
42 op_pixels_func (*hpel_put)[4];\
43 op_pixels_func (*hpel_avg)[4]= &s->dsp.avg_pixels_tab[size];\
44 op_pixels_func (*chroma_hpel_put)[4];\
45 qpel_mc_func (*qpel_put)[16];\
46 qpel_mc_func (*qpel_avg)[16]= &s->dsp.avg_qpel_pixels_tab[size];\
47 const __attribute__((unused)) int unu= time_pp + time_pb + (size_t)src_u + (size_t)src_v + (size_t)ref_u + (size_t)ref_v\
48 + (size_t)hpel_avg + (size_t)qpel_avg + (size_t)score_map\
49 + xmin + xmax + ymin + ymax;\
50 if(s->no_rounding /*FIXME b_type*/){\
51 hpel_put= &s->dsp.put_no_rnd_pixels_tab[size];\
52 chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1];\
53 qpel_put= &s->dsp.put_no_rnd_qpel_pixels_tab[size];\
54 }else{\
55 hpel_put=& s->dsp.put_pixels_tab[size];\
56 chroma_hpel_put= &s->dsp.put_pixels_tab[size+1];\
57 qpel_put= &s->dsp.put_qpel_pixels_tab[size];\
58 }
59
60
61 #ifdef CMP_HPEL
62
63 #define CHECK_HALF_MV(dx, dy, x, y)\ 37 #define CHECK_HALF_MV(dx, dy, x, y)\
64 {\ 38 {\
65 const int hx= 2*(x)+(dx);\ 39 const int hx= 2*(x)+(dx);\
66 const int hy= 2*(y)+(dy);\ 40 const int hy= 2*(y)+(dy);\
67 CMP_HPEL(d, dx, dy, x, y, size);\ 41 d= cmp(s, x, y, dx, dy, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);\
68 d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\ 42 d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
69 COPY3_IF_LT(dmin, d, bx, hx, by, hy)\ 43 COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
70 } 44 }
71 45
72 #if 0 46 #if 0
73 static int RENAME(hpel_motion_search)(MpegEncContext * s, 47 static int hpel_motion_search)(MpegEncContext * s,
74 int *mx_ptr, int *my_ptr, int dmin, 48 int *mx_ptr, int *my_ptr, int dmin,
75 int pred_x, int pred_y, uint8_t *ref_data[3], 49 uint8_t *ref_data[3],
76 int size, uint8_t * const mv_penalty) 50 int size)
77 { 51 {
78 const int xx = 16 * s->mb_x + 8*(n&1); 52 const int xx = 16 * s->mb_x + 8*(n&1);
79 const int yy = 16 * s->mb_y + 8*(n>>1); 53 const int yy = 16 * s->mb_y + 8*(n>>1);
80 const int mx = *mx_ptr; 54 const int mx = *mx_ptr;
81 const int my = *my_ptr; 55 const int my = *my_ptr;
92 chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1]; 66 chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1];
93 }else{ 67 }else{
94 hpel_put=& s->dsp.put_pixels_tab[size]; 68 hpel_put=& s->dsp.put_pixels_tab[size];
95 chroma_hpel_put= &s->dsp.put_pixels_tab[size+1]; 69 chroma_hpel_put= &s->dsp.put_pixels_tab[size+1];
96 } 70 }
97 cmp= s->dsp.me_cmp[size]; 71 cmpf= s->dsp.me_cmp[size];
98 chroma_cmp= s->dsp.me_cmp[size+1]; 72 chroma_cmpf= s->dsp.me_cmp[size+1];
99 cmp_sub= s->dsp.me_sub_cmp[size]; 73 cmp_sub= s->dsp.me_sub_cmp[size];
100 chroma_cmp_sub= s->dsp.me_sub_cmp[size+1]; 74 chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
101 75
102 if(s->me.skip){ //FIXME somehow move up (benchmark) 76 if(s->me.skip){ //FIXME somehow move up (benchmark)
103 *mx_ptr = 0; 77 *mx_ptr = 0;
136 110
137 return dmin; 111 return dmin;
138 } 112 }
139 113
140 #else 114 #else
141 static int RENAME(hpel_motion_search)(MpegEncContext * s, 115 static int hpel_motion_search(MpegEncContext * s,
142 int *mx_ptr, int *my_ptr, int dmin, 116 int *mx_ptr, int *my_ptr, int dmin,
143 int pred_x, int pred_y, uint8_t *src_data[3], 117 int src_index, int ref_index,
144 uint8_t *ref_data[3], int stride, int uvstride, 118 int size, int h)
145 int size, int h, uint8_t * const mv_penalty)
146 { 119 {
147 const int mx = *mx_ptr; 120 const int mx = *mx_ptr;
148 const int my = *my_ptr; 121 const int my = *my_ptr;
149 const int penalty_factor= s->me.sub_penalty_factor; 122 const int penalty_factor= s->me.sub_penalty_factor;
150 me_cmp_func cmp_sub, chroma_cmp_sub; 123 me_cmp_func cmp_sub, chroma_cmp_sub;
151 int bx=2*mx, by=2*my; 124 int bx=2*mx, by=2*my;
152 125
153 LOAD_COMMON 126 LOAD_COMMON
127 int flags= s->me.sub_flags;
154 128
155 //FIXME factorize 129 //FIXME factorize
156 130
157 cmp_sub= s->dsp.me_sub_cmp[size]; 131 cmp_sub= s->dsp.me_sub_cmp[size];
158 chroma_cmp_sub= s->dsp.me_sub_cmp[size+1]; 132 chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
162 *my_ptr = 0; 136 *my_ptr = 0;
163 return dmin; 137 return dmin;
164 } 138 }
165 139
166 if(s->avctx->me_cmp != s->avctx->me_sub_cmp){ 140 if(s->avctx->me_cmp != s->avctx->me_sub_cmp){
167 CMP_HPEL(dmin, 0, 0, mx, my, size); 141 dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
168 if(mx || my || size>0) 142 if(mx || my || size>0)
169 dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor; 143 dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor;
170 } 144 }
171 145
172 if (mx > xmin && mx < xmax && 146 if (mx > xmin && mx < xmax &&
244 218
245 return dmin; 219 return dmin;
246 } 220 }
247 #endif 221 #endif
248 222
249 static int RENAME(hpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pred_x, int pred_y, uint8_t *src_data[3], 223 static int inline get_mb_score(MpegEncContext * s, int mx, int my, int src_index,
250 uint8_t *ref_data[3], int stride, int uvstride, 224 int ref_index)
251 uint8_t * const mv_penalty)
252 { 225 {
253 // const int check_luma= s->dsp.me_sub_cmp != s->dsp.mb_cmp; 226 // const int check_luma= s->dsp.me_sub_cmp != s->dsp.mb_cmp;
254 const int size= 0; 227 const int size= 0;
255 const int h= 16; 228 const int h= 16;
256 const int penalty_factor= s->me.mb_penalty_factor; 229 const int penalty_factor= s->me.mb_penalty_factor;
230 const int flags= s->me.mb_flags;
231 const int qpel= flags & FLAG_QPEL;
232 const int mask= 1+2*qpel;
257 me_cmp_func cmp_sub, chroma_cmp_sub; 233 me_cmp_func cmp_sub, chroma_cmp_sub;
258 int d; 234 int d;
259 235
260 LOAD_COMMON 236 LOAD_COMMON
261 237
265 chroma_cmp_sub= s->dsp.mb_cmp[size+1]; 241 chroma_cmp_sub= s->dsp.mb_cmp[size+1];
266 242
267 assert(!s->me.skip); 243 assert(!s->me.skip);
268 assert(s->avctx->me_sub_cmp != s->avctx->mb_cmp); 244 assert(s->avctx->me_sub_cmp != s->avctx->mb_cmp);
269 245
270 CMP_HPEL(d, mx&1, my&1, mx>>1, my>>1, size); 246 d= cmp(s, mx>>(qpel+1), my>>(qpel+1), mx&mask, my&mask, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
271 //FIXME check cbp before adding penalty for (0,0) vector 247 //FIXME check cbp before adding penalty for (0,0) vector
272 if(mx || my || size>0) 248 if(mx || my || size>0)
273 d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor; 249 d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor;
274 250
275 return d; 251 return d;
276 } 252 }
277
278 #endif /* CMP_HPEL */
279
280
281
282 #ifdef CMP_QPEL
283 253
284 #define CHECK_QUARTER_MV(dx, dy, x, y)\ 254 #define CHECK_QUARTER_MV(dx, dy, x, y)\
285 {\ 255 {\
286 const int hx= 4*(x)+(dx);\ 256 const int hx= 4*(x)+(dx);\
287 const int hy= 4*(y)+(dy);\ 257 const int hy= 4*(y)+(dy);\
288 CMP_QPEL(d, dx, dy, x, y, size);\ 258 d= cmp(s, x, y, dx, dy, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
289 d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\ 259 d += (mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y])*penalty_factor;\
290 COPY3_IF_LT(dmin, d, bx, hx, by, hy)\ 260 COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
291 } 261 }
292 262
293 static int RENAME(qpel_motion_search)(MpegEncContext * s, 263 static int qpel_motion_search(MpegEncContext * s,
294 int *mx_ptr, int *my_ptr, int dmin, 264 int *mx_ptr, int *my_ptr, int dmin,
295 int pred_x, int pred_y, uint8_t *src_data[3], 265 int src_index, int ref_index,
296 uint8_t *ref_data[3], int stride, int uvstride, 266 int size, int h)
297 int size, int h, uint8_t * const mv_penalty)
298 { 267 {
299 const int mx = *mx_ptr; 268 const int mx = *mx_ptr;
300 const int my = *my_ptr; 269 const int my = *my_ptr;
301 const int penalty_factor= s->me.sub_penalty_factor; 270 const int penalty_factor= s->me.sub_penalty_factor;
302 const int map_generation= s->me.map_generation; 271 const int map_generation= s->me.map_generation;
303 const int subpel_quality= s->avctx->me_subpel_quality; 272 const int subpel_quality= s->avctx->me_subpel_quality;
304 uint32_t *map= s->me.map; 273 uint32_t *map= s->me.map;
305 me_cmp_func cmp, chroma_cmp; 274 me_cmp_func cmpf, chroma_cmpf;
306 me_cmp_func cmp_sub, chroma_cmp_sub; 275 me_cmp_func cmp_sub, chroma_cmp_sub;
307 276
308 LOAD_COMMON 277 LOAD_COMMON
309 278 int flags= s->me.sub_flags;
310 cmp= s->dsp.me_cmp[size]; 279
311 chroma_cmp= s->dsp.me_cmp[size+1]; //factorize FIXME 280 cmpf= s->dsp.me_cmp[size];
281 chroma_cmpf= s->dsp.me_cmp[size+1]; //factorize FIXME
312 //FIXME factorize 282 //FIXME factorize
313 283
314 cmp_sub= s->dsp.me_sub_cmp[size]; 284 cmp_sub= s->dsp.me_sub_cmp[size];
315 chroma_cmp_sub= s->dsp.me_sub_cmp[size+1]; 285 chroma_cmp_sub= s->dsp.me_sub_cmp[size+1];
316 286
319 *my_ptr = 0; 289 *my_ptr = 0;
320 return dmin; 290 return dmin;
321 } 291 }
322 292
323 if(s->avctx->me_cmp != s->avctx->me_sub_cmp){ 293 if(s->avctx->me_cmp != s->avctx->me_sub_cmp){
324 CMP_QPEL(dmin, 0, 0, mx, my, size); 294 dmin= cmp(s, mx, my, 0, 0, size, h, ref_index, src_index, cmp_sub, chroma_cmp_sub, flags);
325 if(mx || my || size>0) 295 if(mx || my || size>0)
326 dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor; 296 dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor;
327 } 297 }
328 298
329 if (mx > xmin && mx < xmax && 299 if (mx > xmin && mx < xmax &&
384 int cxy; 354 int cxy;
385 355
386 if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == (my<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME 356 if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == (my<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME
387 tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)]; 357 tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)];
388 }else{ 358 }else{
389 CMP(tl, mx-1, my-1, size); //FIXME wrong if chroma me is different 359 tl= cmp(s, mx-1, my-1, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);//FIXME wrong if chroma me is different
390 } 360 }
391 361
392 cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c; 362 cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c;
393 363
394 assert(16*cx2 + 4*cx + 32*c == 32*r); 364 assert(16*cx2 + 4*cx + 32*c == 32*r);
507 } 477 }
508 478
509 return dmin; 479 return dmin;
510 } 480 }
511 481
512 static int RENAME(qpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pred_x, int pred_y, uint8_t *src_data[3],
513 uint8_t *ref_data[3], int stride, int uvstride,
514 uint8_t * const mv_penalty)
515 {
516 const int size= 0;
517 const int h= 16;
518 const int penalty_factor= s->me.mb_penalty_factor;
519 me_cmp_func cmp_sub, chroma_cmp_sub;
520 int d;
521
522 LOAD_COMMON
523
524 //FIXME factorize
525
526 cmp_sub= s->dsp.mb_cmp[size];
527 chroma_cmp_sub= s->dsp.mb_cmp[size+1];
528
529 assert(!s->me.skip);
530 assert(s->avctx->me_sub_cmp != s->avctx->mb_cmp);
531
532 CMP_QPEL(d, mx&3, my&3, mx>>2, my>>2, size);
533 //FIXME check cbp before adding penalty for (0,0) vector
534 if(mx || my || size>0)
535 d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor;
536
537 return d;
538 }
539
540
541 #endif /* CMP_QPEL */
542 482
543 #define CHECK_MV(x,y)\ 483 #define CHECK_MV(x,y)\
544 {\ 484 {\
545 const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\ 485 const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
546 const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\ 486 const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
547 /*printf("check_mv %d %d\n", x, y);*/\ 487 /*printf("check_mv %d %d\n", x, y);*/\
548 if(map[index]!=key){\ 488 if(map[index]!=key){\
549 CMP(d, x, y, size);\ 489 d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
550 map[index]= key;\ 490 map[index]= key;\
551 score_map[index]= d;\ 491 score_map[index]= d;\
552 d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\ 492 d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
553 /*printf("score:%d\n", d);*/\ 493 /*printf("score:%d\n", d);*/\
554 COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\ 494 COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
568 {\ 508 {\
569 const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\ 509 const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
570 const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\ 510 const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
571 /*printf("check_mv_dir %d %d %d\n", x, y, new_dir);*/\ 511 /*printf("check_mv_dir %d %d %d\n", x, y, new_dir);*/\
572 if(map[index]!=key){\ 512 if(map[index]!=key){\
573 CMP(d, x, y, size);\ 513 d= cmp(s, x, y, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
574 map[index]= key;\ 514 map[index]= key;\
575 score_map[index]= d;\ 515 score_map[index]= d;\
576 d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\ 516 d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*penalty_factor;\
577 /*printf("score:%d\n", d);*/\ 517 /*printf("score:%d\n", d);*/\
578 if(d<dmin){\ 518 if(d<dmin){\
588 if( (x)<(xmin<<(S)) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\ 528 if( (x)<(xmin<<(S)) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
589 if( (x)>(xmax<<(S)) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\ 529 if( (x)>(xmax<<(S)) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
590 if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\ 530 if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
591 if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\ 531 if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
592 532
593 533 #define LOAD_COMMON2\
594 static inline int RENAME(small_diamond_search)(MpegEncContext * s, int *best, int dmin, 534 uint32_t *map= s->me.map;\
595 uint8_t *src_data[3], 535 const int qpel= flags&FLAG_QPEL;\
596 uint8_t *ref_data[3], int stride, int uvstride, 536 const int shift= 1+qpel;\
597 int const pred_x, int const pred_y, int const penalty_factor, 537
598 int const shift, 538 static always_inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
599 uint32_t *map, int map_generation, int size, int h, uint8_t * const mv_penalty 539 int src_index, int ref_index, int const penalty_factor,
600 ) 540 int size, int h, int flags)
601 { 541 {
602 me_cmp_func cmp, chroma_cmp; 542 me_cmp_func cmpf, chroma_cmpf;
603 int next_dir=-1; 543 int next_dir=-1;
604 LOAD_COMMON 544 LOAD_COMMON
605 545 LOAD_COMMON2
606 cmp= s->dsp.me_cmp[size]; 546 int map_generation= s->me.map_generation;
607 chroma_cmp= s->dsp.me_cmp[size+1]; 547
548 cmpf= s->dsp.me_cmp[size];
549 chroma_cmpf= s->dsp.me_cmp[size+1];
608 550
609 { /* ensure that the best point is in the MAP as h/qpel refinement needs it */ 551 { /* ensure that the best point is in the MAP as h/qpel refinement needs it */
610 const int key= (best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation; 552 const int key= (best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation;
611 const int index= ((best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1); 553 const int index= ((best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1);
612 if(map[index]!=key){ //this will be executed only very rarey 554 if(map[index]!=key){ //this will be executed only very rarey
613 CMP(score_map[index], best[0], best[1], size); 555 score_map[index]= cmp(s, best[0], best[1], 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
614 map[index]= key; 556 map[index]= key;
615 } 557 }
616 } 558 }
617 559
618 for(;;){ 560 for(;;){
632 return dmin; 574 return dmin;
633 } 575 }
634 } 576 }
635 } 577 }
636 578
637 static inline int RENAME(funny_diamond_search)(MpegEncContext * s, int *best, int dmin, 579 static int funny_diamond_search(MpegEncContext * s, int *best, int dmin,
638 uint8_t *src_data[3], 580 int src_index, int ref_index, int const penalty_factor,
639 uint8_t *ref_data[3], int stride, int uvstride, 581 int size, int h, int flags)
640 int const pred_x, int const pred_y, int const penalty_factor, 582 {
641 int const shift, 583 me_cmp_func cmpf, chroma_cmpf;
642 uint32_t *map, int map_generation, int size, int h, uint8_t * const mv_penalty
643 )
644 {
645 me_cmp_func cmp, chroma_cmp;
646 int dia_size; 584 int dia_size;
647 LOAD_COMMON 585 LOAD_COMMON
648 586 LOAD_COMMON2
649 cmp= s->dsp.me_cmp[size]; 587 int map_generation= s->me.map_generation;
650 chroma_cmp= s->dsp.me_cmp[size+1]; 588
589 cmpf= s->dsp.me_cmp[size];
590 chroma_cmpf= s->dsp.me_cmp[size+1];
651 591
652 for(dia_size=1; dia_size<=4; dia_size++){ 592 for(dia_size=1; dia_size<=4; dia_size++){
653 int dir; 593 int dir;
654 const int x= best[0]; 594 const int x= best[0];
655 const int y= best[1]; 595 const int y= best[1];
700 {\ 640 {\
701 const int key= ((ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\ 641 const int key= ((ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\
702 const int index= (((ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\ 642 const int index= (((ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\
703 /*printf("sab check %d %d\n", ax, ay);*/\ 643 /*printf("sab check %d %d\n", ax, ay);*/\
704 if(map[index]!=key){\ 644 if(map[index]!=key){\
705 CMP(d, ax, ay, size);\ 645 d= cmp(s, ax, ay, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);\
706 map[index]= key;\ 646 map[index]= key;\
707 score_map[index]= d;\ 647 score_map[index]= d;\
708 d += (mv_penalty[((ax)<<shift)-pred_x] + mv_penalty[((ay)<<shift)-pred_y])*penalty_factor;\ 648 d += (mv_penalty[((ax)<<shift)-pred_x] + mv_penalty[((ay)<<shift)-pred_y])*penalty_factor;\
709 /*printf("score: %d\n", d);*/\ 649 /*printf("score: %d\n", d);*/\
710 if(d < minima[minima_count-1].height){\ 650 if(d < minima[minima_count-1].height){\
724 }\ 664 }\
725 }\ 665 }\
726 } 666 }
727 667
728 #define MAX_SAB_SIZE 16 668 #define MAX_SAB_SIZE 16
729 static inline int RENAME(sab_diamond_search)(MpegEncContext * s, int *best, int dmin, 669 static int sab_diamond_search(MpegEncContext * s, int *best, int dmin,
730 uint8_t *src_data[3], 670 int src_index, int ref_index, int const penalty_factor,
731 uint8_t *ref_data[3], int stride, int uvstride, 671 int size, int h, int flags)
732 int const pred_x, int const pred_y, int const penalty_factor, 672 {
733 int const shift, 673 me_cmp_func cmpf, chroma_cmpf;
734 uint32_t *map, int map_generation, int size, int h, uint8_t * const mv_penalty
735 )
736 {
737 me_cmp_func cmp, chroma_cmp;
738 Minima minima[MAX_SAB_SIZE]; 674 Minima minima[MAX_SAB_SIZE];
739 const int minima_count= ABS(s->me.dia_size); 675 const int minima_count= ABS(s->me.dia_size);
740 int i, j; 676 int i, j;
741 LOAD_COMMON 677 LOAD_COMMON
742 678 LOAD_COMMON2
743 cmp= s->dsp.me_cmp[size]; 679 int map_generation= s->me.map_generation;
744 chroma_cmp= s->dsp.me_cmp[size+1]; 680
681 cmpf= s->dsp.me_cmp[size];
682 chroma_cmpf= s->dsp.me_cmp[size+1];
745 683
746 for(j=i=0; i<ME_MAP_SIZE; i++){ 684 for(j=i=0; i<ME_MAP_SIZE; i++){
747 uint32_t key= map[i]; 685 uint32_t key= map[i];
748 686
749 key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1)); 687 key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1));
805 CHECK_MV(best[0], best[1]+1) 743 CHECK_MV(best[0], best[1]+1)
806 } 744 }
807 return dmin; 745 return dmin;
808 } 746 }
809 747
810 static inline int RENAME(var_diamond_search)(MpegEncContext * s, int *best, int dmin, 748 static int var_diamond_search(MpegEncContext * s, int *best, int dmin,
811 uint8_t *src_data[3], 749 int src_index, int ref_index, int const penalty_factor,
812 uint8_t *ref_data[3], int stride, int uvstride, 750 int size, int h, int flags)
813 int const pred_x, int const pred_y, int const penalty_factor, 751 {
814 int const shift, 752 me_cmp_func cmpf, chroma_cmpf;
815 uint32_t *map, int map_generation, int size, int h, uint8_t * const mv_penalty
816 )
817 {
818 me_cmp_func cmp, chroma_cmp;
819 int dia_size; 753 int dia_size;
820 LOAD_COMMON 754 LOAD_COMMON
821 755 LOAD_COMMON2
822 cmp= s->dsp.me_cmp[size]; 756 int map_generation= s->me.map_generation;
823 chroma_cmp= s->dsp.me_cmp[size+1]; 757
758 cmpf= s->dsp.me_cmp[size];
759 chroma_cmpf= s->dsp.me_cmp[size+1];
824 760
825 for(dia_size=1; dia_size<=s->me.dia_size; dia_size++){ 761 for(dia_size=1; dia_size<=s->me.dia_size; dia_size++){
826 int dir, start, end; 762 int dir, start, end;
827 const int x= best[0]; 763 const int x= best[0];
828 const int y= best[1]; 764 const int y= best[1];
883 #endif 819 #endif
884 } 820 }
885 return dmin; 821 return dmin;
886 } 822 }
887 823
888 static int RENAME(epzs_motion_search)(MpegEncContext * s, 824 static always_inline int diamond_search(MpegEncContext * s, int *best, int dmin,
889 int *mx_ptr, int *my_ptr, 825 int src_index, int ref_index, int const penalty_factor,
890 int P[10][2], int pred_x, int pred_y, uint8_t *src_data[3], 826 int size, int h, int flags){
891 uint8_t *ref_data[3], int stride, int uvstride, int16_t (*last_mv)[2], 827 if(s->me.dia_size==-1)
892 int ref_mv_scale, uint8_t * const mv_penalty) 828 return funny_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
829 else if(s->me.dia_size<-1)
830 return sab_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
831 else if(s->me.dia_size<2)
832 return small_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
833 else
834 return var_diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
835 }
836
837 static always_inline int epzs_motion_search_internal(MpegEncContext * s, int *mx_ptr, int *my_ptr,
838 int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
839 int ref_mv_scale, int flags)
893 { 840 {
894 int best[2]={0, 0}; 841 int best[2]={0, 0};
895 int d, dmin; 842 int d, dmin;
896 const int shift= 1+s->quarter_sample;
897 uint32_t *map= s->me.map;
898 int map_generation; 843 int map_generation;
899 const int penalty_factor= s->me.penalty_factor; 844 const int penalty_factor= s->me.penalty_factor;
900 const int size=0; 845 const int size=0;
901 const int h=16; 846 const int h=16;
902 const int ref_mv_stride= s->mb_stride; //pass as arg FIXME 847 const int ref_mv_stride= s->mb_stride; //pass as arg FIXME
903 const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv beforepassing FIXME 848 const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; //add to last_mv beforepassing FIXME
904 me_cmp_func cmp, chroma_cmp; 849 me_cmp_func cmpf, chroma_cmpf;
850
905 LOAD_COMMON 851 LOAD_COMMON
906 852 LOAD_COMMON2
907 cmp= s->dsp.me_cmp[size]; 853
908 chroma_cmp= s->dsp.me_cmp[size+1]; 854 cmpf= s->dsp.me_cmp[size];
855 chroma_cmpf= s->dsp.me_cmp[size+1];
909 856
910 map_generation= update_map_generation(s); 857 map_generation= update_map_generation(s);
911 858
912 CMP(dmin, 0, 0, size); 859 dmin= cmp(s, 0, 0, 0, 0, size, h, ref_index, src_index, cmpf, chroma_cmpf, flags);
913 map[0]= map_generation; 860 map[0]= map_generation;
914 score_map[0]= dmin; 861 score_map[0]= dmin;
915 862
916 /* first line */ 863 /* first line */
917 if (s->first_slice_line) { 864 if (s->first_slice_line) {
972 } 919 }
973 } 920 }
974 } 921 }
975 922
976 //check(best[0],best[1],0, b0) 923 //check(best[0],best[1],0, b0)
977 if(s->me.dia_size==-1) 924 dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
978 dmin= RENAME(funny_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
979 pred_x, pred_y, penalty_factor,
980 shift, map, map_generation, size, h, mv_penalty);
981 else if(s->me.dia_size<-1)
982 dmin= RENAME(sab_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
983 pred_x, pred_y, penalty_factor,
984 shift, map, map_generation, size, h, mv_penalty);
985 else if(s->me.dia_size<2)
986 dmin= RENAME(small_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
987 pred_x, pred_y, penalty_factor,
988 shift, map, map_generation, size, h, mv_penalty);
989 else
990 dmin= RENAME(var_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
991 pred_x, pred_y, penalty_factor,
992 shift, map, map_generation, size, h, mv_penalty);
993 925
994 //check(best[0],best[1],0, b1) 926 //check(best[0],best[1],0, b1)
995 *mx_ptr= best[0]; 927 *mx_ptr= best[0];
996 *my_ptr= best[1]; 928 *my_ptr= best[1];
997 929
998 // printf("%d %d %d \n", best[0], best[1], dmin); 930 // printf("%d %d %d \n", best[0], best[1], dmin);
999 return dmin; 931 return dmin;
1000 } 932 }
1001 933
1002 #ifndef CMP_DIRECT /* no 4mv search needed in direct mode */ 934 //this function is dedicated to the braindamaged gcc
1003 static int RENAME(epzs_motion_search4)(MpegEncContext * s, 935 static inline int epzs_motion_search(MpegEncContext * s, int *mx_ptr, int *my_ptr,
1004 int *mx_ptr, int *my_ptr, 936 int P[10][2], int src_index, int ref_index, int16_t (*last_mv)[2],
1005 int P[10][2], int pred_x, int pred_y, 937 int ref_mv_scale)
1006 uint8_t *src_data[3], 938 {
1007 uint8_t *ref_data[3], int stride, int uvstride, int16_t (*last_mv)[2], 939 //FIXME convert other functions in the same way if faster
1008 int ref_mv_scale, uint8_t * const mv_penalty) 940 switch(s->me.flags){
941 case 0:
942 return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, 0);
943 // case FLAG_QPEL:
944 // return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, FLAG_QPEL);
945 default:
946 return epzs_motion_search_internal(s, mx_ptr, my_ptr, P, src_index, ref_index, last_mv, ref_mv_scale, s->me.flags);
947 }
948 }
949
950 static int epzs_motion_search4(MpegEncContext * s,
951 int *mx_ptr, int *my_ptr, int P[10][2],
952 int src_index, int ref_index, int16_t (*last_mv)[2],
953 int ref_mv_scale)
1009 { 954 {
1010 int best[2]={0, 0}; 955 int best[2]={0, 0};
1011 int d, dmin; 956 int d, dmin;
1012 const int shift= 1+s->quarter_sample;
1013 uint32_t *map= s->me.map;
1014 int map_generation; 957 int map_generation;
1015 const int penalty_factor= s->me.penalty_factor; 958 const int penalty_factor= s->me.penalty_factor;
1016 const int size=1; 959 const int size=1;
1017 const int h=8; 960 const int h=8;
1018 const int ref_mv_stride= s->mb_stride; 961 const int ref_mv_stride= s->mb_stride;
1019 const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride; 962 const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1020 me_cmp_func cmp, chroma_cmp; 963 me_cmp_func cmpf, chroma_cmpf;
1021 LOAD_COMMON 964 LOAD_COMMON
1022 965 int flags= s->me.flags;
1023 cmp= s->dsp.me_cmp[size]; 966 LOAD_COMMON2
1024 chroma_cmp= s->dsp.me_cmp[size+1]; 967
968 cmpf= s->dsp.me_cmp[size];
969 chroma_cmpf= s->dsp.me_cmp[size+1];
1025 970
1026 map_generation= update_map_generation(s); 971 map_generation= update_map_generation(s);
1027 972
1028 dmin = 1000000; 973 dmin = 1000000;
1029 //printf("%d %d %d %d //",xmin, ymin, xmax, ymax); 974 //printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
1051 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line 996 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
1052 CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, 997 CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1053 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16) 998 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1054 } 999 }
1055 1000
1056 if(s->me.dia_size==-1) 1001 dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1057 dmin= RENAME(funny_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
1058 pred_x, pred_y, penalty_factor,
1059 shift, map, map_generation, size, h, mv_penalty);
1060 else if(s->me.dia_size<-1)
1061 dmin= RENAME(sab_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
1062 pred_x, pred_y, penalty_factor,
1063 shift, map, map_generation, size, h, mv_penalty);
1064 else if(s->me.dia_size<2)
1065 dmin= RENAME(small_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
1066 pred_x, pred_y, penalty_factor,
1067 shift, map, map_generation, size, h, mv_penalty);
1068 else
1069 dmin= RENAME(var_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
1070 pred_x, pred_y, penalty_factor,
1071 shift, map, map_generation, size, h, mv_penalty);
1072
1073 1002
1074 *mx_ptr= best[0]; 1003 *mx_ptr= best[0];
1075 *my_ptr= best[1]; 1004 *my_ptr= best[1];
1076 1005
1077 // printf("%d %d %d \n", best[0], best[1], dmin); 1006 // printf("%d %d %d \n", best[0], best[1], dmin);
1078 return dmin; 1007 return dmin;
1079 } 1008 }
1080 1009
1081 //try to merge with above FIXME (needs PSNR test) 1010 //try to merge with above FIXME (needs PSNR test)
1082 static int RENAME(epzs_motion_search2)(MpegEncContext * s, 1011 static int epzs_motion_search2(MpegEncContext * s,
1083 int *mx_ptr, int *my_ptr, 1012 int *mx_ptr, int *my_ptr, int P[10][2],
1084 int P[10][2], int pred_x, int pred_y, 1013 int src_index, int ref_index, int16_t (*last_mv)[2],
1085 uint8_t *src_data[3], 1014 int ref_mv_scale)
1086 uint8_t *ref_data[3], int stride, int uvstride, int16_t (*last_mv)[2],
1087 int ref_mv_scale, uint8_t * const mv_penalty)
1088 { 1015 {
1089 int best[2]={0, 0}; 1016 int best[2]={0, 0};
1090 int d, dmin; 1017 int d, dmin;
1091 const int shift= 1+s->quarter_sample;
1092 uint32_t *map= s->me.map;
1093 int map_generation; 1018 int map_generation;
1094 const int penalty_factor= s->me.penalty_factor; 1019 const int penalty_factor= s->me.penalty_factor;
1095 const int size=0; //FIXME pass as arg 1020 const int size=0; //FIXME pass as arg
1096 const int h=8; 1021 const int h=8;
1097 const int ref_mv_stride= s->mb_stride; 1022 const int ref_mv_stride= s->mb_stride;
1098 const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride; 1023 const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride;
1099 me_cmp_func cmp, chroma_cmp; 1024 me_cmp_func cmpf, chroma_cmpf;
1100 LOAD_COMMON 1025 LOAD_COMMON
1101 1026 int flags= s->me.flags;
1102 cmp= s->dsp.me_cmp[size]; 1027 LOAD_COMMON2
1103 chroma_cmp= s->dsp.me_cmp[size+1]; 1028
1029 cmpf= s->dsp.me_cmp[size];
1030 chroma_cmpf= s->dsp.me_cmp[size+1];
1104 1031
1105 map_generation= update_map_generation(s); 1032 map_generation= update_map_generation(s);
1106 1033
1107 dmin = 1000000; 1034 dmin = 1000000;
1108 //printf("%d %d %d %d //",xmin, ymin, xmax, ymax); 1035 //printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
1130 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line 1057 if(s->mb_y+1<s->end_mb_y) //FIXME replace at least with last_slice_line
1131 CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, 1058 CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16,
1132 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16) 1059 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16)
1133 } 1060 }
1134 1061
1135 if(s->me.dia_size==-1) 1062 dmin= diamond_search(s, best, dmin, src_index, ref_index, penalty_factor, size, h, flags);
1136 dmin= RENAME(funny_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
1137 pred_x, pred_y, penalty_factor,
1138 shift, map, map_generation, size, h, mv_penalty);
1139 else if(s->me.dia_size<-1)
1140 dmin= RENAME(sab_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
1141 pred_x, pred_y, penalty_factor,
1142 shift, map, map_generation, size, h, mv_penalty);
1143 else if(s->me.dia_size<2)
1144 dmin= RENAME(small_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
1145 pred_x, pred_y, penalty_factor,
1146 shift, map, map_generation, size, h, mv_penalty);
1147 else
1148 dmin= RENAME(var_diamond_search)(s, best, dmin, src_data, ref_data, stride, uvstride,
1149 pred_x, pred_y, penalty_factor,
1150 shift, map, map_generation, size, h, mv_penalty);
1151
1152 1063
1153 *mx_ptr= best[0]; 1064 *mx_ptr= best[0];
1154 *my_ptr= best[1]; 1065 *my_ptr= best[1];
1155 1066
1156 // printf("%d %d %d \n", best[0], best[1], dmin); 1067 // printf("%d %d %d \n", best[0], best[1], dmin);
1157 return dmin; 1068 return dmin;
1158 } 1069 }
1159 #endif /* !CMP_DIRECT */