comparison motion_est.c @ 936:caa77cd960c0 libavcodec

qpel encoding; 4mv+b frames encoding finally fixed; chroma ME; 5 comparison functions for ME; b frame encoding speedup; wmv2 codec (unfinished); user specified diamond size for EPZS
author michaelni
date Fri, 27 Dec 2002 23:51:46 +0000
parents 51f3b644ae30
children 371bc36a9c5c
comparison
equal deleted inserted replaced
935:c9bbd35064b6 936:caa77cd960c0
24 #include <stdio.h> 24 #include <stdio.h>
25 #include "avcodec.h" 25 #include "avcodec.h"
26 #include "dsputil.h" 26 #include "dsputil.h"
27 #include "mpegvideo.h" 27 #include "mpegvideo.h"
28 28
29 //#undef NDEBUG
30 //#include <assert.h>
31
29 #define SQ(a) ((a)*(a)) 32 #define SQ(a) ((a)*(a))
30 #define INTER_BIAS 257
31 33
32 #define P_LAST P[0] 34 #define P_LAST P[0]
33 #define P_LEFT P[1] 35 #define P_LEFT P[1]
34 #define P_TOP P[2] 36 #define P_TOP P[2]
35 #define P_TOPRIGHT P[3] 37 #define P_TOPRIGHT P[3]
38 #define P_LAST_RIGHT P[6] 40 #define P_LAST_RIGHT P[6]
39 #define P_LAST_TOP P[7] 41 #define P_LAST_TOP P[7]
40 #define P_LAST_BOTTOM P[8] 42 #define P_LAST_BOTTOM P[8]
41 #define P_MV1 P[9] 43 #define P_MV1 P[9]
42 44
43 45 static inline int sad_hpel_motion_search(MpegEncContext * s,
46 int *mx_ptr, int *my_ptr, int dmin,
47 int xmin, int ymin, int xmax, int ymax,
48 int pred_x, int pred_y, Picture *picture,
49 int n, int size, uint16_t * const mv_penalty);
50
51 static inline int update_map_generation(MpegEncContext * s)
52 {
53 s->me.map_generation+= 1<<(ME_MAP_MV_BITS*2);
54 if(s->me.map_generation==0){
55 s->me.map_generation= 1<<(ME_MAP_MV_BITS*2);
56 memset(s->me.map, 0, sizeof(uint32_t)*ME_MAP_SIZE);
57 }
58 return s->me.map_generation;
59 }
60
61
62
63 /* SIMPLE */
/*
 * Luma-only instantiation of motion_est_template.c.
 * RENAME() pastes the "simple_" prefix onto the identifier it is given.
 * The macros below use s, src_y, ref_y, stride, cmp, cmp_sub, hpel_put and
 * qpel_put without defining them; presumably these are in scope where the
 * template expands the macros -- TODO confirm against motion_est_template.c.
 */
64 #define RENAME(a) simple_ ## a
65
/*
 * CMP: full-pel score. Sets d to cmp() of src_y against the reference at
 * integer offset (x, y). The size argument is not used by this variant.
 * Note the expansion already ends in ';'.
 */
66 #define CMP(d, x, y, size)\
67 d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride);
68
/*
 * CMP_HPEL: half-pel score. dxy = dx + 2*dy selects the hpel_put[0][] entry
 * that writes the prediction into s->me.scratchpad (height 16>>size);
 * d is then cmp_sub() of the scratchpad against src_y.
 */
69 #define CMP_HPEL(d, dx, dy, x, y, size)\
70 {\
71 const int dxy= (dx) + 2*(dy);\
72 hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, (16>>size));\
73 d = cmp_sub(s, s->me.scratchpad, src_y, stride);\
74 }
75
/*
 * CMP_QPEL: quarter-pel score. Same scheme as CMP_HPEL, with dxy = dx + 4*dy
 * selecting the qpel_put[0][] entry. The size argument is not used here.
 */
76 #define CMP_QPEL(d, dx, dy, x, y, size)\
77 {\
78 const int dxy= (dx) + 4*(dy);\
79 qpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride);\
80 d = cmp_sub(s, s->me.scratchpad, src_y, stride);\
81 }
82
/* Instantiate the template, then remove the per-instantiation macros.
 * INIT is not defined in this section; presumably the template defines it. */
83 #include "motion_est_template.c"
84 #undef RENAME
85 #undef CMP
86 #undef CMP_HPEL
87 #undef CMP_QPEL
88 #undef INIT
89
90 /* SIMPLE CHROMA */
/*
 * Luma + chroma instantiation of motion_est_template.c.
 * RENAME() pastes the "simple_chroma_" prefix onto the identifier it is given.
 * Besides the luma names (s, src_y, ref_y, stride, cmp, cmp_sub, hpel_put,
 * qpel_put) these macros use src_u, src_v, ref_u, ref_v, uvstride,
 * chroma_cmp, chroma_cmp_sub and chroma_hpel_put from the expansion site;
 * presumably the template provides them -- TODO confirm.
 */
91 #define RENAME(a) simple_chroma_ ## a
92
/*
 * CMP: full-pel score. d starts as the luma cmp() score; when chroma_cmp is
 * non-null the U and V scores are added. The chroma vector is the luma vector
 * halved: (x>>1, y>>1) is the integer part and the low bits of x and y select
 * the chroma_hpel_put[0][] entry. Each chroma plane is written into
 * s->me.scratchpad (height 8) before being compared.
 * NOTE(review): the expansion is two statements without enclosing braces, so
 * it is only safe where a statement sequence is allowed.
 */
93 #define CMP(d, x, y, size)\
94 d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride);\
95 if(chroma_cmp){\
96 int dxy= ((x)&1) + 2*((y)&1);\
97 int c= ((x)>>1) + ((y)>>1)*uvstride;\
98 \
99 chroma_hpel_put[0][dxy](s->me.scratchpad, ref_u + c, uvstride, 8);\
100 d += chroma_cmp(s, s->me.scratchpad, src_u, uvstride);\
101 chroma_hpel_put[0][dxy](s->me.scratchpad, ref_v + c, uvstride, 8);\
102 d += chroma_cmp(s, s->me.scratchpad, src_v, uvstride);\
103 }
104
/*
 * CMP_HPEL: half-pel score. Luma as in the luma-only variant; when
 * chroma_cmp_sub is non-null the chroma index cxy is the luma half-pel
 * index OR-ed with the low bits of x and y.
 */
105 #define CMP_HPEL(d, dx, dy, x, y, size)\
106 {\
107 const int dxy= (dx) + 2*(dy);\
108 hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, (16>>size));\
109 d = cmp_sub(s, s->me.scratchpad, src_y, stride);\
110 if(chroma_cmp_sub){\
111 int cxy= (dxy) | ((x)&1) | (2*((y)&1));\
112 int c= ((x)>>1) + ((y)>>1)*uvstride;\
113 chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, 8);\
114 d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride);\
115 chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, 8);\
116 d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride);\
117 }\
118 }
119
/*
 * CMP_QPEL: quarter-pel score. The chroma vector is derived from the full
 * quarter-pel luma position 4*x+dx (resp. 4*y+dy): divided by 2, then reduced
 * once more with (v>>1)|(v&1); the low bit of the result selects the
 * chroma_hpel_put[0][] entry and the remaining bits the integer chroma offset.
 */
120 #define CMP_QPEL(d, dx, dy, x, y, size)\
121 {\
122 const int dxy= (dx) + 4*(dy);\
123 qpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride);\
124 d = cmp_sub(s, s->me.scratchpad, src_y, stride);\
125 if(chroma_cmp_sub){\
126 int cxy, c;\
127 int cx= (4*(x) + (dx))/2;\
128 int cy= (4*(y) + (dy))/2;\
129 cx= (cx>>1)|(cx&1);\
130 cy= (cy>>1)|(cy&1);\
131 cxy= (cx&1) + 2*(cy&1);\
132 c= ((cx)>>1) + ((cy)>>1)*uvstride;\
133 chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, 8);\
134 d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride);\
135 chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, 8);\
136 d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride);\
137 }\
138 }
139
/* Instantiate the template, then remove the per-instantiation macros. */
140 #include "motion_est_template.c"
141 #undef RENAME
142 #undef CMP
143 #undef CMP_HPEL
144 #undef CMP_QPEL
145 #undef INIT
146
147 /* SIMPLE DIRECT HPEL */
/*
 * Half-pel instantiation of motion_est_template.c that scores a forward
 * prediction averaged with a backward prediction.
 * RENAME() pastes the "simple_direct_hpel_" prefix onto its argument.
 * Names used but not defined here (s, src_y, ref_y, ref2_y, stride, xmin,
 * xmax, ymin, ymax, time_pb, time_pp, hpel_put, hpel_avg, cmp, cmp_sub) are
 * presumably provided at the expansion site -- TODO confirm.
 */
148 #define RENAME(a) simple_direct_hpel_ ## a
149 //FIXME precalc divisions stuff
150
/*
 * CMP_DIRECT: score the half-pel delta (hx, hy) = (2*x+dx, 2*y+dy).
 *  - Outside the [xmin..xmax] x [ymin..ymax] window d becomes 256*256*256*32.
 *  - Forward vector  = direct_basis_mv + delta.
 *  - Backward vector = forward - co_located_mv when that delta component is
 *    non-zero, otherwise co_located_mv*(time_pb - time_pp)/time_pp (plus a
 *    per-sub-block offset of 16 half-pels in the 8x8 case).
 *  - The forward prediction is written into s->me.scratchpad with hpel_put,
 *    the backward prediction is merged into it with hpel_avg, and cmp_func
 *    compares the result with src_y.
 * With s->mv_type==MV_TYPE_8X8 this is done per 8x8 sub-block i (tables
 * index [1]); otherwise once for the whole block (tables index [0], height 16).
 * The size argument is not used.
 * NOTE(review): the expansion is an unbraced if/else ending in ';'.
 */
151 #define CMP_DIRECT(d, dx, dy, x, y, size, cmp_func)\
152 if((x) >= xmin && 2*(x) + (dx) <= 2*xmax && (y) >= ymin && 2*(y) + (dy) <= 2*ymax){\
153 const int hx= 2*(x) + (dx);\
154 const int hy= 2*(y) + (dy);\
155 if(s->mv_type==MV_TYPE_8X8){\
156 int i;\
157 for(i=0; i<4; i++){\
158 int fx = s->me.direct_basis_mv[i][0] + hx;\
159 int fy = s->me.direct_basis_mv[i][1] + hy;\
160 int bx = hx ? fx - s->me.co_located_mv[i][0] : s->me.co_located_mv[i][0]*(time_pb - time_pp)/time_pp + (i &1)*16;\
161 int by = hy ? fy - s->me.co_located_mv[i][1] : s->me.co_located_mv[i][1]*(time_pb - time_pp)/time_pp + (i>>1)*16;\
162 int fxy= (fx&1) + 2*(fy&1);\
163 int bxy= (bx&1) + 2*(by&1);\
164 \
165 uint8_t *dst= s->me.scratchpad + 8*(i&1) + 8*stride*(i>>1);\
166 hpel_put[1][fxy](dst, (ref_y ) + (fx>>1) + (fy>>1)*(stride), stride, 8);\
167 hpel_avg[1][bxy](dst, (ref2_y) + (bx>>1) + (by>>1)*(stride), stride, 8);\
168 }\
169 }else{\
170 int fx = s->me.direct_basis_mv[0][0] + hx;\
171 int fy = s->me.direct_basis_mv[0][1] + hy;\
172 int bx = hx ? fx - s->me.co_located_mv[0][0] : s->me.co_located_mv[0][0]*(time_pb - time_pp)/time_pp;\
173 int by = hy ? fy - s->me.co_located_mv[0][1] : s->me.co_located_mv[0][1]*(time_pb - time_pp)/time_pp;\
174 int fxy= (fx&1) + 2*(fy&1);\
175 int bxy= (bx&1) + 2*(by&1);\
176 \
177 hpel_put[0][fxy](s->me.scratchpad, (ref_y ) + (fx>>1) + (fy>>1)*(stride), stride, 16);\
178 hpel_avg[0][bxy](s->me.scratchpad, (ref2_y) + (bx>>1) + (by>>1)*(stride), stride, 16);\
179 }\
180 d = cmp_func(s, s->me.scratchpad, src_y, stride);\
181 }else\
182 d= 256*256*256*32;
183
184
/* Half-pel scoring uses the sub-pel comparison function. */
185 #define CMP_HPEL(d, dx, dy, x, y, size)\
186 CMP_DIRECT(d, dx, dy, x, y, size, cmp_sub)
187
/* Full-pel scoring is the same path with a zero sub-pel delta and cmp. */
188 #define CMP(d, x, y, size)\
189 CMP_DIRECT(d, 0, 0, x, y, size, cmp)
190
/* Instantiate the template, then remove the per-instantiation macros.
 * CMP_QPEL is not defined in this section; its #undef is a no-op here. */
191 #include "motion_est_template.c"
192 #undef RENAME
193 #undef CMP
194 #undef CMP_HPEL
195 #undef CMP_QPEL
196 #undef INIT
197 #undef CMP_DIRECT
198
199 /* SIMPLE DIRECT QPEL */
/*
 * Quarter-pel counterpart of the direct half-pel instantiation.
 * RENAME() pastes the "simple_direct_qpel_" prefix onto its argument.
 * Names used but not defined here (s, src_y, ref_y, ref2_y, stride, xmin,
 * xmax, ymin, ymax, time_pb, time_pp, qpel_put, qpel_avg, cmp, cmp_sub) are
 * presumably provided at the expansion site -- TODO confirm.
 */
200 #define RENAME(a) simple_direct_qpel_ ## a
201
/*
 * CMP_DIRECT: score the quarter-pel delta (qx, qy) = (4*x+dx, 4*y+dy).
 * Same scheme as the half-pel version, but vectors are split with &3 / >>2
 * and the predictions are produced by qpel_put / qpel_avg (which take no
 * height argument). Outside the search window d becomes 256*256*256*32.
 * The size argument is not used.
 * NOTE(review): the expansion is an unbraced if/else ending in ';'.
 */
202 #define CMP_DIRECT(d, dx, dy, x, y, size, cmp_func)\
203 if((x) >= xmin && 4*(x) + (dx) <= 4*xmax && (y) >= ymin && 4*(y) + (dy) <= 4*ymax){\
204 const int qx= 4*(x) + (dx);\
205 const int qy= 4*(y) + (dy);\
206 if(s->mv_type==MV_TYPE_8X8){\
207 int i;\
208 for(i=0; i<4; i++){\
209 int fx = s->me.direct_basis_mv[i][0] + qx;\
210 int fy = s->me.direct_basis_mv[i][1] + qy;\
211 int bx = qx ? fx - s->me.co_located_mv[i][0] : s->me.co_located_mv[i][0]*(time_pb - time_pp)/time_pp + (i &1)*16;\
212 int by = qy ? fy - s->me.co_located_mv[i][1] : s->me.co_located_mv[i][1]*(time_pb - time_pp)/time_pp + (i>>1)*16;\
213 int fxy= (fx&3) + 4*(fy&3);\
214 int bxy= (bx&3) + 4*(by&3);\
215 \
216 uint8_t *dst= s->me.scratchpad + 8*(i&1) + 8*stride*(i>>1);\
217 qpel_put[1][fxy](dst, (ref_y ) + (fx>>2) + (fy>>2)*(stride), stride);\
218 qpel_avg[1][bxy](dst, (ref2_y) + (bx>>2) + (by>>2)*(stride), stride);\
219 }\
220 }else{\
221 int fx = s->me.direct_basis_mv[0][0] + qx;\
222 int fy = s->me.direct_basis_mv[0][1] + qy;\
223 int bx = qx ? fx - s->me.co_located_mv[0][0] : s->me.co_located_mv[0][0]*(time_pb - time_pp)/time_pp;\
224 int by = qy ? fy - s->me.co_located_mv[0][1] : s->me.co_located_mv[0][1]*(time_pb - time_pp)/time_pp;\
225 int fxy= (fx&3) + 4*(fy&3);\
226 int bxy= (bx&3) + 4*(by&3);\
227 \
228 qpel_put[0][fxy](s->me.scratchpad, (ref_y ) + (fx>>2) + (fy>>2)*(stride), stride);\
229 qpel_avg[0][bxy](s->me.scratchpad, (ref2_y) + (bx>>2) + (by>>2)*(stride), stride);\
230 }\
231 d = cmp_func(s, s->me.scratchpad, src_y, stride);\
232 }else\
233 d= 256*256*256*32;
234
235
/* Quarter-pel scoring uses the sub-pel comparison function. */
236 #define CMP_QPEL(d, dx, dy, x, y, size)\
237 CMP_DIRECT(d, dx, dy, x, y, size, cmp_sub)
238
/* Full-pel scoring is the same path with a zero sub-pel delta and cmp. */
239 #define CMP(d, x, y, size)\
240 CMP_DIRECT(d, 0, 0, x, y, size, cmp)
241
/* Instantiate the template with the macros defined above. */
242 #include "motion_est_template.c"
/*
 * Remove the macros of the SIMPLE DIRECT QPEL instantiation so that nothing
 * leaks into the rest of the file. The last line used to read
 * "#undef CMP__DIRECT" (double underscore), which left CMP_DIRECT defined.
 * CMP_HPEL is not defined in this section; its #undef is a harmless no-op.
 */
#undef RENAME
#undef CMP
#undef CMP_HPEL
#undef CMP_QPEL
#undef INIT
#undef CMP_DIRECT
249
250
/**
 * Comparison stub installed by set_cmp() for FF_CMP_ZERO.
 * All arguments are ignored.
 *
 * @return always 0
 */
static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride){
    (void)s;
    (void)a;
    (void)b;
    (void)stride;

    return 0;
}
254
255 static void set_cmp(MpegEncContext *s, me_cmp_func *cmp, int type){
256 DSPContext* c= &s->dsp;
257 int i;
258
259 memset(cmp, 0, sizeof(void*)*11);
260
261 switch(type&0xFF){
262 case FF_CMP_SAD:
263 cmp[0]= c->sad[0];
264 cmp[1]= c->sad[1];
265 break;
266 case FF_CMP_SATD:
267 cmp[0]= c->hadamard8_diff[0];
268 cmp[1]= c->hadamard8_diff[1];
269 break;
270 case FF_CMP_SSE:
271 cmp[0]= c->sse[0];
272 cmp[1]= c->sse[1];
273 break;
274 case FF_CMP_DCT:
275 cmp[0]= c->dct_sad[0];
276 cmp[1]= c->dct_sad[1];
277 break;
278 case FF_CMP_PSNR:
279 cmp[0]= c->quant_psnr[0];
280 cmp[1]= c->quant_psnr[1];
281 break;
282 case FF_CMP_ZERO:
283 for(i=0; i<7; i++){
284 cmp[i]= zero_cmp;
285 }
286 break;
287 default:
288 fprintf(stderr,"internal error in cmp function selection\n");
289 }
290 };
291
292 static inline int get_penalty_factor(MpegEncContext *s, int type){
293
294 switch(type){
295 default:
296 case FF_CMP_SAD:
297 return s->qscale;
298 case FF_CMP_SSE:
299 // return s->qscale*8;
300 case FF_CMP_DCT:
301 case FF_CMP_SATD:
302 return s->qscale*8;
303 }
304 }
305
306 void ff_init_me(MpegEncContext *s){
307 set_cmp(s, s->dsp.me_cmp, s->avctx->me_cmp);
308 set_cmp(s, s->dsp.me_sub_cmp, s->avctx->me_sub_cmp);
309 set_cmp(s, s->dsp.mb_cmp, s->avctx->mb_cmp);
310
311 if(s->flags&CODEC_FLAG_QPEL){
312 if(s->avctx->me_sub_cmp&FF_CMP_CHROMA)
313 s->me.sub_motion_search= simple_chroma_qpel_motion_search;
314 else
315 s->me.sub_motion_search= simple_qpel_motion_search;
316 }else{
317 if(s->avctx->me_sub_cmp&FF_CMP_CHROMA)
318 s->me.sub_motion_search= simple_chroma_hpel_motion_search;
319 else if(s->avctx->me_sub_cmp == FF_CMP_SAD && s->avctx->me_cmp == FF_CMP_SAD)
320 s->me.sub_motion_search= sad_hpel_motion_search;
321 else
322 s->me.sub_motion_search= simple_hpel_motion_search;
323 }
324
325 if(s->avctx->me_cmp&FF_CMP_CHROMA){
326 s->me.motion_search[0]= simple_chroma_epzs_motion_search;
327 s->me.motion_search[1]= simple_chroma_epzs_motion_search4;
328 }else{
329 s->me.motion_search[0]= simple_epzs_motion_search;
330 s->me.motion_search[1]= simple_epzs_motion_search4;
331 }
332 }
333
44 static int pix_dev(UINT8 * pix, int line_size, int mean) 334 static int pix_dev(UINT8 * pix, int line_size, int mean)
45 { 335 {
46 int s, i, j; 336 int s, i, j;
47 337
48 s = 0; 338 s = 0;
292 } 582 }
293 583
294 584
295 #define Z_THRESHOLD 256 585 #define Z_THRESHOLD 256
296 586
297 #define CHECK_MV(x,y)\ 587 #define CHECK_SAD_HALF_MV(suffix, x, y) \
298 {\
299 const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
300 const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
301 if(map[index]!=key){\
302 d = s->dsp.pix_abs16x16(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
303 d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
304 COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
305 map[index]= key;\
306 score_map[index]= d;\
307 }\
308 }
309
/*
 * CHECK_MV_DIR: evaluate candidate (x, y) unless map[] already holds its key
 * for the current map_generation. The score is pix_abs() at that offset plus
 * the motion-vector penalty scaled by quant. A strictly better score updates
 * best[], dmin and sets next_dir to new_dir. The candidate is then recorded
 * in map[] / score_map[].
 * Uses d, dmin, best, next_dir, map, score_map, map_generation, pix_abs,
 * new_pic, old_pic, pic_stride, mv_penalty, shift, pred_x, pred_y and quant
 * from the expansion site. x and y are evaluated several times.
 */
310 #define CHECK_MV_DIR(x,y,new_dir)\
311 {\
312 const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
313 const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
314 if(map[index]!=key){\
315 d = pix_abs(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
316 d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
317 if(d<dmin){\
318 best[0]=x;\
319 best[1]=y;\
320 dmin=d;\
321 next_dir= new_dir;\
322 }\
323 map[index]= key;\
324 score_map[index]= d;\
325 }\
326 }
327
/*
 * CHECK_MV4: evaluate candidate (x, y) with s->dsp.pix_abs8x8 unless map[]
 * already holds its key for the current map_generation. The score is the
 * pix_abs8x8 result plus the motion-vector penalty scaled by quant;
 * COPY3_IF_LT (defined elsewhere) updates dmin/best[] with it, and the
 * candidate is recorded in map[] / score_map[].
 * Uses s, d, dmin, best, map, score_map, map_generation, new_pic, old_pic,
 * pic_stride, mv_penalty, shift, pred_x, pred_y and quant from the
 * expansion site. x and y are evaluated several times.
 */
328 #define CHECK_MV4(x,y)\
329 {\
330 const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
331 const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
332 if(map[index]!=key){\
333 d = s->dsp.pix_abs8x8(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);\
334 d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;\
335 COPY3_IF_LT(dmin, d, best[0], x, best[1], y)\
336 map[index]= key;\
337 score_map[index]= d;\
338 }\
339 }
340
/*
 * check: debugging aid. Prints a diagnostic (without trailing newline) for
 * each bound of the search window, scaled by <<S, that (x, y) violates.
 * v is stringified and appended to the message as a tag.
 * Uses xmin, xmax, ymin, ymax and s from the expansion site. The expansion is
 * four unbraced if statements, and the last line ends in a continuation
 * backslash, so the line following the macro must stay empty.
 */
341 #define check(x,y,S,v)\
342 if( (x)<(xmin<<(S)) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
343 if( (x)>(xmax<<(S)) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
344 if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
345 if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
346
347
348 static inline int small_diamond_search(MpegEncContext * s, int *best, int dmin,
349 UINT8 *new_pic, UINT8 *old_pic, int pic_stride,
350 int pred_x, int pred_y, UINT16 *mv_penalty, int quant,
351 int xmin, int ymin, int xmax, int ymax, int shift,
352 uint32_t *map, uint16_t *score_map, int map_generation,
353 op_pixels_abs_func pix_abs)
354 {
355 int next_dir=-1;
356
357 for(;;){
358 int d;
359 const int dir= next_dir;
360 const int x= best[0];
361 const int y= best[1];
362 next_dir=-1;
363
364 //printf("%d", dir);
365 if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y , 0)
366 if(dir!=3 && y>ymin) CHECK_MV_DIR(x , y-1, 1)
367 if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y , 2)
368 if(dir!=1 && y<ymax) CHECK_MV_DIR(x , y+1, 3)
369
370 if(next_dir==-1){
371 return dmin;
372 }
373 }
374
375 /* for(;;){
376 int d;
377 const int x= best[0];
378 const int y= best[1];
379 const int last_min=dmin;
380 if(x>xmin) CHECK_MV(x-1, y )
381 if(y>xmin) CHECK_MV(x , y-1)
382 if(x<xmax) CHECK_MV(x+1, y )
383 if(y<xmax) CHECK_MV(x , y+1)
384 if(x>xmin && y>ymin) CHECK_MV(x-1, y-1)
385 if(x>xmin && y<ymax) CHECK_MV(x-1, y+1)
386 if(x<xmax && y>ymin) CHECK_MV(x+1, y-1)
387 if(x<xmax && y<ymax) CHECK_MV(x+1, y+1)
388 if(x-1>xmin) CHECK_MV(x-2, y )
389 if(y-1>xmin) CHECK_MV(x , y-2)
390 if(x+1<xmax) CHECK_MV(x+2, y )
391 if(y+1<xmax) CHECK_MV(x , y+2)
392 if(x-1>xmin && y-1>ymin) CHECK_MV(x-2, y-2)
393 if(x-1>xmin && y+1<ymax) CHECK_MV(x-2, y+2)
394 if(x+1<xmax && y-1>ymin) CHECK_MV(x+2, y-2)
395 if(x+1<xmax && y+1<ymax) CHECK_MV(x+2, y+2)
396 if(dmin==last_min) return dmin;
397 }
398 */
399 }
400
401 #if 1
402 #define SNAKE_1 3
403 #define SNAKE_2 2
404 #else
405 #define SNAKE_1 7
406 #define SNAKE_2 3
407 #endif
408 static inline int snake_search(MpegEncContext * s, int *best, int dmin,
409 UINT8 *new_pic, UINT8 *old_pic, int pic_stride,
410 int pred_x, int pred_y, UINT16 *mv_penalty, int quant,
411 int xmin, int ymin, int xmax, int ymax, int shift,
412 uint32_t *map, uint16_t *score_map,int map_generation,
413 op_pixels_abs_func pix_abs)
414 {
415 int dir=0;
416 int c=1;
417 static int x_dir[8]= {1,1,0,-1,-1,-1, 0, 1};
418 static int y_dir[8]= {0,1,1, 1, 0,-1,-1,-1};
419 int fails=0;
420 int last_d[2]={dmin, dmin};
421
422 /*static int good=0;
423 static int bad=0;
424 static int point=0;
425
426 point++;
427 if(256*256*256*64%point==0)
428 {
429 printf("%d %d %d\n", good, bad, point);
430 }*/
431
432 for(;;){
433 int x= best[0];
434 int y= best[1];
435 int d;
436 x+=x_dir[dir];
437 y+=y_dir[dir];
438 if(x>=xmin && x<=xmax && y>=ymin && y<=ymax){
439 const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;
440 const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);
441 if(map[index]!=key){
442 d = pix_abs(new_pic, old_pic + (x) + (y)*pic_stride, pic_stride);
443 d += (mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y])*quant;
444 map[index]=key;
445 score_map[index]=d;
446 }else
447 d= dmin+1;
448 }else{
449 d = dmin + 10000; //FIXME smarter boundary handling
450 }
451 if(d<dmin){
452 best[0]=x;
453 best[1]=y;
454 dmin=d;
455
456 if(last_d[1] - last_d[0] > last_d[0] - d) c= -c;
457 dir+=c;
458
459 fails=0;
460 //good++;
461 last_d[1]=last_d[0];
462 last_d[0]=d;
463 }else{
464 //bad++;
465 if(fails){
466 if(fails>=SNAKE_1+1) return dmin;
467 }else{
468 if(dir&1) dir-= c*3;
469 else c= -c;
470 // c= -c;
471 }
472 dir+=c*SNAKE_2;
473 fails++;
474 }
475 dir&=7;
476 }
477 }
478
479 static inline int cross_search(MpegEncContext * s, int *best, int dmin,
480 UINT8 *new_pic, UINT8 *old_pic, int pic_stride,
481 int pred_x, int pred_y, UINT16 *mv_penalty, int quant,
482 int xmin, int ymin, int xmax, int ymax, int shift,
483 uint32_t *map, uint16_t *score_map,int map_generation,
484 op_pixels_abs_func pix_abs)
485 {
486 static int x_dir[4]= {-1, 0, 1, 0};
487 static int y_dir[4]= { 0,-1, 0, 1};
488 int improvement[2]={100000, 100000};
489 int dirs[2]={2, 3};
490 int dir;
491 int last_dir= -1;
492
493 for(;;){
494 dir= dirs[ improvement[0] > improvement[1] ? 0 : 1 ];
495 if(improvement[dir&1]==-1) return dmin;
496
497 {
498 const int x= best[0] + x_dir[dir];
499 const int y= best[1] + y_dir[dir];
500 const int key= (y<<ME_MAP_MV_BITS) + x + map_generation;
501 const int index= ((y<<ME_MAP_SHIFT) + x)&(ME_MAP_SIZE-1);
502 int d;
503 if(x>=xmin && x<=xmax && y>=ymin && y<=ymax){
504 if(map[index]!=key){
505 d = pix_abs(new_pic, old_pic + x + y*pic_stride, pic_stride);
506 d += (mv_penalty[(x<<shift)-pred_x] + mv_penalty[(y<<shift)-pred_y])*quant;
507 map[index]=key;
508 score_map[index]=d;
509 if(d<dmin){
510 improvement[dir&1]= dmin-d;
511 improvement[(dir&1)^1]++;
512 dmin=d;
513 best[0]= x;
514 best[1]= y;
515 last_dir=dir;
516 continue;
517 }
518 }else{
519 d= score_map[index];
520 }
521 }else{
522 d= dmin + 1000; //FIXME is this a good idea?
523 }
524 /* evaluated point was cached or checked and worse */
525
526 if(last_dir==dir){
527 improvement[dir&1]= -1;
528 }else{
529 improvement[dir&1]= d-dmin;
530 last_dir= dirs[dir&1]= dir^2;
531 }
532 }
533 }
534 }
535
536 static inline int update_map_generation(MpegEncContext * s)
537 {
538 s->me_map_generation+= 1<<(ME_MAP_MV_BITS*2);
539 if(s->me_map_generation==0){
540 s->me_map_generation= 1<<(ME_MAP_MV_BITS*2);
541 memset(s->me_map, 0, sizeof(uint32_t)*ME_MAP_SIZE);
542 }
543 return s->me_map_generation;
544 }
545
/*
 * Integer-pel predictive search for the 16x16 block at (s->mb_x, s->mb_y).
 *
 * Scores the zero vector first, then a set of predictor vectors from P[]
 * (shifted right by `shift`) through CHECK_MV, and finally refines the best
 * candidate with small_diamond_search() when s->me_method==ME_EPZS, otherwise
 * with cross_search().
 * The best position is stored in *mx_ptr / *my_ptr and its score is returned.
 * When the zero vector scores below 256 and the left, top and top-right
 * predictors are all zero, the function returns (0,0) early and sets
 * s->skip_me.
 *
 * The locals d, dmin, best, map, score_map, map_generation, new_pic, old_pic,
 * pic_stride, mv_penalty, shift and quant are read or written by CHECK_MV.
 * P_MEDIAN is not defined in the visible part of this file; presumably it is
 * another P[] alias like P_LEFT / P_TOP -- TODO confirm.
 */
546 static int epzs_motion_search(MpegEncContext * s,
547 int *mx_ptr, int *my_ptr,
548 int P[10][2], int pred_x, int pred_y,
549 int xmin, int ymin, int xmax, int ymax, uint8_t * ref_picture)
550 {
551 int best[2]={0, 0};
552 int d, dmin;
553 UINT8 *new_pic, *old_pic;
554 const int pic_stride= s->linesize;
555 const int pic_xy= (s->mb_y*pic_stride + s->mb_x)*16;
556 UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
557 int quant= s->qscale; // qscale of the prev frame
558 const int shift= 1+s->quarter_sample;
559 uint32_t *map= s->me_map;
560 uint16_t *score_map= s->me_score_map;
561 int map_generation;
562
563 new_pic = s->new_picture.data[0] + pic_xy;
564 old_pic = ref_picture + pic_xy;
565
566 map_generation= update_map_generation(s);
567
/* score the zero vector and record it as map entry 0 */
568 dmin = s->dsp.pix_abs16x16(new_pic, old_pic, pic_stride);
569 map[0]= map_generation;
570 score_map[0]= dmin;
571
572 /* first line */
573 if ((s->mb_y == 0 || s->first_slice_line)) {
574 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
575 CHECK_MV(P_LAST[0]>>shift, P_LAST[1]>>shift)
576 }else{
/* early exit: good zero-vector score and all-zero spatial predictors */
577 if(dmin<256 && ( P_LEFT[0] |P_LEFT[1]
578 |P_TOP[0] |P_TOP[1]
579 |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){
580 *mx_ptr= 0;
581 *my_ptr= 0;
582 s->skip_me=1;
583 return dmin;
584 }
585 CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
/* the remaining predictors are only tried while the score is still high */
586 if(dmin>256*2){
587 CHECK_MV(P_LAST[0] >>shift, P_LAST[1] >>shift)
588 CHECK_MV(P_LEFT[0] >>shift, P_LEFT[1] >>shift)
589 CHECK_MV(P_TOP[0] >>shift, P_TOP[1] >>shift)
590 CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
591 }
592 }
593 if(dmin>256*4){
594 CHECK_MV(P_LAST_RIGHT[0] >>shift, P_LAST_RIGHT[1] >>shift)
595 CHECK_MV(P_LAST_BOTTOM[0]>>shift, P_LAST_BOTTOM[1]>>shift)
596 }
/* disabled: coarse-to-fine grid search with step sizes 128, 64, ..., 1 */
597 #if 0 //does only slow things down
598 if(dmin>512*3){
599 int step;
600 dmin= score_map[0];
601 best[0]= best[1]=0;
602 for(step=128; step>0; step>>=1){
603 const int step2= step;
604 int y;
605 for(y=-step2+best[1]; y<=step2+best[1]; y+=step){
606 int x;
607 if(y<ymin || y>ymax) continue;
608
609 for(x=-step2+best[0]; x<=step2+best[0]; x+=step){
610 if(x<xmin || x>xmax) continue;
611 if(x==best[0] && y==best[1]) continue;
612 CHECK_MV(x,y)
613 }
614 }
615 }
616 }
617 #endif
618 //check(best[0],best[1],0, b0)
/* local refinement around the best predictor */
619 if(s->me_method==ME_EPZS)
620 dmin= small_diamond_search(s, best, dmin, new_pic, old_pic, pic_stride,
621 pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax,
622 shift, map, score_map, map_generation, s->dsp.pix_abs16x16);
623 else
624 dmin= cross_search(s, best, dmin, new_pic, old_pic, pic_stride,
625 pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax,
626 shift, map, score_map, map_generation, s->dsp.pix_abs16x16);
627 //check(best[0],best[1],0, b1)
628 *mx_ptr= best[0];
629 *my_ptr= best[1];
630
631 // printf("%d %d %d \n", best[0], best[1], dmin);
632 return dmin;
633 }
634
/*
 * Integer-pel predictive search for one 8x8 block of the current macroblock.
 *
 * block (0..3) selects the sub-block: bit 0 is the horizontal half, bit 1 the
 * vertical half, as used in the pic_xy computation. Candidates are scored
 * with CHECK_MV4 (s->dsp.pix_abs8x8), starting from dmin = 1000000, and the
 * best one is refined with small_diamond_search() when
 * s->me_method==ME_EPZS, otherwise with cross_search().
 * The best position is stored in *mx_ptr / *my_ptr and its score is returned.
 *
 * The locals d, dmin, best, map, score_map, map_generation, new_pic, old_pic,
 * pic_stride, mv_penalty, shift and quant are read or written by CHECK_MV4.
 * P_MEDIAN is not defined in the visible part of this file; presumably it is
 * another P[] alias like P_LEFT / P_TOP -- TODO confirm.
 */
635 static int epzs_motion_search4(MpegEncContext * s, int block,
636 int *mx_ptr, int *my_ptr,
637 int P[10][2], int pred_x, int pred_y,
638 int xmin, int ymin, int xmax, int ymax, uint8_t *ref_picture)
639 {
640 int best[2]={0, 0};
641 int d, dmin;
642 UINT8 *new_pic, *old_pic;
643 const int pic_stride= s->linesize;
644 const int pic_xy= ((s->mb_y*2 + (block>>1))*pic_stride + s->mb_x*2 + (block&1))*8;
645 UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
646 int quant= s->qscale; // qscale of the prev frame
647 const int shift= 1+s->quarter_sample;
648 uint32_t *map= s->me_map;
649 uint16_t *score_map= s->me_score_map;
650 int map_generation;
651
652 new_pic = s->new_picture.data[0] + pic_xy;
653 old_pic = ref_picture + pic_xy;
654
655 map_generation= update_map_generation(s);
656
657 dmin = 1000000;
658 //printf("%d %d %d %d //",xmin, ymin, xmax, ymax);
659 /* first line */
660 if ((s->mb_y == 0 || s->first_slice_line) && block<2) {
661 CHECK_MV4(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
662 CHECK_MV4(P_LAST[0]>>shift, P_LAST[1]>>shift)
663 CHECK_MV4(P_MV1[0]>>shift, P_MV1[1]>>shift)
664 }else{
665 CHECK_MV4(P_MV1[0]>>shift, P_MV1[1]>>shift)
666 //FIXME try some early stop
/* the remaining predictors are only tried while the score is still high */
667 if(dmin>64*2){
668 CHECK_MV4(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift)
669 CHECK_MV4(P_LEFT[0]>>shift, P_LEFT[1]>>shift)
670 CHECK_MV4(P_TOP[0]>>shift, P_TOP[1]>>shift)
671 CHECK_MV4(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift)
672 CHECK_MV4(P_LAST[0]>>shift, P_LAST[1]>>shift)
673 }
674 }
675 if(dmin>64*4){
676 CHECK_MV4(P_LAST_RIGHT[0]>>shift, P_LAST_RIGHT[1]>>shift)
677 CHECK_MV4(P_LAST_BOTTOM[0]>>shift, P_LAST_BOTTOM[1]>>shift)
678 }
679
/* local refinement around the best predictor */
680 if(s->me_method==ME_EPZS)
681 dmin= small_diamond_search(s, best, dmin, new_pic, old_pic, pic_stride,
682 pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax,
683 shift, map, score_map, map_generation, s->dsp.pix_abs8x8);
684 else
685 dmin= cross_search(s, best, dmin, new_pic, old_pic, pic_stride,
686 pred_x, pred_y, mv_penalty, quant, xmin, ymin, xmax, ymax,
687 shift, map, score_map, map_generation, s->dsp.pix_abs8x8);
688
689 *mx_ptr= best[0];
690 *my_ptr= best[1];
691
692 // printf("%d %d %d \n", best[0], best[1], dmin);
693 return dmin;
694 }
695
696 #define CHECK_HALF_MV(suffix, x, y) \
697 {\ 588 {\
698 d= pix_abs_ ## suffix(pix, ptr+((x)>>1), s->linesize);\ 589 d= pix_abs_ ## suffix(pix, ptr+((x)>>1), s->linesize);\
699 d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*quant;\ 590 d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*penalty_factor;\
700 COPY3_IF_LT(dminh, d, dx, x, dy, y)\ 591 COPY3_IF_LT(dminh, d, dx, x, dy, y)\
701 } 592 }
702 593
703 594 static inline int sad_hpel_motion_search(MpegEncContext * s,
704 /* The idea would be to make half pel ME after Inter/Intra decision to
705 save time. */
706 static inline int halfpel_motion_search(MpegEncContext * s,
707 int *mx_ptr, int *my_ptr, int dmin, 595 int *mx_ptr, int *my_ptr, int dmin,
708 int xmin, int ymin, int xmax, int ymax, 596 int xmin, int ymin, int xmax, int ymax,
709 int pred_x, int pred_y, uint8_t *ref_picture, 597 int pred_x, int pred_y, Picture *picture,
710 op_pixels_abs_func pix_abs_x2, 598 int n, int size, uint16_t * const mv_penalty)
711 op_pixels_abs_func pix_abs_y2, op_pixels_abs_func pix_abs_xy2, int n) 599 {
712 { 600 uint8_t *ref_picture= picture->data[0];
713 UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame 601 uint32_t *score_map= s->me.score_map;
714 const int quant= s->qscale; 602 const int penalty_factor= s->me.sub_penalty_factor;
715 int mx, my, xx, yy, dminh; 603 int mx, my, xx, yy, dminh;
716 UINT8 *pix, *ptr; 604 UINT8 *pix, *ptr;
717 605 op_pixels_abs_func pix_abs_x2;
718 if(s->skip_me){ 606 op_pixels_abs_func pix_abs_y2;
719 *mx_ptr = 0; 607 op_pixels_abs_func pix_abs_xy2;
720 *my_ptr = 0; 608
721 return dmin; 609 if(size==0){
722 } 610 pix_abs_x2 = s->dsp.pix_abs16x16_x2;
723 611 pix_abs_y2 = s->dsp.pix_abs16x16_y2;
724 xx = 16 * s->mb_x + 8*(n&1); 612 pix_abs_xy2= s->dsp.pix_abs16x16_xy2;
725 yy = 16 * s->mb_y + 8*(n>>1);
726 pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
727
728 mx = *mx_ptr;
729 my = *my_ptr;
730 ptr = ref_picture + ((yy + my) * s->linesize) + (xx + mx);
731
732 dminh = dmin;
733
734 if (mx > xmin && mx < xmax &&
735 my > ymin && my < ymax) {
736 int dx=0, dy=0;
737 int d, pen_x, pen_y;
738
739 mx<<=1;
740 my<<=1;
741
742 pen_x= pred_x + mx;
743 pen_y= pred_y + my;
744
745 ptr-= s->linesize;
746 CHECK_HALF_MV(xy2, -1, -1)
747 CHECK_HALF_MV(y2 , 0, -1)
748 CHECK_HALF_MV(xy2, +1, -1)
749
750 ptr+= s->linesize;
751 CHECK_HALF_MV(x2 , -1, 0)
752 CHECK_HALF_MV(x2 , +1, 0)
753 CHECK_HALF_MV(xy2, -1, +1)
754 CHECK_HALF_MV(y2 , 0, +1)
755 CHECK_HALF_MV(xy2, +1, +1)
756
757 mx+=dx;
758 my+=dy;
759 }else{ 613 }else{
760 mx<<=1; 614 pix_abs_x2 = s->dsp.pix_abs8x8_x2;
761 my<<=1; 615 pix_abs_y2 = s->dsp.pix_abs8x8_y2;
762 } 616 pix_abs_xy2= s->dsp.pix_abs8x8_xy2;
763 617 }
764 *mx_ptr = mx; 618
765 *my_ptr = my; 619 if(s->me.skip){
766 return dminh;
767 }
768
769 static inline int fast_halfpel_motion_search(MpegEncContext * s,
770 int *mx_ptr, int *my_ptr, int dmin,
771 int xmin, int ymin, int xmax, int ymax,
772 int pred_x, int pred_y, uint8_t *ref_picture,
773 op_pixels_abs_func pix_abs_x2,
774 op_pixels_abs_func pix_abs_y2, op_pixels_abs_func pix_abs_xy2, int n)
775 {
776 UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
777 uint16_t *score_map= s->me_score_map;
778 const int quant= s->qscale;
779 int mx, my, xx, yy, dminh;
780 UINT8 *pix, *ptr;
781
782 if(s->skip_me){
783 // printf("S"); 620 // printf("S");
784 *mx_ptr = 0; 621 *mx_ptr = 0;
785 *my_ptr = 0; 622 *my_ptr = 0;
786 return dmin; 623 return dmin;
787 } 624 }
813 pen_x= pred_x + mx; 650 pen_x= pred_x + mx;
814 pen_y= pred_y + my; 651 pen_y= pred_y + my;
815 652
816 ptr-= s->linesize; 653 ptr-= s->linesize;
817 if(t<=b){ 654 if(t<=b){
818 CHECK_HALF_MV(y2 , 0, -1) 655 CHECK_SAD_HALF_MV(y2 , 0, -1)
819 if(l<=r){ 656 if(l<=r){
820 CHECK_HALF_MV(xy2, -1, -1) 657 CHECK_SAD_HALF_MV(xy2, -1, -1)
821 if(t+r<=b+l){ 658 if(t+r<=b+l){
822 CHECK_HALF_MV(xy2, +1, -1) 659 CHECK_SAD_HALF_MV(xy2, +1, -1)
823 ptr+= s->linesize; 660 ptr+= s->linesize;
824 }else{ 661 }else{
825 ptr+= s->linesize; 662 ptr+= s->linesize;
826 CHECK_HALF_MV(xy2, -1, +1) 663 CHECK_SAD_HALF_MV(xy2, -1, +1)
827 } 664 }
828 CHECK_HALF_MV(x2 , -1, 0) 665 CHECK_SAD_HALF_MV(x2 , -1, 0)
829 }else{ 666 }else{
830 CHECK_HALF_MV(xy2, +1, -1) 667 CHECK_SAD_HALF_MV(xy2, +1, -1)
831 if(t+l<=b+r){ 668 if(t+l<=b+r){
832 CHECK_HALF_MV(xy2, -1, -1) 669 CHECK_SAD_HALF_MV(xy2, -1, -1)
833 ptr+= s->linesize; 670 ptr+= s->linesize;
834 }else{ 671 }else{
835 ptr+= s->linesize; 672 ptr+= s->linesize;
836 CHECK_HALF_MV(xy2, +1, +1) 673 CHECK_SAD_HALF_MV(xy2, +1, +1)
837 } 674 }
838 CHECK_HALF_MV(x2 , +1, 0) 675 CHECK_SAD_HALF_MV(x2 , +1, 0)
839 } 676 }
840 }else{ 677 }else{
841 if(l<=r){ 678 if(l<=r){
842 if(t+l<=b+r){ 679 if(t+l<=b+r){
843 CHECK_HALF_MV(xy2, -1, -1) 680 CHECK_SAD_HALF_MV(xy2, -1, -1)
844 ptr+= s->linesize; 681 ptr+= s->linesize;
845 }else{ 682 }else{
846 ptr+= s->linesize; 683 ptr+= s->linesize;
847 CHECK_HALF_MV(xy2, +1, +1) 684 CHECK_SAD_HALF_MV(xy2, +1, +1)
848 } 685 }
849 CHECK_HALF_MV(x2 , -1, 0) 686 CHECK_SAD_HALF_MV(x2 , -1, 0)
850 CHECK_HALF_MV(xy2, -1, +1) 687 CHECK_SAD_HALF_MV(xy2, -1, +1)
851 }else{ 688 }else{
852 if(t+r<=b+l){ 689 if(t+r<=b+l){
853 CHECK_HALF_MV(xy2, +1, -1) 690 CHECK_SAD_HALF_MV(xy2, +1, -1)
854 ptr+= s->linesize; 691 ptr+= s->linesize;
855 }else{ 692 }else{
856 ptr+= s->linesize; 693 ptr+= s->linesize;
857 CHECK_HALF_MV(xy2, -1, +1) 694 CHECK_SAD_HALF_MV(xy2, -1, +1)
858 } 695 }
859 CHECK_HALF_MV(x2 , +1, 0) 696 CHECK_SAD_HALF_MV(x2 , +1, 0)
860 CHECK_HALF_MV(xy2, +1, +1) 697 CHECK_SAD_HALF_MV(xy2, +1, +1)
861 } 698 }
862 CHECK_HALF_MV(y2 , 0, +1) 699 CHECK_SAD_HALF_MV(y2 , 0, +1)
863 } 700 }
864 mx+=dx; 701 mx+=dx;
865 my+=dy; 702 my+=dy;
866 703
867 }else{ 704 }else{
931 { 768 {
932 int block; 769 int block;
933 int P[10][2]; 770 int P[10][2];
934 uint8_t *ref_picture= s->last_picture.data[0]; 771 uint8_t *ref_picture= s->last_picture.data[0];
935 int dmin_sum=0; 772 int dmin_sum=0;
773 uint16_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV;
936 774
937 for(block=0; block<4; block++){ 775 for(block=0; block<4; block++){
938 int mx4, my4; 776 int mx4, my4;
939 int pred_x4, pred_y4; 777 int pred_x4, pred_y4;
940 int dmin4; 778 int dmin4;
993 } 831 }
994 } 832 }
995 P_MV1[0]= mx; 833 P_MV1[0]= mx;
996 P_MV1[1]= my; 834 P_MV1[1]= my;
997 835
998 dmin4 = epzs_motion_search4(s, block, &mx4, &my4, P, pred_x4, pred_y4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, ref_picture); 836 dmin4 = s->me.motion_search[1](s, block, &mx4, &my4, P, pred_x4, pred_y4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4,
999 837 &s->last_picture, mv_penalty);
1000 dmin4= fast_halfpel_motion_search(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, 838
1001 pred_x4, pred_y4, ref_picture, s->dsp.pix_abs8x8_x2, 839 dmin4= s->me.sub_motion_search(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4,
1002 s->dsp.pix_abs8x8_y2, s->dsp.pix_abs8x8_xy2, block); 840 pred_x4, pred_y4, &s->last_picture, block, 1, mv_penalty);
1003 841
1004 s->motion_val[ s->block_index[block] ][0]= mx4; 842 s->motion_val[ s->block_index[block] ][0]= mx4;
1005 s->motion_val[ s->block_index[block] ][1]= my4; 843 s->motion_val[ s->block_index[block] ][1]= my4;
1006 dmin_sum+= dmin4; 844 dmin_sum+= dmin4;
1007 } 845 }
1019 int P[10][2]; 857 int P[10][2];
1020 const int shift= 1+s->quarter_sample; 858 const int shift= 1+s->quarter_sample;
1021 int mb_type=0; 859 int mb_type=0;
1022 uint8_t *ref_picture= s->last_picture.data[0]; 860 uint8_t *ref_picture= s->last_picture.data[0];
1023 Picture * const pic= &s->current_picture; 861 Picture * const pic= &s->current_picture;
862 uint16_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV;
863
864 assert(s->quarter_sample==0 || s->quarter_sample==1);
865
866 s->me.penalty_factor = get_penalty_factor(s, s->avctx->me_cmp);
867 s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp);
1024 868
1025 get_limits(s, &range, &xmin, &ymin, &xmax, &ymax, s->f_code); 869 get_limits(s, &range, &xmin, &ymin, &xmax, &ymax, s->f_code);
1026 rel_xmin= xmin - mb_x*16; 870 rel_xmin= xmin - mb_x*16;
1027 rel_xmax= xmax - mb_x*16; 871 rel_xmax= xmax - mb_x*16;
1028 rel_ymin= ymin - mb_y*16; 872 rel_ymin= ymin - mb_y*16;
1029 rel_ymax= ymax - mb_y*16; 873 rel_ymax= ymax - mb_y*16;
1030 s->skip_me=0; 874 s->me.skip=0;
1031 875
1032 switch(s->me_method) { 876 switch(s->me_method) {
1033 case ME_ZERO: 877 case ME_ZERO:
1034 default: 878 default:
1035 no_motion_search(s, &mx, &my); 879 no_motion_search(s, &mx, &my);
1094 pred_x= P_LEFT[0]; 938 pred_x= P_LEFT[0];
1095 pred_y= P_LEFT[1]; 939 pred_y= P_LEFT[1];
1096 } 940 }
1097 } 941 }
1098 } 942 }
1099 dmin = epzs_motion_search(s, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax, ref_picture); 943 dmin = s->me.motion_search[0](s, 0, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
944 &s->last_picture, mv_penalty);
1100 945
1101 break; 946 break;
1102 } 947 }
1103 948
1104 /* intra / predictive decision */ 949 /* intra / predictive decision */
1110 ppix = ref_picture + ((yy+my) * s->linesize) + (xx+mx); 955 ppix = ref_picture + ((yy+my) * s->linesize) + (xx+mx);
1111 956
1112 sum = s->dsp.pix_sum(pix, s->linesize); 957 sum = s->dsp.pix_sum(pix, s->linesize);
1113 958
1114 varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8; 959 varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
1115 // FIXME: MMX OPTIMIZE 960 vard = (s->dsp.sse[0](NULL, pix, ppix, s->linesize)+128)>>8;
1116 vard = (s->dsp.pix_norm(pix, ppix, s->linesize)+128)>>8;
1117 961
1118 //printf("%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout); 962 //printf("%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout);
1119 pic->mb_var [s->mb_width * mb_y + mb_x] = varc; 963 pic->mb_var [s->mb_width * mb_y + mb_x] = varc;
1120 pic->mc_mb_var[s->mb_width * mb_y + mb_x] = vard; 964 pic->mc_mb_var[s->mb_width * mb_y + mb_x] = vard;
1121 pic->mb_mean [s->mb_width * mb_y + mb_x] = (sum+128)>>8; 965 pic->mb_mean [s->mb_width * mb_y + mb_x] = (sum+128)>>8;
1135 979
1136 if (vard*2 + 200 > varc) 980 if (vard*2 + 200 > varc)
1137 mb_type|= MB_TYPE_INTRA; 981 mb_type|= MB_TYPE_INTRA;
1138 if (varc*2 + 200 > vard){ 982 if (varc*2 + 200 > vard){
1139 mb_type|= MB_TYPE_INTER; 983 mb_type|= MB_TYPE_INTER;
1140 if(s->me_method >= ME_EPZS) 984 s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
1141 fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, 985 pred_x, pred_y, &s->last_picture, 0, 0, mv_penalty);
1142 pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2,
1143 s->dsp.pix_abs16x16_y2, s->dsp.pix_abs16x16_xy2, 0);
1144 else
1145 halfpel_motion_search( s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
1146 pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2,
1147 s->dsp.pix_abs16x16_y2, s->dsp.pix_abs16x16_xy2, 0);
1148 }else{ 986 }else{
1149 mx <<=1; 987 mx <<=shift;
1150 my <<=1; 988 my <<=shift;
1151 } 989 }
1152 if((s->flags&CODEC_FLAG_4MV) 990 if((s->flags&CODEC_FLAG_4MV)
1153 && !s->skip_me && varc>50 && vard>10){ 991 && !s->me.skip && varc>50 && vard>10){
1154 mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift); 992 mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift);
1155 mb_type|=MB_TYPE_INTER4V; 993 mb_type|=MB_TYPE_INTER4V;
1156 994
1157 set_p_mv_tables(s, mx, my, 0); 995 set_p_mv_tables(s, mx, my, 0);
1158 }else 996 }else
1159 set_p_mv_tables(s, mx, my, 1); 997 set_p_mv_tables(s, mx, my, 1);
1160 }else{ 998 }else{
1161 if (vard <= 64 || vard < varc) { 999 if (vard <= 64 || vard < varc) {
1000 // if (sadP <= 32 || sadP < sadI + 500) {
1162 s->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc); 1001 s->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1163 mb_type|= MB_TYPE_INTER; 1002 mb_type|= MB_TYPE_INTER;
1164 if (s->me_method != ME_ZERO) { 1003 if (s->me_method != ME_ZERO) {
1165 if(s->me_method >= ME_EPZS) 1004 dmin= s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
1166 dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, 1005 pred_x, pred_y, &s->last_picture, 0, 0, mv_penalty);
1167 pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, s->dsp.pix_abs16x16_y2,
1168 s->dsp.pix_abs16x16_xy2, 0);
1169 else
1170 dmin= halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
1171 pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, s->dsp.pix_abs16x16_y2,
1172 s->dsp.pix_abs16x16_xy2, 0);
1173 if((s->flags&CODEC_FLAG_4MV) 1006 if((s->flags&CODEC_FLAG_4MV)
1174 && !s->skip_me && varc>50 && vard>10){ 1007 && !s->me.skip && varc>50 && vard>10){
1175 int dmin4= mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift); 1008 int dmin4= mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift);
1176 if(dmin4 + 128 <dmin) 1009 if(dmin4 + 128 <dmin)
1177 mb_type= MB_TYPE_INTER4V; 1010 mb_type= MB_TYPE_INTER4V;
1178 } 1011 }
1179 set_p_mv_tables(s, mx, my, mb_type!=MB_TYPE_INTER4V); 1012 set_p_mv_tables(s, mx, my, mb_type!=MB_TYPE_INTER4V);
1180 1013
1181 } else { 1014 } else {
1182 mx <<=1; 1015 mx <<=shift;
1183 my <<=1; 1016 my <<=shift;
1184 } 1017 }
1185 #if 0 1018 #if 0
1186 if (vard < 10) { 1019 if (vard < 10) {
1187 skip++; 1020 skip++;
1188 fprintf(stderr,"\nEarly skip: %d vard: %2d varc: %5d dmin: %d", 1021 fprintf(stderr,"\nEarly skip: %d vard: %2d varc: %5d dmin: %d",
1199 1032
1200 s->mb_type[mb_y*s->mb_width + mb_x]= mb_type; 1033 s->mb_type[mb_y*s->mb_width + mb_x]= mb_type;
1201 } 1034 }
1202 1035
1203 int ff_estimate_motion_b(MpegEncContext * s, 1036 int ff_estimate_motion_b(MpegEncContext * s,
1204 int mb_x, int mb_y, int16_t (*mv_table)[2], uint8_t *ref_picture, int f_code) 1037 int mb_x, int mb_y, int16_t (*mv_table)[2], Picture *picture, int f_code)
1205 { 1038 {
1206 int mx, my, range, dmin; 1039 int mx, my, range, dmin;
1207 int xmin, ymin, xmax, ymax; 1040 int xmin, ymin, xmax, ymax;
1208 int rel_xmin, rel_ymin, rel_xmax, rel_ymax; 1041 int rel_xmin, rel_ymin, rel_xmax, rel_ymax;
1209 int pred_x=0, pred_y=0; 1042 int pred_x=0, pred_y=0;
1210 int P[10][2]; 1043 int P[10][2];
1211 const int shift= 1+s->quarter_sample; 1044 const int shift= 1+s->quarter_sample;
1212 const int mot_stride = s->mb_width + 2; 1045 const int mot_stride = s->mb_width + 2;
1213 const int mot_xy = (mb_y + 1)*mot_stride + mb_x + 1; 1046 const int mot_xy = (mb_y + 1)*mot_stride + mb_x + 1;
1214 1047 uint8_t * const ref_picture= picture->data[0];
1048 uint16_t * const mv_penalty= s->me.mv_penalty[f_code] + MAX_MV;
1049
1050 s->me.penalty_factor = get_penalty_factor(s, s->avctx->me_cmp);
1051 s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp);
1052
1215 get_limits(s, &range, &xmin, &ymin, &xmax, &ymax, f_code); 1053 get_limits(s, &range, &xmin, &ymin, &xmax, &ymax, f_code);
1216 rel_xmin= xmin - mb_x*16; 1054 rel_xmin= xmin - mb_x*16;
1217 rel_xmax= xmax - mb_x*16; 1055 rel_xmax= xmax - mb_x*16;
1218 rel_ymin= ymin - mb_y*16; 1056 rel_ymin= ymin - mb_y*16;
1219 rel_ymax= ymax - mb_y*16; 1057 rel_ymax= ymax - mb_y*16;
1273 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); 1111 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1274 } 1112 }
1275 pred_x= P_LEFT[0]; 1113 pred_x= P_LEFT[0];
1276 pred_y= P_LEFT[1]; 1114 pred_y= P_LEFT[1];
1277 } 1115 }
1278 dmin = epzs_motion_search(s, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax, ref_picture); 1116 dmin = s->me.motion_search[0](s, 0, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
1117 picture, mv_penalty);
1279 1118
1280 break; 1119 break;
1281 } 1120 }
1282 1121
1283 dmin= fast_halfpel_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, 1122 dmin= s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax,
1284 pred_x, pred_y, ref_picture, s->dsp.pix_abs16x16_x2, s->dsp.pix_abs16x16_y2, 1123 pred_x, pred_y, picture, 0, 0, mv_penalty);
1285 s->dsp.pix_abs16x16_xy2, 0);
1286 //printf("%d %d %d %d//", s->mb_x, s->mb_y, mx, my); 1124 //printf("%d %d %d %d//", s->mb_x, s->mb_y, mx, my);
1287 // s->mb_type[mb_y*s->mb_width + mb_x]= mb_type; 1125 // s->mb_type[mb_y*s->mb_width + mb_x]= mb_type;
1288 mv_table[mot_xy][0]= mx; 1126 mv_table[mot_xy][0]= mx;
1289 mv_table[mot_xy][1]= my; 1127 mv_table[mot_xy][1]= my;
1128
1290 return dmin; 1129 return dmin;
1291 } 1130 }
1292
1293 1131
/*
 * check_bidir_mv: computes a score (fbmin) for one forward/backward motion
 * vector pair on the 16x16 luma block at macroblock position (mb_x, mb_y).
 *
 * Row layout of this listing: old line number and old text first, then new
 * line number and new text.
 *
 * Old side: half-pel only. src_x/src_y are clipped to [-16, width] and
 * [-16, height], the forward block from s->last_picture is written to the
 * scratchpad with put_pixels_tab, the backward block from s->next_picture is
 * merged in with avg_pixels_tab, and fbmin is the sum of the mv_penalty terms
 * (scaled by s->qscale) plus pix_abs16x16 against s->new_picture.
 *
 * New side: selects the qpel tables when s->quarter_sample is set (2 fractional
 * bits per component, 4-bit dxy) and the half-pel tables otherwise (1 bit per
 * component, 2-bit dxy). The clipping is replaced by assert()s on the same
 * ranges, and the penalties are scaled by s->me.sub_penalty_factor.
 *
 * motion_f* / pred_f* index mv_penalty for the forward vector, motion_b* /
 * pred_b* for the backward vector. Returns fbmin.
 */
1294 static inline int check_bidir_mv(MpegEncContext * s, 1132 static inline int check_bidir_mv(MpegEncContext * s,
1295 int mb_x, int mb_y, 1133 int mb_x, int mb_y,
1296 int motion_fx, int motion_fy, 1134 int motion_fx, int motion_fy,
1297 int motion_bx, int motion_by, 1135 int motion_bx, int motion_by,
1298 int pred_fx, int pred_fy, 1136 int pred_fx, int pred_fy,
1299 int pred_bx, int pred_by) 1137 int pred_bx, int pred_by)
1300 { 1138 {
1301 //FIXME optimize? 1139 //FIXME optimize?
1302 //FIXME direct mode penalty 1140 //FIXME move into template?
1303 UINT16 *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame 1141 //FIXME better f_code prediction (max mv & distance)
1304 uint8_t *dest_y = s->me_scratchpad; 1142 UINT16 *mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
1143 uint8_t *dest_y = s->me.scratchpad;
1305 uint8_t *ptr; 1144 uint8_t *ptr;
1306 int dxy; 1145 int dxy;
1307 int src_x, src_y; 1146 int src_x, src_y;
1308 int fbmin; 1147 int fbmin;
1309 1148
1310 fbmin = (mv_penalty[motion_fx-pred_fx] + mv_penalty[motion_fy-pred_fy])*s->qscale; 1149 if(s->quarter_sample){
1311 1150 dxy = ((motion_fy & 3) << 2) | (motion_fx & 3);
1312 dxy = ((motion_fy & 1) << 1) | (motion_fx & 1); 1151 src_x = mb_x * 16 + (motion_fx >> 2);
1313 src_x = mb_x * 16 + (motion_fx >> 1); 1152 src_y = mb_y * 16 + (motion_fy >> 2);
1314 src_y = mb_y * 16 + (motion_fy >> 1); 1153 assert(src_x >=-16 && src_x<=s->width);
1315 src_x = clip(src_x, -16, s->width); 1154 assert(src_y >=-16 && src_y<=s->height);
1316 if (src_x == s->width) 1155
1317 dxy&= 2; 1156 ptr = s->last_picture.data[0] + (src_y * s->linesize) + src_x;
1318 src_y = clip(src_y, -16, s->height); 1157 s->dsp.put_qpel_pixels_tab[0][dxy](dest_y , ptr , s->linesize);
1319 if (src_y == s->height) 1158
1320 dxy&= 1; 1159 dxy = ((motion_by & 3) << 2) | (motion_bx & 3);
1321 1160 src_x = mb_x * 16 + (motion_bx >> 2);
1322 ptr = s->last_picture.data[0] + (src_y * s->linesize) + src_x; 1161 src_y = mb_y * 16 + (motion_by >> 2);
1323 s->dsp.put_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16); 1162 assert(src_x >=-16 && src_x<=s->width);
1324 1163 assert(src_y >=-16 && src_y<=s->height);
1325 fbmin += (mv_penalty[motion_bx-pred_bx] + mv_penalty[motion_by-pred_by])*s->qscale; 1164
1326 1165 ptr = s->next_picture.data[0] + (src_y * s->linesize) + src_x;
1327 dxy = ((motion_by & 1) << 1) | (motion_bx & 1); 1166 s->dsp.avg_qpel_pixels_tab[0][dxy](dest_y , ptr , s->linesize);
1328 src_x = mb_x * 16 + (motion_bx >> 1); 1167 }else{
1329 src_y = mb_y * 16 + (motion_by >> 1); 1168 dxy = ((motion_fy & 1) << 1) | (motion_fx & 1);
1330 src_x = clip(src_x, -16, s->width); 1169 src_x = mb_x * 16 + (motion_fx >> 1);
1331 if (src_x == s->width) 1170 src_y = mb_y * 16 + (motion_fy >> 1);
1332 dxy&= 2; 1171 assert(src_x >=-16 && src_x<=s->width);
1333 src_y = clip(src_y, -16, s->height); 1172 assert(src_y >=-16 && src_y<=s->height);
1334 if (src_y == s->height) 1173
1335 dxy&= 1; 1174 ptr = s->last_picture.data[0] + (src_y * s->linesize) + src_x;
1336 1175 s->dsp.put_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16);
1337 ptr = s->next_picture.data[0] + (src_y * s->linesize) + src_x; 1176
1338 s->dsp.avg_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16); 1177 dxy = ((motion_by & 1) << 1) | (motion_bx & 1);
1339 1178 src_x = mb_x * 16 + (motion_bx >> 1);
1340 fbmin += s->dsp.pix_abs16x16(s->new_picture.data[0] + mb_x*16 + mb_y*16*s->linesize, dest_y, s->linesize); 1179 src_y = mb_y * 16 + (motion_by >> 1);
1180 assert(src_x >=-16 && src_x<=s->width);
1181 assert(src_y >=-16 && src_y<=s->height);
1182
1183 ptr = s->next_picture.data[0] + (src_y * s->linesize) + src_x;
1184 s->dsp.avg_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16);
1185 }
1186
/*
 * NOTE(review): on the new side the backward-penalty line (new 1188) ends
 * with ';', which terminates the fbmin assignment there. The next line
 * (new 1189) is therefore a separate expression statement: me_sub_cmp[0] is
 * still called, but its result is discarded and never reaches fbmin, so the
 * returned score holds only the mv_penalty terms. The old side added the
 * pix_abs16x16 distortion into fbmin, so the ';' looks unintended; confirm
 * and remove it so the comparison result is part of the sum.
 */
1187 fbmin = (mv_penalty[motion_fx-pred_fx] + mv_penalty[motion_fy-pred_fy])*s->me.sub_penalty_factor
1188 +(mv_penalty[motion_bx-pred_bx] + mv_penalty[motion_by-pred_by])*s->me.sub_penalty_factor;
1189 + s->dsp.me_sub_cmp[0](s, s->new_picture.data[0] + mb_x*16 + mb_y*16*s->linesize, dest_y, s->linesize);
1190
1341 return fbmin; 1191 return fbmin;
1342 } 1192 }
1343 1193
1344 /* refine the bidir vectors in hq mode and return the score in both lq & hq mode*/ 1194 /* refine the bidir vectors in hq mode and return the score in both lq & hq mode*/
1345 static inline int bidir_refine(MpegEncContext * s, 1195 static inline int bidir_refine(MpegEncContext * s,
1372 int mb_x, int mb_y) 1222 int mb_x, int mb_y)
1373 { 1223 {
1374 int P[10][2]; 1224 int P[10][2];
1375 const int mot_stride = s->mb_width + 2; 1225 const int mot_stride = s->mb_width + 2;
1376 const int mot_xy = (mb_y + 1)*mot_stride + mb_x + 1; 1226 const int mot_xy = (mb_y + 1)*mot_stride + mb_x + 1;
1377 int dmin, dmin2; 1227 const int shift= 1+s->quarter_sample;
1378 int motion_fx, motion_fy, motion_bx, motion_by, motion_bx0, motion_by0; 1228 int dmin, i;
1379 int motion_dx, motion_dy;
1380 const int motion_px= s->p_mv_table[mot_xy][0];
1381 const int motion_py= s->p_mv_table[mot_xy][1];
1382 const int time_pp= s->pp_time; 1229 const int time_pp= s->pp_time;
1383 const int time_pb= s->pb_time; 1230 const int time_pb= s->pb_time;
1384 const int time_bp= time_pp - time_pb; 1231 int mx, my, xmin, xmax, ymin, ymax;
1385 int bx, by;
1386 int mx, my, mx2, my2;
1387 uint8_t *ref_picture= s->me_scratchpad - (mb_x - 1 + (mb_y - 1)*s->linesize)*16;
1388 int16_t (*mv_table)[2]= s->b_direct_mv_table; 1232 int16_t (*mv_table)[2]= s->b_direct_mv_table;
1389 /* uint16_t *mv_penalty= s->mv_penalty[s->f_code] + MAX_MV; */ // f_code of the prev frame 1233 uint16_t * const mv_penalty= s->me.mv_penalty[1] + MAX_MV;
1390 1234
1391 /* thanks to iso-mpeg the rounding is different for the zero vector, so we need to handle that ... */
1392 motion_fx= (motion_px*time_pb)/time_pp;
1393 motion_fy= (motion_py*time_pb)/time_pp;
1394 motion_bx0= (-motion_px*time_bp)/time_pp;
1395 motion_by0= (-motion_py*time_bp)/time_pp;
1396 motion_dx= motion_dy=0;
1397 dmin2= check_bidir_mv(s, mb_x, mb_y,
1398 motion_fx, motion_fy,
1399 motion_bx0, motion_by0,
1400 motion_fx, motion_fy,
1401 motion_bx0, motion_by0) - s->qscale;
1402
1403 motion_bx= motion_fx - motion_px;
1404 motion_by= motion_fy - motion_py;
1405 for(by=-1; by<2; by++){
1406 for(bx=-1; bx<2; bx++){
1407 uint8_t *dest_y = s->me_scratchpad + (by+1)*s->linesize*16 + (bx+1)*16;
1408 uint8_t *ptr;
1409 int dxy;
1410 int src_x, src_y;
1411 const int width= s->width;
1412 const int height= s->height;
1413
1414 dxy = ((motion_fy & 1) << 1) | (motion_fx & 1);
1415 src_x = (mb_x + bx) * 16 + (motion_fx >> 1);
1416 src_y = (mb_y + by) * 16 + (motion_fy >> 1);
1417 src_x = clip(src_x, -16, width);
1418 if (src_x == width) dxy &= ~1;
1419 src_y = clip(src_y, -16, height);
1420 if (src_y == height) dxy &= ~2;
1421
1422 ptr = s->last_picture.data[0] + (src_y * s->linesize) + src_x;
1423 s->dsp.put_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16);
1424
1425 dxy = ((motion_by & 1) << 1) | (motion_bx & 1);
1426 src_x = (mb_x + bx) * 16 + (motion_bx >> 1);
1427 src_y = (mb_y + by) * 16 + (motion_by >> 1);
1428 src_x = clip(src_x, -16, width);
1429 if (src_x == width) dxy &= ~1;
1430 src_y = clip(src_y, -16, height);
1431 if (src_y == height) dxy &= ~2;
1432
1433 s->dsp.avg_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16);
1434 }
1435 }
1436
1437 P_LAST[0] = mv_table[mot_xy ][0]; 1235 P_LAST[0] = mv_table[mot_xy ][0];
1438 P_LAST[1] = mv_table[mot_xy ][1]; 1236 P_LAST[1] = mv_table[mot_xy ][1];
1439 P_LEFT[0] = mv_table[mot_xy - 1][0]; 1237 P_LEFT[0] = mv_table[mot_xy - 1][0];
1440 P_LEFT[1] = mv_table[mot_xy - 1][1]; 1238 P_LEFT[1] = mv_table[mot_xy - 1][1];
1441 P_LAST_RIGHT[0] = mv_table[mot_xy + 1][0]; 1239 P_LAST_RIGHT[0] = mv_table[mot_xy + 1][0];
1456 P_TOPRIGHT[1] = mv_table[mot_xy - mot_stride + 1 ][1]; 1254 P_TOPRIGHT[1] = mv_table[mot_xy - mot_stride + 1 ][1];
1457 1255
1458 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); 1256 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1459 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); 1257 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1460 } 1258 }
1461 dmin = epzs_motion_search(s, &mx, &my, P, 0, 0, -16, -16, 15, 15, ref_picture); 1259
1462 if(mx==0 && my==0) dmin=99999999; // not representable, due to rounding stuff 1260 ymin= xmin=(-32)>>shift;
1463 if(dmin2<dmin){ 1261 ymax= xmax= 31>>shift;
1464 dmin= dmin2; 1262
1465 mx=0; 1263 if(s->co_located_type_table[mb_x + mb_y*s->mb_width]==CO_LOCATED_TYPE_4MV){
1466 my=0; 1264 s->mv_type= MV_TYPE_8X8;
1467 } 1265 }else{
1468 #if 1 1266 s->mv_type= MV_TYPE_16X16;
1469 mx2= mx= mx*2; 1267 }
1470 my2= my= my*2; 1268
1471 for(by=-1; by<2; by++){ 1269 for(i=0; i<4; i++){
1472 if(my2+by < -32) continue; 1270 int index= s->block_index[i];
1473 for(bx=-1; bx<2; bx++){ 1271 int min, max;
1474 if(bx==0 && by==0) continue; 1272
1475 if(mx2+bx < -32) continue; 1273 s->me.co_located_mv[i][0]= s->motion_val[index][0];
1476 dmin2= check_bidir_mv(s, mb_x, mb_y, 1274 s->me.co_located_mv[i][1]= s->motion_val[index][1];
1477 mx2+bx+motion_fx, my2+by+motion_fy, 1275 s->me.direct_basis_mv[i][0]= s->me.co_located_mv[i][0]*time_pb/time_pp + ((i& 1)<<(shift+3));
1478 mx2+bx+motion_bx, my2+by+motion_by, 1276 s->me.direct_basis_mv[i][1]= s->me.co_located_mv[i][1]*time_pb/time_pp + ((i>>1)<<(shift+3));
1479 mx2+bx+motion_fx, my2+by+motion_fy, 1277 // s->me.direct_basis_mv[1][i][0]= s->me.co_located_mv[i][0]*(time_pb - time_pp)/time_pp + ((i &1)<<(shift+3);
1480 motion_bx, motion_by) - s->qscale; 1278 // s->me.direct_basis_mv[1][i][1]= s->me.co_located_mv[i][1]*(time_pb - time_pp)/time_pp + ((i>>1)<<(shift+3);
1481 1279
1482 if(dmin2<dmin){ 1280 max= FFMAX(s->me.direct_basis_mv[i][0], s->me.direct_basis_mv[i][0] - s->me.co_located_mv[i][0])>>shift;
1483 dmin=dmin2; 1281 min= FFMIN(s->me.direct_basis_mv[i][0], s->me.direct_basis_mv[i][0] - s->me.co_located_mv[i][0])>>shift;
1484 mx= mx2 + bx; 1282 max+= (2*mb_x + (i& 1))*8 - 1; // +-1 is for the simpler rounding
1485 my= my2 + by; 1283 min+= (2*mb_x + (i& 1))*8 + 1;
1486 } 1284 if(max >= s->width) xmax= s->width - max - 1;
1487 } 1285 if(min < -16 ) xmin= - 32 - min;
1488 } 1286
1489 #else 1287 max= FFMAX(s->me.direct_basis_mv[i][1], s->me.direct_basis_mv[i][1] - s->me.co_located_mv[i][1])>>shift;
1490 mx*=2; my*=2; 1288 min= FFMIN(s->me.direct_basis_mv[i][1], s->me.direct_basis_mv[i][1] - s->me.co_located_mv[i][1])>>shift;
1491 #endif 1289 max+= (2*mb_y + (i>>1))*8 - 1; // +-1 is for the simpler rounding
1492 if(mx==0 && my==0){ 1290 min+= (2*mb_y + (i>>1))*8 + 1;
1493 motion_bx= motion_bx0; 1291 if(max >= s->height) ymax= s->height - max - 1;
1494 motion_by= motion_by0; 1292 if(min < -16 ) ymin= - 32 - min;
1293
1294 if(s->mv_type == MV_TYPE_16X16) break;
1295 }
1296
1297 assert(xmax <= 15 && ymax <= 15 && xmin >= -16 && ymin >= -16);
1298
1299 if(xmax < 0 || xmin >0 || ymax < 0 || ymin > 0){
1300 s->b_direct_mv_table[mot_xy][0]= 0;
1301 s->b_direct_mv_table[mot_xy][1]= 0;
1302
1303 return 256*256*256*64;
1304 }
1305
1306 if(s->flags&CODEC_FLAG_QPEL){
1307 dmin = simple_direct_qpel_epzs_motion_search(s, 0, &mx, &my, P, 0, 0, xmin, ymin, xmax, ymax,
1308 &s->last_picture, mv_penalty);
1309 dmin = simple_direct_qpel_qpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax,
1310 0, 0, &s->last_picture, 0, 0, mv_penalty);
1311 }else{
1312 dmin = simple_direct_hpel_epzs_motion_search(s, 0, &mx, &my, P, 0, 0, xmin, ymin, xmax, ymax,
1313 &s->last_picture, mv_penalty);
1314 dmin = simple_direct_hpel_hpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax,
1315 0, 0, &s->last_picture, 0, 0, mv_penalty);
1495 } 1316 }
1496 1317
1497 s->b_direct_mv_table[mot_xy][0]= mx; 1318 s->b_direct_mv_table[mot_xy][0]= mx;
1498 s->b_direct_mv_table[mot_xy][1]= my; 1319 s->b_direct_mv_table[mot_xy][1]= my;
1499 s->b_direct_forw_mv_table[mot_xy][0]= motion_fx + mx;
1500 s->b_direct_forw_mv_table[mot_xy][1]= motion_fy + my;
1501 s->b_direct_back_mv_table[mot_xy][0]= motion_bx + mx;
1502 s->b_direct_back_mv_table[mot_xy][1]= motion_by + my;
1503 return dmin; 1320 return dmin;
1504 } 1321 }
1505 1322
1506 void ff_estimate_b_frame_motion(MpegEncContext * s, 1323 void ff_estimate_b_frame_motion(MpegEncContext * s,
1507 int mb_x, int mb_y) 1324 int mb_x, int mb_y)
1508 { 1325 {
1509 const int quant= s->qscale; 1326 const int penalty_factor= s->me.penalty_factor;
1510 int fmin, bmin, dmin, fbmin; 1327 int fmin, bmin, dmin, fbmin;
1511 int type=0; 1328 int type=0;
1512 1329
1513 dmin= direct_search(s, mb_x, mb_y); 1330 dmin= direct_search(s, mb_x, mb_y);
1514 1331
1515 fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, s->last_picture.data[0], s->f_code); 1332 fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, &s->last_picture, s->f_code);
1516 bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, s->next_picture.data[0], s->b_code) - quant; 1333 bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, &s->next_picture, s->b_code) - penalty_factor;
1517 //printf(" %d %d ", s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1]); 1334 //printf(" %d %d ", s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1]);
1518 1335
1519 fbmin= bidir_refine(s, mb_x, mb_y); 1336 fbmin= bidir_refine(s, mb_x, mb_y);
1520 1337
1521 { 1338 {
1539 s->current_picture.mc_mb_var[mb_y*s->mb_width + mb_x] = score; //FIXME use SSD 1356 s->current_picture.mc_mb_var[mb_y*s->mb_width + mb_x] = score; //FIXME use SSD
1540 } 1357 }
1541 1358
1542 if(s->flags&CODEC_FLAG_HQ){ 1359 if(s->flags&CODEC_FLAG_HQ){
1543 type= MB_TYPE_FORWARD | MB_TYPE_BACKWARD | MB_TYPE_BIDIR | MB_TYPE_DIRECT; //FIXME something smarter 1360 type= MB_TYPE_FORWARD | MB_TYPE_BACKWARD | MB_TYPE_BIDIR | MB_TYPE_DIRECT; //FIXME something smarter
1544 } 1361 if(dmin>256*256*16) type&= ~MB_TYPE_DIRECT; //dont try direct mode if its invalid for this MB
1545 1362 }
1546 /*
1547 {
1548 static int count=0;
1549 static int sum=0;
1550 if(type==MB_TYPE_DIRECT){
1551 int diff= ABS(s->b_forw_mv_table)
1552 }
1553 }*/
1554 1363
1555 s->mb_type[mb_y*s->mb_width + mb_x]= type; 1364 s->mb_type[mb_y*s->mb_width + mb_x]= type;
1556 /* if(mb_y==0 && mb_x==0) printf("\n");
1557 if(mb_x==0) printf("\n");
1558 printf("%d", av_log2(type));
1559 */
1560 } 1365 }
1561 1366
1562 /* find best f_code for ME which do unlimited searches */ 1367 /* find best f_code for ME which do unlimited searches */
1563 int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type) 1368 int ff_get_best_fcode(MpegEncContext * s, int16_t (*mv_table)[2], int type)
1564 { 1369 {
1567 int i, y; 1372 int i, y;
1568 UINT8 * fcode_tab= s->fcode_tab; 1373 UINT8 * fcode_tab= s->fcode_tab;
1569 int best_fcode=-1; 1374 int best_fcode=-1;
1570 int best_score=-10000000; 1375 int best_score=-10000000;
1571 1376
1572 for(i=0; i<8; i++) score[i]= s->mb_num*(8-i); //FIXME *2 and all other too so its the same but nicer 1377 for(i=0; i<8; i++) score[i]= s->mb_num*(8-i);
1573 1378
1574 for(y=0; y<s->mb_height; y++){ 1379 for(y=0; y<s->mb_height; y++){
1575 int x; 1380 int x;
1576 int xy= (y+1)* (s->mb_width+2) + 1; 1381 int xy= (y+1)* (s->mb_width+2) + 1;
1577 i= y*s->mb_width; 1382 i= y*s->mb_width;