comparison motion_est.c @ 1708:dea5b2946999 libavcodec

interlaced motion estimation; interlaced mpeg2 encoding; P & B frames; rate distorted interlaced mb decision; alternate scantable support; 4mv encoding fixes (that's also why the regression tests change); passing height to most dsp functions; interlaced mpeg4 encoding (no direct mode MBs yet); various related cleanups; disabled old motion estimation algorithms (log, full, ...), they will either be fixed or removed
author michael
date Tue, 30 Dec 2003 16:07:57 +0000
parents 30746f429df6
children a4a5e7521339
comparison
equal deleted inserted replaced
1707:027545a2fdbe 1708:dea5b2946999
44 #define P_MEDIAN P[4] 44 #define P_MEDIAN P[4]
45 #define P_MV1 P[9] 45 #define P_MV1 P[9]
46 46
47 static inline int sad_hpel_motion_search(MpegEncContext * s, 47 static inline int sad_hpel_motion_search(MpegEncContext * s,
48 int *mx_ptr, int *my_ptr, int dmin, 48 int *mx_ptr, int *my_ptr, int dmin,
49 int xmin, int ymin, int xmax, int ymax, 49 int pred_x, int pred_y, uint8_t *src_data[3],
50 int pred_x, int pred_y, Picture *picture, 50 uint8_t *ref_data[6], int stride, int uvstride,
51 int n, int size, uint8_t * const mv_penalty); 51 int size, int h, uint8_t * const mv_penalty);
52 52
53 static inline int update_map_generation(MpegEncContext * s) 53 static inline int update_map_generation(MpegEncContext * s)
54 { 54 {
55 s->me.map_generation+= 1<<(ME_MAP_MV_BITS*2); 55 s->me.map_generation+= 1<<(ME_MAP_MV_BITS*2);
56 if(s->me.map_generation==0){ 56 if(s->me.map_generation==0){
76 76
77 /* SIMPLE */ 77 /* SIMPLE */
78 #define RENAME(a) simple_ ## a 78 #define RENAME(a) simple_ ## a
79 79
80 #define CMP(d, x, y, size)\ 80 #define CMP(d, x, y, size)\
81 d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride); 81 d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride, h);
82 82
83 #define CMP_HPEL(d, dx, dy, x, y, size)\ 83 #define CMP_HPEL(d, dx, dy, x, y, size)\
84 {\ 84 {\
85 const int dxy= (dx) + 2*(dy);\ 85 const int dxy= (dx) + 2*(dy);\
86 hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, (16>>size));\ 86 hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, h);\
87 d = cmp_sub(s, s->me.scratchpad, src_y, stride);\ 87 d = cmp_sub(s, s->me.scratchpad, src_y, stride, h);\
88 } 88 }
89
89 90
90 #define CMP_QPEL(d, dx, dy, x, y, size)\ 91 #define CMP_QPEL(d, dx, dy, x, y, size)\
91 {\ 92 {\
92 const int dxy= (dx) + 4*(dy);\ 93 const int dxy= (dx) + 4*(dy);\
93 qpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride);\ 94 qpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride);\
94 d = cmp_sub(s, s->me.scratchpad, src_y, stride);\ 95 d = cmp_sub(s, s->me.scratchpad, src_y, stride, h);\
95 } 96 }
96 97
97 #include "motion_est_template.c" 98 #include "motion_est_template.c"
98 #undef RENAME 99 #undef RENAME
99 #undef CMP 100 #undef CMP
103 104
104 /* SIMPLE CHROMA */ 105 /* SIMPLE CHROMA */
105 #define RENAME(a) simple_chroma_ ## a 106 #define RENAME(a) simple_chroma_ ## a
106 107
107 #define CMP(d, x, y, size)\ 108 #define CMP(d, x, y, size)\
108 d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride);\ 109 d = cmp(s, src_y, (ref_y) + (x) + (y)*(stride), stride, h);\
109 if(chroma_cmp){\ 110 if(chroma_cmp){\
110 int dxy= ((x)&1) + 2*((y)&1);\ 111 int dxy= ((x)&1) + 2*((y)&1);\
111 int c= ((x)>>1) + ((y)>>1)*uvstride;\ 112 int c= ((x)>>1) + ((y)>>1)*uvstride;\
112 \ 113 \
113 chroma_hpel_put[0][dxy](s->me.scratchpad, ref_u + c, uvstride, 8);\ 114 chroma_hpel_put[0][dxy](s->me.scratchpad, ref_u + c, uvstride, h>>1);\
114 d += chroma_cmp(s, s->me.scratchpad, src_u, uvstride);\ 115 d += chroma_cmp(s, s->me.scratchpad, src_u, uvstride, h>>1);\
115 chroma_hpel_put[0][dxy](s->me.scratchpad, ref_v + c, uvstride, 8);\ 116 chroma_hpel_put[0][dxy](s->me.scratchpad, ref_v + c, uvstride, h>>1);\
116 d += chroma_cmp(s, s->me.scratchpad, src_v, uvstride);\ 117 d += chroma_cmp(s, s->me.scratchpad, src_v, uvstride, h>>1);\
117 } 118 }
118 119
119 #define CMP_HPEL(d, dx, dy, x, y, size)\ 120 #define CMP_HPEL(d, dx, dy, x, y, size)\
120 {\ 121 {\
121 const int dxy= (dx) + 2*(dy);\ 122 const int dxy= (dx) + 2*(dy);\
122 hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, (16>>size));\ 123 hpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride, h);\
123 d = cmp_sub(s, s->me.scratchpad, src_y, stride);\ 124 d = cmp_sub(s, s->me.scratchpad, src_y, stride, h);\
124 if(chroma_cmp_sub){\ 125 if(chroma_cmp_sub){\
125 int cxy= (dxy) | ((x)&1) | (2*((y)&1));\ 126 int cxy= (dxy) | ((x)&1) | (2*((y)&1));\
126 int c= ((x)>>1) + ((y)>>1)*uvstride;\ 127 int c= ((x)>>1) + ((y)>>1)*uvstride;\
127 chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, 8);\ 128 chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, h>>1);\
128 d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride);\ 129 d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride, h>>1);\
129 chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, 8);\ 130 chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, h>>1);\
130 d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride);\ 131 d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride, h>>1);\
131 }\ 132 }\
132 } 133 }
133 134
134 #define CMP_QPEL(d, dx, dy, x, y, size)\ 135 #define CMP_QPEL(d, dx, dy, x, y, size)\
135 {\ 136 {\
136 const int dxy= (dx) + 4*(dy);\ 137 const int dxy= (dx) + 4*(dy);\
137 qpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride);\ 138 qpel_put[0][dxy](s->me.scratchpad, (ref_y) + (x) + (y)*(stride), stride);\
138 d = cmp_sub(s, s->me.scratchpad, src_y, stride);\ 139 d = cmp_sub(s, s->me.scratchpad, src_y, stride, h);\
139 if(chroma_cmp_sub){\ 140 if(chroma_cmp_sub){\
140 int cxy, c;\ 141 int cxy, c;\
141 int cx= (4*(x) + (dx))/2;\ 142 int cx= (4*(x) + (dx))/2;\
142 int cy= (4*(y) + (dy))/2;\ 143 int cy= (4*(y) + (dy))/2;\
143 cx= (cx>>1)|(cx&1);\ 144 cx= (cx>>1)|(cx&1);\
144 cy= (cy>>1)|(cy&1);\ 145 cy= (cy>>1)|(cy&1);\
145 cxy= (cx&1) + 2*(cy&1);\ 146 cxy= (cx&1) + 2*(cy&1);\
146 c= ((cx)>>1) + ((cy)>>1)*uvstride;\ 147 c= ((cx)>>1) + ((cy)>>1)*uvstride;\
147 chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, 8);\ 148 chroma_hpel_put[0][cxy](s->me.scratchpad, ref_u + c, uvstride, h>>1);\
148 d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride);\ 149 d += chroma_cmp_sub(s, s->me.scratchpad, src_u, uvstride, h>>1);\
149 chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, 8);\ 150 chroma_hpel_put[0][cxy](s->me.scratchpad, ref_v + c, uvstride, h>>1);\
150 d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride);\ 151 d += chroma_cmp_sub(s, s->me.scratchpad, src_v, uvstride, h>>1);\
151 }\ 152 }\
152 } 153 }
153 154
154 #include "motion_est_template.c" 155 #include "motion_est_template.c"
155 #undef RENAME 156 #undef RENAME
176 int fxy= (fx&1) + 2*(fy&1);\ 177 int fxy= (fx&1) + 2*(fy&1);\
177 int bxy= (bx&1) + 2*(by&1);\ 178 int bxy= (bx&1) + 2*(by&1);\
178 \ 179 \
179 uint8_t *dst= s->me.scratchpad + 8*(i&1) + 8*stride*(i>>1);\ 180 uint8_t *dst= s->me.scratchpad + 8*(i&1) + 8*stride*(i>>1);\
180 hpel_put[1][fxy](dst, (ref_y ) + (fx>>1) + (fy>>1)*(stride), stride, 8);\ 181 hpel_put[1][fxy](dst, (ref_y ) + (fx>>1) + (fy>>1)*(stride), stride, 8);\
181 hpel_avg[1][bxy](dst, (ref2_y) + (bx>>1) + (by>>1)*(stride), stride, 8);\ 182 hpel_avg[1][bxy](dst, (ref_data[3]) + (bx>>1) + (by>>1)*(stride), stride, 8);\
182 }\ 183 }\
183 }else{\ 184 }else{\
184 int fx = s->me.direct_basis_mv[0][0] + hx;\ 185 int fx = s->me.direct_basis_mv[0][0] + hx;\
185 int fy = s->me.direct_basis_mv[0][1] + hy;\ 186 int fy = s->me.direct_basis_mv[0][1] + hy;\
186 int bx = hx ? fx - s->me.co_located_mv[0][0] : (s->me.co_located_mv[0][0]*(time_pb - time_pp)/time_pp);\ 187 int bx = hx ? fx - s->me.co_located_mv[0][0] : (s->me.co_located_mv[0][0]*(time_pb - time_pp)/time_pp);\
196 assert((by>>1) + 16*s->mb_y >= -16);\ 197 assert((by>>1) + 16*s->mb_y >= -16);\
197 assert((bx>>1) + 16*s->mb_x <= s->width);\ 198 assert((bx>>1) + 16*s->mb_x <= s->width);\
198 assert((by>>1) + 16*s->mb_y <= s->height);\ 199 assert((by>>1) + 16*s->mb_y <= s->height);\
199 \ 200 \
200 hpel_put[0][fxy](s->me.scratchpad, (ref_y ) + (fx>>1) + (fy>>1)*(stride), stride, 16);\ 201 hpel_put[0][fxy](s->me.scratchpad, (ref_y ) + (fx>>1) + (fy>>1)*(stride), stride, 16);\
201 hpel_avg[0][bxy](s->me.scratchpad, (ref2_y) + (bx>>1) + (by>>1)*(stride), stride, 16);\ 202 hpel_avg[0][bxy](s->me.scratchpad, (ref_data[3]) + (bx>>1) + (by>>1)*(stride), stride, 16);\
202 }\ 203 }\
203 d = cmp_func(s, s->me.scratchpad, src_y, stride);\ 204 d = cmp_func(s, s->me.scratchpad, src_y, stride, 16);\
204 }else\ 205 }else\
205 d= 256*256*256*32; 206 d= 256*256*256*32;
206 207
207 208
208 #define CMP_HPEL(d, dx, dy, x, y, size)\ 209 #define CMP_HPEL(d, dx, dy, x, y, size)\
236 int fxy= (fx&3) + 4*(fy&3);\ 237 int fxy= (fx&3) + 4*(fy&3);\
237 int bxy= (bx&3) + 4*(by&3);\ 238 int bxy= (bx&3) + 4*(by&3);\
238 \ 239 \
239 uint8_t *dst= s->me.scratchpad + 8*(i&1) + 8*stride*(i>>1);\ 240 uint8_t *dst= s->me.scratchpad + 8*(i&1) + 8*stride*(i>>1);\
240 qpel_put[1][fxy](dst, (ref_y ) + (fx>>2) + (fy>>2)*(stride), stride);\ 241 qpel_put[1][fxy](dst, (ref_y ) + (fx>>2) + (fy>>2)*(stride), stride);\
241 qpel_avg[1][bxy](dst, (ref2_y) + (bx>>2) + (by>>2)*(stride), stride);\ 242 qpel_avg[1][bxy](dst, (ref_data[3]) + (bx>>2) + (by>>2)*(stride), stride);\
242 }\ 243 }\
243 }else{\ 244 }else{\
244 int fx = s->me.direct_basis_mv[0][0] + qx;\ 245 int fx = s->me.direct_basis_mv[0][0] + qx;\
245 int fy = s->me.direct_basis_mv[0][1] + qy;\ 246 int fy = s->me.direct_basis_mv[0][1] + qy;\
246 int bx = qx ? fx - s->me.co_located_mv[0][0] : s->me.co_located_mv[0][0]*(time_pb - time_pp)/time_pp;\ 247 int bx = qx ? fx - s->me.co_located_mv[0][0] : s->me.co_located_mv[0][0]*(time_pb - time_pp)/time_pp;\
250 \ 251 \
251 qpel_put[1][fxy](s->me.scratchpad , (ref_y ) + (fx>>2) + (fy>>2)*(stride) , stride);\ 252 qpel_put[1][fxy](s->me.scratchpad , (ref_y ) + (fx>>2) + (fy>>2)*(stride) , stride);\
252 qpel_put[1][fxy](s->me.scratchpad + 8 , (ref_y ) + (fx>>2) + (fy>>2)*(stride) + 8 , stride);\ 253 qpel_put[1][fxy](s->me.scratchpad + 8 , (ref_y ) + (fx>>2) + (fy>>2)*(stride) + 8 , stride);\
253 qpel_put[1][fxy](s->me.scratchpad + 8*stride, (ref_y ) + (fx>>2) + (fy>>2)*(stride) + 8*stride, stride);\ 254 qpel_put[1][fxy](s->me.scratchpad + 8*stride, (ref_y ) + (fx>>2) + (fy>>2)*(stride) + 8*stride, stride);\
254 qpel_put[1][fxy](s->me.scratchpad + 8 + 8*stride, (ref_y ) + (fx>>2) + (fy>>2)*(stride) + 8 + 8*stride, stride);\ 255 qpel_put[1][fxy](s->me.scratchpad + 8 + 8*stride, (ref_y ) + (fx>>2) + (fy>>2)*(stride) + 8 + 8*stride, stride);\
255 qpel_avg[1][bxy](s->me.scratchpad , (ref2_y) + (bx>>2) + (by>>2)*(stride) , stride);\ 256 qpel_avg[1][bxy](s->me.scratchpad , (ref_data[3]) + (bx>>2) + (by>>2)*(stride) , stride);\
256 qpel_avg[1][bxy](s->me.scratchpad + 8 , (ref2_y) + (bx>>2) + (by>>2)*(stride) + 8 , stride);\ 257 qpel_avg[1][bxy](s->me.scratchpad + 8 , (ref_data[3]) + (bx>>2) + (by>>2)*(stride) + 8 , stride);\
257 qpel_avg[1][bxy](s->me.scratchpad + 8*stride, (ref2_y) + (bx>>2) + (by>>2)*(stride) + 8*stride, stride);\ 258 qpel_avg[1][bxy](s->me.scratchpad + 8*stride, (ref_data[3]) + (bx>>2) + (by>>2)*(stride) + 8*stride, stride);\
258 qpel_avg[1][bxy](s->me.scratchpad + 8 + 8*stride, (ref2_y) + (bx>>2) + (by>>2)*(stride) + 8 + 8*stride, stride);\ 259 qpel_avg[1][bxy](s->me.scratchpad + 8 + 8*stride, (ref_data[3]) + (bx>>2) + (by>>2)*(stride) + 8 + 8*stride, stride);\
259 }\ 260 }\
260 d = cmp_func(s, s->me.scratchpad, src_y, stride);\ 261 d = cmp_func(s, s->me.scratchpad, src_y, stride, 16);\
261 }else\ 262 }else\
262 d= 256*256*256*32; 263 d= 256*256*256*32;
263 264
264 265
265 #define CMP_QPEL(d, dx, dy, x, y, size)\ 266 #define CMP_QPEL(d, dx, dy, x, y, size)\
275 #undef CMP_QPEL 276 #undef CMP_QPEL
276 #undef INIT 277 #undef INIT
277 #undef CMP__DIRECT 278 #undef CMP__DIRECT
278 279
279 280
280 static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride){ 281 static int zero_cmp(void *s, uint8_t *a, uint8_t *b, int stride, int h){
281 return 0; 282 return 0;
282 } 283 }
283 284
284 static void set_cmp(MpegEncContext *s, me_cmp_func *cmp, int type){ 285 static void set_cmp(MpegEncContext *s, me_cmp_func *cmp, int type){
285 DSPContext* c= &s->dsp; 286 DSPContext* c= &s->dsp;
286 int i; 287 int i;
287 288
288 memset(cmp, 0, sizeof(void*)*11); 289 memset(cmp, 0, sizeof(void*)*5);
289 290
290 switch(type&0xFF){ 291 for(i=0; i<4; i++){
291 case FF_CMP_SAD: 292 switch(type&0xFF){
292 cmp[0]= c->sad[0]; 293 case FF_CMP_SAD:
293 cmp[1]= c->sad[1]; 294 cmp[i]= c->sad[i];
294 break; 295 break;
295 case FF_CMP_SATD: 296 case FF_CMP_SATD:
296 cmp[0]= c->hadamard8_diff[0]; 297 cmp[i]= c->hadamard8_diff[i];
297 cmp[1]= c->hadamard8_diff[1]; 298 break;
298 break; 299 case FF_CMP_SSE:
299 case FF_CMP_SSE: 300 cmp[i]= c->sse[i];
300 cmp[0]= c->sse[0]; 301 break;
301 cmp[1]= c->sse[1]; 302 case FF_CMP_DCT:
302 break; 303 cmp[i]= c->dct_sad[i];
303 case FF_CMP_DCT: 304 break;
304 cmp[0]= c->dct_sad[0]; 305 case FF_CMP_PSNR:
305 cmp[1]= c->dct_sad[1]; 306 cmp[i]= c->quant_psnr[i];
306 break; 307 break;
307 case FF_CMP_PSNR: 308 case FF_CMP_BIT:
308 cmp[0]= c->quant_psnr[0]; 309 cmp[i]= c->bit[i];
309 cmp[1]= c->quant_psnr[1]; 310 break;
310 break; 311 case FF_CMP_RD:
311 case FF_CMP_BIT: 312 cmp[i]= c->rd[i];
312 cmp[0]= c->bit[0]; 313 break;
313 cmp[1]= c->bit[1]; 314 case FF_CMP_ZERO:
314 break;
315 case FF_CMP_RD:
316 cmp[0]= c->rd[0];
317 cmp[1]= c->rd[1];
318 break;
319 case FF_CMP_ZERO:
320 for(i=0; i<7; i++){
321 cmp[i]= zero_cmp; 315 cmp[i]= zero_cmp;
322 } 316 break;
323 break; 317 default:
324 default: 318 av_log(s->avctx, AV_LOG_ERROR,"internal error in cmp function selection\n");
325 av_log(s->avctx, AV_LOG_ERROR,"internal error in cmp function selection\n"); 319 }
326 } 320 }
327 } 321 }
328 322
329 static inline int get_penalty_factor(MpegEncContext *s, int type){ 323 static inline int get_penalty_factor(MpegEncContext *s, int type){
330 switch(type&0xFF){ 324 switch(type&0xFF){
360 if(s->avctx->me_sub_cmp&FF_CMP_CHROMA) 354 if(s->avctx->me_sub_cmp&FF_CMP_CHROMA)
361 s->me.sub_motion_search= simple_chroma_hpel_motion_search; 355 s->me.sub_motion_search= simple_chroma_hpel_motion_search;
362 else if( s->avctx->me_sub_cmp == FF_CMP_SAD 356 else if( s->avctx->me_sub_cmp == FF_CMP_SAD
363 && s->avctx-> me_cmp == FF_CMP_SAD 357 && s->avctx-> me_cmp == FF_CMP_SAD
364 && s->avctx-> mb_cmp == FF_CMP_SAD) 358 && s->avctx-> mb_cmp == FF_CMP_SAD)
365 s->me.sub_motion_search= sad_hpel_motion_search; 359 s->me.sub_motion_search= sad_hpel_motion_search; // 2050 vs. 2450 cycles
366 else 360 else
367 s->me.sub_motion_search= simple_hpel_motion_search; 361 s->me.sub_motion_search= simple_hpel_motion_search;
368 } 362 }
369 363
370 if(s->avctx->me_cmp&FF_CMP_CHROMA){ 364 if(s->avctx->me_cmp&FF_CMP_CHROMA){
371 s->me.motion_search[0]= simple_chroma_epzs_motion_search; 365 s->me.motion_search[0]= simple_chroma_epzs_motion_search;
372 s->me.motion_search[1]= simple_chroma_epzs_motion_search4; 366 s->me.motion_search[1]= simple_chroma_epzs_motion_search4;
367 s->me.motion_search[4]= simple_chroma_epzs_motion_search2;
373 }else{ 368 }else{
374 s->me.motion_search[0]= simple_epzs_motion_search; 369 s->me.motion_search[0]= simple_epzs_motion_search;
375 s->me.motion_search[1]= simple_epzs_motion_search4; 370 s->me.motion_search[1]= simple_epzs_motion_search4;
371 s->me.motion_search[4]= simple_epzs_motion_search2;
376 } 372 }
377 373
378 if(s->avctx->me_pre_cmp&FF_CMP_CHROMA){ 374 if(s->avctx->me_pre_cmp&FF_CMP_CHROMA){
379 s->me.pre_motion_search= simple_chroma_epzs_motion_search; 375 s->me.pre_motion_search= simple_chroma_epzs_motion_search;
380 }else{ 376 }else{
451 dmin = 0x7fffffff; 447 dmin = 0x7fffffff;
452 mx = 0; 448 mx = 0;
453 my = 0; 449 my = 0;
454 for (y = y1; y <= y2; y++) { 450 for (y = y1; y <= y2; y++) {
455 for (x = x1; x <= x2; x++) { 451 for (x = x1; x <= x2; x++) {
456 d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, 452 d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x,
457 s->linesize); 453 s->linesize, 16);
458 if (d < dmin || 454 if (d < dmin ||
459 (d == dmin && 455 (d == dmin &&
460 (abs(x - xx) + abs(y - yy)) < 456 (abs(x - xx) + abs(y - yy)) <
461 (abs(mx - xx) + abs(my - yy)))) { 457 (abs(mx - xx) + abs(my - yy)))) {
462 dmin = d; 458 dmin = d;
516 my = 0; 512 my = 0;
517 513
518 do { 514 do {
519 for (y = y1; y <= y2; y += range) { 515 for (y = y1; y <= y2; y += range) {
520 for (x = x1; x <= x2; x += range) { 516 for (x = x1; x <= x2; x += range) {
521 d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize); 517 d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
522 if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { 518 if (d < dmin || (d == dmin && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
523 dmin = d; 519 dmin = d;
524 mx = x; 520 mx = x;
525 my = y; 521 my = y;
526 } 522 }
596 dminx = 0x7fffffff; 592 dminx = 0x7fffffff;
597 dminy = 0x7fffffff; 593 dminy = 0x7fffffff;
598 594
599 lastx = x; 595 lastx = x;
600 for (x = x1; x <= x2; x += range) { 596 for (x = x1; x <= x2; x += range) {
601 d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize); 597 d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
602 if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { 598 if (d < dminx || (d == dminx && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
603 dminx = d; 599 dminx = d;
604 mx = x; 600 mx = x;
605 } 601 }
606 } 602 }
607 603
608 x = lastx; 604 x = lastx;
609 for (y = y1; y <= y2; y += range) { 605 for (y = y1; y <= y2; y += range) {
610 d = s->dsp.pix_abs16x16(pix, ref_picture + (y * s->linesize) + x, s->linesize); 606 d = s->dsp.pix_abs[0][0](NULL, pix, ref_picture + (y * s->linesize) + x, s->linesize, 16);
611 if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) { 607 if (d < dminy || (d == dminy && (abs(x - xx) + abs(y - yy)) < (abs(mx - xx) + abs(my - yy)))) {
612 dminy = d; 608 dminy = d;
613 my = y; 609 my = y;
614 } 610 }
615 } 611 }
649 645
650 #define Z_THRESHOLD 256 646 #define Z_THRESHOLD 256
651 647
652 #define CHECK_SAD_HALF_MV(suffix, x, y) \ 648 #define CHECK_SAD_HALF_MV(suffix, x, y) \
653 {\ 649 {\
654 d= pix_abs_ ## suffix(pix, ptr+((x)>>1), s->linesize);\ 650 d= s->dsp.pix_abs[size][(x?1:0)+(y?2:0)](NULL, pix, ptr+((x)>>1), stride, h);\
655 d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*penalty_factor;\ 651 d += (mv_penalty[pen_x + x] + mv_penalty[pen_y + y])*penalty_factor;\
656 COPY3_IF_LT(dminh, d, dx, x, dy, y)\ 652 COPY3_IF_LT(dminh, d, dx, x, dy, y)\
657 } 653 }
658 654
659 static inline int sad_hpel_motion_search(MpegEncContext * s, 655 static inline int sad_hpel_motion_search(MpegEncContext * s,
660 int *mx_ptr, int *my_ptr, int dmin, 656 int *mx_ptr, int *my_ptr, int dmin,
661 int xmin, int ymin, int xmax, int ymax, 657 int pred_x, int pred_y, uint8_t *src_data[3],
662 int pred_x, int pred_y, Picture *picture, 658 uint8_t *ref_data[6], int stride, int uvstride,
663 int n, int size, uint8_t * const mv_penalty) 659 int size, int h, uint8_t * const mv_penalty)
664 { 660 {
665 uint8_t *ref_picture= picture->data[0];
666 uint32_t *score_map= s->me.score_map; 661 uint32_t *score_map= s->me.score_map;
667 const int penalty_factor= s->me.sub_penalty_factor; 662 const int penalty_factor= s->me.sub_penalty_factor;
668 int mx, my, xx, yy, dminh; 663 int mx, my, dminh;
669 uint8_t *pix, *ptr; 664 uint8_t *pix, *ptr;
670 op_pixels_abs_func pix_abs_x2; 665 const int xmin= s->me.xmin;
671 op_pixels_abs_func pix_abs_y2; 666 const int ymin= s->me.ymin;
672 op_pixels_abs_func pix_abs_xy2; 667 const int xmax= s->me.xmax;
673 668 const int ymax= s->me.ymax;
674 if(size==0){
675 pix_abs_x2 = s->dsp.pix_abs16x16_x2;
676 pix_abs_y2 = s->dsp.pix_abs16x16_y2;
677 pix_abs_xy2= s->dsp.pix_abs16x16_xy2;
678 }else{
679 pix_abs_x2 = s->dsp.pix_abs8x8_x2;
680 pix_abs_y2 = s->dsp.pix_abs8x8_y2;
681 pix_abs_xy2= s->dsp.pix_abs8x8_xy2;
682 }
683 669
684 if(s->me.skip){ 670 if(s->me.skip){
685 // printf("S"); 671 // printf("S");
686 *mx_ptr = 0; 672 *mx_ptr = 0;
687 *my_ptr = 0; 673 *my_ptr = 0;
688 return dmin; 674 return dmin;
689 } 675 }
690 // printf("N"); 676 // printf("N");
691 677
692 xx = 16 * s->mb_x + 8*(n&1); 678 pix = src_data[0];
693 yy = 16 * s->mb_y + 8*(n>>1);
694 pix = s->new_picture.data[0] + (yy * s->linesize) + xx;
695 679
696 mx = *mx_ptr; 680 mx = *mx_ptr;
697 my = *my_ptr; 681 my = *my_ptr;
698 ptr = ref_picture + ((yy + my) * s->linesize) + (xx + mx); 682 ptr = ref_data[0] + (my * stride) + mx;
699 683
700 dminh = dmin; 684 dminh = dmin;
701 685
702 if (mx > xmin && mx < xmax && 686 if (mx > xmin && mx < xmax &&
703 my > ymin && my < ymax) { 687 my > ymin && my < ymax) {
713 697
714 698
715 pen_x= pred_x + mx; 699 pen_x= pred_x + mx;
716 pen_y= pred_y + my; 700 pen_y= pred_y + my;
717 701
718 ptr-= s->linesize; 702 ptr-= stride;
719 if(t<=b){ 703 if(t<=b){
720 CHECK_SAD_HALF_MV(y2 , 0, -1) 704 CHECK_SAD_HALF_MV(y2 , 0, -1)
721 if(l<=r){ 705 if(l<=r){
722 CHECK_SAD_HALF_MV(xy2, -1, -1) 706 CHECK_SAD_HALF_MV(xy2, -1, -1)
723 if(t+r<=b+l){ 707 if(t+r<=b+l){
724 CHECK_SAD_HALF_MV(xy2, +1, -1) 708 CHECK_SAD_HALF_MV(xy2, +1, -1)
725 ptr+= s->linesize; 709 ptr+= stride;
726 }else{ 710 }else{
727 ptr+= s->linesize; 711 ptr+= stride;
728 CHECK_SAD_HALF_MV(xy2, -1, +1) 712 CHECK_SAD_HALF_MV(xy2, -1, +1)
729 } 713 }
730 CHECK_SAD_HALF_MV(x2 , -1, 0) 714 CHECK_SAD_HALF_MV(x2 , -1, 0)
731 }else{ 715 }else{
732 CHECK_SAD_HALF_MV(xy2, +1, -1) 716 CHECK_SAD_HALF_MV(xy2, +1, -1)
733 if(t+l<=b+r){ 717 if(t+l<=b+r){
734 CHECK_SAD_HALF_MV(xy2, -1, -1) 718 CHECK_SAD_HALF_MV(xy2, -1, -1)
735 ptr+= s->linesize; 719 ptr+= stride;
736 }else{ 720 }else{
737 ptr+= s->linesize; 721 ptr+= stride;
738 CHECK_SAD_HALF_MV(xy2, +1, +1) 722 CHECK_SAD_HALF_MV(xy2, +1, +1)
739 } 723 }
740 CHECK_SAD_HALF_MV(x2 , +1, 0) 724 CHECK_SAD_HALF_MV(x2 , +1, 0)
741 } 725 }
742 }else{ 726 }else{
743 if(l<=r){ 727 if(l<=r){
744 if(t+l<=b+r){ 728 if(t+l<=b+r){
745 CHECK_SAD_HALF_MV(xy2, -1, -1) 729 CHECK_SAD_HALF_MV(xy2, -1, -1)
746 ptr+= s->linesize; 730 ptr+= stride;
747 }else{ 731 }else{
748 ptr+= s->linesize; 732 ptr+= stride;
749 CHECK_SAD_HALF_MV(xy2, +1, +1) 733 CHECK_SAD_HALF_MV(xy2, +1, +1)
750 } 734 }
751 CHECK_SAD_HALF_MV(x2 , -1, 0) 735 CHECK_SAD_HALF_MV(x2 , -1, 0)
752 CHECK_SAD_HALF_MV(xy2, -1, +1) 736 CHECK_SAD_HALF_MV(xy2, -1, +1)
753 }else{ 737 }else{
754 if(t+r<=b+l){ 738 if(t+r<=b+l){
755 CHECK_SAD_HALF_MV(xy2, +1, -1) 739 CHECK_SAD_HALF_MV(xy2, +1, -1)
756 ptr+= s->linesize; 740 ptr+= stride;
757 }else{ 741 }else{
758 ptr+= s->linesize; 742 ptr+= stride;
759 CHECK_SAD_HALF_MV(xy2, -1, +1) 743 CHECK_SAD_HALF_MV(xy2, -1, +1)
760 } 744 }
761 CHECK_SAD_HALF_MV(x2 , +1, 0) 745 CHECK_SAD_HALF_MV(x2 , +1, 0)
762 CHECK_SAD_HALF_MV(xy2, +1, +1) 746 CHECK_SAD_HALF_MV(xy2, +1, +1)
763 } 747 }
800 } 784 }
801 } 785 }
802 786
803 /** 787 /**
804 * get fullpel ME search limits. 788 * get fullpel ME search limits.
805 * @param range the approximate search range for the old ME code, unused for EPZS and newer
806 */ 789 */
807 static inline void get_limits(MpegEncContext *s, int *range, int *xmin, int *ymin, int *xmax, int *ymax) 790 static inline void get_limits(MpegEncContext *s, int x, int y)
808 { 791 {
809 if(s->avctx->me_range) *range= s->avctx->me_range >> 1; 792 /*
810 else *range= 16; 793 if(s->avctx->me_range) s->me.range= s->avctx->me_range >> 1;
811 794 else s->me.range= 16;
795 */
812 if (s->unrestricted_mv) { 796 if (s->unrestricted_mv) {
813 *xmin = -16; 797 s->me.xmin = - x - 16;
814 *ymin = -16; 798 s->me.ymin = - y - 16;
815 *xmax = s->mb_width*16; 799 s->me.xmax = - x + s->mb_width *16;
816 *ymax = s->mb_height*16; 800 s->me.ymax = - y + s->mb_height*16;
817 } else { 801 } else {
818 *xmin = 0; 802 s->me.xmin = - x;
819 *ymin = 0; 803 s->me.ymin = - y;
820 *xmax = s->mb_width*16 - 16; 804 s->me.xmax = - x + s->mb_width *16 - 16;
821 *ymax = s->mb_height*16 - 16; 805 s->me.ymax = - y + s->mb_height*16 - 16;
822 } 806 }
823 807 }
824 //FIXME try to limit x/y min/max if me_range is set 808
825 } 809 static inline int h263_mv4_search(MpegEncContext *s, int mx, int my, int shift)
826 810 {
827 static inline int h263_mv4_search(MpegEncContext *s, int xmin, int ymin, int xmax, int ymax, int mx, int my, int shift) 811 const int size= 1;
828 { 812 const int h=8;
829 int block; 813 int block;
830 int P[10][2]; 814 int P[10][2];
831 int dmin_sum=0, mx4_sum=0, my4_sum=0; 815 int dmin_sum=0, mx4_sum=0, my4_sum=0;
832 uint8_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV; 816 uint8_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV;
833 int same=1; 817 int same=1;
818 const int stride= s->linesize;
819 const int uvstride= s->uvlinesize;
820 const int xmin= s->me.xmin;
821 const int ymin= s->me.ymin;
822 const int xmax= s->me.xmax;
823 const int ymax= s->me.ymax;
834 824
835 for(block=0; block<4; block++){ 825 for(block=0; block<4; block++){
836 int mx4, my4; 826 int mx4, my4;
837 int pred_x4, pred_y4; 827 int pred_x4, pred_y4;
838 int dmin4; 828 int dmin4;
839 static const int off[4]= {2, 1, 1, -1}; 829 static const int off[4]= {2, 1, 1, -1};
840 const int mot_stride = s->block_wrap[0]; 830 const int mot_stride = s->block_wrap[0];
841 const int mot_xy = s->block_index[block]; 831 const int mot_xy = s->block_index[block];
842 // const int block_x= (block&1); 832 const int block_x= (block&1);
843 // const int block_y= (block>>1); 833 const int block_y= (block>>1);
844 #if 1 // this saves us a bit of cliping work and shouldnt affect compression in a negative way 834 uint8_t *src_data[3]= {
845 const int rel_xmin4= xmin; 835 s->new_picture.data[0] + 8*(2*s->mb_x + block_x) + stride *8*(2*s->mb_y + block_y), //FIXME chroma?
846 const int rel_xmax4= xmax; 836 s->new_picture.data[1] + 4*(2*s->mb_x + block_x) + uvstride*4*(2*s->mb_y + block_y),
847 const int rel_ymin4= ymin; 837 s->new_picture.data[2] + 4*(2*s->mb_x + block_x) + uvstride*4*(2*s->mb_y + block_y)
848 const int rel_ymax4= ymax; 838 };
849 #else 839 uint8_t *ref_data[3]= {
850 const int rel_xmin4= xmin - block_x*8; 840 s->last_picture.data[0] + 8*(2*s->mb_x + block_x) + stride *8*(2*s->mb_y + block_y), //FIXME chroma?
851 const int rel_xmax4= xmax - block_x*8 + 8; 841 s->last_picture.data[1] + 4*(2*s->mb_x + block_x) + uvstride*4*(2*s->mb_y + block_y),
852 const int rel_ymin4= ymin - block_y*8; 842 s->last_picture.data[2] + 4*(2*s->mb_x + block_x) + uvstride*4*(2*s->mb_y + block_y)
853 const int rel_ymax4= ymax - block_y*8 + 8; 843 };
854 #endif 844
855 P_LEFT[0] = s->current_picture.motion_val[0][mot_xy - 1][0]; 845 P_LEFT[0] = s->current_picture.motion_val[0][mot_xy - 1][0];
856 P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1]; 846 P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1];
857 847
858 if(P_LEFT[0] > (rel_xmax4<<shift)) P_LEFT[0] = (rel_xmax4<<shift); 848 if(P_LEFT[0] > (s->me.xmax<<shift)) P_LEFT[0] = (s->me.xmax<<shift);
859 849
860 /* special case for first line */ 850 /* special case for first line */
861 if (s->mb_y == 0 && block<2) { 851 if (s->mb_y == 0 && block<2) {
862 pred_x4= P_LEFT[0]; 852 pred_x4= P_LEFT[0];
863 pred_y4= P_LEFT[1]; 853 pred_y4= P_LEFT[1];
864 } else { 854 } else {
865 P_TOP[0] = s->current_picture.motion_val[0][mot_xy - mot_stride ][0]; 855 P_TOP[0] = s->current_picture.motion_val[0][mot_xy - mot_stride ][0];
866 P_TOP[1] = s->current_picture.motion_val[0][mot_xy - mot_stride ][1]; 856 P_TOP[1] = s->current_picture.motion_val[0][mot_xy - mot_stride ][1];
867 P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][0]; 857 P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][0];
868 P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][1]; 858 P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + off[block]][1];
869 if(P_TOP[1] > (rel_ymax4<<shift)) P_TOP[1] = (rel_ymax4<<shift); 859 if(P_TOP[1] > (s->me.ymax<<shift)) P_TOP[1] = (s->me.ymax<<shift);
870 if(P_TOPRIGHT[0] < (rel_xmin4<<shift)) P_TOPRIGHT[0]= (rel_xmin4<<shift); 860 if(P_TOPRIGHT[0] < (s->me.xmin<<shift)) P_TOPRIGHT[0]= (s->me.xmin<<shift);
871 if(P_TOPRIGHT[0] > (rel_xmax4<<shift)) P_TOPRIGHT[0]= (rel_xmax4<<shift); 861 if(P_TOPRIGHT[0] > (s->me.xmax<<shift)) P_TOPRIGHT[0]= (s->me.xmax<<shift);
872 if(P_TOPRIGHT[1] > (rel_ymax4<<shift)) P_TOPRIGHT[1]= (rel_ymax4<<shift); 862 if(P_TOPRIGHT[1] > (s->me.ymax<<shift)) P_TOPRIGHT[1]= (s->me.ymax<<shift);
873 863
874 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); 864 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
875 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); 865 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
876 866
877 // if(s->out_format == FMT_H263){ 867 // if(s->out_format == FMT_H263){
885 #endif 875 #endif
886 } 876 }
887 P_MV1[0]= mx; 877 P_MV1[0]= mx;
888 P_MV1[1]= my; 878 P_MV1[1]= my;
889 879
890 dmin4 = s->me.motion_search[1](s, block, &mx4, &my4, P, pred_x4, pred_y4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, 880 dmin4 = s->me.motion_search[1](s, &mx4, &my4, P, pred_x4, pred_y4,
891 &s->last_picture, s->p_mv_table, (1<<16)>>shift, mv_penalty); 881 src_data, ref_data, stride, uvstride, s->p_mv_table, (1<<16)>>shift, mv_penalty);
892 882
893 dmin4= s->me.sub_motion_search(s, &mx4, &my4, dmin4, rel_xmin4, rel_ymin4, rel_xmax4, rel_ymax4, 883 dmin4= s->me.sub_motion_search(s, &mx4, &my4, dmin4,
894 pred_x4, pred_y4, &s->last_picture, block, 1, mv_penalty); 884 pred_x4, pred_y4, src_data, ref_data, stride, uvstride, size, h, mv_penalty);
895 885
896 if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){ 886 if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
897 int dxy; 887 int dxy;
898 const int offset= ((block&1) + (block>>1)*s->linesize)*8; 888 const int offset= ((block&1) + (block>>1)*stride)*8;
899 uint8_t *dest_y = s->me.scratchpad + offset; 889 uint8_t *dest_y = s->me.scratchpad + offset;
900 890
901 if(s->quarter_sample){ 891 if(s->quarter_sample){
902 uint8_t *ref= s->last_picture.data[0] + (s->mb_x*16 + (mx4>>2)) + (s->mb_y*16 + (my4>>2))*s->linesize + offset; 892 uint8_t *ref= ref_data[0] + (mx4>>2) + (my4>>2)*stride + offset;
903 dxy = ((my4 & 3) << 2) | (mx4 & 3); 893 dxy = ((my4 & 3) << 2) | (mx4 & 3);
904 894
905 if(s->no_rounding) 895 if(s->no_rounding)
906 s->dsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y , ref , s->linesize); 896 s->dsp.put_no_rnd_qpel_pixels_tab[1][dxy](dest_y , ref , s->linesize);
907 else 897 else
908 s->dsp.put_qpel_pixels_tab [1][dxy](dest_y , ref , s->linesize); 898 s->dsp.put_qpel_pixels_tab [1][dxy](dest_y , ref , stride);
909 }else{ 899 }else{
910 uint8_t *ref= s->last_picture.data[0] + (s->mb_x*16 + (mx4>>1)) + (s->mb_y*16 + (my4>>1))*s->linesize + offset; 900 uint8_t *ref= ref_data[0] + (mx4>>1) + (my4>>1)*stride + offset;
911 dxy = ((my4 & 1) << 1) | (mx4 & 1); 901 dxy = ((my4 & 1) << 1) | (mx4 & 1);
912 902
913 if(s->no_rounding) 903 if(s->no_rounding)
914 s->dsp.put_no_rnd_pixels_tab[1][dxy](dest_y , ref , s->linesize, 8); 904 s->dsp.put_no_rnd_pixels_tab[1][dxy](dest_y , ref , stride, h);
915 else 905 else
916 s->dsp.put_pixels_tab [1][dxy](dest_y , ref , s->linesize, 8); 906 s->dsp.put_pixels_tab [1][dxy](dest_y , ref , stride, h);
917 } 907 }
918 dmin_sum+= (mv_penalty[mx4-pred_x4] + mv_penalty[my4-pred_y4])*s->me.mb_penalty_factor; 908 dmin_sum+= (mv_penalty[mx4-pred_x4] + mv_penalty[my4-pred_y4])*s->me.mb_penalty_factor;
919 }else 909 }else
920 dmin_sum+= dmin4; 910 dmin_sum+= dmin4;
921 911
935 925
936 if(same) 926 if(same)
937 return INT_MAX; 927 return INT_MAX;
938 928
939 if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){ 929 if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
940 dmin_sum += s->dsp.mb_cmp[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*16*s->linesize, s->me.scratchpad, s->linesize); 930 dmin_sum += s->dsp.mb_cmp[0](s, s->new_picture.data[0] + s->mb_x*16 + s->mb_y*16*stride, s->me.scratchpad, stride, 16);
941 } 931 }
942 932
943 if(s->avctx->mb_cmp&FF_CMP_CHROMA){ 933 if(s->avctx->mb_cmp&FF_CMP_CHROMA){
944 int dxy; 934 int dxy;
945 int mx, my; 935 int mx, my;
957 }else{ 947 }else{
958 s->dsp.put_pixels_tab [1][dxy](s->me.scratchpad , s->last_picture.data[1] + offset, s->uvlinesize, 8); 948 s->dsp.put_pixels_tab [1][dxy](s->me.scratchpad , s->last_picture.data[1] + offset, s->uvlinesize, 8);
959 s->dsp.put_pixels_tab [1][dxy](s->me.scratchpad+8 , s->last_picture.data[2] + offset, s->uvlinesize, 8); 949 s->dsp.put_pixels_tab [1][dxy](s->me.scratchpad+8 , s->last_picture.data[2] + offset, s->uvlinesize, 8);
960 } 950 }
961 951
962 dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, s->me.scratchpad , s->uvlinesize); 952 dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[1] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, s->me.scratchpad , s->uvlinesize, 8);
963 dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, s->me.scratchpad+8, s->uvlinesize); 953 dmin_sum += s->dsp.mb_cmp[1](s, s->new_picture.data[2] + s->mb_x*8 + s->mb_y*8*s->uvlinesize, s->me.scratchpad+8, s->uvlinesize, 8);
964 } 954 }
965 955
966 switch(s->avctx->mb_cmp&0xFF){ 956 switch(s->avctx->mb_cmp&0xFF){
967 /*case FF_CMP_SSE: 957 /*case FF_CMP_SSE:
968 return dmin_sum+ 32*s->qscale*s->qscale;*/ 958 return dmin_sum+ 32*s->qscale*s->qscale;*/
971 default: 961 default:
972 return dmin_sum+ 11*s->me.mb_penalty_factor; 962 return dmin_sum+ 11*s->me.mb_penalty_factor;
973 } 963 }
974 } 964 }
975 965
966 static int interlaced_search(MpegEncContext *s, uint8_t *frame_src_data[3], uint8_t *frame_ref_data[3],
967 int16_t (*mv_tables[2][2])[2], uint8_t *field_select_tables[2], int f_code, int mx, int my)
968 {
969 const int size=0;
970 const int h=8;
971 int block;
972 int P[10][2];
973 uint8_t * const mv_penalty= s->me.mv_penalty[f_code] + MAX_MV;
974 int same=1;
975 const int stride= 2*s->linesize;
976 const int uvstride= 2*s->uvlinesize;
977 int dmin_sum= 0;
978 const int mot_stride= s->mb_stride;
979 const int xy= s->mb_x + s->mb_y*mot_stride;
980
981 s->me.ymin>>=1;
982 s->me.ymax>>=1;
983
984 for(block=0; block<2; block++){
985 int field_select;
986 int best_dmin= INT_MAX;
987 int best_field= -1;
988
989 uint8_t *src_data[3]= {
990 frame_src_data[0] + s-> linesize*block,
991 frame_src_data[1] + s->uvlinesize*block,
992 frame_src_data[2] + s->uvlinesize*block
993 };
994
995 for(field_select=0; field_select<2; field_select++){
996 int dmin, mx_i, my_i, pred_x, pred_y;
997 uint8_t *ref_data[3]= {
998 frame_ref_data[0] + s-> linesize*field_select,
999 frame_ref_data[1] + s->uvlinesize*field_select,
1000 frame_ref_data[2] + s->uvlinesize*field_select
1001 };
1002 int16_t (*mv_table)[2]= mv_tables[block][field_select];
1003
1004 P_LEFT[0] = mv_table[xy - 1][0];
1005 P_LEFT[1] = mv_table[xy - 1][1];
1006 if(P_LEFT[0] > (s->me.xmax<<1)) P_LEFT[0] = (s->me.xmax<<1);
1007
1008 pred_x= P_LEFT[0];
1009 pred_y= P_LEFT[1];
1010
1011 if(s->mb_y){
1012 P_TOP[0] = mv_table[xy - mot_stride][0];
1013 P_TOP[1] = mv_table[xy - mot_stride][1];
1014 P_TOPRIGHT[0] = mv_table[xy - mot_stride + 1][0];
1015 P_TOPRIGHT[1] = mv_table[xy - mot_stride + 1][1];
1016 if(P_TOP[1] > (s->me.ymax<<1)) P_TOP[1] = (s->me.ymax<<1);
1017 if(P_TOPRIGHT[0] < (s->me.xmin<<1)) P_TOPRIGHT[0]= (s->me.xmin<<1);
1018 if(P_TOPRIGHT[0] > (s->me.xmax<<1)) P_TOPRIGHT[0]= (s->me.xmax<<1);
1019 if(P_TOPRIGHT[1] > (s->me.ymax<<1)) P_TOPRIGHT[1]= (s->me.ymax<<1);
1020
1021 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1022 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1023 }
1024 P_MV1[0]= mx; //FIXME not correct if block != field_select
1025 P_MV1[1]= my / 2;
1026
1027 dmin = s->me.motion_search[4](s, &mx_i, &my_i, P, pred_x, pred_y,
1028 src_data, ref_data, stride, uvstride, mv_table, (1<<16)>>1, mv_penalty);
1029
1030 dmin= s->me.sub_motion_search(s, &mx_i, &my_i, dmin,
1031 pred_x, pred_y, src_data, ref_data, stride, uvstride, size, h, mv_penalty);
1032
1033 mv_table[xy][0]= mx_i;
1034 mv_table[xy][1]= my_i;
1035
1036 if(s->dsp.me_sub_cmp[0] != s->dsp.mb_cmp[0]){
1037 int dxy;
1038
1039 //FIXME chroma ME
1040 uint8_t *ref= ref_data[0] + (mx_i>>1) + (my_i>>1)*stride;
1041 dxy = ((my_i & 1) << 1) | (mx_i & 1);
1042
1043 if(s->no_rounding){
1044 s->dsp.put_no_rnd_pixels_tab[size][dxy](s->me.scratchpad, ref , stride, h);
1045 }else{
1046 s->dsp.put_pixels_tab [size][dxy](s->me.scratchpad, ref , stride, h);
1047 }
1048 dmin= s->dsp.mb_cmp[size](s, src_data[0], s->me.scratchpad, stride, h);
1049 dmin+= (mv_penalty[mx_i-pred_x] + mv_penalty[my_i-pred_y] + 1)*s->me.mb_penalty_factor;
1050 }else
1051 dmin+= s->me.mb_penalty_factor; //field_select bits
1052
1053 dmin += field_select != block; //slightly prefer same field
1054
1055 if(dmin < best_dmin){
1056 best_dmin= dmin;
1057 best_field= field_select;
1058 }
1059 }
1060 {
1061 int16_t (*mv_table)[2]= mv_tables[block][best_field];
1062
1063 if(mv_table[xy][0] != mx) same=0; //FIXME check if these checks work and are any good at all
1064 if(mv_table[xy][1]&1) same=0;
1065 if(mv_table[xy][1]*2 != my) same=0;
1066 if(best_field != block) same=0;
1067 }
1068
1069 field_select_tables[block][xy]= best_field;
1070 dmin_sum += best_dmin;
1071 }
1072
1073 s->me.ymin<<=1;
1074 s->me.ymax<<=1;
1075
1076 if(same)
1077 return INT_MAX;
1078
1079 switch(s->avctx->mb_cmp&0xFF){
1080 /*case FF_CMP_SSE:
1081 return dmin_sum+ 32*s->qscale*s->qscale;*/
1082 case FF_CMP_RD:
1083 return dmin_sum;
1084 default:
1085 return dmin_sum+ 11*s->me.mb_penalty_factor;
1086 }
1087 }
1088
976 void ff_estimate_p_frame_motion(MpegEncContext * s, 1089 void ff_estimate_p_frame_motion(MpegEncContext * s,
977 int mb_x, int mb_y) 1090 int mb_x, int mb_y)
978 { 1091 {
979 uint8_t *pix, *ppix; 1092 uint8_t *pix, *ppix;
980 int sum, varc, vard, mx, my, range, dmin, xx, yy; 1093 int sum, varc, vard, mx, my, dmin, xx, yy;
981 int xmin, ymin, xmax, ymax;
982 int rel_xmin, rel_ymin, rel_xmax, rel_ymax;
983 int pred_x=0, pred_y=0; 1094 int pred_x=0, pred_y=0;
984 int P[10][2]; 1095 int P[10][2];
985 const int shift= 1+s->quarter_sample; 1096 const int shift= 1+s->quarter_sample;
986 int mb_type=0; 1097 int mb_type=0;
987 uint8_t *ref_picture= s->last_picture.data[0]; 1098 uint8_t *ref_picture= s->last_picture.data[0];
988 Picture * const pic= &s->current_picture; 1099 Picture * const pic= &s->current_picture;
989 uint8_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV; 1100 uint8_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV;
990 1101 const int stride= s->linesize;
1102 const int uvstride= s->uvlinesize;
1103 uint8_t *src_data[3]= {
1104 s->new_picture.data[0] + 16*(mb_x + stride*mb_y),
1105 s->new_picture.data[1] + 8*(mb_x + uvstride*mb_y),
1106 s->new_picture.data[2] + 8*(mb_x + uvstride*mb_y)
1107 };
1108 uint8_t *ref_data[3]= {
1109 s->last_picture.data[0] + 16*(mb_x + stride*mb_y),
1110 s->last_picture.data[1] + 8*(mb_x + uvstride*mb_y),
1111 s->last_picture.data[2] + 8*(mb_x + uvstride*mb_y)
1112 };
1113
991 assert(s->quarter_sample==0 || s->quarter_sample==1); 1114 assert(s->quarter_sample==0 || s->quarter_sample==1);
992 1115
993 s->me.penalty_factor = get_penalty_factor(s, s->avctx->me_cmp); 1116 s->me.penalty_factor = get_penalty_factor(s, s->avctx->me_cmp);
994 s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp); 1117 s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp);
995 s->me.mb_penalty_factor = get_penalty_factor(s, s->avctx->mb_cmp); 1118 s->me.mb_penalty_factor = get_penalty_factor(s, s->avctx->mb_cmp);
996 1119
997 get_limits(s, &range, &xmin, &ymin, &xmax, &ymax); 1120 get_limits(s, 16*mb_x, 16*mb_y);
998 rel_xmin= xmin - mb_x*16;
999 rel_xmax= xmax - mb_x*16;
1000 rel_ymin= ymin - mb_y*16;
1001 rel_ymax= ymax - mb_y*16;
1002 s->me.skip=0; 1121 s->me.skip=0;
1003 1122
1004 switch(s->me_method) { 1123 switch(s->me_method) {
1005 case ME_ZERO: 1124 case ME_ZERO:
1006 default: 1125 default:
1007 no_motion_search(s, &mx, &my); 1126 no_motion_search(s, &mx, &my);
1008 mx-= mb_x*16; 1127 mx-= mb_x*16;
1009 my-= mb_y*16; 1128 my-= mb_y*16;
1010 dmin = 0; 1129 dmin = 0;
1011 break; 1130 break;
1131 #if 0
1012 case ME_FULL: 1132 case ME_FULL:
1013 dmin = full_motion_search(s, &mx, &my, range, xmin, ymin, xmax, ymax, ref_picture); 1133 dmin = full_motion_search(s, &mx, &my, range, ref_picture);
1014 mx-= mb_x*16; 1134 mx-= mb_x*16;
1015 my-= mb_y*16; 1135 my-= mb_y*16;
1016 break; 1136 break;
1017 case ME_LOG: 1137 case ME_LOG:
1018 dmin = log_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture); 1138 dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture);
1019 mx-= mb_x*16; 1139 mx-= mb_x*16;
1020 my-= mb_y*16; 1140 my-= mb_y*16;
1021 break; 1141 break;
1022 case ME_PHODS: 1142 case ME_PHODS:
1023 dmin = phods_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture); 1143 dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture);
1024 mx-= mb_x*16; 1144 mx-= mb_x*16;
1025 my-= mb_y*16; 1145 my-= mb_y*16;
1026 break; 1146 break;
1147 #endif
1027 case ME_X1: 1148 case ME_X1:
1028 case ME_EPZS: 1149 case ME_EPZS:
1029 { 1150 {
1030 const int mot_stride = s->block_wrap[0]; 1151 const int mot_stride = s->block_wrap[0];
1031 const int mot_xy = s->block_index[0]; 1152 const int mot_xy = s->block_index[0];
1032 1153
1033 P_LEFT[0] = s->current_picture.motion_val[0][mot_xy - 1][0]; 1154 P_LEFT[0] = s->current_picture.motion_val[0][mot_xy - 1][0];
1034 P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1]; 1155 P_LEFT[1] = s->current_picture.motion_val[0][mot_xy - 1][1];
1035 1156
1036 if(P_LEFT[0] > (rel_xmax<<shift)) P_LEFT[0] = (rel_xmax<<shift); 1157 if(P_LEFT[0] > (s->me.xmax<<shift)) P_LEFT[0] = (s->me.xmax<<shift);
1037 1158
1038 if(mb_y) { 1159 if(mb_y) {
1039 P_TOP[0] = s->current_picture.motion_val[0][mot_xy - mot_stride ][0]; 1160 P_TOP[0] = s->current_picture.motion_val[0][mot_xy - mot_stride ][0];
1040 P_TOP[1] = s->current_picture.motion_val[0][mot_xy - mot_stride ][1]; 1161 P_TOP[1] = s->current_picture.motion_val[0][mot_xy - mot_stride ][1];
1041 P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][0]; 1162 P_TOPRIGHT[0] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][0];
1042 P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][1]; 1163 P_TOPRIGHT[1] = s->current_picture.motion_val[0][mot_xy - mot_stride + 2][1];
1043 if(P_TOP[1] > (rel_ymax<<shift)) P_TOP[1] = (rel_ymax<<shift); 1164 if(P_TOP[1] > (s->me.ymax<<shift)) P_TOP[1] = (s->me.ymax<<shift);
1044 if(P_TOPRIGHT[0] < (rel_xmin<<shift)) P_TOPRIGHT[0]= (rel_xmin<<shift); 1165 if(P_TOPRIGHT[0] < (s->me.xmin<<shift)) P_TOPRIGHT[0]= (s->me.xmin<<shift);
1045 if(P_TOPRIGHT[1] > (rel_ymax<<shift)) P_TOPRIGHT[1]= (rel_ymax<<shift); 1166 if(P_TOPRIGHT[1] > (s->me.ymax<<shift)) P_TOPRIGHT[1]= (s->me.ymax<<shift);
1046 1167
1047 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); 1168 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1048 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); 1169 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1049 1170
1050 if(s->out_format == FMT_H263){ 1171 if(s->out_format == FMT_H263){
1058 pred_x= P_LEFT[0]; 1179 pred_x= P_LEFT[0];
1059 pred_y= P_LEFT[1]; 1180 pred_y= P_LEFT[1];
1060 } 1181 }
1061 1182
1062 } 1183 }
1063 dmin = s->me.motion_search[0](s, 0, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax, 1184 dmin = s->me.motion_search[0](s, &mx, &my, P, pred_x, pred_y,
1064 &s->last_picture, s->p_mv_table, (1<<16)>>shift, mv_penalty); 1185 src_data, ref_data, stride, uvstride, s->p_mv_table, (1<<16)>>shift, mv_penalty);
1065 1186
1066 break; 1187 break;
1067 } 1188 }
1068 1189
1069 /* intra / predictive decision */ 1190 /* intra / predictive decision */
1070 xx = mb_x * 16; 1191 xx = mb_x * 16;
1071 yy = mb_y * 16; 1192 yy = mb_y * 16;
1072 1193
1073 pix = s->new_picture.data[0] + (yy * s->linesize) + xx; 1194 pix = src_data[0];
1074 /* At this point (mx,my) are full-pell and the relative displacement */ 1195 /* At this point (mx,my) are full-pell and the relative displacement */
1075 ppix = ref_picture + ((yy+my) * s->linesize) + (xx+mx); 1196 ppix = ref_data[0] + (my * s->linesize) + mx;
1076 1197
1077 sum = s->dsp.pix_sum(pix, s->linesize); 1198 sum = s->dsp.pix_sum(pix, s->linesize);
1078 1199
1079 varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8; 1200 varc = (s->dsp.pix_norm1(pix, s->linesize) - (((unsigned)(sum*sum))>>8) + 500 + 128)>>8;
1080 vard = (s->dsp.sse[0](NULL, pix, ppix, s->linesize)+128)>>8; 1201 vard = (s->dsp.sse[0](NULL, pix, ppix, s->linesize, 16)+128)>>8;
1081 1202
1082 //printf("%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout); 1203 //printf("%d %d %d %X %X %X\n", s->mb_width, mb_x, mb_y,(int)s, (int)s->mb_var, (int)s->mc_mb_var); fflush(stdout);
1083 pic->mb_var [s->mb_stride * mb_y + mb_x] = varc; 1204 pic->mb_var [s->mb_stride * mb_y + mb_x] = varc;
1084 pic->mc_mb_var[s->mb_stride * mb_y + mb_x] = vard; 1205 pic->mc_mb_var[s->mb_stride * mb_y + mb_x] = vard;
1085 pic->mb_mean [s->mb_stride * mb_y + mb_x] = (sum+128)>>8; 1206 pic->mb_mean [s->mb_stride * mb_y + mb_x] = (sum+128)>>8;
1097 s->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc); 1218 s->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1098 else 1219 else
1099 s->scene_change_score+= s->qscale; 1220 s->scene_change_score+= s->qscale;
1100 1221
1101 if (vard*2 + 200 > varc) 1222 if (vard*2 + 200 > varc)
1102 mb_type|= MB_TYPE_INTRA; 1223 mb_type|= CANDIDATE_MB_TYPE_INTRA;
1103 if (varc*2 + 200 > vard){ 1224 if (varc*2 + 200 > vard){
1104 mb_type|= MB_TYPE_INTER; 1225 mb_type|= CANDIDATE_MB_TYPE_INTER;
1105 s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, 1226 s->me.sub_motion_search(s, &mx, &my, dmin,
1106 pred_x, pred_y, &s->last_picture, 0, 0, mv_penalty); 1227 pred_x, pred_y, src_data, ref_data, stride, uvstride, 0, 16, mv_penalty);
1107 if(s->flags&CODEC_FLAG_MV0) 1228 if(s->flags&CODEC_FLAG_MV0)
1108 if(mx || my) 1229 if(mx || my)
1109 mb_type |= MB_TYPE_SKIPED; //FIXME check difference 1230 mb_type |= CANDIDATE_MB_TYPE_SKIPED; //FIXME check difference
1110 }else{ 1231 }else{
1111 mx <<=shift; 1232 mx <<=shift;
1112 my <<=shift; 1233 my <<=shift;
1113 } 1234 }
1114 if((s->flags&CODEC_FLAG_4MV) 1235 if((s->flags&CODEC_FLAG_4MV)
1115 && !s->me.skip && varc>50 && vard>10){ 1236 && !s->me.skip && varc>50 && vard>10){
1116 if(h263_mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift) < INT_MAX) 1237 if(h263_mv4_search(s, mx, my, shift) < INT_MAX)
1117 mb_type|=MB_TYPE_INTER4V; 1238 mb_type|=CANDIDATE_MB_TYPE_INTER4V;
1118 1239
1119 set_p_mv_tables(s, mx, my, 0); 1240 set_p_mv_tables(s, mx, my, 0);
1120 }else 1241 }else
1121 set_p_mv_tables(s, mx, my, 1); 1242 set_p_mv_tables(s, mx, my, 1);
1243 if((s->flags&CODEC_FLAG_INTERLACED_ME)
1244 && !s->me.skip){ //FIXME varc/d checks
1245 if(interlaced_search(s, src_data, ref_data, s->p_field_mv_table, s->p_field_select_table, s->f_code, mx, my) < INT_MAX)
1246 mb_type |= CANDIDATE_MB_TYPE_INTER_I;
1247 }
1122 }else{ 1248 }else{
1123 int intra_score, i; 1249 int intra_score, i;
1124 mb_type= MB_TYPE_INTER; 1250 mb_type= CANDIDATE_MB_TYPE_INTER;
1125 1251
1126 dmin= s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, 1252 dmin= s->me.sub_motion_search(s, &mx, &my, dmin,
1127 pred_x, pred_y, &s->last_picture, 0, 0, mv_penalty); 1253 pred_x, pred_y, src_data, ref_data, stride, uvstride, 0, 16, mv_penalty);
1128
1129 if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip) 1254 if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip)
1130 dmin= s->me.get_mb_score(s, mx, my, pred_x, pred_y, &s->last_picture, mv_penalty); 1255 dmin= s->me.get_mb_score(s, mx, my, pred_x, pred_y, src_data, ref_data, stride, uvstride, mv_penalty);
1131 1256
1132 if((s->flags&CODEC_FLAG_4MV) 1257 if((s->flags&CODEC_FLAG_4MV)
1133 && !s->me.skip && varc>50 && vard>10){ 1258 && !s->me.skip && varc>50 && vard>10){
1134 int dmin4= h263_mv4_search(s, rel_xmin, rel_ymin, rel_xmax, rel_ymax, mx, my, shift); 1259 int dmin4= h263_mv4_search(s, mx, my, shift);
1135 if(dmin4 < dmin){ 1260 if(dmin4 < dmin){
1136 mb_type= MB_TYPE_INTER4V; 1261 mb_type= CANDIDATE_MB_TYPE_INTER4V;
1137 dmin=dmin4; 1262 dmin=dmin4;
1138 } 1263 }
1139 } 1264 }
1265 if((s->flags&CODEC_FLAG_INTERLACED_ME)
1266 && !s->me.skip){ //FIXME varc/d checks
1267 int dmin_i= interlaced_search(s, src_data, ref_data, s->p_field_mv_table, s->p_field_select_table, s->f_code, mx, my);
1268 if(dmin_i < dmin){
1269 mb_type = CANDIDATE_MB_TYPE_INTER_I;
1270 dmin= dmin_i;
1271 }
1272 }
1140 1273
1141 // pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin; 1274 // pic->mb_cmp_score[s->mb_stride * mb_y + mb_x] = dmin;
1142 set_p_mv_tables(s, mx, my, mb_type!=MB_TYPE_INTER4V); 1275 set_p_mv_tables(s, mx, my, mb_type!=CANDIDATE_MB_TYPE_INTER4V);
1143 1276
1144 /* get intra luma score */ 1277 /* get intra luma score */
1145 if((s->avctx->mb_cmp&0xFF)==FF_CMP_SSE){ 1278 if((s->avctx->mb_cmp&0xFF)==FF_CMP_SSE){
1146 intra_score= (varc<<8) - 500; //FIXME dont scale it down so we dont have to fix it 1279 intra_score= (varc<<8) - 500; //FIXME dont scale it down so we dont have to fix it
1147 }else{ 1280 }else{
1153 *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 4]) = mean; 1286 *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 4]) = mean;
1154 *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 8]) = mean; 1287 *(uint32_t*)(&s->me.scratchpad[i*s->linesize+ 8]) = mean;
1155 *(uint32_t*)(&s->me.scratchpad[i*s->linesize+12]) = mean; 1288 *(uint32_t*)(&s->me.scratchpad[i*s->linesize+12]) = mean;
1156 } 1289 }
1157 1290
1158 intra_score= s->dsp.mb_cmp[0](s, s->me.scratchpad, pix, s->linesize); 1291 intra_score= s->dsp.mb_cmp[0](s, s->me.scratchpad, pix, s->linesize, 16);
1159 } 1292 }
1160 #if 0 //FIXME 1293 #if 0 //FIXME
1161 /* get chroma score */ 1294 /* get chroma score */
1162 if(s->avctx->mb_cmp&FF_CMP_CHROMA){ 1295 if(s->avctx->mb_cmp&FF_CMP_CHROMA){
1163 for(i=1; i<3; i++){ 1296 for(i=1; i<3; i++){
1182 } 1315 }
1183 #endif 1316 #endif
1184 intra_score += s->me.mb_penalty_factor*16; 1317 intra_score += s->me.mb_penalty_factor*16;
1185 1318
1186 if(intra_score < dmin){ 1319 if(intra_score < dmin){
1187 mb_type= MB_TYPE_INTRA; 1320 mb_type= CANDIDATE_MB_TYPE_INTRA;
1188 s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= MB_TYPE_INTRA; //FIXME cleanup 1321 s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= CANDIDATE_MB_TYPE_INTRA; //FIXME cleanup
1189 }else 1322 }else
1190 s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= 0; 1323 s->current_picture.mb_type[mb_y*s->mb_stride + mb_x]= 0;
1191 1324
1192 if (vard <= 64 || vard < varc) { //FIXME 1325 if (vard <= 64 || vard < varc) { //FIXME
1193 s->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc); 1326 s->scene_change_score+= ff_sqrt(vard) - ff_sqrt(varc);
1200 } 1333 }
1201 1334
1202 int ff_pre_estimate_p_frame_motion(MpegEncContext * s, 1335 int ff_pre_estimate_p_frame_motion(MpegEncContext * s,
1203 int mb_x, int mb_y) 1336 int mb_x, int mb_y)
1204 { 1337 {
1205 int mx, my, range, dmin; 1338 int mx, my, dmin;
1206 int xmin, ymin, xmax, ymax;
1207 int rel_xmin, rel_ymin, rel_xmax, rel_ymax;
1208 int pred_x=0, pred_y=0; 1339 int pred_x=0, pred_y=0;
1209 int P[10][2]; 1340 int P[10][2];
1210 const int shift= 1+s->quarter_sample; 1341 const int shift= 1+s->quarter_sample;
1211 uint8_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV; 1342 uint8_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV;
1212 const int xy= mb_x + mb_y*s->mb_stride; 1343 const int xy= mb_x + mb_y*s->mb_stride;
1344 const int stride= s->linesize;
1345 const int uvstride= s->uvlinesize;
1346 uint8_t *src_data[3]= {
1347 s->new_picture.data[0] + 16*(mb_x + stride*mb_y),
1348 s->new_picture.data[1] + 8*(mb_x + uvstride*mb_y),
1349 s->new_picture.data[2] + 8*(mb_x + uvstride*mb_y)
1350 };
1351 uint8_t *ref_data[3]= {
1352 s->last_picture.data[0] + 16*(mb_x + stride*mb_y),
1353 s->last_picture.data[1] + 8*(mb_x + uvstride*mb_y),
1354 s->last_picture.data[2] + 8*(mb_x + uvstride*mb_y)
1355 };
1213 1356
1214 assert(s->quarter_sample==0 || s->quarter_sample==1); 1357 assert(s->quarter_sample==0 || s->quarter_sample==1);
1215 1358
1216 s->me.pre_penalty_factor = get_penalty_factor(s, s->avctx->me_pre_cmp); 1359 s->me.pre_penalty_factor = get_penalty_factor(s, s->avctx->me_pre_cmp);
1217 1360
1218 get_limits(s, &range, &xmin, &ymin, &xmax, &ymax); 1361 get_limits(s, 16*mb_x, 16*mb_y);
1219 rel_xmin= xmin - mb_x*16;
1220 rel_xmax= xmax - mb_x*16;
1221 rel_ymin= ymin - mb_y*16;
1222 rel_ymax= ymax - mb_y*16;
1223 s->me.skip=0; 1362 s->me.skip=0;
1224 1363
1225 P_LEFT[0] = s->p_mv_table[xy + 1][0]; 1364 P_LEFT[0] = s->p_mv_table[xy + 1][0];
1226 P_LEFT[1] = s->p_mv_table[xy + 1][1]; 1365 P_LEFT[1] = s->p_mv_table[xy + 1][1];
1227 1366
1228 if(P_LEFT[0] < (rel_xmin<<shift)) P_LEFT[0] = (rel_xmin<<shift); 1367 if(P_LEFT[0] < (s->me.xmin<<shift)) P_LEFT[0] = (s->me.xmin<<shift);
1229 1368
1230 /* special case for first line */ 1369 /* special case for first line */
1231 if (mb_y == s->mb_height-1) { 1370 if (mb_y == s->mb_height-1) {
1232 pred_x= P_LEFT[0]; 1371 pred_x= P_LEFT[0];
1233 pred_y= P_LEFT[1]; 1372 pred_y= P_LEFT[1];
1236 } else { 1375 } else {
1237 P_TOP[0] = s->p_mv_table[xy + s->mb_stride ][0]; 1376 P_TOP[0] = s->p_mv_table[xy + s->mb_stride ][0];
1238 P_TOP[1] = s->p_mv_table[xy + s->mb_stride ][1]; 1377 P_TOP[1] = s->p_mv_table[xy + s->mb_stride ][1];
1239 P_TOPRIGHT[0] = s->p_mv_table[xy + s->mb_stride - 1][0]; 1378 P_TOPRIGHT[0] = s->p_mv_table[xy + s->mb_stride - 1][0];
1240 P_TOPRIGHT[1] = s->p_mv_table[xy + s->mb_stride - 1][1]; 1379 P_TOPRIGHT[1] = s->p_mv_table[xy + s->mb_stride - 1][1];
1241 if(P_TOP[1] < (rel_ymin<<shift)) P_TOP[1] = (rel_ymin<<shift); 1380 if(P_TOP[1] < (s->me.ymin<<shift)) P_TOP[1] = (s->me.ymin<<shift);
1242 if(P_TOPRIGHT[0] > (rel_xmax<<shift)) P_TOPRIGHT[0]= (rel_xmax<<shift); 1381 if(P_TOPRIGHT[0] > (s->me.xmax<<shift)) P_TOPRIGHT[0]= (s->me.xmax<<shift);
1243 if(P_TOPRIGHT[1] < (rel_ymin<<shift)) P_TOPRIGHT[1]= (rel_ymin<<shift); 1382 if(P_TOPRIGHT[1] < (s->me.ymin<<shift)) P_TOPRIGHT[1]= (s->me.ymin<<shift);
1244 1383
1245 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); 1384 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1246 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); 1385 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1247 1386
1248 pred_x = P_MEDIAN[0]; 1387 pred_x = P_MEDIAN[0];
1249 pred_y = P_MEDIAN[1]; 1388 pred_y = P_MEDIAN[1];
1250 } 1389 }
1251 dmin = s->me.pre_motion_search(s, 0, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax, 1390 dmin = s->me.pre_motion_search(s, &mx, &my, P, pred_x, pred_y,
1252 &s->last_picture, s->p_mv_table, (1<<16)>>shift, mv_penalty); 1391 src_data, ref_data, stride, uvstride, s->p_mv_table, (1<<16)>>shift, mv_penalty);
1253 1392
1254 s->p_mv_table[xy][0] = mx<<shift; 1393 s->p_mv_table[xy][0] = mx<<shift;
1255 s->p_mv_table[xy][1] = my<<shift; 1394 s->p_mv_table[xy][1] = my<<shift;
1256 1395
1257 return dmin; 1396 return dmin;
1258 } 1397 }
1259 1398
1260 static int ff_estimate_motion_b(MpegEncContext * s, 1399 static int ff_estimate_motion_b(MpegEncContext * s,
1261 int mb_x, int mb_y, int16_t (*mv_table)[2], Picture *picture, int f_code) 1400 int mb_x, int mb_y, int16_t (*mv_table)[2], uint8_t *src_data[3],
1262 { 1401 uint8_t *ref_data[3], int stride, int uvstride, int f_code)
1263 int mx, my, range, dmin; 1402 {
1264 int xmin, ymin, xmax, ymax; 1403 int mx, my, dmin;
1265 int rel_xmin, rel_ymin, rel_xmax, rel_ymax;
1266 int pred_x=0, pred_y=0; 1404 int pred_x=0, pred_y=0;
1267 int P[10][2]; 1405 int P[10][2];
1268 const int shift= 1+s->quarter_sample; 1406 const int shift= 1+s->quarter_sample;
1269 const int mot_stride = s->mb_stride; 1407 const int mot_stride = s->mb_stride;
1270 const int mot_xy = mb_y*mot_stride + mb_x; 1408 const int mot_xy = mb_y*mot_stride + mb_x;
1271 uint8_t * const ref_picture= picture->data[0]; 1409 uint8_t * const ref_picture= ref_data[0] - 16*s->mb_x - 16*s->mb_y*s->linesize; //FIXME ugly
1272 uint8_t * const mv_penalty= s->me.mv_penalty[f_code] + MAX_MV; 1410 uint8_t * const mv_penalty= s->me.mv_penalty[f_code] + MAX_MV;
1273 int mv_scale; 1411 int mv_scale;
1274 1412
1275 s->me.penalty_factor = get_penalty_factor(s, s->avctx->me_cmp); 1413 s->me.penalty_factor = get_penalty_factor(s, s->avctx->me_cmp);
1276 s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp); 1414 s->me.sub_penalty_factor= get_penalty_factor(s, s->avctx->me_sub_cmp);
1277 s->me.mb_penalty_factor = get_penalty_factor(s, s->avctx->mb_cmp); 1415 s->me.mb_penalty_factor = get_penalty_factor(s, s->avctx->mb_cmp);
1278 1416
1279 get_limits(s, &range, &xmin, &ymin, &xmax, &ymax); 1417 get_limits(s, 16*mb_x, 16*mb_y);
1280 rel_xmin= xmin - mb_x*16;
1281 rel_xmax= xmax - mb_x*16;
1282 rel_ymin= ymin - mb_y*16;
1283 rel_ymax= ymax - mb_y*16;
1284 1418
1285 switch(s->me_method) { 1419 switch(s->me_method) {
1286 case ME_ZERO: 1420 case ME_ZERO:
1287 default: 1421 default:
1288 no_motion_search(s, &mx, &my); 1422 no_motion_search(s, &mx, &my);
1289 dmin = 0; 1423 dmin = 0;
1290 mx-= mb_x*16; 1424 mx-= mb_x*16;
1291 my-= mb_y*16; 1425 my-= mb_y*16;
1292 break; 1426 break;
1427 #if 0
1293 case ME_FULL: 1428 case ME_FULL:
1294 dmin = full_motion_search(s, &mx, &my, range, xmin, ymin, xmax, ymax, ref_picture); 1429 dmin = full_motion_search(s, &mx, &my, range, ref_picture);
1295 mx-= mb_x*16; 1430 mx-= mb_x*16;
1296 my-= mb_y*16; 1431 my-= mb_y*16;
1297 break; 1432 break;
1298 case ME_LOG: 1433 case ME_LOG:
1299 dmin = log_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture); 1434 dmin = log_motion_search(s, &mx, &my, range / 2, ref_picture);
1300 mx-= mb_x*16; 1435 mx-= mb_x*16;
1301 my-= mb_y*16; 1436 my-= mb_y*16;
1302 break; 1437 break;
1303 case ME_PHODS: 1438 case ME_PHODS:
1304 dmin = phods_motion_search(s, &mx, &my, range / 2, xmin, ymin, xmax, ymax, ref_picture); 1439 dmin = phods_motion_search(s, &mx, &my, range / 2, ref_picture);
1305 mx-= mb_x*16; 1440 mx-= mb_x*16;
1306 my-= mb_y*16; 1441 my-= mb_y*16;
1307 break; 1442 break;
1443 #endif
1308 case ME_X1: 1444 case ME_X1:
1309 case ME_EPZS: 1445 case ME_EPZS:
1310 { 1446 {
1311 P_LEFT[0] = mv_table[mot_xy - 1][0]; 1447 P_LEFT[0] = mv_table[mot_xy - 1][0];
1312 P_LEFT[1] = mv_table[mot_xy - 1][1]; 1448 P_LEFT[1] = mv_table[mot_xy - 1][1];
1313 1449
1314 if(P_LEFT[0] > (rel_xmax<<shift)) P_LEFT[0] = (rel_xmax<<shift); 1450 if(P_LEFT[0] > (s->me.xmax<<shift)) P_LEFT[0] = (s->me.xmax<<shift);
1315 1451
1316 /* special case for first line */ 1452 /* special case for first line */
1317 if (mb_y) { 1453 if (mb_y) {
1318 P_TOP[0] = mv_table[mot_xy - mot_stride ][0]; 1454 P_TOP[0] = mv_table[mot_xy - mot_stride ][0];
1319 P_TOP[1] = mv_table[mot_xy - mot_stride ][1]; 1455 P_TOP[1] = mv_table[mot_xy - mot_stride ][1];
1320 P_TOPRIGHT[0] = mv_table[mot_xy - mot_stride + 1 ][0]; 1456 P_TOPRIGHT[0] = mv_table[mot_xy - mot_stride + 1 ][0];
1321 P_TOPRIGHT[1] = mv_table[mot_xy - mot_stride + 1 ][1]; 1457 P_TOPRIGHT[1] = mv_table[mot_xy - mot_stride + 1 ][1];
1322 if(P_TOP[1] > (rel_ymax<<shift)) P_TOP[1]= (rel_ymax<<shift); 1458 if(P_TOP[1] > (s->me.ymax<<shift)) P_TOP[1]= (s->me.ymax<<shift);
1323 if(P_TOPRIGHT[0] < (rel_xmin<<shift)) P_TOPRIGHT[0]= (rel_xmin<<shift); 1459 if(P_TOPRIGHT[0] < (s->me.xmin<<shift)) P_TOPRIGHT[0]= (s->me.xmin<<shift);
1324 if(P_TOPRIGHT[1] > (rel_ymax<<shift)) P_TOPRIGHT[1]= (rel_ymax<<shift); 1460 if(P_TOPRIGHT[1] > (s->me.ymax<<shift)) P_TOPRIGHT[1]= (s->me.ymax<<shift);
1325 1461
1326 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]); 1462 P_MEDIAN[0]= mid_pred(P_LEFT[0], P_TOP[0], P_TOPRIGHT[0]);
1327 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); 1463 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1328 } 1464 }
1329 pred_x= P_LEFT[0]; 1465 pred_x= P_LEFT[0];
1334 mv_scale= (s->pb_time<<16) / (s->pp_time<<shift); 1470 mv_scale= (s->pb_time<<16) / (s->pp_time<<shift);
1335 }else{ 1471 }else{
1336 mv_scale= ((s->pb_time - s->pp_time)<<16) / (s->pp_time<<shift); 1472 mv_scale= ((s->pb_time - s->pp_time)<<16) / (s->pp_time<<shift);
1337 } 1473 }
1338 1474
1339 dmin = s->me.motion_search[0](s, 0, &mx, &my, P, pred_x, pred_y, rel_xmin, rel_ymin, rel_xmax, rel_ymax, 1475 dmin = s->me.motion_search[0](s, &mx, &my, P, pred_x, pred_y,
1340 picture, s->p_mv_table, mv_scale, mv_penalty); 1476 src_data, ref_data, stride, uvstride, s->p_mv_table, mv_scale, mv_penalty);
1341 1477
1342 break; 1478 break;
1343 } 1479 }
1344 1480
1345 dmin= s->me.sub_motion_search(s, &mx, &my, dmin, rel_xmin, rel_ymin, rel_xmax, rel_ymax, 1481 dmin= s->me.sub_motion_search(s, &mx, &my, dmin,
1346 pred_x, pred_y, picture, 0, 0, mv_penalty); 1482 pred_x, pred_y, src_data, ref_data, stride, uvstride, 0, 16, mv_penalty);
1347 1483
1348 if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip) 1484 if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip)
1349 dmin= s->me.get_mb_score(s, mx, my, pred_x, pred_y, picture, mv_penalty); 1485 dmin= s->me.get_mb_score(s, mx, my, pred_x, pred_y, src_data, ref_data, stride, uvstride, mv_penalty);
1350 1486
1351 //printf("%d %d %d %d//", s->mb_x, s->mb_y, mx, my); 1487 //printf("%d %d %d %d//", s->mb_x, s->mb_y, mx, my);
1352 // s->mb_type[mb_y*s->mb_width + mb_x]= mb_type; 1488 // s->mb_type[mb_y*s->mb_width + mb_x]= mb_type;
1353 mv_table[mot_xy][0]= mx; 1489 mv_table[mot_xy][0]= mx;
1354 mv_table[mot_xy][1]= my; 1490 mv_table[mot_xy][1]= my;
1355 1491
1356 return dmin; 1492 return dmin;
1357 } 1493 }
1358 1494
1359 static inline int check_bidir_mv(MpegEncContext * s, 1495 static inline int check_bidir_mv(MpegEncContext * s, uint8_t *src_data[3], uint8_t *ref_data[6],
1360 int mb_x, int mb_y, 1496 int stride, int uvstride,
1361 int motion_fx, int motion_fy, 1497 int motion_fx, int motion_fy,
1362 int motion_bx, int motion_by, 1498 int motion_bx, int motion_by,
1363 int pred_fx, int pred_fy, 1499 int pred_fx, int pred_fy,
1364 int pred_bx, int pred_by) 1500 int pred_bx, int pred_by,
1501 int size, int h)
1365 { 1502 {
1366 //FIXME optimize? 1503 //FIXME optimize?
1367 //FIXME move into template? 1504 //FIXME move into template?
1368 //FIXME better f_code prediction (max mv & distance) 1505 //FIXME better f_code prediction (max mv & distance)
1506 //FIXME pointers
1369 uint8_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame 1507 uint8_t * const mv_penalty= s->me.mv_penalty[s->f_code] + MAX_MV; // f_code of the prev frame
1370 uint8_t *dest_y = s->me.scratchpad; 1508 uint8_t *dest_y = s->me.scratchpad;
1371 uint8_t *ptr; 1509 uint8_t *ptr;
1372 int dxy; 1510 int dxy;
1373 int src_x, src_y; 1511 int src_x, src_y;
1374 int fbmin; 1512 int fbmin;
1375 1513
1376 if(s->quarter_sample){ 1514 if(s->quarter_sample){
1377 dxy = ((motion_fy & 3) << 2) | (motion_fx & 3); 1515 dxy = ((motion_fy & 3) << 2) | (motion_fx & 3);
1378 src_x = mb_x * 16 + (motion_fx >> 2); 1516 src_x = motion_fx >> 2;
1379 src_y = mb_y * 16 + (motion_fy >> 2); 1517 src_y = motion_fy >> 2;
1380 assert(src_x >=-16 && src_x<=s->h_edge_pos); 1518
1381 assert(src_y >=-16 && src_y<=s->v_edge_pos); 1519 ptr = ref_data[0] + (src_y * stride) + src_x;
1382 1520 s->dsp.put_qpel_pixels_tab[0][dxy](dest_y , ptr , stride);
1383 ptr = s->last_picture.data[0] + (src_y * s->linesize) + src_x;
1384 s->dsp.put_qpel_pixels_tab[0][dxy](dest_y , ptr , s->linesize);
1385 1521
1386 dxy = ((motion_by & 3) << 2) | (motion_bx & 3); 1522 dxy = ((motion_by & 3) << 2) | (motion_bx & 3);
1387 src_x = mb_x * 16 + (motion_bx >> 2); 1523 src_x = motion_bx >> 2;
1388 src_y = mb_y * 16 + (motion_by >> 2); 1524 src_y = motion_by >> 2;
1389 assert(src_x >=-16 && src_x<=s->h_edge_pos); 1525
1390 assert(src_y >=-16 && src_y<=s->v_edge_pos); 1526 ptr = ref_data[3] + (src_y * stride) + src_x;
1391 1527 s->dsp.avg_qpel_pixels_tab[size][dxy](dest_y , ptr , stride);
1392 ptr = s->next_picture.data[0] + (src_y * s->linesize) + src_x;
1393 s->dsp.avg_qpel_pixels_tab[0][dxy](dest_y , ptr , s->linesize);
1394 }else{ 1528 }else{
1395 dxy = ((motion_fy & 1) << 1) | (motion_fx & 1); 1529 dxy = ((motion_fy & 1) << 1) | (motion_fx & 1);
1396 src_x = mb_x * 16 + (motion_fx >> 1); 1530 src_x = motion_fx >> 1;
1397 src_y = mb_y * 16 + (motion_fy >> 1); 1531 src_y = motion_fy >> 1;
1398 assert(src_x >=-16 && src_x<=s->h_edge_pos); 1532
1399 assert(src_y >=-16 && src_y<=s->v_edge_pos); 1533 ptr = ref_data[0] + (src_y * stride) + src_x;
1400 1534 s->dsp.put_pixels_tab[size][dxy](dest_y , ptr , stride, h);
1401 ptr = s->last_picture.data[0] + (src_y * s->linesize) + src_x;
1402 s->dsp.put_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16);
1403 1535
1404 dxy = ((motion_by & 1) << 1) | (motion_bx & 1); 1536 dxy = ((motion_by & 1) << 1) | (motion_bx & 1);
1405 src_x = mb_x * 16 + (motion_bx >> 1); 1537 src_x = motion_bx >> 1;
1406 src_y = mb_y * 16 + (motion_by >> 1); 1538 src_y = motion_by >> 1;
1407 assert(src_x >=-16 && src_x<=s->h_edge_pos); 1539
1408 assert(src_y >=-16 && src_y<=s->v_edge_pos); 1540 ptr = ref_data[3] + (src_y * stride) + src_x;
1409 1541 s->dsp.avg_pixels_tab[size][dxy](dest_y , ptr , stride, h);
1410 ptr = s->next_picture.data[0] + (src_y * s->linesize) + src_x;
1411 s->dsp.avg_pixels_tab[0][dxy](dest_y , ptr , s->linesize, 16);
1412 } 1542 }
1413 1543
1414 fbmin = (mv_penalty[motion_fx-pred_fx] + mv_penalty[motion_fy-pred_fy])*s->me.mb_penalty_factor 1544 fbmin = (mv_penalty[motion_fx-pred_fx] + mv_penalty[motion_fy-pred_fy])*s->me.mb_penalty_factor
1415 +(mv_penalty[motion_bx-pred_bx] + mv_penalty[motion_by-pred_by])*s->me.mb_penalty_factor 1545 +(mv_penalty[motion_bx-pred_bx] + mv_penalty[motion_by-pred_by])*s->me.mb_penalty_factor
1416 + s->dsp.mb_cmp[0](s, s->new_picture.data[0] + mb_x*16 + mb_y*16*s->linesize, dest_y, s->linesize); 1546 + s->dsp.mb_cmp[size](s, src_data[0], dest_y, stride, h); //FIXME new_pic
1417 1547
1418 if(s->avctx->mb_cmp&FF_CMP_CHROMA){ 1548 if(s->avctx->mb_cmp&FF_CMP_CHROMA){
1419 } 1549 }
1420 //FIXME CHROMA !!! 1550 //FIXME CHROMA !!!
1421 1551
1422 return fbmin; 1552 return fbmin;
1423 } 1553 }
1424 1554
1425 /* refine the bidir vectors in hq mode and return the score in both lq & hq mode*/ 1555 /* refine the bidir vectors in hq mode and return the score in both lq & hq mode*/
1426 static inline int bidir_refine(MpegEncContext * s, 1556 static inline int bidir_refine(MpegEncContext * s, uint8_t *src_data[3], uint8_t *ref_data[6],
1557 int stride, int uvstride,
1427 int mb_x, int mb_y) 1558 int mb_x, int mb_y)
1428 { 1559 {
1429 const int mot_stride = s->mb_stride; 1560 const int mot_stride = s->mb_stride;
1430 const int xy = mb_y *mot_stride + mb_x; 1561 const int xy = mb_y *mot_stride + mb_x;
1431 int fbmin; 1562 int fbmin;
1438 int motion_bx= s->b_bidir_back_mv_table[xy][0]= s->b_back_mv_table[xy][0]; 1569 int motion_bx= s->b_bidir_back_mv_table[xy][0]= s->b_back_mv_table[xy][0];
1439 int motion_by= s->b_bidir_back_mv_table[xy][1]= s->b_back_mv_table[xy][1]; 1570 int motion_by= s->b_bidir_back_mv_table[xy][1]= s->b_back_mv_table[xy][1];
1440 1571
1441 //FIXME do refinement and add flag 1572 //FIXME do refinement and add flag
1442 1573
1443 fbmin= check_bidir_mv(s, mb_x, mb_y, 1574 fbmin= check_bidir_mv(s, src_data, ref_data, stride, uvstride,
1444 motion_fx, motion_fy, 1575 motion_fx, motion_fy,
1445 motion_bx, motion_by, 1576 motion_bx, motion_by,
1446 pred_fx, pred_fy, 1577 pred_fx, pred_fy,
1447 pred_bx, pred_by); 1578 pred_bx, pred_by,
1579 0, 16);
1448 1580
1449 return fbmin; 1581 return fbmin;
1450 } 1582 }
1451 1583
1452 static inline int direct_search(MpegEncContext * s, 1584 static inline int direct_search(MpegEncContext * s, uint8_t *src_data[3], uint8_t *ref_data[6],
1585 int stride, int uvstride,
1453 int mb_x, int mb_y) 1586 int mb_x, int mb_y)
1454 { 1587 {
1455 int P[10][2]; 1588 int P[10][2];
1456 const int mot_stride = s->mb_stride; 1589 const int mot_stride = s->mb_stride;
1457 const int mot_xy = mb_y*mot_stride + mb_x; 1590 const int mot_xy = mb_y*mot_stride + mb_x;
1506 s->b_direct_mv_table[mot_xy][0]= 0; 1639 s->b_direct_mv_table[mot_xy][0]= 0;
1507 s->b_direct_mv_table[mot_xy][1]= 0; 1640 s->b_direct_mv_table[mot_xy][1]= 0;
1508 1641
1509 return 256*256*256*64; 1642 return 256*256*256*64;
1510 } 1643 }
1644
1645 s->me.xmin= xmin;
1646 s->me.ymin= ymin;
1647 s->me.xmax= xmax;
1648 s->me.ymax= ymax;
1511 1649
1512 P_LEFT[0] = clip(mv_table[mot_xy - 1][0], xmin<<shift, xmax<<shift); 1650 P_LEFT[0] = clip(mv_table[mot_xy - 1][0], xmin<<shift, xmax<<shift);
1513 P_LEFT[1] = clip(mv_table[mot_xy - 1][1], ymin<<shift, ymax<<shift); 1651 P_LEFT[1] = clip(mv_table[mot_xy - 1][1], ymin<<shift, ymax<<shift);
1514 1652
1515 /* special case for first line */ 1653 /* special case for first line */
1523 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]); 1661 P_MEDIAN[1]= mid_pred(P_LEFT[1], P_TOP[1], P_TOPRIGHT[1]);
1524 } 1662 }
1525 1663
1526 //FIXME direct_search ptr in context!!! (needed for chroma anyway or this will get messy) 1664 //FIXME direct_search ptr in context!!! (needed for chroma anyway or this will get messy)
1527 if(s->flags&CODEC_FLAG_QPEL){ 1665 if(s->flags&CODEC_FLAG_QPEL){
1528 dmin = simple_direct_qpel_epzs_motion_search(s, 0, &mx, &my, P, 0, 0, xmin, ymin, xmax, ymax, 1666 dmin = simple_direct_qpel_epzs_motion_search(s, &mx, &my, P, 0, 0,
1529 &s->last_picture, mv_table, 1<<14, mv_penalty); 1667 src_data, ref_data, stride, uvstride, mv_table, 1<<14, mv_penalty);
1530 dmin = simple_direct_qpel_qpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, 1668 dmin = simple_direct_qpel_qpel_motion_search(s, &mx, &my, dmin,
1531 0, 0, &s->last_picture, 0, 0, mv_penalty); 1669 0, 0, src_data, ref_data, stride, uvstride, 0, 16, mv_penalty);
1532 1670
1533 if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip) 1671 if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip)
1534 dmin= simple_direct_qpel_qpel_get_mb_score(s, mx, my, 0, 0, &s->last_picture, mv_penalty); 1672 dmin= simple_direct_qpel_qpel_get_mb_score(s, mx, my, 0, 0, src_data, ref_data, stride, uvstride, mv_penalty);
1535 }else{ 1673 }else{
1536 dmin = simple_direct_hpel_epzs_motion_search(s, 0, &mx, &my, P, 0, 0, xmin, ymin, xmax, ymax, 1674 dmin = simple_direct_hpel_epzs_motion_search(s, &mx, &my, P, 0, 0,
1537 &s->last_picture, mv_table, 1<<15, mv_penalty); 1675 src_data, ref_data, stride, uvstride, mv_table, 1<<15, mv_penalty);
1538 dmin = simple_direct_hpel_hpel_motion_search(s, &mx, &my, dmin, xmin, ymin, xmax, ymax, 1676 dmin = simple_direct_hpel_hpel_motion_search(s, &mx, &my, dmin,
1539 0, 0, &s->last_picture, 0, 0, mv_penalty); 1677 0, 0, src_data, ref_data, stride, uvstride, 0, 16, mv_penalty);
1540 1678
1541 if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip) 1679 if(s->avctx->me_sub_cmp != s->avctx->mb_cmp && !s->me.skip)
1542 dmin= simple_direct_hpel_hpel_get_mb_score(s, mx, my, 0, 0, &s->last_picture, mv_penalty); 1680 dmin= simple_direct_hpel_hpel_get_mb_score(s, mx, my, 0, 0, src_data, ref_data, stride, uvstride, mv_penalty);
1543 } 1681 }
1682
1683 get_limits(s, 16*mb_x, 16*mb_y); //restore s->me.?min/max, maybe not needed
1544 1684
1545 s->b_direct_mv_table[mot_xy][0]= mx; 1685 s->b_direct_mv_table[mot_xy][0]= mx;
1546 s->b_direct_mv_table[mot_xy][1]= my; 1686 s->b_direct_mv_table[mot_xy][1]= my;
1547 return dmin; 1687 return dmin;
1548 } 1688 }
1549 1689
1550 void ff_estimate_b_frame_motion(MpegEncContext * s, 1690 void ff_estimate_b_frame_motion(MpegEncContext * s,
1551 int mb_x, int mb_y) 1691 int mb_x, int mb_y)
1552 { 1692 {
1553 const int penalty_factor= s->me.mb_penalty_factor; 1693 const int penalty_factor= s->me.mb_penalty_factor;
1554 int fmin, bmin, dmin, fbmin; 1694 int fmin, bmin, dmin, fbmin, bimin, fimin;
1555 int type=0; 1695 int type=0;
1696 const int stride= s->linesize;
1697 const int uvstride= s->uvlinesize;
1698 uint8_t *src_data[3]= {
1699 s->new_picture.data[0] + 16*(s->mb_x + stride*s->mb_y),
1700 s->new_picture.data[1] + 8*(s->mb_x + uvstride*s->mb_y),
1701 s->new_picture.data[2] + 8*(s->mb_x + uvstride*s->mb_y)
1702 };
1703 uint8_t *ref_data[6]= {
1704 s->last_picture.data[0] + 16*(s->mb_x + stride*s->mb_y),
1705 s->last_picture.data[1] + 8*(s->mb_x + uvstride*s->mb_y),
1706 s->last_picture.data[2] + 8*(s->mb_x + uvstride*s->mb_y),
1707 s->next_picture.data[0] + 16*(s->mb_x + stride*s->mb_y),
1708 s->next_picture.data[1] + 8*(s->mb_x + uvstride*s->mb_y),
1709 s->next_picture.data[2] + 8*(s->mb_x + uvstride*s->mb_y)
1710 };
1556 1711
1557 s->me.skip=0; 1712 s->me.skip=0;
1558 if (s->codec_id == CODEC_ID_MPEG4) 1713 if (s->codec_id == CODEC_ID_MPEG4)
1559 dmin= direct_search(s, mb_x, mb_y); 1714 dmin= direct_search(s, src_data, ref_data, stride, uvstride, mb_x, mb_y);
1560 else 1715 else
1561 dmin= INT_MAX; 1716 dmin= INT_MAX;
1562 1717 //FIXME penalty stuff for non mpeg4
1563 s->me.skip=0; 1718 s->me.skip=0;
1564 fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, &s->last_picture, s->f_code) + 3*penalty_factor; 1719 fmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_forw_mv_table, src_data,
1720 ref_data, stride, uvstride, s->f_code) + 3*penalty_factor;
1565 1721
1566 s->me.skip=0; 1722 s->me.skip=0;
1567 bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, &s->next_picture, s->b_code) + 2*penalty_factor; 1723 bmin= ff_estimate_motion_b(s, mb_x, mb_y, s->b_back_mv_table, src_data,
1724 ref_data+3, stride, uvstride, s->b_code) + 2*penalty_factor;
1568 //printf(" %d %d ", s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1]); 1725 //printf(" %d %d ", s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1]);
1569 1726
1570 s->me.skip=0; 1727 s->me.skip=0;
1571 fbmin= bidir_refine(s, mb_x, mb_y) + penalty_factor; 1728 fbmin= bidir_refine(s, src_data, ref_data, stride, uvstride, mb_x, mb_y) + penalty_factor;
1572 //printf("%d %d %d %d\n", dmin, fmin, bmin, fbmin); 1729 //printf("%d %d %d %d\n", dmin, fmin, bmin, fbmin);
1730
1731 if(s->flags & CODEC_FLAG_INTERLACED_ME){
1732 const int xy = mb_y*s->mb_stride + mb_x;
1733
1734 //FIXME mb type penalty
1735 s->me.skip=0;
1736 fimin= interlaced_search(s, src_data, ref_data ,
1737 s->b_field_mv_table[0], s->b_field_select_table[0], s->f_code,
1738 s->b_forw_mv_table[xy][0], s->b_forw_mv_table[xy][1]);
1739 bimin= interlaced_search(s, src_data, ref_data+3,
1740 s->b_field_mv_table[1], s->b_field_select_table[1], s->b_code,
1741 s->b_back_mv_table[xy][0], s->b_back_mv_table[xy][1]);
1742 }else
1743 fimin= bimin= INT_MAX;
1744
1573 { 1745 {
1574 int score= fmin; 1746 int score= fmin;
1575 type = MB_TYPE_FORWARD; 1747 type = CANDIDATE_MB_TYPE_FORWARD;
1576 1748
1577 if (dmin <= score){ 1749 if (dmin <= score){
1578 score = dmin; 1750 score = dmin;
1579 type = MB_TYPE_DIRECT; 1751 type = CANDIDATE_MB_TYPE_DIRECT;
1580 } 1752 }
1581 if(bmin<score){ 1753 if(bmin<score){
1582 score=bmin; 1754 score=bmin;
1583 type= MB_TYPE_BACKWARD; 1755 type= CANDIDATE_MB_TYPE_BACKWARD;
1584 } 1756 }
1585 if(fbmin<score){ 1757 if(fbmin<score){
1586 score=fbmin; 1758 score=fbmin;
1587 type= MB_TYPE_BIDIR; 1759 type= CANDIDATE_MB_TYPE_BIDIR;
1760 }
1761 if(fimin<score){
1762 score=fimin;
1763 type= CANDIDATE_MB_TYPE_FORWARD_I;
1764 }
1765 if(bimin<score){
1766 score=bimin;
1767 type= CANDIDATE_MB_TYPE_BACKWARD_I;
1588 } 1768 }
1589 1769
1590 score= ((unsigned)(score*score + 128*256))>>16; 1770 score= ((unsigned)(score*score + 128*256))>>16;
1591 s->current_picture.mc_mb_var_sum += score; 1771 s->current_picture.mc_mb_var_sum += score;
1592 s->current_picture.mc_mb_var[mb_y*s->mb_stride + mb_x] = score; //FIXME use SSE 1772 s->current_picture.mc_mb_var[mb_y*s->mb_stride + mb_x] = score; //FIXME use SSE
1593 } 1773 }
1594 1774
1595 if(s->avctx->mb_decision > FF_MB_DECISION_SIMPLE){ 1775 if(s->avctx->mb_decision > FF_MB_DECISION_SIMPLE){
1596 type= MB_TYPE_FORWARD | MB_TYPE_BACKWARD | MB_TYPE_BIDIR | MB_TYPE_DIRECT; //FIXME something smarter 1776 type= CANDIDATE_MB_TYPE_FORWARD | CANDIDATE_MB_TYPE_BACKWARD | CANDIDATE_MB_TYPE_BIDIR | CANDIDATE_MB_TYPE_DIRECT;
1597 if(dmin>256*256*16) type&= ~MB_TYPE_DIRECT; //dont try direct mode if its invalid for this MB 1777 if(fimin < INT_MAX)
1778 type |= CANDIDATE_MB_TYPE_FORWARD_I;
1779 if(bimin < INT_MAX)
1780 type |= CANDIDATE_MB_TYPE_BACKWARD_I;
1781 if(fimin < INT_MAX && bimin < INT_MAX){
1782 type |= CANDIDATE_MB_TYPE_BIDIR_I;
1783 }
1784 //FIXME something smarter
1785 if(dmin>256*256*16) type&= ~CANDIDATE_MB_TYPE_DIRECT; //dont try direct mode if its invalid for this MB
1598 } 1786 }
1599 1787
1600 s->mb_type[mb_y*s->mb_stride + mb_x]= type; 1788 s->mb_type[mb_y*s->mb_stride + mb_x]= type;
1601 } 1789 }
1602 1790
1659 1847
1660 if(s->msmpeg4_version) range= 16; 1848 if(s->msmpeg4_version) range= 16;
1661 1849
1662 if(s->avctx->me_range && range > s->avctx->me_range) range= s->avctx->me_range; 1850 if(s->avctx->me_range && range > s->avctx->me_range) range= s->avctx->me_range;
1663 1851
1664 /* clip / convert to intra 16x16 type MVs */
1665 for(y=0; y<s->mb_height; y++){
1666 int x;
1667 int xy= y*s->mb_stride;
1668 for(x=0; x<s->mb_width; x++){
1669 if(s->mb_type[xy]&MB_TYPE_INTER){
1670 if( s->p_mv_table[xy][0] >=range || s->p_mv_table[xy][0] <-range
1671 || s->p_mv_table[xy][1] >=range || s->p_mv_table[xy][1] <-range){
1672 s->mb_type[xy] &= ~MB_TYPE_INTER;
1673 s->mb_type[xy] |= MB_TYPE_INTRA;
1674 s->current_picture.mb_type[xy]= MB_TYPE_INTRA;
1675 s->p_mv_table[xy][0] = 0;
1676 s->p_mv_table[xy][1] = 0;
1677 }
1678 }
1679 xy++;
1680 }
1681 }
1682 //printf("%d no:%d %d//\n", clip, noclip, f_code); 1852 //printf("%d no:%d %d//\n", clip, noclip, f_code);
1683 if(s->flags&CODEC_FLAG_4MV){ 1853 if(s->flags&CODEC_FLAG_4MV){
1684 const int wrap= 2+ s->mb_width*2; 1854 const int wrap= 2+ s->mb_width*2;
1685 1855
1686 /* clip / convert to intra 8x8 type MVs */ 1856 /* clip / convert to intra 8x8 type MVs */
1688 int xy= (y*2 + 1)*wrap + 1; 1858 int xy= (y*2 + 1)*wrap + 1;
1689 int i= y*s->mb_stride; 1859 int i= y*s->mb_stride;
1690 int x; 1860 int x;
1691 1861
1692 for(x=0; x<s->mb_width; x++){ 1862 for(x=0; x<s->mb_width; x++){
1693 if(s->mb_type[i]&MB_TYPE_INTER4V){ 1863 if(s->mb_type[i]&CANDIDATE_MB_TYPE_INTER4V){
1694 int block; 1864 int block;
1695 for(block=0; block<4; block++){ 1865 for(block=0; block<4; block++){
1696 int off= (block& 1) + (block>>1)*wrap; 1866 int off= (block& 1) + (block>>1)*wrap;
1697 int mx= s->current_picture.motion_val[0][ xy + off ][0]; 1867 int mx= s->current_picture.motion_val[0][ xy + off ][0];
1698 int my= s->current_picture.motion_val[0][ xy + off ][1]; 1868 int my= s->current_picture.motion_val[0][ xy + off ][1];
1699 1869
1700 if( mx >=range || mx <-range 1870 if( mx >=range || mx <-range
1701 || my >=range || my <-range){ 1871 || my >=range || my <-range){
1702 s->mb_type[i] &= ~MB_TYPE_INTER4V; 1872 s->mb_type[i] &= ~CANDIDATE_MB_TYPE_INTER4V;
1703 s->mb_type[i] |= MB_TYPE_INTRA; 1873 s->mb_type[i] |= CANDIDATE_MB_TYPE_INTRA;
1704 s->current_picture.mb_type[i]= MB_TYPE_INTRA; 1874 s->current_picture.mb_type[i]= CANDIDATE_MB_TYPE_INTRA;
1705 } 1875 }
1706 } 1876 }
1707 } 1877 }
1708 xy+=2; 1878 xy+=2;
1709 i++; 1879 i++;
1710 } 1880 }
1711 } 1881 }
1712 } 1882 }
1713 } 1883 }
1714 1884
1715 void ff_fix_long_b_mvs(MpegEncContext * s, int16_t (*mv_table)[2], int f_code, int type) 1885 /**
1716 { 1886 *
1717 int y; 1887 * @param truncate 1 for truncation, 0 for using intra
1888 */
1889 void ff_fix_long_mvs(MpegEncContext * s, uint8_t *field_select_table, int field_select,
1890 int16_t (*mv_table)[2], int f_code, int type, int truncate)
1891 {
1892 int y, h_range, v_range;
1718 1893
1719 // RAL: 8 in MPEG-1, 16 in MPEG-4 1894 // RAL: 8 in MPEG-1, 16 in MPEG-4
1720 int range = (((s->out_format == FMT_MPEG1) ? 8 : 16) << f_code); 1895 int range = (((s->out_format == FMT_MPEG1) ? 8 : 16) << f_code);
1721 1896
1897 if(s->msmpeg4_version) range= 16;
1722 if(s->avctx->me_range && range > s->avctx->me_range) range= s->avctx->me_range; 1898 if(s->avctx->me_range && range > s->avctx->me_range) range= s->avctx->me_range;
1899
1900 h_range= range;
1901 v_range= field_select_table ? range>>1 : range;
1723 1902
1724 /* clip / convert to intra 16x16 type MVs */ 1903 /* clip / convert to intra 16x16 type MVs */
1725 for(y=0; y<s->mb_height; y++){ 1904 for(y=0; y<s->mb_height; y++){
1726 int x; 1905 int x;
1727 int xy= y*s->mb_stride; 1906 int xy= y*s->mb_stride;
1728 for(x=0; x<s->mb_width; x++){ 1907 for(x=0; x<s->mb_width; x++){
1729 if (s->mb_type[xy] & type){ // RAL: "type" test added... 1908 if (s->mb_type[xy] & type){ // RAL: "type" test added...
1730 if( mv_table[xy][0] >=range || mv_table[xy][0] <-range 1909 if(field_select_table==NULL || field_select_table[xy] == field_select){
1731 || mv_table[xy][1] >=range || mv_table[xy][1] <-range){ 1910 if( mv_table[xy][0] >=h_range || mv_table[xy][0] <-h_range
1732 1911 || mv_table[xy][1] >=v_range || mv_table[xy][1] <-v_range){
1733 if(s->codec_id == CODEC_ID_MPEG1VIDEO && 0){ 1912
1734 }else{ 1913 if(truncate){
1735 if (mv_table[xy][0] > range-1) mv_table[xy][0]= range-1; 1914 if (mv_table[xy][0] > h_range-1) mv_table[xy][0]= h_range-1;
1736 else if(mv_table[xy][0] < -range ) mv_table[xy][0]= -range; 1915 else if(mv_table[xy][0] < -h_range ) mv_table[xy][0]= -h_range;
1737 if (mv_table[xy][1] > range-1) mv_table[xy][1]= range-1; 1916 if (mv_table[xy][1] > v_range-1) mv_table[xy][1]= v_range-1;
1738 else if(mv_table[xy][1] < -range ) mv_table[xy][1]= -range; 1917 else if(mv_table[xy][1] < -v_range ) mv_table[xy][1]= -v_range;
1918 }else{
1919 s->mb_type[xy] &= ~type;
1920 s->mb_type[xy] |= CANDIDATE_MB_TYPE_INTRA;
1921 mv_table[xy][0]=
1922 mv_table[xy][1]= 0;
1923 }
1739 } 1924 }
1740 } 1925 }
1741 } 1926 }
1742 xy++; 1927 xy++;
1743 } 1928 }