Mercurial > libavcodec.hg
annotate motion_est_template.c @ 1352:e8ff4783f188 libavcodec
1) remove TBL support in PPC performance. It's much more useful to use the
PMCs, and with Apple's CHUD it's fairly easy too. No reason to keep useless
code around
2) make the PPC perf stuff a configure option
3) make put_pixels16_altivec a bit faster by unrolling the loop by 4
patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
author | michaelni |
---|---|
date | Wed, 09 Jul 2003 20:18:13 +0000 |
parents | ec946cb74397 |
children | a7a9df478e46 |
rev | line source |
---|---|
936 | 1 /* |
2 * Motion estimation | |
3 * Copyright (c) 2002 Michael Niedermayer | |
4 * | |
5 * This library is free software; you can redistribute it and/or | |
6 * modify it under the terms of the GNU Lesser General Public | |
7 * License as published by the Free Software Foundation; either | |
8 * version 2 of the License, or (at your option) any later version. | |
9 * | |
10 * This library is distributed in the hope that it will be useful, | |
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
13 * Lesser General Public License for more details. | |
14 * | |
15 * You should have received a copy of the GNU Lesser General Public | |
16 * License along with this library; if not, write to the Free Software | |
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
18 * | |
19 */ | |
1106 | 20 |
21 /** | |
22 * @file motion_est_template.c | |
23 * Motion estimation template. | |
24 */ | |
936 | 25 |
/*
 * LOAD_COMMON(x, y): declare the locals shared by the routines of this
 * template. It expects `s`, `ref_picture` and `size` to be in scope and
 * provides the strides, the plane pointers of the new/reference/next picture
 * offset to pixel position (x, y) (chroma at half resolution), the score map
 * and the half-/quarter-pel function tables.
 *
 * Not every user needs every variable; the compiler is expected to discard
 * the unused ones (gcc 3.2.2 appears to). The dummy `unu` sum exists only to
 * silence the corresponding unused-variable warnings.
 */
#define LOAD_COMMON(x, y)\
    uint32_t * const score_map= s->me.score_map;\
    const int stride  = s->linesize;\
    const int uvstride= s->uvlinesize;\
    const int time_pp = s->pp_time;\
    const int time_pb = s->pb_time;\
    uint8_t * const src_y = s->new_picture.data[0]  + (x) + ((y) * stride);\
    uint8_t * const src_u = s->new_picture.data[1]  + ((x)>>1) + (((y)>>1) * uvstride);\
    uint8_t * const src_v = s->new_picture.data[2]  + ((x)>>1) + (((y)>>1) * uvstride);\
    uint8_t * const ref_y = ref_picture->data[0]    + (x) + ((y) * stride);\
    uint8_t * const ref_u = ref_picture->data[1]    + ((x)>>1) + (((y)>>1) * uvstride);\
    uint8_t * const ref_v = ref_picture->data[2]    + ((x)>>1) + (((y)>>1) * uvstride);\
    uint8_t * const ref2_y= s->next_picture.data[0] + (x) + ((y) * stride);\
    op_pixels_func (*hpel_put)[4];\
    op_pixels_func (*hpel_avg)[4]= &s->dsp.avg_pixels_tab[size];\
    op_pixels_func (*chroma_hpel_put)[4];\
    qpel_mc_func (*qpel_put)[16];\
    qpel_mc_func (*qpel_avg)[16]= &s->dsp.avg_qpel_pixels_tab[size];\
    const __attribute__((unused)) int unu= time_pp + time_pb + (size_t)src_u + (size_t)src_v + (size_t)ref_u + (size_t)ref_v\
                                           + (size_t)ref2_y + (size_t)hpel_avg + (size_t)qpel_avg + (size_t)score_map;\
    /* the "put" tables depend on the rounding mode (FIXME b_type) */\
    hpel_put       = s->no_rounding ? &s->dsp.put_no_rnd_pixels_tab[size]\
                                    : &s->dsp.put_pixels_tab[size];\
    chroma_hpel_put= s->no_rounding ? &s->dsp.put_no_rnd_pixels_tab[size+1]\
                                    : &s->dsp.put_pixels_tab[size+1];\
    qpel_put       = s->no_rounding ? &s->dsp.put_no_rnd_qpel_pixels_tab[size]\
                                    : &s->dsp.put_qpel_pixels_tab[size];
57 | |
58 | |
59 #ifdef CMP_HPEL | |
60 | |
/*
 * Score the half-pel position (2*x+dx, 2*y+dy): CMP_HPEL stores the
 * comparison result in `d`, the vector cost relative to (pred_x, pred_y) is
 * added, and (dmin, bx, by) are updated when the candidate is better.
 * Expands to a complete block; call sites use it without a semicolon.
 */
#define CHECK_HALF_MV(dx, dy, x, y)\
{\
    const int hx= (dx) + 2*(x);\
    const int hy= (dy) + 2*(y);\
    CMP_HPEL(d, dx, dy, x, y, size);\
    d += penalty_factor*(mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y]);\
    COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
}
69 | |
70 #if 0 | |
71 static int RENAME(hpel_motion_search)(MpegEncContext * s, | |
72 int *mx_ptr, int *my_ptr, int dmin, | |
73 int xmin, int ymin, int xmax, int ymax, | |
74 int pred_x, int pred_y, Picture *ref_picture, | |
1162 | 75 int n, int size, uint8_t * const mv_penalty) |
936 | 76 { |
77 const int xx = 16 * s->mb_x + 8*(n&1); | |
78 const int yy = 16 * s->mb_y + 8*(n>>1); | |
79 const int mx = *mx_ptr; | |
80 const int my = *my_ptr; | |
948 | 81 const int penalty_factor= s->me.sub_penalty_factor; |
936 | 82 |
83 LOAD_COMMON(xx, yy); | |
84 | |
85 // INIT; | |
86 //FIXME factorize | |
87 me_cmp_func cmp, chroma_cmp, cmp_sub, chroma_cmp_sub; | |
88 | |
89 if(s->no_rounding /*FIXME b_type*/){ | |
90 hpel_put= &s->dsp.put_no_rnd_pixels_tab[size]; | |
91 chroma_hpel_put= &s->dsp.put_no_rnd_pixels_tab[size+1]; | |
92 }else{ | |
93 hpel_put=& s->dsp.put_pixels_tab[size]; | |
94 chroma_hpel_put= &s->dsp.put_pixels_tab[size+1]; | |
95 } | |
96 cmp= s->dsp.me_cmp[size]; | |
97 chroma_cmp= s->dsp.me_cmp[size+1]; | |
98 cmp_sub= s->dsp.me_sub_cmp[size]; | |
99 chroma_cmp_sub= s->dsp.me_sub_cmp[size+1]; | |
100 | |
101 if(s->me.skip){ //FIXME somehow move up (benchmark) | |
102 *mx_ptr = 0; | |
103 *my_ptr = 0; | |
104 return dmin; | |
105 } | |
106 | |
107 if(s->avctx->me_cmp != s->avctx->me_sub_cmp){ | |
108 CMP_HPEL(dmin, 0, 0, mx, my, size); | |
109 if(mx || my) | |
110 dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor; | |
111 } | |
112 | |
113 if (mx > xmin && mx < xmax && | |
114 my > ymin && my < ymax) { | |
115 int bx=2*mx, by=2*my; | |
116 int d= dmin; | |
117 | |
118 CHECK_HALF_MV(1, 1, mx-1, my-1) | |
119 CHECK_HALF_MV(0, 1, mx , my-1) | |
120 CHECK_HALF_MV(1, 1, mx , my-1) | |
121 CHECK_HALF_MV(1, 0, mx-1, my ) | |
122 CHECK_HALF_MV(1, 0, mx , my ) | |
123 CHECK_HALF_MV(1, 1, mx-1, my ) | |
124 CHECK_HALF_MV(0, 1, mx , my ) | |
125 CHECK_HALF_MV(1, 1, mx , my ) | |
126 | |
948 | 127 assert(bx >= xmin*2 || bx <= xmax*2 || by >= ymin*2 || by <= ymax*2); |
936 | 128 |
129 *mx_ptr = bx; | |
130 *my_ptr = by; | |
131 }else{ | |
132 *mx_ptr =2*mx; | |
133 *my_ptr =2*my; | |
134 } | |
135 | |
136 return dmin; | |
137 } | |
138 | |
139 #else | |
140 static int RENAME(hpel_motion_search)(MpegEncContext * s, | |
141 int *mx_ptr, int *my_ptr, int dmin, | |
142 int xmin, int ymin, int xmax, int ymax, | |
143 int pred_x, int pred_y, Picture *ref_picture, | |
1162 | 144 int n, int size, uint8_t * const mv_penalty) |
936 | 145 { |
146 const int xx = 16 * s->mb_x + 8*(n&1); | |
147 const int yy = 16 * s->mb_y + 8*(n>>1); | |
148 const int mx = *mx_ptr; | |
149 const int my = *my_ptr; | |
150 const int penalty_factor= s->me.sub_penalty_factor; | |
151 me_cmp_func cmp_sub, chroma_cmp_sub; | |
1013 | 152 int bx=2*mx, by=2*my; |
936 | 153 |
154 LOAD_COMMON(xx, yy); | |
155 | |
156 //FIXME factorize | |
157 | |
158 cmp_sub= s->dsp.me_sub_cmp[size]; | |
159 chroma_cmp_sub= s->dsp.me_sub_cmp[size+1]; | |
160 | |
161 if(s->me.skip){ //FIXME move out of hpel? | |
162 *mx_ptr = 0; | |
163 *my_ptr = 0; | |
164 return dmin; | |
165 } | |
166 | |
167 if(s->avctx->me_cmp != s->avctx->me_sub_cmp){ | |
168 CMP_HPEL(dmin, 0, 0, mx, my, size); | |
1011 | 169 if(mx || my || size>0) |
936 | 170 dmin += (mv_penalty[2*mx - pred_x] + mv_penalty[2*my - pred_y])*penalty_factor; |
171 } | |
172 | |
173 if (mx > xmin && mx < xmax && | |
174 my > ymin && my < ymax) { | |
175 int d= dmin; | |
176 const int index= (my<<ME_MAP_SHIFT) + mx; | |
177 const int t= score_map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] | |
948 | 178 + (mv_penalty[bx - pred_x] + mv_penalty[by-2 - pred_y])*s->me.penalty_factor; |
936 | 179 const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)] |
948 | 180 + (mv_penalty[bx-2 - pred_x] + mv_penalty[by - pred_y])*s->me.penalty_factor; |
936 | 181 const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)] |
948 | 182 + (mv_penalty[bx+2 - pred_x] + mv_penalty[by - pred_y])*s->me.penalty_factor; |
936 | 183 const int b= score_map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] |
948 | 184 + (mv_penalty[bx - pred_x] + mv_penalty[by+2 - pred_y])*s->me.penalty_factor; |
185 | |
1013 | 186 #if 1 |
948 | 187 int key; |
188 int map_generation= s->me.map_generation; | |
189 uint32_t *map= s->me.map; | |
190 key= ((my-1)<<ME_MAP_MV_BITS) + (mx) + map_generation; | |
191 assert(map[(index-(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key); | |
192 key= ((my+1)<<ME_MAP_MV_BITS) + (mx) + map_generation; | |
193 assert(map[(index+(1<<ME_MAP_SHIFT))&(ME_MAP_SIZE-1)] == key); | |
194 key= ((my)<<ME_MAP_MV_BITS) + (mx+1) + map_generation; | |
195 assert(map[(index+1)&(ME_MAP_SIZE-1)] == key); | |
196 key= ((my)<<ME_MAP_MV_BITS) + (mx-1) + map_generation; | |
197 assert(map[(index-1)&(ME_MAP_SIZE-1)] == key); | |
198 #endif | |
936 | 199 if(t<=b){ |
200 CHECK_HALF_MV(0, 1, mx ,my-1) | |
201 if(l<=r){ | |
202 CHECK_HALF_MV(1, 1, mx-1, my-1) | |
203 if(t+r<=b+l){ | |
204 CHECK_HALF_MV(1, 1, mx , my-1) | |
205 }else{ | |
206 CHECK_HALF_MV(1, 1, mx-1, my ) | |
207 } | |
208 CHECK_HALF_MV(1, 0, mx-1, my ) | |
209 }else{ | |
210 CHECK_HALF_MV(1, 1, mx , my-1) | |
211 if(t+l<=b+r){ | |
212 CHECK_HALF_MV(1, 1, mx-1, my-1) | |
213 }else{ | |
214 CHECK_HALF_MV(1, 1, mx , my ) | |
215 } | |
216 CHECK_HALF_MV(1, 0, mx , my ) | |
217 } | |
218 }else{ | |
219 if(l<=r){ | |
220 if(t+l<=b+r){ | |
221 CHECK_HALF_MV(1, 1, mx-1, my-1) | |
222 }else{ | |
223 CHECK_HALF_MV(1, 1, mx , my ) | |
224 } | |
225 CHECK_HALF_MV(1, 0, mx-1, my) | |
226 CHECK_HALF_MV(1, 1, mx-1, my) | |
227 }else{ | |
228 if(t+r<=b+l){ | |
229 CHECK_HALF_MV(1, 1, mx , my-1) | |
230 }else{ | |
231 CHECK_HALF_MV(1, 1, mx-1, my) | |
232 } | |
233 CHECK_HALF_MV(1, 0, mx , my) | |
234 CHECK_HALF_MV(1, 1, mx , my) | |
235 } | |
236 CHECK_HALF_MV(0, 1, mx , my) | |
237 } | |
238 assert(bx >= xmin*2 && bx <= xmax*2 && by >= ymin*2 && by <= ymax*2); | |
239 } | |
240 | |
1013 | 241 *mx_ptr = bx; |
242 *my_ptr = by; | |
243 | |
936 | 244 return dmin; |
245 } | |
246 #endif | |
247 | |
1013 | 248 static int RENAME(hpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pred_x, int pred_y, Picture *ref_picture, |
1162 | 249 uint8_t * const mv_penalty) |
1013 | 250 { |
251 // const int check_luma= s->dsp.me_sub_cmp != s->dsp.mb_cmp; | |
252 const int size= 0; | |
253 const int xx = 16 * s->mb_x; | |
254 const int yy = 16 * s->mb_y; | |
255 const int penalty_factor= s->me.mb_penalty_factor; | |
256 const int xmin= -256*256, ymin= -256*256, xmax= 256*256, ymax= 256*256; //assume that the caller checked these | |
257 const __attribute__((unused)) int unu2= xmin + xmax +ymin + ymax; //no unused warning shit | |
258 me_cmp_func cmp_sub, chroma_cmp_sub; | |
259 int d; | |
260 | |
261 LOAD_COMMON(xx, yy); | |
262 | |
263 //FIXME factorize | |
264 | |
265 cmp_sub= s->dsp.mb_cmp[size]; | |
266 chroma_cmp_sub= s->dsp.mb_cmp[size+1]; | |
267 | |
268 assert(!s->me.skip); | |
269 assert(s->avctx->me_sub_cmp != s->avctx->mb_cmp); | |
270 | |
271 CMP_HPEL(d, mx&1, my&1, mx>>1, my>>1, size); | |
272 //FIXME check cbp before adding penalty for (0,0) vector | |
273 if(mx || my || size>0) | |
274 d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor; | |
275 | |
276 return d; | |
277 } | |
278 | |
936 | 279 #endif /* CMP_HPEL */ |
280 | |
1013 | 281 |
282 | |
936 | 283 #ifdef CMP_QPEL |
284 | |
/*
 * Score the quarter-pel position (4*x+dx, 4*y+dy): CMP_QPEL stores the
 * comparison result in `d`, the vector cost relative to (pred_x, pred_y) is
 * added, and (dmin, bx, by) are updated when the candidate is better.
 * Expands to a complete block; call sites use it without a semicolon.
 */
#define CHECK_QUARTER_MV(dx, dy, x, y)\
{\
    const int hx= (dx) + 4*(x);\
    const int hy= (dy) + 4*(y);\
    CMP_QPEL(d, dx, dy, x, y, size);\
    d += penalty_factor*(mv_penalty[hx - pred_x] + mv_penalty[hy - pred_y]);\
    COPY3_IF_LT(dmin, d, bx, hx, by, hy)\
}
293 | |
294 static int RENAME(qpel_motion_search)(MpegEncContext * s, | |
295 int *mx_ptr, int *my_ptr, int dmin, | |
296 int xmin, int ymin, int xmax, int ymax, | |
297 int pred_x, int pred_y, Picture *ref_picture, | |
1162 | 298 int n, int size, uint8_t * const mv_penalty) |
936 | 299 { |
300 const int xx = 16 * s->mb_x + 8*(n&1); | |
301 const int yy = 16 * s->mb_y + 8*(n>>1); | |
302 const int mx = *mx_ptr; | |
303 const int my = *my_ptr; | |
304 const int penalty_factor= s->me.sub_penalty_factor; | |
305 const int map_generation= s->me.map_generation; | |
954 | 306 const int subpel_quality= s->avctx->me_subpel_quality; |
936 | 307 uint32_t *map= s->me.map; |
308 me_cmp_func cmp, chroma_cmp; | |
309 me_cmp_func cmp_sub, chroma_cmp_sub; | |
310 | |
311 LOAD_COMMON(xx, yy); | |
312 | |
313 cmp= s->dsp.me_cmp[size]; | |
314 chroma_cmp= s->dsp.me_cmp[size+1]; //factorize FIXME | |
315 //FIXME factorize | |
316 | |
317 cmp_sub= s->dsp.me_sub_cmp[size]; | |
318 chroma_cmp_sub= s->dsp.me_sub_cmp[size+1]; | |
319 | |
320 if(s->me.skip){ //FIXME somehow move up (benchmark) | |
321 *mx_ptr = 0; | |
322 *my_ptr = 0; | |
323 return dmin; | |
324 } | |
325 | |
326 if(s->avctx->me_cmp != s->avctx->me_sub_cmp){ | |
327 CMP_QPEL(dmin, 0, 0, mx, my, size); | |
1011 | 328 if(mx || my || size>0) |
936 | 329 dmin += (mv_penalty[4*mx - pred_x] + mv_penalty[4*my - pred_y])*penalty_factor; |
330 } | |
331 | |
332 if (mx > xmin && mx < xmax && | |
333 my > ymin && my < ymax) { | |
334 int bx=4*mx, by=4*my; | |
335 int d= dmin; | |
336 int i, nx, ny; | |
337 const int index= (my<<ME_MAP_SHIFT) + mx; | |
338 const int t= score_map[(index-(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)]; | |
339 const int l= score_map[(index- 1 )&(ME_MAP_SIZE-1)]; | |
340 const int r= score_map[(index+ 1 )&(ME_MAP_SIZE-1)]; | |
341 const int b= score_map[(index+(1<<ME_MAP_SHIFT) )&(ME_MAP_SIZE-1)]; | |
342 const int c= score_map[(index )&(ME_MAP_SIZE-1)]; | |
343 int best[8]; | |
344 int best_pos[8][2]; | |
345 | |
346 memset(best, 64, sizeof(int)*8); | |
347 #if 1 | |
954 | 348 if(s->me.dia_size>=2){ |
936 | 349 const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)]; |
350 const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)]; | |
351 const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)]; | |
352 const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)]; | |
353 | |
354 for(ny= -3; ny <= 3; ny++){ | |
355 for(nx= -3; nx <= 3; nx++){ | |
356 const int t2= nx*nx*(tr + tl - 2*t) + 4*nx*(tr-tl) + 32*t; | |
357 const int c2= nx*nx*( r + l - 2*c) + 4*nx*( r- l) + 32*c; | |
358 const int b2= nx*nx*(br + bl - 2*b) + 4*nx*(br-bl) + 32*b; | |
359 int score= ny*ny*(b2 + t2 - 2*c2) + 4*ny*(b2 - t2) + 32*c2; | |
360 int i; | |
361 | |
362 if((nx&3)==0 && (ny&3)==0) continue; | |
363 | |
364 score += 1024*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor; | |
365 | |
366 // if(nx&1) score-=1024*s->me.penalty_factor; | |
367 // if(ny&1) score-=1024*s->me.penalty_factor; | |
368 | |
369 for(i=0; i<8; i++){ | |
370 if(score < best[i]){ | |
371 memmove(&best[i+1], &best[i], sizeof(int)*(7-i)); | |
372 memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i)); | |
373 best[i]= score; | |
374 best_pos[i][0]= nx + 4*mx; | |
375 best_pos[i][1]= ny + 4*my; | |
376 break; | |
377 } | |
378 } | |
379 } | |
380 } | |
381 }else{ | |
382 int tl; | |
383 const int cx = 4*(r - l); | |
384 const int cx2= r + l - 2*c; | |
385 const int cy = 4*(b - t); | |
386 const int cy2= b + t - 2*c; | |
387 int cxy; | |
388 | |
389 if(map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)] == (my<<ME_MAP_MV_BITS) + mx + map_generation && 0){ //FIXME | |
390 tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)]; | |
391 }else{ | |
392 CMP(tl, mx-1, my-1, size); //FIXME wrong if chroma me is different | |
393 } | |
394 | |
395 cxy= 2*tl + (cx + cy)/4 - (cx2 + cy2) - 2*c; | |
396 | |
397 assert(16*cx2 + 4*cx + 32*c == 32*r); | |
398 assert(16*cx2 - 4*cx + 32*c == 32*l); | |
399 assert(16*cy2 + 4*cy + 32*c == 32*b); | |
400 assert(16*cy2 - 4*cy + 32*c == 32*t); | |
401 assert(16*cxy + 16*cy2 + 16*cx2 - 4*cy - 4*cx + 32*c == 32*tl); | |
402 | |
403 for(ny= -3; ny <= 3; ny++){ | |
404 for(nx= -3; nx <= 3; nx++){ | |
405 int score= ny*nx*cxy + nx*nx*cx2 + ny*ny*cy2 + nx*cx + ny*cy + 32*c; //FIXME factor | |
406 int i; | |
407 | |
408 if((nx&3)==0 && (ny&3)==0) continue; | |
409 | |
410 score += 32*(mv_penalty[4*mx + nx - pred_x] + mv_penalty[4*my + ny - pred_y])*penalty_factor; | |
411 // if(nx&1) score-=32*s->me.penalty_factor; | |
412 // if(ny&1) score-=32*s->me.penalty_factor; | |
413 | |
414 for(i=0; i<8; i++){ | |
415 if(score < best[i]){ | |
416 memmove(&best[i+1], &best[i], sizeof(int)*(7-i)); | |
417 memmove(&best_pos[i+1][0], &best_pos[i][0], sizeof(int)*2*(7-i)); | |
418 best[i]= score; | |
419 best_pos[i][0]= nx + 4*mx; | |
420 best_pos[i][1]= ny + 4*my; | |
421 break; | |
422 } | |
423 } | |
424 } | |
425 } | |
426 } | |
954 | 427 for(i=0; i<subpel_quality; i++){ |
936 | 428 nx= best_pos[i][0]; |
429 ny= best_pos[i][1]; | |
430 CHECK_QUARTER_MV(nx&3, ny&3, nx>>2, ny>>2) | |
431 } | |
954 | 432 |
936 | 433 #if 0 |
954 | 434 const int tl= score_map[(index-(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)]; |
435 const int bl= score_map[(index+(1<<ME_MAP_SHIFT)-1)&(ME_MAP_SIZE-1)]; | |
436 const int tr= score_map[(index-(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)]; | |
437 const int br= score_map[(index+(1<<ME_MAP_SHIFT)+1)&(ME_MAP_SIZE-1)]; | |
438 // if(l < r && l < t && l < b && l < tl && l < bl && l < tr && l < br && bl < tl){ | |
439 if(tl<br){ | |
440 | |
441 // nx= FFMAX(4*mx - bx, bx - 4*mx); | |
442 // ny= FFMAX(4*my - by, by - 4*my); | |
936 | 443 |
954 | 444 static int stats[7][7], count; |
445 count++; | |
446 stats[4*mx - bx + 3][4*my - by + 3]++; | |
447 if(256*256*256*64 % count ==0){ | |
448 for(i=0; i<49; i++){ | |
449 if((i%7)==0) printf("\n"); | |
936 | 450 printf("%6d ", stats[0][i]); |
451 } | |
452 printf("\n"); | |
453 } | |
954 | 454 } |
936 | 455 #endif |
456 #else | |
457 | |
458 CHECK_QUARTER_MV(2, 2, mx-1, my-1) | |
459 CHECK_QUARTER_MV(0, 2, mx , my-1) | |
460 CHECK_QUARTER_MV(2, 2, mx , my-1) | |
461 CHECK_QUARTER_MV(2, 0, mx , my ) | |
462 CHECK_QUARTER_MV(2, 2, mx , my ) | |
463 CHECK_QUARTER_MV(0, 2, mx , my ) | |
464 CHECK_QUARTER_MV(2, 2, mx-1, my ) | |
465 CHECK_QUARTER_MV(2, 0, mx-1, my ) | |
466 | |
467 nx= bx; | |
468 ny= by; | |
469 | |
470 for(i=0; i<8; i++){ | |
471 int ox[8]= {0, 1, 1, 1, 0,-1,-1,-1}; | |
472 int oy[8]= {1, 1, 0,-1,-1,-1, 0, 1}; | |
473 CHECK_QUARTER_MV((nx + ox[i])&3, (ny + oy[i])&3, (nx + ox[i])>>2, (ny + oy[i])>>2) | |
474 } | |
475 #endif | |
476 #if 0 | |
477 //outer ring | |
478 CHECK_QUARTER_MV(1, 3, mx-1, my-1) | |
479 CHECK_QUARTER_MV(1, 2, mx-1, my-1) | |
480 CHECK_QUARTER_MV(1, 1, mx-1, my-1) | |
481 CHECK_QUARTER_MV(2, 1, mx-1, my-1) | |
482 CHECK_QUARTER_MV(3, 1, mx-1, my-1) | |
483 CHECK_QUARTER_MV(0, 1, mx , my-1) | |
484 CHECK_QUARTER_MV(1, 1, mx , my-1) | |
485 CHECK_QUARTER_MV(2, 1, mx , my-1) | |
486 CHECK_QUARTER_MV(3, 1, mx , my-1) | |
487 CHECK_QUARTER_MV(3, 2, mx , my-1) | |
488 CHECK_QUARTER_MV(3, 3, mx , my-1) | |
489 CHECK_QUARTER_MV(3, 0, mx , my ) | |
490 CHECK_QUARTER_MV(3, 1, mx , my ) | |
491 CHECK_QUARTER_MV(3, 2, mx , my ) | |
492 CHECK_QUARTER_MV(3, 3, mx , my ) | |
493 CHECK_QUARTER_MV(2, 3, mx , my ) | |
494 CHECK_QUARTER_MV(1, 3, mx , my ) | |
495 CHECK_QUARTER_MV(0, 3, mx , my ) | |
496 CHECK_QUARTER_MV(3, 3, mx-1, my ) | |
497 CHECK_QUARTER_MV(2, 3, mx-1, my ) | |
498 CHECK_QUARTER_MV(1, 3, mx-1, my ) | |
499 CHECK_QUARTER_MV(1, 2, mx-1, my ) | |
500 CHECK_QUARTER_MV(1, 1, mx-1, my ) | |
501 CHECK_QUARTER_MV(1, 0, mx-1, my ) | |
502 #endif | |
503 assert(bx >= xmin*4 && bx <= xmax*4 && by >= ymin*4 && by <= ymax*4); | |
504 | |
505 *mx_ptr = bx; | |
506 *my_ptr = by; | |
507 }else{ | |
508 *mx_ptr =4*mx; | |
509 *my_ptr =4*my; | |
510 } | |
511 | |
512 return dmin; | |
513 } | |
514 | |
1013 | 515 static int RENAME(qpel_get_mb_score)(MpegEncContext * s, int mx, int my, int pred_x, int pred_y, Picture *ref_picture, |
1162 | 516 uint8_t * const mv_penalty) |
1013 | 517 { |
518 const int size= 0; | |
519 const int xx = 16 * s->mb_x; | |
520 const int yy = 16 * s->mb_y; | |
521 const int penalty_factor= s->me.mb_penalty_factor; | |
522 const int xmin= -256*256, ymin= -256*256, xmax= 256*256, ymax= 256*256; //assume that the caller checked these | |
523 const __attribute__((unused)) int unu2= xmin + xmax +ymin + ymax; //no unused warning shit | |
524 me_cmp_func cmp_sub, chroma_cmp_sub; | |
525 int d; | |
526 | |
527 LOAD_COMMON(xx, yy); | |
528 | |
529 //FIXME factorize | |
530 | |
531 cmp_sub= s->dsp.mb_cmp[size]; | |
532 chroma_cmp_sub= s->dsp.mb_cmp[size+1]; | |
533 | |
534 assert(!s->me.skip); | |
535 assert(s->avctx->me_sub_cmp != s->avctx->mb_cmp); | |
536 | |
537 CMP_QPEL(d, mx&3, my&3, mx>>2, my>>2, size); | |
538 //FIXME check cbp before adding penalty for (0,0) vector | |
539 if(mx || my || size>0) | |
540 d += (mv_penalty[mx - pred_x] + mv_penalty[my - pred_y])*penalty_factor; | |
541 | |
542 return d; | |
543 } | |
544 | |
545 | |
936 | 546 #endif /* CMP_QPEL */ |
547 | |
/*
 * Evaluate the full-pel candidate (x, y) unless the map already holds an
 * entry for it in the current generation. The raw score is cached in
 * score_map; the score plus vector cost is compared against dmin and, when
 * better, (x, y) becomes best[0], best[1].
 * Needs `d`, `dmin`, `best`, `map`, `score_map`, `map_generation`, `shift`,
 * `pred_x`, `pred_y`, `penalty_factor`, `mv_penalty` and `size` in scope.
 */
#define CHECK_MV(x,y)\
{\
    const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
    const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
    if(map[index]!=key){\
        CMP(d, x, y, size);\
        map[index]= key;\
        score_map[index]= d;\
        d += penalty_factor*(mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y]);\
        COPY3_IF_LT(dmin, d, best[0], (x), best[1], (y))\
    }\
}
562 | |
/*
 * Clamp the candidate (ax, ay) into [xmin, xmax] x [ymin, ymax] and hand the
 * result to CHECK_MV. The arguments must not refer to variables named `x` or
 * `y`, since the macro declares locals with those names.
 */
#define CHECK_CLIPED_MV(ax,ay)\
{\
    const int x= FFMAX(xmin, FFMIN((ax), xmax));\
    const int y= FFMAX(ymin, FFMIN((ay), ymax));\
    CHECK_MV(x, y)\
}
569 | |
/*
 * Same as CHECK_MV, but additionally records in next_dir the direction code
 * (new_dir) of the candidate whenever it becomes the new best position.
 */
#define CHECK_MV_DIR(x,y,new_dir)\
{\
    const int index= (((y)<<ME_MAP_SHIFT) + (x))&(ME_MAP_SIZE-1);\
    const int key= ((y)<<ME_MAP_MV_BITS) + (x) + map_generation;\
    if(map[index]!=key){\
        CMP(d, x, y, size);\
        map[index]= key;\
        score_map[index]= d;\
        d += penalty_factor*(mv_penalty[((x)<<shift)-pred_x] + mv_penalty[((y)<<shift)-pred_y]);\
        if(d<dmin){\
            dmin= d;\
            best[0]= (x);\
            best[1]= (y);\
            next_dir= (new_dir);\
        }\
    }\
}
589 | |
/*
 * Debug helper: report when (x, y) lies outside the search range scaled by
 * 2^S; `v` is a tag appended to the message. Wrapped in a block so that it
 * expands to one statement and is safe inside if/else.
 */
#define check(x,y,S,v)\
{\
    if( (x)<(xmin<<(S)) ) printf("%d %d %d %d %d xmin" #v, xmin, (x), (y), s->mb_x, s->mb_y);\
    if( (x)>(xmax<<(S)) ) printf("%d %d %d %d %d xmax" #v, xmax, (x), (y), s->mb_x, s->mb_y);\
    if( (y)<(ymin<<(S)) ) printf("%d %d %d %d %d ymin" #v, ymin, (x), (y), s->mb_x, s->mb_y);\
    if( (y)>(ymax<<(S)) ) printf("%d %d %d %d %d ymax" #v, ymax, (x), (y), s->mb_x, s->mb_y);\
}

596 | |
597 static inline int RENAME(small_diamond_search)(MpegEncContext * s, int *best, int dmin, | |
598 Picture *ref_picture, | |
599 int const pred_x, int const pred_y, int const penalty_factor, | |
600 int const xmin, int const ymin, int const xmax, int const ymax, int const shift, | |
1162 | 601 uint32_t *map, int map_generation, int size, uint8_t * const mv_penalty |
936 | 602 ) |
603 { | |
604 me_cmp_func cmp, chroma_cmp; | |
605 int next_dir=-1; | |
606 LOAD_COMMON(s->mb_x*16, s->mb_y*16); | |
607 | |
608 cmp= s->dsp.me_cmp[size]; | |
609 chroma_cmp= s->dsp.me_cmp[size+1]; | |
610 | |
948 | 611 { /* ensure that the best point is in the MAP as h/qpel refinement needs it */ |
612 const int key= (best[1]<<ME_MAP_MV_BITS) + best[0] + map_generation; | |
613 const int index= ((best[1]<<ME_MAP_SHIFT) + best[0])&(ME_MAP_SIZE-1); | |
614 if(map[index]!=key){ //this will be executed only very rarey | |
615 CMP(score_map[index], best[0], best[1], size); | |
616 map[index]= key; | |
617 } | |
618 } | |
619 | |
936 | 620 for(;;){ |
621 int d; | |
622 const int dir= next_dir; | |
623 const int x= best[0]; | |
624 const int y= best[1]; | |
625 next_dir=-1; | |
626 | |
627 //printf("%d", dir); | |
628 if(dir!=2 && x>xmin) CHECK_MV_DIR(x-1, y , 0) | |
629 if(dir!=3 && y>ymin) CHECK_MV_DIR(x , y-1, 1) | |
630 if(dir!=0 && x<xmax) CHECK_MV_DIR(x+1, y , 2) | |
631 if(dir!=1 && y<ymax) CHECK_MV_DIR(x , y+1, 3) | |
632 | |
633 if(next_dir==-1){ | |
634 return dmin; | |
635 } | |
636 } | |
637 } | |
638 | |
948 | 639 static inline int RENAME(funny_diamond_search)(MpegEncContext * s, int *best, int dmin, |
640 Picture *ref_picture, | |
641 int const pred_x, int const pred_y, int const penalty_factor, | |
642 int const xmin, int const ymin, int const xmax, int const ymax, int const shift, | |
1162 | 643 uint32_t *map, int map_generation, int size, uint8_t * const mv_penalty |
948 | 644 ) |
645 { | |
646 me_cmp_func cmp, chroma_cmp; | |
647 int dia_size; | |
648 LOAD_COMMON(s->mb_x*16, s->mb_y*16); | |
649 | |
650 cmp= s->dsp.me_cmp[size]; | |
651 chroma_cmp= s->dsp.me_cmp[size+1]; | |
652 | |
653 for(dia_size=1; dia_size<=4; dia_size++){ | |
654 int dir; | |
655 const int x= best[0]; | |
656 const int y= best[1]; | |
657 | |
658 if(dia_size&(dia_size-1)) continue; | |
659 | |
660 if( x + dia_size > xmax | |
661 || x - dia_size < xmin | |
662 || y + dia_size > ymax | |
663 || y - dia_size < ymin) | |
664 continue; | |
665 | |
666 for(dir= 0; dir<dia_size; dir+=2){ | |
667 int d; | |
668 | |
669 CHECK_MV(x + dir , y + dia_size - dir); | |
670 CHECK_MV(x + dia_size - dir, y - dir ); | |
671 CHECK_MV(x - dir , y - dia_size + dir); | |
672 CHECK_MV(x - dia_size + dir, y + dir ); | |
673 } | |
674 | |
675 if(x!=best[0] || y!=best[1]) | |
676 dia_size=0; | |
677 #if 0 | |
678 { | |
679 int dx, dy, i; | |
680 static int stats[8*8]; | |
681 dx= ABS(x-best[0]); | |
682 dy= ABS(y-best[1]); | |
683 if(dy>dx){ | |
684 dx^=dy; dy^=dx; dx^=dy; | |
685 } | |
686 stats[dy*8 + dx] ++; | |
687 if(256*256*256*64 % (stats[0]+1)==0){ | |
688 for(i=0; i<64; i++){ | |
689 if((i&7)==0) printf("\n"); | |
690 printf("%8d ", stats[i]); | |
691 } | |
692 printf("\n"); | |
693 } | |
694 } | |
695 #endif | |
696 } | |
697 return dmin; | |
698 } | |
699 | |
/*
 * Evaluate the candidate (ax, ay) for the SAB search unless it is already in
 * the map. If its score beats the worst entry of the sorted minima list, it
 * is inserted at its sorted position (the last entry drops out) and the
 * enclosing loop is restarted via `i=-1; continue;`.
 * Must therefore be used directly inside the loop over `i`.
 */
#define SAB_CHECK_MV(ax,ay)\
{\
    const int index= (((ay)<<ME_MAP_SHIFT) + (ax))&(ME_MAP_SIZE-1);\
    const int key= ((ay)<<ME_MAP_MV_BITS) + (ax) + map_generation;\
    if(map[index]!=key){\
        CMP(d, ax, ay, size);\
        map[index]= key;\
        score_map[index]= d;\
        d += penalty_factor*(mv_penalty[((ax)<<shift)-pred_x] + mv_penalty[((ay)<<shift)-pred_y]);\
        if(d < minima[minima_count-1].height){\
            int j;\
\
            /* first entry that is worse than the new candidate */\
            for(j=0; d >= minima[j].height; j++)\
                ;\
\
            memmove(&minima [j+1], &minima [j], (minima_count - j - 1)*sizeof(Minima));\
\
            minima[j].height= d;\
            minima[j].x= (ax);\
            minima[j].y= (ay);\
            minima[j].checked= 0;\
\
            i=-1;\
            continue;\
        }\
    }\
}
728 | |
729 #define MAX_SAB_SIZE 16 | |
730 static inline int RENAME(sab_diamond_search)(MpegEncContext * s, int *best, int dmin, | |
731 Picture *ref_picture, | |
732 int const pred_x, int const pred_y, int const penalty_factor, | |
733 int const xmin, int const ymin, int const xmax, int const ymax, int const shift, | |
1162 | 734 uint32_t *map, int map_generation, int size, uint8_t * const mv_penalty |
948 | 735 ) |
736 { | |
737 me_cmp_func cmp, chroma_cmp; | |
738 Minima minima[MAX_SAB_SIZE]; | |
954 | 739 const int minima_count= ABS(s->me.dia_size); |
948 | 740 int i, j; |
741 LOAD_COMMON(s->mb_x*16, s->mb_y*16); | |
742 | |
743 cmp= s->dsp.me_cmp[size]; | |
744 chroma_cmp= s->dsp.me_cmp[size+1]; | |
745 | |
746 for(j=i=0; i<ME_MAP_SIZE; i++){ | |
747 uint32_t key= map[i]; | |
748 | |
749 key += (1<<(ME_MAP_MV_BITS-1)) + (1<<(2*ME_MAP_MV_BITS-1)); | |
750 | |
751 if((key&((-1)<<(2*ME_MAP_MV_BITS))) != map_generation) continue; | |
752 | |
753 assert(j<MAX_SAB_SIZE); //max j = number of predictors | |
754 | |
755 minima[j].height= score_map[i]; | |
756 minima[j].x= key & ((1<<ME_MAP_MV_BITS)-1); key>>=ME_MAP_MV_BITS; | |
757 minima[j].y= key & ((1<<ME_MAP_MV_BITS)-1); | |
758 minima[j].x-= (1<<(ME_MAP_MV_BITS-1)); | |
759 minima[j].y-= (1<<(ME_MAP_MV_BITS-1)); | |
760 minima[j].checked=0; | |
761 if(minima[j].x || minima[j].y) | |
762 minima[j].height+= (mv_penalty[((minima[j].x)<<shift)-pred_x] + mv_penalty[((minima[j].y)<<shift)-pred_y])*penalty_factor; | |
763 | |
764 j++; | |
765 } | |
766 | |
767 qsort(minima, j, sizeof(Minima), minima_cmp); | |
768 | |
769 for(; j<minima_count; j++){ | |
770 minima[j].height=256*256*256*64; | |
771 minima[j].checked=0; | |
772 minima[j].x= minima[j].y=0; | |
773 } | |
774 | |
775 for(i=0; i<minima_count; i++){ | |
776 const int x= minima[i].x; | |
777 const int y= minima[i].y; | |
778 int d; | |
779 | |
780 if(minima[i].checked) continue; | |
781 | |
782 if( x >= xmax || x <= xmin | |
783 || y >= ymax || y <= ymin) | |
784 continue; | |
785 | |
786 SAB_CHECK_MV(x-1, y) | |
787 SAB_CHECK_MV(x+1, y) | |
788 SAB_CHECK_MV(x , y-1) | |
789 SAB_CHECK_MV(x , y+1) | |
790 | |
791 minima[i].checked= 1; | |
792 } | |
793 | |
794 best[0]= minima[0].x; | |
795 best[1]= minima[0].y; | |
796 dmin= minima[0].height; | |
797 | |
798 if( best[0] < xmax && best[0] > xmin | |
799 && best[1] < ymax && best[1] > ymin){ | |
800 int d; | |
801 //ensure that the refernece samples for hpel refinement are in the map | |
802 CHECK_MV(best[0]-1, best[1]) | |
803 CHECK_MV(best[0]+1, best[1]) | |
804 CHECK_MV(best[0], best[1]-1) | |
805 CHECK_MV(best[0], best[1]+1) | |
806 } | |
807 return dmin; | |
808 } | |
809 | |
936 | 810 static inline int RENAME(var_diamond_search)(MpegEncContext * s, int *best, int dmin, |
811 Picture *ref_picture, | |
812 int const pred_x, int const pred_y, int const penalty_factor, | |
813 int const xmin, int const ymin, int const xmax, int const ymax, int const shift, | |
1162 | 814 uint32_t *map, int map_generation, int size, uint8_t * const mv_penalty |
936 | 815 ) |
816 { | |
817 me_cmp_func cmp, chroma_cmp; | |
948 | 818 int dia_size; |
936 | 819 LOAD_COMMON(s->mb_x*16, s->mb_y*16); |
820 | |
821 cmp= s->dsp.me_cmp[size]; | |
822 chroma_cmp= s->dsp.me_cmp[size+1]; | |
823 | |
954 | 824 for(dia_size=1; dia_size<=s->me.dia_size; dia_size++){ |
936 | 825 int dir, start, end; |
826 const int x= best[0]; | |
827 const int y= best[1]; | |
828 | |
829 start= FFMAX(0, y + dia_size - ymax); | |
948 | 830 end = FFMIN(dia_size, xmax - x + 1); |
936 | 831 for(dir= start; dir<end; dir++){ |
832 int d; | |
833 | |
834 //check(x + dir,y + dia_size - dir,0, a0) | |
835 CHECK_MV(x + dir , y + dia_size - dir); | |
836 } | |
837 | |
838 start= FFMAX(0, x + dia_size - xmax); | |
948 | 839 end = FFMIN(dia_size, y - ymin + 1); |
936 | 840 for(dir= start; dir<end; dir++){ |
841 int d; | |
842 | |
843 //check(x + dia_size - dir, y - dir,0, a1) | |
844 CHECK_MV(x + dia_size - dir, y - dir ); | |
845 } | |
846 | |
847 start= FFMAX(0, -y + dia_size + ymin ); | |
948 | 848 end = FFMIN(dia_size, x - xmin + 1); |
936 | 849 for(dir= start; dir<end; dir++){ |
850 int d; | |
851 | |
852 //check(x - dir,y - dia_size + dir,0, a2) | |
853 CHECK_MV(x - dir , y - dia_size + dir); | |
854 } | |
855 | |
856 start= FFMAX(0, -x + dia_size + xmin ); | |
948 | 857 end = FFMIN(dia_size, ymax - y + 1); |
936 | 858 for(dir= start; dir<end; dir++){ |
859 int d; | |
860 | |
861 //check(x - dia_size + dir, y + dir,0, a3) | |
862 CHECK_MV(x - dia_size + dir, y + dir ); | |
863 } | |
864 | |
865 if(x!=best[0] || y!=best[1]) | |
866 dia_size=0; | |
948 | 867 #if 0 |
868 { | |
869 int dx, dy, i; | |
870 static int stats[8*8]; | |
871 dx= ABS(x-best[0]); | |
872 dy= ABS(y-best[1]); | |
873 stats[dy*8 + dx] ++; | |
874 if(256*256*256*64 % (stats[0]+1)==0){ | |
875 for(i=0; i<64; i++){ | |
876 if((i&7)==0) printf("\n"); | |
877 printf("%6d ", stats[i]); | |
878 } | |
879 printf("\n"); | |
880 } | |
881 } | |
882 #endif | |
936 | 883 } |
884 return dmin; | |
885 } | |
886 | |
887 static int RENAME(epzs_motion_search)(MpegEncContext * s, int block, | |
888 int *mx_ptr, int *my_ptr, | |
889 int P[10][2], int pred_x, int pred_y, | |
948 | 890 int xmin, int ymin, int xmax, int ymax, Picture *ref_picture, int16_t (*last_mv)[2], |
1162 | 891 int ref_mv_scale, uint8_t * const mv_penalty) |
936 | 892 { |
893 int best[2]={0, 0}; | |
894 int d, dmin; | |
895 const int shift= 1+s->quarter_sample; | |
896 uint32_t *map= s->me.map; | |
897 int map_generation; | |
898 const int penalty_factor= s->me.penalty_factor; | |
899 const int size=0; | |
1177
fea03d2c4946
simplified adressing of most mb based arrays (mb_x + mb_y*s->mb_stride) now instead of mb_x + mb_y*mb_width and 1+mb_x + (1+mb_y)*(mb_width+2) and ... mixture
michaelni
parents:
1162
diff
changeset
|
900 const int ref_mv_stride= s->mb_stride; |
fea03d2c4946
simplified adressing of most mb based arrays (mb_x + mb_y*s->mb_stride) now instead of mb_x + mb_y*mb_width and 1+mb_x + (1+mb_y)*(mb_width+2) and ... mixture
michaelni
parents:
1162
diff
changeset
|
901 const int ref_mv_xy= s->mb_x + s->mb_y*ref_mv_stride; |
936 | 902 me_cmp_func cmp, chroma_cmp; |
903 LOAD_COMMON(s->mb_x*16, s->mb_y*16); | |
904 | |
905 cmp= s->dsp.me_cmp[size]; | |
906 chroma_cmp= s->dsp.me_cmp[size+1]; | |
907 | |
908 map_generation= update_map_generation(s); | |
909 | |
910 CMP(dmin, 0, 0, size); | |
911 map[0]= map_generation; | |
912 score_map[0]= dmin; | |
913 | |
914 /* first line */ | |
952 | 915 if (s->mb_y == 0) { |
936 | 916 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) |
948 | 917 CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, |
918 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) | |
936 | 919 }else{ |
920 if(dmin<256 && ( P_LEFT[0] |P_LEFT[1] | |
921 |P_TOP[0] |P_TOP[1] | |
948 | 922 |P_TOPRIGHT[0]|P_TOPRIGHT[1])==0){ |
936 | 923 *mx_ptr= 0; |
924 *my_ptr= 0; | |
925 s->me.skip=1; | |
926 return dmin; | |
927 } | |
928 CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift) | |
929 if(dmin>256*2){ | |
948 | 930 CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, |
931 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) | |
936 | 932 CHECK_MV(P_LEFT[0] >>shift, P_LEFT[1] >>shift) |
933 CHECK_MV(P_TOP[0] >>shift, P_TOP[1] >>shift) | |
934 CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift) | |
935 } | |
936 } | |
937 if(dmin>256*4){ | |
952 | 938 if(s->me.pre_pass){ |
939 CHECK_CLIPED_MV((last_mv[ref_mv_xy-1][0]*ref_mv_scale + (1<<15))>>16, | |
940 (last_mv[ref_mv_xy-1][1]*ref_mv_scale + (1<<15))>>16) | |
941 CHECK_CLIPED_MV((last_mv[ref_mv_xy-ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, | |
942 (last_mv[ref_mv_xy-ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16) | |
943 }else{ | |
944 CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16, | |
945 (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16) | |
946 CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, | |
947 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16) | |
948 } | |
936 | 949 } |
948 | 950 |
951 if(s->avctx->last_predictor_count){ | |
952 const int count= s->avctx->last_predictor_count; | |
953 const int xstart= FFMAX(0, s->mb_x - count); | |
954 const int ystart= FFMAX(0, s->mb_y - count); | |
955 const int xend= FFMIN(s->mb_width , s->mb_x + count + 1); | |
956 const int yend= FFMIN(s->mb_height, s->mb_y + count + 1); | |
957 int mb_y; | |
936 | 958 |
948 | 959 for(mb_y=ystart; mb_y<yend; mb_y++){ |
960 int mb_x; | |
961 for(mb_x=xstart; mb_x<xend; mb_x++){ | |
962 const int xy= mb_x + 1 + (mb_y + 1)*ref_mv_stride; | |
963 int mx= (last_mv[xy][0]*ref_mv_scale + (1<<15))>>16; | |
964 int my= (last_mv[xy][1]*ref_mv_scale + (1<<15))>>16; | |
965 | |
966 if(mx>xmax || mx<xmin || my>ymax || my<ymin) continue; | |
967 CHECK_MV(mx,my) | |
936 | 968 } |
969 } | |
970 } | |
948 | 971 |
936 | 972 //check(best[0],best[1],0, b0) |
954 | 973 if(s->me.dia_size==-1) |
948 | 974 dmin= RENAME(funny_diamond_search)(s, best, dmin, ref_picture, |
975 pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, | |
976 shift, map, map_generation, size, mv_penalty); | |
954 | 977 else if(s->me.dia_size<-1) |
948 | 978 dmin= RENAME(sab_diamond_search)(s, best, dmin, ref_picture, |
979 pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, | |
980 shift, map, map_generation, size, mv_penalty); | |
954 | 981 else if(s->me.dia_size<2) |
936 | 982 dmin= RENAME(small_diamond_search)(s, best, dmin, ref_picture, |
983 pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, | |
984 shift, map, map_generation, size, mv_penalty); | |
985 else | |
986 dmin= RENAME(var_diamond_search)(s, best, dmin, ref_picture, | |
987 pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, | |
988 shift, map, map_generation, size, mv_penalty); | |
989 | |
990 //check(best[0],best[1],0, b1) | |
991 *mx_ptr= best[0]; | |
992 *my_ptr= best[1]; | |
993 | |
994 // printf("%d %d %d \n", best[0], best[1], dmin); | |
995 return dmin; | |
996 } | |
997 | |
998 #ifndef CMP_DIRECT /* no 4mv search needed in direct mode */ | |
999 static int RENAME(epzs_motion_search4)(MpegEncContext * s, int block, | |
1000 int *mx_ptr, int *my_ptr, | |
1001 int P[10][2], int pred_x, int pred_y, | |
948 | 1002 int xmin, int ymin, int xmax, int ymax, Picture *ref_picture, int16_t (*last_mv)[2], |
1162 | 1003 int ref_mv_scale, uint8_t * const mv_penalty) |
936 | 1004 { |
1005 int best[2]={0, 0}; | |
1006 int d, dmin; | |
1007 const int shift= 1+s->quarter_sample; | |
1008 uint32_t *map= s->me.map; | |
1009 int map_generation; | |
1010 const int penalty_factor= s->me.penalty_factor; | |
1011 const int size=1; | |
1177
fea03d2c4946
simplified adressing of most mb based arrays (mb_x + mb_y*s->mb_stride) now instead of mb_x + mb_y*mb_width and 1+mb_x + (1+mb_y)*(mb_width+2) and ... mixture
michaelni
parents:
1162
diff
changeset
|
1012 const int ref_mv_stride= s->mb_stride; |
fea03d2c4946
simplified adressing of most mb based arrays (mb_x + mb_y*s->mb_stride) now instead of mb_x + mb_y*mb_width and 1+mb_x + (1+mb_y)*(mb_width+2) and ... mixture
michaelni
parents:
1162
diff
changeset
|
1013 const int ref_mv_xy= s->mb_x + s->mb_y *ref_mv_stride; |
936 | 1014 me_cmp_func cmp, chroma_cmp; |
1015 LOAD_COMMON((s->mb_x*2 + (block&1))*8, (s->mb_y*2 + (block>>1))*8); | |
1016 | |
1017 cmp= s->dsp.me_cmp[size]; | |
1018 chroma_cmp= s->dsp.me_cmp[size+1]; | |
1019 | |
1020 map_generation= update_map_generation(s); | |
1021 | |
1022 dmin = 1000000; | |
1023 //printf("%d %d %d %d //",xmin, ymin, xmax, ymax); | |
1024 /* first line */ | |
952 | 1025 if (s->mb_y == 0 && block<2) { |
936 | 1026 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) |
948 | 1027 CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, |
1028 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) | |
936 | 1029 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift) |
1030 }else{ | |
1031 CHECK_MV(P_MV1[0]>>shift, P_MV1[1]>>shift) | |
1032 //FIXME try some early stop | |
1033 if(dmin>64*2){ | |
1034 CHECK_MV(P_MEDIAN[0]>>shift, P_MEDIAN[1]>>shift) | |
1035 CHECK_MV(P_LEFT[0]>>shift, P_LEFT[1]>>shift) | |
1036 CHECK_MV(P_TOP[0]>>shift, P_TOP[1]>>shift) | |
1037 CHECK_MV(P_TOPRIGHT[0]>>shift, P_TOPRIGHT[1]>>shift) | |
948 | 1038 CHECK_CLIPED_MV((last_mv[ref_mv_xy][0]*ref_mv_scale + (1<<15))>>16, |
1039 (last_mv[ref_mv_xy][1]*ref_mv_scale + (1<<15))>>16) | |
936 | 1040 } |
1041 } | |
1042 if(dmin>64*4){ | |
948 | 1043 CHECK_CLIPED_MV((last_mv[ref_mv_xy+1][0]*ref_mv_scale + (1<<15))>>16, |
1044 (last_mv[ref_mv_xy+1][1]*ref_mv_scale + (1<<15))>>16) | |
1045 CHECK_CLIPED_MV((last_mv[ref_mv_xy+ref_mv_stride][0]*ref_mv_scale + (1<<15))>>16, | |
1046 (last_mv[ref_mv_xy+ref_mv_stride][1]*ref_mv_scale + (1<<15))>>16) | |
936 | 1047 } |
1048 | |
954 | 1049 if(s->me.dia_size==-1) |
948 | 1050 dmin= RENAME(funny_diamond_search)(s, best, dmin, ref_picture, |
1051 pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, | |
1052 shift, map, map_generation, size, mv_penalty); | |
954 | 1053 else if(s->me.dia_size<-1) |
948 | 1054 dmin= RENAME(sab_diamond_search)(s, best, dmin, ref_picture, |
1055 pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, | |
1056 shift, map, map_generation, size, mv_penalty); | |
954 | 1057 else if(s->me.dia_size<2) |
936 | 1058 dmin= RENAME(small_diamond_search)(s, best, dmin, ref_picture, |
1059 pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, | |
1060 shift, map, map_generation, size, mv_penalty); | |
1061 else | |
1062 dmin= RENAME(var_diamond_search)(s, best, dmin, ref_picture, | |
1063 pred_x, pred_y, penalty_factor, xmin, ymin, xmax, ymax, | |
1064 shift, map, map_generation, size, mv_penalty); | |
948 | 1065 |
936 | 1066 *mx_ptr= best[0]; |
1067 *my_ptr= best[1]; | |
1068 | |
1069 // printf("%d %d %d \n", best[0], best[1], dmin); | |
1070 return dmin; | |
1071 } | |
1072 #endif /* !CMP_DIRECT */ |