comparison h264_direct.c @ 11136:a0c52e951a29 libavcodec

Pack MVs together from the beginning for spatial direct; this simplifies the code and is a bit faster (5-10 CPU cycles, depending on what is measured).
author michael
date Sat, 13 Feb 2010 02:20:56 +0000
parents f85c5cb2c402
children 510950eafba5
comparison
equal deleted inserted replaced
11135:af73d215879d 11136:a0c52e951a29
214 } 214 }
215 } 215 }
216 216
217 if(h->direct_spatial_mv_pred){ 217 if(h->direct_spatial_mv_pred){
218 int ref[2]; 218 int ref[2];
219 int mv[2][2]; 219 int mv[2];
220 int list; 220 int list;
221 221
222 /* ref = min(neighbors) */ 222 /* ref = min(neighbors) */
223 for(list=0; list<2; list++){ 223 for(list=0; list<2; list++){
224 int left_ref = h->ref_cache[list][scan8[0] - 1]; 224 int left_ref = h->ref_cache[list][scan8[0] - 1];
235 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ]; 235 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ];
236 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ]; 236 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ];
237 237
238 int match_count= (left_ref==ref[list]) + (top_ref==ref[list]) + (refc==ref[list]); 238 int match_count= (left_ref==ref[list]) + (top_ref==ref[list]) + (refc==ref[list]);
239 if(match_count > 1){ //most common 239 if(match_count > 1){ //most common
240 mv[list][0]= mid_pred(A[0], B[0], C[0]); 240 mv[list]= (mid_pred(A[0], B[0], C[0])&0xFFFF)
241 mv[list][1]= mid_pred(A[1], B[1], C[1]); 241 +(mid_pred(A[1], B[1], C[1])<<16);
242 }else { 242 }else {
243 assert(match_count==1); 243 assert(match_count==1);
244 if(left_ref==ref[list]){ 244 if(left_ref==ref[list]){
245 mv[list][0]= A[0]; 245 mv[list]= *(uint32_t*)A;
246 mv[list][1]= A[1];
247 }else if(top_ref==ref[list]){ 246 }else if(top_ref==ref[list]){
248 mv[list][0]= B[0]; 247 mv[list]= *(uint32_t*)B;
249 mv[list][1]= B[1];
250 }else{ 248 }else{
251 mv[list][0]= C[0]; 249 mv[list]= *(uint32_t*)C;
252 mv[list][1]= C[1];
253 } 250 }
254 } 251 }
255 }else{ 252 }else{
256 int mask= ~(MB_TYPE_L0 << (2*list)); 253 int mask= ~(MB_TYPE_L0 << (2*list));
257 mv[list][0] = mv[list][1] = 0; 254 mv[list] = 0;
258 ref[list] = -1; 255 ref[list] = -1;
259 if(!is_b8x8) 256 if(!is_b8x8)
260 *mb_type &= mask; 257 *mb_type &= mask;
261 sub_mb_type &= mask; 258 sub_mb_type &= mask;
262 } 259 }
286 if(!IS_INTRA(mb_type_col[y8]) && !h->ref_list[1][0].long_ref 283 if(!IS_INTRA(mb_type_col[y8]) && !h->ref_list[1][0].long_ref
287 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1) 284 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1)
288 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){ 285 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){
289 a=b=0; 286 a=b=0;
290 if(ref[0] > 0) 287 if(ref[0] > 0)
291 a= pack16to32(mv[0][0],mv[0][1]); 288 a= mv[0];
292 if(ref[1] > 0) 289 if(ref[1] > 0)
293 b= pack16to32(mv[1][0],mv[1][1]); 290 b= mv[1];
294 n++; 291 n++;
295 }else{ 292 }else{
296 a= pack16to32(mv[0][0],mv[0][1]); 293 a= mv[0];
297 b= pack16to32(mv[1][0],mv[1][1]); 294 b= mv[1];
298 } 295 }
299 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4); 296 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4);
300 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4); 297 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4);
301 } 298 }
302 if(!is_b8x8 && !(n&3)) 299 if(!is_b8x8 && !(n&3))
310 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1) 307 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1)
311 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1 308 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1
312 && h->x264_build>33U))){ 309 && h->x264_build>33U))){
313 a=b=0; 310 a=b=0;
314 if(ref[0] > 0) 311 if(ref[0] > 0)
315 a= pack16to32(mv[0][0],mv[0][1]); 312 a= mv[0];
316 if(ref[1] > 0) 313 if(ref[1] > 0)
317 b= pack16to32(mv[1][0],mv[1][1]); 314 b= mv[1];
318 }else{ 315 }else{
319 a= pack16to32(mv[0][0],mv[0][1]); 316 a= mv[0];
320 b= pack16to32(mv[1][0],mv[1][1]); 317 b= mv[1];
321 } 318 }
322 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4); 319 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4);
323 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4); 320 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4);
324 }else{ 321 }else{
325 int n=0; 322 int n=0;
329 326
330 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8])) 327 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8]))
331 continue; 328 continue;
332 h->sub_mb_type[i8] = sub_mb_type; 329 h->sub_mb_type[i8] = sub_mb_type;
333 330
334 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4); 331 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, mv[0], 4);
335 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4); 332 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, mv[1], 4);
336 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1); 333 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1);
337 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1); 334 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1);
338 335
339 /* col_zero_flag */ 336 /* col_zero_flag */
340 if(!IS_INTRA(mb_type_col[0]) && !h->ref_list[1][0].long_ref && ( l1ref0[x8 + y8*b8_stride] == 0 337 if(!IS_INTRA(mb_type_col[0]) && !h->ref_list[1][0].long_ref && ( l1ref0[x8 + y8*b8_stride] == 0