Mercurial > libavcodec.hg
comparison h264_direct.c @ 11136:a0c52e951a29 libavcodec
Pack MVs together from the beginning for spatial direct; this simplifies the code
and is a bit faster (5-10 CPU cycles depending on what is measured).
author | michael |
---|---|
date | Sat, 13 Feb 2010 02:20:56 +0000 |
parents | f85c5cb2c402 |
children | 510950eafba5 |
comparison
equal
deleted
inserted
replaced
11135:af73d215879d | 11136:a0c52e951a29 |
---|---|
214 } | 214 } |
215 } | 215 } |
216 | 216 |
217 if(h->direct_spatial_mv_pred){ | 217 if(h->direct_spatial_mv_pred){ |
218 int ref[2]; | 218 int ref[2]; |
219 int mv[2][2]; | 219 int mv[2]; |
220 int list; | 220 int list; |
221 | 221 |
222 /* ref = min(neighbors) */ | 222 /* ref = min(neighbors) */ |
223 for(list=0; list<2; list++){ | 223 for(list=0; list<2; list++){ |
224 int left_ref = h->ref_cache[list][scan8[0] - 1]; | 224 int left_ref = h->ref_cache[list][scan8[0] - 1]; |
235 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ]; | 235 const int16_t * const A= h->mv_cache[list][ scan8[0] - 1 ]; |
236 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ]; | 236 const int16_t * const B= h->mv_cache[list][ scan8[0] - 8 ]; |
237 | 237 |
238 int match_count= (left_ref==ref[list]) + (top_ref==ref[list]) + (refc==ref[list]); | 238 int match_count= (left_ref==ref[list]) + (top_ref==ref[list]) + (refc==ref[list]); |
239 if(match_count > 1){ //most common | 239 if(match_count > 1){ //most common |
240 mv[list][0]= mid_pred(A[0], B[0], C[0]); | 240 mv[list]= (mid_pred(A[0], B[0], C[0])&0xFFFF) |
241 mv[list][1]= mid_pred(A[1], B[1], C[1]); | 241 +(mid_pred(A[1], B[1], C[1])<<16); |
242 }else { | 242 }else { |
243 assert(match_count==1); | 243 assert(match_count==1); |
244 if(left_ref==ref[list]){ | 244 if(left_ref==ref[list]){ |
245 mv[list][0]= A[0]; | 245 mv[list]= *(uint32_t*)A; |
246 mv[list][1]= A[1]; | |
247 }else if(top_ref==ref[list]){ | 246 }else if(top_ref==ref[list]){ |
248 mv[list][0]= B[0]; | 247 mv[list]= *(uint32_t*)B; |
249 mv[list][1]= B[1]; | |
250 }else{ | 248 }else{ |
251 mv[list][0]= C[0]; | 249 mv[list]= *(uint32_t*)C; |
252 mv[list][1]= C[1]; | |
253 } | 250 } |
254 } | 251 } |
255 }else{ | 252 }else{ |
256 int mask= ~(MB_TYPE_L0 << (2*list)); | 253 int mask= ~(MB_TYPE_L0 << (2*list)); |
257 mv[list][0] = mv[list][1] = 0; | 254 mv[list] = 0; |
258 ref[list] = -1; | 255 ref[list] = -1; |
259 if(!is_b8x8) | 256 if(!is_b8x8) |
260 *mb_type &= mask; | 257 *mb_type &= mask; |
261 sub_mb_type &= mask; | 258 sub_mb_type &= mask; |
262 } | 259 } |
286 if(!IS_INTRA(mb_type_col[y8]) && !h->ref_list[1][0].long_ref | 283 if(!IS_INTRA(mb_type_col[y8]) && !h->ref_list[1][0].long_ref |
287 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1) | 284 && ( (l1ref0[xy8] == 0 && FFABS(l1mv0[xy4][0]) <= 1 && FFABS(l1mv0[xy4][1]) <= 1) |
288 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){ | 285 || (l1ref0[xy8] < 0 && l1ref1[xy8] == 0 && FFABS(l1mv1[xy4][0]) <= 1 && FFABS(l1mv1[xy4][1]) <= 1))){ |
289 a=b=0; | 286 a=b=0; |
290 if(ref[0] > 0) | 287 if(ref[0] > 0) |
291 a= pack16to32(mv[0][0],mv[0][1]); | 288 a= mv[0]; |
292 if(ref[1] > 0) | 289 if(ref[1] > 0) |
293 b= pack16to32(mv[1][0],mv[1][1]); | 290 b= mv[1]; |
294 n++; | 291 n++; |
295 }else{ | 292 }else{ |
296 a= pack16to32(mv[0][0],mv[0][1]); | 293 a= mv[0]; |
297 b= pack16to32(mv[1][0],mv[1][1]); | 294 b= mv[1]; |
298 } | 295 } |
299 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4); | 296 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, a, 4); |
300 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4); | 297 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, b, 4); |
301 } | 298 } |
302 if(!is_b8x8 && !(n&3)) | 299 if(!is_b8x8 && !(n&3)) |
310 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1) | 307 && ( (l1ref0[0] == 0 && FFABS(l1mv0[0][0]) <= 1 && FFABS(l1mv0[0][1]) <= 1) |
311 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1 | 308 || (l1ref0[0] < 0 && l1ref1[0] == 0 && FFABS(l1mv1[0][0]) <= 1 && FFABS(l1mv1[0][1]) <= 1 |
312 && h->x264_build>33U))){ | 309 && h->x264_build>33U))){ |
313 a=b=0; | 310 a=b=0; |
314 if(ref[0] > 0) | 311 if(ref[0] > 0) |
315 a= pack16to32(mv[0][0],mv[0][1]); | 312 a= mv[0]; |
316 if(ref[1] > 0) | 313 if(ref[1] > 0) |
317 b= pack16to32(mv[1][0],mv[1][1]); | 314 b= mv[1]; |
318 }else{ | 315 }else{ |
319 a= pack16to32(mv[0][0],mv[0][1]); | 316 a= mv[0]; |
320 b= pack16to32(mv[1][0],mv[1][1]); | 317 b= mv[1]; |
321 } | 318 } |
322 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4); | 319 fill_rectangle(&h->mv_cache[0][scan8[0]], 4, 4, 8, a, 4); |
323 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4); | 320 fill_rectangle(&h->mv_cache[1][scan8[0]], 4, 4, 8, b, 4); |
324 }else{ | 321 }else{ |
325 int n=0; | 322 int n=0; |
329 | 326 |
330 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8])) | 327 if(is_b8x8 && !IS_DIRECT(h->sub_mb_type[i8])) |
331 continue; | 328 continue; |
332 h->sub_mb_type[i8] = sub_mb_type; | 329 h->sub_mb_type[i8] = sub_mb_type; |
333 | 330 |
334 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, pack16to32(mv[0][0],mv[0][1]), 4); | 331 fill_rectangle(&h->mv_cache[0][scan8[i8*4]], 2, 2, 8, mv[0], 4); |
335 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, pack16to32(mv[1][0],mv[1][1]), 4); | 332 fill_rectangle(&h->mv_cache[1][scan8[i8*4]], 2, 2, 8, mv[1], 4); |
336 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1); | 333 fill_rectangle(&h->ref_cache[0][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[0], 1); |
337 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1); | 334 fill_rectangle(&h->ref_cache[1][scan8[i8*4]], 2, 2, 8, (uint8_t)ref[1], 1); |
338 | 335 |
339 /* col_zero_flag */ | 336 /* col_zero_flag */ |
340 if(!IS_INTRA(mb_type_col[0]) && !h->ref_list[1][0].long_ref && ( l1ref0[x8 + y8*b8_stride] == 0 | 337 if(!IS_INTRA(mb_type_col[0]) && !h->ref_list[1][0].long_ref && ( l1ref0[x8 + y8*b8_stride] == 0 |