comparison h264_loopfilter.c @ 11034:fd5921186064 libavcodec

Make the fast loop filter path work with unavailable left macroblocks (MBs). This avoids having to switch between the slow and fast code paths in each row. The loop filter is 0.5% faster for cathedral.
author michael
date Thu, 28 Jan 2010 02:15:25 +0000
parents b5577677b97d
children 4debec8a15fa
comparison
equal deleted inserted replaced
11033:b5577677b97d 11034:fd5921186064
306 } 306 }
307 307
308 void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) { 308 void ff_h264_filter_mb_fast( H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize) {
309 MpegEncContext * const s = &h->s; 309 MpegEncContext * const s = &h->s;
310 int mb_xy; 310 int mb_xy;
311 int mb_type; 311 int mb_type, left_type;
312 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh; 312 int qp, qp0, qp1, qpc, qpc0, qpc1, qp_thresh;
313 313
314 mb_xy = h->mb_xy; 314 mb_xy = h->mb_xy;
315 315
316 if(!h->top_type || !h->left_type[0] || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff) { 316 if(!h->top_type || !s->dsp.h264_loop_filter_strength || h->pps.chroma_qp_diff) {
317 ff_h264_filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize); 317 ff_h264_filter_mb(h, mb_x, mb_y, img_y, img_cb, img_cr, linesize, uvlinesize);
318 return; 318 return;
319 } 319 }
320 assert(!FRAME_MBAFF); 320 assert(!FRAME_MBAFF);
321 left_type= h->left_type[0];
321 322
322 mb_type = s->current_picture.mb_type[mb_xy]; 323 mb_type = s->current_picture.mb_type[mb_xy];
323 qp = s->current_picture.qscale_table[mb_xy]; 324 qp = s->current_picture.qscale_table[mb_xy];
324 qp0 = s->current_picture.qscale_table[mb_xy-1]; 325 qp0 = s->current_picture.qscale_table[mb_xy-1];
325 qp1 = s->current_picture.qscale_table[h->top_mb_xy]; 326 qp1 = s->current_picture.qscale_table[h->top_mb_xy];
338 if( IS_INTRA(mb_type) ) { 339 if( IS_INTRA(mb_type) ) {
339 int16_t bS4[4] = {4,4,4,4}; 340 int16_t bS4[4] = {4,4,4,4};
340 int16_t bS3[4] = {3,3,3,3}; 341 int16_t bS3[4] = {3,3,3,3};
341 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4; 342 int16_t *bSH = FIELD_PICTURE ? bS3 : bS4;
342 if( IS_8x8DCT(mb_type) ) { 343 if( IS_8x8DCT(mb_type) ) {
344 if(left_type)
343 filter_mb_edgev( &img_y[4*0], linesize, bS4, qp0, h); 345 filter_mb_edgev( &img_y[4*0], linesize, bS4, qp0, h);
344 filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h); 346 filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h);
345 filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h); 347 filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h);
346 filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h); 348 filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h);
347 } else { 349 } else {
350 if(left_type)
348 filter_mb_edgev( &img_y[4*0], linesize, bS4, qp0, h); 351 filter_mb_edgev( &img_y[4*0], linesize, bS4, qp0, h);
349 filter_mb_edgev( &img_y[4*1], linesize, bS3, qp, h); 352 filter_mb_edgev( &img_y[4*1], linesize, bS3, qp, h);
350 filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h); 353 filter_mb_edgev( &img_y[4*2], linesize, bS3, qp, h);
351 filter_mb_edgev( &img_y[4*3], linesize, bS3, qp, h); 354 filter_mb_edgev( &img_y[4*3], linesize, bS3, qp, h);
352 filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h); 355 filter_mb_edgeh( &img_y[4*0*linesize], linesize, bSH, qp1, h);
353 filter_mb_edgeh( &img_y[4*1*linesize], linesize, bS3, qp, h); 356 filter_mb_edgeh( &img_y[4*1*linesize], linesize, bS3, qp, h);
354 filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h); 357 filter_mb_edgeh( &img_y[4*2*linesize], linesize, bS3, qp, h);
355 filter_mb_edgeh( &img_y[4*3*linesize], linesize, bS3, qp, h); 358 filter_mb_edgeh( &img_y[4*3*linesize], linesize, bS3, qp, h);
356 } 359 }
357 filter_mb_edgecv( &img_cb[2*0], uvlinesize, bS4, qpc0, h); 360 if(left_type){
361 filter_mb_edgecv( &img_cb[2*0], uvlinesize, bS4, qpc0, h);
362 filter_mb_edgecv( &img_cr[2*0], uvlinesize, bS4, qpc0, h);
363 }
358 filter_mb_edgecv( &img_cb[2*2], uvlinesize, bS3, qpc, h); 364 filter_mb_edgecv( &img_cb[2*2], uvlinesize, bS3, qpc, h);
359 filter_mb_edgecv( &img_cr[2*0], uvlinesize, bS4, qpc0, h);
360 filter_mb_edgecv( &img_cr[2*2], uvlinesize, bS3, qpc, h); 365 filter_mb_edgecv( &img_cr[2*2], uvlinesize, bS3, qpc, h);
361 filter_mb_edgech( &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1, h); 366 filter_mb_edgech( &img_cb[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
362 filter_mb_edgech( &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc, h); 367 filter_mb_edgech( &img_cb[2*2*uvlinesize], uvlinesize, bS3, qpc, h);
363 filter_mb_edgech( &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1, h); 368 filter_mb_edgech( &img_cr[2*0*uvlinesize], uvlinesize, bSH, qpc1, h);
364 filter_mb_edgech( &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc, h); 369 filter_mb_edgech( &img_cr[2*2*uvlinesize], uvlinesize, bS3, qpc, h);
393 filter_mb_edgec##hv( &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\ 398 filter_mb_edgec##hv( &img_cb[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\
394 filter_mb_edgec##hv( &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\ 399 filter_mb_edgec##hv( &img_cr[2*edge*(dir?uvlinesize:1)], uvlinesize, bS[dir][edge], edge ? qpc : qpc##dir, h );\
395 }\ 400 }\
396 } 401 }
397 if( edges == 1 ) { 402 if( edges == 1 ) {
403 if(left_type)
398 FILTER(v,0,0); 404 FILTER(v,0,0);
399 FILTER(h,1,0); 405 FILTER(h,1,0);
400 } else if( IS_8x8DCT(mb_type) ) { 406 } else if( IS_8x8DCT(mb_type) ) {
407 if(left_type)
401 FILTER(v,0,0); 408 FILTER(v,0,0);
402 FILTER(v,0,2); 409 FILTER(v,0,2);
403 FILTER(h,1,0); 410 FILTER(h,1,0);
404 FILTER(h,1,2); 411 FILTER(h,1,2);
405 } else { 412 } else {
413 if(left_type)
406 FILTER(v,0,0); 414 FILTER(v,0,0);
407 FILTER(v,0,1); 415 FILTER(v,0,1);
408 FILTER(v,0,2); 416 FILTER(v,0,2);
409 FILTER(v,0,3); 417 FILTER(v,0,3);
410 FILTER(h,1,0); 418 FILTER(h,1,0);