comparison h264_loopfilter.c @ 10913:497929e9d912 libavcodec

Perform reference remapping at fill_cache() time instead of in the loop filter. This removes one obstacle to getting ff_h264_filter_mb_fast() bit-exact. The code is maybe 0.1% faster.
author michael
date Mon, 18 Jan 2010 05:15:31 +0000
parents 7cecaa3a6b38
children edd918a4c0b8
comparison
equal deleted inserted replaced
10912:9bdf0bb607a3 10913:497929e9d912
443 static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) { 443 static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
444 MpegEncContext * const s = &h->s; 444 MpegEncContext * const s = &h->s;
445 int edge; 445 int edge;
446 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy; 446 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
447 const int mbm_type = s->current_picture.mb_type[mbm_xy]; 447 const int mbm_type = s->current_picture.mb_type[mbm_xy];
448 int (*ref2frm) [64] = h->ref2frm[ h->slice_num &(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
449 int (*ref2frmm)[64] = h->ref2frm[ h->slice_table[mbm_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
450 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0; 448 int start = h->slice_table[mbm_xy] == 0xFFFF ? 1 : 0;
451 449
452 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP)) 450 const int edges = (mb_type & (MB_TYPE_16x16|MB_TYPE_SKIP))
453 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4; 451 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
454 // how often to recheck mv-based bS when iterating between edges 452 // how often to recheck mv-based bS when iterating between edges
511 /* Calculate bS */ 509 /* Calculate bS */
512 for( edge = start; edge < edges; edge++ ) { 510 for( edge = start; edge < edges; edge++ ) {
513 /* mbn_xy: neighbor macroblock */ 511 /* mbn_xy: neighbor macroblock */
514 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy; 512 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
515 const int mbn_type = s->current_picture.mb_type[mbn_xy]; 513 const int mbn_type = s->current_picture.mb_type[mbn_xy];
516 int (*ref2frmn)[64] = edge > 0 ? ref2frm : ref2frmm;
517 int16_t bS[4]; 514 int16_t bS[4];
518 int qp; 515 int qp;
519 516
520 if( (edge&1) && IS_8x8DCT(mb_type) ) 517 if( (edge&1) && IS_8x8DCT(mb_type) )
521 continue; 518 continue;
551 int b_idx= 8 + 4 + edge * (dir ? 8:1); 548 int b_idx= 8 + 4 + edge * (dir ? 8:1);
552 int bn_idx= b_idx - (dir ? 8:1); 549 int bn_idx= b_idx - (dir ? 8:1);
553 int v = 0; 550 int v = 0;
554 551
555 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) { 552 for( l = 0; !v && l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
556 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] | 553 v |= h->ref_cache[l][b_idx] != h->ref_cache[l][bn_idx] |
557 h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] + 3 >= 7U | 554 h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] + 3 >= 7U |
558 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit; 555 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
559 } 556 }
560 557
561 if(h->slice_type_nos == FF_B_TYPE && v){ 558 if(h->slice_type_nos == FF_B_TYPE && v){
562 v=0; 559 v=0;
563 for( l = 0; !v && l < 2; l++ ) { 560 for( l = 0; !v && l < 2; l++ ) {
564 int ln= 1-l; 561 int ln= 1-l;
565 v |= ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] | 562 v |= h->ref_cache[l][b_idx] != h->ref_cache[ln][bn_idx] |
566 h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] + 3 >= 7U | 563 h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] + 3 >= 7U |
567 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit; 564 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
568 } 565 }
569 } 566 }
570 567
586 } 583 }
587 else if(!mv_done) 584 else if(!mv_done)
588 { 585 {
589 bS[i] = 0; 586 bS[i] = 0;
590 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) { 587 for( l = 0; l < 1 + (h->slice_type_nos == FF_B_TYPE); l++ ) {
591 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[l][h->ref_cache[l][bn_idx]] | 588 if( h->ref_cache[l][b_idx] != h->ref_cache[l][bn_idx] |
592 h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] + 3 >= 7U | 589 h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] + 3 >= 7U |
593 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) { 590 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
594 bS[i] = 1; 591 bS[i] = 1;
595 break; 592 break;
596 } 593 }
598 595
599 if(h->slice_type_nos == FF_B_TYPE && bS[i]){ 596 if(h->slice_type_nos == FF_B_TYPE && bS[i]){
600 bS[i] = 0; 597 bS[i] = 0;
601 for( l = 0; l < 2; l++ ) { 598 for( l = 0; l < 2; l++ ) {
602 int ln= 1-l; 599 int ln= 1-l;
603 if( ref2frm[l][h->ref_cache[l][b_idx]] != ref2frmn[ln][h->ref_cache[ln][bn_idx]] | 600 if( h->ref_cache[l][b_idx] != h->ref_cache[ln][bn_idx] |
604 h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] + 3 >= 7U | 601 h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] + 3 >= 7U |
605 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) { 602 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
606 bS[i] = 1; 603 bS[i] = 1;
607 break; 604 break;
608 } 605 }