comparison h264.c @ 2924:d98f385b3933 libavcodec

25% faster deblocking, 4-6% faster overall.
author lorenm
date Fri, 28 Oct 2005 06:37:32 +0000
parents 7fa9106be552
children dfdbef4bfdc5
comparison
equal deleted inserted replaced
2923:7fa9106be552 2924:d98f385b3933
6576 /* dir : 0 -> vertical edge, 1 -> horizontal edge */ 6576 /* dir : 0 -> vertical edge, 1 -> horizontal edge */
6577 for( dir = 0; dir < 2; dir++ ) 6577 for( dir = 0; dir < 2; dir++ )
6578 { 6578 {
6579 int edge; 6579 int edge;
6580 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy; 6580 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
6581 const int mb_type = s->current_picture.mb_type[mb_xy];
6582 const int mbm_type = s->current_picture.mb_type[mbm_xy];
6581 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0; 6583 int start = h->slice_table[mbm_xy] == 255 ? 1 : 0;
6582 6584
6583 if (first_vertical_edge_done) { 6585 if (first_vertical_edge_done) {
6584 start = 1; 6586 start = 1;
6585 first_vertical_edge_done = 0; 6587 first_vertical_edge_done = 0;
6586 } 6588 }
6587 6589
6588 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy]) 6590 if (h->deblocking_filter==2 && h->slice_table[mbm_xy] != h->slice_table[mb_xy])
6589 start = 1; 6591 start = 1;
6590 6592
6593 const int edges = ((mb_type & mbm_type) & (MB_TYPE_16x16|MB_TYPE_SKIP))
6594 == (MB_TYPE_16x16|MB_TYPE_SKIP) ? 1 : 4;
6595 // how often to recheck mv-based bS when iterating between edges
6596 const int mask_edge = (mb_type & (MB_TYPE_16x16 | (MB_TYPE_16x8 << dir))) ? 3 :
6597 (mb_type & (MB_TYPE_8x16 >> dir)) ? 1 : 0;
6598 // how often to recheck mv-based bS when iterating along each edge
6599 const int mask_par0 = mb_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir));
6600
6591 /* Calculate bS */ 6601 /* Calculate bS */
6592 for( edge = start; edge < 4; edge++ ) { 6602 for( edge = start; edge < edges; edge++ ) {
6593 /* mbn_xy: neighbor macroblock */ 6603 /* mbn_xy: neighbor macroblock */
6594 int mbn_xy = edge > 0 ? mb_xy : mbm_xy; 6604 const int mbn_xy = edge > 0 ? mb_xy : mbm_xy;
6605 const int mbn_type = s->current_picture.mb_type[mbn_xy];
6595 int bS[4]; 6606 int bS[4];
6596 int qp; 6607 int qp;
6597 6608
6598 if( (edge&1) && IS_8x8DCT(s->current_picture.mb_type[mb_xy]) ) 6609 if( (edge&1) && IS_8x8DCT(mb_type) )
6599 continue; 6610 continue;
6600 6611
6601 if (h->mb_aff_frame && (dir == 1) && (edge == 0) && ((mb_y & 1) == 0) 6612 if (h->mb_aff_frame && (dir == 1) && (edge == 0) && ((mb_y & 1) == 0)
6602 && !IS_INTERLACED(s->current_picture.mb_type[mb_xy]) 6613 && !IS_INTERLACED(mb_type)
6603 && IS_INTERLACED(s->current_picture.mb_type[mbn_xy]) 6614 && IS_INTERLACED(mbn_type)
6604 ) { 6615 ) {
6605 // This is a special case in the norm where the filtering must 6616 // This is a special case in the norm where the filtering must
6606 // be done twice (one each of the field) even if we are in a 6617 // be done twice (one each of the field) even if we are in a
6607 // frame macroblock. 6618 // frame macroblock.
6608 // 6619 //
6610 unsigned int tmp_uvlinesize = 2 * uvlinesize; 6621 unsigned int tmp_uvlinesize = 2 * uvlinesize;
6611 int mbn_xy = mb_xy - 2 * s->mb_stride; 6622 int mbn_xy = mb_xy - 2 * s->mb_stride;
6612 int qp, chroma_qp; 6623 int qp, chroma_qp;
6613 6624
6614 // first filtering 6625 // first filtering
6615 if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) || 6626 if( IS_INTRA(mb_type) ||
6616 IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) { 6627 IS_INTRA(s->current_picture.mb_type[mbn_xy]) ) {
6617 bS[0] = bS[1] = bS[2] = bS[3] = 3; 6628 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6618 } else { 6629 } else {
6619 // TODO 6630 // TODO
6620 av_log(h->s.avctx, AV_LOG_ERROR, "both non intra (TODO)\n"); 6631 av_log(h->s.avctx, AV_LOG_ERROR, "both non intra (TODO)\n");
6621 } 6632 }
6631 filter_mb_edgech( h, &img_cb[0], tmp_uvlinesize, bS, chroma_qp ); 6642 filter_mb_edgech( h, &img_cb[0], tmp_uvlinesize, bS, chroma_qp );
6632 filter_mb_edgech( h, &img_cr[0], tmp_uvlinesize, bS, chroma_qp ); 6643 filter_mb_edgech( h, &img_cr[0], tmp_uvlinesize, bS, chroma_qp );
6633 6644
6634 // second filtering 6645 // second filtering
6635 mbn_xy += s->mb_stride; 6646 mbn_xy += s->mb_stride;
6636 if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) || 6647 if( IS_INTRA(mb_type) ||
6637 IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) { 6648 IS_INTRA(mbn_type) ) {
6638 bS[0] = bS[1] = bS[2] = bS[3] = 3; 6649 bS[0] = bS[1] = bS[2] = bS[3] = 3;
6639 } else { 6650 } else {
6640 // TODO 6651 // TODO
6641 av_log(h->s.avctx, AV_LOG_ERROR, "both non intra (TODO)\n"); 6652 av_log(h->s.avctx, AV_LOG_ERROR, "both non intra (TODO)\n");
6642 } 6653 }
6651 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1; 6662 get_chroma_qp( h->pps.chroma_qp_index_offset, s->current_picture.qscale_table[mbn_xy] ) + 1 ) >> 1;
6652 filter_mb_edgech( h, &img_cb[uvlinesize], tmp_uvlinesize, bS, chroma_qp ); 6663 filter_mb_edgech( h, &img_cb[uvlinesize], tmp_uvlinesize, bS, chroma_qp );
6653 filter_mb_edgech( h, &img_cr[uvlinesize], tmp_uvlinesize, bS, chroma_qp ); 6664 filter_mb_edgech( h, &img_cr[uvlinesize], tmp_uvlinesize, bS, chroma_qp );
6654 continue; 6665 continue;
6655 } 6666 }
6656 if( IS_INTRA( s->current_picture.mb_type[mb_xy] ) || 6667 if( IS_INTRA(mb_type) ||
6657 IS_INTRA( s->current_picture.mb_type[mbn_xy] ) ) { 6668 IS_INTRA(mbn_type) ) {
6658 int value; 6669 int value;
6659 if (edge == 0) { 6670 if (edge == 0) {
6660 if ( (!IS_INTERLACED(s->current_picture.mb_type[mb_xy]) && !IS_INTERLACED(s->current_picture.mb_type[mbm_xy])) 6671 if ( (!IS_INTERLACED(mb_type) && !IS_INTERLACED(mbm_type))
6661 || ((h->mb_aff_frame || (s->picture_structure != PICT_FRAME)) && (dir == 0)) 6672 || ((h->mb_aff_frame || (s->picture_structure != PICT_FRAME)) && (dir == 0))
6662 ) { 6673 ) {
6663 value = 4; 6674 value = 4;
6664 } else { 6675 } else {
6665 value = 3; 6676 value = 3;
6667 } else { 6678 } else {
6668 value = 3; 6679 value = 3;
6669 } 6680 }
6670 bS[0] = bS[1] = bS[2] = bS[3] = value; 6681 bS[0] = bS[1] = bS[2] = bS[3] = value;
6671 } else { 6682 } else {
6672 int i; 6683 int i, l;
6684 int mv_done;
6685
6686 if( edge & mask_edge ) {
6687 bS[0] = bS[1] = bS[2] = bS[3] = 0;
6688 mv_done = 1;
6689 }
6690 else if( mask_par0 && (edge || (mbn_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
6691 int b_idx= 8 + 4 + edge * (dir ? 8:1);
6692 int bn_idx= b_idx - (dir ? 8:1);
6693 int v = 0;
6694 for( l = 0; !v && l < 1 + (h->slice_type == B_TYPE); l++ ) {
6695 v |= ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6696 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6697 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4;
6698 }
6699 bS[0] = bS[1] = bS[2] = bS[3] = v;
6700 mv_done = 1;
6701 }
6702 else
6703 mv_done = 0;
6704
6673 for( i = 0; i < 4; i++ ) { 6705 for( i = 0; i < 4; i++ ) {
6674 int x = dir == 0 ? edge : i; 6706 int x = dir == 0 ? edge : i;
6675 int y = dir == 0 ? i : edge; 6707 int y = dir == 0 ? i : edge;
6676 int b_idx= 8 + 4 + x + 8*y; 6708 int b_idx= 8 + 4 + x + 8*y;
6677 int bn_idx= b_idx - (dir ? 8:1); 6709 int bn_idx= b_idx - (dir ? 8:1);
6678 6710
6679 if( h->non_zero_count_cache[b_idx] != 0 || 6711 if( h->non_zero_count_cache[b_idx] != 0 ||
6680 h->non_zero_count_cache[bn_idx] != 0 ) { 6712 h->non_zero_count_cache[bn_idx] != 0 ) {
6681 bS[i] = 2; 6713 bS[i] = 2;
6682 } 6714 }
6683 else 6715 else if(!mv_done)
6684 { 6716 {
6685 int l;
6686 bS[i] = 0; 6717 bS[i] = 0;
6687 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) { 6718 for( l = 0; l < 1 + (h->slice_type == B_TYPE); l++ ) {
6688 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] || 6719 if( ref2frm[h->ref_cache[l][b_idx]+2] != ref2frm[h->ref_cache[l][bn_idx]+2] ||
6689 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 || 6720 ABS( h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] ) >= 4 ||
6690 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) { 6721 ABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= 4 ) {