Mercurial > libavcodec.hg
comparison h264_loopfilter.c @ 10906:1b5fba731e24 libavcodec
Rearchitecting the stitched-up goose, part 1
Run the loop filter per row instead of per MB; this should also make it
much easier to switch to per-frame filtering, and to do so in a
separate thread in the future, if some volunteer wants to try.
Overall decoding speedup of 1.7% (single thread on pentium dual / cathedral sample)
This change also allows some optimizations to be tried that would not have
been possible before.
author | michael |
---|---|
date | Sun, 17 Jan 2010 20:35:55 +0000 |
parents | b30aef0c693f |
children | f4cf3960b8c6 |
comparison
equal
deleted
inserted
replaced
10905:06d20a468d1e | 10906:1b5fba731e24 |
---|---|
618 | 618 |
619 /* Filter edge */ | 619 /* Filter edge */ |
620 // Do not use s->qscale as luma quantizer because it has not the same | 620 // Do not use s->qscale as luma quantizer because it has not the same |
621 // value in IPCM macroblocks. | 621 // value in IPCM macroblocks. |
622 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1; | 622 qp = ( s->current_picture.qscale_table[mb_xy] + s->current_picture.qscale_table[mbn_xy] + 1 ) >> 1; |
623 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp, s->current_picture.qscale_table[mbn_xy]); | 623 //tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d, QPc:%d, QPcn:%d\n", mb_x, mb_y, dir, edge, qp, h->chroma_qp[0], s->current_picture.qscale_table[mbn_xy]); |
624 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize); | 624 tprintf(s->avctx, "filter mb:%d/%d dir:%d edge:%d, QPy:%d ls:%d uvls:%d", mb_x, mb_y, dir, edge, qp, linesize, uvlinesize); |
625 //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } | 625 //{ int i; for (i = 0; i < 4; i++) tprintf(s->avctx, " bS[%d]:%d", i, bS[i]); tprintf(s->avctx, "\n"); } |
626 if( dir == 0 ) { | 626 if( dir == 0 ) { |
627 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp ); | 627 filter_mb_edgev( h, &img_y[4*edge], linesize, bS, qp ); |
628 if( (edge&1) == 0 ) { | 628 if( (edge&1) == 0 ) { |
648 const int mb_xy= mb_x + mb_y*s->mb_stride; | 648 const int mb_xy= mb_x + mb_y*s->mb_stride; |
649 const int mb_type = s->current_picture.mb_type[mb_xy]; | 649 const int mb_type = s->current_picture.mb_type[mb_xy]; |
650 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4; | 650 const int mvy_limit = IS_INTERLACED(mb_type) ? 2 : 4; |
651 int first_vertical_edge_done = 0; | 651 int first_vertical_edge_done = 0; |
652 av_unused int dir; | 652 av_unused int dir; |
653 int list; | |
653 | 654 |
654 //for sufficiently low qp, filtering wouldn't do anything | 655 //for sufficiently low qp, filtering wouldn't do anything |
655 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp | 656 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp |
656 if(!FRAME_MBAFF){ | 657 if(!FRAME_MBAFF){ |
657 int qp_thresh = h->qp_thresh; | 658 int qp_thresh = h->qp_thresh; |
661 && (h->top_mb_xy < 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){ | 662 && (h->top_mb_xy < 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){ |
662 return; | 663 return; |
663 } | 664 } |
664 } | 665 } |
665 | 666 |
667 h->non_zero_count_cache[7+8*1]=h->non_zero_count[mb_xy][0]; | |
668 h->non_zero_count_cache[7+8*2]=h->non_zero_count[mb_xy][1]; | |
669 h->non_zero_count_cache[7+8*3]=h->non_zero_count[mb_xy][2]; | |
670 h->non_zero_count_cache[7+8*4]=h->non_zero_count[mb_xy][3]; | |
671 h->non_zero_count_cache[4+8*4]=h->non_zero_count[mb_xy][4]; | |
672 h->non_zero_count_cache[5+8*4]=h->non_zero_count[mb_xy][5]; | |
673 h->non_zero_count_cache[6+8*4]=h->non_zero_count[mb_xy][6]; | |
674 | |
675 h->non_zero_count_cache[1+8*2]=h->non_zero_count[mb_xy][9]; | |
676 h->non_zero_count_cache[2+8*2]=h->non_zero_count[mb_xy][8]; | |
677 h->non_zero_count_cache[2+8*1]=h->non_zero_count[mb_xy][7]; | |
678 | |
679 h->non_zero_count_cache[1+8*5]=h->non_zero_count[mb_xy][12]; | |
680 h->non_zero_count_cache[2+8*5]=h->non_zero_count[mb_xy][11]; | |
681 h->non_zero_count_cache[2+8*4]=h->non_zero_count[mb_xy][10]; | |
682 | |
683 h->non_zero_count_cache[6+8*1]=h->non_zero_count[mb_xy][13]; | |
684 h->non_zero_count_cache[6+8*2]=h->non_zero_count[mb_xy][14]; | |
685 h->non_zero_count_cache[6+8*3]=h->non_zero_count[mb_xy][15]; | |
686 h->non_zero_count_cache[5+8*1]=h->non_zero_count[mb_xy][16]; | |
687 h->non_zero_count_cache[5+8*2]=h->non_zero_count[mb_xy][17]; | |
688 h->non_zero_count_cache[5+8*3]=h->non_zero_count[mb_xy][18]; | |
689 h->non_zero_count_cache[4+8*1]=h->non_zero_count[mb_xy][19]; | |
690 h->non_zero_count_cache[4+8*2]=h->non_zero_count[mb_xy][20]; | |
691 h->non_zero_count_cache[4+8*3]=h->non_zero_count[mb_xy][21]; | |
692 | |
693 h->non_zero_count_cache[1+8*1]=h->non_zero_count[mb_xy][22]; | |
694 h->non_zero_count_cache[1+8*4]=h->non_zero_count[mb_xy][23]; | |
695 | |
666 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs | 696 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs |
667 if(!h->pps.cabac && h->pps.transform_8x8_mode){ | 697 if(!h->pps.cabac && h->pps.transform_8x8_mode){ |
668 int top_type, left_type[2]; | 698 int top_type, left_type[2]; |
669 top_type = s->current_picture.mb_type[h->top_mb_xy] ; | 699 top_type = s->current_picture.mb_type[h->top_mb_xy] ; |
670 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]]; | 700 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]]; |
685 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF | 715 h->non_zero_count_cache[3+8*4]= h->cbp_table[h->left_mb_xy[1]]&8; //FIXME check MBAFF |
686 } | 716 } |
687 | 717 |
688 if(IS_8x8DCT(mb_type)){ | 718 if(IS_8x8DCT(mb_type)){ |
689 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]= | 719 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]= |
690 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1; | 720 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp_table[mb_xy] & 1; |
691 | 721 |
692 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]= | 722 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]= |
693 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2; | 723 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp_table[mb_xy] & 2; |
694 | 724 |
695 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]= | 725 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]= |
696 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4; | 726 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp_table[mb_xy] & 4; |
697 | 727 |
698 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]= | 728 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]= |
699 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8; | 729 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp_table[mb_xy] & 8; |
700 } | 730 } |
701 } | 731 } |
702 | 732 |
703 if (FRAME_MBAFF | 733 if (FRAME_MBAFF |
704 // left mb is in picture | 734 // left mb is in picture |