Mercurial > libavcodec.hg
comparison h264_loopfilter.c @ 10909:f4cf3960b8c6 libavcodec
Reorganize how values are stored in h->non_zero_count.
~1% faster
author | michael |
---|---|
date | Sun, 17 Jan 2010 23:44:23 +0000 |
parents | 1b5fba731e24 |
children | 7cecaa3a6b38 |
comparison
Legend (row highlighting in the table below): equal, deleted, inserted, replaced
10908:28840dfd4b52 | 10909:f4cf3960b8c6 |
---|---|
470 ) { | 470 ) { |
471 // This is a special case in the norm where the filtering must | 471 // This is a special case in the norm where the filtering must |
472 // be done twice (one each of the field) even if we are in a | 472 // be done twice (one each of the field) even if we are in a |
473 // frame macroblock. | 473 // frame macroblock. |
474 // | 474 // |
475 static const int nnz_idx[4] = {4,5,6,3}; | |
476 unsigned int tmp_linesize = 2 * linesize; | 475 unsigned int tmp_linesize = 2 * linesize; |
477 unsigned int tmp_uvlinesize = 2 * uvlinesize; | 476 unsigned int tmp_uvlinesize = 2 * uvlinesize; |
478 int mbn_xy = mb_xy - 2 * s->mb_stride; | 477 int mbn_xy = mb_xy - 2 * s->mb_stride; |
479 int qp; | 478 int qp; |
480 int i, j; | 479 int i, j; |
486 bS[0] = bS[1] = bS[2] = bS[3] = 3; | 485 bS[0] = bS[1] = bS[2] = bS[3] = 3; |
487 } else { | 486 } else { |
488 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy]; | 487 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy]; |
489 for( i = 0; i < 4; i++ ) { | 488 for( i = 0; i < 4; i++ ) { |
490 if( h->non_zero_count_cache[scan8[0]+i] != 0 || | 489 if( h->non_zero_count_cache[scan8[0]+i] != 0 || |
491 mbn_nnz[nnz_idx[i]] != 0 ) | 490 mbn_nnz[i+4+3*8] != 0 ) |
492 bS[i] = 2; | 491 bS[i] = 2; |
493 else | 492 else |
494 bS[i] = 1; | 493 bS[i] = 1; |
495 } | 494 } |
496 } | 495 } |
661 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh) | 660 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh) |
662 && (h->top_mb_xy < 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){ | 661 && (h->top_mb_xy < 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){ |
663 return; | 662 return; |
664 } | 663 } |
665 } | 664 } |
666 | |
667 h->non_zero_count_cache[7+8*1]=h->non_zero_count[mb_xy][0]; | |
668 h->non_zero_count_cache[7+8*2]=h->non_zero_count[mb_xy][1]; | |
669 h->non_zero_count_cache[7+8*3]=h->non_zero_count[mb_xy][2]; | |
670 h->non_zero_count_cache[7+8*4]=h->non_zero_count[mb_xy][3]; | |
671 h->non_zero_count_cache[4+8*4]=h->non_zero_count[mb_xy][4]; | |
672 h->non_zero_count_cache[5+8*4]=h->non_zero_count[mb_xy][5]; | |
673 h->non_zero_count_cache[6+8*4]=h->non_zero_count[mb_xy][6]; | |
674 | |
675 h->non_zero_count_cache[1+8*2]=h->non_zero_count[mb_xy][9]; | |
676 h->non_zero_count_cache[2+8*2]=h->non_zero_count[mb_xy][8]; | |
677 h->non_zero_count_cache[2+8*1]=h->non_zero_count[mb_xy][7]; | |
678 | |
679 h->non_zero_count_cache[1+8*5]=h->non_zero_count[mb_xy][12]; | |
680 h->non_zero_count_cache[2+8*5]=h->non_zero_count[mb_xy][11]; | |
681 h->non_zero_count_cache[2+8*4]=h->non_zero_count[mb_xy][10]; | |
682 | |
683 h->non_zero_count_cache[6+8*1]=h->non_zero_count[mb_xy][13]; | |
684 h->non_zero_count_cache[6+8*2]=h->non_zero_count[mb_xy][14]; | |
685 h->non_zero_count_cache[6+8*3]=h->non_zero_count[mb_xy][15]; | |
686 h->non_zero_count_cache[5+8*1]=h->non_zero_count[mb_xy][16]; | |
687 h->non_zero_count_cache[5+8*2]=h->non_zero_count[mb_xy][17]; | |
688 h->non_zero_count_cache[5+8*3]=h->non_zero_count[mb_xy][18]; | |
689 h->non_zero_count_cache[4+8*1]=h->non_zero_count[mb_xy][19]; | |
690 h->non_zero_count_cache[4+8*2]=h->non_zero_count[mb_xy][20]; | |
691 h->non_zero_count_cache[4+8*3]=h->non_zero_count[mb_xy][21]; | |
692 | |
693 h->non_zero_count_cache[1+8*1]=h->non_zero_count[mb_xy][22]; | |
694 h->non_zero_count_cache[1+8*4]=h->non_zero_count[mb_xy][23]; | |
695 | |
696 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs | 665 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs |
697 if(!h->pps.cabac && h->pps.transform_8x8_mode){ | 666 if(!h->pps.cabac && h->pps.transform_8x8_mode){ |
698 int top_type, left_type[2]; | 667 int top_type, left_type[2]; |
699 top_type = s->current_picture.mb_type[h->top_mb_xy] ; | 668 top_type = s->current_picture.mb_type[h->top_mb_xy] ; |
700 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]]; | 669 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]]; |
760 bS[i] = 4; | 729 bS[i] = 4; |
761 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 || | 730 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 || |
762 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ? | 731 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ? |
763 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2)) | 732 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2)) |
764 : | 733 : |
765 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2])) | 734 h->non_zero_count[mbn_xy][7+(MB_FIELD ? (i&3) : (i>>2)+(mb_y&1)*2)*8])) |
766 bS[i] = 2; | 735 bS[i] = 2; |
767 else | 736 else |
768 bS[i] = 1; | 737 bS[i] = 1; |
769 } | 738 } |
770 } | 739 } |