comparison h264_loopfilter.c @ 10909:f4cf3960b8c6 libavcodec

Reorganize how values are stored in h->non_zero_count. ~1% faster
author michael
date Sun, 17 Jan 2010 23:44:23 +0000
parents 1b5fba731e24
children 7cecaa3a6b38
comparison
equal deleted inserted replaced
10908:28840dfd4b52 10909:f4cf3960b8c6
470 ) { 470 ) {
471 // This is a special case in the norm where the filtering must 471 // This is a special case in the norm where the filtering must
472 // be done twice (once for each field) even if we are in a 472 // be done twice (once for each field) even if we are in a
473 // frame macroblock. 473 // frame macroblock.
474 // 474 //
475 static const int nnz_idx[4] = {4,5,6,3};
476 unsigned int tmp_linesize = 2 * linesize; 475 unsigned int tmp_linesize = 2 * linesize;
477 unsigned int tmp_uvlinesize = 2 * uvlinesize; 476 unsigned int tmp_uvlinesize = 2 * uvlinesize;
478 int mbn_xy = mb_xy - 2 * s->mb_stride; 477 int mbn_xy = mb_xy - 2 * s->mb_stride;
479 int qp; 478 int qp;
480 int i, j; 479 int i, j;
486 bS[0] = bS[1] = bS[2] = bS[3] = 3; 485 bS[0] = bS[1] = bS[2] = bS[3] = 3;
487 } else { 486 } else {
488 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy]; 487 const uint8_t *mbn_nnz = h->non_zero_count[mbn_xy];
489 for( i = 0; i < 4; i++ ) { 488 for( i = 0; i < 4; i++ ) {
490 if( h->non_zero_count_cache[scan8[0]+i] != 0 || 489 if( h->non_zero_count_cache[scan8[0]+i] != 0 ||
491 mbn_nnz[nnz_idx[i]] != 0 ) 490 mbn_nnz[i+4+3*8] != 0 )
492 bS[i] = 2; 491 bS[i] = 2;
493 else 492 else
494 bS[i] = 1; 493 bS[i] = 1;
495 } 494 }
496 } 495 }
661 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh) 660 && (mb_x == 0 || ((qp + s->current_picture.qscale_table[mb_xy-1] + 1)>>1) <= qp_thresh)
662 && (h->top_mb_xy < 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){ 661 && (h->top_mb_xy < 0 || ((qp + s->current_picture.qscale_table[h->top_mb_xy] + 1)>>1) <= qp_thresh)){
663 return; 662 return;
664 } 663 }
665 } 664 }
666
667 h->non_zero_count_cache[7+8*1]=h->non_zero_count[mb_xy][0];
668 h->non_zero_count_cache[7+8*2]=h->non_zero_count[mb_xy][1];
669 h->non_zero_count_cache[7+8*3]=h->non_zero_count[mb_xy][2];
670 h->non_zero_count_cache[7+8*4]=h->non_zero_count[mb_xy][3];
671 h->non_zero_count_cache[4+8*4]=h->non_zero_count[mb_xy][4];
672 h->non_zero_count_cache[5+8*4]=h->non_zero_count[mb_xy][5];
673 h->non_zero_count_cache[6+8*4]=h->non_zero_count[mb_xy][6];
674
675 h->non_zero_count_cache[1+8*2]=h->non_zero_count[mb_xy][9];
676 h->non_zero_count_cache[2+8*2]=h->non_zero_count[mb_xy][8];
677 h->non_zero_count_cache[2+8*1]=h->non_zero_count[mb_xy][7];
678
679 h->non_zero_count_cache[1+8*5]=h->non_zero_count[mb_xy][12];
680 h->non_zero_count_cache[2+8*5]=h->non_zero_count[mb_xy][11];
681 h->non_zero_count_cache[2+8*4]=h->non_zero_count[mb_xy][10];
682
683 h->non_zero_count_cache[6+8*1]=h->non_zero_count[mb_xy][13];
684 h->non_zero_count_cache[6+8*2]=h->non_zero_count[mb_xy][14];
685 h->non_zero_count_cache[6+8*3]=h->non_zero_count[mb_xy][15];
686 h->non_zero_count_cache[5+8*1]=h->non_zero_count[mb_xy][16];
687 h->non_zero_count_cache[5+8*2]=h->non_zero_count[mb_xy][17];
688 h->non_zero_count_cache[5+8*3]=h->non_zero_count[mb_xy][18];
689 h->non_zero_count_cache[4+8*1]=h->non_zero_count[mb_xy][19];
690 h->non_zero_count_cache[4+8*2]=h->non_zero_count[mb_xy][20];
691 h->non_zero_count_cache[4+8*3]=h->non_zero_count[mb_xy][21];
692
693 h->non_zero_count_cache[1+8*1]=h->non_zero_count[mb_xy][22];
694 h->non_zero_count_cache[1+8*4]=h->non_zero_count[mb_xy][23];
695
696 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs 665 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
697 if(!h->pps.cabac && h->pps.transform_8x8_mode){ 666 if(!h->pps.cabac && h->pps.transform_8x8_mode){
698 int top_type, left_type[2]; 667 int top_type, left_type[2];
699 top_type = s->current_picture.mb_type[h->top_mb_xy] ; 668 top_type = s->current_picture.mb_type[h->top_mb_xy] ;
700 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]]; 669 left_type[0] = s->current_picture.mb_type[h->left_mb_xy[0]];
760 bS[i] = 4; 729 bS[i] = 4;
761 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 || 730 else if( h->non_zero_count_cache[12+8*(i>>1)] != 0 ||
762 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ? 731 ((!h->pps.cabac && IS_8x8DCT(s->current_picture.mb_type[mbn_xy])) ?
763 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2)) 732 (h->cbp_table[mbn_xy] & ((MB_FIELD ? (i&2) : (mb_y&1)) ? 8 : 2))
764 : 733 :
765 h->non_zero_count[mbn_xy][MB_FIELD ? i&3 : (i>>2)+(mb_y&1)*2])) 734 h->non_zero_count[mbn_xy][7+(MB_FIELD ? (i&3) : (i>>2)+(mb_y&1)*2)*8]))
766 bS[i] = 2; 735 bS[i] = 2;
767 else 736 else
768 bS[i] = 1; 737 bS[i] = 1;
769 } 738 }
770 } 739 }