Mercurial > libavcodec.hg
comparison h264.c @ 5705:c08c9340c7ca libavcodec
factor out dequant table lookup outside loops, gives a 1-2% speed-up
patch by Andreas Öman %andreas A olebyn P nu%
original thread:
Date: Sep 24, 2007 12:59 PM
Subject: [FFmpeg-devel] [PATCH] h264: factor out dequant table lookup outside loops
author | gpoirier |
---|---|
date | Mon, 24 Sep 2007 13:01:15 +0000 |
parents | 9a26cb6747a9 |
children | f45228f65e06 |
comparison
equal
deleted
inserted
replaced
5704:cf77b8588f15 | 5705:c08c9340c7ca |
---|---|
5584 } | 5584 } |
5585 s->current_picture.mb_type[mb_xy]= mb_type; | 5585 s->current_picture.mb_type[mb_xy]= mb_type; |
5586 | 5586 |
5587 if( cbp || IS_INTRA16x16( mb_type ) ) { | 5587 if( cbp || IS_INTRA16x16( mb_type ) ) { |
5588 const uint8_t *scan, *scan8x8, *dc_scan; | 5588 const uint8_t *scan, *scan8x8, *dc_scan; |
| 5589 const uint32_t *qmul; |
5589 int dqp; | 5590 int dqp; |
5590 | 5591 |
5591 if(IS_INTERLACED(mb_type)){ | 5592 if(IS_INTERLACED(mb_type)){ |
5592 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0; | 5593 scan8x8= s->qscale ? h->field_scan8x8 : h->field_scan8x8_q0; |
5593 scan= s->qscale ? h->field_scan : h->field_scan_q0; | 5594 scan= s->qscale ? h->field_scan : h->field_scan_q0; |
5615 int i; | 5616 int i; |
5616 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" ); | 5617 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 DC\n" ); |
5617 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16); | 5618 decode_cabac_residual( h, h->mb, 0, 0, dc_scan, NULL, 16); |
5618 | 5619 |
5619 if( cbp&15 ) { | 5620 if( cbp&15 ) { |
| 5621 qmul = h->dequant4_coeff[0][s->qscale]; |
5620 for( i = 0; i < 16; i++ ) { | 5622 for( i = 0; i < 16; i++ ) { |
5621 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i ); | 5623 //av_log( s->avctx, AV_LOG_ERROR, "INTRA16x16 AC:%d\n", i ); |
5622 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, h->dequant4_coeff[0][s->qscale], 15); | 5624 decode_cabac_residual(h, h->mb + 16*i, 1, i, scan + 1, qmul, 15); |
5623 } | 5625 } |
5624 } else { | 5626 } else { |
5625 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1); | 5627 fill_rectangle(&h->non_zero_count_cache[scan8[0]], 4, 4, 8, 0, 1); |
5626 } | 5628 } |
5627 } else { | 5629 } else { |
5629 for( i8x8 = 0; i8x8 < 4; i8x8++ ) { | 5631 for( i8x8 = 0; i8x8 < 4; i8x8++ ) { |
5630 if( cbp & (1<<i8x8) ) { | 5632 if( cbp & (1<<i8x8) ) { |
5631 if( IS_8x8DCT(mb_type) ) { | 5633 if( IS_8x8DCT(mb_type) ) { |
5632 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8, | 5634 decode_cabac_residual(h, h->mb + 64*i8x8, 5, 4*i8x8, |
5633 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64); | 5635 scan8x8, h->dequant8_coeff[IS_INTRA( mb_type ) ? 0:1][s->qscale], 64); |
5634 } else | 5636 } else { |
| 5637 qmul = h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale]; |
5635 for( i4x4 = 0; i4x4 < 4; i4x4++ ) { | 5638 for( i4x4 = 0; i4x4 < 4; i4x4++ ) { |
5636 const int index = 4*i8x8 + i4x4; | 5639 const int index = 4*i8x8 + i4x4; |
5637 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index ); | 5640 //av_log( s->avctx, AV_LOG_ERROR, "Luma4x4: %d\n", index ); |
5638 //START_TIMER | 5641 //START_TIMER |
5639 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, h->dequant4_coeff[IS_INTRA( mb_type ) ? 0:3][s->qscale], 16); | 5642 decode_cabac_residual(h, h->mb + 16*index, 2, index, scan, qmul, 16); |
5640 //STOP_TIMER("decode_residual") | 5643 //STOP_TIMER("decode_residual") |
| 5644 } |
5641 } | 5645 } |
5642 } else { | 5646 } else { |
5643 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ]; | 5647 uint8_t * const nnz= &h->non_zero_count_cache[ scan8[4*i8x8] ]; |
5644 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0; | 5648 nnz[0] = nnz[1] = nnz[8] = nnz[9] = 0; |
5645 } | 5649 } |
5655 } | 5659 } |
5656 | 5660 |
5657 if( cbp&0x20 ) { | 5661 if( cbp&0x20 ) { |
5658 int c, i; | 5662 int c, i; |
5659 for( c = 0; c < 2; c++ ) { | 5663 for( c = 0; c < 2; c++ ) { |
5660 const uint32_t *qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]]; | 5664 qmul = h->dequant4_coeff[c+1+(IS_INTRA( mb_type ) ? 0:3)][h->chroma_qp[c]]; |
5661 for( i = 0; i < 4; i++ ) { | 5665 for( i = 0; i < 4; i++ ) { |
5662 const int index = 16 + 4 * c + i; | 5666 const int index = 16 + 4 * c + i; |
5663 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 ); | 5667 //av_log( s->avctx, AV_LOG_ERROR, "INTRA C%d-AC %d\n",c, index - 16 ); |
5664 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15); | 5668 decode_cabac_residual(h, h->mb + 16*index, 4, index - 16, scan + 1, qmul, 15); |
5665 } | 5669 } |