Mercurial > libavcodec.hg
comparison vc1dec.c @ 9860:7e82083caab7 libavcodec
VC-1/WMV3 decoding: don't clear blocks unless they're actually used.
~8% faster VC-1 decoding.
Possible future optimization: clear blocks after use instead of before, and for
DC-only blocks, only clear the DC coefficient.
author | darkshikari |
---|---|
date | Tue, 16 Jun 2009 09:09:03 +0000 |
parents | 7a116de63777 |
children | ff0ea87dd64e |
comparison legend: equal | deleted | inserted | replaced
9859:7a116de63777 | 9860:7e82083caab7 |
---|---|
1798 int a_avail = v->a_avail, c_avail = v->c_avail; | 1798 int a_avail = v->a_avail, c_avail = v->c_avail; |
1799 int use_pred = s->ac_pred; | 1799 int use_pred = s->ac_pred; |
1800 int scale; | 1800 int scale; |
1801 int q1, q2 = 0; | 1801 int q1, q2 = 0; |
1802 | 1802 |
1803 s->dsp.clear_block(block); | |
1804 | |
1803 /* XXX: Guard against dumb values of mquant */ | 1805 /* XXX: Guard against dumb values of mquant */ |
1804 mquant = (mquant < 1) ? 0 : ( (mquant>31) ? 31 : mquant ); | 1806 mquant = (mquant < 1) ? 0 : ( (mquant>31) ? 31 : mquant ); |
1805 | 1807 |
1806 /* Set DC scale - y and c use the same */ | 1808 /* Set DC scale - y and c use the same */ |
1807 s->y_dc_scale = s->y_dc_scale_table[mquant]; | 1809 s->y_dc_scale = s->y_dc_scale_table[mquant]; |
1986 int i, j; | 1988 int i, j; |
1987 int subblkpat = 0; | 1989 int subblkpat = 0; |
1988 int scale, off, idx, last, skip, value; | 1990 int scale, off, idx, last, skip, value; |
1989 int ttblk = ttmb & 7; | 1991 int ttblk = ttmb & 7; |
1990 int pat = 0; | 1992 int pat = 0; |
1993 | |
1994 s->dsp.clear_block(block); | |
1991 | 1995 |
1992 if(ttmb == -1) { | 1996 if(ttmb == -1) { |
1993 ttblk = ff_vc1_ttblk_to_tt[v->tt_index][get_vlc2(gb, ff_vc1_ttblk_vlc[v->tt_index].table, VC1_TTBLK_VLC_BITS, 1)]; | 1997 ttblk = ff_vc1_ttblk_to_tt[v->tt_index][get_vlc2(gb, ff_vc1_ttblk_vlc[v->tt_index].table, VC1_TTBLK_VLC_BITS, 1)]; |
1994 } | 1998 } |
1995 if(ttblk == TT_4X4) { | 1999 if(ttblk == TT_4X4) { |
2163 fourmv = v->mv_type_mb_plane[mb_pos]; | 2167 fourmv = v->mv_type_mb_plane[mb_pos]; |
2164 if (v->skip_is_raw) | 2168 if (v->skip_is_raw) |
2165 skipped = get_bits1(gb); | 2169 skipped = get_bits1(gb); |
2166 else | 2170 else |
2167 skipped = v->s.mbskip_table[mb_pos]; | 2171 skipped = v->s.mbskip_table[mb_pos]; |
2168 | |
2169 s->dsp.clear_blocks(s->block[0]); | |
2170 | 2172 |
2171 apply_loop_filter = s->loop_filter && !(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY); | 2173 apply_loop_filter = s->loop_filter && !(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY); |
2172 if (!fourmv) /* 1MV mode */ | 2174 if (!fourmv) /* 1MV mode */ |
2173 { | 2175 { |
2174 if (!skipped) | 2176 if (!skipped) |
2457 if (v->skip_is_raw) | 2459 if (v->skip_is_raw) |
2458 skipped = get_bits1(gb); | 2460 skipped = get_bits1(gb); |
2459 else | 2461 else |
2460 skipped = v->s.mbskip_table[mb_pos]; | 2462 skipped = v->s.mbskip_table[mb_pos]; |
2461 | 2463 |
2462 s->dsp.clear_blocks(s->block[0]); | |
2463 dmv_x[0] = dmv_x[1] = dmv_y[0] = dmv_y[1] = 0; | 2464 dmv_x[0] = dmv_x[1] = dmv_y[0] = dmv_y[1] = 0; |
2464 for(i = 0; i < 6; i++) { | 2465 for(i = 0; i < 6; i++) { |
2465 v->mb_type[0][s->block_index[i]] = 0; | 2466 v->mb_type[0][s->block_index[i]] = 0; |
2466 s->dc_val[0][s->block_index[i]] = 0; | 2467 s->dc_val[0][s->block_index[i]] = 0; |
2467 } | 2468 } |
2849 memset(v->cbp_base, 0, sizeof(v->cbp_base[0])*2*s->mb_stride); | 2850 memset(v->cbp_base, 0, sizeof(v->cbp_base[0])*2*s->mb_stride); |
2850 for(s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) { | 2851 for(s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) { |
2851 for(s->mb_x = 0; s->mb_x < s->mb_width; s->mb_x++) { | 2852 for(s->mb_x = 0; s->mb_x < s->mb_width; s->mb_x++) { |
2852 ff_init_block_index(s); | 2853 ff_init_block_index(s); |
2853 ff_update_block_index(s); | 2854 ff_update_block_index(s); |
2854 s->dsp.clear_blocks(s->block[0]); | |
2855 | 2855 |
2856 vc1_decode_p_mb(v); | 2856 vc1_decode_p_mb(v); |
2857 if(get_bits_count(&s->gb) > v->bits || get_bits_count(&s->gb) < 0) { | 2857 if(get_bits_count(&s->gb) > v->bits || get_bits_count(&s->gb) < 0) { |
2858 ff_er_add_slice(s, 0, 0, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)); | 2858 ff_er_add_slice(s, 0, 0, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)); |
2859 av_log(s->avctx, AV_LOG_ERROR, "Bits overconsumption: %i > %i at %ix%i\n", get_bits_count(&s->gb), v->bits,s->mb_x,s->mb_y); | 2859 av_log(s->avctx, AV_LOG_ERROR, "Bits overconsumption: %i > %i at %ix%i\n", get_bits_count(&s->gb), v->bits,s->mb_x,s->mb_y); |
2899 s->first_slice_line = 1; | 2899 s->first_slice_line = 1; |
2900 for(s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) { | 2900 for(s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) { |
2901 for(s->mb_x = 0; s->mb_x < s->mb_width; s->mb_x++) { | 2901 for(s->mb_x = 0; s->mb_x < s->mb_width; s->mb_x++) { |
2902 ff_init_block_index(s); | 2902 ff_init_block_index(s); |
2903 ff_update_block_index(s); | 2903 ff_update_block_index(s); |
2904 s->dsp.clear_blocks(s->block[0]); | |
2905 | 2904 |
2906 vc1_decode_b_mb(v); | 2905 vc1_decode_b_mb(v); |
2907 if(get_bits_count(&s->gb) > v->bits || get_bits_count(&s->gb) < 0) { | 2906 if(get_bits_count(&s->gb) > v->bits || get_bits_count(&s->gb) < 0) { |
2908 ff_er_add_slice(s, 0, 0, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)); | 2907 ff_er_add_slice(s, 0, 0, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)); |
2909 av_log(s->avctx, AV_LOG_ERROR, "Bits overconsumption: %i > %i at %ix%i\n", get_bits_count(&s->gb), v->bits,s->mb_x,s->mb_y); | 2908 av_log(s->avctx, AV_LOG_ERROR, "Bits overconsumption: %i > %i at %ix%i\n", get_bits_count(&s->gb), v->bits,s->mb_x,s->mb_y); |