comparison vc1dec.c @ 9860:7e82083caab7 libavcodec

VC-1/WMV3 decoding: don't clear blocks unless they're actually used. ~8% faster VC-1 decoding. Possible future optimization: clear blocks after use instead of before, and for DC-only blocks, only clear the DC coefficient.
author darkshikari
date Tue, 16 Jun 2009 09:09:03 +0000
parents 7a116de63777
children ff0ea87dd64e
comparison
equal deleted inserted replaced
9859:7a116de63777 9860:7e82083caab7
1798 int a_avail = v->a_avail, c_avail = v->c_avail; 1798 int a_avail = v->a_avail, c_avail = v->c_avail;
1799 int use_pred = s->ac_pred; 1799 int use_pred = s->ac_pred;
1800 int scale; 1800 int scale;
1801 int q1, q2 = 0; 1801 int q1, q2 = 0;
1802 1802
1803 s->dsp.clear_block(block);
1804
1803 /* XXX: Guard against dumb values of mquant */ 1805 /* XXX: Guard against dumb values of mquant */
1804 mquant = (mquant < 1) ? 0 : ( (mquant>31) ? 31 : mquant ); 1806 mquant = (mquant < 1) ? 0 : ( (mquant>31) ? 31 : mquant );
1805 1807
1806 /* Set DC scale - y and c use the same */ 1808 /* Set DC scale - y and c use the same */
1807 s->y_dc_scale = s->y_dc_scale_table[mquant]; 1809 s->y_dc_scale = s->y_dc_scale_table[mquant];
1986 int i, j; 1988 int i, j;
1987 int subblkpat = 0; 1989 int subblkpat = 0;
1988 int scale, off, idx, last, skip, value; 1990 int scale, off, idx, last, skip, value;
1989 int ttblk = ttmb & 7; 1991 int ttblk = ttmb & 7;
1990 int pat = 0; 1992 int pat = 0;
1993
1994 s->dsp.clear_block(block);
1991 1995
1992 if(ttmb == -1) { 1996 if(ttmb == -1) {
1993 ttblk = ff_vc1_ttblk_to_tt[v->tt_index][get_vlc2(gb, ff_vc1_ttblk_vlc[v->tt_index].table, VC1_TTBLK_VLC_BITS, 1)]; 1997 ttblk = ff_vc1_ttblk_to_tt[v->tt_index][get_vlc2(gb, ff_vc1_ttblk_vlc[v->tt_index].table, VC1_TTBLK_VLC_BITS, 1)];
1994 } 1998 }
1995 if(ttblk == TT_4X4) { 1999 if(ttblk == TT_4X4) {
2163 fourmv = v->mv_type_mb_plane[mb_pos]; 2167 fourmv = v->mv_type_mb_plane[mb_pos];
2164 if (v->skip_is_raw) 2168 if (v->skip_is_raw)
2165 skipped = get_bits1(gb); 2169 skipped = get_bits1(gb);
2166 else 2170 else
2167 skipped = v->s.mbskip_table[mb_pos]; 2171 skipped = v->s.mbskip_table[mb_pos];
2168
2169 s->dsp.clear_blocks(s->block[0]);
2170 2172
2171 apply_loop_filter = s->loop_filter && !(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY); 2173 apply_loop_filter = s->loop_filter && !(s->avctx->skip_loop_filter >= AVDISCARD_NONKEY);
2172 if (!fourmv) /* 1MV mode */ 2174 if (!fourmv) /* 1MV mode */
2173 { 2175 {
2174 if (!skipped) 2176 if (!skipped)
2457 if (v->skip_is_raw) 2459 if (v->skip_is_raw)
2458 skipped = get_bits1(gb); 2460 skipped = get_bits1(gb);
2459 else 2461 else
2460 skipped = v->s.mbskip_table[mb_pos]; 2462 skipped = v->s.mbskip_table[mb_pos];
2461 2463
2462 s->dsp.clear_blocks(s->block[0]);
2463 dmv_x[0] = dmv_x[1] = dmv_y[0] = dmv_y[1] = 0; 2464 dmv_x[0] = dmv_x[1] = dmv_y[0] = dmv_y[1] = 0;
2464 for(i = 0; i < 6; i++) { 2465 for(i = 0; i < 6; i++) {
2465 v->mb_type[0][s->block_index[i]] = 0; 2466 v->mb_type[0][s->block_index[i]] = 0;
2466 s->dc_val[0][s->block_index[i]] = 0; 2467 s->dc_val[0][s->block_index[i]] = 0;
2467 } 2468 }
2849 memset(v->cbp_base, 0, sizeof(v->cbp_base[0])*2*s->mb_stride); 2850 memset(v->cbp_base, 0, sizeof(v->cbp_base[0])*2*s->mb_stride);
2850 for(s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) { 2851 for(s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) {
2851 for(s->mb_x = 0; s->mb_x < s->mb_width; s->mb_x++) { 2852 for(s->mb_x = 0; s->mb_x < s->mb_width; s->mb_x++) {
2852 ff_init_block_index(s); 2853 ff_init_block_index(s);
2853 ff_update_block_index(s); 2854 ff_update_block_index(s);
2854 s->dsp.clear_blocks(s->block[0]);
2855 2855
2856 vc1_decode_p_mb(v); 2856 vc1_decode_p_mb(v);
2857 if(get_bits_count(&s->gb) > v->bits || get_bits_count(&s->gb) < 0) { 2857 if(get_bits_count(&s->gb) > v->bits || get_bits_count(&s->gb) < 0) {
2858 ff_er_add_slice(s, 0, 0, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)); 2858 ff_er_add_slice(s, 0, 0, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END));
2859 av_log(s->avctx, AV_LOG_ERROR, "Bits overconsumption: %i > %i at %ix%i\n", get_bits_count(&s->gb), v->bits,s->mb_x,s->mb_y); 2859 av_log(s->avctx, AV_LOG_ERROR, "Bits overconsumption: %i > %i at %ix%i\n", get_bits_count(&s->gb), v->bits,s->mb_x,s->mb_y);
2899 s->first_slice_line = 1; 2899 s->first_slice_line = 1;
2900 for(s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) { 2900 for(s->mb_y = 0; s->mb_y < s->mb_height; s->mb_y++) {
2901 for(s->mb_x = 0; s->mb_x < s->mb_width; s->mb_x++) { 2901 for(s->mb_x = 0; s->mb_x < s->mb_width; s->mb_x++) {
2902 ff_init_block_index(s); 2902 ff_init_block_index(s);
2903 ff_update_block_index(s); 2903 ff_update_block_index(s);
2904 s->dsp.clear_blocks(s->block[0]);
2905 2904
2906 vc1_decode_b_mb(v); 2905 vc1_decode_b_mb(v);
2907 if(get_bits_count(&s->gb) > v->bits || get_bits_count(&s->gb) < 0) { 2906 if(get_bits_count(&s->gb) > v->bits || get_bits_count(&s->gb) < 0) {
2908 ff_er_add_slice(s, 0, 0, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END)); 2907 ff_er_add_slice(s, 0, 0, s->mb_x, s->mb_y, (AC_END|DC_END|MV_END));
2909 av_log(s->avctx, AV_LOG_ERROR, "Bits overconsumption: %i > %i at %ix%i\n", get_bits_count(&s->gb), v->bits,s->mb_x,s->mb_y); 2908 av_log(s->avctx, AV_LOG_ERROR, "Bits overconsumption: %i > %i at %ix%i\n", get_bits_count(&s->gb), v->bits,s->mb_x,s->mb_y);