# HG changeset patch # User romansh # Date 1080179200 0 # Node ID 2761950695cc55eeb943c0604a4c9db7d55163e9 # Parent 129236143f2e1f91c9f008504fcd2a40b48483f2 * some significant clean-up of the dv_decode_ac (it looks real simple now, take a look for yourself). * additional optimizations of the decoder. It runs at 55fps now on my desktop and it used to be ~45fps. diff -r 129236143f2e -r 2761950695cc dv.c --- a/dv.c Wed Mar 24 23:32:48 2004 +0000 +++ b/dv.c Thu Mar 25 01:46:40 2004 +0000 @@ -1,6 +1,7 @@ /* * DV decoder * Copyright (c) 2002 Fabrice Bellard. + * Copyright (c) 2004 Roman Shaposhnik. * * DV encoder * Copyright (c) 2003 Roman Shaposhnik. @@ -39,7 +40,7 @@ uint8_t *buf; uint8_t dv_zigzag[2][64]; - uint8_t dv_idct_shift[2][22][64]; + uint8_t dv_idct_shift[2][2][22][64]; void (*get_pixels)(DCTELEM *block, const uint8_t *pixels, int line_size); void (*fdct[2])(DCTELEM *block); @@ -77,15 +78,17 @@ for(i = 1; i < 64; i++) { /* 88 table */ j = perm[i]; - s->dv_idct_shift[0][q][j] = + s->dv_idct_shift[0][0][q][j] = dv_quant_shifts[q][dv_88_areas[i]] + 1; + s->dv_idct_shift[1][0][q][j] = s->dv_idct_shift[0][0][q][j] + 1; } /* 248DCT */ for(i = 1; i < 64; i++) { /* 248 table */ - s->dv_idct_shift[1][q][i] = + s->dv_idct_shift[0][1][q][i] = dv_quant_shifts[q][dv_248_areas[i]] + 1; + s->dv_idct_shift[1][1][q][i] = s->dv_idct_shift[0][1][q][i] + 1; } } } @@ -98,8 +101,11 @@ int i, j; if (!done) { - int i; VLC dv_vlc; + uint16_t new_dv_vlc_bits[NB_DV_VLC*2]; + uint8_t new_dv_vlc_len[NB_DV_VLC*2]; + uint8_t new_dv_vlc_run[NB_DV_VLC*2]; + int16_t new_dv_vlc_level[NB_DV_VLC*2]; done = 1; @@ -107,6 +113,7 @@ if (!dv_vlc_map) return -ENOMEM; + /* dv_anchor lets each thread know its Id */ dv_anchor = av_malloc(12*27*sizeof(void*)); if (!dv_anchor) { av_free(dv_vlc_map); @@ -114,11 +121,30 @@ } for (i=0; i<12*27; i++) dv_anchor[i] = (void*)(size_t)i; - + + /* it's faster to include sign bit in a generic VLC parsing scheme */ + for (i=0, j=0; ishift_offset; const uint8_t *scan_table = mb->scan_table; const uint8_t *shift_table = mb->shift_table; int pos = mb->pos; - int level, pos1, run; - int partial_bit_count; - int sign = 0; -#ifndef ALT_BITSTREAM_READER //FIXME - int re_index=0; - int re1_index=0; -#endif + int partial_bit_count = mb->partial_bit_count; + int level, pos1, run, vlc_len, index; + OPEN_READER(re, gb); + UPDATE_CACHE(re, gb); -#ifdef VLC_DEBUG - printf("start\n"); -#endif - /* if we must parse a partial vlc, we do it here */ - partial_bit_count = mb->partial_bit_count; if (partial_bit_count > 0) { - uint8_t buf[4]; - uint32_t v; - int l; - GetBitContext gb1; - - /* build the dummy bit buffer */ - l = 16 - partial_bit_count; - UPDATE_CACHE(re, gb); -#ifdef VLC_DEBUG - printf("show=%04x\n", SHOW_UBITS(re, gb, 16)); -#endif - v = (mb->partial_bit_buffer << l) | SHOW_UBITS(re, gb, l); - buf[0] = v >> 8; - buf[1] = v; -#ifdef VLC_DEBUG - printf("v=%04x cnt=%d %04x\n", - v, partial_bit_count, (mb->partial_bit_buffer << l)); -#endif - /* try to read the codeword */ - init_get_bits(&gb1, buf, 4*8); - { - OPEN_READER(re1, &gb1); - UPDATE_CACHE(re1, &gb1); - GET_RL_VLC(level, run, re1, &gb1, dv_rl_vlc, - TEX_VLC_BITS, 2); - l = re1_index; - CLOSE_READER(re1, &gb1); - } -#ifdef VLC_DEBUG - printf("****run=%d level=%d size=%d\n", run, level, l); -#endif - /* compute codeword length -- if too long, we cannot parse */ - l -= partial_bit_count; - if ((re_index + l + (level != 0)) > last_index) { - mb->partial_bit_count += (last_index - re_index); - mb->partial_bit_buffer = v >> (16 - mb->partial_bit_count); - return; - } - - /* skip read bits */ - last_re_index = 0; /* avoid warning */ - re_index += l; - /* by definition, if we can read the vlc, all partial bits - will be read (otherwise we could have read the vlc before) */ - mb->partial_bit_count = 0; - UPDATE_CACHE(re, gb); - goto handle_vlc; + re_cache = ((unsigned)re_cache >> partial_bit_count) | + (mb->partial_bit_buffer << (sizeof(re_cache)*8 - partial_bit_count)); + re_index -= partial_bit_count; + mb->partial_bit_count = 0; } /* get the AC coefficients until last_index is reached */ for(;;) { - UPDATE_CACHE(re, gb); #ifdef VLC_DEBUG - printf("%2d: bits=%04x index=%d\n", - pos, SHOW_UBITS(re, gb, 16), re_index); + printf("%2d: bits=%04x index=%d\n", pos, SHOW_UBITS(re, gb, 16), re_index); #endif - last_re_index = re_index; - GET_RL_VLC(level, run, re, gb, dv_rl_vlc, - TEX_VLC_BITS, 2); - handle_vlc: -#ifdef VLC_DEBUG - printf("run=%d level=%d\n", run, level); -#endif - if (level) { - sign = SHOW_SBITS(re, gb, 1); - LAST_SKIP_BITS(re, gb, 1); - } - if (re_index > last_index) { + /* our own optimized GET_RL_VLC */ + index = NEG_USR32(re_cache, TEX_VLC_BITS); + vlc_len = dv_rl_vlc[index].len; + if (vlc_len < 0) { + index = NEG_USR32((unsigned)re_cache << TEX_VLC_BITS, -vlc_len) + dv_rl_vlc[index].level; + vlc_len = TEX_VLC_BITS - vlc_len; + } + level = dv_rl_vlc[index].level; + run = dv_rl_vlc[index].run; + + /* gotta check if we're still within gb boundaries */ + if (re_index + vlc_len > last_index) { /* should be < 16 bits otherwise a codeword could have been parsed */ - re_index = last_re_index; - UPDATE_CACHE(re, gb); mb->partial_bit_count = last_index - re_index; - mb->partial_bit_buffer = SHOW_UBITS(re, gb, mb->partial_bit_count); + mb->partial_bit_buffer = NEG_USR32(re_cache, mb->partial_bit_count); re_index = last_index; break; } - + re_index += vlc_len; + +#ifdef VLC_DEBUG + printf("run=%d level=%d\n", run, level); +#endif pos += run; if (pos >= 64) break; if (level) { - level = (level ^ sign) - sign; pos1 = scan_table[pos]; - level = level << (shift_table[pos1] + shift_offset); - block[pos1] = level; - // printf("run=%d level=%d shift=%d\n", run, level, shift_table[pos1]); + block[pos1] = level << shift_table[pos1]; } + + UPDATE_CACHE(re, gb); } CLOSE_READER(re, gb); mb->pos = pos; @@ -429,8 +402,7 @@ mb->dct_mode = dct_mode; mb->scan_table = s->dv_zigzag[dct_mode]; class1 = get_bits(&gb, 2); - mb->shift_offset = (class1 == 3); - mb->shift_table = s->dv_idct_shift[dct_mode] + mb->shift_table = s->dv_idct_shift[class1 == 3][dct_mode] [quant + dv_quant_offset[class1]]; dc = dc << 2; /* convert to unsigned because 128 is not added in the