comparison dv.c @ 1905:2761950695cc libavcodec

* Some significant clean-up of dv_decode_ac (it looks really simple now; take a look for yourself). * Additional optimizations of the decoder: it now runs at 55fps on my desktop, up from ~45fps.
author romansh
date Thu, 25 Mar 2004 01:46:40 +0000
parents 129236143f2e
children 8d3540dddd1b
comparison
equal deleted inserted replaced
1904:129236143f2e 1905:2761950695cc
1 /* 1 /*
2 * DV decoder 2 * DV decoder
3 * Copyright (c) 2002 Fabrice Bellard. 3 * Copyright (c) 2002 Fabrice Bellard.
4 * Copyright (c) 2004 Roman Shaposhnik.
4 * 5 *
5 * DV encoder 6 * DV encoder
6 * Copyright (c) 2003 Roman Shaposhnik. 7 * Copyright (c) 2003 Roman Shaposhnik.
7 * 8 *
8 * Many thanks to Dan Dennedy <dan@dennedy.org> for providing wealth 9 * Many thanks to Dan Dennedy <dan@dennedy.org> for providing wealth
37 const DVprofile* sys; 38 const DVprofile* sys;
38 AVFrame picture; 39 AVFrame picture;
39 uint8_t *buf; 40 uint8_t *buf;
40 41
41 uint8_t dv_zigzag[2][64]; 42 uint8_t dv_zigzag[2][64];
42 uint8_t dv_idct_shift[2][22][64]; 43 uint8_t dv_idct_shift[2][2][22][64];
43 44
44 void (*get_pixels)(DCTELEM *block, const uint8_t *pixels, int line_size); 45 void (*get_pixels)(DCTELEM *block, const uint8_t *pixels, int line_size);
45 void (*fdct[2])(DCTELEM *block); 46 void (*fdct[2])(DCTELEM *block);
46 void (*idct_put[2])(uint8_t *dest, int line_size, DCTELEM *block); 47 void (*idct_put[2])(uint8_t *dest, int line_size, DCTELEM *block);
47 } DVVideoContext; 48 } DVVideoContext;
75 for(q = 0; q < 22; q++) { 76 for(q = 0; q < 22; q++) {
76 /* 88DCT */ 77 /* 88DCT */
77 for(i = 1; i < 64; i++) { 78 for(i = 1; i < 64; i++) {
78 /* 88 table */ 79 /* 88 table */
79 j = perm[i]; 80 j = perm[i];
80 s->dv_idct_shift[0][q][j] = 81 s->dv_idct_shift[0][0][q][j] =
81 dv_quant_shifts[q][dv_88_areas[i]] + 1; 82 dv_quant_shifts[q][dv_88_areas[i]] + 1;
83 s->dv_idct_shift[1][0][q][j] = s->dv_idct_shift[0][0][q][j] + 1;
82 } 84 }
83 85
84 /* 248DCT */ 86 /* 248DCT */
85 for(i = 1; i < 64; i++) { 87 for(i = 1; i < 64; i++) {
86 /* 248 table */ 88 /* 248 table */
87 s->dv_idct_shift[1][q][i] = 89 s->dv_idct_shift[0][1][q][i] =
88 dv_quant_shifts[q][dv_248_areas[i]] + 1; 90 dv_quant_shifts[q][dv_248_areas[i]] + 1;
91 s->dv_idct_shift[1][1][q][i] = s->dv_idct_shift[0][1][q][i] + 1;
89 } 92 }
90 } 93 }
91 } 94 }
92 95
93 static int dvvideo_init(AVCodecContext *avctx) 96 static int dvvideo_init(AVCodecContext *avctx)
96 DSPContext dsp; 99 DSPContext dsp;
97 static int done=0; 100 static int done=0;
98 int i, j; 101 int i, j;
99 102
100 if (!done) { 103 if (!done) {
101 int i;
102 VLC dv_vlc; 104 VLC dv_vlc;
105 uint16_t new_dv_vlc_bits[NB_DV_VLC*2];
106 uint8_t new_dv_vlc_len[NB_DV_VLC*2];
107 uint8_t new_dv_vlc_run[NB_DV_VLC*2];
108 int16_t new_dv_vlc_level[NB_DV_VLC*2];
103 109
104 done = 1; 110 done = 1;
105 111
106 dv_vlc_map = av_mallocz(DV_VLC_MAP_LEV_SIZE*DV_VLC_MAP_RUN_SIZE*sizeof(struct dv_vlc_pair)); 112 dv_vlc_map = av_mallocz(DV_VLC_MAP_LEV_SIZE*DV_VLC_MAP_RUN_SIZE*sizeof(struct dv_vlc_pair));
107 if (!dv_vlc_map) 113 if (!dv_vlc_map)
108 return -ENOMEM; 114 return -ENOMEM;
109 115
116 /* dv_anchor lets each thread know its Id */
110 dv_anchor = av_malloc(12*27*sizeof(void*)); 117 dv_anchor = av_malloc(12*27*sizeof(void*));
111 if (!dv_anchor) { 118 if (!dv_anchor) {
112 av_free(dv_vlc_map); 119 av_free(dv_vlc_map);
113 return -ENOMEM; 120 return -ENOMEM;
114 } 121 }
115 for (i=0; i<12*27; i++) 122 for (i=0; i<12*27; i++)
116 dv_anchor[i] = (void*)(size_t)i; 123 dv_anchor[i] = (void*)(size_t)i;
117 124
125 /* it's faster to include sign bit in a generic VLC parsing scheme */
126 for (i=0, j=0; i<NB_DV_VLC; i++, j++) {
127 new_dv_vlc_bits[j] = dv_vlc_bits[i];
128 new_dv_vlc_len[j] = dv_vlc_len[i];
129 new_dv_vlc_run[j] = dv_vlc_run[i];
130 new_dv_vlc_level[j] = dv_vlc_level[i];
131
132 if (dv_vlc_level[i]) {
133 new_dv_vlc_bits[j] <<= 1;
134 new_dv_vlc_len[j]++;
135
136 j++;
137 new_dv_vlc_bits[j] = (dv_vlc_bits[i] << 1) | 1;
138 new_dv_vlc_len[j] = dv_vlc_len[i] + 1;
139 new_dv_vlc_run[j] = dv_vlc_run[i];
140 new_dv_vlc_level[j] = -dv_vlc_level[i];
141 }
142 }
143
118 /* NOTE: as a trick, we use the fact the no codes are unused 144 /* NOTE: as a trick, we use the fact the no codes are unused
119 to accelerate the parsing of partial codes */ 145 to accelerate the parsing of partial codes */
120 init_vlc(&dv_vlc, TEX_VLC_BITS, NB_DV_VLC, 146 init_vlc(&dv_vlc, TEX_VLC_BITS, j,
121 dv_vlc_len, 1, 1, dv_vlc_bits, 2, 2); 147 new_dv_vlc_len, 1, 1, new_dv_vlc_bits, 2, 2);
122 148
123 dv_rl_vlc = av_malloc(dv_vlc.table_size * sizeof(RL_VLC_ELEM)); 149 dv_rl_vlc = av_malloc(dv_vlc.table_size * sizeof(RL_VLC_ELEM));
124 if (!dv_rl_vlc) { 150 if (!dv_rl_vlc) {
125 av_free(dv_anchor); 151 av_free(dv_anchor);
126 av_free(dv_vlc_map); 152 av_free(dv_vlc_map);
133 159
134 if(len<0){ //more bits needed 160 if(len<0){ //more bits needed
135 run= 0; 161 run= 0;
136 level= code; 162 level= code;
137 } else { 163 } else {
138 run= dv_vlc_run[code] + 1; 164 run= new_dv_vlc_run[code] + 1;
139 level= dv_vlc_level[code]; 165 level= new_dv_vlc_level[code];
140 } 166 }
141 dv_rl_vlc[i].len = len; 167 dv_rl_vlc[i].len = len;
142 dv_rl_vlc[i].level = level; 168 dv_rl_vlc[i].level = level;
143 dv_rl_vlc[i].run = run; 169 dv_rl_vlc[i].run = run;
144 } 170 }
214 avcodec_default_free_buffers(avctx); 240 avcodec_default_free_buffers(avctx);
215 return 0; 241 return 0;
216 } 242 }
217 243
218 // #define VLC_DEBUG 244 // #define VLC_DEBUG
245 // #define printf(...) av_log(NULL, AV_LOG_ERROR, __VA_ARGS__)
219 246
220 typedef struct BlockInfo { 247 typedef struct BlockInfo {
221 const uint8_t *shift_table; 248 const uint8_t *shift_table;
222 const uint8_t *scan_table; 249 const uint8_t *scan_table;
223 uint8_t pos; /* position in block */ 250 uint8_t pos; /* position in block */
257 284
258 /* decode ac coefs */ 285 /* decode ac coefs */
259 static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block) 286 static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block)
260 { 287 {
261 int last_index = get_bits_size(gb); 288 int last_index = get_bits_size(gb);
262 int last_re_index;
263 int shift_offset = mb->shift_offset;
264 const uint8_t *scan_table = mb->scan_table; 289 const uint8_t *scan_table = mb->scan_table;
265 const uint8_t *shift_table = mb->shift_table; 290 const uint8_t *shift_table = mb->shift_table;
266 int pos = mb->pos; 291 int pos = mb->pos;
267 int level, pos1, run; 292 int partial_bit_count = mb->partial_bit_count;
268 int partial_bit_count; 293 int level, pos1, run, vlc_len, index;
269 int sign = 0; 294
270 #ifndef ALT_BITSTREAM_READER //FIXME
271 int re_index=0;
272 int re1_index=0;
273 #endif
274 OPEN_READER(re, gb); 295 OPEN_READER(re, gb);
275 296 UPDATE_CACHE(re, gb);
276 #ifdef VLC_DEBUG 297
277 printf("start\n");
278 #endif
279
280 /* if we must parse a partial vlc, we do it here */ 298 /* if we must parse a partial vlc, we do it here */
281 partial_bit_count = mb->partial_bit_count;
282 if (partial_bit_count > 0) { 299 if (partial_bit_count > 0) {
283 uint8_t buf[4]; 300 re_cache = ((unsigned)re_cache >> partial_bit_count) |
284 uint32_t v; 301 (mb->partial_bit_buffer << (sizeof(re_cache)*8 - partial_bit_count));
285 int l; 302 re_index -= partial_bit_count;
286 GetBitContext gb1; 303 mb->partial_bit_count = 0;
287
288 /* build the dummy bit buffer */
289 l = 16 - partial_bit_count;
290 UPDATE_CACHE(re, gb);
291 #ifdef VLC_DEBUG
292 printf("show=%04x\n", SHOW_UBITS(re, gb, 16));
293 #endif
294 v = (mb->partial_bit_buffer << l) | SHOW_UBITS(re, gb, l);
295 buf[0] = v >> 8;
296 buf[1] = v;
297 #ifdef VLC_DEBUG
298 printf("v=%04x cnt=%d %04x\n",
299 v, partial_bit_count, (mb->partial_bit_buffer << l));
300 #endif
301 /* try to read the codeword */
302 init_get_bits(&gb1, buf, 4*8);
303 {
304 OPEN_READER(re1, &gb1);
305 UPDATE_CACHE(re1, &gb1);
306 GET_RL_VLC(level, run, re1, &gb1, dv_rl_vlc,
307 TEX_VLC_BITS, 2);
308 l = re1_index;
309 CLOSE_READER(re1, &gb1);
310 }
311 #ifdef VLC_DEBUG
312 printf("****run=%d level=%d size=%d\n", run, level, l);
313 #endif
314 /* compute codeword length -- if too long, we cannot parse */
315 l -= partial_bit_count;
316 if ((re_index + l + (level != 0)) > last_index) {
317 mb->partial_bit_count += (last_index - re_index);
318 mb->partial_bit_buffer = v >> (16 - mb->partial_bit_count);
319 return;
320 }
321
322 /* skip read bits */
323 last_re_index = 0; /* avoid warning */
324 re_index += l;
325 /* by definition, if we can read the vlc, all partial bits
326 will be read (otherwise we could have read the vlc before) */
327 mb->partial_bit_count = 0;
328 UPDATE_CACHE(re, gb);
329 goto handle_vlc;
330 } 304 }
331 305
332 /* get the AC coefficients until last_index is reached */ 306 /* get the AC coefficients until last_index is reached */
333 for(;;) { 307 for(;;) {
334 UPDATE_CACHE(re, gb);
335 #ifdef VLC_DEBUG 308 #ifdef VLC_DEBUG
336 printf("%2d: bits=%04x index=%d\n", 309 printf("%2d: bits=%04x index=%d\n", pos, SHOW_UBITS(re, gb, 16), re_index);
337 pos, SHOW_UBITS(re, gb, 16), re_index);
338 #endif 310 #endif
339 last_re_index = re_index; 311 /* our own optimized GET_RL_VLC */
340 GET_RL_VLC(level, run, re, gb, dv_rl_vlc, 312 index = NEG_USR32(re_cache, TEX_VLC_BITS);
341 TEX_VLC_BITS, 2); 313 vlc_len = dv_rl_vlc[index].len;
342 handle_vlc: 314 if (vlc_len < 0) {
343 #ifdef VLC_DEBUG 315 index = NEG_USR32((unsigned)re_cache << TEX_VLC_BITS, -vlc_len) + dv_rl_vlc[index].level;
344 printf("run=%d level=%d\n", run, level); 316 vlc_len = TEX_VLC_BITS - vlc_len;
345 #endif 317 }
346 if (level) { 318 level = dv_rl_vlc[index].level;
347 sign = SHOW_SBITS(re, gb, 1); 319 run = dv_rl_vlc[index].run;
348 LAST_SKIP_BITS(re, gb, 1); 320
349 } 321 /* gotta check if we're still within gb boundaries */
350 if (re_index > last_index) { 322 if (re_index + vlc_len > last_index) {
351 /* should be < 16 bits otherwise a codeword could have been parsed */ 323 /* should be < 16 bits otherwise a codeword could have been parsed */
352 re_index = last_re_index;
353 UPDATE_CACHE(re, gb);
354 mb->partial_bit_count = last_index - re_index; 324 mb->partial_bit_count = last_index - re_index;
355 mb->partial_bit_buffer = SHOW_UBITS(re, gb, mb->partial_bit_count); 325 mb->partial_bit_buffer = NEG_USR32(re_cache, mb->partial_bit_count);
356 re_index = last_index; 326 re_index = last_index;
357 break; 327 break;
358 } 328 }
359 329 re_index += vlc_len;
330
331 #ifdef VLC_DEBUG
332 printf("run=%d level=%d\n", run, level);
333 #endif
360 pos += run; 334 pos += run;
361 if (pos >= 64) 335 if (pos >= 64)
362 break; 336 break;
363 337
364 if (level) { 338 if (level) {
365 level = (level ^ sign) - sign;
366 pos1 = scan_table[pos]; 339 pos1 = scan_table[pos];
367 level = level << (shift_table[pos1] + shift_offset); 340 block[pos1] = level << shift_table[pos1];
368 block[pos1] = level;
369 // printf("run=%d level=%d shift=%d\n", run, level, shift_table[pos1]);
370 } 341 }
342
343 UPDATE_CACHE(re, gb);
371 } 344 }
372 CLOSE_READER(re, gb); 345 CLOSE_READER(re, gb);
373 mb->pos = pos; 346 mb->pos = pos;
374 } 347 }
375 348
427 dc = (dc << (32 - 9)) >> (32 - 9); 400 dc = (dc << (32 - 9)) >> (32 - 9);
428 dct_mode = get_bits1(&gb); 401 dct_mode = get_bits1(&gb);
429 mb->dct_mode = dct_mode; 402 mb->dct_mode = dct_mode;
430 mb->scan_table = s->dv_zigzag[dct_mode]; 403 mb->scan_table = s->dv_zigzag[dct_mode];
431 class1 = get_bits(&gb, 2); 404 class1 = get_bits(&gb, 2);
432 mb->shift_offset = (class1 == 3); 405 mb->shift_table = s->dv_idct_shift[class1 == 3][dct_mode]
433 mb->shift_table = s->dv_idct_shift[dct_mode]
434 [quant + dv_quant_offset[class1]]; 406 [quant + dv_quant_offset[class1]];
435 dc = dc << 2; 407 dc = dc << 2;
436 /* convert to unsigned because 128 is not added in the 408 /* convert to unsigned because 128 is not added in the
437 standard IDCT */ 409 standard IDCT */
438 dc += 1024; 410 dc += 1024;