Mercurial > libavcodec.hg
comparison dv.c @ 1905:2761950695cc libavcodec
* Significant clean-up of dv_decode_ac (it looks really
simple now; take a look for yourself).
* Additional optimizations of the decoder. It now runs at 55fps
on my desktop; it used to run at ~45fps.
author | romansh |
---|---|
date | Thu, 25 Mar 2004 01:46:40 +0000 |
parents | 129236143f2e |
children | 8d3540dddd1b |
comparison
equal
deleted
inserted
replaced
1904:129236143f2e | 1905:2761950695cc |
---|---|
1 /* | 1 /* |
2 * DV decoder | 2 * DV decoder |
3 * Copyright (c) 2002 Fabrice Bellard. | 3 * Copyright (c) 2002 Fabrice Bellard. |
4 * Copyright (c) 2004 Roman Shaposhnik. | |
4 * | 5 * |
5 * DV encoder | 6 * DV encoder |
6 * Copyright (c) 2003 Roman Shaposhnik. | 7 * Copyright (c) 2003 Roman Shaposhnik. |
7 * | 8 * |
8 * Many thanks to Dan Dennedy <dan@dennedy.org> for providing wealth | 9 * Many thanks to Dan Dennedy <dan@dennedy.org> for providing wealth |
37 const DVprofile* sys; | 38 const DVprofile* sys; |
38 AVFrame picture; | 39 AVFrame picture; |
39 uint8_t *buf; | 40 uint8_t *buf; |
40 | 41 |
41 uint8_t dv_zigzag[2][64]; | 42 uint8_t dv_zigzag[2][64]; |
42 uint8_t dv_idct_shift[2][22][64]; | 43 uint8_t dv_idct_shift[2][2][22][64]; |
43 | 44 |
44 void (*get_pixels)(DCTELEM *block, const uint8_t *pixels, int line_size); | 45 void (*get_pixels)(DCTELEM *block, const uint8_t *pixels, int line_size); |
45 void (*fdct[2])(DCTELEM *block); | 46 void (*fdct[2])(DCTELEM *block); |
46 void (*idct_put[2])(uint8_t *dest, int line_size, DCTELEM *block); | 47 void (*idct_put[2])(uint8_t *dest, int line_size, DCTELEM *block); |
47 } DVVideoContext; | 48 } DVVideoContext; |
75 for(q = 0; q < 22; q++) { | 76 for(q = 0; q < 22; q++) { |
76 /* 88DCT */ | 77 /* 88DCT */ |
77 for(i = 1; i < 64; i++) { | 78 for(i = 1; i < 64; i++) { |
78 /* 88 table */ | 79 /* 88 table */ |
79 j = perm[i]; | 80 j = perm[i]; |
80 s->dv_idct_shift[0][q][j] = | 81 s->dv_idct_shift[0][0][q][j] = |
81 dv_quant_shifts[q][dv_88_areas[i]] + 1; | 82 dv_quant_shifts[q][dv_88_areas[i]] + 1; |
83 s->dv_idct_shift[1][0][q][j] = s->dv_idct_shift[0][0][q][j] + 1; | |
82 } | 84 } |
83 | 85 |
84 /* 248DCT */ | 86 /* 248DCT */ |
85 for(i = 1; i < 64; i++) { | 87 for(i = 1; i < 64; i++) { |
86 /* 248 table */ | 88 /* 248 table */ |
87 s->dv_idct_shift[1][q][i] = | 89 s->dv_idct_shift[0][1][q][i] = |
88 dv_quant_shifts[q][dv_248_areas[i]] + 1; | 90 dv_quant_shifts[q][dv_248_areas[i]] + 1; |
91 s->dv_idct_shift[1][1][q][i] = s->dv_idct_shift[0][1][q][i] + 1; | |
89 } | 92 } |
90 } | 93 } |
91 } | 94 } |
92 | 95 |
93 static int dvvideo_init(AVCodecContext *avctx) | 96 static int dvvideo_init(AVCodecContext *avctx) |
96 DSPContext dsp; | 99 DSPContext dsp; |
97 static int done=0; | 100 static int done=0; |
98 int i, j; | 101 int i, j; |
99 | 102 |
100 if (!done) { | 103 if (!done) { |
101 int i; | |
102 VLC dv_vlc; | 104 VLC dv_vlc; |
105 uint16_t new_dv_vlc_bits[NB_DV_VLC*2]; | |
106 uint8_t new_dv_vlc_len[NB_DV_VLC*2]; | |
107 uint8_t new_dv_vlc_run[NB_DV_VLC*2]; | |
108 int16_t new_dv_vlc_level[NB_DV_VLC*2]; | |
103 | 109 |
104 done = 1; | 110 done = 1; |
105 | 111 |
106 dv_vlc_map = av_mallocz(DV_VLC_MAP_LEV_SIZE*DV_VLC_MAP_RUN_SIZE*sizeof(struct dv_vlc_pair)); | 112 dv_vlc_map = av_mallocz(DV_VLC_MAP_LEV_SIZE*DV_VLC_MAP_RUN_SIZE*sizeof(struct dv_vlc_pair)); |
107 if (!dv_vlc_map) | 113 if (!dv_vlc_map) |
108 return -ENOMEM; | 114 return -ENOMEM; |
109 | 115 |
116 /* dv_anchor lets each thread know its Id */ | |
110 dv_anchor = av_malloc(12*27*sizeof(void*)); | 117 dv_anchor = av_malloc(12*27*sizeof(void*)); |
111 if (!dv_anchor) { | 118 if (!dv_anchor) { |
112 av_free(dv_vlc_map); | 119 av_free(dv_vlc_map); |
113 return -ENOMEM; | 120 return -ENOMEM; |
114 } | 121 } |
115 for (i=0; i<12*27; i++) | 122 for (i=0; i<12*27; i++) |
116 dv_anchor[i] = (void*)(size_t)i; | 123 dv_anchor[i] = (void*)(size_t)i; |
117 | 124 |
125 /* it's faster to include sign bit in a generic VLC parsing scheme */ | |
126 for (i=0, j=0; i<NB_DV_VLC; i++, j++) { | |
127 new_dv_vlc_bits[j] = dv_vlc_bits[i]; | |
128 new_dv_vlc_len[j] = dv_vlc_len[i]; | |
129 new_dv_vlc_run[j] = dv_vlc_run[i]; | |
130 new_dv_vlc_level[j] = dv_vlc_level[i]; | |
131 | |
132 if (dv_vlc_level[i]) { | |
133 new_dv_vlc_bits[j] <<= 1; | |
134 new_dv_vlc_len[j]++; | |
135 | |
136 j++; | |
137 new_dv_vlc_bits[j] = (dv_vlc_bits[i] << 1) | 1; | |
138 new_dv_vlc_len[j] = dv_vlc_len[i] + 1; | |
139 new_dv_vlc_run[j] = dv_vlc_run[i]; | |
140 new_dv_vlc_level[j] = -dv_vlc_level[i]; | |
141 } | |
142 } | |
143 | |
118 /* NOTE: as a trick, we use the fact the no codes are unused | 144 /* NOTE: as a trick, we use the fact the no codes are unused |
119 to accelerate the parsing of partial codes */ | 145 to accelerate the parsing of partial codes */ |
120 init_vlc(&dv_vlc, TEX_VLC_BITS, NB_DV_VLC, | 146 init_vlc(&dv_vlc, TEX_VLC_BITS, j, |
121 dv_vlc_len, 1, 1, dv_vlc_bits, 2, 2); | 147 new_dv_vlc_len, 1, 1, new_dv_vlc_bits, 2, 2); |
122 | 148 |
123 dv_rl_vlc = av_malloc(dv_vlc.table_size * sizeof(RL_VLC_ELEM)); | 149 dv_rl_vlc = av_malloc(dv_vlc.table_size * sizeof(RL_VLC_ELEM)); |
124 if (!dv_rl_vlc) { | 150 if (!dv_rl_vlc) { |
125 av_free(dv_anchor); | 151 av_free(dv_anchor); |
126 av_free(dv_vlc_map); | 152 av_free(dv_vlc_map); |
133 | 159 |
134 if(len<0){ //more bits needed | 160 if(len<0){ //more bits needed |
135 run= 0; | 161 run= 0; |
136 level= code; | 162 level= code; |
137 } else { | 163 } else { |
138 run= dv_vlc_run[code] + 1; | 164 run= new_dv_vlc_run[code] + 1; |
139 level= dv_vlc_level[code]; | 165 level= new_dv_vlc_level[code]; |
140 } | 166 } |
141 dv_rl_vlc[i].len = len; | 167 dv_rl_vlc[i].len = len; |
142 dv_rl_vlc[i].level = level; | 168 dv_rl_vlc[i].level = level; |
143 dv_rl_vlc[i].run = run; | 169 dv_rl_vlc[i].run = run; |
144 } | 170 } |
214 avcodec_default_free_buffers(avctx); | 240 avcodec_default_free_buffers(avctx); |
215 return 0; | 241 return 0; |
216 } | 242 } |
217 | 243 |
218 // #define VLC_DEBUG | 244 // #define VLC_DEBUG |
245 // #define printf(...) av_log(NULL, AV_LOG_ERROR, __VA_ARGS__) | |
219 | 246 |
220 typedef struct BlockInfo { | 247 typedef struct BlockInfo { |
221 const uint8_t *shift_table; | 248 const uint8_t *shift_table; |
222 const uint8_t *scan_table; | 249 const uint8_t *scan_table; |
223 uint8_t pos; /* position in block */ | 250 uint8_t pos; /* position in block */ |
257 | 284 |
258 /* decode ac coefs */ | 285 /* decode ac coefs */ |
259 static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block) | 286 static void dv_decode_ac(GetBitContext *gb, BlockInfo *mb, DCTELEM *block) |
260 { | 287 { |
261 int last_index = get_bits_size(gb); | 288 int last_index = get_bits_size(gb); |
262 int last_re_index; | |
263 int shift_offset = mb->shift_offset; | |
264 const uint8_t *scan_table = mb->scan_table; | 289 const uint8_t *scan_table = mb->scan_table; |
265 const uint8_t *shift_table = mb->shift_table; | 290 const uint8_t *shift_table = mb->shift_table; |
266 int pos = mb->pos; | 291 int pos = mb->pos; |
267 int level, pos1, run; | 292 int partial_bit_count = mb->partial_bit_count; |
268 int partial_bit_count; | 293 int level, pos1, run, vlc_len, index; |
269 int sign = 0; | 294 |
270 #ifndef ALT_BITSTREAM_READER //FIXME | |
271 int re_index=0; | |
272 int re1_index=0; | |
273 #endif | |
274 OPEN_READER(re, gb); | 295 OPEN_READER(re, gb); |
275 | 296 UPDATE_CACHE(re, gb); |
276 #ifdef VLC_DEBUG | 297 |
277 printf("start\n"); | |
278 #endif | |
279 | |
280 /* if we must parse a partial vlc, we do it here */ | 298 /* if we must parse a partial vlc, we do it here */ |
281 partial_bit_count = mb->partial_bit_count; | |
282 if (partial_bit_count > 0) { | 299 if (partial_bit_count > 0) { |
283 uint8_t buf[4]; | 300 re_cache = ((unsigned)re_cache >> partial_bit_count) | |
284 uint32_t v; | 301 (mb->partial_bit_buffer << (sizeof(re_cache)*8 - partial_bit_count)); |
285 int l; | 302 re_index -= partial_bit_count; |
286 GetBitContext gb1; | 303 mb->partial_bit_count = 0; |
287 | |
288 /* build the dummy bit buffer */ | |
289 l = 16 - partial_bit_count; | |
290 UPDATE_CACHE(re, gb); | |
291 #ifdef VLC_DEBUG | |
292 printf("show=%04x\n", SHOW_UBITS(re, gb, 16)); | |
293 #endif | |
294 v = (mb->partial_bit_buffer << l) | SHOW_UBITS(re, gb, l); | |
295 buf[0] = v >> 8; | |
296 buf[1] = v; | |
297 #ifdef VLC_DEBUG | |
298 printf("v=%04x cnt=%d %04x\n", | |
299 v, partial_bit_count, (mb->partial_bit_buffer << l)); | |
300 #endif | |
301 /* try to read the codeword */ | |
302 init_get_bits(&gb1, buf, 4*8); | |
303 { | |
304 OPEN_READER(re1, &gb1); | |
305 UPDATE_CACHE(re1, &gb1); | |
306 GET_RL_VLC(level, run, re1, &gb1, dv_rl_vlc, | |
307 TEX_VLC_BITS, 2); | |
308 l = re1_index; | |
309 CLOSE_READER(re1, &gb1); | |
310 } | |
311 #ifdef VLC_DEBUG | |
312 printf("****run=%d level=%d size=%d\n", run, level, l); | |
313 #endif | |
314 /* compute codeword length -- if too long, we cannot parse */ | |
315 l -= partial_bit_count; | |
316 if ((re_index + l + (level != 0)) > last_index) { | |
317 mb->partial_bit_count += (last_index - re_index); | |
318 mb->partial_bit_buffer = v >> (16 - mb->partial_bit_count); | |
319 return; | |
320 } | |
321 | |
322 /* skip read bits */ | |
323 last_re_index = 0; /* avoid warning */ | |
324 re_index += l; | |
325 /* by definition, if we can read the vlc, all partial bits | |
326 will be read (otherwise we could have read the vlc before) */ | |
327 mb->partial_bit_count = 0; | |
328 UPDATE_CACHE(re, gb); | |
329 goto handle_vlc; | |
330 } | 304 } |
331 | 305 |
332 /* get the AC coefficients until last_index is reached */ | 306 /* get the AC coefficients until last_index is reached */ |
333 for(;;) { | 307 for(;;) { |
334 UPDATE_CACHE(re, gb); | |
335 #ifdef VLC_DEBUG | 308 #ifdef VLC_DEBUG |
336 printf("%2d: bits=%04x index=%d\n", | 309 printf("%2d: bits=%04x index=%d\n", pos, SHOW_UBITS(re, gb, 16), re_index); |
337 pos, SHOW_UBITS(re, gb, 16), re_index); | |
338 #endif | 310 #endif |
339 last_re_index = re_index; | 311 /* our own optimized GET_RL_VLC */ |
340 GET_RL_VLC(level, run, re, gb, dv_rl_vlc, | 312 index = NEG_USR32(re_cache, TEX_VLC_BITS); |
341 TEX_VLC_BITS, 2); | 313 vlc_len = dv_rl_vlc[index].len; |
342 handle_vlc: | 314 if (vlc_len < 0) { |
343 #ifdef VLC_DEBUG | 315 index = NEG_USR32((unsigned)re_cache << TEX_VLC_BITS, -vlc_len) + dv_rl_vlc[index].level; |
344 printf("run=%d level=%d\n", run, level); | 316 vlc_len = TEX_VLC_BITS - vlc_len; |
345 #endif | 317 } |
346 if (level) { | 318 level = dv_rl_vlc[index].level; |
347 sign = SHOW_SBITS(re, gb, 1); | 319 run = dv_rl_vlc[index].run; |
348 LAST_SKIP_BITS(re, gb, 1); | 320 |
349 } | 321 /* gotta check if we're still within gb boundaries */ |
350 if (re_index > last_index) { | 322 if (re_index + vlc_len > last_index) { |
351 /* should be < 16 bits otherwise a codeword could have been parsed */ | 323 /* should be < 16 bits otherwise a codeword could have been parsed */ |
352 re_index = last_re_index; | |
353 UPDATE_CACHE(re, gb); | |
354 mb->partial_bit_count = last_index - re_index; | 324 mb->partial_bit_count = last_index - re_index; |
355 mb->partial_bit_buffer = SHOW_UBITS(re, gb, mb->partial_bit_count); | 325 mb->partial_bit_buffer = NEG_USR32(re_cache, mb->partial_bit_count); |
356 re_index = last_index; | 326 re_index = last_index; |
357 break; | 327 break; |
358 } | 328 } |
359 | 329 re_index += vlc_len; |
330 | |
331 #ifdef VLC_DEBUG | |
332 printf("run=%d level=%d\n", run, level); | |
333 #endif | |
360 pos += run; | 334 pos += run; |
361 if (pos >= 64) | 335 if (pos >= 64) |
362 break; | 336 break; |
363 | 337 |
364 if (level) { | 338 if (level) { |
365 level = (level ^ sign) - sign; | |
366 pos1 = scan_table[pos]; | 339 pos1 = scan_table[pos]; |
367 level = level << (shift_table[pos1] + shift_offset); | 340 block[pos1] = level << shift_table[pos1]; |
368 block[pos1] = level; | |
369 // printf("run=%d level=%d shift=%d\n", run, level, shift_table[pos1]); | |
370 } | 341 } |
342 | |
343 UPDATE_CACHE(re, gb); | |
371 } | 344 } |
372 CLOSE_READER(re, gb); | 345 CLOSE_READER(re, gb); |
373 mb->pos = pos; | 346 mb->pos = pos; |
374 } | 347 } |
375 | 348 |
427 dc = (dc << (32 - 9)) >> (32 - 9); | 400 dc = (dc << (32 - 9)) >> (32 - 9); |
428 dct_mode = get_bits1(&gb); | 401 dct_mode = get_bits1(&gb); |
429 mb->dct_mode = dct_mode; | 402 mb->dct_mode = dct_mode; |
430 mb->scan_table = s->dv_zigzag[dct_mode]; | 403 mb->scan_table = s->dv_zigzag[dct_mode]; |
431 class1 = get_bits(&gb, 2); | 404 class1 = get_bits(&gb, 2); |
432 mb->shift_offset = (class1 == 3); | 405 mb->shift_table = s->dv_idct_shift[class1 == 3][dct_mode] |
433 mb->shift_table = s->dv_idct_shift[dct_mode] | |
434 [quant + dv_quant_offset[class1]]; | 406 [quant + dv_quant_offset[class1]]; |
435 dc = dc << 2; | 407 dc = dc << 2; |
436 /* convert to unsigned because 128 is not added in the | 408 /* convert to unsigned because 128 is not added in the |
437 standard IDCT */ | 409 standard IDCT */ |
438 dc += 1024; | 410 dc += 1024; |