Mercurial > libavcodec.hg
comparison vp3.c @ 1355:1c05f4290517 libavcodec
added the official VP3 IDCT (C implementation) as well as a grayscale
decoding mode
author | tmmm |
---|---|
date | Thu, 10 Jul 2003 05:16:25 +0000 |
parents | f7b3fa4bb7ae |
children | e69f99ade5b0 |
comparison
equal
deleted
inserted
replaced
1354:1f89adb69349 | 1355:1c05f4290517 |
---|---|
15 * You should have received a copy of the GNU Lesser General Public | 15 * You should have received a copy of the GNU Lesser General Public |
16 * License along with this library; if not, write to the Free Software | 16 * License along with this library; if not, write to the Free Software |
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA |
18 * | 18 * |
19 * VP3 Video Decoder by Mike Melanson (melanson@pcisys.net) | 19 * VP3 Video Decoder by Mike Melanson (melanson@pcisys.net) |
20 * For more information about the VP3 coding process, visit: | |
21 * http://www.pcisys.net/~melanson/codecs/ | |
20 * | 22 * |
21 */ | 23 */ |
22 | 24 |
23 /** | 25 /** |
24 * @file vp3.c | 26 * @file vp3.c |
286 int last_coded_c_fragment; | 288 int last_coded_c_fragment; |
287 | 289 |
288 } Vp3DecodeContext; | 290 } Vp3DecodeContext; |
289 | 291 |
290 /************************************************************************ | 292 /************************************************************************ |
293 * VP3 I/DCT | |
294 ************************************************************************/ | |
295 | |
296 #define IdctAdjustBeforeShift 8 | |
297 #define xC1S7 64277 | |
298 #define xC2S6 60547 | |
299 #define xC3S5 54491 | |
300 #define xC4S4 46341 | |
301 #define xC5S3 36410 | |
302 #define xC6S2 25080 | |
303 #define xC7S1 12785 | |
304 | |
305 void vp3_idct_c(int16_t *input_data, int16_t *dequant_matrix, | |
306 int16_t *output_data) | |
307 { | |
308 int32_t intermediate_data[64]; | |
309 int32_t *ip = intermediate_data; | |
310 int16_t *op = output_data; | |
311 | |
312 int32_t _A, _B, _C, _D, _Ad, _Bd, _Cd, _Dd, _E, _F, _G, _H; | |
313 int32_t _Ed, _Gd, _Add, _Bdd, _Fd, _Hd; | |
314 int32_t t1, t2; | |
315 | |
316 int i, j; | |
317 | |
318 debug_idct("raw coefficient block:\n"); | |
319 for (i = 0; i < 8; i++) { | |
320 for (j = 0; j < 8; j++) { | |
321 debug_idct(" %5d", input_data[i * 8 + j]); | |
322 } | |
323 debug_idct("\n"); | |
324 } | |
325 debug_idct("\n"); | |
326 | |
327 for (i = 0; i < 64; i++) { | |
328 j = dezigzag_index[i]; | |
329 intermediate_data[j] = dequant_matrix[i] * input_data[i]; | |
330 } | |
331 | |
332 debug_idct("dequantized block:\n"); | |
333 for (i = 0; i < 8; i++) { | |
334 for (j = 0; j < 8; j++) { | |
335 debug_idct(" %5d", intermediate_data[i * 8 + j]); | |
336 } | |
337 debug_idct("\n"); | |
338 } | |
339 debug_idct("\n"); | |
340 | |
341 /* Inverse DCT on the rows now */ | |
342 for (i = 0; i < 8; i++) { | |
343 /* Check for non-zero values */ | |
344 if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] ) { | |
345 t1 = (int32_t)(xC1S7 * ip[1]); | |
346 t2 = (int32_t)(xC7S1 * ip[7]); | |
347 t1 >>= 16; | |
348 t2 >>= 16; | |
349 _A = t1 + t2; | |
350 | |
351 t1 = (int32_t)(xC7S1 * ip[1]); | |
352 t2 = (int32_t)(xC1S7 * ip[7]); | |
353 t1 >>= 16; | |
354 t2 >>= 16; | |
355 _B = t1 - t2; | |
356 | |
357 t1 = (int32_t)(xC3S5 * ip[3]); | |
358 t2 = (int32_t)(xC5S3 * ip[5]); | |
359 t1 >>= 16; | |
360 t2 >>= 16; | |
361 _C = t1 + t2; | |
362 | |
363 t1 = (int32_t)(xC3S5 * ip[5]); | |
364 t2 = (int32_t)(xC5S3 * ip[3]); | |
365 t1 >>= 16; | |
366 t2 >>= 16; | |
367 _D = t1 - t2; | |
368 | |
369 | |
370 t1 = (int32_t)(xC4S4 * (_A - _C)); | |
371 t1 >>= 16; | |
372 _Ad = t1; | |
373 | |
374 t1 = (int32_t)(xC4S4 * (_B - _D)); | |
375 t1 >>= 16; | |
376 _Bd = t1; | |
377 | |
378 | |
379 _Cd = _A + _C; | |
380 _Dd = _B + _D; | |
381 | |
382 t1 = (int32_t)(xC4S4 * (ip[0] + ip[4])); | |
383 t1 >>= 16; | |
384 _E = t1; | |
385 | |
386 t1 = (int32_t)(xC4S4 * (ip[0] - ip[4])); | |
387 t1 >>= 16; | |
388 _F = t1; | |
389 | |
390 t1 = (int32_t)(xC2S6 * ip[2]); | |
391 t2 = (int32_t)(xC6S2 * ip[6]); | |
392 t1 >>= 16; | |
393 t2 >>= 16; | |
394 _G = t1 + t2; | |
395 | |
396 t1 = (int32_t)(xC6S2 * ip[2]); | |
397 t2 = (int32_t)(xC2S6 * ip[6]); | |
398 t1 >>= 16; | |
399 t2 >>= 16; | |
400 _H = t1 - t2; | |
401 | |
402 | |
403 _Ed = _E - _G; | |
404 _Gd = _E + _G; | |
405 | |
406 _Add = _F + _Ad; | |
407 _Bdd = _Bd - _H; | |
408 | |
409 _Fd = _F - _Ad; | |
410 _Hd = _Bd + _H; | |
411 | |
412 /* Final sequence of operations over-write original inputs. */ | |
413 ip[0] = (int16_t)((_Gd + _Cd ) >> 0); | |
414 ip[7] = (int16_t)((_Gd - _Cd ) >> 0); | |
415 | |
416 ip[1] = (int16_t)((_Add + _Hd ) >> 0); | |
417 ip[2] = (int16_t)((_Add - _Hd ) >> 0); | |
418 | |
419 ip[3] = (int16_t)((_Ed + _Dd ) >> 0); | |
420 ip[4] = (int16_t)((_Ed - _Dd ) >> 0); | |
421 | |
422 ip[5] = (int16_t)((_Fd + _Bdd ) >> 0); | |
423 ip[6] = (int16_t)((_Fd - _Bdd ) >> 0); | |
424 | |
425 } | |
426 | |
427 ip += 8; /* next row */ | |
428 } | |
429 | |
430 ip = intermediate_data; | |
431 | |
432 for ( i = 0; i < 8; i++) { | |
433 /* Check for non-zero values (bitwise OR is faster than ||) */ | |
434 if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] | | |
435 ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) { | |
436 | |
437 t1 = (int32_t)(xC1S7 * ip[1*8]); | |
438 t2 = (int32_t)(xC7S1 * ip[7*8]); | |
439 t1 >>= 16; | |
440 t2 >>= 16; | |
441 _A = t1 + t2; | |
442 | |
443 t1 = (int32_t)(xC7S1 * ip[1*8]); | |
444 t2 = (int32_t)(xC1S7 * ip[7*8]); | |
445 t1 >>= 16; | |
446 t2 >>= 16; | |
447 _B = t1 - t2; | |
448 | |
449 t1 = (int32_t)(xC3S5 * ip[3*8]); | |
450 t2 = (int32_t)(xC5S3 * ip[5*8]); | |
451 t1 >>= 16; | |
452 t2 >>= 16; | |
453 _C = t1 + t2; | |
454 | |
455 t1 = (int32_t)(xC3S5 * ip[5*8]); | |
456 t2 = (int32_t)(xC5S3 * ip[3*8]); | |
457 t1 >>= 16; | |
458 t2 >>= 16; | |
459 _D = t1 - t2; | |
460 | |
461 | |
462 t1 = (int32_t)(xC4S4 * (_A - _C)); | |
463 t1 >>= 16; | |
464 _Ad = t1; | |
465 | |
466 t1 = (int32_t)(xC4S4 * (_B - _D)); | |
467 t1 >>= 16; | |
468 _Bd = t1; | |
469 | |
470 | |
471 _Cd = _A + _C; | |
472 _Dd = _B + _D; | |
473 | |
474 t1 = (int32_t)(xC4S4 * (ip[0*8] + ip[4*8])); | |
475 t1 >>= 16; | |
476 _E = t1; | |
477 | |
478 t1 = (int32_t)(xC4S4 * (ip[0*8] - ip[4*8])); | |
479 t1 >>= 16; | |
480 _F = t1; | |
481 | |
482 t1 = (int32_t)(xC2S6 * ip[2*8]); | |
483 t2 = (int32_t)(xC6S2 * ip[6*8]); | |
484 t1 >>= 16; | |
485 t2 >>= 16; | |
486 _G = t1 + t2; | |
487 | |
488 t1 = (int32_t)(xC6S2 * ip[2*8]); | |
489 t2 = (int32_t)(xC2S6 * ip[6*8]); | |
490 t1 >>= 16; | |
491 t2 >>= 16; | |
492 _H = t1 - t2; | |
493 | |
494 | |
495 _Ed = _E - _G; | |
496 _Gd = _E + _G; | |
497 | |
498 _Add = _F + _Ad; | |
499 _Bdd = _Bd - _H; | |
500 | |
501 _Fd = _F - _Ad; | |
502 _Hd = _Bd + _H; | |
503 | |
504 _Gd += IdctAdjustBeforeShift; | |
505 _Add += IdctAdjustBeforeShift; | |
506 _Ed += IdctAdjustBeforeShift; | |
507 _Fd += IdctAdjustBeforeShift; | |
508 | |
509 /* Final sequence of operations writes the results to the output block. */ | |
510 op[0*8] = (int16_t)((_Gd + _Cd ) >> 4); | |
511 op[7*8] = (int16_t)((_Gd - _Cd ) >> 4); | |
512 | |
513 op[1*8] = (int16_t)((_Add + _Hd ) >> 4); | |
514 op[2*8] = (int16_t)((_Add - _Hd ) >> 4); | |
515 | |
516 op[3*8] = (int16_t)((_Ed + _Dd ) >> 4); | |
517 op[4*8] = (int16_t)((_Ed - _Dd ) >> 4); | |
518 | |
519 op[5*8] = (int16_t)((_Fd + _Bdd ) >> 4); | |
520 op[6*8] = (int16_t)((_Fd - _Bdd ) >> 4); | |
521 | |
522 } else { | |
523 | |
524 op[0*8] = 0; | |
525 op[7*8] = 0; | |
526 op[1*8] = 0; | |
527 op[2*8] = 0; | |
528 op[3*8] = 0; | |
529 op[4*8] = 0; | |
530 op[5*8] = 0; | |
531 op[6*8] = 0; | |
532 } | |
533 | |
534 ip++; /* next column */ | |
535 op++; | |
536 } | |
537 } | |
538 | |
539 void vp3_idct_put(int16_t *input_data, int16_t *dequant_matrix, | |
540 uint8_t *dest, int stride) | |
541 { | |
542 int16_t transformed_data[64]; | |
543 int16_t *op; | |
544 int i, j; | |
545 | |
546 vp3_idct_c(input_data, dequant_matrix, transformed_data); | |
547 | |
548 /* place in final output */ | |
549 op = transformed_data; | |
550 for (i = 0; i < 8; i++) { | |
551 for (j = 0; j < 8; j++) { | |
552 if (*op < -128) | |
553 *dest = 0; | |
554 else if (*op > 127) | |
555 *dest = 255; | |
556 else | |
557 *dest = (uint8_t)(*op + 128); | |
558 op++; | |
559 dest++; | |
560 } | |
561 dest += (stride - 8); | |
562 } | |
563 } | |
564 | |
565 void vp3_idct_add(int16_t *input_data, int16_t *dequant_matrix, | |
566 uint8_t *dest, int stride) | |
567 { | |
568 int16_t transformed_data[64]; | |
569 int16_t *op; | |
570 int i, j; | |
571 int16_t sample; | |
572 | |
573 vp3_idct_c(input_data, dequant_matrix, transformed_data); | |
574 | |
575 /* place in final output */ | |
576 op = transformed_data; | |
577 for (i = 0; i < 8; i++) { | |
578 for (j = 0; j < 8; j++) { | |
579 sample = *dest + *op; | |
580 if (sample < 0) | |
581 *dest = 0; | |
582 else if (sample > 255) | |
583 *dest = 255; | |
584 else | |
585 *dest = (uint8_t)(sample & 0xFF); | |
586 op++; | |
587 dest++; | |
588 } | |
589 dest += (stride - 8); | |
590 } | |
591 } | |
592 | |
593 /************************************************************************ | |
291 * VP3 specific functions | 594 * VP3 specific functions |
292 ************************************************************************/ | 595 ************************************************************************/ |
293 | 596 |
294 /* | 597 /* |
295 * This function sets up all of the various block mappings: | 598 * This function sets up all of the various block mappings: |
841 * where sf = dc_scale_factor for DC quantizer | 1144 * where sf = dc_scale_factor for DC quantizer |
842 * or quality_scale for AC quantizer | 1145 * or quality_scale for AC quantizer |
843 * | 1146 * |
844 * Then, saturate the result to a lower limit of MIN_DEQUANT_VAL. | 1147 * Then, saturate the result to a lower limit of MIN_DEQUANT_VAL. |
845 */ | 1148 */ |
846 #define SCALER 1 | 1149 #define SCALER 4 |
847 | 1150 |
848 /* scale DC quantizers */ | 1151 /* scale DC quantizers */ |
849 s->intra_y_dequant[0] = vp31_intra_y_dequant[0] * dc_scale_factor / 100; | 1152 s->intra_y_dequant[0] = vp31_intra_y_dequant[0] * dc_scale_factor / 100; |
850 if (s->intra_y_dequant[0] < MIN_DEQUANT_VAL * 2) | 1153 if (s->intra_y_dequant[0] < MIN_DEQUANT_VAL * 2) |
851 s->intra_y_dequant[0] = MIN_DEQUANT_VAL * 2; | 1154 s->intra_y_dequant[0] = MIN_DEQUANT_VAL * 2; |
1421 int prior_last_motion_y = 0; | 1724 int prior_last_motion_y = 0; |
1422 int current_macroblock; | 1725 int current_macroblock; |
1423 int current_fragment; | 1726 int current_fragment; |
1424 | 1727 |
1425 debug_vp3(" vp3: unpacking motion vectors\n"); | 1728 debug_vp3(" vp3: unpacking motion vectors\n"); |
1426 | |
1427 if (s->keyframe) { | 1729 if (s->keyframe) { |
1428 | 1730 |
1429 debug_vp3(" keyframe-- there are no motion vectors\n"); | 1731 debug_vp3(" keyframe-- there are no motion vectors\n"); |
1430 | 1732 |
1431 } else { | 1733 } else { |
2028 int plane /* 0 = Y, 1 = U, 2 = V */) | 2330 int plane /* 0 = Y, 1 = U, 2 = V */) |
2029 { | 2331 { |
2030 int x, y; | 2332 int x, y; |
2031 int m, n; | 2333 int m, n; |
2032 int i = first_fragment; | 2334 int i = first_fragment; |
2033 int j; | |
2034 int16_t *dequantizer; | 2335 int16_t *dequantizer; |
2035 DCTELEM dequant_block[64]; | |
2036 DCTELEM dequant_block_permuted[64]; | |
2037 unsigned char *output_plane; | 2336 unsigned char *output_plane; |
2038 unsigned char *last_plane; | 2337 unsigned char *last_plane; |
2039 unsigned char *golden_plane; | 2338 unsigned char *golden_plane; |
2040 int stride; | 2339 int stride; |
2041 int motion_x, motion_y; | 2340 int motion_x, motion_y; |
2120 | 2419 |
2121 /* if there are any problems with a motion vector, refuse | 2420 /* if there are any problems with a motion vector, refuse |
2122 * to render the block */ | 2421 * to render the block */ |
2123 if ((motion_source < upper_motion_limit) || | 2422 if ((motion_source < upper_motion_limit) || |
2124 (motion_source > lower_motion_limit)) { | 2423 (motion_source > lower_motion_limit)) { |
2125 // printf (" vp3: help! motion source (%d) out of range (%d..%d)\n", | 2424 printf (" vp3: help! motion source (%d) out of range (%d..%d), fragment %d\n", |
2126 // motion_source, upper_motion_limit, lower_motion_limit); | 2425 motion_source, upper_motion_limit, lower_motion_limit, i); |
2127 continue; | 2426 continue; |
2128 } | 2427 } |
2129 } | 2428 } |
2130 | 2429 |
2131 /* first, take care of copying a block from either the | 2430 /* first, take care of copying a block from either the |
2149 | 2448 |
2150 /* dequantize the DCT coefficients */ | 2449 /* dequantize the DCT coefficients */ |
2151 debug_idct("fragment %d, coding mode %d, DC = %d, dequant = %d:\n", | 2450 debug_idct("fragment %d, coding mode %d, DC = %d, dequant = %d:\n", |
2152 i, s->all_fragments[i].coding_method, | 2451 i, s->all_fragments[i].coding_method, |
2153 s->all_fragments[i].coeffs[0], dequantizer[0]); | 2452 s->all_fragments[i].coeffs[0], dequantizer[0]); |
2154 for (j = 0; j < 64; j++) | |
2155 dequant_block[dezigzag_index[j]] = | |
2156 s->all_fragments[i].coeffs[j] * | |
2157 dequantizer[j]; | |
2158 for (j = 0; j < 64; j++) | |
2159 dequant_block_permuted[s->dsp.idct_permutation[j]] = | |
2160 dequant_block[j]; | |
2161 | |
2162 debug_idct("dequantized block:\n"); | |
2163 for (m = 0; m < 8; m++) { | |
2164 for (n = 0; n < 8; n++) { | |
2165 debug_idct(" %5d", dequant_block[m * 8 + n]); | |
2166 } | |
2167 debug_idct("\n"); | |
2168 } | |
2169 debug_idct("\n"); | |
2170 | 2453 |
2171 /* invert DCT and place (or add) in final output */ | 2454 /* invert DCT and place (or add) in final output */ |
2172 | |
2173 if (s->all_fragments[i].coding_method == MODE_INTRA) { | 2455 if (s->all_fragments[i].coding_method == MODE_INTRA) { |
2174 dequant_block_permuted[0] += 1024; | 2456 vp3_idct_put(s->all_fragments[i].coeffs, dequantizer, |
2175 s->dsp.idct_put( | |
2176 output_plane + s->all_fragments[i].first_pixel, | 2457 output_plane + s->all_fragments[i].first_pixel, |
2177 stride, dequant_block_permuted); | 2458 stride); |
2178 } else { | 2459 } else { |
2179 s->dsp.idct_add( | 2460 vp3_idct_add(s->all_fragments[i].coeffs, dequantizer, |
2180 output_plane + s->all_fragments[i].first_pixel, | 2461 output_plane + s->all_fragments[i].first_pixel, |
2181 stride, dequant_block_permuted); | 2462 stride); |
2182 } | 2463 } |
2183 | 2464 |
2184 debug_idct("block after idct_%s():\n", | 2465 debug_idct("block after idct_%s():\n", |
2185 (s->all_fragments[i].coding_method == MODE_INTRA)? | 2466 (s->all_fragments[i].coding_method == MODE_INTRA)? |
2186 "put" : "add"); | 2467 "put" : "add"); |
2477 printf(" vp3: could not decode frame\n"); | 2758 printf(" vp3: could not decode frame\n"); |
2478 return -1; | 2759 return -1; |
2479 } | 2760 } |
2480 | 2761 |
2481 reverse_dc_prediction(s, 0, s->fragment_width, s->fragment_height); | 2762 reverse_dc_prediction(s, 0, s->fragment_width, s->fragment_height); |
2482 reverse_dc_prediction(s, s->u_fragment_start, | |
2483 s->fragment_width / 2, s->fragment_height / 2); | |
2484 reverse_dc_prediction(s, s->v_fragment_start, | |
2485 s->fragment_width / 2, s->fragment_height / 2); | |
2486 | |
2487 render_fragments(s, 0, s->width, s->height, 0); | 2763 render_fragments(s, 0, s->width, s->height, 0); |
2488 #if 1 | 2764 |
2489 render_fragments(s, s->u_fragment_start, s->width / 2, s->height / 2, 1); | 2765 if ((avctx->flags & CODEC_FLAG_GRAY) == 0) { |
2490 render_fragments(s, s->v_fragment_start, s->width / 2, s->height / 2, 2); | 2766 reverse_dc_prediction(s, s->u_fragment_start, |
2491 #else | 2767 s->fragment_width / 2, s->fragment_height / 2); |
2492 memset(s->current_frame.data[1], 0x80, s->width * s->height / 4); | 2768 reverse_dc_prediction(s, s->v_fragment_start, |
2493 memset(s->current_frame.data[2], 0x80, s->width * s->height / 4); | 2769 s->fragment_width / 2, s->fragment_height / 2); |
2494 #endif | 2770 render_fragments(s, s->u_fragment_start, s->width / 2, s->height / 2, 1); |
2771 render_fragments(s, s->v_fragment_start, s->width / 2, s->height / 2, 2); | |
2772 } else { | |
2773 memset(s->current_frame.data[1], 0x80, s->width * s->height / 4); | |
2774 memset(s->current_frame.data[2], 0x80, s->width * s->height / 4); | |
2775 } | |
2495 | 2776 |
2496 #if KEYFRAMES_ONLY | 2777 #if KEYFRAMES_ONLY |
2497 } | 2778 } |
2498 #endif | 2779 #endif |
2499 | 2780 |