comparison vp3.c @ 1355:1c05f4290517 libavcodec

added the official VP3 IDCT (C implementation) as well as a grayscale decoding mode
author tmmm
date Thu, 10 Jul 2003 05:16:25 +0000
parents f7b3fa4bb7ae
children e69f99ade5b0
comparison
equal deleted inserted replaced
1354:1f89adb69349 1355:1c05f4290517
15 * You should have received a copy of the GNU Lesser General Public 15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software 16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 17 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
18 * 18 *
19 * VP3 Video Decoder by Mike Melanson (melanson@pcisys.net) 19 * VP3 Video Decoder by Mike Melanson (melanson@pcisys.net)
20 * For more information about the VP3 coding process, visit:
21 * http://www.pcisys.net/~melanson/codecs/
20 * 22 *
21 */ 23 */
22 24
23 /** 25 /**
24 * @file vp3.c 26 * @file vp3.c
286 int last_coded_c_fragment; 288 int last_coded_c_fragment;
287 289
288 } Vp3DecodeContext; 290 } Vp3DecodeContext;
289 291
290 /************************************************************************ 292 /************************************************************************
293 * VP3 I/DCT
294 ************************************************************************/
295
296 #define IdctAdjustBeforeShift 8
297 #define xC1S7 64277
298 #define xC2S6 60547
299 #define xC3S5 54491
300 #define xC4S4 46341
301 #define xC5S3 36410
302 #define xC6S2 25080
303 #define xC7S1 12785
304
305 void vp3_idct_c(int16_t *input_data, int16_t *dequant_matrix,
306 int16_t *output_data)
307 {
308 int32_t intermediate_data[64];
309 int32_t *ip = intermediate_data;
310 int16_t *op = output_data;
311
312 int32_t _A, _B, _C, _D, _Ad, _Bd, _Cd, _Dd, _E, _F, _G, _H;
313 int32_t _Ed, _Gd, _Add, _Bdd, _Fd, _Hd;
314 int32_t t1, t2;
315
316 int i, j;
317
318 debug_idct("raw coefficient block:\n");
319 for (i = 0; i < 8; i++) {
320 for (j = 0; j < 8; j++) {
321 debug_idct(" %5d", input_data[i * 8 + j]);
322 }
323 debug_idct("\n");
324 }
325 debug_idct("\n");
326
327 for (i = 0; i < 64; i++) {
328 j = dezigzag_index[i];
329 intermediate_data[j] = dequant_matrix[i] * input_data[i];
330 }
331
332 debug_idct("dequantized block:\n");
333 for (i = 0; i < 8; i++) {
334 for (j = 0; j < 8; j++) {
335 debug_idct(" %5d", intermediate_data[i * 8 + j]);
336 }
337 debug_idct("\n");
338 }
339 debug_idct("\n");
340
341 /* Inverse DCT on the rows now */
342 for (i = 0; i < 8; i++) {
343 /* Check for non-zero values */
344 if ( ip[0] | ip[1] | ip[2] | ip[3] | ip[4] | ip[5] | ip[6] | ip[7] ) {
345 t1 = (int32_t)(xC1S7 * ip[1]);
346 t2 = (int32_t)(xC7S1 * ip[7]);
347 t1 >>= 16;
348 t2 >>= 16;
349 _A = t1 + t2;
350
351 t1 = (int32_t)(xC7S1 * ip[1]);
352 t2 = (int32_t)(xC1S7 * ip[7]);
353 t1 >>= 16;
354 t2 >>= 16;
355 _B = t1 - t2;
356
357 t1 = (int32_t)(xC3S5 * ip[3]);
358 t2 = (int32_t)(xC5S3 * ip[5]);
359 t1 >>= 16;
360 t2 >>= 16;
361 _C = t1 + t2;
362
363 t1 = (int32_t)(xC3S5 * ip[5]);
364 t2 = (int32_t)(xC5S3 * ip[3]);
365 t1 >>= 16;
366 t2 >>= 16;
367 _D = t1 - t2;
368
369
370 t1 = (int32_t)(xC4S4 * (_A - _C));
371 t1 >>= 16;
372 _Ad = t1;
373
374 t1 = (int32_t)(xC4S4 * (_B - _D));
375 t1 >>= 16;
376 _Bd = t1;
377
378
379 _Cd = _A + _C;
380 _Dd = _B + _D;
381
382 t1 = (int32_t)(xC4S4 * (ip[0] + ip[4]));
383 t1 >>= 16;
384 _E = t1;
385
386 t1 = (int32_t)(xC4S4 * (ip[0] - ip[4]));
387 t1 >>= 16;
388 _F = t1;
389
390 t1 = (int32_t)(xC2S6 * ip[2]);
391 t2 = (int32_t)(xC6S2 * ip[6]);
392 t1 >>= 16;
393 t2 >>= 16;
394 _G = t1 + t2;
395
396 t1 = (int32_t)(xC6S2 * ip[2]);
397 t2 = (int32_t)(xC2S6 * ip[6]);
398 t1 >>= 16;
399 t2 >>= 16;
400 _H = t1 - t2;
401
402
403 _Ed = _E - _G;
404 _Gd = _E + _G;
405
406 _Add = _F + _Ad;
407 _Bdd = _Bd - _H;
408
409 _Fd = _F - _Ad;
410 _Hd = _Bd + _H;
411
412 /* Final sequence of operations over-write original inputs. */
413 ip[0] = (int16_t)((_Gd + _Cd ) >> 0);
414 ip[7] = (int16_t)((_Gd - _Cd ) >> 0);
415
416 ip[1] = (int16_t)((_Add + _Hd ) >> 0);
417 ip[2] = (int16_t)((_Add - _Hd ) >> 0);
418
419 ip[3] = (int16_t)((_Ed + _Dd ) >> 0);
420 ip[4] = (int16_t)((_Ed - _Dd ) >> 0);
421
422 ip[5] = (int16_t)((_Fd + _Bdd ) >> 0);
423 ip[6] = (int16_t)((_Fd - _Bdd ) >> 0);
424
425 }
426
427 ip += 8; /* next row */
428 }
429
430 ip = intermediate_data;
431
432 for ( i = 0; i < 8; i++) {
433 /* Check for non-zero values (bitwise or faster than ||) */
434 if ( ip[0 * 8] | ip[1 * 8] | ip[2 * 8] | ip[3 * 8] |
435 ip[4 * 8] | ip[5 * 8] | ip[6 * 8] | ip[7 * 8] ) {
436
437 t1 = (int32_t)(xC1S7 * ip[1*8]);
438 t2 = (int32_t)(xC7S1 * ip[7*8]);
439 t1 >>= 16;
440 t2 >>= 16;
441 _A = t1 + t2;
442
443 t1 = (int32_t)(xC7S1 * ip[1*8]);
444 t2 = (int32_t)(xC1S7 * ip[7*8]);
445 t1 >>= 16;
446 t2 >>= 16;
447 _B = t1 - t2;
448
449 t1 = (int32_t)(xC3S5 * ip[3*8]);
450 t2 = (int32_t)(xC5S3 * ip[5*8]);
451 t1 >>= 16;
452 t2 >>= 16;
453 _C = t1 + t2;
454
455 t1 = (int32_t)(xC3S5 * ip[5*8]);
456 t2 = (int32_t)(xC5S3 * ip[3*8]);
457 t1 >>= 16;
458 t2 >>= 16;
459 _D = t1 - t2;
460
461
462 t1 = (int32_t)(xC4S4 * (_A - _C));
463 t1 >>= 16;
464 _Ad = t1;
465
466 t1 = (int32_t)(xC4S4 * (_B - _D));
467 t1 >>= 16;
468 _Bd = t1;
469
470
471 _Cd = _A + _C;
472 _Dd = _B + _D;
473
474 t1 = (int32_t)(xC4S4 * (ip[0*8] + ip[4*8]));
475 t1 >>= 16;
476 _E = t1;
477
478 t1 = (int32_t)(xC4S4 * (ip[0*8] - ip[4*8]));
479 t1 >>= 16;
480 _F = t1;
481
482 t1 = (int32_t)(xC2S6 * ip[2*8]);
483 t2 = (int32_t)(xC6S2 * ip[6*8]);
484 t1 >>= 16;
485 t2 >>= 16;
486 _G = t1 + t2;
487
488 t1 = (int32_t)(xC6S2 * ip[2*8]);
489 t2 = (int32_t)(xC2S6 * ip[6*8]);
490 t1 >>= 16;
491 t2 >>= 16;
492 _H = t1 - t2;
493
494
495 _Ed = _E - _G;
496 _Gd = _E + _G;
497
498 _Add = _F + _Ad;
499 _Bdd = _Bd - _H;
500
501 _Fd = _F - _Ad;
502 _Hd = _Bd + _H;
503
504 _Gd += IdctAdjustBeforeShift;
505 _Add += IdctAdjustBeforeShift;
506 _Ed += IdctAdjustBeforeShift;
507 _Fd += IdctAdjustBeforeShift;
508
509 /* Final sequence of operations over-write original inputs. */
510 op[0*8] = (int16_t)((_Gd + _Cd ) >> 4);
511 op[7*8] = (int16_t)((_Gd - _Cd ) >> 4);
512
513 op[1*8] = (int16_t)((_Add + _Hd ) >> 4);
514 op[2*8] = (int16_t)((_Add - _Hd ) >> 4);
515
516 op[3*8] = (int16_t)((_Ed + _Dd ) >> 4);
517 op[4*8] = (int16_t)((_Ed - _Dd ) >> 4);
518
519 op[5*8] = (int16_t)((_Fd + _Bdd ) >> 4);
520 op[6*8] = (int16_t)((_Fd - _Bdd ) >> 4);
521
522 } else {
523
524 op[0*8] = 0;
525 op[7*8] = 0;
526 op[1*8] = 0;
527 op[2*8] = 0;
528 op[3*8] = 0;
529 op[4*8] = 0;
530 op[5*8] = 0;
531 op[6*8] = 0;
532 }
533
534 ip++; /* next column */
535 op++;
536 }
537 }
538
/*
 * Transform one block with vp3_idct_c() and store it into an 8x8 pixel area.
 *
 * input_data     - coefficients handed to vp3_idct_c()
 * dequant_matrix - dequantizers handed to vp3_idct_c()
 * dest           - top-left pixel of the destination area
 * stride         - distance in bytes between two destination rows
 *
 * Each transformed sample is biased by +128 and saturated to 0..255.
 */
void vp3_idct_put(int16_t *input_data, int16_t *dequant_matrix,
    uint8_t *dest, int stride)
{
    int16_t block[64];
    int row, col;

    vp3_idct_c(input_data, dequant_matrix, block);

    /* place in final output */
    for (row = 0; row < 8; row++) {
        uint8_t *line = dest + row * stride;
        const int16_t *src = block + row * 8;

        for (col = 0; col < 8; col++) {
            /* bias first, then clamp: < -128 maps to 0, > 127 maps to 255 */
            int pixel = src[col] + 128;

            if (pixel < 0)
                pixel = 0;
            else if (pixel > 255)
                pixel = 255;
            line[col] = (uint8_t)pixel;
        }
    }
}
564
/*
 * Transform one block with vp3_idct_c() and add it to an 8x8 pixel area.
 *
 * input_data     - coefficients handed to vp3_idct_c()
 * dequant_matrix - dequantizers handed to vp3_idct_c()
 * dest           - top-left pixel of the area that is updated in place
 * stride         - distance in bytes between two destination rows
 *
 * Each result is the existing pixel plus the transformed sample, saturated
 * to 0..255.
 */
void vp3_idct_add(int16_t *input_data, int16_t *dequant_matrix,
    uint8_t *dest, int stride)
{
    int16_t transformed_data[64];
    int16_t *op;
    int i, j;
    /*
     * Keep the sum in a plain int: narrowing it to int16_t before the clamp
     * would let a large positive residual wrap negative and saturate to 0
     * instead of 255.
     */
    int sample;

    vp3_idct_c(input_data, dequant_matrix, transformed_data);

    /* place in final output */
    op = transformed_data;
    for (i = 0; i < 8; i++) {
        for (j = 0; j < 8; j++) {
            sample = *dest + *op;
            if (sample < 0)
                *dest = 0;
            else if (sample > 255)
                *dest = 255;
            else
                *dest = (uint8_t)sample;
            op++;
            dest++;
        }
        dest += (stride - 8); /* advance to the start of the next row */
    }
}
592
593 /************************************************************************
291 * VP3 specific functions 594 * VP3 specific functions
292 ************************************************************************/ 595 ************************************************************************/
293 596
294 /* 597 /*
295 * This function sets up all of the various blocks mappings: 598 * This function sets up all of the various blocks mappings:
841 * where sf = dc_scale_factor for DC quantizer 1144 * where sf = dc_scale_factor for DC quantizer
842 * or quality_scale for AC quantizer 1145 * or quality_scale for AC quantizer
843 * 1146 *
844 * Then, saturate the result to a lower limit of MIN_DEQUANT_VAL. 1147 * Then, saturate the result to a lower limit of MIN_DEQUANT_VAL.
845 */ 1148 */
846 #define SCALER 1 1149 #define SCALER 4
847 1150
848 /* scale DC quantizers */ 1151 /* scale DC quantizers */
849 s->intra_y_dequant[0] = vp31_intra_y_dequant[0] * dc_scale_factor / 100; 1152 s->intra_y_dequant[0] = vp31_intra_y_dequant[0] * dc_scale_factor / 100;
850 if (s->intra_y_dequant[0] < MIN_DEQUANT_VAL * 2) 1153 if (s->intra_y_dequant[0] < MIN_DEQUANT_VAL * 2)
851 s->intra_y_dequant[0] = MIN_DEQUANT_VAL * 2; 1154 s->intra_y_dequant[0] = MIN_DEQUANT_VAL * 2;
1421 int prior_last_motion_y = 0; 1724 int prior_last_motion_y = 0;
1422 int current_macroblock; 1725 int current_macroblock;
1423 int current_fragment; 1726 int current_fragment;
1424 1727
1425 debug_vp3(" vp3: unpacking motion vectors\n"); 1728 debug_vp3(" vp3: unpacking motion vectors\n");
1426
1427 if (s->keyframe) { 1729 if (s->keyframe) {
1428 1730
1429 debug_vp3(" keyframe-- there are no motion vectors\n"); 1731 debug_vp3(" keyframe-- there are no motion vectors\n");
1430 1732
1431 } else { 1733 } else {
2028 int plane /* 0 = Y, 1 = U, 2 = V */) 2330 int plane /* 0 = Y, 1 = U, 2 = V */)
2029 { 2331 {
2030 int x, y; 2332 int x, y;
2031 int m, n; 2333 int m, n;
2032 int i = first_fragment; 2334 int i = first_fragment;
2033 int j;
2034 int16_t *dequantizer; 2335 int16_t *dequantizer;
2035 DCTELEM dequant_block[64];
2036 DCTELEM dequant_block_permuted[64];
2037 unsigned char *output_plane; 2336 unsigned char *output_plane;
2038 unsigned char *last_plane; 2337 unsigned char *last_plane;
2039 unsigned char *golden_plane; 2338 unsigned char *golden_plane;
2040 int stride; 2339 int stride;
2041 int motion_x, motion_y; 2340 int motion_x, motion_y;
2120 2419
2121 /* if there are any problems with a motion vector, refuse 2420 /* if there are any problems with a motion vector, refuse
2122 * to render the block */ 2421 * to render the block */
2123 if ((motion_source < upper_motion_limit) || 2422 if ((motion_source < upper_motion_limit) ||
2124 (motion_source > lower_motion_limit)) { 2423 (motion_source > lower_motion_limit)) {
2125 // printf (" vp3: help! motion source (%d) out of range (%d..%d)\n", 2424 printf (" vp3: help! motion source (%d) out of range (%d..%d), fragment %d\n",
2126 // motion_source, upper_motion_limit, lower_motion_limit); 2425 motion_source, upper_motion_limit, lower_motion_limit, i);
2127 continue; 2426 continue;
2128 } 2427 }
2129 } 2428 }
2130 2429
2131 /* first, take care of copying a block from either the 2430 /* first, take care of copying a block from either the
2149 2448
2150 /* dequantize the DCT coefficients */ 2449 /* dequantize the DCT coefficients */
2151 debug_idct("fragment %d, coding mode %d, DC = %d, dequant = %d:\n", 2450 debug_idct("fragment %d, coding mode %d, DC = %d, dequant = %d:\n",
2152 i, s->all_fragments[i].coding_method, 2451 i, s->all_fragments[i].coding_method,
2153 s->all_fragments[i].coeffs[0], dequantizer[0]); 2452 s->all_fragments[i].coeffs[0], dequantizer[0]);
2154 for (j = 0; j < 64; j++)
2155 dequant_block[dezigzag_index[j]] =
2156 s->all_fragments[i].coeffs[j] *
2157 dequantizer[j];
2158 for (j = 0; j < 64; j++)
2159 dequant_block_permuted[s->dsp.idct_permutation[j]] =
2160 dequant_block[j];
2161
2162 debug_idct("dequantized block:\n");
2163 for (m = 0; m < 8; m++) {
2164 for (n = 0; n < 8; n++) {
2165 debug_idct(" %5d", dequant_block[m * 8 + n]);
2166 }
2167 debug_idct("\n");
2168 }
2169 debug_idct("\n");
2170 2453
2171 /* invert DCT and place (or add) in final output */ 2454 /* invert DCT and place (or add) in final output */
2172
2173 if (s->all_fragments[i].coding_method == MODE_INTRA) { 2455 if (s->all_fragments[i].coding_method == MODE_INTRA) {
2174 dequant_block_permuted[0] += 1024; 2456 vp3_idct_put(s->all_fragments[i].coeffs, dequantizer,
2175 s->dsp.idct_put(
2176 output_plane + s->all_fragments[i].first_pixel, 2457 output_plane + s->all_fragments[i].first_pixel,
2177 stride, dequant_block_permuted); 2458 stride);
2178 } else { 2459 } else {
2179 s->dsp.idct_add( 2460 vp3_idct_add(s->all_fragments[i].coeffs, dequantizer,
2180 output_plane + s->all_fragments[i].first_pixel, 2461 output_plane + s->all_fragments[i].first_pixel,
2181 stride, dequant_block_permuted); 2462 stride);
2182 } 2463 }
2183 2464
2184 debug_idct("block after idct_%s():\n", 2465 debug_idct("block after idct_%s():\n",
2185 (s->all_fragments[i].coding_method == MODE_INTRA)? 2466 (s->all_fragments[i].coding_method == MODE_INTRA)?
2186 "put" : "add"); 2467 "put" : "add");
2477 printf(" vp3: could not decode frame\n"); 2758 printf(" vp3: could not decode frame\n");
2478 return -1; 2759 return -1;
2479 } 2760 }
2480 2761
2481 reverse_dc_prediction(s, 0, s->fragment_width, s->fragment_height); 2762 reverse_dc_prediction(s, 0, s->fragment_width, s->fragment_height);
2482 reverse_dc_prediction(s, s->u_fragment_start,
2483 s->fragment_width / 2, s->fragment_height / 2);
2484 reverse_dc_prediction(s, s->v_fragment_start,
2485 s->fragment_width / 2, s->fragment_height / 2);
2486
2487 render_fragments(s, 0, s->width, s->height, 0); 2763 render_fragments(s, 0, s->width, s->height, 0);
2488 #if 1 2764
2489 render_fragments(s, s->u_fragment_start, s->width / 2, s->height / 2, 1); 2765 if ((avctx->flags & CODEC_FLAG_GRAY) == 0) {
2490 render_fragments(s, s->v_fragment_start, s->width / 2, s->height / 2, 2); 2766 reverse_dc_prediction(s, s->u_fragment_start,
2491 #else 2767 s->fragment_width / 2, s->fragment_height / 2);
2492 memset(s->current_frame.data[1], 0x80, s->width * s->height / 4); 2768 reverse_dc_prediction(s, s->v_fragment_start,
2493 memset(s->current_frame.data[2], 0x80, s->width * s->height / 4); 2769 s->fragment_width / 2, s->fragment_height / 2);
2494 #endif 2770 render_fragments(s, s->u_fragment_start, s->width / 2, s->height / 2, 1);
2771 render_fragments(s, s->v_fragment_start, s->width / 2, s->height / 2, 2);
2772 } else {
2773 memset(s->current_frame.data[1], 0x80, s->width * s->height / 4);
2774 memset(s->current_frame.data[2], 0x80, s->width * s->height / 4);
2775 }
2495 2776
2496 #if KEYFRAMES_ONLY 2777 #if KEYFRAMES_ONLY
2497 } 2778 }
2498 #endif 2779 #endif
2499 2780