comparison dv.c @ 1567:e08df4d22d27 libavcodec

* introducing dct248 into the DSP context.
* simple/accurate implementation of dct248
* DV encoding now supports 2-4-8 DCT
* DV encoding gets a bit faster (but still miles away from what I think it could do)
* misc. DV codec cleanups
author romansh
date Fri, 24 Oct 2003 18:28:01 +0000
parents 0183874861fd
children 932d306bf1dc
comparison
equal deleted inserted replaced
1566:396e8526e82c 1567:e08df4d22d27
33 #include "simple_idct.h" 33 #include "simple_idct.h"
34 #include "dvdata.h" 34 #include "dvdata.h"
35 35
36 typedef struct DVVideoDecodeContext { 36 typedef struct DVVideoDecodeContext {
37 const DVprofile* sys; 37 const DVprofile* sys;
38 AVFrame picture;
39
40 uint8_t dv_zigzag[2][64];
41 uint8_t dv_idct_shift[2][22][64];
42 uint8_t dv_dct_shift[2][22][64];
43
44 void (*get_pixels)(DCTELEM *block, const uint8_t *pixels, int line_size);
45 void (*fdct[2])(DCTELEM *block);
46 void (*idct_put[2])(uint8_t *dest, int line_size, DCTELEM *block);
47
38 GetBitContext gb; 48 GetBitContext gb;
39 AVFrame picture;
40 DCTELEM block[5*6][64] __align8; 49 DCTELEM block[5*6][64] __align8;
41
42 /* FIXME: the following is extracted from DSP */
43 uint8_t dv_zigzag[2][64];
44 uint8_t idct_permutation[64];
45 void (*get_pixels)(DCTELEM *block, const uint8_t *pixels, int line_size);
46 void (*fdct)(DCTELEM *block);
47
48 /* XXX: move it to static storage ? */
49 uint8_t dv_shift[2][22][64];
50 void (*idct_put[2])(uint8_t *dest, int line_size, DCTELEM *block);
51 } DVVideoDecodeContext; 50 } DVVideoDecodeContext;
52 51
53 #define TEX_VLC_BITS 9 52 #define TEX_VLC_BITS 9
54 /* XXX: also include quantization */ 53 /* XXX: also include quantization */
55 static RL_VLC_ELEM *dv_rl_vlc[1]; 54 static RL_VLC_ELEM *dv_rl_vlc[1];
56 static VLC_TYPE dv_vlc_codes[15][23]; 55 static VLC_TYPE dv_vlc_codes[15][23];
57 56
58 static void dv_build_unquantize_tables(DVVideoDecodeContext *s) 57 static void dv_build_unquantize_tables(DVVideoDecodeContext *s, uint8_t* perm)
59 { 58 {
60 int i, q, j; 59 int i, q, j;
61 60
62 /* NOTE: max left shift is 6 */ 61 /* NOTE: max left shift is 6 */
63 for(q = 0; q < 22; q++) { 62 for(q = 0; q < 22; q++) {
64 /* 88 unquant */ 63 /* 88DCT */
65 for(i = 1; i < 64; i++) { 64 for(i = 1; i < 64; i++) {
66 /* 88 table */ 65 /* 88 table */
67 j = s->idct_permutation[i]; 66 j = perm[i];
68 s->dv_shift[0][q][j] = 67 s->dv_idct_shift[0][q][j] =
69 dv_quant_shifts[q][dv_88_areas[i]] + 1; 68 dv_quant_shifts[q][dv_88_areas[i]] + 1;
69 s->dv_dct_shift[0][q][i] =
70 dv_quant_shifts[q][dv_88_areas[ff_zigzag_direct[i]]] + 4;
70 } 71 }
71 72
72 /* 248 unquant */ 73 /* 248DCT */
73 for(i = 1; i < 64; i++) { 74 for(i = 1; i < 64; i++) {
74 /* 248 table */ 75 /* 248 table */
75 s->dv_shift[1][q][i] = 76 s->dv_idct_shift[1][q][i] =
76 dv_quant_shifts[q][dv_248_areas[i]] + 1; 77 dv_quant_shifts[q][dv_248_areas[i]] + 1;
78 s->dv_dct_shift[1][q][i] =
79 dv_quant_shifts[q][dv_248_areas[ff_zigzag248_direct[i]]] + 4;
77 } 80 }
78 } 81 }
79 } 82 }
80 83
81 static int dvvideo_init(AVCodecContext *avctx) 84 static int dvvideo_init(AVCodecContext *avctx)
82 { 85 {
83 DVVideoDecodeContext *s = avctx->priv_data; 86 DVVideoDecodeContext *s = avctx->priv_data;
84 MpegEncContext s2; 87 DSPContext dsp;
85 static int done=0; 88 static int done=0;
89 int i;
86 90
87 if (!done) { 91 if (!done) {
88 int i; 92 int i;
89 VLC dv_vlc; 93 VLC dv_vlc;
90 94
122 if (dv_vlc_run[i] < 15 && dv_vlc_level[i] < 23 && dv_vlc_len[i] < 15) 126 if (dv_vlc_run[i] < 15 && dv_vlc_level[i] < 23 && dv_vlc_len[i] < 15)
123 dv_vlc_codes[dv_vlc_run[i]][dv_vlc_level[i]] = i; 127 dv_vlc_codes[dv_vlc_run[i]][dv_vlc_level[i]] = i;
124 } 128 }
125 } 129 }
126 130
127 /* ugly way to get the idct & scantable */ 131 /* Generic DSP setup */
128 /* XXX: fix it */ 132 dsputil_init(&dsp, avctx);
129 memset(&s2, 0, sizeof(MpegEncContext)); 133 s->get_pixels = dsp.get_pixels;
130 s2.avctx = avctx; 134
131 dsputil_init(&s2.dsp, avctx); 135 /* 88DCT setup */
132 if (DCT_common_init(&s2) < 0) 136 s->fdct[0] = dsp.fdct;
133 return -1; 137 s->idct_put[0] = dsp.idct_put;
134 138 for (i=0; i<64; i++)
135 s->get_pixels = s2.dsp.get_pixels; 139 s->dv_zigzag[0][i] = dsp.idct_permutation[ff_zigzag_direct[i]];
136 s->fdct = s2.dsp.fdct; 140
137 141 /* 248DCT setup */
138 s->idct_put[0] = s2.dsp.idct_put; 142 s->fdct[1] = dsp.fdct248;
139 memcpy(s->idct_permutation, s2.dsp.idct_permutation, 64); 143 s->idct_put[1] = simple_idct248_put; // FIXME: need to add it to DSP
140 memcpy(s->dv_zigzag[0], s2.intra_scantable.permutated, 64); 144 memcpy(s->dv_zigzag[1], ff_zigzag248_direct, 64);
141
142 /* XXX: use MMX also for idct248 */
143 s->idct_put[1] = simple_idct248_put;
144 memcpy(s->dv_zigzag[1], dv_248_zigzag, 64);
145 145
146 /* XXX: do it only for constant case */ 146 /* XXX: do it only for constant case */
147 dv_build_unquantize_tables(s); 147 dv_build_unquantize_tables(s, dsp.idct_permutation);
148 148
149 /* FIXME: I really don't think this should be here */ 149 /* FIXME: I really don't think this should be here */
150 if (dv_codec_profile(avctx)) 150 if (dv_codec_profile(avctx))
151 avctx->pix_fmt = dv_codec_profile(avctx)->pix_fmt; 151 avctx->pix_fmt = dv_codec_profile(avctx)->pix_fmt;
152 avctx->coded_frame = &s->picture; 152 avctx->coded_frame = &s->picture;
365 dct_mode = get_bits1(&s->gb); 365 dct_mode = get_bits1(&s->gb);
366 mb->dct_mode = dct_mode; 366 mb->dct_mode = dct_mode;
367 mb->scan_table = s->dv_zigzag[dct_mode]; 367 mb->scan_table = s->dv_zigzag[dct_mode];
368 class1 = get_bits(&s->gb, 2); 368 class1 = get_bits(&s->gb, 2);
369 mb->shift_offset = (class1 == 3); 369 mb->shift_offset = (class1 == 3);
370 mb->shift_table = s->dv_shift[dct_mode] 370 mb->shift_table = s->dv_idct_shift[dct_mode]
371 [quant + dv_quant_offset[class1]]; 371 [quant + dv_quant_offset[class1]];
372 dc = dc << 2; 372 dc = dc << 2;
373 /* convert to unsigned because 128 is not added in the 373 /* convert to unsigned because 128 is not added in the
374 standard IDCT */ 374 standard IDCT */
375 dc += 1024; 375 dc += 1024;
569 int cno; 569 int cno;
570 int dct_mode; 570 int dct_mode;
571 int block_size; 571 int block_size;
572 DCTELEM *mb; 572 DCTELEM *mb;
573 PutBitContext pb; 573 PutBitContext pb;
574 const uint8_t* zigzag_scan;
575 uint8_t *dv_shift;
574 } EncBlockInfo; 576 } EncBlockInfo;
575 577
576 static inline int dv_bits_left(EncBlockInfo* bi) 578 static inline int dv_bits_left(EncBlockInfo* bi)
577 { 579 {
578 return (bi->block_size - get_bit_count(&bi->pb)); 580 return (bi->block_size - get_bit_count(&bi->pb));
581 static inline void dv_encode_ac(EncBlockInfo* bi, PutBitContext* heap) 583 static inline void dv_encode_ac(EncBlockInfo* bi, PutBitContext* heap)
582 { 584 {
583 int i, level, size, run = 0; 585 int i, level, size, run = 0;
584 uint32_t vlc; 586 uint32_t vlc;
585 PutBitContext* cpb = &bi->pb; 587 PutBitContext* cpb = &bi->pb;
588 int bias = (bi->cno == 3);
586 589
587 for (i=1; i<64; i++) { 590 for (i=1; i<64; i++) {
588 level = bi->mb[ff_zigzag_direct[i]] / 591 level = bi->mb[bi->zigzag_scan[i]] / (1<<(bi->dv_shift[i] + bias));
589 (1<<(dv_quant_shifts[bi->qno + dv_quant_offset[bi->cno]]
590 [dv_88_areas[ff_zigzag_direct[i]]] + 4 + (bi->cno == 3)));
591 if (level != 0) { 592 if (level != 0) {
592 size = dv_rl2vlc(run, level, &vlc); 593 size = dv_rl2vlc(run, level, &vlc);
593 put_vlc: 594 put_vlc:
594 595
595 #ifdef VLC_DEBUG 596 #ifdef VLC_DEBUG
661 662
662 if (bi->cno > 3) 663 if (bi->cno > 3)
663 bi->cno = 3; 664 bi->cno = 3;
664 } 665 }
665 666
/*
 * SQ(a): square of a. The argument is expanded twice, so it must be free of
 * side effects.
 *
 * dv_score_lines(): returns the sum of squared differences between each
 * sample and the sample `stride` elements after it, taken over 8 columns
 * and 4 consecutive steps of `stride`.
 *
 *   s      - first sample of the first line to compare
 *   stride - distance, in elements, between the two lines being compared
 *            and between successive steps
 *
 * Reads s[0] .. s[4*stride + 7]; nothing is written.
 * NOTE(review): the call sites visible in this comparison pass block+8*4
 * with stride 8, and block / block+8 with stride 16; each of those reaches
 * past index 63 of a 64-element block -- confirm this is intended.
 */
667 #define SQ(a) ((a)*(a))
668 static int dv_score_lines(DCTELEM *s, int stride) {
669 int score=0;
670 int x, y;
671
/* four steps of `stride` */
672 for(y=0; y<4; y++) {
/* 8 columns, handled four at a time */
673 for(x=0; x<8; x+=4){
674 score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride])
675 +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]);
676 }
/* advance to the next line of the pair sequence */
677 s+= stride;
678 }
679
680 return score;
681 }
682
666 /* 683 /*
667 * This is a very rough initial implementation. The performance is 684 * This is a very rough initial implementation. The performance is
668 * horrible and some features are missing, mainly 2-4-8 DCT encoding. 685 * horrible and the weighting is missing. But it's missing from the
669 * The weighting is missing as well, but it's missing from the decoding 686 * decoding step also -- so at least we're on the same page with decoder ;-)
670 * step also -- so at least we're on the same page with decoder ;-)
671 */ 687 */
672 static inline void dv_encode_video_segment(DVVideoDecodeContext *s, 688 static inline void dv_encode_video_segment(DVVideoDecodeContext *s,
673 uint8_t *dif, 689 uint8_t *dif,
674 const uint16_t *mb_pos_ptr) 690 const uint16_t *mb_pos_ptr)
675 { 691 {
689 705
690 int QNO = 15; 706 int QNO = 15;
691 707
692 /* Stage 1 -- doing DCT on 5 MBs */ 708 /* Stage 1 -- doing DCT on 5 MBs */
693 block = &s->block[0][0]; 709 block = &s->block[0][0];
710 enc_blk = &enc_blks[0];
694 for(mb_index = 0; mb_index < 5; mb_index++) { 711 for(mb_index = 0; mb_index < 5; mb_index++) {
695 v = *mb_pos_ptr++; 712 v = *mb_pos_ptr++;
696 mb_x = v & 0xff; 713 mb_x = v & 0xff;
697 mb_y = v >> 8; 714 mb_y = v >> 8;
698 y_ptr = s->picture.data[0] + (mb_y * s->picture.linesize[0] * 8) + (mb_x * 8); 715 y_ptr = s->picture.data[0] + (mb_y * s->picture.linesize[0] * 8) + (mb_x * 8);
729 b += 8; 746 b += 8;
730 } 747 }
731 } else { /* Simple copy: 8x8 -> 8x8 */ 748 } else { /* Simple copy: 8x8 -> 8x8 */
732 s->get_pixels(block, data, linesize); 749 s->get_pixels(block, data, linesize);
733 } 750 }
734 751
735 s->fdct(block); 752 if (dv_score_lines(block, 8) + dv_score_lines(block+8*4, 8) - 100 >
753 dv_score_lines(block, 16) + dv_score_lines(block+8, 16)) {
754 enc_blk->dct_mode = 1;
755 enc_blk->zigzag_scan = ff_zigzag248_direct;
756 } else {
757 enc_blk->dct_mode = 0;
758 enc_blk->zigzag_scan = ff_zigzag_direct;
759 }
760 enc_blk->mb = block;
761 enc_blk->block_size = block_sizes[j];
736 762
763 s->fdct[enc_blk->dct_mode](block);
764
765 dv_set_class_number(enc_blk, j/4*(j%2));
766
737 block += 64; 767 block += 64;
738 } 768 enc_blk++;
739 } 769 }
740 770 }
741 /* Stage 2 -- setup for encoding phase */ 771
742 enc_blk = &enc_blks[0]; 772 /* Stage 2 -- encoding by trial-and-error */
743 block = &s->block[0][0];
744 for (i=0; i<5; i++) {
745 for (j=0; j<6; j++) {
746 enc_blk->mb = block;
747 enc_blk->dct_mode = 0;
748 enc_blk->block_size = block_sizes[j];
749
750 dv_set_class_number(enc_blk, j/4*(j%2));
751
752 block += 64;
753 enc_blk++;
754 }
755 }
756
757 /* Stage 3 -- encoding by trial-and-error */
758 encode_vs: 773 encode_vs:
759 enc_blk = &enc_blks[0]; 774 enc_blk = &enc_blks[0];
760 for (i=0; i<5; i++) { 775 for (i=0; i<5; i++) {
761 uint8_t* p = dif + i*80 + 4; 776 uint8_t* p = dif + i*80 + 4;
762 for (j=0; j<6; j++) { 777 for (j=0; j<6; j++) {
763 enc_blk->qno = QNO; 778 enc_blk->qno = QNO;
779 enc_blk->dv_shift = &(s->dv_dct_shift[0]
780 [QNO + dv_quant_offset[enc_blk->cno]][0]);
764 init_put_bits(&enc_blk->pb, p, block_sizes[j]/8); 781 init_put_bits(&enc_blk->pb, p, block_sizes[j]/8);
765 enc_blk++; 782 enc_blk++;
766 p += block_sizes[j]/8; 783 p += block_sizes[j]/8;
767 } 784 }
768 } 785 }