Mercurial > libavcodec.hg
comparison dv.c @ 1567:e08df4d22d27 libavcodec
* introducing dct248 into the DSP context.
* simple/accurate implementation of dct248
* DV encoding now supports 2-4-8 DCT
* DV encoding gets a bit faster (but still miles away
from what I think it could do)
* misc. DV codec cleanups
author | romansh |
---|---|
date | Fri, 24 Oct 2003 18:28:01 +0000 |
parents | 0183874861fd |
children | 932d306bf1dc |
comparison legend: equal | deleted | inserted | replaced
1566:396e8526e82c | 1567:e08df4d22d27 |
---|---|
33 #include "simple_idct.h" | 33 #include "simple_idct.h" |
34 #include "dvdata.h" | 34 #include "dvdata.h" |
35 | 35 |
36 typedef struct DVVideoDecodeContext { | 36 typedef struct DVVideoDecodeContext { |
37 const DVprofile* sys; | 37 const DVprofile* sys; |
38 AVFrame picture; | |
39 | |
40 uint8_t dv_zigzag[2][64]; | |
41 uint8_t dv_idct_shift[2][22][64]; | |
42 uint8_t dv_dct_shift[2][22][64]; | |
43 | |
44 void (*get_pixels)(DCTELEM *block, const uint8_t *pixels, int line_size); | |
45 void (*fdct[2])(DCTELEM *block); | |
46 void (*idct_put[2])(uint8_t *dest, int line_size, DCTELEM *block); | |
47 | |
38 GetBitContext gb; | 48 GetBitContext gb; |
39 AVFrame picture; | |
40 DCTELEM block[5*6][64] __align8; | 49 DCTELEM block[5*6][64] __align8; |
41 | |
42 /* FIXME: the following is extracted from DSP */ | |
43 uint8_t dv_zigzag[2][64]; | |
44 uint8_t idct_permutation[64]; | |
45 void (*get_pixels)(DCTELEM *block, const uint8_t *pixels, int line_size); | |
46 void (*fdct)(DCTELEM *block); | |
47 | |
48 /* XXX: move it to static storage ? */ | |
49 uint8_t dv_shift[2][22][64]; | |
50 void (*idct_put[2])(uint8_t *dest, int line_size, DCTELEM *block); | |
51 } DVVideoDecodeContext; | 50 } DVVideoDecodeContext; |
52 | 51 |
53 #define TEX_VLC_BITS 9 | 52 #define TEX_VLC_BITS 9 |
54 /* XXX: also include quantization */ | 53 /* XXX: also include quantization */ |
55 static RL_VLC_ELEM *dv_rl_vlc[1]; | 54 static RL_VLC_ELEM *dv_rl_vlc[1]; |
56 static VLC_TYPE dv_vlc_codes[15][23]; | 55 static VLC_TYPE dv_vlc_codes[15][23]; |
57 | 56 |
58 static void dv_build_unquantize_tables(DVVideoDecodeContext *s) | 57 static void dv_build_unquantize_tables(DVVideoDecodeContext *s, uint8_t* perm) |
59 { | 58 { |
60 int i, q, j; | 59 int i, q, j; |
61 | 60 |
62 /* NOTE: max left shift is 6 */ | 61 /* NOTE: max left shift is 6 */ |
63 for(q = 0; q < 22; q++) { | 62 for(q = 0; q < 22; q++) { |
64 /* 88 unquant */ | 63 /* 88DCT */ |
65 for(i = 1; i < 64; i++) { | 64 for(i = 1; i < 64; i++) { |
66 /* 88 table */ | 65 /* 88 table */ |
67 j = s->idct_permutation[i]; | 66 j = perm[i]; |
68 s->dv_shift[0][q][j] = | 67 s->dv_idct_shift[0][q][j] = |
69 dv_quant_shifts[q][dv_88_areas[i]] + 1; | 68 dv_quant_shifts[q][dv_88_areas[i]] + 1; |
69 s->dv_dct_shift[0][q][i] = | |
70 dv_quant_shifts[q][dv_88_areas[ff_zigzag_direct[i]]] + 4; | |
70 } | 71 } |
71 | 72 |
72 /* 248 unquant */ | 73 /* 248DCT */ |
73 for(i = 1; i < 64; i++) { | 74 for(i = 1; i < 64; i++) { |
74 /* 248 table */ | 75 /* 248 table */ |
75 s->dv_shift[1][q][i] = | 76 s->dv_idct_shift[1][q][i] = |
76 dv_quant_shifts[q][dv_248_areas[i]] + 1; | 77 dv_quant_shifts[q][dv_248_areas[i]] + 1; |
78 s->dv_dct_shift[1][q][i] = | |
79 dv_quant_shifts[q][dv_248_areas[ff_zigzag248_direct[i]]] + 4; | |
77 } | 80 } |
78 } | 81 } |
79 } | 82 } |
80 | 83 |
81 static int dvvideo_init(AVCodecContext *avctx) | 84 static int dvvideo_init(AVCodecContext *avctx) |
82 { | 85 { |
83 DVVideoDecodeContext *s = avctx->priv_data; | 86 DVVideoDecodeContext *s = avctx->priv_data; |
84 MpegEncContext s2; | 87 DSPContext dsp; |
85 static int done=0; | 88 static int done=0; |
89 int i; | |
86 | 90 |
87 if (!done) { | 91 if (!done) { |
88 int i; | 92 int i; |
89 VLC dv_vlc; | 93 VLC dv_vlc; |
90 | 94 |
122 if (dv_vlc_run[i] < 15 && dv_vlc_level[i] < 23 && dv_vlc_len[i] < 15) | 126 if (dv_vlc_run[i] < 15 && dv_vlc_level[i] < 23 && dv_vlc_len[i] < 15) |
123 dv_vlc_codes[dv_vlc_run[i]][dv_vlc_level[i]] = i; | 127 dv_vlc_codes[dv_vlc_run[i]][dv_vlc_level[i]] = i; |
124 } | 128 } |
125 } | 129 } |
126 | 130 |
127 /* ugly way to get the idct & scantable */ | 131 /* Generic DSP setup */ |
128 /* XXX: fix it */ | 132 dsputil_init(&dsp, avctx); |
129 memset(&s2, 0, sizeof(MpegEncContext)); | 133 s->get_pixels = dsp.get_pixels; |
130 s2.avctx = avctx; | 134 |
131 dsputil_init(&s2.dsp, avctx); | 135 /* 88DCT setup */ |
132 if (DCT_common_init(&s2) < 0) | 136 s->fdct[0] = dsp.fdct; |
133 return -1; | 137 s->idct_put[0] = dsp.idct_put; |
134 | 138 for (i=0; i<64; i++) |
135 s->get_pixels = s2.dsp.get_pixels; | 139 s->dv_zigzag[0][i] = dsp.idct_permutation[ff_zigzag_direct[i]]; |
136 s->fdct = s2.dsp.fdct; | 140 |
137 | 141 /* 248DCT setup */ |
138 s->idct_put[0] = s2.dsp.idct_put; | 142 s->fdct[1] = dsp.fdct248; |
139 memcpy(s->idct_permutation, s2.dsp.idct_permutation, 64); | 143 s->idct_put[1] = simple_idct248_put; // FIXME: need to add it to DSP |
140 memcpy(s->dv_zigzag[0], s2.intra_scantable.permutated, 64); | 144 memcpy(s->dv_zigzag[1], ff_zigzag248_direct, 64); |
141 | |
142 /* XXX: use MMX also for idct248 */ | |
143 s->idct_put[1] = simple_idct248_put; | |
144 memcpy(s->dv_zigzag[1], dv_248_zigzag, 64); | |
145 | 145 |
146 /* XXX: do it only for constant case */ | 146 /* XXX: do it only for constant case */ |
147 dv_build_unquantize_tables(s); | 147 dv_build_unquantize_tables(s, dsp.idct_permutation); |
148 | 148 |
149 /* FIXME: I really don't think this should be here */ | 149 /* FIXME: I really don't think this should be here */ |
150 if (dv_codec_profile(avctx)) | 150 if (dv_codec_profile(avctx)) |
151 avctx->pix_fmt = dv_codec_profile(avctx)->pix_fmt; | 151 avctx->pix_fmt = dv_codec_profile(avctx)->pix_fmt; |
152 avctx->coded_frame = &s->picture; | 152 avctx->coded_frame = &s->picture; |
365 dct_mode = get_bits1(&s->gb); | 365 dct_mode = get_bits1(&s->gb); |
366 mb->dct_mode = dct_mode; | 366 mb->dct_mode = dct_mode; |
367 mb->scan_table = s->dv_zigzag[dct_mode]; | 367 mb->scan_table = s->dv_zigzag[dct_mode]; |
368 class1 = get_bits(&s->gb, 2); | 368 class1 = get_bits(&s->gb, 2); |
369 mb->shift_offset = (class1 == 3); | 369 mb->shift_offset = (class1 == 3); |
370 mb->shift_table = s->dv_shift[dct_mode] | 370 mb->shift_table = s->dv_idct_shift[dct_mode] |
371 [quant + dv_quant_offset[class1]]; | 371 [quant + dv_quant_offset[class1]]; |
372 dc = dc << 2; | 372 dc = dc << 2; |
373 /* convert to unsigned because 128 is not added in the | 373 /* convert to unsigned because 128 is not added in the |
374 standard IDCT */ | 374 standard IDCT */ |
375 dc += 1024; | 375 dc += 1024; |
569 int cno; | 569 int cno; |
570 int dct_mode; | 570 int dct_mode; |
571 int block_size; | 571 int block_size; |
572 DCTELEM *mb; | 572 DCTELEM *mb; |
573 PutBitContext pb; | 573 PutBitContext pb; |
574 const uint8_t* zigzag_scan; | |
575 uint8_t *dv_shift; | |
574 } EncBlockInfo; | 576 } EncBlockInfo; |
575 | 577 |
576 static inline int dv_bits_left(EncBlockInfo* bi) | 578 static inline int dv_bits_left(EncBlockInfo* bi) |
577 { | 579 { |
578 return (bi->block_size - get_bit_count(&bi->pb)); | 580 return (bi->block_size - get_bit_count(&bi->pb)); |
581 static inline void dv_encode_ac(EncBlockInfo* bi, PutBitContext* heap) | 583 static inline void dv_encode_ac(EncBlockInfo* bi, PutBitContext* heap) |
582 { | 584 { |
583 int i, level, size, run = 0; | 585 int i, level, size, run = 0; |
584 uint32_t vlc; | 586 uint32_t vlc; |
585 PutBitContext* cpb = &bi->pb; | 587 PutBitContext* cpb = &bi->pb; |
588 int bias = (bi->cno == 3); | |
586 | 589 |
587 for (i=1; i<64; i++) { | 590 for (i=1; i<64; i++) { |
588 level = bi->mb[ff_zigzag_direct[i]] / | 591 level = bi->mb[bi->zigzag_scan[i]] / (1<<(bi->dv_shift[i] + bias)); |
589 (1<<(dv_quant_shifts[bi->qno + dv_quant_offset[bi->cno]] | |
590 [dv_88_areas[ff_zigzag_direct[i]]] + 4 + (bi->cno == 3))); | |
591 if (level != 0) { | 592 if (level != 0) { |
592 size = dv_rl2vlc(run, level, &vlc); | 593 size = dv_rl2vlc(run, level, &vlc); |
593 put_vlc: | 594 put_vlc: |
594 | 595 |
595 #ifdef VLC_DEBUG | 596 #ifdef VLC_DEBUG |
661 | 662 |
662 if (bi->cno > 3) | 663 if (bi->cno > 3) |
663 bi->cno = 3; | 664 bi->cno = 3; |
664 } | 665 } |
665 | 666 |
667 #define SQ(a) ((a)*(a)) | |
668 static int dv_score_lines(DCTELEM *s, int stride) { | |
669 int score=0; | |
670 int x, y; | |
671 | |
672 for(y=0; y<4; y++) { | |
673 for(x=0; x<8; x+=4){ | |
674 score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride]) | |
675 +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); | |
676 } | |
677 s+= stride; | |
678 } | |
679 | |
680 return score; | |
681 } | |
682 | |
666 /* | 683 /* |
667 * This is a very rough initial implementation. The performance is | 684 * This is a very rough initial implementation. The performance is |
668 * horrible and some features are missing, mainly 2-4-8 DCT encoding. | 685 * horrible and the weighting is missing. But it's missing from the |
669 * The weighting is missing as well, but it's missing from the decoding | 686 * decoding step also -- so at least we're on the same page with decoder ;-) |
670 * step also -- so at least we're on the same page with decoder ;-) | |
671 */ | 687 */ |
672 static inline void dv_encode_video_segment(DVVideoDecodeContext *s, | 688 static inline void dv_encode_video_segment(DVVideoDecodeContext *s, |
673 uint8_t *dif, | 689 uint8_t *dif, |
674 const uint16_t *mb_pos_ptr) | 690 const uint16_t *mb_pos_ptr) |
675 { | 691 { |
689 | 705 |
690 int QNO = 15; | 706 int QNO = 15; |
691 | 707 |
692 /* Stage 1 -- doing DCT on 5 MBs */ | 708 /* Stage 1 -- doing DCT on 5 MBs */ |
693 block = &s->block[0][0]; | 709 block = &s->block[0][0]; |
710 enc_blk = &enc_blks[0]; | |
694 for(mb_index = 0; mb_index < 5; mb_index++) { | 711 for(mb_index = 0; mb_index < 5; mb_index++) { |
695 v = *mb_pos_ptr++; | 712 v = *mb_pos_ptr++; |
696 mb_x = v & 0xff; | 713 mb_x = v & 0xff; |
697 mb_y = v >> 8; | 714 mb_y = v >> 8; |
698 y_ptr = s->picture.data[0] + (mb_y * s->picture.linesize[0] * 8) + (mb_x * 8); | 715 y_ptr = s->picture.data[0] + (mb_y * s->picture.linesize[0] * 8) + (mb_x * 8); |
729 b += 8; | 746 b += 8; |
730 } | 747 } |
731 } else { /* Simple copy: 8x8 -> 8x8 */ | 748 } else { /* Simple copy: 8x8 -> 8x8 */ |
732 s->get_pixels(block, data, linesize); | 749 s->get_pixels(block, data, linesize); |
733 } | 750 } |
734 | 751 |
735 s->fdct(block); | 752 if (dv_score_lines(block, 8) + dv_score_lines(block+8*4, 8) - 100 > |
753 dv_score_lines(block, 16) + dv_score_lines(block+8, 16)) { | |
754 enc_blk->dct_mode = 1; | |
755 enc_blk->zigzag_scan = ff_zigzag248_direct; | |
756 } else { | |
757 enc_blk->dct_mode = 0; | |
758 enc_blk->zigzag_scan = ff_zigzag_direct; | |
759 } | |
760 enc_blk->mb = block; | |
761 enc_blk->block_size = block_sizes[j]; | |
736 | 762 |
763 s->fdct[enc_blk->dct_mode](block); | |
764 | |
765 dv_set_class_number(enc_blk, j/4*(j%2)); | |
766 | |
737 block += 64; | 767 block += 64; |
738 } | 768 enc_blk++; |
739 } | 769 } |
740 | 770 } |
741 /* Stage 2 -- setup for encoding phase */ | 771 |
742 enc_blk = &enc_blks[0]; | 772 /* Stage 2 -- encoding by trial-and-error */ |
743 block = &s->block[0][0]; | |
744 for (i=0; i<5; i++) { | |
745 for (j=0; j<6; j++) { | |
746 enc_blk->mb = block; | |
747 enc_blk->dct_mode = 0; | |
748 enc_blk->block_size = block_sizes[j]; | |
749 | |
750 dv_set_class_number(enc_blk, j/4*(j%2)); | |
751 | |
752 block += 64; | |
753 enc_blk++; | |
754 } | |
755 } | |
756 | |
757 /* Stage 3 -- encoding by trial-and-error */ | |
758 encode_vs: | 773 encode_vs: |
759 enc_blk = &enc_blks[0]; | 774 enc_blk = &enc_blks[0]; |
760 for (i=0; i<5; i++) { | 775 for (i=0; i<5; i++) { |
761 uint8_t* p = dif + i*80 + 4; | 776 uint8_t* p = dif + i*80 + 4; |
762 for (j=0; j<6; j++) { | 777 for (j=0; j<6; j++) { |
763 enc_blk->qno = QNO; | 778 enc_blk->qno = QNO; |
779 enc_blk->dv_shift = &(s->dv_dct_shift[0] | |
780 [QNO + dv_quant_offset[enc_blk->cno]][0]); | |
764 init_put_bits(&enc_blk->pb, p, block_sizes[j]/8); | 781 init_put_bits(&enc_blk->pb, p, block_sizes[j]/8); |
765 enc_blk++; | 782 enc_blk++; |
766 p += block_sizes[j]/8; | 783 p += block_sizes[j]/8; |
767 } | 784 } |
768 } | 785 } |