Mercurial > libavcodec.hg
comparison dv.c @ 1631:59f2fa833449 libavcodec
* 3x encoding speedup. Finally we seem to be on par with libdv
in terms of time and we're doing slightly better w.r.t.
quality. I don't think there's much room for improvement
left, but I'd like to try and vectorize a couple of things.
Btw, any ideas on what may impact performance will be greatly
appreciated.
author | romansh |
---|---|
date | Thu, 27 Nov 2003 01:40:13 +0000 |
parents | 932d306bf1dc |
children | 806afb8e9085 |
comparison
equal
deleted
inserted
replaced
1630:586b5c08863c | 1631:59f2fa833449 |
---|---|
37 const DVprofile* sys; | 37 const DVprofile* sys; |
38 AVFrame picture; | 38 AVFrame picture; |
39 | 39 |
40 uint8_t dv_zigzag[2][64]; | 40 uint8_t dv_zigzag[2][64]; |
41 uint8_t dv_idct_shift[2][22][64]; | 41 uint8_t dv_idct_shift[2][22][64]; |
42 uint8_t dv_dct_shift[2][22][64]; | |
43 | 42 |
44 void (*get_pixels)(DCTELEM *block, const uint8_t *pixels, int line_size); | 43 void (*get_pixels)(DCTELEM *block, const uint8_t *pixels, int line_size); |
45 void (*fdct[2])(DCTELEM *block); | 44 void (*fdct[2])(DCTELEM *block); |
46 void (*idct_put[2])(uint8_t *dest, int line_size, DCTELEM *block); | 45 void (*idct_put[2])(uint8_t *dest, int line_size, DCTELEM *block); |
47 | 46 |
48 GetBitContext gb; | 47 GetBitContext gb; |
49 DCTELEM block[5*6][64] __align8; | 48 DCTELEM block[5*6][64] __align8; |
50 } DVVideoDecodeContext; | 49 } DVVideoDecodeContext; |
51 | 50 |
52 #define TEX_VLC_BITS 9 | 51 #define TEX_VLC_BITS 9 |
52 | |
53 #ifdef DV_CODEC_TINY_TARGET | |
54 #define DV_VLC_MAP_RUN_SIZE 15 | |
55 #define DV_VLC_MAP_LEV_SIZE 23 | |
56 #else | |
57 #define DV_VLC_MAP_RUN_SIZE 64 | |
58 #define DV_VLC_MAP_LEV_SIZE 512 | |
59 #endif | |
60 | |
53 /* XXX: also include quantization */ | 61 /* XXX: also include quantization */ |
54 static RL_VLC_ELEM *dv_rl_vlc[1]; | 62 static RL_VLC_ELEM *dv_rl_vlc[1]; |
55 static VLC_TYPE dv_vlc_codes[15][23]; | 63 /* VLC encoding lookup table */ |
64 static struct dv_vlc_pair { | |
65 uint32_t vlc; | |
66 uint8_t size; | |
67 } (*dv_vlc_map)[DV_VLC_MAP_LEV_SIZE] = NULL; | |
56 | 68 |
57 static void dv_build_unquantize_tables(DVVideoDecodeContext *s, uint8_t* perm) | 69 static void dv_build_unquantize_tables(DVVideoDecodeContext *s, uint8_t* perm) |
58 { | 70 { |
59 int i, q, j; | 71 int i, q, j; |
60 | 72 |
64 for(i = 1; i < 64; i++) { | 76 for(i = 1; i < 64; i++) { |
65 /* 88 table */ | 77 /* 88 table */ |
66 j = perm[i]; | 78 j = perm[i]; |
67 s->dv_idct_shift[0][q][j] = | 79 s->dv_idct_shift[0][q][j] = |
68 dv_quant_shifts[q][dv_88_areas[i]] + 1; | 80 dv_quant_shifts[q][dv_88_areas[i]] + 1; |
69 s->dv_dct_shift[0][q][i] = | |
70 dv_quant_shifts[q][dv_88_areas[ff_zigzag_direct[i]]] + 4; | |
71 } | 81 } |
72 | 82 |
73 /* 248DCT */ | 83 /* 248DCT */ |
74 for(i = 1; i < 64; i++) { | 84 for(i = 1; i < 64; i++) { |
75 /* 248 table */ | 85 /* 248 table */ |
76 s->dv_idct_shift[1][q][i] = | 86 s->dv_idct_shift[1][q][i] = |
77 dv_quant_shifts[q][dv_248_areas[i]] + 1; | 87 dv_quant_shifts[q][dv_248_areas[i]] + 1; |
78 s->dv_dct_shift[1][q][i] = | |
79 dv_quant_shifts[q][dv_248_areas[ff_zigzag248_direct[i]]] + 4; | |
80 } | 88 } |
81 } | 89 } |
82 } | 90 } |
83 | 91 |
84 static int dvvideo_init(AVCodecContext *avctx) | 92 static int dvvideo_init(AVCodecContext *avctx) |
85 { | 93 { |
86 DVVideoDecodeContext *s = avctx->priv_data; | 94 DVVideoDecodeContext *s = avctx->priv_data; |
87 DSPContext dsp; | 95 DSPContext dsp; |
88 static int done=0; | 96 static int done=0; |
89 int i; | 97 int i, j; |
90 | 98 |
91 if (!done) { | 99 if (!done) { |
92 int i; | 100 int i; |
93 VLC dv_vlc; | 101 VLC dv_vlc; |
94 | 102 |
95 done = 1; | 103 done = 1; |
104 | |
105 dv_vlc_map = av_mallocz(DV_VLC_MAP_LEV_SIZE*DV_VLC_MAP_RUN_SIZE*sizeof(struct dv_vlc_pair)); | |
106 if (!dv_vlc_map) | |
107 return -ENOMEM; | |
96 | 108 |
97 /* NOTE: as a trick, we use the fact that no codes are unused | 109 /* NOTE: as a trick, we use the fact that no codes are unused |
98 to accelerate the parsing of partial codes */ | 110 to accelerate the parsing of partial codes */ |
99 init_vlc(&dv_vlc, TEX_VLC_BITS, NB_DV_VLC, | 111 init_vlc(&dv_vlc, TEX_VLC_BITS, NB_DV_VLC, |
100 dv_vlc_len, 1, 1, dv_vlc_bits, 2, 2); | 112 dv_vlc_len, 1, 1, dv_vlc_bits, 2, 2); |
101 | 113 |
102 dv_rl_vlc[0] = av_malloc(dv_vlc.table_size * sizeof(RL_VLC_ELEM)); | 114 dv_rl_vlc[0] = av_malloc(dv_vlc.table_size * sizeof(RL_VLC_ELEM)); |
115 if (!dv_rl_vlc[0]) { | |
116 av_free(dv_vlc_map); | |
117 return -ENOMEM; | |
118 } | |
103 for(i = 0; i < dv_vlc.table_size; i++){ | 119 for(i = 0; i < dv_vlc.table_size; i++){ |
104 int code= dv_vlc.table[i][0]; | 120 int code= dv_vlc.table[i][0]; |
105 int len = dv_vlc.table[i][1]; | 121 int len = dv_vlc.table[i][1]; |
106 int level, run; | 122 int level, run; |
107 | 123 |
119 dv_rl_vlc[0][i].len = len; | 135 dv_rl_vlc[0][i].len = len; |
120 dv_rl_vlc[0][i].level = level; | 136 dv_rl_vlc[0][i].level = level; |
121 dv_rl_vlc[0][i].run = run; | 137 dv_rl_vlc[0][i].run = run; |
122 } | 138 } |
123 | 139 |
124 memset(dv_vlc_codes, 0xff, sizeof(dv_vlc_codes)); | |
125 for (i = 0; i < NB_DV_VLC - 1; i++) { | 140 for (i = 0; i < NB_DV_VLC - 1; i++) { |
126 if (dv_vlc_run[i] < 15 && dv_vlc_level[i] < 23 && dv_vlc_len[i] < 15) | 141 if (dv_vlc_run[i] >= DV_VLC_MAP_RUN_SIZE || dv_vlc_level[i] >= DV_VLC_MAP_LEV_SIZE) |
127 dv_vlc_codes[dv_vlc_run[i]][dv_vlc_level[i]] = i; | 142 continue; |
143 | |
144 if (dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].size != 0) | |
145 continue; | |
146 | |
147 dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].vlc = dv_vlc_bits[i] << | |
148 (!!dv_vlc_level[i]); | |
149 dv_vlc_map[dv_vlc_run[i]][dv_vlc_level[i]].size = dv_vlc_len[i] + | |
150 (!!dv_vlc_level[i]); | |
151 } | |
152 for (i = 0; i < DV_VLC_MAP_RUN_SIZE; i++) { | |
153 #ifdef DV_CODEC_TINY_TARGET | |
154 for (j = 1; j < DV_VLC_MAP_LEV_SIZE; j++) { | |
155 if (dv_vlc_map[i][j].size == 0) { | |
156 dv_vlc_map[i][j].vlc = dv_vlc_map[0][j].vlc | | |
157 (dv_vlc_map[i-1][0].vlc << (dv_vlc_map[0][j].size)); | |
158 dv_vlc_map[i][j].size = dv_vlc_map[i-1][0].size + | |
159 dv_vlc_map[0][j].size; | |
160 } | |
161 } | |
162 #else | |
163 for (j = 1; j < DV_VLC_MAP_LEV_SIZE/2; j++) { | |
164 if (dv_vlc_map[i][j].size == 0) { | |
165 dv_vlc_map[i][j].vlc = dv_vlc_map[0][j].vlc | | |
166 (dv_vlc_map[i-1][0].vlc << (dv_vlc_map[0][j].size)); | |
167 dv_vlc_map[i][j].size = dv_vlc_map[i-1][0].size + | |
168 dv_vlc_map[0][j].size; | |
169 } | |
170 dv_vlc_map[i][((uint16_t)(-j))&0x1ff].vlc = | |
171 dv_vlc_map[i][j].vlc | 1; | |
172 dv_vlc_map[i][((uint16_t)(-j))&0x1ff].size = | |
173 dv_vlc_map[i][j].size; | |
174 } | |
175 #endif | |
128 } | 176 } |
129 } | 177 } |
130 | 178 |
131 /* Generic DSP setup */ | 179 /* Generic DSP setup */ |
132 dsputil_init(&dsp, avctx); | 180 dsputil_init(&dsp, avctx); |
169 | 217 |
170 /* block size in bits */ | 218 /* block size in bits */ |
171 static const uint16_t block_sizes[6] = { | 219 static const uint16_t block_sizes[6] = { |
172 112, 112, 112, 112, 80, 80 | 220 112, 112, 112, 112, 80, 80 |
173 }; | 221 }; |
222 /* bit budget for AC only in 5 MBs */ | |
223 static const int vs_total_ac_bits = (100 * 4 + 68*2) * 5; | |
224 /* see dv_88_areas and dv_248_areas for details */ | |
225 static const int mb_area_start[5] = { 1, 6, 21, 43, 64 }; | |
174 | 226 |
175 #ifndef ALT_BITSTREAM_READER | 227 #ifndef ALT_BITSTREAM_READER |
176 #warning only works with ALT_BITSTREAM_READER | 228 #warning only works with ALT_BITSTREAM_READER |
177 #endif | 229 #endif |
178 | 230 |
515 mb++; | 567 mb++; |
516 } | 568 } |
517 } | 569 } |
518 } | 570 } |
519 | 571 |
572 #ifdef DV_CODEC_TINY_TARGET | |
520 /* Converts run and level (where level != 0) pair into vlc, returning bit size */ | 573 /* Converts run and level (where level != 0) pair into vlc, returning bit size */ |
521 static inline int dv_rl2vlc(int run, int l, uint32_t* vlc) | 574 static always_inline int dv_rl2vlc(int run, int l, uint32_t* vlc) |
522 { | 575 { |
523 int sign = l >> 8; | 576 int sign = l >> 8; |
524 int level = (l ^ sign) - sign; | 577 int level = (l ^ sign) - sign; |
525 int size; | 578 int size; |
526 | 579 |
527 sign = (sign & 1); | 580 sign = (sign & 1); |
528 | 581 |
529 if (run < 15 && level < 23 && dv_vlc_codes[run][level] != -1) { | 582 if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) { |
530 *vlc = (dv_vlc_bits[dv_vlc_codes[run][level]] << 1) | sign; | 583 *vlc = dv_vlc_map[run][level].vlc | sign; |
531 size = dv_vlc_len[dv_vlc_codes[run][level]] + 1; | 584 size = dv_vlc_map[run][level].size; |
532 } | 585 } |
533 else { | 586 else { |
534 if (level < 23) { | 587 if (level < DV_VLC_MAP_LEV_SIZE) { |
535 *vlc = (dv_vlc_bits[dv_vlc_codes[0][level]] << 1) | sign; | 588 *vlc = dv_vlc_map[0][level].vlc | sign; |
536 size = dv_vlc_len[dv_vlc_codes[0][level]] + 1; | 589 size = dv_vlc_map[0][level].size; |
537 } else { | 590 } else { |
538 *vlc = 0xfe00 | (level << 1) | sign; | 591 *vlc = 0xfe00 | (level << 1) | sign; |
539 size = 16; | 592 size = 16; |
540 } | 593 } |
541 | 594 if (run) { |
542 switch(run) { | 595 *vlc |= ((run < 16) ? dv_vlc_map[run-1][0].vlc : |
543 case 0: | 596 (0x1f80 | (run - 1))) << size; |
544 break; | 597 size += (run < 16) ? dv_vlc_map[run-1][0].size : 13; |
545 case 1: | |
546 case 2: | |
547 *vlc |= ((0x7ce | (run - 1)) << size); | |
548 size += 11; | |
549 break; | |
550 case 3: | |
551 case 4: | |
552 case 5: | |
553 case 6: | |
554 *vlc |= ((0xfac | (run - 3)) << size); | |
555 size += 12; | |
556 break; | |
557 default: | |
558 *vlc |= ((0x1f80 | (run - 1)) << size); | |
559 size += 13; | |
560 break; | |
561 } | 598 } |
562 } | 599 } |
563 | 600 |
564 return size; | 601 return size; |
565 } | 602 } |
566 | 603 |
604 static always_inline int dv_rl2vlc_size(int run, int l) | |
605 { | |
606 int level = (l ^ (l >> 8)) - (l >> 8); | |
607 int size; | |
608 | |
609 if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) { | |
610 size = dv_vlc_map[run][level].size; | |
611 } | |
612 else { | |
613 size = (level < DV_VLC_MAP_LEV_SIZE) ? dv_vlc_map[0][level].size : 16; | |
614 if (run) { | |
615 size += (run < 16) ? dv_vlc_map[run-1][0].size : 13; | |
616 } | |
617 } | |
618 return size; | |
619 } | |
620 #else | |
621 static always_inline int dv_rl2vlc(int run, int l, uint32_t* vlc) | |
622 { | |
623 *vlc = dv_vlc_map[run][((uint16_t)l)&0x1ff].vlc; | |
624 return dv_vlc_map[run][((uint16_t)l)&0x1ff].size; | |
625 } | |
626 | |
627 static always_inline int dv_rl2vlc_size(int run, int l) | |
628 { | |
629 return dv_vlc_map[run][((uint16_t)l)&0x1ff].size; | |
630 } | |
631 #endif | |
632 | |
567 typedef struct EncBlockInfo { | 633 typedef struct EncBlockInfo { |
568 int qno; | 634 int area_q[4]; |
635 int bit_size[4]; | |
636 int prev_run[4]; | |
637 int cur_ac; | |
569 int cno; | 638 int cno; |
570 int dct_mode; | 639 int dct_mode; |
571 int block_size; | |
572 DCTELEM *mb; | 640 DCTELEM *mb; |
573 PutBitContext pb; | 641 uint8_t partial_bit_count; |
574 const uint8_t* zigzag_scan; | 642 uint32_t partial_bit_buffer; /* we can't use uint16_t here */ |
575 uint8_t *dv_shift; | |
576 } EncBlockInfo; | 643 } EncBlockInfo; |
577 | 644 |
578 static inline int dv_bits_left(EncBlockInfo* bi) | 645 static always_inline int dv_bits_left(PutBitContext* s) |
579 { | 646 { |
580 return (bi->block_size - get_bit_count(&bi->pb)); | 647 return (s->buf_end - s->buf) * 8 - |
581 } | 648 ((s->buf_ptr - s->buf + s->data_out_size) * 8 + 32 - (int64_t)s->bit_left); |
582 | 649 } |
583 static inline void dv_encode_ac(EncBlockInfo* bi, PutBitContext* heap) | 650 |
584 { | 651 static always_inline void dv_encode_ac(EncBlockInfo* bi, PutBitContext* pb_pool, |
585 int i, level, size, run = 0; | 652 int pb_size) |
586 uint32_t vlc; | 653 { |
587 PutBitContext* cpb = &bi->pb; | 654 int run; |
588 int bias = (bi->cno == 3); | 655 int bits_left; |
589 | 656 PutBitContext* pb = pb_pool; |
590 for (i=1; i<64; i++) { | 657 int size = bi->partial_bit_count; |
591 level = bi->mb[bi->zigzag_scan[i]] / (1<<(bi->dv_shift[i] + bias)); | 658 uint32_t vlc = bi->partial_bit_buffer; |
592 if (level != 0) { | 659 |
593 size = dv_rl2vlc(run, level, &vlc); | 660 bi->partial_bit_count = bi->partial_bit_buffer = 0; |
594 put_vlc: | 661 vlc_loop: |
595 | 662 /* Find suitable storage space */ |
596 #ifdef VLC_DEBUG | 663 for (; size > (bits_left = dv_bits_left(pb)); pb++) { |
597 printf(" %3d:%3d", run, level); | 664 if (bits_left) { |
598 #endif | 665 size -= bits_left; |
599 if (cpb == &bi->pb && size > dv_bits_left(bi)) { | 666 put_bits(pb, bits_left, vlc >> size); |
600 size -= dv_bits_left(bi); | 667 vlc = vlc & ((1<<size)-1); |
601 put_bits(cpb, dv_bits_left(bi), vlc >> size); | 668 } |
602 vlc = vlc & ((1<<size)-1); | 669 if (pb_size == 1) { |
603 cpb = heap; | 670 bi->partial_bit_count = size; |
671 bi->partial_bit_buffer = vlc; | |
672 return; | |
673 } | |
674 --pb_size; | |
675 } | |
676 | |
677 /* Store VLC */ | |
678 put_bits(pb, size, vlc); | |
679 | |
680 /* Construct the next VLC */ | |
681 run = 0; | |
682 for (; bi->cur_ac < 64; bi->cur_ac++, run++) { | |
683 if (bi->mb[bi->cur_ac]) { | |
684 size = dv_rl2vlc(run, bi->mb[bi->cur_ac], &vlc); | |
685 bi->cur_ac++; | |
686 goto vlc_loop; | |
604 } | 687 } |
605 put_bits(cpb, size, vlc); | 688 } |
606 run = 0; | |
607 } else | |
608 run++; | |
609 } | |
610 | 689 |
611 if (i == 64) { | 690 if (bi->cur_ac == 64) { |
612 size = 4; vlc = 6; /* End Of Block stamp */ | 691 size = 4; vlc = 6; /* End Of Block stamp */ |
613 goto put_vlc; | 692 bi->cur_ac++; |
614 } | 693 goto vlc_loop; |
615 } | 694 } |
616 | 695 } |
617 static inline void dv_redistr_bits(EncBlockInfo* bi, int count, uint8_t* extra_data, int extra_bits, PutBitContext* heap) | 696 |
618 { | 697 static always_inline void dv_set_class_number(DCTELEM* blk, EncBlockInfo* bi, |
698 const uint8_t* zigzag_scan, int bias) | |
699 { | |
700 int i, area; | |
701 int run; | |
702 int classes[] = {12, 24, 36, 0xffff}; | |
703 | |
704 run = 0; | |
705 bi->mb[0] = blk[0]; | |
706 bi->cno = 0; | |
707 for (area = 0; area < 4; area++) { | |
708 bi->prev_run[area] = run; | |
709 bi->bit_size[area] = 0; | |
710 for (i=mb_area_start[area]; i<mb_area_start[area+1]; i++) { | |
711 bi->mb[i] = (blk[zigzag_scan[i]] / 16); | |
712 while ((bi->mb[i] ^ (bi->mb[i] >> 8)) > classes[bi->cno]) | |
713 bi->cno++; | |
714 | |
715 if (bi->mb[i]) { | |
716 bi->bit_size[area] += dv_rl2vlc_size(run, bi->mb[i]); | |
717 run = 0; | |
718 } else | |
719 ++run; | |
720 } | |
721 } | |
722 bi->bit_size[3] += 4; /* EOB marker */ | |
723 bi->cno += bias; | |
724 | |
725 if (bi->cno >= 3) { /* FIXME: we have to recreate bit_size[], prev_run[] */ | |
726 bi->cno = 3; | |
727 for (i=1; i<64; i++) | |
728 bi->mb[i] /= 2; | |
729 } | |
730 } | |
731 | |
732 #define SC(x, y) ((s[x] - s[y]) ^ ((s[x] - s[y]) >> 7)) | |
733 static always_inline int dv_guess_dct_mode(DCTELEM *blk) { | |
734 DCTELEM *s; | |
735 int score88 = 0; | |
736 int score248 = 0; | |
619 int i; | 737 int i; |
620 GetBitContext gb; | 738 |
621 | 739 /* Compute 8-8 score (small values give a better chance for 8-8 DCT) */ |
622 init_get_bits(&gb, extra_data, extra_bits); | 740 s = blk; |
623 | 741 for(i=0; i<7; i++) { |
624 for (i=0; i<count; i++) { | 742 score88 += SC(0, 8) + SC(1, 9) + SC(2, 10) + SC(3, 11) + |
625 int bits_left = dv_bits_left(bi); | 743 SC(4, 12) + SC(5,13) + SC(6, 14) + SC(7, 15); |
626 #ifdef VLC_DEBUG | 744 s += 8; |
627 if (bits_left) | 745 } |
628 printf("------------> inserting %d bytes in %d:%d\n", bits_left, i/6, i%6); | 746 /* Compute 2-4-8 score (small values give a better chance for 2-4-8 DCT) */ |
629 #endif | 747 s = blk; |
630 if (bits_left > extra_bits) { | 748 for(i=0; i<6; i++) { |
631 bit_copy(&bi->pb, &gb, extra_bits); | 749 score248 += SC(0, 16) + SC(1,17) + SC(2, 18) + SC(3, 19) + |
632 extra_bits = 0; | 750 SC(4, 20) + SC(5,21) + SC(6, 22) + SC(7, 23); |
633 break; | 751 s += 8; |
634 } else | 752 } |
635 bit_copy(&bi->pb, &gb, bits_left); | 753 |
636 | 754 return (score88 - score248 > -10); |
637 extra_bits -= bits_left; | 755 } |
638 bi++; | 756 |
639 } | 757 static inline void dv_guess_qnos(EncBlockInfo* blks, int* qnos) |
640 | 758 { |
641 if (extra_bits > 0 && heap) | 759 int size[5]; |
642 bit_copy(heap, &gb, extra_bits); | 760 int i, j, k, a, run; |
643 } | 761 EncBlockInfo* b; |
644 | 762 |
645 static inline void dv_set_class_number(EncBlockInfo* bi, int j) | 763 do { |
646 { | 764 b = blks; |
647 int i, max_ac = 0; | 765 for (i=0; i<5; i++) { |
648 | 766 if (!qnos[i]) |
649 for (i=1; i<64; i++) { | 767 continue; |
650 int ac = abs(bi->mb[ff_zigzag_direct[i]]) / 4; | 768 |
651 if (max_ac < ac) | 769 qnos[i]--; |
652 max_ac = ac; | 770 size[i] = 0; |
653 } | 771 for (j=0; j<6; j++, b++) { |
654 if (max_ac < 12) | 772 for (a=0; a<4; a++) { |
655 bi->cno = j; | 773 if (b->area_q[a] != dv_quant_shifts[qnos[i] + dv_quant_offset[b->cno]][a]) { |
656 else if (max_ac < 24) | 774 b->bit_size[a] = (a==3)?4:0; |
657 bi->cno = j + 1; | 775 b->area_q[a]++; |
658 else if (max_ac < 36) | 776 run = b->prev_run[a]; |
659 bi->cno = j + 2; | 777 for (k=mb_area_start[a]; k<mb_area_start[a+1]; k++) { |
660 else | 778 b->mb[k] /= 2; |
661 bi->cno = j + 3; | 779 if (b->mb[k]) { |
662 | 780 b->bit_size[a] += dv_rl2vlc_size(run, b->mb[k]); |
663 if (bi->cno > 3) | 781 run = 0; |
664 bi->cno = 3; | 782 } else |
665 } | 783 ++run; |
666 | 784 } |
667 #define SQ(a) ((a)*(a)) | 785 } |
668 static int dv_score_lines(DCTELEM *s, int stride) { | 786 size[i] += b->bit_size[a]; |
669 int score=0; | 787 } |
670 int x, y; | 788 } |
671 | 789 } |
672 for(y=0; y<4; y++) { | 790 } while ((vs_total_ac_bits < size[0] + size[1] + size[2] + size[3] + size[4]) && |
673 for(x=0; x<8; x+=4){ | 791 (qnos[0]|qnos[1]|qnos[2]|qnos[3]|qnos[4])); |
674 score+= SQ(s[x ] - s[x +stride]) + SQ(s[x+1] - s[x+1+stride]) | |
675 +SQ(s[x+2] - s[x+2+stride]) + SQ(s[x+3] - s[x+3+stride]); | |
676 } | |
677 s+= stride; | |
678 } | |
679 | |
680 return score; | |
681 } | 792 } |
682 | 793 |
683 /* | 794 /* |
684 * This is a very rough initial implementation. The performance is | 795 * This is a very rough initial implementation. The performance is |
685 * horrible and the weighting is missing. But it's missing from the | 796 * horrible and the weighting is missing. But it's missing from the |
691 { | 802 { |
692 int mb_index, i, j, v; | 803 int mb_index, i, j, v; |
693 int mb_x, mb_y, c_offset, linesize; | 804 int mb_x, mb_y, c_offset, linesize; |
694 uint8_t* y_ptr; | 805 uint8_t* y_ptr; |
695 uint8_t* data; | 806 uint8_t* data; |
807 uint8_t* ptr; | |
696 int do_edge_wrap; | 808 int do_edge_wrap; |
697 DCTELEM *block; | 809 DCTELEM block[64] __align8; |
698 EncBlockInfo enc_blks[5*6]; | 810 EncBlockInfo enc_blks[5*6]; |
811 PutBitContext pbs[5*6]; | |
812 PutBitContext* pb; | |
699 EncBlockInfo* enc_blk; | 813 EncBlockInfo* enc_blk; |
700 int free_vs_bits; | 814 int vs_bit_size = 0; |
701 int extra_bits; | 815 int qnos[5]; |
702 PutBitContext extra_vs; | |
703 uint8_t extra_vs_data[5*6*128]; | |
704 uint8_t extra_mb_data[6*128]; | |
705 | |
706 int QNO = 15; | |
707 | 816 |
708 /* Stage 1 -- doing DCT on 5 MBs */ | |
709 block = &s->block[0][0]; | |
710 enc_blk = &enc_blks[0]; | 817 enc_blk = &enc_blks[0]; |
818 pb = &pbs[0]; | |
711 for(mb_index = 0; mb_index < 5; mb_index++) { | 819 for(mb_index = 0; mb_index < 5; mb_index++) { |
712 v = *mb_pos_ptr++; | 820 v = *mb_pos_ptr++; |
713 mb_x = v & 0xff; | 821 mb_x = v & 0xff; |
714 mb_y = v >> 8; | 822 mb_y = v >> 8; |
715 y_ptr = s->picture.data[0] + (mb_y * s->picture.linesize[0] * 8) + (mb_x * 8); | 823 y_ptr = s->picture.data[0] + (mb_y * s->picture.linesize[0] * 8) + (mb_x * 8); |
716 c_offset = (s->sys->pix_fmt == PIX_FMT_YUV411P) ? | 824 c_offset = (s->sys->pix_fmt == PIX_FMT_YUV411P) ? |
717 ((mb_y * s->picture.linesize[1] * 8) + ((mb_x >> 2) * 8)) : | 825 ((mb_y * s->picture.linesize[1] * 8) + ((mb_x >> 2) * 8)) : |
718 (((mb_y >> 1) * s->picture.linesize[1] * 8) + ((mb_x >> 1) * 8)); | 826 (((mb_y >> 1) * s->picture.linesize[1] * 8) + ((mb_x >> 1) * 8)); |
719 do_edge_wrap = 0; | 827 do_edge_wrap = 0; |
828 qnos[mb_index] = 15; /* No quantization */ | |
829 ptr = dif + mb_index*80 + 4; | |
720 for(j = 0;j < 6; j++) { | 830 for(j = 0;j < 6; j++) { |
721 if (j < 4) { /* Four Y blocks */ | 831 if (j < 4) { /* Four Y blocks */ |
722 /* NOTE: at end of line, the macroblock is handled as 420 */ | 832 /* NOTE: at end of line, the macroblock is handled as 420 */ |
723 if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) { | 833 if (s->sys->pix_fmt == PIX_FMT_YUV411P && mb_x < (704 / 8)) { |
724 data = y_ptr + (j * 8); | 834 data = y_ptr + (j * 8); |
747 } | 857 } |
748 } else { /* Simple copy: 8x8 -> 8x8 */ | 858 } else { /* Simple copy: 8x8 -> 8x8 */ |
749 s->get_pixels(block, data, linesize); | 859 s->get_pixels(block, data, linesize); |
750 } | 860 } |
751 | 861 |
752 if (dv_score_lines(block, 8) + dv_score_lines(block+8*4, 8) - 100 > | 862 enc_blk->dct_mode = dv_guess_dct_mode(block); |
753 dv_score_lines(block, 16) + dv_score_lines(block+8, 16)) { | 863 enc_blk->mb = &s->block[mb_index*6+j][0]; |
754 enc_blk->dct_mode = 1; | 864 enc_blk->area_q[0] = enc_blk->area_q[1] = enc_blk->area_q[2] = enc_blk->area_q[3] = 0; |
755 enc_blk->zigzag_scan = ff_zigzag248_direct; | 865 enc_blk->partial_bit_count = 0; |
756 } else { | 866 enc_blk->partial_bit_buffer = 0; |
757 enc_blk->dct_mode = 0; | 867 enc_blk->cur_ac = 1; |
758 enc_blk->zigzag_scan = ff_zigzag_direct; | |
759 } | |
760 enc_blk->mb = block; | |
761 enc_blk->block_size = block_sizes[j]; | |
762 | 868 |
763 s->fdct[enc_blk->dct_mode](block); | 869 s->fdct[enc_blk->dct_mode](block); |
764 | 870 |
765 dv_set_class_number(enc_blk, j/4*(j%2)); | 871 dv_set_class_number(block, enc_blk, |
766 | 872 enc_blk->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct, |
767 block += 64; | 873 j/4*(j%2)); |
768 enc_blk++; | 874 |
769 } | 875 init_put_bits(pb, ptr, block_sizes[j]/8); |
770 } | 876 put_bits(pb, 9, (uint16_t)(((enc_blk->mb[0] >> 3) - 1024) >> 2)); |
771 | 877 put_bits(pb, 1, enc_blk->dct_mode); |
772 /* Stage 2 -- encoding by trial-and-error */ | 878 put_bits(pb, 2, enc_blk->cno); |
773 encode_vs: | 879 |
774 enc_blk = &enc_blks[0]; | 880 vs_bit_size += enc_blk->bit_size[0] + enc_blk->bit_size[1] + |
881 enc_blk->bit_size[2] + enc_blk->bit_size[3]; | |
882 ++enc_blk; | |
883 ++pb; | |
884 ptr += block_sizes[j]/8; | |
885 } | |
886 } | |
887 | |
888 if (vs_total_ac_bits < vs_bit_size) | |
889 dv_guess_qnos(&enc_blks[0], &qnos[0]); | |
890 | |
775 for (i=0; i<5; i++) { | 891 for (i=0; i<5; i++) { |
776 uint8_t* p = dif + i*80 + 4; | 892 dif[i*80 + 3] = qnos[i]; |
777 for (j=0; j<6; j++) { | 893 } |
778 enc_blk->qno = QNO; | 894 |
779 enc_blk->dv_shift = &(s->dv_dct_shift[0] | 895 /* First pass over individual cells only */ |
780 [QNO + dv_quant_offset[enc_blk->cno]][0]); | 896 for (j=0; j<5*6; j++) |
781 init_put_bits(&enc_blk->pb, p, block_sizes[j]/8); | 897 dv_encode_ac(&enc_blks[j], &pbs[j], 1); |
782 enc_blk++; | 898 |
783 p += block_sizes[j]/8; | 899 /* Second pass over each MB space */ |
784 } | 900 for (j=0; j<5*6; j++) { |
785 } | 901 if (enc_blks[j].cur_ac < 65 || enc_blks[j].partial_bit_count) |
786 | 902 dv_encode_ac(&enc_blks[j], &pbs[(j/6)*6], 6); |
787 init_put_bits(&extra_vs, extra_vs_data, sizeof(extra_vs_data)); | 903 } |
788 free_vs_bits = 0; | 904 |
789 enc_blk = &enc_blks[0]; | 905 /* Third and final pass over the whole vides segment space */ |
790 for (i=0; i<5; i++) { | 906 for (j=0; j<5*6; j++) { |
791 PutBitContext extra_mb; | 907 if (enc_blks[j].cur_ac < 65 || enc_blks[j].partial_bit_count) |
792 EncBlockInfo* enc_blk2 = enc_blk; | 908 dv_encode_ac(&enc_blks[j], &pbs[0], 6*5); |
793 int free_mb_bits = 0; | 909 } |
794 | 910 |
795 init_put_bits(&extra_mb, extra_mb_data, sizeof(extra_mb_data)); | 911 for (j=0; j<5*6; j++) |
796 dif[i*80 + 3] = enc_blk->qno; | 912 flush_put_bits(&pbs[j]); |
797 | |
798 for (j=0; j<6; j++) { | |
799 uint16_t dc = ((enc_blk->mb[0] >> 3) - 1024) >> 2; | |
800 | |
801 put_bits(&enc_blk->pb, 9, dc); | |
802 put_bits(&enc_blk->pb, 1, enc_blk->dct_mode); | |
803 put_bits(&enc_blk->pb, 2, enc_blk->cno); | |
804 | |
805 #ifdef VLC_DEBUG | |
806 printf("[%d, %d]: ", i, j); | |
807 #endif | |
808 dv_encode_ac(enc_blk, &extra_mb); | |
809 #ifdef VLC_DEBUG | |
810 printf("\n"); | |
811 #endif | |
812 | |
813 free_mb_bits += dv_bits_left(enc_blk); | |
814 enc_blk++; | |
815 } | |
816 | |
817 /* We can't flush extra_mb just yet -- since it'll round up bit number */ | |
818 extra_bits = get_bit_count(&extra_mb); | |
819 if (free_mb_bits > extra_bits) | |
820 free_vs_bits += free_mb_bits - extra_bits; | |
821 | |
822 if (extra_bits) { /* FIXME: speed up things when free_mb_bits == 0 */ | |
823 flush_put_bits(&extra_mb); | |
824 dv_redistr_bits(enc_blk2, 6, extra_mb_data, extra_bits, &extra_vs); | |
825 } | |
826 } | |
827 | |
828 /* We can't flush extra_mb just yet -- since it'll round up bit number */ | |
829 extra_bits = get_bit_count(&extra_vs); | |
830 if (extra_bits > free_vs_bits && QNO) { /* FIXME: very crude trial-and-error */ | |
831 QNO--; | |
832 goto encode_vs; | |
833 } | |
834 | |
835 if (extra_bits) { | |
836 flush_put_bits(&extra_vs); | |
837 dv_redistr_bits(&enc_blks[0], 5*6, extra_vs_data, extra_bits, NULL); | |
838 } | |
839 | |
840 for (i=0; i<6*5; i++) { | |
841 flush_put_bits(&enc_blks[i].pb); | |
842 #ifdef VLC_DEBUG | |
843 printf("[%d:%d] qno=%d cno=%d\n", i/6, i%6, enc_blks[i].qno, enc_blks[i].cno); | |
844 #endif | |
845 } | |
846 } | 913 } |
847 | 914 |
848 /* NOTE: exactly one frame must be given (120000 bytes for NTSC, | 915 /* NOTE: exactly one frame must be given (120000 bytes for NTSC, |
849 144000 bytes for PAL) */ | 916 144000 bytes for PAL) */ |
850 static int dvvideo_decode_frame(AVCodecContext *avctx, | 917 static int dvvideo_decode_frame(AVCodecContext *avctx, |