comparison dv.c @ 2847:712c84b9d8ce libavcodec

optimizations; dc coeff rounding fix; class=3 num of bits fix; do interlaced check & idct only if CODEC_FLAG_INTERLACED_DCT
author michael
date Tue, 30 Aug 2005 20:03:19 +0000
parents d9f4b93e81c5
children c4311f623fd5
comparison
equal deleted inserted replaced
2846:40765c51a7a9 2847:712c84b9d8ce
32 #include "dsputil.h" 32 #include "dsputil.h"
33 #include "mpegvideo.h" 33 #include "mpegvideo.h"
34 #include "simple_idct.h" 34 #include "simple_idct.h"
35 #include "dvdata.h" 35 #include "dvdata.h"
36 36
37 //#undef NDEBUG
38 //#include <assert.h>
39
37 typedef struct DVVideoContext { 40 typedef struct DVVideoContext {
38 const DVprofile* sys; 41 const DVprofile* sys;
39 AVFrame picture; 42 AVFrame picture;
43 AVCodecContext *avctx;
40 uint8_t *buf; 44 uint8_t *buf;
41 45
42 uint8_t dv_zigzag[2][64]; 46 uint8_t dv_zigzag[2][64];
43 uint8_t dv_idct_shift[2][2][22][64]; 47 uint8_t dv_idct_shift[2][2][22][64];
44 48
52 #ifdef DV_CODEC_TINY_TARGET 56 #ifdef DV_CODEC_TINY_TARGET
53 #define DV_VLC_MAP_RUN_SIZE 15 57 #define DV_VLC_MAP_RUN_SIZE 15
54 #define DV_VLC_MAP_LEV_SIZE 23 58 #define DV_VLC_MAP_LEV_SIZE 23
55 #else 59 #else
56 #define DV_VLC_MAP_RUN_SIZE 64 60 #define DV_VLC_MAP_RUN_SIZE 64
57 #define DV_VLC_MAP_LEV_SIZE 512 61 #define DV_VLC_MAP_LEV_SIZE 512 //FIXME sign was removed so this should be /2 but needs check
58 #endif 62 #endif
59 63
60 /* MultiThreading */ 64 /* MultiThreading */
61 static uint8_t** dv_anchor; 65 static uint8_t** dv_anchor;
62 66
231 235
232 /* FIXME: I really don't think this should be here */ 236 /* FIXME: I really don't think this should be here */
233 if (dv_codec_profile(avctx)) 237 if (dv_codec_profile(avctx))
234 avctx->pix_fmt = dv_codec_profile(avctx)->pix_fmt; 238 avctx->pix_fmt = dv_codec_profile(avctx)->pix_fmt;
235 avctx->coded_frame = &s->picture; 239 avctx->coded_frame = &s->picture;
240 s->avctx= avctx;
236 241
237 return 0; 242 return 0;
238 } 243 }
239 244
240 // #define VLC_DEBUG 245 // #define VLC_DEBUG
330 #endif 335 #endif
331 pos += run; 336 pos += run;
332 if (pos >= 64) 337 if (pos >= 64)
333 break; 338 break;
334 339
335 if (level) { 340 assert(level);
336 pos1 = scan_table[pos]; 341 pos1 = scan_table[pos];
337 block[pos1] = level << shift_table[pos1]; 342 block[pos1] = level << shift_table[pos1];
338 }
339 343
340 UPDATE_CACHE(re, gb); 344 UPDATE_CACHE(re, gb);
341 } 345 }
342 CLOSE_READER(re, gb); 346 CLOSE_READER(re, gb);
343 mb->pos = pos; 347 mb->pos = pos;
344 } 348 }
345 349
346 static inline void bit_copy(PutBitContext *pb, GetBitContext *gb) 350 static inline void bit_copy(PutBitContext *pb, GetBitContext *gb)
347 { 351 {
348 int bits_left = get_bits_left(gb); 352 int bits_left = get_bits_left(gb);
349 while (bits_left >= 16) { 353 while (bits_left >= MIN_CACHE_BITS) {
350 put_bits(pb, 16, get_bits(gb, 16)); 354 put_bits(pb, MIN_CACHE_BITS, get_bits(gb, MIN_CACHE_BITS));
351 bits_left -= 16; 355 bits_left -= MIN_CACHE_BITS;
352 } 356 }
353 if (bits_left > 0) { 357 if (bits_left > 0) {
354 put_bits(pb, bits_left, get_bits(gb, bits_left)); 358 put_bits(pb, bits_left, get_bits(gb, bits_left));
355 } 359 }
356 } 360 }
369 uint8_t *buf_ptr; 373 uint8_t *buf_ptr;
370 PutBitContext pb, vs_pb; 374 PutBitContext pb, vs_pb;
371 GetBitContext gb; 375 GetBitContext gb;
372 BlockInfo mb_data[5 * 6], *mb, *mb1; 376 BlockInfo mb_data[5 * 6], *mb, *mb1;
373 DCTELEM sblock[5*6][64] __align8; 377 DCTELEM sblock[5*6][64] __align8;
374 uint8_t mb_bit_buffer[80 + 4]; /* allow some slack */ 378 uint8_t mb_bit_buffer[80 + 4] __align8; /* allow some slack */
375 uint8_t vs_bit_buffer[5 * 80 + 4]; /* allow some slack */ 379 uint8_t vs_bit_buffer[5 * 80 + 4] __align8; /* allow some slack */
376 380
381 assert((((int)mb_bit_buffer)&7)==0);
382 assert((((int)vs_bit_buffer)&7)==0);
383
377 memset(sblock, 0, sizeof(sblock)); 384 memset(sblock, 0, sizeof(sblock));
378 385
379 /* pass 1 : read DC and AC coefficients in blocks */ 386 /* pass 1 : read DC and AC coefficients in blocks */
380 buf_ptr = buf_ptr1; 387 buf_ptr = buf_ptr1;
381 block1 = &sblock[0][0]; 388 block1 = &sblock[0][0];
523 } 530 }
524 } 531 }
525 532
526 #ifdef DV_CODEC_TINY_TARGET 533 #ifdef DV_CODEC_TINY_TARGET
527 /* Converts run and level (where level != 0) pair into vlc, returning bit size */ 534 /* Converts run and level (where level != 0) pair into vlc, returning bit size */
528 static always_inline int dv_rl2vlc(int run, int l, uint32_t* vlc) 535 static always_inline int dv_rl2vlc(int run, int level, int sign, uint32_t* vlc)
529 { 536 {
530 int sign = l >> 8;
531 int level = (l ^ sign) - sign;
532 int size; 537 int size;
533
534 sign = (sign & 1);
535
536 if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) { 538 if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) {
537 *vlc = dv_vlc_map[run][level].vlc | sign; 539 *vlc = dv_vlc_map[run][level].vlc | sign;
538 size = dv_vlc_map[run][level].size; 540 size = dv_vlc_map[run][level].size;
539 } 541 }
540 else { 542 else {
553 } 555 }
554 556
555 return size; 557 return size;
556 } 558 }
557 559
558 static always_inline int dv_rl2vlc_size(int run, int l) 560 static always_inline int dv_rl2vlc_size(int run, int level)
559 { 561 {
560 int level = (l ^ (l >> 8)) - (l >> 8);
561 int size; 562 int size;
562 563
563 if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) { 564 if (run < DV_VLC_MAP_RUN_SIZE && level < DV_VLC_MAP_LEV_SIZE) {
564 size = dv_vlc_map[run][level].size; 565 size = dv_vlc_map[run][level].size;
565 } 566 }
570 } 571 }
571 } 572 }
572 return size; 573 return size;
573 } 574 }
574 #else 575 #else
575 static always_inline int dv_rl2vlc(int run, int l, uint32_t* vlc) 576 static always_inline int dv_rl2vlc(int run, int l, int sign, uint32_t* vlc)
576 { 577 {
577 *vlc = dv_vlc_map[run][((uint16_t)l)&0x1ff].vlc; 578 *vlc = dv_vlc_map[run][l].vlc | sign;
578 return dv_vlc_map[run][((uint16_t)l)&0x1ff].size; 579 return dv_vlc_map[run][l].size;
579 } 580 }
580 581
581 static always_inline int dv_rl2vlc_size(int run, int l) 582 static always_inline int dv_rl2vlc_size(int run, int l)
582 { 583 {
583 return dv_vlc_map[run][((uint16_t)l)&0x1ff].size; 584 return dv_vlc_map[run][l].size;
584 } 585 }
585 #endif 586 #endif
586 587
587 typedef struct EncBlockInfo { 588 typedef struct EncBlockInfo {
588 int area_q[4]; 589 int area_q[4];
589 int bit_size[4]; 590 int bit_size[4];
590 int prev_run[4]; 591 int prev[5];
591 int cur_ac; 592 int cur_ac;
592 int cno; 593 int cno;
593 int dct_mode; 594 int dct_mode;
594 DCTELEM *mb; 595 DCTELEM mb[64];
596 uint8_t next[64];
597 uint8_t sign[64];
595 uint8_t partial_bit_count; 598 uint8_t partial_bit_count;
596 uint32_t partial_bit_buffer; /* we can't use uint16_t here */ 599 uint32_t partial_bit_buffer; /* we can't use uint16_t here */
597 } EncBlockInfo; 600 } EncBlockInfo;
598 601
599 static always_inline void dv_encode_ac(EncBlockInfo* bi, PutBitContext* pb_pool, 602 static always_inline PutBitContext* dv_encode_ac(EncBlockInfo* bi, PutBitContext* pb_pool,
600 int pb_size) 603 PutBitContext* pb_end)
601 { 604 {
602 int run; 605 int prev;
603 int bits_left; 606 int bits_left;
604 PutBitContext* pb = pb_pool; 607 PutBitContext* pb = pb_pool;
605 int size = bi->partial_bit_count; 608 int size = bi->partial_bit_count;
606 uint32_t vlc = bi->partial_bit_buffer; 609 uint32_t vlc = bi->partial_bit_buffer;
607 610
608 bi->partial_bit_count = bi->partial_bit_buffer = 0; 611 bi->partial_bit_count = bi->partial_bit_buffer = 0;
609 vlc_loop: 612 for(;;){
610 /* Find suitable storage space */ 613 /* Find suitable storage space */
611 for (; size > (bits_left = put_bits_left(pb)); pb++) { 614 for (; size > (bits_left = put_bits_left(pb)); pb++) {
612 if (bits_left) { 615 if (bits_left) {
613 size -= bits_left; 616 size -= bits_left;
614 put_bits(pb, bits_left, vlc >> size); 617 put_bits(pb, bits_left, vlc >> size);
615 vlc = vlc & ((1<<size)-1); 618 vlc = vlc & ((1<<size)-1);
616 } 619 }
617 if (pb_size == 1) { 620 if (pb + 1 >= pb_end) {
618 bi->partial_bit_count = size; 621 bi->partial_bit_count = size;
619 bi->partial_bit_buffer = vlc; 622 bi->partial_bit_buffer = vlc;
620 return; 623 return pb;
621 } 624 }
622 --pb_size;
623 } 625 }
624 626
625 /* Store VLC */ 627 /* Store VLC */
626 put_bits(pb, size, vlc); 628 put_bits(pb, size, vlc);
627 629
630 if(bi->cur_ac>=64)
631 break;
632
628 /* Construct the next VLC */ 633 /* Construct the next VLC */
629 run = 0; 634 prev= bi->cur_ac;
630 for (; bi->cur_ac < 64; bi->cur_ac++, run++) { 635 bi->cur_ac = bi->next[prev];
631 if (bi->mb[bi->cur_ac]) { 636 if(bi->cur_ac < 64){
632 size = dv_rl2vlc(run, bi->mb[bi->cur_ac], &vlc); 637 size = dv_rl2vlc(bi->cur_ac - prev - 1, bi->mb[bi->cur_ac], bi->sign[bi->cur_ac], &vlc);
633 bi->cur_ac++; 638 } else {
634 goto vlc_loop; 639 size = 4; vlc = 6; /* End Of Block stamp */
635 }
636 } 640 }
637 641 }
638 if (bi->cur_ac == 64) { 642 return pb;
639 size = 4; vlc = 6; /* End Of Block stamp */
640 bi->cur_ac++;
641 goto vlc_loop;
642 }
643 } 643 }
644 644
645 static always_inline void dv_set_class_number(DCTELEM* blk, EncBlockInfo* bi, 645 static always_inline void dv_set_class_number(DCTELEM* blk, EncBlockInfo* bi,
646 const uint8_t* zigzag_scan, int bias) 646 const uint8_t* zigzag_scan, int bias)
647 { 647 {
648 int i, area; 648 int i, area;
649 int run; 649 static const int classes[] = {12, 24, 36, 0xffff};
650 int classes[] = {12, 24, 36, 0xffff}; 650 int max=12;
651 651 int prev=0;
652 run = 0; 652
653 bi->mb[0] = blk[0]; 653 bi->mb[0] = blk[0];
654 bi->cno = 0; 654
655 for (area = 0; area < 4; area++) { 655 for (area = 0; area < 4; area++) {
656 bi->prev_run[area] = run; 656 bi->prev[area] = prev;
657 bi->bit_size[area] = 0; 657 bi->bit_size[area] = 1; // 4 areas 4 bits for EOB :)
658 for (i=mb_area_start[area]; i<mb_area_start[area+1]; i++) { 658 for (i=mb_area_start[area]; i<mb_area_start[area+1]; i++) {
659 bi->mb[i] = (blk[zigzag_scan[i]] / 16); 659 int level = blk[zigzag_scan[i]];
660 while ((bi->mb[i] ^ (bi->mb[i] >> 8)) > classes[bi->cno])
661 bi->cno++;
662 660
663 if (bi->mb[i]) { 661 if (level+15 > 30U) {
664 bi->bit_size[area] += dv_rl2vlc_size(run, bi->mb[i]); 662 bi->sign[i] = (level>>31)&1;
665 run = 0; 663 bi->mb[i] = level= ABS(level)>>4;
666 } else 664 if(level>max) max= level;
667 ++run; 665 bi->bit_size[area] += dv_rl2vlc_size(i - prev - 1, level);
666 bi->next[prev]= i;
667 prev= i;
668 }
668 } 669 }
669 } 670 }
670 bi->bit_size[3] += 4; /* EOB marker */ 671 bi->next[prev]= i;
672 for(bi->cno = 0; max > classes[bi->cno]; bi->cno++);
673
671 bi->cno += bias; 674 bi->cno += bias;
672 675
673 if (bi->cno >= 3) { /* FIXME: we have to recreate bit_size[], prev_run[] */ 676 if (bi->cno >= 3) {
674 bi->cno = 3; 677 bi->cno = 3;
675 for (i=1; i<64; i++) 678 prev=0;
676 bi->mb[i] /= 2; 679 i= bi->next[prev];
677 } 680 for (area = 0; area < 4; area++) {
678 } 681 bi->prev[area] = prev;
679 682 bi->bit_size[area] = 1; // 4 areas 4 bits for EOB :)
683 for (; i<mb_area_start[area+1]; i= bi->next[i]) {
684 bi->mb[i] >>=1;
685
686 if (bi->mb[i]) {
687 bi->bit_size[area] += dv_rl2vlc_size(i - prev - 1, bi->mb[i]);
688 bi->next[prev]= i;
689 prev= i;
690 }
691 }
692 }
693 bi->next[prev]= i;
694 }
695 }
696
697 //FIXME replace this by dsputil
680 #define SC(x, y) ((s[x] - s[y]) ^ ((s[x] - s[y]) >> 7)) 698 #define SC(x, y) ((s[x] - s[y]) ^ ((s[x] - s[y]) >> 7))
681 static always_inline int dv_guess_dct_mode(DCTELEM *blk) { 699 static always_inline int dv_guess_dct_mode(DCTELEM *blk) {
682 DCTELEM *s; 700 DCTELEM *s;
683 int score88 = 0; 701 int score88 = 0;
684 int score248 = 0; 702 int score248 = 0;
703 } 721 }
704 722
705 static inline void dv_guess_qnos(EncBlockInfo* blks, int* qnos) 723 static inline void dv_guess_qnos(EncBlockInfo* blks, int* qnos)
706 { 724 {
707 int size[5]; 725 int size[5];
708 int i, j, k, a, run; 726 int i, j, k, a, prev;
709 EncBlockInfo* b; 727 EncBlockInfo* b;
710 728
711 do { 729 do {
712 b = blks; 730 b = blks;
713 for (i=0; i<5; i++) { 731 for (i=0; i<5; i++) {
714 if (!qnos[i]) 732 if (!qnos[i])
715 continue; 733 continue;
717 qnos[i]--; 735 qnos[i]--;
718 size[i] = 0; 736 size[i] = 0;
719 for (j=0; j<6; j++, b++) { 737 for (j=0; j<6; j++, b++) {
720 for (a=0; a<4; a++) { 738 for (a=0; a<4; a++) {
721 if (b->area_q[a] != dv_quant_shifts[qnos[i] + dv_quant_offset[b->cno]][a]) { 739 if (b->area_q[a] != dv_quant_shifts[qnos[i] + dv_quant_offset[b->cno]][a]) {
722 b->bit_size[a] = (a==3)?4:0; 740 b->bit_size[a] = 1; // 4 areas 4 bits for EOB :)
723 b->area_q[a]++; 741 b->area_q[a]++;
724 run = b->prev_run[a]; 742 prev= b->prev[a];
725 for (k=mb_area_start[a]; k<mb_area_start[a+1]; k++) { 743 for (k= b->next[prev] ; k<mb_area_start[a+1]; k= b->next[k]) {
726 b->mb[k] /= 2; 744 b->mb[k] >>= 1;
727 if (b->mb[k]) { 745 if (b->mb[k]) {
728 b->bit_size[a] += dv_rl2vlc_size(run, b->mb[k]); 746 b->bit_size[a] += dv_rl2vlc_size(k - prev - 1, b->mb[k]);
729 run = 0; 747 prev= k;
730 } else 748 } else {
731 ++run; 749 b->next[prev] = b->next[k];
750 }
732 } 751 }
752 b->prev[a+1]= prev;
733 } 753 }
734 size[i] += b->bit_size[a]; 754 size[i] += b->bit_size[a];
735 } 755 }
736 } 756 }
737 } 757 }
753 uint8_t* y_ptr; 773 uint8_t* y_ptr;
754 uint8_t* data; 774 uint8_t* data;
755 uint8_t* ptr; 775 uint8_t* ptr;
756 int do_edge_wrap; 776 int do_edge_wrap;
757 DCTELEM block[64] __align8; 777 DCTELEM block[64] __align8;
758 DCTELEM sblock[5*6][64] __align8;
759 EncBlockInfo enc_blks[5*6]; 778 EncBlockInfo enc_blks[5*6];
760 PutBitContext pbs[5*6]; 779 PutBitContext pbs[5*6];
761 PutBitContext* pb; 780 PutBitContext* pb;
762 EncBlockInfo* enc_blk; 781 EncBlockInfo* enc_blk;
763 int vs_bit_size = 0; 782 int vs_bit_size = 0;
764 int qnos[5]; 783 int qnos[5];
784
785 assert((((int)block) & 7) == 0);
765 786
766 enc_blk = &enc_blks[0]; 787 enc_blk = &enc_blks[0];
767 pb = &pbs[0]; 788 pb = &pbs[0];
768 for(mb_index = 0; mb_index < 5; mb_index++) { 789 for(mb_index = 0; mb_index < 5; mb_index++) {
769 v = *mb_pos_ptr++; 790 v = *mb_pos_ptr++;
806 } 827 }
807 } else { /* Simple copy: 8x8 -> 8x8 */ 828 } else { /* Simple copy: 8x8 -> 8x8 */
808 s->get_pixels(block, data, linesize); 829 s->get_pixels(block, data, linesize);
809 } 830 }
810 831
811 enc_blk->dct_mode = dv_guess_dct_mode(block); 832 if(s->avctx->flags & CODEC_FLAG_INTERLACED_DCT)
812 enc_blk->mb = &sblock[mb_index*6+j][0]; 833 enc_blk->dct_mode = dv_guess_dct_mode(block);
834 else
835 enc_blk->dct_mode = 0;
813 enc_blk->area_q[0] = enc_blk->area_q[1] = enc_blk->area_q[2] = enc_blk->area_q[3] = 0; 836 enc_blk->area_q[0] = enc_blk->area_q[1] = enc_blk->area_q[2] = enc_blk->area_q[3] = 0;
814 enc_blk->partial_bit_count = 0; 837 enc_blk->partial_bit_count = 0;
815 enc_blk->partial_bit_buffer = 0; 838 enc_blk->partial_bit_buffer = 0;
816 enc_blk->cur_ac = 1; 839 enc_blk->cur_ac = 0;
817 840
818 s->fdct[enc_blk->dct_mode](block); 841 s->fdct[enc_blk->dct_mode](block);
819 842
820 dv_set_class_number(block, enc_blk, 843 dv_set_class_number(block, enc_blk,
821 enc_blk->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct, 844 enc_blk->dct_mode ? ff_zigzag248_direct : ff_zigzag_direct, j/4);
822 j/4*(j%2));
823 845
824 init_put_bits(pb, ptr, block_sizes[j]/8); 846 init_put_bits(pb, ptr, block_sizes[j]/8);
825 put_bits(pb, 9, (uint16_t)(((enc_blk->mb[0] >> 3) - 1024) >> 2)); 847 put_bits(pb, 9, (uint16_t)(((enc_blk->mb[0] >> 3) - 1024 + 2) >> 2));
826 put_bits(pb, 1, enc_blk->dct_mode); 848 put_bits(pb, 1, enc_blk->dct_mode);
827 put_bits(pb, 2, enc_blk->cno); 849 put_bits(pb, 2, enc_blk->cno);
828 850
829 vs_bit_size += enc_blk->bit_size[0] + enc_blk->bit_size[1] + 851 vs_bit_size += enc_blk->bit_size[0] + enc_blk->bit_size[1] +
830 enc_blk->bit_size[2] + enc_blk->bit_size[3]; 852 enc_blk->bit_size[2] + enc_blk->bit_size[3];
841 dif[i*80 + 3] = qnos[i]; 863 dif[i*80 + 3] = qnos[i];
842 } 864 }
843 865
844 /* First pass over individual cells only */ 866 /* First pass over individual cells only */
845 for (j=0; j<5*6; j++) 867 for (j=0; j<5*6; j++)
846 dv_encode_ac(&enc_blks[j], &pbs[j], 1); 868 dv_encode_ac(&enc_blks[j], &pbs[j], &pbs[j+1]);
847 869
848 /* Second pass over each MB space */ 870 /* Second pass over each MB space */
871 for (j=0; j<5*6; j+=6) {
872 pb= &pbs[j];
873 for (i=0; i<6; i++) {
874 if (enc_blks[i+j].partial_bit_count)
875 pb=dv_encode_ac(&enc_blks[i+j], pb, &pbs[j+6]);
876 }
877 }
878
879 /* Third and final pass over the whole video segment space */
880 pb= &pbs[0];
849 for (j=0; j<5*6; j++) { 881 for (j=0; j<5*6; j++) {
850 if (enc_blks[j].cur_ac < 65 || enc_blks[j].partial_bit_count) 882 if (enc_blks[j].partial_bit_count)
851 dv_encode_ac(&enc_blks[j], &pbs[(j/6)*6], 6); 883 pb=dv_encode_ac(&enc_blks[j], pb, &pbs[6*5]);
852 }
853
854 /* Third and final pass over the whole video segment space */
855 for (j=0; j<5*6; j++) {
856 if (enc_blks[j].cur_ac < 65 || enc_blks[j].partial_bit_count)
857 dv_encode_ac(&enc_blks[j], &pbs[0], 6*5);
858 } 884 }
859 885
860 for (j=0; j<5*6; j++) 886 for (j=0; j<5*6; j++)
861 flush_put_bits(&pbs[j]); 887 flush_put_bits(&pbs[j]);
862 } 888 }