comparison h264.h @ 10852:86d7ab878805 libavcodec

Get rid of #include "svq3.c". Functions called more than once per MB are moved into the header; scan8 is moved as well, since it must be known at compile time. The code after this patch duplicates h264data.h; this has been done to minimize the changes in this step and to allow more fine-grained benchmarking. Speed-wise this is 1% faster on my Pentium dual core with Diego's cursed cathedral sample.
author michael
date Tue, 12 Jan 2010 05:30:31 +0000
parents e5905bfa625d
children f6fc6ace95e3
comparison
equal deleted inserted replaced
10851:07dbba7c16ec 10852:86d7ab878805
591 /** 591 /**
592 * reconstructs bitstream slice_type. 592 * reconstructs bitstream slice_type.
593 */ 593 */
594 int ff_h264_get_slice_type(H264Context *h); 594 int ff_h264_get_slice_type(H264Context *h);
595 595
596 /**
597 * allocates tables.
598 * needs width/height
599 */
600 int ff_h264_alloc_tables(H264Context *h);
601
602 /**
603 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
604 */
605 int ff_h264_check_intra_pred_mode(H264Context *h, int mode);
606
607 void ff_h264_write_back_intra_pred_mode(H264Context *h);
608 void ff_h264_hl_decode_mb(H264Context *h);
609 int ff_h264_frame_start(H264Context *h);
610 av_cold int ff_h264_decode_init(AVCodecContext *avctx);
611 av_cold int ff_h264_decode_end(AVCodecContext *avctx);
612
613 /*
614 o-o o-o
615 / / /
616 o-o o-o
617 ,---'
618 o-o o-o
619 / / /
620 o-o o-o
621 */
/* This table must live in the header because scan8[constant] has to be
 * known at compile time.
 * Each entry encodes a position in the 8-entry-wide prediction/MV cache
 * as x + y*8: 16 entries for the 4x4 luma blocks in decoding order,
 * followed by 2*4 entries (presumably the two chroma planes — see the
 * cache diagram in pred_motion). */
static const uint8_t scan8[16 + 2*4] = {
    4+1*8, 5+1*8, 4+2*8, 5+2*8,   /* rows 1-2, left  8x8 */
    6+1*8, 7+1*8, 6+2*8, 7+2*8,   /* rows 1-2, right 8x8 */
    4+3*8, 5+3*8, 4+4*8, 5+4*8,   /* rows 3-4, left  8x8 */
    6+3*8, 7+3*8, 6+4*8, 7+4*8,   /* rows 3-4, right 8x8 */
    1+1*8, 2+1*8,                 /* first  2x2 group */
    1+2*8, 2+2*8,
    1+4*8, 2+4*8,                 /* second 2x2 group */
    1+5*8, 2+5*8,
};
633
634 static av_always_inline uint32_t pack16to32(int a, int b){
635 #if HAVE_BIGENDIAN
636 return (b&0xFFFF) + (a<<16);
637 #else
638 return (a&0xFFFF) + (b<<16);
639 #endif
640 }
641
/**
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
 *
 * Intra4x4 variant: inspects the cached 4x4 prediction modes along the top
 * row and left column of the macroblock and patches each one whose
 * neighbor samples are missing.
 *
 * @return 0 on success, -1 if a mode strictly requires an unavailable
 *         neighbor (decoding error, reported via av_log)
 */
static inline int check_intra4x4_pred_mode(H264Context *h){
    MpegEncContext * const s = &h->s;
    /* Substitution tables indexed by the cached prediction mode:
     *   <0 : mode cannot work without this neighbor -> error
     *    0 : mode is usable as-is
     *   >0 : replace with this mode, which ignores the missing side
     * NOTE(review): this three-way meaning is established by the status
     * handling below; the exact mode indices correspond to the h264
     * intra-prediction mode enums defined elsewhere — confirm there. */
    static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0};
    static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED};
    int i;

    /* bit 0x8000 presumably flags "whole top edge available" — if clear,
     * fix up the four 4x4 blocks of the first row */
    if(!(h->top_samples_available&0x8000)){
        for(i=0; i<4; i++){
            int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ];
            if(status<0){
                av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
                return -1;
            } else if(status){
                /* switch to the equivalent mode that works without the
                 * top neighbor */
                h->intra4x4_pred_mode_cache[scan8[0] + i]= status;
            }
        }
    }

    /* left edge availability is tracked per 4x4 row; 0x8888 = all four
     * rows present, mask[] selects the bit for each row */
    if((h->left_samples_available&0x8888)!=0x8888){
        static const int mask[4]={0x8000,0x2000,0x80,0x20};
        for(i=0; i<4; i++){
            if(!(h->left_samples_available&mask[i])){
                /* scan8[0] + 8*i steps down one cache row per 4x4 row */
                int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ];
                if(status<0){
                    av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y);
                    return -1;
                } else if(status){
                    h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status;
                }
            }
        }
    }

    return 0;
} //FIXME cleanup like ff_h264_check_intra_pred_mode
680
/**
 * Selects the "diagonal" neighbor C used for motion vector prediction:
 * normally the top-right partition, falling back to top-left when the
 * top-right is unavailable (standard H.264 MV prediction behavior).
 *
 * @param C   receives a pointer to the chosen neighbor's MV (may point at
 *            a scratch cache slot synthesized for MBAFF)
 * @param i   scan8 index of the current block
 * @return the reference index belonging to *C
 */
static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){
    const int topright_ref= h->ref_cache[list][ i - 8 + part_width ];
    MpegEncContext *s = &h->s;

    /* there is no consistent mapping of mvs to neighboring locations that will
     * make mbaff happy, so we can't move all this logic to fill_caches */
    if(FRAME_MBAFF){
        const uint32_t *mb_types = s->current_picture_ptr->mb_type;
        const int16_t *mv;
        /* scan8[0]-2 is an otherwise unused cache slot, used here as
         * scratch space for a synthesized (field/frame rescaled) MV */
        *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0;
        *C = h->mv_cache[list][scan8[0]-2];

        /* frame MB in the bottom row of a pair whose top-right neighbor
         * belongs to the MB row above */
        if(!MB_FIELD
           && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){
            int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3);
            if(IS_INTERLACED(mb_types[topright_xy])){
/* Fetches the MV at picture coordinates (X4, Y4) in 4x4 units, rescales
 * the vertical component between field and frame units (MV_OP is pasted
 * after mv[1]: "*2" frame<-field, "/2" field<-frame; REF_OP likewise
 * rescales the reference index), stores the result in the scratch slot
 * *C already points to, and RETURNS FROM THE ENCLOSING FUNCTION with the
 * rescaled reference — note the hidden control flow. */
#define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\
                const int x4 = X4, y4 = Y4;\
                const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\
                if(!USES_LIST(mb_type,list))\
                    return LIST_NOT_USED;\
                mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\
                h->mv_cache[list][scan8[0]-2][0] = mv[0];\
                h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\
                return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP;

                SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1);
            }
        }
        /* top-right missing: try the left neighbor MB when it has the
         * opposite interlacing, which needs the same rescaling */
        if(topright_ref == PART_NOT_AVAILABLE
           && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4
           && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){
            if(!MB_FIELD
               && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){
                SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1);
            }
            if(MB_FIELD
               && !IS_INTERLACED(mb_types[h->left_mb_xy[0]])
               && i >= scan8[0]+8){
                // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK.
                SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2);
            }
        }
#undef SET_DIAG_MV
    }

    /* non-MBAFF (or no rescaling needed): plain cache lookups */
    if(topright_ref != PART_NOT_AVAILABLE){
        *C= h->mv_cache[list][ i - 8 + part_width ];
        return topright_ref;
    }else{
        tprintf(s->avctx, "topright MV not available\n");

        /* fall back to the top-left neighbor */
        *C= h->mv_cache[list][ i - 8 - 1 ];
        return h->ref_cache[list][ i - 8 - 1 ];
    }
}
737
738 /**
739 * gets the predicted MV.
740 * @param n the block index
741 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4)
742 * @param mx the x component of the predicted motion vector
743 * @param my the y component of the predicted motion vector
744 */
745 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){
746 const int index8= scan8[n];
747 const int top_ref= h->ref_cache[list][ index8 - 8 ];
748 const int left_ref= h->ref_cache[list][ index8 - 1 ];
749 const int16_t * const A= h->mv_cache[list][ index8 - 1 ];
750 const int16_t * const B= h->mv_cache[list][ index8 - 8 ];
751 const int16_t * C;
752 int diagonal_ref, match_count;
753
754 assert(part_width==1 || part_width==2 || part_width==4);
755
756 /* mv_cache
757 B . . A T T T T
758 U . . L . . , .
759 U . . L . . . .
760 U . . L . . , .
761 . . . L . . . .
762 */
763
764 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width);
765 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref);
766 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count);
767 if(match_count > 1){ //most common
768 *mx= mid_pred(A[0], B[0], C[0]);
769 *my= mid_pred(A[1], B[1], C[1]);
770 }else if(match_count==1){
771 if(left_ref==ref){
772 *mx= A[0];
773 *my= A[1];
774 }else if(top_ref==ref){
775 *mx= B[0];
776 *my= B[1];
777 }else{
778 *mx= C[0];
779 *my= C[1];
780 }
781 }else{
782 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){
783 *mx= A[0];
784 *my= A[1];
785 }else{
786 *mx= mid_pred(A[0], B[0], C[0]);
787 *my= mid_pred(A[1], B[1], C[1]);
788 }
789 }
790
791 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list);
792 }
793
794
596 #endif /* AVCODEC_H264_H */ 795 #endif /* AVCODEC_H264_H */