Mercurial > libavcodec.hg
comparison h264.h @ 10852:86d7ab878805 libavcodec
Get rid of #include "svq3.c"
Functions called more than once per MB are moved into the header; scan8 is moved
as well, since it must be known at compile time.
The code after this patch duplicates h264data.h, this has been done to minimize
the changes in this step and allow more fine grained benchmarking.
Speed-wise, this is 1% faster on my Pentium dual-core with Diego's cursed cathedral
sample.
author | michael |
---|---|
date | Tue, 12 Jan 2010 05:30:31 +0000 |
parents | e5905bfa625d |
children | f6fc6ace95e3 |
comparison
equal
deleted
inserted
replaced
10851:07dbba7c16ec | 10852:86d7ab878805 |
---|---|
/**
 * reconstructs bitstream slice_type.
 */
int ff_h264_get_slice_type(H264Context *h);

/**
 * allocates tables.
 * needs width/height
 */
int ff_h264_alloc_tables(H264Context *h);

/**
 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks.
 */
int ff_h264_check_intra_pred_mode(H264Context *h, int mode);

/*
 * NOTE(review): the prototypes below are exported without documentation in
 * this header; the one-line descriptions are inferred from the names only and
 * should be confirmed against their definitions.
 */
/* presumably stores the intra prediction modes of the current macroblock back from the cache -- TODO confirm */
void ff_h264_write_back_intra_pred_mode(H264Context *h);
/* presumably reconstructs ("hl decodes") the current macroblock -- TODO confirm */
void ff_h264_hl_decode_mb(H264Context *h);
/* presumably prepares decoding of a new frame -- TODO confirm return convention */
int ff_h264_frame_start(H264Context *h);
/* decoder init/close entry points taking the codec context */
av_cold int ff_h264_decode_init(AVCodecContext *avctx);
av_cold int ff_h264_decode_end(AVCodecContext *avctx);
612 | |
/*
 * scan8[] maps a block index to its position in the 8-entries-per-row cache
 * arrays (position = column + 8*row).
 *
 * Entries 0..15 walk four 2x2 groups, each group itself visited in
 * left-to-right, top-to-bottom order:
 *
 *   o-o o-o
 *    / / /
 *   o-o o-o
 *    ,---'
 *   o-o o-o
 *    / / /
 *   o-o o-o
 *
 * Entries 16..19 and 20..23 are two further 2x2 groups located in
 * columns 1..2 (rows 1..2 and rows 4..5 respectively).
 */
//This table must be here because scan8[constant] must be known at compiletime
#define SCAN8_AT(col, row) ((col) + (row)*8)
static const uint8_t scan8[16 + 2*4]={
    SCAN8_AT(4,1), SCAN8_AT(5,1), SCAN8_AT(4,2), SCAN8_AT(5,2),
    SCAN8_AT(6,1), SCAN8_AT(7,1), SCAN8_AT(6,2), SCAN8_AT(7,2),
    SCAN8_AT(4,3), SCAN8_AT(5,3), SCAN8_AT(4,4), SCAN8_AT(5,4),
    SCAN8_AT(6,3), SCAN8_AT(7,3), SCAN8_AT(6,4), SCAN8_AT(7,4),
    SCAN8_AT(1,1), SCAN8_AT(2,1),
    SCAN8_AT(1,2), SCAN8_AT(2,2),
    SCAN8_AT(1,4), SCAN8_AT(2,4),
    SCAN8_AT(1,5), SCAN8_AT(2,5),
};
#undef SCAN8_AT
633 | |
634 static av_always_inline uint32_t pack16to32(int a, int b){ | |
635 #if HAVE_BIGENDIAN | |
636 return (b&0xFFFF) + (a<<16); | |
637 #else | |
638 return (a&0xFFFF) + (b<<16); | |
639 #endif | |
640 } | |
641 | |
642 /** | |
643 * checks if the top & left blocks are available if needed & changes the dc mode so it only uses the available blocks. | |
644 */ | |
645 static inline int check_intra4x4_pred_mode(H264Context *h){ | |
646 MpegEncContext * const s = &h->s; | |
647 static const int8_t top [12]= {-1, 0,LEFT_DC_PRED,-1,-1,-1,-1,-1, 0}; | |
648 static const int8_t left[12]= { 0,-1, TOP_DC_PRED, 0,-1,-1,-1, 0,-1,DC_128_PRED}; | |
649 int i; | |
650 | |
651 if(!(h->top_samples_available&0x8000)){ | |
652 for(i=0; i<4; i++){ | |
653 int status= top[ h->intra4x4_pred_mode_cache[scan8[0] + i] ]; | |
654 if(status<0){ | |
655 av_log(h->s.avctx, AV_LOG_ERROR, "top block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y); | |
656 return -1; | |
657 } else if(status){ | |
658 h->intra4x4_pred_mode_cache[scan8[0] + i]= status; | |
659 } | |
660 } | |
661 } | |
662 | |
663 if((h->left_samples_available&0x8888)!=0x8888){ | |
664 static const int mask[4]={0x8000,0x2000,0x80,0x20}; | |
665 for(i=0; i<4; i++){ | |
666 if(!(h->left_samples_available&mask[i])){ | |
667 int status= left[ h->intra4x4_pred_mode_cache[scan8[0] + 8*i] ]; | |
668 if(status<0){ | |
669 av_log(h->s.avctx, AV_LOG_ERROR, "left block unavailable for requested intra4x4 mode %d at %d %d\n", status, s->mb_x, s->mb_y); | |
670 return -1; | |
671 } else if(status){ | |
672 h->intra4x4_pred_mode_cache[scan8[0] + 8*i]= status; | |
673 } | |
674 } | |
675 } | |
676 } | |
677 | |
678 return 0; | |
679 } //FIXME cleanup like ff_h264_check_intra_pred_mode | |
680 | |
681 static inline int fetch_diagonal_mv(H264Context *h, const int16_t **C, int i, int list, int part_width){ | |
682 const int topright_ref= h->ref_cache[list][ i - 8 + part_width ]; | |
683 MpegEncContext *s = &h->s; | |
684 | |
685 /* there is no consistent mapping of mvs to neighboring locations that will | |
686 * make mbaff happy, so we can't move all this logic to fill_caches */ | |
687 if(FRAME_MBAFF){ | |
688 const uint32_t *mb_types = s->current_picture_ptr->mb_type; | |
689 const int16_t *mv; | |
690 *(uint32_t*)h->mv_cache[list][scan8[0]-2] = 0; | |
691 *C = h->mv_cache[list][scan8[0]-2]; | |
692 | |
693 if(!MB_FIELD | |
694 && (s->mb_y&1) && i < scan8[0]+8 && topright_ref != PART_NOT_AVAILABLE){ | |
695 int topright_xy = s->mb_x + (s->mb_y-1)*s->mb_stride + (i == scan8[0]+3); | |
696 if(IS_INTERLACED(mb_types[topright_xy])){ | |
697 #define SET_DIAG_MV(MV_OP, REF_OP, X4, Y4)\ | |
698 const int x4 = X4, y4 = Y4;\ | |
699 const int mb_type = mb_types[(x4>>2)+(y4>>2)*s->mb_stride];\ | |
700 if(!USES_LIST(mb_type,list))\ | |
701 return LIST_NOT_USED;\ | |
702 mv = s->current_picture_ptr->motion_val[list][x4 + y4*h->b_stride];\ | |
703 h->mv_cache[list][scan8[0]-2][0] = mv[0];\ | |
704 h->mv_cache[list][scan8[0]-2][1] = mv[1] MV_OP;\ | |
705 return s->current_picture_ptr->ref_index[list][(x4>>1) + (y4>>1)*h->b8_stride] REF_OP; | |
706 | |
707 SET_DIAG_MV(*2, >>1, s->mb_x*4+(i&7)-4+part_width, s->mb_y*4-1); | |
708 } | |
709 } | |
710 if(topright_ref == PART_NOT_AVAILABLE | |
711 && ((s->mb_y&1) || i >= scan8[0]+8) && (i&7)==4 | |
712 && h->ref_cache[list][scan8[0]-1] != PART_NOT_AVAILABLE){ | |
713 if(!MB_FIELD | |
714 && IS_INTERLACED(mb_types[h->left_mb_xy[0]])){ | |
715 SET_DIAG_MV(*2, >>1, s->mb_x*4-1, (s->mb_y|1)*4+(s->mb_y&1)*2+(i>>4)-1); | |
716 } | |
717 if(MB_FIELD | |
718 && !IS_INTERLACED(mb_types[h->left_mb_xy[0]]) | |
719 && i >= scan8[0]+8){ | |
720 // left shift will turn LIST_NOT_USED into PART_NOT_AVAILABLE, but that's OK. | |
721 SET_DIAG_MV(/2, <<1, s->mb_x*4-1, (s->mb_y&~1)*4 - 1 + ((i-scan8[0])>>3)*2); | |
722 } | |
723 } | |
724 #undef SET_DIAG_MV | |
725 } | |
726 | |
727 if(topright_ref != PART_NOT_AVAILABLE){ | |
728 *C= h->mv_cache[list][ i - 8 + part_width ]; | |
729 return topright_ref; | |
730 }else{ | |
731 tprintf(s->avctx, "topright MV not available\n"); | |
732 | |
733 *C= h->mv_cache[list][ i - 8 - 1 ]; | |
734 return h->ref_cache[list][ i - 8 - 1 ]; | |
735 } | |
736 } | |
737 | |
738 /** | |
739 * gets the predicted MV. | |
740 * @param n the block index | |
741 * @param part_width the width of the partition (4, 8,16) -> (1, 2, 4) | |
742 * @param mx the x component of the predicted motion vector | |
743 * @param my the y component of the predicted motion vector | |
744 */ | |
745 static inline void pred_motion(H264Context * const h, int n, int part_width, int list, int ref, int * const mx, int * const my){ | |
746 const int index8= scan8[n]; | |
747 const int top_ref= h->ref_cache[list][ index8 - 8 ]; | |
748 const int left_ref= h->ref_cache[list][ index8 - 1 ]; | |
749 const int16_t * const A= h->mv_cache[list][ index8 - 1 ]; | |
750 const int16_t * const B= h->mv_cache[list][ index8 - 8 ]; | |
751 const int16_t * C; | |
752 int diagonal_ref, match_count; | |
753 | |
754 assert(part_width==1 || part_width==2 || part_width==4); | |
755 | |
756 /* mv_cache | |
757 B . . A T T T T | |
758 U . . L . . , . | |
759 U . . L . . . . | |
760 U . . L . . , . | |
761 . . . L . . . . | |
762 */ | |
763 | |
764 diagonal_ref= fetch_diagonal_mv(h, &C, index8, list, part_width); | |
765 match_count= (diagonal_ref==ref) + (top_ref==ref) + (left_ref==ref); | |
766 tprintf(h->s.avctx, "pred_motion match_count=%d\n", match_count); | |
767 if(match_count > 1){ //most common | |
768 *mx= mid_pred(A[0], B[0], C[0]); | |
769 *my= mid_pred(A[1], B[1], C[1]); | |
770 }else if(match_count==1){ | |
771 if(left_ref==ref){ | |
772 *mx= A[0]; | |
773 *my= A[1]; | |
774 }else if(top_ref==ref){ | |
775 *mx= B[0]; | |
776 *my= B[1]; | |
777 }else{ | |
778 *mx= C[0]; | |
779 *my= C[1]; | |
780 } | |
781 }else{ | |
782 if(top_ref == PART_NOT_AVAILABLE && diagonal_ref == PART_NOT_AVAILABLE && left_ref != PART_NOT_AVAILABLE){ | |
783 *mx= A[0]; | |
784 *my= A[1]; | |
785 }else{ | |
786 *mx= mid_pred(A[0], B[0], C[0]); | |
787 *my= mid_pred(A[1], B[1], C[1]); | |
788 } | |
789 } | |
790 | |
791 tprintf(h->s.avctx, "pred_motion (%2d %2d %2d) (%2d %2d %2d) (%2d %2d %2d) -> (%2d %2d %2d) at %2d %2d %d list %d\n", top_ref, B[0], B[1], diagonal_ref, C[0], C[1], left_ref, A[0], A[1], ref, *mx, *my, h->s.mb_x, h->s.mb_y, n, list); | |
792 } | |
793 | |
794 | |
596 #endif /* AVCODEC_H264_H */ | 795 #endif /* AVCODEC_H264_H */ |