comparison h264.h @ 10866:d26e9b4d2ca1 libavcodec

Split CAVLC out of h264.c. This seems to speed the code up a little. The placement of many generic functions between h264.c and h264.h is still open; currently they are placed somewhat randomly between the two files.
author michael
date Wed, 13 Jan 2010 01:59:19 +0000
parents e3f5eb016712
children 7101061bfa0f
comparison
equal deleted inserted replaced
10865:bcdc5343a577 10866:d26e9b4d2ca1
30 30
31 #include "dsputil.h" 31 #include "dsputil.h"
32 #include "cabac.h" 32 #include "cabac.h"
33 #include "mpegvideo.h" 33 #include "mpegvideo.h"
34 #include "h264pred.h" 34 #include "h264pred.h"
35 #include "rectangle.h"
35 36
36 #define interlaced_dct interlaced_dct_is_a_bad_name 37 #define interlaced_dct interlaced_dct_is_a_bad_name
37 #define mb_intra mb_intra_is_not_initialized_see_mb_type 38 #define mb_intra mb_intra_is_not_initialized_see_mb_type
38 39
39 #define LUMA_DC_BLOCK_INDEX 25 40 #define LUMA_DC_BLOCK_INDEX 25
640 void ff_h264_write_back_intra_pred_mode(H264Context *h); 641 void ff_h264_write_back_intra_pred_mode(H264Context *h);
641 void ff_h264_hl_decode_mb(H264Context *h); 642 void ff_h264_hl_decode_mb(H264Context *h);
642 int ff_h264_frame_start(H264Context *h); 643 int ff_h264_frame_start(H264Context *h);
643 av_cold int ff_h264_decode_init(AVCodecContext *avctx); 644 av_cold int ff_h264_decode_init(AVCodecContext *avctx);
644 av_cold int ff_h264_decode_end(AVCodecContext *avctx); 645 av_cold int ff_h264_decode_end(AVCodecContext *avctx);
646 av_cold void ff_h264_decode_init_vlc(void);
647
648 /**
649 * decodes a macroblock
650 * @returns 0 if OK, AC_ERROR / DC_ERROR / MV_ERROR if an error is noticed
651 */
652 int ff_h264_decode_mb_cavlc(H264Context *h);
645 653
646 void ff_h264_direct_dist_scale_factor(H264Context * const h); 654 void ff_h264_direct_dist_scale_factor(H264Context * const h);
647 void ff_h264_direct_ref_list_init(H264Context * const h); 655 void ff_h264_direct_ref_list_init(H264Context * const h);
648 void ff_h264_pred_direct_motion(H264Context * const h, int *mb_type); 656 void ff_h264_pred_direct_motion(H264Context * const h, int *mb_type);
649 657
692 */ 700 */
693 static inline int get_chroma_qp(H264Context *h, int t, int qscale){ 701 static inline int get_chroma_qp(H264Context *h, int t, int qscale){
694 return h->pps.chroma_qp_table[t][qscale]; 702 return h->pps.chroma_qp_table[t][qscale];
695 } 703 }
696 704
705 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my);
706
707 static void fill_caches(H264Context *h, int mb_type, int for_deblock){
708 MpegEncContext * const s = &h->s;
709 const int mb_xy= h->mb_xy;
710 int topleft_xy, top_xy, topright_xy, left_xy[2];
711 int topleft_type, top_type, topright_type, left_type[2];
712 const uint8_t * left_block;
713 int topleft_partition= -1;
714 int i;
715 static const uint8_t left_block_options[4][8]={
716 {0,1,2,3,7,10,8,11},
717 {2,2,3,3,8,11,8,11},
718 {0,0,1,1,7,10,7,10},
719 {0,2,0,2,7,10,7,10}
720 };
721
722 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE);
723
724 //FIXME deblocking could skip the intra and nnz parts.
725 if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
726 return;
727
728 /* Wow, what a mess, why didn't they simplify the interlacing & intra
729 * stuff, I can't imagine that these complex rules are worth it. */
730
731 topleft_xy = top_xy - 1;
732 topright_xy= top_xy + 1;
733 left_xy[1] = left_xy[0] = mb_xy-1;
734 left_block = left_block_options[0];
735 if(FRAME_MBAFF){
736 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride;
737 const int top_pair_xy = pair_xy - s->mb_stride;
738 const int topleft_pair_xy = top_pair_xy - 1;
739 const int topright_pair_xy = top_pair_xy + 1;
740 const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
741 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
742 const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
743 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
744 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
745 const int bottom = (s->mb_y & 1);
746 tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag);
747
748 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){
749 top_xy -= s->mb_stride;
750 }
751 if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){
752 topleft_xy -= s->mb_stride;
753 } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) {
754 topleft_xy += s->mb_stride;
755 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
756 topleft_partition = 0;
757 }
758 if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){
759 topright_xy -= s->mb_stride;
760 }
761 if (left_mb_field_flag != curr_mb_field_flag) {
762 left_xy[1] = left_xy[0] = pair_xy - 1;
763 if (curr_mb_field_flag) {
764 left_xy[1] += s->mb_stride;
765 left_block = left_block_options[3];
766 } else {
767 left_block= left_block_options[2 - bottom];
768 }
769 }
770 }
771
772 h->top_mb_xy = top_xy;
773 h->left_mb_xy[0] = left_xy[0];
774 h->left_mb_xy[1] = left_xy[1];
775 if(for_deblock){
776 topleft_type = 0;
777 topright_type = 0;
778 top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
779 left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
780 left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
781
782 if(MB_MBAFF && !IS_INTRA(mb_type)){
783 int list;
784 for(list=0; list<h->list_count; list++){
785 //These values where changed for ease of performing MC, we need to change them back
786 //FIXME maybe we can make MC and loop filter use the same values or prevent
787 //the MC code from changing ref_cache and rather use a temporary array.
788 if(USES_LIST(mb_type,list)){
789 int8_t *ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
790 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
791 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
792 ref += h->b8_stride;
793 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
794 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref[0],ref[1])&0x00FF00FF)*0x0101;
795 }
796 }
797 }
798 }else{
799 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
800 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
801 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
802 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
803 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
804
805 if(IS_INTRA(mb_type)){
806 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
807 h->topleft_samples_available=
808 h->top_samples_available=
809 h->left_samples_available= 0xFFFF;
810 h->topright_samples_available= 0xEEEA;
811
812 if(!(top_type & type_mask)){
813 h->topleft_samples_available= 0xB3FF;
814 h->top_samples_available= 0x33FF;
815 h->topright_samples_available= 0x26EA;
816 }
817 if(IS_INTERLACED(mb_type) != IS_INTERLACED(left_type[0])){
818 if(IS_INTERLACED(mb_type)){
819 if(!(left_type[0] & type_mask)){
820 h->topleft_samples_available&= 0xDFFF;
821 h->left_samples_available&= 0x5FFF;
822 }
823 if(!(left_type[1] & type_mask)){
824 h->topleft_samples_available&= 0xFF5F;
825 h->left_samples_available&= 0xFF5F;
826 }
827 }else{
828 int left_typei = h->slice_table[left_xy[0] + s->mb_stride ] == h->slice_num
829 ? s->current_picture.mb_type[left_xy[0] + s->mb_stride] : 0;
830 assert(left_xy[0] == left_xy[1]);
831 if(!((left_typei & type_mask) && (left_type[0] & type_mask))){
832 h->topleft_samples_available&= 0xDF5F;
833 h->left_samples_available&= 0x5F5F;
834 }
835 }
836 }else{
837 if(!(left_type[0] & type_mask)){
838 h->topleft_samples_available&= 0xDF5F;
839 h->left_samples_available&= 0x5F5F;
840 }
841 }
842
843 if(!(topleft_type & type_mask))
844 h->topleft_samples_available&= 0x7FFF;
845
846 if(!(topright_type & type_mask))
847 h->topright_samples_available&= 0xFBFF;
848
849 if(IS_INTRA4x4(mb_type)){
850 if(IS_INTRA4x4(top_type)){
851 h->intra4x4_pred_mode_cache[4+8*0]= h->intra4x4_pred_mode[top_xy][4];
852 h->intra4x4_pred_mode_cache[5+8*0]= h->intra4x4_pred_mode[top_xy][5];
853 h->intra4x4_pred_mode_cache[6+8*0]= h->intra4x4_pred_mode[top_xy][6];
854 h->intra4x4_pred_mode_cache[7+8*0]= h->intra4x4_pred_mode[top_xy][3];
855 }else{
856 int pred;
857 if(!(top_type & type_mask))
858 pred= -1;
859 else{
860 pred= 2;
861 }
862 h->intra4x4_pred_mode_cache[4+8*0]=
863 h->intra4x4_pred_mode_cache[5+8*0]=
864 h->intra4x4_pred_mode_cache[6+8*0]=
865 h->intra4x4_pred_mode_cache[7+8*0]= pred;
866 }
867 for(i=0; i<2; i++){
868 if(IS_INTRA4x4(left_type[i])){
869 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[0+2*i]];
870 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= h->intra4x4_pred_mode[left_xy[i]][left_block[1+2*i]];
871 }else{
872 int pred;
873 if(!(left_type[i] & type_mask))
874 pred= -1;
875 else{
876 pred= 2;
877 }
878 h->intra4x4_pred_mode_cache[3+8*1 + 2*8*i]=
879 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
880 }
881 }
882 }
883 }
884 }
885
886
887 /*
888 0 . T T. T T T T
889 1 L . .L . . . .
890 2 L . .L . . . .
891 3 . T TL . . . .
892 4 L . .L . . . .
893 5 L . .. . . . .
894 */
895 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
896 if(top_type){
897 h->non_zero_count_cache[4+8*0]= h->non_zero_count[top_xy][4];
898 h->non_zero_count_cache[5+8*0]= h->non_zero_count[top_xy][5];
899 h->non_zero_count_cache[6+8*0]= h->non_zero_count[top_xy][6];
900 h->non_zero_count_cache[7+8*0]= h->non_zero_count[top_xy][3];
901
902 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][9];
903 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][8];
904
905 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][12];
906 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][11];
907
908 }else{
909 h->non_zero_count_cache[4+8*0]=
910 h->non_zero_count_cache[5+8*0]=
911 h->non_zero_count_cache[6+8*0]=
912 h->non_zero_count_cache[7+8*0]=
913
914 h->non_zero_count_cache[1+8*0]=
915 h->non_zero_count_cache[2+8*0]=
916
917 h->non_zero_count_cache[1+8*3]=
918 h->non_zero_count_cache[2+8*3]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
919
920 }
921
922 for (i=0; i<2; i++) {
923 if(left_type[i]){
924 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[0+2*i]];
925 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[1+2*i]];
926 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[4+2*i]];
927 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[5+2*i]];
928 }else{
929 h->non_zero_count_cache[3+8*1 + 2*8*i]=
930 h->non_zero_count_cache[3+8*2 + 2*8*i]=
931 h->non_zero_count_cache[0+8*1 + 8*i]=
932 h->non_zero_count_cache[0+8*4 + 8*i]= h->pps.cabac && !IS_INTRA(mb_type) ? 0 : 64;
933 }
934 }
935
936 if( h->pps.cabac ) {
937 // top_cbp
938 if(top_type) {
939 h->top_cbp = h->cbp_table[top_xy];
940 } else if(IS_INTRA(mb_type)) {
941 h->top_cbp = 0x1C0;
942 } else {
943 h->top_cbp = 0;
944 }
945 // left_cbp
946 if (left_type[0]) {
947 h->left_cbp = h->cbp_table[left_xy[0]] & 0x1f0;
948 } else if(IS_INTRA(mb_type)) {
949 h->left_cbp = 0x1C0;
950 } else {
951 h->left_cbp = 0;
952 }
953 if (left_type[0]) {
954 h->left_cbp |= ((h->cbp_table[left_xy[0]]>>((left_block[0]&(~1))+1))&0x1) << 1;
955 }
956 if (left_type[1]) {
957 h->left_cbp |= ((h->cbp_table[left_xy[1]]>>((left_block[2]&(~1))+1))&0x1) << 3;
958 }
959 }
960
961 #if 1
962 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
963 int list;
964 for(list=0; list<h->list_count; list++){
965 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
966 /*if(!h->mv_cache_clean[list]){
967 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
968 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
969 h->mv_cache_clean[list]= 1;
970 }*/
971 continue;
972 }
973 h->mv_cache_clean[list]= 0;
974
975 if(USES_LIST(top_type, list)){
976 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
977 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
978 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
979 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1];
980 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2];
981 *(uint32_t*)h->mv_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 3];
982 h->ref_cache[list][scan8[0] + 0 - 1*8]=
983 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
984 h->ref_cache[list][scan8[0] + 2 - 1*8]=
985 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
986 }else{
987 *(uint32_t*)h->mv_cache [list][scan8[0] + 0 - 1*8]=
988 *(uint32_t*)h->mv_cache [list][scan8[0] + 1 - 1*8]=
989 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
990 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
991 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
992 }
993
994 for(i=0; i<2; i++){
995 int cache_idx = scan8[0] - 1 + i*2*8;
996 if(USES_LIST(left_type[i], list)){
997 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
998 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
999 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]];
1000 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]];
1001 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
1002 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
1003 }else{
1004 *(uint32_t*)h->mv_cache [list][cache_idx ]=
1005 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
1006 h->ref_cache[list][cache_idx ]=
1007 h->ref_cache[list][cache_idx+8]= left_type[i] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
1008 }
1009 }
1010
1011 if(for_deblock || ((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF))
1012 continue;
1013
1014 if(USES_LIST(topleft_type, list)){
1015 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride);
1016 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride);
1017 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
1018 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
1019 }else{
1020 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
1021 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
1022 }
1023
1024 if(USES_LIST(topright_type, list)){
1025 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
1026 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
1027 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
1028 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
1029 }else{
1030 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
1031 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
1032 }
1033
1034 if((IS_SKIP(mb_type) || IS_DIRECT(mb_type)) && !FRAME_MBAFF)
1035 continue;
1036
1037 h->ref_cache[list][scan8[5 ]+1] =
1038 h->ref_cache[list][scan8[7 ]+1] =
1039 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
1040 h->ref_cache[list][scan8[4 ]] =
1041 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
1042 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]=
1043 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]=
1044 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
1045 *(uint32_t*)h->mv_cache [list][scan8[4 ]]=
1046 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0;
1047
1048 if( h->pps.cabac ) {
1049 /* XXX beurk, Load mvd */
1050 if(USES_LIST(top_type, list)){
1051 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
1052 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0];
1053 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1];
1054 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2];
1055 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3];
1056 }else{
1057 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]=
1058 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]=
1059 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]=
1060 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0;
1061 }
1062 if(USES_LIST(left_type[0], list)){
1063 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
1064 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]];
1065 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]];
1066 }else{
1067 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]=
1068 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0;
1069 }
1070 if(USES_LIST(left_type[1], list)){
1071 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
1072 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]];
1073 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]];
1074 }else{
1075 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]=
1076 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0;
1077 }
1078 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]=
1079 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]=
1080 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else)
1081 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]=
1082 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0;
1083
1084 if(h->slice_type_nos == FF_B_TYPE){
1085 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, 0, 1);
1086
1087 if(IS_DIRECT(top_type)){
1088 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101;
1089 }else if(IS_8X8(top_type)){
1090 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
1091 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
1092 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
1093 }else{
1094 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0;
1095 }
1096
1097 if(IS_DIRECT(left_type[0]))
1098 h->direct_cache[scan8[0] - 1 + 0*8]= 1;
1099 else if(IS_8X8(left_type[0]))
1100 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)];
1101 else
1102 h->direct_cache[scan8[0] - 1 + 0*8]= 0;
1103
1104 if(IS_DIRECT(left_type[1]))
1105 h->direct_cache[scan8[0] - 1 + 2*8]= 1;
1106 else if(IS_8X8(left_type[1]))
1107 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)];
1108 else
1109 h->direct_cache[scan8[0] - 1 + 2*8]= 0;
1110 }
1111 }
1112
1113 if(FRAME_MBAFF){
1114 #define MAP_MVS\
1115 MAP_F2F(scan8[0] - 1 - 1*8, topleft_type)\
1116 MAP_F2F(scan8[0] + 0 - 1*8, top_type)\
1117 MAP_F2F(scan8[0] + 1 - 1*8, top_type)\
1118 MAP_F2F(scan8[0] + 2 - 1*8, top_type)\
1119 MAP_F2F(scan8[0] + 3 - 1*8, top_type)\
1120 MAP_F2F(scan8[0] + 4 - 1*8, topright_type)\
1121 MAP_F2F(scan8[0] - 1 + 0*8, left_type[0])\
1122 MAP_F2F(scan8[0] - 1 + 1*8, left_type[0])\
1123 MAP_F2F(scan8[0] - 1 + 2*8, left_type[1])\
1124 MAP_F2F(scan8[0] - 1 + 3*8, left_type[1])
1125 if(MB_FIELD){
1126 #define MAP_F2F(idx, mb_type)\
1127 if(!IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
1128 h->ref_cache[list][idx] <<= 1;\
1129 h->mv_cache[list][idx][1] /= 2;\
1130 h->mvd_cache[list][idx][1] /= 2;\
1131 }
1132 MAP_MVS
1133 #undef MAP_F2F
1134 }else{
1135 #define MAP_F2F(idx, mb_type)\
1136 if(IS_INTERLACED(mb_type) && h->ref_cache[list][idx] >= 0){\
1137 h->ref_cache[list][idx] >>= 1;\
1138 h->mv_cache[list][idx][1] <<= 1;\
1139 h->mvd_cache[list][idx][1] <<= 1;\
1140 }
1141 MAP_MVS
1142 #undef MAP_F2F
1143 }
1144 }
1145 }
1146 }
1147 #endif
1148
1149 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
1150 }
1151
1152 /**
1153 * Gets the predicted intra4x4 prediction mode for block n.
 *
 * The prediction is the smaller of the cached modes of the left and top
 * neighbours (cache entries at scan8[n]-1 and scan8[n]-8). If that minimum
 * is negative, DC_PRED is returned instead.
 *
 * @param h decoder context whose intra4x4_pred_mode_cache is read
 * @param n block index, translated to a cache position through scan8[]
 * @return the smaller neighbour mode, or DC_PRED when it is negative
1154 */
1155 static inline int pred_intra_mode(H264Context *h, int n){
1156 const int index8= scan8[n];
1157 const int left= h->intra4x4_pred_mode_cache[index8 - 1];
1158 const int top = h->intra4x4_pred_mode_cache[index8 - 8];
1159 const int min= FFMIN(left, top);
1160
1161 tprintf(h->s.avctx, "mode:%d %d min:%d\n", left ,top, min);
1162
// A negative cached mode in either neighbour makes the minimum negative.
1163 if(min<0) return DC_PRED;
1164 else return min;
1165 }
1166
/**
 * Copies selected entries of h->non_zero_count_cache into
 * h->non_zero_count[h->mb_xy].
 *
 * Only entries 0..12 of the per-macroblock array are written; these are the
 * entries that fill_caches() later reads from neighbouring macroblocks.
 * Cache positions are written as column + 8*row.
 *
 * @param h decoder context; h->mb_xy selects the destination macroblock
 */
1167 static inline void write_back_non_zero_count(H264Context *h){
1168 const int mb_xy= h->mb_xy;
1169
// Entries 0-3: column 7 of cache rows 1-4; entries 4-6: columns 4-6 of row 4.
1170 h->non_zero_count[mb_xy][0]= h->non_zero_count_cache[7+8*1];
1171 h->non_zero_count[mb_xy][1]= h->non_zero_count_cache[7+8*2];
1172 h->non_zero_count[mb_xy][2]= h->non_zero_count_cache[7+8*3];
1173 h->non_zero_count[mb_xy][3]= h->non_zero_count_cache[7+8*4];
1174 h->non_zero_count[mb_xy][4]= h->non_zero_count_cache[4+8*4];
1175 h->non_zero_count[mb_xy][5]= h->non_zero_count_cache[5+8*4];
1176 h->non_zero_count[mb_xy][6]= h->non_zero_count_cache[6+8*4];
1177
// Entries 7-9: taken from columns 1-2 of cache rows 1-2.
1178 h->non_zero_count[mb_xy][9]= h->non_zero_count_cache[1+8*2];
1179 h->non_zero_count[mb_xy][8]= h->non_zero_count_cache[2+8*2];
1180 h->non_zero_count[mb_xy][7]= h->non_zero_count_cache[2+8*1];
1181
// Entries 10-12: taken from columns 1-2 of cache rows 4-5.
1182 h->non_zero_count[mb_xy][12]=h->non_zero_count_cache[1+8*5];
1183 h->non_zero_count[mb_xy][11]=h->non_zero_count_cache[2+8*5];
1184 h->non_zero_count[mb_xy][10]=h->non_zero_count_cache[2+8*4];
1185 }
1186
/**
 * Stores the current macroblock's motion data from the caches into the
 * per-picture tables.
 *
 * Writes motion_val and ref_index of s->current_picture for every list the
 * macroblock uses and, when CABAC is enabled, also mvd_table and (for 8x8
 * macroblocks in B slices) direct_table.
 *
 * @param h       decoder context; the position is taken from h->s.mb_x / mb_y
 * @param mb_type type flags of the current macroblock
 */
1187 static inline void write_back_motion(H264Context *h, int mb_type){
1188 MpegEncContext * const s = &h->s;
// Table offsets of this macroblock: 4 entries per macroblock in each
// direction for b_xy, 2 for b8_xy.
1189 const int b_xy = 4*s->mb_x + 4*s->mb_y*h->b_stride;
1190 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride;
1191 int list;
1192
// List 0 unused: mark its 2x2 reference indices as LIST_NOT_USED.
1193 if(!USES_LIST(mb_type, 0))
1194 fill_rectangle(&s->current_picture.ref_index[0][b8_xy], 2, 2, h->b8_stride, (uint8_t)LIST_NOT_USED, 1);
1195
1196 for(list=0; list<h->list_count; list++){
1197 int y;
1198 if(!USES_LIST(mb_type, list))
1199 continue;
1200
// Copy the four cache rows, two 64-bit stores per row.
1201 for(y=0; y<4; y++){
1202 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y];
1203 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y];
1204 }
// CABAC: skipped macroblocks get an all-zero mvd block, others a copy of
// the mvd cache.
1205 if( h->pps.cabac ) {
1206 if(IS_SKIP(mb_type))
1207 fill_rectangle(h->mvd_table[list][b_xy], 4, 4, h->b_stride, 0, 4);
1208 else
1209 for(y=0; y<4; y++){
1210 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y];
1211 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y];
1212 }
1213 }
1214
// One reference index per 2x2 table entry, read at scan8[0], [4], [8], [12].
1215 {
1216 int8_t *ref_index = &s->current_picture.ref_index[list][b8_xy];
1217 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]];
1218 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]];
1219 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]];
1220 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]];
1221 }
1222 }
1223
// B slices with CABAC: record which of sub-blocks 1..3 are direct.
// The entry for sub-block 0 is not written here.
1224 if(h->slice_type_nos == FF_B_TYPE && h->pps.cabac){
1225 if(IS_8X8(mb_type)){
1226 uint8_t *direct_table = &h->direct_table[b8_xy];
1227 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0;
1228 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0;
1229 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0;
1230 }
1231 }
1232 }
1233
/**
 * Tests the sub-macroblock types for flags that rule out the 8x8 transform.
 *
 * Reads h->sub_mb_type through a single 64-bit load and tests it against a
 * flag mask replicated into each 16-bit lane (multiplication by
 * 0x0001000100010001). NOTE(review): this assumes sub_mb_type holds four
 * 16-bit entries - confirm against the H264Context declaration.
 *
 * @param h decoder context
 * @return nonzero if no lane has MB_TYPE_16x8, MB_TYPE_8x16 or MB_TYPE_8x8
 *         set; when sps.direct_8x8_inference_flag is 0, MB_TYPE_DIRECT2 must
 *         be clear as well
 */
1234 static inline int get_dct8x8_allowed(H264Context *h){
1235 if(h->sps.direct_8x8_inference_flag)
1236 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
1237 else
1238 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
1239 }
1240
/**
 * Predicts the field decoding flag of the current macroblock from a
 * neighbour and stores it in h->mb_mbaff and h->mb_field_decoding_flag.
 *
 * The macroblock at mb_xy-1 is used if it belongs to the current slice;
 * otherwise the one at mb_xy - mb_stride if that belongs to the current
 * slice; otherwise a type of 0 is assumed. Both flags become 1 if the chosen
 * type is interlaced and 0 otherwise.
 *
 * @param h decoder context; h->mb_xy selects the current macroblock
 */
1241 static void predict_field_decoding_flag(H264Context *h){
1242 MpegEncContext * const s = &h->s;
1243 const int mb_xy= h->mb_xy;
1244 int mb_type = (h->slice_table[mb_xy-1] == h->slice_num)
1245 ? s->current_picture.mb_type[mb_xy-1]
1246 : (h->slice_table[mb_xy-s->mb_stride] == h->slice_num)
1247 ? s->current_picture.mb_type[mb_xy-s->mb_stride]
1248 : 0;
1249 h->mb_mbaff = h->mb_field_decoding_flag = IS_INTERLACED(mb_type) ? 1 : 0;
1250 }
1251
1252 /**
1253 * Decodes a P_SKIP or B_SKIP macroblock.
 *
 * Clears the non-zero counts of the macroblock, builds its mb_type, derives
 * its motion (direct prediction in B slices, pred_pskip_motion() otherwise),
 * writes the motion back to the picture tables and records mb_type, qscale
 * and slice number for the macroblock. Sets h->prev_mb_skipped to 1.
 *
 * @param h decoder context; h->mb_xy selects the current macroblock
1254 */
1255 static void decode_mb_skip(H264Context *h){
1256 MpegEncContext * const s = &h->s;
1257 const int mb_xy= h->mb_xy;
1258 int mb_type=0;
1259
// No coefficients: zero the stored counts and the cache from offset 8 on.
1260 memset(h->non_zero_count[mb_xy], 0, 16);
1261 memset(h->non_zero_count_cache + 8, 0, 8*5); //FIXME ugly, remove pfui
1262
1263 if(MB_FIELD)
1264 mb_type|= MB_TYPE_INTERLACED;
1265
1266 if( h->slice_type_nos == FF_B_TYPE )
1267 {
1268 // just for fill_caches. pred_direct_motion will set the real mb_type
1269 mb_type|= MB_TYPE_P0L0|MB_TYPE_P0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
1270
1271 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
1272 ff_h264_pred_direct_motion(h, &mb_type);
// Set the skip flag again on the type produced by direct prediction.
1273 mb_type|= MB_TYPE_SKIP;
1274 }
1275 else
1276 {
1277 int mx, my;
1278 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
1279
1280 fill_caches(h, mb_type, 0); //FIXME check what is needed and what not ...
1281 pred_pskip_motion(h, &mx, &my);
// Whole macroblock uses list 0 reference index 0 with the predicted vector.
1282 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
1283 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
1284 }
1285
1286 write_back_motion(h, mb_type);
1287 s->current_picture.mb_type[mb_xy]= mb_type;
1288 s->current_picture.qscale_table[mb_xy]= s->qscale;
1289 h->slice_table[ mb_xy ]= h->slice_num;
1290 h->prev_mb_skipped= 1;
1291 }
1292
697 #endif /* AVCODEC_H264_H */ 1293 #endif /* AVCODEC_H264_H */