Mercurial > libavcodec.hg
comparison h264.h @ 11022:44529d4292ce libavcodec
Split fill_caches() between loopfilter & decode, the 2 were no longer common
enough to justify the messy interleaving.
author | michael |
---|---|
date | Tue, 26 Jan 2010 23:54:11 +0000 |
parents | 297bd56297a9 |
children | ff5aff85ec10 |
comparison
equal
deleted
inserted
replaced
11021:2bc05f2fc993 | 11022:44529d4292ce |
---|---|
727 return h->pps.chroma_qp_table[t][qscale]; | 727 return h->pps.chroma_qp_table[t][qscale]; |
728 } | 728 } |
729 | 729 |
730 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my); | 730 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my); |
731 | 731 |
732 static av_always_inline int fill_caches(H264Context *h, int mb_type, int for_deblock){ | 732 static void fill_decode_caches(H264Context *h, int mb_type){ |
733 MpegEncContext * const s = &h->s; | 733 MpegEncContext * const s = &h->s; |
734 const int mb_xy= h->mb_xy; | 734 const int mb_xy= h->mb_xy; |
735 int topleft_xy, top_xy, topright_xy, left_xy[2]; | 735 int topleft_xy, top_xy, topright_xy, left_xy[2]; |
736 int topleft_type, top_type, topright_type, left_type[2]; | 736 int topleft_type, top_type, topright_type, left_type[2]; |
737 const uint8_t * left_block; | 737 const uint8_t * left_block; |
744 {0,2,0,2,7,10,7,10,7+0*8, 7+2*8, 7+0*8, 7+2*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8} | 744 {0,2,0,2,7,10,7,10,7+0*8, 7+2*8, 7+0*8, 7+2*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8} |
745 }; | 745 }; |
746 | 746 |
747 top_xy = mb_xy - (s->mb_stride << MB_FIELD); | 747 top_xy = mb_xy - (s->mb_stride << MB_FIELD); |
748 | 748 |
749 //FIXME deblocking could skip the intra and nnz parts. | |
750 // if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF) | |
751 // return; | |
752 | |
753 /* Wow, what a mess, why didn't they simplify the interlacing & intra | 749 /* Wow, what a mess, why didn't they simplify the interlacing & intra |
754 * stuff, I can't imagine that these complex rules are worth it. */ | 750 * stuff, I can't imagine that these complex rules are worth it. */ |
755 | 751 |
756 topleft_xy = top_xy - 1; | 752 topleft_xy = top_xy - 1; |
757 topright_xy= top_xy + 1; | 753 topright_xy= top_xy + 1; |
760 if(FRAME_MBAFF){ | 756 if(FRAME_MBAFF){ |
761 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]); | 757 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]); |
762 const int curr_mb_field_flag = IS_INTERLACED(mb_type); | 758 const int curr_mb_field_flag = IS_INTERLACED(mb_type); |
763 if(s->mb_y&1){ | 759 if(s->mb_y&1){ |
764 if (left_mb_field_flag != curr_mb_field_flag) { | 760 if (left_mb_field_flag != curr_mb_field_flag) { |
765 if(for_deblock){ | |
766 left_xy[0] = mb_xy - s->mb_stride - 1; | |
767 left_xy[1] = mb_xy - 1; | |
768 }else{ | |
769 left_xy[1] = left_xy[0] = mb_xy - s->mb_stride - 1; | 761 left_xy[1] = left_xy[0] = mb_xy - s->mb_stride - 1; |
770 if (curr_mb_field_flag) { | 762 if (curr_mb_field_flag) { |
771 left_xy[1] += s->mb_stride; | 763 left_xy[1] += s->mb_stride; |
772 left_block = left_block_options[3]; | 764 left_block = left_block_options[3]; |
773 } else { | 765 } else { |
774 topleft_xy += s->mb_stride; | 766 topleft_xy += s->mb_stride; |
775 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition | 767 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition |
776 topleft_partition = 0; | 768 topleft_partition = 0; |
777 left_block = left_block_options[1]; | 769 left_block = left_block_options[1]; |
778 } | 770 } |
779 } | |
780 } | 771 } |
781 }else{ | 772 }else{ |
782 if(curr_mb_field_flag){ | 773 if(curr_mb_field_flag){ |
783 topleft_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy - 1]>>7)&1)-1); | 774 topleft_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy - 1]>>7)&1)-1); |
784 topright_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy + 1]>>7)&1)-1); | 775 topright_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy + 1]>>7)&1)-1); |
785 top_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy ]>>7)&1)-1); | 776 top_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy ]>>7)&1)-1); |
786 } | 777 } |
787 if (left_mb_field_flag != curr_mb_field_flag) { | 778 if (left_mb_field_flag != curr_mb_field_flag) { |
788 if(for_deblock){ | |
789 left_xy[0] = mb_xy - 1; | |
790 left_xy[1] = mb_xy + s->mb_stride - 1; | |
791 }else{ | |
792 left_xy[1] = left_xy[0] = mb_xy - 1; | 779 left_xy[1] = left_xy[0] = mb_xy - 1; |
793 if (curr_mb_field_flag) { | 780 if (curr_mb_field_flag) { |
794 left_xy[1] += s->mb_stride; | 781 left_xy[1] += s->mb_stride; |
795 left_block = left_block_options[3]; | 782 left_block = left_block_options[3]; |
796 } else { | 783 } else { |
797 left_block = left_block_options[2]; | 784 left_block = left_block_options[2]; |
798 } | 785 } |
799 } | |
800 } | 786 } |
801 } | 787 } |
802 } | 788 } |
803 | 789 |
804 h->top_mb_xy = top_xy; | 790 h->top_mb_xy = top_xy; |
805 h->left_mb_xy[0] = left_xy[0]; | 791 h->left_mb_xy[0] = left_xy[0]; |
806 h->left_mb_xy[1] = left_xy[1]; | 792 h->left_mb_xy[1] = left_xy[1]; |
807 if(for_deblock){ | |
808 | |
809 //for sufficiently low qp, filtering wouldn't do anything | |
810 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp | |
811 int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice | |
812 int qp = s->current_picture.qscale_table[mb_xy]; | |
813 if(qp <= qp_thresh | |
814 && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]] + 1)>>1) <= qp_thresh) | |
815 && (top_xy < 0 || ((qp + s->current_picture.qscale_table[top_xy ] + 1)>>1) <= qp_thresh)){ | |
816 if(!FRAME_MBAFF) | |
817 return 1; | |
818 if( (left_xy[0]< 0 || ((qp + s->current_picture.qscale_table[left_xy[1] ] + 1)>>1) <= qp_thresh) | |
819 && (top_xy < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy -s->mb_stride] + 1)>>1) <= qp_thresh)) | |
820 return 1; | |
821 } | |
822 | |
823 if(h->deblocking_filter == 2){ | |
824 h->top_type = top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0; | |
825 h->left_type[0]= left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0; | |
826 h->left_type[1]= left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0; | |
827 }else{ | |
828 h->top_type = top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0; | |
829 h->left_type[0]= left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0; | |
830 h->left_type[1]= left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0; | |
831 } | |
832 if(IS_INTRA(mb_type)) | |
833 return 0; | |
834 | |
835 AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]); | |
836 AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]); | |
837 *((uint32_t*)&h->non_zero_count_cache[0+8*5])= *((uint32_t*)&h->non_zero_count[mb_xy][16]); | |
838 *((uint32_t*)&h->non_zero_count_cache[4+8*3])= *((uint32_t*)&h->non_zero_count[mb_xy][20]); | |
839 AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]); | |
840 | |
841 h->cbp= h->cbp_table[mb_xy]; | |
842 | |
843 { | |
844 int list; | |
845 for(list=0; list<h->list_count; list++){ | |
846 int8_t *ref; | |
847 int y, b_stride; | |
848 int16_t (*mv_dst)[2]; | |
849 int16_t (*mv_src)[2]; | |
850 | |
851 if(!USES_LIST(mb_type, list)){ | |
852 fill_rectangle( h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4); | |
853 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] = | |
854 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = | |
855 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] = | |
856 *(uint32_t*)&h->ref_cache[list][scan8[10]] = ((LIST_NOT_USED)&0xFF)*0x01010101; | |
857 continue; | |
858 } | |
859 | |
860 ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]]; | |
861 { | |
862 int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); | |
863 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] = | |
864 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101; | |
865 ref += h->b8_stride; | |
866 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] = | |
867 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101; | |
868 } | |
869 | |
870 b_stride = h->b_stride; | |
871 mv_dst = &h->mv_cache[list][scan8[0]]; | |
872 mv_src = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride]; | |
873 for(y=0; y<4; y++){ | |
874 AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride); | |
875 } | |
876 | |
877 } | |
878 } | |
879 }else{ | |
880 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0; | 793 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0; |
881 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0; | 794 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0; |
882 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0; | 795 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0; |
883 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0; | 796 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0; |
884 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0; | 797 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0; |
960 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred; | 873 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred; |
961 } | 874 } |
962 } | 875 } |
963 } | 876 } |
964 } | 877 } |
965 } | |
966 | 878 |
967 | 879 |
968 /* | 880 /* |
969 0 . T T. T T T T | 881 0 . T T. T T T T |
970 1 L . .L . . . . | 882 1 L . .L . . . . |
974 5 L . .. . . . . | 886 5 L . .. . . . . |
975 */ | 887 */ |
976 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) | 888 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) |
977 if(top_type){ | 889 if(top_type){ |
978 *(uint32_t*)&h->non_zero_count_cache[4+8*0]= *(uint32_t*)&h->non_zero_count[top_xy][4+3*8]; | 890 *(uint32_t*)&h->non_zero_count_cache[4+8*0]= *(uint32_t*)&h->non_zero_count[top_xy][4+3*8]; |
979 if(!for_deblock){ | |
980 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][1+1*8]; | 891 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][1+1*8]; |
981 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][2+1*8]; | 892 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][2+1*8]; |
982 | 893 |
983 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][1+2*8]; | 894 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][1+2*8]; |
984 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][2+2*8]; | 895 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][2+2*8]; |
985 } | 896 }else { |
986 }else if(!for_deblock){ | |
987 h->non_zero_count_cache[1+8*0]= | 897 h->non_zero_count_cache[1+8*0]= |
988 h->non_zero_count_cache[2+8*0]= | 898 h->non_zero_count_cache[2+8*0]= |
989 | 899 |
990 h->non_zero_count_cache[1+8*3]= | 900 h->non_zero_count_cache[1+8*3]= |
991 h->non_zero_count_cache[2+8*3]= | 901 h->non_zero_count_cache[2+8*3]= |
994 | 904 |
995 for (i=0; i<2; i++) { | 905 for (i=0; i<2; i++) { |
996 if(left_type[i]){ | 906 if(left_type[i]){ |
997 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]]; | 907 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]]; |
998 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]]; | 908 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]]; |
999 if(!for_deblock){ | |
1000 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[8+4+2*i]]; | 909 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[8+4+2*i]]; |
1001 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[8+5+2*i]]; | 910 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[8+5+2*i]]; |
1002 } | 911 }else{ |
1003 }else if(!for_deblock){ | |
1004 h->non_zero_count_cache[3+8*1 + 2*8*i]= | 912 h->non_zero_count_cache[3+8*1 + 2*8*i]= |
1005 h->non_zero_count_cache[3+8*2 + 2*8*i]= | 913 h->non_zero_count_cache[3+8*2 + 2*8*i]= |
1006 h->non_zero_count_cache[0+8*1 + 8*i]= | 914 h->non_zero_count_cache[0+8*1 + 8*i]= |
1007 h->non_zero_count_cache[0+8*4 + 8*i]= CABAC && !IS_INTRA(mb_type) ? 0 : 64; | 915 h->non_zero_count_cache[0+8*4 + 8*i]= CABAC && !IS_INTRA(mb_type) ? 0 : 64; |
1008 } | 916 } |
1009 } | 917 } |
1010 | 918 |
1011 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs | 919 if( CABAC ) { |
1012 if(for_deblock && !CABAC && h->pps.transform_8x8_mode){ | |
1013 if(IS_8x8DCT(top_type)){ | |
1014 h->non_zero_count_cache[4+8*0]= | |
1015 h->non_zero_count_cache[5+8*0]= h->cbp_table[top_xy] & 4; | |
1016 h->non_zero_count_cache[6+8*0]= | |
1017 h->non_zero_count_cache[7+8*0]= h->cbp_table[top_xy] & 8; | |
1018 } | |
1019 if(IS_8x8DCT(left_type[0])){ | |
1020 h->non_zero_count_cache[3+8*1]= | |
1021 h->non_zero_count_cache[3+8*2]= h->cbp_table[left_xy[0]]&2; //FIXME check MBAFF | |
1022 } | |
1023 if(IS_8x8DCT(left_type[1])){ | |
1024 h->non_zero_count_cache[3+8*3]= | |
1025 h->non_zero_count_cache[3+8*4]= h->cbp_table[left_xy[1]]&8; //FIXME check MBAFF | |
1026 } | |
1027 | |
1028 if(IS_8x8DCT(mb_type)){ | |
1029 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]= | |
1030 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1; | |
1031 | |
1032 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]= | |
1033 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2; | |
1034 | |
1035 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]= | |
1036 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4; | |
1037 | |
1038 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]= | |
1039 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8; | |
1040 } | |
1041 } | |
1042 | |
1043 if( CABAC && !for_deblock) { | |
1044 // top_cbp | 920 // top_cbp |
1045 if(top_type) { | 921 if(top_type) { |
1046 h->top_cbp = h->cbp_table[top_xy]; | 922 h->top_cbp = h->cbp_table[top_xy]; |
1047 } else if(IS_INTRA(mb_type)) { | 923 } else if(IS_INTRA(mb_type)) { |
1048 h->top_cbp = 0x1C0; | 924 h->top_cbp = 0x1C0; |
1067 | 943 |
1068 #if 1 | 944 #if 1 |
1069 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){ | 945 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){ |
1070 int list; | 946 int list; |
1071 for(list=0; list<h->list_count; list++){ | 947 for(list=0; list<h->list_count; list++){ |
1072 if(!for_deblock && !USES_LIST(mb_type, list) && !IS_DIRECT(mb_type)){ | 948 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type)){ |
1073 /*if(!h->mv_cache_clean[list]){ | 949 /*if(!h->mv_cache_clean[list]){ |
1074 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all? | 950 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all? |
1075 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t)); | 951 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t)); |
1076 h->mv_cache_clean[list]= 1; | 952 h->mv_cache_clean[list]= 1; |
1077 }*/ | 953 }*/ |
1081 | 957 |
1082 if(USES_LIST(top_type, list)){ | 958 if(USES_LIST(top_type, list)){ |
1083 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; | 959 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; |
1084 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride; | 960 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride; |
1085 AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]); | 961 AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]); |
1086 if(for_deblock){ | |
1087 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); | |
1088 h->ref_cache[list][scan8[0] + 0 - 1*8]= | |
1089 h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]]; | |
1090 h->ref_cache[list][scan8[0] + 2 - 1*8]= | |
1091 h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]]; | |
1092 }else{ | |
1093 h->ref_cache[list][scan8[0] + 0 - 1*8]= | 962 h->ref_cache[list][scan8[0] + 0 - 1*8]= |
1094 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0]; | 963 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0]; |
1095 h->ref_cache[list][scan8[0] + 2 - 1*8]= | 964 h->ref_cache[list][scan8[0] + 2 - 1*8]= |
1096 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1]; | 965 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1]; |
1097 } | |
1098 }else{ | 966 }else{ |
1099 AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]); | 967 AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]); |
1100 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= (((for_deblock||top_type) ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101; | 968 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101; |
1101 } | 969 } |
1102 | 970 |
1103 if(for_deblock){ | |
1104 if(!IS_INTERLACED(mb_type^left_type[0])){ | |
1105 if(USES_LIST(left_type[0], list)){ | |
1106 const int b_xy= h->mb2b_xy[left_xy[0]] + 3; | |
1107 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1; | |
1108 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); | |
1109 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*0]; | |
1110 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 8 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*1]; | |
1111 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 +16 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*2]; | |
1112 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 +24 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*3]; | |
1113 h->ref_cache[list][scan8[0] - 1 + 0 ]= | |
1114 h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*0]]; | |
1115 h->ref_cache[list][scan8[0] - 1 +16 ]= | |
1116 h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*1]]; | |
1117 }else{ | |
1118 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0 ]= | |
1119 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 8 ]= | |
1120 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 +16 ]= | |
1121 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 +24 ]= 0; | |
1122 h->ref_cache[list][scan8[0] - 1 + 0 ]= | |
1123 h->ref_cache[list][scan8[0] - 1 + 8 ]= | |
1124 h->ref_cache[list][scan8[0] - 1 + 16 ]= | |
1125 h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED; | |
1126 } | |
1127 } | |
1128 continue; | |
1129 }else{ | |
1130 for(i=0; i<2; i++){ | 971 for(i=0; i<2; i++){ |
1131 int cache_idx = scan8[0] - 1 + i*2*8; | 972 int cache_idx = scan8[0] - 1 + i*2*8; |
1132 if(USES_LIST(left_type[i], list)){ | 973 if(USES_LIST(left_type[i], list)){ |
1133 const int b_xy= h->mb2b_xy[left_xy[i]] + 3; | 974 const int b_xy= h->mb2b_xy[left_xy[i]] + 3; |
1134 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1; | 975 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1; |
1140 *(uint32_t*)h->mv_cache [list][cache_idx ]= | 981 *(uint32_t*)h->mv_cache [list][cache_idx ]= |
1141 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0; | 982 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0; |
1142 h->ref_cache[list][cache_idx ]= | 983 h->ref_cache[list][cache_idx ]= |
1143 h->ref_cache[list][cache_idx+8]= (left_type[i]) ? LIST_NOT_USED : PART_NOT_AVAILABLE; | 984 h->ref_cache[list][cache_idx+8]= (left_type[i]) ? LIST_NOT_USED : PART_NOT_AVAILABLE; |
1144 } | 985 } |
1145 } | |
1146 } | 986 } |
1147 | 987 |
1148 if((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF) | 988 if((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF) |
1149 continue; | 989 continue; |
1150 | 990 |
1275 } | 1115 } |
1276 } | 1116 } |
1277 } | 1117 } |
1278 #endif | 1118 #endif |
1279 | 1119 |
1280 if(!for_deblock) | |
1281 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]); | 1120 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]); |
1282 return 0; | |
1283 } | |
1284 | |
1285 static void fill_decode_caches(H264Context *h, int mb_type){ | |
1286 fill_caches(h, mb_type, 0); | |
1287 } | 1121 } |
1288 | 1122 |
1289 /** | 1123 /** |
1290 * | 1124 * |
1291 * @returns non zero if the loop filter can be skipped | 1125 * @returns non zero if the loop filter can be skipped |
1292 */ | 1126 */ |
1293 static int fill_filter_caches(H264Context *h, int mb_type){ | 1127 static int fill_filter_caches(H264Context *h, int mb_type){ |
1294 return fill_caches(h, mb_type, 1); | 1128 MpegEncContext * const s = &h->s; |
1129 const int mb_xy= h->mb_xy; | |
1130 int top_xy, left_xy[2]; | |
1131 int top_type, left_type[2]; | |
1132 int i; | |
1133 | |
1134 top_xy = mb_xy - (s->mb_stride << MB_FIELD); | |
1135 | |
1136 //FIXME deblocking could skip the intra and nnz parts. | |
1137 | |
1138 /* Wow, what a mess, why didn't they simplify the interlacing & intra | |
1139 * stuff, I can't imagine that these complex rules are worth it. */ | |
1140 | |
1141 left_xy[1] = left_xy[0] = mb_xy-1; | |
1142 if(FRAME_MBAFF){ | |
1143 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]); | |
1144 const int curr_mb_field_flag = IS_INTERLACED(mb_type); | |
1145 if(s->mb_y&1){ | |
1146 if (left_mb_field_flag != curr_mb_field_flag) { | |
1147 left_xy[0] = mb_xy - s->mb_stride - 1; | |
1148 left_xy[1] = mb_xy - 1; | |
1149 } | |
1150 }else{ | |
1151 if(curr_mb_field_flag){ | |
1152 top_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy ]>>7)&1)-1); | |
1153 } | |
1154 if (left_mb_field_flag != curr_mb_field_flag) { | |
1155 left_xy[0] = mb_xy - 1; | |
1156 left_xy[1] = mb_xy + s->mb_stride - 1; | |
1157 } | |
1158 } | |
1159 } | |
1160 | |
1161 h->top_mb_xy = top_xy; | |
1162 h->left_mb_xy[0] = left_xy[0]; | |
1163 h->left_mb_xy[1] = left_xy[1]; | |
1164 { | |
1165 //for sufficiently low qp, filtering wouldn't do anything | |
1166 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp | |
1167 int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice | |
1168 int qp = s->current_picture.qscale_table[mb_xy]; | |
1169 if(qp <= qp_thresh | |
1170 && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]] + 1)>>1) <= qp_thresh) | |
1171 && (top_xy < 0 || ((qp + s->current_picture.qscale_table[top_xy ] + 1)>>1) <= qp_thresh)){ | |
1172 if(!FRAME_MBAFF) | |
1173 return 1; | |
1174 if( (left_xy[0]< 0 || ((qp + s->current_picture.qscale_table[left_xy[1] ] + 1)>>1) <= qp_thresh) | |
1175 && (top_xy < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy -s->mb_stride] + 1)>>1) <= qp_thresh)) | |
1176 return 1; | |
1177 } | |
1178 } | |
1179 | |
1180 if(h->deblocking_filter == 2){ | |
1181 h->top_type = top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0; | |
1182 h->left_type[0]= left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0; | |
1183 h->left_type[1]= left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0; | |
1184 }else{ | |
1185 h->top_type = top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0; | |
1186 h->left_type[0]= left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0; | |
1187 h->left_type[1]= left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0; | |
1188 } | |
1189 if(IS_INTRA(mb_type)) | |
1190 return 0; | |
1191 | |
1192 AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]); | |
1193 AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]); | |
1194 *((uint32_t*)&h->non_zero_count_cache[0+8*5])= *((uint32_t*)&h->non_zero_count[mb_xy][16]); | |
1195 *((uint32_t*)&h->non_zero_count_cache[4+8*3])= *((uint32_t*)&h->non_zero_count[mb_xy][20]); | |
1196 AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]); | |
1197 | |
1198 h->cbp= h->cbp_table[mb_xy]; | |
1199 | |
1200 { | |
1201 int list; | |
1202 for(list=0; list<h->list_count; list++){ | |
1203 int8_t *ref; | |
1204 int y, b_stride; | |
1205 int16_t (*mv_dst)[2]; | |
1206 int16_t (*mv_src)[2]; | |
1207 | |
1208 if(!USES_LIST(mb_type, list)){ | |
1209 fill_rectangle( h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4); | |
1210 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] = | |
1211 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = | |
1212 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] = | |
1213 *(uint32_t*)&h->ref_cache[list][scan8[10]] = ((LIST_NOT_USED)&0xFF)*0x01010101; | |
1214 continue; | |
1215 } | |
1216 | |
1217 ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]]; | |
1218 { | |
1219 int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); | |
1220 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] = | |
1221 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101; | |
1222 ref += h->b8_stride; | |
1223 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] = | |
1224 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101; | |
1225 } | |
1226 | |
1227 b_stride = h->b_stride; | |
1228 mv_dst = &h->mv_cache[list][scan8[0]]; | |
1229 mv_src = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride]; | |
1230 for(y=0; y<4; y++){ | |
1231 AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride); | |
1232 } | |
1233 | |
1234 } | |
1235 } | |
1236 | |
1237 | |
1238 /* | |
1239 0 . T T. T T T T | |
1240 1 L . .L . . . . | |
1241 2 L . .L . . . . | |
1242 3 . T TL . . . . | |
1243 4 L . .L . . . . | |
1244 5 L . .. . . . . | |
1245 */ | |
1246 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) | |
1247 if(top_type){ | |
1248 *(uint32_t*)&h->non_zero_count_cache[4+8*0]= *(uint32_t*)&h->non_zero_count[top_xy][4+3*8]; | |
1249 } | |
1250 | |
1251 if(left_type[0]){ | |
1252 h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][7+0*8]; | |
1253 h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][7+1*8]; | |
1254 h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][7+2*8]; | |
1255 h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][7+3*8]; | |
1256 } | |
1257 | |
1258 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs | |
1259 if(!CABAC && h->pps.transform_8x8_mode){ | |
1260 if(IS_8x8DCT(top_type)){ | |
1261 h->non_zero_count_cache[4+8*0]= | |
1262 h->non_zero_count_cache[5+8*0]= h->cbp_table[top_xy] & 4; | |
1263 h->non_zero_count_cache[6+8*0]= | |
1264 h->non_zero_count_cache[7+8*0]= h->cbp_table[top_xy] & 8; | |
1265 } | |
1266 if(IS_8x8DCT(left_type[0])){ | |
1267 h->non_zero_count_cache[3+8*1]= | |
1268 h->non_zero_count_cache[3+8*2]= h->cbp_table[left_xy[0]]&2; //FIXME check MBAFF | |
1269 } | |
1270 if(IS_8x8DCT(left_type[1])){ | |
1271 h->non_zero_count_cache[3+8*3]= | |
1272 h->non_zero_count_cache[3+8*4]= h->cbp_table[left_xy[1]]&8; //FIXME check MBAFF | |
1273 } | |
1274 | |
1275 if(IS_8x8DCT(mb_type)){ | |
1276 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]= | |
1277 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1; | |
1278 | |
1279 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]= | |
1280 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2; | |
1281 | |
1282 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]= | |
1283 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4; | |
1284 | |
1285 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]= | |
1286 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8; | |
1287 } | |
1288 } | |
1289 | |
1290 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){ | |
1291 int list; | |
1292 for(list=0; list<h->list_count; list++){ | |
1293 if(USES_LIST(top_type, list)){ | |
1294 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; | |
1295 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride; | |
1296 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); | |
1297 AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]); | |
1298 h->ref_cache[list][scan8[0] + 0 - 1*8]= | |
1299 h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]]; | |
1300 h->ref_cache[list][scan8[0] + 2 - 1*8]= | |
1301 h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]]; | |
1302 }else{ | |
1303 AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]); | |
1304 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((LIST_NOT_USED)&0xFF)*0x01010101; | |
1305 } | |
1306 | |
1307 if(!IS_INTERLACED(mb_type^left_type[0])){ | |
1308 if(USES_LIST(left_type[0], list)){ | |
1309 const int b_xy= h->mb2b_xy[left_xy[0]] + 3; | |
1310 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1; | |
1311 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); | |
1312 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*0]; | |
1313 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 8 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*1]; | |
1314 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 +16 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*2]; | |
1315 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 +24 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*3]; | |
1316 h->ref_cache[list][scan8[0] - 1 + 0 ]= | |
1317 h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*0]]; | |
1318 h->ref_cache[list][scan8[0] - 1 +16 ]= | |
1319 h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*1]]; | |
1320 }else{ | |
1321 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0 ]= | |
1322 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 8 ]= | |
1323 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 +16 ]= | |
1324 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 +24 ]= 0; | |
1325 h->ref_cache[list][scan8[0] - 1 + 0 ]= | |
1326 h->ref_cache[list][scan8[0] - 1 + 8 ]= | |
1327 h->ref_cache[list][scan8[0] - 1 + 16 ]= | |
1328 h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED; | |
1329 } | |
1330 } | |
1331 } | |
1332 } | |
1333 | |
1334 return 0; | |
1295 } | 1335 } |
1296 | 1336 |
1297 /** | 1337 /** |
1298 * gets the predicted intra4x4 prediction mode. | 1338 * gets the predicted intra4x4 prediction mode. |
1299 */ | 1339 */ |