comparison h264.h @ 11022:44529d4292ce libavcodec

Split fill_caches() between loopfilter & decode; the 2 were no longer common enough to justify the messy interleaving.
author michael
date Tue, 26 Jan 2010 23:54:11 +0000
parents 297bd56297a9
children ff5aff85ec10
comparison
equal deleted inserted replaced
11021:2bc05f2fc993 11022:44529d4292ce
727 return h->pps.chroma_qp_table[t][qscale]; 727 return h->pps.chroma_qp_table[t][qscale];
728 } 728 }
729 729
730 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my); 730 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my);
731 731
732 static av_always_inline int fill_caches(H264Context *h, int mb_type, int for_deblock){ 732 static void fill_decode_caches(H264Context *h, int mb_type){
733 MpegEncContext * const s = &h->s; 733 MpegEncContext * const s = &h->s;
734 const int mb_xy= h->mb_xy; 734 const int mb_xy= h->mb_xy;
735 int topleft_xy, top_xy, topright_xy, left_xy[2]; 735 int topleft_xy, top_xy, topright_xy, left_xy[2];
736 int topleft_type, top_type, topright_type, left_type[2]; 736 int topleft_type, top_type, topright_type, left_type[2];
737 const uint8_t * left_block; 737 const uint8_t * left_block;
744 {0,2,0,2,7,10,7,10,7+0*8, 7+2*8, 7+0*8, 7+2*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8} 744 {0,2,0,2,7,10,7,10,7+0*8, 7+2*8, 7+0*8, 7+2*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8}
745 }; 745 };
746 746
747 top_xy = mb_xy - (s->mb_stride << MB_FIELD); 747 top_xy = mb_xy - (s->mb_stride << MB_FIELD);
748 748
749 //FIXME deblocking could skip the intra and nnz parts.
750 // if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
751 // return;
752
753 /* Wow, what a mess, why didn't they simplify the interlacing & intra 749 /* Wow, what a mess, why didn't they simplify the interlacing & intra
754 * stuff, I can't imagine that these complex rules are worth it. */ 750 * stuff, I can't imagine that these complex rules are worth it. */
755 751
756 topleft_xy = top_xy - 1; 752 topleft_xy = top_xy - 1;
757 topright_xy= top_xy + 1; 753 topright_xy= top_xy + 1;
760 if(FRAME_MBAFF){ 756 if(FRAME_MBAFF){
761 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]); 757 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]);
762 const int curr_mb_field_flag = IS_INTERLACED(mb_type); 758 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
763 if(s->mb_y&1){ 759 if(s->mb_y&1){
764 if (left_mb_field_flag != curr_mb_field_flag) { 760 if (left_mb_field_flag != curr_mb_field_flag) {
765 if(for_deblock){
766 left_xy[0] = mb_xy - s->mb_stride - 1;
767 left_xy[1] = mb_xy - 1;
768 }else{
769 left_xy[1] = left_xy[0] = mb_xy - s->mb_stride - 1; 761 left_xy[1] = left_xy[0] = mb_xy - s->mb_stride - 1;
770 if (curr_mb_field_flag) { 762 if (curr_mb_field_flag) {
771 left_xy[1] += s->mb_stride; 763 left_xy[1] += s->mb_stride;
772 left_block = left_block_options[3]; 764 left_block = left_block_options[3];
773 } else { 765 } else {
774 topleft_xy += s->mb_stride; 766 topleft_xy += s->mb_stride;
775 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition 767 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
776 topleft_partition = 0; 768 topleft_partition = 0;
777 left_block = left_block_options[1]; 769 left_block = left_block_options[1];
778 } 770 }
779 }
780 } 771 }
781 }else{ 772 }else{
782 if(curr_mb_field_flag){ 773 if(curr_mb_field_flag){
783 topleft_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy - 1]>>7)&1)-1); 774 topleft_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy - 1]>>7)&1)-1);
784 topright_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy + 1]>>7)&1)-1); 775 topright_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy + 1]>>7)&1)-1);
785 top_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy ]>>7)&1)-1); 776 top_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy ]>>7)&1)-1);
786 } 777 }
787 if (left_mb_field_flag != curr_mb_field_flag) { 778 if (left_mb_field_flag != curr_mb_field_flag) {
788 if(for_deblock){
789 left_xy[0] = mb_xy - 1;
790 left_xy[1] = mb_xy + s->mb_stride - 1;
791 }else{
792 left_xy[1] = left_xy[0] = mb_xy - 1; 779 left_xy[1] = left_xy[0] = mb_xy - 1;
793 if (curr_mb_field_flag) { 780 if (curr_mb_field_flag) {
794 left_xy[1] += s->mb_stride; 781 left_xy[1] += s->mb_stride;
795 left_block = left_block_options[3]; 782 left_block = left_block_options[3];
796 } else { 783 } else {
797 left_block = left_block_options[2]; 784 left_block = left_block_options[2];
798 } 785 }
799 }
800 } 786 }
801 } 787 }
802 } 788 }
803 789
804 h->top_mb_xy = top_xy; 790 h->top_mb_xy = top_xy;
805 h->left_mb_xy[0] = left_xy[0]; 791 h->left_mb_xy[0] = left_xy[0];
806 h->left_mb_xy[1] = left_xy[1]; 792 h->left_mb_xy[1] = left_xy[1];
807 if(for_deblock){
808
809 //for sufficiently low qp, filtering wouldn't do anything
810 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
811 int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice
812 int qp = s->current_picture.qscale_table[mb_xy];
813 if(qp <= qp_thresh
814 && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]] + 1)>>1) <= qp_thresh)
815 && (top_xy < 0 || ((qp + s->current_picture.qscale_table[top_xy ] + 1)>>1) <= qp_thresh)){
816 if(!FRAME_MBAFF)
817 return 1;
818 if( (left_xy[0]< 0 || ((qp + s->current_picture.qscale_table[left_xy[1] ] + 1)>>1) <= qp_thresh)
819 && (top_xy < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy -s->mb_stride] + 1)>>1) <= qp_thresh))
820 return 1;
821 }
822
823 if(h->deblocking_filter == 2){
824 h->top_type = top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
825 h->left_type[0]= left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
826 h->left_type[1]= left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
827 }else{
828 h->top_type = top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
829 h->left_type[0]= left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
830 h->left_type[1]= left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
831 }
832 if(IS_INTRA(mb_type))
833 return 0;
834
835 AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]);
836 AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]);
837 *((uint32_t*)&h->non_zero_count_cache[0+8*5])= *((uint32_t*)&h->non_zero_count[mb_xy][16]);
838 *((uint32_t*)&h->non_zero_count_cache[4+8*3])= *((uint32_t*)&h->non_zero_count[mb_xy][20]);
839 AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]);
840
841 h->cbp= h->cbp_table[mb_xy];
842
843 {
844 int list;
845 for(list=0; list<h->list_count; list++){
846 int8_t *ref;
847 int y, b_stride;
848 int16_t (*mv_dst)[2];
849 int16_t (*mv_src)[2];
850
851 if(!USES_LIST(mb_type, list)){
852 fill_rectangle( h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4);
853 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
854 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] =
855 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
856 *(uint32_t*)&h->ref_cache[list][scan8[10]] = ((LIST_NOT_USED)&0xFF)*0x01010101;
857 continue;
858 }
859
860 ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
861 {
862 int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
863 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
864 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101;
865 ref += h->b8_stride;
866 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
867 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101;
868 }
869
870 b_stride = h->b_stride;
871 mv_dst = &h->mv_cache[list][scan8[0]];
872 mv_src = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride];
873 for(y=0; y<4; y++){
874 AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride);
875 }
876
877 }
878 }
879 }else{
880 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0; 793 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
881 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0; 794 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
882 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0; 795 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
883 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0; 796 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
884 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0; 797 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
960 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred; 873 h->intra4x4_pred_mode_cache[3+8*2 + 2*8*i]= pred;
961 } 874 }
962 } 875 }
963 } 876 }
964 } 877 }
965 }
966 878
967 879
968 /* 880 /*
969 0 . T T. T T T T 881 0 . T T. T T T T
970 1 L . .L . . . . 882 1 L . .L . . . .
974 5 L . .. . . . . 886 5 L . .. . . . .
975 */ 887 */
976 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) 888 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
977 if(top_type){ 889 if(top_type){
978 *(uint32_t*)&h->non_zero_count_cache[4+8*0]= *(uint32_t*)&h->non_zero_count[top_xy][4+3*8]; 890 *(uint32_t*)&h->non_zero_count_cache[4+8*0]= *(uint32_t*)&h->non_zero_count[top_xy][4+3*8];
979 if(!for_deblock){
980 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][1+1*8]; 891 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][1+1*8];
981 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][2+1*8]; 892 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][2+1*8];
982 893
983 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][1+2*8]; 894 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][1+2*8];
984 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][2+2*8]; 895 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][2+2*8];
985 } 896 }else {
986 }else if(!for_deblock){
987 h->non_zero_count_cache[1+8*0]= 897 h->non_zero_count_cache[1+8*0]=
988 h->non_zero_count_cache[2+8*0]= 898 h->non_zero_count_cache[2+8*0]=
989 899
990 h->non_zero_count_cache[1+8*3]= 900 h->non_zero_count_cache[1+8*3]=
991 h->non_zero_count_cache[2+8*3]= 901 h->non_zero_count_cache[2+8*3]=
994 904
995 for (i=0; i<2; i++) { 905 for (i=0; i<2; i++) {
996 if(left_type[i]){ 906 if(left_type[i]){
997 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]]; 907 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]];
998 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]]; 908 h->non_zero_count_cache[3+8*2 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+1+2*i]];
999 if(!for_deblock){
1000 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[8+4+2*i]]; 909 h->non_zero_count_cache[0+8*1 + 8*i]= h->non_zero_count[left_xy[i]][left_block[8+4+2*i]];
1001 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[8+5+2*i]]; 910 h->non_zero_count_cache[0+8*4 + 8*i]= h->non_zero_count[left_xy[i]][left_block[8+5+2*i]];
1002 } 911 }else{
1003 }else if(!for_deblock){
1004 h->non_zero_count_cache[3+8*1 + 2*8*i]= 912 h->non_zero_count_cache[3+8*1 + 2*8*i]=
1005 h->non_zero_count_cache[3+8*2 + 2*8*i]= 913 h->non_zero_count_cache[3+8*2 + 2*8*i]=
1006 h->non_zero_count_cache[0+8*1 + 8*i]= 914 h->non_zero_count_cache[0+8*1 + 8*i]=
1007 h->non_zero_count_cache[0+8*4 + 8*i]= CABAC && !IS_INTRA(mb_type) ? 0 : 64; 915 h->non_zero_count_cache[0+8*4 + 8*i]= CABAC && !IS_INTRA(mb_type) ? 0 : 64;
1008 } 916 }
1009 } 917 }
1010 918
1011 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs 919 if( CABAC ) {
1012 if(for_deblock && !CABAC && h->pps.transform_8x8_mode){
1013 if(IS_8x8DCT(top_type)){
1014 h->non_zero_count_cache[4+8*0]=
1015 h->non_zero_count_cache[5+8*0]= h->cbp_table[top_xy] & 4;
1016 h->non_zero_count_cache[6+8*0]=
1017 h->non_zero_count_cache[7+8*0]= h->cbp_table[top_xy] & 8;
1018 }
1019 if(IS_8x8DCT(left_type[0])){
1020 h->non_zero_count_cache[3+8*1]=
1021 h->non_zero_count_cache[3+8*2]= h->cbp_table[left_xy[0]]&2; //FIXME check MBAFF
1022 }
1023 if(IS_8x8DCT(left_type[1])){
1024 h->non_zero_count_cache[3+8*3]=
1025 h->non_zero_count_cache[3+8*4]= h->cbp_table[left_xy[1]]&8; //FIXME check MBAFF
1026 }
1027
1028 if(IS_8x8DCT(mb_type)){
1029 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
1030 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
1031
1032 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
1033 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
1034
1035 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
1036 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
1037
1038 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
1039 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
1040 }
1041 }
1042
1043 if( CABAC && !for_deblock) {
1044 // top_cbp 920 // top_cbp
1045 if(top_type) { 921 if(top_type) {
1046 h->top_cbp = h->cbp_table[top_xy]; 922 h->top_cbp = h->cbp_table[top_xy];
1047 } else if(IS_INTRA(mb_type)) { 923 } else if(IS_INTRA(mb_type)) {
1048 h->top_cbp = 0x1C0; 924 h->top_cbp = 0x1C0;
1067 943
1068 #if 1 944 #if 1
1069 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){ 945 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
1070 int list; 946 int list;
1071 for(list=0; list<h->list_count; list++){ 947 for(list=0; list<h->list_count; list++){
1072 if(!for_deblock && !USES_LIST(mb_type, list) && !IS_DIRECT(mb_type)){ 948 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type)){
1073 /*if(!h->mv_cache_clean[list]){ 949 /*if(!h->mv_cache_clean[list]){
1074 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all? 950 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
1075 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t)); 951 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
1076 h->mv_cache_clean[list]= 1; 952 h->mv_cache_clean[list]= 1;
1077 }*/ 953 }*/
1081 957
1082 if(USES_LIST(top_type, list)){ 958 if(USES_LIST(top_type, list)){
1083 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; 959 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
1084 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride; 960 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
1085 AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]); 961 AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]);
1086 if(for_deblock){
1087 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
1088 h->ref_cache[list][scan8[0] + 0 - 1*8]=
1089 h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]];
1090 h->ref_cache[list][scan8[0] + 2 - 1*8]=
1091 h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]];
1092 }else{
1093 h->ref_cache[list][scan8[0] + 0 - 1*8]= 962 h->ref_cache[list][scan8[0] + 0 - 1*8]=
1094 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0]; 963 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
1095 h->ref_cache[list][scan8[0] + 2 - 1*8]= 964 h->ref_cache[list][scan8[0] + 2 - 1*8]=
1096 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1]; 965 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
1097 }
1098 }else{ 966 }else{
1099 AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]); 967 AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);
1100 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= (((for_deblock||top_type) ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101; 968 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
1101 } 969 }
1102 970
1103 if(for_deblock){
1104 if(!IS_INTERLACED(mb_type^left_type[0])){
1105 if(USES_LIST(left_type[0], list)){
1106 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
1107 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
1108 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
1109 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*0];
1110 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 8 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*1];
1111 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 +16 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*2];
1112 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 +24 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*3];
1113 h->ref_cache[list][scan8[0] - 1 + 0 ]=
1114 h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*0]];
1115 h->ref_cache[list][scan8[0] - 1 +16 ]=
1116 h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*1]];
1117 }else{
1118 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0 ]=
1119 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 8 ]=
1120 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 +16 ]=
1121 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 +24 ]= 0;
1122 h->ref_cache[list][scan8[0] - 1 + 0 ]=
1123 h->ref_cache[list][scan8[0] - 1 + 8 ]=
1124 h->ref_cache[list][scan8[0] - 1 + 16 ]=
1125 h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED;
1126 }
1127 }
1128 continue;
1129 }else{
1130 for(i=0; i<2; i++){ 971 for(i=0; i<2; i++){
1131 int cache_idx = scan8[0] - 1 + i*2*8; 972 int cache_idx = scan8[0] - 1 + i*2*8;
1132 if(USES_LIST(left_type[i], list)){ 973 if(USES_LIST(left_type[i], list)){
1133 const int b_xy= h->mb2b_xy[left_xy[i]] + 3; 974 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
1134 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1; 975 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
1140 *(uint32_t*)h->mv_cache [list][cache_idx ]= 981 *(uint32_t*)h->mv_cache [list][cache_idx ]=
1141 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0; 982 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0;
1142 h->ref_cache[list][cache_idx ]= 983 h->ref_cache[list][cache_idx ]=
1143 h->ref_cache[list][cache_idx+8]= (left_type[i]) ? LIST_NOT_USED : PART_NOT_AVAILABLE; 984 h->ref_cache[list][cache_idx+8]= (left_type[i]) ? LIST_NOT_USED : PART_NOT_AVAILABLE;
1144 } 985 }
1145 }
1146 } 986 }
1147 987
1148 if((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF) 988 if((IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred) && !FRAME_MBAFF)
1149 continue; 989 continue;
1150 990
1275 } 1115 }
1276 } 1116 }
1277 } 1117 }
1278 #endif 1118 #endif
1279 1119
1280 if(!for_deblock)
1281 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]); 1120 h->neighbor_transform_size= !!IS_8x8DCT(top_type) + !!IS_8x8DCT(left_type[0]);
1282 return 0;
1283 }
1284
1285 static void fill_decode_caches(H264Context *h, int mb_type){
1286 fill_caches(h, mb_type, 0);
1287 } 1121 }
1288 1122
1289 /** 1123 /**
1290 * 1124 *
1291 * @returns non zero if the loop filter can be skipped 1125 * @returns non zero if the loop filter can be skipped
1292 */ 1126 */
1293 static int fill_filter_caches(H264Context *h, int mb_type){ 1127 static int fill_filter_caches(H264Context *h, int mb_type){
1294 return fill_caches(h, mb_type, 1); 1128 MpegEncContext * const s = &h->s;
1129 const int mb_xy= h->mb_xy;
1130 int top_xy, left_xy[2];
1131 int top_type, left_type[2];
1132 int i;
1133
1134 top_xy = mb_xy - (s->mb_stride << MB_FIELD);
1135
1136 //FIXME deblocking could skip the intra and nnz parts.
1137
1138 /* Wow, what a mess, why didn't they simplify the interlacing & intra
1139 * stuff, I can't imagine that these complex rules are worth it. */
1140
1141 left_xy[1] = left_xy[0] = mb_xy-1;
1142 if(FRAME_MBAFF){
1143 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]);
1144 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
1145 if(s->mb_y&1){
1146 if (left_mb_field_flag != curr_mb_field_flag) {
1147 left_xy[0] = mb_xy - s->mb_stride - 1;
1148 left_xy[1] = mb_xy - 1;
1149 }
1150 }else{
1151 if(curr_mb_field_flag){
1152 top_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy ]>>7)&1)-1);
1153 }
1154 if (left_mb_field_flag != curr_mb_field_flag) {
1155 left_xy[0] = mb_xy - 1;
1156 left_xy[1] = mb_xy + s->mb_stride - 1;
1157 }
1158 }
1159 }
1160
1161 h->top_mb_xy = top_xy;
1162 h->left_mb_xy[0] = left_xy[0];
1163 h->left_mb_xy[1] = left_xy[1];
1164 {
1165 //for sufficiently low qp, filtering wouldn't do anything
1166 //this is a conservative estimate: could also check beta_offset and more accurate chroma_qp
1167 int qp_thresh = h->qp_thresh; //FIXME strictly we should store qp_thresh for each mb of a slice
1168 int qp = s->current_picture.qscale_table[mb_xy];
1169 if(qp <= qp_thresh
1170 && (left_xy[0]<0 || ((qp + s->current_picture.qscale_table[left_xy[0]] + 1)>>1) <= qp_thresh)
1171 && (top_xy < 0 || ((qp + s->current_picture.qscale_table[top_xy ] + 1)>>1) <= qp_thresh)){
1172 if(!FRAME_MBAFF)
1173 return 1;
1174 if( (left_xy[0]< 0 || ((qp + s->current_picture.qscale_table[left_xy[1] ] + 1)>>1) <= qp_thresh)
1175 && (top_xy < s->mb_stride || ((qp + s->current_picture.qscale_table[top_xy -s->mb_stride] + 1)>>1) <= qp_thresh))
1176 return 1;
1177 }
1178 }
1179
1180 if(h->deblocking_filter == 2){
1181 h->top_type = top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
1182 h->left_type[0]= left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
1183 h->left_type[1]= left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
1184 }else{
1185 h->top_type = top_type = h->slice_table[top_xy ] < 0xFFFF ? s->current_picture.mb_type[top_xy] : 0;
1186 h->left_type[0]= left_type[0] = h->slice_table[left_xy[0] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[0]] : 0;
1187 h->left_type[1]= left_type[1] = h->slice_table[left_xy[1] ] < 0xFFFF ? s->current_picture.mb_type[left_xy[1]] : 0;
1188 }
1189 if(IS_INTRA(mb_type))
1190 return 0;
1191
1192 AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]);
1193 AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]);
1194 *((uint32_t*)&h->non_zero_count_cache[0+8*5])= *((uint32_t*)&h->non_zero_count[mb_xy][16]);
1195 *((uint32_t*)&h->non_zero_count_cache[4+8*3])= *((uint32_t*)&h->non_zero_count[mb_xy][20]);
1196 AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]);
1197
1198 h->cbp= h->cbp_table[mb_xy];
1199
1200 {
1201 int list;
1202 for(list=0; list<h->list_count; list++){
1203 int8_t *ref;
1204 int y, b_stride;
1205 int16_t (*mv_dst)[2];
1206 int16_t (*mv_src)[2];
1207
1208 if(!USES_LIST(mb_type, list)){
1209 fill_rectangle( h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4);
1210 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
1211 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] =
1212 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
1213 *(uint32_t*)&h->ref_cache[list][scan8[10]] = ((LIST_NOT_USED)&0xFF)*0x01010101;
1214 continue;
1215 }
1216
1217 ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
1218 {
1219 int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
1220 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] =
1221 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101;
1222 ref += h->b8_stride;
1223 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] =
1224 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101;
1225 }
1226
1227 b_stride = h->b_stride;
1228 mv_dst = &h->mv_cache[list][scan8[0]];
1229 mv_src = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride];
1230 for(y=0; y<4; y++){
1231 AV_COPY128(mv_dst + 8*y, mv_src + y*b_stride);
1232 }
1233
1234 }
1235 }
1236
1237
1238 /*
1239 0 . T T. T T T T
1240 1 L . .L . . . .
1241 2 L . .L . . . .
1242 3 . T TL . . . .
1243 4 L . .L . . . .
1244 5 L . .. . . . .
1245 */
1246 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
1247 if(top_type){
1248 *(uint32_t*)&h->non_zero_count_cache[4+8*0]= *(uint32_t*)&h->non_zero_count[top_xy][4+3*8];
1249 }
1250
1251 if(left_type[0]){
1252 h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][7+0*8];
1253 h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][7+1*8];
1254 h->non_zero_count_cache[3+8*3]= h->non_zero_count[left_xy[0]][7+2*8];
1255 h->non_zero_count_cache[3+8*4]= h->non_zero_count[left_xy[0]][7+3*8];
1256 }
1257
1258 // CAVLC 8x8dct requires NNZ values for residual decoding that differ from what the loop filter needs
1259 if(!CABAC && h->pps.transform_8x8_mode){
1260 if(IS_8x8DCT(top_type)){
1261 h->non_zero_count_cache[4+8*0]=
1262 h->non_zero_count_cache[5+8*0]= h->cbp_table[top_xy] & 4;
1263 h->non_zero_count_cache[6+8*0]=
1264 h->non_zero_count_cache[7+8*0]= h->cbp_table[top_xy] & 8;
1265 }
1266 if(IS_8x8DCT(left_type[0])){
1267 h->non_zero_count_cache[3+8*1]=
1268 h->non_zero_count_cache[3+8*2]= h->cbp_table[left_xy[0]]&2; //FIXME check MBAFF
1269 }
1270 if(IS_8x8DCT(left_type[1])){
1271 h->non_zero_count_cache[3+8*3]=
1272 h->non_zero_count_cache[3+8*4]= h->cbp_table[left_xy[1]]&8; //FIXME check MBAFF
1273 }
1274
1275 if(IS_8x8DCT(mb_type)){
1276 h->non_zero_count_cache[scan8[0 ]]= h->non_zero_count_cache[scan8[1 ]]=
1277 h->non_zero_count_cache[scan8[2 ]]= h->non_zero_count_cache[scan8[3 ]]= h->cbp & 1;
1278
1279 h->non_zero_count_cache[scan8[0+ 4]]= h->non_zero_count_cache[scan8[1+ 4]]=
1280 h->non_zero_count_cache[scan8[2+ 4]]= h->non_zero_count_cache[scan8[3+ 4]]= h->cbp & 2;
1281
1282 h->non_zero_count_cache[scan8[0+ 8]]= h->non_zero_count_cache[scan8[1+ 8]]=
1283 h->non_zero_count_cache[scan8[2+ 8]]= h->non_zero_count_cache[scan8[3+ 8]]= h->cbp & 4;
1284
1285 h->non_zero_count_cache[scan8[0+12]]= h->non_zero_count_cache[scan8[1+12]]=
1286 h->non_zero_count_cache[scan8[2+12]]= h->non_zero_count_cache[scan8[3+12]]= h->cbp & 8;
1287 }
1288 }
1289
1290 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
1291 int list;
1292 for(list=0; list<h->list_count; list++){
1293 if(USES_LIST(top_type, list)){
1294 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
1295 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
1296 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[top_xy]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
1297 AV_COPY128(h->mv_cache[list][scan8[0] + 0 - 1*8], s->current_picture.motion_val[list][b_xy + 0]);
1298 h->ref_cache[list][scan8[0] + 0 - 1*8]=
1299 h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]];
1300 h->ref_cache[list][scan8[0] + 2 - 1*8]=
1301 h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]];
1302 }else{
1303 AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);
1304 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((LIST_NOT_USED)&0xFF)*0x01010101;
1305 }
1306
1307 if(!IS_INTERLACED(mb_type^left_type[0])){
1308 if(USES_LIST(left_type[0], list)){
1309 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
1310 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
1311 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
1312 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*0];
1313 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 8 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*1];
1314 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 +16 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*2];
1315 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 +24 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*3];
1316 h->ref_cache[list][scan8[0] - 1 + 0 ]=
1317 h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*0]];
1318 h->ref_cache[list][scan8[0] - 1 +16 ]=
1319 h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*1]];
1320 }else{
1321 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0 ]=
1322 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 8 ]=
1323 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 +16 ]=
1324 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 +24 ]= 0;
1325 h->ref_cache[list][scan8[0] - 1 + 0 ]=
1326 h->ref_cache[list][scan8[0] - 1 + 8 ]=
1327 h->ref_cache[list][scan8[0] - 1 + 16 ]=
1328 h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED;
1329 }
1330 }
1331 }
1332 }
1333
1334 return 0;
1295 } 1335 }
1296 1336
1297 /** 1337 /**
1298 * gets the predicted intra4x4 prediction mode. 1338 * gets the predicted intra4x4 prediction mode.
1299 */ 1339 */