Mercurial > libavcodec.hg
comparison h264.h @ 11203:10c06a9bd3d9 libavcodec
H264: use alias-safe macros
This eliminates all aliasing violation warnings in h264 code.
No measurable speed difference with gcc-4.4.3 on i7.
| field    | value                           |
|----------|---------------------------------|
| author   | mru                             |
| date     | Thu, 18 Feb 2010 16:24:31 +0000 |
| parents  | e1b4f03037d5                    |
| children | 3fbc764c4848                    |
comparison
equal
deleted
inserted
replaced
11202:778139a5e058 | 11203:10c06a9bd3d9 |
---|---|
26 */ | 26 */ |
27 | 27 |
28 #ifndef AVCODEC_H264_H | 28 #ifndef AVCODEC_H264_H |
29 #define AVCODEC_H264_H | 29 #define AVCODEC_H264_H |
30 | 30 |
31 #include "libavutil/intreadwrite.h" | |
31 #include "dsputil.h" | 32 #include "dsputil.h" |
32 #include "cabac.h" | 33 #include "cabac.h" |
33 #include "mpegvideo.h" | 34 #include "mpegvideo.h" |
34 #include "h264pred.h" | 35 #include "h264pred.h" |
35 #include "rectangle.h" | 36 #include "rectangle.h" |
919 4 L . .L . . . . | 920 4 L . .L . . . . |
920 5 L . .. . . . . | 921 5 L . .. . . . . |
921 */ | 922 */ |
922 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) | 923 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) |
923 if(top_type){ | 924 if(top_type){ |
924 *(uint32_t*)&h->non_zero_count_cache[4+8*0]= *(uint32_t*)&h->non_zero_count[top_xy][4+3*8]; | 925 AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+3*8]); |
925 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][1+1*8]; | 926 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][1+1*8]; |
926 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][2+1*8]; | 927 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][2+1*8]; |
927 | 928 |
928 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][1+2*8]; | 929 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][1+2*8]; |
929 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][2+2*8]; | 930 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][2+2*8]; |
931 h->non_zero_count_cache[1+8*0]= | 932 h->non_zero_count_cache[1+8*0]= |
932 h->non_zero_count_cache[2+8*0]= | 933 h->non_zero_count_cache[2+8*0]= |
933 | 934 |
934 h->non_zero_count_cache[1+8*3]= | 935 h->non_zero_count_cache[1+8*3]= |
935 h->non_zero_count_cache[2+8*3]= | 936 h->non_zero_count_cache[2+8*3]= |
936 *(uint32_t*)&h->non_zero_count_cache[4+8*0]= CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040; | 937 AV_WN32A(&h->non_zero_count_cache[4+8*0], CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040); |
937 } | 938 } |
938 | 939 |
939 for (i=0; i<2; i++) { | 940 for (i=0; i<2; i++) { |
940 if(left_type[i]){ | 941 if(left_type[i]){ |
941 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]]; | 942 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]]; |
1000 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0]; | 1001 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0]; |
1001 h->ref_cache[list][scan8[0] + 2 - 1*8]= | 1002 h->ref_cache[list][scan8[0] + 2 - 1*8]= |
1002 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1]; | 1003 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1]; |
1003 }else{ | 1004 }else{ |
1004 AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]); | 1005 AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]); |
1005 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101; | 1006 AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101); |
1006 } | 1007 } |
1007 | 1008 |
1008 for(i=0; i<2; i++){ | 1009 for(i=0; i<2; i++){ |
1009 int cache_idx = scan8[0] - 1 + i*2*8; | 1010 int cache_idx = scan8[0] - 1 + i*2*8; |
1010 if(USES_LIST(left_type[i], list)){ | 1011 if(USES_LIST(left_type[i], list)){ |
1011 const int b_xy= h->mb2b_xy[left_xy[i]] + 3; | 1012 const int b_xy= h->mb2b_xy[left_xy[i]] + 3; |
1012 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1; | 1013 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1; |
1013 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]]; | 1014 AV_COPY32(h->mv_cache[list][cache_idx ], s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]]); |
1014 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]]; | 1015 AV_COPY32(h->mv_cache[list][cache_idx+8], s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]]); |
1015 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)]; | 1016 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)]; |
1016 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)]; | 1017 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)]; |
1017 }else{ | 1018 }else{ |
1018 *(uint32_t*)h->mv_cache [list][cache_idx ]= | 1019 AV_ZERO32(h->mv_cache [list][cache_idx ]); |
1019 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0; | 1020 AV_ZERO32(h->mv_cache [list][cache_idx+8]); |
1020 h->ref_cache[list][cache_idx ]= | 1021 h->ref_cache[list][cache_idx ]= |
1021 h->ref_cache[list][cache_idx+8]= (left_type[i]) ? LIST_NOT_USED : PART_NOT_AVAILABLE; | 1022 h->ref_cache[list][cache_idx+8]= (left_type[i]) ? LIST_NOT_USED : PART_NOT_AVAILABLE; |
1022 } | 1023 } |
1023 } | 1024 } |
1024 | 1025 |
1025 if(USES_LIST(topleft_type, list)){ | 1026 if(USES_LIST(topleft_type, list)){ |
1026 const int b_xy = h->mb2b_xy [topleft_xy] + 3 + h->b_stride + (h->topleft_partition & 2*h->b_stride); | 1027 const int b_xy = h->mb2b_xy [topleft_xy] + 3 + h->b_stride + (h->topleft_partition & 2*h->b_stride); |
1027 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (h->topleft_partition & h->b8_stride); | 1028 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (h->topleft_partition & h->b8_stride); |
1028 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy]; | 1029 AV_COPY32(h->mv_cache[list][scan8[0] - 1 - 1*8], s->current_picture.motion_val[list][b_xy]); |
1029 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy]; | 1030 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy]; |
1030 }else{ | 1031 }else{ |
1031 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0; | 1032 AV_ZERO32(h->mv_cache[list][scan8[0] - 1 - 1*8]); |
1032 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; | 1033 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; |
1033 } | 1034 } |
1034 | 1035 |
1035 if(USES_LIST(topright_type, list)){ | 1036 if(USES_LIST(topright_type, list)){ |
1036 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride; | 1037 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride; |
1037 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride; | 1038 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride; |
1038 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy]; | 1039 AV_COPY32(h->mv_cache[list][scan8[0] + 4 - 1*8], s->current_picture.motion_val[list][b_xy]); |
1039 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy]; | 1040 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy]; |
1040 }else{ | 1041 }else{ |
1041 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0; | 1042 AV_ZERO32(h->mv_cache [list][scan8[0] + 4 - 1*8]); |
1042 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; | 1043 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; |
1043 } | 1044 } |
1044 | 1045 |
1045 if((mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2)) && !FRAME_MBAFF) | 1046 if((mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2)) && !FRAME_MBAFF) |
1046 continue; | 1047 continue; |
1049 h->ref_cache[list][scan8[5 ]+1] = | 1050 h->ref_cache[list][scan8[5 ]+1] = |
1050 h->ref_cache[list][scan8[7 ]+1] = | 1051 h->ref_cache[list][scan8[7 ]+1] = |
1051 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else) | 1052 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else) |
1052 h->ref_cache[list][scan8[4 ]] = | 1053 h->ref_cache[list][scan8[4 ]] = |
1053 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE; | 1054 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE; |
1054 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]= | 1055 AV_ZERO32(h->mv_cache [list][scan8[5 ]+1]); |
1055 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]= | 1056 AV_ZERO32(h->mv_cache [list][scan8[7 ]+1]); |
1056 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else) | 1057 AV_ZERO32(h->mv_cache [list][scan8[13]+1]); //FIXME remove past 3 (init somewhere else) |
1057 *(uint32_t*)h->mv_cache [list][scan8[4 ]]= | 1058 AV_ZERO32(h->mv_cache [list][scan8[4 ]]); |
1058 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0; | 1059 AV_ZERO32(h->mv_cache [list][scan8[12]]); |
1059 | 1060 |
1060 if( CABAC ) { | 1061 if( CABAC ) { |
1061 /* XXX beurk, Load mvd */ | 1062 /* XXX beurk, Load mvd */ |
1062 if(USES_LIST(top_type, list)){ | 1063 if(USES_LIST(top_type, list)){ |
1063 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; | 1064 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; |
1065 }else{ | 1066 }else{ |
1066 AV_ZERO128(h->mvd_cache[list][scan8[0] + 0 - 1*8]); | 1067 AV_ZERO128(h->mvd_cache[list][scan8[0] + 0 - 1*8]); |
1067 } | 1068 } |
1068 if(USES_LIST(left_type[0], list)){ | 1069 if(USES_LIST(left_type[0], list)){ |
1069 const int b_xy= h->mb2b_xy[left_xy[0]] + 3; | 1070 const int b_xy= h->mb2b_xy[left_xy[0]] + 3; |
1070 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]]; | 1071 AV_COPY32(h->mvd_cache[list][scan8[0] - 1 + 0*8], h->mvd_table[list][b_xy + h->b_stride*left_block[0]]); |
1071 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]]; | 1072 AV_COPY32(h->mvd_cache[list][scan8[0] - 1 + 1*8], h->mvd_table[list][b_xy + h->b_stride*left_block[1]]); |
1072 }else{ | 1073 }else{ |
1073 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]= | 1074 AV_ZERO32(h->mvd_cache [list][scan8[0] - 1 + 0*8]); |
1074 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0; | 1075 AV_ZERO32(h->mvd_cache [list][scan8[0] - 1 + 1*8]); |
1075 } | 1076 } |
1076 if(USES_LIST(left_type[1], list)){ | 1077 if(USES_LIST(left_type[1], list)){ |
1077 const int b_xy= h->mb2b_xy[left_xy[1]] + 3; | 1078 const int b_xy= h->mb2b_xy[left_xy[1]] + 3; |
1078 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]]; | 1079 AV_COPY32(h->mvd_cache[list][scan8[0] - 1 + 2*8], h->mvd_table[list][b_xy + h->b_stride*left_block[2]]); |
1079 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]]; | 1080 AV_COPY32(h->mvd_cache[list][scan8[0] - 1 + 3*8], h->mvd_table[list][b_xy + h->b_stride*left_block[3]]); |
1080 }else{ | 1081 }else{ |
1081 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]= | 1082 AV_ZERO32(h->mvd_cache [list][scan8[0] - 1 + 2*8]); |
1082 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0; | 1083 AV_ZERO32(h->mvd_cache [list][scan8[0] - 1 + 3*8]); |
1083 } | 1084 } |
1084 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]= | 1085 AV_ZERO32(h->mvd_cache [list][scan8[5 ]+1]); |
1085 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]= | 1086 AV_ZERO32(h->mvd_cache [list][scan8[7 ]+1]); |
1086 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else) | 1087 AV_ZERO32(h->mvd_cache [list][scan8[13]+1]); //FIXME remove past 3 (init somewhere else) |
1087 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]= | 1088 AV_ZERO32(h->mvd_cache [list][scan8[4 ]]); |
1088 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0; | 1089 AV_ZERO32(h->mvd_cache [list][scan8[12]]); |
1089 | 1090 |
1090 if(h->slice_type_nos == FF_B_TYPE){ | 1091 if(h->slice_type_nos == FF_B_TYPE){ |
1091 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, MB_TYPE_16x16>>1, 1); | 1092 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, MB_TYPE_16x16>>1, 1); |
1092 | 1093 |
1093 if(IS_DIRECT(top_type)){ | 1094 if(IS_DIRECT(top_type)){ |
1094 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101*(MB_TYPE_DIRECT2>>1); | 1095 AV_WN32A(&h->direct_cache[scan8[0] - 1*8], 0x01010101*(MB_TYPE_DIRECT2>>1)); |
1095 }else if(IS_8X8(top_type)){ | 1096 }else if(IS_8X8(top_type)){ |
1096 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride; | 1097 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride; |
1097 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy]; | 1098 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy]; |
1098 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1]; | 1099 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1]; |
1099 }else{ | 1100 }else{ |
1100 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101*(MB_TYPE_16x16>>1); | 1101 AV_WN32A(&h->direct_cache[scan8[0] - 1*8], 0x01010101*(MB_TYPE_16x16>>1)); |
1101 } | 1102 } |
1102 | 1103 |
1103 if(IS_DIRECT(left_type[0])) | 1104 if(IS_DIRECT(left_type[0])) |
1104 h->direct_cache[scan8[0] - 1 + 0*8]= MB_TYPE_DIRECT2>>1; | 1105 h->direct_cache[scan8[0] - 1 + 0*8]= MB_TYPE_DIRECT2>>1; |
1105 else if(IS_8X8(left_type[0])) | 1106 else if(IS_8X8(left_type[0])) |
1221 if(IS_INTRA(mb_type)) | 1222 if(IS_INTRA(mb_type)) |
1222 return 0; | 1223 return 0; |
1223 | 1224 |
1224 AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]); | 1225 AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]); |
1225 AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]); | 1226 AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]); |
1226 *((uint32_t*)&h->non_zero_count_cache[0+8*5])= *((uint32_t*)&h->non_zero_count[mb_xy][16]); | 1227 AV_COPY32(&h->non_zero_count_cache[0+8*5], &h->non_zero_count[mb_xy][16]); |
1227 *((uint32_t*)&h->non_zero_count_cache[4+8*3])= *((uint32_t*)&h->non_zero_count[mb_xy][20]); | 1228 AV_COPY32(&h->non_zero_count_cache[4+8*3], &h->non_zero_count[mb_xy][20]); |
1228 AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]); | 1229 AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]); |
1229 | 1230 |
1230 h->cbp= h->cbp_table[mb_xy]; | 1231 h->cbp= h->cbp_table[mb_xy]; |
1231 | 1232 |
1232 { | 1233 { |
1237 int16_t (*mv_dst)[2]; | 1238 int16_t (*mv_dst)[2]; |
1238 int16_t (*mv_src)[2]; | 1239 int16_t (*mv_src)[2]; |
1239 | 1240 |
1240 if(!USES_LIST(mb_type, list)){ | 1241 if(!USES_LIST(mb_type, list)){ |
1241 fill_rectangle( h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4); | 1242 fill_rectangle( h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4); |
1242 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] = | 1243 AV_WN32A(&h->ref_cache[list][scan8[ 0]], ((LIST_NOT_USED)&0xFF)*0x01010101u); |
1243 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = | 1244 AV_WN32A(&h->ref_cache[list][scan8[ 2]], ((LIST_NOT_USED)&0xFF)*0x01010101u); |
1244 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] = | 1245 AV_WN32A(&h->ref_cache[list][scan8[ 8]], ((LIST_NOT_USED)&0xFF)*0x01010101u); |
1245 *(uint32_t*)&h->ref_cache[list][scan8[10]] = ((LIST_NOT_USED)&0xFF)*0x01010101U; | 1246 AV_WN32A(&h->ref_cache[list][scan8[10]], ((LIST_NOT_USED)&0xFF)*0x01010101u); |
1246 continue; | 1247 continue; |
1247 } | 1248 } |
1248 | 1249 |
1249 ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]]; | 1250 ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]]; |
1250 { | 1251 { |
1251 int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); | 1252 int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); |
1252 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] = | 1253 AV_WN32A(&h->ref_cache[list][scan8[ 0]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); |
1253 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101; | 1254 AV_WN32A(&h->ref_cache[list][scan8[ 2]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); |
1254 ref += h->b8_stride; | 1255 ref += h->b8_stride; |
1255 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] = | 1256 AV_WN32A(&h->ref_cache[list][scan8[ 8]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); |
1256 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101; | 1257 AV_WN32A(&h->ref_cache[list][scan8[10]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101); |
1257 } | 1258 } |
1258 | 1259 |
1259 b_stride = h->b_stride; | 1260 b_stride = h->b_stride; |
1260 mv_dst = &h->mv_cache[list][scan8[0]]; | 1261 mv_dst = &h->mv_cache[list][scan8[0]]; |
1261 mv_src = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride]; | 1262 mv_src = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride]; |
1275 4 L . .L . . . . | 1276 4 L . .L . . . . |
1276 5 L . .. . . . . | 1277 5 L . .. . . . . |
1277 */ | 1278 */ |
1278 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) | 1279 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) |
1279 if(top_type){ | 1280 if(top_type){ |
1280 *(uint32_t*)&h->non_zero_count_cache[4+8*0]= *(uint32_t*)&h->non_zero_count[top_xy][4+3*8]; | 1281 AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+3*8]); |
1281 } | 1282 } |
1282 | 1283 |
1283 if(left_type[0]){ | 1284 if(left_type[0]){ |
1284 h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][7+0*8]; | 1285 h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][7+0*8]; |
1285 h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][7+1*8]; | 1286 h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][7+1*8]; |
1331 h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]]; | 1332 h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]]; |
1332 h->ref_cache[list][scan8[0] + 2 - 1*8]= | 1333 h->ref_cache[list][scan8[0] + 2 - 1*8]= |
1333 h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]]; | 1334 h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]]; |
1334 }else{ | 1335 }else{ |
1335 AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]); | 1336 AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]); |
1336 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((LIST_NOT_USED)&0xFF)*0x01010101U; | 1337 AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u); |
1337 } | 1338 } |
1338 | 1339 |
1339 if(!IS_INTERLACED(mb_type^left_type[0])){ | 1340 if(!IS_INTERLACED(mb_type^left_type[0])){ |
1340 if(USES_LIST(left_type[0], list)){ | 1341 if(USES_LIST(left_type[0], list)){ |
1341 const int b_xy= h->mb2b_xy[left_xy[0]] + 3; | 1342 const int b_xy= h->mb2b_xy[left_xy[0]] + 3; |
1342 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1; | 1343 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1; |
1343 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); | 1344 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); |
1344 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*0]; | 1345 AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 0 ], s->current_picture.motion_val[list][b_xy + h->b_stride*0]); |
1345 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 8 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*1]; | 1346 AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 8 ], s->current_picture.motion_val[list][b_xy + h->b_stride*1]); |
1346 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 +16 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*2]; | 1347 AV_COPY32(h->mv_cache[list][scan8[0] - 1 +16 ], s->current_picture.motion_val[list][b_xy + h->b_stride*2]); |
1347 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 +24 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*3]; | 1348 AV_COPY32(h->mv_cache[list][scan8[0] - 1 +24 ], s->current_picture.motion_val[list][b_xy + h->b_stride*3]); |
1348 h->ref_cache[list][scan8[0] - 1 + 0 ]= | 1349 h->ref_cache[list][scan8[0] - 1 + 0 ]= |
1349 h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*0]]; | 1350 h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*0]]; |
1350 h->ref_cache[list][scan8[0] - 1 +16 ]= | 1351 h->ref_cache[list][scan8[0] - 1 +16 ]= |
1351 h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*1]]; | 1352 h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*1]]; |
1352 }else{ | 1353 }else{ |
1353 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0 ]= | 1354 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 0 ]); |
1354 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 8 ]= | 1355 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 8 ]); |
1355 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 +16 ]= | 1356 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +16 ]); |
1356 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 +24 ]= 0; | 1357 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +24 ]); |
1357 h->ref_cache[list][scan8[0] - 1 + 0 ]= | 1358 h->ref_cache[list][scan8[0] - 1 + 0 ]= |
1358 h->ref_cache[list][scan8[0] - 1 + 8 ]= | 1359 h->ref_cache[list][scan8[0] - 1 + 8 ]= |
1359 h->ref_cache[list][scan8[0] - 1 + 16 ]= | 1360 h->ref_cache[list][scan8[0] - 1 + 16 ]= |
1360 h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED; | 1361 h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED; |
1361 } | 1362 } |
1384 static inline void write_back_non_zero_count(H264Context *h){ | 1385 static inline void write_back_non_zero_count(H264Context *h){ |
1385 const int mb_xy= h->mb_xy; | 1386 const int mb_xy= h->mb_xy; |
1386 | 1387 |
1387 AV_COPY64(&h->non_zero_count[mb_xy][ 0], &h->non_zero_count_cache[0+8*1]); | 1388 AV_COPY64(&h->non_zero_count[mb_xy][ 0], &h->non_zero_count_cache[0+8*1]); |
1388 AV_COPY64(&h->non_zero_count[mb_xy][ 8], &h->non_zero_count_cache[0+8*2]); | 1389 AV_COPY64(&h->non_zero_count[mb_xy][ 8], &h->non_zero_count_cache[0+8*2]); |
1389 *((uint32_t*)&h->non_zero_count[mb_xy][16]) = *((uint32_t*)&h->non_zero_count_cache[0+8*5]); | 1390 AV_COPY32(&h->non_zero_count[mb_xy][16], &h->non_zero_count_cache[0+8*5]); |
1390 *((uint32_t*)&h->non_zero_count[mb_xy][20]) = *((uint32_t*)&h->non_zero_count_cache[4+8*3]); | 1391 AV_COPY32(&h->non_zero_count[mb_xy][20], &h->non_zero_count_cache[4+8*3]); |
1391 AV_COPY64(&h->non_zero_count[mb_xy][24], &h->non_zero_count_cache[0+8*4]); | 1392 AV_COPY64(&h->non_zero_count[mb_xy][24], &h->non_zero_count_cache[0+8*4]); |
1392 } | 1393 } |
1393 | 1394 |
1394 static inline void write_back_motion(H264Context *h, int mb_type){ | 1395 static inline void write_back_motion(H264Context *h, int mb_type){ |
1395 MpegEncContext * const s = &h->s; | 1396 MpegEncContext * const s = &h->s; |
1444 } | 1445 } |
1445 } | 1446 } |
1446 | 1447 |
1447 static inline int get_dct8x8_allowed(H264Context *h){ | 1448 static inline int get_dct8x8_allowed(H264Context *h){ |
1448 if(h->sps.direct_8x8_inference_flag) | 1449 if(h->sps.direct_8x8_inference_flag) |
1449 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL)); | 1450 return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL)); |
1450 else | 1451 else |
1451 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL)); | 1452 return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL)); |
1452 } | 1453 } |
1453 | 1454 |
1454 /** | 1455 /** |
1455 * decodes a P_SKIP or B_SKIP macroblock | 1456 * decodes a P_SKIP or B_SKIP macroblock |
1456 */ | 1457 */ |