comparison h264.h @ 11203:10c06a9bd3d9 libavcodec

H264: use alias-safe macros. This eliminates all aliasing violation warnings in h264 code. No measurable speed difference with gcc-4.4.3 on i7.
author mru
date Thu, 18 Feb 2010 16:24:31 +0000
parents e1b4f03037d5
children 3fbc764c4848
comparison
equal deleted inserted replaced
11202:778139a5e058 11203:10c06a9bd3d9
26 */ 26 */
27 27
28 #ifndef AVCODEC_H264_H 28 #ifndef AVCODEC_H264_H
29 #define AVCODEC_H264_H 29 #define AVCODEC_H264_H
30 30
31 #include "libavutil/intreadwrite.h"
31 #include "dsputil.h" 32 #include "dsputil.h"
32 #include "cabac.h" 33 #include "cabac.h"
33 #include "mpegvideo.h" 34 #include "mpegvideo.h"
34 #include "h264pred.h" 35 #include "h264pred.h"
35 #include "rectangle.h" 36 #include "rectangle.h"
919 4 L . .L . . . . 920 4 L . .L . . . .
920 5 L . .. . . . . 921 5 L . .. . . . .
921 */ 922 */
922 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) 923 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
923 if(top_type){ 924 if(top_type){
924 *(uint32_t*)&h->non_zero_count_cache[4+8*0]= *(uint32_t*)&h->non_zero_count[top_xy][4+3*8]; 925 AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+3*8]);
925 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][1+1*8]; 926 h->non_zero_count_cache[1+8*0]= h->non_zero_count[top_xy][1+1*8];
926 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][2+1*8]; 927 h->non_zero_count_cache[2+8*0]= h->non_zero_count[top_xy][2+1*8];
927 928
928 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][1+2*8]; 929 h->non_zero_count_cache[1+8*3]= h->non_zero_count[top_xy][1+2*8];
929 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][2+2*8]; 930 h->non_zero_count_cache[2+8*3]= h->non_zero_count[top_xy][2+2*8];
931 h->non_zero_count_cache[1+8*0]= 932 h->non_zero_count_cache[1+8*0]=
932 h->non_zero_count_cache[2+8*0]= 933 h->non_zero_count_cache[2+8*0]=
933 934
934 h->non_zero_count_cache[1+8*3]= 935 h->non_zero_count_cache[1+8*3]=
935 h->non_zero_count_cache[2+8*3]= 936 h->non_zero_count_cache[2+8*3]=
936 *(uint32_t*)&h->non_zero_count_cache[4+8*0]= CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040; 937 AV_WN32A(&h->non_zero_count_cache[4+8*0], CABAC && !IS_INTRA(mb_type) ? 0 : 0x40404040);
937 } 938 }
938 939
939 for (i=0; i<2; i++) { 940 for (i=0; i<2; i++) {
940 if(left_type[i]){ 941 if(left_type[i]){
941 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]]; 942 h->non_zero_count_cache[3+8*1 + 2*8*i]= h->non_zero_count[left_xy[i]][left_block[8+0+2*i]];
1000 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0]; 1001 h->ref_cache[list][scan8[0] + 1 - 1*8]= s->current_picture.ref_index[list][b8_xy + 0];
1001 h->ref_cache[list][scan8[0] + 2 - 1*8]= 1002 h->ref_cache[list][scan8[0] + 2 - 1*8]=
1002 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1]; 1003 h->ref_cache[list][scan8[0] + 3 - 1*8]= s->current_picture.ref_index[list][b8_xy + 1];
1003 }else{ 1004 }else{
1004 AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]); 1005 AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);
1005 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101; 1006 AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101);
1006 } 1007 }
1007 1008
1008 for(i=0; i<2; i++){ 1009 for(i=0; i<2; i++){
1009 int cache_idx = scan8[0] - 1 + i*2*8; 1010 int cache_idx = scan8[0] - 1 + i*2*8;
1010 if(USES_LIST(left_type[i], list)){ 1011 if(USES_LIST(left_type[i], list)){
1011 const int b_xy= h->mb2b_xy[left_xy[i]] + 3; 1012 const int b_xy= h->mb2b_xy[left_xy[i]] + 3;
1012 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1; 1013 const int b8_xy= h->mb2b8_xy[left_xy[i]] + 1;
1013 *(uint32_t*)h->mv_cache[list][cache_idx ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]]; 1014 AV_COPY32(h->mv_cache[list][cache_idx ], s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0+i*2]]);
1014 *(uint32_t*)h->mv_cache[list][cache_idx+8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]]; 1015 AV_COPY32(h->mv_cache[list][cache_idx+8], s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1+i*2]]);
1015 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)]; 1016 h->ref_cache[list][cache_idx ]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[0+i*2]>>1)];
1016 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)]; 1017 h->ref_cache[list][cache_idx+8]= s->current_picture.ref_index[list][b8_xy + h->b8_stride*(left_block[1+i*2]>>1)];
1017 }else{ 1018 }else{
1018 *(uint32_t*)h->mv_cache [list][cache_idx ]= 1019 AV_ZERO32(h->mv_cache [list][cache_idx ]);
1019 *(uint32_t*)h->mv_cache [list][cache_idx+8]= 0; 1020 AV_ZERO32(h->mv_cache [list][cache_idx+8]);
1020 h->ref_cache[list][cache_idx ]= 1021 h->ref_cache[list][cache_idx ]=
1021 h->ref_cache[list][cache_idx+8]= (left_type[i]) ? LIST_NOT_USED : PART_NOT_AVAILABLE; 1022 h->ref_cache[list][cache_idx+8]= (left_type[i]) ? LIST_NOT_USED : PART_NOT_AVAILABLE;
1022 } 1023 }
1023 } 1024 }
1024 1025
1025 if(USES_LIST(topleft_type, list)){ 1026 if(USES_LIST(topleft_type, list)){
1026 const int b_xy = h->mb2b_xy [topleft_xy] + 3 + h->b_stride + (h->topleft_partition & 2*h->b_stride); 1027 const int b_xy = h->mb2b_xy [topleft_xy] + 3 + h->b_stride + (h->topleft_partition & 2*h->b_stride);
1027 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (h->topleft_partition & h->b8_stride); 1028 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (h->topleft_partition & h->b8_stride);
1028 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy]; 1029 AV_COPY32(h->mv_cache[list][scan8[0] - 1 - 1*8], s->current_picture.motion_val[list][b_xy]);
1029 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy]; 1030 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
1030 }else{ 1031 }else{
1031 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0; 1032 AV_ZERO32(h->mv_cache[list][scan8[0] - 1 - 1*8]);
1032 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; 1033 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
1033 } 1034 }
1034 1035
1035 if(USES_LIST(topright_type, list)){ 1036 if(USES_LIST(topright_type, list)){
1036 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride; 1037 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
1037 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride; 1038 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
1038 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy]; 1039 AV_COPY32(h->mv_cache[list][scan8[0] + 4 - 1*8], s->current_picture.motion_val[list][b_xy]);
1039 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy]; 1040 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
1040 }else{ 1041 }else{
1041 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0; 1042 AV_ZERO32(h->mv_cache [list][scan8[0] + 4 - 1*8]);
1042 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; 1043 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
1043 } 1044 }
1044 1045
1045 if((mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2)) && !FRAME_MBAFF) 1046 if((mb_type&(MB_TYPE_SKIP|MB_TYPE_DIRECT2)) && !FRAME_MBAFF)
1046 continue; 1047 continue;
1049 h->ref_cache[list][scan8[5 ]+1] = 1050 h->ref_cache[list][scan8[5 ]+1] =
1050 h->ref_cache[list][scan8[7 ]+1] = 1051 h->ref_cache[list][scan8[7 ]+1] =
1051 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else) 1052 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewhere else)
1052 h->ref_cache[list][scan8[4 ]] = 1053 h->ref_cache[list][scan8[4 ]] =
1053 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE; 1054 h->ref_cache[list][scan8[12]] = PART_NOT_AVAILABLE;
1054 *(uint32_t*)h->mv_cache [list][scan8[5 ]+1]= 1055 AV_ZERO32(h->mv_cache [list][scan8[5 ]+1]);
1055 *(uint32_t*)h->mv_cache [list][scan8[7 ]+1]= 1056 AV_ZERO32(h->mv_cache [list][scan8[7 ]+1]);
1056 *(uint32_t*)h->mv_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else) 1057 AV_ZERO32(h->mv_cache [list][scan8[13]+1]); //FIXME remove past 3 (init somewhere else)
1057 *(uint32_t*)h->mv_cache [list][scan8[4 ]]= 1058 AV_ZERO32(h->mv_cache [list][scan8[4 ]]);
1058 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0; 1059 AV_ZERO32(h->mv_cache [list][scan8[12]]);
1059 1060
1060 if( CABAC ) { 1061 if( CABAC ) {
1061 /* XXX beurk, Load mvd */ 1062 /* XXX beurk, Load mvd */
1062 if(USES_LIST(top_type, list)){ 1063 if(USES_LIST(top_type, list)){
1063 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; 1064 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
1065 }else{ 1066 }else{
1066 AV_ZERO128(h->mvd_cache[list][scan8[0] + 0 - 1*8]); 1067 AV_ZERO128(h->mvd_cache[list][scan8[0] + 0 - 1*8]);
1067 } 1068 }
1068 if(USES_LIST(left_type[0], list)){ 1069 if(USES_LIST(left_type[0], list)){
1069 const int b_xy= h->mb2b_xy[left_xy[0]] + 3; 1070 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
1070 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]]; 1071 AV_COPY32(h->mvd_cache[list][scan8[0] - 1 + 0*8], h->mvd_table[list][b_xy + h->b_stride*left_block[0]]);
1071 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]]; 1072 AV_COPY32(h->mvd_cache[list][scan8[0] - 1 + 1*8], h->mvd_table[list][b_xy + h->b_stride*left_block[1]]);
1072 }else{ 1073 }else{
1073 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]= 1074 AV_ZERO32(h->mvd_cache [list][scan8[0] - 1 + 0*8]);
1074 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0; 1075 AV_ZERO32(h->mvd_cache [list][scan8[0] - 1 + 1*8]);
1075 } 1076 }
1076 if(USES_LIST(left_type[1], list)){ 1077 if(USES_LIST(left_type[1], list)){
1077 const int b_xy= h->mb2b_xy[left_xy[1]] + 3; 1078 const int b_xy= h->mb2b_xy[left_xy[1]] + 3;
1078 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]]; 1079 AV_COPY32(h->mvd_cache[list][scan8[0] - 1 + 2*8], h->mvd_table[list][b_xy + h->b_stride*left_block[2]]);
1079 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]]; 1080 AV_COPY32(h->mvd_cache[list][scan8[0] - 1 + 3*8], h->mvd_table[list][b_xy + h->b_stride*left_block[3]]);
1080 }else{ 1081 }else{
1081 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]= 1082 AV_ZERO32(h->mvd_cache [list][scan8[0] - 1 + 2*8]);
1082 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 3*8]= 0; 1083 AV_ZERO32(h->mvd_cache [list][scan8[0] - 1 + 3*8]);
1083 } 1084 }
1084 *(uint32_t*)h->mvd_cache [list][scan8[5 ]+1]= 1085 AV_ZERO32(h->mvd_cache [list][scan8[5 ]+1]);
1085 *(uint32_t*)h->mvd_cache [list][scan8[7 ]+1]= 1086 AV_ZERO32(h->mvd_cache [list][scan8[7 ]+1]);
1086 *(uint32_t*)h->mvd_cache [list][scan8[13]+1]= //FIXME remove past 3 (init somewhere else) 1087 AV_ZERO32(h->mvd_cache [list][scan8[13]+1]); //FIXME remove past 3 (init somewhere else)
1087 *(uint32_t*)h->mvd_cache [list][scan8[4 ]]= 1088 AV_ZERO32(h->mvd_cache [list][scan8[4 ]]);
1088 *(uint32_t*)h->mvd_cache [list][scan8[12]]= 0; 1089 AV_ZERO32(h->mvd_cache [list][scan8[12]]);
1089 1090
1090 if(h->slice_type_nos == FF_B_TYPE){ 1091 if(h->slice_type_nos == FF_B_TYPE){
1091 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, MB_TYPE_16x16>>1, 1); 1092 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, MB_TYPE_16x16>>1, 1);
1092 1093
1093 if(IS_DIRECT(top_type)){ 1094 if(IS_DIRECT(top_type)){
1094 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101*(MB_TYPE_DIRECT2>>1); 1095 AV_WN32A(&h->direct_cache[scan8[0] - 1*8], 0x01010101*(MB_TYPE_DIRECT2>>1));
1095 }else if(IS_8X8(top_type)){ 1096 }else if(IS_8X8(top_type)){
1096 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride; 1097 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride;
1097 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy]; 1098 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy];
1098 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1]; 1099 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1];
1099 }else{ 1100 }else{
1100 *(uint32_t*)&h->direct_cache[scan8[0] - 1*8]= 0x01010101*(MB_TYPE_16x16>>1); 1101 AV_WN32A(&h->direct_cache[scan8[0] - 1*8], 0x01010101*(MB_TYPE_16x16>>1));
1101 } 1102 }
1102 1103
1103 if(IS_DIRECT(left_type[0])) 1104 if(IS_DIRECT(left_type[0]))
1104 h->direct_cache[scan8[0] - 1 + 0*8]= MB_TYPE_DIRECT2>>1; 1105 h->direct_cache[scan8[0] - 1 + 0*8]= MB_TYPE_DIRECT2>>1;
1105 else if(IS_8X8(left_type[0])) 1106 else if(IS_8X8(left_type[0]))
1221 if(IS_INTRA(mb_type)) 1222 if(IS_INTRA(mb_type))
1222 return 0; 1223 return 0;
1223 1224
1224 AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]); 1225 AV_COPY64(&h->non_zero_count_cache[0+8*1], &h->non_zero_count[mb_xy][ 0]);
1225 AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]); 1226 AV_COPY64(&h->non_zero_count_cache[0+8*2], &h->non_zero_count[mb_xy][ 8]);
1226 *((uint32_t*)&h->non_zero_count_cache[0+8*5])= *((uint32_t*)&h->non_zero_count[mb_xy][16]); 1227 AV_COPY32(&h->non_zero_count_cache[0+8*5], &h->non_zero_count[mb_xy][16]);
1227 *((uint32_t*)&h->non_zero_count_cache[4+8*3])= *((uint32_t*)&h->non_zero_count[mb_xy][20]); 1228 AV_COPY32(&h->non_zero_count_cache[4+8*3], &h->non_zero_count[mb_xy][20]);
1228 AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]); 1229 AV_COPY64(&h->non_zero_count_cache[0+8*4], &h->non_zero_count[mb_xy][24]);
1229 1230
1230 h->cbp= h->cbp_table[mb_xy]; 1231 h->cbp= h->cbp_table[mb_xy];
1231 1232
1232 { 1233 {
1237 int16_t (*mv_dst)[2]; 1238 int16_t (*mv_dst)[2];
1238 int16_t (*mv_src)[2]; 1239 int16_t (*mv_src)[2];
1239 1240
1240 if(!USES_LIST(mb_type, list)){ 1241 if(!USES_LIST(mb_type, list)){
1241 fill_rectangle( h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4); 1242 fill_rectangle( h->mv_cache[list][scan8[0]], 4, 4, 8, pack16to32(0,0), 4);
1242 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] = 1243 AV_WN32A(&h->ref_cache[list][scan8[ 0]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
1243 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = 1244 AV_WN32A(&h->ref_cache[list][scan8[ 2]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
1244 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] = 1245 AV_WN32A(&h->ref_cache[list][scan8[ 8]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
1245 *(uint32_t*)&h->ref_cache[list][scan8[10]] = ((LIST_NOT_USED)&0xFF)*0x01010101U; 1246 AV_WN32A(&h->ref_cache[list][scan8[10]], ((LIST_NOT_USED)&0xFF)*0x01010101u);
1246 continue; 1247 continue;
1247 } 1248 }
1248 1249
1249 ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]]; 1250 ref = &s->current_picture.ref_index[list][h->mb2b8_xy[mb_xy]];
1250 { 1251 {
1251 int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); 1252 int (*ref2frm)[64] = h->ref2frm[ h->slice_num&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
1252 *(uint32_t*)&h->ref_cache[list][scan8[ 0]] = 1253 AV_WN32A(&h->ref_cache[list][scan8[ 0]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
1253 *(uint32_t*)&h->ref_cache[list][scan8[ 2]] = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101; 1254 AV_WN32A(&h->ref_cache[list][scan8[ 2]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
1254 ref += h->b8_stride; 1255 ref += h->b8_stride;
1255 *(uint32_t*)&h->ref_cache[list][scan8[ 8]] = 1256 AV_WN32A(&h->ref_cache[list][scan8[ 8]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
1256 *(uint32_t*)&h->ref_cache[list][scan8[10]] = (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101; 1257 AV_WN32A(&h->ref_cache[list][scan8[10]], (pack16to32(ref2frm[list][ref[0]],ref2frm[list][ref[1]])&0x00FF00FF)*0x0101);
1257 } 1258 }
1258 1259
1259 b_stride = h->b_stride; 1260 b_stride = h->b_stride;
1260 mv_dst = &h->mv_cache[list][scan8[0]]; 1261 mv_dst = &h->mv_cache[list][scan8[0]];
1261 mv_src = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride]; 1262 mv_src = &s->current_picture.motion_val[list][4*s->mb_x + 4*s->mb_y*b_stride];
1275 4 L . .L . . . . 1276 4 L . .L . . . .
1276 5 L . .. . . . . 1277 5 L . .. . . . .
1277 */ 1278 */
1278 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec) 1279 //FIXME constraint_intra_pred & partitioning & nnz (let us hope this is just a typo in the spec)
1279 if(top_type){ 1280 if(top_type){
1280 *(uint32_t*)&h->non_zero_count_cache[4+8*0]= *(uint32_t*)&h->non_zero_count[top_xy][4+3*8]; 1281 AV_COPY32(&h->non_zero_count_cache[4+8*0], &h->non_zero_count[top_xy][4+3*8]);
1281 } 1282 }
1282 1283
1283 if(left_type[0]){ 1284 if(left_type[0]){
1284 h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][7+0*8]; 1285 h->non_zero_count_cache[3+8*1]= h->non_zero_count[left_xy[0]][7+0*8];
1285 h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][7+1*8]; 1286 h->non_zero_count_cache[3+8*2]= h->non_zero_count[left_xy[0]][7+1*8];
1331 h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]]; 1332 h->ref_cache[list][scan8[0] + 1 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 0]];
1332 h->ref_cache[list][scan8[0] + 2 - 1*8]= 1333 h->ref_cache[list][scan8[0] + 2 - 1*8]=
1333 h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]]; 1334 h->ref_cache[list][scan8[0] + 3 - 1*8]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + 1]];
1334 }else{ 1335 }else{
1335 AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]); 1336 AV_ZERO128(h->mv_cache[list][scan8[0] + 0 - 1*8]);
1336 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((LIST_NOT_USED)&0xFF)*0x01010101U; 1337 AV_WN32A(&h->ref_cache[list][scan8[0] + 0 - 1*8], ((LIST_NOT_USED)&0xFF)*0x01010101u);
1337 } 1338 }
1338 1339
1339 if(!IS_INTERLACED(mb_type^left_type[0])){ 1340 if(!IS_INTERLACED(mb_type^left_type[0])){
1340 if(USES_LIST(left_type[0], list)){ 1341 if(USES_LIST(left_type[0], list)){
1341 const int b_xy= h->mb2b_xy[left_xy[0]] + 3; 1342 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
1342 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1; 1343 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
1343 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2); 1344 int (*ref2frm)[64] = h->ref2frm[ h->slice_table[left_xy[0]]&(MAX_SLICES-1) ][0] + (MB_MBAFF ? 20 : 2);
1344 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*0]; 1345 AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 0 ], s->current_picture.motion_val[list][b_xy + h->b_stride*0]);
1345 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 8 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*1]; 1346 AV_COPY32(h->mv_cache[list][scan8[0] - 1 + 8 ], s->current_picture.motion_val[list][b_xy + h->b_stride*1]);
1346 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 +16 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*2]; 1347 AV_COPY32(h->mv_cache[list][scan8[0] - 1 +16 ], s->current_picture.motion_val[list][b_xy + h->b_stride*2]);
1347 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 +24 ]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*3]; 1348 AV_COPY32(h->mv_cache[list][scan8[0] - 1 +24 ], s->current_picture.motion_val[list][b_xy + h->b_stride*3]);
1348 h->ref_cache[list][scan8[0] - 1 + 0 ]= 1349 h->ref_cache[list][scan8[0] - 1 + 0 ]=
1349 h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*0]]; 1350 h->ref_cache[list][scan8[0] - 1 + 8 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*0]];
1350 h->ref_cache[list][scan8[0] - 1 +16 ]= 1351 h->ref_cache[list][scan8[0] - 1 +16 ]=
1351 h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*1]]; 1352 h->ref_cache[list][scan8[0] - 1 +24 ]= ref2frm[list][s->current_picture.ref_index[list][b8_xy + h->b8_stride*1]];
1352 }else{ 1353 }else{
1353 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 0 ]= 1354 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 0 ]);
1354 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 8 ]= 1355 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 + 8 ]);
1355 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 +16 ]= 1356 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +16 ]);
1356 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 +24 ]= 0; 1357 AV_ZERO32(h->mv_cache [list][scan8[0] - 1 +24 ]);
1357 h->ref_cache[list][scan8[0] - 1 + 0 ]= 1358 h->ref_cache[list][scan8[0] - 1 + 0 ]=
1358 h->ref_cache[list][scan8[0] - 1 + 8 ]= 1359 h->ref_cache[list][scan8[0] - 1 + 8 ]=
1359 h->ref_cache[list][scan8[0] - 1 + 16 ]= 1360 h->ref_cache[list][scan8[0] - 1 + 16 ]=
1360 h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED; 1361 h->ref_cache[list][scan8[0] - 1 + 24 ]= LIST_NOT_USED;
1361 } 1362 }
1384 static inline void write_back_non_zero_count(H264Context *h){ 1385 static inline void write_back_non_zero_count(H264Context *h){
1385 const int mb_xy= h->mb_xy; 1386 const int mb_xy= h->mb_xy;
1386 1387
1387 AV_COPY64(&h->non_zero_count[mb_xy][ 0], &h->non_zero_count_cache[0+8*1]); 1388 AV_COPY64(&h->non_zero_count[mb_xy][ 0], &h->non_zero_count_cache[0+8*1]);
1388 AV_COPY64(&h->non_zero_count[mb_xy][ 8], &h->non_zero_count_cache[0+8*2]); 1389 AV_COPY64(&h->non_zero_count[mb_xy][ 8], &h->non_zero_count_cache[0+8*2]);
1389 *((uint32_t*)&h->non_zero_count[mb_xy][16]) = *((uint32_t*)&h->non_zero_count_cache[0+8*5]); 1390 AV_COPY32(&h->non_zero_count[mb_xy][16], &h->non_zero_count_cache[0+8*5]);
1390 *((uint32_t*)&h->non_zero_count[mb_xy][20]) = *((uint32_t*)&h->non_zero_count_cache[4+8*3]); 1391 AV_COPY32(&h->non_zero_count[mb_xy][20], &h->non_zero_count_cache[4+8*3]);
1391 AV_COPY64(&h->non_zero_count[mb_xy][24], &h->non_zero_count_cache[0+8*4]); 1392 AV_COPY64(&h->non_zero_count[mb_xy][24], &h->non_zero_count_cache[0+8*4]);
1392 } 1393 }
1393 1394
1394 static inline void write_back_motion(H264Context *h, int mb_type){ 1395 static inline void write_back_motion(H264Context *h, int mb_type){
1395 MpegEncContext * const s = &h->s; 1396 MpegEncContext * const s = &h->s;
1444 } 1445 }
1445 } 1446 }
1446 1447
1447 static inline int get_dct8x8_allowed(H264Context *h){ 1448 static inline int get_dct8x8_allowed(H264Context *h){
1448 if(h->sps.direct_8x8_inference_flag) 1449 if(h->sps.direct_8x8_inference_flag)
1449 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL)); 1450 return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8 )*0x0001000100010001ULL));
1450 else 1451 else
1451 return !(*(uint64_t*)h->sub_mb_type & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL)); 1452 return !(AV_RN64A(h->sub_mb_type) & ((MB_TYPE_16x8|MB_TYPE_8x16|MB_TYPE_8x8|MB_TYPE_DIRECT2)*0x0001000100010001ULL));
1452 } 1453 }
1453 1454
1454 /** 1455 /**
1455 * decodes a P_SKIP or B_SKIP macroblock 1456 * decodes a P_SKIP or B_SKIP macroblock
1456 */ 1457 */