comparison h264.h @ 11288:d5dd13f345fc libavcodec

Store data in direct_table interleaved. Seems to be 20 CPU cycles faster.
author michael
date Thu, 25 Feb 2010 15:27:55 +0000
parents c8fea332d5d9
children 411ab09ada91
comparison
equal deleted inserted replaced
11287:c8fea332d5d9 11288:d5dd13f345fc
1086 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, MB_TYPE_16x16>>1, 1); 1086 fill_rectangle(&h->direct_cache[scan8[0]], 4, 4, 8, MB_TYPE_16x16>>1, 1);
1087 1087
1088 if(IS_DIRECT(top_type)){ 1088 if(IS_DIRECT(top_type)){
1089 AV_WN32A(&h->direct_cache[scan8[0] - 1*8], 0x01010101*(MB_TYPE_DIRECT2>>1)); 1089 AV_WN32A(&h->direct_cache[scan8[0] - 1*8], 0x01010101*(MB_TYPE_DIRECT2>>1));
1090 }else if(IS_8X8(top_type)){ 1090 }else if(IS_8X8(top_type)){
1091 int b8_xy = h->mb2b8_xy[top_xy] + h->b8_stride; 1091 int b8_xy = 4*top_xy;
1092 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy]; 1092 h->direct_cache[scan8[0] + 0 - 1*8]= h->direct_table[b8_xy + 2];
1093 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 1]; 1093 h->direct_cache[scan8[0] + 2 - 1*8]= h->direct_table[b8_xy + 3];
1094 }else{ 1094 }else{
1095 AV_WN32A(&h->direct_cache[scan8[0] - 1*8], 0x01010101*(MB_TYPE_16x16>>1)); 1095 AV_WN32A(&h->direct_cache[scan8[0] - 1*8], 0x01010101*(MB_TYPE_16x16>>1));
1096 } 1096 }
1097 1097
1098 if(IS_DIRECT(left_type[0])) 1098 if(IS_DIRECT(left_type[0]))
1099 h->direct_cache[scan8[0] - 1 + 0*8]= MB_TYPE_DIRECT2>>1; 1099 h->direct_cache[scan8[0] - 1 + 0*8]= MB_TYPE_DIRECT2>>1;
1100 else if(IS_8X8(left_type[0])) 1100 else if(IS_8X8(left_type[0]))
1101 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[h->mb2b8_xy[left_xy[0]] + 1 + h->b8_stride*(left_block[0]>>1)]; 1101 h->direct_cache[scan8[0] - 1 + 0*8]= h->direct_table[4*left_xy[0] + 1 + (left_block[0]&~1)];
1102 else 1102 else
1103 h->direct_cache[scan8[0] - 1 + 0*8]= MB_TYPE_16x16>>1; 1103 h->direct_cache[scan8[0] - 1 + 0*8]= MB_TYPE_16x16>>1;
1104 1104
1105 if(IS_DIRECT(left_type[1])) 1105 if(IS_DIRECT(left_type[1]))
1106 h->direct_cache[scan8[0] - 1 + 2*8]= MB_TYPE_DIRECT2>>1; 1106 h->direct_cache[scan8[0] - 1 + 2*8]= MB_TYPE_DIRECT2>>1;
1107 else if(IS_8X8(left_type[1])) 1107 else if(IS_8X8(left_type[1]))
1108 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[h->mb2b8_xy[left_xy[1]] + 1 + h->b8_stride*(left_block[2]>>1)]; 1108 h->direct_cache[scan8[0] - 1 + 2*8]= h->direct_table[4*left_xy[1] + 1 + (left_block[2]&~1)];
1109 else 1109 else
1110 h->direct_cache[scan8[0] - 1 + 2*8]= MB_TYPE_16x16>>1; 1110 h->direct_cache[scan8[0] - 1 + 2*8]= MB_TYPE_16x16>>1;
1111 } 1111 }
1112 } 1112 }
1113 } 1113 }
1431 } 1431 }
1432 } 1432 }
1433 1433
1434 if(h->slice_type_nos == FF_B_TYPE && CABAC){ 1434 if(h->slice_type_nos == FF_B_TYPE && CABAC){
1435 if(IS_8X8(mb_type)){ 1435 if(IS_8X8(mb_type)){
1436 uint8_t *direct_table = &h->direct_table[b8_xy]; 1436 uint8_t *direct_table = &h->direct_table[4*h->mb_xy];
1437 direct_table[1+0*h->b8_stride] = h->sub_mb_type[1]>>1; 1437 direct_table[1] = h->sub_mb_type[1]>>1;
1438 direct_table[0+1*h->b8_stride] = h->sub_mb_type[2]>>1; 1438 direct_table[2] = h->sub_mb_type[2]>>1;
1439 direct_table[1+1*h->b8_stride] = h->sub_mb_type[3]>>1; 1439 direct_table[3] = h->sub_mb_type[3]>>1;
1440 } 1440 }
1441 } 1441 }
1442 } 1442 }
1443 1443
1444 static inline int get_dct8x8_allowed(H264Context *h){ 1444 static inline int get_dct8x8_allowed(H264Context *h){