comparison h264.h @ 11183:d1a855cb0a0c libavcodec

Split the setting of neighboring MBs out of fill_decode_caches() into fill_decode_neighbors(); no speed change.
author michael
date Mon, 15 Feb 2010 22:07:02 +0000
parents c163ffa8c59e
children 5e583a785508
comparison
equal deleted inserted replaced
11182:0c93bb2b3cb0 11183:d1a855cb0a0c
280 280
281 //prediction stuff 281 //prediction stuff
282 int chroma_pred_mode; 282 int chroma_pred_mode;
283 int intra16x16_pred_mode; 283 int intra16x16_pred_mode;
284 284
285 int topleft_mb_xy;
285 int top_mb_xy; 286 int top_mb_xy;
287 int topright_mb_xy;
286 int left_mb_xy[2]; 288 int left_mb_xy[2];
287 289
290 int topleft_type;
288 int top_type; 291 int top_type;
292 int topright_type;
289 int left_type[2]; 293 int left_type[2];
294
295 const uint8_t * left_block;
296 int topleft_partition;
290 297
291 int8_t intra4x4_pred_mode_cache[5*8]; 298 int8_t intra4x4_pred_mode_cache[5*8];
292 int8_t (*intra4x4_pred_mode)[8]; 299 int8_t (*intra4x4_pred_mode)[8];
293 H264PredContext hpc; 300 H264PredContext hpc;
294 unsigned int topleft_samples_available; 301 unsigned int topleft_samples_available;
732 return h->pps.chroma_qp_table[t][qscale]; 739 return h->pps.chroma_qp_table[t][qscale];
733 } 740 }
734 741
735 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my); 742 static inline void pred_pskip_motion(H264Context * const h, int * const mx, int * const my);
736 743
737 static void fill_decode_caches(H264Context *h, int mb_type){ 744 static void fill_decode_neighbors(H264Context *h, int mb_type){
738 MpegEncContext * const s = &h->s; 745 MpegEncContext * const s = &h->s;
739 const int mb_xy= h->mb_xy; 746 const int mb_xy= h->mb_xy;
740 int topleft_xy, top_xy, topright_xy, left_xy[2]; 747 int topleft_xy, top_xy, topright_xy, left_xy[2];
741 int topleft_type, top_type, topright_type, left_type[2];
742 const uint8_t * left_block;
743 int topleft_partition= -1;
744 int i;
745 static const uint8_t left_block_options[4][16]={ 748 static const uint8_t left_block_options[4][16]={
746 {0,1,2,3,7,10,8,11,7+0*8, 7+1*8, 7+2*8, 7+3*8, 2+0*8, 2+3*8, 2+1*8, 2+2*8}, 749 {0,1,2,3,7,10,8,11,7+0*8, 7+1*8, 7+2*8, 7+3*8, 2+0*8, 2+3*8, 2+1*8, 2+2*8},
747 {2,2,3,3,8,11,8,11,7+2*8, 7+2*8, 7+3*8, 7+3*8, 2+1*8, 2+2*8, 2+1*8, 2+2*8}, 750 {2,2,3,3,8,11,8,11,7+2*8, 7+2*8, 7+3*8, 7+3*8, 2+1*8, 2+2*8, 2+1*8, 2+2*8},
748 {0,0,1,1,7,10,7,10,7+0*8, 7+0*8, 7+1*8, 7+1*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8}, 751 {0,0,1,1,7,10,7,10,7+0*8, 7+0*8, 7+1*8, 7+1*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8},
749 {0,2,0,2,7,10,7,10,7+0*8, 7+2*8, 7+0*8, 7+2*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8} 752 {0,2,0,2,7,10,7,10,7+0*8, 7+2*8, 7+0*8, 7+2*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8}
750 }; 753 };
751 754
755 h->topleft_partition= -1;
756
752 top_xy = mb_xy - (s->mb_stride << MB_FIELD); 757 top_xy = mb_xy - (s->mb_stride << MB_FIELD);
753 758
754 /* Wow, what a mess, why didn't they simplify the interlacing & intra 759 /* Wow, what a mess, why didn't they simplify the interlacing & intra
755 * stuff, I can't imagine that these complex rules are worth it. */ 760 * stuff, I can't imagine that these complex rules are worth it. */
756 761
757 topleft_xy = top_xy - 1; 762 topleft_xy = top_xy - 1;
758 topright_xy= top_xy + 1; 763 topright_xy= top_xy + 1;
759 left_xy[1] = left_xy[0] = mb_xy-1; 764 left_xy[1] = left_xy[0] = mb_xy-1;
760 left_block = left_block_options[0]; 765 h->left_block = left_block_options[0];
761 if(FRAME_MBAFF){ 766 if(FRAME_MBAFF){
762 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]); 767 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]);
763 const int curr_mb_field_flag = IS_INTERLACED(mb_type); 768 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
764 if(s->mb_y&1){ 769 if(s->mb_y&1){
765 if (left_mb_field_flag != curr_mb_field_flag) { 770 if (left_mb_field_flag != curr_mb_field_flag) {
766 left_xy[1] = left_xy[0] = mb_xy - s->mb_stride - 1; 771 left_xy[1] = left_xy[0] = mb_xy - s->mb_stride - 1;
767 if (curr_mb_field_flag) { 772 if (curr_mb_field_flag) {
768 left_xy[1] += s->mb_stride; 773 left_xy[1] += s->mb_stride;
769 left_block = left_block_options[3]; 774 h->left_block = left_block_options[3];
770 } else { 775 } else {
771 topleft_xy += s->mb_stride; 776 topleft_xy += s->mb_stride;
772 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition 777 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
773 topleft_partition = 0; 778 h->topleft_partition = 0;
774 left_block = left_block_options[1]; 779 h->left_block = left_block_options[1];
775 } 780 }
776 } 781 }
777 }else{ 782 }else{
778 if(curr_mb_field_flag){ 783 if(curr_mb_field_flag){
779 topleft_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy - 1]>>7)&1)-1); 784 topleft_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy - 1]>>7)&1)-1);
782 } 787 }
783 if (left_mb_field_flag != curr_mb_field_flag) { 788 if (left_mb_field_flag != curr_mb_field_flag) {
784 left_xy[1] = left_xy[0] = mb_xy - 1; 789 left_xy[1] = left_xy[0] = mb_xy - 1;
785 if (curr_mb_field_flag) { 790 if (curr_mb_field_flag) {
786 left_xy[1] += s->mb_stride; 791 left_xy[1] += s->mb_stride;
787 left_block = left_block_options[3]; 792 h->left_block = left_block_options[3];
788 } else { 793 } else {
789 left_block = left_block_options[2]; 794 h->left_block = left_block_options[2];
790 } 795 }
791 } 796 }
792 } 797 }
793 } 798 }
794 799
795 h->top_mb_xy = top_xy; 800 h->topleft_mb_xy = topleft_xy;
801 h->top_mb_xy = top_xy;
802 h->topright_mb_xy= topright_xy;
796 h->left_mb_xy[0] = left_xy[0]; 803 h->left_mb_xy[0] = left_xy[0];
797 h->left_mb_xy[1] = left_xy[1]; 804 h->left_mb_xy[1] = left_xy[1];
798 topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0; 805 //FIXME do we need all in the context?
799 top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0; 806 h->topleft_type = h->slice_table[topleft_xy ] == h->slice_num ? s->current_picture.mb_type[topleft_xy] : 0;
800 topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0; 807 h->top_type = h->slice_table[top_xy ] == h->slice_num ? s->current_picture.mb_type[top_xy] : 0;
801 left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0; 808 h->topright_type= h->slice_table[topright_xy] == h->slice_num ? s->current_picture.mb_type[topright_xy]: 0;
802 left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0; 809 h->left_type[0] = h->slice_table[left_xy[0] ] == h->slice_num ? s->current_picture.mb_type[left_xy[0]] : 0;
810 h->left_type[1] = h->slice_table[left_xy[1] ] == h->slice_num ? s->current_picture.mb_type[left_xy[1]] : 0;
811 }
812
813 static void fill_decode_caches(H264Context *h, int mb_type){
814 MpegEncContext * const s = &h->s;
815 const int mb_xy= h->mb_xy;
816 int topleft_xy, top_xy, topright_xy, left_xy[2];
817 int topleft_type, top_type, topright_type, left_type[2];
818 const uint8_t * left_block= h->left_block;
819 int i;
820
821 topleft_xy = h->topleft_mb_xy ;
822 top_xy = h->top_mb_xy ;
823 topright_xy = h->topright_mb_xy;
824 left_xy[0] = h->left_mb_xy[0] ;
825 left_xy[1] = h->left_mb_xy[1] ;
826 topleft_type = h->topleft_type ;
827 top_type = h->top_type ;
828 topright_type= h->topright_type ;
829 left_type[0] = h->left_type[0] ;
830 left_type[1] = h->left_type[1] ;
803 831
804 if(!IS_SKIP(mb_type)){ 832 if(!IS_SKIP(mb_type)){
805 if(IS_INTRA(mb_type)){ 833 if(IS_INTRA(mb_type)){
806 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1; 834 int type_mask= h->pps.constrained_intra_pred ? IS_INTRA(-1) : -1;
807 h->topleft_samples_available= 835 h->topleft_samples_available=
993 h->ref_cache[list][cache_idx+8]= (left_type[i]) ? LIST_NOT_USED : PART_NOT_AVAILABLE; 1021 h->ref_cache[list][cache_idx+8]= (left_type[i]) ? LIST_NOT_USED : PART_NOT_AVAILABLE;
994 } 1022 }
995 } 1023 }
996 1024
997 if(USES_LIST(topleft_type, list)){ 1025 if(USES_LIST(topleft_type, list)){
998 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + h->b_stride + (topleft_partition & 2*h->b_stride); 1026 const int b_xy = h->mb2b_xy [topleft_xy] + 3 + h->b_stride + (h->topleft_partition & 2*h->b_stride);
999 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (topleft_partition & h->b8_stride); 1027 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + (h->topleft_partition & h->b8_stride);
1000 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy]; 1028 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
1001 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy]; 1029 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
1002 }else{ 1030 }else{
1003 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0; 1031 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
1004 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; 1032 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
1450 1478
1451 if( h->slice_type_nos == FF_B_TYPE ) 1479 if( h->slice_type_nos == FF_B_TYPE )
1452 { 1480 {
1453 // just for fill_caches. pred_direct_motion will set the real mb_type 1481 // just for fill_caches. pred_direct_motion will set the real mb_type
1454 mb_type|= MB_TYPE_L0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP; 1482 mb_type|= MB_TYPE_L0L1|MB_TYPE_DIRECT2|MB_TYPE_SKIP;
1455 if(h->direct_spatial_mv_pred) 1483 if(h->direct_spatial_mv_pred){
1484 fill_decode_neighbors(h, mb_type);
1456 fill_decode_caches(h, mb_type); //FIXME check what is needed and what not ... 1485 fill_decode_caches(h, mb_type); //FIXME check what is needed and what not ...
1486 }
1457 ff_h264_pred_direct_motion(h, &mb_type); 1487 ff_h264_pred_direct_motion(h, &mb_type);
1458 mb_type|= MB_TYPE_SKIP; 1488 mb_type|= MB_TYPE_SKIP;
1459 } 1489 }
1460 else 1490 else
1461 { 1491 {
1462 int mx, my; 1492 int mx, my;
1463 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP; 1493 mb_type|= MB_TYPE_16x16|MB_TYPE_P0L0|MB_TYPE_P1L0|MB_TYPE_SKIP;
1464 1494
1495 fill_decode_neighbors(h, mb_type);
1465 fill_decode_caches(h, mb_type); //FIXME check what is needed and what not ... 1496 fill_decode_caches(h, mb_type); //FIXME check what is needed and what not ...
1466 pred_pskip_motion(h, &mx, &my); 1497 pred_pskip_motion(h, &mx, &my);
1467 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1); 1498 fill_rectangle(&h->ref_cache[0][scan8[0]], 4, 4, 8, 0, 1);
1468 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4); 1499 fill_rectangle( h->mv_cache[0][scan8[0]], 4, 4, 8, pack16to32(mx,my), 4);
1469 } 1500 }