comparison h264.h @ 11008:ec192d9ebac5 libavcodec

Optimize MB neighbor initialization for MBAFF in fill_caches(). Speedup of ~10 CPU cycles.
author michael
date Tue, 26 Jan 2010 10:35:36 +0000
parents 1c8892d7a090
children 3d8426123947
comparison
equal deleted inserted replaced
11007:8d4ae55fdada 11008:ec192d9ebac5
739 {2,2,3,3,8,11,8,11,7+2*8, 7+2*8, 7+3*8, 7+3*8, 2+1*8, 2+2*8, 2+1*8, 2+2*8}, 739 {2,2,3,3,8,11,8,11,7+2*8, 7+2*8, 7+3*8, 7+3*8, 2+1*8, 2+2*8, 2+1*8, 2+2*8},
740 {0,0,1,1,7,10,7,10,7+0*8, 7+0*8, 7+1*8, 7+1*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8}, 740 {0,0,1,1,7,10,7,10,7+0*8, 7+0*8, 7+1*8, 7+1*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8},
741 {0,2,0,2,7,10,7,10,7+0*8, 7+2*8, 7+0*8, 7+2*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8} 741 {0,2,0,2,7,10,7,10,7+0*8, 7+2*8, 7+0*8, 7+2*8, 2+0*8, 2+3*8, 2+0*8, 2+3*8}
742 }; 742 };
743 743
744 top_xy = mb_xy - (s->mb_stride << FIELD_PICTURE); 744 top_xy = mb_xy - (s->mb_stride << MB_FIELD);
745 745
746 //FIXME deblocking could skip the intra and nnz parts. 746 //FIXME deblocking could skip the intra and nnz parts.
747 // if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF) 747 // if(for_deblock && (h->slice_num == 1 || h->slice_table[mb_xy] == h->slice_table[top_xy]) && !FRAME_MBAFF)
748 // return; 748 // return;
749 749
753 topleft_xy = top_xy - 1; 753 topleft_xy = top_xy - 1;
754 topright_xy= top_xy + 1; 754 topright_xy= top_xy + 1;
755 left_xy[1] = left_xy[0] = mb_xy-1; 755 left_xy[1] = left_xy[0] = mb_xy-1;
756 left_block = left_block_options[0]; 756 left_block = left_block_options[0];
757 if(FRAME_MBAFF){ 757 if(FRAME_MBAFF){
758 const int pair_xy = s->mb_x + (s->mb_y & ~1)*s->mb_stride; 758 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[mb_xy-1]);
759 const int top_pair_xy = pair_xy - s->mb_stride;
760 const int topleft_pair_xy = top_pair_xy - 1;
761 const int topright_pair_xy = top_pair_xy + 1;
762 const int topleft_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topleft_pair_xy]);
763 const int top_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[top_pair_xy]);
764 const int topright_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[topright_pair_xy]);
765 const int left_mb_field_flag = IS_INTERLACED(s->current_picture.mb_type[pair_xy-1]);
766 const int curr_mb_field_flag = IS_INTERLACED(mb_type); 759 const int curr_mb_field_flag = IS_INTERLACED(mb_type);
767 const int bottom = (s->mb_y & 1); 760 if(s->mb_y&1){
768 tprintf(s->avctx, "fill_caches: curr_mb_field_flag:%d, left_mb_field_flag:%d, topleft_mb_field_flag:%d, top_mb_field_flag:%d, topright_mb_field_flag:%d\n", curr_mb_field_flag, left_mb_field_flag, topleft_mb_field_flag, top_mb_field_flag, topright_mb_field_flag); 761 if (left_mb_field_flag != curr_mb_field_flag) {
769 762 left_xy[1] = left_xy[0] = mb_xy - s->mb_stride - 1;
770 if (curr_mb_field_flag && (bottom || top_mb_field_flag)){ 763 if (curr_mb_field_flag) {
771 top_xy -= s->mb_stride; 764 left_xy[1] += s->mb_stride;
772 } 765 left_block = left_block_options[3];
773 if (curr_mb_field_flag && (bottom || topleft_mb_field_flag)){ 766 } else {
774 topleft_xy -= s->mb_stride; 767 topleft_xy += s->mb_stride;
775 } else if(bottom && !curr_mb_field_flag && left_mb_field_flag) { 768 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition
776 topleft_xy += s->mb_stride; 769 topleft_partition = 0;
777 // take top left mv from the middle of the mb, as opposed to all other modes which use the bottom right partition 770 left_block = left_block_options[1];
778 topleft_partition = 0; 771 }
779 } 772 }
780 if (curr_mb_field_flag && (bottom || topright_mb_field_flag)){ 773 }else{
781 topright_xy -= s->mb_stride; 774 if(curr_mb_field_flag){
782 } 775 topleft_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy - 1]>>7)&1)-1);
783 if (left_mb_field_flag != curr_mb_field_flag) { 776 topright_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy + 1]>>7)&1)-1);
784 left_xy[1] = left_xy[0] = pair_xy - 1; 777 top_xy += s->mb_stride & (((s->current_picture.mb_type[top_xy ]>>7)&1)-1);
785 if (curr_mb_field_flag) { 778 }
786 left_xy[1] += s->mb_stride; 779 if (left_mb_field_flag != curr_mb_field_flag) {
787 left_block = left_block_options[3]; 780 left_xy[1] = left_xy[0] = mb_xy - 1;
788 } else { 781 if (curr_mb_field_flag) {
789 left_block= left_block_options[2 - bottom]; 782 left_xy[1] += s->mb_stride;
783 left_block = left_block_options[3];
784 } else {
785 left_block = left_block_options[2];
786 }
790 } 787 }
791 } 788 }
792 } 789 }
793 790
794 h->top_mb_xy = top_xy; 791 h->top_mb_xy = top_xy;