comparison h264.c @ 11292:411ab09ada91 libavcodec

Get rid of mb2b8_xy and b8_stride; change arrays organized based on b8_stride to ones based on mb_stride in h264. About 20 CPU cycles faster overall per MB.
author michael
date Thu, 25 Feb 2010 23:44:42 +0000
parents 43a179c59c57
children b79f22337b6f
comparison
equal deleted inserted replaced
11291:1527e25ec9d4 11292:411ab09ada91
659 h->slice_table= NULL; 659 h->slice_table= NULL;
660 av_freep(&h->list_counts); 660 av_freep(&h->list_counts);
661 661
662 av_freep(&h->mb2b_xy); 662 av_freep(&h->mb2b_xy);
663 av_freep(&h->mb2br_xy); 663 av_freep(&h->mb2br_xy);
664 av_freep(&h->mb2b8_xy);
665 664
666 for(i = 0; i < MAX_THREADS; i++) { 665 for(i = 0; i < MAX_THREADS; i++) {
667 hx = h->thread_context[i]; 666 hx = h->thread_context[i];
668 if(!hx) continue; 667 if(!hx) continue;
669 av_freep(&hx->top_borders[1]); 668 av_freep(&hx->top_borders[1]);
762 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base)); 761 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base));
763 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1; 762 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1;
764 763
765 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy , big_mb_num * sizeof(uint32_t), fail); 764 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy , big_mb_num * sizeof(uint32_t), fail);
766 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy , big_mb_num * sizeof(uint32_t), fail); 765 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy , big_mb_num * sizeof(uint32_t), fail);
767 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b8_xy , big_mb_num * sizeof(uint32_t), fail);
768 for(y=0; y<s->mb_height; y++){ 766 for(y=0; y<s->mb_height; y++){
769 for(x=0; x<s->mb_width; x++){ 767 for(x=0; x<s->mb_width; x++){
770 const int mb_xy= x + y*s->mb_stride; 768 const int mb_xy= x + y*s->mb_stride;
771 const int b_xy = 4*x + 4*y*h->b_stride; 769 const int b_xy = 4*x + 4*y*h->b_stride;
772 const int b8_xy= 2*x + 2*y*h->b8_stride;
773 770
774 h->mb2b_xy [mb_xy]= b_xy; 771 h->mb2b_xy [mb_xy]= b_xy;
775 h->mb2br_xy[mb_xy]= 8*(FMO ? mb_xy : (mb_xy % (2*s->mb_stride))); 772 h->mb2br_xy[mb_xy]= 8*(FMO ? mb_xy : (mb_xy % (2*s->mb_stride)));
776 h->mb2b8_xy[mb_xy]= b8_xy;
777 } 773 }
778 } 774 }
779 775
780 s->obmc_scratchpad = NULL; 776 s->obmc_scratchpad = NULL;
781 777
796 dst->non_zero_count = src->non_zero_count; 792 dst->non_zero_count = src->non_zero_count;
797 dst->slice_table = src->slice_table; 793 dst->slice_table = src->slice_table;
798 dst->cbp_table = src->cbp_table; 794 dst->cbp_table = src->cbp_table;
799 dst->mb2b_xy = src->mb2b_xy; 795 dst->mb2b_xy = src->mb2b_xy;
800 dst->mb2br_xy = src->mb2br_xy; 796 dst->mb2br_xy = src->mb2br_xy;
801 dst->mb2b8_xy = src->mb2b8_xy;
802 dst->chroma_pred_mode_table = src->chroma_pred_mode_table; 797 dst->chroma_pred_mode_table = src->chroma_pred_mode_table;
803 dst->mvd_table[0] = src->mvd_table[0]; 798 dst->mvd_table[0] = src->mvd_table[0];
804 dst->mvd_table[1] = src->mvd_table[1]; 799 dst->mvd_table[1] = src->mvd_table[1];
805 dst->direct_table = src->direct_table; 800 dst->direct_table = src->direct_table;
806 dst->list_counts = src->list_counts; 801 dst->list_counts = src->list_counts;
1766 1761
1767 s->mb_width= h->sps.mb_width; 1762 s->mb_width= h->sps.mb_width;
1768 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag); 1763 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag);
1769 1764
1770 h->b_stride= s->mb_width*4; 1765 h->b_stride= s->mb_width*4;
1771 h->b8_stride= s->mb_width*2;
1772 1766
1773 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7); 1767 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7);
1774 if(h->sps.frame_mbs_only_flag) 1768 if(h->sps.frame_mbs_only_flag)
1775 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7); 1769 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7);
1776 else 1770 else