Mercurial > libavcodec.hg
comparison h264.c @ 11292:411ab09ada91 libavcodec
Get rid of mb2b8_xy and b8_stride; change arrays organized based on b8_stride to
ones based on mb_stride in h264.
About 20 CPU cycles faster overall per MB.
author | michael |
---|---|
date | Thu, 25 Feb 2010 23:44:42 +0000 |
parents | 43a179c59c57 |
children | b79f22337b6f |
comparison
equal
deleted
inserted
replaced
11291:1527e25ec9d4 | 11292:411ab09ada91 |
---|---|
659 h->slice_table= NULL; | 659 h->slice_table= NULL; |
660 av_freep(&h->list_counts); | 660 av_freep(&h->list_counts); |
661 | 661 |
662 av_freep(&h->mb2b_xy); | 662 av_freep(&h->mb2b_xy); |
663 av_freep(&h->mb2br_xy); | 663 av_freep(&h->mb2br_xy); |
664 av_freep(&h->mb2b8_xy); | |
665 | 664 |
666 for(i = 0; i < MAX_THREADS; i++) { | 665 for(i = 0; i < MAX_THREADS; i++) { |
667 hx = h->thread_context[i]; | 666 hx = h->thread_context[i]; |
668 if(!hx) continue; | 667 if(!hx) continue; |
669 av_freep(&hx->top_borders[1]); | 668 av_freep(&hx->top_borders[1]); |
762 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base)); | 761 memset(h->slice_table_base, -1, (big_mb_num+s->mb_stride) * sizeof(*h->slice_table_base)); |
763 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1; | 762 h->slice_table= h->slice_table_base + s->mb_stride*2 + 1; |
764 | 763 |
765 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy , big_mb_num * sizeof(uint32_t), fail); | 764 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b_xy , big_mb_num * sizeof(uint32_t), fail); |
766 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy , big_mb_num * sizeof(uint32_t), fail); | 765 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2br_xy , big_mb_num * sizeof(uint32_t), fail); |
767 FF_ALLOCZ_OR_GOTO(h->s.avctx, h->mb2b8_xy , big_mb_num * sizeof(uint32_t), fail); | |
768 for(y=0; y<s->mb_height; y++){ | 766 for(y=0; y<s->mb_height; y++){ |
769 for(x=0; x<s->mb_width; x++){ | 767 for(x=0; x<s->mb_width; x++){ |
770 const int mb_xy= x + y*s->mb_stride; | 768 const int mb_xy= x + y*s->mb_stride; |
771 const int b_xy = 4*x + 4*y*h->b_stride; | 769 const int b_xy = 4*x + 4*y*h->b_stride; |
772 const int b8_xy= 2*x + 2*y*h->b8_stride; | |
773 | 770 |
774 h->mb2b_xy [mb_xy]= b_xy; | 771 h->mb2b_xy [mb_xy]= b_xy; |
775 h->mb2br_xy[mb_xy]= 8*(FMO ? mb_xy : (mb_xy % (2*s->mb_stride))); | 772 h->mb2br_xy[mb_xy]= 8*(FMO ? mb_xy : (mb_xy % (2*s->mb_stride))); |
776 h->mb2b8_xy[mb_xy]= b8_xy; | |
777 } | 773 } |
778 } | 774 } |
779 | 775 |
780 s->obmc_scratchpad = NULL; | 776 s->obmc_scratchpad = NULL; |
781 | 777 |
796 dst->non_zero_count = src->non_zero_count; | 792 dst->non_zero_count = src->non_zero_count; |
797 dst->slice_table = src->slice_table; | 793 dst->slice_table = src->slice_table; |
798 dst->cbp_table = src->cbp_table; | 794 dst->cbp_table = src->cbp_table; |
799 dst->mb2b_xy = src->mb2b_xy; | 795 dst->mb2b_xy = src->mb2b_xy; |
800 dst->mb2br_xy = src->mb2br_xy; | 796 dst->mb2br_xy = src->mb2br_xy; |
801 dst->mb2b8_xy = src->mb2b8_xy; | |
802 dst->chroma_pred_mode_table = src->chroma_pred_mode_table; | 797 dst->chroma_pred_mode_table = src->chroma_pred_mode_table; |
803 dst->mvd_table[0] = src->mvd_table[0]; | 798 dst->mvd_table[0] = src->mvd_table[0]; |
804 dst->mvd_table[1] = src->mvd_table[1]; | 799 dst->mvd_table[1] = src->mvd_table[1]; |
805 dst->direct_table = src->direct_table; | 800 dst->direct_table = src->direct_table; |
806 dst->list_counts = src->list_counts; | 801 dst->list_counts = src->list_counts; |
1766 | 1761 |
1767 s->mb_width= h->sps.mb_width; | 1762 s->mb_width= h->sps.mb_width; |
1768 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag); | 1763 s->mb_height= h->sps.mb_height * (2 - h->sps.frame_mbs_only_flag); |
1769 | 1764 |
1770 h->b_stride= s->mb_width*4; | 1765 h->b_stride= s->mb_width*4; |
1771 h->b8_stride= s->mb_width*2; | |
1772 | 1766 |
1773 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7); | 1767 s->width = 16*s->mb_width - 2*FFMIN(h->sps.crop_right, 7); |
1774 if(h->sps.frame_mbs_only_flag) | 1768 if(h->sps.frame_mbs_only_flag) |
1775 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7); | 1769 s->height= 16*s->mb_height - 2*FFMIN(h->sps.crop_bottom, 7); |
1776 else | 1770 else |