Mercurial > libavcodec.hg
comparison h264.c @ 3212:5fb704618ec4 libavcodec
1.5x faster write_back_motion, 1-3% overall
author | lorenm |
---|---|
date | Wed, 22 Mar 2006 12:41:02 +0000 |
parents | 15157293beea |
children | 06f98047ff26 |
Comparison legend: equal, deleted, inserted, replaced
3211:b77b5e7072d6 | 3212:5fb704618ec4 |
---|---|
713 }*/ | 713 }*/ |
714 continue; | 714 continue; |
715 } | 715 } |
716 h->mv_cache_clean[list]= 0; | 716 h->mv_cache_clean[list]= 0; |
717 | 717 |
718 if(IS_INTER(top_type)){ | 718 if(USES_LIST(top_type, list)){ |
719 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; | 719 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; |
720 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride; | 720 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride; |
721 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0]; | 721 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0]; |
722 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1]; | 722 *(uint32_t*)h->mv_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 1]; |
723 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2]; | 723 *(uint32_t*)h->mv_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 2]; |
733 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0; | 733 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0; |
734 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101; | 734 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101; |
735 } | 735 } |
736 | 736 |
737 //FIXME unify cleanup or sth | 737 //FIXME unify cleanup or sth |
738 if(IS_INTER(left_type[0])){ | 738 if(USES_LIST(left_type[0], list)){ |
739 const int b_xy= h->mb2b_xy[left_xy[0]] + 3; | 739 const int b_xy= h->mb2b_xy[left_xy[0]] + 3; |
740 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1; | 740 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1; |
741 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]]; | 741 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]]; |
742 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]]; | 742 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[1]]; |
743 h->ref_cache[list][scan8[0] - 1 + 0*8]= | 743 h->ref_cache[list][scan8[0] - 1 + 0*8]= |
747 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0; | 747 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 1*8]= 0; |
748 h->ref_cache[list][scan8[0] - 1 + 0*8]= | 748 h->ref_cache[list][scan8[0] - 1 + 0*8]= |
749 h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE; | 749 h->ref_cache[list][scan8[0] - 1 + 1*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE; |
750 } | 750 } |
751 | 751 |
752 if(IS_INTER(left_type[1])){ | 752 if(USES_LIST(left_type[1], list)){ |
753 const int b_xy= h->mb2b_xy[left_xy[1]] + 3; | 753 const int b_xy= h->mb2b_xy[left_xy[1]] + 3; |
754 const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1; | 754 const int b8_xy= h->mb2b8_xy[left_xy[1]] + 1; |
755 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]]; | 755 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[2]]; |
756 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]]; | 756 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[3]]; |
757 h->ref_cache[list][scan8[0] - 1 + 2*8]= | 757 h->ref_cache[list][scan8[0] - 1 + 2*8]= |
765 } | 765 } |
766 | 766 |
767 if(for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) | 767 if(for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) |
768 continue; | 768 continue; |
769 | 769 |
770 if(IS_INTER(topleft_type)){ | 770 if(USES_LIST(topleft_type, list)){ |
771 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride; | 771 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride; |
772 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride; | 772 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride; |
773 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy]; | 773 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy]; |
774 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy]; | 774 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy]; |
775 }else{ | 775 }else{ |
776 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0; | 776 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0; |
777 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; | 777 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; |
778 } | 778 } |
779 | 779 |
780 if(IS_INTER(topright_type)){ | 780 if(USES_LIST(topright_type, list)){ |
781 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride; | 781 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride; |
782 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride; | 782 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride; |
783 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy]; | 783 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy]; |
784 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy]; | 784 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy]; |
785 }else{ | 785 }else{ |
799 *(uint32_t*)h->mv_cache [list][scan8[4 ]]= | 799 *(uint32_t*)h->mv_cache [list][scan8[4 ]]= |
800 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0; | 800 *(uint32_t*)h->mv_cache [list][scan8[12]]= 0; |
801 | 801 |
802 if( h->pps.cabac ) { | 802 if( h->pps.cabac ) { |
803 /* XXX beurk, Load mvd */ | 803 /* XXX beurk, Load mvd */ |
804 if(IS_INTER(topleft_type)){ | 804 if(USES_LIST(topleft_type, list)){ |
805 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride; | 805 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride; |
806 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy]; | 806 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy]; |
807 }else{ | 807 }else{ |
808 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= 0; | 808 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 - 1*8]= 0; |
809 } | 809 } |
810 | 810 |
811 if(IS_INTER(top_type)){ | 811 if(USES_LIST(top_type, list)){ |
812 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; | 812 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; |
813 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0]; | 813 *(uint32_t*)h->mvd_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 0]; |
814 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1]; | 814 *(uint32_t*)h->mvd_cache[list][scan8[0] + 1 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 1]; |
815 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2]; | 815 *(uint32_t*)h->mvd_cache[list][scan8[0] + 2 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 2]; |
816 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3]; | 816 *(uint32_t*)h->mvd_cache[list][scan8[0] + 3 - 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + 3]; |
818 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]= | 818 *(uint32_t*)h->mvd_cache [list][scan8[0] + 0 - 1*8]= |
819 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]= | 819 *(uint32_t*)h->mvd_cache [list][scan8[0] + 1 - 1*8]= |
820 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]= | 820 *(uint32_t*)h->mvd_cache [list][scan8[0] + 2 - 1*8]= |
821 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0; | 821 *(uint32_t*)h->mvd_cache [list][scan8[0] + 3 - 1*8]= 0; |
822 } | 822 } |
823 if(IS_INTER(left_type[0])){ | 823 if(USES_LIST(left_type[0], list)){ |
824 const int b_xy= h->mb2b_xy[left_xy[0]] + 3; | 824 const int b_xy= h->mb2b_xy[left_xy[0]] + 3; |
825 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]]; | 825 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[0]]; |
826 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]]; | 826 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 1*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[1]]; |
827 }else{ | 827 }else{ |
828 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]= | 828 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 0*8]= |
829 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0; | 829 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 1*8]= 0; |
830 } | 830 } |
831 if(IS_INTER(left_type[1])){ | 831 if(USES_LIST(left_type[1], list)){ |
832 const int b_xy= h->mb2b_xy[left_xy[1]] + 3; | 832 const int b_xy= h->mb2b_xy[left_xy[1]] + 3; |
833 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]]; | 833 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 2*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[2]]; |
834 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]]; | 834 *(uint32_t*)h->mvd_cache[list][scan8[0] - 1 + 3*8]= *(uint32_t*)h->mvd_table[list][b_xy + h->b_stride*left_block[3]]; |
835 }else{ | 835 }else{ |
836 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]= | 836 *(uint32_t*)h->mvd_cache [list][scan8[0] - 1 + 2*8]= |
1422 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride; | 1422 const int b8_xy= 2*s->mb_x + 2*s->mb_y*h->b8_stride; |
1423 int list; | 1423 int list; |
1424 | 1424 |
1425 for(list=0; list<2; list++){ | 1425 for(list=0; list<2; list++){ |
1426 int y; | 1426 int y; |
1427 if(!USES_LIST(mb_type, list)){ | 1427 if(!USES_LIST(mb_type, list)) |
1428 if(1){ //FIXME skip or never read if mb_type doesn't use it | |
1429 for(y=0; y<4; y++){ | |
1430 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= | |
1431 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= 0; | |
1432 } | |
1433 if( h->pps.cabac ) { | |
1434 /* FIXME needed ? */ | |
1435 for(y=0; y<4; y++){ | |
1436 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= | |
1437 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= 0; | |
1438 } | |
1439 } | |
1440 for(y=0; y<2; y++){ | |
1441 s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]= | |
1442 s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= LIST_NOT_USED; | |
1443 } | |
1444 } | |
1445 continue; | 1428 continue; |
1446 } | |
1447 | 1429 |
1448 for(y=0; y<4; y++){ | 1430 for(y=0; y<4; y++){ |
1449 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y]; | 1431 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+0 + 8*y]; |
1450 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y]; | 1432 *(uint64_t*)s->current_picture.motion_val[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mv_cache[list][scan8[0]+2 + 8*y]; |
1451 } | 1433 } |
1453 for(y=0; y<4; y++){ | 1435 for(y=0; y<4; y++){ |
1454 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y]; | 1436 *(uint64_t*)h->mvd_table[list][b_xy + 0 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+0 + 8*y]; |
1455 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y]; | 1437 *(uint64_t*)h->mvd_table[list][b_xy + 2 + y*h->b_stride]= *(uint64_t*)h->mvd_cache[list][scan8[0]+2 + 8*y]; |
1456 } | 1438 } |
1457 } | 1439 } |
1458 for(y=0; y<2; y++){ | 1440 |
1459 s->current_picture.ref_index[list][b8_xy + 0 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+0 + 16*y]; | 1441 { |
1460 s->current_picture.ref_index[list][b8_xy + 1 + y*h->b8_stride]= h->ref_cache[list][scan8[0]+2 + 16*y]; | 1442 uint8_t *ref_index = &s->current_picture.ref_index[list][b8_xy]; |
1443 ref_index[0+0*h->b8_stride]= h->ref_cache[list][scan8[0]]; | |
1444 ref_index[1+0*h->b8_stride]= h->ref_cache[list][scan8[4]]; | |
1445 ref_index[0+1*h->b8_stride]= h->ref_cache[list][scan8[8]]; | |
1446 ref_index[1+1*h->b8_stride]= h->ref_cache[list][scan8[12]]; | |
1461 } | 1447 } |
1462 } | 1448 } |
1463 | 1449 |
1464 if(h->slice_type == B_TYPE && h->pps.cabac){ | 1450 if(h->slice_type == B_TYPE && h->pps.cabac){ |
1465 if(IS_8X8(mb_type)){ | 1451 if(IS_8X8(mb_type)){ |
1466 h->direct_table[b8_xy+1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0; | 1452 uint8_t *direct_table = &h->direct_table[b8_xy]; |
1467 h->direct_table[b8_xy+0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0; | 1453 direct_table[1+0*h->b8_stride] = IS_DIRECT(h->sub_mb_type[1]) ? 1 : 0; |
1468 h->direct_table[b8_xy+1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0; | 1454 direct_table[0+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[2]) ? 1 : 0; |
1455 direct_table[1+1*h->b8_stride] = IS_DIRECT(h->sub_mb_type[3]) ? 1 : 0; | |
1469 } | 1456 } |
1470 } | 1457 } |
1471 } | 1458 } |
1472 | 1459 |
1473 /** | 1460 /** |