Mercurial > libavcodec.hg
comparison h264.c @ 2623:aaf8e94bce00 libavcodec
deblocking filter doesn't need to call fill_caches again. 1.4% faster decoding.
author | lorenm |
---|---|
date | Thu, 21 Apr 2005 20:38:42 +0000 |
parents | b5b09255f7c3 |
children | 511e3afc43e1 |
comparison
legend: equal | deleted | inserted | replaced
2622:943c38487346 | 2623:aaf8e94bce00 |
---|---|
426 int topleft_xy, top_xy, topright_xy, left_xy[2]; | 426 int topleft_xy, top_xy, topright_xy, left_xy[2]; |
427 int topleft_type, top_type, topright_type, left_type[2]; | 427 int topleft_type, top_type, topright_type, left_type[2]; |
428 int left_block[8]; | 428 int left_block[8]; |
429 int i; | 429 int i; |
430 | 430 |
431 //FIXME deblocking can skip fill_caches much of the time with multiple slices too. | |
432 // the actual condition is whether we're on the edge of a slice, | |
433 // and even then the intra and nnz parts are unnecessary. | |
434 if(for_deblock && h->slice_num == 1) | |
435 return; | |
436 | |
431 //wow what a mess, why didnt they simplify the interlacing&intra stuff, i cant imagine that these complex rules are worth it | 437 //wow what a mess, why didnt they simplify the interlacing&intra stuff, i cant imagine that these complex rules are worth it |
432 | 438 |
433 top_xy = mb_xy - s->mb_stride; | 439 top_xy = mb_xy - s->mb_stride; |
434 topleft_xy = top_xy - 1; | 440 topleft_xy = top_xy - 1; |
435 topright_xy= top_xy + 1; | 441 topright_xy= top_xy + 1; |
660 } | 666 } |
661 } | 667 } |
662 | 668 |
663 #if 1 | 669 #if 1 |
664 //FIXME direct mb can skip much of this | 670 //FIXME direct mb can skip much of this |
665 if(IS_INTER(mb_type) || (IS_DIRECT(mb_type) && h->direct_spatial_mv_pred)){ | 671 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){ |
666 int list; | 672 int list; |
667 for(list=0; list<2; list++){ | 673 for(list=0; list<1+(h->slice_type==B_TYPE); list++){ |
668 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !for_deblock){ | 674 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){ |
669 /*if(!h->mv_cache_clean[list]){ | 675 /*if(!h->mv_cache_clean[list]){ |
670 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all? | 676 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all? |
671 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t)); | 677 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t)); |
672 h->mv_cache_clean[list]= 1; | 678 h->mv_cache_clean[list]= 1; |
673 }*/ | 679 }*/ |
674 continue; | 680 continue; |
675 } | 681 } |
676 h->mv_cache_clean[list]= 0; | 682 h->mv_cache_clean[list]= 0; |
677 | |
678 if(IS_INTER(topleft_type)){ | |
679 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride; | |
680 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride; | |
681 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy]; | |
682 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy]; | |
683 }else{ | |
684 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0; | |
685 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; | |
686 } | |
687 | 683 |
688 if(IS_INTER(top_type)){ | 684 if(IS_INTER(top_type)){ |
689 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; | 685 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; |
690 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride; | 686 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride; |
691 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0]; | 687 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0]; |
702 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]= | 698 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]= |
703 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0; | 699 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0; |
704 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101; | 700 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101; |
705 } | 701 } |
706 | 702 |
707 if(IS_INTER(topright_type)){ | |
708 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride; | |
709 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride; | |
710 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy]; | |
711 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy]; | |
712 }else{ | |
713 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0; | |
714 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; | |
715 } | |
716 | |
717 //FIXME unify cleanup or sth | 703 //FIXME unify cleanup or sth |
718 if(IS_INTER(left_type[0])){ | 704 if(IS_INTER(left_type[0])){ |
719 const int b_xy= h->mb2b_xy[left_xy[0]] + 3; | 705 const int b_xy= h->mb2b_xy[left_xy[0]] + 3; |
720 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1; | 706 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1; |
721 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]]; | 707 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]]; |
741 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0; | 727 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0; |
742 h->ref_cache[list][scan8[0] - 1 + 2*8]= | 728 h->ref_cache[list][scan8[0] - 1 + 2*8]= |
743 h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE; | 729 h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE; |
744 } | 730 } |
745 | 731 |
746 if(for_deblock) | 732 if(for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred)) |
747 continue; | 733 continue; |
734 | |
735 if(IS_INTER(topleft_type)){ | |
736 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride; | |
737 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride; | |
738 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy]; | |
739 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy]; | |
740 }else{ | |
741 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0; | |
742 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; | |
743 } | |
744 | |
745 if(IS_INTER(topright_type)){ | |
746 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride; | |
747 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride; | |
748 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy]; | |
749 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy]; | |
750 }else{ | |
751 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0; | |
752 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE; | |
753 } | |
754 | |
748 | 755 |
749 h->ref_cache[list][scan8[5 ]+1] = | 756 h->ref_cache[list][scan8[5 ]+1] = |
750 h->ref_cache[list][scan8[7 ]+1] = | 757 h->ref_cache[list][scan8[7 ]+1] = |
751 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewher else) | 758 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewher else) |
752 h->ref_cache[list][scan8[4 ]] = | 759 h->ref_cache[list][scan8[4 ]] = |