comparison h264.c @ 2623:aaf8e94bce00 libavcodec

deblocking filter doesn't need to call fill_caches again. 1.4% faster decoding.
author lorenm
date Thu, 21 Apr 2005 20:38:42 +0000
parents b5b09255f7c3
children 511e3afc43e1
comparison
equal deleted inserted replaced
2622:943c38487346 2623:aaf8e94bce00
426 int topleft_xy, top_xy, topright_xy, left_xy[2]; 426 int topleft_xy, top_xy, topright_xy, left_xy[2];
427 int topleft_type, top_type, topright_type, left_type[2]; 427 int topleft_type, top_type, topright_type, left_type[2];
428 int left_block[8]; 428 int left_block[8];
429 int i; 429 int i;
430 430
431 //FIXME deblocking can skip fill_caches much of the time with multiple slices too.
432 // the actual condition is whether we're on the edge of a slice,
433 // and even then the intra and nnz parts are unnecessary.
434 if(for_deblock && h->slice_num == 1)
435 return;
436
431 //wow what a mess, why didn't they simplify the interlacing&intra stuff, I can't imagine that these complex rules are worth it 437 //wow what a mess, why didn't they simplify the interlacing&intra stuff, I can't imagine that these complex rules are worth it
432 438
433 top_xy = mb_xy - s->mb_stride; 439 top_xy = mb_xy - s->mb_stride;
434 topleft_xy = top_xy - 1; 440 topleft_xy = top_xy - 1;
435 topright_xy= top_xy + 1; 441 topright_xy= top_xy + 1;
660 } 666 }
661 } 667 }
662 668
663 #if 1 669 #if 1
664 //FIXME direct mb can skip much of this 670 //FIXME direct mb can skip much of this
665 if(IS_INTER(mb_type) || (IS_DIRECT(mb_type) && h->direct_spatial_mv_pred)){ 671 if(IS_INTER(mb_type) || IS_DIRECT(mb_type)){
666 int list; 672 int list;
667 for(list=0; list<2; list++){ 673 for(list=0; list<1+(h->slice_type==B_TYPE); list++){
668 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !for_deblock){ 674 if(!USES_LIST(mb_type, list) && !IS_DIRECT(mb_type) && !h->deblocking_filter){
669 /*if(!h->mv_cache_clean[list]){ 675 /*if(!h->mv_cache_clean[list]){
670 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all? 676 memset(h->mv_cache [list], 0, 8*5*2*sizeof(int16_t)); //FIXME clean only input? clean at all?
671 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t)); 677 memset(h->ref_cache[list], PART_NOT_AVAILABLE, 8*5*sizeof(int8_t));
672 h->mv_cache_clean[list]= 1; 678 h->mv_cache_clean[list]= 1;
673 }*/ 679 }*/
674 continue; 680 continue;
675 } 681 }
676 h->mv_cache_clean[list]= 0; 682 h->mv_cache_clean[list]= 0;
677
678 if(IS_INTER(topleft_type)){
679 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
680 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
681 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
682 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
683 }else{
684 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
685 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
686 }
687 683
688 if(IS_INTER(top_type)){ 684 if(IS_INTER(top_type)){
689 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride; 685 const int b_xy= h->mb2b_xy[top_xy] + 3*h->b_stride;
690 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride; 686 const int b8_xy= h->mb2b8_xy[top_xy] + h->b8_stride;
691 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0]; 687 *(uint32_t*)h->mv_cache[list][scan8[0] + 0 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + 0];
702 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]= 698 *(uint32_t*)h->mv_cache [list][scan8[0] + 2 - 1*8]=
703 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0; 699 *(uint32_t*)h->mv_cache [list][scan8[0] + 3 - 1*8]= 0;
704 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101; 700 *(uint32_t*)&h->ref_cache[list][scan8[0] + 0 - 1*8]= ((top_type ? LIST_NOT_USED : PART_NOT_AVAILABLE)&0xFF)*0x01010101;
705 } 701 }
706 702
707 if(IS_INTER(topright_type)){
708 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
709 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
710 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
711 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
712 }else{
713 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
714 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
715 }
716
717 //FIXME unify cleanup or sth 703 //FIXME unify cleanup or sth
718 if(IS_INTER(left_type[0])){ 704 if(IS_INTER(left_type[0])){
719 const int b_xy= h->mb2b_xy[left_xy[0]] + 3; 705 const int b_xy= h->mb2b_xy[left_xy[0]] + 3;
720 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1; 706 const int b8_xy= h->mb2b8_xy[left_xy[0]] + 1;
721 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]]; 707 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 + 0*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy + h->b_stride*left_block[0]];
741 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0; 727 *(uint32_t*)h->mv_cache [list][scan8[0] - 1 + 3*8]= 0;
742 h->ref_cache[list][scan8[0] - 1 + 2*8]= 728 h->ref_cache[list][scan8[0] - 1 + 2*8]=
743 h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE; 729 h->ref_cache[list][scan8[0] - 1 + 3*8]= left_type[0] ? LIST_NOT_USED : PART_NOT_AVAILABLE;
744 } 730 }
745 731
746 if(for_deblock) 732 if(for_deblock || (IS_DIRECT(mb_type) && !h->direct_spatial_mv_pred))
747 continue; 733 continue;
734
735 if(IS_INTER(topleft_type)){
736 const int b_xy = h->mb2b_xy[topleft_xy] + 3 + 3*h->b_stride;
737 const int b8_xy= h->mb2b8_xy[topleft_xy] + 1 + h->b8_stride;
738 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
739 h->ref_cache[list][scan8[0] - 1 - 1*8]= s->current_picture.ref_index[list][b8_xy];
740 }else{
741 *(uint32_t*)h->mv_cache[list][scan8[0] - 1 - 1*8]= 0;
742 h->ref_cache[list][scan8[0] - 1 - 1*8]= topleft_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
743 }
744
745 if(IS_INTER(topright_type)){
746 const int b_xy= h->mb2b_xy[topright_xy] + 3*h->b_stride;
747 const int b8_xy= h->mb2b8_xy[topright_xy] + h->b8_stride;
748 *(uint32_t*)h->mv_cache[list][scan8[0] + 4 - 1*8]= *(uint32_t*)s->current_picture.motion_val[list][b_xy];
749 h->ref_cache[list][scan8[0] + 4 - 1*8]= s->current_picture.ref_index[list][b8_xy];
750 }else{
751 *(uint32_t*)h->mv_cache [list][scan8[0] + 4 - 1*8]= 0;
752 h->ref_cache[list][scan8[0] + 4 - 1*8]= topright_type ? LIST_NOT_USED : PART_NOT_AVAILABLE;
753 }
754
748 755
749 h->ref_cache[list][scan8[5 ]+1] = 756 h->ref_cache[list][scan8[5 ]+1] =
750 h->ref_cache[list][scan8[7 ]+1] = 757 h->ref_cache[list][scan8[7 ]+1] =
751 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewher else) 758 h->ref_cache[list][scan8[13]+1] = //FIXME remove past 3 (init somewher else)
752 h->ref_cache[list][scan8[4 ]] = 759 h->ref_cache[list][scan8[4 ]] =