comparison h264_loopfilter.c @ 11037:dfc8f928cc8a libavcodec

Factor mv/ref compare code out. This is a hair slower (0.15% maybe), but I really don't want to have the identical code duplicated 3 times because gcc adds odd threaded jumps with register reshuffling and register save/restore.
author michael
date Thu, 28 Jan 2010 10:10:02 +0000
parents faedf5cdb8c3
children ee6d89b26e7f
comparison
equal deleted inserted replaced
11036:faedf5cdb8c3 11037:dfc8f928cc8a
422 } 422 }
423 #undef FILTER 423 #undef FILTER
424 } 424 }
425 } 425 }
426 426
427 static int check_mv(H264Context *h, long b_idx, long bn_idx, int mvy_limit){
428 int l;
429 int v = 0;
430
431 for( l = 0; !v && l < h->list_count; l++ ) {
432 v |= h->ref_cache[l][b_idx] != h->ref_cache[l][bn_idx] |
433 h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] + 3 >= 7U |
434 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
435 }
436
437 if(h->list_count==2 && v){
438 v=0;
439 for( l = 0; !v && l < 2; l++ ) {
440 int ln= 1-l;
441 v |= h->ref_cache[l][b_idx] != h->ref_cache[ln][bn_idx] |
442 h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] + 3 >= 7U |
443 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
444 }
445 }
446
447 return v;
448 }
427 449
428 static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) { 450 static av_always_inline void filter_mb_dir(H264Context *h, int mb_x, int mb_y, uint8_t *img_y, uint8_t *img_cb, uint8_t *img_cr, unsigned int linesize, unsigned int uvlinesize, int mb_xy, int mb_type, int mvy_limit, int first_vertical_edge_done, int dir) {
429 MpegEncContext * const s = &h->s; 451 MpegEncContext * const s = &h->s;
430 int edge; 452 int edge;
431 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy; 453 const int mbm_xy = dir == 0 ? mb_xy -1 : h->top_mb_xy;
496 mv_done = 1; 518 mv_done = 1;
497 } 519 }
498 else if( mask_par0 && ((mbm_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) { 520 else if( mask_par0 && ((mbm_type & (MB_TYPE_16x16 | (MB_TYPE_8x16 >> dir)))) ) {
499 int b_idx= 8 + 4; 521 int b_idx= 8 + 4;
500 int bn_idx= b_idx - (dir ? 8:1); 522 int bn_idx= b_idx - (dir ? 8:1);
501 int v = 0; 523
502 524 bS[0] = bS[1] = bS[2] = bS[3] = check_mv(h, 8 + 4, bn_idx, mvy_limit);
503 for( l = 0; !v && l < h->list_count; l++ ) {
504 v |= h->ref_cache[l][b_idx] != h->ref_cache[l][bn_idx] |
505 h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] + 3 >= 7U |
506 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
507 }
508
509 if(h->list_count==2 && v){
510 v=0;
511 for( l = 0; !v && l < 2; l++ ) {
512 int ln= 1-l;
513 v |= h->ref_cache[l][b_idx] != h->ref_cache[ln][bn_idx] |
514 h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] + 3 >= 7U |
515 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
516 }
517 }
518
519 bS[0] = bS[1] = bS[2] = bS[3] = v;
520 mv_done = 1; 525 mv_done = 1;
521 } 526 }
522 else 527 else
523 mv_done = 0; 528 mv_done = 0;
524 529
532 h->non_zero_count_cache[bn_idx] ) { 537 h->non_zero_count_cache[bn_idx] ) {
533 bS[i] = 2; 538 bS[i] = 2;
534 } 539 }
535 else if(!mv_done) 540 else if(!mv_done)
536 { 541 {
537 bS[i] = 0; 542 bS[i] = check_mv(h, b_idx, bn_idx, mvy_limit);
538 for( l = 0; l < h->list_count; l++ ) {
539 if( h->ref_cache[l][b_idx] != h->ref_cache[l][bn_idx] |
540 h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] + 3 >= 7U |
541 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
542 bS[i] = 1;
543 break;
544 }
545 }
546
547 if(h->list_count == 2 && bS[i]){
548 bS[i] = 0;
549 for( l = 0; l < 2; l++ ) {
550 int ln= 1-l;
551 if( h->ref_cache[l][b_idx] != h->ref_cache[ln][bn_idx] |
552 h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] + 3 >= 7U |
553 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
554 bS[i] = 1;
555 break;
556 }
557 }
558 }
559 } 543 }
560 } 544 }
561 } 545 }
562 546
563 /* Filter edge */ 547 /* Filter edge */
610 mv_done = 1; 594 mv_done = 1;
611 } 595 }
612 else if( mask_par0 ) { 596 else if( mask_par0 ) {
613 int b_idx= 8 + 4 + edge * (dir ? 8:1); 597 int b_idx= 8 + 4 + edge * (dir ? 8:1);
614 int bn_idx= b_idx - (dir ? 8:1); 598 int bn_idx= b_idx - (dir ? 8:1);
615 int v = 0; 599
616 600 bS[0] = bS[1] = bS[2] = bS[3] = check_mv(h, b_idx, bn_idx, mvy_limit);
617 for( l = 0; !v && l < h->list_count; l++ ) {
618 v |= h->ref_cache[l][b_idx] != h->ref_cache[l][bn_idx] |
619 h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] + 3 >= 7U |
620 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit;
621 }
622
623 if(h->list_count==2 && v){
624 v=0;
625 for( l = 0; !v && l < 2; l++ ) {
626 int ln= 1-l;
627 v |= h->ref_cache[l][b_idx] != h->ref_cache[ln][bn_idx] |
628 h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] + 3 >= 7U |
629 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit;
630 }
631 }
632
633 bS[0] = bS[1] = bS[2] = bS[3] = v;
634 mv_done = 1; 601 mv_done = 1;
635 } 602 }
636 else 603 else
637 mv_done = 0; 604 mv_done = 0;
638 605
646 h->non_zero_count_cache[bn_idx] ) { 613 h->non_zero_count_cache[bn_idx] ) {
647 bS[i] = 2; 614 bS[i] = 2;
648 } 615 }
649 else if(!mv_done) 616 else if(!mv_done)
650 { 617 {
651 bS[i] = 0; 618 bS[i] = check_mv(h, b_idx, bn_idx, mvy_limit);
652 for( l = 0; l < h->list_count; l++ ) {
653 if( h->ref_cache[l][b_idx] != h->ref_cache[l][bn_idx] |
654 h->mv_cache[l][b_idx][0] - h->mv_cache[l][bn_idx][0] + 3 >= 7U |
655 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[l][bn_idx][1] ) >= mvy_limit ) {
656 bS[i] = 1;
657 break;
658 }
659 }
660
661 if(h->list_count == 2 && bS[i]){
662 bS[i] = 0;
663 for( l = 0; l < 2; l++ ) {
664 int ln= 1-l;
665 if( h->ref_cache[l][b_idx] != h->ref_cache[ln][bn_idx] |
666 h->mv_cache[l][b_idx][0] - h->mv_cache[ln][bn_idx][0] + 3 >= 7U |
667 FFABS( h->mv_cache[l][b_idx][1] - h->mv_cache[ln][bn_idx][1] ) >= mvy_limit ) {
668 bS[i] = 1;
669 break;
670 }
671 }
672 }
673 } 619 }
674 } 620 }
675 621
676 if(bS[0]+bS[1]+bS[2]+bS[3] == 0) 622 if(bS[0]+bS[1]+bS[2]+bS[3] == 0)
677 continue; 623 continue;