Mercurial > libavcodec.hg
comparison apedec.c @ 10632:54982e4c4478 libavcodec
avoid an unpredictable branch
20% faster predictor_update_filter, 0.4-4% faster APE decoding on Core 2
author | lorenm |
---|---|
date | Thu, 03 Dec 2009 17:48:54 +0000 |
parents | 79f3ead3ebc1 |
children | 40ff9c7958da |
comparison
equal
deleted
inserted
replaced
10631:79f3ead3ebc1 | 10632:54982e4c4478 |
---|---|
517 return (x < 0) - (x > 0); | 517 return (x < 0) - (x > 0); |
518 } | 518 } |
519 | 519 |
520 static av_always_inline int predictor_update_filter(APEPredictor *p, const int decoded, const int filter, const int delayA, const int delayB, const int adaptA, const int adaptB) | 520 static av_always_inline int predictor_update_filter(APEPredictor *p, const int decoded, const int filter, const int delayA, const int delayB, const int adaptA, const int adaptB) |
521 { | 521 { |
522 int32_t predictionA, predictionB; | 522 int32_t predictionA, predictionB, sign; |
523 | 523 |
524 p->buf[delayA] = p->lastA[filter]; | 524 p->buf[delayA] = p->lastA[filter]; |
525 p->buf[adaptA] = APESIGN(p->buf[delayA]); | 525 p->buf[adaptA] = APESIGN(p->buf[delayA]); |
526 p->buf[delayA - 1] = p->buf[delayA] - p->buf[delayA - 1]; | 526 p->buf[delayA - 1] = p->buf[delayA] - p->buf[delayA - 1]; |
527 p->buf[adaptA - 1] = APESIGN(p->buf[delayA - 1]); | 527 p->buf[adaptA - 1] = APESIGN(p->buf[delayA - 1]); |
545 p->buf[delayB - 4] * p->coeffsB[filter][4]; | 545 p->buf[delayB - 4] * p->coeffsB[filter][4]; |
546 | 546 |
547 p->lastA[filter] = decoded + ((predictionA + (predictionB >> 1)) >> 10); | 547 p->lastA[filter] = decoded + ((predictionA + (predictionB >> 1)) >> 10); |
548 p->filterA[filter] = p->lastA[filter] + ((p->filterA[filter] * 31) >> 5); | 548 p->filterA[filter] = p->lastA[filter] + ((p->filterA[filter] * 31) >> 5); |
549 | 549 |
550 if (!decoded) // no need updating filter coefficients | 550 sign = APESIGN(decoded); |
551 return p->filterA[filter]; | 551 p->coeffsA[filter][0] += p->buf[adaptA ] * sign; |
552 | 552 p->coeffsA[filter][1] += p->buf[adaptA - 1] * sign; |
553 if (decoded > 0) { | 553 p->coeffsA[filter][2] += p->buf[adaptA - 2] * sign; |
554 p->coeffsA[filter][0] -= p->buf[adaptA ]; | 554 p->coeffsA[filter][3] += p->buf[adaptA - 3] * sign; |
555 p->coeffsA[filter][1] -= p->buf[adaptA - 1]; | 555 p->coeffsB[filter][0] += p->buf[adaptB ] * sign; |
556 p->coeffsA[filter][2] -= p->buf[adaptA - 2]; | 556 p->coeffsB[filter][1] += p->buf[adaptB - 1] * sign; |
557 p->coeffsA[filter][3] -= p->buf[adaptA - 3]; | 557 p->coeffsB[filter][2] += p->buf[adaptB - 2] * sign; |
558 | 558 p->coeffsB[filter][3] += p->buf[adaptB - 3] * sign; |
559 p->coeffsB[filter][0] -= p->buf[adaptB ]; | 559 p->coeffsB[filter][4] += p->buf[adaptB - 4] * sign; |
560 p->coeffsB[filter][1] -= p->buf[adaptB - 1]; | 560 |
561 p->coeffsB[filter][2] -= p->buf[adaptB - 2]; | |
562 p->coeffsB[filter][3] -= p->buf[adaptB - 3]; | |
563 p->coeffsB[filter][4] -= p->buf[adaptB - 4]; | |
564 } else { | |
565 p->coeffsA[filter][0] += p->buf[adaptA ]; | |
566 p->coeffsA[filter][1] += p->buf[adaptA - 1]; | |
567 p->coeffsA[filter][2] += p->buf[adaptA - 2]; | |
568 p->coeffsA[filter][3] += p->buf[adaptA - 3]; | |
569 | |
570 p->coeffsB[filter][0] += p->buf[adaptB ]; | |
571 p->coeffsB[filter][1] += p->buf[adaptB - 1]; | |
572 p->coeffsB[filter][2] += p->buf[adaptB - 2]; | |
573 p->coeffsB[filter][3] += p->buf[adaptB - 3]; | |
574 p->coeffsB[filter][4] += p->buf[adaptB - 4]; | |
575 } | |
576 return p->filterA[filter]; | 561 return p->filterA[filter]; |
577 } | 562 } |
578 | 563 |
579 static void predictor_decode_stereo(APEContext * ctx, int count) | 564 static void predictor_decode_stereo(APEContext * ctx, int count) |
580 { | 565 { |
602 | 587 |
603 static void predictor_decode_mono(APEContext * ctx, int count) | 588 static void predictor_decode_mono(APEContext * ctx, int count) |
604 { | 589 { |
605 APEPredictor *p = &ctx->predictor; | 590 APEPredictor *p = &ctx->predictor; |
606 int32_t *decoded0 = ctx->decoded0; | 591 int32_t *decoded0 = ctx->decoded0; |
607 int32_t predictionA, currentA, A; | 592 int32_t predictionA, currentA, A, sign; |
608 | 593 |
609 currentA = p->lastA[0]; | 594 currentA = p->lastA[0]; |
610 | 595 |
611 while (count--) { | 596 while (count--) { |
612 A = *decoded0; | 597 A = *decoded0; |
622 currentA = A + (predictionA >> 10); | 607 currentA = A + (predictionA >> 10); |
623 | 608 |
624 p->buf[YADAPTCOEFFSA] = APESIGN(p->buf[YDELAYA ]); | 609 p->buf[YADAPTCOEFFSA] = APESIGN(p->buf[YDELAYA ]); |
625 p->buf[YADAPTCOEFFSA - 1] = APESIGN(p->buf[YDELAYA - 1]); | 610 p->buf[YADAPTCOEFFSA - 1] = APESIGN(p->buf[YDELAYA - 1]); |
626 | 611 |
627 if (A > 0) { | 612 sign = APESIGN(A); |
628 p->coeffsA[0][0] -= p->buf[YADAPTCOEFFSA ]; | 613 p->coeffsA[0][0] += p->buf[YADAPTCOEFFSA ] * sign; |
629 p->coeffsA[0][1] -= p->buf[YADAPTCOEFFSA - 1]; | 614 p->coeffsA[0][1] += p->buf[YADAPTCOEFFSA - 1] * sign; |
630 p->coeffsA[0][2] -= p->buf[YADAPTCOEFFSA - 2]; | 615 p->coeffsA[0][2] += p->buf[YADAPTCOEFFSA - 2] * sign; |
631 p->coeffsA[0][3] -= p->buf[YADAPTCOEFFSA - 3]; | 616 p->coeffsA[0][3] += p->buf[YADAPTCOEFFSA - 3] * sign; |
632 } else if (A < 0) { | |
633 p->coeffsA[0][0] += p->buf[YADAPTCOEFFSA ]; | |
634 p->coeffsA[0][1] += p->buf[YADAPTCOEFFSA - 1]; | |
635 p->coeffsA[0][2] += p->buf[YADAPTCOEFFSA - 2]; | |
636 p->coeffsA[0][3] += p->buf[YADAPTCOEFFSA - 3]; | |
637 } | |
638 | 617 |
639 p->buf++; | 618 p->buf++; |
640 | 619 |
641 /* Have we filled the history buffer? */ | 620 /* Have we filled the history buffer? */ |
642 if (p->buf == p->historybuffer + HISTORY_SIZE) { | 621 if (p->buf == p->historybuffer + HISTORY_SIZE) { |