comparison apedec.c @ 10632:54982e4c4478 libavcodec

Avoid an unpredictable branch: 20% faster predictor_update_filter, 0.4-4% faster APE decoding on Core 2.
author lorenm
date Thu, 03 Dec 2009 17:48:54 +0000
parents 79f3ead3ebc1
children 40ff9c7958da
comparison
equal deleted inserted replaced
10631:79f3ead3ebc1 10632:54982e4c4478
517 return (x < 0) - (x > 0); 517 return (x < 0) - (x > 0);
518 } 518 }
519 519
520 static av_always_inline int predictor_update_filter(APEPredictor *p, const int decoded, const int filter, const int delayA, const int delayB, const int adaptA, const int adaptB) 520 static av_always_inline int predictor_update_filter(APEPredictor *p, const int decoded, const int filter, const int delayA, const int delayB, const int adaptA, const int adaptB)
521 { 521 {
522 int32_t predictionA, predictionB; 522 int32_t predictionA, predictionB, sign;
523 523
524 p->buf[delayA] = p->lastA[filter]; 524 p->buf[delayA] = p->lastA[filter];
525 p->buf[adaptA] = APESIGN(p->buf[delayA]); 525 p->buf[adaptA] = APESIGN(p->buf[delayA]);
526 p->buf[delayA - 1] = p->buf[delayA] - p->buf[delayA - 1]; 526 p->buf[delayA - 1] = p->buf[delayA] - p->buf[delayA - 1];
527 p->buf[adaptA - 1] = APESIGN(p->buf[delayA - 1]); 527 p->buf[adaptA - 1] = APESIGN(p->buf[delayA - 1]);
545 p->buf[delayB - 4] * p->coeffsB[filter][4]; 545 p->buf[delayB - 4] * p->coeffsB[filter][4];
546 546
547 p->lastA[filter] = decoded + ((predictionA + (predictionB >> 1)) >> 10); 547 p->lastA[filter] = decoded + ((predictionA + (predictionB >> 1)) >> 10);
548 p->filterA[filter] = p->lastA[filter] + ((p->filterA[filter] * 31) >> 5); 548 p->filterA[filter] = p->lastA[filter] + ((p->filterA[filter] * 31) >> 5);
549 549
550 if (!decoded) // no need updating filter coefficients 550 sign = APESIGN(decoded);
551 return p->filterA[filter]; 551 p->coeffsA[filter][0] += p->buf[adaptA ] * sign;
552 552 p->coeffsA[filter][1] += p->buf[adaptA - 1] * sign;
553 if (decoded > 0) { 553 p->coeffsA[filter][2] += p->buf[adaptA - 2] * sign;
554 p->coeffsA[filter][0] -= p->buf[adaptA ]; 554 p->coeffsA[filter][3] += p->buf[adaptA - 3] * sign;
555 p->coeffsA[filter][1] -= p->buf[adaptA - 1]; 555 p->coeffsB[filter][0] += p->buf[adaptB ] * sign;
556 p->coeffsA[filter][2] -= p->buf[adaptA - 2]; 556 p->coeffsB[filter][1] += p->buf[adaptB - 1] * sign;
557 p->coeffsA[filter][3] -= p->buf[adaptA - 3]; 557 p->coeffsB[filter][2] += p->buf[adaptB - 2] * sign;
558 558 p->coeffsB[filter][3] += p->buf[adaptB - 3] * sign;
559 p->coeffsB[filter][0] -= p->buf[adaptB ]; 559 p->coeffsB[filter][4] += p->buf[adaptB - 4] * sign;
560 p->coeffsB[filter][1] -= p->buf[adaptB - 1]; 560
561 p->coeffsB[filter][2] -= p->buf[adaptB - 2];
562 p->coeffsB[filter][3] -= p->buf[adaptB - 3];
563 p->coeffsB[filter][4] -= p->buf[adaptB - 4];
564 } else {
565 p->coeffsA[filter][0] += p->buf[adaptA ];
566 p->coeffsA[filter][1] += p->buf[adaptA - 1];
567 p->coeffsA[filter][2] += p->buf[adaptA - 2];
568 p->coeffsA[filter][3] += p->buf[adaptA - 3];
569
570 p->coeffsB[filter][0] += p->buf[adaptB ];
571 p->coeffsB[filter][1] += p->buf[adaptB - 1];
572 p->coeffsB[filter][2] += p->buf[adaptB - 2];
573 p->coeffsB[filter][3] += p->buf[adaptB - 3];
574 p->coeffsB[filter][4] += p->buf[adaptB - 4];
575 }
576 return p->filterA[filter]; 561 return p->filterA[filter];
577 } 562 }
578 563
579 static void predictor_decode_stereo(APEContext * ctx, int count) 564 static void predictor_decode_stereo(APEContext * ctx, int count)
580 { 565 {
602 587
603 static void predictor_decode_mono(APEContext * ctx, int count) 588 static void predictor_decode_mono(APEContext * ctx, int count)
604 { 589 {
605 APEPredictor *p = &ctx->predictor; 590 APEPredictor *p = &ctx->predictor;
606 int32_t *decoded0 = ctx->decoded0; 591 int32_t *decoded0 = ctx->decoded0;
607 int32_t predictionA, currentA, A; 592 int32_t predictionA, currentA, A, sign;
608 593
609 currentA = p->lastA[0]; 594 currentA = p->lastA[0];
610 595
611 while (count--) { 596 while (count--) {
612 A = *decoded0; 597 A = *decoded0;
622 currentA = A + (predictionA >> 10); 607 currentA = A + (predictionA >> 10);
623 608
624 p->buf[YADAPTCOEFFSA] = APESIGN(p->buf[YDELAYA ]); 609 p->buf[YADAPTCOEFFSA] = APESIGN(p->buf[YDELAYA ]);
625 p->buf[YADAPTCOEFFSA - 1] = APESIGN(p->buf[YDELAYA - 1]); 610 p->buf[YADAPTCOEFFSA - 1] = APESIGN(p->buf[YDELAYA - 1]);
626 611
627 if (A > 0) { 612 sign = APESIGN(A);
628 p->coeffsA[0][0] -= p->buf[YADAPTCOEFFSA ]; 613 p->coeffsA[0][0] += p->buf[YADAPTCOEFFSA ] * sign;
629 p->coeffsA[0][1] -= p->buf[YADAPTCOEFFSA - 1]; 614 p->coeffsA[0][1] += p->buf[YADAPTCOEFFSA - 1] * sign;
630 p->coeffsA[0][2] -= p->buf[YADAPTCOEFFSA - 2]; 615 p->coeffsA[0][2] += p->buf[YADAPTCOEFFSA - 2] * sign;
631 p->coeffsA[0][3] -= p->buf[YADAPTCOEFFSA - 3]; 616 p->coeffsA[0][3] += p->buf[YADAPTCOEFFSA - 3] * sign;
632 } else if (A < 0) {
633 p->coeffsA[0][0] += p->buf[YADAPTCOEFFSA ];
634 p->coeffsA[0][1] += p->buf[YADAPTCOEFFSA - 1];
635 p->coeffsA[0][2] += p->buf[YADAPTCOEFFSA - 2];
636 p->coeffsA[0][3] += p->buf[YADAPTCOEFFSA - 3];
637 }
638 617
639 p->buf++; 618 p->buf++;
640 619
641 /* Have we filled the history buffer? */ 620 /* Have we filled the history buffer? */
642 if (p->buf == p->historybuffer + HISTORY_SIZE) { 621 if (p->buf == p->historybuffer + HISTORY_SIZE) {