libavcodec.hg: apedec.c comparison

comparison apedec.c @ 10632:54982e4c4478 libavcodec

Avoid an unpredictable branch: 20% faster predictor_update_filter, 0.4-4% faster APE decoding on Core 2.

author	lorenm
date	Thu, 03 Dec 2009 17:48:54 +0000
parents	79f3ead3ebc1
children	40ff9c7958da

comparison

Legend: equal, deleted, inserted, replaced

-:79f3ead3ebc1
+:54982e4c4478
 return (x < 0) - (x > 0);
 }
 static av_always_inline int predictor_update_filter(APEPredictor *p, const int decoded, const int filter, const int delayA, const int delayB, const int adaptA, const int adaptB)
 {
-int32_t predictionA, predictionB;
+int32_t predictionA, predictionB, sign;
 p->buf[delayA]     = p->lastA[filter];
 p->buf[adaptA]     = APESIGN(p->buf[delayA]);
 p->buf[delayA - 1] = p->buf[delayA] - p->buf[delayA - 1];
 p->buf[adaptA - 1] = APESIGN(p->buf[delayA - 1]);
 p->buf[delayB - 4] * p->coeffsB[filter][4];
 p->lastA[filter] = decoded + ((predictionA + (predictionB >> 1)) >> 10);
 p->filterA[filter] = p->lastA[filter] + ((p->filterA[filter] * 31) >> 5);
-if (!decoded) // no need updating filter coefficients
+sign = APESIGN(decoded);
-return p->filterA[filter];
+p->coeffsA[filter][0] += p->buf[adaptA    ] * sign;
+p->coeffsA[filter][1] += p->buf[adaptA - 1] * sign;
-if (decoded > 0) {
+p->coeffsA[filter][2] += p->buf[adaptA - 2] * sign;
-p->coeffsA[filter][0] -= p->buf[adaptA    ];
+p->coeffsA[filter][3] += p->buf[adaptA - 3] * sign;
-p->coeffsA[filter][1] -= p->buf[adaptA - 1];
+p->coeffsB[filter][0] += p->buf[adaptB    ] * sign;
-p->coeffsA[filter][2] -= p->buf[adaptA - 2];
+p->coeffsB[filter][1] += p->buf[adaptB - 1] * sign;
-p->coeffsA[filter][3] -= p->buf[adaptA - 3];
+p->coeffsB[filter][2] += p->buf[adaptB - 2] * sign;
+p->coeffsB[filter][3] += p->buf[adaptB - 3] * sign;
-p->coeffsB[filter][0] -= p->buf[adaptB    ];
+p->coeffsB[filter][4] += p->buf[adaptB - 4] * sign;
-p->coeffsB[filter][1] -= p->buf[adaptB - 1];
-p->coeffsB[filter][2] -= p->buf[adaptB - 2];
-p->coeffsB[filter][3] -= p->buf[adaptB - 3];
-p->coeffsB[filter][4] -= p->buf[adaptB - 4];
-} else {
-p->coeffsA[filter][0] += p->buf[adaptA    ];
-p->coeffsA[filter][1] += p->buf[adaptA - 1];
-p->coeffsA[filter][2] += p->buf[adaptA - 2];
-p->coeffsA[filter][3] += p->buf[adaptA - 3];
-p->coeffsB[filter][0] += p->buf[adaptB    ];
-p->coeffsB[filter][1] += p->buf[adaptB - 1];
-p->coeffsB[filter][2] += p->buf[adaptB - 2];
-p->coeffsB[filter][3] += p->buf[adaptB - 3];
-p->coeffsB[filter][4] += p->buf[adaptB - 4];
-}
 return p->filterA[filter];
 }
 static void predictor_decode_stereo(APEContext * ctx, int count)
 {
 static void predictor_decode_mono(APEContext * ctx, int count)
 {
 APEPredictor *p = &ctx->predictor;
 int32_t *decoded0 = ctx->decoded0;
-int32_t predictionA, currentA, A;
+int32_t predictionA, currentA, A, sign;
 currentA = p->lastA[0];
 while (count--) {
 A = *decoded0;
 currentA = A + (predictionA >> 10);
 p->buf[YADAPTCOEFFSA]     = APESIGN(p->buf[YDELAYA    ]);
 p->buf[YADAPTCOEFFSA - 1] = APESIGN(p->buf[YDELAYA - 1]);
-if (A > 0) {
+sign = APESIGN(A);
-p->coeffsA[0][0] -= p->buf[YADAPTCOEFFSA    ];
+p->coeffsA[0][0] += p->buf[YADAPTCOEFFSA    ] * sign;
-p->coeffsA[0][1] -= p->buf[YADAPTCOEFFSA - 1];
+p->coeffsA[0][1] += p->buf[YADAPTCOEFFSA - 1] * sign;
-p->coeffsA[0][2] -= p->buf[YADAPTCOEFFSA - 2];
+p->coeffsA[0][2] += p->buf[YADAPTCOEFFSA - 2] * sign;
-p->coeffsA[0][3] -= p->buf[YADAPTCOEFFSA - 3];
+p->coeffsA[0][3] += p->buf[YADAPTCOEFFSA - 3] * sign;
-} else if (A < 0) {
-p->coeffsA[0][0] += p->buf[YADAPTCOEFFSA    ];
-p->coeffsA[0][1] += p->buf[YADAPTCOEFFSA - 1];
-p->coeffsA[0][2] += p->buf[YADAPTCOEFFSA - 2];
-p->coeffsA[0][3] += p->buf[YADAPTCOEFFSA - 3];
-}
 p->buf++;
 /* Have we filled the history buffer? */
 if (p->buf == p->historybuffer + HISTORY_SIZE) {

Mercurial > libavcodec.hg

comparison apedec.c @ 10632:54982e4c4478 libavcodec