# HG changeset patch # User mhoffman # Date 1182471164 0 # Node ID c73cc80ec43ac30daaa2b332ab9ecc8f357ca561 # Parent 58deb214b9557c7bf832a609104c301a0e3574ad re pipeline loop, to eliminate extra chroma reads diff -r 58deb214b955 -r c73cc80ec43a libswscale/internal_bfin.S --- a/libswscale/internal_bfin.S Fri Jun 22 00:07:55 2007 +0000 +++ b/libswscale/internal_bfin.S Fri Jun 22 00:12:44 2007 +0000 @@ -496,30 +496,29 @@ r2 = r2 - r0; m1 = r2; - r6.l = w[i2--]; r6.l = w[i2]; - r6.h = w[i3--]; r6.h = w[i3]; /* I0,I1 - src input line pointers * p0,p1 - luma output line pointers * I2 - dstU * I3 - dstV */ - lsetup (0f, 1f) lc0 = p4; - -0: lsetup (2f, 3f) lc1 = p5; - r0 = [i0++] || r2 = [i1++]; - r1 = [i0++] || r3 = [i1++]; -2: r4 = byteop1p(r1:0, r3:2) || w[i2++] = r6.l; - r5 = byteop1p(r1:0, r3:2) (r) || w[i3++] = r6.h; - r0 = r0 >> 8(v); + lsetup (0f, 1f) lc1 = p4; // H/2 +0: r0 = [i0++] || r2 = [i1++]; + r1 = [i0++] || r3 = [i1++]; + r4 = byteop1p(r1:0, r3:2); + r5 = byteop1p(r1:0, r3:2) (r); + lsetup (2f, 3f) lc0 = p5; // W/4 +2: r0 = r0 >> 8(v); r1 = r1 >> 8(v); r2 = r2 >> 8(v); r3 = r3 >> 8(v); r0 = bytepack(r0, r1); - r2 = bytepack(r2, r3) || [p0++] = r0; - r6 = pack(r5.l, r4.l) || [p1++] = r2; - r7 = pack(r5.h, r4.h) || r0 = [i0++] || r2 = [i1++]; -3: r6 = bytepack(r6, r7) || r1 = [i0++] || r3 = [i1++]; + r2 = bytepack(r2, r3) || [p0++] = r0; // yyyy + r6 = pack(r5.l, r4.l) || [p1++] = r2; // yyyy + r7 = pack(r5.h, r4.h) || r0 = [i0++] || r2 = [i1++]; + r6 = bytepack(r6, r7) || r1 = [i0++] || r3 = [i1++]; + r4 = byteop1p(r1:0, r3:2) || w[i2++] = r6.l; // uu +3: r5 = byteop1p(r1:0, r3:2) (r) || w[i3++] = r6.h; // vv i0 += m0; i1 += m0; @@ -528,9 +527,6 @@ p0 = p0 + p2; 1: p1 = p1 + p2; - w[i2++] = r6.l; - w[i3++] = r6.h; - (r7:4,p5:4) = [sp++]; unlink; rts;