# HG changeset patch # User gpoirier # Date 1182020350 0 # Node ID 883a5619f52f143eed90c4620f91661843eb2eb4 # Parent 65b1bfb3a74cf6799f4e752bbd5804d77e2be8ea Use a faster way to compute 255-val: instead of creating a vector of all 255s and then doing the subtraction, take the NOR of the vector with itself, which saves one instruction and a register. Patch by Graham Booker % gbooker A tamu P edu% diff -r 65b1bfb3a74c -r 883a5619f52f ppc/h264_altivec.c --- a/ppc/h264_altivec.c Sat Jun 16 15:15:19 2007 +0000 +++ b/ppc/h264_altivec.c Sat Jun 16 18:59:10 2007 +0000 @@ -766,7 +766,6 @@ const vector unsigned char A0v = vec_sl(vec_splat_u8(10), vec_splat_u8(4)); \ \ register vector unsigned char pq0bit = vec_xor(p0,q0); \ - register vector unsigned char temp; \ register vector unsigned char q1minus; \ register vector unsigned char p0minus; \ register vector unsigned char stage1; \ @@ -775,11 +774,10 @@ register vector unsigned char delta; \ register vector unsigned char deltaneg; \ \ - temp = (vector unsigned char)vec_cmpeq(p0, p0); \ - q1minus = vec_xor(temp, q1); /* 255 - q1 */ \ + q1minus = vec_nor(q1, q1); /* 255 - q1 */ \ stage1 = vec_avg(p1, q1minus); /* (p1 - q1 + 256)>>1 */ \ stage2 = vec_sr(stage1, vec_splat_u8(1)); /* (p1 - q1 + 256)>>2 = 64 + (p1 - q1) >> 2 */ \ - p0minus = vec_xor(temp, p0); /* 255 - p0 */ \ + p0minus = vec_nor(p0, p0); /* 255 - p0 */ \ stage1 = vec_avg(q0, p0minus); /* (q0 - p0 + 256)>>1 */ \ pq0bit = vec_and(pq0bit, vec_splat_u8(1)); \ stage2 = vec_avg(stage2, pq0bit); /* 32 + ((q0 - p0)&1 + (p1 - q1) >> 2 + 1) >> 1 */ \