Mercurial > libavcodec.hg
comparison ppc/h264_altivec.c @ 5159:883a5619f52f libavcodec
Use a faster way to compute 255-val: instead of creating a vector of
all 255s and then doing the subtraction, nor the vector with itself — this saves
one instruction and a register.
Patch by Graham Booker % gbooker A tamu P edu%
author | gpoirier |
---|---|
date | Sat, 16 Jun 2007 18:59:10 +0000 |
parents | 68d85146620a |
children | 830b9dd36fef |
comparison
equal
deleted
inserted
replaced
5158:65b1bfb3a74c | 5159:883a5619f52f |
---|---|
764 #define h264_deblock_p0_q0(p0, p1, q0, q1, tc0masked) { \ | 764 #define h264_deblock_p0_q0(p0, p1, q0, q1, tc0masked) { \ |
765 \ | 765 \ |
766 const vector unsigned char A0v = vec_sl(vec_splat_u8(10), vec_splat_u8(4)); \ | 766 const vector unsigned char A0v = vec_sl(vec_splat_u8(10), vec_splat_u8(4)); \ |
767 \ | 767 \ |
768 register vector unsigned char pq0bit = vec_xor(p0,q0); \ | 768 register vector unsigned char pq0bit = vec_xor(p0,q0); \ |
769 register vector unsigned char temp; \ | |
770 register vector unsigned char q1minus; \ | 769 register vector unsigned char q1minus; \ |
771 register vector unsigned char p0minus; \ | 770 register vector unsigned char p0minus; \ |
772 register vector unsigned char stage1; \ | 771 register vector unsigned char stage1; \ |
773 register vector unsigned char stage2; \ | 772 register vector unsigned char stage2; \ |
774 register vector unsigned char vec160; \ | 773 register vector unsigned char vec160; \ |
775 register vector unsigned char delta; \ | 774 register vector unsigned char delta; \ |
776 register vector unsigned char deltaneg; \ | 775 register vector unsigned char deltaneg; \ |
777 \ | 776 \ |
778 temp = (vector unsigned char)vec_cmpeq(p0, p0); \ | 777 q1minus = vec_nor(q1, q1); /* 255 - q1 */ \ |
779 q1minus = vec_xor(temp, q1); /* 255 - q1 */ \ | |
780 stage1 = vec_avg(p1, q1minus); /* (p1 - q1 + 256)>>1 */ \ | 778 stage1 = vec_avg(p1, q1minus); /* (p1 - q1 + 256)>>1 */ \ |
781 stage2 = vec_sr(stage1, vec_splat_u8(1)); /* (p1 - q1 + 256)>>2 = 64 + (p1 - q1) >> 2 */ \ | 779 stage2 = vec_sr(stage1, vec_splat_u8(1)); /* (p1 - q1 + 256)>>2 = 64 + (p1 - q1) >> 2 */ \ |
782 p0minus = vec_xor(temp, p0); /* 255 - p0 */ \ | 780 p0minus = vec_nor(p0, p0); /* 255 - p0 */ \ |
783 stage1 = vec_avg(q0, p0minus); /* (q0 - p0 + 256)>>1 */ \ | 781 stage1 = vec_avg(q0, p0minus); /* (q0 - p0 + 256)>>1 */ \ |
784 pq0bit = vec_and(pq0bit, vec_splat_u8(1)); \ | 782 pq0bit = vec_and(pq0bit, vec_splat_u8(1)); \ |
785 stage2 = vec_avg(stage2, pq0bit); /* 32 + ((q0 - p0)&1 + (p1 - q1) >> 2 + 1) >> 1 */ \ | 783 stage2 = vec_avg(stage2, pq0bit); /* 32 + ((q0 - p0)&1 + (p1 - q1) >> 2 + 1) >> 1 */ \ |
786 stage2 = vec_adds(stage2, stage1); /* 160 + ((p0 - q0) + (p1 - q1) >> 2 + 1) >> 1 */ \ | 784 stage2 = vec_adds(stage2, stage1); /* 160 + ((p0 - q0) + (p1 - q1) >> 2 + 1) >> 1 */ \ |
787 vec160 = vec_ld(0, &A0v); \ | 785 vec160 = vec_ld(0, &A0v); \ |