comparison ppc/gmc_altivec.c @ 1009:3b7cc8e4b83f libavcodec

AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
author michaelni
date Thu, 16 Jan 2003 21:54:55 +0000
parents 95cbffdc98a9
children 35cf2f4a0f8c
comparison
equal deleted inserted replaced
1008:fb6cbb8a04a3 1009:3b7cc8e4b83f
26 altivec-enhanced gmc1. ATM this code assumes stride is a multiple of 8, 26 altivec-enhanced gmc1. ATM this code assumes stride is a multiple of 8,
27 to preserve proper dst alignment. 27 to preserve proper dst alignment.
28 */ 28 */
29 void gmc1_altivec(UINT8 *dst /* align 8 */, UINT8 *src /* align1 */, int stride, int h, int x16, int y16, int rounder) 29 void gmc1_altivec(UINT8 *dst /* align 8 */, UINT8 *src /* align1 */, int stride, int h, int x16, int y16, int rounder)
30 { 30 {
31 #if 0 31 ALTIVEC_TBL_DECLARE(altivec_gmc1_num, h == 8);
32 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
32 const int A=(16-x16)*(16-y16); 33 const int A=(16-x16)*(16-y16);
33 const int B=( x16)*(16-y16); 34 const int B=( x16)*(16-y16);
34 const int C=(16-x16)*( y16); 35 const int C=(16-x16)*( y16);
35 const int D=( x16)*( y16); 36 const int D=( x16)*( y16);
36
37 int i; 37 int i;
38
39 ALTIVEC_TBL_START_COUNT(altivec_gmc1_num, h == 8);
38 40
39 for(i=0; i<h; i++) 41 for(i=0; i<h; i++)
40 { 42 {
41 dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8; 43 dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
42 dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8; 44 dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
47 dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8; 49 dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8;
48 dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8; 50 dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
49 dst+= stride; 51 dst+= stride;
50 src+= stride; 52 src+= stride;
51 } 53 }
52 #else 54
55 ALTIVEC_TBL_STOP_COUNT(altivec_gmc1_num, h == 8);
56
57 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
53 const unsigned short __attribute__ ((aligned(16))) rounder_a[8] = 58 const unsigned short __attribute__ ((aligned(16))) rounder_a[8] =
54 {rounder, rounder, rounder, rounder, 59 {rounder, rounder, rounder, rounder,
55 rounder, rounder, rounder, rounder}; 60 rounder, rounder, rounder, rounder};
56 const unsigned short __attribute__ ((aligned(16))) ABCD[8] = 61 const unsigned short __attribute__ ((aligned(16))) ABCD[8] =
57 { 62 {
59 ( x16)*(16-y16), /* B */ 64 ( x16)*(16-y16), /* B */
60 (16-x16)*( y16), /* C */ 65 (16-x16)*( y16), /* C */
61 ( x16)*( y16), /* D */ 66 ( x16)*( y16), /* D */
62 0, 0, 0, 0 /* padding */ 67 0, 0, 0, 0 /* padding */
63 }; 68 };
64
65 register const vector unsigned char vczero = (const vector unsigned char)(0); 69 register const vector unsigned char vczero = (const vector unsigned char)(0);
66 register const vector unsigned short vcsr8 = (const vector unsigned short)(8); 70 register const vector unsigned short vcsr8 = (const vector unsigned short)(8);
67 register vector unsigned char dstv, dstv2, src_0, src_1, srcvA, srcvB, srcvC, srcvD; 71 register vector unsigned char dstv, dstv2, src_0, src_1, srcvA, srcvB, srcvC, srcvD;
68 register vector unsigned short Av, Bv, Cv, Dv, rounderV, tempA, tempB, tempC, tempD; 72 register vector unsigned short Av, Bv, Cv, Dv, rounderV, tempA, tempB, tempC, tempD;
69 int i; 73 int i;
70 unsigned long dst_odd = (unsigned long)dst & 0x0000000F; 74 unsigned long dst_odd = (unsigned long)dst & 0x0000000F;
71 unsigned long src_really_odd = (unsigned long)src & 0x0000000F; 75 unsigned long src_really_odd = (unsigned long)src & 0x0000000F;
76
77
78 ALTIVEC_TBL_START_COUNT(altivec_gmc1_num, h == 8);
72 79
73 tempA = vec_ld(0, (unsigned short*)ABCD); 80 tempA = vec_ld(0, (unsigned short*)ABCD);
74 Av = vec_splat(tempA, 0); 81 Av = vec_splat(tempA, 0);
75 Bv = vec_splat(tempA, 1); 82 Bv = vec_splat(tempA, 1);
76 Cv = vec_splat(tempA, 2); 83 Cv = vec_splat(tempA, 2);
153 vec_st(dstv2, 0, dst); 160 vec_st(dstv2, 0, dst);
154 161
155 dst += stride; 162 dst += stride;
156 src += stride; 163 src += stride;
157 } 164 }
158 #endif 165
166 ALTIVEC_TBL_STOP_COUNT(altivec_gmc1_num, h == 8);
167
168 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
159 } 169 }