Mercurial > libavcodec.hg
comparison ppc/gmc_altivec.c @ 1009:3b7cc8e4b83f libavcodec
AltiVec perf (take 2), plus a couple of AltiVec functions by Romain Dolbeau <dolbeau at irisa dot fr>
author | michaelni |
---|---|
date | Thu, 16 Jan 2003 21:54:55 +0000 |
parents | 95cbffdc98a9 |
children | 35cf2f4a0f8c |
comparison
equal
deleted
inserted
replaced
1008:fb6cbb8a04a3 | 1009:3b7cc8e4b83f |
---|---|
26 altivec-enhanced gmc1. ATM this code assumes stride is a multiple of 8, | 26 altivec-enhanced gmc1. ATM this code assumes stride is a multiple of 8, |
27 to preserve proper dst alignment. | 27 to preserve proper dst alignment. |
28 */ | 28 */ |
29 void gmc1_altivec(UINT8 *dst /* align 8 */, UINT8 *src /* align1 */, int stride, int h, int x16, int y16, int rounder) | 29 void gmc1_altivec(UINT8 *dst /* align 8 */, UINT8 *src /* align1 */, int stride, int h, int x16, int y16, int rounder) |
30 { | 30 { |
31 #if 0 | 31 ALTIVEC_TBL_DECLARE(altivec_gmc1_num, h == 8); |
32 #ifdef ALTIVEC_USE_REFERENCE_C_CODE | |
32 const int A=(16-x16)*(16-y16); | 33 const int A=(16-x16)*(16-y16); |
33 const int B=( x16)*(16-y16); | 34 const int B=( x16)*(16-y16); |
34 const int C=(16-x16)*( y16); | 35 const int C=(16-x16)*( y16); |
35 const int D=( x16)*( y16); | 36 const int D=( x16)*( y16); |
36 | |
37 int i; | 37 int i; |
38 | |
39 ALTIVEC_TBL_START_COUNT(altivec_gmc1_num, h == 8); | |
38 | 40 |
39 for(i=0; i<h; i++) | 41 for(i=0; i<h; i++) |
40 { | 42 { |
41 dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8; | 43 dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8; |
42 dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8; | 44 dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8; |
47 dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8; | 49 dst[6]= (A*src[6] + B*src[7] + C*src[stride+6] + D*src[stride+7] + rounder)>>8; |
48 dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8; | 50 dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8; |
49 dst+= stride; | 51 dst+= stride; |
50 src+= stride; | 52 src+= stride; |
51 } | 53 } |
52 #else | 54 |
55 ALTIVEC_TBL_STOP_COUNT(altivec_gmc1_num, h == 8); | |
56 | |
57 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | |
53 const unsigned short __attribute__ ((aligned(16))) rounder_a[8] = | 58 const unsigned short __attribute__ ((aligned(16))) rounder_a[8] = |
54 {rounder, rounder, rounder, rounder, | 59 {rounder, rounder, rounder, rounder, |
55 rounder, rounder, rounder, rounder}; | 60 rounder, rounder, rounder, rounder}; |
56 const unsigned short __attribute__ ((aligned(16))) ABCD[8] = | 61 const unsigned short __attribute__ ((aligned(16))) ABCD[8] = |
57 { | 62 { |
59 ( x16)*(16-y16), /* B */ | 64 ( x16)*(16-y16), /* B */ |
60 (16-x16)*( y16), /* C */ | 65 (16-x16)*( y16), /* C */ |
61 ( x16)*( y16), /* D */ | 66 ( x16)*( y16), /* D */ |
62 0, 0, 0, 0 /* padding */ | 67 0, 0, 0, 0 /* padding */ |
63 }; | 68 }; |
64 | |
65 register const vector unsigned char vczero = (const vector unsigned char)(0); | 69 register const vector unsigned char vczero = (const vector unsigned char)(0); |
66 register const vector unsigned short vcsr8 = (const vector unsigned short)(8); | 70 register const vector unsigned short vcsr8 = (const vector unsigned short)(8); |
67 register vector unsigned char dstv, dstv2, src_0, src_1, srcvA, srcvB, srcvC, srcvD; | 71 register vector unsigned char dstv, dstv2, src_0, src_1, srcvA, srcvB, srcvC, srcvD; |
68 register vector unsigned short Av, Bv, Cv, Dv, rounderV, tempA, tempB, tempC, tempD; | 72 register vector unsigned short Av, Bv, Cv, Dv, rounderV, tempA, tempB, tempC, tempD; |
69 int i; | 73 int i; |
70 unsigned long dst_odd = (unsigned long)dst & 0x0000000F; | 74 unsigned long dst_odd = (unsigned long)dst & 0x0000000F; |
71 unsigned long src_really_odd = (unsigned long)src & 0x0000000F; | 75 unsigned long src_really_odd = (unsigned long)src & 0x0000000F; |
76 | |
77 | |
78 ALTIVEC_TBL_START_COUNT(altivec_gmc1_num, h == 8); | |
72 | 79 |
73 tempA = vec_ld(0, (unsigned short*)ABCD); | 80 tempA = vec_ld(0, (unsigned short*)ABCD); |
74 Av = vec_splat(tempA, 0); | 81 Av = vec_splat(tempA, 0); |
75 Bv = vec_splat(tempA, 1); | 82 Bv = vec_splat(tempA, 1); |
76 Cv = vec_splat(tempA, 2); | 83 Cv = vec_splat(tempA, 2); |
153 vec_st(dstv2, 0, dst); | 160 vec_st(dstv2, 0, dst); |
154 | 161 |
155 dst += stride; | 162 dst += stride; |
156 src += stride; | 163 src += stride; |
157 } | 164 } |
158 #endif | 165 |
166 ALTIVEC_TBL_STOP_COUNT(altivec_gmc1_num, h == 8); | |
167 | |
168 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | |
159 } | 169 } |