Mercurial > libavcodec.hg
comparison ppc/gmc_altivec.c @ 1352:e8ff4783f188 libavcodec
1) remove TBL support from the PPC performance code. It's much more useful to use the
PMCs, and with Apple's CHUD it's fairly easy too. There is no reason to keep useless
code around.
2) make the PPC perf stuff a configure option
3) make put_pixels16_altivec a bit faster by unrolling the loop by 4
patch by (Romain Dolbeau <dolbeau at irisa dot fr>)
author | michaelni |
---|---|
date | Wed, 09 Jul 2003 20:18:13 +0000 |
parents | 09b8fe0f0139 |
children | b370288f004d |
comparison
equal
deleted
inserted
replaced
1351:0fc1a6f8ca94 | 1352:e8ff4783f188 |
---|---|
29 to preserve proper dst alignement. | 29 to preserve proper dst alignement. |
30 */ | 30 */ |
31 #define GMC1_PERF_COND (h==8) | 31 #define GMC1_PERF_COND (h==8) |
32 void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int stride, int h, int x16, int y16, int rounder) | 32 void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int stride, int h, int x16, int y16, int rounder) |
33 { | 33 { |
34 POWERPC_TBL_DECLARE(altivec_gmc1_num, GMC1_PERF_COND); | 34 POWERPC_PERF_DECLARE(altivec_gmc1_num, GMC1_PERF_COND); |
35 #ifdef ALTIVEC_USE_REFERENCE_C_CODE | 35 #ifdef ALTIVEC_USE_REFERENCE_C_CODE |
36 const int A=(16-x16)*(16-y16); | 36 const int A=(16-x16)*(16-y16); |
37 const int B=( x16)*(16-y16); | 37 const int B=( x16)*(16-y16); |
38 const int C=(16-x16)*( y16); | 38 const int C=(16-x16)*( y16); |
39 const int D=( x16)*( y16); | 39 const int D=( x16)*( y16); |
40 int i; | 40 int i; |
41 | 41 |
42 POWERPC_TBL_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND); | 42 POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND); |
43 | 43 |
44 for(i=0; i<h; i++) | 44 for(i=0; i<h; i++) |
45 { | 45 { |
46 dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8; | 46 dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8; |
47 dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8; | 47 dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8; |
53 dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8; | 53 dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8; |
54 dst+= stride; | 54 dst+= stride; |
55 src+= stride; | 55 src+= stride; |
56 } | 56 } |
57 | 57 |
58 POWERPC_TBL_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND); | 58 POWERPC_PERF_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND); |
59 | 59 |
60 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | 60 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ |
61 const unsigned short __attribute__ ((aligned(16))) rounder_a[8] = | 61 const unsigned short __attribute__ ((aligned(16))) rounder_a[8] = |
62 {rounder, rounder, rounder, rounder, | 62 {rounder, rounder, rounder, rounder, |
63 rounder, rounder, rounder, rounder}; | 63 rounder, rounder, rounder, rounder}; |
76 int i; | 76 int i; |
77 unsigned long dst_odd = (unsigned long)dst & 0x0000000F; | 77 unsigned long dst_odd = (unsigned long)dst & 0x0000000F; |
78 unsigned long src_really_odd = (unsigned long)src & 0x0000000F; | 78 unsigned long src_really_odd = (unsigned long)src & 0x0000000F; |
79 | 79 |
80 | 80 |
81 POWERPC_TBL_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND); | 81 POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND); |
82 | 82 |
83 tempA = vec_ld(0, (unsigned short*)ABCD); | 83 tempA = vec_ld(0, (unsigned short*)ABCD); |
84 Av = vec_splat(tempA, 0); | 84 Av = vec_splat(tempA, 0); |
85 Bv = vec_splat(tempA, 1); | 85 Bv = vec_splat(tempA, 1); |
86 Cv = vec_splat(tempA, 2); | 86 Cv = vec_splat(tempA, 2); |
164 | 164 |
165 dst += stride; | 165 dst += stride; |
166 src += stride; | 166 src += stride; |
167 } | 167 } |
168 | 168 |
169 POWERPC_TBL_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND); | 169 POWERPC_PERF_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND); |
170 | 170 |
171 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | 171 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ |
172 } | 172 } |