comparison ppc/gmc_altivec.c @ 1352:e8ff4783f188 libavcodec

1) Remove TBL support in PPC performance. It's much more useful to use the PMCs, and with Apple's CHUD it's fairly easy too. No reason to keep useless code around. 2) Make the PPC perf stuff a configure option. 3) Make put_pixels16_altivec a bit faster by unrolling the loop by 4. Patch by Romain Dolbeau <dolbeau at irisa dot fr>.
author michaelni
date Wed, 09 Jul 2003 20:18:13 +0000
parents 09b8fe0f0139
children b370288f004d
comparison
equal deleted inserted replaced
1351:0fc1a6f8ca94 1352:e8ff4783f188
29 to preserve proper dst alignment. 29 to preserve proper dst alignment.
30 */ 30 */
31 #define GMC1_PERF_COND (h==8) 31 #define GMC1_PERF_COND (h==8)
32 void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int stride, int h, int x16, int y16, int rounder) 32 void gmc1_altivec(uint8_t *dst /* align 8 */, uint8_t *src /* align1 */, int stride, int h, int x16, int y16, int rounder)
33 { 33 {
34 POWERPC_TBL_DECLARE(altivec_gmc1_num, GMC1_PERF_COND); 34 POWERPC_PERF_DECLARE(altivec_gmc1_num, GMC1_PERF_COND);
35 #ifdef ALTIVEC_USE_REFERENCE_C_CODE 35 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
36 const int A=(16-x16)*(16-y16); 36 const int A=(16-x16)*(16-y16);
37 const int B=( x16)*(16-y16); 37 const int B=( x16)*(16-y16);
38 const int C=(16-x16)*( y16); 38 const int C=(16-x16)*( y16);
39 const int D=( x16)*( y16); 39 const int D=( x16)*( y16);
40 int i; 40 int i;
41 41
42 POWERPC_TBL_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND); 42 POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
43 43
44 for(i=0; i<h; i++) 44 for(i=0; i<h; i++)
45 { 45 {
46 dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8; 46 dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
47 dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8; 47 dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
53 dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8; 53 dst[7]= (A*src[7] + B*src[8] + C*src[stride+7] + D*src[stride+8] + rounder)>>8;
54 dst+= stride; 54 dst+= stride;
55 src+= stride; 55 src+= stride;
56 } 56 }
57 57
58 POWERPC_TBL_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND); 58 POWERPC_PERF_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
59 59
60 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ 60 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
61 const unsigned short __attribute__ ((aligned(16))) rounder_a[8] = 61 const unsigned short __attribute__ ((aligned(16))) rounder_a[8] =
62 {rounder, rounder, rounder, rounder, 62 {rounder, rounder, rounder, rounder,
63 rounder, rounder, rounder, rounder}; 63 rounder, rounder, rounder, rounder};
76 int i; 76 int i;
77 unsigned long dst_odd = (unsigned long)dst & 0x0000000F; 77 unsigned long dst_odd = (unsigned long)dst & 0x0000000F;
78 unsigned long src_really_odd = (unsigned long)src & 0x0000000F; 78 unsigned long src_really_odd = (unsigned long)src & 0x0000000F;
79 79
80 80
81 POWERPC_TBL_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND); 81 POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
82 82
83 tempA = vec_ld(0, (unsigned short*)ABCD); 83 tempA = vec_ld(0, (unsigned short*)ABCD);
84 Av = vec_splat(tempA, 0); 84 Av = vec_splat(tempA, 0);
85 Bv = vec_splat(tempA, 1); 85 Bv = vec_splat(tempA, 1);
86 Cv = vec_splat(tempA, 2); 86 Cv = vec_splat(tempA, 2);
164 164
165 dst += stride; 165 dst += stride;
166 src += stride; 166 src += stride;
167 } 167 }
168 168
169 POWERPC_TBL_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND); 169 POWERPC_PERF_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
170 170
171 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ 171 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
172 } 172 }