comparison ppc/gmc_altivec.c @ 2967:ef2149182f1c libavcodec

COSMETICS: Remove all trailing whitespace.
author diego
date Sat, 17 Dec 2005 18:14:38 +0000
parents b370288f004d
children 0b546eab515d
comparison: 2966:564788471dd4 vs. 2967:ef2149182f1c (the two revisions differ only in trailing whitespace)
 38     const int C=(16-x16)*( y16);
 39     const int D=( x16)*( y16);
 40     int i;
 41
 42 POWERPC_PERF_START_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
 43
 44     for(i=0; i<h; i++)
 45     {
 46         dst[0]= (A*src[0] + B*src[1] + C*src[stride+0] + D*src[stride+1] + rounder)>>8;
 47         dst[1]= (A*src[1] + B*src[2] + C*src[stride+1] + D*src[stride+2] + rounder)>>8;
 48         dst[2]= (A*src[2] + B*src[3] + C*src[stride+2] + D*src[stride+3] + rounder)>>8;
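For context, the reference C path above is a bilinear interpolation: each output pixel is a weighted average of a 2x2 source neighbourhood, with weights derived from the 4-bit fractional motion offsets x16 and y16 (A and B are defined analogously just above this excerpt). The four weights always sum to 16*16 = 256, so the >>8 normalizes the sum and rounder supplies the rounding term. A minimal sketch of a single output pixel; the helper name gmc1_pixel is purely illustrative and does not appear in the file:

/* Illustrative only: one output pixel of the bilinear GMC filter above.
 * The weights always sum to 256, so >>8 yields a rounded weighted average. */
static inline int gmc1_pixel(const unsigned char *src, int stride,
                             int x16, int y16, int rounder)
{
    const int A = (16 - x16) * (16 - y16);
    const int B = (     x16) * (16 - y16);
    const int C = (16 - x16) * (     y16);
    const int D = (     x16) * (     y16);  /* A + B + C + D == 256 */

    return (A * src[0]      + B * src[1] +
            C * src[stride] + D * src[stride + 1] + rounder) >> 8;
}

For example, with x16 = y16 = 8 all four weights are 64, and the result is simply the rounded average of the four neighbouring pixels.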
 85     Bv = vec_splat(tempA, 1);
 86     Cv = vec_splat(tempA, 2);
 87     Dv = vec_splat(tempA, 3);
 88
 89     rounderV = vec_ld(0, (unsigned short*)rounder_a);
 90
 91     // we'll be able to pick-up our 9 char elements
 92     // at src from those 32 bytes
 93     // we load the first batch here, as inside the loop
 94     // we can re-use 'src+stride' from one iteration
 95     // as the 'src' of the next.
 96     src_0 = vec_ld(0, src);
 97     src_1 = vec_ld(16, src);
 98     srcvA = vec_perm(src_0, src_1, vec_lvsl(0, src));
 99
100     if (src_really_odd != 0x0000000F)
101     { // if src & 0xF == 0xF, then (src+1) is properly aligned on the second vector.
102         srcvB = vec_perm(src_0, src_1, vec_lvsl(1, src));
103     }
104     else
105     {
106         srcvB = src_1;
107     }
108     srcvA = vec_mergeh(vczero, srcvA);
109     srcvB = vec_mergeh(vczero, srcvB);
110
111     for(i=0; i<h; i++)
112     {
113         dst_odd = (unsigned long)dst & 0x0000000F;
114         src_really_odd = (((unsigned long)src) + stride) & 0x0000000F;
115
116         dstv = vec_ld(0, dst);
117
118         // we'll be able to pick-up our 9 char elements
119         // at src + stride from those 32 bytes
120         // then reuse the resulting 2 vectors srcvC and srcvD
121         // as the next srcvA and srcvB
122         src_0 = vec_ld(stride + 0, src);
123         src_1 = vec_ld(stride + 16, src);
124         srcvC = vec_perm(src_0, src_1, vec_lvsl(stride + 0, src));
125
126         if (src_really_odd != 0x0000000F)
127         { // if src & 0xF == 0xF, then (src+1) is properly aligned on the second vector.
128             srcvD = vec_perm(src_0, src_1, vec_lvsl(stride + 1, src));
129         }
130         else
131         {
132             srcvD = src_1;
133         }
134
135         srcvC = vec_mergeh(vczero, srcvC);
136         srcvD = vec_mergeh(vczero, srcvD);
137
138
139         // OK, now we (finally) do the math :-)
140         // these four instructions replace 32 int muls & 32 int adds.
141         // isn't AltiVec nice?
142         tempA = vec_mladd((vector unsigned short)srcvA, Av, rounderV);
143         tempB = vec_mladd((vector unsigned short)srcvB, Bv, tempA);
144         tempC = vec_mladd((vector unsigned short)srcvC, Cv, tempB);
145         tempD = vec_mladd((vector unsigned short)srcvD, Dv, tempC);
146
147         srcvA = srcvC;
148         srcvB = srcvD;
149
150         tempD = vec_sr(tempD, vcsr8);
151
152         dstv2 = vec_pack(tempD, (vector unsigned short)vczero);
153
154         if (dst_odd)
155         {
156             dstv2 = vec_perm(dstv, dstv2, vcprm(0,1,s0,s1));
157         }
158         else
159         {
160             dstv2 = vec_perm(dstv, dstv2, vcprm(s0,s1,2,3));
161         }
162
163         vec_st(dstv2, 0, dst);
164
165         dst += stride;
166         src += stride;
167     }
168
169 POWERPC_PERF_STOP_COUNT(altivec_gmc1_num, GMC1_PERF_COND);
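Both the setup and the loop above rely on the classic AltiVec unaligned-load idiom: two aligned vec_ld loads cover the bytes of interest, and vec_perm with the permute vector produced by vec_lvsl rotates them into place. A minimal standalone sketch of that idiom, assuming <altivec.h> and a big-endian PowerPC target; the helper name load_unaligned16 is illustrative and not part of this file:

#include <altivec.h>

/* Sketch of the vec_ld / vec_lvsl / vec_perm pattern used for src_0,
 * src_1 and srcvA/srcvB above: two aligned loads span the unaligned
 * 16-byte window at p, and the lvsl permute selects the wanted bytes.
 * Like the code above, this may read past p[15] when p is already
 * 16-byte aligned. */
static vector unsigned char load_unaligned16(const unsigned char *p)
{
    vector unsigned char lo = vec_ld(0,  p);
    vector unsigned char hi = vec_ld(16, p);
    return vec_perm(lo, hi, vec_lvsl(0, p));
}

The special case for src_really_odd == 0x0F exists because src + 1 is then itself 16-byte aligned, so the second aligned load (src_1) already holds exactly the bytes needed for srcvB/srcvD.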
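Likewise, the four vec_mladd calls above accumulate A*src[x] + B*src[x+1] + C*src[x+stride] + D*src[x+stride+1] + rounder in eight parallel 16-bit lanes, which is why the comment can claim they replace 32 integer multiplies and 32 adds. A sketch of that accumulation step under the same <altivec.h> assumption; the helper name weighted_sum is hypothetical:

/* Sketch of the vec_mladd chain above: each call performs eight
 * 16-bit multiply-accumulates, so the four calls together compute
 * A*a + B*b + C*c + D*d + rounder for eight pixels at once.
 * Av/Bv/Cv/Dv and rounderV are splatted 16-bit weight vectors, as in
 * the code above. */
static vector unsigned short weighted_sum(vector unsigned short a,
                                          vector unsigned short b,
                                          vector unsigned short c,
                                          vector unsigned short d,
                                          vector unsigned short Av,
                                          vector unsigned short Bv,
                                          vector unsigned short Cv,
                                          vector unsigned short Dv,
                                          vector unsigned short rounderV)
{
    vector unsigned short acc = vec_mladd(a, Av, rounderV);
    acc = vec_mladd(b, Bv, acc);
    acc = vec_mladd(c, Cv, acc);
    acc = vec_mladd(d, Dv, acc);
    return vec_sr(acc, vec_splat_u16(8));  /* >> 8, as with vcsr8 above */
}

The remaining per-row work in the real loop, packing the 16-bit results back to bytes with vec_pack and merging them into the existing destination vector with vec_perm/vcprm when dst is not 16-byte aligned, is specific to this function's 8-pixel-wide output and is left out of the sketch.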