Mercurial > libavcodec.hg
comparison ppc/dsputil_h264_altivec.c @ 3337:bec1eb6d3746 libavcodec
put_pixels16_l2_altivec and avg_pixels16_l2_altivec
author | lu_zero |
---|---|
date | Sat, 03 Jun 2006 22:18:03 +0000 |
parents | 072dbc669253 |
children | 8bb61d9a2c40 |
comparison (legend: equal, deleted, inserted, replaced)
3336:4d807145f29a | 3337:bec1eb6d3746 |
---|---|
186 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\ | 186 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\ |
187 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\ | 187 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\ |
188 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\ | 188 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\ |
189 }\ | 189 }\ |
190 | 190 |
191 | 191 static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1, |
192 /* from dsputil.c */ | 192 const uint8_t * src2, int dst_stride, |
193 static inline void put_pixels8_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) { | 193 int src_stride1, int h) |
194 int i; | 194 { |
195 for (i = 0; i < h; i++) { | 195 int i; |
196 uint32_t a, b; | 196 vector unsigned char a, b, d, tmp1, tmp2, mask, mask_, edges, align; |
197 a = (((const struct unaligned_32 *) (&src1[i * src_stride1]))->l); | 197 |
198 b = (((const struct unaligned_32 *) (&src2[i * src_stride2]))->l); | 198 mask_ = vec_lvsl(0, src2); |
199 *((uint32_t *) & dst[i * dst_stride]) = rnd_avg32(a, b); | 199 |
200 a = (((const struct unaligned_32 *) (&src1[i * src_stride1 + 4]))->l); | 200 for (i = 0; i < h; i++) { |
201 b = (((const struct unaligned_32 *) (&src2[i * src_stride2 + 4]))->l); | 201 |
202 *((uint32_t *) & dst[i * dst_stride + 4]) = rnd_avg32(a, b); | 202 tmp1 = vec_ld(i * src_stride1, src1); |
203 } | 203 mask = vec_lvsl(i * src_stride1, src1); |
204 } static inline void avg_pixels8_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) { | 204 tmp2 = vec_ld(i * src_stride1 + 15, src1); |
205 int i; | 205 |
206 for (i = 0; i < h; i++) { | 206 a = vec_perm(tmp1, tmp2, mask); |
207 uint32_t a, b; | 207 |
208 a = (((const struct unaligned_32 *) (&src1[i * src_stride1]))->l); | 208 tmp1 = vec_ld(i * 16, src2); |
209 b = (((const struct unaligned_32 *) (&src2[i * src_stride2]))->l); | 209 tmp2 = vec_ld(i * 16 + 15, src2); |
210 *((uint32_t *) & dst[i * dst_stride]) = rnd_avg32(*((uint32_t *) & dst[i * dst_stride]), rnd_avg32(a, b)); | 210 |
211 a = (((const struct unaligned_32 *) (&src1[i * src_stride1 + 4]))->l); | 211 b = vec_perm(tmp1, tmp2, mask_); |
212 b = (((const struct unaligned_32 *) (&src2[i * src_stride2 + 4]))->l); | 212 |
213 *((uint32_t *) & dst[i * dst_stride + 4]) = rnd_avg32(*((uint32_t *) & dst[i * dst_stride + 4]), rnd_avg32(a, b)); | 213 tmp1 = vec_ld(0, dst); |
214 } | 214 mask = vec_lvsl(0, dst); |
215 } static inline void put_pixels16_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) { | 215 tmp2 = vec_ld(15, dst); |
216 put_pixels8_l2(dst, src1, src2, dst_stride, src_stride1, src_stride2, h); | 216 |
217 put_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h); | 217 d = vec_avg(a, b); |
218 } static inline void avg_pixels16_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) { | 218 |
219 avg_pixels8_l2(dst, src1, src2, dst_stride, src_stride1, src_stride2, h); | 219 edges = vec_perm(tmp2, tmp1, mask); |
220 avg_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h); | 220 |
221 align = vec_lvsr(0, dst); | |
222 | |
223 tmp1 = vec_perm(edges, d, align); | |
224 tmp2 = vec_perm(d, edges, align); | |
225 | |
226 vec_st(tmp2, 15, dst); | |
227 vec_st(tmp1, 0 , dst); | |
228 | |
229 dst += dst_stride; | |
230 } | |
221 } | 231 } |
222 | 232 |
223 /* UNIMPLEMENTED YET !! */ | 233 static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1, |
234 const uint8_t * src2, int dst_stride, | |
235 int src_stride1, int h) | |
236 { | |
237 int i; | |
238 vector unsigned char a, b, d, tmp1, tmp2, mask, mask_, edges, align; | |
239 | |
240 mask_ = vec_lvsl(0, src2); | |
241 | |
242 for (i = 0; i < h; i++) { | |
243 | |
244 tmp1 = vec_ld(i * src_stride1, src1); | |
245 mask = vec_lvsl(i * src_stride1, src1); | |
246 tmp2 = vec_ld(i * src_stride1 + 15, src1); | |
247 | |
248 a = vec_perm(tmp1, tmp2, mask); | |
249 | |
250 tmp1 = vec_ld(i * 16, src2); | |
251 tmp2 = vec_ld(i * 16 + 15, src2); | |
252 | |
253 b = vec_perm(tmp1, tmp2, mask_); | |
254 | |
255 tmp1 = vec_ld(0, dst); | |
256 mask = vec_lvsl(0, dst); | |
257 tmp2 = vec_ld(15, dst); | |
258 | |
259 d = vec_avg(vec_perm(tmp1, tmp2, mask), vec_avg(a, b)); | |
260 | |
261 edges = vec_perm(tmp2, tmp1, mask); | |
262 | |
263 align = vec_lvsr(0, dst); | |
264 | |
265 tmp1 = vec_perm(edges, d, align); | |
266 tmp2 = vec_perm(d, edges, align); | |
267 | |
268 vec_st(tmp2, 15, dst); | |
269 vec_st(tmp1, 0 , dst); | |
270 | |
271 dst += dst_stride; | |
272 } | |
273 } | |
274 | |
275 /* Implemented but could be faster | |
224 #define put_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) put_pixels16_l2(d,s1,s2,ds,s1s,16,h) | 276 #define put_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) put_pixels16_l2(d,s1,s2,ds,s1s,16,h) |
225 #define avg_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) avg_pixels16_l2(d,s1,s2,ds,s1s,16,h) | 277 #define avg_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) avg_pixels16_l2(d,s1,s2,ds,s1s,16,h) |
226 | 278 */ |
227 H264_MC(put_, 16, altivec) | 279 |
228 H264_MC(avg_, 16, altivec) | 280 H264_MC(put_, 16, altivec) |
281 H264_MC(avg_, 16, altivec) | |
229 | 282 |
230 void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) { | 283 void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) { |
231 | 284 |
232 #ifdef HAVE_ALTIVEC | 285 #ifdef HAVE_ALTIVEC |
233 if (has_altivec()) { | 286 if (has_altivec()) { |