comparison ppc/dsputil_h264_altivec.c @ 3337:bec1eb6d3746 libavcodec

put_pixels16_l2_altivec and avg_pixels16_l2_altivec
author lu_zero
date Sat, 03 Jun 2006 22:18:03 +0000
parents 072dbc669253
children 8bb61d9a2c40
comparison
equal deleted inserted replaced
3336:4d807145f29a 3337:bec1eb6d3746
186 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\ 186 put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(halfV, src+1, SIZE, stride);\
187 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\ 187 put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(halfHV, tmp, src, SIZE, SIZE, stride);\
188 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\ 188 OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, halfV, halfHV, stride, SIZE, SIZE);\
189 }\ 189 }\
190 190
/**
 * Rounded average of two pixel sources into dst:
 *   dst[x] = (src1[x] + src2[x] + 1) >> 1   for a 16-byte row, h rows.
 *
 * src1 is read with stride src_stride1 and may be unaligned.
 * src2 is addressed with a hard-coded stride of 16 (the i * 16 offsets
 * below) — presumably the SIZE==16 scratch buffers the H264_MC macros
 * pass in; confirm against the callers before reusing elsewhere.
 * dst may be unaligned: the averaged vector is merged with the bytes
 * surrounding the 16-byte destination row so nothing outside it is
 * clobbered, at the cost of a read-modify-write of dst.
 */
static inline void put_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
                                    const uint8_t * src2, int dst_stride,
                                    int src_stride1, int h)
{
    int i;
    vector unsigned char a, b, d, tmp1, tmp2, mask, mask_, edges, align;

    /* src2 offsets are all multiples of 16, so its alignment (and thus
     * its lvsl permute mask) never changes: hoist it out of the loop. */
    mask_ = vec_lvsl(0, src2);

    for (i = 0; i < h; i++) {

        /* Unaligned 16-byte load from src1: two aligned loads + permute. */
        tmp1 = vec_ld(i * src_stride1, src1);
        mask = vec_lvsl(i * src_stride1, src1);
        tmp2 = vec_ld(i * src_stride1 + 15, src1);

        a = vec_perm(tmp1, tmp2, mask);

        /* Unaligned 16-byte load from src2 (stride fixed at 16). */
        tmp1 = vec_ld(i * 16, src2);
        tmp2 = vec_ld(i * 16 + 15, src2);

        b = vec_perm(tmp1, tmp2, mask_);

        /* Load the destination neighbourhood so the unaligned store can
         * preserve the bytes outside dst[0..15]. */
        tmp1 = vec_ld(0, dst);
        mask = vec_lvsl(0, dst);
        tmp2 = vec_ld(15, dst);

        /* Byte-wise rounded average. */
        d = vec_avg(a, b);

        /* Rebuild the two aligned vectors covering dst[0..15] from the
         * result and the surrounding (edge) bytes, then store both. */
        edges = vec_perm(tmp2, tmp1, mask);

        align = vec_lvsr(0, dst);

        tmp1 = vec_perm(edges, d, align);
        tmp2 = vec_perm(d, edges, align);

        vec_st(tmp2, 15, dst);
        vec_st(tmp1, 0 , dst);

        dst += dst_stride;
    }
}
222 232
/**
 * Rounded average of two pixel sources, then averaged again into the
 * existing destination (the "avg" MC variant):
 *   dst[x] = (dst[x] + ((src1[x] + src2[x] + 1) >> 1) + 1) >> 1
 * for a 16-byte row, h rows.
 *
 * src1 is read with stride src_stride1 and may be unaligned.
 * src2 is addressed with a hard-coded stride of 16 (the i * 16 offsets
 * below) — presumably the SIZE==16 scratch buffers the H264_MC macros
 * pass in; confirm against the callers before reusing elsewhere.
 * dst may be unaligned; the unaligned store merges the result with the
 * bytes surrounding dst[0..15] so no memory outside the row is touched.
 */
static inline void avg_pixels16_l2_altivec( uint8_t * dst, const uint8_t * src1,
                                    const uint8_t * src2, int dst_stride,
                                    int src_stride1, int h)
{
    int i;
    vector unsigned char a, b, d, tmp1, tmp2, mask, mask_, edges, align;

    /* src2 offsets are all multiples of 16, so its alignment (and thus
     * its lvsl permute mask) never changes: hoist it out of the loop. */
    mask_ = vec_lvsl(0, src2);

    for (i = 0; i < h; i++) {

        /* Unaligned 16-byte load from src1: two aligned loads + permute. */
        tmp1 = vec_ld(i * src_stride1, src1);
        mask = vec_lvsl(i * src_stride1, src1);
        tmp2 = vec_ld(i * src_stride1 + 15, src1);

        a = vec_perm(tmp1, tmp2, mask);

        /* Unaligned 16-byte load from src2 (stride fixed at 16). */
        tmp1 = vec_ld(i * 16, src2);
        tmp2 = vec_ld(i * 16 + 15, src2);

        b = vec_perm(tmp1, tmp2, mask_);

        /* Load the current destination row (also needed as edge bytes for
         * the unaligned store below). */
        tmp1 = vec_ld(0, dst);
        mask = vec_lvsl(0, dst);
        tmp2 = vec_ld(15, dst);

        /* avg(dst, avg(src1, src2)) — the inner average first, then
         * averaged with the existing destination pixels. */
        d = vec_avg(vec_perm(tmp1, tmp2, mask), vec_avg(a, b));

        /* Rebuild the two aligned vectors covering dst[0..15] from the
         * result and the surrounding (edge) bytes, then store both. */
        edges = vec_perm(tmp2, tmp1, mask);

        align = vec_lvsr(0, dst);

        tmp1 = vec_perm(edges, d, align);
        tmp2 = vec_perm(d, edges, align);

        vec_st(tmp2, 15, dst);
        vec_st(tmp1, 0 , dst);

        dst += dst_stride;
    }
}
274
/* Implemented above, but could be faster; the old scalar fallbacks were:
#define put_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) put_pixels16_l2(d,s1,s2,ds,s1s,16,h)
#define avg_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) avg_pixels16_l2(d,s1,s2,ds,s1s,16,h)
*/
227 H264_MC(put_, 16, altivec) 279
228 H264_MC(avg_, 16, altivec) 280 H264_MC(put_, 16, altivec)
281 H264_MC(avg_, 16, altivec)
229 282
230 void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) { 283 void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
231 284
232 #ifdef HAVE_ALTIVEC 285 #ifdef HAVE_ALTIVEC
233 if (has_altivec()) { 286 if (has_altivec()) {