comparison arm/dsputil_neon_s.S @ 9345:e0a7a6338526 libavcodec

ARM: NEON optimized put_signed_pixels_clamped
author conrad
date Sat, 04 Apr 2009 21:02:48 +0000
parents 9ea1ea6db616
children 93c20dd3da43
comparison
equal deleted inserted replaced
9344:9ea1ea6db616 9345:e0a7a6338526
270 270
271 pixfunc put_ pixels8 271 pixfunc put_ pixels8
272 pixfunc2 put_ pixels8_x2, _no_rnd, vhadd.u8 272 pixfunc2 put_ pixels8_x2, _no_rnd, vhadd.u8
273 pixfunc2 put_ pixels8_y2, _no_rnd, vhadd.u8 273 pixfunc2 put_ pixels8_y2, _no_rnd, vhadd.u8
274 pixfunc2 put_ pixels8_xy2, _no_rnd, vshrn.u16, 1 274 pixfunc2 put_ pixels8_xy2, _no_rnd, vshrn.u16, 1
275
@ ff_put_signed_pixels_clamped_neon
@
@ Converts eight rows of eight signed 16-bit values into eight rows of eight
@ unsigned bytes: each value is saturated to the signed 8-bit range
@ [-128, 127] and then biased by +128, giving [0, 255].
@
@ In:    r0 = source, read as 8 consecutive 16-byte rows (128 bytes total);
@             must be 16-byte aligned (":128" alignment qualifier)
@        r1 = destination, 8 bytes written per row; must be 8-byte aligned
@             (":64" alignment qualifier)
@        r2 = byte distance added to r1 after every row store
@             NOTE(review): r2 presumably has to be a multiple of 8 so that
@             every row stays 8-byte aligned - confirm against callers.
@ Out:   none in registers; r0 ends at source + 128, r1 at dest + 8 * r2
@ Clobb: r0, r1, d0-d7, d16-d27, d31 (d8-d15 are not touched)
@
@ NOTE(review): presumably the NEON counterpart of the C
@ put_signed_pixels_clamped(block, pixels, line_size) routine - confirm the
@ prototype where this symbol is declared.
@ NOTE(review): loads, narrowing, bias adds and stores are interleaved rather
@ than grouped, presumably for instruction scheduling; keep the order unless
@ re-benchmarked.
276 function ff_put_signed_pixels_clamped_neon, export=1
277 vmov.u8 d31, #128                   @ d31 = 8 x 128, the signed -> unsigned bias
278 vld1.64 {d16-d17}, [r0,:128]!       @ q8  = row 0 (8 x s16), r0 += 16
279 vqmovn.s16 d0, q8                   @ d0  = row 0 saturated to s8
280 vld1.64 {d18-d19}, [r0,:128]!       @ q9  = row 1
281 vqmovn.s16 d1, q9                   @ d1  = row 1 saturated to s8
282 vld1.64 {d16-d17}, [r0,:128]!       @ q8  = row 2 (row 0 was already narrowed into d0)
283 vqmovn.s16 d2, q8                   @ d2  = row 2 saturated to s8
284 vld1.64 {d18-d19}, [r0,:128]!       @ q9  = row 3 (row 1 was already narrowed into d1)
285 vadd.u8 d0, d0, d31                 @ row 0: +128 modulo 256 maps [-128,127] to [0,255]
286 vld1.64 {d20-d21}, [r0,:128]!       @ q10 = row 4
287 vadd.u8 d1, d1, d31                 @ row 1: apply bias
288 vld1.64 {d22-d23}, [r0,:128]!       @ q11 = row 5
289 vadd.u8 d2, d2, d31                 @ row 2: apply bias
290 vst1.64 {d0}, [r1,:64], r2          @ store row 0 (8 bytes), r1 += r2
291 vqmovn.s16 d3, q9                   @ d3  = row 3 saturated to s8
292 vst1.64 {d1}, [r1,:64], r2          @ store row 1
293 vqmovn.s16 d4, q10                  @ d4  = row 4 saturated to s8
294 vst1.64 {d2}, [r1,:64], r2          @ store row 2
295 vqmovn.s16 d5, q11                  @ d5  = row 5 saturated to s8
296 vld1.64 {d24-d25}, [r0,:128]!       @ q12 = row 6
297 vadd.u8 d3, d3, d31                 @ row 3: apply bias
298 vld1.64 {d26-d27}, [r0,:128]!       @ q13 = row 7 (last load; r0 = source + 128)
299 vadd.u8 d4, d4, d31                 @ row 4: apply bias
300 vadd.u8 d5, d5, d31                 @ row 5: apply bias
301 vst1.64 {d3}, [r1,:64], r2          @ store row 3
302 vqmovn.s16 d6, q12                  @ d6  = row 6 saturated to s8
303 vst1.64 {d4}, [r1,:64], r2          @ store row 4
304 vqmovn.s16 d7, q13                  @ d7  = row 7 saturated to s8
305 vst1.64 {d5}, [r1,:64], r2          @ store row 5
306 vadd.u8 d6, d6, d31                 @ row 6: apply bias
307 vadd.u8 d7, d7, d31                 @ row 7: apply bias
308 vst1.64 {d6}, [r1,:64], r2          @ store row 6
309 vst1.64 {d7}, [r1,:64], r2          @ store row 7 (r1 = dest + 8 * r2)
310 bx lr                               @ return to caller
311 .endfunc
275 312
276 function ff_add_pixels_clamped_neon, export=1 313 function ff_add_pixels_clamped_neon, export=1
277 mov r3, r1 314 mov r3, r1
278 vld1.64 {d16}, [r1,:64], r2 315 vld1.64 {d16}, [r1,:64], r2
279 vld1.64 {d0-d1}, [r0,:128]! 316 vld1.64 {d0-d1}, [r0,:128]!