diff arm/dsputil_neon.S @ 10375:199949177888 libavcodec

ARM: NEON avg_pixels8 and avg_h264_qpel8_mc00
author mru
date Wed, 07 Oct 2009 21:35:19 +0000
parents b72bb442a775
children 5c5b864d66e1
line wrap: on
line diff
--- a/arm/dsputil_neon.S	Tue Oct 06 21:55:41 2009 +0000
+++ b/arm/dsputil_neon.S	Wed Oct 07 21:35:19 2009 +0000
@@ -139,7 +139,7 @@
         bx              lr
         .endm
 
-        .macro pixels8
+        .macro pixels8 avg=0
 1:      vld1.64         {d0}, [r1], r2
         vld1.64         {d1}, [r1], r2
         vld1.64         {d2}, [r1], r2
@@ -148,6 +148,17 @@
         pld             [r1]
         pld             [r1, r2]
         pld             [r1, r2, lsl #1]
+.if \avg
+        vld1.64         {d4}, [r0,:64], r2
+        vrhadd.u8       d0,  d0,  d4
+        vld1.64         {d5}, [r0,:64], r2
+        vrhadd.u8       d1,  d1,  d5
+        vld1.64         {d6}, [r0,:64], r2
+        vrhadd.u8       d2,  d2,  d6
+        vld1.64         {d7}, [r0,:64], r2
+        vrhadd.u8       d3,  d3,  d7
+        sub             r0,  r0,  r2,  lsl #2
+.endif
         subs            r3,  r3,  #4
         vst1.64         {d0}, [r0,:64], r2
         vst1.64         {d1}, [r0,:64], r2
@@ -261,6 +272,12 @@
         pixfunc2 put_ pixels8_y2,   _no_rnd, vhadd.u8
         pixfunc2 put_ pixels8_xy2,  _no_rnd, vshrn.u16, 1
 
+function ff_avg_h264_qpel8_mc00_neon, export=1
+        mov             r3,  #8
+        .endfunc
+
+        pixfunc  avg_ pixels8,, 1
+
 function ff_put_pixels_clamped_neon, export=1
         vld1.64         {d16-d19}, [r0,:128]!
         vqmovun.s16     d0, q8