changeset 11114:852772c36cc6 libavcodec

ARMv6 optimised diff_pixels
author mru
date Tue, 09 Feb 2010 16:13:41 +0000
parents b529129c4563
children 3fba8a5c6288
files arm/dsputil_armv6.S arm/dsputil_init_armv6.c
diffstat 2 files changed, 36 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/arm/dsputil_armv6.S	Tue Feb 09 16:13:38 2010 +0000
+++ b/arm/dsputil_armv6.S	Tue Feb 09 16:13:41 2010 +0000
@@ -309,6 +309,39 @@
         pop             {r4-r8, pc}
 .endfunc
 
+function ff_diff_pixels_armv6, export=1 @ 8 rows of 8 byte differences stored as halfwords; r0 = dst, r1/r2 = sources, r3 = stride
+        pld             [r1, r3]            @ prefetch at r1 + stride
+        pld             [r2, r3]            @ prefetch at r2 + stride
+        push            {r4-r9, lr}         @ save r4-r9 and lr; lr is reused as the row counter
+        mov             lr,  #8             @ lr = rows remaining
+1:                                          @ one iteration per row; d0..d7 below = byte n of r4:r5 minus byte n of r6:r7
+        ldrd            r4,  r5,  [r1],  r3 @ r4:r5 = 8 bytes from r1, then r1 += r3
+        ldrd            r6,  r7,  [r2],  r3 @ r6:r7 = 8 bytes from r2, then r2 += r3
+        uxtb16          r8,  r4             @ r8 = bytes 0,2 of r4 zero-extended to halfwords
+        uxtb16          r4,  r4,  ror #8    @ r4 = bytes 1,3 of r4 as halfwords
+        uxtb16          r9,  r6             @ r9 = bytes 0,2 of r6 as halfwords
+        uxtb16          r6,  r6,  ror #8    @ r6 = bytes 1,3 of r6 as halfwords
+        pld             [r1, r3]            @ prefetch at r1 + stride (r1 was already advanced)
+        ssub16          r9,  r8,  r9        @ r9 = {d0, d2} as signed halfwords
+        ssub16          r6,  r4,  r6        @ r6 = {d1, d3}
+        uxtb16          r8,  r5             @ second word: r8 = bytes 0,2 of r5 (old r8 already consumed)
+        uxtb16          r5,  r5,  ror #8    @ r5 = bytes 1,3 of r5
+        pld             [r2, r3]            @ prefetch at r2 + stride (r2 was already advanced)
+        pkhbt           r4,  r9,  r6,  lsl #16 @ r4 = d0 in low half, d1 in high half
+        pkhtb           r6,  r6,  r9,  asr #16 @ r6 = d2 in low half, d3 in high half
+        uxtb16          r9,  r7             @ r9 = bytes 0,2 of r7 (old r9 already packed above)
+        uxtb16          r7,  r7,  ror #8    @ r7 = bytes 1,3 of r7
+        ssub16          r9,  r8,  r9        @ r9 = {d4, d6}
+        ssub16          r5,  r5,  r7        @ r5 = {d5, d7}
+        subs            lr,  lr,  #1        @ count the row; nothing before the bgt below changes the flags
+        pkhbt           r8,  r9,  r5,  lsl #16 @ r8 = d4 in low half, d5 in high half
+        pkhtb           r9,  r5,  r9,  asr #16 @ r9 = d6 in low half, d7 in high half
+        stm             r0!, {r4,r6,r8,r9}  @ store d0..d7 (16 bytes) in ascending register order, r0 += 16
+        bgt             1b                  @ repeat while rows remain
+
+        pop             {r4-r9, pc}         @ restore saved registers and return by loading pc
+.endfunc
+
 function ff_pix_abs16_armv6, export=1
         ldr             r0,  [sp]
         push            {r4-r9, lr}
--- a/arm/dsputil_init_armv6.c	Tue Feb 09 16:13:38 2010 +0000
+++ b/arm/dsputil_init_armv6.c	Tue Feb 09 16:13:41 2010 +0000
@@ -51,6 +51,8 @@
                                  int line_size);
 
 void ff_get_pixels_armv6(DCTELEM *block, const uint8_t *pixels, int stride);
+void ff_diff_pixels_armv6(DCTELEM *block, const uint8_t *s1,
+                          const uint8_t *s2, int stride);
 
 int ff_pix_abs16_armv6(void *s, uint8_t *blk1, uint8_t *blk2,
                        int line_size, int h);
@@ -95,6 +97,7 @@
 
     c->add_pixels_clamped = ff_add_pixels_clamped_armv6;
     c->get_pixels = ff_get_pixels_armv6;
+    c->diff_pixels = ff_diff_pixels_armv6;
 
     c->pix_abs[0][0] = ff_pix_abs16_armv6;
     c->pix_abs[0][1] = ff_pix_abs16_x2_armv6;