changeset 10385:bc98e5724513 libavcodec

ARM: align stack in NEON h264 mc functions. A certain rotten fruit operating system doesn't provide the 8-byte stack alignment required by the standard ARM ABI, so align it manually.
author mru
date Sun, 11 Oct 2009 16:16:08 +0000
parents c44205b868dc
children 98501365c3aa
files arm/h264dsp_neon.S
diffstat 1 files changed, 18 insertions(+), 15 deletions(-) [+]
line wrap: on
line diff
--- a/arm/h264dsp_neon.S	Sun Oct 11 10:15:48 2009 +0000
+++ b/arm/h264dsp_neon.S	Sun Oct 11 16:16:08 2009 +0000
@@ -1064,9 +1064,11 @@
         .endfunc
 
 function ff_put_h264_qpel8_mc11_neon, export=1
-        push            {r0, r1, r2, lr}
+        push            {r0, r1, r11, lr}
 put_h264_qpel8_mc11:
         lowpass_const   r3
+        mov             r11, sp
+        bic             sp,  sp,  #15
         sub             sp,  sp,  #64
         mov             r0,  sp
         sub             r1,  r1,  #2
@@ -1074,15 +1076,15 @@
         mov             ip,  #8
         vpush           {d8-d15}
         bl              put_h264_qpel8_h_lowpass_neon
-        ldrd            r0,  [sp, #128]
+        ldrd            r0,  [r11]
         mov             r3,  r2
         add             ip,  sp,  #64
         sub             r1,  r1,  r2, lsl #1
         mov             r2,  #8
         bl              put_h264_qpel8_v_lowpass_l2_neon
         vpop            {d8-d15}
-        add             sp,  sp,  #76
-        pop             {pc}
+        add             sp,  r11, #8
+        pop             {r11, pc}
         .endfunc
 
 function ff_put_h264_qpel8_mc21_neon, export=1
@@ -1112,7 +1114,7 @@
 
 function ff_put_h264_qpel8_mc31_neon, export=1
         add             r1,  r1,  #1
-        push            {r0, r1, r2, lr}
+        push            {r0, r1, r11, lr}
         sub             r1,  r1,  #1
         b               put_h264_qpel8_mc11
         .endfunc
@@ -1181,7 +1183,7 @@
         .endfunc
 
 function ff_put_h264_qpel8_mc13_neon, export=1
-        push            {r0, r1, r2, lr}
+        push            {r0, r1, r11, lr}
         add             r1,  r1,  r2
         b               put_h264_qpel8_mc11
         .endfunc
@@ -1194,7 +1196,7 @@
 
 function ff_put_h264_qpel8_mc33_neon, export=1
         add             r1,  r1,  #1
-        push            {r0, r1, r2, lr}
+        push            {r0, r1, r11, lr}
         add             r1,  r1,  r2
         sub             r1,  r1,  #1
         b               put_h264_qpel8_mc11
@@ -1235,25 +1237,26 @@
         .endfunc
 
 function ff_put_h264_qpel16_mc11_neon, export=1
-        push            {r0, r1, r4, lr}
+        push            {r0, r1, r4, r11, lr}
 put_h264_qpel16_mc11:
         lowpass_const   r3
+        mov             r11, sp
+        bic             sp,  sp,  #15
         sub             sp,  sp,  #256
         mov             r0,  sp
         sub             r1,  r1,  #2
         mov             r3,  #16
         vpush           {d8-d15}
         bl              put_h264_qpel16_h_lowpass_neon
-        add             r0,  sp,  #256
-        ldrd            r0,  [r0, #64]
+        ldrd            r0,  [r11]
         mov             r3,  r2
         add             ip,  sp,  #64
         sub             r1,  r1,  r2, lsl #1
         mov             r2,  #16
         bl              put_h264_qpel16_v_lowpass_l2_neon
         vpop            {d8-d15}
-        add             sp,  sp,  #(256+8)
-        pop             {r4, pc}
+        add             sp,  r11, #8
+        pop             {r4, r11, pc}
         .endfunc
 
 function ff_put_h264_qpel16_mc21_neon, export=1
@@ -1280,7 +1283,7 @@
 
 function ff_put_h264_qpel16_mc31_neon, export=1
         add             r1,  r1,  #1
-        push            {r0, r1, r4, lr}
+        push            {r0, r1, r4, r11, lr}
         sub             r1,  r1,  #1
         b               put_h264_qpel16_mc11
         .endfunc
@@ -1349,7 +1352,7 @@
         .endfunc
 
 function ff_put_h264_qpel16_mc13_neon, export=1
-        push            {r0, r1, r4, lr}
+        push            {r0, r1, r4, r11, lr}
         add             r1,  r1,  r2
         b               put_h264_qpel16_mc11
         .endfunc
@@ -1362,7 +1365,7 @@
 
 function ff_put_h264_qpel16_mc33_neon, export=1
         add             r1,  r1,  #1
-        push            {r0, r1, r4, lr}
+        push            {r0, r1, r4, r11, lr}
         add             r1,  r1,  r2
         sub             r1,  r1,  #1
         b               put_h264_qpel16_mc11