comparison arm/mdct_neon.S @ 10172:eda985c53dba libavcodec

ARM: 10l: fix large FFTs
author mru
date Mon, 14 Sep 2009 21:37:41 +0000
parents 8d369aee733f
children 38ab367d4231
comparison
legend: equal | deleted | inserted | replaced
old revision 10171:39a7bdbf0d40 | new revision 10172:eda985c53dba
50 ldr r6, [r3], #4 50 ldr r6, [r3], #4
51 vmul.f32 d4, d0, d3 51 vmul.f32 d4, d0, d3
52 vmul.f32 d5, d17, d3 52 vmul.f32 d5, d17, d3
53 vsub.f32 d4, d6, d4 53 vsub.f32 d4, d6, d4
54 vadd.f32 d5, d5, d7 54 vadd.f32 d5, d5, d7
55 uxtah r8, r1, r6, ror #16 55 uxth r8, r6, ror #16
56 uxtah r6, r1, r6 56 uxth r6, r6
57 add r8, r1, r8, lsl #3
58 add r6, r1, r6, lsl #3
57 beq 1f 59 beq 1f
58 vld2.32 {d16-d17},[r7,:128],r12 60 vld2.32 {d16-d17},[r7,:128],r12
59 vld2.32 {d0-d1}, [r2,:128]! 61 vld2.32 {d0-d1}, [r2,:128]!
60 vrev64.32 d17, d17 62 vrev64.32 d17, d17
61 vld1.32 {d2}, [r4,:64]! 63 vld1.32 {d2}, [r4,:64]!
196 vmul.f32 d4, d0, d3 @ -R*s 198 vmul.f32 d4, d0, d3 @ -R*s
197 vmul.f32 d5, d20, d2 @ I*c 199 vmul.f32 d5, d20, d2 @ I*c
198 subs lr, lr, #16 200 subs lr, lr, #16
199 vsub.f32 d6, d6, d7 @ -R*c-I*s 201 vsub.f32 d6, d6, d7 @ -R*c-I*s
200 vadd.f32 d7, d4, d5 @ -R*s+I*c 202 vadd.f32 d7, d4, d5 @ -R*s+I*c
201 uxtah r10, r1, r6, ror #16 203 uxth r10, r6, ror #16
202 uxtah r6, r1, r6 204 uxth r6, r6
205 add r10, r1, r10, lsl #3
206 add r6, r1, r6, lsl #3
203 beq 1f 207 beq 1f
204 vld2.32 {d16,d18},[r9,:128],r12 @ x,x in4d1,in4d0 208 vld2.32 {d16,d18},[r9,:128],r12 @ x,x in4d1,in4d0
205 vld2.32 {d17,d19},[r8,:128],r12 @ x,x in3d1,in3d0 209 vld2.32 {d17,d19},[r8,:128],r12 @ x,x in3d1,in3d0
206 vneg.f32 d7, d7 @ R*s-I*c 210 vneg.f32 d7, d7 @ R*s-I*c
207 vld2.32 {d20,d21},[r7,:128]! @ in4u0,in4u1 x,x 211 vld2.32 {d20,d21},[r7,:128]! @ in4u0,in4u1 x,x
243 vmul.f32 d4, d0, d3 @ R*s 247 vmul.f32 d4, d0, d3 @ R*s
244 vmul.f32 d5, d20, d2 @ I*c 248 vmul.f32 d5, d20, d2 @ I*c
245 subs lr, lr, #16 249 subs lr, lr, #16
246 vsub.f32 d6, d7, d6 @ I*s-R*c 250 vsub.f32 d6, d7, d6 @ I*s-R*c
247 vadd.f32 d7, d4, d5 @ R*s-I*c 251 vadd.f32 d7, d4, d5 @ R*s-I*c
248 uxtah r10, r1, r6, ror #16 252 uxth r10, r6, ror #16
249 uxtah r6, r1, r6 253 uxth r6, r6
254 add r10, r1, r10, lsl #3
255 add r6, r1, r6, lsl #3
250 beq 1f 256 beq 1f
251 vld2.32 {d16,d18},[r9,:128],r12 @ x,x in2d1,in2d0 257 vld2.32 {d16,d18},[r9,:128],r12 @ x,x in2d1,in2d0
252 vld2.32 {d17,d19},[r8,:128],r12 @ x,x in1d1,in1d0 258 vld2.32 {d17,d19},[r8,:128],r12 @ x,x in1d1,in1d0
253 vld2.32 {d20,d21},[r7,:128]! @ in2u0,in2u1 x,x 259 vld2.32 {d20,d21},[r7,:128]! @ in2u0,in2u1 x,x
254 vrev64.32 q9, q9 @ in2d0,in2d1 in1d0,in1d1 260 vrev64.32 q9, q9 @ in2d0,in2d1 in1d0,in1d1