Mercurial > libavcodec.hg
comparison arm/mdct_neon.S @ 10172:eda985c53dba libavcodec
ARM: 10l: fix large FFTs
author | mru |
---|---|
date | Mon, 14 Sep 2009 21:37:41 +0000 |
parents | 8d369aee733f |
children | 38ab367d4231 |
comparison
equal
deleted
inserted
replaced
10171:39a7bdbf0d40 | 10172:eda985c53dba |
---|---|
50 ldr r6, [r3], #4 | 50 ldr r6, [r3], #4 |
51 vmul.f32 d4, d0, d3 | 51 vmul.f32 d4, d0, d3 |
52 vmul.f32 d5, d17, d3 | 52 vmul.f32 d5, d17, d3 |
53 vsub.f32 d4, d6, d4 | 53 vsub.f32 d4, d6, d4 |
54 vadd.f32 d5, d5, d7 | 54 vadd.f32 d5, d5, d7 |
55 uxtah r8, r1, r6, ror #16 | 55 uxth r8, r6, ror #16 |
56 uxtah r6, r1, r6 | 56 uxth r6, r6 |
| | 57 add r8, r1, r8, lsl #3 |
| | 58 add r6, r1, r6, lsl #3 |
57 beq 1f | 59 beq 1f |
58 vld2.32 {d16-d17},[r7,:128],r12 | 60 vld2.32 {d16-d17},[r7,:128],r12 |
59 vld2.32 {d0-d1}, [r2,:128]! | 61 vld2.32 {d0-d1}, [r2,:128]! |
60 vrev64.32 d17, d17 | 62 vrev64.32 d17, d17 |
61 vld1.32 {d2}, [r4,:64]! | 63 vld1.32 {d2}, [r4,:64]! |
196 vmul.f32 d4, d0, d3 @ -R*s | 198 vmul.f32 d4, d0, d3 @ -R*s |
197 vmul.f32 d5, d20, d2 @ I*c | 199 vmul.f32 d5, d20, d2 @ I*c |
198 subs lr, lr, #16 | 200 subs lr, lr, #16 |
199 vsub.f32 d6, d6, d7 @ -R*c-I*s | 201 vsub.f32 d6, d6, d7 @ -R*c-I*s |
200 vadd.f32 d7, d4, d5 @ -R*s+I*c | 202 vadd.f32 d7, d4, d5 @ -R*s+I*c |
201 uxtah r10, r1, r6, ror #16 | 203 uxth r10, r6, ror #16 |
202 uxtah r6, r1, r6 | 204 uxth r6, r6 |
| | 205 add r10, r1, r10, lsl #3 |
| | 206 add r6, r1, r6, lsl #3 |
203 beq 1f | 207 beq 1f |
204 vld2.32 {d16,d18},[r9,:128],r12 @ x,x in4d1,in4d0 | 208 vld2.32 {d16,d18},[r9,:128],r12 @ x,x in4d1,in4d0 |
205 vld2.32 {d17,d19},[r8,:128],r12 @ x,x in3d1,in3d0 | 209 vld2.32 {d17,d19},[r8,:128],r12 @ x,x in3d1,in3d0 |
206 vneg.f32 d7, d7 @ R*s-I*c | 210 vneg.f32 d7, d7 @ R*s-I*c |
207 vld2.32 {d20,d21},[r7,:128]! @ in4u0,in4u1 x,x | 211 vld2.32 {d20,d21},[r7,:128]! @ in4u0,in4u1 x,x |
243 vmul.f32 d4, d0, d3 @ R*s | 247 vmul.f32 d4, d0, d3 @ R*s |
244 vmul.f32 d5, d20, d2 @ I*c | 248 vmul.f32 d5, d20, d2 @ I*c |
245 subs lr, lr, #16 | 249 subs lr, lr, #16 |
246 vsub.f32 d6, d7, d6 @ I*s-R*c | 250 vsub.f32 d6, d7, d6 @ I*s-R*c |
247 vadd.f32 d7, d4, d5 @ R*s-I*c | 251 vadd.f32 d7, d4, d5 @ R*s-I*c |
248 uxtah r10, r1, r6, ror #16 | 252 uxth r10, r6, ror #16 |
249 uxtah r6, r1, r6 | 253 uxth r6, r6 |
| | 254 add r10, r1, r10, lsl #3 |
| | 255 add r6, r1, r6, lsl #3 |
250 beq 1f | 256 beq 1f |
251 vld2.32 {d16,d18},[r9,:128],r12 @ x,x in2d1,in2d0 | 257 vld2.32 {d16,d18},[r9,:128],r12 @ x,x in2d1,in2d0 |
252 vld2.32 {d17,d19},[r8,:128],r12 @ x,x in1d1,in1d0 | 258 vld2.32 {d17,d19},[r8,:128],r12 @ x,x in1d1,in1d0 |
253 vld2.32 {d20,d21},[r7,:128]! @ in2u0,in2u1 x,x | 259 vld2.32 {d20,d21},[r7,:128]! @ in2u0,in2u1 x,x |
254 vrev64.32 q9, q9 @ in2d0,in2d1 in1d0,in1d1 | 260 vrev64.32 q9, q9 @ in2d0,in2d1 in1d0,in1d1 |