annotate arm/fft_neon.S @ 12530:63edd10ad4bc libavcodec tip

Try to fix crashes introduced by r25218. r25218 made assumptions about the existence of past reference frames that weren't necessarily true.
author darkshikari
date Tue, 28 Sep 2010 09:06:22 +0000
parents 6f064ab48463
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
10153
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
1 /*
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
2 * ARM NEON optimised FFT
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
3 *
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
4 * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
5 * Copyright (c) 2009 Naotoshi Nojiri
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
6 *
12188
6f064ab48463 more credits to D. J. Bernstein for fft
lorenm
parents: 12104
diff changeset
7 * This algorithm (though not any of the implementation details) is
6f064ab48463 more credits to D. J. Bernstein for fft
lorenm
parents: 12104
diff changeset
8 * based on libdjbfft by D. J. Bernstein.
6f064ab48463 more credits to D. J. Bernstein for fft
lorenm
parents: 12104
diff changeset
9 *
10153
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
10 * This file is part of FFmpeg.
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
11 *
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
12 * FFmpeg is free software; you can redistribute it and/or
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
13 * modify it under the terms of the GNU Lesser General Public
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
14 * License as published by the Free Software Foundation; either
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
15 * version 2.1 of the License, or (at your option) any later version.
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
16 *
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
17 * FFmpeg is distributed in the hope that it will be useful,
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
20 * Lesser General Public License for more details.
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
21 *
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
22 * You should have received a copy of the GNU Lesser General Public
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
23 * License along with FFmpeg; if not, write to the Free Software
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
24 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
25 */
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
26
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
27 #include "asm.S"
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
28
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
29 #define M_SQRT1_2 0.70710678118654752440
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
30
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
31 .text
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
32
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
/*
 * fft4_neon: in-place 4-point complex FFT.
 *
 * In:     r0 = z, pointer to 4 complex values stored as interleaved
 *              {re, im} single-precision floats (32 bytes).  The load and
 *              the store both carry a :128 alignment hint, so z must be
 *              16-byte aligned.
 * Out:    z[0..3] overwritten with the result; r0 is left unchanged.
 * Clobb:  q0-q3, q8.  Leaf routine, no stack use.
 */
function fft4_neon
        vld1.32         {d0-d3},  [r0,:128]     @ d0..d3 = z[0]..z[3], each {re,im}

        vext.32         q8,  q1,  q1,  #1       @ d16 = {i2,r3}  d17 = {i3,r2}
        vsub.f32        d6,  d0,  d1            @ r0-r1,i0-i1
        vsub.f32        d7,  d16, d17           @ i2-i3,r3-r2
        vadd.f32        d4,  d0,  d1            @ r0+r1,i0+i1
        vadd.f32        d5,  d2,  d3            @ r2+r3,i2+i3
        vadd.f32        d1,  d6,  d7            @ new z[1]
        vsub.f32        d3,  d6,  d7            @ new z[3]
        vadd.f32        d0,  d4,  d5            @ new z[0]
        vsub.f32        d2,  d4,  d5            @ new z[2]

        vst1.32         {d0-d3},  [r0,:128]

        bx              lr
endfunc
10153
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
50
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
/*
 * fft8_neon: in-place 8-point complex FFT.
 *
 * In:     r0 = z, pointer to 8 complex values stored as interleaved
 *              {re, im} single-precision floats (64 bytes), 16-byte
 *              aligned (all accesses use a :128 hint).
 * Out:    z[0..7] overwritten with the result; r0 is left unchanged.
 * Clobb:  r1 (ends at z + 32 bytes), r2, r3, q0-q3, q8-q14, d31.
 *
 * The instruction order below is the original hand-scheduled order;
 * several registers are reused as soon as their previous value is dead,
 * so do not reorder without re-checking every dependency.
 */
function fft8_neon
        mov             r1,  r0
        vld1.32         {d0-d3},   [r1,:128]!   @ z[0..3]; r1 advances to z[4]
        vld1.32         {d16-d19}, [r1,:128]    @ z[4..7]

        movw            r2,  #0x04f3            @ sqrt(1/2)
        movt            r2,  #0x3f35            @ r2 = 0x3f3504f3, float bit pattern
        eor             r3,  r2,  #1<<31        @ r3 = same value with the sign bit flipped
        vdup.32         d31, r2                 @ d31 = { w, w}

        vext.32         q11, q1,  q1,  #1       @ i2,r3,i3,r2
        vadd.f32        d4,  d16, d17           @ r4+r5,i4+i5
        vmov            d28, r3,  r2            @ d28 = {-w, w}
        vadd.f32        d5,  d18, d19           @ r6+r7,i6+i7
        vsub.f32        d17, d16, d17           @ r4-r5,i4-i5
        vsub.f32        d19, d18, d19           @ r6-r7,i6-i7
        vrev64.32       d29, d28                @ d29 = { w,-w}
        vadd.f32        d20, d0,  d1            @ r0+r1,i0+i1
        vadd.f32        d21, d2,  d3            @ r2+r3,i2+i3
        vmul.f32        d26, d17, d28           @ -a2r*w,a2i*w
        vext.32         q3,  q2,  q2,  #1
        vmul.f32        d27, d19, d29           @ a3r*w,-a3i*w
        vsub.f32        d23, d22, d23           @ i2-i3,r3-r2
        vsub.f32        d22, d0,  d1            @ r0-r1,i0-i1
        vmul.f32        d24, d17, d31           @ a2r*w,a2i*w
        vmul.f32        d25, d19, d31           @ a3r*w,a3i*w
        vadd.f32        d0,  d20, d21
        vsub.f32        d2,  d20, d21
        vadd.f32        d1,  d22, d23
        vrev64.32       q13, q13
        vsub.f32        d3,  d22, d23
        vsub.f32        d6,  d6,  d7
        vadd.f32        d24, d24, d26           @ a2r+a2i,a2i-a2r   t1,t2
        vadd.f32        d25, d25, d27           @ a3r-a3i,a3i+a3r   t5,t6
        vadd.f32        d7,  d4,  d5
        vsub.f32        d18, d2,  d6
        vext.32         q13, q12, q12, #1
        vadd.f32        d2,  d2,  d6
        vsub.f32        d16, d0,  d7
        vadd.f32        d5,  d25, d24
        vsub.f32        d4,  d26, d27
        vadd.f32        d0,  d0,  d7
        vsub.f32        d17, d1,  d5
        vsub.f32        d19, d3,  d4
        vadd.f32        d3,  d3,  d4
        vadd.f32        d1,  d1,  d5

        vst1.32         {d16-d19}, [r1,:128]    @ z[4..7]
        vst1.32         {d0-d3},   [r0,:128]    @ z[0..3]

        bx              lr
endfunc
10153
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
103
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
/*
 * fft16_neon: in-place 16-point complex FFT.
 *
 * In:     r0 = z, pointer to 16 complex values stored as interleaved
 *              {re, im} single-precision floats (128 bytes), 16-byte
 *              aligned (all accesses to z use a :128 hint).
 * Out:    z[0..15] overwritten with the result.
 * Clobb:  r0-r3, q0-q3, q8-q15.  r0 is NOT restored: it is left at
 *         z + 112 bytes, the address of the final store.
 * Reads:  the local tables mppm and pmmp and the external table
 *         ff_cos_16, none of which are defined in this part of the file.
 *
 * The instruction order below is the original hand-scheduled order, with
 * loads, stores and address arithmetic interleaved into the arithmetic.
 * Registers are recycled aggressively; re-check every dependency before
 * moving anything.
 */
function fft16_neon
        movrel          r1,  mppm
        vld1.32         {d16-d19}, [r0,:128]!   @ q8{r0,i0,r1,i1} q9{r2,i2,r3,i3}
        pld             [r0, #32]
        vld1.32         {d2-d3},   [r1,:128]    @ q1 = mppm table
        vext.32         q13, q9,  q9,  #1
        vld1.32         {d22-d25}, [r0,:128]!   @ q11{r4,i4,r5,i5} q12{r6,i6,r7,i7}
        vadd.f32        d4,  d16, d17
        vsub.f32        d5,  d16, d17
        vadd.f32        d18, d18, d19
        vsub.f32        d19, d26, d27

        vadd.f32        d20, d22, d23
        vsub.f32        d22, d22, d23
        vsub.f32        d23, d24, d25
        vadd.f32        q8,  q2,  q9            @ {r0,i0,r1,i1}
        vadd.f32        d21, d24, d25
        vmul.f32        d24, d22, d2
        vsub.f32        q9,  q2,  q9            @ {r2,i2,r3,i3}
        vmul.f32        d25, d23, d3
        vuzp.32         d16, d17                @ {r0,r1,i0,i1}
        vmul.f32        q1,  q11, d2[1]
        vuzp.32         d18, d19                @ {r2,r3,i2,i3}
        vrev64.32       q12, q12
        vadd.f32        q11, q12, q1            @ {t1a,t2a,t5,t6}
        vld1.32         {d24-d27}, [r0,:128]!   @ q12{r8,i8,r9,i9} q13{r10,i10,r11,i11}
        vzip.32         q10, q11
        vld1.32         {d28-d31}, [r0,:128]    @ q14{r12,i12,r13,i13} q15{r14,i14,r15,i15}
        vadd.f32        d0,  d22, d20
        vadd.f32        d1,  d21, d23
        vsub.f32        d2,  d21, d23
        vsub.f32        d3,  d22, d20
        sub             r0,  r0,  #96           @ undo the three 32-byte post-increments
        vext.32         q13, q13, q13, #1
        vsub.f32        q10, q8,  q0            @ {r4,r5,i4,i5}
        vadd.f32        q8,  q8,  q0            @ {r0,r1,i0,i1}
        vext.32         q15, q15, q15, #1
        vsub.f32        q11, q9,  q1            @ {r6,r7,i6,i7}
        vswp            d25, d26                @ q12{r8,i8,i10,r11} q13{r9,i9,i11,r10}
        vadd.f32        q9,  q9,  q1            @ {r2,r3,i2,i3}
        vswp            d29, d30                @ q14{r12,i12,i14,r15} q15{r13,i13,i15,r14}
        vadd.f32        q0,  q12, q13           @ {t1,t2,t5,t6}
        vadd.f32        q1,  q14, q15           @ {t1a,t2a,t5a,t6a}
        movrel          r2,  X(ff_cos_16)
        vsub.f32        q13, q12, q13           @ {t3,t4,t7,t8}
        vrev64.32       d1,  d1
        vsub.f32        q15, q14, q15           @ {t3a,t4a,t7a,t8a}
        vrev64.32       d3,  d3
        movrel          r3,  pmmp
        vswp            d1,  d26                @ q0{t1,t2,t3,t4} q13{t6,t5,t7,t8}
        vswp            d3,  d30                @ q1{t1a,t2a,t3a,t4a} q15{t6a,t5a,t7a,t8a}
        vadd.f32        q12, q0,  q13           @ {r8,i8,r9,i9}
        vadd.f32        q14, q1,  q15           @ {r12,i12,r13,i13}
        vld1.32         {d4-d5},  [r2,:64]      @ q2 = first four entries of ff_cos_16
        vsub.f32        q13, q0,  q13           @ {r10,i10,r11,i11}
        vsub.f32        q15, q1,  q15           @ {r14,i14,r15,i15}
        vswp            d25, d28                @ q12{r8,i8,r12,i12} q14{r9,i9,r13,i13}
        vld1.32         {d6-d7},  [r3,:128]     @ q3 = pmmp table
        vrev64.32       q1,  q14
        vmul.f32        q14, q14, d4[1]
        vmul.f32        q1,  q1,  q3
        vmla.f32        q14, q1,  d5[1]         @ {t1a,t2a,t5a,t6a}
        vswp            d27, d30                @ q13{r10,i10,r14,i14} q15{r11,i11,r15,i15}
        vzip.32         q12, q14
        vadd.f32        d0,  d28, d24
        vadd.f32        d1,  d25, d29
        vsub.f32        d2,  d25, d29
        vsub.f32        d3,  d28, d24
        vsub.f32        q12, q8,  q0            @ {r8,r9,i8,i9}
        vadd.f32        q8,  q8,  q0            @ {r0,r1,i0,i1}
        vsub.f32        q14, q10, q1            @ {r12,r13,i12,i13}
        mov             r1,  #32                @ store stride: every other pair of elements
        vadd.f32        q10, q10, q1            @ {r4,r5,i4,i5}
        vrev64.32       q0,  q13
        vmul.f32        q13, q13, d5[0]
        vrev64.32       q1,  q15
        vmul.f32        q15, q15, d5[1]
        vst2.32         {d16-d17},[r0,:128], r1 @ z[0],z[1]
        vmul.f32        q0,  q0,  q3
        vst2.32         {d20-d21},[r0,:128], r1 @ z[4],z[5]
        vmul.f32        q1,  q1,  q3
        vmla.f32        q13, q0,  d5[0]         @ {t1,t2,t5,t6}
        vmla.f32        q15, q1,  d4[1]         @ {t1a,t2a,t5a,t6a}
        vst2.32         {d24-d25},[r0,:128], r1 @ z[8],z[9]
        vst2.32         {d28-d29},[r0,:128]     @ z[12],z[13]
        vzip.32         q13, q15
        sub             r0,  r0,  #80           @ back from z+96 to z+16, i.e. &z[2]
        vadd.f32        d0,  d30, d26
        vadd.f32        d1,  d27, d31
        vsub.f32        d2,  d27, d31
        vsub.f32        d3,  d30, d26
        vsub.f32        q13, q9,  q0            @ {r10,r11,i10,i11}
        vadd.f32        q9,  q9,  q0            @ {r2,r3,i2,i3}
        vsub.f32        q15, q11, q1            @ {r14,r15,i14,i15}
        vadd.f32        q11, q11, q1            @ {r6,r7,i6,i7}
        vst2.32         {d18-d19},[r0,:128], r1 @ z[2],z[3]
        vst2.32         {d22-d23},[r0,:128], r1 @ z[6],z[7]
        vst2.32         {d26-d27},[r0,:128], r1 @ z[10],z[11]
        vst2.32         {d30-d31},[r0,:128]     @ z[14],z[15]
        bx              lr
endfunc
10153
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
205
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
/*
 * fft_pass_neon: combining pass that merges four sub-transforms in place.
 *
 * In:     r0 = z    base of the complex array (interleaved {re, im}
 *                   floats), 16-byte aligned
 *         r1 = wre  table of real twiddle factors, 8-byte aligned
 *         r2 = n    iteration count; each iteration handles two complex
 *                   elements from each of the four quarters of z
 * Out:    z updated in place.
 * Clobb:  r0-r3, r12, flags, q0-q3, q8-q11.  r4-r6 are saved and restored.
 * Reads:  the local table pmmp (not defined in this part of the file).
 *
 * Quarter offsets, in complex elements: o1 = 2*n, o2 = 4*n, o3 = 6*n.
 * The imaginary twiddles are read backwards starting just below
 * wim = wre + 2*n floats, while wre is read forwards.
 *
 * The first iteration is peeled: in it only the second element of each
 * pair is multiplied by a twiddle factor, the first is used as loaded.
 * The counter is decremented once for that peeled iteration and the
 * remaining iterations run as a do-while loop, so n must be at least 2.
 *
 * The instruction order is the original hand-scheduled order.
 */
function fft_pass_neon
        push            {r4-r6,lr}
        mov             r6,  r2                 @ n
        lsl             r5,  r2,  #3            @ 2 * n * sizeof FFTSample
        lsl             r4,  r2,  #4            @ 2 * n * sizeof FFTComplex
        lsl             r2,  r2,  #5            @ 4 * n * sizeof FFTComplex
        add             r3,  r2,  r4            @ 6 * n * sizeof FFTComplex
        add             r4,  r4,  r0            @ &z[o1]
        add             r2,  r2,  r0            @ &z[o2]
        add             r3,  r3,  r0            @ &z[o3]

        @ ---- peeled first iteration ----
        vld1.32         {d20-d21},[r2,:128]     @ {z[o2],z[o2+1]}
        movrel          r12, pmmp
        vld1.32         {d22-d23},[r3,:128]     @ {z[o3],z[o3+1]}
        add             r5,  r5,  r1            @ wim
        vld1.32         {d6-d7},  [r12,:128]    @ pmmp
        vswp            d21, d22                @ q10{z[o2],z[o3]} q11{z[o2+1],z[o3+1]}
        vld1.32         {d4},     [r1,:64]!     @ {wre[0],wre[1]}
        sub             r5,  r5,  #4            @ wim--
        vrev64.32       q1,  q11
        vmul.f32        q11, q11, d4[1]
        vmul.f32        q1,  q1,  q3
        vld1.32         {d5[0]},  [r5,:32]      @ d5[0] = wim[-1]
        vmla.f32        q11, q1,  d5[0]         @ {t1a,t2a,t5a,t6a}
        vld2.32         {d16-d17},[r0,:128]     @ {z[0],z[1]}
        sub             r6,  r6,  #1            @ n--
        vld2.32         {d18-d19},[r4,:128]     @ {z[o1],z[o1+1]}
        vzip.32         q10, q11
        vadd.f32        d0,  d22, d20
        vadd.f32        d1,  d21, d23
        vsub.f32        d2,  d21, d23
        vsub.f32        d3,  d22, d20
        vsub.f32        q10, q8,  q0
        vadd.f32        q8,  q8,  q0
        vsub.f32        q11, q9,  q1
        vadd.f32        q9,  q9,  q1
        vst2.32         {d20-d21},[r2,:128]!    @ {z[o2],z[o2+1]}
        vst2.32         {d16-d17},[r0,:128]!    @ {z[0],z[1]}
        vst2.32         {d22-d23},[r3,:128]!    @ {z[o3],z[o3+1]}
        vst2.32         {d18-d19},[r4,:128]!    @ {z[o1],z[o1+1]}
        sub             r5,  r5,  #8            @ wim -= 2

        @ ---- remaining n-1 iterations: both elements get a twiddle ----
1:
        vld1.32         {d20-d21},[r2,:128]     @ {z[o2],z[o2+1]}
        vld1.32         {d22-d23},[r3,:128]     @ {z[o3],z[o3+1]}
        vswp            d21, d22
        vld1.32         {d4},     [r1]!         @ {wre[0],wre[1]}
        vrev64.32       q0,  q10
        vmul.f32        q10, q10, d4[0]
        vrev64.32       q1,  q11
        vmul.f32        q11, q11, d4[1]
        vld1.32         {d5},     [r5]          @ {wim[-1],wim[0]}
        vmul.f32        q0,  q0,  q3
        sub             r5,  r5,  #8            @ wim -= 2
        vmul.f32        q1,  q1,  q3
        vmla.f32        q10, q0,  d5[1]         @ {t1,t2,t5,t6}
        vmla.f32        q11, q1,  d5[0]         @ {t1a,t2a,t5a,t6a}
        vld2.32         {d16-d17},[r0,:128]     @ {z[0],z[1]}
        subs            r6,  r6,  #1            @ n--; flags are consumed by the bne below
        vld2.32         {d18-d19},[r4,:128]     @ {z[o1],z[o1+1]}
        vzip.32         q10, q11
        vadd.f32        d0,  d22, d20
        vadd.f32        d1,  d21, d23
        vsub.f32        d2,  d21, d23
        vsub.f32        d3,  d22, d20
        vsub.f32        q10, q8,  q0
        vadd.f32        q8,  q8,  q0
        vsub.f32        q11, q9,  q1
        vadd.f32        q9,  q9,  q1
        vst2.32         {d20-d21}, [r2,:128]!   @ {z[o2],z[o2+1]}
        vst2.32         {d16-d17}, [r0,:128]!   @ {z[0],z[1]}
        vst2.32         {d22-d23}, [r3,:128]!   @ {z[o3],z[o3+1]}
        vst2.32         {d18-d19}, [r4,:128]!   @ {z[o1],z[o1+1]}
        bne             1b

        pop             {r4-r6,pc}
endfunc
10153
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
281
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
/*
 * def_fft n, n2, n4: emit the function fft<n>_neon, aligned to 64 bytes.
 *
 *   n   transform size, used in the function name and to select the
 *       twiddle table ff_cos_<n>
 *   n2  n/2, name suffix of the routine run on the first half of z
 *   n4  n/4, name suffix of the routine run on the third and fourth
 *       quarters of z
 *
 * The generated function takes r0 = z.  It calls fft<n2>_neon on z,
 * then fft<n4>_neon on z + 2*n4 elements and on z + 3*n4 elements
 * (8 bytes per element), and finally tail-calls fft_pass_neon with
 * r0 = z, r1 = ff_cos_<n>, r2 = n4/2.  r4 and lr are restored before
 * the tail call, so fft_pass_neon returns straight to the caller.
 */
.macro  def_fft n, n2, n4
        .align 6
function fft\n\()_neon
        push            {r4, lr}
        mov             r4,  r0                 @ keep z across the calls
        bl              fft\n2\()_neon
        add             r0,  r4,  #\n4*2*8
        bl              fft\n4\()_neon
        add             r0,  r4,  #\n4*3*8
        bl              fft\n4\()_neon
        mov             r0,  r4
        pop             {r4, lr}
        mov             r2,  #\n4/2
        movrel          r1,  X(ff_cos_\n)
        b               fft_pass_neon
endfunc
.endm
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
299
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
@ Instantiate fft32_neon through fft65536_neon. Each line passes n, n/2, n/4,
@ so every size is built from the two next smaller transforms.
@ fft16_neon and fft8_neon, referenced by the first line, are not generated
@ by this macro and must be defined elsewhere in the file.
300 def_fft 32, 16, 8
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
301 def_fft 64, 32, 16
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
302 def_fft 128, 64, 32
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
303 def_fft 256, 128, 64
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
304 def_fft 512, 256, 128
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
305 def_fft 1024, 512, 256
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
306 def_fft 2048, 1024, 512
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
307 def_fft 4096, 2048, 1024
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
308 def_fft 8192, 4096, 2048
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
309 def_fft 16384, 8192, 4096
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
310 def_fft 32768, 16384, 8192
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
311 def_fft 65536, 32768, 16384
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
312
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
@ ff_fft_calc_neon: dispatch to one of the fft<N>_neon routines.
@ In:  r0 = pointer to a structure whose first word selects the routine
@           (the same field is labelled nbits in ff_fft_permute_neon)
@      r1 = value handed to the selected routine as its first argument
@ The word at [r0] minus 2 indexes fft_tab_neon, so a value v selects
@ fft<2^v>_neon (2 -> fft4_neon ... 16 -> fft65536_neon).
@ NOTE(review): no range check is done here; values outside 2..16 would
@ index past the table - presumably guaranteed by the caller, TODO confirm.
313 function ff_fft_calc_neon, export=1
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
314 ldr r2, [r0] @ r2 = first word of the structure
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
315 sub r2, r2, #2 @ table index: entry 0 is fft4_neon
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
316 movrel r3, fft_tab_neon @ r3 = base address of the dispatch table
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
317 ldr r3, [r3, r2, lsl #2] @ r3 = table[index], entries are 4 bytes each
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
318 mov r0, r1 @ second argument becomes the first argument of the target
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
319 bx r3 @ jump without touching lr, so the target returns to our caller
11443
361a5fcb4393 ARM: set size of asm functions in object files
mru
parents: 10346
diff changeset
320 endfunc
10153
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
321
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
@ ff_fft_permute_neon: reorder the elements of a buffer through a lookup table.
@ In:  r0 = pointer to a structure read at offsets 0 (nbits), 8 (revtab)
@           and 12 (tmp_buf)
@      r1 = data buffer of 1 << nbits elements, 8 bytes each
@ Pass 1 scatters element i of the data buffer to tmp_buf[revtab[i]].
@ Pass 2 copies tmp_buf back over the data buffer.
@ revtab entries are read as unsigned 16-bit values, two per 32-bit load.
@ NOTE(review): pass 2 moves four elements per iteration, so it assumes the
@ element count is a multiple of 4 - TODO confirm the minimum nbits.
322 function ff_fft_permute_neon, export=1
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
323 push {r4,lr} @ r4 and lr are both used as scratch registers below
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
324 mov r12, #1
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
325 ldr r2, [r0] @ nbits
12047
c80c7a717156 Remove vestiges of radix-2 FFT
mru
parents: 11443
diff changeset
326 ldr r3, [r0, #12] @ tmp_buf
10153
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
327 ldr r0, [r0, #8] @ revtab (r0 no longer points at the structure after this)
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
328 lsl r12, r12, r2 @ r12 = 1 << nbits = number of elements, pass 1 counter
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
329 mov r2, r12 @ r2 = second copy of the count, used for the rewind and pass 2
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
330 1: @ pass 1: two elements per iteration
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
331 vld1.32 {d0-d1}, [r1,:128]! @ load two elements (16 bytes) and advance r1; address must be 16-byte aligned
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
332 ldr r4, [r0], #4 @ one word holding two 16-bit revtab entries, advance revtab pointer
10172
eda985c53dba ARM: 10l: fix large FFTs
mru
parents: 10153
diff changeset
333 uxth lr, r4 @ lr = low 16 bits, zero-extended: destination index for d0
eda985c53dba ARM: 10l: fix large FFTs
mru
parents: 10153
diff changeset
334 uxth r4, r4, ror #16 @ r4 = high 16 bits, zero-extended: destination index for d1
eda985c53dba ARM: 10l: fix large FFTs
mru
parents: 10153
diff changeset
335 add lr, r3, lr, lsl #3 @ lr = tmp_buf + index * 8
eda985c53dba ARM: 10l: fix large FFTs
mru
parents: 10153
diff changeset
336 add r4, r3, r4, lsl #3 @ r4 = tmp_buf + index * 8
10153
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
337 vst1.32 {d0}, [lr,:64] @ store first element at its permuted slot
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
338 vst1.32 {d1}, [r4,:64] @ store second element at its permuted slot
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
339 subs r12, r12, #2 @ two elements done
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
340 bgt 1b
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
341
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
342 sub r1, r1, r2, lsl #3 @ rewind r1 by count * 8 bytes to the start of the data buffer
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
343 1: @ pass 2: copy tmp_buf back, four elements per iteration
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
344 vld1.32 {d0-d3}, [r3,:128]! @ load 32 bytes from tmp_buf and advance r3
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
345 vst1.32 {d0-d3}, [r1,:128]! @ store 32 bytes to the data buffer and advance r1
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
346 subs r2, r2, #4 @ four elements done
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
347 bgt 1b
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
348
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
349 pop {r4,pc} @ restore r4 and return by loading the saved lr into pc
11443
361a5fcb4393 ARM: set size of asm functions in object files
mru
parents: 10346
diff changeset
350 endfunc
10153
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
351
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
@ fft_tab_neon: read-only table of routine addresses used by ff_fft_calc_neon.
@ Entry i (4 bytes) is the address of fft<2^(i+2)>_neon, so the table covers
@ sizes 4 through 65536 in 15 entries.
352 .section .rodata
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
353 .align 4 @ on ARM gas the operand is a power of two: 16-byte alignment
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
354 fft_tab_neon:
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
355 .word fft4_neon
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
356 .word fft8_neon
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
357 .word fft16_neon
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
358 .word fft32_neon
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
359 .word fft64_neon
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
360 .word fft128_neon
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
361 .word fft256_neon
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
362 .word fft512_neon
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
363 .word fft1024_neon
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
364 .word fft2048_neon
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
365 .word fft4096_neon
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
366 .word fft8192_neon
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
367 .word fft16384_neon
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
368 .word fft32768_neon
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
369 .word fft65536_neon
12104
2a6873ee2fc9 ARM: hide a .size directive on non-ELF targets
mru
parents: 12047
diff changeset
370 ELF .size fft_tab_neon, . - fft_tab_neon @ symbol size = bytes emitted since the label; the ELF prefix presumably drops this line on non-ELF targets
10153
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
371
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
@ Two constant vectors of four single-precision floats each, starting on a
@ 16-byte boundary. They are not referenced in this part of the file;
@ presumably they are loaded by the small fixed-size transforms - TODO confirm.
372 .align 4
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
373 pmmp: .float +1.0, -1.0, -1.0, +1.0 @ sign pattern plus, minus, minus, plus
7a63015e4627 ARM: NEON optimised FFT and MDCT
mru
parents:
diff changeset
374 mppm: .float -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, -M_SQRT1_2 @ same magnitude with signs minus, plus, plus, minus; M_SQRT1_2 is defined outside this chunk (presumably 1/sqrt(2))