annotate arm/rdft_neon.S @ 12483:0159a19bfff7 libavcodec

aacdec: Rework channel mapping compatibility hacks. For a PCE based configuration map the channels solely based on tags. For an indexed configuration map the channels solely based on position. This works with all known exotic samples including al17, elem_id0, bad_concat, and lfe_is_sce.
author alexc
date Fri, 10 Sep 2010 18:01:48 +0000
parents 7ad2eb6a2f10
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11532
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
1 /*
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
2 * ARM NEON optimised RDFT
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
3 * Copyright (c) 2009 Mans Rullgard <mans@mansr.com>
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
4 *
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
5 * This file is part of FFmpeg.
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
6 *
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
7 * FFmpeg is free software; you can redistribute it and/or
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
9 * License as published by the Free Software Foundation; either
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
10 * version 2.1 of the License, or (at your option) any later version.
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
11 *
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
12 * FFmpeg is distributed in the hope that it will be useful,
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
15 * Lesser General Public License for more details.
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
16 *
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
18 * License along with FFmpeg; if not, write to the Free Software
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
20 */
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
21
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
22 #include "asm.S"
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
23
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
24 preserve8
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
25
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
26 function ff_rdft_calc_neon, export=1
@ ff_rdft_calc_neon: NEON real-data FFT pass around a complex FFT.
@
@ In:  r0 = context pointer. Words read from it: +0 nbits, +4 inverse,
@           +8 sign_convention, +12 tcos pointer, +16 tsin pointer.
@           context+20 is what gets passed on to ff_fft_permute_neon
@           and ff_fft_calc_neon as their first argument.
@      r1 = data pointer (32-bit floats, accessed with 64-bit alignment
@           hints), transformed in place.
@
@ Flow: when bit 0 of inverse is clear, the FFT (permute + calc) runs
@       first and the function returns after the butterfly pass below.
@       When it is set, the butterfly pass runs first and the FFT is
@       run afterwards (the last call is a tail call).
@
@ Register roles after label 1:
@      r0 / r1  forward / backward read pointers
@      r7 / lr  forward / backward store pointers
@      r2 / r3  tcos / tsin read pointers
@      r8       -8, post-index step for the backward pointers
@      r12      loop counter
@      q8       sign-bit masks, q9 = k1,k1,k2,k2 scale factors
@ Only d0-d7 and d16-d25 are used, so no NEON registers are saved.
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
27 push {r4-r8,lr} @ save callee-saved r4-r8 and the return address
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
28
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
29 ldr r6, [r0, #4] @ inverse
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
30 mov r4, r0 @ r4 = context pointer, kept across the calls
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
31 mov r5, r1 @ r5 = data pointer, kept across the calls
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
32
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
33 lsls r6, r6, #31 @ r6 = inverse << 31 (0 or 1<<31), flags updated
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
34 bne 1f @ inverse bit set: skip the up-front FFT
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
35 add r0, r4, #20 @ r0 = context + 20, first argument of the FFT calls
11655
7ad2eb6a2f10 ARM: fix build for darwin/iphone
mru
parents: 11532
diff changeset
36 bl X(ff_fft_permute_neon) @ r1 still holds the data pointer from entry
11532
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
37 add r0, r4, #20 @ r0-r3 are not preserved by the call: set up again
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
38 mov r1, r5
11655
7ad2eb6a2f10 ARM: fix build for darwin/iphone
mru
parents: 11532
diff changeset
39 bl X(ff_fft_calc_neon)
11532
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
40 1: @ butterfly pass setup
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
41 ldr r12, [r4, #0] @ nbits
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
42 mov r2, #1
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
43 lsl r12, r2, r12 @ r12 = 1 << nbits
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
44 add r0, r5, #8 @ r0 = data + 8 bytes: forward read pointer
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
45 add r1, r5, r12, lsl #2 @ r1 = data + 4 * (1 << nbits) bytes
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
46 lsr r12, r12, #2 @ r12 = (1 << nbits) / 4
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
47 ldr r2, [r4, #12] @ tcos
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
48 sub r12, r12, #2 @ loop counter for 2:, which subtracts 2 per pass
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
49 ldr r3, [r4, #16] @ tsin
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
50 mov r7, r0 @ r7 = forward store pointer
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
51 sub r1, r1, #8 @ r1 = backward read pointer (last 8 bytes)
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
52 mov lr, r1 @ lr = backward store pointer
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
53 mov r8, #-8 @ post-index step for the backward pointers
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
54 vld1.32 {d0}, [r0,:64]! @ d1[0,1]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
55 vld1.32 {d1}, [r1,:64], r8 @ d2[0,1]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
56 vld1.32 {d4}, [r2,:64]! @ tcos[i]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
57 vld1.32 {d5}, [r3,:64]! @ tsin[i]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
58 vmov.f32 d18, #0.5 @ k1
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
59 vdup.32 d19, r6 @ d19 = shifted inverse bit (0 or 1<<31) in both lanes
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
60 pld [r0, #32]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
61 veor d19, d18, d19 @ k2
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
62 vmov.i32 d16, #0
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
63 vmov.i32 d17, #1<<31
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
64 pld [r1, #-32]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
65 vtrn.32 d16, d17 @ both become (0, 1<<31): sign bit in lane 1 only
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
66 pld [r2, #32]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
67 vrev64.32 d16, d16 @ d16=1,0 d17=0,1
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
68 pld [r3, #32]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
69 2: @ main loop: two forward/backward pairs per pass
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
70 veor q1, q0, q8 @ -d1[0],d1[1], d2[0],-d2[1]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
71 vld1.32 {d24}, [r0,:64]! @ d1[0,1]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
72 vadd.f32 d0, d0, d3 @ d1[0]+d2[0], d1[1]-d2[1]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
73 vld1.32 {d25}, [r1,:64], r8 @ d2[0,1]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
74 vadd.f32 d1, d2, d1 @ -d1[0]+d2[0], d1[1]+d2[1]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
75 veor q3, q12, q8 @ -d1[0],d1[1], d2[0],-d2[1]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
76 pld [r0, #32]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
77 vmul.f32 q10, q0, q9 @ ev.re, ev.im, od.im, od.re
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
78 pld [r1, #-32]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
79 vadd.f32 d0, d24, d7 @ d1[0]+d2[0], d1[1]-d2[1]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
80 vadd.f32 d1, d6, d25 @ -d1[0]+d2[0], d1[1]+d2[1]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
81 vmul.f32 q11, q0, q9 @ ev.re, ev.im, od.im, od.re
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
82 veor d7, d21, d16 @ -od.im, od.re
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
83 vrev64.32 d3, d21 @ od.re, od.im
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
84 veor d6, d20, d17 @ ev.re,-ev.im
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
85 veor d2, d3, d16 @ -od.re, od.im
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
86 vmla.f32 d20, d3, d4[1] @ d20 += (od.re, od.im) * d4[1] (tcos)
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
87 vmla.f32 d20, d7, d5[1] @ d20 += (-od.im, od.re) * d5[1] (tsin)
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
88 vmla.f32 d6, d2, d4[1] @ d6 += (-od.re, od.im) * d4[1] (tcos)
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
89 vmla.f32 d6, d21, d5[1] @ d6 += (od.im, od.re) * d5[1] (tsin)
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
90 vld1.32 {d4}, [r2,:64]! @ tcos[i]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
91 veor d7, d23, d16 @ -od.im, od.re
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
92 vld1.32 {d5}, [r3,:64]! @ tsin[i]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
93 veor d24, d22, d17 @ ev.re,-ev.im
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
94 vrev64.32 d3, d23 @ od.re, od.im
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
95 pld [r2, #32]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
96 veor d2, d3, d16 @ -od.re, od.im
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
97 pld [r3, #32]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
98 vmla.f32 d22, d3, d4[0] @ second pair: lane 0 of the tcos just loaded
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
99 vmla.f32 d22, d7, d5[0] @ second pair: lane 0 of the tsin just loaded
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
100 vmla.f32 d24, d2, d4[0]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
101 vmla.f32 d24, d23, d5[0]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
102 vld1.32 {d0}, [r0,:64]! @ d1[0,1]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
103 vld1.32 {d1}, [r1,:64], r8 @ d2[0,1]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
104 vst1.32 {d20}, [r7,:64]! @ first pair, forward store pointer
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
105 vst1.32 {d6}, [lr,:64], r8 @ first pair, backward store pointer
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
106 vst1.32 {d22}, [r7,:64]! @ second pair, forward store pointer
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
107 vst1.32 {d24}, [lr,:64], r8 @ second pair, backward store pointer
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
108 subs r12, r12, #2 @ two pairs were handled in this pass
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
109 bgt 2b
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
110
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
111 veor q1, q0, q8 @ -d1[0],d1[1], d2[0],-d2[1]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
112 vadd.f32 d0, d0, d3 @ d1[0]+d2[0], d1[1]-d2[1]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
113 vadd.f32 d1, d2, d1 @ -d1[0]+d2[0], d1[1]+d2[1]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
114 ldr r2, [r4, #8] @ sign_convention
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
115 vmul.f32 q10, q0, q9 @ ev.re, ev.im, od.im, od.re
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
116 add r0, r0, #4 @ r0 = forward read pointer + 4 bytes
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
117 bfc r2, #0, #31 @ keep only bit 31 of sign_convention
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
118 vld1.32 {d0[0]}, [r0,:32] @ load the single float at r0
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
119 veor d7, d21, d16 @ -od.im, od.re
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
120 vrev64.32 d3, d21 @ od.re, od.im
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
121 veor d6, d20, d17 @ ev.re,-ev.im
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
122 vld1.32 {d22}, [r5,:64] @ d22 = first two floats of data
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
123 vdup.32 d1, r2 @ d1 = sign mask taken from sign_convention
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
124 vmov d23, d22
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
125 veor d2, d3, d16 @ -od.re, od.im
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
126 vtrn.32 d22, d23 @ d22 = data[0],data[0]  d23 = data[1],data[1]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
127 veor d0, d0, d1 @ flip the sign of the loaded float if the mask is set
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
128 veor d23, d23, d17 @ d23 = data[1],-data[1]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
129 vmla.f32 d20, d3, d4[1] @ last pair, same accumulation as in the loop
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
130 vmla.f32 d20, d7, d5[1]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
131 vmla.f32 d6, d2, d4[1]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
132 vmla.f32 d6, d21, d5[1]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
133 vadd.f32 d22, d22, d23 @ d22 = data[0]+data[1], data[0]-data[1]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
134 vst1.32 {d20}, [r7,:64]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
135 vst1.32 {d6}, [lr,:64]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
136 vst1.32 {d0[0]}, [r0,:32] @ write the (possibly negated) float back
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
137 vst1.32 {d22}, [r5,:64]
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
138
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
139 cmp r6, #0 @ r6 is still inverse << 31
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
140 popeq {r4-r8,pc} @ inverse bit clear: restore registers and return
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
141
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
142 vmul.f32 d22, d22, d18 @ inverse only: scale the first two floats by 0.5
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
143 vst1.32 {d22}, [r5,:64] @ overwrites the unscaled store made above
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
144 add r0, r4, #20
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
145 mov r1, r5
11655
7ad2eb6a2f10 ARM: fix build for darwin/iphone
mru
parents: 11532
diff changeset
146 bl X(ff_fft_permute_neon)
11532
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
147 add r0, r4, #20
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
148 mov r1, r5
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
149 pop {r4-r8,lr} @ restore registers before the tail call
11655
7ad2eb6a2f10 ARM: fix build for darwin/iphone
mru
parents: 11532
diff changeset
150 b X(ff_fft_calc_neon) @ tail call: it returns directly to our caller
11532
e011e73a902b ARM: NEON optimised RDFT
mru
parents:
diff changeset
151 endfunc