annotate ppc/fft_altivec_s.S @ 12530:63edd10ad4bc libavcodec tip

Try to fix crashes introduced by r25218. r25218 made assumptions about the existence of past reference frames that weren't necessarily true.
author darkshikari
date Tue, 28 Sep 2010 09:06:22 +0000
parents 6f064ab48463
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
1 /*
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
2 * FFT transform with Altivec optimizations
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
3 * Copyright (c) 2009 Loren Merritt
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
4 *
12188
6f064ab48463 more credits to D. J. Bernstein for fft
lorenm
parents: 12089
diff changeset
5 * This algorithm (though not any of the implementation details) is
6f064ab48463 more credits to D. J. Bernstein for fft
lorenm
parents: 12089
diff changeset
6 * based on libdjbfft by D. J. Bernstein.
6f064ab48463 more credits to D. J. Bernstein for fft
lorenm
parents: 12089
diff changeset
7 *
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
8 * This file is part of FFmpeg.
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
9 *
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
10 * FFmpeg is free software; you can redistribute it and/or
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
11 * modify it under the terms of the GNU Lesser General Public
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
12 * License as published by the Free Software Foundation; either
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
13 * version 2.1 of the License, or (at your option) any later version.
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
14 *
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
15 * FFmpeg is distributed in the hope that it will be useful,
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
18 * Lesser General Public License for more details.
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
19 *
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
20 * You should have received a copy of the GNU Lesser General Public
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
21 * License along with FFmpeg; if not, write to the Free Software
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
23 */
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
24
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
25 /*
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
26 * These functions are not individually interchangeable with the C versions.
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
27 * While C takes arrays of FFTComplex, Altivec leaves intermediate results
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
28 * in blocks as convenient to the vector size.
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
29 * i.e. {4x real, 4x imaginary, 4x real, ...}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
30 *
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
31 * I ignore standard calling convention.
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
32 * Instead, the following registers are treated as global constants:
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
33 * v14: zero
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
34 * v15..v18: cosines
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
35 * v19..v29: permutations
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
36 * r9: 16
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
37 * r12: ff_cos_tabs
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
38 * and the rest are free for local use.
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
39 */
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
40
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
41 #include "config.h"
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
42 #include "asm.S"
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
43
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
44 .text
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
45
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
// addi2 ra, imm
//   ra += imm, where imm is a 32-bit assemble-time constant.
//   The constant is split into a low halfword (added with addi, @l) and a
//   high halfword (added with addis, @ha).  Each instruction is emitted only
//   when its .if condition on imm is non-zero, so small or 64K-aligned
//   constants cost a single instruction.
.macro addi2 ra, imm
    .if \imm & 0xffff
        addi    \ra, \ra, \imm@l        // low 16 bits
    .endif
    .if (\imm+0x8000)>>16
        addis   \ra, \ra, \imm@ha       // high 16 bits; the +0x8000 test mirrors the @ha rounding
    .endif
.endm
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
54
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
// FFT4 a0, a1, a2, a3
//   4-point FFT on one block of four complex values.
//   Input : a0, a1 (overwritten as scratch).
//   Output: a2 = {r0,r1,r2,r3}, a3 = {i0,i1,i2,i3}.
//   Uses the permutation constants v20..v24; the vcprm(...) notes give the
//   word selection each one encodes (sN = word N of the second source).
.macro FFT4 a0, a1, a2, a3
    // stage 1: gather operands, then one add/sub butterfly
    vperm   \a2, \a0, \a1, v20          // vcprm(0,1,s2,s1) -> {r0,i0,r3,i2}
    vperm   \a3, \a0, \a1, v21          // vcprm(2,3,s0,s3) -> {r1,i1,r2,i3}
    vaddfp  \a0, \a2, \a3               // {t1,t2,t6,t5}
    vsubfp  \a1, \a2, \a3               // {t3,t4,t8,t7}
    // stage 2: regroup the partial sums and butterfly again
    vmrghw  \a2, \a0, \a1               // vcprm(0,s0,1,s1) -> {t1,t3,t2,t4}
    vperm   \a3, \a0, \a1, v22          // vcprm(2,s3,3,s2) -> {t6,t7,t5,t8}
    vaddfp  \a0, \a2, \a3               // {r0,r1,i0,i1}
    vsubfp  \a1, \a2, \a3               // {r2,r3,i2,i3}
    // split into a vector of reals and a vector of imaginaries
    vperm   \a2, \a0, \a1, v23          // vcprm(0,1,s0,s1) -> {r0,r1,r2,r3}
    vperm   \a3, \a0, \a1, v24          // vcprm(2,3,s2,s3) -> {i0,i1,i2,i3}
.endm
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
67
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
// FFT4x2 a0, a1, b0, b1, a2, a3, b2, b3
//   Two independent 4-point FFTs (the "a" set and the "b" set) with their
//   instructions interleaved.  Each set performs exactly the FFT4 sequence:
//   Input : a0,a1 and b0,b1 (overwritten as scratch).
//   Output: a2/b2 = {r0,r1,r2,r3}, a3/b3 = {i0,i1,i2,i3} of the respective set.
//   Uses the permutation constants v20..v24.
.macro FFT4x2 a0, a1, b0, b1, a2, a3, b2, b3
    // stage 1: gather operands
    vperm   \a2, \a0, \a1, v20          // vcprm(0,1,s2,s1) -> {r0,i0,r3,i2}
    vperm   \a3, \a0, \a1, v21          // vcprm(2,3,s0,s3) -> {r1,i1,r2,i3}
    vperm   \b2, \b0, \b1, v20          // same selection for set b
    vperm   \b3, \b0, \b1, v21
    // stage 1: butterflies
    vaddfp  \a0, \a2, \a3               // {t1,t2,t6,t5}
    vsubfp  \a1, \a2, \a3               // {t3,t4,t8,t7}
    vaddfp  \b0, \b2, \b3
    vsubfp  \b1, \b2, \b3
    // stage 2: regroup
    vmrghw  \a2, \a0, \a1               // vcprm(0,s0,1,s1) -> {t1,t3,t2,t4}
    vperm   \a3, \a0, \a1, v22          // vcprm(2,s3,3,s2) -> {t6,t7,t5,t8}
    vmrghw  \b2, \b0, \b1
    vperm   \b3, \b0, \b1, v22
    // stage 2: butterflies
    vaddfp  \a0, \a2, \a3               // {r0,r1,i0,i1}
    vsubfp  \a1, \a2, \a3               // {r2,r3,i2,i3}
    vaddfp  \b0, \b2, \b3
    vsubfp  \b1, \b2, \b3
    // split each set into reals and imaginaries
    vperm   \a2, \a0, \a1, v23          // vcprm(0,1,s0,s1) -> {r0,r1,r2,r3}
    vperm   \a3, \a0, \a1, v24          // vcprm(2,3,s2,s3) -> {i0,i1,i2,i3}
    vperm   \b2, \b0, \b1, v23
    vperm   \b3, \b0, \b1, v24
.endm
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
90
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
// FFT8 a0, a1, b0, b1, a2, a3, b2, b3, b4
//   8-point FFT, in place in a0, a1, b0, b1.
//   Output: a0 = {r0,r1,r2,r3}, a1 = {i0,i1,i2,i3},
//           b0 = {r4,r5,r6,r7}, b1 = {i4,i5,i6,i7}.
//   Scratch: a2, a3, b2, b3, b4.
//   The "a" half runs the FFT4 sequence; its instructions are interleaved
//   with the processing of the "b" half.
//   Constants used: v14 (zero), v17 and v18 (+-1/sqrt(2) patterns),
//   v20..v29 (permutations).
.macro FFT8 a0, a1, b0, b1, a2, a3, b2, b3, b4
    vmrghw  \b2, \b0, \b1               // vcprm(0,s0,1,s1) -> {r4,r6,i4,i6}
    vmrglw  \b3, \b0, \b1               // vcprm(2,s2,3,s3) -> {r5,r7,i5,i7}
    vperm   \a2, \a0, \a1, v20          // a half: FFT4 stage 1 gather
    vperm   \a3, \a0, \a1, v21
    vaddfp  \b0, \b2, \b3               // {t1,t3,t2,t4}
    vsubfp  \b1, \b2, \b3               // {r5,r7,i5,i7}
    vperm   \b4, \b1, \b1, v25          // vcprm(2,3,0,1) -> {i5,i7,r5,r7}
    vaddfp  \a0, \a2, \a3               // a half: FFT4 stage 1 butterfly
    vsubfp  \a1, \a2, \a3
    // b1 = b1 * v17 + b4 * v18; v14 supplies the zero addend of the first product
    vmaddfp \b1, \b1, v17, v14          // * {-1,1,1,-1}/sqrt(2)
    vmaddfp \b1, \b4, v18, \b1          // * { 1,1,1,1 }/sqrt(2) -> {t8,ta,t7,t9}
    vmrghw  \a2, \a0, \a1               // a half: FFT4 stage 2 regroup
    vperm   \a3, \a0, \a1, v22
    vperm   \b2, \b0, \b1, v26          // vcprm(1,2,s3,s0) -> {t3,t2,t9,t8}
    vperm   \b3, \b0, \b1, v27          // vcprm(0,3,s2,s1) -> {t1,t4,t7,ta}
    vaddfp  \a0, \a2, \a3               // a half: FFT4 stage 2 butterfly
    vsubfp  \a1, \a2, \a3
    vaddfp  \b0, \b2, \b3               // {t1,t2,t9,ta}
    vsubfp  \b1, \b2, \b3               // {t6,t5,tc,tb}
    vperm   \a2, \a0, \a1, v23          // a half: reals
    vperm   \a3, \a0, \a1, v24          // a half: imaginaries
    vperm   \b2, \b0, \b1, v28          // vcprm(0,2,s1,s3) -> {t1,t9,t5,tb}
    vperm   \b3, \b0, \b1, v29          // vcprm(1,3,s0,s2) -> {t2,ta,t6,tc}
    // final butterflies combining the two halves
    vsubfp  \b0, \a2, \b2               // {r4,r5,r6,r7}
    vsubfp  \b1, \a3, \b3               // {i4,i5,i6,i7}
    vaddfp  \a0, \a2, \b2               // {r0,r1,r2,r3}
    vaddfp  \a1, \a3, \b3               // {i0,i1,i2,i3}
.endm
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
120
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
// BF d0, d1, s0, s1
//   Butterfly: d0 = s0 + s1, d1 = s0 - s1 (element-wise, single precision).
//   The difference is written first, so d0 may be the same register as s0
//   or s1; d1 must not alias either source.
.macro BF d0,d1,s0,s1
    vsubfp  \d1, \s0, \s1               // difference
    vaddfp  \d0, \s0, \s1               // sum
.endm
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
125
12085
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
// zip d0, d1, s0, s1
//   Interleave the 32-bit words of s0 and s1:
//     d0 = {s0[0], s1[0], s0[1], s1[1]}   (merge high words)
//     d1 = {s0[2], s1[2], s0[3], s1[3]}   (merge low words)
//   d0 is written before s0/s1 are read for d1, so d0 must not alias a source.
.macro zip d0,d1,s0,s1
    vmrghw  \d0, \s0, \s1
    vmrglw  \d1, \s0, \s1
.endm
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
130
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
// def_fft4 interleave
//   Emits the routine fft4<interleave>_altivec: a 4-point FFT on the 32 bytes
//   at r3, done in place.  r9 is used as the offset of the second vector
//   (the file header defines r9 as the constant 16).
//   With a non-blank argument the result is stored with reals and
//   imaginaries zipped together; with a blank argument it is stored as a
//   vector of reals followed by a vector of imaginaries.
//   Touches v0..v3.
.macro def_fft4 interleave
fft4\interleave\()_altivec:
    lvx     v0,  0, r3
    lvx     v1, r9, r3
    FFT4    v0, v1, v2, v3              // result: v2 = reals, v3 = imaginaries
.ifnb \interleave
    zip     v0, v1, v2, v3              // pair each real with its imaginary
    stvx    v0,  0, r3
    stvx    v1, r9, r3
.else
    stvx    v2,  0, r3
    stvx    v3, r9, r3
.endif
    blr
.endm
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
146
12085
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
// def_fft8 interleave
//   Emits the routine fft8<interleave>_altivec: an 8-point FFT on the 64 bytes
//   at r3, done in place.  r4 is set to r3+32 and r9 is used as the offset of
//   the odd vectors (the file header defines r9 as the constant 16).
//   With a non-blank argument each real vector is zipped with its imaginary
//   vector before storing; with a blank argument the FFT8 result vectors are
//   stored as they are.
//   Touches r4 and v0..v8.
.macro def_fft8 interleave
fft8\interleave\()_altivec:
    addi    r4, r3, 32                  // second half of the block
    lvx     v0,  0, r3
    lvx     v1, r9, r3
    lvx     v2,  0, r4
    lvx     v3, r9, r4
    FFT8    v0, v1, v2, v3, v4, v5, v6, v7, v8
.ifnb \interleave
    zip     v4, v5, v0, v1
    zip     v6, v7, v2, v3
    stvx    v4,  0, r3
    stvx    v5, r9, r3
    stvx    v6,  0, r4
    stvx    v7, r9, r4
.else
    stvx    v0,  0, r3
    stvx    v1, r9, r3
    stvx    v2,  0, r4
    stvx    v3, r9, r4
.endif
    blr
.endm
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
170
12085
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
// def_fft16 interleave
//   Emits the routine fft16<interleave>_altivec: a 16-point FFT on the
//   128 bytes at r3, done in place.
//   Layout: r3 = base, r4 = base+32, r5 = base+64, r6 = base+96; r9 is the
//   offset of the odd vectors (the file header defines r9 as the constant 16).
//   Steps: two 4-point FFTs on the upper 64 bytes (FFT4x2), one 8-point FFT
//   on the lower 64 bytes (FFT8), a twiddle multiply of the 4-point results
//   by v15 (wre) and v16 (wim), then the combining butterflies.
//   Touches r4..r6 and v0..v13.
//   NOTE: the interleave variant additionally overwrites v14 and v15, which
//   the file header lists as the zero and first cosine constants, so they no
//   longer hold those values when this variant returns.
.macro def_fft16 interleave
fft16\interleave\()_altivec:
    addi    r5, r3, 64
    addi    r6, r3, 96
    addi    r4, r3, 32
    // upper half: two 4-point FFTs -> v4,v5 (r2,i2) and v6,v7 (r3,i3)
    lvx     v0,  0, r5
    lvx     v1, r9, r5
    lvx     v2,  0, r6
    lvx     v3, r9, r6
    FFT4x2  v0, v1, v2, v3, v4, v5, v6, v7
    // lower half: 8-point FFT, result left in v0..v3
    lvx     v0,  0, r3
    lvx     v1, r9, r3
    lvx     v2,  0, r4
    lvx     v3, r9, r4
    FFT8    v0, v1, v2, v3, v8, v9, v10, v11, v12
    // twiddle multiply; v14 is the zero addend of the first products
    vmaddfp  v8,  v4, v15, v14          // r2*wre
    vmaddfp  v9,  v5, v15, v14          // i2*wre
    vmaddfp  v10, v6, v15, v14          // r3*wre
    vmaddfp  v11, v7, v15, v14          // i3*wre
    vmaddfp  v8,  v5, v16, v8           // += i2*wim
    vnmsubfp v9,  v4, v16, v9           // -= r2*wim
    vnmsubfp v10, v7, v16, v10          // -= i3*wim
    vmaddfp  v11, v6, v16, v11          // += r3*wim
    // combining butterflies
    BF      v10, v12, v10, v8
    BF      v11, v13, v9,  v11
    BF      v0,  v4,  v0,  v10
    BF      v3,  v7,  v3,  v12
    BF      v1,  v5,  v1,  v11
    BF      v2,  v6,  v2,  v13
.ifnb \interleave
    zip     v8,  v9,  v0, v1
    zip     v10, v11, v2, v3
    zip     v12, v13, v4, v5
    zip     v14, v15, v6, v7            // clobbers the v14/v15 constants
    stvx    v8,   0, r3
    stvx    v9,  r9, r3
    stvx    v10,  0, r4
    stvx    v11, r9, r4
    stvx    v12,  0, r5
    stvx    v13, r9, r5
    stvx    v14,  0, r6
    stvx    v15, r9, r6
.else
    stvx    v0,  0, r3
    stvx    v4,  0, r5
    stvx    v3, r9, r4
    stvx    v7, r9, r6
    stvx    v1, r9, r3
    stvx    v5, r9, r5
    stvx    v2,  0, r4
    stvx    v6,  0, r6
.endif
    blr
.endm
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
225
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
// void pass(float *z, float *wre, int n)
//
// PASS interleave, suffix
//   Emits the routine fft_pass<suffix>_altivec, one combining pass of the FFT.
//   In : r3 = z, r4 = wre, r5 = n (loop count, one iteration per 32 bytes of z)
//        r9 = 16, v14 = zero, v19 = permutation (see the file header).
//   Derived offsets: o1 = n*32 (r5), o2 = n*64 (r7), o3 = o1+o2 (r10), with
//   r6/r8/r11 holding the same offsets plus 16.  r0 starts at wre + n*16 and
//   walks downwards to read wim, while r4 walks upwards through wre.
//   "interleave" is a numeric flag: 0 stores the vectors back where they were
//   loaded from, non-zero zips each real vector with its imaginary vector
//   before storing.
//   Out: r3 is stepped by 32 per iteration and then reduced by o1, i.e. it
//        holds its entry value again on return.
//   Touches r0, r4..r8, r10, r11, ctr and v0..v13.
.macro PASS interleave, suffix
fft_pass\suffix\()_altivec:
    mtctr   r5                          // n iterations
    slwi    r0, r5, 4                   // n*16
    slwi    r7, r5, 6                   // o2
    slwi    r5, r5, 5                   // o1
    add     r10, r5, r7                 // o3
    add     r0, r4, r0                  // wim
    addi    r6, r5, 16                  // o1+16
    addi    r8, r7, 16                  // o2+16
    addi    r11, r10, 16                // o3+16
1:
    lvx     v8,  0, r4                  // wre
    lvx     v10, 0, r0                  // wim
    sub     r0, r0, r9                  // step wim back by one vector
    lvx     v9,  0, r0
    vperm   v9, v9, v10, v19            // vcprm(s0,3,2,1) => wim[0 .. -3]
    lvx     v4, r3, r7                  // r2 = z[o2]
    lvx     v5, r3, r8                  // i2 = z[o2+16]
    lvx     v6, r3, r10                 // r3 = z[o3]
    lvx     v7, r3, r11                 // i3 = z[o3+16]
    vmaddfp  v10, v4, v8, v14           // r2*wre
    vmaddfp  v11, v5, v8, v14           // i2*wre
    vmaddfp  v12, v6, v8, v14           // r3*wre
    vmaddfp  v13, v7, v8, v14           // i3*wre
    lvx     v0,  0, r3                  // r0 = z[0]
    lvx     v3, r3, r6                  // i1 = z[o1+16]
    vmaddfp  v10, v5, v9, v10           // += i2*wim
    vnmsubfp v11, v4, v9, v11           // -= r2*wim
    vnmsubfp v12, v7, v9, v12           // -= i3*wim
    vmaddfp  v13, v6, v9, v13           // += r3*wim
    lvx     v1, r3, r9                  // i0 = z[16]
    lvx     v2, r3, r5                  // r1 = z[o1]
    BF      v12, v8, v12, v10
    BF      v13, v9, v11, v13
    BF      v0,  v4, v0,  v12
    BF      v3,  v7, v3,  v8
.if !\interleave
    stvx    v0,  0, r3
    stvx    v4, r3, r7
    stvx    v3, r3, r6
    stvx    v7, r3, r11
.endif
    BF      v1, v5, v1, v13
    BF      v2, v6, v2, v9
.if !\interleave
    stvx    v1, r3, r9
    stvx    v2, r3, r5
    stvx    v5, r3, r8
    stvx    v6, r3, r10
.else
    // v8/v9 are free again here and serve as zip scratch
    zip     v8, v9, v0, v1
    stvx    v8,  0, r3
    stvx    v9, r3, r9
    zip     v8, v9, v2, v3
    stvx    v8, r3, r5
    stvx    v9, r3, r6
    zip     v8, v9, v4, v5
    stvx    v8, r3, r7
    stvx    v9, r3, r8
    zip     v8, v9, v6, v7
    stvx    v8, r3, r10
    stvx    v9, r3, r11
.endif
    addi    r3, r3, 32                  // next pair of z vectors
    addi    r4, r4, 16                  // next wre vector
    bdnz    1b
    sub     r3, r3, r5                  // undo the n*32 advance of z
    blr
.endm
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
301
12089
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* 1/sqrt(2) */
#define M_SQRT1_2   0.70710678118654752440

/*
 * Byte indices of one 32-bit word, for building permute control vectors.
 * WORD_0..WORD_3 cover bytes 0x00..0x0f and WORD_s0..WORD_s3 cover bytes
 * 0x10..0x1f; in the vperm comments of this file a plain digit names a word
 * of the first source vector and sN a word of the second.
 */
#define WORD_0      0x00,0x01,0x02,0x03
#define WORD_1      0x04,0x05,0x06,0x07
#define WORD_2      0x08,0x09,0x0a,0x0b
#define WORD_3      0x0c,0x0d,0x0e,0x0f
#define WORD_s0     0x10,0x11,0x12,0x13
#define WORD_s1     0x14,0x15,0x16,0x17
#define WORD_s2     0x18,0x19,0x1a,0x1b
#define WORD_s3     0x1c,0x1d,0x1e,0x1f

/* Emit a 16-byte permute control vector that selects words a, b, c, d. */
#define vcprm(a, b, c, d) .byte WORD_##a, WORD_##b, WORD_##c, WORD_##d
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
314
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/*
 * fft_data: sixteen 16-byte constant vectors (5 rows of four floats
 * followed by 11 vcprm byte-index masks), 16-byte aligned (.align 4).
 *
 * fft_calc loads them in order with two lvm invocations, so the n-th
 * row below ends up in vector register v(14+n): the first row in v14,
 * the last in v29.  The row order is therefore part of the contract
 * with the code that uses those registers - do not reorder.
 *
 * 0.92387953 and 0.38268343 are cos(pi/8) and sin(pi/8) to eight
 * decimal places.
 */
315 .rodata
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
316 .align 4
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
317 fft_data:
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* float rows: loaded into v14..v18 */
318 .float 0, 0, 0, 0
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
319 .float 1, 0.92387953, M_SQRT1_2, 0.38268343
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
320 .float 0, 0.38268343, M_SQRT1_2, 0.92387953
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
321 .float -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2,-M_SQRT1_2
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
322 .float M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* byte-index masks: loaded into v19..v29 */
323 vcprm(s0,3,2,1)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
324 vcprm(0,1,s2,s1)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
325 vcprm(2,3,s0,s3)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
326 vcprm(2,s3,3,s2)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
327 vcprm(0,1,s0,s1)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
328 vcprm(2,3,s2,s3)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
329 vcprm(2,3,0,1)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
330 vcprm(1,2,s3,s0)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
331 vcprm(0,3,s2,s1)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
332 vcprm(0,2,s1,s3)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
333 vcprm(1,3,s0,s2)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
334
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/*
 * lvm b, r [, regs...]
 *
 * Load one or more vector registers from consecutive 16-byte slots.
 *   b    - GPR holding the source address.  It is advanced by 16 for
 *          every register loaded, so on exit it points just past the
 *          last slot read (the macro clobbers b).
 *   r    - first vector register to load.
 *   regs - optional further vector registers; the macro re-invokes
 *          itself on them until the list is empty.
 */
335 .macro lvm b, r, regs:vararg
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
336 lvx \r, 0, \b
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
337 addi \b, \b, 16
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* recurse on the remaining registers, if any */
338 .ifnb \regs
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
339 lvm \b, \regs
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
340 .endif
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
341 .endm
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
342
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/*
 * stvm b, r [, regs...]
 *
 * Store one or more vector registers to consecutive 16-byte slots.
 *   b    - GPR holding the destination address.  It is advanced by 16
 *          for every register stored, so on exit it points just past
 *          the last slot written (the macro clobbers b).
 *   r    - first vector register to store.
 *   regs - optional further vector registers; the macro re-invokes
 *          itself on them until the list is empty.
 */
343 .macro stvm b, r, regs:vararg
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
344 stvx \r, 0, \b
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
345 addi \b, \b, 16
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* recurse on the remaining registers, if any */
346 .ifnb \regs
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
347 stvm \b, \regs
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
348 .endif
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
349 .endm
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
350
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/*
 * fft_calc interleave
 *
 * Emits the externally visible entry point
 * ff_fft_calc\interleave\()_altivec.  Despite its name, the macro
 * argument is the symbol suffix: DECL_FFTS invokes this macro with
 * its \suffix argument (empty or "_interleave").
 *
 * On entry:
 *   r3 - pointer whose first 32-bit word selects the transform: the
 *        word minus 2 indexes fft_dispatch_tab\interleave\()_altivec.
 *        NOTE(review): presumably FFTContext with nbits as its first
 *        member - confirm against the C declaration.
 *   r4 - passed to the selected transform in r3.
 *        NOTE(review): presumably the complex data buffer.
 *
 * Stack frame (160+16*PS bytes, created with stpu):
 *   0(r1)            back chain
 *   PS*2..PS*13(r1)  LR save slots used by the DECL_FFT routines
 *   15*PS(r1)        caller's VRSAVE (32-bit)
 *   16*PS(r1) ...    v20-v29 (10 * 16 bytes), saved because they are
 *                    overwritten with constants below
 * The caller's LR is stored at 2*PS in the caller's frame.
 *
 * Register state handed to the transform routines:
 *   v14-v29  the sixteen rows of fft_data, in table order
 *   r9       16
 *   r12      address of ff_cos_tabs
 */
351 .macro fft_calc interleave
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
352 extfunc ff_fft_calc\interleave\()_altivec
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
353 mflr r0
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
354 stp r0, 2*PS(r1)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
355 stpu r1, -(160+16*PS)(r1)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
356 addi r6, r1, 16*PS
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
357 stvm r6, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
358 mfvrsave r0
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
359 stw r0, 15*PS(r1)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/*
 * VRSAVE = 0xfffffffc (bits for v0-v29).  li takes a signed 16-bit
 * immediate, so 0xfffffffc is only accepted by assemblers that wrap
 * the constant to 32 bits and is rejected as out of range when
 * assembling for 64-bit ELF.  -4 sign-extends to the same low 32 bits,
 * which is all mtvrsave uses.  The original spelling is kept for
 * Apple's assembler, where it is known to be accepted.
 */
#if __APPLE__
360 li r6, 0xfffffffc
#else
li r6, -4
#endif
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
361 mtvrsave r6
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
362
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* load the 16 constant vectors of fft_data into v14..v29 */
363 movrel r6, fft_data
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
364 lvm r6, v14, v15, v16, v17, v18, v19, v20, v21
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
365 lvm r6, v22, v23, v24, v25, v26, v27, v28, v29
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
366
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
367 li r9, 16
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
368 movrel r12, X(ff_cos_tabs)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
369
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* look up the routine: table[word at 0(r3) - 2], pointer-sized entries */
370 movrel r6, fft_dispatch_tab\interleave\()_altivec
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
371 lwz r3, 0(r3)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
372 subi r3, r3, 2
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
373 slwi r3, r3, 2+ARCH_PPC64
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
374 lpx r3, r3, r6
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
375 mtctr r3
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* second argument becomes the transform's r3, then call through CTR */
376 mr r3, r4
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
377 bctrl
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
378
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* epilogue: restore v20-v29, VRSAVE, stack pointer and LR */
379 addi r6, r1, 16*PS
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
380 lvm r6, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
381 lwz r6, 15*PS(r1)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
382 mtvrsave r6
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
383 lp r1, 0(r1)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
384 lp r0, 2*PS(r1)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
385 mtlr r0
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
386 blr
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
387 .endm
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
388
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
/*
 * DECL_FFT suffix, bits, n, n2, n4
 *
 * Emits the local routine fft\n\suffix\()_altivec, which builds a
 * transform of size n out of one call to the size-n2 routine, two
 * calls to the size-n4 routine and a final fft_pass.
 *
 *   suffix - appended to this routine's name and to the fft_pass
 *            routine it ends in.  The sub-transform calls use the
 *            un-suffixed fft\n2/fft\n4 routines.
 *   bits   - selects the LR save slot and the entry of the table at
 *            r12 that is loaded into r4.  DECL_FFTS passes log2(n).
 *   n, n2, n4 - transform size and the two sub-transform sizes.
 *            DECL_FFTS passes n2 = n/2 and n4 = n/4.
 *
 * r3 is offset by n*4, then n*2, then n*-6 via addi2, i.e. it is back
 * at its entry value before fft_pass.
 * NOTE(review): addi2 is defined outside this view; presumably it adds
 * an immediate too large for addi - confirm.
 *
 * r1 is not modified here, so LR is saved in the frame set up by
 * fft_calc.  Each size uses its own slot, PS*(bits-3), so nested calls
 * do not overwrite each other's return address.
 */
389 .macro DECL_FFT suffix, bits, n, n2, n4
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
390 fft\n\suffix\()_altivec:
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
391 mflr r0
12089
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
392 stp r0,PS*(\bits-3)(r1)
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
/* size-n2 transform at r3 */
393 bl fft\n2\()_altivec
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
/* size-n4 transform at r3 + n*4 */
394 addi2 r3,\n*4
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
395 bl fft\n4\()_altivec
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
/* size-n4 transform at r3 + n*6 */
396 addi2 r3,\n*2
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
397 bl fft\n4\()_altivec
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
/* rewind r3 to its entry value */
398 addi2 r3,\n*-6
12089
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
399 lp r0,PS*(\bits-3)(r1)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* r4 = pointer stored at index \bits of the table addressed by r12 */
400 lp r4,\bits*PS(r12)
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
401 mtlr r0
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
402 li r5,\n/16
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
/* tail call: LR is already restored, so fft_pass returns to our caller */
403 b fft_pass\suffix\()_altivec
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
404 .endm
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
405
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
/*
 * DECL_FFTS interleave, suffix
 *
 * Instantiates one complete family of FFT routines:
 *   - in .text: the fft4/fft8/fft16 routines (def_fft4/8/16), the
 *     fft_pass routine (PASS), the sizes 32..65536 (DECL_FFT) and the
 *     public entry point (fft_calc), all named with \suffix;
 *   - in .rodata: fft_dispatch_tab\suffix\()_altivec, a table of
 *     pointers to the routines for sizes 4, 8, ..., 65536.
 *
 *   interleave - forwarded to PASS only.
 *   suffix     - symbol-name suffix for everything emitted here.
 *
 * The dispatch table is ordered by log2(size) starting at 2, which
 * matches the "value - 2" index computed in fft_calc.  Keep the table
 * order and the DECL_FFT list in sync.
 */
406 .macro DECL_FFTS interleave, suffix
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
407 .text
12085
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
408 def_fft4 \suffix
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
409 def_fft8 \suffix
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
410 def_fft16 \suffix
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
411 PASS \interleave, \suffix
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
/* arguments: suffix, log2(n), n, n/2, n/4 */
412 DECL_FFT \suffix, 5, 32, 16, 8
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
413 DECL_FFT \suffix, 6, 64, 32, 16
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
414 DECL_FFT \suffix, 7, 128, 64, 32
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
415 DECL_FFT \suffix, 8, 256, 128, 64
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
416 DECL_FFT \suffix, 9, 512, 256, 128
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
417 DECL_FFT \suffix,10, 1024, 512, 256
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
418 DECL_FFT \suffix,11, 2048, 1024, 512
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
419 DECL_FFT \suffix,12, 4096, 2048, 1024
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
420 DECL_FFT \suffix,13, 8192, 4096, 2048
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
421 DECL_FFT \suffix,14,16384, 8192, 4096
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
422 DECL_FFT \suffix,15,32768,16384, 8192
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
423 DECL_FFT \suffix,16,65536,32768,16384
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
424
12089
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* public entry point ff_fft_calc\suffix\()_altivec */
425 fft_calc \suffix
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
426
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
/* dispatch table: entry i is the routine for size 2^(i+2) */
427 .rodata
12089
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
428 .align 3
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
429 fft_dispatch_tab\suffix\()_altivec:
12085
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
430 PTR fft4\suffix\()_altivec
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
431 PTR fft8\suffix\()_altivec
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
432 PTR fft16\suffix\()_altivec
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
433 PTR fft32\suffix\()_altivec
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
434 PTR fft64\suffix\()_altivec
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
435 PTR fft128\suffix\()_altivec
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
436 PTR fft256\suffix\()_altivec
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
437 PTR fft512\suffix\()_altivec
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
438 PTR fft1024\suffix\()_altivec
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
439 PTR fft2048\suffix\()_altivec
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
440 PTR fft4096\suffix\()_altivec
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
441 PTR fft8192\suffix\()_altivec
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
442 PTR fft16384\suffix\()_altivec
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
443 PTR fft32768\suffix\()_altivec
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
444 PTR fft65536\suffix\()_altivec
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
445 .endm
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
446
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
/*
 * Instantiate both routine families: one with interleave=0 and an
 * empty suffix, and one with interleave=1 and the "_interleave" suffix.
 */
447 DECL_FFTS 0
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
448 DECL_FFTS 1, _interleave