annotate ppc/fft_altivec_s.S @ 12105:d6e87496883b libavcodec

ARM: set section to .text in 'function' macro. This ensures code always goes into the .text section and avoids the need to specify it explicitly after changing sections.
author mru
date Wed, 07 Jul 2010 20:09:41 +0000
parents 5638941ec8ef
children 6f064ab48463
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
1 /*
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
2 * FFT transform with Altivec optimizations
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
3 * Copyright (c) 2009 Loren Merritt
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
4 *
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
5 * This file is part of FFmpeg.
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
6 *
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
7 * FFmpeg is free software; you can redistribute it and/or
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
9 * License as published by the Free Software Foundation; either
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
10 * version 2.1 of the License, or (at your option) any later version.
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
11 *
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
12 * FFmpeg is distributed in the hope that it will be useful,
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
15 * Lesser General Public License for more details.
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
16 *
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
18 * License along with FFmpeg; if not, write to the Free Software
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
20 */
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
21
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
22 /*
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
23 * These functions are not individually interchangeable with the C versions.
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
24 * While C takes arrays of FFTComplex, Altivec leaves intermediate results
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
25 * in blocks as convenient to the vector size.
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
26 * i.e. {4x real, 4x imaginary, 4x real, ...}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
27 *
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
28 * I ignore standard calling convention.
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
29 * Instead, the following registers are treated as global constants:
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
30 * v14: zero
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
31 * v15..v18: cosines
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
32 * v19..v29: permutations
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
33 * r9: 16
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
34 * r12: ff_cos_tabs
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
35 * and the rest are free for local use.
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
36 */
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
37
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
38 #include "config.h"
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
39 #include "asm.S"
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
40
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
41 .text
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
42
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
43 .macro addi2 ra, imm // add 32-bit immediate imm to ra in place, using at most one addi plus one addis
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
44 .if \imm & 0xffff // emit the addi only when the low 16 bits are non-zero
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
45 addi \ra, \ra, \imm@l // ra += low halfword (addi sign-extends its 16-bit immediate)
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
46 .endif
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
47 .if (\imm+0x8000)>>16 // emit the addis only when the adjusted high halfword is non-zero
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
48 addis \ra, \ra, \imm@ha // ra += adjusted high halfword << 16 (@ha compensates for the sign extension of the low half)
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
49 .endif
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
50 .endm
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
51
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
52 .macro FFT4 a0, a1, a2, a3 // 4-point FFT; in: a0,a1 (interleaved {r0,i0,r1,i1},{r2,i2,r3,i3} per the lane comments below); out: a2 = reals, a3 = imaginaries; a0,a1 are clobbered; uses permute constants v20-v24
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
53 vperm \a2,\a0,\a1,v20 // vcprm(0,1,s2,s1) // {r0,i0,r3,i2}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
54 vperm \a3,\a0,\a1,v21 // vcprm(2,3,s0,s3) // {r1,i1,r2,i3}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
55 vaddfp \a0,\a2,\a3 // first stage sums: {t1,t2,t6,t5}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
56 vsubfp \a1,\a2,\a3 // first stage differences: {t3,t4,t8,t7}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
57 vmrghw \a2,\a0,\a1 // vcprm(0,s0,1,s1) // {t1,t3,t2,t4}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
58 vperm \a3,\a0,\a1,v22 // vcprm(2,s3,3,s2) // {t6,t7,t5,t8}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
59 vaddfp \a0,\a2,\a3 // second stage sums: {r0,r1,i0,i1}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
60 vsubfp \a1,\a2,\a3 // second stage differences: {r2,r3,i2,i3}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
61 vperm \a2,\a0,\a1,v23 // vcprm(0,1,s0,s1) // gather the reals: {r0,r1,r2,r3}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
62 vperm \a3,\a0,\a1,v24 // vcprm(2,3,s2,s3) // gather the imaginaries: {i0,i1,i2,i3}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
63 .endm
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
64
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
65 .macro FFT4x2 a0, a1, b0, b1, a2, a3, b2, b3 // two independent FFT4s with their instructions interleaved: a0,a1 -> a2,a3 and b0,b1 -> b2,b3; a0,a1,b0,b1 are clobbered
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
66 vperm \a2,\a0,\a1,v20 // vcprm(0,1,s2,s1) // {r0,i0,r3,i2}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
67 vperm \a3,\a0,\a1,v21 // vcprm(2,3,s0,s3) // {r1,i1,r2,i3}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
68 vperm \b2,\b0,\b1,v20 // same permute as line 66, applied to the b pair
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
69 vperm \b3,\b0,\b1,v21 // same permute as line 67, applied to the b pair
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
70 vaddfp \a0,\a2,\a3 // {t1,t2,t6,t5}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
71 vsubfp \a1,\a2,\a3 // {t3,t4,t8,t7}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
72 vaddfp \b0,\b2,\b3 // first stage sums for b
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
73 vsubfp \b1,\b2,\b3 // first stage differences for b
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
74 vmrghw \a2,\a0,\a1 // vcprm(0,s0,1,s1) // {t1,t3,t2,t4}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
75 vperm \a3,\a0,\a1,v22 // vcprm(2,s3,3,s2) // {t6,t7,t5,t8}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
76 vmrghw \b2,\b0,\b1 // same shuffle as line 74, applied to the b pair
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
77 vperm \b3,\b0,\b1,v22 // same shuffle as line 75, applied to the b pair
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
78 vaddfp \a0,\a2,\a3 // {r0,r1,i0,i1}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
79 vsubfp \a1,\a2,\a3 // {r2,r3,i2,i3}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
80 vaddfp \b0,\b2,\b3 // second stage sums for b
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
81 vsubfp \b1,\b2,\b3 // second stage differences for b
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
82 vperm \a2,\a0,\a1,v23 // vcprm(0,1,s0,s1) // {r0,r1,r2,r3}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
83 vperm \a3,\a0,\a1,v24 // vcprm(2,3,s2,s3) // {i0,i1,i2,i3}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
84 vperm \b2,\b0,\b1,v23 // b2 = reals of the b transform
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
85 vperm \b3,\b0,\b1,v24 // b3 = imaginaries of the b transform
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
86 .endm
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
87
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
88 .macro FFT8 a0, a1, b0, b1, a2, a3, b2, b3, b4 // in,out:a0-b1; a2,a3,b2,b3,b4 are scratch; reads constants v14,v17,v18 and permutes v20-v29
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
89 vmrghw \b2,\b0,\b1 // vcprm(0,s0,1,s1) // {r4,r6,i4,i6}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
90 vmrglw \b3,\b0,\b1 // vcprm(2,s2,3,s3) // {r5,r7,i5,i7}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
91 vperm \a2,\a0,\a1,v20 // FFT4 ...
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
92 vperm \a3,\a0,\a1,v21 // (the FFT4 of a0,a1 is interleaved with the b0,b1 work from here on)
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
93 vaddfp \b0,\b2,\b3 // {t1,t3,t2,t4}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
94 vsubfp \b1,\b2,\b3 // {r5,r7,i5,i7}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
95 vperm \b4,\b1,\b1,v25 // vcprm(2,3,0,1) // {i5,i7,r5,r7}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
96 vaddfp \a0,\a2,\a3 // FFT4 first stage sums
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
97 vsubfp \a1,\a2,\a3 // FFT4 first stage differences
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
98 vmaddfp \b1,\b1,v17,v14 // * {-1,1,1,-1}/sqrt(2) (v14 is the zero constant, so this is a plain multiply)
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
99 vmaddfp \b1,\b4,v18,\b1 // * { 1,1,1,1 }/sqrt(2) // {t8,ta,t7,t9}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
100 vmrghw \a2,\a0,\a1 // FFT4 second stage shuffle (same as FFT4 line 57)
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
101 vperm \a3,\a0,\a1,v22 // FFT4 second stage shuffle (same as FFT4 line 58)
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
102 vperm \b2,\b0,\b1,v26 // vcprm(1,2,s3,s0) // {t3,t2,t9,t8}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
103 vperm \b3,\b0,\b1,v27 // vcprm(0,3,s2,s1) // {t1,t4,t7,ta}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
104 vaddfp \a0,\a2,\a3 // FFT4 second stage sums
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
105 vsubfp \a1,\a2,\a3 // FFT4 second stage differences
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
106 vaddfp \b0,\b2,\b3 // {t1,t2,t9,ta}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
107 vsubfp \b1,\b2,\b3 // {t6,t5,tc,tb}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
108 vperm \a2,\a0,\a1,v23 // reals of the FFT4 half (same split as FFT4 line 61)
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
109 vperm \a3,\a0,\a1,v24 // imaginaries of the FFT4 half (same split as FFT4 line 62)
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
110 vperm \b2,\b0,\b1,v28 // vcprm(0,2,s1,s3) // {t1,t9,t5,tb}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
111 vperm \b3,\b0,\b1,v29 // vcprm(1,3,s0,s2) // {t2,ta,t6,tc}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
112 vsubfp \b0,\a2,\b2 // {r4,r5,r6,r7}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
113 vsubfp \b1,\a3,\b3 // {i4,i5,i6,i7}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
114 vaddfp \a0,\a2,\b2 // {r0,r1,r2,r3}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
115 vaddfp \a1,\a3,\b3 // {i0,i1,i2,i3}
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
116 .endm
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
117
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
118 .macro BF d0,d1,s0,s1 // butterfly: d0 = s0 + s1, d1 = s0 - s1
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
119 vsubfp \d1,\s0,\s1 // difference is written first, so d1 must not alias s0 or s1
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
120 vaddfp \d0,\s0,\s1 // sum is written last, so d0 may alias s0 or s1
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
121 .endm
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
122
12085
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
123 .macro zip d0,d1,s0,s1 // interleave the 32-bit words of s0 and s1 across d0,d1; d0 must not alias s0 or s1 because both are read again for d1
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
124 vmrghw \d0,\s0,\s1 // d0 = {s0[0],s1[0],s0[1],s1[1]}
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
125 vmrglw \d1,\s0,\s1 // d1 = {s0[2],s1[2],s0[3],s1[3]}
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
126 .endm
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
127
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
128 .macro def_fft4 interleave // emits the routine fft4<interleave>_altivec; a blank argument selects the split re/im output layout
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
129 fft4\interleave\()_altivec: // r3 = data pointer, transformed in place; r9 = 16 (file-wide constant)
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
130 lvx v0, 0,r3 // load bytes 0-15
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
131 lvx v1,r9,r3 // load bytes 16-31
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
132 FFT4 v0,v1,v2,v3 // result: v2 = reals, v3 = imaginaries
12085
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
133 .ifnb \interleave // non-blank suffix: write the result back interleaved
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
134 zip v0,v1,v2,v3 // merge reals and imaginaries word by word into v0,v1
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
135 stvx v0, 0,r3
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
136 stvx v1,r9,r3
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
137 .else // blank suffix: store {4x real} followed by {4x imaginary}
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
138 stvx v2, 0,r3
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
139 stvx v3,r9,r3
12085
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
140 .endif
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
141 blr
12085
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
142 .endm
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
143
12085
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
144 .macro def_fft8 interleave // emits the routine fft8<interleave>_altivec; a blank argument selects the split re/im output layout
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
145 fft8\interleave\()_altivec: // r3 = data pointer, transformed in place; r9 = 16 (file-wide constant); r4 is used as scratch
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
146 addi r4,r3,32 // r4 = second 32-byte half of the 64-byte input
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
147 lvx v0, 0,r3 // bytes 0-15
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
148 lvx v1,r9,r3 // bytes 16-31
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
149 lvx v2, 0,r4 // bytes 32-47
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
150 lvx v3,r9,r4 // bytes 48-63
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
151 FFT8 v0,v1,v2,v3,v4,v5,v6,v7,v8 // in place on v0-v3 ({r0-3},{i0-3},{r4-7},{i4-7}); v4-v8 are scratch
12085
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
152 .ifnb \interleave // non-blank suffix: write the result back interleaved
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
153 zip v4,v5,v0,v1 // merge reals v0 with imaginaries v1
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
154 zip v6,v7,v2,v3 // merge reals v2 with imaginaries v3
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
155 stvx v4, 0,r3
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
156 stvx v5,r9,r3
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
157 stvx v6, 0,r4
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
158 stvx v7,r9,r4
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
159 .else // blank suffix: store the split re/im vectors as they are
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
160 stvx v0, 0,r3
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
161 stvx v1,r9,r3
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
162 stvx v2, 0,r4
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
163 stvx v3,r9,r4
12085
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
164 .endif
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
165 blr
12085
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
166 .endm
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
167
12085
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
168 .macro def_fft16 interleave // emits the routine fft16<interleave>_altivec; a blank argument selects the split re/im output layout
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
169 fft16\interleave\()_altivec: // r3 = data pointer (128 bytes, transformed in place); r9 = 16; r4,r5,r6 are scratch
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
170 addi r5,r3,64 // r5 = third 32-byte group
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
171 addi r6,r3,96 // r6 = fourth 32-byte group
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
172 addi r4,r3,32 // r4 = second 32-byte group
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
173 lvx v0, 0,r5 // load the upper 64 bytes first
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
174 lvx v1,r9,r5
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
175 lvx v2, 0,r6
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
176 lvx v3,r9,r6
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
177 FFT4x2 v0,v1,v2,v3,v4,v5,v6,v7 // two FFT4s on the upper half: v4,v5 = re,im of the third group; v6,v7 = re,im of the fourth group
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
178 lvx v0, 0,r3 // now load the lower 64 bytes
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
179 lvx v1,r9,r3
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
180 lvx v2, 0,r4
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
181 lvx v3,r9,r4
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
182 FFT8 v0,v1,v2,v3,v8,v9,v10,v11,v12 // FFT8 in place on v0-v3; v8-v12 are scratch and are reused below
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
183 vmaddfp v8,v4,v15,v14 // r2*wre (v14 is the zero constant, so these four are plain multiplies)
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
184 vmaddfp v9,v5,v15,v14 // i2*wre
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
185 vmaddfp v10,v6,v15,v14 // r3*wre
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
186 vmaddfp v11,v7,v15,v14 // i3*wre
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
187 vmaddfp v8,v5,v16,v8 // i2*wim => v8 = r2*wre + i2*wim
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
188 vnmsubfp v9,v4,v16,v9 // r2*wim => v9 = i2*wre - r2*wim
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
189 vnmsubfp v10,v7,v16,v10 // i3*wim => v10 = r3*wre - i3*wim
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
190 vmaddfp v11,v6,v16,v11 // r3*wim => v11 = i3*wre + r3*wim
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
191 BF v10,v12,v10,v8 // v10 = v10 + v8, v12 = v10 - v8
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
192 BF v11,v13,v9,v11 // v11 = v9 + v11, v13 = v9 - v11
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
193 BF v0,v4,v0,v10 // final butterflies: sums go to v0-v3, differences to v4-v7
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
194 BF v3,v7,v3,v12
12085
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
195 BF v1,v5,v1,v11
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
196 BF v2,v6,v2,v13
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
197 .ifnb \interleave // non-blank suffix: write the result back interleaved
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
198 zip v8, v9,v0,v1
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
199 zip v10,v11,v2,v3
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
200 zip v12,v13,v4,v5
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
201 zip v14,v15,v6,v7 // NOTE(review): overwrites v14 (zero) and v15 (cosines), which the file header lists as global constants - presumably nothing relies on them after this variant returns; confirm against the callers
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
202 stvx v8, 0,r3
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
203 stvx v9,r9,r3
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
204 stvx v10, 0,r4
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
205 stvx v11,r9,r4
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
206 stvx v12, 0,r5
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
207 stvx v13,r9,r5
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
208 stvx v14, 0,r6
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
209 stvx v15,r9,r6
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
210 .else // blank suffix: v0..v7 go to byte offsets 0,16,...,112 (stores are issued out of address order)
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
211 stvx v0, 0,r3
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
212 stvx v4, 0,r5
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
213 stvx v3,r9,r4
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
214 stvx v7,r9,r6
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
215 stvx v1,r9,r3
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
216 stvx v5,r9,r5
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
217 stvx v2, 0,r4
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
218 stvx v6, 0,r6
12085
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
219 .endif
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
220 blr
12085
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
221 .endm
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
222
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
223 // void pass(float *z, float *wre, int n) -- as used below: z in r3, wre in r4, n in r5; r9 = 16 and v14 = 0 are file-wide constants
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
224 .macro PASS interleave, suffix // interleave is numeric here (tested with .if): 0 keeps split re/im blocks, non-zero stores interleaved; suffix is spliced into the label
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
225 fft_pass\suffix\()_altivec:
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
226 mtctr r5 // loop runs n times
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
227 slwi r0,r5,4 // r0 = n*16 bytes
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
228 slwi r7,r5,6 // o2 = n*64 bytes
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
229 slwi r5,r5,5 // o1 = n*32 bytes (r5 no longer holds n; the count lives in ctr)
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
230 add r10,r5,r7 // o3 = o1 + o2
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
231 add r0,r4,r0 // wim = wre + n*16 bytes
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
232 addi r6,r5,16 // o1+16
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
233 addi r8,r7,16 // o2+16
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
234 addi r11,r10,16 // o3+16
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
235 1:
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
236 lvx v8, 0,r4 // wre
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
237 lvx v10, 0,r0 // wim
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
238 sub r0,r0,r9 // step the wim pointer back 16 bytes; it walks downwards while wre walks upwards
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
239 lvx v9, 0,r0 // the 16 bytes just below the previous wim load
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
240 vperm v9,v9,v10,v19 // vcprm(s0,3,2,1) => wim[0 .. -3]
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
241 lvx v4,r3,r7 // r2 = z[o2]
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
242 lvx v5,r3,r8 // i2 = z[o2+16]
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
243 lvx v6,r3,r10 // r3 = z[o3]
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
244 lvx v7,r3,r11 // i3 = z[o3+16]
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
245 vmaddfp v10,v4,v8,v14 // r2*wre (v14 is the zero constant, so these four are plain multiplies)
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
246 vmaddfp v11,v5,v8,v14 // i2*wre
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
247 vmaddfp v12,v6,v8,v14 // r3*wre
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
248 vmaddfp v13,v7,v8,v14 // i3*wre
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
249 lvx v0, 0,r3 // r0 = z[0]
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
250 lvx v3,r3,r6 // i1 = z[o1+16]
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
251 vmaddfp v10,v5,v9,v10 // i2*wim => v10 = r2*wre + i2*wim
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
252 vnmsubfp v11,v4,v9,v11 // r2*wim => v11 = i2*wre - r2*wim
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
253 vnmsubfp v12,v7,v9,v12 // i3*wim => v12 = r3*wre - i3*wim
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
254 vmaddfp v13,v6,v9,v13 // r3*wim => v13 = i3*wre + r3*wim
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
255 lvx v1,r3,r9 // i0 = z[16]
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
256 lvx v2,r3,r5 // r1 = z[o1]
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
257 BF v12,v8,v12,v10 // v12 = v12 + v10, v8 = v12 - v10 (wre in v8 is no longer needed)
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
258 BF v13,v9,v11,v13 // v13 = v11 + v13, v9 = v11 - v13 (wim in v9 is no longer needed)
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
259 BF v0,v4,v0,v12 // final butterflies: sums go to v0-v3, differences to v4-v7
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
260 BF v3,v7,v3,v8
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
261 .if !\interleave // split layout: store the four vectors that are already final
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
262 stvx v0, 0,r3
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
263 stvx v4,r3,r7
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
264 stvx v3,r3,r6
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
265 stvx v7,r3,r11
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
266 .endif
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
267 BF v1,v5,v1,v13
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
268 BF v2,v6,v2,v9
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
269 .if !\interleave // split layout: store the remaining four vectors
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
270 stvx v1,r3,r9
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
271 stvx v2,r3,r5
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
272 stvx v5,r3,r8
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
273 stvx v6,r3,r10
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
274 .else // interleaved layout: merge each vector pair word by word (v8,v9 as temporaries), then store both halves
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
275 vmrghw v8,v0,v1
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
276 vmrglw v9,v0,v1
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
277 stvx v8, 0,r3
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
278 stvx v9,r3,r9
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
279 vmrghw v8,v2,v3
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
280 vmrglw v9,v2,v3
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
281 stvx v8,r3,r5
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
282 stvx v9,r3,r6
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
283 vmrghw v8,v4,v5
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
284 vmrglw v9,v4,v5
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
285 stvx v8,r3,r7
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
286 stvx v9,r3,r8
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
287 vmrghw v8,v6,v7
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
288 vmrglw v9,v6,v7
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
289 stvx v8,r3,r10
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
290 stvx v9,r3,r11
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
291 .endif
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
292 addi r3,r3,32 // z += 32 bytes
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
293 addi r4,r4,16 // wre += 16 bytes
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
294 bdnz 1b // decrement ctr and loop while it is non-zero
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
295 sub r3,r3,r5 // undo the n*32 bytes the loop added to r3 (r5 = o1 = n*32), so r3 is returned unchanged
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
296 blr
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
297 .endm
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
298
12089
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
299 #define M_SQRT1_2 0.70710678118654752440 /* 1/sqrt(2) */
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
300
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
301 #define WORD_0 0x00,0x01,0x02,0x03 /* WORD_0..WORD_3: byte indices 0x00-0x0f, i.e. 32-bit words 0-3 of the first vperm source */
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
302 #define WORD_1 0x04,0x05,0x06,0x07
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
303 #define WORD_2 0x08,0x09,0x0a,0x0b
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
304 #define WORD_3 0x0c,0x0d,0x0e,0x0f
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
305 #define WORD_s0 0x10,0x11,0x12,0x13 /* WORD_s0..WORD_s3: byte indices 0x10-0x1f, i.e. words 0-3 of the second vperm source */
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
306 #define WORD_s1 0x14,0x15,0x16,0x17
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
307 #define WORD_s2 0x18,0x19,0x1a,0x1b
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
308 #define WORD_s3 0x1c,0x1d,0x1e,0x1f
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
309
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
310 #define vcprm(a, b, c, d) .byte WORD_##a, WORD_##b, WORD_##c, WORD_##d /* emits one 16-byte vperm control vector that selects words a,b,c,d; each argument is 0-3 or s0-s3 */
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
311
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/*
 * fft_data: read-only constant pool, 16 entries of 16 bytes each
 * (5 rows of four floats followed by 11 vcprm entries).
 * fft_calc loads the entries in order into v14..v29 with two lvm sequences,
 * so the order of the rows below determines which register holds which
 * constant; do not reorder.
 */
312 .rodata
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/*
 * NOTE(review): assumes .align takes a power-of-two exponent on this target
 * (2^4 = 16 bytes), which the lvx loads of this table rely on -- confirm.
 */
313 .align 4
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
314 fft_data:
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* -> v14: all zeros */
315 .float 0, 0, 0, 0
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* -> v15: cos(k*pi/8) for k = 0..3 */
316 .float 1, 0.92387953, M_SQRT1_2, 0.38268343
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* -> v16: sin(k*pi/8) for k = 0..3 */
317 .float 0, 0.38268343, M_SQRT1_2, 0.92387953
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* -> v17: +-1/sqrt(2) with signs -,+,+,- */
318 .float -M_SQRT1_2, M_SQRT1_2, M_SQRT1_2,-M_SQRT1_2
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* -> v18: 1/sqrt(2) in every lane */
319 .float M_SQRT1_2, M_SQRT1_2, M_SQRT1_2, M_SQRT1_2
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* -> v19..v29: word-permutation control vectors, one per line below */
320 vcprm(s0,3,2,1)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
321 vcprm(0,1,s2,s1)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
322 vcprm(2,3,s0,s3)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
323 vcprm(2,s3,3,s2)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
324 vcprm(0,1,s0,s1)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
325 vcprm(2,3,s2,s3)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
326 vcprm(2,3,0,1)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
327 vcprm(1,2,s3,s0)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
328 vcprm(0,3,s2,s1)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
329 vcprm(0,2,s1,s3)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
330 vcprm(1,3,s0,s2)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
331
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/*
 * lvm b, r [, regs...]: load consecutive 16-byte vectors, starting at the
 * address held in GPR \b, into each listed vector register in order.
 * Side effect: \b is advanced by 16 for every register loaded.
 * The macro expands itself recursively on the remaining register list until
 * that list is empty.
 */
332 .macro lvm b, r, regs:vararg
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* \r = vector at the address in \b (index operand is 0) */
333 lvx \r, 0, \b
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* step \b to the next 16-byte slot */
334 addi \b, \b, 16
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* registers left over: handle them with a recursive expansion */
335 .ifnb \regs
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
336 lvm \b, \regs
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
337 .endif
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
338 .endm
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
339
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/*
 * stvm b, r [, regs...]: store each listed vector register, in order, to
 * consecutive 16-byte slots starting at the address held in GPR \b.
 * Side effect: \b is advanced by 16 for every register stored.
 * The macro expands itself recursively on the remaining register list until
 * that list is empty.
 */
340 .macro stvm b, r, regs:vararg
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* store \r at the address in \b (index operand is 0) */
341 stvx \r, 0, \b
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* step \b to the next 16-byte slot */
342 addi \b, \b, 16
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* registers left over: handle them with a recursive expansion */
343 .ifnb \regs
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
344 stvm \b, \regs
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
345 .endif
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
346 .endm
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
347
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/*
 * fft_calc interleave: emit the entry point ff_fft_calc<interleave>_altivec.
 *
 * As used below: r3 points at memory whose first 32-bit word selects the
 * transform (entry number word - 2 of fft_dispatch_tab<interleave>_altivec,
 * whose first entry is fft4), and r4 is passed to the selected routine as
 * its r3.
 * NOTE(review): presumably r3 is the FFT context with the log2 size as its
 * first member and r4 the data buffer -- confirm against the C prototype.
 *
 * Frame: 160 + 16*PS bytes.  v20-v29 are saved at 16*PS(r1) (10 * 16 bytes)
 * and VRSAVE at 15*PS(r1).
 * State handed to the selected routine: v14-v29 = the fft_data entries in
 * table order, r9 = 16, r12 = address of ff_cos_tabs.
 *
 * extfunc, movrel, stp, stpu, lp, lpx, PS, X() and ARCH_PPC64 are defined
 * outside this chunk; stp/stpu/lp/lpx are presumably the pointer-sized
 * store/load forms and PS the pointer size in bytes -- confirm.
 */
348 .macro fft_calc interleave
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
349 extfunc ff_fft_calc\interleave\()_altivec
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* prologue: stash LR at 2*PS off the incoming stack pointer */
350 mflr r0
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
351 stp r0, 2*PS(r1)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* allocate the frame; the old r1 is stored at the new 0(r1) */
352 stpu r1, -(160+16*PS)(r1)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* save v20-v29 in the 160-byte area starting at 16*PS(r1) */
353 addi r6, r1, 16*PS
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
354 stvm r6, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* save the incoming VRSAVE, then install a new mask */
355 mfvrsave r0
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
356 stw r0, 15*PS(r1)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/*
 * Mask with the 30 most-significant bits set.
 * NOTE(review): with the VRSAVE MSB standing for v0 this marks v0-v29 as
 * live -- confirm.
 */
357 li r6, 0xfffffffc
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
358 mtvrsave r6
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
359
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* load the 16 fft_data entries into v14..v29, in table order */
360 movrel r6, fft_data
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
361 lvm r6, v14, v15, v16, v17, v18, v19, v20, v21
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
362 lvm r6, v22, v23, v24, v25, v26, v27, v28, v29
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
363
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* r9 = 16: constant for the transform routines (its use is not visible here) */
364 li r9, 16
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* r12 = ff_cos_tabs; DECL_FFT loads a pointer from \bits*PS(r12) */
365 movrel r12, X(ff_cos_tabs)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
366
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* pick the routine: table[word at 0(r3) - 2] */
367 movrel r6, fft_dispatch_tab\interleave\()_altivec
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
368 lwz r3, 0(r3)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
369 subi r3, r3, 2
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* scale the index to a byte offset: *4, or *8 if ARCH_PPC64 is 1 */
370 slwi r3, r3, 2+ARCH_PPC64
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
371 lpx r3, r3, r6
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
372 mtctr r3
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* the second incoming argument becomes the routine's r3 */
373 mr r3, r4
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
374 bctrl
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
375
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* epilogue: restore v20-v29 and VRSAVE, pop the frame, restore LR */
376 addi r6, r1, 16*PS
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
377 lvm r6, v20, v21, v22, v23, v24, v25, v26, v27, v28, v29
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
378 lwz r6, 15*PS(r1)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
379 mtvrsave r6
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* r1 = saved back chain, i.e. the stack pointer on entry */
380 lp r1, 0(r1)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
381 lp r0, 2*PS(r1)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
382 mtlr r0
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
383 blr
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
384 .endm
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
385
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
/*
 * DECL_FFT suffix, bits, n, n2, n4: define the routine fft<n><suffix>_altivec.
 *
 * The routine calls fft<n2>_altivec at r3, fft<n4>_altivec at r3 + n*4 and
 * fft<n4>_altivec again at r3 + n*6, restores r3, and then tail-branches to
 * fft_pass<suffix>_altivec with r4 = pointer loaded from \bits*PS(r12) and
 * r5 = n/16.  The three sub-calls always target the un-suffixed routines;
 * only the final pass carries \suffix.
 *
 * LR is kept in the slot PS*(\bits-3)(r1), so each size has its own slot and
 * nested calls to smaller sizes do not overwrite it.  r1 is not adjusted
 * here; the slots live in the frame set up by the caller (fft_calc).
 * r12 is expected to hold the address of ff_cos_tabs, as set by fft_calc.
 * addi2, stp, lp and PS are defined outside this chunk.
 */
386 .macro DECL_FFT suffix, bits, n, n2, n4
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
387 fft\n\suffix\()_altivec:
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
388 mflr r0
12089
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* save LR in the per-size slot */
389 stp r0,PS*(\bits-3)(r1)
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
/* size-n2 transform at the start of the buffer */
390 bl fft\n2\()_altivec
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
/* r3 += n*4 bytes */
391 addi2 r3,\n*4
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
/* first size-n4 transform */
392 bl fft\n4\()_altivec
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
/* r3 += n*2 bytes */
393 addi2 r3,\n*2
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
/* second size-n4 transform */
394 bl fft\n4\()_altivec
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
/* undo both advances (4n + 2n): r3 is back at the start of the buffer */
395 addi2 r3,\n*-6
12089
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
396 lp r0,PS*(\bits-3)(r1)
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* r4 = entry \bits of the pointer table at r12 */
397 lp r4,\bits*PS(r12)
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
/* restore LR so the pass routine returns straight to our caller */
398 mtlr r0
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
399 li r5,\n/16
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
/* tail call: no link, the pass routine ends with blr */
400 b fft_pass\suffix\()_altivec
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
401 .endm
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
402
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
/*
 * DECL_FFTS interleave, suffix: instantiate one complete family of FFT
 * routines plus its entry point and dispatch table.
 *
 * In .text it expands def_fft4/def_fft8/def_fft16 with \suffix, PASS with
 * \interleave and \suffix, DECL_FFT for every size from 32 (bits 5) to
 * 65536 (bits 16), and fft_calc with \suffix.
 * In .rodata it emits fft_dispatch_tab<suffix>_altivec, one PTR entry per
 * size from fft4 to fft65536 in increasing order; fft_calc reads the entry
 * numbered (selector word - 2) from this table.
 *
 * The macro leaves .rodata as the current section when it ends.
 * def_fft4, def_fft8, def_fft16, PASS and PTR are defined outside this
 * chunk; PTR presumably emits one pointer-sized data word -- confirm.
 */
403 .macro DECL_FFTS interleave, suffix
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
404 .text
12085
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
/* smallest sizes, produced by their own macros */
405 def_fft4 \suffix
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
406 def_fft8 \suffix
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
407 def_fft16 \suffix
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
/* the pass routine that every DECL_FFT routine tail-branches to */
408 PASS \interleave, \suffix
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
/* arguments: suffix, bits, n, n/2, n/4 (with n = 2^bits) */
409 DECL_FFT \suffix, 5, 32, 16, 8
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
410 DECL_FFT \suffix, 6, 64, 32, 16
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
411 DECL_FFT \suffix, 7, 128, 64, 32
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
412 DECL_FFT \suffix, 8, 256, 128, 64
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
413 DECL_FFT \suffix, 9, 512, 256, 128
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
414 DECL_FFT \suffix,10, 1024, 512, 256
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
415 DECL_FFT \suffix,11, 2048, 1024, 512
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
416 DECL_FFT \suffix,12, 4096, 2048, 1024
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
417 DECL_FFT \suffix,13, 8192, 4096, 2048
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
418 DECL_FFT \suffix,14,16384, 8192, 4096
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
419 DECL_FFT \suffix,15,32768,16384, 8192
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
420 DECL_FFT \suffix,16,65536,32768,16384
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
421
12089
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* entry point ff_fft_calc<suffix>_altivec */
422 fft_calc \suffix
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
423
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
424 .rodata
12089
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* NOTE(review): 2^3 = 8-byte alignment if .align is an exponent here -- confirm */
425 .align 3
5638941ec8ef PPC: convert Altivec FFT to pure assembler
mru
parents: 12088
diff changeset
/* entry i holds the routine for size 2^(i+2); the order must match fft_calc's indexing */
426 fft_dispatch_tab\suffix\()_altivec:
12085
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
427 PTR fft4\suffix\()_altivec
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
428 PTR fft8\suffix\()_altivec
8454bb880008 PPC: add _interleave versions of fft{4,6,16}_altivec
mru
parents: 12046
diff changeset
429 PTR fft16\suffix\()_altivec
12046
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
430 PTR fft32\suffix\()_altivec
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
431 PTR fft64\suffix\()_altivec
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
432 PTR fft128\suffix\()_altivec
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
433 PTR fft256\suffix\()_altivec
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
434 PTR fft512\suffix\()_altivec
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
435 PTR fft1024\suffix\()_altivec
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
436 PTR fft2048\suffix\()_altivec
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
437 PTR fft4096\suffix\()_altivec
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
438 PTR fft8192\suffix\()_altivec
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
439 PTR fft16384\suffix\()_altivec
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
440 PTR fft32768\suffix\()_altivec
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
441 PTR fft65536\suffix\()_altivec
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
442 .endm
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
443
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
/*
 * Instantiate both routine families.  The first passes interleave = 0 and
 * an empty suffix, giving the plain names (fft<n>_altivec,
 * ff_fft_calc_altivec, fft_dispatch_tab_altivec).  The second passes
 * interleave = 1 and the suffix _interleave.
 * The plain family comes first; DECL_FFT in the _interleave family also
 * branches to the un-suffixed fft<n>_altivec labels.
 */
444 DECL_FFTS 0
ae57be2ef58c PPC: Altivec split-radix FFT
mru
parents:
diff changeset
445 DECL_FFTS 1, _interleave