Mercurial > libavcodec.hg
annotate ppc/fft_altivec.c @ 3898:c8b9218c3458 libavcodec
Original Commit: r99 | ods15 | 2006-10-01 11:12:44 +0200 (Sun, 01 Oct 2006) | 2 lines
codebook init data cleanup
author | ods15 |
---|---|
date | Mon, 02 Oct 2006 06:09:14 +0000 |
parents | 0b546eab515d |
children | c8c591fe26f8 |
rev | line source |
---|---|
975
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
1 /* |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
2 * FFT/IFFT transforms |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
3 * AltiVec-enabled |
981 | 4 * Copyright (c) 2003 Romain Dolbeau <romain@dolbeau.org> |
975
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
5 * Based on code Copyright (c) 2002 Fabrice Bellard. |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
6 * |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
7 * This library is free software; you can redistribute it and/or |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
8 * modify it under the terms of the GNU Lesser General Public |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
9 * License as published by the Free Software Foundation; either |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
10 * version 2 of the License, or (at your option) any later version. |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
11 * |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
12 * This library is distributed in the hope that it will be useful, |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
15 * Lesser General Public License for more details. |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
16 * |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
17 * You should have received a copy of the GNU Lesser General Public |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
18 * License along with this library; if not, write to the Free Software |
3036
0b546eab515d
Update licensing information: The FSF changed postal address.
diego
parents:
2979
diff
changeset
|
19 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA |
975
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
20 */ |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
21 #include "../dsputil.h" |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
22 |
1277
f3152eb76f1a
altivec gcc-3 fixes by (Magnus Damm <damm at opensource dot se>)
michaelni
parents:
1033
diff
changeset
|
23 #include "gcc_fixes.h" |
f3152eb76f1a
altivec gcc-3 fixes by (Magnus Damm <damm at opensource dot se>)
michaelni
parents:
1033
diff
changeset
|
24 |
975
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
25 #include "dsputil_altivec.h" |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
26 |
1009
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
27 /* |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
28 those three macros are from libavcodec/fft.c |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
29 and are required for the reference C code |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
30 */ |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
31 /* butter fly op */ |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
32 #define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \ |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
33 {\ |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
34 FFTSample ax, ay, bx, by;\ |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
35 bx=pre1;\ |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
36 by=pim1;\ |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
37 ax=qre1;\ |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
38 ay=qim1;\ |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
39 pre = (bx + ax);\ |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
40 pim = (by + ay);\ |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
41 qre = (bx - ax);\ |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
42 qim = (by - ay);\ |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
43 } |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
44 #define MUL16(a,b) ((a) * (b)) |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
45 #define CMUL(pre, pim, are, aim, bre, bim) \ |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
46 {\ |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
47 pre = (MUL16(are, bre) - MUL16(aim, bim));\ |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
48 pim = (MUL16(are, bim) + MUL16(bre, aim));\ |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
49 } |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
50 |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
51 |
975
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
52 /** |
1879
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1352
diff
changeset
|
53 * Do a complex FFT with the parameters defined in ff_fft_init(). The |
975
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
54 * input data must be permuted before with s->revtab table. No |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
55 * 1.0/sqrt(n) normalization is done. |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
56 * AltiVec-enabled |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
57 * This code assumes that the 'z' pointer is 16 bytes-aligned |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
58 * It also assumes all FFTComplex are 8 bytes-aligned pair of float |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
59 * The code is exactly the same as the SSE version, except |
981 | 60 * that successive MUL + ADD/SUB have been merged into |
975
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
61 * fused multiply-add ('vec_madd' in altivec) |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
62 */ |
1879
dd63cb7e5080
fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents:
1352
diff
changeset
|
63 void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z) |
975
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
64 { |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1277
diff
changeset
|
65 POWERPC_PERF_DECLARE(altivec_fft_num, s->nbits >= 6); |
1009
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
66 #ifdef ALTIVEC_USE_REFERENCE_C_CODE |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
67 int ln = s->nbits; |
2979 | 68 int j, np, np2; |
69 int nblocks, nloops; | |
1009
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
70 register FFTComplex *p, *q; |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
71 FFTComplex *exptab = s->exptab; |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
72 int l; |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
73 FFTSample tmp_re, tmp_im; |
2967 | 74 |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1277
diff
changeset
|
75 POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6); |
2967 | 76 |
1009
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
77 np = 1 << ln; |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
78 |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
79 /* pass 0 */ |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
80 |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
81 p=&z[0]; |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
82 j=(np >> 1); |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
83 do { |
2967 | 84 BF(p[0].re, p[0].im, p[1].re, p[1].im, |
1009
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
85 p[0].re, p[0].im, p[1].re, p[1].im); |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
86 p+=2; |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
87 } while (--j != 0); |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
88 |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
89 /* pass 1 */ |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
90 |
2967 | 91 |
1009
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
92 p=&z[0]; |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
93 j=np >> 2; |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
94 if (s->inverse) { |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
95 do { |
2967 | 96 BF(p[0].re, p[0].im, p[2].re, p[2].im, |
1009
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
97 p[0].re, p[0].im, p[2].re, p[2].im); |
2967 | 98 BF(p[1].re, p[1].im, p[3].re, p[3].im, |
1009
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
99 p[1].re, p[1].im, -p[3].im, p[3].re); |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
100 p+=4; |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
101 } while (--j != 0); |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
102 } else { |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
103 do { |
2967 | 104 BF(p[0].re, p[0].im, p[2].re, p[2].im, |
1009
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
105 p[0].re, p[0].im, p[2].re, p[2].im); |
2967 | 106 BF(p[1].re, p[1].im, p[3].re, p[3].im, |
1009
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
107 p[1].re, p[1].im, p[3].im, -p[3].re); |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
108 p+=4; |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
109 } while (--j != 0); |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
110 } |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
111 /* pass 2 .. ln-1 */ |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
112 |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
113 nblocks = np >> 3; |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
114 nloops = 1 << 2; |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
115 np2 = np >> 1; |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
116 do { |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
117 p = z; |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
118 q = z + nloops; |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
119 for (j = 0; j < nblocks; ++j) { |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
120 BF(p->re, p->im, q->re, q->im, |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
121 p->re, p->im, q->re, q->im); |
2967 | 122 |
1009
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
123 p++; |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
124 q++; |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
125 for(l = nblocks; l < np2; l += nblocks) { |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
126 CMUL(tmp_re, tmp_im, exptab[l].re, exptab[l].im, q->re, q->im); |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
127 BF(p->re, p->im, q->re, q->im, |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
128 p->re, p->im, tmp_re, tmp_im); |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
129 p++; |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
130 q++; |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
131 } |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
132 |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
133 p += nloops; |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
134 q += nloops; |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
135 } |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
136 nblocks = nblocks >> 1; |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
137 nloops = nloops << 1; |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
138 } while (nblocks != 0); |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
139 |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1277
diff
changeset
|
140 POWERPC_PERF_STOP_COUNT(altivec_fft_num, s->nbits >= 6); |
1009
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
141 |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
142 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ |
1033
b4172ff70d27
Altivec on non darwin systems patch by Romain Dolbeau
bellard
parents:
1015
diff
changeset
|
143 #ifdef CONFIG_DARWIN |
995
edc10966b081
altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents:
981
diff
changeset
|
144 register const vector float vczero = (const vector float)(0.); |
1033
b4172ff70d27
Altivec on non darwin systems patch by Romain Dolbeau
bellard
parents:
1015
diff
changeset
|
145 #else |
b4172ff70d27
Altivec on non darwin systems patch by Romain Dolbeau
bellard
parents:
1015
diff
changeset
|
146 register const vector float vczero = (const vector float){0.,0.,0.,0.}; |
b4172ff70d27
Altivec on non darwin systems patch by Romain Dolbeau
bellard
parents:
1015
diff
changeset
|
147 #endif |
2967 | 148 |
975
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
149 int ln = s->nbits; |
2979 | 150 int j, np, np2; |
151 int nblocks, nloops; | |
975
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
152 register FFTComplex *p, *q; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
153 FFTComplex *cptr, *cptr1; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
154 int k; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
155 |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1277
diff
changeset
|
156 POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6); |
1009
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
157 |
975
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
158 np = 1 << ln; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
159 |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
160 { |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
161 vector float *r, a, b, a1, c1, c2; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
162 |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
163 r = (vector float *)&z[0]; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
164 |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
165 c1 = vcii(p,p,n,n); |
2967 | 166 |
975
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
167 if (s->inverse) |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
168 { |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
169 c2 = vcii(p,p,n,p); |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
170 } |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
171 else |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
172 { |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
173 c2 = vcii(p,p,p,n); |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
174 } |
2967 | 175 |
975
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
176 j = (np >> 2); |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
177 do { |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
178 a = vec_ld(0, r); |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
179 a1 = vec_ld(sizeof(vector float), r); |
2967 | 180 |
975
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
181 b = vec_perm(a,a,vcprmle(1,0,3,2)); |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
182 a = vec_madd(a,c1,b); |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
183 /* do the pass 0 butterfly */ |
2967 | 184 |
975
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
185 b = vec_perm(a1,a1,vcprmle(1,0,3,2)); |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
186 b = vec_madd(a1,c1,b); |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
187 /* do the pass 0 butterfly */ |
2967 | 188 |
975
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
189 /* multiply third by -i */ |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
190 b = vec_perm(b,b,vcprmle(2,3,1,0)); |
2967 | 191 |
975
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
192 /* do the pass 1 butterfly */ |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
193 vec_st(vec_madd(b,c2,a), 0, r); |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
194 vec_st(vec_nmsub(b,c2,a), sizeof(vector float), r); |
2967 | 195 |
975
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
196 r += 2; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
197 } while (--j != 0); |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
198 } |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
199 /* pass 2 .. ln-1 */ |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
200 |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
201 nblocks = np >> 3; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
202 nloops = 1 << 2; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
203 np2 = np >> 1; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
204 |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
205 cptr1 = s->exptab1; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
206 do { |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
207 p = z; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
208 q = z + nloops; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
209 j = nblocks; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
210 do { |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
211 cptr = cptr1; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
212 k = nloops >> 1; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
213 do { |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
214 vector float a,b,c,t1; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
215 |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
216 a = vec_ld(0, (float*)p); |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
217 b = vec_ld(0, (float*)q); |
2967 | 218 |
975
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
219 /* complex mul */ |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
220 c = vec_ld(0, (float*)cptr); |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
221 /* cre*re cim*re */ |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
222 t1 = vec_madd(c, vec_perm(b,b,vcprmle(2,2,0,0)),vczero); |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
223 c = vec_ld(sizeof(vector float), (float*)cptr); |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
224 /* -cim*im cre*im */ |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
225 b = vec_madd(c, vec_perm(b,b,vcprmle(3,3,1,1)),t1); |
2967 | 226 |
975
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
227 /* butterfly */ |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
228 vec_st(vec_add(a,b), 0, (float*)p); |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
229 vec_st(vec_sub(a,b), 0, (float*)q); |
2967 | 230 |
975
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
231 p += 2; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
232 q += 2; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
233 cptr += 4; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
234 } while (--k); |
2967 | 235 |
975
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
236 p += nloops; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
237 q += nloops; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
238 } while (--j); |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
239 cptr1 += nloops * 2; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
240 nblocks = nblocks >> 1; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
241 nloops = nloops << 1; |
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
242 } while (nblocks != 0); |
1009
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
243 |
1352
e8ff4783f188
1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents:
1277
diff
changeset
|
244 POWERPC_PERF_STOP_COUNT(altivec_fft_num, s->nbits >= 6); |
1009
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
245 |
3b7cc8e4b83f
AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents:
995
diff
changeset
|
246 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ |
975
e05d525505c5
fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff
changeset
|
247 } |