annotate ppc/fft_altivec.c @ 2463:9baa47d8297b libavcodec

check norm6 vlc validity as there are some bit sequences which dont corespond to any codeword, the other vlc tables all seem to be huffman tables though
author michael
date Tue, 25 Jan 2005 01:29:10 +0000
parents dd63cb7e5080
children ef2149182f1c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
975
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
1 /*
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
2 * FFT/IFFT transforms
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
3 * AltiVec-enabled
981
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 975
diff changeset
4 * Copyright (c) 2003 Romain Dolbeau <romain@dolbeau.org>
975
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
5 * Based on code Copyright (c) 2002 Fabrice Bellard.
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
6 *
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
7 * This library is free software; you can redistribute it and/or
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
8 * modify it under the terms of the GNU Lesser General Public
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
9 * License as published by the Free Software Foundation; either
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
10 * version 2 of the License, or (at your option) any later version.
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
11 *
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
12 * This library is distributed in the hope that it will be useful,
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
15 * Lesser General Public License for more details.
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
16 *
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
17 * You should have received a copy of the GNU Lesser General Public
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
18 * License along with this library; if not, write to the Free Software
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
20 */
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
21 #include "../dsputil.h"
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
22
1277
f3152eb76f1a altivec gcc-3 fixes by (Magnus Damm <damm at opensource dot se>)
michaelni
parents: 1033
diff changeset
23 #include "gcc_fixes.h"
f3152eb76f1a altivec gcc-3 fixes by (Magnus Damm <damm at opensource dot se>)
michaelni
parents: 1033
diff changeset
24
975
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
25 #include "dsputil_altivec.h"
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
26
1009
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
27 /*
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
28 those three macros are from libavcodec/fft.c
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
29 and are required for the reference C code
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
30 */
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
31 /* butter fly op */
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
32 #define BF(pre, pim, qre, qim, pre1, pim1, qre1, qim1) \
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
33 {\
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
34 FFTSample ax, ay, bx, by;\
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
35 bx=pre1;\
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
36 by=pim1;\
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
37 ax=qre1;\
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
38 ay=qim1;\
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
39 pre = (bx + ax);\
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
40 pim = (by + ay);\
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
41 qre = (bx - ax);\
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
42 qim = (by - ay);\
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
43 }
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
44 #define MUL16(a,b) ((a) * (b))
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
45 #define CMUL(pre, pim, are, aim, bre, bim) \
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
46 {\
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
47 pre = (MUL16(are, bre) - MUL16(aim, bim));\
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
48 pim = (MUL16(are, bim) + MUL16(bre, aim));\
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
49 }
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
50
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
51
975
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
52 /**
1879
dd63cb7e5080 fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents: 1352
diff changeset
53 * Do a complex FFT with the parameters defined in ff_fft_init(). The
975
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
54 * input data must be permuted before with s->revtab table. No
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
55 * 1.0/sqrt(n) normalization is done.
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
56 * AltiVec-enabled
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
57 * This code assumes that the 'z' pointer is 16 bytes-aligned
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
58 * It also assumes all FFTComplex are 8 bytes-aligned pair of float
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
59 * The code is exactly the same as the SSE version, except
981
8bec850dc9c7 altivec patches by Romain Dolbeau
bellard
parents: 975
diff changeset
60 * that successive MUL + ADD/SUB have been merged into
975
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
61 * fused multiply-add ('vec_madd' in altivec)
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
62 */
1879
dd63cb7e5080 fft_*() renamed into ff_fft_*() patch by (Gildas Bazin <gbazin at altern dot org>)
michael
parents: 1352
diff changeset
63 void ff_fft_calc_altivec(FFTContext *s, FFTComplex *z)
975
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
64 {
1352
e8ff4783f188 1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents: 1277
diff changeset
65 POWERPC_PERF_DECLARE(altivec_fft_num, s->nbits >= 6);
1009
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
66 #ifdef ALTIVEC_USE_REFERENCE_C_CODE
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
67 int ln = s->nbits;
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
68 int j, np, np2;
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
69 int nblocks, nloops;
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
70 register FFTComplex *p, *q;
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
71 FFTComplex *exptab = s->exptab;
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
72 int l;
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
73 FFTSample tmp_re, tmp_im;
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
74
1352
e8ff4783f188 1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents: 1277
diff changeset
75 POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6);
1009
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
76
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
77 np = 1 << ln;
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
78
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
79 /* pass 0 */
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
80
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
81 p=&z[0];
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
82 j=(np >> 1);
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
83 do {
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
84 BF(p[0].re, p[0].im, p[1].re, p[1].im,
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
85 p[0].re, p[0].im, p[1].re, p[1].im);
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
86 p+=2;
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
87 } while (--j != 0);
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
88
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
89 /* pass 1 */
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
90
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
91
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
92 p=&z[0];
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
93 j=np >> 2;
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
94 if (s->inverse) {
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
95 do {
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
96 BF(p[0].re, p[0].im, p[2].re, p[2].im,
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
97 p[0].re, p[0].im, p[2].re, p[2].im);
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
98 BF(p[1].re, p[1].im, p[3].re, p[3].im,
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
99 p[1].re, p[1].im, -p[3].im, p[3].re);
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
100 p+=4;
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
101 } while (--j != 0);
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
102 } else {
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
103 do {
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
104 BF(p[0].re, p[0].im, p[2].re, p[2].im,
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
105 p[0].re, p[0].im, p[2].re, p[2].im);
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
106 BF(p[1].re, p[1].im, p[3].re, p[3].im,
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
107 p[1].re, p[1].im, p[3].im, -p[3].re);
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
108 p+=4;
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
109 } while (--j != 0);
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
110 }
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
111 /* pass 2 .. ln-1 */
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
112
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
113 nblocks = np >> 3;
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
114 nloops = 1 << 2;
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
115 np2 = np >> 1;
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
116 do {
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
117 p = z;
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
118 q = z + nloops;
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
119 for (j = 0; j < nblocks; ++j) {
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
120 BF(p->re, p->im, q->re, q->im,
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
121 p->re, p->im, q->re, q->im);
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
122
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
123 p++;
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
124 q++;
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
125 for(l = nblocks; l < np2; l += nblocks) {
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
126 CMUL(tmp_re, tmp_im, exptab[l].re, exptab[l].im, q->re, q->im);
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
127 BF(p->re, p->im, q->re, q->im,
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
128 p->re, p->im, tmp_re, tmp_im);
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
129 p++;
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
130 q++;
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
131 }
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
132
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
133 p += nloops;
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
134 q += nloops;
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
135 }
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
136 nblocks = nblocks >> 1;
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
137 nloops = nloops << 1;
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
138 } while (nblocks != 0);
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
139
1352
e8ff4783f188 1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents: 1277
diff changeset
140 POWERPC_PERF_STOP_COUNT(altivec_fft_num, s->nbits >= 6);
1009
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
141
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
142 #else /* ALTIVEC_USE_REFERENCE_C_CODE */
1033
b4172ff70d27 Altivec on non darwin systems patch by Romain Dolbeau
bellard
parents: 1015
diff changeset
143 #ifdef CONFIG_DARWIN
995
edc10966b081 altivec jumbo patch by (Romain Dolbeau <dolbeaur at club-internet dot fr>)
michaelni
parents: 981
diff changeset
144 register const vector float vczero = (const vector float)(0.);
1033
b4172ff70d27 Altivec on non darwin systems patch by Romain Dolbeau
bellard
parents: 1015
diff changeset
145 #else
b4172ff70d27 Altivec on non darwin systems patch by Romain Dolbeau
bellard
parents: 1015
diff changeset
146 register const vector float vczero = (const vector float){0.,0.,0.,0.};
b4172ff70d27 Altivec on non darwin systems patch by Romain Dolbeau
bellard
parents: 1015
diff changeset
147 #endif
975
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
148
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
149 int ln = s->nbits;
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
150 int j, np, np2;
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
151 int nblocks, nloops;
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
152 register FFTComplex *p, *q;
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
153 FFTComplex *cptr, *cptr1;
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
154 int k;
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
155
1352
e8ff4783f188 1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents: 1277
diff changeset
156 POWERPC_PERF_START_COUNT(altivec_fft_num, s->nbits >= 6);
1009
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
157
975
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
158 np = 1 << ln;
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
159
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
160 {
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
161 vector float *r, a, b, a1, c1, c2;
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
162
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
163 r = (vector float *)&z[0];
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
164
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
165 c1 = vcii(p,p,n,n);
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
166
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
167 if (s->inverse)
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
168 {
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
169 c2 = vcii(p,p,n,p);
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
170 }
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
171 else
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
172 {
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
173 c2 = vcii(p,p,p,n);
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
174 }
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
175
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
176 j = (np >> 2);
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
177 do {
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
178 a = vec_ld(0, r);
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
179 a1 = vec_ld(sizeof(vector float), r);
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
180
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
181 b = vec_perm(a,a,vcprmle(1,0,3,2));
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
182 a = vec_madd(a,c1,b);
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
183 /* do the pass 0 butterfly */
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
184
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
185 b = vec_perm(a1,a1,vcprmle(1,0,3,2));
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
186 b = vec_madd(a1,c1,b);
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
187 /* do the pass 0 butterfly */
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
188
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
189 /* multiply third by -i */
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
190 b = vec_perm(b,b,vcprmle(2,3,1,0));
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
191
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
192 /* do the pass 1 butterfly */
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
193 vec_st(vec_madd(b,c2,a), 0, r);
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
194 vec_st(vec_nmsub(b,c2,a), sizeof(vector float), r);
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
195
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
196 r += 2;
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
197 } while (--j != 0);
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
198 }
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
199 /* pass 2 .. ln-1 */
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
200
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
201 nblocks = np >> 3;
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
202 nloops = 1 << 2;
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
203 np2 = np >> 1;
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
204
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
205 cptr1 = s->exptab1;
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
206 do {
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
207 p = z;
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
208 q = z + nloops;
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
209 j = nblocks;
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
210 do {
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
211 cptr = cptr1;
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
212 k = nloops >> 1;
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
213 do {
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
214 vector float a,b,c,t1;
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
215
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
216 a = vec_ld(0, (float*)p);
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
217 b = vec_ld(0, (float*)q);
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
218
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
219 /* complex mul */
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
220 c = vec_ld(0, (float*)cptr);
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
221 /* cre*re cim*re */
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
222 t1 = vec_madd(c, vec_perm(b,b,vcprmle(2,2,0,0)),vczero);
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
223 c = vec_ld(sizeof(vector float), (float*)cptr);
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
224 /* -cim*im cre*im */
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
225 b = vec_madd(c, vec_perm(b,b,vcprmle(3,3,1,1)),t1);
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
226
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
227 /* butterfly */
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
228 vec_st(vec_add(a,b), 0, (float*)p);
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
229 vec_st(vec_sub(a,b), 0, (float*)q);
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
230
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
231 p += 2;
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
232 q += 2;
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
233 cptr += 4;
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
234 } while (--k);
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
235
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
236 p += nloops;
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
237 q += nloops;
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
238 } while (--j);
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
239 cptr1 += nloops * 2;
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
240 nblocks = nblocks >> 1;
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
241 nloops = nloops << 1;
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
242 } while (nblocks != 0);
1009
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
243
1352
e8ff4783f188 1) remove TBL support in PPC performance. It's much more useful to use the
michaelni
parents: 1277
diff changeset
244 POWERPC_PERF_STOP_COUNT(altivec_fft_num, s->nbits >= 6);
1009
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
245
3b7cc8e4b83f AltiVec perf (take 2), plus a couple AltiVec functions by (Romain Dolbeau <dolbeau at irisa dot fr>)
michaelni
parents: 995
diff changeset
246 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */
975
e05d525505c5 fft altivec by Romain Dolbeau - simplified patch, test it on PPC with fft-test and wma decoding
bellard
parents:
diff changeset
247 }