annotate liba52/imdct_3dnow.h @ 16307:03cd6abe5664

Avoid reading more than maxlen bytes. This has the side effect that the amount read will be close to maxlen instead of minlen as before.
author reimar
date Thu, 25 Aug 2005 19:46:20 +0000
parents 772d6d27fd66
children 4bad7f00556e
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
4497
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
1 /*
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
2 3DNOW and 3DNOWEX optimized IMDCT
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
3 Licence: GPL v2
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
4 Copyrights: Nick Kurshev
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
5 */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
6
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
/*
 * Bind the generic FFT_*_3DNOW names used below to variant-specific
 * function names.  Any earlier binding is dropped first, so the names
 * can be re-bound if this file is processed again with a different
 * HAVE_3DNOWEX setting.
 */
#undef FFT_4_3DNOW
#undef FFT_8_3DNOW
#undef FFT_ASMB_3DNOW
#undef FFT_ASMB16_3DNOW
#undef FFT_128P_3DNOW

#if defined(HAVE_3DNOWEX)
/* extended 3DNow! build: "..._3dnowex" names */
#define FFT_4_3DNOW fft_4_3dnowex
#define FFT_8_3DNOW fft_8_3dnowex
#define FFT_ASMB_3DNOW fft_asmb_3dnowex
#define FFT_ASMB16_3DNOW fft_asmb16_3dnowex
#define FFT_128P_3DNOW fft_128p_3dnowex
#else
/* plain 3DNow! build: "..._3dnow" names */
#define FFT_4_3DNOW fft_4_3dnow
#define FFT_8_3DNOW fft_8_3dnow
#define FFT_ASMB_3DNOW fft_asmb_3dnow
#define FFT_ASMB16_3DNOW fft_asmb16_3dnow
#define FFT_128P_3DNOW fft_128p_3dnow
#endif /* HAVE_3DNOWEX */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
26
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
27 static void FFT_4_3DNOW(complex_t *x)
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
28 {
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
29 /* delta_p = 1 here */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
30 /* x[k] = sum_{i=0..3} x[i] * w^{i*k}, w=e^{-2*pi/4}
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
31 */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
32 __asm__ __volatile__(
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
33 "movq 24(%1), %%mm3\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
34 "movq 8(%1), %%mm1\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
35 "pxor %2, %%mm3\n\t" /* mm3.re | -mm3.im */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
36 "pxor %3, %%mm1\n\t" /* -mm1.re | mm1.im */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
37 "pfadd %%mm1, %%mm3\n\t" /* vi.im = x[3].re - x[1].re; */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
38 "movq %%mm3, %%mm4\n\t" /* vi.re =-x[3].im + x[1].im; mm4 = vi */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
39 #ifdef HAVE_3DNOWEX
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
40 "pswapd %%mm4, %%mm4\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
41 #else
8230
330086b89d8f minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents: 4497
diff changeset
42 "punpckldq %%mm4, %%mm5\n\t"
330086b89d8f minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents: 4497
diff changeset
43 "punpckhdq %%mm5, %%mm4\n\t"
4497
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
44 #endif
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
45 "movq (%1), %%mm5\n\t" /* yb.re = x[0].re - x[2].re; */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
46 "movq (%1), %%mm6\n\t" /* yt.re = x[0].re + x[2].re; */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
47 "movq 24(%1), %%mm7\n\t" /* u.re = x[3].re + x[1].re; */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
48 "pfsub 16(%1), %%mm5\n\t" /* yb.im = x[0].im - x[2].im; mm5 = yb */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
49 "pfadd 16(%1), %%mm6\n\t" /* yt.im = x[0].im + x[2].im; mm6 = yt */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
50 "pfadd 8(%1), %%mm7\n\t" /* u.im = x[3].im + x[1].im; mm7 = u */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
51
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
52 "movq %%mm6, %%mm0\n\t" /* x[0].re = yt.re + u.re; */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
53 "movq %%mm5, %%mm1\n\t" /* x[1].re = yb.re + vi.re; */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
54 "pfadd %%mm7, %%mm0\n\t" /*x[0].im = yt.im + u.im; */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
55 "pfadd %%mm4, %%mm1\n\t" /* x[1].im = yb.im + vi.im; */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
56 "movq %%mm0, (%0)\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
57 "movq %%mm1, 8(%0)\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
58
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
59 "pfsub %%mm7, %%mm6\n\t" /* x[2].re = yt.re - u.re; */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
60 "pfsub %%mm4, %%mm5\n\t" /* x[3].re = yb.re - vi.re; */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
61 "movq %%mm6, 16(%0)\n\t" /* x[2].im = yt.im - u.im; */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
62 "movq %%mm5, 24(%0)" /* x[3].im = yb.im - vi.im; */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
63 :"=r"(x)
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
64 :"0"(x),
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
65 "m"(x_plus_minus_3dnow),
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
66 "m"(x_minus_plus_3dnow)
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
67 :"memory");
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
68 }
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
69
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
70 static void FFT_8_3DNOW(complex_t *x)
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
71 {
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
72 /* delta_p = diag{1, sqrt(i)} here */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
73 /* x[k] = sum_{i=0..7} x[i] * w^{i*k}, w=e^{-2*pi/8}
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
74 */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
75 complex_t wT1, wB1, wB2;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
76
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
77 __asm__ __volatile__(
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
78 "movq 8(%2), %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
79 "movq 24(%2), %%mm1\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
80 "movq %%mm0, %0\n\t" /* wT1 = x[1]; */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
81 "movq %%mm1, %1\n\t" /* wB1 = x[3]; */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
82 :"=m"(wT1), "=m"(wB1)
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
83 :"r"(x)
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
84 :"memory");
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
85
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
86 __asm__ __volatile__(
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
87 "movq 16(%0), %%mm2\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
88 "movq 32(%0), %%mm3\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
89 "movq %%mm2, 8(%0)\n\t" /* x[1] = x[2]; */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
90 "movq 48(%0), %%mm4\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
91 "movq %%mm3, 16(%0)\n\t" /* x[2] = x[4]; */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
92 "movq %%mm4, 24(%0)\n\t" /* x[3] = x[6]; */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
93 :"=r"(x)
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
94 :"0"(x)
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
95 :"memory");
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
96
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
97 fft_4_3dnow(&x[0]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
98
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
99 /* x[0] x[4] x[2] x[6] */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
100
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
101 __asm__ __volatile__(
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
102 "movq 40(%1), %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
103 "movq %%mm0, %%mm3\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
104 "movq 56(%1), %%mm1\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
105 "pfadd %%mm1, %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
106 "pfsub %%mm1, %%mm3\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
107 "movq (%2), %%mm2\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
108 "pfadd %%mm2, %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
109 "pfadd %%mm2, %%mm3\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
110 "movq (%3), %%mm1\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
111 "pfadd %%mm1, %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
112 "pfsub %%mm1, %%mm3\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
113 "movq (%1), %%mm1\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
114 "movq 16(%1), %%mm4\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
115 "movq %%mm1, %%mm2\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
116 #ifdef HAVE_3DNOWEX
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
117 "pswapd %%mm3, %%mm3\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
118 #else
8230
330086b89d8f minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents: 4497
diff changeset
119 "punpckldq %%mm3, %%mm6\n\t"
330086b89d8f minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents: 4497
diff changeset
120 "punpckhdq %%mm6, %%mm3\n\t"
4497
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
121 #endif
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
122 "pfadd %%mm0, %%mm1\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
123 "movq %%mm4, %%mm5\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
124 "pfsub %%mm0, %%mm2\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
125 "pfadd %%mm3, %%mm4\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
126 "movq %%mm1, (%0)\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
127 "pfsub %%mm3, %%mm5\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
128 "movq %%mm2, 32(%0)\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
129 "movd %%mm4, 16(%0)\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
130 "movd %%mm5, 48(%0)\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
131 "psrlq $32, %%mm4\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
132 "psrlq $32, %%mm5\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
133 "movd %%mm4, 52(%0)\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
134 "movd %%mm5, 20(%0)"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
135 :"=r"(x)
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
136 :"0"(x), "r"(&wT1), "r"(&wB1)
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
137 :"memory");
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
138
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
139 /* x[1] x[5] */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
140 __asm__ __volatile__ (
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
141 "movq %6, %%mm6\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
142 "movq %5, %%mm7\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
143 "movq %1, %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
144 "movq %2, %%mm1\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
145 "movq 56(%3), %%mm3\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
146 "pfsub 40(%3), %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
147 #ifdef HAVE_3DNOWEX
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
148 "pswapd %%mm1, %%mm1\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
149 #else
8230
330086b89d8f minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents: 4497
diff changeset
150 "punpckldq %%mm1, %%mm2\n\t"
330086b89d8f minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents: 4497
diff changeset
151 "punpckhdq %%mm2, %%mm1\n\t"
4497
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
152 #endif
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
153 "pxor %%mm7, %%mm1\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
154 "pfadd %%mm1, %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
155 #ifdef HAVE_3DNOWEX
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
156 "pswapd %%mm3, %%mm3\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
157 #else
8230
330086b89d8f minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents: 4497
diff changeset
158 "punpckldq %%mm3, %%mm2\n\t"
330086b89d8f minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents: 4497
diff changeset
159 "punpckhdq %%mm2, %%mm3\n\t"
4497
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
160 #endif
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
161 "pxor %%mm6, %%mm3\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
162 "pfadd %%mm3, %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
163 "movq %%mm0, %%mm1\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
164 "pxor %%mm6, %%mm1\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
165 "pfacc %%mm1, %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
166 "pfmul %4, %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
167
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
168 "movq 40(%3), %%mm5\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
169 #ifdef HAVE_3DNOWEX
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
170 "pswapd %%mm5, %%mm5\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
171 #else
8230
330086b89d8f minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents: 4497
diff changeset
172 "punpckldq %%mm5, %%mm1\n\t"
330086b89d8f minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents: 4497
diff changeset
173 "punpckhdq %%mm1, %%mm5\n\t"
4497
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
174 #endif
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
175 "movq %%mm5, %0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
176
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
177 "movq 8(%3), %%mm1\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
178 "movq %%mm1, %%mm2\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
179 "pfsub %%mm0, %%mm1\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
180 "pfadd %%mm0, %%mm2\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
181 "movq %%mm1, 40(%3)\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
182 "movq %%mm2, 8(%3)\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
183 :"=m"(wB2)
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
184 :"m"(wT1), "m"(wB1), "r"(x), "m"(HSQRT2_3DNOW),
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
185 "m"(x_plus_minus_3dnow), "m"(x_minus_plus_3dnow)
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
186 :"memory");
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
187
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
188
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
189 /* x[3] x[7] */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
190 __asm__ __volatile__(
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
191 "movq %1, %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
192 #ifdef HAVE_3DNOWEX
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
193 "pswapd %3, %%mm1\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
194 #else
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
195 "movq %3, %%mm1\n\t"
8230
330086b89d8f minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents: 4497
diff changeset
196 "punpckldq %%mm1, %%mm2\n\t"
330086b89d8f minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents: 4497
diff changeset
197 "punpckhdq %%mm2, %%mm1\n\t"
4497
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
198 #endif
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
199 "pxor %%mm6, %%mm1\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
200 "pfadd %%mm1, %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
201 "movq %2, %%mm2\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
202 "movq 56(%4), %%mm3\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
203 "pxor %%mm7, %%mm3\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
204 "pfadd %%mm3, %%mm2\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
205 #ifdef HAVE_3DNOWEX
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
206 "pswapd %%mm2, %%mm2\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
207 #else
8230
330086b89d8f minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents: 4497
diff changeset
208 "punpckldq %%mm2, %%mm5\n\t"
330086b89d8f minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents: 4497
diff changeset
209 "punpckhdq %%mm5, %%mm2\n\t"
4497
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
210 #endif
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
211 "movq 24(%4), %%mm3\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
212 "pfsub %%mm2, %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
213 "movq %%mm3, %%mm4\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
214 "movq %%mm0, %%mm1\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
215 "pxor %%mm6, %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
216 "pfacc %%mm1, %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
217 "pfmul %5, %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
218 "movq %%mm0, %%mm1\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
219 "pxor %%mm6, %%mm1\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
220 "pxor %%mm7, %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
221 "pfadd %%mm1, %%mm3\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
222 "pfadd %%mm0, %%mm4\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
223 "movq %%mm4, 24(%0)\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
224 "movq %%mm3, 56(%0)\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
225 :"=r"(x)
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
226 :"m"(wT1), "m"(wB2), "m"(wB1), "0"(x), "m"(HSQRT2_3DNOW)
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
227 :"memory");
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
228 }
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
229
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
230 static void FFT_ASMB_3DNOW(int k, complex_t *x, complex_t *wTB,
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
231 const complex_t *d, const complex_t *d_3)
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
232 {
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
233 register complex_t *x2k, *x3k, *x4k, *wB;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
234
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
235 TRANS_FILL_MM6_MM7_3DNOW();
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
236 x2k = x + 2 * k;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
237 x3k = x2k + 2 * k;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
238 x4k = x3k + 2 * k;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
239 wB = wTB + 2 * k;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
240
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
241 TRANSZERO_3DNOW(x[0],x2k[0],x3k[0],x4k[0]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
242 TRANS_3DNOW(x[1],x2k[1],x3k[1],x4k[1],wTB[1],wB[1],d[1],d_3[1]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
243
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
244 --k;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
245 for(;;) {
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
246 TRANS_3DNOW(x[2],x2k[2],x3k[2],x4k[2],wTB[2],wB[2],d[2],d_3[2]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
247 TRANS_3DNOW(x[3],x2k[3],x3k[3],x4k[3],wTB[3],wB[3],d[3],d_3[3]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
248 if (!--k) break;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
249 x += 2;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
250 x2k += 2;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
251 x3k += 2;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
252 x4k += 2;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
253 d += 2;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
254 d_3 += 2;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
255 wTB += 2;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
256 wB += 2;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
257 }
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
258
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
259 }
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
260
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
261 void FFT_ASMB16_3DNOW(complex_t *x, complex_t *wTB)
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
262 {
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
263 int k = 2;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
264
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
265 TRANS_FILL_MM6_MM7_3DNOW();
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
266 /* transform x[0], x[8], x[4], x[12] */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
267 TRANSZERO_3DNOW(x[0],x[4],x[8],x[12]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
268
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
269 /* transform x[1], x[9], x[5], x[13] */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
270 TRANS_3DNOW(x[1],x[5],x[9],x[13],wTB[1],wTB[5],delta16[1],delta16_3[1]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
271
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
272 /* transform x[2], x[10], x[6], x[14] */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
273 TRANSHALF_16_3DNOW(x[2],x[6],x[10],x[14]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
274
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
275 /* transform x[3], x[11], x[7], x[15] */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
276 TRANS_3DNOW(x[3],x[7],x[11],x[15],wTB[3],wTB[7],delta16[3],delta16_3[3]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
277
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
278 }
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
279
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
280 static void FFT_128P_3DNOW(complex_t *a)
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
281 {
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
282 FFT_8_3DNOW(&a[0]); FFT_4_3DNOW(&a[8]); FFT_4_3DNOW(&a[12]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
283 FFT_ASMB16_3DNOW(&a[0], &a[8]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
284
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
285 FFT_8_3DNOW(&a[16]), FFT_8_3DNOW(&a[24]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
286 FFT_ASMB_3DNOW(4, &a[0], &a[16],&delta32[0], &delta32_3[0]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
287
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
288 FFT_8_3DNOW(&a[32]); FFT_4_3DNOW(&a[40]); FFT_4_3DNOW(&a[44]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
289 FFT_ASMB16_3DNOW(&a[32], &a[40]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
290
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
291 FFT_8_3DNOW(&a[48]); FFT_4_3DNOW(&a[56]); FFT_4_3DNOW(&a[60]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
292 FFT_ASMB16_3DNOW(&a[48], &a[56]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
293
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
294 FFT_ASMB_3DNOW(8, &a[0], &a[32],&delta64[0], &delta64_3[0]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
295
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
296 FFT_8_3DNOW(&a[64]); FFT_4_3DNOW(&a[72]); FFT_4_3DNOW(&a[76]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
297 /* FFT_16(&a[64]); */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
298 FFT_ASMB16_3DNOW(&a[64], &a[72]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
299
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
300 FFT_8_3DNOW(&a[80]); FFT_8_3DNOW(&a[88]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
301
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
302 /* FFT_32(&a[64]); */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
303 FFT_ASMB_3DNOW(4, &a[64], &a[80],&delta32[0], &delta32_3[0]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
304
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
305 FFT_8_3DNOW(&a[96]); FFT_4_3DNOW(&a[104]), FFT_4_3DNOW(&a[108]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
306 /* FFT_16(&a[96]); */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
307 FFT_ASMB16_3DNOW(&a[96], &a[104]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
308
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
309 FFT_8_3DNOW(&a[112]), FFT_8_3DNOW(&a[120]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
310 /* FFT_32(&a[96]); */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
311 FFT_ASMB_3DNOW(4, &a[96], &a[112], &delta32[0], &delta32_3[0]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
312
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
313 /* FFT_128(&a[0]); */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
314 FFT_ASMB_3DNOW(16, &a[0], &a[64], &delta128[0], &delta128_3[0]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
315 }
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
316
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
317 static void
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
318 #ifdef HAVE_3DNOWEX
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
319 imdct_do_512_3dnowex
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
320 #else
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
321 imdct_do_512_3dnow
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
322 #endif
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
323 (sample_t data[],sample_t delay[], sample_t bias)
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
324 {
8254
772d6d27fd66 warning patch by (Dominik Mierzejewski <dominik at rangers dot eu dot org>)
michael
parents: 8230
diff changeset
325 int i;
772d6d27fd66 warning patch by (Dominik Mierzejewski <dominik at rangers dot eu dot org>)
michael
parents: 8230
diff changeset
326 /* int k;
4497
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
327 int p,q;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
328 int m;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
329 int two_m;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
330 int two_m_plus_one;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
331
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
332 sample_t tmp_a_i;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
333 sample_t tmp_a_r;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
334 sample_t tmp_b_i;
8254
772d6d27fd66 warning patch by (Dominik Mierzejewski <dominik at rangers dot eu dot org>)
michael
parents: 8230
diff changeset
335 sample_t tmp_b_r;*/
4497
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
336
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
337 sample_t *data_ptr;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
338 sample_t *delay_ptr;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
339 sample_t *window_ptr;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
340
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
341 /* 512 IMDCT with source and dest data in 'data' */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
342
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
343 /* Pre IFFT complex multiply plus IFFT cmplx conjugate & reordering*/
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
344 #if 1
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
345 __asm__ __volatile__ (
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
346 "movq %0, %%mm7\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
347 ::"m"(x_plus_minus_3dnow)
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
348 :"memory");
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
349 for( i=0; i < 128; i++) {
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
350 int j = pm128[i];
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
351 __asm__ __volatile__ (
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
352 "movd %1, %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
353 "movd %3, %%mm1\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
354 "punpckldq %2, %%mm0\n\t" /* mm0 = data[256-2*j-1] | data[2*j]*/
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
355 "punpckldq %4, %%mm1\n\t" /* mm1 = xcos[j] | xsin[j] */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
356 "movq %%mm0, %%mm2\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
357 "pfmul %%mm1, %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
358 #ifdef HAVE_3DNOWEX
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
359 "pswapd %%mm1, %%mm1\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
360 #else
8230
330086b89d8f minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents: 4497
diff changeset
361 "punpckldq %%mm1, %%mm5\n\t"
330086b89d8f minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents: 4497
diff changeset
362 "punpckhdq %%mm5, %%mm1\n\t"
4497
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
363 #endif
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
364 "pfmul %%mm1, %%mm2\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
365 #ifdef HAVE_3DNOWEX
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
366 "pfpnacc %%mm2, %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
367 #else
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
368 "pxor %%mm7, %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
369 "pfacc %%mm2, %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
370 #endif
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
371 "pxor %%mm7, %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
372 "movq %%mm0, %0"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
373 :"=m"(buf[i])
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
374 :"m"(data[256-2*j-1]), "m"(data[2*j]), "m"(xcos1[j]), "m"(xsin1[j])
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
375 :"memory"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
376 );
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
377 /* buf[i].re = (data[256-2*j-1] * xcos1[j] - data[2*j] * xsin1[j]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
378 buf[i].im = (data[256-2*j-1] * xsin1[j] + data[2*j] * xcos1[j])*(-1.0);*/
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
379 }
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
380 #else
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
381 __asm__ __volatile__ ("femms":::"memory");
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
382 for( i=0; i < 128; i++) {
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
383 /* z[i] = (X[256-2*i-1] + j * X[2*i]) * (xcos1[i] + j * xsin1[i]) ; */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
384 int j= pm128[i];
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
385 buf[i].real = (data[256-2*j-1] * xcos1[j]) - (data[2*j] * xsin1[j]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
386 buf[i].imag = -1.0 * ((data[2*j] * xcos1[j]) + (data[256-2*j-1] * xsin1[j]));
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
387 }
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
388 #endif
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
389
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
390 /* FFT Merge */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
391 /* unoptimized variant
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
392 for (m=1; m < 7; m++) {
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
393 if(m)
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
394 two_m = (1 << m);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
395 else
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
396 two_m = 1;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
397
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
398 two_m_plus_one = (1 << (m+1));
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
399
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
400 for(i = 0; i < 128; i += two_m_plus_one) {
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
401 for(k = 0; k < two_m; k++) {
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
402 p = k + i;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
403 q = p + two_m;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
404 tmp_a_r = buf[p].real;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
405 tmp_a_i = buf[p].imag;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
406 tmp_b_r = buf[q].real * w[m][k].real - buf[q].imag * w[m][k].imag;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
407 tmp_b_i = buf[q].imag * w[m][k].real + buf[q].real * w[m][k].imag;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
408 buf[p].real = tmp_a_r + tmp_b_r;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
409 buf[p].imag = tmp_a_i + tmp_b_i;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
410 buf[q].real = tmp_a_r - tmp_b_r;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
411 buf[q].imag = tmp_a_i - tmp_b_i;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
412 }
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
413 }
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
414 }
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
415 */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
416
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
417 FFT_128P_3DNOW (&buf[0]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
418 // asm volatile ("femms \n\t":::"memory");
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
419
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
420 /* Post IFFT complex multiply plus IFFT complex conjugate*/
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
421 #if 1
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
422 __asm__ __volatile__ (
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
423 "movq %0, %%mm7\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
424 "movq %1, %%mm6\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
425 ::"m"(x_plus_minus_3dnow),
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
426 "m"(x_minus_plus_3dnow)
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
427 :"eax","memory");
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
428 for (i=0; i < 128; i++) {
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
429 __asm__ __volatile__ (
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
430 "movq %1, %%mm0\n\t" /* ac3_buf[i].re | ac3_buf[i].im */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
431 "movq %%mm0, %%mm1\n\t" /* ac3_buf[i].re | ac3_buf[i].im */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
432 #ifndef HAVE_3DNOWEX
8230
330086b89d8f minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents: 4497
diff changeset
433 "punpckldq %%mm1, %%mm2\n\t"
330086b89d8f minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents: 4497
diff changeset
434 "punpckhdq %%mm2, %%mm1\n\t"
4497
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
435 #else
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
436 "pswapd %%mm1, %%mm1\n\t" /* ac3_buf[i].re | ac3_buf[i].im */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
437 #endif
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
438 "movd %3, %%mm3\n\t" /* ac3_xsin[i] */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
439 "punpckldq %2, %%mm3\n\t" /* ac3_xsin[i] | ac3_xcos[i] */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
440 "pfmul %%mm3, %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
441 "pfmul %%mm3, %%mm1\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
442 #ifndef HAVE_3DNOWEX
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
443 "pxor %%mm7, %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
444 "pfacc %%mm1, %%mm0\n\t"
8230
330086b89d8f minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents: 4497
diff changeset
445 "punpckldq %%mm0, %%mm1\n\t"
330086b89d8f minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents: 4497
diff changeset
446 "punpckhdq %%mm1, %%mm0\n\t"
330086b89d8f minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents: 4497
diff changeset
447 "movq %%mm0, %0\n\t"
4497
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
448 #else
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
449 "pfpnacc %%mm1, %%mm0\n\t" /* mm0 = mm0[0] - mm0[1] | mm1[0] + mm1[1] */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
450 "pswapd %%mm0, %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
451 "movq %%mm0, %0"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
452 #endif
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
453 :"=m"(buf[i])
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
454 :"m"(buf[i]),"m"(xcos1[i]),"m"(xsin1[i])
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
455 :"memory");
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
456 /* ac3_buf[i].re =(tmp_a_r * ac3_xcos1[i]) + (tmp_a_i * ac3_xsin1[i]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
457 ac3_buf[i].im =(tmp_a_r * ac3_xsin1[i]) - (tmp_a_i * ac3_xcos1[i]);*/
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
458 }
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
459 #else
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
460 __asm__ __volatile__ ("femms":::"memory");
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
461 for( i=0; i < 128; i++) {
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
462 /* y[n] = z[n] * (xcos1[n] + j * xsin1[n]) ; */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
463 tmp_a_r = buf[i].real;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
464 tmp_a_i = -1.0 * buf[i].imag;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
465 buf[i].real =(tmp_a_r * xcos1[i]) - (tmp_a_i * xsin1[i]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
466 buf[i].imag =(tmp_a_r * xsin1[i]) + (tmp_a_i * xcos1[i]);
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
467 }
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
468 #endif
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
469
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
470 data_ptr = data;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
471 delay_ptr = delay;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
472 window_ptr = imdct_window;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
473
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
474 /* Window and convert to real valued signal */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
475 #if 1
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
476 asm volatile (
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
477 "movd (%0), %%mm3 \n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
478 "punpckldq %%mm3, %%mm3 \n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
479 :: "r" (&bias)
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
480 );
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
481 for (i=0; i< 64; i++) {
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
482 /* merge two loops in one to enable working of 2 decoders */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
483 __asm__ __volatile__ (
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
484 "movd 516(%1), %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
485 "movd (%1), %%mm1\n\t" /**data_ptr++=-buf[64+i].im**window_ptr+++*delay_ptr++;*/
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
486 "punpckldq (%2), %%mm0\n\t"/*data_ptr[128]=-buf[i].re*window_ptr[128]+delay_ptr[128];*/
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
487 "punpckldq 516(%2), %%mm1\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
488 "pfmul (%3), %%mm0\n\t"/**data_ptr++=buf[64-i-1].re**window_ptr+++*delay_ptr++;*/
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
489 "pfmul 512(%3), %%mm1\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
490 "pxor %%mm6, %%mm0\n\t"/*data_ptr[128]=buf[128-i-1].im*window_ptr[128]+delay_ptr[128];*/
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
491 "pxor %%mm6, %%mm1\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
492 "pfadd (%4), %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
493 "pfadd 512(%4), %%mm1\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
494 "pfadd %%mm3, %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
495 "pfadd %%mm3, %%mm1\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
496 "movq %%mm0, (%0)\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
497 "movq %%mm1, 512(%0)"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
498 :"=r"(data_ptr)
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
499 :"r"(&buf[i].real), "r"(&buf[64-i-1].real), "r"(window_ptr), "r"(delay_ptr), "0"(data_ptr)
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
500 :"memory");
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
501 data_ptr += 2;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
502 window_ptr += 2;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
503 delay_ptr += 2;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
504 }
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
505 window_ptr += 128;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
506 #else
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
507 __asm__ __volatile__ ("femms":::"memory");
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
508 for(i=0; i< 64; i++) {
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
509 *data_ptr++ = -buf[64+i].imag * *window_ptr++ + *delay_ptr++ + bias;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
510 *data_ptr++ = buf[64-i-1].real * *window_ptr++ + *delay_ptr++ + bias;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
511 }
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
512
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
513 for(i=0; i< 64; i++) {
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
514 *data_ptr++ = -buf[i].real * *window_ptr++ + *delay_ptr++ + bias;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
515 *data_ptr++ = buf[128-i-1].imag * *window_ptr++ + *delay_ptr++ + bias;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
516 }
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
517 #endif
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
518
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
519 /* The trailing edge of the window goes into the delay line */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
520 delay_ptr = delay;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
521 #if 1
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
522 for(i=0; i< 64; i++) {
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
523 /* merge two loops in one to enable working of 2 decoders */
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
524 window_ptr -=2;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
525 __asm__ __volatile__(
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
526 "movd 508(%1), %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
527 "movd (%1), %%mm1\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
528 "punpckldq (%2), %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
529 "punpckldq 508(%2), %%mm1\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
530 #ifdef HAVE_3DNOWEX
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
531 "pswapd (%3), %%mm3\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
532 "pswapd -512(%3), %%mm4\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
533 #else
8230
330086b89d8f minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents: 4497
diff changeset
534 "movq (%3), %%mm3\n\t"
330086b89d8f minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents: 4497
diff changeset
535 "punpckldq %%mm3, %%mm2\n\t"
330086b89d8f minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents: 4497
diff changeset
536 "punpckhdq %%mm2, %%mm3\n\t"
4497
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
537 "movq -512(%3), %%mm4\n\t"
8230
330086b89d8f minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents: 4497
diff changeset
538 "punpckldq %%mm4, %%mm2\n\t"
330086b89d8f minor optimization & gcc-CVS fix/workaround patch by (Glen Nakamura <glen at imodulo dot com>)
michael
parents: 4497
diff changeset
539 "punpckhdq %%mm2, %%mm4\n\t"
4497
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
540 #endif
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
541 "pfmul %%mm3, %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
542 "pfmul %%mm4, %%mm1\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
543 "pxor %%mm6, %%mm0\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
544 "pxor %%mm7, %%mm1\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
545 "movq %%mm0, (%0)\n\t"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
546 "movq %%mm1, 512(%0)"
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
547 :"=r"(delay_ptr)
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
548 :"r"(&buf[i].imag), "r"(&buf[64-i-1].imag), "r"(window_ptr), "0"(delay_ptr)
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
549 :"memory");
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
550 delay_ptr += 2;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
551 }
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
552 __asm__ __volatile__ ("femms":::"memory");
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
553 #else
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
554 __asm__ __volatile__ ("femms":::"memory");
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
555 for(i=0; i< 64; i++) {
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
556 *delay_ptr++ = -buf[64+i].real * *--window_ptr;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
557 *delay_ptr++ = buf[64-i-1].imag * *--window_ptr;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
558 }
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
559
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
560 for(i=0; i<64; i++) {
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
561 *delay_ptr++ = buf[i].imag * *--window_ptr;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
562 *delay_ptr++ = -buf[128-i-1].real * *--window_ptr;
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
563 }
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
564 #endif
d3aedd7db02c Restore K7 support
nick
parents:
diff changeset
565 }