Mercurial > mplayer.hg
annotate mp3lib/dct64_altivec.c @ 29641:52e00a25ab71
Hack: set DEP/NX on Windows via SetProcessDEPPolicy.
This should really be done via the NXCOMPAT PE flag, but looks like
binutils will not get support for it any time soon and not having this
security feature enabled is just ridiculous.
author | reimar |
---|---|
date | Wed, 16 Sep 2009 18:07:19 +0000 |
parents | 0f1b5b68af32 |
children | 347d152a5cfa |
rev | line source |
---|---|
9002 | 1 |
2 /* | |
11980 | 3 * Discrete Cosine Transform (DCT) for Altivec |
4 * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org> | |
5 * based upon code from "mp3lib/dct64.c" | |
18848 | 6 * This file is free software; you can redistribute it and/or |
7 * modify it under the terms of the GNU Lesser General Public License | |
9002 | 8 */ |
9 | |
10 #define real float | |
11 | |
25341 | 12 #include <stdio.h> |
9002 | 13 #include "mpg123.h" |
14 | |
25328
6f0309e575e0
There is a check for altivec.h in configure so use the preprocessor directive
diego
parents:
25327
diff
changeset
|
15 #ifdef HAVE_ALTIVEC_H |
9122 | 16 #include <altivec.h> |
17 #endif | |
18 | |
9002 | 19 // used to build register permutation vectors (vcprm) |
20 // the 's' are for words in the _s_econd vector | |
21 #define WORD_0 0x00,0x01,0x02,0x03 | |
22 #define WORD_1 0x04,0x05,0x06,0x07 | |
23 #define WORD_2 0x08,0x09,0x0a,0x0b | |
24 #define WORD_3 0x0c,0x0d,0x0e,0x0f | |
25 #define WORD_s0 0x10,0x11,0x12,0x13 | |
26 #define WORD_s1 0x14,0x15,0x16,0x17 | |
27 #define WORD_s2 0x18,0x19,0x1a,0x1b | |
28 #define WORD_s3 0x1c,0x1d,0x1e,0x1f | |
29 | |
27318
bb5ed9aa34fc
Remove AltiVec vector declaration compiler compatibility macros.
diego
parents:
26895
diff
changeset
|
30 #define vcprm(a,b,c,d) (const vector unsigned char){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d} |
bb5ed9aa34fc
Remove AltiVec vector declaration compiler compatibility macros.
diego
parents:
26895
diff
changeset
|
31 #define vcii(a,b,c,d) (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d} |
9002 | 32 |
27318
bb5ed9aa34fc
Remove AltiVec vector declaration compiler compatibility macros.
diego
parents:
26895
diff
changeset
|
33 #define FOUROF(a) {a,a,a,a} |
25996 | 34 |
9002 | 35 // vcprmle is used to keep the same index as in the SSE version. |
36 // it's the same as vcprm, with the indices reversed | |
37 // ('le' is Little Endian) | |
38 #define vcprmle(a,b,c,d) vcprm(d,c,b,a) | |
39 | |
40 // used to build inverse/identity vectors (vcii) | |
41 // n is _n_egative, p is _p_ositive | |
42 #define FLOAT_n -1. | |
43 #define FLOAT_p 1. | |
44 | |
45 void dct64_altivec(real *a,real *b,real *c) | |
46 { | |
47 real __attribute__ ((aligned(16))) b1[0x20]; | |
48 real __attribute__ ((aligned(16))) b2[0x20]; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
27318
diff
changeset
|
49 |
9002 | 50 real *out0 = a; |
51 real *out1 = b; | |
52 real *samples = c; | |
53 | |
9122 | 54 const vector float vczero = (const vector float)FOUROF(0.); |
9002 | 55 const vector unsigned char reverse = (const vector unsigned char)vcprm(3,2,1,0); |
56 | |
57 | |
58 if (((unsigned long)b1 & 0x0000000F) || | |
59 ((unsigned long)b2 & 0x0000000F)) | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
27318
diff
changeset
|
60 |
9002 | 61 { |
62 printf("MISALIGNED:\t%p\t%p\t%p\t%p\t%p\n", | |
63 b1, b2, a, b, samples); | |
64 } | |
65 | |
66 | |
67 #ifdef ALTIVEC_USE_REFERENCE_C_CODE | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
27318
diff
changeset
|
68 |
9002 | 69 { |
12131
d155623271e3
fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents:
11980
diff
changeset
|
70 register real *costab = mp3lib_pnts[0]; |
9002 | 71 |
72 b1[0x00] = samples[0x00] + samples[0x1F]; | |
73 b1[0x01] = samples[0x01] + samples[0x1E]; | |
74 b1[0x02] = samples[0x02] + samples[0x1D]; | |
75 b1[0x03] = samples[0x03] + samples[0x1C]; | |
76 b1[0x04] = samples[0x04] + samples[0x1B]; | |
77 b1[0x05] = samples[0x05] + samples[0x1A]; | |
78 b1[0x06] = samples[0x06] + samples[0x19]; | |
79 b1[0x07] = samples[0x07] + samples[0x18]; | |
80 b1[0x08] = samples[0x08] + samples[0x17]; | |
81 b1[0x09] = samples[0x09] + samples[0x16]; | |
82 b1[0x0A] = samples[0x0A] + samples[0x15]; | |
83 b1[0x0B] = samples[0x0B] + samples[0x14]; | |
84 b1[0x0C] = samples[0x0C] + samples[0x13]; | |
85 b1[0x0D] = samples[0x0D] + samples[0x12]; | |
86 b1[0x0E] = samples[0x0E] + samples[0x11]; | |
87 b1[0x0F] = samples[0x0F] + samples[0x10]; | |
88 b1[0x10] = (samples[0x0F] - samples[0x10]) * costab[0xF]; | |
89 b1[0x11] = (samples[0x0E] - samples[0x11]) * costab[0xE]; | |
90 b1[0x12] = (samples[0x0D] - samples[0x12]) * costab[0xD]; | |
91 b1[0x13] = (samples[0x0C] - samples[0x13]) * costab[0xC]; | |
92 b1[0x14] = (samples[0x0B] - samples[0x14]) * costab[0xB]; | |
93 b1[0x15] = (samples[0x0A] - samples[0x15]) * costab[0xA]; | |
94 b1[0x16] = (samples[0x09] - samples[0x16]) * costab[0x9]; | |
95 b1[0x17] = (samples[0x08] - samples[0x17]) * costab[0x8]; | |
96 b1[0x18] = (samples[0x07] - samples[0x18]) * costab[0x7]; | |
97 b1[0x19] = (samples[0x06] - samples[0x19]) * costab[0x6]; | |
98 b1[0x1A] = (samples[0x05] - samples[0x1A]) * costab[0x5]; | |
99 b1[0x1B] = (samples[0x04] - samples[0x1B]) * costab[0x4]; | |
100 b1[0x1C] = (samples[0x03] - samples[0x1C]) * costab[0x3]; | |
101 b1[0x1D] = (samples[0x02] - samples[0x1D]) * costab[0x2]; | |
102 b1[0x1E] = (samples[0x01] - samples[0x1E]) * costab[0x1]; | |
103 b1[0x1F] = (samples[0x00] - samples[0x1F]) * costab[0x0]; | |
104 | |
105 } | |
106 { | |
12131
d155623271e3
fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents:
11980
diff
changeset
|
107 register real *costab = mp3lib_pnts[1]; |
9002 | 108 |
109 b2[0x00] = b1[0x00] + b1[0x0F]; | |
110 b2[0x01] = b1[0x01] + b1[0x0E]; | |
111 b2[0x02] = b1[0x02] + b1[0x0D]; | |
112 b2[0x03] = b1[0x03] + b1[0x0C]; | |
113 b2[0x04] = b1[0x04] + b1[0x0B]; | |
114 b2[0x05] = b1[0x05] + b1[0x0A]; | |
115 b2[0x06] = b1[0x06] + b1[0x09]; | |
116 b2[0x07] = b1[0x07] + b1[0x08]; | |
117 b2[0x08] = (b1[0x07] - b1[0x08]) * costab[7]; | |
118 b2[0x09] = (b1[0x06] - b1[0x09]) * costab[6]; | |
119 b2[0x0A] = (b1[0x05] - b1[0x0A]) * costab[5]; | |
120 b2[0x0B] = (b1[0x04] - b1[0x0B]) * costab[4]; | |
121 b2[0x0C] = (b1[0x03] - b1[0x0C]) * costab[3]; | |
122 b2[0x0D] = (b1[0x02] - b1[0x0D]) * costab[2]; | |
123 b2[0x0E] = (b1[0x01] - b1[0x0E]) * costab[1]; | |
124 b2[0x0F] = (b1[0x00] - b1[0x0F]) * costab[0]; | |
125 b2[0x10] = b1[0x10] + b1[0x1F]; | |
126 b2[0x11] = b1[0x11] + b1[0x1E]; | |
127 b2[0x12] = b1[0x12] + b1[0x1D]; | |
128 b2[0x13] = b1[0x13] + b1[0x1C]; | |
129 b2[0x14] = b1[0x14] + b1[0x1B]; | |
130 b2[0x15] = b1[0x15] + b1[0x1A]; | |
131 b2[0x16] = b1[0x16] + b1[0x19]; | |
132 b2[0x17] = b1[0x17] + b1[0x18]; | |
133 b2[0x18] = (b1[0x18] - b1[0x17]) * costab[7]; | |
134 b2[0x19] = (b1[0x19] - b1[0x16]) * costab[6]; | |
135 b2[0x1A] = (b1[0x1A] - b1[0x15]) * costab[5]; | |
136 b2[0x1B] = (b1[0x1B] - b1[0x14]) * costab[4]; | |
137 b2[0x1C] = (b1[0x1C] - b1[0x13]) * costab[3]; | |
138 b2[0x1D] = (b1[0x1D] - b1[0x12]) * costab[2]; | |
139 b2[0x1E] = (b1[0x1E] - b1[0x11]) * costab[1]; | |
140 b2[0x1F] = (b1[0x1F] - b1[0x10]) * costab[0]; | |
141 | |
142 } | |
143 | |
144 { | |
12131
d155623271e3
fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents:
11980
diff
changeset
|
145 register real *costab = mp3lib_pnts[2]; |
9002 | 146 |
147 b1[0x00] = b2[0x00] + b2[0x07]; | |
148 b1[0x01] = b2[0x01] + b2[0x06]; | |
149 b1[0x02] = b2[0x02] + b2[0x05]; | |
150 b1[0x03] = b2[0x03] + b2[0x04]; | |
151 b1[0x04] = (b2[0x03] - b2[0x04]) * costab[3]; | |
152 b1[0x05] = (b2[0x02] - b2[0x05]) * costab[2]; | |
153 b1[0x06] = (b2[0x01] - b2[0x06]) * costab[1]; | |
154 b1[0x07] = (b2[0x00] - b2[0x07]) * costab[0]; | |
155 b1[0x08] = b2[0x08] + b2[0x0F]; | |
156 b1[0x09] = b2[0x09] + b2[0x0E]; | |
157 b1[0x0A] = b2[0x0A] + b2[0x0D]; | |
158 b1[0x0B] = b2[0x0B] + b2[0x0C]; | |
159 b1[0x0C] = (b2[0x0C] - b2[0x0B]) * costab[3]; | |
160 b1[0x0D] = (b2[0x0D] - b2[0x0A]) * costab[2]; | |
161 b1[0x0E] = (b2[0x0E] - b2[0x09]) * costab[1]; | |
162 b1[0x0F] = (b2[0x0F] - b2[0x08]) * costab[0]; | |
163 b1[0x10] = b2[0x10] + b2[0x17]; | |
164 b1[0x11] = b2[0x11] + b2[0x16]; | |
165 b1[0x12] = b2[0x12] + b2[0x15]; | |
166 b1[0x13] = b2[0x13] + b2[0x14]; | |
167 b1[0x14] = (b2[0x13] - b2[0x14]) * costab[3]; | |
168 b1[0x15] = (b2[0x12] - b2[0x15]) * costab[2]; | |
169 b1[0x16] = (b2[0x11] - b2[0x16]) * costab[1]; | |
170 b1[0x17] = (b2[0x10] - b2[0x17]) * costab[0]; | |
171 b1[0x18] = b2[0x18] + b2[0x1F]; | |
172 b1[0x19] = b2[0x19] + b2[0x1E]; | |
173 b1[0x1A] = b2[0x1A] + b2[0x1D]; | |
174 b1[0x1B] = b2[0x1B] + b2[0x1C]; | |
175 b1[0x1C] = (b2[0x1C] - b2[0x1B]) * costab[3]; | |
176 b1[0x1D] = (b2[0x1D] - b2[0x1A]) * costab[2]; | |
177 b1[0x1E] = (b2[0x1E] - b2[0x19]) * costab[1]; | |
178 b1[0x1F] = (b2[0x1F] - b2[0x18]) * costab[0]; | |
179 } | |
180 | |
181 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | |
182 | |
183 // How does it work ? | |
184 // the first three passes are reproduced in the three blocks below | |
185 // all computations are done on a 4 elements vector | |
186 // 'reverse' is a special permutation vector used to reverse | |
187 // the order of the elements inside a vector. | |
188 // note that all loads/stores to b1 (b2) between passes 1 and 2 (2 and 3) | |
189 // have been removed, all elements are stored inside b1vX (b2vX) | |
190 { | |
191 register vector float | |
192 b1v0, b1v1, b1v2, b1v3, | |
193 b1v4, b1v5, b1v6, b1v7; | |
194 register vector float | |
195 temp1, temp2; | |
196 | |
197 { | |
12131
d155623271e3
fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents:
11980
diff
changeset
|
198 register real *costab = mp3lib_pnts[0]; |
9002 | 199 |
200 register vector float | |
201 samplesv1, samplesv2, samplesv3, samplesv4, | |
202 samplesv5, samplesv6, samplesv7, samplesv8, | |
203 samplesv9; | |
204 register vector unsigned char samples_perm = vec_lvsl(0, samples); | |
205 register vector float costabv1, costabv2, costabv3, costabv4, costabv5; | |
206 register vector unsigned char costab_perm = vec_lvsl(0, costab); | |
207 | |
208 samplesv1 = vec_ld(0, samples); | |
209 samplesv2 = vec_ld(16, samples); | |
210 samplesv1 = vec_perm(samplesv1, samplesv2, samples_perm); | |
211 samplesv3 = vec_ld(32, samples); | |
212 samplesv2 = vec_perm(samplesv2, samplesv3, samples_perm); | |
213 samplesv4 = vec_ld(48, samples); | |
214 samplesv3 = vec_perm(samplesv3, samplesv4, samples_perm); | |
215 samplesv5 = vec_ld(64, samples); | |
216 samplesv4 = vec_perm(samplesv4, samplesv5, samples_perm); | |
217 samplesv6 = vec_ld(80, samples); | |
218 samplesv5 = vec_perm(samplesv5, samplesv6, samples_perm); | |
219 samplesv7 = vec_ld(96, samples); | |
220 samplesv6 = vec_perm(samplesv6, samplesv7, samples_perm); | |
221 samplesv8 = vec_ld(112, samples); | |
222 samplesv7 = vec_perm(samplesv7, samplesv8, samples_perm); | |
223 samplesv9 = vec_ld(128, samples); | |
224 samplesv8 = vec_perm(samplesv8, samplesv9, samples_perm); | |
225 | |
226 temp1 = vec_add(samplesv1, | |
227 vec_perm(samplesv8, samplesv8, reverse)); | |
228 //vec_st(temp1, 0, b1); | |
229 b1v0 = temp1; | |
230 temp1 = vec_add(samplesv2, | |
231 vec_perm(samplesv7, samplesv7, reverse)); | |
232 //vec_st(temp1, 16, b1); | |
233 b1v1 = temp1; | |
234 temp1 = vec_add(samplesv3, | |
235 vec_perm(samplesv6, samplesv6, reverse)); | |
236 //vec_st(temp1, 32, b1); | |
237 b1v2 = temp1; | |
238 temp1 = vec_add(samplesv4, | |
239 vec_perm(samplesv5, samplesv5, reverse)); | |
240 //vec_st(temp1, 48, b1); | |
241 b1v3 = temp1; | |
242 | |
243 costabv1 = vec_ld(0, costab); | |
244 costabv2 = vec_ld(16, costab); | |
245 costabv1 = vec_perm(costabv1, costabv2, costab_perm); | |
246 costabv3 = vec_ld(32, costab); | |
247 costabv2 = vec_perm(costabv2, costabv3, costab_perm); | |
248 costabv4 = vec_ld(48, costab); | |
249 costabv3 = vec_perm(costabv3, costabv4, costab_perm); | |
250 costabv5 = vec_ld(64, costab); | |
251 costabv4 = vec_perm(costabv4, costabv5, costab_perm); | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
27318
diff
changeset
|
252 |
9002 | 253 temp1 = vec_sub(vec_perm(samplesv4, samplesv4, reverse), |
254 samplesv5); | |
255 temp2 = vec_madd(temp1, | |
256 vec_perm(costabv4, costabv4, reverse), | |
257 vczero); | |
258 //vec_st(temp2, 64, b1); | |
259 b1v4 = temp2; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
27318
diff
changeset
|
260 |
9002 | 261 temp1 = vec_sub(vec_perm(samplesv3, samplesv3, reverse), |
262 samplesv6); | |
263 temp2 = vec_madd(temp1, | |
264 vec_perm(costabv3, costabv3, reverse), | |
265 vczero); | |
266 //vec_st(temp2, 80, b1); | |
267 b1v5 = temp2; | |
268 temp1 = vec_sub(vec_perm(samplesv2, samplesv2, reverse), | |
269 samplesv7); | |
270 temp2 = vec_madd(temp1, | |
271 vec_perm(costabv2, costabv2, reverse), | |
272 vczero); | |
273 //vec_st(temp2, 96, b1); | |
274 b1v6 = temp2; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
27318
diff
changeset
|
275 |
9002 | 276 temp1 = vec_sub(vec_perm(samplesv1, samplesv1, reverse), |
277 samplesv8); | |
278 temp2 = vec_madd(temp1, | |
279 vec_perm(costabv1, costabv1, reverse), | |
280 vczero); | |
281 //vec_st(temp2, 112, b1); | |
282 b1v7 = temp2; | |
283 | |
284 } | |
285 | |
286 { | |
287 register vector float | |
288 b2v0, b2v1, b2v2, b2v3, | |
289 b2v4, b2v5, b2v6, b2v7; | |
290 { | |
12131
d155623271e3
fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents:
11980
diff
changeset
|
291 register real *costab = mp3lib_pnts[1]; |
9002 | 292 register vector float costabv1r, costabv2r, costabv1, costabv2, costabv3; |
293 register vector unsigned char costab_perm = vec_lvsl(0, costab); | |
294 | |
295 costabv1 = vec_ld(0, costab); | |
296 costabv2 = vec_ld(16, costab); | |
297 costabv1 = vec_perm(costabv1, costabv2, costab_perm); | |
298 costabv3 = vec_ld(32, costab); | |
299 costabv2 = vec_perm(costabv2, costabv3 , costab_perm); | |
300 costabv1r = vec_perm(costabv1, costabv1, reverse); | |
301 costabv2r = vec_perm(costabv2, costabv2, reverse); | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
27318
diff
changeset
|
302 |
9002 | 303 temp1 = vec_add(b1v0, vec_perm(b1v3, b1v3, reverse)); |
304 //vec_st(temp1, 0, b2); | |
305 b2v0 = temp1; | |
306 temp1 = vec_add(b1v1, vec_perm(b1v2, b1v2, reverse)); | |
307 //vec_st(temp1, 16, b2); | |
308 b2v1 = temp1; | |
309 temp2 = vec_sub(vec_perm(b1v1, b1v1, reverse), b1v2); | |
310 temp1 = vec_madd(temp2, costabv2r, vczero); | |
311 //vec_st(temp1, 32, b2); | |
312 b2v2 = temp1; | |
313 temp2 = vec_sub(vec_perm(b1v0, b1v0, reverse), b1v3); | |
314 temp1 = vec_madd(temp2, costabv1r, vczero); | |
315 //vec_st(temp1, 48, b2); | |
316 b2v3 = temp1; | |
317 temp1 = vec_add(b1v4, vec_perm(b1v7, b1v7, reverse)); | |
318 //vec_st(temp1, 64, b2); | |
319 b2v4 = temp1; | |
320 temp1 = vec_add(b1v5, vec_perm(b1v6, b1v6, reverse)); | |
321 //vec_st(temp1, 80, b2); | |
322 b2v5 = temp1; | |
323 temp2 = vec_sub(b1v6, vec_perm(b1v5, b1v5, reverse)); | |
324 temp1 = vec_madd(temp2, costabv2r, vczero); | |
325 //vec_st(temp1, 96, b2); | |
326 b2v6 = temp1; | |
327 temp2 = vec_sub(b1v7, vec_perm(b1v4, b1v4, reverse)); | |
328 temp1 = vec_madd(temp2, costabv1r, vczero); | |
329 //vec_st(temp1, 112, b2); | |
330 b2v7 = temp1; | |
331 } | |
332 | |
333 { | |
12131
d155623271e3
fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents:
11980
diff
changeset
|
334 register real *costab = mp3lib_pnts[2]; |
9002 | 335 |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
27318
diff
changeset
|
336 |
9002 | 337 vector float costabv1r, costabv1, costabv2; |
338 vector unsigned char costab_perm = vec_lvsl(0, costab); | |
339 | |
340 costabv1 = vec_ld(0, costab); | |
341 costabv2 = vec_ld(16, costab); | |
342 costabv1 = vec_perm(costabv1, costabv2, costab_perm); | |
343 costabv1r = vec_perm(costabv1, costabv1, reverse); | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
27318
diff
changeset
|
344 |
9002 | 345 temp1 = vec_add(b2v0, vec_perm(b2v1, b2v1, reverse)); |
346 vec_st(temp1, 0, b1); | |
347 temp2 = vec_sub(vec_perm(b2v0, b2v0, reverse), b2v1); | |
348 temp1 = vec_madd(temp2, costabv1r, vczero); | |
349 vec_st(temp1, 16, b1); | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
27318
diff
changeset
|
350 |
9002 | 351 temp1 = vec_add(b2v2, vec_perm(b2v3, b2v3, reverse)); |
352 vec_st(temp1, 32, b1); | |
353 temp2 = vec_sub(b2v3, vec_perm(b2v2, b2v2, reverse)); | |
354 temp1 = vec_madd(temp2, costabv1r, vczero); | |
355 vec_st(temp1, 48, b1); | |
356 | |
357 temp1 = vec_add(b2v4, vec_perm(b2v5, b2v5, reverse)); | |
358 vec_st(temp1, 64, b1); | |
359 temp2 = vec_sub(vec_perm(b2v4, b2v4, reverse), b2v5); | |
360 temp1 = vec_madd(temp2, costabv1r, vczero); | |
361 vec_st(temp1, 80, b1); | |
362 | |
363 temp1 = vec_add(b2v6, vec_perm(b2v7, b2v7, reverse)); | |
364 vec_st(temp1, 96, b1); | |
365 temp2 = vec_sub(b2v7, vec_perm(b2v6, b2v6, reverse)); | |
366 temp1 = vec_madd(temp2, costabv1r, vczero); | |
367 vec_st(temp1, 112, b1); | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
27318
diff
changeset
|
368 |
9002 | 369 } |
370 } | |
371 } | |
372 | |
373 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | |
374 | |
375 { | |
12131
d155623271e3
fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents:
11980
diff
changeset
|
376 register real const cos0 = mp3lib_pnts[3][0]; |
d155623271e3
fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents:
11980
diff
changeset
|
377 register real const cos1 = mp3lib_pnts[3][1]; |
9002 | 378 |
379 b2[0x00] = b1[0x00] + b1[0x03]; | |
380 b2[0x01] = b1[0x01] + b1[0x02]; | |
381 b2[0x02] = (b1[0x01] - b1[0x02]) * cos1; | |
382 b2[0x03] = (b1[0x00] - b1[0x03]) * cos0; | |
383 b2[0x04] = b1[0x04] + b1[0x07]; | |
384 b2[0x05] = b1[0x05] + b1[0x06]; | |
385 b2[0x06] = (b1[0x06] - b1[0x05]) * cos1; | |
386 b2[0x07] = (b1[0x07] - b1[0x04]) * cos0; | |
387 b2[0x08] = b1[0x08] + b1[0x0B]; | |
388 b2[0x09] = b1[0x09] + b1[0x0A]; | |
389 b2[0x0A] = (b1[0x09] - b1[0x0A]) * cos1; | |
390 b2[0x0B] = (b1[0x08] - b1[0x0B]) * cos0; | |
391 b2[0x0C] = b1[0x0C] + b1[0x0F]; | |
392 b2[0x0D] = b1[0x0D] + b1[0x0E]; | |
393 b2[0x0E] = (b1[0x0E] - b1[0x0D]) * cos1; | |
394 b2[0x0F] = (b1[0x0F] - b1[0x0C]) * cos0; | |
395 b2[0x10] = b1[0x10] + b1[0x13]; | |
396 b2[0x11] = b1[0x11] + b1[0x12]; | |
397 b2[0x12] = (b1[0x11] - b1[0x12]) * cos1; | |
398 b2[0x13] = (b1[0x10] - b1[0x13]) * cos0; | |
399 b2[0x14] = b1[0x14] + b1[0x17]; | |
400 b2[0x15] = b1[0x15] + b1[0x16]; | |
401 b2[0x16] = (b1[0x16] - b1[0x15]) * cos1; | |
402 b2[0x17] = (b1[0x17] - b1[0x14]) * cos0; | |
403 b2[0x18] = b1[0x18] + b1[0x1B]; | |
404 b2[0x19] = b1[0x19] + b1[0x1A]; | |
405 b2[0x1A] = (b1[0x19] - b1[0x1A]) * cos1; | |
406 b2[0x1B] = (b1[0x18] - b1[0x1B]) * cos0; | |
407 b2[0x1C] = b1[0x1C] + b1[0x1F]; | |
408 b2[0x1D] = b1[0x1D] + b1[0x1E]; | |
409 b2[0x1E] = (b1[0x1E] - b1[0x1D]) * cos1; | |
410 b2[0x1F] = (b1[0x1F] - b1[0x1C]) * cos0; | |
411 } | |
412 | |
413 { | |
12131
d155623271e3
fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents:
11980
diff
changeset
|
414 register real const cos0 = mp3lib_pnts[4][0]; |
9002 | 415 |
416 b1[0x00] = b2[0x00] + b2[0x01]; | |
417 b1[0x01] = (b2[0x00] - b2[0x01]) * cos0; | |
418 b1[0x02] = b2[0x02] + b2[0x03]; | |
419 b1[0x03] = (b2[0x03] - b2[0x02]) * cos0; | |
420 b1[0x02] += b1[0x03]; | |
421 | |
422 b1[0x04] = b2[0x04] + b2[0x05]; | |
423 b1[0x05] = (b2[0x04] - b2[0x05]) * cos0; | |
424 b1[0x06] = b2[0x06] + b2[0x07]; | |
425 b1[0x07] = (b2[0x07] - b2[0x06]) * cos0; | |
426 b1[0x06] += b1[0x07]; | |
427 b1[0x04] += b1[0x06]; | |
428 b1[0x06] += b1[0x05]; | |
429 b1[0x05] += b1[0x07]; | |
430 | |
431 b1[0x08] = b2[0x08] + b2[0x09]; | |
432 b1[0x09] = (b2[0x08] - b2[0x09]) * cos0; | |
433 b1[0x0A] = b2[0x0A] + b2[0x0B]; | |
434 b1[0x0B] = (b2[0x0B] - b2[0x0A]) * cos0; | |
435 b1[0x0A] += b1[0x0B]; | |
436 | |
437 b1[0x0C] = b2[0x0C] + b2[0x0D]; | |
438 b1[0x0D] = (b2[0x0C] - b2[0x0D]) * cos0; | |
439 b1[0x0E] = b2[0x0E] + b2[0x0F]; | |
440 b1[0x0F] = (b2[0x0F] - b2[0x0E]) * cos0; | |
441 b1[0x0E] += b1[0x0F]; | |
442 b1[0x0C] += b1[0x0E]; | |
443 b1[0x0E] += b1[0x0D]; | |
444 b1[0x0D] += b1[0x0F]; | |
445 | |
446 b1[0x10] = b2[0x10] + b2[0x11]; | |
447 b1[0x11] = (b2[0x10] - b2[0x11]) * cos0; | |
448 b1[0x12] = b2[0x12] + b2[0x13]; | |
449 b1[0x13] = (b2[0x13] - b2[0x12]) * cos0; | |
450 b1[0x12] += b1[0x13]; | |
451 | |
452 b1[0x14] = b2[0x14] + b2[0x15]; | |
453 b1[0x15] = (b2[0x14] - b2[0x15]) * cos0; | |
454 b1[0x16] = b2[0x16] + b2[0x17]; | |
455 b1[0x17] = (b2[0x17] - b2[0x16]) * cos0; | |
456 b1[0x16] += b1[0x17]; | |
457 b1[0x14] += b1[0x16]; | |
458 b1[0x16] += b1[0x15]; | |
459 b1[0x15] += b1[0x17]; | |
460 | |
461 b1[0x18] = b2[0x18] + b2[0x19]; | |
462 b1[0x19] = (b2[0x18] - b2[0x19]) * cos0; | |
463 b1[0x1A] = b2[0x1A] + b2[0x1B]; | |
464 b1[0x1B] = (b2[0x1B] - b2[0x1A]) * cos0; | |
465 b1[0x1A] += b1[0x1B]; | |
466 | |
467 b1[0x1C] = b2[0x1C] + b2[0x1D]; | |
468 b1[0x1D] = (b2[0x1C] - b2[0x1D]) * cos0; | |
469 b1[0x1E] = b2[0x1E] + b2[0x1F]; | |
470 b1[0x1F] = (b2[0x1F] - b2[0x1E]) * cos0; | |
471 b1[0x1E] += b1[0x1F]; | |
472 b1[0x1C] += b1[0x1E]; | |
473 b1[0x1E] += b1[0x1D]; | |
474 b1[0x1D] += b1[0x1F]; | |
475 } | |
476 | |
477 out0[0x10*16] = b1[0x00]; | |
478 out0[0x10*12] = b1[0x04]; | |
479 out0[0x10* 8] = b1[0x02]; | |
480 out0[0x10* 4] = b1[0x06]; | |
481 out0[0x10* 0] = b1[0x01]; | |
482 out1[0x10* 0] = b1[0x01]; | |
483 out1[0x10* 4] = b1[0x05]; | |
484 out1[0x10* 8] = b1[0x03]; | |
485 out1[0x10*12] = b1[0x07]; | |
486 | |
487 b1[0x08] += b1[0x0C]; | |
488 out0[0x10*14] = b1[0x08]; | |
489 b1[0x0C] += b1[0x0a]; | |
490 out0[0x10*10] = b1[0x0C]; | |
491 b1[0x0A] += b1[0x0E]; | |
492 out0[0x10* 6] = b1[0x0A]; | |
493 b1[0x0E] += b1[0x09]; | |
494 out0[0x10* 2] = b1[0x0E]; | |
495 b1[0x09] += b1[0x0D]; | |
496 out1[0x10* 2] = b1[0x09]; | |
497 b1[0x0D] += b1[0x0B]; | |
498 out1[0x10* 6] = b1[0x0D]; | |
499 b1[0x0B] += b1[0x0F]; | |
500 out1[0x10*10] = b1[0x0B]; | |
501 out1[0x10*14] = b1[0x0F]; | |
502 | |
503 b1[0x18] += b1[0x1C]; | |
504 out0[0x10*15] = b1[0x10] + b1[0x18]; | |
505 out0[0x10*13] = b1[0x18] + b1[0x14]; | |
506 b1[0x1C] += b1[0x1a]; | |
507 out0[0x10*11] = b1[0x14] + b1[0x1C]; | |
508 out0[0x10* 9] = b1[0x1C] + b1[0x12]; | |
509 b1[0x1A] += b1[0x1E]; | |
510 out0[0x10* 7] = b1[0x12] + b1[0x1A]; | |
511 out0[0x10* 5] = b1[0x1A] + b1[0x16]; | |
512 b1[0x1E] += b1[0x19]; | |
513 out0[0x10* 3] = b1[0x16] + b1[0x1E]; | |
514 out0[0x10* 1] = b1[0x1E] + b1[0x11]; | |
515 b1[0x19] += b1[0x1D]; | |
516 out1[0x10* 1] = b1[0x11] + b1[0x19]; | |
517 out1[0x10* 3] = b1[0x19] + b1[0x15]; | |
518 b1[0x1D] += b1[0x1B]; | |
519 out1[0x10* 5] = b1[0x15] + b1[0x1D]; | |
520 out1[0x10* 7] = b1[0x1D] + b1[0x13]; | |
521 b1[0x1B] += b1[0x1F]; | |
522 out1[0x10* 9] = b1[0x13] + b1[0x1B]; | |
523 out1[0x10*11] = b1[0x1B] + b1[0x17]; | |
524 out1[0x10*13] = b1[0x17] + b1[0x1F]; | |
525 out1[0x10*15] = b1[0x1F]; | |
526 } |