Mercurial > mplayer.hg
annotate mp3lib/dct64_altivec.c @ 15533:ddf15d233d58
Do not switch to audio tracks whose codec private data differs from the main audio track's as this will most likely result in messed up audio output. Patch by Michael Behrisch <list () behrisch ! de>
author | mosu |
---|---|
date | Sat, 21 May 2005 06:50:08 +0000 |
parents | d155623271e3 |
children | 2029204dd999 |
rev | line source |
---|---|
9002 | 1 |
2 /* | |
11980 | 3 * Discrete Cosine Transform (DCT) for Altivec |
4 * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org> | |
5 * based upon code from "mp3lib/dct64.c" | |
9002 | 6 */ |
7 | |
8 #define real float | |
9 | |
10 #include "mpg123.h" | |
11 | |
12 #ifdef HAVE_ALTIVEC | |
13 | |
9122 | 14 #ifndef SYS_DARWIN |
15 #include <altivec.h> | |
16 #endif | |
17 | |
9002 | 18 // used to build register permutation vectors (vcprm) |
19 // the 's' are for words in the _s_econd vector | |
// Each WORD_x expands to the four consecutive byte indices of one 32-bit
// word: 0x00-0x0f cover the first source vector, 0x10-0x1f the second.
20 #define WORD_0 0x00,0x01,0x02,0x03 | |
21 #define WORD_1 0x04,0x05,0x06,0x07 | |
22 #define WORD_2 0x08,0x09,0x0a,0x0b | |
23 #define WORD_3 0x0c,0x0d,0x0e,0x0f | |
24 #define WORD_s0 0x10,0x11,0x12,0x13 | |
25 #define WORD_s1 0x14,0x15,0x16,0x17 | |
26 #define WORD_s2 0x18,0x19,0x1a,0x1b | |
27 #define WORD_s3 0x1c,0x1d,0x1e,0x1f | |
28 | |
// vcprm(a,b,c,d) pastes four WORD_ names together into a 16-byte
// vector unsigned char literal. SYS_DARWIN builds use the parenthesised
// literal syntax, all other builds use the brace syntax.
9122 | 29 #ifdef SYS_DARWIN |
9002 | 30 #define vcprm(a,b,c,d) (const vector unsigned char)(WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d) |
9122 | 31 #else |
32 #define vcprm(a,b,c,d) (const vector unsigned char){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d} | |
33 #endif | |
9002 | 34 |
35 // vcprmle is used to keep the same index as in the SSE version. | |
36 // it's the same as vcprm, with the index order reversed | |
37 // ('le' is Little Endian) | |
38 #define vcprmle(a,b,c,d) vcprm(d,c,b,a) | |
39 | |
40 // used to build inverse/identity vectors (vcii) | |
41 // n is _n_egative, p is _p_ositive | |
42 #define FLOAT_n -1. | |
43 #define FLOAT_p 1. | |
44 | |
// vcii(a,b,c,d) pastes four FLOAT_ names (n or p) into a vector float
// literal of -1. / 1. entries, with the same Darwin / non-Darwin split
// in literal syntax as vcprm.
9122 | 45 #ifdef SYS_DARWIN |
9002 | 46 #define vcii(a,b,c,d) (const vector float)(FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d) |
9122 | 47 #else |
48 #define vcii(a,b,c,d) (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d} | |
49 #endif | |
50 | |
// FOUROF(a) is the initializer for a vector whose four elements are all a:
// spelled {a,a,a,a} on non-Darwin builds and (a) on SYS_DARWIN
// (presumably that compiler replicates the single value - TODO confirm).
51 #ifdef SYS_DARWIN | |
52 #define FOUROF(a) (a) | |
53 #else | |
54 #define FOUROF(a) {a,a,a,a} | |
55 #endif | |
9002 | 56 |
/*
 * dct64_altivec(a, b, c)
 *
 * Reads the 32 values c[0x00..0x1F] (aliased as samples), runs five
 * butterfly passes over them using the cosine tables mp3lib_pnts[0]..[4],
 * and writes the results through a (out0) and b (out1) with a stride of
 * 0x10 elements: out0[0x10*0 .. 0x10*16] and out1[0x10*0 .. 0x10*15].
 *
 * Passes 1-3 exist in two forms selected by ALTIVEC_USE_REFERENCE_C_CODE:
 * scalar reference code, or AltiVec code working on 4-float vectors.
 * Passes 4 and 5 and the output stage are scalar in both builds.
 * b1 and b2 are 16-byte aligned scratch buffers; successive passes
 * alternate between them.
 */
57 void dct64_altivec(real *a,real *b,real *c) | |
58 { | |
59 real __attribute__ ((aligned(16))) b1[0x20]; | |
60 real __attribute__ ((aligned(16))) b2[0x20]; | |
61 | |
62 real *out0 = a; | |
63 real *out1 = b; | |
64 real *samples = c; | |
65 | |
// vczero is the addend passed to every vec_madd below, so each vec_madd
// acts as a plain multiply. reverse is the vec_perm mask that flips the
// order of the four words of a vector.
9122 | 66 const vector float vczero = (const vector float)FOUROF(0.); |
9002 | 67 const vector unsigned char reverse = (const vector unsigned char)vcprm(3,2,1,0); |
68 | |
69 | |
// Diagnostic only: reports when a scratch buffer is not 16-byte aligned.
// Execution continues either way; a, b and samples are printed but not
// tested.
// NOTE(review): the arguments are real pointers passed to %p without a
// cast to void pointer, and no stdio include is visible in this file -
// presumably mpg123.h provides the printf declaration; confirm.
70 if (((unsigned long)b1 & 0x0000000F) || | |
71 ((unsigned long)b2 & 0x0000000F)) | |
72 | |
73 { | |
74 printf("MISALIGNED:\t%p\t%p\t%p\t%p\t%p\n", | |
75 b1, b2, a, b, samples); | |
76 } | |
77 | |
78 | |
79 #ifdef ALTIVEC_USE_REFERENCE_C_CODE | |
80 | |
// Reference pass 1 (samples -> b1): the lower 16 outputs are sums of
// mirrored pairs samples[i] + samples[0x1F - i]; the upper 16 are the
// mirrored differences scaled by mp3lib_pnts[0].
81 { | |
12131
d155623271e3
fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents:
11980
diff
changeset
|
82 register real *costab = mp3lib_pnts[0]; |
9002 | 83 |
84 b1[0x00] = samples[0x00] + samples[0x1F]; | |
85 b1[0x01] = samples[0x01] + samples[0x1E]; | |
86 b1[0x02] = samples[0x02] + samples[0x1D]; | |
87 b1[0x03] = samples[0x03] + samples[0x1C]; | |
88 b1[0x04] = samples[0x04] + samples[0x1B]; | |
89 b1[0x05] = samples[0x05] + samples[0x1A]; | |
90 b1[0x06] = samples[0x06] + samples[0x19]; | |
91 b1[0x07] = samples[0x07] + samples[0x18]; | |
92 b1[0x08] = samples[0x08] + samples[0x17]; | |
93 b1[0x09] = samples[0x09] + samples[0x16]; | |
94 b1[0x0A] = samples[0x0A] + samples[0x15]; | |
95 b1[0x0B] = samples[0x0B] + samples[0x14]; | |
96 b1[0x0C] = samples[0x0C] + samples[0x13]; | |
97 b1[0x0D] = samples[0x0D] + samples[0x12]; | |
98 b1[0x0E] = samples[0x0E] + samples[0x11]; | |
99 b1[0x0F] = samples[0x0F] + samples[0x10]; | |
100 b1[0x10] = (samples[0x0F] - samples[0x10]) * costab[0xF]; | |
101 b1[0x11] = (samples[0x0E] - samples[0x11]) * costab[0xE]; | |
102 b1[0x12] = (samples[0x0D] - samples[0x12]) * costab[0xD]; | |
103 b1[0x13] = (samples[0x0C] - samples[0x13]) * costab[0xC]; | |
104 b1[0x14] = (samples[0x0B] - samples[0x14]) * costab[0xB]; | |
105 b1[0x15] = (samples[0x0A] - samples[0x15]) * costab[0xA]; | |
106 b1[0x16] = (samples[0x09] - samples[0x16]) * costab[0x9]; | |
107 b1[0x17] = (samples[0x08] - samples[0x17]) * costab[0x8]; | |
108 b1[0x18] = (samples[0x07] - samples[0x18]) * costab[0x7]; | |
109 b1[0x19] = (samples[0x06] - samples[0x19]) * costab[0x6]; | |
110 b1[0x1A] = (samples[0x05] - samples[0x1A]) * costab[0x5]; | |
111 b1[0x1B] = (samples[0x04] - samples[0x1B]) * costab[0x4]; | |
112 b1[0x1C] = (samples[0x03] - samples[0x1C]) * costab[0x3]; | |
113 b1[0x1D] = (samples[0x02] - samples[0x1D]) * costab[0x2]; | |
114 b1[0x1E] = (samples[0x01] - samples[0x1E]) * costab[0x1]; | |
115 b1[0x1F] = (samples[0x00] - samples[0x1F]) * costab[0x0]; | |
116 | |
117 } | |
// Reference pass 2 (b1 -> b2): the same sum/difference pattern applied to
// each 16-element half, scaled by mp3lib_pnts[1]. In the upper half the
// operands of the subtraction are swapped relative to the lower half.
118 { | |
12131
d155623271e3
fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents:
11980
diff
changeset
|
119 register real *costab = mp3lib_pnts[1]; |
9002 | 120 |
121 b2[0x00] = b1[0x00] + b1[0x0F]; | |
122 b2[0x01] = b1[0x01] + b1[0x0E]; | |
123 b2[0x02] = b1[0x02] + b1[0x0D]; | |
124 b2[0x03] = b1[0x03] + b1[0x0C]; | |
125 b2[0x04] = b1[0x04] + b1[0x0B]; | |
126 b2[0x05] = b1[0x05] + b1[0x0A]; | |
127 b2[0x06] = b1[0x06] + b1[0x09]; | |
128 b2[0x07] = b1[0x07] + b1[0x08]; | |
129 b2[0x08] = (b1[0x07] - b1[0x08]) * costab[7]; | |
130 b2[0x09] = (b1[0x06] - b1[0x09]) * costab[6]; | |
131 b2[0x0A] = (b1[0x05] - b1[0x0A]) * costab[5]; | |
132 b2[0x0B] = (b1[0x04] - b1[0x0B]) * costab[4]; | |
133 b2[0x0C] = (b1[0x03] - b1[0x0C]) * costab[3]; | |
134 b2[0x0D] = (b1[0x02] - b1[0x0D]) * costab[2]; | |
135 b2[0x0E] = (b1[0x01] - b1[0x0E]) * costab[1]; | |
136 b2[0x0F] = (b1[0x00] - b1[0x0F]) * costab[0]; | |
137 b2[0x10] = b1[0x10] + b1[0x1F]; | |
138 b2[0x11] = b1[0x11] + b1[0x1E]; | |
139 b2[0x12] = b1[0x12] + b1[0x1D]; | |
140 b2[0x13] = b1[0x13] + b1[0x1C]; | |
141 b2[0x14] = b1[0x14] + b1[0x1B]; | |
142 b2[0x15] = b1[0x15] + b1[0x1A]; | |
143 b2[0x16] = b1[0x16] + b1[0x19]; | |
144 b2[0x17] = b1[0x17] + b1[0x18]; | |
145 b2[0x18] = (b1[0x18] - b1[0x17]) * costab[7]; | |
146 b2[0x19] = (b1[0x19] - b1[0x16]) * costab[6]; | |
147 b2[0x1A] = (b1[0x1A] - b1[0x15]) * costab[5]; | |
148 b2[0x1B] = (b1[0x1B] - b1[0x14]) * costab[4]; | |
149 b2[0x1C] = (b1[0x1C] - b1[0x13]) * costab[3]; | |
150 b2[0x1D] = (b1[0x1D] - b1[0x12]) * costab[2]; | |
151 b2[0x1E] = (b1[0x1E] - b1[0x11]) * costab[1]; | |
152 b2[0x1F] = (b1[0x1F] - b1[0x10]) * costab[0]; | |
153 | |
154 } | |
155 | |
// Reference pass 3 (b2 -> b1): the same pattern on groups of 8 elements,
// scaled by mp3lib_pnts[2]; every second group has the subtraction
// operands swapped.
156 { | |
12131
d155623271e3
fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents:
11980
diff
changeset
|
157 register real *costab = mp3lib_pnts[2]; |
9002 | 158 |
159 b1[0x00] = b2[0x00] + b2[0x07]; | |
160 b1[0x01] = b2[0x01] + b2[0x06]; | |
161 b1[0x02] = b2[0x02] + b2[0x05]; | |
162 b1[0x03] = b2[0x03] + b2[0x04]; | |
163 b1[0x04] = (b2[0x03] - b2[0x04]) * costab[3]; | |
164 b1[0x05] = (b2[0x02] - b2[0x05]) * costab[2]; | |
165 b1[0x06] = (b2[0x01] - b2[0x06]) * costab[1]; | |
166 b1[0x07] = (b2[0x00] - b2[0x07]) * costab[0]; | |
167 b1[0x08] = b2[0x08] + b2[0x0F]; | |
168 b1[0x09] = b2[0x09] + b2[0x0E]; | |
169 b1[0x0A] = b2[0x0A] + b2[0x0D]; | |
170 b1[0x0B] = b2[0x0B] + b2[0x0C]; | |
171 b1[0x0C] = (b2[0x0C] - b2[0x0B]) * costab[3]; | |
172 b1[0x0D] = (b2[0x0D] - b2[0x0A]) * costab[2]; | |
173 b1[0x0E] = (b2[0x0E] - b2[0x09]) * costab[1]; | |
174 b1[0x0F] = (b2[0x0F] - b2[0x08]) * costab[0]; | |
175 b1[0x10] = b2[0x10] + b2[0x17]; | |
176 b1[0x11] = b2[0x11] + b2[0x16]; | |
177 b1[0x12] = b2[0x12] + b2[0x15]; | |
178 b1[0x13] = b2[0x13] + b2[0x14]; | |
179 b1[0x14] = (b2[0x13] - b2[0x14]) * costab[3]; | |
180 b1[0x15] = (b2[0x12] - b2[0x15]) * costab[2]; | |
181 b1[0x16] = (b2[0x11] - b2[0x16]) * costab[1]; | |
182 b1[0x17] = (b2[0x10] - b2[0x17]) * costab[0]; | |
183 b1[0x18] = b2[0x18] + b2[0x1F]; | |
184 b1[0x19] = b2[0x19] + b2[0x1E]; | |
185 b1[0x1A] = b2[0x1A] + b2[0x1D]; | |
186 b1[0x1B] = b2[0x1B] + b2[0x1C]; | |
187 b1[0x1C] = (b2[0x1C] - b2[0x1B]) * costab[3]; | |
188 b1[0x1D] = (b2[0x1D] - b2[0x1A]) * costab[2]; | |
189 b1[0x1E] = (b2[0x1E] - b2[0x19]) * costab[1]; | |
190 b1[0x1F] = (b2[0x1F] - b2[0x18]) * costab[0]; | |
191 } | |
192 | |
193 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | |
194 | |
195 // How does it work? | |
196 // the first three passes are reproduced in the three blocks below | |
197 // all computations are done on 4-element vectors | |
198 // 'reverse' is a special permutation vector used to reverse | |
199 // the order of the elements inside a vector. | |
200 // note that all loads/stores to b1 (b2) between passes 1 and 2 (2 and 3) | |
201 // have been removed, all elements are stored inside b1vX (b2vX) | |
202 { | |
203 register vector float | |
204 b1v0, b1v1, b1v2, b1v3, | |
205 b1v4, b1v5, b1v6, b1v7; | |
206 register vector float | |
207 temp1, temp2; | |
208 | |
// Vector pass 1: b1v0..b1v7 take the place of b1[0x00..0x1F] from the
// reference code, four elements per vector.
209 { | |
12131
d155623271e3
fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents:
11980
diff
changeset
|
210 register real *costab = mp3lib_pnts[0]; |
9002 | 211 |
212 register vector float | |
213 samplesv1, samplesv2, samplesv3, samplesv4, | |
214 samplesv5, samplesv6, samplesv7, samplesv8, | |
215 samplesv9; | |
216 register vector unsigned char samples_perm = vec_lvsl(0, samples); | |
217 register vector float costabv1, costabv2, costabv3, costabv4, costabv5; | |
218 register vector unsigned char costab_perm = vec_lvsl(0, costab); | |
219 | |
// Presumably the usual AltiVec unaligned-load sequence: each vec_perm
// merges two neighbouring vec_ld results through the mask from vec_lvsl,
// so samplesv1..samplesv8 end up holding samples[0x00..0x1F].
// NOTE(review): the load at byte offset 128 only feeds the second operand
// of the last vec_perm and lies past the 32 input floats when samples is
// already 16-byte aligned - confirm the caller buffer makes this safe.
220 samplesv1 = vec_ld(0, samples); | |
221 samplesv2 = vec_ld(16, samples); | |
222 samplesv1 = vec_perm(samplesv1, samplesv2, samples_perm); | |
223 samplesv3 = vec_ld(32, samples); | |
224 samplesv2 = vec_perm(samplesv2, samplesv3, samples_perm); | |
225 samplesv4 = vec_ld(48, samples); | |
226 samplesv3 = vec_perm(samplesv3, samplesv4, samples_perm); | |
227 samplesv5 = vec_ld(64, samples); | |
228 samplesv4 = vec_perm(samplesv4, samplesv5, samples_perm); | |
229 samplesv6 = vec_ld(80, samples); | |
230 samplesv5 = vec_perm(samplesv5, samplesv6, samples_perm); | |
231 samplesv7 = vec_ld(96, samples); | |
232 samplesv6 = vec_perm(samplesv6, samplesv7, samples_perm); | |
233 samplesv8 = vec_ld(112, samples); | |
234 samplesv7 = vec_perm(samplesv7, samplesv8, samples_perm); | |
235 samplesv9 = vec_ld(128, samples); | |
236 samplesv8 = vec_perm(samplesv8, samplesv9, samples_perm); | |
237 | |
// Sums: vector k is added to the word-reversed mirror vector (9 - k),
// giving the vector form of samples[i] + samples[0x1F - i].
238 temp1 = vec_add(samplesv1, | |
239 vec_perm(samplesv8, samplesv8, reverse)); | |
240 //vec_st(temp1, 0, b1); | |
241 b1v0 = temp1; | |
242 temp1 = vec_add(samplesv2, | |
243 vec_perm(samplesv7, samplesv7, reverse)); | |
244 //vec_st(temp1, 16, b1); | |
245 b1v1 = temp1; | |
246 temp1 = vec_add(samplesv3, | |
247 vec_perm(samplesv6, samplesv6, reverse)); | |
248 //vec_st(temp1, 32, b1); | |
249 b1v2 = temp1; | |
250 temp1 = vec_add(samplesv4, | |
251 vec_perm(samplesv5, samplesv5, reverse)); | |
252 //vec_st(temp1, 48, b1); | |
253 b1v3 = temp1; | |
254 | |
// Same load sequence for the 16 coefficients of mp3lib_pnts[0].
// NOTE(review): the vec_ld at byte offset 64 is again one vector past the
// 16 floats that are used - confirm the table layout allows it.
255 costabv1 = vec_ld(0, costab); | |
256 costabv2 = vec_ld(16, costab); | |
257 costabv1 = vec_perm(costabv1, costabv2, costab_perm); | |
258 costabv3 = vec_ld(32, costab); | |
259 costabv2 = vec_perm(costabv2, costabv3, costab_perm); | |
260 costabv4 = vec_ld(48, costab); | |
261 costabv3 = vec_perm(costabv3, costabv4, costab_perm); | |
262 costabv5 = vec_ld(64, costab); | |
263 costabv4 = vec_perm(costabv4, costabv5, costab_perm); | |
264 | |
// Differences: reversed lower vector minus its mirror vector, multiplied
// by the reversed coefficient vector (vec_madd with a zero addend).
265 temp1 = vec_sub(vec_perm(samplesv4, samplesv4, reverse), | |
266 samplesv5); | |
267 temp2 = vec_madd(temp1, | |
268 vec_perm(costabv4, costabv4, reverse), | |
269 vczero); | |
270 //vec_st(temp2, 64, b1); | |
271 b1v4 = temp2; | |
272 | |
273 temp1 = vec_sub(vec_perm(samplesv3, samplesv3, reverse), | |
274 samplesv6); | |
275 temp2 = vec_madd(temp1, | |
276 vec_perm(costabv3, costabv3, reverse), | |
277 vczero); | |
278 //vec_st(temp2, 80, b1); | |
279 b1v5 = temp2; | |
280 temp1 = vec_sub(vec_perm(samplesv2, samplesv2, reverse), | |
281 samplesv7); | |
282 temp2 = vec_madd(temp1, | |
283 vec_perm(costabv2, costabv2, reverse), | |
284 vczero); | |
285 //vec_st(temp2, 96, b1); | |
286 b1v6 = temp2; | |
287 | |
288 temp1 = vec_sub(vec_perm(samplesv1, samplesv1, reverse), | |
289 samplesv8); | |
290 temp2 = vec_madd(temp1, | |
291 vec_perm(costabv1, costabv1, reverse), | |
292 vczero); | |
293 //vec_st(temp2, 112, b1); | |
294 b1v7 = temp2; | |
295 | |
296 } | |
297 | |
298 { | |
299 register vector float | |
300 b2v0, b2v1, b2v2, b2v3, | |
301 b2v4, b2v5, b2v6, b2v7; | |
// Vector pass 2: b1v0..b1v7 -> b2v0..b2v7 using the 8 coefficients of
// mp3lib_pnts[1]; the reversed coefficient vectors are built once and
// shared by both halves.
302 { | |
12131
d155623271e3
fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents:
11980
diff
changeset
|
303 register real *costab = mp3lib_pnts[1]; |
9002 | 304 register vector float costabv1r, costabv2r, costabv1, costabv2, costabv3; |
305 register vector unsigned char costab_perm = vec_lvsl(0, costab); | |
306 | |
307 costabv1 = vec_ld(0, costab); | |
308 costabv2 = vec_ld(16, costab); | |
309 costabv1 = vec_perm(costabv1, costabv2, costab_perm); | |
310 costabv3 = vec_ld(32, costab); | |
311 costabv2 = vec_perm(costabv2, costabv3 , costab_perm); | |
312 costabv1r = vec_perm(costabv1, costabv1, reverse); | |
313 costabv2r = vec_perm(costabv2, costabv2, reverse); | |
314 | |
315 temp1 = vec_add(b1v0, vec_perm(b1v3, b1v3, reverse)); | |
316 //vec_st(temp1, 0, b2); | |
317 b2v0 = temp1; | |
318 temp1 = vec_add(b1v1, vec_perm(b1v2, b1v2, reverse)); | |
319 //vec_st(temp1, 16, b2); | |
320 b2v1 = temp1; | |
321 temp2 = vec_sub(vec_perm(b1v1, b1v1, reverse), b1v2); | |
322 temp1 = vec_madd(temp2, costabv2r, vczero); | |
323 //vec_st(temp1, 32, b2); | |
324 b2v2 = temp1; | |
325 temp2 = vec_sub(vec_perm(b1v0, b1v0, reverse), b1v3); | |
326 temp1 = vec_madd(temp2, costabv1r, vczero); | |
327 //vec_st(temp1, 48, b2); | |
328 b2v3 = temp1; | |
329 temp1 = vec_add(b1v4, vec_perm(b1v7, b1v7, reverse)); | |
330 //vec_st(temp1, 64, b2); | |
331 b2v4 = temp1; | |
332 temp1 = vec_add(b1v5, vec_perm(b1v6, b1v6, reverse)); | |
333 //vec_st(temp1, 80, b2); | |
334 b2v5 = temp1; | |
335 temp2 = vec_sub(b1v6, vec_perm(b1v5, b1v5, reverse)); | |
336 temp1 = vec_madd(temp2, costabv2r, vczero); | |
337 //vec_st(temp1, 96, b2); | |
338 b2v6 = temp1; | |
339 temp2 = vec_sub(b1v7, vec_perm(b1v4, b1v4, reverse)); | |
340 temp1 = vec_madd(temp2, costabv1r, vczero); | |
341 //vec_st(temp1, 112, b2); | |
342 b2v7 = temp1; | |
343 } | |
344 | |
// Vector pass 3: b2v0..b2v7 -> b1 using the 4 coefficients of
// mp3lib_pnts[2]. This is the only vector pass that stores to memory
// (vec_st into b1), because the scalar passes below read b1.
345 { | |
12131
d155623271e3
fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents:
11980
diff
changeset
|
346 register real *costab = mp3lib_pnts[2]; |
9002 | 347 |
348 | |
349 vector float costabv1r, costabv1, costabv2; | |
350 vector unsigned char costab_perm = vec_lvsl(0, costab); | |
351 | |
352 costabv1 = vec_ld(0, costab); | |
353 costabv2 = vec_ld(16, costab); | |
354 costabv1 = vec_perm(costabv1, costabv2, costab_perm); | |
355 costabv1r = vec_perm(costabv1, costabv1, reverse); | |
356 | |
357 temp1 = vec_add(b2v0, vec_perm(b2v1, b2v1, reverse)); | |
358 vec_st(temp1, 0, b1); | |
359 temp2 = vec_sub(vec_perm(b2v0, b2v0, reverse), b2v1); | |
360 temp1 = vec_madd(temp2, costabv1r, vczero); | |
361 vec_st(temp1, 16, b1); | |
362 | |
363 temp1 = vec_add(b2v2, vec_perm(b2v3, b2v3, reverse)); | |
364 vec_st(temp1, 32, b1); | |
365 temp2 = vec_sub(b2v3, vec_perm(b2v2, b2v2, reverse)); | |
366 temp1 = vec_madd(temp2, costabv1r, vczero); | |
367 vec_st(temp1, 48, b1); | |
368 | |
369 temp1 = vec_add(b2v4, vec_perm(b2v5, b2v5, reverse)); | |
370 vec_st(temp1, 64, b1); | |
371 temp2 = vec_sub(vec_perm(b2v4, b2v4, reverse), b2v5); | |
372 temp1 = vec_madd(temp2, costabv1r, vczero); | |
373 vec_st(temp1, 80, b1); | |
374 | |
375 temp1 = vec_add(b2v6, vec_perm(b2v7, b2v7, reverse)); | |
376 vec_st(temp1, 96, b1); | |
377 temp2 = vec_sub(b2v7, vec_perm(b2v6, b2v6, reverse)); | |
378 temp1 = vec_madd(temp2, costabv1r, vczero); | |
379 vec_st(temp1, 112, b1); | |
380 | |
381 } | |
382 } | |
383 } | |
384 | |
385 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | |
386 | |
// Pass 4 (b1 -> b2), scalar in both builds: sum/difference on groups of
// 4 elements with the two coefficients mp3lib_pnts[3][0] and [3][1];
// every second group has the subtraction operands swapped.
387 { | |
12131
d155623271e3
fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents:
11980
diff
changeset
|
388 register real const cos0 = mp3lib_pnts[3][0]; |
d155623271e3
fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents:
11980
diff
changeset
|
389 register real const cos1 = mp3lib_pnts[3][1]; |
9002 | 390 |
391 b2[0x00] = b1[0x00] + b1[0x03]; | |
392 b2[0x01] = b1[0x01] + b1[0x02]; | |
393 b2[0x02] = (b1[0x01] - b1[0x02]) * cos1; | |
394 b2[0x03] = (b1[0x00] - b1[0x03]) * cos0; | |
395 b2[0x04] = b1[0x04] + b1[0x07]; | |
396 b2[0x05] = b1[0x05] + b1[0x06]; | |
397 b2[0x06] = (b1[0x06] - b1[0x05]) * cos1; | |
398 b2[0x07] = (b1[0x07] - b1[0x04]) * cos0; | |
399 b2[0x08] = b1[0x08] + b1[0x0B]; | |
400 b2[0x09] = b1[0x09] + b1[0x0A]; | |
401 b2[0x0A] = (b1[0x09] - b1[0x0A]) * cos1; | |
402 b2[0x0B] = (b1[0x08] - b1[0x0B]) * cos0; | |
403 b2[0x0C] = b1[0x0C] + b1[0x0F]; | |
404 b2[0x0D] = b1[0x0D] + b1[0x0E]; | |
405 b2[0x0E] = (b1[0x0E] - b1[0x0D]) * cos1; | |
406 b2[0x0F] = (b1[0x0F] - b1[0x0C]) * cos0; | |
407 b2[0x10] = b1[0x10] + b1[0x13]; | |
408 b2[0x11] = b1[0x11] + b1[0x12]; | |
409 b2[0x12] = (b1[0x11] - b1[0x12]) * cos1; | |
410 b2[0x13] = (b1[0x10] - b1[0x13]) * cos0; | |
411 b2[0x14] = b1[0x14] + b1[0x17]; | |
412 b2[0x15] = b1[0x15] + b1[0x16]; | |
413 b2[0x16] = (b1[0x16] - b1[0x15]) * cos1; | |
414 b2[0x17] = (b1[0x17] - b1[0x14]) * cos0; | |
415 b2[0x18] = b1[0x18] + b1[0x1B]; | |
416 b2[0x19] = b1[0x19] + b1[0x1A]; | |
417 b2[0x1A] = (b1[0x19] - b1[0x1A]) * cos1; | |
418 b2[0x1B] = (b1[0x18] - b1[0x1B]) * cos0; | |
419 b2[0x1C] = b1[0x1C] + b1[0x1F]; | |
420 b2[0x1D] = b1[0x1D] + b1[0x1E]; | |
421 b2[0x1E] = (b1[0x1E] - b1[0x1D]) * cos1; | |
422 b2[0x1F] = (b1[0x1F] - b1[0x1C]) * cos0; | |
423 } | |
424 | |
// Pass 5 (b2 -> b1): sum/difference on pairs with the single coefficient
// mp3lib_pnts[4][0], followed by in-place accumulation inside each group.
// The += statements read values updated by the lines just above them, so
// their order matters.
425 { | |
12131
d155623271e3
fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents:
11980
diff
changeset
|
426 register real const cos0 = mp3lib_pnts[4][0]; |
9002 | 427 |
428 b1[0x00] = b2[0x00] + b2[0x01]; | |
429 b1[0x01] = (b2[0x00] - b2[0x01]) * cos0; | |
430 b1[0x02] = b2[0x02] + b2[0x03]; | |
431 b1[0x03] = (b2[0x03] - b2[0x02]) * cos0; | |
432 b1[0x02] += b1[0x03]; | |
433 | |
434 b1[0x04] = b2[0x04] + b2[0x05]; | |
435 b1[0x05] = (b2[0x04] - b2[0x05]) * cos0; | |
436 b1[0x06] = b2[0x06] + b2[0x07]; | |
437 b1[0x07] = (b2[0x07] - b2[0x06]) * cos0; | |
438 b1[0x06] += b1[0x07]; | |
439 b1[0x04] += b1[0x06]; | |
440 b1[0x06] += b1[0x05]; | |
441 b1[0x05] += b1[0x07]; | |
442 | |
443 b1[0x08] = b2[0x08] + b2[0x09]; | |
444 b1[0x09] = (b2[0x08] - b2[0x09]) * cos0; | |
445 b1[0x0A] = b2[0x0A] + b2[0x0B]; | |
446 b1[0x0B] = (b2[0x0B] - b2[0x0A]) * cos0; | |
447 b1[0x0A] += b1[0x0B]; | |
448 | |
449 b1[0x0C] = b2[0x0C] + b2[0x0D]; | |
450 b1[0x0D] = (b2[0x0C] - b2[0x0D]) * cos0; | |
451 b1[0x0E] = b2[0x0E] + b2[0x0F]; | |
452 b1[0x0F] = (b2[0x0F] - b2[0x0E]) * cos0; | |
453 b1[0x0E] += b1[0x0F]; | |
454 b1[0x0C] += b1[0x0E]; | |
455 b1[0x0E] += b1[0x0D]; | |
456 b1[0x0D] += b1[0x0F]; | |
457 | |
458 b1[0x10] = b2[0x10] + b2[0x11]; | |
459 b1[0x11] = (b2[0x10] - b2[0x11]) * cos0; | |
460 b1[0x12] = b2[0x12] + b2[0x13]; | |
461 b1[0x13] = (b2[0x13] - b2[0x12]) * cos0; | |
462 b1[0x12] += b1[0x13]; | |
463 | |
464 b1[0x14] = b2[0x14] + b2[0x15]; | |
465 b1[0x15] = (b2[0x14] - b2[0x15]) * cos0; | |
466 b1[0x16] = b2[0x16] + b2[0x17]; | |
467 b1[0x17] = (b2[0x17] - b2[0x16]) * cos0; | |
468 b1[0x16] += b1[0x17]; | |
469 b1[0x14] += b1[0x16]; | |
470 b1[0x16] += b1[0x15]; | |
471 b1[0x15] += b1[0x17]; | |
472 | |
473 b1[0x18] = b2[0x18] + b2[0x19]; | |
474 b1[0x19] = (b2[0x18] - b2[0x19]) * cos0; | |
475 b1[0x1A] = b2[0x1A] + b2[0x1B]; | |
476 b1[0x1B] = (b2[0x1B] - b2[0x1A]) * cos0; | |
477 b1[0x1A] += b1[0x1B]; | |
478 | |
479 b1[0x1C] = b2[0x1C] + b2[0x1D]; | |
480 b1[0x1D] = (b2[0x1C] - b2[0x1D]) * cos0; | |
481 b1[0x1E] = b2[0x1E] + b2[0x1F]; | |
482 b1[0x1F] = (b2[0x1F] - b2[0x1E]) * cos0; | |
483 b1[0x1E] += b1[0x1F]; | |
484 b1[0x1C] += b1[0x1E]; | |
485 b1[0x1E] += b1[0x1D]; | |
486 b1[0x1D] += b1[0x1F]; | |
487 } | |
488 | |
// Output stage: scatter b1 into out0 and out1 at a stride of 0x10
// elements. b1[0x01] is written to both out0[0] and out1[0].
// b1[0x00..0x07] are copied as they are.
489 out0[0x10*16] = b1[0x00]; | |
490 out0[0x10*12] = b1[0x04]; | |
491 out0[0x10* 8] = b1[0x02]; | |
492 out0[0x10* 4] = b1[0x06]; | |
493 out0[0x10* 0] = b1[0x01]; | |
494 out1[0x10* 0] = b1[0x01]; | |
495 out1[0x10* 4] = b1[0x05]; | |
496 out1[0x10* 8] = b1[0x03]; | |
497 out1[0x10*12] = b1[0x07]; | |
498 | |
// b1[0x08..0x0F]: each element is first increased by a neighbour and then
// emitted; later lines reuse the already-updated values.
499 b1[0x08] += b1[0x0C]; | |
500 out0[0x10*14] = b1[0x08]; | |
501 b1[0x0C] += b1[0x0a]; | |
502 out0[0x10*10] = b1[0x0C]; | |
503 b1[0x0A] += b1[0x0E]; | |
504 out0[0x10* 6] = b1[0x0A]; | |
505 b1[0x0E] += b1[0x09]; | |
506 out0[0x10* 2] = b1[0x0E]; | |
507 b1[0x09] += b1[0x0D]; | |
508 out1[0x10* 2] = b1[0x09]; | |
509 b1[0x0D] += b1[0x0B]; | |
510 out1[0x10* 6] = b1[0x0D]; | |
511 b1[0x0B] += b1[0x0F]; | |
512 out1[0x10*10] = b1[0x0B]; | |
513 out1[0x10*14] = b1[0x0F]; | |
514 | |
// b1[0x10..0x1F]: fills the odd-numbered slots of out0 and out1, each
// from the sum of two b1 entries (the last one, out1[0x10*15], is a copy).
515 b1[0x18] += b1[0x1C]; | |
516 out0[0x10*15] = b1[0x10] + b1[0x18]; | |
517 out0[0x10*13] = b1[0x18] + b1[0x14]; | |
518 b1[0x1C] += b1[0x1a]; | |
519 out0[0x10*11] = b1[0x14] + b1[0x1C]; | |
520 out0[0x10* 9] = b1[0x1C] + b1[0x12]; | |
521 b1[0x1A] += b1[0x1E]; | |
522 out0[0x10* 7] = b1[0x12] + b1[0x1A]; | |
523 out0[0x10* 5] = b1[0x1A] + b1[0x16]; | |
524 b1[0x1E] += b1[0x19]; | |
525 out0[0x10* 3] = b1[0x16] + b1[0x1E]; | |
526 out0[0x10* 1] = b1[0x1E] + b1[0x11]; | |
527 b1[0x19] += b1[0x1D]; | |
528 out1[0x10* 1] = b1[0x11] + b1[0x19]; | |
529 out1[0x10* 3] = b1[0x19] + b1[0x15]; | |
530 b1[0x1D] += b1[0x1B]; | |
531 out1[0x10* 5] = b1[0x15] + b1[0x1D]; | |
532 out1[0x10* 7] = b1[0x1D] + b1[0x13]; | |
533 b1[0x1B] += b1[0x1F]; | |
534 out1[0x10* 9] = b1[0x13] + b1[0x1B]; | |
535 out1[0x10*11] = b1[0x1B] + b1[0x17]; | |
536 out1[0x10*13] = b1[0x17] + b1[0x1F]; | |
537 out1[0x10*15] = b1[0x1F]; | |
538 } | |
539 | |
9122 | 540 #endif /* HAVE_ALTIVEC */ |
9002 | 541 |