Mercurial > mplayer.hg
annotate mp3lib/dct64_altivec.c @ 34234:4ec96d5d2e4c
build: drop releaseclean target
The target is supposed to remove files that are created during the XML build
process without removing the generated documentation. Unfortunately, it does
not work as expected and is not worth the extra complication.
author | diego |
---|---|
date | Mon, 07 Nov 2011 19:54:38 +0000 |
parents | 347d152a5cfa |
children |
rev | line source |
---|---|
9002 | 1 |
2 /* | |
11980 | 3 * Discrete Cosine Transform (DCT) for Altivec |
4 * Copyright (c) 2004 Romain Dolbeau <romain@dolbeau.org> | |
5 * based upon code from "mp3lib/dct64.c" | |
18848 | 6 * This file is free software; you can redistribute it and/or |
7 * modify it under the terms of the GNU Lesser General Public License | |
9002 | 8 */ |
9 | |
25341 | 10 #include <stdio.h> |
9002 | 11 #include "mpg123.h" |
12 | |
25328
6f0309e575e0
There is a check for altivec.h in configure so use the preprocessor directive
diego
parents:
25327
diff
changeset
|
13 #ifdef HAVE_ALTIVEC_H |
9122 | 14 #include <altivec.h> |
15 #endif | |
16 | |
9002 | 17 // used to build register permutation vectors (vcprm) |
18 // the 's' are for words in the _s_econd vector | |
19 #define WORD_0 0x00,0x01,0x02,0x03 | |
20 #define WORD_1 0x04,0x05,0x06,0x07 | |
21 #define WORD_2 0x08,0x09,0x0a,0x0b | |
22 #define WORD_3 0x0c,0x0d,0x0e,0x0f | |
23 #define WORD_s0 0x10,0x11,0x12,0x13 | |
24 #define WORD_s1 0x14,0x15,0x16,0x17 | |
25 #define WORD_s2 0x18,0x19,0x1a,0x1b | |
26 #define WORD_s3 0x1c,0x1d,0x1e,0x1f | |
27 | |
27318
bb5ed9aa34fc
Remove AltiVec vector declaration compiler compatibility macros.
diego
parents:
26895
diff
changeset
|
28 #define vcprm(a,b,c,d) (const vector unsigned char){WORD_ ## a, WORD_ ## b, WORD_ ## c, WORD_ ## d} |
bb5ed9aa34fc
Remove AltiVec vector declaration compiler compatibility macros.
diego
parents:
26895
diff
changeset
|
29 #define vcii(a,b,c,d) (const vector float){FLOAT_ ## a, FLOAT_ ## b, FLOAT_ ## c, FLOAT_ ## d} |
9002 | 30 |
27318
bb5ed9aa34fc
Remove AltiVec vector declaration compiler compatibility macros.
diego
parents:
26895
diff
changeset
|
31 #define FOUROF(a) {a,a,a,a} |
25996 | 32 |
9002 | 33 // vcprmle is used to keep the same index as in the SSE version. |
34 // it's the same as vcprm, with the indices reversed | |
35 // ('le' is Little Endian) | |
36 #define vcprmle(a,b,c,d) vcprm(d,c,b,a) | |
37 | |
38 // used to build inverse/identity vectors (vcii) | |
39 // n is _n_egative, p is _p_ositive | |
40 #define FLOAT_n -1. | |
41 #define FLOAT_p 1. | |
42 | |
43 void dct64_altivec(real *a,real *b,real *c) | |
44 { | |
45 real __attribute__ ((aligned(16))) b1[0x20]; | |
46 real __attribute__ ((aligned(16))) b2[0x20]; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
27318
diff
changeset
|
47 |
9002 | 48 real *out0 = a; |
49 real *out1 = b; | |
50 real *samples = c; | |
51 | |
9122 | 52 const vector float vczero = (const vector float)FOUROF(0.); |
9002 | 53 const vector unsigned char reverse = (const vector unsigned char)vcprm(3,2,1,0); |
54 | |
55 | |
56 if (((unsigned long)b1 & 0x0000000F) || | |
57 ((unsigned long)b2 & 0x0000000F)) | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
27318
diff
changeset
|
58 |
9002 | 59 { |
60 printf("MISALIGNED:\t%p\t%p\t%p\t%p\t%p\n", | |
61 b1, b2, a, b, samples); | |
62 } | |
63 | |
64 | |
65 #ifdef ALTIVEC_USE_REFERENCE_C_CODE | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
27318
diff
changeset
|
66 |
9002 | 67 { |
12131
d155623271e3
fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents:
11980
diff
changeset
|
68 register real *costab = mp3lib_pnts[0]; |
9002 | 69 |
70 b1[0x00] = samples[0x00] + samples[0x1F]; | |
71 b1[0x01] = samples[0x01] + samples[0x1E]; | |
72 b1[0x02] = samples[0x02] + samples[0x1D]; | |
73 b1[0x03] = samples[0x03] + samples[0x1C]; | |
74 b1[0x04] = samples[0x04] + samples[0x1B]; | |
75 b1[0x05] = samples[0x05] + samples[0x1A]; | |
76 b1[0x06] = samples[0x06] + samples[0x19]; | |
77 b1[0x07] = samples[0x07] + samples[0x18]; | |
78 b1[0x08] = samples[0x08] + samples[0x17]; | |
79 b1[0x09] = samples[0x09] + samples[0x16]; | |
80 b1[0x0A] = samples[0x0A] + samples[0x15]; | |
81 b1[0x0B] = samples[0x0B] + samples[0x14]; | |
82 b1[0x0C] = samples[0x0C] + samples[0x13]; | |
83 b1[0x0D] = samples[0x0D] + samples[0x12]; | |
84 b1[0x0E] = samples[0x0E] + samples[0x11]; | |
85 b1[0x0F] = samples[0x0F] + samples[0x10]; | |
86 b1[0x10] = (samples[0x0F] - samples[0x10]) * costab[0xF]; | |
87 b1[0x11] = (samples[0x0E] - samples[0x11]) * costab[0xE]; | |
88 b1[0x12] = (samples[0x0D] - samples[0x12]) * costab[0xD]; | |
89 b1[0x13] = (samples[0x0C] - samples[0x13]) * costab[0xC]; | |
90 b1[0x14] = (samples[0x0B] - samples[0x14]) * costab[0xB]; | |
91 b1[0x15] = (samples[0x0A] - samples[0x15]) * costab[0xA]; | |
92 b1[0x16] = (samples[0x09] - samples[0x16]) * costab[0x9]; | |
93 b1[0x17] = (samples[0x08] - samples[0x17]) * costab[0x8]; | |
94 b1[0x18] = (samples[0x07] - samples[0x18]) * costab[0x7]; | |
95 b1[0x19] = (samples[0x06] - samples[0x19]) * costab[0x6]; | |
96 b1[0x1A] = (samples[0x05] - samples[0x1A]) * costab[0x5]; | |
97 b1[0x1B] = (samples[0x04] - samples[0x1B]) * costab[0x4]; | |
98 b1[0x1C] = (samples[0x03] - samples[0x1C]) * costab[0x3]; | |
99 b1[0x1D] = (samples[0x02] - samples[0x1D]) * costab[0x2]; | |
100 b1[0x1E] = (samples[0x01] - samples[0x1E]) * costab[0x1]; | |
101 b1[0x1F] = (samples[0x00] - samples[0x1F]) * costab[0x0]; | |
102 | |
103 } | |
104 { | |
12131
d155623271e3
fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents:
11980
diff
changeset
|
105 register real *costab = mp3lib_pnts[1]; |
9002 | 106 |
107 b2[0x00] = b1[0x00] + b1[0x0F]; | |
108 b2[0x01] = b1[0x01] + b1[0x0E]; | |
109 b2[0x02] = b1[0x02] + b1[0x0D]; | |
110 b2[0x03] = b1[0x03] + b1[0x0C]; | |
111 b2[0x04] = b1[0x04] + b1[0x0B]; | |
112 b2[0x05] = b1[0x05] + b1[0x0A]; | |
113 b2[0x06] = b1[0x06] + b1[0x09]; | |
114 b2[0x07] = b1[0x07] + b1[0x08]; | |
115 b2[0x08] = (b1[0x07] - b1[0x08]) * costab[7]; | |
116 b2[0x09] = (b1[0x06] - b1[0x09]) * costab[6]; | |
117 b2[0x0A] = (b1[0x05] - b1[0x0A]) * costab[5]; | |
118 b2[0x0B] = (b1[0x04] - b1[0x0B]) * costab[4]; | |
119 b2[0x0C] = (b1[0x03] - b1[0x0C]) * costab[3]; | |
120 b2[0x0D] = (b1[0x02] - b1[0x0D]) * costab[2]; | |
121 b2[0x0E] = (b1[0x01] - b1[0x0E]) * costab[1]; | |
122 b2[0x0F] = (b1[0x00] - b1[0x0F]) * costab[0]; | |
123 b2[0x10] = b1[0x10] + b1[0x1F]; | |
124 b2[0x11] = b1[0x11] + b1[0x1E]; | |
125 b2[0x12] = b1[0x12] + b1[0x1D]; | |
126 b2[0x13] = b1[0x13] + b1[0x1C]; | |
127 b2[0x14] = b1[0x14] + b1[0x1B]; | |
128 b2[0x15] = b1[0x15] + b1[0x1A]; | |
129 b2[0x16] = b1[0x16] + b1[0x19]; | |
130 b2[0x17] = b1[0x17] + b1[0x18]; | |
131 b2[0x18] = (b1[0x18] - b1[0x17]) * costab[7]; | |
132 b2[0x19] = (b1[0x19] - b1[0x16]) * costab[6]; | |
133 b2[0x1A] = (b1[0x1A] - b1[0x15]) * costab[5]; | |
134 b2[0x1B] = (b1[0x1B] - b1[0x14]) * costab[4]; | |
135 b2[0x1C] = (b1[0x1C] - b1[0x13]) * costab[3]; | |
136 b2[0x1D] = (b1[0x1D] - b1[0x12]) * costab[2]; | |
137 b2[0x1E] = (b1[0x1E] - b1[0x11]) * costab[1]; | |
138 b2[0x1F] = (b1[0x1F] - b1[0x10]) * costab[0]; | |
139 | |
140 } | |
141 | |
142 { | |
12131
d155623271e3
fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents:
11980
diff
changeset
|
143 register real *costab = mp3lib_pnts[2]; |
9002 | 144 |
145 b1[0x00] = b2[0x00] + b2[0x07]; | |
146 b1[0x01] = b2[0x01] + b2[0x06]; | |
147 b1[0x02] = b2[0x02] + b2[0x05]; | |
148 b1[0x03] = b2[0x03] + b2[0x04]; | |
149 b1[0x04] = (b2[0x03] - b2[0x04]) * costab[3]; | |
150 b1[0x05] = (b2[0x02] - b2[0x05]) * costab[2]; | |
151 b1[0x06] = (b2[0x01] - b2[0x06]) * costab[1]; | |
152 b1[0x07] = (b2[0x00] - b2[0x07]) * costab[0]; | |
153 b1[0x08] = b2[0x08] + b2[0x0F]; | |
154 b1[0x09] = b2[0x09] + b2[0x0E]; | |
155 b1[0x0A] = b2[0x0A] + b2[0x0D]; | |
156 b1[0x0B] = b2[0x0B] + b2[0x0C]; | |
157 b1[0x0C] = (b2[0x0C] - b2[0x0B]) * costab[3]; | |
158 b1[0x0D] = (b2[0x0D] - b2[0x0A]) * costab[2]; | |
159 b1[0x0E] = (b2[0x0E] - b2[0x09]) * costab[1]; | |
160 b1[0x0F] = (b2[0x0F] - b2[0x08]) * costab[0]; | |
161 b1[0x10] = b2[0x10] + b2[0x17]; | |
162 b1[0x11] = b2[0x11] + b2[0x16]; | |
163 b1[0x12] = b2[0x12] + b2[0x15]; | |
164 b1[0x13] = b2[0x13] + b2[0x14]; | |
165 b1[0x14] = (b2[0x13] - b2[0x14]) * costab[3]; | |
166 b1[0x15] = (b2[0x12] - b2[0x15]) * costab[2]; | |
167 b1[0x16] = (b2[0x11] - b2[0x16]) * costab[1]; | |
168 b1[0x17] = (b2[0x10] - b2[0x17]) * costab[0]; | |
169 b1[0x18] = b2[0x18] + b2[0x1F]; | |
170 b1[0x19] = b2[0x19] + b2[0x1E]; | |
171 b1[0x1A] = b2[0x1A] + b2[0x1D]; | |
172 b1[0x1B] = b2[0x1B] + b2[0x1C]; | |
173 b1[0x1C] = (b2[0x1C] - b2[0x1B]) * costab[3]; | |
174 b1[0x1D] = (b2[0x1D] - b2[0x1A]) * costab[2]; | |
175 b1[0x1E] = (b2[0x1E] - b2[0x19]) * costab[1]; | |
176 b1[0x1F] = (b2[0x1F] - b2[0x18]) * costab[0]; | |
177 } | |
178 | |
179 #else /* ALTIVEC_USE_REFERENCE_C_CODE */ | |
180 | |
181 // How does it work ? | |
182 // the first three passes are reproduced in the three blocks below | |
183 // all computations are done on a 4 elements vector | |
184 // 'reverse' is a special permutation vector used to reverse | |
185 // the order of the elements inside a vector. | |
186 // note that all loads/stores to b1 (b2) between passes 1 and 2 (2 and 3) | |
187 // have been removed, all elements are stored inside b1vX (b2vX) | |
188 { | |
189 register vector float | |
190 b1v0, b1v1, b1v2, b1v3, | |
191 b1v4, b1v5, b1v6, b1v7; | |
192 register vector float | |
193 temp1, temp2; | |
194 | |
195 { | |
12131
d155623271e3
fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents:
11980
diff
changeset
|
196 register real *costab = mp3lib_pnts[0]; |
9002 | 197 |
198 register vector float | |
199 samplesv1, samplesv2, samplesv3, samplesv4, | |
200 samplesv5, samplesv6, samplesv7, samplesv8, | |
201 samplesv9; | |
202 register vector unsigned char samples_perm = vec_lvsl(0, samples); | |
203 register vector float costabv1, costabv2, costabv3, costabv4, costabv5; | |
204 register vector unsigned char costab_perm = vec_lvsl(0, costab); | |
205 | |
206 samplesv1 = vec_ld(0, samples); | |
207 samplesv2 = vec_ld(16, samples); | |
208 samplesv1 = vec_perm(samplesv1, samplesv2, samples_perm); | |
209 samplesv3 = vec_ld(32, samples); | |
210 samplesv2 = vec_perm(samplesv2, samplesv3, samples_perm); | |
211 samplesv4 = vec_ld(48, samples); | |
212 samplesv3 = vec_perm(samplesv3, samplesv4, samples_perm); | |
213 samplesv5 = vec_ld(64, samples); | |
214 samplesv4 = vec_perm(samplesv4, samplesv5, samples_perm); | |
215 samplesv6 = vec_ld(80, samples); | |
216 samplesv5 = vec_perm(samplesv5, samplesv6, samples_perm); | |
217 samplesv7 = vec_ld(96, samples); | |
218 samplesv6 = vec_perm(samplesv6, samplesv7, samples_perm); | |
219 samplesv8 = vec_ld(112, samples); | |
220 samplesv7 = vec_perm(samplesv7, samplesv8, samples_perm); | |
221 samplesv9 = vec_ld(128, samples); | |
222 samplesv8 = vec_perm(samplesv8, samplesv9, samples_perm); | |
223 | |
224 temp1 = vec_add(samplesv1, | |
225 vec_perm(samplesv8, samplesv8, reverse)); | |
226 //vec_st(temp1, 0, b1); | |
227 b1v0 = temp1; | |
228 temp1 = vec_add(samplesv2, | |
229 vec_perm(samplesv7, samplesv7, reverse)); | |
230 //vec_st(temp1, 16, b1); | |
231 b1v1 = temp1; | |
232 temp1 = vec_add(samplesv3, | |
233 vec_perm(samplesv6, samplesv6, reverse)); | |
234 //vec_st(temp1, 32, b1); | |
235 b1v2 = temp1; | |
236 temp1 = vec_add(samplesv4, | |
237 vec_perm(samplesv5, samplesv5, reverse)); | |
238 //vec_st(temp1, 48, b1); | |
239 b1v3 = temp1; | |
240 | |
241 costabv1 = vec_ld(0, costab); | |
242 costabv2 = vec_ld(16, costab); | |
243 costabv1 = vec_perm(costabv1, costabv2, costab_perm); | |
244 costabv3 = vec_ld(32, costab); | |
245 costabv2 = vec_perm(costabv2, costabv3, costab_perm); | |
246 costabv4 = vec_ld(48, costab); | |
247 costabv3 = vec_perm(costabv3, costabv4, costab_perm); | |
248 costabv5 = vec_ld(64, costab); | |
249 costabv4 = vec_perm(costabv4, costabv5, costab_perm); | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
27318
diff
changeset
|
250 |
9002 | 251 temp1 = vec_sub(vec_perm(samplesv4, samplesv4, reverse), |
252 samplesv5); | |
253 temp2 = vec_madd(temp1, | |
254 vec_perm(costabv4, costabv4, reverse), | |
255 vczero); | |
256 //vec_st(temp2, 64, b1); | |
257 b1v4 = temp2; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
27318
diff
changeset
|
258 |
9002 | 259 temp1 = vec_sub(vec_perm(samplesv3, samplesv3, reverse), |
260 samplesv6); | |
261 temp2 = vec_madd(temp1, | |
262 vec_perm(costabv3, costabv3, reverse), | |
263 vczero); | |
264 //vec_st(temp2, 80, b1); | |
265 b1v5 = temp2; | |
266 temp1 = vec_sub(vec_perm(samplesv2, samplesv2, reverse), | |
267 samplesv7); | |
268 temp2 = vec_madd(temp1, | |
269 vec_perm(costabv2, costabv2, reverse), | |
270 vczero); | |
271 //vec_st(temp2, 96, b1); | |
272 b1v6 = temp2; | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
27318
diff
changeset
|
273 |
9002 | 274 temp1 = vec_sub(vec_perm(samplesv1, samplesv1, reverse), |
275 samplesv8); | |
276 temp2 = vec_madd(temp1, | |
277 vec_perm(costabv1, costabv1, reverse), | |
278 vczero); | |
279 //vec_st(temp2, 112, b1); | |
280 b1v7 = temp2; | |
281 | |
282 } | |
283 | |
284 { | |
285 register vector float | |
286 b2v0, b2v1, b2v2, b2v3, | |
287 b2v4, b2v5, b2v6, b2v7; | |
288 { | |
12131
d155623271e3
fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents:
11980
diff
changeset
|
289 register real *costab = mp3lib_pnts[1]; |
9002 | 290 register vector float costabv1r, costabv2r, costabv1, costabv2, costabv3; |
291 register vector unsigned char costab_perm = vec_lvsl(0, costab); | |
292 | |
293 costabv1 = vec_ld(0, costab); | |
294 costabv2 = vec_ld(16, costab); | |
295 costabv1 = vec_perm(costabv1, costabv2, costab_perm); | |
296 costabv3 = vec_ld(32, costab); | |
297 costabv2 = vec_perm(costabv2, costabv3 , costab_perm); | |
298 costabv1r = vec_perm(costabv1, costabv1, reverse); | |
299 costabv2r = vec_perm(costabv2, costabv2, reverse); | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
27318
diff
changeset
|
300 |
9002 | 301 temp1 = vec_add(b1v0, vec_perm(b1v3, b1v3, reverse)); |
302 //vec_st(temp1, 0, b2); | |
303 b2v0 = temp1; | |
304 temp1 = vec_add(b1v1, vec_perm(b1v2, b1v2, reverse)); | |
305 //vec_st(temp1, 16, b2); | |
306 b2v1 = temp1; | |
307 temp2 = vec_sub(vec_perm(b1v1, b1v1, reverse), b1v2); | |
308 temp1 = vec_madd(temp2, costabv2r, vczero); | |
309 //vec_st(temp1, 32, b2); | |
310 b2v2 = temp1; | |
311 temp2 = vec_sub(vec_perm(b1v0, b1v0, reverse), b1v3); | |
312 temp1 = vec_madd(temp2, costabv1r, vczero); | |
313 //vec_st(temp1, 48, b2); | |
314 b2v3 = temp1; | |
315 temp1 = vec_add(b1v4, vec_perm(b1v7, b1v7, reverse)); | |
316 //vec_st(temp1, 64, b2); | |
317 b2v4 = temp1; | |
318 temp1 = vec_add(b1v5, vec_perm(b1v6, b1v6, reverse)); | |
319 //vec_st(temp1, 80, b2); | |
320 b2v5 = temp1; | |
321 temp2 = vec_sub(b1v6, vec_perm(b1v5, b1v5, reverse)); | |
322 temp1 = vec_madd(temp2, costabv2r, vczero); | |
323 //vec_st(temp1, 96, b2); | |
324 b2v6 = temp1; | |
325 temp2 = vec_sub(b1v7, vec_perm(b1v4, b1v4, reverse)); | |
326 temp1 = vec_madd(temp2, costabv1r, vczero); | |
327 //vec_st(temp1, 112, b2); | |
328 b2v7 = temp1; | |
329 } | |
330 | |
331 { | |
12131
d155623271e3
fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents:
11980
diff
changeset
|
332 register real *costab = mp3lib_pnts[2]; |
9002 | 333 |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
27318
diff
changeset
|
334 |
9002 | 335 vector float costabv1r, costabv1, costabv2; |
336 vector unsigned char costab_perm = vec_lvsl(0, costab); | |
337 | |
338 costabv1 = vec_ld(0, costab); | |
339 costabv2 = vec_ld(16, costab); | |
340 costabv1 = vec_perm(costabv1, costabv2, costab_perm); | |
341 costabv1r = vec_perm(costabv1, costabv1, reverse); | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
27318
diff
changeset
|
342 |
9002 | 343 temp1 = vec_add(b2v0, vec_perm(b2v1, b2v1, reverse)); |
344 vec_st(temp1, 0, b1); | |
345 temp2 = vec_sub(vec_perm(b2v0, b2v0, reverse), b2v1); | |
346 temp1 = vec_madd(temp2, costabv1r, vczero); | |
347 vec_st(temp1, 16, b1); | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
27318
diff
changeset
|
348 |
9002 | 349 temp1 = vec_add(b2v2, vec_perm(b2v3, b2v3, reverse)); |
350 vec_st(temp1, 32, b1); | |
351 temp2 = vec_sub(b2v3, vec_perm(b2v2, b2v2, reverse)); | |
352 temp1 = vec_madd(temp2, costabv1r, vczero); | |
353 vec_st(temp1, 48, b1); | |
354 | |
355 temp1 = vec_add(b2v4, vec_perm(b2v5, b2v5, reverse)); | |
356 vec_st(temp1, 64, b1); | |
357 temp2 = vec_sub(vec_perm(b2v4, b2v4, reverse), b2v5); | |
358 temp1 = vec_madd(temp2, costabv1r, vczero); | |
359 vec_st(temp1, 80, b1); | |
360 | |
361 temp1 = vec_add(b2v6, vec_perm(b2v7, b2v7, reverse)); | |
362 vec_st(temp1, 96, b1); | |
363 temp2 = vec_sub(b2v7, vec_perm(b2v6, b2v6, reverse)); | |
364 temp1 = vec_madd(temp2, costabv1r, vczero); | |
365 vec_st(temp1, 112, b1); | |
29263
0f1b5b68af32
whitespace cosmetics: Remove all trailing whitespace.
diego
parents:
27318
diff
changeset
|
366 |
9002 | 367 } |
368 } | |
369 } | |
370 | |
371 #endif /* ALTIVEC_USE_REFERENCE_C_CODE */ | |
372 | |
373 { | |
12131
d155623271e3
fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents:
11980
diff
changeset
|
374 register real const cos0 = mp3lib_pnts[3][0]; |
d155623271e3
fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents:
11980
diff
changeset
|
375 register real const cos1 = mp3lib_pnts[3][1]; |
9002 | 376 |
377 b2[0x00] = b1[0x00] + b1[0x03]; | |
378 b2[0x01] = b1[0x01] + b1[0x02]; | |
379 b2[0x02] = (b1[0x01] - b1[0x02]) * cos1; | |
380 b2[0x03] = (b1[0x00] - b1[0x03]) * cos0; | |
381 b2[0x04] = b1[0x04] + b1[0x07]; | |
382 b2[0x05] = b1[0x05] + b1[0x06]; | |
383 b2[0x06] = (b1[0x06] - b1[0x05]) * cos1; | |
384 b2[0x07] = (b1[0x07] - b1[0x04]) * cos0; | |
385 b2[0x08] = b1[0x08] + b1[0x0B]; | |
386 b2[0x09] = b1[0x09] + b1[0x0A]; | |
387 b2[0x0A] = (b1[0x09] - b1[0x0A]) * cos1; | |
388 b2[0x0B] = (b1[0x08] - b1[0x0B]) * cos0; | |
389 b2[0x0C] = b1[0x0C] + b1[0x0F]; | |
390 b2[0x0D] = b1[0x0D] + b1[0x0E]; | |
391 b2[0x0E] = (b1[0x0E] - b1[0x0D]) * cos1; | |
392 b2[0x0F] = (b1[0x0F] - b1[0x0C]) * cos0; | |
393 b2[0x10] = b1[0x10] + b1[0x13]; | |
394 b2[0x11] = b1[0x11] + b1[0x12]; | |
395 b2[0x12] = (b1[0x11] - b1[0x12]) * cos1; | |
396 b2[0x13] = (b1[0x10] - b1[0x13]) * cos0; | |
397 b2[0x14] = b1[0x14] + b1[0x17]; | |
398 b2[0x15] = b1[0x15] + b1[0x16]; | |
399 b2[0x16] = (b1[0x16] - b1[0x15]) * cos1; | |
400 b2[0x17] = (b1[0x17] - b1[0x14]) * cos0; | |
401 b2[0x18] = b1[0x18] + b1[0x1B]; | |
402 b2[0x19] = b1[0x19] + b1[0x1A]; | |
403 b2[0x1A] = (b1[0x19] - b1[0x1A]) * cos1; | |
404 b2[0x1B] = (b1[0x18] - b1[0x1B]) * cos0; | |
405 b2[0x1C] = b1[0x1C] + b1[0x1F]; | |
406 b2[0x1D] = b1[0x1D] + b1[0x1E]; | |
407 b2[0x1E] = (b1[0x1E] - b1[0x1D]) * cos1; | |
408 b2[0x1F] = (b1[0x1F] - b1[0x1C]) * cos0; | |
409 } | |
410 | |
411 { | |
12131
d155623271e3
fix symbol clashes when linking with libmp3lame including mp3 decoder, man, mp3lib is so much bloated
alex
parents:
11980
diff
changeset
|
412 register real const cos0 = mp3lib_pnts[4][0]; |
9002 | 413 |
414 b1[0x00] = b2[0x00] + b2[0x01]; | |
415 b1[0x01] = (b2[0x00] - b2[0x01]) * cos0; | |
416 b1[0x02] = b2[0x02] + b2[0x03]; | |
417 b1[0x03] = (b2[0x03] - b2[0x02]) * cos0; | |
418 b1[0x02] += b1[0x03]; | |
419 | |
420 b1[0x04] = b2[0x04] + b2[0x05]; | |
421 b1[0x05] = (b2[0x04] - b2[0x05]) * cos0; | |
422 b1[0x06] = b2[0x06] + b2[0x07]; | |
423 b1[0x07] = (b2[0x07] - b2[0x06]) * cos0; | |
424 b1[0x06] += b1[0x07]; | |
425 b1[0x04] += b1[0x06]; | |
426 b1[0x06] += b1[0x05]; | |
427 b1[0x05] += b1[0x07]; | |
428 | |
429 b1[0x08] = b2[0x08] + b2[0x09]; | |
430 b1[0x09] = (b2[0x08] - b2[0x09]) * cos0; | |
431 b1[0x0A] = b2[0x0A] + b2[0x0B]; | |
432 b1[0x0B] = (b2[0x0B] - b2[0x0A]) * cos0; | |
433 b1[0x0A] += b1[0x0B]; | |
434 | |
435 b1[0x0C] = b2[0x0C] + b2[0x0D]; | |
436 b1[0x0D] = (b2[0x0C] - b2[0x0D]) * cos0; | |
437 b1[0x0E] = b2[0x0E] + b2[0x0F]; | |
438 b1[0x0F] = (b2[0x0F] - b2[0x0E]) * cos0; | |
439 b1[0x0E] += b1[0x0F]; | |
440 b1[0x0C] += b1[0x0E]; | |
441 b1[0x0E] += b1[0x0D]; | |
442 b1[0x0D] += b1[0x0F]; | |
443 | |
444 b1[0x10] = b2[0x10] + b2[0x11]; | |
445 b1[0x11] = (b2[0x10] - b2[0x11]) * cos0; | |
446 b1[0x12] = b2[0x12] + b2[0x13]; | |
447 b1[0x13] = (b2[0x13] - b2[0x12]) * cos0; | |
448 b1[0x12] += b1[0x13]; | |
449 | |
450 b1[0x14] = b2[0x14] + b2[0x15]; | |
451 b1[0x15] = (b2[0x14] - b2[0x15]) * cos0; | |
452 b1[0x16] = b2[0x16] + b2[0x17]; | |
453 b1[0x17] = (b2[0x17] - b2[0x16]) * cos0; | |
454 b1[0x16] += b1[0x17]; | |
455 b1[0x14] += b1[0x16]; | |
456 b1[0x16] += b1[0x15]; | |
457 b1[0x15] += b1[0x17]; | |
458 | |
459 b1[0x18] = b2[0x18] + b2[0x19]; | |
460 b1[0x19] = (b2[0x18] - b2[0x19]) * cos0; | |
461 b1[0x1A] = b2[0x1A] + b2[0x1B]; | |
462 b1[0x1B] = (b2[0x1B] - b2[0x1A]) * cos0; | |
463 b1[0x1A] += b1[0x1B]; | |
464 | |
465 b1[0x1C] = b2[0x1C] + b2[0x1D]; | |
466 b1[0x1D] = (b2[0x1C] - b2[0x1D]) * cos0; | |
467 b1[0x1E] = b2[0x1E] + b2[0x1F]; | |
468 b1[0x1F] = (b2[0x1F] - b2[0x1E]) * cos0; | |
469 b1[0x1E] += b1[0x1F]; | |
470 b1[0x1C] += b1[0x1E]; | |
471 b1[0x1E] += b1[0x1D]; | |
472 b1[0x1D] += b1[0x1F]; | |
473 } | |
474 | |
475 out0[0x10*16] = b1[0x00]; | |
476 out0[0x10*12] = b1[0x04]; | |
477 out0[0x10* 8] = b1[0x02]; | |
478 out0[0x10* 4] = b1[0x06]; | |
479 out0[0x10* 0] = b1[0x01]; | |
480 out1[0x10* 0] = b1[0x01]; | |
481 out1[0x10* 4] = b1[0x05]; | |
482 out1[0x10* 8] = b1[0x03]; | |
483 out1[0x10*12] = b1[0x07]; | |
484 | |
485 b1[0x08] += b1[0x0C]; | |
486 out0[0x10*14] = b1[0x08]; | |
487 b1[0x0C] += b1[0x0a]; | |
488 out0[0x10*10] = b1[0x0C]; | |
489 b1[0x0A] += b1[0x0E]; | |
490 out0[0x10* 6] = b1[0x0A]; | |
491 b1[0x0E] += b1[0x09]; | |
492 out0[0x10* 2] = b1[0x0E]; | |
493 b1[0x09] += b1[0x0D]; | |
494 out1[0x10* 2] = b1[0x09]; | |
495 b1[0x0D] += b1[0x0B]; | |
496 out1[0x10* 6] = b1[0x0D]; | |
497 b1[0x0B] += b1[0x0F]; | |
498 out1[0x10*10] = b1[0x0B]; | |
499 out1[0x10*14] = b1[0x0F]; | |
500 | |
501 b1[0x18] += b1[0x1C]; | |
502 out0[0x10*15] = b1[0x10] + b1[0x18]; | |
503 out0[0x10*13] = b1[0x18] + b1[0x14]; | |
504 b1[0x1C] += b1[0x1a]; | |
505 out0[0x10*11] = b1[0x14] + b1[0x1C]; | |
506 out0[0x10* 9] = b1[0x1C] + b1[0x12]; | |
507 b1[0x1A] += b1[0x1E]; | |
508 out0[0x10* 7] = b1[0x12] + b1[0x1A]; | |
509 out0[0x10* 5] = b1[0x1A] + b1[0x16]; | |
510 b1[0x1E] += b1[0x19]; | |
511 out0[0x10* 3] = b1[0x16] + b1[0x1E]; | |
512 out0[0x10* 1] = b1[0x1E] + b1[0x11]; | |
513 b1[0x19] += b1[0x1D]; | |
514 out1[0x10* 1] = b1[0x11] + b1[0x19]; | |
515 out1[0x10* 3] = b1[0x19] + b1[0x15]; | |
516 b1[0x1D] += b1[0x1B]; | |
517 out1[0x10* 5] = b1[0x15] + b1[0x1D]; | |
518 out1[0x10* 7] = b1[0x1D] + b1[0x13]; | |
519 b1[0x1B] += b1[0x1F]; | |
520 out1[0x10* 9] = b1[0x13] + b1[0x1B]; | |
521 out1[0x10*11] = b1[0x1B] + b1[0x17]; | |
522 out1[0x10*13] = b1[0x17] + b1[0x1F]; | |
523 out1[0x10*15] = b1[0x1F]; | |
524 } |