Mercurial > mplayer.hg
annotate liba52/downmix.c @ 18646:80df1b8c3759
s/CVS/Subversion/
author | diego |
---|---|
date | Thu, 08 Jun 2006 10:09:04 +0000 |
parents | 7b408d60de9e |
children | 4bad7f00556e |
rev | line source |
---|---|
3394 | 1 /* |
2 * downmix.c | |
3 * Copyright (C) 2000-2001 Michel Lespinasse <walken@zoy.org> | |
4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> | |
5 * | |
6 * This file is part of a52dec, a free ATSC A-52 stream decoder. | |
7 * See http://liba52.sourceforge.net/ for updates. | |
8 * | |
14991
07f1e7669772
Mark modified files as such to comply more closely with GPL §2a.
diego
parents:
12137
diff
changeset
|
9 * Modified for use with MPlayer, changes contained in liba52_changes.diff. |
07f1e7669772
Mark modified files as such to comply more closely with GPL §2a.
diego
parents:
12137
diff
changeset
|
10 * detailed CVS changelog at http://www.mplayerhq.hu/cgi-bin/cvsweb.cgi/main/ |
07f1e7669772
Mark modified files as such to comply more closely with GPL §2a.
diego
parents:
12137
diff
changeset
|
11 * $Id$ |
07f1e7669772
Mark modified files as such to comply more closely with GPL §2a.
diego
parents:
12137
diff
changeset
|
12 * |
3394 | 13 * a52dec is free software; you can redistribute it and/or modify |
14 * it under the terms of the GNU General Public License as published by | |
15 * the Free Software Foundation; either version 2 of the License, or | |
16 * (at your option) any later version. | |
17 * | |
18 * a52dec is distributed in the hope that it will be useful, | |
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
21 * GNU General Public License for more details. | |
22 * | |
23 * You should have received a copy of the GNU General Public License | |
24 * along with this program; if not, write to the Free Software | |
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
3625 | 26 * |
27 * SSE optimizations from Michael Niedermayer (michaelni@gmx.at) | |
3394 | 28 */ |
29 | |
30 #include "config.h" | |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
31 #include "asmalign.h" |
3394 | 32 |
33 #include <string.h> | |
34 #include <inttypes.h> | |
35 | |
36 #include "a52.h" | |
37 #include "a52_internal.h" | |
3910
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
38 #include "mm_accel.h" |
3394 | 39 |
40 #define CONVERT(acmod,output) (((output) << 3) + (acmod)) | |
41 | |
3904 | 42 |
/*
 * Run-time selected mixing routines.  Both pointers start out NULL and are
 * assigned by downmix_accel_init(), so that function has to run before
 * either pointer is called.
 */
void (*downmix)(sample_t * samples, int acmod, int output, sample_t bias,
              sample_t clev, sample_t slev)= NULL;
void (*upmix)(sample_t * samples, int acmod, int output)= NULL;

/* Candidate implementations that downmix_accel_init() chooses between. */
static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias,
              sample_t clev, sample_t slev);
static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t bias,
              sample_t clev, sample_t slev);
static void downmix_C (sample_t * samples, int acmod, int output, sample_t bias,
              sample_t clev, sample_t slev);
static void upmix_MMX (sample_t * samples, int acmod, int output);
static void upmix_C (sample_t * samples, int acmod, int output);
3910
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
55 |
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
56 void downmix_accel_init(uint32_t mm_accel) |
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
57 { |
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
58 upmix= upmix_C; |
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
59 downmix= downmix_C; |
16173 | 60 #if defined(ARCH_X86) || defined(ARCH_X86_64) |
3910
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
61 if(mm_accel & MM_ACCEL_X86_MMX) upmix= upmix_MMX; |
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
62 if(mm_accel & MM_ACCEL_X86_SSE) downmix= downmix_SSE; |
4233 | 63 if(mm_accel & MM_ACCEL_X86_3DNOW) downmix= downmix_3dnow; |
3910
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
64 #endif |
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
65 } |
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
66 |
/*
 * Pick the output channel configuration for a given input configuration
 * and, when asked to, rescale *level for the conversion that results.
 *
 * input - source channel configuration; only (input & 7) indexes the table
 *         and the level switch, but the A52_DOLBY / A52_3F comparisons
 *         below use the unmasked value
 * flags - (flags & A52_CHANNEL_MASK) is the requested output and selects
 *         the table row; A52_ADJUST_LEVEL enables the *level adjustment
 * level - in/out gain, multiplied or divided in place
 * clev, slev - mix levels used by the adjustment formulas (presumably the
 *         centre and surround levels - confirm against the callers)
 *
 * Returns the selected output configuration, or -1 when the requested
 * output is greater than A52_DOLBY.
 */
int downmix_init (int input, int flags, sample_t * level,
		  sample_t clev, sample_t slev)
{
    /* Rows: requested output (0 .. A52_DOLBY).  Columns: input & 7. */
    static uint8_t table[11][8] = {
	{A52_CHANNEL,	A52_DOLBY,	A52_STEREO,	A52_STEREO,
	 A52_STEREO,	A52_STEREO,	A52_STEREO,	A52_STEREO},
	{A52_MONO,	A52_MONO,	A52_MONO,	A52_MONO,
	 A52_MONO,	A52_MONO,	A52_MONO,	A52_MONO},
	{A52_CHANNEL,	A52_DOLBY,	A52_STEREO,	A52_STEREO,
	 A52_STEREO,	A52_STEREO,	A52_STEREO,	A52_STEREO},
	{A52_CHANNEL,	A52_DOLBY,	A52_STEREO,	A52_3F,
	 A52_STEREO,	A52_3F,		A52_STEREO,	A52_3F},
	{A52_CHANNEL,	A52_DOLBY,	A52_STEREO,	A52_STEREO,
	 A52_2F1R,	A52_2F1R,	A52_2F1R,	A52_2F1R},
	{A52_CHANNEL,	A52_DOLBY,	A52_STEREO,	A52_STEREO,
	 A52_2F1R,	A52_3F1R,	A52_2F1R,	A52_3F1R},
	{A52_CHANNEL,	A52_DOLBY,	A52_STEREO,	A52_3F,
	 A52_2F2R,	A52_2F2R,	A52_2F2R,	A52_2F2R},
	{A52_CHANNEL,	A52_DOLBY,	A52_STEREO,	A52_3F,
	 A52_2F2R,	A52_3F2R,	A52_2F2R,	A52_3F2R},
	{A52_CHANNEL1,	A52_MONO,	A52_MONO,	A52_MONO,
	 A52_MONO,	A52_MONO,	A52_MONO,	A52_MONO},
	{A52_CHANNEL2,	A52_MONO,	A52_MONO,	A52_MONO,
	 A52_MONO,	A52_MONO,	A52_MONO,	A52_MONO},
	{A52_CHANNEL,	A52_DOLBY,	A52_STEREO,	A52_DOLBY,
	 A52_DOLBY,	A52_DOLBY,	A52_DOLBY,	A52_DOLBY}
    };
    int output;

    /* Reject requests that would index past the last table row. */
    output = flags & A52_CHANNEL_MASK;
    if (output > A52_DOLBY)
	return -1;

    output = table[output][input & 7];

    /*
     * A stereo result is reported as A52_DOLBY when the input is already
     * A52_DOLBY, or when it is A52_3F and clev equals LEVEL_3DB.
     */
    if ((output == A52_STEREO) &&
	((input == A52_DOLBY) || ((input == A52_3F) && (clev == LEVEL_3DB))))
	output = A52_DOLBY;

    /* Optional gain adjustment, one formula per (input, output) pair. */
    if (flags & A52_ADJUST_LEVEL)
	switch (CONVERT (input & 7, output)) {

	case CONVERT (A52_3F, A52_MONO):
	    *level *= LEVEL_3DB / (1 + clev);
	    break;

	case CONVERT (A52_STEREO, A52_MONO):
	case CONVERT (A52_2F2R, A52_2F1R):
	case CONVERT (A52_3F2R, A52_3F1R):
	level_3db:	/* also reached by goto from the 3F2R -> 2F1R case */
	    *level *= LEVEL_3DB;
	    break;

	case CONVERT (A52_3F2R, A52_2F1R):
	    if (clev < LEVEL_PLUS3DB - 1)
		goto level_3db;
	    /* fall through */
	case CONVERT (A52_3F, A52_STEREO):
	case CONVERT (A52_3F1R, A52_2F1R):
	case CONVERT (A52_3F1R, A52_2F2R):
	case CONVERT (A52_3F2R, A52_2F2R):
	    *level /= 1 + clev;
	    break;

	case CONVERT (A52_2F1R, A52_MONO):
	    *level *= LEVEL_PLUS3DB / (2 + slev);
	    break;

	case CONVERT (A52_2F1R, A52_STEREO):
	case CONVERT (A52_3F1R, A52_3F):
	    *level /= 1 + slev * LEVEL_3DB;
	    break;

	case CONVERT (A52_3F1R, A52_MONO):
	    *level *= LEVEL_3DB / (1 + clev + 0.5 * slev);
	    break;

	case CONVERT (A52_3F1R, A52_STEREO):
	    *level /= 1 + clev + slev * LEVEL_3DB;
	    break;

	case CONVERT (A52_2F2R, A52_MONO):
	    *level *= LEVEL_3DB / (1 + slev);
	    break;

	case CONVERT (A52_2F2R, A52_STEREO):
	case CONVERT (A52_3F2R, A52_3F):
	    *level /= 1 + slev;
	    break;

	case CONVERT (A52_3F2R, A52_MONO):
	    *level *= LEVEL_3DB / (1 + clev + slev);
	    break;

	case CONVERT (A52_3F2R, A52_STEREO):
	    *level /= 1 + clev + slev;
	    break;

	case CONVERT (A52_MONO, A52_DOLBY):
	    *level *= LEVEL_PLUS3DB;
	    break;

	case CONVERT (A52_3F, A52_DOLBY):
	case CONVERT (A52_2F1R, A52_DOLBY):
	    *level *= 1 / (1 + LEVEL_3DB);
	    break;

	case CONVERT (A52_3F1R, A52_DOLBY):
	case CONVERT (A52_2F2R, A52_DOLBY):
	    *level *= 1 / (1 + 2 * LEVEL_3DB);
	    break;

	case CONVERT (A52_3F2R, A52_DOLBY):
	    *level *= 1 / (1 + 3 * LEVEL_3DB);
	    break;
	}
    /* Pairs not listed above leave *level untouched. */
    return output;
}
185 | |
/*
 * Fill coeff[] with per-channel gains for converting acmod into output.
 *
 * coeff  - receives the gains; entries up to coeff[4] may be written, so
 *          the array needs at least five elements
 * acmod  - input channel configuration
 * output - requested output; only (output & A52_CHANNEL_MASK) is used
 * level  - base gain every coefficient is derived from
 * clev, slev - mix levels folded into some coefficients (presumably centre
 *          and surround - confirm against the callers); several DOLBY and
 *          2F1R cases overwrite them locally before falling through
 *
 * Returns a small non-negative integer code (it looks like a bitmask with
 * one bit per input channel - TODO confirm with the caller), or -1 when the
 * (acmod, output) pair is not handled.  Only the coeff[] entries assigned
 * in the matching case are written; the others keep their previous values.
 */
int downmix_coeff (sample_t * coeff, int acmod, int output, sample_t level,
		   sample_t clev, sample_t slev)
{
    switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {

    /* Same layout on both sides: every channel just gets `level'. */
    case CONVERT (A52_CHANNEL, A52_CHANNEL):
    case CONVERT (A52_MONO, A52_MONO):
    case CONVERT (A52_STEREO, A52_STEREO):
    case CONVERT (A52_3F, A52_3F):
    case CONVERT (A52_2F1R, A52_2F1R):
    case CONVERT (A52_3F1R, A52_3F1R):
    case CONVERT (A52_2F2R, A52_2F2R):
    case CONVERT (A52_3F2R, A52_3F2R):
    case CONVERT (A52_STEREO, A52_DOLBY):
	coeff[0] = coeff[1] = coeff[2] = coeff[3] = coeff[4] = level;
	return 0;

    case CONVERT (A52_CHANNEL, A52_MONO):
	coeff[0] = coeff[1] = level * LEVEL_6DB;
	return 3;

    case CONVERT (A52_STEREO, A52_MONO):
	coeff[0] = coeff[1] = level * LEVEL_3DB;
	return 3;

    case CONVERT (A52_3F, A52_MONO):
	coeff[0] = coeff[2] = level * LEVEL_3DB;
	coeff[1] = level * clev * LEVEL_PLUS3DB;
	return 7;

    case CONVERT (A52_2F1R, A52_MONO):
	coeff[0] = coeff[1] = level * LEVEL_3DB;
	coeff[2] = level * slev * LEVEL_3DB;
	return 7;

    case CONVERT (A52_2F2R, A52_MONO):
	coeff[0] = coeff[1] = level * LEVEL_3DB;
	coeff[2] = coeff[3] = level * slev * LEVEL_3DB;
	return 15;

    case CONVERT (A52_3F1R, A52_MONO):
	coeff[0] = coeff[2] = level * LEVEL_3DB;
	coeff[1] = level * clev * LEVEL_PLUS3DB;
	coeff[3] = level * slev * LEVEL_3DB;
	return 15;

    case CONVERT (A52_3F2R, A52_MONO):
	coeff[0] = coeff[2] = level * LEVEL_3DB;
	coeff[1] = level * clev * LEVEL_PLUS3DB;
	coeff[3] = coeff[4] = level * slev * LEVEL_3DB;
	return 31;

    case CONVERT (A52_MONO, A52_DOLBY):
	coeff[0] = level * LEVEL_3DB;
	return 0;

    case CONVERT (A52_3F, A52_DOLBY):
	clev = LEVEL_3DB;
	/* fall through with the fixed clev */
    case CONVERT (A52_3F, A52_STEREO):
    case CONVERT (A52_3F1R, A52_2F1R):
    case CONVERT (A52_3F2R, A52_2F2R):
	coeff[0] = coeff[2] = coeff[3] = coeff[4] = level;
	coeff[1] = level * clev;
	return 7;

    case CONVERT (A52_2F1R, A52_DOLBY):
	slev = 1;
	/* fall through with the fixed slev */
    case CONVERT (A52_2F1R, A52_STEREO):
	coeff[0] = coeff[1] = level;
	coeff[2] = level * slev * LEVEL_3DB;
	return 7;

    case CONVERT (A52_3F1R, A52_DOLBY):
	clev = LEVEL_3DB;
	slev = 1;
	/* fall through with the fixed clev and slev */
    case CONVERT (A52_3F1R, A52_STEREO):
	coeff[0] = coeff[2] = level;
	coeff[1] = level * clev;
	coeff[3] = level * slev * LEVEL_3DB;
	return 15;

    case CONVERT (A52_2F2R, A52_DOLBY):
	slev = LEVEL_3DB;
	/* fall through with the fixed slev */
    case CONVERT (A52_2F2R, A52_STEREO):
	coeff[0] = coeff[1] = level;
	coeff[2] = coeff[3] = level * slev;
	return 15;

    case CONVERT (A52_3F2R, A52_DOLBY):
	clev = LEVEL_3DB;
	/* fall through: the DOLBY case also takes the fixed slev below */
    case CONVERT (A52_3F2R, A52_2F1R):
	slev = LEVEL_3DB;
	/* fall through with the fixed slev */
    case CONVERT (A52_3F2R, A52_STEREO):
	coeff[0] = coeff[2] = level;
	coeff[1] = level * clev;
	coeff[3] = coeff[4] = level * slev;
	return 31;

    case CONVERT (A52_3F1R, A52_3F):
	coeff[0] = coeff[1] = coeff[2] = level;
	coeff[3] = level * slev * LEVEL_3DB;
	return 13;

    case CONVERT (A52_3F2R, A52_3F):
	coeff[0] = coeff[1] = coeff[2] = level;
	coeff[3] = coeff[4] = level * slev;
	return 29;

    case CONVERT (A52_2F2R, A52_2F1R):
	coeff[0] = coeff[1] = level;
	coeff[2] = coeff[3] = level * LEVEL_3DB;
	return 12;

    case CONVERT (A52_3F2R, A52_3F1R):
	coeff[0] = coeff[1] = coeff[2] = level;
	coeff[3] = coeff[4] = level * LEVEL_3DB;
	return 24;

    case CONVERT (A52_2F1R, A52_2F2R):
	coeff[0] = coeff[1] = level;
	coeff[2] = level * LEVEL_3DB;
	return 0;

    case CONVERT (A52_3F1R, A52_2F2R):
	coeff[0] = coeff[2] = level;
	coeff[1] = level * clev;
	coeff[3] = level * LEVEL_3DB;
	return 7;

    case CONVERT (A52_3F1R, A52_3F2R):
	coeff[0] = coeff[1] = coeff[2] = level;
	coeff[3] = level * LEVEL_3DB;
	return 0;

    /* Dual-channel input: keep one channel, silence the other. */
    case CONVERT (A52_CHANNEL, A52_CHANNEL1):
	coeff[0] = level;
	coeff[1] = 0;
	return 0;

    case CONVERT (A52_CHANNEL, A52_CHANNEL2):
	coeff[0] = 0;
	coeff[1] = level;
	return 0;
    }

    return -1;	/* NOTREACHED */
}
333 | |
334 static void mix2to1 (sample_t * dest, sample_t * src, sample_t bias) | |
335 { | |
336 int i; | |
337 | |
338 for (i = 0; i < 256; i++) | |
339 dest[i] += src[i] + bias; | |
340 } | |
341 | |
342 static void mix3to1 (sample_t * samples, sample_t bias) | |
343 { | |
344 int i; | |
345 | |
346 for (i = 0; i < 256; i++) | |
347 samples[i] += samples[i + 256] + samples[i + 512] + bias; | |
348 } | |
349 | |
350 static void mix4to1 (sample_t * samples, sample_t bias) | |
351 { | |
352 int i; | |
353 | |
354 for (i = 0; i < 256; i++) | |
355 samples[i] += (samples[i + 256] + samples[i + 512] + | |
356 samples[i + 768] + bias); | |
357 } | |
358 | |
359 static void mix5to1 (sample_t * samples, sample_t bias) | |
360 { | |
361 int i; | |
362 | |
363 for (i = 0; i < 256; i++) | |
364 samples[i] += (samples[i + 256] + samples[i + 512] + | |
365 samples[i + 768] + samples[i + 1024] + bias); | |
366 } | |
367 | |
368 static void mix3to2 (sample_t * samples, sample_t bias) | |
369 { | |
370 int i; | |
371 sample_t common; | |
372 | |
373 for (i = 0; i < 256; i++) { | |
374 common = samples[i + 256] + bias; | |
375 samples[i] += common; | |
376 samples[i + 256] = samples[i + 512] + common; | |
377 } | |
378 } | |
379 | |
380 static void mix21to2 (sample_t * left, sample_t * right, sample_t bias) | |
381 { | |
382 int i; | |
383 sample_t common; | |
384 | |
385 for (i = 0; i < 256; i++) { | |
386 common = right[i + 256] + bias; | |
387 left[i] += common; | |
388 right[i] += common; | |
389 } | |
390 } | |
391 | |
392 static void mix21toS (sample_t * samples, sample_t bias) | |
393 { | |
394 int i; | |
395 sample_t surround; | |
396 | |
397 for (i = 0; i < 256; i++) { | |
398 surround = samples[i + 512]; | |
399 samples[i] += bias - surround; | |
400 samples[i + 256] += bias + surround; | |
401 } | |
402 } | |
403 | |
404 static void mix31to2 (sample_t * samples, sample_t bias) | |
405 { | |
406 int i; | |
407 sample_t common; | |
408 | |
409 for (i = 0; i < 256; i++) { | |
410 common = samples[i + 256] + samples[i + 768] + bias; | |
411 samples[i] += common; | |
412 samples[i + 256] = samples[i + 512] + common; | |
413 } | |
414 } | |
415 | |
416 static void mix31toS (sample_t * samples, sample_t bias) | |
417 { | |
418 int i; | |
419 sample_t common, surround; | |
420 | |
421 for (i = 0; i < 256; i++) { | |
422 common = samples[i + 256] + bias; | |
423 surround = samples[i + 768]; | |
424 samples[i] += common - surround; | |
425 samples[i + 256] = samples[i + 512] + common + surround; | |
426 } | |
427 } | |
428 | |
429 static void mix22toS (sample_t * samples, sample_t bias) | |
430 { | |
431 int i; | |
432 sample_t surround; | |
433 | |
434 for (i = 0; i < 256; i++) { | |
435 surround = samples[i + 512] + samples[i + 768]; | |
436 samples[i] += bias - surround; | |
437 samples[i + 256] += bias + surround; | |
438 } | |
439 } | |
440 | |
441 static void mix32to2 (sample_t * samples, sample_t bias) | |
442 { | |
443 int i; | |
444 sample_t common; | |
445 | |
446 for (i = 0; i < 256; i++) { | |
447 common = samples[i + 256] + bias; | |
448 samples[i] += common + samples[i + 768]; | |
449 samples[i + 256] = common + samples[i + 512] + samples[i + 1024]; | |
450 } | |
451 } | |
452 | |
453 static void mix32toS (sample_t * samples, sample_t bias) | |
454 { | |
455 int i; | |
456 sample_t common, surround; | |
457 | |
458 for (i = 0; i < 256; i++) { | |
459 common = samples[i + 256] + bias; | |
460 surround = samples[i + 768] + samples[i + 1024]; | |
461 samples[i] += common - surround; | |
462 samples[i + 256] = samples[i + 512] + common + surround; | |
463 } | |
464 } | |
465 | |
466 static void move2to1 (sample_t * src, sample_t * dest, sample_t bias) | |
467 { | |
468 int i; | |
469 | |
470 for (i = 0; i < 256; i++) | |
471 dest[i] = src[i] + src[i + 256] + bias; | |
472 } | |
473 | |
474 static void zero (sample_t * samples) | |
475 { | |
476 int i; | |
477 for (i = 0; i < 256; i++) | |
478 samples[i] = 0; | |
479 } | |
480 | |
/*
 * Portable C downmix: combine the channel blocks held in samples[] in place
 * so that they match the requested output configuration.
 *
 * samples - consecutive 256-sample blocks, one per channel (the offsets
 *           256, 512, 768 and 1024 used below address channels 1 .. 4)
 * acmod   - input channel configuration
 * output  - requested output; only (output & A52_CHANNEL_MASK) is used
 * bias    - constant added to the mixed samples by the helpers
 * clev    - not used in this function
 * slev    - only tested against 0, to skip the blocks that would otherwise
 *           be mixed in (presumably the surround channels - confirm)
 *
 * (acmod, output) pairs without a case leave samples[] untouched.
 */
static void downmix_C (sample_t * samples, int acmod, int output, sample_t bias,
	      sample_t clev, sample_t slev)
{
    switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {

    case CONVERT (A52_CHANNEL, A52_CHANNEL2):
	/* keep only the second channel: move it into block 0 */
	memcpy (samples, samples + 256, 256 * sizeof (sample_t));
	break;

    case CONVERT (A52_CHANNEL, A52_MONO):
    case CONVERT (A52_STEREO, A52_MONO):
    mix_2to1:
	mix2to1 (samples, samples + 256, bias);
	break;

    case CONVERT (A52_2F1R, A52_MONO):
	if (slev == 0)
	    goto mix_2to1;
	/* fall through */
    case CONVERT (A52_3F, A52_MONO):
    mix_3to1:
	mix3to1 (samples, bias);
	break;

    case CONVERT (A52_3F1R, A52_MONO):
	if (slev == 0)
	    goto mix_3to1;
	/* fall through: slev is non-zero here, so the test below is false */
    case CONVERT (A52_2F2R, A52_MONO):
	if (slev == 0)
	    goto mix_2to1;
	mix4to1 (samples, bias);
	break;

    case CONVERT (A52_3F2R, A52_MONO):
	if (slev == 0)
	    goto mix_3to1;
	mix5to1 (samples, bias);
	break;

    case CONVERT (A52_MONO, A52_DOLBY):
	/* duplicate the single channel into block 1 */
	memcpy (samples + 256, samples, 256 * sizeof (sample_t));
	break;

    case CONVERT (A52_3F, A52_STEREO):
    case CONVERT (A52_3F, A52_DOLBY):
    mix_3to2:
	mix3to2 (samples, bias);
	break;

    case CONVERT (A52_2F1R, A52_STEREO):
	if (slev == 0)
	    break;
	mix21to2 (samples, samples + 256, bias);
	break;

    case CONVERT (A52_2F1R, A52_DOLBY):
	mix21toS (samples, bias);
	break;

    case CONVERT (A52_3F1R, A52_STEREO):
	if (slev == 0)
	    goto mix_3to2;
	mix31to2 (samples, bias);
	break;

    case CONVERT (A52_3F1R, A52_DOLBY):
	mix31toS (samples, bias);
	break;

    case CONVERT (A52_2F2R, A52_STEREO):
	if (slev == 0)
	    break;
	mix2to1 (samples, samples + 512, bias);
	mix2to1 (samples + 256, samples + 768, bias);
	break;

    case CONVERT (A52_2F2R, A52_DOLBY):
	mix22toS (samples, bias);
	break;

    case CONVERT (A52_3F2R, A52_STEREO):
	if (slev == 0)
	    goto mix_3to2;
	mix32to2 (samples, bias);
	break;

    case CONVERT (A52_3F2R, A52_DOLBY):
	mix32toS (samples, bias);
	break;

    case CONVERT (A52_3F1R, A52_3F):
	if (slev == 0)
	    break;
	mix21to2 (samples, samples + 512, bias);
	break;

    case CONVERT (A52_3F2R, A52_3F):
	if (slev == 0)
	    break;
	mix2to1 (samples, samples + 768, bias);
	mix2to1 (samples + 512, samples + 1024, bias);
	break;

    case CONVERT (A52_3F1R, A52_2F1R):
	mix3to2 (samples, bias);
	memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
	break;

    case CONVERT (A52_2F2R, A52_2F1R):
	mix2to1 (samples + 512, samples + 768, bias);
	break;

    case CONVERT (A52_3F2R, A52_2F1R):
	mix3to2 (samples, bias); //FIXME possible bug? (output doesn't seem to be used)
	/*
	 * NOTE(review): mix3to2() leaves its results in blocks 0 and 256,
	 * which the move2to1() call below does not touch; it only
	 * overwrites block 512.  Confirm whether the FIXME still applies.
	 */
	move2to1 (samples + 768, samples + 512, bias);
	break;

    case CONVERT (A52_3F2R, A52_3F1R):
	mix2to1 (samples + 768, samples + 1024, bias);
	break;

    case CONVERT (A52_2F1R, A52_2F2R):
	/* duplicate block 512 into block 768 */
	memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
	break;

    case CONVERT (A52_3F1R, A52_2F2R):
	mix3to2 (samples, bias);
	memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
	break;

    case CONVERT (A52_3F2R, A52_2F2R):
	mix3to2 (samples, bias);
	/* shift the last two blocks down by one slot each */
	memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
	memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t));
	break;

    case CONVERT (A52_3F1R, A52_3F2R):
	/* duplicate block 768 into block 1024 */
	memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
	break;
    }
}
621 | |
/*
 * Portable C counterpart of downmix_C for the upmix pointer: rearranges the
 * 256-sample blocks of samples[] by copying blocks to higher offsets and
 * zeroing others.  No samples are summed here, despite the mix_* label
 * names.
 *
 * samples - consecutive 256-sample blocks, one per channel
 * acmod   - input channel configuration
 * output  - requested output; only (output & A52_CHANNEL_MASK) is used
 *
 * NOTE(review): presumably this restores the acmod block layout for blocks
 * that were produced already in the output layout - confirm with callers.
 * (acmod, output) pairs without a case leave samples[] untouched.
 */
static void upmix_C (sample_t * samples, int acmod, int output)
{
    switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {

    case CONVERT (A52_CHANNEL, A52_CHANNEL2):
	memcpy (samples + 256, samples, 256 * sizeof (sample_t));
	break;

    /* Mono output: zero every block above block 0, highest first. */
    case CONVERT (A52_3F2R, A52_MONO):
	zero (samples + 1024);
	/* fall through */
    case CONVERT (A52_3F1R, A52_MONO):
    case CONVERT (A52_2F2R, A52_MONO):
	zero (samples + 768);
	/* fall through */
    case CONVERT (A52_3F, A52_MONO):
    case CONVERT (A52_2F1R, A52_MONO):
	zero (samples + 512);
	/* fall through */
    case CONVERT (A52_CHANNEL, A52_MONO):
    case CONVERT (A52_STEREO, A52_MONO):
	zero (samples + 256);
	break;

    case CONVERT (A52_3F2R, A52_STEREO):
    case CONVERT (A52_3F2R, A52_DOLBY):
	zero (samples + 1024);
	/* fall through */
    case CONVERT (A52_3F1R, A52_STEREO):
    case CONVERT (A52_3F1R, A52_DOLBY):
	zero (samples + 768);
	/* fall through */
    case CONVERT (A52_3F, A52_STEREO):
    case CONVERT (A52_3F, A52_DOLBY):
    mix_3to2:
	/* move block 256 up to 512 and zero block 256 */
	memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t));
	zero (samples + 256);
	break;

    case CONVERT (A52_2F2R, A52_STEREO):
    case CONVERT (A52_2F2R, A52_DOLBY):
	zero (samples + 768);
	/* fall through */
    case CONVERT (A52_2F1R, A52_STEREO):
    case CONVERT (A52_2F1R, A52_DOLBY):
	zero (samples + 512);
	break;

    case CONVERT (A52_3F2R, A52_3F):
	zero (samples + 1024);
	/* fall through */
    case CONVERT (A52_3F1R, A52_3F):
    case CONVERT (A52_2F2R, A52_2F1R):
	zero (samples + 768);
	break;

    case CONVERT (A52_3F2R, A52_3F1R):
	zero (samples + 1024);
	break;

    case CONVERT (A52_3F2R, A52_2F1R):
	zero (samples + 1024);
	/* fall through */
    case CONVERT (A52_3F1R, A52_2F1R):
    mix_31to21:
	/* move block 512 up to 768, then finish as in mix_3to2 */
	memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
	goto mix_3to2;

    case CONVERT (A52_3F2R, A52_2F2R):
	/* move block 768 up to 1024 first, then continue as above */
	memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
	goto mix_31to21;
    }
}
3904 | 687 |
16173 | 688 #if defined(ARCH_X86) || defined(ARCH_X86_64) |
3904 | 689 static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias) |
690 { | |
691 asm volatile( | |
692 "movlps %2, %%xmm7 \n\t" | |
693 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 694 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
695 ASMALIGN16 |
3904 | 696 "1: \n\t" |
16173 | 697 "movaps (%0, %%"REG_S"), %%xmm0 \n\t" |
698 "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" | |
699 "addps (%1, %%"REG_S"), %%xmm0 \n\t" | |
700 "addps 16(%1, %%"REG_S"), %%xmm1\n\t" | |
3904 | 701 "addps %%xmm7, %%xmm0 \n\t" |
702 "addps %%xmm7, %%xmm1 \n\t" | |
16173 | 703 "movaps %%xmm0, (%1, %%"REG_S") \n\t" |
704 "movaps %%xmm1, 16(%1, %%"REG_S")\n\t" | |
705 "add $32, %%"REG_S" \n\t" | |
3904 | 706 " jnz 1b \n\t" |
707 :: "r" (src+256), "r" (dest+256), "m" (bias) | |
16173 | 708 : "%"REG_S |
3904 | 709 ); |
710 } | |
711 | |
712 static void mix3to1_SSE (sample_t * samples, sample_t bias) | |
713 { | |
714 asm volatile( | |
715 "movlps %1, %%xmm7 \n\t" | |
716 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 717 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
718 ASMALIGN16 |
3904 | 719 "1: \n\t" |
16173 | 720 "movaps (%0, %%"REG_S"), %%xmm0 \n\t" |
721 "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" | |
722 "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" | |
3904 | 723 "addps %%xmm7, %%xmm1 \n\t" |
724 "addps %%xmm1, %%xmm0 \n\t" | |
16173 | 725 "movaps %%xmm0, (%0, %%"REG_S") \n\t" |
726 "add $16, %%"REG_S" \n\t" | |
3904 | 727 " jnz 1b \n\t" |
728 :: "r" (samples+256), "m" (bias) | |
16173 | 729 : "%"REG_S |
3904 | 730 ); |
731 } | |
732 | |
733 static void mix4to1_SSE (sample_t * samples, sample_t bias) | |
734 { | |
735 asm volatile( | |
736 "movlps %1, %%xmm7 \n\t" | |
737 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 738 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
739 ASMALIGN16 |
3904 | 740 "1: \n\t" |
16173 | 741 "movaps (%0, %%"REG_S"), %%xmm0 \n\t" |
742 "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" | |
743 "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" | |
744 "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" | |
3904 | 745 "addps %%xmm7, %%xmm0 \n\t" |
746 "addps %%xmm1, %%xmm0 \n\t" | |
16173 | 747 "movaps %%xmm0, (%0, %%"REG_S") \n\t" |
748 "add $16, %%"REG_S" \n\t" | |
3904 | 749 " jnz 1b \n\t" |
750 :: "r" (samples+256), "m" (bias) | |
16173 | 751 : "%"REG_S |
3904 | 752 ); |
753 } | |
754 | |
755 static void mix5to1_SSE (sample_t * samples, sample_t bias) | |
756 { | |
757 asm volatile( | |
758 "movlps %1, %%xmm7 \n\t" | |
759 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 760 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
761 ASMALIGN16 |
3904 | 762 "1: \n\t" |
16173 | 763 "movaps (%0, %%"REG_S"), %%xmm0 \n\t" |
764 "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" | |
765 "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" | |
766 "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" | |
3904 | 767 "addps %%xmm7, %%xmm0 \n\t" |
16173 | 768 "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" |
3904 | 769 "addps %%xmm1, %%xmm0 \n\t" |
16173 | 770 "movaps %%xmm0, (%0, %%"REG_S") \n\t" |
771 "add $16, %%"REG_S" \n\t" | |
3904 | 772 " jnz 1b \n\t" |
773 :: "r" (samples+256), "m" (bias) | |
16173 | 774 : "%"REG_S |
3904 | 775 ); |
776 } | |
777 | |
778 static void mix3to2_SSE (sample_t * samples, sample_t bias) | |
779 { | |
780 asm volatile( | |
781 "movlps %1, %%xmm7 \n\t" | |
782 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 783 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
784 ASMALIGN16 |
3904 | 785 "1: \n\t" |
16173 | 786 "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" |
3904 | 787 "addps %%xmm7, %%xmm0 \n\t" //common |
16173 | 788 "movaps (%0, %%"REG_S"), %%xmm1 \n\t" |
789 "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" | |
3904 | 790 "addps %%xmm0, %%xmm1 \n\t" |
791 "addps %%xmm0, %%xmm2 \n\t" | |
16173 | 792 "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
793 "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" | |
794 "add $16, %%"REG_S" \n\t" | |
3904 | 795 " jnz 1b \n\t" |
796 :: "r" (samples+256), "m" (bias) | |
16173 | 797 : "%"REG_S |
3904 | 798 ); |
799 } | |
800 | |
801 static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias) | |
802 { | |
803 asm volatile( | |
804 "movlps %2, %%xmm7 \n\t" | |
805 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 806 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
807 ASMALIGN16 |
3904 | 808 "1: \n\t" |
16173 | 809 "movaps 1024(%1, %%"REG_S"), %%xmm0\n\t" |
3904 | 810 "addps %%xmm7, %%xmm0 \n\t" //common |
16173 | 811 "movaps (%0, %%"REG_S"), %%xmm1 \n\t" |
812 "movaps (%1, %%"REG_S"), %%xmm2 \n\t" | |
3904 | 813 "addps %%xmm0, %%xmm1 \n\t" |
814 "addps %%xmm0, %%xmm2 \n\t" | |
16173 | 815 "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
816 "movaps %%xmm2, (%1, %%"REG_S") \n\t" | |
817 "add $16, %%"REG_S" \n\t" | |
3904 | 818 " jnz 1b \n\t" |
819 :: "r" (left+256), "r" (right+256), "m" (bias) | |
16173 | 820 : "%"REG_S |
3904 | 821 ); |
822 } | |
823 | |
824 static void mix21toS_SSE (sample_t * samples, sample_t bias) | |
825 { | |
826 asm volatile( | |
827 "movlps %1, %%xmm7 \n\t" | |
828 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 829 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
830 ASMALIGN16 |
3904 | 831 "1: \n\t" |
16173 | 832 "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" // surround |
833 "movaps (%0, %%"REG_S"), %%xmm1 \n\t" | |
834 "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t" | |
3904 | 835 "addps %%xmm7, %%xmm1 \n\t" |
836 "addps %%xmm7, %%xmm2 \n\t" | |
837 "subps %%xmm0, %%xmm1 \n\t" | |
838 "addps %%xmm0, %%xmm2 \n\t" | |
16173 | 839 "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
840 "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" | |
841 "add $16, %%"REG_S" \n\t" | |
3904 | 842 " jnz 1b \n\t" |
843 :: "r" (samples+256), "m" (bias) | |
16173 | 844 : "%"REG_S |
3904 | 845 ); |
846 } | |
847 | |
848 static void mix31to2_SSE (sample_t * samples, sample_t bias) | |
849 { | |
850 asm volatile( | |
851 "movlps %1, %%xmm7 \n\t" | |
852 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 853 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
854 ASMALIGN16 |
3904 | 855 "1: \n\t" |
16173 | 856 "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" |
857 "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" | |
3904 | 858 "addps %%xmm7, %%xmm0 \n\t" // common |
16173 | 859 "movaps (%0, %%"REG_S"), %%xmm1 \n\t" |
860 "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" | |
3904 | 861 "addps %%xmm0, %%xmm1 \n\t" |
862 "addps %%xmm0, %%xmm2 \n\t" | |
16173 | 863 "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
864 "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" | |
865 "add $16, %%"REG_S" \n\t" | |
3904 | 866 " jnz 1b \n\t" |
867 :: "r" (samples+256), "m" (bias) | |
16173 | 868 : "%"REG_S |
3904 | 869 ); |
870 } | |
871 | |
872 static void mix31toS_SSE (sample_t * samples, sample_t bias) | |
873 { | |
874 asm volatile( | |
875 "movlps %1, %%xmm7 \n\t" | |
876 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 877 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
878 ASMALIGN16 |
3904 | 879 "1: \n\t" |
16173 | 880 "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" |
881 "movaps 3072(%0, %%"REG_S"), %%xmm3\n\t" // surround | |
3904 | 882 "addps %%xmm7, %%xmm0 \n\t" // common |
16173 | 883 "movaps (%0, %%"REG_S"), %%xmm1 \n\t" |
884 "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" | |
3904 | 885 "addps %%xmm0, %%xmm1 \n\t" |
886 "addps %%xmm0, %%xmm2 \n\t" | |
887 "subps %%xmm3, %%xmm1 \n\t" | |
888 "addps %%xmm3, %%xmm2 \n\t" | |
16173 | 889 "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
890 "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" | |
891 "add $16, %%"REG_S" \n\t" | |
3904 | 892 " jnz 1b \n\t" |
893 :: "r" (samples+256), "m" (bias) | |
16173 | 894 : "%"REG_S |
3904 | 895 ); |
896 } | |
897 | |
898 static void mix22toS_SSE (sample_t * samples, sample_t bias) | |
899 { | |
900 asm volatile( | |
901 "movlps %1, %%xmm7 \n\t" | |
902 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 903 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
904 ASMALIGN16 |
3904 | 905 "1: \n\t" |
16173 | 906 "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" |
907 "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" // surround | |
908 "movaps (%0, %%"REG_S"), %%xmm1 \n\t" | |
909 "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t" | |
3904 | 910 "addps %%xmm7, %%xmm1 \n\t" |
911 "addps %%xmm7, %%xmm2 \n\t" | |
912 "subps %%xmm0, %%xmm1 \n\t" | |
913 "addps %%xmm0, %%xmm2 \n\t" | |
16173 | 914 "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
915 "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" | |
916 "add $16, %%"REG_S" \n\t" | |
3904 | 917 " jnz 1b \n\t" |
918 :: "r" (samples+256), "m" (bias) | |
16173 | 919 : "%"REG_S |
3904 | 920 ); |
921 } | |
922 | |
923 static void mix32to2_SSE (sample_t * samples, sample_t bias) | |
924 { | |
925 asm volatile( | |
926 "movlps %1, %%xmm7 \n\t" | |
927 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 928 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
929 ASMALIGN16 |
3904 | 930 "1: \n\t" |
16173 | 931 "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" |
3904 | 932 "addps %%xmm7, %%xmm0 \n\t" // common |
933 "movaps %%xmm0, %%xmm1 \n\t" // common | |
16173 | 934 "addps (%0, %%"REG_S"), %%xmm0 \n\t" |
935 "addps 2048(%0, %%"REG_S"), %%xmm1\n\t" | |
936 "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" | |
937 "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" | |
938 "movaps %%xmm0, (%0, %%"REG_S") \n\t" | |
939 "movaps %%xmm1, 1024(%0, %%"REG_S")\n\t" | |
940 "add $16, %%"REG_S" \n\t" | |
3904 | 941 " jnz 1b \n\t" |
942 :: "r" (samples+256), "m" (bias) | |
16173 | 943 : "%"REG_S |
3904 | 944 ); |
945 } | |
946 | |
947 static void mix32toS_SSE (sample_t * samples, sample_t bias) | |
948 { | |
949 asm volatile( | |
950 "movlps %1, %%xmm7 \n\t" | |
951 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 952 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
953 ASMALIGN16 |
3904 | 954 "1: \n\t" |
16173 | 955 "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" |
956 "movaps 3072(%0, %%"REG_S"), %%xmm2\n\t" | |
3904 | 957 "addps %%xmm7, %%xmm0 \n\t" // common |
16173 | 958 "addps 4096(%0, %%"REG_S"), %%xmm2\n\t" // surround |
959 "movaps (%0, %%"REG_S"), %%xmm1 \n\t" | |
960 "movaps 2048(%0, %%"REG_S"), %%xmm3\n\t" | |
3904 | 961 "subps %%xmm2, %%xmm1 \n\t" |
962 "addps %%xmm2, %%xmm3 \n\t" | |
963 "addps %%xmm0, %%xmm1 \n\t" | |
964 "addps %%xmm0, %%xmm3 \n\t" | |
16173 | 965 "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
966 "movaps %%xmm3, 1024(%0, %%"REG_S")\n\t" | |
967 "add $16, %%"REG_S" \n\t" | |
3904 | 968 " jnz 1b \n\t" |
969 :: "r" (samples+256), "m" (bias) | |
16173 | 970 : "%"REG_S |
3904 | 971 ); |
972 } | |
973 | |
974 static void move2to1_SSE (sample_t * src, sample_t * dest, sample_t bias) | |
975 { | |
976 asm volatile( | |
977 "movlps %2, %%xmm7 \n\t" | |
978 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 979 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
980 ASMALIGN16 |
3904 | 981 "1: \n\t" |
16173 | 982 "movaps (%0, %%"REG_S"), %%xmm0 \n\t" |
983 "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" | |
984 "addps 1024(%0, %%"REG_S"), %%xmm0\n\t" | |
985 "addps 1040(%0, %%"REG_S"), %%xmm1\n\t" | |
3904 | 986 "addps %%xmm7, %%xmm0 \n\t" |
987 "addps %%xmm7, %%xmm1 \n\t" | |
16173 | 988 "movaps %%xmm0, (%1, %%"REG_S") \n\t" |
989 "movaps %%xmm1, 16(%1, %%"REG_S")\n\t" | |
990 "add $32, %%"REG_S" \n\t" | |
3904 | 991 " jnz 1b \n\t" |
992 :: "r" (src+256), "r" (dest+256), "m" (bias) | |
16173 | 993 : "%"REG_S |
3904 | 994 ); |
995 } | |
996 | |
997 static void zero_MMX(sample_t * samples) | |
998 { | |
999 asm volatile( | |
16173 | 1000 "mov $-1024, %%"REG_S" \n\t" |
3904 | 1001 "pxor %%mm0, %%mm0 \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
1002 ASMALIGN16 |
3904 | 1003 "1: \n\t" |
16173 | 1004 "movq %%mm0, (%0, %%"REG_S") \n\t" |
1005 "movq %%mm0, 8(%0, %%"REG_S") \n\t" | |
1006 "movq %%mm0, 16(%0, %%"REG_S") \n\t" | |
1007 "movq %%mm0, 24(%0, %%"REG_S") \n\t" | |
1008 "add $32, %%"REG_S" \n\t" | |
3904 | 1009 " jnz 1b \n\t" |
1010 "emms" | |
1011 :: "r" (samples+256) | |
16173 | 1012 : "%"REG_S |
3904 | 1013 ); |
1014 } | |
1015 | |
/*
 * Copy size bytes from src to dest, moving 64 bytes per iteration through
 * the eight MMX registers; any tail shorter than 64 bytes is copied with
 * memcpy. The regions must not overlap.
 *
 * movq has no alignment requirement, but 8-byte aligned buffers are
 * presumably faster.
 *
 * The previous version passed the pointer variables themselves as "m"
 * operands (so it accessed the stack slots holding the pointers rather
 * than the buffers), built offsets by pasting digits in front of the
 * operand text, never advanced the pointers, and left the MMX state
 * active on return.
 *
 * Note: currently unused.
 */
static void copy_MMX(void *dest,const void *src,unsigned size)
{
    unsigned char *d = dest;
    const unsigned char *s = src;
    unsigned blocks = size / 64;
    unsigned i;

    for (i = 0; i < blocks; i++) {
        __asm__ __volatile__(
            "movq   (%0), %%mm0 \n\t"
            "movq  8(%0), %%mm1 \n\t"
            "movq 16(%0), %%mm2 \n\t"
            "movq 24(%0), %%mm3 \n\t"
            "movq 32(%0), %%mm4 \n\t"
            "movq 40(%0), %%mm5 \n\t"
            "movq 48(%0), %%mm6 \n\t"
            "movq 56(%0), %%mm7 \n\t"
            "movq %%mm0,   (%1) \n\t"
            "movq %%mm1,  8(%1) \n\t"
            "movq %%mm2, 16(%1) \n\t"
            "movq %%mm3, 24(%1) \n\t"
            "movq %%mm4, 32(%1) \n\t"
            "movq %%mm5, 40(%1) \n\t"
            "movq %%mm6, 48(%1) \n\t"
            "movq %%mm7, 56(%1) \n\t"
            :
            : "r" (s), "r" (d)
            : "memory");
        s += 64;
        d += 64;
    }

    /* Release the MMX state so later x87 floating-point code works. */
    __asm__ __volatile__("emms" ::: "memory");

    /* Remaining bytes that do not fill a whole 64-byte chunk. */
    if (size % 64)
        memcpy(d, s, size % 64);
}
3904 | 1048 |
1049 static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias, | |
1050 sample_t clev, sample_t slev) | |
1051 { | |
1052 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
1053 | |
1054 case CONVERT (A52_CHANNEL, A52_CHANNEL2): | |
1055 memcpy (samples, samples + 256, 256 * sizeof (sample_t)); | |
1056 break; | |
1057 | |
1058 case CONVERT (A52_CHANNEL, A52_MONO): | |
1059 case CONVERT (A52_STEREO, A52_MONO): | |
1060 mix_2to1_SSE: | |
1061 mix2to1_SSE (samples, samples + 256, bias); | |
1062 break; | |
1063 | |
1064 case CONVERT (A52_2F1R, A52_MONO): | |
1065 if (slev == 0) | |
1066 goto mix_2to1_SSE; | |
1067 case CONVERT (A52_3F, A52_MONO): | |
1068 mix_3to1_SSE: | |
1069 mix3to1_SSE (samples, bias); | |
1070 break; | |
1071 | |
1072 case CONVERT (A52_3F1R, A52_MONO): | |
1073 if (slev == 0) | |
1074 goto mix_3to1_SSE; | |
1075 case CONVERT (A52_2F2R, A52_MONO): | |
1076 if (slev == 0) | |
1077 goto mix_2to1_SSE; | |
1078 mix4to1_SSE (samples, bias); | |
1079 break; | |
1080 | |
1081 case CONVERT (A52_3F2R, A52_MONO): | |
1082 if (slev == 0) | |
1083 goto mix_3to1_SSE; | |
1084 mix5to1_SSE (samples, bias); | |
1085 break; | |
1086 | |
1087 case CONVERT (A52_MONO, A52_DOLBY): | |
1088 memcpy (samples + 256, samples, 256 * sizeof (sample_t)); | |
1089 break; | |
1090 | |
1091 case CONVERT (A52_3F, A52_STEREO): | |
1092 case CONVERT (A52_3F, A52_DOLBY): | |
1093 mix_3to2_SSE: | |
1094 mix3to2_SSE (samples, bias); | |
1095 break; | |
1096 | |
1097 case CONVERT (A52_2F1R, A52_STEREO): | |
1098 if (slev == 0) | |
1099 break; | |
1100 mix21to2_SSE (samples, samples + 256, bias); | |
1101 break; | |
1102 | |
1103 case CONVERT (A52_2F1R, A52_DOLBY): | |
1104 mix21toS_SSE (samples, bias); | |
1105 break; | |
1106 | |
1107 case CONVERT (A52_3F1R, A52_STEREO): | |
1108 if (slev == 0) | |
1109 goto mix_3to2_SSE; | |
1110 mix31to2_SSE (samples, bias); | |
1111 break; | |
1112 | |
1113 case CONVERT (A52_3F1R, A52_DOLBY): | |
1114 mix31toS_SSE (samples, bias); | |
1115 break; | |
1116 | |
1117 case CONVERT (A52_2F2R, A52_STEREO): | |
1118 if (slev == 0) | |
1119 break; | |
1120 mix2to1_SSE (samples, samples + 512, bias); | |
1121 mix2to1_SSE (samples + 256, samples + 768, bias); | |
1122 break; | |
1123 | |
1124 case CONVERT (A52_2F2R, A52_DOLBY): | |
1125 mix22toS_SSE (samples, bias); | |
1126 break; | |
1127 | |
1128 case CONVERT (A52_3F2R, A52_STEREO): | |
1129 if (slev == 0) | |
1130 goto mix_3to2_SSE; | |
1131 mix32to2_SSE (samples, bias); | |
1132 break; | |
1133 | |
1134 case CONVERT (A52_3F2R, A52_DOLBY): | |
1135 mix32toS_SSE (samples, bias); | |
1136 break; | |
1137 | |
1138 case CONVERT (A52_3F1R, A52_3F): | |
1139 if (slev == 0) | |
1140 break; | |
1141 mix21to2_SSE (samples, samples + 512, bias); | |
1142 break; | |
1143 | |
1144 case CONVERT (A52_3F2R, A52_3F): | |
1145 if (slev == 0) | |
1146 break; | |
1147 mix2to1_SSE (samples, samples + 768, bias); | |
1148 mix2to1_SSE (samples + 512, samples + 1024, bias); | |
1149 break; | |
1150 | |
1151 case CONVERT (A52_3F1R, A52_2F1R): | |
1152 mix3to2_SSE (samples, bias); | |
1153 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1154 break; | |
1155 | |
1156 case CONVERT (A52_2F2R, A52_2F1R): | |
1157 mix2to1_SSE (samples + 512, samples + 768, bias); | |
1158 break; | |
1159 | |
1160 case CONVERT (A52_3F2R, A52_2F1R): | |
1161 mix3to2_SSE (samples, bias); //FIXME possible bug? (output doesnt seem to be used) | |
1162 move2to1_SSE (samples + 768, samples + 512, bias); | |
1163 break; | |
1164 | |
1165 case CONVERT (A52_3F2R, A52_3F1R): | |
1166 mix2to1_SSE (samples + 768, samples + 1024, bias); | |
1167 break; | |
1168 | |
1169 case CONVERT (A52_2F1R, A52_2F2R): | |
1170 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); | |
1171 break; | |
1172 | |
1173 case CONVERT (A52_3F1R, A52_2F2R): | |
1174 mix3to2_SSE (samples, bias); | |
1175 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1176 break; | |
1177 | |
1178 case CONVERT (A52_3F2R, A52_2F2R): | |
1179 mix3to2_SSE (samples, bias); | |
1180 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1181 memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t)); | |
1182 break; | |
1183 | |
1184 case CONVERT (A52_3F1R, A52_3F2R): | |
12137 | 1185 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); |
3904 | 1186 break; |
1187 } | |
1188 } | |
1189 | |
1190 static void upmix_MMX (sample_t * samples, int acmod, int output) | |
1191 { | |
1192 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
1193 | |
1194 case CONVERT (A52_CHANNEL, A52_CHANNEL2): | |
1195 memcpy (samples + 256, samples, 256 * sizeof (sample_t)); | |
1196 break; | |
1197 | |
1198 case CONVERT (A52_3F2R, A52_MONO): | |
1199 zero_MMX (samples + 1024); | |
1200 case CONVERT (A52_3F1R, A52_MONO): | |
1201 case CONVERT (A52_2F2R, A52_MONO): | |
1202 zero_MMX (samples + 768); | |
1203 case CONVERT (A52_3F, A52_MONO): | |
1204 case CONVERT (A52_2F1R, A52_MONO): | |
1205 zero_MMX (samples + 512); | |
1206 case CONVERT (A52_CHANNEL, A52_MONO): | |
1207 case CONVERT (A52_STEREO, A52_MONO): | |
1208 zero_MMX (samples + 256); | |
1209 break; | |
1210 | |
1211 case CONVERT (A52_3F2R, A52_STEREO): | |
1212 case CONVERT (A52_3F2R, A52_DOLBY): | |
1213 zero_MMX (samples + 1024); | |
1214 case CONVERT (A52_3F1R, A52_STEREO): | |
1215 case CONVERT (A52_3F1R, A52_DOLBY): | |
1216 zero_MMX (samples + 768); | |
1217 case CONVERT (A52_3F, A52_STEREO): | |
1218 case CONVERT (A52_3F, A52_DOLBY): | |
1219 mix_3to2_MMX: | |
1220 memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t)); | |
1221 zero_MMX (samples + 256); | |
1222 break; | |
1223 | |
1224 case CONVERT (A52_2F2R, A52_STEREO): | |
1225 case CONVERT (A52_2F2R, A52_DOLBY): | |
1226 zero_MMX (samples + 768); | |
1227 case CONVERT (A52_2F1R, A52_STEREO): | |
1228 case CONVERT (A52_2F1R, A52_DOLBY): | |
1229 zero_MMX (samples + 512); | |
1230 break; | |
1231 | |
1232 case CONVERT (A52_3F2R, A52_3F): | |
1233 zero_MMX (samples + 1024); | |
1234 case CONVERT (A52_3F1R, A52_3F): | |
1235 case CONVERT (A52_2F2R, A52_2F1R): | |
1236 zero_MMX (samples + 768); | |
1237 break; | |
1238 | |
1239 case CONVERT (A52_3F2R, A52_3F1R): | |
1240 zero_MMX (samples + 1024); | |
1241 break; | |
1242 | |
1243 case CONVERT (A52_3F2R, A52_2F1R): | |
1244 zero_MMX (samples + 1024); | |
1245 case CONVERT (A52_3F1R, A52_2F1R): | |
1246 mix_31to21_MMX: | |
1247 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); | |
1248 goto mix_3to2_MMX; | |
1249 | |
1250 case CONVERT (A52_3F2R, A52_2F2R): | |
1251 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); | |
1252 goto mix_31to21_MMX; | |
1253 } | |
1254 } | |
4233 | 1255 |
1256 static void mix2to1_3dnow (sample_t * dest, sample_t * src, sample_t bias) | |
1257 { | |
1258 asm volatile( | |
1259 "movd %2, %%mm7 \n\t" | |
1260 "punpckldq %2, %%mm7 \n\t" | |
16173 | 1261 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
1262 ASMALIGN16 |
4233 | 1263 "1: \n\t" |
16173 | 1264 "movq (%0, %%"REG_S"), %%mm0 \n\t" |
1265 "movq 8(%0, %%"REG_S"), %%mm1 \n\t" | |
1266 "movq 16(%0, %%"REG_S"), %%mm2 \n\t" | |
1267 "movq 24(%0, %%"REG_S"), %%mm3 \n\t" | |
1268 "pfadd (%1, %%"REG_S"), %%mm0 \n\t" | |
1269 "pfadd 8(%1, %%"REG_S"), %%mm1 \n\t" | |
1270 "pfadd 16(%1, %%"REG_S"), %%mm2 \n\t" | |
1271 "pfadd 24(%1, %%"REG_S"), %%mm3 \n\t" | |
4233 | 1272 "pfadd %%mm7, %%mm0 \n\t" |
1273 "pfadd %%mm7, %%mm1 \n\t" | |
1274 "pfadd %%mm7, %%mm2 \n\t" | |
1275 "pfadd %%mm7, %%mm3 \n\t" | |
16173 | 1276 "movq %%mm0, (%1, %%"REG_S") \n\t" |
1277 "movq %%mm1, 8(%1, %%"REG_S") \n\t" | |
1278 "movq %%mm2, 16(%1, %%"REG_S") \n\t" | |
1279 "movq %%mm3, 24(%1, %%"REG_S") \n\t" | |
1280 "add $32, %%"REG_S" \n\t" | |
4233 | 1281 " jnz 1b \n\t" |
1282 :: "r" (src+256), "r" (dest+256), "m" (bias) | |
16173 | 1283 : "%"REG_S |
4233 | 1284 ); |
1285 } | |
1286 | |
1287 static void mix3to1_3dnow (sample_t * samples, sample_t bias) | |
1288 { | |
1289 asm volatile( | |
1290 "movd %1, %%mm7 \n\t" | |
1291 "punpckldq %1, %%mm7 \n\t" | |
16173 | 1292 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
1293 ASMALIGN16 |
4233 | 1294 "1: \n\t" |
16173 | 1295 "movq (%0, %%"REG_S"), %%mm0 \n\t" |
1296 "movq 8(%0, %%"REG_S"), %%mm1 \n\t" | |
1297 "movq 1024(%0, %%"REG_S"), %%mm2\n\t" | |
1298 "movq 1032(%0, %%"REG_S"), %%mm3\n\t" | |
1299 "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" | |
1300 "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" | |
4233 | 1301 "pfadd %%mm7, %%mm0 \n\t" |
1302 "pfadd %%mm7, %%mm1 \n\t" | |
1303 "pfadd %%mm2, %%mm0 \n\t" | |
1304 "pfadd %%mm3, %%mm1 \n\t" | |
16173 | 1305 "movq %%mm0, (%0, %%"REG_S") \n\t" |
1306 "movq %%mm1, 8(%0, %%"REG_S") \n\t" | |
1307 "add $16, %%"REG_S" \n\t" | |
4233 | 1308 " jnz 1b \n\t" |
1309 :: "r" (samples+256), "m" (bias) | |
16173 | 1310 : "%"REG_S |
4233 | 1311 ); |
1312 } | |
1313 | |
1314 static void mix4to1_3dnow (sample_t * samples, sample_t bias) | |
1315 { | |
1316 asm volatile( | |
1317 "movd %1, %%mm7 \n\t" | |
1318 "punpckldq %1, %%mm7 \n\t" | |
16173 | 1319 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
1320 ASMALIGN16 |
4233 | 1321 "1: \n\t" |
16173 | 1322 "movq (%0, %%"REG_S"), %%mm0 \n\t" |
1323 "movq 8(%0, %%"REG_S"), %%mm1 \n\t" | |
1324 "movq 1024(%0, %%"REG_S"), %%mm2\n\t" | |
1325 "movq 1032(%0, %%"REG_S"), %%mm3\n\t" | |
1326 "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" | |
1327 "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" | |
1328 "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" | |
1329 "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t" | |
4233 | 1330 "pfadd %%mm7, %%mm0 \n\t" |
1331 "pfadd %%mm7, %%mm1 \n\t" | |
1332 "pfadd %%mm2, %%mm0 \n\t" | |
1333 "pfadd %%mm3, %%mm1 \n\t" | |
16173 | 1334 "movq %%mm0, (%0, %%"REG_S") \n\t" |
1335 "movq %%mm1, 8(%0, %%"REG_S") \n\t" | |
1336 "add $16, %%"REG_S" \n\t" | |
4233 | 1337 " jnz 1b \n\t" |
1338 :: "r" (samples+256), "m" (bias) | |
16173 | 1339 : "%"REG_S |
4233 | 1340 ); |
1341 } | |
1342 | |
1343 static void mix5to1_3dnow (sample_t * samples, sample_t bias) | |
1344 { | |
1345 asm volatile( | |
1346 "movd %1, %%mm7 \n\t" | |
1347 "punpckldq %1, %%mm7 \n\t" | |
16173 | 1348 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
1349 ASMALIGN16 |
4233 | 1350 "1: \n\t" |
16173 | 1351 "movq (%0, %%"REG_S"), %%mm0 \n\t" |
1352 "movq 8(%0, %%"REG_S"), %%mm1 \n\t" | |
1353 "movq 1024(%0, %%"REG_S"), %%mm2\n\t" | |
1354 "movq 1032(%0, %%"REG_S"), %%mm3\n\t" | |
1355 "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" | |
1356 "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" | |
1357 "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" | |
1358 "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t" | |
4233 | 1359 "pfadd %%mm7, %%mm0 \n\t" |
1360 "pfadd %%mm7, %%mm1 \n\t" | |
16173 | 1361 "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" |
1362 "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t" | |
4233 | 1363 "pfadd %%mm2, %%mm0 \n\t" |
1364 "pfadd %%mm3, %%mm1 \n\t" | |
16173 | 1365 "movq %%mm0, (%0, %%"REG_S") \n\t" |
1366 "movq %%mm1, 8(%0, %%"REG_S") \n\t" | |
1367 "add $16, %%"REG_S" \n\t" | |
4233 | 1368 " jnz 1b \n\t" |
1369 :: "r" (samples+256), "m" (bias) | |
16173 | 1370 : "%"REG_S |
4233 | 1371 ); |
1372 } | |
1373 | |
1374 static void mix3to2_3dnow (sample_t * samples, sample_t bias) | |
1375 { | |
1376 asm volatile( | |
1377 "movd %1, %%mm7 \n\t" | |
1378 "punpckldq %1, %%mm7 \n\t" | |
16173 | 1379 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
1380 ASMALIGN16 |
4233 | 1381 "1: \n\t" |
16173 | 1382 "movq 1024(%0, %%"REG_S"), %%mm0\n\t" |
1383 "movq 1032(%0, %%"REG_S"), %%mm1\n\t" | |
4233 | 1384 "pfadd %%mm7, %%mm0 \n\t" //common |
1385 "pfadd %%mm7, %%mm1 \n\t" //common | |
16173 | 1386 "movq (%0, %%"REG_S"), %%mm2 \n\t" |
1387 "movq 8(%0, %%"REG_S"), %%mm3 \n\t" | |
1388 "movq 2048(%0, %%"REG_S"), %%mm4\n\t" | |
1389 "movq 2056(%0, %%"REG_S"), %%mm5\n\t" | |
4233 | 1390 "pfadd %%mm0, %%mm2 \n\t" |
5912 | 1391 "pfadd %%mm1, %%mm3 \n\t" |
4233 | 1392 "pfadd %%mm0, %%mm4 \n\t" |
5912 | 1393 "pfadd %%mm1, %%mm5 \n\t" |
16173 | 1394 "movq %%mm2, (%0, %%"REG_S") \n\t" |
1395 "movq %%mm3, 8(%0, %%"REG_S") \n\t" | |
1396 "movq %%mm4, 1024(%0, %%"REG_S")\n\t" | |
1397 "movq %%mm5, 1032(%0, %%"REG_S")\n\t" | |
1398 "add $16, %%"REG_S" \n\t" | |
4233 | 1399 " jnz 1b \n\t" |
1400 :: "r" (samples+256), "m" (bias) | |
16173 | 1401 : "%"REG_S |
4233 | 1402 ); |
1403 } | |
1404 | |
1405 static void mix21to2_3dnow (sample_t * left, sample_t * right, sample_t bias) | |
1406 { | |
1407 asm volatile( | |
1408 "movd %2, %%mm7 \n\t" | |
1409 "punpckldq %2, %%mm7 \n\t" | |
16173 | 1410 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
1411 ASMALIGN16 |
4233 | 1412 "1: \n\t" |
16173 | 1413 "movq 1024(%1, %%"REG_S"), %%mm0\n\t" |
1414 "movq 1032(%1, %%"REG_S"), %%mm1\n\t" | |
4233 | 1415 "pfadd %%mm7, %%mm0 \n\t" //common |
1416 "pfadd %%mm7, %%mm1 \n\t" //common | |
16173 | 1417 "movq (%0, %%"REG_S"), %%mm2 \n\t" |
1418 "movq 8(%0, %%"REG_S"), %%mm3 \n\t" | |
1419 "movq (%1, %%"REG_S"), %%mm4 \n\t" | |
1420 "movq 8(%1, %%"REG_S"), %%mm5 \n\t" | |
4233 | 1421 "pfadd %%mm0, %%mm2 \n\t" |
1422 "pfadd %%mm1, %%mm3 \n\t" | |
1423 "pfadd %%mm0, %%mm4 \n\t" | |
1424 "pfadd %%mm1, %%mm5 \n\t" | |
16173 | 1425 "movq %%mm2, (%0, %%"REG_S") \n\t" |
1426 "movq %%mm3, 8(%0, %%"REG_S") \n\t" | |
1427 "movq %%mm4, (%1, %%"REG_S") \n\t" | |
1428 "movq %%mm5, 8(%1, %%"REG_S") \n\t" | |
1429 "add $16, %%"REG_S" \n\t" | |
4233 | 1430 " jnz 1b \n\t" |
1431 :: "r" (left+256), "r" (right+256), "m" (bias) | |
16173 | 1432 : "%"REG_S |
4233 | 1433 ); |
1434 } | |
1435 | |
1436 static void mix21toS_3dnow (sample_t * samples, sample_t bias) | |
1437 { | |
1438 asm volatile( | |
1439 "movd %1, %%mm7 \n\t" | |
1440 "punpckldq %1, %%mm7 \n\t" | |
16173 | 1441 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
1442 ASMALIGN16 |
4233 | 1443 "1: \n\t" |
16173 | 1444 "movq 2048(%0, %%"REG_S"), %%mm0\n\t" // surround |
1445 "movq 2056(%0, %%"REG_S"), %%mm1\n\t" // surround | |
1446 "movq (%0, %%"REG_S"), %%mm2 \n\t" | |
1447 "movq 8(%0, %%"REG_S"), %%mm3 \n\t" | |
1448 "movq 1024(%0, %%"REG_S"), %%mm4\n\t" | |
1449 "movq 1032(%0, %%"REG_S"), %%mm5\n\t" | |
4233 | 1450 "pfadd %%mm7, %%mm2 \n\t" |
1451 "pfadd %%mm7, %%mm3 \n\t" | |
1452 "pfadd %%mm7, %%mm4 \n\t" | |
1453 "pfadd %%mm7, %%mm5 \n\t" | |
1454 "pfsub %%mm0, %%mm2 \n\t" | |
1455 "pfsub %%mm1, %%mm3 \n\t" | |
1456 "pfadd %%mm0, %%mm4 \n\t" | |
1457 "pfadd %%mm1, %%mm5 \n\t" | |
16173 | 1458 "movq %%mm2, (%0, %%"REG_S") \n\t" |
1459 "movq %%mm3, 8(%0, %%"REG_S") \n\t" | |
1460 "movq %%mm4, 1024(%0, %%"REG_S")\n\t" | |
1461 "movq %%mm5, 1032(%0, %%"REG_S")\n\t" | |
1462 "add $16, %%"REG_S" \n\t" | |
4233 | 1463 " jnz 1b \n\t" |
1464 :: "r" (samples+256), "m" (bias) | |
16173 | 1465 : "%"REG_S |
4233 | 1466 ); |
1467 } | |
1468 | |
1469 static void mix31to2_3dnow (sample_t * samples, sample_t bias) | |
1470 { | |
1471 asm volatile( | |
1472 "movd %1, %%mm7 \n\t" | |
1473 "punpckldq %1, %%mm7 \n\t" | |
16173 | 1474 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
1475 ASMALIGN16 |
4233 | 1476 "1: \n\t" |
16173 | 1477 "movq 1024(%0, %%"REG_S"), %%mm0\n\t" |
1478 "movq 1032(%0, %%"REG_S"), %%mm1\n\t" | |
1479 "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" | |
1480 "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" | |
4233 | 1481 "pfadd %%mm7, %%mm0 \n\t" // common |
1482 "pfadd %%mm7, %%mm1 \n\t" // common | |
16173 | 1483 "movq (%0, %%"REG_S"), %%mm2 \n\t" |
1484 "movq 8(%0, %%"REG_S"), %%mm3 \n\t" | |
1485 "movq 2048(%0, %%"REG_S"), %%mm4\n\t" | |
1486 "movq 2056(%0, %%"REG_S"), %%mm5\n\t" | |
4233 | 1487 "pfadd %%mm0, %%mm2 \n\t" |
1488 "pfadd %%mm1, %%mm3 \n\t" | |
1489 "pfadd %%mm0, %%mm4 \n\t" | |
1490 "pfadd %%mm1, %%mm5 \n\t" | |
16173 | 1491 "movq %%mm2, (%0, %%"REG_S") \n\t" |
1492 "movq %%mm3, 8(%0, %%"REG_S") \n\t" | |
1493 "movq %%mm4, 1024(%0, %%"REG_S")\n\t" | |
1494 "movq %%mm5, 1032(%0, %%"REG_S")\n\t" | |
1495 "add $16, %%"REG_S" \n\t" | |
4233 | 1496 " jnz 1b \n\t" |
1497 :: "r" (samples+256), "m" (bias) | |
16173 | 1498 : "%"REG_S |
4233 | 1499 ); |
1500 } | |
1501 | |
1502 static void mix31toS_3dnow (sample_t * samples, sample_t bias) | |
1503 { | |
1504 asm volatile( | |
1505 "movd %1, %%mm7 \n\t" | |
1506 "punpckldq %1, %%mm7 \n\t" | |
16173 | 1507 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
1508 ASMALIGN16 |
4233 | 1509 "1: \n\t" |
16173 | 1510 "movq 1024(%0, %%"REG_S"), %%mm0\n\t" |
1511 "movq 1032(%0, %%"REG_S"), %%mm1\n\t" | |
4233 | 1512 "pfadd %%mm7, %%mm0 \n\t" // common |
1513 "pfadd %%mm7, %%mm1 \n\t" // common | |
16173 | 1514 "movq (%0, %%"REG_S"), %%mm2 \n\t" |
1515 "movq 8(%0, %%"REG_S"), %%mm3 \n\t" | |
1516 "movq 2048(%0, %%"REG_S"), %%mm4\n\t" | |
1517 "movq 2056(%0, %%"REG_S"), %%mm5\n\t" | |
4233 | 1518 "pfadd %%mm0, %%mm2 \n\t" |
1519 "pfadd %%mm1, %%mm3 \n\t" | |
1520 "pfadd %%mm0, %%mm4 \n\t" | |
1521 "pfadd %%mm1, %%mm5 \n\t" | |
16173 | 1522 "movq 3072(%0, %%"REG_S"), %%mm0\n\t" // surround |
1523 "movq 3080(%0, %%"REG_S"), %%mm1\n\t" // surround | |
4233 | 1524 "pfsub %%mm0, %%mm2 \n\t" |
1525 "pfsub %%mm1, %%mm3 \n\t" | |
1526 "pfadd %%mm0, %%mm4 \n\t" | |
1527 "pfadd %%mm1, %%mm5 \n\t" | |
16173 | 1528 "movq %%mm2, (%0, %%"REG_S") \n\t" |
1529 "movq %%mm3, 8(%0, %%"REG_S") \n\t" | |
1530 "movq %%mm4, 1024(%0, %%"REG_S")\n\t" | |
1531 "movq %%mm5, 1032(%0, %%"REG_S")\n\t" | |
1532 "add $16, %%"REG_S" \n\t" | |
4233 | 1533 " jnz 1b \n\t" |
1534 :: "r" (samples+256), "m" (bias) | |
16173 | 1535 : "%"REG_S |
4233 | 1536 ); |
1537 } | |
1538 | |
1539 static void mix22toS_3dnow (sample_t * samples, sample_t bias) | |
1540 { | |
1541 asm volatile( | |
1542 "movd %1, %%mm7 \n\t" | |
1543 "punpckldq %1, %%mm7 \n\t" | |
16173 | 1544 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
1545 ASMALIGN16 |
4233 | 1546 "1: \n\t" |
16173 | 1547 "movq 2048(%0, %%"REG_S"), %%mm0\n\t" |
1548 "movq 2056(%0, %%"REG_S"), %%mm1\n\t" | |
1549 "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" // surround | |
1550 "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" // surround | |
1551 "movq (%0, %%"REG_S"), %%mm2 \n\t" | |
1552 "movq 8(%0, %%"REG_S"), %%mm3 \n\t" | |
1553 "movq 1024(%0, %%"REG_S"), %%mm4\n\t" | |
1554 "movq 1032(%0, %%"REG_S"), %%mm5\n\t" | |
4233 | 1555 "pfadd %%mm7, %%mm2 \n\t" |
1556 "pfadd %%mm7, %%mm3 \n\t" | |
1557 "pfadd %%mm7, %%mm4 \n\t" | |
1558 "pfadd %%mm7, %%mm5 \n\t" | |
1559 "pfsub %%mm0, %%mm2 \n\t" | |
1560 "pfsub %%mm1, %%mm3 \n\t" | |
1561 "pfadd %%mm0, %%mm4 \n\t" | |
1562 "pfadd %%mm1, %%mm5 \n\t" | |
16173 | 1563 "movq %%mm2, (%0, %%"REG_S") \n\t" |
1564 "movq %%mm3, 8(%0, %%"REG_S") \n\t" | |
1565 "movq %%mm4, 1024(%0, %%"REG_S")\n\t" | |
1566 "movq %%mm5, 1032(%0, %%"REG_S")\n\t" | |
1567 "add $16, %%"REG_S" \n\t" | |
4233 | 1568 " jnz 1b \n\t" |
1569 :: "r" (samples+256), "m" (bias) | |
16173 | 1570 : "%"REG_S |
4233 | 1571 ); |
1572 } | |
1573 | |
1574 static void mix32to2_3dnow (sample_t * samples, sample_t bias) | |
1575 { | |
1576 asm volatile( | |
1577 "movd %1, %%mm7 \n\t" | |
1578 "punpckldq %1, %%mm7 \n\t" | |
16173 | 1579 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
1580 ASMALIGN16 |
4233 | 1581 "1: \n\t" |
16173 | 1582 "movq 1024(%0, %%"REG_S"), %%mm0\n\t" |
1583 "movq 1032(%0, %%"REG_S"), %%mm1\n\t" | |
4233 | 1584 "pfadd %%mm7, %%mm0 \n\t" // common |
1585 "pfadd %%mm7, %%mm1 \n\t" // common | |
1586 "movq %%mm0, %%mm2 \n\t" // common | |
1587 "movq %%mm1, %%mm3 \n\t" // common | |
16173 | 1588 "pfadd (%0, %%"REG_S"), %%mm0 \n\t" |
1589 "pfadd 8(%0, %%"REG_S"), %%mm1 \n\t" | |
1590 "pfadd 2048(%0, %%"REG_S"), %%mm2\n\t" | |
1591 "pfadd 2056(%0, %%"REG_S"), %%mm3\n\t" | |
1592 "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" | |
1593 "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" | |
1594 "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" | |
1595 "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t" | |
1596 "movq %%mm0, (%0, %%"REG_S") \n\t" | |
1597 "movq %%mm1, 8(%0, %%"REG_S") \n\t" | |
1598 "movq %%mm2, 1024(%0, %%"REG_S")\n\t" | |
1599 "movq %%mm3, 1032(%0, %%"REG_S")\n\t" | |
1600 "add $16, %%"REG_S" \n\t" | |
4233 | 1601 " jnz 1b \n\t" |
1602 :: "r" (samples+256), "m" (bias) | |
16173 | 1603 : "%"REG_S |
4233 | 1604 ); |
1605 } | |
1606 | |
1607 /* todo: should be optimized better */ | |
1608 static void mix32toS_3dnow (sample_t * samples, sample_t bias) | |
1609 { | |
1610 asm volatile( | |
16173 | 1611 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
1612 ASMALIGN16 |
4233 | 1613 "1: \n\t" |
1614 "movd %1, %%mm7 \n\t" | |
1615 "punpckldq %1, %%mm7 \n\t" | |
16173 | 1616 "movq 1024(%0, %%"REG_S"), %%mm0\n\t" |
1617 "movq 1032(%0, %%"REG_S"), %%mm1\n\t" | |
1618 "movq 3072(%0, %%"REG_S"), %%mm4\n\t" | |
1619 "movq 3080(%0, %%"REG_S"), %%mm5\n\t" | |
4233 | 1620 "pfadd %%mm7, %%mm0 \n\t" // common |
1621 "pfadd %%mm7, %%mm1 \n\t" // common | |
16173 | 1622 "pfadd 4096(%0, %%"REG_S"), %%mm4\n\t" // surround |
1623 "pfadd 4104(%0, %%"REG_S"), %%mm5\n\t" // surround | |
1624 "movq (%0, %%"REG_S"), %%mm2 \n\t" | |
1625 "movq 8(%0, %%"REG_S"), %%mm3 \n\t" | |
1626 "movq 2048(%0, %%"REG_S"), %%mm6\n\t" | |
1627 "movq 2056(%0, %%"REG_S"), %%mm7\n\t" | |
4233 | 1628 "pfsub %%mm4, %%mm2 \n\t" |
1629 "pfsub %%mm5, %%mm3 \n\t" | |
1630 "pfadd %%mm4, %%mm6 \n\t" | |
1631 "pfadd %%mm5, %%mm7 \n\t" | |
1632 "pfadd %%mm0, %%mm2 \n\t" | |
1633 "pfadd %%mm1, %%mm3 \n\t" | |
1634 "pfadd %%mm0, %%mm6 \n\t" | |
1635 "pfadd %%mm1, %%mm7 \n\t" | |
16173 | 1636 "movq %%mm2, (%0, %%"REG_S") \n\t" |
1637 "movq %%mm3, 8(%0, %%"REG_S") \n\t" | |
1638 "movq %%mm6, 1024(%0, %%"REG_S")\n\t" | |
1639 "movq %%mm7, 1032(%0, %%"REG_S")\n\t" | |
1640 "add $16, %%"REG_S" \n\t" | |
4233 | 1641 " jnz 1b \n\t" |
1642 :: "r" (samples+256), "m" (bias) | |
16173 | 1643 : "%"REG_S |
4233 | 1644 ); |
1645 } | |
1646 | |
1647 static void move2to1_3dnow (sample_t * src, sample_t * dest, sample_t bias) | |
1648 { | |
1649 asm volatile( | |
1650 "movd %2, %%mm7 \n\t" | |
1651 "punpckldq %2, %%mm7 \n\t" | |
16173 | 1652 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
1653 ASMALIGN16 |
4233 | 1654 "1: \n\t" |
16173 | 1655 "movq (%0, %%"REG_S"), %%mm0 \n\t" |
1656 "movq 8(%0, %%"REG_S"), %%mm1 \n\t" | |
1657 "movq 16(%0, %%"REG_S"), %%mm2 \n\t" | |
1658 "movq 24(%0, %%"REG_S"), %%mm3 \n\t" | |
1659 "pfadd 1024(%0, %%"REG_S"), %%mm0\n\t" | |
1660 "pfadd 1032(%0, %%"REG_S"), %%mm1\n\t" | |
1661 "pfadd 1040(%0, %%"REG_S"), %%mm2\n\t" | |
1662 "pfadd 1048(%0, %%"REG_S"), %%mm3\n\t" | |
4233 | 1663 "pfadd %%mm7, %%mm0 \n\t" |
1664 "pfadd %%mm7, %%mm1 \n\t" | |
1665 "pfadd %%mm7, %%mm2 \n\t" | |
1666 "pfadd %%mm7, %%mm3 \n\t" | |
16173 | 1667 "movq %%mm0, (%1, %%"REG_S") \n\t" |
1668 "movq %%mm1, 8(%1, %%"REG_S") \n\t" | |
1669 "movq %%mm2, 16(%1, %%"REG_S") \n\t" | |
1670 "movq %%mm3, 24(%1, %%"REG_S") \n\t" | |
1671 "add $32, %%"REG_S" \n\t" | |
4233 | 1672 " jnz 1b \n\t" |
1673 :: "r" (src+256), "r" (dest+256), "m" (bias) | |
16173 | 1674 : "%"REG_S |
4233 | 1675 ); |
1676 } | |
1677 | |
1678 static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t bias, | |
1679 sample_t clev, sample_t slev) | |
1680 { | |
1681 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
1682 | |
1683 case CONVERT (A52_CHANNEL, A52_CHANNEL2): | |
1684 memcpy (samples, samples + 256, 256 * sizeof (sample_t)); | |
1685 break; | |
1686 | |
1687 case CONVERT (A52_CHANNEL, A52_MONO): | |
1688 case CONVERT (A52_STEREO, A52_MONO): | |
1689 mix_2to1_3dnow: | |
1690 mix2to1_3dnow (samples, samples + 256, bias); | |
1691 break; | |
1692 | |
1693 case CONVERT (A52_2F1R, A52_MONO): | |
1694 if (slev == 0) | |
1695 goto mix_2to1_3dnow; | |
1696 case CONVERT (A52_3F, A52_MONO): | |
1697 mix_3to1_3dnow: | |
1698 mix3to1_3dnow (samples, bias); | |
1699 break; | |
1700 | |
1701 case CONVERT (A52_3F1R, A52_MONO): | |
1702 if (slev == 0) | |
1703 goto mix_3to1_3dnow; | |
1704 case CONVERT (A52_2F2R, A52_MONO): | |
1705 if (slev == 0) | |
1706 goto mix_2to1_3dnow; | |
1707 mix4to1_3dnow (samples, bias); | |
1708 break; | |
1709 | |
1710 case CONVERT (A52_3F2R, A52_MONO): | |
1711 if (slev == 0) | |
1712 goto mix_3to1_3dnow; | |
1713 mix5to1_3dnow (samples, bias); | |
1714 break; | |
1715 | |
1716 case CONVERT (A52_MONO, A52_DOLBY): | |
1717 memcpy (samples + 256, samples, 256 * sizeof (sample_t)); | |
1718 break; | |
1719 | |
1720 case CONVERT (A52_3F, A52_STEREO): | |
1721 case CONVERT (A52_3F, A52_DOLBY): | |
1722 mix_3to2_3dnow: | |
1723 mix3to2_3dnow (samples, bias); | |
1724 break; | |
1725 | |
1726 case CONVERT (A52_2F1R, A52_STEREO): | |
1727 if (slev == 0) | |
1728 break; | |
1729 mix21to2_3dnow (samples, samples + 256, bias); | |
1730 break; | |
1731 | |
1732 case CONVERT (A52_2F1R, A52_DOLBY): | |
1733 mix21toS_3dnow (samples, bias); | |
1734 break; | |
1735 | |
1736 case CONVERT (A52_3F1R, A52_STEREO): | |
1737 if (slev == 0) | |
1738 goto mix_3to2_3dnow; | |
1739 mix31to2_3dnow (samples, bias); | |
1740 break; | |
1741 | |
1742 case CONVERT (A52_3F1R, A52_DOLBY): | |
1743 mix31toS_3dnow (samples, bias); | |
1744 break; | |
1745 | |
1746 case CONVERT (A52_2F2R, A52_STEREO): | |
1747 if (slev == 0) | |
1748 break; | |
1749 mix2to1_3dnow (samples, samples + 512, bias); | |
1750 mix2to1_3dnow (samples + 256, samples + 768, bias); | |
1751 break; | |
1752 | |
1753 case CONVERT (A52_2F2R, A52_DOLBY): | |
1754 mix22toS_3dnow (samples, bias); | |
1755 break; | |
1756 | |
1757 case CONVERT (A52_3F2R, A52_STEREO): | |
1758 if (slev == 0) | |
1759 goto mix_3to2_3dnow; | |
1760 mix32to2_3dnow (samples, bias); | |
1761 break; | |
1762 | |
1763 case CONVERT (A52_3F2R, A52_DOLBY): | |
1764 mix32toS_3dnow (samples, bias); | |
1765 break; | |
1766 | |
1767 case CONVERT (A52_3F1R, A52_3F): | |
1768 if (slev == 0) | |
1769 break; | |
1770 mix21to2_3dnow (samples, samples + 512, bias); | |
1771 break; | |
1772 | |
1773 case CONVERT (A52_3F2R, A52_3F): | |
1774 if (slev == 0) | |
1775 break; | |
1776 mix2to1_3dnow (samples, samples + 768, bias); | |
1777 mix2to1_3dnow (samples + 512, samples + 1024, bias); | |
1778 break; | |
1779 | |
1780 case CONVERT (A52_3F1R, A52_2F1R): | |
1781 mix3to2_3dnow (samples, bias); | |
1782 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1783 break; | |
1784 | |
1785 case CONVERT (A52_2F2R, A52_2F1R): | |
1786 mix2to1_3dnow (samples + 512, samples + 768, bias); | |
1787 break; | |
1788 | |
1789 case CONVERT (A52_3F2R, A52_2F1R): | |
1790 mix3to2_3dnow (samples, bias); //FIXME possible bug? (output doesn't seem to be used) | |
1791 move2to1_3dnow (samples + 768, samples + 512, bias); | |
1792 break; | |
1793 | |
1794 case CONVERT (A52_3F2R, A52_3F1R): | |
1795 mix2to1_3dnow (samples + 768, samples + 1024, bias); | |
1796 break; | |
1797 | |
1798 case CONVERT (A52_2F1R, A52_2F2R): | |
1799 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); | |
1800 break; | |
1801 | |
1802 case CONVERT (A52_3F1R, A52_2F2R): | |
1803 mix3to2_3dnow (samples, bias); | |
1804 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1805 break; | |
1806 | |
1807 case CONVERT (A52_3F2R, A52_2F2R): | |
1808 mix3to2_3dnow (samples, bias); | |
1809 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1810 memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t)); | |
1811 break; | |
1812 | |
1813 case CONVERT (A52_3F1R, A52_3F2R): | |
12137 | 1814 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); |
4233 | 1815 break; |
1816 } | |
1817 __asm __volatile("femms":::"memory"); | |
1818 } | |
1819 | |
16173 | 1820 #endif // ARCH_X86 || ARCH_X86_64 |