Mercurial > mplayer.hg
annotate liba52/downmix.c @ 4940:098970f06dc2
patch by Nilmoni Deb <ndeb@ece.cmu.edu>:
"Ok folks, I got it at last. It must have been a careless error caused during
cut and paste. So here's the patch. "
which is shortened to '10L' in mplayer dev's language ;)
author | pl |
---|---|
date | Tue, 05 Mar 2002 08:51:57 +0000 |
parents | ef2abfbbd1df |
children | d0a34309e424 |
rev | line source |
---|---|
3394 | 1 /* |
2 * downmix.c | |
3 * Copyright (C) 2000-2001 Michel Lespinasse <walken@zoy.org> | |
4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> | |
5 * | |
6 * This file is part of a52dec, a free ATSC A-52 stream decoder. | |
7 * See http://liba52.sourceforge.net/ for updates. | |
8 * | |
9 * a52dec is free software; you can redistribute it and/or modify | |
10 * it under the terms of the GNU General Public License as published by | |
11 * the Free Software Foundation; either version 2 of the License, or | |
12 * (at your option) any later version. | |
13 * | |
14 * a52dec is distributed in the hope that it will be useful, | |
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 * GNU General Public License for more details. | |
18 * | |
19 * You should have received a copy of the GNU General Public License | |
20 * along with this program; if not, write to the Free Software | |
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
3625 | 22 * |
23 * SSE optimizations from Michael Niedermayer (michaelni@gmx.at) | |
3394 | 24 */ |
25 | |
26 #include "config.h" | |
27 | |
28 #include <string.h> | |
29 #include <inttypes.h> | |
30 | |
31 #include "a52.h" | |
32 #include "a52_internal.h" | |
3910
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
33 #include "mm_accel.h" |
3394 | 34 |
35 #define CONVERT(acmod,output) (((output) << 3) + (acmod)) | |
36 | |
3904 | 37 |
38 void (*downmix)(sample_t * samples, int acmod, int output, sample_t bias, | |
39 sample_t clev, sample_t slev)= NULL; | |
40 void (*upmix)(sample_t * samples, int acmod, int output)= NULL; | |
41 | |
42 static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias, | |
43 sample_t clev, sample_t slev); | |
4233 | 44 static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t bias, |
45 sample_t clev, sample_t slev); | |
3904 | 46 static void downmix_C (sample_t * samples, int acmod, int output, sample_t bias, |
47 sample_t clev, sample_t slev); | |
48 static void upmix_MMX (sample_t * samples, int acmod, int output); | |
49 static void upmix_C (sample_t * samples, int acmod, int output); | |
3910
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
50 |
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
51 void downmix_accel_init(uint32_t mm_accel) |
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
52 { |
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
53 upmix= upmix_C; |
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
54 downmix= downmix_C; |
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
55 #ifdef ARCH_X86 |
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
56 if(mm_accel & MM_ACCEL_X86_MMX) upmix= upmix_MMX; |
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
57 if(mm_accel & MM_ACCEL_X86_SSE) downmix= downmix_SSE; |
4233 | 58 if(mm_accel & MM_ACCEL_X86_3DNOW) downmix= downmix_3dnow; |
3910
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
59 #endif |
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
60 } |
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
61 |
3394 | 62 int downmix_init (int input, int flags, sample_t * level, |
63 sample_t clev, sample_t slev) | |
64 { | |
65 static uint8_t table[11][8] = { | |
66 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO, | |
67 A52_STEREO, A52_STEREO, A52_STEREO, A52_STEREO}, | |
68 {A52_MONO, A52_MONO, A52_MONO, A52_MONO, | |
69 A52_MONO, A52_MONO, A52_MONO, A52_MONO}, | |
70 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO, | |
71 A52_STEREO, A52_STEREO, A52_STEREO, A52_STEREO}, | |
72 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F, | |
73 A52_STEREO, A52_3F, A52_STEREO, A52_3F}, | |
74 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO, | |
75 A52_2F1R, A52_2F1R, A52_2F1R, A52_2F1R}, | |
76 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO, | |
77 A52_2F1R, A52_3F1R, A52_2F1R, A52_3F1R}, | |
78 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F, | |
79 A52_2F2R, A52_2F2R, A52_2F2R, A52_2F2R}, | |
80 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F, | |
81 A52_2F2R, A52_3F2R, A52_2F2R, A52_3F2R}, | |
82 {A52_CHANNEL1, A52_MONO, A52_MONO, A52_MONO, | |
83 A52_MONO, A52_MONO, A52_MONO, A52_MONO}, | |
84 {A52_CHANNEL2, A52_MONO, A52_MONO, A52_MONO, | |
85 A52_MONO, A52_MONO, A52_MONO, A52_MONO}, | |
86 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_DOLBY, | |
87 A52_DOLBY, A52_DOLBY, A52_DOLBY, A52_DOLBY} | |
88 }; | |
89 int output; | |
90 | |
91 output = flags & A52_CHANNEL_MASK; | |
92 if (output > A52_DOLBY) | |
93 return -1; | |
3738 | 94 |
3394 | 95 output = table[output][input & 7]; |
96 | |
97 if ((output == A52_STEREO) && | |
98 ((input == A52_DOLBY) || ((input == A52_3F) && (clev == LEVEL_3DB)))) | |
99 output = A52_DOLBY; | |
100 | |
101 if (flags & A52_ADJUST_LEVEL) | |
102 switch (CONVERT (input & 7, output)) { | |
103 | |
104 case CONVERT (A52_3F, A52_MONO): | |
105 *level *= LEVEL_3DB / (1 + clev); | |
106 break; | |
107 | |
108 case CONVERT (A52_STEREO, A52_MONO): | |
109 case CONVERT (A52_2F2R, A52_2F1R): | |
110 case CONVERT (A52_3F2R, A52_3F1R): | |
111 level_3db: | |
112 *level *= LEVEL_3DB; | |
113 break; | |
114 | |
115 case CONVERT (A52_3F2R, A52_2F1R): | |
116 if (clev < LEVEL_PLUS3DB - 1) | |
117 goto level_3db; | |
118 /* break thru */ | |
119 case CONVERT (A52_3F, A52_STEREO): | |
120 case CONVERT (A52_3F1R, A52_2F1R): | |
121 case CONVERT (A52_3F1R, A52_2F2R): | |
122 case CONVERT (A52_3F2R, A52_2F2R): | |
123 *level /= 1 + clev; | |
124 break; | |
125 | |
126 case CONVERT (A52_2F1R, A52_MONO): | |
127 *level *= LEVEL_PLUS3DB / (2 + slev); | |
128 break; | |
129 | |
130 case CONVERT (A52_2F1R, A52_STEREO): | |
131 case CONVERT (A52_3F1R, A52_3F): | |
132 *level /= 1 + slev * LEVEL_3DB; | |
133 break; | |
134 | |
135 case CONVERT (A52_3F1R, A52_MONO): | |
136 *level *= LEVEL_3DB / (1 + clev + 0.5 * slev); | |
137 break; | |
138 | |
139 case CONVERT (A52_3F1R, A52_STEREO): | |
140 *level /= 1 + clev + slev * LEVEL_3DB; | |
141 break; | |
142 | |
143 case CONVERT (A52_2F2R, A52_MONO): | |
144 *level *= LEVEL_3DB / (1 + slev); | |
145 break; | |
146 | |
147 case CONVERT (A52_2F2R, A52_STEREO): | |
148 case CONVERT (A52_3F2R, A52_3F): | |
149 *level /= 1 + slev; | |
150 break; | |
151 | |
152 case CONVERT (A52_3F2R, A52_MONO): | |
153 *level *= LEVEL_3DB / (1 + clev + slev); | |
154 break; | |
155 | |
156 case CONVERT (A52_3F2R, A52_STEREO): | |
157 *level /= 1 + clev + slev; | |
158 break; | |
159 | |
160 case CONVERT (A52_MONO, A52_DOLBY): | |
161 *level *= LEVEL_PLUS3DB; | |
162 break; | |
163 | |
164 case CONVERT (A52_3F, A52_DOLBY): | |
165 case CONVERT (A52_2F1R, A52_DOLBY): | |
166 *level *= 1 / (1 + LEVEL_3DB); | |
167 break; | |
168 | |
169 case CONVERT (A52_3F1R, A52_DOLBY): | |
170 case CONVERT (A52_2F2R, A52_DOLBY): | |
171 *level *= 1 / (1 + 2 * LEVEL_3DB); | |
172 break; | |
173 | |
174 case CONVERT (A52_3F2R, A52_DOLBY): | |
175 *level *= 1 / (1 + 3 * LEVEL_3DB); | |
176 break; | |
177 } | |
178 | |
179 return output; | |
180 } | |
181 | |
182 int downmix_coeff (sample_t * coeff, int acmod, int output, sample_t level, | |
183 sample_t clev, sample_t slev) | |
184 { | |
185 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
186 | |
187 case CONVERT (A52_CHANNEL, A52_CHANNEL): | |
188 case CONVERT (A52_MONO, A52_MONO): | |
189 case CONVERT (A52_STEREO, A52_STEREO): | |
190 case CONVERT (A52_3F, A52_3F): | |
191 case CONVERT (A52_2F1R, A52_2F1R): | |
192 case CONVERT (A52_3F1R, A52_3F1R): | |
193 case CONVERT (A52_2F2R, A52_2F2R): | |
194 case CONVERT (A52_3F2R, A52_3F2R): | |
195 case CONVERT (A52_STEREO, A52_DOLBY): | |
196 coeff[0] = coeff[1] = coeff[2] = coeff[3] = coeff[4] = level; | |
197 return 0; | |
198 | |
199 case CONVERT (A52_CHANNEL, A52_MONO): | |
200 coeff[0] = coeff[1] = level * LEVEL_6DB; | |
201 return 3; | |
202 | |
203 case CONVERT (A52_STEREO, A52_MONO): | |
204 coeff[0] = coeff[1] = level * LEVEL_3DB; | |
205 return 3; | |
206 | |
207 case CONVERT (A52_3F, A52_MONO): | |
208 coeff[0] = coeff[2] = level * LEVEL_3DB; | |
209 coeff[1] = level * clev * LEVEL_PLUS3DB; | |
210 return 7; | |
211 | |
212 case CONVERT (A52_2F1R, A52_MONO): | |
213 coeff[0] = coeff[1] = level * LEVEL_3DB; | |
214 coeff[2] = level * slev * LEVEL_3DB; | |
215 return 7; | |
216 | |
217 case CONVERT (A52_2F2R, A52_MONO): | |
218 coeff[0] = coeff[1] = level * LEVEL_3DB; | |
219 coeff[2] = coeff[3] = level * slev * LEVEL_3DB; | |
220 return 15; | |
221 | |
222 case CONVERT (A52_3F1R, A52_MONO): | |
223 coeff[0] = coeff[2] = level * LEVEL_3DB; | |
224 coeff[1] = level * clev * LEVEL_PLUS3DB; | |
225 coeff[3] = level * slev * LEVEL_3DB; | |
226 return 15; | |
227 | |
228 case CONVERT (A52_3F2R, A52_MONO): | |
229 coeff[0] = coeff[2] = level * LEVEL_3DB; | |
230 coeff[1] = level * clev * LEVEL_PLUS3DB; | |
231 coeff[3] = coeff[4] = level * slev * LEVEL_3DB; | |
232 return 31; | |
233 | |
234 case CONVERT (A52_MONO, A52_DOLBY): | |
235 coeff[0] = level * LEVEL_3DB; | |
236 return 0; | |
237 | |
238 case CONVERT (A52_3F, A52_DOLBY): | |
239 clev = LEVEL_3DB; | |
240 case CONVERT (A52_3F, A52_STEREO): | |
241 case CONVERT (A52_3F1R, A52_2F1R): | |
242 case CONVERT (A52_3F2R, A52_2F2R): | |
243 coeff[0] = coeff[2] = coeff[3] = coeff[4] = level; | |
244 coeff[1] = level * clev; | |
245 return 7; | |
246 | |
247 case CONVERT (A52_2F1R, A52_DOLBY): | |
248 slev = 1; | |
249 case CONVERT (A52_2F1R, A52_STEREO): | |
250 coeff[0] = coeff[1] = level; | |
251 coeff[2] = level * slev * LEVEL_3DB; | |
252 return 7; | |
253 | |
254 case CONVERT (A52_3F1R, A52_DOLBY): | |
255 clev = LEVEL_3DB; | |
256 slev = 1; | |
257 case CONVERT (A52_3F1R, A52_STEREO): | |
258 coeff[0] = coeff[2] = level; | |
259 coeff[1] = level * clev; | |
260 coeff[3] = level * slev * LEVEL_3DB; | |
261 return 15; | |
262 | |
263 case CONVERT (A52_2F2R, A52_DOLBY): | |
264 slev = LEVEL_3DB; | |
265 case CONVERT (A52_2F2R, A52_STEREO): | |
266 coeff[0] = coeff[1] = level; | |
267 coeff[2] = coeff[3] = level * slev; | |
268 return 15; | |
269 | |
270 case CONVERT (A52_3F2R, A52_DOLBY): | |
271 clev = LEVEL_3DB; | |
272 case CONVERT (A52_3F2R, A52_2F1R): | |
273 slev = LEVEL_3DB; | |
274 case CONVERT (A52_3F2R, A52_STEREO): | |
275 coeff[0] = coeff[2] = level; | |
276 coeff[1] = level * clev; | |
277 coeff[3] = coeff[4] = level * slev; | |
278 return 31; | |
279 | |
280 case CONVERT (A52_3F1R, A52_3F): | |
281 coeff[0] = coeff[1] = coeff[2] = level; | |
282 coeff[3] = level * slev * LEVEL_3DB; | |
283 return 13; | |
284 | |
285 case CONVERT (A52_3F2R, A52_3F): | |
286 coeff[0] = coeff[1] = coeff[2] = level; | |
287 coeff[3] = coeff[4] = level * slev; | |
288 return 29; | |
289 | |
290 case CONVERT (A52_2F2R, A52_2F1R): | |
291 coeff[0] = coeff[1] = level; | |
292 coeff[2] = coeff[3] = level * LEVEL_3DB; | |
293 return 12; | |
294 | |
295 case CONVERT (A52_3F2R, A52_3F1R): | |
296 coeff[0] = coeff[1] = coeff[2] = level; | |
297 coeff[3] = coeff[4] = level * LEVEL_3DB; | |
298 return 24; | |
299 | |
300 case CONVERT (A52_2F1R, A52_2F2R): | |
301 coeff[0] = coeff[1] = level; | |
302 coeff[2] = level * LEVEL_3DB; | |
303 return 0; | |
304 | |
305 case CONVERT (A52_3F1R, A52_2F2R): | |
306 coeff[0] = coeff[2] = level; | |
307 coeff[1] = level * clev; | |
308 coeff[3] = level * LEVEL_3DB; | |
309 return 7; | |
310 | |
311 case CONVERT (A52_3F1R, A52_3F2R): | |
312 coeff[0] = coeff[1] = coeff[2] = level; | |
313 coeff[3] = level * LEVEL_3DB; | |
314 return 0; | |
315 | |
316 case CONVERT (A52_CHANNEL, A52_CHANNEL1): | |
317 coeff[0] = level; | |
318 coeff[1] = 0; | |
319 return 0; | |
320 | |
321 case CONVERT (A52_CHANNEL, A52_CHANNEL2): | |
322 coeff[0] = 0; | |
323 coeff[1] = level; | |
324 return 0; | |
325 } | |
326 | |
327 return -1; /* NOTREACHED */ | |
328 } | |
329 | |
330 static void mix2to1 (sample_t * dest, sample_t * src, sample_t bias) | |
331 { | |
332 int i; | |
333 | |
334 for (i = 0; i < 256; i++) | |
335 dest[i] += src[i] + bias; | |
336 } | |
337 | |
338 static void mix3to1 (sample_t * samples, sample_t bias) | |
339 { | |
340 int i; | |
341 | |
342 for (i = 0; i < 256; i++) | |
343 samples[i] += samples[i + 256] + samples[i + 512] + bias; | |
344 } | |
345 | |
346 static void mix4to1 (sample_t * samples, sample_t bias) | |
347 { | |
348 int i; | |
349 | |
350 for (i = 0; i < 256; i++) | |
351 samples[i] += (samples[i + 256] + samples[i + 512] + | |
352 samples[i + 768] + bias); | |
353 } | |
354 | |
355 static void mix5to1 (sample_t * samples, sample_t bias) | |
356 { | |
357 int i; | |
358 | |
359 for (i = 0; i < 256; i++) | |
360 samples[i] += (samples[i + 256] + samples[i + 512] + | |
361 samples[i + 768] + samples[i + 1024] + bias); | |
362 } | |
363 | |
364 static void mix3to2 (sample_t * samples, sample_t bias) | |
365 { | |
366 int i; | |
367 sample_t common; | |
368 | |
369 for (i = 0; i < 256; i++) { | |
370 common = samples[i + 256] + bias; | |
371 samples[i] += common; | |
372 samples[i + 256] = samples[i + 512] + common; | |
373 } | |
374 } | |
375 | |
376 static void mix21to2 (sample_t * left, sample_t * right, sample_t bias) | |
377 { | |
378 int i; | |
379 sample_t common; | |
380 | |
381 for (i = 0; i < 256; i++) { | |
382 common = right[i + 256] + bias; | |
383 left[i] += common; | |
384 right[i] += common; | |
385 } | |
386 } | |
387 | |
388 static void mix21toS (sample_t * samples, sample_t bias) | |
389 { | |
390 int i; | |
391 sample_t surround; | |
392 | |
393 for (i = 0; i < 256; i++) { | |
394 surround = samples[i + 512]; | |
395 samples[i] += bias - surround; | |
396 samples[i + 256] += bias + surround; | |
397 } | |
398 } | |
399 | |
400 static void mix31to2 (sample_t * samples, sample_t bias) | |
401 { | |
402 int i; | |
403 sample_t common; | |
404 | |
405 for (i = 0; i < 256; i++) { | |
406 common = samples[i + 256] + samples[i + 768] + bias; | |
407 samples[i] += common; | |
408 samples[i + 256] = samples[i + 512] + common; | |
409 } | |
410 } | |
411 | |
412 static void mix31toS (sample_t * samples, sample_t bias) | |
413 { | |
414 int i; | |
415 sample_t common, surround; | |
416 | |
417 for (i = 0; i < 256; i++) { | |
418 common = samples[i + 256] + bias; | |
419 surround = samples[i + 768]; | |
420 samples[i] += common - surround; | |
421 samples[i + 256] = samples[i + 512] + common + surround; | |
422 } | |
423 } | |
424 | |
425 static void mix22toS (sample_t * samples, sample_t bias) | |
426 { | |
427 int i; | |
428 sample_t surround; | |
429 | |
430 for (i = 0; i < 256; i++) { | |
431 surround = samples[i + 512] + samples[i + 768]; | |
432 samples[i] += bias - surround; | |
433 samples[i + 256] += bias + surround; | |
434 } | |
435 } | |
436 | |
437 static void mix32to2 (sample_t * samples, sample_t bias) | |
438 { | |
439 int i; | |
440 sample_t common; | |
441 | |
442 for (i = 0; i < 256; i++) { | |
443 common = samples[i + 256] + bias; | |
444 samples[i] += common + samples[i + 768]; | |
445 samples[i + 256] = common + samples[i + 512] + samples[i + 1024]; | |
446 } | |
447 } | |
448 | |
449 static void mix32toS (sample_t * samples, sample_t bias) | |
450 { | |
451 int i; | |
452 sample_t common, surround; | |
453 | |
454 for (i = 0; i < 256; i++) { | |
455 common = samples[i + 256] + bias; | |
456 surround = samples[i + 768] + samples[i + 1024]; | |
457 samples[i] += common - surround; | |
458 samples[i + 256] = samples[i + 512] + common + surround; | |
459 } | |
460 } | |
461 | |
462 static void move2to1 (sample_t * src, sample_t * dest, sample_t bias) | |
463 { | |
464 int i; | |
465 | |
466 for (i = 0; i < 256; i++) | |
467 dest[i] = src[i] + src[i + 256] + bias; | |
468 } | |
469 | |
470 static void zero (sample_t * samples) | |
471 { | |
472 int i; | |
473 for (i = 0; i < 256; i++) | |
474 samples[i] = 0; | |
475 } | |
476 | |
3904 | 477 static void downmix_C (sample_t * samples, int acmod, int output, sample_t bias, |
3394 | 478 sample_t clev, sample_t slev) |
479 { | |
480 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
481 | |
482 case CONVERT (A52_CHANNEL, A52_CHANNEL2): | |
483 memcpy (samples, samples + 256, 256 * sizeof (sample_t)); | |
484 break; | |
485 | |
486 case CONVERT (A52_CHANNEL, A52_MONO): | |
487 case CONVERT (A52_STEREO, A52_MONO): | |
488 mix_2to1: | |
489 mix2to1 (samples, samples + 256, bias); | |
490 break; | |
491 | |
492 case CONVERT (A52_2F1R, A52_MONO): | |
493 if (slev == 0) | |
494 goto mix_2to1; | |
495 case CONVERT (A52_3F, A52_MONO): | |
496 mix_3to1: | |
497 mix3to1 (samples, bias); | |
498 break; | |
499 | |
500 case CONVERT (A52_3F1R, A52_MONO): | |
501 if (slev == 0) | |
502 goto mix_3to1; | |
503 case CONVERT (A52_2F2R, A52_MONO): | |
504 if (slev == 0) | |
505 goto mix_2to1; | |
506 mix4to1 (samples, bias); | |
507 break; | |
508 | |
509 case CONVERT (A52_3F2R, A52_MONO): | |
510 if (slev == 0) | |
511 goto mix_3to1; | |
512 mix5to1 (samples, bias); | |
513 break; | |
514 | |
515 case CONVERT (A52_MONO, A52_DOLBY): | |
516 memcpy (samples + 256, samples, 256 * sizeof (sample_t)); | |
517 break; | |
518 | |
519 case CONVERT (A52_3F, A52_STEREO): | |
520 case CONVERT (A52_3F, A52_DOLBY): | |
521 mix_3to2: | |
522 mix3to2 (samples, bias); | |
523 break; | |
524 | |
525 case CONVERT (A52_2F1R, A52_STEREO): | |
526 if (slev == 0) | |
527 break; | |
528 mix21to2 (samples, samples + 256, bias); | |
529 break; | |
530 | |
531 case CONVERT (A52_2F1R, A52_DOLBY): | |
532 mix21toS (samples, bias); | |
533 break; | |
534 | |
535 case CONVERT (A52_3F1R, A52_STEREO): | |
536 if (slev == 0) | |
537 goto mix_3to2; | |
538 mix31to2 (samples, bias); | |
539 break; | |
540 | |
541 case CONVERT (A52_3F1R, A52_DOLBY): | |
542 mix31toS (samples, bias); | |
543 break; | |
544 | |
545 case CONVERT (A52_2F2R, A52_STEREO): | |
546 if (slev == 0) | |
547 break; | |
548 mix2to1 (samples, samples + 512, bias); | |
549 mix2to1 (samples + 256, samples + 768, bias); | |
550 break; | |
551 | |
552 case CONVERT (A52_2F2R, A52_DOLBY): | |
553 mix22toS (samples, bias); | |
554 break; | |
555 | |
556 case CONVERT (A52_3F2R, A52_STEREO): | |
557 if (slev == 0) | |
558 goto mix_3to2; | |
559 mix32to2 (samples, bias); | |
560 break; | |
561 | |
562 case CONVERT (A52_3F2R, A52_DOLBY): | |
563 mix32toS (samples, bias); | |
564 break; | |
565 | |
566 case CONVERT (A52_3F1R, A52_3F): | |
567 if (slev == 0) | |
568 break; | |
569 mix21to2 (samples, samples + 512, bias); | |
570 break; | |
571 | |
572 case CONVERT (A52_3F2R, A52_3F): | |
573 if (slev == 0) | |
574 break; | |
575 mix2to1 (samples, samples + 768, bias); | |
576 mix2to1 (samples + 512, samples + 1024, bias); | |
577 break; | |
578 | |
579 case CONVERT (A52_3F1R, A52_2F1R): | |
580 mix3to2 (samples, bias); | |
581 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
582 break; | |
583 | |
584 case CONVERT (A52_2F2R, A52_2F1R): | |
585 mix2to1 (samples + 512, samples + 768, bias); | |
586 break; | |
587 | |
588 case CONVERT (A52_3F2R, A52_2F1R): | |
3678 | 589 mix3to2 (samples, bias); //FIXME possible bug? (output doesnt seem to be used) |
3394 | 590 move2to1 (samples + 768, samples + 512, bias); |
591 break; | |
592 | |
593 case CONVERT (A52_3F2R, A52_3F1R): | |
594 mix2to1 (samples + 768, samples + 1024, bias); | |
595 break; | |
596 | |
597 case CONVERT (A52_2F1R, A52_2F2R): | |
598 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); | |
599 break; | |
600 | |
601 case CONVERT (A52_3F1R, A52_2F2R): | |
602 mix3to2 (samples, bias); | |
603 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
604 break; | |
605 | |
606 case CONVERT (A52_3F2R, A52_2F2R): | |
607 mix3to2 (samples, bias); | |
608 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
609 memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t)); | |
610 break; | |
611 | |
612 case CONVERT (A52_3F1R, A52_3F2R): | |
613 memcpy (samples + 1027, samples + 768, 256 * sizeof (sample_t)); | |
614 break; | |
615 } | |
616 } | |
617 | |
3904 | 618 static void upmix_C (sample_t * samples, int acmod, int output) |
3394 | 619 { |
620 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
621 | |
622 case CONVERT (A52_CHANNEL, A52_CHANNEL2): | |
623 memcpy (samples + 256, samples, 256 * sizeof (sample_t)); | |
624 break; | |
625 | |
626 case CONVERT (A52_3F2R, A52_MONO): | |
627 zero (samples + 1024); | |
628 case CONVERT (A52_3F1R, A52_MONO): | |
629 case CONVERT (A52_2F2R, A52_MONO): | |
630 zero (samples + 768); | |
631 case CONVERT (A52_3F, A52_MONO): | |
632 case CONVERT (A52_2F1R, A52_MONO): | |
633 zero (samples + 512); | |
634 case CONVERT (A52_CHANNEL, A52_MONO): | |
635 case CONVERT (A52_STEREO, A52_MONO): | |
636 zero (samples + 256); | |
637 break; | |
638 | |
639 case CONVERT (A52_3F2R, A52_STEREO): | |
640 case CONVERT (A52_3F2R, A52_DOLBY): | |
641 zero (samples + 1024); | |
642 case CONVERT (A52_3F1R, A52_STEREO): | |
643 case CONVERT (A52_3F1R, A52_DOLBY): | |
644 zero (samples + 768); | |
645 case CONVERT (A52_3F, A52_STEREO): | |
646 case CONVERT (A52_3F, A52_DOLBY): | |
647 mix_3to2: | |
648 memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t)); | |
649 zero (samples + 256); | |
650 break; | |
651 | |
652 case CONVERT (A52_2F2R, A52_STEREO): | |
653 case CONVERT (A52_2F2R, A52_DOLBY): | |
654 zero (samples + 768); | |
655 case CONVERT (A52_2F1R, A52_STEREO): | |
656 case CONVERT (A52_2F1R, A52_DOLBY): | |
657 zero (samples + 512); | |
658 break; | |
659 | |
660 case CONVERT (A52_3F2R, A52_3F): | |
661 zero (samples + 1024); | |
662 case CONVERT (A52_3F1R, A52_3F): | |
663 case CONVERT (A52_2F2R, A52_2F1R): | |
664 zero (samples + 768); | |
665 break; | |
666 | |
667 case CONVERT (A52_3F2R, A52_3F1R): | |
668 zero (samples + 1024); | |
669 break; | |
670 | |
671 case CONVERT (A52_3F2R, A52_2F1R): | |
672 zero (samples + 1024); | |
673 case CONVERT (A52_3F1R, A52_2F1R): | |
674 mix_31to21: | |
675 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); | |
676 goto mix_3to2; | |
677 | |
678 case CONVERT (A52_3F2R, A52_2F2R): | |
679 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); | |
680 goto mix_31to21; | |
681 } | |
682 } | |
3904 | 683 |
684 #ifdef ARCH_X86 | |
685 static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias) | |
686 { | |
687 asm volatile( | |
688 "movlps %2, %%xmm7 \n\t" | |
689 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
690 "movl $-1024, %%esi \n\t" | |
4233 | 691 ".balign 16\n\t" |
3904 | 692 "1: \n\t" |
693 "movaps (%0, %%esi), %%xmm0 \n\t" | |
694 "movaps 16(%0, %%esi), %%xmm1 \n\t" | |
695 "addps (%1, %%esi), %%xmm0 \n\t" | |
696 "addps 16(%1, %%esi), %%xmm1 \n\t" | |
697 "addps %%xmm7, %%xmm0 \n\t" | |
698 "addps %%xmm7, %%xmm1 \n\t" | |
699 "movaps %%xmm0, (%1, %%esi) \n\t" | |
700 "movaps %%xmm1, 16(%1, %%esi) \n\t" | |
701 "addl $32, %%esi \n\t" | |
702 " jnz 1b \n\t" | |
703 :: "r" (src+256), "r" (dest+256), "m" (bias) | |
704 : "%esi" | |
705 ); | |
706 } | |
707 | |
708 static void mix3to1_SSE (sample_t * samples, sample_t bias) | |
709 { | |
710 asm volatile( | |
711 "movlps %1, %%xmm7 \n\t" | |
712 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
713 "movl $-1024, %%esi \n\t" | |
4233 | 714 ".balign 16\n\t" |
3904 | 715 "1: \n\t" |
716 "movaps (%0, %%esi), %%xmm0 \n\t" | |
717 "movaps 1024(%0, %%esi), %%xmm1 \n\t" | |
718 "addps 2048(%0, %%esi), %%xmm0 \n\t" | |
719 "addps %%xmm7, %%xmm1 \n\t" | |
720 "addps %%xmm1, %%xmm0 \n\t" | |
721 "movaps %%xmm0, (%0, %%esi) \n\t" | |
722 "addl $16, %%esi \n\t" | |
723 " jnz 1b \n\t" | |
724 :: "r" (samples+256), "m" (bias) | |
725 : "%esi" | |
726 ); | |
727 } | |
728 | |
729 static void mix4to1_SSE (sample_t * samples, sample_t bias) | |
730 { | |
731 asm volatile( | |
732 "movlps %1, %%xmm7 \n\t" | |
733 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
734 "movl $-1024, %%esi \n\t" | |
4233 | 735 ".balign 16\n\t" |
3904 | 736 "1: \n\t" |
737 "movaps (%0, %%esi), %%xmm0 \n\t" | |
738 "movaps 1024(%0, %%esi), %%xmm1 \n\t" | |
739 "addps 2048(%0, %%esi), %%xmm0 \n\t" | |
740 "addps 3072(%0, %%esi), %%xmm1 \n\t" | |
741 "addps %%xmm7, %%xmm0 \n\t" | |
742 "addps %%xmm1, %%xmm0 \n\t" | |
743 "movaps %%xmm0, (%0, %%esi) \n\t" | |
744 "addl $16, %%esi \n\t" | |
745 " jnz 1b \n\t" | |
746 :: "r" (samples+256), "m" (bias) | |
747 : "%esi" | |
748 ); | |
749 } | |
750 | |
751 static void mix5to1_SSE (sample_t * samples, sample_t bias) | |
752 { | |
753 asm volatile( | |
754 "movlps %1, %%xmm7 \n\t" | |
755 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
756 "movl $-1024, %%esi \n\t" | |
4233 | 757 ".balign 16\n\t" |
3904 | 758 "1: \n\t" |
759 "movaps (%0, %%esi), %%xmm0 \n\t" | |
760 "movaps 1024(%0, %%esi), %%xmm1 \n\t" | |
761 "addps 2048(%0, %%esi), %%xmm0 \n\t" | |
762 "addps 3072(%0, %%esi), %%xmm1 \n\t" | |
763 "addps %%xmm7, %%xmm0 \n\t" | |
764 "addps 4096(%0, %%esi), %%xmm1 \n\t" | |
765 "addps %%xmm1, %%xmm0 \n\t" | |
766 "movaps %%xmm0, (%0, %%esi) \n\t" | |
767 "addl $16, %%esi \n\t" | |
768 " jnz 1b \n\t" | |
769 :: "r" (samples+256), "m" (bias) | |
770 : "%esi" | |
771 ); | |
772 } | |
773 | |
774 static void mix3to2_SSE (sample_t * samples, sample_t bias) | |
775 { | |
776 asm volatile( | |
777 "movlps %1, %%xmm7 \n\t" | |
778 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
779 "movl $-1024, %%esi \n\t" | |
4233 | 780 ".balign 16\n\t" |
3904 | 781 "1: \n\t" |
782 "movaps 1024(%0, %%esi), %%xmm0 \n\t" | |
783 "addps %%xmm7, %%xmm0 \n\t" //common | |
784 "movaps (%0, %%esi), %%xmm1 \n\t" | |
785 "movaps 2048(%0, %%esi), %%xmm2 \n\t" | |
786 "addps %%xmm0, %%xmm1 \n\t" | |
787 "addps %%xmm0, %%xmm2 \n\t" | |
788 "movaps %%xmm1, (%0, %%esi) \n\t" | |
789 "movaps %%xmm2, 1024(%0, %%esi) \n\t" | |
790 "addl $16, %%esi \n\t" | |
791 " jnz 1b \n\t" | |
792 :: "r" (samples+256), "m" (bias) | |
793 : "%esi" | |
794 ); | |
795 } | |
796 | |
797 static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias) | |
798 { | |
799 asm volatile( | |
800 "movlps %2, %%xmm7 \n\t" | |
801 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
802 "movl $-1024, %%esi \n\t" | |
4233 | 803 ".balign 16\n\t" |
3904 | 804 "1: \n\t" |
805 "movaps 1024(%1, %%esi), %%xmm0 \n\t" | |
806 "addps %%xmm7, %%xmm0 \n\t" //common | |
807 "movaps (%0, %%esi), %%xmm1 \n\t" | |
808 "movaps (%1, %%esi), %%xmm2 \n\t" | |
809 "addps %%xmm0, %%xmm1 \n\t" | |
810 "addps %%xmm0, %%xmm2 \n\t" | |
811 "movaps %%xmm1, (%0, %%esi) \n\t" | |
812 "movaps %%xmm2, (%1, %%esi) \n\t" | |
813 "addl $16, %%esi \n\t" | |
814 " jnz 1b \n\t" | |
815 :: "r" (left+256), "r" (right+256), "m" (bias) | |
816 : "%esi" | |
817 ); | |
818 } | |
819 | |
820 static void mix21toS_SSE (sample_t * samples, sample_t bias) | |
821 { | |
822 asm volatile( | |
823 "movlps %1, %%xmm7 \n\t" | |
824 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
825 "movl $-1024, %%esi \n\t" | |
4233 | 826 ".balign 16\n\t" |
3904 | 827 "1: \n\t" |
828 "movaps 2048(%0, %%esi), %%xmm0 \n\t" // surround | |
829 "movaps (%0, %%esi), %%xmm1 \n\t" | |
830 "movaps 1024(%0, %%esi), %%xmm2 \n\t" | |
831 "addps %%xmm7, %%xmm1 \n\t" | |
832 "addps %%xmm7, %%xmm2 \n\t" | |
833 "subps %%xmm0, %%xmm1 \n\t" | |
834 "addps %%xmm0, %%xmm2 \n\t" | |
835 "movaps %%xmm1, (%0, %%esi) \n\t" | |
836 "movaps %%xmm2, 1024(%0, %%esi) \n\t" | |
837 "addl $16, %%esi \n\t" | |
838 " jnz 1b \n\t" | |
839 :: "r" (samples+256), "m" (bias) | |
840 : "%esi" | |
841 ); | |
842 } | |
843 | |
844 static void mix31to2_SSE (sample_t * samples, sample_t bias) | |
845 { | |
846 asm volatile( | |
847 "movlps %1, %%xmm7 \n\t" | |
848 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
849 "movl $-1024, %%esi \n\t" | |
4233 | 850 ".balign 16\n\t" |
3904 | 851 "1: \n\t" |
852 "movaps 1024(%0, %%esi), %%xmm0 \n\t" | |
853 "addps 3072(%0, %%esi), %%xmm0 \n\t" | |
854 "addps %%xmm7, %%xmm0 \n\t" // common | |
855 "movaps (%0, %%esi), %%xmm1 \n\t" | |
856 "movaps 2048(%0, %%esi), %%xmm2 \n\t" | |
857 "addps %%xmm0, %%xmm1 \n\t" | |
858 "addps %%xmm0, %%xmm2 \n\t" | |
859 "movaps %%xmm1, (%0, %%esi) \n\t" | |
860 "movaps %%xmm2, 1024(%0, %%esi) \n\t" | |
861 "addl $16, %%esi \n\t" | |
862 " jnz 1b \n\t" | |
863 :: "r" (samples+256), "m" (bias) | |
864 : "%esi" | |
865 ); | |
866 } | |
867 | |
868 static void mix31toS_SSE (sample_t * samples, sample_t bias) | |
869 { | |
870 asm volatile( | |
871 "movlps %1, %%xmm7 \n\t" | |
872 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
873 "movl $-1024, %%esi \n\t" | |
4233 | 874 ".balign 16\n\t" |
3904 | 875 "1: \n\t" |
876 "movaps 1024(%0, %%esi), %%xmm0 \n\t" | |
877 "movaps 3072(%0, %%esi), %%xmm3 \n\t" // surround | |
878 "addps %%xmm7, %%xmm0 \n\t" // common | |
879 "movaps (%0, %%esi), %%xmm1 \n\t" | |
880 "movaps 2048(%0, %%esi), %%xmm2 \n\t" | |
881 "addps %%xmm0, %%xmm1 \n\t" | |
882 "addps %%xmm0, %%xmm2 \n\t" | |
883 "subps %%xmm3, %%xmm1 \n\t" | |
884 "addps %%xmm3, %%xmm2 \n\t" | |
885 "movaps %%xmm1, (%0, %%esi) \n\t" | |
886 "movaps %%xmm2, 1024(%0, %%esi) \n\t" | |
887 "addl $16, %%esi \n\t" | |
888 " jnz 1b \n\t" | |
889 :: "r" (samples+256), "m" (bias) | |
890 : "%esi" | |
891 ); | |
892 } | |
893 | |
894 static void mix22toS_SSE (sample_t * samples, sample_t bias) | |
895 { | |
896 asm volatile( | |
897 "movlps %1, %%xmm7 \n\t" | |
898 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
899 "movl $-1024, %%esi \n\t" | |
4233 | 900 ".balign 16\n\t" |
3904 | 901 "1: \n\t" |
902 "movaps 2048(%0, %%esi), %%xmm0 \n\t" | |
903 "addps 3072(%0, %%esi), %%xmm0 \n\t" // surround | |
904 "movaps (%0, %%esi), %%xmm1 \n\t" | |
905 "movaps 1024(%0, %%esi), %%xmm2 \n\t" | |
906 "addps %%xmm7, %%xmm1 \n\t" | |
907 "addps %%xmm7, %%xmm2 \n\t" | |
908 "subps %%xmm0, %%xmm1 \n\t" | |
909 "addps %%xmm0, %%xmm2 \n\t" | |
910 "movaps %%xmm1, (%0, %%esi) \n\t" | |
911 "movaps %%xmm2, 1024(%0, %%esi) \n\t" | |
912 "addl $16, %%esi \n\t" | |
913 " jnz 1b \n\t" | |
914 :: "r" (samples+256), "m" (bias) | |
915 : "%esi" | |
916 ); | |
917 } | |
918 | |
919 static void mix32to2_SSE (sample_t * samples, sample_t bias) | |
920 { | |
921 asm volatile( | |
922 "movlps %1, %%xmm7 \n\t" | |
923 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
924 "movl $-1024, %%esi \n\t" | |
4233 | 925 ".balign 16\n\t" |
3904 | 926 "1: \n\t" |
927 "movaps 1024(%0, %%esi), %%xmm0 \n\t" | |
928 "addps %%xmm7, %%xmm0 \n\t" // common | |
929 "movaps %%xmm0, %%xmm1 \n\t" // common | |
930 "addps (%0, %%esi), %%xmm0 \n\t" | |
931 "addps 2048(%0, %%esi), %%xmm1 \n\t" | |
932 "addps 3072(%0, %%esi), %%xmm0 \n\t" | |
933 "addps 4096(%0, %%esi), %%xmm1 \n\t" | |
934 "movaps %%xmm0, (%0, %%esi) \n\t" | |
935 "movaps %%xmm1, 1024(%0, %%esi) \n\t" | |
936 "addl $16, %%esi \n\t" | |
937 " jnz 1b \n\t" | |
938 :: "r" (samples+256), "m" (bias) | |
939 : "%esi" | |
940 ); | |
941 } | |
942 | |
943 static void mix32toS_SSE (sample_t * samples, sample_t bias) | |
944 { | |
945 asm volatile( | |
946 "movlps %1, %%xmm7 \n\t" | |
947 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
948 "movl $-1024, %%esi \n\t" | |
4233 | 949 ".balign 16\n\t" |
3904 | 950 "1: \n\t" |
951 "movaps 1024(%0, %%esi), %%xmm0 \n\t" | |
952 "movaps 3072(%0, %%esi), %%xmm2 \n\t" | |
953 "addps %%xmm7, %%xmm0 \n\t" // common | |
954 "addps 4096(%0, %%esi), %%xmm2 \n\t" // surround | |
955 "movaps (%0, %%esi), %%xmm1 \n\t" | |
956 "movaps 2048(%0, %%esi), %%xmm3 \n\t" | |
957 "subps %%xmm2, %%xmm1 \n\t" | |
958 "addps %%xmm2, %%xmm3 \n\t" | |
959 "addps %%xmm0, %%xmm1 \n\t" | |
960 "addps %%xmm0, %%xmm3 \n\t" | |
961 "movaps %%xmm1, (%0, %%esi) \n\t" | |
962 "movaps %%xmm3, 1024(%0, %%esi) \n\t" | |
963 "addl $16, %%esi \n\t" | |
964 " jnz 1b \n\t" | |
965 :: "r" (samples+256), "m" (bias) | |
966 : "%esi" | |
967 ); | |
968 } | |
969 | |
970 static void move2to1_SSE (sample_t * src, sample_t * dest, sample_t bias) | |
971 { | |
972 asm volatile( | |
973 "movlps %2, %%xmm7 \n\t" | |
974 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
975 "movl $-1024, %%esi \n\t" | |
4233 | 976 ".balign 16\n\t" |
3904 | 977 "1: \n\t" |
978 "movaps (%0, %%esi), %%xmm0 \n\t" | |
979 "movaps 16(%0, %%esi), %%xmm1 \n\t" | |
980 "addps 1024(%0, %%esi), %%xmm0 \n\t" | |
981 "addps 1040(%0, %%esi), %%xmm1 \n\t" | |
982 "addps %%xmm7, %%xmm0 \n\t" | |
983 "addps %%xmm7, %%xmm1 \n\t" | |
984 "movaps %%xmm0, (%1, %%esi) \n\t" | |
985 "movaps %%xmm1, 16(%1, %%esi) \n\t" | |
986 "addl $32, %%esi \n\t" | |
987 " jnz 1b \n\t" | |
988 :: "r" (src+256), "r" (dest+256), "m" (bias) | |
989 : "%esi" | |
990 ); | |
991 } | |
992 | |
993 static void zero_MMX(sample_t * samples) | |
994 { | |
995 asm volatile( | |
996 "movl $-1024, %%esi \n\t" | |
997 "pxor %%mm0, %%mm0 \n\t" | |
4233 | 998 ".balign 16\n\t" |
3904 | 999 "1: \n\t" |
1000 "movq %%mm0, (%0, %%esi) \n\t" | |
1001 "movq %%mm0, 8(%0, %%esi) \n\t" | |
1002 "movq %%mm0, 16(%0, %%esi) \n\t" | |
1003 "movq %%mm0, 24(%0, %%esi) \n\t" | |
1004 "addl $32, %%esi \n\t" | |
1005 " jnz 1b \n\t" | |
1006 "emms" | |
1007 :: "r" (samples+256) | |
1008 : "%esi" | |
1009 ); | |
1010 } | |
1011 | |
/*
 * Copy size bytes from src to dest using MMX registers, 64 bytes per
 * iteration.
 *
 * size is rounded DOWN to a multiple of 64; any remaining tail bytes are
 * not copied.  movq has no alignment requirement, but 8-byte aligned
 * buffers are preferable for speed.  The regions must not overlap.
 *
 * The pointers are passed to the asm in registers and advanced after each
 * chunk; the "memory" clobber tells the compiler that the asm reads and
 * writes the buffers.  emms is executed before returning so that the x87
 * FPU is usable again.
 *
 * Note: currently unused.
 */
static void copy_MMX(void *dest,const void *src,unsigned size)
{
    unsigned char *d = dest;
    const unsigned char *s = src;
    unsigned i;

    size /= 64;
    for(i=0;i<size;i++)
    {
        __asm __volatile(
        "movq   (%0), %%mm0\n\t"
        "movq  8(%0), %%mm1\n\t"
        "movq 16(%0), %%mm2\n\t"
        "movq 24(%0), %%mm3\n\t"
        "movq 32(%0), %%mm4\n\t"
        "movq 40(%0), %%mm5\n\t"
        "movq 48(%0), %%mm6\n\t"
        "movq 56(%0), %%mm7\n\t"
        "movq %%mm0,   (%1)\n\t"
        "movq %%mm1,  8(%1)\n\t"
        "movq %%mm2, 16(%1)\n\t"
        "movq %%mm3, 24(%1)\n\t"
        "movq %%mm4, 32(%1)\n\t"
        "movq %%mm5, 40(%1)\n\t"
        "movq %%mm6, 48(%1)\n\t"
        "movq %%mm7, 56(%1)\n\t"
        :
        :"r"(s),"r"(d)
        :"memory");
        s += 64;
        d += 64;
    }
    __asm __volatile("emms":::"memory");
}
3904 | 1044 |
1045 static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias, | |
1046 sample_t clev, sample_t slev) | |
1047 { | |
1048 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
1049 | |
1050 case CONVERT (A52_CHANNEL, A52_CHANNEL2): | |
1051 memcpy (samples, samples + 256, 256 * sizeof (sample_t)); | |
1052 break; | |
1053 | |
1054 case CONVERT (A52_CHANNEL, A52_MONO): | |
1055 case CONVERT (A52_STEREO, A52_MONO): | |
1056 mix_2to1_SSE: | |
1057 mix2to1_SSE (samples, samples + 256, bias); | |
1058 break; | |
1059 | |
1060 case CONVERT (A52_2F1R, A52_MONO): | |
1061 if (slev == 0) | |
1062 goto mix_2to1_SSE; | |
1063 case CONVERT (A52_3F, A52_MONO): | |
1064 mix_3to1_SSE: | |
1065 mix3to1_SSE (samples, bias); | |
1066 break; | |
1067 | |
1068 case CONVERT (A52_3F1R, A52_MONO): | |
1069 if (slev == 0) | |
1070 goto mix_3to1_SSE; | |
1071 case CONVERT (A52_2F2R, A52_MONO): | |
1072 if (slev == 0) | |
1073 goto mix_2to1_SSE; | |
1074 mix4to1_SSE (samples, bias); | |
1075 break; | |
1076 | |
1077 case CONVERT (A52_3F2R, A52_MONO): | |
1078 if (slev == 0) | |
1079 goto mix_3to1_SSE; | |
1080 mix5to1_SSE (samples, bias); | |
1081 break; | |
1082 | |
1083 case CONVERT (A52_MONO, A52_DOLBY): | |
1084 memcpy (samples + 256, samples, 256 * sizeof (sample_t)); | |
1085 break; | |
1086 | |
1087 case CONVERT (A52_3F, A52_STEREO): | |
1088 case CONVERT (A52_3F, A52_DOLBY): | |
1089 mix_3to2_SSE: | |
1090 mix3to2_SSE (samples, bias); | |
1091 break; | |
1092 | |
1093 case CONVERT (A52_2F1R, A52_STEREO): | |
1094 if (slev == 0) | |
1095 break; | |
1096 mix21to2_SSE (samples, samples + 256, bias); | |
1097 break; | |
1098 | |
1099 case CONVERT (A52_2F1R, A52_DOLBY): | |
1100 mix21toS_SSE (samples, bias); | |
1101 break; | |
1102 | |
1103 case CONVERT (A52_3F1R, A52_STEREO): | |
1104 if (slev == 0) | |
1105 goto mix_3to2_SSE; | |
1106 mix31to2_SSE (samples, bias); | |
1107 break; | |
1108 | |
1109 case CONVERT (A52_3F1R, A52_DOLBY): | |
1110 mix31toS_SSE (samples, bias); | |
1111 break; | |
1112 | |
1113 case CONVERT (A52_2F2R, A52_STEREO): | |
1114 if (slev == 0) | |
1115 break; | |
1116 mix2to1_SSE (samples, samples + 512, bias); | |
1117 mix2to1_SSE (samples + 256, samples + 768, bias); | |
1118 break; | |
1119 | |
1120 case CONVERT (A52_2F2R, A52_DOLBY): | |
1121 mix22toS_SSE (samples, bias); | |
1122 break; | |
1123 | |
1124 case CONVERT (A52_3F2R, A52_STEREO): | |
1125 if (slev == 0) | |
1126 goto mix_3to2_SSE; | |
1127 mix32to2_SSE (samples, bias); | |
1128 break; | |
1129 | |
1130 case CONVERT (A52_3F2R, A52_DOLBY): | |
1131 mix32toS_SSE (samples, bias); | |
1132 break; | |
1133 | |
1134 case CONVERT (A52_3F1R, A52_3F): | |
1135 if (slev == 0) | |
1136 break; | |
1137 mix21to2_SSE (samples, samples + 512, bias); | |
1138 break; | |
1139 | |
1140 case CONVERT (A52_3F2R, A52_3F): | |
1141 if (slev == 0) | |
1142 break; | |
1143 mix2to1_SSE (samples, samples + 768, bias); | |
1144 mix2to1_SSE (samples + 512, samples + 1024, bias); | |
1145 break; | |
1146 | |
1147 case CONVERT (A52_3F1R, A52_2F1R): | |
1148 mix3to2_SSE (samples, bias); | |
1149 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1150 break; | |
1151 | |
1152 case CONVERT (A52_2F2R, A52_2F1R): | |
1153 mix2to1_SSE (samples + 512, samples + 768, bias); | |
1154 break; | |
1155 | |
1156 case CONVERT (A52_3F2R, A52_2F1R): | |
1157 mix3to2_SSE (samples, bias); //FIXME possible bug? (output doesnt seem to be used) | |
1158 move2to1_SSE (samples + 768, samples + 512, bias); | |
1159 break; | |
1160 | |
1161 case CONVERT (A52_3F2R, A52_3F1R): | |
1162 mix2to1_SSE (samples + 768, samples + 1024, bias); | |
1163 break; | |
1164 | |
1165 case CONVERT (A52_2F1R, A52_2F2R): | |
1166 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); | |
1167 break; | |
1168 | |
1169 case CONVERT (A52_3F1R, A52_2F2R): | |
1170 mix3to2_SSE (samples, bias); | |
1171 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1172 break; | |
1173 | |
1174 case CONVERT (A52_3F2R, A52_2F2R): | |
1175 mix3to2_SSE (samples, bias); | |
1176 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1177 memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t)); | |
1178 break; | |
1179 | |
1180 case CONVERT (A52_3F1R, A52_3F2R): | |
1181 memcpy (samples + 1027, samples + 768, 256 * sizeof (sample_t)); | |
1182 break; | |
1183 } | |
1184 } | |
1185 | |
1186 static void upmix_MMX (sample_t * samples, int acmod, int output) | |
1187 { | |
1188 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
1189 | |
1190 case CONVERT (A52_CHANNEL, A52_CHANNEL2): | |
1191 memcpy (samples + 256, samples, 256 * sizeof (sample_t)); | |
1192 break; | |
1193 | |
1194 case CONVERT (A52_3F2R, A52_MONO): | |
1195 zero_MMX (samples + 1024); | |
1196 case CONVERT (A52_3F1R, A52_MONO): | |
1197 case CONVERT (A52_2F2R, A52_MONO): | |
1198 zero_MMX (samples + 768); | |
1199 case CONVERT (A52_3F, A52_MONO): | |
1200 case CONVERT (A52_2F1R, A52_MONO): | |
1201 zero_MMX (samples + 512); | |
1202 case CONVERT (A52_CHANNEL, A52_MONO): | |
1203 case CONVERT (A52_STEREO, A52_MONO): | |
1204 zero_MMX (samples + 256); | |
1205 break; | |
1206 | |
1207 case CONVERT (A52_3F2R, A52_STEREO): | |
1208 case CONVERT (A52_3F2R, A52_DOLBY): | |
1209 zero_MMX (samples + 1024); | |
1210 case CONVERT (A52_3F1R, A52_STEREO): | |
1211 case CONVERT (A52_3F1R, A52_DOLBY): | |
1212 zero_MMX (samples + 768); | |
1213 case CONVERT (A52_3F, A52_STEREO): | |
1214 case CONVERT (A52_3F, A52_DOLBY): | |
1215 mix_3to2_MMX: | |
1216 memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t)); | |
1217 zero_MMX (samples + 256); | |
1218 break; | |
1219 | |
1220 case CONVERT (A52_2F2R, A52_STEREO): | |
1221 case CONVERT (A52_2F2R, A52_DOLBY): | |
1222 zero_MMX (samples + 768); | |
1223 case CONVERT (A52_2F1R, A52_STEREO): | |
1224 case CONVERT (A52_2F1R, A52_DOLBY): | |
1225 zero_MMX (samples + 512); | |
1226 break; | |
1227 | |
1228 case CONVERT (A52_3F2R, A52_3F): | |
1229 zero_MMX (samples + 1024); | |
1230 case CONVERT (A52_3F1R, A52_3F): | |
1231 case CONVERT (A52_2F2R, A52_2F1R): | |
1232 zero_MMX (samples + 768); | |
1233 break; | |
1234 | |
1235 case CONVERT (A52_3F2R, A52_3F1R): | |
1236 zero_MMX (samples + 1024); | |
1237 break; | |
1238 | |
1239 case CONVERT (A52_3F2R, A52_2F1R): | |
1240 zero_MMX (samples + 1024); | |
1241 case CONVERT (A52_3F1R, A52_2F1R): | |
1242 mix_31to21_MMX: | |
1243 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); | |
1244 goto mix_3to2_MMX; | |
1245 | |
1246 case CONVERT (A52_3F2R, A52_2F2R): | |
1247 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); | |
1248 goto mix_31to21_MMX; | |
1249 } | |
1250 } | |
4233 | 1251 |
1252 static void mix2to1_3dnow (sample_t * dest, sample_t * src, sample_t bias) | |
1253 { | |
1254 asm volatile( | |
1255 "movd %2, %%mm7 \n\t" | |
1256 "punpckldq %2, %%mm7 \n\t" | |
1257 "movl $-1024, %%esi \n\t" | |
1258 ".balign 16\n\t" | |
1259 "1: \n\t" | |
1260 "movq (%0, %%esi), %%mm0 \n\t" | |
1261 "movq 8(%0, %%esi), %%mm1 \n\t" | |
1262 "movq 16(%0, %%esi), %%mm2 \n\t" | |
1263 "movq 24(%0, %%esi), %%mm3 \n\t" | |
1264 "pfadd (%1, %%esi), %%mm0 \n\t" | |
1265 "pfadd 8(%1, %%esi), %%mm1 \n\t" | |
1266 "pfadd 16(%1, %%esi), %%mm2 \n\t" | |
1267 "pfadd 24(%1, %%esi), %%mm3 \n\t" | |
1268 "pfadd %%mm7, %%mm0 \n\t" | |
1269 "pfadd %%mm7, %%mm1 \n\t" | |
1270 "pfadd %%mm7, %%mm2 \n\t" | |
1271 "pfadd %%mm7, %%mm3 \n\t" | |
1272 "movq %%mm0, (%1, %%esi) \n\t" | |
1273 "movq %%mm1, 8(%1, %%esi) \n\t" | |
1274 "movq %%mm2, 16(%1, %%esi) \n\t" | |
1275 "movq %%mm3, 24(%1, %%esi) \n\t" | |
1276 "addl $32, %%esi \n\t" | |
1277 " jnz 1b \n\t" | |
1278 :: "r" (src+256), "r" (dest+256), "m" (bias) | |
1279 : "%esi" | |
1280 ); | |
1281 } | |
1282 | |
1283 static void mix3to1_3dnow (sample_t * samples, sample_t bias) | |
1284 { | |
1285 asm volatile( | |
1286 "movd %1, %%mm7 \n\t" | |
1287 "punpckldq %1, %%mm7 \n\t" | |
1288 "movl $-1024, %%esi \n\t" | |
1289 ".balign 16\n\t" | |
1290 "1: \n\t" | |
1291 "movq (%0, %%esi), %%mm0 \n\t" | |
1292 "movq 8(%0, %%esi), %%mm1 \n\t" | |
1293 "movq 1024(%0, %%esi), %%mm2 \n\t" | |
1294 "movq 1032(%0, %%esi), %%mm3 \n\t" | |
1295 "pfadd 2048(%0, %%esi), %%mm0 \n\t" | |
1296 "pfadd 2056(%0, %%esi), %%mm1 \n\t" | |
1297 "pfadd %%mm7, %%mm0 \n\t" | |
1298 "pfadd %%mm7, %%mm1 \n\t" | |
1299 "pfadd %%mm2, %%mm0 \n\t" | |
1300 "pfadd %%mm3, %%mm1 \n\t" | |
1301 "movq %%mm0, (%0, %%esi) \n\t" | |
1302 "movq %%mm1, 8(%0, %%esi) \n\t" | |
1303 "addl $16, %%esi \n\t" | |
1304 " jnz 1b \n\t" | |
1305 :: "r" (samples+256), "m" (bias) | |
1306 : "%esi" | |
1307 ); | |
1308 } | |
1309 | |
1310 static void mix4to1_3dnow (sample_t * samples, sample_t bias) | |
1311 { | |
1312 asm volatile( | |
1313 "movd %1, %%mm7 \n\t" | |
1314 "punpckldq %1, %%mm7 \n\t" | |
1315 "movl $-1024, %%esi \n\t" | |
1316 ".balign 16\n\t" | |
1317 "1: \n\t" | |
1318 "movq (%0, %%esi), %%mm0 \n\t" | |
1319 "movq 8(%0, %%esi), %%mm1 \n\t" | |
1320 "movq 1024(%0, %%esi), %%mm2 \n\t" | |
1321 "movq 1032(%0, %%esi), %%mm3 \n\t" | |
1322 "pfadd 2048(%0, %%esi), %%mm0 \n\t" | |
1323 "pfadd 2056(%0, %%esi), %%mm1 \n\t" | |
1324 "pfadd 3072(%0, %%esi), %%mm2 \n\t" | |
1325 "pfadd 3080(%0, %%esi), %%mm3 \n\t" | |
1326 "pfadd %%mm7, %%mm0 \n\t" | |
1327 "pfadd %%mm7, %%mm1 \n\t" | |
1328 "pfadd %%mm2, %%mm0 \n\t" | |
1329 "pfadd %%mm3, %%mm1 \n\t" | |
1330 "movq %%mm0, (%0, %%esi) \n\t" | |
1331 "movq %%mm1, 8(%0, %%esi) \n\t" | |
1332 "addl $16, %%esi \n\t" | |
1333 " jnz 1b \n\t" | |
1334 :: "r" (samples+256), "m" (bias) | |
1335 : "%esi" | |
1336 ); | |
1337 } | |
1338 | |
1339 static void mix5to1_3dnow (sample_t * samples, sample_t bias) | |
1340 { | |
1341 asm volatile( | |
1342 "movd %1, %%mm7 \n\t" | |
1343 "punpckldq %1, %%mm7 \n\t" | |
1344 "movl $-1024, %%esi \n\t" | |
1345 ".balign 16\n\t" | |
1346 "1: \n\t" | |
1347 "movq (%0, %%esi), %%mm0 \n\t" | |
1348 "movq 8(%0, %%esi), %%mm1 \n\t" | |
1349 "movq 1024(%0, %%esi), %%mm2 \n\t" | |
1350 "movq 1032(%0, %%esi), %%mm3 \n\t" | |
1351 "pfadd 2048(%0, %%esi), %%mm0 \n\t" | |
1352 "pfadd 2056(%0, %%esi), %%mm1 \n\t" | |
1353 "pfadd 3072(%0, %%esi), %%mm2 \n\t" | |
1354 "pfadd 3080(%0, %%esi), %%mm3 \n\t" | |
1355 "pfadd %%mm7, %%mm0 \n\t" | |
1356 "pfadd %%mm7, %%mm1 \n\t" | |
1357 "pfadd 4096(%0, %%esi), %%mm2 \n\t" | |
1358 "pfadd 4104(%0, %%esi), %%mm3 \n\t" | |
1359 "pfadd %%mm2, %%mm0 \n\t" | |
1360 "pfadd %%mm3, %%mm1 \n\t" | |
1361 "movq %%mm0, (%0, %%esi) \n\t" | |
1362 "movq %%mm1, 8(%0, %%esi) \n\t" | |
1363 "addl $16, %%esi \n\t" | |
1364 " jnz 1b \n\t" | |
1365 :: "r" (samples+256), "m" (bias) | |
1366 : "%esi" | |
1367 ); | |
1368 } | |
1369 | |
1370 static void mix3to2_3dnow (sample_t * samples, sample_t bias) | |
1371 { | |
1372 asm volatile( | |
1373 "movd %1, %%mm7 \n\t" | |
1374 "punpckldq %1, %%mm7 \n\t" | |
1375 "movl $-1024, %%esi \n\t" | |
1376 ".balign 16\n\t" | |
1377 "1: \n\t" | |
1378 "movq 1024(%0, %%esi), %%mm0 \n\t" | |
1379 "movq 1032(%0, %%esi), %%mm1 \n\t" | |
1380 "pfadd %%mm7, %%mm0 \n\t" //common | |
1381 "pfadd %%mm7, %%mm1 \n\t" //common | |
1382 "movq (%0, %%esi), %%mm2 \n\t" | |
1383 "movq 8(%0, %%esi), %%mm3 \n\t" | |
1384 "movq 2048(%0, %%esi), %%mm4 \n\t" | |
1385 "movq 2056(%0, %%esi), %%mm5 \n\t" | |
1386 "pfadd %%mm0, %%mm2 \n\t" | |
1387 "pfadd %%mm0, %%mm3 \n\t" | |
1388 "pfadd %%mm0, %%mm4 \n\t" | |
1389 "pfadd %%mm0, %%mm5 \n\t" | |
1390 "movq %%mm2, (%0, %%esi) \n\t" | |
1391 "movq %%mm3, 8(%0, %%esi) \n\t" | |
1392 "movq %%mm4, 1024(%0, %%esi) \n\t" | |
1393 "movq %%mm5, 1032(%0, %%esi) \n\t" | |
1394 "addl $16, %%esi \n\t" | |
1395 " jnz 1b \n\t" | |
1396 :: "r" (samples+256), "m" (bias) | |
1397 : "%esi" | |
1398 ); | |
1399 } | |
1400 | |
1401 static void mix21to2_3dnow (sample_t * left, sample_t * right, sample_t bias) | |
1402 { | |
1403 asm volatile( | |
1404 "movd %2, %%mm7 \n\t" | |
1405 "punpckldq %2, %%mm7 \n\t" | |
1406 "movl $-1024, %%esi \n\t" | |
1407 ".balign 16\n\t" | |
1408 "1: \n\t" | |
1409 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
1410 "movq 1032(%1, %%esi), %%mm1 \n\t" | |
1411 "pfadd %%mm7, %%mm0 \n\t" //common | |
1412 "pfadd %%mm7, %%mm1 \n\t" //common | |
1413 "movq (%0, %%esi), %%mm2 \n\t" | |
1414 "movq 8(%0, %%esi), %%mm3 \n\t" | |
1415 "movq (%1, %%esi), %%mm4 \n\t" | |
1416 "movq 8(%1, %%esi), %%mm5 \n\t" | |
1417 "pfadd %%mm0, %%mm2 \n\t" | |
1418 "pfadd %%mm1, %%mm3 \n\t" | |
1419 "pfadd %%mm0, %%mm4 \n\t" | |
1420 "pfadd %%mm1, %%mm5 \n\t" | |
1421 "movq %%mm2, (%0, %%esi) \n\t" | |
1422 "movq %%mm3, 8(%0, %%esi) \n\t" | |
1423 "movq %%mm4, (%1, %%esi) \n\t" | |
1424 "movq %%mm5, 8(%1, %%esi) \n\t" | |
1425 "addl $16, %%esi \n\t" | |
1426 " jnz 1b \n\t" | |
1427 :: "r" (left+256), "r" (right+256), "m" (bias) | |
1428 : "%esi" | |
1429 ); | |
1430 } | |
1431 | |
1432 static void mix21toS_3dnow (sample_t * samples, sample_t bias) | |
1433 { | |
1434 asm volatile( | |
1435 "movd %1, %%mm7 \n\t" | |
1436 "punpckldq %1, %%mm7 \n\t" | |
1437 "movl $-1024, %%esi \n\t" | |
1438 ".balign 16\n\t" | |
1439 "1: \n\t" | |
1440 "movq 2048(%0, %%esi), %%mm0 \n\t" // surround | |
1441 "movq 2056(%0, %%esi), %%mm1 \n\t" // surround | |
1442 "movq (%0, %%esi), %%mm2 \n\t" | |
1443 "movq 8(%0, %%esi), %%mm3 \n\t" | |
1444 "movq 1024(%0, %%esi), %%mm4 \n\t" | |
1445 "movq 1032(%0, %%esi), %%mm5 \n\t" | |
1446 "pfadd %%mm7, %%mm2 \n\t" | |
1447 "pfadd %%mm7, %%mm3 \n\t" | |
1448 "pfadd %%mm7, %%mm4 \n\t" | |
1449 "pfadd %%mm7, %%mm5 \n\t" | |
1450 "pfsub %%mm0, %%mm2 \n\t" | |
1451 "pfsub %%mm1, %%mm3 \n\t" | |
1452 "pfadd %%mm0, %%mm4 \n\t" | |
1453 "pfadd %%mm1, %%mm5 \n\t" | |
1454 "movq %%mm2, (%0, %%esi) \n\t" | |
1455 "movq %%mm3, 8(%0, %%esi) \n\t" | |
1456 "movq %%mm4, 1024(%0, %%esi) \n\t" | |
1457 "movq %%mm5, 1032(%0, %%esi) \n\t" | |
1458 "addl $16, %%esi \n\t" | |
1459 " jnz 1b \n\t" | |
1460 :: "r" (samples+256), "m" (bias) | |
1461 : "%esi" | |
1462 ); | |
1463 } | |
1464 | |
1465 static void mix31to2_3dnow (sample_t * samples, sample_t bias) | |
1466 { | |
1467 asm volatile( | |
1468 "movd %1, %%mm7 \n\t" | |
1469 "punpckldq %1, %%mm7 \n\t" | |
1470 "movl $-1024, %%esi \n\t" | |
1471 ".balign 16\n\t" | |
1472 "1: \n\t" | |
1473 "movq 1024(%0, %%esi), %%mm0 \n\t" | |
1474 "movq 1032(%0, %%esi), %%mm1 \n\t" | |
1475 "pfadd 3072(%0, %%esi), %%mm0 \n\t" | |
1476 "pfadd 3080(%0, %%esi), %%mm1 \n\t" | |
1477 "pfadd %%mm7, %%mm0 \n\t" // common | |
1478 "pfadd %%mm7, %%mm1 \n\t" // common | |
1479 "movq (%0, %%esi), %%mm2 \n\t" | |
1480 "movq 8(%0, %%esi), %%mm3 \n\t" | |
1481 "movq 2048(%0, %%esi), %%mm4 \n\t" | |
1482 "movq 2056(%0, %%esi), %%mm5 \n\t" | |
1483 "pfadd %%mm0, %%mm2 \n\t" | |
1484 "pfadd %%mm1, %%mm3 \n\t" | |
1485 "pfadd %%mm0, %%mm4 \n\t" | |
1486 "pfadd %%mm1, %%mm5 \n\t" | |
1487 "movq %%mm2, (%0, %%esi) \n\t" | |
1488 "movq %%mm3, 8(%0, %%esi) \n\t" | |
1489 "movq %%mm4, 1024(%0, %%esi) \n\t" | |
1490 "movq %%mm5, 1032(%0, %%esi) \n\t" | |
1491 "addl $16, %%esi \n\t" | |
1492 " jnz 1b \n\t" | |
1493 :: "r" (samples+256), "m" (bias) | |
1494 : "%esi" | |
1495 ); | |
1496 } | |
1497 | |
1498 static void mix31toS_3dnow (sample_t * samples, sample_t bias) | |
1499 { | |
1500 asm volatile( | |
1501 "movd %1, %%mm7 \n\t" | |
1502 "punpckldq %1, %%mm7 \n\t" | |
1503 "movl $-1024, %%esi \n\t" | |
1504 ".balign 16\n\t" | |
1505 "1: \n\t" | |
1506 "movq 1024(%0, %%esi), %%mm0 \n\t" | |
1507 "movq 1032(%0, %%esi), %%mm1 \n\t" | |
1508 "pfadd %%mm7, %%mm0 \n\t" // common | |
1509 "pfadd %%mm7, %%mm1 \n\t" // common | |
1510 "movq (%0, %%esi), %%mm2 \n\t" | |
1511 "movq 8(%0, %%esi), %%mm3 \n\t" | |
1512 "movq 2048(%0, %%esi), %%mm4 \n\t" | |
1513 "movq 2056(%0, %%esi), %%mm5 \n\t" | |
1514 "pfadd %%mm0, %%mm2 \n\t" | |
1515 "pfadd %%mm1, %%mm3 \n\t" | |
1516 "pfadd %%mm0, %%mm4 \n\t" | |
1517 "pfadd %%mm1, %%mm5 \n\t" | |
1518 "movq 3072(%0, %%esi), %%mm0 \n\t" // surround | |
1519 "movq 3080(%0, %%esi), %%mm1 \n\t" // surround | |
1520 "pfsub %%mm0, %%mm2 \n\t" | |
1521 "pfsub %%mm1, %%mm3 \n\t" | |
1522 "pfadd %%mm0, %%mm4 \n\t" | |
1523 "pfadd %%mm1, %%mm5 \n\t" | |
1524 "movq %%mm2, (%0, %%esi) \n\t" | |
1525 "movq %%mm3, 8(%0, %%esi) \n\t" | |
1526 "movq %%mm4, 1024(%0, %%esi) \n\t" | |
1527 "movq %%mm5, 1032(%0, %%esi) \n\t" | |
1528 "addl $16, %%esi \n\t" | |
1529 " jnz 1b \n\t" | |
1530 :: "r" (samples+256), "m" (bias) | |
1531 : "%esi" | |
1532 ); | |
1533 } | |
1534 | |
1535 static void mix22toS_3dnow (sample_t * samples, sample_t bias) | |
1536 { | |
1537 asm volatile( | |
1538 "movd %1, %%mm7 \n\t" | |
1539 "punpckldq %1, %%mm7 \n\t" | |
1540 "movl $-1024, %%esi \n\t" | |
1541 ".balign 16\n\t" | |
1542 "1: \n\t" | |
1543 "movq 2048(%0, %%esi), %%mm0 \n\t" | |
1544 "movq 2056(%0, %%esi), %%mm1 \n\t" | |
1545 "pfadd 3072(%0, %%esi), %%mm0 \n\t" // surround | |
1546 "pfadd 3080(%0, %%esi), %%mm1 \n\t" // surround | |
1547 "movq (%0, %%esi), %%mm2 \n\t" | |
1548 "movq 8(%0, %%esi), %%mm3 \n\t" | |
1549 "movq 1024(%0, %%esi), %%mm4 \n\t" | |
1550 "movq 1032(%0, %%esi), %%mm5 \n\t" | |
1551 "pfadd %%mm7, %%mm2 \n\t" | |
1552 "pfadd %%mm7, %%mm3 \n\t" | |
1553 "pfadd %%mm7, %%mm4 \n\t" | |
1554 "pfadd %%mm7, %%mm5 \n\t" | |
1555 "pfsub %%mm0, %%mm2 \n\t" | |
1556 "pfsub %%mm1, %%mm3 \n\t" | |
1557 "pfadd %%mm0, %%mm4 \n\t" | |
1558 "pfadd %%mm1, %%mm5 \n\t" | |
1559 "movq %%mm2, (%0, %%esi) \n\t" | |
1560 "movq %%mm3, 8(%0, %%esi) \n\t" | |
1561 "movq %%mm4, 1024(%0, %%esi) \n\t" | |
1562 "movq %%mm5, 1032(%0, %%esi) \n\t" | |
1563 "addl $16, %%esi \n\t" | |
1564 " jnz 1b \n\t" | |
1565 :: "r" (samples+256), "m" (bias) | |
1566 : "%esi" | |
1567 ); | |
1568 } | |
1569 | |
1570 static void mix32to2_3dnow (sample_t * samples, sample_t bias) | |
1571 { | |
1572 asm volatile( | |
1573 "movd %1, %%mm7 \n\t" | |
1574 "punpckldq %1, %%mm7 \n\t" | |
1575 "movl $-1024, %%esi \n\t" | |
1576 ".balign 16\n\t" | |
1577 "1: \n\t" | |
1578 "movq 1024(%0, %%esi), %%mm0 \n\t" | |
1579 "movq 1032(%0, %%esi), %%mm1 \n\t" | |
1580 "pfadd %%mm7, %%mm0 \n\t" // common | |
1581 "pfadd %%mm7, %%mm1 \n\t" // common | |
1582 "movq %%mm0, %%mm2 \n\t" // common | |
1583 "movq %%mm1, %%mm3 \n\t" // common | |
1584 "pfadd (%0, %%esi), %%mm0 \n\t" | |
1585 "pfadd 8(%0, %%esi), %%mm1 \n\t" | |
1586 "pfadd 2048(%0, %%esi), %%mm2 \n\t" | |
1587 "pfadd 2056(%0, %%esi), %%mm3 \n\t" | |
1588 "pfadd 3072(%0, %%esi), %%mm0 \n\t" | |
1589 "pfadd 3080(%0, %%esi), %%mm1 \n\t" | |
1590 "pfadd 4096(%0, %%esi), %%mm2 \n\t" | |
1591 "pfadd 4104(%0, %%esi), %%mm3 \n\t" | |
1592 "movq %%mm0, (%0, %%esi) \n\t" | |
1593 "movq %%mm1, 8(%0, %%esi) \n\t" | |
1594 "movq %%mm2, 1024(%0, %%esi) \n\t" | |
1595 "movq %%mm3, 1032(%0, %%esi) \n\t" | |
1596 "addl $16, %%esi \n\t" | |
1597 " jnz 1b \n\t" | |
1598 :: "r" (samples+256), "m" (bias) | |
1599 : "%esi" | |
1600 ); | |
1601 } | |
1602 | |
1603 /* todo: should be optimized better */ | |
1604 static void mix32toS_3dnow (sample_t * samples, sample_t bias) | |
1605 { | |
1606 asm volatile( | |
1607 "movl $-1024, %%esi \n\t" | |
1608 ".balign 16\n\t" | |
1609 "1: \n\t" | |
1610 "movd %1, %%mm7 \n\t" | |
1611 "punpckldq %1, %%mm7 \n\t" | |
1612 "movq 1024(%0, %%esi), %%mm0 \n\t" | |
1613 "movq 1032(%0, %%esi), %%mm1 \n\t" | |
1614 "movq 3072(%0, %%esi), %%mm4 \n\t" | |
1615 "movq 3080(%0, %%esi), %%mm5 \n\t" | |
1616 "pfadd %%mm7, %%mm0 \n\t" // common | |
1617 "pfadd %%mm7, %%mm1 \n\t" // common | |
1618 "pfadd 4096(%0, %%esi), %%mm4 \n\t" // surround | |
1619 "pfadd 4104(%0, %%esi), %%mm5 \n\t" // surround | |
1620 "movq (%0, %%esi), %%mm2 \n\t" | |
1621 "movq 8(%0, %%esi), %%mm3 \n\t" | |
1622 "movq 2048(%0, %%esi), %%mm6 \n\t" | |
1623 "movq 2056(%0, %%esi), %%mm7 \n\t" | |
1624 "pfsub %%mm4, %%mm2 \n\t" | |
1625 "pfsub %%mm5, %%mm3 \n\t" | |
1626 "pfadd %%mm4, %%mm6 \n\t" | |
1627 "pfadd %%mm5, %%mm7 \n\t" | |
1628 "pfadd %%mm0, %%mm2 \n\t" | |
1629 "pfadd %%mm1, %%mm3 \n\t" | |
1630 "pfadd %%mm0, %%mm6 \n\t" | |
1631 "pfadd %%mm1, %%mm7 \n\t" | |
1632 "movq %%mm2, (%0, %%esi) \n\t" | |
1633 "movq %%mm3, 8(%0, %%esi) \n\t" | |
1634 "movq %%mm6, 1024(%0, %%esi) \n\t" | |
1635 "movq %%mm7, 1032(%0, %%esi) \n\t" | |
1636 "addl $16, %%esi \n\t" | |
1637 " jnz 1b \n\t" | |
1638 :: "r" (samples+256), "m" (bias) | |
1639 : "%esi" | |
1640 ); | |
1641 } | |
1642 | |
1643 static void move2to1_3dnow (sample_t * src, sample_t * dest, sample_t bias) | |
1644 { | |
1645 asm volatile( | |
1646 "movd %2, %%mm7 \n\t" | |
1647 "punpckldq %2, %%mm7 \n\t" | |
1648 "movl $-1024, %%esi \n\t" | |
1649 ".balign 16\n\t" | |
1650 "1: \n\t" | |
1651 "movq (%0, %%esi), %%mm0 \n\t" | |
1652 "movq 8(%0, %%esi), %%mm1 \n\t" | |
1653 "movq 16(%0, %%esi), %%mm2 \n\t" | |
1654 "movq 24(%0, %%esi), %%mm3 \n\t" | |
1655 "pfadd 1024(%0, %%esi), %%mm0 \n\t" | |
1656 "pfadd 1032(%0, %%esi), %%mm1 \n\t" | |
1657 "pfadd 1040(%0, %%esi), %%mm2 \n\t" | |
1658 "pfadd 1048(%0, %%esi), %%mm3 \n\t" | |
1659 "pfadd %%mm7, %%mm0 \n\t" | |
1660 "pfadd %%mm7, %%mm1 \n\t" | |
1661 "pfadd %%mm7, %%mm2 \n\t" | |
1662 "pfadd %%mm7, %%mm3 \n\t" | |
1663 "movq %%mm0, (%1, %%esi) \n\t" | |
1664 "movq %%mm1, 8(%1, %%esi) \n\t" | |
1665 "movq %%mm2, 16(%1, %%esi) \n\t" | |
1666 "movq %%mm3, 24(%1, %%esi) \n\t" | |
1667 "addl $32, %%esi \n\t" | |
1668 " jnz 1b \n\t" | |
1669 :: "r" (src+256), "r" (dest+256), "m" (bias) | |
1670 : "%esi" | |
1671 ); | |
1672 } | |
1673 | |
1674 static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t bias, | |
1675 sample_t clev, sample_t slev) | |
1676 { | |
1677 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
1678 | |
1679 case CONVERT (A52_CHANNEL, A52_CHANNEL2): | |
1680 memcpy (samples, samples + 256, 256 * sizeof (sample_t)); | |
1681 break; | |
1682 | |
1683 case CONVERT (A52_CHANNEL, A52_MONO): | |
1684 case CONVERT (A52_STEREO, A52_MONO): | |
1685 mix_2to1_3dnow: | |
1686 mix2to1_3dnow (samples, samples + 256, bias); | |
1687 break; | |
1688 | |
1689 case CONVERT (A52_2F1R, A52_MONO): | |
1690 if (slev == 0) | |
1691 goto mix_2to1_3dnow; | |
1692 case CONVERT (A52_3F, A52_MONO): | |
1693 mix_3to1_3dnow: | |
1694 mix3to1_3dnow (samples, bias); | |
1695 break; | |
1696 | |
1697 case CONVERT (A52_3F1R, A52_MONO): | |
1698 if (slev == 0) | |
1699 goto mix_3to1_3dnow; | |
1700 case CONVERT (A52_2F2R, A52_MONO): | |
1701 if (slev == 0) | |
1702 goto mix_2to1_3dnow; | |
1703 mix4to1_3dnow (samples, bias); | |
1704 break; | |
1705 | |
1706 case CONVERT (A52_3F2R, A52_MONO): | |
1707 if (slev == 0) | |
1708 goto mix_3to1_3dnow; | |
1709 mix5to1_3dnow (samples, bias); | |
1710 break; | |
1711 | |
1712 case CONVERT (A52_MONO, A52_DOLBY): | |
1713 memcpy (samples + 256, samples, 256 * sizeof (sample_t)); | |
1714 break; | |
1715 | |
1716 case CONVERT (A52_3F, A52_STEREO): | |
1717 case CONVERT (A52_3F, A52_DOLBY): | |
1718 mix_3to2_3dnow: | |
1719 mix3to2_3dnow (samples, bias); | |
1720 break; | |
1721 | |
1722 case CONVERT (A52_2F1R, A52_STEREO): | |
1723 if (slev == 0) | |
1724 break; | |
1725 mix21to2_3dnow (samples, samples + 256, bias); | |
1726 break; | |
1727 | |
1728 case CONVERT (A52_2F1R, A52_DOLBY): | |
1729 mix21toS_3dnow (samples, bias); | |
1730 break; | |
1731 | |
1732 case CONVERT (A52_3F1R, A52_STEREO): | |
1733 if (slev == 0) | |
1734 goto mix_3to2_3dnow; | |
1735 mix31to2_3dnow (samples, bias); | |
1736 break; | |
1737 | |
1738 case CONVERT (A52_3F1R, A52_DOLBY): | |
1739 mix31toS_3dnow (samples, bias); | |
1740 break; | |
1741 | |
1742 case CONVERT (A52_2F2R, A52_STEREO): | |
1743 if (slev == 0) | |
1744 break; | |
1745 mix2to1_3dnow (samples, samples + 512, bias); | |
1746 mix2to1_3dnow (samples + 256, samples + 768, bias); | |
1747 break; | |
1748 | |
1749 case CONVERT (A52_2F2R, A52_DOLBY): | |
1750 mix22toS_3dnow (samples, bias); | |
1751 break; | |
1752 | |
1753 case CONVERT (A52_3F2R, A52_STEREO): | |
1754 if (slev == 0) | |
1755 goto mix_3to2_3dnow; | |
1756 mix32to2_3dnow (samples, bias); | |
1757 break; | |
1758 | |
1759 case CONVERT (A52_3F2R, A52_DOLBY): | |
1760 mix32toS_3dnow (samples, bias); | |
1761 break; | |
1762 | |
1763 case CONVERT (A52_3F1R, A52_3F): | |
1764 if (slev == 0) | |
1765 break; | |
1766 mix21to2_3dnow (samples, samples + 512, bias); | |
1767 break; | |
1768 | |
1769 case CONVERT (A52_3F2R, A52_3F): | |
1770 if (slev == 0) | |
1771 break; | |
1772 mix2to1_3dnow (samples, samples + 768, bias); | |
1773 mix2to1_3dnow (samples + 512, samples + 1024, bias); | |
1774 break; | |
1775 | |
1776 case CONVERT (A52_3F1R, A52_2F1R): | |
1777 mix3to2_3dnow (samples, bias); | |
1778 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1779 break; | |
1780 | |
1781 case CONVERT (A52_2F2R, A52_2F1R): | |
1782 mix2to1_3dnow (samples + 512, samples + 768, bias); | |
1783 break; | |
1784 | |
1785 case CONVERT (A52_3F2R, A52_2F1R): | |
1786 mix3to2_3dnow (samples, bias); //FIXME possible bug? (output doesnt seem to be used) | |
1787 move2to1_3dnow (samples + 768, samples + 512, bias); | |
1788 break; | |
1789 | |
1790 case CONVERT (A52_3F2R, A52_3F1R): | |
1791 mix2to1_3dnow (samples + 768, samples + 1024, bias); | |
1792 break; | |
1793 | |
1794 case CONVERT (A52_2F1R, A52_2F2R): | |
1795 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); | |
1796 break; | |
1797 | |
1798 case CONVERT (A52_3F1R, A52_2F2R): | |
1799 mix3to2_3dnow (samples, bias); | |
1800 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1801 break; | |
1802 | |
1803 case CONVERT (A52_3F2R, A52_2F2R): | |
1804 mix3to2_3dnow (samples, bias); | |
1805 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1806 memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t)); | |
1807 break; | |
1808 | |
1809 case CONVERT (A52_3F1R, A52_3F2R): | |
1810 memcpy (samples + 1027, samples + 768, 256 * sizeof (sample_t)); | |
1811 break; | |
1812 } | |
1813 __asm __volatile("femms":::"memory"); | |
1814 } | |
1815 | |
3904 | 1816 #endif //ARCH_X86 |