Mercurial > mplayer.hg
annotate liba52/downmix.c @ 9133:a45282d6ad32
argh, i forgot to 'cvs add' it (again)
author | arpi |
---|---|
date | Tue, 28 Jan 2003 01:02:07 +0000 |
parents | d0a34309e424 |
children | 9f297a651e11 |
rev | line source |
---|---|
3394 | 1 /* |
2 * downmix.c | |
3 * Copyright (C) 2000-2001 Michel Lespinasse <walken@zoy.org> | |
4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> | |
5 * | |
6 * This file is part of a52dec, a free ATSC A-52 stream decoder. | |
7 * See http://liba52.sourceforge.net/ for updates. | |
8 * | |
9 * a52dec is free software; you can redistribute it and/or modify | |
10 * it under the terms of the GNU General Public License as published by | |
11 * the Free Software Foundation; either version 2 of the License, or | |
12 * (at your option) any later version. | |
13 * | |
14 * a52dec is distributed in the hope that it will be useful, | |
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 * GNU General Public License for more details. | |
18 * | |
19 * You should have received a copy of the GNU General Public License | |
20 * along with this program; if not, write to the Free Software | |
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
3625 | 22 * |
23 * SSE optimizations from Michael Niedermayer (michaelni@gmx.at) | |
3394 | 24 */ |
25 | |
26 #include "config.h" | |
27 | |
28 #include <string.h> | |
29 #include <inttypes.h> | |
30 | |
31 #include "a52.h" | |
32 #include "a52_internal.h" | |
3910
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
33 #include "mm_accel.h" |
3394 | 34 |
/*
 * Pack an (input channel mode, output mode) pair into one integer so a
 * conversion can be selected with a single switch statement.
 */
#define CONVERT(acmod,output) (((output) << 3) + (acmod))


/*
 * Downmix / upmix entry points, called through function pointers.
 * Both start out NULL; downmix_accel_init() assigns them.
 */
void (*downmix)(sample_t * samples, int acmod, int output, sample_t bias,
                sample_t clev, sample_t slev)= NULL;
void (*upmix)(sample_t * samples, int acmod, int output)= NULL;

/*
 * Candidate implementations for the two pointers above.
 * NOTE(review): only the _C variants are defined in the part of the file
 * reviewed here; the SSE / 3DNow! / MMX ones are presumably defined further
 * down -- confirm.
 */
static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias,
                         sample_t clev, sample_t slev);
static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t bias,
                           sample_t clev, sample_t slev);
static void downmix_C (sample_t * samples, int acmod, int output, sample_t bias,
                       sample_t clev, sample_t slev);
static void upmix_MMX (sample_t * samples, int acmod, int output);
static void upmix_C (sample_t * samples, int acmod, int output);
3910
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
50 |
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
51 void downmix_accel_init(uint32_t mm_accel) |
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
52 { |
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
53 upmix= upmix_C; |
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
54 downmix= downmix_C; |
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
55 #ifdef ARCH_X86 |
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
56 if(mm_accel & MM_ACCEL_X86_MMX) upmix= upmix_MMX; |
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
57 if(mm_accel & MM_ACCEL_X86_SSE) downmix= downmix_SSE; |
4233 | 58 if(mm_accel & MM_ACCEL_X86_3DNOW) downmix= downmix_3dnow; |
3910
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
59 #endif |
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
60 } |
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
61 |
3394 | 62 int downmix_init (int input, int flags, sample_t * level, |
63 sample_t clev, sample_t slev) | |
64 { | |
65 static uint8_t table[11][8] = { | |
66 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO, | |
67 A52_STEREO, A52_STEREO, A52_STEREO, A52_STEREO}, | |
68 {A52_MONO, A52_MONO, A52_MONO, A52_MONO, | |
69 A52_MONO, A52_MONO, A52_MONO, A52_MONO}, | |
70 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO, | |
71 A52_STEREO, A52_STEREO, A52_STEREO, A52_STEREO}, | |
72 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F, | |
73 A52_STEREO, A52_3F, A52_STEREO, A52_3F}, | |
74 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO, | |
75 A52_2F1R, A52_2F1R, A52_2F1R, A52_2F1R}, | |
76 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO, | |
77 A52_2F1R, A52_3F1R, A52_2F1R, A52_3F1R}, | |
78 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F, | |
79 A52_2F2R, A52_2F2R, A52_2F2R, A52_2F2R}, | |
80 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F, | |
81 A52_2F2R, A52_3F2R, A52_2F2R, A52_3F2R}, | |
82 {A52_CHANNEL1, A52_MONO, A52_MONO, A52_MONO, | |
83 A52_MONO, A52_MONO, A52_MONO, A52_MONO}, | |
84 {A52_CHANNEL2, A52_MONO, A52_MONO, A52_MONO, | |
85 A52_MONO, A52_MONO, A52_MONO, A52_MONO}, | |
86 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_DOLBY, | |
87 A52_DOLBY, A52_DOLBY, A52_DOLBY, A52_DOLBY} | |
88 }; | |
89 int output; | |
90 | |
91 output = flags & A52_CHANNEL_MASK; | |
92 if (output > A52_DOLBY) | |
93 return -1; | |
3738 | 94 |
3394 | 95 output = table[output][input & 7]; |
96 | |
97 if ((output == A52_STEREO) && | |
98 ((input == A52_DOLBY) || ((input == A52_3F) && (clev == LEVEL_3DB)))) | |
99 output = A52_DOLBY; | |
100 | |
101 if (flags & A52_ADJUST_LEVEL) | |
102 switch (CONVERT (input & 7, output)) { | |
103 | |
104 case CONVERT (A52_3F, A52_MONO): | |
105 *level *= LEVEL_3DB / (1 + clev); | |
106 break; | |
107 | |
108 case CONVERT (A52_STEREO, A52_MONO): | |
109 case CONVERT (A52_2F2R, A52_2F1R): | |
110 case CONVERT (A52_3F2R, A52_3F1R): | |
111 level_3db: | |
112 *level *= LEVEL_3DB; | |
113 break; | |
114 | |
115 case CONVERT (A52_3F2R, A52_2F1R): | |
116 if (clev < LEVEL_PLUS3DB - 1) | |
117 goto level_3db; | |
118 /* break thru */ | |
119 case CONVERT (A52_3F, A52_STEREO): | |
120 case CONVERT (A52_3F1R, A52_2F1R): | |
121 case CONVERT (A52_3F1R, A52_2F2R): | |
122 case CONVERT (A52_3F2R, A52_2F2R): | |
123 *level /= 1 + clev; | |
124 break; | |
125 | |
126 case CONVERT (A52_2F1R, A52_MONO): | |
127 *level *= LEVEL_PLUS3DB / (2 + slev); | |
128 break; | |
129 | |
130 case CONVERT (A52_2F1R, A52_STEREO): | |
131 case CONVERT (A52_3F1R, A52_3F): | |
132 *level /= 1 + slev * LEVEL_3DB; | |
133 break; | |
134 | |
135 case CONVERT (A52_3F1R, A52_MONO): | |
136 *level *= LEVEL_3DB / (1 + clev + 0.5 * slev); | |
137 break; | |
138 | |
139 case CONVERT (A52_3F1R, A52_STEREO): | |
140 *level /= 1 + clev + slev * LEVEL_3DB; | |
141 break; | |
142 | |
143 case CONVERT (A52_2F2R, A52_MONO): | |
144 *level *= LEVEL_3DB / (1 + slev); | |
145 break; | |
146 | |
147 case CONVERT (A52_2F2R, A52_STEREO): | |
148 case CONVERT (A52_3F2R, A52_3F): | |
149 *level /= 1 + slev; | |
150 break; | |
151 | |
152 case CONVERT (A52_3F2R, A52_MONO): | |
153 *level *= LEVEL_3DB / (1 + clev + slev); | |
154 break; | |
155 | |
156 case CONVERT (A52_3F2R, A52_STEREO): | |
157 *level /= 1 + clev + slev; | |
158 break; | |
159 | |
160 case CONVERT (A52_MONO, A52_DOLBY): | |
161 *level *= LEVEL_PLUS3DB; | |
162 break; | |
163 | |
164 case CONVERT (A52_3F, A52_DOLBY): | |
165 case CONVERT (A52_2F1R, A52_DOLBY): | |
166 *level *= 1 / (1 + LEVEL_3DB); | |
167 break; | |
168 | |
169 case CONVERT (A52_3F1R, A52_DOLBY): | |
170 case CONVERT (A52_2F2R, A52_DOLBY): | |
171 *level *= 1 / (1 + 2 * LEVEL_3DB); | |
172 break; | |
173 | |
174 case CONVERT (A52_3F2R, A52_DOLBY): | |
175 *level *= 1 / (1 + 3 * LEVEL_3DB); | |
176 break; | |
177 } | |
178 return output; | |
179 } | |
180 | |
/*
 * Fill coeff[] with per-channel gains for converting channel mode `acmod`
 * to output mode `output & A52_CHANNEL_MASK`.
 *
 * Only the coeff[] entries assigned in the matching case are written; the
 * others are left as they were.  For the A52_DOLBY targets, clev and/or
 * slev are replaced by fixed values before the shared code of the
 * following case runs.
 *
 * Returns a per-conversion integer (0, 3, 7, 12, 13, 15, 24, 29 or 31), or
 * -1 if the (acmod, output) pair has no case.
 * NOTE(review): the non-zero return values look like a bitmask over input
 * channels -- confirm against the caller.
 */
int downmix_coeff (sample_t * coeff, int acmod, int output, sample_t level,
                   sample_t clev, sample_t slev)
{
    switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {

    /* no channel-count change: every coefficient is just `level` */
    case CONVERT (A52_CHANNEL, A52_CHANNEL):
    case CONVERT (A52_MONO, A52_MONO):
    case CONVERT (A52_STEREO, A52_STEREO):
    case CONVERT (A52_3F, A52_3F):
    case CONVERT (A52_2F1R, A52_2F1R):
    case CONVERT (A52_3F1R, A52_3F1R):
    case CONVERT (A52_2F2R, A52_2F2R):
    case CONVERT (A52_3F2R, A52_3F2R):
    case CONVERT (A52_STEREO, A52_DOLBY):
        coeff[0] = coeff[1] = coeff[2] = coeff[3] = coeff[4] = level;
        return 0;

    case CONVERT (A52_CHANNEL, A52_MONO):
        coeff[0] = coeff[1] = level * LEVEL_6DB;
        return 3;

    case CONVERT (A52_STEREO, A52_MONO):
        coeff[0] = coeff[1] = level * LEVEL_3DB;
        return 3;

    case CONVERT (A52_3F, A52_MONO):
        coeff[0] = coeff[2] = level * LEVEL_3DB;
        coeff[1] = level * clev * LEVEL_PLUS3DB;
        return 7;

    case CONVERT (A52_2F1R, A52_MONO):
        coeff[0] = coeff[1] = level * LEVEL_3DB;
        coeff[2] = level * slev * LEVEL_3DB;
        return 7;

    case CONVERT (A52_2F2R, A52_MONO):
        coeff[0] = coeff[1] = level * LEVEL_3DB;
        coeff[2] = coeff[3] = level * slev * LEVEL_3DB;
        return 15;

    case CONVERT (A52_3F1R, A52_MONO):
        coeff[0] = coeff[2] = level * LEVEL_3DB;
        coeff[1] = level * clev * LEVEL_PLUS3DB;
        coeff[3] = level * slev * LEVEL_3DB;
        return 15;

    case CONVERT (A52_3F2R, A52_MONO):
        coeff[0] = coeff[2] = level * LEVEL_3DB;
        coeff[1] = level * clev * LEVEL_PLUS3DB;
        coeff[3] = coeff[4] = level * slev * LEVEL_3DB;
        return 31;

    case CONVERT (A52_MONO, A52_DOLBY):
        coeff[0] = level * LEVEL_3DB;
        return 0;

    case CONVERT (A52_3F, A52_DOLBY):
        clev = LEVEL_3DB;
        /* fall through */
    case CONVERT (A52_3F, A52_STEREO):
    case CONVERT (A52_3F1R, A52_2F1R):
    case CONVERT (A52_3F2R, A52_2F2R):
        coeff[0] = coeff[2] = coeff[3] = coeff[4] = level;
        coeff[1] = level * clev;
        return 7;

    case CONVERT (A52_2F1R, A52_DOLBY):
        slev = 1;
        /* fall through */
    case CONVERT (A52_2F1R, A52_STEREO):
        coeff[0] = coeff[1] = level;
        coeff[2] = level * slev * LEVEL_3DB;
        return 7;

    case CONVERT (A52_3F1R, A52_DOLBY):
        clev = LEVEL_3DB;
        slev = 1;
        /* fall through */
    case CONVERT (A52_3F1R, A52_STEREO):
        coeff[0] = coeff[2] = level;
        coeff[1] = level * clev;
        coeff[3] = level * slev * LEVEL_3DB;
        return 15;

    case CONVERT (A52_2F2R, A52_DOLBY):
        slev = LEVEL_3DB;
        /* fall through */
    case CONVERT (A52_2F2R, A52_STEREO):
        coeff[0] = coeff[1] = level;
        coeff[2] = coeff[3] = level * slev;
        return 15;

    case CONVERT (A52_3F2R, A52_DOLBY):
        clev = LEVEL_3DB;
        /* fall through */
    case CONVERT (A52_3F2R, A52_2F1R):
        slev = LEVEL_3DB;
        /* fall through */
    case CONVERT (A52_3F2R, A52_STEREO):
        coeff[0] = coeff[2] = level;
        coeff[1] = level * clev;
        coeff[3] = coeff[4] = level * slev;
        return 31;

    case CONVERT (A52_3F1R, A52_3F):
        coeff[0] = coeff[1] = coeff[2] = level;
        coeff[3] = level * slev * LEVEL_3DB;
        return 13;

    case CONVERT (A52_3F2R, A52_3F):
        coeff[0] = coeff[1] = coeff[2] = level;
        coeff[3] = coeff[4] = level * slev;
        return 29;

    case CONVERT (A52_2F2R, A52_2F1R):
        coeff[0] = coeff[1] = level;
        coeff[2] = coeff[3] = level * LEVEL_3DB;
        return 12;

    case CONVERT (A52_3F2R, A52_3F1R):
        coeff[0] = coeff[1] = coeff[2] = level;
        coeff[3] = coeff[4] = level * LEVEL_3DB;
        return 24;

    case CONVERT (A52_2F1R, A52_2F2R):
        coeff[0] = coeff[1] = level;
        coeff[2] = level * LEVEL_3DB;
        return 0;

    case CONVERT (A52_3F1R, A52_2F2R):
        coeff[0] = coeff[2] = level;
        coeff[1] = level * clev;
        coeff[3] = level * LEVEL_3DB;
        return 7;

    case CONVERT (A52_3F1R, A52_3F2R):
        coeff[0] = coeff[1] = coeff[2] = level;
        coeff[3] = level * LEVEL_3DB;
        return 0;

    /* dual-mono source, one channel selected: the other gets gain 0 */
    case CONVERT (A52_CHANNEL, A52_CHANNEL1):
        coeff[0] = level;
        coeff[1] = 0;
        return 0;

    case CONVERT (A52_CHANNEL, A52_CHANNEL2):
        coeff[0] = 0;
        coeff[1] = level;
        return 0;
    }

    return -1;	/* NOTREACHED */
}
328 | |
329 static void mix2to1 (sample_t * dest, sample_t * src, sample_t bias) | |
330 { | |
331 int i; | |
332 | |
333 for (i = 0; i < 256; i++) | |
334 dest[i] += src[i] + bias; | |
335 } | |
336 | |
337 static void mix3to1 (sample_t * samples, sample_t bias) | |
338 { | |
339 int i; | |
340 | |
341 for (i = 0; i < 256; i++) | |
342 samples[i] += samples[i + 256] + samples[i + 512] + bias; | |
343 } | |
344 | |
345 static void mix4to1 (sample_t * samples, sample_t bias) | |
346 { | |
347 int i; | |
348 | |
349 for (i = 0; i < 256; i++) | |
350 samples[i] += (samples[i + 256] + samples[i + 512] + | |
351 samples[i + 768] + bias); | |
352 } | |
353 | |
354 static void mix5to1 (sample_t * samples, sample_t bias) | |
355 { | |
356 int i; | |
357 | |
358 for (i = 0; i < 256; i++) | |
359 samples[i] += (samples[i + 256] + samples[i + 512] + | |
360 samples[i + 768] + samples[i + 1024] + bias); | |
361 } | |
362 | |
363 static void mix3to2 (sample_t * samples, sample_t bias) | |
364 { | |
365 int i; | |
366 sample_t common; | |
367 | |
368 for (i = 0; i < 256; i++) { | |
369 common = samples[i + 256] + bias; | |
370 samples[i] += common; | |
371 samples[i + 256] = samples[i + 512] + common; | |
372 } | |
373 } | |
374 | |
375 static void mix21to2 (sample_t * left, sample_t * right, sample_t bias) | |
376 { | |
377 int i; | |
378 sample_t common; | |
379 | |
380 for (i = 0; i < 256; i++) { | |
381 common = right[i + 256] + bias; | |
382 left[i] += common; | |
383 right[i] += common; | |
384 } | |
385 } | |
386 | |
387 static void mix21toS (sample_t * samples, sample_t bias) | |
388 { | |
389 int i; | |
390 sample_t surround; | |
391 | |
392 for (i = 0; i < 256; i++) { | |
393 surround = samples[i + 512]; | |
394 samples[i] += bias - surround; | |
395 samples[i + 256] += bias + surround; | |
396 } | |
397 } | |
398 | |
399 static void mix31to2 (sample_t * samples, sample_t bias) | |
400 { | |
401 int i; | |
402 sample_t common; | |
403 | |
404 for (i = 0; i < 256; i++) { | |
405 common = samples[i + 256] + samples[i + 768] + bias; | |
406 samples[i] += common; | |
407 samples[i + 256] = samples[i + 512] + common; | |
408 } | |
409 } | |
410 | |
411 static void mix31toS (sample_t * samples, sample_t bias) | |
412 { | |
413 int i; | |
414 sample_t common, surround; | |
415 | |
416 for (i = 0; i < 256; i++) { | |
417 common = samples[i + 256] + bias; | |
418 surround = samples[i + 768]; | |
419 samples[i] += common - surround; | |
420 samples[i + 256] = samples[i + 512] + common + surround; | |
421 } | |
422 } | |
423 | |
424 static void mix22toS (sample_t * samples, sample_t bias) | |
425 { | |
426 int i; | |
427 sample_t surround; | |
428 | |
429 for (i = 0; i < 256; i++) { | |
430 surround = samples[i + 512] + samples[i + 768]; | |
431 samples[i] += bias - surround; | |
432 samples[i + 256] += bias + surround; | |
433 } | |
434 } | |
435 | |
436 static void mix32to2 (sample_t * samples, sample_t bias) | |
437 { | |
438 int i; | |
439 sample_t common; | |
440 | |
441 for (i = 0; i < 256; i++) { | |
442 common = samples[i + 256] + bias; | |
443 samples[i] += common + samples[i + 768]; | |
444 samples[i + 256] = common + samples[i + 512] + samples[i + 1024]; | |
445 } | |
446 } | |
447 | |
448 static void mix32toS (sample_t * samples, sample_t bias) | |
449 { | |
450 int i; | |
451 sample_t common, surround; | |
452 | |
453 for (i = 0; i < 256; i++) { | |
454 common = samples[i + 256] + bias; | |
455 surround = samples[i + 768] + samples[i + 1024]; | |
456 samples[i] += common - surround; | |
457 samples[i + 256] = samples[i + 512] + common + surround; | |
458 } | |
459 } | |
460 | |
461 static void move2to1 (sample_t * src, sample_t * dest, sample_t bias) | |
462 { | |
463 int i; | |
464 | |
465 for (i = 0; i < 256; i++) | |
466 dest[i] = src[i] + src[i + 256] + bias; | |
467 } | |
468 | |
469 static void zero (sample_t * samples) | |
470 { | |
471 int i; | |
472 for (i = 0; i < 256; i++) | |
473 samples[i] = 0; | |
474 } | |
475 | |
3904 | 476 static void downmix_C (sample_t * samples, int acmod, int output, sample_t bias, |
3394 | 477 sample_t clev, sample_t slev) |
478 { | |
479 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
480 | |
481 case CONVERT (A52_CHANNEL, A52_CHANNEL2): | |
482 memcpy (samples, samples + 256, 256 * sizeof (sample_t)); | |
483 break; | |
484 | |
485 case CONVERT (A52_CHANNEL, A52_MONO): | |
486 case CONVERT (A52_STEREO, A52_MONO): | |
487 mix_2to1: | |
488 mix2to1 (samples, samples + 256, bias); | |
489 break; | |
490 | |
491 case CONVERT (A52_2F1R, A52_MONO): | |
492 if (slev == 0) | |
493 goto mix_2to1; | |
494 case CONVERT (A52_3F, A52_MONO): | |
495 mix_3to1: | |
496 mix3to1 (samples, bias); | |
497 break; | |
498 | |
499 case CONVERT (A52_3F1R, A52_MONO): | |
500 if (slev == 0) | |
501 goto mix_3to1; | |
502 case CONVERT (A52_2F2R, A52_MONO): | |
503 if (slev == 0) | |
504 goto mix_2to1; | |
505 mix4to1 (samples, bias); | |
506 break; | |
507 | |
508 case CONVERT (A52_3F2R, A52_MONO): | |
509 if (slev == 0) | |
510 goto mix_3to1; | |
511 mix5to1 (samples, bias); | |
512 break; | |
513 | |
514 case CONVERT (A52_MONO, A52_DOLBY): | |
515 memcpy (samples + 256, samples, 256 * sizeof (sample_t)); | |
516 break; | |
517 | |
518 case CONVERT (A52_3F, A52_STEREO): | |
519 case CONVERT (A52_3F, A52_DOLBY): | |
520 mix_3to2: | |
521 mix3to2 (samples, bias); | |
522 break; | |
523 | |
524 case CONVERT (A52_2F1R, A52_STEREO): | |
525 if (slev == 0) | |
526 break; | |
527 mix21to2 (samples, samples + 256, bias); | |
528 break; | |
529 | |
530 case CONVERT (A52_2F1R, A52_DOLBY): | |
531 mix21toS (samples, bias); | |
532 break; | |
533 | |
534 case CONVERT (A52_3F1R, A52_STEREO): | |
535 if (slev == 0) | |
536 goto mix_3to2; | |
537 mix31to2 (samples, bias); | |
538 break; | |
539 | |
540 case CONVERT (A52_3F1R, A52_DOLBY): | |
541 mix31toS (samples, bias); | |
542 break; | |
543 | |
544 case CONVERT (A52_2F2R, A52_STEREO): | |
545 if (slev == 0) | |
546 break; | |
547 mix2to1 (samples, samples + 512, bias); | |
548 mix2to1 (samples + 256, samples + 768, bias); | |
549 break; | |
550 | |
551 case CONVERT (A52_2F2R, A52_DOLBY): | |
552 mix22toS (samples, bias); | |
553 break; | |
554 | |
555 case CONVERT (A52_3F2R, A52_STEREO): | |
556 if (slev == 0) | |
557 goto mix_3to2; | |
558 mix32to2 (samples, bias); | |
559 break; | |
560 | |
561 case CONVERT (A52_3F2R, A52_DOLBY): | |
562 mix32toS (samples, bias); | |
563 break; | |
564 | |
565 case CONVERT (A52_3F1R, A52_3F): | |
566 if (slev == 0) | |
567 break; | |
568 mix21to2 (samples, samples + 512, bias); | |
569 break; | |
570 | |
571 case CONVERT (A52_3F2R, A52_3F): | |
572 if (slev == 0) | |
573 break; | |
574 mix2to1 (samples, samples + 768, bias); | |
575 mix2to1 (samples + 512, samples + 1024, bias); | |
576 break; | |
577 | |
578 case CONVERT (A52_3F1R, A52_2F1R): | |
579 mix3to2 (samples, bias); | |
580 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
581 break; | |
582 | |
583 case CONVERT (A52_2F2R, A52_2F1R): | |
584 mix2to1 (samples + 512, samples + 768, bias); | |
585 break; | |
586 | |
587 case CONVERT (A52_3F2R, A52_2F1R): | |
3678 | 588 mix3to2 (samples, bias); //FIXME possible bug? (output doesnt seem to be used) |
3394 | 589 move2to1 (samples + 768, samples + 512, bias); |
590 break; | |
591 | |
592 case CONVERT (A52_3F2R, A52_3F1R): | |
593 mix2to1 (samples + 768, samples + 1024, bias); | |
594 break; | |
595 | |
596 case CONVERT (A52_2F1R, A52_2F2R): | |
597 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); | |
598 break; | |
599 | |
600 case CONVERT (A52_3F1R, A52_2F2R): | |
601 mix3to2 (samples, bias); | |
602 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
603 break; | |
604 | |
605 case CONVERT (A52_3F2R, A52_2F2R): | |
606 mix3to2 (samples, bias); | |
607 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
608 memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t)); | |
609 break; | |
610 | |
611 case CONVERT (A52_3F1R, A52_3F2R): | |
612 memcpy (samples + 1027, samples + 768, 256 * sizeof (sample_t)); | |
613 break; | |
614 } | |
615 } | |
616 | |
/*
 * Portable C "upmix": rearrange the 256-sample slots of samples[] for the
 * pair `acmod` / `output & A52_CHANNEL_MASK`, using only block copies and
 * zero fills (no arithmetic on sample values).
 *
 * Slot n starts at samples + 256 * n.  The cases rely heavily on
 * fallthrough and on jumps into the bodies of other cases, so statement
 * order is significant.  Pairs without a case leave samples[] untouched.
 *
 * NOTE(review): this presumably spreads data held in the output layout back
 * over the slots of the source layout, silencing the missing ones --
 * confirm against the caller.
 */
static void upmix_C (sample_t * samples, int acmod, int output)
{
    switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {

    case CONVERT (A52_CHANNEL, A52_CHANNEL2):
        memcpy (samples + 256, samples, 256 * sizeof (sample_t));
        break;

    /* mono output: zero every slot above slot 0 that acmod has */
    case CONVERT (A52_3F2R, A52_MONO):
        zero (samples + 1024);
        /* fall through */
    case CONVERT (A52_3F1R, A52_MONO):
    case CONVERT (A52_2F2R, A52_MONO):
        zero (samples + 768);
        /* fall through */
    case CONVERT (A52_3F, A52_MONO):
    case CONVERT (A52_2F1R, A52_MONO):
        zero (samples + 512);
        /* fall through */
    case CONVERT (A52_CHANNEL, A52_MONO):
    case CONVERT (A52_STEREO, A52_MONO):
        zero (samples + 256);
        break;

    case CONVERT (A52_3F2R, A52_STEREO):
    case CONVERT (A52_3F2R, A52_DOLBY):
        zero (samples + 1024);
        /* fall through */
    case CONVERT (A52_3F1R, A52_STEREO):
    case CONVERT (A52_3F1R, A52_DOLBY):
        zero (samples + 768);
        /* fall through */
    case CONVERT (A52_3F, A52_STEREO):
    case CONVERT (A52_3F, A52_DOLBY):
    mix_3to2:
        /* move slot 1 up to slot 2, then clear slot 1 */
        memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t));
        zero (samples + 256);
        break;

    case CONVERT (A52_2F2R, A52_STEREO):
    case CONVERT (A52_2F2R, A52_DOLBY):
        zero (samples + 768);
        /* fall through */
    case CONVERT (A52_2F1R, A52_STEREO):
    case CONVERT (A52_2F1R, A52_DOLBY):
        zero (samples + 512);
        break;

    case CONVERT (A52_3F2R, A52_3F):
        zero (samples + 1024);
        /* fall through */
    case CONVERT (A52_3F1R, A52_3F):
    case CONVERT (A52_2F2R, A52_2F1R):
        zero (samples + 768);
        break;

    case CONVERT (A52_3F2R, A52_3F1R):
        zero (samples + 1024);
        break;

    case CONVERT (A52_3F2R, A52_2F1R):
        zero (samples + 1024);
        /* fall through */
    case CONVERT (A52_3F1R, A52_2F1R):
    mix_31to21:
        /* move slot 2 up to slot 3, then finish with the slot 1 -> 2 move */
        memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
        goto mix_3to2;

    case CONVERT (A52_3F2R, A52_2F2R):
        /* slots are shifted up one at a time, highest first */
        memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
        goto mix_31to21;
    }
}
3904 | 682 |
683 #ifdef ARCH_X86 | |
684 static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias) | |
685 { | |
686 asm volatile( | |
687 "movlps %2, %%xmm7 \n\t" | |
688 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
689 "movl $-1024, %%esi \n\t" | |
4233 | 690 ".balign 16\n\t" |
3904 | 691 "1: \n\t" |
692 "movaps (%0, %%esi), %%xmm0 \n\t" | |
693 "movaps 16(%0, %%esi), %%xmm1 \n\t" | |
694 "addps (%1, %%esi), %%xmm0 \n\t" | |
695 "addps 16(%1, %%esi), %%xmm1 \n\t" | |
696 "addps %%xmm7, %%xmm0 \n\t" | |
697 "addps %%xmm7, %%xmm1 \n\t" | |
698 "movaps %%xmm0, (%1, %%esi) \n\t" | |
699 "movaps %%xmm1, 16(%1, %%esi) \n\t" | |
700 "addl $32, %%esi \n\t" | |
701 " jnz 1b \n\t" | |
702 :: "r" (src+256), "r" (dest+256), "m" (bias) | |
703 : "%esi" | |
704 ); | |
705 } | |
706 | |
707 static void mix3to1_SSE (sample_t * samples, sample_t bias) | |
708 { | |
709 asm volatile( | |
710 "movlps %1, %%xmm7 \n\t" | |
711 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
712 "movl $-1024, %%esi \n\t" | |
4233 | 713 ".balign 16\n\t" |
3904 | 714 "1: \n\t" |
715 "movaps (%0, %%esi), %%xmm0 \n\t" | |
716 "movaps 1024(%0, %%esi), %%xmm1 \n\t" | |
717 "addps 2048(%0, %%esi), %%xmm0 \n\t" | |
718 "addps %%xmm7, %%xmm1 \n\t" | |
719 "addps %%xmm1, %%xmm0 \n\t" | |
720 "movaps %%xmm0, (%0, %%esi) \n\t" | |
721 "addl $16, %%esi \n\t" | |
722 " jnz 1b \n\t" | |
723 :: "r" (samples+256), "m" (bias) | |
724 : "%esi" | |
725 ); | |
726 } | |
727 | |
728 static void mix4to1_SSE (sample_t * samples, sample_t bias) | |
729 { | |
730 asm volatile( | |
731 "movlps %1, %%xmm7 \n\t" | |
732 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
733 "movl $-1024, %%esi \n\t" | |
4233 | 734 ".balign 16\n\t" |
3904 | 735 "1: \n\t" |
736 "movaps (%0, %%esi), %%xmm0 \n\t" | |
737 "movaps 1024(%0, %%esi), %%xmm1 \n\t" | |
738 "addps 2048(%0, %%esi), %%xmm0 \n\t" | |
739 "addps 3072(%0, %%esi), %%xmm1 \n\t" | |
740 "addps %%xmm7, %%xmm0 \n\t" | |
741 "addps %%xmm1, %%xmm0 \n\t" | |
742 "movaps %%xmm0, (%0, %%esi) \n\t" | |
743 "addl $16, %%esi \n\t" | |
744 " jnz 1b \n\t" | |
745 :: "r" (samples+256), "m" (bias) | |
746 : "%esi" | |
747 ); | |
748 } | |
749 | |
750 static void mix5to1_SSE (sample_t * samples, sample_t bias) | |
751 { | |
752 asm volatile( | |
753 "movlps %1, %%xmm7 \n\t" | |
754 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
755 "movl $-1024, %%esi \n\t" | |
4233 | 756 ".balign 16\n\t" |
3904 | 757 "1: \n\t" |
758 "movaps (%0, %%esi), %%xmm0 \n\t" | |
759 "movaps 1024(%0, %%esi), %%xmm1 \n\t" | |
760 "addps 2048(%0, %%esi), %%xmm0 \n\t" | |
761 "addps 3072(%0, %%esi), %%xmm1 \n\t" | |
762 "addps %%xmm7, %%xmm0 \n\t" | |
763 "addps 4096(%0, %%esi), %%xmm1 \n\t" | |
764 "addps %%xmm1, %%xmm0 \n\t" | |
765 "movaps %%xmm0, (%0, %%esi) \n\t" | |
766 "addl $16, %%esi \n\t" | |
767 " jnz 1b \n\t" | |
768 :: "r" (samples+256), "m" (bias) | |
769 : "%esi" | |
770 ); | |
771 } | |
772 | |
773 static void mix3to2_SSE (sample_t * samples, sample_t bias) | |
774 { | |
775 asm volatile( | |
776 "movlps %1, %%xmm7 \n\t" | |
777 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
778 "movl $-1024, %%esi \n\t" | |
4233 | 779 ".balign 16\n\t" |
3904 | 780 "1: \n\t" |
781 "movaps 1024(%0, %%esi), %%xmm0 \n\t" | |
782 "addps %%xmm7, %%xmm0 \n\t" //common | |
783 "movaps (%0, %%esi), %%xmm1 \n\t" | |
784 "movaps 2048(%0, %%esi), %%xmm2 \n\t" | |
785 "addps %%xmm0, %%xmm1 \n\t" | |
786 "addps %%xmm0, %%xmm2 \n\t" | |
787 "movaps %%xmm1, (%0, %%esi) \n\t" | |
788 "movaps %%xmm2, 1024(%0, %%esi) \n\t" | |
789 "addl $16, %%esi \n\t" | |
790 " jnz 1b \n\t" | |
791 :: "r" (samples+256), "m" (bias) | |
792 : "%esi" | |
793 ); | |
794 } | |
795 | |
796 static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias) | |
797 { | |
798 asm volatile( | |
799 "movlps %2, %%xmm7 \n\t" | |
800 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
801 "movl $-1024, %%esi \n\t" | |
4233 | 802 ".balign 16\n\t" |
3904 | 803 "1: \n\t" |
804 "movaps 1024(%1, %%esi), %%xmm0 \n\t" | |
805 "addps %%xmm7, %%xmm0 \n\t" //common | |
806 "movaps (%0, %%esi), %%xmm1 \n\t" | |
807 "movaps (%1, %%esi), %%xmm2 \n\t" | |
808 "addps %%xmm0, %%xmm1 \n\t" | |
809 "addps %%xmm0, %%xmm2 \n\t" | |
810 "movaps %%xmm1, (%0, %%esi) \n\t" | |
811 "movaps %%xmm2, (%1, %%esi) \n\t" | |
812 "addl $16, %%esi \n\t" | |
813 " jnz 1b \n\t" | |
814 :: "r" (left+256), "r" (right+256), "m" (bias) | |
815 : "%esi" | |
816 ); | |
817 } | |
818 | |
819 static void mix21toS_SSE (sample_t * samples, sample_t bias) | |
820 { | |
821 asm volatile( | |
822 "movlps %1, %%xmm7 \n\t" | |
823 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
824 "movl $-1024, %%esi \n\t" | |
4233 | 825 ".balign 16\n\t" |
3904 | 826 "1: \n\t" |
827 "movaps 2048(%0, %%esi), %%xmm0 \n\t" // surround | |
828 "movaps (%0, %%esi), %%xmm1 \n\t" | |
829 "movaps 1024(%0, %%esi), %%xmm2 \n\t" | |
830 "addps %%xmm7, %%xmm1 \n\t" | |
831 "addps %%xmm7, %%xmm2 \n\t" | |
832 "subps %%xmm0, %%xmm1 \n\t" | |
833 "addps %%xmm0, %%xmm2 \n\t" | |
834 "movaps %%xmm1, (%0, %%esi) \n\t" | |
835 "movaps %%xmm2, 1024(%0, %%esi) \n\t" | |
836 "addl $16, %%esi \n\t" | |
837 " jnz 1b \n\t" | |
838 :: "r" (samples+256), "m" (bias) | |
839 : "%esi" | |
840 ); | |
841 } | |
842 | |
843 static void mix31to2_SSE (sample_t * samples, sample_t bias) | |
844 { | |
845 asm volatile( | |
846 "movlps %1, %%xmm7 \n\t" | |
847 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
848 "movl $-1024, %%esi \n\t" | |
4233 | 849 ".balign 16\n\t" |
3904 | 850 "1: \n\t" |
851 "movaps 1024(%0, %%esi), %%xmm0 \n\t" | |
852 "addps 3072(%0, %%esi), %%xmm0 \n\t" | |
853 "addps %%xmm7, %%xmm0 \n\t" // common | |
854 "movaps (%0, %%esi), %%xmm1 \n\t" | |
855 "movaps 2048(%0, %%esi), %%xmm2 \n\t" | |
856 "addps %%xmm0, %%xmm1 \n\t" | |
857 "addps %%xmm0, %%xmm2 \n\t" | |
858 "movaps %%xmm1, (%0, %%esi) \n\t" | |
859 "movaps %%xmm2, 1024(%0, %%esi) \n\t" | |
860 "addl $16, %%esi \n\t" | |
861 " jnz 1b \n\t" | |
862 :: "r" (samples+256), "m" (bias) | |
863 : "%esi" | |
864 ); | |
865 } | |
866 | |
867 static void mix31toS_SSE (sample_t * samples, sample_t bias) | |
868 { | |
869 asm volatile( | |
870 "movlps %1, %%xmm7 \n\t" | |
871 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
872 "movl $-1024, %%esi \n\t" | |
4233 | 873 ".balign 16\n\t" |
3904 | 874 "1: \n\t" |
875 "movaps 1024(%0, %%esi), %%xmm0 \n\t" | |
876 "movaps 3072(%0, %%esi), %%xmm3 \n\t" // surround | |
877 "addps %%xmm7, %%xmm0 \n\t" // common | |
878 "movaps (%0, %%esi), %%xmm1 \n\t" | |
879 "movaps 2048(%0, %%esi), %%xmm2 \n\t" | |
880 "addps %%xmm0, %%xmm1 \n\t" | |
881 "addps %%xmm0, %%xmm2 \n\t" | |
882 "subps %%xmm3, %%xmm1 \n\t" | |
883 "addps %%xmm3, %%xmm2 \n\t" | |
884 "movaps %%xmm1, (%0, %%esi) \n\t" | |
885 "movaps %%xmm2, 1024(%0, %%esi) \n\t" | |
886 "addl $16, %%esi \n\t" | |
887 " jnz 1b \n\t" | |
888 :: "r" (samples+256), "m" (bias) | |
889 : "%esi" | |
890 ); | |
891 } | |
892 | |
893 static void mix22toS_SSE (sample_t * samples, sample_t bias) | |
894 { | |
895 asm volatile( | |
896 "movlps %1, %%xmm7 \n\t" | |
897 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
898 "movl $-1024, %%esi \n\t" | |
4233 | 899 ".balign 16\n\t" |
3904 | 900 "1: \n\t" |
901 "movaps 2048(%0, %%esi), %%xmm0 \n\t" | |
902 "addps 3072(%0, %%esi), %%xmm0 \n\t" // surround | |
903 "movaps (%0, %%esi), %%xmm1 \n\t" | |
904 "movaps 1024(%0, %%esi), %%xmm2 \n\t" | |
905 "addps %%xmm7, %%xmm1 \n\t" | |
906 "addps %%xmm7, %%xmm2 \n\t" | |
907 "subps %%xmm0, %%xmm1 \n\t" | |
908 "addps %%xmm0, %%xmm2 \n\t" | |
909 "movaps %%xmm1, (%0, %%esi) \n\t" | |
910 "movaps %%xmm2, 1024(%0, %%esi) \n\t" | |
911 "addl $16, %%esi \n\t" | |
912 " jnz 1b \n\t" | |
913 :: "r" (samples+256), "m" (bias) | |
914 : "%esi" | |
915 ); | |
916 } | |
917 | |
918 static void mix32to2_SSE (sample_t * samples, sample_t bias) | |
919 { | |
920 asm volatile( | |
921 "movlps %1, %%xmm7 \n\t" | |
922 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
923 "movl $-1024, %%esi \n\t" | |
4233 | 924 ".balign 16\n\t" |
3904 | 925 "1: \n\t" |
926 "movaps 1024(%0, %%esi), %%xmm0 \n\t" | |
927 "addps %%xmm7, %%xmm0 \n\t" // common | |
928 "movaps %%xmm0, %%xmm1 \n\t" // common | |
929 "addps (%0, %%esi), %%xmm0 \n\t" | |
930 "addps 2048(%0, %%esi), %%xmm1 \n\t" | |
931 "addps 3072(%0, %%esi), %%xmm0 \n\t" | |
932 "addps 4096(%0, %%esi), %%xmm1 \n\t" | |
933 "movaps %%xmm0, (%0, %%esi) \n\t" | |
934 "movaps %%xmm1, 1024(%0, %%esi) \n\t" | |
935 "addl $16, %%esi \n\t" | |
936 " jnz 1b \n\t" | |
937 :: "r" (samples+256), "m" (bias) | |
938 : "%esi" | |
939 ); | |
940 } | |
941 | |
942 static void mix32toS_SSE (sample_t * samples, sample_t bias) | |
943 { | |
944 asm volatile( | |
945 "movlps %1, %%xmm7 \n\t" | |
946 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
947 "movl $-1024, %%esi \n\t" | |
4233 | 948 ".balign 16\n\t" |
3904 | 949 "1: \n\t" |
950 "movaps 1024(%0, %%esi), %%xmm0 \n\t" | |
951 "movaps 3072(%0, %%esi), %%xmm2 \n\t" | |
952 "addps %%xmm7, %%xmm0 \n\t" // common | |
953 "addps 4096(%0, %%esi), %%xmm2 \n\t" // surround | |
954 "movaps (%0, %%esi), %%xmm1 \n\t" | |
955 "movaps 2048(%0, %%esi), %%xmm3 \n\t" | |
956 "subps %%xmm2, %%xmm1 \n\t" | |
957 "addps %%xmm2, %%xmm3 \n\t" | |
958 "addps %%xmm0, %%xmm1 \n\t" | |
959 "addps %%xmm0, %%xmm3 \n\t" | |
960 "movaps %%xmm1, (%0, %%esi) \n\t" | |
961 "movaps %%xmm3, 1024(%0, %%esi) \n\t" | |
962 "addl $16, %%esi \n\t" | |
963 " jnz 1b \n\t" | |
964 :: "r" (samples+256), "m" (bias) | |
965 : "%esi" | |
966 ); | |
967 } | |
968 | |
969 static void move2to1_SSE (sample_t * src, sample_t * dest, sample_t bias) | |
970 { | |
971 asm volatile( | |
972 "movlps %2, %%xmm7 \n\t" | |
973 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
974 "movl $-1024, %%esi \n\t" | |
4233 | 975 ".balign 16\n\t" |
3904 | 976 "1: \n\t" |
977 "movaps (%0, %%esi), %%xmm0 \n\t" | |
978 "movaps 16(%0, %%esi), %%xmm1 \n\t" | |
979 "addps 1024(%0, %%esi), %%xmm0 \n\t" | |
980 "addps 1040(%0, %%esi), %%xmm1 \n\t" | |
981 "addps %%xmm7, %%xmm0 \n\t" | |
982 "addps %%xmm7, %%xmm1 \n\t" | |
983 "movaps %%xmm0, (%1, %%esi) \n\t" | |
984 "movaps %%xmm1, 16(%1, %%esi) \n\t" | |
985 "addl $32, %%esi \n\t" | |
986 " jnz 1b \n\t" | |
987 :: "r" (src+256), "r" (dest+256), "m" (bias) | |
988 : "%esi" | |
989 ); | |
990 } | |
991 | |
992 static void zero_MMX(sample_t * samples) | |
993 { | |
994 asm volatile( | |
995 "movl $-1024, %%esi \n\t" | |
996 "pxor %%mm0, %%mm0 \n\t" | |
4233 | 997 ".balign 16\n\t" |
3904 | 998 "1: \n\t" |
999 "movq %%mm0, (%0, %%esi) \n\t" | |
1000 "movq %%mm0, 8(%0, %%esi) \n\t" | |
1001 "movq %%mm0, 16(%0, %%esi) \n\t" | |
1002 "movq %%mm0, 24(%0, %%esi) \n\t" | |
1003 "addl $32, %%esi \n\t" | |
1004 " jnz 1b \n\t" | |
1005 "emms" | |
1006 :: "r" (samples+256) | |
1007 : "%esi" | |
1008 ); | |
1009 } | |
1010 | |
/*
 * Copy `size` bytes from `src` to `dest`.
 *
 * The bulk is moved in 64-byte chunks through the eight MMX registers; a
 * remaining tail of fewer than 64 bytes is copied with memcpy.  The regions
 * must not overlap.  movq has no alignment requirement, but 8-byte aligned
 * pointers are what this routine was written for.
 *
 * Earlier revisions passed the pointer variables themselves as "m"
 * operands, so the asm read and overwrote the stack around `src`/`dest`
 * instead of the buffers, and never advanced between chunks.
 *
 * Note: unused by the rest of the file.
 */
static void copy_MMX(void *dest,const void *src,unsigned size)
{
    unsigned char *d = dest;
    const unsigned char *s = src;
    unsigned chunks = size / 64;
    unsigned tail = size % 64;
    unsigned i;

    for(i=0;i<chunks;i++)
    {
	__asm __volatile(
	"movq   (%0), %%mm0\n\t"
	"movq  8(%0), %%mm1\n\t"
	"movq 16(%0), %%mm2\n\t"
	"movq 24(%0), %%mm3\n\t"
	"movq 32(%0), %%mm4\n\t"
	"movq 40(%0), %%mm5\n\t"
	"movq 48(%0), %%mm6\n\t"
	"movq 56(%0), %%mm7\n\t"
	"movq %%mm0,   (%1)\n\t"
	"movq %%mm1,  8(%1)\n\t"
	"movq %%mm2, 16(%1)\n\t"
	"movq %%mm3, 24(%1)\n\t"
	"movq %%mm4, 32(%1)\n\t"
	"movq %%mm5, 40(%1)\n\t"
	"movq %%mm6, 48(%1)\n\t"
	"movq %%mm7, 56(%1)\n\t"
	:
	:"r"(s),"r"(d)
	:"memory");	/* the asm reads and writes the buffers directly */
	s += 64;
	d += 64;
    }

    /* Leave MMX mode so that following x87 floating-point code works. */
    if (chunks)
	__asm __volatile("emms":::"memory");

    if (tail)
	memcpy (d, s, tail);
}
3904 | 1043 |
1044 static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias, | |
1045 sample_t clev, sample_t slev) | |
1046 { | |
1047 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
1048 | |
1049 case CONVERT (A52_CHANNEL, A52_CHANNEL2): | |
1050 memcpy (samples, samples + 256, 256 * sizeof (sample_t)); | |
1051 break; | |
1052 | |
1053 case CONVERT (A52_CHANNEL, A52_MONO): | |
1054 case CONVERT (A52_STEREO, A52_MONO): | |
1055 mix_2to1_SSE: | |
1056 mix2to1_SSE (samples, samples + 256, bias); | |
1057 break; | |
1058 | |
1059 case CONVERT (A52_2F1R, A52_MONO): | |
1060 if (slev == 0) | |
1061 goto mix_2to1_SSE; | |
1062 case CONVERT (A52_3F, A52_MONO): | |
1063 mix_3to1_SSE: | |
1064 mix3to1_SSE (samples, bias); | |
1065 break; | |
1066 | |
1067 case CONVERT (A52_3F1R, A52_MONO): | |
1068 if (slev == 0) | |
1069 goto mix_3to1_SSE; | |
1070 case CONVERT (A52_2F2R, A52_MONO): | |
1071 if (slev == 0) | |
1072 goto mix_2to1_SSE; | |
1073 mix4to1_SSE (samples, bias); | |
1074 break; | |
1075 | |
1076 case CONVERT (A52_3F2R, A52_MONO): | |
1077 if (slev == 0) | |
1078 goto mix_3to1_SSE; | |
1079 mix5to1_SSE (samples, bias); | |
1080 break; | |
1081 | |
1082 case CONVERT (A52_MONO, A52_DOLBY): | |
1083 memcpy (samples + 256, samples, 256 * sizeof (sample_t)); | |
1084 break; | |
1085 | |
1086 case CONVERT (A52_3F, A52_STEREO): | |
1087 case CONVERT (A52_3F, A52_DOLBY): | |
1088 mix_3to2_SSE: | |
1089 mix3to2_SSE (samples, bias); | |
1090 break; | |
1091 | |
1092 case CONVERT (A52_2F1R, A52_STEREO): | |
1093 if (slev == 0) | |
1094 break; | |
1095 mix21to2_SSE (samples, samples + 256, bias); | |
1096 break; | |
1097 | |
1098 case CONVERT (A52_2F1R, A52_DOLBY): | |
1099 mix21toS_SSE (samples, bias); | |
1100 break; | |
1101 | |
1102 case CONVERT (A52_3F1R, A52_STEREO): | |
1103 if (slev == 0) | |
1104 goto mix_3to2_SSE; | |
1105 mix31to2_SSE (samples, bias); | |
1106 break; | |
1107 | |
1108 case CONVERT (A52_3F1R, A52_DOLBY): | |
1109 mix31toS_SSE (samples, bias); | |
1110 break; | |
1111 | |
1112 case CONVERT (A52_2F2R, A52_STEREO): | |
1113 if (slev == 0) | |
1114 break; | |
1115 mix2to1_SSE (samples, samples + 512, bias); | |
1116 mix2to1_SSE (samples + 256, samples + 768, bias); | |
1117 break; | |
1118 | |
1119 case CONVERT (A52_2F2R, A52_DOLBY): | |
1120 mix22toS_SSE (samples, bias); | |
1121 break; | |
1122 | |
1123 case CONVERT (A52_3F2R, A52_STEREO): | |
1124 if (slev == 0) | |
1125 goto mix_3to2_SSE; | |
1126 mix32to2_SSE (samples, bias); | |
1127 break; | |
1128 | |
1129 case CONVERT (A52_3F2R, A52_DOLBY): | |
1130 mix32toS_SSE (samples, bias); | |
1131 break; | |
1132 | |
1133 case CONVERT (A52_3F1R, A52_3F): | |
1134 if (slev == 0) | |
1135 break; | |
1136 mix21to2_SSE (samples, samples + 512, bias); | |
1137 break; | |
1138 | |
1139 case CONVERT (A52_3F2R, A52_3F): | |
1140 if (slev == 0) | |
1141 break; | |
1142 mix2to1_SSE (samples, samples + 768, bias); | |
1143 mix2to1_SSE (samples + 512, samples + 1024, bias); | |
1144 break; | |
1145 | |
1146 case CONVERT (A52_3F1R, A52_2F1R): | |
1147 mix3to2_SSE (samples, bias); | |
1148 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1149 break; | |
1150 | |
1151 case CONVERT (A52_2F2R, A52_2F1R): | |
1152 mix2to1_SSE (samples + 512, samples + 768, bias); | |
1153 break; | |
1154 | |
1155 case CONVERT (A52_3F2R, A52_2F1R): | |
1156 mix3to2_SSE (samples, bias); //FIXME possible bug? (output doesnt seem to be used) | |
1157 move2to1_SSE (samples + 768, samples + 512, bias); | |
1158 break; | |
1159 | |
1160 case CONVERT (A52_3F2R, A52_3F1R): | |
1161 mix2to1_SSE (samples + 768, samples + 1024, bias); | |
1162 break; | |
1163 | |
1164 case CONVERT (A52_2F1R, A52_2F2R): | |
1165 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); | |
1166 break; | |
1167 | |
1168 case CONVERT (A52_3F1R, A52_2F2R): | |
1169 mix3to2_SSE (samples, bias); | |
1170 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1171 break; | |
1172 | |
1173 case CONVERT (A52_3F2R, A52_2F2R): | |
1174 mix3to2_SSE (samples, bias); | |
1175 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1176 memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t)); | |
1177 break; | |
1178 | |
1179 case CONVERT (A52_3F1R, A52_3F2R): | |
1180 memcpy (samples + 1027, samples + 768, 256 * sizeof (sample_t)); | |
1181 break; | |
1182 } | |
1183 } | |
1184 | |
1185 static void upmix_MMX (sample_t * samples, int acmod, int output) | |
1186 { | |
1187 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
1188 | |
1189 case CONVERT (A52_CHANNEL, A52_CHANNEL2): | |
1190 memcpy (samples + 256, samples, 256 * sizeof (sample_t)); | |
1191 break; | |
1192 | |
1193 case CONVERT (A52_3F2R, A52_MONO): | |
1194 zero_MMX (samples + 1024); | |
1195 case CONVERT (A52_3F1R, A52_MONO): | |
1196 case CONVERT (A52_2F2R, A52_MONO): | |
1197 zero_MMX (samples + 768); | |
1198 case CONVERT (A52_3F, A52_MONO): | |
1199 case CONVERT (A52_2F1R, A52_MONO): | |
1200 zero_MMX (samples + 512); | |
1201 case CONVERT (A52_CHANNEL, A52_MONO): | |
1202 case CONVERT (A52_STEREO, A52_MONO): | |
1203 zero_MMX (samples + 256); | |
1204 break; | |
1205 | |
1206 case CONVERT (A52_3F2R, A52_STEREO): | |
1207 case CONVERT (A52_3F2R, A52_DOLBY): | |
1208 zero_MMX (samples + 1024); | |
1209 case CONVERT (A52_3F1R, A52_STEREO): | |
1210 case CONVERT (A52_3F1R, A52_DOLBY): | |
1211 zero_MMX (samples + 768); | |
1212 case CONVERT (A52_3F, A52_STEREO): | |
1213 case CONVERT (A52_3F, A52_DOLBY): | |
1214 mix_3to2_MMX: | |
1215 memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t)); | |
1216 zero_MMX (samples + 256); | |
1217 break; | |
1218 | |
1219 case CONVERT (A52_2F2R, A52_STEREO): | |
1220 case CONVERT (A52_2F2R, A52_DOLBY): | |
1221 zero_MMX (samples + 768); | |
1222 case CONVERT (A52_2F1R, A52_STEREO): | |
1223 case CONVERT (A52_2F1R, A52_DOLBY): | |
1224 zero_MMX (samples + 512); | |
1225 break; | |
1226 | |
1227 case CONVERT (A52_3F2R, A52_3F): | |
1228 zero_MMX (samples + 1024); | |
1229 case CONVERT (A52_3F1R, A52_3F): | |
1230 case CONVERT (A52_2F2R, A52_2F1R): | |
1231 zero_MMX (samples + 768); | |
1232 break; | |
1233 | |
1234 case CONVERT (A52_3F2R, A52_3F1R): | |
1235 zero_MMX (samples + 1024); | |
1236 break; | |
1237 | |
1238 case CONVERT (A52_3F2R, A52_2F1R): | |
1239 zero_MMX (samples + 1024); | |
1240 case CONVERT (A52_3F1R, A52_2F1R): | |
1241 mix_31to21_MMX: | |
1242 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); | |
1243 goto mix_3to2_MMX; | |
1244 | |
1245 case CONVERT (A52_3F2R, A52_2F2R): | |
1246 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); | |
1247 goto mix_31to21_MMX; | |
1248 } | |
1249 } | |
4233 | 1250 |
1251 static void mix2to1_3dnow (sample_t * dest, sample_t * src, sample_t bias) | |
1252 { | |
1253 asm volatile( | |
1254 "movd %2, %%mm7 \n\t" | |
1255 "punpckldq %2, %%mm7 \n\t" | |
1256 "movl $-1024, %%esi \n\t" | |
1257 ".balign 16\n\t" | |
1258 "1: \n\t" | |
1259 "movq (%0, %%esi), %%mm0 \n\t" | |
1260 "movq 8(%0, %%esi), %%mm1 \n\t" | |
1261 "movq 16(%0, %%esi), %%mm2 \n\t" | |
1262 "movq 24(%0, %%esi), %%mm3 \n\t" | |
1263 "pfadd (%1, %%esi), %%mm0 \n\t" | |
1264 "pfadd 8(%1, %%esi), %%mm1 \n\t" | |
1265 "pfadd 16(%1, %%esi), %%mm2 \n\t" | |
1266 "pfadd 24(%1, %%esi), %%mm3 \n\t" | |
1267 "pfadd %%mm7, %%mm0 \n\t" | |
1268 "pfadd %%mm7, %%mm1 \n\t" | |
1269 "pfadd %%mm7, %%mm2 \n\t" | |
1270 "pfadd %%mm7, %%mm3 \n\t" | |
1271 "movq %%mm0, (%1, %%esi) \n\t" | |
1272 "movq %%mm1, 8(%1, %%esi) \n\t" | |
1273 "movq %%mm2, 16(%1, %%esi) \n\t" | |
1274 "movq %%mm3, 24(%1, %%esi) \n\t" | |
1275 "addl $32, %%esi \n\t" | |
1276 " jnz 1b \n\t" | |
1277 :: "r" (src+256), "r" (dest+256), "m" (bias) | |
1278 : "%esi" | |
1279 ); | |
1280 } | |
1281 | |
1282 static void mix3to1_3dnow (sample_t * samples, sample_t bias) | |
1283 { | |
1284 asm volatile( | |
1285 "movd %1, %%mm7 \n\t" | |
1286 "punpckldq %1, %%mm7 \n\t" | |
1287 "movl $-1024, %%esi \n\t" | |
1288 ".balign 16\n\t" | |
1289 "1: \n\t" | |
1290 "movq (%0, %%esi), %%mm0 \n\t" | |
1291 "movq 8(%0, %%esi), %%mm1 \n\t" | |
1292 "movq 1024(%0, %%esi), %%mm2 \n\t" | |
1293 "movq 1032(%0, %%esi), %%mm3 \n\t" | |
1294 "pfadd 2048(%0, %%esi), %%mm0 \n\t" | |
1295 "pfadd 2056(%0, %%esi), %%mm1 \n\t" | |
1296 "pfadd %%mm7, %%mm0 \n\t" | |
1297 "pfadd %%mm7, %%mm1 \n\t" | |
1298 "pfadd %%mm2, %%mm0 \n\t" | |
1299 "pfadd %%mm3, %%mm1 \n\t" | |
1300 "movq %%mm0, (%0, %%esi) \n\t" | |
1301 "movq %%mm1, 8(%0, %%esi) \n\t" | |
1302 "addl $16, %%esi \n\t" | |
1303 " jnz 1b \n\t" | |
1304 :: "r" (samples+256), "m" (bias) | |
1305 : "%esi" | |
1306 ); | |
1307 } | |
1308 | |
1309 static void mix4to1_3dnow (sample_t * samples, sample_t bias) | |
1310 { | |
1311 asm volatile( | |
1312 "movd %1, %%mm7 \n\t" | |
1313 "punpckldq %1, %%mm7 \n\t" | |
1314 "movl $-1024, %%esi \n\t" | |
1315 ".balign 16\n\t" | |
1316 "1: \n\t" | |
1317 "movq (%0, %%esi), %%mm0 \n\t" | |
1318 "movq 8(%0, %%esi), %%mm1 \n\t" | |
1319 "movq 1024(%0, %%esi), %%mm2 \n\t" | |
1320 "movq 1032(%0, %%esi), %%mm3 \n\t" | |
1321 "pfadd 2048(%0, %%esi), %%mm0 \n\t" | |
1322 "pfadd 2056(%0, %%esi), %%mm1 \n\t" | |
1323 "pfadd 3072(%0, %%esi), %%mm2 \n\t" | |
1324 "pfadd 3080(%0, %%esi), %%mm3 \n\t" | |
1325 "pfadd %%mm7, %%mm0 \n\t" | |
1326 "pfadd %%mm7, %%mm1 \n\t" | |
1327 "pfadd %%mm2, %%mm0 \n\t" | |
1328 "pfadd %%mm3, %%mm1 \n\t" | |
1329 "movq %%mm0, (%0, %%esi) \n\t" | |
1330 "movq %%mm1, 8(%0, %%esi) \n\t" | |
1331 "addl $16, %%esi \n\t" | |
1332 " jnz 1b \n\t" | |
1333 :: "r" (samples+256), "m" (bias) | |
1334 : "%esi" | |
1335 ); | |
1336 } | |
1337 | |
1338 static void mix5to1_3dnow (sample_t * samples, sample_t bias) | |
1339 { | |
1340 asm volatile( | |
1341 "movd %1, %%mm7 \n\t" | |
1342 "punpckldq %1, %%mm7 \n\t" | |
1343 "movl $-1024, %%esi \n\t" | |
1344 ".balign 16\n\t" | |
1345 "1: \n\t" | |
1346 "movq (%0, %%esi), %%mm0 \n\t" | |
1347 "movq 8(%0, %%esi), %%mm1 \n\t" | |
1348 "movq 1024(%0, %%esi), %%mm2 \n\t" | |
1349 "movq 1032(%0, %%esi), %%mm3 \n\t" | |
1350 "pfadd 2048(%0, %%esi), %%mm0 \n\t" | |
1351 "pfadd 2056(%0, %%esi), %%mm1 \n\t" | |
1352 "pfadd 3072(%0, %%esi), %%mm2 \n\t" | |
1353 "pfadd 3080(%0, %%esi), %%mm3 \n\t" | |
1354 "pfadd %%mm7, %%mm0 \n\t" | |
1355 "pfadd %%mm7, %%mm1 \n\t" | |
1356 "pfadd 4096(%0, %%esi), %%mm2 \n\t" | |
1357 "pfadd 4104(%0, %%esi), %%mm3 \n\t" | |
1358 "pfadd %%mm2, %%mm0 \n\t" | |
1359 "pfadd %%mm3, %%mm1 \n\t" | |
1360 "movq %%mm0, (%0, %%esi) \n\t" | |
1361 "movq %%mm1, 8(%0, %%esi) \n\t" | |
1362 "addl $16, %%esi \n\t" | |
1363 " jnz 1b \n\t" | |
1364 :: "r" (samples+256), "m" (bias) | |
1365 : "%esi" | |
1366 ); | |
1367 } | |
1368 | |
1369 static void mix3to2_3dnow (sample_t * samples, sample_t bias) | |
1370 { | |
1371 asm volatile( | |
1372 "movd %1, %%mm7 \n\t" | |
1373 "punpckldq %1, %%mm7 \n\t" | |
1374 "movl $-1024, %%esi \n\t" | |
1375 ".balign 16\n\t" | |
1376 "1: \n\t" | |
1377 "movq 1024(%0, %%esi), %%mm0 \n\t" | |
1378 "movq 1032(%0, %%esi), %%mm1 \n\t" | |
1379 "pfadd %%mm7, %%mm0 \n\t" //common | |
1380 "pfadd %%mm7, %%mm1 \n\t" //common | |
1381 "movq (%0, %%esi), %%mm2 \n\t" | |
1382 "movq 8(%0, %%esi), %%mm3 \n\t" | |
1383 "movq 2048(%0, %%esi), %%mm4 \n\t" | |
1384 "movq 2056(%0, %%esi), %%mm5 \n\t" | |
1385 "pfadd %%mm0, %%mm2 \n\t" | |
5912 | 1386 "pfadd %%mm1, %%mm3 \n\t" |
4233 | 1387 "pfadd %%mm0, %%mm4 \n\t" |
5912 | 1388 "pfadd %%mm1, %%mm5 \n\t" |
4233 | 1389 "movq %%mm2, (%0, %%esi) \n\t" |
1390 "movq %%mm3, 8(%0, %%esi) \n\t" | |
1391 "movq %%mm4, 1024(%0, %%esi) \n\t" | |
1392 "movq %%mm5, 1032(%0, %%esi) \n\t" | |
1393 "addl $16, %%esi \n\t" | |
1394 " jnz 1b \n\t" | |
1395 :: "r" (samples+256), "m" (bias) | |
1396 : "%esi" | |
1397 ); | |
1398 } | |
1399 | |
1400 static void mix21to2_3dnow (sample_t * left, sample_t * right, sample_t bias) | |
1401 { | |
1402 asm volatile( | |
1403 "movd %2, %%mm7 \n\t" | |
1404 "punpckldq %2, %%mm7 \n\t" | |
1405 "movl $-1024, %%esi \n\t" | |
1406 ".balign 16\n\t" | |
1407 "1: \n\t" | |
1408 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
1409 "movq 1032(%1, %%esi), %%mm1 \n\t" | |
1410 "pfadd %%mm7, %%mm0 \n\t" //common | |
1411 "pfadd %%mm7, %%mm1 \n\t" //common | |
1412 "movq (%0, %%esi), %%mm2 \n\t" | |
1413 "movq 8(%0, %%esi), %%mm3 \n\t" | |
1414 "movq (%1, %%esi), %%mm4 \n\t" | |
1415 "movq 8(%1, %%esi), %%mm5 \n\t" | |
1416 "pfadd %%mm0, %%mm2 \n\t" | |
1417 "pfadd %%mm1, %%mm3 \n\t" | |
1418 "pfadd %%mm0, %%mm4 \n\t" | |
1419 "pfadd %%mm1, %%mm5 \n\t" | |
1420 "movq %%mm2, (%0, %%esi) \n\t" | |
1421 "movq %%mm3, 8(%0, %%esi) \n\t" | |
1422 "movq %%mm4, (%1, %%esi) \n\t" | |
1423 "movq %%mm5, 8(%1, %%esi) \n\t" | |
1424 "addl $16, %%esi \n\t" | |
1425 " jnz 1b \n\t" | |
1426 :: "r" (left+256), "r" (right+256), "m" (bias) | |
1427 : "%esi" | |
1428 ); | |
1429 } | |
1430 | |
1431 static void mix21toS_3dnow (sample_t * samples, sample_t bias) | |
1432 { | |
1433 asm volatile( | |
1434 "movd %1, %%mm7 \n\t" | |
1435 "punpckldq %1, %%mm7 \n\t" | |
1436 "movl $-1024, %%esi \n\t" | |
1437 ".balign 16\n\t" | |
1438 "1: \n\t" | |
1439 "movq 2048(%0, %%esi), %%mm0 \n\t" // surround | |
1440 "movq 2056(%0, %%esi), %%mm1 \n\t" // surround | |
1441 "movq (%0, %%esi), %%mm2 \n\t" | |
1442 "movq 8(%0, %%esi), %%mm3 \n\t" | |
1443 "movq 1024(%0, %%esi), %%mm4 \n\t" | |
1444 "movq 1032(%0, %%esi), %%mm5 \n\t" | |
1445 "pfadd %%mm7, %%mm2 \n\t" | |
1446 "pfadd %%mm7, %%mm3 \n\t" | |
1447 "pfadd %%mm7, %%mm4 \n\t" | |
1448 "pfadd %%mm7, %%mm5 \n\t" | |
1449 "pfsub %%mm0, %%mm2 \n\t" | |
1450 "pfsub %%mm1, %%mm3 \n\t" | |
1451 "pfadd %%mm0, %%mm4 \n\t" | |
1452 "pfadd %%mm1, %%mm5 \n\t" | |
1453 "movq %%mm2, (%0, %%esi) \n\t" | |
1454 "movq %%mm3, 8(%0, %%esi) \n\t" | |
1455 "movq %%mm4, 1024(%0, %%esi) \n\t" | |
1456 "movq %%mm5, 1032(%0, %%esi) \n\t" | |
1457 "addl $16, %%esi \n\t" | |
1458 " jnz 1b \n\t" | |
1459 :: "r" (samples+256), "m" (bias) | |
1460 : "%esi" | |
1461 ); | |
1462 } | |
1463 | |
1464 static void mix31to2_3dnow (sample_t * samples, sample_t bias) | |
1465 { | |
1466 asm volatile( | |
1467 "movd %1, %%mm7 \n\t" | |
1468 "punpckldq %1, %%mm7 \n\t" | |
1469 "movl $-1024, %%esi \n\t" | |
1470 ".balign 16\n\t" | |
1471 "1: \n\t" | |
1472 "movq 1024(%0, %%esi), %%mm0 \n\t" | |
1473 "movq 1032(%0, %%esi), %%mm1 \n\t" | |
1474 "pfadd 3072(%0, %%esi), %%mm0 \n\t" | |
1475 "pfadd 3080(%0, %%esi), %%mm1 \n\t" | |
1476 "pfadd %%mm7, %%mm0 \n\t" // common | |
1477 "pfadd %%mm7, %%mm1 \n\t" // common | |
1478 "movq (%0, %%esi), %%mm2 \n\t" | |
1479 "movq 8(%0, %%esi), %%mm3 \n\t" | |
1480 "movq 2048(%0, %%esi), %%mm4 \n\t" | |
1481 "movq 2056(%0, %%esi), %%mm5 \n\t" | |
1482 "pfadd %%mm0, %%mm2 \n\t" | |
1483 "pfadd %%mm1, %%mm3 \n\t" | |
1484 "pfadd %%mm0, %%mm4 \n\t" | |
1485 "pfadd %%mm1, %%mm5 \n\t" | |
1486 "movq %%mm2, (%0, %%esi) \n\t" | |
1487 "movq %%mm3, 8(%0, %%esi) \n\t" | |
1488 "movq %%mm4, 1024(%0, %%esi) \n\t" | |
1489 "movq %%mm5, 1032(%0, %%esi) \n\t" | |
1490 "addl $16, %%esi \n\t" | |
1491 " jnz 1b \n\t" | |
1492 :: "r" (samples+256), "m" (bias) | |
1493 : "%esi" | |
1494 ); | |
1495 } | |
1496 | |
1497 static void mix31toS_3dnow (sample_t * samples, sample_t bias) | |
1498 { | |
1499 asm volatile( | |
1500 "movd %1, %%mm7 \n\t" | |
1501 "punpckldq %1, %%mm7 \n\t" | |
1502 "movl $-1024, %%esi \n\t" | |
1503 ".balign 16\n\t" | |
1504 "1: \n\t" | |
1505 "movq 1024(%0, %%esi), %%mm0 \n\t" | |
1506 "movq 1032(%0, %%esi), %%mm1 \n\t" | |
1507 "pfadd %%mm7, %%mm0 \n\t" // common | |
1508 "pfadd %%mm7, %%mm1 \n\t" // common | |
1509 "movq (%0, %%esi), %%mm2 \n\t" | |
1510 "movq 8(%0, %%esi), %%mm3 \n\t" | |
1511 "movq 2048(%0, %%esi), %%mm4 \n\t" | |
1512 "movq 2056(%0, %%esi), %%mm5 \n\t" | |
1513 "pfadd %%mm0, %%mm2 \n\t" | |
1514 "pfadd %%mm1, %%mm3 \n\t" | |
1515 "pfadd %%mm0, %%mm4 \n\t" | |
1516 "pfadd %%mm1, %%mm5 \n\t" | |
1517 "movq 3072(%0, %%esi), %%mm0 \n\t" // surround | |
1518 "movq 3080(%0, %%esi), %%mm1 \n\t" // surround | |
1519 "pfsub %%mm0, %%mm2 \n\t" | |
1520 "pfsub %%mm1, %%mm3 \n\t" | |
1521 "pfadd %%mm0, %%mm4 \n\t" | |
1522 "pfadd %%mm1, %%mm5 \n\t" | |
1523 "movq %%mm2, (%0, %%esi) \n\t" | |
1524 "movq %%mm3, 8(%0, %%esi) \n\t" | |
1525 "movq %%mm4, 1024(%0, %%esi) \n\t" | |
1526 "movq %%mm5, 1032(%0, %%esi) \n\t" | |
1527 "addl $16, %%esi \n\t" | |
1528 " jnz 1b \n\t" | |
1529 :: "r" (samples+256), "m" (bias) | |
1530 : "%esi" | |
1531 ); | |
1532 } | |
1533 | |
1534 static void mix22toS_3dnow (sample_t * samples, sample_t bias) | |
1535 { | |
1536 asm volatile( | |
1537 "movd %1, %%mm7 \n\t" | |
1538 "punpckldq %1, %%mm7 \n\t" | |
1539 "movl $-1024, %%esi \n\t" | |
1540 ".balign 16\n\t" | |
1541 "1: \n\t" | |
1542 "movq 2048(%0, %%esi), %%mm0 \n\t" | |
1543 "movq 2056(%0, %%esi), %%mm1 \n\t" | |
1544 "pfadd 3072(%0, %%esi), %%mm0 \n\t" // surround | |
1545 "pfadd 3080(%0, %%esi), %%mm1 \n\t" // surround | |
1546 "movq (%0, %%esi), %%mm2 \n\t" | |
1547 "movq 8(%0, %%esi), %%mm3 \n\t" | |
1548 "movq 1024(%0, %%esi), %%mm4 \n\t" | |
1549 "movq 1032(%0, %%esi), %%mm5 \n\t" | |
1550 "pfadd %%mm7, %%mm2 \n\t" | |
1551 "pfadd %%mm7, %%mm3 \n\t" | |
1552 "pfadd %%mm7, %%mm4 \n\t" | |
1553 "pfadd %%mm7, %%mm5 \n\t" | |
1554 "pfsub %%mm0, %%mm2 \n\t" | |
1555 "pfsub %%mm1, %%mm3 \n\t" | |
1556 "pfadd %%mm0, %%mm4 \n\t" | |
1557 "pfadd %%mm1, %%mm5 \n\t" | |
1558 "movq %%mm2, (%0, %%esi) \n\t" | |
1559 "movq %%mm3, 8(%0, %%esi) \n\t" | |
1560 "movq %%mm4, 1024(%0, %%esi) \n\t" | |
1561 "movq %%mm5, 1032(%0, %%esi) \n\t" | |
1562 "addl $16, %%esi \n\t" | |
1563 " jnz 1b \n\t" | |
1564 :: "r" (samples+256), "m" (bias) | |
1565 : "%esi" | |
1566 ); | |
1567 } | |
1568 | |
1569 static void mix32to2_3dnow (sample_t * samples, sample_t bias) | |
1570 { | |
1571 asm volatile( | |
1572 "movd %1, %%mm7 \n\t" | |
1573 "punpckldq %1, %%mm7 \n\t" | |
1574 "movl $-1024, %%esi \n\t" | |
1575 ".balign 16\n\t" | |
1576 "1: \n\t" | |
1577 "movq 1024(%0, %%esi), %%mm0 \n\t" | |
1578 "movq 1032(%0, %%esi), %%mm1 \n\t" | |
1579 "pfadd %%mm7, %%mm0 \n\t" // common | |
1580 "pfadd %%mm7, %%mm1 \n\t" // common | |
1581 "movq %%mm0, %%mm2 \n\t" // common | |
1582 "movq %%mm1, %%mm3 \n\t" // common | |
1583 "pfadd (%0, %%esi), %%mm0 \n\t" | |
1584 "pfadd 8(%0, %%esi), %%mm1 \n\t" | |
1585 "pfadd 2048(%0, %%esi), %%mm2 \n\t" | |
1586 "pfadd 2056(%0, %%esi), %%mm3 \n\t" | |
1587 "pfadd 3072(%0, %%esi), %%mm0 \n\t" | |
1588 "pfadd 3080(%0, %%esi), %%mm1 \n\t" | |
1589 "pfadd 4096(%0, %%esi), %%mm2 \n\t" | |
1590 "pfadd 4104(%0, %%esi), %%mm3 \n\t" | |
1591 "movq %%mm0, (%0, %%esi) \n\t" | |
1592 "movq %%mm1, 8(%0, %%esi) \n\t" | |
1593 "movq %%mm2, 1024(%0, %%esi) \n\t" | |
1594 "movq %%mm3, 1032(%0, %%esi) \n\t" | |
1595 "addl $16, %%esi \n\t" | |
1596 " jnz 1b \n\t" | |
1597 :: "r" (samples+256), "m" (bias) | |
1598 : "%esi" | |
1599 ); | |
1600 } | |
1601 | |
1602 /* todo: should be optimized better */ | |
1603 static void mix32toS_3dnow (sample_t * samples, sample_t bias) | |
1604 { | |
1605 asm volatile( | |
1606 "movl $-1024, %%esi \n\t" | |
1607 ".balign 16\n\t" | |
1608 "1: \n\t" | |
1609 "movd %1, %%mm7 \n\t" | |
1610 "punpckldq %1, %%mm7 \n\t" | |
1611 "movq 1024(%0, %%esi), %%mm0 \n\t" | |
1612 "movq 1032(%0, %%esi), %%mm1 \n\t" | |
1613 "movq 3072(%0, %%esi), %%mm4 \n\t" | |
1614 "movq 3080(%0, %%esi), %%mm5 \n\t" | |
1615 "pfadd %%mm7, %%mm0 \n\t" // common | |
1616 "pfadd %%mm7, %%mm1 \n\t" // common | |
1617 "pfadd 4096(%0, %%esi), %%mm4 \n\t" // surround | |
1618 "pfadd 4104(%0, %%esi), %%mm5 \n\t" // surround | |
1619 "movq (%0, %%esi), %%mm2 \n\t" | |
1620 "movq 8(%0, %%esi), %%mm3 \n\t" | |
1621 "movq 2048(%0, %%esi), %%mm6 \n\t" | |
1622 "movq 2056(%0, %%esi), %%mm7 \n\t" | |
1623 "pfsub %%mm4, %%mm2 \n\t" | |
1624 "pfsub %%mm5, %%mm3 \n\t" | |
1625 "pfadd %%mm4, %%mm6 \n\t" | |
1626 "pfadd %%mm5, %%mm7 \n\t" | |
1627 "pfadd %%mm0, %%mm2 \n\t" | |
1628 "pfadd %%mm1, %%mm3 \n\t" | |
1629 "pfadd %%mm0, %%mm6 \n\t" | |
1630 "pfadd %%mm1, %%mm7 \n\t" | |
1631 "movq %%mm2, (%0, %%esi) \n\t" | |
1632 "movq %%mm3, 8(%0, %%esi) \n\t" | |
1633 "movq %%mm6, 1024(%0, %%esi) \n\t" | |
1634 "movq %%mm7, 1032(%0, %%esi) \n\t" | |
1635 "addl $16, %%esi \n\t" | |
1636 " jnz 1b \n\t" | |
1637 :: "r" (samples+256), "m" (bias) | |
1638 : "%esi" | |
1639 ); | |
1640 } | |
1641 | |
1642 static void move2to1_3dnow (sample_t * src, sample_t * dest, sample_t bias) | |
1643 { | |
1644 asm volatile( | |
1645 "movd %2, %%mm7 \n\t" | |
1646 "punpckldq %2, %%mm7 \n\t" | |
1647 "movl $-1024, %%esi \n\t" | |
1648 ".balign 16\n\t" | |
1649 "1: \n\t" | |
1650 "movq (%0, %%esi), %%mm0 \n\t" | |
1651 "movq 8(%0, %%esi), %%mm1 \n\t" | |
1652 "movq 16(%0, %%esi), %%mm2 \n\t" | |
1653 "movq 24(%0, %%esi), %%mm3 \n\t" | |
1654 "pfadd 1024(%0, %%esi), %%mm0 \n\t" | |
1655 "pfadd 1032(%0, %%esi), %%mm1 \n\t" | |
1656 "pfadd 1040(%0, %%esi), %%mm2 \n\t" | |
1657 "pfadd 1048(%0, %%esi), %%mm3 \n\t" | |
1658 "pfadd %%mm7, %%mm0 \n\t" | |
1659 "pfadd %%mm7, %%mm1 \n\t" | |
1660 "pfadd %%mm7, %%mm2 \n\t" | |
1661 "pfadd %%mm7, %%mm3 \n\t" | |
1662 "movq %%mm0, (%1, %%esi) \n\t" | |
1663 "movq %%mm1, 8(%1, %%esi) \n\t" | |
1664 "movq %%mm2, 16(%1, %%esi) \n\t" | |
1665 "movq %%mm3, 24(%1, %%esi) \n\t" | |
1666 "addl $32, %%esi \n\t" | |
1667 " jnz 1b \n\t" | |
1668 :: "r" (src+256), "r" (dest+256), "m" (bias) | |
1669 : "%esi" | |
1670 ); | |
1671 } | |
1672 | |
1673 static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t bias, | |
1674 sample_t clev, sample_t slev) | |
1675 { | |
1676 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
1677 | |
1678 case CONVERT (A52_CHANNEL, A52_CHANNEL2): | |
1679 memcpy (samples, samples + 256, 256 * sizeof (sample_t)); | |
1680 break; | |
1681 | |
1682 case CONVERT (A52_CHANNEL, A52_MONO): | |
1683 case CONVERT (A52_STEREO, A52_MONO): | |
1684 mix_2to1_3dnow: | |
1685 mix2to1_3dnow (samples, samples + 256, bias); | |
1686 break; | |
1687 | |
1688 case CONVERT (A52_2F1R, A52_MONO): | |
1689 if (slev == 0) | |
1690 goto mix_2to1_3dnow; | |
1691 case CONVERT (A52_3F, A52_MONO): | |
1692 mix_3to1_3dnow: | |
1693 mix3to1_3dnow (samples, bias); | |
1694 break; | |
1695 | |
1696 case CONVERT (A52_3F1R, A52_MONO): | |
1697 if (slev == 0) | |
1698 goto mix_3to1_3dnow; | |
1699 case CONVERT (A52_2F2R, A52_MONO): | |
1700 if (slev == 0) | |
1701 goto mix_2to1_3dnow; | |
1702 mix4to1_3dnow (samples, bias); | |
1703 break; | |
1704 | |
1705 case CONVERT (A52_3F2R, A52_MONO): | |
1706 if (slev == 0) | |
1707 goto mix_3to1_3dnow; | |
1708 mix5to1_3dnow (samples, bias); | |
1709 break; | |
1710 | |
1711 case CONVERT (A52_MONO, A52_DOLBY): | |
1712 memcpy (samples + 256, samples, 256 * sizeof (sample_t)); | |
1713 break; | |
1714 | |
1715 case CONVERT (A52_3F, A52_STEREO): | |
1716 case CONVERT (A52_3F, A52_DOLBY): | |
1717 mix_3to2_3dnow: | |
1718 mix3to2_3dnow (samples, bias); | |
1719 break; | |
1720 | |
1721 case CONVERT (A52_2F1R, A52_STEREO): | |
1722 if (slev == 0) | |
1723 break; | |
1724 mix21to2_3dnow (samples, samples + 256, bias); | |
1725 break; | |
1726 | |
1727 case CONVERT (A52_2F1R, A52_DOLBY): | |
1728 mix21toS_3dnow (samples, bias); | |
1729 break; | |
1730 | |
1731 case CONVERT (A52_3F1R, A52_STEREO): | |
1732 if (slev == 0) | |
1733 goto mix_3to2_3dnow; | |
1734 mix31to2_3dnow (samples, bias); | |
1735 break; | |
1736 | |
1737 case CONVERT (A52_3F1R, A52_DOLBY): | |
1738 mix31toS_3dnow (samples, bias); | |
1739 break; | |
1740 | |
1741 case CONVERT (A52_2F2R, A52_STEREO): | |
1742 if (slev == 0) | |
1743 break; | |
1744 mix2to1_3dnow (samples, samples + 512, bias); | |
1745 mix2to1_3dnow (samples + 256, samples + 768, bias); | |
1746 break; | |
1747 | |
1748 case CONVERT (A52_2F2R, A52_DOLBY): | |
1749 mix22toS_3dnow (samples, bias); | |
1750 break; | |
1751 | |
1752 case CONVERT (A52_3F2R, A52_STEREO): | |
1753 if (slev == 0) | |
1754 goto mix_3to2_3dnow; | |
1755 mix32to2_3dnow (samples, bias); | |
1756 break; | |
1757 | |
1758 case CONVERT (A52_3F2R, A52_DOLBY): | |
1759 mix32toS_3dnow (samples, bias); | |
1760 break; | |
1761 | |
1762 case CONVERT (A52_3F1R, A52_3F): | |
1763 if (slev == 0) | |
1764 break; | |
1765 mix21to2_3dnow (samples, samples + 512, bias); | |
1766 break; | |
1767 | |
1768 case CONVERT (A52_3F2R, A52_3F): | |
1769 if (slev == 0) | |
1770 break; | |
1771 mix2to1_3dnow (samples, samples + 768, bias); | |
1772 mix2to1_3dnow (samples + 512, samples + 1024, bias); | |
1773 break; | |
1774 | |
1775 case CONVERT (A52_3F1R, A52_2F1R): | |
1776 mix3to2_3dnow (samples, bias); | |
1777 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1778 break; | |
1779 | |
1780 case CONVERT (A52_2F2R, A52_2F1R): | |
1781 mix2to1_3dnow (samples + 512, samples + 768, bias); | |
1782 break; | |
1783 | |
1784 case CONVERT (A52_3F2R, A52_2F1R): | |
1785 mix3to2_3dnow (samples, bias); //FIXME possible bug? (output doesnt seem to be used) | |
1786 move2to1_3dnow (samples + 768, samples + 512, bias); | |
1787 break; | |
1788 | |
1789 case CONVERT (A52_3F2R, A52_3F1R): | |
1790 mix2to1_3dnow (samples + 768, samples + 1024, bias); | |
1791 break; | |
1792 | |
1793 case CONVERT (A52_2F1R, A52_2F2R): | |
1794 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); | |
1795 break; | |
1796 | |
1797 case CONVERT (A52_3F1R, A52_2F2R): | |
1798 mix3to2_3dnow (samples, bias); | |
1799 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1800 break; | |
1801 | |
1802 case CONVERT (A52_3F2R, A52_2F2R): | |
1803 mix3to2_3dnow (samples, bias); | |
1804 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1805 memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t)); | |
1806 break; | |
1807 | |
1808 case CONVERT (A52_3F1R, A52_3F2R): | |
1809 memcpy (samples + 1027, samples + 768, 256 * sizeof (sample_t)); | |
1810 break; | |
1811 } | |
1812 __asm __volatile("femms":::"memory"); | |
1813 } | |
1814 | |
3904 | 1815 #endif //ARCH_X86 |