Mercurial > mplayer.hg
annotate liba52/downmix.c @ 19045:9b94255a50b4
grammar and consistency fixes
author | diego |
---|---|
date | Wed, 12 Jul 2006 23:11:34 +0000 |
parents | 0783dd397f74 |
children | 6334c14b38eb |
rev | line source |
---|---|
3394 | 1 /* |
2 * downmix.c | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
3 * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> |
3394 | 4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> |
5 * | |
6 * This file is part of a52dec, a free ATSC A-52 stream decoder. | |
7 * See http://liba52.sourceforge.net/ for updates. | |
8 * | |
14991
07f1e7669772
Mark modified files as such to comply more closely with GPL §2a.
diego
parents:
12137
diff
changeset
|
9 * Modified for use with MPlayer, changes contained in liba52_changes.diff. |
18783 | 10 * detailed changelog at http://svn.mplayerhq.hu/mplayer/trunk/ |
14991
07f1e7669772
Mark modified files as such to comply more closely with GPL §2a.
diego
parents:
12137
diff
changeset
|
11 * $Id$ |
07f1e7669772
Mark modified files as such to comply more closely with GPL §2a.
diego
parents:
12137
diff
changeset
|
12 * |
3394 | 13 * a52dec is free software; you can redistribute it and/or modify |
14 * it under the terms of the GNU General Public License as published by | |
15 * the Free Software Foundation; either version 2 of the License, or | |
16 * (at your option) any later version. | |
17 * | |
18 * a52dec is distributed in the hope that it will be useful, | |
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
21 * GNU General Public License for more details. | |
22 * | |
23 * You should have received a copy of the GNU General Public License | |
24 * along with this program; if not, write to the Free Software | |
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
3625 | 26 * |
27 * SSE optimizations from Michael Niedermayer (michaelni@gmx.at) | |
3394 | 28 */ |
29 | |
30 #include "config.h" | |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
31 #include "asmalign.h" |
3394 | 32 |
33 #include <string.h> | |
34 #include <inttypes.h> | |
35 | |
36 #include "a52.h" | |
37 #include "a52_internal.h" | |
3910
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
38 #include "mm_accel.h" |
3394 | 39 |
40 #define CONVERT(acmod,output) (((output) << 3) + (acmod)) | |
41 | |
3904 | 42 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
43 void (*a52_downmix)(sample_t * samples, int acmod, int output, sample_t bias, |
3904 | 44 sample_t clev, sample_t slev)= NULL; |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
45 void (*a52_upmix)(sample_t * samples, int acmod, int output)= NULL; |
3904 | 46 |
47 static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias, | |
48 sample_t clev, sample_t slev); | |
4233 | 49 static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t bias, |
50 sample_t clev, sample_t slev); | |
3904 | 51 static void downmix_C (sample_t * samples, int acmod, int output, sample_t bias, |
52 sample_t clev, sample_t slev); | |
53 static void upmix_MMX (sample_t * samples, int acmod, int output); | |
54 static void upmix_C (sample_t * samples, int acmod, int output); | |
3910
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
55 |
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
56 void downmix_accel_init(uint32_t mm_accel) |
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
57 { |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
58 a52_upmix= upmix_C; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
59 a52_downmix= downmix_C; |
16173 | 60 #if defined(ARCH_X86) || defined(ARCH_X86_64) |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
61 if(mm_accel & MM_ACCEL_X86_MMX) a52_upmix= upmix_MMX; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
62 if(mm_accel & MM_ACCEL_X86_SSE) a52_downmix= downmix_SSE; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
63 if(mm_accel & MM_ACCEL_X86_3DNOW) a52_downmix= downmix_3dnow; |
3910
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
64 #endif |
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
65 } |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
66 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
67 int a52_downmix_init (int input, int flags, sample_t * level, |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
68 sample_t clev, sample_t slev) |
3394 | 69 { |
70 static uint8_t table[11][8] = { | |
71 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO, | |
72 A52_STEREO, A52_STEREO, A52_STEREO, A52_STEREO}, | |
73 {A52_MONO, A52_MONO, A52_MONO, A52_MONO, | |
74 A52_MONO, A52_MONO, A52_MONO, A52_MONO}, | |
75 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO, | |
76 A52_STEREO, A52_STEREO, A52_STEREO, A52_STEREO}, | |
77 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F, | |
78 A52_STEREO, A52_3F, A52_STEREO, A52_3F}, | |
79 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO, | |
80 A52_2F1R, A52_2F1R, A52_2F1R, A52_2F1R}, | |
81 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO, | |
82 A52_2F1R, A52_3F1R, A52_2F1R, A52_3F1R}, | |
83 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F, | |
84 A52_2F2R, A52_2F2R, A52_2F2R, A52_2F2R}, | |
85 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F, | |
86 A52_2F2R, A52_3F2R, A52_2F2R, A52_3F2R}, | |
87 {A52_CHANNEL1, A52_MONO, A52_MONO, A52_MONO, | |
88 A52_MONO, A52_MONO, A52_MONO, A52_MONO}, | |
89 {A52_CHANNEL2, A52_MONO, A52_MONO, A52_MONO, | |
90 A52_MONO, A52_MONO, A52_MONO, A52_MONO}, | |
91 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_DOLBY, | |
92 A52_DOLBY, A52_DOLBY, A52_DOLBY, A52_DOLBY} | |
93 }; | |
94 int output; | |
95 | |
96 output = flags & A52_CHANNEL_MASK; | |
97 if (output > A52_DOLBY) | |
98 return -1; | |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
99 |
3394 | 100 output = table[output][input & 7]; |
101 | |
102 if ((output == A52_STEREO) && | |
103 ((input == A52_DOLBY) || ((input == A52_3F) && (clev == LEVEL_3DB)))) | |
104 output = A52_DOLBY; | |
105 | |
106 if (flags & A52_ADJUST_LEVEL) | |
107 switch (CONVERT (input & 7, output)) { | |
108 | |
109 case CONVERT (A52_3F, A52_MONO): | |
110 *level *= LEVEL_3DB / (1 + clev); | |
111 break; | |
112 | |
113 case CONVERT (A52_STEREO, A52_MONO): | |
114 case CONVERT (A52_2F2R, A52_2F1R): | |
115 case CONVERT (A52_3F2R, A52_3F1R): | |
116 level_3db: | |
117 *level *= LEVEL_3DB; | |
118 break; | |
119 | |
120 case CONVERT (A52_3F2R, A52_2F1R): | |
121 if (clev < LEVEL_PLUS3DB - 1) | |
122 goto level_3db; | |
123 /* break thru */ | |
124 case CONVERT (A52_3F, A52_STEREO): | |
125 case CONVERT (A52_3F1R, A52_2F1R): | |
126 case CONVERT (A52_3F1R, A52_2F2R): | |
127 case CONVERT (A52_3F2R, A52_2F2R): | |
128 *level /= 1 + clev; | |
129 break; | |
130 | |
131 case CONVERT (A52_2F1R, A52_MONO): | |
132 *level *= LEVEL_PLUS3DB / (2 + slev); | |
133 break; | |
134 | |
135 case CONVERT (A52_2F1R, A52_STEREO): | |
136 case CONVERT (A52_3F1R, A52_3F): | |
137 *level /= 1 + slev * LEVEL_3DB; | |
138 break; | |
139 | |
140 case CONVERT (A52_3F1R, A52_MONO): | |
141 *level *= LEVEL_3DB / (1 + clev + 0.5 * slev); | |
142 break; | |
143 | |
144 case CONVERT (A52_3F1R, A52_STEREO): | |
145 *level /= 1 + clev + slev * LEVEL_3DB; | |
146 break; | |
147 | |
148 case CONVERT (A52_2F2R, A52_MONO): | |
149 *level *= LEVEL_3DB / (1 + slev); | |
150 break; | |
151 | |
152 case CONVERT (A52_2F2R, A52_STEREO): | |
153 case CONVERT (A52_3F2R, A52_3F): | |
154 *level /= 1 + slev; | |
155 break; | |
156 | |
157 case CONVERT (A52_3F2R, A52_MONO): | |
158 *level *= LEVEL_3DB / (1 + clev + slev); | |
159 break; | |
160 | |
161 case CONVERT (A52_3F2R, A52_STEREO): | |
162 *level /= 1 + clev + slev; | |
163 break; | |
164 | |
165 case CONVERT (A52_MONO, A52_DOLBY): | |
166 *level *= LEVEL_PLUS3DB; | |
167 break; | |
168 | |
169 case CONVERT (A52_3F, A52_DOLBY): | |
170 case CONVERT (A52_2F1R, A52_DOLBY): | |
171 *level *= 1 / (1 + LEVEL_3DB); | |
172 break; | |
173 | |
174 case CONVERT (A52_3F1R, A52_DOLBY): | |
175 case CONVERT (A52_2F2R, A52_DOLBY): | |
176 *level *= 1 / (1 + 2 * LEVEL_3DB); | |
177 break; | |
178 | |
179 case CONVERT (A52_3F2R, A52_DOLBY): | |
180 *level *= 1 / (1 + 3 * LEVEL_3DB); | |
181 break; | |
182 } | |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
183 |
3394 | 184 return output; |
185 } | |
186 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
187 int a52_downmix_coeff (sample_t * coeff, int acmod, int output, sample_t level, |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
188 sample_t clev, sample_t slev) |
3394 | 189 { |
190 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
191 | |
192 case CONVERT (A52_CHANNEL, A52_CHANNEL): | |
193 case CONVERT (A52_MONO, A52_MONO): | |
194 case CONVERT (A52_STEREO, A52_STEREO): | |
195 case CONVERT (A52_3F, A52_3F): | |
196 case CONVERT (A52_2F1R, A52_2F1R): | |
197 case CONVERT (A52_3F1R, A52_3F1R): | |
198 case CONVERT (A52_2F2R, A52_2F2R): | |
199 case CONVERT (A52_3F2R, A52_3F2R): | |
200 case CONVERT (A52_STEREO, A52_DOLBY): | |
201 coeff[0] = coeff[1] = coeff[2] = coeff[3] = coeff[4] = level; | |
202 return 0; | |
203 | |
204 case CONVERT (A52_CHANNEL, A52_MONO): | |
205 coeff[0] = coeff[1] = level * LEVEL_6DB; | |
206 return 3; | |
207 | |
208 case CONVERT (A52_STEREO, A52_MONO): | |
209 coeff[0] = coeff[1] = level * LEVEL_3DB; | |
210 return 3; | |
211 | |
212 case CONVERT (A52_3F, A52_MONO): | |
213 coeff[0] = coeff[2] = level * LEVEL_3DB; | |
214 coeff[1] = level * clev * LEVEL_PLUS3DB; | |
215 return 7; | |
216 | |
217 case CONVERT (A52_2F1R, A52_MONO): | |
218 coeff[0] = coeff[1] = level * LEVEL_3DB; | |
219 coeff[2] = level * slev * LEVEL_3DB; | |
220 return 7; | |
221 | |
222 case CONVERT (A52_2F2R, A52_MONO): | |
223 coeff[0] = coeff[1] = level * LEVEL_3DB; | |
224 coeff[2] = coeff[3] = level * slev * LEVEL_3DB; | |
225 return 15; | |
226 | |
227 case CONVERT (A52_3F1R, A52_MONO): | |
228 coeff[0] = coeff[2] = level * LEVEL_3DB; | |
229 coeff[1] = level * clev * LEVEL_PLUS3DB; | |
230 coeff[3] = level * slev * LEVEL_3DB; | |
231 return 15; | |
232 | |
233 case CONVERT (A52_3F2R, A52_MONO): | |
234 coeff[0] = coeff[2] = level * LEVEL_3DB; | |
235 coeff[1] = level * clev * LEVEL_PLUS3DB; | |
236 coeff[3] = coeff[4] = level * slev * LEVEL_3DB; | |
237 return 31; | |
238 | |
239 case CONVERT (A52_MONO, A52_DOLBY): | |
240 coeff[0] = level * LEVEL_3DB; | |
241 return 0; | |
242 | |
243 case CONVERT (A52_3F, A52_DOLBY): | |
244 clev = LEVEL_3DB; | |
245 case CONVERT (A52_3F, A52_STEREO): | |
246 case CONVERT (A52_3F1R, A52_2F1R): | |
247 case CONVERT (A52_3F2R, A52_2F2R): | |
248 coeff[0] = coeff[2] = coeff[3] = coeff[4] = level; | |
249 coeff[1] = level * clev; | |
250 return 7; | |
251 | |
252 case CONVERT (A52_2F1R, A52_DOLBY): | |
253 slev = 1; | |
254 case CONVERT (A52_2F1R, A52_STEREO): | |
255 coeff[0] = coeff[1] = level; | |
256 coeff[2] = level * slev * LEVEL_3DB; | |
257 return 7; | |
258 | |
259 case CONVERT (A52_3F1R, A52_DOLBY): | |
260 clev = LEVEL_3DB; | |
261 slev = 1; | |
262 case CONVERT (A52_3F1R, A52_STEREO): | |
263 coeff[0] = coeff[2] = level; | |
264 coeff[1] = level * clev; | |
265 coeff[3] = level * slev * LEVEL_3DB; | |
266 return 15; | |
267 | |
268 case CONVERT (A52_2F2R, A52_DOLBY): | |
269 slev = LEVEL_3DB; | |
270 case CONVERT (A52_2F2R, A52_STEREO): | |
271 coeff[0] = coeff[1] = level; | |
272 coeff[2] = coeff[3] = level * slev; | |
273 return 15; | |
274 | |
275 case CONVERT (A52_3F2R, A52_DOLBY): | |
276 clev = LEVEL_3DB; | |
277 case CONVERT (A52_3F2R, A52_2F1R): | |
278 slev = LEVEL_3DB; | |
279 case CONVERT (A52_3F2R, A52_STEREO): | |
280 coeff[0] = coeff[2] = level; | |
281 coeff[1] = level * clev; | |
282 coeff[3] = coeff[4] = level * slev; | |
283 return 31; | |
284 | |
285 case CONVERT (A52_3F1R, A52_3F): | |
286 coeff[0] = coeff[1] = coeff[2] = level; | |
287 coeff[3] = level * slev * LEVEL_3DB; | |
288 return 13; | |
289 | |
290 case CONVERT (A52_3F2R, A52_3F): | |
291 coeff[0] = coeff[1] = coeff[2] = level; | |
292 coeff[3] = coeff[4] = level * slev; | |
293 return 29; | |
294 | |
295 case CONVERT (A52_2F2R, A52_2F1R): | |
296 coeff[0] = coeff[1] = level; | |
297 coeff[2] = coeff[3] = level * LEVEL_3DB; | |
298 return 12; | |
299 | |
300 case CONVERT (A52_3F2R, A52_3F1R): | |
301 coeff[0] = coeff[1] = coeff[2] = level; | |
302 coeff[3] = coeff[4] = level * LEVEL_3DB; | |
303 return 24; | |
304 | |
305 case CONVERT (A52_2F1R, A52_2F2R): | |
306 coeff[0] = coeff[1] = level; | |
307 coeff[2] = level * LEVEL_3DB; | |
308 return 0; | |
309 | |
310 case CONVERT (A52_3F1R, A52_2F2R): | |
311 coeff[0] = coeff[2] = level; | |
312 coeff[1] = level * clev; | |
313 coeff[3] = level * LEVEL_3DB; | |
314 return 7; | |
315 | |
316 case CONVERT (A52_3F1R, A52_3F2R): | |
317 coeff[0] = coeff[1] = coeff[2] = level; | |
318 coeff[3] = level * LEVEL_3DB; | |
319 return 0; | |
320 | |
321 case CONVERT (A52_CHANNEL, A52_CHANNEL1): | |
322 coeff[0] = level; | |
323 coeff[1] = 0; | |
324 return 0; | |
325 | |
326 case CONVERT (A52_CHANNEL, A52_CHANNEL2): | |
327 coeff[0] = 0; | |
328 coeff[1] = level; | |
329 return 0; | |
330 } | |
331 | |
332 return -1; /* NOTREACHED */ | |
333 } | |
334 | |
335 static void mix2to1 (sample_t * dest, sample_t * src, sample_t bias) | |
336 { | |
337 int i; | |
338 | |
339 for (i = 0; i < 256; i++) | |
340 dest[i] += src[i] + bias; | |
341 } | |
342 | |
343 static void mix3to1 (sample_t * samples, sample_t bias) | |
344 { | |
345 int i; | |
346 | |
347 for (i = 0; i < 256; i++) | |
348 samples[i] += samples[i + 256] + samples[i + 512] + bias; | |
349 } | |
350 | |
351 static void mix4to1 (sample_t * samples, sample_t bias) | |
352 { | |
353 int i; | |
354 | |
355 for (i = 0; i < 256; i++) | |
356 samples[i] += (samples[i + 256] + samples[i + 512] + | |
357 samples[i + 768] + bias); | |
358 } | |
359 | |
360 static void mix5to1 (sample_t * samples, sample_t bias) | |
361 { | |
362 int i; | |
363 | |
364 for (i = 0; i < 256; i++) | |
365 samples[i] += (samples[i + 256] + samples[i + 512] + | |
366 samples[i + 768] + samples[i + 1024] + bias); | |
367 } | |
368 | |
369 static void mix3to2 (sample_t * samples, sample_t bias) | |
370 { | |
371 int i; | |
372 sample_t common; | |
373 | |
374 for (i = 0; i < 256; i++) { | |
375 common = samples[i + 256] + bias; | |
376 samples[i] += common; | |
377 samples[i + 256] = samples[i + 512] + common; | |
378 } | |
379 } | |
380 | |
381 static void mix21to2 (sample_t * left, sample_t * right, sample_t bias) | |
382 { | |
383 int i; | |
384 sample_t common; | |
385 | |
386 for (i = 0; i < 256; i++) { | |
387 common = right[i + 256] + bias; | |
388 left[i] += common; | |
389 right[i] += common; | |
390 } | |
391 } | |
392 | |
393 static void mix21toS (sample_t * samples, sample_t bias) | |
394 { | |
395 int i; | |
396 sample_t surround; | |
397 | |
398 for (i = 0; i < 256; i++) { | |
399 surround = samples[i + 512]; | |
400 samples[i] += bias - surround; | |
401 samples[i + 256] += bias + surround; | |
402 } | |
403 } | |
404 | |
405 static void mix31to2 (sample_t * samples, sample_t bias) | |
406 { | |
407 int i; | |
408 sample_t common; | |
409 | |
410 for (i = 0; i < 256; i++) { | |
411 common = samples[i + 256] + samples[i + 768] + bias; | |
412 samples[i] += common; | |
413 samples[i + 256] = samples[i + 512] + common; | |
414 } | |
415 } | |
416 | |
417 static void mix31toS (sample_t * samples, sample_t bias) | |
418 { | |
419 int i; | |
420 sample_t common, surround; | |
421 | |
422 for (i = 0; i < 256; i++) { | |
423 common = samples[i + 256] + bias; | |
424 surround = samples[i + 768]; | |
425 samples[i] += common - surround; | |
426 samples[i + 256] = samples[i + 512] + common + surround; | |
427 } | |
428 } | |
429 | |
430 static void mix22toS (sample_t * samples, sample_t bias) | |
431 { | |
432 int i; | |
433 sample_t surround; | |
434 | |
435 for (i = 0; i < 256; i++) { | |
436 surround = samples[i + 512] + samples[i + 768]; | |
437 samples[i] += bias - surround; | |
438 samples[i + 256] += bias + surround; | |
439 } | |
440 } | |
441 | |
442 static void mix32to2 (sample_t * samples, sample_t bias) | |
443 { | |
444 int i; | |
445 sample_t common; | |
446 | |
447 for (i = 0; i < 256; i++) { | |
448 common = samples[i + 256] + bias; | |
449 samples[i] += common + samples[i + 768]; | |
450 samples[i + 256] = common + samples[i + 512] + samples[i + 1024]; | |
451 } | |
452 } | |
453 | |
454 static void mix32toS (sample_t * samples, sample_t bias) | |
455 { | |
456 int i; | |
457 sample_t common, surround; | |
458 | |
459 for (i = 0; i < 256; i++) { | |
460 common = samples[i + 256] + bias; | |
461 surround = samples[i + 768] + samples[i + 1024]; | |
462 samples[i] += common - surround; | |
463 samples[i + 256] = samples[i + 512] + common + surround; | |
464 } | |
465 } | |
466 | |
467 static void move2to1 (sample_t * src, sample_t * dest, sample_t bias) | |
468 { | |
469 int i; | |
470 | |
471 for (i = 0; i < 256; i++) | |
472 dest[i] = src[i] + src[i + 256] + bias; | |
473 } | |
474 | |
475 static void zero (sample_t * samples) | |
476 { | |
477 int i; | |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
478 |
3394 | 479 for (i = 0; i < 256; i++) |
480 samples[i] = 0; | |
481 } | |
482 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
483 void downmix_C (sample_t * samples, int acmod, int output, sample_t bias, |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
484 sample_t clev, sample_t slev) |
3394 | 485 { |
486 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
487 | |
488 case CONVERT (A52_CHANNEL, A52_CHANNEL2): | |
489 memcpy (samples, samples + 256, 256 * sizeof (sample_t)); | |
490 break; | |
491 | |
492 case CONVERT (A52_CHANNEL, A52_MONO): | |
493 case CONVERT (A52_STEREO, A52_MONO): | |
494 mix_2to1: | |
495 mix2to1 (samples, samples + 256, bias); | |
496 break; | |
497 | |
498 case CONVERT (A52_2F1R, A52_MONO): | |
499 if (slev == 0) | |
500 goto mix_2to1; | |
501 case CONVERT (A52_3F, A52_MONO): | |
502 mix_3to1: | |
503 mix3to1 (samples, bias); | |
504 break; | |
505 | |
506 case CONVERT (A52_3F1R, A52_MONO): | |
507 if (slev == 0) | |
508 goto mix_3to1; | |
509 case CONVERT (A52_2F2R, A52_MONO): | |
510 if (slev == 0) | |
511 goto mix_2to1; | |
512 mix4to1 (samples, bias); | |
513 break; | |
514 | |
515 case CONVERT (A52_3F2R, A52_MONO): | |
516 if (slev == 0) | |
517 goto mix_3to1; | |
518 mix5to1 (samples, bias); | |
519 break; | |
520 | |
521 case CONVERT (A52_MONO, A52_DOLBY): | |
522 memcpy (samples + 256, samples, 256 * sizeof (sample_t)); | |
523 break; | |
524 | |
525 case CONVERT (A52_3F, A52_STEREO): | |
526 case CONVERT (A52_3F, A52_DOLBY): | |
527 mix_3to2: | |
528 mix3to2 (samples, bias); | |
529 break; | |
530 | |
531 case CONVERT (A52_2F1R, A52_STEREO): | |
532 if (slev == 0) | |
533 break; | |
534 mix21to2 (samples, samples + 256, bias); | |
535 break; | |
536 | |
537 case CONVERT (A52_2F1R, A52_DOLBY): | |
538 mix21toS (samples, bias); | |
539 break; | |
540 | |
541 case CONVERT (A52_3F1R, A52_STEREO): | |
542 if (slev == 0) | |
543 goto mix_3to2; | |
544 mix31to2 (samples, bias); | |
545 break; | |
546 | |
547 case CONVERT (A52_3F1R, A52_DOLBY): | |
548 mix31toS (samples, bias); | |
549 break; | |
550 | |
551 case CONVERT (A52_2F2R, A52_STEREO): | |
552 if (slev == 0) | |
553 break; | |
554 mix2to1 (samples, samples + 512, bias); | |
555 mix2to1 (samples + 256, samples + 768, bias); | |
556 break; | |
557 | |
558 case CONVERT (A52_2F2R, A52_DOLBY): | |
559 mix22toS (samples, bias); | |
560 break; | |
561 | |
562 case CONVERT (A52_3F2R, A52_STEREO): | |
563 if (slev == 0) | |
564 goto mix_3to2; | |
565 mix32to2 (samples, bias); | |
566 break; | |
567 | |
568 case CONVERT (A52_3F2R, A52_DOLBY): | |
569 mix32toS (samples, bias); | |
570 break; | |
571 | |
572 case CONVERT (A52_3F1R, A52_3F): | |
573 if (slev == 0) | |
574 break; | |
575 mix21to2 (samples, samples + 512, bias); | |
576 break; | |
577 | |
578 case CONVERT (A52_3F2R, A52_3F): | |
579 if (slev == 0) | |
580 break; | |
581 mix2to1 (samples, samples + 768, bias); | |
582 mix2to1 (samples + 512, samples + 1024, bias); | |
583 break; | |
584 | |
585 case CONVERT (A52_3F1R, A52_2F1R): | |
586 mix3to2 (samples, bias); | |
587 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
588 break; | |
589 | |
590 case CONVERT (A52_2F2R, A52_2F1R): | |
591 mix2to1 (samples + 512, samples + 768, bias); | |
592 break; | |
593 | |
594 case CONVERT (A52_3F2R, A52_2F1R): | |
3678 | 595 mix3to2 (samples, bias); //FIXME possible bug? (output doesnt seem to be used) |
3394 | 596 move2to1 (samples + 768, samples + 512, bias); |
597 break; | |
598 | |
599 case CONVERT (A52_3F2R, A52_3F1R): | |
600 mix2to1 (samples + 768, samples + 1024, bias); | |
601 break; | |
602 | |
603 case CONVERT (A52_2F1R, A52_2F2R): | |
604 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); | |
605 break; | |
606 | |
607 case CONVERT (A52_3F1R, A52_2F2R): | |
608 mix3to2 (samples, bias); | |
609 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
610 break; | |
611 | |
612 case CONVERT (A52_3F2R, A52_2F2R): | |
613 mix3to2 (samples, bias); | |
614 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
615 memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t)); | |
616 break; | |
617 | |
618 case CONVERT (A52_3F1R, A52_3F2R): | |
12137 | 619 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); |
3394 | 620 break; |
621 } | |
622 } | |
623 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
624 void upmix_C (sample_t * samples, int acmod, int output) |
3394 | 625 { |
626 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
627 | |
628 case CONVERT (A52_CHANNEL, A52_CHANNEL2): | |
629 memcpy (samples + 256, samples, 256 * sizeof (sample_t)); | |
630 break; | |
631 | |
632 case CONVERT (A52_3F2R, A52_MONO): | |
633 zero (samples + 1024); | |
634 case CONVERT (A52_3F1R, A52_MONO): | |
635 case CONVERT (A52_2F2R, A52_MONO): | |
636 zero (samples + 768); | |
637 case CONVERT (A52_3F, A52_MONO): | |
638 case CONVERT (A52_2F1R, A52_MONO): | |
639 zero (samples + 512); | |
640 case CONVERT (A52_CHANNEL, A52_MONO): | |
641 case CONVERT (A52_STEREO, A52_MONO): | |
642 zero (samples + 256); | |
643 break; | |
644 | |
645 case CONVERT (A52_3F2R, A52_STEREO): | |
646 case CONVERT (A52_3F2R, A52_DOLBY): | |
647 zero (samples + 1024); | |
648 case CONVERT (A52_3F1R, A52_STEREO): | |
649 case CONVERT (A52_3F1R, A52_DOLBY): | |
650 zero (samples + 768); | |
651 case CONVERT (A52_3F, A52_STEREO): | |
652 case CONVERT (A52_3F, A52_DOLBY): | |
653 mix_3to2: | |
654 memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t)); | |
655 zero (samples + 256); | |
656 break; | |
657 | |
658 case CONVERT (A52_2F2R, A52_STEREO): | |
659 case CONVERT (A52_2F2R, A52_DOLBY): | |
660 zero (samples + 768); | |
661 case CONVERT (A52_2F1R, A52_STEREO): | |
662 case CONVERT (A52_2F1R, A52_DOLBY): | |
663 zero (samples + 512); | |
664 break; | |
665 | |
666 case CONVERT (A52_3F2R, A52_3F): | |
667 zero (samples + 1024); | |
668 case CONVERT (A52_3F1R, A52_3F): | |
669 case CONVERT (A52_2F2R, A52_2F1R): | |
670 zero (samples + 768); | |
671 break; | |
672 | |
673 case CONVERT (A52_3F2R, A52_3F1R): | |
674 zero (samples + 1024); | |
675 break; | |
676 | |
677 case CONVERT (A52_3F2R, A52_2F1R): | |
678 zero (samples + 1024); | |
679 case CONVERT (A52_3F1R, A52_2F1R): | |
680 mix_31to21: | |
681 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); | |
682 goto mix_3to2; | |
683 | |
684 case CONVERT (A52_3F2R, A52_2F2R): | |
685 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); | |
686 goto mix_31to21; | |
687 } | |
688 } | |
3904 | 689 |
16173 | 690 #if defined(ARCH_X86) || defined(ARCH_X86_64) |
3904 | 691 static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias) |
692 { | |
693 asm volatile( | |
694 "movlps %2, %%xmm7 \n\t" | |
695 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 696 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
697 ASMALIGN16 |
3904 | 698 "1: \n\t" |
16173 | 699 "movaps (%0, %%"REG_S"), %%xmm0 \n\t" |
700 "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" | |
701 "addps (%1, %%"REG_S"), %%xmm0 \n\t" | |
702 "addps 16(%1, %%"REG_S"), %%xmm1\n\t" | |
3904 | 703 "addps %%xmm7, %%xmm0 \n\t" |
704 "addps %%xmm7, %%xmm1 \n\t" | |
16173 | 705 "movaps %%xmm0, (%1, %%"REG_S") \n\t" |
706 "movaps %%xmm1, 16(%1, %%"REG_S")\n\t" | |
707 "add $32, %%"REG_S" \n\t" | |
3904 | 708 " jnz 1b \n\t" |
709 :: "r" (src+256), "r" (dest+256), "m" (bias) | |
16173 | 710 : "%"REG_S |
3904 | 711 ); |
712 } | |
713 | |
714 static void mix3to1_SSE (sample_t * samples, sample_t bias) | |
715 { | |
716 asm volatile( | |
717 "movlps %1, %%xmm7 \n\t" | |
718 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 719 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
720 ASMALIGN16 |
3904 | 721 "1: \n\t" |
16173 | 722 "movaps (%0, %%"REG_S"), %%xmm0 \n\t" |
723 "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" | |
724 "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" | |
3904 | 725 "addps %%xmm7, %%xmm1 \n\t" |
726 "addps %%xmm1, %%xmm0 \n\t" | |
16173 | 727 "movaps %%xmm0, (%0, %%"REG_S") \n\t" |
728 "add $16, %%"REG_S" \n\t" | |
3904 | 729 " jnz 1b \n\t" |
730 :: "r" (samples+256), "m" (bias) | |
16173 | 731 : "%"REG_S |
3904 | 732 ); |
733 } | |
734 | |
735 static void mix4to1_SSE (sample_t * samples, sample_t bias) | |
736 { | |
737 asm volatile( | |
738 "movlps %1, %%xmm7 \n\t" | |
739 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 740 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
741 ASMALIGN16 |
3904 | 742 "1: \n\t" |
16173 | 743 "movaps (%0, %%"REG_S"), %%xmm0 \n\t" |
744 "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" | |
745 "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" | |
746 "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" | |
3904 | 747 "addps %%xmm7, %%xmm0 \n\t" |
748 "addps %%xmm1, %%xmm0 \n\t" | |
16173 | 749 "movaps %%xmm0, (%0, %%"REG_S") \n\t" |
750 "add $16, %%"REG_S" \n\t" | |
3904 | 751 " jnz 1b \n\t" |
752 :: "r" (samples+256), "m" (bias) | |
16173 | 753 : "%"REG_S |
3904 | 754 ); |
755 } | |
756 | |
757 static void mix5to1_SSE (sample_t * samples, sample_t bias) | |
758 { | |
759 asm volatile( | |
760 "movlps %1, %%xmm7 \n\t" | |
761 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 762 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
763 ASMALIGN16 |
3904 | 764 "1: \n\t" |
16173 | 765 "movaps (%0, %%"REG_S"), %%xmm0 \n\t" |
766 "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" | |
767 "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" | |
768 "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" | |
3904 | 769 "addps %%xmm7, %%xmm0 \n\t" |
16173 | 770 "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" |
3904 | 771 "addps %%xmm1, %%xmm0 \n\t" |
16173 | 772 "movaps %%xmm0, (%0, %%"REG_S") \n\t" |
773 "add $16, %%"REG_S" \n\t" | |
3904 | 774 " jnz 1b \n\t" |
775 :: "r" (samples+256), "m" (bias) | |
16173 | 776 : "%"REG_S |
3904 | 777 ); |
778 } | |
779 | |
780 static void mix3to2_SSE (sample_t * samples, sample_t bias) | |
781 { | |
782 asm volatile( | |
783 "movlps %1, %%xmm7 \n\t" | |
784 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 785 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
786 ASMALIGN16 |
3904 | 787 "1: \n\t" |
16173 | 788 "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" |
3904 | 789 "addps %%xmm7, %%xmm0 \n\t" //common |
16173 | 790 "movaps (%0, %%"REG_S"), %%xmm1 \n\t" |
791 "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" | |
3904 | 792 "addps %%xmm0, %%xmm1 \n\t" |
793 "addps %%xmm0, %%xmm2 \n\t" | |
16173 | 794 "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
795 "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" | |
796 "add $16, %%"REG_S" \n\t" | |
3904 | 797 " jnz 1b \n\t" |
798 :: "r" (samples+256), "m" (bias) | |
16173 | 799 : "%"REG_S |
3904 | 800 ); |
801 } | |
802 | |
803 static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias) | |
804 { | |
805 asm volatile( | |
806 "movlps %2, %%xmm7 \n\t" | |
807 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 808 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
809 ASMALIGN16 |
3904 | 810 "1: \n\t" |
16173 | 811 "movaps 1024(%1, %%"REG_S"), %%xmm0\n\t" |
3904 | 812 "addps %%xmm7, %%xmm0 \n\t" //common |
16173 | 813 "movaps (%0, %%"REG_S"), %%xmm1 \n\t" |
814 "movaps (%1, %%"REG_S"), %%xmm2 \n\t" | |
3904 | 815 "addps %%xmm0, %%xmm1 \n\t" |
816 "addps %%xmm0, %%xmm2 \n\t" | |
16173 | 817 "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
818 "movaps %%xmm2, (%1, %%"REG_S") \n\t" | |
819 "add $16, %%"REG_S" \n\t" | |
3904 | 820 " jnz 1b \n\t" |
821 :: "r" (left+256), "r" (right+256), "m" (bias) | |
16173 | 822 : "%"REG_S |
3904 | 823 ); |
824 } | |
825 | |
826 static void mix21toS_SSE (sample_t * samples, sample_t bias) | |
827 { | |
828 asm volatile( | |
829 "movlps %1, %%xmm7 \n\t" | |
830 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 831 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
832 ASMALIGN16 |
3904 | 833 "1: \n\t" |
16173 | 834 "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" // surround |
835 "movaps (%0, %%"REG_S"), %%xmm1 \n\t" | |
836 "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t" | |
3904 | 837 "addps %%xmm7, %%xmm1 \n\t" |
838 "addps %%xmm7, %%xmm2 \n\t" | |
839 "subps %%xmm0, %%xmm1 \n\t" | |
840 "addps %%xmm0, %%xmm2 \n\t" | |
16173 | 841 "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
842 "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" | |
843 "add $16, %%"REG_S" \n\t" | |
3904 | 844 " jnz 1b \n\t" |
845 :: "r" (samples+256), "m" (bias) | |
16173 | 846 : "%"REG_S |
3904 | 847 ); |
848 } | |
849 | |
850 static void mix31to2_SSE (sample_t * samples, sample_t bias) | |
851 { | |
852 asm volatile( | |
853 "movlps %1, %%xmm7 \n\t" | |
854 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 855 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
856 ASMALIGN16 |
3904 | 857 "1: \n\t" |
16173 | 858 "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" |
859 "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" | |
3904 | 860 "addps %%xmm7, %%xmm0 \n\t" // common |
16173 | 861 "movaps (%0, %%"REG_S"), %%xmm1 \n\t" |
862 "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" | |
3904 | 863 "addps %%xmm0, %%xmm1 \n\t" |
864 "addps %%xmm0, %%xmm2 \n\t" | |
16173 | 865 "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
866 "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" | |
867 "add $16, %%"REG_S" \n\t" | |
3904 | 868 " jnz 1b \n\t" |
869 :: "r" (samples+256), "m" (bias) | |
16173 | 870 : "%"REG_S |
3904 | 871 ); |
872 } | |
873 | |
874 static void mix31toS_SSE (sample_t * samples, sample_t bias) | |
875 { | |
876 asm volatile( | |
877 "movlps %1, %%xmm7 \n\t" | |
878 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 879 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
880 ASMALIGN16 |
3904 | 881 "1: \n\t" |
16173 | 882 "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" |
883 "movaps 3072(%0, %%"REG_S"), %%xmm3\n\t" // surround | |
3904 | 884 "addps %%xmm7, %%xmm0 \n\t" // common |
16173 | 885 "movaps (%0, %%"REG_S"), %%xmm1 \n\t" |
886 "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" | |
3904 | 887 "addps %%xmm0, %%xmm1 \n\t" |
888 "addps %%xmm0, %%xmm2 \n\t" | |
889 "subps %%xmm3, %%xmm1 \n\t" | |
890 "addps %%xmm3, %%xmm2 \n\t" | |
16173 | 891 "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
892 "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" | |
893 "add $16, %%"REG_S" \n\t" | |
3904 | 894 " jnz 1b \n\t" |
895 :: "r" (samples+256), "m" (bias) | |
16173 | 896 : "%"REG_S |
3904 | 897 ); |
898 } | |
899 | |
900 static void mix22toS_SSE (sample_t * samples, sample_t bias) | |
901 { | |
902 asm volatile( | |
903 "movlps %1, %%xmm7 \n\t" | |
904 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 905 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
906 ASMALIGN16 |
3904 | 907 "1: \n\t" |
16173 | 908 "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" |
909 "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" // surround | |
910 "movaps (%0, %%"REG_S"), %%xmm1 \n\t" | |
911 "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t" | |
3904 | 912 "addps %%xmm7, %%xmm1 \n\t" |
913 "addps %%xmm7, %%xmm2 \n\t" | |
914 "subps %%xmm0, %%xmm1 \n\t" | |
915 "addps %%xmm0, %%xmm2 \n\t" | |
16173 | 916 "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
917 "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" | |
918 "add $16, %%"REG_S" \n\t" | |
3904 | 919 " jnz 1b \n\t" |
920 :: "r" (samples+256), "m" (bias) | |
16173 | 921 : "%"REG_S |
3904 | 922 ); |
923 } | |
924 | |
925 static void mix32to2_SSE (sample_t * samples, sample_t bias) | |
926 { | |
927 asm volatile( | |
928 "movlps %1, %%xmm7 \n\t" | |
929 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 930 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
931 ASMALIGN16 |
3904 | 932 "1: \n\t" |
16173 | 933 "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" |
3904 | 934 "addps %%xmm7, %%xmm0 \n\t" // common |
935 "movaps %%xmm0, %%xmm1 \n\t" // common | |
16173 | 936 "addps (%0, %%"REG_S"), %%xmm0 \n\t" |
937 "addps 2048(%0, %%"REG_S"), %%xmm1\n\t" | |
938 "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" | |
939 "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" | |
940 "movaps %%xmm0, (%0, %%"REG_S") \n\t" | |
941 "movaps %%xmm1, 1024(%0, %%"REG_S")\n\t" | |
942 "add $16, %%"REG_S" \n\t" | |
3904 | 943 " jnz 1b \n\t" |
944 :: "r" (samples+256), "m" (bias) | |
16173 | 945 : "%"REG_S |
3904 | 946 ); |
947 } | |
948 | |
949 static void mix32toS_SSE (sample_t * samples, sample_t bias) | |
950 { | |
951 asm volatile( | |
952 "movlps %1, %%xmm7 \n\t" | |
953 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 954 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
955 ASMALIGN16 |
3904 | 956 "1: \n\t" |
16173 | 957 "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" |
958 "movaps 3072(%0, %%"REG_S"), %%xmm2\n\t" | |
3904 | 959 "addps %%xmm7, %%xmm0 \n\t" // common |
16173 | 960 "addps 4096(%0, %%"REG_S"), %%xmm2\n\t" // surround |
961 "movaps (%0, %%"REG_S"), %%xmm1 \n\t" | |
962 "movaps 2048(%0, %%"REG_S"), %%xmm3\n\t" | |
3904 | 963 "subps %%xmm2, %%xmm1 \n\t" |
964 "addps %%xmm2, %%xmm3 \n\t" | |
965 "addps %%xmm0, %%xmm1 \n\t" | |
966 "addps %%xmm0, %%xmm3 \n\t" | |
16173 | 967 "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
968 "movaps %%xmm3, 1024(%0, %%"REG_S")\n\t" | |
969 "add $16, %%"REG_S" \n\t" | |
3904 | 970 " jnz 1b \n\t" |
971 :: "r" (samples+256), "m" (bias) | |
16173 | 972 : "%"REG_S |
3904 | 973 ); |
974 } | |
975 | |
976 static void move2to1_SSE (sample_t * src, sample_t * dest, sample_t bias) | |
977 { | |
978 asm volatile( | |
979 "movlps %2, %%xmm7 \n\t" | |
980 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 981 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
982 ASMALIGN16 |
3904 | 983 "1: \n\t" |
16173 | 984 "movaps (%0, %%"REG_S"), %%xmm0 \n\t" |
985 "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" | |
986 "addps 1024(%0, %%"REG_S"), %%xmm0\n\t" | |
987 "addps 1040(%0, %%"REG_S"), %%xmm1\n\t" | |
3904 | 988 "addps %%xmm7, %%xmm0 \n\t" |
989 "addps %%xmm7, %%xmm1 \n\t" | |
16173 | 990 "movaps %%xmm0, (%1, %%"REG_S") \n\t" |
991 "movaps %%xmm1, 16(%1, %%"REG_S")\n\t" | |
992 "add $32, %%"REG_S" \n\t" | |
3904 | 993 " jnz 1b \n\t" |
994 :: "r" (src+256), "r" (dest+256), "m" (bias) | |
16173 | 995 : "%"REG_S |
3904 | 996 ); |
997 } | |
998 | |
999 static void zero_MMX(sample_t * samples) | |
1000 { | |
1001 asm volatile( | |
16173 | 1002 "mov $-1024, %%"REG_S" \n\t" |
3904 | 1003 "pxor %%mm0, %%mm0 \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
1004 ASMALIGN16 |
3904 | 1005 "1: \n\t" |
16173 | 1006 "movq %%mm0, (%0, %%"REG_S") \n\t" |
1007 "movq %%mm0, 8(%0, %%"REG_S") \n\t" | |
1008 "movq %%mm0, 16(%0, %%"REG_S") \n\t" | |
1009 "movq %%mm0, 24(%0, %%"REG_S") \n\t" | |
1010 "add $32, %%"REG_S" \n\t" | |
3904 | 1011 " jnz 1b \n\t" |
1012 "emms" | |
1013 :: "r" (samples+256) | |
16173 | 1014 : "%"REG_S |
3904 | 1015 ); |
1016 } | |
1017 | |
/*
 * MMX block copy: copies size bytes from src to dest in 64-byte chunks.
 *
 * dest, src: non-overlapping buffers; 8-byte alignment is recommended.
 * size:      byte count.  Only whole 64-byte chunks are copied
 *            (size / 64 iterations); a trailing remainder is ignored.
 *
 * Executes emms before returning so the x87 FPU is usable afterwards.
 * Note: currently unused.
 */
static void copy_MMX(void *dest,const void *src,unsigned size)
{
    const unsigned char *from = src;
    unsigned char *to = dest;
    unsigned i;

    size /= 64;
    for(i=0;i<size;i++)
    {
	/* The pointers are passed in registers and dereferenced; "m"(src)
	   would address the pointer variable itself, not the buffer. */
	__asm __volatile(
	"movq (%0), %%mm0\n\t"
	"movq 8(%0), %%mm1\n\t"
	"movq 16(%0), %%mm2\n\t"
	"movq 24(%0), %%mm3\n\t"
	"movq 32(%0), %%mm4\n\t"
	"movq 40(%0), %%mm5\n\t"
	"movq 48(%0), %%mm6\n\t"
	"movq 56(%0), %%mm7\n\t"
	"movq %%mm0, (%1)\n\t"
	"movq %%mm1, 8(%1)\n\t"
	"movq %%mm2, 16(%1)\n\t"
	"movq %%mm3, 24(%1)\n\t"
	"movq %%mm4, 32(%1)\n\t"
	"movq %%mm5, 40(%1)\n\t"
	"movq %%mm6, 48(%1)\n\t"
	"movq %%mm7, 56(%1)\n\t"
	:
	:"r"(from),"r"(to)
	:"memory");
	/* advance to the next 64-byte chunk */
	from += 64;
	to += 64;
    }
    /* leave MMX state so later floating-point code works */
    __asm __volatile("emms" ::: "memory");
}
3904 | 1050 |
1051 static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias, | |
1052 sample_t clev, sample_t slev) | |
1053 { | |
1054 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
1055 | |
1056 case CONVERT (A52_CHANNEL, A52_CHANNEL2): | |
1057 memcpy (samples, samples + 256, 256 * sizeof (sample_t)); | |
1058 break; | |
1059 | |
1060 case CONVERT (A52_CHANNEL, A52_MONO): | |
1061 case CONVERT (A52_STEREO, A52_MONO): | |
1062 mix_2to1_SSE: | |
1063 mix2to1_SSE (samples, samples + 256, bias); | |
1064 break; | |
1065 | |
1066 case CONVERT (A52_2F1R, A52_MONO): | |
1067 if (slev == 0) | |
1068 goto mix_2to1_SSE; | |
1069 case CONVERT (A52_3F, A52_MONO): | |
1070 mix_3to1_SSE: | |
1071 mix3to1_SSE (samples, bias); | |
1072 break; | |
1073 | |
1074 case CONVERT (A52_3F1R, A52_MONO): | |
1075 if (slev == 0) | |
1076 goto mix_3to1_SSE; | |
1077 case CONVERT (A52_2F2R, A52_MONO): | |
1078 if (slev == 0) | |
1079 goto mix_2to1_SSE; | |
1080 mix4to1_SSE (samples, bias); | |
1081 break; | |
1082 | |
1083 case CONVERT (A52_3F2R, A52_MONO): | |
1084 if (slev == 0) | |
1085 goto mix_3to1_SSE; | |
1086 mix5to1_SSE (samples, bias); | |
1087 break; | |
1088 | |
1089 case CONVERT (A52_MONO, A52_DOLBY): | |
1090 memcpy (samples + 256, samples, 256 * sizeof (sample_t)); | |
1091 break; | |
1092 | |
1093 case CONVERT (A52_3F, A52_STEREO): | |
1094 case CONVERT (A52_3F, A52_DOLBY): | |
1095 mix_3to2_SSE: | |
1096 mix3to2_SSE (samples, bias); | |
1097 break; | |
1098 | |
1099 case CONVERT (A52_2F1R, A52_STEREO): | |
1100 if (slev == 0) | |
1101 break; | |
1102 mix21to2_SSE (samples, samples + 256, bias); | |
1103 break; | |
1104 | |
1105 case CONVERT (A52_2F1R, A52_DOLBY): | |
1106 mix21toS_SSE (samples, bias); | |
1107 break; | |
1108 | |
1109 case CONVERT (A52_3F1R, A52_STEREO): | |
1110 if (slev == 0) | |
1111 goto mix_3to2_SSE; | |
1112 mix31to2_SSE (samples, bias); | |
1113 break; | |
1114 | |
1115 case CONVERT (A52_3F1R, A52_DOLBY): | |
1116 mix31toS_SSE (samples, bias); | |
1117 break; | |
1118 | |
1119 case CONVERT (A52_2F2R, A52_STEREO): | |
1120 if (slev == 0) | |
1121 break; | |
1122 mix2to1_SSE (samples, samples + 512, bias); | |
1123 mix2to1_SSE (samples + 256, samples + 768, bias); | |
1124 break; | |
1125 | |
1126 case CONVERT (A52_2F2R, A52_DOLBY): | |
1127 mix22toS_SSE (samples, bias); | |
1128 break; | |
1129 | |
1130 case CONVERT (A52_3F2R, A52_STEREO): | |
1131 if (slev == 0) | |
1132 goto mix_3to2_SSE; | |
1133 mix32to2_SSE (samples, bias); | |
1134 break; | |
1135 | |
1136 case CONVERT (A52_3F2R, A52_DOLBY): | |
1137 mix32toS_SSE (samples, bias); | |
1138 break; | |
1139 | |
1140 case CONVERT (A52_3F1R, A52_3F): | |
1141 if (slev == 0) | |
1142 break; | |
1143 mix21to2_SSE (samples, samples + 512, bias); | |
1144 break; | |
1145 | |
1146 case CONVERT (A52_3F2R, A52_3F): | |
1147 if (slev == 0) | |
1148 break; | |
1149 mix2to1_SSE (samples, samples + 768, bias); | |
1150 mix2to1_SSE (samples + 512, samples + 1024, bias); | |
1151 break; | |
1152 | |
1153 case CONVERT (A52_3F1R, A52_2F1R): | |
1154 mix3to2_SSE (samples, bias); | |
1155 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1156 break; | |
1157 | |
1158 case CONVERT (A52_2F2R, A52_2F1R): | |
1159 mix2to1_SSE (samples + 512, samples + 768, bias); | |
1160 break; | |
1161 | |
1162 case CONVERT (A52_3F2R, A52_2F1R): | |
1163 mix3to2_SSE (samples, bias); //FIXME possible bug? (output doesnt seem to be used) | |
1164 move2to1_SSE (samples + 768, samples + 512, bias); | |
1165 break; | |
1166 | |
1167 case CONVERT (A52_3F2R, A52_3F1R): | |
1168 mix2to1_SSE (samples + 768, samples + 1024, bias); | |
1169 break; | |
1170 | |
1171 case CONVERT (A52_2F1R, A52_2F2R): | |
1172 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); | |
1173 break; | |
1174 | |
1175 case CONVERT (A52_3F1R, A52_2F2R): | |
1176 mix3to2_SSE (samples, bias); | |
1177 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1178 break; | |
1179 | |
1180 case CONVERT (A52_3F2R, A52_2F2R): | |
1181 mix3to2_SSE (samples, bias); | |
1182 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1183 memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t)); | |
1184 break; | |
1185 | |
1186 case CONVERT (A52_3F1R, A52_3F2R): | |
12137 | 1187 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); |
3904 | 1188 break; |
1189 } | |
1190 } | |
1191 | |
1192 static void upmix_MMX (sample_t * samples, int acmod, int output) | |
1193 { | |
1194 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
1195 | |
1196 case CONVERT (A52_CHANNEL, A52_CHANNEL2): | |
1197 memcpy (samples + 256, samples, 256 * sizeof (sample_t)); | |
1198 break; | |
1199 | |
1200 case CONVERT (A52_3F2R, A52_MONO): | |
1201 zero_MMX (samples + 1024); | |
1202 case CONVERT (A52_3F1R, A52_MONO): | |
1203 case CONVERT (A52_2F2R, A52_MONO): | |
1204 zero_MMX (samples + 768); | |
1205 case CONVERT (A52_3F, A52_MONO): | |
1206 case CONVERT (A52_2F1R, A52_MONO): | |
1207 zero_MMX (samples + 512); | |
1208 case CONVERT (A52_CHANNEL, A52_MONO): | |
1209 case CONVERT (A52_STEREO, A52_MONO): | |
1210 zero_MMX (samples + 256); | |
1211 break; | |
1212 | |
1213 case CONVERT (A52_3F2R, A52_STEREO): | |
1214 case CONVERT (A52_3F2R, A52_DOLBY): | |
1215 zero_MMX (samples + 1024); | |
1216 case CONVERT (A52_3F1R, A52_STEREO): | |
1217 case CONVERT (A52_3F1R, A52_DOLBY): | |
1218 zero_MMX (samples + 768); | |
1219 case CONVERT (A52_3F, A52_STEREO): | |
1220 case CONVERT (A52_3F, A52_DOLBY): | |
1221 mix_3to2_MMX: | |
1222 memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t)); | |
1223 zero_MMX (samples + 256); | |
1224 break; | |
1225 | |
1226 case CONVERT (A52_2F2R, A52_STEREO): | |
1227 case CONVERT (A52_2F2R, A52_DOLBY): | |
1228 zero_MMX (samples + 768); | |
1229 case CONVERT (A52_2F1R, A52_STEREO): | |
1230 case CONVERT (A52_2F1R, A52_DOLBY): | |
1231 zero_MMX (samples + 512); | |
1232 break; | |
1233 | |
1234 case CONVERT (A52_3F2R, A52_3F): | |
1235 zero_MMX (samples + 1024); | |
1236 case CONVERT (A52_3F1R, A52_3F): | |
1237 case CONVERT (A52_2F2R, A52_2F1R): | |
1238 zero_MMX (samples + 768); | |
1239 break; | |
1240 | |
1241 case CONVERT (A52_3F2R, A52_3F1R): | |
1242 zero_MMX (samples + 1024); | |
1243 break; | |
1244 | |
1245 case CONVERT (A52_3F2R, A52_2F1R): | |
1246 zero_MMX (samples + 1024); | |
1247 case CONVERT (A52_3F1R, A52_2F1R): | |
1248 mix_31to21_MMX: | |
1249 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); | |
1250 goto mix_3to2_MMX; | |
1251 | |
1252 case CONVERT (A52_3F2R, A52_2F2R): | |
1253 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); | |
1254 goto mix_31to21_MMX; | |
1255 } | |
1256 } | |
4233 | 1257 |
1258 static void mix2to1_3dnow (sample_t * dest, sample_t * src, sample_t bias) | |
1259 { | |
1260 asm volatile( | |
1261 "movd %2, %%mm7 \n\t" | |
1262 "punpckldq %2, %%mm7 \n\t" | |
16173 | 1263 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
1264 ASMALIGN16 |
4233 | 1265 "1: \n\t" |
16173 | 1266 "movq (%0, %%"REG_S"), %%mm0 \n\t" |
1267 "movq 8(%0, %%"REG_S"), %%mm1 \n\t" | |
1268 "movq 16(%0, %%"REG_S"), %%mm2 \n\t" | |
1269 "movq 24(%0, %%"REG_S"), %%mm3 \n\t" | |
1270 "pfadd (%1, %%"REG_S"), %%mm0 \n\t" | |
1271 "pfadd 8(%1, %%"REG_S"), %%mm1 \n\t" | |
1272 "pfadd 16(%1, %%"REG_S"), %%mm2 \n\t" | |
1273 "pfadd 24(%1, %%"REG_S"), %%mm3 \n\t" | |
4233 | 1274 "pfadd %%mm7, %%mm0 \n\t" |
1275 "pfadd %%mm7, %%mm1 \n\t" | |
1276 "pfadd %%mm7, %%mm2 \n\t" | |
1277 "pfadd %%mm7, %%mm3 \n\t" | |
16173 | 1278 "movq %%mm0, (%1, %%"REG_S") \n\t" |
1279 "movq %%mm1, 8(%1, %%"REG_S") \n\t" | |
1280 "movq %%mm2, 16(%1, %%"REG_S") \n\t" | |
1281 "movq %%mm3, 24(%1, %%"REG_S") \n\t" | |
1282 "add $32, %%"REG_S" \n\t" | |
4233 | 1283 " jnz 1b \n\t" |
1284 :: "r" (src+256), "r" (dest+256), "m" (bias) | |
16173 | 1285 : "%"REG_S |
4233 | 1286 ); |
1287 } | |
1288 | |
1289 static void mix3to1_3dnow (sample_t * samples, sample_t bias) | |
1290 { | |
1291 asm volatile( | |
1292 "movd %1, %%mm7 \n\t" | |
1293 "punpckldq %1, %%mm7 \n\t" | |
16173 | 1294 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
1295 ASMALIGN16 |
4233 | 1296 "1: \n\t" |
16173 | 1297 "movq (%0, %%"REG_S"), %%mm0 \n\t" |
1298 "movq 8(%0, %%"REG_S"), %%mm1 \n\t" | |
1299 "movq 1024(%0, %%"REG_S"), %%mm2\n\t" | |
1300 "movq 1032(%0, %%"REG_S"), %%mm3\n\t" | |
1301 "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" | |
1302 "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" | |
4233 | 1303 "pfadd %%mm7, %%mm0 \n\t" |
1304 "pfadd %%mm7, %%mm1 \n\t" | |
1305 "pfadd %%mm2, %%mm0 \n\t" | |
1306 "pfadd %%mm3, %%mm1 \n\t" | |
16173 | 1307 "movq %%mm0, (%0, %%"REG_S") \n\t" |
1308 "movq %%mm1, 8(%0, %%"REG_S") \n\t" | |
1309 "add $16, %%"REG_S" \n\t" | |
4233 | 1310 " jnz 1b \n\t" |
1311 :: "r" (samples+256), "m" (bias) | |
16173 | 1312 : "%"REG_S |
4233 | 1313 ); |
1314 } | |
1315 | |
1316 static void mix4to1_3dnow (sample_t * samples, sample_t bias) | |
1317 { | |
1318 asm volatile( | |
1319 "movd %1, %%mm7 \n\t" | |
1320 "punpckldq %1, %%mm7 \n\t" | |
16173 | 1321 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
1322 ASMALIGN16 |
4233 | 1323 "1: \n\t" |
16173 | 1324 "movq (%0, %%"REG_S"), %%mm0 \n\t" |
1325 "movq 8(%0, %%"REG_S"), %%mm1 \n\t" | |
1326 "movq 1024(%0, %%"REG_S"), %%mm2\n\t" | |
1327 "movq 1032(%0, %%"REG_S"), %%mm3\n\t" | |
1328 "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" | |
1329 "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" | |
1330 "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" | |
1331 "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t" | |
4233 | 1332 "pfadd %%mm7, %%mm0 \n\t" |
1333 "pfadd %%mm7, %%mm1 \n\t" | |
1334 "pfadd %%mm2, %%mm0 \n\t" | |
1335 "pfadd %%mm3, %%mm1 \n\t" | |
16173 | 1336 "movq %%mm0, (%0, %%"REG_S") \n\t" |
1337 "movq %%mm1, 8(%0, %%"REG_S") \n\t" | |
1338 "add $16, %%"REG_S" \n\t" | |
4233 | 1339 " jnz 1b \n\t" |
1340 :: "r" (samples+256), "m" (bias) | |
16173 | 1341 : "%"REG_S |
4233 | 1342 ); |
1343 } | |
1344 | |
1345 static void mix5to1_3dnow (sample_t * samples, sample_t bias) | |
1346 { | |
1347 asm volatile( | |
1348 "movd %1, %%mm7 \n\t" | |
1349 "punpckldq %1, %%mm7 \n\t" | |
16173 | 1350 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
1351 ASMALIGN16 |
4233 | 1352 "1: \n\t" |
16173 | 1353 "movq (%0, %%"REG_S"), %%mm0 \n\t" |
1354 "movq 8(%0, %%"REG_S"), %%mm1 \n\t" | |
1355 "movq 1024(%0, %%"REG_S"), %%mm2\n\t" | |
1356 "movq 1032(%0, %%"REG_S"), %%mm3\n\t" | |
1357 "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" | |
1358 "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" | |
1359 "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" | |
1360 "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t" | |
4233 | 1361 "pfadd %%mm7, %%mm0 \n\t" |
1362 "pfadd %%mm7, %%mm1 \n\t" | |
16173 | 1363 "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" |
1364 "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t" | |
4233 | 1365 "pfadd %%mm2, %%mm0 \n\t" |
1366 "pfadd %%mm3, %%mm1 \n\t" | |
16173 | 1367 "movq %%mm0, (%0, %%"REG_S") \n\t" |
1368 "movq %%mm1, 8(%0, %%"REG_S") \n\t" | |
1369 "add $16, %%"REG_S" \n\t" | |
4233 | 1370 " jnz 1b \n\t" |
1371 :: "r" (samples+256), "m" (bias) | |
16173 | 1372 : "%"REG_S |
4233 | 1373 ); |
1374 } | |
1375 | |
1376 static void mix3to2_3dnow (sample_t * samples, sample_t bias) | |
1377 { | |
1378 asm volatile( | |
1379 "movd %1, %%mm7 \n\t" | |
1380 "punpckldq %1, %%mm7 \n\t" | |
16173 | 1381 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
1382 ASMALIGN16 |
4233 | 1383 "1: \n\t" |
16173 | 1384 "movq 1024(%0, %%"REG_S"), %%mm0\n\t" |
1385 "movq 1032(%0, %%"REG_S"), %%mm1\n\t" | |
4233 | 1386 "pfadd %%mm7, %%mm0 \n\t" //common |
1387 "pfadd %%mm7, %%mm1 \n\t" //common | |
16173 | 1388 "movq (%0, %%"REG_S"), %%mm2 \n\t" |
1389 "movq 8(%0, %%"REG_S"), %%mm3 \n\t" | |
1390 "movq 2048(%0, %%"REG_S"), %%mm4\n\t" | |
1391 "movq 2056(%0, %%"REG_S"), %%mm5\n\t" | |
4233 | 1392 "pfadd %%mm0, %%mm2 \n\t" |
5912 | 1393 "pfadd %%mm1, %%mm3 \n\t" |
4233 | 1394 "pfadd %%mm0, %%mm4 \n\t" |
5912 | 1395 "pfadd %%mm1, %%mm5 \n\t" |
16173 | 1396 "movq %%mm2, (%0, %%"REG_S") \n\t" |
1397 "movq %%mm3, 8(%0, %%"REG_S") \n\t" | |
1398 "movq %%mm4, 1024(%0, %%"REG_S")\n\t" | |
1399 "movq %%mm5, 1032(%0, %%"REG_S")\n\t" | |
1400 "add $16, %%"REG_S" \n\t" | |
4233 | 1401 " jnz 1b \n\t" |
1402 :: "r" (samples+256), "m" (bias) | |
16173 | 1403 : "%"REG_S |
4233 | 1404 ); |
1405 } | |
1406 | |
1407 static void mix21to2_3dnow (sample_t * left, sample_t * right, sample_t bias) | |
1408 { | |
1409 asm volatile( | |
1410 "movd %2, %%mm7 \n\t" | |
1411 "punpckldq %2, %%mm7 \n\t" | |
16173 | 1412 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
1413 ASMALIGN16 |
4233 | 1414 "1: \n\t" |
16173 | 1415 "movq 1024(%1, %%"REG_S"), %%mm0\n\t" |
1416 "movq 1032(%1, %%"REG_S"), %%mm1\n\t" | |
4233 | 1417 "pfadd %%mm7, %%mm0 \n\t" //common |
1418 "pfadd %%mm7, %%mm1 \n\t" //common | |
16173 | 1419 "movq (%0, %%"REG_S"), %%mm2 \n\t" |
1420 "movq 8(%0, %%"REG_S"), %%mm3 \n\t" | |
1421 "movq (%1, %%"REG_S"), %%mm4 \n\t" | |
1422 "movq 8(%1, %%"REG_S"), %%mm5 \n\t" | |
4233 | 1423 "pfadd %%mm0, %%mm2 \n\t" |
1424 "pfadd %%mm1, %%mm3 \n\t" | |
1425 "pfadd %%mm0, %%mm4 \n\t" | |
1426 "pfadd %%mm1, %%mm5 \n\t" | |
16173 | 1427 "movq %%mm2, (%0, %%"REG_S") \n\t" |
1428 "movq %%mm3, 8(%0, %%"REG_S") \n\t" | |
1429 "movq %%mm4, (%1, %%"REG_S") \n\t" | |
1430 "movq %%mm5, 8(%1, %%"REG_S") \n\t" | |
1431 "add $16, %%"REG_S" \n\t" | |
4233 | 1432 " jnz 1b \n\t" |
1433 :: "r" (left+256), "r" (right+256), "m" (bias) | |
16173 | 1434 : "%"REG_S |
4233 | 1435 ); |
1436 } | |
1437 | |
1438 static void mix21toS_3dnow (sample_t * samples, sample_t bias) | |
1439 { | |
1440 asm volatile( | |
1441 "movd %1, %%mm7 \n\t" | |
1442 "punpckldq %1, %%mm7 \n\t" | |
16173 | 1443 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
1444 ASMALIGN16 |
4233 | 1445 "1: \n\t" |
16173 | 1446 "movq 2048(%0, %%"REG_S"), %%mm0\n\t" // surround |
1447 "movq 2056(%0, %%"REG_S"), %%mm1\n\t" // surround | |
1448 "movq (%0, %%"REG_S"), %%mm2 \n\t" | |
1449 "movq 8(%0, %%"REG_S"), %%mm3 \n\t" | |
1450 "movq 1024(%0, %%"REG_S"), %%mm4\n\t" | |
1451 "movq 1032(%0, %%"REG_S"), %%mm5\n\t" | |
4233 | 1452 "pfadd %%mm7, %%mm2 \n\t" |
1453 "pfadd %%mm7, %%mm3 \n\t" | |
1454 "pfadd %%mm7, %%mm4 \n\t" | |
1455 "pfadd %%mm7, %%mm5 \n\t" | |
1456 "pfsub %%mm0, %%mm2 \n\t" | |
1457 "pfsub %%mm1, %%mm3 \n\t" | |
1458 "pfadd %%mm0, %%mm4 \n\t" | |
1459 "pfadd %%mm1, %%mm5 \n\t" | |
16173 | 1460 "movq %%mm2, (%0, %%"REG_S") \n\t" |
1461 "movq %%mm3, 8(%0, %%"REG_S") \n\t" | |
1462 "movq %%mm4, 1024(%0, %%"REG_S")\n\t" | |
1463 "movq %%mm5, 1032(%0, %%"REG_S")\n\t" | |
1464 "add $16, %%"REG_S" \n\t" | |
4233 | 1465 " jnz 1b \n\t" |
1466 :: "r" (samples+256), "m" (bias) | |
16173 | 1467 : "%"REG_S |
4233 | 1468 ); |
1469 } | |
1470 | |
1471 static void mix31to2_3dnow (sample_t * samples, sample_t bias) | |
1472 { | |
1473 asm volatile( | |
1474 "movd %1, %%mm7 \n\t" | |
1475 "punpckldq %1, %%mm7 \n\t" | |
16173 | 1476 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
1477 ASMALIGN16 |
4233 | 1478 "1: \n\t" |
16173 | 1479 "movq 1024(%0, %%"REG_S"), %%mm0\n\t" |
1480 "movq 1032(%0, %%"REG_S"), %%mm1\n\t" | |
1481 "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" | |
1482 "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" | |
4233 | 1483 "pfadd %%mm7, %%mm0 \n\t" // common |
1484 "pfadd %%mm7, %%mm1 \n\t" // common | |
16173 | 1485 "movq (%0, %%"REG_S"), %%mm2 \n\t" |
1486 "movq 8(%0, %%"REG_S"), %%mm3 \n\t" | |
1487 "movq 2048(%0, %%"REG_S"), %%mm4\n\t" | |
1488 "movq 2056(%0, %%"REG_S"), %%mm5\n\t" | |
4233 | 1489 "pfadd %%mm0, %%mm2 \n\t" |
1490 "pfadd %%mm1, %%mm3 \n\t" | |
1491 "pfadd %%mm0, %%mm4 \n\t" | |
1492 "pfadd %%mm1, %%mm5 \n\t" | |
16173 | 1493 "movq %%mm2, (%0, %%"REG_S") \n\t" |
1494 "movq %%mm3, 8(%0, %%"REG_S") \n\t" | |
1495 "movq %%mm4, 1024(%0, %%"REG_S")\n\t" | |
1496 "movq %%mm5, 1032(%0, %%"REG_S")\n\t" | |
1497 "add $16, %%"REG_S" \n\t" | |
4233 | 1498 " jnz 1b \n\t" |
1499 :: "r" (samples+256), "m" (bias) | |
16173 | 1500 : "%"REG_S |
4233 | 1501 ); |
1502 } | |
1503 | |
1504 static void mix31toS_3dnow (sample_t * samples, sample_t bias) | |
1505 { | |
1506 asm volatile( | |
1507 "movd %1, %%mm7 \n\t" | |
1508 "punpckldq %1, %%mm7 \n\t" | |
16173 | 1509 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
1510 ASMALIGN16 |
4233 | 1511 "1: \n\t" |
16173 | 1512 "movq 1024(%0, %%"REG_S"), %%mm0\n\t" |
1513 "movq 1032(%0, %%"REG_S"), %%mm1\n\t" | |
4233 | 1514 "pfadd %%mm7, %%mm0 \n\t" // common |
1515 "pfadd %%mm7, %%mm1 \n\t" // common | |
16173 | 1516 "movq (%0, %%"REG_S"), %%mm2 \n\t" |
1517 "movq 8(%0, %%"REG_S"), %%mm3 \n\t" | |
1518 "movq 2048(%0, %%"REG_S"), %%mm4\n\t" | |
1519 "movq 2056(%0, %%"REG_S"), %%mm5\n\t" | |
4233 | 1520 "pfadd %%mm0, %%mm2 \n\t" |
1521 "pfadd %%mm1, %%mm3 \n\t" | |
1522 "pfadd %%mm0, %%mm4 \n\t" | |
1523 "pfadd %%mm1, %%mm5 \n\t" | |
16173 | 1524 "movq 3072(%0, %%"REG_S"), %%mm0\n\t" // surround |
1525 "movq 3080(%0, %%"REG_S"), %%mm1\n\t" // surround | |
4233 | 1526 "pfsub %%mm0, %%mm2 \n\t" |
1527 "pfsub %%mm1, %%mm3 \n\t" | |
1528 "pfadd %%mm0, %%mm4 \n\t" | |
1529 "pfadd %%mm1, %%mm5 \n\t" | |
16173 | 1530 "movq %%mm2, (%0, %%"REG_S") \n\t" |
1531 "movq %%mm3, 8(%0, %%"REG_S") \n\t" | |
1532 "movq %%mm4, 1024(%0, %%"REG_S")\n\t" | |
1533 "movq %%mm5, 1032(%0, %%"REG_S")\n\t" | |
1534 "add $16, %%"REG_S" \n\t" | |
4233 | 1535 " jnz 1b \n\t" |
1536 :: "r" (samples+256), "m" (bias) | |
16173 | 1537 : "%"REG_S |
4233 | 1538 ); |
1539 } | |
1540 | |
1541 static void mix22toS_3dnow (sample_t * samples, sample_t bias) | |
1542 { | |
1543 asm volatile( | |
1544 "movd %1, %%mm7 \n\t" | |
1545 "punpckldq %1, %%mm7 \n\t" | |
16173 | 1546 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
1547 ASMALIGN16 |
4233 | 1548 "1: \n\t" |
16173 | 1549 "movq 2048(%0, %%"REG_S"), %%mm0\n\t" |
1550 "movq 2056(%0, %%"REG_S"), %%mm1\n\t" | |
1551 "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" // surround | |
1552 "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" // surround | |
1553 "movq (%0, %%"REG_S"), %%mm2 \n\t" | |
1554 "movq 8(%0, %%"REG_S"), %%mm3 \n\t" | |
1555 "movq 1024(%0, %%"REG_S"), %%mm4\n\t" | |
1556 "movq 1032(%0, %%"REG_S"), %%mm5\n\t" | |
4233 | 1557 "pfadd %%mm7, %%mm2 \n\t" |
1558 "pfadd %%mm7, %%mm3 \n\t" | |
1559 "pfadd %%mm7, %%mm4 \n\t" | |
1560 "pfadd %%mm7, %%mm5 \n\t" | |
1561 "pfsub %%mm0, %%mm2 \n\t" | |
1562 "pfsub %%mm1, %%mm3 \n\t" | |
1563 "pfadd %%mm0, %%mm4 \n\t" | |
1564 "pfadd %%mm1, %%mm5 \n\t" | |
16173 | 1565 "movq %%mm2, (%0, %%"REG_S") \n\t" |
1566 "movq %%mm3, 8(%0, %%"REG_S") \n\t" | |
1567 "movq %%mm4, 1024(%0, %%"REG_S")\n\t" | |
1568 "movq %%mm5, 1032(%0, %%"REG_S")\n\t" | |
1569 "add $16, %%"REG_S" \n\t" | |
4233 | 1570 " jnz 1b \n\t" |
1571 :: "r" (samples+256), "m" (bias) | |
16173 | 1572 : "%"REG_S |
4233 | 1573 ); |
1574 } | |
1575 | |
1576 static void mix32to2_3dnow (sample_t * samples, sample_t bias) | |
1577 { | |
1578 asm volatile( | |
1579 "movd %1, %%mm7 \n\t" | |
1580 "punpckldq %1, %%mm7 \n\t" | |
16173 | 1581 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
1582 ASMALIGN16 |
4233 | 1583 "1: \n\t" |
16173 | 1584 "movq 1024(%0, %%"REG_S"), %%mm0\n\t" |
1585 "movq 1032(%0, %%"REG_S"), %%mm1\n\t" | |
4233 | 1586 "pfadd %%mm7, %%mm0 \n\t" // common |
1587 "pfadd %%mm7, %%mm1 \n\t" // common | |
1588 "movq %%mm0, %%mm2 \n\t" // common | |
1589 "movq %%mm1, %%mm3 \n\t" // common | |
16173 | 1590 "pfadd (%0, %%"REG_S"), %%mm0 \n\t" |
1591 "pfadd 8(%0, %%"REG_S"), %%mm1 \n\t" | |
1592 "pfadd 2048(%0, %%"REG_S"), %%mm2\n\t" | |
1593 "pfadd 2056(%0, %%"REG_S"), %%mm3\n\t" | |
1594 "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" | |
1595 "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" | |
1596 "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" | |
1597 "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t" | |
1598 "movq %%mm0, (%0, %%"REG_S") \n\t" | |
1599 "movq %%mm1, 8(%0, %%"REG_S") \n\t" | |
1600 "movq %%mm2, 1024(%0, %%"REG_S")\n\t" | |
1601 "movq %%mm3, 1032(%0, %%"REG_S")\n\t" | |
1602 "add $16, %%"REG_S" \n\t" | |
4233 | 1603 " jnz 1b \n\t" |
1604 :: "r" (samples+256), "m" (bias) | |
16173 | 1605 : "%"REG_S |
4233 | 1606 ); |
1607 } | |
1608 | |
1609 /* todo: should be optimized better */ | |
1610 static void mix32toS_3dnow (sample_t * samples, sample_t bias) | |
1611 { | |
1612 asm volatile( | |
16173 | 1613 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
1614 ASMALIGN16 |
4233 | 1615 "1: \n\t" |
1616 "movd %1, %%mm7 \n\t" | |
1617 "punpckldq %1, %%mm7 \n\t" | |
16173 | 1618 "movq 1024(%0, %%"REG_S"), %%mm0\n\t" |
1619 "movq 1032(%0, %%"REG_S"), %%mm1\n\t" | |
1620 "movq 3072(%0, %%"REG_S"), %%mm4\n\t" | |
1621 "movq 3080(%0, %%"REG_S"), %%mm5\n\t" | |
4233 | 1622 "pfadd %%mm7, %%mm0 \n\t" // common |
1623 "pfadd %%mm7, %%mm1 \n\t" // common | |
16173 | 1624 "pfadd 4096(%0, %%"REG_S"), %%mm4\n\t" // surround |
1625 "pfadd 4104(%0, %%"REG_S"), %%mm5\n\t" // surround | |
1626 "movq (%0, %%"REG_S"), %%mm2 \n\t" | |
1627 "movq 8(%0, %%"REG_S"), %%mm3 \n\t" | |
1628 "movq 2048(%0, %%"REG_S"), %%mm6\n\t" | |
1629 "movq 2056(%0, %%"REG_S"), %%mm7\n\t" | |
4233 | 1630 "pfsub %%mm4, %%mm2 \n\t" |
1631 "pfsub %%mm5, %%mm3 \n\t" | |
1632 "pfadd %%mm4, %%mm6 \n\t" | |
1633 "pfadd %%mm5, %%mm7 \n\t" | |
1634 "pfadd %%mm0, %%mm2 \n\t" | |
1635 "pfadd %%mm1, %%mm3 \n\t" | |
1636 "pfadd %%mm0, %%mm6 \n\t" | |
1637 "pfadd %%mm1, %%mm7 \n\t" | |
16173 | 1638 "movq %%mm2, (%0, %%"REG_S") \n\t" |
1639 "movq %%mm3, 8(%0, %%"REG_S") \n\t" | |
1640 "movq %%mm6, 1024(%0, %%"REG_S")\n\t" | |
1641 "movq %%mm7, 1032(%0, %%"REG_S")\n\t" | |
1642 "add $16, %%"REG_S" \n\t" | |
4233 | 1643 " jnz 1b \n\t" |
1644 :: "r" (samples+256), "m" (bias) | |
16173 | 1645 : "%"REG_S |
4233 | 1646 ); |
1647 } | |
1648 | |
1649 static void move2to1_3dnow (sample_t * src, sample_t * dest, sample_t bias) | |
1650 { | |
1651 asm volatile( | |
1652 "movd %2, %%mm7 \n\t" | |
1653 "punpckldq %2, %%mm7 \n\t" | |
16173 | 1654 "mov $-1024, %%"REG_S" \n\t" |
18104
7b408d60de9e
add support for intel mac. mp3lib is not fixed yet.
nplourde
parents:
16173
diff
changeset
|
1655 ASMALIGN16 |
4233 | 1656 "1: \n\t" |
16173 | 1657 "movq (%0, %%"REG_S"), %%mm0 \n\t" |
1658 "movq 8(%0, %%"REG_S"), %%mm1 \n\t" | |
1659 "movq 16(%0, %%"REG_S"), %%mm2 \n\t" | |
1660 "movq 24(%0, %%"REG_S"), %%mm3 \n\t" | |
1661 "pfadd 1024(%0, %%"REG_S"), %%mm0\n\t" | |
1662 "pfadd 1032(%0, %%"REG_S"), %%mm1\n\t" | |
1663 "pfadd 1040(%0, %%"REG_S"), %%mm2\n\t" | |
1664 "pfadd 1048(%0, %%"REG_S"), %%mm3\n\t" | |
4233 | 1665 "pfadd %%mm7, %%mm0 \n\t" |
1666 "pfadd %%mm7, %%mm1 \n\t" | |
1667 "pfadd %%mm7, %%mm2 \n\t" | |
1668 "pfadd %%mm7, %%mm3 \n\t" | |
16173 | 1669 "movq %%mm0, (%1, %%"REG_S") \n\t" |
1670 "movq %%mm1, 8(%1, %%"REG_S") \n\t" | |
1671 "movq %%mm2, 16(%1, %%"REG_S") \n\t" | |
1672 "movq %%mm3, 24(%1, %%"REG_S") \n\t" | |
1673 "add $32, %%"REG_S" \n\t" | |
4233 | 1674 " jnz 1b \n\t" |
1675 :: "r" (src+256), "r" (dest+256), "m" (bias) | |
16173 | 1676 : "%"REG_S |
4233 | 1677 ); |
1678 } | |
1679 | |
1680 static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t bias, | |
1681 sample_t clev, sample_t slev) | |
1682 { | |
1683 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
1684 | |
1685 case CONVERT (A52_CHANNEL, A52_CHANNEL2): | |
1686 memcpy (samples, samples + 256, 256 * sizeof (sample_t)); | |
1687 break; | |
1688 | |
1689 case CONVERT (A52_CHANNEL, A52_MONO): | |
1690 case CONVERT (A52_STEREO, A52_MONO): | |
1691 mix_2to1_3dnow: | |
1692 mix2to1_3dnow (samples, samples + 256, bias); | |
1693 break; | |
1694 | |
1695 case CONVERT (A52_2F1R, A52_MONO): | |
1696 if (slev == 0) | |
1697 goto mix_2to1_3dnow; | |
1698 case CONVERT (A52_3F, A52_MONO): | |
1699 mix_3to1_3dnow: | |
1700 mix3to1_3dnow (samples, bias); | |
1701 break; | |
1702 | |
1703 case CONVERT (A52_3F1R, A52_MONO): | |
1704 if (slev == 0) | |
1705 goto mix_3to1_3dnow; | |
1706 case CONVERT (A52_2F2R, A52_MONO): | |
1707 if (slev == 0) | |
1708 goto mix_2to1_3dnow; | |
1709 mix4to1_3dnow (samples, bias); | |
1710 break; | |
1711 | |
1712 case CONVERT (A52_3F2R, A52_MONO): | |
1713 if (slev == 0) | |
1714 goto mix_3to1_3dnow; | |
1715 mix5to1_3dnow (samples, bias); | |
1716 break; | |
1717 | |
1718 case CONVERT (A52_MONO, A52_DOLBY): | |
1719 memcpy (samples + 256, samples, 256 * sizeof (sample_t)); | |
1720 break; | |
1721 | |
1722 case CONVERT (A52_3F, A52_STEREO): | |
1723 case CONVERT (A52_3F, A52_DOLBY): | |
1724 mix_3to2_3dnow: | |
1725 mix3to2_3dnow (samples, bias); | |
1726 break; | |
1727 | |
1728 case CONVERT (A52_2F1R, A52_STEREO): | |
1729 if (slev == 0) | |
1730 break; | |
1731 mix21to2_3dnow (samples, samples + 256, bias); | |
1732 break; | |
1733 | |
1734 case CONVERT (A52_2F1R, A52_DOLBY): | |
1735 mix21toS_3dnow (samples, bias); | |
1736 break; | |
1737 | |
1738 case CONVERT (A52_3F1R, A52_STEREO): | |
1739 if (slev == 0) | |
1740 goto mix_3to2_3dnow; | |
1741 mix31to2_3dnow (samples, bias); | |
1742 break; | |
1743 | |
1744 case CONVERT (A52_3F1R, A52_DOLBY): | |
1745 mix31toS_3dnow (samples, bias); | |
1746 break; | |
1747 | |
1748 case CONVERT (A52_2F2R, A52_STEREO): | |
1749 if (slev == 0) | |
1750 break; | |
1751 mix2to1_3dnow (samples, samples + 512, bias); | |
1752 mix2to1_3dnow (samples + 256, samples + 768, bias); | |
1753 break; | |
1754 | |
1755 case CONVERT (A52_2F2R, A52_DOLBY): | |
1756 mix22toS_3dnow (samples, bias); | |
1757 break; | |
1758 | |
1759 case CONVERT (A52_3F2R, A52_STEREO): | |
1760 if (slev == 0) | |
1761 goto mix_3to2_3dnow; | |
1762 mix32to2_3dnow (samples, bias); | |
1763 break; | |
1764 | |
1765 case CONVERT (A52_3F2R, A52_DOLBY): | |
1766 mix32toS_3dnow (samples, bias); | |
1767 break; | |
1768 | |
1769 case CONVERT (A52_3F1R, A52_3F): | |
1770 if (slev == 0) | |
1771 break; | |
1772 mix21to2_3dnow (samples, samples + 512, bias); | |
1773 break; | |
1774 | |
1775 case CONVERT (A52_3F2R, A52_3F): | |
1776 if (slev == 0) | |
1777 break; | |
1778 mix2to1_3dnow (samples, samples + 768, bias); | |
1779 mix2to1_3dnow (samples + 512, samples + 1024, bias); | |
1780 break; | |
1781 | |
1782 case CONVERT (A52_3F1R, A52_2F1R): | |
1783 mix3to2_3dnow (samples, bias); | |
1784 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1785 break; | |
1786 | |
1787 case CONVERT (A52_2F2R, A52_2F1R): | |
1788 mix2to1_3dnow (samples + 512, samples + 768, bias); | |
1789 break; | |
1790 | |
1791 case CONVERT (A52_3F2R, A52_2F1R): | |
1792 mix3to2_3dnow (samples, bias); //FIXME possible bug? (output doesnt seem to be used) | |
1793 move2to1_3dnow (samples + 768, samples + 512, bias); | |
1794 break; | |
1795 | |
1796 case CONVERT (A52_3F2R, A52_3F1R): | |
1797 mix2to1_3dnow (samples + 768, samples + 1024, bias); | |
1798 break; | |
1799 | |
1800 case CONVERT (A52_2F1R, A52_2F2R): | |
1801 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); | |
1802 break; | |
1803 | |
1804 case CONVERT (A52_3F1R, A52_2F2R): | |
1805 mix3to2_3dnow (samples, bias); | |
1806 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1807 break; | |
1808 | |
1809 case CONVERT (A52_3F2R, A52_2F2R): | |
1810 mix3to2_3dnow (samples, bias); | |
1811 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1812 memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t)); | |
1813 break; | |
1814 | |
1815 case CONVERT (A52_3F1R, A52_3F2R): | |
12137 | 1816 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); |
4233 | 1817 break; |
1818 } | |
1819 __asm __volatile("femms":::"memory"); | |
1820 } | |
1821 | |
16173 | 1822 #endif // ARCH_X86 || ARCH_X86_64 |