Mercurial > mplayer.hg
annotate liba52/downmix.c @ 28963:3965344f4a70
synced with r28991
author | Gabrov |
---|---|
date | Thu, 19 Mar 2009 11:02:16 +0000 |
parents | 25337a2147e7 |
children | e83eef58b30a |
rev | line source |
---|---|
3394 | 1 /* |
2 * downmix.c | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
3 * Copyright (C) 2000-2002 Michel Lespinasse <walken@zoy.org> |
3394 | 4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> |
5 * | |
6 * This file is part of a52dec, a free ATSC A-52 stream decoder. | |
7 * See http://liba52.sourceforge.net/ for updates. | |
8 * | |
14991
07f1e7669772
Mark modified files as such to comply more closely with GPL §2a.
diego
parents:
12137
diff
changeset
|
9 * Modified for use with MPlayer, changes contained in liba52_changes.diff. |
18783 | 10 * detailed changelog at http://svn.mplayerhq.hu/mplayer/trunk/ |
14991
07f1e7669772
Mark modified files as such to comply more closely with GPL §2a.
diego
parents:
12137
diff
changeset
|
11 * $Id$ |
07f1e7669772
Mark modified files as such to comply more closely with GPL §2a.
diego
parents:
12137
diff
changeset
|
12 * |
3394 | 13 * a52dec is free software; you can redistribute it and/or modify |
14 * it under the terms of the GNU General Public License as published by | |
15 * the Free Software Foundation; either version 2 of the License, or | |
16 * (at your option) any later version. | |
17 * | |
18 * a52dec is distributed in the hope that it will be useful, | |
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
21 * GNU General Public License for more details. | |
22 * | |
23 * You should have received a copy of the GNU General Public License | |
24 * along with this program; if not, write to the Free Software | |
25 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
3625 | 26 * |
27 * SSE optimizations from Michael Niedermayer (michaelni@gmx.at) | |
3394 | 28 */ |
29 | |
30 #include "config.h" | |
31 | |
32 #include <string.h> | |
33 #include <inttypes.h> | |
34 | |
35 #include "a52.h" | |
36 #include "a52_internal.h" | |
3910
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
37 #include "mm_accel.h" |
3394 | 38 |
/* Pack an (acmod, output) pair into one integer so the pair can be used as a
 * switch/case label: output is shifted left by 3 and acmod is added, so the
 * result is unique as long as acmod is in the range 0..7. */
39 #define CONVERT(acmod,output) (((output) << 3) + (acmod)) | |
40 | |
3904 | 41 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
42 void (*a52_downmix)(sample_t * samples, int acmod, int output, sample_t bias, |
3904 | 43 sample_t clev, sample_t slev)= NULL; |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
44 void (*a52_upmix)(sample_t * samples, int acmod, int output)= NULL; |
3904 | 45 |
46 static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias, | |
47 sample_t clev, sample_t slev); | |
4233 | 48 static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t bias, |
49 sample_t clev, sample_t slev); | |
3904 | 50 static void downmix_C (sample_t * samples, int acmod, int output, sample_t bias, |
51 sample_t clev, sample_t slev); | |
52 static void upmix_MMX (sample_t * samples, int acmod, int output); | |
53 static void upmix_C (sample_t * samples, int acmod, int output); | |
3910
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
54 |
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
55 void downmix_accel_init(uint32_t mm_accel) |
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
56 { |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
57 a52_upmix= upmix_C; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
58 a52_downmix= downmix_C; |
28290 | 59 #if ARCH_X86 || ARCH_X86_64 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
60 if(mm_accel & MM_ACCEL_X86_MMX) a52_upmix= upmix_MMX; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
61 if(mm_accel & MM_ACCEL_X86_SSE) a52_downmix= downmix_SSE; |
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
62 if(mm_accel & MM_ACCEL_X86_3DNOW) a52_downmix= downmix_3dnow; |
3910
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
63 #endif |
db1d556fcf58
runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents:
3904
diff
changeset
|
64 } |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
65 |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
66 int a52_downmix_init (int input, int flags, sample_t * level, |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
67 sample_t clev, sample_t slev) |
3394 | 68 { |
69 static uint8_t table[11][8] = { | |
70 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO, | |
71 A52_STEREO, A52_STEREO, A52_STEREO, A52_STEREO}, | |
72 {A52_MONO, A52_MONO, A52_MONO, A52_MONO, | |
73 A52_MONO, A52_MONO, A52_MONO, A52_MONO}, | |
74 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO, | |
75 A52_STEREO, A52_STEREO, A52_STEREO, A52_STEREO}, | |
76 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F, | |
77 A52_STEREO, A52_3F, A52_STEREO, A52_3F}, | |
78 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO, | |
79 A52_2F1R, A52_2F1R, A52_2F1R, A52_2F1R}, | |
80 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO, | |
81 A52_2F1R, A52_3F1R, A52_2F1R, A52_3F1R}, | |
82 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F, | |
83 A52_2F2R, A52_2F2R, A52_2F2R, A52_2F2R}, | |
84 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F, | |
85 A52_2F2R, A52_3F2R, A52_2F2R, A52_3F2R}, | |
86 {A52_CHANNEL1, A52_MONO, A52_MONO, A52_MONO, | |
87 A52_MONO, A52_MONO, A52_MONO, A52_MONO}, | |
88 {A52_CHANNEL2, A52_MONO, A52_MONO, A52_MONO, | |
89 A52_MONO, A52_MONO, A52_MONO, A52_MONO}, | |
90 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_DOLBY, | |
91 A52_DOLBY, A52_DOLBY, A52_DOLBY, A52_DOLBY} | |
92 }; | |
93 int output; | |
94 | |
95 output = flags & A52_CHANNEL_MASK; | |
96 if (output > A52_DOLBY) | |
97 return -1; | |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
98 |
3394 | 99 output = table[output][input & 7]; |
100 | |
101 if ((output == A52_STEREO) && | |
102 ((input == A52_DOLBY) || ((input == A52_3F) && (clev == LEVEL_3DB)))) | |
103 output = A52_DOLBY; | |
104 | |
105 if (flags & A52_ADJUST_LEVEL) | |
106 switch (CONVERT (input & 7, output)) { | |
107 | |
108 case CONVERT (A52_3F, A52_MONO): | |
109 *level *= LEVEL_3DB / (1 + clev); | |
110 break; | |
111 | |
112 case CONVERT (A52_STEREO, A52_MONO): | |
113 case CONVERT (A52_2F2R, A52_2F1R): | |
114 case CONVERT (A52_3F2R, A52_3F1R): | |
115 level_3db: | |
116 *level *= LEVEL_3DB; | |
117 break; | |
118 | |
119 case CONVERT (A52_3F2R, A52_2F1R): | |
120 if (clev < LEVEL_PLUS3DB - 1) | |
121 goto level_3db; | |
122 /* break thru */ | |
123 case CONVERT (A52_3F, A52_STEREO): | |
124 case CONVERT (A52_3F1R, A52_2F1R): | |
125 case CONVERT (A52_3F1R, A52_2F2R): | |
126 case CONVERT (A52_3F2R, A52_2F2R): | |
127 *level /= 1 + clev; | |
128 break; | |
129 | |
130 case CONVERT (A52_2F1R, A52_MONO): | |
131 *level *= LEVEL_PLUS3DB / (2 + slev); | |
132 break; | |
133 | |
134 case CONVERT (A52_2F1R, A52_STEREO): | |
135 case CONVERT (A52_3F1R, A52_3F): | |
136 *level /= 1 + slev * LEVEL_3DB; | |
137 break; | |
138 | |
139 case CONVERT (A52_3F1R, A52_MONO): | |
140 *level *= LEVEL_3DB / (1 + clev + 0.5 * slev); | |
141 break; | |
142 | |
143 case CONVERT (A52_3F1R, A52_STEREO): | |
144 *level /= 1 + clev + slev * LEVEL_3DB; | |
145 break; | |
146 | |
147 case CONVERT (A52_2F2R, A52_MONO): | |
148 *level *= LEVEL_3DB / (1 + slev); | |
149 break; | |
150 | |
151 case CONVERT (A52_2F2R, A52_STEREO): | |
152 case CONVERT (A52_3F2R, A52_3F): | |
153 *level /= 1 + slev; | |
154 break; | |
155 | |
156 case CONVERT (A52_3F2R, A52_MONO): | |
157 *level *= LEVEL_3DB / (1 + clev + slev); | |
158 break; | |
159 | |
160 case CONVERT (A52_3F2R, A52_STEREO): | |
161 *level /= 1 + clev + slev; | |
162 break; | |
163 | |
164 case CONVERT (A52_MONO, A52_DOLBY): | |
165 *level *= LEVEL_PLUS3DB; | |
166 break; | |
167 | |
168 case CONVERT (A52_3F, A52_DOLBY): | |
169 case CONVERT (A52_2F1R, A52_DOLBY): | |
170 *level *= 1 / (1 + LEVEL_3DB); | |
171 break; | |
172 | |
173 case CONVERT (A52_3F1R, A52_DOLBY): | |
174 case CONVERT (A52_2F2R, A52_DOLBY): | |
175 *level *= 1 / (1 + 2 * LEVEL_3DB); | |
176 break; | |
177 | |
178 case CONVERT (A52_3F2R, A52_DOLBY): | |
179 *level *= 1 / (1 + 3 * LEVEL_3DB); | |
180 break; | |
181 } | |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
182 |
3394 | 183 return output; |
184 } | |
185 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
186 int a52_downmix_coeff (sample_t * coeff, int acmod, int output, sample_t level, |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
187 sample_t clev, sample_t slev) |
3394 | 188 { |
189 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
190 | |
191 case CONVERT (A52_CHANNEL, A52_CHANNEL): | |
192 case CONVERT (A52_MONO, A52_MONO): | |
193 case CONVERT (A52_STEREO, A52_STEREO): | |
194 case CONVERT (A52_3F, A52_3F): | |
195 case CONVERT (A52_2F1R, A52_2F1R): | |
196 case CONVERT (A52_3F1R, A52_3F1R): | |
197 case CONVERT (A52_2F2R, A52_2F2R): | |
198 case CONVERT (A52_3F2R, A52_3F2R): | |
199 case CONVERT (A52_STEREO, A52_DOLBY): | |
200 coeff[0] = coeff[1] = coeff[2] = coeff[3] = coeff[4] = level; | |
201 return 0; | |
202 | |
203 case CONVERT (A52_CHANNEL, A52_MONO): | |
204 coeff[0] = coeff[1] = level * LEVEL_6DB; | |
205 return 3; | |
206 | |
207 case CONVERT (A52_STEREO, A52_MONO): | |
208 coeff[0] = coeff[1] = level * LEVEL_3DB; | |
209 return 3; | |
210 | |
211 case CONVERT (A52_3F, A52_MONO): | |
212 coeff[0] = coeff[2] = level * LEVEL_3DB; | |
213 coeff[1] = level * clev * LEVEL_PLUS3DB; | |
214 return 7; | |
215 | |
216 case CONVERT (A52_2F1R, A52_MONO): | |
217 coeff[0] = coeff[1] = level * LEVEL_3DB; | |
218 coeff[2] = level * slev * LEVEL_3DB; | |
219 return 7; | |
220 | |
221 case CONVERT (A52_2F2R, A52_MONO): | |
222 coeff[0] = coeff[1] = level * LEVEL_3DB; | |
223 coeff[2] = coeff[3] = level * slev * LEVEL_3DB; | |
224 return 15; | |
225 | |
226 case CONVERT (A52_3F1R, A52_MONO): | |
227 coeff[0] = coeff[2] = level * LEVEL_3DB; | |
228 coeff[1] = level * clev * LEVEL_PLUS3DB; | |
229 coeff[3] = level * slev * LEVEL_3DB; | |
230 return 15; | |
231 | |
232 case CONVERT (A52_3F2R, A52_MONO): | |
233 coeff[0] = coeff[2] = level * LEVEL_3DB; | |
234 coeff[1] = level * clev * LEVEL_PLUS3DB; | |
235 coeff[3] = coeff[4] = level * slev * LEVEL_3DB; | |
236 return 31; | |
237 | |
238 case CONVERT (A52_MONO, A52_DOLBY): | |
239 coeff[0] = level * LEVEL_3DB; | |
240 return 0; | |
241 | |
242 case CONVERT (A52_3F, A52_DOLBY): | |
243 clev = LEVEL_3DB; | |
244 case CONVERT (A52_3F, A52_STEREO): | |
245 case CONVERT (A52_3F1R, A52_2F1R): | |
246 case CONVERT (A52_3F2R, A52_2F2R): | |
247 coeff[0] = coeff[2] = coeff[3] = coeff[4] = level; | |
248 coeff[1] = level * clev; | |
249 return 7; | |
250 | |
251 case CONVERT (A52_2F1R, A52_DOLBY): | |
252 slev = 1; | |
253 case CONVERT (A52_2F1R, A52_STEREO): | |
254 coeff[0] = coeff[1] = level; | |
255 coeff[2] = level * slev * LEVEL_3DB; | |
256 return 7; | |
257 | |
258 case CONVERT (A52_3F1R, A52_DOLBY): | |
259 clev = LEVEL_3DB; | |
260 slev = 1; | |
261 case CONVERT (A52_3F1R, A52_STEREO): | |
262 coeff[0] = coeff[2] = level; | |
263 coeff[1] = level * clev; | |
264 coeff[3] = level * slev * LEVEL_3DB; | |
265 return 15; | |
266 | |
267 case CONVERT (A52_2F2R, A52_DOLBY): | |
268 slev = LEVEL_3DB; | |
269 case CONVERT (A52_2F2R, A52_STEREO): | |
270 coeff[0] = coeff[1] = level; | |
271 coeff[2] = coeff[3] = level * slev; | |
272 return 15; | |
273 | |
274 case CONVERT (A52_3F2R, A52_DOLBY): | |
275 clev = LEVEL_3DB; | |
276 case CONVERT (A52_3F2R, A52_2F1R): | |
277 slev = LEVEL_3DB; | |
278 case CONVERT (A52_3F2R, A52_STEREO): | |
279 coeff[0] = coeff[2] = level; | |
280 coeff[1] = level * clev; | |
281 coeff[3] = coeff[4] = level * slev; | |
282 return 31; | |
283 | |
284 case CONVERT (A52_3F1R, A52_3F): | |
285 coeff[0] = coeff[1] = coeff[2] = level; | |
286 coeff[3] = level * slev * LEVEL_3DB; | |
287 return 13; | |
288 | |
289 case CONVERT (A52_3F2R, A52_3F): | |
290 coeff[0] = coeff[1] = coeff[2] = level; | |
291 coeff[3] = coeff[4] = level * slev; | |
292 return 29; | |
293 | |
294 case CONVERT (A52_2F2R, A52_2F1R): | |
295 coeff[0] = coeff[1] = level; | |
296 coeff[2] = coeff[3] = level * LEVEL_3DB; | |
297 return 12; | |
298 | |
299 case CONVERT (A52_3F2R, A52_3F1R): | |
300 coeff[0] = coeff[1] = coeff[2] = level; | |
301 coeff[3] = coeff[4] = level * LEVEL_3DB; | |
302 return 24; | |
303 | |
304 case CONVERT (A52_2F1R, A52_2F2R): | |
305 coeff[0] = coeff[1] = level; | |
306 coeff[2] = level * LEVEL_3DB; | |
307 return 0; | |
308 | |
309 case CONVERT (A52_3F1R, A52_2F2R): | |
310 coeff[0] = coeff[2] = level; | |
311 coeff[1] = level * clev; | |
312 coeff[3] = level * LEVEL_3DB; | |
313 return 7; | |
314 | |
315 case CONVERT (A52_3F1R, A52_3F2R): | |
316 coeff[0] = coeff[1] = coeff[2] = level; | |
317 coeff[3] = level * LEVEL_3DB; | |
318 return 0; | |
319 | |
320 case CONVERT (A52_CHANNEL, A52_CHANNEL1): | |
321 coeff[0] = level; | |
322 coeff[1] = 0; | |
323 return 0; | |
324 | |
325 case CONVERT (A52_CHANNEL, A52_CHANNEL2): | |
326 coeff[0] = 0; | |
327 coeff[1] = level; | |
328 return 0; | |
329 } | |
330 | |
331 return -1; /* NOTREACHED */ | |
332 } | |
333 | |
334 static void mix2to1 (sample_t * dest, sample_t * src, sample_t bias) | |
335 { | |
336 int i; | |
337 | |
338 for (i = 0; i < 256; i++) | |
339 dest[i] += src[i] + bias; | |
340 } | |
341 | |
342 static void mix3to1 (sample_t * samples, sample_t bias) | |
343 { | |
344 int i; | |
345 | |
346 for (i = 0; i < 256; i++) | |
347 samples[i] += samples[i + 256] + samples[i + 512] + bias; | |
348 } | |
349 | |
350 static void mix4to1 (sample_t * samples, sample_t bias) | |
351 { | |
352 int i; | |
353 | |
354 for (i = 0; i < 256; i++) | |
355 samples[i] += (samples[i + 256] + samples[i + 512] + | |
356 samples[i + 768] + bias); | |
357 } | |
358 | |
359 static void mix5to1 (sample_t * samples, sample_t bias) | |
360 { | |
361 int i; | |
362 | |
363 for (i = 0; i < 256; i++) | |
364 samples[i] += (samples[i + 256] + samples[i + 512] + | |
365 samples[i + 768] + samples[i + 1024] + bias); | |
366 } | |
367 | |
368 static void mix3to2 (sample_t * samples, sample_t bias) | |
369 { | |
370 int i; | |
371 sample_t common; | |
372 | |
373 for (i = 0; i < 256; i++) { | |
374 common = samples[i + 256] + bias; | |
375 samples[i] += common; | |
376 samples[i + 256] = samples[i + 512] + common; | |
377 } | |
378 } | |
379 | |
380 static void mix21to2 (sample_t * left, sample_t * right, sample_t bias) | |
381 { | |
382 int i; | |
383 sample_t common; | |
384 | |
385 for (i = 0; i < 256; i++) { | |
386 common = right[i + 256] + bias; | |
387 left[i] += common; | |
388 right[i] += common; | |
389 } | |
390 } | |
391 | |
392 static void mix21toS (sample_t * samples, sample_t bias) | |
393 { | |
394 int i; | |
395 sample_t surround; | |
396 | |
397 for (i = 0; i < 256; i++) { | |
398 surround = samples[i + 512]; | |
399 samples[i] += bias - surround; | |
400 samples[i + 256] += bias + surround; | |
401 } | |
402 } | |
403 | |
404 static void mix31to2 (sample_t * samples, sample_t bias) | |
405 { | |
406 int i; | |
407 sample_t common; | |
408 | |
409 for (i = 0; i < 256; i++) { | |
410 common = samples[i + 256] + samples[i + 768] + bias; | |
411 samples[i] += common; | |
412 samples[i + 256] = samples[i + 512] + common; | |
413 } | |
414 } | |
415 | |
416 static void mix31toS (sample_t * samples, sample_t bias) | |
417 { | |
418 int i; | |
419 sample_t common, surround; | |
420 | |
421 for (i = 0; i < 256; i++) { | |
422 common = samples[i + 256] + bias; | |
423 surround = samples[i + 768]; | |
424 samples[i] += common - surround; | |
425 samples[i + 256] = samples[i + 512] + common + surround; | |
426 } | |
427 } | |
428 | |
429 static void mix22toS (sample_t * samples, sample_t bias) | |
430 { | |
431 int i; | |
432 sample_t surround; | |
433 | |
434 for (i = 0; i < 256; i++) { | |
435 surround = samples[i + 512] + samples[i + 768]; | |
436 samples[i] += bias - surround; | |
437 samples[i + 256] += bias + surround; | |
438 } | |
439 } | |
440 | |
441 static void mix32to2 (sample_t * samples, sample_t bias) | |
442 { | |
443 int i; | |
444 sample_t common; | |
445 | |
446 for (i = 0; i < 256; i++) { | |
447 common = samples[i + 256] + bias; | |
448 samples[i] += common + samples[i + 768]; | |
449 samples[i + 256] = common + samples[i + 512] + samples[i + 1024]; | |
450 } | |
451 } | |
452 | |
453 static void mix32toS (sample_t * samples, sample_t bias) | |
454 { | |
455 int i; | |
456 sample_t common, surround; | |
457 | |
458 for (i = 0; i < 256; i++) { | |
459 common = samples[i + 256] + bias; | |
460 surround = samples[i + 768] + samples[i + 1024]; | |
461 samples[i] += common - surround; | |
462 samples[i + 256] = samples[i + 512] + common + surround; | |
463 } | |
464 } | |
465 | |
466 static void move2to1 (sample_t * src, sample_t * dest, sample_t bias) | |
467 { | |
468 int i; | |
469 | |
470 for (i = 0; i < 256; i++) | |
471 dest[i] = src[i] + src[i + 256] + bias; | |
472 } | |
473 | |
474 static void zero (sample_t * samples) | |
475 { | |
476 int i; | |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
477 |
3394 | 478 for (i = 0; i < 256; i++) |
479 samples[i] = 0; | |
480 } | |
481 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
482 void downmix_C (sample_t * samples, int acmod, int output, sample_t bias, |
18721
722ac20fac5f
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18720
diff
changeset
|
483 sample_t clev, sample_t slev) |
3394 | 484 { |
485 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
486 | |
487 case CONVERT (A52_CHANNEL, A52_CHANNEL2): | |
488 memcpy (samples, samples + 256, 256 * sizeof (sample_t)); | |
489 break; | |
490 | |
491 case CONVERT (A52_CHANNEL, A52_MONO): | |
492 case CONVERT (A52_STEREO, A52_MONO): | |
493 mix_2to1: | |
494 mix2to1 (samples, samples + 256, bias); | |
495 break; | |
496 | |
497 case CONVERT (A52_2F1R, A52_MONO): | |
498 if (slev == 0) | |
499 goto mix_2to1; | |
500 case CONVERT (A52_3F, A52_MONO): | |
501 mix_3to1: | |
502 mix3to1 (samples, bias); | |
503 break; | |
504 | |
505 case CONVERT (A52_3F1R, A52_MONO): | |
506 if (slev == 0) | |
507 goto mix_3to1; | |
508 case CONVERT (A52_2F2R, A52_MONO): | |
509 if (slev == 0) | |
510 goto mix_2to1; | |
511 mix4to1 (samples, bias); | |
512 break; | |
513 | |
514 case CONVERT (A52_3F2R, A52_MONO): | |
515 if (slev == 0) | |
516 goto mix_3to1; | |
517 mix5to1 (samples, bias); | |
518 break; | |
519 | |
520 case CONVERT (A52_MONO, A52_DOLBY): | |
521 memcpy (samples + 256, samples, 256 * sizeof (sample_t)); | |
522 break; | |
523 | |
524 case CONVERT (A52_3F, A52_STEREO): | |
525 case CONVERT (A52_3F, A52_DOLBY): | |
526 mix_3to2: | |
527 mix3to2 (samples, bias); | |
528 break; | |
529 | |
530 case CONVERT (A52_2F1R, A52_STEREO): | |
531 if (slev == 0) | |
532 break; | |
533 mix21to2 (samples, samples + 256, bias); | |
534 break; | |
535 | |
536 case CONVERT (A52_2F1R, A52_DOLBY): | |
537 mix21toS (samples, bias); | |
538 break; | |
539 | |
540 case CONVERT (A52_3F1R, A52_STEREO): | |
541 if (slev == 0) | |
542 goto mix_3to2; | |
543 mix31to2 (samples, bias); | |
544 break; | |
545 | |
546 case CONVERT (A52_3F1R, A52_DOLBY): | |
547 mix31toS (samples, bias); | |
548 break; | |
549 | |
550 case CONVERT (A52_2F2R, A52_STEREO): | |
551 if (slev == 0) | |
552 break; | |
553 mix2to1 (samples, samples + 512, bias); | |
554 mix2to1 (samples + 256, samples + 768, bias); | |
555 break; | |
556 | |
557 case CONVERT (A52_2F2R, A52_DOLBY): | |
558 mix22toS (samples, bias); | |
559 break; | |
560 | |
561 case CONVERT (A52_3F2R, A52_STEREO): | |
562 if (slev == 0) | |
563 goto mix_3to2; | |
564 mix32to2 (samples, bias); | |
565 break; | |
566 | |
567 case CONVERT (A52_3F2R, A52_DOLBY): | |
568 mix32toS (samples, bias); | |
569 break; | |
570 | |
571 case CONVERT (A52_3F1R, A52_3F): | |
572 if (slev == 0) | |
573 break; | |
574 mix21to2 (samples, samples + 512, bias); | |
575 break; | |
576 | |
577 case CONVERT (A52_3F2R, A52_3F): | |
578 if (slev == 0) | |
579 break; | |
580 mix2to1 (samples, samples + 768, bias); | |
581 mix2to1 (samples + 512, samples + 1024, bias); | |
582 break; | |
583 | |
584 case CONVERT (A52_3F1R, A52_2F1R): | |
585 mix3to2 (samples, bias); | |
586 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
587 break; | |
588 | |
589 case CONVERT (A52_2F2R, A52_2F1R): | |
590 mix2to1 (samples + 512, samples + 768, bias); | |
591 break; | |
592 | |
593 case CONVERT (A52_3F2R, A52_2F1R): | |
3678 | 594 mix3to2 (samples, bias); //FIXME possible bug? (output doesnt seem to be used) |
3394 | 595 move2to1 (samples + 768, samples + 512, bias); |
596 break; | |
597 | |
598 case CONVERT (A52_3F2R, A52_3F1R): | |
599 mix2to1 (samples + 768, samples + 1024, bias); | |
600 break; | |
601 | |
602 case CONVERT (A52_2F1R, A52_2F2R): | |
603 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); | |
604 break; | |
605 | |
606 case CONVERT (A52_3F1R, A52_2F2R): | |
607 mix3to2 (samples, bias); | |
608 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
609 break; | |
610 | |
611 case CONVERT (A52_3F2R, A52_2F2R): | |
612 mix3to2 (samples, bias); | |
613 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
614 memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t)); | |
615 break; | |
616 | |
617 case CONVERT (A52_3F1R, A52_3F2R): | |
12137 | 618 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); |
3394 | 619 break; |
620 } | |
621 } | |
622 | |
18720
4bad7f00556e
sync with liba52 0.7.4, patch by Emanuele Giaquinta >emanuele.giaquinta ! gmail * com<
rathann
parents:
18104
diff
changeset
|
623 void upmix_C (sample_t * samples, int acmod, int output) |
3394 | 624 { |
625 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
626 | |
627 case CONVERT (A52_CHANNEL, A52_CHANNEL2): | |
628 memcpy (samples + 256, samples, 256 * sizeof (sample_t)); | |
629 break; | |
630 | |
631 case CONVERT (A52_3F2R, A52_MONO): | |
632 zero (samples + 1024); | |
633 case CONVERT (A52_3F1R, A52_MONO): | |
634 case CONVERT (A52_2F2R, A52_MONO): | |
635 zero (samples + 768); | |
636 case CONVERT (A52_3F, A52_MONO): | |
637 case CONVERT (A52_2F1R, A52_MONO): | |
638 zero (samples + 512); | |
639 case CONVERT (A52_CHANNEL, A52_MONO): | |
640 case CONVERT (A52_STEREO, A52_MONO): | |
641 zero (samples + 256); | |
642 break; | |
643 | |
644 case CONVERT (A52_3F2R, A52_STEREO): | |
645 case CONVERT (A52_3F2R, A52_DOLBY): | |
646 zero (samples + 1024); | |
647 case CONVERT (A52_3F1R, A52_STEREO): | |
648 case CONVERT (A52_3F1R, A52_DOLBY): | |
649 zero (samples + 768); | |
650 case CONVERT (A52_3F, A52_STEREO): | |
651 case CONVERT (A52_3F, A52_DOLBY): | |
652 mix_3to2: | |
653 memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t)); | |
654 zero (samples + 256); | |
655 break; | |
656 | |
657 case CONVERT (A52_2F2R, A52_STEREO): | |
658 case CONVERT (A52_2F2R, A52_DOLBY): | |
659 zero (samples + 768); | |
660 case CONVERT (A52_2F1R, A52_STEREO): | |
661 case CONVERT (A52_2F1R, A52_DOLBY): | |
662 zero (samples + 512); | |
663 break; | |
664 | |
665 case CONVERT (A52_3F2R, A52_3F): | |
666 zero (samples + 1024); | |
667 case CONVERT (A52_3F1R, A52_3F): | |
668 case CONVERT (A52_2F2R, A52_2F1R): | |
669 zero (samples + 768); | |
670 break; | |
671 | |
672 case CONVERT (A52_3F2R, A52_3F1R): | |
673 zero (samples + 1024); | |
674 break; | |
675 | |
676 case CONVERT (A52_3F2R, A52_2F1R): | |
677 zero (samples + 1024); | |
678 case CONVERT (A52_3F1R, A52_2F1R): | |
679 mix_31to21: | |
680 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); | |
681 goto mix_3to2; | |
682 | |
683 case CONVERT (A52_3F2R, A52_2F2R): | |
684 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); | |
685 goto mix_31to21; | |
686 } | |
687 } | |
3904 | 688 |
28290 | 689 #if ARCH_X86 || ARCH_X86_64 |
/*
 * SSE version of mix2to1: for every element, dest = src + dest + bias.
 *
 * Asm operands: %0 = src + 256, %1 = dest + 256, %2 = bias (in memory).
 * bias is loaded into xmm7 and replicated to all four lanes by the
 * shufps $0x00.  REG_S is the loop index: it starts at -1024 and is
 * advanced by 32 per iteration until it reaches zero, so the addressing
 * (%0, REG_S) walks from the start of each block to its end.  Two 16-byte
 * vectors are processed per iteration.
 *
 * Assumes sample_t is 4 bytes wide (1024 bytes == 256 samples) and that
 * both blocks are 16-byte aligned, as movaps requires - TODO confirm.
 * NOTE(review): movlps is a 64-bit load while bias is a single sample_t;
 * only the low lane is used after the shuffle.
 * NOTE(review): the clobber list names only REG_S; the xmm registers and
 * the memory written through %1 are not declared to the compiler.
 */
3904 | 690 static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias) |
691 { | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27445
diff
changeset
|
692 __asm__ volatile( |
3904 | 693 "movlps %2, %%xmm7 \n\t" |
694 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 695 "mov $-1024, %%"REG_S" \n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
696 ASMALIGN(4) |
3904 | 697 "1: \n\t" |
16173 | 698 "movaps (%0, %%"REG_S"), %%xmm0 \n\t" |
699 "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" | |
700 "addps (%1, %%"REG_S"), %%xmm0 \n\t" | |
701 "addps 16(%1, %%"REG_S"), %%xmm1\n\t" | |
3904 | 702 "addps %%xmm7, %%xmm0 \n\t" |
703 "addps %%xmm7, %%xmm1 \n\t" | |
16173 | 704 "movaps %%xmm0, (%1, %%"REG_S") \n\t" |
705 "movaps %%xmm1, 16(%1, %%"REG_S")\n\t" | |
706 "add $32, %%"REG_S" \n\t" | |
3904 | 707 " jnz 1b \n\t" |
708 :: "r" (src+256), "r" (dest+256), "m" (bias) | |
16173 | 709 : "%"REG_S |
3904 | 710 ); |
711 } | |
712 | |
/*
 * SSE version of mix3to1: the block at samples receives the sum of the
 * blocks at byte offsets 0, 1024 and 2048, plus bias.
 *
 * Asm operands: %0 = samples + 256, %1 = bias (in memory).  bias is
 * replicated to all four lanes of xmm7.  REG_S runs from -1024 up to 0 in
 * steps of 16 bytes, one vector per iteration; per iteration
 * xmm0 = blk0 + blk2, xmm1 = blk1 + bias, and blk0 = xmm0 + xmm1.
 *
 * Assumes sample_t is 4 bytes wide (1024 bytes == 256 samples) and that
 * samples is 16-byte aligned, as movaps requires - TODO confirm.
 * NOTE(review): the clobber list names only REG_S; the xmm registers and
 * the written memory are not declared to the compiler.
 */
713 static void mix3to1_SSE (sample_t * samples, sample_t bias) | |
714 { | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27445
diff
changeset
|
715 __asm__ volatile( |
3904 | 716 "movlps %1, %%xmm7 \n\t" |
717 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 718 "mov $-1024, %%"REG_S" \n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
719 ASMALIGN(4) |
3904 | 720 "1: \n\t" |
16173 | 721 "movaps (%0, %%"REG_S"), %%xmm0 \n\t" |
722 "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" | |
723 "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" | |
3904 | 724 "addps %%xmm7, %%xmm1 \n\t" |
725 "addps %%xmm1, %%xmm0 \n\t" | |
16173 | 726 "movaps %%xmm0, (%0, %%"REG_S") \n\t" |
727 "add $16, %%"REG_S" \n\t" | |
3904 | 728 " jnz 1b \n\t" |
729 :: "r" (samples+256), "m" (bias) | |
16173 | 730 : "%"REG_S |
3904 | 731 ); |
732 } | |
733 | |
/*
 * SSE version of mix4to1: the block at samples receives the sum of the
 * blocks at byte offsets 0, 1024, 2048 and 3072, plus bias.
 *
 * Asm operands: %0 = samples + 256, %1 = bias (in memory).  bias is
 * replicated to all four lanes of xmm7.  REG_S runs from -1024 up to 0 in
 * steps of 16 bytes; per iteration xmm0 = blk0 + blk2 + bias,
 * xmm1 = blk1 + blk3, and blk0 = xmm0 + xmm1.
 *
 * Assumes sample_t is 4 bytes wide (1024 bytes == 256 samples) and that
 * samples is 16-byte aligned, as movaps requires - TODO confirm.
 * NOTE(review): the clobber list names only REG_S; the xmm registers and
 * the written memory are not declared to the compiler.
 */
734 static void mix4to1_SSE (sample_t * samples, sample_t bias) | |
735 { | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27445
diff
changeset
|
736 __asm__ volatile( |
3904 | 737 "movlps %1, %%xmm7 \n\t" |
738 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 739 "mov $-1024, %%"REG_S" \n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
740 ASMALIGN(4) |
3904 | 741 "1: \n\t" |
16173 | 742 "movaps (%0, %%"REG_S"), %%xmm0 \n\t" |
743 "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" | |
744 "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" | |
745 "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" | |
3904 | 746 "addps %%xmm7, %%xmm0 \n\t" |
747 "addps %%xmm1, %%xmm0 \n\t" | |
16173 | 748 "movaps %%xmm0, (%0, %%"REG_S") \n\t" |
749 "add $16, %%"REG_S" \n\t" | |
3904 | 750 " jnz 1b \n\t" |
751 :: "r" (samples+256), "m" (bias) | |
16173 | 752 : "%"REG_S |
3904 | 753 ); |
754 } | |
755 | |
/*
 * SSE version of mix5to1: the block at samples receives the sum of the
 * blocks at byte offsets 0, 1024, 2048, 3072 and 4096, plus bias.
 *
 * Asm operands: %0 = samples + 256, %1 = bias (in memory).  bias is
 * replicated to all four lanes of xmm7.  REG_S runs from -1024 up to 0 in
 * steps of 16 bytes; per iteration xmm0 = blk0 + blk2 + bias,
 * xmm1 = blk1 + blk3 + blk4, and blk0 = xmm0 + xmm1.
 *
 * Assumes sample_t is 4 bytes wide (1024 bytes == 256 samples) and that
 * samples is 16-byte aligned, as movaps requires - TODO confirm.
 * NOTE(review): the clobber list names only REG_S; the xmm registers and
 * the written memory are not declared to the compiler.
 */
756 static void mix5to1_SSE (sample_t * samples, sample_t bias) | |
757 { | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27445
diff
changeset
|
758 __asm__ volatile( |
3904 | 759 "movlps %1, %%xmm7 \n\t" |
760 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 761 "mov $-1024, %%"REG_S" \n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
762 ASMALIGN(4) |
3904 | 763 "1: \n\t" |
16173 | 764 "movaps (%0, %%"REG_S"), %%xmm0 \n\t" |
765 "movaps 1024(%0, %%"REG_S"), %%xmm1\n\t" | |
766 "addps 2048(%0, %%"REG_S"), %%xmm0\n\t" | |
767 "addps 3072(%0, %%"REG_S"), %%xmm1\n\t" | |
3904 | 768 "addps %%xmm7, %%xmm0 \n\t" |
16173 | 769 "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" |
3904 | 770 "addps %%xmm1, %%xmm0 \n\t" |
16173 | 771 "movaps %%xmm0, (%0, %%"REG_S") \n\t" |
772 "add $16, %%"REG_S" \n\t" | |
3904 | 773 " jnz 1b \n\t" |
774 :: "r" (samples+256), "m" (bias) | |
16173 | 775 : "%"REG_S |
3904 | 776 ); |
777 } | |
778 | |
/*
 * SSE version of mix3to2.  With common = blk1 + bias, the block at byte
 * offset 0 becomes blk0 + common and the block at byte offset 1024 becomes
 * blk2 + common (blk2 is the block at byte offset 2048).
 *
 * Asm operands: %0 = samples + 256, %1 = bias (in memory).  bias is
 * replicated to all four lanes of xmm7.  REG_S runs from -1024 up to 0 in
 * steps of 16 bytes, one vector per iteration.
 *
 * Assumes sample_t is 4 bytes wide (1024 bytes == 256 samples) and that
 * samples is 16-byte aligned, as movaps requires - TODO confirm.
 * NOTE(review): the clobber list names only REG_S; the xmm registers and
 * the written memory are not declared to the compiler.
 */
779 static void mix3to2_SSE (sample_t * samples, sample_t bias) | |
780 { | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27445
diff
changeset
|
781 __asm__ volatile( |
3904 | 782 "movlps %1, %%xmm7 \n\t" |
783 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 784 "mov $-1024, %%"REG_S" \n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
785 ASMALIGN(4) |
3904 | 786 "1: \n\t" |
16173 | 787 "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" |
3904 | 788 "addps %%xmm7, %%xmm0 \n\t" //common |
16173 | 789 "movaps (%0, %%"REG_S"), %%xmm1 \n\t" |
790 "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" | |
3904 | 791 "addps %%xmm0, %%xmm1 \n\t" |
792 "addps %%xmm0, %%xmm2 \n\t" | |
16173 | 793 "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
794 "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" | |
795 "add $16, %%"REG_S" \n\t" | |
3904 | 796 " jnz 1b \n\t" |
797 :: "r" (samples+256), "m" (bias) | |
16173 | 798 : "%"REG_S |
3904 | 799 ); |
800 } | |
801 | |
/*
 * SSE version of mix21to2.  With common = (block 1024 bytes after right)
 * + bias, both the left block and the right block are incremented by
 * common.
 *
 * Asm operands: %0 = left + 256, %1 = right + 256, %2 = bias (in memory).
 * bias is replicated to all four lanes of xmm7.  REG_S runs from -1024 up
 * to 0 in steps of 16 bytes, one vector per iteration.
 *
 * Assumes sample_t is 4 bytes wide (1024 bytes == 256 samples) and that
 * both pointers are 16-byte aligned, as movaps requires - TODO confirm.
 * NOTE(review): the clobber list names only REG_S; the xmm registers and
 * the written memory are not declared to the compiler.
 */
802 static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias) | |
803 { | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27445
diff
changeset
|
804 __asm__ volatile( |
3904 | 805 "movlps %2, %%xmm7 \n\t" |
806 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 807 "mov $-1024, %%"REG_S" \n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
808 ASMALIGN(4) |
3904 | 809 "1: \n\t" |
16173 | 810 "movaps 1024(%1, %%"REG_S"), %%xmm0\n\t" |
3904 | 811 "addps %%xmm7, %%xmm0 \n\t" //common |
16173 | 812 "movaps (%0, %%"REG_S"), %%xmm1 \n\t" |
813 "movaps (%1, %%"REG_S"), %%xmm2 \n\t" | |
3904 | 814 "addps %%xmm0, %%xmm1 \n\t" |
815 "addps %%xmm0, %%xmm2 \n\t" | |
16173 | 816 "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
817 "movaps %%xmm2, (%1, %%"REG_S") \n\t" | |
818 "add $16, %%"REG_S" \n\t" | |
3904 | 819 " jnz 1b \n\t" |
820 :: "r" (left+256), "r" (right+256), "m" (bias) | |
16173 | 821 : "%"REG_S |
3904 | 822 ); |
823 } | |
824 | |
825 static void mix21toS_SSE (sample_t * samples, sample_t bias) | |
826 { | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27445
diff
changeset
|
827 __asm__ volatile( |
3904 | 828 "movlps %1, %%xmm7 \n\t" |
829 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 830 "mov $-1024, %%"REG_S" \n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
831 ASMALIGN(4) |
3904 | 832 "1: \n\t" |
16173 | 833 "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" // surround |
834 "movaps (%0, %%"REG_S"), %%xmm1 \n\t" | |
835 "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t" | |
3904 | 836 "addps %%xmm7, %%xmm1 \n\t" |
837 "addps %%xmm7, %%xmm2 \n\t" | |
838 "subps %%xmm0, %%xmm1 \n\t" | |
839 "addps %%xmm0, %%xmm2 \n\t" | |
16173 | 840 "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
841 "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" | |
842 "add $16, %%"REG_S" \n\t" | |
3904 | 843 " jnz 1b \n\t" |
844 :: "r" (samples+256), "m" (bias) | |
16173 | 845 : "%"REG_S |
3904 | 846 ); |
847 } | |
848 | |
849 static void mix31to2_SSE (sample_t * samples, sample_t bias) | |
850 { | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27445
diff
changeset
|
851 __asm__ volatile( |
3904 | 852 "movlps %1, %%xmm7 \n\t" |
853 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 854 "mov $-1024, %%"REG_S" \n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
855 ASMALIGN(4) |
3904 | 856 "1: \n\t" |
16173 | 857 "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" |
858 "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" | |
3904 | 859 "addps %%xmm7, %%xmm0 \n\t" // common |
16173 | 860 "movaps (%0, %%"REG_S"), %%xmm1 \n\t" |
861 "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" | |
3904 | 862 "addps %%xmm0, %%xmm1 \n\t" |
863 "addps %%xmm0, %%xmm2 \n\t" | |
16173 | 864 "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
865 "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" | |
866 "add $16, %%"REG_S" \n\t" | |
3904 | 867 " jnz 1b \n\t" |
868 :: "r" (samples+256), "m" (bias) | |
16173 | 869 : "%"REG_S |
3904 | 870 ); |
871 } | |
872 | |
873 static void mix31toS_SSE (sample_t * samples, sample_t bias) | |
874 { | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27445
diff
changeset
|
875 __asm__ volatile( |
3904 | 876 "movlps %1, %%xmm7 \n\t" |
877 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 878 "mov $-1024, %%"REG_S" \n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
879 ASMALIGN(4) |
3904 | 880 "1: \n\t" |
16173 | 881 "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" |
882 "movaps 3072(%0, %%"REG_S"), %%xmm3\n\t" // surround | |
3904 | 883 "addps %%xmm7, %%xmm0 \n\t" // common |
16173 | 884 "movaps (%0, %%"REG_S"), %%xmm1 \n\t" |
885 "movaps 2048(%0, %%"REG_S"), %%xmm2\n\t" | |
3904 | 886 "addps %%xmm0, %%xmm1 \n\t" |
887 "addps %%xmm0, %%xmm2 \n\t" | |
888 "subps %%xmm3, %%xmm1 \n\t" | |
889 "addps %%xmm3, %%xmm2 \n\t" | |
16173 | 890 "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
891 "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" | |
892 "add $16, %%"REG_S" \n\t" | |
3904 | 893 " jnz 1b \n\t" |
894 :: "r" (samples+256), "m" (bias) | |
16173 | 895 : "%"REG_S |
3904 | 896 ); |
897 } | |
898 | |
899 static void mix22toS_SSE (sample_t * samples, sample_t bias) | |
900 { | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27445
diff
changeset
|
901 __asm__ volatile( |
3904 | 902 "movlps %1, %%xmm7 \n\t" |
903 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 904 "mov $-1024, %%"REG_S" \n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
905 ASMALIGN(4) |
3904 | 906 "1: \n\t" |
16173 | 907 "movaps 2048(%0, %%"REG_S"), %%xmm0\n\t" |
908 "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" // surround | |
909 "movaps (%0, %%"REG_S"), %%xmm1 \n\t" | |
910 "movaps 1024(%0, %%"REG_S"), %%xmm2\n\t" | |
3904 | 911 "addps %%xmm7, %%xmm1 \n\t" |
912 "addps %%xmm7, %%xmm2 \n\t" | |
913 "subps %%xmm0, %%xmm1 \n\t" | |
914 "addps %%xmm0, %%xmm2 \n\t" | |
16173 | 915 "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
916 "movaps %%xmm2, 1024(%0, %%"REG_S")\n\t" | |
917 "add $16, %%"REG_S" \n\t" | |
3904 | 918 " jnz 1b \n\t" |
919 :: "r" (samples+256), "m" (bias) | |
16173 | 920 : "%"REG_S |
3904 | 921 ); |
922 } | |
923 | |
924 static void mix32to2_SSE (sample_t * samples, sample_t bias) | |
925 { | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27445
diff
changeset
|
926 __asm__ volatile( |
3904 | 927 "movlps %1, %%xmm7 \n\t" |
928 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 929 "mov $-1024, %%"REG_S" \n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
930 ASMALIGN(4) |
3904 | 931 "1: \n\t" |
16173 | 932 "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" |
3904 | 933 "addps %%xmm7, %%xmm0 \n\t" // common |
934 "movaps %%xmm0, %%xmm1 \n\t" // common | |
16173 | 935 "addps (%0, %%"REG_S"), %%xmm0 \n\t" |
936 "addps 2048(%0, %%"REG_S"), %%xmm1\n\t" | |
937 "addps 3072(%0, %%"REG_S"), %%xmm0\n\t" | |
938 "addps 4096(%0, %%"REG_S"), %%xmm1\n\t" | |
939 "movaps %%xmm0, (%0, %%"REG_S") \n\t" | |
940 "movaps %%xmm1, 1024(%0, %%"REG_S")\n\t" | |
941 "add $16, %%"REG_S" \n\t" | |
3904 | 942 " jnz 1b \n\t" |
943 :: "r" (samples+256), "m" (bias) | |
16173 | 944 : "%"REG_S |
3904 | 945 ); |
946 } | |
947 | |
948 static void mix32toS_SSE (sample_t * samples, sample_t bias) | |
949 { | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27445
diff
changeset
|
950 __asm__ volatile( |
3904 | 951 "movlps %1, %%xmm7 \n\t" |
952 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 953 "mov $-1024, %%"REG_S" \n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
954 ASMALIGN(4) |
3904 | 955 "1: \n\t" |
16173 | 956 "movaps 1024(%0, %%"REG_S"), %%xmm0\n\t" |
957 "movaps 3072(%0, %%"REG_S"), %%xmm2\n\t" | |
3904 | 958 "addps %%xmm7, %%xmm0 \n\t" // common |
16173 | 959 "addps 4096(%0, %%"REG_S"), %%xmm2\n\t" // surround |
960 "movaps (%0, %%"REG_S"), %%xmm1 \n\t" | |
961 "movaps 2048(%0, %%"REG_S"), %%xmm3\n\t" | |
3904 | 962 "subps %%xmm2, %%xmm1 \n\t" |
963 "addps %%xmm2, %%xmm3 \n\t" | |
964 "addps %%xmm0, %%xmm1 \n\t" | |
965 "addps %%xmm0, %%xmm3 \n\t" | |
16173 | 966 "movaps %%xmm1, (%0, %%"REG_S") \n\t" |
967 "movaps %%xmm3, 1024(%0, %%"REG_S")\n\t" | |
968 "add $16, %%"REG_S" \n\t" | |
3904 | 969 " jnz 1b \n\t" |
970 :: "r" (samples+256), "m" (bias) | |
16173 | 971 : "%"REG_S |
3904 | 972 ); |
973 } | |
974 | |
975 static void move2to1_SSE (sample_t * src, sample_t * dest, sample_t bias) | |
976 { | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27445
diff
changeset
|
977 __asm__ volatile( |
3904 | 978 "movlps %2, %%xmm7 \n\t" |
979 "shufps $0x00, %%xmm7, %%xmm7 \n\t" | |
16173 | 980 "mov $-1024, %%"REG_S" \n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
981 ASMALIGN(4) |
3904 | 982 "1: \n\t" |
16173 | 983 "movaps (%0, %%"REG_S"), %%xmm0 \n\t" |
984 "movaps 16(%0, %%"REG_S"), %%xmm1\n\t" | |
985 "addps 1024(%0, %%"REG_S"), %%xmm0\n\t" | |
986 "addps 1040(%0, %%"REG_S"), %%xmm1\n\t" | |
3904 | 987 "addps %%xmm7, %%xmm0 \n\t" |
988 "addps %%xmm7, %%xmm1 \n\t" | |
16173 | 989 "movaps %%xmm0, (%1, %%"REG_S") \n\t" |
990 "movaps %%xmm1, 16(%1, %%"REG_S")\n\t" | |
991 "add $32, %%"REG_S" \n\t" | |
3904 | 992 " jnz 1b \n\t" |
993 :: "r" (src+256), "r" (dest+256), "m" (bias) | |
16173 | 994 : "%"REG_S |
3904 | 995 ); |
996 } | |
997 | |
998 static void zero_MMX(sample_t * samples) | |
999 { | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27445
diff
changeset
|
1000 __asm__ volatile( |
16173 | 1001 "mov $-1024, %%"REG_S" \n\t" |
3904 | 1002 "pxor %%mm0, %%mm0 \n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
1003 ASMALIGN(4) |
3904 | 1004 "1: \n\t" |
16173 | 1005 "movq %%mm0, (%0, %%"REG_S") \n\t" |
1006 "movq %%mm0, 8(%0, %%"REG_S") \n\t" | |
1007 "movq %%mm0, 16(%0, %%"REG_S") \n\t" | |
1008 "movq %%mm0, 24(%0, %%"REG_S") \n\t" | |
1009 "add $32, %%"REG_S" \n\t" | |
3904 | 1010 " jnz 1b \n\t" |
1011 "emms" | |
1012 :: "r" (samples+256) | |
16173 | 1013 : "%"REG_S |
3904 | 1014 ); |
1015 } | |
1016 | |
1017 static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias, | |
1018 sample_t clev, sample_t slev) | |
1019 { | |
1020 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
1021 | |
1022 case CONVERT (A52_CHANNEL, A52_CHANNEL2): | |
1023 memcpy (samples, samples + 256, 256 * sizeof (sample_t)); | |
1024 break; | |
1025 | |
1026 case CONVERT (A52_CHANNEL, A52_MONO): | |
1027 case CONVERT (A52_STEREO, A52_MONO): | |
1028 mix_2to1_SSE: | |
1029 mix2to1_SSE (samples, samples + 256, bias); | |
1030 break; | |
1031 | |
1032 case CONVERT (A52_2F1R, A52_MONO): | |
1033 if (slev == 0) | |
1034 goto mix_2to1_SSE; | |
1035 case CONVERT (A52_3F, A52_MONO): | |
1036 mix_3to1_SSE: | |
1037 mix3to1_SSE (samples, bias); | |
1038 break; | |
1039 | |
1040 case CONVERT (A52_3F1R, A52_MONO): | |
1041 if (slev == 0) | |
1042 goto mix_3to1_SSE; | |
1043 case CONVERT (A52_2F2R, A52_MONO): | |
1044 if (slev == 0) | |
1045 goto mix_2to1_SSE; | |
1046 mix4to1_SSE (samples, bias); | |
1047 break; | |
1048 | |
1049 case CONVERT (A52_3F2R, A52_MONO): | |
1050 if (slev == 0) | |
1051 goto mix_3to1_SSE; | |
1052 mix5to1_SSE (samples, bias); | |
1053 break; | |
1054 | |
1055 case CONVERT (A52_MONO, A52_DOLBY): | |
1056 memcpy (samples + 256, samples, 256 * sizeof (sample_t)); | |
1057 break; | |
1058 | |
1059 case CONVERT (A52_3F, A52_STEREO): | |
1060 case CONVERT (A52_3F, A52_DOLBY): | |
1061 mix_3to2_SSE: | |
1062 mix3to2_SSE (samples, bias); | |
1063 break; | |
1064 | |
1065 case CONVERT (A52_2F1R, A52_STEREO): | |
1066 if (slev == 0) | |
1067 break; | |
1068 mix21to2_SSE (samples, samples + 256, bias); | |
1069 break; | |
1070 | |
1071 case CONVERT (A52_2F1R, A52_DOLBY): | |
1072 mix21toS_SSE (samples, bias); | |
1073 break; | |
1074 | |
1075 case CONVERT (A52_3F1R, A52_STEREO): | |
1076 if (slev == 0) | |
1077 goto mix_3to2_SSE; | |
1078 mix31to2_SSE (samples, bias); | |
1079 break; | |
1080 | |
1081 case CONVERT (A52_3F1R, A52_DOLBY): | |
1082 mix31toS_SSE (samples, bias); | |
1083 break; | |
1084 | |
1085 case CONVERT (A52_2F2R, A52_STEREO): | |
1086 if (slev == 0) | |
1087 break; | |
1088 mix2to1_SSE (samples, samples + 512, bias); | |
1089 mix2to1_SSE (samples + 256, samples + 768, bias); | |
1090 break; | |
1091 | |
1092 case CONVERT (A52_2F2R, A52_DOLBY): | |
1093 mix22toS_SSE (samples, bias); | |
1094 break; | |
1095 | |
1096 case CONVERT (A52_3F2R, A52_STEREO): | |
1097 if (slev == 0) | |
1098 goto mix_3to2_SSE; | |
1099 mix32to2_SSE (samples, bias); | |
1100 break; | |
1101 | |
1102 case CONVERT (A52_3F2R, A52_DOLBY): | |
1103 mix32toS_SSE (samples, bias); | |
1104 break; | |
1105 | |
1106 case CONVERT (A52_3F1R, A52_3F): | |
1107 if (slev == 0) | |
1108 break; | |
1109 mix21to2_SSE (samples, samples + 512, bias); | |
1110 break; | |
1111 | |
1112 case CONVERT (A52_3F2R, A52_3F): | |
1113 if (slev == 0) | |
1114 break; | |
1115 mix2to1_SSE (samples, samples + 768, bias); | |
1116 mix2to1_SSE (samples + 512, samples + 1024, bias); | |
1117 break; | |
1118 | |
1119 case CONVERT (A52_3F1R, A52_2F1R): | |
1120 mix3to2_SSE (samples, bias); | |
1121 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1122 break; | |
1123 | |
1124 case CONVERT (A52_2F2R, A52_2F1R): | |
1125 mix2to1_SSE (samples + 512, samples + 768, bias); | |
1126 break; | |
1127 | |
1128 case CONVERT (A52_3F2R, A52_2F1R): | |
1129 mix3to2_SSE (samples, bias); //FIXME possible bug? (output doesnt seem to be used) | |
1130 move2to1_SSE (samples + 768, samples + 512, bias); | |
1131 break; | |
1132 | |
1133 case CONVERT (A52_3F2R, A52_3F1R): | |
1134 mix2to1_SSE (samples + 768, samples + 1024, bias); | |
1135 break; | |
1136 | |
1137 case CONVERT (A52_2F1R, A52_2F2R): | |
1138 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); | |
1139 break; | |
1140 | |
1141 case CONVERT (A52_3F1R, A52_2F2R): | |
1142 mix3to2_SSE (samples, bias); | |
1143 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1144 break; | |
1145 | |
1146 case CONVERT (A52_3F2R, A52_2F2R): | |
1147 mix3to2_SSE (samples, bias); | |
1148 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1149 memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t)); | |
1150 break; | |
1151 | |
1152 case CONVERT (A52_3F1R, A52_3F2R): | |
12137 | 1153 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); |
3904 | 1154 break; |
1155 } | |
1156 } | |
1157 | |
1158 static void upmix_MMX (sample_t * samples, int acmod, int output) | |
1159 { | |
1160 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
1161 | |
1162 case CONVERT (A52_CHANNEL, A52_CHANNEL2): | |
1163 memcpy (samples + 256, samples, 256 * sizeof (sample_t)); | |
1164 break; | |
1165 | |
1166 case CONVERT (A52_3F2R, A52_MONO): | |
1167 zero_MMX (samples + 1024); | |
1168 case CONVERT (A52_3F1R, A52_MONO): | |
1169 case CONVERT (A52_2F2R, A52_MONO): | |
1170 zero_MMX (samples + 768); | |
1171 case CONVERT (A52_3F, A52_MONO): | |
1172 case CONVERT (A52_2F1R, A52_MONO): | |
1173 zero_MMX (samples + 512); | |
1174 case CONVERT (A52_CHANNEL, A52_MONO): | |
1175 case CONVERT (A52_STEREO, A52_MONO): | |
1176 zero_MMX (samples + 256); | |
1177 break; | |
1178 | |
1179 case CONVERT (A52_3F2R, A52_STEREO): | |
1180 case CONVERT (A52_3F2R, A52_DOLBY): | |
1181 zero_MMX (samples + 1024); | |
1182 case CONVERT (A52_3F1R, A52_STEREO): | |
1183 case CONVERT (A52_3F1R, A52_DOLBY): | |
1184 zero_MMX (samples + 768); | |
1185 case CONVERT (A52_3F, A52_STEREO): | |
1186 case CONVERT (A52_3F, A52_DOLBY): | |
1187 mix_3to2_MMX: | |
1188 memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t)); | |
1189 zero_MMX (samples + 256); | |
1190 break; | |
1191 | |
1192 case CONVERT (A52_2F2R, A52_STEREO): | |
1193 case CONVERT (A52_2F2R, A52_DOLBY): | |
1194 zero_MMX (samples + 768); | |
1195 case CONVERT (A52_2F1R, A52_STEREO): | |
1196 case CONVERT (A52_2F1R, A52_DOLBY): | |
1197 zero_MMX (samples + 512); | |
1198 break; | |
1199 | |
1200 case CONVERT (A52_3F2R, A52_3F): | |
1201 zero_MMX (samples + 1024); | |
1202 case CONVERT (A52_3F1R, A52_3F): | |
1203 case CONVERT (A52_2F2R, A52_2F1R): | |
1204 zero_MMX (samples + 768); | |
1205 break; | |
1206 | |
1207 case CONVERT (A52_3F2R, A52_3F1R): | |
1208 zero_MMX (samples + 1024); | |
1209 break; | |
1210 | |
1211 case CONVERT (A52_3F2R, A52_2F1R): | |
1212 zero_MMX (samples + 1024); | |
1213 case CONVERT (A52_3F1R, A52_2F1R): | |
1214 mix_31to21_MMX: | |
1215 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); | |
1216 goto mix_3to2_MMX; | |
1217 | |
1218 case CONVERT (A52_3F2R, A52_2F2R): | |
1219 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); | |
1220 goto mix_31to21_MMX; | |
1221 } | |
1222 } | |
4233 | 1223 |
1224 static void mix2to1_3dnow (sample_t * dest, sample_t * src, sample_t bias) | |
1225 { | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27445
diff
changeset
|
1226 __asm__ volatile( |
4233 | 1227 "movd %2, %%mm7 \n\t" |
1228 "punpckldq %2, %%mm7 \n\t" | |
16173 | 1229 "mov $-1024, %%"REG_S" \n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
1230 ASMALIGN(4) |
4233 | 1231 "1: \n\t" |
16173 | 1232 "movq (%0, %%"REG_S"), %%mm0 \n\t" |
1233 "movq 8(%0, %%"REG_S"), %%mm1 \n\t" | |
1234 "movq 16(%0, %%"REG_S"), %%mm2 \n\t" | |
1235 "movq 24(%0, %%"REG_S"), %%mm3 \n\t" | |
1236 "pfadd (%1, %%"REG_S"), %%mm0 \n\t" | |
1237 "pfadd 8(%1, %%"REG_S"), %%mm1 \n\t" | |
1238 "pfadd 16(%1, %%"REG_S"), %%mm2 \n\t" | |
1239 "pfadd 24(%1, %%"REG_S"), %%mm3 \n\t" | |
4233 | 1240 "pfadd %%mm7, %%mm0 \n\t" |
1241 "pfadd %%mm7, %%mm1 \n\t" | |
1242 "pfadd %%mm7, %%mm2 \n\t" | |
1243 "pfadd %%mm7, %%mm3 \n\t" | |
16173 | 1244 "movq %%mm0, (%1, %%"REG_S") \n\t" |
1245 "movq %%mm1, 8(%1, %%"REG_S") \n\t" | |
1246 "movq %%mm2, 16(%1, %%"REG_S") \n\t" | |
1247 "movq %%mm3, 24(%1, %%"REG_S") \n\t" | |
1248 "add $32, %%"REG_S" \n\t" | |
4233 | 1249 " jnz 1b \n\t" |
1250 :: "r" (src+256), "r" (dest+256), "m" (bias) | |
16173 | 1251 : "%"REG_S |
4233 | 1252 ); |
1253 } | |
1254 | |
1255 static void mix3to1_3dnow (sample_t * samples, sample_t bias) | |
1256 { | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27445
diff
changeset
|
1257 __asm__ volatile( |
4233 | 1258 "movd %1, %%mm7 \n\t" |
1259 "punpckldq %1, %%mm7 \n\t" | |
16173 | 1260 "mov $-1024, %%"REG_S" \n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
1261 ASMALIGN(4) |
4233 | 1262 "1: \n\t" |
16173 | 1263 "movq (%0, %%"REG_S"), %%mm0 \n\t" |
1264 "movq 8(%0, %%"REG_S"), %%mm1 \n\t" | |
1265 "movq 1024(%0, %%"REG_S"), %%mm2\n\t" | |
1266 "movq 1032(%0, %%"REG_S"), %%mm3\n\t" | |
1267 "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" | |
1268 "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" | |
4233 | 1269 "pfadd %%mm7, %%mm0 \n\t" |
1270 "pfadd %%mm7, %%mm1 \n\t" | |
1271 "pfadd %%mm2, %%mm0 \n\t" | |
1272 "pfadd %%mm3, %%mm1 \n\t" | |
16173 | 1273 "movq %%mm0, (%0, %%"REG_S") \n\t" |
1274 "movq %%mm1, 8(%0, %%"REG_S") \n\t" | |
1275 "add $16, %%"REG_S" \n\t" | |
4233 | 1276 " jnz 1b \n\t" |
1277 :: "r" (samples+256), "m" (bias) | |
16173 | 1278 : "%"REG_S |
4233 | 1279 ); |
1280 } | |
1281 | |
1282 static void mix4to1_3dnow (sample_t * samples, sample_t bias) | |
1283 { | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27445
diff
changeset
|
1284 __asm__ volatile( |
4233 | 1285 "movd %1, %%mm7 \n\t" |
1286 "punpckldq %1, %%mm7 \n\t" | |
16173 | 1287 "mov $-1024, %%"REG_S" \n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
1288 ASMALIGN(4) |
4233 | 1289 "1: \n\t" |
16173 | 1290 "movq (%0, %%"REG_S"), %%mm0 \n\t" |
1291 "movq 8(%0, %%"REG_S"), %%mm1 \n\t" | |
1292 "movq 1024(%0, %%"REG_S"), %%mm2\n\t" | |
1293 "movq 1032(%0, %%"REG_S"), %%mm3\n\t" | |
1294 "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" | |
1295 "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" | |
1296 "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" | |
1297 "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t" | |
4233 | 1298 "pfadd %%mm7, %%mm0 \n\t" |
1299 "pfadd %%mm7, %%mm1 \n\t" | |
1300 "pfadd %%mm2, %%mm0 \n\t" | |
1301 "pfadd %%mm3, %%mm1 \n\t" | |
16173 | 1302 "movq %%mm0, (%0, %%"REG_S") \n\t" |
1303 "movq %%mm1, 8(%0, %%"REG_S") \n\t" | |
1304 "add $16, %%"REG_S" \n\t" | |
4233 | 1305 " jnz 1b \n\t" |
1306 :: "r" (samples+256), "m" (bias) | |
16173 | 1307 : "%"REG_S |
4233 | 1308 ); |
1309 } | |
1310 | |
1311 static void mix5to1_3dnow (sample_t * samples, sample_t bias) | |
1312 { | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27445
diff
changeset
|
1313 __asm__ volatile( |
4233 | 1314 "movd %1, %%mm7 \n\t" |
1315 "punpckldq %1, %%mm7 \n\t" | |
16173 | 1316 "mov $-1024, %%"REG_S" \n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
1317 ASMALIGN(4) |
4233 | 1318 "1: \n\t" |
16173 | 1319 "movq (%0, %%"REG_S"), %%mm0 \n\t" |
1320 "movq 8(%0, %%"REG_S"), %%mm1 \n\t" | |
1321 "movq 1024(%0, %%"REG_S"), %%mm2\n\t" | |
1322 "movq 1032(%0, %%"REG_S"), %%mm3\n\t" | |
1323 "pfadd 2048(%0, %%"REG_S"), %%mm0\n\t" | |
1324 "pfadd 2056(%0, %%"REG_S"), %%mm1\n\t" | |
1325 "pfadd 3072(%0, %%"REG_S"), %%mm2\n\t" | |
1326 "pfadd 3080(%0, %%"REG_S"), %%mm3\n\t" | |
4233 | 1327 "pfadd %%mm7, %%mm0 \n\t" |
1328 "pfadd %%mm7, %%mm1 \n\t" | |
16173 | 1329 "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" |
1330 "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t" | |
4233 | 1331 "pfadd %%mm2, %%mm0 \n\t" |
1332 "pfadd %%mm3, %%mm1 \n\t" | |
16173 | 1333 "movq %%mm0, (%0, %%"REG_S") \n\t" |
1334 "movq %%mm1, 8(%0, %%"REG_S") \n\t" | |
1335 "add $16, %%"REG_S" \n\t" | |
4233 | 1336 " jnz 1b \n\t" |
1337 :: "r" (samples+256), "m" (bias) | |
16173 | 1338 : "%"REG_S |
4233 | 1339 ); |
1340 } | |
1341 | |
1342 static void mix3to2_3dnow (sample_t * samples, sample_t bias) | |
1343 { | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27445
diff
changeset
|
1344 __asm__ volatile( |
4233 | 1345 "movd %1, %%mm7 \n\t" |
1346 "punpckldq %1, %%mm7 \n\t" | |
16173 | 1347 "mov $-1024, %%"REG_S" \n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
1348 ASMALIGN(4) |
4233 | 1349 "1: \n\t" |
16173 | 1350 "movq 1024(%0, %%"REG_S"), %%mm0\n\t" |
1351 "movq 1032(%0, %%"REG_S"), %%mm1\n\t" | |
4233 | 1352 "pfadd %%mm7, %%mm0 \n\t" //common |
1353 "pfadd %%mm7, %%mm1 \n\t" //common | |
16173 | 1354 "movq (%0, %%"REG_S"), %%mm2 \n\t" |
1355 "movq 8(%0, %%"REG_S"), %%mm3 \n\t" | |
1356 "movq 2048(%0, %%"REG_S"), %%mm4\n\t" | |
1357 "movq 2056(%0, %%"REG_S"), %%mm5\n\t" | |
4233 | 1358 "pfadd %%mm0, %%mm2 \n\t" |
5912 | 1359 "pfadd %%mm1, %%mm3 \n\t" |
4233 | 1360 "pfadd %%mm0, %%mm4 \n\t" |
5912 | 1361 "pfadd %%mm1, %%mm5 \n\t" |
16173 | 1362 "movq %%mm2, (%0, %%"REG_S") \n\t" |
1363 "movq %%mm3, 8(%0, %%"REG_S") \n\t" | |
1364 "movq %%mm4, 1024(%0, %%"REG_S")\n\t" | |
1365 "movq %%mm5, 1032(%0, %%"REG_S")\n\t" | |
1366 "add $16, %%"REG_S" \n\t" | |
4233 | 1367 " jnz 1b \n\t" |
1368 :: "r" (samples+256), "m" (bias) | |
16173 | 1369 : "%"REG_S |
4233 | 1370 ); |
1371 } | |
1372 | |
1373 static void mix21to2_3dnow (sample_t * left, sample_t * right, sample_t bias) | |
1374 { | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27445
diff
changeset
|
1375 __asm__ volatile( |
4233 | 1376 "movd %2, %%mm7 \n\t" |
1377 "punpckldq %2, %%mm7 \n\t" | |
16173 | 1378 "mov $-1024, %%"REG_S" \n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
1379 ASMALIGN(4) |
4233 | 1380 "1: \n\t" |
16173 | 1381 "movq 1024(%1, %%"REG_S"), %%mm0\n\t" |
1382 "movq 1032(%1, %%"REG_S"), %%mm1\n\t" | |
4233 | 1383 "pfadd %%mm7, %%mm0 \n\t" //common |
1384 "pfadd %%mm7, %%mm1 \n\t" //common | |
16173 | 1385 "movq (%0, %%"REG_S"), %%mm2 \n\t" |
1386 "movq 8(%0, %%"REG_S"), %%mm3 \n\t" | |
1387 "movq (%1, %%"REG_S"), %%mm4 \n\t" | |
1388 "movq 8(%1, %%"REG_S"), %%mm5 \n\t" | |
4233 | 1389 "pfadd %%mm0, %%mm2 \n\t" |
1390 "pfadd %%mm1, %%mm3 \n\t" | |
1391 "pfadd %%mm0, %%mm4 \n\t" | |
1392 "pfadd %%mm1, %%mm5 \n\t" | |
16173 | 1393 "movq %%mm2, (%0, %%"REG_S") \n\t" |
1394 "movq %%mm3, 8(%0, %%"REG_S") \n\t" | |
1395 "movq %%mm4, (%1, %%"REG_S") \n\t" | |
1396 "movq %%mm5, 8(%1, %%"REG_S") \n\t" | |
1397 "add $16, %%"REG_S" \n\t" | |
4233 | 1398 " jnz 1b \n\t" |
1399 :: "r" (left+256), "r" (right+256), "m" (bias) | |
16173 | 1400 : "%"REG_S |
4233 | 1401 ); |
1402 } | |
1403 | |
1404 static void mix21toS_3dnow (sample_t * samples, sample_t bias) | |
1405 { | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27445
diff
changeset
|
1406 __asm__ volatile( |
4233 | 1407 "movd %1, %%mm7 \n\t" |
1408 "punpckldq %1, %%mm7 \n\t" | |
16173 | 1409 "mov $-1024, %%"REG_S" \n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
1410 ASMALIGN(4) |
4233 | 1411 "1: \n\t" |
16173 | 1412 "movq 2048(%0, %%"REG_S"), %%mm0\n\t" // surround |
1413 "movq 2056(%0, %%"REG_S"), %%mm1\n\t" // surround | |
1414 "movq (%0, %%"REG_S"), %%mm2 \n\t" | |
1415 "movq 8(%0, %%"REG_S"), %%mm3 \n\t" | |
1416 "movq 1024(%0, %%"REG_S"), %%mm4\n\t" | |
1417 "movq 1032(%0, %%"REG_S"), %%mm5\n\t" | |
4233 | 1418 "pfadd %%mm7, %%mm2 \n\t" |
1419 "pfadd %%mm7, %%mm3 \n\t" | |
1420 "pfadd %%mm7, %%mm4 \n\t" | |
1421 "pfadd %%mm7, %%mm5 \n\t" | |
1422 "pfsub %%mm0, %%mm2 \n\t" | |
1423 "pfsub %%mm1, %%mm3 \n\t" | |
1424 "pfadd %%mm0, %%mm4 \n\t" | |
1425 "pfadd %%mm1, %%mm5 \n\t" | |
16173 | 1426 "movq %%mm2, (%0, %%"REG_S") \n\t" |
1427 "movq %%mm3, 8(%0, %%"REG_S") \n\t" | |
1428 "movq %%mm4, 1024(%0, %%"REG_S")\n\t" | |
1429 "movq %%mm5, 1032(%0, %%"REG_S")\n\t" | |
1430 "add $16, %%"REG_S" \n\t" | |
4233 | 1431 " jnz 1b \n\t" |
1432 :: "r" (samples+256), "m" (bias) | |
16173 | 1433 : "%"REG_S |
4233 | 1434 ); |
1435 } | |
1436 | |
1437 static void mix31to2_3dnow (sample_t * samples, sample_t bias) | |
1438 { | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27445
diff
changeset
|
1439 __asm__ volatile( |
4233 | 1440 "movd %1, %%mm7 \n\t" |
1441 "punpckldq %1, %%mm7 \n\t" | |
16173 | 1442 "mov $-1024, %%"REG_S" \n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
1443 ASMALIGN(4) |
4233 | 1444 "1: \n\t" |
16173 | 1445 "movq 1024(%0, %%"REG_S"), %%mm0\n\t" |
1446 "movq 1032(%0, %%"REG_S"), %%mm1\n\t" | |
1447 "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" | |
1448 "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" | |
4233 | 1449 "pfadd %%mm7, %%mm0 \n\t" // common |
1450 "pfadd %%mm7, %%mm1 \n\t" // common | |
16173 | 1451 "movq (%0, %%"REG_S"), %%mm2 \n\t" |
1452 "movq 8(%0, %%"REG_S"), %%mm3 \n\t" | |
1453 "movq 2048(%0, %%"REG_S"), %%mm4\n\t" | |
1454 "movq 2056(%0, %%"REG_S"), %%mm5\n\t" | |
4233 | 1455 "pfadd %%mm0, %%mm2 \n\t" |
1456 "pfadd %%mm1, %%mm3 \n\t" | |
1457 "pfadd %%mm0, %%mm4 \n\t" | |
1458 "pfadd %%mm1, %%mm5 \n\t" | |
16173 | 1459 "movq %%mm2, (%0, %%"REG_S") \n\t" |
1460 "movq %%mm3, 8(%0, %%"REG_S") \n\t" | |
1461 "movq %%mm4, 1024(%0, %%"REG_S")\n\t" | |
1462 "movq %%mm5, 1032(%0, %%"REG_S")\n\t" | |
1463 "add $16, %%"REG_S" \n\t" | |
4233 | 1464 " jnz 1b \n\t" |
1465 :: "r" (samples+256), "m" (bias) | |
16173 | 1466 : "%"REG_S |
4233 | 1467 ); |
1468 } | |
1469 | |
1470 static void mix31toS_3dnow (sample_t * samples, sample_t bias) | |
1471 { | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27445
diff
changeset
|
1472 __asm__ volatile( |
4233 | 1473 "movd %1, %%mm7 \n\t" |
1474 "punpckldq %1, %%mm7 \n\t" | |
16173 | 1475 "mov $-1024, %%"REG_S" \n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
1476 ASMALIGN(4) |
4233 | 1477 "1: \n\t" |
16173 | 1478 "movq 1024(%0, %%"REG_S"), %%mm0\n\t" |
1479 "movq 1032(%0, %%"REG_S"), %%mm1\n\t" | |
4233 | 1480 "pfadd %%mm7, %%mm0 \n\t" // common |
1481 "pfadd %%mm7, %%mm1 \n\t" // common | |
16173 | 1482 "movq (%0, %%"REG_S"), %%mm2 \n\t" |
1483 "movq 8(%0, %%"REG_S"), %%mm3 \n\t" | |
1484 "movq 2048(%0, %%"REG_S"), %%mm4\n\t" | |
1485 "movq 2056(%0, %%"REG_S"), %%mm5\n\t" | |
4233 | 1486 "pfadd %%mm0, %%mm2 \n\t" |
1487 "pfadd %%mm1, %%mm3 \n\t" | |
1488 "pfadd %%mm0, %%mm4 \n\t" | |
1489 "pfadd %%mm1, %%mm5 \n\t" | |
16173 | 1490 "movq 3072(%0, %%"REG_S"), %%mm0\n\t" // surround |
1491 "movq 3080(%0, %%"REG_S"), %%mm1\n\t" // surround | |
4233 | 1492 "pfsub %%mm0, %%mm2 \n\t" |
1493 "pfsub %%mm1, %%mm3 \n\t" | |
1494 "pfadd %%mm0, %%mm4 \n\t" | |
1495 "pfadd %%mm1, %%mm5 \n\t" | |
16173 | 1496 "movq %%mm2, (%0, %%"REG_S") \n\t" |
1497 "movq %%mm3, 8(%0, %%"REG_S") \n\t" | |
1498 "movq %%mm4, 1024(%0, %%"REG_S")\n\t" | |
1499 "movq %%mm5, 1032(%0, %%"REG_S")\n\t" | |
1500 "add $16, %%"REG_S" \n\t" | |
4233 | 1501 " jnz 1b \n\t" |
1502 :: "r" (samples+256), "m" (bias) | |
16173 | 1503 : "%"REG_S |
4233 | 1504 ); |
1505 } | |
1506 | |
1507 static void mix22toS_3dnow (sample_t * samples, sample_t bias) | |
1508 { | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27445
diff
changeset
|
1509 __asm__ volatile( |
4233 | 1510 "movd %1, %%mm7 \n\t" |
1511 "punpckldq %1, %%mm7 \n\t" | |
16173 | 1512 "mov $-1024, %%"REG_S" \n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
1513 ASMALIGN(4) |
4233 | 1514 "1: \n\t" |
16173 | 1515 "movq 2048(%0, %%"REG_S"), %%mm0\n\t" |
1516 "movq 2056(%0, %%"REG_S"), %%mm1\n\t" | |
1517 "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" // surround | |
1518 "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" // surround | |
1519 "movq (%0, %%"REG_S"), %%mm2 \n\t" | |
1520 "movq 8(%0, %%"REG_S"), %%mm3 \n\t" | |
1521 "movq 1024(%0, %%"REG_S"), %%mm4\n\t" | |
1522 "movq 1032(%0, %%"REG_S"), %%mm5\n\t" | |
4233 | 1523 "pfadd %%mm7, %%mm2 \n\t" |
1524 "pfadd %%mm7, %%mm3 \n\t" | |
1525 "pfadd %%mm7, %%mm4 \n\t" | |
1526 "pfadd %%mm7, %%mm5 \n\t" | |
1527 "pfsub %%mm0, %%mm2 \n\t" | |
1528 "pfsub %%mm1, %%mm3 \n\t" | |
1529 "pfadd %%mm0, %%mm4 \n\t" | |
1530 "pfadd %%mm1, %%mm5 \n\t" | |
16173 | 1531 "movq %%mm2, (%0, %%"REG_S") \n\t" |
1532 "movq %%mm3, 8(%0, %%"REG_S") \n\t" | |
1533 "movq %%mm4, 1024(%0, %%"REG_S")\n\t" | |
1534 "movq %%mm5, 1032(%0, %%"REG_S")\n\t" | |
1535 "add $16, %%"REG_S" \n\t" | |
4233 | 1536 " jnz 1b \n\t" |
1537 :: "r" (samples+256), "m" (bias) | |
16173 | 1538 : "%"REG_S |
4233 | 1539 ); |
1540 } | |
1541 | |
1542 static void mix32to2_3dnow (sample_t * samples, sample_t bias) | |
1543 { | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27445
diff
changeset
|
1544 __asm__ volatile( |
4233 | 1545 "movd %1, %%mm7 \n\t" |
1546 "punpckldq %1, %%mm7 \n\t" | |
16173 | 1547 "mov $-1024, %%"REG_S" \n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
1548 ASMALIGN(4) |
4233 | 1549 "1: \n\t" |
16173 | 1550 "movq 1024(%0, %%"REG_S"), %%mm0\n\t" |
1551 "movq 1032(%0, %%"REG_S"), %%mm1\n\t" | |
4233 | 1552 "pfadd %%mm7, %%mm0 \n\t" // common |
1553 "pfadd %%mm7, %%mm1 \n\t" // common | |
1554 "movq %%mm0, %%mm2 \n\t" // common | |
1555 "movq %%mm1, %%mm3 \n\t" // common | |
16173 | 1556 "pfadd (%0, %%"REG_S"), %%mm0 \n\t" |
1557 "pfadd 8(%0, %%"REG_S"), %%mm1 \n\t" | |
1558 "pfadd 2048(%0, %%"REG_S"), %%mm2\n\t" | |
1559 "pfadd 2056(%0, %%"REG_S"), %%mm3\n\t" | |
1560 "pfadd 3072(%0, %%"REG_S"), %%mm0\n\t" | |
1561 "pfadd 3080(%0, %%"REG_S"), %%mm1\n\t" | |
1562 "pfadd 4096(%0, %%"REG_S"), %%mm2\n\t" | |
1563 "pfadd 4104(%0, %%"REG_S"), %%mm3\n\t" | |
1564 "movq %%mm0, (%0, %%"REG_S") \n\t" | |
1565 "movq %%mm1, 8(%0, %%"REG_S") \n\t" | |
1566 "movq %%mm2, 1024(%0, %%"REG_S")\n\t" | |
1567 "movq %%mm3, 1032(%0, %%"REG_S")\n\t" | |
1568 "add $16, %%"REG_S" \n\t" | |
4233 | 1569 " jnz 1b \n\t" |
1570 :: "r" (samples+256), "m" (bias) | |
16173 | 1571 : "%"REG_S |
4233 | 1572 ); |
1573 } | |
1574 | |
1575 /* todo: should be optimized better */ | |
1576 static void mix32toS_3dnow (sample_t * samples, sample_t bias) | |
1577 { | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27445
diff
changeset
|
1578 __asm__ volatile( |
16173 | 1579 "mov $-1024, %%"REG_S" \n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
1580 ASMALIGN(4) |
4233 | 1581 "1: \n\t" |
1582 "movd %1, %%mm7 \n\t" | |
1583 "punpckldq %1, %%mm7 \n\t" | |
16173 | 1584 "movq 1024(%0, %%"REG_S"), %%mm0\n\t" |
1585 "movq 1032(%0, %%"REG_S"), %%mm1\n\t" | |
1586 "movq 3072(%0, %%"REG_S"), %%mm4\n\t" | |
1587 "movq 3080(%0, %%"REG_S"), %%mm5\n\t" | |
4233 | 1588 "pfadd %%mm7, %%mm0 \n\t" // common |
1589 "pfadd %%mm7, %%mm1 \n\t" // common | |
16173 | 1590 "pfadd 4096(%0, %%"REG_S"), %%mm4\n\t" // surround |
1591 "pfadd 4104(%0, %%"REG_S"), %%mm5\n\t" // surround | |
1592 "movq (%0, %%"REG_S"), %%mm2 \n\t" | |
1593 "movq 8(%0, %%"REG_S"), %%mm3 \n\t" | |
1594 "movq 2048(%0, %%"REG_S"), %%mm6\n\t" | |
1595 "movq 2056(%0, %%"REG_S"), %%mm7\n\t" | |
4233 | 1596 "pfsub %%mm4, %%mm2 \n\t" |
1597 "pfsub %%mm5, %%mm3 \n\t" | |
1598 "pfadd %%mm4, %%mm6 \n\t" | |
1599 "pfadd %%mm5, %%mm7 \n\t" | |
1600 "pfadd %%mm0, %%mm2 \n\t" | |
1601 "pfadd %%mm1, %%mm3 \n\t" | |
1602 "pfadd %%mm0, %%mm6 \n\t" | |
1603 "pfadd %%mm1, %%mm7 \n\t" | |
16173 | 1604 "movq %%mm2, (%0, %%"REG_S") \n\t" |
1605 "movq %%mm3, 8(%0, %%"REG_S") \n\t" | |
1606 "movq %%mm6, 1024(%0, %%"REG_S")\n\t" | |
1607 "movq %%mm7, 1032(%0, %%"REG_S")\n\t" | |
1608 "add $16, %%"REG_S" \n\t" | |
4233 | 1609 " jnz 1b \n\t" |
1610 :: "r" (samples+256), "m" (bias) | |
16173 | 1611 : "%"REG_S |
4233 | 1612 ); |
1613 } | |
1614 | |
1615 static void move2to1_3dnow (sample_t * src, sample_t * dest, sample_t bias) | |
1616 { | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27445
diff
changeset
|
1617 __asm__ volatile( |
4233 | 1618 "movd %2, %%mm7 \n\t" |
1619 "punpckldq %2, %%mm7 \n\t" | |
16173 | 1620 "mov $-1024, %%"REG_S" \n\t" |
19372
6334c14b38eb
Replace asmalign.h hack by ASMALIGN cpp macros from config.h.
diego
parents:
18783
diff
changeset
|
1621 ASMALIGN(4) |
4233 | 1622 "1: \n\t" |
16173 | 1623 "movq (%0, %%"REG_S"), %%mm0 \n\t" |
1624 "movq 8(%0, %%"REG_S"), %%mm1 \n\t" | |
1625 "movq 16(%0, %%"REG_S"), %%mm2 \n\t" | |
1626 "movq 24(%0, %%"REG_S"), %%mm3 \n\t" | |
1627 "pfadd 1024(%0, %%"REG_S"), %%mm0\n\t" | |
1628 "pfadd 1032(%0, %%"REG_S"), %%mm1\n\t" | |
1629 "pfadd 1040(%0, %%"REG_S"), %%mm2\n\t" | |
1630 "pfadd 1048(%0, %%"REG_S"), %%mm3\n\t" | |
4233 | 1631 "pfadd %%mm7, %%mm0 \n\t" |
1632 "pfadd %%mm7, %%mm1 \n\t" | |
1633 "pfadd %%mm7, %%mm2 \n\t" | |
1634 "pfadd %%mm7, %%mm3 \n\t" | |
16173 | 1635 "movq %%mm0, (%1, %%"REG_S") \n\t" |
1636 "movq %%mm1, 8(%1, %%"REG_S") \n\t" | |
1637 "movq %%mm2, 16(%1, %%"REG_S") \n\t" | |
1638 "movq %%mm3, 24(%1, %%"REG_S") \n\t" | |
1639 "add $32, %%"REG_S" \n\t" | |
4233 | 1640 " jnz 1b \n\t" |
1641 :: "r" (src+256), "r" (dest+256), "m" (bias) | |
16173 | 1642 : "%"REG_S |
4233 | 1643 ); |
1644 } | |
1645 | |
1646 static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t bias, | |
1647 sample_t clev, sample_t slev) | |
1648 { | |
1649 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) { | |
1650 | |
1651 case CONVERT (A52_CHANNEL, A52_CHANNEL2): | |
1652 memcpy (samples, samples + 256, 256 * sizeof (sample_t)); | |
1653 break; | |
1654 | |
1655 case CONVERT (A52_CHANNEL, A52_MONO): | |
1656 case CONVERT (A52_STEREO, A52_MONO): | |
1657 mix_2to1_3dnow: | |
1658 mix2to1_3dnow (samples, samples + 256, bias); | |
1659 break; | |
1660 | |
1661 case CONVERT (A52_2F1R, A52_MONO): | |
1662 if (slev == 0) | |
1663 goto mix_2to1_3dnow; | |
1664 case CONVERT (A52_3F, A52_MONO): | |
1665 mix_3to1_3dnow: | |
1666 mix3to1_3dnow (samples, bias); | |
1667 break; | |
1668 | |
1669 case CONVERT (A52_3F1R, A52_MONO): | |
1670 if (slev == 0) | |
1671 goto mix_3to1_3dnow; | |
1672 case CONVERT (A52_2F2R, A52_MONO): | |
1673 if (slev == 0) | |
1674 goto mix_2to1_3dnow; | |
1675 mix4to1_3dnow (samples, bias); | |
1676 break; | |
1677 | |
1678 case CONVERT (A52_3F2R, A52_MONO): | |
1679 if (slev == 0) | |
1680 goto mix_3to1_3dnow; | |
1681 mix5to1_3dnow (samples, bias); | |
1682 break; | |
1683 | |
1684 case CONVERT (A52_MONO, A52_DOLBY): | |
1685 memcpy (samples + 256, samples, 256 * sizeof (sample_t)); | |
1686 break; | |
1687 | |
1688 case CONVERT (A52_3F, A52_STEREO): | |
1689 case CONVERT (A52_3F, A52_DOLBY): | |
1690 mix_3to2_3dnow: | |
1691 mix3to2_3dnow (samples, bias); | |
1692 break; | |
1693 | |
1694 case CONVERT (A52_2F1R, A52_STEREO): | |
1695 if (slev == 0) | |
1696 break; | |
1697 mix21to2_3dnow (samples, samples + 256, bias); | |
1698 break; | |
1699 | |
1700 case CONVERT (A52_2F1R, A52_DOLBY): | |
1701 mix21toS_3dnow (samples, bias); | |
1702 break; | |
1703 | |
1704 case CONVERT (A52_3F1R, A52_STEREO): | |
1705 if (slev == 0) | |
1706 goto mix_3to2_3dnow; | |
1707 mix31to2_3dnow (samples, bias); | |
1708 break; | |
1709 | |
1710 case CONVERT (A52_3F1R, A52_DOLBY): | |
1711 mix31toS_3dnow (samples, bias); | |
1712 break; | |
1713 | |
1714 case CONVERT (A52_2F2R, A52_STEREO): | |
1715 if (slev == 0) | |
1716 break; | |
1717 mix2to1_3dnow (samples, samples + 512, bias); | |
1718 mix2to1_3dnow (samples + 256, samples + 768, bias); | |
1719 break; | |
1720 | |
1721 case CONVERT (A52_2F2R, A52_DOLBY): | |
1722 mix22toS_3dnow (samples, bias); | |
1723 break; | |
1724 | |
1725 case CONVERT (A52_3F2R, A52_STEREO): | |
1726 if (slev == 0) | |
1727 goto mix_3to2_3dnow; | |
1728 mix32to2_3dnow (samples, bias); | |
1729 break; | |
1730 | |
1731 case CONVERT (A52_3F2R, A52_DOLBY): | |
1732 mix32toS_3dnow (samples, bias); | |
1733 break; | |
1734 | |
1735 case CONVERT (A52_3F1R, A52_3F): | |
1736 if (slev == 0) | |
1737 break; | |
1738 mix21to2_3dnow (samples, samples + 512, bias); | |
1739 break; | |
1740 | |
1741 case CONVERT (A52_3F2R, A52_3F): | |
1742 if (slev == 0) | |
1743 break; | |
1744 mix2to1_3dnow (samples, samples + 768, bias); | |
1745 mix2to1_3dnow (samples + 512, samples + 1024, bias); | |
1746 break; | |
1747 | |
1748 case CONVERT (A52_3F1R, A52_2F1R): | |
1749 mix3to2_3dnow (samples, bias); | |
1750 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1751 break; | |
1752 | |
1753 case CONVERT (A52_2F2R, A52_2F1R): | |
1754 mix2to1_3dnow (samples + 512, samples + 768, bias); | |
1755 break; | |
1756 | |
1757 case CONVERT (A52_3F2R, A52_2F1R): | |
1758 mix3to2_3dnow (samples, bias); //FIXME possible bug? (output doesnt seem to be used) | |
1759 move2to1_3dnow (samples + 768, samples + 512, bias); | |
1760 break; | |
1761 | |
1762 case CONVERT (A52_3F2R, A52_3F1R): | |
1763 mix2to1_3dnow (samples + 768, samples + 1024, bias); | |
1764 break; | |
1765 | |
1766 case CONVERT (A52_2F1R, A52_2F2R): | |
1767 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t)); | |
1768 break; | |
1769 | |
1770 case CONVERT (A52_3F1R, A52_2F2R): | |
1771 mix3to2_3dnow (samples, bias); | |
1772 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1773 break; | |
1774 | |
1775 case CONVERT (A52_3F2R, A52_2F2R): | |
1776 mix3to2_3dnow (samples, bias); | |
1777 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t)); | |
1778 memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t)); | |
1779 break; | |
1780 | |
1781 case CONVERT (A52_3F1R, A52_3F2R): | |
12137 | 1782 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t)); |
4233 | 1783 break; |
1784 } | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
27445
diff
changeset
|
1785 __asm__ volatile("femms":::"memory"); |
4233 | 1786 } |
1787 | |
16173 | 1788 #endif // ARCH_X86 || ARCH_X86_64 |