annotate liba52/downmix.c @ 11729:9653be40fb5c

Port to new video filter layer: -vf disabled --> -vf-clr
author diego
date Fri, 02 Jan 2004 22:49:40 +0000
parents d0a34309e424
children 9f297a651e11
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
1 /*
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
2 * downmix.c
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
3 * Copyright (C) 2000-2001 Michel Lespinasse <walken@zoy.org>
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
5 *
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
6 * This file is part of a52dec, a free ATSC A-52 stream decoder.
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
7 * See http://liba52.sourceforge.net/ for updates.
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
8 *
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
9 * a52dec is free software; you can redistribute it and/or modify
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
10 * it under the terms of the GNU General Public License as published by
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
11 * the Free Software Foundation; either version 2 of the License, or
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
12 * (at your option) any later version.
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
13 *
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
14 * a52dec is distributed in the hope that it will be useful,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
17 * GNU General Public License for more details.
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
18 *
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
19 * You should have received a copy of the GNU General Public License
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
20 * along with this program; if not, write to the Free Software
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
3625
84ff13d4540c sse opt of mix32toS()
michael
parents: 3624
diff changeset
22 *
84ff13d4540c sse opt of mix32toS()
michael
parents: 3624
diff changeset
23 * SSE optimizations from Michael Niedermayer (michaelni@gmx.at)
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
24 */
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
25
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
26 #include "config.h"
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
27
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
28 #include <string.h>
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
29 #include <inttypes.h>
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
30
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
31 #include "a52.h"
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
32 #include "a52_internal.h"
3910
db1d556fcf58 runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents: 3904
diff changeset
33 #include "mm_accel.h"
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
34
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
/*
 * Pack an (input channel mode, output channel mode) pair into a single
 * integer so that the pair can be used as a `case` label: the output mode
 * is shifted left by three bits and the input mode is added to it.
 * Callers in this file pass an input mode already reduced to its low
 * three bits, which keeps distinct pairs from colliding.
 */
#define CONVERT(acmod,output) (((output) << 3) + (acmod))
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
36
3904
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
37
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
38 void (*downmix)(sample_t * samples, int acmod, int output, sample_t bias,
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
39 sample_t clev, sample_t slev)= NULL;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
40 void (*upmix)(sample_t * samples, int acmod, int output)= NULL;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
41
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
42 static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias,
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
43 sample_t clev, sample_t slev);
4233
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
44 static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t bias,
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
45 sample_t clev, sample_t slev);
3904
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
46 static void downmix_C (sample_t * samples, int acmod, int output, sample_t bias,
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
47 sample_t clev, sample_t slev);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
48 static void upmix_MMX (sample_t * samples, int acmod, int output);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
49 static void upmix_C (sample_t * samples, int acmod, int output);
3910
db1d556fcf58 runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents: 3904
diff changeset
50
db1d556fcf58 runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents: 3904
diff changeset
51 void downmix_accel_init(uint32_t mm_accel)
db1d556fcf58 runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents: 3904
diff changeset
52 {
db1d556fcf58 runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents: 3904
diff changeset
53 upmix= upmix_C;
db1d556fcf58 runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents: 3904
diff changeset
54 downmix= downmix_C;
db1d556fcf58 runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents: 3904
diff changeset
55 #ifdef ARCH_X86
db1d556fcf58 runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents: 3904
diff changeset
56 if(mm_accel & MM_ACCEL_X86_MMX) upmix= upmix_MMX;
db1d556fcf58 runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents: 3904
diff changeset
57 if(mm_accel & MM_ACCEL_X86_SSE) downmix= downmix_SSE;
4233
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
58 if(mm_accel & MM_ACCEL_X86_3DNOW) downmix= downmix_3dnow;
3910
db1d556fcf58 runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents: 3904
diff changeset
59 #endif
db1d556fcf58 runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents: 3904
diff changeset
60 }
db1d556fcf58 runtime cpudetect the liba52 way part 2 (downmix.c)
michael
parents: 3904
diff changeset
61
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
62 int downmix_init (int input, int flags, sample_t * level,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
63 sample_t clev, sample_t slev)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
64 {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
65 static uint8_t table[11][8] = {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
66 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
67 A52_STEREO, A52_STEREO, A52_STEREO, A52_STEREO},
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
68 {A52_MONO, A52_MONO, A52_MONO, A52_MONO,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
69 A52_MONO, A52_MONO, A52_MONO, A52_MONO},
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
70 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
71 A52_STEREO, A52_STEREO, A52_STEREO, A52_STEREO},
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
72 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
73 A52_STEREO, A52_3F, A52_STEREO, A52_3F},
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
74 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
75 A52_2F1R, A52_2F1R, A52_2F1R, A52_2F1R},
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
76 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_STEREO,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
77 A52_2F1R, A52_3F1R, A52_2F1R, A52_3F1R},
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
78 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
79 A52_2F2R, A52_2F2R, A52_2F2R, A52_2F2R},
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
80 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_3F,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
81 A52_2F2R, A52_3F2R, A52_2F2R, A52_3F2R},
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
82 {A52_CHANNEL1, A52_MONO, A52_MONO, A52_MONO,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
83 A52_MONO, A52_MONO, A52_MONO, A52_MONO},
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
84 {A52_CHANNEL2, A52_MONO, A52_MONO, A52_MONO,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
85 A52_MONO, A52_MONO, A52_MONO, A52_MONO},
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
86 {A52_CHANNEL, A52_DOLBY, A52_STEREO, A52_DOLBY,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
87 A52_DOLBY, A52_DOLBY, A52_DOLBY, A52_DOLBY}
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
88 };
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
89 int output;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
90
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
91 output = flags & A52_CHANNEL_MASK;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
92 if (output > A52_DOLBY)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
93 return -1;
3738
5c1da9bc17a3 memset(?, 0, 256*sizeof(float)) in MMX
michael
parents: 3679
diff changeset
94
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
95 output = table[output][input & 7];
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
96
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
97 if ((output == A52_STEREO) &&
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
98 ((input == A52_DOLBY) || ((input == A52_3F) && (clev == LEVEL_3DB))))
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
99 output = A52_DOLBY;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
100
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
101 if (flags & A52_ADJUST_LEVEL)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
102 switch (CONVERT (input & 7, output)) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
103
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
104 case CONVERT (A52_3F, A52_MONO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
105 *level *= LEVEL_3DB / (1 + clev);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
106 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
107
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
108 case CONVERT (A52_STEREO, A52_MONO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
109 case CONVERT (A52_2F2R, A52_2F1R):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
110 case CONVERT (A52_3F2R, A52_3F1R):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
111 level_3db:
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
112 *level *= LEVEL_3DB;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
113 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
114
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
115 case CONVERT (A52_3F2R, A52_2F1R):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
116 if (clev < LEVEL_PLUS3DB - 1)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
117 goto level_3db;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
118 /* break thru */
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
119 case CONVERT (A52_3F, A52_STEREO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
120 case CONVERT (A52_3F1R, A52_2F1R):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
121 case CONVERT (A52_3F1R, A52_2F2R):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
122 case CONVERT (A52_3F2R, A52_2F2R):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
123 *level /= 1 + clev;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
124 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
125
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
126 case CONVERT (A52_2F1R, A52_MONO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
127 *level *= LEVEL_PLUS3DB / (2 + slev);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
128 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
129
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
130 case CONVERT (A52_2F1R, A52_STEREO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
131 case CONVERT (A52_3F1R, A52_3F):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
132 *level /= 1 + slev * LEVEL_3DB;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
133 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
134
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
135 case CONVERT (A52_3F1R, A52_MONO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
136 *level *= LEVEL_3DB / (1 + clev + 0.5 * slev);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
137 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
138
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
139 case CONVERT (A52_3F1R, A52_STEREO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
140 *level /= 1 + clev + slev * LEVEL_3DB;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
141 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
142
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
143 case CONVERT (A52_2F2R, A52_MONO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
144 *level *= LEVEL_3DB / (1 + slev);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
145 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
146
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
147 case CONVERT (A52_2F2R, A52_STEREO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
148 case CONVERT (A52_3F2R, A52_3F):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
149 *level /= 1 + slev;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
150 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
151
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
152 case CONVERT (A52_3F2R, A52_MONO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
153 *level *= LEVEL_3DB / (1 + clev + slev);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
154 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
155
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
156 case CONVERT (A52_3F2R, A52_STEREO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
157 *level /= 1 + clev + slev;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
158 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
159
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
160 case CONVERT (A52_MONO, A52_DOLBY):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
161 *level *= LEVEL_PLUS3DB;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
162 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
163
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
164 case CONVERT (A52_3F, A52_DOLBY):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
165 case CONVERT (A52_2F1R, A52_DOLBY):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
166 *level *= 1 / (1 + LEVEL_3DB);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
167 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
168
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
169 case CONVERT (A52_3F1R, A52_DOLBY):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
170 case CONVERT (A52_2F2R, A52_DOLBY):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
171 *level *= 1 / (1 + 2 * LEVEL_3DB);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
172 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
173
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
174 case CONVERT (A52_3F2R, A52_DOLBY):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
175 *level *= 1 / (1 + 3 * LEVEL_3DB);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
176 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
177 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
178 return output;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
179 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
180
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
181 int downmix_coeff (sample_t * coeff, int acmod, int output, sample_t level,
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
182 sample_t clev, sample_t slev)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
183 {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
184 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
185
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
186 case CONVERT (A52_CHANNEL, A52_CHANNEL):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
187 case CONVERT (A52_MONO, A52_MONO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
188 case CONVERT (A52_STEREO, A52_STEREO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
189 case CONVERT (A52_3F, A52_3F):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
190 case CONVERT (A52_2F1R, A52_2F1R):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
191 case CONVERT (A52_3F1R, A52_3F1R):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
192 case CONVERT (A52_2F2R, A52_2F2R):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
193 case CONVERT (A52_3F2R, A52_3F2R):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
194 case CONVERT (A52_STEREO, A52_DOLBY):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
195 coeff[0] = coeff[1] = coeff[2] = coeff[3] = coeff[4] = level;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
196 return 0;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
197
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
198 case CONVERT (A52_CHANNEL, A52_MONO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
199 coeff[0] = coeff[1] = level * LEVEL_6DB;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
200 return 3;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
201
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
202 case CONVERT (A52_STEREO, A52_MONO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
203 coeff[0] = coeff[1] = level * LEVEL_3DB;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
204 return 3;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
205
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
206 case CONVERT (A52_3F, A52_MONO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
207 coeff[0] = coeff[2] = level * LEVEL_3DB;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
208 coeff[1] = level * clev * LEVEL_PLUS3DB;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
209 return 7;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
210
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
211 case CONVERT (A52_2F1R, A52_MONO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
212 coeff[0] = coeff[1] = level * LEVEL_3DB;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
213 coeff[2] = level * slev * LEVEL_3DB;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
214 return 7;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
215
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
216 case CONVERT (A52_2F2R, A52_MONO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
217 coeff[0] = coeff[1] = level * LEVEL_3DB;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
218 coeff[2] = coeff[3] = level * slev * LEVEL_3DB;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
219 return 15;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
220
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
221 case CONVERT (A52_3F1R, A52_MONO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
222 coeff[0] = coeff[2] = level * LEVEL_3DB;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
223 coeff[1] = level * clev * LEVEL_PLUS3DB;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
224 coeff[3] = level * slev * LEVEL_3DB;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
225 return 15;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
226
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
227 case CONVERT (A52_3F2R, A52_MONO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
228 coeff[0] = coeff[2] = level * LEVEL_3DB;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
229 coeff[1] = level * clev * LEVEL_PLUS3DB;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
230 coeff[3] = coeff[4] = level * slev * LEVEL_3DB;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
231 return 31;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
232
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
233 case CONVERT (A52_MONO, A52_DOLBY):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
234 coeff[0] = level * LEVEL_3DB;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
235 return 0;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
236
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
237 case CONVERT (A52_3F, A52_DOLBY):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
238 clev = LEVEL_3DB;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
239 case CONVERT (A52_3F, A52_STEREO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
240 case CONVERT (A52_3F1R, A52_2F1R):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
241 case CONVERT (A52_3F2R, A52_2F2R):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
242 coeff[0] = coeff[2] = coeff[3] = coeff[4] = level;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
243 coeff[1] = level * clev;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
244 return 7;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
245
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
246 case CONVERT (A52_2F1R, A52_DOLBY):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
247 slev = 1;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
248 case CONVERT (A52_2F1R, A52_STEREO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
249 coeff[0] = coeff[1] = level;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
250 coeff[2] = level * slev * LEVEL_3DB;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
251 return 7;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
252
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
253 case CONVERT (A52_3F1R, A52_DOLBY):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
254 clev = LEVEL_3DB;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
255 slev = 1;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
256 case CONVERT (A52_3F1R, A52_STEREO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
257 coeff[0] = coeff[2] = level;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
258 coeff[1] = level * clev;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
259 coeff[3] = level * slev * LEVEL_3DB;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
260 return 15;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
261
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
262 case CONVERT (A52_2F2R, A52_DOLBY):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
263 slev = LEVEL_3DB;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
264 case CONVERT (A52_2F2R, A52_STEREO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
265 coeff[0] = coeff[1] = level;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
266 coeff[2] = coeff[3] = level * slev;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
267 return 15;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
268
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
269 case CONVERT (A52_3F2R, A52_DOLBY):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
270 clev = LEVEL_3DB;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
271 case CONVERT (A52_3F2R, A52_2F1R):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
272 slev = LEVEL_3DB;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
273 case CONVERT (A52_3F2R, A52_STEREO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
274 coeff[0] = coeff[2] = level;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
275 coeff[1] = level * clev;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
276 coeff[3] = coeff[4] = level * slev;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
277 return 31;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
278
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
279 case CONVERT (A52_3F1R, A52_3F):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
280 coeff[0] = coeff[1] = coeff[2] = level;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
281 coeff[3] = level * slev * LEVEL_3DB;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
282 return 13;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
283
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
284 case CONVERT (A52_3F2R, A52_3F):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
285 coeff[0] = coeff[1] = coeff[2] = level;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
286 coeff[3] = coeff[4] = level * slev;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
287 return 29;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
288
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
289 case CONVERT (A52_2F2R, A52_2F1R):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
290 coeff[0] = coeff[1] = level;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
291 coeff[2] = coeff[3] = level * LEVEL_3DB;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
292 return 12;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
293
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
294 case CONVERT (A52_3F2R, A52_3F1R):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
295 coeff[0] = coeff[1] = coeff[2] = level;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
296 coeff[3] = coeff[4] = level * LEVEL_3DB;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
297 return 24;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
298
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
299 case CONVERT (A52_2F1R, A52_2F2R):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
300 coeff[0] = coeff[1] = level;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
301 coeff[2] = level * LEVEL_3DB;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
302 return 0;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
303
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
304 case CONVERT (A52_3F1R, A52_2F2R):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
305 coeff[0] = coeff[2] = level;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
306 coeff[1] = level * clev;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
307 coeff[3] = level * LEVEL_3DB;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
308 return 7;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
309
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
310 case CONVERT (A52_3F1R, A52_3F2R):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
311 coeff[0] = coeff[1] = coeff[2] = level;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
312 coeff[3] = level * LEVEL_3DB;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
313 return 0;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
314
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
315 case CONVERT (A52_CHANNEL, A52_CHANNEL1):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
316 coeff[0] = level;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
317 coeff[1] = 0;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
318 return 0;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
319
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
320 case CONVERT (A52_CHANNEL, A52_CHANNEL2):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
321 coeff[0] = 0;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
322 coeff[1] = level;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
323 return 0;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
324 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
325
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
326 return -1; /* NOTREACHED */
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
327 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
328
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
329 static void mix2to1 (sample_t * dest, sample_t * src, sample_t bias)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
330 {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
331 int i;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
332
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
333 for (i = 0; i < 256; i++)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
334 dest[i] += src[i] + bias;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
335 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
336
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
337 static void mix3to1 (sample_t * samples, sample_t bias)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
338 {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
339 int i;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
340
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
341 for (i = 0; i < 256; i++)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
342 samples[i] += samples[i + 256] + samples[i + 512] + bias;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
343 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
344
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
345 static void mix4to1 (sample_t * samples, sample_t bias)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
346 {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
347 int i;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
348
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
349 for (i = 0; i < 256; i++)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
350 samples[i] += (samples[i + 256] + samples[i + 512] +
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
351 samples[i + 768] + bias);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
352 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
353
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
354 static void mix5to1 (sample_t * samples, sample_t bias)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
355 {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
356 int i;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
357
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
358 for (i = 0; i < 256; i++)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
359 samples[i] += (samples[i + 256] + samples[i + 512] +
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
360 samples[i + 768] + samples[i + 1024] + bias);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
361 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
362
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
363 static void mix3to2 (sample_t * samples, sample_t bias)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
364 {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
365 int i;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
366 sample_t common;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
367
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
368 for (i = 0; i < 256; i++) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
369 common = samples[i + 256] + bias;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
370 samples[i] += common;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
371 samples[i + 256] = samples[i + 512] + common;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
372 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
373 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
374
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
375 static void mix21to2 (sample_t * left, sample_t * right, sample_t bias)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
376 {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
377 int i;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
378 sample_t common;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
379
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
380 for (i = 0; i < 256; i++) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
381 common = right[i + 256] + bias;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
382 left[i] += common;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
383 right[i] += common;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
384 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
385 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
386
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
387 static void mix21toS (sample_t * samples, sample_t bias)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
388 {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
389 int i;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
390 sample_t surround;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
391
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
392 for (i = 0; i < 256; i++) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
393 surround = samples[i + 512];
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
394 samples[i] += bias - surround;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
395 samples[i + 256] += bias + surround;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
396 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
397 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
398
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
399 static void mix31to2 (sample_t * samples, sample_t bias)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
400 {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
401 int i;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
402 sample_t common;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
403
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
404 for (i = 0; i < 256; i++) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
405 common = samples[i + 256] + samples[i + 768] + bias;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
406 samples[i] += common;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
407 samples[i + 256] = samples[i + 512] + common;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
408 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
409 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
410
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
411 static void mix31toS (sample_t * samples, sample_t bias)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
412 {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
413 int i;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
414 sample_t common, surround;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
415
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
416 for (i = 0; i < 256; i++) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
417 common = samples[i + 256] + bias;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
418 surround = samples[i + 768];
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
419 samples[i] += common - surround;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
420 samples[i + 256] = samples[i + 512] + common + surround;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
421 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
422 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
423
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
424 static void mix22toS (sample_t * samples, sample_t bias)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
425 {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
426 int i;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
427 sample_t surround;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
428
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
429 for (i = 0; i < 256; i++) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
430 surround = samples[i + 512] + samples[i + 768];
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
431 samples[i] += bias - surround;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
432 samples[i + 256] += bias + surround;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
433 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
434 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
435
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
436 static void mix32to2 (sample_t * samples, sample_t bias)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
437 {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
438 int i;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
439 sample_t common;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
440
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
441 for (i = 0; i < 256; i++) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
442 common = samples[i + 256] + bias;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
443 samples[i] += common + samples[i + 768];
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
444 samples[i + 256] = common + samples[i + 512] + samples[i + 1024];
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
445 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
446 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
447
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
448 static void mix32toS (sample_t * samples, sample_t bias)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
449 {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
450 int i;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
451 sample_t common, surround;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
452
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
453 for (i = 0; i < 256; i++) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
454 common = samples[i + 256] + bias;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
455 surround = samples[i + 768] + samples[i + 1024];
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
456 samples[i] += common - surround;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
457 samples[i + 256] = samples[i + 512] + common + surround;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
458 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
459 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
460
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
461 static void move2to1 (sample_t * src, sample_t * dest, sample_t bias)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
462 {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
463 int i;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
464
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
465 for (i = 0; i < 256; i++)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
466 dest[i] = src[i] + src[i + 256] + bias;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
467 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
468
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
469 static void zero (sample_t * samples)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
470 {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
471 int i;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
472 for (i = 0; i < 256; i++)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
473 samples[i] = 0;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
474 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
475
3904
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
476 static void downmix_C (sample_t * samples, int acmod, int output, sample_t bias,
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
477 sample_t clev, sample_t slev)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
478 {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
479 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
480
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
481 case CONVERT (A52_CHANNEL, A52_CHANNEL2):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
482 memcpy (samples, samples + 256, 256 * sizeof (sample_t));
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
483 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
484
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
485 case CONVERT (A52_CHANNEL, A52_MONO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
486 case CONVERT (A52_STEREO, A52_MONO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
487 mix_2to1:
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
488 mix2to1 (samples, samples + 256, bias);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
489 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
490
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
491 case CONVERT (A52_2F1R, A52_MONO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
492 if (slev == 0)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
493 goto mix_2to1;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
494 case CONVERT (A52_3F, A52_MONO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
495 mix_3to1:
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
496 mix3to1 (samples, bias);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
497 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
498
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
499 case CONVERT (A52_3F1R, A52_MONO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
500 if (slev == 0)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
501 goto mix_3to1;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
502 case CONVERT (A52_2F2R, A52_MONO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
503 if (slev == 0)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
504 goto mix_2to1;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
505 mix4to1 (samples, bias);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
506 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
507
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
508 case CONVERT (A52_3F2R, A52_MONO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
509 if (slev == 0)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
510 goto mix_3to1;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
511 mix5to1 (samples, bias);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
512 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
513
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
514 case CONVERT (A52_MONO, A52_DOLBY):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
515 memcpy (samples + 256, samples, 256 * sizeof (sample_t));
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
516 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
517
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
518 case CONVERT (A52_3F, A52_STEREO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
519 case CONVERT (A52_3F, A52_DOLBY):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
520 mix_3to2:
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
521 mix3to2 (samples, bias);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
522 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
523
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
524 case CONVERT (A52_2F1R, A52_STEREO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
525 if (slev == 0)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
526 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
527 mix21to2 (samples, samples + 256, bias);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
528 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
529
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
530 case CONVERT (A52_2F1R, A52_DOLBY):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
531 mix21toS (samples, bias);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
532 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
533
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
534 case CONVERT (A52_3F1R, A52_STEREO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
535 if (slev == 0)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
536 goto mix_3to2;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
537 mix31to2 (samples, bias);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
538 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
539
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
540 case CONVERT (A52_3F1R, A52_DOLBY):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
541 mix31toS (samples, bias);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
542 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
543
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
544 case CONVERT (A52_2F2R, A52_STEREO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
545 if (slev == 0)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
546 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
547 mix2to1 (samples, samples + 512, bias);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
548 mix2to1 (samples + 256, samples + 768, bias);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
549 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
550
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
551 case CONVERT (A52_2F2R, A52_DOLBY):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
552 mix22toS (samples, bias);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
553 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
554
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
555 case CONVERT (A52_3F2R, A52_STEREO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
556 if (slev == 0)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
557 goto mix_3to2;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
558 mix32to2 (samples, bias);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
559 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
560
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
561 case CONVERT (A52_3F2R, A52_DOLBY):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
562 mix32toS (samples, bias);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
563 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
564
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
565 case CONVERT (A52_3F1R, A52_3F):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
566 if (slev == 0)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
567 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
568 mix21to2 (samples, samples + 512, bias);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
569 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
570
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
571 case CONVERT (A52_3F2R, A52_3F):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
572 if (slev == 0)
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
573 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
574 mix2to1 (samples, samples + 768, bias);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
575 mix2to1 (samples + 512, samples + 1024, bias);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
576 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
577
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
578 case CONVERT (A52_3F1R, A52_2F1R):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
579 mix3to2 (samples, bias);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
580 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
581 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
582
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
583 case CONVERT (A52_2F2R, A52_2F1R):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
584 mix2to1 (samples + 512, samples + 768, bias);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
585 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
586
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
587 case CONVERT (A52_3F2R, A52_2F1R):
3678
265680bbdcfd mix3to2 in SSE
michael
parents: 3677
diff changeset
588 mix3to2 (samples, bias); //FIXME possible bug? (output doesnt seem to be used)
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
589 move2to1 (samples + 768, samples + 512, bias);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
590 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
591
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
592 case CONVERT (A52_3F2R, A52_3F1R):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
593 mix2to1 (samples + 768, samples + 1024, bias);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
594 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
595
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
596 case CONVERT (A52_2F1R, A52_2F2R):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
597 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
598 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
599
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
600 case CONVERT (A52_3F1R, A52_2F2R):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
601 mix3to2 (samples, bias);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
602 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
603 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
604
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
605 case CONVERT (A52_3F2R, A52_2F2R):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
606 mix3to2 (samples, bias);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
607 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
608 memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t));
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
609 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
610
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
611 case CONVERT (A52_3F1R, A52_3F2R):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
612 memcpy (samples + 1027, samples + 768, 256 * sizeof (sample_t));
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
613 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
614 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
615 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
616
3904
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
617 static void upmix_C (sample_t * samples, int acmod, int output)
3394
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
618 {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
619 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
620
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
621 case CONVERT (A52_CHANNEL, A52_CHANNEL2):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
622 memcpy (samples + 256, samples, 256 * sizeof (sample_t));
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
623 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
624
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
625 case CONVERT (A52_3F2R, A52_MONO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
626 zero (samples + 1024);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
627 case CONVERT (A52_3F1R, A52_MONO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
628 case CONVERT (A52_2F2R, A52_MONO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
629 zero (samples + 768);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
630 case CONVERT (A52_3F, A52_MONO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
631 case CONVERT (A52_2F1R, A52_MONO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
632 zero (samples + 512);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
633 case CONVERT (A52_CHANNEL, A52_MONO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
634 case CONVERT (A52_STEREO, A52_MONO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
635 zero (samples + 256);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
636 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
637
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
638 case CONVERT (A52_3F2R, A52_STEREO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
639 case CONVERT (A52_3F2R, A52_DOLBY):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
640 zero (samples + 1024);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
641 case CONVERT (A52_3F1R, A52_STEREO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
642 case CONVERT (A52_3F1R, A52_DOLBY):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
643 zero (samples + 768);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
644 case CONVERT (A52_3F, A52_STEREO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
645 case CONVERT (A52_3F, A52_DOLBY):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
646 mix_3to2:
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
647 memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t));
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
648 zero (samples + 256);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
649 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
650
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
651 case CONVERT (A52_2F2R, A52_STEREO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
652 case CONVERT (A52_2F2R, A52_DOLBY):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
653 zero (samples + 768);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
654 case CONVERT (A52_2F1R, A52_STEREO):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
655 case CONVERT (A52_2F1R, A52_DOLBY):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
656 zero (samples + 512);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
657 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
658
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
659 case CONVERT (A52_3F2R, A52_3F):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
660 zero (samples + 1024);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
661 case CONVERT (A52_3F1R, A52_3F):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
662 case CONVERT (A52_2F2R, A52_2F1R):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
663 zero (samples + 768);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
664 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
665
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
666 case CONVERT (A52_3F2R, A52_3F1R):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
667 zero (samples + 1024);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
668 break;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
669
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
670 case CONVERT (A52_3F2R, A52_2F1R):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
671 zero (samples + 1024);
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
672 case CONVERT (A52_3F1R, A52_2F1R):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
673 mix_31to21:
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
674 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
675 goto mix_3to2;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
676
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
677 case CONVERT (A52_3F2R, A52_2F2R):
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
678 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
679 goto mix_31to21;
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
680 }
35b18ed357c2 imported from liba52 CVS
arpi
parents:
diff changeset
681 }
3904
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
682
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
683 #ifdef ARCH_X86
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
684 static void mix2to1_SSE (sample_t * dest, sample_t * src, sample_t bias)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
685 {
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
686 asm volatile(
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
687 "movlps %2, %%xmm7 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
688 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
689 "movl $-1024, %%esi \n\t"
4233
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
690 ".balign 16\n\t"
3904
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
691 "1: \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
692 "movaps (%0, %%esi), %%xmm0 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
693 "movaps 16(%0, %%esi), %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
694 "addps (%1, %%esi), %%xmm0 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
695 "addps 16(%1, %%esi), %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
696 "addps %%xmm7, %%xmm0 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
697 "addps %%xmm7, %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
698 "movaps %%xmm0, (%1, %%esi) \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
699 "movaps %%xmm1, 16(%1, %%esi) \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
700 "addl $32, %%esi \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
701 " jnz 1b \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
702 :: "r" (src+256), "r" (dest+256), "m" (bias)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
703 : "%esi"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
704 );
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
705 }
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
706
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
707 static void mix3to1_SSE (sample_t * samples, sample_t bias)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
708 {
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
709 asm volatile(
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
710 "movlps %1, %%xmm7 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
711 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
712 "movl $-1024, %%esi \n\t"
4233
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
713 ".balign 16\n\t"
3904
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
714 "1: \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
715 "movaps (%0, %%esi), %%xmm0 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
716 "movaps 1024(%0, %%esi), %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
717 "addps 2048(%0, %%esi), %%xmm0 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
718 "addps %%xmm7, %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
719 "addps %%xmm1, %%xmm0 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
720 "movaps %%xmm0, (%0, %%esi) \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
721 "addl $16, %%esi \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
722 " jnz 1b \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
723 :: "r" (samples+256), "m" (bias)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
724 : "%esi"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
725 );
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
726 }
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
727
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
728 static void mix4to1_SSE (sample_t * samples, sample_t bias)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
729 {
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
730 asm volatile(
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
731 "movlps %1, %%xmm7 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
732 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
733 "movl $-1024, %%esi \n\t"
4233
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
734 ".balign 16\n\t"
3904
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
735 "1: \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
736 "movaps (%0, %%esi), %%xmm0 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
737 "movaps 1024(%0, %%esi), %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
738 "addps 2048(%0, %%esi), %%xmm0 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
739 "addps 3072(%0, %%esi), %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
740 "addps %%xmm7, %%xmm0 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
741 "addps %%xmm1, %%xmm0 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
742 "movaps %%xmm0, (%0, %%esi) \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
743 "addl $16, %%esi \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
744 " jnz 1b \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
745 :: "r" (samples+256), "m" (bias)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
746 : "%esi"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
747 );
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
748 }
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
749
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
750 static void mix5to1_SSE (sample_t * samples, sample_t bias)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
751 {
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
752 asm volatile(
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
753 "movlps %1, %%xmm7 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
754 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
755 "movl $-1024, %%esi \n\t"
4233
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
756 ".balign 16\n\t"
3904
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
757 "1: \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
758 "movaps (%0, %%esi), %%xmm0 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
759 "movaps 1024(%0, %%esi), %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
760 "addps 2048(%0, %%esi), %%xmm0 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
761 "addps 3072(%0, %%esi), %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
762 "addps %%xmm7, %%xmm0 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
763 "addps 4096(%0, %%esi), %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
764 "addps %%xmm1, %%xmm0 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
765 "movaps %%xmm0, (%0, %%esi) \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
766 "addl $16, %%esi \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
767 " jnz 1b \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
768 :: "r" (samples+256), "m" (bias)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
769 : "%esi"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
770 );
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
771 }
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
772
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
773 static void mix3to2_SSE (sample_t * samples, sample_t bias)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
774 {
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
775 asm volatile(
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
776 "movlps %1, %%xmm7 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
777 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
778 "movl $-1024, %%esi \n\t"
4233
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
779 ".balign 16\n\t"
3904
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
780 "1: \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
781 "movaps 1024(%0, %%esi), %%xmm0 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
782 "addps %%xmm7, %%xmm0 \n\t" //common
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
783 "movaps (%0, %%esi), %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
784 "movaps 2048(%0, %%esi), %%xmm2 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
785 "addps %%xmm0, %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
786 "addps %%xmm0, %%xmm2 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
787 "movaps %%xmm1, (%0, %%esi) \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
788 "movaps %%xmm2, 1024(%0, %%esi) \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
789 "addl $16, %%esi \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
790 " jnz 1b \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
791 :: "r" (samples+256), "m" (bias)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
792 : "%esi"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
793 );
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
794 }
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
795
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
796 static void mix21to2_SSE (sample_t * left, sample_t * right, sample_t bias)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
797 {
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
798 asm volatile(
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
799 "movlps %2, %%xmm7 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
800 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
801 "movl $-1024, %%esi \n\t"
4233
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
802 ".balign 16\n\t"
3904
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
803 "1: \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
804 "movaps 1024(%1, %%esi), %%xmm0 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
805 "addps %%xmm7, %%xmm0 \n\t" //common
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
806 "movaps (%0, %%esi), %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
807 "movaps (%1, %%esi), %%xmm2 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
808 "addps %%xmm0, %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
809 "addps %%xmm0, %%xmm2 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
810 "movaps %%xmm1, (%0, %%esi) \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
811 "movaps %%xmm2, (%1, %%esi) \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
812 "addl $16, %%esi \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
813 " jnz 1b \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
814 :: "r" (left+256), "r" (right+256), "m" (bias)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
815 : "%esi"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
816 );
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
817 }
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
818
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
819 static void mix21toS_SSE (sample_t * samples, sample_t bias)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
820 {
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
821 asm volatile(
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
822 "movlps %1, %%xmm7 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
823 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
824 "movl $-1024, %%esi \n\t"
4233
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
825 ".balign 16\n\t"
3904
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
826 "1: \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
827 "movaps 2048(%0, %%esi), %%xmm0 \n\t" // surround
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
828 "movaps (%0, %%esi), %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
829 "movaps 1024(%0, %%esi), %%xmm2 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
830 "addps %%xmm7, %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
831 "addps %%xmm7, %%xmm2 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
832 "subps %%xmm0, %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
833 "addps %%xmm0, %%xmm2 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
834 "movaps %%xmm1, (%0, %%esi) \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
835 "movaps %%xmm2, 1024(%0, %%esi) \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
836 "addl $16, %%esi \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
837 " jnz 1b \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
838 :: "r" (samples+256), "m" (bias)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
839 : "%esi"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
840 );
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
841 }
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
842
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
843 static void mix31to2_SSE (sample_t * samples, sample_t bias)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
844 {
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
845 asm volatile(
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
846 "movlps %1, %%xmm7 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
847 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
848 "movl $-1024, %%esi \n\t"
4233
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
849 ".balign 16\n\t"
3904
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
850 "1: \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
851 "movaps 1024(%0, %%esi), %%xmm0 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
852 "addps 3072(%0, %%esi), %%xmm0 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
853 "addps %%xmm7, %%xmm0 \n\t" // common
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
854 "movaps (%0, %%esi), %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
855 "movaps 2048(%0, %%esi), %%xmm2 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
856 "addps %%xmm0, %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
857 "addps %%xmm0, %%xmm2 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
858 "movaps %%xmm1, (%0, %%esi) \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
859 "movaps %%xmm2, 1024(%0, %%esi) \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
860 "addl $16, %%esi \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
861 " jnz 1b \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
862 :: "r" (samples+256), "m" (bias)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
863 : "%esi"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
864 );
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
865 }
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
866
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
867 static void mix31toS_SSE (sample_t * samples, sample_t bias)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
868 {
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
869 asm volatile(
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
870 "movlps %1, %%xmm7 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
871 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
872 "movl $-1024, %%esi \n\t"
4233
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
873 ".balign 16\n\t"
3904
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
874 "1: \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
875 "movaps 1024(%0, %%esi), %%xmm0 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
876 "movaps 3072(%0, %%esi), %%xmm3 \n\t" // surround
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
877 "addps %%xmm7, %%xmm0 \n\t" // common
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
878 "movaps (%0, %%esi), %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
879 "movaps 2048(%0, %%esi), %%xmm2 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
880 "addps %%xmm0, %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
881 "addps %%xmm0, %%xmm2 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
882 "subps %%xmm3, %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
883 "addps %%xmm3, %%xmm2 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
884 "movaps %%xmm1, (%0, %%esi) \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
885 "movaps %%xmm2, 1024(%0, %%esi) \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
886 "addl $16, %%esi \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
887 " jnz 1b \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
888 :: "r" (samples+256), "m" (bias)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
889 : "%esi"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
890 );
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
891 }
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
892
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
893 static void mix22toS_SSE (sample_t * samples, sample_t bias)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
894 {
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
895 asm volatile(
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
896 "movlps %1, %%xmm7 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
897 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
898 "movl $-1024, %%esi \n\t"
4233
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
899 ".balign 16\n\t"
3904
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
900 "1: \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
901 "movaps 2048(%0, %%esi), %%xmm0 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
902 "addps 3072(%0, %%esi), %%xmm0 \n\t" // surround
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
903 "movaps (%0, %%esi), %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
904 "movaps 1024(%0, %%esi), %%xmm2 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
905 "addps %%xmm7, %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
906 "addps %%xmm7, %%xmm2 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
907 "subps %%xmm0, %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
908 "addps %%xmm0, %%xmm2 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
909 "movaps %%xmm1, (%0, %%esi) \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
910 "movaps %%xmm2, 1024(%0, %%esi) \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
911 "addl $16, %%esi \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
912 " jnz 1b \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
913 :: "r" (samples+256), "m" (bias)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
914 : "%esi"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
915 );
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
916 }
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
917
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
918 static void mix32to2_SSE (sample_t * samples, sample_t bias)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
919 {
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
920 asm volatile(
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
921 "movlps %1, %%xmm7 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
922 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
923 "movl $-1024, %%esi \n\t"
4233
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
924 ".balign 16\n\t"
3904
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
925 "1: \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
926 "movaps 1024(%0, %%esi), %%xmm0 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
927 "addps %%xmm7, %%xmm0 \n\t" // common
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
928 "movaps %%xmm0, %%xmm1 \n\t" // common
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
929 "addps (%0, %%esi), %%xmm0 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
930 "addps 2048(%0, %%esi), %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
931 "addps 3072(%0, %%esi), %%xmm0 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
932 "addps 4096(%0, %%esi), %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
933 "movaps %%xmm0, (%0, %%esi) \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
934 "movaps %%xmm1, 1024(%0, %%esi) \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
935 "addl $16, %%esi \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
936 " jnz 1b \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
937 :: "r" (samples+256), "m" (bias)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
938 : "%esi"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
939 );
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
940 }
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
941
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
942 static void mix32toS_SSE (sample_t * samples, sample_t bias)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
943 {
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
944 asm volatile(
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
945 "movlps %1, %%xmm7 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
946 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
947 "movl $-1024, %%esi \n\t"
4233
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
948 ".balign 16\n\t"
3904
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
949 "1: \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
950 "movaps 1024(%0, %%esi), %%xmm0 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
951 "movaps 3072(%0, %%esi), %%xmm2 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
952 "addps %%xmm7, %%xmm0 \n\t" // common
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
953 "addps 4096(%0, %%esi), %%xmm2 \n\t" // surround
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
954 "movaps (%0, %%esi), %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
955 "movaps 2048(%0, %%esi), %%xmm3 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
956 "subps %%xmm2, %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
957 "addps %%xmm2, %%xmm3 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
958 "addps %%xmm0, %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
959 "addps %%xmm0, %%xmm3 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
960 "movaps %%xmm1, (%0, %%esi) \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
961 "movaps %%xmm3, 1024(%0, %%esi) \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
962 "addl $16, %%esi \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
963 " jnz 1b \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
964 :: "r" (samples+256), "m" (bias)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
965 : "%esi"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
966 );
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
967 }
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
968
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
969 static void move2to1_SSE (sample_t * src, sample_t * dest, sample_t bias)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
970 {
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
971 asm volatile(
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
972 "movlps %2, %%xmm7 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
973 "shufps $0x00, %%xmm7, %%xmm7 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
974 "movl $-1024, %%esi \n\t"
4233
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
975 ".balign 16\n\t"
3904
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
976 "1: \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
977 "movaps (%0, %%esi), %%xmm0 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
978 "movaps 16(%0, %%esi), %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
979 "addps 1024(%0, %%esi), %%xmm0 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
980 "addps 1040(%0, %%esi), %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
981 "addps %%xmm7, %%xmm0 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
982 "addps %%xmm7, %%xmm1 \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
983 "movaps %%xmm0, (%1, %%esi) \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
984 "movaps %%xmm1, 16(%1, %%esi) \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
985 "addl $32, %%esi \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
986 " jnz 1b \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
987 :: "r" (src+256), "r" (dest+256), "m" (bias)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
988 : "%esi"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
989 );
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
990 }
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
991
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
992 static void zero_MMX(sample_t * samples)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
993 {
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
994 asm volatile(
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
995 "movl $-1024, %%esi \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
996 "pxor %%mm0, %%mm0 \n\t"
4233
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
997 ".balign 16\n\t"
3904
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
998 "1: \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
999 "movq %%mm0, (%0, %%esi) \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1000 "movq %%mm0, 8(%0, %%esi) \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1001 "movq %%mm0, 16(%0, %%esi) \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1002 "movq %%mm0, 24(%0, %%esi) \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1003 "addl $32, %%esi \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1004 " jnz 1b \n\t"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1005 "emms"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1006 :: "r" (samples+256)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1007 : "%esi"
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1008 );
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1009 }
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1010
4233
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
/*
 * Copy `size` bytes from src to dest.
 *
 * Whole 64-byte blocks are moved through the eight MMX registers
 * (mm0..mm7); any remaining size % 64 bytes are copied with memcpy.
 * movq has no alignment requirement, but 8-byte aligned buffers are
 * expected to be faster.  The regions must not overlap.
 *
 * The pointers are passed to the asm in registers and advanced after every
 * block; the "memory" clobber tells the compiler that the asm reads and
 * writes memory behind those pointers.  emms is issued after the MMX loop
 * so that x87 floating point code can run afterwards.
 *
 * Note: untested on real 3DNow!/MMX-only hardware and currently unused.
 */
static void copy_MMX(void *dest,const void *src,unsigned size)
{
    unsigned char *out = dest;
    const unsigned char *in = src;
    unsigned blocks = size / 64;
    unsigned tail = size % 64;
    unsigned i;

    for(i=0;i<blocks;i++)
    {
	__asm __volatile(
	    "movq (%0), %%mm0\n\t"
	    "movq 8(%0), %%mm1\n\t"
	    "movq 16(%0), %%mm2\n\t"
	    "movq 24(%0), %%mm3\n\t"
	    "movq 32(%0), %%mm4\n\t"
	    "movq 40(%0), %%mm5\n\t"
	    "movq 48(%0), %%mm6\n\t"
	    "movq 56(%0), %%mm7\n\t"
	    "movq %%mm0, (%1)\n\t"
	    "movq %%mm1, 8(%1)\n\t"
	    "movq %%mm2, 16(%1)\n\t"
	    "movq %%mm3, 24(%1)\n\t"
	    "movq %%mm4, 32(%1)\n\t"
	    "movq %%mm5, 40(%1)\n\t"
	    "movq %%mm6, 48(%1)\n\t"
	    "movq %%mm7, 56(%1)\n\t"
	    :
	    :"r"(in),"r"(out)
	    :"memory");
	in += 64;
	out += 64;
    }

    /* Leave MMX state only if it was actually entered. */
    if (blocks)
	__asm __volatile("emms":::"memory");

    /* Bytes that do not fill a whole 64-byte block. */
    if (tail)
	memcpy(out, in, tail);
}
3904
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1043
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1044 static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias,
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1045 sample_t clev, sample_t slev)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1046 {
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1047 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1048
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1049 case CONVERT (A52_CHANNEL, A52_CHANNEL2):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1050 memcpy (samples, samples + 256, 256 * sizeof (sample_t));
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1051 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1052
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1053 case CONVERT (A52_CHANNEL, A52_MONO):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1054 case CONVERT (A52_STEREO, A52_MONO):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1055 mix_2to1_SSE:
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1056 mix2to1_SSE (samples, samples + 256, bias);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1057 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1058
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1059 case CONVERT (A52_2F1R, A52_MONO):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1060 if (slev == 0)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1061 goto mix_2to1_SSE;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1062 case CONVERT (A52_3F, A52_MONO):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1063 mix_3to1_SSE:
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1064 mix3to1_SSE (samples, bias);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1065 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1066
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1067 case CONVERT (A52_3F1R, A52_MONO):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1068 if (slev == 0)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1069 goto mix_3to1_SSE;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1070 case CONVERT (A52_2F2R, A52_MONO):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1071 if (slev == 0)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1072 goto mix_2to1_SSE;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1073 mix4to1_SSE (samples, bias);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1074 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1075
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1076 case CONVERT (A52_3F2R, A52_MONO):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1077 if (slev == 0)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1078 goto mix_3to1_SSE;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1079 mix5to1_SSE (samples, bias);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1080 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1081
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1082 case CONVERT (A52_MONO, A52_DOLBY):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1083 memcpy (samples + 256, samples, 256 * sizeof (sample_t));
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1084 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1085
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1086 case CONVERT (A52_3F, A52_STEREO):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1087 case CONVERT (A52_3F, A52_DOLBY):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1088 mix_3to2_SSE:
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1089 mix3to2_SSE (samples, bias);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1090 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1091
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1092 case CONVERT (A52_2F1R, A52_STEREO):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1093 if (slev == 0)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1094 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1095 mix21to2_SSE (samples, samples + 256, bias);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1096 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1097
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1098 case CONVERT (A52_2F1R, A52_DOLBY):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1099 mix21toS_SSE (samples, bias);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1100 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1101
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1102 case CONVERT (A52_3F1R, A52_STEREO):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1103 if (slev == 0)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1104 goto mix_3to2_SSE;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1105 mix31to2_SSE (samples, bias);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1106 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1107
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1108 case CONVERT (A52_3F1R, A52_DOLBY):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1109 mix31toS_SSE (samples, bias);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1110 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1111
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1112 case CONVERT (A52_2F2R, A52_STEREO):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1113 if (slev == 0)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1114 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1115 mix2to1_SSE (samples, samples + 512, bias);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1116 mix2to1_SSE (samples + 256, samples + 768, bias);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1117 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1118
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1119 case CONVERT (A52_2F2R, A52_DOLBY):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1120 mix22toS_SSE (samples, bias);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1121 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1122
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1123 case CONVERT (A52_3F2R, A52_STEREO):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1124 if (slev == 0)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1125 goto mix_3to2_SSE;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1126 mix32to2_SSE (samples, bias);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1127 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1128
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1129 case CONVERT (A52_3F2R, A52_DOLBY):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1130 mix32toS_SSE (samples, bias);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1131 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1132
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1133 case CONVERT (A52_3F1R, A52_3F):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1134 if (slev == 0)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1135 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1136 mix21to2_SSE (samples, samples + 512, bias);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1137 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1138
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1139 case CONVERT (A52_3F2R, A52_3F):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1140 if (slev == 0)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1141 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1142 mix2to1_SSE (samples, samples + 768, bias);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1143 mix2to1_SSE (samples + 512, samples + 1024, bias);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1144 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1145
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1146 case CONVERT (A52_3F1R, A52_2F1R):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1147 mix3to2_SSE (samples, bias);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1148 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1149 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1150
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1151 case CONVERT (A52_2F2R, A52_2F1R):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1152 mix2to1_SSE (samples + 512, samples + 768, bias);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1153 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1154
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1155 case CONVERT (A52_3F2R, A52_2F1R):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1156 mix3to2_SSE (samples, bias); //FIXME possible bug? (output doesnt seem to be used)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1157 move2to1_SSE (samples + 768, samples + 512, bias);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1158 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1159
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1160 case CONVERT (A52_3F2R, A52_3F1R):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1161 mix2to1_SSE (samples + 768, samples + 1024, bias);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1162 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1163
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1164 case CONVERT (A52_2F1R, A52_2F2R):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1165 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1166 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1167
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1168 case CONVERT (A52_3F1R, A52_2F2R):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1169 mix3to2_SSE (samples, bias);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1170 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1171 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1172
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1173 case CONVERT (A52_3F2R, A52_2F2R):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1174 mix3to2_SSE (samples, bias);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1175 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1176 memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t));
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1177 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1178
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1179 case CONVERT (A52_3F1R, A52_3F2R):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1180 memcpy (samples + 1027, samples + 768, 256 * sizeof (sample_t));
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1181 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1182 }
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1183 }
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1184
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1185 static void upmix_MMX (sample_t * samples, int acmod, int output)
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1186 {
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1187 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1188
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1189 case CONVERT (A52_CHANNEL, A52_CHANNEL2):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1190 memcpy (samples + 256, samples, 256 * sizeof (sample_t));
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1191 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1192
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1193 case CONVERT (A52_3F2R, A52_MONO):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1194 zero_MMX (samples + 1024);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1195 case CONVERT (A52_3F1R, A52_MONO):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1196 case CONVERT (A52_2F2R, A52_MONO):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1197 zero_MMX (samples + 768);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1198 case CONVERT (A52_3F, A52_MONO):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1199 case CONVERT (A52_2F1R, A52_MONO):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1200 zero_MMX (samples + 512);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1201 case CONVERT (A52_CHANNEL, A52_MONO):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1202 case CONVERT (A52_STEREO, A52_MONO):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1203 zero_MMX (samples + 256);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1204 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1205
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1206 case CONVERT (A52_3F2R, A52_STEREO):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1207 case CONVERT (A52_3F2R, A52_DOLBY):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1208 zero_MMX (samples + 1024);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1209 case CONVERT (A52_3F1R, A52_STEREO):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1210 case CONVERT (A52_3F1R, A52_DOLBY):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1211 zero_MMX (samples + 768);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1212 case CONVERT (A52_3F, A52_STEREO):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1213 case CONVERT (A52_3F, A52_DOLBY):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1214 mix_3to2_MMX:
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1215 memcpy (samples + 512, samples + 256, 256 * sizeof (sample_t));
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1216 zero_MMX (samples + 256);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1217 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1218
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1219 case CONVERT (A52_2F2R, A52_STEREO):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1220 case CONVERT (A52_2F2R, A52_DOLBY):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1221 zero_MMX (samples + 768);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1222 case CONVERT (A52_2F1R, A52_STEREO):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1223 case CONVERT (A52_2F1R, A52_DOLBY):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1224 zero_MMX (samples + 512);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1225 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1226
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1227 case CONVERT (A52_3F2R, A52_3F):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1228 zero_MMX (samples + 1024);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1229 case CONVERT (A52_3F1R, A52_3F):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1230 case CONVERT (A52_2F2R, A52_2F1R):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1231 zero_MMX (samples + 768);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1232 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1233
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1234 case CONVERT (A52_3F2R, A52_3F1R):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1235 zero_MMX (samples + 1024);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1236 break;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1237
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1238 case CONVERT (A52_3F2R, A52_2F1R):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1239 zero_MMX (samples + 1024);
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1240 case CONVERT (A52_3F1R, A52_2F1R):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1241 mix_31to21_MMX:
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1242 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1243 goto mix_3to2_MMX;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1244
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1245 case CONVERT (A52_3F2R, A52_2F2R):
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1246 memcpy (samples + 1024, samples + 768, 256 * sizeof (sample_t));
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1247 goto mix_31to21_MMX;
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1248 }
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1249 }
4233
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1250
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1251 static void mix2to1_3dnow (sample_t * dest, sample_t * src, sample_t bias)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1252 {
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1253 asm volatile(
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1254 "movd %2, %%mm7 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1255 "punpckldq %2, %%mm7 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1256 "movl $-1024, %%esi \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1257 ".balign 16\n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1258 "1: \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1259 "movq (%0, %%esi), %%mm0 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1260 "movq 8(%0, %%esi), %%mm1 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1261 "movq 16(%0, %%esi), %%mm2 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1262 "movq 24(%0, %%esi), %%mm3 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1263 "pfadd (%1, %%esi), %%mm0 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1264 "pfadd 8(%1, %%esi), %%mm1 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1265 "pfadd 16(%1, %%esi), %%mm2 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1266 "pfadd 24(%1, %%esi), %%mm3 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1267 "pfadd %%mm7, %%mm0 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1268 "pfadd %%mm7, %%mm1 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1269 "pfadd %%mm7, %%mm2 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1270 "pfadd %%mm7, %%mm3 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1271 "movq %%mm0, (%1, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1272 "movq %%mm1, 8(%1, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1273 "movq %%mm2, 16(%1, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1274 "movq %%mm3, 24(%1, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1275 "addl $32, %%esi \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1276 " jnz 1b \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1277 :: "r" (src+256), "r" (dest+256), "m" (bias)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1278 : "%esi"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1279 );
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1280 }
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1281
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1282 static void mix3to1_3dnow (sample_t * samples, sample_t bias)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1283 {
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1284 asm volatile(
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1285 "movd %1, %%mm7 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1286 "punpckldq %1, %%mm7 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1287 "movl $-1024, %%esi \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1288 ".balign 16\n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1289 "1: \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1290 "movq (%0, %%esi), %%mm0 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1291 "movq 8(%0, %%esi), %%mm1 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1292 "movq 1024(%0, %%esi), %%mm2 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1293 "movq 1032(%0, %%esi), %%mm3 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1294 "pfadd 2048(%0, %%esi), %%mm0 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1295 "pfadd 2056(%0, %%esi), %%mm1 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1296 "pfadd %%mm7, %%mm0 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1297 "pfadd %%mm7, %%mm1 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1298 "pfadd %%mm2, %%mm0 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1299 "pfadd %%mm3, %%mm1 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1300 "movq %%mm0, (%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1301 "movq %%mm1, 8(%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1302 "addl $16, %%esi \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1303 " jnz 1b \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1304 :: "r" (samples+256), "m" (bias)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1305 : "%esi"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1306 );
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1307 }
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1308
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1309 static void mix4to1_3dnow (sample_t * samples, sample_t bias)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1310 {
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1311 asm volatile(
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1312 "movd %1, %%mm7 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1313 "punpckldq %1, %%mm7 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1314 "movl $-1024, %%esi \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1315 ".balign 16\n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1316 "1: \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1317 "movq (%0, %%esi), %%mm0 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1318 "movq 8(%0, %%esi), %%mm1 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1319 "movq 1024(%0, %%esi), %%mm2 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1320 "movq 1032(%0, %%esi), %%mm3 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1321 "pfadd 2048(%0, %%esi), %%mm0 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1322 "pfadd 2056(%0, %%esi), %%mm1 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1323 "pfadd 3072(%0, %%esi), %%mm2 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1324 "pfadd 3080(%0, %%esi), %%mm3 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1325 "pfadd %%mm7, %%mm0 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1326 "pfadd %%mm7, %%mm1 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1327 "pfadd %%mm2, %%mm0 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1328 "pfadd %%mm3, %%mm1 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1329 "movq %%mm0, (%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1330 "movq %%mm1, 8(%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1331 "addl $16, %%esi \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1332 " jnz 1b \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1333 :: "r" (samples+256), "m" (bias)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1334 : "%esi"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1335 );
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1336 }
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1337
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1338 static void mix5to1_3dnow (sample_t * samples, sample_t bias)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1339 {
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1340 asm volatile(
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1341 "movd %1, %%mm7 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1342 "punpckldq %1, %%mm7 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1343 "movl $-1024, %%esi \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1344 ".balign 16\n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1345 "1: \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1346 "movq (%0, %%esi), %%mm0 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1347 "movq 8(%0, %%esi), %%mm1 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1348 "movq 1024(%0, %%esi), %%mm2 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1349 "movq 1032(%0, %%esi), %%mm3 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1350 "pfadd 2048(%0, %%esi), %%mm0 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1351 "pfadd 2056(%0, %%esi), %%mm1 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1352 "pfadd 3072(%0, %%esi), %%mm2 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1353 "pfadd 3080(%0, %%esi), %%mm3 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1354 "pfadd %%mm7, %%mm0 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1355 "pfadd %%mm7, %%mm1 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1356 "pfadd 4096(%0, %%esi), %%mm2 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1357 "pfadd 4104(%0, %%esi), %%mm3 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1358 "pfadd %%mm2, %%mm0 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1359 "pfadd %%mm3, %%mm1 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1360 "movq %%mm0, (%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1361 "movq %%mm1, 8(%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1362 "addl $16, %%esi \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1363 " jnz 1b \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1364 :: "r" (samples+256), "m" (bias)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1365 : "%esi"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1366 );
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1367 }
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1368
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1369 static void mix3to2_3dnow (sample_t * samples, sample_t bias)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1370 {
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1371 asm volatile(
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1372 "movd %1, %%mm7 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1373 "punpckldq %1, %%mm7 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1374 "movl $-1024, %%esi \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1375 ".balign 16\n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1376 "1: \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1377 "movq 1024(%0, %%esi), %%mm0 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1378 "movq 1032(%0, %%esi), %%mm1 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1379 "pfadd %%mm7, %%mm0 \n\t" //common
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1380 "pfadd %%mm7, %%mm1 \n\t" //common
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1381 "movq (%0, %%esi), %%mm2 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1382 "movq 8(%0, %%esi), %%mm3 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1383 "movq 2048(%0, %%esi), %%mm4 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1384 "movq 2056(%0, %%esi), %%mm5 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1385 "pfadd %%mm0, %%mm2 \n\t"
5912
d0a34309e424 trying to fix nicks bugs ...
michael
parents: 4233
diff changeset
1386 "pfadd %%mm1, %%mm3 \n\t"
4233
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1387 "pfadd %%mm0, %%mm4 \n\t"
5912
d0a34309e424 trying to fix nicks bugs ...
michael
parents: 4233
diff changeset
1388 "pfadd %%mm1, %%mm5 \n\t"
4233
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1389 "movq %%mm2, (%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1390 "movq %%mm3, 8(%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1391 "movq %%mm4, 1024(%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1392 "movq %%mm5, 1032(%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1393 "addl $16, %%esi \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1394 " jnz 1b \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1395 :: "r" (samples+256), "m" (bias)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1396 : "%esi"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1397 );
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1398 }
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1399
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1400 static void mix21to2_3dnow (sample_t * left, sample_t * right, sample_t bias)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1401 {
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1402 asm volatile(
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1403 "movd %2, %%mm7 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1404 "punpckldq %2, %%mm7 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1405 "movl $-1024, %%esi \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1406 ".balign 16\n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1407 "1: \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1408 "movq 1024(%1, %%esi), %%mm0 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1409 "movq 1032(%1, %%esi), %%mm1 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1410 "pfadd %%mm7, %%mm0 \n\t" //common
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1411 "pfadd %%mm7, %%mm1 \n\t" //common
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1412 "movq (%0, %%esi), %%mm2 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1413 "movq 8(%0, %%esi), %%mm3 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1414 "movq (%1, %%esi), %%mm4 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1415 "movq 8(%1, %%esi), %%mm5 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1416 "pfadd %%mm0, %%mm2 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1417 "pfadd %%mm1, %%mm3 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1418 "pfadd %%mm0, %%mm4 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1419 "pfadd %%mm1, %%mm5 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1420 "movq %%mm2, (%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1421 "movq %%mm3, 8(%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1422 "movq %%mm4, (%1, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1423 "movq %%mm5, 8(%1, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1424 "addl $16, %%esi \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1425 " jnz 1b \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1426 :: "r" (left+256), "r" (right+256), "m" (bias)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1427 : "%esi"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1428 );
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1429 }
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1430
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1431 static void mix21toS_3dnow (sample_t * samples, sample_t bias)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1432 {
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1433 asm volatile(
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1434 "movd %1, %%mm7 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1435 "punpckldq %1, %%mm7 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1436 "movl $-1024, %%esi \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1437 ".balign 16\n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1438 "1: \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1439 "movq 2048(%0, %%esi), %%mm0 \n\t" // surround
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1440 "movq 2056(%0, %%esi), %%mm1 \n\t" // surround
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1441 "movq (%0, %%esi), %%mm2 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1442 "movq 8(%0, %%esi), %%mm3 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1443 "movq 1024(%0, %%esi), %%mm4 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1444 "movq 1032(%0, %%esi), %%mm5 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1445 "pfadd %%mm7, %%mm2 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1446 "pfadd %%mm7, %%mm3 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1447 "pfadd %%mm7, %%mm4 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1448 "pfadd %%mm7, %%mm5 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1449 "pfsub %%mm0, %%mm2 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1450 "pfsub %%mm1, %%mm3 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1451 "pfadd %%mm0, %%mm4 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1452 "pfadd %%mm1, %%mm5 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1453 "movq %%mm2, (%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1454 "movq %%mm3, 8(%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1455 "movq %%mm4, 1024(%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1456 "movq %%mm5, 1032(%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1457 "addl $16, %%esi \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1458 " jnz 1b \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1459 :: "r" (samples+256), "m" (bias)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1460 : "%esi"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1461 );
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1462 }
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1463
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1464 static void mix31to2_3dnow (sample_t * samples, sample_t bias)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1465 {
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1466 asm volatile(
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1467 "movd %1, %%mm7 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1468 "punpckldq %1, %%mm7 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1469 "movl $-1024, %%esi \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1470 ".balign 16\n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1471 "1: \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1472 "movq 1024(%0, %%esi), %%mm0 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1473 "movq 1032(%0, %%esi), %%mm1 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1474 "pfadd 3072(%0, %%esi), %%mm0 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1475 "pfadd 3080(%0, %%esi), %%mm1 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1476 "pfadd %%mm7, %%mm0 \n\t" // common
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1477 "pfadd %%mm7, %%mm1 \n\t" // common
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1478 "movq (%0, %%esi), %%mm2 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1479 "movq 8(%0, %%esi), %%mm3 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1480 "movq 2048(%0, %%esi), %%mm4 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1481 "movq 2056(%0, %%esi), %%mm5 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1482 "pfadd %%mm0, %%mm2 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1483 "pfadd %%mm1, %%mm3 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1484 "pfadd %%mm0, %%mm4 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1485 "pfadd %%mm1, %%mm5 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1486 "movq %%mm2, (%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1487 "movq %%mm3, 8(%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1488 "movq %%mm4, 1024(%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1489 "movq %%mm5, 1032(%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1490 "addl $16, %%esi \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1491 " jnz 1b \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1492 :: "r" (samples+256), "m" (bias)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1493 : "%esi"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1494 );
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1495 }
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1496
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1497 static void mix31toS_3dnow (sample_t * samples, sample_t bias)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1498 {
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1499 asm volatile(
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1500 "movd %1, %%mm7 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1501 "punpckldq %1, %%mm7 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1502 "movl $-1024, %%esi \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1503 ".balign 16\n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1504 "1: \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1505 "movq 1024(%0, %%esi), %%mm0 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1506 "movq 1032(%0, %%esi), %%mm1 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1507 "pfadd %%mm7, %%mm0 \n\t" // common
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1508 "pfadd %%mm7, %%mm1 \n\t" // common
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1509 "movq (%0, %%esi), %%mm2 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1510 "movq 8(%0, %%esi), %%mm3 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1511 "movq 2048(%0, %%esi), %%mm4 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1512 "movq 2056(%0, %%esi), %%mm5 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1513 "pfadd %%mm0, %%mm2 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1514 "pfadd %%mm1, %%mm3 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1515 "pfadd %%mm0, %%mm4 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1516 "pfadd %%mm1, %%mm5 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1517 "movq 3072(%0, %%esi), %%mm0 \n\t" // surround
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1518 "movq 3080(%0, %%esi), %%mm1 \n\t" // surround
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1519 "pfsub %%mm0, %%mm2 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1520 "pfsub %%mm1, %%mm3 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1521 "pfadd %%mm0, %%mm4 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1522 "pfadd %%mm1, %%mm5 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1523 "movq %%mm2, (%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1524 "movq %%mm3, 8(%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1525 "movq %%mm4, 1024(%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1526 "movq %%mm5, 1032(%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1527 "addl $16, %%esi \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1528 " jnz 1b \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1529 :: "r" (samples+256), "m" (bias)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1530 : "%esi"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1531 );
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1532 }
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1533
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1534 static void mix22toS_3dnow (sample_t * samples, sample_t bias)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1535 {
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1536 asm volatile(
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1537 "movd %1, %%mm7 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1538 "punpckldq %1, %%mm7 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1539 "movl $-1024, %%esi \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1540 ".balign 16\n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1541 "1: \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1542 "movq 2048(%0, %%esi), %%mm0 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1543 "movq 2056(%0, %%esi), %%mm1 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1544 "pfadd 3072(%0, %%esi), %%mm0 \n\t" // surround
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1545 "pfadd 3080(%0, %%esi), %%mm1 \n\t" // surround
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1546 "movq (%0, %%esi), %%mm2 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1547 "movq 8(%0, %%esi), %%mm3 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1548 "movq 1024(%0, %%esi), %%mm4 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1549 "movq 1032(%0, %%esi), %%mm5 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1550 "pfadd %%mm7, %%mm2 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1551 "pfadd %%mm7, %%mm3 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1552 "pfadd %%mm7, %%mm4 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1553 "pfadd %%mm7, %%mm5 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1554 "pfsub %%mm0, %%mm2 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1555 "pfsub %%mm1, %%mm3 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1556 "pfadd %%mm0, %%mm4 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1557 "pfadd %%mm1, %%mm5 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1558 "movq %%mm2, (%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1559 "movq %%mm3, 8(%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1560 "movq %%mm4, 1024(%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1561 "movq %%mm5, 1032(%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1562 "addl $16, %%esi \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1563 " jnz 1b \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1564 :: "r" (samples+256), "m" (bias)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1565 : "%esi"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1566 );
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1567 }
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1568
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1569 static void mix32to2_3dnow (sample_t * samples, sample_t bias)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1570 {
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1571 asm volatile(
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1572 "movd %1, %%mm7 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1573 "punpckldq %1, %%mm7 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1574 "movl $-1024, %%esi \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1575 ".balign 16\n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1576 "1: \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1577 "movq 1024(%0, %%esi), %%mm0 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1578 "movq 1032(%0, %%esi), %%mm1 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1579 "pfadd %%mm7, %%mm0 \n\t" // common
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1580 "pfadd %%mm7, %%mm1 \n\t" // common
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1581 "movq %%mm0, %%mm2 \n\t" // common
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1582 "movq %%mm1, %%mm3 \n\t" // common
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1583 "pfadd (%0, %%esi), %%mm0 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1584 "pfadd 8(%0, %%esi), %%mm1 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1585 "pfadd 2048(%0, %%esi), %%mm2 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1586 "pfadd 2056(%0, %%esi), %%mm3 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1587 "pfadd 3072(%0, %%esi), %%mm0 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1588 "pfadd 3080(%0, %%esi), %%mm1 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1589 "pfadd 4096(%0, %%esi), %%mm2 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1590 "pfadd 4104(%0, %%esi), %%mm3 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1591 "movq %%mm0, (%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1592 "movq %%mm1, 8(%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1593 "movq %%mm2, 1024(%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1594 "movq %%mm3, 1032(%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1595 "addl $16, %%esi \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1596 " jnz 1b \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1597 :: "r" (samples+256), "m" (bias)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1598 : "%esi"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1599 );
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1600 }
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1601
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1602 /* todo: should be optimized better */
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1603 static void mix32toS_3dnow (sample_t * samples, sample_t bias)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1604 {
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1605 asm volatile(
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1606 "movl $-1024, %%esi \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1607 ".balign 16\n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1608 "1: \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1609 "movd %1, %%mm7 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1610 "punpckldq %1, %%mm7 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1611 "movq 1024(%0, %%esi), %%mm0 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1612 "movq 1032(%0, %%esi), %%mm1 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1613 "movq 3072(%0, %%esi), %%mm4 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1614 "movq 3080(%0, %%esi), %%mm5 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1615 "pfadd %%mm7, %%mm0 \n\t" // common
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1616 "pfadd %%mm7, %%mm1 \n\t" // common
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1617 "pfadd 4096(%0, %%esi), %%mm4 \n\t" // surround
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1618 "pfadd 4104(%0, %%esi), %%mm5 \n\t" // surround
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1619 "movq (%0, %%esi), %%mm2 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1620 "movq 8(%0, %%esi), %%mm3 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1621 "movq 2048(%0, %%esi), %%mm6 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1622 "movq 2056(%0, %%esi), %%mm7 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1623 "pfsub %%mm4, %%mm2 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1624 "pfsub %%mm5, %%mm3 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1625 "pfadd %%mm4, %%mm6 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1626 "pfadd %%mm5, %%mm7 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1627 "pfadd %%mm0, %%mm2 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1628 "pfadd %%mm1, %%mm3 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1629 "pfadd %%mm0, %%mm6 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1630 "pfadd %%mm1, %%mm7 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1631 "movq %%mm2, (%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1632 "movq %%mm3, 8(%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1633 "movq %%mm6, 1024(%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1634 "movq %%mm7, 1032(%0, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1635 "addl $16, %%esi \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1636 " jnz 1b \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1637 :: "r" (samples+256), "m" (bias)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1638 : "%esi"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1639 );
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1640 }
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1641
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1642 static void move2to1_3dnow (sample_t * src, sample_t * dest, sample_t bias)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1643 {
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1644 asm volatile(
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1645 "movd %2, %%mm7 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1646 "punpckldq %2, %%mm7 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1647 "movl $-1024, %%esi \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1648 ".balign 16\n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1649 "1: \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1650 "movq (%0, %%esi), %%mm0 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1651 "movq 8(%0, %%esi), %%mm1 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1652 "movq 16(%0, %%esi), %%mm2 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1653 "movq 24(%0, %%esi), %%mm3 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1654 "pfadd 1024(%0, %%esi), %%mm0 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1655 "pfadd 1032(%0, %%esi), %%mm1 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1656 "pfadd 1040(%0, %%esi), %%mm2 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1657 "pfadd 1048(%0, %%esi), %%mm3 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1658 "pfadd %%mm7, %%mm0 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1659 "pfadd %%mm7, %%mm1 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1660 "pfadd %%mm7, %%mm2 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1661 "pfadd %%mm7, %%mm3 \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1662 "movq %%mm0, (%1, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1663 "movq %%mm1, 8(%1, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1664 "movq %%mm2, 16(%1, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1665 "movq %%mm3, 24(%1, %%esi) \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1666 "addl $32, %%esi \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1667 " jnz 1b \n\t"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1668 :: "r" (src+256), "r" (dest+256), "m" (bias)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1669 : "%esi"
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1670 );
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1671 }
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1672
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1673 static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t bias,
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1674 sample_t clev, sample_t slev)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1675 {
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1676 switch (CONVERT (acmod, output & A52_CHANNEL_MASK)) {
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1677
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1678 case CONVERT (A52_CHANNEL, A52_CHANNEL2):
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1679 memcpy (samples, samples + 256, 256 * sizeof (sample_t));
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1680 break;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1681
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1682 case CONVERT (A52_CHANNEL, A52_MONO):
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1683 case CONVERT (A52_STEREO, A52_MONO):
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1684 mix_2to1_3dnow:
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1685 mix2to1_3dnow (samples, samples + 256, bias);
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1686 break;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1687
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1688 case CONVERT (A52_2F1R, A52_MONO):
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1689 if (slev == 0)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1690 goto mix_2to1_3dnow;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1691 case CONVERT (A52_3F, A52_MONO):
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1692 mix_3to1_3dnow:
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1693 mix3to1_3dnow (samples, bias);
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1694 break;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1695
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1696 case CONVERT (A52_3F1R, A52_MONO):
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1697 if (slev == 0)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1698 goto mix_3to1_3dnow;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1699 case CONVERT (A52_2F2R, A52_MONO):
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1700 if (slev == 0)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1701 goto mix_2to1_3dnow;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1702 mix4to1_3dnow (samples, bias);
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1703 break;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1704
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1705 case CONVERT (A52_3F2R, A52_MONO):
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1706 if (slev == 0)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1707 goto mix_3to1_3dnow;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1708 mix5to1_3dnow (samples, bias);
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1709 break;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1710
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1711 case CONVERT (A52_MONO, A52_DOLBY):
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1712 memcpy (samples + 256, samples, 256 * sizeof (sample_t));
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1713 break;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1714
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1715 case CONVERT (A52_3F, A52_STEREO):
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1716 case CONVERT (A52_3F, A52_DOLBY):
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1717 mix_3to2_3dnow:
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1718 mix3to2_3dnow (samples, bias);
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1719 break;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1720
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1721 case CONVERT (A52_2F1R, A52_STEREO):
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1722 if (slev == 0)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1723 break;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1724 mix21to2_3dnow (samples, samples + 256, bias);
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1725 break;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1726
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1727 case CONVERT (A52_2F1R, A52_DOLBY):
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1728 mix21toS_3dnow (samples, bias);
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1729 break;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1730
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1731 case CONVERT (A52_3F1R, A52_STEREO):
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1732 if (slev == 0)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1733 goto mix_3to2_3dnow;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1734 mix31to2_3dnow (samples, bias);
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1735 break;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1736
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1737 case CONVERT (A52_3F1R, A52_DOLBY):
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1738 mix31toS_3dnow (samples, bias);
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1739 break;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1740
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1741 case CONVERT (A52_2F2R, A52_STEREO):
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1742 if (slev == 0)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1743 break;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1744 mix2to1_3dnow (samples, samples + 512, bias);
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1745 mix2to1_3dnow (samples + 256, samples + 768, bias);
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1746 break;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1747
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1748 case CONVERT (A52_2F2R, A52_DOLBY):
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1749 mix22toS_3dnow (samples, bias);
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1750 break;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1751
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1752 case CONVERT (A52_3F2R, A52_STEREO):
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1753 if (slev == 0)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1754 goto mix_3to2_3dnow;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1755 mix32to2_3dnow (samples, bias);
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1756 break;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1757
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1758 case CONVERT (A52_3F2R, A52_DOLBY):
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1759 mix32toS_3dnow (samples, bias);
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1760 break;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1761
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1762 case CONVERT (A52_3F1R, A52_3F):
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1763 if (slev == 0)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1764 break;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1765 mix21to2_3dnow (samples, samples + 512, bias);
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1766 break;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1767
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1768 case CONVERT (A52_3F2R, A52_3F):
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1769 if (slev == 0)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1770 break;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1771 mix2to1_3dnow (samples, samples + 768, bias);
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1772 mix2to1_3dnow (samples + 512, samples + 1024, bias);
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1773 break;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1774
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1775 case CONVERT (A52_3F1R, A52_2F1R):
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1776 mix3to2_3dnow (samples, bias);
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1777 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1778 break;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1779
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1780 case CONVERT (A52_2F2R, A52_2F1R):
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1781 mix2to1_3dnow (samples + 512, samples + 768, bias);
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1782 break;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1783
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1784 case CONVERT (A52_3F2R, A52_2F1R):
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1785 mix3to2_3dnow (samples, bias); //FIXME possible bug? (output doesnt seem to be used)
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1786 move2to1_3dnow (samples + 768, samples + 512, bias);
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1787 break;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1788
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1789 case CONVERT (A52_3F2R, A52_3F1R):
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1790 mix2to1_3dnow (samples + 768, samples + 1024, bias);
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1791 break;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1792
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1793 case CONVERT (A52_2F1R, A52_2F2R):
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1794 memcpy (samples + 768, samples + 512, 256 * sizeof (sample_t));
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1795 break;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1796
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1797 case CONVERT (A52_3F1R, A52_2F2R):
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1798 mix3to2_3dnow (samples, bias);
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1799 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1800 break;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1801
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1802 case CONVERT (A52_3F2R, A52_2F2R):
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1803 mix3to2_3dnow (samples, bias);
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1804 memcpy (samples + 512, samples + 768, 256 * sizeof (sample_t));
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1805 memcpy (samples + 768, samples + 1024, 256 * sizeof (sample_t));
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1806 break;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1807
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1808 case CONVERT (A52_3F1R, A52_3F2R):
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1809 memcpy (samples + 1027, samples + 768, 256 * sizeof (sample_t));
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1810 break;
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1811 }
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1812 __asm __volatile("femms":::"memory");
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1813 }
ef2abfbbd1df 3dnow optimization. Not all functions are tested!!!
nick
parents: 3910
diff changeset
1814
3904
848d848521b9 runtime cpudetect
michael
parents: 3741
diff changeset
1815 #endif //ARCH_X86