Mercurial > mplayer.hg
annotate liba52/resample_mmx.c @ 27078:7f6bec0700aa
sync w/r27107, patch by Cédric Viou
author | gpoirier |
---|---|
date | Fri, 20 Jun 2008 20:11:57 +0000 |
parents | 943f37a4323d |
children | 08d18fe9da52 |
rev | line source |
---|---|
25483 | 1 /* |
2 * resample_mmx.c | |
3 * Copyright (C) 2001 Michael Niedermayer (michaelni@gmx.at) | |
4 * | |
5 * This file is part of a52dec, a free ATSC A-52 stream decoder. | |
6 * See http://liba52.sourceforge.net/ for updates. | |
7 * | |
8 * File added for use with MPlayer and not part of original a52dec. | |
9 * | |
10 * a52dec is free software; you can redistribute it and/or modify | |
11 * it under the terms of the GNU General Public License as published by | |
12 * the Free Software Foundation; either version 2 of the License, or | |
13 * (at your option) any later version. | |
14 * | |
15 * a52dec is distributed in the hope that it will be useful, | |
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 * GNU General Public License for more details. | |
19 * | |
20 * You should have received a copy of the GNU General Public License | |
21 * along with this program; if not, write to the Free Software | |
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
23 */ | |
3569 | 24 |
25484
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
25 /* optimization TODO / NOTES |
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
26 * movntq is slightly faster (0.5% with the current test.c benchmark) |
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
27 * (but that is just test.c so that needs to be tested in reality) |
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
28 * and it would mean (C / MMX2 / MMX / 3DNOW) versions. |
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
29 */ |
3569 | 30 |
16173 | 31 #include "a52_internal.h" |
32 | |
33 | |
12303
f881c918739b
attribute_used patch by (VMiklos <mamajom at axelero dot hu>)
michael
parents:
8123
diff
changeset
|
34 static uint64_t attribute_used __attribute__((aligned(8))) magicF2W= 0x43c0000043c00000LL; |
f881c918739b
attribute_used patch by (VMiklos <mamajom at axelero dot hu>)
michael
parents:
8123
diff
changeset
|
35 static uint64_t attribute_used __attribute__((aligned(8))) wm1010= 0xFFFF0000FFFF0000LL; |
f881c918739b
attribute_used patch by (VMiklos <mamajom at axelero dot hu>)
michael
parents:
8123
diff
changeset
|
36 static uint64_t attribute_used __attribute__((aligned(8))) wm0101= 0x0000FFFF0000FFFFLL; |
f881c918739b
attribute_used patch by (VMiklos <mamajom at axelero dot hu>)
michael
parents:
8123
diff
changeset
|
37 static uint64_t attribute_used __attribute__((aligned(8))) wm1100= 0xFFFFFFFF00000000LL; |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
38 |
3909 | 39 static int a52_resample_MONO_to_5_MMX(float * _f, int16_t * s16){ |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
40 int32_t * f = (int32_t *) _f; |
3574 | 41 asm volatile( |
16173 | 42 "mov $-512, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
43 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
44 "movq "MANGLE(wm1100)", %%mm3 \n\t" |
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
45 "movq "MANGLE(wm0101)", %%mm4 \n\t" |
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
46 "movq "MANGLE(wm1010)", %%mm5 \n\t" |
3574 | 47 "pxor %%mm6, %%mm6 \n\t" |
48 "1: \n\t" | |
16173 | 49 "movq (%1, %%"REG_S", 2), %%mm0 \n\t" |
50 "movq 8(%1, %%"REG_S", 2), %%mm1\n\t" | |
51 "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t" | |
3574 | 52 "psubd %%mm7, %%mm0 \n\t" |
53 "psubd %%mm7, %%mm1 \n\t" | |
54 "packssdw %%mm1, %%mm0 \n\t" | |
55 "movq %%mm0, %%mm1 \n\t" | |
56 "pand %%mm4, %%mm0 \n\t" | |
57 "pand %%mm5, %%mm1 \n\t" | |
16173 | 58 "movq %%mm6, (%0, %%"REG_D") \n\t" // 0 0 0 0 |
59 "movd %%mm0, 8(%0, %%"REG_D") \n\t" // A 0 | |
3574 | 60 "pand %%mm3, %%mm0 \n\t" |
16173 | 61 "movd %%mm6, 12(%0, %%"REG_D") \n\t" // 0 0 |
62 "movd %%mm1, 16(%0, %%"REG_D") \n\t" // 0 B | |
3574 | 63 "pand %%mm3, %%mm1 \n\t" |
16173 | 64 "movd %%mm6, 20(%0, %%"REG_D") \n\t" // 0 0 |
65 "movq %%mm0, 24(%0, %%"REG_D") \n\t" // 0 0 C 0 | |
66 "movq %%mm1, 32(%0, %%"REG_D") \n\t" // 0 0 0 B | |
67 "add $8, %%"REG_S" \n\t" | |
3574 | 68 " jnz 1b \n\t" |
69 "emms \n\t" | |
70 :: "r" (s16+1280), "r" (f+256) | |
16173 | 71 :"%"REG_S, "%"REG_D, "memory" |
3574 | 72 ); |
3909 | 73 return 5*256; |
74 } | |
75 | |
76 static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){ | |
77 int32_t * f = (int32_t *) _f; | |
3567 | 78 /* benchmark scores are 0.3% better with SSE but we would need to set bias=0 and premultiply it |
79 #ifdef HAVE_SSE | |
80 asm volatile( | |
16173 | 81 "mov $-1024, %%"REG_S" \n\t" |
3567 | 82 "1: \n\t" |
16173 | 83 "cvtps2pi (%1, %%"REG_S"), %%mm0\n\t" |
84 "cvtps2pi 1024(%1, %%"REG_S"), %%mm2\n\t" | |
3567 | 85 "movq %%mm0, %%mm1 \n\t" |
86 "punpcklwd %%mm2, %%mm0 \n\t" | |
87 "punpckhwd %%mm2, %%mm1 \n\t" | |
16173 | 88 "movq %%mm0, (%0, %%"REG_S") \n\t" |
89 "movq %%mm1, 8(%0, %%"REG_S") \n\t" | |
90 "add $16, %%"REG_S" \n\t" | |
3567 | 91 " jnz 1b \n\t" |
92 "emms \n\t" | |
93 :: "r" (s16+512), "r" (f+256) | |
16173 | 94 :"%"REG_S, "memory" |
3567 | 95 );*/ |
96 asm volatile( | |
16173 | 97 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
98 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3567 | 99 "1: \n\t" |
16173 | 100 "movq (%1, %%"REG_S"), %%mm0 \n\t" |
101 "movq 8(%1, %%"REG_S"), %%mm1 \n\t" | |
102 "movq 1024(%1, %%"REG_S"), %%mm2\n\t" | |
103 "movq 1032(%1, %%"REG_S"), %%mm3\n\t" | |
3567 | 104 "psubd %%mm7, %%mm0 \n\t" |
105 "psubd %%mm7, %%mm1 \n\t" | |
106 "psubd %%mm7, %%mm2 \n\t" | |
107 "psubd %%mm7, %%mm3 \n\t" | |
108 "packssdw %%mm1, %%mm0 \n\t" | |
109 "packssdw %%mm3, %%mm2 \n\t" | |
110 "movq %%mm0, %%mm1 \n\t" | |
111 "punpcklwd %%mm2, %%mm0 \n\t" | |
112 "punpckhwd %%mm2, %%mm1 \n\t" | |
16173 | 113 "movq %%mm0, (%0, %%"REG_S") \n\t" |
114 "movq %%mm1, 8(%0, %%"REG_S") \n\t" | |
115 "add $16, %%"REG_S" \n\t" | |
3567 | 116 " jnz 1b \n\t" |
117 "emms \n\t" | |
118 :: "r" (s16+512), "r" (f+256) | |
16173 | 119 :"%"REG_S, "memory" |
3567 | 120 ); |
3909 | 121 return 2*256; |
122 } | |
123 | |
124 static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){ | |
125 int32_t * f = (int32_t *) _f; | |
3654 | 126 asm volatile( |
16173 | 127 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
128 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3654 | 129 "pxor %%mm6, %%mm6 \n\t" |
130 "movq %%mm7, %%mm5 \n\t" | |
131 "punpckldq %%mm6, %%mm5 \n\t" | |
132 "1: \n\t" | |
16173 | 133 "movd (%1, %%"REG_S"), %%mm0 \n\t" |
134 "punpckldq 2048(%1, %%"REG_S"), %%mm0\n\t" | |
135 "movd 1024(%1, %%"REG_S"), %%mm1\n\t" | |
136 "punpckldq 4(%1, %%"REG_S"), %%mm1\n\t" | |
137 "movd 2052(%1, %%"REG_S"), %%mm2\n\t" | |
3654 | 138 "movq %%mm7, %%mm3 \n\t" |
16173 | 139 "punpckldq 1028(%1, %%"REG_S"), %%mm3\n\t" |
140 "movd 8(%1, %%"REG_S"), %%mm4 \n\t" | |
141 "punpckldq 2056(%1, %%"REG_S"), %%mm4\n\t" | |
142 "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t" | |
143 "sar $1, %%"REG_D" \n\t" | |
3654 | 144 "psubd %%mm7, %%mm0 \n\t" |
145 "psubd %%mm7, %%mm1 \n\t" | |
146 "psubd %%mm5, %%mm2 \n\t" | |
147 "psubd %%mm7, %%mm3 \n\t" | |
148 "psubd %%mm7, %%mm4 \n\t" | |
149 "packssdw %%mm6, %%mm0 \n\t" | |
150 "packssdw %%mm2, %%mm1 \n\t" | |
151 "packssdw %%mm4, %%mm3 \n\t" | |
16173 | 152 "movq %%mm0, (%0, %%"REG_D") \n\t" |
153 "movq %%mm1, 8(%0, %%"REG_D") \n\t" | |
154 "movq %%mm3, 16(%0, %%"REG_D") \n\t" | |
155 "movd 1032(%1, %%"REG_S"), %%mm1\n\t" | |
156 "punpckldq 12(%1, %%"REG_S"), %%mm1\n\t" | |
157 "movd 2060(%1, %%"REG_S"), %%mm2\n\t" | |
3654 | 158 "movq %%mm7, %%mm3 \n\t" |
16173 | 159 "punpckldq 1036(%1, %%"REG_S"), %%mm3\n\t" |
3654 | 160 "pxor %%mm0, %%mm0 \n\t" |
161 "psubd %%mm7, %%mm1 \n\t" | |
162 "psubd %%mm5, %%mm2 \n\t" | |
163 "psubd %%mm7, %%mm3 \n\t" | |
164 "packssdw %%mm1, %%mm0 \n\t" | |
165 "packssdw %%mm3, %%mm2 \n\t" | |
16173 | 166 "movq %%mm0, 24(%0, %%"REG_D") \n\t" |
167 "movq %%mm2, 32(%0, %%"REG_D") \n\t" | |
25484
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
168 |
16173 | 169 "add $16, %%"REG_S" \n\t" |
3654 | 170 " jnz 1b \n\t" |
171 "emms \n\t" | |
172 :: "r" (s16+1280), "r" (f+256) | |
16173 | 173 :"%"REG_S, "%"REG_D, "memory" |
3654 | 174 ); |
3909 | 175 return 5*256; |
176 } | |
177 | |
178 static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){ | |
179 int32_t * f = (int32_t *) _f; | |
3569 | 180 asm volatile( |
16173 | 181 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
182 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3569 | 183 "1: \n\t" |
16173 | 184 "movq (%1, %%"REG_S"), %%mm0 \n\t" |
185 "movq 8(%1, %%"REG_S"), %%mm1 \n\t" | |
186 "movq 1024(%1, %%"REG_S"), %%mm2\n\t" | |
187 "movq 1032(%1, %%"REG_S"), %%mm3\n\t" | |
3569 | 188 "psubd %%mm7, %%mm0 \n\t" |
189 "psubd %%mm7, %%mm1 \n\t" | |
190 "psubd %%mm7, %%mm2 \n\t" | |
191 "psubd %%mm7, %%mm3 \n\t" | |
192 "packssdw %%mm1, %%mm0 \n\t" | |
193 "packssdw %%mm3, %%mm2 \n\t" | |
16173 | 194 "movq 2048(%1, %%"REG_S"), %%mm3\n\t" |
195 "movq 2056(%1, %%"REG_S"), %%mm4\n\t" | |
196 "movq 3072(%1, %%"REG_S"), %%mm5\n\t" | |
197 "movq 3080(%1, %%"REG_S"), %%mm6\n\t" | |
3569 | 198 "psubd %%mm7, %%mm3 \n\t" |
199 "psubd %%mm7, %%mm4 \n\t" | |
200 "psubd %%mm7, %%mm5 \n\t" | |
201 "psubd %%mm7, %%mm6 \n\t" | |
202 "packssdw %%mm4, %%mm3 \n\t" | |
203 "packssdw %%mm6, %%mm5 \n\t" | |
204 "movq %%mm0, %%mm1 \n\t" | |
205 "movq %%mm3, %%mm4 \n\t" | |
206 "punpcklwd %%mm2, %%mm0 \n\t" | |
207 "punpckhwd %%mm2, %%mm1 \n\t" | |
208 "punpcklwd %%mm5, %%mm3 \n\t" | |
209 "punpckhwd %%mm5, %%mm4 \n\t" | |
210 "movq %%mm0, %%mm2 \n\t" | |
211 "movq %%mm1, %%mm5 \n\t" | |
212 "punpckldq %%mm3, %%mm0 \n\t" | |
213 "punpckhdq %%mm3, %%mm2 \n\t" | |
214 "punpckldq %%mm4, %%mm1 \n\t" | |
215 "punpckhdq %%mm4, %%mm5 \n\t" | |
16173 | 216 "movq %%mm0, (%0, %%"REG_S",2) \n\t" |
217 "movq %%mm2, 8(%0, %%"REG_S",2) \n\t" | |
218 "movq %%mm1, 16(%0, %%"REG_S",2)\n\t" | |
219 "movq %%mm5, 24(%0, %%"REG_S",2)\n\t" | |
220 "add $16, %%"REG_S" \n\t" | |
3569 | 221 " jnz 1b \n\t" |
222 "emms \n\t" | |
223 :: "r" (s16+1024), "r" (f+256) | |
16173 | 224 :"%"REG_S, "memory" |
3569 | 225 ); |
3909 | 226 return 4*256; |
227 } | |
228 | |
229 static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){ | |
230 int32_t * f = (int32_t *) _f; | |
3653 | 231 asm volatile( |
16173 | 232 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
233 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3653 | 234 "1: \n\t" |
16173 | 235 "movd (%1, %%"REG_S"), %%mm0 \n\t" |
236 "punpckldq 2048(%1, %%"REG_S"), %%mm0\n\t" | |
237 "movd 3072(%1, %%"REG_S"), %%mm1\n\t" | |
238 "punpckldq 4096(%1, %%"REG_S"), %%mm1\n\t" | |
239 "movd 1024(%1, %%"REG_S"), %%mm2\n\t" | |
240 "punpckldq 4(%1, %%"REG_S"), %%mm2\n\t" | |
241 "movd 2052(%1, %%"REG_S"), %%mm3\n\t" | |
242 "punpckldq 3076(%1, %%"REG_S"), %%mm3\n\t" | |
243 "movd 4100(%1, %%"REG_S"), %%mm4\n\t" | |
244 "punpckldq 1028(%1, %%"REG_S"), %%mm4\n\t" | |
245 "movd 8(%1, %%"REG_S"), %%mm5 \n\t" | |
246 "punpckldq 2056(%1, %%"REG_S"), %%mm5\n\t" | |
247 "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t" | |
248 "sar $1, %%"REG_D" \n\t" | |
3653 | 249 "psubd %%mm7, %%mm0 \n\t" |
250 "psubd %%mm7, %%mm1 \n\t" | |
251 "psubd %%mm7, %%mm2 \n\t" | |
252 "psubd %%mm7, %%mm3 \n\t" | |
253 "psubd %%mm7, %%mm4 \n\t" | |
254 "psubd %%mm7, %%mm5 \n\t" | |
255 "packssdw %%mm1, %%mm0 \n\t" | |
256 "packssdw %%mm3, %%mm2 \n\t" | |
257 "packssdw %%mm5, %%mm4 \n\t" | |
16173 | 258 "movq %%mm0, (%0, %%"REG_D") \n\t" |
259 "movq %%mm2, 8(%0, %%"REG_D") \n\t" | |
260 "movq %%mm4, 16(%0, %%"REG_D") \n\t" | |
25484
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
261 |
16173 | 262 "movd 3080(%1, %%"REG_S"), %%mm0\n\t" |
263 "punpckldq 4104(%1, %%"REG_S"), %%mm0\n\t" | |
264 "movd 1032(%1, %%"REG_S"), %%mm1\n\t" | |
265 "punpckldq 12(%1, %%"REG_S"), %%mm1\n\t" | |
266 "movd 2060(%1, %%"REG_S"), %%mm2\n\t" | |
267 "punpckldq 3084(%1, %%"REG_S"), %%mm2\n\t" | |
268 "movd 4108(%1, %%"REG_S"), %%mm3\n\t" | |
269 "punpckldq 1036(%1, %%"REG_S"), %%mm3\n\t" | |
3653 | 270 "psubd %%mm7, %%mm0 \n\t" |
271 "psubd %%mm7, %%mm1 \n\t" | |
272 "psubd %%mm7, %%mm2 \n\t" | |
273 "psubd %%mm7, %%mm3 \n\t" | |
274 "packssdw %%mm1, %%mm0 \n\t" | |
275 "packssdw %%mm3, %%mm2 \n\t" | |
16173 | 276 "movq %%mm0, 24(%0, %%"REG_D") \n\t" |
277 "movq %%mm2, 32(%0, %%"REG_D") \n\t" | |
25484
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
278 |
16173 | 279 "add $16, %%"REG_S" \n\t" |
3653 | 280 " jnz 1b \n\t" |
281 "emms \n\t" | |
282 :: "r" (s16+1280), "r" (f+256) | |
16173 | 283 :"%"REG_S, "%"REG_D, "memory" |
3653 | 284 ); |
3909 | 285 return 5*256; |
286 } | |
287 | |
288 static int a52_resample_MONO_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
289 int32_t * f = (int32_t *) _f; | |
3569 | 290 asm volatile( |
16173 | 291 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
292 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3569 | 293 "pxor %%mm6, %%mm6 \n\t" |
294 "1: \n\t" | |
16173 | 295 "movq 1024(%1, %%"REG_S"), %%mm0\n\t" |
296 "movq 1032(%1, %%"REG_S"), %%mm1\n\t" | |
297 "movq (%1, %%"REG_S"), %%mm2 \n\t" | |
298 "movq 8(%1, %%"REG_S"), %%mm3 \n\t" | |
3569 | 299 "psubd %%mm7, %%mm0 \n\t" |
300 "psubd %%mm7, %%mm1 \n\t" | |
301 "psubd %%mm7, %%mm2 \n\t" | |
302 "psubd %%mm7, %%mm3 \n\t" | |
303 "packssdw %%mm1, %%mm0 \n\t" | |
304 "packssdw %%mm3, %%mm2 \n\t" | |
305 "movq %%mm0, %%mm1 \n\t" | |
306 "punpcklwd %%mm2, %%mm0 \n\t" | |
307 "punpckhwd %%mm2, %%mm1 \n\t" | |
16173 | 308 "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t" |
309 "movq %%mm6, (%0, %%"REG_D") \n\t" | |
310 "movd %%mm0, 8(%0, %%"REG_D") \n\t" | |
3569 | 311 "punpckhdq %%mm0, %%mm0 \n\t" |
16173 | 312 "movq %%mm6, 12(%0, %%"REG_D") \n\t" |
313 "movd %%mm0, 20(%0, %%"REG_D") \n\t" | |
314 "movq %%mm6, 24(%0, %%"REG_D") \n\t" | |
315 "movd %%mm1, 32(%0, %%"REG_D") \n\t" | |
3569 | 316 "punpckhdq %%mm1, %%mm1 \n\t" |
16173 | 317 "movq %%mm6, 36(%0, %%"REG_D") \n\t" |
318 "movd %%mm1, 44(%0, %%"REG_D") \n\t" | |
319 "add $16, %%"REG_S" \n\t" | |
3569 | 320 " jnz 1b \n\t" |
321 "emms \n\t" | |
322 :: "r" (s16+1536), "r" (f+256) | |
16173 | 323 :"%"REG_S, "%"REG_D, "memory" |
3569 | 324 ); |
3909 | 325 return 6*256; |
326 } | |
327 | |
328 static int a52_resample_STEREO_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
329 int32_t * f = (int32_t *) _f; | |
3576 | 330 asm volatile( |
16173 | 331 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
332 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3576 | 333 "pxor %%mm6, %%mm6 \n\t" |
334 "1: \n\t" | |
16173 | 335 "movq 1024(%1, %%"REG_S"), %%mm0\n\t" |
336 "movq 2048(%1, %%"REG_S"), %%mm1\n\t" | |
25484
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
337 "movq (%1, %%"REG_S"), %%mm5 \n\t" |
3576 | 338 "psubd %%mm7, %%mm0 \n\t" |
339 "psubd %%mm7, %%mm1 \n\t" | |
340 "psubd %%mm7, %%mm5 \n\t" | |
16173 | 341 "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t" |
25484
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
342 |
3576 | 343 "pxor %%mm4, %%mm4 \n\t" |
344 "packssdw %%mm5, %%mm0 \n\t" // FfAa | |
345 "packssdw %%mm4, %%mm1 \n\t" // 00Bb | |
346 "punpckhwd %%mm0, %%mm4 \n\t" // F0f0 | |
347 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
348 "movq %%mm0, %%mm1 \n\t" // BAba | |
349 "punpckldq %%mm4, %%mm3 \n\t" // f0XX | |
350 "punpckldq %%mm6, %%mm0 \n\t" // 00ba | |
351 "punpckhdq %%mm1, %%mm3 \n\t" // BAf0 | |
25484
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
352 |
16173 | 353 "movq %%mm0, (%0, %%"REG_D") \n\t" // 00ba |
3576 | 354 "punpckhdq %%mm4, %%mm0 \n\t" // F000 |
16173 | 355 "movq %%mm3, 8(%0, %%"REG_D") \n\t" // BAf0 |
356 "movq %%mm0, 16(%0, %%"REG_D") \n\t" // F000 | |
357 "add $8, %%"REG_S" \n\t" | |
3576 | 358 " jnz 1b \n\t" |
359 "emms \n\t" | |
360 :: "r" (s16+1536), "r" (f+256) | |
16173 | 361 :"%"REG_S, "%"REG_D, "memory" |
3576 | 362 ); |
3909 | 363 return 6*256; |
364 } | |
365 | |
366 static int a52_resample_3F_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
367 int32_t * f = (int32_t *) _f; | |
3578 | 368 asm volatile( |
16173 | 369 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
370 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3578 | 371 "pxor %%mm6, %%mm6 \n\t" |
372 "1: \n\t" | |
16173 | 373 "movq 1024(%1, %%"REG_S"), %%mm0\n\t" |
374 "movq 3072(%1, %%"REG_S"), %%mm1\n\t" | |
375 "movq 2048(%1, %%"REG_S"), %%mm4\n\t" | |
25484
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
376 "movq (%1, %%"REG_S"), %%mm5 \n\t" |
3578 | 377 "psubd %%mm7, %%mm0 \n\t" |
378 "psubd %%mm7, %%mm1 \n\t" | |
379 "psubd %%mm7, %%mm4 \n\t" | |
380 "psubd %%mm7, %%mm5 \n\t" | |
16173 | 381 "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t" |
25484
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
382 |
3578 | 383 "packssdw %%mm4, %%mm0 \n\t" // EeAa |
384 "packssdw %%mm5, %%mm1 \n\t" // FfBb | |
385 "movq %%mm0, %%mm2 \n\t" // EeAa | |
386 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
387 "punpckhwd %%mm1, %%mm2 \n\t" // FEfe | |
388 "movq %%mm0, %%mm1 \n\t" // BAba | |
389 "punpckldq %%mm6, %%mm0 \n\t" // 00ba | |
390 "punpckhdq %%mm1, %%mm1 \n\t" // BABA | |
25484
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
391 |
16173 | 392 "movq %%mm0, (%0, %%"REG_D") \n\t" |
3578 | 393 "punpckhdq %%mm2, %%mm0 \n\t" // FE00 |
394 "punpckldq %%mm1, %%mm2 \n\t" // BAfe | |
16173 | 395 "movq %%mm2, 8(%0, %%"REG_D") \n\t" |
396 "movq %%mm0, 16(%0, %%"REG_D") \n\t" | |
397 "add $8, %%"REG_S" \n\t" | |
3578 | 398 " jnz 1b \n\t" |
399 "emms \n\t" | |
400 :: "r" (s16+1536), "r" (f+256) | |
16173 | 401 :"%"REG_S, "%"REG_D, "memory" |
3578 | 402 ); |
3909 | 403 return 6*256; |
404 } | |
405 | |
406 static int a52_resample_2F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
407 int32_t * f = (int32_t *) _f; | |
3577 | 408 asm volatile( |
16173 | 409 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
410 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3577 | 411 // "pxor %%mm6, %%mm6 \n\t" |
412 "1: \n\t" | |
16173 | 413 "movq 1024(%1, %%"REG_S"), %%mm0\n\t" |
414 "movq 2048(%1, %%"REG_S"), %%mm1\n\t" | |
415 "movq 3072(%1, %%"REG_S"), %%mm2\n\t" | |
416 "movq 4096(%1, %%"REG_S"), %%mm3\n\t" | |
25484
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
417 "movq (%1, %%"REG_S"), %%mm5 \n\t" |
3577 | 418 "psubd %%mm7, %%mm0 \n\t" |
419 "psubd %%mm7, %%mm1 \n\t" | |
420 "psubd %%mm7, %%mm2 \n\t" | |
421 "psubd %%mm7, %%mm3 \n\t" | |
422 "psubd %%mm7, %%mm5 \n\t" | |
16173 | 423 "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t" |
25484
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
424 |
3577 | 425 "packssdw %%mm2, %%mm0 \n\t" // CcAa |
426 "packssdw %%mm3, %%mm1 \n\t" // DdBb | |
427 "packssdw %%mm5, %%mm5 \n\t" // FfFf | |
428 "movq %%mm0, %%mm2 \n\t" // CcAa | |
429 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
430 "punpckhwd %%mm1, %%mm2 \n\t" // DCdc | |
431 "pxor %%mm4, %%mm4 \n\t" // 0000 | |
432 "punpcklwd %%mm5, %%mm4 \n\t" // F0f0 | |
433 "movq %%mm0, %%mm1 \n\t" // BAba | |
434 "movq %%mm4, %%mm3 \n\t" // F0f0 | |
435 "punpckldq %%mm2, %%mm0 \n\t" // dcba | |
436 "punpckhdq %%mm1, %%mm1 \n\t" // BABA | |
437 "punpckldq %%mm1, %%mm4 \n\t" // BAf0 | |
438 "punpckhdq %%mm3, %%mm2 \n\t" // F0DC | |
25484
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
439 |
16173 | 440 "movq %%mm0, (%0, %%"REG_D") \n\t" |
441 "movq %%mm4, 8(%0, %%"REG_D") \n\t" | |
442 "movq %%mm2, 16(%0, %%"REG_D") \n\t" | |
443 "add $8, %%"REG_S" \n\t" | |
3577 | 444 " jnz 1b \n\t" |
445 "emms \n\t" | |
446 :: "r" (s16+1536), "r" (f+256) | |
16173 | 447 :"%"REG_S, "%"REG_D, "memory" |
3577 | 448 ); |
3909 | 449 return 6*256; |
450 } | |
451 | |
452 static int a52_resample_3F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
453 int32_t * f = (int32_t *) _f; | |
3575 | 454 asm volatile( |
16173 | 455 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
456 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3575 | 457 // "pxor %%mm6, %%mm6 \n\t" |
458 "1: \n\t" | |
16173 | 459 "movq 1024(%1, %%"REG_S"), %%mm0\n\t" |
460 "movq 3072(%1, %%"REG_S"), %%mm1\n\t" | |
461 "movq 4096(%1, %%"REG_S"), %%mm2\n\t" | |
462 "movq 5120(%1, %%"REG_S"), %%mm3\n\t" | |
463 "movq 2048(%1, %%"REG_S"), %%mm4\n\t" | |
25484
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
464 "movq (%1, %%"REG_S"), %%mm5 \n\t" |
3575 | 465 "psubd %%mm7, %%mm0 \n\t" |
466 "psubd %%mm7, %%mm1 \n\t" | |
467 "psubd %%mm7, %%mm2 \n\t" | |
468 "psubd %%mm7, %%mm3 \n\t" | |
469 "psubd %%mm7, %%mm4 \n\t" | |
470 "psubd %%mm7, %%mm5 \n\t" | |
16173 | 471 "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t" |
25484
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
472 |
3575 | 473 "packssdw %%mm2, %%mm0 \n\t" // CcAa |
474 "packssdw %%mm3, %%mm1 \n\t" // DdBb | |
475 "packssdw %%mm4, %%mm4 \n\t" // EeEe | |
476 "packssdw %%mm5, %%mm5 \n\t" // FfFf | |
477 "movq %%mm0, %%mm2 \n\t" // CcAa | |
478 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
479 "punpckhwd %%mm1, %%mm2 \n\t" // DCdc | |
480 "punpcklwd %%mm5, %%mm4 \n\t" // FEfe | |
481 "movq %%mm0, %%mm1 \n\t" // BAba | |
482 "movq %%mm4, %%mm3 \n\t" // FEfe | |
483 "punpckldq %%mm2, %%mm0 \n\t" // dcba | |
484 "punpckhdq %%mm1, %%mm1 \n\t" // BABA | |
485 "punpckldq %%mm1, %%mm4 \n\t" // BAfe | |
486 "punpckhdq %%mm3, %%mm2 \n\t" // FEDC | |
25484
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
487 |
16173 | 488 "movq %%mm0, (%0, %%"REG_D") \n\t" |
489 "movq %%mm4, 8(%0, %%"REG_D") \n\t" | |
490 "movq %%mm2, 16(%0, %%"REG_D") \n\t" | |
491 "add $8, %%"REG_S" \n\t" | |
3575 | 492 " jnz 1b \n\t" |
493 "emms \n\t" | |
494 :: "r" (s16+1536), "r" (f+256) | |
16173 | 495 :"%"REG_S, "%"REG_D, "memory" |
3575 | 496 ); |
3909 | 497 return 6*256; |
498 } | |
499 | |
500 | |
501 static void* a52_resample_MMX(int flags, int ch){ | |
502 switch (flags) { | |
503 case A52_MONO: | |
504 if(ch==5) return a52_resample_MONO_to_5_MMX; | |
505 break; | |
506 case A52_CHANNEL: | |
507 case A52_STEREO: | |
508 case A52_DOLBY: | |
509 if(ch==2) return a52_resample_STEREO_to_2_MMX; | |
510 break; | |
511 case A52_3F: | |
512 if(ch==5) return a52_resample_3F_to_5_MMX; | |
513 break; | |
514 case A52_2F2R: | |
515 if(ch==4) return a52_resample_2F_2R_to_4_MMX; | |
516 break; | |
517 case A52_3F2R: | |
518 if(ch==5) return a52_resample_3F_2R_to_5_MMX; | |
519 break; | |
520 case A52_MONO | A52_LFE: | |
521 if(ch==6) return a52_resample_MONO_LFE_to_6_MMX; | |
522 break; | |
523 case A52_CHANNEL | A52_LFE: | |
524 case A52_STEREO | A52_LFE: | |
525 case A52_DOLBY | A52_LFE: | |
526 if(ch==6) return a52_resample_STEREO_LFE_to_6_MMX; | |
527 break; | |
528 case A52_3F | A52_LFE: | |
529 if(ch==6) return a52_resample_3F_LFE_to_6_MMX; | |
530 break; | |
531 case A52_2F2R | A52_LFE: | |
532 if(ch==6) return a52_resample_2F_2R_LFE_to_6_MMX; | |
533 break; | |
534 case A52_3F2R | A52_LFE: | |
535 if(ch==6) return a52_resample_3F_2R_LFE_to_6_MMX; | |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
536 break; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
537 } |
3909 | 538 return NULL; |
3626 | 539 } |
540 | |
3909 | 541 |