Mercurial > mplayer.hg
annotate liba52/resample_mmx.c @ 30017:7119354805e7
Use on-stack subtitle struct for temporary storage for passing subtitles on
for rendering by libass.
This avoids mangling the static subtitle struct that is supposed to contain
the subtitles that will actually be displayed and it also minimally reduces
memory usage by freeing the subtitle lines again as early as possible.
author | reimar |
---|---|
date | Fri, 18 Dec 2009 19:29:33 +0000 |
parents | 25337a2147e7 |
children |
rev | line source |
---|---|
25483 | 1 /* |
2 * resample_mmx.c | |
3 * Copyright (C) 2001 Michael Niedermayer (michaelni@gmx.at) | |
4 * | |
5 * This file is part of a52dec, a free ATSC A-52 stream decoder. | |
6 * See http://liba52.sourceforge.net/ for updates. | |
7 * | |
8 * File added for use with MPlayer and not part of original a52dec. | |
9 * | |
10 * a52dec is free software; you can redistribute it and/or modify | |
11 * it under the terms of the GNU General Public License as published by | |
12 * the Free Software Foundation; either version 2 of the License, or | |
13 * (at your option) any later version. | |
14 * | |
15 * a52dec is distributed in the hope that it will be useful, | |
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
18 * GNU General Public License for more details. | |
19 * | |
20 * You should have received a copy of the GNU General Public License | |
21 * along with this program; if not, write to the Free Software | |
22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
23 */ | |
3569 | 24 |
25484
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
25 /* optimization TODO / NOTES |
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
26 * movntq is slightly faster (0.5% with the current test.c benchmark) |
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
27 * (but that is just test.c so that needs to be tested in reality) |
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
28 * and it would mean (C / MMX2 / MMX / 3DNOW) versions. |
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
29 */ |
3569 | 30 |
16173 | 31 #include "a52_internal.h" |
32 | |
33 | |
12303
f881c918739b
attribute_used patch by (VMiklos <mamajom at axelero dot hu>)
michael
parents:
8123
diff
changeset
|
/*
 * 64-bit constants that the conversion routines in this file load into MMX
 * registers.
 *
 * magicF2W: 0x43c00000 in both 32-bit halves, which is the IEEE-754
 *           single-precision bit pattern of 384.0f. The routines subtract it
 *           (psubd) from the raw bit patterns of the input floats and then
 *           saturate the differences to signed 16 bit (packssdw).
 *           NOTE(review): presumably the decoder output is biased by 384.0 so
 *           that the sample value sits in the low mantissa bits - confirm
 *           against the caller.
 * wm1010:   keeps 16-bit words 1 and 3 (counting from the least significant).
 * wm0101:   keeps 16-bit words 0 and 2.
 * wm1100:   keeps the upper 32 bits (words 2 and 3).
 * The three word masks are referenced only by a52_resample_MONO_to_5_MMX in
 * the visible code.
 */
34 static uint64_t attribute_used __attribute__((aligned(8))) magicF2W= 0x43c0000043c00000LL; |
f881c918739b
attribute_used patch by (VMiklos <mamajom at axelero dot hu>)
michael
parents:
8123
diff
changeset
|
35 static uint64_t attribute_used __attribute__((aligned(8))) wm1010= 0xFFFF0000FFFF0000LL; |
f881c918739b
attribute_used patch by (VMiklos <mamajom at axelero dot hu>)
michael
parents:
8123
diff
changeset
|
36 static uint64_t attribute_used __attribute__((aligned(8))) wm0101= 0x0000FFFF0000FFFFLL; |
f881c918739b
attribute_used patch by (VMiklos <mamajom at axelero dot hu>)
michael
parents:
8123
diff
changeset
|
37 static uint64_t attribute_used __attribute__((aligned(8))) wm1100= 0xFFFFFFFF00000000LL; |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
38 |
/*
 * Convert the 256 values in _f[0..255] into 256 frames of five int16_t each,
 * written to s16[0..1279].
 *
 * The input is read as raw 32-bit integers; magicF2W is subtracted from each
 * and the result is saturated to 16 bit. Every output frame holds the
 * converted sample in word 4 and zero in words 0-3.
 *
 * Loop shape: the index register starts at -512 and is scaled by 2 for input
 * addressing and by 5 for output addressing, so both pointers are passed
 * pre-advanced to the end of their buffers (f+256, s16+1280). Each iteration
 * consumes 4 input values and emits 4 frames (40 bytes); the loop ends when
 * the index reaches zero.
 *
 * Returns 5*256, the number of int16_t values written.
 */
3909 | 39 static int a52_resample_MONO_to_5_MMX(float * _f, int16_t * s16){ |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
40 int32_t * f = (int32_t *) _f; |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25484
diff
changeset
|
41 __asm__ volatile( |
16173 | 42 "mov $-512, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
43 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
44 "movq "MANGLE(wm1100)", %%mm3 \n\t" |
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
45 "movq "MANGLE(wm0101)", %%mm4 \n\t" |
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
46 "movq "MANGLE(wm1010)", %%mm5 \n\t" |
3574 | 47 "pxor %%mm6, %%mm6 \n\t" |
48 "1: \n\t" | |
16173 | 49 "movq (%1, %%"REG_S", 2), %%mm0 \n\t" |
50 "movq 8(%1, %%"REG_S", 2), %%mm1\n\t" | |
51 "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t" | |
3574 | 52 "psubd %%mm7, %%mm0 \n\t" |
53 "psubd %%mm7, %%mm1 \n\t" | |
54 "packssdw %%mm1, %%mm0 \n\t" | |
55 "movq %%mm0, %%mm1 \n\t" | |
56 "pand %%mm4, %%mm0 \n\t" | |
57 "pand %%mm5, %%mm1 \n\t" | |
16173 | 58 "movq %%mm6, (%0, %%"REG_D") \n\t" // 0 0 0 0 |
59 "movd %%mm0, 8(%0, %%"REG_D") \n\t" // A 0 | |
3574 | 60 "pand %%mm3, %%mm0 \n\t" |
16173 | 61 "movd %%mm6, 12(%0, %%"REG_D") \n\t" // 0 0 |
62 "movd %%mm1, 16(%0, %%"REG_D") \n\t" // 0 B | |
3574 | 63 "pand %%mm3, %%mm1 \n\t" |
16173 | 64 "movd %%mm6, 20(%0, %%"REG_D") \n\t" // 0 0 |
65 "movq %%mm0, 24(%0, %%"REG_D") \n\t" // 0 0 C 0 | |
66 "movq %%mm1, 32(%0, %%"REG_D") \n\t" // 0 0 0 D | |
67 "add $8, %%"REG_S" \n\t" | |
3574 | 68 " jnz 1b \n\t" |
69 "emms \n\t" | |
70 :: "r" (s16+1280), "r" (f+256) | |
16173 | 71 :"%"REG_S, "%"REG_D, "memory" |
3574 | 72 ); |
3909 | 73 return 5*256; |
74 } | |
75 | |
/*
 * Convert two consecutive 256-value input blocks (_f[0..255] and
 * _f[256..511]) into 256 interleaved two-channel int16_t frames at
 * s16[0..511].
 *
 * Each value is read as a raw 32-bit integer, magicF2W is subtracted and the
 * result is saturated to 16 bit. punpcklwd/punpckhwd then interleave the two
 * blocks so that every frame is { block0[i], block1[i] }.
 *
 * The index register runs from -1024 up to 0 in steps of 16 bytes and is used
 * unscaled for both buffers, so both pointers are passed pre-advanced by
 * 1024 bytes (f+256, s16+512). Each iteration handles 4 frames.
 *
 * The block comment inside the body preserves a disabled SSE (cvtps2pi)
 * variant together with the reason it is not used.
 *
 * Returns 2*256, the number of int16_t values written.
 */
76 static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){ | |
77 int32_t * f = (int32_t *) _f; | |
3567 | 78 /* benchmark scores are 0.3% better with SSE but we would need to set bias=0 and premultiply it |
28290 | 79 #if HAVE_SSE |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25484
diff
changeset
|
80 __asm__ volatile( |
16173 | 81 "mov $-1024, %%"REG_S" \n\t" |
3567 | 82 "1: \n\t" |
16173 | 83 "cvtps2pi (%1, %%"REG_S"), %%mm0\n\t" |
84 "cvtps2pi 1024(%1, %%"REG_S"), %%mm2\n\t" | |
3567 | 85 "movq %%mm0, %%mm1 \n\t" |
86 "punpcklwd %%mm2, %%mm0 \n\t" | |
87 "punpckhwd %%mm2, %%mm1 \n\t" | |
16173 | 88 "movq %%mm0, (%0, %%"REG_S") \n\t" |
89 "movq %%mm1, 8(%0, %%"REG_S") \n\t" | |
90 "add $16, %%"REG_S" \n\t" | |
3567 | 91 " jnz 1b \n\t" |
92 "emms \n\t" | |
93 :: "r" (s16+512), "r" (f+256) | |
16173 | 94 :"%"REG_S, "memory" |
3567 | 95 );*/ |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25484
diff
changeset
|
96 __asm__ volatile( |
16173 | 97 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
98 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3567 | 99 "1: \n\t" |
16173 | 100 "movq (%1, %%"REG_S"), %%mm0 \n\t" |
101 "movq 8(%1, %%"REG_S"), %%mm1 \n\t" | |
102 "movq 1024(%1, %%"REG_S"), %%mm2\n\t" | |
103 "movq 1032(%1, %%"REG_S"), %%mm3\n\t" | |
3567 | 104 "psubd %%mm7, %%mm0 \n\t" |
105 "psubd %%mm7, %%mm1 \n\t" | |
106 "psubd %%mm7, %%mm2 \n\t" | |
107 "psubd %%mm7, %%mm3 \n\t" | |
108 "packssdw %%mm1, %%mm0 \n\t" | |
109 "packssdw %%mm3, %%mm2 \n\t" | |
110 "movq %%mm0, %%mm1 \n\t" | |
111 "punpcklwd %%mm2, %%mm0 \n\t" | |
112 "punpckhwd %%mm2, %%mm1 \n\t" | |
16173 | 113 "movq %%mm0, (%0, %%"REG_S") \n\t" |
114 "movq %%mm1, 8(%0, %%"REG_S") \n\t" | |
115 "add $16, %%"REG_S" \n\t" | |
3567 | 116 " jnz 1b \n\t" |
117 "emms \n\t" | |
118 :: "r" (s16+512), "r" (f+256) | |
16173 | 119 :"%"REG_S, "memory" |
3567 | 120 ); |
3909 | 121 return 2*256; |
122 } | |
123 | |
/*
 * Convert three consecutive 256-value input blocks starting at _f into 256
 * frames of five int16_t each, written to s16[0..1279].
 *
 * Output frame layout, in terms of input block number (block k starts at
 * _f[256*k]):  { block0[i], block2[i], 0, 0, block1[i] }.
 *
 * Each value is read as a raw 32-bit integer, magicF2W (mm7) is subtracted
 * and the result is saturated to 16 bit. The zero words are produced without
 * extra stores:
 *   - mm6 is zero and is packed in directly;
 *   - mm5 holds magicF2W in its low half only, so a register whose high half
 *     was zero-filled by movd stays zero after psubd;
 *   - a register preloaded with mm7 and then combined with punpckldq keeps
 *     the magic value in its low half, which psubd turns into zero.
 *
 * The input index runs from -1024 to 0 in steps of 16 bytes (4 values per
 * block); the output index is 5/2 of it (lea *5, sar 1), i.e. 40 bytes per
 * iteration. Both pointers are passed pre-advanced (f+256, s16+1280).
 *
 * Returns 5*256, the number of int16_t values written.
 */
124 static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){ | |
125 int32_t * f = (int32_t *) _f; | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25484
diff
changeset
|
126 __asm__ volatile( |
16173 | 127 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
128 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3654 | 129 "pxor %%mm6, %%mm6 \n\t" |
130 "movq %%mm7, %%mm5 \n\t" | |
131 "punpckldq %%mm6, %%mm5 \n\t" | |
132 "1: \n\t" | |
16173 | 133 "movd (%1, %%"REG_S"), %%mm0 \n\t" |
134 "punpckldq 2048(%1, %%"REG_S"), %%mm0\n\t" | |
135 "movd 1024(%1, %%"REG_S"), %%mm1\n\t" | |
136 "punpckldq 4(%1, %%"REG_S"), %%mm1\n\t" | |
137 "movd 2052(%1, %%"REG_S"), %%mm2\n\t" | |
3654 | 138 "movq %%mm7, %%mm3 \n\t" |
16173 | 139 "punpckldq 1028(%1, %%"REG_S"), %%mm3\n\t" |
140 "movd 8(%1, %%"REG_S"), %%mm4 \n\t" | |
141 "punpckldq 2056(%1, %%"REG_S"), %%mm4\n\t" | |
142 "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t" | |
143 "sar $1, %%"REG_D" \n\t" | |
3654 | 144 "psubd %%mm7, %%mm0 \n\t" |
145 "psubd %%mm7, %%mm1 \n\t" | |
146 "psubd %%mm5, %%mm2 \n\t" | |
147 "psubd %%mm7, %%mm3 \n\t" | |
148 "psubd %%mm7, %%mm4 \n\t" | |
149 "packssdw %%mm6, %%mm0 \n\t" | |
150 "packssdw %%mm2, %%mm1 \n\t" | |
151 "packssdw %%mm4, %%mm3 \n\t" | |
16173 | 152 "movq %%mm0, (%0, %%"REG_D") \n\t" |
153 "movq %%mm1, 8(%0, %%"REG_D") \n\t" | |
154 "movq %%mm3, 16(%0, %%"REG_D") \n\t" | |
155 "movd 1032(%1, %%"REG_S"), %%mm1\n\t" | |
156 "punpckldq 12(%1, %%"REG_S"), %%mm1\n\t" | |
157 "movd 2060(%1, %%"REG_S"), %%mm2\n\t" | |
3654 | 158 "movq %%mm7, %%mm3 \n\t" |
16173 | 159 "punpckldq 1036(%1, %%"REG_S"), %%mm3\n\t" |
3654 | 160 "pxor %%mm0, %%mm0 \n\t" |
161 "psubd %%mm7, %%mm1 \n\t" | |
162 "psubd %%mm5, %%mm2 \n\t" | |
163 "psubd %%mm7, %%mm3 \n\t" | |
164 "packssdw %%mm1, %%mm0 \n\t" | |
165 "packssdw %%mm3, %%mm2 \n\t" | |
16173 | 166 "movq %%mm0, 24(%0, %%"REG_D") \n\t" |
167 "movq %%mm2, 32(%0, %%"REG_D") \n\t" | |
25484
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
168 |
16173 | 169 "add $16, %%"REG_S" \n\t" |
3654 | 170 " jnz 1b \n\t" |
171 "emms \n\t" | |
172 :: "r" (s16+1280), "r" (f+256) | |
16173 | 173 :"%"REG_S, "%"REG_D, "memory" |
3654 | 174 ); |
3909 | 175 return 5*256; |
176 } | |
177 | |
/*
 * Convert four consecutive 256-value input blocks starting at _f into 256
 * interleaved four-channel int16_t frames at s16[0..1023].
 *
 * Output frame layout, in terms of input block number (block k starts at
 * _f[256*k]):  { block0[i], block1[i], block2[i], block3[i] }.
 *
 * Each value is read as a raw 32-bit integer, magicF2W is subtracted and the
 * result is saturated to 16 bit. Four values per block are packed, then
 * transposed with a word-level unpack followed by a dword-level unpack.
 *
 * The input index runs from -1024 to 0 in steps of 16 bytes; the output is
 * addressed with the same index scaled by 2 (32 bytes per iteration). Both
 * pointers are passed pre-advanced (f+256, s16+1024).
 *
 * Returns 4*256, the number of int16_t values written.
 */
178 static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){ | |
179 int32_t * f = (int32_t *) _f; | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25484
diff
changeset
|
180 __asm__ volatile( |
16173 | 181 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
182 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3569 | 183 "1: \n\t" |
16173 | 184 "movq (%1, %%"REG_S"), %%mm0 \n\t" |
185 "movq 8(%1, %%"REG_S"), %%mm1 \n\t" | |
186 "movq 1024(%1, %%"REG_S"), %%mm2\n\t" | |
187 "movq 1032(%1, %%"REG_S"), %%mm3\n\t" | |
3569 | 188 "psubd %%mm7, %%mm0 \n\t" |
189 "psubd %%mm7, %%mm1 \n\t" | |
190 "psubd %%mm7, %%mm2 \n\t" | |
191 "psubd %%mm7, %%mm3 \n\t" | |
192 "packssdw %%mm1, %%mm0 \n\t" | |
193 "packssdw %%mm3, %%mm2 \n\t" | |
16173 | 194 "movq 2048(%1, %%"REG_S"), %%mm3\n\t" |
195 "movq 2056(%1, %%"REG_S"), %%mm4\n\t" | |
196 "movq 3072(%1, %%"REG_S"), %%mm5\n\t" | |
197 "movq 3080(%1, %%"REG_S"), %%mm6\n\t" | |
3569 | 198 "psubd %%mm7, %%mm3 \n\t" |
199 "psubd %%mm7, %%mm4 \n\t" | |
200 "psubd %%mm7, %%mm5 \n\t" | |
201 "psubd %%mm7, %%mm6 \n\t" | |
202 "packssdw %%mm4, %%mm3 \n\t" | |
203 "packssdw %%mm6, %%mm5 \n\t" | |
204 "movq %%mm0, %%mm1 \n\t" | |
205 "movq %%mm3, %%mm4 \n\t" | |
206 "punpcklwd %%mm2, %%mm0 \n\t" | |
207 "punpckhwd %%mm2, %%mm1 \n\t" | |
208 "punpcklwd %%mm5, %%mm3 \n\t" | |
209 "punpckhwd %%mm5, %%mm4 \n\t" | |
210 "movq %%mm0, %%mm2 \n\t" | |
211 "movq %%mm1, %%mm5 \n\t" | |
212 "punpckldq %%mm3, %%mm0 \n\t" | |
213 "punpckhdq %%mm3, %%mm2 \n\t" | |
214 "punpckldq %%mm4, %%mm1 \n\t" | |
215 "punpckhdq %%mm4, %%mm5 \n\t" | |
16173 | 216 "movq %%mm0, (%0, %%"REG_S",2) \n\t" |
217 "movq %%mm2, 8(%0, %%"REG_S",2) \n\t" | |
218 "movq %%mm1, 16(%0, %%"REG_S",2)\n\t" | |
219 "movq %%mm5, 24(%0, %%"REG_S",2)\n\t" | |
220 "add $16, %%"REG_S" \n\t" | |
3569 | 221 " jnz 1b \n\t" |
222 "emms \n\t" | |
223 :: "r" (s16+1024), "r" (f+256) | |
16173 | 224 :"%"REG_S, "memory" |
3569 | 225 ); |
3909 | 226 return 4*256; |
227 } | |
228 | |
/*
 * Convert five consecutive 256-value input blocks starting at _f into 256
 * interleaved five-channel int16_t frames at s16[0..1279].
 *
 * Output frame layout, in terms of input block number (block k starts at
 * _f[256*k]):
 *   { block0[i], block2[i], block3[i], block4[i], block1[i] }.
 *
 * Each value is read as a raw 32-bit integer, magicF2W is subtracted and the
 * result is saturated to 16 bit. Values are gathered two at a time with
 * movd + punpckldq directly in output order, so no shuffle is needed after
 * packssdw. One iteration emits 4 frames (40 bytes) in two groups: stores at
 * offsets 0/8/16, then 24/32.
 *
 * The input index runs from -1024 to 0 in steps of 16 bytes; the output index
 * is 5/2 of it (lea *5, sar 1). Both pointers are passed pre-advanced
 * (f+256, s16+1280).
 *
 * Returns 5*256, the number of int16_t values written.
 */
229 static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){ | |
230 int32_t * f = (int32_t *) _f; | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25484
diff
changeset
|
231 __asm__ volatile( |
16173 | 232 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
233 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3653 | 234 "1: \n\t" |
16173 | 235 "movd (%1, %%"REG_S"), %%mm0 \n\t" |
236 "punpckldq 2048(%1, %%"REG_S"), %%mm0\n\t" | |
237 "movd 3072(%1, %%"REG_S"), %%mm1\n\t" | |
238 "punpckldq 4096(%1, %%"REG_S"), %%mm1\n\t" | |
239 "movd 1024(%1, %%"REG_S"), %%mm2\n\t" | |
240 "punpckldq 4(%1, %%"REG_S"), %%mm2\n\t" | |
241 "movd 2052(%1, %%"REG_S"), %%mm3\n\t" | |
242 "punpckldq 3076(%1, %%"REG_S"), %%mm3\n\t" | |
243 "movd 4100(%1, %%"REG_S"), %%mm4\n\t" | |
244 "punpckldq 1028(%1, %%"REG_S"), %%mm4\n\t" | |
245 "movd 8(%1, %%"REG_S"), %%mm5 \n\t" | |
246 "punpckldq 2056(%1, %%"REG_S"), %%mm5\n\t" | |
247 "lea (%%"REG_S", %%"REG_S", 4), %%"REG_D"\n\t" | |
248 "sar $1, %%"REG_D" \n\t" | |
3653 | 249 "psubd %%mm7, %%mm0 \n\t" |
250 "psubd %%mm7, %%mm1 \n\t" | |
251 "psubd %%mm7, %%mm2 \n\t" | |
252 "psubd %%mm7, %%mm3 \n\t" | |
253 "psubd %%mm7, %%mm4 \n\t" | |
254 "psubd %%mm7, %%mm5 \n\t" | |
255 "packssdw %%mm1, %%mm0 \n\t" | |
256 "packssdw %%mm3, %%mm2 \n\t" | |
257 "packssdw %%mm5, %%mm4 \n\t" | |
16173 | 258 "movq %%mm0, (%0, %%"REG_D") \n\t" |
259 "movq %%mm2, 8(%0, %%"REG_D") \n\t" | |
260 "movq %%mm4, 16(%0, %%"REG_D") \n\t" | |
25484
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
261 |
16173 | 262 "movd 3080(%1, %%"REG_S"), %%mm0\n\t" |
263 "punpckldq 4104(%1, %%"REG_S"), %%mm0\n\t" | |
264 "movd 1032(%1, %%"REG_S"), %%mm1\n\t" | |
265 "punpckldq 12(%1, %%"REG_S"), %%mm1\n\t" | |
266 "movd 2060(%1, %%"REG_S"), %%mm2\n\t" | |
267 "punpckldq 3084(%1, %%"REG_S"), %%mm2\n\t" | |
268 "movd 4108(%1, %%"REG_S"), %%mm3\n\t" | |
269 "punpckldq 1036(%1, %%"REG_S"), %%mm3\n\t" | |
3653 | 270 "psubd %%mm7, %%mm0 \n\t" |
271 "psubd %%mm7, %%mm1 \n\t" | |
272 "psubd %%mm7, %%mm2 \n\t" | |
273 "psubd %%mm7, %%mm3 \n\t" | |
274 "packssdw %%mm1, %%mm0 \n\t" | |
275 "packssdw %%mm3, %%mm2 \n\t" | |
16173 | 276 "movq %%mm0, 24(%0, %%"REG_D") \n\t" |
277 "movq %%mm2, 32(%0, %%"REG_D") \n\t" | |
25484
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
278 |
16173 | 279 "add $16, %%"REG_S" \n\t" |
3653 | 280 " jnz 1b \n\t" |
281 "emms \n\t" | |
282 :: "r" (s16+1280), "r" (f+256) | |
16173 | 283 :"%"REG_S, "%"REG_D, "memory" |
3653 | 284 ); |
3909 | 285 return 5*256; |
286 } | |
287 | |
/*
 * Convert two consecutive 256-value input blocks starting at _f into 256
 * frames of six int16_t each, written to s16[0..1535].
 *
 * Output frame layout, in terms of input block number (block k starts at
 * _f[256*k]):  { 0, 0, 0, 0, block1[i], block0[i] }.
 *
 * Each value is read as a raw 32-bit integer, magicF2W is subtracted and the
 * result is saturated to 16 bit. After interleaving, each 32-bit lane holds
 * one { block1, block0 } pair; it is written with movd while the four zero
 * words of each frame come from movq stores of the zeroed mm6.
 *
 * The input index runs from -1024 to 0 in steps of 16 bytes; the output index
 * is three times that (48 bytes, 4 frames per iteration). Both pointers are
 * passed pre-advanced (f+256, s16+1536).
 *
 * Returns 6*256, the number of int16_t values written.
 */
288 static int a52_resample_MONO_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
289 int32_t * f = (int32_t *) _f; | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25484
diff
changeset
|
290 __asm__ volatile( |
16173 | 291 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
292 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3569 | 293 "pxor %%mm6, %%mm6 \n\t" |
294 "1: \n\t" | |
16173 | 295 "movq 1024(%1, %%"REG_S"), %%mm0\n\t" |
296 "movq 1032(%1, %%"REG_S"), %%mm1\n\t" | |
297 "movq (%1, %%"REG_S"), %%mm2 \n\t" | |
298 "movq 8(%1, %%"REG_S"), %%mm3 \n\t" | |
3569 | 299 "psubd %%mm7, %%mm0 \n\t" |
300 "psubd %%mm7, %%mm1 \n\t" | |
301 "psubd %%mm7, %%mm2 \n\t" | |
302 "psubd %%mm7, %%mm3 \n\t" | |
303 "packssdw %%mm1, %%mm0 \n\t" | |
304 "packssdw %%mm3, %%mm2 \n\t" | |
305 "movq %%mm0, %%mm1 \n\t" | |
306 "punpcklwd %%mm2, %%mm0 \n\t" | |
307 "punpckhwd %%mm2, %%mm1 \n\t" | |
16173 | 308 "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t" |
309 "movq %%mm6, (%0, %%"REG_D") \n\t" | |
310 "movd %%mm0, 8(%0, %%"REG_D") \n\t" | |
3569 | 311 "punpckhdq %%mm0, %%mm0 \n\t" |
16173 | 312 "movq %%mm6, 12(%0, %%"REG_D") \n\t" |
313 "movd %%mm0, 20(%0, %%"REG_D") \n\t" | |
314 "movq %%mm6, 24(%0, %%"REG_D") \n\t" | |
315 "movd %%mm1, 32(%0, %%"REG_D") \n\t" | |
3569 | 316 "punpckhdq %%mm1, %%mm1 \n\t" |
16173 | 317 "movq %%mm6, 36(%0, %%"REG_D") \n\t" |
318 "movd %%mm1, 44(%0, %%"REG_D") \n\t" | |
319 "add $16, %%"REG_S" \n\t" | |
3569 | 320 " jnz 1b \n\t" |
321 "emms \n\t" | |
322 :: "r" (s16+1536), "r" (f+256) | |
16173 | 323 :"%"REG_S, "%"REG_D, "memory" |
3569 | 324 ); |
3909 | 325 return 6*256; |
326 } | |
327 | |
/*
 * Convert three consecutive 256-value input blocks starting at _f into 256
 * frames of six int16_t each, written to s16[0..1535].
 *
 * Output frame layout, in terms of input block number (block k starts at
 * _f[256*k]):  { block1[i], block2[i], 0, 0, 0, block0[i] }.
 *
 * Each value is read as a raw 32-bit integer, magicF2W is subtracted and the
 * result is saturated to 16 bit. The lane comments on the shuffle
 * instructions list the four 16-bit words from most to least significant;
 * lower-case letters belong to the first frame of the pair, upper-case to the
 * second, and X marks a word whose content does not matter.
 *
 * mm3 is read by the first punpckldq before anything in this function has
 * written it. That stale half ends up in the low dword (the XX lanes) and is
 * discarded by the following punpckhdq, so it never reaches the output.
 *
 * The input index runs from -1024 to 0 in steps of 8 bytes (2 values per
 * block); the output index is three times that (24 bytes, 2 frames per
 * iteration). Both pointers are passed pre-advanced (f+256, s16+1536).
 *
 * Returns 6*256, the number of int16_t values written.
 */
328 static int a52_resample_STEREO_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
329 int32_t * f = (int32_t *) _f; | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25484
diff
changeset
|
330 __asm__ volatile( |
16173 | 331 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
332 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3576 | 333 "pxor %%mm6, %%mm6 \n\t" |
334 "1: \n\t" | |
16173 | 335 "movq 1024(%1, %%"REG_S"), %%mm0\n\t" |
336 "movq 2048(%1, %%"REG_S"), %%mm1\n\t" | |
25484
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
337 "movq (%1, %%"REG_S"), %%mm5 \n\t" |
3576 | 338 "psubd %%mm7, %%mm0 \n\t" |
339 "psubd %%mm7, %%mm1 \n\t" | |
340 "psubd %%mm7, %%mm5 \n\t" | |
16173 | 341 "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t" |
25484
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
342 |
3576 | 343 "pxor %%mm4, %%mm4 \n\t" |
344 "packssdw %%mm5, %%mm0 \n\t" // FfAa | |
345 "packssdw %%mm4, %%mm1 \n\t" // 00Bb | |
346 "punpckhwd %%mm0, %%mm4 \n\t" // F0f0 | |
347 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
348 "movq %%mm0, %%mm1 \n\t" // BAba | |
349 "punpckldq %%mm4, %%mm3 \n\t" // f0XX | |
350 "punpckldq %%mm6, %%mm0 \n\t" // 00ba | |
351 "punpckhdq %%mm1, %%mm3 \n\t" // BAf0 | |
25484
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
352 |
16173 | 353 "movq %%mm0, (%0, %%"REG_D") \n\t" // 00ba |
3576 | 354 "punpckhdq %%mm4, %%mm0 \n\t" // F000 |
16173 | 355 "movq %%mm3, 8(%0, %%"REG_D") \n\t" // BAf0 |
356 "movq %%mm0, 16(%0, %%"REG_D") \n\t" // F000 | |
357 "add $8, %%"REG_S" \n\t" | |
3576 | 358 " jnz 1b \n\t" |
359 "emms \n\t" | |
360 :: "r" (s16+1536), "r" (f+256) | |
16173 | 361 :"%"REG_S, "%"REG_D, "memory" |
3576 | 362 ); |
3909 | 363 return 6*256; |
364 } | |
365 | |
/*
 * Convert four consecutive 256-value input blocks starting at _f into 256
 * frames of six int16_t each, written to s16[0..1535].
 *
 * Output frame layout, in terms of input block number (block k starts at
 * _f[256*k]):
 *   { block1[i], block3[i], 0, 0, block2[i], block0[i] }.
 *
 * Each value is read as a raw 32-bit integer, magicF2W is subtracted and the
 * result is saturated to 16 bit. The lane comments on the shuffle
 * instructions list the four 16-bit words from most to least significant;
 * lower-case letters belong to the first frame of the pair, upper-case to the
 * second. The zero words come from the zeroed mm6.
 *
 * The input index runs from -1024 to 0 in steps of 8 bytes (2 values per
 * block); the output index is three times that (24 bytes, 2 frames per
 * iteration). Both pointers are passed pre-advanced (f+256, s16+1536).
 *
 * Returns 6*256, the number of int16_t values written.
 */
366 static int a52_resample_3F_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
367 int32_t * f = (int32_t *) _f; | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25484
diff
changeset
|
368 __asm__ volatile( |
16173 | 369 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
370 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3578 | 371 "pxor %%mm6, %%mm6 \n\t" |
372 "1: \n\t" | |
16173 | 373 "movq 1024(%1, %%"REG_S"), %%mm0\n\t" |
374 "movq 3072(%1, %%"REG_S"), %%mm1\n\t" | |
375 "movq 2048(%1, %%"REG_S"), %%mm4\n\t" | |
25484
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
376 "movq (%1, %%"REG_S"), %%mm5 \n\t" |
3578 | 377 "psubd %%mm7, %%mm0 \n\t" |
378 "psubd %%mm7, %%mm1 \n\t" | |
379 "psubd %%mm7, %%mm4 \n\t" | |
380 "psubd %%mm7, %%mm5 \n\t" | |
16173 | 381 "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t" |
25484
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
382 |
3578 | 383 "packssdw %%mm4, %%mm0 \n\t" // EeAa |
384 "packssdw %%mm5, %%mm1 \n\t" // FfBb | |
385 "movq %%mm0, %%mm2 \n\t" // EeAa | |
386 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
387 "punpckhwd %%mm1, %%mm2 \n\t" // FEfe | |
388 "movq %%mm0, %%mm1 \n\t" // BAba | |
389 "punpckldq %%mm6, %%mm0 \n\t" // 00ba | |
390 "punpckhdq %%mm1, %%mm1 \n\t" // BABA | |
25484
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
391 |
16173 | 392 "movq %%mm0, (%0, %%"REG_D") \n\t" |
3578 | 393 "punpckhdq %%mm2, %%mm0 \n\t" // FE00 |
394 "punpckldq %%mm1, %%mm2 \n\t" // BAfe | |
16173 | 395 "movq %%mm2, 8(%0, %%"REG_D") \n\t" |
396 "movq %%mm0, 16(%0, %%"REG_D") \n\t" | |
397 "add $8, %%"REG_S" \n\t" | |
3578 | 398 " jnz 1b \n\t" |
399 "emms \n\t" | |
400 :: "r" (s16+1536), "r" (f+256) | |
16173 | 401 :"%"REG_S, "%"REG_D, "memory" |
3578 | 402 ); |
3909 | 403 return 6*256; |
404 } | |
405 | |
/*
 * Convert five consecutive 256-value input blocks starting at _f into 256
 * frames of six int16_t each, written to s16[0..1535].
 *
 * Output frame layout, in terms of input block number (block k starts at
 * _f[256*k]):
 *   { block1[i], block2[i], block3[i], block4[i], 0, block0[i] }.
 *
 * Each value is read as a raw 32-bit integer, magicF2W is subtracted and the
 * result is saturated to 16 bit. The lane comments on the shuffle
 * instructions list the four 16-bit words from most to least significant;
 * lower-case letters belong to the first frame of the pair, upper-case to the
 * second. The single zero word per frame comes from mm4, which is cleared
 * inside the loop (the loop-invariant clear of mm6 is commented out and mm6
 * is not used).
 *
 * The input index runs from -1024 to 0 in steps of 8 bytes (2 values per
 * block); the output index is three times that (24 bytes, 2 frames per
 * iteration). Both pointers are passed pre-advanced (f+256, s16+1536).
 *
 * Returns 6*256, the number of int16_t values written.
 */
406 static int a52_resample_2F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
407 int32_t * f = (int32_t *) _f; | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25484
diff
changeset
|
408 __asm__ volatile( |
16173 | 409 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
410 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3577 | 411 // "pxor %%mm6, %%mm6 \n\t" |
412 "1: \n\t" | |
16173 | 413 "movq 1024(%1, %%"REG_S"), %%mm0\n\t" |
414 "movq 2048(%1, %%"REG_S"), %%mm1\n\t" | |
415 "movq 3072(%1, %%"REG_S"), %%mm2\n\t" | |
416 "movq 4096(%1, %%"REG_S"), %%mm3\n\t" | |
25484
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
417 "movq (%1, %%"REG_S"), %%mm5 \n\t" |
3577 | 418 "psubd %%mm7, %%mm0 \n\t" |
419 "psubd %%mm7, %%mm1 \n\t" | |
420 "psubd %%mm7, %%mm2 \n\t" | |
421 "psubd %%mm7, %%mm3 \n\t" | |
422 "psubd %%mm7, %%mm5 \n\t" | |
16173 | 423 "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t" |
25484
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
424 |
3577 | 425 "packssdw %%mm2, %%mm0 \n\t" // CcAa |
426 "packssdw %%mm3, %%mm1 \n\t" // DdBb | |
427 "packssdw %%mm5, %%mm5 \n\t" // FfFf | |
428 "movq %%mm0, %%mm2 \n\t" // CcAa | |
429 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
430 "punpckhwd %%mm1, %%mm2 \n\t" // DCdc | |
431 "pxor %%mm4, %%mm4 \n\t" // 0000 | |
432 "punpcklwd %%mm5, %%mm4 \n\t" // F0f0 | |
433 "movq %%mm0, %%mm1 \n\t" // BAba | |
434 "movq %%mm4, %%mm3 \n\t" // F0f0 | |
435 "punpckldq %%mm2, %%mm0 \n\t" // dcba | |
436 "punpckhdq %%mm1, %%mm1 \n\t" // BABA | |
437 "punpckldq %%mm1, %%mm4 \n\t" // BAf0 | |
438 "punpckhdq %%mm3, %%mm2 \n\t" // F0DC | |
25484
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
439 |
16173 | 440 "movq %%mm0, (%0, %%"REG_D") \n\t" |
441 "movq %%mm4, 8(%0, %%"REG_D") \n\t" | |
442 "movq %%mm2, 16(%0, %%"REG_D") \n\t" | |
443 "add $8, %%"REG_S" \n\t" | |
3577 | 444 " jnz 1b \n\t" |
445 "emms \n\t" | |
446 :: "r" (s16+1536), "r" (f+256) | |
16173 | 447 :"%"REG_S, "%"REG_D, "memory" |
3577 | 448 ); |
3909 | 449 return 6*256; |
450 } | |
451 | |
/*
 * Convert six consecutive 256-value input blocks starting at _f into 256
 * interleaved six-channel int16_t frames at s16[0..1535].
 *
 * Output frame layout, in terms of input block number (block k starts at
 * _f[256*k]):
 *   { block1[i], block3[i], block4[i], block5[i], block2[i], block0[i] }.
 *
 * Each value is read as a raw 32-bit integer, magicF2W is subtracted and the
 * result is saturated to 16 bit. The lane comments on the shuffle
 * instructions list the four 16-bit words from most to least significant;
 * lower-case letters belong to the first frame of the pair, upper-case to the
 * second. No zero words are emitted, so the clear of mm6 is commented out.
 *
 * The input index runs from -1024 to 0 in steps of 8 bytes (2 values per
 * block); the output index is three times that (24 bytes, 2 frames per
 * iteration). Both pointers are passed pre-advanced (f+256, s16+1536).
 *
 * Returns 6*256, the number of int16_t values written.
 */
452 static int a52_resample_3F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
453 int32_t * f = (int32_t *) _f; | |
27754
08d18fe9da52
Change all occurrences of asm and __asm to __asm__, same as was done for FFmpeg.
diego
parents:
25484
diff
changeset
|
454 __asm__ volatile( |
16173 | 455 "mov $-1024, %%"REG_S" \n\t" |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
456 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3575 | 457 // "pxor %%mm6, %%mm6 \n\t" |
458 "1: \n\t" | |
16173 | 459 "movq 1024(%1, %%"REG_S"), %%mm0\n\t" |
460 "movq 3072(%1, %%"REG_S"), %%mm1\n\t" | |
461 "movq 4096(%1, %%"REG_S"), %%mm2\n\t" | |
462 "movq 5120(%1, %%"REG_S"), %%mm3\n\t" | |
463 "movq 2048(%1, %%"REG_S"), %%mm4\n\t" | |
25484
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
464 "movq (%1, %%"REG_S"), %%mm5 \n\t" |
3575 | 465 "psubd %%mm7, %%mm0 \n\t" |
466 "psubd %%mm7, %%mm1 \n\t" | |
467 "psubd %%mm7, %%mm2 \n\t" | |
468 "psubd %%mm7, %%mm3 \n\t" | |
469 "psubd %%mm7, %%mm4 \n\t" | |
470 "psubd %%mm7, %%mm5 \n\t" | |
16173 | 471 "lea (%%"REG_S", %%"REG_S", 2), %%"REG_D"\n\t" |
25484
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
472 |
3575 | 473 "packssdw %%mm2, %%mm0 \n\t" // CcAa |
474 "packssdw %%mm3, %%mm1 \n\t" // DdBb | |
475 "packssdw %%mm4, %%mm4 \n\t" // EeEe | |
476 "packssdw %%mm5, %%mm5 \n\t" // FfFf | |
477 "movq %%mm0, %%mm2 \n\t" // CcAa | |
478 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
479 "punpckhwd %%mm1, %%mm2 \n\t" // DCdc | |
480 "punpcklwd %%mm5, %%mm4 \n\t" // FEfe | |
481 "movq %%mm0, %%mm1 \n\t" // BAba | |
482 "movq %%mm4, %%mm3 \n\t" // FEfe | |
483 "punpckldq %%mm2, %%mm0 \n\t" // dcba | |
484 "punpckhdq %%mm1, %%mm1 \n\t" // BABA | |
485 "punpckldq %%mm1, %%mm4 \n\t" // BAfe | |
486 "punpckhdq %%mm3, %%mm2 \n\t" // FEDC | |
25484
943f37a4323d
cosmetics: Remove trailing whitespace, reformat one comment.
diego
parents:
25483
diff
changeset
|
487 |
16173 | 488 "movq %%mm0, (%0, %%"REG_D") \n\t" |
489 "movq %%mm4, 8(%0, %%"REG_D") \n\t" | |
490 "movq %%mm2, 16(%0, %%"REG_D") \n\t" | |
491 "add $8, %%"REG_S" \n\t" | |
3575 | 492 " jnz 1b \n\t" |
493 "emms \n\t" | |
494 :: "r" (s16+1536), "r" (f+256) | |
16173 | 495 :"%"REG_S, "%"REG_D, "memory" |
3575 | 496 ); |
3909 | 497 return 6*256; |
498 } | |
499 | |
500 | |
/*
 * Select the MMX conversion routine for a channel configuration.
 *
 * flags is compared for exact equality against the A52_* layout constants,
 * alone or OR-ed with A52_LFE; ch is the number of output channels wanted.
 * A52_CHANNEL, A52_STEREO and A52_DOLBY share the same two-input routines.
 *
 * Returns the matching routine as a void pointer, or NULL when flags is not
 * one of the listed cases or ch differs from the single output width
 * supported for that layout.
 * NOTE(review): the caller presumably falls back to a non-MMX path on NULL -
 * confirm at the call site.
 */
501 static void* a52_resample_MMX(int flags, int ch){ | |
502 switch (flags) { | |
503 case A52_MONO: | |
504 if(ch==5) return a52_resample_MONO_to_5_MMX; | |
505 break; | |
506 case A52_CHANNEL: | |
507 case A52_STEREO: | |
508 case A52_DOLBY: | |
509 if(ch==2) return a52_resample_STEREO_to_2_MMX; | |
510 break; | |
511 case A52_3F: | |
512 if(ch==5) return a52_resample_3F_to_5_MMX; | |
513 break; | |
514 case A52_2F2R: | |
515 if(ch==4) return a52_resample_2F_2R_to_4_MMX; | |
516 break; | |
517 case A52_3F2R: | |
518 if(ch==5) return a52_resample_3F_2R_to_5_MMX; | |
519 break; | |
520 case A52_MONO | A52_LFE: | |
521 if(ch==6) return a52_resample_MONO_LFE_to_6_MMX; | |
522 break; | |
523 case A52_CHANNEL | A52_LFE: | |
524 case A52_STEREO | A52_LFE: | |
525 case A52_DOLBY | A52_LFE: | |
526 if(ch==6) return a52_resample_STEREO_LFE_to_6_MMX; | |
527 break; | |
528 case A52_3F | A52_LFE: | |
529 if(ch==6) return a52_resample_3F_LFE_to_6_MMX; | |
530 break; | |
531 case A52_2F2R | A52_LFE: | |
532 if(ch==6) return a52_resample_2F_2R_LFE_to_6_MMX; | |
533 break; | |
534 case A52_3F2R | A52_LFE: | |
535 if(ch==6) return a52_resample_3F_2R_LFE_to_6_MMX; | |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
536 break; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
537 } |
3909 | 538 return NULL; |
3626 | 539 } |
540 | |
3909 | 541 |