Mercurial > mplayer.hg
annotate liba52/resample_mmx.c @ 14204:05b78d7d444e
savage_vid added
author | faust3 |
---|---|
date | Tue, 21 Dec 2004 17:14:39 +0000 |
parents | f881c918739b |
children | d6219ce521e9 |
rev | line source |
---|---|
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
1 |
3569 | 2 // MMX optimizations from Michael Niedermayer (michaelni@gmx.at) (under GPL) |
3 | |
4 /* optimization TODO / NOTES | |
5 movntq is slightly faster (0.5% with the current test.c benchmark) | |
6 (but that's just test.c, so that needs to be tested in reality) | |
7 and it would mean (C / MMX2 / MMX / 3DNOW) versions | |
8 */ | |
9 | |
12303
f881c918739b
attribute_used patch by (VMiklos <mamajom at axelero dot hu>)
michael
parents:
8123
diff
changeset
|
10 static uint64_t attribute_used __attribute__((aligned(8))) magicF2W= 0x43c0000043c00000LL; |
f881c918739b
attribute_used patch by (VMiklos <mamajom at axelero dot hu>)
michael
parents:
8123
diff
changeset
|
11 static uint64_t attribute_used __attribute__((aligned(8))) wm1010= 0xFFFF0000FFFF0000LL; |
f881c918739b
attribute_used patch by (VMiklos <mamajom at axelero dot hu>)
michael
parents:
8123
diff
changeset
|
12 static uint64_t attribute_used __attribute__((aligned(8))) wm0101= 0x0000FFFF0000FFFFLL; |
f881c918739b
attribute_used patch by (VMiklos <mamajom at axelero dot hu>)
michael
parents:
8123
diff
changeset
|
13 static uint64_t attribute_used __attribute__((aligned(8))) wm1100= 0xFFFFFFFF00000000LL; |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
14 |
/*
 * Convert one block of 256 mono samples to 5-channel interleaved 16-bit output.
 *
 * _f  : input samples; reinterpreted as int32_t so the float bit patterns can
 *       be adjusted with integer MMX ops. Entries 0..255 are read.
 * s16 : output buffer; 5*256 int16_t values are written.
 *
 * Each input dword has magicF2W subtracted and is packed to a signed 16-bit
 * word with saturation (packssdw). Every 5-word output frame is
 * { 0, 0, 0, 0, sample }: the mono sample lands in slot 4, the rest is zero.
 *
 * %esi starts at -512 and is incremented by 8 until it reaches 0 (4 samples
 * per iteration). Inputs are addressed as (f+256) + 2*esi and outputs as
 * (s16+1280) + 5*esi, which is why both pointers are passed pre-advanced to
 * the end of their buffers. %esi and %edi are hard-coded and listed as
 * clobbers.
 *
 * Returns the number of int16_t values written (5*256).
 */
3909 | 15 static int a52_resample_MONO_to_5_MMX(float * _f, int16_t * s16){ |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
16 int32_t * f = (int32_t *) _f; |
3574 | 17 asm volatile(
18 "movl $-512, %%esi \n\t" | |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
19 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
20 "movq "MANGLE(wm1100)", %%mm3 \n\t" |
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
21 "movq "MANGLE(wm0101)", %%mm4 \n\t" |
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
22 "movq "MANGLE(wm1010)", %%mm5 \n\t" |
/* mm6 = 0, used as the source for all zero-filled output slots */
3574 | 23 "pxor %%mm6, %%mm6 \n\t" |
24 "1: \n\t" | |
25 "movq (%1, %%esi, 2), %%mm0 \n\t" | |
26 "movq 8(%1, %%esi, 2), %%mm1 \n\t" | |
/* edi = 5*esi: byte offset of the current group of four output frames */
27 "leal (%%esi, %%esi, 4), %%edi \n\t" | |
28 "psubd %%mm7, %%mm0 \n\t" | |
29 "psubd %%mm7, %%mm1 \n\t" | |
/* mm0 = four 16-bit samples A B C D (lowest word first) */
30 "packssdw %%mm1, %%mm0 \n\t" | |
31 "movq %%mm0, %%mm1 \n\t" | |
/* mm0 keeps A and C, mm1 keeps B and D; the other words become 0 */
32 "pand %%mm4, %%mm0 \n\t" | |
33 "pand %%mm5, %%mm1 \n\t" | |
/* the trailing comments below list the stored words in memory order */
34 "movq %%mm6, (%0, %%edi) \n\t" // 0 0 0 0 | |
35 "movd %%mm0, 8(%0, %%edi) \n\t" // A 0 | |
36 "pand %%mm3, %%mm0 \n\t" | |
37 "movd %%mm6, 12(%0, %%edi) \n\t" // 0 0 | |
38 "movd %%mm1, 16(%0, %%edi) \n\t" // 0 B | |
39 "pand %%mm3, %%mm1 \n\t" | |
40 "movd %%mm6, 20(%0, %%edi) \n\t" // 0 0 | |
41 "movq %%mm0, 24(%0, %%edi) \n\t" // 0 0 C 0 | |
42 "movq %%mm1, 32(%0, %%edi) \n\t" // 0 0 0 D | |
43 "addl $8, %%esi \n\t" | |
44 " jnz 1b \n\t" | |
45 "emms \n\t" | |
46 :: "r" (s16+1280), "r" (f+256) | |
47 :"%esi", "%edi", "memory" | |
48 ); | |
3909 | 49 return 5*256; |
50 } | |
51 | |
/*
 * Convert one block of 2 x 256 planar samples to 2-channel interleaved
 * 16-bit output.
 *
 * _f  : input; reinterpreted as int32_t. Two planes of 256 dwords are read:
 *       plane 0 at f[0..255] and plane 1 at f[256..511].
 * s16 : output buffer; 2*256 int16_t values are written.
 *
 * Each dword has magicF2W subtracted and is packed to a signed 16-bit word
 * with saturation. Output frames are { plane0, plane1 }.
 *
 * %esi runs from -1024 to 0 in steps of 16 bytes (4 samples per plane per
 * iteration) and is used as a byte offset from the pre-advanced pointers
 * f+256 and s16+512. %esi is hard-coded and listed as a clobber.
 *
 * Returns the number of int16_t values written (2*256).
 */
52 static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){ | |
53 int32_t * f = (int32_t *) _f; | |
3567 | 54 /* benchmark scores are 0.3% better with SSE but we would need to set bias=0 and premultiply it |
55 #ifdef HAVE_SSE | |
56 asm volatile( | |
57 "movl $-1024, %%esi \n\t" | |
58 "1: \n\t" | |
59 "cvtps2pi (%1, %%esi), %%mm0 \n\t" | |
60 "cvtps2pi 1024(%1, %%esi), %%mm2\n\t" | |
61 "movq %%mm0, %%mm1 \n\t" | |
62 "punpcklwd %%mm2, %%mm0 \n\t" | |
63 "punpckhwd %%mm2, %%mm1 \n\t" | |
64 "movq %%mm0, (%0, %%esi) \n\t" | |
65 "movq %%mm1, 8(%0, %%esi) \n\t" | |
66 "addl $16, %%esi \n\t" | |
67 " jnz 1b \n\t" | |
68 "emms \n\t" | |
69 :: "r" (s16+512), "r" (f+256) | |
70 :"%esi", "memory" | |
71 );*/ | |
72 asm volatile( | |
73 "movl $-1024, %%esi \n\t" | |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
74 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3567 | 75 "1: \n\t" |
/* mm0:mm1 = four dwords of plane 0, mm2:mm3 = four dwords of plane 1 */
76 "movq (%1, %%esi), %%mm0 \n\t" | |
77 "movq 8(%1, %%esi), %%mm1 \n\t" | |
78 "movq 1024(%1, %%esi), %%mm2 \n\t" | |
79 "movq 1032(%1, %%esi), %%mm3 \n\t" | |
80 "psubd %%mm7, %%mm0 \n\t" | |
81 "psubd %%mm7, %%mm1 \n\t" | |
82 "psubd %%mm7, %%mm2 \n\t" | |
83 "psubd %%mm7, %%mm3 \n\t" | |
84 "packssdw %%mm1, %%mm0 \n\t" | |
85 "packssdw %%mm3, %%mm2 \n\t" | |
/* interleave the 16-bit words of the two planes into four output frames */
86 "movq %%mm0, %%mm1 \n\t" | |
87 "punpcklwd %%mm2, %%mm0 \n\t" | |
88 "punpckhwd %%mm2, %%mm1 \n\t" | |
89 "movq %%mm0, (%0, %%esi) \n\t" | |
90 "movq %%mm1, 8(%0, %%esi) \n\t" | |
91 "addl $16, %%esi \n\t" | |
92 " jnz 1b \n\t" | |
93 "emms \n\t" | |
94 :: "r" (s16+512), "r" (f+256) | |
95 :"%esi", "memory" | |
96 ); | |
3909 | 97 return 2*256; |
98 } | |
99 | |
/*
 * Convert one block of 3 x 256 planar samples to 5-channel interleaved
 * 16-bit output.
 *
 * _f  : input; reinterpreted as int32_t. Three planes of 256 dwords are
 *       read: plane k starts at f[256*k] (k = 0..2).
 * s16 : output buffer; 5*256 int16_t values are written.
 *
 * Each dword has magicF2W subtracted and is packed to a signed 16-bit word
 * with saturation. Output frames are { plane0, plane2, 0, 0, plane1 }.
 *
 * %esi runs from -1024 to 0 in steps of 16 bytes (4 samples per iteration)
 * as a byte offset from f+256; %edi = 5*esi/2 is the matching byte offset
 * from s16+1280. %esi and %edi are hard-coded and listed as clobbers.
 *
 * Returns the number of int16_t values written (5*256).
 */
100 static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){ | |
101 int32_t * f = (int32_t *) _f; | |
3654 | 102 asm volatile(
103 "movl $-1024, %%esi \n\t" | |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
104 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
/* mm6 = 0; mm5 = { magic, 0 }, for registers whose upper dword is already zero */
3654 | 105 "pxor %%mm6, %%mm6 \n\t" |
106 "movq %%mm7, %%mm5 \n\t" | |
107 "punpckldq %%mm6, %%mm5 \n\t" | |
108 "1: \n\t" | |
109 "movd (%1, %%esi), %%mm0 \n\t" | |
110 "punpckldq 2048(%1, %%esi), %%mm0\n\t" | |
111 "movd 1024(%1, %%esi), %%mm1 \n\t" | |
112 "punpckldq 4(%1, %%esi), %%mm1 \n\t" | |
113 "movd 2052(%1, %%esi), %%mm2 \n\t" | |
/* mm3's low dword is preloaded with the magic so that subtracting mm7 leaves a zero slot */
114 "movq %%mm7, %%mm3 \n\t" | |
115 "punpckldq 1028(%1, %%esi), %%mm3\n\t" | |
116 "movd 8(%1, %%esi), %%mm4 \n\t" | |
117 "punpckldq 2056(%1, %%esi), %%mm4\n\t" | |
118 "leal (%%esi, %%esi, 4), %%edi \n\t" | |
119 "sarl $1, %%edi \n\t" | |
120 "psubd %%mm7, %%mm0 \n\t" | |
121 "psubd %%mm7, %%mm1 \n\t" | |
122 "psubd %%mm5, %%mm2 \n\t" | |
123 "psubd %%mm7, %%mm3 \n\t" | |
124 "psubd %%mm7, %%mm4 \n\t" | |
125 "packssdw %%mm6, %%mm0 \n\t" | |
126 "packssdw %%mm2, %%mm1 \n\t" | |
127 "packssdw %%mm4, %%mm3 \n\t" | |
128 "movq %%mm0, (%0, %%edi) \n\t" | |
129 "movq %%mm1, 8(%0, %%edi) \n\t" | |
130 "movq %%mm3, 16(%0, %%edi) \n\t" | |
131 | |
/* second half of the iteration: remaining words of frames 2 and 3 */
132 "movd 1032(%1, %%esi), %%mm1 \n\t" | |
133 "punpckldq 12(%1, %%esi), %%mm1\n\t" | |
134 "movd 2060(%1, %%esi), %%mm2 \n\t" | |
135 "movq %%mm7, %%mm3 \n\t" | |
136 "punpckldq 1036(%1, %%esi), %%mm3\n\t" | |
137 "pxor %%mm0, %%mm0 \n\t" | |
138 "psubd %%mm7, %%mm1 \n\t" | |
139 "psubd %%mm5, %%mm2 \n\t" | |
140 "psubd %%mm7, %%mm3 \n\t" | |
141 "packssdw %%mm1, %%mm0 \n\t" | |
142 "packssdw %%mm3, %%mm2 \n\t" | |
143 "movq %%mm0, 24(%0, %%edi) \n\t" | |
144 "movq %%mm2, 32(%0, %%edi) \n\t" | |
145 | |
146 "addl $16, %%esi \n\t" | |
147 " jnz 1b \n\t" | |
148 "emms \n\t" | |
149 :: "r" (s16+1280), "r" (f+256) | |
150 :"%esi", "%edi", "memory" | |
151 ); | |
3909 | 152 return 5*256; |
153 } | |
154 | |
/*
 * Convert one block of 4 x 256 planar samples to 4-channel interleaved
 * 16-bit output.
 *
 * _f  : input; reinterpreted as int32_t. Four planes of 256 dwords are read:
 *       plane k starts at f[256*k] (k = 0..3).
 * s16 : output buffer; 4*256 int16_t values are written.
 *
 * Each dword has magicF2W subtracted and is packed to a signed 16-bit word
 * with saturation. Output frames are { plane0, plane1, plane2, plane3 }.
 *
 * %esi runs from -1024 to 0 in steps of 16 bytes (4 samples per plane per
 * iteration) as a byte offset from f+256; the output is addressed as
 * (s16+1024) + 2*esi. %esi is hard-coded and listed as a clobber.
 *
 * Returns the number of int16_t values written (4*256).
 */
155 static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){ | |
156 int32_t * f = (int32_t *) _f; | |
3569 | 157 asm volatile(
158 "movl $-1024, %%esi \n\t" | |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
159 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3569 | 160 "1: \n\t" |
/* planes 0 and 1: unbias and pack four samples each into mm0 and mm2 */
161 "movq (%1, %%esi), %%mm0 \n\t" | |
162 "movq 8(%1, %%esi), %%mm1 \n\t" | |
163 "movq 1024(%1, %%esi), %%mm2 \n\t" | |
164 "movq 1032(%1, %%esi), %%mm3 \n\t" | |
165 "psubd %%mm7, %%mm0 \n\t" | |
166 "psubd %%mm7, %%mm1 \n\t" | |
167 "psubd %%mm7, %%mm2 \n\t" | |
168 "psubd %%mm7, %%mm3 \n\t" | |
169 "packssdw %%mm1, %%mm0 \n\t" | |
170 "packssdw %%mm3, %%mm2 \n\t" | |
/* planes 2 and 3: unbias and pack four samples each into mm3 and mm5 */
171 "movq 2048(%1, %%esi), %%mm3 \n\t" | |
172 "movq 2056(%1, %%esi), %%mm4 \n\t" | |
173 "movq 3072(%1, %%esi), %%mm5 \n\t" | |
174 "movq 3080(%1, %%esi), %%mm6 \n\t" | |
175 "psubd %%mm7, %%mm3 \n\t" | |
176 "psubd %%mm7, %%mm4 \n\t" | |
177 "psubd %%mm7, %%mm5 \n\t" | |
178 "psubd %%mm7, %%mm6 \n\t" | |
179 "packssdw %%mm4, %%mm3 \n\t" | |
180 "packssdw %%mm6, %%mm5 \n\t" | |
/* word interleave gives (plane0,plane1) and (plane2,plane3) pairs ... */
181 "movq %%mm0, %%mm1 \n\t" | |
182 "movq %%mm3, %%mm4 \n\t" | |
183 "punpcklwd %%mm2, %%mm0 \n\t" | |
184 "punpckhwd %%mm2, %%mm1 \n\t" | |
185 "punpcklwd %%mm5, %%mm3 \n\t" | |
186 "punpckhwd %%mm5, %%mm4 \n\t" | |
/* ... and dword interleave joins the pairs into complete 4-word frames */
187 "movq %%mm0, %%mm2 \n\t" | |
188 "movq %%mm1, %%mm5 \n\t" | |
189 "punpckldq %%mm3, %%mm0 \n\t" | |
190 "punpckhdq %%mm3, %%mm2 \n\t" | |
191 "punpckldq %%mm4, %%mm1 \n\t" | |
192 "punpckhdq %%mm4, %%mm5 \n\t" | |
193 "movq %%mm0, (%0, %%esi,2) \n\t" | |
194 "movq %%mm2, 8(%0, %%esi,2) \n\t" | |
195 "movq %%mm1, 16(%0, %%esi,2) \n\t" | |
196 "movq %%mm5, 24(%0, %%esi,2) \n\t" | |
197 "addl $16, %%esi \n\t" | |
198 " jnz 1b \n\t" | |
199 "emms \n\t" | |
200 :: "r" (s16+1024), "r" (f+256) | |
201 :"%esi", "memory" | |
202 ); | |
3909 | 203 return 4*256; |
204 } | |
205 | |
/*
 * Convert one block of 5 x 256 planar samples to 5-channel interleaved
 * 16-bit output.
 *
 * _f  : input; reinterpreted as int32_t. Five planes of 256 dwords are read:
 *       plane k starts at f[256*k] (k = 0..4).
 * s16 : output buffer; 5*256 int16_t values are written.
 *
 * Each dword has magicF2W subtracted and is packed to a signed 16-bit word
 * with saturation. Output frames are
 * { plane0, plane2, plane3, plane4, plane1 }.
 *
 * %esi runs from -1024 to 0 in steps of 16 bytes (4 samples per iteration)
 * as a byte offset from f+256; %edi = 5*esi/2 is the matching byte offset
 * from s16+1280. %esi and %edi are hard-coded and listed as clobbers.
 *
 * Returns the number of int16_t values written (5*256).
 */
206 static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){ | |
207 int32_t * f = (int32_t *) _f; | |
3653 | 208 asm volatile(
209 "movl $-1024, %%esi \n\t" | |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
210 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3653 | 211 "1: \n\t" |
/* gather the first 12 output words (frames 0, 1 and the start of frame 2) in output order */
212 "movd (%1, %%esi), %%mm0 \n\t" | |
213 "punpckldq 2048(%1, %%esi), %%mm0\n\t" | |
214 "movd 3072(%1, %%esi), %%mm1 \n\t" | |
215 "punpckldq 4096(%1, %%esi), %%mm1\n\t" | |
216 "movd 1024(%1, %%esi), %%mm2 \n\t" | |
217 "punpckldq 4(%1, %%esi), %%mm2 \n\t" | |
218 "movd 2052(%1, %%esi), %%mm3 \n\t" | |
219 "punpckldq 3076(%1, %%esi), %%mm3\n\t" | |
220 "movd 4100(%1, %%esi), %%mm4 \n\t" | |
221 "punpckldq 1028(%1, %%esi), %%mm4\n\t" | |
222 "movd 8(%1, %%esi), %%mm5 \n\t" | |
223 "punpckldq 2056(%1, %%esi), %%mm5\n\t" | |
224 "leal (%%esi, %%esi, 4), %%edi \n\t" | |
225 "sarl $1, %%edi \n\t" | |
226 "psubd %%mm7, %%mm0 \n\t" | |
227 "psubd %%mm7, %%mm1 \n\t" | |
228 "psubd %%mm7, %%mm2 \n\t" | |
229 "psubd %%mm7, %%mm3 \n\t" | |
230 "psubd %%mm7, %%mm4 \n\t" | |
231 "psubd %%mm7, %%mm5 \n\t" | |
232 "packssdw %%mm1, %%mm0 \n\t" | |
233 "packssdw %%mm3, %%mm2 \n\t" | |
234 "packssdw %%mm5, %%mm4 \n\t" | |
235 "movq %%mm0, (%0, %%edi) \n\t" | |
236 "movq %%mm2, 8(%0, %%edi) \n\t" | |
237 "movq %%mm4, 16(%0, %%edi) \n\t" | |
238 | |
/* remaining 8 output words (rest of frame 2 and all of frame 3) */
239 "movd 3080(%1, %%esi), %%mm0 \n\t" | |
240 "punpckldq 4104(%1, %%esi), %%mm0\n\t" | |
241 "movd 1032(%1, %%esi), %%mm1 \n\t" | |
242 "punpckldq 12(%1, %%esi), %%mm1\n\t" | |
243 "movd 2060(%1, %%esi), %%mm2 \n\t" | |
244 "punpckldq 3084(%1, %%esi), %%mm2\n\t" | |
245 "movd 4108(%1, %%esi), %%mm3 \n\t" | |
246 "punpckldq 1036(%1, %%esi), %%mm3\n\t" | |
247 "psubd %%mm7, %%mm0 \n\t" | |
248 "psubd %%mm7, %%mm1 \n\t" | |
249 "psubd %%mm7, %%mm2 \n\t" | |
250 "psubd %%mm7, %%mm3 \n\t" | |
251 "packssdw %%mm1, %%mm0 \n\t" | |
252 "packssdw %%mm3, %%mm2 \n\t" | |
253 "movq %%mm0, 24(%0, %%edi) \n\t" | |
254 "movq %%mm2, 32(%0, %%edi) \n\t" | |
255 | |
256 "addl $16, %%esi \n\t" | |
257 " jnz 1b \n\t" | |
258 "emms \n\t" | |
259 :: "r" (s16+1280), "r" (f+256) | |
260 :"%esi", "%edi", "memory" | |
261 ); | |
3909 | 262 return 5*256; |
263 } | |
264 | |
/*
 * Convert one block of 2 x 256 planar samples to 6-channel interleaved
 * 16-bit output.
 *
 * _f  : input; reinterpreted as int32_t. Two planes of 256 dwords are read:
 *       plane 0 at f[0..255] and plane 1 at f[256..511].
 * s16 : output buffer; 6*256 int16_t values are written.
 *
 * Each dword has magicF2W subtracted and is packed to a signed 16-bit word
 * with saturation. Output frames are { 0, 0, 0, 0, plane1, plane0 }.
 *
 * %esi runs from -1024 to 0 in steps of 16 bytes (4 samples per plane per
 * iteration) as a byte offset from f+256; %edi = 3*esi is the matching byte
 * offset from s16+1536. %esi and %edi are hard-coded and listed as clobbers.
 *
 * Returns the number of int16_t values written (6*256).
 */
265 static int a52_resample_MONO_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
266 int32_t * f = (int32_t *) _f; | |
3569 | 267 asm volatile(
268 "movl $-1024, %%esi \n\t" | |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
269 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
/* mm6 = 0, source for the four zero words of every frame */
3569 | 270 "pxor %%mm6, %%mm6 \n\t" |
271 "1: \n\t" | |
272 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
273 "movq 1032(%1, %%esi), %%mm1 \n\t" | |
274 "movq (%1, %%esi), %%mm2 \n\t" | |
275 "movq 8(%1, %%esi), %%mm3 \n\t" | |
276 "psubd %%mm7, %%mm0 \n\t" | |
277 "psubd %%mm7, %%mm1 \n\t" | |
278 "psubd %%mm7, %%mm2 \n\t" | |
279 "psubd %%mm7, %%mm3 \n\t" | |
280 "packssdw %%mm1, %%mm0 \n\t" | |
281 "packssdw %%mm3, %%mm2 \n\t" | |
/* mm0/mm1 = (plane1, plane0) word pairs for samples 0-1 and 2-3 */
282 "movq %%mm0, %%mm1 \n\t" | |
283 "punpcklwd %%mm2, %%mm0 \n\t" | |
284 "punpckhwd %%mm2, %%mm1 \n\t" | |
285 "leal (%%esi, %%esi, 2), %%edi \n\t" | |
/* each 12-byte frame: 8 bytes of zeros, then one 4-byte (plane1, plane0) pair */
286 "movq %%mm6, (%0, %%edi) \n\t" | |
287 "movd %%mm0, 8(%0, %%edi) \n\t" | |
288 "punpckhdq %%mm0, %%mm0 \n\t" | |
289 "movq %%mm6, 12(%0, %%edi) \n\t" | |
290 "movd %%mm0, 20(%0, %%edi) \n\t" | |
291 "movq %%mm6, 24(%0, %%edi) \n\t" | |
292 "movd %%mm1, 32(%0, %%edi) \n\t" | |
293 "punpckhdq %%mm1, %%mm1 \n\t" | |
294 "movq %%mm6, 36(%0, %%edi) \n\t" | |
295 "movd %%mm1, 44(%0, %%edi) \n\t" | |
296 "addl $16, %%esi \n\t" | |
297 " jnz 1b \n\t" | |
298 "emms \n\t" | |
299 :: "r" (s16+1536), "r" (f+256) | |
300 :"%esi", "%edi", "memory" | |
301 ); | |
3909 | 302 return 6*256; |
303 } | |
304 | |
/*
 * Convert one block of 3 x 256 planar samples to 6-channel interleaved
 * 16-bit output.
 *
 * _f  : input; reinterpreted as int32_t. Three planes of 256 dwords are
 *       read: plane k starts at f[256*k] (k = 0..2).
 * s16 : output buffer; 6*256 int16_t values are written.
 *
 * Each dword has magicF2W subtracted and is packed to a signed 16-bit word
 * with saturation. Output frames are { plane1, plane2, 0, 0, 0, plane0 }.
 *
 * %esi runs from -1024 to 0 in steps of 8 bytes (2 samples per plane per
 * iteration) as a byte offset from f+256; %edi = 3*esi is the matching byte
 * offset from s16+1536. %esi and %edi are hard-coded and listed as clobbers.
 *
 * The trailing register comments list 16-bit words from most to least
 * significant; lower-case letters are the first sample of the pair and
 * upper-case the second (a/A = plane 1, b/B = plane 2, f/F = plane 0).
 *
 * Returns the number of int16_t values written (6*256).
 */
305 static int a52_resample_STEREO_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
306 int32_t * f = (int32_t *) _f; | |
3576 | 307 asm volatile(
308 "movl $-1024, %%esi \n\t" | |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
309 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3576 | 310 "pxor %%mm6, %%mm6 \n\t" |
311 "1: \n\t" | |
312 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
313 "movq 2048(%1, %%esi), %%mm1 \n\t" | |
314 "movq (%1, %%esi), %%mm5 \n\t" | |
315 "psubd %%mm7, %%mm0 \n\t" | |
316 "psubd %%mm7, %%mm1 \n\t" | |
317 "psubd %%mm7, %%mm5 \n\t" | |
318 "leal (%%esi, %%esi, 2), %%edi \n\t" | |
319 | |
320 "pxor %%mm4, %%mm4 \n\t" | |
321 "packssdw %%mm5, %%mm0 \n\t" // FfAa | |
322 "packssdw %%mm4, %%mm1 \n\t" // 00Bb | |
323 "punpckhwd %%mm0, %%mm4 \n\t" // F0f0 | |
324 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
325 "movq %%mm0, %%mm1 \n\t" // BAba | |
/* mm3 is read here without initialisation; its stale low dword (XX) is discarded by the punpckhdq two lines below */
326 "punpckldq %%mm4, %%mm3 \n\t" // f0XX | |
327 "punpckldq %%mm6, %%mm0 \n\t" // 00ba | |
328 "punpckhdq %%mm1, %%mm3 \n\t" // BAf0 | |
329 | |
330 "movq %%mm0, (%0, %%edi) \n\t" // 00ba | |
331 "punpckhdq %%mm4, %%mm0 \n\t" // F000 | |
332 "movq %%mm3, 8(%0, %%edi) \n\t" // BAf0 | |
333 "movq %%mm0, 16(%0, %%edi) \n\t" // F000 | |
334 "addl $8, %%esi \n\t" | |
335 " jnz 1b \n\t" | |
336 "emms \n\t" | |
337 :: "r" (s16+1536), "r" (f+256) | |
338 :"%esi", "%edi", "memory" | |
339 ); | |
3909 | 340 return 6*256; |
341 } | |
342 | |
/*
 * Convert one block of 4 x 256 planar samples to 6-channel interleaved
 * 16-bit output.
 *
 * _f  : input; reinterpreted as int32_t. Four planes of 256 dwords are read:
 *       plane k starts at f[256*k] (k = 0..3).
 * s16 : output buffer; 6*256 int16_t values are written.
 *
 * Each dword has magicF2W subtracted and is packed to a signed 16-bit word
 * with saturation. Output frames are
 * { plane1, plane3, 0, 0, plane2, plane0 }.
 *
 * %esi runs from -1024 to 0 in steps of 8 bytes (2 samples per plane per
 * iteration) as a byte offset from f+256; %edi = 3*esi is the matching byte
 * offset from s16+1536. %esi and %edi are hard-coded and listed as clobbers.
 *
 * The trailing register comments list 16-bit words from most to least
 * significant; lower-case letters are the first sample of the pair and
 * upper-case the second (a/A = plane 1, b/B = plane 3, e/E = plane 2,
 * f/F = plane 0).
 *
 * Returns the number of int16_t values written (6*256).
 */
343 static int a52_resample_3F_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
344 int32_t * f = (int32_t *) _f; | |
3578 | 345 asm volatile(
346 "movl $-1024, %%esi \n\t" | |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
347 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
/* mm6 = 0, supplies the two zero words of every frame */
3578 | 348 "pxor %%mm6, %%mm6 \n\t" |
349 "1: \n\t" | |
350 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
351 "movq 3072(%1, %%esi), %%mm1 \n\t" | |
352 "movq 2048(%1, %%esi), %%mm4 \n\t" | |
353 "movq (%1, %%esi), %%mm5 \n\t" | |
354 "psubd %%mm7, %%mm0 \n\t" | |
355 "psubd %%mm7, %%mm1 \n\t" | |
356 "psubd %%mm7, %%mm4 \n\t" | |
357 "psubd %%mm7, %%mm5 \n\t" | |
358 "leal (%%esi, %%esi, 2), %%edi \n\t" | |
359 | |
360 "packssdw %%mm4, %%mm0 \n\t" // EeAa | |
361 "packssdw %%mm5, %%mm1 \n\t" // FfBb | |
362 "movq %%mm0, %%mm2 \n\t" // EeAa | |
363 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
364 "punpckhwd %%mm1, %%mm2 \n\t" // FEfe | |
365 "movq %%mm0, %%mm1 \n\t" // BAba | |
366 "punpckldq %%mm6, %%mm0 \n\t" // 00ba | |
367 "punpckhdq %%mm1, %%mm1 \n\t" // BABA | |
368 | |
369 "movq %%mm0, (%0, %%edi) \n\t" | |
370 "punpckhdq %%mm2, %%mm0 \n\t" // FE00 | |
371 "punpckldq %%mm1, %%mm2 \n\t" // BAfe | |
372 "movq %%mm2, 8(%0, %%edi) \n\t" | |
373 "movq %%mm0, 16(%0, %%edi) \n\t" | |
374 "addl $8, %%esi \n\t" | |
375 " jnz 1b \n\t" | |
376 "emms \n\t" | |
377 :: "r" (s16+1536), "r" (f+256) | |
378 :"%esi", "%edi", "memory" | |
379 ); | |
3909 | 380 return 6*256; |
381 } | |
382 | |
/*
 * Convert one block of 5 x 256 planar samples to 6-channel interleaved
 * 16-bit output.
 *
 * _f  : input; reinterpreted as int32_t. Five planes of 256 dwords are read:
 *       plane k starts at f[256*k] (k = 0..4).
 * s16 : output buffer; 6*256 int16_t values are written.
 *
 * Each dword has magicF2W subtracted and is packed to a signed 16-bit word
 * with saturation. Output frames are
 * { plane1, plane2, plane3, plane4, 0, plane0 }.
 *
 * %esi runs from -1024 to 0 in steps of 8 bytes (2 samples per plane per
 * iteration) as a byte offset from f+256; %edi = 3*esi is the matching byte
 * offset from s16+1536. %esi and %edi are hard-coded and listed as clobbers.
 *
 * The trailing register comments list 16-bit words from most to least
 * significant; lower-case letters are the first sample of the pair and
 * upper-case the second (a/A = plane 1, b/B = plane 2, c/C = plane 3,
 * d/D = plane 4, f/F = plane 0).
 *
 * Returns the number of int16_t values written (6*256).
 */
383 static int a52_resample_2F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
384 int32_t * f = (int32_t *) _f; | |
3577 | 385 asm volatile(
386 "movl $-1024, %%esi \n\t" | |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
387 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3577 | 388 // "pxor %%mm6, %%mm6 \n\t" |
389 "1: \n\t" | |
390 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
391 "movq 2048(%1, %%esi), %%mm1 \n\t" | |
392 "movq 3072(%1, %%esi), %%mm2 \n\t" | |
393 "movq 4096(%1, %%esi), %%mm3 \n\t" | |
394 "movq (%1, %%esi), %%mm5 \n\t" | |
395 "psubd %%mm7, %%mm0 \n\t" | |
396 "psubd %%mm7, %%mm1 \n\t" | |
397 "psubd %%mm7, %%mm2 \n\t" | |
398 "psubd %%mm7, %%mm3 \n\t" | |
399 "psubd %%mm7, %%mm5 \n\t" | |
400 "leal (%%esi, %%esi, 2), %%edi \n\t" | |
401 | |
402 "packssdw %%mm2, %%mm0 \n\t" // CcAa | |
403 "packssdw %%mm3, %%mm1 \n\t" // DdBb | |
404 "packssdw %%mm5, %%mm5 \n\t" // FfFf | |
405 "movq %%mm0, %%mm2 \n\t" // CcAa | |
406 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
407 "punpckhwd %%mm1, %%mm2 \n\t" // DCdc | |
408 "pxor %%mm4, %%mm4 \n\t" // 0000 | |
409 "punpcklwd %%mm5, %%mm4 \n\t" // F0f0 | |
410 "movq %%mm0, %%mm1 \n\t" // BAba | |
411 "movq %%mm4, %%mm3 \n\t" // F0f0 | |
412 "punpckldq %%mm2, %%mm0 \n\t" // dcba | |
413 "punpckhdq %%mm1, %%mm1 \n\t" // BABA | |
414 "punpckldq %%mm1, %%mm4 \n\t" // BAf0 | |
415 "punpckhdq %%mm3, %%mm2 \n\t" // F0DC | |
416 | |
417 "movq %%mm0, (%0, %%edi) \n\t" | |
418 "movq %%mm4, 8(%0, %%edi) \n\t" | |
419 "movq %%mm2, 16(%0, %%edi) \n\t" | |
420 "addl $8, %%esi \n\t" | |
421 " jnz 1b \n\t" | |
422 "emms \n\t" | |
423 :: "r" (s16+1536), "r" (f+256) | |
424 :"%esi", "%edi", "memory" | |
425 ); | |
3909 | 426 return 6*256; |
427 } | |
428 | |
/*
 * Convert one block of 6 x 256 planar samples to 6-channel interleaved
 * 16-bit output.
 *
 * _f  : input; reinterpreted as int32_t. Six planes of 256 dwords are read:
 *       plane k starts at f[256*k] (k = 0..5).
 * s16 : output buffer; 6*256 int16_t values are written.
 *
 * Each dword has magicF2W subtracted and is packed to a signed 16-bit word
 * with saturation. Output frames are
 * { plane1, plane3, plane4, plane5, plane2, plane0 }.
 *
 * %esi runs from -1024 to 0 in steps of 8 bytes (2 samples per plane per
 * iteration) as a byte offset from f+256; %edi = 3*esi is the matching byte
 * offset from s16+1536. %esi and %edi are hard-coded and listed as clobbers.
 *
 * The trailing register comments list 16-bit words from most to least
 * significant; lower-case letters are the first sample of the pair and
 * upper-case the second (a/A = plane 1, b/B = plane 3, c/C = plane 4,
 * d/D = plane 5, e/E = plane 2, f/F = plane 0).
 *
 * Returns the number of int16_t values written (6*256).
 */
429 static int a52_resample_3F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
430 int32_t * f = (int32_t *) _f; | |
3575 | 431 asm volatile(
432 "movl $-1024, %%esi \n\t" | |
4247
2dbd637ffe05
mangle for win32 in liba52 (includes dummy mangle.h pointing to the one in main)
atmos4
parents:
3909
diff
changeset
|
433 "movq "MANGLE(magicF2W)", %%mm7 \n\t" |
3575 | 434 // "pxor %%mm6, %%mm6 \n\t" |
435 "1: \n\t" | |
436 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
437 "movq 3072(%1, %%esi), %%mm1 \n\t" | |
438 "movq 4096(%1, %%esi), %%mm2 \n\t" | |
439 "movq 5120(%1, %%esi), %%mm3 \n\t" | |
440 "movq 2048(%1, %%esi), %%mm4 \n\t" | |
441 "movq (%1, %%esi), %%mm5 \n\t" | |
442 "psubd %%mm7, %%mm0 \n\t" | |
443 "psubd %%mm7, %%mm1 \n\t" | |
444 "psubd %%mm7, %%mm2 \n\t" | |
445 "psubd %%mm7, %%mm3 \n\t" | |
446 "psubd %%mm7, %%mm4 \n\t" | |
447 "psubd %%mm7, %%mm5 \n\t" | |
448 "leal (%%esi, %%esi, 2), %%edi \n\t" | |
449 | |
450 "packssdw %%mm2, %%mm0 \n\t" // CcAa | |
451 "packssdw %%mm3, %%mm1 \n\t" // DdBb | |
452 "packssdw %%mm4, %%mm4 \n\t" // EeEe | |
453 "packssdw %%mm5, %%mm5 \n\t" // FfFf | |
454 "movq %%mm0, %%mm2 \n\t" // CcAa | |
455 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
456 "punpckhwd %%mm1, %%mm2 \n\t" // DCdc | |
457 "punpcklwd %%mm5, %%mm4 \n\t" // FEfe | |
458 "movq %%mm0, %%mm1 \n\t" // BAba | |
459 "movq %%mm4, %%mm3 \n\t" // FEfe | |
460 "punpckldq %%mm2, %%mm0 \n\t" // dcba | |
461 "punpckhdq %%mm1, %%mm1 \n\t" // BABA | |
462 "punpckldq %%mm1, %%mm4 \n\t" // BAfe | |
463 "punpckhdq %%mm3, %%mm2 \n\t" // FEDC | |
464 | |
465 "movq %%mm0, (%0, %%edi) \n\t" | |
466 "movq %%mm4, 8(%0, %%edi) \n\t" | |
467 "movq %%mm2, 16(%0, %%edi) \n\t" | |
468 "addl $8, %%esi \n\t" | |
469 " jnz 1b \n\t" | |
470 "emms \n\t" | |
471 :: "r" (s16+1536), "r" (f+256) | |
472 :"%esi", "%edi", "memory" | |
473 ); | |
3909 | 474 return 6*256; |
475 } | |
476 | |
477 | |
/*
 * Select the MMX conversion routine for a given input channel configuration
 * and output channel count.
 *
 * flags : compared for exact equality against the A52_* channel constants,
 *         optionally OR-ed with A52_LFE. A52_CHANNEL, A52_STEREO and
 *         A52_DOLBY all share the stereo routines.
 * ch    : requested number of interleaved output channels. Each layout
 *         supports exactly one value (2, 4, 5 or 6, see the cases below).
 *
 * Returns the address of the matching converter, or NULL when the
 * flags/ch combination has no MMX implementation. Every converter returned
 * here has the signature int (*)(float *, int16_t *); the result is handed
 * back as void *, so the caller has to convert it to that function pointer
 * type before calling it.
 */
478 static void* a52_resample_MMX(int flags, int ch){ | |
479 switch (flags) { | |
480 case A52_MONO: | |
481 if(ch==5) return a52_resample_MONO_to_5_MMX; | |
482 break; | |
483 case A52_CHANNEL: | |
484 case A52_STEREO: | |
485 case A52_DOLBY: | |
486 if(ch==2) return a52_resample_STEREO_to_2_MMX; | |
487 break; | |
488 case A52_3F: | |
489 if(ch==5) return a52_resample_3F_to_5_MMX; | |
490 break; | |
491 case A52_2F2R: | |
492 if(ch==4) return a52_resample_2F_2R_to_4_MMX; | |
493 break; | |
494 case A52_3F2R: | |
495 if(ch==5) return a52_resample_3F_2R_to_5_MMX; | |
496 break; | |
497 case A52_MONO | A52_LFE: | |
498 if(ch==6) return a52_resample_MONO_LFE_to_6_MMX; | |
499 break; | |
500 case A52_CHANNEL | A52_LFE: | |
501 case A52_STEREO | A52_LFE: | |
502 case A52_DOLBY | A52_LFE: | |
503 if(ch==6) return a52_resample_STEREO_LFE_to_6_MMX; | |
504 break; | |
505 case A52_3F | A52_LFE: | |
506 if(ch==6) return a52_resample_3F_LFE_to_6_MMX; | |
507 break; | |
508 case A52_2F2R | A52_LFE: | |
509 if(ch==6) return a52_resample_2F_2R_LFE_to_6_MMX; | |
510 break; | |
511 case A52_3F2R | A52_LFE: | |
512 if(ch==6) return a52_resample_3F_2R_LFE_to_6_MMX; | |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
513 break; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
/* no default case: unmatched flags fall out of the switch to the NULL return */
514 } |
3909 | 515 return NULL; |
3626 | 516 } |
517 | |
3909 | 518 |