Mercurial > mplayer.hg
annotate liba52/resample_mmx.c @ 4112:3822582553d6
Alpha patch by Falk Hueffner <falk.hueffner@student.uni-tuebingen.de>
author | nick |
---|---|
date | Sat, 12 Jan 2002 18:00:25 +0000 |
parents | ef32c8bdee81 |
children | 2dbd637ffe05 |
rev | line source |
---|---|
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
1 |
3569 | 2 // MMX optimizations from Michael Niedermayer (michaelni@gmx.at) (under GPL) |
3 | |
4 /* optimization TODO / NOTES | |
5 movntq is slightly faster (0.5% with the current test.c benchmark) | |
6 (but that's just test.c, so that needs to be tested in reality) | |
7 and it would mean (C / MMX2 / MMX / 3DNOW) versions | |
8 */ | |
9 | |
/*
 * 64-bit constants that the inline-asm blocks in this file load by symbol
 * name (e.g. "movq magicF2W, %%mm7").  Because the only references live
 * inside asm strings, the compiler cannot see that the variables are used;
 * the `used` attribute forces them to be emitted anyway.
 */

/* 0x43c00000 (the IEEE-754 bit pattern of 384.0f) in both 32-bit halves;
 * subtracted with psubd from the raw sample words before packing to int16. */
static uint64_t __attribute__((aligned(8), used)) magicF2W = 0x43c0000043c00000ULL;
/* Word mask: keeps 16-bit words 1 and 3, clears words 0 and 2. */
static uint64_t __attribute__((aligned(8), used)) wm1010 = 0xFFFF0000FFFF0000ULL;
/* Word mask: keeps 16-bit words 0 and 2, clears words 1 and 3. */
static uint64_t __attribute__((aligned(8), used)) wm0101 = 0x0000FFFF0000FFFFULL;
/* Word mask: keeps the upper two 16-bit words, clears the lower two. */
static uint64_t __attribute__((aligned(8), used)) wm1100 = 0xFFFFFFFF00000000ULL;
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
14 |
/*
 * Expand one block of 256 mono samples into 256 five-slot int16 frames.
 *
 * _f  : input; its 256 32-bit words are read as raw integers (via `f`).
 * s16 : output; receives 5*256 int16 values.
 * Returns the number of int16 values written (5*256).
 *
 * Conversion: magicF2W (0x43c00000 in both halves, the bit pattern of
 * 384.0f) is subtracted from every 32-bit word with psubd, then packssdw
 * narrows to int16 with signed saturation.  Presumably the caller supplies
 * samples already biased by 384.0 -- TODO confirm against the decoder setup.
 *
 * Frame layout: slots 0..3 are zero, slot 4 carries the input sample.
 *
 * NOTE(review): the local `i` is never used.
 * NOTE(review): the asm indexes with %esi/%edi and names the constants by
 * absolute symbol, so it looks 32-bit-x86 only -- confirm build guards.
 */
3909 | 15 static int a52_resample_MONO_to_5_MMX(float * _f, int16_t * s16){ |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
16 int i; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
17 int32_t * f = (int32_t *) _f; |
3574 | 18 asm volatile( |
// esi counts from -512 up to 0; the input is addressed as (%1, esi, 2),
// i.e. 16 bytes (4 samples) per iteration relative to the end of the block.
19 "movl $-512, %%esi \n\t" | |
20 "movq magicF2W, %%mm7 \n\t" | |
21 "movq wm1100, %%mm3 \n\t" | |
22 "movq wm0101, %%mm4 \n\t" | |
23 "movq wm1010, %%mm5 \n\t" | |
24 "pxor %%mm6, %%mm6 \n\t" | |
25 "1: \n\t" | |
26 "movq (%1, %%esi, 2), %%mm0 \n\t" | |
27 "movq 8(%1, %%esi, 2), %%mm1 \n\t" | |
// edi = 5*esi: output byte offset (40 bytes = 4 frames per iteration).
28 "leal (%%esi, %%esi, 4), %%edi \n\t" | |
29 "psubd %%mm7, %%mm0 \n\t" | |
30 "psubd %%mm7, %%mm1 \n\t" | |
// mm0 now holds the four converted samples A B C D as 16-bit words.
31 "packssdw %%mm1, %%mm0 \n\t" | |
32 "movq %%mm0, %%mm1 \n\t" | |
// mm0 keeps words 0 and 2 (A, C); mm1 keeps words 1 and 3 (B, D).
33 "pand %%mm4, %%mm0 \n\t" | |
34 "pand %%mm5, %%mm1 \n\t" | |
35 "movq %%mm6, (%0, %%edi) \n\t" // 0 0 0 0 | |
36 "movd %%mm0, 8(%0, %%edi) \n\t" // A 0 | |
37 "pand %%mm3, %%mm0 \n\t" | |
38 "movd %%mm6, 12(%0, %%edi) \n\t" // 0 0 | |
39 "movd %%mm1, 16(%0, %%edi) \n\t" // 0 B | |
40 "pand %%mm3, %%mm1 \n\t" | |
41 "movd %%mm6, 20(%0, %%edi) \n\t" // 0 0 | |
42 "movq %%mm0, 24(%0, %%edi) \n\t" // 0 0 C 0 | |
43 "movq %%mm1, 32(%0, %%edi) \n\t" // 0 0 0 D | |
44 "addl $8, %%esi \n\t" | |
45 " jnz 1b \n\t" | |
46 "emms \n\t" | |
// Both operands point one past the end of their buffers so the negative
// index in esi/edi walks them front to back.
47 :: "r" (s16+1280), "r" (f+256) | |
48 :"%esi", "%edi", "memory" | |
49 ); | |
3909 | 50 return 5*256; |
51 } | |
52 | |
/*
 * Interleave two planar blocks of 256 samples into 256 stereo int16 frames.
 *
 * _f  : input; two consecutive 1024-byte blocks, read as raw 32-bit words.
 * s16 : output; receives 2*256 int16 values.
 * Returns the number of int16 values written (2*256).
 *
 * Conversion: magicF2W (0x43c00000 in both halves, the bit pattern of
 * 384.0f) is subtracted from every 32-bit word with psubd, then packssdw
 * narrows to int16 with signed saturation.  Presumably the caller supplies
 * samples already biased by 384.0 -- TODO confirm against the decoder setup.
 *
 * Frame layout (slot <- input block): 0 <- block 0, 1 <- block 1.
 *
 * NOTE(review): the local `i` is never used.
 */
53 static int a52_resample_STEREO_to_2_MMX(float * _f, int16_t * s16){ | |
54 int i; | |
55 int32_t * f = (int32_t *) _f; | |
3567 | 56 /* benchmark scores are 0.3% better with SSE but we would need to set bias=0 and premultiply it |
57 #ifdef HAVE_SSE | |
58 asm volatile( | |
59 "movl $-1024, %%esi \n\t" | |
60 "1: \n\t" | |
61 "cvtps2pi (%1, %%esi), %%mm0 \n\t" | |
62 "cvtps2pi 1024(%1, %%esi), %%mm2\n\t" | |
63 "movq %%mm0, %%mm1 \n\t" | |
64 "punpcklwd %%mm2, %%mm0 \n\t" | |
65 "punpckhwd %%mm2, %%mm1 \n\t" | |
66 "movq %%mm0, (%0, %%esi) \n\t" | |
67 "movq %%mm1, 8(%0, %%esi) \n\t" | |
68 "addl $16, %%esi \n\t" | |
69 " jnz 1b \n\t" | |
70 "emms \n\t" | |
71 :: "r" (s16+512), "r" (f+256) | |
72 :"%esi", "memory" | |
73 );*/ | |
74 asm volatile( | |
// esi counts from -1024 up to 0 in steps of 16 bytes (4 samples); the same
// offset indexes input and output because 4 samples in = 4 frames * 4 bytes out.
75 "movl $-1024, %%esi \n\t" | |
76 "movq magicF2W, %%mm7 \n\t" | |
77 "1: \n\t" | |
78 "movq (%1, %%esi), %%mm0 \n\t" | |
79 "movq 8(%1, %%esi), %%mm1 \n\t" | |
80 "movq 1024(%1, %%esi), %%mm2 \n\t" | |
81 "movq 1032(%1, %%esi), %%mm3 \n\t" | |
82 "psubd %%mm7, %%mm0 \n\t" | |
83 "psubd %%mm7, %%mm1 \n\t" | |
84 "psubd %%mm7, %%mm2 \n\t" | |
85 "psubd %%mm7, %%mm3 \n\t" | |
// mm0 = four int16 samples of block 0, mm2 = four int16 samples of block 1.
86 "packssdw %%mm1, %%mm0 \n\t" | |
87 "packssdw %%mm3, %%mm2 \n\t" | |
88 "movq %%mm0, %%mm1 \n\t" | |
// Interleave word-wise: mm0 = frames 0-1, mm1 = frames 2-3.
89 "punpcklwd %%mm2, %%mm0 \n\t" | |
90 "punpckhwd %%mm2, %%mm1 \n\t" | |
91 "movq %%mm0, (%0, %%esi) \n\t" | |
92 "movq %%mm1, 8(%0, %%esi) \n\t" | |
93 "addl $16, %%esi \n\t" | |
94 " jnz 1b \n\t" | |
95 "emms \n\t" | |
96 :: "r" (s16+512), "r" (f+256) | |
97 :"%esi", "memory" | |
98 ); | |
3909 | 99 return 2*256; |
100 } | |
101 | |
/*
 * Convert three planar blocks of 256 samples into 256 five-slot int16 frames.
 *
 * _f  : input; three consecutive 1024-byte blocks, read as raw 32-bit words.
 * s16 : output; receives 5*256 int16 values.
 * Returns the number of int16 values written (5*256).
 *
 * Conversion: magicF2W (0x43c00000 in both halves, the bit pattern of
 * 384.0f) is subtracted from every sample word with psubd, then packssdw
 * narrows to int16 with signed saturation.  Presumably the caller supplies
 * samples already biased by 384.0 -- TODO confirm against the decoder setup.
 *
 * Frame layout (slot <- input block):
 *   0 <- block 0, 1 <- block 2, 2 <- zero, 3 <- zero, 4 <- block 1.
 *
 * NOTE(review): the local `i` is never used.
 */
102 static int a52_resample_3F_to_5_MMX(float * _f, int16_t * s16){ | |
103 int i; | |
104 int32_t * f = (int32_t *) _f; | |
3654 | 105 asm volatile( |
// esi counts from -1024 up to 0 in steps of 16 bytes (4 samples per block).
106 "movl $-1024, %%esi \n\t" | |
107 "movq magicF2W, %%mm7 \n\t" | |
108 "pxor %%mm6, %%mm6 \n\t" | |
// mm5 = magic in the low dword, 0 in the high dword: used where the high
// dword of a register is already zero and must stay zero after psubd.
109 "movq %%mm7, %%mm5 \n\t" | |
110 "punpckldq %%mm6, %%mm5 \n\t" | |
111 "1: \n\t" | |
112 "movd (%1, %%esi), %%mm0 \n\t" | |
113 "punpckldq 2048(%1, %%esi), %%mm0\n\t" | |
114 "movd 1024(%1, %%esi), %%mm1 \n\t" | |
115 "punpckldq 4(%1, %%esi), %%mm1 \n\t" | |
116 "movd 2052(%1, %%esi), %%mm2 \n\t" | |
// mm3 starts as the magic value so that psubd leaves a zero in its low dword.
117 "movq %%mm7, %%mm3 \n\t" | |
118 "punpckldq 1028(%1, %%esi), %%mm3\n\t" | |
119 "movd 8(%1, %%esi), %%mm4 \n\t" | |
120 "punpckldq 2056(%1, %%esi), %%mm4\n\t" | |
// edi = (5*esi)/2: output byte offset (10 bytes per frame vs 4 bytes per sample).
121 "leal (%%esi, %%esi, 4), %%edi \n\t" | |
122 "sarl $1, %%edi \n\t" | |
123 "psubd %%mm7, %%mm0 \n\t" | |
124 "psubd %%mm7, %%mm1 \n\t" | |
125 "psubd %%mm5, %%mm2 \n\t" | |
126 "psubd %%mm7, %%mm3 \n\t" | |
127 "psubd %%mm7, %%mm4 \n\t" | |
128 "packssdw %%mm6, %%mm0 \n\t" | |
129 "packssdw %%mm2, %%mm1 \n\t" | |
130 "packssdw %%mm4, %%mm3 \n\t" | |
// First 24 output bytes: frames 0 and 1 plus the first two slots of frame 2.
131 "movq %%mm0, (%0, %%edi) \n\t" | |
132 "movq %%mm1, 8(%0, %%edi) \n\t" | |
133 "movq %%mm3, 16(%0, %%edi) \n\t" | |
134 | |
135 "movd 1032(%1, %%esi), %%mm1 \n\t" | |
136 "punpckldq 12(%1, %%esi), %%mm1\n\t" | |
137 "movd 2060(%1, %%esi), %%mm2 \n\t" | |
138 "movq %%mm7, %%mm3 \n\t" | |
139 "punpckldq 1036(%1, %%esi), %%mm3\n\t" | |
140 "pxor %%mm0, %%mm0 \n\t" | |
141 "psubd %%mm7, %%mm1 \n\t" | |
142 "psubd %%mm5, %%mm2 \n\t" | |
143 "psubd %%mm7, %%mm3 \n\t" | |
144 "packssdw %%mm1, %%mm0 \n\t" | |
145 "packssdw %%mm3, %%mm2 \n\t" | |
// Remaining 16 output bytes: rest of frame 2 and all of frame 3.
146 "movq %%mm0, 24(%0, %%edi) \n\t" | |
147 "movq %%mm2, 32(%0, %%edi) \n\t" | |
148 | |
149 "addl $16, %%esi \n\t" | |
150 " jnz 1b \n\t" | |
151 "emms \n\t" | |
152 :: "r" (s16+1280), "r" (f+256) | |
153 :"%esi", "%edi", "memory" | |
154 ); | |
3909 | 155 return 5*256; |
156 } | |
157 | |
/*
 * Interleave four planar blocks of 256 samples into 256 four-slot int16 frames.
 *
 * _f  : input; four consecutive 1024-byte blocks, read as raw 32-bit words.
 * s16 : output; receives 4*256 int16 values.
 * Returns the number of int16 values written (4*256).
 *
 * Conversion: magicF2W (0x43c00000 in both halves, the bit pattern of
 * 384.0f) is subtracted from every 32-bit word with psubd, then packssdw
 * narrows to int16 with signed saturation.  Presumably the caller supplies
 * samples already biased by 384.0 -- TODO confirm against the decoder setup.
 *
 * Frame layout (slot <- input block): 0 <- 0, 1 <- 1, 2 <- 2, 3 <- 3.
 *
 * NOTE(review): the local `i` is never used.
 */
158 static int a52_resample_2F_2R_to_4_MMX(float * _f, int16_t * s16){ | |
159 int i; | |
160 int32_t * f = (int32_t *) _f; | |
3569 | 161 asm volatile( |
// esi counts from -1024 up to 0 in steps of 16 bytes (4 samples per block).
162 "movl $-1024, %%esi \n\t" | |
163 "movq magicF2W, %%mm7 \n\t" | |
164 "1: \n\t" | |
165 "movq (%1, %%esi), %%mm0 \n\t" | |
166 "movq 8(%1, %%esi), %%mm1 \n\t" | |
167 "movq 1024(%1, %%esi), %%mm2 \n\t" | |
168 "movq 1032(%1, %%esi), %%mm3 \n\t" | |
169 "psubd %%mm7, %%mm0 \n\t" | |
170 "psubd %%mm7, %%mm1 \n\t" | |
171 "psubd %%mm7, %%mm2 \n\t" | |
172 "psubd %%mm7, %%mm3 \n\t" | |
// mm0 = four int16 samples of block 0, mm2 = four int16 samples of block 1.
173 "packssdw %%mm1, %%mm0 \n\t" | |
174 "packssdw %%mm3, %%mm2 \n\t" | |
175 "movq 2048(%1, %%esi), %%mm3 \n\t" | |
176 "movq 2056(%1, %%esi), %%mm4 \n\t" | |
177 "movq 3072(%1, %%esi), %%mm5 \n\t" | |
178 "movq 3080(%1, %%esi), %%mm6 \n\t" | |
179 "psubd %%mm7, %%mm3 \n\t" | |
180 "psubd %%mm7, %%mm4 \n\t" | |
181 "psubd %%mm7, %%mm5 \n\t" | |
182 "psubd %%mm7, %%mm6 \n\t" | |
// mm3 = four int16 samples of block 2, mm5 = four int16 samples of block 3.
183 "packssdw %%mm4, %%mm3 \n\t" | |
184 "packssdw %%mm6, %%mm5 \n\t" | |
185 "movq %%mm0, %%mm1 \n\t" | |
186 "movq %%mm3, %%mm4 \n\t" | |
// Word interleave builds (block0, block1) and (block2, block3) pairs ...
187 "punpcklwd %%mm2, %%mm0 \n\t" | |
188 "punpckhwd %%mm2, %%mm1 \n\t" | |
189 "punpcklwd %%mm5, %%mm3 \n\t" | |
190 "punpckhwd %%mm5, %%mm4 \n\t" | |
191 "movq %%mm0, %%mm2 \n\t" | |
192 "movq %%mm1, %%mm5 \n\t" | |
// ... and dword interleave joins the pairs into complete 4-slot frames.
193 "punpckldq %%mm3, %%mm0 \n\t" | |
194 "punpckhdq %%mm3, %%mm2 \n\t" | |
195 "punpckldq %%mm4, %%mm1 \n\t" | |
196 "punpckhdq %%mm4, %%mm5 \n\t" | |
// Output is addressed with esi*2: 8 bytes per frame vs 4 bytes per sample.
197 "movq %%mm0, (%0, %%esi,2) \n\t" | |
198 "movq %%mm2, 8(%0, %%esi,2) \n\t" | |
199 "movq %%mm1, 16(%0, %%esi,2) \n\t" | |
200 "movq %%mm5, 24(%0, %%esi,2) \n\t" | |
201 "addl $16, %%esi \n\t" | |
202 " jnz 1b \n\t" | |
203 "emms \n\t" | |
204 :: "r" (s16+1024), "r" (f+256) | |
205 :"%esi", "memory" | |
206 ); | |
3909 | 207 return 4*256; |
208 } | |
209 | |
/*
 * Interleave five planar blocks of 256 samples into 256 five-slot int16 frames.
 *
 * _f  : input; five consecutive 1024-byte blocks, read as raw 32-bit words.
 * s16 : output; receives 5*256 int16 values.
 * Returns the number of int16 values written (5*256).
 *
 * Conversion: magicF2W (0x43c00000 in both halves, the bit pattern of
 * 384.0f) is subtracted from every 32-bit word with psubd, then packssdw
 * narrows to int16 with signed saturation.  Presumably the caller supplies
 * samples already biased by 384.0 -- TODO confirm against the decoder setup.
 *
 * Frame layout (slot <- input block):
 *   0 <- block 0, 1 <- block 2, 2 <- block 3, 3 <- block 4, 4 <- block 1.
 *
 * NOTE(review): the local `i` is never used.
 */
210 static int a52_resample_3F_2R_to_5_MMX(float * _f, int16_t * s16){ | |
211 int i; | |
212 int32_t * f = (int32_t *) _f; | |
3653 | 213 asm volatile( |
// esi counts from -1024 up to 0 in steps of 16 bytes (4 samples per block).
214 "movl $-1024, %%esi \n\t" | |
215 "movq magicF2W, %%mm7 \n\t" | |
216 "1: \n\t" | |
// Each movd/punpckldq pair gathers two neighbouring output slots, in output
// order, from the planar input blocks.
217 "movd (%1, %%esi), %%mm0 \n\t" | |
218 "punpckldq 2048(%1, %%esi), %%mm0\n\t" | |
219 "movd 3072(%1, %%esi), %%mm1 \n\t" | |
220 "punpckldq 4096(%1, %%esi), %%mm1\n\t" | |
221 "movd 1024(%1, %%esi), %%mm2 \n\t" | |
222 "punpckldq 4(%1, %%esi), %%mm2 \n\t" | |
223 "movd 2052(%1, %%esi), %%mm3 \n\t" | |
224 "punpckldq 3076(%1, %%esi), %%mm3\n\t" | |
225 "movd 4100(%1, %%esi), %%mm4 \n\t" | |
226 "punpckldq 1028(%1, %%esi), %%mm4\n\t" | |
227 "movd 8(%1, %%esi), %%mm5 \n\t" | |
228 "punpckldq 2056(%1, %%esi), %%mm5\n\t" | |
// edi = (5*esi)/2: output byte offset (10 bytes per frame vs 4 bytes per sample).
229 "leal (%%esi, %%esi, 4), %%edi \n\t" | |
230 "sarl $1, %%edi \n\t" | |
231 "psubd %%mm7, %%mm0 \n\t" | |
232 "psubd %%mm7, %%mm1 \n\t" | |
233 "psubd %%mm7, %%mm2 \n\t" | |
234 "psubd %%mm7, %%mm3 \n\t" | |
235 "psubd %%mm7, %%mm4 \n\t" | |
236 "psubd %%mm7, %%mm5 \n\t" | |
237 "packssdw %%mm1, %%mm0 \n\t" | |
238 "packssdw %%mm3, %%mm2 \n\t" | |
239 "packssdw %%mm5, %%mm4 \n\t" | |
// First 24 output bytes: frames 0 and 1 plus the first two slots of frame 2.
240 "movq %%mm0, (%0, %%edi) \n\t" | |
241 "movq %%mm2, 8(%0, %%edi) \n\t" | |
242 "movq %%mm4, 16(%0, %%edi) \n\t" | |
243 | |
244 "movd 3080(%1, %%esi), %%mm0 \n\t" | |
245 "punpckldq 4104(%1, %%esi), %%mm0\n\t" | |
246 "movd 1032(%1, %%esi), %%mm1 \n\t" | |
247 "punpckldq 12(%1, %%esi), %%mm1\n\t" | |
248 "movd 2060(%1, %%esi), %%mm2 \n\t" | |
249 "punpckldq 3084(%1, %%esi), %%mm2\n\t" | |
250 "movd 4108(%1, %%esi), %%mm3 \n\t" | |
251 "punpckldq 1036(%1, %%esi), %%mm3\n\t" | |
252 "psubd %%mm7, %%mm0 \n\t" | |
253 "psubd %%mm7, %%mm1 \n\t" | |
254 "psubd %%mm7, %%mm2 \n\t" | |
255 "psubd %%mm7, %%mm3 \n\t" | |
256 "packssdw %%mm1, %%mm0 \n\t" | |
257 "packssdw %%mm3, %%mm2 \n\t" | |
// Remaining 16 output bytes: rest of frame 2 and all of frame 3.
258 "movq %%mm0, 24(%0, %%edi) \n\t" | |
259 "movq %%mm2, 32(%0, %%edi) \n\t" | |
260 | |
261 "addl $16, %%esi \n\t" | |
262 " jnz 1b \n\t" | |
263 "emms \n\t" | |
264 :: "r" (s16+1280), "r" (f+256) | |
265 :"%esi", "%edi", "memory" | |
266 ); | |
3909 | 267 return 5*256; |
268 } | |
269 | |
/*
 * Convert two planar blocks of 256 samples into 256 six-slot int16 frames.
 *
 * _f  : input; two consecutive 1024-byte blocks, read as raw 32-bit words.
 *       Judging by the function name, block 0 is presumably the LFE channel
 *       and block 1 the mono channel -- TODO confirm against the decoder.
 * s16 : output; receives 6*256 int16 values.
 * Returns the number of int16 values written (6*256).
 *
 * Conversion: magicF2W (0x43c00000 in both halves, the bit pattern of
 * 384.0f) is subtracted from every 32-bit word with psubd, then packssdw
 * narrows to int16 with signed saturation.  Presumably the caller supplies
 * samples already biased by 384.0 -- TODO confirm against the decoder setup.
 *
 * Frame layout (slot <- input block):
 *   0..3 <- zero, 4 <- block 1, 5 <- block 0.
 *
 * NOTE(review): the local `i` is never used.
 */
270 static int a52_resample_MONO_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
271 int i; | |
272 int32_t * f = (int32_t *) _f; | |
3569 | 273 asm volatile( |
// esi counts from -1024 up to 0 in steps of 16 bytes (4 samples per block).
274 "movl $-1024, %%esi \n\t" | |
275 "movq magicF2W, %%mm7 \n\t" | |
276 "pxor %%mm6, %%mm6 \n\t" | |
277 "1: \n\t" | |
278 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
279 "movq 1032(%1, %%esi), %%mm1 \n\t" | |
280 "movq (%1, %%esi), %%mm2 \n\t" | |
281 "movq 8(%1, %%esi), %%mm3 \n\t" | |
282 "psubd %%mm7, %%mm0 \n\t" | |
283 "psubd %%mm7, %%mm1 \n\t" | |
284 "psubd %%mm7, %%mm2 \n\t" | |
285 "psubd %%mm7, %%mm3 \n\t" | |
// mm0 = four int16 samples of block 1, mm2 = four int16 samples of block 0.
286 "packssdw %%mm1, %%mm0 \n\t" | |
287 "packssdw %%mm3, %%mm2 \n\t" | |
288 "movq %%mm0, %%mm1 \n\t" | |
// Interleave into (block1, block0) pairs: mm0 = samples 0-1, mm1 = samples 2-3.
289 "punpcklwd %%mm2, %%mm0 \n\t" | |
290 "punpckhwd %%mm2, %%mm1 \n\t" | |
// edi = 3*esi: output byte offset (12 bytes per frame vs 4 bytes per sample).
291 "leal (%%esi, %%esi, 2), %%edi \n\t" | |
// Each frame is stored as 8 zero bytes followed by one 4-byte sample pair.
292 "movq %%mm6, (%0, %%edi) \n\t" | |
293 "movd %%mm0, 8(%0, %%edi) \n\t" | |
294 "punpckhdq %%mm0, %%mm0 \n\t" | |
295 "movq %%mm6, 12(%0, %%edi) \n\t" | |
296 "movd %%mm0, 20(%0, %%edi) \n\t" | |
297 "movq %%mm6, 24(%0, %%edi) \n\t" | |
298 "movd %%mm1, 32(%0, %%edi) \n\t" | |
299 "punpckhdq %%mm1, %%mm1 \n\t" | |
300 "movq %%mm6, 36(%0, %%edi) \n\t" | |
301 "movd %%mm1, 44(%0, %%edi) \n\t" | |
302 "addl $16, %%esi \n\t" | |
303 " jnz 1b \n\t" | |
304 "emms \n\t" | |
305 :: "r" (s16+1536), "r" (f+256) | |
306 :"%esi", "%edi", "memory" | |
307 ); | |
3909 | 308 return 6*256; |
309 } | |
310 | |
/*
 * Convert three planar blocks of 256 samples into 256 six-slot int16 frames.
 *
 * _f  : input; three consecutive 1024-byte blocks, read as raw 32-bit words.
 *       Judging by the function name, block 0 is presumably the LFE channel
 *       -- TODO confirm against the decoder.
 * s16 : output; receives 6*256 int16 values.
 * Returns the number of int16 values written (6*256).
 *
 * Conversion: magicF2W (0x43c00000 in both halves, the bit pattern of
 * 384.0f) is subtracted from every 32-bit word with psubd, then packssdw
 * narrows to int16 with signed saturation.  Presumably the caller supplies
 * samples already biased by 384.0 -- TODO confirm against the decoder setup.
 *
 * Frame layout (slot <- input block):
 *   0 <- block 1, 1 <- block 2, 2..4 <- zero, 5 <- block 0.
 *
 * In the register diagrams below, words are listed from most significant to
 * least significant; lower case is the first sample of the pair, upper case
 * the second (a/A = block 1, b/B = block 2, f/F = block 0).
 *
 * NOTE(review): the local `i` is never used.
 */
311 static int a52_resample_STEREO_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
312 int i; | |
313 int32_t * f = (int32_t *) _f; | |
3576 | 314 asm volatile( |
// esi counts from -1024 up to 0 in steps of 8 bytes (2 samples per block).
315 "movl $-1024, %%esi \n\t" | |
316 "movq magicF2W, %%mm7 \n\t" | |
317 "pxor %%mm6, %%mm6 \n\t" | |
318 "1: \n\t" | |
319 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
320 "movq 2048(%1, %%esi), %%mm1 \n\t" | |
321 "movq (%1, %%esi), %%mm5 \n\t" | |
322 "psubd %%mm7, %%mm0 \n\t" | |
323 "psubd %%mm7, %%mm1 \n\t" | |
324 "psubd %%mm7, %%mm5 \n\t" | |
// edi = 3*esi: output byte offset (12 bytes per frame vs 4 bytes per sample).
325 "leal (%%esi, %%esi, 2), %%edi \n\t" | |
326 | |
327 "pxor %%mm4, %%mm4 \n\t" | |
328 "packssdw %%mm5, %%mm0 \n\t" // FfAa | |
329 "packssdw %%mm4, %%mm1 \n\t" // 00Bb | |
330 "punpckhwd %%mm0, %%mm4 \n\t" // F0f0 | |
331 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
332 "movq %%mm0, %%mm1 \n\t" // BAba | |
// mm3's previous contents (XX) are only parked in the low half here and are
// discarded by the punpckhdq two instructions later.
333 "punpckldq %%mm4, %%mm3 \n\t" // f0XX | |
334 "punpckldq %%mm6, %%mm0 \n\t" // 00ba | |
335 "punpckhdq %%mm1, %%mm3 \n\t" // BAf0 | |
336 | |
337 "movq %%mm0, (%0, %%edi) \n\t" // 00ba | |
338 "punpckhdq %%mm4, %%mm0 \n\t" // F000 | |
339 "movq %%mm3, 8(%0, %%edi) \n\t" // BAf0 | |
340 "movq %%mm0, 16(%0, %%edi) \n\t" // F000 | |
341 "addl $8, %%esi \n\t" | |
342 " jnz 1b \n\t" | |
343 "emms \n\t" | |
344 :: "r" (s16+1536), "r" (f+256) | |
345 :"%esi", "%edi", "memory" | |
346 ); | |
3909 | 347 return 6*256; |
348 } | |
349 | |
/*
 * Convert four planar blocks of 256 samples into 256 six-slot int16 frames.
 *
 * _f  : input; four consecutive 1024-byte blocks, read as raw 32-bit words.
 *       Judging by the function name, block 0 is presumably the LFE channel
 *       -- TODO confirm against the decoder.
 * s16 : output; receives 6*256 int16 values.
 * Returns the number of int16 values written (6*256).
 *
 * Conversion: magicF2W (0x43c00000 in both halves, the bit pattern of
 * 384.0f) is subtracted from every 32-bit word with psubd, then packssdw
 * narrows to int16 with signed saturation.  Presumably the caller supplies
 * samples already biased by 384.0 -- TODO confirm against the decoder setup.
 *
 * Frame layout (slot <- input block):
 *   0 <- block 1, 1 <- block 3, 2 <- zero, 3 <- zero, 4 <- block 2,
 *   5 <- block 0.
 *
 * In the register diagrams below, words are listed from most significant to
 * least significant; lower case is the first sample of the pair, upper case
 * the second (a/A = block 1, b/B = block 3, e/E = block 2, f/F = block 0).
 *
 * NOTE(review): the local `i` is never used.
 */
350 static int a52_resample_3F_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
351 int i; | |
352 int32_t * f = (int32_t *) _f; | |
3578 | 353 asm volatile( |
// esi counts from -1024 up to 0 in steps of 8 bytes (2 samples per block).
354 "movl $-1024, %%esi \n\t" | |
355 "movq magicF2W, %%mm7 \n\t" | |
356 "pxor %%mm6, %%mm6 \n\t" | |
357 "1: \n\t" | |
358 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
359 "movq 3072(%1, %%esi), %%mm1 \n\t" | |
360 "movq 2048(%1, %%esi), %%mm4 \n\t" | |
361 "movq (%1, %%esi), %%mm5 \n\t" | |
362 "psubd %%mm7, %%mm0 \n\t" | |
363 "psubd %%mm7, %%mm1 \n\t" | |
364 "psubd %%mm7, %%mm4 \n\t" | |
365 "psubd %%mm7, %%mm5 \n\t" | |
// edi = 3*esi: output byte offset (12 bytes per frame vs 4 bytes per sample).
366 "leal (%%esi, %%esi, 2), %%edi \n\t" | |
367 | |
368 "packssdw %%mm4, %%mm0 \n\t" // EeAa | |
369 "packssdw %%mm5, %%mm1 \n\t" // FfBb | |
370 "movq %%mm0, %%mm2 \n\t" // EeAa | |
371 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
372 "punpckhwd %%mm1, %%mm2 \n\t" // FEfe | |
373 "movq %%mm0, %%mm1 \n\t" // BAba | |
374 "punpckldq %%mm6, %%mm0 \n\t" // 00ba | |
375 "punpckhdq %%mm1, %%mm1 \n\t" // BABA | |
376 | |
// Stored in memory order: (a b 0 0) (e f A B) (0 0 E F) = two full frames.
377 "movq %%mm0, (%0, %%edi) \n\t" | |
378 "punpckhdq %%mm2, %%mm0 \n\t" // FE00 | |
379 "punpckldq %%mm1, %%mm2 \n\t" // BAfe | |
380 "movq %%mm2, 8(%0, %%edi) \n\t" | |
381 "movq %%mm0, 16(%0, %%edi) \n\t" | |
382 "addl $8, %%esi \n\t" | |
383 " jnz 1b \n\t" | |
384 "emms \n\t" | |
385 :: "r" (s16+1536), "r" (f+256) | |
386 :"%esi", "%edi", "memory" | |
387 ); | |
3909 | 388 return 6*256; |
389 } | |
390 | |
/*
 * Convert five planar blocks of 256 samples into 256 six-slot int16 frames.
 *
 * _f  : input; five consecutive 1024-byte blocks, read as raw 32-bit words.
 *       Judging by the function name, block 0 is presumably the LFE channel
 *       -- TODO confirm against the decoder.
 * s16 : output; receives 6*256 int16 values.
 * Returns the number of int16 values written (6*256).
 *
 * Conversion: magicF2W (0x43c00000 in both halves, the bit pattern of
 * 384.0f) is subtracted from every 32-bit word with psubd, then packssdw
 * narrows to int16 with signed saturation.  Presumably the caller supplies
 * samples already biased by 384.0 -- TODO confirm against the decoder setup.
 *
 * Frame layout (slot <- input block):
 *   0 <- block 1, 1 <- block 2, 2 <- block 3, 3 <- block 4, 4 <- zero,
 *   5 <- block 0.
 *
 * In the register diagrams below, words are listed from most significant to
 * least significant; lower case is the first sample of the pair, upper case
 * the second (a/A = block 1, b/B = block 2, c/C = block 3, d/D = block 4,
 * f/F = block 0).
 *
 * NOTE(review): the local `i` is never used.
 */
391 static int a52_resample_2F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
392 int i; | |
393 int32_t * f = (int32_t *) _f; | |
3577 | 394 asm volatile( |
// esi counts from -1024 up to 0 in steps of 8 bytes (2 samples per block).
395 "movl $-1024, %%esi \n\t" | |
396 "movq magicF2W, %%mm7 \n\t" | |
397 // "pxor %%mm6, %%mm6 \n\t" | |
398 "1: \n\t" | |
399 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
400 "movq 2048(%1, %%esi), %%mm1 \n\t" | |
401 "movq 3072(%1, %%esi), %%mm2 \n\t" | |
402 "movq 4096(%1, %%esi), %%mm3 \n\t" | |
403 "movq (%1, %%esi), %%mm5 \n\t" | |
404 "psubd %%mm7, %%mm0 \n\t" | |
405 "psubd %%mm7, %%mm1 \n\t" | |
406 "psubd %%mm7, %%mm2 \n\t" | |
407 "psubd %%mm7, %%mm3 \n\t" | |
408 "psubd %%mm7, %%mm5 \n\t" | |
// edi = 3*esi: output byte offset (12 bytes per frame vs 4 bytes per sample).
409 "leal (%%esi, %%esi, 2), %%edi \n\t" | |
410 | |
411 "packssdw %%mm2, %%mm0 \n\t" // CcAa | |
412 "packssdw %%mm3, %%mm1 \n\t" // DdBb | |
413 "packssdw %%mm5, %%mm5 \n\t" // FfFf | |
414 "movq %%mm0, %%mm2 \n\t" // CcAa | |
415 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
416 "punpckhwd %%mm1, %%mm2 \n\t" // DCdc | |
417 "pxor %%mm4, %%mm4 \n\t" // 0000 | |
418 "punpcklwd %%mm5, %%mm4 \n\t" // F0f0 | |
419 "movq %%mm0, %%mm1 \n\t" // BAba | |
420 "movq %%mm4, %%mm3 \n\t" // F0f0 | |
421 "punpckldq %%mm2, %%mm0 \n\t" // dcba | |
422 "punpckhdq %%mm1, %%mm1 \n\t" // BABA | |
423 "punpckldq %%mm1, %%mm4 \n\t" // BAf0 | |
424 "punpckhdq %%mm3, %%mm2 \n\t" // F0DC | |
425 | |
// Stored in memory order: (a b c d) (0 f A B) (C D 0 F) = two full frames.
426 "movq %%mm0, (%0, %%edi) \n\t" | |
427 "movq %%mm4, 8(%0, %%edi) \n\t" | |
428 "movq %%mm2, 16(%0, %%edi) \n\t" | |
429 "addl $8, %%esi \n\t" | |
430 " jnz 1b \n\t" | |
431 "emms \n\t" | |
432 :: "r" (s16+1536), "r" (f+256) | |
433 :"%esi", "%edi", "memory" | |
434 ); | |
3909 | 435 return 6*256; |
436 } | |
437 | |
/*
 * Interleave six planar blocks of 256 samples into 256 six-slot int16 frames.
 *
 * _f  : input; six consecutive 1024-byte blocks, read as raw 32-bit words.
 *       Judging by the function name, block 0 is presumably the LFE channel
 *       -- TODO confirm against the decoder.
 * s16 : output; receives 6*256 int16 values.
 * Returns the number of int16 values written (6*256).
 *
 * Conversion: magicF2W (0x43c00000 in both halves, the bit pattern of
 * 384.0f) is subtracted from every 32-bit word with psubd, then packssdw
 * narrows to int16 with signed saturation.  Presumably the caller supplies
 * samples already biased by 384.0 -- TODO confirm against the decoder setup.
 *
 * Frame layout (slot <- input block):
 *   0 <- block 1, 1 <- block 3, 2 <- block 4, 3 <- block 5, 4 <- block 2,
 *   5 <- block 0.
 *
 * In the register diagrams below, words are listed from most significant to
 * least significant; lower case is the first sample of the pair, upper case
 * the second (a/A = block 1, b/B = block 3, c/C = block 4, d/D = block 5,
 * e/E = block 2, f/F = block 0).
 *
 * NOTE(review): the local `i` is never used.
 */
438 static int a52_resample_3F_2R_LFE_to_6_MMX(float * _f, int16_t * s16){ | |
439 int i; | |
440 int32_t * f = (int32_t *) _f; | |
3575 | 441 asm volatile( |
// esi counts from -1024 up to 0 in steps of 8 bytes (2 samples per block).
442 "movl $-1024, %%esi \n\t" | |
443 "movq magicF2W, %%mm7 \n\t" | |
444 // "pxor %%mm6, %%mm6 \n\t" | |
445 "1: \n\t" | |
446 "movq 1024(%1, %%esi), %%mm0 \n\t" | |
447 "movq 3072(%1, %%esi), %%mm1 \n\t" | |
448 "movq 4096(%1, %%esi), %%mm2 \n\t" | |
449 "movq 5120(%1, %%esi), %%mm3 \n\t" | |
450 "movq 2048(%1, %%esi), %%mm4 \n\t" | |
451 "movq (%1, %%esi), %%mm5 \n\t" | |
452 "psubd %%mm7, %%mm0 \n\t" | |
453 "psubd %%mm7, %%mm1 \n\t" | |
454 "psubd %%mm7, %%mm2 \n\t" | |
455 "psubd %%mm7, %%mm3 \n\t" | |
456 "psubd %%mm7, %%mm4 \n\t" | |
457 "psubd %%mm7, %%mm5 \n\t" | |
// edi = 3*esi: output byte offset (12 bytes per frame vs 4 bytes per sample).
458 "leal (%%esi, %%esi, 2), %%edi \n\t" | |
459 | |
460 "packssdw %%mm2, %%mm0 \n\t" // CcAa | |
461 "packssdw %%mm3, %%mm1 \n\t" // DdBb | |
462 "packssdw %%mm4, %%mm4 \n\t" // EeEe | |
463 "packssdw %%mm5, %%mm5 \n\t" // FfFf | |
464 "movq %%mm0, %%mm2 \n\t" // CcAa | |
465 "punpcklwd %%mm1, %%mm0 \n\t" // BAba | |
466 "punpckhwd %%mm1, %%mm2 \n\t" // DCdc | |
467 "punpcklwd %%mm5, %%mm4 \n\t" // FEfe | |
468 "movq %%mm0, %%mm1 \n\t" // BAba | |
469 "movq %%mm4, %%mm3 \n\t" // FEfe | |
470 "punpckldq %%mm2, %%mm0 \n\t" // dcba | |
471 "punpckhdq %%mm1, %%mm1 \n\t" // BABA | |
472 "punpckldq %%mm1, %%mm4 \n\t" // BAfe | |
473 "punpckhdq %%mm3, %%mm2 \n\t" // FEDC | |
474 | |
// Stored in memory order: (a b c d) (e f A B) (C D E F) = two full frames.
475 "movq %%mm0, (%0, %%edi) \n\t" | |
476 "movq %%mm4, 8(%0, %%edi) \n\t" | |
477 "movq %%mm2, 16(%0, %%edi) \n\t" | |
478 "addl $8, %%esi \n\t" | |
479 " jnz 1b \n\t" | |
480 "emms \n\t" | |
481 :: "r" (s16+1536), "r" (f+256) | |
482 :"%esi", "%edi", "memory" | |
483 ); | |
3909 | 484 return 6*256; |
485 } | |
486 | |
487 | |
488 static void* a52_resample_MMX(int flags, int ch){ | |
489 switch (flags) { | |
490 case A52_MONO: | |
491 if(ch==5) return a52_resample_MONO_to_5_MMX; | |
492 break; | |
493 case A52_CHANNEL: | |
494 case A52_STEREO: | |
495 case A52_DOLBY: | |
496 if(ch==2) return a52_resample_STEREO_to_2_MMX; | |
497 break; | |
498 case A52_3F: | |
499 if(ch==5) return a52_resample_3F_to_5_MMX; | |
500 break; | |
501 case A52_2F2R: | |
502 if(ch==4) return a52_resample_2F_2R_to_4_MMX; | |
503 break; | |
504 case A52_3F2R: | |
505 if(ch==5) return a52_resample_3F_2R_to_5_MMX; | |
506 break; | |
507 case A52_MONO | A52_LFE: | |
508 if(ch==6) return a52_resample_MONO_LFE_to_6_MMX; | |
509 break; | |
510 case A52_CHANNEL | A52_LFE: | |
511 case A52_STEREO | A52_LFE: | |
512 case A52_DOLBY | A52_LFE: | |
513 if(ch==6) return a52_resample_STEREO_LFE_to_6_MMX; | |
514 break; | |
515 case A52_3F | A52_LFE: | |
516 if(ch==6) return a52_resample_3F_LFE_to_6_MMX; | |
517 break; | |
518 case A52_2F2R | A52_LFE: | |
519 if(ch==6) return a52_resample_2F_2R_LFE_to_6_MMX; | |
520 break; | |
521 case A52_3F2R | A52_LFE: | |
522 if(ch==6) return a52_resample_3F_2R_LFE_to_6_MMX; | |
3412
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
523 break; |
21d65a4ae3c9
resample.c added - float->int conversion and channel ordering
arpi
parents:
diff
changeset
|
524 } |
3909 | 525 return NULL; |
3626 | 526 } |
527 | |
3909 | 528 |