Mercurial > mplayer.hg
comparison liba52/resample_c.c @ 3654:33c3cff374a1
SSE optimization of 3F
Bugfix in 3F2R
author | michael |
---|---|
date | Sat, 22 Dec 2001 00:55:58 +0000 |
parents | b11b15df02ed |
children | 6312aa265429 |
comparison
equal
deleted
inserted
replaced
3653:b11b15df02ed | 3654:33c3cff374a1 |
---|---|
224 "emms \n\t" | 224 "emms \n\t" |
225 :: "r" (s16+512), "r" (f+256) | 225 :: "r" (s16+512), "r" (f+256) |
226 :"%esi", "memory" | 226 :"%esi", "memory" |
227 ); | 227 ); |
228 break; | 228 break; |
229 case A52_3F: //FIXME Optimize | 229 case A52_3F: |
230 for (i = 0; i < 256; i++) { | 230 asm volatile( |
231 s16[5*i] = convert (f[i]); | 231 "movl $-1024, %%esi \n\t" |
232 s16[5*i+1] = convert (f[i+512]); | 232 "movq magicF2W, %%mm7 \n\t" |
233 s16[5*i+2] = s16[5*i+3] = 0; | 233 "pxor %%mm6, %%mm6 \n\t" |
234 s16[5*i+4] = convert (f[i+256]); | 234 "movq %%mm7, %%mm5 \n\t" |
235 } | 235 "punpckldq %%mm6, %%mm5 \n\t" |
236 "1: \n\t" | |
237 "movd (%1, %%esi), %%mm0 \n\t" | |
238 "punpckldq 2048(%1, %%esi), %%mm0\n\t" | |
239 "movd 1024(%1, %%esi), %%mm1 \n\t" | |
240 "punpckldq 4(%1, %%esi), %%mm1 \n\t" | |
241 "movd 2052(%1, %%esi), %%mm2 \n\t" | |
242 "movq %%mm7, %%mm3 \n\t" | |
243 "punpckldq 1028(%1, %%esi), %%mm3\n\t" | |
244 "movd 8(%1, %%esi), %%mm4 \n\t" | |
245 "punpckldq 2056(%1, %%esi), %%mm4\n\t" | |
246 "leal (%%esi, %%esi, 4), %%edi \n\t" | |
247 "sarl $1, %%edi \n\t" | |
248 "psubd %%mm7, %%mm0 \n\t" | |
249 "psubd %%mm7, %%mm1 \n\t" | |
250 "psubd %%mm5, %%mm2 \n\t" | |
251 "psubd %%mm7, %%mm3 \n\t" | |
252 "psubd %%mm7, %%mm4 \n\t" | |
253 "packssdw %%mm6, %%mm0 \n\t" | |
254 "packssdw %%mm2, %%mm1 \n\t" | |
255 "packssdw %%mm4, %%mm3 \n\t" | |
256 "movq %%mm0, (%0, %%edi) \n\t" | |
257 "movq %%mm1, 8(%0, %%edi) \n\t" | |
258 "movq %%mm3, 16(%0, %%edi) \n\t" | |
259 | |
260 "movd 1032(%1, %%esi), %%mm1 \n\t" | |
261 "punpckldq 12(%1, %%esi), %%mm1\n\t" | |
262 "movd 2060(%1, %%esi), %%mm2 \n\t" | |
263 "movq %%mm7, %%mm3 \n\t" | |
264 "punpckldq 1036(%1, %%esi), %%mm3\n\t" | |
265 "pxor %%mm0, %%mm0 \n\t" | |
266 "psubd %%mm7, %%mm1 \n\t" | |
267 "psubd %%mm5, %%mm2 \n\t" | |
268 "psubd %%mm7, %%mm3 \n\t" | |
269 "packssdw %%mm1, %%mm0 \n\t" | |
270 "packssdw %%mm3, %%mm2 \n\t" | |
271 "movq %%mm0, 24(%0, %%edi) \n\t" | |
272 "movq %%mm2, 32(%0, %%edi) \n\t" | |
273 | |
274 "addl $16, %%esi \n\t" | |
275 " jnz 1b \n\t" | |
276 "emms \n\t" | |
277 :: "r" (s16+1280), "r" (f+256) | |
278 :"%esi", "%edi", "memory" | |
279 ); | |
236 break; | 280 break; |
237 case A52_2F2R: | 281 case A52_2F2R: |
238 asm volatile( | 282 asm volatile( |
239 "movl $-1024, %%esi \n\t" | 283 "movl $-1024, %%esi \n\t" |
240 "movq magicF2W, %%mm7 \n\t" | 284 "movq magicF2W, %%mm7 \n\t" |
326 "psubd %%mm7, %%mm1 \n\t" | 370 "psubd %%mm7, %%mm1 \n\t" |
327 "psubd %%mm7, %%mm2 \n\t" | 371 "psubd %%mm7, %%mm2 \n\t" |
328 "psubd %%mm7, %%mm3 \n\t" | 372 "psubd %%mm7, %%mm3 \n\t" |
329 "packssdw %%mm1, %%mm0 \n\t" | 373 "packssdw %%mm1, %%mm0 \n\t" |
330 "packssdw %%mm3, %%mm2 \n\t" | 374 "packssdw %%mm3, %%mm2 \n\t" |
331 "packssdw %%mm5, %%mm4 \n\t" | |
332 "movq %%mm0, 24(%0, %%edi) \n\t" | 375 "movq %%mm0, 24(%0, %%edi) \n\t" |
333 "movq %%mm2, 32(%0, %%edi) \n\t" | 376 "movq %%mm2, 32(%0, %%edi) \n\t" |
334 "movq %%mm4, 40(%0, %%edi) \n\t" | |
335 | 377 |
336 "addl $16, %%esi \n\t" | 378 "addl $16, %%esi \n\t" |
337 " jnz 1b \n\t" | 379 " jnz 1b \n\t" |
338 "emms \n\t" | 380 "emms \n\t" |
339 :: "r" (s16+1280), "r" (f+256) | 381 :: "r" (s16+1280), "r" (f+256) |