comparison liba52/resample_c.c @ 3575:01a2466e035e

mmx opt
author michael
date Tue, 18 Dec 2001 01:46:41 +0000
parents 8600f40003de
children c282fd9e8534
comparison
equal deleted inserted replaced
3574:8600f40003de 3575:01a2466e035e
292 s16[6*i+4] = 0; 292 s16[6*i+4] = 0;
293 s16[6*i+5] = convert (f[i]); 293 s16[6*i+5] = convert (f[i]);
294 } 294 }
295 break; 295 break;
296 case A52_3F2R | A52_LFE: 296 case A52_3F2R | A52_LFE:
297 #ifdef HAVE_MMX
298 asm volatile(
299 "movl $-1024, %%esi \n\t"
300 "movq magicF2W, %%mm7 \n\t"
301 // "pxor %%mm6, %%mm6 \n\t"
302 "1: \n\t"
303 "movq 1024(%1, %%esi), %%mm0 \n\t"
304 "movq 3072(%1, %%esi), %%mm1 \n\t"
305 "movq 4096(%1, %%esi), %%mm2 \n\t"
306 "movq 5120(%1, %%esi), %%mm3 \n\t"
307 "movq 2048(%1, %%esi), %%mm4 \n\t"
308 "movq (%1, %%esi), %%mm5 \n\t"
309 "psubd %%mm7, %%mm0 \n\t"
310 "psubd %%mm7, %%mm1 \n\t"
311 "psubd %%mm7, %%mm2 \n\t"
312 "psubd %%mm7, %%mm3 \n\t"
313 "psubd %%mm7, %%mm4 \n\t"
314 "psubd %%mm7, %%mm5 \n\t"
315 "leal (%%esi, %%esi, 2), %%edi \n\t"
316
317 "packssdw %%mm2, %%mm0 \n\t" // CcAa
318 "packssdw %%mm3, %%mm1 \n\t" // DdBb
319 "packssdw %%mm4, %%mm4 \n\t" // EeEe
320 "packssdw %%mm5, %%mm5 \n\t" // FfFf
321 "movq %%mm0, %%mm2 \n\t" // CcAa
322 "punpcklwd %%mm1, %%mm0 \n\t" // BAba
323 "punpckhwd %%mm1, %%mm2 \n\t" // DCdc
324 "punpcklwd %%mm5, %%mm4 \n\t" // FEfe
325 "movq %%mm0, %%mm1 \n\t" // BAba
326 "movq %%mm4, %%mm3 \n\t" // FEfe
327 "punpckldq %%mm2, %%mm0 \n\t" // dcba
328 "punpckhdq %%mm1, %%mm1 \n\t" // BABA
329 "punpckldq %%mm1, %%mm4 \n\t" // BAfe
330 "punpckhdq %%mm3, %%mm2 \n\t" // FEDC
331
332 "movq %%mm0, (%0, %%edi) \n\t"
333 "movq %%mm4, 8(%0, %%edi) \n\t"
334 "movq %%mm2, 16(%0, %%edi) \n\t"
335 "addl $8, %%esi \n\t"
336 " jnz 1b \n\t"
337 "emms \n\t"
338 :: "r" (s16+1536), "r" (f+256)
339 :"%esi", "%edi", "memory"
340 );
341 #else
297 for (i = 0; i < 256; i++) { 342 for (i = 0; i < 256; i++) {
298 s16[6*i] = convert (f[i+256]); 343 s16[6*i] = convert (f[i+256]);
299 s16[6*i+1] = convert (f[i+768]); 344 s16[6*i+1] = convert (f[i+768]);
300 s16[6*i+2] = convert (f[i+1024]); 345 s16[6*i+2] = convert (f[i+1024]);
301 s16[6*i+3] = convert (f[i+1280]); 346 s16[6*i+3] = convert (f[i+1280]);
302 s16[6*i+4] = convert (f[i+512]); 347 s16[6*i+4] = convert (f[i+512]);
303 s16[6*i+5] = convert (f[i]); 348 s16[6*i+5] = convert (f[i]);
304 } 349 }
350 #endif
305 break; 351 break;
306 } 352 }
307 return chans*256; 353 return chans*256;
308 } 354 }
309 355