comparison liba52/resample_c.c @ 3576:c282fd9e8534

mmx opt
author michael
date Tue, 18 Dec 2001 02:13:49 +0000
parents 01a2466e035e
children 6bf4dbfb941c
comparison
equal deleted inserted replaced
3575:01a2466e035e 3576:c282fd9e8534
265 #endif 265 #endif
266 break; 266 break;
267 case A52_CHANNEL | A52_LFE: 267 case A52_CHANNEL | A52_LFE:
268 case A52_STEREO | A52_LFE: 268 case A52_STEREO | A52_LFE:
269 case A52_DOLBY | A52_LFE: 269 case A52_DOLBY | A52_LFE:
270 #ifdef HAVE_MMX
271 asm volatile(
272 "movl $-1024, %%esi \n\t"
273 "movq magicF2W, %%mm7 \n\t"
274 "pxor %%mm6, %%mm6 \n\t"
275 "1: \n\t"
276 "movq 1024(%1, %%esi), %%mm0 \n\t"
277 "movq 2048(%1, %%esi), %%mm1 \n\t"
278 "movq (%1, %%esi), %%mm5 \n\t"
279 "psubd %%mm7, %%mm0 \n\t"
280 "psubd %%mm7, %%mm1 \n\t"
281 "psubd %%mm7, %%mm5 \n\t"
282 "leal (%%esi, %%esi, 2), %%edi \n\t"
283
284 "pxor %%mm4, %%mm4 \n\t"
285 "packssdw %%mm5, %%mm0 \n\t" // FfAa
286 "packssdw %%mm4, %%mm1 \n\t" // 00Bb
287 "punpckhwd %%mm0, %%mm4 \n\t" // F0f0
288 "punpcklwd %%mm1, %%mm0 \n\t" // BAba
289 "movq %%mm0, %%mm1 \n\t" // BAba
290 "punpckldq %%mm4, %%mm3 \n\t" // f0XX
291 "punpckldq %%mm6, %%mm0 \n\t" // 00ba
292 "punpckhdq %%mm1, %%mm3 \n\t" // BAf0
293
294 "movq %%mm0, (%0, %%edi) \n\t" // 00ba
295 "punpckhdq %%mm4, %%mm0 \n\t" // F000
296 "movq %%mm3, 8(%0, %%edi) \n\t" // BAf0
297 "movq %%mm0, 16(%0, %%edi) \n\t" // F000
298 "addl $8, %%esi \n\t"
299 " jnz 1b \n\t"
300 "emms \n\t"
301 :: "r" (s16+1536), "r" (f+256)
302 :"%esi", "%edi", "memory"
303 );
304 #else
270 for (i = 0; i < 256; i++) { 305 for (i = 0; i < 256; i++) {
271 s16[6*i] = convert (f[i+256]); 306 s16[6*i] = convert (f[i+256]);
272 s16[6*i+1] = convert (f[i+512]); 307 s16[6*i+1] = convert (f[i+512]);
273 s16[6*i+2] = s16[6*i+3] = s16[6*i+4] = 0; 308 s16[6*i+2] = s16[6*i+3] = s16[6*i+4] = 0;
274 s16[6*i+5] = convert (f[i]); 309 s16[6*i+5] = convert (f[i]);
275 } 310 }
311 #endif
276 break; 312 break;
277 case A52_3F | A52_LFE: 313 case A52_3F | A52_LFE:
278 for (i = 0; i < 256; i++) { 314 for (i = 0; i < 256; i++) {
279 s16[6*i] = convert (f[i+256]); 315 s16[6*i] = convert (f[i+256]);
280 s16[6*i+1] = convert (f[i+768]); 316 s16[6*i+1] = convert (f[i+768]);