comparison libswscale/rgb2rgb_template.c @ 22995:70d7c6206f33

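Skip the MMX code in rgb32to15 if the input is smaller than the unit the MMX loop processes per iteration (16 source bytes): the loop is now entered through a jump to its end-of-loop comparison, so the body no longer runs once unconditionally.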
author ivo
date Wed, 18 Apr 2007 09:24:49 +0000
parents ac77d9ef8c83
children 2a60af5e78a7
comparison
equal deleted inserted replaced
22994:ac77d9ef8c83 22995:70d7c6206f33
511 #if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it's slightly faster) 511 #if 1 //is faster only if multiplies are reasonably fast (FIXME figure out on which CPUs this is faster, on Athlon it's slightly faster)
512 asm volatile( 512 asm volatile(
513 "movq %3, %%mm5 \n\t" 513 "movq %3, %%mm5 \n\t"
514 "movq %4, %%mm6 \n\t" 514 "movq %4, %%mm6 \n\t"
515 "movq %5, %%mm7 \n\t" 515 "movq %5, %%mm7 \n\t"
516 "jmp 2f \n\t"
516 ASMALIGN(4) 517 ASMALIGN(4)
517 "1: \n\t" 518 "1: \n\t"
518 PREFETCH" 32(%1) \n\t" 519 PREFETCH" 32(%1) \n\t"
519 "movd (%1), %%mm0 \n\t" 520 "movd (%1), %%mm0 \n\t"
520 "movd 4(%1), %%mm3 \n\t" 521 "movd 4(%1), %%mm3 \n\t"
534 "pslld $10, %%mm3 \n\t" 535 "pslld $10, %%mm3 \n\t"
535 "por %%mm3, %%mm0 \n\t" 536 "por %%mm3, %%mm0 \n\t"
536 MOVNTQ" %%mm0, (%0) \n\t" 537 MOVNTQ" %%mm0, (%0) \n\t"
537 "add $16, %1 \n\t" 538 "add $16, %1 \n\t"
538 "add $8, %0 \n\t" 539 "add $8, %0 \n\t"
540 "2: \n\t"
539 "cmp %2, %1 \n\t" 541 "cmp %2, %1 \n\t"
540 " jb 1b \n\t" 542 " jb 1b \n\t"
541 : "+r" (d), "+r"(s) 543 : "+r" (d), "+r"(s)
542 : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215) 544 : "r" (mm_end), "m" (mask3215g), "m" (mask3216br), "m" (mul3215)
543 ); 545 );