Mercurial > libavcodec.hg
changeset 7217:726770da3234 libavcodec
Don't wrap the asm in a C loop in float_to_int16_3dnow(); do the loop in asm and unroll it once.
30% faster.
author | michael |
---|---|
date | Mon, 07 Jul 2008 20:46:03 +0000 |
parents | 48b7054a507e |
children | 7f3d6509628b |
files | i386/dsputil_mmx.c |
diffstat | 1 files changed, 19 insertions(+), 12 deletions(-) [+] |
line wrap: on
line diff
--- a/i386/dsputil_mmx.c Mon Jul 07 08:29:08 2008 +0000
+++ b/i386/dsputil_mmx.c Mon Jul 07 20:46:03 2008 +0000
@@ -2024,18 +2024,25 @@
 
 static void float_to_int16_3dnow(int16_t *dst, const float *src, int len){
     // not bit-exact: pf2id uses different rounding than C and SSE
-    int i;
-    for(i=0; i<len; i+=4) {
-        asm volatile(
-            "pf2id %1, %%mm0 \n\t"
-            "pf2id %2, %%mm1 \n\t"
-            "packssdw %%mm1, %%mm0 \n\t"
-            "movq %%mm0, %0 \n\t"
-            :"=m"(dst[i])
-            :"m"(src[i]), "m"(src[i+2])
-        );
-    }
-    asm volatile("femms");
+    asm volatile(
+        "add %0 , %0 \n\t"
+        "lea (%2,%0,2) , %2 \n\t"
+        "add %0 , %1 \n\t"
+        "neg %0 \n\t"
+        "1: \n\t"
+        "pf2id (%2,%0,2) , %%mm0 \n\t"
+        "pf2id 8(%2,%0,2) , %%mm1 \n\t"
+        "pf2id 16(%2,%0,2) , %%mm2 \n\t"
+        "pf2id 24(%2,%0,2) , %%mm3 \n\t"
+        "packssdw %%mm1 , %%mm0 \n\t"
+        "packssdw %%mm3 , %%mm2 \n\t"
+        "movq %%mm0 , (%1,%0) \n\t"
+        "movq %%mm2 , 8(%1,%0) \n\t"
+        "add $16 , %0 \n\t"
+        " js 1b \n\t"
+        "femms \n\t"
+        :"+r"(len), "+r"(dst), "+r"(src)
+    );
 }
 static void float_to_int16_sse(int16_t *dst, const float *src, int len){
     int i;