Mercurial > libavcodec.hg
diff x86/dsputil_mmx.c @ 10300:4d1b9ca628fc libavcodec
Drop unused args from vector_fmul_add_add, simpify code, and rename
The src3 and step arguments to vector_fmul_add_add() are always zero
and one, respectively. This removes these arguments from the function,
simplifies the code accordingly, and renames the function to better
match the new operation.
author | mru |
---|---|
date | Sun, 27 Sep 2009 16:51:54 +0000 |
parents | 3b61bc6ce377 |
children | 02798c603744 |
line wrap: on
line diff
--- a/x86/dsputil_mmx.c Sun Sep 27 09:43:11 2009 +0000 +++ b/x86/dsputil_mmx.c Sun Sep 27 16:51:54 2009 +0000 @@ -2125,34 +2125,9 @@ ); } -static void vector_fmul_add_add_3dnow(float *dst, const float *src0, const float *src1, - const float *src2, int src3, int len, int step){ +static void vector_fmul_add_3dnow(float *dst, const float *src0, const float *src1, + const float *src2, int len){ x86_reg i = (len-4)*4; - if(step == 2 && src3 == 0){ - dst += (len-4)*2; - __asm__ volatile( - "1: \n\t" - "movq (%2,%0), %%mm0 \n\t" - "movq 8(%2,%0), %%mm1 \n\t" - "pfmul (%3,%0), %%mm0 \n\t" - "pfmul 8(%3,%0), %%mm1 \n\t" - "pfadd (%4,%0), %%mm0 \n\t" - "pfadd 8(%4,%0), %%mm1 \n\t" - "movd %%mm0, (%1) \n\t" - "movd %%mm1, 16(%1) \n\t" - "psrlq $32, %%mm0 \n\t" - "psrlq $32, %%mm1 \n\t" - "movd %%mm0, 8(%1) \n\t" - "movd %%mm1, 24(%1) \n\t" - "sub $32, %1 \n\t" - "sub $16, %0 \n\t" - "jge 1b \n\t" - :"+r"(i), "+r"(dst) - :"r"(src0), "r"(src1), "r"(src2) - :"memory" - ); - } - else if(step == 1 && src3 == 0){ __asm__ volatile( "1: \n\t" "movq (%2,%0), %%mm0 \n\t" @@ -2169,47 +2144,11 @@ :"r"(dst), "r"(src0), "r"(src1), "r"(src2) :"memory" ); - } - else - ff_vector_fmul_add_add_c(dst, src0, src1, src2, src3, len, step); __asm__ volatile("femms"); } -static void vector_fmul_add_add_sse(float *dst, const float *src0, const float *src1, - const float *src2, int src3, int len, int step){ +static void vector_fmul_add_sse(float *dst, const float *src0, const float *src1, + const float *src2, int len){ x86_reg i = (len-8)*4; - if(step == 2 && src3 == 0){ - dst += (len-8)*2; - __asm__ volatile( - "1: \n\t" - "movaps (%2,%0), %%xmm0 \n\t" - "movaps 16(%2,%0), %%xmm1 \n\t" - "mulps (%3,%0), %%xmm0 \n\t" - "mulps 16(%3,%0), %%xmm1 \n\t" - "addps (%4,%0), %%xmm0 \n\t" - "addps 16(%4,%0), %%xmm1 \n\t" - "movss %%xmm0, (%1) \n\t" - "movss %%xmm1, 32(%1) \n\t" - "movhlps %%xmm0, %%xmm2 \n\t" - "movhlps %%xmm1, %%xmm3 \n\t" - "movss %%xmm2, 16(%1) \n\t" - "movss %%xmm3, 48(%1) \n\t" - "shufps $0xb1, %%xmm0, %%xmm0 \n\t" - "shufps $0xb1, %%xmm1, %%xmm1 \n\t" - "movss %%xmm0, 8(%1) \n\t" - "movss %%xmm1, 40(%1) \n\t" - "movhlps %%xmm0, %%xmm2 \n\t" - "movhlps %%xmm1, %%xmm3 \n\t" - "movss %%xmm2, 24(%1) \n\t" - "movss %%xmm3, 56(%1) \n\t" - "sub $64, %1 \n\t" - "sub $32, %0 \n\t" - "jge 1b \n\t" - :"+r"(i), "+r"(dst) - :"r"(src0), "r"(src1), "r"(src2) - :"memory" - ); - } - else if(step == 1 && src3 == 0){ __asm__ volatile( "1: \n\t" "movaps (%2,%0), %%xmm0 \n\t" @@ -2226,9 +2165,6 @@ :"r"(dst), "r"(src0), "r"(src1), "r"(src2) :"memory" ); - } - else - ff_vector_fmul_add_add_c(dst, src0, src1, src2, src3, len, step); } static void vector_fmul_window_3dnow2(float *dst, const float *src0, const float *src1, @@ -3077,7 +3013,7 @@ c->ac3_downmix = ac3_downmix_sse; c->vector_fmul = vector_fmul_sse; c->vector_fmul_reverse = vector_fmul_reverse_sse; - c->vector_fmul_add_add = vector_fmul_add_add_sse; + c->vector_fmul_add = vector_fmul_add_sse; c->vector_fmul_window = vector_fmul_window_sse; c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse; c->vector_clipf = vector_clipf_sse; @@ -3085,7 +3021,7 @@ c->float_to_int16_interleave = float_to_int16_interleave_sse; } if(mm_flags & FF_MM_3DNOW) - c->vector_fmul_add_add = vector_fmul_add_add_3dnow; // faster than sse + c->vector_fmul_add = vector_fmul_add_3dnow; // faster than sse if(mm_flags & FF_MM_SSE2){ c->int32_to_float_fmul_scalar = int32_to_float_fmul_scalar_sse2; c->float_to_int16 = float_to_int16_sse2;