# HG changeset patch # User nick # Date 1011347923 0 # Node ID ef2abfbbd1df25b2a87c9691cdc4509b25fd8b1f # Parent 3a5f381c8c197e3c410b3958394795c3a492b1ca 3dnow optimization. Not all functions are tested!!! diff -r 3a5f381c8c19 -r ef2abfbbd1df liba52/downmix.c --- a/liba52/downmix.c Fri Jan 18 09:58:18 2002 +0000 +++ b/liba52/downmix.c Fri Jan 18 09:58:43 2002 +0000 @@ -41,6 +41,8 @@ static void downmix_SSE (sample_t * samples, int acmod, int output, sample_t bias, sample_t clev, sample_t slev); +static void downmix_3dnow (sample_t * samples, int acmod, int output, sample_t bias, + sample_t clev, sample_t slev); static void downmix_C (sample_t * samples, int acmod, int output, sample_t bias, sample_t clev, sample_t slev); static void upmix_MMX (sample_t * samples, int acmod, int output); @@ -53,6 +55,7 @@ #ifdef ARCH_X86 if(mm_accel & MM_ACCEL_X86_MMX) upmix= upmix_MMX; if(mm_accel & MM_ACCEL_X86_SSE) downmix= downmix_SSE; + if(mm_accel & MM_ACCEL_X86_3DNOW) downmix= downmix_3dnow; #endif } @@ -685,6 +688,7 @@ "movlps %2, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" "movl $-1024, %%esi \n\t" + ".balign 16\n\t" "1: \n\t" "movaps (%0, %%esi), %%xmm0 \n\t" "movaps 16(%0, %%esi), %%xmm1 \n\t" @@ -707,6 +711,7 @@ "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" "movl $-1024, %%esi \n\t" + ".balign 16\n\t" "1: \n\t" "movaps (%0, %%esi), %%xmm0 \n\t" "movaps 1024(%0, %%esi), %%xmm1 \n\t" @@ -727,6 +732,7 @@ "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" "movl $-1024, %%esi \n\t" + ".balign 16\n\t" "1: \n\t" "movaps (%0, %%esi), %%xmm0 \n\t" "movaps 1024(%0, %%esi), %%xmm1 \n\t" @@ -748,6 +754,7 @@ "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" "movl $-1024, %%esi \n\t" + ".balign 16\n\t" "1: \n\t" "movaps (%0, %%esi), %%xmm0 \n\t" "movaps 1024(%0, %%esi), %%xmm1 \n\t" @@ -770,6 +777,7 @@ "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" "movl $-1024, %%esi \n\t" + ".balign 16\n\t" "1: \n\t" "movaps 1024(%0, %%esi), 
%%xmm0 \n\t" "addps %%xmm7, %%xmm0 \n\t" //common @@ -792,6 +800,7 @@ "movlps %2, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" "movl $-1024, %%esi \n\t" + ".balign 16\n\t" "1: \n\t" "movaps 1024(%1, %%esi), %%xmm0 \n\t" "addps %%xmm7, %%xmm0 \n\t" //common @@ -814,6 +823,7 @@ "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" "movl $-1024, %%esi \n\t" + ".balign 16\n\t" "1: \n\t" "movaps 2048(%0, %%esi), %%xmm0 \n\t" // surround "movaps (%0, %%esi), %%xmm1 \n\t" @@ -837,6 +847,7 @@ "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" "movl $-1024, %%esi \n\t" + ".balign 16\n\t" "1: \n\t" "movaps 1024(%0, %%esi), %%xmm0 \n\t" "addps 3072(%0, %%esi), %%xmm0 \n\t" @@ -860,6 +871,7 @@ "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" "movl $-1024, %%esi \n\t" + ".balign 16\n\t" "1: \n\t" "movaps 1024(%0, %%esi), %%xmm0 \n\t" "movaps 3072(%0, %%esi), %%xmm3 \n\t" // surround @@ -885,6 +897,7 @@ "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" "movl $-1024, %%esi \n\t" + ".balign 16\n\t" "1: \n\t" "movaps 2048(%0, %%esi), %%xmm0 \n\t" "addps 3072(%0, %%esi), %%xmm0 \n\t" // surround @@ -909,6 +922,7 @@ "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" "movl $-1024, %%esi \n\t" + ".balign 16\n\t" "1: \n\t" "movaps 1024(%0, %%esi), %%xmm0 \n\t" "addps %%xmm7, %%xmm0 \n\t" // common @@ -932,6 +946,7 @@ "movlps %1, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" "movl $-1024, %%esi \n\t" + ".balign 16\n\t" "1: \n\t" "movaps 1024(%0, %%esi), %%xmm0 \n\t" "movaps 3072(%0, %%esi), %%xmm2 \n\t" @@ -958,6 +973,7 @@ "movlps %2, %%xmm7 \n\t" "shufps $0x00, %%xmm7, %%xmm7 \n\t" "movl $-1024, %%esi \n\t" + ".balign 16\n\t" "1: \n\t" "movaps (%0, %%esi), %%xmm0 \n\t" "movaps 16(%0, %%esi), %%xmm1 \n\t" @@ -979,6 +995,7 @@ asm volatile( "movl $-1024, %%esi \n\t" "pxor %%mm0, %%mm0 \n\t" + ".balign 16\n\t" "1: \n\t" "movq %%mm0, (%0, %%esi) \n\t" "movq %%mm0, 8(%0, %%esi) \n\t" @@ -992,6 +1009,38 @@ ); } +/* + I hope dest and src 
will be at least 8 byte aligned and size + will divide by 8 without remainder + Note: untested and unused. +*/ +static void copy_MMX(void *dest,const void *src,unsigned size) +{ + unsigned i; + size /= 64; + for(i=0;i