changeset 3624:0e4ff1cac6a2

SSE optimization of mix32to2()
author michael
date Wed, 19 Dec 2001 18:58:05 +0000
parents 3f1c2c06d0d8
children 84ff13d4540c
files liba52/downmix.c
diffstat 1 files changed, 22 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/liba52/downmix.c	Wed Dec 19 18:42:36 2001 +0000
+++ b/liba52/downmix.c	Wed Dec 19 18:58:05 2001 +0000
@@ -411,11 +411,33 @@
     int i;
     sample_t common;
 
+#ifdef HAVE_SSE
+	asm volatile(
+	"movlps %1, %%xmm7		\n\t"
+	"shufps $0x00, %%xmm7, %%xmm7	\n\t"
+	"movl $-1024, %%esi		\n\t"
+	"1:				\n\t"
+	"movaps 1024(%0, %%esi), %%xmm0	\n\t" 
+	"addps %%xmm7, %%xmm0		\n\t" // common
+	"movaps %%xmm0, %%xmm1		\n\t" // common
+	"addps (%0, %%esi), %%xmm0	\n\t" 
+	"addps 2048(%0, %%esi), %%xmm1	\n\t" 
+	"addps 3072(%0, %%esi), %%xmm0	\n\t" 
+	"addps 4096(%0, %%esi), %%xmm1	\n\t" 
+	"movaps %%xmm0, (%0, %%esi)	\n\t"
+	"movaps %%xmm1, 1024(%0, %%esi)	\n\t"
+	"addl $16, %%esi		\n\t"
+	" jnz 1b			\n\t"
+	:: "r" (samples+256), "m" (bias)
+	: "%esi"
+	);
+#else
     for (i = 0; i < 256; i++) {
 	common = samples[i + 256] + bias;
 	samples[i] += common + samples[i + 768];
 	samples[i + 256] = common + samples[i + 512] + samples[i + 1024];
     }
+#endif
 }
 
 static void mix32toS (sample_t * samples, sample_t bias)