changeset 3677:2a36c3d11dbd

mix5to1 & mix4to1 in SSE
author michael
date Sun, 23 Dec 2001 17:16:07 +0000
parents 45ecea1c23f3
children 265680bbdcfd
files liba52/downmix.c
diffstat 1 files changed, 41 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/liba52/downmix.c	Sun Dec 23 17:10:27 2001 +0000
+++ b/liba52/downmix.c	Sun Dec 23 17:16:07 2001 +0000
@@ -342,18 +342,59 @@
 {
     int i;
 
+#ifdef HAVE_SSE // FIXME untested
+	asm volatile(
+	"movlps %1, %%xmm7		\n\t"
+	"shufps $0x00, %%xmm7, %%xmm7	\n\t"
+	"movl $-1024, %%esi		\n\t"
+	"1:				\n\t"
+	"movaps (%0, %%esi), %%xmm0	\n\t" 
+	"movaps 1024(%0, %%esi), %%xmm1	\n\t" 
+	"addps 2048(%0, %%esi), %%xmm0	\n\t" 
+	"addps 3072(%0, %%esi), %%xmm1	\n\t" 
+	"addps %%xmm7, %%xmm0		\n\t"
+	"addps %%xmm1, %%xmm0		\n\t"
+	"movaps %%xmm0, (%0, %%esi)	\n\t"
+	"addl $16, %%esi		\n\t"
+	" jnz 1b			\n\t"
+	:: "r" (samples+256), "m" (bias)
+	: "%esi"
+	);
+#else
     for (i = 0; i < 256; i++)
 	samples[i] += (samples[i + 256] + samples[i + 512] +
 		       samples[i + 768] + bias);
+#endif
 }
 
 static void mix5to1 (sample_t * samples, sample_t bias)
 {
     int i;
 
+#ifdef HAVE_SSE
+	asm volatile(
+	"movlps %1, %%xmm7		\n\t"
+	"shufps $0x00, %%xmm7, %%xmm7	\n\t"
+	"movl $-1024, %%esi		\n\t"
+	"1:				\n\t"
+	"movaps (%0, %%esi), %%xmm0	\n\t" 
+	"movaps 1024(%0, %%esi), %%xmm1	\n\t" 
+	"addps 2048(%0, %%esi), %%xmm0	\n\t" 
+	"addps 3072(%0, %%esi), %%xmm1	\n\t" 
+	"addps %%xmm7, %%xmm0		\n\t"
+	"addps 4096(%0, %%esi), %%xmm1	\n\t" 
+	"addps %%xmm1, %%xmm0		\n\t"
+	"movaps %%xmm0, (%0, %%esi)	\n\t"
+	"addl $16, %%esi		\n\t"
+	" jnz 1b			\n\t"
+	:: "r" (samples+256), "m" (bias)
+	: "%esi"
+	);
+#else
     for (i = 0; i < 256; i++)
 	samples[i] += (samples[i + 256] + samples[i + 512] +
 		       samples[i + 768] + samples[i + 1024] + bias);
+#endif
 }
 
 static void mix3to2 (sample_t * samples, sample_t bias)