changeset 3625:84ff13d4540c

SSE optimization of mix32toS()
author michael
date Wed, 19 Dec 2001 19:35:08 +0000
parents 0e4ff1cac6a2
children e22ff7ebdc05
files liba52/downmix.c
diffstat 1 files changed, 27 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- a/liba52/downmix.c	Wed Dec 19 18:58:05 2001 +0000
+++ b/liba52/downmix.c	Wed Dec 19 19:35:08 2001 +0000
@@ -19,6 +19,8 @@
  * You should have received a copy of the GNU General Public License
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+ *
+ * SSE optimizations from Michael Niedermayer (michaelni@gmx.at)
  */
 
 #include "config.h"
@@ -445,12 +447,37 @@
     int i;
     sample_t common, surround;
 
+#ifdef HAVE_SSE
+	asm volatile(
+	"movlps %1, %%xmm7		\n\t"
+	"shufps $0x00, %%xmm7, %%xmm7	\n\t"
+	"movl $-1024, %%esi		\n\t"
+	"1:				\n\t"
+	"movaps 1024(%0, %%esi), %%xmm0	\n\t" 
+	"movaps 3072(%0, %%esi), %%xmm2	\n\t" 
+	"addps %%xmm7, %%xmm0		\n\t" // common
+	"addps 4096(%0, %%esi), %%xmm2	\n\t" // surround	
+	"movaps (%0, %%esi), %%xmm1	\n\t" 
+	"movaps 2048(%0, %%esi), %%xmm3	\n\t" 
+	"subps %%xmm2, %%xmm1		\n\t"	
+	"addps %%xmm2, %%xmm3		\n\t"	
+	"addps %%xmm0, %%xmm1		\n\t"	
+	"addps %%xmm0, %%xmm3		\n\t"	
+	"movaps %%xmm1, (%0, %%esi)	\n\t"
+	"movaps %%xmm3, 1024(%0, %%esi)	\n\t"
+	"addl $16, %%esi		\n\t"
+	" jnz 1b			\n\t"
+	:: "r" (samples+256), "m" (bias)
+	: "%esi"
+	);
+#else
     for (i = 0; i < 256; i++) {
 	common = samples[i + 256] + bias;
 	surround = samples[i + 768] + samples[i + 1024];
 	samples[i] += common - surround;
 	samples[i + 256] = samples[i + 512] + common + surround;
     }
+#endif
 }
 
 static void move2to1 (sample_t * src, sample_t * dest, sample_t bias)