changeset 11241:c8b84bb55089

removed obsolete (or never-ending) code
author alex
date Wed, 22 Oct 2003 21:08:46 +0000
parents 5403367c7032
children 34770e3d9654
files mp3lib/Makefile mp3lib/dct64_sse.s mp3lib/mp3lib_objfix.sh mp3lib/sr1.c
diffstat 4 files changed, 1 insertions(+), 2255 deletions(-)
--- a/mp3lib/Makefile	Wed Oct 22 20:58:46 2003 +0000
+++ b/mp3lib/Makefile	Wed Oct 22 21:08:46 2003 +0000
@@ -13,10 +13,6 @@
 OBJS += decode_i586.o
 SRCS += decode_MMX.c dct64_MMX.c tabinit_MMX.c
 OBJS += decode_MMX.o dct64_MMX.o tabinit_MMX.o
-#ifeq ($(TARGET_SSE),yes)
-#SRCS += dct64_sse.s
-#OBJS += dct64_sse.o
-#endif
 SRCS += dct36_3dnow.c dct64_3dnow.c
 OBJS += dct36_3dnow.o dct64_3dnow.o
 SRCS += dct36_k7.c dct64_k7.c
@@ -67,7 +63,7 @@
 	$(CC) $(CFLAGS) test2.c ../libvo/aclib.c -o test2 -I.. ./libMP3.a ../mp_msg-mencoder.o ../cpudetect.o -lm
 
 clean:
-	rm -f *~ *.o *.a
+	rm -f *~ *.o *.a test1 test2
 
 distclean:
 	rm -f *~ *.o *.a Makefile.bak .depend
--- a/mp3lib/dct64_sse.s	Wed Oct 22 20:58:46 2003 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,2221 +0,0 @@
-/ This code is a translation of dct64_k7.s from MPlayer.
-/ Coded by Felix Buenemann <atmosfear at users.sourceforge.net>
-/
-/ TODO: - fix phases 4 and 5 (sse)
-/       - optimize scalar FPU code? (interleave with sse code)
-/       - fix alignment (this currently blocks finishing the code)
-/       - then use faster insns for aligned data
-/
-/ Note: the code is currently disabled, as I couldn't get the input data aligned!
-/
-
-//.data
-//	.align 8
-//x_plus_minus_3dnow: .long 0x00000000, 0x80000000
-//plus_1f: .float 1.0
-
-.text
-
-	.align 16
-
-	.global dct64_MMX_sse
-
-dct64_MMX_sse:
-	pushl %ebx
-	pushl %esi
-	pushl %edi
-	subl $256,%esp
-	movl 280(%esp),%eax
-
-	leal 128(%esp),%edx
-	movl 272(%esp),%esi
-	movl 276(%esp),%edi
-	movl $costab_mmx,%ebx
-	orl %ecx,%ecx
-	movl %esp,%ecx
-
-/* Phase 1 (complete, worx) */
-
-// [1] Process Block A1 (16 Bytes)
-/	movq	(%eax), %mm0
-/	movq	8(%eax), %mm4
-	movups	(%eax), %xmm0
-
-// Copy A1 to another register A2
-/	movq	%mm0, %mm3
-/	movq	%mm4, %mm7
-	movaps	%xmm0, %xmm2
-
-// Process Block B1 (last 16 bytes)
-/	movq	120(%eax), %mm1
-/	movq	112(%eax), %mm5
-	movups	112(%eax), %xmm1
-
-/* The PSWAPD instruction swaps or reverses the upper and lower
- * doublewords of the source operand.  PSWAPD mmreg1, mmreg2
- * performs the following operations:
- * temp = mmreg2
- * mmreg1[63:32] = temp[31:0 ]
- * mmreg1[31:0 ] = temp[63:32]
- */
-/	pswapd	%mm1, %mm1
-/	pswapd	%mm5, %mm5
-// shufps here exchanges a,b,c,d to b,a,d,c in xmm1 (desc ia32-ref p.752)
-////	shufps	$177, %xmm1, %xmm1
-	shufps	$27, %xmm1, %xmm1
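
For reference, the lane reversal that shufps $27 performs here is easier to read
as an SSE intrinsic; a minimal C sketch (not part of this file, helper name
hypothetical):

    #include <xmmintrin.h>

    /* Reverse four packed floats: (a,b,c,d) -> (d,c,b,a).
     * _MM_SHUFFLE(0,1,2,3) is the same immediate as shufps $27. */
    static inline __m128 reverse4(__m128 v)
    {
        return _mm_shuffle_ps(v, v, _MM_SHUFFLE(0, 1, 2, 3));
    }
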
-
-// Add B1 to A1
-/	pfadd	%mm1, %mm0
-/	pfadd	%mm5, %mm4
-	addps	%xmm1, %xmm0
-
-// Save Block A1 
-/	movq	%mm0, (%edx)
-/	movq	%mm4, 8(%edx)
-	movups	%xmm0, (%edx)
-
-// Sub B1 from A2
-/	pfsub	%mm1, %mm3
-/	pfsub	%mm5, %mm7
-	subps	%xmm1, %xmm2
-
-// Mul mem with A2
-/	pfmul	(%ebx), %mm3
-/	pfmul	8(%ebx), %mm7
-	movups	(%ebx), %xmm7
-	mulps	%xmm7, %xmm2
-
-// Shuffle A2
-/	pswapd	%mm3, %mm3
-/	pswapd	%mm7, %mm7
-// I do a,b,c,d -> d,c,b,a to suit order when writing to mem (saves one shufps)
-	shufps	$27, %xmm2, %xmm2
-
-// Save A2 to mem (end)
-/	movq	%mm3, 120(%edx)
-/	movq	%mm7, 112(%edx)
-	movups	%xmm2, 112(%edx)
-
-// [2] Process next data block
-/	movq	16(%eax), %mm0
-/	movq	24(%eax), %mm4
-	movups	16(%eax), %xmm0
-
-/	movq	%mm0, %mm3
-/	movq	%mm4, %mm7
-	movaps	%xmm0, %xmm2
-
-/	movq	104(%eax), %mm1
-/	movq	96(%eax), %mm5
-	movups	96(%eax), %xmm1
-
-/	pswapd	%mm1, %mm1
-/	pswapd	%mm5, %mm5
-////	shufps	$177, %xmm1, %xmm1
-	shufps	$27, %xmm1, %xmm1
-
-/	pfadd	%mm1, %mm0
-/	pfadd	%mm5, %mm4
-	addps	%xmm1, %xmm0
-
-/	movq	%mm0, 16(%edx)
-/	movq	%mm4, 24(%edx)
-	movups	%xmm0, 16(%edx)
-
-/	pfsub	%mm1, %mm3
-/	pfsub	%mm5, %mm7
-	subps	%xmm1, %xmm2
-
-/	pfmul	16(%ebx), %mm3
-/	pfmul	24(%ebx), %mm7
-	movups	16(%ebx), %xmm7
-	mulps	%xmm7, %xmm2
-
-/	pswapd	%mm3, %mm3
-/	pswapd	%mm7, %mm7
-	shufps $27, %xmm2, %xmm2
-
-/	movq	%mm3, 104(%edx)
-/	movq	%mm7, 96(%edx)
-	movups	%xmm2, 96(%edx)
-
-// [3]
-/	movq	32(%eax), %mm0
-/	movq	40(%eax), %mm4
-	movups	32(%eax), %xmm0
-	
-/	movq	%mm0, %mm3
-/	movq	%mm4, %mm7
-	movaps	%xmm0, %xmm2
-
-/	movq	88(%eax), %mm1
-/	movq	80(%eax), %mm5
-	movups	80(%eax), %xmm1
-
-/	pswapd	%mm1, %mm1
-/	pswapd	%mm5, %mm5
-////	shufps	$177, %xmm1, %xmm1
-	shufps	$27, %xmm1, %xmm1
-
-/	pfadd	%mm1, %mm0
-/	pfadd	%mm5, %mm4
-	addps	%xmm1, %xmm0
-
-/	movq	%mm0, 32(%edx)
-/	movq	%mm4, 40(%edx)
-	movups	%xmm0, 32(%edx)
-
-/	pfsub	%mm1, %mm3
-/	pfsub	%mm5, %mm7
-	subps	%xmm1, %xmm2
-
-/	pfmul	32(%ebx), %mm3
-/	pfmul	40(%ebx), %mm7
-	movups	32(%ebx), %xmm7
-	mulps	%xmm7, %xmm2
-
-/	pswapd	%mm3, %mm3
-/	pswapd	%mm7, %mm7
-	shufps	$27, %xmm2, %xmm2
-
-/	movq	%mm3, 88(%edx)
-/	movq	%mm7, 80(%edx)
-	movups	%xmm2, 80(%edx)
-
-// [4]
-/	movq	48(%eax), %mm0
-/	movq	56(%eax), %mm4
-	movups	48(%eax), %xmm0
-
-/	movq	%mm0, %mm3
-/	movq	%mm4, %mm7
-	movaps	%xmm0, %xmm2
-
-/	movq	72(%eax), %mm1
-/	movq	64(%eax), %mm5
-	movups	64(%eax), %xmm1
-
-/	pswapd	%mm1, %mm1
-/	pswapd	%mm5, %mm5
-////	shufps	$177, %xmm1, %xmm1
-	shufps	$27, %xmm1, %xmm1
-
-/	pfadd	%mm1, %mm0
-/	pfadd	%mm5, %mm4
-	addps	%xmm1, %xmm0
-
-/	movq	%mm0, 48(%edx)
-/	movq	%mm4, 56(%edx)
-	movups	%xmm0, 48(%edx)
-
-/	pfsub	%mm1, %mm3
-/	pfsub	%mm5, %mm7
-	subps	%xmm1, %xmm2
-
-/	pfmul	48(%ebx), %mm3
-/	pfmul	56(%ebx), %mm7
-	movups	48(%ebx), %xmm7
-	mulps	%xmm7, %xmm2
-
-/	pswapd	%mm3, %mm3
-/	pswapd	%mm7, %mm7
-	shufps	$27, %xmm2, %xmm2
-
-/	movq	%mm3, 72(%edx)
-/	movq	%mm7, 64(%edx)
-	movups	%xmm2, 64(%edx)
-
-
-// phase 1 fpu code
-/* Phase 1*/
-/*
-	flds     (%eax)
-	leal 128(%esp),%edx
-	fadds 124(%eax)
-	movl 272(%esp),%esi
-	fstps    (%edx)
-	movl 276(%esp),%edi
-
-	flds    4(%eax)
-	movl $costab_mmx,%ebx
-	fadds 120(%eax)
-	orl %ecx,%ecx
-	fstps   4(%edx)
-
-	flds     (%eax)
-	movl %esp,%ecx
-	fsubs 124(%eax)
-	fmuls    (%ebx)
-	fstps 124(%edx)
-
-	flds    4(%eax)
-	fsubs 120(%eax)
-	fmuls   4(%ebx)
-	fstps 120(%edx)
-
-	flds    8(%eax)
-	fadds 116(%eax)
-	fstps   8(%edx)
-
-	flds   12(%eax)
-	fadds 112(%eax)
-	fstps  12(%edx)
-
-	flds    8(%eax)
-	fsubs 116(%eax)
-	fmuls   8(%ebx)
-	fstps 116(%edx)
-
-	flds   12(%eax)
-	fsubs 112(%eax)
-	fmuls  12(%ebx)
-	fstps 112(%edx)
-
-	flds   16(%eax)
-	fadds 108(%eax)
-	fstps  16(%edx)
-
-	flds   20(%eax)
-	fadds 104(%eax)
-	fstps  20(%edx)
-
-	flds   16(%eax)
-	fsubs 108(%eax)
-	fmuls  16(%ebx)
-	fstps 108(%edx)
-
-	flds   20(%eax)
-	fsubs 104(%eax)
-	fmuls  20(%ebx)
-	fstps 104(%edx)
-
-	flds   24(%eax)
-	fadds 100(%eax)
-	fstps  24(%edx)
-
-	flds   28(%eax)
-	fadds  96(%eax)
-	fstps  28(%edx)
-
-	flds   24(%eax)
-	fsubs 100(%eax)
-	fmuls  24(%ebx)
-	fstps 100(%edx)
-
-	flds   28(%eax)
-	fsubs  96(%eax)
-	fmuls  28(%ebx)
-	fstps  96(%edx)
-
-	flds   32(%eax)
-	fadds  92(%eax)
-	fstps  32(%edx)
-
-	flds   36(%eax)
-	fadds  88(%eax)
-	fstps  36(%edx)
-
-	flds   32(%eax)
-	fsubs  92(%eax)
-	fmuls  32(%ebx)
-	fstps  92(%edx)
-
-	flds   36(%eax)
-	fsubs  88(%eax)
-	fmuls  36(%ebx)
-	fstps  88(%edx)
-
-	flds   40(%eax)
-	fadds  84(%eax)
-	fstps  40(%edx)
-
-	flds   44(%eax)
-	fadds  80(%eax)
-	fstps  44(%edx)
-
-	flds   40(%eax)
-	fsubs  84(%eax)
-	fmuls  40(%ebx)
-	fstps  84(%edx)
-
-	flds   44(%eax)
-	fsubs  80(%eax)
-	fmuls  44(%ebx)
-	fstps  80(%edx)
-
-	flds   48(%eax)
-	fadds  76(%eax)
-	fstps  48(%edx)
-
-	flds   52(%eax)
-	fadds  72(%eax)
-	fstps  52(%edx)
-
-	flds   48(%eax)
-	fsubs  76(%eax)
-	fmuls  48(%ebx)
-	fstps  76(%edx)
-
-	flds   52(%eax)
-	fsubs  72(%eax)
-	fmuls  52(%ebx)
-	fstps  72(%edx)
-
-	flds   56(%eax)
-	fadds  68(%eax)
-	fstps  56(%edx)
-
-	flds   60(%eax)
-	fadds  64(%eax)
-	fstps  60(%edx)
-
-	flds   56(%eax)
-	fsubs  68(%eax)
-	fmuls  56(%ebx)
-	fstps  68(%edx)
-
-	flds   60(%eax)
-	fsubs  64(%eax)
-	fmuls  60(%ebx)
-	fstps  64(%edx)
-*/	
-// end phase 1 fpu code
-
-/* Phase 2 (completed, worx) */
-
-/	movq	(%edx), %mm0
-/	movq	8(%edx), %mm4
-	movups	(%edx), %xmm0
-
-/	movq	%mm0, %mm3
-/	movq	%mm4, %mm7
-	movaps	%xmm0, %xmm2
-
-/	movq	56(%edx), %mm1
-/	movq	48(%edx), %mm5
-	movups	48(%edx), %xmm1
-
-/	pswapd	%mm1, %mm1
-/	pswapd	%mm5, %mm5
-////	shufps	$177, %xmm1, %xmm1
-	shufps	$27, %xmm1, %xmm1
-
-/	pfadd	%mm1, %mm0
-/	pfadd	%mm5, %mm4
-	addps	%xmm1, %xmm0
-
-/	movq	%mm0, (%ecx)
-/	movq	%mm4, 8(%ecx)
-	movups	%xmm0, (%ecx)
-
-/	pfsub	%mm1, %mm3
-/	pfsub	%mm5, %mm7
-	subps	%xmm1, %xmm2
-
-/	pfmul	64(%ebx), %mm3
-/	pfmul	72(%ebx), %mm7
-	movups	64(%ebx), %xmm7
-	mulps	%xmm7, %xmm2
-
-/	pswapd	%mm3, %mm3
-/	pswapd	%mm7, %mm7
-	shufps	$27, %xmm2, %xmm2
-
-/	movq	%mm3, 56(%ecx)
-/	movq	%mm7, 48(%ecx)
-	movups	%xmm2, 48(%ecx)
-	
-/	movq	16(%edx), %mm0
-/	movq	24(%edx), %mm4
-	movups	16(%edx), %xmm0
-
-/	movq	%mm0, %mm3
-/	movq	%mm4, %mm7
-	movaps	%xmm0, %xmm2
-
-/	movq	40(%edx), %mm1
-/	movq	32(%edx), %mm5
-	movups	32(%edx), %xmm1
-
-/	pswapd	%mm1, %mm1
-/	pswapd	%mm5, %mm5
-////	shufps	$177, %xmm1, %xmm1
-	shufps	$27, %xmm1, %xmm1
-
-/	pfadd	%mm1, %mm0
-/	pfadd	%mm5, %mm4
-	addps	%xmm1, %xmm0
-
-/	movq	%mm0, 16(%ecx)
-/	movq	%mm4, 24(%ecx)
-	movups	%xmm0, 16(%ecx)
-
-/	pfsub	%mm1, %mm3
-/	pfsub	%mm5, %mm7
-	subps	%xmm1, %xmm2
-
-/	pfmul	80(%ebx), %mm3
-/	pfmul	88(%ebx), %mm7
-	movups	80(%ebx), %xmm7
-	mulps	%xmm7, %xmm2
-
-/	pswapd	%mm3, %mm3
-/	pswapd	%mm7, %mm7
-	shufps	$27, %xmm2, %xmm2
-
-/	movq	%mm3, 40(%ecx)
-/	movq	%mm7, 32(%ecx)
-	movups	%xmm2, 32(%ecx)
-
-
-// phase 2 fpu
-/* Phase 2*/
-/*
-	flds     (%edx)
-	fadds  60(%edx)
-	fstps    (%ecx)
-
-	flds    4(%edx)
-	fadds  56(%edx)
-	fstps   4(%ecx)
-
-	flds     (%edx)
-	fsubs  60(%edx)
-	fmuls  64(%ebx)
-	fstps  60(%ecx)
-
-	flds    4(%edx)
-	fsubs  56(%edx)
-	fmuls  68(%ebx)
-	fstps  56(%ecx)
-
-	flds    8(%edx)
-	fadds  52(%edx)
-	fstps   8(%ecx)
-
-	flds   12(%edx)
-	fadds  48(%edx)
-	fstps  12(%ecx)
-
-	flds    8(%edx)
-	fsubs  52(%edx)
-	fmuls  72(%ebx)
-	fstps  52(%ecx)
-
-	flds   12(%edx)
-	fsubs  48(%edx)
-	fmuls  76(%ebx)
-	fstps  48(%ecx)
-
-	flds   16(%edx)
-	fadds  44(%edx)
-	fstps  16(%ecx)
-
-	flds   20(%edx)
-	fadds  40(%edx)
-	fstps  20(%ecx)
-
-	flds   16(%edx)
-	fsubs  44(%edx)
-	fmuls  80(%ebx)
-	fstps  44(%ecx)
-
-	flds   20(%edx)
-	fsubs  40(%edx)
-	fmuls  84(%ebx)
-	fstps  40(%ecx)
-
-	flds   24(%edx)
-	fadds  36(%edx)
-	fstps  24(%ecx)
-
-	flds   28(%edx)
-	fadds  32(%edx)
-	fstps  28(%ecx)
-
-	flds   24(%edx)
-	fsubs  36(%edx)
-	fmuls  88(%ebx)
-	fstps  36(%ecx)
-
-	flds   28(%edx)
-	fsubs  32(%edx)
-	fmuls  92(%ebx)
-	fstps  32(%ecx)
-*/	
-// end phase 2 fpu
-
-/* Phase 3 (completed, working) */
-
-/	movq	64(%edx), %mm0
-/	movq	72(%edx), %mm4
-	movups	64(%edx), %xmm0
-
-/	movq	%mm0, %mm3
-/	movq	%mm4, %mm7
-	movaps	%xmm0, %xmm2
-
-/	movq	120(%edx), %mm1
-/	movq	112(%edx), %mm5
-	movups	112(%edx), %xmm1
-
-/	pswapd	%mm1, %mm1
-/	pswapd	%mm5, %mm5
-////	shufps	$177, %xmm1, %xmm1
-	shufps	$27, %xmm1, %xmm1
-
-/	pfadd	%mm1, %mm0
-/	pfadd	%mm5, %mm4
-	addps	%xmm1, %xmm0
-
-/	movq	%mm0, 64(%ecx)
-/	movq	%mm4, 72(%ecx)
-	movups	%xmm0, 64(%ecx)
-
-/	pfsubr	%mm1, %mm3
-/	pfsubr	%mm5, %mm7
-// optimized (xmm1<->xmm2)
-	subps	%xmm2, %xmm1
-
-/	pfmul	64(%ebx), %mm3
-/	pfmul	72(%ebx), %mm7
-	movups	64(%ebx), %xmm7
-	mulps	%xmm7, %xmm1
-
-/	pswapd	%mm3, %mm3
-/	pswapd	%mm7, %mm7
-	shufps	$27, %xmm1, %xmm1
-
-/	movq	%mm3, 120(%ecx)
-/	movq	%mm7, 112(%ecx)
-	movups	%xmm1, 112(%ecx)
-
-
-/	movq	80(%edx), %mm0
-/	movq	88(%edx), %mm4
-	movups	80(%edx), %xmm0
-
-/	movq	%mm0, %mm3
-/	movq	%mm4, %mm7
-	movaps	%xmm0, %xmm2
-
-/	movq	104(%edx), %mm1
-/	movq	96(%edx), %mm5
-	movups	96(%edx), %xmm1
-
-/	pswapd	%mm1, %mm1
-/	pswapd	%mm5, %mm5
-////	shufps	$177, %xmm1, %xmm1
-	shufps	$27, %xmm1, %xmm1
-
-/	pfadd	%mm1, %mm0
-/	pfadd	%mm5, %mm4
-	addps	%xmm1, %xmm0
-
-/	movq	%mm0, 80(%ecx)
-/	movq	%mm4, 88(%ecx)
-	movups	%xmm0, 80(%ecx)
-
-/	pfsubr	%mm1, %mm3
-/	pfsubr	%mm5, %mm7
-// optimized (xmm1<->xmm2)
-	subps	%xmm2, %xmm1
-
-/	pfmul	80(%ebx), %mm3
-/	pfmul	88(%ebx), %mm7
-	movups	80(%ebx), %xmm7
-	mulps	%xmm7, %xmm1
-
-/	pswapd	%mm3, %mm3
-/	pswapd	%mm7, %mm7
-	shufps	$27, %xmm1, %xmm1
-
-/	movq	%mm3, 104(%ecx)
-/	movq	%mm7, 96(%ecx)
-	movups	%xmm1, 96(%ecx)
-
-
-// phase 3 fpu
-/* Phase 3*/
-/*
-	flds   64(%edx)
-	fadds 124(%edx)
-	fstps  64(%ecx)
-
-	flds   68(%edx)
-	fadds 120(%edx)
-	fstps  68(%ecx)
-
-	flds  124(%edx)
-	fsubs  64(%edx)
-	fmuls  64(%ebx)
-	fstps 124(%ecx)
-
-	flds  120(%edx)
-	fsubs  68(%edx)
-	fmuls  68(%ebx)
-	fstps 120(%ecx)
-
-	flds   72(%edx)
-	fadds 116(%edx)
-	fstps  72(%ecx)
-
-	flds   76(%edx)
-	fadds 112(%edx)
-	fstps  76(%ecx)
-
-	flds  116(%edx)
-	fsubs  72(%edx)
-	fmuls  72(%ebx)
-	fstps 116(%ecx)
-
-	flds  112(%edx)
-	fsubs  76(%edx)
-	fmuls  76(%ebx)
-	fstps 112(%ecx)
-
-	flds   80(%edx)
-	fadds 108(%edx)
-	fstps  80(%ecx)
-
-	flds   84(%edx)
-	fadds 104(%edx)
-	fstps  84(%ecx)
-
-	flds  108(%edx)
-	fsubs  80(%edx)
-	fmuls  80(%ebx)
-	fstps 108(%ecx)
-
-	flds  104(%edx)
-	fsubs  84(%edx)
-	fmuls  84(%ebx)
-	fstps 104(%ecx)
-
-	flds   88(%edx)
-	fadds 100(%edx)
-	fstps  88(%ecx)
-
-	flds   92(%edx)
-	fadds  96(%edx)
-	fstps  92(%ecx)
-
-	flds  100(%edx)
-	fsubs  88(%edx)
-	fmuls  88(%ebx)
-	fstps 100(%ecx)
-
-	flds   96(%edx)
-	fsubs  92(%edx)
-	fmuls  92(%ebx)
-	fstps  96(%ecx)
-*/
-// end phase 3 fpu
-
-	
-/* Phase 4 (completed, buggy) */
-/*
-/	movq	96(%ebx), %mm2
-/	movq	104(%ebx), %mm6
-	movups	96(%ebx), %xmm4
-
-
-/	movq	(%ecx), %mm0
-/	movq	8(%ecx), %mm4
-	movups	(%ecx), %xmm0
-
-/	movq	%mm0, %mm3
-/	movq	%mm4, %mm7
-	movaps	%xmm0, %xmm2
-
-/	movq	24(%ecx), %mm1
-/	movq	16(%ecx), %mm5
-	movups	16(%ecx), %xmm1
-
-/	pswapd	%mm1, %mm1
-/	pswapd	%mm5, %mm5
-////	shufps	$177, %xmm1, %xmm1
-	shufps	$27, %xmm1, %xmm1
-
-/	pfadd	%mm1, %mm0
-/	pfadd	%mm5, %mm4
-	addps	%xmm1, %xmm0
-
-/	movq	%mm0, (%edx)
-/	movq	%mm4, 8(%edx)
-	movups	%xmm0, (%edx)
-
-/	pfsub	%mm1, %mm3
-/	pfsub	%mm5, %mm7
-	subps	%xmm1, %xmm2
-
-/	pfmul	%mm2, %mm3
-/	pfmul	%mm6, %mm7
-	mulps	%xmm4, %xmm2
-
-/	pswapd	%mm3, %mm3
-/	pswapd	%mm7, %mm7
-	shufps	$27, %xmm2, %xmm2
-
-/	movq	%mm3, 24(%edx)
-/	movq	%mm7, 16(%edx)
-	movups	%xmm2, 16(%edx)
-
-/	movq	32(%ecx), %mm0
-/	movq	40(%ecx), %mm4
-	movups	32(%ecx), %xmm0
-
-/	movq	%mm0, %mm3
-/	movq	%mm4, %mm7
-	movaps	%xmm0, %xmm2
-
-/	movq	56(%ecx), %mm1
-/	movq	48(%ecx), %mm5
-	movups	48(%ecx), %xmm1
-
-/	pswapd	%mm1, %mm1
-/	pswapd	%mm5, %mm5
-////	shufps	$177, %xmm1, %xmm1
-	shufps	$27, %xmm1, %xmm1
-
-/	pfadd	%mm1, %mm0
-/	pfadd	%mm5, %mm4
-	addps	%xmm1, %xmm0
-
-/	movq	%mm0, 32(%edx)
-/	movq	%mm4, 40(%edx)
-	movups	%xmm0, 32(%edx)
-
-/	pfsubr	%mm1, %mm3
-/	pfsubr	%mm5, %mm7
-// Luckily we can swap this (xmm1<->xmm2)
-	subps	%xmm2, %xmm1
-
-/	pfmul	%mm2, %mm3
-/	pfmul	%mm6, %mm7
-	mulps	%xmm4, %xmm1
-
-/	pswapd	%mm3, %mm3
-/	pswapd	%mm7, %mm7
-	shufps	$27, %xmm1, %xmm1
-
-/	movq	%mm3, 56(%edx)
-/	movq	%mm7, 48(%edx)
-	movups	%xmm1, 48(%edx)
-
-
-/	movq	64(%ecx), %mm0
-/	movq	72(%ecx), %mm4
-	movups	64(%ecx), %xmm0
-
-/	movq	%mm0, %mm3
-/	movq	%mm4, %mm7
-	movaps	%xmm0, %xmm2
-
-/	movq	88(%ecx), %mm1
-/	movq	80(%ecx), %mm5
-	movups	80(%ecx), %xmm1
-
-/	pswapd	%mm1, %mm1
-/	pswapd	%mm5, %mm5
-////	shufps	$177, %xmm1, %xmm1
-	shufps	$27, %xmm1, %xmm1
-
-/	pfadd	%mm1, %mm0
-/	pfadd	%mm5, %mm4
-	addps	%xmm1, %xmm0
-
-/	movq	%mm0, 64(%edx)
-/	movq	%mm4, 72(%edx)
-	movups	%xmm0, 64(%edx)
-
-/	pfsub	%mm1, %mm3
-/	pfsub	%mm5, %mm7
-	subps	%xmm1, %xmm2
-
-/	pfmul	%mm2, %mm3
-/	pfmul	%mm6, %mm7
-	mulps	%xmm4, %xmm2
-
-/	pswapd	%mm3, %mm3
-/	pswapd	%mm7, %mm7
-	shufps	$27, %xmm2, %xmm2
-
-/	movq	%mm3, 88(%edx)
-/	movq	%mm7, 80(%edx)
-	movups	%xmm2, 80(%edx)
-
-
-/	movq	96(%ecx), %mm0
-/	movq	104(%ecx), %mm4
-	movups	96(%ecx), %xmm0
-
-/	movq	%mm0, %mm3
-/	movq	%mm4, %mm7
-	movaps	%xmm0, %xmm2
-
-/	movq	120(%ecx), %mm1
-/	movq	112(%ecx), %mm5
-	movups	112(%ecx), %xmm1
-
-/	pswapd	%mm1, %mm1
-/	pswapd	%mm5, %mm5
-////	shufps	$177, %xmm1, %xmm1
-	shufps	$27, %xmm1, %xmm1
-
-/	pfadd	%mm1, %mm0
-/	pfadd	%mm5, %mm4
-	addps	%xmm1, %xmm0
-
-/	movq	%mm0, 96(%edx)
-/	movq	%mm4, 104(%edx)
-	movups	%xmm0, 96(%edx)
-
-/	pfsubr	%mm1, %mm3
-/	pfsubr	%mm5, %mm7
-// This is already optimized, so xmm2 must be swapped with xmm1 for rest of phase
-	subps	%xmm2, %xmm1
-
-/	pfmul	%mm2, %mm3
-/	pfmul	%mm6, %mm7
-	mulps	%xmm4, %xmm1
-
-/	pswapd	%mm3, %mm3
-/	pswapd	%mm7, %mm7
-	shufps	$27, %xmm1, %xmm1
-
-/	movq	%mm3, 120(%edx)
-/	movq	%mm7, 112(%edx)
-	movups	%xmm1, 112(%edx)
-*/
-
-// phase 4 fpu code
-/* Phase 4*/
-
-	flds     (%ecx)
-	fadds  28(%ecx)
-	fstps    (%edx)
-
-	flds     (%ecx)
-	fsubs  28(%ecx)
-	fmuls  96(%ebx)
-	fstps  28(%edx)
-
-	flds    4(%ecx)
-	fadds  24(%ecx)
-	fstps   4(%edx)
-
-	flds    4(%ecx)
-	fsubs  24(%ecx)
-	fmuls 100(%ebx)
-	fstps  24(%edx)
-
-	flds    8(%ecx)
-	fadds  20(%ecx)
-	fstps   8(%edx)
-
-	flds    8(%ecx)
-	fsubs  20(%ecx)
-	fmuls 104(%ebx)
-	fstps  20(%edx)
-
-	flds   12(%ecx)
-	fadds  16(%ecx)
-	fstps  12(%edx)
-
-	flds   12(%ecx)
-	fsubs  16(%ecx)
-	fmuls 108(%ebx)
-	fstps  16(%edx)
-
-	flds   32(%ecx)
-	fadds  60(%ecx)
-	fstps  32(%edx)
-
-	flds   60(%ecx)
-	fsubs  32(%ecx)
-	fmuls  96(%ebx)
-	fstps  60(%edx)
-
-	flds   36(%ecx)
-	fadds  56(%ecx)
-	fstps  36(%edx)
-
-	flds   56(%ecx)
-	fsubs  36(%ecx)
-	fmuls 100(%ebx)
-	fstps  56(%edx)
-
-	flds   40(%ecx)
-	fadds  52(%ecx)
-	fstps  40(%edx)
-
-	flds   52(%ecx)
-	fsubs  40(%ecx)
-	fmuls 104(%ebx)
-	fstps  52(%edx)
-
-	flds   44(%ecx)
-	fadds  48(%ecx)
-	fstps  44(%edx)
-
-	flds   48(%ecx)
-	fsubs  44(%ecx)
-	fmuls 108(%ebx)
-	fstps  48(%edx)
-
-	flds   64(%ecx)
-	fadds  92(%ecx)
-	fstps  64(%edx)
-
-	flds   64(%ecx)
-	fsubs  92(%ecx)
-	fmuls  96(%ebx)
-	fstps  92(%edx)
-
-	flds   68(%ecx)
-	fadds  88(%ecx)
-	fstps  68(%edx)
-
-	flds   68(%ecx)
-	fsubs  88(%ecx)
-	fmuls 100(%ebx)
-	fstps  88(%edx)
-
-	flds   72(%ecx)
-	fadds  84(%ecx)
-	fstps  72(%edx)
-
-	flds   72(%ecx)
-	fsubs  84(%ecx)
-	fmuls 104(%ebx)
-	fstps  84(%edx)
-
-	flds   76(%ecx)
-	fadds  80(%ecx)
-	fstps  76(%edx)
-
-	flds   76(%ecx)
-	fsubs  80(%ecx)
-	fmuls 108(%ebx)
-	fstps  80(%edx)
-
-	flds   96(%ecx)
-	fadds 124(%ecx)
-	fstps  96(%edx)
-
-	flds  124(%ecx)
-	fsubs  96(%ecx)
-	fmuls  96(%ebx)
-	fstps 124(%edx)
-
-	flds  100(%ecx)
-	fadds 120(%ecx)
-	fstps 100(%edx)
-
-	flds  120(%ecx)
-	fsubs 100(%ecx)
-	fmuls 100(%ebx)
-	fstps 120(%edx)
-
-	flds  104(%ecx)
-	fadds 116(%ecx)
-	fstps 104(%edx)
-
-	flds  116(%ecx)
-	fsubs 104(%ecx)
-	fmuls 104(%ebx)
-	fstps 116(%edx)
-
-	flds  108(%ecx)
-	fadds 112(%ecx)
-	fstps 108(%edx)
-
-	flds  112(%ecx)
-	fsubs 108(%ecx)
-	fmuls 108(%ebx)
-	fstps 112(%edx)
-
-	flds     (%edx)
-	fadds  12(%edx)
-	fstps    (%ecx)
-
-	flds     (%edx)
-	fsubs  12(%edx)
-	fmuls 112(%ebx)
-	fstps  12(%ecx)
-
-	flds    4(%edx)
-	fadds   8(%edx)
-	fstps   4(%ecx)
-
-	flds    4(%edx)
-	fsubs   8(%edx)
-	fmuls 116(%ebx)
-	fstps   8(%ecx)
-
-	flds   16(%edx)
-	fadds  28(%edx)
-	fstps  16(%ecx)
-
-	flds   28(%edx)
-	fsubs  16(%edx)
-	fmuls 112(%ebx)
-	fstps  28(%ecx)
-
-	flds   20(%edx)
-	fadds  24(%edx)
-	fstps  20(%ecx)
-
-	flds   24(%edx)
-	fsubs  20(%edx)
-	fmuls 116(%ebx)
-	fstps  24(%ecx)
-
-	flds   32(%edx)
-	fadds  44(%edx)
-	fstps  32(%ecx)
-
-	flds   32(%edx)
-	fsubs  44(%edx)
-	fmuls 112(%ebx)
-	fstps  44(%ecx)
-
-	flds   36(%edx)
-	fadds  40(%edx)
-	fstps  36(%ecx)
-
-	flds   36(%edx)
-	fsubs  40(%edx)
-	fmuls 116(%ebx)
-	fstps  40(%ecx)
-
-	flds   48(%edx)
-	fadds  60(%edx)
-	fstps  48(%ecx)
-
-	flds   60(%edx)
-	fsubs  48(%edx)
-	fmuls 112(%ebx)
-	fstps  60(%ecx)
-
-	flds   52(%edx)
-	fadds  56(%edx)
-	fstps  52(%ecx)
-
-	flds   56(%edx)
-	fsubs  52(%edx)
-	fmuls 116(%ebx)
-	fstps  56(%ecx)
-
-	flds   64(%edx)
-	fadds  76(%edx)
-	fstps  64(%ecx)
-
-	flds   64(%edx)
-	fsubs  76(%edx)
-	fmuls 112(%ebx)
-	fstps  76(%ecx)
-
-	flds   68(%edx)
-	fadds  72(%edx)
-	fstps  68(%ecx)
-
-	flds   68(%edx)
-	fsubs  72(%edx)
-	fmuls 116(%ebx)
-	fstps  72(%ecx)
-
-	flds   80(%edx)
-	fadds  92(%edx)
-	fstps  80(%ecx)
-
-	flds   92(%edx)
-	fsubs  80(%edx)
-	fmuls 112(%ebx)
-	fstps  92(%ecx)
-
-	flds   84(%edx)
-	fadds  88(%edx)
-	fstps  84(%ecx)
-
-	flds   88(%edx)
-	fsubs  84(%edx)
-	fmuls 116(%ebx)
-	fstps  88(%ecx)
-
-	flds   96(%edx)
-	fadds 108(%edx)
-	fstps  96(%ecx)
-
-	flds   96(%edx)
-	fsubs 108(%edx)
-	fmuls 112(%ebx)
-	fstps 108(%ecx)
-
-	flds  100(%edx)
-	fadds 104(%edx)
-	fstps 100(%ecx)
-
-	flds  100(%edx)
-	fsubs 104(%edx)
-	fmuls 116(%ebx)
-	fstps 104(%ecx)
-
-	flds  112(%edx)
-	fadds 124(%edx)
-	fstps 112(%ecx)
-
-	flds  124(%edx)
-	fsubs 112(%edx)
-	fmuls 112(%ebx)
-	fstps 124(%ecx)
-
-	flds  116(%edx)
-	fadds 120(%edx)
-	fstps 116(%ecx)
-
-	flds  120(%edx)
-	fsubs 116(%edx)
-	fmuls 116(%ebx)
-	fstps 120(%ecx)
-	
-// end of phase 4 fpu
-
-// the stuff below still needs to be finished; FPU code is used for it for now
-/* Phase 5 (completed, crashing) */
-/*
-/	movq	112(%ebx), %mm2
-	// move 8 byte data to (low)high quadword - check this! atmos
-	movlps	112(%ebx), %xmm4
-	// maybe I need movhlps too to get data into correct quadword
-	movlhps	%xmm4, %xmm4
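
The movlps/movlhps pair above builds a vector whose high half duplicates its low
half: two floats are loaded into the low quadword, then copied up. In intrinsics
this looks as follows (a sketch, helper name hypothetical):

    #include <xmmintrin.h>

    /* Load 8 bytes (two floats) into the low quadword and duplicate
     * them into the high one: result = (x, y, x, y). */
    static inline __m128 load2_dup(const float *p)
    {
        __m128 v = _mm_loadl_pi(_mm_setzero_ps(), (const __m64 *)p);
        return _mm_movelh_ps(v, v);
    }
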
-
-/	movq	(%edx), %mm0
-/	movq	16(%edx), %mm4
-	movups	(%edx), %xmm0
-
-/	movq	%mm0, %mm3
-/	movq	%mm4, %mm7
-	movaps	%xmm0, %xmm2
-
-// hmm? this is strange
-/	movq	8(%edx), %mm1
-/	movq	24(%edx), %mm5
-	movlps	8(%edx), %xmm1
-	movhps	24(%edx), %xmm1
-
-/	pswapd	%mm1, %mm1
-/	pswapd	%mm5, %mm5
-	pshufd	$177, %xmm1, %xmm1
-
-/	pfadd	%mm1, %mm0
-/	pfadd	%mm5, %mm4
-	addps	%xmm1, %xmm0
-
-/	movq	%mm0, (%ecx)
-/	movq	%mm4, 16(%ecx)
-	movlps	%xmm0, (%ecx)
-	movhps	%xmm0, 16(%ecx)
-
-/	pfsub	%mm1, %mm3
-/	pfsubr	%mm5, %mm7
-// I need to emulate pfsubr here
-	movaps	%xmm1, %xmm3
-	subps	%xmm2, %xmm3
-	subps	%xmm1, %xmm2
-// now move the correct quadword from the reverse subtraction in xmm3 to the correct
-// quadword in xmm2, leaving the other quadword with the non-reversed subtraction untouched
-///	shufpd	$2, %xmm3, %xmm2
-// (or $1?) (see ia32-ref p.749)
-// optimize
-	movq	%xmm2, %xmm3
-	movaps	%xmm3, %xmm2
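
The pfsubr emulation above computes both subtraction directions and then tries
to merge them quadword-wise; the intended result is trivial to state in scalar C
(a sketch, hypothetical helper name):

    /* pfsubr computes dst = src - dst, the reversed difference.
     * Intended merge: forward difference in the low two lanes,
     * reversed difference in the high two. */
    static void sub_fwd_rev(float out[4], const float a[4], const float b[4])
    {
        out[0] = a[0] - b[0];
        out[1] = a[1] - b[1];
        out[2] = b[2] - a[2];
        out[3] = b[3] - a[3];
    }
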
-
-/	pfmul	%mm2, %mm3
-/	pfmul	%mm2, %mm7
-	mulps	%xmm4, %xmm2
-
-/	pswapd	%mm3, %mm3
-/	pswapd	%mm7, %mm7
-	shufps	$177, %xmm2, %xmm2
-
-/	movq	%mm3, 8(%ecx)
-/	movq	%mm7, 24(%ecx)
-	movlps	%xmm2, 8(%ecx)
-	movhps	%xmm2, 24(%ecx)
-
-/	movq	32(%edx), %mm0
-/	movq	48(%edx), %mm4
-	movlps	32(%edx), %xmm0
-	movhps	48(%edx), %xmm0
-
-/	movq	%mm0, %mm3
-/	movq	%mm4, %mm7
-	movaps	%xmm0, %xmm2
-
-/	movq	40(%edx), %mm1
-/	movq	56(%edx), %mm5
-	movlps	40(%edx), %xmm1
-	movhps	56(%edx), %xmm1
-
-/	pswapd	%mm1, %mm1
-/	pswapd	%mm5, %mm5
-	shufps	$177, %xmm1, %xmm1
-
-/	pfadd	%mm1, %mm0
-/	pfadd	%mm5, %mm4
-	addps	%xmm1, %xmm0
-
-/	movq	%mm0, 32(%ecx)
-/	movq	%mm4, 48(%ecx)
-	movlps	%xmm0, 32(%ecx)
-	movhps	%xmm0, 48(%ecx)
-
-/	pfsub	%mm1, %mm3
-/	pfsubr	%mm5, %mm7
-	movaps	%xmm1, %xmm3
-	subps	%xmm2, %xmm3
-	subps	%xmm1, %xmm2
-///	shufpd	$2, %xmm3, %xmm2
-// (or $1?)
-// optimize
-	movq	%xmm2, %xmm3
-	movaps	%xmm3, %xmm2
-
-/	pfmul	%mm2, %mm3
-/	pfmul	%mm2, %mm7
-	mulps	%xmm4, %xmm2
-
-/	pswapd	%mm3, %mm3
-/	pswapd	%mm7, %mm7
-	shufps	$177, %xmm2, %xmm2
-
-/	movq	%mm3, 40(%ecx)
-/	movq	%mm7, 56(%ecx)
-	movlps	%xmm2, 40(%ecx)
-	movhps	%xmm2, 56(%ecx)
-
-
-/	movq	64(%edx), %mm0
-/	movq	80(%edx), %mm4
-	movlps	64(%edx), %xmm0
-	movhps	80(%edx), %xmm0
-
-/	movq	%mm0, %mm3
-/	movq	%mm4, %mm7
-	movaps	%xmm0, %xmm2
-
-/	movq	72(%edx), %mm1
-/	movq	88(%edx), %mm5
-	movlps	72(%edx), %xmm1
-	movhps	88(%edx), %xmm1
-
-/	pswapd	%mm1, %mm1
-/	pswapd	%mm5, %mm5
-	shufps	$177, %xmm1, %xmm1
-
-/	pfadd	%mm1, %mm0
-/	pfadd	%mm5, %mm4
-	addps	%xmm1, %xmm0
-
-/	movq	%mm0, 64(%ecx)
-/	movq	%mm4, 80(%ecx)
-	movlps	%xmm0, 64(%ecx)
-	movhps	%xmm0, 80(%ecx)
-
-/	pfsub	%mm1, %mm3
-/	pfsubr	%mm5, %mm7
-	movaps	%xmm1, %xmm3
-	subps	%xmm2, %xmm3
-	subps	%xmm1, %xmm2
-///	shufpd	$2, %xmm3, %xmm2
-// (or $1?)
-// optimize
-	movq	%xmm2, %xmm3
-	movaps	%xmm3, %xmm2
-
-/	pfmul	%mm2, %mm3
-/	pfmul	%mm2, %mm7
-	mulps	%xmm4, %xmm2
-
-/	pswapd	%mm3, %mm3
-/	pswapd	%mm7, %mm7
-	shufps	$177, %xmm2, %xmm2
-
-/	movq	%mm3, 72(%ecx)
-/	movq	%mm7, 88(%ecx)
-	movlps	%xmm2, 72(%ecx)
-	movhps	%xmm2, 88(%ecx)
-
-/	movq	96(%edx), %mm0
-/	movq	112(%edx), %mm4
-	movups	96(%edx), %xmm0
-
-/	movq	%mm0, %mm3
-/	movq	%mm4, %mm7
-	movaps	%xmm0, %xmm2
-
-/	movq	104(%edx), %mm1
-/	movq	120(%edx), %mm5
-	movlps	104(%edx), %xmm1
-	movhps	120(%edx), %xmm1
-
-/	pswapd	%mm1, %mm1
-/	pswapd	%mm5, %mm5
-	shufps	$177, %xmm1, %xmm1
-
-/	pfadd	%mm1, %mm0
-/	pfadd	%mm5, %mm4
-	addps	%xmm1, %xmm0
-
-/	movq	%mm0, 96(%ecx)
-/	movq	%mm4, 112(%ecx)
-	movups	%xmm0, 96(%ecx)
-
-/	pfsub	%mm1, %mm3
-/	pfsubr	%mm5, %mm7
-	movaps	%xmm1, %xmm3
-	subps	%xmm2, %xmm3
-	subps	%xmm1, %xmm2
-///	shufpd	$2, %xmm3, %xmm2
-// (or $1?)
-// optimize
-	movq	%xmm2, %xmm3
-	movaps	%xmm3, %xmm2
-
-/	pfmul	%mm2, %mm3
-/	pfmul	%mm2, %mm7
-	mulps	%xmm4, %xmm2
-
-/	pswapd	%mm3, %mm3
-/	pswapd	%mm7, %mm7
-	shufps	$177, %xmm2, %xmm2
-
-/	movq	%mm3, 104(%ecx)
-/	movq	%mm7, 120(%ecx)
-	movlps	%xmm2, 104(%ecx)
-	movhps	%xmm2, 120(%ecx)
-*/
-	
-	
-/* Phase 6. This is the end of the easy road. */
-/* Code below is coded in scalar mode. Should be optimized */
-//
-//	movd	plus_1f, %mm6
-//	punpckldq 120(%ebx), %mm6      /* mm6 = 1.0 | 120(%ebx)*/
-//	movq	x_plus_minus_3dnow, %mm7 /* mm7 = +1 | -1 */
-/*
-	movq	32(%ecx), %mm0
-	movq	64(%ecx), %mm2
-	movq	%mm0, %mm1
-	movq	%mm2, %mm3
-	pxor	%mm7, %mm1
-	pxor	%mm7, %mm3
-	pfacc	%mm1, %mm0
-	pfacc	%mm3, %mm2
-	pfmul	%mm6, %mm0
-	pfmul	%mm6, %mm2
-	movq	%mm0, 32(%edx)
-	movq	%mm2, 64(%edx)
-
-	movd	44(%ecx), %mm0
-	movd	40(%ecx), %mm2
-	movd	120(%ebx), %mm3
-	punpckldq 76(%ecx), %mm0
-	punpckldq 72(%ecx), %mm2
-	punpckldq %mm3, %mm3
-	movq	%mm0, %mm4
-	movq	%mm2, %mm5
-	pfsub	%mm2, %mm0
-	pfmul	%mm3, %mm0
-	movq	%mm0, %mm1
-	pfadd	%mm5, %mm0
-	pfadd	%mm4, %mm0
-	movq	%mm0, %mm2
-	punpckldq %mm1, %mm0
-	punpckhdq %mm1, %mm2
-	movq	%mm0, 40(%edx)
-	movq	%mm2, 72(%edx)
-
-	movd   48(%ecx), %mm3
-	movd   60(%ecx), %mm2
-	pfsub  52(%ecx), %mm3
-	pfsub  56(%ecx), %mm2
-	pfmul 120(%ebx), %mm3
-	pfmul 120(%ebx), %mm2
-	movq	%mm2, %mm1
-
-	pfadd  56(%ecx), %mm1
-	pfadd  60(%ecx), %mm1
-	movq	%mm1, %mm0
-
-	pfadd  48(%ecx), %mm0
-	pfadd  52(%ecx), %mm0
-	pfadd	%mm3, %mm1
-	punpckldq %mm2, %mm1
-	pfadd	%mm3, %mm2
-	punpckldq %mm2, %mm0
-	movq	%mm1, 56(%edx)
-	movq	%mm0, 48(%edx)
-*/
-/*---*/
-/*
-	movd   92(%ecx), %mm1
-	pfsub  88(%ecx), %mm1
-	pfmul 120(%ebx), %mm1
-	movd   %mm1, 92(%edx)
-	pfadd  92(%ecx), %mm1
-	pfadd  88(%ecx), %mm1
-	movq   %mm1, %mm0
-	
-	pfadd  80(%ecx), %mm0
-	pfadd  84(%ecx), %mm0
-	movd   %mm0, 80(%edx)
-
-	movd   80(%ecx), %mm0
-	pfsub  84(%ecx), %mm0
-	pfmul 120(%ebx), %mm0
-	pfadd  %mm0, %mm1
-	pfadd  92(%edx), %mm0
-	punpckldq %mm1, %mm0
-	movq   %mm0, 84(%edx)
-
-	movq	96(%ecx), %mm0
-	movq	%mm0, %mm1
-	pxor	%mm7, %mm1
-	pfacc	%mm1, %mm0
-	pfmul	%mm6, %mm0
-	movq	%mm0, 96(%edx)
-
-	movd  108(%ecx), %mm0
-	pfsub 104(%ecx), %mm0
-	pfmul 120(%ebx), %mm0
-	movd  %mm0, 108(%edx)
-	pfadd 104(%ecx), %mm0
-	pfadd 108(%ecx), %mm0
-	movd  %mm0, 104(%edx)
-
-	movd  124(%ecx), %mm1
-	pfsub 120(%ecx), %mm1
-	pfmul 120(%ebx), %mm1
-	movd  %mm1, 124(%edx)
-	pfadd 120(%ecx), %mm1
-	pfadd 124(%ecx), %mm1
-	movq  %mm1, %mm0
-
-	pfadd 112(%ecx), %mm0
-	pfadd 116(%ecx), %mm0
-	movd  %mm0, 112(%edx)
-
-	movd  112(%ecx), %mm0
-	pfsub 116(%ecx), %mm0
-	pfmul 120(%ebx), %mm0
-	pfadd %mm0,%mm1
-	pfadd 124(%edx), %mm0
-	punpckldq %mm1, %mm0
-	movq  %mm0, 116(%edx)
-
-	jnz .L01
-*/
-
-	
-/* Phase 7*/
-/* Code below is coded in scalar mode. Should be optimized */
-/*
-	movd      (%ecx), %mm0
-	pfadd    4(%ecx), %mm0
-	movd     %mm0, 1024(%esi)
-
-	movd      (%ecx), %mm0
-	pfsub    4(%ecx), %mm0
-	pfmul  120(%ebx), %mm0
-	movd      %mm0, (%esi)
-	movd      %mm0, (%edi)
-
-	movd   12(%ecx), %mm0
-	pfsub   8(%ecx), %mm0
-	pfmul 120(%ebx), %mm0
-	movd    %mm0, 512(%edi)
-	pfadd   12(%ecx), %mm0
-	pfadd   8(%ecx), %mm0
-	movd    %mm0, 512(%esi)
-
-	movd   16(%ecx), %mm0
-	pfsub  20(%ecx), %mm0
-	pfmul 120(%ebx), %mm0
-	movq	%mm0, %mm3
-
-	movd   28(%ecx), %mm0
-	pfsub  24(%ecx), %mm0
-	pfmul 120(%ebx), %mm0
-	movd    %mm0, 768(%edi)
-	movq	%mm0, %mm2
-	
-	pfadd  24(%ecx), %mm0
-	pfadd  28(%ecx), %mm0
-	movq	%mm0, %mm1
-
-	pfadd  16(%ecx), %mm0
-	pfadd  20(%ecx), %mm0
-	movd   %mm0, 768(%esi)
-	pfadd  %mm3, %mm1
-	movd   %mm1, 256(%esi)
-	pfadd  %mm3, %mm2
-	movd   %mm2, 256(%edi)
-*/
-
-	
-/* Phase 8*/
-/*
-	movq   32(%edx), %mm0
-	movq   48(%edx), %mm1
-	pfadd  48(%edx), %mm0
-	pfadd  40(%edx), %mm1
-	movd   %mm0, 896(%esi)
-	movd   %mm1, 640(%esi)
-	psrlq  $32, %mm0
-	psrlq  $32, %mm1
-	movd   %mm0, 128(%edi)
-	movd   %mm1, 384(%edi)
-
-	movd   40(%edx), %mm0
-	pfadd  56(%edx), %mm0
-	movd   %mm0, 384(%esi)
-
-	movd   56(%edx), %mm0
-	pfadd  36(%edx), %mm0
-	movd   %mm0, 128(%esi)
-
-	movd   60(%edx), %mm0
-	movd   %mm0, 896(%edi)
-	pfadd  44(%edx), %mm0
-	movd   %mm0, 640(%edi)
-
-	movq   96(%edx), %mm0
-	movq   112(%edx), %mm2
-	movq   104(%edx), %mm4
-	pfadd  112(%edx), %mm0
-	pfadd  104(%edx), %mm2
-	pfadd  120(%edx), %mm4
-	movq   %mm0, %mm1
-	movq   %mm2, %mm3
-	movq   %mm4, %mm5
-	pfadd  64(%edx), %mm0
-	pfadd  80(%edx), %mm2
-	pfadd  72(%edx), %mm4
-	movd   %mm0, 960(%esi)
-	movd   %mm2, 704(%esi)
-	movd   %mm4, 448(%esi)
-	psrlq  $32, %mm0
-	psrlq  $32, %mm2
-	psrlq  $32, %mm4
-	movd   %mm0, 64(%edi)
-	movd   %mm2, 320(%edi)
-	movd   %mm4, 576(%edi)
-	pfadd  80(%edx), %mm1
-	pfadd  72(%edx), %mm3
-	pfadd  88(%edx), %mm5
-	movd   %mm1, 832(%esi)
-	movd   %mm3, 576(%esi)
-	movd   %mm5, 320(%esi)
-	psrlq  $32, %mm1
-	psrlq  $32, %mm3
-	psrlq  $32, %mm5
-	movd   %mm1, 192(%edi)
-	movd   %mm3, 448(%edi)
-	movd   %mm5, 704(%edi)
-
-	movd   120(%edx), %mm0
-	pfadd  100(%edx), %mm0
-	movq   %mm0, %mm1
-	pfadd  88(%edx), %mm0
-	movd   %mm0, 192(%esi)
-	pfadd  68(%edx), %mm1
-	movd   %mm1, 64(%esi)
-
-	movd  124(%edx), %mm0
-	movd  %mm0, 960(%edi)
-	pfadd  92(%edx), %mm0
-	movd  %mm0, 832(%edi)
-
-	jmp	.L_bye
-.L01:
-*/
-
-	
-/* Phase 9*/
-/*
-	movq	(%ecx), %mm0
-	movq	%mm0, %mm1
-	pxor    %mm7, %mm1
-	pfacc	%mm1, %mm0
-	pfmul	%mm6, %mm0
-	pf2id	%mm0, %mm0
-	movd	%mm0, %eax
-	movw    %ax, 512(%esi)
-	psrlq	$32, %mm0
-	movd	%mm0, %eax
-	movw    %ax, (%esi)
-
-	movd    12(%ecx), %mm0
-	pfsub    8(%ecx), %mm0
-	pfmul  120(%ebx), %mm0
-	pf2id    %mm0, %mm7
-	movd	 %mm7, %eax
-	movw     %ax, 256(%edi)
-	pfadd   12(%ecx), %mm0
-	pfadd    8(%ecx), %mm0
-	pf2id    %mm0, %mm0
-	movd	 %mm0, %eax
-	movw     %ax, 256(%esi)
-
-	movd   16(%ecx), %mm3
-	pfsub  20(%ecx), %mm3
-	pfmul  120(%ebx), %mm3
-	movq   %mm3, %mm2
-
-	movd   28(%ecx), %mm2
-	pfsub  24(%ecx), %mm2
-	pfmul 120(%ebx), %mm2
-	movq   %mm2, %mm1
-
-	pf2id  %mm2, %mm7
-	movd   %mm7, %eax
-	movw   %ax, 384(%edi)
-	
-	pfadd  24(%ecx), %mm1
-	pfadd  28(%ecx), %mm1
-	movq   %mm1, %mm0
-	
-	pfadd  16(%ecx), %mm0
-	pfadd  20(%ecx), %mm0
-	pf2id  %mm0, %mm0
-	movd   %mm0, %eax
-	movw   %ax, 384(%esi)
-	pfadd  %mm3, %mm1
-	pf2id  %mm1, %mm1
-	movd   %mm1, %eax
-	movw   %ax, 128(%esi)
-	pfadd  %mm3, %mm2
-	pf2id  %mm2, %mm2
-	movd   %mm2, %eax
-	movw   %ax, 128(%edi)
-*/
-
-	
-/* Phase 10*/
-/*
-	movq    32(%edx), %mm0
-	movq    48(%edx), %mm1
-	pfadd   48(%edx), %mm0
-	pfadd   40(%edx), %mm1
-	pf2id   %mm0, %mm0
-	pf2id   %mm1, %mm1
-	movd	%mm0, %eax
-	movd	%mm1, %ecx
-	movw    %ax, 448(%esi)
-	movw    %cx, 320(%esi)
-	psrlq   $32, %mm0
-	psrlq   $32, %mm1
-	movd	%mm0, %eax
-	movd	%mm1, %ecx
-	movw    %ax, 64(%edi)
-	movw    %cx, 192(%edi)
-
-	movd   40(%edx), %mm3
-	movd   56(%edx), %mm4
-	movd   60(%edx), %mm0
-	movd   44(%edx), %mm2
-	movd  120(%edx), %mm5
-	punpckldq %mm4, %mm3
-	punpckldq 124(%edx), %mm0
-	pfadd 100(%edx), %mm5
-	punpckldq 36(%edx), %mm4
-	punpckldq 92(%edx), %mm2	
-	movq  %mm5, %mm6
-	pfadd  %mm4, %mm3
-	pf2id  %mm0, %mm1
-	pf2id  %mm3, %mm3
-	pfadd  88(%edx), %mm5
-	movd   %mm1, %eax
-	movd   %mm3, %ecx
-	movw   %ax, 448(%edi)
-	movw   %cx, 192(%esi)
-	pf2id  %mm5, %mm5
-	psrlq  $32, %mm1
-        psrlq  $32, %mm3
-	movd   %mm5, %ebx
-	movd   %mm1, %eax
-	movd   %mm3, %ecx
-	movw   %bx, 96(%esi)
-	movw   %ax, 480(%edi)
-	movw   %cx, 64(%esi)
-	pfadd  %mm2, %mm0
-	pf2id  %mm0, %mm0
-	movd   %mm0, %eax
-	pfadd  68(%edx), %mm6
-	movw   %ax, 320(%edi)
-	psrlq  $32, %mm0
-	pf2id  %mm6, %mm6
-	movd   %mm0, %eax
-	movd   %mm6, %ebx
-	movw   %ax, 416(%edi)
-	movw   %bx, 32(%esi)
-
-	movq   96(%edx), %mm0
-	movq  112(%edx), %mm2
-	movq  104(%edx), %mm4
-	pfadd %mm2, %mm0
-	pfadd %mm4, %mm2
-	pfadd 120(%edx), %mm4
-	movq  %mm0, %mm1
-	movq  %mm2, %mm3
-	movq  %mm4, %mm5
-	pfadd  64(%edx), %mm0
-	pfadd  80(%edx), %mm2
-	pfadd  72(%edx), %mm4
-	pf2id  %mm0, %mm0
-	pf2id  %mm2, %mm2
-	pf2id  %mm4, %mm4
-	movd   %mm0, %eax
-	movd   %mm2, %ecx
-	movd   %mm4, %ebx
-	movw   %ax, 480(%esi)
-	movw   %cx, 352(%esi)
-	movw   %bx, 224(%esi)
-	psrlq  $32, %mm0
-	psrlq  $32, %mm2
-	psrlq  $32, %mm4
-	movd   %mm0, %eax
-	movd   %mm2, %ecx
-	movd   %mm4, %ebx
-	movw   %ax, 32(%edi)
-	movw   %cx, 160(%edi)
-	movw   %bx, 288(%edi)
-	pfadd  80(%edx), %mm1
-	pfadd  72(%edx), %mm3
-	pfadd  88(%edx), %mm5
-	pf2id  %mm1, %mm1
-	pf2id  %mm3, %mm3
-	pf2id  %mm5, %mm5
-	movd   %mm1, %eax
-	movd   %mm3, %ecx
-	movd   %mm5, %ebx
-	movw   %ax, 416(%esi)
-	movw   %cx, 288(%esi)
-	movw   %bx, 160(%esi)
-	psrlq  $32, %mm1
-	psrlq  $32, %mm3
-	psrlq  $32, %mm5
-	movd   %mm1, %eax
-	movd   %mm3, %ecx
-	movd   %mm5, %ebx
-	movw   %ax, 96(%edi)
-	movw   %cx, 224(%edi)
-	movw   %bx, 352(%edi)
-
-	movsw
-
-.L_bye:
-	addl $256,%esp
-/	femms
-	emms
-	popl %edi
-	popl %esi
-	popl %ebx
-	ret  $12
-*/
-
-// here comes old-fashioned FPU code for the tough parts
-
-/* Phase 5*/
-
-	flds   32(%ecx)
-	fadds  36(%ecx)
-	fstps  32(%edx)
-
-	flds   32(%ecx)
-	fsubs  36(%ecx)
-	fmuls 120(%ebx)
-	fstps  36(%edx)
-
-	flds   44(%ecx)
-	fsubs  40(%ecx)
-	fmuls 120(%ebx)
-	fsts   44(%edx)
-	fadds  40(%ecx)
-	fadds  44(%ecx)
-	fstps  40(%edx)
-
-	flds   48(%ecx)
-	fsubs  52(%ecx)
-	fmuls 120(%ebx)
-
-	flds   60(%ecx)
-	fsubs  56(%ecx)
-	fmuls 120(%ebx)
-	fld      %st(0)
-	fadds  56(%ecx)
-	fadds  60(%ecx)
-	fld      %st(0)
-	fadds  48(%ecx)
-	fadds  52(%ecx)
-	fstps  48(%edx)
-	fadd     %st(2)
-	fstps  56(%edx)
-	fsts   60(%edx)
-	faddp    %st(1)
-	fstps  52(%edx)
-
-	flds   64(%ecx)
-	fadds  68(%ecx)
-	fstps  64(%edx)
-
-	flds   64(%ecx)
-	fsubs  68(%ecx)
-	fmuls 120(%ebx)
-	fstps  68(%edx)
-
-	flds   76(%ecx)
-	fsubs  72(%ecx)
-	fmuls 120(%ebx)
-	fsts   76(%edx)
-	fadds  72(%ecx)
-	fadds  76(%ecx)
-	fstps  72(%edx)
-
-	flds   92(%ecx)
-	fsubs  88(%ecx)
-	fmuls 120(%ebx)
-	fsts   92(%edx)
-	fadds  92(%ecx)
-	fadds  88(%ecx)
-	fld      %st(0)
-	fadds  80(%ecx)
-	fadds  84(%ecx)
-	fstps  80(%edx)
-
-	flds   80(%ecx)
-	fsubs  84(%ecx)
-	fmuls 120(%ebx)
-	fadd  %st(0), %st(1)
-	fadds 92(%edx)
-	fstps 84(%edx)
-	fstps 88(%edx)
-
-	flds   96(%ecx)
-	fadds 100(%ecx)
-	fstps  96(%edx)
-
-	flds   96(%ecx)
-	fsubs 100(%ecx)
-	fmuls 120(%ebx)
-	fstps 100(%edx)
-
-	flds  108(%ecx)
-	fsubs 104(%ecx)
-	fmuls 120(%ebx)
-	fsts  108(%edx)
-	fadds 104(%ecx)
-	fadds 108(%ecx)
-	fstps 104(%edx)
-
-	flds  124(%ecx)
-	fsubs 120(%ecx)
-	fmuls 120(%ebx)
-	fsts  124(%edx)
-	fadds 120(%ecx)
-	fadds 124(%ecx)
-	fld      %st(0)
-	fadds 112(%ecx)
-	fadds 116(%ecx)
-	fstps 112(%edx)
-
-	flds  112(%ecx)
-	fsubs 116(%ecx)
-	fmuls 120(%ebx)
-	fadd  %st(0),%st(1)
-	fadds 124(%edx)
-	fstps 116(%edx)
-	fstps 120(%edx)
-	jnz .L01
-
-	
-/* Phase 6*/
-
-	flds      (%ecx)
-	fadds    4(%ecx)
-	fstps 1024(%esi)
-
-	flds      (%ecx)
-	fsubs    4(%ecx)
-	fmuls  120(%ebx)
-	fsts      (%esi)
-	fstps     (%edi)
-
-	flds   12(%ecx)
-	fsubs   8(%ecx)
-	fmuls 120(%ebx)
-	fsts  512(%edi)
-	fadds  12(%ecx)
-	fadds   8(%ecx)
-	fstps 512(%esi)
-
-	flds   16(%ecx)
-	fsubs  20(%ecx)
-	fmuls 120(%ebx)
-
-	flds   28(%ecx)
-	fsubs  24(%ecx)
-	fmuls 120(%ebx)
-	fsts  768(%edi)
-	fld      %st(0)
-	fadds  24(%ecx)
-	fadds  28(%ecx)
-	fld      %st(0)
-	fadds  16(%ecx)
-	fadds  20(%ecx)
-	fstps 768(%esi)
-	fadd     %st(2)
-	fstps 256(%esi)
-	faddp    %st(1)
-	fstps 256(%edi)
-	
-/* Phase 7*/
-
-	flds   32(%edx)
-	fadds  48(%edx)
-	fstps 896(%esi)
-
-	flds   48(%edx)
-	fadds  40(%edx)
-	fstps 640(%esi)
-
-	flds   40(%edx)
-	fadds  56(%edx)
-	fstps 384(%esi)
-
-	flds   56(%edx)
-	fadds  36(%edx)
-	fstps 128(%esi)
-
-	flds   36(%edx)
-	fadds  52(%edx)
-	fstps 128(%edi)
-
-	flds   52(%edx)
-	fadds  44(%edx)
-	fstps 384(%edi)
-
-	flds   60(%edx)
-	fsts  896(%edi)
-	fadds  44(%edx)
-	fstps 640(%edi)
-
-	flds   96(%edx)
-	fadds 112(%edx)
-	fld      %st(0)
-	fadds  64(%edx)
-	fstps 960(%esi)
-	fadds  80(%edx)
-	fstps 832(%esi)
-
-	flds  112(%edx)
-	fadds 104(%edx)
-	fld      %st(0)
-	fadds  80(%edx)
-	fstps 704(%esi)
-	fadds  72(%edx)
-	fstps 576(%esi)
-
-	flds  104(%edx)
-	fadds 120(%edx)
-	fld      %st(0)
-	fadds  72(%edx)
-	fstps 448(%esi)
-	fadds  88(%edx)
-	fstps 320(%esi)
-
-	flds  120(%edx)
-	fadds 100(%edx)
-	fld      %st(0)
-	fadds  88(%edx)
-	fstps 192(%esi)
-	fadds  68(%edx)
-	fstps  64(%esi)
-
-	flds  100(%edx)
-	fadds 116(%edx)
-	fld      %st(0)
-	fadds  68(%edx)
-	fstps  64(%edi)
-	fadds  84(%edx)
-	fstps 192(%edi)
-
-	flds  116(%edx)
-	fadds 108(%edx)
-	fld      %st(0)
-	fadds  84(%edx)
-	fstps 320(%edi)
-	fadds  76(%edx)
-	fstps 448(%edi)
-
-	flds  108(%edx)
-	fadds 124(%edx)
-	fld      %st(0)
-	fadds  76(%edx)
-	fstps 576(%edi)
-	fadds  92(%edx)
-	fstps 704(%edi)
-
-	flds  124(%edx)
-	fsts  960(%edi)
-	fadds  92(%edx)
-	fstps 832(%edi)
-	addl $256,%esp
-	popl %edi
-	popl %esi
-	popl %ebx
-	ret
-.L01:	
-/* Phase 8*/
-
-	flds      (%ecx)
-	fadds    4(%ecx)
-	fistp  512(%esi)
-
-	flds      (%ecx)
-	fsubs    4(%ecx)
-	fmuls  120(%ebx)
-
-	fistp     (%esi)
-
-
-	flds    12(%ecx)
-	fsubs    8(%ecx)
-	fmuls  120(%ebx)
-	fist   256(%edi)
-	fadds   12(%ecx)
-	fadds    8(%ecx)
-	fistp  256(%esi)
-
-	flds   16(%ecx)
-	fsubs  20(%ecx)
-	fmuls 120(%ebx)
-
-	flds   28(%ecx)
-	fsubs  24(%ecx)
-	fmuls 120(%ebx)
-	fist  384(%edi)
-	fld      %st(0)
-	fadds  24(%ecx)
-	fadds  28(%ecx)
-	fld      %st(0)
-	fadds  16(%ecx)
-	fadds  20(%ecx)
-	fistp  384(%esi)
-	fadd     %st(2)
-	fistp  128(%esi)
-	faddp    %st(1)
-	fistp  128(%edi)
-	
-/* Phase 9*/
-
-	flds    32(%edx)
-	fadds   48(%edx)
-	fistp  448(%esi)
-
-	flds   48(%edx)
-	fadds  40(%edx)
-	fistp 320(%esi)
-
-	flds   40(%edx)
-	fadds  56(%edx)
-	fistp 192(%esi)
-
-	flds   56(%edx)
-	fadds  36(%edx)
-	fistp  64(%esi)
-
-	flds   36(%edx)
-	fadds  52(%edx)
-	fistp  64(%edi)
-
-	flds   52(%edx)
-	fadds  44(%edx)
-	fistp 192(%edi)
-
-	flds   60(%edx)
-	fist   448(%edi)
-	fadds  44(%edx)
-	fistp 320(%edi)
-
-	flds   96(%edx)
-	fadds 112(%edx)
-	fld      %st(0)
-	fadds  64(%edx)
-	fistp 480(%esi)
-	fadds  80(%edx)
-	fistp 416(%esi)
-
-	flds  112(%edx)
-	fadds 104(%edx)
-	fld      %st(0)
-	fadds  80(%edx)
-	fistp 352(%esi)
-	fadds  72(%edx)
-	fistp 288(%esi)
-
-	flds  104(%edx)
-	fadds 120(%edx)
-	fld      %st(0)
-	fadds  72(%edx)
-	fistp 224(%esi)
-	fadds  88(%edx)
-	fistp 160(%esi)
-
-	flds  120(%edx)
-	fadds 100(%edx)
-	fld      %st(0)
-	fadds  88(%edx)
-	fistp  96(%esi)
-	fadds  68(%edx)
-	fistp  32(%esi)
-
-	flds  100(%edx)
-	fadds 116(%edx)
-	fld      %st(0)
-	fadds  68(%edx)
-	fistp  32(%edi)
-	fadds  84(%edx)
-	fistp  96(%edi)
-
-	flds  116(%edx)
-	fadds 108(%edx)
-	fld      %st(0)
-	fadds  84(%edx)
-	fistp 160(%edi)
-	fadds  76(%edx)
-	fistp 224(%edi)
-
-	flds  108(%edx)
-	fadds 124(%edx)
-	fld      %st(0)
-	fadds  76(%edx)
-	fistp 288(%edi)
-	fadds  92(%edx)
-	fistp 352(%edi)
-
-	flds  124(%edx)
-	fist  480(%edi)
-	fadds  92(%edx)
-	fistp 416(%edi)
-	movsw
-	addl $256,%esp
-	popl %edi
-	popl %esi
-	popl %ebx
-	ret	$12
-
-// end of FPU stuff
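
The butterfly that phase 1 of this deleted file implements is simple to state in
scalar C; a sketch assuming the usual dct64 layout (32 input floats, a 32-entry
cosine table; all names hypothetical):

    static void dct64_phase1(float out[32], const float in[32],
                             const float costab[32])
    {
        /* The sum goes to the front half, the scaled difference to the
         * mirrored position in the back half. */
        for (int i = 0; i < 16; i++) {
            out[i]      = in[i] + in[31 - i];
            out[31 - i] = (in[i] - in[31 - i]) * costab[i];
        }
    }
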
--- a/mp3lib/mp3lib_objfix.sh	Wed Oct 22 20:58:46 2003 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,19 +0,0 @@
-#!/bin/sh
-# This script fixes up symbol mangling in the GNU as code of mp3lib.
-# (c)2001-2002 by Felix Buenemann <atmosfear at users.sourceforge.net>
-# This file is licensed under the GPL; more info at http://www.fsf.org/
-for i in \
-	"CpuDetect" \
-	"ipentium" \
-	"a3dnow" \
-	"isse" \
-	"dct36_3dnowex" \
-	"dct36_3dnow" \
-	"x_plus_minus_3dnow" \
-	"tfcos36" \
-	"COS9"
-do
-echo "fixing: $i=_$i"
-objcopy --redefine-sym "$i=_$i" libMP3.a
-done
-
--- a/mp3lib/sr1.c	Wed Oct 22 20:58:46 2003 +0000
+++ b/mp3lib/sr1.c	Wed Oct 22 21:08:46 2003 +0000
@@ -386,7 +386,6 @@
 extern void dct64_MMX(real *, real *, real *);
 extern void dct64_MMX_3dnow(real *, real *, real *);
 extern void dct64_MMX_3dnowex(real *, real *, real *);
-extern void dct64_MMX_sse(real *, real *, real *);
 void (*dct64_MMX_func)(real *, real *, real *);
 
 #include "../cpudetect.h"
@@ -415,15 +414,6 @@
 	synth_func = synth_1to1_MMX;
     }
 
-#if 0
-    if(gCpuCaps.hasSSE)
-    {
-	/* SSE version is buggy */
-	dct64_MMX_func = dct64_MMX_sse;
-	mp_msg(MSGT_DECAUDIO,MSGL_V,"mp3lib: using SSE optimized decore!\n");
-    }
-    else
-#endif
     if (gCpuCaps.has3DNowExt)
     {
 	dct36_func=dct36_3dnowex;
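
The dispatch that survives in sr1.c keeps the same shape: probe CPU capabilities
once at init time, then point dct64_MMX_func at the best routine. A condensed C
sketch (gCpuCaps fields as in the hunk above; the has3DNow fallback is assumed
to exist alongside has3DNowExt):

    #include "../cpudetect.h"

    typedef float real;  /* as in mp3lib */

    extern void dct64_MMX(real *, real *, real *);
    extern void dct64_MMX_3dnow(real *, real *, real *);
    extern void dct64_MMX_3dnowex(real *, real *, real *);
    void (*dct64_MMX_func)(real *, real *, real *);

    static void select_dct64(void)
    {
        dct64_MMX_func = dct64_MMX;             /* plain MMX default */
        if (gCpuCaps.has3DNow)
            dct64_MMX_func = dct64_MMX_3dnow;
        if (gCpuCaps.has3DNowExt)
            dct64_MMX_func = dct64_MMX_3dnowex; /* SSE branch removed above */
    }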