changeset 4142:5e8231233418

S->C
author nick
date Sun, 13 Jan 2002 18:26:58 +0000
parents ee1687319436
children d416ca893952
files mp3lib/Makefile mp3lib/decode_MMX.c mp3lib/decode_MMX.s
diffstat 3 files changed, 245 insertions(+), 253 deletions(-) [+]
line wrap: on
line diff
--- a/mp3lib/Makefile	Sun Jan 13 17:23:32 2002 +0000
+++ b/mp3lib/Makefile	Sun Jan 13 18:26:58 2002 +0000
@@ -12,7 +12,7 @@
 SRCS += d_cpu.s decode_i586.s
 OBJS += d_cpu.o decode_i586.o
 ifeq ($(TARGET_MMX),yes)
-SRCS += decode_MMX.s dct64_MMX.s tabinit_MMX.c
+SRCS += decode_MMX.c dct64_MMX.s tabinit_MMX.c
 OBJS += decode_MMX.o dct64_MMX.o tabinit_MMX.o
 endif
 #ifeq ($(TARGET_SSE),yes)
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mp3lib/decode_MMX.c	Sun Jan 13 18:26:58 2002 +0000
@@ -0,0 +1,244 @@
+/*
+ * This code is licensed under the GPL.
+ * It was taken from http://www.mpg123.org
+ * See the ChangeLog of mpg123-0.59s-pre.1 for details.
+ * Adapted for MPlayer by Nick Kurshev <nickols_k@mail.ru>
+ *
+ * Local ChangeLog:
+ * - Partial loop unrolling and removal of MOVW instructions from the loops
+*/
+#define real float /* ugly, but the only way: the prototype below is written in terms of 'real' */
+/* 64-bit PAND masks used by synth_1to1_MMX_s when merging freshly computed samples into the output: */
+static unsigned long long __attribute__((aligned(8))) null_one = 0x0000ffff0000ffffULL; /* keeps the low 16-bit word of each 32-bit half */
+static unsigned long long __attribute__((aligned(8))) one_null = 0xffff0000ffff0000ULL; /* keeps the high 16-bit word of each 32-bit half */
+unsigned long __attribute__((aligned(8))) costab_mmx[] = /* 31 constants with external linkage; not referenced anywhere in this file */
+{ /* first entry is 0x3F002785, i.e. ~0.5006 when read as an IEEE-754 float - presumably cosine factors for the MMX dct64; TODO confirm */
+	1056974725,
+	1057056395,
+	1057223771,
+	1057485416,
+	1057855544,
+	1058356026,
+	1059019886,
+	1059897405,
+	1061067246,
+	1062657950,
+	1064892987,
+	1066774581,
+	1069414683,
+	1073984175,
+	1079645762,
+	1092815430,
+	1057005197,
+	1057342072,
+	1058087743,
+	1059427869,
+	1061799040,
+	1065862217,
+	1071413542,
+	1084439708,
+	1057128951,
+	1058664893,
+	1063675095,
+	1076102863,
+	1057655764,
+	1067924853,
+	1060439283,
+}; /* NOTE(review): the assembly this replaces emitted 4-byte .long entries; unsigned long has that size only where long is 32 bits */
+
+void synth_1to1_MMX_s(real *bandPtr, int channel, short *samples,
+                      short *buffs, int *bo)
+{
+/* Calls *dct64_MMX_func on bandPtr with two pointers into buffs, then writes 32 decwins-windowed, saturated 16-bit samples to every other short starting at samples[channel]; *bo is updated unless channel == 1. */
+__asm __volatile(
+        "movl %1,%%ecx\n\t"                      /* ecx = channel */
+        "movl %2,%%edi\n\t"                      /* edi = samples */
+        "movl $15,%%ebx\n\t"                     /* ebx = 15, mask for the buffer offset */
+        "movl %4,%%edx\n\t"                      /* edx = bo */
+        "leal (%%edi,%%ecx,2),%%edi\n\t"         /* edi = &samples[channel] */
+	"decl %%ecx\n\t"
+        "movl %3,%%esi\n\t"                      /* esi = buffs */
+        "movl (%%edx),%%eax\n\t"                 /* eax = *bo */
+        "jecxz .L1\n\t"                          /* channel == 1: leave *bo and esi untouched */
+        "decl %%eax\n\t"
+        "andl %%ebx,%%eax\n\t"
+        "leal 1088(%%esi),%%esi\n\t"             /* otherwise work 1088 bytes further into buffs */
+        "movl %%eax,(%%edx)\n\t"                 /* *bo = (*bo - 1) & 15 */
+".L1:\n\t"
+        "leal (%%esi,%%eax,2),%%edx\n\t"
+        "movl %%eax,%%ebp\n\t"
+        "incl %%eax\n\t"
+        "pushl %0\n\t"                           /* last dct64 argument: bandPtr */
+        "andl %%ebx,%%eax\n\t"
+        "leal 544(%%esi,%%eax,2),%%ecx\n\t"
+	"incl %%ebx\n\t"                         /* ebx = 16 */
+	"testl $1, %%eax\n\t"
+	"jnz .L2\n\t"
+        "xchgl %%edx,%%ecx\n\t"                  /* even (offset + 1) & 15: swap the two output pointers */
+	"incl %%ebp\n\t"
+        "leal 544(%%esi),%%esi\n\t"
+".L2:\n\t"
+	"emms\n\t"
+        "pushl %%edx\n\t"
+        "pushl %%ecx\n\t"
+        "call *dct64_MMX_func\n\t"               /* NOTE(review): the three arguments are never popped here; the callee presumably cleans the stack - confirm */
+	"leal 1(%%ebx), %%ecx\n\t"               /* ecx = ebx + 1 (17 if the callee preserved ebx) */
+        "subl %%ebp,%%ebx\n\t"
+	"pushl %%ecx\n\t"                        /* remember the sample count for the odd-tail check */
+	"leal decwins(%%ebx,%%ebx,1), %%edx\n\t" /* edx = decwins + 2*ebx: start of the window coefficients */
+	"shrl $1, %%ecx\n\t"                     /* each iteration of .L3 produces two samples */
+".align 16\n\t"
+".L3:\n\t"
+        "movq  (%%edx),%%mm0\n\t"
+        "movq  64(%%edx),%%mm4\n\t"
+        "pmaddwd (%%esi),%%mm0\n\t"
+        "pmaddwd 32(%%esi),%%mm4\n\t"
+        "movq  8(%%edx),%%mm1\n\t"
+        "movq  72(%%edx),%%mm5\n\t"
+        "pmaddwd 8(%%esi),%%mm1\n\t"
+        "pmaddwd 40(%%esi),%%mm5\n\t"
+        "movq  16(%%edx),%%mm2\n\t"
+        "movq  80(%%edx),%%mm6\n\t"
+        "pmaddwd 16(%%esi),%%mm2\n\t"
+        "pmaddwd 48(%%esi),%%mm6\n\t"
+        "movq  24(%%edx),%%mm3\n\t"
+        "movq  88(%%edx),%%mm7\n\t"
+        "pmaddwd 24(%%esi),%%mm3\n\t"
+        "pmaddwd 56(%%esi),%%mm7\n\t"
+        "paddd %%mm1,%%mm0\n\t"                  /* accumulate the partial products: mm0 = first sample, mm4 = second */
+        "paddd %%mm5,%%mm4\n\t"
+        "paddd %%mm2,%%mm0\n\t"
+        "paddd %%mm6,%%mm4\n\t"
+        "paddd %%mm3,%%mm0\n\t"
+        "paddd %%mm7,%%mm4\n\t"
+        "movq  %%mm0,%%mm1\n\t"
+        "movq  %%mm4,%%mm5\n\t"
+        "psrlq $32,%%mm1\n\t"                    /* fold the high dword onto the low one */
+        "psrlq $32,%%mm5\n\t"
+        "paddd %%mm1,%%mm0\n\t"
+        "paddd %%mm5,%%mm4\n\t"
+        "psrad $13,%%mm0\n\t"                    /* scale down by 2^13 */
+        "psrad $13,%%mm4\n\t"
+        "packssdw %%mm0,%%mm0\n\t"               /* saturate to signed 16 bit */
+        "packssdw %%mm4,%%mm4\n\t"
+/* merge both results into the low words of the two dwords at (edi), keeping the high words already stored there */
+	"movq	(%%edi), %%mm1\n\t"
+	"punpckldq %%mm4, %%mm0\n\t"
+	"pand   one_null, %%mm1\n\t"
+	"pand   null_one, %%mm0\n\t"
+	"por    %%mm0, %%mm1\n\t"
+	"movq   %%mm1,(%%edi)\n\t"
+/* advance: esi by 64 bytes, the decwins pointer by 128 bytes, the output by 8 bytes */
+        "leal 64(%%esi),%%esi\n\t"
+        "leal 128(%%edx),%%edx\n\t"
+        "leal 8(%%edi),%%edi\n\t"
+
+	"decl %%ecx\n\t"
+        "jnz  .L3\n\t"
+/* if the saved sample count is odd, compute one more sample on its own */
+	"popl %%ecx\n\t"
+	"andl $1, %%ecx\n\t"
+	"jecxz .next_loop\n\t"
+
+        "movq  (%%edx),%%mm0\n\t"
+        "pmaddwd (%%esi),%%mm0\n\t"
+        "movq  8(%%edx),%%mm1\n\t"
+        "pmaddwd 8(%%esi),%%mm1\n\t"
+        "movq  16(%%edx),%%mm2\n\t"
+        "pmaddwd 16(%%esi),%%mm2\n\t"
+        "movq  24(%%edx),%%mm3\n\t"
+        "pmaddwd 24(%%esi),%%mm3\n\t"
+        "paddd %%mm1,%%mm0\n\t"
+        "paddd %%mm2,%%mm0\n\t"
+        "paddd %%mm3,%%mm0\n\t"
+        "movq  %%mm0,%%mm1\n\t"
+        "psrlq $32,%%mm1\n\t"
+        "paddd %%mm1,%%mm0\n\t"
+        "psrad $13,%%mm0\n\t"
+        "packssdw %%mm0,%%mm0\n\t"
+        "movd %%mm0,%%eax\n\t"
+	"movw %%ax, (%%edi)\n\t"                 /* single 16-bit store for the odd sample */
+        "leal 32(%%esi),%%esi\n\t"
+        "leal 64(%%edx),%%edx\n\t"
+        "leal 4(%%edi),%%edi\n\t"
+/* second pass: esi now steps backwards and every result is negated (0 - x, saturating) before it is stored */
+".next_loop:\n\t"
+        "subl $64,%%esi\n\t"
+        "movl $7,%%ecx\n\t"                      /* seven iterations, two samples each */
+".align 16\n\t"
+".L4:\n\t"
+        "movq  (%%edx),%%mm0\n\t"
+        "movq  64(%%edx),%%mm4\n\t"
+        "pmaddwd (%%esi),%%mm0\n\t"
+        "pmaddwd -32(%%esi),%%mm4\n\t"
+        "movq  8(%%edx),%%mm1\n\t"
+        "movq  72(%%edx),%%mm5\n\t"
+        "pmaddwd 8(%%esi),%%mm1\n\t"
+        "pmaddwd -24(%%esi),%%mm5\n\t"
+        "movq  16(%%edx),%%mm2\n\t"
+        "movq  80(%%edx),%%mm6\n\t"
+        "pmaddwd 16(%%esi),%%mm2\n\t"
+        "pmaddwd -16(%%esi),%%mm6\n\t"
+        "movq  24(%%edx),%%mm3\n\t"
+        "movq  88(%%edx),%%mm7\n\t"
+        "pmaddwd 24(%%esi),%%mm3\n\t"
+        "pmaddwd -8(%%esi),%%mm7\n\t"
+        "paddd %%mm1,%%mm0\n\t"
+        "paddd %%mm5,%%mm4\n\t"
+        "paddd %%mm2,%%mm0\n\t"
+        "paddd %%mm6,%%mm4\n\t"
+        "paddd %%mm3,%%mm0\n\t"
+        "paddd %%mm7,%%mm4\n\t"
+        "movq  %%mm0,%%mm1\n\t"
+        "movq  %%mm4,%%mm5\n\t"
+        "psrlq $32,%%mm1\n\t"
+        "psrlq $32,%%mm5\n\t"
+        "paddd %%mm0,%%mm1\n\t"
+        "paddd %%mm4,%%mm5\n\t"
+        "psrad $13,%%mm1\n\t"
+        "psrad $13,%%mm5\n\t"
+        "packssdw %%mm1,%%mm1\n\t"
+        "packssdw %%mm5,%%mm5\n\t"
+        "psubd %%mm0,%%mm0\n\t"                  /* mm0 = 0 */
+        "psubd %%mm4,%%mm4\n\t"                  /* mm4 = 0 */
+        "psubsw %%mm1,%%mm0\n\t"                 /* mm0 = -first sample (saturating) */
+        "psubsw %%mm5,%%mm4\n\t"                 /* mm4 = -second sample (saturating) */
+/* same masked merge as in the first pass */
+	"movq	(%%edi), %%mm1\n\t"
+	"punpckldq %%mm4, %%mm0\n\t"
+	"pand   one_null, %%mm1\n\t"
+	"pand   null_one, %%mm0\n\t"
+	"por    %%mm0, %%mm1\n\t"
+	"movq   %%mm1,(%%edi)\n\t"
+
+        "subl $64,%%esi\n\t"
+        "addl $128,%%edx\n\t"
+        "leal 8(%%edi),%%edi\n\t"
+        "decl %%ecx\n\t"
+	"jnz  .L4\n\t"
+/* final sample of the second pass, again negated and written with a 16-bit store */
+        "movq  (%%edx),%%mm0\n\t"
+        "pmaddwd (%%esi),%%mm0\n\t"
+        "movq  8(%%edx),%%mm1\n\t"
+        "pmaddwd 8(%%esi),%%mm1\n\t"
+        "movq  16(%%edx),%%mm2\n\t"
+        "pmaddwd 16(%%esi),%%mm2\n\t"
+        "movq  24(%%edx),%%mm3\n\t"
+        "pmaddwd 24(%%esi),%%mm3\n\t"
+        "paddd %%mm1,%%mm0\n\t"
+        "paddd %%mm2,%%mm0\n\t"
+        "paddd %%mm3,%%mm0\n\t"
+        "movq  %%mm0,%%mm1\n\t"
+        "psrlq $32,%%mm1\n\t"
+        "paddd %%mm0,%%mm1\n\t"
+        "psrad $13,%%mm1\n\t"
+        "packssdw %%mm1,%%mm1\n\t"
+        "psubd %%mm0,%%mm0\n\t"
+        "psubsw %%mm1,%%mm0\n\t"
+        "movd %%mm0,%%eax\n\t"
+	"movw %%ax,(%%edi)\n\t"
+	"emms\n\t"
+        :
+	:"m"(bandPtr),"m"(channel),"m"(samples),"m"(buffs),"m"(bo)
+	:"memory","%ebp","%edi","%esi","%ebx","%eax","%ecx","%edx"); /* eax, ecx and edx are written above as well, so they must be declared clobbered */
+}
--- a/mp3lib/decode_MMX.s	Sun Jan 13 17:23:32 2002 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,252 +0,0 @@
-# this code comes under GPL
-# This code was taken from http://www.mpg123.org
-# See ChangeLog of mpg123-0.59s-pre.1 for detail
-# Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
-#
-# Local ChangeLog:
-# - Partial loops unrolling and removing MOVW insn from loops
-#
-
-.data
-.align 8
-null_one: .long 0x0000ffff, 0x0000ffff
-one_null: .long 0xffff0000, 0xffff0000
-.globl costab_mmx
-costab_mmx:
-	.long 1056974725
-	.long 1057056395
-	.long 1057223771
-	.long 1057485416
-	.long 1057855544
-	.long 1058356026
-	.long 1059019886
-	.long 1059897405
-	.long 1061067246
-	.long 1062657950
-	.long 1064892987
-	.long 1066774581
-	.long 1069414683
-	.long 1073984175
-	.long 1079645762
-	.long 1092815430
-	.long 1057005197
-	.long 1057342072
-	.long 1058087743
-	.long 1059427869
-	.long 1061799040
-	.long 1065862217
-	.long 1071413542
-	.long 1084439708
-	.long 1057128951
-	.long 1058664893
-	.long 1063675095
-	.long 1076102863
-	.long 1057655764
-	.long 1067924853
-	.long 1060439283
-
-.text
-
-.globl synth_1to1_MMX_s
-//
-// void synth_1to1_MMX_s(real *bandPtr, int channel, short *samples,
-//                       short *buffs, int *bo);
-//
-synth_1to1_MMX_s:
-        pushl %ebp
-        pushl %edi
-        pushl %esi
-        pushl %ebx
-        movl 24(%esp),%ecx
-        movl 28(%esp),%edi
-        movl $15,%ebx
-        movl 36(%esp),%edx
-        leal (%edi,%ecx,2),%edi
-	decl %ecx
-        movl 32(%esp),%esi
-        movl (%edx),%eax
-        jecxz .L1
-        decl %eax
-        andl %ebx,%eax
-        leal 1088(%esi),%esi
-        movl %eax,(%edx)
-.L1:
-        leal (%esi,%eax,2),%edx
-        movl %eax,%ebp
-        incl %eax
-        pushl 20(%esp)
-        andl %ebx,%eax
-        leal 544(%esi,%eax,2),%ecx
-        incl %ebx
-	testl $1, %eax
-	jnz .L2
-        xchgl %edx,%ecx
-	incl %ebp
-        leal 544(%esi),%esi
-.L2: 
-	emms
-        pushl %edx
-        pushl %ecx
-        call *dct64_MMX_func
-	leal 1(%ebx), %ecx
-        subl %ebp,%ebx
-	pushl %ecx
-	leal decwins(%ebx,%ebx,1), %edx
-	shrl $1, %ecx
-.align 16
-.L3: 
-        movq  (%edx),%mm0
-        movq  64(%edx),%mm4
-        pmaddwd (%esi),%mm0
-        pmaddwd 32(%esi),%mm4
-        movq  8(%edx),%mm1
-        movq  72(%edx),%mm5
-        pmaddwd 8(%esi),%mm1
-        pmaddwd 40(%esi),%mm5
-        movq  16(%edx),%mm2
-        movq  80(%edx),%mm6
-        pmaddwd 16(%esi),%mm2
-        pmaddwd 48(%esi),%mm6
-        movq  24(%edx),%mm3
-        movq  88(%edx),%mm7
-        pmaddwd 24(%esi),%mm3
-        pmaddwd 56(%esi),%mm7
-        paddd %mm1,%mm0
-        paddd %mm5,%mm4
-        paddd %mm2,%mm0
-        paddd %mm6,%mm4
-        paddd %mm3,%mm0
-        paddd %mm7,%mm4
-        movq  %mm0,%mm1
-        movq  %mm4,%mm5
-        psrlq $32,%mm1
-        psrlq $32,%mm5
-        paddd %mm1,%mm0
-        paddd %mm5,%mm4
-        psrad $13,%mm0
-        psrad $13,%mm4
-        packssdw %mm0,%mm0
-        packssdw %mm4,%mm4
-
-	movq	(%edi), %mm1
-	punpckldq %mm4, %mm0
-	pand   one_null, %mm1
-	pand   null_one, %mm0
-	por    %mm0, %mm1
-	movq   %mm1,(%edi)
-
-        leal 64(%esi),%esi
-        leal 128(%edx),%edx
-        leal 8(%edi),%edi                
-
-	decl %ecx
-        jnz  .L3
-
-	popl %ecx
-	andl $1, %ecx
-	jecxz .next_loop
-
-        movq  (%edx),%mm0
-        pmaddwd (%esi),%mm0
-        movq  8(%edx),%mm1
-        pmaddwd 8(%esi),%mm1
-        movq  16(%edx),%mm2
-        pmaddwd 16(%esi),%mm2
-        movq  24(%edx),%mm3
-        pmaddwd 24(%esi),%mm3
-        paddd %mm1,%mm0
-        paddd %mm2,%mm0
-        paddd %mm3,%mm0
-        movq  %mm0,%mm1
-        psrlq $32,%mm1
-        paddd %mm1,%mm0
-        psrad $13,%mm0
-        packssdw %mm0,%mm0
-        movd %mm0,%eax
-	movw %ax, (%edi)
-        leal 32(%esi),%esi
-        leal 64(%edx),%edx
-        leal 4(%edi),%edi                
-	
-.next_loop:
-        subl $64,%esi                    
-        movl $7,%ecx
-.align 16
-.L4: 
-        movq  (%edx),%mm0
-        movq  64(%edx),%mm4
-        pmaddwd (%esi),%mm0
-        pmaddwd -32(%esi),%mm4
-        movq  8(%edx),%mm1
-        movq  72(%edx),%mm5
-        pmaddwd 8(%esi),%mm1
-        pmaddwd -24(%esi),%mm5
-        movq  16(%edx),%mm2
-        movq  80(%edx),%mm6
-        pmaddwd 16(%esi),%mm2
-        pmaddwd -16(%esi),%mm6
-        movq  24(%edx),%mm3
-        movq  88(%edx),%mm7
-        pmaddwd 24(%esi),%mm3
-        pmaddwd -8(%esi),%mm7
-        paddd %mm1,%mm0
-        paddd %mm5,%mm4
-        paddd %mm2,%mm0
-        paddd %mm6,%mm4
-        paddd %mm3,%mm0
-        paddd %mm7,%mm4
-        movq  %mm0,%mm1
-        movq  %mm4,%mm5
-        psrlq $32,%mm1
-        psrlq $32,%mm5
-        paddd %mm0,%mm1
-        paddd %mm4,%mm5
-        psrad $13,%mm1
-        psrad $13,%mm5
-        packssdw %mm1,%mm1
-        packssdw %mm5,%mm5
-        psubd %mm0,%mm0
-        psubd %mm4,%mm4
-        psubsw %mm1,%mm0
-        psubsw %mm5,%mm4
-
-	movq	(%edi), %mm1
-	punpckldq %mm4, %mm0
-	pand   one_null, %mm1
-	pand   null_one, %mm0
-	por    %mm0, %mm1
-	movq   %mm1,(%edi)
-
-        subl $64,%esi
-        addl $128,%edx
-        leal 8(%edi),%edi                
-        decl %ecx
-	jnz  .L4
-
-        movq  (%edx),%mm0
-        pmaddwd (%esi),%mm0
-        movq  8(%edx),%mm1
-        pmaddwd 8(%esi),%mm1
-        movq  16(%edx),%mm2
-        pmaddwd 16(%esi),%mm2
-        movq  24(%edx),%mm3
-        pmaddwd 24(%esi),%mm3
-        paddd %mm1,%mm0
-        paddd %mm2,%mm0
-        paddd %mm3,%mm0
-        movq  %mm0,%mm1
-        psrlq $32,%mm1
-        paddd %mm0,%mm1
-        psrad $13,%mm1
-        packssdw %mm1,%mm1
-        psubd %mm0,%mm0
-        psubsw %mm1,%mm0
-        movd %mm0,%eax
-	movw %ax,(%edi)
-
-	emms
-        popl %ebx
-        popl %esi
-        popl %edi
-        popl %ebp
-        ret