changeset 4147:4bbdda22003d

S->C
author nick
date Mon, 14 Jan 2002 09:32:51 +0000
parents 925046ea34ec
children 3b29772a4fb2
files mp3lib/Makefile mp3lib/decode_i586.c mp3lib/decode_i586.s
diffstat 3 files changed, 308 insertions(+), 322 deletions(-) [+]
line wrap: on
line diff
--- a/mp3lib/Makefile	Mon Jan 14 06:44:30 2002 +0000
+++ b/mp3lib/Makefile	Mon Jan 14 09:32:51 2002 +0000
@@ -9,7 +9,7 @@
 endif
 CFLAGS  = $(OPTFLAGS) $(EXTRA_INC)
 ifeq ($(TARGET_ARCH_X86),yes)
-SRCS += d_cpu.s decode_i586.s
+SRCS += d_cpu.s decode_i586.c
 OBJS += d_cpu.o decode_i586.o
 ifeq ($(TARGET_MMX),yes)
 SRCS += decode_MMX.c dct64_MMX.s tabinit_MMX.c
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/mp3lib/decode_i586.c	Mon Jan 14 09:32:51 2002 +0000
@@ -0,0 +1,307 @@
+/*
+* mpg123_synth_1to1 works the same way as the c version of this
+* file.  only two types of changes have been made:
+* - reordered floating point instructions to
+*   prevent pipeline stalls
+* - made WRITE_SAMPLE use integer instead of
+*   (slower) floating point
+* all kinds of x86 processors should benefit from these
+* modifications.
+*
+* useful sources of information on optimizing x86 code include:
+*
+*     Intel Architecture Optimization Manual
+*     http://www.intel.com/design/pentium/manuals/242816.htm
+*
+*     Cyrix 6x86 Instruction Set Summary
+*     ftp://ftp.cyrix.com/6x86/6x-dbch6.pdf
+*
+*     AMD-K5 Processor Software Development
+*     http://www.amd.com/products/cpg/techdocs/appnotes/20007e.pdf
+*
+* Stefan Bieschewski <stb@acm.org>
+*
+* $Id$
+*/
+#define real float /* ugly - but only way */
+
+static long buffs[1088]; /* synthesis work area: the asm below addresses it by symbol name, reads it with flds/fmuls, and uses byte offset 2176 as the start of the channel != 0 half */
+static long bo=1; /* rotating offset, replaced by (bo - 1) & 15 on each channel == 0 call; NOTE(review): only the asm references buffs and bo, so C-level tools see them as unused */
+
+int synth_1to1_pent(real *bandPtr, int channel, short *samples) /* i386 inline-asm synthesis step: calls dct64 on bandPtr, then writes 32 clamped 16-bit samples into samples at a stride of 2 shorts */
+{ /* returns the number of samples that had to be clamped to the 16-bit range */
+  real tmp[3]; /* tmp[0] carries the bo-derived offset across the dct64 call; NOTE(review): the 4+%4 and 8+%4 reloads after each pushl only land on tmp[0] again if this operand is %esp-relative - confirm the build omits the frame pointer */
+  register int retval;
+    __asm __volatile( /* NOTE(review): the .L48-style labels live in the same namespace as compiler-generated local labels, and bo, buffs, decwin and dct64 are referenced by bare symbol name */
+"        movl %1,%%eax\n\t"/* eax = bandPtr */
+"        movl %3,%%esi\n\t"/* esi = samples (output cursor) */
+"        xorl %%edi,%%edi\n\t"/* edi = 0: running count of clipped samples */
+"        movl bo,%%ebp\n\t"/* ebp = bo */
+"        cmpl %%edi,%2\n\t"/* channel == 0 ? */
+"        jne .L48\n\t"
+"        decl %%ebp\n\t"/* channel 0 only: bo = (bo - 1) & 15 */
+"        andl $15,%%ebp\n\t"
+"        movl %%ebp,bo\n\t"
+"        movl $buffs,%%ecx\n\t"/* channel 0 works from the start of buffs */
+"        jmp .L49\n\t"
+".L48:\n\t"
+"        addl $2,%%esi\n\t"/* channel != 0: output starts one short further on */
+"        movl $buffs+2176,%%ecx\n\t"/* ...and works from byte offset 2176 of buffs */
+".L49:\n\t"
+"        testl $1,%%ebp\n\t"/* is bo odd? */
+"        je .L50\n\t"
+"        movl %%ecx,%%ebx\n\t"/* odd bo: ebx (multiply source for the loops below) = ecx */
+"        movl %%ebp,%4\n\t"/* tmp[0] = bo */
+"        pushl %%eax\n\t"/* dct64 stack argument, pushed 1st: bandPtr */
+"        movl 4+%4,%%edx\n\t"/* intended to re-read tmp[0]; the +4 compensates for the pushl above */
+"        leal (%%ebx,%%edx,4),%%eax\n\t"
+"        pushl %%eax\n\t"/* pushed 2nd: ecx + 4*bo */
+"        movl 8+%4,%%eax\n\t"/* intended to re-read tmp[0] after two pushes */
+"        incl %%eax\n\t"
+"        andl $15,%%eax\n\t"
+"        leal 1088(,%%eax,4),%%eax\n\t"
+"        addl %%ebx,%%eax\n\t"/* eax = ecx + 1088 + 4*((bo + 1) & 15); pushed at .L74 */
+"        jmp .L74\n\t"
+".L50:\n\t"
+"        leal 1088(%%ecx),%%ebx\n\t"/* even bo: ebx = ecx + 1088 */
+"        leal 1(%%ebp),%%edx\n\t"
+"        movl %%edx,%4\n\t"/* tmp[0] = bo + 1 */
+"        pushl %%eax\n\t"/* dct64 stack argument, pushed 1st: bandPtr */
+"        leal 1092(%%ecx,%%ebp,4),%%eax\n\t"
+"        pushl %%eax\n\t"/* pushed 2nd: ecx + 1088 + 4*(bo + 1) */
+"        leal (%%ecx,%%ebp,4),%%eax\n\t"/* eax = ecx + 4*bo; pushed at .L74 */
+".L74:\n\t"
+"        pushl %%eax\n\t"/* pushed 3rd (top of stack at the call) */
+"        call dct64\n\t"
+"        addl $12,%%esp\n\t"/* drop the three pushed arguments */
+"        movl %4,%%edx\n\t"/* edx = tmp[0] */
+"        leal 0(,%%edx,4),%%edx\n\t"
+"        movl $decwin+64,%%eax\n\t"
+"        movl %%eax,%%ecx\n\t"
+"        subl %%edx,%%ecx\n\t"/* ecx = decwin + 64 - 4*tmp[0] (byte arithmetic) */
+"        movl $16,%%ebp\n\t"/* first loop produces 16 output samples */
+".L55:\n\t"
+"        flds (%%ecx)\n\t"/* 16 products (ecx)[k]*(ebx)[k] at byte offsets 0..60, folded with alternating fsubrp/faddp */
+"        fmuls (%%ebx)\n\t"
+"        flds 4(%%ecx)\n\t"
+"        fmuls 4(%%ebx)\n\t"
+"        fxch %%st(1)\n\t"
+"        flds 8(%%ecx)\n\t"
+"        fmuls 8(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        fsubrp %%st,%%st(1)\n\t"
+"        flds 12(%%ecx)\n\t"
+"        fmuls 12(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        faddp %%st,%%st(1)\n\t"
+"        flds 16(%%ecx)\n\t"
+"        fmuls 16(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        fsubrp %%st,%%st(1)\n\t"
+"        flds 20(%%ecx)\n\t"
+"        fmuls 20(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        faddp %%st,%%st(1)\n\t"
+"        flds 24(%%ecx)\n\t"
+"        fmuls 24(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        fsubrp %%st,%%st(1)\n\t"
+"        flds 28(%%ecx)\n\t"
+"        fmuls 28(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        faddp %%st,%%st(1)\n\t"
+"        flds 32(%%ecx)\n\t"
+"        fmuls 32(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        fsubrp %%st,%%st(1)\n\t"
+"        flds 36(%%ecx)\n\t"
+"        fmuls 36(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        faddp %%st,%%st(1)\n\t"
+"        flds 40(%%ecx)\n\t"
+"        fmuls 40(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        fsubrp %%st,%%st(1)\n\t"
+"        flds 44(%%ecx)\n\t"
+"        fmuls 44(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        faddp %%st,%%st(1)\n\t"
+"        flds 48(%%ecx)\n\t"
+"        fmuls 48(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        fsubrp %%st,%%st(1)\n\t"
+"        flds 52(%%ecx)\n\t"
+"        fmuls 52(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        faddp %%st,%%st(1)\n\t"
+"        flds 56(%%ecx)\n\t"
+"        fmuls 56(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        fsubrp %%st,%%st(1)\n\t"
+"        flds 60(%%ecx)\n\t"
+"        fmuls 60(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        subl $4,%%esp\n\t"/* reserve a 4-byte stack slot for the integer conversion */
+"        faddp %%st,%%st(1)\n\t"
+"        fxch %%st(1)\n\t"
+"        fsubrp %%st,%%st(1)\n\t"
+"        fistpl (%%esp)\n\t"/* store the sum as a 32-bit integer; the FPU's current rounding mode applies */
+"        popl %%eax\n\t"/* eax = converted sample, stack slot released */
+"        cmpl $32767,%%eax\n\t"
+"        jg 1f\n\t"/* too large: saturate */
+"        cmpl $-32768,%%eax\n\t"
+"        jl 2f\n\t"/* too small: saturate */
+"        movw %%ax,(%%esi)\n\t"/* in range: store unchanged, clip count untouched */
+"        jmp 4f\n\t"
+"1:      movw $32767,(%%esi)\n\t"
+"        jmp 3f\n\t"
+"2:      movw $-32768,(%%esi)\n\t"
+"3:      incl %%edi\n\t"/* one more clipped sample */
+"4:\n\t"
+".L54:\n\t"
+"        addl $64,%%ebx\n\t"/* ebx advances 64 bytes */
+"        subl $-128,%%ecx\n\t"/* ecx advances 128 bytes */
+"        addl $4,%%esi\n\t"/* output cursor advances 2 shorts */
+"        decl %%ebp\n\t"
+"        jnz .L55\n\t"
+"        flds (%%ecx)\n\t"/* 17th sample: 8 products at byte offsets 0, 8, ..., 56, all combined with faddp */
+"        fmuls (%%ebx)\n\t"
+"        flds 8(%%ecx)\n\t"
+"        fmuls 8(%%ebx)\n\t"
+"        flds 16(%%ecx)\n\t"
+"        fmuls 16(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        faddp %%st,%%st(1)\n\t"
+"        flds 24(%%ecx)\n\t"
+"        fmuls 24(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        faddp %%st,%%st(1)\n\t"
+"        flds 32(%%ecx)\n\t"
+"        fmuls 32(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        faddp %%st,%%st(1)\n\t"
+"        flds 40(%%ecx)\n\t"
+"        fmuls 40(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        faddp %%st,%%st(1)\n\t"
+"        flds 48(%%ecx)\n\t"
+"        fmuls 48(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        faddp %%st,%%st(1)\n\t"
+"        flds 56(%%ecx)\n\t"
+"        fmuls 56(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        subl $4,%%esp\n\t"/* scratch slot for the integer conversion */
+"        faddp %%st,%%st(1)\n\t"
+"        fxch %%st(1)\n\t"
+"        faddp %%st,%%st(1)\n\t"
+"        fistpl (%%esp)\n\t"
+"        popl %%eax\n\t"
+"        cmpl $32767,%%eax\n\t"/* same clamp-and-count sequence as in the first loop */
+"        jg 1f\n\t"
+"        cmpl $-32768,%%eax\n\t"
+"        jl 2f\n\t"
+"        movw %%ax,(%%esi)\n\t"
+"        jmp 4f\n\t"
+"1:      movw $32767,(%%esi)\n\t"
+"        jmp 3f\n\t"
+"2:      movw $-32768,(%%esi)\n\t"
+"3:      incl %%edi\n\t"
+"4:\n\t"
+".L62:\n\t"
+"        addl $-64,%%ebx\n\t"/* ebx steps back 64 bytes */
+"        addl $4,%%esi\n\t"
+"        movl %4,%%edx\n\t"/* edx = tmp[0] */
+"        leal -128(%%ecx,%%edx,8),%%ecx\n\t"/* ecx = ecx - 128 + 8*tmp[0] */
+"        movl $15,%%ebp\n\t"/* second loop produces the remaining 15 samples */
+".L68:\n\t"
+"        flds -4(%%ecx)\n\t"/* ecx side is read backwards (-4, -8, ..., -60, then 0) against ebx offsets 0..60 */
+"        fchs\n\t"/* first term is negated before its multiply; later products are folded with fsubrp only */
+"        fmuls (%%ebx)\n\t"
+"        flds -8(%%ecx)\n\t"
+"        fmuls 4(%%ebx)\n\t"
+"        fxch %%st(1)\n\t"
+"        flds -12(%%ecx)\n\t"
+"        fmuls 8(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        fsubrp %%st,%%st(1)\n\t"
+"        flds -16(%%ecx)\n\t"
+"        fmuls 12(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        fsubrp %%st,%%st(1)\n\t"
+"        flds -20(%%ecx)\n\t"
+"        fmuls 16(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        fsubrp %%st,%%st(1)\n\t"
+"        flds -24(%%ecx)\n\t"
+"        fmuls 20(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        fsubrp %%st,%%st(1)\n\t"
+"        flds -28(%%ecx)\n\t"
+"        fmuls 24(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        fsubrp %%st,%%st(1)\n\t"
+"        flds -32(%%ecx)\n\t"
+"        fmuls 28(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        fsubrp %%st,%%st(1)\n\t"
+"        flds -36(%%ecx)\n\t"
+"        fmuls 32(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        fsubrp %%st,%%st(1)\n\t"
+"        flds -40(%%ecx)\n\t"
+"        fmuls 36(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        fsubrp %%st,%%st(1)\n\t"
+"        flds -44(%%ecx)\n\t"
+"        fmuls 40(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        fsubrp %%st,%%st(1)\n\t"
+"        flds -48(%%ecx)\n\t"
+"        fmuls 44(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        fsubrp %%st,%%st(1)\n\t"
+"        flds -52(%%ecx)\n\t"
+"        fmuls 48(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        fsubrp %%st,%%st(1)\n\t"
+"        flds -56(%%ecx)\n\t"
+"        fmuls 52(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        fsubrp %%st,%%st(1)\n\t"
+"        flds -60(%%ecx)\n\t"
+"        fmuls 56(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        fsubrp %%st,%%st(1)\n\t"
+"        flds (%%ecx)\n\t"
+"        fmuls 60(%%ebx)\n\t"
+"        fxch %%st(2)\n\t"
+"        subl $4,%%esp\n\t"/* scratch slot for the integer conversion */
+"        fsubrp %%st,%%st(1)\n\t"
+"        fxch %%st(1)\n\t"
+"        fsubrp %%st,%%st(1)\n\t"
+"        fistpl (%%esp)\n\t"
+"        popl %%eax\n\t"
+"        cmpl $32767,%%eax\n\t"/* same clamp-and-count sequence as in the first loop */
+"        jg 1f\n\t"
+"        cmpl $-32768,%%eax\n\t"
+"        jl 2f\n\t"
+"        movw %%ax,(%%esi)\n\t"
+"        jmp 4f\n\t"
+"1:      movw $32767,(%%esi)\n\t"
+"        jmp 3f\n\t"
+"2:      movw $-32768,(%%esi)\n\t"
+"3:      incl %%edi\n\t"
+"4:\n\t"
+".L67:\n\t"
+"        addl $-64,%%ebx\n\t"/* in this loop both read cursors move backwards */
+"        addl $-128,%%ecx\n\t"
+"        addl $4,%%esi\n\t"
+"        decl %%ebp\n\t"
+"        jnz .L68\n\t"
+"        movl %%edi,%%eax\n\t"/* result register = clip count */
+	:"=a"(retval)
+	:"m"(bandPtr),"m"(channel),"m"(samples),"m"(tmp[0])
+	:"memory","%ebp","%edi","%esi","%ebx","%ecx","%edx"); /* ecx and edx are overwritten above and are caller-saved across the dct64 call, so they must be declared as clobbered */
+  return retval;
+}
--- a/mp3lib/decode_i586.s	Mon Jan 14 06:44:30 2002 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,321 +0,0 @@
-/
-/ mpg123_synth_1to1 works the same way as the c version of this
-/ file.  only two types of changes have been made:
-/ - reordered floating point instructions to
-/   prevent pipline stalls
-/ - made WRITE_SAMPLE use integer instead of
-/   (slower) floating point
-/ all kinds of x86 processors should benefit from these
-/ modifications.
-/
-/ useful sources of information on optimizing x86 code include:
-/
-/     Intel Architecture Optimization Manual
-/     http://www.intel.com/design/pentium/manuals/242816.htm
-/
-/     Cyrix 6x86 Instruction Set Summary
-/     ftp://ftp.cyrix.com/6x86/6x-dbch6.pdf
-/
-/     AMD-K5 Processor Software Development
-/     http://www.amd.com/products/cpg/techdocs/appnotes/20007e.pdf
-/
-/ Stefan Bieschewski <stb@acm.org>
-/
-/ $Id$
-/
-.bss
-        .comm   buffs,4352,4
-.data
-        .align 4
-bo:
-        .long 1
-.section .rodata
-        .align 8
-.LC0:
-        .long 0x0,0x40dfffc0
-        .align 8
-.LC1:
-        .long 0x0,0xc0e00000
-        .align 8
-.text
-.globl synth_1to1_pent
-synth_1to1_pent:
-        subl $12,%esp
-        pushl %ebp
-        pushl %edi
-        pushl %esi
-        pushl %ebx
-        movl 32(%esp),%eax
-        movl 40(%esp),%esi
-        xorl %edi,%edi
-        movl bo,%ebp
-        cmpl %edi,36(%esp)
-        jne .L48
-        decl %ebp
-        andl $15,%ebp
-        movl %ebp,bo
-        movl $buffs,%ecx
-        jmp .L49
-.L48:
-        addl $2,%esi
-        movl $buffs+2176,%ecx
-.L49:
-        testl $1,%ebp
-        je .L50
-        movl %ecx,%ebx
-        movl %ebp,16(%esp)
-        pushl %eax
-        movl 20(%esp),%edx
-        leal (%ebx,%edx,4),%eax
-        pushl %eax
-        movl 24(%esp),%eax
-        incl %eax
-        andl $15,%eax
-        leal 1088(,%eax,4),%eax
-        addl %ebx,%eax
-        jmp .L74
-.L50:
-        leal 1088(%ecx),%ebx
-        leal 1(%ebp),%edx
-        movl %edx,16(%esp)
-        pushl %eax
-        leal 1092(%ecx,%ebp,4),%eax
-        pushl %eax
-        leal (%ecx,%ebp,4),%eax
-.L74:
-        pushl %eax
-        call dct64
-        addl $12,%esp
-        movl 16(%esp),%edx
-        leal 0(,%edx,4),%edx
-        movl $decwin+64,%eax
-        movl %eax,%ecx
-        subl %edx,%ecx
-        movl $16,%ebp
-.L55:
-        flds (%ecx)
-        fmuls (%ebx)
-        flds 4(%ecx)
-        fmuls 4(%ebx)
-        fxch %st(1)
-        flds 8(%ecx)
-        fmuls 8(%ebx)
-        fxch %st(2)
-        fsubrp %st,%st(1)
-        flds 12(%ecx)
-        fmuls 12(%ebx)
-        fxch %st(2)
-        faddp %st,%st(1)
-        flds 16(%ecx)
-        fmuls 16(%ebx)
-        fxch %st(2)
-        fsubrp %st,%st(1)
-        flds 20(%ecx)
-        fmuls 20(%ebx)
-        fxch %st(2)
-        faddp %st,%st(1)
-        flds 24(%ecx)
-        fmuls 24(%ebx)
-        fxch %st(2)
-        fsubrp %st,%st(1)
-        flds 28(%ecx)
-        fmuls 28(%ebx)
-        fxch %st(2)
-        faddp %st,%st(1)
-        flds 32(%ecx)
-        fmuls 32(%ebx)
-        fxch %st(2)
-        fsubrp %st,%st(1)
-        flds 36(%ecx)
-        fmuls 36(%ebx)
-        fxch %st(2)
-        faddp %st,%st(1)
-        flds 40(%ecx)
-        fmuls 40(%ebx)
-        fxch %st(2)
-        fsubrp %st,%st(1)
-        flds 44(%ecx)
-        fmuls 44(%ebx)
-        fxch %st(2)
-        faddp %st,%st(1)
-        flds 48(%ecx)
-        fmuls 48(%ebx)
-        fxch %st(2)
-        fsubrp %st,%st(1)
-        flds 52(%ecx)
-        fmuls 52(%ebx)
-        fxch %st(2)
-        faddp %st,%st(1)
-        flds 56(%ecx)
-        fmuls 56(%ebx)
-        fxch %st(2)
-        fsubrp %st,%st(1)
-        flds 60(%ecx)
-        fmuls 60(%ebx)
-        fxch %st(2)
-        subl $4,%esp
-        faddp %st,%st(1)
-        fxch %st(1)
-        fsubrp %st,%st(1)
-        fistpl (%esp)
-        popl %eax
-        cmpl $32767,%eax
-        jg 1f
-        cmpl $-32768,%eax
-        jl 2f
-        movw %ax,(%esi)
-        jmp 4f
-1:      movw $32767,(%esi)
-        jmp 3f
-2:      movw $-32768,(%esi)
-3:      incl %edi
-4:
-.L54:
-        addl $64,%ebx
-        subl $-128,%ecx
-        addl $4,%esi
-        decl %ebp
-        jnz .L55
-        flds (%ecx)
-        fmuls (%ebx)
-        flds 8(%ecx)
-        fmuls 8(%ebx)
-        flds 16(%ecx)
-        fmuls 16(%ebx)
-        fxch %st(2)
-        faddp %st,%st(1)
-        flds 24(%ecx)
-        fmuls 24(%ebx)
-        fxch %st(2)
-        faddp %st,%st(1)
-        flds 32(%ecx)
-        fmuls 32(%ebx)
-        fxch %st(2)
-        faddp %st,%st(1)
-        flds 40(%ecx)
-        fmuls 40(%ebx)
-        fxch %st(2)
-        faddp %st,%st(1)
-        flds 48(%ecx)
-        fmuls 48(%ebx)
-        fxch %st(2)
-        faddp %st,%st(1)
-        flds 56(%ecx)
-        fmuls 56(%ebx)
-        fxch %st(2)
-        subl $4,%esp
-        faddp %st,%st(1)
-        fxch %st(1)
-        faddp %st,%st(1)
-        fistpl (%esp)
-        popl %eax
-        cmpl $32767,%eax
-        jg 1f
-        cmpl $-32768,%eax
-        jl 2f
-        movw %ax,(%esi)
-        jmp 4f
-1:      movw $32767,(%esi)
-        jmp 3f
-2:      movw $-32768,(%esi)
-3:      incl %edi
-4:
-.L62:
-        addl $-64,%ebx
-        addl $4,%esi
-        movl 16(%esp),%edx
-        leal -128(%ecx,%edx,8),%ecx
-        movl $15,%ebp
-.L68:
-        flds -4(%ecx)
-        fchs
-        fmuls (%ebx)
-        flds -8(%ecx)
-        fmuls 4(%ebx)
-        fxch %st(1)
-        flds -12(%ecx)
-        fmuls 8(%ebx)
-        fxch %st(2)
-        fsubrp %st,%st(1)
-        flds -16(%ecx)
-        fmuls 12(%ebx)
-        fxch %st(2)
-        fsubrp %st,%st(1)
-        flds -20(%ecx)
-        fmuls 16(%ebx)
-        fxch %st(2)
-        fsubrp %st,%st(1)
-        flds -24(%ecx)
-        fmuls 20(%ebx)
-        fxch %st(2)
-        fsubrp %st,%st(1)
-        flds -28(%ecx)
-        fmuls 24(%ebx)
-        fxch %st(2)
-        fsubrp %st,%st(1)
-        flds -32(%ecx)
-        fmuls 28(%ebx)
-        fxch %st(2)
-        fsubrp %st,%st(1)
-        flds -36(%ecx)
-        fmuls 32(%ebx)
-        fxch %st(2)
-        fsubrp %st,%st(1)
-        flds -40(%ecx)
-        fmuls 36(%ebx)
-        fxch %st(2)
-        fsubrp %st,%st(1)
-        flds -44(%ecx)
-        fmuls 40(%ebx)
-        fxch %st(2)
-        fsubrp %st,%st(1)
-        flds -48(%ecx)
-        fmuls 44(%ebx)
-        fxch %st(2)
-        fsubrp %st,%st(1)
-        flds -52(%ecx)
-        fmuls 48(%ebx)
-        fxch %st(2)
-        fsubrp %st,%st(1)
-        flds -56(%ecx)
-        fmuls 52(%ebx)
-        fxch %st(2)
-        fsubrp %st,%st(1)
-        flds -60(%ecx)
-        fmuls 56(%ebx)
-        fxch %st(2)
-        fsubrp %st,%st(1)
-        flds (%ecx)
-        fmuls 60(%ebx)
-        fxch %st(2)
-        subl $4,%esp
-        fsubrp %st,%st(1)
-        fxch %st(1)
-        fsubrp %st,%st(1)
-        fistpl (%esp)
-        popl %eax
-        cmpl $32767,%eax
-        jg 1f
-        cmpl $-32768,%eax
-        jl 2f
-        movw %ax,(%esi)
-        jmp 4f
-1:      movw $32767,(%esi)
-        jmp 3f
-2:      movw $-32768,(%esi)
-3:      incl %edi
-4:
-.L67:
-        addl $-64,%ebx
-        addl $-128,%ecx
-        addl $4,%esi
-        decl %ebp
-        jnz .L68
-        movl %edi,%eax
-        popl %ebx
-        popl %esi
-        popl %edi
-        popl %ebp
-        addl $12,%esp
-        ret
-