Mercurial > mplayer.hg
changeset 4142:5e8231233418
S->C
author | nick |
---|---|
date | Sun, 13 Jan 2002 18:26:58 +0000 |
parents | ee1687319436 |
children | d416ca893952 |
files | mp3lib/Makefile mp3lib/decode_MMX.c mp3lib/decode_MMX.s |
diffstat | 3 files changed, 245 insertions(+), 253 deletions(-) [+] |
line wrap: on
line diff
--- a/mp3lib/Makefile Sun Jan 13 17:23:32 2002 +0000 +++ b/mp3lib/Makefile Sun Jan 13 18:26:58 2002 +0000 @@ -12,7 +12,7 @@ SRCS += d_cpu.s decode_i586.s OBJS += d_cpu.o decode_i586.o ifeq ($(TARGET_MMX),yes) -SRCS += decode_MMX.s dct64_MMX.s tabinit_MMX.c +SRCS += decode_MMX.c dct64_MMX.s tabinit_MMX.c OBJS += decode_MMX.o dct64_MMX.o tabinit_MMX.o endif #ifeq ($(TARGET_SSE),yes)
/*
 * mp3lib/decode_MMX.c
 *
 * this code comes under GPL
 * This code was taken from http://www.mpg123.org
 * See ChangeLog of mpg123-0.59s-pre.1 for detail
 * Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
 *
 * Local ChangeLog:
 * - Partially unrolled the loops and removed the MOVW insn from them
 * - Converted from a stand-alone .s file to GCC inline assembly
 */
#define real float /* ugly - but only way */

/*
 * 64-bit masks used by the asm below to merge one freshly computed 16-bit
 * sample into each 32-bit slot of the output buffer:
 *   null_one keeps the low  16 bits of every dword (the new sample),
 *   one_null keeps the high 16 bits of every dword (what was already there).
 *
 * They are referenced only by symbol name from inside the asm template, so
 * the compiler cannot see the use.  `used` forces them to be emitted; without
 * it an optimizing build may drop them and the asm no longer links.
 */
static unsigned long long __attribute__((used, aligned(8))) null_one = 0x0000ffff0000ffffULL;
static unsigned long long __attribute__((used, aligned(8))) one_null = 0xffff0000ffff0000ULL;

/*
 * Exported table of 31 32-bit constants (groups of 16, 8, 4, 2 and 1).
 * It is not referenced by the code in this file.
 * NOTE(review): the values look like IEEE-754 single-precision bit patterns
 * (e.g. 1056974725 == 0x3F002785, about 0.5006) consumed by the MMX dct64
 * routine - confirm against its user before changing the element type.
 */
unsigned long __attribute__((aligned(8))) costab_mmx[] =
{
        1056974725,
        1057056395,
        1057223771,
        1057485416,
        1057855544,
        1058356026,
        1059019886,
        1059897405,
        1061067246,
        1062657950,
        1064892987,
        1066774581,
        1069414683,
        1073984175,
        1079645762,
        1092815430,
        1057005197,
        1057342072,
        1058087743,
        1059427869,
        1061799040,
        1065862217,
        1071413542,
        1084439708,
        1057128951,
        1058664893,
        1063675095,
        1076102863,
        1057655764,
        1067924853,
        1060439283,
};

/*
 * MMX synthesis step for one channel (32-bit x86 only).
 *
 * bandPtr - passed through unchanged as the last-pushed... i.e. first-pushed
 *           stack argument of the routine reached via dct64_MMX_func.
 * channel - scales the initial output offset (samples + channel shorts) and
 *           selects the set-up path: when channel != 1 the ring index *bo is
 *           decremented modulo 16 and stored back, and buffs is advanced by
 *           1088 bytes.
 * samples - output buffer.  32 results are written, one 16-bit value every
 *           4 bytes; the other 16 bits of each 4-byte slot are preserved.
 * buffs   - work buffer handed to the dct64 routine and then read back as
 *           16-bit data for the windowing multiply-accumulate.
 * bo      - pointer to the ring index (read always, written when
 *           channel != 1).
 *
 * External symbols used from the asm: dct64_MMX_func (called indirectly) and
 * decwins (16-bit window table).
 *
 * Implementation notes:
 *  - Every "m" operand is read before the first pushl (the first pushl is the
 *    read of %0), so the operands stay valid even if the compiler addresses
 *    them relative to %esp.
 *  - Labels are numeric local labels so they can neither clash with the
 *    compiler's own .L<n> labels nor be defined twice if the asm is copied.
 *  - NOTE(review): %ebp is used as a scratch register and listed as a
 *    clobber; this presumably requires building without a frame pointer
 *    (-fomit-frame-pointer) - confirm against the Makefile flags.
 *  - NOTE(review): three dwords are pushed for the indirect call and nothing
 *    in this function removes them again; presumably the callee pops its own
 *    arguments - confirm against the dct64 implementation.
 */
void synth_1to1_MMX_s(real *bandPtr, int channel, short *samples,
                      short *buffs, int *bo)
{

__asm __volatile(
        /* Load arguments; edi = samples + channel (in shorts). */
        "movl %1,%%ecx\n\t"
        "movl %2,%%edi\n\t"
        "movl $15,%%ebx\n\t"
        "movl %4,%%edx\n\t"
        "leal (%%edi,%%ecx,2),%%edi\n\t"
        "decl %%ecx\n\t"
        "movl %3,%%esi\n\t"
        "movl (%%edx),%%eax\n\t"
        /* channel == 1: keep *bo and buffs as they are. */
        "jecxz 1f\n\t"
        /* Otherwise: *bo = (*bo - 1) & 15, buffs += 1088 bytes. */
        "decl %%eax\n\t"
        "andl %%ebx,%%eax\n\t"
        "leal 1088(%%esi),%%esi\n\t"
        "movl %%eax,(%%edx)\n\t"
"1:\n\t"
        /* Pick the two dct64 output pointers depending on the parity of
           (bo + 1) & 15; ebp ends up as the window offset. */
        "leal (%%esi,%%eax,2),%%edx\n\t"
        "movl %%eax,%%ebp\n\t"
        "incl %%eax\n\t"
        "pushl %0\n\t"
        "andl %%ebx,%%eax\n\t"
        "leal 544(%%esi,%%eax,2),%%ecx\n\t"
        "incl %%ebx\n\t"
        "testl $1, %%eax\n\t"
        "jnz 2f\n\t"
        "xchgl %%edx,%%ecx\n\t"
        "incl %%ebp\n\t"
        "leal 544(%%esi),%%esi\n\t"
"2:\n\t"
        "emms\n\t"
        "pushl %%edx\n\t"
        "pushl %%ecx\n\t"
        "call *dct64_MMX_func\n\t"
        /* ecx = 17 (saved for the odd-count test below), then 17 >> 1 = 8
           loop iterations; edx = decwins + 2 * (16 - ebp). */
        "leal 1(%%ebx), %%ecx\n\t"
        "subl %%ebp,%%ebx\n\t"
        "pushl %%ecx\n\t"
        "leal decwins(%%ebx,%%ebx,1), %%edx\n\t"
        "shrl $1, %%ecx\n\t"
".align 16\n\t"
        /* First half: two output samples per iteration, buffer walking up. */
"3:\n\t"
        "movq (%%edx),%%mm0\n\t"
        "movq 64(%%edx),%%mm4\n\t"
        "pmaddwd (%%esi),%%mm0\n\t"
        "pmaddwd 32(%%esi),%%mm4\n\t"
        "movq 8(%%edx),%%mm1\n\t"
        "movq 72(%%edx),%%mm5\n\t"
        "pmaddwd 8(%%esi),%%mm1\n\t"
        "pmaddwd 40(%%esi),%%mm5\n\t"
        "movq 16(%%edx),%%mm2\n\t"
        "movq 80(%%edx),%%mm6\n\t"
        "pmaddwd 16(%%esi),%%mm2\n\t"
        "pmaddwd 48(%%esi),%%mm6\n\t"
        "movq 24(%%edx),%%mm3\n\t"
        "movq 88(%%edx),%%mm7\n\t"
        "pmaddwd 24(%%esi),%%mm3\n\t"
        "pmaddwd 56(%%esi),%%mm7\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm5,%%mm4\n\t"
        "paddd %%mm2,%%mm0\n\t"
        "paddd %%mm6,%%mm4\n\t"
        "paddd %%mm3,%%mm0\n\t"
        "paddd %%mm7,%%mm4\n\t"
        "movq %%mm0,%%mm1\n\t"
        "movq %%mm4,%%mm5\n\t"
        "psrlq $32,%%mm1\n\t"
        "psrlq $32,%%mm5\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm5,%%mm4\n\t"
        "psrad $13,%%mm0\n\t"
        "psrad $13,%%mm4\n\t"
        "packssdw %%mm0,%%mm0\n\t"
        "packssdw %%mm4,%%mm4\n\t"

        /* Merge both results into the low halves of two output dwords. */
        "movq (%%edi), %%mm1\n\t"
        "punpckldq %%mm4, %%mm0\n\t"
        "pand one_null, %%mm1\n\t"
        "pand null_one, %%mm0\n\t"
        "por %%mm0, %%mm1\n\t"
        "movq %%mm1,(%%edi)\n\t"

        "leal 64(%%esi),%%esi\n\t"
        "leal 128(%%edx),%%edx\n\t"
        "leal 8(%%edi),%%edi\n\t"

        "decl %%ecx\n\t"
        "jnz 3b\n\t"

        /* Odd sample count (17): handle the one remaining sample. */
        "popl %%ecx\n\t"
        "andl $1, %%ecx\n\t"
        "jecxz 4f\n\t"

        "movq (%%edx),%%mm0\n\t"
        "pmaddwd (%%esi),%%mm0\n\t"
        "movq 8(%%edx),%%mm1\n\t"
        "pmaddwd 8(%%esi),%%mm1\n\t"
        "movq 16(%%edx),%%mm2\n\t"
        "pmaddwd 16(%%esi),%%mm2\n\t"
        "movq 24(%%edx),%%mm3\n\t"
        "pmaddwd 24(%%esi),%%mm3\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm2,%%mm0\n\t"
        "paddd %%mm3,%%mm0\n\t"
        "movq %%mm0,%%mm1\n\t"
        "psrlq $32,%%mm1\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "psrad $13,%%mm0\n\t"
        "packssdw %%mm0,%%mm0\n\t"
        "movd %%mm0,%%eax\n\t"
        "movw %%ax, (%%edi)\n\t"
        "leal 32(%%esi),%%esi\n\t"
        "leal 64(%%edx),%%edx\n\t"
        "leal 4(%%edi),%%edi\n\t"

        /* Second half: buffer walking down, results negated with
           saturation (0 - x) before being stored. */
"4:\n\t"
        "subl $64,%%esi\n\t"
        "movl $7,%%ecx\n\t"
".align 16\n\t"
"5:\n\t"
        "movq (%%edx),%%mm0\n\t"
        "movq 64(%%edx),%%mm4\n\t"
        "pmaddwd (%%esi),%%mm0\n\t"
        "pmaddwd -32(%%esi),%%mm4\n\t"
        "movq 8(%%edx),%%mm1\n\t"
        "movq 72(%%edx),%%mm5\n\t"
        "pmaddwd 8(%%esi),%%mm1\n\t"
        "pmaddwd -24(%%esi),%%mm5\n\t"
        "movq 16(%%edx),%%mm2\n\t"
        "movq 80(%%edx),%%mm6\n\t"
        "pmaddwd 16(%%esi),%%mm2\n\t"
        "pmaddwd -16(%%esi),%%mm6\n\t"
        "movq 24(%%edx),%%mm3\n\t"
        "movq 88(%%edx),%%mm7\n\t"
        "pmaddwd 24(%%esi),%%mm3\n\t"
        "pmaddwd -8(%%esi),%%mm7\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm5,%%mm4\n\t"
        "paddd %%mm2,%%mm0\n\t"
        "paddd %%mm6,%%mm4\n\t"
        "paddd %%mm3,%%mm0\n\t"
        "paddd %%mm7,%%mm4\n\t"
        "movq %%mm0,%%mm1\n\t"
        "movq %%mm4,%%mm5\n\t"
        "psrlq $32,%%mm1\n\t"
        "psrlq $32,%%mm5\n\t"
        "paddd %%mm0,%%mm1\n\t"
        "paddd %%mm4,%%mm5\n\t"
        "psrad $13,%%mm1\n\t"
        "psrad $13,%%mm5\n\t"
        "packssdw %%mm1,%%mm1\n\t"
        "packssdw %%mm5,%%mm5\n\t"
        "psubd %%mm0,%%mm0\n\t"
        "psubd %%mm4,%%mm4\n\t"
        "psubsw %%mm1,%%mm0\n\t"
        "psubsw %%mm5,%%mm4\n\t"

        "movq (%%edi), %%mm1\n\t"
        "punpckldq %%mm4, %%mm0\n\t"
        "pand one_null, %%mm1\n\t"
        "pand null_one, %%mm0\n\t"
        "por %%mm0, %%mm1\n\t"
        "movq %%mm1,(%%edi)\n\t"

        "subl $64,%%esi\n\t"
        "addl $128,%%edx\n\t"
        "leal 8(%%edi),%%edi\n\t"
        "decl %%ecx\n\t"
        "jnz 5b\n\t"

        /* Last (32nd) sample, negated like the rest of the second half. */
        "movq (%%edx),%%mm0\n\t"
        "pmaddwd (%%esi),%%mm0\n\t"
        "movq 8(%%edx),%%mm1\n\t"
        "pmaddwd 8(%%esi),%%mm1\n\t"
        "movq 16(%%edx),%%mm2\n\t"
        "pmaddwd 16(%%esi),%%mm2\n\t"
        "movq 24(%%edx),%%mm3\n\t"
        "pmaddwd 24(%%esi),%%mm3\n\t"
        "paddd %%mm1,%%mm0\n\t"
        "paddd %%mm2,%%mm0\n\t"
        "paddd %%mm3,%%mm0\n\t"
        "movq %%mm0,%%mm1\n\t"
        "psrlq $32,%%mm1\n\t"
        "paddd %%mm0,%%mm1\n\t"
        "psrad $13,%%mm1\n\t"
        "packssdw %%mm1,%%mm1\n\t"
        "psubd %%mm0,%%mm0\n\t"
        "psubsw %%mm1,%%mm0\n\t"
        "movd %%mm0,%%eax\n\t"
        "movw %%ax,(%%edi)\n\t"
        /* Leave the FPU usable for the C code that follows. */
        "emms\n\t"
        :
        :"m"(bandPtr),"m"(channel),"m"(samples),"m"(buffs),"m"(bo)
        /* Every general-purpose register the template (or the routine it
           calls) may overwrite must be listed, including eax/ecx/edx. */
        :"memory","cc","%eax","%ecx","%edx","%ebp","%edi","%esi","%ebx");
}
--- a/mp3lib/decode_MMX.s Sun Jan 13 17:23:32 2002 +0000 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,252 +0,0 @@ -# this code comes under GPL -# This code was taken from http://www.mpg123.org -# See ChangeLog of mpg123-0.59s-pre.1 for detail -# Applied to mplayer by Nick Kurshev <nickols_k@mail.ru> -# -# Local ChangeLog: -# - Partial loops unrolling and removing MOVW insn from loops -# - -.data -.align 8 -null_one: .long 0x0000ffff, 0x0000ffff -one_null: .long 0xffff0000, 0xffff0000 -.globl costab_mmx -costab_mmx: - .long 1056974725 - .long 1057056395 - .long 1057223771 - .long 1057485416 - .long 1057855544 - .long 1058356026 - .long 1059019886 - .long 1059897405 - .long 1061067246 - .long 1062657950 - .long 1064892987 - .long 1066774581 - .long 1069414683 - .long 1073984175 - .long 1079645762 - .long 1092815430 - .long 1057005197 - .long 1057342072 - .long 1058087743 - .long 1059427869 - .long 1061799040 - .long 1065862217 - .long 1071413542 - .long 1084439708 - .long 1057128951 - .long 1058664893 - .long 1063675095 - .long 1076102863 - .long 1057655764 - .long 1067924853 - .long 1060439283 - -.text - -.globl synth_1to1_MMX_s -// -// void synth_1to1_MMX_s(real *bandPtr, int channel, short *samples, -// short *buffs, int *bo); -// -synth_1to1_MMX_s: - pushl %ebp - pushl %edi - pushl %esi - pushl %ebx - movl 24(%esp),%ecx - movl 28(%esp),%edi - movl $15,%ebx - movl 36(%esp),%edx - leal (%edi,%ecx,2),%edi - decl %ecx - movl 32(%esp),%esi - movl (%edx),%eax - jecxz .L1 - decl %eax - andl %ebx,%eax - leal 1088(%esi),%esi - movl %eax,(%edx) -.L1: - leal (%esi,%eax,2),%edx - movl %eax,%ebp - incl %eax - pushl 20(%esp) - andl %ebx,%eax - leal 544(%esi,%eax,2),%ecx - incl %ebx - testl $1, %eax - jnz .L2 - xchgl %edx,%ecx - incl %ebp - leal 544(%esi),%esi -.L2: - emms - pushl %edx - pushl %ecx - call *dct64_MMX_func - leal 1(%ebx), %ecx - subl %ebp,%ebx - pushl %ecx - leal decwins(%ebx,%ebx,1), %edx - shrl $1, %ecx -.align 16 -.L3: - movq (%edx),%mm0 - movq 
64(%edx),%mm4 - pmaddwd (%esi),%mm0 - pmaddwd 32(%esi),%mm4 - movq 8(%edx),%mm1 - movq 72(%edx),%mm5 - pmaddwd 8(%esi),%mm1 - pmaddwd 40(%esi),%mm5 - movq 16(%edx),%mm2 - movq 80(%edx),%mm6 - pmaddwd 16(%esi),%mm2 - pmaddwd 48(%esi),%mm6 - movq 24(%edx),%mm3 - movq 88(%edx),%mm7 - pmaddwd 24(%esi),%mm3 - pmaddwd 56(%esi),%mm7 - paddd %mm1,%mm0 - paddd %mm5,%mm4 - paddd %mm2,%mm0 - paddd %mm6,%mm4 - paddd %mm3,%mm0 - paddd %mm7,%mm4 - movq %mm0,%mm1 - movq %mm4,%mm5 - psrlq $32,%mm1 - psrlq $32,%mm5 - paddd %mm1,%mm0 - paddd %mm5,%mm4 - psrad $13,%mm0 - psrad $13,%mm4 - packssdw %mm0,%mm0 - packssdw %mm4,%mm4 - - movq (%edi), %mm1 - punpckldq %mm4, %mm0 - pand one_null, %mm1 - pand null_one, %mm0 - por %mm0, %mm1 - movq %mm1,(%edi) - - leal 64(%esi),%esi - leal 128(%edx),%edx - leal 8(%edi),%edi - - decl %ecx - jnz .L3 - - popl %ecx - andl $1, %ecx - jecxz .next_loop - - movq (%edx),%mm0 - pmaddwd (%esi),%mm0 - movq 8(%edx),%mm1 - pmaddwd 8(%esi),%mm1 - movq 16(%edx),%mm2 - pmaddwd 16(%esi),%mm2 - movq 24(%edx),%mm3 - pmaddwd 24(%esi),%mm3 - paddd %mm1,%mm0 - paddd %mm2,%mm0 - paddd %mm3,%mm0 - movq %mm0,%mm1 - psrlq $32,%mm1 - paddd %mm1,%mm0 - psrad $13,%mm0 - packssdw %mm0,%mm0 - movd %mm0,%eax - movw %ax, (%edi) - leal 32(%esi),%esi - leal 64(%edx),%edx - leal 4(%edi),%edi - -.next_loop: - subl $64,%esi - movl $7,%ecx -.align 16 -.L4: - movq (%edx),%mm0 - movq 64(%edx),%mm4 - pmaddwd (%esi),%mm0 - pmaddwd -32(%esi),%mm4 - movq 8(%edx),%mm1 - movq 72(%edx),%mm5 - pmaddwd 8(%esi),%mm1 - pmaddwd -24(%esi),%mm5 - movq 16(%edx),%mm2 - movq 80(%edx),%mm6 - pmaddwd 16(%esi),%mm2 - pmaddwd -16(%esi),%mm6 - movq 24(%edx),%mm3 - movq 88(%edx),%mm7 - pmaddwd 24(%esi),%mm3 - pmaddwd -8(%esi),%mm7 - paddd %mm1,%mm0 - paddd %mm5,%mm4 - paddd %mm2,%mm0 - paddd %mm6,%mm4 - paddd %mm3,%mm0 - paddd %mm7,%mm4 - movq %mm0,%mm1 - movq %mm4,%mm5 - psrlq $32,%mm1 - psrlq $32,%mm5 - paddd %mm0,%mm1 - paddd %mm4,%mm5 - psrad $13,%mm1 - psrad $13,%mm5 - packssdw %mm1,%mm1 - packssdw 
%mm5,%mm5 - psubd %mm0,%mm0 - psubd %mm4,%mm4 - psubsw %mm1,%mm0 - psubsw %mm5,%mm4 - - movq (%edi), %mm1 - punpckldq %mm4, %mm0 - pand one_null, %mm1 - pand null_one, %mm0 - por %mm0, %mm1 - movq %mm1,(%edi) - - subl $64,%esi - addl $128,%edx - leal 8(%edi),%edi - decl %ecx - jnz .L4 - - movq (%edx),%mm0 - pmaddwd (%esi),%mm0 - movq 8(%edx),%mm1 - pmaddwd 8(%esi),%mm1 - movq 16(%edx),%mm2 - pmaddwd 16(%esi),%mm2 - movq 24(%edx),%mm3 - pmaddwd 24(%esi),%mm3 - paddd %mm1,%mm0 - paddd %mm2,%mm0 - paddd %mm3,%mm0 - movq %mm0,%mm1 - psrlq $32,%mm1 - paddd %mm0,%mm1 - psrad $13,%mm1 - packssdw %mm1,%mm1 - psubd %mm0,%mm0 - psubsw %mm1,%mm0 - movd %mm0,%eax - movw %ax,(%edi) - - emms - popl %ebx - popl %esi - popl %edi - popl %ebp - ret