Mercurial > mplayer.hg
diff mp3lib/decode_MMX.s @ 1245:03b7e2955a20
Added newest MMX-optimized decoding core, which speeds up decoding by at least 13% on any CPU.
author | nick |
---|---|
date | Fri, 29 Jun 2001 17:55:35 +0000 |
parents | |
children | 9bf97b404134 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/mp3lib/decode_MMX.s Fri Jun 29 17:55:35 2001 +0000 @@ -0,0 +1,117 @@ +# this code comes under GPL +# This code was taken from http://www.mpg123.org +# See ChangeLog of mpg123-0.59s-pre.1 for detail +# Applied to mplayer by Nick Kurshev <nickols_k@mail.ru> +# +# TODO: Partial loops unrolling and removing MOVW insn. +# + +.text + +.globl synth_1to1_MMX_s + +synth_1to1_MMX_s: + pushl %ebp + pushl %edi + pushl %esi + pushl %ebx + movl 24(%esp),%ecx + movl 28(%esp),%edi + movl $15,%ebx + movl 36(%esp),%edx + leal (%edi,%ecx,2),%edi + decl %ecx + movl 32(%esp),%esi + movl (%edx),%eax + jecxz .L1 + decl %eax + andl %ebx,%eax + leal 1088(%esi),%esi + movl %eax,(%edx) +.L1: + leal (%esi,%eax,2),%edx + movl %eax,%ebp + incl %eax + pushl 20(%esp) + andl %ebx,%eax + leal 544(%esi,%eax,2),%ecx + incl %ebx + testl $1, %eax + jnz .L2 + xchgl %edx,%ecx + incl %ebp + leal 544(%esi),%esi +.L2: + emms + pushl %edx + pushl %ecx + call *dct64_MMX_func + addl $12,%esp + leal 1(%ebx), %ecx + subl %ebp,%ebx + + leal decwins(%ebx,%ebx,1), %edx +.L3: + movq (%edx),%mm0 + pmaddwd (%esi),%mm0 + movq 8(%edx),%mm1 + pmaddwd 8(%esi),%mm1 + movq 16(%edx),%mm2 + pmaddwd 16(%esi),%mm2 + movq 24(%edx),%mm3 + pmaddwd 24(%esi),%mm3 + paddd %mm1,%mm0 + paddd %mm2,%mm0 + paddd %mm3,%mm0 + movq %mm0,%mm1 + psrlq $32,%mm1 + paddd %mm1,%mm0 + psrad $13,%mm0 + packssdw %mm0,%mm0 + movd %mm0,%eax + movw %ax, (%edi) + + leal 32(%esi),%esi + leal 64(%edx),%edx + leal 4(%edi),%edi + decl %ecx + jnz .L3 + + + subl $64,%esi + movl $15,%ecx +.L4: + movq (%edx),%mm0 + pmaddwd (%esi),%mm0 + movq 8(%edx),%mm1 + pmaddwd 8(%esi),%mm1 + movq 16(%edx),%mm2 + pmaddwd 16(%esi),%mm2 + movq 24(%edx),%mm3 + pmaddwd 24(%esi),%mm3 + paddd %mm1,%mm0 + paddd %mm2,%mm0 + paddd %mm3,%mm0 + movq %mm0,%mm1 + psrlq $32,%mm1 + paddd %mm0,%mm1 + psrad $13,%mm1 + packssdw %mm1,%mm1 + psubd %mm0,%mm0 + psubsw %mm1,%mm0 + movd %mm0,%eax + movw %ax,(%edi) + + subl $32,%esi + addl $64,%edx + 
leal 4(%edi),%edi + decl %ecx + jnz .L4 + emms + popl %ebx + popl %esi + popl %edi + popl %ebp + ret + +