changeset 169:cf71ec544543 trunk
[svn] Remove assembly optimizations, we want to play nicely with any system.
author      nenolod
date        Sun, 13 Nov 2005 12:24:31 -0800
parents     32653058b307
children    40a60fe13ed6
files       Plugins/Input/mpg123/Makefile.am
            Plugins/Input/mpg123/dct36_3dnow.s
            Plugins/Input/mpg123/dct64_3dnow.s
            Plugins/Input/mpg123/dct64_MMX.s
            Plugins/Input/mpg123/decode_3dnow.s
            Plugins/Input/mpg123/decode_MMX.s
            Plugins/Input/mpg123/decode_i586.s
            Plugins/Input/mpg123/getcpuflags.s
diffstat    8 files changed, 2 insertions(+), 2831 deletions(-)
--- a/Plugins/Input/mpg123/Makefile.am	Sun Nov 13 11:37:33 2005 -0800
+++ b/Plugins/Input/mpg123/Makefile.am	Sun Nov 13 12:24:31 2005 -0800
@@ -17,36 +17,16 @@
 if ARCH_X86
 
-if USE_X86ASM
-
-if USE_SIMD
-
-EXTRA_SRC = decode_i386.c dct64_i386.c decode_i586.s \
-	decode_3dnow.s dct64_3dnow.s dct36_3dnow.s getcpuflags.s \
-	dct64_MMX.s decode_MMX.s
-
-else
-
-EXTRA_SRC = decode_i386.c dct64_i386.c decode_i586.s
-
-endif # USE_SIMD
-
-else
-
 EXTRA_SRC = decode_i386.c dct64_i386.c
 
-endif # SE_X86_ASM
-
 else
 
 EXTRA_SRC = decode.c dct64.c
 
 endif # ARCH_X86
 
-EXTRA_DIST = decode.c decode_i386.c decode_i586.s decode_3dnow.s dct64_i386.c \
-	getbits.c dct64.c dct64_3dnow.s dct36_3dnow.s getcpuflags.s \
-	dct64_MMX.s decode_MMX.s \
-	mp3.xpm
+EXTRA_DIST = decode.c decode_i386.c dct64_i386.c \
+	getbits.c dct64.c mp3.xpm
 
 libmpg123_la_SOURCES = $(COMMON_SRC) $(EXTRA_SRC)
 libmpg123_la_LDFLAGS = $(PLUGIN_LDFLAGS)
--- a/Plugins/Input/mpg123/dct36_3dnow.s Sun Nov 13 11:37:33 2005 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,499 +0,0 @@ -/ -/ dct36_3dnow.s - 3DNow! optimized dct36() -/ -/ This code based 'dct36_3dnow.s' by Syuuhei Kashiyama -/ <squash@mb.kcom.ne.jp>,only two types of changes have been made: -/ -/ - remove PREFETCH instruction for speedup -/ - change function name for support 3DNow! automatic detect -/ -/ You can find Kashiyama's original 3dnow! support patch -/ (for mpg123-0.59o) at -/ http://user.ecc.u-tokyo.ac.jp/~g810370/linux-simd/ (Japanese). -/ -/ by KIMURA Takuhiro <kim@hannah.ipc.miyakyo-u.ac.jp> - until 31.Mar.1999 -/ <kim@comtec.co.jp> - after 1.Apr.1999 -/ - -/// -/// Replacement of dct36() with AMD's 3DNow! SIMD operations support -/// -/// Syuuhei Kashiyama <squash@mb.kcom.ne.jp> -/// -/// The author of this program disclaim whole expressed or implied -/// warranties with regard to this program, and in no event shall the -/// author of this program liable to whatever resulted from the use of -/// this program. Use it at your own risk. -/// - - .globl dct36_3dnow - .type dct36_3dnow,@function -dct36_3dnow: - pushl %ebp - movl %esp,%ebp - subl $120,%esp - pushl %esi - pushl %ebx - movl 8(%ebp),%eax - movl 12(%ebp),%esi - movl 16(%ebp),%ecx - movl 20(%ebp),%edx - movl 24(%ebp),%ebx - leal -128(%ebp),%esp - - femms - movq (%eax),%mm0 - movq 4(%eax),%mm1 - pfadd %mm1,%mm0 - movq %mm0,4(%eax) - psrlq $32,%mm1 - movq 12(%eax),%mm2 - punpckldq %mm2,%mm1 - pfadd %mm2,%mm1 - movq %mm1,12(%eax) - psrlq $32,%mm2 - movq 20(%eax),%mm3 - punpckldq %mm3,%mm2 - pfadd %mm3,%mm2 - movq %mm2,20(%eax) - psrlq $32,%mm3 - movq 28(%eax),%mm4 - punpckldq %mm4,%mm3 - pfadd %mm4,%mm3 - movq %mm3,28(%eax) - psrlq $32,%mm4 - movq 36(%eax),%mm5 - punpckldq %mm5,%mm4 - pfadd %mm5,%mm4 - movq %mm4,36(%eax) - psrlq $32,%mm5 - movq 44(%eax),%mm6 - punpckldq %mm6,%mm5 - pfadd %mm6,%mm5 - movq %mm5,44(%eax) - psrlq $32,%mm6 - movq 52(%eax),%mm7 - punpckldq %mm7,%mm6 - pfadd %mm7,%mm6 - movq %mm6,52(%eax) - psrlq $32,%mm7 - movq 60(%eax),%mm0 - punpckldq %mm0,%mm7 - pfadd %mm0,%mm7 - movq %mm7,60(%eax) - psrlq $32,%mm0 - movd 68(%eax),%mm1 - pfadd %mm1,%mm0 - movd %mm0,68(%eax) - movd 4(%eax),%mm0 - movd 12(%eax),%mm1 - punpckldq %mm1,%mm0 - punpckldq 20(%eax),%mm1 - pfadd %mm1,%mm0 - movd %mm0,12(%eax) - psrlq $32,%mm0 - movd %mm0,20(%eax) - psrlq $32,%mm1 - movd 28(%eax),%mm2 - punpckldq %mm2,%mm1 - punpckldq 36(%eax),%mm2 - pfadd %mm2,%mm1 - movd %mm1,28(%eax) - psrlq $32,%mm1 - movd %mm1,36(%eax) - psrlq $32,%mm2 - movd 44(%eax),%mm3 - punpckldq %mm3,%mm2 - punpckldq 52(%eax),%mm3 - pfadd %mm3,%mm2 - movd %mm2,44(%eax) - psrlq $32,%mm2 - movd %mm2,52(%eax) - psrlq $32,%mm3 - movd 60(%eax),%mm4 - punpckldq %mm4,%mm3 - punpckldq 68(%eax),%mm4 - pfadd %mm4,%mm3 - movd %mm3,60(%eax) - psrlq $32,%mm3 - movd %mm3,68(%eax) - - movq 24(%eax),%mm0 - movq 48(%eax),%mm1 - movd COS9+12,%mm2 - punpckldq %mm2,%mm2 - movd COS9+24,%mm3 - punpckldq %mm3,%mm3 - pfmul %mm2,%mm0 - pfmul %mm3,%mm1 - pushl %eax - movl $1,%eax - movd %eax,%mm7 - pi2fd %mm7,%mm7 - popl %eax - movq 8(%eax),%mm2 - movd COS9+4,%mm3 - punpckldq %mm3,%mm3 - pfmul %mm3,%mm2 - pfadd %mm0,%mm2 - movq 40(%eax),%mm3 - movd COS9+20,%mm4 - punpckldq %mm4,%mm4 - pfmul %mm4,%mm3 - pfadd %mm3,%mm2 - movq 56(%eax),%mm3 - movd COS9+28,%mm4 - punpckldq %mm4,%mm4 - pfmul %mm4,%mm3 - pfadd %mm3,%mm2 - movq (%eax),%mm3 - movq 16(%eax),%mm4 - movd COS9+8,%mm5 - punpckldq %mm5,%mm5 - pfmul %mm5,%mm4 - pfadd %mm4,%mm3 - movq 32(%eax),%mm4 - movd 
COS9+16,%mm5 - punpckldq %mm5,%mm5 - pfmul %mm5,%mm4 - pfadd %mm4,%mm3 - pfadd %mm1,%mm3 - movq 64(%eax),%mm4 - movd COS9+32,%mm5 - punpckldq %mm5,%mm5 - pfmul %mm5,%mm4 - pfadd %mm4,%mm3 - movq %mm2,%mm4 - pfadd %mm3,%mm4 - movq %mm7,%mm5 - punpckldq tfcos36+0,%mm5 - pfmul %mm5,%mm4 - movq %mm4,%mm5 - pfacc %mm5,%mm5 - movd 108(%edx),%mm6 - punpckldq 104(%edx),%mm6 - pfmul %mm6,%mm5 - movd %mm5,36(%ecx) - psrlq $32,%mm5 - movd %mm5,32(%ecx) - movq %mm4,%mm6 - punpckldq %mm6,%mm5 - pfsub %mm6,%mm5 - punpckhdq %mm5,%mm5 - movd 32(%edx),%mm6 - punpckldq 36(%edx),%mm6 - pfmul %mm6,%mm5 - movd 32(%esi),%mm6 - punpckldq 36(%esi),%mm6 - pfadd %mm6,%mm5 - movd %mm5,1024(%ebx) - psrlq $32,%mm5 - movd %mm5,1152(%ebx) - movq %mm3,%mm4 - pfsub %mm2,%mm4 - movq %mm7,%mm5 - punpckldq tfcos36+32,%mm5 - pfmul %mm5,%mm4 - movq %mm4,%mm5 - pfacc %mm5,%mm5 - movd 140(%edx),%mm6 - punpckldq 72(%edx),%mm6 - pfmul %mm6,%mm5 - movd %mm5,68(%ecx) - psrlq $32,%mm5 - movd %mm5,0(%ecx) - movq %mm4,%mm6 - punpckldq %mm6,%mm5 - pfsub %mm6,%mm5 - punpckhdq %mm5,%mm5 - movd 0(%edx),%mm6 - punpckldq 68(%edx),%mm6 - pfmul %mm6,%mm5 - movd 0(%esi),%mm6 - punpckldq 68(%esi),%mm6 - pfadd %mm6,%mm5 - movd %mm5,0(%ebx) - psrlq $32,%mm5 - movd %mm5,2176(%ebx) - movq 8(%eax),%mm2 - movq 40(%eax),%mm3 - pfsub %mm3,%mm2 - movq 56(%eax),%mm3 - pfsub %mm3,%mm2 - movd COS9+12,%mm3 - punpckldq %mm3,%mm3 - pfmul %mm3,%mm2 - movq 16(%eax),%mm3 - movq 32(%eax),%mm4 - pfsub %mm4,%mm3 - movq 64(%eax),%mm4 - pfsub %mm4,%mm3 - movd COS9+24,%mm4 - punpckldq %mm4,%mm4 - pfmul %mm4,%mm3 - movq 48(%eax),%mm4 - pfsub %mm4,%mm3 - movq (%eax),%mm4 - pfadd %mm4,%mm3 - movq %mm2,%mm4 - pfadd %mm3,%mm4 - movq %mm7,%mm5 - punpckldq tfcos36+4,%mm5 - pfmul %mm5,%mm4 - movq %mm4,%mm5 - pfacc %mm5,%mm5 - movd 112(%edx),%mm6 - punpckldq 100(%edx),%mm6 - pfmul %mm6,%mm5 - movd %mm5,40(%ecx) - psrlq $32,%mm5 - movd %mm5,28(%ecx) - movq %mm4,%mm6 - punpckldq %mm6,%mm5 - pfsub %mm6,%mm5 - punpckhdq %mm5,%mm5 - movd 28(%edx),%mm6 - punpckldq 40(%edx),%mm6 - pfmul %mm6,%mm5 - movd 28(%esi),%mm6 - punpckldq 40(%esi),%mm6 - pfadd %mm6,%mm5 - movd %mm5,896(%ebx) - psrlq $32,%mm5 - movd %mm5,1280(%ebx) - movq %mm3,%mm4 - pfsub %mm2,%mm4 - movq %mm7,%mm5 - punpckldq tfcos36+28,%mm5 - pfmul %mm5,%mm4 - movq %mm4,%mm5 - pfacc %mm5,%mm5 - movd 136(%edx),%mm6 - punpckldq 76(%edx),%mm6 - pfmul %mm6,%mm5 - movd %mm5,64(%ecx) - psrlq $32,%mm5 - movd %mm5,4(%ecx) - movq %mm4,%mm6 - punpckldq %mm6,%mm5 - pfsub %mm6,%mm5 - punpckhdq %mm5,%mm5 - movd 4(%edx),%mm6 - punpckldq 64(%edx),%mm6 - pfmul %mm6,%mm5 - movd 4(%esi),%mm6 - punpckldq 64(%esi),%mm6 - pfadd %mm6,%mm5 - movd %mm5,128(%ebx) - psrlq $32,%mm5 - movd %mm5,2048(%ebx) - - movq 8(%eax),%mm2 - movd COS9+20,%mm3 - punpckldq %mm3,%mm3 - pfmul %mm3,%mm2 - pfsub %mm0,%mm2 - movq 40(%eax),%mm3 - movd COS9+28,%mm4 - punpckldq %mm4,%mm4 - pfmul %mm4,%mm3 - pfsub %mm3,%mm2 - movq 56(%eax),%mm3 - movd COS9+4,%mm4 - punpckldq %mm4,%mm4 - pfmul %mm4,%mm3 - pfadd %mm3,%mm2 - movq (%eax),%mm3 - movq 16(%eax),%mm4 - movd COS9+32,%mm5 - punpckldq %mm5,%mm5 - pfmul %mm5,%mm4 - pfsub %mm4,%mm3 - movq 32(%eax),%mm4 - movd COS9+8,%mm5 - punpckldq %mm5,%mm5 - pfmul %mm5,%mm4 - pfsub %mm4,%mm3 - pfadd %mm1,%mm3 - movq 64(%eax),%mm4 - movd COS9+16,%mm5 - punpckldq %mm5,%mm5 - pfmul %mm5,%mm4 - pfadd %mm4,%mm3 - movq %mm2,%mm4 - pfadd %mm3,%mm4 - movq %mm7,%mm5 - punpckldq tfcos36+8,%mm5 - pfmul %mm5,%mm4 - movq %mm4,%mm5 - pfacc %mm5,%mm5 - movd 116(%edx),%mm6 - punpckldq 96(%edx),%mm6 - pfmul %mm6,%mm5 - movd %mm5,44(%ecx) - psrlq 
$32,%mm5 - movd %mm5,24(%ecx) - movq %mm4,%mm6 - punpckldq %mm6,%mm5 - pfsub %mm6,%mm5 - punpckhdq %mm5,%mm5 - movd 24(%edx),%mm6 - punpckldq 44(%edx),%mm6 - pfmul %mm6,%mm5 - movd 24(%esi),%mm6 - punpckldq 44(%esi),%mm6 - pfadd %mm6,%mm5 - movd %mm5,768(%ebx) - psrlq $32,%mm5 - movd %mm5,1408(%ebx) - movq %mm3,%mm4 - pfsub %mm2,%mm4 - movq %mm7,%mm5 - punpckldq tfcos36+24,%mm5 - pfmul %mm5,%mm4 - movq %mm4,%mm5 - pfacc %mm5,%mm5 - movd 132(%edx),%mm6 - punpckldq 80(%edx),%mm6 - pfmul %mm6,%mm5 - movd %mm5,60(%ecx) - psrlq $32,%mm5 - movd %mm5,8(%ecx) - movq %mm4,%mm6 - punpckldq %mm6,%mm5 - pfsub %mm6,%mm5 - punpckhdq %mm5,%mm5 - movd 8(%edx),%mm6 - punpckldq 60(%edx),%mm6 - pfmul %mm6,%mm5 - movd 8(%esi),%mm6 - punpckldq 60(%esi),%mm6 - pfadd %mm6,%mm5 - movd %mm5,256(%ebx) - psrlq $32,%mm5 - movd %mm5,1920(%ebx) - movq 8(%eax),%mm2 - movd COS9+28,%mm3 - punpckldq %mm3,%mm3 - pfmul %mm3,%mm2 - pfsub %mm0,%mm2 - movq 40(%eax),%mm3 - movd COS9+4,%mm4 - punpckldq %mm4,%mm4 - pfmul %mm4,%mm3 - pfadd %mm3,%mm2 - movq 56(%eax),%mm3 - movd COS9+20,%mm4 - punpckldq %mm4,%mm4 - pfmul %mm4,%mm3 - pfsub %mm3,%mm2 - movq (%eax),%mm3 - movq 16(%eax),%mm4 - movd COS9+16,%mm5 - punpckldq %mm5,%mm5 - pfmul %mm5,%mm4 - pfsub %mm4,%mm3 - movq 32(%eax),%mm4 - movd COS9+32,%mm5 - punpckldq %mm5,%mm5 - pfmul %mm5,%mm4 - pfadd %mm4,%mm3 - pfadd %mm1,%mm3 - movq 64(%eax),%mm4 - movd COS9+8,%mm5 - punpckldq %mm5,%mm5 - pfmul %mm5,%mm4 - pfsub %mm4,%mm3 - movq %mm2,%mm4 - pfadd %mm3,%mm4 - movq %mm7,%mm5 - punpckldq tfcos36+12,%mm5 - pfmul %mm5,%mm4 - movq %mm4,%mm5 - pfacc %mm5,%mm5 - movd 120(%edx),%mm6 - punpckldq 92(%edx),%mm6 - pfmul %mm6,%mm5 - movd %mm5,48(%ecx) - psrlq $32,%mm5 - movd %mm5,20(%ecx) - movq %mm4,%mm6 - punpckldq %mm6,%mm5 - pfsub %mm6,%mm5 - punpckhdq %mm5,%mm5 - movd 20(%edx),%mm6 - punpckldq 48(%edx),%mm6 - pfmul %mm6,%mm5 - movd 20(%esi),%mm6 - punpckldq 48(%esi),%mm6 - pfadd %mm6,%mm5 - movd %mm5,640(%ebx) - psrlq $32,%mm5 - movd %mm5,1536(%ebx) - movq %mm3,%mm4 - pfsub %mm2,%mm4 - movq %mm7,%mm5 - punpckldq tfcos36+20,%mm5 - pfmul %mm5,%mm4 - movq %mm4,%mm5 - pfacc %mm5,%mm5 - movd 128(%edx),%mm6 - punpckldq 84(%edx),%mm6 - pfmul %mm6,%mm5 - movd %mm5,56(%ecx) - psrlq $32,%mm5 - movd %mm5,12(%ecx) - movq %mm4,%mm6 - punpckldq %mm6,%mm5 - pfsub %mm6,%mm5 - punpckhdq %mm5,%mm5 - movd 12(%edx),%mm6 - punpckldq 56(%edx),%mm6 - pfmul %mm6,%mm5 - movd 12(%esi),%mm6 - punpckldq 56(%esi),%mm6 - pfadd %mm6,%mm5 - movd %mm5,384(%ebx) - psrlq $32,%mm5 - movd %mm5,1792(%ebx) - - movq (%eax),%mm4 - movq 16(%eax),%mm3 - pfsub %mm3,%mm4 - movq 32(%eax),%mm3 - pfadd %mm3,%mm4 - movq 48(%eax),%mm3 - pfsub %mm3,%mm4 - movq 64(%eax),%mm3 - pfadd %mm3,%mm4 - movq %mm7,%mm5 - punpckldq tfcos36+16,%mm5 - pfmul %mm5,%mm4 - movq %mm4,%mm5 - pfacc %mm5,%mm5 - movd 124(%edx),%mm6 - punpckldq 88(%edx),%mm6 - pfmul %mm6,%mm5 - movd %mm5,52(%ecx) - psrlq $32,%mm5 - movd %mm5,16(%ecx) - movq %mm4,%mm6 - punpckldq %mm6,%mm5 - pfsub %mm6,%mm5 - punpckhdq %mm5,%mm5 - movd 16(%edx),%mm6 - punpckldq 52(%edx),%mm6 - pfmul %mm6,%mm5 - movd 16(%esi),%mm6 - punpckldq 52(%esi),%mm6 - pfadd %mm6,%mm5 - movd %mm5,512(%ebx) - psrlq $32,%mm5 - movd %mm5,1664(%ebx) - - femms - popl %ebx - popl %esi - movl %ebp,%esp - popl %ebp - ret
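The deleted dct36_3dnow above mirrors the structure of mpg123's portable dct36(): its first two blocks of movq/punpckldq/pfadd are the familiar accumulation prologue, computed two lanes at a time. A hedged C sketch of that prologue (illustrative names, not code from this repository):

    /* Accumulation prologue of dct36(), as the first two pfadd blocks
     * compute it: pairwise sums of the original 18 inputs, then sums
     * over the odd-indexed terms.  Sketch only. */
    static void dct36_prologue(float in[18])
    {
        int i;
        for (i = 17; i > 0; i--)        /* first pfadd block  */
            in[i] += in[i - 1];
        for (i = 17; i > 2; i -= 2)     /* second pfadd block */
            in[i] += in[i - 2];
    }

The remainder of the routine evaluates the 9-point DCT pairs against the COS9 and tfcos36 tables and windows the results into the two output buffers.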
--- a/Plugins/Input/mpg123/dct64_3dnow.s Sun Nov 13 11:37:33 2005 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,710 +0,0 @@ -/// -/// Replacement of dct64() with AMD's 3DNow! SIMD operations support -/// -/// Syuuhei Kashiyama <squash@mb.kcom.ne.jp> -/// -/// The author of this program disclaim whole expressed or implied -/// warranties with regard to this program, and in no event shall the -/// author of this program liable to whatever resulted from the use of -/// this program. Use it at your own risk. -/// -/ -/ porting xmms-0.9.1 by Osamu kayasono <jacobi@jcom.home.ne.jp> -/ - rename function name for xmms -/ - - .globl mpg123_dct64_3dnow - .type mpg123_dct64_3dnow,@function -mpg123_dct64_3dnow: - subl $256,%esp - pushl %ebp - pushl %edi - pushl %esi - pushl %ebx - leal 16(%esp),%ebx - movl 284(%esp),%edi - movl 276(%esp),%ebp - movl 280(%esp),%edx - leal 128(%ebx),%esi - - / femms - - // 1 - movl mpg123_pnts,%eax - movq 0(%edi),%mm0 - movq %mm0,%mm1 - movd 124(%edi),%mm2 - punpckldq 120(%edi),%mm2 - movq 0(%eax),%mm3 - pfadd %mm2,%mm0 - movq %mm0,0(%ebx) - pfsub %mm2,%mm1 - pfmul %mm3,%mm1 - movd %mm1,124(%ebx) - psrlq $32,%mm1 - movd %mm1,120(%ebx) - movq 8(%edi),%mm4 - movq %mm4,%mm5 - movd 116(%edi),%mm6 - punpckldq 112(%edi),%mm6 - movq 8(%eax),%mm7 - pfadd %mm6,%mm4 - movq %mm4,8(%ebx) - pfsub %mm6,%mm5 - pfmul %mm7,%mm5 - movd %mm5,116(%ebx) - psrlq $32,%mm5 - movd %mm5,112(%ebx) - movq 16(%edi),%mm0 - movq %mm0,%mm1 - movd 108(%edi),%mm2 - punpckldq 104(%edi),%mm2 - movq 16(%eax),%mm3 - pfadd %mm2,%mm0 - movq %mm0,16(%ebx) - pfsub %mm2,%mm1 - pfmul %mm3,%mm1 - movd %mm1,108(%ebx) - psrlq $32,%mm1 - movd %mm1,104(%ebx) - movq 24(%edi),%mm4 - movq %mm4,%mm5 - movd 100(%edi),%mm6 - punpckldq 96(%edi),%mm6 - movq 24(%eax),%mm7 - pfadd %mm6,%mm4 - movq %mm4,24(%ebx) - pfsub %mm6,%mm5 - pfmul %mm7,%mm5 - movd %mm5,100(%ebx) - psrlq $32,%mm5 - movd %mm5,96(%ebx) - movq 32(%edi),%mm0 - movq %mm0,%mm1 - movd 92(%edi),%mm2 - punpckldq 88(%edi),%mm2 - movq 32(%eax),%mm3 - pfadd %mm2,%mm0 - movq %mm0,32(%ebx) - pfsub %mm2,%mm1 - pfmul %mm3,%mm1 - movd %mm1,92(%ebx) - psrlq $32,%mm1 - movd %mm1,88(%ebx) - movq 40(%edi),%mm4 - movq %mm4,%mm5 - movd 84(%edi),%mm6 - punpckldq 80(%edi),%mm6 - movq 40(%eax),%mm7 - pfadd %mm6,%mm4 - movq %mm4,40(%ebx) - pfsub %mm6,%mm5 - pfmul %mm7,%mm5 - movd %mm5,84(%ebx) - psrlq $32,%mm5 - movd %mm5,80(%ebx) - movq 48(%edi),%mm0 - movq %mm0,%mm1 - movd 76(%edi),%mm2 - punpckldq 72(%edi),%mm2 - movq 48(%eax),%mm3 - pfadd %mm2,%mm0 - movq %mm0,48(%ebx) - pfsub %mm2,%mm1 - pfmul %mm3,%mm1 - movd %mm1,76(%ebx) - psrlq $32,%mm1 - movd %mm1,72(%ebx) - movq 56(%edi),%mm4 - movq %mm4,%mm5 - movd 68(%edi),%mm6 - punpckldq 64(%edi),%mm6 - movq 56(%eax),%mm7 - pfadd %mm6,%mm4 - movq %mm4,56(%ebx) - pfsub %mm6,%mm5 - pfmul %mm7,%mm5 - movd %mm5,68(%ebx) - psrlq $32,%mm5 - movd %mm5,64(%ebx) - - // 2 - movl mpg123_pnts+4,%eax - / 0, 14 - movq 0(%ebx),%mm0 - movq %mm0,%mm1 - movd 60(%ebx),%mm2 - punpckldq 56(%ebx),%mm2 - movq 0(%eax),%mm3 - pfadd %mm2,%mm0 - movq %mm0,0(%esi) - pfsub %mm2,%mm1 - pfmul %mm3,%mm1 - movd %mm1,60(%esi) - psrlq $32,%mm1 - movd %mm1,56(%esi) - / 16, 30 - movq 64(%ebx),%mm0 - movq %mm0,%mm1 - movd 124(%ebx),%mm2 - punpckldq 120(%ebx),%mm2 - pfadd %mm2,%mm0 - movq %mm0,64(%esi) - pfsubr %mm2,%mm1 - pfmul %mm3,%mm1 - movd %mm1,124(%esi) - psrlq $32,%mm1 - movd %mm1,120(%esi) - movq 8(%ebx),%mm4 - / 2, 12 - movq %mm4,%mm5 - movd 52(%ebx),%mm6 - punpckldq 48(%ebx),%mm6 - movq 8(%eax),%mm7 - pfadd %mm6,%mm4 - movq %mm4,8(%esi) - pfsub 
%mm6,%mm5 - pfmul %mm7,%mm5 - movd %mm5,52(%esi) - psrlq $32,%mm5 - movd %mm5,48(%esi) - movq 72(%ebx),%mm4 - / 18, 28 - movq %mm4,%mm5 - movd 116(%ebx),%mm6 - punpckldq 112(%ebx),%mm6 - pfadd %mm6,%mm4 - movq %mm4,72(%esi) - pfsubr %mm6,%mm5 - pfmul %mm7,%mm5 - movd %mm5,116(%esi) - psrlq $32,%mm5 - movd %mm5,112(%esi) - movq 16(%ebx),%mm0 - / 4, 10 - movq %mm0,%mm1 - movd 44(%ebx),%mm2 - punpckldq 40(%ebx),%mm2 - movq 16(%eax),%mm3 - pfadd %mm2,%mm0 - movq %mm0,16(%esi) - pfsub %mm2,%mm1 - pfmul %mm3,%mm1 - movd %mm1,44(%esi) - psrlq $32,%mm1 - movd %mm1,40(%esi) - movq 80(%ebx),%mm0 - / 20, 26 - movq %mm0,%mm1 - movd 108(%ebx),%mm2 - punpckldq 104(%ebx),%mm2 - pfadd %mm2,%mm0 - movq %mm0,80(%esi) - pfsubr %mm2,%mm1 - pfmul %mm3,%mm1 - movd %mm1,108(%esi) - psrlq $32,%mm1 - movd %mm1,104(%esi) - movq 24(%ebx),%mm4 - / 6, 8 - movq %mm4,%mm5 - movd 36(%ebx),%mm6 - punpckldq 32(%ebx),%mm6 - movq 24(%eax),%mm7 - pfadd %mm6,%mm4 - movq %mm4,24(%esi) - pfsub %mm6,%mm5 - pfmul %mm7,%mm5 - movd %mm5,36(%esi) - psrlq $32,%mm5 - movd %mm5,32(%esi) - movq 88(%ebx),%mm4 - / 22, 24 - movq %mm4,%mm5 - movd 100(%ebx),%mm6 - punpckldq 96(%ebx),%mm6 - pfadd %mm6,%mm4 - movq %mm4,88(%esi) - pfsubr %mm6,%mm5 - pfmul %mm7,%mm5 - movd %mm5,100(%esi) - psrlq $32,%mm5 - movd %mm5,96(%esi) - - // 3 - movl mpg123_pnts+8,%eax - movq 0(%eax),%mm0 - movq 8(%eax),%mm1 - movq 0(%esi),%mm2 - / 0, 6 - movq %mm2,%mm3 - movd 28(%esi),%mm4 - punpckldq 24(%esi),%mm4 - pfadd %mm4,%mm2 - pfsub %mm4,%mm3 - pfmul %mm0,%mm3 - movq %mm2,0(%ebx) - movd %mm3,28(%ebx) - psrlq $32,%mm3 - movd %mm3,24(%ebx) - movq 8(%esi),%mm5 - / 2, 4 - movq %mm5,%mm6 - movd 20(%esi),%mm7 - punpckldq 16(%esi),%mm7 - pfadd %mm7,%mm5 - pfsub %mm7,%mm6 - pfmul %mm1,%mm6 - movq %mm5,8(%ebx) - movd %mm6,20(%ebx) - psrlq $32,%mm6 - movd %mm6,16(%ebx) - movq 32(%esi),%mm2 - / 8, 14 - movq %mm2,%mm3 - movd 60(%esi),%mm4 - punpckldq 56(%esi),%mm4 - pfadd %mm4,%mm2 - pfsubr %mm4,%mm3 - pfmul %mm0,%mm3 - movq %mm2,32(%ebx) - movd %mm3,60(%ebx) - psrlq $32,%mm3 - movd %mm3,56(%ebx) - movq 40(%esi),%mm5 - / 10, 12 - movq %mm5,%mm6 - movd 52(%esi),%mm7 - punpckldq 48(%esi),%mm7 - pfadd %mm7,%mm5 - pfsubr %mm7,%mm6 - pfmul %mm1,%mm6 - movq %mm5,40(%ebx) - movd %mm6,52(%ebx) - psrlq $32,%mm6 - movd %mm6,48(%ebx) - movq 64(%esi),%mm2 - / 16, 22 - movq %mm2,%mm3 - movd 92(%esi),%mm4 - punpckldq 88(%esi),%mm4 - pfadd %mm4,%mm2 - pfsub %mm4,%mm3 - pfmul %mm0,%mm3 - movq %mm2,64(%ebx) - movd %mm3,92(%ebx) - psrlq $32,%mm3 - movd %mm3,88(%ebx) - movq 72(%esi),%mm5 - / 18, 20 - movq %mm5,%mm6 - movd 84(%esi),%mm7 - punpckldq 80(%esi),%mm7 - pfadd %mm7,%mm5 - pfsub %mm7,%mm6 - pfmul %mm1,%mm6 - movq %mm5,72(%ebx) - movd %mm6,84(%ebx) - psrlq $32,%mm6 - movd %mm6,80(%ebx) - movq 96(%esi),%mm2 - / 24, 30 - movq %mm2,%mm3 - movd 124(%esi),%mm4 - punpckldq 120(%esi),%mm4 - pfadd %mm4,%mm2 - pfsubr %mm4,%mm3 - pfmul %mm0,%mm3 - movq %mm2,96(%ebx) - movd %mm3,124(%ebx) - psrlq $32,%mm3 - movd %mm3,120(%ebx) - movq 104(%esi),%mm5 - / 26, 28 - movq %mm5,%mm6 - movd 116(%esi),%mm7 - punpckldq 112(%esi),%mm7 - pfadd %mm7,%mm5 - pfsubr %mm7,%mm6 - pfmul %mm1,%mm6 - movq %mm5,104(%ebx) - movd %mm6,116(%ebx) - psrlq $32,%mm6 - movd %mm6,112(%ebx) - - // 4 - movl mpg123_pnts+12,%eax - movq 0(%eax),%mm0 - movq 0(%ebx),%mm1 - / 0 - movq %mm1,%mm2 - movd 12(%ebx),%mm3 - punpckldq 8(%ebx),%mm3 - pfadd %mm3,%mm1 - pfsub %mm3,%mm2 - pfmul %mm0,%mm2 - movq %mm1,0(%esi) - movd %mm2,12(%esi) - psrlq $32,%mm2 - movd %mm2,8(%esi) - movq 16(%ebx),%mm4 - / 4 - movq %mm4,%mm5 - movd 28(%ebx),%mm6 - 
punpckldq 24(%ebx),%mm6 - pfadd %mm6,%mm4 - pfsubr %mm6,%mm5 - pfmul %mm0,%mm5 - movq %mm4,16(%esi) - movd %mm5,28(%esi) - psrlq $32,%mm5 - movd %mm5,24(%esi) - movq 32(%ebx),%mm1 - / 8 - movq %mm1,%mm2 - movd 44(%ebx),%mm3 - punpckldq 40(%ebx),%mm3 - pfadd %mm3,%mm1 - pfsub %mm3,%mm2 - pfmul %mm0,%mm2 - movq %mm1,32(%esi) - movd %mm2,44(%esi) - psrlq $32,%mm2 - movd %mm2,40(%esi) - movq 48(%ebx),%mm4 - / 12 - movq %mm4,%mm5 - movd 60(%ebx),%mm6 - punpckldq 56(%ebx),%mm6 - pfadd %mm6,%mm4 - pfsubr %mm6,%mm5 - pfmul %mm0,%mm5 - movq %mm4,48(%esi) - movd %mm5,60(%esi) - psrlq $32,%mm5 - movd %mm5,56(%esi) - movq 64(%ebx),%mm1 - / 16 - movq %mm1,%mm2 - movd 76(%ebx),%mm3 - punpckldq 72(%ebx),%mm3 - pfadd %mm3,%mm1 - pfsub %mm3,%mm2 - pfmul %mm0,%mm2 - movq %mm1,64(%esi) - movd %mm2,76(%esi) - psrlq $32,%mm2 - movd %mm2,72(%esi) - movq 80(%ebx),%mm4 - / 20 - movq %mm4,%mm5 - movd 92(%ebx),%mm6 - punpckldq 88(%ebx),%mm6 - pfadd %mm6,%mm4 - pfsubr %mm6,%mm5 - pfmul %mm0,%mm5 - movq %mm4,80(%esi) - movd %mm5,92(%esi) - psrlq $32,%mm5 - movd %mm5,88(%esi) - movq 96(%ebx),%mm1 - / 24 - movq %mm1,%mm2 - movd 108(%ebx),%mm3 - punpckldq 104(%ebx),%mm3 - pfadd %mm3,%mm1 - pfsub %mm3,%mm2 - pfmul %mm0,%mm2 - movq %mm1,96(%esi) - movd %mm2,108(%esi) - psrlq $32,%mm2 - movd %mm2,104(%esi) - movq 112(%ebx),%mm4 - / 28 - movq %mm4,%mm5 - movd 124(%ebx),%mm6 - punpckldq 120(%ebx),%mm6 - pfadd %mm6,%mm4 - pfsubr %mm6,%mm5 - pfmul %mm0,%mm5 - movq %mm4,112(%esi) - movd %mm5,124(%esi) - psrlq $32,%mm5 - movd %mm5,120(%esi) - - // 5 - movl $-1,%eax - movd %eax,%mm1 - movl $1,%eax - movd %eax,%mm0 - / L | H - punpckldq %mm1,%mm0 - pi2fd %mm0,%mm0 - / 1.0 | -1.0 - movd %eax,%mm1 - pi2fd %mm1,%mm1 - movl mpg123_pnts+16,%eax - movd 0(%eax),%mm2 - punpckldq %mm2,%mm1 - / 1.0 | cos0 - movq 0(%esi),%mm2 - / 0 - movq %mm2,%mm3 - pfmul %mm0,%mm3 - pfacc %mm3,%mm2 - pfmul %mm1,%mm2 - movq %mm2,0(%ebx) - movq 8(%esi),%mm4 - movq %mm4,%mm5 - pfmul %mm0,%mm5 - pfacc %mm5,%mm4 - pfmul %mm0,%mm4 - pfmul %mm1,%mm4 - movq %mm4,%mm5 - psrlq $32,%mm5 - pfacc %mm5,%mm4 - movq %mm4,8(%ebx) - movq 16(%esi),%mm2 - / 4 - movq %mm2,%mm3 - pfmul %mm0,%mm3 - pfacc %mm3,%mm2 - pfmul %mm1,%mm2 - movq 24(%esi),%mm4 - movq %mm4,%mm5 - pfmul %mm0,%mm5 - pfacc %mm5,%mm4 - pfmul %mm0,%mm4 - pfmul %mm1,%mm4 - movq %mm4,%mm5 - psrlq $32,%mm5 - pfacc %mm5,%mm4 - movq %mm2,%mm3 - psrlq $32,%mm3 - pfadd %mm4,%mm2 - pfadd %mm3,%mm4 - movq %mm2,16(%ebx) - movq %mm4,24(%ebx) - movq 32(%esi),%mm2 - / 8 - movq %mm2,%mm3 - pfmul %mm0,%mm3 - pfacc %mm3,%mm2 - pfmul %mm1,%mm2 - movq %mm2,32(%ebx) - movq 40(%esi),%mm4 - movq %mm4,%mm5 - pfmul %mm0,%mm5 - pfacc %mm5,%mm4 - pfmul %mm0,%mm4 - pfmul %mm1,%mm4 - movq %mm4,%mm5 - psrlq $32,%mm5 - pfacc %mm5,%mm4 - movq %mm4,40(%ebx) - movq 48(%esi),%mm2 - / 12 - movq %mm2,%mm3 - pfmul %mm0,%mm3 - pfacc %mm3,%mm2 - pfmul %mm1,%mm2 - movq 56(%esi),%mm4 - movq %mm4,%mm5 - pfmul %mm0,%mm5 - pfacc %mm5,%mm4 - pfmul %mm0,%mm4 - pfmul %mm1,%mm4 - movq %mm4,%mm5 - psrlq $32,%mm5 - pfacc %mm5,%mm4 - movq %mm2,%mm3 - psrlq $32,%mm3 - pfadd %mm4,%mm2 - pfadd %mm3,%mm4 - movq %mm2,48(%ebx) - movq %mm4,56(%ebx) - movq 64(%esi),%mm2 - / 16 - movq %mm2,%mm3 - pfmul %mm0,%mm3 - pfacc %mm3,%mm2 - pfmul %mm1,%mm2 - movq %mm2,64(%ebx) - movq 72(%esi),%mm4 - movq %mm4,%mm5 - pfmul %mm0,%mm5 - pfacc %mm5,%mm4 - pfmul %mm0,%mm4 - pfmul %mm1,%mm4 - movq %mm4,%mm5 - psrlq $32,%mm5 - pfacc %mm5,%mm4 - movq %mm4,72(%ebx) - movq 80(%esi),%mm2 - / 20 - movq %mm2,%mm3 - pfmul %mm0,%mm3 - pfacc %mm3,%mm2 - pfmul %mm1,%mm2 - movq 88(%esi),%mm4 - 
movq %mm4,%mm5 - pfmul %mm0,%mm5 - pfacc %mm5,%mm4 - pfmul %mm0,%mm4 - pfmul %mm1,%mm4 - movq %mm4,%mm5 - psrlq $32,%mm5 - pfacc %mm5,%mm4 - movq %mm2,%mm3 - psrlq $32,%mm3 - pfadd %mm4,%mm2 - pfadd %mm3,%mm4 - movq %mm2,80(%ebx) - movq %mm4,88(%ebx) - movq 96(%esi),%mm2 - / 24 - movq %mm2,%mm3 - pfmul %mm0,%mm3 - pfacc %mm3,%mm2 - pfmul %mm1,%mm2 - movq %mm2,96(%ebx) - movq 104(%esi),%mm4 - movq %mm4,%mm5 - pfmul %mm0,%mm5 - pfacc %mm5,%mm4 - pfmul %mm0,%mm4 - pfmul %mm1,%mm4 - movq %mm4,%mm5 - psrlq $32,%mm5 - pfacc %mm5,%mm4 - movq %mm4,104(%ebx) - movq 112(%esi),%mm2 - / 28 - movq %mm2,%mm3 - pfmul %mm0,%mm3 - pfacc %mm3,%mm2 - pfmul %mm1,%mm2 - movq 120(%esi),%mm4 - movq %mm4,%mm5 - pfmul %mm0,%mm5 - pfacc %mm5,%mm4 - pfmul %mm0,%mm4 - pfmul %mm1,%mm4 - movq %mm4,%mm5 - psrlq $32,%mm5 - pfacc %mm5,%mm4 - movq %mm2,%mm3 - psrlq $32,%mm3 - pfadd %mm4,%mm2 - pfadd %mm3,%mm4 - movq %mm2,112(%ebx) - movq %mm4,120(%ebx) - - // Phase6 - movl 0(%ebx),%eax - movl %eax,1024(%ebp) - movl 4(%ebx),%eax - movl %eax,0(%ebp) - movl %eax,0(%edx) - movl 8(%ebx),%eax - movl %eax,512(%ebp) - movl 12(%ebx),%eax - movl %eax,512(%edx) - - movl 16(%ebx),%eax - movl %eax,768(%ebp) - movl 20(%ebx),%eax - movl %eax,256(%edx) - - movl 24(%ebx),%eax - movl %eax,256(%ebp) - movl 28(%ebx),%eax - movl %eax,768(%edx) - - movq 32(%ebx),%mm0 - movq 48(%ebx),%mm1 - pfadd %mm1,%mm0 - movd %mm0,896(%ebp) - psrlq $32,%mm0 - movd %mm0,128(%edx) - movq 40(%ebx),%mm2 - pfadd %mm2,%mm1 - movd %mm1,640(%ebp) - psrlq $32,%mm1 - movd %mm1,384(%edx) - - movq 56(%ebx),%mm3 - pfadd %mm3,%mm2 - movd %mm2,384(%ebp) - psrlq $32,%mm2 - movd %mm2,640(%edx) - - movd 36(%ebx),%mm4 - pfadd %mm4,%mm3 - movd %mm3,128(%ebp) - psrlq $32,%mm3 - movd %mm3,896(%edx) - movq 96(%ebx),%mm0 - movq 64(%ebx),%mm1 - - movq 112(%ebx),%mm2 - pfadd %mm2,%mm0 - movq %mm0,%mm3 - pfadd %mm1,%mm3 - movd %mm3,960(%ebp) - psrlq $32,%mm3 - movd %mm3,64(%edx) - movq 80(%ebx),%mm1 - pfadd %mm1,%mm0 - movd %mm0,832(%ebp) - psrlq $32,%mm0 - movd %mm0,192(%edx) - movq 104(%ebx),%mm3 - pfadd %mm3,%mm2 - movq %mm2,%mm4 - pfadd %mm1,%mm4 - movd %mm4,704(%ebp) - psrlq $32,%mm4 - movd %mm4,320(%edx) - movq 72(%ebx),%mm1 - pfadd %mm1,%mm2 - movd %mm2,576(%ebp) - psrlq $32,%mm2 - movd %mm2,448(%edx) - - movq 120(%ebx),%mm4 - pfadd %mm4,%mm3 - movq %mm3,%mm5 - pfadd %mm1,%mm5 - movd %mm5,448(%ebp) - psrlq $32,%mm5 - movd %mm5,576(%edx) - movq 88(%ebx),%mm1 - pfadd %mm1,%mm3 - movd %mm3,320(%ebp) - psrlq $32,%mm3 - movd %mm3,704(%edx) - - movd 100(%ebx),%mm5 - pfadd %mm5,%mm4 - movq %mm4,%mm6 - pfadd %mm1,%mm6 - movd %mm6,192(%ebp) - psrlq $32,%mm6 - movd %mm6,832(%edx) - movd 68(%ebx),%mm1 - pfadd %mm1,%mm4 - movd %mm4,64(%ebp) - psrlq $32,%mm4 - movd %mm4,960(%edx) - - / femms - - popl %ebx - popl %esi - popl %edi - popl %ebp - addl $256,%esp - - ret -
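The deleted dct64_3dnow keeps the layered butterfly structure of the C dct64() it replaced; the block marked "1" is the first split of the 32 inputs. A minimal sketch, assuming mpg123's usual naming (b1 for the work buffer, pnts[0] for the first cosine table):

    /* First dct64 butterfly stage, as the movq/pfadd/pfsub/pfmul
     * pattern in block "1" computes it, two lanes per 3DNow! op. */
    static void dct64_stage1(float b1[32], const float in[32],
                             const float *pnts0)
    {
        int i;
        for (i = 0; i < 16; i++) {
            b1[i]      =  in[i] + in[31 - i];              /* pfadd        */
            b1[31 - i] = (in[i] - in[31 - i]) * pnts0[i];  /* pfsub, pfmul */
        }
    }

Stages 2 through 5 repeat the same pattern on progressively smaller halves, and "Phase6" scatters the results into the two synthesis windows.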
--- a/Plugins/Input/mpg123/dct64_MMX.s Sun Nov 13 11:37:33 2005 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,836 +0,0 @@ -.data - .align 32 -costab: - .long 1056974725 - .long 1057056395 - .long 1057223771 - .long 1057485416 - .long 1057855544 - .long 1058356026 - .long 1059019886 - .long 1059897405 - .long 1061067246 - .long 1062657950 - .long 1064892987 - .long 1066774581 - .long 1069414683 - .long 1073984175 - .long 1079645762 - .long 1092815430 - .long 1057005197 - .long 1057342072 - .long 1058087743 - .long 1059427869 - .long 1061799040 - .long 1065862217 - .long 1071413542 - .long 1084439708 - .long 1057128951 - .long 1058664893 - .long 1063675095 - .long 1076102863 - .long 1057655764 - .long 1067924853 - .long 1060439283 - -.text - - .align 32 -.globl mpg123_dct64_mmx -mpg123_dct64_mmx: - - xorl %ecx,%ecx -.globl dct64_MMX -dct64_MMX: - pushl %ebx - pushl %esi - pushl %edi - subl $256,%esp - movl 280(%esp),%eax - flds (%eax) - leal 128(%esp),%edx - fadds 124(%eax) - movl 272(%esp),%esi - fstps (%edx) - movl 276(%esp),%edi - flds 4(%eax) - movl $costab,%ebx - fadds 120(%eax) - orl %ecx,%ecx - fstps 4(%edx) - flds (%eax) - movl %esp,%ecx - fsubs 124(%eax) - fmuls (%ebx) - fstps 124(%edx) - flds 4(%eax) - fsubs 120(%eax) - fmuls 4(%ebx) - fstps 120(%edx) - flds 8(%eax) - fadds 116(%eax) - fstps 8(%edx) - flds 12(%eax) - fadds 112(%eax) - fstps 12(%edx) - flds 8(%eax) - fsubs 116(%eax) - fmuls 8(%ebx) - fstps 116(%edx) - flds 12(%eax) - fsubs 112(%eax) - fmuls 12(%ebx) - fstps 112(%edx) - flds 16(%eax) - fadds 108(%eax) - fstps 16(%edx) - flds 20(%eax) - fadds 104(%eax) - fstps 20(%edx) - flds 16(%eax) - fsubs 108(%eax) - fmuls 16(%ebx) - fstps 108(%edx) - flds 20(%eax) - fsubs 104(%eax) - fmuls 20(%ebx) - fstps 104(%edx) - flds 24(%eax) - fadds 100(%eax) - fstps 24(%edx) - flds 28(%eax) - fadds 96(%eax) - fstps 28(%edx) - flds 24(%eax) - fsubs 100(%eax) - fmuls 24(%ebx) - fstps 100(%edx) - flds 28(%eax) - fsubs 96(%eax) - fmuls 28(%ebx) - fstps 96(%edx) - flds 32(%eax) - fadds 92(%eax) - fstps 32(%edx) - flds 36(%eax) - fadds 88(%eax) - fstps 36(%edx) - flds 32(%eax) - fsubs 92(%eax) - fmuls 32(%ebx) - fstps 92(%edx) - flds 36(%eax) - fsubs 88(%eax) - fmuls 36(%ebx) - fstps 88(%edx) - flds 40(%eax) - fadds 84(%eax) - fstps 40(%edx) - flds 44(%eax) - fadds 80(%eax) - fstps 44(%edx) - flds 40(%eax) - fsubs 84(%eax) - fmuls 40(%ebx) - fstps 84(%edx) - flds 44(%eax) - fsubs 80(%eax) - fmuls 44(%ebx) - fstps 80(%edx) - flds 48(%eax) - fadds 76(%eax) - fstps 48(%edx) - flds 52(%eax) - fadds 72(%eax) - fstps 52(%edx) - flds 48(%eax) - fsubs 76(%eax) - fmuls 48(%ebx) - fstps 76(%edx) - flds 52(%eax) - fsubs 72(%eax) - fmuls 52(%ebx) - fstps 72(%edx) - flds 56(%eax) - fadds 68(%eax) - fstps 56(%edx) - flds 60(%eax) - fadds 64(%eax) - fstps 60(%edx) - flds 56(%eax) - fsubs 68(%eax) - fmuls 56(%ebx) - fstps 68(%edx) - flds 60(%eax) - fsubs 64(%eax) - fmuls 60(%ebx) - fstps 64(%edx) - - flds (%edx) - fadds 60(%edx) - fstps (%ecx) - flds 4(%edx) - fadds 56(%edx) - fstps 4(%ecx) - flds (%edx) - fsubs 60(%edx) - fmuls 64(%ebx) - fstps 60(%ecx) - flds 4(%edx) - fsubs 56(%edx) - fmuls 68(%ebx) - fstps 56(%ecx) - flds 8(%edx) - fadds 52(%edx) - fstps 8(%ecx) - flds 12(%edx) - fadds 48(%edx) - fstps 12(%ecx) - flds 8(%edx) - fsubs 52(%edx) - fmuls 72(%ebx) - fstps 52(%ecx) - flds 12(%edx) - fsubs 48(%edx) - fmuls 76(%ebx) - fstps 48(%ecx) - flds 16(%edx) - fadds 44(%edx) - fstps 16(%ecx) - flds 20(%edx) - fadds 40(%edx) - fstps 20(%ecx) - flds 16(%edx) - fsubs 44(%edx) - fmuls 80(%ebx) - 
fstps 44(%ecx) - flds 20(%edx) - fsubs 40(%edx) - fmuls 84(%ebx) - fstps 40(%ecx) - flds 24(%edx) - fadds 36(%edx) - fstps 24(%ecx) - flds 28(%edx) - fadds 32(%edx) - fstps 28(%ecx) - flds 24(%edx) - fsubs 36(%edx) - fmuls 88(%ebx) - fstps 36(%ecx) - flds 28(%edx) - fsubs 32(%edx) - fmuls 92(%ebx) - fstps 32(%ecx) - - flds 64(%edx) - fadds 124(%edx) - fstps 64(%ecx) - flds 68(%edx) - fadds 120(%edx) - fstps 68(%ecx) - flds 124(%edx) - fsubs 64(%edx) - fmuls 64(%ebx) - fstps 124(%ecx) - flds 120(%edx) - fsubs 68(%edx) - fmuls 68(%ebx) - fstps 120(%ecx) - flds 72(%edx) - fadds 116(%edx) - fstps 72(%ecx) - flds 76(%edx) - fadds 112(%edx) - fstps 76(%ecx) - flds 116(%edx) - fsubs 72(%edx) - fmuls 72(%ebx) - fstps 116(%ecx) - flds 112(%edx) - fsubs 76(%edx) - fmuls 76(%ebx) - fstps 112(%ecx) - flds 80(%edx) - fadds 108(%edx) - fstps 80(%ecx) - flds 84(%edx) - fadds 104(%edx) - fstps 84(%ecx) - flds 108(%edx) - fsubs 80(%edx) - fmuls 80(%ebx) - fstps 108(%ecx) - flds 104(%edx) - fsubs 84(%edx) - fmuls 84(%ebx) - fstps 104(%ecx) - flds 88(%edx) - fadds 100(%edx) - fstps 88(%ecx) - flds 92(%edx) - fadds 96(%edx) - fstps 92(%ecx) - flds 100(%edx) - fsubs 88(%edx) - fmuls 88(%ebx) - fstps 100(%ecx) - flds 96(%edx) - fsubs 92(%edx) - fmuls 92(%ebx) - fstps 96(%ecx) - - flds (%ecx) - fadds 28(%ecx) - fstps (%edx) - flds (%ecx) - fsubs 28(%ecx) - fmuls 96(%ebx) - fstps 28(%edx) - flds 4(%ecx) - fadds 24(%ecx) - fstps 4(%edx) - flds 4(%ecx) - fsubs 24(%ecx) - fmuls 100(%ebx) - fstps 24(%edx) - flds 8(%ecx) - fadds 20(%ecx) - fstps 8(%edx) - flds 8(%ecx) - fsubs 20(%ecx) - fmuls 104(%ebx) - fstps 20(%edx) - flds 12(%ecx) - fadds 16(%ecx) - fstps 12(%edx) - flds 12(%ecx) - fsubs 16(%ecx) - fmuls 108(%ebx) - fstps 16(%edx) - flds 32(%ecx) - fadds 60(%ecx) - fstps 32(%edx) - flds 60(%ecx) - fsubs 32(%ecx) - fmuls 96(%ebx) - fstps 60(%edx) - flds 36(%ecx) - fadds 56(%ecx) - fstps 36(%edx) - flds 56(%ecx) - fsubs 36(%ecx) - fmuls 100(%ebx) - fstps 56(%edx) - flds 40(%ecx) - fadds 52(%ecx) - fstps 40(%edx) - flds 52(%ecx) - fsubs 40(%ecx) - fmuls 104(%ebx) - fstps 52(%edx) - flds 44(%ecx) - fadds 48(%ecx) - fstps 44(%edx) - flds 48(%ecx) - fsubs 44(%ecx) - fmuls 108(%ebx) - fstps 48(%edx) - flds 64(%ecx) - fadds 92(%ecx) - fstps 64(%edx) - flds 64(%ecx) - fsubs 92(%ecx) - fmuls 96(%ebx) - fstps 92(%edx) - flds 68(%ecx) - fadds 88(%ecx) - fstps 68(%edx) - flds 68(%ecx) - fsubs 88(%ecx) - fmuls 100(%ebx) - fstps 88(%edx) - flds 72(%ecx) - fadds 84(%ecx) - fstps 72(%edx) - flds 72(%ecx) - fsubs 84(%ecx) - fmuls 104(%ebx) - fstps 84(%edx) - flds 76(%ecx) - fadds 80(%ecx) - fstps 76(%edx) - flds 76(%ecx) - fsubs 80(%ecx) - fmuls 108(%ebx) - fstps 80(%edx) - flds 96(%ecx) - fadds 124(%ecx) - fstps 96(%edx) - flds 124(%ecx) - fsubs 96(%ecx) - fmuls 96(%ebx) - fstps 124(%edx) - flds 100(%ecx) - fadds 120(%ecx) - fstps 100(%edx) - flds 120(%ecx) - fsubs 100(%ecx) - fmuls 100(%ebx) - fstps 120(%edx) - flds 104(%ecx) - fadds 116(%ecx) - fstps 104(%edx) - flds 116(%ecx) - fsubs 104(%ecx) - fmuls 104(%ebx) - fstps 116(%edx) - flds 108(%ecx) - fadds 112(%ecx) - fstps 108(%edx) - flds 112(%ecx) - fsubs 108(%ecx) - fmuls 108(%ebx) - fstps 112(%edx) - flds (%edx) - fadds 12(%edx) - fstps (%ecx) - flds (%edx) - fsubs 12(%edx) - fmuls 112(%ebx) - fstps 12(%ecx) - flds 4(%edx) - fadds 8(%edx) - fstps 4(%ecx) - flds 4(%edx) - fsubs 8(%edx) - fmuls 116(%ebx) - fstps 8(%ecx) - flds 16(%edx) - fadds 28(%edx) - fstps 16(%ecx) - flds 28(%edx) - fsubs 16(%edx) - fmuls 112(%ebx) - fstps 28(%ecx) - flds 20(%edx) - fadds 24(%edx) - fstps 
20(%ecx) - flds 24(%edx) - fsubs 20(%edx) - fmuls 116(%ebx) - fstps 24(%ecx) - flds 32(%edx) - fadds 44(%edx) - fstps 32(%ecx) - flds 32(%edx) - fsubs 44(%edx) - fmuls 112(%ebx) - fstps 44(%ecx) - flds 36(%edx) - fadds 40(%edx) - fstps 36(%ecx) - flds 36(%edx) - fsubs 40(%edx) - fmuls 116(%ebx) - fstps 40(%ecx) - flds 48(%edx) - fadds 60(%edx) - fstps 48(%ecx) - flds 60(%edx) - fsubs 48(%edx) - fmuls 112(%ebx) - fstps 60(%ecx) - flds 52(%edx) - fadds 56(%edx) - fstps 52(%ecx) - flds 56(%edx) - fsubs 52(%edx) - fmuls 116(%ebx) - fstps 56(%ecx) - flds 64(%edx) - fadds 76(%edx) - fstps 64(%ecx) - flds 64(%edx) - fsubs 76(%edx) - fmuls 112(%ebx) - fstps 76(%ecx) - flds 68(%edx) - fadds 72(%edx) - fstps 68(%ecx) - flds 68(%edx) - fsubs 72(%edx) - fmuls 116(%ebx) - fstps 72(%ecx) - flds 80(%edx) - fadds 92(%edx) - fstps 80(%ecx) - flds 92(%edx) - fsubs 80(%edx) - fmuls 112(%ebx) - fstps 92(%ecx) - flds 84(%edx) - fadds 88(%edx) - fstps 84(%ecx) - flds 88(%edx) - fsubs 84(%edx) - fmuls 116(%ebx) - fstps 88(%ecx) - flds 96(%edx) - fadds 108(%edx) - fstps 96(%ecx) - flds 96(%edx) - fsubs 108(%edx) - fmuls 112(%ebx) - fstps 108(%ecx) - flds 100(%edx) - fadds 104(%edx) - fstps 100(%ecx) - flds 100(%edx) - fsubs 104(%edx) - fmuls 116(%ebx) - fstps 104(%ecx) - flds 112(%edx) - fadds 124(%edx) - fstps 112(%ecx) - flds 124(%edx) - fsubs 112(%edx) - fmuls 112(%ebx) - fstps 124(%ecx) - flds 116(%edx) - fadds 120(%edx) - fstps 116(%ecx) - flds 120(%edx) - fsubs 116(%edx) - fmuls 116(%ebx) - fstps 120(%ecx) - - flds 32(%ecx) - fadds 36(%ecx) - fstps 32(%edx) - flds 32(%ecx) - fsubs 36(%ecx) - fmuls 120(%ebx) - fstps 36(%edx) - flds 44(%ecx) - fsubs 40(%ecx) - fmuls 120(%ebx) - fsts 44(%edx) - fadds 40(%ecx) - fadds 44(%ecx) - fstps 40(%edx) - flds 48(%ecx) - fsubs 52(%ecx) - fmuls 120(%ebx) - flds 60(%ecx) - fsubs 56(%ecx) - fmuls 120(%ebx) - fld %st(0) - fadds 56(%ecx) - fadds 60(%ecx) - fld %st(0) - fadds 48(%ecx) - fadds 52(%ecx) - fstps 48(%edx) - fadd %st(2) - fstps 56(%edx) - fsts 60(%edx) - faddp %st(1) - fstps 52(%edx) - flds 64(%ecx) - fadds 68(%ecx) - fstps 64(%edx) - flds 64(%ecx) - fsubs 68(%ecx) - fmuls 120(%ebx) - fstps 68(%edx) - flds 76(%ecx) - fsubs 72(%ecx) - fmuls 120(%ebx) - fsts 76(%edx) - fadds 72(%ecx) - fadds 76(%ecx) - fstps 72(%edx) - flds 92(%ecx) - fsubs 88(%ecx) - fmuls 120(%ebx) - fsts 92(%edx) - fadds 92(%ecx) - fadds 88(%ecx) - fld %st(0) - fadds 80(%ecx) - fadds 84(%ecx) - fstps 80(%edx) - flds 80(%ecx) - fsubs 84(%ecx) - fmuls 120(%ebx) - fadd %st(0), %st(1) - fadds 92(%edx) - fstps 84(%edx) - fstps 88(%edx) - flds 96(%ecx) - fadds 100(%ecx) - fstps 96(%edx) - flds 96(%ecx) - fsubs 100(%ecx) - fmuls 120(%ebx) - fstps 100(%edx) - flds 108(%ecx) - fsubs 104(%ecx) - fmuls 120(%ebx) - fsts 108(%edx) - fadds 104(%ecx) - fadds 108(%ecx) - fstps 104(%edx) - flds 124(%ecx) - fsubs 120(%ecx) - fmuls 120(%ebx) - fsts 124(%edx) - fadds 120(%ecx) - fadds 124(%ecx) - fld %st(0) - fadds 112(%ecx) - fadds 116(%ecx) - fstps 112(%edx) - flds 112(%ecx) - fsubs 116(%ecx) - fmuls 120(%ebx) - fadd %st(0),%st(1) - fadds 124(%edx) - fstps 116(%edx) - fstps 120(%edx) - jnz .L01 - - flds (%ecx) - fadds 4(%ecx) - fstps 1024(%esi) - flds (%ecx) - fsubs 4(%ecx) - fmuls 120(%ebx) - fsts (%esi) - fstps (%edi) - flds 12(%ecx) - fsubs 8(%ecx) - fmuls 120(%ebx) - fsts 512(%edi) - fadds 12(%ecx) - fadds 8(%ecx) - fstps 512(%esi) - flds 16(%ecx) - fsubs 20(%ecx) - fmuls 120(%ebx) - flds 28(%ecx) - fsubs 24(%ecx) - fmuls 120(%ebx) - fsts 768(%edi) - fld %st(0) - fadds 24(%ecx) - fadds 28(%ecx) - fld %st(0) - 
fadds 16(%ecx) - fadds 20(%ecx) - fstps 768(%esi) - fadd %st(2) - fstps 256(%esi) - faddp %st(1) - fstps 256(%edi) - - flds 32(%edx) - fadds 48(%edx) - fstps 896(%esi) - flds 48(%edx) - fadds 40(%edx) - fstps 640(%esi) - flds 40(%edx) - fadds 56(%edx) - fstps 384(%esi) - flds 56(%edx) - fadds 36(%edx) - fstps 128(%esi) - flds 36(%edx) - fadds 52(%edx) - fstps 128(%edi) - flds 52(%edx) - fadds 44(%edx) - fstps 384(%edi) - flds 60(%edx) - fsts 896(%edi) - fadds 44(%edx) - fstps 640(%edi) - flds 96(%edx) - fadds 112(%edx) - fld %st(0) - fadds 64(%edx) - fstps 960(%esi) - fadds 80(%edx) - fstps 832(%esi) - flds 112(%edx) - fadds 104(%edx) - fld %st(0) - fadds 80(%edx) - fstps 704(%esi) - fadds 72(%edx) - fstps 576(%esi) - flds 104(%edx) - fadds 120(%edx) - fld %st(0) - fadds 72(%edx) - fstps 448(%esi) - fadds 88(%edx) - fstps 320(%esi) - flds 120(%edx) - fadds 100(%edx) - fld %st(0) - fadds 88(%edx) - fstps 192(%esi) - fadds 68(%edx) - fstps 64(%esi) - flds 100(%edx) - fadds 116(%edx) - fld %st(0) - fadds 68(%edx) - fstps 64(%edi) - fadds 84(%edx) - fstps 192(%edi) - flds 116(%edx) - fadds 108(%edx) - fld %st(0) - fadds 84(%edx) - fstps 320(%edi) - fadds 76(%edx) - fstps 448(%edi) - flds 108(%edx) - fadds 124(%edx) - fld %st(0) - fadds 76(%edx) - fstps 576(%edi) - fadds 92(%edx) - fstps 704(%edi) - flds 124(%edx) - fsts 960(%edi) - fadds 92(%edx) - fstps 832(%edi) - addl $256,%esp - popl %edi - popl %esi - popl %ebx - ret -.L01: - flds (%ecx) - fadds 4(%ecx) - fistp 512(%esi) - flds (%ecx) - fsubs 4(%ecx) - fmuls 120(%ebx) - - fistp (%esi) - - flds 12(%ecx) - fsubs 8(%ecx) - fmuls 120(%ebx) - fist 256(%edi) - fadds 12(%ecx) - fadds 8(%ecx) - fistp 256(%esi) - flds 16(%ecx) - fsubs 20(%ecx) - fmuls 120(%ebx) - flds 28(%ecx) - fsubs 24(%ecx) - fmuls 120(%ebx) - fist 384(%edi) - fld %st(0) - fadds 24(%ecx) - fadds 28(%ecx) - fld %st(0) - fadds 16(%ecx) - fadds 20(%ecx) - fistp 384(%esi) - fadd %st(2) - fistp 128(%esi) - faddp %st(1) - fistp 128(%edi) - - flds 32(%edx) - fadds 48(%edx) - fistp 448(%esi) - flds 48(%edx) - fadds 40(%edx) - fistp 320(%esi) - flds 40(%edx) - fadds 56(%edx) - fistp 192(%esi) - flds 56(%edx) - fadds 36(%edx) - fistp 64(%esi) - flds 36(%edx) - fadds 52(%edx) - fistp 64(%edi) - flds 52(%edx) - fadds 44(%edx) - fistp 192(%edi) - flds 60(%edx) - fist 448(%edi) - fadds 44(%edx) - fistp 320(%edi) - flds 96(%edx) - fadds 112(%edx) - fld %st(0) - fadds 64(%edx) - fistp 480(%esi) - fadds 80(%edx) - fistp 416(%esi) - flds 112(%edx) - fadds 104(%edx) - fld %st(0) - fadds 80(%edx) - fistp 352(%esi) - fadds 72(%edx) - fistp 288(%esi) - flds 104(%edx) - fadds 120(%edx) - fld %st(0) - fadds 72(%edx) - fistp 224(%esi) - fadds 88(%edx) - fistp 160(%esi) - flds 120(%edx) - fadds 100(%edx) - fld %st(0) - fadds 88(%edx) - fistp 96(%esi) - fadds 68(%edx) - fistp 32(%esi) - flds 100(%edx) - fadds 116(%edx) - fld %st(0) - fadds 68(%edx) - fistp 32(%edi) - fadds 84(%edx) - fistp 96(%edi) - flds 116(%edx) - fadds 108(%edx) - fld %st(0) - fadds 84(%edx) - fistp 160(%edi) - fadds 76(%edx) - fistp 224(%edi) - flds 108(%edx) - fadds 124(%edx) - fld %st(0) - fadds 76(%edx) - fistp 288(%edi) - fadds 92(%edx) - fistp 352(%edi) - flds 124(%edx) - fist 480(%edi) - fadds 92(%edx) - fistp 416(%edi) - movsw - addl $256,%esp - popl %edi - popl %esi - popl %ebx - ret - -
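The costab block at the top of the deleted dct64_MMX.s stores single-precision floats as raw .long bit patterns. A small C check of my reading (worth verifying against mpg123's cosine tables) that entry k of the first sixteen is 1/(2*cos((2k+1)*pi/64)):

    #include <math.h>
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    int main(void)
    {
        uint32_t bits = 1056974725u;    /* first costab entry */
        float f;
        memcpy(&f, &bits, sizeof f);    /* reinterpret the bits, no conversion */
        printf("%.9g vs %.9g\n", (double)f, 1.0 / (2.0 * cos(M_PI / 64.0)));
        return 0;
    }

Both values should come out near 0.50060; the later, shorter groups of the table would follow with pi/32, pi/16, and pi/8 denominators.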
--- a/Plugins/Input/mpg123/decode_3dnow.s Sun Nov 13 11:37:33 2005 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,272 +0,0 @@ -# -# decode_3dnow.s - 3DNow! optimized synth_1to1() -# -# This code based 'decode_3dnow.s' by Syuuhei Kashiyama -# <squash@mb.kcom.ne.jp>,only two types of changes have been made: -# -# - remove PREFETCH instruction for speedup -# - change function name for support 3DNow! automatic detect -# - femms moved to before 'call dct64_3dnow' -# -# You can find Kashiyama's original 3dnow! support patch -# (for mpg123-0.59o) at -# http:#/user.ecc.u-tokyo.ac.jp/~g810370/linux-simd/ (Japanese). -# -# by KIMURA Takuhiro <kim@hannah.ipc.miyakyo-u.ac.jp> - until 31.Mar.1999 -# <kim@comtec.co.jp> - after 1.Apr.1999 -# - -##/ -##/ Replacement of synth_1to1() with AMD's 3DNow! SIMD operations support -##/ -##/ Syuuhei Kashiyama <squash@mb.kcom.ne.jp> -##/ -##/ The author of this program disclaim whole expressed or implied -##/ warranties with regard to this program, and in no event shall the -##/ author of this program liable to whatever resulted from the use of -##/ this program. Use it at your own risk. -##/ - - .local buffs.40 - .comm buffs.40,4352,32 -.data - .align 4 - .type bo.42,@object - .size bo.42,4 -bo.42: - .long 1 -.text -.globl mpg123_synth_1to1_3dnow - .type mpg123_synth_1to1_3dnow,@function -mpg123_synth_1to1_3dnow: - subl $24,%esp - pushl %ebp - pushl %edi - xorl %ebp,%ebp - pushl %esi - pushl %ebx - movl 56(%esp),%esi - movl 52(%esp),%edi - movl 0(%esi),%esi - movl 48(%esp),%ebx - addl %edi,%esi - movl %esi,16(%esp) - - femms - -.L25: - testl %ebx,%ebx - jne .L26 - decl bo.42 - movl $buffs.40,%ecx - andl $15,bo.42 - jmp .L27 -.L26: - addl $2,16(%esp) - movl $buffs.40+2176,%ecx -.L27: - movl bo.42,%edx - testb $1,%dl - je .L28 - movl %edx,36(%esp) - movl %ecx,%ebx - movl 44(%esp),%esi - movl %edx,%edi - pushl %esi - sall $2,%edi - movl %ebx,%eax - movl %edi,24(%esp) - addl %edi,%eax - pushl %eax - movl %edx,%eax - incl %eax - andl $15,%eax - leal 1088(,%eax,4),%eax - addl %ebx,%eax - pushl %eax - call mpg123_dct64_3dnow - addl $12,%esp - jmp .L29 -.L28: - leal 1(%edx),%esi - movl 44(%esp),%edi - movl %esi,36(%esp) - leal 1092(%ecx,%edx,4),%eax - pushl %edi - leal 1088(%ecx),%ebx - pushl %eax - sall $2,%esi - leal (%ecx,%edx,4),%eax - pushl %eax - call mpg123_dct64_3dnow - addl $12,%esp - movl %esi,20(%esp) -.L29: - movl $mpg123_decwin+64,%edx - movl $16,%ecx - subl 20(%esp),%edx - movl 16(%esp),%edi - - movq (%edx),%mm0 - movq (%ebx),%mm1 - .align 32 -.L33: - movq 8(%edx),%mm3 - pfmul %mm1,%mm0 - movq 8(%ebx),%mm4 - movq 16(%edx),%mm5 - pfmul %mm4,%mm3 - movq 16(%ebx),%mm6 - pfadd %mm3,%mm0 - movq 24(%edx),%mm1 - pfmul %mm6,%mm5 - movq 24(%ebx),%mm2 - pfadd %mm5,%mm0 - movq 32(%edx),%mm3 - pfmul %mm2,%mm1 - movq 32(%ebx),%mm4 - pfadd %mm1,%mm0 - movq 40(%edx),%mm5 - pfmul %mm4,%mm3 - movq 40(%ebx),%mm6 - pfadd %mm3,%mm0 - movq 48(%edx),%mm1 - pfmul %mm6,%mm5 - movq 48(%ebx),%mm2 - pfadd %mm0,%mm5 - movq 56(%edx),%mm3 - pfmul %mm1,%mm2 - movq 56(%ebx),%mm4 - pfadd %mm5,%mm2 - addl $64,%ebx - subl $-128,%edx - movq (%edx),%mm0 - pfmul %mm4,%mm3 - movq (%ebx),%mm1 - pfadd %mm3,%mm2 - movq %mm2,%mm3 - psrlq $32,%mm3 - pfsub %mm3,%mm2 - incl %ebp - pf2id %mm2,%mm2 - packssdw %mm2,%mm2 - movd %mm2,%eax - movw %ax,0(%edi) - addl $4,%edi - decl %ecx - jnz .L33 - - movd (%ebx),%mm0 - movd (%edx),%mm1 - punpckldq 8(%ebx),%mm0 - punpckldq 8(%edx),%mm1 - movd 16(%ebx),%mm3 - movd 16(%edx),%mm4 - pfmul %mm1,%mm0 - punpckldq 24(%ebx),%mm3 - punpckldq 24(%edx),%mm4 - 
movd 32(%ebx),%mm5 - movd 32(%edx),%mm6 - pfmul %mm4,%mm3 - punpckldq 40(%ebx),%mm5 - punpckldq 40(%edx),%mm6 - pfadd %mm3,%mm0 - movd 48(%ebx),%mm1 - movd 48(%edx),%mm2 - pfmul %mm6,%mm5 - punpckldq 56(%ebx),%mm1 - punpckldq 56(%edx),%mm2 - pfadd %mm5,%mm0 - pfmul %mm2,%mm1 - pfadd %mm1,%mm0 - pfacc %mm1,%mm0 - pf2id %mm0,%mm0 - packssdw %mm0,%mm0 - movd %mm0,%eax - movw %ax,0(%edi) - incl %ebp - movl 36(%esp),%esi - addl $-64,%ebx - movl $15,%ebp - addl $4,%edi - leal -128(%edx,%esi,8),%edx - - movl $15,%ecx - movd (%ebx),%mm0 - movd -4(%edx),%mm1 - punpckldq 4(%ebx),%mm0 - punpckldq -8(%edx),%mm1 - .align 32 -.L46: - movd 8(%ebx),%mm3 - movd -12(%edx),%mm4 - pfmul %mm1,%mm0 - punpckldq 12(%ebx),%mm3 - punpckldq -16(%edx),%mm4 - movd 16(%ebx),%mm5 - movd -20(%edx),%mm6 - pfmul %mm4,%mm3 - punpckldq 20(%ebx),%mm5 - punpckldq -24(%edx),%mm6 - pfadd %mm3,%mm0 - movd 24(%ebx),%mm1 - movd -28(%edx),%mm2 - pfmul %mm6,%mm5 - punpckldq 28(%ebx),%mm1 - punpckldq -32(%edx),%mm2 - pfadd %mm5,%mm0 - movd 32(%ebx),%mm3 - movd -36(%edx),%mm4 - pfmul %mm2,%mm1 - punpckldq 36(%ebx),%mm3 - punpckldq -40(%edx),%mm4 - pfadd %mm1,%mm0 - movd 40(%ebx),%mm5 - movd -44(%edx),%mm6 - pfmul %mm4,%mm3 - punpckldq 44(%ebx),%mm5 - punpckldq -48(%edx),%mm6 - pfadd %mm3,%mm0 - movd 48(%ebx),%mm1 - movd -52(%edx),%mm2 - pfmul %mm6,%mm5 - punpckldq 52(%ebx),%mm1 - punpckldq -56(%edx),%mm2 - pfadd %mm0,%mm5 - movd 56(%ebx),%mm3 - movd -60(%edx),%mm4 - pfmul %mm2,%mm1 - punpckldq 60(%ebx),%mm3 - punpckldq (%edx),%mm4 - pfadd %mm1,%mm5 - addl $-128,%edx - addl $-64,%ebx - movd (%ebx),%mm0 - movd -4(%edx),%mm1 - pfmul %mm4,%mm3 - punpckldq 4(%ebx),%mm0 - punpckldq -8(%edx),%mm1 - pfadd %mm5,%mm3 - pfacc %mm3,%mm3 - incl %ebp - pf2id %mm3,%mm3 - movd %mm3,%eax - negl %eax - movd %eax,%mm3 - packssdw %mm3,%mm3 - movd %mm3,%eax - movw %ax,(%edi) - addl $4,%edi - decl %ecx - jnz .L46 - - femms - movl 56(%esp),%esi - movl %ebp,%eax - subl $-128,0(%esi) - popl %ebx - popl %esi - popl %edi - popl %ebp - addl $24,%esp - ret
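decode_3dnow accumulates in floating point and converts with pf2id followed by packssdw, which saturates to the 16-bit PCM range without branching. A sketch of the equivalent scalar conversion:

    /* Float sample -> int16 the way pf2id + packssdw do it:
     * truncate to int32, then clamp.  Sketch, not project code. */
    static short write_sample_3dnow(float sum)
    {
        int v = (int)sum;           /* pf2id truncates toward zero */
        if (v >  32767) v =  32767; /* packssdw saturates          */
        if (v < -32768) v = -32768;
        return (short)v;
    }

The second loop also negates the result (negl %eax) before packing, apparently because that pass runs the synthesis window with reversed signs.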
--- a/Plugins/Input/mpg123/decode_MMX.s	Sun Nov 13 11:37:33 2005 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,108 +0,0 @@
-# this code comes under GPL
-
-.text
-
-.globl mpg123_synth_1to1_mmx
-
-mpg123_synth_1to1_mmx:
-	pushl %ebp
-	pushl %edi
-	pushl %esi
-	pushl %ebx
-	movl 24(%esp),%ecx
-	movl 28(%esp),%edi
-	movl $15,%ebx
-	movl 36(%esp),%edx
-	leal (%edi,%ecx,2),%edi
-	decl %ecx
-	movl 32(%esp),%esi
-	movl (%edx),%eax
-	jecxz .L1
-	decl %eax
-	andl %ebx,%eax
-	leal 1088(%esi),%esi
-	movl %eax,(%edx)
-.L1:
-	leal (%esi,%eax,2),%edx
-	movl %eax,%ebp
-	incl %eax
-	pushl 20(%esp)
-	andl %ebx,%eax
-	leal 544(%esi,%eax,2),%ecx
-	incl %ebx
-	testl $1, %eax
-	jnz .L2
-	xchgl %edx,%ecx
-	incl %ebp
-	leal 544(%esi),%esi
-.L2:
-	pushl %edx
-	pushl %ecx
-	call mpg123_dct64_mmx
-	addl $12,%esp
-	leal 1(%ebx), %ecx
-	subl %ebp,%ebx
-
-	leal mpg123_decwins(%ebx,%ebx,1), %edx
-.L3:
-	movq (%edx),%mm0
-	pmaddwd (%esi),%mm0
-	movq 8(%edx),%mm1
-	pmaddwd 8(%esi),%mm1
-	movq 16(%edx),%mm2
-	pmaddwd 16(%esi),%mm2
-	movq 24(%edx),%mm3
-	pmaddwd 24(%esi),%mm3
-	paddd %mm1,%mm0
-	paddd %mm2,%mm0
-	paddd %mm3,%mm0
-	movq %mm0,%mm1
-	psrlq $32,%mm1
-	paddd %mm1,%mm0
-	psrad $13,%mm0
-	packssdw %mm0,%mm0
-	movd %mm0,%eax
-	movw %ax, (%edi)
-
-	leal 32(%esi),%esi
-	leal 64(%edx),%edx
-	leal 4(%edi),%edi
-	loop .L3
-
-
-	subl $64,%esi
-	movl $15,%ecx
-.L4:
-	movq (%edx),%mm0
-	pmaddwd (%esi),%mm0
-	movq 8(%edx),%mm1
-	pmaddwd 8(%esi),%mm1
-	movq 16(%edx),%mm2
-	pmaddwd 16(%esi),%mm2
-	movq 24(%edx),%mm3
-	pmaddwd 24(%esi),%mm3
-	paddd %mm1,%mm0
-	paddd %mm2,%mm0
-	paddd %mm3,%mm0
-	movq %mm0,%mm1
-	psrlq $32,%mm1
-	paddd %mm0,%mm1
-	psrad $13,%mm1
-	packssdw %mm1,%mm1
-	psubd %mm0,%mm0
-	psubsw %mm1,%mm0
-	movd %mm0,%eax
-	movw %ax,(%edi)
-
-	subl $32,%esi
-	addl $64,%edx
-	leal 4(%edi),%edi
-	loop .L4
-	emms
-	popl %ebx
-	popl %esi
-	popl %edi
-	popl %ebp
-	ret
-
-
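decode_MMX works in 16-bit fixed point: each pmaddwd multiplies four window coefficients against four buffer samples and adds adjacent products, and psrad $13 rescales the 32-bit accumulator before packssdw saturates it. A hedged scalar sketch of one output sample from the .L3 loop (names are illustrative):

    #include <stdint.h>

    /* One synth sample as the .L3 loop computes it: a 16x16 -> 32-bit
     * multiply-accumulate over 16 coefficients, a >>13 rescale, then
     * saturation to int16. */
    static short synth_sample_mmx(const short *win, const short *buf)
    {
        int32_t acc = 0;
        int i;
        for (i = 0; i < 16; i++)            /* four movq/pmaddwd pairs */
            acc += (int32_t)win[i] * buf[i];
        acc >>= 13;                         /* psrad $13               */
        if (acc >  32767) acc =  32767;     /* packssdw saturation     */
        if (acc < -32768) acc = -32768;
        return (short)acc;
    }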
--- a/Plugins/Input/mpg123/decode_i586.s Sun Nov 13 11:37:33 2005 -0800 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,323 +0,0 @@ -# -# synth_1to1 works the same way as the c version of this -# file. only two types of changes have been made: -# - reordered floating point instructions to -# prevent pipline stalls -# - made WRITE_SAMPLE use integer instead of -# (slower) floating point -# all kinds of x86 processors should benefit from these -# modifications. -# -# useful sources of information on optimizing x86 code include: -# -# Intel Architecture Optimization Manual -# http:#/www.intel.com/design/pentium/manuals/242816.htm -# -# Cyrix 6x86 Instruction Set Summary -# ftp:#/ftp.cyrix.com/6x86/6x-dbch6.pdf -# -# AMD-K5 Processor Software Development -# http:#/www.amd.com/products/cpg/techdocs/appnotes/20007e.pdf -# -# Stefan Bieschewski <stb@acm.org> -# -# You can use this part under GPL. -# -# $Id: decode_i586.s,v 1.3 2000/10/25 11:05:23 hippm Exp $ -# -.bss - .comm buffs,4352,4 -.data - .align 4 -bo: - .long 1 -.section .rodata - .align 8 -.LC0: - .long 0x0,0x40dfffc0 - .align 8 -.LC1: - .long 0x0,0xc0e00000 - .align 8 -.text -.globl mpg123_synth_1to1_pent -mpg123_synth_1to1_pent: - subl $12,%esp - pushl %ebp - pushl %edi - pushl %esi - pushl %ebx - movl 32(%esp),%eax - movl 40(%esp),%esi - xorl %edi,%edi - movl bo,%ebp - cmpl %edi,36(%esp) - jne .L48 - decl %ebp - andl $15,%ebp - movl %ebp,bo - movl $buffs,%ecx - jmp .L49 -.L48: - addl $2,%esi - movl $buffs+2176,%ecx -.L49: - testl $1,%ebp - je .L50 - movl %ecx,%ebx - movl %ebp,16(%esp) - pushl %eax - movl 20(%esp),%edx - leal (%ebx,%edx,4),%eax - pushl %eax - movl 24(%esp),%eax - incl %eax - andl $15,%eax - leal 1088(,%eax,4),%eax - addl %ebx,%eax - jmp .L74 -.L50: - leal 1088(%ecx),%ebx - leal 1(%ebp),%edx - movl %edx,16(%esp) - pushl %eax - leal 1092(%ecx,%ebp,4),%eax - pushl %eax - leal (%ecx,%ebp,4),%eax -.L74: - pushl %eax - call mpg123_dct64 - addl $12,%esp - movl 16(%esp),%edx - leal 0(,%edx,4),%edx - movl $mpg123_decwin+64,%eax - movl %eax,%ecx - subl %edx,%ecx - movl $16,%ebp -.L55: - flds (%ecx) - fmuls (%ebx) - flds 4(%ecx) - fmuls 4(%ebx) - fxch %st(1) - flds 8(%ecx) - fmuls 8(%ebx) - fxch %st(2) - fsubrp %st,%st(1) - flds 12(%ecx) - fmuls 12(%ebx) - fxch %st(2) - faddp %st,%st(1) - flds 16(%ecx) - fmuls 16(%ebx) - fxch %st(2) - fsubrp %st,%st(1) - flds 20(%ecx) - fmuls 20(%ebx) - fxch %st(2) - faddp %st,%st(1) - flds 24(%ecx) - fmuls 24(%ebx) - fxch %st(2) - fsubrp %st,%st(1) - flds 28(%ecx) - fmuls 28(%ebx) - fxch %st(2) - faddp %st,%st(1) - flds 32(%ecx) - fmuls 32(%ebx) - fxch %st(2) - fsubrp %st,%st(1) - flds 36(%ecx) - fmuls 36(%ebx) - fxch %st(2) - faddp %st,%st(1) - flds 40(%ecx) - fmuls 40(%ebx) - fxch %st(2) - fsubrp %st,%st(1) - flds 44(%ecx) - fmuls 44(%ebx) - fxch %st(2) - faddp %st,%st(1) - flds 48(%ecx) - fmuls 48(%ebx) - fxch %st(2) - fsubrp %st,%st(1) - flds 52(%ecx) - fmuls 52(%ebx) - fxch %st(2) - faddp %st,%st(1) - flds 56(%ecx) - fmuls 56(%ebx) - fxch %st(2) - fsubrp %st,%st(1) - flds 60(%ecx) - fmuls 60(%ebx) - fxch %st(2) - subl $4,%esp - faddp %st,%st(1) - fxch %st(1) - fsubrp %st,%st(1) - fistpl (%esp) - popl %eax - cmpl $32767,%eax - jg 1f - cmpl $-32768,%eax - jl 2f - movw %ax,(%esi) - jmp 4f -1: movw $32767,(%esi) - jmp 3f -2: movw $-32768,(%esi) -3: incl %edi -4: -.L54: - addl $64,%ebx - subl $-128,%ecx - addl $4,%esi - decl %ebp - jnz .L55 - flds (%ecx) - fmuls (%ebx) - flds 8(%ecx) - fmuls 8(%ebx) - flds 16(%ecx) - fmuls 16(%ebx) - fxch %st(2) - faddp %st,%st(1) - flds 24(%ecx) - fmuls 
24(%ebx) - fxch %st(2) - faddp %st,%st(1) - flds 32(%ecx) - fmuls 32(%ebx) - fxch %st(2) - faddp %st,%st(1) - flds 40(%ecx) - fmuls 40(%ebx) - fxch %st(2) - faddp %st,%st(1) - flds 48(%ecx) - fmuls 48(%ebx) - fxch %st(2) - faddp %st,%st(1) - flds 56(%ecx) - fmuls 56(%ebx) - fxch %st(2) - subl $4,%esp - faddp %st,%st(1) - fxch %st(1) - faddp %st,%st(1) - fistpl (%esp) - popl %eax - cmpl $32767,%eax - jg 1f - cmpl $-32768,%eax - jl 2f - movw %ax,(%esi) - jmp 4f -1: movw $32767,(%esi) - jmp 3f -2: movw $-32768,(%esi) -3: incl %edi -4: -.L62: - addl $-64,%ebx - addl $4,%esi - movl 16(%esp),%edx - leal -128(%ecx,%edx,8),%ecx - movl $15,%ebp -.L68: - flds -4(%ecx) - fchs - fmuls (%ebx) - flds -8(%ecx) - fmuls 4(%ebx) - fxch %st(1) - flds -12(%ecx) - fmuls 8(%ebx) - fxch %st(2) - fsubrp %st,%st(1) - flds -16(%ecx) - fmuls 12(%ebx) - fxch %st(2) - fsubrp %st,%st(1) - flds -20(%ecx) - fmuls 16(%ebx) - fxch %st(2) - fsubrp %st,%st(1) - flds -24(%ecx) - fmuls 20(%ebx) - fxch %st(2) - fsubrp %st,%st(1) - flds -28(%ecx) - fmuls 24(%ebx) - fxch %st(2) - fsubrp %st,%st(1) - flds -32(%ecx) - fmuls 28(%ebx) - fxch %st(2) - fsubrp %st,%st(1) - flds -36(%ecx) - fmuls 32(%ebx) - fxch %st(2) - fsubrp %st,%st(1) - flds -40(%ecx) - fmuls 36(%ebx) - fxch %st(2) - fsubrp %st,%st(1) - flds -44(%ecx) - fmuls 40(%ebx) - fxch %st(2) - fsubrp %st,%st(1) - flds -48(%ecx) - fmuls 44(%ebx) - fxch %st(2) - fsubrp %st,%st(1) - flds -52(%ecx) - fmuls 48(%ebx) - fxch %st(2) - fsubrp %st,%st(1) - flds -56(%ecx) - fmuls 52(%ebx) - fxch %st(2) - fsubrp %st,%st(1) - flds -60(%ecx) - fmuls 56(%ebx) - fxch %st(2) - fsubrp %st,%st(1) - flds (%ecx) - fmuls 60(%ebx) - fxch %st(2) - subl $4,%esp - fsubrp %st,%st(1) - fxch %st(1) - fsubrp %st,%st(1) - fistpl (%esp) - popl %eax - cmpl $32767,%eax - jg 1f - cmpl $-32768,%eax - jl 2f - movw %ax,(%esi) - jmp 4f -1: movw $32767,(%esi) - jmp 3f -2: movw $-32768,(%esi) -3: incl %edi -4: -.L67: - addl $-64,%ebx - addl $-128,%ecx - addl $4,%esi - decl %ebp - jnz .L68 - movl %edi,%eax - popl %ebx - popl %esi - popl %edi - popl %ebp - addl $12,%esp - ret -
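decode_i586's sample store is the integer WRITE_SAMPLE its header comment advertises: fistpl converts with the FPU's current rounding mode, and explicit compares clamp to the 16-bit range while counting clipped samples (the count is returned in %eax). In C, roughly:

    /* Integer WRITE_SAMPLE, per the fistpl/cmpl sequence above.  A C
     * cast truncates where fistpl rounds, so this is only a
     * behavioural sketch. */
    static int write_sample_i586(short *out, double sum, int clips)
    {
        int v = (int)sum;
        if (v > 32767)       { *out =  32767; clips++; }
        else if (v < -32768) { *out = -32768; clips++; }
        else                 { *out = (short)v; }
        return clips;
    }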
--- a/Plugins/Input/mpg123/getcpuflags.s	Sun Nov 13 11:37:33 2005 -0800
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,61 +0,0 @@
-# KIMURA Takuhiro <kim@comtec.co.jp>
-# Copyright 2002 Haavard Kvaalen <havardk@xmms.org>
-
-# Get feature flags with cpuid
-# void mpg123_getcpuid(unsigned int *fflags, unsigned int *efflags)
-
-.text
-	.align 4
-.globl mpg123_getcpuflags
-	.type	mpg123_getcpuflags,@function
-mpg123_getcpuflags:
-	pushl %ebp
-	movl %esp,%ebp
-	pushl %edx
-	pushl %ecx
-	pushl %ebx
-	pushfl			# First test if cpuid is supported
-	pushfl			# Check if the ID flag (bit 21 of eflags) sticks
-	popl %eax		# Get eflags
-	movl %eax,%ebx
-	xorl $0x200000,%eax	# Flip bit 21
-	pushl %eax
-	popfl			# Get modified eflags to flag register
-	pushfl
-	popl %eax		# Get eflags again
-	popfl			# Restore original eflags
-	xorl %ebx,%eax
-	je nocpuid
-	xorl %eax,%eax
-	cpuid			# Check if eax = 1 is supported
-	xorl %edx,%edx
-	cmp $1,%eax
-	jl noflags
-	movl $1,%eax		# Get feature flags
-	cpuid
-noflags:
-	movl 8(%ebp),%eax
-	movl %edx,(%eax)
-	movl $0x80000000,%eax	# Check support for extended level cpuid
-	cpuid
-	xorl %edx,%edx
-	cmp $0x80000001,%eax	# Get extended feature flags
-	jl noeflags
-	movl $0x80000001,%eax
-	cpuid
-noeflags:
-	movl 12(%ebp),%eax
-	movl %edx,(%eax)
-	jmp done
-nocpuid:
-	xorl %edx,%edx
-	movl 8(%ebp),%eax
-	movl %edx,(%eax)
-	movl 12(%ebp),%eax
-	movl %edx,(%eax)
-done:
-	popl %ebx
-	popl %ecx
-	popl %edx
-	leave
-	ret
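getcpuflags.s was the runtime-detection primitive behind the "3DNow! automatic detect" remarks in the deleted decoders: it probes for cpuid via the EFLAGS ID bit, then returns the leaf-1 EDX feature flags and the 0x80000001 EDX extended flags (by the usual CPUID layout, MMX is base EDX bit 23 and 3DNow! is extended EDX bit 31). The same result can be had on any modern compiler; a hedged sketch using GCC's <cpuid.h> instead of hand-written asm:

    #include <cpuid.h>

    /* Portable equivalent of mpg123_getcpuflags(): zero both words when
     * cpuid (or a leaf) is unavailable, otherwise return EDX of leaf 1
     * and of extended leaf 0x80000001.  Sketch only. */
    static void getcpuflags_c(unsigned int *fflags, unsigned int *efflags)
    {
        unsigned int a = 0, b = 0, c = 0, d = 0;

        *fflags = *efflags = 0;
        if (__get_cpuid(1, &a, &b, &c, &d))           /* base feature leaf */
            *fflags = d;
        a = b = c = d = 0;
        if (__get_cpuid(0x80000001, &a, &b, &c, &d))  /* extended leaf */
            *efflags = d;
    }

A caller would then pick mpg123_synth_1to1_3dnow, mpg123_synth_1to1_mmx, or the plain C synth from those bits; this changeset retires that dispatch in favour of the portable C path everywhere.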