Mercurial > mplayer.hg
view mp3lib/dct36_3dnow.s @ 4218:3931c41f740a
Added new syncengine thanks to a new previously undocumented feature of the em8300, this might fix playback on both slow and fast machines (more testing needed). This also requires users to get the em8300 driver from cvs until the next version is released (will probably happen this weekend)
Added lots of comments, should be pretty easy to understand most of the internals now
Added lots of brackets to if's for's while's etc, this is not a cosmetical thing but rather due to the fact I got some very odd bugs with else's since I didn't properly use brackets (and it's the K&R standard to have brackets everywhere)
Fixed some bugs that would occur when disabling libmp1e
Switched to default to the new naming scheme of device nodes, the driver will slowly switch over to this state, if it can't find devices under the new name it will try the old naming scheme
I stopped opening devices in non-blocking mode, it would break the new syncengine which tries to burst data to the device (alot of times meaning it will fill the fifo pretty fast which would previously result in jerkyness on fast machines)
The device now sets the initial state of the pts and speed (probably not needed, but assumption is the mother of all fuckups =)
Keep the control interface open during the entire duration of the libvo device, we might need this to flush video buffers on seeking (currently not implemented, therefore seeking is broken)
This is beta stuff to the driver, I will get some users to test it for me and do my best to fix seeking as soon as possible...
author | mswitch |
---|---|
date | Thu, 17 Jan 2002 10:33:47 +0000 |
parents | 3b5f5d1c5041 |
children |
line wrap: on
line source
/ / dct36_3dnow.s - 3DNow! optimized dct36() / / This code based 'dct36_3dnow.s' by Syuuhei Kashiyama / <squash@mb.kcom.ne.jp>,only two types of changes have been made: / / - remove PREFETCH instruction for speedup / - change function name for support 3DNow! automatic detect / / You can find Kashiyama's original 3dnow! support patch / (for mpg123-0.59o) at / http://user.ecc.u-tokyo.ac.jp/~g810370/linux-simd/ (Japanese). / / by KIMURA Takuhiro <kim@hannah.ipc.miyakyo-u.ac.jp> - until 31.Mar.1999 / <kim@comtec.co.jp> - after 1.Apr.1999 / /// /// Replacement of dct36() with AMD's 3DNow! SIMD operations support /// /// Syuuhei Kashiyama <squash@mb.kcom.ne.jp> /// /// The author of this program disclaim whole expressed or implied /// warranties with regard to this program, and in no event shall the /// author of this program liable to whatever resulted from the use of /// this program. Use it at your own risk. /// .globl dct36_3dnow .type dct36_3dnow,@function dct36_3dnow: pushl %ebp movl %esp,%ebp subl $120,%esp pushl %esi pushl %ebx movl 8(%ebp),%eax movl 12(%ebp),%esi movl 16(%ebp),%ecx movl 20(%ebp),%edx movl 24(%ebp),%ebx leal -128(%ebp),%esp femms movq (%eax),%mm0 movq 4(%eax),%mm1 pfadd %mm1,%mm0 movq %mm0,4(%eax) psrlq $32,%mm1 movq 12(%eax),%mm2 punpckldq %mm2,%mm1 pfadd %mm2,%mm1 movq %mm1,12(%eax) psrlq $32,%mm2 movq 20(%eax),%mm3 punpckldq %mm3,%mm2 pfadd %mm3,%mm2 movq %mm2,20(%eax) psrlq $32,%mm3 movq 28(%eax),%mm4 punpckldq %mm4,%mm3 pfadd %mm4,%mm3 movq %mm3,28(%eax) psrlq $32,%mm4 movq 36(%eax),%mm5 punpckldq %mm5,%mm4 pfadd %mm5,%mm4 movq %mm4,36(%eax) psrlq $32,%mm5 movq 44(%eax),%mm6 punpckldq %mm6,%mm5 pfadd %mm6,%mm5 movq %mm5,44(%eax) psrlq $32,%mm6 movq 52(%eax),%mm7 punpckldq %mm7,%mm6 pfadd %mm7,%mm6 movq %mm6,52(%eax) psrlq $32,%mm7 movq 60(%eax),%mm0 punpckldq %mm0,%mm7 pfadd %mm0,%mm7 movq %mm7,60(%eax) psrlq $32,%mm0 movd 68(%eax),%mm1 pfadd %mm1,%mm0 movd %mm0,68(%eax) movd 4(%eax),%mm0 movd 12(%eax),%mm1 punpckldq %mm1,%mm0 punpckldq 20(%eax),%mm1 pfadd %mm1,%mm0 movd %mm0,12(%eax) psrlq $32,%mm0 movd %mm0,20(%eax) psrlq $32,%mm1 movd 28(%eax),%mm2 punpckldq %mm2,%mm1 punpckldq 36(%eax),%mm2 pfadd %mm2,%mm1 movd %mm1,28(%eax) psrlq $32,%mm1 movd %mm1,36(%eax) psrlq $32,%mm2 movd 44(%eax),%mm3 punpckldq %mm3,%mm2 punpckldq 52(%eax),%mm3 pfadd %mm3,%mm2 movd %mm2,44(%eax) psrlq $32,%mm2 movd %mm2,52(%eax) psrlq $32,%mm3 movd 60(%eax),%mm4 punpckldq %mm4,%mm3 punpckldq 68(%eax),%mm4 pfadd %mm4,%mm3 movd %mm3,60(%eax) psrlq $32,%mm3 movd %mm3,68(%eax) movq 24(%eax),%mm0 movq 48(%eax),%mm1 movd COS9+12,%mm2 punpckldq %mm2,%mm2 movd COS9+24,%mm3 punpckldq %mm3,%mm3 pfmul %mm2,%mm0 pfmul %mm3,%mm1 pushl %eax movl $1,%eax movd %eax,%mm7 pi2fd %mm7,%mm7 popl %eax movq 8(%eax),%mm2 movd COS9+4,%mm3 punpckldq %mm3,%mm3 pfmul %mm3,%mm2 pfadd %mm0,%mm2 movq 40(%eax),%mm3 movd COS9+20,%mm4 punpckldq %mm4,%mm4 pfmul %mm4,%mm3 pfadd %mm3,%mm2 movq 56(%eax),%mm3 movd COS9+28,%mm4 punpckldq %mm4,%mm4 pfmul %mm4,%mm3 pfadd %mm3,%mm2 movq (%eax),%mm3 movq 16(%eax),%mm4 movd COS9+8,%mm5 punpckldq %mm5,%mm5 pfmul %mm5,%mm4 pfadd %mm4,%mm3 movq 32(%eax),%mm4 movd COS9+16,%mm5 punpckldq %mm5,%mm5 pfmul %mm5,%mm4 pfadd %mm4,%mm3 pfadd %mm1,%mm3 movq 64(%eax),%mm4 movd COS9+32,%mm5 punpckldq %mm5,%mm5 pfmul %mm5,%mm4 pfadd %mm4,%mm3 movq %mm2,%mm4 pfadd %mm3,%mm4 movq %mm7,%mm5 punpckldq tfcos36+0,%mm5 pfmul %mm5,%mm4 movq %mm4,%mm5 pfacc %mm5,%mm5 movd 108(%edx),%mm6 punpckldq 104(%edx),%mm6 pfmul %mm6,%mm5 movd %mm5,36(%ecx) psrlq $32,%mm5 movd %mm5,32(%ecx) movq %mm4,%mm6 punpckldq %mm6,%mm5 pfsub %mm6,%mm5 punpckhdq %mm5,%mm5 movd 32(%edx),%mm6 punpckldq 36(%edx),%mm6 pfmul %mm6,%mm5 movd 32(%esi),%mm6 punpckldq 36(%esi),%mm6 pfadd %mm6,%mm5 movd %mm5,1024(%ebx) psrlq $32,%mm5 movd %mm5,1152(%ebx) movq %mm3,%mm4 pfsub %mm2,%mm4 movq %mm7,%mm5 punpckldq tfcos36+32,%mm5 pfmul %mm5,%mm4 movq %mm4,%mm5 pfacc %mm5,%mm5 movd 140(%edx),%mm6 punpckldq 72(%edx),%mm6 pfmul %mm6,%mm5 movd %mm5,68(%ecx) psrlq $32,%mm5 movd %mm5,0(%ecx) movq %mm4,%mm6 punpckldq %mm6,%mm5 pfsub %mm6,%mm5 punpckhdq %mm5,%mm5 movd 0(%edx),%mm6 punpckldq 68(%edx),%mm6 pfmul %mm6,%mm5 movd 0(%esi),%mm6 punpckldq 68(%esi),%mm6 pfadd %mm6,%mm5 movd %mm5,0(%ebx) psrlq $32,%mm5 movd %mm5,2176(%ebx) movq 8(%eax),%mm2 movq 40(%eax),%mm3 pfsub %mm3,%mm2 movq 56(%eax),%mm3 pfsub %mm3,%mm2 movd COS9+12,%mm3 punpckldq %mm3,%mm3 pfmul %mm3,%mm2 movq 16(%eax),%mm3 movq 32(%eax),%mm4 pfsub %mm4,%mm3 movq 64(%eax),%mm4 pfsub %mm4,%mm3 movd COS9+24,%mm4 punpckldq %mm4,%mm4 pfmul %mm4,%mm3 movq 48(%eax),%mm4 pfsub %mm4,%mm3 movq (%eax),%mm4 pfadd %mm4,%mm3 movq %mm2,%mm4 pfadd %mm3,%mm4 movq %mm7,%mm5 punpckldq tfcos36+4,%mm5 pfmul %mm5,%mm4 movq %mm4,%mm5 pfacc %mm5,%mm5 movd 112(%edx),%mm6 punpckldq 100(%edx),%mm6 pfmul %mm6,%mm5 movd %mm5,40(%ecx) psrlq $32,%mm5 movd %mm5,28(%ecx) movq %mm4,%mm6 punpckldq %mm6,%mm5 pfsub %mm6,%mm5 punpckhdq %mm5,%mm5 movd 28(%edx),%mm6 punpckldq 40(%edx),%mm6 pfmul %mm6,%mm5 movd 28(%esi),%mm6 punpckldq 40(%esi),%mm6 pfadd %mm6,%mm5 movd %mm5,896(%ebx) psrlq $32,%mm5 movd %mm5,1280(%ebx) movq %mm3,%mm4 pfsub %mm2,%mm4 movq %mm7,%mm5 punpckldq tfcos36+28,%mm5 pfmul %mm5,%mm4 movq %mm4,%mm5 pfacc %mm5,%mm5 movd 136(%edx),%mm6 punpckldq 76(%edx),%mm6 pfmul %mm6,%mm5 movd %mm5,64(%ecx) psrlq $32,%mm5 movd %mm5,4(%ecx) movq %mm4,%mm6 punpckldq %mm6,%mm5 pfsub %mm6,%mm5 punpckhdq %mm5,%mm5 movd 4(%edx),%mm6 punpckldq 64(%edx),%mm6 pfmul %mm6,%mm5 movd 4(%esi),%mm6 punpckldq 64(%esi),%mm6 pfadd %mm6,%mm5 movd %mm5,128(%ebx) psrlq $32,%mm5 movd %mm5,2048(%ebx) movq 8(%eax),%mm2 movd COS9+20,%mm3 punpckldq %mm3,%mm3 pfmul %mm3,%mm2 pfsub %mm0,%mm2 movq 40(%eax),%mm3 movd COS9+28,%mm4 punpckldq %mm4,%mm4 pfmul %mm4,%mm3 pfsub %mm3,%mm2 movq 56(%eax),%mm3 movd COS9+4,%mm4 punpckldq %mm4,%mm4 pfmul %mm4,%mm3 pfadd %mm3,%mm2 movq (%eax),%mm3 movq 16(%eax),%mm4 movd COS9+32,%mm5 punpckldq %mm5,%mm5 pfmul %mm5,%mm4 pfsub %mm4,%mm3 movq 32(%eax),%mm4 movd COS9+8,%mm5 punpckldq %mm5,%mm5 pfmul %mm5,%mm4 pfsub %mm4,%mm3 pfadd %mm1,%mm3 movq 64(%eax),%mm4 movd COS9+16,%mm5 punpckldq %mm5,%mm5 pfmul %mm5,%mm4 pfadd %mm4,%mm3 movq %mm2,%mm4 pfadd %mm3,%mm4 movq %mm7,%mm5 punpckldq tfcos36+8,%mm5 pfmul %mm5,%mm4 movq %mm4,%mm5 pfacc %mm5,%mm5 movd 116(%edx),%mm6 punpckldq 96(%edx),%mm6 pfmul %mm6,%mm5 movd %mm5,44(%ecx) psrlq $32,%mm5 movd %mm5,24(%ecx) movq %mm4,%mm6 punpckldq %mm6,%mm5 pfsub %mm6,%mm5 punpckhdq %mm5,%mm5 movd 24(%edx),%mm6 punpckldq 44(%edx),%mm6 pfmul %mm6,%mm5 movd 24(%esi),%mm6 punpckldq 44(%esi),%mm6 pfadd %mm6,%mm5 movd %mm5,768(%ebx) psrlq $32,%mm5 movd %mm5,1408(%ebx) movq %mm3,%mm4 pfsub %mm2,%mm4 movq %mm7,%mm5 punpckldq tfcos36+24,%mm5 pfmul %mm5,%mm4 movq %mm4,%mm5 pfacc %mm5,%mm5 movd 132(%edx),%mm6 punpckldq 80(%edx),%mm6 pfmul %mm6,%mm5 movd %mm5,60(%ecx) psrlq $32,%mm5 movd %mm5,8(%ecx) movq %mm4,%mm6 punpckldq %mm6,%mm5 pfsub %mm6,%mm5 punpckhdq %mm5,%mm5 movd 8(%edx),%mm6 punpckldq 60(%edx),%mm6 pfmul %mm6,%mm5 movd 8(%esi),%mm6 punpckldq 60(%esi),%mm6 pfadd %mm6,%mm5 movd %mm5,256(%ebx) psrlq $32,%mm5 movd %mm5,1920(%ebx) movq 8(%eax),%mm2 movd COS9+28,%mm3 punpckldq %mm3,%mm3 pfmul %mm3,%mm2 pfsub %mm0,%mm2 movq 40(%eax),%mm3 movd COS9+4,%mm4 punpckldq %mm4,%mm4 pfmul %mm4,%mm3 pfadd %mm3,%mm2 movq 56(%eax),%mm3 movd COS9+20,%mm4 punpckldq %mm4,%mm4 pfmul %mm4,%mm3 pfsub %mm3,%mm2 movq (%eax),%mm3 movq 16(%eax),%mm4 movd COS9+16,%mm5 punpckldq %mm5,%mm5 pfmul %mm5,%mm4 pfsub %mm4,%mm3 movq 32(%eax),%mm4 movd COS9+32,%mm5 punpckldq %mm5,%mm5 pfmul %mm5,%mm4 pfadd %mm4,%mm3 pfadd %mm1,%mm3 movq 64(%eax),%mm4 movd COS9+8,%mm5 punpckldq %mm5,%mm5 pfmul %mm5,%mm4 pfsub %mm4,%mm3 movq %mm2,%mm4 pfadd %mm3,%mm4 movq %mm7,%mm5 punpckldq tfcos36+12,%mm5 pfmul %mm5,%mm4 movq %mm4,%mm5 pfacc %mm5,%mm5 movd 120(%edx),%mm6 punpckldq 92(%edx),%mm6 pfmul %mm6,%mm5 movd %mm5,48(%ecx) psrlq $32,%mm5 movd %mm5,20(%ecx) movq %mm4,%mm6 punpckldq %mm6,%mm5 pfsub %mm6,%mm5 punpckhdq %mm5,%mm5 movd 20(%edx),%mm6 punpckldq 48(%edx),%mm6 pfmul %mm6,%mm5 movd 20(%esi),%mm6 punpckldq 48(%esi),%mm6 pfadd %mm6,%mm5 movd %mm5,640(%ebx) psrlq $32,%mm5 movd %mm5,1536(%ebx) movq %mm3,%mm4 pfsub %mm2,%mm4 movq %mm7,%mm5 punpckldq tfcos36+20,%mm5 pfmul %mm5,%mm4 movq %mm4,%mm5 pfacc %mm5,%mm5 movd 128(%edx),%mm6 punpckldq 84(%edx),%mm6 pfmul %mm6,%mm5 movd %mm5,56(%ecx) psrlq $32,%mm5 movd %mm5,12(%ecx) movq %mm4,%mm6 punpckldq %mm6,%mm5 pfsub %mm6,%mm5 punpckhdq %mm5,%mm5 movd 12(%edx),%mm6 punpckldq 56(%edx),%mm6 pfmul %mm6,%mm5 movd 12(%esi),%mm6 punpckldq 56(%esi),%mm6 pfadd %mm6,%mm5 movd %mm5,384(%ebx) psrlq $32,%mm5 movd %mm5,1792(%ebx) movq (%eax),%mm4 movq 16(%eax),%mm3 pfsub %mm3,%mm4 movq 32(%eax),%mm3 pfadd %mm3,%mm4 movq 48(%eax),%mm3 pfsub %mm3,%mm4 movq 64(%eax),%mm3 pfadd %mm3,%mm4 movq %mm7,%mm5 punpckldq tfcos36+16,%mm5 pfmul %mm5,%mm4 movq %mm4,%mm5 pfacc %mm5,%mm5 movd 124(%edx),%mm6 punpckldq 88(%edx),%mm6 pfmul %mm6,%mm5 movd %mm5,52(%ecx) psrlq $32,%mm5 movd %mm5,16(%ecx) movq %mm4,%mm6 punpckldq %mm6,%mm5 pfsub %mm6,%mm5 punpckhdq %mm5,%mm5 movd 16(%edx),%mm6 punpckldq 52(%edx),%mm6 pfmul %mm6,%mm5 movd 16(%esi),%mm6 punpckldq 52(%esi),%mm6 pfadd %mm6,%mm5 movd %mm5,512(%ebx) psrlq $32,%mm5 movd %mm5,1664(%ebx) femms popl %ebx popl %esi movl %ebp,%esp popl %ebp ret