Mercurial > mplayer.hg
view mp3lib/dct36_k7.s @ 5699:1dde9686d33b
Good evening ladies and gentlemen and welcome to the latest
installment of the ongoing show "Reworking the docs for fun and
profit". Your host Diego will be assisted by Nilmoni in presenting
you:
- spellchecking in all its glory
- a grammar to the envy of all native speakers
- answers now hopefully so clear that their respective questions shall
never be asked again
Somebody from the public raises his voice: "What about HTML errors?"
The host is quick to answer: "Yes, there have been corrections." From
the back of the auditorium comes a subdued question: "And the FONT
tags..?" The room falls silent. There is no answer and the host
twitches. Finally the words "They have not been touched." escape from
his mouth, barely audible. A murmur erupts but the jury nods and
calms the crowd: "Time to get back to serious hacking." The host
leaves the stage under polite applause and everybody scuttles off for
their notebooks...
author | arpi |
---|---|
date | Fri, 19 Apr 2002 07:30:49 +0000 |
parents | 59b0a9ec8604 |
children |
line wrap: on
line source
///
/// Replacement of dct36() with AMD's 3DNowEx(DSP)! SIMD operations support
///
/// This code is based on 'dct36_3dnow.s' by Syuuhei Kashiyama
/// <squash@mb.kcom.ne.jp>; only some types of changes have been made:
///
/// - added new opcode PSWAPD
/// - changed function name to support 3DNowEx! automatic detection
///
/// note: because K7 processors are aggressive out-of-order three-way
/// superscalar ones, instruction order is not significant for them.
///
/// Modified by Nick Kurshev <nickols_k@mail.ru>
///
/
/ dct36_3dnow.s - 3DNow! optimized dct36()
/
/ This code is based on 'dct36_3dnow.s' by Syuuhei Kashiyama
/ <squash@mb.kcom.ne.jp>; only two types of changes have been made:
/
/ - removed PREFETCH instruction for speedup
/ - changed function name to support 3DNow! automatic detection
/
/ You can find Kashiyama's original 3dnow! support patch
/ (for mpg123-0.59o) at
/ http://user.ecc.u-tokyo.ac.jp/~g810370/linux-simd/ (Japanese).
/
/ by KIMURA Takuhiro <kim@hannah.ipc.miyakyo-u.ac.jp> - until 31.Mar.1999
/ <kim@comtec.co.jp> - after 1.Apr.1999
/
///
/// Replacement of dct36() with AMD's 3DNow! SIMD operations support
///
/// Syuuhei Kashiyama <squash@mb.kcom.ne.jp>
///
/// The author of this program disclaim whole expressed or implied
/// warranties with regard to this program, and in no event shall the
/// author of this program liable to whatever resulted from the use of
/// this program. Use it at your own risk.
///
/// dct36_3dnowex
///
/// Straight-line (no branches, no calls) routine using MMX, 3DNow! and
/// extended-3DNow! (pswapd) instructions on packed single-precision floats.
///
/// Five 32-bit arguments are read from the caller's stack:
///    8(%ebp) -> %eax : float array, dwords 0..17 read and rewritten in place
///   12(%ebp) -> %esi : float array, dwords 0..17 read only
///   16(%ebp) -> %ecx : float array, dwords 0..17 written only
///   20(%ebp) -> %edx : float array, dwords 0..35 read only (multipliers)
///   24(%ebp) -> %ebx : float output, one dword written at each of the byte
///                      offsets 0,128,256,...,2176 (18 stores, 128 bytes apart)
/// NOTE(review): presumably these are (inbuf, o1, o2, wintab, tsbuf) of the
/// C dct36() this file says it replaces -- confirm against the C prototype.
///
/// External data read: COS9 (dwords 1..8) and tfcos36 (dwords 0..8), both
/// addressed absolutely.
///
/// Register usage: %esi and %ebx are saved and restored; %eax is reloaded
/// with argument 1 and not restored; %mm0-%mm7 are all overwritten.  femms
/// is executed on entry and before returning.  No value is placed in a
/// return register by this code.
///
/// Shorthand used in the comments below (named after the base register):
///   A[k] = dword k at (%eax)      P<k> = qword at 8*k(%eax) = [A[2k], A[2k+1]]
///   S[k] = dword k at (%esi)      C[k] = dword k at (%ecx)
///   D[k] = dword k at (%edx)      B[k] = dword at byte offset 128*k from (%ebx)
///   [lo, hi] = low / high dword of an MMX register
///
	.globl dct36_3dnowex
	.type dct36_3dnowex,@function
dct36_3dnowex:
/// Prologue: standard frame, 120 bytes reserved, %esi/%ebx saved.
/// No instruction in this routine addresses the reserved 120 bytes.
	pushl %ebp
	movl %esp,%ebp
	subl $120,%esp
	pushl %esi
	pushl %ebx
/// Load the five pointer arguments.
	movl 8(%ebp),%eax
	movl 12(%ebp),%esi
	movl 16(%ebp),%ecx
	movl 20(%ebp),%edx
	movl 24(%ebp),%ebx
/// %ebp-128 equals the current %esp (120 reserved + two 4-byte pushes),
/// so this leaves the stack pointer where it already is.
	leal -128(%ebp),%esp
	femms
///
/// Pass 1: A[i] = A[i] + A[i-1] for i = 1..17, using the values from before
/// this pass (each qword is loaded before the overlapping store happens;
/// the upper dword of the previous load is shifted down and paired with the
/// next element).
///
	movq (%eax),%mm0
	movq 4(%eax),%mm1
	pfadd %mm1,%mm0
	movq %mm0,4(%eax)
	psrlq $32,%mm1
	movq 12(%eax),%mm2
	punpckldq %mm2,%mm1
	pfadd %mm2,%mm1
	movq %mm1,12(%eax)
	psrlq $32,%mm2
	movq 20(%eax),%mm3
	punpckldq %mm3,%mm2
	pfadd %mm3,%mm2
	movq %mm2,20(%eax)
	psrlq $32,%mm3
	movq 28(%eax),%mm4
	punpckldq %mm4,%mm3
	pfadd %mm4,%mm3
	movq %mm3,28(%eax)
	psrlq $32,%mm4
	movq 36(%eax),%mm5
	punpckldq %mm5,%mm4
	pfadd %mm5,%mm4
	movq %mm4,36(%eax)
	psrlq $32,%mm5
	movq 44(%eax),%mm6
	punpckldq %mm6,%mm5
	pfadd %mm6,%mm5
	movq %mm5,44(%eax)
	psrlq $32,%mm6
	movq 52(%eax),%mm7
	punpckldq %mm7,%mm6
	pfadd %mm7,%mm6
	movq %mm6,52(%eax)
	psrlq $32,%mm7
	movq 60(%eax),%mm0
	punpckldq %mm0,%mm7
	pfadd %mm0,%mm7
	movq %mm7,60(%eax)
/// Last element of pass 1: A[17] += old A[16] (single dword).
	psrlq $32,%mm0
	movd 68(%eax),%mm1
	pfadd %mm1,%mm0
	movd %mm0,68(%eax)
///
/// Pass 2: A[i] = A[i] + A[i-2] for odd i = 3..17, again using the values
/// from before this pass (the operand two places down is carried in a
/// register that was loaded before its memory slot is overwritten).
///
	movd 4(%eax),%mm0
	movd 12(%eax),%mm1
	punpckldq %mm1,%mm0
	punpckldq 20(%eax),%mm1
	pfadd %mm1,%mm0
	movd %mm0,12(%eax)
	psrlq $32,%mm0
	movd %mm0,20(%eax)
	psrlq $32,%mm1
	movd 28(%eax),%mm2
	punpckldq %mm2,%mm1
	punpckldq 36(%eax),%mm2
	pfadd %mm2,%mm1
	movd %mm1,28(%eax)
	psrlq $32,%mm1
	movd %mm1,36(%eax)
	psrlq $32,%mm2
	movd 44(%eax),%mm3
	punpckldq %mm3,%mm2
	punpckldq 52(%eax),%mm3
	pfadd %mm3,%mm2
	movd %mm2,44(%eax)
	psrlq $32,%mm2
	movd %mm2,52(%eax)
	psrlq $32,%mm3
	movd 60(%eax),%mm4
	punpckldq %mm4,%mm3
	punpckldq 68(%eax),%mm4
	pfadd %mm4,%mm3
	movd %mm3,60(%eax)
	psrlq $32,%mm3
	movd %mm3,68(%eax)
///
/// Values kept live for the rest of the routine (never overwritten below):
///   %mm0 = P3 * COS9[3]      (COS9[3] broadcast to both lanes)
///   %mm1 = P6 * COS9[6]      (COS9[6] broadcast to both lanes)
///   %mm7 = [1.0, 0.0]        (integer 1 converted with pi2fd; high lane 0)
/// %eax is borrowed for the constant and restored by the push/pop pair.
///
	movq 24(%eax),%mm0
	movq 48(%eax),%mm1
	movd COS9+12,%mm2
	punpckldq %mm2,%mm2
	movd COS9+24,%mm3
	punpckldq %mm3,%mm3
	pfmul %mm2,%mm0
	pfmul %mm3,%mm1
	pushl %eax
	movl $1,%eax
	movd %eax,%mm7
	pi2fd %mm7,%mm7
	popl %eax
///
/// Group 1 terms:
///   %mm2 = P1*COS9[1] + %mm0 + P5*COS9[5] + P7*COS9[7]
///   %mm3 = P0 + P2*COS9[2] + P4*COS9[4] + %mm1 + P8*COS9[8]
///
	movq 8(%eax),%mm2
	movd COS9+4,%mm3
	punpckldq %mm3,%mm3
	pfmul %mm3,%mm2
	pfadd %mm0,%mm2
	movq 40(%eax),%mm3
	movd COS9+20,%mm4
	punpckldq %mm4,%mm4
	pfmul %mm4,%mm3
	pfadd %mm3,%mm2
	movq 56(%eax),%mm3
	movd COS9+28,%mm4
	punpckldq %mm4,%mm4
	pfmul %mm4,%mm3
	pfadd %mm3,%mm2
	movq (%eax),%mm3
	movq 16(%eax),%mm4
	movd COS9+8,%mm5
	punpckldq %mm5,%mm5
	pfmul %mm5,%mm4
	pfadd %mm4,%mm3
	movq 32(%eax),%mm4
	movd COS9+16,%mm5
	punpckldq %mm5,%mm5
	pfmul %mm5,%mm4
	pfadd %mm4,%mm3
	pfadd %mm1,%mm3
	movq 64(%eax),%mm4
	movd COS9+32,%mm5
	punpckldq %mm5,%mm5
	pfmul %mm5,%mm4
	pfadd %mm4,%mm3
///
/// Group 1, first output set:
///   x = (%mm2 + %mm3) * [1.0, tfcos36[0]]
///   s = x.lo + x.hi  (pfacc puts s in both lanes)
///   C[8] = s*D[26], C[9] = s*D[27]
/// pswapd exchanges the two products so one movq stores C[8] (low dword)
/// and C[9] (high dword) together.
///
	movq %mm2,%mm4
	pfadd %mm3,%mm4
	movq %mm7,%mm5
	punpckldq tfcos36+0,%mm5
	pfmul %mm5,%mm4
	movq %mm4,%mm5
	pfacc %mm5,%mm5
	movd 108(%edx),%mm6
	punpckldq 104(%edx),%mm6
	pfmul %mm6,%mm5
	pswapd %mm5, %mm5
	movq %mm5, 32(%ecx)
///   d = x.lo - x.hi: punpckldq/pfsub/punpckhdq leave d in both lanes of
///   %mm5 (whatever %mm5 held before does not reach the result).
///   B[8] = d*D[8] + S[8], B[9] = d*D[9] + S[9]
	movq %mm4,%mm6
	punpckldq %mm6,%mm5
	pfsub %mm6,%mm5
	punpckhdq %mm5,%mm5
	movd 32(%edx),%mm6
	punpckldq 36(%edx),%mm6
	pfmul %mm6,%mm5
	movd 32(%esi),%mm6
	punpckldq 36(%esi),%mm6
	pfadd %mm6,%mm5
	movd %mm5,1024(%ebx)
	psrlq $32,%mm5
	movd %mm5,1152(%ebx)
///
/// Group 1, second output set:
///   x = (%mm3 - %mm2) * [1.0, tfcos36[8]]
///   s = x.lo + x.hi : C[17] = s*D[35], C[0] = s*D[18]
///   d = x.lo - x.hi : B[0] = d*D[0] + S[0], B[17] = d*D[17] + S[17]
///
	movq %mm3,%mm4
	pfsub %mm2,%mm4
	movq %mm7,%mm5
	punpckldq tfcos36+32,%mm5
	pfmul %mm5,%mm4
	movq %mm4,%mm5
	pfacc %mm5,%mm5
	movd 140(%edx),%mm6
	punpckldq 72(%edx),%mm6
	pfmul %mm6,%mm5
	movd %mm5,68(%ecx)
	psrlq $32,%mm5
	movd %mm5,0(%ecx)
	movq %mm4,%mm6
	punpckldq %mm6,%mm5
	pfsub %mm6,%mm5
	punpckhdq %mm5,%mm5
	movd 0(%edx),%mm6
	punpckldq 68(%edx),%mm6
	pfmul %mm6,%mm5
	movd 0(%esi),%mm6
	punpckldq 68(%esi),%mm6
	pfadd %mm6,%mm5
	movd %mm5,0(%ebx)
	psrlq $32,%mm5
	movd %mm5,2176(%ebx)
///
/// Group 2 terms:
///   %mm2 = (P1 - P5 - P7) * COS9[3]
///   %mm3 = (P2 - P4 - P8) * COS9[6] - P6 + P0
///
	movq 8(%eax),%mm2
	movq 40(%eax),%mm3
	pfsub %mm3,%mm2
	movq 56(%eax),%mm3
	pfsub %mm3,%mm2
	movd COS9+12,%mm3
	punpckldq %mm3,%mm3
	pfmul %mm3,%mm2
	movq 16(%eax),%mm3
	movq 32(%eax),%mm4
	pfsub %mm4,%mm3
	movq 64(%eax),%mm4
	pfsub %mm4,%mm3
	movd COS9+24,%mm4
	punpckldq %mm4,%mm4
	pfmul %mm4,%mm3
	movq 48(%eax),%mm4
	pfsub %mm4,%mm3
	movq (%eax),%mm4
	pfadd %mm4,%mm3
///
/// Group 2, first output set:
///   x = (%mm2 + %mm3) * [1.0, tfcos36[1]]
///   s = x.lo + x.hi : C[10] = s*D[28], C[7] = s*D[25]
///   d = x.lo - x.hi : B[7] = d*D[7] + S[7], B[10] = d*D[10] + S[10]
///
	movq %mm2,%mm4
	pfadd %mm3,%mm4
	movq %mm7,%mm5
	punpckldq tfcos36+4,%mm5
	pfmul %mm5,%mm4
	movq %mm4,%mm5
	pfacc %mm5,%mm5
	movd 112(%edx),%mm6
	punpckldq 100(%edx),%mm6
	pfmul %mm6,%mm5
	movd %mm5,40(%ecx)
	psrlq $32,%mm5
	movd %mm5,28(%ecx)
	movq %mm4,%mm6
	punpckldq %mm6,%mm5
	pfsub %mm6,%mm5
	punpckhdq %mm5,%mm5
	movd 28(%edx),%mm6
	punpckldq 40(%edx),%mm6
	pfmul %mm6,%mm5
	movd 28(%esi),%mm6
	punpckldq 40(%esi),%mm6
	pfadd %mm6,%mm5
	movd %mm5,896(%ebx)
	psrlq $32,%mm5
	movd %mm5,1280(%ebx)
///
/// Group 2, second output set:
///   x = (%mm3 - %mm2) * [1.0, tfcos36[7]]
///   s = x.lo + x.hi : C[16] = s*D[34], C[1] = s*D[19]
///   d = x.lo - x.hi : B[1] = d*D[1] + S[1], B[16] = d*D[16] + S[16]
///
	movq %mm3,%mm4
	pfsub %mm2,%mm4
	movq %mm7,%mm5
	punpckldq tfcos36+28,%mm5
	pfmul %mm5,%mm4
	movq %mm4,%mm5
	pfacc %mm5,%mm5
	movd 136(%edx),%mm6
	punpckldq 76(%edx),%mm6
	pfmul %mm6,%mm5
	movd %mm5,64(%ecx)
	psrlq $32,%mm5
	movd %mm5,4(%ecx)
	movq %mm4,%mm6
	punpckldq %mm6,%mm5
	pfsub %mm6,%mm5
	punpckhdq %mm5,%mm5
	movd 4(%edx),%mm6
	punpckldq 64(%edx),%mm6
	pfmul %mm6,%mm5
	movd 4(%esi),%mm6
	punpckldq 64(%esi),%mm6
	pfadd %mm6,%mm5
	movd %mm5,128(%ebx)
	psrlq $32,%mm5
	movd %mm5,2048(%ebx)
///
/// Group 3 terms:
///   %mm2 = P1*COS9[5] - %mm0 - P5*COS9[7] + P7*COS9[1]
///   %mm3 = P0 - P2*COS9[8] - P4*COS9[2] + %mm1 + P8*COS9[4]
///
	movq 8(%eax),%mm2
	movd COS9+20,%mm3
	punpckldq %mm3,%mm3
	pfmul %mm3,%mm2
	pfsub %mm0,%mm2
	movq 40(%eax),%mm3
	movd COS9+28,%mm4
	punpckldq %mm4,%mm4
	pfmul %mm4,%mm3
	pfsub %mm3,%mm2
	movq 56(%eax),%mm3
	movd COS9+4,%mm4
	punpckldq %mm4,%mm4
	pfmul %mm4,%mm3
	pfadd %mm3,%mm2
	movq (%eax),%mm3
	movq 16(%eax),%mm4
	movd COS9+32,%mm5
	punpckldq %mm5,%mm5
	pfmul %mm5,%mm4
	pfsub %mm4,%mm3
	movq 32(%eax),%mm4
	movd COS9+8,%mm5
	punpckldq %mm5,%mm5
	pfmul %mm5,%mm4
	pfsub %mm4,%mm3
	pfadd %mm1,%mm3
	movq 64(%eax),%mm4
	movd COS9+16,%mm5
	punpckldq %mm5,%mm5
	pfmul %mm5,%mm4
	pfadd %mm4,%mm3
///
/// Group 3, first output set:
///   x = (%mm2 + %mm3) * [1.0, tfcos36[2]]
///   s = x.lo + x.hi : C[11] = s*D[29], C[6] = s*D[24]
///   d = x.lo - x.hi : B[6] = d*D[6] + S[6], B[11] = d*D[11] + S[11]
///
	movq %mm2,%mm4
	pfadd %mm3,%mm4
	movq %mm7,%mm5
	punpckldq tfcos36+8,%mm5
	pfmul %mm5,%mm4
	movq %mm4,%mm5
	pfacc %mm5,%mm5
	movd 116(%edx),%mm6
	punpckldq 96(%edx),%mm6
	pfmul %mm6,%mm5
	movd %mm5,44(%ecx)
	psrlq $32,%mm5
	movd %mm5,24(%ecx)
	movq %mm4,%mm6
	punpckldq %mm6,%mm5
	pfsub %mm6,%mm5
	punpckhdq %mm5,%mm5
	movd 24(%edx),%mm6
	punpckldq 44(%edx),%mm6
	pfmul %mm6,%mm5
	movd 24(%esi),%mm6
	punpckldq 44(%esi),%mm6
	pfadd %mm6,%mm5
	movd %mm5,768(%ebx)
	psrlq $32,%mm5
	movd %mm5,1408(%ebx)
///
/// Group 3, second output set:
///   x = (%mm3 - %mm2) * [1.0, tfcos36[6]]
///   s = x.lo + x.hi : C[15] = s*D[33], C[2] = s*D[20]
///   d = x.lo - x.hi : B[2] = d*D[2] + S[2], B[15] = d*D[15] + S[15]
///
	movq %mm3,%mm4
	pfsub %mm2,%mm4
	movq %mm7,%mm5
	punpckldq tfcos36+24,%mm5
	pfmul %mm5,%mm4
	movq %mm4,%mm5
	pfacc %mm5,%mm5
	movd 132(%edx),%mm6
	punpckldq 80(%edx),%mm6
	pfmul %mm6,%mm5
	movd %mm5,60(%ecx)
	psrlq $32,%mm5
	movd %mm5,8(%ecx)
	movq %mm4,%mm6
	punpckldq %mm6,%mm5
	pfsub %mm6,%mm5
	punpckhdq %mm5,%mm5
	movd 8(%edx),%mm6
	punpckldq 60(%edx),%mm6
	pfmul %mm6,%mm5
	movd 8(%esi),%mm6
	punpckldq 60(%esi),%mm6
	pfadd %mm6,%mm5
	movd %mm5,256(%ebx)
	psrlq $32,%mm5
	movd %mm5,1920(%ebx)
///
/// Group 4 terms:
///   %mm2 = P1*COS9[7] - %mm0 + P5*COS9[1] - P7*COS9[5]
///   %mm3 = P0 - P2*COS9[4] + P4*COS9[8] + %mm1 - P8*COS9[2]
///
	movq 8(%eax),%mm2
	movd COS9+28,%mm3
	punpckldq %mm3,%mm3
	pfmul %mm3,%mm2
	pfsub %mm0,%mm2
	movq 40(%eax),%mm3
	movd COS9+4,%mm4
	punpckldq %mm4,%mm4
	pfmul %mm4,%mm3
	pfadd %mm3,%mm2
	movq 56(%eax),%mm3
	movd COS9+20,%mm4
	punpckldq %mm4,%mm4
	pfmul %mm4,%mm3
	pfsub %mm3,%mm2
	movq (%eax),%mm3
	movq 16(%eax),%mm4
	movd COS9+16,%mm5
	punpckldq %mm5,%mm5
	pfmul %mm5,%mm4
	pfsub %mm4,%mm3
	movq 32(%eax),%mm4
	movd COS9+32,%mm5
	punpckldq %mm5,%mm5
	pfmul %mm5,%mm4
	pfadd %mm4,%mm3
	pfadd %mm1,%mm3
	movq 64(%eax),%mm4
	movd COS9+8,%mm5
	punpckldq %mm5,%mm5
	pfmul %mm5,%mm4
	pfsub %mm4,%mm3
///
/// Group 4, first output set:
///   x = (%mm2 + %mm3) * [1.0, tfcos36[3]]
///   s = x.lo + x.hi : C[12] = s*D[30], C[5] = s*D[23]
///   d = x.lo - x.hi : B[5] = d*D[5] + S[5], B[12] = d*D[12] + S[12]
///
	movq %mm2,%mm4
	pfadd %mm3,%mm4
	movq %mm7,%mm5
	punpckldq tfcos36+12,%mm5
	pfmul %mm5,%mm4
	movq %mm4,%mm5
	pfacc %mm5,%mm5
	movd 120(%edx),%mm6
	punpckldq 92(%edx),%mm6
	pfmul %mm6,%mm5
	movd %mm5,48(%ecx)
	psrlq $32,%mm5
	movd %mm5,20(%ecx)
	movq %mm4,%mm6
	punpckldq %mm6,%mm5
	pfsub %mm6,%mm5
	punpckhdq %mm5,%mm5
	movd 20(%edx),%mm6
	punpckldq 48(%edx),%mm6
	pfmul %mm6,%mm5
	movd 20(%esi),%mm6
	punpckldq 48(%esi),%mm6
	pfadd %mm6,%mm5
	movd %mm5,640(%ebx)
	psrlq $32,%mm5
	movd %mm5,1536(%ebx)
///
/// Group 4, second output set:
///   x = (%mm3 - %mm2) * [1.0, tfcos36[5]]
///   s = x.lo + x.hi : C[14] = s*D[32], C[3] = s*D[21]
///   d = x.lo - x.hi : B[3] = d*D[3] + S[3], B[14] = d*D[14] + S[14]
///
	movq %mm3,%mm4
	pfsub %mm2,%mm4
	movq %mm7,%mm5
	punpckldq tfcos36+20,%mm5
	pfmul %mm5,%mm4
	movq %mm4,%mm5
	pfacc %mm5,%mm5
	movd 128(%edx),%mm6
	punpckldq 84(%edx),%mm6
	pfmul %mm6,%mm5
	movd %mm5,56(%ecx)
	psrlq $32,%mm5
	movd %mm5,12(%ecx)
	movq %mm4,%mm6
	punpckldq %mm6,%mm5
	pfsub %mm6,%mm5
	punpckhdq %mm5,%mm5
	movd 12(%edx),%mm6
	punpckldq 56(%edx),%mm6
	pfmul %mm6,%mm5
	movd 12(%esi),%mm6
	punpckldq 56(%esi),%mm6
	pfadd %mm6,%mm5
	movd %mm5,384(%ebx)
	psrlq $32,%mm5
	movd %mm5,1792(%ebx)
///
/// Group 5 (single output set, no COS9 factors):
///   x = (P0 - P2 + P4 - P6 + P8) * [1.0, tfcos36[4]]
///   s = x.lo + x.hi : C[13] = s*D[31], C[4] = s*D[22]
///   d = x.lo - x.hi : B[4] = d*D[4] + S[4], B[13] = d*D[13] + S[13]
///
	movq (%eax),%mm4
	movq 16(%eax),%mm3
	pfsub %mm3,%mm4
	movq 32(%eax),%mm3
	pfadd %mm3,%mm4
	movq 48(%eax),%mm3
	pfsub %mm3,%mm4
	movq 64(%eax),%mm3
	pfadd %mm3,%mm4
	movq %mm7,%mm5
	punpckldq tfcos36+16,%mm5
	pfmul %mm5,%mm4
	movq %mm4,%mm5
	pfacc %mm5,%mm5
	movd 124(%edx),%mm6
	punpckldq 88(%edx),%mm6
	pfmul %mm6,%mm5
	movd %mm5,52(%ecx)
	psrlq $32,%mm5
	movd %mm5,16(%ecx)
	movq %mm4,%mm6
	punpckldq %mm6,%mm5
	pfsub %mm6,%mm5
	punpckhdq %mm5,%mm5
	movd 16(%edx),%mm6
	punpckldq 52(%edx),%mm6
	pfmul %mm6,%mm5
	movd 16(%esi),%mm6
	punpckldq 52(%esi),%mm6
	pfadd %mm6,%mm5
	movd %mm5,512(%ebx)
	psrlq $32,%mm5
	movd %mm5,1664(%ebx)
/// Epilogue: leave MMX state, restore %ebx/%esi (pushed in the opposite
/// order), drop the frame and return.
	femms
	popl %ebx
	popl %esi
	movl %ebp,%esp
	popl %ebp
	ret