Mercurial > mplayer.hg
annotate mp3lib/decode_MMX.s @ 1245:03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
author | nick |
---|---|
date | Fri, 29 Jun 2001 17:55:35 +0000 |
parents | |
children | 9bf97b404134 |
rev | line source |
---|---|
1245
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
1 # this code comes under GPL |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
2 # This code was taken from http://www.mpg123.org |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
3 # See ChangeLog of mpg123-0.59s-pre.1 for detail |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
4 # Applied to mplayer by Nick Kurshev <nickols_k@mail.ru> |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
5 # |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
6 # TODO: Partial loops unrolling and removing MOVW insn. |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
7 # |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
8 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
9 .text |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
10 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
11 .globl synth_1to1_MMX_s |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
12 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
13 synth_1to1_MMX_s: |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
14 pushl %ebp |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
15 pushl %edi |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
16 pushl %esi |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
17 pushl %ebx |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
18 movl 24(%esp),%ecx |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
19 movl 28(%esp),%edi |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
20 movl $15,%ebx |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
21 movl 36(%esp),%edx |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
22 leal (%edi,%ecx,2),%edi |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
23 decl %ecx |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
24 movl 32(%esp),%esi |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
25 movl (%edx),%eax |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
26 jecxz .L1 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
27 decl %eax |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
28 andl %ebx,%eax |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
29 leal 1088(%esi),%esi |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
30 movl %eax,(%edx) |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
31 .L1: |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
32 leal (%esi,%eax,2),%edx |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
33 movl %eax,%ebp |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
34 incl %eax |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
35 pushl 20(%esp) |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
36 andl %ebx,%eax |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
37 leal 544(%esi,%eax,2),%ecx |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
38 incl %ebx |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
39 testl $1, %eax |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
40 jnz .L2 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
41 xchgl %edx,%ecx |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
42 incl %ebp |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
43 leal 544(%esi),%esi |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
44 .L2: |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
45 emms |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
46 pushl %edx |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
47 pushl %ecx |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
48 call *dct64_MMX_func |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
49 addl $12,%esp |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
50 leal 1(%ebx), %ecx |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
51 subl %ebp,%ebx |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
52 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
53 leal decwins(%ebx,%ebx,1), %edx |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
54 .L3: |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
55 movq (%edx),%mm0 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
56 pmaddwd (%esi),%mm0 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
57 movq 8(%edx),%mm1 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
58 pmaddwd 8(%esi),%mm1 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
59 movq 16(%edx),%mm2 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
60 pmaddwd 16(%esi),%mm2 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
61 movq 24(%edx),%mm3 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
62 pmaddwd 24(%esi),%mm3 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
63 paddd %mm1,%mm0 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
64 paddd %mm2,%mm0 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
65 paddd %mm3,%mm0 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
66 movq %mm0,%mm1 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
67 psrlq $32,%mm1 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
68 paddd %mm1,%mm0 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
69 psrad $13,%mm0 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
70 packssdw %mm0,%mm0 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
71 movd %mm0,%eax |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
72 movw %ax, (%edi) |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
73 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
74 leal 32(%esi),%esi |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
75 leal 64(%edx),%edx |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
76 leal 4(%edi),%edi |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
77 decl %ecx |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
78 jnz .L3 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
79 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
80 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
81 subl $64,%esi |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
82 movl $15,%ecx |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
83 .L4: |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
84 movq (%edx),%mm0 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
85 pmaddwd (%esi),%mm0 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
86 movq 8(%edx),%mm1 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
87 pmaddwd 8(%esi),%mm1 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
88 movq 16(%edx),%mm2 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
89 pmaddwd 16(%esi),%mm2 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
90 movq 24(%edx),%mm3 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
91 pmaddwd 24(%esi),%mm3 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
92 paddd %mm1,%mm0 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
93 paddd %mm2,%mm0 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
94 paddd %mm3,%mm0 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
95 movq %mm0,%mm1 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
96 psrlq $32,%mm1 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
97 paddd %mm0,%mm1 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
98 psrad $13,%mm1 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
99 packssdw %mm1,%mm1 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
100 psubd %mm0,%mm0 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
101 psubsw %mm1,%mm0 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
102 movd %mm0,%eax |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
103 movw %ax,(%edi) |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
104 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
105 subl $32,%esi |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
106 addl $64,%edx |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
107 leal 4(%edi),%edi |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
108 decl %ecx |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
109 jnz .L4 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
110 emms |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
111 popl %ebx |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
112 popl %esi |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
113 popl %edi |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
114 popl %ebp |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
115 ret |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
116 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
117 |