annotate mp3lib/decode_MMX.s @ 1245:03b7e2955a20

Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
author nick
date Fri, 29 Jun 2001 17:55:35 +0000
parents
children 9bf97b404134
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
1 # this code comes under GPL
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
2 # This code was taken from http://www.mpg123.org
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
3 # See ChangeLog of mpg123-0.59s-pre.1 for detail
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
4 # Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
5 #
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
6 # TODO: Partial loops unrolling and removing MOVW insn.
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
7 #
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
8
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
9 .text
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
10
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
11 .globl synth_1to1_MMX_s
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
12
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
13 synth_1to1_MMX_s:
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
14 pushl %ebp
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
15 pushl %edi
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
16 pushl %esi
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
17 pushl %ebx
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
18 movl 24(%esp),%ecx
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
19 movl 28(%esp),%edi
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
20 movl $15,%ebx
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
21 movl 36(%esp),%edx
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
22 leal (%edi,%ecx,2),%edi
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
23 decl %ecx
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
24 movl 32(%esp),%esi
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
25 movl (%edx),%eax
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
26 jecxz .L1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
27 decl %eax
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
28 andl %ebx,%eax
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
29 leal 1088(%esi),%esi
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
30 movl %eax,(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
31 .L1:
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
32 leal (%esi,%eax,2),%edx
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
33 movl %eax,%ebp
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
34 incl %eax
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
35 pushl 20(%esp)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
36 andl %ebx,%eax
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
37 leal 544(%esi,%eax,2),%ecx
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
38 incl %ebx
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
39 testl $1, %eax
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
40 jnz .L2
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
41 xchgl %edx,%ecx
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
42 incl %ebp
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
43 leal 544(%esi),%esi
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
44 .L2:
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
45 emms
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
46 pushl %edx
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
47 pushl %ecx
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
48 call *dct64_MMX_func
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
49 addl $12,%esp
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
50 leal 1(%ebx), %ecx
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
51 subl %ebp,%ebx
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
52
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
53 leal decwins(%ebx,%ebx,1), %edx
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
54 .L3:
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
55 movq (%edx),%mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
56 pmaddwd (%esi),%mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
57 movq 8(%edx),%mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
58 pmaddwd 8(%esi),%mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
59 movq 16(%edx),%mm2
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
60 pmaddwd 16(%esi),%mm2
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
61 movq 24(%edx),%mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
62 pmaddwd 24(%esi),%mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
63 paddd %mm1,%mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
64 paddd %mm2,%mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
65 paddd %mm3,%mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
66 movq %mm0,%mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
67 psrlq $32,%mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
68 paddd %mm1,%mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
69 psrad $13,%mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
70 packssdw %mm0,%mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
71 movd %mm0,%eax
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
72 movw %ax, (%edi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
73
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
74 leal 32(%esi),%esi
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
75 leal 64(%edx),%edx
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
76 leal 4(%edi),%edi
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
77 decl %ecx
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
78 jnz .L3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
79
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
80
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
81 subl $64,%esi
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
82 movl $15,%ecx
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
83 .L4:
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
84 movq (%edx),%mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
85 pmaddwd (%esi),%mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
86 movq 8(%edx),%mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
87 pmaddwd 8(%esi),%mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
88 movq 16(%edx),%mm2
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
89 pmaddwd 16(%esi),%mm2
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
90 movq 24(%edx),%mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
91 pmaddwd 24(%esi),%mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
92 paddd %mm1,%mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
93 paddd %mm2,%mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
94 paddd %mm3,%mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
95 movq %mm0,%mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
96 psrlq $32,%mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
97 paddd %mm0,%mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
98 psrad $13,%mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
99 packssdw %mm1,%mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
100 psubd %mm0,%mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
101 psubsw %mm1,%mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
102 movd %mm0,%eax
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
103 movw %ax,(%edi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
104
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
105 subl $32,%esi
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
106 addl $64,%edx
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
107 leal 4(%edi),%edi
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
108 decl %ecx
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
109 jnz .L4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
110 emms
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
111 popl %ebx
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
112 popl %esi
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
113 popl %edi
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
114 popl %ebp
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
115 ret
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
116
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff changeset
117