comparison mp3lib/decode_MMX.s @ 1259:9bf97b404134

Partial loop unrolling
author nick
date Tue, 03 Jul 2001 09:25:16 +0000
parents 03b7e2955a20
children 2864e32cd267
comparison
legend: equal | deleted | inserted | replaced
1258:50b8a3a5eeed 1259:9bf97b404134
1 # this code comes under GPL 1 # this code comes under GPL
2 # This code was taken from http://www.mpg123.org 2 # This code was taken from http://www.mpg123.org
3 # See ChangeLog of mpg123-0.59s-pre.1 for detail 3 # See ChangeLog of mpg123-0.59s-pre.1 for detail
4 # Applied to mplayer by Nick Kurshev <nickols_k@mail.ru> 4 # Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
5 # 5 #
6 # TODO: Partial loops unrolling and removing MOVW insn. 6 # Local ChangeLog:
7 # - Partial loop unrolling and removal of the MOVW insn from loops
7 # 8 #
9
10 .data
11 .align 8
12 null_one: .long 0x0000ffff, 0x0000ffff
13 one_null: .long 0xffff0000, 0xffff0000
8 14
9 .text 15 .text
10 16
11 .globl synth_1to1_MMX_s 17 .globl synth_1to1_MMX_s
12 18
47 pushl %ecx 53 pushl %ecx
48 call *dct64_MMX_func 54 call *dct64_MMX_func
49 addl $12,%esp 55 addl $12,%esp
50 leal 1(%ebx), %ecx 56 leal 1(%ebx), %ecx
51 subl %ebp,%ebx 57 subl %ebp,%ebx
52 58 pushl %ecx
53 leal decwins(%ebx,%ebx,1), %edx 59 leal decwins(%ebx,%ebx,1), %edx
60 shrl $1, %ecx
61 .align 16
54 .L3: 62 .L3:
63 movq (%edx),%mm0
64 movq 64(%edx),%mm4
65 pmaddwd (%esi),%mm0
66 pmaddwd 32(%esi),%mm4
67 movq 8(%edx),%mm1
68 movq 72(%edx),%mm5
69 pmaddwd 8(%esi),%mm1
70 pmaddwd 40(%esi),%mm5
71 movq 16(%edx),%mm2
72 movq 80(%edx),%mm6
73 pmaddwd 16(%esi),%mm2
74 pmaddwd 48(%esi),%mm6
75 movq 24(%edx),%mm3
76 movq 88(%edx),%mm7
77 pmaddwd 24(%esi),%mm3
78 pmaddwd 56(%esi),%mm7
79 paddd %mm1,%mm0
80 paddd %mm5,%mm4
81 paddd %mm2,%mm0
82 paddd %mm6,%mm4
83 paddd %mm3,%mm0
84 paddd %mm7,%mm4
85 movq %mm0,%mm1
86 movq %mm4,%mm5
87 psrlq $32,%mm1
88 psrlq $32,%mm5
89 paddd %mm1,%mm0
90 paddd %mm5,%mm4
91 psrad $13,%mm0
92 psrad $13,%mm4
93 packssdw %mm0,%mm0
94 packssdw %mm4,%mm4
95
96 movq (%edi), %mm1
97 punpckldq %mm4, %mm0
98 pand one_null, %mm1
99 pand null_one, %mm0
100 por %mm0, %mm1
101 movq %mm1,(%edi)
102
103 leal 64(%esi),%esi
104 leal 128(%edx),%edx
105 leal 8(%edi),%edi
106
107 decl %ecx
108 jnz .L3
109
110 popl %ecx
111 andl $1, %ecx
112 jecxz .next_loop
113
55 movq (%edx),%mm0 114 movq (%edx),%mm0
56 pmaddwd (%esi),%mm0 115 pmaddwd (%esi),%mm0
57 movq 8(%edx),%mm1 116 movq 8(%edx),%mm1
58 pmaddwd 8(%esi),%mm1 117 pmaddwd 8(%esi),%mm1
59 movq 16(%edx),%mm2 118 movq 16(%edx),%mm2
68 paddd %mm1,%mm0 127 paddd %mm1,%mm0
69 psrad $13,%mm0 128 psrad $13,%mm0
70 packssdw %mm0,%mm0 129 packssdw %mm0,%mm0
71 movd %mm0,%eax 130 movd %mm0,%eax
72 movw %ax, (%edi) 131 movw %ax, (%edi)
73
74 leal 32(%esi),%esi 132 leal 32(%esi),%esi
75 leal 64(%edx),%edx 133 leal 64(%edx),%edx
76 leal 4(%edi),%edi 134 leal 4(%edi),%edi
77 decl %ecx 135
78 jnz .L3 136 .next_loop:
79
80
81 subl $64,%esi 137 subl $64,%esi
82 movl $15,%ecx 138 movl $7,%ecx
139 .align 16
83 .L4: 140 .L4:
141 movq (%edx),%mm0
142 movq 64(%edx),%mm4
143 pmaddwd (%esi),%mm0
144 pmaddwd -32(%esi),%mm4
145 movq 8(%edx),%mm1
146 movq 72(%edx),%mm5
147 pmaddwd 8(%esi),%mm1
148 pmaddwd -24(%esi),%mm5
149 movq 16(%edx),%mm2
150 movq 80(%edx),%mm6
151 pmaddwd 16(%esi),%mm2
152 pmaddwd -16(%esi),%mm6
153 movq 24(%edx),%mm3
154 movq 88(%edx),%mm7
155 pmaddwd 24(%esi),%mm3
156 pmaddwd -8(%esi),%mm7
157 paddd %mm1,%mm0
158 paddd %mm5,%mm4
159 paddd %mm2,%mm0
160 paddd %mm6,%mm4
161 paddd %mm3,%mm0
162 paddd %mm7,%mm4
163 movq %mm0,%mm1
164 movq %mm4,%mm5
165 psrlq $32,%mm1
166 psrlq $32,%mm5
167 paddd %mm0,%mm1
168 paddd %mm4,%mm5
169 psrad $13,%mm1
170 psrad $13,%mm5
171 packssdw %mm1,%mm1
172 packssdw %mm5,%mm5
173 psubd %mm0,%mm0
174 psubd %mm4,%mm4
175 psubsw %mm1,%mm0
176 psubsw %mm5,%mm4
177
178 movq (%edi), %mm1
179 punpckldq %mm4, %mm0
180 pand one_null, %mm1
181 pand null_one, %mm0
182 por %mm0, %mm1
183 movq %mm1,(%edi)
184
185 subl $64,%esi
186 addl $128,%edx
187 leal 8(%edi),%edi
188 decl %ecx
189 jnz .L4
190
84 movq (%edx),%mm0 191 movq (%edx),%mm0
85 pmaddwd (%esi),%mm0 192 pmaddwd (%esi),%mm0
86 movq 8(%edx),%mm1 193 movq 8(%edx),%mm1
87 pmaddwd 8(%esi),%mm1 194 pmaddwd 8(%esi),%mm1
88 movq 16(%edx),%mm2 195 movq 16(%edx),%mm2
100 psubd %mm0,%mm0 207 psubd %mm0,%mm0
101 psubsw %mm1,%mm0 208 psubsw %mm1,%mm0
102 movd %mm0,%eax 209 movd %mm0,%eax
103 movw %ax,(%edi) 210 movw %ax,(%edi)
104 211
105 subl $32,%esi
106 addl $64,%edx
107 leal 4(%edi),%edi
108 decl %ecx
109 jnz .L4
110 emms 212 emms
111 popl %ebx 213 popl %ebx
112 popl %esi 214 popl %esi
113 popl %edi 215 popl %edi
114 popl %ebp 216 popl %ebp