Mercurial > mplayer.hg
comparison mp3lib/decode_MMX.s @ 1259:9bf97b404134
Partial loops unrolling
author | nick |
---|---|
date | Tue, 03 Jul 2001 09:25:16 +0000 |
parents | 03b7e2955a20 |
children | 2864e32cd267 |
comparison
equal
deleted
inserted
replaced
1258:50b8a3a5eeed | 1259:9bf97b404134 |
---|---|
1 # this code comes under GPL | 1 # this code comes under GPL |
2 # This code was taken from http://www.mpg123.org | 2 # This code was taken from http://www.mpg123.org |
3 # See ChangeLog of mpg123-0.59s-pre.1 for detail | 3 # See ChangeLog of mpg123-0.59s-pre.1 for detail |
4 # Applied to mplayer by Nick Kurshev <nickols_k@mail.ru> | 4 # Applied to mplayer by Nick Kurshev <nickols_k@mail.ru> |
5 # | 5 # |
6 # TODO: Partial loops unrolling and removing MOVW insn. | 6 # Local ChangeLog: |
7 # - Partial loops unrolling and removing MOVW insn from loops | |
7 # | 8 # |
9 | |
10 .data | |
11 .align 8 | |
12 null_one: .long 0x0000ffff, 0x0000ffff | |
13 one_null: .long 0xffff0000, 0xffff0000 | |
8 | 14 |
9 .text | 15 .text |
10 | 16 |
11 .globl synth_1to1_MMX_s | 17 .globl synth_1to1_MMX_s |
12 | 18 |
47 pushl %ecx | 53 pushl %ecx |
48 call *dct64_MMX_func | 54 call *dct64_MMX_func |
49 addl $12,%esp | 55 addl $12,%esp |
50 leal 1(%ebx), %ecx | 56 leal 1(%ebx), %ecx |
51 subl %ebp,%ebx | 57 subl %ebp,%ebx |
52 | 58 pushl %ecx |
53 leal decwins(%ebx,%ebx,1), %edx | 59 leal decwins(%ebx,%ebx,1), %edx |
60 shrl $1, %ecx | |
61 .align 16 | |
54 .L3: | 62 .L3: |
63 movq (%edx),%mm0 | |
64 movq 64(%edx),%mm4 | |
65 pmaddwd (%esi),%mm0 | |
66 pmaddwd 32(%esi),%mm4 | |
67 movq 8(%edx),%mm1 | |
68 movq 72(%edx),%mm5 | |
69 pmaddwd 8(%esi),%mm1 | |
70 pmaddwd 40(%esi),%mm5 | |
71 movq 16(%edx),%mm2 | |
72 movq 80(%edx),%mm6 | |
73 pmaddwd 16(%esi),%mm2 | |
74 pmaddwd 48(%esi),%mm6 | |
75 movq 24(%edx),%mm3 | |
76 movq 88(%edx),%mm7 | |
77 pmaddwd 24(%esi),%mm3 | |
78 pmaddwd 56(%esi),%mm7 | |
79 paddd %mm1,%mm0 | |
80 paddd %mm5,%mm4 | |
81 paddd %mm2,%mm0 | |
82 paddd %mm6,%mm4 | |
83 paddd %mm3,%mm0 | |
84 paddd %mm7,%mm4 | |
85 movq %mm0,%mm1 | |
86 movq %mm4,%mm5 | |
87 psrlq $32,%mm1 | |
88 psrlq $32,%mm5 | |
89 paddd %mm1,%mm0 | |
90 paddd %mm5,%mm4 | |
91 psrad $13,%mm0 | |
92 psrad $13,%mm4 | |
93 packssdw %mm0,%mm0 | |
94 packssdw %mm4,%mm4 | |
95 | |
96 movq (%edi), %mm1 | |
97 punpckldq %mm4, %mm0 | |
98 pand one_null, %mm1 | |
99 pand null_one, %mm0 | |
100 por %mm0, %mm1 | |
101 movq %mm1,(%edi) | |
102 | |
103 leal 64(%esi),%esi | |
104 leal 128(%edx),%edx | |
105 leal 8(%edi),%edi | |
106 | |
107 decl %ecx | |
108 jnz .L3 | |
109 | |
110 popl %ecx | |
111 andl $1, %ecx | |
112 jecxz .next_loop | |
113 | |
55 movq (%edx),%mm0 | 114 movq (%edx),%mm0 |
56 pmaddwd (%esi),%mm0 | 115 pmaddwd (%esi),%mm0 |
57 movq 8(%edx),%mm1 | 116 movq 8(%edx),%mm1 |
58 pmaddwd 8(%esi),%mm1 | 117 pmaddwd 8(%esi),%mm1 |
59 movq 16(%edx),%mm2 | 118 movq 16(%edx),%mm2 |
68 paddd %mm1,%mm0 | 127 paddd %mm1,%mm0 |
69 psrad $13,%mm0 | 128 psrad $13,%mm0 |
70 packssdw %mm0,%mm0 | 129 packssdw %mm0,%mm0 |
71 movd %mm0,%eax | 130 movd %mm0,%eax |
72 movw %ax, (%edi) | 131 movw %ax, (%edi) |
73 | |
74 leal 32(%esi),%esi | 132 leal 32(%esi),%esi |
75 leal 64(%edx),%edx | 133 leal 64(%edx),%edx |
76 leal 4(%edi),%edi | 134 leal 4(%edi),%edi |
77 decl %ecx | 135 |
78 jnz .L3 | 136 .next_loop: |
79 | |
80 | |
81 subl $64,%esi | 137 subl $64,%esi |
82 movl $15,%ecx | 138 movl $7,%ecx |
139 .align 16 | |
83 .L4: | 140 .L4: |
141 movq (%edx),%mm0 | |
142 movq 64(%edx),%mm4 | |
143 pmaddwd (%esi),%mm0 | |
144 pmaddwd -32(%esi),%mm4 | |
145 movq 8(%edx),%mm1 | |
146 movq 72(%edx),%mm5 | |
147 pmaddwd 8(%esi),%mm1 | |
148 pmaddwd -24(%esi),%mm5 | |
149 movq 16(%edx),%mm2 | |
150 movq 80(%edx),%mm6 | |
151 pmaddwd 16(%esi),%mm2 | |
152 pmaddwd -16(%esi),%mm6 | |
153 movq 24(%edx),%mm3 | |
154 movq 88(%edx),%mm7 | |
155 pmaddwd 24(%esi),%mm3 | |
156 pmaddwd -8(%esi),%mm7 | |
157 paddd %mm1,%mm0 | |
158 paddd %mm5,%mm4 | |
159 paddd %mm2,%mm0 | |
160 paddd %mm6,%mm4 | |
161 paddd %mm3,%mm0 | |
162 paddd %mm7,%mm4 | |
163 movq %mm0,%mm1 | |
164 movq %mm4,%mm5 | |
165 psrlq $32,%mm1 | |
166 psrlq $32,%mm5 | |
167 paddd %mm0,%mm1 | |
168 paddd %mm4,%mm5 | |
169 psrad $13,%mm1 | |
170 psrad $13,%mm5 | |
171 packssdw %mm1,%mm1 | |
172 packssdw %mm5,%mm5 | |
173 psubd %mm0,%mm0 | |
174 psubd %mm4,%mm4 | |
175 psubsw %mm1,%mm0 | |
176 psubsw %mm5,%mm4 | |
177 | |
178 movq (%edi), %mm1 | |
179 punpckldq %mm4, %mm0 | |
180 pand one_null, %mm1 | |
181 pand null_one, %mm0 | |
182 por %mm0, %mm1 | |
183 movq %mm1,(%edi) | |
184 | |
185 subl $64,%esi | |
186 addl $128,%edx | |
187 leal 8(%edi),%edi | |
188 decl %ecx | |
189 jnz .L4 | |
190 | |
84 movq (%edx),%mm0 | 191 movq (%edx),%mm0 |
85 pmaddwd (%esi),%mm0 | 192 pmaddwd (%esi),%mm0 |
86 movq 8(%edx),%mm1 | 193 movq 8(%edx),%mm1 |
87 pmaddwd 8(%esi),%mm1 | 194 pmaddwd 8(%esi),%mm1 |
88 movq 16(%edx),%mm2 | 195 movq 16(%edx),%mm2 |
100 psubd %mm0,%mm0 | 207 psubd %mm0,%mm0 |
101 psubsw %mm1,%mm0 | 208 psubsw %mm1,%mm0 |
102 movd %mm0,%eax | 209 movd %mm0,%eax |
103 movw %ax,(%edi) | 210 movw %ax,(%edi) |
104 | 211 |
105 subl $32,%esi | |
106 addl $64,%edx | |
107 leal 4(%edi),%edi | |
108 decl %ecx | |
109 jnz .L4 | |
110 emms | 212 emms |
111 popl %ebx | 213 popl %ebx |
112 popl %esi | 214 popl %esi |
113 popl %edi | 215 popl %edi |
114 popl %ebp | 216 popl %ebp |