Mercurial > mplayer.hg
comparison mp3lib/decode_k7.s @ 1135:152c7c71a29c
loops alignment
author | nickols_k |
---|---|
date | Sat, 16 Jun 2001 15:24:02 +0000 |
parents | 2f0265763322 |
children |
comparison
equal
deleted
inserted
replaced
1134:3d8e39fd7d2d | 1135:152c7c71a29c |
---|---|
10 /// - added PREFETCHW opcode. It has different semantics on k7 than on k6-2 | 10 /// - added PREFETCHW opcode. It has different semantics on k7 than on k6-2 |
11 /// and saves 15-25 cpu clocks for athlon. | 11 /// and saves 15-25 cpu clocks for athlon. |
12 /// - partially unrolled loops to remove slower MOVW insns. | 12 /// - partially unrolled loops to remove slower MOVW insns. |
13 /// (Note: probably same operation should be done for decode_3dnow.s) | 13 /// (Note: probably same operation should be done for decode_3dnow.s) |
14 /// - changed function name to support automatic 3DNowEx! detection | 14 /// - changed function name to support automatic 3DNowEx! detection |
15 /// - added loops alignment | |
15 /// | 16 /// |
16 /// note: because K7 processors are aggressive out-of-order three-way | 17 /// note: because K7 processors are aggressive out-of-order three-way |
17 /// superscalar ones, instruction order is not significant for them. | 18 /// superscalar ones, instruction order is not significant for them. |
18 /// | 19 /// |
19 /// Benchmark: measured by mplayer on Duron-700: | 20 /// Benchmark: measured by mplayer on Duron-700: |
111 movl $decwin+64,%eax | 112 movl $decwin+64,%eax |
112 movl %eax,%ecx | 113 movl %eax,%ecx |
113 subl %edx,%ecx | 114 subl %edx,%ecx |
114 movl $8,%ebp | 115 movl $8,%ebp |
115 prefetchw (%esi) | 116 prefetchw (%esi) |
116 | 117 .align 16 |
117 .L55: | 118 .L55: |
118 | 119 |
119 movq (%ecx),%mm0 | 120 movq (%ecx),%mm0 |
120 pfmul (%ebx),%mm0 | 121 pfmul (%ebx),%mm0 |
121 movq 128(%ecx),%mm4 | 122 movq 128(%ecx),%mm4 |
229 | 230 |
230 subl $64,%ebx | 231 subl $64,%ebx |
231 addl $4,%esi | 232 addl $4,%esi |
232 addl $256,%ecx | 233 addl $256,%ecx |
233 movl $7,%ebp | 234 movl $7,%ebp |
234 | 235 .align 16 |
235 .L68: | 236 .L68: |
236 pxor %mm0, %mm0 | 237 pxor %mm0, %mm0 |
237 pxor %mm4, %mm4 | 238 pxor %mm4, %mm4 |
238 | 239 |
239 movq (%ecx),%mm1 | 240 movq (%ecx),%mm1 |