comparison mp3lib/decode_k7.s @ 1135:152c7c71a29c

loops alignment
author nickols_k
date Sat, 16 Jun 2001 15:24:02 +0000
parents 2f0265763322
children
comparison
equal deleted inserted replaced
1134:3d8e39fd7d2d 1135:152c7c71a29c
10 /// - added PREFETCHW opcode. It has different semantics on k7 than on k6-2 10 /// - added PREFETCHW opcode. It has different semantics on k7 than on k6-2
11 /// and saves 15-25 cpu clocks for athlon. 11 /// and saves 15-25 cpu clocks for athlon.
12 /// - partial unrolling loops for removing slower MOVW insns. 12 /// - partial unrolling loops for removing slower MOVW insns.
13 /// (Note: probably same operation should be done for decode_3dnow.s) 13 /// (Note: probably same operation should be done for decode_3dnow.s)
14 /// - change function name for support 3DNowEx! automatic detect 14 /// - change function name for support 3DNowEx! automatic detect
15 /// - added loops alignment
15 /// 16 ///
16 /// note: because K7 processors are an aggressive out-of-order three-way 17 /// note: because K7 processors are an aggressive out-of-order three-way
17 /// superscalar ones, instruction order is not significant for them. 18 /// superscalar ones, instruction order is not significant for them.
18 /// 19 ///
19 /// Benchmark: measured by mplayer on Duron-700: 20 /// Benchmark: measured by mplayer on Duron-700:
111 movl $decwin+64,%eax 112 movl $decwin+64,%eax
112 movl %eax,%ecx 113 movl %eax,%ecx
113 subl %edx,%ecx 114 subl %edx,%ecx
114 movl $8,%ebp 115 movl $8,%ebp
115 prefetchw (%esi) 116 prefetchw (%esi)
116 117 .align 16
117 .L55: 118 .L55:
118 119
119 movq (%ecx),%mm0 120 movq (%ecx),%mm0
120 pfmul (%ebx),%mm0 121 pfmul (%ebx),%mm0
121 movq 128(%ecx),%mm4 122 movq 128(%ecx),%mm4
229 230
230 subl $64,%ebx 231 subl $64,%ebx
231 addl $4,%esi 232 addl $4,%esi
232 addl $256,%ecx 233 addl $256,%ecx
233 movl $7,%ebp 234 movl $7,%ebp
234 235 .align 16
235 .L68: 236 .L68:
236 pxor %mm0, %mm0 237 pxor %mm0, %mm0
237 pxor %mm4, %mm4 238 pxor %mm4, %mm4
238 239
239 movq (%ecx),%mm1 240 movq (%ecx),%mm1