annotate mp3lib/dct64_k7.s @ 1270:8a9fa696b77d

Minor cleanups
author nick
date Wed, 04 Jul 2001 07:00:15 +0000
parents 03b7e2955a20
children 2864e32cd267
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
1 # This code was taken from http://www.mpg123.org
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
2 # See ChangeLog of mpg123-0.59s-pre.1 for detail
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
3 # Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
4 # Partial 3dnowex-DSP! optimization by Nick Kurshev
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
5 #
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
6 # TODO: finish 3dnow! optimization at least in scalar mode
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
7 #
736
59b0a9ec8604 K7 3dnow-dsp support
nickols_k
parents:
diff changeset
8
1173
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 781
diff changeset
9 .data
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
10 .align 8
1173
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 781
diff changeset
11 plus_minus_3dnow: .long 0x00000000, 0x80000000
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
12 costab:
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
13 .long 1056974725
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
14 .long 1057056395
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
15 .long 1057223771
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
16 .long 1057485416
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
17 .long 1057855544
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
18 .long 1058356026
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
19 .long 1059019886
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
20 .long 1059897405
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
21 .long 1061067246
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
22 .long 1062657950
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
23 .long 1064892987
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
24 .long 1066774581
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
25 .long 1069414683
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
26 .long 1073984175
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
27 .long 1079645762
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
28 .long 1092815430
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
29 .long 1057005197
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
30 .long 1057342072
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
31 .long 1058087743
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
32 .long 1059427869
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
33 .long 1061799040
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
34 .long 1065862217
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
35 .long 1071413542
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
36 .long 1084439708
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
37 .long 1057128951
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
38 .long 1058664893
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
39 .long 1063675095
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
40 .long 1076102863
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
41 .long 1057655764
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
42 .long 1067924853
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
43 .long 1060439283
1173
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 781
diff changeset
44
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 781
diff changeset
45 .text
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
46
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
47 .align 16
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
48
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
49 .globl dct64_MMX_3dnowex
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
50 dct64_MMX_3dnowex:
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
51 pushl %ebx
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
52 pushl %esi
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
53 pushl %edi
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
54 subl $256,%esp
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
55 movl 280(%esp),%eax
781
ee303142c2e0 improvements.
nickols_k
parents: 736
diff changeset
56
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
57 leal 128(%esp),%edx
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
58 movl 272(%esp),%esi
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
59 movl 276(%esp),%edi
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
60 movl $costab,%ebx
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
61 orl %ecx,%ecx
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
62 movl %esp,%ecx
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
63 femms
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
64 /* Phase 1*/
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
65 movq (%eax), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
66 movq 8(%eax), %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
67 movq %mm0, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
68 movq %mm4, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
69 movq 120(%eax), %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
70 movq 112(%eax), %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
71 pswapd %mm1, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
72 pswapd %mm5, %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
73 pfadd %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
74 pfadd %mm5, %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
75 movq %mm0, (%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
76 movq %mm4, 8(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
77 pfsub %mm1, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
78 pfsub %mm5, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
79 pfmul (%ebx), %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
80 pfmul 8(%ebx), %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
81 pswapd %mm3, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
82 pswapd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
83 movq %mm3, 120(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
84 movq %mm7, 112(%edx)
781
ee303142c2e0 improvements.
nickols_k
parents: 736
diff changeset
85
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
86 movq 16(%eax), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
87 movq 24(%eax), %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
88 movq %mm0, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
89 movq %mm4, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
90 movq 104(%eax), %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
91 movq 96(%eax), %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
92 pswapd %mm1, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
93 pswapd %mm5, %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
94 pfadd %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
95 pfadd %mm5, %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
96 movq %mm0, 16(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
97 movq %mm4, 24(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
98 pfsub %mm1, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
99 pfsub %mm5, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
100 pfmul 16(%ebx), %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
101 pfmul 24(%ebx), %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
102 pswapd %mm3, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
103 pswapd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
104 movq %mm3, 104(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
105 movq %mm7, 96(%edx)
781
ee303142c2e0 improvements.
nickols_k
parents: 736
diff changeset
106
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
107 movq 32(%eax), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
108 movq 40(%eax), %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
109 movq %mm0, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
110 movq %mm4, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
111 movq 88(%eax), %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
112 movq 80(%eax), %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
113 pswapd %mm1, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
114 pswapd %mm5, %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
115 pfadd %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
116 pfadd %mm5, %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
117 movq %mm0, 32(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
118 movq %mm4, 40(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
119 pfsub %mm1, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
120 pfsub %mm5, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
121 pfmul 32(%ebx), %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
122 pfmul 40(%ebx), %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
123 pswapd %mm3, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
124 pswapd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
125 movq %mm3, 88(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
126 movq %mm7, 80(%edx)
781
ee303142c2e0 improvements.
nickols_k
parents: 736
diff changeset
127
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
128 movq 48(%eax), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
129 movq 56(%eax), %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
130 movq %mm0, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
131 movq %mm4, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
132 movq 72(%eax), %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
133 movq 64(%eax), %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
134 pswapd %mm1, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
135 pswapd %mm5, %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
136 pfadd %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
137 pfadd %mm5, %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
138 movq %mm0, 48(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
139 movq %mm4, 56(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
140 pfsub %mm1, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
141 pfsub %mm5, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
142 pfmul 48(%ebx), %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
143 pfmul 56(%ebx), %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
144 pswapd %mm3, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
145 pswapd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
146 movq %mm3, 72(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
147 movq %mm7, 64(%edx)
781
ee303142c2e0 improvements.
nickols_k
parents: 736
diff changeset
148
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
149 /* Phase 2*/
781
ee303142c2e0 improvements.
nickols_k
parents: 736
diff changeset
150
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
151 movq (%edx), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
152 movq 8(%edx), %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
153 movq %mm0, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
154 movq %mm4, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
155 movq 56(%edx), %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
156 movq 48(%edx), %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
157 pswapd %mm1, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
158 pswapd %mm5, %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
159 pfadd %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
160 pfadd %mm5, %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
161 movq %mm0, (%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
162 movq %mm4, 8(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
163 pfsub %mm1, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
164 pfsub %mm5, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
165 pfmul 64(%ebx), %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
166 pfmul 72(%ebx), %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
167 pswapd %mm3, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
168 pswapd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
169 movq %mm3, 56(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
170 movq %mm7, 48(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
171
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
172 movq 16(%edx), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
173 movq 24(%edx), %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
174 movq %mm0, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
175 movq %mm4, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
176 movq 40(%edx), %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
177 movq 32(%edx), %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
178 pswapd %mm1, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
179 pswapd %mm5, %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
180 pfadd %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
181 pfadd %mm5, %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
182 movq %mm0, 16(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
183 movq %mm4, 24(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
184 pfsub %mm1, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
185 pfsub %mm5, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
186 pfmul 80(%ebx), %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
187 pfmul 88(%ebx), %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
188 pswapd %mm3, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
189 pswapd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
190 movq %mm3, 40(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
191 movq %mm7, 32(%ecx)
781
ee303142c2e0 improvements.
nickols_k
parents: 736
diff changeset
192
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
193 /* Phase 3*/
781
ee303142c2e0 improvements.
nickols_k
parents: 736
diff changeset
194
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
195 movq 64(%edx), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
196 movq 72(%edx), %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
197 movq %mm0, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
198 movq %mm4, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
199 movq 120(%edx), %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
200 movq 112(%edx), %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
201 pswapd %mm1, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
202 pswapd %mm5, %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
203 pfadd %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
204 pfadd %mm5, %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
205 movq %mm0, 64(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
206 movq %mm4, 72(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
207 pfsubr %mm1, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
208 pfsubr %mm5, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
209 pfmul 64(%ebx), %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
210 pfmul 72(%ebx), %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
211 pswapd %mm3, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
212 pswapd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
213 movq %mm3, 120(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
214 movq %mm7, 112(%ecx)
736
59b0a9ec8604 K7 3dnow-dsp support
nickols_k
parents:
diff changeset
215
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
216 movq 80(%edx), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
217 movq 88(%edx), %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
218 movq %mm0, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
219 movq %mm4, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
220 movq 104(%edx), %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
221 movq 96(%edx), %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
222 pswapd %mm1, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
223 pswapd %mm5, %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
224 pfadd %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
225 pfadd %mm5, %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
226 movq %mm0, 80(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
227 movq %mm4, 88(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
228 pfsubr %mm1, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
229 pfsubr %mm5, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
230 pfmul 80(%ebx), %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
231 pfmul 88(%ebx), %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
232 pswapd %mm3, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
233 pswapd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
234 movq %mm3, 104(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
235 movq %mm7, 96(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
236
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
237 /* Phase 4*/
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
238
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
239 movq (%ecx), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
240 movq 8(%ecx), %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
241 movq %mm0, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
242 movq %mm4, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
243 movq 24(%ecx), %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
244 movq 16(%ecx), %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
245 pswapd %mm1, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
246 pswapd %mm5, %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
247 pfadd %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
248 pfadd %mm5, %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
249 movq %mm0, (%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
250 movq %mm4, 8(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
251 pfsub %mm1, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
252 pfsub %mm5, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
253 pfmul 96(%ebx), %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
254 pfmul 104(%ebx), %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
255 pswapd %mm3, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
256 pswapd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
257 movq %mm3, 24(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
258 movq %mm7, 16(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
259
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
260 movq 32(%ecx), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
261 movq 40(%ecx), %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
262 movq %mm0, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
263 movq %mm4, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
264 movq 56(%ecx), %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
265 movq 48(%ecx), %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
266 pswapd %mm1, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
267 pswapd %mm5, %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
268 pfadd %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
269 pfadd %mm5, %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
270 movq %mm0, 32(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
271 movq %mm4, 40(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
272 pfsubr %mm1, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
273 pfsubr %mm5, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
274 pfmul 96(%ebx), %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
275 pfmul 104(%ebx), %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
276 pswapd %mm3, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
277 pswapd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
278 movq %mm3, 56(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
279 movq %mm7, 48(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
280
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
281 movq 64(%ecx), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
282 movq 72(%ecx), %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
283 movq %mm0, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
284 movq %mm4, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
285 movq 88(%ecx), %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
286 movq 80(%ecx), %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
287 pswapd %mm1, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
288 pswapd %mm5, %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
289 pfadd %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
290 pfadd %mm5, %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
291 movq %mm0, 64(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
292 movq %mm4, 72(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
293 pfsub %mm1, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
294 pfsub %mm5, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
295 pfmul 96(%ebx), %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
296 pfmul 104(%ebx), %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
297 pswapd %mm3, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
298 pswapd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
299 movq %mm3, 88(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
300 movq %mm7, 80(%edx)
736
59b0a9ec8604 K7 3dnow-dsp support
nickols_k
parents:
diff changeset
301
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
302 movq 96(%ecx), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
303 movq 104(%ecx), %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
304 movq %mm0, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
305 movq %mm4, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
306 movq 120(%ecx), %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
307 movq 112(%ecx), %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
308 pswapd %mm1, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
309 pswapd %mm5, %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
310 pfadd %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
311 pfadd %mm5, %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
312 movq %mm0, 96(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
313 movq %mm4, 104(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
314 pfsubr %mm1, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
315 pfsubr %mm5, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
316 pfmul 96(%ebx), %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
317 pfmul 104(%ebx), %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
318 pswapd %mm3, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
319 pswapd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
320 movq %mm3, 120(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
321 movq %mm7, 112(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
322
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
323 /* Phase 5 */
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
324
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
325 movq (%edx), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
326 movq 16(%edx), %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
327 movq %mm0, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
328 movq %mm4, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
329 movq 8(%edx), %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
330 movq 24(%edx), %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
331 pswapd %mm1, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
332 pswapd %mm5, %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
333 pfadd %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
334 pfadd %mm5, %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
335 movq %mm0, (%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
336 movq %mm4, 16(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
337 pfsub %mm1, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
338 pfsubr %mm5, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
339 pfmul 112(%ebx), %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
340 pfmul 112(%ebx), %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
341 pswapd %mm3, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
342 pswapd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
343 movq %mm3, 8(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
344 movq %mm7, 24(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
345
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
346 movq 32(%edx), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
347 movq 48(%edx), %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
348 movq %mm0, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
349 movq %mm4, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
350 movq 40(%edx), %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
351 movq 56(%edx), %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
352 pswapd %mm1, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
353 pswapd %mm5, %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
354 pfadd %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
355 pfadd %mm5, %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
356 movq %mm0, 32(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
357 movq %mm4, 48(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
358 pfsub %mm1, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
359 pfsubr %mm5, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
360 pfmul 112(%ebx), %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
361 pfmul 112(%ebx), %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
362 pswapd %mm3, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
363 pswapd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
364 movq %mm3, 40(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
365 movq %mm7, 56(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
366
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
367 movq 64(%edx), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
368 movq 80(%edx), %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
369 movq %mm0, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
370 movq %mm4, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
371 movq 72(%edx), %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
372 movq 88(%edx), %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
373 pswapd %mm1, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
374 pswapd %mm5, %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
375 pfadd %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
376 pfadd %mm5, %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
377 movq %mm0, 64(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
378 movq %mm4, 80(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
379 pfsub %mm1, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
380 pfsubr %mm5, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
381 pfmul 112(%ebx), %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
382 pfmul 112(%ebx), %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
383 pswapd %mm3, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
384 pswapd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
385 movq %mm3, 72(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
386 movq %mm7, 88(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
387
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
388 movq 96(%edx), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
389 movq 112(%edx), %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
390 movq %mm0, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
391 movq %mm4, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
392 movq 104(%edx), %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
393 movq 120(%edx), %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
394 pswapd %mm1, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
395 pswapd %mm5, %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
396 pfadd %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
397 pfadd %mm5, %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
398 movq %mm0, 96(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
399 movq %mm4, 112(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
400 pfsub %mm1, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
401 pfsubr %mm5, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
402 pfmul 112(%ebx), %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
403 pfmul 112(%ebx), %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
404 pswapd %mm3, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
405 pswapd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
406 movq %mm3, 104(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
407 movq %mm7, 120(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
408
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
409 /* Phase 6. This is the end of easy road. */
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
410 movl $1, %eax
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
411 movd %eax, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
412 pi2fd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
413 movq 32(%ecx), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
414 punpckldq 120(%ebx), %mm7 /* 1.0 | 120(%ebx) */
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
415 movq %mm0, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
416 movq plus_minus_3dnow, %mm6
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
417 /* n.b.: pfpnacc */
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
418 pxor %mm6, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
419 pfacc %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
420 /**/
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
421 pfmul %mm7, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
422 movq %mm0, 32(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
423 femms
736
59b0a9ec8604 K7 3dnow-dsp support
nickols_k
parents:
diff changeset
424
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
425 flds 44(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
426 fsubs 40(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
427 fmuls 120(%ebx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
428
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
429 fsts 44(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
430 fadds 40(%ecx) /* pfacc 40(ecx), 56(%ecx) */
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
431 fadds 44(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
432 fstps 40(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
433
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
434 flds 48(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
435 fsubs 52(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
436 fmuls 120(%ebx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
437
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
438 flds 60(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
439 fsubs 56(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
440 fmuls 120(%ebx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
441
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
442 fld %st(0)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
443 fadds 56(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
444 fadds 60(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
445
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
446 fld %st(0)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
447 fadds 48(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
448 fadds 52(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
449 fstps 48(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
450 fadd %st(2)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
451 fstps 56(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
452 fsts 60(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
453 faddp %st(1)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
454 fstps 52(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
455 /*---*/
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
456 flds 64(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
457 fadds 68(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
458 fstps 64(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
459
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
460 flds 64(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
461 fsubs 68(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
462 fmuls 120(%ebx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
463 fstps 68(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
464
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
465 flds 76(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
466 fsubs 72(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
467 fmuls 120(%ebx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
468 fsts 76(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
469 fadds 72(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
470 fadds 76(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
471 fstps 72(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
472
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
473 flds 92(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
474 fsubs 88(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
475 fmuls 120(%ebx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
476 fsts 92(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
477 fadds 92(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
478 fadds 88(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
479
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
480 fld %st(0)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
481 fadds 80(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
482 fadds 84(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
483 fstps 80(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
484
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
485 flds 80(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
486 fsubs 84(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
487 fmuls 120(%ebx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
488 fadd %st(0), %st(1)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
489 fadds 92(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
490 fstps 84(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
491 fstps 88(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
492
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
493 flds 96(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
494 fadds 100(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
495 fstps 96(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
496
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
497 flds 96(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
498 fsubs 100(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
499 fmuls 120(%ebx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
500 fstps 100(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
501
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
502 flds 108(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
503 fsubs 104(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
504 fmuls 120(%ebx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
505 fsts 108(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
506 fadds 104(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
507 fadds 108(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
508 fstps 104(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
509
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
510 flds 124(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
511 fsubs 120(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
512 fmuls 120(%ebx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
513 fsts 124(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
514 fadds 120(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
515 fadds 124(%ecx)
736
59b0a9ec8604 K7 3dnow-dsp support
nickols_k
parents:
diff changeset
516
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
517 fld %st(0)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
518 fadds 112(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
519 fadds 116(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
520 fstps 112(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
521
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
522 flds 112(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
523 fsubs 116(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
524 fmuls 120(%ebx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
525 fadd %st(0),%st(1)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
526 fadds 124(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
527 fstps 116(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
528 fstps 120(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
529 jnz .L01
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
530
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
531 /* Phase 7*/
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
532
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
533 flds (%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
534 fadds 4(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
535 fstps 1024(%esi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
536
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
537 flds (%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
538 fsubs 4(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
539 fmuls 120(%ebx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
540 fsts (%esi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
541 fstps (%edi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
542
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
543 flds 12(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
544 fsubs 8(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
545 fmuls 120(%ebx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
546 fsts 512(%edi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
547 fadds 12(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
548 fadds 8(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
549 fstps 512(%esi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
550
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
551 flds 16(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
552 fsubs 20(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
553 fmuls 120(%ebx)
781
ee303142c2e0 improvements.
nickols_k
parents: 736
diff changeset
554
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
555 flds 28(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
556 fsubs 24(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
557 fmuls 120(%ebx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
558 fsts 768(%edi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
559 fld %st(0)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
560 fadds 24(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
561 fadds 28(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
562 fld %st(0)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
563 fadds 16(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
564 fadds 20(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
565 fstps 768(%esi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
566 fadd %st(2)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
567 fstps 256(%esi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
568 faddp %st(1)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
569 fstps 256(%edi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
570
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
571 /* Phase 8*/
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
572
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
573 flds 32(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
574 fadds 48(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
575 fstps 896(%esi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
576
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
577 flds 48(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
578 fadds 40(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
579 fstps 640(%esi)
781
ee303142c2e0 improvements.
nickols_k
parents: 736
diff changeset
580
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
581 flds 40(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
582 fadds 56(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
583 fstps 384(%esi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
584
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
585 flds 56(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
586 fadds 36(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
587 fstps 128(%esi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
588
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
589 flds 36(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
590 fadds 52(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
591 fstps 128(%edi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
592
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
593 flds 52(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
594 fadds 44(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
595 fstps 384(%edi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
596
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
597 flds 60(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
598 fsts 896(%edi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
599 fadds 44(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
600 fstps 640(%edi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
601
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
602 flds 96(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
603 fadds 112(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
604 fld %st(0)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
605 fadds 64(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
606 fstps 960(%esi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
607 fadds 80(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
608 fstps 832(%esi)
781
ee303142c2e0 improvements.
nickols_k
parents: 736
diff changeset
609
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
610 flds 112(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
611 fadds 104(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
612 fld %st(0)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
613 fadds 80(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
614 fstps 704(%esi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
615 fadds 72(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
616 fstps 576(%esi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
617
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
618 flds 104(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
619 fadds 120(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
620 fld %st(0)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
621 fadds 72(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
622 fstps 448(%esi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
623 fadds 88(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
624 fstps 320(%esi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
625
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
626 flds 120(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
627 fadds 100(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
628 fld %st(0)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
629 fadds 88(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
630 fstps 192(%esi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
631 fadds 68(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
632 fstps 64(%esi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
633
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
634 flds 100(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
635 fadds 116(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
636 fld %st(0)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
637 fadds 68(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
638 fstps 64(%edi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
639 fadds 84(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
640 fstps 192(%edi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
641
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
642 flds 116(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
643 fadds 108(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
644 fld %st(0)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
645 fadds 84(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
646 fstps 320(%edi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
647 fadds 76(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
648 fstps 448(%edi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
649
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
650 flds 108(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
651 fadds 124(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
652 fld %st(0)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
653 fadds 76(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
654 fstps 576(%edi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
655 fadds 92(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
656 fstps 704(%edi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
657
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
658 flds 124(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
659 fsts 960(%edi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
660 fadds 92(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
661 fstps 832(%edi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
662 jmp .L_bye
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
663 .L01:
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
664 /* Phase 9*/
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
665
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
666 flds (%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
667 fadds 4(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
668 fistp 512(%esi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
669
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
670 flds (%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
671 fsubs 4(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
672 fmuls 120(%ebx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
673
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
674 fistp (%esi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
675
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
676
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
677 flds 12(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
678 fsubs 8(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
679 fmuls 120(%ebx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
680 fist 256(%edi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
681 fadds 12(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
682 fadds 8(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
683 fistp 256(%esi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
684
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
685 flds 16(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
686 fsubs 20(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
687 fmuls 120(%ebx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
688
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
689 flds 28(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
690 fsubs 24(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
691 fmuls 120(%ebx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
692 fist 384(%edi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
693 fld %st(0)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
694 fadds 24(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
695 fadds 28(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
696 fld %st(0)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
697 fadds 16(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
698 fadds 20(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
699 fistp 384(%esi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
700 fadd %st(2)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
701 fistp 128(%esi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
702 faddp %st(1)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
703 fistp 128(%edi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
704
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
705 /* Phase 10*/
736
59b0a9ec8604 K7 3dnow-dsp support
nickols_k
parents:
diff changeset
706
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
707 flds 32(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
708 fadds 48(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
709 fistp 448(%esi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
710
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
711 flds 48(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
712 fadds 40(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
713 fistp 320(%esi)
736
59b0a9ec8604 K7 3dnow-dsp support
nickols_k
parents:
diff changeset
714
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
715 flds 40(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
716 fadds 56(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
717 fistp 192(%esi)
736
59b0a9ec8604 K7 3dnow-dsp support
nickols_k
parents:
diff changeset
718
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
719 flds 56(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
720 fadds 36(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
721 fistp 64(%esi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
722
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
723 flds 36(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
724 fadds 52(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
725 fistp 64(%edi)
736
59b0a9ec8604 K7 3dnow-dsp support
nickols_k
parents:
diff changeset
726
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
727 flds 52(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
728 fadds 44(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
729 fistp 192(%edi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
730
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
731 flds 60(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
732 fist 448(%edi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
733 fadds 44(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
734 fistp 320(%edi)
736
59b0a9ec8604 K7 3dnow-dsp support
nickols_k
parents:
diff changeset
735
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
736 flds 96(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
737 fadds 112(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
738 fld %st(0)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
739 fadds 64(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
740 fistp 480(%esi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
741 fadds 80(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
742 fistp 416(%esi)
736
59b0a9ec8604 K7 3dnow-dsp support
nickols_k
parents:
diff changeset
743
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
744 flds 112(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
745 fadds 104(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
746 fld %st(0)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
747 fadds 80(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
748 fistp 352(%esi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
749 fadds 72(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
750 fistp 288(%esi)
736
59b0a9ec8604 K7 3dnow-dsp support
nickols_k
parents:
diff changeset
751
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
752 flds 104(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
753 fadds 120(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
754 fld %st(0)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
755 fadds 72(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
756 fistp 224(%esi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
757 fadds 88(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
758 fistp 160(%esi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
759
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
760 flds 120(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
761 fadds 100(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
762 fld %st(0)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
763 fadds 88(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
764 fistp 96(%esi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
765 fadds 68(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
766 fistp 32(%esi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
767
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
768 flds 100(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
769 fadds 116(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
770 fld %st(0)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
771 fadds 68(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
772 fistp 32(%edi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
773 fadds 84(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
774 fistp 96(%edi)
736
59b0a9ec8604 K7 3dnow-dsp support
nickols_k
parents:
diff changeset
775
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
776 flds 116(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
777 fadds 108(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
778 fld %st(0)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
779 fadds 84(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
780 fistp 160(%edi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
781 fadds 76(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
782 fistp 224(%edi)
736
59b0a9ec8604 K7 3dnow-dsp support
nickols_k
parents:
diff changeset
783
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
784 flds 108(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
785 fadds 124(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
786 fld %st(0)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
787 fadds 76(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
788 fistp 288(%edi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
789 fadds 92(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
790 fistp 352(%edi)
736
59b0a9ec8604 K7 3dnow-dsp support
nickols_k
parents:
diff changeset
791
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
792 flds 124(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
793 fist 480(%edi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
794 fadds 92(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
795 fistp 416(%edi)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
796 movsw
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
797 .L_bye:
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
798 addl $256,%esp
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
799 popl %edi
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
800 popl %esi
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
801 popl %ebx
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
802 ret
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
803
736
59b0a9ec8604 K7 3dnow-dsp support
nickols_k
parents:
diff changeset
804