Mercurial > mplayer.hg
annotate mp3lib/decode_MMX.s @ 1404:1752eedd4f97
Added checking for x86 cpu extensions using test-programs.
author | atmos4 |
---|---|
date | Sat, 28 Jul 2001 21:35:55 +0000 |
parents | c73912315dbf |
children |
rev | line source |
---|---|
1245
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
1 # this code comes under GPL |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
2 # This code was taken from http://www.mpg123.org |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
3 # See the ChangeLog of mpg123-0.59s-pre.1 for details |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
4 # Applied to mplayer by Nick Kurshev <nickols_k@mail.ru> |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
5 # |
1259 | 6 # Local ChangeLog: |
7 # - Partial loop unrolling and removal of the MOVW insn from loops | |
1245
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
8 # |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
9 |
# Constant masks used by the MMX synthesis loops of synth_1to1_MMX_s.
# They live in .data and are 8-byte aligned because they are used as
# 64-bit memory operands of pand.
1259 | 10 .data |
11 .align 8 | |
# null_one: 0x0000ffff in each dword -> keeps only the low 16-bit word of
# each 32-bit lane (applied to the freshly computed samples in %mm0).
12 null_one: .long 0x0000ffff, 0x0000ffff | |
# one_null: 0xffff0000 in each dword -> keeps only the high 16-bit word of
# each 32-bit lane (applied to the 8 bytes re-read from the output buffer,
# so the 16-bit slots that are not being written survive unchanged).
13 one_null: .long 0xffff0000, 0xffff0000 | |
1271
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
# costab_mmx: exported (.globl) table of 31 32-bit constants.
# It is not referenced by any code visible in this file; it is only
# defined and exported here.
# NOTE(review): the values look like IEEE-754 single-precision bit patterns
# (the first one is 0x3F002785, about 0.5006) -- presumably the cosine
# coefficients used by the dct64 routines; confirm against the consumer.
14 .globl costab_mmx |
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
15 costab_mmx: |
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
# First group: 16 entries.
16 .long 1056974725 |
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
17 .long 1057056395 |
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
18 .long 1057223771 |
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
19 .long 1057485416 |
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
20 .long 1057855544 |
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
21 .long 1058356026 |
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
22 .long 1059019886 |
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
23 .long 1059897405 |
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
24 .long 1061067246 |
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
25 .long 1062657950 |
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
26 .long 1064892987 |
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
27 .long 1066774581 |
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
28 .long 1069414683 |
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
29 .long 1073984175 |
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
30 .long 1079645762 |
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
31 .long 1092815430 |
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
# Values restart near 0.5 here: next group of 8 entries.
32 .long 1057005197 |
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
33 .long 1057342072 |
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
34 .long 1058087743 |
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
35 .long 1059427869 |
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
36 .long 1061799040 |
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
37 .long 1065862217 |
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
38 .long 1071413542 |
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
39 .long 1084439708 |
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
# Next group of 4 entries.
40 .long 1057128951 |
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
41 .long 1058664893 |
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
42 .long 1063675095 |
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
43 .long 1076102863 |
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
# Next group of 2 entries.
44 .long 1057655764 |
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
45 .long 1067924853 |
2864e32cd267
Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents:
1259
diff
changeset
|
# Final single entry (31 values in total).
46 .long 1060439283 |
1259 | 47 |
1245
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
# ---------------------------------------------------------------------------
# synth_1to1_MMX_s -- MMX synthesis window; C prototype is given in the //
# comment below. Syntax: GAS / AT&T, 32-bit x86, arguments on the stack.
#
# After the four register pushes in the prologue the arguments sit at:
#   20(%esp) = bandPtr, 24(%esp) = channel, 28(%esp) = samples,
#   32(%esp) = buffs,   36(%esp) = bo
#
# Outline:
#   1. pick a position inside buffs from *bo and channel (updating *bo when
#      channel != 1),
#   2. call through the dct64_MMX_func pointer with two buffer addresses and
#      bandPtr,
#   3. produce 8*2 + 1 + 7*2 + 1 = 32 16-bit samples, each a 16-term
#      multiply-accumulate of decwins words with buffer words, shifted right
#      by 13 and saturated; samples are stored 4 bytes apart starting at
#      samples + 2*channel.
#
# Saves/restores %ebp, %edi, %esi, %ebx; uses %eax, %ecx, %edx and
# %mm0-%mm7 as scratch; executes emms before the call and before returning.
# External symbols used: dct64_MMX_func, decwins.
# ---------------------------------------------------------------------------
48 .text |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
49 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
50 .globl synth_1to1_MMX_s |
1283
c73912315dbf
My 2.10.1 gnu assembler doesn't like C comment syntax in assembler files. Use
jkeil
parents:
1277
diff
changeset
|
51 // |
c73912315dbf
My 2.10.1 gnu assembler doesn't like C comment syntax in assembler files. Use
jkeil
parents:
1277
diff
changeset
|
52 // void synth_1to1_MMX_s(real *bandPtr, int channel, short *samples, |
c73912315dbf
My 2.10.1 gnu assembler doesn't like C comment syntax in assembler files. Use
jkeil
parents:
1277
diff
changeset
|
53 // short *buffs, int *bo); |
c73912315dbf
My 2.10.1 gnu assembler doesn't like C comment syntax in assembler files. Use
jkeil
parents:
1277
diff
changeset
|
54 // |
1245
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
55 synth_1to1_MMX_s: |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
# Prologue: save the four registers restored by the epilogue.
56 pushl %ebp |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
57 pushl %edi |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
58 pushl %esi |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
59 pushl %ebx |
# %ecx = channel, %edi = samples
1277 | 60 movl 24(%esp),%ecx |
61 movl 28(%esp),%edi | |
1245
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
# %ebx = 15: wrap-around mask for the *bo index (becomes 16 further down).
62 movl $15,%ebx |
# %edx = bo (pointer)
1277 | 63 movl 36(%esp),%edx |
1245
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
# %edi = samples + 2*channel bytes, i.e. &samples[channel]
64 leal (%edi,%ecx,2),%edi |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
# %ecx = channel - 1, so the jecxz below is taken exactly when channel == 1.
65 decl %ecx |
# %esi = buffs, %eax = *bo
1277 | 66 movl 32(%esp),%esi |
67 movl (%edx),%eax | |
1245
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
68 jecxz .L1 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
# channel != 1 only: *bo = (*bo - 1) & 15 and %esi advances 1088 bytes
# into buffs.
69 decl %eax |
1277 | 70 andl %ebx,%eax |
71 leal 1088(%esi),%esi | |
72 movl %eax,(%edx) | |
1245
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
73 .L1: |
# Here %eax = current bo value, %esi = selected buffer base.
# %edx = base + 2*bo; %ebp keeps a copy of bo.
1277 | 74 leal (%esi,%eax,2),%edx |
75 movl %eax,%ebp | |
76 incl %eax | |
# Push bandPtr now (no push has happened since the prologue, so 20(%esp) is
# still the first argument); it becomes the third argument of the call below.
77 pushl 20(%esp) | |
# %eax = (bo + 1) & 15; %ecx = base + 544 + 2*%eax
78 andl %ebx,%eax | |
79 leal 544(%esi,%eax,2),%ecx | |
# %ebx = 16 from here on.
80 incl %ebx | |
1245
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
# If (bo + 1) & 15 is odd keep the pointers as computed; otherwise swap the
# two destination pointers, bump %ebp and move the base 544 bytes forward.
81 testl $1, %eax |
1277 | 82 jnz .L2 |
1245
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
83 xchgl %edx,%ecx |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
84 incl %ebp |
1277 | 85 leal 544(%esi),%esi |
1245
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
86 .L2: |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
# Leave MMX state before the indirect call.
87 emms |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
# Stack arguments for the call, last pushed = first argument:
#   (%ecx, %edx, bandPtr)
88 pushl %edx |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
89 pushl %ecx |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
# Indirect call through the function pointer stored at dct64_MMX_func.
# NOTE(review): the 12 bytes of arguments are not removed here (no
# "addl $12,%esp"), yet the epilogue pops only the four saved registers.
# That balances only if the callee pops its own arguments (e.g. "ret $12")
# -- confirm against every routine dct64_MMX_func can point to.
# NOTE(review): %ebx, %ebp, %esi and %edi are used after the call without
# being reloaded, so the callee is relied upon to preserve them.
90 call *dct64_MMX_func |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
# %ecx = %ebx + 1 (17 when %ebx is still 16); %ebx = 16 - %ebp.
91 leal 1(%ebx), %ecx |
1277 | 92 subl %ebp,%ebx |
# Keep the full count on the stack; its low bit selects the odd tail later.
1259 | 93 pushl %ecx |
1245
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
# %edx = decwins + 2*%ebx : start of the window coefficients (16-bit words).
94 leal decwins(%ebx,%ebx,1), %edx |
# Two samples per iteration -> count/2 iterations (8 for a count of 17).
1259 | 95 shrl $1, %ecx |
96 .align 16 | |
1245
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
# ---- First loop: two output samples per iteration -------------------------
# Sample A: 16 words at 0..31(%edx)  times 16 words at 0..31(%esi)  -> %mm0
# Sample B: 16 words at 64..95(%edx) times 16 words at 32..63(%esi) -> %mm4
# The A and B instruction streams are interleaved.
97 .L3: |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
98 movq (%edx),%mm0 |
1259 | 99 movq 64(%edx),%mm4 |
100 pmaddwd (%esi),%mm0 | |
101 pmaddwd 32(%esi),%mm4 | |
102 movq 8(%edx),%mm1 | |
103 movq 72(%edx),%mm5 | |
104 pmaddwd 8(%esi),%mm1 | |
105 pmaddwd 40(%esi),%mm5 | |
106 movq 16(%edx),%mm2 | |
107 movq 80(%edx),%mm6 | |
108 pmaddwd 16(%esi),%mm2 | |
109 pmaddwd 48(%esi),%mm6 | |
110 movq 24(%edx),%mm3 | |
111 movq 88(%edx),%mm7 | |
112 pmaddwd 24(%esi),%mm3 | |
113 pmaddwd 56(%esi),%mm7 | |
# Add the four partial-product registers of each sample.
114 paddd %mm1,%mm0 | |
115 paddd %mm5,%mm4 | |
116 paddd %mm2,%mm0 | |
117 paddd %mm6,%mm4 | |
118 paddd %mm3,%mm0 | |
119 paddd %mm7,%mm4 | |
# Horizontal add: fold the high dword onto the low dword (the total ends up
# in the low dword of %mm0 / %mm4).
120 movq %mm0,%mm1 | |
121 movq %mm4,%mm5 | |
122 psrlq $32,%mm1 | |
123 psrlq $32,%mm5 | |
124 paddd %mm1,%mm0 | |
125 paddd %mm5,%mm4 | |
# Arithmetic shift right by 13, then saturate to signed 16 bit.
126 psrad $13,%mm0 | |
127 psrad $13,%mm4 | |
128 packssdw %mm0,%mm0 | |
129 packssdw %mm4,%mm4 | |
130 | |
# Read-modify-write of 8 output bytes: sample A goes to the low word of the
# first dword, sample B to the low word of the second dword; the high word
# of each dword already in memory is kept (one_null / null_one masks).
131 movq (%edi), %mm1 | |
132 punpckldq %mm4, %mm0 | |
133 pand one_null, %mm1 | |
134 pand null_one, %mm0 | |
135 por %mm0, %mm1 | |
136 movq %mm1,(%edi) | |
137 | |
# Advance: 64 bytes of buffer data, 128 bytes of window, 8 bytes of output.
138 leal 64(%esi),%esi | |
139 leal 128(%edx),%edx | |
140 leal 8(%edi),%edi | |
141 | |
142 decl %ecx | |
143 jnz .L3 | |
144 | |
# Recover the saved count; if it was odd, one single sample is still due.
145 popl %ecx | |
146 andl $1, %ecx | |
147 jecxz .next_loop | |
148 | |
# ---- Odd tail: one sample, same arithmetic as sample A above --------------
149 movq (%edx),%mm0 | |
1245
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
150 pmaddwd (%esi),%mm0 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
151 movq 8(%edx),%mm1 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
152 pmaddwd 8(%esi),%mm1 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
153 movq 16(%edx),%mm2 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
154 pmaddwd 16(%esi),%mm2 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
155 movq 24(%edx),%mm3 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
156 pmaddwd 24(%esi),%mm3 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
157 paddd %mm1,%mm0 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
158 paddd %mm2,%mm0 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
159 paddd %mm3,%mm0 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
160 movq %mm0,%mm1 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
161 psrlq $32,%mm1 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
162 paddd %mm1,%mm0 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
163 psrad $13,%mm0 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
164 packssdw %mm0,%mm0 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
# Store only the 16-bit sample (the neighbouring slot is left untouched).
165 movd %mm0,%eax |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
166 movw %ax, (%edi) |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
# Advance by one sample: 32 bytes of data, 64 of window, 4 of output.
167 leal 32(%esi),%esi |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
168 leal 64(%edx),%edx |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
169 leal 4(%edi),%edi |
1259 | 170 |
# ---- Second loop: 7 iterations, two samples each --------------------------
# The window pointer (%edx) keeps moving forward while the data pointer
# (%esi) now walks backwards (sample B reads -32..-1(%esi), %esi -= 64 per
# iteration), and every result is negated before it is stored.
171 .next_loop: | |
172 subl $64,%esi | |
173 movl $7,%ecx | |
174 .align 16 | |
175 .L4: | |
176 movq (%edx),%mm0 | |
177 movq 64(%edx),%mm4 | |
178 pmaddwd (%esi),%mm0 | |
179 pmaddwd -32(%esi),%mm4 | |
180 movq 8(%edx),%mm1 | |
181 movq 72(%edx),%mm5 | |
182 pmaddwd 8(%esi),%mm1 | |
183 pmaddwd -24(%esi),%mm5 | |
184 movq 16(%edx),%mm2 | |
185 movq 80(%edx),%mm6 | |
186 pmaddwd 16(%esi),%mm2 | |
187 pmaddwd -16(%esi),%mm6 | |
188 movq 24(%edx),%mm3 | |
189 movq 88(%edx),%mm7 | |
190 pmaddwd 24(%esi),%mm3 | |
191 pmaddwd -8(%esi),%mm7 | |
192 paddd %mm1,%mm0 | |
193 paddd %mm5,%mm4 | |
194 paddd %mm2,%mm0 | |
195 paddd %mm6,%mm4 | |
196 paddd %mm3,%mm0 | |
197 paddd %mm7,%mm4 | |
# Horizontal add into %mm1 / %mm5 this time, freeing %mm0 / %mm4.
198 movq %mm0,%mm1 | |
199 movq %mm4,%mm5 | |
200 psrlq $32,%mm1 | |
201 psrlq $32,%mm5 | |
202 paddd %mm0,%mm1 | |
203 paddd %mm4,%mm5 | |
204 psrad $13,%mm1 | |
205 psrad $13,%mm5 | |
206 packssdw %mm1,%mm1 | |
207 packssdw %mm5,%mm5 | |
# Negate with saturation: zero %mm0 / %mm4, then compute 0 - sample.
208 psubd %mm0,%mm0 | |
209 psubd %mm4,%mm4 | |
210 psubsw %mm1,%mm0 | |
211 psubsw %mm5,%mm4 | |
1245
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
212 |
# Same masked read-modify-write store as in the first loop.
1259 | 213 movq (%edi), %mm1 |
214 punpckldq %mm4, %mm0 | |
215 pand one_null, %mm1 | |
216 pand null_one, %mm0 | |
217 por %mm0, %mm1 | |
218 movq %mm1,(%edi) | |
219 | |
220 subl $64,%esi | |
221 addl $128,%edx | |
222 leal 8(%edi),%edi | |
223 decl %ecx | |
224 jnz .L4 | |
225 | |
1245
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
# ---- Final single sample (negated), stored as one 16-bit word -------------
226 movq (%edx),%mm0 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
227 pmaddwd (%esi),%mm0 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
228 movq 8(%edx),%mm1 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
229 pmaddwd 8(%esi),%mm1 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
230 movq 16(%edx),%mm2 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
231 pmaddwd 16(%esi),%mm2 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
232 movq 24(%edx),%mm3 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
233 pmaddwd 24(%esi),%mm3 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
234 paddd %mm1,%mm0 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
235 paddd %mm2,%mm0 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
236 paddd %mm3,%mm0 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
237 movq %mm0,%mm1 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
238 psrlq $32,%mm1 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
239 paddd %mm0,%mm1 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
240 psrad $13,%mm1 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
241 packssdw %mm1,%mm1 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
242 psubd %mm0,%mm0 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
243 psubsw %mm1,%mm0 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
244 movd %mm0,%eax |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
245 movw %ax,(%edi) |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
246 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
# Epilogue: leave MMX state, restore the saved registers in reverse push
# order and return (no return value is set up).
247 emms |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
248 popl %ebx |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
249 popl %esi |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
250 popl %edi |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
251 popl %ebp |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
252 ret |