Mercurial > mplayer.hg
annotate mp3lib/decode_MMX.s @ 2316:bcb229557e9b
fixed alignment (static variables were sometimes not 8-byte aligned)
added half uv interpolation support
added prefetch
BGR15 support in MMX (untested) (so BGR15,16,24,32 are supported)
special unscaled height version (not much faster but it doesn't interpolate uv vertically)
author | michael |
---|---|
date | Sat, 20 Oct 2001 21:12:09 +0000 |
parents | c73912315dbf |
children |
rev | line source |
---|---|
1245
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
1 # this code comes under GPL |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
2 # This code was taken from http://www.mpg123.org |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
3 # See ChangeLog of mpg123-0.59s-pre.1 for detail |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
4 # Applied to mplayer by Nick Kurshev <nickols_k@mail.ru> |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
5 # |
1259 | 6 # Local ChangeLog: |
7 # - Partial loops unrolling and removing MOVW insn from loops | |
1245
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
8 # |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
9 |
.data
	.align 8

# Lane masks used by the synthesis routine when it merges freshly computed
# samples into the output buffer with pand/por:
#   null_one keeps the low 16 bits of each 32-bit lane,
#   one_null keeps the high 16 bits of each 32-bit lane.
null_one:
	.long 0x0000ffff, 0x0000ffff
one_null:
	.long 0xffff0000, 0xffff0000

# costab_mmx: exported table of 31 32-bit words, laid out below as runs of
# sixteen, eight, four, two and one entries.
# NOTE(review): the values look like IEEE-754 single-precision bit patterns of
# the cosine coefficients consumed by the dct64 code - confirm against the user
# of this table.
.globl costab_mmx
costab_mmx:
	.long 1056974725, 1057056395, 1057223771, 1057485416
	.long 1057855544, 1058356026, 1059019886, 1059897405
	.long 1061067246, 1062657950, 1064892987, 1066774581
	.long 1069414683, 1073984175, 1079645762, 1092815430

	.long 1057005197, 1057342072, 1058087743, 1059427869
	.long 1061799040, 1065862217, 1071413542, 1084439708

	.long 1057128951, 1058664893, 1063675095, 1076102863

	.long 1057655764, 1067924853

	.long 1060439283
1259 | 47 |
1245
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
48 .text |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
49 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
diff
changeset
|
.globl synth_1to1_MMX_s
//
// void synth_1to1_MMX_s(real *bandPtr, int channel, short *samples,
//                       short *buffs, int *bo);
//
# Runs the routine pointed to by dct64_MMX_func on bandPtr, then windows the
# result against decwins with MMX multiply-accumulate and emits 32 signed
# 16-bit samples.  Output starts at samples + channel (in shorts) and only
# every other 16-bit slot is written; the slots in between are preserved.
#
# Stack arguments, 32-bit layout, once the four register saves are done:
#   20(%esp) bandPtr   24(%esp) channel   28(%esp) samples
#   32(%esp) buffs     36(%esp) bo
#
# Register roles after the setup code:
#   %esi  read pointer into the synthesis buffer (inside buffs)
#   %edx  read pointer into decwins
#   %edi  write pointer into samples
#   %ecx  loop counter
# %ebx, %ebp, %esi and %edi are expected to survive the dct64 call.
#
# Side effects: *bo is replaced by (*bo - 1) & 15 when channel != 1.
# MMX state is cleared with emms before the dct64 call and before returning.
synth_1to1_MMX_s:
	pushl %ebp
	pushl %edi
	pushl %esi
	pushl %ebx

	movl 24(%esp),%ecx		# ecx = channel
	movl 28(%esp),%edi		# edi = samples
	movl $15,%ebx			# ebx = ring mask for the buffer offset
	movl 36(%esp),%edx		# edx = bo
	leal (%edi,%ecx,2),%edi		# edi = samples + channel (shorts)
	decl %ecx			# ecx == 0 exactly when channel == 1
	movl 32(%esp),%esi		# esi = buffs
	movl (%edx),%eax		# eax = *bo
	jecxz .L1			# channel == 1: leave *bo and esi alone
	decl %eax
	andl %ebx,%eax			# eax = (*bo - 1) & 15
	leal 1088(%esi),%esi		# use the area 1088 bytes into buffs
	movl %eax,(%edx)		# store the new offset back to *bo
.L1:
	leal (%esi,%eax,2),%edx		# edx = esi + 2*offset
	movl %eax,%ebp			# ebp = offset
	incl %eax
	pushl 20(%esp)			# dct64 argument 3: bandPtr
	andl %ebx,%eax			# eax = (offset + 1) & 15
	leal 544(%esi,%eax,2),%ecx	# ecx = esi + 544 + 2*eax
	incl %ebx			# ebx = 16
	testl $1, %eax
	jnz .L2
	xchgl %edx,%ecx			# bit 0 clear: swap the two dct64 targets,
	incl %ebp			# window from offset + 1,
	leal 544(%esi),%esi		# and read the half 544 bytes further on
.L2:
	emms
	pushl %edx			# dct64 argument 2
	pushl %ecx			# dct64 argument 1
	call *dct64_MMX_func
	# Drop the three arguments pushed above.  Without this the epilogue
	# pops them instead of the saved registers and ret leaves through a
	# bogus address.  Assumes dct64_MMX_func is cdecl (caller cleans up)
	# - confirm against its definition.
	addl $12,%esp
	leal 1(%ebx), %ecx		# ecx = 17 samples for the first half
	subl %ebp,%ebx			# ebx = 16 - window start
	pushl %ecx			# keep the count for the odd-sample test
	leal decwins(%ebx,%ebx,1), %edx	# edx = decwins + 2*ebx
	shrl $1, %ecx			# 8 iterations, two samples each
	.align 16
.L3:
	# Two 16-tap dot products per pass: window rows at edx and edx+64
	# against buffer rows at esi and esi+32.
	movq (%edx),%mm0
	movq 64(%edx),%mm4
	pmaddwd (%esi),%mm0
	pmaddwd 32(%esi),%mm4
	movq 8(%edx),%mm1
	movq 72(%edx),%mm5
	pmaddwd 8(%esi),%mm1
	pmaddwd 40(%esi),%mm5
	movq 16(%edx),%mm2
	movq 80(%edx),%mm6
	pmaddwd 16(%esi),%mm2
	pmaddwd 48(%esi),%mm6
	movq 24(%edx),%mm3
	movq 88(%edx),%mm7
	pmaddwd 24(%esi),%mm3
	pmaddwd 56(%esi),%mm7
	paddd %mm1,%mm0
	paddd %mm5,%mm4
	paddd %mm2,%mm0
	paddd %mm6,%mm4
	paddd %mm3,%mm0
	paddd %mm7,%mm4
	movq %mm0,%mm1
	movq %mm4,%mm5
	psrlq $32,%mm1
	psrlq $32,%mm5
	paddd %mm1,%mm0			# low dword = full sum, sample A
	paddd %mm5,%mm4			# low dword = full sum, sample B
	psrad $13,%mm0			# scale down
	psrad $13,%mm4
	packssdw %mm0,%mm0		# saturate to signed 16 bit
	packssdw %mm4,%mm4

	# Merge A and B into 16-bit slots 0 and 2 at (%edi); slots 1 and 3
	# keep whatever was stored there before.
	movq (%edi), %mm1
	punpckldq %mm4, %mm0
	pand one_null, %mm1
	pand null_one, %mm0
	por %mm0, %mm1
	movq %mm1,(%edi)

	leal 64(%esi),%esi
	leal 128(%edx),%edx
	leal 8(%edi),%edi

	decl %ecx
	jnz .L3

	popl %ecx
	andl $1, %ecx			# odd count: one more single sample
	jecxz .next_loop

	movq (%edx),%mm0
	pmaddwd (%esi),%mm0
	movq 8(%edx),%mm1
	pmaddwd 8(%esi),%mm1
	movq 16(%edx),%mm2
	pmaddwd 16(%esi),%mm2
	movq 24(%edx),%mm3
	pmaddwd 24(%esi),%mm3
	paddd %mm1,%mm0
	paddd %mm2,%mm0
	paddd %mm3,%mm0
	movq %mm0,%mm1
	psrlq $32,%mm1
	paddd %mm1,%mm0
	psrad $13,%mm0
	packssdw %mm0,%mm0
	movd %mm0,%eax
	movw %ax, (%edi)		# plain 16-bit store, neighbour untouched
	leal 32(%esi),%esi
	leal 64(%edx),%edx
	leal 4(%edi),%edi

.next_loop:
	# Second half: the buffer is walked backwards, the window keeps
	# advancing, and every result is negated before it is stored.
	subl $64,%esi
	movl $7,%ecx			# 7 iterations, two samples each
	.align 16
.L4:
	movq (%edx),%mm0
	movq 64(%edx),%mm4
	pmaddwd (%esi),%mm0
	pmaddwd -32(%esi),%mm4
	movq 8(%edx),%mm1
	movq 72(%edx),%mm5
	pmaddwd 8(%esi),%mm1
	pmaddwd -24(%esi),%mm5
	movq 16(%edx),%mm2
	movq 80(%edx),%mm6
	pmaddwd 16(%esi),%mm2
	pmaddwd -16(%esi),%mm6
	movq 24(%edx),%mm3
	movq 88(%edx),%mm7
	pmaddwd 24(%esi),%mm3
	pmaddwd -8(%esi),%mm7
	paddd %mm1,%mm0
	paddd %mm5,%mm4
	paddd %mm2,%mm0
	paddd %mm6,%mm4
	paddd %mm3,%mm0
	paddd %mm7,%mm4
	movq %mm0,%mm1
	movq %mm4,%mm5
	psrlq $32,%mm1
	psrlq $32,%mm5
	paddd %mm0,%mm1
	paddd %mm4,%mm5
	psrad $13,%mm1
	psrad $13,%mm5
	packssdw %mm1,%mm1
	packssdw %mm5,%mm5
	psubd %mm0,%mm0			# mm0 = 0
	psubd %mm4,%mm4			# mm4 = 0
	psubsw %mm1,%mm0		# mm0 = -A with signed saturation
	psubsw %mm5,%mm4		# mm4 = -B with signed saturation

	movq (%edi), %mm1
	punpckldq %mm4, %mm0
	pand one_null, %mm1
	pand null_one, %mm0
	por %mm0, %mm1
	movq %mm1,(%edi)

	subl $64,%esi
	addl $128,%edx
	leal 8(%edi),%edi
	decl %ecx
	jnz .L4

	# Last sample of the second half, also negated.
	movq (%edx),%mm0
	pmaddwd (%esi),%mm0
	movq 8(%edx),%mm1
	pmaddwd 8(%esi),%mm1
	movq 16(%edx),%mm2
	pmaddwd 16(%esi),%mm2
	movq 24(%edx),%mm3
	pmaddwd 24(%esi),%mm3
	paddd %mm1,%mm0
	paddd %mm2,%mm0
	paddd %mm3,%mm0
	movq %mm0,%mm1
	psrlq $32,%mm1
	paddd %mm0,%mm1
	psrad $13,%mm1
	packssdw %mm1,%mm1
	psubd %mm0,%mm0
	psubsw %mm1,%mm0
	movd %mm0,%eax
	movw %ax,(%edi)

	emms
	popl %ebx
	popl %esi
	popl %edi
	popl %ebp
	ret