annotate mp3lib/dct64_k7.s @ 1271:2864e32cd267

Finished 3dnow optimization (in scalar mode) and minor improvements
author nick
date Wed, 04 Jul 2001 09:47:56 +0000
parents 03b7e2955a20
children 3a9699d9e7da
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
1 # This code was taken from http://www.mpg123.org
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
2 # See ChangeLog of mpg123-0.59s-pre.1 for details
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
3 # Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
4 # Partial 3dnowex-DSP! optimization by Nick Kurshev
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
5 #
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
6 # TODO: optimize scalar 3dnow! code
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
7 # Warning: Phases 7 & 8 are not tested
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
8 #
736
59b0a9ec8604 K7 3dnow-dsp support
nickols_k
parents:
diff changeset
9
1173
3c53cbf53e7e Better 3dnow! optimization
nickols_k
parents: 781
diff changeset
10 .text
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
11
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
12 .align 16
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
13
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
14 .globl dct64_MMX_3dnowex
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
15 dct64_MMX_3dnowex:
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
16 pushl %ebx
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
17 pushl %esi
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
18 pushl %edi
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
19 subl $256,%esp
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
20 movl 280(%esp),%eax
781
ee303142c2e0 improvements.
nickols_k
parents: 736
diff changeset
21
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
22 leal 128(%esp),%edx
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
23 movl 272(%esp),%esi
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
24 movl 276(%esp),%edi
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
25 movl $costab_mmx,%ebx
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
26 orl %ecx,%ecx
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
27 movl %esp,%ecx
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
28
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
29 /* Phase 1*/
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
30 movq (%eax), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
31 movq 8(%eax), %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
32 movq %mm0, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
33 movq %mm4, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
34 movq 120(%eax), %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
35 movq 112(%eax), %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
36 pswapd %mm1, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
37 pswapd %mm5, %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
38 pfadd %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
39 pfadd %mm5, %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
40 movq %mm0, (%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
41 movq %mm4, 8(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
42 pfsub %mm1, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
43 pfsub %mm5, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
44 pfmul (%ebx), %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
45 pfmul 8(%ebx), %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
46 pswapd %mm3, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
47 pswapd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
48 movq %mm3, 120(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
49 movq %mm7, 112(%edx)
781
ee303142c2e0 improvements.
nickols_k
parents: 736
diff changeset
50
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
51 movq 16(%eax), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
52 movq 24(%eax), %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
53 movq %mm0, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
54 movq %mm4, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
55 movq 104(%eax), %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
56 movq 96(%eax), %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
57 pswapd %mm1, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
58 pswapd %mm5, %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
59 pfadd %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
60 pfadd %mm5, %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
61 movq %mm0, 16(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
62 movq %mm4, 24(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
63 pfsub %mm1, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
64 pfsub %mm5, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
65 pfmul 16(%ebx), %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
66 pfmul 24(%ebx), %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
67 pswapd %mm3, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
68 pswapd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
69 movq %mm3, 104(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
70 movq %mm7, 96(%edx)
781
ee303142c2e0 improvements.
nickols_k
parents: 736
diff changeset
71
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
72 movq 32(%eax), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
73 movq 40(%eax), %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
74 movq %mm0, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
75 movq %mm4, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
76 movq 88(%eax), %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
77 movq 80(%eax), %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
78 pswapd %mm1, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
79 pswapd %mm5, %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
80 pfadd %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
81 pfadd %mm5, %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
82 movq %mm0, 32(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
83 movq %mm4, 40(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
84 pfsub %mm1, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
85 pfsub %mm5, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
86 pfmul 32(%ebx), %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
87 pfmul 40(%ebx), %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
88 pswapd %mm3, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
89 pswapd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
90 movq %mm3, 88(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
91 movq %mm7, 80(%edx)
781
ee303142c2e0 improvements.
nickols_k
parents: 736
diff changeset
92
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
93 movq 48(%eax), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
94 movq 56(%eax), %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
95 movq %mm0, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
96 movq %mm4, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
97 movq 72(%eax), %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
98 movq 64(%eax), %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
99 pswapd %mm1, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
100 pswapd %mm5, %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
101 pfadd %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
102 pfadd %mm5, %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
103 movq %mm0, 48(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
104 movq %mm4, 56(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
105 pfsub %mm1, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
106 pfsub %mm5, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
107 pfmul 48(%ebx), %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
108 pfmul 56(%ebx), %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
109 pswapd %mm3, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
110 pswapd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
111 movq %mm3, 72(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
112 movq %mm7, 64(%edx)
781
ee303142c2e0 improvements.
nickols_k
parents: 736
diff changeset
113
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
114 /* Phase 2*/
781
ee303142c2e0 improvements.
nickols_k
parents: 736
diff changeset
115
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
116 movq (%edx), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
117 movq 8(%edx), %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
118 movq %mm0, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
119 movq %mm4, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
120 movq 56(%edx), %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
121 movq 48(%edx), %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
122 pswapd %mm1, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
123 pswapd %mm5, %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
124 pfadd %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
125 pfadd %mm5, %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
126 movq %mm0, (%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
127 movq %mm4, 8(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
128 pfsub %mm1, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
129 pfsub %mm5, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
130 pfmul 64(%ebx), %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
131 pfmul 72(%ebx), %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
132 pswapd %mm3, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
133 pswapd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
134 movq %mm3, 56(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
135 movq %mm7, 48(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
136
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
137 movq 16(%edx), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
138 movq 24(%edx), %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
139 movq %mm0, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
140 movq %mm4, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
141 movq 40(%edx), %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
142 movq 32(%edx), %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
143 pswapd %mm1, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
144 pswapd %mm5, %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
145 pfadd %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
146 pfadd %mm5, %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
147 movq %mm0, 16(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
148 movq %mm4, 24(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
149 pfsub %mm1, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
150 pfsub %mm5, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
151 pfmul 80(%ebx), %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
152 pfmul 88(%ebx), %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
153 pswapd %mm3, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
154 pswapd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
155 movq %mm3, 40(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
156 movq %mm7, 32(%ecx)
781
ee303142c2e0 improvements.
nickols_k
parents: 736
diff changeset
157
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
158 /* Phase 3*/
781
ee303142c2e0 improvements.
nickols_k
parents: 736
diff changeset
159
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
160 movq 64(%edx), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
161 movq 72(%edx), %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
162 movq %mm0, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
163 movq %mm4, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
164 movq 120(%edx), %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
165 movq 112(%edx), %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
166 pswapd %mm1, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
167 pswapd %mm5, %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
168 pfadd %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
169 pfadd %mm5, %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
170 movq %mm0, 64(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
171 movq %mm4, 72(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
172 pfsubr %mm1, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
173 pfsubr %mm5, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
174 pfmul 64(%ebx), %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
175 pfmul 72(%ebx), %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
176 pswapd %mm3, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
177 pswapd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
178 movq %mm3, 120(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
179 movq %mm7, 112(%ecx)
736
59b0a9ec8604 K7 3dnow-dsp support
nickols_k
parents:
diff changeset
180
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
181 movq 80(%edx), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
182 movq 88(%edx), %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
183 movq %mm0, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
184 movq %mm4, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
185 movq 104(%edx), %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
186 movq 96(%edx), %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
187 pswapd %mm1, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
188 pswapd %mm5, %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
189 pfadd %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
190 pfadd %mm5, %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
191 movq %mm0, 80(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
192 movq %mm4, 88(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
193 pfsubr %mm1, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
194 pfsubr %mm5, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
195 pfmul 80(%ebx), %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
196 pfmul 88(%ebx), %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
197 pswapd %mm3, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
198 pswapd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
199 movq %mm3, 104(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
200 movq %mm7, 96(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
201
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
202 /* Phase 4*/
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
203
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
204 movq (%ecx), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
205 movq 8(%ecx), %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
206 movq %mm0, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
207 movq %mm4, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
208 movq 24(%ecx), %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
209 movq 16(%ecx), %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
210 pswapd %mm1, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
211 pswapd %mm5, %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
212 pfadd %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
213 pfadd %mm5, %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
214 movq %mm0, (%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
215 movq %mm4, 8(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
216 pfsub %mm1, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
217 pfsub %mm5, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
218 pfmul 96(%ebx), %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
219 pfmul 104(%ebx), %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
220 pswapd %mm3, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
221 pswapd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
222 movq %mm3, 24(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
223 movq %mm7, 16(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
224
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
225 movq 32(%ecx), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
226 movq 40(%ecx), %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
227 movq %mm0, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
228 movq %mm4, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
229 movq 56(%ecx), %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
230 movq 48(%ecx), %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
231 pswapd %mm1, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
232 pswapd %mm5, %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
233 pfadd %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
234 pfadd %mm5, %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
235 movq %mm0, 32(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
236 movq %mm4, 40(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
237 pfsubr %mm1, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
238 pfsubr %mm5, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
239 pfmul 96(%ebx), %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
240 pfmul 104(%ebx), %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
241 pswapd %mm3, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
242 pswapd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
243 movq %mm3, 56(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
244 movq %mm7, 48(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
246 movq 64(%ecx), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
247 movq 72(%ecx), %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
248 movq %mm0, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
249 movq %mm4, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
250 movq 88(%ecx), %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
251 movq 80(%ecx), %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
252 pswapd %mm1, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
253 pswapd %mm5, %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
254 pfadd %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
255 pfadd %mm5, %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
256 movq %mm0, 64(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
257 movq %mm4, 72(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
258 pfsub %mm1, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
259 pfsub %mm5, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
260 pfmul 96(%ebx), %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
261 pfmul 104(%ebx), %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
262 pswapd %mm3, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
263 pswapd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
264 movq %mm3, 88(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
265 movq %mm7, 80(%edx)
736
59b0a9ec8604 K7 3dnow-dsp support
nickols_k
parents:
diff changeset
266
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
267 movq 96(%ecx), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
268 movq 104(%ecx), %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
269 movq %mm0, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
270 movq %mm4, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
271 movq 120(%ecx), %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
272 movq 112(%ecx), %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
273 pswapd %mm1, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
274 pswapd %mm5, %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
275 pfadd %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
276 pfadd %mm5, %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
277 movq %mm0, 96(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
278 movq %mm4, 104(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
279 pfsubr %mm1, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
280 pfsubr %mm5, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
281 pfmul 96(%ebx), %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
282 pfmul 104(%ebx), %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
283 pswapd %mm3, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
284 pswapd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
285 movq %mm3, 120(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
286 movq %mm7, 112(%edx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
287
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
288 /* Phase 5 */
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
289
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
290 movq (%edx), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
291 movq 16(%edx), %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
292 movq %mm0, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
293 movq %mm4, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
294 movq 8(%edx), %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
295 movq 24(%edx), %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
296 pswapd %mm1, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
297 pswapd %mm5, %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
298 pfadd %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
299 pfadd %mm5, %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
300 movq %mm0, (%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
301 movq %mm4, 16(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
302 pfsub %mm1, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
303 pfsubr %mm5, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
304 pfmul 112(%ebx), %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
305 pfmul 112(%ebx), %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
306 pswapd %mm3, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
307 pswapd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
308 movq %mm3, 8(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
309 movq %mm7, 24(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
310
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
311 movq 32(%edx), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
312 movq 48(%edx), %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
313 movq %mm0, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
314 movq %mm4, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
315 movq 40(%edx), %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
316 movq 56(%edx), %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
317 pswapd %mm1, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
318 pswapd %mm5, %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
319 pfadd %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
320 pfadd %mm5, %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
321 movq %mm0, 32(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
322 movq %mm4, 48(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
323 pfsub %mm1, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
324 pfsubr %mm5, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
325 pfmul 112(%ebx), %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
326 pfmul 112(%ebx), %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
327 pswapd %mm3, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
328 pswapd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
329 movq %mm3, 40(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
330 movq %mm7, 56(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
331
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
332 movq 64(%edx), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
333 movq 80(%edx), %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
334 movq %mm0, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
335 movq %mm4, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
336 movq 72(%edx), %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
337 movq 88(%edx), %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
338 pswapd %mm1, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
339 pswapd %mm5, %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
340 pfadd %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
341 pfadd %mm5, %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
342 movq %mm0, 64(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
343 movq %mm4, 80(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
344 pfsub %mm1, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
345 pfsubr %mm5, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
346 pfmul 112(%ebx), %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
347 pfmul 112(%ebx), %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
348 pswapd %mm3, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
349 pswapd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
350 movq %mm3, 72(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
351 movq %mm7, 88(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
352
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
353 movq 96(%edx), %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
354 movq 112(%edx), %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
355 movq %mm0, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
356 movq %mm4, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
357 movq 104(%edx), %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
358 movq 120(%edx), %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
359 pswapd %mm1, %mm1
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
360 pswapd %mm5, %mm5
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
361 pfadd %mm1, %mm0
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
362 pfadd %mm5, %mm4
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
363 movq %mm0, 96(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
364 movq %mm4, 112(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
365 pfsub %mm1, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
366 pfsubr %mm5, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
367 pfmul 112(%ebx), %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
368 pfmul 112(%ebx), %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
369 pswapd %mm3, %mm3
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
370 pswapd %mm7, %mm7
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
371 movq %mm3, 104(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
372 movq %mm7, 120(%ecx)
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
373
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
374 /* Phase 6. This is the end of the easy road. */
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
375 /* The code below is written in scalar mode and should be optimized. */
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
376
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
377 movd 32(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
378 pfadd 36(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
379 movd %mm0, 32(%edx)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
380
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
381 movd 32(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
382 pfsub 36(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
383 pfmul 120(%ebx),%mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
384 movd %mm0, 36(%edx)
736
59b0a9ec8604 K7 3dnow-dsp support
nickols_k
parents:
diff changeset
385
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
386 movd 44(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
387 pfsub 40(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
388 pfmul 120(%ebx),%mm0
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
389
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
390 movd %mm0, 44(%edx)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
391 pfadd 40(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
392 pfadd 44(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
393 movd %mm0, 40(%edx)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
394
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
395 movd 48(%ecx), %mm3
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
396 pfsub 52(%ecx), %mm3
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
397 pfmul 120(%ebx), %mm3
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
398
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
399 movd 60(%ecx), %mm2
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
400 pfsub 56(%ecx), %mm2
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
401 pfmul 120(%ebx), %mm2
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
402 movq %mm2, %mm1
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
403
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
404 pfadd 56(%ecx), %mm1
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
405 pfadd 60(%ecx), %mm1
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
406 movq %mm1, %mm0
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
407
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
408 pfadd 48(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
409 pfadd 52(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
410 movd %mm0, 48(%edx)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
411 pfadd %mm3, %mm1
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
412 movd %mm1, 56(%edx)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
413 movd %mm2, 60(%edx)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
414 pfadd %mm3, %mm2
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
415 movd %mm2, 52(%edx)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
416
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
417 /*---*/
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
418 movd 64(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
419 pfadd 68(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
420 movd %mm0, 64(%edx)
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
421
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
422 movd 64(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
423 pfsub 68(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
424 pfmul 120(%ebx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
425 movd %mm0, 68(%edx)
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
426
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
427 movd 76(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
428 pfsub 72(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
429 pfmul 120(%ebx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
430 movd %mm0, 76(%edx)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
431 pfadd 72(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
432 pfadd 76(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
433 movd %mm0, 72(%edx)
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
434
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
435 movd 92(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
436 pfsub 88(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
437 pfmul 120(%ebx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
438 movd %mm0, 92(%edx)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
439 pfadd 92(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
440 pfadd 88(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
441 movq %mm0, %mm1
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
442
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
443 pfadd 80(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
444 pfadd 84(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
445 movd %mm0, 80(%edx)
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
446
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
447 movd 80(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
448 pfsub 84(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
449 pfmul 120(%ebx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
450 pfadd %mm0, %mm1
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
451 pfadd 92(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
452 movd %mm0, 84(%edx)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
453 movd %mm1, 88(%edx)
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
454
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
455 movd 96(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
456 pfadd 100(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
457 movd %mm0, 96(%edx)
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
458
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
459 movd 96(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
460 pfsub 100(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
461 pfmul 120(%ebx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
462 movd %mm0, 100(%edx)
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
463
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
464 movd 108(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
465 pfsub 104(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
466 pfmul 120(%ebx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
467 movd %mm0, 108(%edx)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
468 pfadd 104(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
469 pfadd 108(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
470 movd %mm0, 104(%edx)
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
471
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
472 movd 124(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
473 pfsub 120(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
474 pfmul 120(%ebx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
475 movd %mm0, 124(%edx)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
476 pfadd 120(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
477 pfadd 124(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
478 movq %mm0, %mm1
736
59b0a9ec8604 K7 3dnow-dsp support
nickols_k
parents:
diff changeset
479
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
480 pfadd 112(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
481 pfadd 116(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
482 movd %mm0, 112(%edx)
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
483
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
484 movd 112(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
485 pfsub 116(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
486 pfmul 120(%ebx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
487 pfadd %mm0,%mm1
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
488 pfadd 124(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
489 movd %mm0, 116(%edx)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
490 movd %mm1, 120(%edx)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
491
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
492 jnz .L01
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
493
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
494 /* Phase 7*/
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
495 /* The code below is written in scalar mode (one float per load/store) and should still be optimized. */
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
496
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
497 movd (%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
498 pfadd 4(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
499 movd %mm0, 1024(%esi)
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
500
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
501 movd (%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
502 pfsub 4(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
503 pfmul 120(%ebx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
504 movd %mm0, (%esi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
505 movd %mm0, (%edi)
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
506
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
507 movd 12(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
508 pfsub 8(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
509 pfmul 120(%ebx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
510 movd %mm0, 512(%edi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
511 pfadd 12(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
512 pfadd 8(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
513 movd %mm0, 512(%esi)
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
514
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
515 movd 16(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
516 pfsub 20(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
517 pfmul 120(%ebx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
518 movq %mm0, %mm3
781
ee303142c2e0 improvements.
nickols_k
parents: 736
diff changeset
519
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
520 movd 28(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
521 pfsub 24(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
522 pfmul 120(%ebx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
523 movd %mm0, 768(%edi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
524 movq %mm0, %mm2
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
525
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
526 pfadd 24(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
527 pfadd 28(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
528 movq %mm0, %mm1
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
529
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
530 pfadd 16(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
531 pfadd 20(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
532 movd %mm0, 768(%esi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
533 pfadd %mm3, %mm1
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
534 movd %mm1, 256(%esi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
535 pfadd %mm3, %mm2
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
536 movd %mm2, 256(%edi)
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
537
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
538 /* Phase 8*/
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
539
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
540 movq 32(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
541 movq 48(%edx), %mm1
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
542 pfadd 48(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
543 pfadd 40(%edx), %mm1
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
544 movd %mm0, 896(%esi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
545 movd %mm1, 640(%esi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
546 psrlq $32, %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
547 psrlq $32, %mm1
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
548 movd %mm0, 128(%edi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
549 movd %mm1, 384(%edi)
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
550
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
551 movd 40(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
552 pfadd 56(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
553 movd %mm0, 384(%esi)
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
554
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
555 movd 56(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
556 pfadd 36(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
557 movd %mm0, 128(%esi)
781
ee303142c2e0 improvements.
nickols_k
parents: 736
diff changeset
558
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
559 movd 60(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
560 movd %mm0, 896(%edi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
561 pfadd 44(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
562 movd %mm0, 640(%edi)
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
563
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
564 movq 96(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
565 movq 112(%edx), %mm2
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
566 movq 104(%edx), %mm4
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
567 pfadd 112(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
568 pfadd 104(%edx), %mm2
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
569 pfadd 120(%edx), %mm4
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
570 movq %mm0, %mm1
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
571 movq %mm2, %mm3
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
572 movq %mm4, %mm5
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
573 pfadd 64(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
574 pfadd 80(%edx), %mm2
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
575 pfadd 72(%edx), %mm4
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
576 movd %mm0, 960(%esi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
577 movd %mm2, 704(%esi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
578 movd %mm4, 448(%esi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
579 psrlq $32, %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
580 psrlq $32, %mm2
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
581 psrlq $32, %mm4
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
582 movd %mm0, 64(%edi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
583 movd %mm2, 320(%edi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
584 movd %mm4, 576(%edi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
585 pfadd 80(%edx), %mm1
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
586 pfadd 72(%edx), %mm3
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
587 pfadd 88(%edx), %mm5
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
588 movd %mm1, 832(%esi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
589 movd %mm3, 576(%esi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
590 movd %mm5, 320(%esi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
591 psrlq $32, %mm1
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
592 psrlq $32, %mm3
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
593 psrlq $32, %mm5
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
594 movd %mm1, 192(%edi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
595 movd %mm3, 448(%edi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
596 movd %mm5, 704(%edi)
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
597
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
598 movd 120(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
599 pfadd 100(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
600 movq %mm0, %mm1
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
601 pfadd 88(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
602 movd %mm0, 192(%esi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
603 pfadd 68(%edx), %mm1
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
604 movd %mm1, 64(%esi)
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
605
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
606 movd 124(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
607 movd %mm0, 960(%edi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
608 pfadd 92(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
609 movd %mm0, 832(%edi)
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
610
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
611 jmp .L_bye
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
612 .L01:
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
613 /* Phase 9*/
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
614 movd (%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
615 pfadd 4(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
616 pf2id %mm0, %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
617 movd %mm0, %eax
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
618 movw %ax, 512(%esi)
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
619
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
620 movd (%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
621 pfsub 4(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
622 pfmul 120(%ebx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
623 pf2id %mm0, %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
624 movd %mm0, %eax
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
625 movw %ax, (%esi)
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
626
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
627 movd 12(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
628 pfsub 8(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
629 pfmul 120(%ebx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
630 pf2id %mm0, %mm7
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
631 movd %mm7, %eax
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
632 movw %ax, 256(%edi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
633 pfadd 12(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
634 pfadd 8(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
635 pf2id %mm0, %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
636 movd %mm0, %eax
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
637 movw %ax, 256(%esi)
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
638
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
639 movd 16(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
640 pfsub 20(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
641 pfmul 120(%ebx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
642 movq %mm0, %mm3
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
643
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
644 movd 28(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
645 pfsub 24(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
646 pfmul 120(%ebx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
647 pf2id %mm0, %mm7
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
648 movd %mm7, %eax
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
649 movw %ax, 384(%edi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
650 movq %mm0, %mm2
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
651
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
652 pfadd 24(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
653 pfadd 28(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
654 movq %mm0, %mm1
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
655 pfadd 16(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
656 pfadd 20(%ecx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
657 pf2id %mm0, %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
658 movd %mm0, %eax
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
659 movw %ax, 384(%esi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
660 pfadd %mm3, %mm1
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
661 pf2id %mm1, %mm1
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
662 movd %mm1, %eax
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
663 movw %ax, 128(%esi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
664 pfadd %mm3, %mm2
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
665 pf2id %mm2, %mm2
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
666 movd %mm2, %eax
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
667 movw %ax, 128(%edi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
668
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
669
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
670 /* Phase 10*/
736
59b0a9ec8604 K7 3dnow-dsp support
nickols_k
parents:
diff changeset
671
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
672 movq 32(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
673 movq 48(%edx), %mm1
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
674 pfadd 48(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
675 pfadd 40(%edx), %mm1
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
676 pf2id %mm0, %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
677 pf2id %mm1, %mm1
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
678 movd %mm0, %eax
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
679 movd %mm1, %ecx
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
680 movw %ax, 448(%esi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
681 movw %cx, 320(%esi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
682 psrlq $32, %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
683 psrlq $32, %mm1
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
684 movd %mm0, %eax
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
685 movd %mm1, %ecx
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
686 movw %ax, 64(%edi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
687 movw %cx, 192(%edi)
736
59b0a9ec8604 K7 3dnow-dsp support
nickols_k
parents:
diff changeset
688
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
689 movd 40(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
690 pfadd 56(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
691 pf2id %mm0, %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
692 movd %mm0, %eax
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
693 movw %ax, 192(%esi)
736
59b0a9ec8604 K7 3dnow-dsp support
nickols_k
parents:
diff changeset
694
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
695 movd 56(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
696 pfadd 36(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
697 pf2id %mm0, %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
698 movd %mm0, %eax
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
699 movw %ax, 64(%esi)
736
59b0a9ec8604 K7 3dnow-dsp support
nickols_k
parents:
diff changeset
700
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
701 movd 60(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
702 pf2id %mm0, %mm7
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
703 movd %mm7, %eax
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
704 movw %ax, 448(%edi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
705 pfadd 44(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
706 pf2id %mm0, %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
707 movd %mm0, %eax
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
708 movw %ax, 320(%edi)
736
59b0a9ec8604 K7 3dnow-dsp support
nickols_k
parents:
diff changeset
709
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
710 movq 96(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
711 movq 112(%edx), %mm2
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
712 movq 104(%edx), %mm4
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
713 pfadd 112(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
714 pfadd 104(%edx), %mm2
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
715 pfadd 120(%edx), %mm4
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
716 movq %mm0, %mm1
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
717 movq %mm2, %mm3
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
718 movq %mm4, %mm5
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
719 pfadd 64(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
720 pfadd 80(%edx), %mm2
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
721 pfadd 72(%edx), %mm4
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
722 pf2id %mm0, %mm7
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
723 pf2id %mm2, %mm6
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
724 pf2id %mm4, %mm4
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
725 movd %mm7, %eax
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
726 movd %mm6, %ecx
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
727 movd %mm4, %ebx
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
728 movw %ax, 480(%esi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
729 movw %cx, 352(%esi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
730 movw %bx, 224(%esi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
731 psrlq $32, %mm7
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
732 psrlq $32, %mm6
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
733 psrlq $32, %mm4
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
734 movd %mm7, %eax
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
735 movd %mm6, %ecx
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
736 movd %mm4, %ebx
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
737 movw %ax, 32(%edi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
738 movw %cx, 160(%edi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
739 movw %bx, 288(%edi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
740 pfadd 80(%edx), %mm1
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
741 pfadd 72(%edx), %mm3
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
742 pfadd 88(%edx), %mm5
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
743 pf2id %mm1, %mm1
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
744 pf2id %mm3, %mm3
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
745 pf2id %mm5, %mm5
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
746 movd %mm1, %eax
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
747 movd %mm3, %ecx
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
748 movd %mm5, %ebx
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
749 movw %ax, 416(%esi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
750 movw %cx, 288(%esi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
751 movw %bx, 160(%esi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
752 psrlq $32, %mm1
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
753 psrlq $32, %mm3
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
754 psrlq $32, %mm5
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
755 movd %mm1, %eax
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
756 movd %mm3, %ecx
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
757 movd %mm5, %ebx
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
758 movw %ax, 96(%edi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
759 movw %cx, 224(%edi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
760 movw %bx, 352(%edi)
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
761
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
762 movd 120(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
763 pfadd 100(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
764 movq %mm0, %mm1
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
765 pfadd 88(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
766 pf2id %mm0, %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
767 movd %mm0, %eax
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
768 movw %ax, 96(%esi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
769 pfadd 68(%edx), %mm1
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
770 pf2id %mm1, %mm1
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
771 movd %mm1, %eax
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
772 movw %ax, 32(%esi)
736
59b0a9ec8604 K7 3dnow-dsp support
nickols_k
parents:
diff changeset
773
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
774 movq 124(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
775 pf2id %mm0, %mm1
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
776 movd %mm1, %eax
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
777 movw %ax, 480(%edi)
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
778 pfadd 92(%edx), %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
779 pf2id %mm0, %mm0
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
780 movd %mm0, %eax
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
781 movw %ax, 416(%edi)
736
59b0a9ec8604 K7 3dnow-dsp support
nickols_k
parents:
diff changeset
782
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
783 movsw
736
59b0a9ec8604 K7 3dnow-dsp support
nickols_k
parents:
diff changeset
784
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
785 .L_bye:
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
786 addl $256,%esp
1271
2864e32cd267 Finished 3dnow optimization (in scalar mode) and minor improvements
nick
parents: 1245
diff changeset
787 femms
1245
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
788 popl %edi
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
789 popl %esi
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
790 popl %ebx
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
791 ret
03b7e2955a20 Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents: 1173
diff changeset
792
736
59b0a9ec8604 K7 3dnow-dsp support
nickols_k
parents:
diff changeset
793