comparison mp3lib/tabinit_MMX.s @ 1245:03b7e2955a20

Added the newest MMX-optimized decore, which speeds up decoding by at least 13% on any CPU.
author nick
date Fri, 29 Jun 2001 17:55:35 +0000
parents
children
comparison
equal deleted inserted replaced
1244:a2c71bf9a7d3 1245:03b7e2955a20
1 # This code was taken from http://www.mpg123.org
2 # See ChangeLog of mpg123-0.59s-pre.1 for detail
3 # Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
4 .bss
5 .align 8
# decwin: 2176-byte common block. make_decode_tables_MMX fills it with
# single-precision floats (fsts/fstps, 4-byte stride), i.e. 544 float slots.
# The third .comm operand is the requested alignment (ELF form of .comm).
6 .comm decwin,2176,32
7 .align 8
# decwins: 2176-byte common block. make_decode_tables_MMX fills it with
# 16-bit words (movw, 2-byte stride), i.e. 1088 word slots, each clamped
# to the range [-32767, 32767] before being stored.
8 .comm decwins,2176,32
9 .data
10 .align 8
# intwinbase_MMX: 257 signed 16-bit entries (32 rows of 8, plus one final
# entry) read by make_decode_tables_MMX with movswl.
# Entries at byte offset >= 444 (index >= 222) are stored with 60000
# subtracted so that they fit in 16 bits; the reader adds 60000 back
# (e.g. the stored -26209 is used as 33791, the final 15038 as 75038).
11 intwinbase_MMX:
12 .value 0, -1, -1, -1, -1, -1, -1, -2
13 .value -2, -2, -2, -3, -3, -4, -4, -5
14 .value -5, -6, -7, -7, -8, -9, -10, -11
15 .value -13, -14, -16, -17, -19, -21, -24, -26
16 .value -29, -31, -35, -38, -41, -45, -49, -53
17 .value -58, -63, -68, -73, -79, -85, -91, -97
18 .value -104, -111, -117, -125, -132, -139, -147, -154
19 .value -161, -169, -176, -183, -190, -196, -202, -208
20 .value -213, -218, -222, -225, -227, -228, -228, -227
21 .value -224, -221, -215, -208, -200, -189, -177, -163
22 .value -146, -127, -106, -83, -57, -29, 2, 36
23 .value 72, 111, 153, 197, 244, 294, 347, 401
24 .value 459, 519, 581, 645, 711, 779, 848, 919
25 .value 991, 1064, 1137, 1210, 1283, 1356, 1428, 1498
26 .value 1567, 1634, 1698, 1759, 1817, 1870, 1919, 1962
27 .value 2001, 2032, 2057, 2075, 2085, 2087, 2080, 2063
28 .value 2037, 2000, 1952, 1893, 1822, 1739, 1644, 1535
29 .value 1414, 1280, 1131, 970, 794, 605, 402, 185
30 .value -45, -288, -545, -814, -1095, -1388, -1692, -2006
31 .value -2330, -2663, -3004, -3351, -3705, -4063, -4425, -4788
32 .value -5153, -5517, -5879, -6237, -6589, -6935, -7271, -7597
33 .value -7910, -8209, -8491, -8755, -8998, -9219, -9416, -9585
34 .value -9727, -9838, -9916, -9959, -9966, -9935, -9863, -9750
35 .value -9592, -9389, -9139, -8840, -8492, -8092, -7640, -7134
36 .value -6574, -5959, -5288, -4561, -3776, -2935, -2037, -1082
37 .value -70, 998, 2122, 3300, 4533, 5818, 7154, 8540
38 .value 9975, 11455, 12980, 14548, 16155, 17799, 19478, 21189
39 .value 22929, 24694, 26482, 28289, 30112, 31947,-26209,-24360 # -26209 is index 222 (byte offset 444): first entry stored minus 60000
40 .value -22511,-20664,-18824,-16994,-15179,-13383,-11610, -9863
41 .value -8147, -6466, -4822, -3222, -1667, -162, 1289, 2684
42 .value 4019, 5290, 6494, 7629, 8692, 9679, 10590, 11420
43 .value 12169, 12835, 13415, 13908, 14313, 14630, 14856, 14992
44 .value 15038
45
# intwindiv: IEEE-754 single-precision bit pattern for 65536.0 (2^16);
# used as the divisor in the fdivs of the float-table loop.
46 intwindiv:
47 .long 0x47800000 # 65536.0 as a 32-bit float
48 .text
49 .align 32
50 .globl make_decode_tables_MMX
# make_decode_tables_MMX: fill the float table decwin and the 16-bit table
# decwins from intwinbase_MMX, scaled by the caller's scale value.
#
# In:    one 32-bit integer argument on the stack ("scaleval"), found at
#        16(%esp) after the three register pushes below. The argument slot
#        is negated in place several times while the tables are built.
# Out:   no return value is prepared; %eax holds the popped step word on exit.
# Saves: %edi, %esi, %ebx (pushed on entry, popped before ret).
# Uses:  %eax, %ecx, %edx, flags, x87 stack (each fildl is matched by fstps).
#
# Register roles in both loops:
#   %edi = pointer to the current intwinbase_MMX entry
#   %ebx = iteration counter
#   %ecx = destination index into decwin / decwins
#   %esi = constant 32 (index stride and bit mask source)
#   (%esp) = signed byte step applied to %edi (+2, flipped to -2 at %ebx == 256)
# Each loop walks the table upward for 256 entries, then back down, and
# ends when %edi is back at intwinbase_MMX.
51 make_decode_tables_MMX:
52 pushl %edi # save callee-saved registers
53 pushl %esi
54 pushl %ebx
55
56 xorl %ecx,%ecx # destination index = 0
57 xorl %ebx,%ebx # iteration counter = 0
58 movl $32,%esi # stride / mask constant
59 movl $intwinbase_MMX,%edi # source pointer = start of table
60 negl 16(%esp) # scaleval = -scaleval (argument slot, modified in place)
61 pushl $2 # intwinbase step in bytes; scaleval is now at 20(%esp)
62 .L00:
63 cmpl $528,%ecx
64 jnc .L02 # index >= 528 (unsigned): no store this iteration
65 movswl (%edi),%eax # sign-extend the 16-bit table entry
66 cmpl $intwinbase_MMX+444,%edi
67 jc .L01 # entries below byte offset 444 are used as stored
68 addl $60000,%eax # undo the -60000 bias of the later entries
69 .L01:
70 pushl %eax # spill to memory so the x87 unit can load it
71 fildl (%esp) # st0 = entry as integer
72 fdivs intwindiv # st0 /= 65536.0
73 fimull 24(%esp) # st0 *= scaleval (20(%esp) shifted by the push above)
74 popl %eax # drop the spill slot
75 fsts decwin(,%ecx,4) # decwin[index] = st0
76 fstps decwin+64(,%ecx,4) # decwin[index+16] = st0, and pop the x87 stack
77 .L02:
78 leal -1(%esi),%edx # edx = 31
79 and %ebx,%edx # edx = counter & 31
80 cmp $31,%edx
81 jnz .L03 # only act on every 32nd iteration
82 addl $-1023,%ecx # with the +32 below: net index change is -991
83 test %esi,%ebx # counter bit 5 also set => counter % 64 == 63
84 jz .L03
85 negl 20(%esp) # flip the sign of scaleval
86 .L03:
87 addl %esi,%ecx # index += 32
88 addl (%esp),%edi # advance source pointer by the current step
89 incl %ebx
90 cmpl $intwinbase_MMX,%edi
91 jz .L04 # back at the table start: float table done
92 cmp $256,%ebx
93 jnz .L00
94 negl (%esp) # after 256 entries, reverse direction (step = -step)
95 jmp .L00
96 .L04:
97 popl %eax # discard the step word; scaleval is back at 16(%esp)
98
99 xorl %ecx,%ecx # restart: destination index = 0
100 xorl %ebx,%ebx # iteration counter = 0 (%edi is already at the table start)
101 pushl $2 # fresh +2 byte step; scaleval is at 20(%esp) again
102 .L05:
103 cmpl $528,%ecx
104 jnc .L11 # index >= 528 (unsigned): no store this iteration
105 movswl (%edi),%eax # sign-extend the 16-bit table entry
106 cmpl $intwinbase_MMX+444,%edi
107 jc .L06 # entries below byte offset 444 are used as stored
108 addl $60000,%eax # undo the -60000 bias of the later entries
109 .L06:
110 cltd # sign-extend eax into edx (edx is overwritten by the imull below)
111 imull 20(%esp) # edx:eax = entry * scaleval (signed 64-bit product)
112 shrdl $17,%edx,%eax # eax = low 32 bits of (product >> 17)
113 cmpl $32767,%eax
114 movl $1055,%edx # base for the mirrored index; mov leaves the cmpl flags intact
115 jle .L07
116 movl $32767,%eax # clamp to the upper bound
117 jmp .L08
118 .L07:
119 cmpl $-32767,%eax
120 jge .L08
121 movl $-32767,%eax # clamp to the lower bound
122 .L08:
123 cmpl $512,%ecx
124 jnc .L09 # mirrored stores only for index < 512
125 subl %ecx,%edx # edx = 1055 - index
126 movw %ax,decwins(,%edx,2) # decwins[1055 - index] = value
127 movw %ax,decwins-32(,%edx,2) # decwins[1055 - index - 16] = value
128 .L09:
129 testl $1,%ecx
130 jnz .L10 # odd index: keep the sign
131 negl %eax # even index: store the negated value
132 .L10:
133 movw %ax,decwins(,%ecx,2) # decwins[index] = value
134 movw %ax,decwins+32(,%ecx,2) # decwins[index+16] = value
135 .L11:
136 leal -1(%esi),%edx # edx = 31
137 and %ebx,%edx # edx = counter & 31
138 cmp $31,%edx
139 jnz .L12 # only act on every 32nd iteration
140 addl $-1023,%ecx # with the +32 below: net index change is -991
141 test %esi,%ebx # counter bit 5 also set => counter % 64 == 63
142 jz .L12
143 negl 20(%esp) # flip the sign of scaleval
144 .L12:
145 addl %esi,%ecx # index += 32
146 addl (%esp),%edi # advance source pointer by the current step
147 incl %ebx
148 cmpl $intwinbase_MMX,%edi
149 jz .L13 # back at the table start: 16-bit table done
150 cmp $256,%ebx
151 jnz .L05
152 negl (%esp) # after 256 entries, reverse direction (step = -step)
153 jmp .L05
154 .L13:
155 popl %eax # discard the step word
156
157 popl %ebx # restore callee-saved registers
158 popl %esi
159 popl %edi
160 ret
161