Mercurial > mplayer.hg
annotate mp3lib/d_cpu.s @ 2316:bcb229557e9b
fixed alignment (static variables were sometimes not 8-byte aligned)
added half uv interpolation support
added prefetch
BGR15 support in MMX (untested) (so BGR15,16,24,32 are supported)
special unscaled height version (not much faster, but it doesn't interpolate uv vertically)
author | michael |
---|---|
date | Sat, 20 Oct 2001 21:12:09 +0000 |
parents | 92b0270f8f8a |
children |
rev | line source |
---|---|
1 | 1 |
2 / --------------------------------------------------------------------------- | |
734 | 3 / Cpu function detect by Pontscho/fresh!mindworkz (c) 2000 - 2000 |
4 / 3dnow-dsp detection by Nick Kurshev (C) 2001 | |
1 | 5 / --------------------------------------------------------------------------- |
6 | |
7 .text | |
8 | |
9 .globl CpuDetect | |
10 .globl ipentium | |
11 .globl a3dnow | |
1245
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
739
diff
changeset
|
12 .globl isse |
1 | 13 |
/ ---------------------------------------------------------------------------
/  in C: unsigned long CpuDetect( void );
/  return: the value CPUID leaf 1 leaves in eax (cpu ident number),
/          or 0 when the CPUID instruction is not available.
/  ebx, ecx and edx are saved and restored; eflags is modified.
/ ---------------------------------------------------------------------------
CpuDetect:
        pushl   %ebx
        pushl   %ecx
        pushl   %edx

        / CPUID exists only if bit 21 (ID) of EFLAGS can be toggled.
        pushfl
        popl    %eax                    / eax = current EFLAGS
        movl    %eax, %ebx              / ebx = reference copy
        xorl    $0x00200000, %eax       / flip the ID bit
        pushl   %eax
        popfl                           / try to write the flipped value
        pushfl
        popl    %eax                    / eax = EFLAGS as actually stored
        xorl    %ebx, %eax              / eax = bits that really changed
        jz      .LCpuDetect_done        / nothing changed: no CPUID, eax is 0

        movl    $1, %eax                / leaf 1
        cpuid                           / result of interest is left in eax

.LCpuDetect_done:
        popl    %edx
        popl    %ecx
        popl    %ebx
        ret
46 | |
/ ---------------------------------------------------------------------------
/  in C: unsigned long ipentium( void );
/  return: 0 if this processor is an i386 or i486
/            (CPUID not available, or CPUID family below 5)
/          1 otherwise
/          3 if this cpu supports mmx
/  ebx, ecx and edx are saved and restored; eflags is modified.
/ ---------------------------------------------------------------------------
ipentium:
        pushl   %ebx
        pushl   %ecx
        pushl   %edx

        / CPUID exists only if bit 21 (ID) of EFLAGS can be toggled.
        pushfl
        popl    %eax                    / eax = current EFLAGS
        movl    %eax, %ebx              / ebx = reference copy
        xorl    $0x00200000, %eax       / flip the ID bit
        pushl   %eax
        popfl                           / try to write the flipped value
        pushfl
        popl    %eax                    / eax = EFLAGS as actually stored
        cmpl    %eax, %ebx
        jz      .Lipentium_no_cpuid     / bit did not change: no CPUID

        movl    $1, %eax                / leaf 1: signature in eax, features in edx
        cpuid
        movl    %eax, %ecx
        xorl    %eax, %eax              / result = 0 so far
        shrl    $8, %ecx
        andl    $0x0f, %ecx             / keep only the 4-bit family field; the
                                        / type/extended bits above it must not
                                        / take part in the comparison
        cmpl    $5, %ecx
        jb      .Lipentium_done         / family < 5: report 0
        incl    %eax                    / result = 1
        testl   $0x00800000, %edx       / feature bit 23: MMX
        jz      .Lipentium_done
        orl     $2, %eax                / result = 3
        jmp     .Lipentium_done

.Lipentium_no_cpuid:
        xorl    %eax, %eax
.Lipentium_done:
        popl    %edx
        popl    %ecx
        popl    %ebx
        ret
86 | |
/ ---------------------------------------------------------------------------
/  in C: unsigned long a3dnow( void );
/  return: 0 if this processor does not support 3dnow!
/          1 otherwise
/          3 if this cpu supports 3dnow-dsp extension
/  ebx, edx and ecx are saved and restored; eflags is modified.
/ ---------------------------------------------------------------------------
a3dnow:
        pushl   %ebx
        pushl   %edx
        pushl   %ecx

        call    ipentium
        testl   %eax, %eax
        jz      .La3dnow_done           / ipentium reported 0: return 0

        movl    $0x80000000, %eax       / ask for the highest extended leaf
        cpuid
        cmpl    $0x80000000, %eax
        jbe     .La3dnow_none           / leaf 0x80000001 is not available

        movl    $0x80000001, %eax       / extended feature flags -> edx
        cpuid
        testl   $0x80000000, %edx       / bit 31: 3DNow!
        jz      .La3dnow_none
        movl    $1, %eax                / result 1 - 3DNow!
        testl   $0x40000000, %edx       / bit 30: extended 3DNow!
        jz      .La3dnow_done
        movl    $3, %eax                / result 3 - 3DNow! with extensions
        jmp     .La3dnow_done

.La3dnow_none:
        xorl    %eax, %eax
.La3dnow_done:
        popl    %ecx
        popl    %edx
        popl    %ebx
        ret
1245
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
739
diff
changeset
|
127 |
03b7e2955a20
Added newest MMX-optimized decore which speedups decoding at least on 13% for any cpu.
nick
parents:
739
diff
changeset
|
/ ---------------------------------------------------------------------------
/  in C: unsigned long isse( void );
/  return: 0 if this processor does not support sse
/          1 otherwise
/          3 if this cpu supports sse2 extension
/  ebx, edx and ecx are saved and restored; eflags is modified.
/ ---------------------------------------------------------------------------
isse:
        pushl   %ebx
        pushl   %edx
        pushl   %ecx

        call    ipentium
        testl   %eax, %eax
        jz      .Lisse_done             / ipentium reported 0: return 0

        movl    $1, %eax                / leaf 1: feature flags -> edx
        cpuid
        testl   $0x02000000, %edx       / bit 25: SSE
        jz      .Lisse_none
        movl    $1, %eax                / result 1 - SSE
        testl   $0x04000000, %edx       / bit 26: SSE2
        jz      .Lisse_done
        movl    $3, %eax                / result 3 - SSE and SSE2
        jmp     .Lisse_done

.Lisse_none:
        xorl    %eax, %eax
.Lisse_done:
        popl    %ecx
        popl    %edx
        popl    %ebx
        ret