Mercurial > mplayer.hg
annotate libmpeg2/cpu_accel.c @ 25648:2438052a176e
Another small simplification. Slightly worse performance in the case
where a buffer underrun happens, but this really should not matter.
author | reimar |
---|---|
date | Fri, 11 Jan 2008 20:36:33 +0000 |
parents | 60a39d71e247 |
children | 11181df06389 |
rev | line source |
---|---|
9857 | 1 /* |
2 * cpu_accel.c | |
10303 | 3 * Copyright (C) 2000-2003 Michel Lespinasse <walken@zoy.org> |
9857 | 4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> |
5 * | |
6 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. | |
7 * See http://libmpeg2.sourceforge.net/ for updates. | |
8 * | |
9 * mpeg2dec is free software; you can redistribute it and/or modify | |
10 * it under the terms of the GNU General Public License as published by | |
11 * the Free Software Foundation; either version 2 of the License, or | |
12 * (at your option) any later version. | |
13 * | |
14 * mpeg2dec is distributed in the hope that it will be useful, | |
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 * GNU General Public License for more details. | |
18 * | |
19 * You should have received a copy of the GNU General Public License | |
20 * along with this program; if not, write to the Free Software | |
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
14732
1385ec491ffb
Mark locally modified files as such to comply more closely with GPL 2a.
diego
parents:
13864
diff
changeset
|
22 * |
21526 | 23 * Modified for use with MPlayer, see libmpeg-0.4.1.diff for the exact changes. |
18783 | 24 * detailed changelog at http://svn.mplayerhq.hu/mplayer/trunk/ |
14732
1385ec491ffb
Mark locally modified files as such to comply more closely with GPL 2a.
diego
parents:
13864
diff
changeset
|
25 * $Id$ |
9857 | 26 */ |
27 | |
28 #include "config.h" | |
18727
24b2d27f2407
Use MPlayer's CPU detection module instead of libmpeg2's,
gpoirier
parents:
15483
diff
changeset
|
29 #include "cpudetect.h" |
9857 | 30 |
31 #include <inttypes.h> | |
32 | |
33 #include "mpeg2.h" | |
12932 | 34 #include "attributes.h" |
35 #include "mpeg2_internal.h" | |
9857 | 36 |
37 #ifdef ACCEL_DETECT | |
13864 | 38 #if defined(ARCH_X86) || defined(ARCH_X86_64) |
18727
24b2d27f2407
Use MPlayer's CPU detection module instead of libmpeg2's,
gpoirier
parents:
15483
diff
changeset
|
39 |
24b2d27f2407
Use MPlayer's CPU detection module instead of libmpeg2's,
gpoirier
parents:
15483
diff
changeset
|
40 /* MPlayer imports libmpeg2 as decoder, which detects MMX / 3DNow! |
24b2d27f2407
Use MPlayer's CPU detection module instead of libmpeg2's,
gpoirier
parents:
15483
diff
changeset
|
41 * instructions via assembly. However, it is regarded as duplicated work |
24b2d27f2407
Use MPlayer's CPU detection module instead of libmpeg2's,
gpoirier
parents:
15483
diff
changeset
|
42 * in MPlayer, so we enforce the use of MPlayer's implementation. |
24b2d27f2407
Use MPlayer's CPU detection module instead of libmpeg2's,
gpoirier
parents:
15483
diff
changeset
|
43 */ |
24b2d27f2407
Use MPlayer's CPU detection module instead of libmpeg2's,
gpoirier
parents:
15483
diff
changeset
|
44 #define USE_MPLAYER_CPUDETECT |
24b2d27f2407
Use MPlayer's CPU detection module instead of libmpeg2's,
gpoirier
parents:
15483
diff
changeset
|
45 |
9857 | 46 static inline uint32_t arch_accel (void) |
47 { | |
18727
24b2d27f2407
Use MPlayer's CPU detection module instead of libmpeg2's,
gpoirier
parents:
15483
diff
changeset
|
48 #if !defined(USE_MPLAYER_CPUDETECT) |
9857 | 49 uint32_t eax, ebx, ecx, edx; |
50 int AMD; | |
51 uint32_t caps; | |
52 | |
21526 | 53 #if defined(__x86_64__) || (!defined(PIC) && !defined(__PIC__)) |
9857 | 54 #define cpuid(op,eax,ebx,ecx,edx) \ |
55 __asm__ ("cpuid" \ | |
56 : "=a" (eax), \ | |
57 "=b" (ebx), \ | |
58 "=c" (ecx), \ | |
59 "=d" (edx) \ | |
60 : "a" (op) \ | |
61 : "cc") | |
21526 | 62 #else /* PIC version : save ebx (not needed on x86_64) */ |
9857 | 63 #define cpuid(op,eax,ebx,ecx,edx) \ |
21526 | 64 __asm__ ("pushl %%ebx\n\t" \ |
9857 | 65 "cpuid\n\t" \ |
66 "movl %%ebx,%1\n\t" \ | |
21526 | 67 "popl %%ebx" \ |
9857 | 68 : "=a" (eax), \ |
69 "=r" (ebx), \ | |
70 "=c" (ecx), \ | |
71 "=d" (edx) \ | |
72 : "a" (op) \ | |
73 : "cc") | |
74 #endif | |
75 | |
21526 | 76 #ifndef __x86_64__ /* x86_64 supports the cpuid op */ |
9857 | 77 __asm__ ("pushf\n\t" |
78 "pushf\n\t" | |
79 "pop %0\n\t" | |
80 "movl %0,%1\n\t" | |
81 "xorl $0x200000,%0\n\t" | |
82 "push %0\n\t" | |
83 "popf\n\t" | |
84 "pushf\n\t" | |
85 "pop %0\n\t" | |
86 "popf" | |
87 : "=r" (eax), | |
88 "=r" (ebx) | |
89 : | |
90 : "cc"); | |
91 | |
92 if (eax == ebx) /* no cpuid */ | |
93 return 0; | |
21526 | 94 #endif |
9857 | 95 |
96 cpuid (0x00000000, eax, ebx, ecx, edx); | |
97 if (!eax) /* vendor string only */ | |
98 return 0; | |
99 | |
100 AMD = (ebx == 0x68747541) && (ecx == 0x444d4163) && (edx == 0x69746e65); | |
101 | |
102 cpuid (0x00000001, eax, ebx, ecx, edx); | |
103 if (! (edx & 0x00800000)) /* no MMX */ | |
104 return 0; | |
105 | |
106 caps = MPEG2_ACCEL_X86_MMX; | |
107 if (edx & 0x02000000) /* SSE - identical to AMD MMX extensions */ | |
108 caps = MPEG2_ACCEL_X86_MMX | MPEG2_ACCEL_X86_MMXEXT; | |
109 | |
110 cpuid (0x80000000, eax, ebx, ecx, edx); | |
111 if (eax < 0x80000001) /* no extended capabilities */ | |
112 return caps; | |
113 | |
114 cpuid (0x80000001, eax, ebx, ecx, edx); | |
115 | |
116 if (edx & 0x80000000) | |
117 caps |= MPEG2_ACCEL_X86_3DNOW; | |
118 | |
119 if (AMD && (edx & 0x00400000)) /* AMD MMX extensions */ | |
120 caps |= MPEG2_ACCEL_X86_MMXEXT; | |
121 | |
122 return caps; | |
18727
24b2d27f2407
Use MPlayer's CPU detection module instead of libmpeg2's,
gpoirier
parents:
15483
diff
changeset
|
123 #else /* USE_MPLAYER_CPUDETECT: Use MPlayer's cpu capability property */ |
24b2d27f2407
Use MPlayer's CPU detection module instead of libmpeg2's,
gpoirier
parents:
15483
diff
changeset
|
124 caps = 0; |
24b2d27f2407
Use MPlayer's CPU detection module instead of libmpeg2's,
gpoirier
parents:
15483
diff
changeset
|
125 if (gCpuCaps.hasMMX) |
24b2d27f2407
Use MPlayer's CPU detection module instead of libmpeg2's,
gpoirier
parents:
15483
diff
changeset
|
126 caps |= MPEG2_ACCEL_X86_MMX; |
24b2d27f2407
Use MPlayer's CPU detection module instead of libmpeg2's,
gpoirier
parents:
15483
diff
changeset
|
127 if (gCpuCaps.hasSSE2) |
24b2d27f2407
Use MPlayer's CPU detection module instead of libmpeg2's,
gpoirier
parents:
15483
diff
changeset
|
128 caps |= MPEG2_ACCEL_X86_SSE2; |
24b2d27f2407
Use MPlayer's CPU detection module instead of libmpeg2's,
gpoirier
parents:
15483
diff
changeset
|
129 if (gCpuCaps.hasMMX2) |
24b2d27f2407
Use MPlayer's CPU detection module instead of libmpeg2's,
gpoirier
parents:
15483
diff
changeset
|
130 caps |= MPEG2_ACCEL_X86_MMXEXT; |
24b2d27f2407
Use MPlayer's CPU detection module instead of libmpeg2's,
gpoirier
parents:
15483
diff
changeset
|
131 if (gCpuCaps.has3DNow) |
24b2d27f2407
Use MPlayer's CPU detection module instead of libmpeg2's,
gpoirier
parents:
15483
diff
changeset
|
132 caps |= MPEG2_ACCEL_X86_3DNOW; |
24b2d27f2407
Use MPlayer's CPU detection module instead of libmpeg2's,
gpoirier
parents:
15483
diff
changeset
|
133 |
24b2d27f2407
Use MPlayer's CPU detection module instead of libmpeg2's,
gpoirier
parents:
15483
diff
changeset
|
134 return caps; |
24b2d27f2407
Use MPlayer's CPU detection module instead of libmpeg2's,
gpoirier
parents:
15483
diff
changeset
|
135 |
24b2d27f2407
Use MPlayer's CPU detection module instead of libmpeg2's,
gpoirier
parents:
15483
diff
changeset
|
136 #endif /* USE_MPLAYER_CPUDETECT */ |
9857 | 137 } |
13864 | 138 #endif /* ARCH_X86 || ARCH_X86_64 */ |
9857 | 139 |
13018
adb93ef6b07f
Improved SPARC CPU detection and SPARC compilation fixes.
diego
parents:
12932
diff
changeset
|
140 #if defined(ARCH_PPC) || (defined(ARCH_SPARC) && defined(HAVE_VIS)) |
9857 | 141 #include <signal.h> |
142 #include <setjmp.h> | |
143 | |
144 static sigjmp_buf jmpbuf; | |
145 static volatile sig_atomic_t canjump = 0; | |
146 | |
147 static RETSIGTYPE sigill_handler (int sig) | |
148 { | |
149 if (!canjump) { | |
150 signal (sig, SIG_DFL); | |
151 raise (sig); | |
152 } | |
153 | |
154 canjump = 0; | |
155 siglongjmp (jmpbuf, 1); | |
156 } | |
157 | |
12932 | 158 #ifdef ARCH_PPC |
21526 | 159 static uint32_t arch_accel (void) |
9857 | 160 { |
10303 | 161 static RETSIGTYPE (* oldsig) (int); |
162 | |
163 oldsig = signal (SIGILL, sigill_handler); | |
9857 | 164 if (sigsetjmp (jmpbuf, 1)) { |
10303 | 165 signal (SIGILL, oldsig); |
9857 | 166 return 0; |
167 } | |
168 | |
169 canjump = 1; | |
170 | |
15483 | 171 #if defined( __APPLE_CC__ ) && defined( __APPLE_ALTIVEC__ ) /* apple */ |
172 #define VAND(a,b,c) "vand v" #a ",v" #b ",v" #c "\n\t" | |
173 #else /* gnu */ | |
10303 | 174 #define VAND(a,b,c) "vand " #a "," #b "," #c "\n\t" |
175 #endif | |
9857 | 176 asm volatile ("mtspr 256, %0\n\t" |
10303 | 177 VAND (0, 0, 0) |
9857 | 178 : |
179 : "r" (-1)); | |
180 | |
12932 | 181 canjump = 0; |
182 | |
10303 | 183 signal (SIGILL, oldsig); |
9857 | 184 return MPEG2_ACCEL_PPC_ALTIVEC; |
185 } | |
186 #endif /* ARCH_PPC */ | |
187 | |
12932 | 188 #ifdef ARCH_SPARC |
21526 | 189 static uint32_t arch_accel (void) |
12932 | 190 { |
191 static RETSIGTYPE (* oldsig) (int); | |
192 | |
193 oldsig = signal (SIGILL, sigill_handler); | |
194 if (sigsetjmp (jmpbuf, 1)) { | |
195 signal (SIGILL, oldsig); | |
196 return 0; | |
197 } | |
198 | |
199 canjump = 1; | |
200 | |
201 /* pdist %f0, %f0, %f0 */ | |
202 __asm__ __volatile__(".word\t0x81b007c0"); | |
203 | |
204 canjump = 0; | |
205 | |
206 if (sigsetjmp (jmpbuf, 1)) { | |
207 signal (SIGILL, oldsig); | |
208 return MPEG2_ACCEL_SPARC_VIS; | |
209 } | |
210 | |
211 canjump = 1; | |
212 | |
213 /* edge8n %g0, %g0, %g0 */ | |
214 __asm__ __volatile__(".word\t0x81b00020"); | |
215 | |
216 canjump = 0; | |
217 | |
218 signal (SIGILL, oldsig); | |
219 return MPEG2_ACCEL_SPARC_VIS | MPEG2_ACCEL_SPARC_VIS2; | |
220 } | |
221 #endif /* ARCH_SPARC */ | |
222 #endif /* ARCH_PPC || ARCH_SPARC */ | |
223 | |
9857 | 224 #ifdef ARCH_ALPHA |
21526 | 225 static uint32_t arch_accel (void) |
9857 | 226 { |
10488 | 227 #ifdef CAN_COMPILE_ALPHA_MVI |
9857 | 228 uint64_t no_mvi; |
229 | |
230 asm volatile ("amask %1, %0" | |
231 : "=r" (no_mvi) | |
232 : "rI" (256)); /* AMASK_MVI */ | |
233 return no_mvi ? MPEG2_ACCEL_ALPHA : (MPEG2_ACCEL_ALPHA | | |
234 MPEG2_ACCEL_ALPHA_MVI); | |
10488 | 235 #else |
236 return MPEG2_ACCEL_ALPHA; | |
237 #endif | |
9857 | 238 } |
239 #endif /* ARCH_ALPHA */ | |
12932 | 240 #endif /* ACCEL_DETECT */ |
9857 | 241 |
/**
 * Detect the acceleration features of the host CPU.
 *
 * @return the bitmask produced by the architecture-specific arch_accel()
 *         probe, or 0 when ACCEL_DETECT is not defined or no probe is
 *         compiled for the target architecture.
 */
uint32_t mpeg2_detect_accel (void)
{
    uint32_t accel;

    accel = 0;
#ifdef ACCEL_DETECT
    /* This condition must mirror the guards around the arch_accel()
     * definitions: the SPARC probe only exists when HAVE_VIS is defined,
     * so a bare ARCH_SPARC test would call an undeclared function. */
#if defined (ARCH_X86) || defined (ARCH_X86_64) || defined (ARCH_PPC) || defined (ARCH_ALPHA) || (defined (ARCH_SPARC) && defined (HAVE_VIS))
    accel = arch_accel ();
#endif
#endif
    return accel;
}