Mercurial > mplayer.hg
annotate libmpeg2/cpu_accel.c @ 36112:e2e188ede1d1
Use better default for YUV mode autodetection.
The lookup method is a lot faster, and if gamma is not used
we use the same code as before that does not handle gamma at all.
author | reimar |
---|---|
date | Wed, 01 May 2013 19:06:51 +0000 |
parents | e83eef58b30a |
children |
rev | line source |
---|---|
9857 | 1 /* |
2 * cpu_accel.c | |
27572 | 3 * Copyright (C) 2000-2004 Michel Lespinasse <walken@zoy.org> |
9857 | 4 * Copyright (C) 1999-2000 Aaron Holtzman <aholtzma@ess.engr.uvic.ca> |
5 * | |
6 * This file is part of mpeg2dec, a free MPEG-2 video stream decoder. | |
7 * See http://libmpeg2.sourceforge.net/ for updates. | |
8 * | |
9 * mpeg2dec is free software; you can redistribute it and/or modify | |
10 * it under the terms of the GNU General Public License as published by | |
11 * the Free Software Foundation; either version 2 of the License, or | |
12 * (at your option) any later version. | |
13 * | |
14 * mpeg2dec is distributed in the hope that it will be useful, | |
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of | |
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
17 * GNU General Public License for more details. | |
18 * | |
19 * You should have received a copy of the GNU General Public License | |
20 * along with this program; if not, write to the Free Software | |
21 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA | |
14732
1385ec491ffb
Mark locally modified files as such to comply more closely with GPL 2a.
diego
parents:
13864
diff
changeset
|
22 * |
27571
fd18fa10de53
libmpeg-0.4.1.diff was renamed to libmpeg2_changes.diff.
diego
parents:
27340
diff
changeset
|
23 * Modified for use with MPlayer, see libmpeg2_changes.diff for the exact changes. |
18783 | 24 * detailed changelog at http://svn.mplayerhq.hu/mplayer/trunk/ |
14732
1385ec491ffb
Mark locally modified files as such to comply more closely with GPL 2a.
diego
parents:
13864
diff
changeset
|
25 * $Id$ |
9857 | 26 */ |
27 | |
28 #include "config.h" | |
29 | |
30 #include <inttypes.h> | |
31 | |
32 #include "mpeg2.h" | |
12932 | 33 #include "attributes.h" |
34 #include "mpeg2_internal.h" | |
9857 | 35 |
27572 | 36 #include "cpudetect.h" |
18727
24b2d27f2407
Use MPlayer's CPU detection module instead of libmpeg2's,
gpoirier
parents:
15483
diff
changeset
|
37 |
#if ARCH_X86 || ARCH_X86_64
/*
 * Build the x86 acceleration mask.
 *
 * In the active code path the incoming accel value is discarded and the
 * result is assembled purely from MPlayer's gCpuCaps flags.  libmpeg2's own
 * cpuid-based detection is retained under "#if 0" for reference only and is
 * never compiled.
 */
static inline uint32_t arch_accel (uint32_t accel)
{
    /* Use MPlayer CPU detection instead of libmpeg2 variant. */
#if 0
    if (accel & (MPEG2_ACCEL_X86_3DNOW | MPEG2_ACCEL_X86_MMXEXT))
        accel |= MPEG2_ACCEL_X86_MMX;

    if (accel & (MPEG2_ACCEL_X86_SSE2 | MPEG2_ACCEL_X86_SSE3))
        accel |= MPEG2_ACCEL_X86_MMXEXT;

    if (accel & (MPEG2_ACCEL_X86_SSE3))
        accel |= MPEG2_ACCEL_X86_SSE2;

#ifdef ACCEL_DETECT
    if (accel & MPEG2_ACCEL_DETECT) {
        uint32_t eax, ebx, ecx, edx;
        int AMD;

#if defined(__x86_64__) || (!defined(PIC) && !defined(__PIC__))
#define cpuid(op,eax,ebx,ecx,edx)       \
    __asm__ ("cpuid"                    \
             : "=a" (eax),              \
               "=b" (ebx),              \
               "=c" (ecx),              \
               "=d" (edx)               \
             : "a" (op)                 \
             : "cc")
#else   /* PIC version : save ebx (not needed on x86_64) */
#define cpuid(op,eax,ebx,ecx,edx)       \
    __asm__ ("pushl %%ebx\n\t"          \
             "cpuid\n\t"                \
             "movl %%ebx,%1\n\t"        \
             "popl %%ebx"               \
             : "=a" (eax),              \
               "=r" (ebx),              \
               "=c" (ecx),              \
               "=d" (edx)               \
             : "a" (op)                 \
             : "cc")
#endif

#ifndef __x86_64__ /* x86_64 supports the cpuid op */
        __asm__ ("pushf\n\t"
                 "pushf\n\t"
                 "pop %0\n\t"
                 "movl %0,%1\n\t"
                 "xorl $0x200000,%0\n\t"
                 "push %0\n\t"
                 "popf\n\t"
                 "pushf\n\t"
                 "pop %0\n\t"
                 "popf"
                 : "=r" (eax),
                   "=r" (ebx)
                 :
                 : "cc");

        if (eax == ebx)                 /* no cpuid */
            return accel;
#endif

        cpuid (0x00000000, eax, ebx, ecx, edx);
        if (!eax)                       /* vendor string only */
            return accel;

        AMD = (ebx == 0x68747541 && ecx == 0x444d4163 && edx == 0x69746e65);

        cpuid (0x00000001, eax, ebx, ecx, edx);
        if (! (edx & 0x00800000))       /* no MMX */
            return accel;

        accel |= MPEG2_ACCEL_X86_MMX;
        if (edx & 0x02000000)   /* SSE - identical to AMD MMX ext. */
            accel |= MPEG2_ACCEL_X86_MMXEXT;

        if (edx & 0x04000000)   /* SSE2 */
            accel |= MPEG2_ACCEL_X86_SSE2;

        if (ecx & 0x00000001)   /* SSE3 */
            accel |= MPEG2_ACCEL_X86_SSE3;

        cpuid (0x80000000, eax, ebx, ecx, edx);
        if (eax < 0x80000001)   /* no extended capabilities */
            return accel;

        cpuid (0x80000001, eax, ebx, ecx, edx);

        if (edx & 0x80000000)
            accel |= MPEG2_ACCEL_X86_3DNOW;

        if (AMD && (edx & 0x00400000))  /* AMD MMX extensions */
            accel |= MPEG2_ACCEL_X86_MMXEXT;
    }
#endif /* ACCEL_DETECT */

    return accel;

#else /* 0 */
    /* Start from an empty mask; the caller's request bits are not kept. */
    accel = 0;

    /* Translate each gCpuCaps capability into its libmpeg2 flag. */
    accel |= gCpuCaps.hasMMX   ? MPEG2_ACCEL_X86_MMX    : 0;
    accel |= gCpuCaps.hasSSE2  ? MPEG2_ACCEL_X86_SSE2   : 0;
    accel |= gCpuCaps.hasMMX2  ? MPEG2_ACCEL_X86_MMXEXT : 0;
    accel |= gCpuCaps.has3DNow ? MPEG2_ACCEL_X86_3DNOW  : 0;

    return accel;

#endif /* 0 */
}
#endif /* ARCH_X86 || ARCH_X86_64 */
9857 | 152 |
#if defined(ACCEL_DETECT) && (ARCH_PPC || ARCH_SPARC)
#include <signal.h>
#include <setjmp.h>

/* Jump target armed by sigsetjmp() in arch_accel() before each probe. */
static sigjmp_buf jmpbuf;
/* Non-zero only while a probe instruction is about to be executed. */
static volatile sig_atomic_t canjump = 0;

/*
 * SIGILL handler used while probing for optional instruction sets.
 *
 * sig: the signal number being handled.
 *
 * While a probe is armed (canjump != 0) the flag is cleared and control is
 * transferred back to the sigsetjmp() point with value 1.  Otherwise the
 * signal did not come from a probe: the default disposition is restored and
 * the signal is raised again.
 */
static RETSIGTYPE sigill_handler (int sig)
{
    if (!canjump) {
        signal (sig, SIG_DFL);
        raise (sig);
        /*
         * raise() can return here when the signal is blocked while its own
         * handler runs.  Do not fall through to siglongjmp() in that case:
         * jmpbuf may not have been set up yet, or may refer to a frame that
         * has already returned.
         */
    } else {
        canjump = 0;
        siglongjmp (jmpbuf, 1);
    }
}
#endif /* ACCEL_DETECT && (ARCH_PPC || ARCH_SPARC) */
9857 | 171 |
#if ARCH_PPC
/*
 * Build the PowerPC acceleration mask.
 *
 * With ACCEL_DETECT compiled in, and only when the caller set
 * MPEG2_ACCEL_DETECT without already setting MPEG2_ACCEL_PPC_ALTIVEC, a
 * vector instruction is executed under a temporary SIGILL handler.  If the
 * probe completes, MPEG2_ACCEL_PPC_ALTIVEC is added; if SIGILL fires, accel
 * is returned unchanged.  The previous SIGILL handler is restored either way.
 */
static uint32_t arch_accel (uint32_t accel)
{
#ifdef ACCEL_DETECT
    static RETSIGTYPE (* prev_handler) (int);

    /* Nothing to probe unless detection is requested and AltiVec is unset. */
    if ((accel & (MPEG2_ACCEL_PPC_ALTIVEC | MPEG2_ACCEL_DETECT)) !=
        MPEG2_ACCEL_DETECT)
        return accel;

    prev_handler = signal (SIGILL, sigill_handler);
    if (sigsetjmp (jmpbuf, 1)) {
        /* Reached via siglongjmp() from sigill_handler: probe trapped. */
        signal (SIGILL, prev_handler);
        return accel;
    }

    canjump = 1;

#if defined(__APPLE_CC__)       /* apple */
#define VAND(a,b,c) "vand v" #a ",v" #b ",v" #c "\n\t"
#else                           /* gnu */
#define VAND(a,b,c) "vand " #a "," #b "," #c "\n\t"
#endif
    /* Write -1 to SPR 256, then execute a vector AND as the probe. */
    asm volatile ("mtspr 256, %0\n\t"
                  VAND (0, 0, 0)
                  :
                  : "r" (-1));

    canjump = 0;
    accel |= MPEG2_ACCEL_PPC_ALTIVEC;

    signal (SIGILL, prev_handler);
#endif /* ACCEL_DETECT */

    return accel;
}
#endif /* ARCH_PPC */
208 | |
#if ARCH_SPARC
/*
 * Build the SPARC acceleration mask.
 *
 * A request for MPEG2_ACCEL_SPARC_VIS2 always implies MPEG2_ACCEL_SPARC_VIS.
 * With ACCEL_DETECT compiled in, and only when the caller set
 * MPEG2_ACCEL_DETECT without MPEG2_ACCEL_SPARC_VIS2, two instructions are
 * probed in turn under a temporary SIGILL handler: the first adds
 * MPEG2_ACCEL_SPARC_VIS, the second adds MPEG2_ACCEL_SPARC_VIS2.  A SIGILL on
 * either probe restores the previous handler and returns the flags gathered
 * so far.
 */
static uint32_t arch_accel (uint32_t accel)
{
#ifdef ACCEL_DETECT
    static RETSIGTYPE (* prev_handler) (int);
#endif

    if (accel & MPEG2_ACCEL_SPARC_VIS2)
        accel |= MPEG2_ACCEL_SPARC_VIS;

#ifdef ACCEL_DETECT
    /* Nothing to probe unless detection is requested and VIS2 is unset. */
    if ((accel & (MPEG2_ACCEL_SPARC_VIS2 | MPEG2_ACCEL_DETECT)) !=
        MPEG2_ACCEL_DETECT)
        return accel;

    prev_handler = signal (SIGILL, sigill_handler);

    /* First probe: VIS. */
    if (sigsetjmp (jmpbuf, 1)) {
        signal (SIGILL, prev_handler);
        return accel;
    }

    canjump = 1;

    /* pdist %f0, %f0, %f0 */
    __asm__ __volatile__(".word\t0x81b007c0");

    canjump = 0;
    accel |= MPEG2_ACCEL_SPARC_VIS;

    /* Second probe: VIS2.  Re-arm the jump target first. */
    if (sigsetjmp (jmpbuf, 1)) {
        signal (SIGILL, prev_handler);
        return accel;
    }

    canjump = 1;

    /* edge8n %g0, %g0, %g0 */
    __asm__ __volatile__(".word\t0x81b00020");

    canjump = 0;
    accel |= MPEG2_ACCEL_SPARC_VIS2;

    signal (SIGILL, prev_handler);
#endif /* ACCEL_DETECT */

    return accel;
}
#endif /* ARCH_SPARC */
254 | |
#if ARCH_ALPHA
/*
 * Build the Alpha acceleration mask.
 *
 * A request for MPEG2_ACCEL_ALPHA_MVI always implies MPEG2_ACCEL_ALPHA.
 * With ACCEL_DETECT compiled in and MPEG2_ACCEL_DETECT set, the "amask"
 * instruction is queried with bit 256 (AMASK_MVI): a non-zero result adds
 * only MPEG2_ACCEL_ALPHA, a zero result adds MPEG2_ACCEL_ALPHA_MVI as well.
 */
static inline uint32_t arch_accel (uint32_t accel)
{
    if (accel & MPEG2_ACCEL_ALPHA_MVI)
        accel |= MPEG2_ACCEL_ALPHA;

#ifdef ACCEL_DETECT
    if (accel & MPEG2_ACCEL_DETECT) {
        uint64_t mvi_missing;

        asm volatile ("amask %1, %0"
                      : "=r" (mvi_missing)
                      : "rI" (256));    /* AMASK_MVI */

        if (mvi_missing)
            accel |= MPEG2_ACCEL_ALPHA;
        else
            accel |= (MPEG2_ACCEL_ALPHA | MPEG2_ACCEL_ALPHA_MVI);
    }
#endif /* ACCEL_DETECT */

    return accel;
}
#endif /* ARCH_ALPHA */
276 | |
/*
 * Return the acceleration mask for the current build target.
 *
 * accel: flags supplied by the caller.
 *
 * On the architectures that provide an arch_accel() implementation (x86,
 * x86_64, PPC, Alpha, SPARC) the value is passed through it; on every other
 * target accel is returned unchanged.
 */
uint32_t mpeg2_detect_accel (uint32_t accel)
{
#if !(ARCH_X86 || ARCH_X86_64 || ARCH_PPC || ARCH_ALPHA || ARCH_SPARC)
    return accel;
#else
    return arch_accel (accel);
#endif
}