annotate ppc/check_altivec.c @ 11032:01bd040f8607 libavcodec

Unroll main loop so the edge==0 case is separate. This allows many things to be simplified away. h264 decoder is overall 1% faster with a mbaff sample and 0.1% slower with the cathedral sample, probably because the slow loop filter code must be loaded into the code cache for each first MB of each row but isn't used for the following MBs.
author michael
date Thu, 28 Jan 2010 01:24:25 +0000
parents 35514159910f
children 50415a8f1451
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
5750
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
1 /*
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
2 * This file is part of FFmpeg.
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
3 *
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
4 * FFmpeg is free software; you can redistribute it and/or
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
5 * modify it under the terms of the GNU Lesser General Public
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
6 * License as published by the Free Software Foundation; either
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
7 * version 2.1 of the License, or (at your option) any later version.
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
8 *
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
9 * FFmpeg is distributed in the hope that it will be useful,
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
12 * Lesser General Public License for more details.
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
13 *
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
14 * You should have received a copy of the GNU Lesser General Public
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
15 * License along with FFmpeg; if not, write to the Free Software
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
16 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
17 */
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
18
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
19
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
20 /**
8718
e9d9d946f213 Use full internal pathname in doxygen @file directives.
diego
parents: 8699
diff changeset
21 * @file libavcodec/ppc/check_altivec.c
5750
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
22 * Checks for AltiVec presence.
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
23 */
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
24
9619
35514159910f Add necessary header for CONFIG_RUNTIME_CPUDETECT preprocessor definition.
diego
parents: 9361
diff changeset
25 #include "config.h"
35514159910f Add necessary header for CONFIG_RUNTIME_CPUDETECT preprocessor definition.
diego
parents: 9361
diff changeset
26
5750
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
27 #ifdef __APPLE__
7852
0d60b8b7f30b Fix compilation on Mac OS X 10.4: Defining _POSIX_C_SOURCE hides the u_char &
diego
parents: 6750
diff changeset
28 #undef _POSIX_C_SOURCE
5750
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
29 #include <sys/sysctl.h>
8699
450a1c5bab22 Use '#if defined()' for OS-specific preprocessor checks.
diego
parents: 8031
diff changeset
30 #elif defined(__OpenBSD__)
7904
47717d5239fa AltiVec detection support for OpenBSD, patch by Brad, brad comstyle com.
diego
parents: 7852
diff changeset
31 #include <sys/param.h>
47717d5239fa AltiVec detection support for OpenBSD, patch by Brad, brad comstyle com.
diego
parents: 7852
diff changeset
32 #include <sys/sysctl.h>
47717d5239fa AltiVec detection support for OpenBSD, patch by Brad, brad comstyle com.
diego
parents: 7852
diff changeset
33 #include <machine/cpu.h>
8699
450a1c5bab22 Use '#if defined()' for OS-specific preprocessor checks.
diego
parents: 8031
diff changeset
34 #elif defined(__AMIGAOS4__)
5750
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
35 #include <exec/exec.h>
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
36 #include <interfaces/exec.h>
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
37 #include <proto/exec.h>
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
38 #endif /* __APPLE__ */
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
39
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
40 /**
7905
ae410599f388 spelling cosmetics
diego
parents: 7904
diff changeset
41 * This function MAY rely on signal() or fork() in order to make sure AltiVec
ae410599f388 spelling cosmetics
diego
parents: 7904
diff changeset
42 * is present.
5750
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
43 */
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
44
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
45 int has_altivec(void) /* returns 1 when AltiVec is reported (or assumed) present, 0 otherwise */
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
46 {
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
47 #ifdef __AMIGAOS4__ /* AmigaOS 4 branch: query the vector unit type through the exec interface */
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
48 ULONG result = 0;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
49 extern struct ExecIFace *IExec;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
50
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
51 IExec->GetCPUInfoTags(GCIT_VectorUnit, &result, TAG_DONE);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
52 if (result == VECTORTYPE_ALTIVEC) return 1;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
53 return 0; /* any other reported vector unit type counts as "no AltiVec" */
7904
47717d5239fa AltiVec detection support for OpenBSD, patch by Brad, brad comstyle com.
diego
parents: 7852
diff changeset
54 #elif defined(__APPLE__) || defined(__OpenBSD__) /* sysctl() branch; only the selector pair differs per OS */
47717d5239fa AltiVec detection support for OpenBSD, patch by Brad, brad comstyle com.
diego
parents: 7852
diff changeset
55 #ifdef __OpenBSD__
47717d5239fa AltiVec detection support for OpenBSD, patch by Brad, brad comstyle com.
diego
parents: 7852
diff changeset
56 int sels[2] = {CTL_MACHDEP, CPU_ALTIVEC};
47717d5239fa AltiVec detection support for OpenBSD, patch by Brad, brad comstyle com.
diego
parents: 7852
diff changeset
57 #else
5750
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
58 int sels[2] = {CTL_HW, HW_VECTORUNIT};
7904
47717d5239fa AltiVec detection support for OpenBSD, patch by Brad, brad comstyle com.
diego
parents: 7852
diff changeset
59 #endif
5750
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
60 int has_vu = 0; /* output buffer for sysctl(); starts at 0 */
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
61 size_t len = sizeof(has_vu);
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
62 int err;
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
63
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
64 err = sysctl(sels, 2, &has_vu, &len, NULL, 0); /* read-only query: no new value is passed (NULL, 0) */
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
65
6750
c93570aeb3eb Remove unnecessary parentheses from return calls.
diego
parents: 6159
diff changeset
66 if (err == 0) return has_vu != 0; /* has_vu is only used when sysctl() returned 0; result is normalized to 0/1 */
5750
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
67 return 0; /* sysctl() returned non-zero: report no AltiVec */
9361
53e5c6a453e9 configure: Add --enable-runtime-cpudetect
ramiro
parents: 8718
diff changeset
68 #elif CONFIG_RUNTIME_CPUDETECT /* remaining systems with runtime detection enabled: read SPR 287 via mfspr */
6159
eb2e3c3b7f78 Add mfspr-based AltiVec detection code.
reimar
parents: 5757
diff changeset
69 int proc_ver;
7905
ae410599f388 spelling cosmetics
diego
parents: 7904
diff changeset
70 // Support of mfspr PVR emulation added in Linux 2.6.17.
8031
eebc7209c47f Convert asm keyword into __asm__.
flameeyes
parents: 7905
diff changeset
71 __asm__ volatile("mfspr %0, 287" : "=r" (proc_ver));
6159
eb2e3c3b7f78 Add mfspr-based AltiVec detection code.
reimar
parents: 5757
diff changeset
72 proc_ver >>= 16; /* discard the low 16 bits; the checks below look only at the upper part of the register */
eb2e3c3b7f78 Add mfspr-based AltiVec detection code.
reimar
parents: 5757
diff changeset
73 if (proc_ver & 0x8000 || /* bit 15 set, or one of the listed version codes, is treated as AltiVec-capable */
eb2e3c3b7f78 Add mfspr-based AltiVec detection code.
reimar
parents: 5757
diff changeset
74 proc_ver == 0x000c ||
eb2e3c3b7f78 Add mfspr-based AltiVec detection code.
reimar
parents: 5757
diff changeset
75 proc_ver == 0x0039 || proc_ver == 0x003c ||
eb2e3c3b7f78 Add mfspr-based AltiVec detection code.
reimar
parents: 5757
diff changeset
76 proc_ver == 0x0044 || proc_ver == 0x0045 ||
eb2e3c3b7f78 Add mfspr-based AltiVec detection code.
reimar
parents: 5757
diff changeset
77 proc_ver == 0x0070) /* NOTE(review): which CPU models these codes denote is not stated here; confirm against vendor PVR tables */
eb2e3c3b7f78 Add mfspr-based AltiVec detection code.
reimar
parents: 5757
diff changeset
78 return 1;
eb2e3c3b7f78 Add mfspr-based AltiVec detection code.
reimar
parents: 5757
diff changeset
79 return 0;
5750
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
80 #else /* no OS-specific query and runtime detection disabled */
7905
ae410599f388 spelling cosmetics
diego
parents: 7904
diff changeset
81 // Since we were compiled for AltiVec, just assume we have it
5757
ace63c809071 Remove uses of SIGILL for CPU extension detection, that method is not acceptable
reimar
parents: 5750
diff changeset
82 // until someone comes up with a proper way (not involving signal hacks).
ace63c809071 Remove uses of SIGILL for CPU extension detection, that method is not acceptable
reimar
parents: 5750
diff changeset
83 return 1;
5750
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
84 #endif /* __AMIGAOS4__ */
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
85 }
09f99af1db40 Sanitize altivec code so it can be built with runtime check properly
lu_zero
parents:
diff changeset
86