annotate libvo/aclib.c @ 33179:218edd8fc782

Cosmetic: Format to MPlayer coding style. Additionally: remove needless includes, group and sort includes, group and sort variables, rename gtkAOFakeSurround declaration gtkAOSurround, add #ifdefs to variable declarations, group statements by adding or removing new lines to ease reading, move assignments outside conditions, add parentheses, avoid mixing declaration and code, revise comments and add new ones.
author ib
date Fri, 15 Apr 2011 14:30:58 +0000
parents 807fce7a4bb3
children 4e2f4bd081ce
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
28446
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
1 /*
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
2 * aclib - advanced C library ;)
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
3 * Functions which improve and expand the standard C library, see aclib_template.c.
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
4 * This file only contains runtime CPU detection and config option stuff.
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
5 * runtime CPU detection by Michael Niedermayer (michaelni@gmx.at)
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
6 *
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
7 * This file is part of MPlayer.
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
8 *
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
9 * MPlayer is free software; you can redistribute it and/or modify
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
10 * it under the terms of the GNU General Public License as published by
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
11 * the Free Software Foundation; either version 2 of the License, or
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
12 * (at your option) any later version.
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
13 *
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
14 * MPlayer is distributed in the hope that it will be useful,
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
17 * GNU General Public License for more details.
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
18 *
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
19 * You should have received a copy of the GNU General Public License along
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
20 * with MPlayer; if not, write to the Free Software Foundation, Inc.,
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
21 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
22 */
7681eab10aea Add standard license headers, unify header formatting.
diego
parents: 28335
diff changeset
23
12650
ac3fd2ff2561 Unify the config.h #include, use "config.h" instead of "../config.h"
diego
parents: 12492
diff changeset
24 #include "config.h"
698
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
25 #include <stddef.h>
30135
807fce7a4bb3 Do not assume that "long" is the size of a register.
reimar
parents: 29114
diff changeset
26 #include <stdint.h>
21982
fa66a03e8920 Include string.h to make sure memcpy is not used without prototype
reimar
parents: 20577
diff changeset
27 #include <string.h>
13787
e047e70a9767 Handle "xxx.h" vs "../xxx.h" include paths in a consistent way.
diego
parents: 13720
diff changeset
28 #include "cpudetect.h"
8123
9fc45fe0d444 *HUGE* set of compiler warning fixes, unused variables removal
arpi
parents: 7072
diff changeset
29 #include "fastmemcpy.h"
30135
807fce7a4bb3 Do not assume that "long" is the size of a register.
reimar
parents: 29114
diff changeset
30 #include "libavutil/x86_cpu.h"
12492
4b8417674f1c fix crash due to fast_memcpy calling itself instead of libc memcpy
reimar
parents: 8127
diff changeset
31 #undef memcpy
698
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
32
3077
99f6db3255aa 10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents: 1123
diff changeset
33 #define BLOCK_SIZE 4096
99f6db3255aa 10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents: 1123
diff changeset
34 #define CONFUSION_FACTOR 0
99f6db3255aa 10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents: 1123
diff changeset
35 //Feel free to fine-tune the above 2, it might be possible to get some speedup with them :)
99f6db3255aa 10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents: 1123
diff changeset
36
99f6db3255aa 10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents: 1123
diff changeset
37 //#define STATISTICS
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
38
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
39 //Note: we have MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
40 //Plain C versions
29114
06540eb5ef6a Rename RUNTIME_CPUDETECT to CONFIG_RUNTIME_CPUDETECT and always define it.
ramiro
parents: 28921
diff changeset
41 //#if !HAVE_MMX || CONFIG_RUNTIME_CPUDETECT
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
42 //#define COMPILE_C
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
43 //#endif
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
44
28921
62f0032e736a Get rid of pointless preprocessor condition indirection and use ARCH_X86
diego
parents: 28448
diff changeset
45 #if ARCH_X86
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
46
29114
06540eb5ef6a Rename RUNTIME_CPUDETECT to CONFIG_RUNTIME_CPUDETECT and always define it.
ramiro
parents: 28921
diff changeset
47 #if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
48 #define COMPILE_MMX
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
49 #endif
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
50
29114
06540eb5ef6a Rename RUNTIME_CPUDETECT to CONFIG_RUNTIME_CPUDETECT and always define it.
ramiro
parents: 28921
diff changeset
51 #if (HAVE_MMX2 && !HAVE_SSE2) || CONFIG_RUNTIME_CPUDETECT
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
52 #define COMPILE_MMX2
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
53 #endif
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
54
29114
06540eb5ef6a Rename RUNTIME_CPUDETECT to CONFIG_RUNTIME_CPUDETECT and always define it.
ramiro
parents: 28921
diff changeset
55 #if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
56 #define COMPILE_3DNOW
698
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
57 #endif
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
58
29114
06540eb5ef6a Rename RUNTIME_CPUDETECT to CONFIG_RUNTIME_CPUDETECT and always define it.
ramiro
parents: 28921
diff changeset
59 #if HAVE_SSE2 || CONFIG_RUNTIME_CPUDETECT
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
60 #define COMPILE_SSE
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
61 #endif
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
62
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
63 #undef HAVE_MMX
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
64 #undef HAVE_MMX2
28335
31287e75b5d8 HAVE_3DNOW --> HAVE_AMD3DNOW
diego
parents: 28302
diff changeset
65 #undef HAVE_AMD3DNOW
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
66 #undef HAVE_SSE
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
67 #undef HAVE_SSE2
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
68 #define HAVE_MMX 0
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
69 #define HAVE_MMX2 0
28335
31287e75b5d8 HAVE_3DNOW --> HAVE_AMD3DNOW
diego
parents: 28302
diff changeset
70 #define HAVE_AMD3DNOW 0
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
71 #define HAVE_SSE 0
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
72 #define HAVE_SSE2 0
698
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
73 /*
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
74 #ifdef COMPILE_C
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
75 #undef HAVE_MMX
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
76 #undef HAVE_MMX2
28335
31287e75b5d8 HAVE_3DNOW --> HAVE_AMD3DNOW
diego
parents: 28302
diff changeset
77 #undef HAVE_AMD3DNOW
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
78 #undef HAVE_SSE
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
79 #undef HAVE_SSE2
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
80 #define HAVE_MMX 0
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
81 #define HAVE_MMX2 0
28335
31287e75b5d8 HAVE_3DNOW --> HAVE_AMD3DNOW
diego
parents: 28302
diff changeset
82 #define HAVE_AMD3DNOW 0
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
83 #define HAVE_SSE 0
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
84 #define HAVE_SSE2 0
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
85 #define RENAME(a) a ## _C
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
86 #include "aclib_template.c"
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
87 #endif
698
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
88 */
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
89 //MMX versions
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
90 #ifdef COMPILE_MMX
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
91 #undef RENAME
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
92 #undef HAVE_MMX
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
93 #undef HAVE_MMX2
28335
31287e75b5d8 HAVE_3DNOW --> HAVE_AMD3DNOW
diego
parents: 28302
diff changeset
94 #undef HAVE_AMD3DNOW
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
95 #undef HAVE_SSE
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
96 #undef HAVE_SSE2
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
97 #define HAVE_MMX 1
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
98 #define HAVE_MMX2 0
28335
31287e75b5d8 HAVE_3DNOW --> HAVE_AMD3DNOW
diego
parents: 28302
diff changeset
99 #define HAVE_AMD3DNOW 0
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
100 #define HAVE_SSE 0
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
101 #define HAVE_SSE2 0
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
102 #define RENAME(a) a ## _MMX
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
103 #include "aclib_template.c"
698
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
104 #endif
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
105
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
106 //MMX2 versions
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
107 #ifdef COMPILE_MMX2
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
108 #undef RENAME
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
109 #undef HAVE_MMX
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
110 #undef HAVE_MMX2
28335
31287e75b5d8 HAVE_3DNOW --> HAVE_AMD3DNOW
diego
parents: 28302
diff changeset
111 #undef HAVE_AMD3DNOW
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
112 #undef HAVE_SSE
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
113 #undef HAVE_SSE2
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
114 #define HAVE_MMX 1
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
115 #define HAVE_MMX2 1
28335
31287e75b5d8 HAVE_3DNOW --> HAVE_AMD3DNOW
diego
parents: 28302
diff changeset
116 #define HAVE_AMD3DNOW 0
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
117 #define HAVE_SSE 0
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
118 #define HAVE_SSE2 0
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
119 #define RENAME(a) a ## _MMX2
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
120 #include "aclib_template.c"
698
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
121 #endif
3077
99f6db3255aa 10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents: 1123
diff changeset
122
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
123 //3DNOW versions
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
124 #ifdef COMPILE_3DNOW
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
125 #undef RENAME
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
126 #undef HAVE_MMX
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
127 #undef HAVE_MMX2
28335
31287e75b5d8 HAVE_3DNOW --> HAVE_AMD3DNOW
diego
parents: 28302
diff changeset
128 #undef HAVE_AMD3DNOW
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
129 #undef HAVE_SSE
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
130 #undef HAVE_SSE2
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
131 #define HAVE_MMX 1
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
132 #define HAVE_MMX2 0
28335
31287e75b5d8 HAVE_3DNOW --> HAVE_AMD3DNOW
diego
parents: 28302
diff changeset
133 #define HAVE_AMD3DNOW 1
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
134 #define HAVE_SSE 0
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
135 #define HAVE_SSE2 0
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
136 #define RENAME(a) a ## _3DNow
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
137 #include "aclib_template.c"
3077
99f6db3255aa 10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents: 1123
diff changeset
138 #endif
99f6db3255aa 10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents: 1123
diff changeset
139
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
140 //SSE versions (only used on SSE2 cpus)
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
141 #ifdef COMPILE_SSE
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
142 #undef RENAME
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
143 #undef HAVE_MMX
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
144 #undef HAVE_MMX2
28335
31287e75b5d8 HAVE_3DNOW --> HAVE_AMD3DNOW
diego
parents: 28302
diff changeset
145 #undef HAVE_AMD3DNOW
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
146 #undef HAVE_SSE
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
147 #undef HAVE_SSE2
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
148 #define HAVE_MMX 1
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
149 #define HAVE_MMX2 1
28335
31287e75b5d8 HAVE_3DNOW --> HAVE_AMD3DNOW
diego
parents: 28302
diff changeset
150 #define HAVE_AMD3DNOW 0
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
151 #define HAVE_SSE 1
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
152 #define HAVE_SSE2 1
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
153 #define RENAME(a) a ## _SSE
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
154 #include "aclib_template.c"
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
155 #endif
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
156
28921
62f0032e736a Get rid of pointless preprocessor condition indirection and use ARCH_X86
diego
parents: 28448
diff changeset
157 #endif /* ARCH_X86 */
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
158
3077
99f6db3255aa 10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents: 1123
diff changeset
159
23523
273aa6124f66 avoid utter breakage on non-x86, patch from Chris Roccati <roccati@at@pobox.dot.com>
lu_zero
parents: 21982
diff changeset
160 #undef fast_memcpy
7072
113d66d78967 removed nonsense 'inline'
arpi
parents: 5543
diff changeset
161 void * fast_memcpy(void * to, const void * from, size_t len)
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
162 {
29114
06540eb5ef6a Rename RUNTIME_CPUDETECT to CONFIG_RUNTIME_CPUDETECT and always define it.
ramiro
parents: 28921
diff changeset
163 #if CONFIG_RUNTIME_CPUDETECT
28921
62f0032e736a Get rid of pointless preprocessor condition indirection and use ARCH_X86
diego
parents: 28448
diff changeset
164 #if ARCH_X86
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
165 // ordered by speed, fastest first
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
166 if(gCpuCaps.hasSSE2)
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
167 fast_memcpy_SSE(to, from, len);
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
168 else if(gCpuCaps.hasMMX2)
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
169 fast_memcpy_MMX2(to, from, len);
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
170 else if(gCpuCaps.has3DNow)
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
171 fast_memcpy_3DNow(to, from, len);
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
172 else if(gCpuCaps.hasMMX)
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
173 fast_memcpy_MMX(to, from, len);
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
174 else
28921
62f0032e736a Get rid of pointless preprocessor condition indirection and use ARCH_X86
diego
parents: 28448
diff changeset
175 #endif
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
176 memcpy(to, from, len); // prior to MMX we use the standard memcpy
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
177 #else
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
178 #if HAVE_SSE2
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
179 fast_memcpy_SSE(to, from, len);
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
180 #elif HAVE_MMX2
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
181 fast_memcpy_MMX2(to, from, len);
28335
31287e75b5d8 HAVE_3DNOW --> HAVE_AMD3DNOW
diego
parents: 28302
diff changeset
182 #elif HAVE_AMD3DNOW
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
183 fast_memcpy_3DNow(to, from, len);
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
184 #elif HAVE_MMX
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
185 fast_memcpy_MMX(to, from, len);
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
186 #else
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
187 memcpy(to, from, len); // prior to MMX we use the standard memcpy
698
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
188 #endif
3077
99f6db3255aa 10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents: 1123
diff changeset
189
29114
06540eb5ef6a Rename RUNTIME_CPUDETECT to CONFIG_RUNTIME_CPUDETECT and always define it.
ramiro
parents: 28921
diff changeset
190 #endif //!CONFIG_RUNTIME_CPUDETECT
5543
c75f75806af1 memcpy must return destination ptr patch by Adam <adam@cfar.umd.edu>
michael
parents: 5208
diff changeset
191 return to;
698
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
192 }
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
193
8127
e7153e62a7f4 On non-x86 platforms, memcpy was re-implemented in mplayer and was called
jkeil
parents: 8123
diff changeset
194 #undef mem2agpcpy
7072
113d66d78967 removed nonsense 'inline'
arpi
parents: 5543
diff changeset
195 void * mem2agpcpy(void * to, const void * from, size_t len)
4681
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
196 {
29114
06540eb5ef6a Rename RUNTIME_CPUDETECT to CONFIG_RUNTIME_CPUDETECT and always define it.
ramiro
parents: 28921
diff changeset
197 #if CONFIG_RUNTIME_CPUDETECT
28921
62f0032e736a Get rid of pointless preprocessor condition indirection and use ARCH_X86
diego
parents: 28448
diff changeset
198 #if ARCH_X86
4681
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
199 // ordered by speed, fastest first
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
200 if(gCpuCaps.hasSSE2)
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
201 mem2agpcpy_SSE(to, from, len);
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
202 else if(gCpuCaps.hasMMX2)
4681
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
203 mem2agpcpy_MMX2(to, from, len);
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
204 else if(gCpuCaps.has3DNow)
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
205 mem2agpcpy_3DNow(to, from, len);
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
206 else if(gCpuCaps.hasMMX)
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
207 mem2agpcpy_MMX(to, from, len);
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
208 else
28921
62f0032e736a Get rid of pointless preprocessor condition indirection and use ARCH_X86
diego
parents: 28448
diff changeset
209 #endif
4681
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
210 memcpy(to, from, len); // prior to MMX we use the standard memcpy
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
211 #else
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
212 #if HAVE_SSE2
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
213 mem2agpcpy_SSE(to, from, len);
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
214 #elif HAVE_MMX2
4681
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
215 mem2agpcpy_MMX2(to, from, len);
28335
31287e75b5d8 HAVE_3DNOW --> HAVE_AMD3DNOW
diego
parents: 28302
diff changeset
216 #elif HAVE_AMD3DNOW
4681
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
217 mem2agpcpy_3DNow(to, from, len);
28290
25337a2147e7 Lots and lots of #ifdef ARCH_... -> #if ARCH_...
reimar
parents: 27341
diff changeset
218 #elif HAVE_MMX
4681
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
219 mem2agpcpy_MMX(to, from, len);
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
220 #else
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
221 memcpy(to, from, len); // prior to MMX we use the standard memcpy
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
222 #endif
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
223
29114
06540eb5ef6a Rename RUNTIME_CPUDETECT to CONFIG_RUNTIME_CPUDETECT and always define it.
ramiro
parents: 28921
diff changeset
224 #endif //!CONFIG_RUNTIME_CPUDETECT
8123
9fc45fe0d444 *HUGE* set of compiler warning fixes, unused variables removal
arpi
parents: 7072
diff changeset
225 return to;
4681
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
226 }