Mercurial > mplayer.hg
annotate libvo/aclib.c @ 33179:218edd8fc782
Cosmetic: Format to MPlayer coding style.
Additionally: remove needless includes, group and sort includes, group
and sort variables, rename gtkAOFakeSurround declaration gtkAOSurround,
add #ifdefs to variable declarations, group statements by adding or
removing new lines to ease reading, move assignments outside conditions,
add parentheses, avoid mixing declaration and code, revise comments and
add new ones.
author | ib |
---|---|
date | Fri, 15 Apr 2011 14:30:58 +0000 |
parents | 807fce7a4bb3 |
children | 4e2f4bd081ce |
rev | line source |
---|---|
28446
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
1 /* |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
2 * aclib - advanced C library ;) |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
3 * Functions which improve and expand the standard C library, see aclib_template.c. |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
4 * This file only contains runtime CPU detection and config option stuff. |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
5 * runtime CPU detection by Michael Niedermayer (michaelni@gmx.at) |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
6 * |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
7 * This file is part of MPlayer. |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
8 * |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
9 * MPlayer is free software; you can redistribute it and/or modify |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
10 * it under the terms of the GNU General Public License as published by |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
11 * the Free Software Foundation; either version 2 of the License, or |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
12 * (at your option) any later version. |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
13 * |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
14 * MPlayer is distributed in the hope that it will be useful, |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
17 * GNU General Public License for more details. |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
18 * |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
19 * You should have received a copy of the GNU General Public License along |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
20 * with MPlayer; if not, write to the Free Software Foundation, Inc., |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
21 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
22 */ |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
23 |
12650
ac3fd2ff2561
Unify the config.h #include, use "config.h" instead of "../config.h"
diego
parents:
12492
diff
changeset
|
24 #include "config.h" |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
25 #include <stddef.h> |
30135
807fce7a4bb3
Do not assume that "long" is the size of a register.
reimar
parents:
29114
diff
changeset
|
26 #include <stdint.h> |
21982
fa66a03e8920
Include string.h to make sure memcpy is not used without prototype
reimar
parents:
20577
diff
changeset
|
27 #include <string.h> |
13787
e047e70a9767
Handle "xxx.h" vs "../xxx.h" include paths in a consistent way.
diego
parents:
13720
diff
changeset
|
28 #include "cpudetect.h" |
8123
9fc45fe0d444
*HUGE* set of compiler warning fixes, unused variables removal
arpi
parents:
7072
diff
changeset
|
29 #include "fastmemcpy.h" |
30135
807fce7a4bb3
Do not assume that "long" is the size of a register.
reimar
parents:
29114
diff
changeset
|
30 #include "libavutil/x86_cpu.h" |
12492
4b8417674f1c
fix crash due to fast_memcpy calling itself instead of libc memcpy
reimar
parents:
8127
diff
changeset
|
31 #undef memcpy |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
32 |
3077
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
33 #define BLOCK_SIZE 4096 |
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
34 #define CONFUSION_FACTOR 0 |
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
35 //Feel free to fine-tune the above 2, it might be possible to get some speedup with them :) |
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
36 |
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
37 //#define STATISTICS |
3393 | 38 |
39 //Note: we have MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one | |
40 //Plain C versions | |
29114
06540eb5ef6a
Rename RUNTIME_CPUDETECT to CONFIG_RUNTIME_CPUDETECT and always define it.
ramiro
parents:
28921
diff
changeset
|
41 //#if !HAVE_MMX || CONFIG_RUNTIME_CPUDETECT |
3393 | 42 //#define COMPILE_C |
43 //#endif | |
44 | |
28921
62f0032e736a
Get rid of pointless preprocessor condition indirection and use ARCH_X86
diego
parents:
28448
diff
changeset
|
45 #if ARCH_X86 |
3393 | 46 |
29114
06540eb5ef6a
Rename RUNTIME_CPUDETECT to CONFIG_RUNTIME_CPUDETECT and always define it.
ramiro
parents:
28921
diff
changeset
|
47 #if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT |
3393 | 48 #define COMPILE_MMX |
49 #endif | |
50 | |
29114
06540eb5ef6a
Rename RUNTIME_CPUDETECT to CONFIG_RUNTIME_CPUDETECT and always define it.
ramiro
parents:
28921
diff
changeset
|
51 #if (HAVE_MMX2 && !HAVE_SSE2) || CONFIG_RUNTIME_CPUDETECT |
3393 | 52 #define COMPILE_MMX2 |
53 #endif | |
54 | |
29114
06540eb5ef6a
Rename RUNTIME_CPUDETECT to CONFIG_RUNTIME_CPUDETECT and always define it.
ramiro
parents:
28921
diff
changeset
|
55 #if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT |
3393 | 56 #define COMPILE_3DNOW |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
57 #endif |
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
58 |
29114
06540eb5ef6a
Rename RUNTIME_CPUDETECT to CONFIG_RUNTIME_CPUDETECT and always define it.
ramiro
parents:
28921
diff
changeset
|
59 #if HAVE_SSE2 || CONFIG_RUNTIME_CPUDETECT |
5208 | 60 #define COMPILE_SSE |
61 #endif | |
62 | |
3393 | 63 #undef HAVE_MMX |
64 #undef HAVE_MMX2 | |
28335 | 65 #undef HAVE_AMD3DNOW |
5208 | 66 #undef HAVE_SSE |
67 #undef HAVE_SSE2 | |
28290 | 68 #define HAVE_MMX 0 |
69 #define HAVE_MMX2 0 | |
28335 | 70 #define HAVE_AMD3DNOW 0 |
28290 | 71 #define HAVE_SSE 0 |
72 #define HAVE_SSE2 0 | |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
73 /* |
3393 | 74 #ifdef COMPILE_C |
75 #undef HAVE_MMX | |
76 #undef HAVE_MMX2 | |
28335 | 77 #undef HAVE_AMD3DNOW |
28290 | 78 #undef HAVE_SSE |
79 #undef HAVE_SSE2 | |
80 #define HAVE_MMX 0 | |
81 #define HAVE_MMX2 0 | |
28335 | 82 #define HAVE_AMD3DNOW 0 |
28290 | 83 #define HAVE_SSE 0 |
84 #define HAVE_SSE2 0 | |
3393 | 85 #define RENAME(a) a ## _C |
86 #include "aclib_template.c" | |
87 #endif | |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
88 */ |
3393 | 89 //MMX versions |
90 #ifdef COMPILE_MMX | |
91 #undef RENAME | |
28290 | 92 #undef HAVE_MMX |
3393 | 93 #undef HAVE_MMX2 |
28335 | 94 #undef HAVE_AMD3DNOW |
5208 | 95 #undef HAVE_SSE |
96 #undef HAVE_SSE2 | |
28290 | 97 #define HAVE_MMX 1 |
98 #define HAVE_MMX2 0 | |
28335 | 99 #define HAVE_AMD3DNOW 0 |
28290 | 100 #define HAVE_SSE 0 |
101 #define HAVE_SSE2 0 | |
3393 | 102 #define RENAME(a) a ## _MMX |
103 #include "aclib_template.c" | |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
104 #endif |
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
105 |
3393 | 106 //MMX2 versions |
107 #ifdef COMPILE_MMX2 | |
108 #undef RENAME | |
28290 | 109 #undef HAVE_MMX |
110 #undef HAVE_MMX2 | |
28335 | 111 #undef HAVE_AMD3DNOW |
5208 | 112 #undef HAVE_SSE |
113 #undef HAVE_SSE2 | |
28290 | 114 #define HAVE_MMX 1 |
115 #define HAVE_MMX2 1 | |
28335 | 116 #define HAVE_AMD3DNOW 0 |
28290 | 117 #define HAVE_SSE 0 |
118 #define HAVE_SSE2 0 | |
3393 | 119 #define RENAME(a) a ## _MMX2 |
120 #include "aclib_template.c" | |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
121 #endif |
3077
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
122 |
3393 | 123 //3DNOW versions |
124 #ifdef COMPILE_3DNOW | |
125 #undef RENAME | |
28290 | 126 #undef HAVE_MMX |
3393 | 127 #undef HAVE_MMX2 |
28335 | 128 #undef HAVE_AMD3DNOW |
5208 | 129 #undef HAVE_SSE |
130 #undef HAVE_SSE2 | |
28290 | 131 #define HAVE_MMX 1 |
132 #define HAVE_MMX2 0 | |
28335 | 133 #define HAVE_AMD3DNOW 1 |
28290 | 134 #define HAVE_SSE 0 |
135 #define HAVE_SSE2 0 | |
3393 | 136 #define RENAME(a) a ## _3DNow |
137 #include "aclib_template.c" | |
3077
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
138 #endif |
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
139 |
5208 | 140 //SSE versions (only used on SSE2 cpus) |
141 #ifdef COMPILE_SSE | |
142 #undef RENAME | |
28290 | 143 #undef HAVE_MMX |
144 #undef HAVE_MMX2 | |
28335 | 145 #undef HAVE_AMD3DNOW |
28290 | 146 #undef HAVE_SSE |
147 #undef HAVE_SSE2 | |
148 #define HAVE_MMX 1 | |
149 #define HAVE_MMX2 1 | |
28335 | 150 #define HAVE_AMD3DNOW 0 |
28290 | 151 #define HAVE_SSE 1 |
152 #define HAVE_SSE2 1 | |
5208 | 153 #define RENAME(a) a ## _SSE |
154 #include "aclib_template.c" | |
155 #endif | |
156 | |
28921
62f0032e736a
Get rid of pointless preprocessor condition indirection and use ARCH_X86
diego
parents:
28448
diff
changeset
|
157 #endif /* ARCH_X86 */ |
3393 | 158 |
3077
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
159 |
23523
273aa6124f66
avoid utter breakage on non-x86, patch from Chris Roccati <roccati@at@pobox.dot.com>
lu_zero
parents:
21982
diff
changeset
|
160 #undef fast_memcpy |
7072 | 161 void * fast_memcpy(void * to, const void * from, size_t len) |
3393 | 162 { |
29114
06540eb5ef6a
Rename RUNTIME_CPUDETECT to CONFIG_RUNTIME_CPUDETECT and always define it.
ramiro
parents:
28921
diff
changeset
|
163 #if CONFIG_RUNTIME_CPUDETECT |
28921
62f0032e736a
Get rid of pointless preprocessor condition indirection and use ARCH_X86
diego
parents:
28448
diff
changeset
|
164 #if ARCH_X86 |
3393 | 165 // ordered by speed, fastest first |
5208 | 166 if(gCpuCaps.hasSSE2) |
167 fast_memcpy_SSE(to, from, len); | |
168 else if(gCpuCaps.hasMMX2) | |
3393 | 169 fast_memcpy_MMX2(to, from, len); |
170 else if(gCpuCaps.has3DNow) | |
171 fast_memcpy_3DNow(to, from, len); | |
172 else if(gCpuCaps.hasMMX) | |
173 fast_memcpy_MMX(to, from, len); | |
174 else | |
28921
62f0032e736a
Get rid of pointless preprocessor condition indirection and use ARCH_X86
diego
parents:
28448
diff
changeset
|
175 #endif |
3393 | 176 memcpy(to, from, len); // prior to mmx we use the standard memcpy |
177 #else | |
28290 | 178 #if HAVE_SSE2 |
5208 | 179 fast_memcpy_SSE(to, from, len); |
28290 | 180 #elif HAVE_MMX2 |
3393 | 181 fast_memcpy_MMX2(to, from, len); |
28335 | 182 #elif HAVE_AMD3DNOW |
3393 | 183 fast_memcpy_3DNow(to, from, len); |
28290 | 184 #elif HAVE_MMX |
3393 | 185 fast_memcpy_MMX(to, from, len); |
186 #else | |
187 memcpy(to, from, len); // prior to mmx we use the standard memcpy | |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
188 #endif |
3077
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
189 |
29114
06540eb5ef6a
Rename RUNTIME_CPUDETECT to CONFIG_RUNTIME_CPUDETECT and always define it.
ramiro
parents:
28921
diff
changeset
|
190 #endif //!CONFIG_RUNTIME_CPUDETECT |
5543
c75f75806af1
memcpy must return destination ptr patch by Adam <adam@cfar.umd.edu>
michael
parents:
5208
diff
changeset
|
191 return to; |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
192 } |
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
193 |
8127
e7153e62a7f4
On non-x86 platforms, memcpy was re-implemented in mplayer and was called
jkeil
parents:
8123
diff
changeset
|
194 #undef mem2agpcpy |
7072 | 195 void * mem2agpcpy(void * to, const void * from, size_t len) |
4681 | 196 { |
29114
06540eb5ef6a
Rename RUNTIME_CPUDETECT to CONFIG_RUNTIME_CPUDETECT and always define it.
ramiro
parents:
28921
diff
changeset
|
197 #if CONFIG_RUNTIME_CPUDETECT |
28921
62f0032e736a
Get rid of pointless preprocessor condition indirection and use ARCH_X86
diego
parents:
28448
diff
changeset
|
198 #if ARCH_X86 |
4681 | 199 // ordered by speed, fastest first |
5208 | 200 if(gCpuCaps.hasSSE2) |
201 mem2agpcpy_SSE(to, from, len); | |
202 else if(gCpuCaps.hasMMX2) | |
4681 | 203 mem2agpcpy_MMX2(to, from, len); |
204 else if(gCpuCaps.has3DNow) | |
205 mem2agpcpy_3DNow(to, from, len); | |
206 else if(gCpuCaps.hasMMX) | |
207 mem2agpcpy_MMX(to, from, len); | |
208 else | |
28921
62f0032e736a
Get rid of pointless preprocessor condition indirection and use ARCH_X86
diego
parents:
28448
diff
changeset
|
209 #endif |
4681 | 210 memcpy(to, from, len); // prior to mmx we use the standard memcpy |
211 #else | |
28290 | 212 #if HAVE_SSE2 |
5208 | 213 mem2agpcpy_SSE(to, from, len); |
28290 | 214 #elif HAVE_MMX2 |
4681 | 215 mem2agpcpy_MMX2(to, from, len); |
28335 | 216 #elif HAVE_AMD3DNOW |
4681 | 217 mem2agpcpy_3DNow(to, from, len); |
28290 | 218 #elif HAVE_MMX |
4681 | 219 mem2agpcpy_MMX(to, from, len); |
220 #else | |
221 memcpy(to, from, len); // prior to mmx we use the standard memcpy | |
222 #endif | |
223 | |
29114
06540eb5ef6a
Rename RUNTIME_CPUDETECT to CONFIG_RUNTIME_CPUDETECT and always define it.
ramiro
parents:
28921
diff
changeset
|
224 #endif //!CONFIG_RUNTIME_CPUDETECT |
8123
9fc45fe0d444
*HUGE* set of compiler warning fixes, unused variables removal
arpi
parents:
7072
diff
changeset
|
225 return to; |
4681 | 226 } |