Mercurial > mplayer.hg
annotate libvo/aclib.c @ 35967:76d4f38ffdf9
Fix crash with file selector after pressing OK.
The crash would occur with the "dot directory" selected and pressing OK
when previously either the "directory up" button or OK (to refresh the
file list) has been pressed.
author | ib |
---|---|
date | Wed, 27 Mar 2013 18:56:13 +0000 |
parents | 4e2f4bd081ce |
children |
rev | line source |
---|---|
28446
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
1 /* |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
2 * aclib - advanced C library ;) |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
3 * Functions which improve and expand the standard C library, see aclib_template.c. |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
4 * This file only contains runtime CPU detection and config option stuff. |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
5 * runtime CPU detection by Michael Niedermayer (michaelni@gmx.at) |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
6 * |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
7 * This file is part of MPlayer. |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
8 * |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
9 * MPlayer is free software; you can redistribute it and/or modify |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
10 * it under the terms of the GNU General Public License as published by |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
11 * the Free Software Foundation; either version 2 of the License, or |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
12 * (at your option) any later version. |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
13 * |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
14 * MPlayer is distributed in the hope that it will be useful, |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
17 * GNU General Public License for more details. |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
18 * |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
19 * You should have received a copy of the GNU General Public License along |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
20 * with MPlayer; if not, write to the Free Software Foundation, Inc., |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
21 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
22 */ |
7681eab10aea
Add standard license headers, unify header formatting.
diego
parents:
28335
diff
changeset
|
23 |
12650
ac3fd2ff2561
Unify the config.h #include, use "config.h" instead of "../config.h"
diego
parents:
12492
diff
changeset
|
24 #include "config.h" |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
25 #include <stddef.h> |
30135
807fce7a4bb3
Do not assume that "long" is the size of a register.
reimar
parents:
29114
diff
changeset
|
26 #include <stdint.h> |
21982
fa66a03e8920
Include string.h to make sure memcpy is not used without prototype
reimar
parents:
20577
diff
changeset
|
27 #include <string.h> |
13787
e047e70a9767
Handle "xxx.h" vs "../xxx.h" include paths in a consistent way.
diego
parents:
13720
diff
changeset
|
28 #include "cpudetect.h" |
8123
9fc45fe0d444
*HUGE* set of compiler warning fixes, unused variables removal
arpi
parents:
7072
diff
changeset
|
29 #include "fastmemcpy.h" |
12492
4b8417674f1c
fix crash due to fast_memcpy calling itself instead of libc memcpy
reimar
parents:
8127
diff
changeset
|
30 #undef memcpy |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
31 |
3077
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
32 #define BLOCK_SIZE 4096 |
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
33 #define CONFUSION_FACTOR 0 |
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
34 //Feel free to fine-tune the above 2, it might be possible to get some speedup with them :) |
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
35 |
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
36 //#define STATISTICS |
3393 | 37 |
38 //Note: we have MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one | |
39 //Plain C versions | |
29114
06540eb5ef6a
Rename RUNTIME_CPUDETECT to CONFIG_RUNTIME_CPUDETECT and always define it.
ramiro
parents:
28921
diff
changeset
|
40 //#if !HAVE_MMX || CONFIG_RUNTIME_CPUDETECT |
3393 | 41 //#define COMPILE_C |
42 //#endif | |
43 | |
28921
62f0032e736a
Get rid of pointless preprocessor condition indirection and use ARCH_X86
diego
parents:
28448
diff
changeset
|
44 #if ARCH_X86 |
3393 | 45 |
29114
06540eb5ef6a
Rename RUNTIME_CPUDETECT to CONFIG_RUNTIME_CPUDETECT and always define it.
ramiro
parents:
28921
diff
changeset
|
46 #if (HAVE_MMX && !HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT |
3393 | 47 #define COMPILE_MMX |
48 #endif | |
49 | |
29114
06540eb5ef6a
Rename RUNTIME_CPUDETECT to CONFIG_RUNTIME_CPUDETECT and always define it.
ramiro
parents:
28921
diff
changeset
|
50 #if (HAVE_MMX2 && !HAVE_SSE2) || CONFIG_RUNTIME_CPUDETECT |
3393 | 51 #define COMPILE_MMX2 |
52 #endif | |
53 | |
29114
06540eb5ef6a
Rename RUNTIME_CPUDETECT to CONFIG_RUNTIME_CPUDETECT and always define it.
ramiro
parents:
28921
diff
changeset
|
54 #if (HAVE_AMD3DNOW && !HAVE_MMX2) || CONFIG_RUNTIME_CPUDETECT |
3393 | 55 #define COMPILE_3DNOW |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
56 #endif |
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
57 |
29114
06540eb5ef6a
Rename RUNTIME_CPUDETECT to CONFIG_RUNTIME_CPUDETECT and always define it.
ramiro
parents:
28921
diff
changeset
|
58 #if HAVE_SSE2 || CONFIG_RUNTIME_CPUDETECT |
5208 | 59 #define COMPILE_SSE |
60 #endif | |
61 | |
3393 | 62 #undef HAVE_MMX |
63 #undef HAVE_MMX2 | |
28335 | 64 #undef HAVE_AMD3DNOW |
5208 | 65 #undef HAVE_SSE |
66 #undef HAVE_SSE2 | |
28290 | 67 #define HAVE_MMX 0 |
68 #define HAVE_MMX2 0 | |
28335 | 69 #define HAVE_AMD3DNOW 0 |
28290 | 70 #define HAVE_SSE 0 |
71 #define HAVE_SSE2 0 | |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
72 /* |
3393 | 73 #ifdef COMPILE_C |
74 #undef HAVE_MMX | |
75 #undef HAVE_MMX2 | |
28335 | 76 #undef HAVE_AMD3DNOW |
28290 | 77 #undef HAVE_SSE |
78 #undef HAVE_SSE2 | |
79 #define HAVE_MMX 0 | |
80 #define HAVE_MMX2 0 | |
28335 | 81 #define HAVE_AMD3DNOW 0 |
28290 | 82 #define HAVE_SSE 0 |
83 #define HAVE_SSE2 0 | |
3393 | 84 #define RENAME(a) a ## _C |
85 #include "aclib_template.c" | |
86 #endif | |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
87 */ |
3393 | 88 //MMX versions |
89 #ifdef COMPILE_MMX | |
90 #undef RENAME | |
28290 | 91 #undef HAVE_MMX |
3393 | 92 #undef HAVE_MMX2 |
28335 | 93 #undef HAVE_AMD3DNOW |
5208 | 94 #undef HAVE_SSE |
95 #undef HAVE_SSE2 | |
28290 | 96 #define HAVE_MMX 1 |
97 #define HAVE_MMX2 0 | |
28335 | 98 #define HAVE_AMD3DNOW 0 |
28290 | 99 #define HAVE_SSE 0 |
100 #define HAVE_SSE2 0 | |
3393 | 101 #define RENAME(a) a ## _MMX |
102 #include "aclib_template.c" | |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
103 #endif |
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
104 |
3393 | 105 //MMX2 versions |
106 #ifdef COMPILE_MMX2 | |
107 #undef RENAME | |
28290 | 108 #undef HAVE_MMX |
109 #undef HAVE_MMX2 | |
28335 | 110 #undef HAVE_AMD3DNOW |
5208 | 111 #undef HAVE_SSE |
112 #undef HAVE_SSE2 | |
28290 | 113 #define HAVE_MMX 1 |
114 #define HAVE_MMX2 1 | |
28335 | 115 #define HAVE_AMD3DNOW 0 |
28290 | 116 #define HAVE_SSE 0 |
117 #define HAVE_SSE2 0 | |
3393 | 118 #define RENAME(a) a ## _MMX2 |
119 #include "aclib_template.c" | |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
120 #endif |
3077
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
121 |
3393 | 122 //3DNOW versions |
123 #ifdef COMPILE_3DNOW | |
124 #undef RENAME | |
28290 | 125 #undef HAVE_MMX |
3393 | 126 #undef HAVE_MMX2 |
28335 | 127 #undef HAVE_AMD3DNOW |
5208 | 128 #undef HAVE_SSE |
129 #undef HAVE_SSE2 | |
28290 | 130 #define HAVE_MMX 1 |
131 #define HAVE_MMX2 0 | |
28335 | 132 #define HAVE_AMD3DNOW 1 |
28290 | 133 #define HAVE_SSE 0 |
134 #define HAVE_SSE2 0 | |
3393 | 135 #define RENAME(a) a ## _3DNow |
136 #include "aclib_template.c" | |
3077
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
137 #endif |
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
138 |
5208 | 139 //SSE versions (only used on SSE2 cpus) |
140 #ifdef COMPILE_SSE | |
141 #undef RENAME | |
28290 | 142 #undef HAVE_MMX |
143 #undef HAVE_MMX2 | |
28335 | 144 #undef HAVE_AMD3DNOW |
28290 | 145 #undef HAVE_SSE |
146 #undef HAVE_SSE2 | |
147 #define HAVE_MMX 1 | |
148 #define HAVE_MMX2 1 | |
28335 | 149 #define HAVE_AMD3DNOW 0 |
28290 | 150 #define HAVE_SSE 1 |
151 #define HAVE_SSE2 1 | |
5208 | 152 #define RENAME(a) a ## _SSE |
153 #include "aclib_template.c" | |
154 #endif | |
155 | |
28921
62f0032e736a
Get rid of pointless preprocessor condition indirection and use ARCH_X86
diego
parents:
28448
diff
changeset
|
156 #endif /* ARCH_X86 */ |
3393 | 157 |
3077
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
158 |
23523
273aa6124f66
avoid utter breakage on non-x86, patch from Chris Roccati <roccati@at@pobox.dot.com>
lu_zero
parents:
21982
diff
changeset
|
159 #undef fast_memcpy |
7072 | 160 void * fast_memcpy(void * to, const void * from, size_t len) |
3393 | 161 { |
29114
06540eb5ef6a
Rename RUNTIME_CPUDETECT to CONFIG_RUNTIME_CPUDETECT and always define it.
ramiro
parents:
28921
diff
changeset
|
162 #if CONFIG_RUNTIME_CPUDETECT |
28921
62f0032e736a
Get rid of pointless preprocessor condition indirection and use ARCH_X86
diego
parents:
28448
diff
changeset
|
163 #if ARCH_X86 |
3393 | 164 // ordered by speed, fastest first |
5208 | 165 if(gCpuCaps.hasSSE2) |
166 fast_memcpy_SSE(to, from, len); | |
167 else if(gCpuCaps.hasMMX2) | |
3393 | 168 fast_memcpy_MMX2(to, from, len); |
169 else if(gCpuCaps.has3DNow) | |
170 fast_memcpy_3DNow(to, from, len); | |
171 else if(gCpuCaps.hasMMX) | |
172 fast_memcpy_MMX(to, from, len); | |
173 else | |
28921
62f0032e736a
Get rid of pointless preprocessor condition indirection and use ARCH_X86
diego
parents:
28448
diff
changeset
|
174 #endif |
3393 | 175 memcpy(to, from, len); // prior to MMX we use the standard memcpy |
176 #else | |
28290 | 177 #if HAVE_SSE2 |
5208 | 178 fast_memcpy_SSE(to, from, len); |
28290 | 179 #elif HAVE_MMX2 |
3393 | 180 fast_memcpy_MMX2(to, from, len); |
28335 | 181 #elif HAVE_AMD3DNOW |
3393 | 182 fast_memcpy_3DNow(to, from, len); |
28290 | 183 #elif HAVE_MMX |
3393 | 184 fast_memcpy_MMX(to, from, len); |
185 #else | |
186 memcpy(to, from, len); // prior to MMX we use the standard memcpy | |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
187 #endif |
3077
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
188 |
29114
06540eb5ef6a
Rename RUNTIME_CPUDETECT to CONFIG_RUNTIME_CPUDETECT and always define it.
ramiro
parents:
28921
diff
changeset
|
189 #endif //!CONFIG_RUNTIME_CPUDETECT |
5543
c75f75806af1
memcpy must return destination ptr patch by Adam <adam@cfar.umd.edu>
michael
parents:
5208
diff
changeset
|
190 return to; |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
191 } |
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
192 |
8127
e7153e62a7f4
On non-x86 platforms, memcpy was re-implemented in mplayer and was called
jkeil
parents:
8123
diff
changeset
|
193 #undef mem2agpcpy |
7072 | 194 void * mem2agpcpy(void * to, const void * from, size_t len) |
4681 | 195 { |
29114
06540eb5ef6a
Rename RUNTIME_CPUDETECT to CONFIG_RUNTIME_CPUDETECT and always define it.
ramiro
parents:
28921
diff
changeset
|
196 #if CONFIG_RUNTIME_CPUDETECT |
28921
62f0032e736a
Get rid of pointless preprocessor condition indirection and use ARCH_X86
diego
parents:
28448
diff
changeset
|
197 #if ARCH_X86 |
4681 | 198 // ordered by speed, fastest first |
5208 | 199 if(gCpuCaps.hasSSE2) |
200 mem2agpcpy_SSE(to, from, len); | |
201 else if(gCpuCaps.hasMMX2) | |
4681 | 202 mem2agpcpy_MMX2(to, from, len); |
203 else if(gCpuCaps.has3DNow) | |
204 mem2agpcpy_3DNow(to, from, len); | |
205 else if(gCpuCaps.hasMMX) | |
206 mem2agpcpy_MMX(to, from, len); | |
207 else | |
28921
62f0032e736a
Get rid of pointless preprocessor condition indirection and use ARCH_X86
diego
parents:
28448
diff
changeset
|
208 #endif |
4681 | 209 memcpy(to, from, len); // prior to MMX we use the standard memcpy |
210 #else | |
28290 | 211 #if HAVE_SSE2 |
5208 | 212 mem2agpcpy_SSE(to, from, len); |
28290 | 213 #elif HAVE_MMX2 |
4681 | 214 mem2agpcpy_MMX2(to, from, len); |
28335 | 215 #elif HAVE_AMD3DNOW |
4681 | 216 mem2agpcpy_3DNow(to, from, len); |
28290 | 217 #elif HAVE_MMX |
4681 | 218 mem2agpcpy_MMX(to, from, len); |
219 #else | |
220 memcpy(to, from, len); // prior to MMX we use the standard memcpy | |
221 #endif | |
222 | |
29114
06540eb5ef6a
Rename RUNTIME_CPUDETECT to CONFIG_RUNTIME_CPUDETECT and always define it.
ramiro
parents:
28921
diff
changeset
|
223 #endif //!CONFIG_RUNTIME_CPUDETECT |
8123
9fc45fe0d444
*HUGE* set of compiler warning fixes, unused variables removal
arpi
parents:
7072
diff
changeset
|
224 return to; |
4681 | 225 } |