Mercurial > mplayer.hg
annotate libvo/aclib.c @ 7492:5465cbd5c5ef
Modern versions of OpenSSH listen on localhost to forward the X11 connection
(X11UseLocalhost defaults to yes). The following patch treats any DISPLAY
whose display number ("port") is greater than or equal to 10 as non-local;
otherwise mplayer tries a local optimization on a remote X server, which
doesn't work.
patch by Denis.Ducamp@groar.org
author | arpi |
---|---|
date | Mon, 23 Sep 2002 21:17:30 +0000 |
parents | 113d66d78967 |
children | 9fc45fe0d444 |
rev | line source |
---|---|
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
1 #include "../config.h" |
3393 | 2 #ifdef USE_FASTMEMCPY |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
3 |
3077
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
4 /* |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
5 aclib - advanced C library ;) |
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
6 This file contains functions which improve and expand standard C-library |
3393 | 7 see aclib_template.c ... this file only contains runtime cpu detection and config options stuff |
8 runtime cpu detection by michael niedermayer (michaelni@gmx.at) is under GPL | |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
9 */ |
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
10 #include <stddef.h> |
3393 | 11 #include "../cpudetect.h" |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
12 |
3077
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
13 #define BLOCK_SIZE 4096 |
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
14 #define CONFUSION_FACTOR 0 |
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
15 //Feel free to fine-tune the above 2, it might be possible to get some speedup with them :) |
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
16 |
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
17 //#define STATISTICS |
3393 | 18 #ifdef ARCH_X86 |
19 #define CAN_COMPILE_X86_ASM | |
20 #endif | |
21 | |
22 //Note: we have MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one | |
23 //Plain C versions | |
24 //#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT) | |
25 //#define COMPILE_C | |
26 //#endif | |
27 | |
28 #ifdef CAN_COMPILE_X86_ASM | |
29 | |
30 #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT) | |
31 #define COMPILE_MMX | |
32 #endif | |
33 | |
5208 | 34 #if (defined (HAVE_MMX2) && !defined (HAVE_SSE2)) || defined (RUNTIME_CPUDETECT) |
3393 | 35 #define COMPILE_MMX2 |
36 #endif | |
37 | |
38 #if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT) | |
39 #define COMPILE_3DNOW | |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
40 #endif |
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
41 |
5208 | 42 #if defined (HAVE_SSE2) || defined (RUNTIME_CPUDETECT) |
43 #define COMPILE_SSE | |
44 #endif | |
45 | |
3393 | 46 #undef HAVE_MMX |
47 #undef HAVE_MMX2 | |
48 #undef HAVE_3DNOW | |
5208 | 49 #undef HAVE_SSE |
50 #undef HAVE_SSE2 | |
3393 | 51 #undef ARCH_X86 |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
52 /* |
3393 | 53 #ifdef COMPILE_C |
54 #undef HAVE_MMX | |
55 #undef HAVE_MMX2 | |
56 #undef HAVE_3DNOW | |
57 #undef ARCH_X86 | |
58 #define RENAME(a) a ## _C | |
59 #include "aclib_template.c" | |
60 #endif | |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
61 */ |
3393 | 62 //MMX versions |
63 #ifdef COMPILE_MMX | |
64 #undef RENAME | |
65 #define HAVE_MMX | |
66 #undef HAVE_MMX2 | |
67 #undef HAVE_3DNOW | |
5208 | 68 #undef HAVE_SSE |
69 #undef HAVE_SSE2 | |
3393 | 70 #define ARCH_X86 |
71 #define RENAME(a) a ## _MMX | |
72 #include "aclib_template.c" | |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
73 #endif |
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
74 |
3393 | 75 //MMX2 versions |
76 #ifdef COMPILE_MMX2 | |
77 #undef RENAME | |
78 #define HAVE_MMX | |
79 #define HAVE_MMX2 | |
80 #undef HAVE_3DNOW | |
5208 | 81 #undef HAVE_SSE |
82 #undef HAVE_SSE2 | |
3393 | 83 #define ARCH_X86 |
84 #define RENAME(a) a ## _MMX2 | |
85 #include "aclib_template.c" | |
698
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
86 #endif |
3077
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
87 |
3393 | 88 //3DNOW versions |
89 #ifdef COMPILE_3DNOW | |
90 #undef RENAME | |
91 #define HAVE_MMX | |
92 #undef HAVE_MMX2 | |
93 #define HAVE_3DNOW | |
5208 | 94 #undef HAVE_SSE |
95 #undef HAVE_SSE2 | |
3393 | 96 #define ARCH_X86 |
97 #define RENAME(a) a ## _3DNow | |
98 #include "aclib_template.c" | |
3077
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
99 #endif |
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
100 |
5208 | 101 //SSE versions (only used on SSE2 cpus) |
102 #ifdef COMPILE_SSE | |
103 #undef RENAME | |
104 #define HAVE_MMX | |
105 #define HAVE_MMX2 | |
106 #undef HAVE_3DNOW | |
107 #define HAVE_SSE | |
108 #define HAVE_SSE2 | |
109 #define ARCH_X86 | |
110 #define RENAME(a) a ## _SSE | |
111 #include "aclib_template.c" | |
112 #endif | |
113 | |
3393 | 114 #endif // CAN_COMPILE_X86_ASM |
115 | |
3077
99f6db3255aa
10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents:
1123
diff
changeset
|
116 |
/*
 * fast_memcpy - copy len bytes from `from` to `to` and return `to`.
 *
 * With RUNTIME_CPUDETECT the variant is picked at run time from the
 * gCpuCaps flags (only when CAN_COMPILE_X86_ASM is defined); otherwise
 * it is fixed at compile time by the HAVE_* macros. In both modes the
 * plain memcpy() is the fallback when no SIMD variant applies.
 *
 * The fast_memcpy_* variants are not defined in this block; presumably
 * they come from the aclib_template.c includes via RENAME() -- confirm.
 */
void * fast_memcpy(void * to, const void * from, size_t len)
{
#if defined (RUNTIME_CPUDETECT)
#if defined (CAN_COMPILE_X86_ASM)
	/* Probe the capability flags, fastest variant first. */
	if (gCpuCaps.hasSSE2) {
		fast_memcpy_SSE(to, from, len);
		return to;
	}
	if (gCpuCaps.hasMMX2) {
		fast_memcpy_MMX2(to, from, len);
		return to;
	}
	if (gCpuCaps.has3DNow) {
		fast_memcpy_3DNow(to, from, len);
		return to;
	}
	if (gCpuCaps.hasMMX) {
		fast_memcpy_MMX(to, from, len);
		return to;
	}
#endif /* CAN_COMPILE_X86_ASM */
	/* No capability flag matched (or no x86 asm): use the standard memcpy. */
	memcpy(to, from, len);
#elif defined (HAVE_SSE2)
	fast_memcpy_SSE(to, from, len);
#elif defined (HAVE_MMX2)
	fast_memcpy_MMX2(to, from, len);
#elif defined (HAVE_3DNOW)
	fast_memcpy_3DNow(to, from, len);
#elif defined (HAVE_MMX)
	fast_memcpy_MMX(to, from, len);
#else
	/* Nothing better selected at compile time: use the standard memcpy. */
	memcpy(to, from, len);
#endif /* RUNTIME_CPUDETECT / HAVE_* selection */
	return to;
}
f0fbf1a9bf31
Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff
changeset
|
149 |
/*
 * mem2agpcpy - copy len bytes from `from` to `to` and return `to`.
 *
 * Dispatches exactly like fast_memcpy, but to the mem2agpcpy_* variants:
 * at run time via gCpuCaps when RUNTIME_CPUDETECT (and
 * CAN_COMPILE_X86_ASM) is defined, otherwise at compile time via the
 * HAVE_* macros, with the standard memcpy() as fallback.
 *
 * The mem2agpcpy_* variants are not defined in this block; presumably
 * they come from the aclib_template.c includes via RENAME() -- confirm.
 *
 * Returns the destination pointer, matching memcpy() and fast_memcpy().
 */
void * mem2agpcpy(void * to, const void * from, size_t len)
{
#ifdef RUNTIME_CPUDETECT
#ifdef CAN_COMPILE_X86_ASM
	// ordered by speed, fastest first
	if(gCpuCaps.hasSSE2)
		mem2agpcpy_SSE(to, from, len);
	else if(gCpuCaps.hasMMX2)
		mem2agpcpy_MMX2(to, from, len);
	else if(gCpuCaps.has3DNow)
		mem2agpcpy_3DNow(to, from, len);
	else if(gCpuCaps.hasMMX)
		mem2agpcpy_MMX(to, from, len);
	else
#endif //CAN_COMPILE_X86_ASM
		memcpy(to, from, len); // prior to MMX we use the standard memcpy
#else
#ifdef HAVE_SSE2
	mem2agpcpy_SSE(to, from, len);
#elif defined (HAVE_MMX2)
	mem2agpcpy_MMX2(to, from, len);
#elif defined (HAVE_3DNOW)
	mem2agpcpy_3DNow(to, from, len);
#elif defined (HAVE_MMX)
	mem2agpcpy_MMX(to, from, len);
#else
	memcpy(to, from, len); // prior to MMX we use the standard memcpy
#endif

#endif //!RUNTIME_CPUDETECT
	// The function is declared void *, so it must return a value:
	// falling off the end left the result undefined for callers.
	return to;
}
181 | |
182 #endif /* use fastmemcpy */ | |
183 |