annotate libvo/aclib.c @ 7492:5465cbd5c5ef

Modern versions of OpenSSH listen on localhost to forward the X11 connection (X11UseLocalhost defaults to yes). This patch treats any DISPLAY environment whose port is greater than or equal to 10 as non-local; otherwise mplayer attempts a local optimization against a distant X server, which doesn't work. Patch by Denis.Ducamp@groar.org
author arpi
date Mon, 23 Sep 2002 21:17:30 +0000
parents 113d66d78967
children 9fc45fe0d444
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
698
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
1 #include "../config.h"
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
2 #ifdef USE_FASTMEMCPY
698
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
3
3077
99f6db3255aa 10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents: 1123
diff changeset
4 /*
698
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
5 aclib - advanced C library ;)
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
6 This file contains functions which improve and expand standard C-library
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
7 see aclib_template.c ... this file only contains runtime cpu detection and config options stuff
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
8 runtime cpu detection by michael niedermayer (michaelni@gmx.at) is under GPL
698
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
9 */
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
10 #include <stddef.h>
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
11 #include "../cpudetect.h"
698
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
12
3077
99f6db3255aa 10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents: 1123
diff changeset
13 #define BLOCK_SIZE 4096
99f6db3255aa 10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents: 1123
diff changeset
14 #define CONFUSION_FACTOR 0
99f6db3255aa 10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents: 1123
diff changeset
15 //Feel free to fine-tune the above 2, it might be possible to get some speedup with them :)
99f6db3255aa 10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents: 1123
diff changeset
16
99f6db3255aa 10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents: 1123
diff changeset
17 //#define STATISTICS
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
18 #ifdef ARCH_X86
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
19 #define CAN_COMPILE_X86_ASM
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
20 #endif
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
21
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
22 //Note: we have MMX, MMX2 and 3DNOW versions; there is no 3DNOW+MMX2 one
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
23 //Plain C versions
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
24 //#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
25 //#define COMPILE_C
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
26 //#endif
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
27
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
28 #ifdef CAN_COMPILE_X86_ASM
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
29
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
30 #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
31 #define COMPILE_MMX
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
32 #endif
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
33
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
34 #if (defined (HAVE_MMX2) && !defined (HAVE_SSE2)) || defined (RUNTIME_CPUDETECT)
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
35 #define COMPILE_MMX2
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
36 #endif
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
37
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
38 #if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
39 #define COMPILE_3DNOW
698
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
40 #endif
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
41
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
42 #if defined (HAVE_SSE2) || defined (RUNTIME_CPUDETECT)
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
43 #define COMPILE_SSE
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
44 #endif
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
45
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
46 #undef HAVE_MMX
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
47 #undef HAVE_MMX2
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
48 #undef HAVE_3DNOW
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
49 #undef HAVE_SSE
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
50 #undef HAVE_SSE2
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
51 #undef ARCH_X86
698
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
52 /*
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
53 #ifdef COMPILE_C
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
54 #undef HAVE_MMX
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
55 #undef HAVE_MMX2
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
56 #undef HAVE_3DNOW
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
57 #undef ARCH_X86
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
58 #define RENAME(a) a ## _C
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
59 #include "aclib_template.c"
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
60 #endif
698
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
61 */
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
62 //MMX versions
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
63 #ifdef COMPILE_MMX
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
64 #undef RENAME
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
65 #define HAVE_MMX
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
66 #undef HAVE_MMX2
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
67 #undef HAVE_3DNOW
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
68 #undef HAVE_SSE
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
69 #undef HAVE_SSE2
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
70 #define ARCH_X86
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
71 #define RENAME(a) a ## _MMX
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
72 #include "aclib_template.c"
698
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
73 #endif
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
74
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
75 //MMX2 versions
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
76 #ifdef COMPILE_MMX2
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
77 #undef RENAME
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
78 #define HAVE_MMX
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
79 #define HAVE_MMX2
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
80 #undef HAVE_3DNOW
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
81 #undef HAVE_SSE
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
82 #undef HAVE_SSE2
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
83 #define ARCH_X86
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
84 #define RENAME(a) a ## _MMX2
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
85 #include "aclib_template.c"
698
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
86 #endif
3077
99f6db3255aa 10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents: 1123
diff changeset
87
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
88 //3DNOW versions
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
89 #ifdef COMPILE_3DNOW
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
90 #undef RENAME
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
91 #define HAVE_MMX
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
92 #undef HAVE_MMX2
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
93 #define HAVE_3DNOW
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
94 #undef HAVE_SSE
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
95 #undef HAVE_SSE2
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
96 #define ARCH_X86
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
97 #define RENAME(a) a ## _3DNow
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
98 #include "aclib_template.c"
3077
99f6db3255aa 10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents: 1123
diff changeset
99 #endif
99f6db3255aa 10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents: 1123
diff changeset
100
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
101 //SSE versions (only used on SSE2 cpus)
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
102 #ifdef COMPILE_SSE
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
103 #undef RENAME
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
104 #define HAVE_MMX
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
105 #define HAVE_MMX2
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
106 #undef HAVE_3DNOW
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
107 #define HAVE_SSE
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
108 #define HAVE_SSE2
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
109 #define ARCH_X86
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
110 #define RENAME(a) a ## _SSE
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
111 #include "aclib_template.c"
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
112 #endif
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
113
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
114 #endif // CAN_COMPILE_X86_ASM
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
115
3077
99f6db3255aa 10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents: 1123
diff changeset
116
7072
113d66d78967 removed nonsense 'inline'
arpi
parents: 5543
diff changeset
117 void * fast_memcpy(void * to, const void * from, size_t len)
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
118 {
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
119 #ifdef RUNTIME_CPUDETECT
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
120 #ifdef CAN_COMPILE_X86_ASM
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
121 // ordered by speed, fastest first
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
122 if(gCpuCaps.hasSSE2)
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
123 fast_memcpy_SSE(to, from, len);
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
124 else if(gCpuCaps.hasMMX2)
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
125 fast_memcpy_MMX2(to, from, len);
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
126 else if(gCpuCaps.has3DNow)
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
127 fast_memcpy_3DNow(to, from, len);
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
128 else if(gCpuCaps.hasMMX)
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
129 fast_memcpy_MMX(to, from, len);
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
130 else
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
131 #endif //CAN_COMPILE_X86_ASM
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
132 memcpy(to, from, len); // prior to mmx we use the standard memcpy
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
133 #else
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
134 #ifdef HAVE_SSE2
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
135 fast_memcpy_SSE(to, from, len);
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
136 #elif defined (HAVE_MMX2)
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
137 fast_memcpy_MMX2(to, from, len);
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
138 #elif defined (HAVE_3DNOW)
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
139 fast_memcpy_3DNow(to, from, len);
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
140 #elif defined (HAVE_MMX)
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
141 fast_memcpy_MMX(to, from, len);
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
142 #else
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
143 memcpy(to, from, len); // prior to mmx we use the standard memcpy
698
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
144 #endif
3077
99f6db3255aa 10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents: 1123
diff changeset
145
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
146 #endif //!RUNTIME_CPUDETECT
5543
c75f75806af1 memcpy must return destination ptr patch by Adam <adam@cfar.umd.edu>
michael
parents: 5208
diff changeset
147 return to;
698
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
148 }
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
149
7072
113d66d78967 removed nonsense 'inline'
arpi
parents: 5543
diff changeset
150 void * mem2agpcpy(void * to, const void * from, size_t len)
4681
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
151 {
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
152 #ifdef RUNTIME_CPUDETECT
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
153 #ifdef CAN_COMPILE_X86_ASM
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
154 // ordered by speed, fastest first
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
155 if(gCpuCaps.hasSSE2)
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
156 mem2agpcpy_SSE(to, from, len);
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
157 else if(gCpuCaps.hasMMX2)
4681
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
158 mem2agpcpy_MMX2(to, from, len);
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
159 else if(gCpuCaps.has3DNow)
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
160 mem2agpcpy_3DNow(to, from, len);
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
161 else if(gCpuCaps.hasMMX)
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
162 mem2agpcpy_MMX(to, from, len);
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
163 else
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
164 #endif //CAN_COMPILE_X86_ASM
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
165 memcpy(to, from, len); // prior to mmx we use the standard memcpy
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
166 #else
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
167 #ifdef HAVE_SSE2
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
168 mem2agpcpy_SSE(to, from, len);
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
169 #elif defined (HAVE_MMX2)
4681
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
170 mem2agpcpy_MMX2(to, from, len);
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
171 #elif defined (HAVE_3DNOW)
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
172 mem2agpcpy_3DNow(to, from, len);
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
173 #elif defined (HAVE_MMX)
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
174 mem2agpcpy_MMX(to, from, len);
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
175 #else
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
176 memcpy(to, from, len); // prior to mmx we use the standard memcpy
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
177 #endif
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
178
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
179 #endif //!RUNTIME_CPUDETECT
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
180 }
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
181
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
182 #endif /* use fastmemcpy */
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
183