annotate libvo/aclib.c @ 18548:d73877c6bb54

synced with 1.87
author gabrov
date Thu, 18 May 2006 17:45:20 +0000
parents e047e70a9767
children 6289755ce7c7
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
12650
ac3fd2ff2561 Unify the config.h #include, use "config.h" instead of "../config.h"
diego
parents: 12492
diff changeset
1 #include "config.h"
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
2 #ifdef USE_FASTMEMCPY
698
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
3
3077
99f6db3255aa 10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents: 1123
diff changeset
4 /*
698
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
5 aclib - advanced C library ;)
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
6 This file contains functions which improve and extend the standard C library
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
7 see aclib_template.c ... this file only contains runtime cpu detection and config options stuff
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
8 runtime cpu detection by michael niedermayer (michaelni@gmx.at) is under GPL
698
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
9 */
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
10 #include <stddef.h>
13787
e047e70a9767 Handle "xxx.h" vs "../xxx.h" include paths in a consistent way.
diego
parents: 13720
diff changeset
11 #include "cpudetect.h"
8123
9fc45fe0d444 *HUGE* set of compiler warning fixes, unused variables removal
arpi
parents: 7072
diff changeset
12 #include "fastmemcpy.h"
12492
4b8417674f1c fix crash due to fast_memcpy calling itself instead of libc memcpy
reimar
parents: 8127
diff changeset
13 #undef memcpy
698
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
14
3077
99f6db3255aa 10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents: 1123
diff changeset
15 #define BLOCK_SIZE 4096
99f6db3255aa 10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents: 1123
diff changeset
16 #define CONFUSION_FACTOR 0
99f6db3255aa 10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents: 1123
diff changeset
17 //Feel free to fine-tune the above 2, it might be possible to get some speedup with them :)
99f6db3255aa 10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents: 1123
diff changeset
18
99f6db3255aa 10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents: 1123
diff changeset
19 //#define STATISTICS
13720
821f464b4d90 adapting existing mmx/mmx2/sse/3dnow optimizations so they work on x86_64
aurel
parents: 12650
diff changeset
20 #if defined(ARCH_X86) || defined(ARCH_X86_64)
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
21 #define CAN_COMPILE_X86_ASM
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
22 #endif
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
23
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
24 //Note: we have MMX, MMX2 and 3DNOW versions; there is no combined 3DNOW+MMX2 one
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
25 //Plain C versions
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
26 //#if !defined (HAVE_MMX) || defined (RUNTIME_CPUDETECT)
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
27 //#define COMPILE_C
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
28 //#endif
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
29
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
30 #ifdef CAN_COMPILE_X86_ASM
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
31
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
32 #if (defined (HAVE_MMX) && !defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
33 #define COMPILE_MMX
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
34 #endif
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
35
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
36 #if (defined (HAVE_MMX2) && !defined (HAVE_SSE2)) || defined (RUNTIME_CPUDETECT)
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
37 #define COMPILE_MMX2
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
38 #endif
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
39
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
40 #if (defined (HAVE_3DNOW) && !defined (HAVE_MMX2)) || defined (RUNTIME_CPUDETECT)
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
41 #define COMPILE_3DNOW
698
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
42 #endif
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
43
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
44 #if defined (HAVE_SSE2) || defined (RUNTIME_CPUDETECT)
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
45 #define COMPILE_SSE
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
46 #endif
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
47
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
48 #undef HAVE_MMX
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
49 #undef HAVE_MMX2
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
50 #undef HAVE_3DNOW
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
51 #undef HAVE_SSE
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
52 #undef HAVE_SSE2
698
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
53 /*
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
54 #ifdef COMPILE_C
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
55 #undef HAVE_MMX
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
56 #undef HAVE_MMX2
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
57 #undef HAVE_3DNOW
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
58 #undef ARCH_X86
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
59 #define RENAME(a) a ## _C
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
60 #include "aclib_template.c"
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
61 #endif
698
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
62 */
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
63 //MMX versions
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
64 #ifdef COMPILE_MMX
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
65 #undef RENAME
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
66 #define HAVE_MMX
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
67 #undef HAVE_MMX2
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
68 #undef HAVE_3DNOW
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
69 #undef HAVE_SSE
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
70 #undef HAVE_SSE2
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
71 #define RENAME(a) a ## _MMX
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
72 #include "aclib_template.c"
698
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
73 #endif
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
74
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
75 //MMX2 versions
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
76 #ifdef COMPILE_MMX2
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
77 #undef RENAME
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
78 #define HAVE_MMX
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
79 #define HAVE_MMX2
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
80 #undef HAVE_3DNOW
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
81 #undef HAVE_SSE
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
82 #undef HAVE_SSE2
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
83 #define RENAME(a) a ## _MMX2
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
84 #include "aclib_template.c"
698
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
85 #endif
3077
99f6db3255aa 10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents: 1123
diff changeset
86
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
87 //3DNOW versions
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
88 #ifdef COMPILE_3DNOW
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
89 #undef RENAME
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
90 #define HAVE_MMX
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
91 #undef HAVE_MMX2
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
92 #define HAVE_3DNOW
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
93 #undef HAVE_SSE
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
94 #undef HAVE_SSE2
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
95 #define RENAME(a) a ## _3DNow
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
96 #include "aclib_template.c"
3077
99f6db3255aa 10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents: 1123
diff changeset
97 #endif
99f6db3255aa 10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents: 1123
diff changeset
98
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
99 //SSE versions (only used on SSE2 cpus)
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
100 #ifdef COMPILE_SSE
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
101 #undef RENAME
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
102 #define HAVE_MMX
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
103 #define HAVE_MMX2
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
104 #undef HAVE_3DNOW
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
105 #define HAVE_SSE
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
106 #define HAVE_SSE2
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
107 #define RENAME(a) a ## _SSE
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
108 #include "aclib_template.c"
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
109 #endif
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
110
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
111 #endif // CAN_COMPILE_X86_ASM
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
112
3077
99f6db3255aa 10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents: 1123
diff changeset
113
7072
113d66d78967 removed nonsense 'inline'
arpi
parents: 5543
diff changeset
114 void * fast_memcpy(void * to, const void * from, size_t len)
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
115 {
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
116 #ifdef RUNTIME_CPUDETECT
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
117 #ifdef CAN_COMPILE_X86_ASM
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
118 // ordered by speed, fastest first
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
119 if(gCpuCaps.hasSSE2)
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
120 fast_memcpy_SSE(to, from, len);
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
121 else if(gCpuCaps.hasMMX2)
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
122 fast_memcpy_MMX2(to, from, len);
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
123 else if(gCpuCaps.has3DNow)
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
124 fast_memcpy_3DNow(to, from, len);
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
125 else if(gCpuCaps.hasMMX)
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
126 fast_memcpy_MMX(to, from, len);
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
127 else
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
128 #endif //CAN_COMPILE_X86_ASM
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
129 memcpy(to, from, len); // prior to MMX we use the standard memcpy
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
130 #else
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
131 #ifdef HAVE_SSE2
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
132 fast_memcpy_SSE(to, from, len);
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
133 #elif defined (HAVE_MMX2)
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
134 fast_memcpy_MMX2(to, from, len);
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
135 #elif defined (HAVE_3DNOW)
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
136 fast_memcpy_3DNow(to, from, len);
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
137 #elif defined (HAVE_MMX)
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
138 fast_memcpy_MMX(to, from, len);
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
139 #else
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
140 memcpy(to, from, len); // prior to MMX we use the standard memcpy
698
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
141 #endif
3077
99f6db3255aa 10-20% faster fastmemcpy :) on my p3 at least but the algo is mostly from "amd athlon processor x86 code optimization guide" so it should be faster for amd chips too, but i fear it might be slower for mem->vram copies (someone should check that, i cant) ... there are 2 #defines to finetune it (BLOCK_SIZE & CONFUSION_FACTOR)
michael
parents: 1123
diff changeset
142
3393
3624cd351618 runtime cpu detection
michael
parents: 3077
diff changeset
143 #endif //!RUNTIME_CPUDETECT
5543
c75f75806af1 memcpy must return destination ptr patch by Adam <adam@cfar.umd.edu>
michael
parents: 5208
diff changeset
144 return to;
698
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
145 }
f0fbf1a9bf31 Moving fast_memcpy to separate file (Size optimization)
nickols_k
parents:
diff changeset
146
8127
e7153e62a7f4 On non-x86 platforms, memcpy was re-implemented in mplayer and was called
jkeil
parents: 8123
diff changeset
147 #undef mem2agpcpy
7072
113d66d78967 removed nonsense 'inline'
arpi
parents: 5543
diff changeset
148 void * mem2agpcpy(void * to, const void * from, size_t len)
4681
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
149 {
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
150 #ifdef RUNTIME_CPUDETECT
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
151 #ifdef CAN_COMPILE_X86_ASM
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
152 // ordered by speed, fastest first
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
153 if(gCpuCaps.hasSSE2)
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
154 mem2agpcpy_SSE(to, from, len);
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
155 else if(gCpuCaps.hasMMX2)
4681
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
156 mem2agpcpy_MMX2(to, from, len);
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
157 else if(gCpuCaps.has3DNow)
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
158 mem2agpcpy_3DNow(to, from, len);
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
159 else if(gCpuCaps.hasMMX)
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
160 mem2agpcpy_MMX(to, from, len);
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
161 else
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
162 #endif //CAN_COMPILE_X86_ASM
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
163 memcpy(to, from, len); // prior to MMX we use the standard memcpy
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
164 #else
5208
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
165 #ifdef HAVE_SSE2
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
166 mem2agpcpy_SSE(to, from, len);
b08228af4098 fixing runtime cpudetect with pre SSE cpus
michael
parents: 4681
diff changeset
167 #elif defined (HAVE_MMX2)
4681
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
168 mem2agpcpy_MMX2(to, from, len);
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
169 #elif defined (HAVE_3DNOW)
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
170 mem2agpcpy_3DNow(to, from, len);
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
171 #elif defined (HAVE_MMX)
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
172 mem2agpcpy_MMX(to, from, len);
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
173 #else
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
174 memcpy(to, from, len); // prior to MMX we use the standard memcpy
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
175 #endif
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
176
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
177 #endif //!RUNTIME_CPUDETECT
8123
9fc45fe0d444 *HUGE* set of compiler warning fixes, unused variables removal
arpi
parents: 7072
diff changeset
178 return to;
4681
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
179 }
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
180
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
181 #endif /* use fastmemcpy */
8db59073127e mem2agpcpy()
michael
parents: 3393
diff changeset
182