Mercurial > mplayer.hg
changeset 5208:b08228af4098
fixing runtime cpudetect with pre SSE cpus
author | michael |
---|---|
date | Tue, 19 Mar 2002 22:32:45 +0000 |
parents | d337cc4ab0ee |
children | 591e470e805b |
files | libvo/aclib.c |
diffstat | 1 files changed, 38 insertions(+), 7 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/libvo/aclib.c Tue Mar 19 22:12:18 2002 +0000
+++ b/libvo/aclib.c Tue Mar 19 22:32:45 2002 +0000
@@ -15,7 +15,6 @@
 //Feel free to fine-tune the above 2, it might be possible to get some speedup with them :)
 
 //#define STATISTICS
-
 #ifdef ARCH_X86
 #define CAN_COMPILE_X86_ASM
 #endif
@@ -32,7 +31,7 @@
 #define COMPILE_MMX
 #endif
 
-#if defined (HAVE_MMX2) || defined (RUNTIME_CPUDETECT)
+#if (defined (HAVE_MMX2) && !defined (HAVE_SSE2)) || defined (RUNTIME_CPUDETECT)
 #define COMPILE_MMX2
 #endif
 
@@ -40,9 +39,15 @@
 #define COMPILE_3DNOW
 #endif
 
+#if defined (HAVE_SSE2) || defined (RUNTIME_CPUDETECT)
+#define COMPILE_SSE
+#endif
+
 #undef HAVE_MMX
 #undef HAVE_MMX2
 #undef HAVE_3DNOW
+#undef HAVE_SSE
+#undef HAVE_SSE2
 #undef ARCH_X86
 /*
 #ifdef COMPILE_C
@@ -60,6 +65,8 @@
 #define HAVE_MMX
 #undef HAVE_MMX2
 #undef HAVE_3DNOW
+#undef HAVE_SSE
+#undef HAVE_SSE2
 #define ARCH_X86
 #define RENAME(a) a ## _MMX
 #include "aclib_template.c"
@@ -71,6 +78,8 @@
 #define HAVE_MMX
 #define HAVE_MMX2
 #undef HAVE_3DNOW
+#undef HAVE_SSE
+#undef HAVE_SSE2
 #define ARCH_X86
 #define RENAME(a) a ## _MMX2
 #include "aclib_template.c"
@@ -82,11 +91,26 @@
 #define HAVE_MMX
 #undef HAVE_MMX2
 #define HAVE_3DNOW
+#undef HAVE_SSE
+#undef HAVE_SSE2
 #define ARCH_X86
 #define RENAME(a) a ## _3DNow
 #include "aclib_template.c"
 #endif
 
+//SSE versions (only used on SSE2 cpus)
+#ifdef COMPILE_SSE
+#undef RENAME
+#define HAVE_MMX
+#define HAVE_MMX2
+#undef HAVE_3DNOW
+#define HAVE_SSE
+#define HAVE_SSE2
+#define ARCH_X86
+#define RENAME(a) a ## _SSE
+#include "aclib_template.c"
+#endif
+
 #endif // CAN_COMPILE_X86_ASM
 
 
@@ -95,7 +119,9 @@
 #ifdef RUNTIME_CPUDETECT
 #ifdef CAN_COMPILE_X86_ASM
 	// ordered per speed fasterst first
-	if(gCpuCaps.hasMMX2)
+	if(gCpuCaps.hasSSE2)
+		fast_memcpy_SSE(to, from, len);
+	else if(gCpuCaps.hasMMX2)
 		fast_memcpy_MMX2(to, from, len);
 	else if(gCpuCaps.has3DNow)
 		fast_memcpy_3DNow(to, from, len);
@@ -105,7 +131,9 @@
 #endif //CAN_COMPILE_X86_ASM
 	memcpy(to, from, len); // prior to mmx we use the standart memcpy
 #else
-#ifdef HAVE_MMX2
+#ifdef HAVE_SSE2
+	fast_memcpy_SSE(to, from, len);
+#elif defined (HAVE_MMX2)
 	fast_memcpy_MMX2(to, from, len);
 #elif defined (HAVE_3DNOW)
 	fast_memcpy_3DNow(to, from, len);
@@ -123,7 +151,9 @@
 #ifdef RUNTIME_CPUDETECT
 #ifdef CAN_COMPILE_X86_ASM
 	// ordered per speed fasterst first
-	if(gCpuCaps.hasMMX2)
+	if(gCpuCaps.hasSSE2)
+		mem2agpcpy_SSE(to, from, len);
+	else if(gCpuCaps.hasMMX2)
 		mem2agpcpy_MMX2(to, from, len);
 	else if(gCpuCaps.has3DNow)
 		mem2agpcpy_3DNow(to, from, len);
@@ -133,7 +163,9 @@
 #endif //CAN_COMPILE_X86_ASM
 	memcpy(to, from, len); // prior to mmx we use the standart memcpy
 #else
-#ifdef HAVE_MMX2
+#ifdef HAVE_SSE2
+	mem2agpcpy_SSE(to, from, len);
+#elif defined (HAVE_MMX2)
 	mem2agpcpy_MMX2(to, from, len);
 #elif defined (HAVE_3DNOW)
 	mem2agpcpy_3DNow(to, from, len);
@@ -146,6 +178,5 @@
 #endif //!RUNTIME_CPUDETECT
 }
 
-
 #endif /* use fastmemcpy */
 
```