Mercurial > mplayer.hg
changeset 28894:b29169fccda9
Fix and restructure fastmemcpybench. It is now one binary that runs all
available memcpy variants and prints benchmark results about them.
author | diego |
---|---|
date | Tue, 10 Mar 2009 10:05:09 +0000 |
parents | 33a7261a0c30 |
children | 061893d7d0c3 |
files | Makefile TOOLS/fastmem.sh TOOLS/fastmemcpybench.c |
diffstat | 3 files changed, 140 insertions(+), 45 deletions(-) [+] |
line wrap: on
line diff
```diff
--- a/Makefile Tue Mar 10 02:21:49 2009 +0000
+++ b/Makefile Tue Mar 10 10:05:09 2009 +0000
@@ -986,7 +986,7 @@
 testsclean:
 	-rm -f $(foreach file,$(TESTS),$(call ADD_ALL_EXESUFS,$(file)))
 
-TOOLS = $(addprefix TOOLS/,alaw-gen asfinfo avi-fix avisubdump compare dump_mp4 movinfo netstream subrip vivodump)
+TOOLS = $(addprefix TOOLS/,alaw-gen asfinfo avi-fix avisubdump compare dump_mp4 fastmemcpybench movinfo netstream subrip vivodump)
 
 ifdef ARCH_X86
 TOOLS += TOOLS/modify_reg
@@ -999,7 +999,7 @@
 
 toolsclean:
 	-rm -f $(foreach file,$(ALLTOOLS),$(call ADD_ALL_EXESUFS,$(file)))
-	-rm -f TOOLS/fastmem-* TOOLS/realcodecs/*.so.6.0
+	-rm -f TOOLS/realcodecs/*.so.6.0
 
 TOOLS/bmovl-test$(EXESUF): -lSDL_image
 
@@ -1016,27 +1016,11 @@
 TOOLS/netstream$(EXESUF) TOOLS/vivodump$(EXESUF): $(subst mplayer.o,mplayer-nomain.o,$(OBJS_MPLAYER)) $(filter-out %mencoder.o,$(OBJS_MENCODER)) $(OBJS_COMMON) $(COMMON_LIBS)
 	$(CC) $(CFLAGS) -o $@ $^ $(EXTRALIBS_MPLAYER) $(EXTRALIBS_MENCODER) $(COMMON_LDFLAGS)
 
-TOOLS/fastmem-c$(EXESUF): CFLAGS += -DHAVE_MMX=0 -DHAVE_AMD3DNOW=0 -DHAVE_MMX2=0 -DHAVE_SSE=0 -DNAME=\"C\"
-TOOLS/fastmem-mmx$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=0 -DHAVE_MMX2=0 -DHAVE_SSE=0 -DNAME=\"MMX\"
-TOOLS/fastmem-k6$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=1 -DHAVE_MMX2=0 -DHAVE_SSE=0 -DNAME=\"K6\"
-TOOLS/fastmem-k7$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=1 -DHAVE_MMX2=1 -DHAVE_SSE=0 -DNAME=\"K7\"
-TOOLS/fastmem-sse$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=0 -DHAVE_MMX2=1 -DHAVE_SSE=1 -DNAME=\"SSE\"
-TOOLS/fastmem-mga-mmx$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=0 -DHAVE_MMX2=0 -DHAVE_SSE=0 -DNAME=\"MGA-MMX\" -DCONFIG_MGA
-TOOLS/fastmem-mga-k6$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=1 -DHAVE_MMX2=0 -DHAVE_SSE=0 -DNAME=\"MGA-K6\" -DCONFIG_MGA
-TOOLS/fastmem-mga-k7$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=1 -DHAVE_MMX2=1 -DHAVE_SSE=0 -DNAME=\"MGA-K7\" -DCONFIG_MGA
-TOOLS/fastmem-mga-sse$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=0 -DHAVE_MMX2=1 -DHAVE_SSE=1 -DNAME=\"MGA-SSE\" -DCONFIG_MGA
-
-fastmemcpybench: $(addsuffix $(EXESUF),$(addprefix TOOLS/fastmem-,c mmx k6 k7 sse mga-mmx mga-k6 mga-k7 mga-sse))
-
-TOOLS/fastmem-%$(EXESUF): TOOLS/fastmemcpybench.c libvo/aclib.c
-	$(CC) $(CFLAGS) -o $@ $^
-
 REAL_SRCS = $(wildcard TOOLS/realcodecs/*.c)
 REAL_TARGETS = $(REAL_SRCS:.c=.so.6.0)
 
 realcodecs: $(REAL_TARGETS)
-
-fastmemcpybench realcodecs: CFLAGS += -g
+realcodecs: CFLAGS += -g
 
 %.so.6.0: %.o
 	ld -shared -o $@ $< -ldl -lc
```
```diff
--- a/TOOLS/fastmem.sh Tue Mar 10 02:21:49 2009 +0000
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-
-sync
-sleep 2
-./fastmem-k6
-sleep 2
-./fastmem-k7
-sleep 2
-./fastmem-mmx
-sleep 2
-./fastmem-sse
-sleep 2
-./fastmem-c
-sleep 2
-./fastmem2-k6
-sleep 2
-./fastmem2-k7
-sleep 2
-./fastmem2-mmx
-sleep 2
-./fastmem2-sse
-sleep 2
```
```diff
--- a/TOOLS/fastmemcpybench.c Tue Mar 10 02:21:49 2009 +0000
+++ b/TOOLS/fastmemcpybench.c Tue Mar 10 10:05:09 2009 +0000
@@ -7,8 +7,6 @@
  * was not confirmed through testing.
  */
 
-/* According to Uoti this code is broken. */
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -18,7 +16,92 @@
 #include <sys/mman.h>
 #include <sys/time.h>
 #include <inttypes.h>
-#include "libvo/fastmemcpy.h"
+
+#include "config.h"
+#include "cpudetect.h"
+
+#define BLOCK_SIZE 4096
+#define CONFUSION_FACTOR 0
+
+#if HAVE_MMX
+#define COMPILE_MMX
+#endif
+
+#if HAVE_MMX2
+#define COMPILE_MMX2
+#endif
+
+#if HAVE_AMD3DNOW
+#define COMPILE_AMD3DNOW
+#endif
+
+#if HAVE_SSE
+#define COMPILE_SSE
+#endif
+
+#ifdef COMPILE_MMX
+#undef RENAME
+#undef HAVE_MMX
+#undef HAVE_MMX2
+#undef HAVE_AMD3DNOW
+#undef HAVE_SSE
+#undef HAVE_SSE2
+#define HAVE_MMX 1
+#define HAVE_MMX2 0
+#define HAVE_AMD3DNOW 0
+#define HAVE_SSE 0
+#define HAVE_SSE2 0
+#define RENAME(a) a ## _MMX
+#include "libvo/aclib_template.c"
+#endif
+
+#ifdef COMPILE_MMX2
+#undef RENAME
+#undef HAVE_MMX
+#undef HAVE_MMX2
+#undef HAVE_AMD3DNOW
+#undef HAVE_SSE
+#undef HAVE_SSE2
+#define HAVE_MMX 1
+#define HAVE_MMX2 1
+#define HAVE_AMD3DNOW 0
+#define HAVE_SSE 0
+#define HAVE_SSE2 0
+#define RENAME(a) a ## _MMX2
+#include "libvo/aclib_template.c"
+#endif
+
+#ifdef COMPILE_AMD3DNOW
+#undef RENAME
+#undef HAVE_MMX
+#undef HAVE_MMX2
+#undef HAVE_AMD3DNOW
+#undef HAVE_SSE
+#undef HAVE_SSE2
+#define HAVE_MMX 1
+#define HAVE_MMX2 0
+#define HAVE_AMD3DNOW 1
+#define HAVE_SSE 0
+#define HAVE_SSE2 0
+#define RENAME(a) a ## _3DNow
+#include "libvo/aclib_template.c"
+#endif
+
+#ifdef COMPILE_SSE
+#undef RENAME
+#undef HAVE_MMX
+#undef HAVE_MMX2
+#undef HAVE_AMD3DNOW
+#undef HAVE_SSE
+#undef HAVE_SSE2
+#define HAVE_MMX 1
+#define HAVE_MMX2 1
+#define HAVE_AMD3DNOW 0
+#define HAVE_SSE 1
+#define HAVE_SSE2 1
+#define RENAME(a) a ## _SSE
+#include "libvo/aclib_template.c"
+#endif
 
 //#define ARR_SIZE 100000
 #define ARR_SIZE (1024*768*2)
@@ -114,11 +197,60 @@
     t = GetTimer();
     v1 = read_tsc();
     for (i = 0; i < 100; i++)
-        fast_memcpy(marr1, marr2, ARR_SIZE - 16);
+        memcpy(marr1, marr2, ARR_SIZE - 16);
+    v2 = read_tsc();
+    t = GetTimer() - t;
+    // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
+    printf("libc: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
+           100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
+
+#if HAVE_MMX
+    t = GetTimer();
+    v1 = read_tsc();
+    for (i = 0; i < 100; i++)
+        fast_memcpy_MMX(marr1, marr2, ARR_SIZE - 16);
+    v2 = read_tsc();
+    t = GetTimer() - t;
+    // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
+    printf("MMX: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
+           100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
+#endif
+
+#if HAVE_AMD3DNOW
+    t = GetTimer();
+    v1 = read_tsc();
+    for (i = 0; i < 100; i++)
+        fast_memcpy_3DNow(marr1, marr2, ARR_SIZE - 16);
     v2 = read_tsc();
     t = GetTimer() - t;
     // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
-    printf(NAME ": CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
+    printf("3DNow!: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
+           100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
+#endif
+
+#if HAVE_MMX2
+    t = GetTimer();
+    v1 = read_tsc();
+    for (i = 0; i < 100; i++)
+        fast_memcpy_MMX2(marr1, marr2, ARR_SIZE - 16);
+    v2 = read_tsc();
+    t = GetTimer() - t;
+    // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
+    printf("MMX2: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
            100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
+#endif
+
+#if HAVE_SSE
+    t = GetTimer();
+    v1 = read_tsc();
+    for (i = 0; i < 100; i++)
+        fast_memcpy_SSE(marr1, marr2, ARR_SIZE - 16);
+    v2 = read_tsc();
+    t = GetTimer() - t;
+    // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
+    printf("SSE: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
+           100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
+#endif
+
     return 0;
 }
```