changeset 28894:b29169fccda9

Fix and restructure fastmemcpybench. It is now a single binary that runs all available memcpy variants and prints benchmark results for each of them.
author diego
date Tue, 10 Mar 2009 10:05:09 +0000
parents 33a7261a0c30
children 061893d7d0c3
files Makefile TOOLS/fastmem.sh TOOLS/fastmemcpybench.c
diffstat 3 files changed, 140 insertions(+), 45 deletions(-) [+]
line wrap: on
line diff
--- a/Makefile	Tue Mar 10 02:21:49 2009 +0000
+++ b/Makefile	Tue Mar 10 10:05:09 2009 +0000
@@ -986,7 +986,7 @@
 testsclean:
 	-rm -f $(foreach file,$(TESTS),$(call ADD_ALL_EXESUFS,$(file)))
 
-TOOLS = $(addprefix TOOLS/,alaw-gen asfinfo avi-fix avisubdump compare dump_mp4 movinfo netstream subrip vivodump)
+TOOLS = $(addprefix TOOLS/,alaw-gen asfinfo avi-fix avisubdump compare dump_mp4 fastmemcpybench movinfo netstream subrip vivodump)
 
 ifdef ARCH_X86
 TOOLS += TOOLS/modify_reg
@@ -999,7 +999,7 @@
 
 toolsclean:
 	-rm -f $(foreach file,$(ALLTOOLS),$(call ADD_ALL_EXESUFS,$(file)))
-	-rm -f TOOLS/fastmem-* TOOLS/realcodecs/*.so.6.0
+	-rm -f TOOLS/realcodecs/*.so.6.0
 
 TOOLS/bmovl-test$(EXESUF): -lSDL_image
 
@@ -1016,27 +1016,11 @@
 TOOLS/netstream$(EXESUF) TOOLS/vivodump$(EXESUF): $(subst mplayer.o,mplayer-nomain.o,$(OBJS_MPLAYER)) $(filter-out %mencoder.o,$(OBJS_MENCODER)) $(OBJS_COMMON) $(COMMON_LIBS)
 	$(CC) $(CFLAGS) -o $@ $^ $(EXTRALIBS_MPLAYER) $(EXTRALIBS_MENCODER) $(COMMON_LDFLAGS)
 
-TOOLS/fastmem-c$(EXESUF):       CFLAGS += -DHAVE_MMX=0 -DHAVE_AMD3DNOW=0 -DHAVE_MMX2=0 -DHAVE_SSE=0 -DNAME=\"C\"
-TOOLS/fastmem-mmx$(EXESUF):     CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=0 -DHAVE_MMX2=0 -DHAVE_SSE=0 -DNAME=\"MMX\"
-TOOLS/fastmem-k6$(EXESUF):      CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=1 -DHAVE_MMX2=0 -DHAVE_SSE=0 -DNAME=\"K6\"
-TOOLS/fastmem-k7$(EXESUF):      CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=1 -DHAVE_MMX2=1 -DHAVE_SSE=0 -DNAME=\"K7\"
-TOOLS/fastmem-sse$(EXESUF):     CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=0 -DHAVE_MMX2=1 -DHAVE_SSE=1 -DNAME=\"SSE\"
-TOOLS/fastmem-mga-mmx$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=0 -DHAVE_MMX2=0 -DHAVE_SSE=0 -DNAME=\"MGA-MMX\"  -DCONFIG_MGA
-TOOLS/fastmem-mga-k6$(EXESUF):  CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=1 -DHAVE_MMX2=0 -DHAVE_SSE=0 -DNAME=\"MGA-K6\"   -DCONFIG_MGA
-TOOLS/fastmem-mga-k7$(EXESUF):  CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=1 -DHAVE_MMX2=1 -DHAVE_SSE=0 -DNAME=\"MGA-K7\"   -DCONFIG_MGA
-TOOLS/fastmem-mga-sse$(EXESUF): CFLAGS += -DHAVE_MMX=1 -DHAVE_AMD3DNOW=0 -DHAVE_MMX2=1 -DHAVE_SSE=1 -DNAME=\"MGA-SSE\"  -DCONFIG_MGA
-
-fastmemcpybench: $(addsuffix $(EXESUF),$(addprefix TOOLS/fastmem-,c mmx k6 k7 sse mga-mmx mga-k6 mga-k7 mga-sse))
-
-TOOLS/fastmem-%$(EXESUF): TOOLS/fastmemcpybench.c libvo/aclib.c
-	$(CC) $(CFLAGS) -o $@ $^
-
 REAL_SRCS    = $(wildcard TOOLS/realcodecs/*.c)
 REAL_TARGETS = $(REAL_SRCS:.c=.so.6.0)
 
 realcodecs: $(REAL_TARGETS)
-
-fastmemcpybench realcodecs: CFLAGS += -g
+realcodecs: CFLAGS += -g
 
 %.so.6.0: %.o
 	ld -shared -o $@ $< -ldl -lc
--- a/TOOLS/fastmem.sh	Tue Mar 10 02:21:49 2009 +0000
+++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
@@ -1,21 +0,0 @@
-
-sync
-sleep 2
-./fastmem-k6
-sleep 2
-./fastmem-k7
-sleep 2
-./fastmem-mmx
-sleep 2
-./fastmem-sse
-sleep 2
-./fastmem-c
-sleep 2
-./fastmem2-k6
-sleep 2
-./fastmem2-k7
-sleep 2
-./fastmem2-mmx
-sleep 2
-./fastmem2-sse
-sleep 2
--- a/TOOLS/fastmemcpybench.c	Tue Mar 10 02:21:49 2009 +0000
+++ b/TOOLS/fastmemcpybench.c	Tue Mar 10 10:05:09 2009 +0000
@@ -7,8 +7,6 @@
  * was not confirmed through testing.
 */
 
-/* According to Uoti this code is broken. */
-
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -18,7 +16,92 @@
 #include <sys/mman.h>
 #include <sys/time.h>
 #include <inttypes.h>
-#include "libvo/fastmemcpy.h"
+
+#include "config.h"
+#include "cpudetect.h"
+
+#define BLOCK_SIZE 4096
+#define CONFUSION_FACTOR 0
+
+#if HAVE_MMX
+#define COMPILE_MMX
+#endif
+
+#if HAVE_MMX2
+#define COMPILE_MMX2
+#endif
+
+#if HAVE_AMD3DNOW
+#define COMPILE_AMD3DNOW
+#endif
+
+#if HAVE_SSE
+#define COMPILE_SSE
+#endif
+
+#ifdef COMPILE_MMX
+#undef RENAME
+#undef HAVE_MMX
+#undef HAVE_MMX2
+#undef HAVE_AMD3DNOW
+#undef HAVE_SSE
+#undef HAVE_SSE2
+#define HAVE_MMX 1
+#define HAVE_MMX2 0
+#define HAVE_AMD3DNOW 0
+#define HAVE_SSE 0
+#define HAVE_SSE2 0
+#define RENAME(a) a ## _MMX
+#include "libvo/aclib_template.c"
+#endif
+
+#ifdef COMPILE_MMX2
+#undef RENAME
+#undef HAVE_MMX
+#undef HAVE_MMX2
+#undef HAVE_AMD3DNOW
+#undef HAVE_SSE
+#undef HAVE_SSE2
+#define HAVE_MMX 1
+#define HAVE_MMX2 1
+#define HAVE_AMD3DNOW 0
+#define HAVE_SSE 0
+#define HAVE_SSE2 0
+#define RENAME(a) a ## _MMX2
+#include "libvo/aclib_template.c"
+#endif
+
+#ifdef COMPILE_AMD3DNOW
+#undef RENAME
+#undef HAVE_MMX
+#undef HAVE_MMX2
+#undef HAVE_AMD3DNOW
+#undef HAVE_SSE
+#undef HAVE_SSE2
+#define HAVE_MMX 1
+#define HAVE_MMX2 0
+#define HAVE_AMD3DNOW 1
+#define HAVE_SSE 0
+#define HAVE_SSE2 0
+#define RENAME(a) a ## _3DNow
+#include "libvo/aclib_template.c"
+#endif
+
+#ifdef COMPILE_SSE
+#undef RENAME
+#undef HAVE_MMX
+#undef HAVE_MMX2
+#undef HAVE_AMD3DNOW
+#undef HAVE_SSE
+#undef HAVE_SSE2
+#define HAVE_MMX 1
+#define HAVE_MMX2 1
+#define HAVE_AMD3DNOW 0
+#define HAVE_SSE 1
+#define HAVE_SSE2 1
+#define RENAME(a) a ## _SSE
+#include "libvo/aclib_template.c"
+#endif
 
 //#define ARR_SIZE 100000
 #define ARR_SIZE (1024*768*2)
@@ -114,11 +197,60 @@
     t  = GetTimer();
     v1 = read_tsc();
     for (i = 0; i < 100; i++)
-        fast_memcpy(marr1, marr2, ARR_SIZE - 16);
+        memcpy(marr1, marr2, ARR_SIZE - 16);
+    v2 = read_tsc();
+    t  = GetTimer() - t;
+    // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
+    printf("libc:   CPU clocks=%llu = %dus  (%5.3ffps)  %5.1fMB/s\n", v2-v1, t,
+           100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
+
+#ifdef COMPILE_MMX /* not HAVE_MMX: the HAVE_* macros were redefined before each aclib_template.c include */
+    t  = GetTimer();
+    v1 = read_tsc();
+    for (i = 0; i < 100; i++)
+        fast_memcpy_MMX(marr1, marr2, ARR_SIZE - 16);
+    v2 = read_tsc();
+    t  = GetTimer() - t;
+    // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
+    printf("MMX:    CPU clocks=%llu = %dus  (%5.3ffps)  %5.1fMB/s\n", v2-v1, t,
+           100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
+#endif /* COMPILE_MMX */
+
+#ifdef COMPILE_AMD3DNOW /* HAVE_AMD3DNOW is 0 here whenever the SSE template section was compiled last */
+    t  = GetTimer();
+    v1 = read_tsc();
+    for (i = 0; i < 100; i++)
+        fast_memcpy_3DNow(marr1, marr2, ARR_SIZE - 16);
     v2 = read_tsc();
     t  = GetTimer() - t;
     // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
-    printf(NAME ": CPU clocks=%llu = %dus  (%5.3ffps)  %5.1fMB/s\n", v2-v1, t,
+    printf("3DNow!: CPU clocks=%llu = %dus  (%5.3ffps)  %5.1fMB/s\n", v2-v1, t,
+           100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
+#endif /* COMPILE_AMD3DNOW */
+
+#ifdef COMPILE_MMX2 /* HAVE_MMX2 is 0 here whenever the 3DNow! template section was compiled last */
+    t  = GetTimer();
+    v1 = read_tsc();
+    for (i = 0; i < 100; i++)
+        fast_memcpy_MMX2(marr1, marr2, ARR_SIZE - 16);
+    v2 = read_tsc();
+    t  = GetTimer() - t;
+    // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
+    printf("MMX2:   CPU clocks=%llu = %dus  (%5.3ffps)  %5.1fMB/s\n", v2-v1, t,
            100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
+#endif /* COMPILE_MMX2 */
+
+#ifdef COMPILE_SSE /* COMPILE_SSE was set from config.h's HAVE_SSE before any redefinition */
+    t  = GetTimer();
+    v1 = read_tsc();
+    for (i = 0; i < 100; i++)
+        fast_memcpy_SSE(marr1, marr2, ARR_SIZE - 16);
+    v2 = read_tsc();
+    t  = GetTimer() - t;
+    // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
+    printf("SSE:    CPU clocks=%llu = %dus  (%5.3ffps)  %5.1fMB/s\n", v2-v1, t,
+           100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
+#endif /* COMPILE_SSE */
+
     return 0;
 }