Mercurial > mplayer.hg
annotate TOOLS/fastmemcpybench.c @ 15231:e183a591efee
More fixes by The Wanderer
author | rtognimp |
---|---|
date | Wed, 20 Apr 2005 22:07:08 +0000 |
parents | 40391656ae23 |
children | dd053aeb5442 |
rev | line source |
---|---|
562
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
1 /* |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
2 fastmemcpybench.c used to benchmark fastmemcpy.h code from libvo. |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
3 |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
4 Note: this code can not be used on PentMMX-PII because they contain |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
5 a bug in rdtsc. For Intel processors since P6(PII) rdpmc should be used |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
6 instead. For PIII it is disputed; the bug seems to have been fixed, but I haven't |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
7 tested it. |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
8 */ |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
9 |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
10 #include <stdio.h> |
572 | 11 #include <stdlib.h> |
12 #include <string.h> | |
13 #include <sys/ioctl.h> | |
14 #include <unistd.h> | |
15 #include <fcntl.h> | |
16 #include <sys/mman.h> | |
17 #include <sys/time.h> | |
13839 | 18 #include <inttypes.h> |
562
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
19 |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
20 #include "../libvo/fastmemcpy.h" |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
21 |
572 | 22 //#define ARR_SIZE 100000 |
23 #define ARR_SIZE (1024*768*2) | |
24 | |
25 #ifdef HAVE_MGA | |
26 | |
27 #include "../drivers/mga_vid.h" | |
28 | |
29 static int mga_next_frame=0; | |
30 | |
31 static mga_vid_config_t mga_vid_config; | |
32 static unsigned char* frame=NULL; | |
33 static int f; | |
34 | |
35 static int mga_init(){ | |
36 char *frame_mem; | |
37 | |
38 f = open("/dev/mga_vid",O_RDWR); | |
39 if(f == -1) | |
40 { | |
41 fprintf(stderr,"Couldn't open /dev/mga_vid\n"); | |
42 return(-1); | |
43 } | |
44 | |
45 mga_vid_config.num_frames=1; | |
46 mga_vid_config.frame_size=ARR_SIZE; | |
47 mga_vid_config.format=MGA_VID_FORMAT_YUY2; | |
48 | |
49 mga_vid_config.colkey_on=0; | |
50 mga_vid_config.src_width = 640; | |
51 mga_vid_config.src_height= 480; | |
52 mga_vid_config.dest_width = 320; | |
53 mga_vid_config.dest_height= 200; | |
54 mga_vid_config.x_org= 0; | |
55 mga_vid_config.y_org= 0; | |
56 | |
57 mga_vid_config.version=MGA_VID_VERSION; | |
58 if (ioctl(f,MGA_VID_CONFIG,&mga_vid_config)) | |
59 { | |
60 perror("Error in mga_vid_config ioctl()"); | |
61 printf("Your mga_vid driver version is incompatible with this MPlayer version!\n"); | |
62 exit(1); | |
63 } | |
64 ioctl(f,MGA_VID_ON,0); | |
65 | |
66 frame = (char*)mmap(0,mga_vid_config.frame_size*mga_vid_config.num_frames,PROT_WRITE,MAP_SHARED,f,0); | |
67 if(!frame){ | |
68 printf("Can't mmap mga frame\n"); | |
69 exit(1); | |
70 } | |
71 | |
72 //clear the buffer | |
73 //memset(frames[0],0x80,mga_vid_config.frame_size*mga_vid_config.num_frames); | |
74 | |
75 return 0; | |
76 | |
77 } | |
78 | |
79 #endif | |
80 | |
// Returns the current time of day in microseconds, reduced to the width of
// unsigned int.  The value wraps around, so only the difference between two
// readings taken close together is meaningful.
unsigned int GetTimer(){
    struct timeval tv = {0, 0};

    // The timezone argument is obsolete; a null pointer is the portable choice.
    gettimeofday(&tv,NULL);
    // Multiply in unsigned arithmetic: wrap-around is well defined there,
    // whereas overflowing the signed time_t would be undefined behaviour.
    return (unsigned int)tv.tv_sec*1000000u+(unsigned int)tv.tv_usec;
}
562
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
90 |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
/*
 * Read the CPU time-stamp counter with the x86 `rdtsc` instruction.
 *
 * rdtsc leaves the low 32 bits in EAX and the high 32 bits in EDX.  The two
 * halves are captured separately and combined by hand: the "A" constraint
 * only denotes the EDX:EAX pair when compiling for 32-bit x86, so using it
 * for a 64-bit result on x86-64 drops the high half.
 *
 * Returns the full 64-bit counter value.
 */
static inline unsigned long long int read_tsc( void )
{
    unsigned int lo, hi;
    __asm __volatile ("rdtsc":"=a"(lo),"=d"(hi)::"memory");
    return ((unsigned long long int)hi << 32) | lo;
}
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
97 |
3076
bfc881c0e591
newly allocated memory seems to point to only 1 zero filled 4k page or something so there is a near 100% cache hit rate ... not very realistic, writeing something in the source array fixes that so the benchmark scores are meaningfull now
michael
parents:
687
diff
changeset
|
98 unsigned char __attribute__((aligned(4096)))arr1[ARR_SIZE],arr2[ARR_SIZE]; |
562
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
99 |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
100 int main( void ) |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
101 { |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
102 unsigned long long int v1,v2; |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
103 unsigned char * marr1,*marr2; |
572 | 104 int i; |
105 unsigned int t; | |
106 #ifdef HAVE_MGA | |
107 mga_init(); | |
108 marr1 = &frame[3]; | |
109 #else | |
3081 | 110 marr1 = &arr1[3]; |
572 | 111 #endif |
3081 | 112 marr2 = &arr2[9]; |
3076
bfc881c0e591
newly allocated memory seems to point to only 1 zero filled 4k page or something so there is a near 100% cache hit rate ... not very realistic, writeing something in the source array fixes that so the benchmark scores are meaningfull now
michael
parents:
687
diff
changeset
|
113 |
3082 | 114 for(i=0; i<ARR_SIZE-16; i++) marr1[i] = marr2[i] = i; |
3076
bfc881c0e591
newly allocated memory seems to point to only 1 zero filled 4k page or something so there is a near 100% cache hit rate ... not very realistic, writeing something in the source array fixes that so the benchmark scores are meaningfull now
michael
parents:
687
diff
changeset
|
115 |
572 | 116 t=GetTimer(); |
562
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
117 v1 = read_tsc(); |
572 | 118 for(i=0;i<100;i++) memcpy(marr1,marr2,ARR_SIZE-16); |
562
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
119 v2 = read_tsc(); |
572 | 120 t=GetTimer()-t; |
121 // ARR_SIZE*100/(1024*1024)/(t/1000000) = ARR_SIZE*95.36743/t | |
687 | 122 printf(NAME": cpu clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n",v2-v1,t,100000000.0f/(float)t,(float)ARR_SIZE*95.36743f/(float)t); |
562
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
123 return 0; |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
124 } |