Mercurial > mplayer.hg
annotate TOOLS/fastmemcpybench.c @ 11993:4e2d99dbef78
spp soft thresholding patch by (James Crowson <jbcrowso at ncsu dot edu>)
author | michael |
---|---|
date | Tue, 24 Feb 2004 11:23:27 +0000 |
parents | c048ece008c0 |
children | 40391656ae23 |
rev | line source |
---|---|
562
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
1 /* |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
2 fastmemcpybench.c used to benchmark fastmemcpy.h code from libvo. |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
3 |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
4 Note: this code can not be used on PentMMX-PII because they contain |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
5 a bug in rdtsc. For Intel processors since P6(PII) rdpmc should be used |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
6 instead. For PIII it's disputable: the bug seems to have been fixed, but I haven't |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
7 tested it. |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
8 */ |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
9 |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
10 #include <stdio.h> |
572 | 11 #include <stdlib.h> |
12 #include <string.h> | |
13 #include <sys/ioctl.h> | |
14 #include <unistd.h> | |
15 #include <fcntl.h> | |
16 #include <sys/mman.h> | |
17 #include <sys/time.h> | |
562
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
18 |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
19 #include "../libvo/fastmemcpy.h" |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
20 |
572 | 21 //#define ARR_SIZE 100000 |
22 #define ARR_SIZE (1024*768*2) | |
23 | |
24 #ifdef HAVE_MGA | |
25 | |
26 #include "../drivers/mga_vid.h" | |
27 | |
28 static int mga_next_frame=0; | |
29 | |
30 static mga_vid_config_t mga_vid_config; | |
31 static unsigned char* frame=NULL; | |
32 static int f; | |
33 | |
34 static int mga_init(){ | |
35 char *frame_mem; | |
36 | |
37 f = open("/dev/mga_vid",O_RDWR); | |
38 if(f == -1) | |
39 { | |
40 fprintf(stderr,"Couldn't open /dev/mga_vid\n"); | |
41 return(-1); | |
42 } | |
43 | |
44 mga_vid_config.num_frames=1; | |
45 mga_vid_config.frame_size=ARR_SIZE; | |
46 mga_vid_config.format=MGA_VID_FORMAT_YUY2; | |
47 | |
48 mga_vid_config.colkey_on=0; | |
49 mga_vid_config.src_width = 640; | |
50 mga_vid_config.src_height= 480; | |
51 mga_vid_config.dest_width = 320; | |
52 mga_vid_config.dest_height= 200; | |
53 mga_vid_config.x_org= 0; | |
54 mga_vid_config.y_org= 0; | |
55 | |
56 mga_vid_config.version=MGA_VID_VERSION; | |
57 if (ioctl(f,MGA_VID_CONFIG,&mga_vid_config)) | |
58 { | |
59 perror("Error in mga_vid_config ioctl()"); | |
60 printf("Your mga_vid driver version is incompatible with this MPlayer version!\n"); | |
61 exit(1); | |
62 } | |
63 ioctl(f,MGA_VID_ON,0); | |
64 | |
65 frame = (char*)mmap(0,mga_vid_config.frame_size*mga_vid_config.num_frames,PROT_WRITE,MAP_SHARED,f,0); | |
66 if(!frame){ | |
67 printf("Can't mmap mga frame\n"); | |
68 exit(1); | |
69 } | |
70 | |
71 //clear the buffer | |
72 //memset(frames[0],0x80,mga_vid_config.frame_size*mga_vid_config.num_frames); | |
73 | |
74 return 0; | |
75 | |
76 } | |
77 | |
78 #endif | |
79 | |
// Returns the current gettimeofday() time in microseconds, reduced modulo
// the range of unsigned int. The value wraps, so only the unsigned
// difference between two nearby readings is meaningful.
unsigned int GetTimer(){
    struct timeval tv;

    // The timezone argument is obsolete; only the timeval is needed.
    gettimeofday(&tv,NULL);
    // Convert to unsigned before multiplying so the wrap-around is
    // well-defined instead of overflowing a signed type.
    return (unsigned int)tv.tv_sec*1000000u+(unsigned int)tv.tv_usec;
}
562
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
89 |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
/*
 * Execute the x86 rdtsc instruction and return the 64-bit time-stamp
 * counter it reports.
 */
static inline unsigned long long int read_tsc( void )
{
    unsigned int lo, hi;

    // rdtsc delivers the low half in EAX and the high half in EDX.
    // Naming both registers explicitly works on 32-bit and 64-bit x86;
    // the "A" constraint only means the EDX:EAX pair on 32-bit targets.
    __asm __volatile ("rdtsc":"=a"(lo),"=d"(hi)::"memory");
    return ((unsigned long long int)hi << 32) | lo;
}
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
96 |
3076
bfc881c0e591
newly allocated memory seems to point to only one zero-filled 4k page or something, so there is a near 100% cache hit rate ... not very realistic; writing something in the source array fixes that, so the benchmark scores are meaningful now
michael
parents:
687
diff
changeset
|
97 unsigned char __attribute__((aligned(4096)))arr1[ARR_SIZE],arr2[ARR_SIZE]; |
562
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
98 |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
99 int main( void ) |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
100 { |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
101 unsigned long long int v1,v2; |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
102 unsigned char * marr1,*marr2; |
572 | 103 int i; |
104 unsigned int t; | |
105 #ifdef HAVE_MGA | |
106 mga_init(); | |
107 marr1 = &frame[3]; | |
108 #else | |
3081 | 109 marr1 = &arr1[3]; |
572 | 110 #endif |
3081 | 111 marr2 = &arr2[9]; |
3076
bfc881c0e591
newly allocated memory seems to point to only 1 zero filled 4k page or something so there is a near 100% cache hit rate ... not very realistic, writeing something in the source array fixes that so the benchmark scores are meaningfull now
michael
parents:
687
diff
changeset
|
112 |
3082 | 113 for(i=0; i<ARR_SIZE-16; i++) marr1[i] = marr2[i] = i; |
3076
bfc881c0e591
newly allocated memory seems to point to only 1 zero filled 4k page or something so there is a near 100% cache hit rate ... not very realistic, writeing something in the source array fixes that so the benchmark scores are meaningfull now
michael
parents:
687
diff
changeset
|
114 |
572 | 115 t=GetTimer(); |
562
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
116 v1 = read_tsc(); |
572 | 117 for(i=0;i<100;i++) memcpy(marr1,marr2,ARR_SIZE-16); |
562
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
118 v2 = read_tsc(); |
572 | 119 t=GetTimer()-t; |
120 // ARR_SIZE*100/(1024*1024)/(t/1000000) = ARR_SIZE*95.36743/t | |
687 | 121 printf(NAME": cpu clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n",v2-v1,t,100000000.0f/(float)t,(float)ARR_SIZE*95.36743f/(float)t); |
562
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
122 return 0; |
312fee2a6816
Initial release, used to benchmark fastmemcpy.h code from libvo.
atmosfear
parents:
diff
changeset
|
123 } |