comparison TOOLS/fastmemcpybench.c @ 28903:1d34931ae1e6

Replace duplicated code by a macro.
author diego
date Thu, 12 Mar 2009 11:55:26 +0000
parents b29169fccda9
children b573c7c7173b
comparison
equal deleted inserted replaced
28902:c950f0c545f6 28903:1d34931ae1e6
192 marr2 = &arr2[0]; 192 marr2 = &arr2[0];
193 193
194 for (i = 0; i < ARR_SIZE - 16; i++) 194 for (i = 0; i < ARR_SIZE - 16; i++)
195 marr1[i] = marr2[i] = i; 195 marr1[i] = marr2[i] = i;
196 196
197 t = GetTimer(); 197 #define testblock(func, name) \
198 v1 = read_tsc(); 198 t = GetTimer(); \
199 for (i = 0; i < 100; i++) 199 v1 = read_tsc(); \
200 memcpy(marr1, marr2, ARR_SIZE - 16); 200 for (i = 0; i < 100; i++) \
201 v2 = read_tsc(); 201 func(marr1, marr2, ARR_SIZE - 16); \
202 t = GetTimer() - t; 202 v2 = read_tsc(); \
203 // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t 203 t = GetTimer() - t; \
204 printf("libc: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t, 204 /* ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t */ \
205 100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t); 205 printf(name "CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t, \
206 100000000.0f / (float)t, (float)ARR_SIZE*95.36743f / (float)t);
207
208 testblock(memcpy, "libc: ");
206 209
207 #if HAVE_MMX 210 #if HAVE_MMX
208 t = GetTimer(); 211 testblock(fast_memcpy_MMX, "MMX: ");
209 v1 = read_tsc();
210 for (i = 0; i < 100; i++)
211 fast_memcpy_MMX(marr1, marr2, ARR_SIZE - 16);
212 v2 = read_tsc();
213 t = GetTimer() - t;
214 // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
215 printf("MMX: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
216 100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
217 #endif 212 #endif
218 213
219 #if HAVE_AMD3DNOW 214 #if HAVE_AMD3DNOW
220 t = GetTimer(); 215 testblock(fast_memcpy_3DNow, "3DNow!: ");
221 v1 = read_tsc();
222 for (i = 0; i < 100; i++)
223 fast_memcpy_3DNow(marr1, marr2, ARR_SIZE - 16);
224 v2 = read_tsc();
225 t = GetTimer() - t;
226 // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
227 printf("3DNow!: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
228 100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
229 #endif 216 #endif
230 217
231 #if HAVE_MMX2 218 #if HAVE_MMX2
232 t = GetTimer(); 219 testblock(fast_memcpy_MMX2, "MMX2: ");
233 v1 = read_tsc();
234 for (i = 0; i < 100; i++)
235 fast_memcpy_MMX2(marr1, marr2, ARR_SIZE - 16);
236 v2 = read_tsc();
237 t = GetTimer() - t;
238 // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
239 printf("MMX2: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
240 100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
241 #endif 220 #endif
242 221
243 #if HAVE_SSE 222 #if HAVE_SSE
244 t = GetTimer(); 223 testblock(fast_memcpy_SSE, "SSE: ");
245 v1 = read_tsc();
246 for (i = 0; i < 100; i++)
247 fast_memcpy_SSE(marr1, marr2, ARR_SIZE - 16);
248 v2 = read_tsc();
249 t = GetTimer() - t;
250 // ARR_SIZE*100 / (1024*1024) / (t/1000000) = ARR_SIZE*95.36743 / t
251 printf("SSE: CPU clocks=%llu = %dus (%5.3ffps) %5.1fMB/s\n", v2-v1, t,
252 100000000.0f/(float)t, (float)ARR_SIZE*95.36743f/(float)t);
253 #endif 224 #endif
254 225
255 return 0; 226 return 0;
256 } 227 }