/**
 * Return the number of microseconds that have elapsed since *t.
 *
 * @param t  timestamp to measure from, as filled in by gettimeofday().
 * @return   (now - *t) expressed in microseconds.
 *
 * The difference is computed in 64-bit arithmetic. Multiplying tv_sec by
 * 1000 * 1000 in its native type overflows (undefined behaviour) after
 * roughly 35 minutes on targets where it is only 32 bits wide. Doing the
 * subtraction by hand also avoids the non-standard timersub() macro.
 */
unsigned long long microsecpassed(struct timeval *t) {
	struct timeval now;
	long long sec, usec;

	gettimeofday(&now, NULL);

	/* usec may come out negative here; it is folded back in below. */
	sec = (long long)now.tv_sec - (long long)t->tv_sec;
	usec = (long long)now.tv_usec - (long long)t->tv_usec;

	return (unsigned long long)(sec * 1000000LL + usec);
}
17 inline unsigned long long rdtsc() {
22 : "=a"(lo
), "=d"(hi
) /* outputs */
24 : "%ebx", "%ecx"); /* clobbers*/
25 return ((unsigned long long)lo
) | (((unsigned long long)hi
) << 32);
28 static inline unsigned long long rdtsc(void) {
29 unsigned long long hi
, lo
;
31 "xorl %%eax, %%eax;\n\t"
35 :"%eax", "%ebx", "%ecx", "%edx");
38 : "=a" (lo
), "=d" (hi
)
41 "xorl %%eax, %%eax; cpuid;"
44 :"%eax", "%ebx", "%ecx", "%edx");
46 return (unsigned long long)hi
<< 32 | lo
;
50 static inline unsigned long long rdtsc(void)
52 unsigned long long int x
;
53 __asm__
volatile (".byte 0x0f, 0x31" : "=A" (x
));
56 #elif defined(__x86_64__)
57 /*static inline unsigned long long rdtsc(void) {
58 unsigned long long hi, lo;
60 "xorl %%eax, %%eax;\n\t"
64 :"%rax", "%rbx", "%rcx", "%rdx");
67 : "=a" (lo), "=d" (hi)
70 "xorl %%eax, %%eax; cpuid;"
73 :"%rax", "%rbx", "%rcx", "%rdx");
75 return (unsigned long long)hi << 32 | lo;
79 static inline unsigned long long rdtsc(void)
82 __asm__
__volatile__ ("rdtsc" : "=a"(lo
), "=d"(hi
));
83 return ( (unsigned long long)lo
)|( ((unsigned long long)hi
)<<32 );
/*
 * Execute CPUID with EAX = 0.
 *
 * CPUID overwrites EAX, EBX, ECX and EDX. EAX carries the requested leaf
 * in, so it is declared as a read-write operand ("+a"): an input-only
 * operand must not be modified by the asm, and a register used as an
 * operand cannot be listed as a clobber. The remaining three registers
 * are clobbers.
 */
static inline void serialize(void) {
	unsigned int leaf = 0;

	__asm__ __volatile__ ("cpuid"
	                      : "+a"(leaf)
	                      :
	                      : "ebx", "ecx", "edx");
}
91 #elif defined(__powerpc__)
94 static __inline__
unsigned long long rdtsc(void)
96 unsigned long long int result
=0;
97 unsigned long int upper
, lower
,tmp
;
105 : "=r"(upper
),"=r"(lower
),"=r"(tmp
)
109 result
= result
|lower
;
/* Routines provided by other translation units. */

/* The copy routine being measured; takes the same arguments as memcpy(). */
extern void *mymemcpy(void *dest, const void *src, size_t n);

/* Opaque call made before each copy so the compiler cannot guess the
 * contents of the source buffer. */
extern void fillmem(void* mem, size_t size);

/* Opaque call made after each copy so the compiler cannot assume the
 * destination is never read; the caller aborts if it returns 1. */
extern int dummy_access(void* mem, size_t size);
/*
 * Pin the calling thread (pid 0 = caller) to CPU 0.
 *
 * The syscalls are checked explicitly rather than wrapped in assert():
 * with NDEBUG defined an assert() expression is not evaluated at all, so
 * the affinity would silently never be set. A failure still terminates
 * the program via abort(), after reporting the reason on stderr.
 */
static void lock_affinity(void) {
	cpu_set_t cs;

	if (sched_getaffinity(0, sizeof(cs), &cs) != 0) {
		perror("sched_getaffinity");
		abort();
	}

	/* Start from an empty mask and enable CPU 0 only. */
	CPU_ZERO_S(sizeof(cs), &cs);
	CPU_SET_S(0, sizeof(cs), &cs);

	if (sched_setaffinity(0, sizeof(cs), &cs) != 0) {
		perror("sched_setaffinity");
		abort();
	}
}
130 static inline unsigned long test(size_t size
, char* src
, char* dst
) {
131 unsigned long ticka
, tickb
;
132 fillmem(src
, size
); // dummy call so that gcc can not guess the mem contents.
134 void* p
= mymemcpy(dst
, src
, size
);
137 if(dummy_access(dst
, size
) == 1) abort(); // dummy call so that gcc can not assume mem content is never accessed.
138 return tickb
- ticka
;
141 int main(int argc
, char** argv
) {
142 #define K(X) (1024UL * X)
143 #define ARRAY_SIZE(X) (sizeof(X) / sizeof((X)[0]))
144 #define MIN(A,B) ((A) > (B) ? (B) : (A))
146 const size_t testsizes
[] = {
147 0, 1, 2, 3 ,4 ,5, 6, 7,
148 8, 9, 10, 11, 12, 13, 14,
166 1023, 1024, 1025, 1152, 1280, 1408,
170 K(128), K(160), K(192), K(208), K(216), K(220), K(224), K(240),
171 K(256), K(384), K(512),
172 K(1024), K(1280), K(1536), K(1792), K(2048),
173 K(2560), K(3072), K(3584), K(4096),
174 K(4352), K(4608), K(4864),
175 K(5120), K(5376), K(5632), K(5888),
176 K(6144), K(6400), K(6656), K(6912), K(7168),
177 K(8192), K(16384), K(32768), K(65536),
182 unsigned long x
, y
, ymax
;
183 unsigned long long smallest
;
187 for(x
= 1 << 28; x
> 0; --x
);
189 FILE *f
= fopen("/dev/urandom", "r");
190 for (x
= 0 ; x
< ARRAY_SIZE(testsizes
); x
++) {
193 //smallest = 0xffffffff;
194 y
= testsizes
[x
] ? testsizes
[x
] : 1;
195 ymax
= (K(65536)*(100ULL - (MIN(99ULL, ARRAY_SIZE(testsizes
) - x
-1ULL))))/y
;
196 if(testsizes
[x
] >= 64) ymax
*=2;
197 if(testsizes
[x
] >= 1024) ymax
*=2;
199 src
= malloc(testsizes
[x
] + 64);
200 dst
= malloc(testsizes
[x
] + 64);
201 /* check that the function works correctly -
202 the +1 stuff is to get unaligned start offset,
203 the 0xaa/bb stuff to check whether it writes off bounds. */
204 dst
[0] = src
[0] = 0xee;
205 memset(src
+1+testsizes
[x
], 0xbb, 32);
206 memset(dst
+1+testsizes
[x
], 0xaa, 32);
207 fread(src
+1, 1, testsizes
[x
], f
);
208 if(testsizes
[x
] >= 4) {
209 /* make sure we copy into the right direction */
210 src
[1] = 'a'; src
[2] = 'b'; src
[3] = 'c'; src
[4] = 'd';
211 dst
[1] = '0'; dst
[2] = '1'; dst
[3] = '2'; dst
[4] = '3';
213 mymemcpy(dst
+1, src
+1, testsizes
[x
]);
214 memset(src
+1+testsizes
[x
], 0xaa, 32);
215 if(memcmp(src
+1, dst
+1, testsizes
[x
]+32) || (testsizes
[x
] >= 4 && memcmp(dst
+1, "abcd", 4))) {
216 fprintf(stderr
, "warning: %s didn't pass self-test with size %zu!\n", FILENAME
, testsizes
[x
]);
219 unsigned long long curr
, total
, best
= -1ULL;
222 for(y
= 0; y
< ymax
; y
++) {
223 //__builtin_ia32_clflush(src);
224 //__builtin_ia32_clflush(dst);
225 curr
= test(testsizes
[x
], src
, dst
);
226 if(curr
< best
) best
= curr
;
231 fprintf(stdout
, "%-8zu\t%llu\n", testsizes
[x
], best
);