/**
 * Return the number of microseconds that have elapsed since *t.
 *
 * @param t  start timestamp, as filled in earlier by gettimeofday();
 *           must not be NULL.
 * @return   (current time - *t) expressed in microseconds.
 *
 * The subtraction and the scaling are carried out in 64-bit arithmetic so
 * that the seconds-to-microseconds multiplication cannot overflow on
 * platforms where time_t is only 32 bits wide, and so that the function
 * does not depend on the non-standard timersub() macro.
 */
unsigned long long microsecpassed(struct timeval *t) {
    struct timeval now;
    long long sec;
    long long usec;

    gettimeofday(&now, NULL);

    /* usec may be negative here; the sum below folds the borrow back in. */
    sec = (long long)now.tv_sec - (long long)t->tv_sec;
    usec = (long long)now.tv_usec - (long long)t->tv_usec;

    return (unsigned long long)(sec * 1000000LL + usec);
}
// Returns a 64-bit value assembled from two halves produced by an inline-asm
// statement: lo is bound to the "a" register and hi to the "d" register, and
// the result is lo | (hi << 32).
// NOTE(review): the asm template and the declarations of lo/hi are not
// visible in this listing; presumably the template executes rdtsc (possibly
// preceded by cpuid, given the ebx/ecx clobbers) - confirm against the file.
17 inline unsigned long long rdtsc() {
22 : "=a"(lo
), "=d"(hi
) /* outputs */
24 : "%ebx", "%ecx"); /* clobbers*/
25 return ((unsigned long long)lo
) | (((unsigned long long)hi
) << 32);
// Variant of rdtsc() that returns hi << 32 | lo, where lo and hi are the
// "a"/"d" register outputs of an inline-asm statement.
// The visible fragments show eax being zeroed before the read and
// "xorl eax; cpuid" being issued after it, each time with eax, ebx, ecx and
// edx declared as clobbered.
// NOTE(review): the asm keywords and the instruction that actually fills
// lo/hi are not visible in this listing; presumably cpuid is used as a
// serializing barrier around rdtsc - confirm against the full file.
28 static inline unsigned long long rdtsc(void) {
29 unsigned long long hi
, lo
;
31 "xorl %%eax, %%eax;\n\t"
35 :"%eax", "%ebx", "%ecx", "%edx");
38 : "=a" (lo
), "=d" (hi
)
41 "xorl %%eax, %%eax; cpuid;"
44 :"%eax", "%ebx", "%ecx", "%edx");
46 return (unsigned long long)hi
<< 32 | lo
;
// Variant of rdtsc() that emits the raw opcode bytes 0x0f 0x31 (the encoding
// of the rdtsc instruction) and receives the 64-bit result in x through the
// "=A" output constraint.
// NOTE(review): the braces and the return statement are not visible in this
// listing; presumably x is returned unchanged - confirm against the file.
50 static inline unsigned long long rdtsc(void)
52 unsigned long long int x
;
53 __asm__
volatile (".byte 0x0f, 0x31" : "=A" (x
));
56 #elif defined(__x86_64__)
57 /*static inline unsigned long long rdtsc(void) {
58 unsigned long long hi, lo;
60 "xorl %%eax, %%eax;\n\t"
64 :"%rax", "%rbx", "%rcx", "%rdx");
67 : "=a" (lo), "=d" (hi)
70 "xorl %%eax, %%eax; cpuid;"
73 :"%rax", "%rbx", "%rcx", "%rdx");
75 return (unsigned long long)hi << 32 | lo;
// Variant of rdtsc() that executes the rdtsc instruction directly: the "a"
// register output is stored in lo, the "d" register output in hi, and the
// two are combined as lo | (hi << 32).
// NOTE(review): the declarations of lo and hi are not visible in this
// listing - confirm their types against the full file.
79 static inline unsigned long long rdtsc(void)
82 __asm__
__volatile__ ("rdtsc" : "=a"(lo
), "=d"(hi
));
83 return ( (unsigned long long)lo
)|( ((unsigned long long)hi
)<<32 );
86 #elif defined(__powerpc__)
// Variant of rdtsc() whose inline-asm statement writes three general-register
// outputs (upper, lower, tmp); the visible code then ORs lower into result,
// which starts at 0.
// NOTE(review): the asm template and the statement that merges upper into
// result are not visible in this listing; presumably the template reads the
// time-base registers and retries until the upper half is stable - confirm
// against the full file.
89 static __inline__
unsigned long long rdtsc(void)
91 unsigned long long int result
=0;
92 unsigned long int upper
, lower
,tmp
;
100 : "=r"(upper
),"=r"(lower
),"=r"(tmp
)
104 result
= result
|lower
;
/*
 * Routines provided by another translation unit.
 *
 * mymemcpy     - the copy routine being benchmarked; called with the same
 *                argument order as memcpy (destination, source, byte count).
 * fillmem      - called on the source buffer before each copy so the
 *                compiler cannot guess the memory contents.
 * dummy_access - called on the destination buffer after each copy so the
 *                compiler cannot assume the copied data is never read.
 */
void *mymemcpy(void *dest, const void *src, size_t n);
void fillmem(void *mem, size_t size);
int dummy_access(void *mem, size_t size);
// File-scope counter from which test() derives its byte offset.
// NOTE(review): the statement that updates counter is not visible in this
// listing; presumably it is incremented once per test() call - confirm.
117 static long long counter
= 0;
// Runs one copy of `size` bytes from src to dst through mymemcpy(), bracketed
// by fillmem() and dummy_access() calls on the buffers.
// NOTE(review): the declaration of counter_mod_4 and any timing statements
// are not visible in this listing.
118 static inline void test(size_t size
, char* src
, char* dst
) {
// Offset derived from counter % 4; it is added to both pointers and
// subtracted from the length below, so the end of the copied range stays put.
// NOTE(review): size - counter_mod_4 would wrap if size were smaller than
// the offset - confirm the smallest entry of the size table.
123 counter_mod_4
= counter
% 4;
127 struct timeval start
;
129 fillmem(src
, size
); // dummy call so that gcc can not guess the mem contents.
130 mymemcpy(dst
+ counter_mod_4
, src
+ counter_mod_4
, size
- counter_mod_4
);
131 if(dummy_access(dst
, size
) == 1) abort(); // dummy call so that gcc can not assume mem content is never accessed.
// Benchmark driver: for every entry of testsizes it allocates a source and a
// destination buffer, fills the source from /dev/urandom, self-tests
// mymemcpy() against memcmp(), calls test() ymax times and prints the tick
// difference divided by ymax.
// NOTE(review): several lines of this function are not visible in this
// listing (for example the declarations of src, dst and res and the
// assignment of tickb), so the notes below describe only what is shown.
137 int main(int argc
, char** argv
) {
// K(X) scales X by 1024.
// NOTE(review): X is not parenthesized in the expansion; every visible use
// passes a plain integer literal.
138 #define K(X) (1024 * X)
// Element count of a true array (not valid for pointers).
139 #define ARRAY_SIZE(X) (sizeof(X) / sizeof((X)[0]))
// Buffer sizes, in bytes, to benchmark.
141 size_t testsizes
[] = {
164 K(128), K(160), K(192), K(208), K(216), K(220), K(224), K(240),
165 K(256), K(384), K(512),
166 K(1024), K(1280), K(1536), K(1792), K(2048),
167 K(2560), K(3072), K(3584), K(4096),
168 K(4352), K(4608), K(4864),
169 K(5120), K(5376), K(5632), K(5888),
170 K(6144), K(6400), K(6656), K(6912), K(7168),
171 K(8192), K(16384), K(32768), K(65536),
174 unsigned long x
, y
, ymax
;
175 unsigned long long smallest
;
// Source of random bytes for the source buffer.
// NOTE(review): the fopen() result is not checked in the visible lines, so a
// NULL stream would reach fread() below.
177 FILE *f
= fopen("/dev/urandom", "r");
178 for (x
= 0 ; x
< ARRAY_SIZE(testsizes
); x
++) {
// NOTE(review): ticka is sampled here, before the allocations and the
// self-test; unless it is re-read in a line not shown, the measured interval
// includes that setup work - confirm against the full file.
180 unsigned long long ticka
= rdtsc(), tickb
;
182 //smallest = 0xffffffff;
// Sizes above K(100) bytes get 100 repetitions, all others get 1000.
183 ymax
= testsizes
[x
] > K(100) ? 100 : 1000;
// NOTE(review): neither malloc() result nor the fread() return value is
// checked in the visible lines.
185 src
= malloc(testsizes
[x
]);
186 dst
= malloc(testsizes
[x
]);
187 fread(src
, 1, testsizes
[x
], f
);
188 /* check that the function actually works */
189 mymemcpy(dst
, src
, testsizes
[x
]);
190 if(memcmp(src
, dst
, testsizes
[x
])) {
191 fprintf(stderr
, "warning: %s didn't pass self-test with size %zu!\n", FILENAME
, testsizes
[x
]);
// Repeated calls of test() on the same pair of buffers.
194 for(y
= 0; y
< ymax
; y
++) {
195 test(testsizes
[x
], src
, dst
);
196 //if(res < smallest) smallest = res;
// Average tick count per iteration; the division is done in single-precision
// floating point because of the 1.0f factor.
199 res
= (tickb
- ticka
) / (ymax
*1.0f
);
202 fprintf(stdout
, "%-8zu\t%.4f ticks\n", testsizes
[x
], res
);