add memcpy_rep_movsb
[rofl0r-memcpy-test.git] / memcpy_test.c
blob13fe4eebca5587807d589db35ced62b6a3bab663
1 #include <stdio.h>
2 #include <string.h>
3 #include <unistd.h>
4 #include <stdlib.h>
6 #include <sys/time.h>
/*
 * Return the number of microseconds that have elapsed between the time
 * stored in *t and the current time reported by gettimeofday().
 *
 * t must point to a timestamp taken earlier with gettimeofday(); if *t lies
 * in the future the result is meaningless (the difference wraps around).
 */
unsigned long long microsecpassed(struct timeval* t) {
	struct timeval now;
	long long sec, usec;

	gettimeofday(&now, NULL);

	/* Normalised subtraction: borrow one second when the microsecond
	 * part of "now" is smaller than that of the start time. */
	sec = (long long)now.tv_sec - (long long)t->tv_sec;
	usec = (long long)now.tv_usec - (long long)t->tv_usec;
	if (usec < 0) {
		sec--;
		usec += 1000000;
	}

	/* Widen before multiplying so the product cannot overflow on
	 * platforms where time_t is only 32 bits wide. */
	return (unsigned long long)sec * 1000000ULL + (unsigned long long)usec;
}
/*
 * rdtsc(): read a free-running hardware tick counter and return it as a
 * 64-bit value.  One implementation is selected per target architecture.
 *
 * Every variant is "static inline" so that the file links even when the
 * compiler decides not to inline the call (a plain C99 "inline" definition
 * does not emit an out-of-line copy).
 */
#if defined(__i386__)

/* Execute cpuid (leaf 0) immediately before rdtsc, then combine the two
 * 32-bit halves returned in eax/edx.  cpuid overwrites ebx and ecx, hence
 * the clobber list. */
static inline unsigned long long rdtsc(void) {
	unsigned int lo, hi;
	__asm__ volatile (
		"cpuid \n"
		"rdtsc"
		: "=a"(lo), "=d"(hi) /* outputs */
		: "a"(0) /* inputs */
		: "%ebx", "%ecx"); /* clobbers */
	return ((unsigned long long)lo) | (((unsigned long long)hi) << 32);
}

#elif defined(__x86_64__)

/* Plain rdtsc: low half arrives in eax, high half in edx. */
static inline unsigned long long rdtsc(void)
{
	unsigned hi, lo;
	__asm__ __volatile__ ("rdtsc" : "=a"(lo), "=d"(hi));
	return ((unsigned long long)lo) | (((unsigned long long)hi) << 32);
}

#elif defined(__powerpc__)

/* Read the time base as upper/lower halves.  The upper half is read twice;
 * if it changed in between, the lower half may have wrapped, so retry. */
static __inline__ unsigned long long rdtsc(void)
{
	unsigned long long int result = 0;
	unsigned long int upper, lower, tmp;
	__asm__ volatile(
		"0: \n"
		"\tmftbu %0 \n"
		"\tmftb %1 \n"
		"\tmftbu %2 \n"
		"\tcmpw %2,%0 \n"
		"\tbne 0b \n"
		: "=r"(upper), "=r"(lower), "=r"(tmp)
		);
	result = upper;
	result = result << 32;
	result = result | lower;

	return(result);
}

#else
#error "rdtsc() is not implemented for this architecture"
#endif
112 extern void *mymemcpy(void *dest, const void *src, size_t n);
113 extern void fillmem(void* mem, size_t size);
114 extern int dummy_access(void* mem, size_t size);
/* Number of test() calls made so far; used to rotate the copy offset. */
static long long counter = 0;

/*
 * Perform one measured copy through mymemcpy().
 *
 * size - number of bytes in each of the two buffers
 * src  - source buffer, at least size bytes
 * dst  - destination buffer, at least size bytes
 *
 * For sizes above 4 the start of the copy is shifted by (counter % 4) bytes
 * in both buffers, and the length shortened by the same amount, so that
 * successive calls cycle through offsets 0..3 while staying inside the
 * buffers.  Smaller sizes are always copied from offset 0.
 */
static inline void test(size_t size, char* src, char* dst) {
	size_t offset = (size > 4) ? (size_t)(counter % 4) : 0;

	fillmem(src, size); // dummy call so that gcc can not guess the mem contents.
	mymemcpy(dst + offset, src + offset, size - offset);
	if(dummy_access(dst, size) == 1) abort(); // dummy call so that gcc can not assume mem content is never accessed.

	counter++;
}
137 int main(int argc, char** argv) {
138 #define K(X) (1024 * X)
139 #define ARRAY_SIZE(X) (sizeof(X) / sizeof((X)[0]))
141 size_t testsizes[] = {
142 3 ,4 ,5,
144 15, 16,
145 23, 24, 25,
146 31, 32, 33,
147 63, 64, 65,
148 95, 96, 97,
149 127, 128, 129,
150 159, 160, 161,
151 191, 192, 193,
152 224,
153 255, 256, 257,
154 288, 320, 348,
155 383, 384, 385,
156 416, 448, 476,
157 511, 512, 513,
158 548, 640, 732,
159 767, 768, 769,
160 1023, 1024, 1025,
161 1535, 1536, 1537,
162 2048, 4096, 8192,
163 16384, 32768, 65536,
164 K(128), K(160), K(192), K(208), K(216), K(220), K(224), K(240),
165 K(256), K(384), K(512),
166 K(1024), K(1280), K(1536), K(1792), K(2048),
167 K(2560), K(3072), K(3584), K(4096),
168 K(4352), K(4608), K(4864),
169 K(5120), K(5376), K(5632), K(5888),
170 K(6144), K(6400), K(6656), K(6912), K(7168),
171 K(8192), K(16384), K(32768), K(65536),
174 unsigned long x, y, ymax;
175 unsigned long long smallest;
176 double res;
177 FILE *f = fopen("/dev/urandom", "r");
178 for (x = 0 ; x < ARRAY_SIZE(testsizes); x++) {
179 char *src, *dst;
180 unsigned long long ticka = rdtsc(), tickb;
182 //smallest = 0xffffffff;
183 ymax = testsizes[x] > K(100) ? 100 : 1000;
185 src = malloc(testsizes[x]);
186 dst = malloc(testsizes[x]);
187 fread(src, 1, testsizes[x], f);
188 /* check that the function actually works */
189 mymemcpy(dst, src, testsizes[x]);
190 if(memcmp(src, dst, testsizes[x])) {
191 fprintf(stderr, "warning: %s didn't pass self-test with size %zu!\n", FILENAME, testsizes[x]);
194 for(y = 0; y < ymax; y++) {
195 test(testsizes[x], src, dst);
196 //if(res < smallest) smallest = res;
198 tickb = rdtsc();
199 res = (tickb - ticka) / (ymax*1.0f);
201 res /= ymax;
202 fprintf(stdout, "%-8zu\t%.4f ticks\n", testsizes[x], res);
203 fflush(stdout);
204 free(src);
205 free(dst);
207 fclose(f);
208 return 0;