Build system improvements
[ustl.git] / bvt / bench.cc
blob5add4470d690da5ec94cfc0d872b7db96d010a30
1 // This file is part of the ustl library, an STL implementation.
2 //
3 // Copyright (C) 2005 by Mike Sharov <msharov@users.sourceforge.net>
4 // This file is free software, distributed under the MIT License.
5 //
7 #include <ustl.h>
8 #include <time.h>
9 using namespace ustl;
11 //----------------------------------------------------------------------
12 // Copy functions
13 //----------------------------------------------------------------------
15 #if __i386__ || __x86_64__
/// Copies \p nBytes bytes from \p src to \p dest with a single "rep movsb".
///
/// \param src    first byte to read
/// \param nBytes number of bytes to copy; zero copies nothing
/// \param dest   first byte to write
///
extern "C" void movsb_copy (const char* src, size_t nBytes, char* dest)
{
    // "rep movsb" advances the source and destination index registers and
    // counts the count register down to zero, so all three are declared as
    // read-write operands. Declaring the count as a plain input would let
    // the compiler assume it still holds nBytes after the statement.
    asm volatile (
        "cld\n\trep\n\tmovsb"
        : "+S"(src), "+D"(dest), "+c"(nBytes)
        :
        : "memory", "cc");
}
/// Copies \p nBytes bytes from \p src to \p dest four bytes at a time with
/// "rep movsl", then finishes any remaining 1-3 bytes with "rep movsb".
///
/// \param src    first byte to read
/// \param nBytes number of bytes to copy; zero copies nothing
/// \param dest   first byte to write
///
extern "C" void movsd_copy (const char* src, size_t nBytes, char* dest)
{
    // The string instructions modify the index and count registers, so they
    // are read-write operands rather than inputs.
    size_t nDwords = nBytes / 4;
    asm volatile (
        "cld\n\trep\n\tmovsl"
        : "+S"(src), "+D"(dest), "+c"(nDwords)
        :
        : "memory", "cc");
    // src and dest now point just past the last full dword; copy the
    // remainder so the whole requested range is transferred.
    size_t nTail = nBytes % 4;
    asm volatile (
        "cld\n\trep\n\tmovsb"
        : "+S"(src), "+D"(dest), "+c"(nTail)
        :
        : "memory", "cc");
}
/// Copies \p nBytes bytes from \p src to \p dest one unsigned long at a
/// time, followed by a bytewise copy of whatever does not fill a whole word.
///
/// Both buffers are accessed through unsigned long pointers, so callers
/// should pass addresses suitably aligned for that type.
///
/// \param src    first byte to read
/// \param nBytes number of bytes to copy; zero copies nothing
/// \param dest   first byte to write
///
extern "C" void risc_copy (const char* src, size_t nBytes, char* dest)
{
    unsigned long* ldest ((unsigned long*) dest);
    const unsigned long* lsrc ((const unsigned long*) src);
    // The count is tested before the first word is copied. A do/while with a
    // pre-decrement would copy a word and then wrap the counter when fewer
    // than sizeof(long) bytes are requested.
    for (size_t nWords = nBytes / sizeof(*lsrc); nWords; --nWords)
        *ldest++ = *lsrc++;
    // Remaining bytes that do not make up a full word.
    const char* tsrc ((const char*) lsrc);
    char* tdest ((char*) ldest);
    for (size_t nTail = nBytes % sizeof(*lsrc); nTail; --nTail)
        *tdest++ = *tsrc++;
}
/// Copies \p nBytes bytes from \p src to \p dest in manually unrolled blocks
/// of four unsigned longs, followed by a bytewise copy of the remainder.
///
/// Both buffers are accessed through unsigned long pointers, so callers
/// should pass addresses suitably aligned for that type.
///
/// \param src    first byte to read
/// \param nBytes number of bytes to copy; zero copies nothing
/// \param dest   first byte to write
///
extern "C" void unroll_copy (const char* src, size_t nBytes, char* dest)
{
    const size_t blockBytes = 4 * sizeof(unsigned long);
    unsigned long* ldest ((unsigned long*) dest);
    const unsigned long* lsrc ((const unsigned long*) src);
    // Test the block count up front: a do/while with a pre-decrement would
    // copy one block and then wrap the counter for requests smaller than a
    // single block.
    for (size_t nBlocks = nBytes / blockBytes; nBlocks; --nBlocks) {
        ldest[0] = lsrc[0];
        ldest[1] = lsrc[1];
        ldest[2] = lsrc[2];
        ldest[3] = lsrc[3];
        ldest += 4;
        lsrc += 4;
    }
    // Remaining bytes that do not make up a full block.
    const char* tsrc ((const char*) lsrc);
    char* tdest ((char*) ldest);
    for (size_t nTail = nBytes % blockBytes; nTail; --nTail)
        *tdest++ = *tsrc++;
}
#if CPU_HAS_MMX
/// Copies \p nBytes bytes from \p src to \p dest in 32-byte blocks moved
/// through the mm0-mm3 registers, then copies any remainder bytewise.
///
/// \param src    first byte to read
/// \param nBytes number of bytes to copy; zero copies nothing
/// \param dest   first byte to write
///
extern "C" void mmx_copy (const char* src, size_t nBytes, char* dest)
{
    // Test the block count up front: a do/while with a pre-decrement would
    // copy one block and then wrap the counter when nBytes < 32.
    for (size_t nBlocks = nBytes / 32; nBlocks; --nBlocks) {
        prefetch (src + 512, 0, 0);
        // Each operand names a single char but movq transfers 8 bytes, so
        // the "memory" clobber tells the compiler about the full extent.
        asm volatile (
            "movq	%4, %%mm0	\n\t"
            "movq	%5, %%mm1	\n\t"
            "movq	%6, %%mm2	\n\t"
            "movq	%7, %%mm3	\n\t"
            "movq	%%mm0, %0	\n\t"
            "movq	%%mm1, %1	\n\t"
            "movq	%%mm2, %2	\n\t"
            "movq	%%mm3, %3"
            : "=m"(dest[0]), "=m"(dest[8]), "=m"(dest[16]), "=m"(dest[24])
            : "m"(src[0]), "m"(src[8]), "m"(src[16]), "m"(src[24])
            : "mm0", "mm1", "mm2", "mm3", "st", "st(1)", "st(2)", "st(3)", "memory");
        src += 32;
        dest += 32;
    }
    simd::reset_mmx();
    // Remaining bytes that do not make up a full 32-byte block.
    for (size_t nTail = nBytes % 32; nTail; --nTail)
        *dest++ = *src++;
}
#endif // CPU_HAS_MMX
#if CPU_HAS_SSE
/// Copies \p nBytes bytes from \p src to \p dest using SSE registers.
///
/// Bytes are copied one at a time until \p src reaches a 16-byte boundary.
/// If \p dest is then 16-byte aligned as well, the bulk is moved in 32-byte
/// blocks with aligned loads and non-temporal stores. Whatever is left
/// (everything, when dest is not aligned) is copied bytewise.
///
/// \param src    first byte to read
/// \param nBytes number of bytes to copy
/// \param dest   first byte to write
///
extern "C" void sse_copy (const char* src, size_t nBytes, char* dest)
{
    const size_t nHeadBytes = min (nBytes, Align(uintptr_t(src), 16U) - uintptr_t(src));
    for (uoff_t i = 0; i < nHeadBytes; ++ i)
        *dest++ = *src++;
    nBytes -= nHeadBytes;
    if (!(uintptr_t(dest) % 16)) {
        const size_t nMiddleBlocks = nBytes / 32;
        for (uoff_t i = 0; i < nMiddleBlocks; ++ i) {
            prefetch (src + 512, 0, 0);
            // Each operand names a single char but the instructions move 16
            // bytes, so the "memory" clobber covers the full extent.
            asm volatile (
                "movaps\t%2, %%xmm0	\n\t"
                "movaps\t%3, %%xmm1	\n\t"
                "movntps\t%%xmm0, %0	\n\t"
                "movntps\t%%xmm1, %1"
                : "=m"(dest[0]), "=m"(dest[16])
                : "m"(src[0]), "m"(src[16])
                : "xmm0", "xmm1", "memory");
            src += 32;
            dest += 32;
        }
        // Non-temporal stores are weakly ordered; fence them before the
        // ordinary stores and loads that follow.
        asm volatile ("sfence" ::: "memory");
        nBytes %= 32;
    }
    for (uoff_t i = 0; i < nBytes; ++ i)
        *dest++ = *src++;
}
#endif // CPU_HAS_SSE
112 #endif // __i386__
/// Reference copy routine: hands the request to the C library memcpy.
/// Takes its arguments in (source, length, destination) order like the
/// other copy routines in this file.
///
/// \param src    first byte to read
/// \param nBytes number of bytes to copy
/// \param dest   first byte to write
///
extern "C" void memcpy_copy (const char* src, size_t nBytes, char* dest)
{
    const void* const from = src;
    void* const to = dest;
    memcpy (to, from, nBytes);
}
119 template <typename CopyFunction>
120 void TestCopyFunction (const char* name, CopyFunction pfn)
122 const uoff_t misalignment = 0;
123 const uoff_t headBytes = 0;
124 const uoff_t tailBytes = 0;
126 const size_t nIter = 128;
127 const size_t nBytes = 1024 * 1024 + misalignment;
129 string buf1 (nBytes), buf2 (nBytes);
130 iota (buf1.begin(), buf1.end(), '\x1');
131 fill (buf2, 0);
132 const clock_t first = clock();
133 for (uoff_t i = 0; i < nIter; ++ i)
134 (*pfn)(buf1.cdata() + headBytes, nBytes - headBytes - tailBytes, buf2.data() + headBytes + misalignment);
135 clock_t last = clock();
136 last += (last == first);
137 const size_t mbps = nIter * CLOCKS_PER_SEC / (last - first);
138 cout << name << " transfer rate is " << mbps << " Mbps, data is ";
139 size_t nBad = 0;
140 for (uoff_t i = headBytes; i < buf1.size() - tailBytes; ++ i)
141 nBad += (buf1[i] != buf2[i + misalignment]);
142 if (!nBad)
143 cout << "GOOD" << endl;
144 else {
145 cout << "BAD" << endl;
146 for (uoff_t i = headBytes; i < buf1.size() - tailBytes; ++ i)
147 if (buf1[i] != buf2[i + misalignment])
148 cout << "\t\t" << i << "\tbuf1: " << (int) buf1[i] << ", buf2: " << (int) buf2[i + misalignment] << endl;
150 cout.flush();
153 //----------------------------------------------------------------------
154 // Fill functions
155 //----------------------------------------------------------------------
/// Reference fill routine: hands the request to the C library memset.
/// Takes its arguments in (destination, length, value) order like the
/// other fill routines in this file.
///
/// \param dest   first byte to write
/// \param nBytes number of bytes to set
/// \param v      value stored into every byte
///
extern "C" void memset_fill (char* dest, size_t nBytes, char v)
{
    const int fillValue = v;
    memset (dest, fillValue, nBytes);
}
162 #if __i386__ || __x86_64__
/// Sets \p nBytes bytes starting at \p dest to \p v with "rep stosb".
///
/// \param dest   first byte to write
/// \param nBytes number of bytes to set; zero writes nothing
/// \param v      value stored into every byte
///
extern "C" void stosb_fill (char* dest, size_t nBytes, char v)
{
    // "rep stosb" advances the destination index register and counts the
    // count register down to zero, so both are read-write operands. The
    // fill value in the accumulator is only read.
    asm volatile (
        "cld\n\trep\n\tstosb\n\t"
        : "+D"(dest), "+c"(nBytes)
        : "a"(v)
        : "memory", "cc");
}
172 extern "C" void stosd_fill (char* dest, size_t nBytes, char v)
174 unsigned int lv;
175 pack_type (v, lv);
176 asm volatile (
177 "cld\n\trep\n\tstosl\n\t"
178 : "=&D"(dest)
179 : "0"(dest), "a"(lv), "c"(nBytes / sizeof(lv))
180 : "memory");
183 extern "C" void risc_fill (char* dest, size_t nBytes, char v)
185 unsigned long lv;
186 pack_type (v, lv);
187 unsigned long* ldest ((unsigned long*) dest);
188 nBytes /= sizeof(lv);
189 do {
190 *ldest++ = lv;
191 } while (--nBytes);
194 extern "C" void unroll_fill (char* dest, size_t nBytes, char v)
196 unsigned long lv;
197 pack_type (v, lv);
198 unsigned long* ldest ((unsigned long*) dest);
199 nBytes /= 4 * sizeof(lv);
200 do {
201 ldest[0] = lv;
202 ldest[1] = lv;
203 ldest[2] = lv;
204 ldest[3] = lv;
205 ldest += 4;
206 } while (--nBytes);
#if CPU_HAS_MMX
/// Sets \p nBytes bytes starting at \p dest to \p v in 32-byte blocks
/// stored from the mm0 register, then finishes the remainder bytewise.
///
/// \param dest   first byte to write
/// \param nBytes number of bytes to set; zero writes nothing
/// \param v      value stored into every byte
///
extern "C" void mmx_fill (char* dest, size_t nBytes, char v)
{
    prefetch (dest + 512, 1, 0);
    // Load v into mm0 and replicate its low byte across all eight bytes:
    // bytes -> words -> dwords -> the full quadword.
    asm volatile (
        "movd %0, %%mm0		\n\t"
        "punpcklbw %%mm0, %%mm0	\n\t"
        "punpcklwd %%mm0, %%mm0	\n\t"
        "punpckldq %%mm0, %%mm0"
        ::"r"(uint32_t(v))
        : "mm0", "st");
    const size_t nBlocks (nBytes / 32);
    for (uoff_t i = 0; i < nBlocks; ++ i) {
        // Each operand names a single char but movq stores 8 bytes, so the
        // "memory" clobber tells the compiler about the full extent.
        asm volatile (
            "movq %%mm0, %0	\n\t"
            "movq %%mm0, %1	\n\t"
            "movq %%mm0, %2	\n\t"
            "movq %%mm0, %3"
            : "=m"(dest[0]), "=m"(dest[8]), "=m"(dest[16]), "=m"(dest[24])
            :
            : "memory");
        dest += 32;
    }
    simd::reset_mmx();
    // Remaining bytes that do not make up a full 32-byte block.
    for (size_t nTail = nBytes % 32; nTail; --nTail)
        *dest++ = v;
}
#endif // CPU_HAS_MMX
233 #endif // __i386__
235 template <typename FillFunction>
236 void TestFillFunction (const char* name, FillFunction pfn)
238 const size_t nIter = 256;
239 const size_t nBytes = 1024 * 1024;
240 string buf1 (nBytes), buf2 (nBytes);
241 iota (buf1.begin(), buf1.end(), '\x1');
242 fill (buf2, 42);
243 clock_t first = clock();
244 for (uoff_t i = 0; i < nIter; ++ i)
245 (*pfn)(buf1.data(), nBytes, char(42));
246 clock_t last = clock();
247 last += (last == first);
248 const size_t mbps = nIter * CLOCKS_PER_SEC / (last - first);
249 cout << name << " transfer rate is " << mbps << " Mbps, data is ";
250 if (buf1 == buf2)
251 cout << "GOOD" << endl;
252 else
253 cout << "BAD" << endl;
254 cout.flush();
257 //----------------------------------------------------------------------
259 int main (void)
261 cout << "Testing fill" << endl;
262 cout << "---------------------------------------------------------" << endl;
263 TestFillFunction ("fill_n\t\t", &fill_n<char*, char>);
264 #if __i386__ || __x86_64__
265 #if CPU_HAS_MMX && HAVE_INT64_T
266 TestFillFunction ("mmx_fill\t", &mmx_fill);
267 #endif
268 TestFillFunction ("stosb_fill\t", &stosb_fill);
269 TestFillFunction ("stosd_fill\t", &stosd_fill);
270 TestFillFunction ("unroll_fill\t", &unroll_fill);
271 TestFillFunction ("risc_fill\t", &risc_fill);
272 #endif
273 TestFillFunction ("memset_fill\t", &memset_fill);
275 cout << endl;
276 cout << "Testing copy" << endl;
277 cout << "---------------------------------------------------------" << endl;
278 TestCopyFunction ("copy_n\t\t", &copy_n<const char*, char*>);
279 #if __i386__ || __x86_64__
280 #if CPU_HAS_SSE
281 TestCopyFunction ("sse_copy\t", &sse_copy);
282 #endif
283 #if CPU_HAS_MMX
284 TestCopyFunction ("mmx_copy\t", &mmx_copy);
285 #endif
286 TestCopyFunction ("movsb_copy\t", &movsb_copy);
287 TestCopyFunction ("movsd_copy\t", &movsd_copy);
288 TestCopyFunction ("risc_copy\t", &risc_copy);
289 TestCopyFunction ("unroll_copy\t", &unroll_copy);
290 #endif
291 TestCopyFunction ("memcpy_copy\t", &memcpy_copy);
293 return (EXIT_SUCCESS);