1 // This file is part of the ustl library, an STL implementation.
3 // Copyright (C) 2005 by Mike Sharov <msharov@users.sourceforge.net>
4 // This file is free software, distributed under the MIT License.
11 //----------------------------------------------------------------------
13 //----------------------------------------------------------------------
15 #if __i386__ || __x86_64__
// Copy routine with the (src, nBytes, dest) signature used by every copy
// benchmark in this file. Only the signature and the inline-asm operand lists
// are visible in this excerpt; the instruction template itself is not shown
// (presumably `rep movsb`, going by the name -- TODO confirm).
16 extern "C" void movsb_copy (const char* src
, size_t nBytes
, char* dest
)
// Outputs: src is bound to the "S" register constraint and dest to "D", both
// early-clobber ("=&"), so the asm is declared to write both pointers.
20 : "=&S"(src
), "=&D"(dest
)
// Inputs: "0" and "1" reuse the registers of outputs 0 and 1; "c" passes the
// byte count in the count register.
21 : "0"(src
), "1"(dest
), "c"(nBytes
)
// Copy routine with the same (src, nBytes, dest) signature and the same
// operand layout as movsb_copy, except that the count register receives
// nBytes / 4. The instruction template is not visible in this excerpt
// (presumably a 4-byte string move -- TODO confirm).
25 extern "C" void movsd_copy (const char* src
, size_t nBytes
, char* dest
)
// Outputs: src in "S", dest in "D", both early-clobber.
29 : "=&S"(src
), "=&D"(dest
)
// Inputs: the count is in 4-byte units. The integer division drops any
// remainder (nBytes % 4); no tail handling is visible here.
30 : "0"(src
), "1"(dest
), "c"(nBytes
/ 4)
// Word-at-a-time copy routine, (src, nBytes, dest) signature. The visible
// part sets up unsigned-long views of both buffers and converts the byte
// count into a word count; the copy loop itself is not visible in this
// excerpt.
34 extern "C" void risc_copy (const char* src
, size_t nBytes
, char* dest
)
// Reinterpret both buffers as arrays of unsigned long.
36 unsigned long* ldest ((unsigned long*) dest
);
37 const unsigned long* lsrc ((const unsigned long*) src
);
// From here on nBytes counts unsigned-long elements, not bytes; the
// division truncates.
38 nBytes
/= sizeof(*lsrc
);
// Copy routine with the (src, nBytes, dest) signature that works in groups of
// four unsigned-long words. Only the setup is visible in this excerpt; the
// loop body (presumably four assignments per iteration, going by the name
// and the divisor -- TODO confirm) is not shown.
44 extern "C" void unroll_copy (const char* src
, size_t nBytes
, char* dest
)
// Reinterpret both buffers as arrays of unsigned long.
46 unsigned long* ldest ((unsigned long*) dest
);
47 const unsigned long* lsrc ((const unsigned long*) src
);
// From here on nBytes counts groups of 4 * sizeof(unsigned long) bytes; the
// division truncates.
48 nBytes
/= 4 * sizeof(unsigned long);
// MMX-based copy routine, (src, nBytes, dest) signature. The enclosing loop
// and the asm instruction template are not visible in this excerpt; only the
// prefetch call and the asm operand/clobber lists are.
60 extern "C" void mmx_copy (const char* src
, size_t nBytes
, char* dest
)
// Prefetch hint 512 bytes ahead of src (prefetch is defined elsewhere;
// presumably its extra arguments mean read access / low locality -- TODO
// confirm).
64 prefetch (src
+ 512, 0, 0);
// Outputs: four memory operands in dest at byte offsets 0, 8, 16 and 24.
74 : "=m"(dest
[0]), "=m"(dest
[8]), "=m"(dest
[16]), "=m"(dest
[24])
// Inputs: the matching four memory operands in src.
75 : "m"(src
[0]), "m"(src
[8]), "m"(src
[16]), "m"(src
[24])
// Clobbers: mm0-mm3 plus the x87 stack registers st..st(3), which the MMX
// registers alias.
76 : "mm0", "mm1", "mm2", "mm3", "st", "st(1)", "st(2)", "st(3)");
// SSE-based copy routine, (src, nBytes, dest) signature. Visible structure:
// a byte loop over a head region, then (only if dest is 16-byte aligned) a
// block loop using movaps loads and a movntps store, then a final byte loop.
// The loop bodies, the second store and the pointer/count updates are not
// visible in this excerpt.
85 extern "C" void sse_copy (const char* src
, size_t nBytes
, char* dest
)
// Distance from src to its 16-byte-aligned address, capped at nBytes
// (assumes Align rounds its first argument up to a multiple of the second
// -- TODO confirm).
87 const size_t nHeadBytes
= min (nBytes
, Align(uintptr_t(src
), 16U) - uintptr_t(src
));
// Head loop: one iteration per head byte (body not visible).
88 for (uoff_t i
= 0; i
< nHeadBytes
; ++ i
)
// The block path is taken only when dest is 16-byte aligned.
91 if (!(uintptr_t(dest
) % 16)) {
// Number of whole 32-byte blocks.
92 const size_t nMiddleBlocks
= nBytes
/ 32;
93 for (uoff_t i
= 0; i
< nMiddleBlocks
; ++ i
) {
// Prefetch hint 512 bytes ahead of src.
94 prefetch (src
+ 512, 0, 0);
// Load operands 2 and 3 (src[0], src[16]) into xmm0/xmm1 with aligned
// loads, then store xmm0 to operand 0 (dest[0]) with a non-temporal store.
// The store to operand 1 is not visible in this excerpt.
96 "movaps\t%2, %%xmm0 \n\t"
97 "movaps\t%3, %%xmm1 \n\t"
98 "movntps\t%%xmm0, %0 \n\t"
100 : "=m"(dest
[0]), "=m"(dest
[16])
101 : "m"(src
[0]), "m"(src
[16])
// Final byte loop over nBytes (body not visible; presumably copies whatever
// the earlier stages left over -- TODO confirm).
108 for (uoff_t i
= 0; i
< nBytes
; ++ i
)
111 #endif // CPU_HAS_SSE
// Adapter that gives memcpy the (src, nBytes, dest) signature used by the
// other copy routines. Copies nBytes bytes from src to dest; memcpy takes its
// arguments in (dest, src, n) order, hence the reordering below.
114 extern "C" void memcpy_copy (const char* src
, size_t nBytes
, char* dest
)
116 memcpy (dest
, src
, nBytes
);
// Times one copy routine and checks its output.
//   name - label printed in front of the result line.
//   pfn  - routine invoked as (*pfn)(src, nBytes, dest).
// Fills buf1 with an incrementing pattern, calls pfn nIter times to copy it
// into buf2, prints a rate derived from clock(), then compares the two
// buffers and prints GOOD or BAD, listing each mismatching offset in the BAD
// case. The nBad declaration and the conditions that select GOOD vs BAD are
// not visible in this excerpt.
119 template <typename CopyFunction
>
120 void TestCopyFunction (const char* name
, CopyFunction pfn
)
// Tuning knobs, all zero here: destination offset, and bytes skipped at the
// start and end of the buffers.
122 const uoff_t misalignment
= 0;
123 const uoff_t headBytes
= 0;
124 const uoff_t tailBytes
= 0;
// 128 passes over a 1 MiB buffer (plus room for the misalignment offset).
126 const size_t nIter
= 128;
127 const size_t nBytes
= 1024 * 1024 + misalignment
;
// Source and destination buffers; the source gets successive values
// starting at 1.
129 string
buf1 (nBytes
), buf2 (nBytes
);
130 iota (buf1
.begin(), buf1
.end(), '\x1');
// Timed section: nIter calls of the routine under test.
132 const clock_t first
= clock();
133 for (uoff_t i
= 0; i
< nIter
; ++ i
)
134 (*pfn
)(buf1
.cdata() + headBytes
, nBytes
- headBytes
- tailBytes
, buf2
.data() + headBytes
+ misalignment
);
135 clock_t last
= clock();
// If no clock tick elapsed, advance last by one so the division below
// cannot be by zero.
136 last
+= (last
== first
);
// Iterations per second; with about 1 MiB moved per iteration this is
// roughly MiB/s. NOTE(review): the printed unit is "Mbps" -- confirm the
// intended unit.
137 const size_t mbps
= nIter
* CLOCKS_PER_SEC
/ (last
- first
);
138 cout
<< name
<< " transfer rate is " << mbps
<< " Mbps, data is ";
// Count positions where the destination differs from the source.
140 for (uoff_t i
= headBytes
; i
< buf1
.size() - tailBytes
; ++ i
)
141 nBad
+= (buf1
[i
] != buf2
[i
+ misalignment
]);
143 cout
<< "GOOD" << endl
;
145 cout
<< "BAD" << endl
;
// Failure detail: print the offset and both byte values for every mismatch.
146 for (uoff_t i
= headBytes
; i
< buf1
.size() - tailBytes
; ++ i
)
147 if (buf1
[i
] != buf2
[i
+ misalignment
])
148 cout
<< "\t\t" << i
<< "\tbuf1: " << (int) buf1
[i
] << ", buf2: " << (int) buf2
[i
+ misalignment
] << endl
;
153 //----------------------------------------------------------------------
155 //----------------------------------------------------------------------
// Adapter that gives memset the (dest, nBytes, v) signature used by the other
// fill routines. Sets nBytes bytes at dest to v; memset takes its arguments
// in (dest, value, n) order, hence the reordering below.
157 extern "C" void memset_fill (char* dest
, size_t nBytes
, char v
)
159 memset (dest
, v
, nBytes
);
162 #if __i386__ || __x86_64__
// Fill routine built on the x86 string-store instruction, (dest, nBytes, v)
// signature. The asm output operand list is not visible in this excerpt.
163 extern "C" void stosb_fill (char* dest
, size_t nBytes
, char v
)
// Clear the direction flag, then repeat a byte store.
166 "cld\n\trep\n\tstosb\n\t"
// Inputs: dest reuses the register of output 0, v goes in the "a" register
// and the byte count in the "c" (count) register.
168 : "0"(dest
), "a"(v
), "c"(nBytes
)
// Fill routine built on the x86 string-store instruction in its `stosl`
// form, (dest, nBytes, v) signature. The declaration of lv and the asm
// output operand list are not visible in this excerpt (lv presumably holds v
// replicated into every byte -- TODO confirm).
172 extern "C" void stosd_fill (char* dest
, size_t nBytes
, char v
)
// Clear the direction flag, then repeat the store.
177 "cld\n\trep\n\tstosl\n\t"
// Inputs: lv goes in the "a" register; the count is in units of sizeof(lv),
// and the integer division drops any remainder.
179 : "0"(dest
), "a"(lv
), "c"(nBytes
/ sizeof(lv
))
// Word-at-a-time fill routine, (dest, nBytes, v) signature. Only the setup is
// visible in this excerpt; the declaration of lv and the store loop are not
// shown.
183 extern "C" void risc_fill (char* dest
, size_t nBytes
, char v
)
// Reinterpret the destination as an array of unsigned long.
187 unsigned long* ldest ((unsigned long*) dest
);
// From here on nBytes counts units of sizeof(lv), not bytes; the division
// truncates.
188 nBytes
/= sizeof(lv
);
// Fill routine with the (dest, nBytes, v) signature that works in groups of
// four lv-sized words. Only the setup is visible in this excerpt; the
// declaration of lv and the store loop are not shown.
194 extern "C" void unroll_fill (char* dest
, size_t nBytes
, char v
)
// Reinterpret the destination as an array of unsigned long.
198 unsigned long* ldest ((unsigned long*) dest
);
// From here on nBytes counts groups of 4 * sizeof(lv) bytes; the division
// truncates.
199 nBytes
/= 4 * sizeof(lv
);
// MMX-based fill routine, (dest, nBytes, v) signature. It first builds a fill
// pattern in mm0 and then stores mm0 to four 8-byte-spaced slots of dest per
// loop iteration. The operand lists of the first asm, the fourth store and
// the pointer advance are not visible in this excerpt.
210 extern "C" void mmx_fill (char* dest
, size_t nBytes
, char v
)
// Prefetch hint 512 bytes ahead of dest (prefetch is defined elsewhere; the
// second argument presumably requests write access -- TODO confirm).
212 prefetch (dest
+ 512, 1, 0);
// Load operand 0 into mm0, then unpack mm0 with itself at byte, word and
// doubleword granularity, which replicates its low byte into all 8 bytes.
214 "movd %0, %%mm0 \n\t"
215 "punpcklbw %%mm0, %%mm0 \n\t"
216 "punpcklwd %%mm0, %%mm0 \n\t"
217 "punpckldq %%mm0, %%mm0"
// Number of whole 32-byte blocks; the division truncates.
220 const size_t nBlocks (nBytes
/ 32);
221 for (uoff_t i
= 0; i
< nBlocks
; ++ i
) {
// Store the pattern to operands 0..2 (dest[0], dest[8], dest[16]); the store
// to operand 3 (dest[24]) is not visible in this excerpt.
223 "movq %%mm0, %0 \n\t"
224 "movq %%mm0, %1 \n\t"
225 "movq %%mm0, %2 \n\t"
227 : "=m"(dest
[0]), "=m"(dest
[8]), "=m"(dest
[16]), "=m"(dest
[24]));
232 #endif // CPU_HAS_MMX
// Times one fill routine and reports whether its output is GOOD or BAD.
//   name - label printed in front of the result line.
//   pfn  - routine invoked as (*pfn)(dest, nBytes, value).
// The check that selects GOOD vs BAD is not visible in this excerpt
// (presumably it compares buf1 against a reference fill -- TODO confirm).
235 template <typename FillFunction
>
236 void TestFillFunction (const char* name
, FillFunction pfn
)
// 256 passes over a 1 MiB buffer.
238 const size_t nIter
= 256;
239 const size_t nBytes
= 1024 * 1024;
// buf1 is the buffer passed to pfn; it is pre-filled with successive values
// starting at 1. buf2 is not used in the visible lines.
240 string
buf1 (nBytes
), buf2 (nBytes
);
241 iota (buf1
.begin(), buf1
.end(), '\x1');
// Timed section: nIter calls filling buf1 with the byte value 42.
243 clock_t first
= clock();
244 for (uoff_t i
= 0; i
< nIter
; ++ i
)
245 (*pfn
)(buf1
.data(), nBytes
, char(42));
246 clock_t last
= clock();
// If no clock tick elapsed, advance last by one so the division below
// cannot be by zero.
247 last
+= (last
== first
);
// Iterations per second; with 1 MiB written per iteration this is roughly
// MiB/s. NOTE(review): the printed unit is "Mbps" -- confirm the intended
// unit.
248 const size_t mbps
= nIter
* CLOCKS_PER_SEC
/ (last
- first
);
249 cout
<< name
<< " transfer rate is " << mbps
<< " Mbps, data is ";
251 cout
<< "GOOD" << endl
;
253 cout
<< "BAD" << endl
;
257 //----------------------------------------------------------------------
// Benchmark driver body (the enclosing function header is not visible in
// this excerpt). Runs every available fill routine through TestFillFunction,
// then every available copy routine through TestCopyFunction, and returns
// EXIT_SUCCESS. The x86-only and MMX/SSE-only routines sit inside
// preprocessor conditionals, some of whose #if/#endif lines are not visible
// here.
261 cout
<< "Testing fill" << endl
;
262 cout
<< "---------------------------------------------------------" << endl
;
// Library algorithm first, as the baseline.
263 TestFillFunction ("fill_n\t\t", &fill_n
<char*, char>);
264 #if __i386__ || __x86_64__
265 #if CPU_HAS_MMX && HAVE_INT64_T
266 TestFillFunction ("mmx_fill\t", &mmx_fill
);
268 TestFillFunction ("stosb_fill\t", &stosb_fill
);
269 TestFillFunction ("stosd_fill\t", &stosd_fill
);
270 TestFillFunction ("unroll_fill\t", &unroll_fill
);
271 TestFillFunction ("risc_fill\t", &risc_fill
);
273 TestFillFunction ("memset_fill\t", &memset_fill
);
276 cout
<< "Testing copy" << endl
;
277 cout
<< "---------------------------------------------------------" << endl
;
// Library algorithm first, as the baseline. The argument is the address of
// the copy_n<const char*, char*> instantiation, mirroring the &fill_n call
// above; the "&copy" prefix had been decoded as an HTML entity into a
// copyright sign, which is not valid C++.
278 TestCopyFunction ("copy_n\t\t", &copy_n
<const char*, char*>);
279 #if __i386__ || __x86_64__
281 TestCopyFunction ("sse_copy\t", &sse_copy
);
284 TestCopyFunction ("mmx_copy\t", &mmx_copy
);
286 TestCopyFunction ("movsb_copy\t", &movsb_copy
);
287 TestCopyFunction ("movsd_copy\t", &movsd_copy
);
288 TestCopyFunction ("risc_copy\t", &risc_copy
);
289 TestCopyFunction ("unroll_copy\t", &unroll_copy
);
291 TestCopyFunction ("memcpy_copy\t", &memcpy_copy
);
293 return (EXIT_SUCCESS
);