1 // Copyright 2015 Google Inc. All rights reserved.
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
7 // http://www.apache.org/licenses/LICENSE-2.0
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
15 #include "internal_macros.h"
17 #ifdef BENCHMARK_OS_WINDOWS
19 #undef StrCat // Don't let StrCat in string_util.h be renamed to lstrcatA
20 #include <versionhelpers.h>
24 #ifndef BENCHMARK_OS_FUCHSIA
25 #include <sys/resource.h>
28 #include <sys/types.h> // this header must be included before 'sys/sysctl.h' to avoid compilation error on FreeBSD
30 #if defined BENCHMARK_OS_FREEBSD || defined BENCHMARK_OS_MACOSX || \
31 defined BENCHMARK_OS_NETBSD || defined BENCHMARK_OS_OPENBSD
32 #define BENCHMARK_HAS_SYSCTL
33 #include <sys/sysctl.h>
36 #if defined(BENCHMARK_OS_SOLARIS)
57 #include "cycleclock.h"
58 #include "internal_macros.h"
61 #include "string_util.h"
// Base case of the PrintImp overload set: nothing is left to print, so end
// the line and flush the stream (the same two steps std::endl performs).
void PrintImp(std::ostream& out) {
  out.put(out.widen('\n'));
  out.flush();
}
// Recursive case: streams the leading argument to `out`, then forwards the
// remaining arguments to PrintImp again. Once the pack is empty the call
// resolves to the overload that takes only the stream.
template <class Head, class... Tail>
void PrintImp(std::ostream& out, Head&& f, Tail&&... rest) {
  out << std::forward<Head>(f);
  PrintImp(out, std::forward<Tail>(rest)...);
}
74 template <class... Args
>
75 BENCHMARK_NORETURN
void PrintErrorAndDie(Args
&&... args
) {
76 PrintImp(std::cerr
, std::forward
<Args
>(args
)...);
77 std::exit(EXIT_FAILURE
);
80 #ifdef BENCHMARK_HAS_SYSCTL
82 /// ValueUnion - A type used to correctly alias the byte-for-byte output of
83 /// `sysctl` with the result type it's to be interpreted as.
86 uint32_t uint32_value
;
87 uint64_t uint64_value
;
88 // For correct aliasing of union members from bytes.
91 using DataPtr
= std::unique_ptr
<DataT
, decltype(&std::free
)>;
93 // The size of the data union member + its trailing array size.
98 ValueUnion() : Size(0), Buff(nullptr, &std::free
) {}
100 explicit ValueUnion(size_t BuffSize
)
101 : Size(sizeof(DataT
) + BuffSize
),
102 Buff(::new (std::malloc(Size
)) DataT(), &std::free
) {}
104 ValueUnion(ValueUnion
&& other
) = default;
106 explicit operator bool() const { return bool(Buff
); }
108 char* data() const { return Buff
->bytes
; }
110 std::string
GetAsString() const { return std::string(data()); }
112 int64_t GetAsInteger() const {
113 if (Size
== sizeof(Buff
->uint32_value
))
114 return static_cast<int32_t>(Buff
->uint32_value
);
115 else if (Size
== sizeof(Buff
->uint64_value
))
116 return static_cast<int64_t>(Buff
->uint64_value
);
117 BENCHMARK_UNREACHABLE();
120 uint64_t GetAsUnsigned() const {
121 if (Size
== sizeof(Buff
->uint32_value
))
122 return Buff
->uint32_value
;
123 else if (Size
== sizeof(Buff
->uint64_value
))
124 return Buff
->uint64_value
;
125 BENCHMARK_UNREACHABLE();
128 template <class T
, int N
>
129 std::array
<T
, N
> GetAsArray() {
130 const int ArrSize
= sizeof(T
) * N
;
131 CHECK_LE(ArrSize
, Size
);
132 std::array
<T
, N
> Arr
;
133 std::memcpy(Arr
.data(), data(), ArrSize
);
138 ValueUnion
GetSysctlImp(std::string
const& Name
) {
139 #if defined BENCHMARK_OS_OPENBSD
143 if ((Name
== "hw.ncpu") || (Name
== "hw.cpuspeed")){
144 ValueUnion
buff(sizeof(int));
146 if (Name
== "hw.ncpu") {
149 mib
[1] = HW_CPUSPEED
;
152 if (sysctl(mib
, 2, buff
.data(), &buff
.Size
, nullptr, 0) == -1) {
159 size_t CurBuffSize
= 0;
160 if (sysctlbyname(Name
.c_str(), nullptr, &CurBuffSize
, nullptr, 0) == -1)
163 ValueUnion
buff(CurBuffSize
);
164 if (sysctlbyname(Name
.c_str(), buff
.data(), &buff
.Size
, nullptr, 0) == 0)
170 BENCHMARK_MAYBE_UNUSED
171 bool GetSysctl(std::string
const& Name
, std::string
* Out
) {
173 auto Buff
= GetSysctlImp(Name
);
174 if (!Buff
) return false;
175 Out
->assign(Buff
.data());
180 class = typename
std::enable_if
<std::is_integral
<Tp
>::value
>::type
>
181 bool GetSysctl(std::string
const& Name
, Tp
* Out
) {
183 auto Buff
= GetSysctlImp(Name
);
184 if (!Buff
) return false;
185 *Out
= static_cast<Tp
>(Buff
.GetAsUnsigned());
189 template <class Tp
, size_t N
>
190 bool GetSysctl(std::string
const& Name
, std::array
<Tp
, N
>* Out
) {
191 auto Buff
= GetSysctlImp(Name
);
192 if (!Buff
) return false;
193 *Out
= Buff
.GetAsArray
<Tp
, N
>();
198 template <class ArgT
>
199 bool ReadFromFile(std::string
const& fname
, ArgT
* arg
) {
201 std::ifstream
f(fname
.c_str());
202 if (!f
.is_open()) return false;
207 bool CpuScalingEnabled(int num_cpus
) {
208 // We don't have a valid CPU count, so don't even bother.
209 if (num_cpus
<= 0) return false;
210 #ifndef BENCHMARK_OS_WINDOWS
211 // On Linux, the CPUfreq subsystem exposes CPU information as files on the
212 // local file system. If reading the exported files fails, then we may not be
213 // running on Linux, so we silently ignore all the read errors.
215 for (int cpu
= 0; cpu
< num_cpus
; ++cpu
) {
216 std::string governor_file
=
217 StrCat("/sys/devices/system/cpu/cpu", cpu
, "/cpufreq/scaling_governor");
218 if (ReadFromFile(governor_file
, &res
) && res
!= "performance") return true;
224 int CountSetBitsInCPUMap(std::string Val
) {
225 auto CountBits
= [](std::string Part
) {
226 using CPUMask
= std::bitset
<sizeof(std::uintptr_t) * CHAR_BIT
>;
228 CPUMask
Mask(std::stoul(Part
, nullptr, 16));
229 return static_cast<int>(Mask
.count());
233 while ((Pos
= Val
.find(',')) != std::string::npos
) {
234 total
+= CountBits(Val
.substr(0, Pos
));
235 Val
= Val
.substr(Pos
+ 1);
238 total
+= CountBits(Val
);
243 BENCHMARK_MAYBE_UNUSED
244 std::vector
<CPUInfo::CacheInfo
> GetCacheSizesFromKVFS() {
245 std::vector
<CPUInfo::CacheInfo
> res
;
246 std::string dir
= "/sys/devices/system/cpu/cpu0/cache/";
249 CPUInfo::CacheInfo info
;
250 std::string FPath
= StrCat(dir
, "index", Idx
++, "/");
251 std::ifstream
f(StrCat(FPath
, "size").c_str());
252 if (!f
.is_open()) break;
256 PrintErrorAndDie("Failed while reading file '", FPath
, "size'");
261 "Invalid cache size format: failed to read size suffix");
262 else if (f
&& suffix
!= "K")
263 PrintErrorAndDie("Invalid cache size format: Expected bytes ", suffix
);
264 else if (suffix
== "K")
267 if (!ReadFromFile(StrCat(FPath
, "type"), &info
.type
))
268 PrintErrorAndDie("Failed to read from file ", FPath
, "type");
269 if (!ReadFromFile(StrCat(FPath
, "level"), &info
.level
))
270 PrintErrorAndDie("Failed to read from file ", FPath
, "level");
272 if (!ReadFromFile(StrCat(FPath
, "shared_cpu_map"), &map_str
))
273 PrintErrorAndDie("Failed to read from file ", FPath
, "shared_cpu_map");
274 info
.num_sharing
= CountSetBitsInCPUMap(map_str
);
281 #ifdef BENCHMARK_OS_MACOSX
282 std::vector
<CPUInfo::CacheInfo
> GetCacheSizesMacOSX() {
283 std::vector
<CPUInfo::CacheInfo
> res
;
284 std::array
<uint64_t, 4> CacheCounts
{{0, 0, 0, 0}};
285 GetSysctl("hw.cacheconfig", &CacheCounts
);
291 uint64_t num_sharing
;
292 } Cases
[] = {{"hw.l1dcachesize", "Data", 1, CacheCounts
[1]},
293 {"hw.l1icachesize", "Instruction", 1, CacheCounts
[1]},
294 {"hw.l2cachesize", "Unified", 2, CacheCounts
[2]},
295 {"hw.l3cachesize", "Unified", 3, CacheCounts
[3]}};
296 for (auto& C
: Cases
) {
298 if (!GetSysctl(C
.name
, &val
)) continue;
299 CPUInfo::CacheInfo info
;
301 info
.level
= C
.level
;
303 info
.num_sharing
= static_cast<int>(C
.num_sharing
);
304 res
.push_back(std::move(info
));
308 #elif defined(BENCHMARK_OS_WINDOWS)
309 std::vector
<CPUInfo::CacheInfo
> GetCacheSizesWindows() {
310 std::vector
<CPUInfo::CacheInfo
> res
;
311 DWORD buffer_size
= 0;
312 using PInfo
= SYSTEM_LOGICAL_PROCESSOR_INFORMATION
;
313 using CInfo
= CACHE_DESCRIPTOR
;
315 using UPtr
= std::unique_ptr
<PInfo
, decltype(&std::free
)>;
316 GetLogicalProcessorInformation(nullptr, &buffer_size
);
317 UPtr
buff((PInfo
*)malloc(buffer_size
), &std::free
);
318 if (!GetLogicalProcessorInformation(buff
.get(), &buffer_size
))
319 PrintErrorAndDie("Failed during call to GetLogicalProcessorInformation: ",
322 PInfo
* it
= buff
.get();
323 PInfo
* end
= buff
.get() + (buffer_size
/ sizeof(PInfo
));
325 for (; it
!= end
; ++it
) {
326 if (it
->Relationship
!= RelationCache
) continue;
327 using BitSet
= std::bitset
<sizeof(ULONG_PTR
) * CHAR_BIT
>;
328 BitSet
B(it
->ProcessorMask
);
329 // To prevent duplicates, only consider caches where CPU 0 is specified
330 if (!B
.test(0)) continue;
331 CInfo
* Cache
= &it
->Cache
;
332 CPUInfo::CacheInfo C
;
333 C
.num_sharing
= static_cast<int>(B
.count());
334 C
.level
= Cache
->Level
;
335 C
.size
= Cache
->Size
;
337 switch (Cache
->Type
) {
341 case CacheInstruction
:
342 C
.type
= "Instruction";
357 std::vector
<CPUInfo::CacheInfo
> GetCacheSizes() {
358 #ifdef BENCHMARK_OS_MACOSX
359 return GetCacheSizesMacOSX();
360 #elif defined(BENCHMARK_OS_WINDOWS)
361 return GetCacheSizesWindows();
363 return GetCacheSizesFromKVFS();
368 #ifdef BENCHMARK_HAS_SYSCTL
370 if (GetSysctl("hw.ncpu", &NumCPU
)) return NumCPU
;
371 fprintf(stderr
, "Err: %s\n", strerror(errno
));
372 std::exit(EXIT_FAILURE
);
373 #elif defined(BENCHMARK_OS_WINDOWS)
375 // Use memset as opposed to = {} to avoid GCC missing initializer false
377 std::memset(&sysinfo
, 0, sizeof(SYSTEM_INFO
));
378 GetSystemInfo(&sysinfo
);
379 return sysinfo
.dwNumberOfProcessors
; // number of logical
380 // processors in the current
382 #elif defined(BENCHMARK_OS_SOLARIS)
383 // Returns -1 in case of a failure.
384 int NumCPU
= sysconf(_SC_NPROCESSORS_ONLN
);
387 "sysconf(_SC_NPROCESSORS_ONLN) failed with error: %s\n",
394 std::ifstream
f("/proc/cpuinfo");
396 std::cerr
<< "failed to open /proc/cpuinfo\n";
399 const std::string Key
= "processor";
401 while (std::getline(f
, ln
)) {
402 if (ln
.empty()) continue;
403 size_t SplitIdx
= ln
.find(':');
405 if (SplitIdx
!= std::string::npos
) value
= ln
.substr(SplitIdx
+ 1);
406 if (ln
.size() >= Key
.size() && ln
.compare(0, Key
.size(), Key
) == 0) {
408 if (!value
.empty()) {
409 int CurID
= std::stoi(value
);
410 MaxID
= std::max(CurID
, MaxID
);
415 std::cerr
<< "Failure reading /proc/cpuinfo\n";
419 std::cerr
<< "Failed to read to end of /proc/cpuinfo\n";
424 if ((MaxID
+ 1) != NumCPUs
) {
426 "CPU ID assignments in /proc/cpuinfo seem messed up."
427 " This is usually caused by a bad BIOS.\n");
431 BENCHMARK_UNREACHABLE();
434 double GetCPUCyclesPerSecond() {
435 #if defined BENCHMARK_OS_LINUX || defined BENCHMARK_OS_CYGWIN
438 // If the kernel is exporting the tsc frequency use that. There are issues
439 // where cpuinfo_max_freq cannot be relied on because the BIOS may be
440 // exporting an invalid p-state (on x86) or p-states may be used to put the
441 // processor in a new mode (turbo mode). Essentially, those frequencies
442 // cannot always be relied upon. The same reasons apply to /proc/cpuinfo as
444 if (ReadFromFile("/sys/devices/system/cpu/cpu0/tsc_freq_khz", &freq
)
445 // If CPU scaling is in effect, we want to use the *maximum* frequency,
446 // not whatever CPU speed some random processor happens to be using now.
447 || ReadFromFile("/sys/devices/system/cpu/cpu0/cpufreq/cpuinfo_max_freq",
449 // The value is in kHz (as the file name suggests). For example, on a
450 // 2GHz workstation, the file contains the value "2000000".
451 return freq
* 1000.0;
454 const double error_value
= -1;
455 double bogo_clock
= error_value
;
457 std::ifstream
f("/proc/cpuinfo");
459 std::cerr
<< "failed to open /proc/cpuinfo\n";
463 auto startsWithKey
= [](std::string
const& Value
, std::string
const& Key
) {
464 if (Key
.size() > Value
.size()) return false;
465 auto Cmp
= [&](char X
, char Y
) {
466 return std::tolower(X
) == std::tolower(Y
);
468 return std::equal(Key
.begin(), Key
.end(), Value
.begin(), Cmp
);
472 while (std::getline(f
, ln
)) {
473 if (ln
.empty()) continue;
474 size_t SplitIdx
= ln
.find(':');
476 if (SplitIdx
!= std::string::npos
) value
= ln
.substr(SplitIdx
+ 1);
477 // When parsing the "cpu MHz" and "bogomips" (fallback) entries, we only
478 // accept positive values. Some environments (virtual machines) report zero,
479 // which would cause infinite looping in WallTime_Init.
480 if (startsWithKey(ln
, "cpu MHz")) {
481 if (!value
.empty()) {
482 double cycles_per_second
= std::stod(value
) * 1000000.0;
483 if (cycles_per_second
> 0) return cycles_per_second
;
485 } else if (startsWithKey(ln
, "bogomips")) {
486 if (!value
.empty()) {
487 bogo_clock
= std::stod(value
) * 1000000.0;
488 if (bogo_clock
< 0.0) bogo_clock
= error_value
;
493 std::cerr
<< "Failure reading /proc/cpuinfo\n";
497 std::cerr
<< "Failed to read to end of /proc/cpuinfo\n";
501 // If we found the bogomips clock, but nothing better, we'll use it (but
502 // we're not happy about it); otherwise, fallback to the rough estimation
504 if (bogo_clock
>= 0.0) return bogo_clock
;
506 #elif defined BENCHMARK_HAS_SYSCTL
507 constexpr auto* FreqStr
=
508 #if defined(BENCHMARK_OS_FREEBSD) || defined(BENCHMARK_OS_NETBSD)
510 #elif defined BENCHMARK_OS_OPENBSD
515 unsigned long long hz
= 0;
516 #if defined BENCHMARK_OS_OPENBSD
517 if (GetSysctl(FreqStr
, &hz
)) return hz
* 1000000;
519 if (GetSysctl(FreqStr
, &hz
)) return hz
;
521 fprintf(stderr
, "Unable to determine clock rate from sysctl: %s: %s\n",
522 FreqStr
, strerror(errno
));
524 #elif defined BENCHMARK_OS_WINDOWS
525 // In NT, read MHz from the registry. If we fail to do so or we're in win9x
526 // then make a crude estimate.
527 DWORD data
, data_size
= sizeof(data
);
528 if (IsWindowsXPOrGreater() &&
530 SHGetValueA(HKEY_LOCAL_MACHINE
,
531 "HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0",
532 "~MHz", nullptr, &data
, &data_size
)))
533 return static_cast<double>((int64_t)data
*
534 (int64_t)(1000 * 1000)); // was mhz
535 #elif defined (BENCHMARK_OS_SOLARIS)
536 kstat_ctl_t
*kc
= kstat_open();
538 std::cerr
<< "failed to open /dev/kstat\n";
541 kstat_t
*ksp
= kstat_lookup(kc
, (char*)"cpu_info", -1, (char*)"cpu_info0");
543 std::cerr
<< "failed to lookup in /dev/kstat\n";
546 if (kstat_read(kc
, ksp
, NULL
) < 0) {
547 std::cerr
<< "failed to read from /dev/kstat\n";
551 (kstat_named_t
*)kstat_data_lookup(ksp
, (char*)"current_clock_Hz");
553 std::cerr
<< "failed to lookup data in /dev/kstat\n";
556 if (knp
->data_type
!= KSTAT_DATA_UINT64
) {
557 std::cerr
<< "current_clock_Hz is of unexpected data type: "
558 << knp
->data_type
<< "\n";
561 double clock_hz
= knp
->value
.ui64
;
565 // If we've fallen through, attempt to roughly estimate the CPU clock rate.
566 const int estimate_time_ms
= 1000;
567 const auto start_ticks
= cycleclock::Now();
568 SleepForMilliseconds(estimate_time_ms
);
569 return static_cast<double>(cycleclock::Now() - start_ticks
);
574 const CPUInfo
& CPUInfo::Get() {
575 static const CPUInfo
* info
= new CPUInfo();
580 : num_cpus(GetNumCPUs()),
581 cycles_per_second(GetCPUCyclesPerSecond()),
582 caches(GetCacheSizes()),
583 scaling_enabled(CpuScalingEnabled(num_cpus
)) {}
585 } // end namespace benchmark