Remove gmx custom fixed int (e.g. gmx_int64_t) types
[gromacs.git] / src / gromacs / hardware / printhardware.cpp
blob9748c8e4a0b7aba59606d52ac112046b84503d19
1 /*
2 * This file is part of the GROMACS molecular simulation package.
4 * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
5 * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
6 * and including many others, as listed in the AUTHORS file in the
7 * top-level source directory and at http://www.gromacs.org.
9 * GROMACS is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU Lesser General Public License
11 * as published by the Free Software Foundation; either version 2.1
12 * of the License, or (at your option) any later version.
14 * GROMACS is distributed in the hope that it will be useful,
15 * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17 * Lesser General Public License for more details.
19 * You should have received a copy of the GNU Lesser General Public
20 * License along with GROMACS; if not, see
21 * http://www.gnu.org/licenses, or write to the Free Software Foundation,
22 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
24 * If you want to redistribute modifications to GROMACS, please
25 * consider that scientific software is very special. Version
26 * control is crucial - bugs must be traceable. We will be happy to
27 * consider code for inclusion in the official distribution, but
28 * derived work must not be called official GROMACS. Details are found
29 * in the README & COPYING files - if they are missing, get the
30 * official version at http://www.gromacs.org.
32 * To help us fund GROMACS development, we humbly ask that you cite
33 * the research papers on the package. Check out http://www.gromacs.org.
35 #include "gmxpre.h"
37 #include "printhardware.h"
39 #include "config.h"
41 #include <cstdlib>
43 #include <string>
44 #include <vector>
46 #include "gromacs/gpu_utils/gpu_utils.h"
47 #include "gromacs/hardware/cpuinfo.h"
48 #include "gromacs/hardware/hardwaretopology.h"
49 #include "gromacs/hardware/hw_info.h"
50 #include "gromacs/hardware/identifyavx512fmaunits.h"
51 #include "gromacs/mdtypes/commrec.h"
52 #include "gromacs/simd/support.h"
53 #include "gromacs/utility/basedefinitions.h"
54 #include "gromacs/utility/basenetwork.h"
55 #include "gromacs/utility/cstringutil.h"
56 #include "gromacs/utility/fatalerror.h"
57 #include "gromacs/utility/logger.h"
58 #include "gromacs/utility/programcontext.h"
59 #include "gromacs/utility/stringutil.h"
60 #include "gromacs/utility/sysinfo.h"
62 //! Constant used to help minimize preprocessed code
63 static const bool bGPUBinary = GMX_GPU != GMX_GPU_NONE;
65 /*! \internal \brief
66 * Returns the GPU information text, one GPU per line.
68 static std::string sprint_gpus(const gmx_gpu_info_t &gpu_info)
70 char stmp[STRLEN];
71 std::vector<std::string> gpuStrings;
72 for (int i = 0; i < gpu_info.n_dev; i++)
74 get_gpu_device_info_string(stmp, gpu_info, i);
75 gpuStrings.push_back(gmx::formatString(" %s", stmp));
77 return gmx::joinStrings(gpuStrings, "\n");
80 /* Give a suitable fatal error or warning if the build configuration
81 and runtime CPU do not match. */
82 static void
83 check_use_of_rdtscp_on_this_cpu(const gmx::MDLogger &mdlog,
84 const gmx::CpuInfo &cpuInfo)
86 bool binaryUsesRdtscp = HAVE_RDTSCP;
88 const char *programName = gmx::getProgramContext().displayName();
90 if (cpuInfo.supportLevel() < gmx::CpuInfo::SupportLevel::Features)
92 if (binaryUsesRdtscp)
94 GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
95 "The %s executable was compiled to use the rdtscp CPU instruction. "
96 "We cannot detect the features of your current CPU, but will proceed anyway. "
97 "If you get a crash, rebuild GROMACS with the GMX_USE_RDTSCP=OFF CMake option.",
98 programName);
101 else
103 bool cpuHasRdtscp = cpuInfo.feature(gmx::CpuInfo::Feature::X86_Rdtscp);
105 if (!cpuHasRdtscp && binaryUsesRdtscp)
107 gmx_fatal(FARGS, "The %s executable was compiled to use the rdtscp CPU instruction. "
108 "However, this is not supported by the current hardware and continuing would lead to a crash. "
109 "Please rebuild GROMACS with the GMX_USE_RDTSCP=OFF CMake option.",
110 programName);
113 if (cpuHasRdtscp && !binaryUsesRdtscp)
115 GMX_LOG(mdlog.warning).asParagraph().appendTextFormatted(
116 "The current CPU can measure timings more accurately than the code in\n"
117 "%s was configured to use. This might affect your simulation\n"
118 "speed as accurate timings are needed for load-balancing.\n"
119 "Please consider rebuilding %s with the GMX_USE_RDTSCP=ON CMake option.",
120 programName, programName);
125 static std::string detected_hardware_string(const gmx_hw_info_t *hwinfo,
126 bool bFullCpuInfo)
128 std::string s;
130 const gmx::CpuInfo &cpuInfo = *hwinfo->cpuInfo;
131 const gmx::HardwareTopology &hwTop = *hwinfo->hardwareTopology;
133 s = gmx::formatString("\n");
134 s += gmx::formatString("Running on %d node%s with total",
135 hwinfo->nphysicalnode,
136 hwinfo->nphysicalnode == 1 ? "" : "s");
137 if (hwinfo->ncore_tot > 0)
139 s += gmx::formatString(" %d cores,", hwinfo->ncore_tot);
141 s += gmx::formatString(" %d logical cores", hwinfo->nhwthread_tot);
142 if (hwinfo->gpu_info.bDetectGPUs)
144 s += gmx::formatString(", %d compatible GPU%s",
145 hwinfo->ngpu_compatible_tot,
146 hwinfo->ngpu_compatible_tot == 1 ? "" : "s");
148 else if (bGPUBinary)
150 s += gmx::formatString(" (GPU detection deactivated)");
152 s += gmx::formatString("\n");
154 if (hwinfo->nphysicalnode > 1)
156 /* Print per node hardware feature counts */
157 if (hwinfo->ncore_max > 0)
159 s += gmx::formatString(" Cores per node: %2d", hwinfo->ncore_min);
160 if (hwinfo->ncore_max > hwinfo->ncore_min)
162 s += gmx::formatString(" - %2d", hwinfo->ncore_max);
164 s += gmx::formatString("\n");
166 s += gmx::formatString(" Logical cores per node: %2d", hwinfo->nhwthread_min);
167 if (hwinfo->nhwthread_max > hwinfo->nhwthread_min)
169 s += gmx::formatString(" - %2d", hwinfo->nhwthread_max);
171 s += gmx::formatString("\n");
172 if (bGPUBinary)
174 s += gmx::formatString(" Compatible GPUs per node: %2d",
175 hwinfo->ngpu_compatible_min);
176 if (hwinfo->ngpu_compatible_max > hwinfo->ngpu_compatible_min)
178 s += gmx::formatString(" - %2d", hwinfo->ngpu_compatible_max);
180 s += gmx::formatString("\n");
181 if (hwinfo->ngpu_compatible_tot > 0)
183 if (hwinfo->bIdenticalGPUs)
185 s += gmx::formatString(" All nodes have identical type(s) of GPUs\n");
187 else
189 /* This message will also appear with identical GPU types
190 * when at least one node has no GPU.
192 s += gmx::formatString(" Different nodes have different type(s) and/or order of GPUs\n");
198 #if GMX_LIB_MPI
199 int rank;
200 char host[STRLEN];
202 gmx_gethostname(host, STRLEN);
204 MPI_Comm_rank(MPI_COMM_WORLD, &rank);
206 // TODO Use a wrapper around MPI_Get_processor_name instead.
207 s += gmx::formatString("Hardware detected on host %s (the node of MPI rank %d):\n",
208 host, rank);
209 #else
210 s += gmx::formatString("Hardware detected:\n");
211 #endif
212 s += gmx::formatString(" CPU info:\n");
214 s += gmx::formatString(" Vendor: %s\n", cpuInfo.vendorString().c_str());
216 s += gmx::formatString(" Brand: %s\n", cpuInfo.brandString().c_str());
218 if (bFullCpuInfo)
220 s += gmx::formatString(" Family: %d Model: %d Stepping: %d\n",
221 cpuInfo.family(), cpuInfo.model(), cpuInfo.stepping());
223 s += gmx::formatString(" Features:");
224 for (auto &f : cpuInfo.featureSet())
226 s += gmx::formatString(" %s", gmx::CpuInfo::featureString(f).c_str());
228 s += gmx::formatString("\n");
231 if (cpuInfo.feature(gmx::CpuInfo::Feature::X86_Avx512F))
233 int avx512fmaunits = gmx::identifyAvx512FmaUnits();
234 s += gmx::formatString(" Number of AVX-512 FMA units:");
235 if (avx512fmaunits > 0)
237 s += gmx::formatString(" %d", avx512fmaunits);
238 if (avx512fmaunits == 1)
240 s += gmx::formatString(" (AVX2 is faster w/o 2 AVX-512 FMA units)");
243 else
245 s += gmx::formatString(" Cannot run AVX-512 detection - assuming 2");
247 s += gmx::formatString("\n");
250 s += gmx::formatString(" Hardware topology: ");
251 switch (hwTop.supportLevel())
253 case gmx::HardwareTopology::SupportLevel::None:
254 s += gmx::formatString("None\n");
255 break;
256 case gmx::HardwareTopology::SupportLevel::LogicalProcessorCount:
257 s += gmx::formatString("Only logical processor count\n");
258 break;
259 case gmx::HardwareTopology::SupportLevel::Basic:
260 s += gmx::formatString("Basic\n");
261 break;
262 case gmx::HardwareTopology::SupportLevel::Full:
263 s += gmx::formatString("Full\n");
264 break;
265 case gmx::HardwareTopology::SupportLevel::FullWithDevices:
266 s += gmx::formatString("Full, with devices\n");
267 break;
270 if (!hwTop.isThisSystem())
272 s += gmx::formatString(" NOTE: Hardware topology cached or synthetic, not detected.\n");
273 if (char *p = std::getenv("HWLOC_XMLFILE"))
275 s += gmx::formatString(" HWLOC_XMLFILE=%s\n", p);
279 if (bFullCpuInfo)
281 if (hwTop.supportLevel() >= gmx::HardwareTopology::SupportLevel::Basic)
283 s += gmx::formatString(" Sockets, cores, and logical processors:\n");
285 for (auto &socket : hwTop.machine().sockets)
287 s += gmx::formatString(" Socket %2d:", socket.id);
288 for (auto &c : socket.cores)
290 s += gmx::formatString(" [");
291 for (auto &t : c.hwThreads)
293 s += gmx::formatString(" %3d", t.logicalProcessorId);
295 s += gmx::formatString("]");
297 s += gmx::formatString("\n");
300 if (hwTop.supportLevel() >= gmx::HardwareTopology::SupportLevel::Full)
302 s += gmx::formatString(" Numa nodes:\n");
303 for (auto &n : hwTop.machine().numa.nodes)
305 s += gmx::formatString(" Node %2d (%" PRIu64 " bytes mem):", n.id, n.memory);
306 for (auto &l : n.logicalProcessorId)
308 s += gmx::formatString(" %3d", l);
310 s += gmx::formatString("\n");
312 s += gmx::formatString(" Latency:\n ");
313 for (std::size_t j = 0; j < hwTop.machine().numa.nodes.size(); j++)
315 s += gmx::formatString(" %5lu", j);
317 s += gmx::formatString("\n");
318 for (std::size_t i = 0; i < hwTop.machine().numa.nodes.size(); i++)
320 s += gmx::formatString(" %5lu", i);
321 for (std::size_t j = 0; j < hwTop.machine().numa.nodes.size(); j++)
323 s += gmx::formatString(" %5.2f", hwTop.machine().numa.relativeLatency[i][j]);
325 s += gmx::formatString("\n");
329 s += gmx::formatString(" Caches:\n");
330 for (auto &c : hwTop.machine().caches)
332 s += gmx::formatString(" L%d: %" PRIu64 " bytes, linesize %d bytes, assoc. %d, shared %d ways\n",
333 c.level, c.size, c.linesize, c.associativity, c.shared);
336 if (hwTop.supportLevel() >= gmx::HardwareTopology::SupportLevel::FullWithDevices)
338 s += gmx::formatString(" PCI devices:\n");
339 for (auto &d : hwTop.machine().devices)
341 s += gmx::formatString(" %04x:%02x:%02x.%1x Id: %04x:%04x Class: 0x%04x Numa: %d\n",
342 d.domain, d.bus, d.dev, d.func, d.vendorId, d.deviceId, d.classId, d.numaNodeId);
347 if (bGPUBinary && hwinfo->gpu_info.n_dev > 0)
349 s += gmx::formatString(" GPU info:\n");
350 s += gmx::formatString(" Number of GPUs detected: %d\n",
351 hwinfo->gpu_info.n_dev);
352 s += sprint_gpus(hwinfo->gpu_info) + "\n";
354 return s;
357 void gmx_print_detected_hardware(FILE *fplog, const t_commrec *cr,
358 const gmx_multisim_t *ms,
359 const gmx::MDLogger &mdlog,
360 const gmx_hw_info_t *hwinfo)
362 const gmx::CpuInfo &cpuInfo = *hwinfo->cpuInfo;
364 if (fplog != nullptr)
366 std::string detected;
368 detected = detected_hardware_string(hwinfo, TRUE);
370 fprintf(fplog, "%s\n", detected.c_str());
373 // Do not spam stderr with all our internal information unless
374 // there was something that actually went wrong; general information
375 // belongs in the logfile.
377 /* Check the compiled SIMD instruction set against that of the node
378 * with the lowest SIMD level support (skip if SIMD detection did not work)
380 if (cpuInfo.supportLevel() >= gmx::CpuInfo::SupportLevel::Features)
382 gmx::simdCheck(static_cast<gmx::SimdType>(hwinfo->simd_suggest_min), fplog, isMasterSimMasterRank(ms, cr));
385 /* For RDTSCP we only check on our local node and skip the MPI reduction */
386 check_use_of_rdtscp_on_this_cpu(mdlog, cpuInfo);