src/gromacs/hardware/hardwaretopology.cpp

   1 /*
   2  * This file is part of the GROMACS molecular simulation package.
   3  *
   4  * Copyright (c) 2012,2013,2014,2015,2016,2017,2018, by the GROMACS development team, led by
   5  * Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   6  * and including many others, as listed in the AUTHORS file in the
   7  * top-level source directory and at http://www.gromacs.org.
   8  *
   9  * GROMACS is free software; you can redistribute it and/or
  10  * modify it under the terms of the GNU Lesser General Public License
  11  * as published by the Free Software Foundation; either version 2.1
  12  * of the License, or (at your option) any later version.
  13  *
  14  * GROMACS is distributed in the hope that it will be useful,
  15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  17  * Lesser General Public License for more details.
  18  *
  19  * You should have received a copy of the GNU Lesser General Public
  20  * License along with GROMACS; if not, see
  21  * http://www.gnu.org/licenses, or write to the Free Software Foundation,
  22  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  23  *
  24  * If you want to redistribute modifications to GROMACS, please
  25  * consider that scientific software is very special. Version
  26  * control is crucial - bugs must be traceable. We will be happy to
  27  * consider code for inclusion in the official distribution, but
  28  * derived work must not be called official GROMACS. Details are found
  29  * in the README & COPYING files - if they are missing, get the
  30  * official version at http://www.gromacs.org.
  31  *
  32  * To help us fund GROMACS development, we humbly ask that you cite
  33  * the research papers on the package. Check out http://www.gromacs.org.
  34  */
  35
  36 /*! \internal \file
  37  * \brief
  38  * Implements gmx::HardwareTopology.
  39  *
  40  * \author Erik Lindahl <erik.lindahl@gmail.com>
  41  * \ingroup module_hardware
  42  */
  43
  44 #include "gmxpre.h"
  45
  46 #include "hardwaretopology.h"
  47
  48 #include "config.h"
  49
  50 #include <cstdio>
  51
  52 #include <algorithm>
  53 #include <functional>
  54 #include <limits>
  55 #include <utility>
  56 #include <vector>
  57
  58 #if GMX_USE_HWLOC
  59 #    include <hwloc.h>
  60 #endif
  61
  62 #include "gromacs/hardware/cpuinfo.h"
  63 #include "gromacs/utility/gmxassert.h"
  64
  65 #ifdef HAVE_UNISTD_H
  66 #    include <unistd.h>       // sysconf()
  67 #endif
  68 #if GMX_NATIVE_WINDOWS
  69 #    include <windows.h>      // GetSystemInfo()
  70 #endif
  71
  72 //! Convenience macro to help us avoid ifdefs each time we use sysconf
  73 #if !defined(_SC_NPROCESSORS_ONLN) && defined(_SC_NPROC_ONLN)
  74 #    define _SC_NPROCESSORS_ONLN _SC_NPROC_ONLN
  75 #endif
  76
  77 namespace gmx
  78 {
  79
  80 namespace
  81 {
  82
  83 /*****************************************************************************
  84  *                                                                           *
  85  *   Utility functions for extracting hardware topology from CpuInfo object  *
  86  *                                                                           *
  87  *****************************************************************************/
  88
  89 /*! \brief Initialize machine data from basic information in cpuinfo
  90  *
  91  *  \param  machine      Machine tree structure where information will be assigned
  92  *                       if the cpuinfo object contains topology information.
  93  *  \param  supportLevel If topology information is available in CpuInfo,
  94  *                       this will be updated to reflect the amount of
  95  *                       information written to the machine structure.
  96  */
  97 void
  98 parseCpuInfo(HardwareTopology::Machine *        machine,
  99              HardwareTopology::SupportLevel *   supportLevel)
 100 {
 101     CpuInfo cpuInfo(CpuInfo::detect());
 102
 103     if (!cpuInfo.logicalProcessors().empty())
 104     {
 105         int nSockets   = 0;
 106         int nCores     = 0;
 107         int nHwThreads = 0;
 108
 109         // Copy the logical processor information from cpuinfo
 110         for (auto &l : cpuInfo.logicalProcessors())
 111         {
 112             machine->logicalProcessors.push_back( { l.socketRankInMachine, l.coreRankInSocket, l.hwThreadRankInCore, -1 } );
 113             nSockets   = std::max(nSockets, l.socketRankInMachine);
 114             nCores     = std::max(nCores, l.coreRankInSocket);
 115             nHwThreads = std::max(nHwThreads, l.hwThreadRankInCore);
 116         }
 117
 118         // Fill info form sockets/cores/hwthreads
 119         int socketId   = 0;
 120         int coreId     = 0;
 121         int hwThreadId = 0;
 122
 123         machine->sockets.resize(nSockets + 1);
 124         for (auto &s : machine->sockets)
 125         {
 126             s.id = socketId++;
 127             s.cores.resize(nCores + 1);
 128             for (auto &c : s.cores)
 129             {
 130                 c.id         = coreId++;
 131                 c.numaNodeId = -1; // No numa information
 132                 c.hwThreads.resize(nHwThreads + 1);
 133                 for (auto &t : c.hwThreads)
 134                 {
 135                     t.id                 = hwThreadId++;
 136                     t.logicalProcessorId = -1; // set as unassigned for now
 137                 }
 138             }
 139         }
 140
 141         // Fill the logical processor id in the right place
 142         for (std::size_t i = 0; i < machine->logicalProcessors.size(); i++)
 143         {
 144             const HardwareTopology::LogicalProcessor &l = machine->logicalProcessors[i];
 145             machine->sockets[l.socketRankInMachine].cores[l.coreRankInSocket].hwThreads[l.hwThreadRankInCore].logicalProcessorId = static_cast<int>(i);
 146         }
 147         machine->logicalProcessorCount = machine->logicalProcessors.size();
 148         *supportLevel                  = HardwareTopology::SupportLevel::Basic;
 149     }
 150     else
 151     {
 152         *supportLevel = HardwareTopology::SupportLevel::None;
 153     }
 154 }
 155
 156 #if GMX_USE_HWLOC
 157
 158 #if HWLOC_API_VERSION < 0x00010b00
 159 #    define HWLOC_OBJ_PACKAGE  HWLOC_OBJ_SOCKET
 160 #    define HWLOC_OBJ_NUMANODE HWLOC_OBJ_NODE
 161 #endif
 162
 163 // Preprocessor variable for if hwloc api is version 1.x.x or 2.x.x
 164 #if HWLOC_API_VERSION >= 0x00020000
 165 #    define GMX_HWLOC_API_VERSION_IS_2XX 1
 166 #else
 167 #    define GMX_HWLOC_API_VERSION_IS_2XX 0
 168 #endif
 169
 170 /*****************************************************************************
 171  *                                                                           *
 172  *   Utility functions for extracting hardware topology from hwloc library   *
 173  *                                                                           *
 174  *****************************************************************************/
 175
 176 // Compatibility function for accessing hwloc_obj_t object memory with different API versions of hwloc
 177 std::size_t
 178 getHwLocObjectMemory(const hwloc_obj_t obj)
 179 {
 180 #if GMX_HWLOC_API_VERSION_IS_2XX
 181     return obj->total_memory;
 182 #else
 183     return obj->memory.total_memory;
 184 #endif
 185 }
 186
 187 /*! \brief Return vector of all descendants of a given type in hwloc topology
 188  *
 189  *  \param topo  hwloc topology handle that has been initialized and loaded
 190  *  \param obj   Non-null hwloc object.
 191  *  \param type  hwloc object type to find. The routine will only search
 192  *               on levels below obj.
 193  *
 194  *  \return vector containing all the objects of given type that are
 195  *          descendants of the provided object. If no objects of this type
 196  *          were found, the vector will be empty.
 197  */
 198 const std::vector<hwloc_obj_t>
 199 getHwLocDescendantsByType(const hwloc_topology_t topo, const hwloc_obj_t obj, const hwloc_obj_type_t type)
 200 {
 201     GMX_RELEASE_ASSERT(obj, "NULL hwloc object provided to getHwLocDescendantsByType()");
 202
 203     std::vector<hwloc_obj_t> v;
 204
 205     if (obj->type == type)
 206     {
 207         v.push_back(obj);
 208     }
 209     // Go through children; if this object has no children obj->arity is 0,
 210     // and we'll return an empty vector.
 211     hwloc_obj_t tempNode = NULL;
 212     while ((tempNode = hwloc_get_next_child(topo, obj, tempNode)) != NULL)
 213     {
 214         std::vector<hwloc_obj_t> v2 = getHwLocDescendantsByType(topo, tempNode, type);
 215         v.insert(v.end(), v2.begin(), v2.end());
 216     }
 217     return v;
 218 }
 219
 220 /*! \brief Read information about sockets, cores and threads from hwloc topology
 221  *
 222  *  \param topo    hwloc topology handle that has been initialized and loaded
 223  *  \param machine Pointer to the machine structure in the HardwareTopology
 224  *                 class, where the tree of sockets/cores/threads will be written.
 225  *
 226  *  \return If all the data is found the return value is 0, otherwise non-zero.
 227  */
 228 int
 229 parseHwLocSocketsCoresThreads(hwloc_topology_t                   topo,
 230                               HardwareTopology::Machine *        machine)
 231 {
 232     const hwloc_obj_t                      root         = hwloc_get_root_obj(topo);
 233     std::vector<hwloc_obj_t>               hwlocSockets = getHwLocDescendantsByType(topo, root, HWLOC_OBJ_PACKAGE);
 234
 235     machine->logicalProcessorCount = hwloc_get_nbobjs_by_type(topo, HWLOC_OBJ_PU);
 236     machine->logicalProcessors.resize(machine->logicalProcessorCount);
 237     machine->sockets.resize(hwlocSockets.size());
 238
 239     bool topologyOk = !hwlocSockets.empty(); // Fail if we have no sockets in machine
 240
 241     for (std::size_t i = 0; i < hwlocSockets.size() && topologyOk; i++)
 242     {
 243         // Assign information about this socket
 244         machine->sockets[i].id = hwlocSockets[i]->logical_index;
 245
 246         // Get children (cores)
 247         std::vector<hwloc_obj_t> hwlocCores = getHwLocDescendantsByType(topo, hwlocSockets[i], HWLOC_OBJ_CORE);
 248         machine->sockets[i].cores.resize(hwlocCores.size());
 249
 250         topologyOk = topologyOk && !hwlocCores.empty(); // Fail if we have no cores in socket
 251
 252         // Loop over child cores
 253         for (std::size_t j = 0; j < hwlocCores.size() && topologyOk; j++)
 254         {
 255             // Assign information about this core
 256             machine->sockets[i].cores[j].id         = hwlocCores[j]->logical_index;
 257             machine->sockets[i].cores[j].numaNodeId = -1;
 258
 259             // Get children (hwthreads)
 260             std::vector<hwloc_obj_t> hwlocPUs = getHwLocDescendantsByType(topo, hwlocCores[j], HWLOC_OBJ_PU);
 261             machine->sockets[i].cores[j].hwThreads.resize(hwlocPUs.size());
 262
 263             topologyOk = topologyOk && !hwlocPUs.empty(); // Fail if we have no hwthreads in core
 264
 265             // Loop over child hwthreads
 266             for (std::size_t k = 0; k < hwlocPUs.size() && topologyOk; k++)
 267             {
 268                 // Assign information about this hwthread
 269                 std::size_t logicalProcessorId                               = hwlocPUs[k]->os_index;
 270                 machine->sockets[i].cores[j].hwThreads[k].id                 = hwlocPUs[k]->logical_index;
 271                 machine->sockets[i].cores[j].hwThreads[k].logicalProcessorId = logicalProcessorId;
 272
 273                 if (logicalProcessorId < machine->logicalProcessors.size())
 274                 {
 275                     // Cross-assign data for this hwthread to the logicalprocess vector
 276                     machine->logicalProcessors[logicalProcessorId].socketRankInMachine = static_cast<int>(i);
 277                     machine->logicalProcessors[logicalProcessorId].coreRankInSocket    = static_cast<int>(j);
 278                     machine->logicalProcessors[logicalProcessorId].hwThreadRankInCore  = static_cast<int>(k);
 279                     machine->logicalProcessors[logicalProcessorId].numaNodeId          = -1;
 280                 }
 281                 else
 282                 {
 283                     topologyOk = false;
 284                 }
 285             }
 286         }
 287     }
 288
 289     if (topologyOk)
 290     {
 291         return 0;
 292     }
 293     else
 294     {
 295         machine->logicalProcessors.clear();
 296         machine->sockets.clear();
 297         return -1;
 298     }
 299 }
 300
 301 /*! \brief Read cache information from hwloc topology
 302  *
 303  *  \param topo    hwloc topology handle that has been initialized and loaded
 304  *  \param machine Pointer to the machine structure in the HardwareTopology
 305  *                 class, where cache data will be filled.
 306  *
 307  *  \return If any cache data is found the return value is 0, otherwise non-zero.
 308  */
 309 int
 310 parseHwLocCache(hwloc_topology_t                   topo,
 311                 HardwareTopology::Machine *        machine)
 312 {
 313     // Parse caches up to L5
 314     for (int cachelevel : { 1, 2, 3, 4, 5})
 315     {
 316         int depth = hwloc_get_cache_type_depth(topo, cachelevel, HWLOC_OBJ_CACHE_DATA);
 317
 318         if (depth >= 0)
 319         {
 320             hwloc_obj_t cache = hwloc_get_next_obj_by_depth(topo, depth, nullptr);
 321             if (cache != nullptr)
 322             {
 323                 std::vector<hwloc_obj_t> hwThreads = getHwLocDescendantsByType(topo, cache, HWLOC_OBJ_PU);
 324
 325                 machine->caches.push_back( {
 326                                                static_cast<int>(cache->attr->cache.depth),
 327                                                static_cast<std::size_t>(cache->attr->cache.size),
 328                                                static_cast<int>(cache->attr->cache.linesize),
 329                                                static_cast<int>(cache->attr->cache.associativity),
 330                                                std::max(static_cast<int>(hwThreads.size()), 1)
 331                                            } );
 332             }
 333         }
 334     }
 335     return machine->caches.empty();
 336 }
 337
 338
 339 /*! \brief Read numa information from hwloc topology
 340  *
 341  *  \param topo    hwloc topology handle that has been initialized and loaded
 342  *  \param machine Pointer to the machine structure in the HardwareTopology
 343  *                 class, where numa information will be filled.
 344  *
 345  *  Hwloc should virtually always be able to detect numa information, but if
 346  *  there is only a single numa node in the system it is not reported at all.
 347  *  In this case we create a single numa node covering all cores.
 348  *
 349  *  This function uses the basic socket/core/thread information detected by
 350  *  parseHwLocSocketsCoresThreads(), which means that routine must have
 351  *  completed successfully before calling this one. If this is not the case,
 352  *  you will get an error return code.
 353  *
 354  *  \return If the data found makes sense (either in the numa node or the
 355  *          entire machine) the return value is 0, otherwise non-zero.
 356  */
 357 int
 358 parseHwLocNuma(hwloc_topology_t                   topo,
 359                HardwareTopology::Machine *        machine)
 360 {
 361     const hwloc_obj_t                  root           = hwloc_get_root_obj(topo);
 362     std::vector<hwloc_obj_t>           hwlocNumaNodes = getHwLocDescendantsByType(topo, root, HWLOC_OBJ_NUMANODE);
 363     bool                               topologyOk     = true;
 364
 365     if (!hwlocNumaNodes.empty())
 366     {
 367         machine->numa.nodes.resize(hwlocNumaNodes.size());
 368
 369         for (std::size_t i = 0; i < hwlocNumaNodes.size(); i++)
 370         {
 371             machine->numa.nodes[i].id     = hwlocNumaNodes[i]->logical_index;
 372             machine->numa.nodes[i].memory = getHwLocObjectMemory(hwlocNumaNodes[i]);;
 373             machine->numa.nodes[i].logicalProcessorId.clear();
 374
 375             // Get list of PUs in this numa node. Get from numa node if v1.x.x, get from numa node's parent if 2.x.x
 376 #if GMX_HWLOC_API_VERSION_IS_2XX
 377             std::vector<hwloc_obj_t> hwlocPUs = getHwLocDescendantsByType(topo, hwlocNumaNodes[i]->parent, HWLOC_OBJ_PU);
 378 #else
 379             std::vector<hwloc_obj_t> hwlocPUs = getHwLocDescendantsByType(topo, hwlocNumaNodes[i], HWLOC_OBJ_PU);
 380 #endif
 381             for (auto &p : hwlocPUs)
 382             {
 383                 machine->numa.nodes[i].logicalProcessorId.push_back(p->os_index);
 384
 385                 GMX_RELEASE_ASSERT(p->os_index < machine->logicalProcessors.size(), "OS index of PU in hwloc larger than processor count");
 386
 387                 machine->logicalProcessors[p->os_index].numaNodeId = static_cast<int>(i);
 388                 std::size_t s = machine->logicalProcessors[p->os_index].socketRankInMachine;
 389                 std::size_t c = machine->logicalProcessors[p->os_index].coreRankInSocket;
 390
 391                 GMX_RELEASE_ASSERT(s < machine->sockets.size(), "Socket index in logicalProcessors larger than socket count");
 392                 GMX_RELEASE_ASSERT(c < machine->sockets[s].cores.size(), "Core index in logicalProcessors larger than core count");
 393                 // Set numaNodeId in core too
 394                 machine->sockets[s].cores[c].numaNodeId = i;
 395             }
 396         }
 397         // Getting the distance matrix
 398 #if GMX_HWLOC_API_VERSION_IS_2XX
 399         // with hwloc api v. 2.x.x, distances are no longer directly accessible. Need to retrieve and release hwloc_distances_s object
 400         // In addition, there can now be multiple types of distances, ie latency, bandwidth. We look only for latency, but have to check
 401         // if multiple distance matrices are returned.
 402
 403         // If only 1 numa node exists, the v2.x.x hwloc api won't have a distances matrix, set manually
 404         if (hwlocNumaNodes.size() == 1)
 405         {
 406             machine->numa.relativeLatency       = { { 1.0 } };
 407         }
 408         else
 409         {
 410             hwloc_distances_s** dist = new hwloc_distances_s*;
 411             // Set the number of distance matrices to return (1 in our case, but hwloc 2.x.x allows
 412             // for multiple distances types and therefore multiple distance matrices)
 413             unsigned nr = 1;
 414             hwloc_distances_get(topo, &nr, dist, HWLOC_DISTANCES_KIND_MEANS_LATENCY, 0);
 415             // If no distances were found, nr will be 0, otherwise distances will be populated with 1
 416             // hwloc_distances_s object
 417             if (nr > 0 && dist[0]->nbobjs == hwlocNumaNodes.size())
 418             {
 419
 420                 machine->numa.relativeLatency.resize(dist[0]->nbobjs);
 421                 for (std::size_t i = 0; i < dist[0]->nbobjs; i++)
 422                 {
 423                     machine->numa.relativeLatency[i].resize(dist[0]->nbobjs);
 424                     for (std::size_t j = 0; j < dist[0]->nbobjs; j++)
 425                     {
 426                         machine->numa.relativeLatency[i][j] = dist[0]->values[i*dist[0]->nbobjs+j];
 427                     }
 428                 }
 429             }
 430             else
 431             {
 432                 topologyOk = false;
 433             }
 434             hwloc_distances_release(topo, dist[0]);
 435         }
 436
 437         // hwloc-2.x provides latencies as integers, but to make things more similar to the case of a single
 438         // numa node as well as hwloc-1.x, we rescale to relative floating-point values and also set the
 439         // largest relative latency value.
 440
 441         // find smallest value in matrix
 442         float minLatency = std::numeric_limits<float>::max(); // large number
 443         float maxLatency = std::numeric_limits<float>::min(); // 0.0
 444         for (const auto &v : machine->numa.relativeLatency)
 445         {
 446             auto result = std::minmax_element(v.begin(), v.end());
 447             minLatency = std::min(minLatency, *result.first);
 448             maxLatency = std::max(maxLatency, *result.second);
 449         }
 450
 451         // assign stuff
 452         for (auto &v : machine->numa.relativeLatency)
 453         {
 454             std::transform(v.begin(), v.end(), v.begin(), std::bind(std::multiplies<float>(), std::placeholders::_1, 1.0/minLatency));
 455         }
 456         machine->numa.baseLatency        = 1.0; // latencies still do not have any units in hwloc-2.x
 457         machine->numa.maxRelativeLatency = maxLatency/minLatency;
 458
 459 #else           // GMX_HWLOC_API_VERSION_IS_2XX == false, hwloc api is 1.x.x
 460         int depth = hwloc_get_type_depth(topo, HWLOC_OBJ_NUMANODE);
 461         const struct hwloc_distances_s * dist = hwloc_get_whole_distance_matrix_by_depth(topo, depth);
 462         if (dist != nullptr && dist->nbobjs == hwlocNumaNodes.size())
 463         {
 464             machine->numa.baseLatency        = dist->latency_base;
 465             machine->numa.maxRelativeLatency = dist->latency_max;
 466             machine->numa.relativeLatency.resize(dist->nbobjs);
 467             for (std::size_t i = 0; i < dist->nbobjs; i++)
 468             {
 469                 machine->numa.relativeLatency[i].resize(dist->nbobjs);
 470                 for (std::size_t j = 0; j < dist->nbobjs; j++)
 471                 {
 472                     machine->numa.relativeLatency[i][j] = dist->latency[i*dist->nbobjs+j];
 473                 }
 474             }
 475         }
 476         else
 477         {
 478             topologyOk = false;
 479         }
 480 #endif          // end GMX_HWLOC_API_VERSION_IS_2XX == false
 481     }
 482     else
 483     // Deals with the case of no numa nodes found.
 484 #if GMX_HWLOC_API_VERSION_IS_2XX
 485     // If the hwloc version is 2.x.x, and there is no numa node, something went wrong
 486     {
 487         topologyOk = false;
 488     }
 489 #else
 490     {
 491         // No numa nodes found. Use the entire machine as a numa node.
 492         // Note that this should only be the case with hwloc api v 1.x.x,
 493         // a numa node is assigned to the machine by default in v 2.x.x
 494         const hwloc_obj*const hwlocMachine = hwloc_get_next_obj_by_type(topo, HWLOC_OBJ_MACHINE, nullptr);
 495
 496         if (hwlocMachine != nullptr)
 497         {
 498             machine->numa.nodes.resize(1);
 499             machine->numa.nodes[0].id           = 0;
 500             machine->numa.nodes[0].memory       = hwlocMachine->memory.total_memory;
 501             machine->numa.baseLatency           = 10;
 502             machine->numa.maxRelativeLatency    = 1;
 503             machine->numa.relativeLatency       = { { 1.0 } };
 504
 505             for (int i = 0; i < machine->logicalProcessorCount; i++)
 506             {
 507                 machine->numa.nodes[0].logicalProcessorId.push_back(i);
 508             }
 509             for (auto &l : machine->logicalProcessors)
 510             {
 511                 l.numaNodeId = 0;
 512             }
 513             for (auto &s : machine->sockets)
 514             {
 515                 for (auto &c : s.cores)
 516                 {
 517                     c.numaNodeId = 0;
 518                 }
 519             }
 520         }
 521         else
 522         {
 523             topologyOk = false;
 524         }
 525     }
 526 #endif      // end if not GMX_HWLOC_API_VERSION_IS_2XX
 527     if (topologyOk)
 528     {
 529         return 0;
 530     }
 531     else
 532     {
 533         machine->numa.nodes.clear();
 534         return -1;
 535     }
 536
 537 }
 538
 539 /*! \brief Read PCI device information from hwloc topology
 540  *
 541  *  \param topo    hwloc topology handle that has been initialized and loaded
 542  *  \param machine Pointer to the machine structure in the HardwareTopology
 543  *                 class, where PCI device information will be filled.
 544  * *
 545  *  \return If any devices were found the return value is 0, otherwise non-zero.
 546  */
 547 int
 548 parseHwLocDevices(hwloc_topology_t                   topo,
 549                   HardwareTopology::Machine *        machine)
 550 {
 551     const hwloc_obj_t        root    = hwloc_get_root_obj(topo);
 552     std::vector<hwloc_obj_t> pcidevs = getHwLocDescendantsByType(topo, root, HWLOC_OBJ_PCI_DEVICE);
 553
 554     for (auto &p : pcidevs)
 555     {
 556 #if GMX_HWLOC_API_VERSION_IS_2XX
 557         const hwloc_obj * ancestor = nullptr;
 558         // Numa nodes not directly part of tree. Walk up the tree until we find an ancestor with a numa node
 559         hwloc_obj_t       parent = p->parent;
 560         while (parent && !parent->memory_arity)
 561         {
 562             parent = parent->parent;
 563         }
 564         if (parent)
 565         {
 566             ancestor = parent->memory_first_child;
 567         }
 568 #else           // GMX_HWLOC_API_VERSION_IS_2XX = false, api v 1.x.x
 569         // numa nodes are normal part of tree, can use hwloc ancestor function
 570         const hwloc_obj * const ancestor = hwloc_get_ancestor_obj_by_type(topo, HWLOC_OBJ_NUMANODE, p);
 571 #endif          // end if GMX_HWLOC_API_VERSION_IS_2XX
 572         int                     numaId;
 573         if (ancestor != nullptr)
 574         {
 575             numaId = ancestor->logical_index;
 576         }
 577         else
 578         {
 579             // If we only have a single numa node we belong to it, otherwise set it to -1 (unknown)
 580             numaId = (machine->numa.nodes.size() == 1) ?  0 : -1;
 581         }
 582
 583         GMX_RELEASE_ASSERT(p->attr, "Attributes should not be NULL for hwloc PCI object");
 584
 585         machine->devices.push_back( {
 586                                         p->attr->pcidev.vendor_id,
 587                                         p->attr->pcidev.device_id,
 588                                         p->attr->pcidev.class_id,
 589                                         p->attr->pcidev.domain,
 590                                         p->attr->pcidev.bus,
 591                                         p->attr->pcidev.dev,
 592                                         p->attr->pcidev.func,
 593                                         numaId
 594                                     } );
 595     }
 596     return pcidevs.empty();
 597 }
 598
 599 void
 600 parseHwLoc(HardwareTopology::Machine *        machine,
 601            HardwareTopology::SupportLevel *   supportLevel,
 602            bool *                             isThisSystem)
 603 {
 604     hwloc_topology_t    topo;
 605
 606     // Initialize a hwloc object, set flags to request IO device information too,
 607     // try to load the topology, and get the root object. If either step fails,
 608     // return that we do not have any support at all from hwloc.
 609     if (hwloc_topology_init(&topo) != 0)
 610     {
 611         hwloc_topology_destroy(topo);
 612         return; // SupportLevel::None.
 613     }
 614
 615     // Flags to look for io devices
 616 #if GMX_HWLOC_API_VERSION_IS_2XX
 617     hwloc_topology_set_io_types_filter(topo, HWLOC_TYPE_FILTER_KEEP_IMPORTANT);
 618 #else
 619     hwloc_topology_set_flags(topo, HWLOC_TOPOLOGY_FLAG_IO_DEVICES);
 620 #endif
 621
 622     if (hwloc_topology_load(topo) != 0 || hwloc_get_root_obj(topo) == nullptr)
 623     {
 624         hwloc_topology_destroy(topo);
 625         return; // SupportLevel::None.
 626     }
 627
 628     // If we get here, we can get a valid root object for the topology
 629     *isThisSystem = hwloc_topology_is_thissystem(topo);
 630
 631     // Parse basic information about sockets, cores, and hardware threads
 632     if (parseHwLocSocketsCoresThreads(topo, machine) == 0)
 633     {
 634         *supportLevel = HardwareTopology::SupportLevel::Basic;
 635     }
 636     else
 637     {
 638         hwloc_topology_destroy(topo);
 639         return; // SupportLevel::None.
 640     }
 641
 642     // Get information about cache and numa nodes
 643     if (parseHwLocCache(topo, machine) == 0 && parseHwLocNuma(topo, machine) == 0)
 644     {
 645         *supportLevel = HardwareTopology::SupportLevel::Full;
 646     }
 647     else
 648     {
 649         hwloc_topology_destroy(topo);
 650         return; // SupportLevel::Basic.
 651     }
 652
 653     // PCI devices
 654     if (parseHwLocDevices(topo, machine) == 0)
 655     {
 656         *supportLevel = HardwareTopology::SupportLevel::FullWithDevices;
 657     }
 658
 659     hwloc_topology_destroy(topo);
 660 // SupportLevel::Full or SupportLevel::FullWithDevices.
 661 }
 662
 663 #endif
 664
 665 /*! \brief Try to detect the number of logical processors.
 666  *
 667  *  \return The number of hardware processing units, or 0 if it fails.
 668  */
 669 int
 670 detectLogicalProcessorCount()
 671 {
 672     int count = 0;
 673
 674     {
 675 #if GMX_NATIVE_WINDOWS
 676         // Windows
 677         SYSTEM_INFO sysinfo;
 678         GetSystemInfo( &sysinfo );
 679         count = sysinfo.dwNumberOfProcessors;
 680 #elif defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
 681         // We are probably on Unix. Check if we have the argument to use before executing any calls
 682         count = sysconf(_SC_NPROCESSORS_ONLN);
 683 #else
 684         count = 0; // Neither windows nor Unix.
 685 #endif
 686     }
 687
 688     return count;
 689 }
 690
 691 }   // namespace
 692
 693 // static
 694 HardwareTopology HardwareTopology::detect()
 695 {
 696     HardwareTopology result;
 697
 698 #if GMX_USE_HWLOC
 699     parseHwLoc(&result.machine_, &result.supportLevel_, &result.isThisSystem_);
 700 #endif
 701
 702     // If something went wrong in hwloc (or if it was not present) we might
 703     // have more information in cpuInfo
 704     if (result.supportLevel_ < SupportLevel::Basic)
 705     {
 706         // There might be topology information in cpuInfo
 707         parseCpuInfo(&result.machine_, &result.supportLevel_);
 708     }
 709     // If we did not manage to get anything from either hwloc or cpuInfo, find the cpu count at least
 710     if (result.supportLevel_ == SupportLevel::None)
 711     {
 712         // No topology information; try to detect the number of logical processors at least
 713         result.machine_.logicalProcessorCount = detectLogicalProcessorCount();
 714         if (result.machine_.logicalProcessorCount > 0)
 715         {
 716             result.supportLevel_ = SupportLevel::LogicalProcessorCount;
 717         }
 718     }
 719     return result;
 720 }
 721
 722 HardwareTopology::Machine::Machine()
 723 {
 724     logicalProcessorCount   = 0;
 725     numa.baseLatency        = 0.0;
 726     numa.maxRelativeLatency = 0.0;
 727 }
 728
 729
 730 HardwareTopology::HardwareTopology()
 731     : supportLevel_(SupportLevel::None),
 732       machine_(),
 733       isThisSystem_(true)
 734 {
 735 }
 736
 737 HardwareTopology::HardwareTopology(int logicalProcessorCount)
 738     : supportLevel_(SupportLevel::None),
 739       machine_(),
 740       isThisSystem_(true)
 741 {
 742     if (logicalProcessorCount > 0)
 743     {
 744         machine_.logicalProcessorCount = logicalProcessorCount;
 745         supportLevel_                  = SupportLevel::LogicalProcessorCount;
 746     }
 747 }
 748
 749 int HardwareTopology::numberOfCores() const
 750 {
 751     if (supportLevel() >= SupportLevel::Basic)
 752     {
 753         // We assume all sockets have the same number of cores as socket 0.
 754         // Since topology information is present, we can assume there is at least one socket.
 755         return machine().sockets.size() * machine().sockets[0].cores.size();
 756     }
 757     else if (supportLevel() >= SupportLevel::LogicalProcessorCount)
 758     {
 759         return machine().logicalProcessorCount;
 760     }
 761     else
 762     {
 763         return 0;
 764     }
 765 }
 766
 767 } // namespace gmx