llvm/lib/Support/Windows/Threading.inc

   1 //===- Windows/Threading.inc - Win32 Threading Implementation - -*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file provides the Win32 specific implementation of Threading functions.
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #include "llvm/ADT/SmallString.h"
  14 #include "llvm/ADT/Twine.h"
  15
  16 #include "llvm/Support/Windows/WindowsSupport.h"
  17 #include <process.h>
  18
  19 #include <bitset>
  20
  21 // Windows will at times define MemoryFence.
  22 #ifdef MemoryFence
  23 #undef MemoryFence
  24 #endif
  25
  26 namespace llvm {
  27 HANDLE
  28 llvm_execute_on_thread_impl(unsigned(__stdcall *ThreadFunc)(void *), void *Arg,
  29                             llvm::Optional<unsigned> StackSizeInBytes) {
  30   HANDLE hThread = (HANDLE)::_beginthreadex(
  31       NULL, StackSizeInBytes.getValueOr(0), ThreadFunc, Arg, 0, NULL);
  32
  33   if (!hThread) {
  34     ReportLastErrorFatal("_beginthreadex failed");
  35   }
  36
  37   return hThread;
  38 }
  39
  40 void llvm_thread_join_impl(HANDLE hThread) {
  41   if (::WaitForSingleObject(hThread, INFINITE) == WAIT_FAILED) {
  42     ReportLastErrorFatal("WaitForSingleObject failed");
  43   }
  44 }
  45
  46 void llvm_thread_detach_impl(HANDLE hThread) {
  47   if (::CloseHandle(hThread) == FALSE) {
  48     ReportLastErrorFatal("CloseHandle failed");
  49   }
  50 }
  51
  52 DWORD llvm_thread_get_id_impl(HANDLE hThread) {
  53   return ::GetThreadId(hThread);
  54 }
  55
  56 DWORD llvm_thread_get_current_id_impl() {
  57   return ::GetCurrentThreadId();
  58 }
  59
  60 } // namespace llvm
  61
  62 uint64_t llvm::get_threadid() {
  63   return uint64_t(::GetCurrentThreadId());
  64 }
  65
  66 uint32_t llvm::get_max_thread_name_length() { return 0; }
  67
  68 #if defined(_MSC_VER)
  69 static void SetThreadName(DWORD Id, LPCSTR Name) {
  70   constexpr DWORD MS_VC_EXCEPTION = 0x406D1388;
  71
  72 #pragma pack(push, 8)
  73   struct THREADNAME_INFO {
  74     DWORD dwType;     // Must be 0x1000.
  75     LPCSTR szName;    // Pointer to thread name
  76     DWORD dwThreadId; // Thread ID (-1 == current thread)
  77     DWORD dwFlags;    // Reserved.  Do not use.
  78   };
  79 #pragma pack(pop)
  80
  81   THREADNAME_INFO info;
  82   info.dwType = 0x1000;
  83   info.szName = Name;
  84   info.dwThreadId = Id;
  85   info.dwFlags = 0;
  86
  87   __try {
  88     ::RaiseException(MS_VC_EXCEPTION, 0, sizeof(info) / sizeof(ULONG_PTR),
  89       (ULONG_PTR *)&info);
  90   }
  91   __except (EXCEPTION_EXECUTE_HANDLER) {
  92   }
  93 }
  94 #endif
  95
  96 void llvm::set_thread_name(const Twine &Name) {
  97 #if defined(_MSC_VER)
  98   // Make sure the input is null terminated.
  99   SmallString<64> Storage;
 100   StringRef NameStr = Name.toNullTerminatedStringRef(Storage);
 101   SetThreadName(::GetCurrentThreadId(), NameStr.data());
 102 #endif
 103 }
 104
 105 void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
 106   // "Name" is not an inherent property of a thread on Windows.  In fact, when
 107   // you "set" the name, you are only firing a one-time message to a debugger
 108   // which it interprets as a program setting its threads' name.  We may be
 109   // able to get fancy by creating a TLS entry when someone calls
 110   // set_thread_name so that subsequent calls to get_thread_name return this
 111   // value.
 112   Name.clear();
 113 }
 114
 115 SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
 116   // https://docs.microsoft.com/en-us/windows/desktop/api/processthreadsapi/nf-processthreadsapi-setthreadpriority
 117   // Begin background processing mode. The system lowers the resource scheduling
 118   // priorities of the thread so that it can perform background work without
 119   // significantly affecting activity in the foreground.
 120   // End background processing mode. The system restores the resource scheduling
 121   // priorities of the thread as they were before the thread entered background
 122   // processing mode.
 123   return SetThreadPriority(GetCurrentThread(),
 124                            Priority == ThreadPriority::Background
 125                                ? THREAD_MODE_BACKGROUND_BEGIN
 126                                : THREAD_MODE_BACKGROUND_END)
 127              ? SetThreadPriorityResult::SUCCESS
 128              : SetThreadPriorityResult::FAILURE;
 129 }
 130
 131 struct ProcessorGroup {
 132   unsigned ID;
 133   unsigned AllThreads;
 134   unsigned UsableThreads;
 135   unsigned ThreadsPerCore;
 136   uint64_t Affinity;
 137
 138   unsigned useableCores() const {
 139     return std::max(1U, UsableThreads / ThreadsPerCore);
 140   }
 141 };
 142
 143 template <typename F>
 144 static bool IterateProcInfo(LOGICAL_PROCESSOR_RELATIONSHIP Relationship, F Fn) {
 145   DWORD Len = 0;
 146   BOOL R = ::GetLogicalProcessorInformationEx(Relationship, NULL, &Len);
 147   if (R || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
 148     return false;
 149   }
 150   auto *Info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)calloc(1, Len);
 151   R = ::GetLogicalProcessorInformationEx(Relationship, Info, &Len);
 152   if (R) {
 153     auto *End =
 154         (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Info + Len);
 155     for (auto *Curr = Info; Curr < End;
 156          Curr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Curr +
 157                                                             Curr->Size)) {
 158       if (Curr->Relationship != Relationship)
 159         continue;
 160       Fn(Curr);
 161     }
 162   }
 163   free(Info);
 164   return true;
 165 }
 166
 167 static ArrayRef<ProcessorGroup> getProcessorGroups() {
 168   auto computeGroups = []() {
 169     SmallVector<ProcessorGroup, 4> Groups;
 170
 171     auto HandleGroup = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
 172       GROUP_RELATIONSHIP &El = ProcInfo->Group;
 173       for (unsigned J = 0; J < El.ActiveGroupCount; ++J) {
 174         ProcessorGroup G;
 175         G.ID = Groups.size();
 176         G.AllThreads = El.GroupInfo[J].MaximumProcessorCount;
 177         G.UsableThreads = El.GroupInfo[J].ActiveProcessorCount;
 178         assert(G.UsableThreads <= 64);
 179         G.Affinity = El.GroupInfo[J].ActiveProcessorMask;
 180         Groups.push_back(G);
 181       }
 182     };
 183
 184     if (!IterateProcInfo(RelationGroup, HandleGroup))
 185       return std::vector<ProcessorGroup>();
 186
 187     auto HandleProc = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
 188       PROCESSOR_RELATIONSHIP &El = ProcInfo->Processor;
 189       assert(El.GroupCount == 1);
 190       unsigned NumHyperThreads = 1;
 191       // If the flag is set, each core supports more than one hyper-thread.
 192       if (El.Flags & LTP_PC_SMT)
 193         NumHyperThreads = std::bitset<64>(El.GroupMask[0].Mask).count();
 194       unsigned I = El.GroupMask[0].Group;
 195       Groups[I].ThreadsPerCore = NumHyperThreads;
 196     };
 197
 198     if (!IterateProcInfo(RelationProcessorCore, HandleProc))
 199       return std::vector<ProcessorGroup>();
 200
 201     // If there's an affinity mask set, assume the user wants to constrain the
 202     // current process to only a single CPU group. On Windows, it is not
 203     // possible for affinity masks to cross CPU group boundaries.
 204     DWORD_PTR ProcessAffinityMask = 0, SystemAffinityMask = 0;
 205     if (::GetProcessAffinityMask(GetCurrentProcess(), &ProcessAffinityMask,
 206                                  &SystemAffinityMask) &&
 207         ProcessAffinityMask != SystemAffinityMask) {
 208       // We don't expect more that 4 CPU groups on Windows (256 processors).
 209       USHORT GroupCount = 4;
 210       USHORT GroupArray[4]{};
 211       if (::GetProcessGroupAffinity(GetCurrentProcess(), &GroupCount,
 212                                     GroupArray)) {
 213         assert(GroupCount == 1 &&
 214                "On startup, a program is expected to be assigned only to "
 215                "one processor group!");
 216         unsigned CurrentGroupID = GroupArray[0];
 217         ProcessorGroup NewG{Groups[CurrentGroupID]};
 218         NewG.Affinity = ProcessAffinityMask;
 219         NewG.UsableThreads = countPopulation(ProcessAffinityMask);
 220         Groups.clear();
 221         Groups.push_back(NewG);
 222       }
 223     }
 224
 225     return std::vector<ProcessorGroup>(Groups.begin(), Groups.end());
 226   };
 227   static auto Groups = computeGroups();
 228   return ArrayRef<ProcessorGroup>(Groups);
 229 }
 230
 231 template <typename R, typename UnaryPredicate>
 232 static unsigned aggregate(R &&Range, UnaryPredicate P) {
 233   unsigned I{};
 234   for (const auto &It : Range)
 235     I += P(It);
 236   return I;
 237 }
 238
 239 // for sys::getHostNumPhysicalCores
 240 int computeHostNumPhysicalCores() {
 241   static unsigned Cores =
 242       aggregate(getProcessorGroups(), [](const ProcessorGroup &G) {
 243         return G.UsableThreads / G.ThreadsPerCore;
 244       });
 245   return Cores;
 246 }
 247
 248 int computeHostNumHardwareThreads() {
 249   static unsigned Threads =
 250       aggregate(getProcessorGroups(),
 251                 [](const ProcessorGroup &G) { return G.UsableThreads; });
 252   return Threads;
 253 }
 254
 255 // Finds the proper CPU socket where a thread number should go. Returns 'None'
 256 // if the thread shall remain on the actual CPU socket.
 257 Optional<unsigned>
 258 llvm::ThreadPoolStrategy::compute_cpu_socket(unsigned ThreadPoolNum) const {
 259   ArrayRef<ProcessorGroup> Groups = getProcessorGroups();
 260   // Only one CPU socket in the system or process affinity was set, no need to
 261   // move the thread(s) to another CPU socket.
 262   if (Groups.size() <= 1)
 263     return None;
 264
 265   // We ask for less threads than there are hardware threads per CPU socket, no
 266   // need to dispatch threads to other CPU sockets.
 267   unsigned MaxThreadsPerSocket =
 268       UseHyperThreads ? Groups[0].UsableThreads : Groups[0].useableCores();
 269   if (compute_thread_count() <= MaxThreadsPerSocket)
 270     return None;
 271
 272   assert(ThreadPoolNum < compute_thread_count() &&
 273          "The thread index is not within thread strategy's range!");
 274
 275   // Assumes the same number of hardware threads per CPU socket.
 276   return (ThreadPoolNum * Groups.size()) / compute_thread_count();
 277 }
 278
 279 // Assign the current thread to a more appropriate CPU socket or CPU group
 280 void llvm::ThreadPoolStrategy::apply_thread_strategy(
 281     unsigned ThreadPoolNum) const {
 282   Optional<unsigned> Socket = compute_cpu_socket(ThreadPoolNum);
 283   if (!Socket)
 284     return;
 285   ArrayRef<ProcessorGroup> Groups = getProcessorGroups();
 286   GROUP_AFFINITY Affinity{};
 287   Affinity.Group = Groups[*Socket].ID;
 288   Affinity.Mask = Groups[*Socket].Affinity;
 289   SetThreadGroupAffinity(GetCurrentThread(), &Affinity, nullptr);
 290 }
 291
 292 llvm::BitVector llvm::get_thread_affinity_mask() {
 293   GROUP_AFFINITY Affinity{};
 294   GetThreadGroupAffinity(GetCurrentThread(), &Affinity);
 295
 296   static unsigned All =
 297       aggregate(getProcessorGroups(),
 298                 [](const ProcessorGroup &G) { return G.AllThreads; });
 299
 300   unsigned StartOffset =
 301       aggregate(getProcessorGroups(), [&](const ProcessorGroup &G) {
 302         return G.ID < Affinity.Group ? G.AllThreads : 0;
 303       });
 304
 305   llvm::BitVector V;
 306   V.resize(All);
 307   for (unsigned I = 0; I < sizeof(KAFFINITY) * 8; ++I) {
 308     if ((Affinity.Mask >> I) & 1)
 309       V.set(StartOffset + I);
 310   }
 311   return V;
 312 }
 313
 314 unsigned llvm::get_cpus() { return getProcessorGroups().size(); }