llvm/lib/Support/Windows/Threading.inc

   1 //===- Windows/Threading.inc - Win32 Threading Implementation - -*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file provides the Win32 specific implementation of Threading functions.
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #include "llvm/ADT/SmallString.h"
  14 #include "llvm/ADT/Twine.h"
  15
  16 #include "llvm/Support/Windows/WindowsSupport.h"
  17 #include <process.h>
  18
  19 #include <bitset>
  20
  21 // Windows will at times define MemoryFence.
  22 #ifdef MemoryFence
  23 #undef MemoryFence
  24 #endif
  25
  26 namespace llvm {
  27 HANDLE
  28 llvm_execute_on_thread_impl(unsigned(__stdcall *ThreadFunc)(void *), void *Arg,
  29                             std::optional<unsigned> StackSizeInBytes) {
  30   HANDLE hThread = (HANDLE)::_beginthreadex(NULL, StackSizeInBytes.value_or(0),
  31                                             ThreadFunc, Arg, 0, NULL);
  32
  33   if (!hThread) {
  34     ReportLastErrorFatal("_beginthreadex failed");
  35   }
  36
  37   return hThread;
  38 }
  39
  40 void llvm_thread_join_impl(HANDLE hThread) {
  41   if (::WaitForSingleObject(hThread, INFINITE) == WAIT_FAILED) {
  42     ReportLastErrorFatal("WaitForSingleObject failed");
  43   }
  44 }
  45
  46 void llvm_thread_detach_impl(HANDLE hThread) {
  47   if (::CloseHandle(hThread) == FALSE) {
  48     ReportLastErrorFatal("CloseHandle failed");
  49   }
  50 }
  51
  52 DWORD llvm_thread_get_id_impl(HANDLE hThread) { return ::GetThreadId(hThread); }
  53
  54 DWORD llvm_thread_get_current_id_impl() { return ::GetCurrentThreadId(); }
  55
  56 } // namespace llvm
  57
  58 uint64_t llvm::get_threadid() { return uint64_t(::GetCurrentThreadId()); }
  59
  60 uint32_t llvm::get_max_thread_name_length() { return 0; }
  61
  62 #if defined(_MSC_VER)
  63 static void SetThreadName(DWORD Id, LPCSTR Name) {
  64   constexpr DWORD MS_VC_EXCEPTION = 0x406D1388;
  65
  66 #pragma pack(push, 8)
  67   struct THREADNAME_INFO {
  68     DWORD dwType;     // Must be 0x1000.
  69     LPCSTR szName;    // Pointer to thread name
  70     DWORD dwThreadId; // Thread ID (-1 == current thread)
  71     DWORD dwFlags;    // Reserved.  Do not use.
  72   };
  73 #pragma pack(pop)
  74
  75   THREADNAME_INFO info;
  76   info.dwType = 0x1000;
  77   info.szName = Name;
  78   info.dwThreadId = Id;
  79   info.dwFlags = 0;
  80
  81   __try {
  82     ::RaiseException(MS_VC_EXCEPTION, 0, sizeof(info) / sizeof(ULONG_PTR),
  83                      (ULONG_PTR *)&info);
  84   } __except (EXCEPTION_EXECUTE_HANDLER) {
  85   }
  86 }
  87 #endif
  88
  89 void llvm::set_thread_name(const Twine &Name) {
  90 #if defined(_MSC_VER)
  91   // Make sure the input is null terminated.
  92   SmallString<64> Storage;
  93   StringRef NameStr = Name.toNullTerminatedStringRef(Storage);
  94   SetThreadName(::GetCurrentThreadId(), NameStr.data());
  95 #endif
  96 }
  97
  98 void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
  99   // "Name" is not an inherent property of a thread on Windows.  In fact, when
 100   // you "set" the name, you are only firing a one-time message to a debugger
 101   // which it interprets as a program setting its threads' name.  We may be
 102   // able to get fancy by creating a TLS entry when someone calls
 103   // set_thread_name so that subsequent calls to get_thread_name return this
 104   // value.
 105   Name.clear();
 106 }
 107
 108 SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
 109   // https://docs.microsoft.com/en-us/windows/desktop/api/processthreadsapi/nf-processthreadsapi-setthreadpriority
 110   // Begin background processing mode. The system lowers the resource scheduling
 111   // priorities of the thread so that it can perform background work without
 112   // significantly affecting activity in the foreground.
 113   // End background processing mode. The system restores the resource scheduling
 114   // priorities of the thread as they were before the thread entered background
 115   // processing mode.
 116   //
 117   // FIXME: consider THREAD_PRIORITY_BELOW_NORMAL for Low
 118   return SetThreadPriority(GetCurrentThread(),
 119                            Priority != ThreadPriority::Default
 120                                ? THREAD_MODE_BACKGROUND_BEGIN
 121                                : THREAD_MODE_BACKGROUND_END)
 122              ? SetThreadPriorityResult::SUCCESS
 123              : SetThreadPriorityResult::FAILURE;
 124 }
 125
 126 struct ProcessorGroup {
 127   unsigned ID;
 128   unsigned AllThreads;
 129   unsigned UsableThreads;
 130   unsigned ThreadsPerCore;
 131   uint64_t Affinity;
 132
 133   unsigned useableCores() const {
 134     return std::max(1U, UsableThreads / ThreadsPerCore);
 135   }
 136 };
 137
 138 template <typename F>
 139 static bool IterateProcInfo(LOGICAL_PROCESSOR_RELATIONSHIP Relationship, F Fn) {
 140   DWORD Len = 0;
 141   BOOL R = ::GetLogicalProcessorInformationEx(Relationship, NULL, &Len);
 142   if (R || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
 143     return false;
 144   }
 145   auto *Info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)calloc(1, Len);
 146   R = ::GetLogicalProcessorInformationEx(Relationship, Info, &Len);
 147   if (R) {
 148     auto *End =
 149         (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Info + Len);
 150     for (auto *Curr = Info; Curr < End;
 151          Curr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Curr +
 152                                                             Curr->Size)) {
 153       if (Curr->Relationship != Relationship)
 154         continue;
 155       Fn(Curr);
 156     }
 157   }
 158   free(Info);
 159   return true;
 160 }
 161
 162 static std::optional<std::vector<USHORT>> getActiveGroups() {
 163   USHORT Count = 0;
 164   if (::GetProcessGroupAffinity(GetCurrentProcess(), &Count, nullptr))
 165     return std::nullopt;
 166
 167   if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
 168     return std::nullopt;
 169
 170   std::vector<USHORT> Groups;
 171   Groups.resize(Count);
 172   if (!::GetProcessGroupAffinity(GetCurrentProcess(), &Count, Groups.data()))
 173     return std::nullopt;
 174
 175   return Groups;
 176 }
 177
 178 static ArrayRef<ProcessorGroup> getProcessorGroups() {
 179   auto computeGroups = []() {
 180     SmallVector<ProcessorGroup, 4> Groups;
 181
 182     auto HandleGroup = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
 183       GROUP_RELATIONSHIP &El = ProcInfo->Group;
 184       for (unsigned J = 0; J < El.ActiveGroupCount; ++J) {
 185         ProcessorGroup G;
 186         G.ID = Groups.size();
 187         G.AllThreads = El.GroupInfo[J].MaximumProcessorCount;
 188         G.UsableThreads = El.GroupInfo[J].ActiveProcessorCount;
 189         assert(G.UsableThreads <= 64);
 190         G.Affinity = El.GroupInfo[J].ActiveProcessorMask;
 191         Groups.push_back(G);
 192       }
 193     };
 194
 195     if (!IterateProcInfo(RelationGroup, HandleGroup))
 196       return std::vector<ProcessorGroup>();
 197
 198     auto HandleProc = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
 199       PROCESSOR_RELATIONSHIP &El = ProcInfo->Processor;
 200       assert(El.GroupCount == 1);
 201       unsigned NumHyperThreads = 1;
 202       // If the flag is set, each core supports more than one hyper-thread.
 203       if (El.Flags & LTP_PC_SMT)
 204         NumHyperThreads = std::bitset<64>(El.GroupMask[0].Mask).count();
 205       unsigned I = El.GroupMask[0].Group;
 206       Groups[I].ThreadsPerCore = NumHyperThreads;
 207     };
 208
 209     if (!IterateProcInfo(RelationProcessorCore, HandleProc))
 210       return std::vector<ProcessorGroup>();
 211
 212     auto ActiveGroups = getActiveGroups();
 213     if (!ActiveGroups)
 214       return std::vector<ProcessorGroup>();
 215
 216     // If there's an affinity mask set, assume the user wants to constrain the
 217     // current process to only a single CPU group. On Windows, it is not
 218     // possible for affinity masks to cross CPU group boundaries.
 219     DWORD_PTR ProcessAffinityMask = 0, SystemAffinityMask = 0;
 220     if (::GetProcessAffinityMask(GetCurrentProcess(), &ProcessAffinityMask,
 221                                  &SystemAffinityMask)) {
 222
 223       if (ProcessAffinityMask != SystemAffinityMask) {
 224         if (llvm::RunningWindows11OrGreater() && ActiveGroups->size() > 1) {
 225           // The process affinity mask is spurious, due to an OS bug, ignore it.
 226           return std::vector<ProcessorGroup>(Groups.begin(), Groups.end());
 227         }
 228
 229         assert(ActiveGroups->size() == 1 &&
 230                "When an affinity mask is set, the process is expected to be "
 231                "assigned to a single processor group!");
 232
 233         unsigned CurrentGroupID = (*ActiveGroups)[0];
 234         ProcessorGroup NewG{Groups[CurrentGroupID]};
 235         NewG.Affinity = ProcessAffinityMask;
 236         NewG.UsableThreads = llvm::popcount(ProcessAffinityMask);
 237         Groups.clear();
 238         Groups.push_back(NewG);
 239       }
 240     }
 241     return std::vector<ProcessorGroup>(Groups.begin(), Groups.end());
 242   };
 243   static auto Groups = computeGroups();
 244   return ArrayRef<ProcessorGroup>(Groups);
 245 }
 246
 247 template <typename R, typename UnaryPredicate>
 248 static unsigned aggregate(R &&Range, UnaryPredicate P) {
 249   unsigned I{};
 250   for (const auto &It : Range)
 251     I += P(It);
 252   return I;
 253 }
 254
 255 int llvm::get_physical_cores() {
 256   static unsigned Cores =
 257       aggregate(getProcessorGroups(), [](const ProcessorGroup &G) {
 258         return G.UsableThreads / G.ThreadsPerCore;
 259       });
 260   return Cores;
 261 }
 262
 263 static int computeHostNumHardwareThreads() {
 264   static unsigned Threads =
 265       aggregate(getProcessorGroups(),
 266                 [](const ProcessorGroup &G) { return G.UsableThreads; });
 267   return Threads;
 268 }
 269
 270 // Finds the proper CPU socket where a thread number should go. Returns
 271 // 'std::nullopt' if the thread shall remain on the actual CPU socket.
 272 std::optional<unsigned>
 273 llvm::ThreadPoolStrategy::compute_cpu_socket(unsigned ThreadPoolNum) const {
 274   ArrayRef<ProcessorGroup> Groups = getProcessorGroups();
 275   // Only one CPU socket in the system or process affinity was set, no need to
 276   // move the thread(s) to another CPU socket.
 277   if (Groups.size() <= 1)
 278     return std::nullopt;
 279
 280   // We ask for less threads than there are hardware threads per CPU socket, no
 281   // need to dispatch threads to other CPU sockets.
 282   unsigned MaxThreadsPerSocket =
 283       UseHyperThreads ? Groups[0].UsableThreads : Groups[0].useableCores();
 284   if (compute_thread_count() <= MaxThreadsPerSocket)
 285     return std::nullopt;
 286
 287   assert(ThreadPoolNum < compute_thread_count() &&
 288          "The thread index is not within thread strategy's range!");
 289
 290   // Assumes the same number of hardware threads per CPU socket.
 291   return (ThreadPoolNum * Groups.size()) / compute_thread_count();
 292 }
 293
 294 // Assign the current thread to a more appropriate CPU socket or CPU group
 295 void llvm::ThreadPoolStrategy::apply_thread_strategy(
 296     unsigned ThreadPoolNum) const {
 297
 298   // After Windows 11 and Windows Server 2022, let the OS do the scheduling,
 299   // since a process automatically gains access to all processor groups.
 300   if (llvm::RunningWindows11OrGreater())
 301     return;
 302
 303   std::optional<unsigned> Socket = compute_cpu_socket(ThreadPoolNum);
 304   if (!Socket)
 305     return;
 306   ArrayRef<ProcessorGroup> Groups = getProcessorGroups();
 307   GROUP_AFFINITY Affinity{};
 308   Affinity.Group = Groups[*Socket].ID;
 309   Affinity.Mask = Groups[*Socket].Affinity;
 310   SetThreadGroupAffinity(GetCurrentThread(), &Affinity, nullptr);
 311 }
 312
 313 llvm::BitVector llvm::get_thread_affinity_mask() {
 314   GROUP_AFFINITY Affinity{};
 315   GetThreadGroupAffinity(GetCurrentThread(), &Affinity);
 316
 317   static unsigned All =
 318       aggregate(getProcessorGroups(),
 319                 [](const ProcessorGroup &G) { return G.AllThreads; });
 320
 321   unsigned StartOffset =
 322       aggregate(getProcessorGroups(), [&](const ProcessorGroup &G) {
 323         return G.ID < Affinity.Group ? G.AllThreads : 0;
 324       });
 325
 326   llvm::BitVector V;
 327   V.resize(All);
 328   for (unsigned I = 0; I < sizeof(KAFFINITY) * 8; ++I) {
 329     if ((Affinity.Mask >> I) & 1)
 330       V.set(StartOffset + I);
 331   }
 332   return V;
 333 }
 334
 335 unsigned llvm::get_cpus() { return getProcessorGroups().size(); }