llvm/lib/Support/Windows/Threading.inc

   1 //===- Windows/Threading.inc - Win32 Threading Implementation - -*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file provides the Win32 specific implementation of Threading functions.
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #include "llvm/ADT/SmallString.h"
  14 #include "llvm/ADT/Twine.h"
  15 #include "llvm/Support/thread.h"
  16
  17 #include "llvm/Support/Windows/WindowsSupport.h"
  18 #include <process.h>
  19
  20 #include <bitset>
  21
  22 // Windows will at times define MemoryFence.
  23 #ifdef MemoryFence
  24 #undef MemoryFence
  25 #endif
  26
  27 namespace llvm {
  28 HANDLE
  29 llvm_execute_on_thread_impl(unsigned(__stdcall *ThreadFunc)(void *), void *Arg,
  30                             std::optional<unsigned> StackSizeInBytes) {
  31   HANDLE hThread = (HANDLE)::_beginthreadex(NULL, StackSizeInBytes.value_or(0),
  32                                             ThreadFunc, Arg, 0, NULL);
  33
  34   if (!hThread) {
  35     ReportLastErrorFatal("_beginthreadex failed");
  36   }
  37
  38   return hThread;
  39 }
  40
  41 void llvm_thread_join_impl(HANDLE hThread) {
  42   if (::WaitForSingleObject(hThread, INFINITE) == WAIT_FAILED) {
  43     ReportLastErrorFatal("WaitForSingleObject failed");
  44   }
  45 }
  46
  47 void llvm_thread_detach_impl(HANDLE hThread) {
  48   if (::CloseHandle(hThread) == FALSE) {
  49     ReportLastErrorFatal("CloseHandle failed");
  50   }
  51 }
  52
  53 DWORD llvm_thread_get_id_impl(HANDLE hThread) { return ::GetThreadId(hThread); }
  54
  55 DWORD llvm_thread_get_current_id_impl() { return ::GetCurrentThreadId(); }
  56
  57 } // namespace llvm
  58
  59 uint64_t llvm::get_threadid() { return uint64_t(::GetCurrentThreadId()); }
  60
  61 uint32_t llvm::get_max_thread_name_length() { return 0; }
  62
  #if defined(_MSC_VER)
  // Raises the 0x406D1388 exception with a THREADNAME_INFO record describing
  // thread Id and the name Name, then swallows it in a structured exception
  // handler so execution continues normally.
  static void SetThreadName(DWORD Id, LPCSTR Name) {
    constexpr DWORD MS_VC_EXCEPTION = 0x406D1388;

  #pragma pack(push, 8)
    struct THREADNAME_INFO {
      DWORD dwType;     // Must be 0x1000.
      LPCSTR szName;    // Pointer to thread name
      DWORD dwThreadId; // Thread ID (-1 == current thread)
      DWORD dwFlags;    // Reserved.  Do not use.
    };
  #pragma pack(pop)

    // Field order: type tag, name pointer, thread id, reserved flags.
    THREADNAME_INFO Record{0x1000, Name, Id, 0};

    // The record is handed over as an array of ULONG_PTR-sized arguments.
    const DWORD ArgCount = sizeof(Record) / sizeof(ULONG_PTR);

    __try {
      ::RaiseException(MS_VC_EXCEPTION, 0, ArgCount, (ULONG_PTR *)&Record);
    } __except (EXCEPTION_EXECUTE_HANDLER) {
      // Intentionally empty: the exception only carries the name record.
    }
  }
  #endif
  89
  90 void llvm::set_thread_name(const Twine &Name) {
  91 #if defined(_MSC_VER)
  92   // Make sure the input is null terminated.
  93   SmallString<64> Storage;
  94   StringRef NameStr = Name.toNullTerminatedStringRef(Storage);
  95   SetThreadName(::GetCurrentThreadId(), NameStr.data());
  96 #endif
  97 }
  98
  99 void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
 100   // "Name" is not an inherent property of a thread on Windows.  In fact, when
 101   // you "set" the name, you are only firing a one-time message to a debugger
 102   // which it interprets as a program setting its threads' name.  We may be
 103   // able to get fancy by creating a TLS entry when someone calls
 104   // set_thread_name so that subsequent calls to get_thread_name return this
 105   // value.
 106   Name.clear();
 107 }
 108
 109 SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
 110   // https://docs.microsoft.com/en-us/windows/desktop/api/processthreadsapi/nf-processthreadsapi-setthreadpriority
 111   // Begin background processing mode. The system lowers the resource scheduling
 112   // priorities of the thread so that it can perform background work without
 113   // significantly affecting activity in the foreground.
 114   // End background processing mode. The system restores the resource scheduling
 115   // priorities of the thread as they were before the thread entered background
 116   // processing mode.
 117   //
 118   // FIXME: consider THREAD_PRIORITY_BELOW_NORMAL for Low
 119   return SetThreadPriority(GetCurrentThread(),
 120                            Priority != ThreadPriority::Default
 121                                ? THREAD_MODE_BACKGROUND_BEGIN
 122                                : THREAD_MODE_BACKGROUND_END)
 123              ? SetThreadPriorityResult::SUCCESS
 124              : SetThreadPriorityResult::FAILURE;
 125 }
 126
 // Describes one processor group as collected by getProcessorGroups().
 //
 // Every member has a default so that a default-constructed group is fully
 // initialized. ThreadsPerCore defaults to 1 because it is used as a divisor.
 struct ProcessorGroup {
   // Index of the group in enumeration order.
   unsigned ID = 0;
   // Maximum number of logical processors the group can hold.
   unsigned AllThreads = 0;
   // Number of logical processors usable by this process in the group.
   unsigned UsableThreads = 0;
   // Number of logical processors per physical core.
   unsigned ThreadsPerCore = 1;
   // Bit mask of the usable logical processors in the group.
   uint64_t Affinity = 0;

   // Returns the number of usable physical cores, but never less than 1.
   unsigned useableCores() const {
     return std::max(1U, UsableThreads / ThreadsPerCore);
   }
 };
 138
 139 template <typename F>
 140 static bool IterateProcInfo(LOGICAL_PROCESSOR_RELATIONSHIP Relationship, F Fn) {
 141   DWORD Len = 0;
 142   BOOL R = ::GetLogicalProcessorInformationEx(Relationship, NULL, &Len);
 143   if (R || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
 144     return false;
 145   }
 146   auto *Info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)calloc(1, Len);
 147   R = ::GetLogicalProcessorInformationEx(Relationship, Info, &Len);
 148   if (R) {
 149     auto *End =
 150         (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Info + Len);
 151     for (auto *Curr = Info; Curr < End;
 152          Curr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Curr +
 153                                                             Curr->Size)) {
 154       if (Curr->Relationship != Relationship)
 155         continue;
 156       Fn(Curr);
 157     }
 158   }
 159   free(Info);
 160   return true;
 161 }
 162
 163 static std::optional<std::vector<USHORT>> getActiveGroups() {
 164   USHORT Count = 0;
 165   if (::GetProcessGroupAffinity(GetCurrentProcess(), &Count, nullptr))
 166     return std::nullopt;
 167
 168   if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
 169     return std::nullopt;
 170
 171   std::vector<USHORT> Groups;
 172   Groups.resize(Count);
 173   if (!::GetProcessGroupAffinity(GetCurrentProcess(), &Count, Groups.data()))
 174     return std::nullopt;
 175
 176   return Groups;
 177 }
 178
 179 static ArrayRef<ProcessorGroup> getProcessorGroups() {
 180   auto computeGroups = []() {
 181     SmallVector<ProcessorGroup, 4> Groups;
 182
 183     auto HandleGroup = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
 184       GROUP_RELATIONSHIP &El = ProcInfo->Group;
 185       for (unsigned J = 0; J < El.ActiveGroupCount; ++J) {
 186         ProcessorGroup G;
 187         G.ID = Groups.size();
 188         G.AllThreads = El.GroupInfo[J].MaximumProcessorCount;
 189         G.UsableThreads = El.GroupInfo[J].ActiveProcessorCount;
 190         assert(G.UsableThreads <= 64);
 191         G.Affinity = El.GroupInfo[J].ActiveProcessorMask;
 192         Groups.push_back(G);
 193       }
 194     };
 195
 196     if (!IterateProcInfo(RelationGroup, HandleGroup))
 197       return std::vector<ProcessorGroup>();
 198
 199     auto HandleProc = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
 200       PROCESSOR_RELATIONSHIP &El = ProcInfo->Processor;
 201       assert(El.GroupCount == 1);
 202       unsigned NumHyperThreads = 1;
 203       // If the flag is set, each core supports more than one hyper-thread.
 204       if (El.Flags & LTP_PC_SMT)
 205         NumHyperThreads = std::bitset<64>(El.GroupMask[0].Mask).count();
 206       unsigned I = El.GroupMask[0].Group;
 207       Groups[I].ThreadsPerCore = NumHyperThreads;
 208     };
 209
 210     if (!IterateProcInfo(RelationProcessorCore, HandleProc))
 211       return std::vector<ProcessorGroup>();
 212
 213     auto ActiveGroups = getActiveGroups();
 214     if (!ActiveGroups)
 215       return std::vector<ProcessorGroup>();
 216
 217     // If there's an affinity mask set, assume the user wants to constrain the
 218     // current process to only a single CPU group. On Windows, it is not
 219     // possible for affinity masks to cross CPU group boundaries.
 220     DWORD_PTR ProcessAffinityMask = 0, SystemAffinityMask = 0;
 221     if (::GetProcessAffinityMask(GetCurrentProcess(), &ProcessAffinityMask,
 222                                  &SystemAffinityMask)) {
 223
 224       if (ProcessAffinityMask != SystemAffinityMask) {
 225         if (llvm::RunningWindows11OrGreater() && ActiveGroups->size() > 1) {
 226           // The process affinity mask is spurious, due to an OS bug, ignore it.
 227           return std::vector<ProcessorGroup>(Groups.begin(), Groups.end());
 228         }
 229
 230         assert(ActiveGroups->size() == 1 &&
 231                "When an affinity mask is set, the process is expected to be "
 232                "assigned to a single processor group!");
 233
 234         unsigned CurrentGroupID = (*ActiveGroups)[0];
 235         ProcessorGroup NewG{Groups[CurrentGroupID]};
 236         NewG.Affinity = ProcessAffinityMask;
 237         NewG.UsableThreads = llvm::popcount(ProcessAffinityMask);
 238         Groups.clear();
 239         Groups.push_back(NewG);
 240       }
 241     }
 242     return std::vector<ProcessorGroup>(Groups.begin(), Groups.end());
 243   };
 244   static auto Groups = computeGroups();
 245   return ArrayRef<ProcessorGroup>(Groups);
 246 }
 247
 // Sums the values P yields for each element of Range and returns the total
 // as an unsigned.
 template <typename R, typename UnaryPredicate>
 static unsigned aggregate(R &&Range, UnaryPredicate P) {
   unsigned Total = 0;
   for (const auto &Element : Range) {
     Total += P(Element);
   }
   return Total;
 }
 255
 256 int llvm::get_physical_cores() {
 257   static unsigned Cores =
 258       aggregate(getProcessorGroups(), [](const ProcessorGroup &G) {
 259         return G.UsableThreads / G.ThreadsPerCore;
 260       });
 261   return Cores;
 262 }
 263
 264 static int computeHostNumHardwareThreads() {
 265   static unsigned Threads =
 266       aggregate(getProcessorGroups(),
 267                 [](const ProcessorGroup &G) { return G.UsableThreads; });
 268   return Threads;
 269 }
 270
 271 // Finds the proper CPU socket where a thread number should go. Returns
 272 // 'std::nullopt' if the thread shall remain on the actual CPU socket.
 273 std::optional<unsigned>
 274 llvm::ThreadPoolStrategy::compute_cpu_socket(unsigned ThreadPoolNum) const {
 275   ArrayRef<ProcessorGroup> Groups = getProcessorGroups();
 276   // Only one CPU socket in the system or process affinity was set, no need to
 277   // move the thread(s) to another CPU socket.
 278   if (Groups.size() <= 1)
 279     return std::nullopt;
 280
 281   // We ask for less threads than there are hardware threads per CPU socket, no
 282   // need to dispatch threads to other CPU sockets.
 283   unsigned MaxThreadsPerSocket =
 284       UseHyperThreads ? Groups[0].UsableThreads : Groups[0].useableCores();
 285   if (compute_thread_count() <= MaxThreadsPerSocket)
 286     return std::nullopt;
 287
 288   assert(ThreadPoolNum < compute_thread_count() &&
 289          "The thread index is not within thread strategy's range!");
 290
 291   // Assumes the same number of hardware threads per CPU socket.
 292   return (ThreadPoolNum * Groups.size()) / compute_thread_count();
 293 }
 294
 295 // Assign the current thread to a more appropriate CPU socket or CPU group
 296 void llvm::ThreadPoolStrategy::apply_thread_strategy(
 297     unsigned ThreadPoolNum) const {
 298
 299   // After Windows 11 and Windows Server 2022, let the OS do the scheduling,
 300   // since a process automatically gains access to all processor groups.
 301   if (llvm::RunningWindows11OrGreater())
 302     return;
 303
 304   std::optional<unsigned> Socket = compute_cpu_socket(ThreadPoolNum);
 305   if (!Socket)
 306     return;
 307   ArrayRef<ProcessorGroup> Groups = getProcessorGroups();
 308   GROUP_AFFINITY Affinity{};
 309   Affinity.Group = Groups[*Socket].ID;
 310   Affinity.Mask = Groups[*Socket].Affinity;
 311   SetThreadGroupAffinity(GetCurrentThread(), &Affinity, nullptr);
 312 }
 313
 314 llvm::BitVector llvm::get_thread_affinity_mask() {
 315   GROUP_AFFINITY Affinity{};
 316   GetThreadGroupAffinity(GetCurrentThread(), &Affinity);
 317
 318   static unsigned All =
 319       aggregate(getProcessorGroups(),
 320                 [](const ProcessorGroup &G) { return G.AllThreads; });
 321
 322   unsigned StartOffset =
 323       aggregate(getProcessorGroups(), [&](const ProcessorGroup &G) {
 324         return G.ID < Affinity.Group ? G.AllThreads : 0;
 325       });
 326
 327   llvm::BitVector V;
 328   V.resize(All);
 329   for (unsigned I = 0; I < sizeof(KAFFINITY) * 8; ++I) {
 330     if ((Affinity.Mask >> I) & 1)
 331       V.set(StartOffset + I);
 332   }
 333   return V;
 334 }
 335
 336 unsigned llvm::get_cpus() { return getProcessorGroups().size(); }