1 //===- Windows/Threading.inc - Win32 Threading Implementation - -*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file provides the Win32 specific implementation of Threading functions.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/ADT/SmallString.h"
14 #include "llvm/ADT/Twine.h"
16 #include "llvm/Support/Windows/WindowsSupport.h"
21 // Windows will at times define MemoryFence.
28 llvm_execute_on_thread_impl(unsigned(__stdcall *ThreadFunc)(void *), void *Arg,
29 std::optional<unsigned> StackSizeInBytes) {
30 HANDLE hThread = (HANDLE)::_beginthreadex(NULL, StackSizeInBytes.value_or(0),
31 ThreadFunc, Arg, 0, NULL);
34 ReportLastErrorFatal("_beginthreadex failed");
40 void llvm_thread_join_impl(HANDLE hThread) {
41 if (::WaitForSingleObject(hThread, INFINITE) == WAIT_FAILED) {
42 ReportLastErrorFatal("WaitForSingleObject failed");
46 void llvm_thread_detach_impl(HANDLE hThread) {
47 if (::CloseHandle(hThread) == FALSE) {
48 ReportLastErrorFatal("CloseHandle failed");
52 DWORD llvm_thread_get_id_impl(HANDLE hThread) { return ::GetThreadId(hThread); }
54 DWORD llvm_thread_get_current_id_impl() { return ::GetCurrentThreadId(); }
58 uint64_t llvm::get_threadid() { return uint64_t(::GetCurrentThreadId()); }
60 uint32_t llvm::get_max_thread_name_length() { return 0; }
// Tells an attached debugger that thread Id is called Name. The name travels
// in a THREADNAME_INFO record passed as the arguments of the MS_VC_EXCEPTION
// exception; the __except handler swallows that exception so execution simply
// continues afterwards.
//
// __try/__except is Microsoft structured exception handling, so the helper is
// only compiled when _MSC_VER is defined.
#if defined(_MSC_VER)
static void SetThreadName(DWORD Id, LPCSTR Name) {
  constexpr DWORD MS_VC_EXCEPTION = 0x406D1388;

  // The record is reinterpreted below as an array of ULONG_PTR arguments, so
  // pin its packing instead of relying on the ambient setting.
#pragma pack(push, 8)
  struct THREADNAME_INFO {
    DWORD dwType;     // Must be 0x1000.
    LPCSTR szName;    // Pointer to thread name
    DWORD dwThreadId; // Thread ID (-1 == current thread)
    DWORD dwFlags;    // Reserved. Do not use.
  };
#pragma pack(pop)

  THREADNAME_INFO info = {0x1000, Name, Id, 0};

  __try {
    ::RaiseException(MS_VC_EXCEPTION, 0, sizeof(info) / sizeof(ULONG_PTR),
                     reinterpret_cast<const ULONG_PTR *>(&info));
  } __except (EXCEPTION_EXECUTE_HANDLER) {
    // Nothing to do: the exception exists only to carry the name.
  }
}
#endif
89 void llvm::set_thread_name(const Twine &Name) {
91 // Make sure the input is null terminated.
92 SmallString<64> Storage;
93 StringRef NameStr = Name.toNullTerminatedStringRef(Storage);
94 SetThreadName(::GetCurrentThreadId(), NameStr.data());
98 void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
99 // "Name" is not an inherent property of a thread on Windows. In fact, when
100 // you "set" the name, you are only firing a one-time message to a debugger
101 // which it interprets as a program setting its threads' name. We may be
102 // able to get fancy by creating a TLS entry when someone calls
103 // set_thread_name so that subsequent calls to get_thread_name return this
108 SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
109 // https://docs.microsoft.com/en-us/windows/desktop/api/processthreadsapi/nf-processthreadsapi-setthreadpriority
110 // Begin background processing mode. The system lowers the resource scheduling
111 // priorities of the thread so that it can perform background work without
112 // significantly affecting activity in the foreground.
113 // End background processing mode. The system restores the resource scheduling
114 // priorities of the thread as they were before the thread entered background
117 // FIXME: consider THREAD_PRIORITY_BELOW_NORMAL for Low
118 return SetThreadPriority(GetCurrentThread(),
119 Priority != ThreadPriority::Default
120 ? THREAD_MODE_BACKGROUND_BEGIN
121 : THREAD_MODE_BACKGROUND_END)
122 ? SetThreadPriorityResult::SUCCESS
123 : SetThreadPriorityResult::FAILURE;
// Per-group CPU information gathered by getProcessorGroups().
//
// Every field has a default so that a freshly created group is never read in
// an indeterminate state; ThreadsPerCore in particular is used as a divisor
// and therefore starts at 1.
struct ProcessorGroup {
  // Position of the group in the list built by getProcessorGroups().
  unsigned ID = 0;
  // MaximumProcessorCount reported for the group.
  unsigned AllThreads = 0;
  // ActiveProcessorCount of the group, or the number of bits set in the
  // process affinity mask when the process is restricted by one.
  unsigned UsableThreads = 0;
  // Hardware threads per core, as derived from the processor-core records.
  unsigned ThreadsPerCore = 1;
  // Active processor mask of the group, or the process affinity mask when the
  // process is restricted by one.
  uint64_t Affinity = 0;

  // Number of cores backing the usable threads; never less than 1.
  unsigned useableCores() const {
    return std::max(1U, UsableThreads / ThreadsPerCore);
  }
};
138 template <typename F>
139 static bool IterateProcInfo(LOGICAL_PROCESSOR_RELATIONSHIP Relationship, F Fn) {
141 BOOL R = ::GetLogicalProcessorInformationEx(Relationship, NULL, &Len);
142 if (R || GetLastError() != ERROR_INSUFFICIENT_BUFFER) {
145 auto *Info = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)calloc(1, Len);
146 R = ::GetLogicalProcessorInformationEx(Relationship, Info, &Len);
149 (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Info + Len);
150 for (auto *Curr = Info; Curr < End;
151 Curr = (SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *)((uint8_t *)Curr +
153 if (Curr->Relationship != Relationship)
162 static std::optional<std::vector<USHORT>> getActiveGroups() {
164 if (::GetProcessGroupAffinity(GetCurrentProcess(), &Count, nullptr))
167 if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
170 std::vector<USHORT> Groups;
171 Groups.resize(Count);
172 if (!::GetProcessGroupAffinity(GetCurrentProcess(), &Count, Groups.data()))
178 static ArrayRef<ProcessorGroup> getProcessorGroups() {
179 auto computeGroups = []() {
180 SmallVector<ProcessorGroup, 4> Groups;
182 auto HandleGroup = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
183 GROUP_RELATIONSHIP &El = ProcInfo->Group;
184 for (unsigned J = 0; J < El.ActiveGroupCount; ++J) {
186 G.ID = Groups.size();
187 G.AllThreads = El.GroupInfo[J].MaximumProcessorCount;
188 G.UsableThreads = El.GroupInfo[J].ActiveProcessorCount;
189 assert(G.UsableThreads <= 64);
190 G.Affinity = El.GroupInfo[J].ActiveProcessorMask;
195 if (!IterateProcInfo(RelationGroup, HandleGroup))
196 return std::vector<ProcessorGroup>();
198 auto HandleProc = [&](SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *ProcInfo) {
199 PROCESSOR_RELATIONSHIP &El = ProcInfo->Processor;
200 assert(El.GroupCount == 1);
201 unsigned NumHyperThreads = 1;
202 // If the flag is set, each core supports more than one hyper-thread.
203 if (El.Flags & LTP_PC_SMT)
204 NumHyperThreads = std::bitset<64>(El.GroupMask[0].Mask).count();
205 unsigned I = El.GroupMask[0].Group;
206 Groups[I].ThreadsPerCore = NumHyperThreads;
209 if (!IterateProcInfo(RelationProcessorCore, HandleProc))
210 return std::vector<ProcessorGroup>();
212 auto ActiveGroups = getActiveGroups();
214 return std::vector<ProcessorGroup>();
216 // If there's an affinity mask set, assume the user wants to constrain the
217 // current process to only a single CPU group. On Windows, it is not
218 // possible for affinity masks to cross CPU group boundaries.
219 DWORD_PTR ProcessAffinityMask = 0, SystemAffinityMask = 0;
220 if (::GetProcessAffinityMask(GetCurrentProcess(), &ProcessAffinityMask,
221 &SystemAffinityMask)) {
223 if (ProcessAffinityMask != SystemAffinityMask) {
224 if (llvm::RunningWindows11OrGreater() && ActiveGroups->size() > 1) {
225 // The process affinity mask is spurious, due to an OS bug, ignore it.
226 return std::vector<ProcessorGroup>(Groups.begin(), Groups.end());
229 assert(ActiveGroups->size() == 1 &&
230 "When an affinity mask is set, the process is expected to be "
231 "assigned to a single processor group!");
233 unsigned CurrentGroupID = (*ActiveGroups)[0];
234 ProcessorGroup NewG{Groups[CurrentGroupID]};
235 NewG.Affinity = ProcessAffinityMask;
236 NewG.UsableThreads = llvm::popcount(ProcessAffinityMask);
238 Groups.push_back(NewG);
241 return std::vector<ProcessorGroup>(Groups.begin(), Groups.end());
243 static auto Groups = computeGroups();
244 return ArrayRef<ProcessorGroup>(Groups);
// Calls P on every element of Range and returns the sum of the results as an
// unsigned value. An empty range yields 0.
template <typename R, typename UnaryPredicate>
static unsigned aggregate(R &&Range, UnaryPredicate P) {
  unsigned Total = 0;
  for (auto Cur = std::begin(Range), Last = std::end(Range); Cur != Last;
       ++Cur) {
    const auto &Element = *Cur;
    Total += P(Element);
  }
  return Total;
}
255 int llvm::get_physical_cores() {
256 static unsigned Cores =
257 aggregate(getProcessorGroups(), [](const ProcessorGroup &G) {
258 return G.UsableThreads / G.ThreadsPerCore;
263 static int computeHostNumHardwareThreads() {
264 static unsigned Threads =
265 aggregate(getProcessorGroups(),
266 [](const ProcessorGroup &G) { return G.UsableThreads; });
270 // Finds the proper CPU socket where a thread number should go. Returns
271 // 'std::nullopt' if the thread shall remain on the actual CPU socket.
272 std::optional<unsigned>
273 llvm::ThreadPoolStrategy::compute_cpu_socket(unsigned ThreadPoolNum) const {
274 ArrayRef<ProcessorGroup> Groups = getProcessorGroups();
275 // Only one CPU socket in the system or process affinity was set, no need to
276 // move the thread(s) to another CPU socket.
277 if (Groups.size() <= 1)
280 // We ask for less threads than there are hardware threads per CPU socket, no
281 // need to dispatch threads to other CPU sockets.
282 unsigned MaxThreadsPerSocket =
283 UseHyperThreads ? Groups[0].UsableThreads : Groups[0].useableCores();
284 if (compute_thread_count() <= MaxThreadsPerSocket)
287 assert(ThreadPoolNum < compute_thread_count() &&
288 "The thread index is not within thread strategy's range!");
290 // Assumes the same number of hardware threads per CPU socket.
291 return (ThreadPoolNum * Groups.size()) / compute_thread_count();
294 // Assign the current thread to a more appropriate CPU socket or CPU group
295 void llvm::ThreadPoolStrategy::apply_thread_strategy(
296 unsigned ThreadPoolNum) const {
298 // After Windows 11 and Windows Server 2022, let the OS do the scheduling,
299 // since a process automatically gains access to all processor groups.
300 if (llvm::RunningWindows11OrGreater())
303 std::optional<unsigned> Socket = compute_cpu_socket(ThreadPoolNum);
306 ArrayRef<ProcessorGroup> Groups = getProcessorGroups();
307 GROUP_AFFINITY Affinity{};
308 Affinity.Group = Groups[*Socket].ID;
309 Affinity.Mask = Groups[*Socket].Affinity;
310 SetThreadGroupAffinity(GetCurrentThread(), &Affinity, nullptr);
313 llvm::BitVector llvm::get_thread_affinity_mask() {
314 GROUP_AFFINITY Affinity{};
315 GetThreadGroupAffinity(GetCurrentThread(), &Affinity);
317 static unsigned All =
318 aggregate(getProcessorGroups(),
319 [](const ProcessorGroup &G) { return G.AllThreads; });
321 unsigned StartOffset =
322 aggregate(getProcessorGroups(), [&](const ProcessorGroup &G) {
323 return G.ID < Affinity.Group ? G.AllThreads : 0;
328 for (unsigned I = 0; I < sizeof(KAFFINITY) * 8; ++I) {
329 if ((Affinity.Mask >> I) & 1)
330 V.set(StartOffset + I);
335 unsigned llvm::get_cpus() { return getProcessorGroups().size(); }