llvm/lib/Support/Unix/Threading.inc

   1 //===- Unix/Threading.inc - Unix Threading Implementation ----- -*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 // This file provides the Unix specific implementation of Threading functions.
  10 //
  11 //===----------------------------------------------------------------------===//
  12
  13 #include "Unix.h"
  14 #include "llvm/ADT/ScopeExit.h"
  15 #include "llvm/ADT/SmallString.h"
  16 #include "llvm/ADT/SmallVector.h"
  17 #include "llvm/ADT/StringRef.h"
  18 #include "llvm/ADT/Twine.h"
  19 #include "llvm/Support/MemoryBuffer.h"
  20 #include "llvm/Support/raw_ostream.h"
  21
  22 #if defined(__APPLE__)
  23 #include <mach/mach_init.h>
  24 #include <mach/mach_port.h>
  25 #include <pthread/qos.h>
  26 #include <sys/sysctl.h>
  27 #include <sys/types.h>
  28 #endif
  29
  30 #include <pthread.h>
  31
  32 #if defined(__FreeBSD__) || defined(__OpenBSD__)
  33 #include <pthread_np.h> // For pthread_getthreadid_np() / pthread_set_name_np()
  34 #endif
  35
  36 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
  37 #include <errno.h>
  38 #include <sys/cpuset.h>
  39 #include <sys/sysctl.h>
  40 #include <sys/user.h>
  41 #include <unistd.h>
  42 #endif
  43
  44 #if defined(__NetBSD__)
  45 #include <lwp.h> // For _lwp_self()
  46 #endif
  47
  48 #if defined(__OpenBSD__)
  49 #include <unistd.h> // For getthrid()
  50 #endif
  51
  52 #if defined(__linux__)
  53 #include <sched.h>       // For sched_getaffinity
  54 #include <sys/syscall.h> // For syscall codes
  55 #include <unistd.h>      // For syscall()
  56 #endif
  57
  58 namespace llvm {
  59 pthread_t
  60 llvm_execute_on_thread_impl(void *(*ThreadFunc)(void *), void *Arg,
  61                             std::optional<unsigned> StackSizeInBytes) {
  62   int errnum;
  63
  64   // Construct the attributes object.
  65   pthread_attr_t Attr;
  66   if ((errnum = ::pthread_attr_init(&Attr)) != 0) {
  67     ReportErrnumFatal("pthread_attr_init failed", errnum);
  68   }
  69
  70   auto AttrGuard = llvm::make_scope_exit([&] {
  71     if ((errnum = ::pthread_attr_destroy(&Attr)) != 0) {
  72       ReportErrnumFatal("pthread_attr_destroy failed", errnum);
  73     }
  74   });
  75
  76   // Set the requested stack size, if given.
  77   if (StackSizeInBytes) {
  78     if ((errnum = ::pthread_attr_setstacksize(&Attr, *StackSizeInBytes)) != 0) {
  79       ReportErrnumFatal("pthread_attr_setstacksize failed", errnum);
  80     }
  81   }
  82
  83   // Construct and execute the thread.
  84   pthread_t Thread;
  85   if ((errnum = ::pthread_create(&Thread, &Attr, ThreadFunc, Arg)) != 0)
  86     ReportErrnumFatal("pthread_create failed", errnum);
  87
  88   return Thread;
  89 }
  90
  91 void llvm_thread_detach_impl(pthread_t Thread) {
  92   int errnum;
  93
  94   if ((errnum = ::pthread_detach(Thread)) != 0) {
  95     ReportErrnumFatal("pthread_detach failed", errnum);
  96   }
  97 }
  98
  99 void llvm_thread_join_impl(pthread_t Thread) {
 100   int errnum;
 101
 102   if ((errnum = ::pthread_join(Thread, nullptr)) != 0) {
 103     ReportErrnumFatal("pthread_join failed", errnum);
 104   }
 105 }
 106
 107 pthread_t llvm_thread_get_id_impl(pthread_t Thread) { return Thread; }
 108
 109 pthread_t llvm_thread_get_current_id_impl() { return ::pthread_self(); }
 110
 111 } // namespace llvm
 112
 113 uint64_t llvm::get_threadid() {
 114 #if defined(__APPLE__)
 115   // Calling "mach_thread_self()" bumps the reference count on the thread
 116   // port, so we need to deallocate it. mach_task_self() doesn't bump the ref
 117   // count.
 118   thread_port_t Self = mach_thread_self();
 119   mach_port_deallocate(mach_task_self(), Self);
 120   return Self;
 121 #elif defined(__FreeBSD__)
 122   return uint64_t(pthread_getthreadid_np());
 123 #elif defined(__NetBSD__)
 124   return uint64_t(_lwp_self());
 125 #elif defined(__OpenBSD__)
 126   return uint64_t(getthrid());
 127 #elif defined(__ANDROID__)
 128   return uint64_t(gettid());
 129 #elif defined(__linux__)
 130   return uint64_t(syscall(SYS_gettid));
 131 #else
 132   return uint64_t(pthread_self());
 133 #endif
 134 }
 135
 136 static constexpr uint32_t get_max_thread_name_length_impl() {
 137 #if defined(__NetBSD__)
 138   return PTHREAD_MAX_NAMELEN_NP;
 139 #elif defined(__APPLE__)
 140   return 64;
 141 #elif defined(__linux__)
 142 #if HAVE_PTHREAD_SETNAME_NP
 143   return 16;
 144 #else
 145   return 0;
 146 #endif
 147 #elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
 148   return 16;
 149 #elif defined(__OpenBSD__)
 150   return 32;
 151 #else
 152   return 0;
 153 #endif
 154 }
 155
 156 uint32_t llvm::get_max_thread_name_length() {
 157   return get_max_thread_name_length_impl();
 158 }
 159
 160 void llvm::set_thread_name(const Twine &Name) {
 161   // Make sure the input is null terminated.
 162   SmallString<64> Storage;
 163   StringRef NameStr = Name.toNullTerminatedStringRef(Storage);
 164
 165   // Truncate from the beginning, not the end, if the specified name is too
 166   // long.  For one, this ensures that the resulting string is still null
 167   // terminated, but additionally the end of a long thread name will usually
 168   // be more unique than the beginning, since a common pattern is for similar
 169   // threads to share a common prefix.
 170   // Note that the name length includes the null terminator.
 171   if (get_max_thread_name_length() > 0)
 172     NameStr = NameStr.take_back(get_max_thread_name_length() - 1);
 173   (void)NameStr;
 174 #if defined(__linux__)
 175 #if (defined(__GLIBC__) && defined(_GNU_SOURCE)) || defined(__ANDROID__)
 176 #if HAVE_PTHREAD_SETNAME_NP
 177   ::pthread_setname_np(::pthread_self(), NameStr.data());
 178 #endif
 179 #endif
 180 #elif defined(__FreeBSD__) || defined(__OpenBSD__)
 181   ::pthread_set_name_np(::pthread_self(), NameStr.data());
 182 #elif defined(__NetBSD__)
 183   ::pthread_setname_np(::pthread_self(), "%s",
 184                        const_cast<char *>(NameStr.data()));
 185 #elif defined(__APPLE__)
 186   ::pthread_setname_np(NameStr.data());
 187 #endif
 188 }
 189
 190 void llvm::get_thread_name(SmallVectorImpl<char> &Name) {
 191   Name.clear();
 192
 193 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
 194   int pid = ::getpid();
 195   uint64_t tid = get_threadid();
 196
 197   struct kinfo_proc *kp = nullptr, *nkp;
 198   size_t len = 0;
 199   int error;
 200   int ctl[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PID | KERN_PROC_INC_THREAD,
 201                 (int)pid};
 202
 203   while (1) {
 204     error = sysctl(ctl, 4, kp, &len, nullptr, 0);
 205     if (kp == nullptr || (error != 0 && errno == ENOMEM)) {
 206       // Add extra space in case threads are added before next call.
 207       len += sizeof(*kp) + len / 10;
 208       nkp = (struct kinfo_proc *)::realloc(kp, len);
 209       if (nkp == nullptr) {
 210         free(kp);
 211         return;
 212       }
 213       kp = nkp;
 214       continue;
 215     }
 216     if (error != 0)
 217       len = 0;
 218     break;
 219   }
 220
 221   for (size_t i = 0; i < len / sizeof(*kp); i++) {
 222     if (kp[i].ki_tid == (lwpid_t)tid) {
 223       Name.append(kp[i].ki_tdname, kp[i].ki_tdname + strlen(kp[i].ki_tdname));
 224       break;
 225     }
 226   }
 227   free(kp);
 228   return;
 229 #elif defined(__NetBSD__)
 230   constexpr uint32_t len = get_max_thread_name_length_impl();
 231   char buf[len];
 232   ::pthread_getname_np(::pthread_self(), buf, len);
 233
 234   Name.append(buf, buf + strlen(buf));
 235 #elif defined(__OpenBSD__)
 236   constexpr uint32_t len = get_max_thread_name_length_impl();
 237   char buf[len];
 238   ::pthread_get_name_np(::pthread_self(), buf, len);
 239
 240   Name.append(buf, buf + strlen(buf));
 241 #elif defined(__linux__)
 242 #if HAVE_PTHREAD_GETNAME_NP
 243   constexpr uint32_t len = get_max_thread_name_length_impl();
 244   char Buffer[len] = {'\0'}; // FIXME: working around MSan false positive.
 245   if (0 == ::pthread_getname_np(::pthread_self(), Buffer, len))
 246     Name.append(Buffer, Buffer + strlen(Buffer));
 247 #endif
 248 #endif
 249 }
 250
 251 SetThreadPriorityResult llvm::set_thread_priority(ThreadPriority Priority) {
 252 #if defined(__linux__) && defined(SCHED_IDLE)
 253   // Some *really* old glibcs are missing SCHED_IDLE.
 254   // http://man7.org/linux/man-pages/man3/pthread_setschedparam.3.html
 255   // http://man7.org/linux/man-pages/man2/sched_setscheduler.2.html
 256   sched_param priority;
 257   // For each of the above policies, param->sched_priority must be 0.
 258   priority.sched_priority = 0;
 259   // SCHED_IDLE    for running very low priority background jobs.
 260   // SCHED_OTHER   the standard round-robin time-sharing policy;
 261   return !pthread_setschedparam(
 262              pthread_self(),
 263              // FIXME: consider SCHED_BATCH for Low
 264              Priority == ThreadPriority::Default ? SCHED_OTHER : SCHED_IDLE,
 265              &priority)
 266              ? SetThreadPriorityResult::SUCCESS
 267              : SetThreadPriorityResult::FAILURE;
 268 #elif defined(__APPLE__)
 269   // https://developer.apple.com/documentation/apple-silicon/tuning-your-code-s-performance-for-apple-silicon
 270   //
 271   // Background - Applies to work that isn’t visible to the user and may take
 272   // significant time to complete. Examples include indexing, backing up, or
 273   // synchronizing data. This class emphasizes energy efficiency.
 274   //
 275   // Utility - Applies to work that takes anywhere from a few seconds to a few
 276   // minutes to complete. Examples include downloading a document or importing
 277   // data. This class offers a balance between responsiveness, performance, and
 278   // energy efficiency.
 279   const auto qosClass = [&]() {
 280     switch (Priority) {
 281     case ThreadPriority::Background:
 282       return QOS_CLASS_BACKGROUND;
 283     case ThreadPriority::Low:
 284       return QOS_CLASS_UTILITY;
 285     case ThreadPriority::Default:
 286       return QOS_CLASS_DEFAULT;
 287     }
 288   }();
 289   return !pthread_set_qos_class_self_np(qosClass, 0)
 290              ? SetThreadPriorityResult::SUCCESS
 291              : SetThreadPriorityResult::FAILURE;
 292 #endif
 293   return SetThreadPriorityResult::FAILURE;
 294 }
 295
 296 #include <thread>
 297
 298 static int computeHostNumHardwareThreads() {
 299 #if defined(__FreeBSD__)
 300   cpuset_t mask;
 301   CPU_ZERO(&mask);
 302   if (cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(mask),
 303                          &mask) == 0)
 304     return CPU_COUNT(&mask);
 305 #elif defined(__linux__)
 306   cpu_set_t Set;
 307   if (sched_getaffinity(0, sizeof(Set), &Set) == 0)
 308     return CPU_COUNT(&Set);
 309 #endif
 310   // Guard against std::thread::hardware_concurrency() returning 0.
 311   if (unsigned Val = std::thread::hardware_concurrency())
 312     return Val;
 313   return 1;
 314 }
 315
 316 void llvm::ThreadPoolStrategy::apply_thread_strategy(
 317     unsigned ThreadPoolNum) const {}
 318
 319 llvm::BitVector llvm::get_thread_affinity_mask() {
 320   // FIXME: Implement
 321   llvm_unreachable("Not implemented!");
 322 }
 323
 324 unsigned llvm::get_cpus() { return 1; }
 325
 326 #if defined(__linux__) && (defined(__i386__) || defined(__x86_64__))
 327 // On Linux, the number of physical cores can be computed from /proc/cpuinfo,
 328 // using the number of unique physical/core id pairs. The following
 329 // implementation reads the /proc/cpuinfo format on an x86_64 system.
 330 static int computeHostNumPhysicalCores() {
 331   // Enabled represents the number of physical id/core id pairs with at least
 332   // one processor id enabled by the CPU affinity mask.
 333   cpu_set_t Affinity, Enabled;
 334   if (sched_getaffinity(0, sizeof(Affinity), &Affinity) != 0)
 335     return -1;
 336   CPU_ZERO(&Enabled);
 337
 338   // Read /proc/cpuinfo as a stream (until EOF reached). It cannot be
 339   // mmapped because it appears to have 0 size.
 340   llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> Text =
 341       llvm::MemoryBuffer::getFileAsStream("/proc/cpuinfo");
 342   if (std::error_code EC = Text.getError()) {
 343     llvm::errs() << "Can't read "
 344                  << "/proc/cpuinfo: " << EC.message() << "\n";
 345     return -1;
 346   }
 347   SmallVector<StringRef, 8> strs;
 348   (*Text)->getBuffer().split(strs, "\n", /*MaxSplit=*/-1,
 349                              /*KeepEmpty=*/false);
 350   int CurProcessor = -1;
 351   int CurPhysicalId = -1;
 352   int CurSiblings = -1;
 353   int CurCoreId = -1;
 354   for (StringRef Line : strs) {
 355     std::pair<StringRef, StringRef> Data = Line.split(':');
 356     auto Name = Data.first.trim();
 357     auto Val = Data.second.trim();
 358     // These fields are available if the kernel is configured with CONFIG_SMP.
 359     if (Name == "processor")
 360       Val.getAsInteger(10, CurProcessor);
 361     else if (Name == "physical id")
 362       Val.getAsInteger(10, CurPhysicalId);
 363     else if (Name == "siblings")
 364       Val.getAsInteger(10, CurSiblings);
 365     else if (Name == "core id") {
 366       Val.getAsInteger(10, CurCoreId);
 367       // The processor id corresponds to an index into cpu_set_t.
 368       if (CPU_ISSET(CurProcessor, &Affinity))
 369         CPU_SET(CurPhysicalId * CurSiblings + CurCoreId, &Enabled);
 370     }
 371   }
 372   return CPU_COUNT(&Enabled);
 373 }
 374 #elif (defined(__linux__) && defined(__s390x__)) || defined(_AIX)
 375 static int computeHostNumPhysicalCores() {
 376   return sysconf(_SC_NPROCESSORS_ONLN);
 377 }
 378 #elif defined(__linux__) && !defined(__ANDROID__)
 379 static int computeHostNumPhysicalCores() {
 380   cpu_set_t Affinity;
 381   if (sched_getaffinity(0, sizeof(Affinity), &Affinity) == 0)
 382     return CPU_COUNT(&Affinity);
 383
 384   // The call to sched_getaffinity() may have failed because the Affinity
 385   // mask is too small for the number of CPU's on the system (i.e. the
 386   // system has more than 1024 CPUs). Allocate a mask large enough for
 387   // twice as many CPUs.
 388   cpu_set_t *DynAffinity;
 389   DynAffinity = CPU_ALLOC(2048);
 390   if (sched_getaffinity(0, CPU_ALLOC_SIZE(2048), DynAffinity) == 0) {
 391     int NumCPUs = CPU_COUNT(DynAffinity);
 392     CPU_FREE(DynAffinity);
 393     return NumCPUs;
 394   }
 395   return -1;
 396 }
 397 #elif defined(__APPLE__)
 398 // Gets the number of *physical cores* on the machine.
 399 static int computeHostNumPhysicalCores() {
 400   uint32_t count;
 401   size_t len = sizeof(count);
 402   sysctlbyname("hw.physicalcpu", &count, &len, NULL, 0);
 403   if (count < 1) {
 404     int nm[2];
 405     nm[0] = CTL_HW;
 406     nm[1] = HW_AVAILCPU;
 407     sysctl(nm, 2, &count, &len, NULL, 0);
 408     if (count < 1)
 409       return -1;
 410   }
 411   return count;
 412 }
 413 #elif defined(__MVS__)
 414 static int computeHostNumPhysicalCores() {
 415   enum {
 416     // Byte offset of the pointer to the Communications Vector Table (CVT) in
 417     // the Prefixed Save Area (PSA). The table entry is a 31-bit pointer and
 418     // will be zero-extended to uintptr_t.
 419     FLCCVT = 16,
 420     // Byte offset of the pointer to the Common System Data Area (CSD) in the
 421     // CVT. The table entry is a 31-bit pointer and will be zero-extended to
 422     // uintptr_t.
 423     CVTCSD = 660,
 424     // Byte offset to the number of live CPs in the LPAR, stored as a signed
 425     // 32-bit value in the table.
 426     CSD_NUMBER_ONLINE_STANDARD_CPS = 264,
 427   };
 428   char *PSA = 0;
 429   char *CVT = reinterpret_cast<char *>(
 430       static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(PSA[FLCCVT])));
 431   char *CSD = reinterpret_cast<char *>(
 432       static_cast<uintptr_t>(reinterpret_cast<unsigned int &>(CVT[CVTCSD])));
 433   return reinterpret_cast<int &>(CSD[CSD_NUMBER_ONLINE_STANDARD_CPS]);
 434 }
 435 #else
 436 // On other systems, return -1 to indicate unknown.
 437 static int computeHostNumPhysicalCores() { return -1; }
 438 #endif
 439
 440 int llvm::get_physical_cores() {
 441   static int NumCores = computeHostNumPhysicalCores();
 442   return NumCores;
 443 }