[MemProf] Templatize CallStackRadixTreeBuilder (NFC) (#117014)
[llvm-project.git] / libc / src / __support / threads / linux / thread.cpp
blobc531d74c533550d6f94c8106322a6aeb043edee2
1 //===--- Implementation of a Linux thread class -----------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "src/__support/threads/thread.h"
10 #include "config/app.h"
11 #include "src/__support/CPP/atomic.h"
12 #include "src/__support/CPP/string_view.h"
13 #include "src/__support/CPP/stringstream.h"
14 #include "src/__support/OSUtil/syscall.h" // For syscall functions.
15 #include "src/__support/common.h"
16 #include "src/__support/error_or.h"
17 #include "src/__support/macros/config.h"
18 #include "src/__support/threads/linux/futex_utils.h" // For FutexWordType
19 #include "src/errno/libc_errno.h" // For error macros
21 #ifdef LIBC_TARGET_ARCH_IS_AARCH64
22 #include <arm_acle.h>
23 #endif
25 #include "hdr/fcntl_macros.h"
26 #include <linux/param.h> // For EXEC_PAGESIZE.
27 #include <linux/prctl.h> // For PR_SET_NAME
28 #include <linux/sched.h> // For CLONE_* flags.
29 #include <stdint.h>
30 #include <sys/mman.h> // For PROT_* and MAP_* definitions.
31 #include <sys/syscall.h> // For syscall numbers.
33 namespace LIBC_NAMESPACE_DECL {
// Pick the syscall number used to map thread stacks: SYS_mmap2 when the target
// defines it, otherwise SYS_mmap. The build fails if neither is defined.
35 #ifdef SYS_mmap2
36 static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap2;
37 #elif defined(SYS_mmap)
38 static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap;
39 #else
40 #error "mmap or mmap2 syscalls not available."
41 #endif
// Upper bound on the size of a thread name in bytes. Thread::set_name rejects
// names of this size or larger and Thread::get_name rejects smaller buffers.
43 static constexpr size_t NAME_SIZE_MAX = 16; // Includes the null terminator
// Value stored in the clear-tid futex word before the thread is cloned, and
// the value Thread::wait expects the word to hold while it blocks on it.
44 static constexpr uint32_t CLEAR_TID_VALUE = 0xABCD1234;
// Flags passed to the clone syscall when Thread::run spawns a new thread.
45 static constexpr unsigned CLONE_SYSCALL_FLAGS =
46 CLONE_VM // Share the memory space with the parent.
47 | CLONE_FS // Share the file system with the parent.
48 | CLONE_FILES // Share the files with the parent.
49 | CLONE_SIGHAND // Share the signal handlers with the parent.
50 | CLONE_THREAD // Same thread group as the parent.
51 | CLONE_SYSVSEM // Share a single list of System V semaphore adjustment
52 // values
53 | CLONE_PARENT_SETTID // Set child thread ID in |ptid| of the parent.
54 | CLONE_CHILD_CLEARTID // Let the kernel clear the tid address and
55 // wake the joining thread.
56 | CLONE_SETTLS; // Setup the thread pointer of the new thread.
// Name of the register that Thread::run pins its clone_result variable to, so
// that the result of the clone syscall is held in a register. Each supported
// architecture names its own register; other targets fail to build.
58 #ifdef LIBC_TARGET_ARCH_IS_AARCH64
59 #define CLONE_RESULT_REGISTER "x0"
60 #elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
61 #define CLONE_RESULT_REGISTER "t0"
62 #elif defined(LIBC_TARGET_ARCH_IS_X86_64)
63 #define CLONE_RESULT_REGISTER "rax"
64 #else
65 #error "CLONE_RESULT_REGISTER not defined for your target architecture"
66 #endif
68 static constexpr ErrorOr<size_t> add_no_overflow(size_t lhs, size_t rhs) {
69 if (lhs > SIZE_MAX - rhs)
70 return Error{EINVAL};
71 if (rhs > SIZE_MAX - lhs)
72 return Error{EINVAL};
73 return lhs + rhs;
76 static constexpr ErrorOr<size_t> round_to_page(size_t v) {
77 auto vp_or_err = add_no_overflow(v, EXEC_PAGESIZE - 1);
78 if (!vp_or_err)
79 return vp_or_err;
81 return vp_or_err.value() & -EXEC_PAGESIZE;
84 LIBC_INLINE ErrorOr<void *> alloc_stack(size_t stacksize, size_t guardsize) {
86 // Guard needs to be mapped with PROT_NONE
87 int prot = guardsize ? PROT_NONE : PROT_READ | PROT_WRITE;
88 auto size_or_err = add_no_overflow(stacksize, guardsize);
89 if (!size_or_err)
90 return Error{int(size_or_err.error())};
91 size_t size = size_or_err.value();
93 // TODO: Maybe add MAP_STACK? Currently unimplemented on linux but helps
94 // future-proof.
95 long mmap_result = LIBC_NAMESPACE::syscall_impl<long>(
96 MMAP_SYSCALL_NUMBER,
97 0, // No special address
98 size, prot,
99 MAP_ANONYMOUS | MAP_PRIVATE, // Process private.
100 -1, // Not backed by any file
101 0 // No offset
103 if (mmap_result < 0 && (uintptr_t(mmap_result) >= UINTPTR_MAX - size))
104 return Error{int(-mmap_result)};
106 if (guardsize) {
107 // Give read/write permissions to actual stack.
108 // TODO: We are assuming stack growsdown here.
109 long result = LIBC_NAMESPACE::syscall_impl<long>(
110 SYS_mprotect, mmap_result + guardsize, stacksize,
111 PROT_READ | PROT_WRITE);
113 if (result != 0)
114 return Error{int(-result)};
116 mmap_result += guardsize;
117 return reinterpret_cast<void *>(mmap_result);
120 // This must always be inlined as we may be freeing the calling threads stack in
121 // which case a normal return from the top the stack would cause an invalid
122 // memory read.
123 [[gnu::always_inline]] LIBC_INLINE void
124 free_stack(void *stack, size_t stacksize, size_t guardsize) {
125 uintptr_t stackaddr = reinterpret_cast<uintptr_t>(stack);
126 stackaddr -= guardsize;
127 stack = reinterpret_cast<void *>(stackaddr);
128 LIBC_NAMESPACE::syscall_impl<long>(SYS_munmap, stack, stacksize + guardsize);
131 struct Thread;
// Arguments handed from Thread::run to the newly cloned thread. Thread::run
// writes this struct into the new thread's stack memory and start_thread reads
// it back from there.
133 // We align the start args to 16-byte boundary as we adjust the allocated
134 // stack memory with its size. We want the adjusted address to be at a
135 // 16-byte boundary to satisfy the x86_64 and aarch64 ABI requirements.
136 // If different architecture in future requires higher alignment, then we
137 // can add a platform specific alignment spec.
138 struct alignas(STACK_ALIGNMENT) StartArgs {
// Attributes of the new thread; start_thread installs this as self.attrib.
139 ThreadAttributes *thread_attrib;
// Entry function of the thread; start_thread calls its posix_runner or
// stdc_runner member depending on the thread's style.
140 ThreadRunner runner;
// Argument forwarded to the runner.
141 void *arg;
144 // This must always be inlined as we may be freeing the calling threads stack in
145 // which case a normal return from the top the stack would cause an invalid
146 // memory read.
147 [[gnu::always_inline]] LIBC_INLINE void
148 cleanup_thread_resources(ThreadAttributes *attrib) {
149 // Cleanup the TLS before the stack as the TLS information is stored on
150 // the stack.
151 cleanup_tls(attrib->tls, attrib->tls_size);
152 if (attrib->owned_stack)
153 free_stack(attrib->stack, attrib->stacksize, attrib->guardsize);
// Returns the address from which start_thread reads the StartArgs of the
// calling thread. The address is derived from the frame pointer, with an
// architecture-specific adjustment explained in each branch below.
// NOTE(review): no value is returned when none of the three architecture
// macros is defined; other parts of this file #error on such targets.
156 [[gnu::always_inline]] LIBC_INLINE uintptr_t get_start_args_addr() {
157 // NOTE: For __builtin_frame_address to work reliably across compilers,
158 // architectures and various optimization levels, the TU including this file
159 // should be compiled with -fno-omit-frame-pointer.
160 #ifdef LIBC_TARGET_ARCH_IS_X86_64
161 return reinterpret_cast<uintptr_t>(__builtin_frame_address(0))
162 // The x86_64 call instruction pushes resume address on to the stack.
163 // Next, The x86_64 SysV ABI requires that the frame pointer be pushed
164 // on to the stack. So, we have to step past two 64-bit values to get
165 // to the start args.
166 + sizeof(uintptr_t) * 2;
167 #elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
168 // The frame pointer after cloning the new thread in the Thread::run method
169 // is set to the stack pointer where start args are stored. So, we fetch
170 // from there.
171 return reinterpret_cast<uintptr_t>(__builtin_frame_address(1));
172 #elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
173 // The current frame pointer is the previous stack pointer where the start
174 // args are stored.
175 return reinterpret_cast<uintptr_t>(__builtin_frame_address(0));
176 #endif
// First function executed by a newly cloned thread; Thread::run calls it when
// the clone syscall returns 0. It locates the StartArgs on the new stack,
// publishes the thread attributes through self.attrib, invokes the runner that
// matches the thread style and passes the runner's result to thread_exit.
// NOTE(review): presumably kept noinline so that the frame layout assumed by
// get_start_args_addr holds - confirm.
179 [[gnu::noinline]] void start_thread() {
180 auto *start_args = reinterpret_cast<StartArgs *>(get_start_args_addr());
181 auto *attrib = start_args->thread_attrib;
182 self.attrib = attrib;
183 self.attrib->atexit_callback_mgr = internal::get_thread_atexit_callback_mgr();
// The return value is also recorded in the attributes, which is where
// Thread::join reads it from.
185 if (attrib->style == ThreadStyle::POSIX) {
186 attrib->retval.posix_retval =
187 start_args->runner.posix_runner(start_args->arg);
188 thread_exit(ThreadReturnValue(attrib->retval.posix_retval),
189 ThreadStyle::POSIX);
190 } else {
191 attrib->retval.stdc_retval =
192 start_args->runner.stdc_runner(start_args->arg);
193 thread_exit(ThreadReturnValue(attrib->retval.stdc_retval),
194 ThreadStyle::STDC);
// Spawns a new thread with the clone syscall and has it execute |runner| with
// |arg|.
//
// style:     selects which member of |runner| start_thread invokes.
// stack:     caller-provided stack, or nullptr to have one allocated here.
// stacksize: size of the stack; only for an allocated stack, 0 selects
//            DEFAULT_STACKSIZE and the size is rounded up to the page size.
// guardsize: size of the guard region; rounded up to the page size for an
//            allocated stack.
// detached:  initial detach state of the thread.
//
// In the calling thread, returns 0 on success and an error number otherwise:
// EINVAL for a misaligned stack, or the error reported by the size
// computations, the stack allocation or the clone syscall. In the new thread
// (clone result 0) control is handed to start_thread.
198 int Thread::run(ThreadStyle style, ThreadRunner runner, void *arg, void *stack,
199 size_t stacksize, size_t guardsize, bool detached) {
200 bool owned_stack = false;
201 if (stack == nullptr) {
202 // TODO: Should we return EINVAL here? Should we have a generic concept of a
203 // minimum stacksize (like 16384 for pthread).
204 if (stacksize == 0)
205 stacksize = DEFAULT_STACKSIZE;
206 // Roundup stacksize/guardsize to page size.
207 // TODO: Should we also add sizeof(ThreadAttribute) and other internal
208 // meta data?
209 auto round_or_err = round_to_page(guardsize);
210 if (!round_or_err)
211 return round_or_err.error();
212 guardsize = round_or_err.value();
214 round_or_err = round_to_page(stacksize);
215 if (!round_or_err)
216 return round_or_err.error();
218 stacksize = round_or_err.value();
219 auto alloc = alloc_stack(stacksize, guardsize);
220 if (!alloc)
221 return alloc.error();
222 else
223 stack = alloc.value();
224 owned_stack = true;
227 // Validate that stack/stacksize are validly aligned.
228 uintptr_t stackaddr = reinterpret_cast<uintptr_t>(stack);
229 if ((stackaddr % STACK_ALIGNMENT != 0) ||
230 ((stackaddr + stacksize) % STACK_ALIGNMENT != 0)) {
231 if (owned_stack)
232 free_stack(stack, stacksize, guardsize);
233 return EINVAL;
236 TLSDescriptor tls;
237 init_tls(tls);
239 // When the new thread is spawned by the kernel, the new thread gets the
240 // stack we pass to the clone syscall. However, this stack is empty and does
241 // not have any local vars present in this function. Hence, one cannot
242 // pass arguments to the thread start function, or use any local vars from
243 // here. So, we pack them into the new stack from where the thread can sniff
244 // them out.
246 // Likewise, the actual thread state information is also stored on the
247 // stack memory.
249 static constexpr size_t INTERNAL_STACK_DATA_SIZE =
250 sizeof(StartArgs) + sizeof(ThreadAttributes) + sizeof(Futex);
252 // This is pretty arbitrary, but at the moment we don't adjust user provided
253 // stacksize (or default) to account for this data as it's assumed minimal. If
254 // this assert starts failing we probably should. Likewise if we can't bound
255 // this we may overflow when we subtract it from the top of the stack.
256 static_assert(INTERNAL_STACK_DATA_SIZE < EXEC_PAGESIZE);
258 // TODO: We are assuming stack grows down here.
259 auto adjusted_stack_or_err =
260 add_no_overflow(reinterpret_cast<uintptr_t>(stack), stacksize);
261 if (!adjusted_stack_or_err) {
262 cleanup_tls(tls.addr, tls.size);
263 if (owned_stack)
264 free_stack(stack, stacksize, guardsize);
265 return adjusted_stack_or_err.error();
// Reserve room for the internal data below the top of the stack and align the
// resulting address down to STACK_ALIGNMENT.
268 uintptr_t adjusted_stack =
269 adjusted_stack_or_err.value() - INTERNAL_STACK_DATA_SIZE;
270 adjusted_stack &= ~(uintptr_t(STACK_ALIGNMENT) - 1);
// Layout from adjusted_stack upwards: StartArgs, ThreadAttributes, Futex.
272 auto *start_args = reinterpret_cast<StartArgs *>(adjusted_stack);
274 attrib =
275 reinterpret_cast<ThreadAttributes *>(adjusted_stack + sizeof(StartArgs));
276 attrib->style = style;
277 attrib->detach_state =
278 uint32_t(detached ? DetachState::DETACHED : DetachState::JOINABLE);
279 attrib->stack = stack;
280 attrib->stacksize = stacksize;
281 attrib->guardsize = guardsize;
282 attrib->owned_stack = owned_stack;
283 attrib->tls = tls.addr;
284 attrib->tls_size = tls.size;
286 start_args->thread_attrib = attrib;
287 start_args->runner = runner;
288 start_args->arg = arg;
// The futex word doubles as the clear-tid address given to clone; Thread::wait
// later blocks on it through attrib->platform_data.
290 auto clear_tid = reinterpret_cast<Futex *>(
291 adjusted_stack + sizeof(StartArgs) + sizeof(ThreadAttributes));
292 clear_tid->set(CLEAR_TID_VALUE);
293 attrib->platform_data = clear_tid;
295 // The clone syscall takes arguments in an architecture specific order.
296 // Also, we want the result of the syscall to be in a register as the child
297 // thread gets a completely different stack after it is created. The stack
298 // variables from this function will not be available to the child thread.
299 #if defined(LIBC_TARGET_ARCH_IS_X86_64)
300 long register clone_result asm(CLONE_RESULT_REGISTER);
301 clone_result = LIBC_NAMESPACE::syscall_impl<long>(
302 SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack,
303 &attrib->tid, // The address where the child tid is written
304 &clear_tid->val, // The futex where the child thread status is signalled
305 tls.tp // The thread pointer value for the new thread.
307 #elif defined(LIBC_TARGET_ARCH_IS_AARCH64) || \
308 defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
309 long register clone_result asm(CLONE_RESULT_REGISTER);
310 clone_result = LIBC_NAMESPACE::syscall_impl<long>(
311 SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack,
312 &attrib->tid, // The address where the child tid is written
313 tls.tp, // The thread pointer value for the new thread.
314 &clear_tid->val // The futex where the child thread status is signalled
316 #else
317 #error "Unsupported architecture for the clone syscall."
318 #endif
// A result of 0 means we are running in the new thread; a negative result is
// a failure reported to the calling thread.
320 if (clone_result == 0) {
321 #ifdef LIBC_TARGET_ARCH_IS_AARCH64
322 // We set the frame pointer to be the same as the "sp" so that start args
323 // can be sniffed out from start_thread.
324 #ifdef __clang__
325 // GCC does not currently implement __arm_wsr64/__arm_rsr64.
326 __arm_wsr64("x29", __arm_rsr64("sp"));
327 #else
328 asm volatile("mov x29, sp");
329 #endif
330 #elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
331 asm volatile("mv fp, sp");
332 #endif
333 start_thread();
334 } else if (clone_result < 0) {
// No thread was created, so release the TLS and (if owned) the stack here.
335 cleanup_thread_resources(attrib);
336 return static_cast<int>(-clone_result);
339 return 0;
342 int Thread::join(ThreadReturnValue &retval) {
343 wait();
345 if (attrib->style == ThreadStyle::POSIX)
346 retval.posix_retval = attrib->retval.posix_retval;
347 else
348 retval.stdc_retval = attrib->retval.stdc_retval;
350 cleanup_thread_resources(attrib);
352 return 0;
355 int Thread::detach() {
356 uint32_t joinable_state = uint32_t(DetachState::JOINABLE);
357 if (attrib->detach_state.compare_exchange_strong(
358 joinable_state, uint32_t(DetachState::DETACHED))) {
359 return int(DetachType::SIMPLE);
362 // If the thread was already detached, then the detach method should not
363 // be called at all. If the thread is exiting, then we wait for it to exit
364 // and free up resources.
365 wait();
367 cleanup_thread_resources(attrib);
369 return int(DetachType::CLEANUP);
372 void Thread::wait() {
373 // The kernel should set the value at the clear tid address to zero.
374 // If not, it is a spurious wake and we should continue to wait on
375 // the futex.
376 auto *clear_tid = reinterpret_cast<Futex *>(attrib->platform_data);
377 // We cannot do a FUTEX_WAIT_PRIVATE here as the kernel does a
378 // FUTEX_WAKE and not a FUTEX_WAKE_PRIVATE.
379 while (clear_tid->load() != 0)
380 clear_tid->wait(CLEAR_TID_VALUE, cpp::nullopt, true);
383 bool Thread::operator==(const Thread &thread) const {
384 return attrib->tid == thread.attrib->tid;
// Leading part of the path of a thread's "comm" file; the full path built by
// construct_thread_name_file_path is <prefix><tid>/comm.
387 static constexpr cpp::string_view THREAD_NAME_PATH_PREFIX("/proc/self/task/");
// Size of the buffer that set_name and get_name use to hold that path.
388 static constexpr size_t THREAD_NAME_PATH_SIZE =
389 THREAD_NAME_PATH_PREFIX.size() +
390 IntegerToString<int>::buffer_size() + // Size of tid
391 1 + // For '/' character
392 5; // For the file name "comm" and the nullterminator.
394 static void construct_thread_name_file_path(cpp::StringStream &stream,
395 int tid) {
396 stream << THREAD_NAME_PATH_PREFIX << tid << '/' << cpp::string_view("comm")
397 << cpp::StringStream::ENDS;
400 int Thread::set_name(const cpp::string_view &name) {
401 if (name.size() >= NAME_SIZE_MAX)
402 return ERANGE;
404 if (*this == self) {
405 // If we are setting the name of the current thread, then we can
406 // use the syscall to set the name.
407 int retval =
408 LIBC_NAMESPACE::syscall_impl<int>(SYS_prctl, PR_SET_NAME, name.data());
409 if (retval < 0)
410 return -retval;
411 else
412 return 0;
415 char path_name_buffer[THREAD_NAME_PATH_SIZE];
416 cpp::StringStream path_stream(path_name_buffer);
417 construct_thread_name_file_path(path_stream, attrib->tid);
418 #ifdef SYS_open
419 int fd =
420 LIBC_NAMESPACE::syscall_impl<int>(SYS_open, path_name_buffer, O_RDWR);
421 #else
422 int fd = LIBC_NAMESPACE::syscall_impl<int>(SYS_openat, AT_FDCWD,
423 path_name_buffer, O_RDWR);
424 #endif
425 if (fd < 0)
426 return -fd;
428 int retval = LIBC_NAMESPACE::syscall_impl<int>(SYS_write, fd, name.data(),
429 name.size());
430 LIBC_NAMESPACE::syscall_impl<long>(SYS_close, fd);
432 if (retval < 0)
433 return -retval;
434 else if (retval != int(name.size()))
435 return EIO;
436 else
437 return 0;
440 int Thread::get_name(cpp::StringStream &name) const {
441 if (name.bufsize() < NAME_SIZE_MAX)
442 return ERANGE;
444 char name_buffer[NAME_SIZE_MAX];
446 if (*this == self) {
447 // If we are getting the name of the current thread, then we can
448 // use the syscall to get the name.
449 int retval =
450 LIBC_NAMESPACE::syscall_impl<int>(SYS_prctl, PR_GET_NAME, name_buffer);
451 if (retval < 0)
452 return -retval;
453 name << name_buffer << cpp::StringStream::ENDS;
454 return 0;
457 char path_name_buffer[THREAD_NAME_PATH_SIZE];
458 cpp::StringStream path_stream(path_name_buffer);
459 construct_thread_name_file_path(path_stream, attrib->tid);
460 #ifdef SYS_open
461 int fd =
462 LIBC_NAMESPACE::syscall_impl<int>(SYS_open, path_name_buffer, O_RDONLY);
463 #else
464 int fd = LIBC_NAMESPACE::syscall_impl<int>(SYS_openat, AT_FDCWD,
465 path_name_buffer, O_RDONLY);
466 #endif
467 if (fd < 0)
468 return -fd;
470 int retval = LIBC_NAMESPACE::syscall_impl<int>(SYS_read, fd, name_buffer,
471 NAME_SIZE_MAX);
472 LIBC_NAMESPACE::syscall_impl<long>(SYS_close, fd);
473 if (retval < 0)
474 return -retval;
475 if (retval == NAME_SIZE_MAX)
476 return ERANGE;
477 if (name_buffer[retval - 1] == '\n')
478 name_buffer[retval - 1] = '\0';
479 else
480 name_buffer[retval] = '\0';
481 name << name_buffer << cpp::StringStream::ENDS;
482 return 0;
// Terminates the calling thread. Runs the thread's atexit callbacks and then
// exits through SYS_exit, so this function does not return.
// If the detach state can be switched from JOINABLE to EXITING, the exit
// status is the member of |retval| selected by |style| and the resources are
// left for another thread to release. Otherwise the thread releases its own
// resources and exits with status 0.
485 void thread_exit(ThreadReturnValue retval, ThreadStyle style) {
486 auto attrib = self.attrib;
488 // The very first thing we do is to call the thread's atexit callbacks.
489 // These callbacks could be the ones registered by the language runtimes,
490 // for example, the destructors of thread local objects. They can also
491 // be destructors of the TSS objects set using API like pthread_setspecific.
492 // NOTE: We cannot call the atexit callbacks as part of the
493 // cleanup_thread_resources function as that function can be called from a
494 // different thread. The destructors of thread local and TSS objects should
495 // be called by the thread which owns them.
496 internal::call_atexit_callbacks(attrib);
// The exchange only succeeds while the thread is still JOINABLE.
498 uint32_t joinable_state = uint32_t(DetachState::JOINABLE);
499 if (!attrib->detach_state.compare_exchange_strong(
500 joinable_state, uint32_t(DetachState::EXITING))) {
501 // Thread is detached so cleanup the resources.
502 cleanup_thread_resources(attrib);
504 // Set the CLEAR_TID address to nullptr to prevent the kernel
505 // from signalling at a non-existent futex location.
506 LIBC_NAMESPACE::syscall_impl<long>(SYS_set_tid_address, 0);
507 // Return value for detached thread should be unused. We need to avoid
508 // referencing `style` or `retval.*` because they may be stored on the stack
509 // and we have deallocated our stack!
510 LIBC_NAMESPACE::syscall_impl<long>(SYS_exit, 0);
511 __builtin_unreachable();
// Joinable thread: the stack is still mapped, so |style| and |retval| can be
// read safely here.
514 if (style == ThreadStyle::POSIX)
515 LIBC_NAMESPACE::syscall_impl<long>(SYS_exit, retval.posix_retval);
516 else
517 LIBC_NAMESPACE::syscall_impl<long>(SYS_exit, retval.stdc_retval);
518 __builtin_unreachable();
521 } // namespace LIBC_NAMESPACE_DECL