[libc][NFC] Move aligned access implementations to separate header
[llvm-project.git] / libc / src / __support / threads / linux / thread.cpp
blob994057aa949ab52b961a9f55c32ecb2cde0abc84
1 //===--- Implementation of a Linux thread class -----------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "src/__support/threads/thread.h"
10 #include "config/linux/app.h"
11 #include "src/__support/CPP/atomic.h"
12 #include "src/__support/CPP/string_view.h"
13 #include "src/__support/CPP/stringstream.h"
14 #include "src/__support/OSUtil/syscall.h" // For syscall functions.
15 #include "src/__support/common.h"
16 #include "src/__support/error_or.h"
17 #include "src/__support/threads/linux/futex_word.h" // For FutexWordType
18 #include "src/errno/libc_errno.h" // For error macros
20 #ifdef LIBC_TARGET_ARCH_IS_AARCH64
21 #include <arm_acle.h>
22 #endif
24 #include <fcntl.h>
25 #include <linux/futex.h>
26 #include <linux/param.h> // For EXEC_PAGESIZE.
27 #include <linux/prctl.h> // For PR_SET_NAME
28 #include <linux/sched.h> // For CLONE_* flags.
29 #include <stdint.h>
30 #include <sys/mman.h> // For PROT_* and MAP_* definitions.
31 #include <sys/syscall.h> // For syscall numbers.
33 namespace __llvm_libc {
// Syscall number used to create memory mappings. SYS_mmap2 is preferred when
// the target defines it; otherwise SYS_mmap is used. Targets providing neither
// are rejected at compile time.
#if defined(SYS_mmap2)
static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap2;
#elif defined(SYS_mmap)
static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap;
#else
#error "mmap or mmap2 syscalls not available."
#endif
// Upper bound on a thread name's length, counting the null terminator.
static constexpr size_t NAME_SIZE_MAX = 16;

// Value stored in a new thread's clear-tid futex word before it is cloned.
static constexpr uint32_t CLEAR_TID_VALUE = 0xABCD1234;

// Flags passed to the clone syscall when spawning a new thread:
//   CLONE_VM             - Share the memory space with the parent.
//   CLONE_FS             - Share the file system with the parent.
//   CLONE_FILES          - Share the files with the parent.
//   CLONE_SIGHAND        - Share the signal handlers with the parent.
//   CLONE_THREAD         - Same thread group as the parent.
//   CLONE_SYSVSEM        - Share a single list of System V semaphore
//                          adjustment values.
//   CLONE_PARENT_SETTID  - Set child thread ID in |ptid| of the parent.
//   CLONE_CHILD_CLEARTID - Let the kernel clear the tid address and wake the
//                          joining thread.
//   CLONE_SETTLS         - Setup the thread pointer of the new thread.
static constexpr unsigned CLONE_SYSCALL_FLAGS =
    CLONE_VM | CLONE_FS | CLONE_FILES | CLONE_SIGHAND | CLONE_THREAD |
    CLONE_SYSVSEM | CLONE_PARENT_SETTID | CLONE_CHILD_CLEARTID | CLONE_SETTLS;
58 #ifdef LIBC_TARGET_ARCH_IS_AARCH64
59 #define CLONE_RESULT_REGISTER "x0"
60 #elif defined(LIBC_TARGET_ARCH_IS_RISCV64)
61 #define CLONE_RESULT_REGISTER "t0"
62 #elif defined(LIBC_TARGET_ARCH_IS_X86_64)
63 #define CLONE_RESULT_REGISTER "rax"
64 #else
65 #error "CLONE_RESULT_REGISTER not defined for your target architecture"
66 #endif
68 static constexpr ErrorOr<size_t> add_no_overflow(size_t lhs, size_t rhs) {
69 if (lhs > SIZE_MAX - rhs)
70 return Error{EINVAL};
71 if (rhs > SIZE_MAX - lhs)
72 return Error{EINVAL};
73 return lhs + rhs;
76 static constexpr ErrorOr<size_t> round_to_page(size_t v) {
77 auto vp_or_err = add_no_overflow(v, EXEC_PAGESIZE - 1);
78 if (!vp_or_err)
79 return vp_or_err;
81 return vp_or_err.value() & -EXEC_PAGESIZE;
84 LIBC_INLINE ErrorOr<void *> alloc_stack(size_t stacksize, size_t guardsize) {
86 // Guard needs to be mapped with PROT_NONE
87 int prot = guardsize ? PROT_NONE : PROT_READ | PROT_WRITE;
88 auto size_or_err = add_no_overflow(stacksize, guardsize);
89 if (!size_or_err)
90 return Error{int(size_or_err.error())};
91 size_t size = size_or_err.value();
93 // TODO: Maybe add MAP_STACK? Currently unimplemented on linux but helps
94 // future-proof.
95 long mmap_result =
96 __llvm_libc::syscall_impl(MMAP_SYSCALL_NUMBER,
97 0, // No special address
98 size, prot,
99 MAP_ANONYMOUS | MAP_PRIVATE, // Process private.
100 -1, // Not backed by any file
101 0 // No offset
103 if (mmap_result < 0 && (uintptr_t(mmap_result) >= UINTPTR_MAX - size))
104 return Error{int(-mmap_result)};
106 if (guardsize) {
107 // Give read/write permissions to actual stack.
108 // TODO: We are assuming stack growsdown here.
109 long result =
110 __llvm_libc::syscall_impl(SYS_mprotect, mmap_result + guardsize,
111 stacksize, PROT_READ | PROT_WRITE);
113 if (result != 0)
114 return Error{int(-result)};
116 mmap_result += guardsize;
117 return reinterpret_cast<void *>(mmap_result);
120 // This must always be inlined as we may be freeing the calling threads stack in
121 // which case a normal return from the top the stack would cause an invalid
122 // memory read.
123 [[gnu::always_inline]] LIBC_INLINE void
124 free_stack(void *stack, size_t stacksize, size_t guardsize) {
125 uintptr_t stackaddr = reinterpret_cast<uintptr_t>(stack);
126 stackaddr -= guardsize;
127 stack = reinterpret_cast<void *>(stackaddr);
128 __llvm_libc::syscall_impl(SYS_munmap, stack, stacksize + guardsize);
131 struct Thread;
// Arguments handed over to a newly cloned thread. Thread::run fills in an
// instance of this struct inside the new thread's stack memory, and
// start_thread reads it back through get_start_args_addr.
133 // We align the start args to 16-byte boundary as we adjust the allocated
134 // stack memory with its size. We want the adjusted address to be at a
135 // 16-byte boundary to satisfy the x86_64 and aarch64 ABI requirements.
136 // If different architecture in future requires higher alignment, then we
137 // can add a platform specific alignment spec.
138 struct alignas(STACK_ALIGNMENT) StartArgs {
// Attributes of the thread being started; start_thread installs this pointer
// as self.attrib.
139 ThreadAttributes *thread_attrib;
// Entry point of the thread; start_thread calls posix_runner or stdc_runner
// depending on the thread's style.
140 ThreadRunner runner;
// Argument passed to the runner.
141 void *arg;
144 // This must always be inlined as we may be freeing the calling threads stack in
145 // which case a normal return from the top the stack would cause an invalid
146 // memory read.
147 [[gnu::always_inline]] LIBC_INLINE void
148 cleanup_thread_resources(ThreadAttributes *attrib) {
149 // Cleanup the TLS before the stack as the TLS information is stored on
150 // the stack.
151 cleanup_tls(attrib->tls, attrib->tls_size);
152 if (attrib->owned_stack)
153 free_stack(attrib->stack, attrib->stacksize, attrib->guardsize);
156 [[gnu::always_inline]] LIBC_INLINE uintptr_t get_start_args_addr() {
157 // NOTE: For __builtin_frame_address to work reliably across compilers,
158 // architectures and various optimization levels, the TU including this file
159 // should be compiled with -fno-omit-frame-pointer.
160 #ifdef LIBC_TARGET_ARCH_IS_X86_64
161 return reinterpret_cast<uintptr_t>(__builtin_frame_address(0))
162 // The x86_64 call instruction pushes resume address on to the stack.
163 // Next, The x86_64 SysV ABI requires that the frame pointer be pushed
164 // on to the stack. So, we have to step past two 64-bit values to get
165 // to the start args.
166 + sizeof(uintptr_t) * 2;
167 #elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
168 // The frame pointer after cloning the new thread in the Thread::run method
169 // is set to the stack pointer where start args are stored. So, we fetch
170 // from there.
171 return reinterpret_cast<uintptr_t>(__builtin_frame_address(1));
172 #elif defined(LIBC_TARGET_ARCH_IS_RISCV64)
173 // The current frame pointer is the previous stack pointer where the start
174 // args are stored.
175 return reinterpret_cast<uintptr_t>(__builtin_frame_address(0));
176 #endif
179 [[gnu::noinline]] LIBC_INLINE void start_thread() {
180 auto *start_args = reinterpret_cast<StartArgs *>(get_start_args_addr());
181 auto *attrib = start_args->thread_attrib;
182 self.attrib = attrib;
183 self.attrib->atexit_callback_mgr = internal::get_thread_atexit_callback_mgr();
185 if (attrib->style == ThreadStyle::POSIX) {
186 attrib->retval.posix_retval =
187 start_args->runner.posix_runner(start_args->arg);
188 thread_exit(ThreadReturnValue(attrib->retval.posix_retval),
189 ThreadStyle::POSIX);
190 } else {
191 attrib->retval.stdc_retval =
192 start_args->runner.stdc_runner(start_args->arg);
193 thread_exit(ThreadReturnValue(attrib->retval.stdc_retval),
194 ThreadStyle::STDC);
198 int Thread::run(ThreadStyle style, ThreadRunner runner, void *arg, void *stack,
199 size_t stacksize, size_t guardsize, bool detached) {
200 bool owned_stack = false;
201 if (stack == nullptr) {
202 // TODO: Should we return EINVAL here? Should we have a generic concept of a
203 // minimum stacksize (like 16384 for pthread).
204 if (stacksize == 0)
205 stacksize = DEFAULT_STACKSIZE;
206 // Roundup stacksize/guardsize to page size.
207 // TODO: Should be also add sizeof(ThreadAttribute) and other internal
208 // meta data?
209 auto round_or_err = round_to_page(guardsize);
210 if (!round_or_err)
211 return round_or_err.error();
212 guardsize = round_or_err.value();
214 round_or_err = round_to_page(stacksize);
215 if (!round_or_err)
216 return round_or_err.error();
218 stacksize = round_or_err.value();
219 auto alloc = alloc_stack(stacksize, guardsize);
220 if (!alloc)
221 return alloc.error();
222 else
223 stack = alloc.value();
224 owned_stack = true;
227 // Validate that stack/stacksize are validly aligned.
228 uintptr_t stackaddr = reinterpret_cast<uintptr_t>(stack);
229 if ((stackaddr % STACK_ALIGNMENT != 0) ||
230 ((stackaddr + stacksize) % STACK_ALIGNMENT != 0)) {
231 if (owned_stack)
232 free_stack(stack, stacksize, guardsize);
233 return EINVAL;
236 TLSDescriptor tls;
237 init_tls(tls);
239 // When the new thread is spawned by the kernel, the new thread gets the
240 // stack we pass to the clone syscall. However, this stack is empty and does
241 // not have any local vars present in this function. Hence, one cannot
242 // pass arguments to the thread start function, or use any local vars from
243 // here. So, we pack them into the new stack from where the thread can sniff
244 // them out.
246 // Likewise, the actual thread state information is also stored on the
247 // stack memory.
249 static constexpr size_t INTERNAL_STACK_DATA_SIZE =
250 sizeof(StartArgs) + sizeof(ThreadAttributes) +
251 sizeof(cpp::Atomic<FutexWordType>);
253 // This is pretty arbitrary, but at the moment we don't adjust user provided
254 // stacksize (or default) to account for this data as its assumed minimal. If
255 // this assert starts failing we probably should. Likewise if we can't bound
256 // this we may overflow when we subtract it from the top of the stack.
257 static_assert(INTERNAL_STACK_DATA_SIZE < EXEC_PAGESIZE);
259 // TODO: We are assuming stack growsdown here.
260 auto adjusted_stack_or_err =
261 add_no_overflow(reinterpret_cast<uintptr_t>(stack), stacksize);
262 if (!adjusted_stack_or_err) {
263 cleanup_tls(tls.addr, tls.size);
264 if (owned_stack)
265 free_stack(stack, stacksize, guardsize);
266 return adjusted_stack_or_err.error();
269 uintptr_t adjusted_stack =
270 adjusted_stack_or_err.value() - INTERNAL_STACK_DATA_SIZE;
271 adjusted_stack &= ~(uintptr_t(STACK_ALIGNMENT) - 1);
273 auto *start_args = reinterpret_cast<StartArgs *>(adjusted_stack);
275 attrib =
276 reinterpret_cast<ThreadAttributes *>(adjusted_stack + sizeof(StartArgs));
277 attrib->style = style;
278 attrib->detach_state =
279 uint32_t(detached ? DetachState::DETACHED : DetachState::JOINABLE);
280 attrib->stack = stack;
281 attrib->stacksize = stacksize;
282 attrib->guardsize = guardsize;
283 attrib->owned_stack = owned_stack;
284 attrib->tls = tls.addr;
285 attrib->tls_size = tls.size;
287 start_args->thread_attrib = attrib;
288 start_args->runner = runner;
289 start_args->arg = arg;
291 auto clear_tid = reinterpret_cast<cpp::Atomic<FutexWordType> *>(
292 adjusted_stack + sizeof(StartArgs) + sizeof(ThreadAttributes));
293 clear_tid->val = CLEAR_TID_VALUE;
294 attrib->platform_data = clear_tid;
296 // The clone syscall takes arguments in an architecture specific order.
297 // Also, we want the result of the syscall to be in a register as the child
298 // thread gets a completely different stack after it is created. The stack
299 // variables from this function will not be availalbe to the child thread.
300 #if defined(LIBC_TARGET_ARCH_IS_X86_64)
301 long register clone_result asm(CLONE_RESULT_REGISTER);
302 clone_result = __llvm_libc::syscall_impl(
303 SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack,
304 &attrib->tid, // The address where the child tid is written
305 &clear_tid->val, // The futex where the child thread status is signalled
306 tls.tp // The thread pointer value for the new thread.
308 #elif defined(LIBC_TARGET_ARCH_IS_AARCH64) || \
309 defined(LIBC_TARGET_ARCH_IS_RISCV64)
310 long register clone_result asm(CLONE_RESULT_REGISTER);
311 clone_result = __llvm_libc::syscall_impl(
312 SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack,
313 &attrib->tid, // The address where the child tid is written
314 tls.tp, // The thread pointer value for the new thread.
315 &clear_tid->val // The futex where the child thread status is signalled
317 #else
318 #error "Unsupported architecture for the clone syscall."
319 #endif
321 if (clone_result == 0) {
322 #ifdef LIBC_TARGET_ARCH_IS_AARCH64
323 // We set the frame pointer to be the same as the "sp" so that start args
324 // can be sniffed out from start_thread.
325 #ifdef __clang__
326 // GCC does not currently implement __arm_wsr64/__arm_rsr64.
327 __arm_wsr64("x29", __arm_rsr64("sp"));
328 #else
329 asm volatile("mov x29, sp");
330 #endif
331 #elif defined(LIBC_TARGET_ARCH_IS_RISCV64)
332 asm volatile("mv fp, sp");
333 #endif
334 start_thread();
335 } else if (clone_result < 0) {
336 cleanup_thread_resources(attrib);
337 return -clone_result;
340 return 0;
343 int Thread::join(ThreadReturnValue &retval) {
344 wait();
346 if (attrib->style == ThreadStyle::POSIX)
347 retval.posix_retval = attrib->retval.posix_retval;
348 else
349 retval.stdc_retval = attrib->retval.stdc_retval;
351 cleanup_thread_resources(attrib);
353 return 0;
356 int Thread::detach() {
357 uint32_t joinable_state = uint32_t(DetachState::JOINABLE);
358 if (attrib->detach_state.compare_exchange_strong(
359 joinable_state, uint32_t(DetachState::DETACHED))) {
360 return int(DetachType::SIMPLE);
363 // If the thread was already detached, then the detach method should not
364 // be called at all. If the thread is exiting, then we wait for it to exit
365 // and free up resources.
366 wait();
368 cleanup_thread_resources(attrib);
370 return int(DetachType::CLEANUP);
373 void Thread::wait() {
374 // The kernel should set the value at the clear tid address to zero.
375 // If not, it is a spurious wake and we should continue to wait on
376 // the futex.
377 auto *clear_tid =
378 reinterpret_cast<cpp::Atomic<FutexWordType> *>(attrib->platform_data);
379 while (clear_tid->load() != 0) {
380 // We cannot do a FUTEX_WAIT_PRIVATE here as the kernel does a
381 // FUTEX_WAKE and not a FUTEX_WAKE_PRIVATE.
382 __llvm_libc::syscall_impl(SYS_futex, &clear_tid->val, FUTEX_WAIT,
383 CLEAR_TID_VALUE, nullptr);
387 bool Thread::operator==(const Thread &thread) const {
388 return attrib->tid == thread.attrib->tid;
391 static constexpr cpp::string_view THREAD_NAME_PATH_PREFIX("/proc/self/task/");
392 static constexpr size_t THREAD_NAME_PATH_SIZE =
393 THREAD_NAME_PATH_PREFIX.size() +
394 IntegerToString::dec_bufsize<int>() + // Size of tid
395 1 + // For '/' character
396 5; // For the file name "comm" and the nullterminator.
398 static void construct_thread_name_file_path(cpp::StringStream &stream,
399 int tid) {
400 stream << THREAD_NAME_PATH_PREFIX << tid << '/' << cpp::string_view("comm")
401 << cpp::StringStream::ENDS;
404 int Thread::set_name(const cpp::string_view &name) {
405 if (name.size() >= NAME_SIZE_MAX)
406 return ERANGE;
408 if (*this == self) {
409 // If we are setting the name of the current thread, then we can
410 // use the syscall to set the name.
411 int retval = __llvm_libc::syscall_impl(SYS_prctl, PR_SET_NAME, name.data());
412 if (retval < 0)
413 return -retval;
414 else
415 return 0;
418 char path_name_buffer[THREAD_NAME_PATH_SIZE];
419 cpp::StringStream path_stream(path_name_buffer);
420 construct_thread_name_file_path(path_stream, attrib->tid);
421 #ifdef SYS_open
422 int fd = __llvm_libc::syscall_impl(SYS_open, path_name_buffer, O_RDWR);
423 #else
424 int fd =
425 __llvm_libc::syscall_impl(SYS_openat, AT_FDCWD, path_name_buffer, O_RDWR);
426 #endif
427 if (fd < 0)
428 return -fd;
430 int retval =
431 __llvm_libc::syscall_impl(SYS_write, fd, name.data(), name.size());
432 __llvm_libc::syscall_impl(SYS_close, fd);
434 if (retval < 0)
435 return -retval;
436 else if (retval != int(name.size()))
437 return EIO;
438 else
439 return 0;
442 int Thread::get_name(cpp::StringStream &name) const {
443 if (name.bufsize() < NAME_SIZE_MAX)
444 return ERANGE;
446 char name_buffer[NAME_SIZE_MAX];
448 if (*this == self) {
449 // If we are getting the name of the current thread, then we can
450 // use the syscall to get the name.
451 int retval = __llvm_libc::syscall_impl(SYS_prctl, PR_GET_NAME, name_buffer);
452 if (retval < 0)
453 return -retval;
454 name << name_buffer << cpp::StringStream::ENDS;
455 return 0;
458 char path_name_buffer[THREAD_NAME_PATH_SIZE];
459 cpp::StringStream path_stream(path_name_buffer);
460 construct_thread_name_file_path(path_stream, attrib->tid);
461 #ifdef SYS_open
462 int fd = __llvm_libc::syscall_impl(SYS_open, path_name_buffer, O_RDONLY);
463 #else
464 int fd = __llvm_libc::syscall_impl(SYS_openat, AT_FDCWD, path_name_buffer,
465 O_RDONLY);
466 #endif
467 if (fd < 0)
468 return -fd;
470 int retval =
471 __llvm_libc::syscall_impl(SYS_read, fd, name_buffer, NAME_SIZE_MAX);
472 __llvm_libc::syscall_impl(SYS_close, fd);
473 if (retval < 0)
474 return -retval;
475 if (retval == NAME_SIZE_MAX)
476 return ERANGE;
477 if (name_buffer[retval - 1] == '\n')
478 name_buffer[retval - 1] = '\0';
479 else
480 name_buffer[retval] = '\0';
481 name << name_buffer << cpp::StringStream::ENDS;
482 return 0;
485 void thread_exit(ThreadReturnValue retval, ThreadStyle style) {
486 auto attrib = self.attrib;
488 // The very first thing we do is to call the thread's atexit callbacks.
489 // These callbacks could be the ones registered by the language runtimes,
490 // for example, the destructors of thread local objects. They can also
491 // be destructors of the TSS objects set using API like pthread_setspecific.
492 // NOTE: We cannot call the atexit callbacks as part of the
493 // cleanup_thread_resources function as that function can be called from a
494 // different thread. The destructors of thread local and TSS objects should
495 // be called by the thread which owns them.
496 internal::call_atexit_callbacks(attrib);
498 uint32_t joinable_state = uint32_t(DetachState::JOINABLE);
499 if (!attrib->detach_state.compare_exchange_strong(
500 joinable_state, uint32_t(DetachState::EXITING))) {
501 // Thread is detached so cleanup the resources.
502 cleanup_thread_resources(attrib);
504 // Set the CLEAR_TID address to nullptr to prevent the kernel
505 // from signalling at a non-existent futex location.
506 __llvm_libc::syscall_impl(SYS_set_tid_address, 0);
507 // Return value for detached thread should be unused. We need to avoid
508 // referencing `style` or `retval.*` because they may be stored on the stack
509 // and we have deallocated our stack!
510 __llvm_libc::syscall_impl(SYS_exit, 0);
511 __builtin_unreachable();
514 if (style == ThreadStyle::POSIX)
515 __llvm_libc::syscall_impl(SYS_exit, retval.posix_retval);
516 else
517 __llvm_libc::syscall_impl(SYS_exit, retval.stdc_retval);
518 __builtin_unreachable();
521 } // namespace __llvm_libc