//===--- Implementation of a Linux thread class -----------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/__support/threads/thread.h"
#include "config/linux/app.h"
#include "src/__support/CPP/atomic.h"
#include "src/__support/CPP/string_view.h"
#include "src/__support/CPP/stringstream.h"
#include "src/__support/OSUtil/syscall.h" // For syscall functions.
#include "src/__support/common.h"
#include "src/__support/error_or.h"
#include "src/__support/threads/linux/futex_word.h" // For FutexWordType
#include "src/errno/libc_errno.h" // For error macros

#ifdef LIBC_TARGET_ARCH_IS_AARCH64
#include <arm_acle.h> // For __arm_wsr64/__arm_rsr64.
#endif

#include <fcntl.h> // For O_RDWR, O_RDONLY and AT_FDCWD.
#include <linux/futex.h>
#include <linux/param.h> // For EXEC_PAGESIZE.
#include <linux/prctl.h> // For PR_SET_NAME
#include <linux/sched.h> // For CLONE_* flags.
#include <stdint.h>
#include <sys/mman.h>    // For PROT_* and MAP_* definitions.
#include <sys/syscall.h> // For syscall numbers.

namespace __llvm_libc {

#ifdef SYS_mmap2
static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap2;
#elif defined(SYS_mmap)
static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap;
#else
#error "mmap or mmap2 syscalls not available."
#endif

static constexpr size_t NAME_SIZE_MAX = 16; // Includes the null terminator
static constexpr uint32_t CLEAR_TID_VALUE = 0xABCD1234;
static constexpr unsigned CLONE_SYSCALL_FLAGS =
    CLONE_VM        // Share the memory space with the parent.
    | CLONE_FS      // Share the file system with the parent.
    | CLONE_FILES   // Share the files with the parent.
    | CLONE_SIGHAND // Share the signal handlers with the parent.
    | CLONE_THREAD  // Same thread group as the parent.
    | CLONE_SYSVSEM // Share a single list of System V semaphore adjustment
                    // values.
    | CLONE_PARENT_SETTID  // Set child thread ID in |ptid| of the parent.
    | CLONE_CHILD_CLEARTID // Let the kernel clear the tid address and
                           // wake the joining thread.
    | CLONE_SETTLS;        // Set up the thread pointer of the new thread.
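// Note: CLONE_PARENT_SETTID makes the kernel write the new thread's tid to
// the |ptid| address passed to clone, and CLONE_CHILD_CLEARTID makes it zero
// the |ctid| address and issue a FUTEX_WAKE on it when the thread exits;
// Thread::wait below relies on that wake-up.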

#ifdef LIBC_TARGET_ARCH_IS_AARCH64
#define CLONE_RESULT_REGISTER "x0"
#elif defined(LIBC_TARGET_ARCH_IS_RISCV64)
#define CLONE_RESULT_REGISTER "t0"
#elif defined(LIBC_TARGET_ARCH_IS_X86_64)
#define CLONE_RESULT_REGISTER "rax"
#else
#error "CLONE_RESULT_REGISTER not defined for your target architecture"
#endif

static constexpr ErrorOr<size_t> add_no_overflow(size_t lhs, size_t rhs) {
  if (lhs > SIZE_MAX - rhs)
    return Error{EINVAL};
  if (rhs > SIZE_MAX - lhs)
    return Error{EINVAL};
  return lhs + rhs;
}

static constexpr ErrorOr<size_t> round_to_page(size_t v) {
  auto vp_or_err = add_no_overflow(v, EXEC_PAGESIZE - 1);
  if (!vp_or_err)
    return vp_or_err;
  // Adding EXEC_PAGESIZE - 1 and masking off the low bits rounds v up to the
  // next multiple of the page size.
  return vp_or_err.value() & -EXEC_PAGESIZE;
}

LIBC_INLINE ErrorOr<void *> alloc_stack(size_t stacksize, size_t guardsize) {

  // Guard needs to be mapped with PROT_NONE.
  int prot = guardsize ? PROT_NONE : PROT_READ | PROT_WRITE;
  auto size_or_err = add_no_overflow(stacksize, guardsize);
  if (!size_or_err)
    return Error{int(size_or_err.error())};
  size_t size = size_or_err.value();

  // TODO: Maybe add MAP_STACK? Currently unimplemented on linux but helps
  // future-proof.
  long mmap_result =
      __llvm_libc::syscall_impl(MMAP_SYSCALL_NUMBER,
                                0, // No special address
                                size, prot,
                                MAP_ANONYMOUS | MAP_PRIVATE, // Process private.
                                -1, // Not backed by any file
                                0   // No offset
      );
  if (mmap_result < 0 && (uintptr_t(mmap_result) >= UINTPTR_MAX - size))
    return Error{int(-mmap_result)};
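
  // When a guard is requested, the whole mapping starts out PROT_NONE; the
  // usable stack region above the guard pages is made read/write below.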
  if (guardsize) {
    // Give read/write permissions to the actual stack.
    // TODO: We are assuming the stack grows down here.
    long result =
        __llvm_libc::syscall_impl(SYS_mprotect, mmap_result + guardsize,
                                  stacksize, PROT_READ | PROT_WRITE);

    if (result != 0)
      return Error{int(-result)};
  }
  mmap_result += guardsize;
  return reinterpret_cast<void *>(mmap_result);
}

// This must always be inlined as we may be freeing the calling thread's stack,
// in which case a normal return from the top of the stack would cause an
// invalid memory read.
[[gnu::always_inline]] LIBC_INLINE void
free_stack(void *stack, size_t stacksize, size_t guardsize) {
  uintptr_t stackaddr = reinterpret_cast<uintptr_t>(stack);
  stackaddr -= guardsize;
  stack = reinterpret_cast<void *>(stackaddr);
  __llvm_libc::syscall_impl(SYS_munmap, stack, stacksize + guardsize);
}

// We align the start args to a 16-byte boundary as we adjust the allocated
// stack memory with its size. We want the adjusted address to be at a
// 16-byte boundary to satisfy the x86_64 and aarch64 ABI requirements.
// If a different architecture requires higher alignment in the future, we
// can add a platform specific alignment spec.
struct alignas(STACK_ALIGNMENT) StartArgs {
  ThreadAttributes *thread_attrib;
  ThreadRunner runner;
  void *arg;
};

// This must always be inlined as we may be freeing the calling thread's stack,
// in which case a normal return from the top of the stack would cause an
// invalid memory read.
[[gnu::always_inline]] LIBC_INLINE void
cleanup_thread_resources(ThreadAttributes *attrib) {
  // Clean up the TLS before the stack as the TLS information is stored on
  // the stack.
  cleanup_tls(attrib->tls, attrib->tls_size);
  if (attrib->owned_stack)
    free_stack(attrib->stack, attrib->stacksize, attrib->guardsize);
}

[[gnu::always_inline]] LIBC_INLINE uintptr_t get_start_args_addr() {
// NOTE: For __builtin_frame_address to work reliably across compilers,
// architectures and various optimization levels, the TU including this file
// should be compiled with -fno-omit-frame-pointer.
#ifdef LIBC_TARGET_ARCH_IS_X86_64
  return reinterpret_cast<uintptr_t>(__builtin_frame_address(0))
         // The x86_64 call instruction pushes the resume address on to the
         // stack. Next, the x86_64 SysV ABI requires that the frame pointer be
         // pushed on to the stack. So, we have to step past two 64-bit values
         // to get to the start args.
         + sizeof(uintptr_t) * 2;
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
  // The frame pointer after cloning the new thread in the Thread::run method
  // is set to the stack pointer where the start args are stored. So, we fetch
  // the start args from there.
  return reinterpret_cast<uintptr_t>(__builtin_frame_address(1));
#elif defined(LIBC_TARGET_ARCH_IS_RISCV64)
  // The current frame pointer is the previous stack pointer where the start
  // args are stored.
  return reinterpret_cast<uintptr_t>(__builtin_frame_address(0));
#endif
}
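
// get_start_args_addr is force-inlined into start_thread (which is marked
// noinline), so the frame addresses above are taken relative to start_thread's
// own frame, which lives directly on the stack handed to the clone syscall.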

[[gnu::noinline]] LIBC_INLINE void start_thread() {
  auto *start_args = reinterpret_cast<StartArgs *>(get_start_args_addr());
  auto *attrib = start_args->thread_attrib;
  self.attrib = attrib;
  self.attrib->atexit_callback_mgr = internal::get_thread_atexit_callback_mgr();

  if (attrib->style == ThreadStyle::POSIX) {
    attrib->retval.posix_retval =
        start_args->runner.posix_runner(start_args->arg);
    thread_exit(ThreadReturnValue(attrib->retval.posix_retval),
                ThreadStyle::POSIX);
  } else {
    attrib->retval.stdc_retval =
        start_args->runner.stdc_runner(start_args->arg);
    thread_exit(ThreadReturnValue(attrib->retval.stdc_retval),
                ThreadStyle::STDC);
  }
}
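
// Note: thread_exit never returns, so start_thread never attempts a normal
// function return on the stack of the newly created thread.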

int Thread::run(ThreadStyle style, ThreadRunner runner, void *arg, void *stack,
                size_t stacksize, size_t guardsize, bool detached) {
  bool owned_stack = false;
  if (stack == nullptr) {
    // TODO: Should we return EINVAL here? Should we have a generic concept of
    // a minimum stacksize (like 16384 for pthread)?
    if (stacksize == 0)
      stacksize = DEFAULT_STACKSIZE;

    // Round up stacksize/guardsize to the page size.
    // TODO: Should we also add sizeof(ThreadAttributes) and other internal
    // metadata to the requested size?
    auto round_or_err = round_to_page(guardsize);
    if (!round_or_err)
      return round_or_err.error();
    guardsize = round_or_err.value();

    round_or_err = round_to_page(stacksize);
    if (!round_or_err)
      return round_or_err.error();
    stacksize = round_or_err.value();

    auto alloc = alloc_stack(stacksize, guardsize);
    if (!alloc)
      return alloc.error();
    stack = alloc.value();
    owned_stack = true;
  }

  // Validate that stack/stacksize are suitably aligned.
  uintptr_t stackaddr = reinterpret_cast<uintptr_t>(stack);
  if ((stackaddr % STACK_ALIGNMENT != 0) ||
      ((stackaddr + stacksize) % STACK_ALIGNMENT != 0)) {
    if (owned_stack)
      free_stack(stack, stacksize, guardsize);
    return EINVAL;
  }

  TLSDescriptor tls;
  init_tls(tls);

  // When the new thread is spawned by the kernel, the new thread gets the
  // stack we pass to the clone syscall. However, this stack is empty and does
  // not have any local vars present in this function. Hence, one cannot
  // pass arguments to the thread start function, or use any local vars from
  // here. So, we pack them into the new stack from where the thread can sniff
  // them out.
  //
  // Likewise, the actual thread state information is also stored on the
  // stack memory.

  static constexpr size_t INTERNAL_STACK_DATA_SIZE =
      sizeof(StartArgs) + sizeof(ThreadAttributes) +
      sizeof(cpp::Atomic<FutexWordType>);

  // This is pretty arbitrary, but at the moment we don't adjust the user
  // provided stacksize (or the default) to account for this data as it is
  // assumed to be minimal. If this assert starts failing we probably should.
  // Likewise, if we can't bound this we may overflow when we subtract it from
  // the top of the stack.
  static_assert(INTERNAL_STACK_DATA_SIZE < EXEC_PAGESIZE);
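
  // Layout of the internal data at the high end of the stack (the stack
  // grows down, so the region below adjusted_stack is the usable stack):
  //
  //   stack + stacksize -> +------------------------------+
  //                        | alignment padding            |
  //                        | cpp::Atomic<FutexWordType>   | <- clear_tid
  //                        | ThreadAttributes             | <- attrib
  //     adjusted_stack  -> | StartArgs                    | <- start_args
  //                        | usable thread stack ...      |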

  // TODO: We are assuming the stack grows down here.
  auto adjusted_stack_or_err =
      add_no_overflow(reinterpret_cast<uintptr_t>(stack), stacksize);
  if (!adjusted_stack_or_err) {
    cleanup_tls(tls.addr, tls.size);
    if (owned_stack)
      free_stack(stack, stacksize, guardsize);
    return adjusted_stack_or_err.error();
  }

  uintptr_t adjusted_stack =
      adjusted_stack_or_err.value() - INTERNAL_STACK_DATA_SIZE;
  // Align down so that adjusted_stack (the new thread's stack pointer and the
  // address of StartArgs) satisfies STACK_ALIGNMENT.
  adjusted_stack &= ~(uintptr_t(STACK_ALIGNMENT) - 1);

  auto *start_args = reinterpret_cast<StartArgs *>(adjusted_stack);

  attrib =
      reinterpret_cast<ThreadAttributes *>(adjusted_stack + sizeof(StartArgs));
  attrib->style = style;
  attrib->detach_state =
      uint32_t(detached ? DetachState::DETACHED : DetachState::JOINABLE);
  attrib->stack = stack;
  attrib->stacksize = stacksize;
  attrib->guardsize = guardsize;
  attrib->owned_stack = owned_stack;
  attrib->tls = tls.addr;
  attrib->tls_size = tls.size;

  start_args->thread_attrib = attrib;
  start_args->runner = runner;
  start_args->arg = arg;

  auto clear_tid = reinterpret_cast<cpp::Atomic<FutexWordType> *>(
      adjusted_stack + sizeof(StartArgs) + sizeof(ThreadAttributes));
  clear_tid->val = CLEAR_TID_VALUE;
  attrib->platform_data = clear_tid;
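
  // clear_tid is the futex word that Thread::wait polls; because of
  // CLONE_CHILD_CLEARTID, the kernel zeroes it and wakes any waiter when the
  // new thread exits.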

  // The clone syscall takes arguments in an architecture specific order.
  // Also, we want the result of the syscall to be in a register as the child
  // thread gets a completely different stack after it is created. The stack
  // variables from this function will not be available to the child thread.
#if defined(LIBC_TARGET_ARCH_IS_X86_64)
  long register clone_result asm(CLONE_RESULT_REGISTER);
  clone_result = __llvm_libc::syscall_impl(
      SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack,
      &attrib->tid,    // The address where the child tid is written
      &clear_tid->val, // The futex where the child thread status is signalled
      tls.tp           // The thread pointer value for the new thread.
  );
#elif defined(LIBC_TARGET_ARCH_IS_AARCH64) ||                                  \
    defined(LIBC_TARGET_ARCH_IS_RISCV64)
  long register clone_result asm(CLONE_RESULT_REGISTER);
  clone_result = __llvm_libc::syscall_impl(
      SYS_clone, CLONE_SYSCALL_FLAGS, adjusted_stack,
      &attrib->tid,   // The address where the child tid is written
      tls.tp,         // The thread pointer value for the new thread.
      &clear_tid->val // The futex where the child thread status is signalled
  );
#else
#error "Unsupported architecture for the clone syscall."
#endif

  if (clone_result == 0) {
#ifdef LIBC_TARGET_ARCH_IS_AARCH64
    // We set the frame pointer to be the same as the "sp" so that start args
    // can be sniffed out from start_thread.
#ifdef __clang__
    // GCC does not currently implement __arm_wsr64/__arm_rsr64.
    __arm_wsr64("x29", __arm_rsr64("sp"));
#else
    asm volatile("mov x29, sp");
#endif
#elif defined(LIBC_TARGET_ARCH_IS_RISCV64)
    asm volatile("mv fp, sp");
#endif
    start_thread();
  } else if (clone_result < 0) {
    cleanup_thread_resources(attrib);
    return -clone_result;
  }

  return 0;
}

int Thread::join(ThreadReturnValue &retval) {
  wait();

  if (attrib->style == ThreadStyle::POSIX)
    retval.posix_retval = attrib->retval.posix_retval;
  else
    retval.stdc_retval = attrib->retval.stdc_retval;

  cleanup_thread_resources(attrib);

  return 0;
}

int Thread::detach() {
  uint32_t joinable_state = uint32_t(DetachState::JOINABLE);
  if (attrib->detach_state.compare_exchange_strong(
          joinable_state, uint32_t(DetachState::DETACHED))) {
    return int(DetachType::SIMPLE);
  }

  // If the thread was already detached, then the detach method should not
  // be called at all. If the thread is exiting, then we wait for it to exit
  // and free up resources.
  wait();

  cleanup_thread_resources(attrib);

  return int(DetachType::CLEANUP);
}

void Thread::wait() {
  // The kernel should set the value at the clear tid address to zero.
  // If not, it is a spurious wake and we should continue to wait on
  // the futex.
  auto *clear_tid =
      reinterpret_cast<cpp::Atomic<FutexWordType> *>(attrib->platform_data);
  while (clear_tid->load() != 0) {
    // We cannot do a FUTEX_WAIT_PRIVATE here as the kernel does a
    // FUTEX_WAKE and not a FUTEX_WAKE_PRIVATE.
    __llvm_libc::syscall_impl(SYS_futex, &clear_tid->val, FUTEX_WAIT,
                              CLEAR_TID_VALUE, nullptr);
  }
}

bool Thread::operator==(const Thread &thread) const {
  return attrib->tid == thread.attrib->tid;
}

static constexpr cpp::string_view THREAD_NAME_PATH_PREFIX("/proc/self/task/");
static constexpr size_t THREAD_NAME_PATH_SIZE =
    THREAD_NAME_PATH_PREFIX.size() +
    IntegerToString::dec_bufsize<int>() + // Size of tid
    1 +                                   // For the '/' character
    5; // For the file name "comm" and the null terminator.
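
// construct_thread_name_file_path builds paths of the form
// "/proc/self/task/<tid>/comm", which is where the kernel exposes a thread's
// name for reading and writing.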
static void construct_thread_name_file_path(cpp::StringStream &stream,
                                            int tid) {
  stream << THREAD_NAME_PATH_PREFIX << tid << '/' << cpp::string_view("comm")
         << cpp::StringStream::ENDS;
}

int Thread::set_name(const cpp::string_view &name) {
  if (name.size() >= NAME_SIZE_MAX)
    return ERANGE;

  if (*this == self) {
    // If we are setting the name of the current thread, then we can
    // use the syscall to set the name.
    int retval = __llvm_libc::syscall_impl(SYS_prctl, PR_SET_NAME, name.data());
    if (retval < 0)
      return -retval;
    return 0;
  }

  // For other threads, write the name to /proc/self/task/<tid>/comm.
  char path_name_buffer[THREAD_NAME_PATH_SIZE];
  cpp::StringStream path_stream(path_name_buffer);
  construct_thread_name_file_path(path_stream, attrib->tid);
#ifdef SYS_open
  int fd = __llvm_libc::syscall_impl(SYS_open, path_name_buffer, O_RDWR);
#else
  int fd =
      __llvm_libc::syscall_impl(SYS_openat, AT_FDCWD, path_name_buffer, O_RDWR);
#endif
  if (fd < 0)
    return -fd;

  int retval =
      __llvm_libc::syscall_impl(SYS_write, fd, name.data(), name.size());
  __llvm_libc::syscall_impl(SYS_close, fd);

  if (retval < 0)
    return -retval;
  else if (retval != int(name.size()))
    return EIO;

  return 0;
}

int Thread::get_name(cpp::StringStream &name) const {
  if (name.bufsize() < NAME_SIZE_MAX)
    return ERANGE;

  char name_buffer[NAME_SIZE_MAX];

  if (*this == self) {
    // If we are getting the name of the current thread, then we can
    // use the syscall to get the name.
    int retval = __llvm_libc::syscall_impl(SYS_prctl, PR_GET_NAME, name_buffer);
    if (retval < 0)
      return -retval;
    name << name_buffer << cpp::StringStream::ENDS;
    return 0;
  }

  // For other threads, read the name from /proc/self/task/<tid>/comm.
  char path_name_buffer[THREAD_NAME_PATH_SIZE];
  cpp::StringStream path_stream(path_name_buffer);
  construct_thread_name_file_path(path_stream, attrib->tid);
#ifdef SYS_open
  int fd = __llvm_libc::syscall_impl(SYS_open, path_name_buffer, O_RDONLY);
#else
  int fd = __llvm_libc::syscall_impl(SYS_openat, AT_FDCWD, path_name_buffer,
                                     O_RDONLY);
#endif
  if (fd < 0)
    return -fd;

  int retval =
      __llvm_libc::syscall_impl(SYS_read, fd, name_buffer, NAME_SIZE_MAX);
  __llvm_libc::syscall_impl(SYS_close, fd);
  if (retval < 0)
    return -retval;
  if (retval == NAME_SIZE_MAX)
    return ERANGE;
  // The "comm" file contents end with a newline; strip it (or just terminate
  // the buffer) before streaming the name out.
  if (name_buffer[retval - 1] == '\n')
    name_buffer[retval - 1] = '\0';
  else
    name_buffer[retval] = '\0';
  name << name_buffer << cpp::StringStream::ENDS;
  return 0;
}

void thread_exit(ThreadReturnValue retval, ThreadStyle style) {
  auto attrib = self.attrib;

  // The very first thing we do is to call the thread's atexit callbacks.
  // These callbacks could be the ones registered by the language runtimes,
  // for example, the destructors of thread local objects. They can also
  // be destructors of the TSS objects set using APIs like pthread_setspecific.
  // NOTE: We cannot call the atexit callbacks as part of the
  // cleanup_thread_resources function as that function can be called from a
  // different thread. The destructors of thread local and TSS objects should
  // be called by the thread which owns them.
  internal::call_atexit_callbacks(attrib);

  uint32_t joinable_state = uint32_t(DetachState::JOINABLE);
  if (!attrib->detach_state.compare_exchange_strong(
          joinable_state, uint32_t(DetachState::EXITING))) {
    // The thread is detached, so clean up its resources here.
    cleanup_thread_resources(attrib);

    // Set the CLEAR_TID address to nullptr to prevent the kernel
    // from signalling at a non-existent futex location.
    __llvm_libc::syscall_impl(SYS_set_tid_address, 0);
    // The return value of a detached thread should be unused. We need to avoid
    // referencing `style` or `retval.*` because they may be stored on the
    // stack and we have deallocated our stack!
    __llvm_libc::syscall_impl(SYS_exit, 0);
    __builtin_unreachable();
  }

  if (style == ThreadStyle::POSIX)
    __llvm_libc::syscall_impl(SYS_exit, retval.posix_retval);
  else
    __llvm_libc::syscall_impl(SYS_exit, retval.stdc_retval);
  __builtin_unreachable();
}

} // namespace __llvm_libc