1 //===--- Implementation of a Linux thread class -----------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "src/__support/threads/thread.h"
10 #include "config/app.h"
11 #include "src/__support/CPP/atomic.h"
12 #include "src/__support/CPP/string_view.h"
13 #include "src/__support/CPP/stringstream.h"
14 #include "src/__support/OSUtil/syscall.h" // For syscall functions.
15 #include "src/__support/common.h"
16 #include "src/__support/error_or.h"
17 #include "src/__support/macros/config.h"
18 #include "src/__support/threads/linux/futex_utils.h" // For FutexWordType
19 #include "src/errno/libc_errno.h" // For error macros
21 #ifdef LIBC_TARGET_ARCH_IS_AARCH64
25 #include "hdr/fcntl_macros.h"
26 #include <linux/param.h> // For EXEC_PAGESIZE.
27 #include <linux/prctl.h> // For PR_SET_NAME
28 #include <linux/sched.h> // For CLONE_* flags.
30 #include <sys/mman.h> // For PROT_* and MAP_* definitions.
31 #include <sys/syscall.h> // For syscall numbers.
33 namespace LIBC_NAMESPACE_DECL
{
// Syscall number used to create anonymous mappings. SYS_mmap2 is used when the
// kernel headers define it; otherwise SYS_mmap is used. Targets that provide
// neither are rejected at compile time.
#ifdef SYS_mmap2
static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap2;
#elif defined(SYS_mmap)
static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap;
#else
#error "mmap or mmap2 syscalls not available."
#endif
// Size of a thread-name buffer. Includes the null terminator.
static constexpr size_t NAME_SIZE_MAX{16};

// Value stored in a thread's clear-tid futex word when the thread is created;
// Thread::wait keeps waiting on the futex until the word reads zero.
static constexpr uint32_t CLEAR_TID_VALUE{0xABCD1234U};
45 static constexpr unsigned CLONE_SYSCALL_FLAGS
=
46 CLONE_VM
// Share the memory space with the parent.
47 | CLONE_FS
// Share the file system with the parent.
48 | CLONE_FILES
// Share the files with the parent.
49 | CLONE_SIGHAND
// Share the signal handlers with the parent.
50 | CLONE_THREAD
// Same thread group as the parent.
51 | CLONE_SYSVSEM
// Share a single list of System V semaphore adjustment
53 | CLONE_PARENT_SETTID
// Set child thread ID in |ptid| of the parent.
54 | CLONE_CHILD_CLEARTID
// Let the kernel clear the tid address
55 // wake the joining thread.
56 | CLONE_SETTLS
; // Setup the thread pointer of the new thread.
// Name of the register to which the clone syscall result variable is bound in
// Thread::run. Architectures without an entry here are rejected at compile
// time.
#ifdef LIBC_TARGET_ARCH_IS_AARCH64
#define CLONE_RESULT_REGISTER "x0"
#elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
#define CLONE_RESULT_REGISTER "t0"
#elif defined(LIBC_TARGET_ARCH_IS_X86_64)
#define CLONE_RESULT_REGISTER "rax"
#else
#error "CLONE_RESULT_REGISTER not defined for your target architecture"
#endif
68 static constexpr ErrorOr
<size_t> add_no_overflow(size_t lhs
, size_t rhs
) {
69 if (lhs
> SIZE_MAX
- rhs
)
71 if (rhs
> SIZE_MAX
- lhs
)
76 static constexpr ErrorOr
<size_t> round_to_page(size_t v
) {
77 auto vp_or_err
= add_no_overflow(v
, EXEC_PAGESIZE
- 1);
81 return vp_or_err
.value() & -EXEC_PAGESIZE
;
84 LIBC_INLINE ErrorOr
<void *> alloc_stack(size_t stacksize
, size_t guardsize
) {
86 // Guard needs to be mapped with PROT_NONE
87 int prot
= guardsize
? PROT_NONE
: PROT_READ
| PROT_WRITE
;
88 auto size_or_err
= add_no_overflow(stacksize
, guardsize
);
90 return Error
{int(size_or_err
.error())};
91 size_t size
= size_or_err
.value();
93 // TODO: Maybe add MAP_STACK? Currently unimplemented on linux but helps
95 long mmap_result
= LIBC_NAMESPACE::syscall_impl
<long>(
97 0, // No special address
99 MAP_ANONYMOUS
| MAP_PRIVATE
, // Process private.
100 -1, // Not backed by any file
103 if (mmap_result
< 0 && (uintptr_t(mmap_result
) >= UINTPTR_MAX
- size
))
104 return Error
{int(-mmap_result
)};
107 // Give read/write permissions to actual stack.
108 // TODO: We are assuming stack growsdown here.
109 long result
= LIBC_NAMESPACE::syscall_impl
<long>(
110 SYS_mprotect
, mmap_result
+ guardsize
, stacksize
,
111 PROT_READ
| PROT_WRITE
);
114 return Error
{int(-result
)};
116 mmap_result
+= guardsize
;
117 return reinterpret_cast<void *>(mmap_result
);
120 // This must always be inlined as we may be freeing the calling threads stack in
121 // which case a normal return from the top the stack would cause an invalid
123 [[gnu::always_inline
]] LIBC_INLINE
void
124 free_stack(void *stack
, size_t stacksize
, size_t guardsize
) {
125 uintptr_t stackaddr
= reinterpret_cast<uintptr_t>(stack
);
126 stackaddr
-= guardsize
;
127 stack
= reinterpret_cast<void *>(stackaddr
);
128 LIBC_NAMESPACE::syscall_impl
<long>(SYS_munmap
, stack
, stacksize
+ guardsize
);
133 // We align the start args to 16-byte boundary as we adjust the allocated
134 // stack memory with its size. We want the adjusted address to be at a
135 // 16-byte boundary to satisfy the x86_64 and aarch64 ABI requirements.
136 // If different architecture in future requires higher alignment, then we
137 // can add a platform specific alignment spec.
138 struct alignas(STACK_ALIGNMENT
) StartArgs
{
139 ThreadAttributes
*thread_attrib
;
144 // This must always be inlined as we may be freeing the calling threads stack in
145 // which case a normal return from the top the stack would cause an invalid
147 [[gnu::always_inline
]] LIBC_INLINE
void
148 cleanup_thread_resources(ThreadAttributes
*attrib
) {
149 // Cleanup the TLS before the stack as the TLS information is stored on
151 cleanup_tls(attrib
->tls
, attrib
->tls_size
);
152 if (attrib
->owned_stack
)
153 free_stack(attrib
->stack
, attrib
->stacksize
, attrib
->guardsize
);
156 [[gnu::always_inline
]] LIBC_INLINE
uintptr_t get_start_args_addr() {
157 // NOTE: For __builtin_frame_address to work reliably across compilers,
158 // architectures and various optimization levels, the TU including this file
159 // should be compiled with -fno-omit-frame-pointer.
160 #ifdef LIBC_TARGET_ARCH_IS_X86_64
161 return reinterpret_cast<uintptr_t>(__builtin_frame_address(0))
162 // The x86_64 call instruction pushes resume address on to the stack.
163 // Next, The x86_64 SysV ABI requires that the frame pointer be pushed
164 // on to the stack. So, we have to step past two 64-bit values to get
165 // to the start args.
166 + sizeof(uintptr_t) * 2;
167 #elif defined(LIBC_TARGET_ARCH_IS_AARCH64)
168 // The frame pointer after cloning the new thread in the Thread::run method
169 // is set to the stack pointer where start args are stored. So, we fetch
171 return reinterpret_cast<uintptr_t>(__builtin_frame_address(1));
172 #elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
173 // The current frame pointer is the previous stack pointer where the start
175 return reinterpret_cast<uintptr_t>(__builtin_frame_address(0));
179 [[gnu::noinline
]] void start_thread() {
180 auto *start_args
= reinterpret_cast<StartArgs
*>(get_start_args_addr());
181 auto *attrib
= start_args
->thread_attrib
;
182 self
.attrib
= attrib
;
183 self
.attrib
->atexit_callback_mgr
= internal::get_thread_atexit_callback_mgr();
185 if (attrib
->style
== ThreadStyle::POSIX
) {
186 attrib
->retval
.posix_retval
=
187 start_args
->runner
.posix_runner(start_args
->arg
);
188 thread_exit(ThreadReturnValue(attrib
->retval
.posix_retval
),
191 attrib
->retval
.stdc_retval
=
192 start_args
->runner
.stdc_runner(start_args
->arg
);
193 thread_exit(ThreadReturnValue(attrib
->retval
.stdc_retval
),
198 int Thread::run(ThreadStyle style
, ThreadRunner runner
, void *arg
, void *stack
,
199 size_t stacksize
, size_t guardsize
, bool detached
) {
200 bool owned_stack
= false;
201 if (stack
== nullptr) {
202 // TODO: Should we return EINVAL here? Should we have a generic concept of a
203 // minimum stacksize (like 16384 for pthread).
205 stacksize
= DEFAULT_STACKSIZE
;
206 // Roundup stacksize/guardsize to page size.
207 // TODO: Should be also add sizeof(ThreadAttribute) and other internal
209 auto round_or_err
= round_to_page(guardsize
);
211 return round_or_err
.error();
212 guardsize
= round_or_err
.value();
214 round_or_err
= round_to_page(stacksize
);
216 return round_or_err
.error();
218 stacksize
= round_or_err
.value();
219 auto alloc
= alloc_stack(stacksize
, guardsize
);
221 return alloc
.error();
223 stack
= alloc
.value();
227 // Validate that stack/stacksize are validly aligned.
228 uintptr_t stackaddr
= reinterpret_cast<uintptr_t>(stack
);
229 if ((stackaddr
% STACK_ALIGNMENT
!= 0) ||
230 ((stackaddr
+ stacksize
) % STACK_ALIGNMENT
!= 0)) {
232 free_stack(stack
, stacksize
, guardsize
);
239 // When the new thread is spawned by the kernel, the new thread gets the
240 // stack we pass to the clone syscall. However, this stack is empty and does
241 // not have any local vars present in this function. Hence, one cannot
242 // pass arguments to the thread start function, or use any local vars from
243 // here. So, we pack them into the new stack from where the thread can sniff
246 // Likewise, the actual thread state information is also stored on the
249 static constexpr size_t INTERNAL_STACK_DATA_SIZE
=
250 sizeof(StartArgs
) + sizeof(ThreadAttributes
) + sizeof(Futex
);
252 // This is pretty arbitrary, but at the moment we don't adjust user provided
253 // stacksize (or default) to account for this data as its assumed minimal. If
254 // this assert starts failing we probably should. Likewise if we can't bound
255 // this we may overflow when we subtract it from the top of the stack.
256 static_assert(INTERNAL_STACK_DATA_SIZE
< EXEC_PAGESIZE
);
258 // TODO: We are assuming stack growsdown here.
259 auto adjusted_stack_or_err
=
260 add_no_overflow(reinterpret_cast<uintptr_t>(stack
), stacksize
);
261 if (!adjusted_stack_or_err
) {
262 cleanup_tls(tls
.addr
, tls
.size
);
264 free_stack(stack
, stacksize
, guardsize
);
265 return adjusted_stack_or_err
.error();
268 uintptr_t adjusted_stack
=
269 adjusted_stack_or_err
.value() - INTERNAL_STACK_DATA_SIZE
;
270 adjusted_stack
&= ~(uintptr_t(STACK_ALIGNMENT
) - 1);
272 auto *start_args
= reinterpret_cast<StartArgs
*>(adjusted_stack
);
275 reinterpret_cast<ThreadAttributes
*>(adjusted_stack
+ sizeof(StartArgs
));
276 attrib
->style
= style
;
277 attrib
->detach_state
=
278 uint32_t(detached
? DetachState::DETACHED
: DetachState::JOINABLE
);
279 attrib
->stack
= stack
;
280 attrib
->stacksize
= stacksize
;
281 attrib
->guardsize
= guardsize
;
282 attrib
->owned_stack
= owned_stack
;
283 attrib
->tls
= tls
.addr
;
284 attrib
->tls_size
= tls
.size
;
286 start_args
->thread_attrib
= attrib
;
287 start_args
->runner
= runner
;
288 start_args
->arg
= arg
;
290 auto clear_tid
= reinterpret_cast<Futex
*>(
291 adjusted_stack
+ sizeof(StartArgs
) + sizeof(ThreadAttributes
));
292 clear_tid
->set(CLEAR_TID_VALUE
);
293 attrib
->platform_data
= clear_tid
;
295 // The clone syscall takes arguments in an architecture specific order.
296 // Also, we want the result of the syscall to be in a register as the child
297 // thread gets a completely different stack after it is created. The stack
298 // variables from this function will not be availalbe to the child thread.
299 #if defined(LIBC_TARGET_ARCH_IS_X86_64)
300 long register clone_result
asm(CLONE_RESULT_REGISTER
);
301 clone_result
= LIBC_NAMESPACE::syscall_impl
<long>(
302 SYS_clone
, CLONE_SYSCALL_FLAGS
, adjusted_stack
,
303 &attrib
->tid
, // The address where the child tid is written
304 &clear_tid
->val
, // The futex where the child thread status is signalled
305 tls
.tp
// The thread pointer value for the new thread.
307 #elif defined(LIBC_TARGET_ARCH_IS_AARCH64) || \
308 defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
309 long register clone_result
asm(CLONE_RESULT_REGISTER
);
310 clone_result
= LIBC_NAMESPACE::syscall_impl
<long>(
311 SYS_clone
, CLONE_SYSCALL_FLAGS
, adjusted_stack
,
312 &attrib
->tid
, // The address where the child tid is written
313 tls
.tp
, // The thread pointer value for the new thread.
314 &clear_tid
->val
// The futex where the child thread status is signalled
317 #error "Unsupported architecture for the clone syscall."
320 if (clone_result
== 0) {
321 #ifdef LIBC_TARGET_ARCH_IS_AARCH64
322 // We set the frame pointer to be the same as the "sp" so that start args
323 // can be sniffed out from start_thread.
325 // GCC does not currently implement __arm_wsr64/__arm_rsr64.
326 __arm_wsr64("x29", __arm_rsr64("sp"));
328 asm volatile("mov x29, sp");
330 #elif defined(LIBC_TARGET_ARCH_IS_ANY_RISCV)
331 asm volatile("mv fp, sp");
334 } else if (clone_result
< 0) {
335 cleanup_thread_resources(attrib
);
336 return static_cast<int>(-clone_result
);
342 int Thread::join(ThreadReturnValue
&retval
) {
345 if (attrib
->style
== ThreadStyle::POSIX
)
346 retval
.posix_retval
= attrib
->retval
.posix_retval
;
348 retval
.stdc_retval
= attrib
->retval
.stdc_retval
;
350 cleanup_thread_resources(attrib
);
355 int Thread::detach() {
356 uint32_t joinable_state
= uint32_t(DetachState::JOINABLE
);
357 if (attrib
->detach_state
.compare_exchange_strong(
358 joinable_state
, uint32_t(DetachState::DETACHED
))) {
359 return int(DetachType::SIMPLE
);
362 // If the thread was already detached, then the detach method should not
363 // be called at all. If the thread is exiting, then we wait for it to exit
364 // and free up resources.
367 cleanup_thread_resources(attrib
);
369 return int(DetachType::CLEANUP
);
372 void Thread::wait() {
373 // The kernel should set the value at the clear tid address to zero.
374 // If not, it is a spurious wake and we should continue to wait on
376 auto *clear_tid
= reinterpret_cast<Futex
*>(attrib
->platform_data
);
377 // We cannot do a FUTEX_WAIT_PRIVATE here as the kernel does a
378 // FUTEX_WAKE and not a FUTEX_WAKE_PRIVATE.
379 while (clear_tid
->load() != 0)
380 clear_tid
->wait(CLEAR_TID_VALUE
, cpp::nullopt
, true);
383 bool Thread::operator==(const Thread
&thread
) const {
384 return attrib
->tid
== thread
.attrib
->tid
;
387 static constexpr cpp::string_view
THREAD_NAME_PATH_PREFIX("/proc/self/task/");
388 static constexpr size_t THREAD_NAME_PATH_SIZE
=
389 THREAD_NAME_PATH_PREFIX
.size() +
390 IntegerToString
<int>::buffer_size() + // Size of tid
391 1 + // For '/' character
392 5; // For the file name "comm" and the nullterminator.
394 static void construct_thread_name_file_path(cpp::StringStream
&stream
,
396 stream
<< THREAD_NAME_PATH_PREFIX
<< tid
<< '/' << cpp::string_view("comm")
397 << cpp::StringStream::ENDS
;
400 int Thread::set_name(const cpp::string_view
&name
) {
401 if (name
.size() >= NAME_SIZE_MAX
)
405 // If we are setting the name of the current thread, then we can
406 // use the syscall to set the name.
408 LIBC_NAMESPACE::syscall_impl
<int>(SYS_prctl
, PR_SET_NAME
, name
.data());
415 char path_name_buffer
[THREAD_NAME_PATH_SIZE
];
416 cpp::StringStream
path_stream(path_name_buffer
);
417 construct_thread_name_file_path(path_stream
, attrib
->tid
);
420 LIBC_NAMESPACE::syscall_impl
<int>(SYS_open
, path_name_buffer
, O_RDWR
);
422 int fd
= LIBC_NAMESPACE::syscall_impl
<int>(SYS_openat
, AT_FDCWD
,
423 path_name_buffer
, O_RDWR
);
428 int retval
= LIBC_NAMESPACE::syscall_impl
<int>(SYS_write
, fd
, name
.data(),
430 LIBC_NAMESPACE::syscall_impl
<long>(SYS_close
, fd
);
434 else if (retval
!= int(name
.size()))
440 int Thread::get_name(cpp::StringStream
&name
) const {
441 if (name
.bufsize() < NAME_SIZE_MAX
)
444 char name_buffer
[NAME_SIZE_MAX
];
447 // If we are getting the name of the current thread, then we can
448 // use the syscall to get the name.
450 LIBC_NAMESPACE::syscall_impl
<int>(SYS_prctl
, PR_GET_NAME
, name_buffer
);
453 name
<< name_buffer
<< cpp::StringStream::ENDS
;
457 char path_name_buffer
[THREAD_NAME_PATH_SIZE
];
458 cpp::StringStream
path_stream(path_name_buffer
);
459 construct_thread_name_file_path(path_stream
, attrib
->tid
);
462 LIBC_NAMESPACE::syscall_impl
<int>(SYS_open
, path_name_buffer
, O_RDONLY
);
464 int fd
= LIBC_NAMESPACE::syscall_impl
<int>(SYS_openat
, AT_FDCWD
,
465 path_name_buffer
, O_RDONLY
);
470 int retval
= LIBC_NAMESPACE::syscall_impl
<int>(SYS_read
, fd
, name_buffer
,
472 LIBC_NAMESPACE::syscall_impl
<long>(SYS_close
, fd
);
475 if (retval
== NAME_SIZE_MAX
)
477 if (name_buffer
[retval
- 1] == '\n')
478 name_buffer
[retval
- 1] = '\0';
480 name_buffer
[retval
] = '\0';
481 name
<< name_buffer
<< cpp::StringStream::ENDS
;
485 void thread_exit(ThreadReturnValue retval
, ThreadStyle style
) {
486 auto attrib
= self
.attrib
;
488 // The very first thing we do is to call the thread's atexit callbacks.
489 // These callbacks could be the ones registered by the language runtimes,
490 // for example, the destructors of thread local objects. They can also
491 // be destructors of the TSS objects set using API like pthread_setspecific.
492 // NOTE: We cannot call the atexit callbacks as part of the
493 // cleanup_thread_resources function as that function can be called from a
494 // different thread. The destructors of thread local and TSS objects should
495 // be called by the thread which owns them.
496 internal::call_atexit_callbacks(attrib
);
498 uint32_t joinable_state
= uint32_t(DetachState::JOINABLE
);
499 if (!attrib
->detach_state
.compare_exchange_strong(
500 joinable_state
, uint32_t(DetachState::EXITING
))) {
501 // Thread is detached so cleanup the resources.
502 cleanup_thread_resources(attrib
);
504 // Set the CLEAR_TID address to nullptr to prevent the kernel
505 // from signalling at a non-existent futex location.
506 LIBC_NAMESPACE::syscall_impl
<long>(SYS_set_tid_address
, 0);
507 // Return value for detached thread should be unused. We need to avoid
508 // referencing `style` or `retval.*` because they may be stored on the stack
509 // and we have deallocated our stack!
510 LIBC_NAMESPACE::syscall_impl
<long>(SYS_exit
, 0);
511 __builtin_unreachable();
514 if (style
== ThreadStyle::POSIX
)
515 LIBC_NAMESPACE::syscall_impl
<long>(SYS_exit
, retval
.posix_retval
);
517 LIBC_NAMESPACE::syscall_impl
<long>(SYS_exit
, retval
.stdc_retval
);
518 __builtin_unreachable();
521 } // namespace LIBC_NAMESPACE_DECL