[SCFToGPU] Convert scf.parallel+scf.reduce to gpu.all_reduce (#122782)
[llvm-project.git] / libc / startup / linux / aarch64 / tls.cpp
blobea1b50c9fb209d80ca375b854e4d13b8b6887b48
1 //===-- Implementation of tls for aarch64 ---------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "src/__support/OSUtil/syscall.h"
10 #include "src/__support/macros/config.h"
11 #include "src/__support/threads/thread.h"
12 #include "src/string/memory_utils/inline_memcpy.h"
13 #include "startup/linux/do_start.h"
15 #include <arm_acle.h>
16 #include <sys/mman.h>
17 #include <sys/syscall.h>
19 // Source documentation:
20 // https://github.com/ARM-software/abi-aa/tree/main/sysvabi64
22 namespace LIBC_NAMESPACE_DECL {
// Syscall number used to map memory. SYS_mmap2 is preferred when the syscall
// headers define it; otherwise SYS_mmap is used. Both branches test whether the
// macro is *defined* (not what it expands to), so an undefined name never gets
// evaluated as 0 in a preprocessor expression. The build fails if neither
// macro is available.
#ifdef SYS_mmap2
static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap2;
#elif defined(SYS_mmap)
static constexpr long MMAP_SYSCALL_NUMBER = SYS_mmap;
#else
#error "mmap and mmap2 syscalls not available."
#endif
32 void init_tls(TLSDescriptor &tls_descriptor) {
33 if (app.tls.size == 0) {
34 tls_descriptor.size = 0;
35 tls_descriptor.tp = 0;
36 return;
39 // aarch64 follows the variant 1 TLS layout:
41 // 1. First entry is the dynamic thread vector pointer
42 // 2. Second entry is a 8-byte reserved word.
43 // 3. Padding for alignment.
44 // 4. The TLS data from the ELF image.
46 // The thread pointer points to the first entry.
48 const uintptr_t size_of_pointers = 2 * sizeof(uintptr_t);
49 uintptr_t padding = 0;
50 const uintptr_t ALIGNMENT_MASK = app.tls.align - 1;
51 uintptr_t diff = size_of_pointers & ALIGNMENT_MASK;
52 if (diff != 0)
53 padding += (ALIGNMENT_MASK - diff) + 1;
55 uintptr_t alloc_size = size_of_pointers + padding + app.tls.size;
57 // We cannot call the mmap function here as the functions set errno on
58 // failure. Since errno is implemented via a thread local variable, we cannot
59 // use errno before TLS is setup.
60 long mmap_ret_val = syscall_impl<long>(MMAP_SYSCALL_NUMBER, nullptr,
61 alloc_size, PROT_READ | PROT_WRITE,
62 MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
63 // We cannot check the return value with MAP_FAILED as that is the return
64 // of the mmap function and not the mmap syscall.
65 if (mmap_ret_val < 0 && static_cast<uintptr_t>(mmap_ret_val) > -app.page_size)
66 syscall_impl<long>(SYS_exit, 1);
67 uintptr_t thread_ptr = uintptr_t(reinterpret_cast<uintptr_t *>(mmap_ret_val));
68 uintptr_t tls_addr = thread_ptr + size_of_pointers + padding;
69 inline_memcpy(reinterpret_cast<char *>(tls_addr),
70 reinterpret_cast<const char *>(app.tls.address),
71 app.tls.init_size);
72 tls_descriptor.size = alloc_size;
73 tls_descriptor.addr = thread_ptr;
74 tls_descriptor.tp = thread_ptr;
77 void cleanup_tls(uintptr_t addr, uintptr_t size) {
78 if (size == 0)
79 return;
80 syscall_impl<long>(SYS_munmap, addr, size);
// Stores |val| in the tpidr_el0 system register. Always returns true.
bool set_thread_ptr(uintptr_t val) {
  // Pick whichever register-write primitive the compiler provides, trying the
  // builtins first and falling back to inline assembly for GNU-compatible
  // compilers. GCC's __arm_wsr64 support was merged on Dec 6, 2023 and is not
  // yet usable in 13.3.0:
  // https://github.com/gcc-mirror/gcc/commit/fc42900d21abd5eacb7537c3c8ffc5278d510195
#if __has_builtin(__builtin_arm_wsr64)
  __builtin_arm_wsr64("tpidr_el0", val);
#elif __has_builtin(__builtin_aarch64_wsr)
  __builtin_aarch64_wsr("tpidr_el0", val);
#elif defined(__GNUC__)
  __asm__ __volatile__("msr tpidr_el0, %0" : : "r"(val));
#else
#error "Unsupported compiler"
#endif
  return true;
}
98 } // namespace LIBC_NAMESPACE_DECL