1 //===-- Shared memory RPC client / server utilities -------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_LIBC_SRC___SUPPORT_RPC_RPC_UTILS_H
10 #define LLVM_LIBC_SRC___SUPPORT_RPC_RPC_UTILS_H
12 #include "src/__support/CPP/type_traits.h"
13 #include "src/__support/GPU/utils.h"
14 #include "src/__support/macros/attributes.h" // LIBC_INLINE
15 #include "src/__support/macros/properties/architectures.h"
16 #include "src/string/memory_utils/generic/byte_per_byte.h"
17 #include "src/string/memory_utils/inline_memcpy.h"
19 namespace LIBC_NAMESPACE
{
22 /// Suspend the thread briefly to assist the thread scheduler during busy loops.
23 LIBC_INLINE
void sleep_briefly() {
24 #if defined(LIBC_TARGET_ARCH_IS_NVPTX) && __CUDA_ARCH__ >= 700
25 LIBC_INLINE_ASM("nanosleep.u32 64;" ::: "memory");
26 #elif defined(LIBC_TARGET_ARCH_IS_AMDGPU)
27 __builtin_amdgcn_s_sleep(2);
28 #elif defined(LIBC_TARGET_ARCH_IS_X86)
29 __builtin_ia32_pause();
31 // Simply do nothing if sleeping isn't supported on this platform.
35 /// Conditional to indicate if this process is running on the GPU.
///
/// Declared `constexpr` so the answer can be consumed in constant expressions;
/// lane_value uses it as the condition of an `if constexpr`. The result is
/// selected by the preprocessor test on LIBC_TARGET_ARCH_IS_GPU below.
// NOTE(review): the branches guarded by this `#if` are not visible in this
// excerpt -- presumably `true` when the macro is defined and `false`
// otherwise; confirm against the full file.
36 LIBC_INLINE
constexpr bool is_process_gpu() {
37 #if defined(LIBC_TARGET_ARCH_IS_GPU)
/// Return \p val aligned "upwards" according to \p align.
///
/// For a non-negative integral \p val the result is the smallest multiple of
/// \p align that is greater than or equal to \p val. \p align is converted to
/// \p V before use and must be non-zero because it is used as a divisor. The
/// intermediate `val + align - 1` must also be representable, otherwise the
/// result wraps or overflows.
template <typename V, typename A>
LIBC_INLINE constexpr V align_up(V val, A align) {
  return ((val + V(align) - 1) / V(align)) * V(align);
}
50 /// Utility to provide a unified interface between the CPU and GPU's memory
51 /// model. On the GPU stack variables are always private to a lane so we can
52 /// simply use the variable passed in. On the CPU we need to allocate enough
53 /// space for the whole lane and index into it.
///
/// Takes a pointer \p val and a 32-bit index \p id and returns a `V &`. The
/// CPU / GPU choice is resolved at compile time through
/// `if constexpr (is_process_gpu())`.
// NOTE(review): the statements selected by this `if constexpr` are not visible
// in this excerpt -- per the description above, presumably the GPU path yields
// `*val` and the CPU path yields `val[id]`; confirm against the full file.
54 template <typename V
> LIBC_INLINE V
&lane_value(V
*val
, uint32_t id
) {
55 if constexpr (is_process_gpu())
60 /// Advance the \p p by \p bytes.
61 template <typename T
, typename U
> LIBC_INLINE T
*advance(T
*ptr
, U bytes
) {
62 if constexpr (cpp::is_const_v
<T
>)
63 return reinterpret_cast<T
*>(reinterpret_cast<const uint8_t *>(ptr
) +
66 return reinterpret_cast<T
*>(reinterpret_cast<uint8_t *>(ptr
) + bytes
);
69 /// Wrapper around the optimal memory copy implementation for the target.
70 LIBC_INLINE
void rpc_memcpy(void *dst
, const void *src
, size_t count
) {
71 // The built-in memcpy prefers to fully unroll loops. We want to minimize
72 // resource usage so we use a single nounroll loop implementation.
73 #if defined(LIBC_TARGET_ARCH_IS_AMDGPU)
74 inline_memcpy_byte_per_byte(reinterpret_cast<Ptr
>(dst
),
75 reinterpret_cast<CPtr
>(src
), count
);
77 inline_memcpy(dst
, src
, count
);
82 } // namespace LIBC_NAMESPACE