Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / libc / src / __support / RPC / rpc_util.h
blob04620b0487f4ad10bd56922f150318a4d7abe90a
1 //===-- Shared memory RPC client / server utilities -------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #ifndef LLVM_LIBC_SRC___SUPPORT_RPC_RPC_UTILS_H
10 #define LLVM_LIBC_SRC___SUPPORT_RPC_RPC_UTILS_H
12 #include "src/__support/CPP/type_traits.h"
13 #include "src/__support/GPU/utils.h"
14 #include "src/__support/macros/attributes.h" // LIBC_INLINE
15 #include "src/__support/macros/properties/architectures.h"
16 #include "src/string/memory_utils/generic/byte_per_byte.h"
17 #include "src/string/memory_utils/inline_memcpy.h"
19 namespace LIBC_NAMESPACE {
20 namespace rpc {
22 /// Suspend the thread briefly to assist the thread scheduler during busy loops.
23 LIBC_INLINE void sleep_briefly() {
24 #if defined(LIBC_TARGET_ARCH_IS_NVPTX) && __CUDA_ARCH__ >= 700
25 LIBC_INLINE_ASM("nanosleep.u32 64;" ::: "memory");
26 #elif defined(LIBC_TARGET_ARCH_IS_AMDGPU)
27 __builtin_amdgcn_s_sleep(2);
28 #elif defined(LIBC_TARGET_ARCH_IS_X86)
29 __builtin_ia32_pause();
30 #else
31 // Simply do nothing if sleeping isn't supported on this platform.
32 #endif
35 /// Conditional to indicate if this process is running on the GPU.
36 LIBC_INLINE constexpr bool is_process_gpu() {
37 #if defined(LIBC_TARGET_ARCH_IS_GPU)
38 return true;
39 #else
40 return false;
41 #endif
44 /// Return \p val aligned "upwards" according to \p align.
45 template <typename V, typename A>
46 LIBC_INLINE constexpr V align_up(V val, A align) {
47 return ((val + V(align) - 1) / V(align)) * V(align);
50 /// Utility to provide a unified interface between the CPU and GPU's memory
51 /// model. On the GPU stack variables are always private to a lane so we can
52 /// simply use the variable passed in. On the CPU we need to allocate enough
53 /// space for the whole lane and index into it.
54 template <typename V> LIBC_INLINE V &lane_value(V *val, uint32_t id) {
55 if constexpr (is_process_gpu())
56 return *val;
57 return val[id];
60 /// Advance the \p p by \p bytes.
61 template <typename T, typename U> LIBC_INLINE T *advance(T *ptr, U bytes) {
62 if constexpr (cpp::is_const_v<T>)
63 return reinterpret_cast<T *>(reinterpret_cast<const uint8_t *>(ptr) +
64 bytes);
65 else
66 return reinterpret_cast<T *>(reinterpret_cast<uint8_t *>(ptr) + bytes);
69 /// Wrapper around the optimal memory copy implementation for the target.
70 LIBC_INLINE void rpc_memcpy(void *dst, const void *src, size_t count) {
71 // The built-in memcpy prefers to fully unroll loops. We want to minimize
72 // resource usage so we use a single nounroll loop implementation.
73 #if defined(LIBC_TARGET_ARCH_IS_AMDGPU)
74 inline_memcpy_byte_per_byte(reinterpret_cast<Ptr>(dst),
75 reinterpret_cast<CPtr>(src), count);
76 #else
77 inline_memcpy(dst, src, count);
78 #endif
81 } // namespace rpc
82 } // namespace LIBC_NAMESPACE
84 #endif