libc/src/__support/RPC/rpc_util.h

   1 //===-- Shared memory RPC client / server utilities -------------*- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8
   9 #ifndef LLVM_LIBC_SRC___SUPPORT_RPC_RPC_UTILS_H
  10 #define LLVM_LIBC_SRC___SUPPORT_RPC_RPC_UTILS_H
  11
  12 #include "src/__support/CPP/type_traits.h"
  13 #include "src/__support/GPU/utils.h"
  14 #include "src/__support/macros/attributes.h" // LIBC_INLINE
  15 #include "src/__support/macros/properties/architectures.h"
  16 #include "src/string/memory_utils/generic/byte_per_byte.h"
  17 #include "src/string/memory_utils/inline_memcpy.h"
  18
  19 namespace LIBC_NAMESPACE {
  20 namespace rpc {
  21
  22 /// Suspend the thread briefly to assist the thread scheduler during busy loops.
  23 LIBC_INLINE void sleep_briefly() {
  24 #if defined(LIBC_TARGET_ARCH_IS_NVPTX) && __CUDA_ARCH__ >= 700
  25   LIBC_INLINE_ASM("nanosleep.u32 64;" ::: "memory");
  26 #elif defined(LIBC_TARGET_ARCH_IS_AMDGPU)
  27   __builtin_amdgcn_s_sleep(2);
  28 #elif defined(LIBC_TARGET_ARCH_IS_X86)
  29   __builtin_ia32_pause();
  30 #else
  31   // Simply do nothing if sleeping isn't supported on this platform.
  32 #endif
  33 }
  34
  35 /// Conditional to indicate if this process is running on the GPU.
  36 LIBC_INLINE constexpr bool is_process_gpu() {
  37 #if defined(LIBC_TARGET_ARCH_IS_GPU)
  38   return true;
  39 #else
  40   return false;
  41 #endif
  42 }
  43
  44 /// Return \p val aligned "upwards" according to \p align.
  45 template <typename V, typename A>
  46 LIBC_INLINE constexpr V align_up(V val, A align) {
  47   return ((val + V(align) - 1) / V(align)) * V(align);
  48 }
  49
  50 /// Utility to provide a unified interface between the CPU and GPU's memory
  51 /// model. On the GPU stack variables are always private to a lane so we can
  52 /// simply use the variable passed in. On the CPU we need to allocate enough
  53 /// space for the whole lane and index into it.
  54 template <typename V> LIBC_INLINE V &lane_value(V *val, uint32_t id) {
  55   if constexpr (is_process_gpu())
  56     return *val;
  57   return val[id];
  58 }
  59
  60 /// Advance the \p p by \p bytes.
  61 template <typename T, typename U> LIBC_INLINE T *advance(T *ptr, U bytes) {
  62   if constexpr (cpp::is_const_v<T>)
  63     return reinterpret_cast<T *>(reinterpret_cast<const uint8_t *>(ptr) +
  64                                  bytes);
  65   else
  66     return reinterpret_cast<T *>(reinterpret_cast<uint8_t *>(ptr) + bytes);
  67 }
  68
  69 /// Wrapper around the optimal memory copy implementation for the target.
  70 LIBC_INLINE void rpc_memcpy(void *dst, const void *src, size_t count) {
  71   // The built-in memcpy prefers to fully unroll loops. We want to minimize
  72   // resource usage so we use a single nounroll loop implementation.
  73 #if defined(LIBC_TARGET_ARCH_IS_AMDGPU)
  74   inline_memcpy_byte_per_byte(reinterpret_cast<Ptr>(dst),
  75                               reinterpret_cast<CPtr>(src), count);
  76 #else
  77   inline_memcpy(dst, src, count);
  78 #endif
  79 }
  80
  81 } // namespace rpc
  82 } // namespace LIBC_NAMESPACE
  83
  84 #endif