offload/DeviceRTL/include/Synchronization.h

   1 //===- Synchronization.h - OpenMP synchronization utilities ------- C++ -*-===//
   2 //
   3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
   4 // See https://llvm.org/LICENSE.txt for license information.
   5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
   6 //
   7 //===----------------------------------------------------------------------===//
   8 //
   9 //
  10 //===----------------------------------------------------------------------===//
  11
  12 #ifndef OMPTARGET_DEVICERTL_SYNCHRONIZATION_H
  13 #define OMPTARGET_DEVICERTL_SYNCHRONIZATION_H
  14
  15 #include "Types.h"
  16
  17 namespace ompx {
  18
  19 namespace atomic {
  20
  21 enum OrderingTy {
  22   relaxed = __ATOMIC_RELAXED,
  23   aquire = __ATOMIC_ACQUIRE,
  24   release = __ATOMIC_RELEASE,
  25   acq_rel = __ATOMIC_ACQ_REL,
  26   seq_cst = __ATOMIC_SEQ_CST,
  27 };
  28
  29 enum MemScopeTy {
  30   all,    // All threads on all devices
  31   device, // All threads on the device
  32   cgroup  // All threads in the contention group, e.g. the team
  33 };
  34
  35 /// Atomically increment \p *Addr and wrap at \p V with \p Ordering semantics.
  36 uint32_t inc(uint32_t *Addr, uint32_t V, OrderingTy Ordering,
  37              MemScopeTy MemScope = MemScopeTy::all);
  38
/// Atomically perform <op> on \p V and \p *Addr with \p Ordering semantics. The
/// result is stored in \p *Addr.
///{
  42
  43 #define ATOMIC_COMMON_OP(TY)                                                   \
  44   TY add(TY *Addr, TY V, OrderingTy Ordering);                                 \
  45   TY mul(TY *Addr, TY V, OrderingTy Ordering);                                 \
  46   TY load(TY *Addr, OrderingTy Ordering);                                      \
  47   void store(TY *Addr, TY V, OrderingTy Ordering);                             \
  48   bool cas(TY *Addr, TY ExpectedV, TY DesiredV, OrderingTy OrderingSucc,       \
  49            OrderingTy OrderingFail);
  50
  51 #define ATOMIC_FP_ONLY_OP(TY)                                                  \
  52   TY min(TY *Addr, TY V, OrderingTy Ordering);                                 \
  53   TY max(TY *Addr, TY V, OrderingTy Ordering);
  54
  55 #define ATOMIC_INT_ONLY_OP(TY)                                                 \
  56   TY min(TY *Addr, TY V, OrderingTy Ordering);                                 \
  57   TY max(TY *Addr, TY V, OrderingTy Ordering);                                 \
  58   TY bit_or(TY *Addr, TY V, OrderingTy Ordering);                              \
  59   TY bit_and(TY *Addr, TY V, OrderingTy Ordering);                             \
  60   TY bit_xor(TY *Addr, TY V, OrderingTy Ordering);
  61
  62 #define ATOMIC_FP_OP(TY)                                                       \
  63   ATOMIC_FP_ONLY_OP(TY)                                                        \
  64   ATOMIC_COMMON_OP(TY)
  65
  66 #define ATOMIC_INT_OP(TY)                                                      \
  67   ATOMIC_INT_ONLY_OP(TY)                                                       \
  68   ATOMIC_COMMON_OP(TY)
  69
  70 // This needs to be kept in sync with the header. Also the reason we don't use
  71 // templates here.
  72 ATOMIC_INT_OP(int8_t)
  73 ATOMIC_INT_OP(int16_t)
  74 ATOMIC_INT_OP(int32_t)
  75 ATOMIC_INT_OP(int64_t)
  76 ATOMIC_INT_OP(uint8_t)
  77 ATOMIC_INT_OP(uint16_t)
  78 ATOMIC_INT_OP(uint32_t)
  79 ATOMIC_INT_OP(uint64_t)
  80 ATOMIC_FP_OP(float)
  81 ATOMIC_FP_OP(double)
  82
  83 #undef ATOMIC_INT_ONLY_OP
  84 #undef ATOMIC_FP_ONLY_OP
  85 #undef ATOMIC_COMMON_OP
  86 #undef ATOMIC_INT_OP
  87 #undef ATOMIC_FP_OP
  88
  89 ///}
  90
  91 } // namespace atomic
  92
  93 namespace synchronize {
  94
  95 /// Initialize the synchronization machinery. Must be called by all threads.
  96 void init(bool IsSPMD);
  97
  98 /// Synchronize all threads in a warp identified by \p Mask.
  99 void warp(LaneMaskTy Mask);
 100
 101 /// Synchronize all threads in a block and perform a fence before and after the
 102 /// barrier according to \p Ordering. Note that the fence might be part of the
 103 /// barrier.
 104 void threads(atomic::OrderingTy Ordering);
 105
 106 /// Synchronizing threads is allowed even if they all hit different instances of
 107 /// `synchronize::threads()`. However, `synchronize::threadsAligned()` is more
 108 /// restrictive in that it requires all threads to hit the same instance. The
 109 /// noinline is removed by the openmp-opt pass and helps to preserve the
 110 /// information till then.
 111 ///{
 112 #pragma omp begin assumes ext_aligned_barrier
 113
 114 /// Synchronize all threads in a block, they are reaching the same instruction
 115 /// (hence all threads in the block are "aligned"). Also perform a fence before
 116 /// and after the barrier according to \p Ordering. Note that the
 117 /// fence might be part of the barrier if the target offers this.
 118 [[gnu::noinline]] void threadsAligned(atomic::OrderingTy Ordering);
 119
 120 #pragma omp end assumes
 121 ///}
 122
 123 } // namespace synchronize
 124
 125 namespace fence {
 126
 127 /// Memory fence with \p Ordering semantics for the team.
 128 void team(atomic::OrderingTy Ordering);
 129
 130 /// Memory fence with \p Ordering semantics for the contention group.
 131 void kernel(atomic::OrderingTy Ordering);
 132
 133 /// Memory fence with \p Ordering semantics for the system.
 134 void system(atomic::OrderingTy Ordering);
 135
 136 } // namespace fence
 137
 138 } // namespace ompx
 139
 140 #endif