1 //===------- Utils.cpp - OpenMP device runtime utility functions -- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 //===----------------------------------------------------------------------===//
15 #include "Interface.h"
18 #pragma omp begin declare target device_type(nohost)
/// Weak, C-linkage declaration of the `IsSPMDMode` symbol. Neither a
/// definition nor a use of it appears in the visible part of this file.
extern "C" [[gnu::weak]] int IsSPMDMode;
/// Definition used outside the `declare variant` regions below: always
/// reports that \p Ptr does not refer to shared memory.
///
/// \param Ptr Pointer to classify; it is not inspected here.
/// \returns false, unconditionally.
bool isSharedMemPtr(const void *Ptr) {
  (void)Ptr; // Deliberately unused in this definition.
  return false;
}
/// Splits \p Val into two 32-bit values that are stored through \p LowBits
/// and \p HighBits. Declaration only; the definitions are supplied inside the
/// target-specific `declare variant` regions that follow.
void Unpack(uint64_t Val, uint32_t *LowBits, uint32_t *HighBits);

/// Combines \p LowBits and \p HighBits into a single 64-bit value.
/// Declaration only; the definitions are supplied inside the target-specific
/// `declare variant` regions that follow.
uint64_t Pack(uint32_t LowBits, uint32_t HighBits);
30 /// AMDGCN Implementation
33 #pragma omp begin declare variant match(device = {arch(amdgcn)})
/// Splits the 64-bit \p Val into its two 32-bit halves.
///
/// \param Val      Value to split.
/// \param LowBits  Receives bits 0-31 of \p Val.
/// \param HighBits Receives bits 32-63 of \p Val.
void Unpack(uint64_t Val, uint32_t *LowBits, uint32_t *HighBits) {
  // Converting to uint32_t keeps exactly the low 32 bits, so no `UL`-suffixed
  // mask (and no assumption about the width of `unsigned long`) is required.
  *LowBits = static_cast<uint32_t>(Val);
  *HighBits = static_cast<uint32_t>(Val >> 32);
}
/// Builds a 64-bit value from two 32-bit halves.
///
/// \param LowBits  Becomes bits 0-31 of the result.
/// \param HighBits Becomes bits 32-63 of the result.
/// \returns The combined 64-bit value.
uint64_t Pack(uint32_t LowBits, uint32_t HighBits) {
  // Widen before shifting so the high half is not shifted out of a 32-bit type.
  const uint64_t Upper = static_cast<uint64_t>(HighBits) << 32;
  return Upper | static_cast<uint64_t>(LowBits);
}
45 #pragma omp end declare variant
48 /// NVPTX Implementation
51 #pragma omp begin declare variant match( \
52 device = {arch(nvptx, nvptx64)}, \
53 implementation = {extension(match_any)})
/// Splits the 64-bit \p Val into two 32-bit values using a PTX `mov.b64`
/// with a two-element destination.
///
/// \param Val      Value to split.
/// \param LowBits  Receives the first element written by the instruction.
/// \param HighBits Receives the second element written by the instruction.
void Unpack(uint64_t Val, uint32_t *LowBits, uint32_t *HighBits) {
  uint32_t LowBitsLocal, HighBitsLocal;
  // The template references three operands: %0 and %1 are the 32-bit outputs
  // and %2 is the 64-bit input, which therefore has to be supplied with the
  // 64-bit register constraint "l" (the same constraint Pack uses for Val).
  asm("mov.b64 {%0,%1}, %2;"
      : "=r"(LowBitsLocal), "=r"(HighBitsLocal)
      : "l"(Val));
  // The asm writes register operands; copy them out through the pointers.
  *LowBits = LowBitsLocal;
  *HighBits = HighBitsLocal;
}
/// Builds a 64-bit value from \p LowBits and \p HighBits using a PTX
/// `mov.b64` with a two-element source.
///
/// \param LowBits  First element of the source pair.
/// \param HighBits Second element of the source pair.
/// \returns The 64-bit value produced by the instruction.
uint64_t Pack(uint32_t LowBits, uint32_t HighBits) {
  // Output operand of the asm below; written in full by the instruction.
  uint64_t Val;
  asm("mov.b64 %0, {%1,%2};" : "=l"(Val) : "r"(LowBits), "r"(HighBits));
  return Val;
}
70 #pragma omp end declare variant
/// Reads \p Var from lane \p SrcLane. Declaration only; the definitions are
/// supplied inside the target-specific `declare variant` regions that follow.
int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane);

/// Reads \p Var from the lane \p LaneDelta positions further along, within
/// groups of \p Width lanes. Declaration only; the definitions are supplied
/// inside the target-specific `declare variant` regions that follow.
int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta,
                    int32_t Width);
77 /// AMDGCN Implementation
80 #pragma omp begin declare variant match(device = {arch(amdgcn)})
82 int32_t shuffle(uint64_t Mask
, int32_t Var
, int32_t SrcLane
) {
83 int Width
= mapping::getWarpSize();
84 int Self
= mapping::getThreadIdInWarp();
85 int Index
= SrcLane
+ (Self
& ~(Width
- 1));
86 return __builtin_amdgcn_ds_bpermute(Index
<< 2, Var
);
89 int32_t shuffleDown(uint64_t Mask
, int32_t Var
, uint32_t LaneDelta
,
91 int Self
= mapping::getThreadIdInWarp();
92 int Index
= Self
+ LaneDelta
;
93 Index
= (int)(LaneDelta
+ (Self
& (Width
- 1))) >= Width
? Self
: Index
;
94 return __builtin_amdgcn_ds_bpermute(Index
<< 2, Var
);
97 bool isSharedMemPtr(const void *Ptr
) {
98 return __builtin_amdgcn_is_shared(
99 (const __attribute__((address_space(0))) void *)Ptr
);
101 #pragma omp end declare variant
104 /// NVPTX Implementation
107 #pragma omp begin declare variant match( \
108 device = {arch(nvptx, nvptx64)}, \
109 implementation = {extension(match_any)})
111 int32_t shuffle(uint64_t Mask
, int32_t Var
, int32_t SrcLane
) {
112 return __nvvm_shfl_sync_idx_i32(Mask
, Var
, SrcLane
, 0x1f);
115 int32_t shuffleDown(uint64_t Mask
, int32_t Var
, uint32_t Delta
, int32_t Width
) {
116 int32_t T
= ((mapping::getWarpSize() - Width
) << 8) | 0x1f;
117 return __nvvm_shfl_sync_down_i32(Mask
, Var
, Delta
, T
);
120 bool isSharedMemPtr(const void *Ptr
) { return __nvvm_isspacep_shared(Ptr
); }
122 #pragma omp end declare variant
126 uint64_t utils::pack(uint32_t LowBits
, uint32_t HighBits
) {
127 return impl::Pack(LowBits
, HighBits
);
130 void utils::unpack(uint64_t Val
, uint32_t &LowBits
, uint32_t &HighBits
) {
131 impl::Unpack(Val
, &LowBits
, &HighBits
);
134 int32_t utils::shuffle(uint64_t Mask
, int32_t Var
, int32_t SrcLane
) {
135 return impl::shuffle(Mask
, Var
, SrcLane
);
138 int32_t utils::shuffleDown(uint64_t Mask
, int32_t Var
, uint32_t Delta
,
140 return impl::shuffleDown(Mask
, Var
, Delta
, Width
);
143 bool utils::isSharedMemPtr(void *Ptr
) { return impl::isSharedMemPtr(Ptr
); }
146 int32_t __kmpc_shuffle_int32(int32_t Val
, int16_t Delta
, int16_t SrcLane
) {
147 return impl::shuffleDown(lanes::All
, Val
, Delta
, SrcLane
);
150 int64_t __kmpc_shuffle_int64(int64_t Val
, int16_t Delta
, int16_t Width
) {
152 utils::unpack(Val
, lo
, hi
);
153 hi
= impl::shuffleDown(lanes::All
, hi
, Delta
, Width
);
154 lo
= impl::shuffleDown(lanes::All
, lo
, Delta
, Width
);
155 return utils::pack(lo
, hi
);
159 #pragma omp end declare target