Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / openmp / libomptarget / DeviceRTL / src / Utils.cpp
blobb39465aaa2ace5fdbc198d2bb224ce9f1337da54
1 //===------- Utils.cpp - OpenMP device runtime utility functions -- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //
10 //===----------------------------------------------------------------------===//
12 #include "Utils.h"
14 #include "Debug.h"
15 #include "Interface.h"
16 #include "Mapping.h"
18 #pragma omp begin declare target device_type(nohost)
20 using namespace ompx;
22 extern "C" [[gnu::weak]] int IsSPMDMode;
24 namespace impl {
/// Generic fallback used when no architecture-specific variant is selected:
/// it never classifies \p Ptr as a shared-memory pointer.
bool isSharedMemPtr(const void *Ptr) {
  (void)Ptr;
  return false;
}
/// Split the 64-bit \p Val into two 32-bit halves that are stored through
/// \p LowBits and \p HighBits. Only declared here; the architecture-specific
/// `declare variant` regions below supply the definitions.
27 void Unpack(uint64_t Val, uint32_t *LowBits, uint32_t *HighBits);
/// Combine \p LowBits and \p HighBits into one 64-bit value. Only declared
/// here; the architecture-specific `declare variant` regions below supply the
/// definitions.
28 uint64_t Pack(uint32_t LowBits, uint32_t HighBits);
30 /// AMDGCN Implementation
31 ///
32 ///{
33 #pragma omp begin declare variant match(device = {arch(amdgcn)})
/// Split \p Val into its lower and upper 32 bits.
///
/// \param Val      64-bit value to split.
/// \param LowBits  Receives bits [31:0] of \p Val.
/// \param HighBits Receives bits [63:32] of \p Val.
void Unpack(uint64_t Val, uint32_t *LowBits, uint32_t *HighBits) {
  // The masks are spelled as `unsigned long` literals, so that type has to be
  // 64 bits wide for them to mean what they say.
  static_assert(sizeof(unsigned long) == 8, "");
  constexpr unsigned long LowerMask = 0x00000000FFFFFFFFUL;
  constexpr unsigned long UpperMask = 0xFFFFFFFF00000000UL;

  // Store the low half first, then the high half.
  const uint64_t LowerHalf = Val & LowerMask;
  *LowBits = static_cast<uint32_t>(LowerHalf);

  const uint64_t UpperHalf = (Val & UpperMask) >> 32;
  *HighBits = static_cast<uint32_t>(UpperHalf);
}
/// Build a 64-bit value from two 32-bit halves.
///
/// \param LowBits  Becomes bits [31:0] of the result.
/// \param HighBits Becomes bits [63:32] of the result.
/// \returns The combined 64-bit value.
uint64_t Pack(uint32_t LowBits, uint32_t HighBits) {
  // Widen before shifting so the upper half is not shifted out of a 32-bit
  // type.
  const uint64_t Upper = static_cast<uint64_t>(HighBits) << 32;
  const uint64_t Lower = static_cast<uint64_t>(LowBits);
  return Upper | Lower;
}
45 #pragma omp end declare variant
46 ///}
48 /// NVPTX Implementation
49 ///
50 ///{
51 #pragma omp begin declare variant match( \
52 device = {arch(nvptx, nvptx64)}, \
53 implementation = {extension(match_any)})
/// Split \p Val into two 32-bit halves with a single `mov.b64` whose
/// destination is a pair of 32-bit registers.
///
/// \param Val      64-bit value to split.
/// \param LowBits  Receives the first element of the register pair.
/// \param HighBits Receives the second element of the register pair.
void Unpack(uint64_t Val, uint32_t *LowBits, uint32_t *HighBits) {
  // The asm writes into locals; the results are stored through the pointers
  // afterwards, low half first.
  uint32_t Lo, Hi;
  asm("mov.b64 {%0,%1}, %2;" : "=r"(Lo), "=r"(Hi) : "l"(Val));
  *LowBits = Lo;
  *HighBits = Hi;
}
/// Combine two 32-bit values into one 64-bit value with a single `mov.b64`
/// whose source is the register pair {\p LowBits, \p HighBits}.
///
/// \returns The 64-bit value produced by the instruction.
uint64_t Pack(uint32_t LowBits, uint32_t HighBits) {
  uint64_t Packed;
  asm("mov.b64 %0, {%1,%2};" : "=l"(Packed) : "r"(LowBits), "r"(HighBits));
  return Packed;
}
70 #pragma omp end declare variant
71 ///}
/// Cross-lane exchange of \p Var selected by \p SrcLane. Only declared here;
/// the architecture-specific `declare variant` regions below supply the
/// definitions.
73 int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane);
/// Cross-lane exchange of \p Var from the lane \p LaneDelta positions further
/// on, limited by \p Width. Only declared here; the architecture-specific
/// `declare variant` regions below supply the definitions.
74 int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta,
75 int32_t Width);
77 /// AMDGCN Implementation
78 ///
79 ///{
80 #pragma omp begin declare variant match(device = {arch(amdgcn)})
82 int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) {
83 int Width = mapping::getWarpSize();
84 int Self = mapping::getThreadIdInWarp();
85 int Index = SrcLane + (Self & ~(Width - 1));
86 return __builtin_amdgcn_ds_bpermute(Index << 2, Var);
89 int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta,
90 int32_t Width) {
91 int Self = mapping::getThreadIdInWarp();
92 int Index = Self + LaneDelta;
93 Index = (int)(LaneDelta + (Self & (Width - 1))) >= Width ? Self : Index;
94 return __builtin_amdgcn_ds_bpermute(Index << 2, Var);
97 bool isSharedMemPtr(const void *Ptr) {
98 return __builtin_amdgcn_is_shared(
99 (const __attribute__((address_space(0))) void *)Ptr);
101 #pragma omp end declare variant
102 ///}
104 /// NVPTX Implementation
106 ///{
107 #pragma omp begin declare variant match( \
108 device = {arch(nvptx, nvptx64)}, \
109 implementation = {extension(match_any)})
111 int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) {
112 return __nvvm_shfl_sync_idx_i32(Mask, Var, SrcLane, 0x1f);
115 int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, int32_t Width) {
116 int32_t T = ((mapping::getWarpSize() - Width) << 8) | 0x1f;
117 return __nvvm_shfl_sync_down_i32(Mask, Var, Delta, T);
120 bool isSharedMemPtr(const void *Ptr) { return __nvvm_isspacep_shared(Ptr); }
122 #pragma omp end declare variant
123 ///}
124 } // namespace impl
126 uint64_t utils::pack(uint32_t LowBits, uint32_t HighBits) {
127 return impl::Pack(LowBits, HighBits);
130 void utils::unpack(uint64_t Val, uint32_t &LowBits, uint32_t &HighBits) {
131 impl::Unpack(Val, &LowBits, &HighBits);
134 int32_t utils::shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) {
135 return impl::shuffle(Mask, Var, SrcLane);
138 int32_t utils::shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta,
139 int32_t Width) {
140 return impl::shuffleDown(Mask, Var, Delta, Width);
143 bool utils::isSharedMemPtr(void *Ptr) { return impl::isSharedMemPtr(Ptr); }
145 extern "C" {
146 int32_t __kmpc_shuffle_int32(int32_t Val, int16_t Delta, int16_t SrcLane) {
147 return impl::shuffleDown(lanes::All, Val, Delta, SrcLane);
150 int64_t __kmpc_shuffle_int64(int64_t Val, int16_t Delta, int16_t Width) {
151 uint32_t lo, hi;
152 utils::unpack(Val, lo, hi);
153 hi = impl::shuffleDown(lanes::All, hi, Delta, Width);
154 lo = impl::shuffleDown(lanes::All, lo, Delta, Width);
155 return utils::pack(lo, hi);
159 #pragma omp end declare target