[sanitizer] Improve FreeBSD ASLR detection
[llvm-project.git] / openmp / libomptarget / DeviceRTL / src / Utils.cpp
blob0816f078e2ab498b5fd5137473268f21db18efa5
1 //===------- Utils.cpp - OpenMP device runtime utility functions -- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //
10 //===----------------------------------------------------------------------===//
12 #include "Utils.h"
14 #include "Debug.h"
15 #include "Interface.h"
16 #include "Mapping.h"
18 #pragma omp declare target
20 using namespace _OMP;
22 namespace _OMP {
23 /// Helper to keep code alive without introducing a performance penalty.
24 __attribute__((used, retain, weak, optnone, cold)) void keepAlive() {
25 __kmpc_get_hardware_thread_id_in_block();
26 __kmpc_get_hardware_num_threads_in_block();
27 __kmpc_get_warp_size();
28 __kmpc_barrier_simple_spmd(nullptr, 0);
29 __kmpc_barrier_simple_generic(nullptr, 0);
31 } // namespace _OMP
33 namespace impl {
35 /// AMDGCN Implementation
36 ///
37 ///{
38 #pragma omp begin declare variant match(device = {arch(amdgcn)})
/// Split the 64-bit value \p Val into its two 32-bit halves.
///
/// \param Val      Value to split.
/// \param LowBits  Receives bits [31:0] of \p Val.
/// \param HighBits Receives bits [63:32] of \p Val.
void Unpack(uint64_t Val, uint32_t *LowBits, uint32_t *HighBits) {
  // Operate on the fixed-width value directly: truncation yields the low word
  // and a 32-bit right shift yields the high word. This avoids `UL`-suffixed
  // mask literals, whose width depends on `unsigned long`, so no static_assert
  // on sizeof(unsigned long) is needed.
  *LowBits = static_cast<uint32_t>(Val);
  *HighBits = static_cast<uint32_t>(Val >> 32);
}
/// Combine two 32-bit words into one 64-bit value.
///
/// \param LowBits  Word placed in bits [31:0] of the result.
/// \param HighBits Word placed in bits [63:32] of the result.
/// \returns The combined 64-bit value.
uint64_t Pack(uint32_t LowBits, uint32_t HighBits) {
  // Widen before shifting so the high word is not shifted out of a 32-bit type.
  const uint64_t Upper = static_cast<uint64_t>(HighBits) << 32;
  const uint64_t Lower = static_cast<uint64_t>(LowBits);
  return Upper | Lower;
}
50 #pragma omp end declare variant
52 /// NVPTX Implementation
53 ///
54 ///{
55 #pragma omp begin declare variant match( \
56 device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
/// Split the 64-bit value \p Val into two 32-bit words using a single PTX
/// `mov.b64` with a two-element destination.
///
/// \param Val      Value to split.
/// \param LowBits  Receives the first destination element of the move.
/// \param HighBits Receives the second destination element of the move.
///
/// NOTE(review): the first element is presumably the least-significant word,
/// matching the parameter names -- confirm against the PTX ISA.
void Unpack(uint64_t Val, uint32_t *LowBits, uint32_t *HighBits) {
  // The asm writes into register-constrained locals; the results are stored
  // through the out-pointers afterwards.
  uint32_t FirstWord, SecondWord;
  asm("mov.b64 {%0,%1}, %2;"
      : "=r"(FirstWord), "=r"(SecondWord)
      : "l"(Val));
  *LowBits = FirstWord;
  *HighBits = SecondWord;
}
/// Combine two 32-bit words into one 64-bit value using a single PTX
/// `mov.b64` with a two-element source.
///
/// \param LowBits  First source element of the move.
/// \param HighBits Second source element of the move.
/// \returns The 64-bit register produced by the move.
///
/// NOTE(review): the first element presumably becomes the least-significant
/// word, matching the parameter names -- confirm against the PTX ISA.
uint64_t Pack(uint32_t LowBits, uint32_t HighBits) {
  uint64_t Combined;
  asm("mov.b64 %0, {%1,%2};"
      : "=l"(Combined)
      : "r"(LowBits), "r"(HighBits));
  return Combined;
}
73 #pragma omp end declare variant
75 /// AMDGCN Implementation
76 ///
77 ///{
78 #pragma omp begin declare variant match(device = {arch(amdgcn)})
80 int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) {
81 int Width = mapping::getWarpSize();
82 int Self = mapping::getThreadIdInWarp();
83 int Index = SrcLane + (Self & ~(Width - 1));
84 return __builtin_amdgcn_ds_bpermute(Index << 2, Var);
87 int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t LaneDelta,
88 int32_t Width) {
89 int Self = mapping::getThreadIdInWarp();
90 int Index = Self + LaneDelta;
91 Index = (int)(LaneDelta + (Self & (Width - 1))) >= Width ? Self : Index;
92 return __builtin_amdgcn_ds_bpermute(Index << 2, Var);
95 #pragma omp end declare variant
96 ///}
98 /// NVPTX Implementation
99 ///
100 ///{
101 #pragma omp begin declare variant match( \
102 device = {arch(nvptx, nvptx64)}, implementation = {extension(match_any)})
104 int32_t shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) {
105 return __nvvm_shfl_sync_idx_i32(Mask, Var, SrcLane, 0x1f);
108 int32_t shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta, int32_t Width) {
109 int32_t T = ((mapping::getWarpSize() - Width) << 8) | 0x1f;
110 return __nvvm_shfl_sync_down_i32(Mask, Var, Delta, T);
113 #pragma omp end declare variant
114 } // namespace impl
116 uint64_t utils::pack(uint32_t LowBits, uint32_t HighBits) {
117 return impl::Pack(LowBits, HighBits);
120 void utils::unpack(uint64_t Val, uint32_t &LowBits, uint32_t &HighBits) {
121 impl::Unpack(Val, &LowBits, &HighBits);
124 int32_t utils::shuffle(uint64_t Mask, int32_t Var, int32_t SrcLane) {
125 return impl::shuffle(Mask, Var, SrcLane);
128 int32_t utils::shuffleDown(uint64_t Mask, int32_t Var, uint32_t Delta,
129 int32_t Width) {
130 return impl::shuffleDown(Mask, Var, Delta, Width);
133 extern "C" {
134 int32_t __kmpc_shuffle_int32(int32_t Val, int16_t Delta, int16_t SrcLane) {
135 FunctionTracingRAII();
136 return impl::shuffleDown(lanes::All, Val, Delta, SrcLane);
139 int64_t __kmpc_shuffle_int64(int64_t Val, int16_t Delta, int16_t Width) {
140 FunctionTracingRAII();
141 uint32_t lo, hi;
142 utils::unpack(Val, lo, hi);
143 hi = impl::shuffleDown(lanes::All, hi, Delta, Width);
144 lo = impl::shuffleDown(lanes::All, lo, Delta, Width);
145 return utils::pack(lo, hi);
149 #pragma omp end declare target