[sanitizer] Improve FreeBSD ASLR detection
[llvm-project.git] / openmp / libomptarget / DeviceRTL / src / Parallelism.cpp
blob610512a5f799b76468e8f0560931a0f292980a12
//===---- Parallelism.cpp - OpenMP GPU parallel implementation ---- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Parallel implementation in the GPU. Here is the pattern:
//
//    while (not finished) {
//      if (master) {
//        sequential code, decide which par loop to do, or if finished
//        __kmpc_kernel_prepare_parallel() // exec by master only
//      }
//      syncthreads // A
//      __kmpc_kernel_parallel() // exec by all
//      if (this thread is included in the parallel) {
//        switch () for all parallel loops
//        __kmpc_kernel_end_parallel() // exec only by threads in parallel
//      }
//    }
//
//    The reason we don't exec end_parallel for the threads not included
//    in the parallel loop is that for each barrier in the parallel
//    region, these non-included threads will cycle through the
//    syncthread A. Thus they must preserve their current threadId that
//    is larger than thread in team.
//
//    To make a long story short...
//
//===----------------------------------------------------------------------===//
35 #include "Debug.h"
36 #include "Interface.h"
37 #include "Mapping.h"
38 #include "State.h"
39 #include "Synchronization.h"
40 #include "Types.h"
41 #include "Utils.h"
43 using namespace _OMP;
45 #pragma omp declare target
47 namespace {
49 uint32_t determineNumberOfThreads(int32_t NumThreadsClause) {
50 uint32_t NThreadsICV =
51 NumThreadsClause != -1 ? NumThreadsClause : icv::NThreads;
52 uint32_t NumThreads = mapping::getBlockSize();
54 if (NThreadsICV != 0 && NThreadsICV < NumThreads)
55 NumThreads = NThreadsICV;
57 // Round down to a multiple of WARPSIZE since it is legal to do so in OpenMP.
58 if (NumThreads < mapping::getWarpSize())
59 NumThreads = 1;
60 else
61 NumThreads = (NumThreads & ~((uint32_t)mapping::getWarpSize() - 1));
63 return NumThreads;
66 // Invoke an outlined parallel function unwrapping arguments (up to 32).
67 void invokeMicrotask(int32_t global_tid, int32_t bound_tid, void *fn,
68 void **args, int64_t nargs) {
69 DebugEntryRAII Entry(__FILE__, __LINE__, "<OpenMP Outlined Function>");
70 switch (nargs) {
71 #include "generated_microtask_cases.gen"
72 default:
73 PRINT("Too many arguments in kmp_invoke_microtask, aborting execution.\n");
74 __builtin_trap();
78 } // namespace
80 extern "C" {
82 void __kmpc_parallel_51(IdentTy *ident, int32_t, int32_t if_expr,
83 int32_t num_threads, int proc_bind, void *fn,
84 void *wrapper_fn, void **args, int64_t nargs) {
85 FunctionTracingRAII();
87 uint32_t TId = mapping::getThreadIdInBlock();
88 // Handle the serialized case first, same for SPMD/non-SPMD.
89 if (OMP_UNLIKELY(!if_expr || icv::Level)) {
90 state::enterDataEnvironment();
91 ++icv::Level;
92 invokeMicrotask(TId, 0, fn, args, nargs);
93 state::exitDataEnvironment();
94 return;
97 uint32_t NumThreads = determineNumberOfThreads(num_threads);
98 if (mapping::isSPMDMode()) {
99 // Avoid the race between the read of the `icv::Level` above and the write
100 // below by synchronizing all threads here.
101 synchronize::threadsAligned();
103 // Note that the order here is important. `icv::Level` has to be updated
104 // last or the other updates will cause a thread specific state to be
105 // created.
106 state::ValueRAII ParallelTeamSizeRAII(state::ParallelTeamSize, NumThreads,
107 1u, TId == 0);
108 state::ValueRAII ActiveLevelRAII(icv::ActiveLevel, 1u, 0u, TId == 0);
109 state::ValueRAII LevelRAII(icv::Level, 1u, 0u, TId == 0);
111 // Synchronize all threads after the main thread (TId == 0) set up the
112 // team state properly.
113 synchronize::threadsAligned();
115 ASSERT(state::ParallelTeamSize == NumThreads);
116 ASSERT(icv::ActiveLevel == 1u);
117 ASSERT(icv::Level == 1u);
119 if (TId < NumThreads)
120 invokeMicrotask(TId, 0, fn, args, nargs);
122 // Synchronize all threads at the end of a parallel region.
123 synchronize::threadsAligned();
126 // Synchronize all threads to make sure every thread exits the scope above;
127 // otherwise the following assertions and the assumption in
128 // __kmpc_target_deinit may not hold.
129 synchronize::threadsAligned();
131 ASSERT(state::ParallelTeamSize == 1u);
132 ASSERT(icv::ActiveLevel == 0u);
133 ASSERT(icv::Level == 0u);
134 return;
137 // We do *not* create a new data environment because all threads in the team
138 // that are active are now running this parallel region. They share the
139 // TeamState, which has an increase level-var and potentially active-level
140 // set, but they do not have individual ThreadStates yet. If they ever
141 // modify the ICVs beyond this point a ThreadStates will be allocated.
143 bool IsActiveParallelRegion = NumThreads > 1;
144 if (!IsActiveParallelRegion) {
145 state::ValueRAII LevelRAII(icv::Level, 1u, 0u, true);
146 invokeMicrotask(TId, 0, fn, args, nargs);
147 return;
150 void **GlobalArgs = nullptr;
151 if (nargs) {
152 __kmpc_begin_sharing_variables(&GlobalArgs, nargs);
153 #pragma unroll
154 for (int I = 0; I < nargs; I++)
155 GlobalArgs[I] = args[I];
159 // Note that the order here is important. `icv::Level` has to be updated
160 // last or the other updates will cause a thread specific state to be
161 // created.
162 state::ValueRAII ParallelTeamSizeRAII(state::ParallelTeamSize, NumThreads,
163 1u, true);
164 state::ValueRAII ParallelRegionFnRAII(state::ParallelRegionFn, wrapper_fn,
165 (void *)nullptr, true);
166 state::ValueRAII ActiveLevelRAII(icv::ActiveLevel, 1u, 0u, true);
167 state::ValueRAII LevelRAII(icv::Level, 1u, 0u, true);
169 // Master signals work to activate workers.
170 synchronize::threads();
171 // Master waits for workers to signal.
172 synchronize::threads();
175 if (nargs)
176 __kmpc_end_sharing_variables();
179 __attribute__((noinline)) bool
180 __kmpc_kernel_parallel(ParallelRegionFnTy *WorkFn) {
181 FunctionTracingRAII();
182 // Work function and arguments for L1 parallel region.
183 *WorkFn = state::ParallelRegionFn;
185 // If this is the termination signal from the master, quit early.
186 if (!*WorkFn)
187 return false;
189 // Set to true for workers participating in the parallel region.
190 uint32_t TId = mapping::getThreadIdInBlock();
191 bool ThreadIsActive = TId < state::ParallelTeamSize;
192 return ThreadIsActive;
195 __attribute__((noinline)) void __kmpc_kernel_end_parallel() {
196 FunctionTracingRAII();
197 // In case we have modified an ICV for this thread before a ThreadState was
198 // created. We drop it now to not contaminate the next parallel region.
199 ASSERT(!mapping::isSPMDMode());
200 uint32_t TId = mapping::getThreadIdInBlock();
201 state::resetStateForThread(TId);
202 ASSERT(!mapping::isSPMDMode());
205 uint16_t __kmpc_parallel_level(IdentTy *, uint32_t) {
206 FunctionTracingRAII();
207 return omp_get_level();
210 int32_t __kmpc_global_thread_num(IdentTy *) {
211 FunctionTracingRAII();
212 return omp_get_thread_num();
215 void __kmpc_push_num_teams(IdentTy *loc, int32_t tid, int32_t num_teams,
216 int32_t thread_limit) {
217 FunctionTracingRAII();
220 void __kmpc_push_proc_bind(IdentTy *loc, uint32_t tid, int proc_bind) {
221 FunctionTracingRAII();
225 #pragma omp end declare target