1 //===--- Kernel.cpp - OpenMP device kernel interface -------------- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains the kernel entry points for the device.
11 //===----------------------------------------------------------------------===//
14 #include "Interface.h"
17 #include "Synchronization.h"
22 #pragma omp declare target
/// Runs the init routines of the runtime sub-systems shown below, forwarding
/// the execution-mode flag to each of them.
///
/// \param IsSPMD Execution-mode flag passed unchanged to every init call.
// NOTE(review): "inititialize" looks like a typo for "initialize"; a rename
// would also have to update the callers in __kmpc_target_init.
24 static void inititializeRuntime(bool IsSPMD
) {
25 // Order is important here: synchronize::init runs before mapping::init.
26 synchronize::init(IsSPMD
);
27 mapping::init(IsSPMD
);
31 /// Simple generic state machine for worker threads.
///
/// \param Ident Source location identification, forwarded by
/// __kmpc_target_init.
32 static void genericStateMachine(IdentTy
*Ident
) {
33 FunctionTracingRAII();
// This thread's id within its block; passed to the work function below.
35 uint32_t TId
= mapping::getThreadIdInBlock();
// Receives the work function from __kmpc_kernel_parallel.
// NOTE(review): if ParallelRegionFnTy is a pointer type, nullptr would be the
// idiomatic initializer instead of 0 - confirm against its declaration.
38 ParallelRegionFnTy WorkFn
= 0;
40 // Wait for the signal that we have a new work function.
41 synchronize::threads();
43 // Retrieve the work function from the runtime.
44 bool IsActive
= __kmpc_kernel_parallel(&WorkFn
);
46 // If there is nothing more to do, break out of the state machine by
47 // returning to the caller.
// Asserts that the runtime is not in SPMD mode.
52 ASSERT(!mapping::isSPMDMode());
// Call the work function through a (uint32_t, uint32_t) signature, passing 0
// and this thread's id as arguments.
53 ((void (*)(uint32_t, uint32_t))WorkFn
)(0, TId
);
// Presumably the end-of-region counterpart of __kmpc_kernel_parallel above;
// confirm against its definition.
54 __kmpc_kernel_end_parallel();
// Synchronize with the other threads again.
57 synchronize::threads();
66 /// \param Ident Source location identification, can be NULL.
/// \param Mode Execution mode bits; only OMP_TGT_EXEC_MODE_SPMD is tested here.
/// \param UseGenericStateMachine Whether threads that pass the thread-id check
/// below enter genericStateMachine.
///
/// The trailing unnamed bool parameter is not used.
68 int32_t __kmpc_target_init(IdentTy
*Ident
, int8_t Mode
,
69 bool UseGenericStateMachine
, bool) {
70 FunctionTracingRAII();
// Derive the SPMD flag from the mode bits.
71 const bool IsSPMD
= Mode
& OMP_TGT_EXEC_MODE_SPMD
;
73 inititializeRuntime(/* IsSPMD */ true);
74 synchronize::threadsAligned();
76 inititializeRuntime(/* IsSPMD */ false);
77 // No need to wait since only the main threads will execute user
78 // code and workers will run into a barrier right away.
// NOTE(review): presumably lets the compiler assume the freshly initialized
// runtime state - confirm against state::assumeInitialState.
82 state::assumeInitialState(IsSPMD
);
86 if (mapping::isInitialThreadInLevel0(IsSPMD
))
89 // Enter the generic state machine if enabled and if this thread can possibly
90 // be an active worker thread.
92 // The latter check is important for NVIDIA Pascal (but not Volta) and AMD
93 // GPUs. In those cases, a single thread can apparently satisfy a barrier on
94 // behalf of all threads in the same warp. Thus, it would not be safe for
95 // other threads in the main thread's warp to reach the first
96 // synchronize::threads call in genericStateMachine before the main thread
97 // reaches its corresponding synchronize::threads call: that would permit all
98 // active worker threads to proceed before the main thread has actually set
99 // state::ParallelRegionFn, and then they would immediately quit without
100 // doing any work. mapping::getBlockSize() does not include any of the main
101 // thread's warp, so none of its threads can ever be active worker threads.
102 if (UseGenericStateMachine
&&
103 mapping::getThreadIdInBlock() < mapping::getBlockSize())
104 genericStateMachine(Ident
);
// Report this thread's id within its block to the caller.
106 return mapping::getThreadIdInBlock();
109 /// De-Initialization
111 /// In non-SPMD mode, this function releases the workers trapped in a state
112 /// machine and also any memory dynamically allocated by the runtime.
114 /// \param Ident Source location identification, can be NULL.
/// \param Mode Execution mode bits; only OMP_TGT_EXEC_MODE_SPMD is tested here.
///
/// The trailing unnamed bool parameter is not used.
116 void __kmpc_target_deinit(IdentTy
*Ident
, int8_t Mode
, bool) {
117 FunctionTracingRAII();
// Derive the SPMD flag from the mode bits.
118 const bool IsSPMD
= Mode
& OMP_TGT_EXEC_MODE_SPMD
;
// NOTE(review): presumably lets the compiler assume the initial runtime
// state - confirm against state::assumeInitialState.
119 state::assumeInitialState(IsSPMD
);
123 // Signal the workers to exit the state machine and exit the kernel.
// The signal is a null state::ParallelRegionFn; the comment in
// __kmpc_target_init notes that workers quit when no work function is set.
124 state::ParallelRegionFn
= nullptr;
/// Reports whether the runtime is in SPMD execution mode.
///
/// \return The result of mapping::isSPMDMode(), converted to int8_t.
127 int8_t __kmpc_is_spmd_exec_mode() {
128 FunctionTracingRAII();
129 return mapping::isSPMDMode();
133 #pragma omp end declare target