//===--- Kernel.cpp - OpenMP device kernel interface -------------- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the kernel entry points for the device.
//
//===----------------------------------------------------------------------===//
13 #include "Shared/Environment.h"
15 #include "Allocator.h"
17 #include "DeviceTypes.h"
18 #include "Interface.h"
21 #include "Synchronization.h"
22 #include "Workshare.h"
24 #include "llvm/Frontend/OpenMP/OMPDeviceConstants.h"
28 #pragma omp begin declare target device_type(nohost)
31 inititializeRuntime(bool IsSPMD
, KernelEnvironmentTy
&KernelEnvironment
,
32 KernelLaunchEnvironmentTy
&KernelLaunchEnvironment
) {
33 // Order is important here.
34 synchronize::init(IsSPMD
);
35 mapping::init(IsSPMD
);
36 state::init(IsSPMD
, KernelEnvironment
, KernelLaunchEnvironment
);
37 allocator::init(IsSPMD
, KernelEnvironment
);
38 workshare::init(IsSPMD
);
41 /// Simple generic state machine for worker threads.
42 static void genericStateMachine(IdentTy
*Ident
) {
43 uint32_t TId
= mapping::getThreadIdInBlock();
46 ParallelRegionFnTy WorkFn
= nullptr;
48 // Wait for the signal that we have a new work function.
49 synchronize::threads(atomic::seq_cst
);
51 // Retrieve the work function from the runtime.
52 bool IsActive
= __kmpc_kernel_parallel(&WorkFn
);
54 // If there is nothing more to do, break out of the state machine by
55 // returning to the caller.
60 ASSERT(!mapping::isSPMDMode(), nullptr);
61 ((void (*)(uint32_t, uint32_t))WorkFn
)(0, TId
);
62 __kmpc_kernel_end_parallel();
65 synchronize::threads(atomic::seq_cst
);
74 /// \param Ident Source location identification, can be NULL.
76 int32_t __kmpc_target_init(KernelEnvironmentTy
&KernelEnvironment
,
77 KernelLaunchEnvironmentTy
&KernelLaunchEnvironment
) {
78 ConfigurationEnvironmentTy
&Configuration
= KernelEnvironment
.Configuration
;
79 bool IsSPMD
= Configuration
.ExecMode
&
80 llvm::omp::OMPTgtExecModeFlags::OMP_TGT_EXEC_MODE_SPMD
;
81 bool UseGenericStateMachine
= Configuration
.UseGenericStateMachine
;
83 inititializeRuntime(/*IsSPMD=*/true, KernelEnvironment
,
84 KernelLaunchEnvironment
);
85 synchronize::threadsAligned(atomic::relaxed
);
87 inititializeRuntime(/*IsSPMD=*/false, KernelEnvironment
,
88 KernelLaunchEnvironment
);
89 // No need to wait since only the main threads will execute user
90 // code and workers will run into a barrier right away.
94 state::assumeInitialState(IsSPMD
);
96 // Synchronize to ensure the assertions above are in an aligned region.
97 // The barrier is eliminated later.
98 synchronize::threadsAligned(atomic::relaxed
);
102 if (mapping::isInitialThreadInLevel0(IsSPMD
))
105 // Enter the generic state machine if enabled and if this thread can possibly
106 // be an active worker thread.
108 // The latter check is important for NVIDIA Pascal (but not Volta) and AMD
109 // GPU. In those cases, a single thread can apparently satisfy a barrier on
110 // behalf of all threads in the same warp. Thus, it would not be safe for
111 // other threads in the main thread's warp to reach the first
112 // synchronize::threads call in genericStateMachine before the main thread
113 // reaches its corresponding synchronize::threads call: that would permit all
114 // active worker threads to proceed before the main thread has actually set
115 // state::ParallelRegionFn, and then they would immediately quit without
116 // doing any work. mapping::getMaxTeamThreads() does not include any of the
117 // main thread's warp, so none of its threads can ever be active worker
119 if (UseGenericStateMachine
&&
120 mapping::getThreadIdInBlock() < mapping::getMaxTeamThreads(IsSPMD
))
121 genericStateMachine(KernelEnvironment
.Ident
);
123 return mapping::getThreadIdInBlock();
126 /// De-Initialization
128 /// In non-SPMD, this function releases the workers trapped in a state machine
129 /// and also any memory dynamically allocated by the runtime.
131 /// \param Ident Source location identification, can be NULL.
133 void __kmpc_target_deinit() {
134 bool IsSPMD
= mapping::isSPMDMode();
138 if (mapping::isInitialThreadInLevel0(IsSPMD
)) {
139 // Signal the workers to exit the state machine and exit the kernel.
140 state::ParallelRegionFn
= nullptr;
141 } else if (!state::getKernelEnvironment()
142 .Configuration
.UseGenericStateMachine
) {
143 // Retrieve the work function just to ensure we always call
144 // __kmpc_kernel_parallel even if a custom state machine is used.
145 // TODO: this is not super pretty. The problem is we create the call to
146 // __kmpc_kernel_parallel in the openmp-opt pass but while we optimize it
147 // is not there yet. Thus, we assume we never reach it from
148 // __kmpc_target_deinit. That allows us to remove the store in there to
149 // ParallelRegionFn, which leads to bad results later on.
150 ParallelRegionFnTy WorkFn
= nullptr;
151 __kmpc_kernel_parallel(&WorkFn
);
152 ASSERT(WorkFn
== nullptr, nullptr);
156 int8_t __kmpc_is_spmd_exec_mode() { return mapping::isSPMDMode(); }
159 #pragma omp end declare target