Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / openmp / libomptarget / src / private.h
blob2a06bdbd1b708c4c41bb7dbd3cb26237ab66f440
1 //===---------- private.h - Target independent OpenMP target RTL ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // Private function declarations and helper macros for debugging output.
11 //===----------------------------------------------------------------------===//
13 #ifndef _OMPTARGET_PRIVATE_H
14 #define _OMPTARGET_PRIVATE_H
16 #include "device.h"
17 #include <Debug.h>
18 #include <SourceInfo.h>
19 #include <omptarget.h>
21 #include <cstdint>
23 extern int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
24 void **ArgsBase, void **Args, int64_t *ArgSizes,
25 int64_t *ArgTypes, map_var_info_t *ArgNames,
26 void **ArgMappers, AsyncInfoTy &AsyncInfo,
27 bool FromMapper = false);
29 extern int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
30 void **ArgBases, void **Args, int64_t *ArgSizes,
31 int64_t *ArgTypes, map_var_info_t *ArgNames,
32 void **ArgMappers, AsyncInfoTy &AsyncInfo,
33 bool FromMapper = false);
35 extern int targetDataUpdate(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
36 void **ArgsBase, void **Args, int64_t *ArgSizes,
37 int64_t *ArgTypes, map_var_info_t *ArgNames,
38 void **ArgMappers, AsyncInfoTy &AsyncInfo,
39 bool FromMapper = false);
41 extern int target(ident_t *Loc, DeviceTy &Device, void *HostPtr,
42 KernelArgsTy &KernelArgs, AsyncInfoTy &AsyncInfo);
44 extern int target_activate_rr(DeviceTy &Device, uint64_t MemorySize,
45 void *ReqAddr, bool isRecord, bool SaveOutput);
47 extern int target_replay(ident_t *Loc, DeviceTy &Device, void *HostPtr,
48 void *DeviceMemory, int64_t DeviceMemorySize,
49 void **TgtArgs, ptrdiff_t *TgtOffsets, int32_t NumArgs,
50 int32_t NumTeams, int32_t ThreadLimit,
51 uint64_t LoopTripCount, AsyncInfoTy &AsyncInfo);
53 extern void handleTargetOutcome(bool Success, ident_t *Loc);
54 extern bool checkDeviceAndCtors(int64_t &DeviceID, ident_t *Loc);
55 extern void *targetAllocExplicit(size_t Size, int DeviceNum, int Kind,
56 const char *Name);
57 extern void targetFreeExplicit(void *DevicePtr, int DeviceNum, int Kind,
58 const char *Name);
59 extern void *targetLockExplicit(void *HostPtr, size_t Size, int DeviceNum,
60 const char *Name);
61 extern void targetUnlockExplicit(void *HostPtr, int DeviceNum,
62 const char *Name);
// This structure stores information of a mapped memory region.
struct MapComponentInfoTy {
  // Every member has a default initializer so that a default-constructed
  // object holds well-defined values (null pointers, zero size and type)
  // instead of indeterminate ones.
  void *Base = nullptr;
  void *Begin = nullptr;
  int64_t Size = 0;
  int64_t Type = 0;
  void *Name = nullptr;

  /// Creates an empty component; all members take their default initializers.
  MapComponentInfoTy() = default;

  /// Creates a component from explicit values, stored verbatim.
  MapComponentInfoTy(void *Base, void *Begin, int64_t Size, int64_t Type,
                     void *Name)
      : Base(Base), Begin(Begin), Size(Size), Type(Type), Name(Name) {}
};
77 // This structure stores all components of a user-defined mapper. The number of
78 // components are dynamically decided, so we utilize C++ STL vector
79 // implementation here.
80 struct MapperComponentsTy {
81 llvm::SmallVector<MapComponentInfoTy> Components;
82 int32_t size() { return Components.size(); }
// Pointer type of a compiler-generated mapper function. The generated function
// is named and shaped as follows:
//   void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
//                                             void *base, void *begin,
//                                             size_t size, int64_t type,
//                                             void * name);
// Note that the fourth parameter (size) is spelled int64_t in the alias.
using MapperFuncPtrTy = void (*)(void *, void *, void *, int64_t, int64_t,
                                 void *);
93 // Function pointer type for targetData* functions (targetDataBegin,
94 // targetDataEnd and targetDataUpdate).
95 typedef int (*TargetDataFuncPtrTy)(ident_t *, DeviceTy &, int32_t, void **,
96 void **, int64_t *, int64_t *,
97 map_var_info_t *, void **, AsyncInfoTy &,
98 bool);
100 // Implemented in libomp, they are called from within __tgt_* functions.
101 #ifdef __cplusplus
102 extern "C" {
103 #endif
/*!
 * The ident structure that describes a source location.
 * The struct is identical to the one in the kmp.h file.
 * We maintain the same data structure for compatibility.
 */
// Fixed-width scalar aliases under the kmp_* names.
typedef int kmp_int32;
typedef int64_t kmp_int64;
typedef intptr_t kmp_intptr_t;

// Opaque handle for a depend object.
typedef void *omp_depend_t;

// Task descriptor and its entry routine. The routine takes a kmp_int32 and a
// pointer to the task, and returns a kmp_int32.
struct kmp_task;
typedef struct kmp_task kmp_task_t;
typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, struct kmp_task *);
struct kmp_task {
  void *shareds;
  kmp_routine_entry_t routine;
  kmp_int32 part_id;
};
// Bit-level description of a task's flags. Field order and widths must not be
// changed: the two halves below are documented as 16 bits each, 32 in total.
typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */
  /* Compiler flags */             /* Total compiler flags must be 16 bits */
  unsigned tiedness : 1;           /* task is either tied (1) or untied (0) */
  unsigned final : 1;              /* task is final(1) so execute immediately */
  unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0
                              code path */
  unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to
                                     invoke destructors from the runtime */
  unsigned proxy : 1; /* task is a proxy task (it will be executed outside the
                         context of the RTL) */
  unsigned priority_specified : 1; /* set if the compiler provides priority
                                      setting for the task */
  unsigned detachable : 1;         /* 1 == can detach */
  unsigned hidden_helper : 1;      /* 1 == hidden helper task */
  unsigned reserved : 8;           /* reserved for compiler use */

  /* Library flags */       /* Total library flags must be 16 bits */
  unsigned tasktype : 1;    /* task is either explicit(1) or implicit (0) */
  unsigned task_serial : 1; // task is executed immediately (1) or deferred (0)
  unsigned tasking_ser : 1; // all tasks in team are either executed immediately
                            // (1) or may be deferred (0)
  unsigned team_serial : 1; // entire team is serial (1) [1 thread] or parallel
                            // (0) [>= 2 threads]
  /* If either team_serial or tasking_ser is set, task team may be NULL */
  /* Task State Flags: */
  unsigned started : 1;    /* 1==started, 0==not started */
  unsigned executing : 1;  /* 1==executing, 0==not executing */
  unsigned complete : 1;   /* 1==complete, 0==not complete */
  unsigned freed : 1;      /* 1==freed, 0==allocated */
  unsigned native : 1;     /* 1==gcc-compiled task, 0==intel */
  unsigned reserved31 : 7; /* reserved for library use */
} kmp_tasking_flags_t;

// Enforce the size requirement stated above at compile time, so an accidental
// edit of a field width is caught immediately.
static_assert(sizeof(kmp_tasking_flags_t) == 4,
              "kmp_tasking_flags_t must be exactly 32 bits");
155 // Compiler sends us this info:
156 typedef struct kmp_depend_info {
157 kmp_intptr_t base_addr;
158 size_t len;
159 struct {
160 bool in : 1;
161 bool out : 1;
162 bool mtx : 1;
163 } flags;
164 } kmp_depend_info_t;
// functions that extract info from libomp; keep in sync
//
// Every prototype below is declared with __attribute__((weak)).
// NOTE(review): presumably this lets the library link when libomp does not
// provide a symbol, in which case callers would have to check the function's
// address before calling it — confirm against the call sites.
int omp_get_default_device(void) __attribute__((weak));
int32_t __kmpc_global_thread_num(void *) __attribute__((weak));
int __kmpc_get_target_offload(void) __attribute__((weak));
void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps,
                          kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias,
                          kmp_depend_info_t *noalias_dep_list)
    __attribute__((weak));
// Used by TaskAsyncInfoWrapperTy to reach the async handle slot stored in the
// task currently executed by thread gtid.
void **__kmpc_omp_get_target_async_handle_ptr(kmp_int32 gtid)
    __attribute__((weak));
bool __kmpc_omp_has_task_team(kmp_int32 gtid) __attribute__((weak));
kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
                                  kmp_int32 flags, size_t sizeof_kmp_task_t,
                                  size_t sizeof_shareds,
                                  kmp_routine_entry_t task_entry)
    __attribute__((weak));

// Same parameter list as __kmpc_omp_task_alloc plus a trailing device_id.
kmp_task_t *
__kmpc_omp_target_task_alloc(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags,
                             size_t sizeof_kmp_task_t, size_t sizeof_shareds,
                             kmp_routine_entry_t task_entry,
                             kmp_int64 device_id) __attribute__((weak));

kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
                                    kmp_task_t *new_task, kmp_int32 ndeps,
                                    kmp_depend_info_t *dep_list,
                                    kmp_int32 ndeps_noalias,
                                    kmp_depend_info_t *noalias_dep_list)
    __attribute__((weak));
/*!
 * The argument set that is passed from asynchronous memory copy to block
 * version of memory copy invoked in helper task
 */
struct TargetMemcpyArgsTy {
  /*!
   * Common attributes
   */
  void *Dst;
  const void *Src;
  int DstDevice;
  int SrcDevice;

  /*!
   * The flag that denotes single dimensional or rectangle dimensional copy
   */
  bool IsRectMemcpy;

  /*!
   * Arguments for single dimensional copy
   */
  size_t Length;
  size_t DstOffset;
  size_t SrcOffset;

  /*!
   * Arguments for rectangle dimensional copy
   */
  size_t ElementSize;
  int NumDims;
  const size_t *Volume;
  const size_t *DstOffsets;
  const size_t *SrcOffsets;
  const size_t *DstDimensions;
  const size_t *SrcDimensions;

  /*!
   * Constructor for single dimensional copy. The rectangle-only members are
   * set to zero / nullptr and IsRectMemcpy is false.
   */
  TargetMemcpyArgsTy(void *Dst, const void *Src, size_t Length,
                     size_t DstOffset, size_t SrcOffset, int DstDevice,
                     int SrcDevice)
      : Dst(Dst), Src(Src), DstDevice(DstDevice), SrcDevice(SrcDevice),
        IsRectMemcpy(false), Length(Length), DstOffset(DstOffset),
        SrcOffset(SrcOffset), ElementSize(0), NumDims(0), Volume(nullptr),
        DstOffsets(nullptr), SrcOffsets(nullptr), DstDimensions(nullptr),
        SrcDimensions(nullptr) {}

  /*!
   * Constructor for rectangle dimensional copy. The single-dimension members
   * are set to zero and IsRectMemcpy is true. The array pointers are stored
   * as given; nothing is copied.
   */
  TargetMemcpyArgsTy(void *Dst, const void *Src, size_t ElementSize,
                     int NumDims, const size_t *Volume,
                     const size_t *DstOffsets, const size_t *SrcOffsets,
                     const size_t *DstDimensions, const size_t *SrcDimensions,
                     int DstDevice, int SrcDevice)
      : Dst(Dst), Src(Src), DstDevice(DstDevice), SrcDevice(SrcDevice),
        IsRectMemcpy(true), Length(0), DstOffset(0), SrcOffset(0),
        ElementSize(ElementSize), NumDims(NumDims), Volume(Volume),
        DstOffsets(DstOffsets), SrcOffsets(SrcOffsets),
        DstDimensions(DstDimensions), SrcDimensions(SrcDimensions) {}
};
/// Argument bundle describing a memset operation. The type deliberately has no
/// constructors so that it stays a PoD.
struct TargetMemsetArgsTy {
  // Common attributes of a memset operation
  void *Ptr;
  int C;
  size_t N;
  int DeviceNum;
};
267 // Invalid GTID as defined by libomp; keep in sync
268 #define KMP_GTID_DNE (-2)
269 #ifdef __cplusplus
271 #endif
273 #define TARGET_NAME Libomptarget
274 #ifndef DEBUG_PREFIX
275 #define DEBUG_PREFIX GETNAME(TARGET_NAME)
276 #endif
278 ////////////////////////////////////////////////////////////////////////////////
279 /// dump a table of all the host-target pointer pairs on failure
280 static inline void dumpTargetPointerMappings(const ident_t *Loc,
281 DeviceTy &Device) {
282 DeviceTy::HDTTMapAccessorTy HDTTMap =
283 Device.HostDataToTargetMap.getExclusiveAccessor();
284 if (HDTTMap->empty())
285 return;
287 SourceInfo Kernel(Loc);
288 INFO(OMP_INFOTYPE_ALL, Device.DeviceID,
289 "OpenMP Host-Device pointer mappings after block at %s:%d:%d:\n",
290 Kernel.getFilename(), Kernel.getLine(), Kernel.getColumn());
291 INFO(OMP_INFOTYPE_ALL, Device.DeviceID, "%-18s %-18s %s %s %s %s\n",
292 "Host Ptr", "Target Ptr", "Size (B)", "DynRefCount", "HoldRefCount",
293 "Declaration");
294 for (const auto &It : *HDTTMap) {
295 HostDataToTargetTy &HDTT = *It.HDTT;
296 SourceInfo Info(HDTT.HstPtrName);
297 INFO(OMP_INFOTYPE_ALL, Device.DeviceID,
298 DPxMOD " " DPxMOD " %-8" PRIuPTR " %-11s %-12s %s at %s:%d:%d\n",
299 DPxPTR(HDTT.HstPtrBegin), DPxPTR(HDTT.TgtPtrBegin),
300 HDTT.HstPtrEnd - HDTT.HstPtrBegin, HDTT.dynRefCountToStr().c_str(),
301 HDTT.holdRefCountToStr().c_str(), Info.getName(), Info.getFilename(),
302 Info.getLine(), Info.getColumn());
306 ////////////////////////////////////////////////////////////////////////////////
307 /// Print out the names and properties of the arguments to each kernel
308 static inline void
309 printKernelArguments(const ident_t *Loc, const int64_t DeviceId,
310 const int32_t ArgNum, const int64_t *ArgSizes,
311 const int64_t *ArgTypes, const map_var_info_t *ArgNames,
312 const char *RegionType) {
313 SourceInfo Info(Loc);
314 INFO(OMP_INFOTYPE_ALL, DeviceId, "%s at %s:%d:%d with %d arguments:\n",
315 RegionType, Info.getFilename(), Info.getLine(), Info.getColumn(),
316 ArgNum);
318 for (int32_t I = 0; I < ArgNum; ++I) {
319 const map_var_info_t VarName = (ArgNames) ? ArgNames[I] : nullptr;
320 const char *Type = nullptr;
321 const char *Implicit =
322 (ArgTypes[I] & OMP_TGT_MAPTYPE_IMPLICIT) ? "(implicit)" : "";
323 if (ArgTypes[I] & OMP_TGT_MAPTYPE_TO && ArgTypes[I] & OMP_TGT_MAPTYPE_FROM)
324 Type = "tofrom";
325 else if (ArgTypes[I] & OMP_TGT_MAPTYPE_TO)
326 Type = "to";
327 else if (ArgTypes[I] & OMP_TGT_MAPTYPE_FROM)
328 Type = "from";
329 else if (ArgTypes[I] & OMP_TGT_MAPTYPE_PRIVATE)
330 Type = "private";
331 else if (ArgTypes[I] & OMP_TGT_MAPTYPE_LITERAL)
332 Type = "firstprivate";
333 else if (ArgSizes[I] != 0)
334 Type = "alloc";
335 else
336 Type = "use_address";
338 INFO(OMP_INFOTYPE_ALL, DeviceId, "%s(%s)[%" PRId64 "] %s\n", Type,
339 getNameFromMapping(VarName).c_str(), ArgSizes[I], Implicit);
343 // Wrapper for task stored async info objects.
344 class TaskAsyncInfoWrapperTy {
345 const int ExecThreadID = KMP_GTID_DNE;
346 AsyncInfoTy LocalAsyncInfo;
347 AsyncInfoTy *AsyncInfo = &LocalAsyncInfo;
348 void **TaskAsyncInfoPtr = nullptr;
350 public:
351 TaskAsyncInfoWrapperTy(DeviceTy &Device)
352 : ExecThreadID(__kmpc_global_thread_num(NULL)), LocalAsyncInfo(Device) {
353 // If we failed to acquired the current global thread id, we cannot
354 // re-enqueue the current task. Thus we should use the local blocking async
355 // info.
356 if (ExecThreadID == KMP_GTID_DNE)
357 return;
359 // Only tasks with an assigned task team can be re-enqueue and thus can
360 // use the non-blocking synchronization scheme. Thus we should use the local
361 // blocking async info, if we donĀ“t have one.
362 if (!__kmpc_omp_has_task_team(ExecThreadID))
363 return;
365 // Acquire a pointer to the AsyncInfo stored inside the current task being
366 // executed.
367 TaskAsyncInfoPtr = __kmpc_omp_get_target_async_handle_ptr(ExecThreadID);
369 // If we cannot acquire such pointer, fallback to using the local blocking
370 // async info.
371 if (!TaskAsyncInfoPtr)
372 return;
374 // When creating a new task async info, the task handle must always be
375 // invalid. We must never overwrite any task async handle and there should
376 // never be any valid handle store inside the task at this point.
377 assert((*TaskAsyncInfoPtr) == nullptr &&
378 "Task async handle is not empty when dispatching new device "
379 "operations. The handle was not cleared properly or "
380 "__tgt_target_nowait_query should have been called!");
382 // If no valid async handle is present, a new AsyncInfo will be allocated
383 // and stored in the current task.
384 AsyncInfo = new AsyncInfoTy(Device, AsyncInfoTy::SyncTy::NON_BLOCKING);
385 *TaskAsyncInfoPtr = (void *)AsyncInfo;
388 ~TaskAsyncInfoWrapperTy() {
389 // Local async info destruction is automatically handled by ~AsyncInfoTy.
390 if (AsyncInfo == &LocalAsyncInfo)
391 return;
393 // If the are device operations still pending, return immediately without
394 // deallocating the handle.
395 if (!AsyncInfo->isDone())
396 return;
398 // Delete the handle and unset it from the OpenMP task data.
399 delete AsyncInfo;
400 *TaskAsyncInfoPtr = nullptr;
403 operator AsyncInfoTy &() { return *AsyncInfo; }
// Implement exponential backoff counting.
// Linearly increments until given maximum, exponentially decrements based on
// given backoff factor.
class ExponentialBackoff {
  int64_t Count = 0;
  const int64_t MaxCount = 0;
  const int64_t CountThreshold = 0;
  const float BackoffFactor = 0.0f;

public:
  /// \param MaxCount       upper bound of the counter; must be non-negative.
  /// \param CountThreshold value the counter has to exceed for
  ///                       isAboveThreshold() to report true; must be
  ///                       non-negative.
  /// \param BackoffFactor  multiplier applied by decrement(); must lie in
  ///                       the [0, 1) interval.
  ExponentialBackoff(int64_t MaxCount, int64_t CountThreshold,
                     float BackoffFactor)
      : MaxCount(MaxCount), CountThreshold(CountThreshold),
        BackoffFactor(BackoffFactor) {
    assert(MaxCount >= 0 &&
           "ExponentialBackoff: maximum count value should be non-negative");
    assert(CountThreshold >= 0 &&
           "ExponentialBackoff: count threshold value should be non-negative");
    assert(BackoffFactor >= 0 && BackoffFactor < 1 &&
           "ExponentialBackoff: backoff factor should be in [0, 1) interval");
  }

  /// Adds one to the counter, saturating at MaxCount. Comparing before adding
  /// yields the same result as min(Count + 1, MaxCount), but never evaluates
  /// Count + 1 once the cap is reached and needs no <algorithm> include.
  void increment() { Count = (Count < MaxCount) ? Count + 1 : MaxCount; }

  /// Scales the counter by BackoffFactor. The product is computed in float and
  /// truncated back to an integer, exactly as the compound assignment did.
  void decrement() { Count = static_cast<int64_t>(Count * BackoffFactor); }

  /// Returns true while the counter is strictly greater than CountThreshold.
  bool isAboveThreshold() const { return Count > CountThreshold; }
};
435 #include "llvm/Support/TimeProfiler.h"
436 #define TIMESCOPE() llvm::TimeTraceScope TimeScope(__FUNCTION__)
437 #define TIMESCOPE_WITH_IDENT(IDENT) \
438 SourceInfo SI(IDENT); \
439 llvm::TimeTraceScope TimeScope(__FUNCTION__, SI.getProfileLocation())
440 #define TIMESCOPE_WITH_NAME_AND_IDENT(NAME, IDENT) \
441 SourceInfo SI(IDENT); \
442 llvm::TimeTraceScope TimeScope(NAME, SI.getProfileLocation())
443 #define TIMESCOPE_WITH_RTM_AND_IDENT(RegionTypeMsg, IDENT) \
444 SourceInfo SI(IDENT); \
445 std::string ProfileLocation = SI.getProfileLocation(); \
446 std::string RTM = RegionTypeMsg; \
447 llvm::TimeTraceScope TimeScope(__FUNCTION__, ProfileLocation + RTM)
448 #else
449 #define TIMESCOPE()
450 #define TIMESCOPE_WITH_IDENT(IDENT)
451 #define TIMESCOPE_WITH_NAME_AND_IDENT(NAME, IDENT)
452 #define TIMESCOPE_WITH_RTM_AND_IDENT(RegionTypeMsg, IDENT)
454 #endif