//===---------- private.h - Target independent OpenMP target RTL ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Private function declarations and helper macros for debugging output.
//
//===----------------------------------------------------------------------===//
13 #ifndef _OMPTARGET_PRIVATE_H
14 #define _OMPTARGET_PRIVATE_H
18 #include <SourceInfo.h>
19 #include <omptarget.h>
23 extern int targetDataBegin(ident_t
*Loc
, DeviceTy
&Device
, int32_t ArgNum
,
24 void **ArgsBase
, void **Args
, int64_t *ArgSizes
,
25 int64_t *ArgTypes
, map_var_info_t
*ArgNames
,
26 void **ArgMappers
, AsyncInfoTy
&AsyncInfo
,
27 bool FromMapper
= false);
29 extern int targetDataEnd(ident_t
*Loc
, DeviceTy
&Device
, int32_t ArgNum
,
30 void **ArgBases
, void **Args
, int64_t *ArgSizes
,
31 int64_t *ArgTypes
, map_var_info_t
*ArgNames
,
32 void **ArgMappers
, AsyncInfoTy
&AsyncInfo
,
33 bool FromMapper
= false);
35 extern int targetDataUpdate(ident_t
*Loc
, DeviceTy
&Device
, int32_t ArgNum
,
36 void **ArgsBase
, void **Args
, int64_t *ArgSizes
,
37 int64_t *ArgTypes
, map_var_info_t
*ArgNames
,
38 void **ArgMappers
, AsyncInfoTy
&AsyncInfo
,
39 bool FromMapper
= false);
41 extern int target(ident_t
*Loc
, DeviceTy
&Device
, void *HostPtr
,
42 KernelArgsTy
&KernelArgs
, AsyncInfoTy
&AsyncInfo
);
44 extern int target_activate_rr(DeviceTy
&Device
, uint64_t MemorySize
,
45 void *ReqAddr
, bool isRecord
, bool SaveOutput
);
47 extern int target_replay(ident_t
*Loc
, DeviceTy
&Device
, void *HostPtr
,
48 void *DeviceMemory
, int64_t DeviceMemorySize
,
49 void **TgtArgs
, ptrdiff_t *TgtOffsets
, int32_t NumArgs
,
50 int32_t NumTeams
, int32_t ThreadLimit
,
51 uint64_t LoopTripCount
, AsyncInfoTy
&AsyncInfo
);
53 extern void handleTargetOutcome(bool Success
, ident_t
*Loc
);
54 extern bool checkDeviceAndCtors(int64_t &DeviceID
, ident_t
*Loc
);
55 extern void *targetAllocExplicit(size_t Size
, int DeviceNum
, int Kind
,
57 extern void targetFreeExplicit(void *DevicePtr
, int DeviceNum
, int Kind
,
59 extern void *targetLockExplicit(void *HostPtr
, size_t Size
, int DeviceNum
,
61 extern void targetUnlockExplicit(void *HostPtr
, int DeviceNum
,
64 // This structure stores information of a mapped memory region.
65 struct MapComponentInfoTy
{
71 MapComponentInfoTy() = default;
72 MapComponentInfoTy(void *Base
, void *Begin
, int64_t Size
, int64_t Type
,
74 : Base(Base
), Begin(Begin
), Size(Size
), Type(Type
), Name(Name
) {}
77 // This structure stores all components of a user-defined mapper. The number of
78 // components are dynamically decided, so we utilize C++ STL vector
79 // implementation here.
80 struct MapperComponentsTy
{
81 llvm::SmallVector
<MapComponentInfoTy
> Components
;
82 int32_t size() { return Components
.size(); }
85 // The mapper function pointer type. It follows the signature below:
86 // void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
87 // void *base, void *begin,
88 // size_t size, int64_t type,
90 typedef void (*MapperFuncPtrTy
)(void *, void *, void *, int64_t, int64_t,
93 // Function pointer type for targetData* functions (targetDataBegin,
94 // targetDataEnd and targetDataUpdate).
95 typedef int (*TargetDataFuncPtrTy
)(ident_t
*, DeviceTy
&, int32_t, void **,
96 void **, int64_t *, int64_t *,
97 map_var_info_t
*, void **, AsyncInfoTy
&,
// Implemented in libomp, they are called from within __tgt_* functions.
/*!
 * The ident structure that describes a source location.
 * The struct is identical to the one in the kmp.h file.
 * We maintain the same data structure for compatibility.
 */
// Scalar aliases used by the libomp-facing declarations in this header.
// NOTE(review): presumably these must match the typedefs of the same names in
// libomp's kmp.h -- confirm before changing any of them.
typedef int kmp_int32;
typedef int64_t kmp_int64;
typedef intptr_t kmp_intptr_t;

// Opaque handle type for a dependence object.
typedef void *omp_depend_t;

// Signature of a task entry routine: receives a kmp_int32 and a pointer to the
// task descriptor, and returns a kmp_int32.
typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, struct kmp_task *);
116 typedef struct kmp_task
{
118 kmp_routine_entry_t routine
;
// Per-task flag word. The whole struct must be exactly 32 bits: 16 bits of
// compiler flags followed by 16 bits of library flags.
typedef struct kmp_tasking_flags {
  /* Compiler flags (must total 16 bits) */
  unsigned tiedness : 1;   /* task is either tied (1) or untied (0) */
  unsigned final : 1;      /* task is final(1) so execute immediately */
  unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0
                              code path */
  unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to
                                     invoke destructors from the runtime */
  unsigned proxy : 1; /* task is a proxy task (it will be executed outside the
                         context of the RTL) */
  unsigned priority_specified : 1; /* set if the compiler provides priority
                                      setting for the task */
  unsigned detachable : 1;         /* 1 == can detach */
  unsigned hidden_helper : 1;      /* 1 == hidden helper task */
  unsigned reserved : 8;           /* reserved for compiler use */

  /* Library flags (must total 16 bits) */
  unsigned tasktype : 1;    /* task is either explicit(1) or implicit (0) */
  unsigned task_serial : 1; // task is executed immediately (1) or deferred (0)
  unsigned tasking_ser : 1; // all tasks in team are either executed immediately
                            // (1) or may be deferred (0)
  unsigned team_serial : 1; // entire team is serial (1) [1 thread] or parallel
                            // (0) [>= 2 threads]
  /* If either team_serial or tasking_ser is set, task team may be NULL */
  /* Task State Flags: */
  unsigned started : 1;    /* 1==started, 0==not started */
  unsigned executing : 1;  /* 1==executing, 0==not executing */
  unsigned complete : 1;   /* 1==complete, 0==not complete */
  unsigned freed : 1;      /* 1==freed, 0==allocated */
  unsigned native : 1;     /* 1==gcc-compiled task, 0==intel */
  unsigned reserved31 : 7; /* reserved for library use */
} kmp_tasking_flags_t;

// Enforce the size requirement stated above instead of leaving it to a comment.
static_assert(sizeof(kmp_tasking_flags_t) == 4,
              "kmp_tasking_flags_t must be exactly 32 bits");
155 // Compiler sends us this info:
156 typedef struct kmp_depend_info
{
157 kmp_intptr_t base_addr
;
// functions that extract info from libomp; keep in sync
//
// Each declaration carries __attribute__((weak)).
// NOTE(review): presumably this lets the library link when libomp does not
// provide the symbol -- confirm how callers guard against a missing
// definition.
int omp_get_default_device(void) __attribute__((weak));
int32_t __kmpc_global_thread_num(void *) __attribute__((weak));
int __kmpc_get_target_offload(void) __attribute__((weak));
169 void __kmpc_omp_wait_deps(ident_t
*loc_ref
, kmp_int32 gtid
, kmp_int32 ndeps
,
170 kmp_depend_info_t
*dep_list
, kmp_int32 ndeps_noalias
,
171 kmp_depend_info_t
*noalias_dep_list
)
172 __attribute__((weak
));
173 void **__kmpc_omp_get_target_async_handle_ptr(kmp_int32 gtid
)
174 __attribute__((weak
));
175 bool __kmpc_omp_has_task_team(kmp_int32 gtid
) __attribute__((weak
));
176 kmp_task_t
*__kmpc_omp_task_alloc(ident_t
*loc_ref
, kmp_int32 gtid
,
177 kmp_int32 flags
, size_t sizeof_kmp_task_t
,
178 size_t sizeof_shareds
,
179 kmp_routine_entry_t task_entry
)
180 __attribute__((weak
));
183 __kmpc_omp_target_task_alloc(ident_t
*loc_ref
, kmp_int32 gtid
, kmp_int32 flags
,
184 size_t sizeof_kmp_task_t
, size_t sizeof_shareds
,
185 kmp_routine_entry_t task_entry
,
186 kmp_int64 device_id
) __attribute__((weak
));
188 kmp_int32
__kmpc_omp_task_with_deps(ident_t
*loc_ref
, kmp_int32 gtid
,
189 kmp_task_t
*new_task
, kmp_int32 ndeps
,
190 kmp_depend_info_t
*dep_list
,
191 kmp_int32 ndeps_noalias
,
192 kmp_depend_info_t
*noalias_dep_list
)
193 __attribute__((weak
));
196 * The argument set that is passed from asynchronous memory copy to block
197 * version of memory copy invoked in helper task
199 struct TargetMemcpyArgsTy
{
209 * The flag that denotes single dimensional or rectangle dimensional copy
214 * Arguments for single dimensional copy
221 * Arguments for rectangle dimensional copy
225 const size_t *Volume
;
226 const size_t *DstOffsets
;
227 const size_t *SrcOffsets
;
228 const size_t *DstDimensions
;
229 const size_t *SrcDimensions
;
232 * Constructor for single dimensional copy
234 TargetMemcpyArgsTy(void *Dst
, const void *Src
, size_t Length
,
235 size_t DstOffset
, size_t SrcOffset
, int DstDevice
,
237 : Dst(Dst
), Src(Src
), DstDevice(DstDevice
), SrcDevice(SrcDevice
),
238 IsRectMemcpy(false), Length(Length
), DstOffset(DstOffset
),
239 SrcOffset(SrcOffset
), ElementSize(0), NumDims(0), Volume(0),
240 DstOffsets(0), SrcOffsets(0), DstDimensions(0), SrcDimensions(0){};
243 * Constructor for rectangle dimensional copy
245 TargetMemcpyArgsTy(void *Dst
, const void *Src
, size_t ElementSize
,
246 int NumDims
, const size_t *Volume
,
247 const size_t *DstOffsets
, const size_t *SrcOffsets
,
248 const size_t *DstDimensions
, const size_t *SrcDimensions
,
249 int DstDevice
, int SrcDevice
)
250 : Dst(Dst
), Src(Src
), DstDevice(DstDevice
), SrcDevice(SrcDevice
),
251 IsRectMemcpy(true), Length(0), DstOffset(0), SrcOffset(0),
252 ElementSize(ElementSize
), NumDims(NumDims
), Volume(Volume
),
253 DstOffsets(DstOffsets
), SrcOffsets(SrcOffsets
),
254 DstDimensions(DstDimensions
), SrcDimensions(SrcDimensions
){};
257 struct TargetMemsetArgsTy
{
258 // Common attributes of a memset operation
264 // no constructors defined, because this is a PoD
// Invalid GTID as defined by libomp; keep in sync.
// The value is parenthesized so that it stays a single operand wherever the
// macro is expanded.
#define KMP_GTID_DNE (-2)
// Name under which this library identifies itself.
#define TARGET_NAME Libomptarget
// Prefix for debug output, derived from TARGET_NAME through GETNAME (defined
// outside this header; presumably it stringifies its argument -- confirm).
#define DEBUG_PREFIX GETNAME(TARGET_NAME)
278 ////////////////////////////////////////////////////////////////////////////////
279 /// dump a table of all the host-target pointer pairs on failure
280 static inline void dumpTargetPointerMappings(const ident_t
*Loc
,
282 DeviceTy::HDTTMapAccessorTy HDTTMap
=
283 Device
.HostDataToTargetMap
.getExclusiveAccessor();
284 if (HDTTMap
->empty())
287 SourceInfo
Kernel(Loc
);
288 INFO(OMP_INFOTYPE_ALL
, Device
.DeviceID
,
289 "OpenMP Host-Device pointer mappings after block at %s:%d:%d:\n",
290 Kernel
.getFilename(), Kernel
.getLine(), Kernel
.getColumn());
291 INFO(OMP_INFOTYPE_ALL
, Device
.DeviceID
, "%-18s %-18s %s %s %s %s\n",
292 "Host Ptr", "Target Ptr", "Size (B)", "DynRefCount", "HoldRefCount",
294 for (const auto &It
: *HDTTMap
) {
295 HostDataToTargetTy
&HDTT
= *It
.HDTT
;
296 SourceInfo
Info(HDTT
.HstPtrName
);
297 INFO(OMP_INFOTYPE_ALL
, Device
.DeviceID
,
298 DPxMOD
" " DPxMOD
" %-8" PRIuPTR
" %-11s %-12s %s at %s:%d:%d\n",
299 DPxPTR(HDTT
.HstPtrBegin
), DPxPTR(HDTT
.TgtPtrBegin
),
300 HDTT
.HstPtrEnd
- HDTT
.HstPtrBegin
, HDTT
.dynRefCountToStr().c_str(),
301 HDTT
.holdRefCountToStr().c_str(), Info
.getName(), Info
.getFilename(),
302 Info
.getLine(), Info
.getColumn());
306 ////////////////////////////////////////////////////////////////////////////////
307 /// Print out the names and properties of the arguments to each kernel
309 printKernelArguments(const ident_t
*Loc
, const int64_t DeviceId
,
310 const int32_t ArgNum
, const int64_t *ArgSizes
,
311 const int64_t *ArgTypes
, const map_var_info_t
*ArgNames
,
312 const char *RegionType
) {
313 SourceInfo
Info(Loc
);
314 INFO(OMP_INFOTYPE_ALL
, DeviceId
, "%s at %s:%d:%d with %d arguments:\n",
315 RegionType
, Info
.getFilename(), Info
.getLine(), Info
.getColumn(),
318 for (int32_t I
= 0; I
< ArgNum
; ++I
) {
319 const map_var_info_t VarName
= (ArgNames
) ? ArgNames
[I
] : nullptr;
320 const char *Type
= nullptr;
321 const char *Implicit
=
322 (ArgTypes
[I
] & OMP_TGT_MAPTYPE_IMPLICIT
) ? "(implicit)" : "";
323 if (ArgTypes
[I
] & OMP_TGT_MAPTYPE_TO
&& ArgTypes
[I
] & OMP_TGT_MAPTYPE_FROM
)
325 else if (ArgTypes
[I
] & OMP_TGT_MAPTYPE_TO
)
327 else if (ArgTypes
[I
] & OMP_TGT_MAPTYPE_FROM
)
329 else if (ArgTypes
[I
] & OMP_TGT_MAPTYPE_PRIVATE
)
331 else if (ArgTypes
[I
] & OMP_TGT_MAPTYPE_LITERAL
)
332 Type
= "firstprivate";
333 else if (ArgSizes
[I
] != 0)
336 Type
= "use_address";
338 INFO(OMP_INFOTYPE_ALL
, DeviceId
, "%s(%s)[%" PRId64
"] %s\n", Type
,
339 getNameFromMapping(VarName
).c_str(), ArgSizes
[I
], Implicit
);
343 // Wrapper for task stored async info objects.
344 class TaskAsyncInfoWrapperTy
{
345 const int ExecThreadID
= KMP_GTID_DNE
;
346 AsyncInfoTy LocalAsyncInfo
;
347 AsyncInfoTy
*AsyncInfo
= &LocalAsyncInfo
;
348 void **TaskAsyncInfoPtr
= nullptr;
351 TaskAsyncInfoWrapperTy(DeviceTy
&Device
)
352 : ExecThreadID(__kmpc_global_thread_num(NULL
)), LocalAsyncInfo(Device
) {
    // If we failed to acquire the current global thread id, we cannot
    // re-enqueue the current task. Thus we should use the local blocking async
    // info.
356 if (ExecThreadID
== KMP_GTID_DNE
)
    // Only tasks with an assigned task team can be re-enqueued and thus can
    // use the non-blocking synchronization scheme. Thus we should use the local
    // blocking async info, if we don't have one.
362 if (!__kmpc_omp_has_task_team(ExecThreadID
))
365 // Acquire a pointer to the AsyncInfo stored inside the current task being
367 TaskAsyncInfoPtr
= __kmpc_omp_get_target_async_handle_ptr(ExecThreadID
);
369 // If we cannot acquire such pointer, fallback to using the local blocking
371 if (!TaskAsyncInfoPtr
)
374 // When creating a new task async info, the task handle must always be
375 // invalid. We must never overwrite any task async handle and there should
376 // never be any valid handle store inside the task at this point.
377 assert((*TaskAsyncInfoPtr
) == nullptr &&
378 "Task async handle is not empty when dispatching new device "
379 "operations. The handle was not cleared properly or "
380 "__tgt_target_nowait_query should have been called!");
382 // If no valid async handle is present, a new AsyncInfo will be allocated
383 // and stored in the current task.
384 AsyncInfo
= new AsyncInfoTy(Device
, AsyncInfoTy::SyncTy::NON_BLOCKING
);
385 *TaskAsyncInfoPtr
= (void *)AsyncInfo
;
388 ~TaskAsyncInfoWrapperTy() {
389 // Local async info destruction is automatically handled by ~AsyncInfoTy.
390 if (AsyncInfo
== &LocalAsyncInfo
)
    // If there are device operations still pending, return immediately without
    // deallocating the handle.
395 if (!AsyncInfo
->isDone())
398 // Delete the handle and unset it from the OpenMP task data.
400 *TaskAsyncInfoPtr
= nullptr;
403 operator AsyncInfoTy
&() { return *AsyncInfo
; }
406 // Implement exponential backoff counting.
407 // Linearly increments until given maximum, exponentially decrements based on
408 // given backoff factor.
409 class ExponentialBackoff
{
411 const int64_t MaxCount
= 0;
412 const int64_t CountThreshold
= 0;
413 const float BackoffFactor
= 0.0f
;
416 ExponentialBackoff(int64_t MaxCount
, int64_t CountThreshold
,
418 : MaxCount(MaxCount
), CountThreshold(CountThreshold
),
419 BackoffFactor(BackoffFactor
) {
420 assert(MaxCount
>= 0 &&
421 "ExponentialBackoff: maximum count value should be non-negative");
422 assert(CountThreshold
>= 0 &&
423 "ExponentialBackoff: count threshold value should be non-negative");
424 assert(BackoffFactor
>= 0 && BackoffFactor
< 1 &&
425 "ExponentialBackoff: backoff factor should be in [0, 1) interval");
428 void increment() { Count
= std::min(Count
+ 1, MaxCount
); }
430 void decrement() { Count
*= BackoffFactor
; }
432 bool isAboveThreshold() const { return Count
> CountThreshold
; }
435 #include "llvm/Support/TimeProfiler.h"
// Time-trace helpers built on llvm::TimeTraceScope. Every macro declares a
// local named TimeScope (and, for the *_IDENT variants, a SourceInfo named SI)
// in the scope where it is expanded, so use at most one of them per scope.
// The *_IDENT variants expand to several statements; do not use them as the
// unbraced body of an if/for/while.

// Scope named after the enclosing function.
#define TIMESCOPE() llvm::TimeTraceScope TimeScope(__FUNCTION__)
// Scope named after the enclosing function, annotated with the profile
// location derived from IDENT.
#define TIMESCOPE_WITH_IDENT(IDENT)                                            \
  SourceInfo SI(IDENT);                                                        \
  llvm::TimeTraceScope TimeScope(__FUNCTION__, SI.getProfileLocation())
// Scope with the explicit name NAME, annotated with the profile location
// derived from IDENT.
#define TIMESCOPE_WITH_NAME_AND_IDENT(NAME, IDENT)                             \
  SourceInfo SI(IDENT);                                                        \
  llvm::TimeTraceScope TimeScope(NAME, SI.getProfileLocation())
// Scope named after the enclosing function, annotated with the profile
// location followed by RegionTypeMsg. Also declares the locals ProfileLocation
// and RTM.
#define TIMESCOPE_WITH_RTM_AND_IDENT(RegionTypeMsg, IDENT)                     \
  SourceInfo SI(IDENT);                                                        \
  std::string ProfileLocation = SI.getProfileLocation();                       \
  std::string RTM = RegionTypeMsg;                                             \
  llvm::TimeTraceScope TimeScope(__FUNCTION__, ProfileLocation + RTM)
// No-op variants: each macro expands to nothing, so its arguments are never
// evaluated.
#define TIMESCOPE_WITH_IDENT(IDENT)
#define TIMESCOPE_WITH_NAME_AND_IDENT(NAME, IDENT)
#define TIMESCOPE_WITH_RTM_AND_IDENT(RegionTypeMsg, IDENT)
452 #define TIMESCOPE_WITH_RTM_AND_IDENT(RegionTypeMsg, IDENT)