1 //===-- OpenMP/Mapping.h - OpenMP/OpenACC pointer mapping -------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Declarations for managing host-to-device pointer mappings.
11 //===----------------------------------------------------------------------===//
13 #ifndef OMPTARGET_OPENMP_MAPPING_H
14 #define OMPTARGET_OPENMP_MAPPING_H
#include "ExclusiveAccess.h"
#include "Shared/EnvironmentVar.h"
#include "omptarget.h"

#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"

#include <cstdint>
#include <memory>
#include <mutex>
#include <set>
#include <string>
/// Opaque handle used to carry the (optional) source-level name of a mapped
/// variable. Treated as an untyped pointer by all code in this header.
using map_var_info_t = void *;
34 BoolEnvar ForceAtomic
= BoolEnvar("LIBOMPTARGET_MAP_FORCE_ATOMIC", true);
35 UseEventsForAtomicTransfers
= ForceAtomic
;
39 static const MappingConfig
&get() {
40 static MappingConfig MP
;
44 /// Flag to indicate if we use events to ensure the atomicity of
45 /// map clauses or not. Can be modified with an environment variable.
46 bool UseEventsForAtomicTransfers
= true;
/// Information about shadow pointers: the host and device locations of a
/// pointer member inside a mapped aggregate, plus the values stored there.
struct ShadowPtrInfoTy {
  void **HstPtrAddr = nullptr; // Host address holding the pointer.
  void *HstPtrVal = nullptr;   // Pointer value currently stored on the host.
  void **TgtPtrAddr = nullptr; // Device address holding the pointer.
  void *TgtPtrVal = nullptr;   // Pointer value currently stored on the device.

  /// Identity is determined solely by the host address; the stored values are
  /// deliberately ignored so stale entries for the same location compare equal.
  bool operator==(const ShadowPtrInfoTy &Other) const {
    return HstPtrAddr == Other.HstPtrAddr;
  }
};
61 inline bool operator<(const ShadowPtrInfoTy
&lhs
, const ShadowPtrInfoTy
&rhs
) {
62 return lhs
.HstPtrAddr
< rhs
.HstPtrAddr
;
65 /// Map between host data and target data.
66 struct HostDataToTargetTy
{
67 const uintptr_t HstPtrBase
; // host info.
68 const uintptr_t HstPtrBegin
;
69 const uintptr_t HstPtrEnd
; // non-inclusive.
70 const map_var_info_t HstPtrName
; // Optional source name of mapped variable.
72 const uintptr_t TgtAllocBegin
; // allocated target memory
73 const uintptr_t TgtPtrBegin
; // mapped target memory = TgtAllocBegin + padding
76 static const uint64_t INFRefCount
= ~(uint64_t)0;
77 static std::string
refCountToStr(uint64_t RefCount
) {
78 return RefCount
== INFRefCount
? "INF" : std::to_string(RefCount
);
82 StatesTy(uint64_t DRC
, uint64_t HRC
)
83 : DynRefCount(DRC
), HoldRefCount(HRC
) {}
84 /// The dynamic reference count is the standard reference count as of OpenMP
85 /// 4.5. The hold reference count is an OpenMP extension for the sake of
88 /// The 'ompx_hold' map type modifier is permitted only on "omp target" and
89 /// "omp target data", and "delete" is permitted only on "omp target exit
90 /// data" and associated runtime library routines. As a result, we really
91 /// need to implement "reset" functionality only for the dynamic reference
92 /// counter. Likewise, only the dynamic reference count can be infinite
93 /// because, for example, omp_target_associate_ptr and "omp declare target
94 /// link" operate only on it. Nevertheless, it's actually easier to follow
95 /// the code (and requires less assertions for special cases) when we just
96 /// implement these features generally across both reference counters here.
97 /// Thus, it's the users of this class that impose those restrictions.
100 uint64_t HoldRefCount
;
102 /// A map of shadow pointers associated with this entry, the keys are host
103 /// pointer addresses to identify stale entries.
104 llvm::SmallSet
<ShadowPtrInfoTy
, 2> ShadowPtrInfos
;
106 /// Pointer to the event corresponding to the data update of this map.
107 /// Note: At present this event is created when the first data transfer from
108 /// host to device is issued, and only being used for H2D. It is not used
109 /// for data transfer in another direction (device to host). It is still
110 /// unclear whether we need it for D2H. If in the future we need similar
111 /// mechanism for D2H, and if the event cannot be shared between them, Event
112 /// should be written as <tt>void *Event[2]</tt>.
113 void *Event
= nullptr;
115 /// Number of threads currently holding a reference to the entry at a
116 /// targetDataEnd. This is used to ensure that only the last thread that
117 /// references this entry will actually delete it.
118 int32_t DataEndThreadCount
= 0;
120 // When HostDataToTargetTy is used by std::set, std::set::iterator is const
121 // use unique_ptr to make States mutable.
122 const std::unique_ptr
<StatesTy
> States
;
125 HostDataToTargetTy(uintptr_t BP
, uintptr_t B
, uintptr_t E
,
126 uintptr_t TgtAllocBegin
, uintptr_t TgtPtrBegin
,
127 bool UseHoldRefCount
, map_var_info_t Name
= nullptr,
129 : HstPtrBase(BP
), HstPtrBegin(B
), HstPtrEnd(E
), HstPtrName(Name
),
130 TgtAllocBegin(TgtAllocBegin
), TgtPtrBegin(TgtPtrBegin
),
131 States(std::make_unique
<StatesTy
>(UseHoldRefCount
? 0
132 : IsINF
? INFRefCount
135 : IsINF
? INFRefCount
138 /// Get the total reference count. This is smarter than just getDynRefCount()
139 /// + getHoldRefCount() because it handles the case where at least one is
140 /// infinity and the other is non-zero.
141 uint64_t getTotalRefCount() const {
142 if (States
->DynRefCount
== INFRefCount
||
143 States
->HoldRefCount
== INFRefCount
)
145 return States
->DynRefCount
+ States
->HoldRefCount
;
148 /// Get the dynamic reference count.
149 uint64_t getDynRefCount() const { return States
->DynRefCount
; }
151 /// Get the hold reference count.
152 uint64_t getHoldRefCount() const { return States
->HoldRefCount
; }
154 /// Get the event bound to this data map.
155 void *getEvent() const { return States
->Event
; }
157 /// Add a new event, if necessary.
158 /// Returns OFFLOAD_FAIL if something went wrong, OFFLOAD_SUCCESS otherwise.
159 int addEventIfNecessary(DeviceTy
&Device
, AsyncInfoTy
&AsyncInfo
) const;
161 /// Functions that manages the number of threads referencing the entry in a
163 void incDataEndThreadCount() { ++States
->DataEndThreadCount
; }
165 [[nodiscard
]] int32_t decDataEndThreadCount() {
166 return --States
->DataEndThreadCount
;
169 [[nodiscard
]] int32_t getDataEndThreadCount() const {
170 return States
->DataEndThreadCount
;
173 /// Set the event bound to this data map.
174 void setEvent(void *Event
) const { States
->Event
= Event
; }
176 /// Reset the specified reference count unless it's infinity. Reset to 1
177 /// (even if currently 0) so it can be followed by a decrement.
178 void resetRefCount(bool UseHoldRefCount
) const {
179 uint64_t &ThisRefCount
=
180 UseHoldRefCount
? States
->HoldRefCount
: States
->DynRefCount
;
181 if (ThisRefCount
!= INFRefCount
)
185 /// Increment the specified reference count unless it's infinity.
186 void incRefCount(bool UseHoldRefCount
) const {
187 uint64_t &ThisRefCount
=
188 UseHoldRefCount
? States
->HoldRefCount
: States
->DynRefCount
;
189 if (ThisRefCount
!= INFRefCount
) {
191 assert(ThisRefCount
< INFRefCount
&& "refcount overflow");
195 /// Decrement the specified reference count unless it's infinity or zero, and
196 /// return the total reference count.
197 uint64_t decRefCount(bool UseHoldRefCount
) const {
198 uint64_t &ThisRefCount
=
199 UseHoldRefCount
? States
->HoldRefCount
: States
->DynRefCount
;
200 uint64_t OtherRefCount
=
201 UseHoldRefCount
? States
->DynRefCount
: States
->HoldRefCount
;
203 if (ThisRefCount
!= INFRefCount
) {
204 if (ThisRefCount
> 0)
207 assert(OtherRefCount
>= 0 && "total refcount underflow");
209 return getTotalRefCount();
212 /// Is the dynamic (and thus the total) reference count infinite?
213 bool isDynRefCountInf() const { return States
->DynRefCount
== INFRefCount
; }
215 /// Convert the dynamic reference count to a debug string.
216 std::string
dynRefCountToStr() const {
217 return refCountToStr(States
->DynRefCount
);
220 /// Convert the hold reference count to a debug string.
221 std::string
holdRefCountToStr() const {
222 return refCountToStr(States
->HoldRefCount
);
225 /// Should one decrement of the specified reference count (after resetting it
226 /// if \c AfterReset) remove this mapping?
227 bool decShouldRemove(bool UseHoldRefCount
, bool AfterReset
= false) const {
228 uint64_t ThisRefCount
=
229 UseHoldRefCount
? States
->HoldRefCount
: States
->DynRefCount
;
230 uint64_t OtherRefCount
=
231 UseHoldRefCount
? States
->DynRefCount
: States
->HoldRefCount
;
232 if (OtherRefCount
> 0)
235 return ThisRefCount
!= INFRefCount
;
236 return ThisRefCount
== 1;
239 /// Add the shadow pointer info \p ShadowPtrInfo to this entry but only if the
240 /// the target ptr value was not already present in the existing set of shadow
241 /// pointers. Return true if something was added.
242 bool addShadowPointer(const ShadowPtrInfoTy
&ShadowPtrInfo
) const {
243 auto Pair
= States
->ShadowPtrInfos
.insert(ShadowPtrInfo
);
246 // Check for a stale entry, if found, replace the old one.
247 if ((*Pair
.first
).TgtPtrVal
== ShadowPtrInfo
.TgtPtrVal
)
249 States
->ShadowPtrInfos
.erase(ShadowPtrInfo
);
250 return addShadowPointer(ShadowPtrInfo
);
253 /// Apply \p CB to all shadow pointers of this entry. Returns OFFLOAD_FAIL if
254 /// \p CB returned OFFLOAD_FAIL for any of them, otherwise this returns
255 /// OFFLOAD_SUCCESS. The entry is locked for this operation.
256 template <typename CBTy
> int foreachShadowPointerInfo(CBTy CB
) const {
257 for (auto &It
: States
->ShadowPtrInfos
)
258 if (CB(const_cast<ShadowPtrInfoTy
&>(It
)) == OFFLOAD_FAIL
)
260 return OFFLOAD_SUCCESS
;
263 /// Lock this entry for exclusive access. Ensure to get exclusive access to
265 void lock() const { Mtx
.lock(); }
267 /// Unlock this entry to allow other threads inspecting it.
268 void unlock() const { Mtx
.unlock(); }
271 // Mutex that needs to be held before the entry is inspected or modified. The
272 // HDTTMap mutex needs to be held before trying to lock any HDTT Entry.
273 mutable std::mutex Mtx
;
276 /// Wrapper around the HostDataToTargetTy to be used in the HDTT map. In
277 /// addition to the HDTT pointer we store the key value explicitly. This
278 /// allows the set to inspect (sort/search/...) this entry without an additional
279 /// load of HDTT. HDTT is a pointer to allow the modification of the set without
280 /// invalidating HDTT entries which can now be inspected at the same time.
281 struct HostDataToTargetMapKeyTy
{
284 HostDataToTargetMapKeyTy(void *Key
) : KeyValue(uintptr_t(Key
)) {}
285 HostDataToTargetMapKeyTy(uintptr_t Key
) : KeyValue(Key
) {}
286 HostDataToTargetMapKeyTy(HostDataToTargetTy
*HDTT
)
287 : KeyValue(HDTT
->HstPtrBegin
), HDTT(HDTT
) {}
288 HostDataToTargetTy
*HDTT
;
290 inline bool operator<(const HostDataToTargetMapKeyTy
&LHS
,
291 const uintptr_t &RHS
) {
292 return LHS
.KeyValue
< RHS
;
294 inline bool operator<(const uintptr_t &LHS
,
295 const HostDataToTargetMapKeyTy
&RHS
) {
296 return LHS
< RHS
.KeyValue
;
298 inline bool operator<(const HostDataToTargetMapKeyTy
&LHS
,
299 const HostDataToTargetMapKeyTy
&RHS
) {
300 return LHS
.KeyValue
< RHS
.KeyValue
;
303 /// This struct will be returned by \p DeviceTy::getTargetPointer which provides
304 /// more data than just a target pointer. A TargetPointerResultTy that has a non
305 /// null Entry owns the entry. As long as the TargetPointerResultTy (TPR) exists
306 /// the entry is locked. To give up ownership without destroying the TPR use the
307 /// reset() function.
308 struct TargetPointerResultTy
{
310 /// If the map table entry is just created
311 unsigned IsNewEntry
: 1;
312 /// If the pointer is actually a host pointer (when unified memory enabled)
313 unsigned IsHostPointer
: 1;
314 /// If the pointer is present in the mapping table.
315 unsigned IsPresent
: 1;
316 /// Flag indicating that this was the last user of the entry and the ref
319 /// If the pointer is contained.
320 unsigned IsContained
: 1;
321 } Flags
= {0, 0, 0, 0, 0};
323 TargetPointerResultTy(const TargetPointerResultTy
&) = delete;
324 TargetPointerResultTy
&operator=(const TargetPointerResultTy
&TPR
) = delete;
325 TargetPointerResultTy() {}
327 TargetPointerResultTy(FlagTy Flags
, HostDataToTargetTy
*Entry
,
329 : Flags(Flags
), TargetPointer(TargetPointer
), Entry(Entry
) {
334 TargetPointerResultTy(TargetPointerResultTy
&&TPR
)
335 : Flags(TPR
.Flags
), TargetPointer(TPR
.TargetPointer
), Entry(TPR
.Entry
) {
339 TargetPointerResultTy
&operator=(TargetPointerResultTy
&&TPR
) {
341 std::swap(Flags
, TPR
.Flags
);
342 std::swap(Entry
, TPR
.Entry
);
343 std::swap(TargetPointer
, TPR
.TargetPointer
);
348 ~TargetPointerResultTy() {
353 bool isPresent() const { return Flags
.IsPresent
; }
355 bool isHostPointer() const { return Flags
.IsHostPointer
; }
357 bool isContained() const { return Flags
.IsContained
; }
359 /// The corresponding target pointer
360 void *TargetPointer
= nullptr;
362 HostDataToTargetTy
*getEntry() const { return Entry
; }
363 void setEntry(HostDataToTargetTy
*HDTTT
,
364 HostDataToTargetTy
*OwnedTPR
= nullptr) {
368 if (Entry
&& Entry
!= OwnedTPR
)
372 void reset() { *this = TargetPointerResultTy(); }
375 /// The corresponding map table entry which is stable.
376 HostDataToTargetTy
*Entry
= nullptr;
379 struct LookupResult
{
381 unsigned IsContained
: 1;
382 unsigned ExtendsBefore
: 1;
383 unsigned ExtendsAfter
: 1;
386 LookupResult() : Flags({0, 0, 0}), TPR() {}
388 TargetPointerResultTy TPR
;
// This structure stores information of a mapped memory region.
struct MapComponentInfoTy {
  void *Base;   // Base address of the component.
  void *Begin;  // Begin address of the mapped region.
  int64_t Size; // Size of the region in bytes.
  int64_t Type; // Map-type bits for this component.
  void *Name;   // Optional source-level name (may be null).
  MapComponentInfoTy() = default;
  MapComponentInfoTy(void *Base, void *Begin, int64_t Size, int64_t Type,
                     void *Name)
      : Base(Base), Begin(Begin), Size(Size), Type(Type), Name(Name) {}
};
404 // This structure stores all components of a user-defined mapper. The number of
405 // components are dynamically decided, so we utilize C++ STL vector
406 // implementation here.
407 struct MapperComponentsTy
{
408 llvm::SmallVector
<MapComponentInfoTy
> Components
;
409 int32_t size() { return Components
.size(); }
// The mapper function pointer type. It follows the signature below:
// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
//                                           void *base, void *begin,
//                                           size_t size, int64_t type,
//                                           void *name);
typedef void (*MapperFuncPtrTy)(void *, void *, void *, int64_t, int64_t,
                                void *);
420 // Function pointer type for targetData* functions (targetDataBegin,
421 // targetDataEnd and targetDataUpdate).
422 typedef int (*TargetDataFuncPtrTy
)(ident_t
*, DeviceTy
&, int32_t, void **,
423 void **, int64_t *, int64_t *,
424 map_var_info_t
*, void **, AsyncInfoTy
&,
427 void dumpTargetPointerMappings(const ident_t
*Loc
, DeviceTy
&Device
,
428 bool toStdOut
= false);
430 int targetDataBegin(ident_t
*Loc
, DeviceTy
&Device
, int32_t ArgNum
,
431 void **ArgsBase
, void **Args
, int64_t *ArgSizes
,
432 int64_t *ArgTypes
, map_var_info_t
*ArgNames
,
433 void **ArgMappers
, AsyncInfoTy
&AsyncInfo
,
434 bool FromMapper
= false);
436 int targetDataEnd(ident_t
*Loc
, DeviceTy
&Device
, int32_t ArgNum
,
437 void **ArgBases
, void **Args
, int64_t *ArgSizes
,
438 int64_t *ArgTypes
, map_var_info_t
*ArgNames
,
439 void **ArgMappers
, AsyncInfoTy
&AsyncInfo
,
440 bool FromMapper
= false);
442 int targetDataUpdate(ident_t
*Loc
, DeviceTy
&Device
, int32_t ArgNum
,
443 void **ArgsBase
, void **Args
, int64_t *ArgSizes
,
444 int64_t *ArgTypes
, map_var_info_t
*ArgNames
,
445 void **ArgMappers
, AsyncInfoTy
&AsyncInfo
,
446 bool FromMapper
= false);
448 struct MappingInfoTy
{
449 MappingInfoTy(DeviceTy
&Device
) : Device(Device
) {}
451 /// Host data to device map type with a wrapper key indirection that allows
452 /// concurrent modification of the entries without invalidating the underlying
454 using HostDataToTargetListTy
=
455 std::set
<HostDataToTargetMapKeyTy
, std::less
<>>;
457 /// The HDTTMap is a protected object that can only be accessed by one thread
459 ProtectedObj
<HostDataToTargetListTy
> HostDataToTargetMap
;
461 /// The type used to access the HDTT map.
462 using HDTTMapAccessorTy
= decltype(HostDataToTargetMap
)::AccessorTy
;
464 /// Lookup the mapping of \p HstPtrBegin in \p HDTTMap. The accessor ensures
465 /// exclusive access to the HDTT map.
466 LookupResult
lookupMapping(HDTTMapAccessorTy
&HDTTMap
, void *HstPtrBegin
,
468 HostDataToTargetTy
*OwnedTPR
= nullptr);
470 /// Get the target pointer based on host pointer begin and base. If the
471 /// mapping already exists, the target pointer will be returned directly. In
472 /// addition, if required, the memory region pointed by \p HstPtrBegin of size
473 /// \p Size will also be transferred to the device. If the mapping doesn't
474 /// exist, and if unified shared memory is not enabled, a new mapping will be
475 /// created and the data will also be transferred accordingly. nullptr will be
476 /// returned because of any of following reasons:
477 /// - Data allocation failed;
478 /// - The user tried to do an illegal mapping;
479 /// - Data transfer issue fails.
480 TargetPointerResultTy
getTargetPointer(
481 HDTTMapAccessorTy
&HDTTMap
, void *HstPtrBegin
, void *HstPtrBase
,
482 int64_t TgtPadding
, int64_t Size
, map_var_info_t HstPtrName
,
483 bool HasFlagTo
, bool HasFlagAlways
, bool IsImplicit
, bool UpdateRefCount
,
484 bool HasCloseModifier
, bool HasPresentModifier
, bool HasHoldModifier
,
485 AsyncInfoTy
&AsyncInfo
, HostDataToTargetTy
*OwnedTPR
= nullptr,
486 bool ReleaseHDTTMap
= true);
488 /// Return the target pointer for \p HstPtrBegin in \p HDTTMap. The accessor
489 /// ensures exclusive access to the HDTT map.
490 void *getTgtPtrBegin(HDTTMapAccessorTy
&HDTTMap
, void *HstPtrBegin
,
493 /// Return the target pointer begin (where the data will be moved).
494 /// Used by targetDataBegin, targetDataEnd, targetDataUpdate and target.
495 /// - \p UpdateRefCount and \p UseHoldRefCount controls which and if the entry
496 /// reference counters will be decremented.
497 /// - \p MustContain enforces that the query must not extend beyond an already
498 /// mapped entry to be valid.
499 /// - \p ForceDelete deletes the entry regardless of its reference counting
500 /// (unless it is infinite).
501 /// - \p FromDataEnd tracks the number of threads referencing the entry at
502 /// targetDataEnd for delayed deletion purpose.
503 [[nodiscard
]] TargetPointerResultTy
504 getTgtPtrBegin(void *HstPtrBegin
, int64_t Size
, bool UpdateRefCount
,
505 bool UseHoldRefCount
, bool MustContain
= false,
506 bool ForceDelete
= false, bool FromDataEnd
= false);
508 /// Remove the \p Entry from the data map. Expect the entry's total reference
509 /// count to be zero and the caller thread to be the last one using it. \p
510 /// HDTTMap ensure the caller holds exclusive access and can modify the map.
511 /// Return \c OFFLOAD_SUCCESS if the map entry existed, and return \c
512 /// OFFLOAD_FAIL if not. It is the caller's responsibility to skip calling
513 /// this function if the map entry is not expected to exist because \p
514 /// HstPtrBegin uses shared memory.
515 [[nodiscard
]] int eraseMapEntry(HDTTMapAccessorTy
&HDTTMap
,
516 HostDataToTargetTy
*Entry
, int64_t Size
);
518 /// Deallocate the \p Entry from the device memory and delete it. Return \c
519 /// OFFLOAD_SUCCESS if the deallocation operations executed successfully, and
520 /// return \c OFFLOAD_FAIL otherwise.
521 [[nodiscard
]] int deallocTgtPtrAndEntry(HostDataToTargetTy
*Entry
,
524 int associatePtr(void *HstPtrBegin
, void *TgtPtrBegin
, int64_t Size
);
525 int disassociatePtr(void *HstPtrBegin
);
527 /// Print information about the transfer from \p HstPtr to \p TgtPtr (or vice
528 /// versa if \p H2D is false). If there is an existing mapping, or if \p Entry
529 /// is set, the associated metadata will be printed as well.
530 void printCopyInfo(void *TgtPtr
, void *HstPtr
, int64_t Size
, bool H2D
,
531 HostDataToTargetTy
*Entry
,
532 MappingInfoTy::HDTTMapAccessorTy
*HDTTMapPtr
);
538 #endif // OMPTARGET_OPENMP_MAPPING_H