//===-- OpenMP/Mapping.h - OpenMP/OpenACC pointer mapping -------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Declarations for managing host-to-device pointer mappings.
//
//===----------------------------------------------------------------------===//

#ifndef OMPTARGET_OPENMP_MAPPING_H
#define OMPTARGET_OPENMP_MAPPING_H

#include "ExclusiveAccess.h"
#include "Shared/EnvironmentVar.h"
#include "omptarget.h"

#include <cassert>
#include <cstdint>
#include <memory>
#include <mutex>
#include <set>
#include <string>
#include <utility>

#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"

struct DeviceTy;
class AsyncInfoTy;

using map_var_info_t = void *;

class MappingConfig {

  MappingConfig() {
    BoolEnvar ForceAtomic = BoolEnvar("LIBOMPTARGET_MAP_FORCE_ATOMIC", true);
    UseEventsForAtomicTransfers = ForceAtomic;
  }

public:
  static const MappingConfig &get() {
    static MappingConfig MP;
    return MP;
  }

  /// Flag to indicate whether we use events to ensure the atomicity of map
  /// clauses. Can be modified with an environment variable.
  bool UseEventsForAtomicTransfers = true;
};
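
// Illustrative usage sketch (not part of the interface; the code below is an
// example only): query the process-wide configuration before deciding whether
// a map-clause transfer needs to be guarded by an event.
//
//   if (MappingConfig::get().UseEventsForAtomicTransfers) {
//     // Create/record an event so the transfer appears atomic to other
//     // threads mapping the same data.
//   }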

/// Information about shadow pointers.
struct ShadowPtrInfoTy {
  void **HstPtrAddr = nullptr;
  void *HstPtrVal = nullptr;
  void **TgtPtrAddr = nullptr;
  void *TgtPtrVal = nullptr;

  bool operator==(const ShadowPtrInfoTy &Other) const {
    return HstPtrAddr == Other.HstPtrAddr;
  }
};

inline bool operator<(const ShadowPtrInfoTy &lhs, const ShadowPtrInfoTy &rhs) {
  return lhs.HstPtrAddr < rhs.HstPtrAddr;
}

/// Map between host data and target data.
struct HostDataToTargetTy {
  const uintptr_t HstPtrBase; // host info.
  const uintptr_t HstPtrBegin;
  const uintptr_t HstPtrEnd;       // non-inclusive.
  const map_var_info_t HstPtrName; // Optional source name of mapped variable.

  const uintptr_t TgtAllocBegin; // allocated target memory
  const uintptr_t TgtPtrBegin; // mapped target memory = TgtAllocBegin + padding

private:
  static const uint64_t INFRefCount = ~(uint64_t)0;
  static std::string refCountToStr(uint64_t RefCount) {
    return RefCount == INFRefCount ? "INF" : std::to_string(RefCount);
  }

  struct StatesTy {
    StatesTy(uint64_t DRC, uint64_t HRC)
        : DynRefCount(DRC), HoldRefCount(HRC) {}
    /// The dynamic reference count is the standard reference count as of OpenMP
    /// 4.5. The hold reference count is an OpenMP extension for the sake of
    /// OpenACC support.
    ///
    /// The 'ompx_hold' map type modifier is permitted only on "omp target" and
    /// "omp target data", and "delete" is permitted only on "omp target exit
    /// data" and associated runtime library routines. As a result, we really
    /// need to implement "reset" functionality only for the dynamic reference
    /// counter. Likewise, only the dynamic reference count can be infinite
    /// because, for example, omp_target_associate_ptr and "omp declare target
    /// link" operate only on it. Nevertheless, it's actually easier to follow
    /// the code (and requires fewer assertions for special cases) when we just
    /// implement these features generally across both reference counters here.
    /// Thus, it's the users of this class that impose those restrictions.
    ///
    uint64_t DynRefCount;
    uint64_t HoldRefCount;

    /// A set of shadow pointers associated with this entry; the keys are host
    /// pointer addresses, used to identify stale entries.
    llvm::SmallSet<ShadowPtrInfoTy, 2> ShadowPtrInfos;

    /// Pointer to the event corresponding to the data update of this map.
    /// Note: At present this event is created when the first data transfer from
    /// host to device is issued, and it is only used for H2D. It is not used
    /// for data transfer in the other direction (device to host). It is still
    /// unclear whether we need it for D2H. If in the future we need a similar
    /// mechanism for D2H, and if the event cannot be shared between them, Event
    /// should be written as <tt>void *Event[2]</tt>.
    void *Event = nullptr;

    /// Number of threads currently holding a reference to the entry at a
    /// targetDataEnd. This is used to ensure that only the last thread that
    /// references this entry will actually delete it.
    int32_t DataEndThreadCount = 0;
  };

  // When HostDataToTargetTy is used by std::set, std::set::iterator is const,
  // so we use a unique_ptr to make States mutable.
  const std::unique_ptr<StatesTy> States;

public:
  HostDataToTargetTy(uintptr_t BP, uintptr_t B, uintptr_t E,
                     uintptr_t TgtAllocBegin, uintptr_t TgtPtrBegin,
                     bool UseHoldRefCount, map_var_info_t Name = nullptr,
                     bool IsINF = false)
      : HstPtrBase(BP), HstPtrBegin(B), HstPtrEnd(E), HstPtrName(Name),
        TgtAllocBegin(TgtAllocBegin), TgtPtrBegin(TgtPtrBegin),
        States(std::make_unique<StatesTy>(UseHoldRefCount ? 0
                                          : IsINF         ? INFRefCount
                                                          : 1,
                                          !UseHoldRefCount ? 0
                                          : IsINF          ? INFRefCount
                                                           : 1)) {}
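
  // Illustrative note, derived from the initializer above: the constructor
  // seeds exactly one of the two counters and leaves the other at zero, e.g.:
  //   UseHoldRefCount=false, IsINF=false -> DynRefCount=1,   HoldRefCount=0
  //   UseHoldRefCount=false, IsINF=true  -> DynRefCount=INF, HoldRefCount=0
  //   UseHoldRefCount=true,  IsINF=false -> DynRefCount=0,   HoldRefCount=1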

  /// Get the total reference count. This is smarter than just getDynRefCount()
  /// + getHoldRefCount() because it handles the case where at least one is
  /// infinity and the other is non-zero.
  uint64_t getTotalRefCount() const {
    if (States->DynRefCount == INFRefCount ||
        States->HoldRefCount == INFRefCount)
      return INFRefCount;
    return States->DynRefCount + States->HoldRefCount;
  }

  /// Get the dynamic reference count.
  uint64_t getDynRefCount() const { return States->DynRefCount; }

  /// Get the hold reference count.
  uint64_t getHoldRefCount() const { return States->HoldRefCount; }

  /// Get the event bound to this data map.
  void *getEvent() const { return States->Event; }

  /// Add a new event, if necessary.
  /// Returns OFFLOAD_FAIL if something went wrong, OFFLOAD_SUCCESS otherwise.
  int addEventIfNecessary(DeviceTy &Device, AsyncInfoTy &AsyncInfo) const;

  /// Functions that manage the number of threads referencing the entry in a
  /// targetDataEnd.
  void incDataEndThreadCount() { ++States->DataEndThreadCount; }

  [[nodiscard]] int32_t decDataEndThreadCount() {
    return --States->DataEndThreadCount;
  }

  [[nodiscard]] int32_t getDataEndThreadCount() const {
    return States->DataEndThreadCount;
  }

  /// Set the event bound to this data map.
  void setEvent(void *Event) const { States->Event = Event; }

  /// Reset the specified reference count unless it's infinity. Reset to 1
  /// (even if currently 0) so it can be followed by a decrement.
  void resetRefCount(bool UseHoldRefCount) const {
    uint64_t &ThisRefCount =
        UseHoldRefCount ? States->HoldRefCount : States->DynRefCount;
    if (ThisRefCount != INFRefCount)
      ThisRefCount = 1;
  }

  /// Increment the specified reference count unless it's infinity.
  void incRefCount(bool UseHoldRefCount) const {
    uint64_t &ThisRefCount =
        UseHoldRefCount ? States->HoldRefCount : States->DynRefCount;
    if (ThisRefCount != INFRefCount) {
      ++ThisRefCount;
      assert(ThisRefCount < INFRefCount && "refcount overflow");
    }
  }

  /// Decrement the specified reference count unless it's infinity or zero, and
  /// return the total reference count.
  uint64_t decRefCount(bool UseHoldRefCount) const {
    uint64_t &ThisRefCount =
        UseHoldRefCount ? States->HoldRefCount : States->DynRefCount;
    uint64_t OtherRefCount =
        UseHoldRefCount ? States->DynRefCount : States->HoldRefCount;
    (void)OtherRefCount;
    if (ThisRefCount != INFRefCount) {
      if (ThisRefCount > 0)
        --ThisRefCount;
      else
        assert(OtherRefCount >= 0 && "total refcount underflow");
    }
    return getTotalRefCount();
  }

  /// Is the dynamic (and thus the total) reference count infinite?
  bool isDynRefCountInf() const { return States->DynRefCount == INFRefCount; }

  /// Convert the dynamic reference count to a debug string.
  std::string dynRefCountToStr() const {
    return refCountToStr(States->DynRefCount);
  }

  /// Convert the hold reference count to a debug string.
  std::string holdRefCountToStr() const {
    return refCountToStr(States->HoldRefCount);
  }

  /// Should one decrement of the specified reference count (after resetting it
  /// if \c AfterReset) remove this mapping?
  bool decShouldRemove(bool UseHoldRefCount, bool AfterReset = false) const {
    uint64_t ThisRefCount =
        UseHoldRefCount ? States->HoldRefCount : States->DynRefCount;
    uint64_t OtherRefCount =
        UseHoldRefCount ? States->DynRefCount : States->HoldRefCount;
    if (OtherRefCount > 0)
      return false;
    if (AfterReset)
      return ThisRefCount != INFRefCount;
    return ThisRefCount == 1;
  }
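
  // Illustrative note: for a plain (non-hold) mapping,
  // decShouldRemove(/*UseHoldRefCount=*/false) returns true exactly when
  // HoldRefCount is 0 and DynRefCount is 1, i.e. when the corresponding
  // decRefCount(false) would bring the total reference count to 0.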

  /// Add the shadow pointer info \p ShadowPtrInfo to this entry but only if
  /// the target pointer value was not already present in the existing set of
  /// shadow pointers. Return true if something was added.
  bool addShadowPointer(const ShadowPtrInfoTy &ShadowPtrInfo) const {
    auto Pair = States->ShadowPtrInfos.insert(ShadowPtrInfo);
    if (Pair.second)
      return true;
    // Check for a stale entry; if one is found, replace it.
    if ((*Pair.first).TgtPtrVal == ShadowPtrInfo.TgtPtrVal)
      return false;
    States->ShadowPtrInfos.erase(ShadowPtrInfo);
    return addShadowPointer(ShadowPtrInfo);
  }

  /// Apply \p CB to all shadow pointers of this entry. Returns OFFLOAD_FAIL if
  /// \p CB returned OFFLOAD_FAIL for any of them, otherwise this returns
  /// OFFLOAD_SUCCESS. The entry is locked for this operation.
  template <typename CBTy> int foreachShadowPointerInfo(CBTy CB) const {
    for (auto &It : States->ShadowPtrInfos)
      if (CB(const_cast<ShadowPtrInfoTy &>(It)) == OFFLOAD_FAIL)
        return OFFLOAD_FAIL;
    return OFFLOAD_SUCCESS;
  }
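
  // Illustrative usage sketch (Entry is a placeholder for a
  // HostDataToTargetTy pointer and the lambda is an example only): restore the
  // saved host pointer values for all shadow pointers attached to this entry.
  //
  //   Entry->foreachShadowPointerInfo([](ShadowPtrInfoTy &SP) {
  //     *SP.HstPtrAddr = SP.HstPtrVal;
  //     return OFFLOAD_SUCCESS;
  //   });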

  /// Lock this entry for exclusive access. Ensure to get exclusive access to
  /// HDTTMap first!
  void lock() const { Mtx.lock(); }

  /// Unlock this entry to allow other threads to inspect it.
  void unlock() const { Mtx.unlock(); }

private:
  // Mutex that needs to be held before the entry is inspected or modified. The
  // HDTTMap mutex needs to be held before trying to lock any HDTT Entry.
  mutable std::mutex Mtx;
};

/// Wrapper around the HostDataToTargetTy to be used in the HDTT map. In
/// addition to the HDTT pointer we store the key value explicitly. This
/// allows the set to inspect (sort/search/...) this entry without an additional
/// load of HDTT. HDTT is a pointer to allow the modification of the set without
/// invalidating HDTT entries which can now be inspected at the same time.
struct HostDataToTargetMapKeyTy {
  uintptr_t KeyValue;

  HostDataToTargetMapKeyTy(void *Key) : KeyValue(uintptr_t(Key)) {}
  HostDataToTargetMapKeyTy(uintptr_t Key) : KeyValue(Key) {}
  HostDataToTargetMapKeyTy(HostDataToTargetTy *HDTT)
      : KeyValue(HDTT->HstPtrBegin), HDTT(HDTT) {}
  HostDataToTargetTy *HDTT;
};

inline bool operator<(const HostDataToTargetMapKeyTy &LHS,
                      const uintptr_t &RHS) {
  return LHS.KeyValue < RHS;
}
inline bool operator<(const uintptr_t &LHS,
                      const HostDataToTargetMapKeyTy &RHS) {
  return LHS < RHS.KeyValue;
}
inline bool operator<(const HostDataToTargetMapKeyTy &LHS,
                      const HostDataToTargetMapKeyTy &RHS) {
  return LHS.KeyValue < RHS.KeyValue;
}
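
// Illustrative note: together with std::less<> (see
// MappingInfoTy::HostDataToTargetListTy below), the mixed-type operator<
// overloads above enable heterogeneous lookup, so a raw host address can be
// used as a search key without building a wrapper object. Sketch, with Map and
// HstPtrBegin as placeholders:
//
//   std::set<HostDataToTargetMapKeyTy, std::less<>> Map;
//   auto It = Map.lower_bound(uintptr_t(HstPtrBegin)); // no temporary key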

/// This struct will be returned by \p MappingInfoTy::getTargetPointer which
/// provides more data than just a target pointer. A TargetPointerResultTy that
/// has a non-null Entry owns the entry. As long as the TargetPointerResultTy
/// (TPR) exists the entry is locked. To give up ownership without destroying
/// the TPR use the reset() function.
struct TargetPointerResultTy {
  struct FlagTy {
    /// If the map table entry is just created
    unsigned IsNewEntry : 1;
    /// If the pointer is actually a host pointer (when unified shared memory
    /// is enabled)
    unsigned IsHostPointer : 1;
    /// If the pointer is present in the mapping table.
    unsigned IsPresent : 1;
    /// Flag indicating that this was the last user of the entry and the ref
    /// count is now 0.
    unsigned IsLast : 1;
    /// If the pointer is contained in an existing mapping.
    unsigned IsContained : 1;
  } Flags = {0, 0, 0, 0, 0};

  TargetPointerResultTy(const TargetPointerResultTy &) = delete;
  TargetPointerResultTy &operator=(const TargetPointerResultTy &TPR) = delete;
  TargetPointerResultTy() {}

  TargetPointerResultTy(FlagTy Flags, HostDataToTargetTy *Entry,
                        void *TargetPointer)
      : Flags(Flags), TargetPointer(TargetPointer), Entry(Entry) {
    if (Entry)
      Entry->lock();
  }

  TargetPointerResultTy(TargetPointerResultTy &&TPR)
      : Flags(TPR.Flags), TargetPointer(TPR.TargetPointer), Entry(TPR.Entry) {
    TPR.Entry = nullptr;
  }

  TargetPointerResultTy &operator=(TargetPointerResultTy &&TPR) {
    if (&TPR != this) {
      std::swap(Flags, TPR.Flags);
      std::swap(Entry, TPR.Entry);
      std::swap(TargetPointer, TPR.TargetPointer);
    }
    return *this;
  }

  ~TargetPointerResultTy() {
    if (Entry)
      Entry->unlock();
  }

  bool isPresent() const { return Flags.IsPresent; }

  bool isHostPointer() const { return Flags.IsHostPointer; }

  bool isContained() const { return Flags.IsContained; }

  /// The corresponding target pointer
  void *TargetPointer = nullptr;

  HostDataToTargetTy *getEntry() const { return Entry; }
  void setEntry(HostDataToTargetTy *HDTTT,
                HostDataToTargetTy *OwnedTPR = nullptr) {
    if (Entry)
      Entry->unlock();
    Entry = HDTTT;
    if (Entry && Entry != OwnedTPR)
      Entry->lock();
  }

  void reset() { *this = TargetPointerResultTy(); }

private:
  /// The corresponding map table entry which is stable.
  HostDataToTargetTy *Entry = nullptr;
};
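
// Illustrative usage sketch (Info, HstPtr, and Size are placeholders): a TPR
// returned by MappingInfoTy::getTgtPtrBegin keeps the underlying map entry
// locked for as long as the TPR is alive.
//
//   {
//     TargetPointerResultTy TPR = Info.getTgtPtrBegin(
//         HstPtr, Size, /*UpdateRefCount=*/false, /*UseHoldRefCount=*/false,
//         /*MustContain=*/true);
//     if (TPR.isPresent())
//       ; // TPR.TargetPointer is the device address; the entry is locked here.
//   } // TPR destroyed: the entry is unlocked again.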

struct LookupResult {
  struct {
    unsigned IsContained : 1;
    unsigned ExtendsBefore : 1;
    unsigned ExtendsAfter : 1;
  } Flags;

  LookupResult() : Flags({0, 0, 0}), TPR() {}

  TargetPointerResultTy TPR;
};

// This structure stores information about a mapped memory region.
struct MapComponentInfoTy {
  void *Base;
  void *Begin;
  int64_t Size;
  int64_t Type;
  void *Name;
  MapComponentInfoTy() = default;
  MapComponentInfoTy(void *Base, void *Begin, int64_t Size, int64_t Type,
                     void *Name)
      : Base(Base), Begin(Begin), Size(Size), Type(Type), Name(Name) {}
};

// This structure stores all components of a user-defined mapper. The number of
// components is decided dynamically, so we use an llvm::SmallVector here.
struct MapperComponentsTy {
  llvm::SmallVector<MapComponentInfoTy> Components;
  int32_t size() { return Components.size(); }
};
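
// Illustrative usage sketch (BasePtr, BeginPtr, Bytes, and MapType are
// placeholders): one MapComponentInfoTy is collected per component reported
// for a user-defined mapper.
//
//   MapperComponentsTy MC;
//   MC.Components.emplace_back(/*Base=*/BasePtr, /*Begin=*/BeginPtr,
//                              /*Size=*/int64_t(Bytes), /*Type=*/MapType,
//                              /*Name=*/nullptr);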

// The mapper function pointer type. It follows the signature below:
// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
//                                           void *base, void *begin,
//                                           int64_t size, int64_t type,
//                                           void *name);
typedef void (*MapperFuncPtrTy)(void *, void *, void *, int64_t, int64_t,
                                void *);
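
// Illustrative sketch (ArgMapper, Handle, Base, Begin, Size, and Type are
// placeholders): a compiler-generated mapper is invoked through this pointer
// type after casting the opaque mapper argument.
//
//   MapperFuncPtrTy MapperFunc = (MapperFuncPtrTy)ArgMapper;
//   MapperFunc(/*rt_mapper_handle=*/Handle, Base, Begin, Size, Type,
//              /*name=*/nullptr);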

// Function pointer type for targetData* functions (targetDataBegin,
// targetDataEnd and targetDataUpdate).
typedef int (*TargetDataFuncPtrTy)(ident_t *, DeviceTy &, int32_t, void **,
                                   void **, int64_t *, int64_t *,
                                   map_var_info_t *, void **, AsyncInfoTy &,
                                   bool);

void dumpTargetPointerMappings(const ident_t *Loc, DeviceTy &Device,
                               bool toStdOut = false);

int targetDataBegin(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
                    void **ArgsBase, void **Args, int64_t *ArgSizes,
                    int64_t *ArgTypes, map_var_info_t *ArgNames,
                    void **ArgMappers, AsyncInfoTy &AsyncInfo,
                    bool FromMapper = false);

int targetDataEnd(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
                  void **ArgBases, void **Args, int64_t *ArgSizes,
                  int64_t *ArgTypes, map_var_info_t *ArgNames,
                  void **ArgMappers, AsyncInfoTy &AsyncInfo,
                  bool FromMapper = false);

int targetDataUpdate(ident_t *Loc, DeviceTy &Device, int32_t ArgNum,
                     void **ArgsBase, void **Args, int64_t *ArgSizes,
                     int64_t *ArgTypes, map_var_info_t *ArgNames,
                     void **ArgMappers, AsyncInfoTy &AsyncInfo,
                     bool FromMapper = false);
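
// Illustrative note: the three targetData* entry points above all match
// TargetDataFuncPtrTy, so any of them can be passed around as a function
// pointer. Sketch, with the arguments taken from an enclosing caller:
//
//   TargetDataFuncPtrTy Fn = targetDataBegin;
//   int Rc = Fn(Loc, Device, ArgNum, ArgsBase, Args, ArgSizes, ArgTypes,
//               ArgNames, ArgMappers, AsyncInfo, /*FromMapper=*/false);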

struct MappingInfoTy {
  MappingInfoTy(DeviceTy &Device) : Device(Device) {}

  /// Host data to device map type with a wrapper key indirection that allows
  /// concurrent modification of the entries without invalidating the underlying
  /// entries.
  using HostDataToTargetListTy =
      std::set<HostDataToTargetMapKeyTy, std::less<>>;

  /// The HDTTMap is a protected object that can only be accessed by one thread
  /// at a time.
  ProtectedObj<HostDataToTargetListTy> HostDataToTargetMap;

  /// The type used to access the HDTT map.
  using HDTTMapAccessorTy = decltype(HostDataToTargetMap)::AccessorTy;

  /// Lookup the mapping of \p HstPtrBegin in \p HDTTMap. The accessor ensures
  /// exclusive access to the HDTT map.
  LookupResult lookupMapping(HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin,
                             int64_t Size,
                             HostDataToTargetTy *OwnedTPR = nullptr);

  /// Get the target pointer based on host pointer begin and base. If the
  /// mapping already exists, the target pointer will be returned directly. In
  /// addition, if required, the memory region pointed to by \p HstPtrBegin of
  /// size \p Size will also be transferred to the device. If the mapping
  /// doesn't exist, and if unified shared memory is not enabled, a new mapping
  /// will be created and the data will also be transferred accordingly. nullptr
  /// will be returned for any of the following reasons:
  /// - Data allocation failed;
  /// - The user tried to do an illegal mapping;
  /// - A data transfer failed.
  TargetPointerResultTy getTargetPointer(
      HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin, void *HstPtrBase,
      int64_t TgtPadding, int64_t Size, map_var_info_t HstPtrName,
      bool HasFlagTo, bool HasFlagAlways, bool IsImplicit, bool UpdateRefCount,
      bool HasCloseModifier, bool HasPresentModifier, bool HasHoldModifier,
      AsyncInfoTy &AsyncInfo, HostDataToTargetTy *OwnedTPR = nullptr,
      bool ReleaseHDTTMap = true);

  /// Return the target pointer for \p HstPtrBegin in \p HDTTMap. The accessor
  /// ensures exclusive access to the HDTT map.
  void *getTgtPtrBegin(HDTTMapAccessorTy &HDTTMap, void *HstPtrBegin,
                       int64_t Size);

  /// Return the target pointer begin (where the data will be moved).
  /// Used by targetDataBegin, targetDataEnd, targetDataUpdate and target.
  /// - \p UpdateRefCount and \p UseHoldRefCount control which of the entry's
  ///   reference counters, if any, will be decremented.
  /// - \p MustContain enforces that the query must not extend beyond an already
  ///   mapped entry to be valid.
  /// - \p ForceDelete deletes the entry regardless of its reference counting
  ///   (unless it is infinite).
  /// - \p FromDataEnd tracks the number of threads referencing the entry at
  ///   targetDataEnd for delayed deletion purposes.
  [[nodiscard]] TargetPointerResultTy
  getTgtPtrBegin(void *HstPtrBegin, int64_t Size, bool UpdateRefCount,
                 bool UseHoldRefCount, bool MustContain = false,
                 bool ForceDelete = false, bool FromDataEnd = false);
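
  // Illustrative usage sketch (HstPtr, Size, HasHoldModifier, and ForceDelete
  // are placeholders): a targetDataEnd-style release decrements the requested
  // reference counter and inspects the returned flags.
  //
  //   TargetPointerResultTy TPR =
  //       getTgtPtrBegin(HstPtr, Size, /*UpdateRefCount=*/true,
  //                      /*UseHoldRefCount=*/HasHoldModifier,
  //                      /*MustContain=*/false, ForceDelete,
  //                      /*FromDataEnd=*/true);
  //   if (TPR.isPresent() && TPR.Flags.IsLast) {
  //     // Last user of the mapping: see eraseMapEntry and
  //     // deallocTgtPtrAndEntry below for the removal/deallocation steps.
  //   }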

  /// Remove the \p Entry from the data map. Expect the entry's total reference
  /// count to be zero and the caller thread to be the last one using it. \p
  /// HDTTMap ensures the caller holds exclusive access and can modify the map.
  /// Return \c OFFLOAD_SUCCESS if the map entry existed, and return \c
  /// OFFLOAD_FAIL if not. It is the caller's responsibility to skip calling
  /// this function if the map entry is not expected to exist because \p
  /// HstPtrBegin uses shared memory.
  [[nodiscard]] int eraseMapEntry(HDTTMapAccessorTy &HDTTMap,
                                  HostDataToTargetTy *Entry, int64_t Size);

  /// Deallocate the \p Entry from the device memory and delete it. Return \c
  /// OFFLOAD_SUCCESS if the deallocation operations executed successfully, and
  /// return \c OFFLOAD_FAIL otherwise.
  [[nodiscard]] int deallocTgtPtrAndEntry(HostDataToTargetTy *Entry,
                                          int64_t Size);

  int associatePtr(void *HstPtrBegin, void *TgtPtrBegin, int64_t Size);
  int disassociatePtr(void *HstPtrBegin);

  /// Print information about the transfer from \p HstPtr to \p TgtPtr (or vice
  /// versa if \p H2D is false). If there is an existing mapping, or if \p Entry
  /// is set, the associated metadata will be printed as well.
  void printCopyInfo(void *TgtPtr, void *HstPtr, int64_t Size, bool H2D,
                     HostDataToTargetTy *Entry,
                     MappingInfoTy::HDTTMapAccessorTy *HDTTMapPtr);

private:
  DeviceTy &Device;
};

#endif // OMPTARGET_OPENMP_MAPPING_H