1 //===--------- device.cpp - Target independent OpenMP target RTL ----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Functionality for managing devices that are handled by RTL plugins.
11 //===----------------------------------------------------------------------===//
14 #include "OmptCallback.h"
15 #include "OmptInterface.h"
16 #include "omptarget.h"
20 #include "Utilities.h"
31 using namespace llvm::omp::target::ompt
;
34 int HostDataToTargetTy::addEventIfNecessary(DeviceTy
&Device
,
35 AsyncInfoTy
&AsyncInfo
) const {
36 // First, check if the user disabled atomic map transfer/malloc/dealloc.
37 if (!PM
->UseEventsForAtomicTransfers
)
38 return OFFLOAD_SUCCESS
;
40 void *Event
= getEvent();
41 bool NeedNewEvent
= Event
== nullptr;
42 if (NeedNewEvent
&& Device
.createEvent(&Event
) != OFFLOAD_SUCCESS
) {
43 REPORT("Failed to create event\n");
47 // We cannot assume the event should not be nullptr because we don't
48 // know if the target supports events. But if a target doesn't,
49 // recordEvent should always return success.
50 if (Device
.recordEvent(Event
, AsyncInfo
) != OFFLOAD_SUCCESS
) {
51 REPORT("Failed to set dependence on event " DPxMOD
"\n", DPxPTR(Event
));
58 return OFFLOAD_SUCCESS
;
61 DeviceTy::DeviceTy(RTLInfoTy
*RTL
)
62 : DeviceID(-1), RTL(RTL
), RTLDeviceID(-1), IsInit(false), InitFlag(),
63 HasPendingGlobals(false), PendingCtorsDtors(), PendingGlobalsMtx() {}
65 DeviceTy::~DeviceTy() {
66 if (DeviceID
== -1 || !(getInfoLevel() & OMP_INFOTYPE_DUMP_TABLE
))
69 ident_t Loc
= {0, 0, 0, 0, ";libomptarget;libomptarget;0;0;;"};
70 dumpTargetPointerMappings(&Loc
, *this);
73 int DeviceTy::associatePtr(void *HstPtrBegin
, void *TgtPtrBegin
, int64_t Size
) {
74 HDTTMapAccessorTy HDTTMap
= HostDataToTargetMap
.getExclusiveAccessor();
76 // Check if entry exists
77 auto It
= HDTTMap
->find(HstPtrBegin
);
78 if (It
!= HDTTMap
->end()) {
79 HostDataToTargetTy
&HDTT
= *It
->HDTT
;
80 std::lock_guard
<HostDataToTargetTy
> LG(HDTT
);
81 // Mapping already exists
82 bool IsValid
= HDTT
.HstPtrEnd
== (uintptr_t)HstPtrBegin
+ Size
&&
83 HDTT
.TgtPtrBegin
== (uintptr_t)TgtPtrBegin
;
85 DP("Attempt to re-associate the same device ptr+offset with the same "
86 "host ptr, nothing to do\n");
87 return OFFLOAD_SUCCESS
;
89 REPORT("Not allowed to re-associate a different device ptr+offset with "
90 "the same host ptr\n");
94 // Mapping does not exist, allocate it with refCount=INF
95 const HostDataToTargetTy
&NewEntry
=
97 ->emplace(new HostDataToTargetTy(
98 /*HstPtrBase=*/(uintptr_t)HstPtrBegin
,
99 /*HstPtrBegin=*/(uintptr_t)HstPtrBegin
,
100 /*HstPtrEnd=*/(uintptr_t)HstPtrBegin
+ Size
,
101 /*TgtAllocBegin=*/(uintptr_t)TgtPtrBegin
,
102 /*TgtPtrBegin=*/(uintptr_t)TgtPtrBegin
,
103 /*UseHoldRefCount=*/false, /*Name=*/nullptr,
104 /*IsRefCountINF=*/true))
106 DP("Creating new map entry: HstBase=" DPxMOD
", HstBegin=" DPxMOD
107 ", HstEnd=" DPxMOD
", TgtBegin=" DPxMOD
", DynRefCount=%s, "
109 DPxPTR(NewEntry
.HstPtrBase
), DPxPTR(NewEntry
.HstPtrBegin
),
110 DPxPTR(NewEntry
.HstPtrEnd
), DPxPTR(NewEntry
.TgtPtrBegin
),
111 NewEntry
.dynRefCountToStr().c_str(), NewEntry
.holdRefCountToStr().c_str());
114 // Notify the plugin about the new mapping.
115 return notifyDataMapped(HstPtrBegin
, Size
);
118 int DeviceTy::disassociatePtr(void *HstPtrBegin
) {
119 HDTTMapAccessorTy HDTTMap
= HostDataToTargetMap
.getExclusiveAccessor();
121 auto It
= HDTTMap
->find(HstPtrBegin
);
122 if (It
== HDTTMap
->end()) {
123 REPORT("Association not found\n");
127 HostDataToTargetTy
&HDTT
= *It
->HDTT
;
128 std::lock_guard
<HostDataToTargetTy
> LG(HDTT
);
130 if (HDTT
.getHoldRefCount()) {
131 // This is based on OpenACC 3.1, sec 3.2.33 "acc_unmap_data", L3656-3657:
132 // "It is an error to call acc_unmap_data if the structured reference
133 // count for the pointer is not zero."
134 REPORT("Trying to disassociate a pointer with a non-zero hold reference "
139 if (HDTT
.isDynRefCountInf()) {
140 DP("Association found, removing it\n");
141 void *Event
= HDTT
.getEvent();
146 return notifyDataUnmapped(HstPtrBegin
);
149 REPORT("Trying to disassociate a pointer which was not mapped via "
150 "omp_target_associate_ptr\n");
154 LookupResult
DeviceTy::lookupMapping(HDTTMapAccessorTy
&HDTTMap
,
155 void *HstPtrBegin
, int64_t Size
,
156 HostDataToTargetTy
*OwnedTPR
) {
158 uintptr_t HP
= (uintptr_t)HstPtrBegin
;
161 DP("Looking up mapping(HstPtrBegin=" DPxMOD
", Size=%" PRId64
")...\n",
164 if (HDTTMap
->empty())
167 auto Upper
= HDTTMap
->upper_bound(HP
);
170 // specification v5.1 Pointer Initialization for Device Data Environments
171 // upper_bound satisfies
172 // std::prev(upper)->HDTT.HstPtrBegin <= hp < upper->HDTT.HstPtrBegin
173 if (Upper
!= HDTTMap
->begin()) {
174 LR
.TPR
.setEntry(std::prev(Upper
)->HDTT
, OwnedTPR
);
175 // the left side of extended address range is satisfied.
176 // hp >= LR.TPR.getEntry()->HstPtrBegin || hp >=
177 // LR.TPR.getEntry()->HstPtrBase
178 LR
.Flags
.IsContained
= HP
< LR
.TPR
.getEntry()->HstPtrEnd
||
179 HP
< LR
.TPR
.getEntry()->HstPtrBase
;
182 if (!LR
.Flags
.IsContained
&& Upper
!= HDTTMap
->end()) {
183 LR
.TPR
.setEntry(Upper
->HDTT
, OwnedTPR
);
184 // the right side of extended address range is satisfied.
185 // hp < LR.TPR.getEntry()->HstPtrEnd || hp < LR.TPR.getEntry()->HstPtrBase
186 LR
.Flags
.IsContained
= HP
>= LR
.TPR
.getEntry()->HstPtrBase
;
189 // check the left bin
190 if (Upper
!= HDTTMap
->begin()) {
191 LR
.TPR
.setEntry(std::prev(Upper
)->HDTT
, OwnedTPR
);
193 LR
.Flags
.IsContained
= HP
>= LR
.TPR
.getEntry()->HstPtrBegin
&&
194 HP
< LR
.TPR
.getEntry()->HstPtrEnd
&&
195 (HP
+ Size
) <= LR
.TPR
.getEntry()->HstPtrEnd
;
196 // Does it extend beyond the mapped region?
197 LR
.Flags
.ExtendsAfter
= HP
< LR
.TPR
.getEntry()->HstPtrEnd
&&
198 (HP
+ Size
) > LR
.TPR
.getEntry()->HstPtrEnd
;
201 // check the right bin
202 if (!(LR
.Flags
.IsContained
|| LR
.Flags
.ExtendsAfter
) &&
203 Upper
!= HDTTMap
->end()) {
204 LR
.TPR
.setEntry(Upper
->HDTT
, OwnedTPR
);
205 // Does it extend into an already mapped region?
206 LR
.Flags
.ExtendsBefore
= HP
< LR
.TPR
.getEntry()->HstPtrBegin
&&
207 (HP
+ Size
) > LR
.TPR
.getEntry()->HstPtrBegin
;
208 // Does it extend beyond the mapped region?
209 LR
.Flags
.ExtendsAfter
= HP
< LR
.TPR
.getEntry()->HstPtrEnd
&&
210 (HP
+ Size
) > LR
.TPR
.getEntry()->HstPtrEnd
;
213 if (LR
.Flags
.ExtendsBefore
) {
214 DP("WARNING: Pointer is not mapped but section extends into already "
217 if (LR
.Flags
.ExtendsAfter
) {
218 DP("WARNING: Pointer is already mapped but section extends beyond mapped "
226 TargetPointerResultTy
DeviceTy::getTargetPointer(
227 HDTTMapAccessorTy
&HDTTMap
, void *HstPtrBegin
, void *HstPtrBase
,
228 int64_t TgtPadding
, int64_t Size
, map_var_info_t HstPtrName
, bool HasFlagTo
,
229 bool HasFlagAlways
, bool IsImplicit
, bool UpdateRefCount
,
230 bool HasCloseModifier
, bool HasPresentModifier
, bool HasHoldModifier
,
231 AsyncInfoTy
&AsyncInfo
, HostDataToTargetTy
*OwnedTPR
, bool ReleaseHDTTMap
) {
233 LookupResult LR
= lookupMapping(HDTTMap
, HstPtrBegin
, Size
, OwnedTPR
);
234 LR
.TPR
.Flags
.IsPresent
= true;
236 // Release the mapping table lock only after the entry is locked by
237 // attaching it to TPR. Once TPR is destroyed it will release the lock
238 // on entry. If it is returned the lock will move to the returned object.
239 // If LR.Entry is already owned/locked we avoid trying to lock it again.
241 // Check if the pointer is contained.
242 // If a variable is mapped to the device manually by the user - which would
243 // lead to the IsContained flag to be true - then we must ensure that the
244 // device address is returned even under unified memory conditions.
245 if (LR
.Flags
.IsContained
||
246 ((LR
.Flags
.ExtendsBefore
|| LR
.Flags
.ExtendsAfter
) && IsImplicit
)) {
247 const char *RefCountAction
;
248 if (UpdateRefCount
) {
249 // After this, reference count >= 1. If the reference count was 0 but the
250 // entry was still there we can reuse the data on the device and avoid a
252 LR
.TPR
.getEntry()->incRefCount(HasHoldModifier
);
253 RefCountAction
= " (incremented)";
255 // It might have been allocated with the parent, but it's still new.
256 LR
.TPR
.Flags
.IsNewEntry
= LR
.TPR
.getEntry()->getTotalRefCount() == 1;
257 RefCountAction
= " (update suppressed)";
259 const char *DynRefCountAction
= HasHoldModifier
? "" : RefCountAction
;
260 const char *HoldRefCountAction
= HasHoldModifier
? RefCountAction
: "";
261 uintptr_t Ptr
= LR
.TPR
.getEntry()->TgtPtrBegin
+
262 ((uintptr_t)HstPtrBegin
- LR
.TPR
.getEntry()->HstPtrBegin
);
263 INFO(OMP_INFOTYPE_MAPPING_EXISTS
, DeviceID
,
264 "Mapping exists%s with HstPtrBegin=" DPxMOD
", TgtPtrBegin=" DPxMOD
265 ", Size=%" PRId64
", DynRefCount=%s%s, HoldRefCount=%s%s, Name=%s\n",
266 (IsImplicit
? " (implicit)" : ""), DPxPTR(HstPtrBegin
), DPxPTR(Ptr
),
267 Size
, LR
.TPR
.getEntry()->dynRefCountToStr().c_str(), DynRefCountAction
,
268 LR
.TPR
.getEntry()->holdRefCountToStr().c_str(), HoldRefCountAction
,
269 (HstPtrName
) ? getNameFromMapping(HstPtrName
).c_str() : "unknown");
270 LR
.TPR
.TargetPointer
= (void *)Ptr
;
271 } else if ((LR
.Flags
.ExtendsBefore
|| LR
.Flags
.ExtendsAfter
) && !IsImplicit
) {
272 // Explicit extension of mapped data - not allowed.
273 MESSAGE("explicit extension not allowed: host address specified is " DPxMOD
275 " bytes), but device allocation maps to host at " DPxMOD
276 " (%" PRId64
" bytes)",
277 DPxPTR(HstPtrBegin
), Size
, DPxPTR(LR
.TPR
.getEntry()->HstPtrBegin
),
278 LR
.TPR
.getEntry()->HstPtrEnd
- LR
.TPR
.getEntry()->HstPtrBegin
);
279 if (HasPresentModifier
)
280 MESSAGE("device mapping required by 'present' map type modifier does not "
281 "exist for host address " DPxMOD
" (%" PRId64
" bytes)",
282 DPxPTR(HstPtrBegin
), Size
);
283 } else if (PM
->RTLs
.RequiresFlags
& OMP_REQ_UNIFIED_SHARED_MEMORY
&&
285 // If unified shared memory is active, implicitly mapped variables that are
286 // not privatized use host address. Any explicitly mapped variables also use
287 // host address where correctness is not impeded. In all other cases maps
289 // In addition to the mapping rules above, the close map modifier forces the
290 // mapping of the variable to the device.
292 DP("Return HstPtrBegin " DPxMOD
" Size=%" PRId64
" for unified shared "
294 DPxPTR((uintptr_t)HstPtrBegin
), Size
);
295 LR
.TPR
.Flags
.IsPresent
= false;
296 LR
.TPR
.Flags
.IsHostPointer
= true;
297 LR
.TPR
.TargetPointer
= HstPtrBegin
;
299 } else if (HasPresentModifier
) {
300 DP("Mapping required by 'present' map type modifier does not exist for "
301 "HstPtrBegin=" DPxMOD
", Size=%" PRId64
"\n",
302 DPxPTR(HstPtrBegin
), Size
);
303 MESSAGE("device mapping required by 'present' map type modifier does not "
304 "exist for host address " DPxMOD
" (%" PRId64
" bytes)",
305 DPxPTR(HstPtrBegin
), Size
);
307 // If it is not contained and Size > 0, we should create a new entry for it.
308 LR
.TPR
.Flags
.IsNewEntry
= true;
309 uintptr_t TgtAllocBegin
=
310 (uintptr_t)allocData(TgtPadding
+ Size
, HstPtrBegin
);
311 uintptr_t TgtPtrBegin
= TgtAllocBegin
+ TgtPadding
;
312 // Release the mapping table lock only after the entry is locked by
313 // attaching it to TPR.
314 LR
.TPR
.setEntry(HDTTMap
315 ->emplace(new HostDataToTargetTy(
316 (uintptr_t)HstPtrBase
, (uintptr_t)HstPtrBegin
,
317 (uintptr_t)HstPtrBegin
+ Size
, TgtAllocBegin
,
318 TgtPtrBegin
, HasHoldModifier
, HstPtrName
))
320 INFO(OMP_INFOTYPE_MAPPING_CHANGED
, DeviceID
,
321 "Creating new map entry with HstPtrBase=" DPxMOD
322 ", HstPtrBegin=" DPxMOD
", TgtAllocBegin=" DPxMOD
323 ", TgtPtrBegin=" DPxMOD
324 ", Size=%ld, DynRefCount=%s, HoldRefCount=%s, Name=%s\n",
325 DPxPTR(HstPtrBase
), DPxPTR(HstPtrBegin
), DPxPTR(TgtAllocBegin
),
326 DPxPTR(TgtPtrBegin
), Size
,
327 LR
.TPR
.getEntry()->dynRefCountToStr().c_str(),
328 LR
.TPR
.getEntry()->holdRefCountToStr().c_str(),
329 (HstPtrName
) ? getNameFromMapping(HstPtrName
).c_str() : "unknown");
330 LR
.TPR
.TargetPointer
= (void *)TgtPtrBegin
;
332 // Notify the plugin about the new mapping.
333 if (notifyDataMapped(HstPtrBegin
, Size
))
334 return {{false /* IsNewEntry */, false /* IsHostPointer */},
336 nullptr /* TargetPointer */};
338 // This entry is not present and we did not create a new entry for it.
339 LR
.TPR
.Flags
.IsPresent
= false;
342 // All mapping table modifications have been made. If the user requested it we
347 // If the target pointer is valid, and we need to transfer data, issue the
349 if (LR
.TPR
.TargetPointer
&& !LR
.TPR
.Flags
.IsHostPointer
&& HasFlagTo
&&
350 (LR
.TPR
.Flags
.IsNewEntry
|| HasFlagAlways
) && Size
!= 0) {
351 DP("Moving %" PRId64
" bytes (hst:" DPxMOD
") -> (tgt:" DPxMOD
")\n", Size
,
352 DPxPTR(HstPtrBegin
), DPxPTR(LR
.TPR
.TargetPointer
));
354 int Ret
= submitData(LR
.TPR
.TargetPointer
, HstPtrBegin
, Size
, AsyncInfo
,
356 if (Ret
!= OFFLOAD_SUCCESS
) {
357 REPORT("Copying data to device failed.\n");
358 // We will also return nullptr if the data movement fails because that
359 // pointer points to a corrupted memory region so it doesn't make any
360 // sense to continue to use it.
361 LR
.TPR
.TargetPointer
= nullptr;
362 } else if (LR
.TPR
.getEntry()->addEventIfNecessary(*this, AsyncInfo
) !=
364 return {{false /* IsNewEntry */, false /* IsHostPointer */},
366 nullptr /* TargetPointer */};
368 // If not a host pointer and no present modifier, we need to wait for the
369 // event if it exists.
370 // Note: Entry might be nullptr because of zero length array section.
371 if (LR
.TPR
.getEntry() && !LR
.TPR
.Flags
.IsHostPointer
&&
372 !HasPresentModifier
) {
373 void *Event
= LR
.TPR
.getEntry()->getEvent();
375 int Ret
= waitEvent(Event
, AsyncInfo
);
376 if (Ret
!= OFFLOAD_SUCCESS
) {
377 // If it fails to wait for the event, we need to return nullptr in
378 // case of any data race.
379 REPORT("Failed to wait for event " DPxMOD
".\n", DPxPTR(Event
));
380 return {{false /* IsNewEntry */, false /* IsHostPointer */},
382 nullptr /* TargetPointer */};
388 return std::move(LR
.TPR
);
391 TargetPointerResultTy
392 DeviceTy::getTgtPtrBegin(void *HstPtrBegin
, int64_t Size
, bool UpdateRefCount
,
393 bool UseHoldRefCount
, bool MustContain
,
394 bool ForceDelete
, bool FromDataEnd
) {
395 HDTTMapAccessorTy HDTTMap
= HostDataToTargetMap
.getExclusiveAccessor();
397 LookupResult LR
= lookupMapping(HDTTMap
, HstPtrBegin
, Size
);
399 LR
.TPR
.Flags
.IsPresent
= true;
401 if (LR
.Flags
.IsContained
||
402 (!MustContain
&& (LR
.Flags
.ExtendsBefore
|| LR
.Flags
.ExtendsAfter
))) {
403 LR
.TPR
.Flags
.IsLast
=
404 LR
.TPR
.getEntry()->decShouldRemove(UseHoldRefCount
, ForceDelete
);
407 LR
.TPR
.getEntry()->resetRefCount(UseHoldRefCount
);
408 assert(LR
.TPR
.Flags
.IsLast
==
409 LR
.TPR
.getEntry()->decShouldRemove(UseHoldRefCount
) &&
410 "expected correct IsLast prediction for reset");
413 // Increment the number of threads that is using the entry on a
414 // targetDataEnd, tracking the number of possible "deleters". A thread may
415 // come to own the entry deletion even if it was not the last one querying
416 // for it. Thus, we must track every query on targetDataEnds to ensure only
417 // the last thread that holds a reference to an entry actually deletes it.
419 LR
.TPR
.getEntry()->incDataEndThreadCount();
421 const char *RefCountAction
;
422 if (!UpdateRefCount
) {
423 RefCountAction
= " (update suppressed)";
424 } else if (LR
.TPR
.Flags
.IsLast
) {
425 LR
.TPR
.getEntry()->decRefCount(UseHoldRefCount
);
426 assert(LR
.TPR
.getEntry()->getTotalRefCount() == 0 &&
427 "Expected zero reference count when deletion is scheduled");
429 RefCountAction
= " (reset, delayed deletion)";
431 RefCountAction
= " (decremented, delayed deletion)";
433 LR
.TPR
.getEntry()->decRefCount(UseHoldRefCount
);
434 RefCountAction
= " (decremented)";
436 const char *DynRefCountAction
= UseHoldRefCount
? "" : RefCountAction
;
437 const char *HoldRefCountAction
= UseHoldRefCount
? RefCountAction
: "";
438 uintptr_t TP
= LR
.TPR
.getEntry()->TgtPtrBegin
+
439 ((uintptr_t)HstPtrBegin
- LR
.TPR
.getEntry()->HstPtrBegin
);
440 INFO(OMP_INFOTYPE_MAPPING_EXISTS
, DeviceID
,
441 "Mapping exists with HstPtrBegin=" DPxMOD
", TgtPtrBegin=" DPxMOD
", "
442 "Size=%" PRId64
", DynRefCount=%s%s, HoldRefCount=%s%s\n",
443 DPxPTR(HstPtrBegin
), DPxPTR(TP
), Size
,
444 LR
.TPR
.getEntry()->dynRefCountToStr().c_str(), DynRefCountAction
,
445 LR
.TPR
.getEntry()->holdRefCountToStr().c_str(), HoldRefCountAction
);
446 LR
.TPR
.TargetPointer
= (void *)TP
;
447 } else if (PM
->RTLs
.RequiresFlags
& OMP_REQ_UNIFIED_SHARED_MEMORY
) {
448 // If the value isn't found in the mapping and unified shared memory
449 // is on then it means we have stumbled upon a value which we need to
450 // use directly from the host.
451 DP("Get HstPtrBegin " DPxMOD
" Size=%" PRId64
" for unified shared "
453 DPxPTR((uintptr_t)HstPtrBegin
), Size
);
454 LR
.TPR
.Flags
.IsPresent
= false;
455 LR
.TPR
.Flags
.IsHostPointer
= true;
456 LR
.TPR
.TargetPointer
= HstPtrBegin
;
458 // OpenMP Specification v5.2: if a matching list item is not found, the
459 // pointer retains its original value as per firstprivate semantics.
460 LR
.TPR
.Flags
.IsPresent
= false;
461 LR
.TPR
.Flags
.IsHostPointer
= false;
462 LR
.TPR
.TargetPointer
= HstPtrBegin
;
465 return std::move(LR
.TPR
);
468 // Return the target pointer begin (where the data will be moved).
469 void *DeviceTy::getTgtPtrBegin(HDTTMapAccessorTy
&HDTTMap
, void *HstPtrBegin
,
471 uintptr_t HP
= (uintptr_t)HstPtrBegin
;
472 LookupResult LR
= lookupMapping(HDTTMap
, HstPtrBegin
, Size
);
473 if (LR
.Flags
.IsContained
|| LR
.Flags
.ExtendsBefore
|| LR
.Flags
.ExtendsAfter
) {
475 LR
.TPR
.getEntry()->TgtPtrBegin
+ (HP
- LR
.TPR
.getEntry()->HstPtrBegin
);
482 int DeviceTy::eraseMapEntry(HDTTMapAccessorTy
&HDTTMap
,
483 HostDataToTargetTy
*Entry
, int64_t Size
) {
484 assert(Entry
&& "Trying to delete a null entry from the HDTT map.");
485 assert(Entry
->getTotalRefCount() == 0 &&
486 Entry
->getDataEndThreadCount() == 0 &&
487 "Trying to delete entry that is in use or owned by another thread.");
489 INFO(OMP_INFOTYPE_MAPPING_CHANGED
, DeviceID
,
490 "Removing map entry with HstPtrBegin=" DPxMOD
", TgtPtrBegin=" DPxMOD
491 ", Size=%" PRId64
", Name=%s\n",
492 DPxPTR(Entry
->HstPtrBegin
), DPxPTR(Entry
->TgtPtrBegin
), Size
,
493 (Entry
->HstPtrName
) ? getNameFromMapping(Entry
->HstPtrName
).c_str()
496 if (HDTTMap
->erase(Entry
) == 0) {
497 REPORT("Trying to remove a non-existent map entry\n");
501 return OFFLOAD_SUCCESS
;
504 int DeviceTy::deallocTgtPtrAndEntry(HostDataToTargetTy
*Entry
, int64_t Size
) {
505 assert(Entry
&& "Trying to deallocate a null entry.");
507 DP("Deleting tgt data " DPxMOD
" of size %" PRId64
" by freeing allocation "
508 "starting at " DPxMOD
"\n",
509 DPxPTR(Entry
->TgtPtrBegin
), Size
, DPxPTR(Entry
->TgtAllocBegin
));
511 void *Event
= Entry
->getEvent();
512 if (Event
&& destroyEvent(Event
) != OFFLOAD_SUCCESS
) {
513 REPORT("Failed to destroy event " DPxMOD
"\n", DPxPTR(Event
));
517 int Ret
= deleteData((void *)Entry
->TgtAllocBegin
);
519 // Notify the plugin about the unmapped memory.
520 Ret
|= notifyDataUnmapped((void *)Entry
->HstPtrBegin
);
527 /// Init device, should not be called directly.
528 void DeviceTy::init() {
529 // Make call to init_requires if it exists for this plugin.
530 if (RTL
->init_requires
)
531 RTL
->init_requires(PM
->RTLs
.RequiresFlags
);
532 int32_t Ret
= RTL
->init_device(RTLDeviceID
);
533 if (Ret
!= OFFLOAD_SUCCESS
)
536 // Enables recording kernels if set.
537 llvm::omp::target::BoolEnvar
OMPX_RecordKernel("LIBOMPTARGET_RECORD", false);
538 if (OMPX_RecordKernel
) {
539 // Enables saving the device memory kernel output post execution if set.
540 llvm::omp::target::BoolEnvar
OMPX_ReplaySaveOutput(
541 "LIBOMPTARGET_RR_SAVE_OUTPUT", false);
542 // Sets the maximum to pre-allocate device memory.
543 llvm::omp::target::UInt64Envar
OMPX_DeviceMemorySize(
544 "LIBOMPTARGET_RR_DEVMEM_SIZE", 16);
545 DP("Activating Record-Replay for Device %d with %lu GB memory\n",
546 RTLDeviceID
, OMPX_DeviceMemorySize
.get());
548 RTL
->activate_record_replay(RTLDeviceID
,
549 OMPX_DeviceMemorySize
* 1024 * 1024 * 1024,
550 nullptr, true, OMPX_ReplaySaveOutput
);
556 /// Thread-safe method to initialize the device only once.
557 int32_t DeviceTy::initOnce() {
558 std::call_once(InitFlag
, &DeviceTy::init
, this);
560 // At this point, if IsInit is true, then either this thread or some other
561 // thread in the past successfully initialized the device, so we can return
562 // OFFLOAD_SUCCESS. If this thread executed init() via call_once() and it
563 // failed, return OFFLOAD_FAIL. If call_once did not invoke init(), it means
564 // that some other thread already attempted to execute init() and if IsInit
565 // is still false, return OFFLOAD_FAIL.
567 return OFFLOAD_SUCCESS
;
571 void DeviceTy::deinit() {
572 if (RTL
->deinit_device
)
573 RTL
->deinit_device(RTLDeviceID
);
576 // Load binary to device.
577 __tgt_target_table
*DeviceTy::loadBinary(void *Img
) {
578 std::lock_guard
<decltype(RTL
->Mtx
)> LG(RTL
->Mtx
);
579 return RTL
->load_binary(RTLDeviceID
, Img
);
582 void *DeviceTy::allocData(int64_t Size
, void *HstPtr
, int32_t Kind
) {
583 /// RAII to establish tool anchors before and after data allocation
584 void *TargetPtr
= nullptr;
585 OMPT_IF_BUILT(InterfaceRAII
TargetDataAllocRAII(
586 RegionInterface
.getCallbacks
<ompt_target_data_alloc
>(),
587 DeviceID
, HstPtr
, &TargetPtr
, Size
,
588 /* CodePtr */ OMPT_GET_RETURN_ADDRESS(0));)
590 TargetPtr
= RTL
->data_alloc(RTLDeviceID
, Size
, HstPtr
, Kind
);
594 int32_t DeviceTy::deleteData(void *TgtAllocBegin
, int32_t Kind
) {
595 /// RAII to establish tool anchors before and after data deletion
596 OMPT_IF_BUILT(InterfaceRAII
TargetDataDeleteRAII(
597 RegionInterface
.getCallbacks
<ompt_target_data_delete
>(),
598 DeviceID
, TgtAllocBegin
,
599 /* CodePtr */ OMPT_GET_RETURN_ADDRESS(0));)
601 return RTL
->data_delete(RTLDeviceID
, TgtAllocBegin
, Kind
);
604 static void printCopyInfo(int DeviceId
, bool H2D
, void *SrcPtrBegin
,
605 void *DstPtrBegin
, int64_t Size
,
606 HostDataToTargetTy
*HT
) {
608 INFO(OMP_INFOTYPE_DATA_TRANSFER
, DeviceId
,
609 "Copying data from %s to %s, %sPtr=" DPxMOD
", %sPtr=" DPxMOD
610 ", Size=%" PRId64
", Name=%s\n",
611 H2D
? "host" : "device", H2D
? "device" : "host", H2D
? "Hst" : "Tgt",
612 DPxPTR(SrcPtrBegin
), H2D
? "Tgt" : "Hst", DPxPTR(DstPtrBegin
), Size
,
613 (HT
&& HT
->HstPtrName
) ? getNameFromMapping(HT
->HstPtrName
).c_str()
617 // Submit data to device
618 int32_t DeviceTy::submitData(void *TgtPtrBegin
, void *HstPtrBegin
, int64_t Size
,
619 AsyncInfoTy
&AsyncInfo
,
620 HostDataToTargetTy
*Entry
) {
621 if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER
) {
622 HDTTMapAccessorTy HDTTMap
= HostDataToTargetMap
.getExclusiveAccessor(Entry
);
625 LR
= lookupMapping(HDTTMap
, HstPtrBegin
, Size
);
626 Entry
= LR
.TPR
.getEntry();
628 printCopyInfo(DeviceID
, /* H2D */ true, HstPtrBegin
, TgtPtrBegin
, Size
,
632 /// RAII to establish tool anchors before and after data submit
634 InterfaceRAII
TargetDataSubmitRAII(
635 RegionInterface
.getCallbacks
<ompt_target_data_transfer_to_device
>(),
636 DeviceID
, TgtPtrBegin
, HstPtrBegin
, Size
,
637 /* CodePtr */ OMPT_GET_RETURN_ADDRESS(0));)
639 if (!AsyncInfo
|| !RTL
->data_submit_async
|| !RTL
->synchronize
)
640 return RTL
->data_submit(RTLDeviceID
, TgtPtrBegin
, HstPtrBegin
, Size
);
641 return RTL
->data_submit_async(RTLDeviceID
, TgtPtrBegin
, HstPtrBegin
, Size
,
645 // Retrieve data from device
646 int32_t DeviceTy::retrieveData(void *HstPtrBegin
, void *TgtPtrBegin
,
647 int64_t Size
, AsyncInfoTy
&AsyncInfo
,
648 HostDataToTargetTy
*Entry
) {
649 if (getInfoLevel() & OMP_INFOTYPE_DATA_TRANSFER
) {
650 HDTTMapAccessorTy HDTTMap
= HostDataToTargetMap
.getExclusiveAccessor(Entry
);
653 LR
= lookupMapping(HDTTMap
, HstPtrBegin
, Size
);
654 Entry
= LR
.TPR
.getEntry();
656 printCopyInfo(DeviceID
, /* H2D */ false, TgtPtrBegin
, HstPtrBegin
, Size
,
660 /// RAII to establish tool anchors before and after data retrieval
662 InterfaceRAII
TargetDataRetrieveRAII(
663 RegionInterface
.getCallbacks
<ompt_target_data_transfer_from_device
>(),
664 DeviceID
, HstPtrBegin
, TgtPtrBegin
, Size
,
665 /* CodePtr */ OMPT_GET_RETURN_ADDRESS(0));)
667 if (!RTL
->data_retrieve_async
|| !RTL
->synchronize
)
668 return RTL
->data_retrieve(RTLDeviceID
, HstPtrBegin
, TgtPtrBegin
, Size
);
669 return RTL
->data_retrieve_async(RTLDeviceID
, HstPtrBegin
, TgtPtrBegin
, Size
,
673 // Copy data from current device to destination device directly
674 int32_t DeviceTy::dataExchange(void *SrcPtr
, DeviceTy
&DstDev
, void *DstPtr
,
675 int64_t Size
, AsyncInfoTy
&AsyncInfo
) {
676 if (!AsyncInfo
|| !RTL
->data_exchange_async
|| !RTL
->synchronize
) {
677 assert(RTL
->data_exchange
&& "RTL->data_exchange is nullptr");
678 return RTL
->data_exchange(RTLDeviceID
, SrcPtr
, DstDev
.RTLDeviceID
, DstPtr
,
681 return RTL
->data_exchange_async(RTLDeviceID
, SrcPtr
, DstDev
.RTLDeviceID
,
682 DstPtr
, Size
, AsyncInfo
);
685 int32_t DeviceTy::notifyDataMapped(void *HstPtr
, int64_t Size
) {
686 if (!RTL
->data_notify_mapped
)
687 return OFFLOAD_SUCCESS
;
689 DP("Notifying about new mapping: HstPtr=" DPxMOD
", Size=%" PRId64
"\n",
690 DPxPTR(HstPtr
), Size
);
692 if (RTL
->data_notify_mapped(RTLDeviceID
, HstPtr
, Size
)) {
693 REPORT("Notifiying about data mapping failed.\n");
696 return OFFLOAD_SUCCESS
;
699 int32_t DeviceTy::notifyDataUnmapped(void *HstPtr
) {
700 if (!RTL
->data_notify_unmapped
)
701 return OFFLOAD_SUCCESS
;
703 DP("Notifying about an unmapping: HstPtr=" DPxMOD
"\n", DPxPTR(HstPtr
));
705 if (RTL
->data_notify_unmapped(RTLDeviceID
, HstPtr
)) {
706 REPORT("Notifiying about data unmapping failed.\n");
709 return OFFLOAD_SUCCESS
;
712 // Run region on device
713 int32_t DeviceTy::launchKernel(void *TgtEntryPtr
, void **TgtVarsPtr
,
714 ptrdiff_t *TgtOffsets
,
715 const KernelArgsTy
&KernelArgs
,
716 AsyncInfoTy
&AsyncInfo
) {
717 return RTL
->launch_kernel(RTLDeviceID
, TgtEntryPtr
, TgtVarsPtr
, TgtOffsets
,
718 &KernelArgs
, AsyncInfo
);
721 // Print device information through the plugin, if supported
722 bool DeviceTy::printDeviceInfo(int32_t RTLDevId
) {
723 if (!RTL
->print_device_info
)
725 RTL
->print_device_info(RTLDevId
);
729 // Whether data can be copied to DstDevice directly
730 bool DeviceTy::isDataExchangable(const DeviceTy
&DstDevice
) {
731 if (RTL
!= DstDevice
.RTL
|| !RTL
->is_data_exchangable
)
734 if (RTL
->is_data_exchangable(RTLDeviceID
, DstDevice
.RTLDeviceID
))
735 return (RTL
->data_exchange
!= nullptr) ||
736 (RTL
->data_exchange_async
!= nullptr);
741 int32_t DeviceTy::synchronize(AsyncInfoTy
&AsyncInfo
) {
742 if (RTL
->synchronize
)
743 return RTL
->synchronize(RTLDeviceID
, AsyncInfo
);
744 return OFFLOAD_SUCCESS
;
747 int32_t DeviceTy::queryAsync(AsyncInfoTy
&AsyncInfo
) {
748 if (RTL
->query_async
)
749 return RTL
->query_async(RTLDeviceID
, AsyncInfo
);
751 return synchronize(AsyncInfo
);
754 int32_t DeviceTy::createEvent(void **Event
) {
755 if (RTL
->create_event
)
756 return RTL
->create_event(RTLDeviceID
, Event
);
758 return OFFLOAD_SUCCESS
;
761 int32_t DeviceTy::recordEvent(void *Event
, AsyncInfoTy
&AsyncInfo
) {
762 if (RTL
->record_event
)
763 return RTL
->record_event(RTLDeviceID
, Event
, AsyncInfo
);
765 return OFFLOAD_SUCCESS
;
768 int32_t DeviceTy::waitEvent(void *Event
, AsyncInfoTy
&AsyncInfo
) {
770 return RTL
->wait_event(RTLDeviceID
, Event
, AsyncInfo
);
772 return OFFLOAD_SUCCESS
;
775 int32_t DeviceTy::syncEvent(void *Event
) {
777 return RTL
->sync_event(RTLDeviceID
, Event
);
779 return OFFLOAD_SUCCESS
;
782 int32_t DeviceTy::destroyEvent(void *Event
) {
783 if (RTL
->create_event
)
784 return RTL
->destroy_event(RTLDeviceID
, Event
);
786 return OFFLOAD_SUCCESS
;
789 /// Check whether a device has an associated RTL and initialize it if it's not
790 /// already initialized.
791 bool deviceIsReady(int DeviceNum
) {
792 DP("Checking whether device %d is ready.\n", DeviceNum
);
793 // Devices.size() can only change while registering a new
794 // library, so try to acquire the lock of RTLs' mutex.
797 std::lock_guard
<decltype(PM
->RTLsMtx
)> LG(PM
->RTLsMtx
);
798 DevicesSize
= PM
->Devices
.size();
800 if (DevicesSize
<= (size_t)DeviceNum
) {
801 DP("Device ID %d does not have a matching RTL\n", DeviceNum
);
806 DeviceTy
&Device
= *PM
->Devices
[DeviceNum
];
808 DP("Is the device %d (local ID %d) initialized? %d\n", DeviceNum
,
809 Device
.RTLDeviceID
, Device
.IsInit
);
811 // Init the device if not done before
812 if (!Device
.IsInit
&& Device
.initOnce() != OFFLOAD_SUCCESS
) {
813 DP("Failed to init device %d\n", DeviceNum
);
817 DP("Device %d is ready to use.\n", DeviceNum
);