1 //===- PluginInterface.h - Target independent plugin device interface -----===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 //===----------------------------------------------------------------------===//
11 #ifndef OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_PLUGININTERFACE_H
12 #define OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_PLUGININTERFACE_H
19 #include <shared_mutex>
22 #include "Shared/Debug.h"
23 #include "Shared/Environment.h"
24 #include "Shared/EnvironmentVar.h"
25 #include "Shared/Requirements.h"
26 #include "Shared/Utils.h"
28 #include "GlobalHandler.h"
30 #include "MemoryManager.h"
32 #include "omptarget.h"
35 #include "omp-tools.h"
38 #include "llvm/ADT/SmallVector.h"
39 #include "llvm/Frontend/OpenMP/OMPConstants.h"
40 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
41 #include "llvm/Support/Allocator.h"
42 #include "llvm/Support/Error.h"
43 #include "llvm/Support/ErrorHandling.h"
44 #include "llvm/Support/MemoryBufferRef.h"
45 #include "llvm/Support/raw_ostream.h"
46 #include "llvm/TargetParser/Triple.h"
54 struct GenericPluginTy
;
55 struct GenericKernelTy
;
56 struct GenericDeviceTy
;
58 /// Class that wraps the __tgt_async_info to simplify its usage. In case the
59 /// object is constructed without a valid __tgt_async_info, the object will use
60 /// an internal one and will synchronize the current thread with the pending
61 /// operations when calling AsyncInfoWrapperTy::finalize(). This latter function
62 /// must be called before destroying the wrapper object.
63 struct AsyncInfoWrapperTy
{
64 AsyncInfoWrapperTy(GenericDeviceTy
&Device
, __tgt_async_info
*AsyncInfoPtr
);
66 ~AsyncInfoWrapperTy() {
67 assert(!AsyncInfoPtr
&& "AsyncInfoWrapperTy not finalized");
70 /// Get the raw __tgt_async_info pointer.
71 operator __tgt_async_info
*() const { return AsyncInfoPtr
; }
73 /// Indicate whether there is a queue.
74 bool hasQueue() const { return (AsyncInfoPtr
->Queue
!= nullptr); }
77 template <typename Ty
> Ty
getQueueAs() {
78 static_assert(sizeof(Ty
) == sizeof(AsyncInfoPtr
->Queue
),
79 "Queue is not of the same size as target type");
80 return static_cast<Ty
>(AsyncInfoPtr
->Queue
);
84 template <typename Ty
> void setQueueAs(Ty Queue
) {
85 static_assert(sizeof(Ty
) == sizeof(AsyncInfoPtr
->Queue
),
86 "Queue is not of the same size as target type");
87 assert(!AsyncInfoPtr
->Queue
&& "Overwriting queue");
88 AsyncInfoPtr
->Queue
= Queue
;
91 /// Synchronize with the __tgt_async_info's pending operations if it's the
92 /// internal async info. The error associated to the asynchronous operations
93 /// issued in this queue must be provided in \p Err. This function will update
94 /// the error parameter with the result of the synchronization if it was
95 /// actually executed. This function must be called before destroying the
96 /// object and only once.
97 void finalize(Error
&Err
);
99 /// Register \p Ptr as an associated allocation that is freed after
101 void freeAllocationAfterSynchronization(void *Ptr
) {
102 AsyncInfoPtr
->AssociatedAllocations
.push_back(Ptr
);
106 GenericDeviceTy
&Device
;
107 __tgt_async_info LocalAsyncInfo
;
108 __tgt_async_info
*AsyncInfoPtr
;
111 /// The information level is the nesting depth of a key-value property in the
112 /// printed info tree; it determines the indentation used when printing. Levels
/// are numbered starting at one, and the first level should be the default.
113 enum InfoLevelKind
{ InfoLevel1
= 1, InfoLevel2
, InfoLevel3
};
115 /// Class for storing device information to be printed later. An object of this
116 /// type acts as a queue of key-value properties. Each property has a key, a
117 /// value, and an optional unit for the value. For printing purposes, the
118 /// information can be classified into several levels. These levels are useful
119 /// for defining sections and subsections. Thus, each key-value property also
120 /// has an additional field indicating which level it belongs to. Notice that
121 /// we use the level to determine the indentation of the key-value property at
122 /// printing time. See the enum InfoLevelKind for the list of accepted levels.
124 struct InfoQueueEntryTy
{
131 std::deque
<InfoQueueEntryTy
> Queue
;
134 /// Add a new info entry to the queue. The entry requires at least a key
135 /// string in \p Key. The value in \p Value is optional and can be any type
136 /// that is representable as a string. The units in \p Units is optional and
137 /// must be a string. The info level is a template parameter that defaults to
138 /// the first level (top level).
139 template <InfoLevelKind L
= InfoLevel1
, typename T
= std::string
>
140 void add(const std::string
&Key
, T Value
= T(),
141 const std::string
&Units
= std::string()) {
142 assert(!Key
.empty() && "Invalid info key");
144 // Convert the value to a string depending on its type.
145 if constexpr (std::is_same_v
<T
, bool>)
146 Queue
.push_back({Key
, Value
? "Yes" : "No", Units
, L
});
147 else if constexpr (std::is_arithmetic_v
<T
>)
148 Queue
.push_back({Key
, std::to_string(Value
), Units
, L
});
150 Queue
.push_back({Key
, Value
, Units
, L
});
153 /// Print all info entries added to the queue.
155 // We print four spaces for each level.
156 constexpr uint64_t IndentSize
= 4;
158 // Find the maximum key length (level + key) to compute the individual
159 // indentation of each entry.
160 uint64_t MaxKeySize
= 0;
161 for (const auto &Entry
: Queue
) {
162 uint64_t KeySize
= Entry
.Key
.size() + Entry
.Level
* IndentSize
;
163 if (KeySize
> MaxKeySize
)
164 MaxKeySize
= KeySize
;
167 // Print all info entries.
168 for (const auto &Entry
: Queue
) {
169 // Compute the indentations for the current entry.
170 uint64_t KeyIndentSize
= Entry
.Level
* IndentSize
;
171 uint64_t ValIndentSize
=
172 MaxKeySize
- (Entry
.Key
.size() + KeyIndentSize
) + IndentSize
;
174 llvm::outs() << std::string(KeyIndentSize
, ' ') << Entry
.Key
175 << std::string(ValIndentSize
, ' ') << Entry
.Value
176 << (Entry
.Units
.empty() ? "" : " ") << Entry
.Units
<< "\n";
181 /// Class wrapping a __tgt_device_image and its offload entry table on a
182 /// specific device. This class is responsible for storing and managing
183 /// the offload entries for an image on a device.
184 class DeviceImageTy
{
185 /// Image identifier within the corresponding device. Notice that this id is
186 /// not unique between different devices; they may overlap.
189 /// The pointer to the raw __tgt_device_image.
190 const __tgt_device_image
*TgtImage
;
191 const __tgt_device_image
*TgtImageBitcode
;
193 /// Reference to the device this image is loaded on.
194 GenericDeviceTy
&Device
;
196 /// Whether this image has any global destructors that must be called.
197 /// FIXME: This is only required because we currently have no invariants
198 /// towards the lifetime of the underlying image. We should either copy
199 /// the image into memory locally or erase the pointers after init.
200 bool PendingGlobalDtors
;
203 DeviceImageTy(int32_t Id
, GenericDeviceTy
&Device
,
204 const __tgt_device_image
*Image
)
205 : ImageId(Id
), TgtImage(Image
), TgtImageBitcode(nullptr), Device(Device
),
206 PendingGlobalDtors(false) {
207 assert(TgtImage
&& "Invalid target image");
210 /// Get the image identifier within the device.
211 int32_t getId() const { return ImageId
; }
213 /// Get the device that this image is loaded onto.
214 GenericDeviceTy
&getDevice() const { return Device
; }
216 /// Get the pointer to the raw __tgt_device_image.
217 const __tgt_device_image
*getTgtImage() const { return TgtImage
; }
219 void setTgtImageBitcode(const __tgt_device_image
*TgtImageBitcode
) {
220 this->TgtImageBitcode
= TgtImageBitcode
;
223 const __tgt_device_image
*getTgtImageBitcode() const {
224 return TgtImageBitcode
;
227 /// Get the image starting address.
228 void *getStart() const { return TgtImage
->ImageStart
; }
230 /// Get the image size.
231 size_t getSize() const {
232 return getPtrDiff(TgtImage
->ImageEnd
, TgtImage
->ImageStart
);
235 /// Get a memory buffer reference to the whole image.
236 MemoryBufferRef
getMemoryBuffer() const {
237 return MemoryBufferRef(StringRef((const char *)getStart(), getSize()),
240 /// Accessors to the boolean value
241 bool setPendingGlobalDtors() { return PendingGlobalDtors
= true; }
242 bool hasPendingGlobalDtors() const { return PendingGlobalDtors
; }
245 /// Class implementing common functionalities of offload kernels. Each plugin
246 /// should define the specific kernel class, derive from this generic one, and
247 /// implement the necessary virtual function members.
248 struct GenericKernelTy
{
249 /// Construct a kernel with a name.
250 GenericKernelTy(const char *Name
)
251 : Name(Name
), PreferredNumThreads(0), MaxNumThreads(0) {}
253 virtual ~GenericKernelTy() {}
255 /// Initialize the kernel object from a specific device.
256 Error
init(GenericDeviceTy
&GenericDevice
, DeviceImageTy
&Image
);
257 virtual Error
initImpl(GenericDeviceTy
&GenericDevice
,
258 DeviceImageTy
&Image
) = 0;
260 /// Launch the kernel on the specific device. The device must be the same
261 /// one used to initialize the kernel.
262 Error
launch(GenericDeviceTy
&GenericDevice
, void **ArgPtrs
,
263 ptrdiff_t *ArgOffsets
, KernelArgsTy
&KernelArgs
,
264 AsyncInfoWrapperTy
&AsyncInfoWrapper
) const;
265 virtual Error
launchImpl(GenericDeviceTy
&GenericDevice
, uint32_t NumThreads
,
266 uint64_t NumBlocks
, KernelArgsTy
&KernelArgs
,
268 AsyncInfoWrapperTy
&AsyncInfoWrapper
) const = 0;
270 /// Get the kernel name.
271 const char *getName() const { return Name
; }
273 /// Return true if this kernel is a constructor or destructor.
274 bool isCtorOrDtor() const {
275 // TODO: This is not a great solution and should be revisited.
276 return StringRef(Name
).ends_with("tor");
279 /// Get the kernel image.
280 DeviceImageTy
&getImage() const {
281 assert(ImagePtr
&& "Kernel is not initialized!");
285 /// Return the kernel environment object of this kernel.
286 const KernelEnvironmentTy
&getKernelEnvironmentForKernel() {
287 return KernelEnvironment
;
290 /// Return a device pointer to a new kernel launch environment.
291 Expected
<KernelLaunchEnvironmentTy
*>
292 getKernelLaunchEnvironment(GenericDeviceTy
&GenericDevice
,
293 AsyncInfoWrapperTy
&AsyncInfo
) const;
295 /// Indicate whether an execution mode is valid.
296 static bool isValidExecutionMode(OMPTgtExecModeFlags ExecutionMode
) {
297 switch (ExecutionMode
) {
298 case OMP_TGT_EXEC_MODE_SPMD
:
299 case OMP_TGT_EXEC_MODE_GENERIC
:
300 case OMP_TGT_EXEC_MODE_GENERIC_SPMD
:
307 /// Get the execution mode name of the kernel.
308 const char *getExecutionModeName() const {
309 switch (KernelEnvironment
.Configuration
.ExecMode
) {
310 case OMP_TGT_EXEC_MODE_SPMD
:
312 case OMP_TGT_EXEC_MODE_GENERIC
:
314 case OMP_TGT_EXEC_MODE_GENERIC_SPMD
:
315 return "Generic-SPMD";
317 llvm_unreachable("Unknown execution mode!");
320 /// Prints generic kernel launch information.
321 Error
printLaunchInfo(GenericDeviceTy
&GenericDevice
,
322 KernelArgsTy
&KernelArgs
, uint32_t NumThreads
,
323 uint64_t NumBlocks
) const;
325 /// Prints plugin-specific kernel launch information after generic kernel
326 /// launch information
327 virtual Error
printLaunchInfoDetails(GenericDeviceTy
&GenericDevice
,
328 KernelArgsTy
&KernelArgs
,
330 uint64_t NumBlocks
) const;
333 /// Prepare the arguments before launching the kernel.
334 void *prepareArgs(GenericDeviceTy
&GenericDevice
, void **ArgPtrs
,
335 ptrdiff_t *ArgOffsets
, uint32_t &NumArgs
,
336 llvm::SmallVectorImpl
<void *> &Args
,
337 llvm::SmallVectorImpl
<void *> &Ptrs
,
338 KernelLaunchEnvironmentTy
*KernelLaunchEnvironment
) const;
340 /// Get the number of threads and blocks for the kernel based on the
341 /// user-defined threads and block clauses.
342 uint32_t getNumThreads(GenericDeviceTy
&GenericDevice
,
343 uint32_t ThreadLimitClause
[3]) const;
345 /// The number of threads \p NumThreads can be adjusted by this method.
346 /// \p IsNumThreadsFromUser is true if \p NumThreads is defined by the user via
347 /// the thread_limit clause.
348 uint64_t getNumBlocks(GenericDeviceTy
&GenericDevice
,
349 uint32_t BlockLimitClause
[3], uint64_t LoopTripCount
,
350 uint32_t &NumThreads
, bool IsNumThreadsFromUser
) const;
352 /// Indicate if the kernel works in Generic SPMD, Generic or SPMD mode.
353 bool isGenericSPMDMode() const {
354 return KernelEnvironment
.Configuration
.ExecMode
==
355 OMP_TGT_EXEC_MODE_GENERIC_SPMD
;
357 bool isGenericMode() const {
358 return KernelEnvironment
.Configuration
.ExecMode
==
359 OMP_TGT_EXEC_MODE_GENERIC
;
361 bool isSPMDMode() const {
362 return KernelEnvironment
.Configuration
.ExecMode
== OMP_TGT_EXEC_MODE_SPMD
;
368 /// The image that contains this kernel.
369 DeviceImageTy
*ImagePtr
= nullptr;
372 /// The preferred number of threads to run the kernel.
373 uint32_t PreferredNumThreads
;
375 /// The maximum number of threads which the kernel could leverage.
376 uint32_t MaxNumThreads
;
378 /// The kernel environment, including execution flags.
379 KernelEnvironmentTy KernelEnvironment
;
381 /// The prototype kernel launch environment.
382 KernelLaunchEnvironmentTy KernelLaunchEnvironment
;
384 /// If the kernel is a bare kernel.
385 bool IsBareKernel
= false;
388 /// Class representing a map of host pinned allocations. We track these pinned
389 /// allocations, so memory transfers involving these buffers can be optimized.
390 class PinnedAllocationMapTy
{
392 /// Struct representing a map entry.
394 /// The host pointer of the pinned allocation.
397 /// The pointer that devices' driver should use to transfer data from/to the
398 /// pinned allocation. In most plugins, this pointer will be the same as the
399 /// host pointer above.
400 void *DevAccessiblePtr
;
402 /// The size of the pinned allocation.
405 /// Indicate whether the allocation was locked from outside the plugin, for
406 /// instance, from the application. The externally locked allocations are
407 /// not unlocked by the plugin when unregistering the last user.
408 bool ExternallyLocked
;
410 /// The number of references to the pinned allocation. The allocation should
411 /// remain pinned and registered to the map until the number of references
413 mutable size_t References
;
415 /// Create an entry with the host and device accessible pointers, the buffer
416 /// size, and a boolean indicating whether the buffer was locked externally.
417 EntryTy(void *HstPtr
, void *DevAccessiblePtr
, size_t Size
,
418 bool ExternallyLocked
)
419 : HstPtr(HstPtr
), DevAccessiblePtr(DevAccessiblePtr
), Size(Size
),
420 ExternallyLocked(ExternallyLocked
), References(1) {}
422 /// Utility constructor used for std::set searches.
423 EntryTy(void *HstPtr
)
424 : HstPtr(HstPtr
), DevAccessiblePtr(nullptr), Size(0),
425 ExternallyLocked(false), References(0) {}
428 /// Comparator of map entries. Use the host pointer to enforce an order
431 bool operator()(const EntryTy
&Left
, const EntryTy
&Right
) const {
432 return Left
.HstPtr
< Right
.HstPtr
;
436 typedef std::set
<EntryTy
, EntryCmpTy
> PinnedAllocSetTy
;
438 /// The map of host pinned allocations.
439 PinnedAllocSetTy Allocs
;
441 /// The mutex to protect accesses to the map.
442 mutable std::shared_mutex Mutex
;
444 /// Reference to the corresponding device.
445 GenericDeviceTy
&Device
;
447 /// Indicate whether mapped host buffers should be locked automatically.
448 bool LockMappedBuffers
;
450 /// Indicate whether failures when locking mapped buffers should be ignored.
451 bool IgnoreLockMappedFailures
;
453 /// Find an allocation that intersects with \p HstPtr pointer. Assume the
454 /// map's mutex is acquired.
455 const EntryTy
*findIntersecting(const void *HstPtr
) const {
459 // Search the first allocation with starting address that is not less than
460 // the buffer address.
461 auto It
= Allocs
.lower_bound({const_cast<void *>(HstPtr
)});
463 // Direct match of starting addresses.
464 if (It
!= Allocs
.end() && It
->HstPtr
== HstPtr
)
467 // Not a direct match, but there may be a previous pinned allocation in the
468 // map which contains the buffer. Return null if there is no such previous
470 if (It
== Allocs
.begin())
473 // Move to the previous pinned allocation.
476 // Check whether the previous pinned allocation ends after the buffer address.
477 if (advanceVoidPtr(It
->HstPtr
, It
->Size
) > HstPtr
)
484 /// Insert an entry to the map representing a locked buffer. The number of
485 /// references is set to one.
486 Error
insertEntry(void *HstPtr
, void *DevAccessiblePtr
, size_t Size
,
487 bool ExternallyLocked
= false);
489 /// Erase an existing entry from the map.
490 Error
eraseEntry(const EntryTy
&Entry
);
492 /// Register a new user into an entry that represents a locked buffer. Check
493 /// also that the registered buffer with \p HstPtr address and \p Size is
494 /// actually contained into the entry.
495 Error
registerEntryUse(const EntryTy
&Entry
, void *HstPtr
, size_t Size
);
497 /// Unregister a user from the entry and return whether it is the last user.
498 /// If it is the last user, the entry will have to be removed from the map
499 /// and unlock the entry's host buffer (if necessary).
500 Expected
<bool> unregisterEntryUse(const EntryTy
&Entry
);
502 /// Indicate whether the first range A fully contains the second range B.
/// Each range is described by its start pointer and its size. B is contained
/// in A when B starts at or after the start of A and the end of B does not
/// exceed the end of A.
503 static bool contains(void *PtrA
, size_t SizeA
, void *PtrB
, size_t SizeB
) {
// NOTE(review): advanceVoidPtr presumably returns the address located Size
// bytes past the pointer, i.e. the end address of the range -- confirm.
504 void *EndA
= advanceVoidPtr(PtrA
, SizeA
);
505 void *EndB
= advanceVoidPtr(PtrB
, SizeB
);
506 return (PtrB
>= PtrA
&& EndB
<= EndA
);
509 /// Indicate whether the first range A intersects with the second range B.
/// Each range is described by its start pointer and its size. The ranges
/// intersect when each of them starts strictly before the end of the other.
510 static bool intersects(void *PtrA
, size_t SizeA
, void *PtrB
, size_t SizeB
) {
// NOTE(review): advanceVoidPtr presumably returns the address located Size
// bytes past the pointer, i.e. the end address of the range -- confirm.
511 void *EndA
= advanceVoidPtr(PtrA
, SizeA
);
512 void *EndB
= advanceVoidPtr(PtrB
, SizeB
);
513 return (PtrA
< EndB
&& PtrB
< EndA
);
517 /// Create the map of pinned allocations corresponding to a specific device.
518 PinnedAllocationMapTy(GenericDeviceTy
&Device
) : Device(Device
) {
520 // Envar that indicates whether mapped host buffers should be locked
521 // automatically. The possible values are boolean (on/off) and a special:
522 // off: Mapped host buffers are not locked.
523 // on: Mapped host buffers are locked in a best-effort approach.
524 // Failures to lock the buffers are silent.
525 // mandatory: Mapped host buffers are always locked and failures to lock
526 // a buffer results in a fatal error.
527 StringEnvar
OMPX_LockMappedBuffers("LIBOMPTARGET_LOCK_MAPPED_HOST_BUFFERS",
531 if (StringParser::parse(OMPX_LockMappedBuffers
.get().data(), Enabled
)) {
532 // Parsed as a boolean value. Enable the feature if necessary.
533 LockMappedBuffers
= Enabled
;
534 IgnoreLockMappedFailures
= true;
535 } else if (OMPX_LockMappedBuffers
.get() == "mandatory") {
536 // Enable the feature and failures are fatal.
537 LockMappedBuffers
= true;
538 IgnoreLockMappedFailures
= false;
540 // Disable by default.
541 DP("Invalid value LIBOMPTARGET_LOCK_MAPPED_HOST_BUFFERS=%s\n",
542 OMPX_LockMappedBuffers
.get().data());
543 LockMappedBuffers
= false;
547 /// Register a buffer that was recently allocated as a locked host buffer.
548 /// None of the already registered pinned allocations should intersect with
549 /// this new one. The registration requires the host pointer in \p HstPtr,
550 /// the device accessible pointer in \p DevAccessiblePtr, and the size of the
551 /// allocation in \p Size. The allocation must be unregistered using the
552 /// unregisterHostBuffer function.
553 Error
registerHostBuffer(void *HstPtr
, void *DevAccessiblePtr
, size_t Size
);
555 /// Unregister a host pinned allocation passing the host pointer which was
556 /// previously registered using the registerHostBuffer function. When calling
557 /// this function, the pinned allocation cannot have any other user and will
558 /// not be unlocked by this function.
559 Error
unregisterHostBuffer(void *HstPtr
);
561 /// Lock the host buffer at \p HstPtr or register a new user if it intersects
562 /// with an already existing one. A partial overlapping with extension is not
563 /// allowed. The function returns the device accessible pointer of the pinned
564 /// buffer. The buffer must be unlocked using the unlockHostBuffer function.
565 Expected
<void *> lockHostBuffer(void *HstPtr
, size_t Size
);
567 /// Unlock the host buffer at \p HstPtr or unregister a user if other users
568 /// are still using the pinned allocation. If this was the last user, the
569 /// pinned allocation is removed from the map and the memory is unlocked.
570 Error
unlockHostBuffer(void *HstPtr
);
572 /// Lock or register a host buffer that was recently mapped by libomptarget.
573 /// This behavior is applied if LIBOMPTARGET_LOCK_MAPPED_HOST_BUFFERS is
574 /// enabled. Even if not enabled, externally locked buffers are registered
575 /// in order to optimize their transfers.
576 Error
lockMappedHostBuffer(void *HstPtr
, size_t Size
);
578 /// Unlock or unregister a host buffer that was unmapped by libomptarget.
579 Error
unlockUnmappedHostBuffer(void *HstPtr
);
581 /// Return the device accessible pointer associated with the host pinned
582 /// allocation to which \p HstPtr belongs, if any. Return null in case the
583 /// \p HstPtr does not belong to any host pinned allocation. The device
584 /// accessible pointer is the one that devices should use for data transfers
585 /// that involve a host pinned buffer.
586 void *getDeviceAccessiblePtrFromPinnedBuffer(const void *HstPtr
) const {
587 std::shared_lock
<std::shared_mutex
> Lock(Mutex
);
589 // Find the intersecting allocation if any.
590 const EntryTy
*Entry
= findIntersecting(HstPtr
);
594 return advanceVoidPtr(Entry
->DevAccessiblePtr
,
595 getPtrDiff(HstPtr
, Entry
->HstPtr
));
598 /// Check whether a buffer belongs to a registered host pinned allocation.
599 bool isHostPinnedBuffer(const void *HstPtr
) const {
600 std::shared_lock
<std::shared_mutex
> Lock(Mutex
);
602 // Return whether there is an intersecting allocation.
603 return (findIntersecting(const_cast<void *>(HstPtr
)) != nullptr);
607 /// Class implementing common functionalities of offload devices. Each plugin
608 /// should define the specific device class, derive from this generic one, and
609 /// implement the necessary virtual function members.
610 struct GenericDeviceTy
: public DeviceAllocatorTy
{
611 /// Construct a device with its device id within the plugin, the number of
612 /// devices in the plugin and the grid values for that kind of device.
613 GenericDeviceTy(int32_t DeviceId
, int32_t NumDevices
,
614 const llvm::omp::GV
&GridValues
);
616 /// Get the device identifier within the corresponding plugin. Notice that
617 /// this id is not unique between different plugins; they may overlap.
618 int32_t getDeviceId() const { return DeviceId
; }
620 /// Set the context of the device if needed, before calling device-specific
621 /// functions. Plugins may implement this function as a no-op if not needed.
622 virtual Error
setContext() = 0;
624 /// Initialize the device. After this call, the device should be already
625 /// working and ready to accept queries or modifications.
626 Error
init(GenericPluginTy
&Plugin
);
627 virtual Error
initImpl(GenericPluginTy
&Plugin
) = 0;
629 /// Deinitialize the device and free all its resources. After this call, the
630 /// device is no longer considered ready, so no queries or modifications are
632 Error
deinit(GenericPluginTy
&Plugin
);
633 virtual Error
deinitImpl() = 0;
635 /// Load the binary image into the device and return the target table.
636 Expected
<DeviceImageTy
*> loadBinary(GenericPluginTy
&Plugin
,
637 const __tgt_device_image
*TgtImage
);
638 virtual Expected
<DeviceImageTy
*>
639 loadBinaryImpl(const __tgt_device_image
*TgtImage
, int32_t ImageId
) = 0;
641 /// Setup the device environment if needed. Notice this setup may not be run
642 /// on some plugins. By default, it will be executed, but plugins can change
643 /// this behavior by overriding the shouldSetupDeviceEnvironment function.
644 Error
setupDeviceEnvironment(GenericPluginTy
&Plugin
, DeviceImageTy
&Image
);
646 /// Setup the global device memory pool, if the plugin requires one.
647 Error
setupDeviceMemoryPool(GenericPluginTy
&Plugin
, DeviceImageTy
&Image
,
650 /// Setup the RPC server for this device if needed. This may not run on some
651 /// plugins like the CPU targets. By default, it will not be executed so it is
652 /// up to the target to override this using the shouldSetupRPCServer function.
653 Error
setupRPCServer(GenericPluginTy
&Plugin
, DeviceImageTy
&Image
);
655 /// Synchronize the current thread with the pending operations on the
656 /// __tgt_async_info structure.
657 Error
synchronize(__tgt_async_info
*AsyncInfo
);
658 virtual Error
synchronizeImpl(__tgt_async_info
&AsyncInfo
) = 0;
660 /// Invokes any global constructors on the device if present and is required
662 virtual Error
callGlobalConstructors(GenericPluginTy
&Plugin
,
663 DeviceImageTy
&Image
) {
664 return Error::success();
667 /// Invokes any global destructors on the device if present and is required
669 virtual Error
callGlobalDestructors(GenericPluginTy
&Plugin
,
670 DeviceImageTy
&Image
) {
671 return Error::success();
674 /// Query for the completion of the pending operations on the __tgt_async_info
675 /// structure in a non-blocking manner.
676 Error
queryAsync(__tgt_async_info
*AsyncInfo
);
677 virtual Error
queryAsyncImpl(__tgt_async_info
&AsyncInfo
) = 0;
679 /// Check whether the architecture supports VA management
680 virtual bool supportVAManagement() const { return false; }
682 /// Get the total device memory size
683 virtual Error
getDeviceMemorySize(uint64_t &DSize
);
685 /// Allocates \p RSize bytes (rounded up to page size) and hints the driver to
686 /// map it to \p VAddr. The obtained address is stored in \p Addr. At return
687 /// \p RSize contains the actual size which can be equal or larger than the
689 virtual Error
memoryVAMap(void **Addr
, void *VAddr
, size_t *RSize
);
691 /// De-allocates device memory and unmaps the virtual address \p VAddr
692 virtual Error
memoryVAUnMap(void *VAddr
, size_t Size
);
694 /// Allocate data on the device or involving the device.
695 Expected
<void *> dataAlloc(int64_t Size
, void *HostPtr
, TargetAllocTy Kind
);
697 /// Deallocate data from the device or involving the device.
698 Error
dataDelete(void *TgtPtr
, TargetAllocTy Kind
);
700 /// Pin host memory to optimize transfers and return the device accessible
701 /// pointer that devices should use for memory transfers involving the host
702 /// pinned allocation.
703 Expected
<void *> dataLock(void *HstPtr
, int64_t Size
) {
704 return PinnedAllocs
.lockHostBuffer(HstPtr
, Size
);
707 /// Unpin a host memory buffer that was previously pinned.
708 Error
dataUnlock(void *HstPtr
) {
709 return PinnedAllocs
.unlockHostBuffer(HstPtr
);
712 /// Lock the host buffer \p HstPtr with \p Size bytes with the vendor-specific
713 /// API and return the device accessible pointer.
714 virtual Expected
<void *> dataLockImpl(void *HstPtr
, int64_t Size
) = 0;
716 /// Unlock a previously locked host buffer starting at \p HstPtr.
717 virtual Error
dataUnlockImpl(void *HstPtr
) = 0;
719 /// Mark the host buffer with address \p HstPtr and \p Size bytes as a mapped
720 /// buffer. This means that libomptarget created a new mapping of that host
721 /// buffer (e.g., because of a user OpenMP target map) and the buffer may be used
722 /// as source/destination of memory transfers. We can use this information to
723 /// lock the host buffer and optimize its memory transfers.
724 Error
notifyDataMapped(void *HstPtr
, int64_t Size
) {
725 return PinnedAllocs
.lockMappedHostBuffer(HstPtr
, Size
);
728 /// Mark the host buffer with address \p HstPtr as unmapped. This means that
729 /// libomptarget removed an existing mapping. If the plugin locked the buffer
730 /// in notifyDataMapped, this function should unlock it.
731 Error
notifyDataUnmapped(void *HstPtr
) {
732 return PinnedAllocs
.unlockUnmappedHostBuffer(HstPtr
);
735 /// Check whether the host buffer with address \p HstPtr is pinned by the
736 /// underlying vendor-specific runtime (if any). Retrieve the host pointer,
737 /// the device accessible pointer and the size of the original pinned buffer.
738 virtual Expected
<bool> isPinnedPtrImpl(void *HstPtr
, void *&BaseHstPtr
,
739 void *&BaseDevAccessiblePtr
,
740 size_t &BaseSize
) const = 0;
742 /// Submit data to the device (host to device transfer).
743 Error
dataSubmit(void *TgtPtr
, const void *HstPtr
, int64_t Size
,
744 __tgt_async_info
*AsyncInfo
);
745 virtual Error
dataSubmitImpl(void *TgtPtr
, const void *HstPtr
, int64_t Size
,
746 AsyncInfoWrapperTy
&AsyncInfoWrapper
) = 0;
748 /// Retrieve data from the device (device to host transfer).
749 Error
dataRetrieve(void *HstPtr
, const void *TgtPtr
, int64_t Size
,
750 __tgt_async_info
*AsyncInfo
);
751 virtual Error
dataRetrieveImpl(void *HstPtr
, const void *TgtPtr
, int64_t Size
,
752 AsyncInfoWrapperTy
&AsyncInfoWrapper
) = 0;
754 /// Exchange data between devices (device to device transfer). Calling this
755 /// function is only valid if GenericPluginTy::isDataExchangable() passing the
756 /// two devices returns true.
757 Error
dataExchange(const void *SrcPtr
, GenericDeviceTy
&DstDev
, void *DstPtr
,
758 int64_t Size
, __tgt_async_info
*AsyncInfo
);
759 virtual Error
dataExchangeImpl(const void *SrcPtr
, GenericDeviceTy
&DstDev
,
760 void *DstPtr
, int64_t Size
,
761 AsyncInfoWrapperTy
&AsyncInfoWrapper
) = 0;
763 /// Run the kernel associated with \p EntryPtr
764 Error
launchKernel(void *EntryPtr
, void **ArgPtrs
, ptrdiff_t *ArgOffsets
,
765 KernelArgsTy
&KernelArgs
, __tgt_async_info
*AsyncInfo
);
767 /// Initialize a __tgt_async_info structure. Related to interop features.
768 Error
initAsyncInfo(__tgt_async_info
**AsyncInfoPtr
);
769 virtual Error
initAsyncInfoImpl(AsyncInfoWrapperTy
&AsyncInfoWrapper
) = 0;
771 /// Initialize a __tgt_device_info structure. Related to interop features.
772 Error
initDeviceInfo(__tgt_device_info
*DeviceInfo
);
773 virtual Error
initDeviceInfoImpl(__tgt_device_info
*DeviceInfo
) = 0;
776 Error
createEvent(void **EventPtrStorage
);
777 virtual Error
createEventImpl(void **EventPtrStorage
) = 0;
779 /// Destroy an event.
780 Error
destroyEvent(void *Event
);
781 virtual Error
destroyEventImpl(void *EventPtr
) = 0;
783 /// Start the recording of the event.
784 Error
recordEvent(void *Event
, __tgt_async_info
*AsyncInfo
);
785 virtual Error
recordEventImpl(void *EventPtr
,
786 AsyncInfoWrapperTy
&AsyncInfoWrapper
) = 0;
788 /// Wait for an event to finish. Notice this wait is asynchronous if the
789 /// __tgt_async_info is not nullptr.
790 Error
waitEvent(void *Event
, __tgt_async_info
*AsyncInfo
);
791 virtual Error
waitEventImpl(void *EventPtr
,
792 AsyncInfoWrapperTy
&AsyncInfoWrapper
) = 0;
794 /// Synchronize the current thread with the event.
795 Error
syncEvent(void *EventPtr
);
796 virtual Error
syncEventImpl(void *EventPtr
) = 0;
798 /// Print information about the device.
800 virtual Error
obtainInfoImpl(InfoQueueTy
&Info
) = 0;
802 /// Getters of the grid values.
803 uint32_t getWarpSize() const { return GridValues
.GV_Warp_Size
; }
804 uint32_t getThreadLimit() const { return GridValues
.GV_Max_WG_Size
; }
805 uint32_t getBlockLimit() const { return GridValues
.GV_Max_Teams
; }
806 uint32_t getDefaultNumThreads() const {
807 return GridValues
.GV_Default_WG_Size
;
809 uint32_t getDefaultNumBlocks() const {
810 return GridValues
.GV_Default_Num_Teams
;
812 uint32_t getDynamicMemorySize() const { return OMPX_SharedMemorySize
; }
813 virtual uint64_t getClockFrequency() const { return CLOCKS_PER_SEC
; }
815 /// Get target compute unit kind (e.g., sm_80, or gfx908).
816 virtual std::string
getComputeUnitKind() const { return "unknown"; }
818 /// Post processing after jit backend. The ownership of \p MB will be taken.
819 virtual Expected
<std::unique_ptr
<MemoryBuffer
>>
820 doJITPostProcessing(std::unique_ptr
<MemoryBuffer
> MB
) const {
821 return std::move(MB
);
824 /// The minimum number of threads we use for a low-trip count combined loop.
825 /// Instead of using more threads we increase the outer (block/team)
827 /// @see OMPX_MinThreadsForLowTripCount
828 virtual uint32_t getMinThreadsForLowTripCountLoop() {
829 return OMPX_MinThreadsForLowTripCount
;
832 /// Get the total amount of hardware parallelism supported by the target
833 /// device. This is the total amount of warps or wavefronts that can be
834 /// resident on the device simultaneously.
835 virtual uint64_t getHardwareParallelism() const { return 0; }
837 /// Get the RPC server running on this device.
838 RPCServerTy
*getRPCServer() const { return RPCServer
; }
840 /// The number of parallel RPC ports to use on the device. In general, this
841 /// should be roughly equivalent to the amount of hardware parallelism the
842 /// device can support. This is because GPUs in general do not have forward
843 /// progress guarantees, so we minimize thread level dependencies by
844 /// allocating enough space such that each device thread can have a port. This
845 /// is likely overly pessimistic in the average case, but guarantees no
846 /// deadlocks at the cost of memory. This must be overridden by targets
847 /// expecting to use the RPC server.
848 virtual uint64_t requestedRPCPortCount() const {
// The generic version is only legal for devices that never set up an RPC
// server; a device that does must provide its own implementation.
849 assert(!shouldSetupRPCServer() && "Default implementation cannot be used");
853 virtual Error
getDeviceStackSize(uint64_t &V
) = 0;
855 /// Returns true if current plugin architecture is an APU
856 /// and unified_shared_memory was not requested by the program.
857 bool useAutoZeroCopy();
858 virtual bool useAutoZeroCopyImpl() { return false; }
860 /// Allocate and construct a kernel object.
861 virtual Expected
<GenericKernelTy
&> constructKernel(const char *Name
) = 0;
864 /// Get and set the stack size and heap size for the device. If not used, the
865 /// plugin can implement the setters as no-op and setting the output
866 /// value to zero for the getters.
867 virtual Error
setDeviceStackSize(uint64_t V
) = 0;
868 virtual Error
getDeviceHeapSize(uint64_t &V
) = 0;
869 virtual Error
setDeviceHeapSize(uint64_t V
) = 0;
871 /// Indicate whether the device should setup the device environment. Notice
872 /// that returning false in this function will change the behavior of the
873 /// setupDeviceEnvironment() function.
874 virtual bool shouldSetupDeviceEnvironment() const { return true; }
876 /// Indicate whether the device should setup the global device memory pool. If
877 /// false is return the value on the device will be uninitialized.
878 virtual bool shouldSetupDeviceMemoryPool() const { return true; }
880 /// Indicate whether or not the device should setup the RPC server. This is
881 /// only necessary for unhosted targets like the GPU.
882 virtual bool shouldSetupRPCServer() const { return false; }
884 /// Pointer to the memory manager or nullptr if not available.
885 MemoryManagerTy
*MemoryManager
;
887 /// Environment variables defined by the OpenMP standard.
888 Int32Envar OMP_TeamLimit
;
889 Int32Envar OMP_NumTeams
;
890 Int32Envar OMP_TeamsThreadLimit
;
892 /// Environment variables defined by the LLVM OpenMP implementation.
893 Int32Envar OMPX_DebugKind
;
894 UInt32Envar OMPX_SharedMemorySize
;
895 UInt64Envar OMPX_TargetStackSize
;
896 UInt64Envar OMPX_TargetHeapSize
;
898 /// Environment flag to set the minimum number of threads we use for a
899 /// low-trip count combined loop. Instead of using more threads we increase
900 /// the outer (block/team) parallelism.
901 UInt32Envar OMPX_MinThreadsForLowTripCount
=
902 UInt32Envar("LIBOMPTARGET_MIN_THREADS_FOR_LOW_TRIP_COUNT", 32);
905 /// Environment variables defined by the LLVM OpenMP implementation
906 /// regarding the initial number of streams and events.
907 UInt32Envar OMPX_InitialNumStreams
;
908 UInt32Envar OMPX_InitialNumEvents
;
910 /// Array of images loaded into the device. Images are automatically
911 /// deallocated by the allocator.
912 llvm::SmallVector
<DeviceImageTy
*> LoadedImages
;
914 /// The identifier of the device within the plugin. Notice this is not a
915 /// global device id and is not the device id visible to the OpenMP user.
916 const int32_t DeviceId
;
918 /// The default grid values used for this device.
919 llvm::omp::GV GridValues
;
921 /// Enumeration used for representing the current state between two devices
922 /// (both under the same plugin) for the peer access between them.
923 /// The states can be a) PENDING when the state has not been queried and needs
924 /// to be queried, b) AVAILABLE when the peer access is available to be used,
925 /// and c) UNAVAILABLE if the system does not allow it.
926 enum class PeerAccessState
: uint8_t { AVAILABLE
, UNAVAILABLE
, PENDING
};
928 /// Array of peer access states with the rest of devices. This means that if
929 /// the device I has a matrix PeerAccesses with PeerAccesses[J] == AVAILABLE,
930 /// the device I can access device J's memory directly. However, notice this
931 /// does not mean that device J can access device I's memory directly.
932 llvm::SmallVector
<PeerAccessState
> PeerAccesses
;
933 std::mutex PeerAccessesLock
;
935 /// Map of host pinned allocations used to optimize device transfers.
936 PinnedAllocationMapTy PinnedAllocs
;
938 /// A pointer to an RPC server instance attached to this device if present.
939 /// This is used to run the RPC server during task synchronization.
940 RPCServerTy
*RPCServer
;
943 /// OMPT callback functions
944 #define defineOmptCallback(Name, Type, Code) Name##_t Name##_fn = nullptr;
945 FOREACH_OMPT_DEVICE_EVENT(defineOmptCallback
)
946 #undef defineOmptCallback
948 /// Internal representation for OMPT device (initialize & finalize)
949 std::atomic
<bool> OmptInitialized
;
953 DeviceMemoryPoolTy DeviceMemoryPool
= {nullptr, 0};
954 DeviceMemoryPoolTrackingTy DeviceMemoryPoolTracking
= {0, 0, ~0U, 0};
957 /// Class implementing common functionalities of offload plugins. Each plugin
958 /// should define the specific plugin class, derive from this generic one, and
959 /// implement the necessary virtual function members.
960 struct GenericPluginTy
{
962 /// Construct a plugin instance.
963 GenericPluginTy(Triple::ArchType TA
)
964 : RequiresFlags(OMP_REQ_UNDEFINED
), GlobalHandler(nullptr), JIT(TA
),
965 RPCServer(nullptr) {}
967 virtual ~GenericPluginTy() {}
969 /// Initialize the plugin.
972 /// Initialize the plugin and return the number of available devices.
973 virtual Expected
<int32_t> initImpl() = 0;
975 /// Deinitialize the plugin and release the resources.
977 virtual Error
deinitImpl() = 0;
979 /// Get the reference to the device with a certain device id.
980 GenericDeviceTy
&getDevice(int32_t DeviceId
) {
981 assert(isValidDeviceId(DeviceId
) && "Invalid device id");
982 assert(Devices
[DeviceId
] && "Device is unitialized");
984 return *Devices
[DeviceId
];
987 /// Get the number of active devices.
988 int32_t getNumDevices() const { return NumDevices
; }
990 /// Get the plugin-specific device identifier offset.
991 int32_t getDeviceIdStartIndex() const { return DeviceIdStartIndex
; }
993 /// Set the plugin-specific device identifier offset.
994 void setDeviceIdStartIndex(int32_t Offset
) { DeviceIdStartIndex
= Offset
; }
996 /// Get the ELF code to recognize the binary image of this plugin.
997 virtual uint16_t getMagicElfBits() const = 0;
999 /// Get the target triple of this plugin.
1000 virtual Triple::ArchType
getTripleArch() const = 0;
1002 /// Allocate a structure using the internal allocator.
1003 template <typename Ty
> Ty
*allocate() {
1004 return reinterpret_cast<Ty
*>(Allocator
.Allocate(sizeof(Ty
), alignof(Ty
)));
1007 /// Get the reference to the global handler of this plugin.
1008 GenericGlobalHandlerTy
&getGlobalHandler() {
1009 assert(GlobalHandler
&& "Global handler not initialized");
1010 return *GlobalHandler
;
1013 /// Get the reference to the JIT used for all devices connected to this
1015 JITEngine
&getJIT() { return JIT
; }
1017 /// Get a reference to the RPC server used to provide host services.
1018 RPCServerTy
&getRPCServer() {
1019 assert(RPCServer
&& "RPC server not initialized");
1023 /// Get the OpenMP requires flags set for this plugin.
1024 int64_t getRequiresFlags() const { return RequiresFlags
; }
1026 /// Set the OpenMP requires flags for this plugin.
1027 void setRequiresFlag(int64_t Flags
) { RequiresFlags
= Flags
; }
1029 /// Initialize a device within the plugin.
1030 Error
initDevice(int32_t DeviceId
);
1032 /// Deinitialize a device within the plugin and release its resources.
1033 Error
deinitDevice(int32_t DeviceId
);
1035 /// Indicate whether data can be exchanged directly between two devices under
1036 /// this same plugin. If this function returns true, it's safe to call the
1037 /// GenericDeviceTy::exchangeData() function on the source device.
1038 virtual bool isDataExchangable(int32_t SrcDeviceId
, int32_t DstDeviceId
) {
1039 return isValidDeviceId(SrcDeviceId
) && isValidDeviceId(DstDeviceId
);
1042 /// Top level interface to verify if a given ELF image can be executed on a
1043 /// given target. Returns true if the \p Image is compatible with the plugin.
1044 Expected
<bool> checkELFImage(StringRef Image
) const;
1046 /// Indicate if an image is compatible with the plugin devices. Notice that
1047 /// this function may be called before actually initializing the devices. So
1048 /// we could not move this function into GenericDeviceTy.
1049 virtual Expected
<bool> isELFCompatible(StringRef Image
) const = 0;
1051 /// Indicate whether the plugin supports empty images.
1052 virtual bool supportsEmptyImages() const { return false; }
1055 /// Indicate whether a device id is valid.
1056 bool isValidDeviceId(int32_t DeviceId
) const {
1057 return (DeviceId
>= 0 && DeviceId
< getNumDevices());
1061 /// Number of devices available for the plugin.
1062 int32_t NumDevices
= 0;
1064 /// Index offset, which when added to a DeviceId, will yield a unique
1065 /// user-observable device identifier. This is especially important when
1066 /// DeviceIds of multiple plugins / RTLs need to be distinguishable.
1067 int32_t DeviceIdStartIndex
= 0;
1069 /// Array of pointers to the devices. Initially, they are all set to nullptr.
1070 /// Once a device is initialized, the pointer is stored in the position given
1071 /// by its device id. A position with nullptr means that the corresponding
1072 /// device was not initialized yet.
1073 llvm::SmallVector
<GenericDeviceTy
*> Devices
;
1075 /// OpenMP requires flags.
1076 int64_t RequiresFlags
;
1078 /// Pointer to the global handler for this plugin.
1079 GenericGlobalHandlerTy
*GlobalHandler
;
1081 /// Internal allocator for different structures.
1082 BumpPtrAllocator Allocator
;
1084 /// The JIT engine shared by all devices connected to this plugin.
1087 /// The interface between the plugin and the GPU for host services.
1088 RPCServerTy
*RPCServer
;
1091 /// Class for simplifying the getter operation of the plugin. Anywhere on the
1092 /// code, the current plugin can be retrieved by Plugin::get(). The class also
1093 /// declares functions to create plugin-specific object instances. The check(),
1094 /// createPlugin(), createDevice() and createGlobalHandler() functions should be
1095 /// defined by each plugin implementation.
1097 // Reference to the plugin instance.
1098 static GenericPluginTy
*SpecificPlugin
;
1101 if (auto Err
= init())
1102 REPORT("Failed to initialize plugin: %s\n",
1103 toString(std::move(Err
)).data());
1107 if (auto Err
= deinit())
1108 REPORT("Failed to deinitialize plugin: %s\n",
1109 toString(std::move(Err
)).data());
1112 Plugin(const Plugin
&) = delete;
1113 void operator=(const Plugin
&) = delete;
1115 /// Create and intialize the plugin instance.
1116 static Error
init() {
1117 assert(!SpecificPlugin
&& "Plugin already created");
1119 // Create the specific plugin.
1120 SpecificPlugin
= createPlugin();
1121 assert(SpecificPlugin
&& "Plugin was not created");
1123 // Initialize the plugin.
1124 return SpecificPlugin
->init();
1127 // Deinitialize and destroy the plugin instance.
1128 static Error
deinit() {
1129 assert(SpecificPlugin
&& "Plugin no longer valid");
1131 for (int32_t DevNo
= 0, NumDev
= SpecificPlugin
->getNumDevices();
1132 DevNo
< NumDev
; ++DevNo
)
1133 if (auto Err
= SpecificPlugin
->deinitDevice(DevNo
))
1136 // Deinitialize the plugin.
1137 if (auto Err
= SpecificPlugin
->deinit())
1140 // Delete the plugin instance.
1141 delete SpecificPlugin
;
1143 // Invalidate the plugin reference.
1144 SpecificPlugin
= nullptr;
1146 return Plugin::success();
1150 /// Initialize the plugin if needed. The plugin could have been initialized by
1151 /// a previous call to Plugin::get().
1152 static Error
initIfNeeded() {
1153 // Trigger the initialization if needed.
1156 return Error::success();
1159 /// Get a reference (or create if it was not created) to the plugin instance.
1160 static GenericPluginTy
&get() {
1161 // This static variable will initialize the underlying plugin instance in
1162 // case there was no previous explicit initialization. The initialization is
1164 static Plugin Plugin
;
1166 assert(SpecificPlugin
&& "Plugin is not active");
1167 return *SpecificPlugin
;
1170 /// Get a reference to the plugin with a specific plugin-specific type.
1171 template <typename Ty
> static Ty
&get() { return static_cast<Ty
&>(get()); }
1173 /// Indicate whether the plugin is active.
1174 static bool isActive() { return SpecificPlugin
!= nullptr; }
1176 /// Create a success error. This is the same as calling Error::success(), but
1177 /// it is recommended to use this one for consistency with Plugin::error() and
1178 /// Plugin::check().
1179 static Error
success() { return Error::success(); }
1181 /// Create a string error.
1182 template <typename
... ArgsTy
>
1183 static Error
error(const char *ErrFmt
, ArgsTy
... Args
) {
1184 return createStringError(inconvertibleErrorCode(), ErrFmt
, Args
...);
1187 /// Check the plugin-specific error code and return an error or success
1188 /// accordingly. In case of an error, create a string error with the error
1189 /// description. The ErrFmt should follow the format:
1190 /// "Error in <function name>[<optional info>]: %s"
1191 /// The last format specifier "%s" is mandatory and will be used to place the
1192 /// error code's description. Notice this function should be only called from
1193 /// the plugin-specific code.
1194 template <typename
... ArgsTy
>
1195 static Error
check(int32_t ErrorCode
, const char *ErrFmt
, ArgsTy
... Args
);
1197 /// Create a plugin instance.
1198 static GenericPluginTy
*createPlugin();
1200 /// Create a plugin-specific device.
1201 static GenericDeviceTy
*createDevice(int32_t DeviceId
, int32_t NumDevices
);
1203 /// Create a plugin-specific global handler.
1204 static GenericGlobalHandlerTy
*createGlobalHandler();
1207 /// Auxiliary interface class for GenericDeviceResourceManagerTy. This class
1208 /// acts as a reference to a device resource, such as a stream, and requires
1209 /// some basic functions to be implemented. The derived class should define an
1210 /// empty constructor that creates an empty and invalid resource reference. Do
1211 /// not create a new resource on the ctor, but on the create() function instead.
1213 /// The derived class should also define the type HandleTy as the underlying
1214 /// resource handle type. For instance, in a CUDA stream it would be:
1215 /// using HandleTy = CUstream;
1216 struct GenericDeviceResourceRef
{
1217 /// Create a new resource and store a reference to it.
1218 virtual Error
create(GenericDeviceTy
&Device
) = 0;
1220 /// Destroy and release the resources pointed by the reference.
1221 virtual Error
destroy(GenericDeviceTy
&Device
) = 0;
1224 ~GenericDeviceResourceRef() = default;
1227 /// Class that implements a resource pool belonging to a device. This class
1228 /// operates with references to the actual resources. These reference must
1229 /// derive from the GenericDeviceResourceRef class and implement the create
1230 /// and destroy virtual functions.
1231 template <typename ResourceRef
> class GenericDeviceResourceManagerTy
{
1232 using ResourcePoolTy
= GenericDeviceResourceManagerTy
<ResourceRef
>;
1233 using ResourceHandleTy
= typename
ResourceRef::HandleTy
;
1236 /// Create an empty resource pool for a specific device.
1237 GenericDeviceResourceManagerTy(GenericDeviceTy
&Device
)
1238 : Device(Device
), NextAvailable(0) {}
1240 /// Destroy the resource pool. At this point, the deinit() function should
1241 /// already have been executed so the resource pool should be empty.
1242 virtual ~GenericDeviceResourceManagerTy() {
1243 assert(ResourcePool
.empty() && "Resource pool not empty");
1246 /// Initialize the resource pool.
1247 Error
init(uint32_t InitialSize
) {
1248 assert(ResourcePool
.empty() && "Resource pool already initialized");
1249 return ResourcePoolTy::resizeResourcePool(InitialSize
);
1252 /// Deinitialize the resource pool and delete all resources. This function
1253 /// must be called before the destructor.
1254 virtual Error
deinit() {
1256 DP("Missing %d resources to be returned\n", NextAvailable
);
1258 // TODO: This prevents a bug on libomptarget to make the plugins fail. There
1259 // may be some resources not returned. Do not destroy these ones.
1260 if (auto Err
= ResourcePoolTy::resizeResourcePool(NextAvailable
))
1263 ResourcePool
.clear();
1265 return Plugin::success();
1268 /// Get a resource from the pool or create new ones. If the function
1269 /// succeeds, the handle to the resource is saved in \p Handle.
1270 virtual Error
getResource(ResourceHandleTy
&Handle
) {
1271 // Get a resource with an empty resource processor.
1272 return getResourcesImpl(1, &Handle
,
1273 [](ResourceHandleTy
) { return Plugin::success(); });
1276 /// Get multiple resources from the pool or create new ones. If the function
1277 /// succeeds, the handles to the resources are saved in \p Handles.
1278 virtual Error
getResources(uint32_t Num
, ResourceHandleTy
*Handles
) {
1279 // Get resources with an empty resource processor.
1280 return getResourcesImpl(Num
, Handles
,
1281 [](ResourceHandleTy
) { return Plugin::success(); });
1284 /// Return resource to the pool.
1285 virtual Error
returnResource(ResourceHandleTy Handle
) {
1286 // Return a resource with an empty resource processor.
1287 return returnResourceImpl(
1288 Handle
, [](ResourceHandleTy
) { return Plugin::success(); });
1292 /// Get multiple resources from the pool or create new ones. If the function
1293 /// succeeds, the handles to the resources are saved in \p Handles. Also
1294 /// process each of the obtained resources with \p Processor.
1295 template <typename FuncTy
>
1296 Error
getResourcesImpl(uint32_t Num
, ResourceHandleTy
*Handles
,
1298 const std::lock_guard
<std::mutex
> Lock(Mutex
);
1300 assert(NextAvailable
<= ResourcePool
.size() &&
1301 "Resource pool is corrupted");
1303 if (NextAvailable
+ Num
> ResourcePool
.size())
1304 // Double the resource pool or resize it to provide the requested ones.
1305 if (auto Err
= ResourcePoolTy::resizeResourcePool(
1306 std::max(NextAvailable
* 2, NextAvailable
+ Num
)))
1309 // Save the handles in the output array parameter.
1310 for (uint32_t r
= 0; r
< Num
; ++r
)
1311 Handles
[r
] = ResourcePool
[NextAvailable
+ r
];
1313 // Process all obtained resources.
1314 for (uint32_t r
= 0; r
< Num
; ++r
)
1315 if (auto Err
= Processor(Handles
[r
]))
1318 NextAvailable
+= Num
;
1320 return Plugin::success();
1323 /// Return resource to the pool and process the resource with \p Processor.
1324 template <typename FuncTy
>
1325 Error
returnResourceImpl(ResourceHandleTy Handle
, FuncTy Processor
) {
1326 const std::lock_guard
<std::mutex
> Lock(Mutex
);
1328 // Process the returned resource.
1329 if (auto Err
= Processor(Handle
))
1332 assert(NextAvailable
> 0 && "Resource pool is corrupted");
1333 ResourcePool
[--NextAvailable
] = Handle
;
1335 return Plugin::success();
1339 /// The resources between \p OldSize and \p NewSize need to be created or
1340 /// destroyed. The mutex is locked when this function is called.
1341 Error
resizeResourcePoolImpl(uint32_t OldSize
, uint32_t NewSize
) {
1342 assert(OldSize
!= NewSize
&& "Resizing to the same size");
1344 if (auto Err
= Device
.setContext())
1347 if (OldSize
< NewSize
) {
1348 // Create new resources.
1349 for (uint32_t I
= OldSize
; I
< NewSize
; ++I
) {
1350 if (auto Err
= ResourcePool
[I
].create(Device
))
1354 // Destroy the obsolete resources.
1355 for (uint32_t I
= NewSize
; I
< OldSize
; ++I
) {
1356 if (auto Err
= ResourcePool
[I
].destroy(Device
))
1360 return Plugin::success();
1363 /// Increase or decrease the number of resources. This function should
1364 /// be called with the mutex acquired.
1365 Error
resizeResourcePool(uint32_t NewSize
) {
1366 uint32_t OldSize
= ResourcePool
.size();
1369 if (OldSize
== NewSize
)
1370 return Plugin::success();
1372 if (OldSize
< NewSize
) {
1373 // Increase the number of resources.
1374 ResourcePool
.resize(NewSize
);
1375 return ResourcePoolTy::resizeResourcePoolImpl(OldSize
, NewSize
);
1378 // Decrease the number of resources otherwise.
1379 auto Err
= ResourcePoolTy::resizeResourcePoolImpl(OldSize
, NewSize
);
1380 ResourcePool
.resize(NewSize
);
1385 /// The device to which the resources belong
1386 GenericDeviceTy
&Device
;
1388 /// Mutex for the resource pool.
1391 /// The next available resource in the pool.
1392 uint32_t NextAvailable
;
1394 /// The actual resource pool.
1395 std::deque
<ResourceRef
> ResourcePool
;
1398 /// A static check on whether or not we support RPC in libomptarget.
1399 const bool libomptargetSupportsRPC();
1401 } // namespace plugin
1402 } // namespace target
1406 #endif // OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_PLUGININTERFACE_H