1 //===- PluginInterface.h - Target independent plugin device interface -----===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //===----------------------------------------------------------------------===//
11 #ifndef OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_PLUGININTERFACE_H
12 #define OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_PLUGININTERFACE_H
14 #include <cstddef>
15 #include <cstdint>
16 #include <deque>
17 #include <list>
18 #include <map>
#include <mutex>
#include <set>
19 #include <shared_mutex>
#include <string>
20 #include <vector>
22 #include "Debug.h"
23 #include "Environment.h"
24 #include "GlobalHandler.h"
25 #include "JIT.h"
26 #include "MemoryManager.h"
27 #include "RPC.h"
28 #include "Utilities.h"
29 #include "omptarget.h"
31 #ifdef OMPT_SUPPORT
32 #include "omp-tools.h"
33 #endif
35 #include "llvm/ADT/SmallVector.h"
36 #include "llvm/Frontend/OpenMP/OMPConstants.h"
37 #include "llvm/Frontend/OpenMP/OMPGridValues.h"
38 #include "llvm/Support/Allocator.h"
39 #include "llvm/Support/Error.h"
40 #include "llvm/Support/ErrorHandling.h"
41 #include "llvm/Support/MemoryBufferRef.h"
42 #include "llvm/Support/raw_ostream.h"
43 #include "llvm/TargetParser/Triple.h"
45 namespace llvm {
46 namespace omp {
47 namespace target {
49 namespace plugin {
51 struct GenericPluginTy;
52 struct GenericKernelTy;
53 struct GenericDeviceTy;
55 /// Class that wraps the __tgt_async_info to simplify its usage. In case the
56 /// object is constructed without a valid __tgt_async_info, the object will use
57 /// an internal one and will synchronize the current thread with the pending
58 /// operations when calling AsyncInfoWrapperTy::finalize(). This latter function
59 /// must be called before destroying the wrapper object.
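///
/// Illustrative usage sketch (not part of this interface): a plugin-side
/// helper that wraps an incoming __tgt_async_info, performs an asynchronous
/// operation, and finalizes the wrapper exactly once. The helper name
/// submitAsync is hypothetical.
/// \code
///   Error submitAsync(GenericDeviceTy &Device, void *TgtPtr,
///                     const void *HstPtr, int64_t Size,
///                     __tgt_async_info *AsyncInfo) {
///     AsyncInfoWrapperTy AsyncInfoWrapper(Device, AsyncInfo);
///     Error Err = Device.dataSubmitImpl(TgtPtr, HstPtr, Size, AsyncInfoWrapper);
///     // finalize() must be called once before the wrapper is destroyed; it
///     // synchronizes if the wrapper created an internal async info.
///     AsyncInfoWrapper.finalize(Err);
///     return Err;
///   }
/// \endcode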
60 struct AsyncInfoWrapperTy {
61 AsyncInfoWrapperTy(GenericDeviceTy &Device, __tgt_async_info *AsyncInfoPtr);
63 ~AsyncInfoWrapperTy() {
64 assert(!AsyncInfoPtr && "AsyncInfoWrapperTy not finalized");
67 /// Get the raw __tgt_async_info pointer.
68 operator __tgt_async_info *() const { return AsyncInfoPtr; }
70 /// Indicate whether there is a queue.
71 bool hasQueue() const { return (AsyncInfoPtr->Queue != nullptr); }
73 /// Get the queue.
74 template <typename Ty> Ty getQueueAs() {
75 static_assert(sizeof(Ty) == sizeof(AsyncInfoPtr->Queue),
76 "Queue is not of the same size as target type");
77 return static_cast<Ty>(AsyncInfoPtr->Queue);
80 /// Set the queue.
81 template <typename Ty> void setQueueAs(Ty Queue) {
82 static_assert(sizeof(Ty) == sizeof(AsyncInfoPtr->Queue),
83 "Queue is not of the same size as target type");
84 assert(!AsyncInfoPtr->Queue && "Overwriting queue");
85 AsyncInfoPtr->Queue = Queue;
88 /// Synchronize with the __tgt_async_info's pending operations if it's the
89 /// internal async info. The error associated with the asynchronous operations
90 /// issued in this queue must be provided in \p Err. This function will update
91 /// the error parameter with the result of the synchronization if it was
92 /// actually executed. This function must be called before destroying the
93 /// object and only once.
94 void finalize(Error &Err);
96 /// Register \p Ptr as an associated allocation that is freed after
97 /// finalization.
98 void freeAllocationAfterSynchronization(void *Ptr) {
99 AsyncInfoPtr->AssociatedAllocations.push_back(Ptr);
102 private:
103 GenericDeviceTy &Device;
104 __tgt_async_info LocalAsyncInfo;
105 __tgt_async_info *AsyncInfoPtr;
108 /// The information level represents the level of a key-value property in the
109 /// info tree print (i.e. indentation). The first level should be the default.
110 enum InfoLevelKind { InfoLevel1 = 1, InfoLevel2, InfoLevel3 };
112 /// Class for storing device information to be printed later. An object of this
113 /// type acts as a queue of key-value properties. Each property has a key, a
114 /// value, and an optional unit for the value. For printing purposes, the
115 /// information can be classified into several levels. These levels are useful
116 /// for defining sections and subsections. Thus, each key-value property also
117 /// has an additional field indicating the level to which it belongs. Notice that
118 /// we use the level to determine the indentation of the key-value property at
119 /// printing time. See the enum InfoLevelKind for the list of accepted levels.
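///
/// Illustrative usage sketch (DeviceId and MemSize are assumed to be values
/// provided by the caller):
/// \code
///   InfoQueueTy Info;
///   Info.add("Device Number", DeviceId);
///   Info.add<InfoLevel2>("Global Memory Size", MemSize, "bytes");
///   Info.add<InfoLevel2>("Managed Memory Support", true);
///   Info.print();
/// \endcode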
120 class InfoQueueTy {
121 struct InfoQueueEntryTy {
122 std::string Key;
123 std::string Value;
124 std::string Units;
125 uint64_t Level;
128 std::deque<InfoQueueEntryTy> Queue;
130 public:
131 /// Add a new info entry to the queue. The entry requires at least a key
132 /// string in \p Key. The value in \p Value is optional and can be any type
133 /// that is representable as a string. The units in \p Units is optional and
134 /// must be a string. The info level is a template parameter that defaults to
135 /// the first level (top level).
136 template <InfoLevelKind L = InfoLevel1, typename T = std::string>
137 void add(const std::string &Key, T Value = T(),
138 const std::string &Units = std::string()) {
139 assert(!Key.empty() && "Invalid info key");
141 // Convert the value to a string depending on its type.
142 if constexpr (std::is_same_v<T, bool>)
143 Queue.push_back({Key, Value ? "Yes" : "No", Units, L});
144 else if constexpr (std::is_arithmetic_v<T>)
145 Queue.push_back({Key, std::to_string(Value), Units, L});
146 else
147 Queue.push_back({Key, Value, Units, L});
150 /// Print all info entries added to the queue.
151 void print() const {
152 // We print four spaces for each level.
153 constexpr uint64_t IndentSize = 4;
155 // Find the maximum key length (level + key) to compute the individual
156 // indentation of each entry.
157 uint64_t MaxKeySize = 0;
158 for (const auto &Entry : Queue) {
159 uint64_t KeySize = Entry.Key.size() + Entry.Level * IndentSize;
160 if (KeySize > MaxKeySize)
161 MaxKeySize = KeySize;
164 // Print all info entries.
165 for (const auto &Entry : Queue) {
166 // Compute the indentations for the current entry.
167 uint64_t KeyIndentSize = Entry.Level * IndentSize;
168 uint64_t ValIndentSize =
169 MaxKeySize - (Entry.Key.size() + KeyIndentSize) + IndentSize;
171 llvm::outs() << std::string(KeyIndentSize, ' ') << Entry.Key
172 << std::string(ValIndentSize, ' ') << Entry.Value
173 << (Entry.Units.empty() ? "" : " ") << Entry.Units << "\n";
178 /// Class wrapping a __tgt_device_image and its offload entry table on a
179 /// specific device. This class is responsible for storing and managing
180 /// the offload entries for an image on a device.
181 class DeviceImageTy {
183 /// Class representing the offload entry table. The class stores the
184 /// __tgt_target_table and a map to search in the table faster.
185 struct OffloadEntryTableTy {
186 /// Add new entry to the table.
187 void addEntry(const __tgt_offload_entry &Entry) {
188 Entries.push_back(Entry);
189 TTTablePtr.EntriesBegin = &Entries[0];
190 TTTablePtr.EntriesEnd = TTTablePtr.EntriesBegin + Entries.size();
193 /// Get the raw pointer to the __tgt_target_table.
194 operator __tgt_target_table *() {
195 if (Entries.empty())
196 return nullptr;
197 return &TTTablePtr;
200 private:
201 __tgt_target_table TTTablePtr;
202 llvm::SmallVector<__tgt_offload_entry> Entries;
204 public:
205 using const_iterator = decltype(Entries)::const_iterator;
206 const_iterator begin() const { return Entries.begin(); }
207 const_iterator end() const { return Entries.end(); }
210 /// Image identifier within the corresponding device. Notice that this id is
211 /// not unique between different devices; they may overlap.
212 int32_t ImageId;
214 /// The pointer to the raw __tgt_device_image.
215 const __tgt_device_image *TgtImage;
216 const __tgt_device_image *TgtImageBitcode;
218 /// Table of offload entries.
219 OffloadEntryTableTy OffloadEntryTable;
221 public:
222 DeviceImageTy(int32_t Id, const __tgt_device_image *Image)
223 : ImageId(Id), TgtImage(Image), TgtImageBitcode(nullptr) {
224 assert(TgtImage && "Invalid target image");
227 /// Get the image identifier within the device.
228 int32_t getId() const { return ImageId; }
230 /// Get the pointer to the raw __tgt_device_image.
231 const __tgt_device_image *getTgtImage() const { return TgtImage; }
233 void setTgtImageBitcode(const __tgt_device_image *TgtImageBitcode) {
234 this->TgtImageBitcode = TgtImageBitcode;
237 const __tgt_device_image *getTgtImageBitcode() const {
238 return TgtImageBitcode;
241 /// Get the image starting address.
242 void *getStart() const { return TgtImage->ImageStart; }
244 /// Get the image size.
245 size_t getSize() const {
246 return getPtrDiff(TgtImage->ImageEnd, TgtImage->ImageStart);
249 /// Get a memory buffer reference to the whole image.
250 MemoryBufferRef getMemoryBuffer() const {
251 return MemoryBufferRef(StringRef((const char *)getStart(), getSize()),
252 "Image");
255 /// Get a reference to the offload entry table for the image.
256 OffloadEntryTableTy &getOffloadEntryTable() { return OffloadEntryTable; }
259 /// Class implementing common functionalities of offload kernels. Each plugin
260 /// should define the specific kernel class, derive from this generic one, and
261 /// implement the necessary virtual function members.
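///
/// A plugin-specific kernel would typically be declared roughly as follows
/// (illustrative sketch; CUDAKernelTy and its behavior are assumptions):
/// \code
///   struct CUDAKernelTy : public GenericKernelTy {
///     CUDAKernelTy(const char *Name) : GenericKernelTy(Name) {}
///
///     Error initImpl(GenericDeviceTy &Device, DeviceImageTy &Image) override {
///       // Resolve the kernel function in the loaded image here.
///       return Plugin::success();
///     }
///
///     Error launchImpl(GenericDeviceTy &Device, uint32_t NumThreads,
///                      uint64_t NumBlocks, KernelArgsTy &KernelArgs,
///                      void *Args,
///                      AsyncInfoWrapperTy &AsyncInfoWrapper) const override {
///       // Enqueue the kernel on the queue wrapped by AsyncInfoWrapper here.
///       return Plugin::success();
///     }
///   };
/// \endcode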
262 struct GenericKernelTy {
263 /// Construct a kernel with a name and an execution mode.
264 GenericKernelTy(const char *Name)
265 : Name(Name), PreferredNumThreads(0), MaxNumThreads(0) {}
267 virtual ~GenericKernelTy() {}
269 /// Initialize the kernel object from a specific device.
270 Error init(GenericDeviceTy &GenericDevice, DeviceImageTy &Image);
271 virtual Error initImpl(GenericDeviceTy &GenericDevice,
272 DeviceImageTy &Image) = 0;
274 /// Launch the kernel on the specific device. The device must be the same
275 /// one used to initialize the kernel.
276 Error launch(GenericDeviceTy &GenericDevice, void **ArgPtrs,
277 ptrdiff_t *ArgOffsets, KernelArgsTy &KernelArgs,
278 AsyncInfoWrapperTy &AsyncInfoWrapper) const;
279 virtual Error launchImpl(GenericDeviceTy &GenericDevice, uint32_t NumThreads,
280 uint64_t NumBlocks, KernelArgsTy &KernelArgs,
281 void *Args,
282 AsyncInfoWrapperTy &AsyncInfoWrapper) const = 0;
284 /// Get the kernel name.
285 const char *getName() const { return Name; }
287 /// Return true if this kernel is a constructor or destructor.
288 bool isCtorOrDtor() const {
289 // TODO: This is not a great solution and should be revisited.
290 return StringRef(Name).endswith("tor");
293 /// Get the kernel image.
294 DeviceImageTy &getImage() const {
295 assert(ImagePtr && "Kernel is not initialized!");
296 return *ImagePtr;
299 /// Return the kernel environment object for kernel \p Name.
300 const KernelEnvironmentTy &getKernelEnvironmentForKernel() {
301 return KernelEnvironment;
304 /// Return a device pointer to a new kernel launch environment.
305 Expected<KernelLaunchEnvironmentTy *>
306 getKernelLaunchEnvironment(GenericDeviceTy &GenericDevice,
307 AsyncInfoWrapperTy &AsyncInfo) const;
309 /// Indicate whether an execution mode is valid.
310 static bool isValidExecutionMode(OMPTgtExecModeFlags ExecutionMode) {
311 switch (ExecutionMode) {
312 case OMP_TGT_EXEC_MODE_SPMD:
313 case OMP_TGT_EXEC_MODE_GENERIC:
314 case OMP_TGT_EXEC_MODE_GENERIC_SPMD:
315 return true;
317 return false;
320 protected:
321 /// Get the execution mode name of the kernel.
322 const char *getExecutionModeName() const {
323 switch (KernelEnvironment.Configuration.ExecMode) {
324 case OMP_TGT_EXEC_MODE_SPMD:
325 return "SPMD";
326 case OMP_TGT_EXEC_MODE_GENERIC:
327 return "Generic";
328 case OMP_TGT_EXEC_MODE_GENERIC_SPMD:
329 return "Generic-SPMD";
331 llvm_unreachable("Unknown execution mode!");
334 /// Prints generic kernel launch information.
335 Error printLaunchInfo(GenericDeviceTy &GenericDevice,
336 KernelArgsTy &KernelArgs, uint32_t NumThreads,
337 uint64_t NumBlocks) const;
339 /// Prints plugin-specific kernel launch information after generic kernel
340 /// launch information
341 virtual Error printLaunchInfoDetails(GenericDeviceTy &GenericDevice,
342 KernelArgsTy &KernelArgs,
343 uint32_t NumThreads,
344 uint64_t NumBlocks) const;
346 private:
347 /// Prepare the arguments before launching the kernel.
348 void *prepareArgs(GenericDeviceTy &GenericDevice, void **ArgPtrs,
349 ptrdiff_t *ArgOffsets, uint32_t &NumArgs,
350 llvm::SmallVectorImpl<void *> &Args,
351 llvm::SmallVectorImpl<void *> &Ptrs,
352 KernelLaunchEnvironmentTy *KernelLaunchEnvironment) const;
354 /// Get the number of threads and blocks for the kernel based on the
355 /// user-defined threads and block clauses.
356 uint32_t getNumThreads(GenericDeviceTy &GenericDevice,
357 uint32_t ThreadLimitClause[3]) const;
359 /// The number of threads \p NumThreads can be adjusted by this method.
360 /// \p IsNumThreadsFromUser is true if \p NumThreads is defined by the user via
361 /// the thread_limit clause.
362 uint64_t getNumBlocks(GenericDeviceTy &GenericDevice,
363 uint32_t BlockLimitClause[3], uint64_t LoopTripCount,
364 uint32_t &NumThreads, bool IsNumThreadsFromUser) const;
366 /// Indicate if the kernel works in Generic SPMD, Generic or SPMD mode.
367 bool isGenericSPMDMode() const {
368 return KernelEnvironment.Configuration.ExecMode ==
369 OMP_TGT_EXEC_MODE_GENERIC_SPMD;
371 bool isGenericMode() const {
372 return KernelEnvironment.Configuration.ExecMode ==
373 OMP_TGT_EXEC_MODE_GENERIC;
375 bool isSPMDMode() const {
376 return KernelEnvironment.Configuration.ExecMode == OMP_TGT_EXEC_MODE_SPMD;
379 /// The kernel name.
380 const char *Name;
382 /// The image that contains this kernel.
383 DeviceImageTy *ImagePtr = nullptr;
385 protected:
386 /// The preferred number of threads to run the kernel.
387 uint32_t PreferredNumThreads;
389 /// The maximum number of threads which the kernel could leverage.
390 uint32_t MaxNumThreads;
392 /// The kernel environment, including execution flags.
393 KernelEnvironmentTy KernelEnvironment;
395 /// The prototype kernel launch environment.
396 KernelLaunchEnvironmentTy KernelLaunchEnvironment;
399 /// Class representing a map of host pinned allocations. We track these pinned
400 /// allocations so that memory transfers involving these buffers can be optimized.
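///
/// Illustrative usage sketch (not part of the interface); PinnedAllocs is the
/// map owned by a device, and HstPtr/Size come from the caller:
/// \code
///   // Pin a host buffer and obtain its device accessible pointer.
///   auto DevAccessiblePtrOrErr = PinnedAllocs.lockHostBuffer(HstPtr, Size);
///   if (!DevAccessiblePtrOrErr)
///     return DevAccessiblePtrOrErr.takeError();
///
///   // ... issue transfers using *DevAccessiblePtrOrErr ...
///
///   // Unpin the buffer (or unregister this user) when done.
///   if (auto Err = PinnedAllocs.unlockHostBuffer(HstPtr))
///     return Err;
/// \endcode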
401 class PinnedAllocationMapTy {
403 /// Struct representing a map entry.
404 struct EntryTy {
405 /// The host pointer of the pinned allocation.
406 void *HstPtr;
408 /// The pointer that the device's driver should use to transfer data from/to the
409 /// pinned allocation. In most plugins, this pointer will be the same as the
410 /// host pointer above.
411 void *DevAccessiblePtr;
413 /// The size of the pinned allocation.
414 size_t Size;
416 /// Indicate whether the allocation was locked from outside the plugin, for
417 /// instance, from the application. The externally locked allocations are
418 /// not unlocked by the plugin when unregistering the last user.
419 bool ExternallyLocked;
421 /// The number of references to the pinned allocation. The allocation should
422 /// remain pinned and registered to the map until the number of references
423 /// becomes zero.
424 mutable size_t References;
426 /// Create an entry with the host and device accessible pointers, the buffer
427 /// size, and a boolean indicating whether the buffer was locked externally.
428 EntryTy(void *HstPtr, void *DevAccessiblePtr, size_t Size,
429 bool ExternallyLocked)
430 : HstPtr(HstPtr), DevAccessiblePtr(DevAccessiblePtr), Size(Size),
431 ExternallyLocked(ExternallyLocked), References(1) {}
433 /// Utility constructor used for std::set searches.
434 EntryTy(void *HstPtr)
435 : HstPtr(HstPtr), DevAccessiblePtr(nullptr), Size(0),
436 ExternallyLocked(false), References(0) {}
439 /// Comparator of map entries. Use the host pointer to enforce an order
440 /// between entries.
441 struct EntryCmpTy {
442 bool operator()(const EntryTy &Left, const EntryTy &Right) const {
443 return Left.HstPtr < Right.HstPtr;
447 typedef std::set<EntryTy, EntryCmpTy> PinnedAllocSetTy;
449 /// The map of host pinned allocations.
450 PinnedAllocSetTy Allocs;
452 /// The mutex to protect accesses to the map.
453 mutable std::shared_mutex Mutex;
455 /// Reference to the corresponding device.
456 GenericDeviceTy &Device;
458 /// Indicate whether mapped host buffers should be locked automatically.
459 bool LockMappedBuffers;
461 /// Indicate whether failures when locking mapped buffers should be ignored.
462 bool IgnoreLockMappedFailures;
464 /// Find an allocation that intersects with the \p HstPtr pointer. Assume the
465 /// map's mutex is acquired.
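/// For example, if the map holds a pinned allocation covering the host range
/// [Base, Base + Size), querying any address within that range returns the
/// corresponding entry, while querying an address outside every registered
/// range returns nullptr.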
466 const EntryTy *findIntersecting(const void *HstPtr) const {
467 if (Allocs.empty())
468 return nullptr;
470 // Search for the first allocation whose starting address is not less than
471 // the buffer address.
472 auto It = Allocs.lower_bound({const_cast<void *>(HstPtr)});
474 // Direct match of starting addresses.
475 if (It != Allocs.end() && It->HstPtr == HstPtr)
476 return &(*It);
478 // No direct match, but a previous pinned allocation in the map may still
479 // contain the buffer. Return null if there is no such previous
480 // allocation.
481 if (It == Allocs.begin())
482 return nullptr;
484 // Move to the previous pinned allocation.
485 --It;
487 // Return the previous allocation if it contains the buffer's start address.
488 if (advanceVoidPtr(It->HstPtr, It->Size) > HstPtr)
489 return &(*It);
491 // None found.
492 return nullptr;
495 /// Insert an entry to the map representing a locked buffer. The number of
496 /// references is set to one.
497 Error insertEntry(void *HstPtr, void *DevAccessiblePtr, size_t Size,
498 bool ExternallyLocked = false);
500 /// Erase an existing entry from the map.
501 Error eraseEntry(const EntryTy &Entry);
503 /// Register a new user into an entry that represents a locked buffer. Check
504 /// also that the registered buffer with \p HstPtr address and \p Size is
505 /// actually contained in the entry.
506 Error registerEntryUse(const EntryTy &Entry, void *HstPtr, size_t Size);
508 /// Unregister a user from the entry and return whether it is the last user.
509 /// If it is the last user, the entry will have to be removed from the map
510 /// and the entry's host buffer unlocked (if necessary).
511 Expected<bool> unregisterEntryUse(const EntryTy &Entry);
513 /// Indicate whether the first range A fully contains the second range B.
514 static bool contains(void *PtrA, size_t SizeA, void *PtrB, size_t SizeB) {
515 void *EndA = advanceVoidPtr(PtrA, SizeA);
516 void *EndB = advanceVoidPtr(PtrB, SizeB);
517 return (PtrB >= PtrA && EndB <= EndA);
520 /// Indicate whether the first range A intersects with the second range B.
521 static bool intersects(void *PtrA, size_t SizeA, void *PtrB, size_t SizeB) {
522 void *EndA = advanceVoidPtr(PtrA, SizeA);
523 void *EndB = advanceVoidPtr(PtrB, SizeB);
524 return (PtrA < EndB && PtrB < EndA);
527 public:
528 /// Create the map of pinned allocations corresponding to a specific device.
529 PinnedAllocationMapTy(GenericDeviceTy &Device) : Device(Device) {
531 // Envar that indicates whether mapped host buffers should be locked
532 // automatically. The possible values are boolean (on/off) plus 'mandatory':
533 //   off: Mapped host buffers are not locked.
534 //   on: Mapped host buffers are locked in a best-effort approach.
535 //       Failures to lock the buffers are silent.
536 //   mandatory: Mapped host buffers are always locked and failures to lock
537 //              a buffer result in a fatal error.
538 StringEnvar OMPX_LockMappedBuffers("LIBOMPTARGET_LOCK_MAPPED_HOST_BUFFERS",
539 "off");
541 bool Enabled;
542 if (StringParser::parse(OMPX_LockMappedBuffers.get().data(), Enabled)) {
543 // Parsed as a boolean value. Enable the feature if necessary.
544 LockMappedBuffers = Enabled;
545 IgnoreLockMappedFailures = true;
546 } else if (OMPX_LockMappedBuffers.get() == "mandatory") {
547 // Enable the feature and failures are fatal.
548 LockMappedBuffers = true;
549 IgnoreLockMappedFailures = false;
550 } else {
551 // Disable by default.
552 DP("Invalid value LIBOMPTARGET_LOCK_MAPPED_HOST_BUFFERS=%s\n",
553 OMPX_LockMappedBuffers.get().data());
554 LockMappedBuffers = false;
558 /// Register a buffer that was recently allocated as a locked host buffer.
559 /// None of the already registered pinned allocations should intersect with
560 /// this new one. The registration requires the host pointer in \p HstPtr,
561 /// the device accessible pointer in \p DevAccessiblePtr, and the size of the
562 /// allocation in \p Size. The allocation must be unregistered using the
563 /// unregisterHostBuffer function.
564 Error registerHostBuffer(void *HstPtr, void *DevAccessiblePtr, size_t Size);
566 /// Unregister a host pinned allocation passing the host pointer which was
567 /// previously registered using the registerHostBuffer function. When calling
568 /// this function, the pinned allocation cannot have any other user and will
569 /// not be unlocked by this function.
570 Error unregisterHostBuffer(void *HstPtr);
572 /// Lock the host buffer at \p HstPtr or register a new user if it intersects
573 /// with an already existing one. A partial overlapping with extension is not
574 /// allowed. The function returns the device accessible pointer of the pinned
575 /// buffer. The buffer must be unlocked using the unlockHostBuffer function.
576 Expected<void *> lockHostBuffer(void *HstPtr, size_t Size);
578 /// Unlock the host buffer at \p HstPtr or unregister a user if other users
579 /// are still using the pinned allocation. If this was the last user, the
580 /// pinned allocation is removed from the map and the memory is unlocked.
581 Error unlockHostBuffer(void *HstPtr);
583 /// Lock or register a host buffer that was recently mapped by libomptarget.
584 /// This behavior is applied if LIBOMPTARGET_LOCK_MAPPED_HOST_BUFFERS is
585 /// enabled. Even if not enabled, externally locked buffers are registered
586 /// in order to optimize their transfers.
587 Error lockMappedHostBuffer(void *HstPtr, size_t Size);
589 /// Unlock or unregister a host buffer that was unmapped by libomptarget.
590 Error unlockUnmappedHostBuffer(void *HstPtr);
592 /// Return the device accessible pointer associated with the host pinned
593 /// allocation to which the \p HstPtr belongs, if any. Return null in case the
594 /// \p HstPtr does not belong to any host pinned allocation. The device
595 /// accessible pointer is the one that devices should use for data transfers
596 /// that involve a host pinned buffer.
597 void *getDeviceAccessiblePtrFromPinnedBuffer(const void *HstPtr) const {
598 std::shared_lock<std::shared_mutex> Lock(Mutex);
600 // Find the intersecting allocation if any.
601 const EntryTy *Entry = findIntersecting(HstPtr);
602 if (!Entry)
603 return nullptr;
605 return advanceVoidPtr(Entry->DevAccessiblePtr,
606 getPtrDiff(HstPtr, Entry->HstPtr));
609 /// Check whether a buffer belongs to a registered host pinned allocation.
610 bool isHostPinnedBuffer(const void *HstPtr) const {
611 std::shared_lock<std::shared_mutex> Lock(Mutex);
613 // Return whether there is an intersecting allocation.
614 return (findIntersecting(const_cast<void *>(HstPtr)) != nullptr);
618 /// Class implementing common functionalities of offload devices. Each plugin
619 /// should define the specific device class, derive from this generic one, and
620 /// implement the necessary virtual function members.
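///
/// A plugin-specific device would typically be declared roughly as follows
/// (illustrative sketch; CUDADeviceTy and the chosen grid values are
/// assumptions):
/// \code
///   struct CUDADeviceTy : public GenericDeviceTy {
///     CUDADeviceTy(int32_t DeviceId, int32_t NumDevices)
///         : GenericDeviceTy(DeviceId, NumDevices, NVPTXGridValues) {}
///
///     Error setContext() override { return Plugin::success(); }
///
///     Error initImpl(GenericPluginTy &Plugin) override {
///       // Create the vendor-specific context, streams, events, etc.
///       return Plugin::success();
///     }
///
///     Error deinitImpl() override { return Plugin::success(); }
///
///     // ... plus overrides for the remaining pure virtual members ...
///   };
/// \endcode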
621 struct GenericDeviceTy : public DeviceAllocatorTy {
622 /// Construct a device with its device id within the plugin, the number of
623 /// devices in the plugin and the grid values for that kind of device.
624 GenericDeviceTy(int32_t DeviceId, int32_t NumDevices,
625 const llvm::omp::GV &GridValues);
627 /// Get the device identifier within the corresponding plugin. Notice that
628 /// this id is not unique between different plugins; they may overlap.
629 int32_t getDeviceId() const { return DeviceId; }
631 /// Set the context of the device if needed, before calling device-specific
632 /// functions. Plugins may implement this function as a no-op if not needed.
633 virtual Error setContext() = 0;
635 /// Initialize the device. After this call, the device should be already
636 /// working and ready to accept queries or modifications.
637 Error init(GenericPluginTy &Plugin);
638 virtual Error initImpl(GenericPluginTy &Plugin) = 0;
640 /// Deinitialize the device and free all its resources. After this call, the
641 /// device is no longer considered ready, so no queries or modifications are
642 /// allowed.
643 Error deinit(GenericPluginTy &Plugin);
644 virtual Error deinitImpl() = 0;
646 /// Load the binary image into the device and return the target table.
647 Expected<__tgt_target_table *> loadBinary(GenericPluginTy &Plugin,
648 const __tgt_device_image *TgtImage);
649 virtual Expected<DeviceImageTy *>
650 loadBinaryImpl(const __tgt_device_image *TgtImage, int32_t ImageId) = 0;
652 /// Setup the device environment if needed. Notice this setup may not be run
653 /// on some plugins. By default, it will be executed, but plugins can change
654 /// this behavior by overriding the shouldSetupDeviceEnvironment function.
655 Error setupDeviceEnvironment(GenericPluginTy &Plugin, DeviceImageTy &Image);
657 /// Setup the global device memory pool, if the plugin requires one.
658 Error setupDeviceMemoryPool(GenericPluginTy &Plugin, DeviceImageTy &Image,
659 uint64_t PoolSize);
661 // Setup the RPC server for this device if needed. This may not run on some
662 // plugins like the CPU targets. By default, it will not be executed so it is
663 // up to the target to override this using the shouldSetupRPCServer function.
664 Error setupRPCServer(GenericPluginTy &Plugin, DeviceImageTy &Image);
666 /// Register the offload entries for a specific image on the device.
667 Error registerOffloadEntries(DeviceImageTy &Image);
669 /// Synchronize the current thread with the pending operations on the
670 /// __tgt_async_info structure.
671 Error synchronize(__tgt_async_info *AsyncInfo);
672 virtual Error synchronizeImpl(__tgt_async_info &AsyncInfo) = 0;
674 /// Query for the completion of the pending operations on the __tgt_async_info
675 /// structure in a non-blocking manner.
676 Error queryAsync(__tgt_async_info *AsyncInfo);
677 virtual Error queryAsyncImpl(__tgt_async_info &AsyncInfo) = 0;
679 /// Check whether the architecture supports VA management
680 virtual bool supportVAManagement() const { return false; }
682 /// Get the total device memory size
683 virtual Error getDeviceMemorySize(uint64_t &DSize);
685 /// Allocates \p RSize bytes (rounded up to page size) and hints the driver to
686 /// map it to \p VAddr. The obtained address is stored in \p Addr. At return
687 /// \p RSize contains the actual size, which can be equal to or larger than the
688 /// requested size.
689 virtual Error memoryVAMap(void **Addr, void *VAddr, size_t *RSize);
691 /// De-allocates device memory and unmaps the virtual address \p VAddr
692 virtual Error memoryVAUnMap(void *VAddr, size_t Size);
694 /// Allocate data on the device or involving the device.
695 Expected<void *> dataAlloc(int64_t Size, void *HostPtr, TargetAllocTy Kind);
697 /// Deallocate data from the device or involving the device.
698 Error dataDelete(void *TgtPtr, TargetAllocTy Kind);
700 /// Pin host memory to optimize transfers and return the device accessible
701 /// pointer that devices should use for memory transfers involving the host
702 /// pinned allocation.
703 Expected<void *> dataLock(void *HstPtr, int64_t Size) {
704 return PinnedAllocs.lockHostBuffer(HstPtr, Size);
707 /// Unpin a host memory buffer that was previously pinned.
708 Error dataUnlock(void *HstPtr) {
709 return PinnedAllocs.unlockHostBuffer(HstPtr);
712 /// Lock the host buffer \p HstPtr with \p Size bytes with the vendor-specific
713 /// API and return the device accessible pointer.
714 virtual Expected<void *> dataLockImpl(void *HstPtr, int64_t Size) = 0;
716 /// Unlock a previously locked host buffer starting at \p HstPtr.
717 virtual Error dataUnlockImpl(void *HstPtr) = 0;
719 /// Mark the host buffer with address \p HstPtr and \p Size bytes as a mapped
720 /// buffer. This means that libomptarget created a new mapping of that host
721 /// buffer (e.g., because of a user OpenMP target map) and the buffer may be used
722 /// as source/destination of memory transfers. We can use this information to
723 /// lock the host buffer and optimize its memory transfers.
724 Error notifyDataMapped(void *HstPtr, int64_t Size) {
725 return PinnedAllocs.lockMappedHostBuffer(HstPtr, Size);
728 /// Mark the host buffer with address \p HstPtr as unmapped. This means that
729 /// libomptarget removed an existing mapping. If the plugin locked the buffer
730 /// in notifyDataMapped, this function should unlock it.
731 Error notifyDataUnmapped(void *HstPtr) {
732 return PinnedAllocs.unlockUnmappedHostBuffer(HstPtr);
735 /// Check whether the host buffer with address \p HstPtr is pinned by the
736 /// underlying vendor-specific runtime (if any). Retrieve the host pointer,
737 /// the device accessible pointer and the size of the original pinned buffer.
738 virtual Expected<bool> isPinnedPtrImpl(void *HstPtr, void *&BaseHstPtr,
739 void *&BaseDevAccessiblePtr,
740 size_t &BaseSize) const = 0;
742 /// Submit data to the device (host to device transfer).
743 Error dataSubmit(void *TgtPtr, const void *HstPtr, int64_t Size,
744 __tgt_async_info *AsyncInfo);
745 virtual Error dataSubmitImpl(void *TgtPtr, const void *HstPtr, int64_t Size,
746 AsyncInfoWrapperTy &AsyncInfoWrapper) = 0;
748 /// Retrieve data from the device (device to host transfer).
749 Error dataRetrieve(void *HstPtr, const void *TgtPtr, int64_t Size,
750 __tgt_async_info *AsyncInfo);
751 virtual Error dataRetrieveImpl(void *HstPtr, const void *TgtPtr, int64_t Size,
752 AsyncInfoWrapperTy &AsyncInfoWrapper) = 0;
754 /// Exchange data between devices (device to device transfer). Calling this
755 /// function is only valid if GenericPlugin::isDataExchangable() passing the
756 /// two devices returns true.
757 Error dataExchange(const void *SrcPtr, GenericDeviceTy &DstDev, void *DstPtr,
758 int64_t Size, __tgt_async_info *AsyncInfo);
759 virtual Error dataExchangeImpl(const void *SrcPtr, GenericDeviceTy &DstDev,
760 void *DstPtr, int64_t Size,
761 AsyncInfoWrapperTy &AsyncInfoWrapper) = 0;
763 /// Run the kernel associated with \p EntryPtr
764 Error launchKernel(void *EntryPtr, void **ArgPtrs, ptrdiff_t *ArgOffsets,
765 KernelArgsTy &KernelArgs, __tgt_async_info *AsyncInfo);
767 /// Initialize a __tgt_async_info structure. Related to interop features.
768 Error initAsyncInfo(__tgt_async_info **AsyncInfoPtr);
769 virtual Error initAsyncInfoImpl(AsyncInfoWrapperTy &AsyncInfoWrapper) = 0;
771 /// Initialize a __tgt_device_info structure. Related to interop features.
772 Error initDeviceInfo(__tgt_device_info *DeviceInfo);
773 virtual Error initDeviceInfoImpl(__tgt_device_info *DeviceInfo) = 0;
775 /// Create an event.
776 Error createEvent(void **EventPtrStorage);
777 virtual Error createEventImpl(void **EventPtrStorage) = 0;
779 /// Destroy an event.
780 Error destroyEvent(void *Event);
781 virtual Error destroyEventImpl(void *EventPtr) = 0;
783 /// Start the recording of the event.
784 Error recordEvent(void *Event, __tgt_async_info *AsyncInfo);
785 virtual Error recordEventImpl(void *EventPtr,
786 AsyncInfoWrapperTy &AsyncInfoWrapper) = 0;
788 /// Wait for an event to finish. Notice this wait is asynchronous if the
789 /// __tgt_async_info is not nullptr.
790 Error waitEvent(void *Event, __tgt_async_info *AsyncInfo);
791 virtual Error waitEventImpl(void *EventPtr,
792 AsyncInfoWrapperTy &AsyncInfoWrapper) = 0;
794 /// Synchronize the current thread with the event.
795 Error syncEvent(void *EventPtr);
796 virtual Error syncEventImpl(void *EventPtr) = 0;
798 /// Print information about the device.
799 Error printInfo();
800 virtual Error obtainInfoImpl(InfoQueueTy &Info) = 0;
802 /// Getters of the grid values.
803 uint32_t getWarpSize() const { return GridValues.GV_Warp_Size; }
804 uint32_t getThreadLimit() const { return GridValues.GV_Max_WG_Size; }
805 uint32_t getBlockLimit() const { return GridValues.GV_Max_Teams; }
806 uint32_t getDefaultNumThreads() const {
807 return GridValues.GV_Default_WG_Size;
809 uint32_t getDefaultNumBlocks() const {
810 return GridValues.GV_Default_Num_Teams;
812 uint32_t getDynamicMemorySize() const { return OMPX_SharedMemorySize; }
813 virtual uint64_t getClockFrequency() const { return CLOCKS_PER_SEC; }
815 /// Get target compute unit kind (e.g., sm_80, or gfx908).
816 virtual std::string getComputeUnitKind() const { return "unknown"; }
818 /// Post processing after jit backend. The ownership of \p MB will be taken.
819 virtual Expected<std::unique_ptr<MemoryBuffer>>
820 doJITPostProcessing(std::unique_ptr<MemoryBuffer> MB) const {
821 return std::move(MB);
824 /// The minimum number of threads we use for a low-trip count combined loop.
825 /// Instead of using more threads we increase the outer (block/team)
826 /// parallelism.
827 /// @see OMPX_MinThreadsForLowTripCount
828 virtual uint32_t getMinThreadsForLowTripCountLoop() {
829 return OMPX_MinThreadsForLowTripCount;
832 /// Get the total amount of hardware parallelism supported by the target
833 /// device. This is the total amount of warps or wavefronts that can be
834 /// resident on the device simultaneously.
835 virtual uint64_t getHardwareParallelism() const { return 0; }
837 /// Get the RPC server running on this device.
838 RPCServerTy *getRPCServer() const { return RPCServer; }
840 /// The number of parallel RPC ports to use on the device. In general, this
841 /// should be roughly equivalent to the amount of hardware parallelism the
842 /// device can support. This is because GPUs in general do not have forward
843 /// progress guarantees, so we minimize thread level dependencies by
844 /// allocating enough space such that each device thread can have a port. This
845 /// is likely overly pessimistic in the average case, but guarantees no
846 /// deadlocks at the cost of memory. This must be overridden by targets
847 /// expecting to use the RPC server.
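///
/// Illustrative override for a GPU target (a sketch, not a requirement of
/// this interface):
/// \code
///   uint64_t requestedRPCPortCount() const override {
///     // One port per potentially resident warp/wavefront.
///     return getHardwareParallelism();
///   }
/// \endcode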
848 virtual uint64_t requestedRPCPortCount() const {
849 assert(!shouldSetupRPCServer() && "Default implementation cannot be used");
850 return 0;
853 private:
854 /// Register offload entry for global variable.
855 Error registerGlobalOffloadEntry(DeviceImageTy &DeviceImage,
856 const __tgt_offload_entry &GlobalEntry,
857 __tgt_offload_entry &DeviceEntry);
859 /// Register offload entry for kernel function.
860 Error registerKernelOffloadEntry(DeviceImageTy &DeviceImage,
861 const __tgt_offload_entry &KernelEntry,
862 __tgt_offload_entry &DeviceEntry);
864 /// Allocate and construct a kernel object.
865 virtual Expected<GenericKernelTy &>
866 constructKernel(const __tgt_offload_entry &KernelEntry) = 0;
868 /// Get and set the stack size and heap size for the device. If not used, the
869 /// plugin can implement the setters as no-op and setting the output
870 /// value to zero for the getters.
871 virtual Error getDeviceStackSize(uint64_t &V) = 0;
872 virtual Error setDeviceStackSize(uint64_t V) = 0;
873 virtual Error getDeviceHeapSize(uint64_t &V) = 0;
874 virtual Error setDeviceHeapSize(uint64_t V) = 0;
876 /// Indicate whether the device should setup the device environment. Notice
877 /// that returning false in this function will change the behavior of the
878 /// setupDeviceEnvironment() function.
879 virtual bool shouldSetupDeviceEnvironment() const { return true; }
881 /// Indicate whether the device should setup the global device memory pool. If
882 /// false is returned, the value on the device will be uninitialized.
883 virtual bool shouldSetupDeviceMemoryPool() const { return true; }
885 /// Indicate whether or not the device should setup the RPC server. This is
886 /// only necessary for unhosted targets like the GPU.
887 virtual bool shouldSetupRPCServer() const { return false; }
889 /// Pointer to the memory manager or nullptr if not available.
890 MemoryManagerTy *MemoryManager;
892 /// Environment variables defined by the OpenMP standard.
893 Int32Envar OMP_TeamLimit;
894 Int32Envar OMP_NumTeams;
895 Int32Envar OMP_TeamsThreadLimit;
897 /// Environment variables defined by the LLVM OpenMP implementation.
898 Int32Envar OMPX_DebugKind;
899 UInt32Envar OMPX_SharedMemorySize;
900 UInt64Envar OMPX_TargetStackSize;
901 UInt64Envar OMPX_TargetHeapSize;
903 /// Environment flag to set the minimum number of threads we use for a
904 /// low-trip count combined loop. Instead of using more threads we increase
905 /// the outer (block/team) parallelism.
906 UInt32Envar OMPX_MinThreadsForLowTripCount =
907 UInt32Envar("LIBOMPTARGET_MIN_THREADS_FOR_LOW_TRIP_COUNT", 32);
909 protected:
910 /// Environment variables defined by the LLVM OpenMP implementation
911 /// regarding the initial number of streams and events.
912 UInt32Envar OMPX_InitialNumStreams;
913 UInt32Envar OMPX_InitialNumEvents;
915 /// Array of images loaded into the device. Images are automatically
916 /// deallocated by the allocator.
917 llvm::SmallVector<DeviceImageTy *> LoadedImages;
919 /// The identifier of the device within the plugin. Notice this is not a
920 /// global device id and is not the device id visible to the OpenMP user.
921 const int32_t DeviceId;
923 /// The default grid values used for this device.
924 llvm::omp::GV GridValues;
926 /// Enumeration representing the current state of the peer access between two
927 /// devices (both under the same plugin).
928 /// The states can be a) PENDING when the state has not been queried and needs
929 /// to be queried, b) AVAILABLE when the peer access is available to be used,
930 /// and c) UNAVAILABLE if the system does not allow it.
931 enum class PeerAccessState : uint8_t { AVAILABLE, UNAVAILABLE, PENDING };
933 /// Array of peer access states with the rest of the devices. This means that
934 /// if device I has an array PeerAccesses with PeerAccesses[J] == AVAILABLE,
935 /// the device I can access device J's memory directly. However, notice this
936 /// does not mean that device J can access device I's memory directly.
937 llvm::SmallVector<PeerAccessState> PeerAccesses;
938 std::mutex PeerAccessesLock;
940 /// Map of host pinned allocations used to optimize device transfers.
941 PinnedAllocationMapTy PinnedAllocs;
943 /// A pointer to an RPC server instance attached to this device if present.
944 /// This is used to run the RPC server during task synchronization.
945 RPCServerTy *RPCServer;
947 #ifdef OMPT_SUPPORT
948 /// OMPT callback functions
949 #define defineOmptCallback(Name, Type, Code) Name##_t Name##_fn = nullptr;
950 FOREACH_OMPT_DEVICE_EVENT(defineOmptCallback)
951 #undef defineOmptCallback
953 /// Internal representation for OMPT device (initialize & finalize)
954 std::atomic<bool> OmptInitialized;
955 #endif
957 private:
958 DeviceMemoryPoolTy DeviceMemoryPool = {nullptr, 0};
959 DeviceMemoryPoolTrackingTy DeviceMemoryPoolTracking = {0, 0, ~0U, 0};
962 /// Class implementing common functionalities of offload plugins. Each plugin
963 /// should define the specific plugin class, derive from this generic one, and
964 /// implement the necessary virtual function members.
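///
/// Illustrative derivation sketch (CUDAPluginTy and the returned values are
/// assumptions):
/// \code
///   struct CUDAPluginTy final : public GenericPluginTy {
///     CUDAPluginTy() : GenericPluginTy(Triple::nvptx64) {}
///
///     Expected<int32_t> initImpl() override {
///       // Initialize the vendor runtime and return the number of devices.
///       return 1;
///     }
///     Error deinitImpl() override { return Plugin::success(); }
///
///     uint16_t getMagicElfBits() const override { return /*ELF::EM_CUDA=*/190; }
///     Triple::ArchType getTripleArch() const override {
///       return Triple::nvptx64;
///     }
///     Expected<bool> isImageCompatible(__tgt_image_info *Info) const override {
///       return true;
///     }
///   };
/// \endcode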
965 struct GenericPluginTy {
967 /// Construct a plugin instance.
968 GenericPluginTy(Triple::ArchType TA)
969 : RequiresFlags(OMP_REQ_UNDEFINED), GlobalHandler(nullptr), JIT(TA),
970 RPCServer(nullptr) {}
972 virtual ~GenericPluginTy() {}
974 /// Initialize the plugin.
975 Error init();
977 /// Initialize the plugin and return the number of available devices.
978 virtual Expected<int32_t> initImpl() = 0;
980 /// Deinitialize the plugin and release the resources.
981 Error deinit();
982 virtual Error deinitImpl() = 0;
984 /// Get the reference to the device with a certain device id.
985 GenericDeviceTy &getDevice(int32_t DeviceId) {
986 assert(isValidDeviceId(DeviceId) && "Invalid device id");
987 assert(Devices[DeviceId] && "Device is uninitialized");
989 return *Devices[DeviceId];
992 /// Get the number of active devices.
993 int32_t getNumDevices() const { return NumDevices; }
995 /// Get the plugin-specific device identifier offset.
996 int32_t getDeviceIdStartIndex() const { return DeviceIdStartIndex; }
998 /// Set the plugin-specific device identifier offset.
999 void setDeviceIdStartIndex(int32_t Offset) { DeviceIdStartIndex = Offset; }
1001 /// Get the ELF code to recognize the binary image of this plugin.
1002 virtual uint16_t getMagicElfBits() const = 0;
1004 /// Get the target triple of this plugin.
1005 virtual Triple::ArchType getTripleArch() const = 0;
1007 /// Allocate a structure using the internal allocator.
1008 template <typename Ty> Ty *allocate() {
1009 return reinterpret_cast<Ty *>(Allocator.Allocate(sizeof(Ty), alignof(Ty)));
1012 /// Get the reference to the global handler of this plugin.
1013 GenericGlobalHandlerTy &getGlobalHandler() {
1014 assert(GlobalHandler && "Global handler not initialized");
1015 return *GlobalHandler;
1018 /// Get the reference to the JIT used for all devices connected to this
1019 /// plugin.
1020 JITEngine &getJIT() { return JIT; }
1022 /// Get a reference to the RPC server used to provide host services.
1023 RPCServerTy &getRPCServer() {
1024 assert(RPCServer && "RPC server not initialized");
1025 return *RPCServer;
1028 /// Get the OpenMP requires flags set for this plugin.
1029 int64_t getRequiresFlags() const { return RequiresFlags; }
1031 /// Set the OpenMP requires flags for this plugin.
1032 void setRequiresFlag(int64_t Flags) { RequiresFlags = Flags; }
1034 /// Initialize a device within the plugin.
1035 Error initDevice(int32_t DeviceId);
1037 /// Deinitialize a device within the plugin and release its resources.
1038 Error deinitDevice(int32_t DeviceId);
1040 /// Indicate whether data can be exchanged directly between two devices under
1041 /// this same plugin. If this function returns true, it's safe to call the
1042 /// GenericDeviceTy::exchangeData() function on the source device.
1043 virtual bool isDataExchangable(int32_t SrcDeviceId, int32_t DstDeviceId) {
1044 return isValidDeviceId(SrcDeviceId) && isValidDeviceId(DstDeviceId);
1047 /// Indicate if an image is compatible with the plugin devices. Notice that
1048 /// this function may be called before actually initializing the devices. So
1049 /// we cannot move this function into GenericDeviceTy.
1050 virtual Expected<bool> isImageCompatible(__tgt_image_info *Info) const = 0;
1052 /// Indicate whether the plugin supports empty images.
1053 virtual bool supportsEmptyImages() const { return false; }
1055 protected:
1056 /// Indicate whether a device id is valid.
1057 bool isValidDeviceId(int32_t DeviceId) const {
1058 return (DeviceId >= 0 && DeviceId < getNumDevices());
1061 private:
1062 /// Number of devices available for the plugin.
1063 int32_t NumDevices = 0;
1065 /// Index offset, which when added to a DeviceId, will yield a unique
1066 /// user-observable device identifier. This is especially important when
1067 /// DeviceIds of multiple plugins / RTLs need to be distinguishable.
1068 int32_t DeviceIdStartIndex = 0;
1070 /// Array of pointers to the devices. Initially, they are all set to nullptr.
1071 /// Once a device is initialized, the pointer is stored in the position given
1072 /// by its device id. A position with nullptr means that the corresponding
1073 /// device was not initialized yet.
1074 llvm::SmallVector<GenericDeviceTy *> Devices;
1076 /// OpenMP requires flags.
1077 int64_t RequiresFlags;
1079 /// Pointer to the global handler for this plugin.
1080 GenericGlobalHandlerTy *GlobalHandler;
1082 /// Internal allocator for different structures.
1083 BumpPtrAllocator Allocator;
1085 /// The JIT engine shared by all devices connected to this plugin.
1086 JITEngine JIT;
1088 /// The interface between the plugin and the GPU for host services.
1089 RPCServerTy *RPCServer;
1092 /// Class for simplifying the getter operation of the plugin. Anywhere in the
1093 /// code, the current plugin can be retrieved by Plugin::get(). The class also
1094 /// declares functions to create plugin-specific object instances. The check(),
1095 /// createPlugin(), createDevice() and createGlobalHandler() functions should be
1096 /// defined by each plugin implementation.
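///
/// Illustrative usage sketch (CUDAPluginTy is an assumed plugin-specific
/// type):
/// \code
///   // Retrieve the generic plugin instance, initializing it if needed.
///   GenericPluginTy &GenericPlugin = Plugin::get();
///
///   // Or retrieve it as the plugin-specific type.
///   auto &CUDAPlugin = Plugin::get<CUDAPluginTy>();
/// \endcode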
1097 class Plugin {
1098 // Reference to the plugin instance.
1099 static GenericPluginTy *SpecificPlugin;
1101 Plugin() {
1102 if (auto Err = init())
1103 REPORT("Failed to initialize plugin: %s\n",
1104 toString(std::move(Err)).data());
1107 ~Plugin() {
1108 if (auto Err = deinit())
1109 REPORT("Failed to deinitialize plugin: %s\n",
1110 toString(std::move(Err)).data());
1113 Plugin(const Plugin &) = delete;
1114 void operator=(const Plugin &) = delete;
1116 /// Create and initialize the plugin instance.
1117 static Error init() {
1118 assert(!SpecificPlugin && "Plugin already created");
1120 // Create the specific plugin.
1121 SpecificPlugin = createPlugin();
1122 assert(SpecificPlugin && "Plugin was not created");
1124 // Initialize the plugin.
1125 return SpecificPlugin->init();
1128 // Deinitialize and destroy the plugin instance.
1129 static Error deinit() {
1130 assert(SpecificPlugin && "Plugin no longer valid");
1132 // Deinitialize the plugin.
1133 if (auto Err = SpecificPlugin->deinit())
1134 return Err;
1136 // Delete the plugin instance.
1137 delete SpecificPlugin;
1139 // Invalidate the plugin reference.
1140 SpecificPlugin = nullptr;
1142 return Plugin::success();
1145 public:
1146 /// Initialize the plugin if needed. The plugin could have been initialized by
1147 /// a previous call to Plugin::get().
1148 static Error initIfNeeded() {
1149 // Trigger the initialization if needed.
1150 get();
1152 return Error::success();
1155 // Deinitialize the plugin if needed. The plugin could have been deinitialized
1156 // because the plugin library was exiting.
1157 static Error deinitIfNeeded() {
1158 // Do nothing. The plugin is deinitialized automatically.
1159 return Plugin::success();
1162 /// Get a reference (or create if it was not created) to the plugin instance.
1163 static GenericPluginTy &get() {
1164 // This static variable will initialize the underlying plugin instance in
1165 // case there was no previous explicit initialization. The initialization is
1166 // thread safe.
1167 static Plugin Plugin;
1169 assert(SpecificPlugin && "Plugin is not active");
1170 return *SpecificPlugin;
1173 /// Get a reference to the plugin with a specific plugin-specific type.
1174 template <typename Ty> static Ty &get() { return static_cast<Ty &>(get()); }
1176 /// Indicate whether the plugin is active.
1177 static bool isActive() { return SpecificPlugin != nullptr; }
1179 /// Create a success error. This is the same as calling Error::success(), but
1180 /// it is recommended to use this one for consistency with Plugin::error() and
1181 /// Plugin::check().
1182 static Error success() { return Error::success(); }
1184 /// Create a string error.
1185 template <typename... ArgsTy>
1186 static Error error(const char *ErrFmt, ArgsTy... Args) {
1187 return createStringError(inconvertibleErrorCode(), ErrFmt, Args...);
1190 /// Check the plugin-specific error code and return an error or success
1191 /// accordingly. In case of an error, create a string error with the error
1192 /// description. The ErrFmt should follow the format:
1193 /// "Error in <function name>[<optional info>]: %s"
1194 /// The last format specifier "%s" is mandatory and will be used to place the
1195 /// error code's description. Notice this function should be only called from
1196 /// the plugin-specific code.
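///
/// Illustrative call from plugin-specific code (a sketch; cuMemAlloc comes
/// from the CUDA driver API and DevPtr/Size are assumed to exist in the
/// surrounding code):
/// \code
///   CUresult Res = cuMemAlloc(&DevPtr, Size);
///   if (auto Err = Plugin::check(Res, "Error in cuMemAlloc: %s"))
///     return Err;
/// \endcode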
1197 template <typename... ArgsTy>
1198 static Error check(int32_t ErrorCode, const char *ErrFmt, ArgsTy... Args);
1200 /// Create a plugin instance.
1201 static GenericPluginTy *createPlugin();
1203 /// Create a plugin-specific device.
1204 static GenericDeviceTy *createDevice(int32_t DeviceId, int32_t NumDevices);
1206 /// Create a plugin-specific global handler.
1207 static GenericGlobalHandlerTy *createGlobalHandler();
1210 /// Auxiliary interface class for GenericDeviceResourceManagerTy. This class
1211 /// acts as a reference to a device resource, such as a stream, and requires
1212 /// some basic functions to be implemented. The derived class should define an
1213 /// empty constructor that creates an empty and invalid resource reference. Do
1214 /// not create a new resource on the ctor, but on the create() function instead.
1216 /// The derived class should also define the type HandleTy as the underlying
1217 /// resource handle type. For instance, in a CUDA stream it would be:
1218 /// using HandleTy = CUstream;
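///
/// Illustrative derived reference (CUDAStreamRef is an assumption; the actual
/// vendor calls are elided):
/// \code
///   struct CUDAStreamRef final : public GenericDeviceResourceRef {
///     using HandleTy = CUstream;
///
///     CUDAStreamRef() = default;
///     CUDAStreamRef(HandleTy Stream) : Stream(Stream) {}
///     operator HandleTy() const { return Stream; }
///
///     Error create(GenericDeviceTy &Device) override {
///       // Create the underlying stream with the vendor API.
///       return Plugin::success();
///     }
///     Error destroy(GenericDeviceTy &Device) override {
///       // Destroy the underlying stream with the vendor API.
///       return Plugin::success();
///     }
///
///   private:
///     HandleTy Stream = nullptr;
///   };
/// \endcode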
1219 struct GenericDeviceResourceRef {
1220 /// Create a new resource and store a reference to it.
1221 virtual Error create(GenericDeviceTy &Device) = 0;
1223 /// Destroy and release the resources pointed to by the reference.
1224 virtual Error destroy(GenericDeviceTy &Device) = 0;
1226 protected:
1227 ~GenericDeviceResourceRef() = default;
1230 /// Class that implements a resource pool belonging to a device. This class
1231 /// operates with references to the actual resources. These references must
1232 /// derive from the GenericDeviceResourceRef class and implement the create
1233 /// and destroy virtual functions.
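///
/// Illustrative usage sketch (CUDAStreamRef follows the sketch above; the
/// manager name and initial size are assumptions):
/// \code
///   GenericDeviceResourceManagerTy<CUDAStreamRef> StreamManager(Device);
///   if (auto Err = StreamManager.init(/*InitialSize=*/32))
///     return Err;
///
///   CUstream Stream;
///   if (auto Err = StreamManager.getResource(Stream))
///     return Err;
///   // ... use the stream ...
///   if (auto Err = StreamManager.returnResource(Stream))
///     return Err;
///
///   // deinit() must be called before the manager is destroyed.
///   if (auto Err = StreamManager.deinit())
///     return Err;
/// \endcode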
1234 template <typename ResourceRef> class GenericDeviceResourceManagerTy {
1235 using ResourcePoolTy = GenericDeviceResourceManagerTy<ResourceRef>;
1236 using ResourceHandleTy = typename ResourceRef::HandleTy;
1238 public:
1239 /// Create an empty resource pool for a specific device.
1240 GenericDeviceResourceManagerTy(GenericDeviceTy &Device)
1241 : Device(Device), NextAvailable(0) {}
1243 /// Destroy the resource pool. At this point, the deinit() function should
1244 /// already have been executed so the resource pool should be empty.
1245 virtual ~GenericDeviceResourceManagerTy() {
1246 assert(ResourcePool.empty() && "Resource pool not empty");
1249 /// Initialize the resource pool.
1250 Error init(uint32_t InitialSize) {
1251 assert(ResourcePool.empty() && "Resource pool already initialized");
1252 return ResourcePoolTy::resizeResourcePool(InitialSize);
1255 /// Deinitialize the resource pool and delete all resources. This function
1256 /// must be called before the destructor.
1257 virtual Error deinit() {
1258 if (NextAvailable)
1259 DP("Missing %d resources to be returned\n", NextAvailable);
1261 // TODO: This prevents a bug in libomptarget from making the plugins fail.
1262 // There may be some resources that were not returned; do not destroy those.
1263 if (auto Err = ResourcePoolTy::resizeResourcePool(NextAvailable))
1264 return Err;
1266 ResourcePool.clear();
1268 return Plugin::success();
1271 /// Get a resource from the pool or create new ones. If the function
1272 /// succeeds, the handle to the resource is saved in \p Handle.
1273 virtual Error getResource(ResourceHandleTy &Handle) {
1274 // Get a resource with an empty resource processor.
1275 return getResourcesImpl(1, &Handle,
1276 [](ResourceHandleTy) { return Plugin::success(); });
1279 /// Get multiple resources from the pool or create new ones. If the function
1280 /// succeeds, the handles to the resources are saved in \p Handles.
1281 virtual Error getResources(uint32_t Num, ResourceHandleTy *Handles) {
1282 // Get resources with an empty resource processor.
1283 return getResourcesImpl(Num, Handles,
1284 [](ResourceHandleTy) { return Plugin::success(); });
1287 /// Return resource to the pool.
1288 virtual Error returnResource(ResourceHandleTy Handle) {
1289 // Return a resource with an empty resource processor.
1290 return returnResourceImpl(
1291 Handle, [](ResourceHandleTy) { return Plugin::success(); });
1294 protected:
1295 /// Get multiple resources from the pool or create new ones. If the function
1296 /// succeeds, the handles to the resources are saved in \p Handles. Also
1297 /// process each of the obtained resources with \p Processor.
1298 template <typename FuncTy>
1299 Error getResourcesImpl(uint32_t Num, ResourceHandleTy *Handles,
1300 FuncTy Processor) {
1301 const std::lock_guard<std::mutex> Lock(Mutex);
1303 assert(NextAvailable <= ResourcePool.size() &&
1304 "Resource pool is corrupted");
1306 if (NextAvailable + Num > ResourcePool.size())
1307 // Double the resource pool or resize it to provide the requested ones.
1308 if (auto Err = ResourcePoolTy::resizeResourcePool(
1309 std::max(NextAvailable * 2, NextAvailable + Num)))
1310 return Err;
1312 // Save the handles in the output array parameter.
1313 for (uint32_t r = 0; r < Num; ++r)
1314 Handles[r] = ResourcePool[NextAvailable + r];
1316 // Process all obtained resources.
1317 for (uint32_t r = 0; r < Num; ++r)
1318 if (auto Err = Processor(Handles[r]))
1319 return Err;
1321 NextAvailable += Num;
1323 return Plugin::success();
1326 /// Return resource to the pool and process the resource with \p Processor.
1327 template <typename FuncTy>
1328 Error returnResourceImpl(ResourceHandleTy Handle, FuncTy Processor) {
1329 const std::lock_guard<std::mutex> Lock(Mutex);
1331 // Process the returned resource.
1332 if (auto Err = Processor(Handle))
1333 return Err;
1335 assert(NextAvailable > 0 && "Resource pool is corrupted");
1336 ResourcePool[--NextAvailable] = Handle;
1338 return Plugin::success();
1341 protected:
1342 /// The resources between \p OldSize and \p NewSize need to be created or
1343 /// destroyed. The mutex is locked when this function is called.
1344 Error resizeResourcePoolImpl(uint32_t OldSize, uint32_t NewSize) {
1345 assert(OldSize != NewSize && "Resizing to the same size");
1347 if (auto Err = Device.setContext())
1348 return Err;
1350 if (OldSize < NewSize) {
1351 // Create new resources.
1352 for (uint32_t I = OldSize; I < NewSize; ++I) {
1353 if (auto Err = ResourcePool[I].create(Device))
1354 return Err;
1356 } else {
1357 // Destroy the obsolete resources.
1358 for (uint32_t I = NewSize; I < OldSize; ++I) {
1359 if (auto Err = ResourcePool[I].destroy(Device))
1360 return Err;
1363 return Plugin::success();
1366 /// Increase or decrease the number of resources. This function should
1367 /// be called with the mutex acquired.
1368 Error resizeResourcePool(uint32_t NewSize) {
1369 uint32_t OldSize = ResourcePool.size();
1371 // Nothing to do.
1372 if (OldSize == NewSize)
1373 return Plugin::success();
1375 if (OldSize < NewSize) {
1376 // Increase the number of resources.
1377 ResourcePool.resize(NewSize);
1378 return ResourcePoolTy::resizeResourcePoolImpl(OldSize, NewSize);
1381 // Decrease the number of resources otherwise.
1382 auto Err = ResourcePoolTy::resizeResourcePoolImpl(OldSize, NewSize);
1383 ResourcePool.resize(NewSize);
1385 return Err;
1388 /// The device to which the resources belong
1389 GenericDeviceTy &Device;
1391 /// Mutex for the resource pool.
1392 std::mutex Mutex;
1394 /// The next available resource in the pool.
1395 uint32_t NextAvailable;
1397 /// The actual resource pool.
1398 std::deque<ResourceRef> ResourcePool;
1401 /// A static check on whether or not we support RPC in libomptarget.
1402 const bool libomptargetSupportsRPC();
1404 } // namespace plugin
1405 } // namespace target
1406 } // namespace omp
1407 } // namespace llvm
1409 #endif // OPENMP_LIBOMPTARGET_PLUGINS_NEXTGEN_COMMON_PLUGININTERFACE_H