1 //===----------- api.cpp - Target independent OpenMP target RTL -----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Implementation of OpenMP API interface functions.
11 //===----------------------------------------------------------------------===//
14 #include "omptarget.h"
18 #include "llvm/ADT/SmallVector.h"
/// Reads the number of entries in PM->Devices and reports it through DP.
/// NOTE(review): neither a return statement nor the closing brace is visible
/// in this excerpt, and no locking around the PM->Devices read is shown; the
/// listing appears to have dropped lines — confirm against the complete file.
25 EXTERN
int omp_get_num_devices(void) {
// Snapshot the current size of the device list.
28 size_t DevicesSize
= PM
->Devices
.size();
31 DP("Call to omp_get_num_devices returning %zd\n", DevicesSize
);
/// Obtains the value of omp_get_initial_device() and reports it through DP.
/// NOTE(review): no return statement or closing brace is visible in this
/// excerpt; presumably HostDevice is what gets returned — confirm against the
/// complete file.
36 EXTERN
int omp_get_device_num(void) {
38 int HostDevice
= omp_get_initial_device();
40 DP("Call to omp_get_device_num returning %d\n", HostDevice
);
/// Takes the value of omp_get_num_devices() as the host (initial) device
/// number and reports it through DP.
/// NOTE(review): no return statement or closing brace is visible in this
/// excerpt; presumably HostDevice is what gets returned — confirm against the
/// complete file.
45 EXTERN
int omp_get_initial_device(void) {
// The host device number is derived from the device count.
47 int HostDevice
= omp_get_num_devices();
48 DP("Call to omp_get_initial_device returning %d\n", HostDevice
);
52 EXTERN
void *omp_target_alloc(size_t Size
, int DeviceNum
) {
53 return targetAllocExplicit(Size
, DeviceNum
, TARGET_ALLOC_DEFAULT
, __func__
);
56 EXTERN
void *llvm_omp_target_alloc_device(size_t Size
, int DeviceNum
) {
57 return targetAllocExplicit(Size
, DeviceNum
, TARGET_ALLOC_DEVICE
, __func__
);
60 EXTERN
void *llvm_omp_target_alloc_host(size_t Size
, int DeviceNum
) {
61 return targetAllocExplicit(Size
, DeviceNum
, TARGET_ALLOC_HOST
, __func__
);
64 EXTERN
void *llvm_omp_target_alloc_shared(size_t Size
, int DeviceNum
) {
65 return targetAllocExplicit(Size
, DeviceNum
, TARGET_ALLOC_SHARED
, __func__
);
68 EXTERN
void omp_target_free(void *Ptr
, int DeviceNum
) {
69 return targetFreeExplicit(Ptr
, DeviceNum
, TARGET_ALLOC_DEFAULT
, __func__
);
72 EXTERN
void llvm_omp_target_free_device(void *Ptr
, int DeviceNum
) {
73 return targetFreeExplicit(Ptr
, DeviceNum
, TARGET_ALLOC_DEVICE
, __func__
);
76 EXTERN
void llvm_omp_target_free_host(void *Ptr
, int DeviceNum
) {
77 return targetFreeExplicit(Ptr
, DeviceNum
, TARGET_ALLOC_HOST
, __func__
);
80 EXTERN
void llvm_omp_target_free_shared(void *Ptre
, int DeviceNum
) {
81 return targetFreeExplicit(Ptre
, DeviceNum
, TARGET_ALLOC_SHARED
, __func__
);
84 EXTERN
void *llvm_omp_target_dynamic_shared_alloc() { return nullptr; }
85 EXTERN
void *llvm_omp_get_dynamic_shared() { return nullptr; }
/// Forwards Ptr, Size and DeviceNum to targetLockExplicit, tagged with this
/// function's name, and returns that call's result. The result is marked
/// [[nodiscard]].
/// NOTE(review): the tail of the parameter list (the declaration of DeviceNum
/// and the opening brace) and the closing brace are not visible in this
/// excerpt — confirm against the complete file.
87 EXTERN
[[nodiscard
]] void *llvm_omp_target_lock_mem(void *Ptr
, size_t Size
,
89 return targetLockExplicit(Ptr
, Size
, DeviceNum
, __func__
);
92 EXTERN
void llvm_omp_target_unlock_mem(void *Ptr
, int DeviceNum
) {
93 targetUnlockExplicit(Ptr
, DeviceNum
, __func__
);
/// Queries whether the storage at host address Ptr is present on device
/// DeviceNum. In the visible code, DeviceNum == omp_get_initial_device() is
/// the host case, DeviceNum (cast to size_t) is range-checked against
/// PM->Devices.size(), and otherwise the device is asked for a 1-byte lookup
/// through getTgtPtrBegin with UpdateRefCount and UseHoldRefCount both false.
/// Rc holds TPR.isPresent().
/// NOTE(review): the null-pointer test, the early returns, the lock matching
/// the RTLsMtx unlock, the final return and the closing braces are not visible
/// in this excerpt — confirm against the complete file.
96 EXTERN
int omp_target_is_present(const void *Ptr
, int DeviceNum
) {
98 DP("Call to omp_target_is_present for device %d and address " DPxMOD
"\n",
99 DeviceNum
, DPxPTR(Ptr
));
102 DP("Call to omp_target_is_present with NULL ptr, returning false\n");
// The host device is identified by the value of omp_get_initial_device().
106 if (DeviceNum
== omp_get_initial_device()) {
107 DP("Call to omp_target_is_present on host, returning true\n");
112 size_t DevicesSize
= PM
->Devices
.size();
113 PM
->RTLsMtx
.unlock();
// Casting DeviceNum to size_t makes a negative ID compare as a huge value, so
// it is rejected by the same bound check.
114 if (DevicesSize
<= (size_t)DeviceNum
) {
115 DP("Call to omp_target_is_present with invalid device ID, returning "
120 DeviceTy
&Device
= *PM
->Devices
[DeviceNum
];
121 // omp_target_is_present tests whether a host pointer refers to storage that
122 // is mapped to a given device. However, due to the lack of the storage size,
123 // only check 1 byte. Cannot set size 0 which checks whether the pointer (zero
124 // length array) is mapped instead of the referred storage.
125 TargetPointerResultTy TPR
= Device
.getTgtPtrBegin(const_cast<void *>(Ptr
), 1,
126 /*UpdateRefCount=*/false,
127 /*UseHoldRefCount=*/false);
128 int Rc
= TPR
.isPresent();
129 DP("Call to omp_target_is_present returns %d\n", Rc
);
/// Copies Length bytes from Src + SrcOffset on SrcDevice to Dst + DstOffset on
/// DstDevice. A device is treated as the host when it equals
/// omp_get_initial_device(). Visible copy paths:
///  - host to host: memcpy;
///  - host to device: DstDev.submitData;
///  - device to host: SrcDev.retrieveData;
///  - device to device: SrcDev.dataExchange when
///    SrcDev.isDataExchangable(DstDev); otherwise the data is staged through
///    a malloc'ed host Buffer (retrieveData followed by submitData).
/// NOTE(review): the end of the parameter list (SrcDevice), several returns,
/// the final else, any release of Buffer, the final return and the closing
/// braces are not visible in this excerpt — confirm against the complete file.
133 EXTERN
int omp_target_memcpy(void *Dst
, const void *Src
, size_t Length
,
134 size_t DstOffset
, size_t SrcOffset
, int DstDevice
,
137 DP("Call to omp_target_memcpy, dst device %d, src device %d, "
138 "dst addr " DPxMOD
", src addr " DPxMOD
", dst offset %zu, "
139 "src offset %zu, length %zu\n",
140 DstDevice
, SrcDevice
, DPxPTR(Dst
), DPxPTR(Src
), DstOffset
, SrcOffset
,
// Length is a size_t, so `Length <= 0` can only be true for Length == 0.
143 if (!Dst
|| !Src
|| Length
<= 0) {
145 DP("Call to omp_target_memcpy with zero length, nothing to do\n");
146 return OFFLOAD_SUCCESS
;
149 REPORT("Call to omp_target_memcpy with invalid arguments\n");
// A non-host source device must pass deviceIsReady().
153 if (SrcDevice
!= omp_get_initial_device() && !deviceIsReady(SrcDevice
)) {
154 REPORT("omp_target_memcpy returns OFFLOAD_FAIL\n");
// A non-host destination device must pass deviceIsReady().
158 if (DstDevice
!= omp_get_initial_device() && !deviceIsReady(DstDevice
)) {
159 REPORT("omp_target_memcpy returns OFFLOAD_FAIL\n");
163 int Rc
= OFFLOAD_SUCCESS
;
// Apply the byte offsets to the base addresses.
164 void *SrcAddr
= (char *)const_cast<void *>(Src
) + SrcOffset
;
165 void *DstAddr
= (char *)Dst
+ DstOffset
;
167 if (SrcDevice
== omp_get_initial_device() &&
168 DstDevice
== omp_get_initial_device()) {
169 DP("copy from host to host\n");
170 const void *P
= memcpy(DstAddr
, SrcAddr
, Length
);
173 } else if (SrcDevice
== omp_get_initial_device()) {
174 DP("copy from host to device\n");
175 DeviceTy
&DstDev
= *PM
->Devices
[DstDevice
];
176 AsyncInfoTy
AsyncInfo(DstDev
);
177 Rc
= DstDev
.submitData(DstAddr
, SrcAddr
, Length
, AsyncInfo
);
178 } else if (DstDevice
== omp_get_initial_device()) {
179 DP("copy from device to host\n");
180 DeviceTy
&SrcDev
= *PM
->Devices
[SrcDevice
];
181 AsyncInfoTy
AsyncInfo(SrcDev
);
182 Rc
= SrcDev
.retrieveData(DstAddr
, SrcAddr
, Length
, AsyncInfo
);
184 DP("copy from device to device\n");
185 DeviceTy
&SrcDev
= *PM
->Devices
[SrcDevice
];
186 DeviceTy
&DstDev
= *PM
->Devices
[DstDevice
];
187 // First try to use D2D memcpy which is more efficient. If fails, fall back
188 // to inefficient way.
189 if (SrcDev
.isDataExchangable(DstDev
)) {
190 AsyncInfoTy
AsyncInfo(SrcDev
);
191 Rc
= SrcDev
.dataExchange(SrcAddr
, DstDev
, DstAddr
, Length
, AsyncInfo
);
192 if (Rc
== OFFLOAD_SUCCESS
)
193 return OFFLOAD_SUCCESS
;
// Fallback: stage the data through a temporary host buffer.
196 void *Buffer
= malloc(Length
);
198 AsyncInfoTy
AsyncInfo(SrcDev
);
199 Rc
= SrcDev
.retrieveData(Buffer
, SrcAddr
, Length
, AsyncInfo
);
// Only upload to the destination if the download from the source succeeded.
201 if (Rc
== OFFLOAD_SUCCESS
) {
202 AsyncInfoTy
AsyncInfo(DstDev
);
203 Rc
= DstDev
.submitData(DstAddr
, Buffer
, Length
, AsyncInfo
);
208 DP("omp_target_memcpy returns %d\n", Rc
);
/// Task entry point: reads Task->shareds as a TargetMemcpyArgsTy and calls
/// omp_target_memcpy_rect (guarded by Args->IsRectMemcpy) or
/// omp_target_memcpy with the stored arguments, logging each result code
/// through DP.
/// NOTE(review): Gtid is not used in the visible lines. The branch separating
/// the two calls (presumably an else), the release of Args, the return
/// statement and the closing braces are not visible in this excerpt — confirm
/// against the complete file.
212 // The helper function that calls omp_target_memcpy or omp_target_memcpy_rect
213 static int libomp_target_memcpy_async_task(kmp_int32 Gtid
, kmp_task_t
*Task
) {
// The argument bundle travels in the task's `shareds` pointer.
217 TargetMemcpyArgsTy
*Args
= (TargetMemcpyArgsTy
*)Task
->shareds
;
222 // Call the blocking version
223 int Rc
= OFFLOAD_SUCCESS
;
224 if (Args
->IsRectMemcpy
) {
225 Rc
= omp_target_memcpy_rect(
226 Args
->Dst
, Args
->Src
, Args
->ElementSize
, Args
->NumDims
, Args
->Volume
,
227 Args
->DstOffsets
, Args
->SrcOffsets
, Args
->DstDimensions
,
228 Args
->SrcDimensions
, Args
->DstDevice
, Args
->SrcDevice
);
230 DP("omp_target_memcpy_rect returns %d\n", Rc
);
232 Rc
= omp_target_memcpy(Args
->Dst
, Args
->Src
, Args
->Length
, Args
->DstOffset
,
233 Args
->SrcOffset
, Args
->DstDevice
, Args
->SrcDevice
);
235 DP("omp_target_memcpy returns %d\n", Rc
);
238 // Release the arguments object
/// Task entry point: reads Task->shareds as a TargetMemsetArgsTy, calls
/// omp_target_memset with the stored Ptr, C, N and DeviceNum, and returns
/// OFFLOAD_SUCCESS. The result of omp_target_memset is not inspected.
/// NOTE(review): Gtid is not used in the visible lines; any release of Args
/// and the closing brace are not visible in this excerpt — confirm against the
/// complete file.
244 static int libomp_target_memset_async_task(kmp_int32 Gtid
, kmp_task_t
*Task
) {
// The argument bundle travels in the task's `shareds` pointer.
248 auto *Args
= reinterpret_cast<TargetMemsetArgsTy
*>(Task
->shareds
);
252 // call omp_target_memset()
253 omp_target_memset(Args
->Ptr
, Args
->C
, Args
->N
, Args
->DeviceNum
);
257 return OFFLOAD_SUCCESS
;
/// Appends one kmp_depend_info_t to Vec for each of the first DepObjCount
/// entries of DepObjList. Each omp_depend_t entry is cast to a
/// kmp_depend_info_t pointer and the pointed-to value is copied.
/// NOTE(review): the line carrying the return type / storage specifiers and
/// the closing braces are not visible in this excerpt — confirm against the
/// complete file.
261 convertDepObjVector(llvm::SmallVector
<kmp_depend_info_t
> &Vec
, int DepObjCount
,
262 omp_depend_t
*DepObjList
) {
263 for (int i
= 0; i
< DepObjCount
; ++i
) {
264 omp_depend_t DepObj
= DepObjList
[i
];
// Dereference the depend object as kmp_depend_info_t and store a copy.
265 Vec
.push_back(*((kmp_depend_info_t
*)DepObj
));
/// Creates and launches a helper task that runs Fn with Args as its shared
/// data. Visible steps: obtain the global thread id, set the hidden_helper bit
/// in the task flags, allocate the task with __kmpc_omp_target_task_alloc,
/// store Args in Task->shareds, convert DepObjList into kmp_depend_info_t
/// entries, and submit the task with __kmpc_omp_task_with_deps.
/// NOTE(review): the template header / return type, the declaration of Flags,
/// any check of the allocated Task, the return of Rc and the closing brace are
/// not visible in this excerpt — confirm against the complete file.
271 libomp_helper_task_creation(T
*Args
, int (*Fn
)(kmp_int32
, kmp_task_t
*),
272 int DepObjCount
, omp_depend_t
*DepObjList
) {
273 // Create global thread ID
274 int Gtid
= __kmpc_global_thread_num(nullptr);
276 // Setup the hidden helper flags
// Flags is viewed through a kmp_tasking_flags_t pointer to set one bit field.
278 kmp_tasking_flags_t
*InputFlags
= (kmp_tasking_flags_t
*)&Flags
;
279 InputFlags
->hidden_helper
= 1;
281 // Alloc the helper task
282 kmp_task_t
*Task
= __kmpc_omp_target_task_alloc(
283 nullptr, Gtid
, Flags
, sizeof(kmp_task_t
), 0, Fn
, -1);
289 // Setup the arguments for the helper task
290 Task
->shareds
= Args
;
292 // Convert types of depend objects
293 llvm::SmallVector
<kmp_depend_info_t
> DepObjs
;
294 convertDepObjVector(DepObjs
, DepObjCount
, DepObjList
);
296 // Launch the helper task
297 int Rc
= __kmpc_omp_task_with_deps(nullptr, Gtid
, Task
, DepObjCount
,
298 DepObjs
.data(), 0, nullptr);
/// Fills NumBytes bytes at Ptr on device DeviceNum with ByteVal. In the
/// visible code, the host case (DeviceNum == omp_get_initial_device()) uses
/// memset directly. Otherwise a buffer obtained from omp_target_alloc on the
/// initial device is filled with memset, copied to Ptr with
/// omp_target_memcpy, and released with omp_target_free.
/// NOTE(review): the end of the parameter list (DeviceNum), the body of the
/// no-op branch, the else branches, the check of Shadow, the return statement
/// and the closing braces are not visible in this excerpt — confirm against
/// the complete file.
303 EXTERN
void *omp_target_memset(void *Ptr
, int ByteVal
, size_t NumBytes
,
306 DP("Call to omp_target_memset, device %d, device pointer %p, size %zu\n",
307 DeviceNum
, Ptr
, NumBytes
);
309 // Behave as a no-op if N==0 or if Ptr is nullptr (as a useful implementation
310 // of unspecified behavior, see OpenMP spec).
311 if (!Ptr
|| NumBytes
== 0) {
315 if (DeviceNum
== omp_get_initial_device()) {
// NOTE(review): unlike the other DP messages in this function, this format
// string has no trailing newline.
316 DP("filling memory on host via memset");
317 memset(Ptr
, ByteVal
, NumBytes
); // ignore return value, memset() cannot fail
319 // TODO: replace the omp_target_memset() slow path with the fast path.
320 // That will require the ability to execute a kernel from within
321 // libomptarget.so (which we do not have at the moment).
323 // This is a very slow path: create a filled array on the host and upload
324 // it to the GPU device.
325 int InitialDevice
= omp_get_initial_device();
326 void *Shadow
= omp_target_alloc(NumBytes
, InitialDevice
);
328 (void)memset(Shadow
, ByteVal
, NumBytes
);
329 (void)omp_target_memcpy(Ptr
, Shadow
, NumBytes
, 0, 0, DeviceNum
,
331 (void)omp_target_free(Shadow
, InitialDevice
);
333 // If the omp_target_alloc has failed, let's just not do anything.
334 // omp_target_memset does not have any good way to fail, so we
335 // simply avoid a catastrophic failure of the process for now.
336 DP("omp_target_memset failed to fill memory due to error with "
341 DP("omp_target_memset returns %p\n", Ptr
);
/// Asynchronous variant of omp_target_memset. It packages Ptr, ByteVal,
/// NumBytes and DeviceNum into a heap-allocated TargetMemsetArgsTy and hands
/// it to libomp_helper_task_creation together with
/// libomp_target_memset_async_task and the dependence list. The helper's
/// return code is deliberately discarded.
/// NOTE(review): the statement executed by the no-op check, the return
/// statement and the closing brace are not visible in this excerpt — confirm
/// against the complete file.
345 EXTERN
void *omp_target_memset_async(void *Ptr
, int ByteVal
, size_t NumBytes
,
346 int DeviceNum
, int DepObjCount
,
347 omp_depend_t
*DepObjList
) {
// NOTE(review): this format string has no trailing newline, unlike the
// corresponding message in omp_target_memset.
348 DP("Call to omp_target_memset_async, device %d, device pointer %p, size %zu",
349 DeviceNum
, Ptr
, NumBytes
);
351 // Behave as a no-op if N==0 or if Ptr is nullptr (as a useful implementation
352 // of unspecified behavior, see OpenMP spec).
353 if (!Ptr
|| NumBytes
== 0)
356 // Create the task object to deal with the async invocation
357 auto *Args
= new TargetMemsetArgsTy
{Ptr
, ByteVal
, NumBytes
, DeviceNum
};
359 // omp_target_memset_async() cannot fail via a return code, so ignore the
360 // return code of the helper function
361 (void)libomp_helper_task_creation(Args
, &libomp_target_memset_async_task
,
362 DepObjCount
, DepObjList
);
/// Asynchronous variant of omp_target_memcpy. It packages the copy arguments
/// into a heap-allocated TargetMemcpyArgsTy and launches
/// libomp_target_memcpy_async_task through libomp_helper_task_creation with
/// the given dependence list. Rc holds that helper's result.
/// NOTE(review): the end of the DP argument list, the statement executed when
/// Dst or Src is null, the return statement and the closing brace are not
/// visible in this excerpt — confirm against the complete file.
367 EXTERN
int omp_target_memcpy_async(void *Dst
, const void *Src
, size_t Length
,
368 size_t DstOffset
, size_t SrcOffset
,
369 int DstDevice
, int SrcDevice
,
370 int DepObjCount
, omp_depend_t
*DepObjList
) {
372 DP("Call to omp_target_memcpy_async, dst device %d, src device %d, "
373 "dst addr " DPxMOD
", src addr " DPxMOD
", dst offset %zu, "
374 "src offset %zu, length %zu\n",
375 DstDevice
, SrcDevice
, DPxPTR(Dst
), DPxPTR(Src
), DstOffset
, SrcOffset
,
378 // Check the source and dest address
379 if (Dst
== nullptr || Src
== nullptr)
382 // Create task object
383 TargetMemcpyArgsTy
*Args
= new TargetMemcpyArgsTy(
384 Dst
, Src
, Length
, DstOffset
, SrcOffset
, DstDevice
, SrcDevice
);
386 // Create and launch helper task
387 int Rc
= libomp_helper_task_creation(Args
, &libomp_target_memcpy_async_task
,
388 DepObjCount
, DepObjList
);
390 DP("omp_target_memcpy_async returns %d\n", Rc
);
/// Copies a rectangular sub-volume between two multi-dimensional arrays by
/// recursing over the leading dimension. Visible logic:
///  - a direct omp_target_memcpy call whose length and offsets are
///    ElementSize times Volume[0], DstOffsets[0] and SrcOffsets[0]
///    (presumably the one-dimensional base case — its condition is not
///    visible);
///  - otherwise the slice sizes are ElementSize times the product of
///    dimensions 1..NumDims-1, and each of the Volume[0] slices is copied by a
///    recursive call with NumDims - 1 and every array pointer advanced by one.
/// NOTE(review): the return-type line, the declaration of Rc, the
/// max-dimension query branch, the early returns, the branch conditions around
/// the base case, the loop's failure check, the final return and the closing
/// braces are not visible in this excerpt — confirm against the complete file.
395 omp_target_memcpy_rect(void *Dst
, const void *Src
, size_t ElementSize
,
396 int NumDims
, const size_t *Volume
,
397 const size_t *DstOffsets
, const size_t *SrcOffsets
,
398 const size_t *DstDimensions
, const size_t *SrcDimensions
,
399 int DstDevice
, int SrcDevice
) {
401 DP("Call to omp_target_memcpy_rect, dst device %d, src device %d, "
402 "dst addr " DPxMOD
", src addr " DPxMOD
", dst offsets " DPxMOD
", "
403 "src offsets " DPxMOD
", dst dims " DPxMOD
", src dims " DPxMOD
", "
404 "volume " DPxMOD
", element size %zu, num_dims %d\n",
405 DstDevice
, SrcDevice
, DPxPTR(Dst
), DPxPTR(Src
), DPxPTR(DstOffsets
),
406 DPxPTR(SrcOffsets
), DPxPTR(DstDimensions
), DPxPTR(SrcDimensions
),
407 DPxPTR(Volume
), ElementSize
, NumDims
);
410 DP("Call to omp_target_memcpy_rect returns max supported dimensions %d\n",
// Reject null pointers and non-positive element size / dimension count.
415 if (!Dst
|| !Src
|| ElementSize
< 1 || NumDims
< 1 || !Volume
||
416 !DstOffsets
|| !SrcOffsets
|| !DstDimensions
|| !SrcDimensions
) {
417 REPORT("Call to omp_target_memcpy_rect with invalid arguments\n");
// Contiguous copy: element counts are scaled to bytes by ElementSize.
423 Rc
= omp_target_memcpy(Dst
, Src
, ElementSize
* Volume
[0],
424 ElementSize
* DstOffsets
[0],
425 ElementSize
* SrcOffsets
[0], DstDevice
, SrcDevice
);
// Byte size of one slice along dimension 0: ElementSize times the extents of
// all remaining dimensions.
427 size_t DstSliceSize
= ElementSize
;
428 size_t SrcSliceSize
= ElementSize
;
429 for (int I
= 1; I
< NumDims
; ++I
) {
430 DstSliceSize
*= DstDimensions
[I
];
431 SrcSliceSize
*= SrcDimensions
[I
];
// Byte offset of the first slice to copy in each array.
434 size_t DstOff
= DstOffsets
[0] * DstSliceSize
;
435 size_t SrcOff
= SrcOffsets
[0] * SrcSliceSize
;
// Copy each slice by recursing with the leading dimension stripped.
436 for (size_t I
= 0; I
< Volume
[0]; ++I
) {
437 Rc
= omp_target_memcpy_rect(
438 (char *)Dst
+ DstOff
+ DstSliceSize
* I
,
439 (char *)const_cast<void *>(Src
) + SrcOff
+ SrcSliceSize
* I
,
440 ElementSize
, NumDims
- 1, Volume
+ 1, DstOffsets
+ 1, SrcOffsets
+ 1,
441 DstDimensions
+ 1, SrcDimensions
+ 1, DstDevice
, SrcDevice
);
444 DP("Recursive call to omp_target_memcpy_rect returns unsuccessfully\n");
450 DP("omp_target_memcpy_rect returns %d\n", Rc
);
/// Asynchronous variant of omp_target_memcpy_rect. It packages the
/// rectangular-copy arguments into a heap-allocated TargetMemcpyArgsTy and
/// launches libomp_target_memcpy_async_task through
/// libomp_helper_task_creation with the given dependence list. Rc holds that
/// helper's result.
/// NOTE(review): the max-dimension query branch (its condition, the rest of
/// its DP call and its return), the statement executed when Dst or Src is
/// null, the final return and the closing brace are not visible in this
/// excerpt — confirm against the complete file.
454 EXTERN
int omp_target_memcpy_rect_async(
455 void *Dst
, const void *Src
, size_t ElementSize
, int NumDims
,
456 const size_t *Volume
, const size_t *DstOffsets
, const size_t *SrcOffsets
,
457 const size_t *DstDimensions
, const size_t *SrcDimensions
, int DstDevice
,
458 int SrcDevice
, int DepObjCount
, omp_depend_t
*DepObjList
) {
460 DP("Call to omp_target_memcpy_rect_async, dst device %d, src device %d, "
461 "dst addr " DPxMOD
", src addr " DPxMOD
", dst offsets " DPxMOD
", "
462 "src offsets " DPxMOD
", dst dims " DPxMOD
", src dims " DPxMOD
", "
463 "volume " DPxMOD
", element size %zu, num_dims %d\n",
464 DstDevice
, SrcDevice
, DPxPTR(Dst
), DPxPTR(Src
), DPxPTR(DstOffsets
),
465 DPxPTR(SrcOffsets
), DPxPTR(DstDimensions
), DPxPTR(SrcDimensions
),
466 DPxPTR(Volume
), ElementSize
, NumDims
);
468 // Need to check this first to not return OFFLOAD_FAIL instead
470 DP("Call to omp_target_memcpy_rect returns max supported dimensions %d\n",
475 // Check the source and dest address
476 if (Dst
== nullptr || Src
== nullptr)
479 // Create task object
480 TargetMemcpyArgsTy
*Args
= new TargetMemcpyArgsTy(
481 Dst
, Src
, ElementSize
, NumDims
, Volume
, DstOffsets
, SrcOffsets
,
482 DstDimensions
, SrcDimensions
, DstDevice
, SrcDevice
);
484 // Create and launch helper task
485 int Rc
= libomp_helper_task_creation(Args
, &libomp_target_memcpy_async_task
,
486 DepObjCount
, DepObjList
);
488 DP("omp_target_memcpy_rect_async returns %d\n", Rc
);
/// Associates HostPtr with the device address DevicePtr + DeviceOffset on
/// device DeviceNum for Size bytes, through Device.associatePtr. Visible
/// checks: null pointers or zero Size are reported as invalid, the initial
/// device is reported as not associable, and the device must pass
/// deviceIsReady(). Rc holds the associatePtr result.
/// NOTE(review): the end of the parameter list (DeviceNum), the returns in the
/// error branches, the final return and the closing braces are not visible in
/// this excerpt — confirm against the complete file.
492 EXTERN
int omp_target_associate_ptr(const void *HostPtr
, const void *DevicePtr
,
493 size_t Size
, size_t DeviceOffset
,
496 DP("Call to omp_target_associate_ptr with host_ptr " DPxMOD
", "
497 "device_ptr " DPxMOD
", size %zu, device_offset %zu, device_num %d\n",
498 DPxPTR(HostPtr
), DPxPTR(DevicePtr
), Size
, DeviceOffset
, DeviceNum
);
// Size is a size_t, so `Size <= 0` can only be true for Size == 0.
500 if (!HostPtr
|| !DevicePtr
|| Size
<= 0) {
501 REPORT("Call to omp_target_associate_ptr with invalid arguments\n");
505 if (DeviceNum
== omp_get_initial_device()) {
506 REPORT("omp_target_associate_ptr: no association possible on the host\n");
510 if (!deviceIsReady(DeviceNum
)) {
511 REPORT("omp_target_associate_ptr returns OFFLOAD_FAIL\n");
515 DeviceTy
&Device
= *PM
->Devices
[DeviceNum
];
// The offset is applied with 64-bit integer arithmetic on the pointer value.
516 void *DeviceAddr
= (void *)((uint64_t)DevicePtr
+ (uint64_t)DeviceOffset
);
517 int Rc
= Device
.associatePtr(const_cast<void *>(HostPtr
),
518 const_cast<void *>(DeviceAddr
), Size
);
519 DP("omp_target_associate_ptr returns %d\n", Rc
);
/// Removes the association of HostPtr on device DeviceNum through
/// Device.disassociatePtr. Visible checks: the initial device is reported as
/// not associable and the device must pass deviceIsReady(). Rc holds the
/// disassociatePtr result.
/// NOTE(review): the rest of the first DP format string, the null test on
/// HostPtr, the REPORT( opener of the host-device message, the returns in the
/// error branches, the final return and the closing braces are not visible in
/// this excerpt — confirm against the complete file.
523 EXTERN
int omp_target_disassociate_ptr(const void *HostPtr
, int DeviceNum
) {
525 DP("Call to omp_target_disassociate_ptr with host_ptr " DPxMOD
", "
527 DPxPTR(HostPtr
), DeviceNum
);
// NOTE(review): this message names omp_target_associate_ptr although it is
// emitted from omp_target_disassociate_ptr — likely a copy/paste slip.
530 REPORT("Call to omp_target_associate_ptr with invalid host_ptr\n");
534 if (DeviceNum
== omp_get_initial_device()) {
536 "omp_target_disassociate_ptr: no association possible on the host\n");
540 if (!deviceIsReady(DeviceNum
)) {
541 REPORT("omp_target_disassociate_ptr returns OFFLOAD_FAIL\n");
545 DeviceTy
&Device
= *PM
->Devices
[DeviceNum
];
546 int Rc
= Device
.disassociatePtr(const_cast<void *>(HostPtr
));
547 DP("omp_target_disassociate_ptr returns %d\n", Rc
);
/// Looks up the device address that host pointer Ptr is mapped to on device
/// DeviceNum. Visible logic: for the initial device, Ptr itself is returned;
/// otherwise DeviceNum is compared with PM->Devices.size() (read under
/// RTLsMtx), the device must pass deviceIsReady(), and a 1-byte getTgtPtrBegin
/// lookup is made with UpdateRefCount and UseHoldRefCount both false. The
/// function returns TPR.TargetPointer when the lookup does not report the
/// pointer as absent.
/// NOTE(review): the null test on Ptr, the returns in the failure branches,
/// the scope braces around the lock_guard and the closing brace are not
/// visible in this excerpt — confirm against the complete file.
551 EXTERN
void *omp_get_mapped_ptr(const void *Ptr
, int DeviceNum
) {
553 DP("Call to omp_get_mapped_ptr with ptr " DPxMOD
", device_num %d.\n",
554 DPxPTR(Ptr
), DeviceNum
);
557 REPORT("Call to omp_get_mapped_ptr with nullptr.\n");
561 if (DeviceNum
== omp_get_initial_device()) {
562 REPORT("Device %d is initial device, returning Ptr " DPxMOD
".\n",
563 DeviceNum
, DPxPTR(Ptr
));
564 return const_cast<void *>(Ptr
);
// DevicesSize is first seeded from omp_get_initial_device() and then
// overwritten with the device-list size while RTLsMtx is held.
567 int DevicesSize
= omp_get_initial_device();
569 std::lock_guard
<std::mutex
> LG(PM
->RTLsMtx
);
570 DevicesSize
= PM
->Devices
.size();
// NOTE(review): both operands are signed int here, so a negative DeviceNum is
// not rejected by this comparison (omp_target_is_present casts to size_t).
572 if (DevicesSize
<= DeviceNum
) {
573 DP("DeviceNum %d is invalid, returning nullptr.\n", DeviceNum
);
577 if (!deviceIsReady(DeviceNum
)) {
578 REPORT("Device %d is not ready, returning nullptr.\n", DeviceNum
);
582 auto &Device
= *PM
->Devices
[DeviceNum
];
// Only 1 byte is looked up and no reference counts are updated.
583 TargetPointerResultTy TPR
= Device
.getTgtPtrBegin(const_cast<void *>(Ptr
), 1,
584 /*UpdateRefCount=*/false,
585 /*UseHoldRefCount=*/false);
586 if (!TPR
.isPresent()) {
587 DP("Ptr " DPxMOD
"is not present on device %d, returning nullptr.\n",
588 DPxPTR(Ptr
), DeviceNum
);
592 DP("omp_get_mapped_ptr returns " DPxMOD
".\n", DPxPTR(TPR
.TargetPointer
));
594 return TPR
.TargetPointer
;