1 //===--- cuda/dynamic_cuda/cuda.cpp ------------------------------ C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Implements a subset of the CUDA API by calling into the CUDA library via
10 // dlopen. The dlopen/dlsym calls are made as part of the call to cuInit.
12 //===----------------------------------------------------------------------===//
14 #include "llvm/Support/DynamicLibrary.h"
22 #include <unordered_map>
// Wrapper registrations for the CUDA driver entry points used by this file.
// Each entry names a driver function followed by an integer; the integer is
// presumably that function's parameter count (e.g. 11 for cuLaunchKernel) --
// confirm against the DLWRAP definition, which is not part of this file.
// Presumably these registrations are what dlwrap::size(), dlwrap::symbol()
// and dlwrap::pointer() enumerate when checkForCUDA() resolves symbols.
//
// cuInit is registered with DLWRAP_INTERNAL rather than DLWRAP: this file
// defines cuInit itself and forwards to dlwrap_cuInit.
26 DLWRAP_INTERNAL(cuInit
, 1)
28 DLWRAP(cuCtxGetDevice
, 1)
29 DLWRAP(cuDeviceGet
, 2)
30 DLWRAP(cuDeviceGetAttribute
, 3)
31 DLWRAP(cuDeviceGetCount
, 1)
32 DLWRAP(cuFuncGetAttribute
, 3)
35 DLWRAP(cuDeviceGetName
, 3)
36 DLWRAP(cuDeviceTotalMem
, 2)
37 DLWRAP(cuDriverGetVersion
, 1)
39 DLWRAP(cuGetErrorString
, 2)
40 DLWRAP(cuLaunchKernel
, 11)
43 DLWRAP(cuMemAllocHost
, 2)
44 DLWRAP(cuMemAllocManaged
, 3)
46 DLWRAP(cuMemcpyDtoDAsync
, 4)
47 DLWRAP(cuMemcpyDtoH
, 3)
48 DLWRAP(cuMemcpyDtoHAsync
, 4)
49 DLWRAP(cuMemcpyHtoD
, 3)
50 DLWRAP(cuMemcpyHtoDAsync
, 4)
53 DLWRAP(cuMemFreeHost
, 1)
54 DLWRAP(cuModuleGetFunction
, 3)
55 DLWRAP(cuModuleGetGlobal
, 4)
57 DLWRAP(cuModuleUnload
, 1)
58 DLWRAP(cuStreamCreate
, 2)
59 DLWRAP(cuStreamDestroy
, 1)
60 DLWRAP(cuStreamSynchronize
, 1)
61 DLWRAP(cuStreamQuery
, 1)
62 DLWRAP(cuCtxSetCurrent
, 1)
63 DLWRAP(cuDevicePrimaryCtxRelease
, 1)
64 DLWRAP(cuDevicePrimaryCtxGetState
, 3)
65 DLWRAP(cuDevicePrimaryCtxSetFlags
, 2)
66 DLWRAP(cuDevicePrimaryCtxRetain
, 2)
67 DLWRAP(cuModuleLoadDataEx
, 5)
// Peer access between devices.
69 DLWRAP(cuDeviceCanAccessPeer
, 3)
70 DLWRAP(cuCtxEnablePeerAccess
, 2)
71 DLWRAP(cuMemcpyPeerAsync
, 6)
// Context limits.
73 DLWRAP(cuCtxGetLimit
, 2)
74 DLWRAP(cuCtxSetLimit
, 2)
// Events.
76 DLWRAP(cuEventCreate
, 2)
77 DLWRAP(cuEventRecord
, 2)
78 DLWRAP(cuStreamWaitEvent
, 3)
79 DLWRAP(cuEventSynchronize
, 1)
80 DLWRAP(cuEventDestroy
, 1)
// Memory info, address reservation and allocation-handle management.
85 DLWRAP(cuMemRelease
, 1)
86 DLWRAP(cuMemAddressFree
, 2)
87 DLWRAP(cuMemGetInfo
, 2)
88 DLWRAP(cuMemAddressReserve
, 5)
90 DLWRAP(cuMemCreate
, 4)
91 DLWRAP(cuMemSetAccess
, 4)
92 DLWRAP(cuMemGetAllocationGranularity
, 3)
// Library name/path that checkForCUDA() hands to the dynamic loader; the
// #ifndef lets a build override the default "libcuda.so".
94 #ifndef DYNAMIC_CUDA_PATH
95 #define DYNAMIC_CUDA_PATH "libcuda.so"
// Target name spliced into the debug prefix below via GETNAME().
99 #define TARGET_NAME CUDA
// Prefix for debug output; presumably consumed by the DP() logging macro --
// confirm against the header that defines DP.
102 #define DEBUG_PREFIX "Target " GETNAME(TARGET_NAME) " RTL"
// Loads the CUDA library named by DYNAMIC_CUDA_PATH and resolves the symbols
// enumerated by dlwrap (dlwrap::size() entries), storing each resolved address
// through dlwrap::pointer(I). Symbols listed in TryFirst are looked up under
// their "_v2" name first. Load and lookup problems are reported through DP().
105 static bool checkForCUDA() {
106 // return true if dlopen succeeded and all functions found
108 // Prefer _v2 versions of functions if found in the library
// Key: symbol name as reported by dlwrap::symbol(); value: the alternative
// name to look up in the library first.
109 std::unordered_map
<std::string
, const char *> TryFirst
= {
110 {"cuMemAlloc", "cuMemAlloc_v2"},
111 {"cuMemFree", "cuMemFree_v2"},
112 {"cuMemcpyDtoH", "cuMemcpyDtoH_v2"},
113 {"cuMemcpyHtoD", "cuMemcpyHtoD_v2"},
114 {"cuStreamDestroy", "cuStreamDestroy_v2"},
115 {"cuModuleGetGlobal", "cuModuleGetGlobal_v2"},
116 {"cuMemcpyDtoHAsync", "cuMemcpyDtoHAsync_v2"},
117 {"cuMemcpyDtoDAsync", "cuMemcpyDtoDAsync_v2"},
118 {"cuMemcpyHtoDAsync", "cuMemcpyHtoDAsync_v2"},
119 {"cuDevicePrimaryCtxRelease", "cuDevicePrimaryCtxRelease_v2"},
120 {"cuDevicePrimaryCtxSetFlags", "cuDevicePrimaryCtxSetFlags_v2"},
// Name of the shared library to open.
123 const char *CudaLib
= DYNAMIC_CUDA_PATH
;
// Open the library through LLVM's DynamicLibrary. ErrMsg is passed to the
// loader and is printed in the failure message below.
125 auto DynlibHandle
= std::make_unique
<llvm::sys::DynamicLibrary
>(
126 llvm::sys::DynamicLibrary::getPermanentLibrary(CudaLib
, &ErrMsg
));
127 if (!DynlibHandle
->isValid()) {
128 DP("Unable to load library '%s': %s!\n", CudaLib
, ErrMsg
.c_str());
// Walk every wrapper slot known to dlwrap.
132 for (size_t I
= 0; I
< dlwrap::size(); I
++) {
133 const char *Sym
= dlwrap::symbol(I
);
// See whether this symbol has a preferred alternative name in TryFirst.
135 auto It
= TryFirst
.find(Sym
);
136 if (It
!= TryFirst
.end()) {
137 const char *First
= It
->second
;
138 void *P
= DynlibHandle
->getAddressOfSymbol(First
);
140 DP("Implementing %s with dlsym(%s) -> %p\n", Sym
, First
, P
);
141 *dlwrap::pointer(I
) = P
;
// Look the symbol up under its own name.
146 void *P
= DynlibHandle
->getAddressOfSymbol(Sym
);
148 DP("Unable to find '%s' in '%s'!\n", Sym
, CudaLib
);
151 DP("Implementing %s with dlsym(%s) -> %p\n", Sym
, Sym
, P
);
// Store the resolved address into the wrapper's slot.
153 *dlwrap::pointer(I
) = P
;
// Definition of cuInit provided by this file: binds the CUDA library with
// checkForCUDA() and then forwards X unchanged to dlwrap_cuInit. When
// checkForCUDA() returns false, dlwrap_cuInit is not called and
// CUDA_ERROR_INVALID_HANDLE is returned instead.
159 CUresult
cuInit(unsigned X
) {
160 // Note: Called exactly once from cuda rtl.cpp in a global constructor so
161 // does not need to handle being called repeatedly or concurrently
162 if (!checkForCUDA()) {
163 return CUDA_ERROR_INVALID_HANDLE
;
165 return dlwrap_cuInit(X
);