//===--- cuda/dynamic_cuda/cuda.cpp ------------------------------- C++ -*-===//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//===----------------------------------------------------------------------===//
// Implements a subset of the CUDA API by calling into the CUDA library via
// dlopen. The dlopen/dlsym calls are made as part of the call to cuInit.
//===----------------------------------------------------------------------===//
19 #include <unordered_map>
25 DLWRAP_INTERNAL(cuInit
, 1);
27 DLWRAP(cuCtxGetDevice
, 1);
28 DLWRAP(cuDeviceGet
, 2);
29 DLWRAP(cuDeviceGetAttribute
, 3);
30 DLWRAP(cuDeviceGetCount
, 1);
31 DLWRAP(cuFuncGetAttribute
, 3);
34 DLWRAP(cuDeviceGetName
, 3);
35 DLWRAP(cuDeviceTotalMem
, 2);
36 DLWRAP(cuDriverGetVersion
, 1);
38 DLWRAP(cuGetErrorString
, 2);
39 DLWRAP(cuLaunchKernel
, 11);
41 DLWRAP(cuMemAlloc
, 2);
42 DLWRAP(cuMemAllocHost
, 2);
43 DLWRAP(cuMemAllocManaged
, 3);
45 DLWRAP(cuMemcpyDtoDAsync
, 4);
46 DLWRAP(cuMemcpyDtoH
, 3);
47 DLWRAP(cuMemcpyDtoHAsync
, 4);
48 DLWRAP(cuMemcpyHtoD
, 3);
49 DLWRAP(cuMemcpyHtoDAsync
, 4);
52 DLWRAP(cuMemFreeHost
, 1);
53 DLWRAP(cuModuleGetFunction
, 3);
54 DLWRAP(cuModuleGetGlobal
, 4);
56 DLWRAP(cuModuleUnload
, 1);
57 DLWRAP(cuStreamCreate
, 2);
58 DLWRAP(cuStreamDestroy
, 1);
59 DLWRAP(cuStreamSynchronize
, 1);
60 DLWRAP(cuCtxSetCurrent
, 1);
61 DLWRAP(cuDevicePrimaryCtxRelease
, 1);
62 DLWRAP(cuDevicePrimaryCtxGetState
, 3);
63 DLWRAP(cuDevicePrimaryCtxSetFlags
, 2);
64 DLWRAP(cuDevicePrimaryCtxRetain
, 2);
65 DLWRAP(cuModuleLoadDataEx
, 5);
67 DLWRAP(cuDeviceCanAccessPeer
, 3);
68 DLWRAP(cuCtxEnablePeerAccess
, 2);
69 DLWRAP(cuMemcpyPeerAsync
, 6);
71 DLWRAP(cuCtxGetLimit
, 2);
72 DLWRAP(cuCtxSetLimit
, 2);
74 DLWRAP(cuEventCreate
, 2);
75 DLWRAP(cuEventRecord
, 2);
76 DLWRAP(cuStreamWaitEvent
, 3);
77 DLWRAP(cuEventSynchronize
, 1);
78 DLWRAP(cuEventDestroy
, 1);
// Name of the CUDA driver library that checkForCUDA() passes to dlopen.
// A build may override it by predefining DYNAMIC_CUDA_PATH; otherwise the
// library is looked up by its bare name. The conditional must be closed here
// so that the definitions below are not swallowed by it.
#ifndef DYNAMIC_CUDA_PATH
#define DYNAMIC_CUDA_PATH "libcuda.so"
#endif

// Identification used to build the prefix of this plugin's debug output
// (presumably consumed by the DP macro -- confirm against its definition).
#define TARGET_NAME CUDA
#define DEBUG_PREFIX "Target " GETNAME(TARGET_NAME) " RTL"
89 static bool checkForCUDA() {
90 // return true if dlopen succeeded and all functions found
92 // Prefer _v2 versions of functions if found in the library
93 std::unordered_map
<std::string
, const char *> TryFirst
= {
94 {"cuMemAlloc", "cuMemAlloc_v2"},
95 {"cuMemFree", "cuMemFree_v2"},
96 {"cuMemcpyDtoH", "cuMemcpyDtoH_v2"},
97 {"cuMemcpyHtoD", "cuMemcpyHtoD_v2"},
98 {"cuStreamDestroy", "cuStreamDestroy_v2"},
99 {"cuModuleGetGlobal", "cuModuleGetGlobal_v2"},
100 {"cuMemcpyDtoHAsync", "cuMemcpyDtoHAsync_v2"},
101 {"cuMemcpyDtoDAsync", "cuMemcpyDtoDAsync_v2"},
102 {"cuMemcpyHtoDAsync", "cuMemcpyHtoDAsync_v2"},
103 {"cuDevicePrimaryCtxRelease", "cuDevicePrimaryCtxRelease_v2"},
104 {"cuDevicePrimaryCtxSetFlags", "cuDevicePrimaryCtxSetFlags_v2"},
107 const char *CudaLib
= DYNAMIC_CUDA_PATH
;
108 void *DynlibHandle
= dlopen(CudaLib
, RTLD_NOW
);
110 DP("Unable to load library '%s': %s!\n", CudaLib
, dlerror());
114 for (size_t I
= 0; I
< dlwrap::size(); I
++) {
115 const char *Sym
= dlwrap::symbol(I
);
117 auto It
= TryFirst
.find(Sym
);
118 if (It
!= TryFirst
.end()) {
119 const char *First
= It
->second
;
120 void *P
= dlsym(DynlibHandle
, First
);
122 DP("Implementing %s with dlsym(%s) -> %p\n", Sym
, First
, P
);
123 *dlwrap::pointer(I
) = P
;
128 void *P
= dlsym(DynlibHandle
, Sym
);
130 DP("Unable to find '%s' in '%s'!\n", Sym
, CudaLib
);
133 DP("Implementing %s with dlsym(%s) -> %p\n", Sym
, Sym
, P
);
135 *dlwrap::pointer(I
) = P
;
141 CUresult
cuInit(unsigned X
) {
142 // Note: Called exactly once from cuda rtl.cpp in a global constructor so
143 // does not need to handle being called repeatedly or concurrently
144 if (!checkForCUDA()) {
145 return CUDA_ERROR_INVALID_HANDLE
;
147 return dlwrap_cuInit(X
);