1 #include "clang/Basic/Cuda.h"
3 #include "llvm/ADT/StringRef.h"
4 #include "llvm/ADT/Twine.h"
5 #include "llvm/Support/ErrorHandling.h"
6 #include "llvm/Support/VersionTuple.h"
struct CudaVersionMapEntry {
  const char *Name;
  CudaVersion Version;
  llvm::VersionTuple TVersion;
};

#define CUDA_ENTRY(major, minor)                                              \
  {#major "." #minor, CudaVersion::CUDA_##major##minor,                       \
   llvm::VersionTuple(major, minor)}

static const CudaVersionMapEntry CudaNameVersionMap[] = {
    CUDA_ENTRY(7, 0),
    // ... one CUDA_ENTRY per released CUDA version ...
    {"", CudaVersion::NEW,
     llvm::VersionTuple(std::numeric_limits<int>::max())},
    {"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone.
};
#undef CUDA_ENTRY

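// The UNKNOWN tombstone above terminates the linear scans below.
// For example, CudaVersionToString(CudaVersion::CUDA_70) yields "7.0".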
const char *CudaVersionToString(CudaVersion V) {
  for (auto *I = CudaNameVersionMap; I->Version != CudaVersion::UNKNOWN; ++I)
    if (I->Version == V)
      return I->Name;

  return CudaVersionToString(CudaVersion::UNKNOWN);
}

CudaVersion CudaStringToVersion(const llvm::Twine &S) {
  std::string VS = S.str();
  for (auto *I = CudaNameVersionMap; I->Version != CudaVersion::UNKNOWN; ++I)
    if (I->Name == VS)
      return I->Version;
  return CudaVersion::UNKNOWN;
}

CudaVersion ToCudaVersion(llvm::VersionTuple Version) {
  for (auto *I = CudaNameVersionMap; I->Version != CudaVersion::UNKNOWN; ++I)
    if (I->TVersion == Version)
      return I->Version;
  return CudaVersion::UNKNOWN;
}

struct OffloadArchToStringMap {
  OffloadArch arch;
  const char *arch_name;
  const char *virtual_arch_name;
};

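// SM/SM2 expand to NVIDIA {sm_<n>, compute_<n>} entries and GFX to AMD
// gfx<n> entries that all share the "compute_amdgcn" virtual architecture.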
#define SM2(sm, ca) {OffloadArch::SM_##sm, "sm_" #sm, ca}
#define SM(sm) SM2(sm, "compute_" #sm)
#define GFX(gpu) {OffloadArch::GFX##gpu, "gfx" #gpu, "compute_amdgcn"}
static const OffloadArchToStringMap arch_names[] = {
    {OffloadArch::UNUSED, "", ""},
    SM2(20, "compute_20"), SM2(21, "compute_20"), // Fermi
    SM(30), {OffloadArch::SM_32_, "sm_32", "compute_32"}, SM(35), SM(37), // Kepler
    SM(50), SM(52), SM(53),   // Maxwell
    SM(60), SM(61), SM(62),   // Pascal
    SM(70), SM(72),           // Volta
    SM(75),                   // Turing
    SM(80), SM(86),           // Ampere
    SM(87),                   // Jetson/Drive AGX Orin
    SM(89),                   // Ada Lovelace
    SM(90), SM(90a),          // Hopper
    SM(100),                  // Blackwell
    // ... GFX(...) entries for older AMD targets ...
    {OffloadArch::GFX9_GENERIC, "gfx9-generic", "compute_amdgcn"},
    // ... additional GFX(...) entries ...
    {OffloadArch::GFX9_4_GENERIC, "gfx9-4-generic", "compute_amdgcn"},
    // ... additional GFX(...) entries ...
    {OffloadArch::GFX10_1_GENERIC, "gfx10-1-generic", "compute_amdgcn"},
    GFX(1010), // gfx1010
    GFX(1011), // gfx1011
    GFX(1012), // gfx1012
    GFX(1013), // gfx1013
    {OffloadArch::GFX10_3_GENERIC, "gfx10-3-generic", "compute_amdgcn"},
    GFX(1030), // gfx1030
    GFX(1031), // gfx1031
    GFX(1032), // gfx1032
    GFX(1033), // gfx1033
    GFX(1034), // gfx1034
    GFX(1035), // gfx1035
    GFX(1036), // gfx1036
    {OffloadArch::GFX11_GENERIC, "gfx11-generic", "compute_amdgcn"},
    GFX(1100), // gfx1100
    GFX(1101), // gfx1101
    GFX(1102), // gfx1102
    GFX(1103), // gfx1103
    GFX(1150), // gfx1150
    GFX(1151), // gfx1151
    GFX(1152), // gfx1152
    GFX(1153), // gfx1153
    {OffloadArch::GFX12_GENERIC, "gfx12-generic", "compute_amdgcn"},
    GFX(1200), // gfx1200
    GFX(1201), // gfx1201
    {OffloadArch::AMDGCNSPIRV, "amdgcnspirv", "compute_amdgcn"},
    {OffloadArch::Generic, "generic", ""},
};
#undef SM
#undef SM2
#undef GFX

const char *OffloadArchToString(OffloadArch A) {
  auto result = std::find_if(
      std::begin(arch_names), std::end(arch_names),
      [A](const OffloadArchToStringMap &map) { return A == map.arch; });
  if (result == std::end(arch_names))
    return "unknown";
  return result->arch_name;
}

const char *OffloadArchToVirtualArchString(OffloadArch A) {
  auto result = std::find_if(
      std::begin(arch_names), std::end(arch_names),
      [A](const OffloadArchToStringMap &map) { return A == map.arch; });
  if (result == std::end(arch_names))
    return "unknown";
  return result->virtual_arch_name;
}

OffloadArch StringToOffloadArch(llvm::StringRef S) {
  auto result = std::find_if(
      std::begin(arch_names), std::end(arch_names),
      [S](const OffloadArchToStringMap &map) { return S == map.arch_name; });
  if (result == std::end(arch_names))
    return OffloadArch::UNKNOWN;
  return result->arch;
}

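// Earliest CUDA release that supports the given offload architecture.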
CudaVersion MinVersionForOffloadArch(OffloadArch A) {
  if (A == OffloadArch::UNKNOWN)
    return CudaVersion::UNKNOWN;

  // AMD GPUs do not depend on CUDA versions.
  if (IsAMDOffloadArch(A))
    return CudaVersion::CUDA_70;

  switch (A) {
  case OffloadArch::SM_20:
  case OffloadArch::SM_21:
  case OffloadArch::SM_30:
  case OffloadArch::SM_32_:
  case OffloadArch::SM_35:
  case OffloadArch::SM_37:
  case OffloadArch::SM_50:
  case OffloadArch::SM_52:
  case OffloadArch::SM_53:
    return CudaVersion::CUDA_70;
  case OffloadArch::SM_60:
  case OffloadArch::SM_61:
  case OffloadArch::SM_62:
    return CudaVersion::CUDA_80;
  case OffloadArch::SM_70:
    return CudaVersion::CUDA_90;
  case OffloadArch::SM_72:
    return CudaVersion::CUDA_91;
  case OffloadArch::SM_75:
    return CudaVersion::CUDA_100;
  case OffloadArch::SM_80:
    return CudaVersion::CUDA_110;
  case OffloadArch::SM_86:
    return CudaVersion::CUDA_111;
  case OffloadArch::SM_87:
    return CudaVersion::CUDA_114;
  case OffloadArch::SM_89:
  case OffloadArch::SM_90:
    return CudaVersion::CUDA_118;
  case OffloadArch::SM_90a:
    return CudaVersion::CUDA_120;
  case OffloadArch::SM_100:
    return CudaVersion::NEW; // TODO: use specific CUDA version once it's
                             // public.
  default:
    llvm_unreachable("invalid enum");
  }
}

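// Latest CUDA release that still supports the given offload architecture;
// anything not explicitly capped below returns CudaVersion::NEW.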
CudaVersion MaxVersionForOffloadArch(OffloadArch A) {
  // AMD GPUs do not depend on CUDA versions.
  if (IsAMDOffloadArch(A))
    return CudaVersion::NEW;

  switch (A) {
  case OffloadArch::UNKNOWN:
    return CudaVersion::UNKNOWN;
  case OffloadArch::SM_20:
  case OffloadArch::SM_21:
    return CudaVersion::CUDA_80;
  case OffloadArch::SM_30:
  case OffloadArch::SM_32_:
    return CudaVersion::CUDA_102;
  case OffloadArch::SM_35:
  case OffloadArch::SM_37:
    return CudaVersion::CUDA_118;
  default:
    return CudaVersion::NEW;
  }
}

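// Feature availability checks, keyed on the CUDA version in use.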
bool CudaFeatureEnabled(llvm::VersionTuple Version, CudaFeature Feature) {
  return CudaFeatureEnabled(ToCudaVersion(Version), Feature);
}

bool CudaFeatureEnabled(CudaVersion Version, CudaFeature Feature) {
  switch (Feature) {
  case CudaFeature::CUDA_USES_NEW_LAUNCH:
    return Version >= CudaVersion::CUDA_92;
  case CudaFeature::CUDA_USES_FATBIN_REGISTER_END:
    return Version >= CudaVersion::CUDA_101;
  }
  llvm_unreachable("Unknown CUDA feature.");
}

} // namespace clang