1 //===---------- DeviceOffload.cpp - Device Offloading------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements offloading to CUDA devices.
11 //===----------------------------------------------------------------------===//
13 #include "DeviceOffload.h"
15 #include "clang/Basic/TargetOptions.h"
16 #include "clang/CodeGen/ModuleBuilder.h"
17 #include "clang/Frontend/CompilerInstance.h"
19 #include "llvm/IR/LegacyPassManager.h"
20 #include "llvm/MC/TargetRegistry.h"
21 #include "llvm/Target/TargetMachine.h"
// Constructs the device-side incremental parser.
//
// Interp, Instance, LLVMCtx and Err are forwarded to the IncrementalParser
// base; HostParser and FS are stored in the HostParser and VFS members.
// The body then reads the CPU string from the compiler instance's target
// options, requires it to be "sm_" followed by a base-10 integer, and parses
// that integer into SMVersion. When the prefix is absent or the integer does
// not parse, an "Invalid CUDA architecture" StringError is joined into Err.
//
// NOTE(review): Err is used below but its parameter declaration is not
// visible in this excerpt, and the closing braces of the `if` and of the
// constructor are not visible either -- confirm against the full file.
25 IncrementalCUDADeviceParser::IncrementalCUDADeviceParser(
26 Interpreter
&Interp
, std::unique_ptr
<CompilerInstance
> Instance
,
27 IncrementalParser
&HostParser
, llvm::LLVMContext
&LLVMCtx
,
28 llvm::IntrusiveRefCntPtr
<llvm::vfs::InMemoryFileSystem
> FS
,
30 : IncrementalParser(Interp
, std::move(Instance
), LLVMCtx
, Err
),
31 HostParser(HostParser
), VFS(FS
) {
// The CUDA architecture name comes from the target options' CPU field.
34 StringRef Arch
= CI
->getTargetOpts().CPU
;
// Accept only "sm_<integer>"; the integer after the 3-character prefix is
// written to SMVersion.
35 if (!Arch
.starts_with("sm_") || Arch
.substr(3).getAsInteger(10, SMVersion
)) {
// Keep whatever Err already holds and append the new error to it.
36 Err
= llvm::joinErrors(std::move(Err
), llvm::make_error
<llvm::StringError
>(
37 "Invalid CUDA architecture",
38 llvm::inconvertibleErrorCode()));
// Parses Input for the device and hands the resulting fat binary to the host
// parser.
//
// Steps visible in this excerpt:
//   1. delegate to IncrementalParser::Parse(Input);
//   2. call GeneratePTX();
//   3. call GenerateFatbinary();
//   4. add the contents of FatbinContent to VFS under the name
//      "/incr_module_<PTUs.size()>.fatbin";
//   5. store that name in the host compiler instance's
//      CudaGpuBinaryFileName code-gen option;
//   6. clear FatbinContent.
// Each of the first three steps has an early return that propagates its
// error.
//
// NOTE(review): the conditions guarding the three early returns, the
// remaining arguments of getMemBuffer/addFile, and the function's final
// return statement are not visible in this excerpt -- confirm against the
// full file.
43 llvm::Expected
<PartialTranslationUnit
&>
44 IncrementalCUDADeviceParser::Parse(llvm::StringRef Input
) {
45 auto PTU
= IncrementalParser::Parse(Input
);
47 return PTU
.takeError();
49 auto PTX
= GeneratePTX();
51 return PTX
.takeError();
53 auto Err
= GenerateFatbinary();
55 return std::move(Err
);
// The file name is derived from the current number of entries in PTUs.
57 std::string FatbinFileName
=
58 "/incr_module_" + std::to_string(PTUs
.size()) + ".fatbin";
// The memory buffer is created from a StringRef over FatbinContent's storage.
// NOTE(review): FatbinContent is cleared a few lines below -- confirm that
// the in-memory file system does not keep referring to that storage.
59 VFS
->addFile(FatbinFileName
, 0,
60 llvm::MemoryBuffer::getMemBuffer(
61 llvm::StringRef(FatbinContent
.data(), FatbinContent
.size()),
// Tell the host-side compiler instance which file holds the GPU binary.
64 HostParser
.getCI()->getCodeGenOpts().CudaGpuBinaryFileName
= FatbinFileName
;
66 FatbinContent
.clear();
// Emits assembly for the module of the last entry in PTUs into PTXCode and
// returns a StringRef over PTXCode.
//
// Visible steps: look up the LLVM target for the module's target triple,
// create a target machine for the configured CPU with the PIC relocation
// model, set the module's data layout from that machine, then run a legacy
// pass manager configured to emit an assembly file into PTXCode.
//
// Error returns visible here (all llvm::StringError):
//   - one built from the Error string filled in by lookupTarget;
//   - "NVPTX backend cannot produce PTX code." when addPassesToEmitFile
//     returns true;
//   - "Failed to emit PTX code." when PM.run returns false.
//
// NOTE(review): the declaration of Error, the guard in front of the first
// error return, the second argument of that make_error call, and the body of
// the final while loop are not visible in this excerpt -- confirm against the
// full file.
71 llvm::Expected
<llvm::StringRef
> IncrementalCUDADeviceParser::GeneratePTX() {
// Work on the most recently added partial translation unit.
72 auto &PTU
= PTUs
.back();
75 const llvm::Target
*Target
= llvm::TargetRegistry::lookupTarget(
76 PTU
.TheModule
->getTargetTriple(), Error
);
78 return llvm::make_error
<llvm::StringError
>(std::move(Error
),
80 llvm::TargetOptions TO
= llvm::TargetOptions();
// NOTE(review): the target machine is held in a raw pointer and no release is
// visible in this function -- confirm who owns it.
81 llvm::TargetMachine
*TargetMachine
= Target
->createTargetMachine(
82 PTU
.TheModule
->getTargetTriple(), getCI()->getTargetOpts().CPU
, "", TO
,
83 llvm::Reloc::Model::PIC_
);
84 PTU
.TheModule
->setDataLayout(TargetMachine
->createDataLayout());
// The emitted text is streamed into the PTXCode member.
87 llvm::raw_svector_ostream
dest(PTXCode
);
89 llvm::legacy::PassManager PM
;
// A true result from addPassesToEmitFile is treated as "cannot emit".
90 if (TargetMachine
->addPassesToEmitFile(PM
, dest
, nullptr,
91 llvm::CodeGenFileType::AssemblyFile
)) {
92 return llvm::make_error
<llvm::StringError
>(
93 "NVPTX backend cannot produce PTX code.",
94 llvm::inconvertibleErrorCode());
// NOTE(review): a false result from PM.run is treated as failure here --
// confirm that this matches the documented meaning of run()'s return value.
97 if (!PM
.run(*PTU
.TheModule
))
98 return llvm::make_error
<llvm::StringError
>("Failed to emit PTX code.",
99 llvm::inconvertibleErrorCode());
// Loops while PTXCode's size is not a multiple of 8; the loop body is not
// visible in this excerpt (presumably it pads the buffer -- confirm).
102 while (PTXCode
.size() % 8)
104 return PTXCode
.str();
// Builds a fat binary image around the text in PTXCode and appends it to
// FatbinContent.
//
// Three pieces are appended in order: the raw bytes of a FatBinHeader, the
// raw bytes of a FatBinInnerHeader, and the bytes of PTXCode. The only return
// visible in this excerpt is llvm::Error::success().
//
// NOTE(review): the enum header that introduces AddressSize64 and the
// definition of the HostLinux enumerator used further down are not visible in
// this excerpt -- confirm against the full file.
107 llvm::Error
IncrementalCUDADeviceParser::GenerateFatbinary() {
109 AddressSize64
= 0x01,
// Header placed directly in front of the PTX bytes. The trailing hex comments
// match each field's byte offset given the field sizes. Fields named
// unknownNN are always written as fixed values by the constructor below.
117 struct FatBinInnerHeader
{
118 uint16_t Kind
; // 0x00
119 uint16_t unknown02
; // 0x02
120 uint32_t HeaderSize
; // 0x04
121 uint32_t DataSize
; // 0x08
122 uint32_t unknown0c
; // 0x0c
123 uint32_t CompressedSize
; // 0x10
124 uint32_t SubHeaderSize
; // 0x14
125 uint16_t VersionMinor
; // 0x18
126 uint16_t VersionMajor
; // 0x1a
127 uint32_t CudaArch
; // 0x1c
128 uint32_t unknown20
; // 0x20
129 uint32_t unknown24
; // 0x24
130 uint32_t Flags
; // 0x28
131 uint32_t unknown2c
; // 0x2c
132 uint32_t unknown30
; // 0x30
133 uint32_t unknown34
; // 0x34
134 uint32_t UncompressedSize
; // 0x38
135 uint32_t unknown3c
; // 0x3c
136 uint32_t unknown40
; // 0x40
137 uint32_t unknown44
; // 0x44
// Only DataSize, CudaArch and Flags come from the caller. HeaderSize is the
// size of this struct, and SubHeaderSize is computed from the HeaderSize
// member, which is declared (and therefore initialised) before it.
138 FatBinInnerHeader(uint32_t DataSize
, uint32_t CudaArch
, uint32_t Flags
)
139 : Kind(1 /*PTX*/), unknown02(0x0101), HeaderSize(sizeof(*this)),
140 DataSize(DataSize
), unknown0c(0), CompressedSize(0),
141 SubHeaderSize(HeaderSize
- 8), VersionMinor(2), VersionMajor(4),
142 CudaArch(CudaArch
), unknown20(0), unknown24(0), Flags(Flags
),
143 unknown2c(0), unknown30(0), unknown34(0), UncompressedSize(0),
144 unknown3c(0), unknown40(0), unknown44(0) {}
// Outermost header of the image: a fixed magic number and version, this
// struct's own size, and the caller-supplied DataSize.
147 struct FatBinHeader
{
148 uint32_t Magic
; // 0x00
149 uint16_t Version
; // 0x04
150 uint16_t HeaderSize
; // 0x06
151 uint32_t DataSize
; // 0x08
152 uint32_t unknown0c
; // 0x0c
154 FatBinHeader(uint32_t DataSize
)
155 : Magic(0xba55ed50), Version(1), HeaderSize(sizeof(*this)),
156 DataSize(DataSize
), unknown0c(0) {}
// The outer header's DataSize covers the inner header plus the PTX bytes.
// The header object is appended byte-for-byte (HeaderSize bytes).
159 FatBinHeader
OuterHeader(sizeof(FatBinInnerHeader
) + PTXCode
.size());
160 FatbinContent
.append((char *)&OuterHeader
,
161 ((char *)&OuterHeader
) + OuterHeader
.HeaderSize
);
// The inner header records the PTX size, the SMVersion member as CudaArch,
// and the flags AddressSize64 | HostLinux; it is appended the same way.
163 FatBinInnerHeader
InnerHeader(PTXCode
.size(), SMVersion
,
164 FatBinFlags::AddressSize64
|
165 FatBinFlags::HostLinux
);
166 FatbinContent
.append((char *)&InnerHeader
,
167 ((char *)&InnerHeader
) + InnerHeader
.HeaderSize
);
// Finally, the PTX bytes themselves.
169 FatbinContent
.append(PTXCode
.begin(), PTXCode
.end());
171 return llvm::Error::success();
174 IncrementalCUDADeviceParser::~IncrementalCUDADeviceParser() {}