Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / openmp / libomptarget / plugins-nextgen / common / PluginInterface / JIT.cpp
blob3def95ee4920508bc140140248be26eeb444648f
1 //===- JIT.cpp - Target independent JIT infrastructure --------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //===----------------------------------------------------------------------===//
11 #include "JIT.h"
12 #include "Debug.h"
14 #include "PluginInterface.h"
15 #include "Utilities.h"
16 #include "omptarget.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/CodeGen/CommandFlags.h"
20 #include "llvm/CodeGen/MachineModuleInfo.h"
21 #include "llvm/IR/LLVMContext.h"
22 #include "llvm/IR/LLVMRemarkStreamer.h"
23 #include "llvm/IR/LegacyPassManager.h"
24 #include "llvm/IRReader/IRReader.h"
25 #include "llvm/InitializePasses.h"
26 #include "llvm/MC/TargetRegistry.h"
27 #include "llvm/Object/IRObjectFile.h"
28 #include "llvm/Passes/OptimizationLevel.h"
29 #include "llvm/Passes/PassBuilder.h"
30 #include "llvm/Support/MemoryBuffer.h"
31 #include "llvm/Support/SourceMgr.h"
32 #include "llvm/Support/TargetSelect.h"
33 #include "llvm/Support/TimeProfiler.h"
34 #include "llvm/Support/ToolOutputFile.h"
35 #include "llvm/Support/raw_ostream.h"
36 #include "llvm/Target/TargetMachine.h"
37 #include "llvm/Target/TargetOptions.h"
38 #include "llvm/TargetParser/SubtargetFeature.h"
40 #include <mutex>
41 #include <shared_mutex>
42 #include <system_error>
44 using namespace llvm;
45 using namespace llvm::object;
46 using namespace omp;
47 using namespace omp::target;
49 static codegen::RegisterCodeGenFlags RCGF;
51 namespace {
53 /// A map from a bitcode image start address to its corresponding triple. If the
54 /// image is not in the map, it is not a bitcode image.
55 DenseMap<void *, Triple::ArchType> BitcodeImageMap;
56 std::shared_mutex BitcodeImageMapMutex;
58 std::once_flag InitFlag;
60 void init(Triple TT) {
61 #ifdef LIBOMPTARGET_JIT_NVPTX
62 if (TT.isNVPTX()) {
63 LLVMInitializeNVPTXTargetInfo();
64 LLVMInitializeNVPTXTarget();
65 LLVMInitializeNVPTXTargetMC();
66 LLVMInitializeNVPTXAsmPrinter();
68 #endif
69 #ifdef LIBOMPTARGET_JIT_AMDGPU
70 if (TT.isAMDGPU()) {
71 LLVMInitializeAMDGPUTargetInfo();
72 LLVMInitializeAMDGPUTarget();
73 LLVMInitializeAMDGPUTargetMC();
74 LLVMInitializeAMDGPUAsmPrinter();
76 #endif
79 Expected<std::unique_ptr<Module>>
80 createModuleFromMemoryBuffer(std::unique_ptr<MemoryBuffer> &MB,
81 LLVMContext &Context) {
82 SMDiagnostic Err;
83 auto Mod = parseIR(*MB, Err, Context);
84 if (!Mod)
85 return make_error<StringError>("Failed to create module",
86 inconvertibleErrorCode());
87 return std::move(Mod);
89 Expected<std::unique_ptr<Module>>
90 createModuleFromImage(const __tgt_device_image &Image, LLVMContext &Context) {
91 StringRef Data((const char *)Image.ImageStart,
92 target::getPtrDiff(Image.ImageEnd, Image.ImageStart));
93 std::unique_ptr<MemoryBuffer> MB = MemoryBuffer::getMemBuffer(
94 Data, /* BufferName */ "", /* RequiresNullTerminator */ false);
95 return createModuleFromMemoryBuffer(MB, Context);
98 OptimizationLevel getOptLevel(unsigned OptLevel) {
99 switch (OptLevel) {
100 case 0:
101 return OptimizationLevel::O0;
102 case 1:
103 return OptimizationLevel::O1;
104 case 2:
105 return OptimizationLevel::O2;
106 case 3:
107 return OptimizationLevel::O3;
109 llvm_unreachable("Invalid optimization level");
112 Expected<std::unique_ptr<TargetMachine>>
113 createTargetMachine(Module &M, std::string CPU, unsigned OptLevel) {
114 Triple TT(M.getTargetTriple());
115 std::optional<CodeGenOptLevel> CGOptLevelOrNone =
116 CodeGenOpt::getLevel(OptLevel);
117 assert(CGOptLevelOrNone && "Invalid optimization level");
118 CodeGenOptLevel CGOptLevel = *CGOptLevelOrNone;
120 std::string Msg;
121 const Target *T = TargetRegistry::lookupTarget(M.getTargetTriple(), Msg);
122 if (!T)
123 return make_error<StringError>(Msg, inconvertibleErrorCode());
125 SubtargetFeatures Features;
126 Features.getDefaultSubtargetFeatures(TT);
128 std::optional<Reloc::Model> RelocModel;
129 if (M.getModuleFlag("PIC Level"))
130 RelocModel =
131 M.getPICLevel() == PICLevel::NotPIC ? Reloc::Static : Reloc::PIC_;
133 std::optional<CodeModel::Model> CodeModel = M.getCodeModel();
135 TargetOptions Options = codegen::InitTargetOptionsFromCodeGenFlags(TT);
137 std::unique_ptr<TargetMachine> TM(
138 T->createTargetMachine(M.getTargetTriple(), CPU, Features.getString(),
139 Options, RelocModel, CodeModel, CGOptLevel));
140 if (!TM)
141 return make_error<StringError>("Failed to create target machine",
142 inconvertibleErrorCode());
143 return std::move(TM);
146 } // namespace
148 JITEngine::JITEngine(Triple::ArchType TA) : TT(Triple::getArchTypeName(TA)) {
149 std::call_once(InitFlag, init, TT);
152 void JITEngine::opt(TargetMachine *TM, TargetLibraryInfoImpl *TLII, Module &M,
153 unsigned OptLevel) {
154 PipelineTuningOptions PTO;
155 std::optional<PGOOptions> PGOOpt;
157 LoopAnalysisManager LAM;
158 FunctionAnalysisManager FAM;
159 CGSCCAnalysisManager CGAM;
160 ModuleAnalysisManager MAM;
161 ModulePassManager MPM;
163 PassBuilder PB(TM, PTO, PGOOpt, nullptr);
165 FAM.registerPass([&] { return TargetLibraryAnalysis(*TLII); });
167 // Register all the basic analyses with the managers.
168 PB.registerModuleAnalyses(MAM);
169 PB.registerCGSCCAnalyses(CGAM);
170 PB.registerFunctionAnalyses(FAM);
171 PB.registerLoopAnalyses(LAM);
172 PB.crossRegisterProxies(LAM, FAM, CGAM, MAM);
174 MPM.addPass(PB.buildPerModuleDefaultPipeline(getOptLevel(OptLevel)));
175 MPM.run(M, MAM);
178 void JITEngine::codegen(TargetMachine *TM, TargetLibraryInfoImpl *TLII,
179 Module &M, raw_pwrite_stream &OS) {
180 legacy::PassManager PM;
181 PM.add(new TargetLibraryInfoWrapperPass(*TLII));
182 MachineModuleInfoWrapperPass *MMIWP = new MachineModuleInfoWrapperPass(
183 reinterpret_cast<LLVMTargetMachine *>(TM));
184 TM->addPassesToEmitFile(PM, OS, nullptr,
185 TT.isNVPTX() ? CodeGenFileType::AssemblyFile
186 : CodeGenFileType::ObjectFile,
187 /* DisableVerify */ false, MMIWP);
189 PM.run(M);
192 Expected<std::unique_ptr<MemoryBuffer>>
193 JITEngine::backend(Module &M, const std::string &ComputeUnitKind,
194 unsigned OptLevel) {
196 auto RemarksFileOrErr = setupLLVMOptimizationRemarks(
197 M.getContext(), /* RemarksFilename */ "", /* RemarksPasses */ "",
198 /* RemarksFormat */ "", /* RemarksWithHotness */ false);
199 if (Error E = RemarksFileOrErr.takeError())
200 return std::move(E);
201 if (*RemarksFileOrErr)
202 (*RemarksFileOrErr)->keep();
204 auto TMOrErr = createTargetMachine(M, ComputeUnitKind, OptLevel);
205 if (!TMOrErr)
206 return TMOrErr.takeError();
208 std::unique_ptr<TargetMachine> TM = std::move(*TMOrErr);
209 TargetLibraryInfoImpl TLII(TT);
211 if (PreOptIRModuleFileName.isPresent()) {
212 std::error_code EC;
213 raw_fd_stream FD(PreOptIRModuleFileName.get(), EC);
214 if (EC)
215 return createStringError(
216 EC, "Could not open %s to write the pre-opt IR module\n",
217 PreOptIRModuleFileName.get().c_str());
218 M.print(FD, nullptr);
221 if (!JITSkipOpt)
222 opt(TM.get(), &TLII, M, OptLevel);
224 if (PostOptIRModuleFileName.isPresent()) {
225 std::error_code EC;
226 raw_fd_stream FD(PostOptIRModuleFileName.get(), EC);
227 if (EC)
228 return createStringError(
229 EC, "Could not open %s to write the post-opt IR module\n",
230 PreOptIRModuleFileName.get().c_str());
231 M.print(FD, nullptr);
234 // Prepare the output buffer and stream for codegen.
235 SmallVector<char> CGOutputBuffer;
236 raw_svector_ostream OS(CGOutputBuffer);
238 codegen(TM.get(), &TLII, M, OS);
240 return MemoryBuffer::getMemBufferCopy(OS.str());
243 Expected<std::unique_ptr<MemoryBuffer>>
244 JITEngine::getOrCreateObjFile(const __tgt_device_image &Image, LLVMContext &Ctx,
245 const std::string &ComputeUnitKind) {
247 // Check if the user replaces the module at runtime with a finished object.
248 if (ReplacementObjectFileName.isPresent()) {
249 auto MBOrErr =
250 MemoryBuffer::getFileOrSTDIN(ReplacementObjectFileName.get());
251 if (!MBOrErr)
252 return createStringError(MBOrErr.getError(),
253 "Could not read replacement obj from %s\n",
254 ReplacementModuleFileName.get().c_str());
255 return std::move(*MBOrErr);
258 Module *Mod = nullptr;
259 // Check if the user replaces the module at runtime or we read it from the
260 // image.
261 // TODO: Allow the user to specify images per device (Arch + ComputeUnitKind).
262 if (!ReplacementModuleFileName.isPresent()) {
263 auto ModOrErr = createModuleFromImage(Image, Ctx);
264 if (!ModOrErr)
265 return ModOrErr.takeError();
266 Mod = ModOrErr->release();
267 } else {
268 auto MBOrErr =
269 MemoryBuffer::getFileOrSTDIN(ReplacementModuleFileName.get());
270 if (!MBOrErr)
271 return createStringError(MBOrErr.getError(),
272 "Could not read replacement module from %s\n",
273 ReplacementModuleFileName.get().c_str());
274 auto ModOrErr = createModuleFromMemoryBuffer(MBOrErr.get(), Ctx);
275 if (!ModOrErr)
276 return ModOrErr.takeError();
277 Mod = ModOrErr->release();
280 return backend(*Mod, ComputeUnitKind, JITOptLevel);
283 Expected<const __tgt_device_image *>
284 JITEngine::compile(const __tgt_device_image &Image,
285 const std::string &ComputeUnitKind,
286 PostProcessingFn PostProcessing) {
287 std::lock_guard<std::mutex> Lock(ComputeUnitMapMutex);
289 // Check if we JITed this image for the given compute unit kind before.
290 ComputeUnitInfo &CUI = ComputeUnitMap[ComputeUnitKind];
291 if (__tgt_device_image *JITedImage = CUI.TgtImageMap.lookup(&Image))
292 return JITedImage;
294 auto ObjMBOrErr = getOrCreateObjFile(Image, CUI.Context, ComputeUnitKind);
295 if (!ObjMBOrErr)
296 return ObjMBOrErr.takeError();
298 auto ImageMBOrErr = PostProcessing(std::move(*ObjMBOrErr));
299 if (!ImageMBOrErr)
300 return ImageMBOrErr.takeError();
302 CUI.JITImages.push_back(std::move(*ImageMBOrErr));
303 __tgt_device_image *&JITedImage = CUI.TgtImageMap[&Image];
304 JITedImage = new __tgt_device_image();
305 *JITedImage = Image;
307 auto &ImageMB = CUI.JITImages.back();
309 JITedImage->ImageStart = const_cast<char *>(ImageMB->getBufferStart());
310 JITedImage->ImageEnd = const_cast<char *>(ImageMB->getBufferEnd());
312 return JITedImage;
315 Expected<const __tgt_device_image *>
316 JITEngine::process(const __tgt_device_image &Image,
317 target::plugin::GenericDeviceTy &Device) {
318 const std::string &ComputeUnitKind = Device.getComputeUnitKind();
320 PostProcessingFn PostProcessing = [&Device](std::unique_ptr<MemoryBuffer> MB)
321 -> Expected<std::unique_ptr<MemoryBuffer>> {
322 return Device.doJITPostProcessing(std::move(MB));
326 std::shared_lock<std::shared_mutex> SharedLock(BitcodeImageMapMutex);
327 auto Itr = BitcodeImageMap.find(Image.ImageStart);
328 if (Itr != BitcodeImageMap.end() && Itr->second == TT.getArch())
329 return compile(Image, ComputeUnitKind, PostProcessing);
332 return &Image;
335 bool JITEngine::checkBitcodeImage(const __tgt_device_image &Image) {
336 TimeTraceScope TimeScope("Check bitcode image");
337 std::lock_guard<std::shared_mutex> Lock(BitcodeImageMapMutex);
340 auto Itr = BitcodeImageMap.find(Image.ImageStart);
341 if (Itr != BitcodeImageMap.end() && Itr->second == TT.getArch())
342 return true;
345 StringRef Data(reinterpret_cast<const char *>(Image.ImageStart),
346 target::getPtrDiff(Image.ImageEnd, Image.ImageStart));
347 std::unique_ptr<MemoryBuffer> MB = MemoryBuffer::getMemBuffer(
348 Data, /* BufferName */ "", /* RequiresNullTerminator */ false);
349 if (!MB)
350 return false;
352 Expected<object::IRSymtabFile> FOrErr = object::readIRSymtab(*MB);
353 if (!FOrErr) {
354 consumeError(FOrErr.takeError());
355 return false;
358 auto ActualTriple = FOrErr->TheReader.getTargetTriple();
359 auto BitcodeTA = Triple(ActualTriple).getArch();
360 BitcodeImageMap[Image.ImageStart] = BitcodeTA;
362 DP("Is%s IR Image\n", BitcodeTA == TT.getArch() ? " " : " NOT");
364 return BitcodeTA == TT.getArch();