Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / openmp / libomptarget / tools / kernelreplay / llvm-omp-kernel-replay.cpp
blob93fc3e7853f8e9ce30a1ded5953c6f6086140efc
1 //===- llvm-omp-kernel-replay.cpp - Replay OpenMP offload kernel ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This is a command line utility to replay the execution of recorded OpenMP
10 // offload kernels.
12 //===----------------------------------------------------------------------===//
#include "omptarget.h"
#include "omptargetplugin.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/JSON.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <memory>
#include <string>
#include <vector>
21 using namespace llvm;
23 cl::OptionCategory ReplayOptions("llvm-omp-kernel-replay Options");
25 // InputFilename - The filename to read the json description of the kernel.
26 static cl::opt<std::string> InputFilename(cl::Positional,
27 cl::desc("<input kernel json file>"),
28 cl::Required);
30 static cl::opt<bool> VerifyOpt(
31 "verify",
32 cl::desc(
33 "Verify device memory post execution against the original output."),
34 cl::init(false), cl::cat(ReplayOptions));
36 static cl::opt<bool> SaveOutputOpt(
37 "save-output",
38 cl::desc("Save the device memory output of the replayed kernel execution."),
39 cl::init(false), cl::cat(ReplayOptions));
41 static cl::opt<unsigned> NumTeamsOpt("num-teams",
42 cl::desc("Set the number of teams."),
43 cl::init(0), cl::cat(ReplayOptions));
45 static cl::opt<unsigned> NumThreadsOpt("num-threads",
46 cl::desc("Set the number of threads."),
47 cl::init(0), cl::cat(ReplayOptions));
49 static cl::opt<int32_t> DeviceIdOpt("device-id", cl::desc("Set the device id."),
50 cl::init(-1), cl::cat(ReplayOptions));
52 int main(int argc, char **argv) {
53 cl::HideUnrelatedOptions(ReplayOptions);
54 cl::ParseCommandLineOptions(argc, argv, "llvm-omp-kernel-replay\n");
56 ErrorOr<std::unique_ptr<MemoryBuffer>> KernelInfoMB =
57 MemoryBuffer::getFile(InputFilename, /* isText */ true,
58 /* RequiresNullTerminator */ true);
59 if (!KernelInfoMB)
60 report_fatal_error("Error reading the kernel info json file");
61 Expected<json::Value> JsonKernelInfo =
62 json::parse(KernelInfoMB.get()->getBuffer());
63 if (auto Err = JsonKernelInfo.takeError())
64 report_fatal_error("Cannot parse the kernel info json file");
66 auto NumTeamsJson =
67 JsonKernelInfo->getAsObject()->getInteger("NumTeamsClause");
68 unsigned NumTeams = (NumTeamsOpt > 0 ? NumTeamsOpt : NumTeamsJson.value());
69 auto NumThreadsJson =
70 JsonKernelInfo->getAsObject()->getInteger("ThreadLimitClause");
71 unsigned NumThreads =
72 (NumThreadsOpt > 0 ? NumThreadsOpt : NumThreadsJson.value());
73 // TODO: Print a warning if number of teams/threads is explicitly set in the
74 // kernel info but overriden through command line options.
75 auto LoopTripCount =
76 JsonKernelInfo->getAsObject()->getInteger("LoopTripCount");
77 auto KernelFunc = JsonKernelInfo->getAsObject()->getString("Name");
79 SmallVector<void *> TgtArgs;
80 SmallVector<ptrdiff_t> TgtArgOffsets;
81 auto NumArgs = JsonKernelInfo->getAsObject()->getInteger("NumArgs");
82 auto *TgtArgsArray = JsonKernelInfo->getAsObject()->getArray("ArgPtrs");
83 for (auto It : *TgtArgsArray)
84 TgtArgs.push_back(reinterpret_cast<void *>(It.getAsInteger().value()));
85 auto *TgtArgOffsetsArray =
86 JsonKernelInfo->getAsObject()->getArray("ArgOffsets");
87 for (auto It : *TgtArgOffsetsArray)
88 TgtArgOffsets.push_back(static_cast<ptrdiff_t>(It.getAsInteger().value()));
90 void *BAllocStart = reinterpret_cast<void *>(
91 JsonKernelInfo->getAsObject()->getInteger("BumpAllocVAStart").value());
93 __tgt_offload_entry KernelEntry = {nullptr, nullptr, 0, 0, 0};
94 std::string KernelEntryName = KernelFunc.value().str();
95 KernelEntry.name = const_cast<char *>(KernelEntryName.c_str());
96 // Anything non-zero works to uniquely identify the kernel.
97 KernelEntry.addr = (void *)0x1;
99 ErrorOr<std::unique_ptr<MemoryBuffer>> ImageMB =
100 MemoryBuffer::getFile(KernelEntryName + ".image", /* isText */ false,
101 /* RequiresNullTerminator */ false);
102 if (!ImageMB)
103 report_fatal_error("Error reading the kernel image.");
105 __tgt_device_image DeviceImage;
106 DeviceImage.ImageStart = const_cast<char *>(ImageMB.get()->getBufferStart());
107 DeviceImage.ImageEnd = const_cast<char *>(ImageMB.get()->getBufferEnd());
108 DeviceImage.EntriesBegin = &KernelEntry;
109 DeviceImage.EntriesEnd = &KernelEntry + 1;
111 __tgt_bin_desc Desc;
112 Desc.NumDeviceImages = 1;
113 Desc.HostEntriesBegin = &KernelEntry;
114 Desc.HostEntriesEnd = &KernelEntry + 1;
115 Desc.DeviceImages = &DeviceImage;
117 auto DeviceMemorySizeJson =
118 JsonKernelInfo->getAsObject()->getInteger("DeviceMemorySize");
119 // Set device memory size to the ceiling of GB granularity.
120 uint64_t DeviceMemorySize = std::ceil(DeviceMemorySizeJson.value());
122 auto DeviceIdJson = JsonKernelInfo->getAsObject()->getInteger("DeviceId");
123 // TODO: Print warning if the user overrides the device id in the json file.
124 int32_t DeviceId = (DeviceIdOpt > -1 ? DeviceIdOpt : DeviceIdJson.value());
126 // TODO: do we need requires?
127 //__tgt_register_requires(/* Flags */1);
129 __tgt_register_lib(&Desc);
131 int Rc = __tgt_activate_record_replay(DeviceId, DeviceMemorySize, BAllocStart,
132 false, VerifyOpt);
134 if (Rc != OMP_TGT_SUCCESS) {
135 report_fatal_error("Cannot activate record replay\n");
138 ErrorOr<std::unique_ptr<MemoryBuffer>> DeviceMemoryMB =
139 MemoryBuffer::getFile(KernelEntryName + ".memory", /* isText */ false,
140 /* RequiresNullTerminator */ false);
142 if (!DeviceMemoryMB)
143 report_fatal_error("Error reading the kernel input device memory.");
145 // On AMD for currently unknown reasons we cannot copy memory mapped data to
146 // device. This is a work-around.
147 uint8_t *recored_data = new uint8_t[DeviceMemoryMB.get()->getBufferSize()];
148 std::memcpy(recored_data,
149 const_cast<char *>(DeviceMemoryMB.get()->getBuffer().data()),
150 DeviceMemorySizeJson.value() * sizeof(uint8_t));
152 __tgt_target_kernel_replay(
153 /* Loc */ nullptr, DeviceId, KernelEntry.addr, (char *)recored_data,
154 DeviceMemoryMB.get()->getBufferSize(), TgtArgs.data(),
155 TgtArgOffsets.data(), NumArgs.value(), NumTeams, NumThreads,
156 LoopTripCount.value());
158 if (VerifyOpt) {
159 ErrorOr<std::unique_ptr<MemoryBuffer>> OriginalOutputMB =
160 MemoryBuffer::getFile(KernelEntryName + ".original.output",
161 /* isText */ false,
162 /* RequiresNullTerminator */ false);
163 if (!OriginalOutputMB)
164 report_fatal_error("Error reading the kernel original output file, make "
165 "sure LIBOMPTARGET_SAVE_OUTPUT is set when recording");
166 ErrorOr<std::unique_ptr<MemoryBuffer>> ReplayOutputMB =
167 MemoryBuffer::getFile(KernelEntryName + ".replay.output",
168 /* isText */ false,
169 /* RequiresNullTerminator */ false);
170 if (!ReplayOutputMB)
171 report_fatal_error("Error reading the kernel replay output file");
173 StringRef OriginalOutput = OriginalOutputMB.get()->getBuffer();
174 StringRef ReplayOutput = ReplayOutputMB.get()->getBuffer();
175 if (OriginalOutput == ReplayOutput)
176 outs() << "[llvm-omp-kernel-replay] Replay device memory verified!\n";
177 else
178 outs() << "[llvm-omp-kernel-replay] Replay device memory failed to "
179 "verify!\n";
182 delete[] recored_data;
184 // TODO: calling unregister lib causes plugin deinit error for nextgen
185 // plugins.
186 //__tgt_unregister_lib(&Desc);
188 return 0;