1 //===- llvm-omp-kernel-replay.cpp - Replay OpenMP offload kernel ----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This is a command line utility to replay the execution of recorded OpenMP
// kernels.
12 //===----------------------------------------------------------------------===//
14 #include "omptarget.h"
16 #include "llvm/Frontend/Offloading/Utility.h"
17 #include "llvm/Support/CommandLine.h"
18 #include "llvm/Support/JSON.h"
19 #include "llvm/Support/MemoryBuffer.h"
// ReplayOptions - Option category that this tool's options are registered
// under via cl::cat(ReplayOptions). main() passes it to
// cl::HideUnrelatedOptions before parsing the command line.
26 cl::OptionCategory
ReplayOptions("llvm-omp-kernel-replay Options");
28 // InputFilename - The filename to read the json description of the kernel.
// Declared as a positional argument. main() loads the named file as text via
// MemoryBuffer::getFile and hands its contents to json::parse.
29 static cl::opt
<std::string
> InputFilename(cl::Positional
,
30 cl::desc("<input kernel json file>"),
// VerifyOpt - Boolean flag, off by default. main() forwards its value to
// __tgt_activate_record_replay.
// NOTE(review): presumably it also gates the original-vs-replay output
// comparison at the end of main(); the guarding condition is not visible
// here, so confirm.
33 static cl::opt
<bool> VerifyOpt(
36 "Verify device memory post execution against the original output."),
37 cl::init(false), cl::cat(ReplayOptions
));
// SaveOutputOpt - Boolean flag, off by default, described as saving the
// device memory output of the replayed kernel execution.
// NOTE(review): no read of SaveOutputOpt is visible in main(); the call to
// __tgt_activate_record_replay receives VerifyOpt instead. Confirm where (or
// whether) this option is consumed.
39 static cl::opt
<bool> SaveOutputOpt(
41 cl::desc("Save the device memory output of the replayed kernel execution."),
42 cl::init(false), cl::cat(ReplayOptions
));
// NumTeamsOpt - "--num-teams" override for the number of teams. A value
// greater than zero replaces the team count taken from the kernel json; the
// default of 0 makes main() use the json value instead.
44 static cl::opt
<unsigned> NumTeamsOpt("num-teams",
45 cl::desc("Set the number of teams."),
46 cl::init(0), cl::cat(ReplayOptions
));
// NumThreadsOpt - "--num-threads" override for the number of threads. A value
// greater than zero replaces the thread count taken from the kernel json; the
// default of 0 makes main() use the json value instead.
48 static cl::opt
<unsigned> NumThreadsOpt("num-threads",
49 cl::desc("Set the number of threads."),
50 cl::init(0), cl::cat(ReplayOptions
));
// DeviceIdOpt - "--device-id" override for the target device. Any value
// greater than -1 is used as the device id; the default of -1 makes main()
// fall back to the "DeviceId" entry of the kernel json.
52 static cl::opt
<int32_t> DeviceIdOpt("device-id", cl::desc("Set the device id."),
53 cl::init(-1), cl::cat(ReplayOptions
));
55 int main(int argc
, char **argv
) {
56 cl::HideUnrelatedOptions(ReplayOptions
);
57 cl::ParseCommandLineOptions(argc
, argv
, "llvm-omp-kernel-replay\n");
59 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> KernelInfoMB
=
60 MemoryBuffer::getFile(InputFilename
, /*isText=*/true,
61 /*RequiresNullTerminator=*/true);
63 report_fatal_error("Error reading the kernel info json file");
64 Expected
<json::Value
> JsonKernelInfo
=
65 json::parse(KernelInfoMB
.get()->getBuffer());
66 if (auto Err
= JsonKernelInfo
.takeError())
67 report_fatal_error("Cannot parse the kernel info json file");
70 JsonKernelInfo
->getAsObject()->getInteger("NumTeamsClause");
71 unsigned NumTeams
= (NumTeamsOpt
> 0 ? NumTeamsOpt
: NumTeamsJson
.value());
73 JsonKernelInfo
->getAsObject()->getInteger("ThreadLimitClause");
75 (NumThreadsOpt
> 0 ? NumThreadsOpt
: NumThreadsJson
.value());
76 // TODO: Print a warning if number of teams/threads is explicitly set in the
77 // kernel info but overridden through command line options.
79 JsonKernelInfo
->getAsObject()->getInteger("LoopTripCount");
80 auto KernelFunc
= JsonKernelInfo
->getAsObject()->getString("Name");
82 SmallVector
<void *> TgtArgs
;
83 SmallVector
<ptrdiff_t> TgtArgOffsets
;
84 auto NumArgs
= JsonKernelInfo
->getAsObject()->getInteger("NumArgs");
85 auto *TgtArgsArray
= JsonKernelInfo
->getAsObject()->getArray("ArgPtrs");
86 for (auto It
: *TgtArgsArray
)
87 TgtArgs
.push_back(reinterpret_cast<void *>(It
.getAsInteger().value()));
88 auto *TgtArgOffsetsArray
=
89 JsonKernelInfo
->getAsObject()->getArray("ArgOffsets");
90 for (auto It
: *TgtArgOffsetsArray
)
91 TgtArgOffsets
.push_back(static_cast<ptrdiff_t>(It
.getAsInteger().value()));
93 void *BAllocStart
= reinterpret_cast<void *>(
94 JsonKernelInfo
->getAsObject()->getInteger("BumpAllocVAStart").value());
96 llvm::offloading::EntryTy KernelEntry
= {nullptr, nullptr, 0, 0, 0};
97 std::string KernelEntryName
= KernelFunc
.value().str();
98 KernelEntry
.SymbolName
= const_cast<char *>(KernelEntryName
.c_str());
99 // Anything non-zero works to uniquely identify the kernel.
100 KernelEntry
.Address
= (void *)0x1;
102 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> ImageMB
=
103 MemoryBuffer::getFile(KernelEntryName
+ ".image", /*isText=*/false,
104 /*RequiresNullTerminator=*/false);
106 report_fatal_error("Error reading the kernel image.");
108 __tgt_device_image DeviceImage
;
109 DeviceImage
.ImageStart
= const_cast<char *>(ImageMB
.get()->getBufferStart());
110 DeviceImage
.ImageEnd
= const_cast<char *>(ImageMB
.get()->getBufferEnd());
111 DeviceImage
.EntriesBegin
= &KernelEntry
;
112 DeviceImage
.EntriesEnd
= &KernelEntry
+ 1;
115 Desc
.NumDeviceImages
= 1;
116 Desc
.HostEntriesBegin
= &KernelEntry
;
117 Desc
.HostEntriesEnd
= &KernelEntry
+ 1;
118 Desc
.DeviceImages
= &DeviceImage
;
120 auto DeviceMemorySizeJson
=
121 JsonKernelInfo
->getAsObject()->getInteger("DeviceMemorySize");
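122 // Set the device memory size from the recorded value.
// NOTE(review): std::ceil is applied to an integer here, so no rounding up to
// GB granularity actually takes place -- confirm the intended behavior.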
123 uint64_t DeviceMemorySize
= std::ceil(DeviceMemorySizeJson
.value());
125 auto DeviceIdJson
= JsonKernelInfo
->getAsObject()->getInteger("DeviceId");
126 // TODO: Print warning if the user overrides the device id in the json file.
127 int32_t DeviceId
= (DeviceIdOpt
> -1 ? DeviceIdOpt
: DeviceIdJson
.value());
129 // TODO: do we need requires?
130 //__tgt_register_requires(/*Flags=*/1);
132 __tgt_register_lib(&Desc
);
134 uint64_t ReqPtrArgOffset
= 0;
135 int Rc
= __tgt_activate_record_replay(DeviceId
, DeviceMemorySize
, BAllocStart
,
136 false, VerifyOpt
, ReqPtrArgOffset
);
138 if (Rc
!= OMP_TGT_SUCCESS
) {
139 report_fatal_error("Cannot activate record replay\n");
142 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> DeviceMemoryMB
=
143 MemoryBuffer::getFile(KernelEntryName
+ ".memory", /*isText=*/false,
144 /*RequiresNullTerminator=*/false);
147 report_fatal_error("Error reading the kernel input device memory.");
149 // On AMD for currently unknown reasons we cannot copy memory mapped data to
150 // device. This is a work-around.
151 uint8_t *recored_data
= new uint8_t[DeviceMemoryMB
.get()->getBufferSize()];
152 std::memcpy(recored_data
,
153 const_cast<char *>(DeviceMemoryMB
.get()->getBuffer().data()),
154 DeviceMemoryMB
.get()->getBufferSize());
156 // If necessary, adjust pointer arguments.
157 if (ReqPtrArgOffset
) {
158 for (auto *&Arg
: TgtArgs
) {
159 auto ArgInt
= uintptr_t(Arg
);
160 // Try to find pointer arguments.
161 if (ArgInt
< uintptr_t(BAllocStart
) ||
162 ArgInt
>= uintptr_t(BAllocStart
) + DeviceMemorySize
)
164 Arg
= reinterpret_cast<void *>(ArgInt
- ReqPtrArgOffset
);
168 __tgt_target_kernel_replay(
169 /*Loc=*/nullptr, DeviceId
, KernelEntry
.Address
, (char *)recored_data
,
170 DeviceMemoryMB
.get()->getBufferSize(), TgtArgs
.data(),
171 TgtArgOffsets
.data(), NumArgs
.value(), NumTeams
, NumThreads
,
172 LoopTripCount
.value());
175 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> OriginalOutputMB
=
176 MemoryBuffer::getFile(KernelEntryName
+ ".original.output",
178 /*RequiresNullTerminator=*/false);
179 if (!OriginalOutputMB
)
180 report_fatal_error("Error reading the kernel original output file, make "
181 "sure LIBOMPTARGET_SAVE_OUTPUT is set when recording");
182 ErrorOr
<std::unique_ptr
<MemoryBuffer
>> ReplayOutputMB
=
183 MemoryBuffer::getFile(KernelEntryName
+ ".replay.output",
185 /*RequiresNullTerminator=*/false);
187 report_fatal_error("Error reading the kernel replay output file");
189 StringRef OriginalOutput
= OriginalOutputMB
.get()->getBuffer();
190 StringRef ReplayOutput
= ReplayOutputMB
.get()->getBuffer();
191 if (OriginalOutput
== ReplayOutput
)
192 outs() << "[llvm-omp-kernel-replay] Replay device memory verified!\n";
194 outs() << "[llvm-omp-kernel-replay] Replay device memory failed to "
198 delete[] recored_data
;