Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / bolt / lib / Profile / BoltAddressTranslation.cpp
blobe004309e0e21365008774d033f25b65588f3ff24
1 //===- bolt/Profile/BoltAddressTranslation.cpp ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "bolt/Profile/BoltAddressTranslation.h"
10 #include "bolt/Core/BinaryFunction.h"
11 #include "llvm/Support/DataExtractor.h"
12 #include "llvm/Support/Errc.h"
14 #define DEBUG_TYPE "bolt-bat"
16 namespace llvm {
17 namespace bolt {
// Name of the section holding the BOLT Address Translation (BAT) data.
// enabledFor() compares every section name of an input file against it.
19 const char *BoltAddressTranslation::SECTION_NAME = ".note.bolt_bat";
21 void BoltAddressTranslation::writeEntriesForBB(MapTy &Map,
22 const BinaryBasicBlock &BB,
23 uint64_t FuncAddress) {
24 const uint64_t BBOutputOffset =
25 BB.getOutputAddressRange().first - FuncAddress;
26 const uint32_t BBInputOffset = BB.getInputOffset();
28 // Every output BB must track back to an input BB for profile collection
29 // in bolted binaries. If we are missing an offset, it means this block was
30 // created by a pass. We will skip writing any entries for it, and this means
31 // any traffic happening in this block will map to the previous block in the
32 // layout. This covers the case where an input basic block is split into two,
33 // and the second one lacks any offset.
34 if (BBInputOffset == BinaryBasicBlock::INVALID_OFFSET)
35 return;
37 LLVM_DEBUG(dbgs() << "BB " << BB.getName() << "\n");
38 LLVM_DEBUG(dbgs() << " Key: " << Twine::utohexstr(BBOutputOffset)
39 << " Val: " << Twine::utohexstr(BBInputOffset) << "\n");
40 // In case of conflicts (same Key mapping to different Vals), the last
41 // update takes precedence. Of course it is not ideal to have conflicts and
42 // those happen when we have an empty BB that either contained only
43 // NOPs or a jump to the next block (successor). Either way, the successor
44 // and this deleted block will both share the same output address (the same
45 // key), and we need to map back. We choose here to privilege the successor by
46 // allowing it to overwrite the previously inserted key in the map.
47 Map[BBOutputOffset] = BBInputOffset;
49 const auto &IOAddressMap =
50 BB.getFunction()->getBinaryContext().getIOAddressMap();
52 for (const auto &[InputOffset, Sym] : BB.getLocSyms()) {
53 const auto InputAddress = BB.getFunction()->getAddress() + InputOffset;
54 const auto OutputAddress = IOAddressMap.lookup(InputAddress);
55 assert(OutputAddress && "Unknown instruction address");
56 const auto OutputOffset = *OutputAddress - FuncAddress;
58 // Is this the first instruction in the BB? No need to duplicate the entry.
59 if (OutputOffset == BBOutputOffset)
60 continue;
62 LLVM_DEBUG(dbgs() << " Key: " << Twine::utohexstr(OutputOffset) << " Val: "
63 << Twine::utohexstr(InputOffset) << " (branch)\n");
64 Map.insert(
65 std::pair<uint32_t, uint32_t>(OutputOffset, InputOffset | BRANCHENTRY));
69 void BoltAddressTranslation::write(const BinaryContext &BC, raw_ostream &OS) {
70 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Writing BOLT Address Translation Tables\n");
71 for (auto &BFI : BC.getBinaryFunctions()) {
72 const BinaryFunction &Function = BFI.second;
73 // We don't need a translation table if the body of the function hasn't
74 // changed
75 if (Function.isIgnored() || (!BC.HasRelocations && !Function.isSimple()))
76 continue;
78 LLVM_DEBUG(dbgs() << "Function name: " << Function.getPrintName() << "\n");
79 LLVM_DEBUG(dbgs() << " Address reference: 0x"
80 << Twine::utohexstr(Function.getOutputAddress()) << "\n");
82 MapTy Map;
83 for (const BinaryBasicBlock *const BB :
84 Function.getLayout().getMainFragment())
85 writeEntriesForBB(Map, *BB, Function.getOutputAddress());
86 Maps.emplace(Function.getOutputAddress(), std::move(Map));
88 if (!Function.isSplit())
89 continue;
91 // Split maps
92 LLVM_DEBUG(dbgs() << " Cold part\n");
93 for (const FunctionFragment &FF :
94 Function.getLayout().getSplitFragments()) {
95 Map.clear();
96 for (const BinaryBasicBlock *const BB : FF)
97 writeEntriesForBB(Map, *BB, FF.getAddress());
99 Maps.emplace(FF.getAddress(), std::move(Map));
100 ColdPartSource.emplace(FF.getAddress(), Function.getOutputAddress());
104 const uint32_t NumFuncs = Maps.size();
105 OS.write(reinterpret_cast<const char *>(&NumFuncs), 4);
106 LLVM_DEBUG(dbgs() << "Writing " << NumFuncs << " functions for BAT.\n");
107 for (auto &MapEntry : Maps) {
108 const uint64_t Address = MapEntry.first;
109 MapTy &Map = MapEntry.second;
110 const uint32_t NumEntries = Map.size();
111 LLVM_DEBUG(dbgs() << "Writing " << NumEntries << " entries for 0x"
112 << Twine::utohexstr(Address) << ".\n");
113 OS.write(reinterpret_cast<const char *>(&Address), 8);
114 OS.write(reinterpret_cast<const char *>(&NumEntries), 4);
115 for (std::pair<const uint32_t, uint32_t> &KeyVal : Map) {
116 OS.write(reinterpret_cast<const char *>(&KeyVal.first), 4);
117 OS.write(reinterpret_cast<const char *>(&KeyVal.second), 4);
120 const uint32_t NumColdEntries = ColdPartSource.size();
121 LLVM_DEBUG(dbgs() << "Writing " << NumColdEntries
122 << " cold part mappings.\n");
123 OS.write(reinterpret_cast<const char *>(&NumColdEntries), 4);
124 for (std::pair<const uint64_t, uint64_t> &ColdEntry : ColdPartSource) {
125 OS.write(reinterpret_cast<const char *>(&ColdEntry.first), 8);
126 OS.write(reinterpret_cast<const char *>(&ColdEntry.second), 8);
127 LLVM_DEBUG(dbgs() << " " << Twine::utohexstr(ColdEntry.first) << " -> "
128 << Twine::utohexstr(ColdEntry.second) << "\n");
131 outs() << "BOLT-INFO: Wrote " << Maps.size() << " BAT maps\n";
132 outs() << "BOLT-INFO: Wrote " << NumColdEntries
133 << " BAT cold-to-hot entries\n";
136 std::error_code BoltAddressTranslation::parse(StringRef Buf) {
137 DataExtractor DE = DataExtractor(Buf, true, 8);
138 uint64_t Offset = 0;
139 if (Buf.size() < 12)
140 return make_error_code(llvm::errc::io_error);
142 const uint32_t NameSz = DE.getU32(&Offset);
143 const uint32_t DescSz = DE.getU32(&Offset);
144 const uint32_t Type = DE.getU32(&Offset);
146 if (Type != BinarySection::NT_BOLT_BAT ||
147 Buf.size() + Offset < alignTo(NameSz, 4) + DescSz)
148 return make_error_code(llvm::errc::io_error);
150 StringRef Name = Buf.slice(Offset, Offset + NameSz);
151 Offset = alignTo(Offset + NameSz, 4);
152 if (Name.substr(0, 4) != "BOLT")
153 return make_error_code(llvm::errc::io_error);
155 if (Buf.size() - Offset < 4)
156 return make_error_code(llvm::errc::io_error);
158 const uint32_t NumFunctions = DE.getU32(&Offset);
159 LLVM_DEBUG(dbgs() << "Parsing " << NumFunctions << " functions\n");
160 for (uint32_t I = 0; I < NumFunctions; ++I) {
161 if (Buf.size() - Offset < 12)
162 return make_error_code(llvm::errc::io_error);
164 const uint64_t Address = DE.getU64(&Offset);
165 const uint32_t NumEntries = DE.getU32(&Offset);
166 MapTy Map;
168 LLVM_DEBUG(dbgs() << "Parsing " << NumEntries << " entries for 0x"
169 << Twine::utohexstr(Address) << "\n");
170 if (Buf.size() - Offset < 8 * NumEntries)
171 return make_error_code(llvm::errc::io_error);
172 for (uint32_t J = 0; J < NumEntries; ++J) {
173 const uint32_t OutputAddr = DE.getU32(&Offset);
174 const uint32_t InputAddr = DE.getU32(&Offset);
175 Map.insert(std::pair<uint32_t, uint32_t>(OutputAddr, InputAddr));
176 LLVM_DEBUG(dbgs() << Twine::utohexstr(OutputAddr) << " -> "
177 << Twine::utohexstr(InputAddr) << "\n");
179 Maps.insert(std::pair<uint64_t, MapTy>(Address, Map));
182 if (Buf.size() - Offset < 4)
183 return make_error_code(llvm::errc::io_error);
185 const uint32_t NumColdEntries = DE.getU32(&Offset);
186 LLVM_DEBUG(dbgs() << "Parsing " << NumColdEntries << " cold part mappings\n");
187 for (uint32_t I = 0; I < NumColdEntries; ++I) {
188 if (Buf.size() - Offset < 16)
189 return make_error_code(llvm::errc::io_error);
190 const uint32_t ColdAddress = DE.getU64(&Offset);
191 const uint32_t HotAddress = DE.getU64(&Offset);
192 ColdPartSource.insert(
193 std::pair<uint64_t, uint64_t>(ColdAddress, HotAddress));
194 LLVM_DEBUG(dbgs() << Twine::utohexstr(ColdAddress) << " -> "
195 << Twine::utohexstr(HotAddress) << "\n");
197 outs() << "BOLT-INFO: Parsed " << Maps.size() << " BAT entries\n";
198 outs() << "BOLT-INFO: Parsed " << NumColdEntries
199 << " BAT cold-to-hot entries\n";
201 return std::error_code();
204 void BoltAddressTranslation::dump(raw_ostream &OS) {
205 const size_t NumTables = Maps.size();
206 OS << "BAT tables for " << NumTables << " functions:\n";
207 for (const auto &MapEntry : Maps) {
208 OS << "Function Address: 0x" << Twine::utohexstr(MapEntry.first) << "\n";
209 OS << "BB mappings:\n";
210 for (const auto &Entry : MapEntry.second) {
211 const bool IsBranch = Entry.second & BRANCHENTRY;
212 const uint32_t Val = Entry.second & ~BRANCHENTRY;
213 OS << "0x" << Twine::utohexstr(Entry.first) << " -> "
214 << "0x" << Twine::utohexstr(Val);
215 if (IsBranch)
216 OS << " (branch)";
217 OS << "\n";
219 OS << "\n";
221 const size_t NumColdParts = ColdPartSource.size();
222 if (!NumColdParts)
223 return;
225 OS << NumColdParts << " cold mappings:\n";
226 for (const auto &Entry : ColdPartSource) {
227 OS << "0x" << Twine::utohexstr(Entry.first) << " -> "
228 << Twine::utohexstr(Entry.second) << "\n";
230 OS << "\n";
233 uint64_t BoltAddressTranslation::translate(uint64_t FuncAddress,
234 uint64_t Offset,
235 bool IsBranchSrc) const {
236 auto Iter = Maps.find(FuncAddress);
237 if (Iter == Maps.end())
238 return Offset;
240 const MapTy &Map = Iter->second;
241 auto KeyVal = Map.upper_bound(Offset);
242 if (KeyVal == Map.begin())
243 return Offset;
245 --KeyVal;
247 const uint32_t Val = KeyVal->second & ~BRANCHENTRY;
248 // Branch source addresses are translated to the first instruction of the
249 // source BB to avoid accounting for modifications BOLT may have made in the
250 // BB regarding deletion/addition of instructions.
251 if (IsBranchSrc)
252 return Val;
253 return Offset - KeyVal->first + Val;
256 std::optional<BoltAddressTranslation::FallthroughListTy>
257 BoltAddressTranslation::getFallthroughsInTrace(uint64_t FuncAddress,
258 uint64_t From,
259 uint64_t To) const {
260 SmallVector<std::pair<uint64_t, uint64_t>, 16> Res;
262 // Filter out trivial case
263 if (From >= To)
264 return Res;
266 From -= FuncAddress;
267 To -= FuncAddress;
269 auto Iter = Maps.find(FuncAddress);
270 if (Iter == Maps.end())
271 return std::nullopt;
273 const MapTy &Map = Iter->second;
274 auto FromIter = Map.upper_bound(From);
275 if (FromIter == Map.begin())
276 return Res;
277 // Skip instruction entries, to create fallthroughs we are only interested in
278 // BB boundaries
279 do {
280 if (FromIter == Map.begin())
281 return Res;
282 --FromIter;
283 } while (FromIter->second & BRANCHENTRY);
285 auto ToIter = Map.upper_bound(To);
286 if (ToIter == Map.begin())
287 return Res;
288 --ToIter;
289 if (FromIter->first >= ToIter->first)
290 return Res;
292 for (auto Iter = FromIter; Iter != ToIter;) {
293 const uint32_t Src = Iter->first;
294 if (Iter->second & BRANCHENTRY) {
295 ++Iter;
296 continue;
299 ++Iter;
300 while (Iter->second & BRANCHENTRY && Iter != ToIter)
301 ++Iter;
302 if (Iter->second & BRANCHENTRY)
303 break;
304 Res.emplace_back(Src, Iter->first);
307 return Res;
310 uint64_t BoltAddressTranslation::fetchParentAddress(uint64_t Address) const {
311 auto Iter = ColdPartSource.find(Address);
312 if (Iter == ColdPartSource.end())
313 return 0;
314 return Iter->second;
317 bool BoltAddressTranslation::enabledFor(
318 llvm::object::ELFObjectFileBase *InputFile) const {
319 for (const SectionRef &Section : InputFile->sections()) {
320 Expected<StringRef> SectionNameOrErr = Section.getName();
321 if (Error E = SectionNameOrErr.takeError())
322 continue;
324 if (SectionNameOrErr.get() == SECTION_NAME)
325 return true;
327 return false;
329 } // namespace bolt
330 } // namespace llvm