1 //===- bolt/Profile/BoltAddressTranslation.cpp ----------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "bolt/Profile/BoltAddressTranslation.h"
10 #include "bolt/Core/BinaryFunction.h"
11 #include "llvm/Support/Errc.h"
12 #include "llvm/Support/Error.h"
13 #include "llvm/Support/LEB128.h"
15 #define DEBUG_TYPE "bolt-bat"
20 const char *BoltAddressTranslation::SECTION_NAME
= ".note.bolt_bat";
22 void BoltAddressTranslation::writeEntriesForBB(MapTy
&Map
,
23 const BinaryBasicBlock
&BB
,
24 uint64_t FuncAddress
) {
25 const uint64_t BBOutputOffset
=
26 BB
.getOutputAddressRange().first
- FuncAddress
;
27 const uint32_t BBInputOffset
= BB
.getInputOffset();
29 // Every output BB must track back to an input BB for profile collection
30 // in bolted binaries. If we are missing an offset, it means this block was
31 // created by a pass. We will skip writing any entries for it, and this means
32 // any traffic happening in this block will map to the previous block in the
33 // layout. This covers the case where an input basic block is split into two,
34 // and the second one lacks any offset.
35 if (BBInputOffset
== BinaryBasicBlock::INVALID_OFFSET
)
38 LLVM_DEBUG(dbgs() << "BB " << BB
.getName() << "\n");
39 LLVM_DEBUG(dbgs() << " Key: " << Twine::utohexstr(BBOutputOffset
)
40 << " Val: " << Twine::utohexstr(BBInputOffset
) << "\n");
41 // In case of conflicts (same Key mapping to different Vals), the last
42 // update takes precedence. Of course it is not ideal to have conflicts and
43 // those happen when we have an empty BB that either contained only
44 // NOPs or a jump to the next block (successor). Either way, the successor
45 // and this deleted block will both share the same output address (the same
46 // key), and we need to map back. We choose here to privilege the successor by
47 // allowing it to overwrite the previously inserted key in the map.
48 Map
[BBOutputOffset
] = BBInputOffset
<< 1;
50 const auto &IOAddressMap
=
51 BB
.getFunction()->getBinaryContext().getIOAddressMap();
53 for (const auto &[InputOffset
, Sym
] : BB
.getLocSyms()) {
54 const auto InputAddress
= BB
.getFunction()->getAddress() + InputOffset
;
55 const auto OutputAddress
= IOAddressMap
.lookup(InputAddress
);
56 assert(OutputAddress
&& "Unknown instruction address");
57 const auto OutputOffset
= *OutputAddress
- FuncAddress
;
59 // Is this the first instruction in the BB? No need to duplicate the entry.
60 if (OutputOffset
== BBOutputOffset
)
63 LLVM_DEBUG(dbgs() << " Key: " << Twine::utohexstr(OutputOffset
) << " Val: "
64 << Twine::utohexstr(InputOffset
) << " (branch)\n");
65 Map
.insert(std::pair
<uint32_t, uint32_t>(OutputOffset
,
66 (InputOffset
<< 1) | BRANCHENTRY
));
70 void BoltAddressTranslation::write(const BinaryContext
&BC
, raw_ostream
&OS
) {
71 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Writing BOLT Address Translation Tables\n");
72 for (auto &BFI
: BC
.getBinaryFunctions()) {
73 const BinaryFunction
&Function
= BFI
.second
;
74 // We don't need a translation table if the body of the function hasn't
76 if (Function
.isIgnored() || (!BC
.HasRelocations
&& !Function
.isSimple()))
79 LLVM_DEBUG(dbgs() << "Function name: " << Function
.getPrintName() << "\n");
80 LLVM_DEBUG(dbgs() << " Address reference: 0x"
81 << Twine::utohexstr(Function
.getOutputAddress()) << "\n");
84 for (const BinaryBasicBlock
*const BB
:
85 Function
.getLayout().getMainFragment())
86 writeEntriesForBB(Map
, *BB
, Function
.getOutputAddress());
87 Maps
.emplace(Function
.getOutputAddress(), std::move(Map
));
89 if (!Function
.isSplit())
93 LLVM_DEBUG(dbgs() << " Cold part\n");
94 for (const FunctionFragment
&FF
:
95 Function
.getLayout().getSplitFragments()) {
97 for (const BinaryBasicBlock
*const BB
: FF
)
98 writeEntriesForBB(Map
, *BB
, FF
.getAddress());
100 Maps
.emplace(FF
.getAddress(), std::move(Map
));
101 ColdPartSource
.emplace(FF
.getAddress(), Function
.getOutputAddress());
105 // Output addresses are delta-encoded
106 uint64_t PrevAddress
= 0;
107 writeMaps
</*Cold=*/false>(Maps
, PrevAddress
, OS
);
108 writeMaps
</*Cold=*/true>(Maps
, PrevAddress
, OS
);
110 outs() << "BOLT-INFO: Wrote " << Maps
.size() << " BAT maps\n";
114 void BoltAddressTranslation::writeMaps(std::map
<uint64_t, MapTy
> &Maps
,
115 uint64_t &PrevAddress
, raw_ostream
&OS
) {
116 const uint32_t NumFuncs
=
117 llvm::count_if(llvm::make_first_range(Maps
), [&](const uint64_t Address
) {
118 return Cold
== ColdPartSource
.count(Address
);
120 encodeULEB128(NumFuncs
, OS
);
121 LLVM_DEBUG(dbgs() << "Writing " << NumFuncs
<< (Cold
? " cold" : "")
122 << " functions for BAT.\n");
123 size_t PrevIndex
= 0;
124 for (auto &MapEntry
: Maps
) {
125 const uint64_t Address
= MapEntry
.first
;
126 // Only process cold fragments in cold mode, and vice versa.
127 if (Cold
!= ColdPartSource
.count(Address
))
129 MapTy
&Map
= MapEntry
.second
;
130 const uint32_t NumEntries
= Map
.size();
131 LLVM_DEBUG(dbgs() << "Writing " << NumEntries
<< " entries for 0x"
132 << Twine::utohexstr(Address
) << ".\n");
133 encodeULEB128(Address
- PrevAddress
, OS
);
134 PrevAddress
= Address
;
137 std::distance(ColdPartSource
.begin(), ColdPartSource
.find(Address
));
138 encodeULEB128(HotIndex
- PrevIndex
, OS
);
139 PrevIndex
= HotIndex
;
141 encodeULEB128(NumEntries
, OS
);
142 uint64_t InOffset
= 0;
143 // Output and Input addresses and delta-encoded
144 for (std::pair
<const uint32_t, uint32_t> &KeyVal
: Map
) {
145 const uint64_t OutputAddress
= KeyVal
.first
+ Address
;
146 encodeULEB128(OutputAddress
- PrevAddress
, OS
);
147 PrevAddress
= OutputAddress
;
148 encodeSLEB128(KeyVal
.second
- InOffset
, OS
);
149 InOffset
= KeyVal
.second
;
154 std::error_code
BoltAddressTranslation::parse(StringRef Buf
) {
155 DataExtractor DE
= DataExtractor(Buf
, true, 8);
158 return make_error_code(llvm::errc::io_error
);
160 const uint32_t NameSz
= DE
.getU32(&Offset
);
161 const uint32_t DescSz
= DE
.getU32(&Offset
);
162 const uint32_t Type
= DE
.getU32(&Offset
);
164 if (Type
!= BinarySection::NT_BOLT_BAT
||
165 Buf
.size() + Offset
< alignTo(NameSz
, 4) + DescSz
)
166 return make_error_code(llvm::errc::io_error
);
168 StringRef Name
= Buf
.slice(Offset
, Offset
+ NameSz
);
169 Offset
= alignTo(Offset
+ NameSz
, 4);
170 if (Name
.substr(0, 4) != "BOLT")
171 return make_error_code(llvm::errc::io_error
);
173 Error
Err(Error::success());
174 std::vector
<uint64_t> HotFuncs
;
175 uint64_t PrevAddress
= 0;
176 parseMaps
</*Cold=*/false>(HotFuncs
, PrevAddress
, DE
, Offset
, Err
);
177 parseMaps
</*Cold=*/true>(HotFuncs
, PrevAddress
, DE
, Offset
, Err
);
178 outs() << "BOLT-INFO: Parsed " << Maps
.size() << " BAT entries\n";
179 return errorToErrorCode(std::move(Err
));
183 void BoltAddressTranslation::parseMaps(std::vector
<uint64_t> &HotFuncs
,
184 uint64_t &PrevAddress
, DataExtractor
&DE
,
185 uint64_t &Offset
, Error
&Err
) {
186 const uint32_t NumFunctions
= DE
.getULEB128(&Offset
, &Err
);
187 LLVM_DEBUG(dbgs() << "Parsing " << NumFunctions
<< (Cold
? " cold" : "")
190 for (uint32_t I
= 0; I
< NumFunctions
; ++I
) {
191 const uint64_t Address
= PrevAddress
+ DE
.getULEB128(&Offset
, &Err
);
192 PrevAddress
= Address
;
194 HotIndex
+= DE
.getULEB128(&Offset
, &Err
);
195 ColdPartSource
.emplace(Address
, HotFuncs
[HotIndex
]);
197 HotFuncs
.push_back(Address
);
199 const uint32_t NumEntries
= DE
.getULEB128(&Offset
, &Err
);
202 LLVM_DEBUG(dbgs() << "Parsing " << NumEntries
<< " entries for 0x"
203 << Twine::utohexstr(Address
) << "\n");
204 uint64_t InputOffset
= 0;
205 for (uint32_t J
= 0; J
< NumEntries
; ++J
) {
206 const uint64_t OutputDelta
= DE
.getULEB128(&Offset
, &Err
);
207 const uint64_t OutputAddress
= PrevAddress
+ OutputDelta
;
208 const uint64_t OutputOffset
= OutputAddress
- Address
;
209 PrevAddress
= OutputAddress
;
210 const int64_t InputDelta
= DE
.getSLEB128(&Offset
, &Err
);
211 InputOffset
+= InputDelta
;
212 Map
.insert(std::pair
<uint32_t, uint32_t>(OutputOffset
, InputOffset
));
214 dbgs() << formatv("{0:x} -> {1:x} ({2}/{3}b -> {4}/{5}b), {6:x}\n",
215 OutputOffset
, InputOffset
, OutputDelta
,
216 encodeULEB128(OutputDelta
, nulls()), InputDelta
,
217 encodeSLEB128(InputDelta
, nulls()), OutputAddress
));
219 Maps
.insert(std::pair
<uint64_t, MapTy
>(Address
, Map
));
223 void BoltAddressTranslation::dump(raw_ostream
&OS
) {
224 const size_t NumTables
= Maps
.size();
225 OS
<< "BAT tables for " << NumTables
<< " functions:\n";
226 for (const auto &MapEntry
: Maps
) {
227 OS
<< "Function Address: 0x" << Twine::utohexstr(MapEntry
.first
) << "\n";
228 OS
<< "BB mappings:\n";
229 for (const auto &Entry
: MapEntry
.second
) {
230 const bool IsBranch
= Entry
.second
& BRANCHENTRY
;
231 const uint32_t Val
= Entry
.second
>> 1; // dropping BRANCHENTRY bit
232 OS
<< "0x" << Twine::utohexstr(Entry
.first
) << " -> "
233 << "0x" << Twine::utohexstr(Val
);
240 const size_t NumColdParts
= ColdPartSource
.size();
244 OS
<< NumColdParts
<< " cold mappings:\n";
245 for (const auto &Entry
: ColdPartSource
) {
246 OS
<< "0x" << Twine::utohexstr(Entry
.first
) << " -> "
247 << Twine::utohexstr(Entry
.second
) << "\n";
252 uint64_t BoltAddressTranslation::translate(uint64_t FuncAddress
,
254 bool IsBranchSrc
) const {
255 auto Iter
= Maps
.find(FuncAddress
);
256 if (Iter
== Maps
.end())
259 const MapTy
&Map
= Iter
->second
;
260 auto KeyVal
= Map
.upper_bound(Offset
);
261 if (KeyVal
== Map
.begin())
266 const uint32_t Val
= KeyVal
->second
>> 1; // dropping BRANCHENTRY bit
267 // Branch source addresses are translated to the first instruction of the
268 // source BB to avoid accounting for modifications BOLT may have made in the
269 // BB regarding deletion/addition of instructions.
272 return Offset
- KeyVal
->first
+ Val
;
275 std::optional
<BoltAddressTranslation::FallthroughListTy
>
276 BoltAddressTranslation::getFallthroughsInTrace(uint64_t FuncAddress
,
279 SmallVector
<std::pair
<uint64_t, uint64_t>, 16> Res
;
281 // Filter out trivial case
288 auto Iter
= Maps
.find(FuncAddress
);
289 if (Iter
== Maps
.end())
292 const MapTy
&Map
= Iter
->second
;
293 auto FromIter
= Map
.upper_bound(From
);
294 if (FromIter
== Map
.begin())
296 // Skip instruction entries, to create fallthroughs we are only interested in
299 if (FromIter
== Map
.begin())
302 } while (FromIter
->second
& BRANCHENTRY
);
304 auto ToIter
= Map
.upper_bound(To
);
305 if (ToIter
== Map
.begin())
308 if (FromIter
->first
>= ToIter
->first
)
311 for (auto Iter
= FromIter
; Iter
!= ToIter
;) {
312 const uint32_t Src
= Iter
->first
;
313 if (Iter
->second
& BRANCHENTRY
) {
319 while (Iter
->second
& BRANCHENTRY
&& Iter
!= ToIter
)
321 if (Iter
->second
& BRANCHENTRY
)
323 Res
.emplace_back(Src
, Iter
->first
);
329 uint64_t BoltAddressTranslation::fetchParentAddress(uint64_t Address
) const {
330 auto Iter
= ColdPartSource
.find(Address
);
331 if (Iter
== ColdPartSource
.end())
336 bool BoltAddressTranslation::enabledFor(
337 llvm::object::ELFObjectFileBase
*InputFile
) const {
338 for (const SectionRef
&Section
: InputFile
->sections()) {
339 Expected
<StringRef
> SectionNameOrErr
= Section
.getName();
340 if (Error E
= SectionNameOrErr
.takeError())
343 if (SectionNameOrErr
.get() == SECTION_NAME
)