1 //===- bolt/Profile/BoltAddressTranslation.cpp ----------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "bolt/Profile/BoltAddressTranslation.h"
10 #include "bolt/Core/BinaryFunction.h"
11 #include "llvm/Support/DataExtractor.h"
12 #include "llvm/Support/Errc.h"
14 #define DEBUG_TYPE "bolt-bat"
// Section name constant; enabledFor() compares the name of each section of the
// input file against this string.
19 const char *BoltAddressTranslation::SECTION_NAME
= ".note.bolt_bat";
21 void BoltAddressTranslation::writeEntriesForBB(MapTy
&Map
,
22 const BinaryBasicBlock
&BB
,
23 uint64_t FuncAddress
) {
24 const uint64_t BBOutputOffset
=
25 BB
.getOutputAddressRange().first
- FuncAddress
;
26 const uint32_t BBInputOffset
= BB
.getInputOffset();
28 // Every output BB must track back to an input BB for profile collection
29 // in bolted binaries. If we are missing an offset, it means this block was
30 // created by a pass. We will skip writing any entries for it, and this means
31 // any traffic happening in this block will map to the previous block in the
32 // layout. This covers the case where an input basic block is split into two,
33 // and the second one lacks any offset.
34 if (BBInputOffset
== BinaryBasicBlock::INVALID_OFFSET
)
37 LLVM_DEBUG(dbgs() << "BB " << BB
.getName() << "\n");
38 LLVM_DEBUG(dbgs() << " Key: " << Twine::utohexstr(BBOutputOffset
)
39 << " Val: " << Twine::utohexstr(BBInputOffset
) << "\n");
40 // In case of conflicts (same Key mapping to different Vals), the last
41 // update takes precedence. Of course it is not ideal to have conflicts and
42 // those happen when we have an empty BB that either contained only
43 // NOPs or a jump to the next block (successor). Either way, the successor
44 // and this deleted block will both share the same output address (the same
45 // key), and we need to map back. We choose here to privilege the successor by
46 // allowing it to overwrite the previously inserted key in the map.
47 Map
[BBOutputOffset
] = BBInputOffset
;
49 const auto &IOAddressMap
=
50 BB
.getFunction()->getBinaryContext().getIOAddressMap();
52 for (const auto &[InputOffset
, Sym
] : BB
.getLocSyms()) {
53 const auto InputAddress
= BB
.getFunction()->getAddress() + InputOffset
;
54 const auto OutputAddress
= IOAddressMap
.lookup(InputAddress
);
55 assert(OutputAddress
&& "Unknown instruction address");
56 const auto OutputOffset
= *OutputAddress
- FuncAddress
;
58 // Is this the first instruction in the BB? No need to duplicate the entry.
59 if (OutputOffset
== BBOutputOffset
)
62 LLVM_DEBUG(dbgs() << " Key: " << Twine::utohexstr(OutputOffset
) << " Val: "
63 << Twine::utohexstr(InputOffset
) << " (branch)\n");
65 std::pair
<uint32_t, uint32_t>(OutputOffset
, InputOffset
| BRANCHENTRY
));
69 void BoltAddressTranslation::write(const BinaryContext
&BC
, raw_ostream
&OS
) {
70 LLVM_DEBUG(dbgs() << "BOLT-DEBUG: Writing BOLT Address Translation Tables\n");
71 for (auto &BFI
: BC
.getBinaryFunctions()) {
72 const BinaryFunction
&Function
= BFI
.second
;
73 // We don't need a translation table if the body of the function hasn't
75 if (Function
.isIgnored() || (!BC
.HasRelocations
&& !Function
.isSimple()))
78 LLVM_DEBUG(dbgs() << "Function name: " << Function
.getPrintName() << "\n");
79 LLVM_DEBUG(dbgs() << " Address reference: 0x"
80 << Twine::utohexstr(Function
.getOutputAddress()) << "\n");
83 for (const BinaryBasicBlock
*const BB
:
84 Function
.getLayout().getMainFragment())
85 writeEntriesForBB(Map
, *BB
, Function
.getOutputAddress());
86 Maps
.emplace(Function
.getOutputAddress(), std::move(Map
));
88 if (!Function
.isSplit())
92 LLVM_DEBUG(dbgs() << " Cold part\n");
93 for (const FunctionFragment
&FF
:
94 Function
.getLayout().getSplitFragments()) {
96 for (const BinaryBasicBlock
*const BB
: FF
)
97 writeEntriesForBB(Map
, *BB
, FF
.getAddress());
99 Maps
.emplace(FF
.getAddress(), std::move(Map
));
100 ColdPartSource
.emplace(FF
.getAddress(), Function
.getOutputAddress());
104 const uint32_t NumFuncs
= Maps
.size();
105 OS
.write(reinterpret_cast<const char *>(&NumFuncs
), 4);
106 LLVM_DEBUG(dbgs() << "Writing " << NumFuncs
<< " functions for BAT.\n");
107 for (auto &MapEntry
: Maps
) {
108 const uint64_t Address
= MapEntry
.first
;
109 MapTy
&Map
= MapEntry
.second
;
110 const uint32_t NumEntries
= Map
.size();
111 LLVM_DEBUG(dbgs() << "Writing " << NumEntries
<< " entries for 0x"
112 << Twine::utohexstr(Address
) << ".\n");
113 OS
.write(reinterpret_cast<const char *>(&Address
), 8);
114 OS
.write(reinterpret_cast<const char *>(&NumEntries
), 4);
115 for (std::pair
<const uint32_t, uint32_t> &KeyVal
: Map
) {
116 OS
.write(reinterpret_cast<const char *>(&KeyVal
.first
), 4);
117 OS
.write(reinterpret_cast<const char *>(&KeyVal
.second
), 4);
120 const uint32_t NumColdEntries
= ColdPartSource
.size();
121 LLVM_DEBUG(dbgs() << "Writing " << NumColdEntries
122 << " cold part mappings.\n");
123 OS
.write(reinterpret_cast<const char *>(&NumColdEntries
), 4);
124 for (std::pair
<const uint64_t, uint64_t> &ColdEntry
: ColdPartSource
) {
125 OS
.write(reinterpret_cast<const char *>(&ColdEntry
.first
), 8);
126 OS
.write(reinterpret_cast<const char *>(&ColdEntry
.second
), 8);
127 LLVM_DEBUG(dbgs() << " " << Twine::utohexstr(ColdEntry
.first
) << " -> "
128 << Twine::utohexstr(ColdEntry
.second
) << "\n");
131 outs() << "BOLT-INFO: Wrote " << Maps
.size() << " BAT maps\n";
132 outs() << "BOLT-INFO: Wrote " << NumColdEntries
133 << " BAT cold-to-hot entries\n";
136 std::error_code
BoltAddressTranslation::parse(StringRef Buf
) {
137 DataExtractor DE
= DataExtractor(Buf
, true, 8);
140 return make_error_code(llvm::errc::io_error
);
142 const uint32_t NameSz
= DE
.getU32(&Offset
);
143 const uint32_t DescSz
= DE
.getU32(&Offset
);
144 const uint32_t Type
= DE
.getU32(&Offset
);
146 if (Type
!= BinarySection::NT_BOLT_BAT
||
147 Buf
.size() + Offset
< alignTo(NameSz
, 4) + DescSz
)
148 return make_error_code(llvm::errc::io_error
);
150 StringRef Name
= Buf
.slice(Offset
, Offset
+ NameSz
);
151 Offset
= alignTo(Offset
+ NameSz
, 4);
152 if (Name
.substr(0, 4) != "BOLT")
153 return make_error_code(llvm::errc::io_error
);
155 if (Buf
.size() - Offset
< 4)
156 return make_error_code(llvm::errc::io_error
);
158 const uint32_t NumFunctions
= DE
.getU32(&Offset
);
159 LLVM_DEBUG(dbgs() << "Parsing " << NumFunctions
<< " functions\n");
160 for (uint32_t I
= 0; I
< NumFunctions
; ++I
) {
161 if (Buf
.size() - Offset
< 12)
162 return make_error_code(llvm::errc::io_error
);
164 const uint64_t Address
= DE
.getU64(&Offset
);
165 const uint32_t NumEntries
= DE
.getU32(&Offset
);
168 LLVM_DEBUG(dbgs() << "Parsing " << NumEntries
<< " entries for 0x"
169 << Twine::utohexstr(Address
) << "\n");
170 if (Buf
.size() - Offset
< 8 * NumEntries
)
171 return make_error_code(llvm::errc::io_error
);
172 for (uint32_t J
= 0; J
< NumEntries
; ++J
) {
173 const uint32_t OutputAddr
= DE
.getU32(&Offset
);
174 const uint32_t InputAddr
= DE
.getU32(&Offset
);
175 Map
.insert(std::pair
<uint32_t, uint32_t>(OutputAddr
, InputAddr
));
176 LLVM_DEBUG(dbgs() << Twine::utohexstr(OutputAddr
) << " -> "
177 << Twine::utohexstr(InputAddr
) << "\n");
179 Maps
.insert(std::pair
<uint64_t, MapTy
>(Address
, Map
));
182 if (Buf
.size() - Offset
< 4)
183 return make_error_code(llvm::errc::io_error
);
185 const uint32_t NumColdEntries
= DE
.getU32(&Offset
);
186 LLVM_DEBUG(dbgs() << "Parsing " << NumColdEntries
<< " cold part mappings\n");
187 for (uint32_t I
= 0; I
< NumColdEntries
; ++I
) {
188 if (Buf
.size() - Offset
< 16)
189 return make_error_code(llvm::errc::io_error
);
190 const uint32_t ColdAddress
= DE
.getU64(&Offset
);
191 const uint32_t HotAddress
= DE
.getU64(&Offset
);
192 ColdPartSource
.insert(
193 std::pair
<uint64_t, uint64_t>(ColdAddress
, HotAddress
));
194 LLVM_DEBUG(dbgs() << Twine::utohexstr(ColdAddress
) << " -> "
195 << Twine::utohexstr(HotAddress
) << "\n");
197 outs() << "BOLT-INFO: Parsed " << Maps
.size() << " BAT entries\n";
198 outs() << "BOLT-INFO: Parsed " << NumColdEntries
199 << " BAT cold-to-hot entries\n";
201 return std::error_code();
// Prints a textual listing of the translation tables held in Maps, followed by
// the cold mappings held in ColdPartSource, to OS.
// NOTE(review): several statements of this function (including whatever uses
// IsBranch, and the closing braces) are not visible in this listing; confirm
// the exact output format against the full source.
204 void BoltAddressTranslation::dump(raw_ostream
&OS
) {
205 const size_t NumTables
= Maps
.size();
206 OS
<< "BAT tables for " << NumTables
<< " functions:\n";
// One section per table: the key of the Maps entry is printed as the function
// address, then each (key -> value) pair of the table.
207 for (const auto &MapEntry
: Maps
) {
208 OS
<< "Function Address: 0x" << Twine::utohexstr(MapEntry
.first
) << "\n";
209 OS
<< "BB mappings:\n";
210 for (const auto &Entry
: MapEntry
.second
) {
// The BRANCHENTRY bit of the stored value is a flag; it is split off here so
// that only the remaining bits are printed as the mapped value.
211 const bool IsBranch
= Entry
.second
& BRANCHENTRY
;
212 const uint32_t Val
= Entry
.second
& ~BRANCHENTRY
;
213 OS
<< "0x" << Twine::utohexstr(Entry
.first
) << " -> "
214 << "0x" << Twine::utohexstr(Val
);
// Second part of the listing: one "key -> value" line per ColdPartSource entry.
221 const size_t NumColdParts
= ColdPartSource
.size();
225 OS
<< NumColdParts
<< " cold mappings:\n";
226 for (const auto &Entry
: ColdPartSource
) {
227 OS
<< "0x" << Twine::utohexstr(Entry
.first
) << " -> "
228 << Twine::utohexstr(Entry
.second
) << "\n";
// Translates Offset using the table stored in Maps under FuncAddress.
// The visible tail returns Offset rebased from the selected entry's key onto
// that entry's value (with the BRANCHENTRY flag bit cleared).
// NOTE(review): the declaration of Offset, the statements guarded by the two
// `if`s below, any adjustment of KeyVal after upper_bound, and the handling of
// IsBranchSrc are not visible in this listing; confirm against the full source.
233 uint64_t BoltAddressTranslation::translate(uint64_t FuncAddress
,
235 bool IsBranchSrc
) const {
// Look up the per-function table; the not-found case is handled by the `if`.
236 auto Iter
= Maps
.find(FuncAddress
);
237 if (Iter
== Maps
.end())
240 const MapTy
&Map
= Iter
->second
;
// upper_bound selects the first entry whose key is greater than Offset
// (assuming MapTy is an ordered map, as its use here suggests); a result of
// begin() means no entry has a key <= Offset.
241 auto KeyVal
= Map
.upper_bound(Offset
);
242 if (KeyVal
== Map
.begin())
// Strip the BRANCHENTRY flag bit to obtain the plain mapped value.
247 const uint32_t Val
= KeyVal
->second
& ~BRANCHENTRY
;
248 // Branch source addresses are translated to the first instruction of the
249 // source BB to avoid accounting for modifications BOLT may have made in the
250 // BB regarding deletion/addition of instructions.
// Keep the distance from the entry's key and apply it on top of Val.
253 return Offset
- KeyVal
->first
+ Val
;
// Collects pairs of table keys (Src, next key) into Res by walking the
// translation table stored in Maps under FuncAddress, between the entries
// selected from From and To.
// NOTE(review): the From/To parameter declarations, the return statements and
// the iterator increments/decrements are not visible in this listing; confirm
// the exact control flow against the full source.
256 std::optional
<BoltAddressTranslation::FallthroughListTy
>
257 BoltAddressTranslation::getFallthroughsInTrace(uint64_t FuncAddress
,
260 SmallVector
<std::pair
<uint64_t, uint64_t>, 16> Res
;
262 // Filter out trivial case
// Look up the per-function table; the not-found case is handled by the `if`.
269 auto Iter
= Maps
.find(FuncAddress
);
270 if (Iter
== Maps
.end())
273 const MapTy
&Map
= Iter
->second
;
// upper_bound yields the first entry whose key is greater than From (assuming
// MapTy is an ordered map); begin() means no entry has a key <= From.
274 auto FromIter
= Map
.upper_bound(From
);
275 if (FromIter
== Map
.begin())
277 // Skip instruction entries, to create fallthroughs we are only interested in
// The loop below repeats while the current entry has the BRANCHENTRY flag set,
// guarding against running past the first entry of the table.
280 if (FromIter
== Map
.begin())
283 } while (FromIter
->second
& BRANCHENTRY
);
// Same selection for the end of the range, based on To.
285 auto ToIter
= Map
.upper_bound(To
);
286 if (ToIter
== Map
.begin())
// Start entry at or beyond the end entry: handled by the guarded statement.
289 if (FromIter
->first
>= ToIter
->first
)
// Walk the entries from FromIter up to (not including) ToIter. The loop header
// has no increment expression, so the iterator is advanced inside the body.
292 for (auto Iter
= FromIter
; Iter
!= ToIter
;) {
293 const uint32_t Src
= Iter
->first
;
// Entries flagged BRANCHENTRY are handled separately from unflagged ones.
294 if (Iter
->second
& BRANCHENTRY
) {
// Advance across flagged entries, stopping at ToIter at the latest.
300 while (Iter
->second
& BRANCHENTRY
&& Iter
!= ToIter
)
302 if (Iter
->second
& BRANCHENTRY
)
// Record the pair (key where this step started, key reached).
304 Res
.emplace_back(Src
, Iter
->first
);
// Looks Address up in ColdPartSource, the map that write() and parse() fill
// with (cold fragment address -> parent function address) pairs.
// NOTE(review): the return statements for the found and not-found cases are
// not visible in this listing; confirm the returned values against the full
// source.
310 uint64_t BoltAddressTranslation::fetchParentAddress(uint64_t Address
) const {
311 auto Iter
= ColdPartSource
.find(Address
);
312 if (Iter
== ColdPartSource
.end())
317 bool BoltAddressTranslation::enabledFor(
318 llvm::object::ELFObjectFileBase
*InputFile
) const {
319 for (const SectionRef
&Section
: InputFile
->sections()) {
320 Expected
<StringRef
> SectionNameOrErr
= Section
.getName();
321 if (Error E
= SectionNameOrErr
.takeError())
324 if (SectionNameOrErr
.get() == SECTION_NAME
)