1 //===- SymbolizableObjectFile.cpp -----------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Implementation of SymbolizableObjectFile class.
11 //===----------------------------------------------------------------------===//
13 #include "SymbolizableObjectFile.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/ADT/Triple.h"
17 #include "llvm/BinaryFormat/COFF.h"
18 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
19 #include "llvm/DebugInfo/Symbolize/SymbolizableModule.h"
20 #include "llvm/Object/COFF.h"
21 #include "llvm/Object/ObjectFile.h"
22 #include "llvm/Object/SymbolSize.h"
23 #include "llvm/Support/Casting.h"
24 #include "llvm/Support/DataExtractor.h"
25 #include "llvm/Support/Error.h"
30 #include <system_error>
35 using namespace object
;
36 using namespace symbolize
;
38 static DILineInfoSpecifier
39 getDILineInfoSpecifier(FunctionNameKind FNKind
) {
40 return DILineInfoSpecifier(
41 DILineInfoSpecifier::FileLineInfoKind::AbsoluteFilePath
, FNKind
);
44 ErrorOr
<std::unique_ptr
<SymbolizableObjectFile
>>
45 SymbolizableObjectFile::create(const object::ObjectFile
*Obj
,
46 std::unique_ptr
<DIContext
> DICtx
,
47 bool UntagAddresses
) {
49 std::unique_ptr
<SymbolizableObjectFile
> res(
50 new SymbolizableObjectFile(Obj
, std::move(DICtx
), UntagAddresses
));
51 std::unique_ptr
<DataExtractor
> OpdExtractor
;
52 uint64_t OpdAddress
= 0;
53 // Find the .opd (function descriptor) section if any, for big-endian
55 if (Obj
->getArch() == Triple::ppc64
) {
56 for (section_iterator Section
: Obj
->sections()) {
57 Expected
<StringRef
> NameOrErr
= Section
->getName();
59 return errorToErrorCode(NameOrErr
.takeError());
61 if (*NameOrErr
== ".opd") {
62 Expected
<StringRef
> E
= Section
->getContents();
64 return errorToErrorCode(E
.takeError());
65 OpdExtractor
.reset(new DataExtractor(*E
, Obj
->isLittleEndian(),
66 Obj
->getBytesInAddress()));
67 OpdAddress
= Section
->getAddress();
72 std::vector
<std::pair
<SymbolRef
, uint64_t>> Symbols
=
73 computeSymbolSizes(*Obj
);
74 for (auto &P
: Symbols
)
75 res
->addSymbol(P
.first
, P
.second
, OpdExtractor
.get(), OpdAddress
);
77 // If this is a COFF object and we didn't find any symbols, try the export
79 if (Symbols
.empty()) {
80 if (auto *CoffObj
= dyn_cast
<COFFObjectFile
>(Obj
))
81 if (auto EC
= res
->addCoffExportSymbols(CoffObj
))
85 std::vector
<std::pair
<SymbolDesc
, StringRef
>> &Fs
= res
->Functions
,
87 auto Uniquify
= [](std::vector
<std::pair
<SymbolDesc
, StringRef
>> &S
) {
88 // Sort by (Addr,Size,Name). If several SymbolDescs share the same Addr,
89 // pick the one with the largest Size. This helps us avoid symbols with no
90 // size information (Size=0).
92 auto I
= S
.begin(), E
= S
.end(), J
= S
.begin();
95 while (++I
!= E
&& OI
->first
.Addr
== I
->first
.Addr
) {
104 return std::move(res
);
107 SymbolizableObjectFile::SymbolizableObjectFile(const ObjectFile
*Obj
,
108 std::unique_ptr
<DIContext
> DICtx
,
110 : Module(Obj
), DebugInfoContext(std::move(DICtx
)),
111 UntagAddresses(UntagAddresses
) {}
115 struct OffsetNamePair
{
119 bool operator<(const OffsetNamePair
&R
) const {
120 return Offset
< R
.Offset
;
124 } // end anonymous namespace
126 std::error_code
SymbolizableObjectFile::addCoffExportSymbols(
127 const COFFObjectFile
*CoffObj
) {
128 // Get all export names and offsets.
129 std::vector
<OffsetNamePair
> ExportSyms
;
130 for (const ExportDirectoryEntryRef
&Ref
: CoffObj
->export_directories()) {
133 if (auto EC
= Ref
.getSymbolName(Name
))
135 if (auto EC
= Ref
.getExportRVA(Offset
))
137 ExportSyms
.push_back(OffsetNamePair
{Offset
, Name
});
139 if (ExportSyms
.empty())
140 return std::error_code();
142 // Sort by ascending offset.
143 array_pod_sort(ExportSyms
.begin(), ExportSyms
.end());
145 // Approximate the symbol sizes by assuming they run to the next symbol.
146 // FIXME: This assumes all exports are functions.
147 uint64_t ImageBase
= CoffObj
->getImageBase();
148 for (auto I
= ExportSyms
.begin(), E
= ExportSyms
.end(); I
!= E
; ++I
) {
149 OffsetNamePair
&Export
= *I
;
150 // FIXME: The last export has a one byte size now.
151 uint32_t NextOffset
= I
!= E
? I
->Offset
: Export
.Offset
+ 1;
152 uint64_t SymbolStart
= ImageBase
+ Export
.Offset
;
153 uint64_t SymbolSize
= NextOffset
- Export
.Offset
;
154 SymbolDesc SD
= {SymbolStart
, SymbolSize
};
155 Functions
.emplace_back(SD
, Export
.Name
);
157 return std::error_code();
160 std::error_code
SymbolizableObjectFile::addSymbol(const SymbolRef
&Symbol
,
162 DataExtractor
*OpdExtractor
,
163 uint64_t OpdAddress
) {
164 // Avoid adding symbols from an unknown/undefined section.
165 const ObjectFile
*Obj
= Symbol
.getObject();
166 Expected
<section_iterator
> Sec
= Symbol
.getSection();
167 if (!Sec
|| (Obj
&& Obj
->section_end() == *Sec
))
168 return std::error_code();
169 Expected
<SymbolRef::Type
> SymbolTypeOrErr
= Symbol
.getType();
170 if (!SymbolTypeOrErr
)
171 return errorToErrorCode(SymbolTypeOrErr
.takeError());
172 SymbolRef::Type SymbolType
= *SymbolTypeOrErr
;
173 if (SymbolType
!= SymbolRef::ST_Function
&& SymbolType
!= SymbolRef::ST_Data
)
174 return std::error_code();
175 Expected
<uint64_t> SymbolAddressOrErr
= Symbol
.getAddress();
176 if (!SymbolAddressOrErr
)
177 return errorToErrorCode(SymbolAddressOrErr
.takeError());
178 uint64_t SymbolAddress
= *SymbolAddressOrErr
;
179 if (UntagAddresses
) {
180 // For kernel addresses, bits 56-63 need to be set, so we sign extend bit 55
181 // into bits 56-63 instead of masking them out.
182 SymbolAddress
&= (1ull << 56) - 1;
183 SymbolAddress
= (int64_t(SymbolAddress
) << 8) >> 8;
186 // For big-endian PowerPC64 ELF, symbols in the .opd section refer to
187 // function descriptors. The first word of the descriptor is a pointer to
188 // the function's code.
189 // For the purposes of symbolization, pretend the symbol's address is that
190 // of the function's code, not the descriptor.
191 uint64_t OpdOffset
= SymbolAddress
- OpdAddress
;
192 if (OpdExtractor
->isValidOffsetForAddress(OpdOffset
))
193 SymbolAddress
= OpdExtractor
->getAddress(&OpdOffset
);
195 Expected
<StringRef
> SymbolNameOrErr
= Symbol
.getName();
196 if (!SymbolNameOrErr
)
197 return errorToErrorCode(SymbolNameOrErr
.takeError());
198 StringRef SymbolName
= *SymbolNameOrErr
;
199 // Mach-O symbol table names have leading underscore, skip it.
200 if (Module
->isMachO() && !SymbolName
.empty() && SymbolName
[0] == '_')
201 SymbolName
= SymbolName
.drop_front();
202 // FIXME: If a function has alias, there are two entries in symbol table
203 // with same address size. Make sure we choose the correct one.
204 auto &M
= SymbolType
== SymbolRef::ST_Function
? Functions
: Objects
;
205 SymbolDesc SD
= { SymbolAddress
, SymbolSize
};
206 M
.emplace_back(SD
, SymbolName
);
207 return std::error_code();
210 // Return true if this is a 32-bit x86 PE COFF module.
211 bool SymbolizableObjectFile::isWin32Module() const {
212 auto *CoffObject
= dyn_cast
<COFFObjectFile
>(Module
);
213 return CoffObject
&& CoffObject
->getMachine() == COFF::IMAGE_FILE_MACHINE_I386
;
216 uint64_t SymbolizableObjectFile::getModulePreferredBase() const {
217 if (auto *CoffObject
= dyn_cast
<COFFObjectFile
>(Module
))
218 return CoffObject
->getImageBase();
222 bool SymbolizableObjectFile::getNameFromSymbolTable(SymbolRef::Type Type
,
226 uint64_t &Size
) const {
227 const auto &Symbols
= Type
== SymbolRef::ST_Function
? Functions
: Objects
;
228 std::pair
<SymbolDesc
, StringRef
> SD
{{Address
, UINT64_C(-1)}, StringRef()};
229 auto SymbolIterator
= llvm::upper_bound(Symbols
, SD
);
230 if (SymbolIterator
== Symbols
.begin())
233 if (SymbolIterator
->first
.Size
!= 0 &&
234 SymbolIterator
->first
.Addr
+ SymbolIterator
->first
.Size
<= Address
)
236 Name
= SymbolIterator
->second
.str();
237 Addr
= SymbolIterator
->first
.Addr
;
238 Size
= SymbolIterator
->first
.Size
;
242 bool SymbolizableObjectFile::shouldOverrideWithSymbolTable(
243 FunctionNameKind FNKind
, bool UseSymbolTable
) const {
244 // When DWARF is used with -gline-tables-only / -gmlt, the symbol table gives
245 // better answers for linkage names than the DIContext. Otherwise, we are
246 // probably using PEs and PDBs, and we shouldn't do the override. PE files
247 // generally only contain the names of exported symbols.
248 return FNKind
== FunctionNameKind::LinkageName
&& UseSymbolTable
&&
249 isa
<DWARFContext
>(DebugInfoContext
.get());
253 SymbolizableObjectFile::symbolizeCode(object::SectionedAddress ModuleOffset
,
254 FunctionNameKind FNKind
,
255 bool UseSymbolTable
) const {
256 if (ModuleOffset
.SectionIndex
== object::SectionedAddress::UndefSection
)
257 ModuleOffset
.SectionIndex
=
258 getModuleSectionIndexForAddress(ModuleOffset
.Address
);
259 DILineInfo LineInfo
= DebugInfoContext
->getLineInfoForAddress(
260 ModuleOffset
, getDILineInfoSpecifier(FNKind
));
262 // Override function name from symbol table if necessary.
263 if (shouldOverrideWithSymbolTable(FNKind
, UseSymbolTable
)) {
264 std::string FunctionName
;
265 uint64_t Start
, Size
;
266 if (getNameFromSymbolTable(SymbolRef::ST_Function
, ModuleOffset
.Address
,
267 FunctionName
, Start
, Size
)) {
268 LineInfo
.FunctionName
= FunctionName
;
274 DIInliningInfo
SymbolizableObjectFile::symbolizeInlinedCode(
275 object::SectionedAddress ModuleOffset
, FunctionNameKind FNKind
,
276 bool UseSymbolTable
) const {
277 if (ModuleOffset
.SectionIndex
== object::SectionedAddress::UndefSection
)
278 ModuleOffset
.SectionIndex
=
279 getModuleSectionIndexForAddress(ModuleOffset
.Address
);
280 DIInliningInfo InlinedContext
= DebugInfoContext
->getInliningInfoForAddress(
281 ModuleOffset
, getDILineInfoSpecifier(FNKind
));
283 // Make sure there is at least one frame in context.
284 if (InlinedContext
.getNumberOfFrames() == 0)
285 InlinedContext
.addFrame(DILineInfo());
287 // Override the function name in lower frame with name from symbol table.
288 if (shouldOverrideWithSymbolTable(FNKind
, UseSymbolTable
)) {
289 std::string FunctionName
;
290 uint64_t Start
, Size
;
291 if (getNameFromSymbolTable(SymbolRef::ST_Function
, ModuleOffset
.Address
,
292 FunctionName
, Start
, Size
)) {
293 InlinedContext
.getMutableFrame(InlinedContext
.getNumberOfFrames() - 1)
294 ->FunctionName
= FunctionName
;
298 return InlinedContext
;
301 DIGlobal
SymbolizableObjectFile::symbolizeData(
302 object::SectionedAddress ModuleOffset
) const {
304 getNameFromSymbolTable(SymbolRef::ST_Data
, ModuleOffset
.Address
, Res
.Name
,
305 Res
.Start
, Res
.Size
);
309 std::vector
<DILocal
> SymbolizableObjectFile::symbolizeFrame(
310 object::SectionedAddress ModuleOffset
) const {
311 if (ModuleOffset
.SectionIndex
== object::SectionedAddress::UndefSection
)
312 ModuleOffset
.SectionIndex
=
313 getModuleSectionIndexForAddress(ModuleOffset
.Address
);
314 return DebugInfoContext
->getLocalsForAddress(ModuleOffset
);
317 /// Search for the first occurence of specified Address in ObjectFile.
318 uint64_t SymbolizableObjectFile::getModuleSectionIndexForAddress(
319 uint64_t Address
) const {
321 for (SectionRef Sec
: Module
->sections()) {
322 if (!Sec
.isText() || Sec
.isVirtual())
325 if (Address
>= Sec
.getAddress() &&
326 Address
< Sec
.getAddress() + Sec
.getSize())
327 return Sec
.getIndex();
330 return object::SectionedAddress::UndefSection
;