1 //===- SymbolizableObjectFile.cpp -----------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // Implementation of SymbolizableObjectFile class.
11 //===----------------------------------------------------------------------===//
13 #include "llvm/DebugInfo/Symbolize/SymbolizableObjectFile.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/BinaryFormat/COFF.h"
16 #include "llvm/DebugInfo/DWARF/DWARFContext.h"
17 #include "llvm/Object/COFF.h"
18 #include "llvm/Object/ELFObjectFile.h"
19 #include "llvm/Object/ObjectFile.h"
20 #include "llvm/Object/SymbolSize.h"
21 #include "llvm/Support/Casting.h"
22 #include "llvm/Support/DataExtractor.h"
23 #include "llvm/TargetParser/Triple.h"
27 using namespace object
;
28 using namespace symbolize
;
30 Expected
<std::unique_ptr
<SymbolizableObjectFile
>>
31 SymbolizableObjectFile::create(const object::ObjectFile
*Obj
,
32 std::unique_ptr
<DIContext
> DICtx
,
33 bool UntagAddresses
) {
35 std::unique_ptr
<SymbolizableObjectFile
> res(
36 new SymbolizableObjectFile(Obj
, std::move(DICtx
), UntagAddresses
));
37 std::unique_ptr
<DataExtractor
> OpdExtractor
;
38 uint64_t OpdAddress
= 0;
39 // Find the .opd (function descriptor) section if any, for big-endian
41 if (Obj
->getArch() == Triple::ppc64
) {
42 for (section_iterator Section
: Obj
->sections()) {
43 Expected
<StringRef
> NameOrErr
= Section
->getName();
45 return NameOrErr
.takeError();
47 if (*NameOrErr
== ".opd") {
48 Expected
<StringRef
> E
= Section
->getContents();
51 OpdExtractor
.reset(new DataExtractor(*E
, Obj
->isLittleEndian(),
52 Obj
->getBytesInAddress()));
53 OpdAddress
= Section
->getAddress();
58 std::vector
<std::pair
<SymbolRef
, uint64_t>> Symbols
=
59 computeSymbolSizes(*Obj
);
60 for (auto &P
: Symbols
)
62 res
->addSymbol(P
.first
, P
.second
, OpdExtractor
.get(), OpdAddress
))
65 // If this is a COFF object and we didn't find any symbols, try the export
67 if (Symbols
.empty()) {
68 if (auto *CoffObj
= dyn_cast
<COFFObjectFile
>(Obj
))
69 if (Error E
= res
->addCoffExportSymbols(CoffObj
))
73 std::vector
<SymbolDesc
> &SS
= res
->Symbols
;
74 // Sort by (Addr,Size,Name). If several SymbolDescs share the same Addr,
75 // pick the one with the largest Size. This helps us avoid symbols with no
76 // size information (Size=0).
77 llvm::stable_sort(SS
);
78 auto I
= SS
.begin(), E
= SS
.end(), J
= SS
.begin();
81 while (++I
!= E
&& OI
->Addr
== I
->Addr
) {
85 SS
.erase(J
, SS
.end());
87 return std::move(res
);
90 SymbolizableObjectFile::SymbolizableObjectFile(const ObjectFile
*Obj
,
91 std::unique_ptr
<DIContext
> DICtx
,
93 : Module(Obj
), DebugInfoContext(std::move(DICtx
)),
94 UntagAddresses(UntagAddresses
) {}
98 struct OffsetNamePair
{
102 bool operator<(const OffsetNamePair
&R
) const {
103 return Offset
< R
.Offset
;
107 } // end anonymous namespace
109 Error
SymbolizableObjectFile::addCoffExportSymbols(
110 const COFFObjectFile
*CoffObj
) {
111 // Get all export names and offsets.
112 std::vector
<OffsetNamePair
> ExportSyms
;
113 for (const ExportDirectoryEntryRef
&Ref
: CoffObj
->export_directories()) {
116 if (auto EC
= Ref
.getSymbolName(Name
))
118 if (auto EC
= Ref
.getExportRVA(Offset
))
120 ExportSyms
.push_back(OffsetNamePair
{Offset
, Name
});
122 if (ExportSyms
.empty())
123 return Error::success();
125 // Sort by ascending offset.
126 array_pod_sort(ExportSyms
.begin(), ExportSyms
.end());
128 // Approximate the symbol sizes by assuming they run to the next symbol.
129 // FIXME: This assumes all exports are functions.
130 uint64_t ImageBase
= CoffObj
->getImageBase();
131 for (auto I
= ExportSyms
.begin(), E
= ExportSyms
.end(); I
!= E
; ++I
) {
132 OffsetNamePair
&Export
= *I
;
133 // FIXME: The last export has a one byte size now.
134 uint32_t NextOffset
= I
!= E
? I
->Offset
: Export
.Offset
+ 1;
135 uint64_t SymbolStart
= ImageBase
+ Export
.Offset
;
136 uint64_t SymbolSize
= NextOffset
- Export
.Offset
;
137 Symbols
.push_back({SymbolStart
, SymbolSize
, Export
.Name
, 0});
139 return Error::success();
142 Error
SymbolizableObjectFile::addSymbol(const SymbolRef
&Symbol
,
144 DataExtractor
*OpdExtractor
,
145 uint64_t OpdAddress
) {
146 // Avoid adding symbols from an unknown/undefined section.
147 const ObjectFile
&Obj
= *Symbol
.getObject();
148 Expected
<StringRef
> SymbolNameOrErr
= Symbol
.getName();
149 if (!SymbolNameOrErr
)
150 return SymbolNameOrErr
.takeError();
151 StringRef SymbolName
= *SymbolNameOrErr
;
154 Obj
.isELF() ? ELFSymbolRef(Symbol
).getRawDataRefImpl().d
.b
: 0;
155 Expected
<section_iterator
> Sec
= Symbol
.getSection();
156 if (!Sec
|| Obj
.section_end() == *Sec
) {
158 // Store the (index, filename) pair for a file symbol.
159 ELFSymbolRef
ESym(Symbol
);
160 if (ESym
.getELFType() == ELF::STT_FILE
)
161 FileSymbols
.emplace_back(ELFSymIdx
, SymbolName
);
163 return Error::success();
166 Expected
<SymbolRef::Type
> SymbolTypeOrErr
= Symbol
.getType();
167 if (!SymbolTypeOrErr
)
168 return SymbolTypeOrErr
.takeError();
169 SymbolRef::Type SymbolType
= *SymbolTypeOrErr
;
171 // Ignore any symbols coming from sections that don't have runtime
173 if ((elf_section_iterator(*Sec
)->getFlags() & ELF::SHF_ALLOC
) == 0)
174 return Error::success();
176 // Allow function and data symbols. Additionally allow STT_NONE, which are
177 // common for functions defined in assembly.
178 uint8_t Type
= ELFSymbolRef(Symbol
).getELFType();
179 if (Type
!= ELF::STT_NOTYPE
&& Type
!= ELF::STT_FUNC
&&
180 Type
!= ELF::STT_OBJECT
&& Type
!= ELF::STT_GNU_IFUNC
)
181 return Error::success();
182 // Some STT_NOTYPE symbols are not desired. This excludes STT_SECTION and
183 // ARM mapping symbols.
184 uint32_t Flags
= cantFail(Symbol
.getFlags());
185 if (Flags
& SymbolRef::SF_FormatSpecific
)
186 return Error::success();
187 } else if (SymbolType
!= SymbolRef::ST_Function
&&
188 SymbolType
!= SymbolRef::ST_Data
) {
189 return Error::success();
192 Expected
<uint64_t> SymbolAddressOrErr
= Symbol
.getAddress();
193 if (!SymbolAddressOrErr
)
194 return SymbolAddressOrErr
.takeError();
195 uint64_t SymbolAddress
= *SymbolAddressOrErr
;
196 if (UntagAddresses
) {
197 // For kernel addresses, bits 56-63 need to be set, so we sign extend bit 55
198 // into bits 56-63 instead of masking them out.
199 SymbolAddress
&= (1ull << 56) - 1;
200 SymbolAddress
= (int64_t(SymbolAddress
) << 8) >> 8;
203 // For big-endian PowerPC64 ELF, symbols in the .opd section refer to
204 // function descriptors. The first word of the descriptor is a pointer to
205 // the function's code.
206 // For the purposes of symbolization, pretend the symbol's address is that
207 // of the function's code, not the descriptor.
208 uint64_t OpdOffset
= SymbolAddress
- OpdAddress
;
209 if (OpdExtractor
->isValidOffsetForAddress(OpdOffset
))
210 SymbolAddress
= OpdExtractor
->getAddress(&OpdOffset
);
212 // Mach-O symbol table names have leading underscore, skip it.
213 if (Module
->isMachO())
214 SymbolName
.consume_front("_");
216 if (Obj
.isELF() && ELFSymbolRef(Symbol
).getBinding() != ELF::STB_LOCAL
)
218 Symbols
.push_back({SymbolAddress
, SymbolSize
, SymbolName
, ELFSymIdx
});
219 return Error::success();
222 // Return true if this is a 32-bit x86 PE COFF module.
223 bool SymbolizableObjectFile::isWin32Module() const {
224 auto *CoffObject
= dyn_cast
<COFFObjectFile
>(Module
);
225 return CoffObject
&& CoffObject
->getMachine() == COFF::IMAGE_FILE_MACHINE_I386
;
228 uint64_t SymbolizableObjectFile::getModulePreferredBase() const {
229 if (auto *CoffObject
= dyn_cast
<COFFObjectFile
>(Module
))
230 return CoffObject
->getImageBase();
234 bool SymbolizableObjectFile::getNameFromSymbolTable(
235 uint64_t Address
, std::string
&Name
, uint64_t &Addr
, uint64_t &Size
,
236 std::string
&FileName
) const {
237 SymbolDesc SD
{Address
, UINT64_C(-1), StringRef(), 0};
238 auto SymbolIterator
= llvm::upper_bound(Symbols
, SD
);
239 if (SymbolIterator
== Symbols
.begin())
242 if (SymbolIterator
->Size
!= 0 &&
243 SymbolIterator
->Addr
+ SymbolIterator
->Size
<= Address
)
245 Name
= SymbolIterator
->Name
.str();
246 Addr
= SymbolIterator
->Addr
;
247 Size
= SymbolIterator
->Size
;
249 if (SymbolIterator
->ELFLocalSymIdx
!= 0) {
250 // If this is an ELF local symbol, find the STT_FILE symbol preceding
251 // SymbolIterator to get the filename. The ELF spec requires the STT_FILE
252 // symbol (if present) precedes the other STB_LOCAL symbols for the file.
253 assert(Module
->isELF());
254 auto It
= llvm::upper_bound(
256 std::make_pair(SymbolIterator
->ELFLocalSymIdx
, StringRef()));
257 if (It
!= FileSymbols
.begin())
258 FileName
= It
[-1].second
.str();
263 bool SymbolizableObjectFile::shouldOverrideWithSymbolTable(
264 FunctionNameKind FNKind
, bool UseSymbolTable
) const {
265 // When DWARF is used with -gline-tables-only / -gmlt, the symbol table gives
266 // better answers for linkage names than the DIContext. Otherwise, we are
267 // probably using PEs and PDBs, and we shouldn't do the override. PE files
268 // generally only contain the names of exported symbols.
269 return FNKind
== FunctionNameKind::LinkageName
&& UseSymbolTable
&&
270 isa
<DWARFContext
>(DebugInfoContext
.get());
274 SymbolizableObjectFile::symbolizeCode(object::SectionedAddress ModuleOffset
,
275 DILineInfoSpecifier LineInfoSpecifier
,
276 bool UseSymbolTable
) const {
277 if (ModuleOffset
.SectionIndex
== object::SectionedAddress::UndefSection
)
278 ModuleOffset
.SectionIndex
=
279 getModuleSectionIndexForAddress(ModuleOffset
.Address
);
280 DILineInfo LineInfo
=
281 DebugInfoContext
->getLineInfoForAddress(ModuleOffset
, LineInfoSpecifier
);
283 // Override function name from symbol table if necessary.
284 if (shouldOverrideWithSymbolTable(LineInfoSpecifier
.FNKind
, UseSymbolTable
)) {
285 std::string FunctionName
, FileName
;
286 uint64_t Start
, Size
;
287 if (getNameFromSymbolTable(ModuleOffset
.Address
, FunctionName
, Start
, Size
,
289 LineInfo
.FunctionName
= FunctionName
;
290 LineInfo
.StartAddress
= Start
;
291 if (LineInfo
.FileName
== DILineInfo::BadString
&& !FileName
.empty())
292 LineInfo
.FileName
= FileName
;
298 DIInliningInfo
SymbolizableObjectFile::symbolizeInlinedCode(
299 object::SectionedAddress ModuleOffset
,
300 DILineInfoSpecifier LineInfoSpecifier
, bool UseSymbolTable
) const {
301 if (ModuleOffset
.SectionIndex
== object::SectionedAddress::UndefSection
)
302 ModuleOffset
.SectionIndex
=
303 getModuleSectionIndexForAddress(ModuleOffset
.Address
);
304 DIInliningInfo InlinedContext
= DebugInfoContext
->getInliningInfoForAddress(
305 ModuleOffset
, LineInfoSpecifier
);
307 // Make sure there is at least one frame in context.
308 if (InlinedContext
.getNumberOfFrames() == 0)
309 InlinedContext
.addFrame(DILineInfo());
311 // Override the function name in lower frame with name from symbol table.
312 if (shouldOverrideWithSymbolTable(LineInfoSpecifier
.FNKind
, UseSymbolTable
)) {
313 std::string FunctionName
, FileName
;
314 uint64_t Start
, Size
;
315 if (getNameFromSymbolTable(ModuleOffset
.Address
, FunctionName
, Start
, Size
,
317 DILineInfo
*LI
= InlinedContext
.getMutableFrame(
318 InlinedContext
.getNumberOfFrames() - 1);
319 LI
->FunctionName
= FunctionName
;
320 LI
->StartAddress
= Start
;
321 if (LI
->FileName
== DILineInfo::BadString
&& !FileName
.empty())
322 LI
->FileName
= FileName
;
326 return InlinedContext
;
329 DIGlobal
SymbolizableObjectFile::symbolizeData(
330 object::SectionedAddress ModuleOffset
) const {
332 std::string FileName
;
333 getNameFromSymbolTable(ModuleOffset
.Address
, Res
.Name
, Res
.Start
, Res
.Size
,
335 Res
.DeclFile
= FileName
;
337 // Try and get a better filename:lineno pair from the debuginfo, if present.
338 DILineInfo DL
= DebugInfoContext
->getLineInfoForDataAddress(ModuleOffset
);
340 Res
.DeclFile
= DL
.FileName
;
341 Res
.DeclLine
= DL
.Line
;
346 std::vector
<DILocal
> SymbolizableObjectFile::symbolizeFrame(
347 object::SectionedAddress ModuleOffset
) const {
348 if (ModuleOffset
.SectionIndex
== object::SectionedAddress::UndefSection
)
349 ModuleOffset
.SectionIndex
=
350 getModuleSectionIndexForAddress(ModuleOffset
.Address
);
351 return DebugInfoContext
->getLocalsForAddress(ModuleOffset
);
354 std::vector
<object::SectionedAddress
>
355 SymbolizableObjectFile::findSymbol(StringRef Symbol
, uint64_t Offset
) const {
356 std::vector
<object::SectionedAddress
> Result
;
357 for (const SymbolDesc
&Sym
: Symbols
) {
358 if (Sym
.Name
.equals(Symbol
)) {
359 uint64_t Addr
= Sym
.Addr
;
360 if (Offset
< Sym
.Size
)
362 object::SectionedAddress A
{Addr
, getModuleSectionIndexForAddress(Addr
)};
369 /// Search for the first occurence of specified Address in ObjectFile.
370 uint64_t SymbolizableObjectFile::getModuleSectionIndexForAddress(
371 uint64_t Address
) const {
373 for (SectionRef Sec
: Module
->sections()) {
374 if (!Sec
.isText() || Sec
.isVirtual())
377 if (Address
>= Sec
.getAddress() &&
378 Address
< Sec
.getAddress() + Sec
.getSize())
379 return Sec
.getIndex();
382 return object::SectionedAddress::UndefSection
;