1 //===- IRSymtab.cpp - implementation of IR symbol tables ------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/Object/IRSymtab.h"
10 #include "llvm/ADT/ArrayRef.h"
11 #include "llvm/ADT/DenseMap.h"
12 #include "llvm/ADT/SmallPtrSet.h"
13 #include "llvm/ADT/SmallString.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/ADT/Triple.h"
17 #include "llvm/Config/llvm-config.h"
18 #include "llvm/IR/Comdat.h"
19 #include "llvm/IR/DataLayout.h"
20 #include "llvm/IR/GlobalAlias.h"
21 #include "llvm/IR/GlobalObject.h"
22 #include "llvm/IR/Mangler.h"
23 #include "llvm/IR/Metadata.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Bitcode/BitcodeReader.h"
26 #include "llvm/MC/StringTableBuilder.h"
27 #include "llvm/Object/IRObjectFile.h"
28 #include "llvm/Object/ModuleSymbolTable.h"
29 #include "llvm/Object/SymbolicFile.h"
30 #include "llvm/Support/Allocator.h"
31 #include "llvm/Support/Casting.h"
32 #include "llvm/Support/Error.h"
33 #include "llvm/Support/StringSaver.h"
34 #include "llvm/Support/VCSRevision.h"
35 #include "llvm/Support/raw_ostream.h"
42 using namespace irsymtab
;
// Table of C strings generated from llvm/IR/RuntimeLibcalls.def.
// HANDLE_LIBCALL is defined to expand to just its `name` argument followed by
// a comma, so every HANDLE_LIBCALL entry in the included file contributes one
// array element. Builder::addSymbol sets FB_used on any global whose name is
// found in this table.
44 static const char *LibcallRoutineNames
[] = {
45 #define HANDLE_LIBCALL(code, name) name,
46 #include "llvm/IR/RuntimeLibcalls.def"
// Computes the producer name used for kExpectedProducerName.
// The default string starts from LLVM_VERSION_STRING; the environment variable
// LLVM_OVERRIDE_PRODUCER is consulted through getenv() and, when it is set,
// supplies an override name.
// NOTE(review): the return statements are not part of the visible text;
// presumably the override is returned when present and DefaultName otherwise.
52 const char *getExpectedProducerName() {
53 static char DefaultName
[] = LLVM_VERSION_STRING
58 // Allows for testing of the irsymtab writer and upgrade mechanism. This
59 // environment variable should not be set by users.
60 if (char *OverrideName
= getenv("LLVM_OVERRIDE_PRODUCER"))
// Producer string for this build, initialized from getExpectedProducerName().
// Builder::build stores it in the header's Producer field, and
// irsymtab::readBitcode compares it with the producer recorded in an existing
// symbol table to decide whether that table can be reused.
65 const char *kExpectedProducerName
= getExpectedProducerName();
67 /// Stores the temporary state that is required to build an IR symbol table.
// Output byte buffer (held by reference). build() resizes it to make room for
// the header and writeRange() appends the serialized tables to it.
69 SmallVector
<char, 0> &Symtab
;
// String table builder (held by reference). setStr() adds every recorded
// string to it and stores the value returned by add() as the string's offset.
70 StringTableBuilder
&StrtabBuilder
;
73 // This ctor initializes a StringSaver using the passed in BumpPtrAllocator.
74 // The StringTableBuilder does not create a copy of any strings added to it,
75 // so this provides somewhere to store any strings that we create.
//
// Symtab and StrtabBuilder are bound to the reference members of the same
// name, so the caller's objects are filled in directly; Alloc is only used to
// construct Saver.
76 Builder(SmallVector
<char, 0> &Symtab
, StringTableBuilder
&StrtabBuilder
,
77 BumpPtrAllocator
&Alloc
)
78 : Symtab(Symtab
), StrtabBuilder(StrtabBuilder
), Saver(Alloc
) {}
// Maps each comdat passed to getComdatIndex() to the int that function
// returns for it. New entries are inserted with the current size of Comdats;
// on COFF, a comdat whose leader has local linkage is remapped to -1.
80 DenseMap
<const Comdat
*, int> ComdatMap
;
// The four tables below are accumulated while modules are added and are
// serialized by build() through writeRange(), in the order Modules, Comdats,
// Symbols, Uncommons.
84 std::vector
<storage::Comdat
> Comdats
;
85 std::vector
<storage::Module
> Mods
;
86 std::vector
<storage::Symbol
> Syms
;
87 std::vector
<storage::Uncommon
> Uncommons
;
// Text of the COFF linker options. It is written through COFFLinkerOptsOS
// (which wraps this string) and stored in the header by build() after the
// stream has been flushed.
89 std::string COFFLinkerOpts
;
90 raw_string_ostream COFFLinkerOptsOS
{COFFLinkerOpts
};
// One entry per operand of the "llvm.dependent-libraries" named metadata,
// appended by addModule() for ELF targets and serialized last by build().
92 std::vector
<storage::Str
> DependentLibraries
;
// Records Value in S: S.Offset receives the value returned by
// StrtabBuilder.add(Value) and S.Size receives Value.size().
// As noted on the constructor, the StringTableBuilder does not copy strings
// added to it; callers in this file pass literals, strings owned by the
// module, or strings stored through Saver.
94 void setStr(storage::Str
&S
, StringRef Value
) {
95 S
.Offset
= StrtabBuilder
.add(Value
);
96 S
.Size
= Value
.size();
// Appends the raw bytes of every element of Objs to the end of Symtab and
// describes the appended region in R:
//   R.Offset - size of Symtab (in bytes) before the append;
//   R.Size   - number of elements in Objs (an element count, not a byte count).
// The elements are copied byte-for-byte through a const char * view of the
// vector's storage.
100 void writeRange(storage::Range
<T
> &R
, const std::vector
<T
> &Objs
) {
101 R
.Offset
= Symtab
.size();
102 R
.Size
= Objs
.size();
103 Symtab
.insert(Symtab
.end(), reinterpret_cast<const char *>(Objs
.data()),
104 reinterpret_cast<const char *>(Objs
.data() + Objs
.size()));
// Returns the int recorded in ComdatMap for comdat C, creating the entry (and,
// where applicable, a storage::Comdat record) on first use. M is the module
// searched for the comdat's leader on COFF targets. Can fail with a
// StringError.
107 Expected
<int> getComdatIndex(const Comdat
*C
, const Module
*M
);
// Adds the symbols and module-level metadata of M to the tables.
109 Error
addModule(Module
*M
);
// Fills in the storage::Symbol for Sym. Used is the set of globals collected
// from the module's used lists by addModule().
110 Error
addSymbol(const ModuleSymbolTable
&Msymtab
,
111 const SmallPtrSet
<GlobalValue
*, 4> &Used
,
112 ModuleSymbolTable::Symbol Sym
);
// Entry point: processes every module and serializes the result into Symtab.
114 Error
build(ArrayRef
<Module
*> Mods
);
// Adds module M to the symbol table under construction.
//
// Steps visible here:
//  - a module with an empty data layout string is rejected with a StringError
//    ("input module has no datalayout");
//  - the globals listed in llvm.used and llvm.compiler.used are gathered into
//    the set Used;
//  - the module record's Begin/End/UncBegin fields are taken from the current
//    sizes of Syms and Uncommons and the number of symbols reported by the
//    ModuleSymbolTable;
//  - for COFF targets the strings under "llvm.linker.options" are appended to
//    COFFLinkerOptsOS; for ELF targets the first operand of each
//    "llvm.dependent-libraries" node is appended to DependentLibraries;
//  - addSymbol() is called for every symbol of the ModuleSymbolTable.
// Returns Error::success() when the end of the function is reached.
117 Error
Builder::addModule(Module
*M
) {
118 if (M
->getDataLayoutStr().empty())
119 return make_error
<StringError
>("input module has no datalayout",
120 inconvertibleErrorCode());
122 // Symbols in the llvm.used list will get the FB_Used bit and will not be
123 // internalized. We do this for llvm.compiler.used as well:
125 // IR symbol table tracks module-level asm symbol references but not inline
126 // asm. A symbol only referenced by inline asm is not in the IR symbol table,
127 // so we may not know that the definition (in another translation unit) is
128 // referenced. That definition may have __attribute__((used)) (which lowers to
129 // llvm.compiler.used on ELF targets) to communicate to the compiler that it
130 // may be used by inline asm. The usage is perfectly fine, so we treat
131 // llvm.compiler.used conservatively as llvm.used to work around our own
// limitation.
133 SmallVector
<GlobalValue
*, 4> UsedV
;
134 collectUsedGlobalVariables(*M
, UsedV
, /*CompilerUsed=*/false);
135 collectUsedGlobalVariables(*M
, UsedV
, /*CompilerUsed=*/true);
136 SmallPtrSet
<GlobalValue
*, 4> Used(UsedV
.begin(), UsedV
.end());
138 ModuleSymbolTable Msymtab
;
139 Msymtab
.addModule(M
);
// Begin is the number of symbols already recorded; End adds the number of
// symbols this module contributes according to Msymtab. UncBegin is the
// number of Uncommon records already present.
142 Mod
.Begin
= Syms
.size();
143 Mod
.End
= Syms
.size() + Msymtab
.symbols().size();
144 Mod
.UncBegin
= Uncommons
.size();
// COFF: every string operand of every node under "llvm.linker.options" is
// appended to the linker option text, each preceded by a single space.
147 if (TT
.isOSBinFormatCOFF()) {
148 if (auto E
= M
->materializeMetadata())
150 if (NamedMDNode
*LinkerOptions
=
151 M
->getNamedMetadata("llvm.linker.options")) {
152 for (MDNode
*MDOptions
: LinkerOptions
->operands())
153 for (const MDOperand
&MDOption
: cast
<MDNode
>(MDOptions
)->operands())
154 COFFLinkerOptsOS
<< " " << cast
<MDString
>(MDOption
)->getString();
// ELF: only operand 0 of each "llvm.dependent-libraries" node is read; it is
// cast to an MDString and recorded as one dependent-library specifier.
158 if (TT
.isOSBinFormatELF()) {
159 if (auto E
= M
->materializeMetadata())
161 if (NamedMDNode
*N
= M
->getNamedMetadata("llvm.dependent-libraries")) {
162 for (MDNode
*MDOptions
: N
->operands()) {
163 const auto OperandStr
=
164 cast
<MDString
>(cast
<MDNode
>(MDOptions
)->getOperand(0))->getString();
165 storage::Str Specifier
;
166 setStr(Specifier
, OperandStr
);
167 DependentLibraries
.emplace_back(Specifier
);
// Add one symbol table entry per symbol known to the ModuleSymbolTable.
172 for (ModuleSymbolTable::Symbol Msym
: Msymtab
.symbols())
173 if (Error Err
= addSymbol(Msymtab
, Used
, Msym
))
176 return Error::success();
// Returns the int stored in ComdatMap for comdat C.
//
// C is inserted into ComdatMap with the current size of Comdats as its
// candidate value. For COFF targets the comdat's leader is looked up in M by
// the comdat's own name; a "Could not find leader" StringError is produced on
// that path, and a leader with local linkage has its map value replaced by -1.
// The name stored for the comdat is either the leader's name as produced by
// Mang.getNameWithPrefix() or the comdat's own name; it is copied through
// Saver before being recorded. A storage::Comdat holding that name and the
// selection kind of C is appended to Comdats.
// NOTE(review): the guards selecting between these paths (first insertion,
// missing leader, non-COFF) are not part of the visible text.
179 Expected
<int> Builder::getComdatIndex(const Comdat
*C
, const Module
*M
) {
// insert() leaves an existing entry untouched, so P.first->second is the
// previously assigned value when C has been seen before.
180 auto P
= ComdatMap
.insert(std::make_pair(C
, Comdats
.size()));
183 if (TT
.isOSBinFormatCOFF()) {
184 const GlobalValue
*GV
= M
->getNamedValue(C
->getName());
186 return make_error
<StringError
>("Could not find leader",
187 inconvertibleErrorCode());
188 // Internal leaders do not affect symbol resolution, therefore they do not
189 // appear in the symbol table.
190 if (GV
->hasLocalLinkage()) {
191 P
.first
->second
= -1;
194 llvm::raw_string_ostream
OS(Name
);
195 Mang
.getNameWithPrefix(OS
, GV
, false);
197 Name
= std::string(C
->getName());
// Name is a local string, so it is copied into Saver's storage before its
// bytes are handed to the string table builder.
200 storage::Comdat Comdat
;
201 setStr(Comdat
.Name
, Saver
.save(Name
));
202 Comdat
.SelectionKind
= C
->getSelectionKind();
203 Comdats
.push_back(Comdat
);
206 return P
.first
->second
;
// Fills in the storage::Symbol at the back of Syms for the symbol Msym.
//
//  Msymtab - symbol table used to print the symbol's name and query its flags.
//  Used    - globals collected from the used lists by addModule().
//  Msym    - the symbol being described.
//
// Visible behaviour:
//  - the printed symbol name is saved through Saver and recorded as Sym.Name;
//  - object::BasicSymbolRef flags are translated into storage::Symbol FB_* bits;
//  - ComdatIndex starts at -1;
//  - one path (for symbols handled without a GlobalValue, per the comment on
//    module asm symbols) marks undefined symbols FB_used, records an empty
//    IRName and returns success;
//  - otherwise the IR name, used/tls/unnamed_addr/may_omit/visibility bits,
//    common size and alignment, comdat index, COFF linker flags, COFF weak
//    external fallback name and section name are recorded from the
//    GlobalValue.
// Errors produced here are StringErrors ("Only variables can have common
// linkage!", "Unable to determine comdat of alias!", "Invalid weak external")
// or whatever getComdatIndex() returns.
// NOTE(review): several guarding conditions are not part of the visible text.
209 Error
Builder::addSymbol(const ModuleSymbolTable
&Msymtab
,
210 const SmallPtrSet
<GlobalValue
*, 4> &Used
,
211 ModuleSymbolTable::Symbol Msym
) {
213 storage::Symbol
&Sym
= Syms
.back();
// Unc points at this symbol's storage::Uncommon record once the Uncommon
// helper below has created it. The helper sets FB_has_uncommon on the symbol,
// appends a record to Uncommons and initializes both of its strings to "".
216 storage::Uncommon
*Unc
= nullptr;
217 auto Uncommon
= [&]() -> storage::Uncommon
& {
220 Sym
.Flags
|= 1 << storage::Symbol::FB_has_uncommon
;
221 Uncommons
.emplace_back();
222 Unc
= &Uncommons
.back();
224 setStr(Unc
->COFFWeakExternFallbackName
, "");
225 setStr(Unc
->SectionName
, "");
// The name is printed into a local buffer, so it is copied through Saver
// before being handed to setStr().
229 SmallString
<64> Name
;
231 raw_svector_ostream
OS(Name
);
232 Msymtab
.printSymbolName(OS
, Msym
);
234 setStr(Sym
.Name
, Saver
.save(Name
.str()));
// Translate each object-file symbol flag into the matching FB_* bit.
236 auto Flags
= Msymtab
.getSymbolFlags(Msym
);
237 if (Flags
& object::BasicSymbolRef::SF_Undefined
)
238 Sym
.Flags
|= 1 << storage::Symbol::FB_undefined
;
239 if (Flags
& object::BasicSymbolRef::SF_Weak
)
240 Sym
.Flags
|= 1 << storage::Symbol::FB_weak
;
241 if (Flags
& object::BasicSymbolRef::SF_Common
)
242 Sym
.Flags
|= 1 << storage::Symbol::FB_common
;
243 if (Flags
& object::BasicSymbolRef::SF_Indirect
)
244 Sym
.Flags
|= 1 << storage::Symbol::FB_indirect
;
245 if (Flags
& object::BasicSymbolRef::SF_Global
)
246 Sym
.Flags
|= 1 << storage::Symbol::FB_global
;
247 if (Flags
& object::BasicSymbolRef::SF_FormatSpecific
)
248 Sym
.Flags
|= 1 << storage::Symbol::FB_format_specific
;
249 if (Flags
& object::BasicSymbolRef::SF_Executable
)
250 Sym
.Flags
|= 1 << storage::Symbol::FB_executable
;
// -1 is the initial comdat index; it is overwritten further down when the
// symbol's base object has a comdat.
252 Sym
.ComdatIndex
= -1;
253 auto *GV
= Msym
.dyn_cast
<GlobalValue
*>();
255 // Undefined module asm symbols act as GC roots and are implicitly used.
256 if (Flags
& object::BasicSymbolRef::SF_Undefined
)
257 Sym
.Flags
|= 1 << storage::Symbol::FB_used
;
258 setStr(Sym
.IRName
, "");
259 return Error::success();
262 setStr(Sym
.IRName
, GV
->getName());
// A global is flagged FB_used when it is in the Used set or when its name
// appears in LibcallRoutineNames.
264 bool IsBuiltinFunc
= llvm::is_contained(LibcallRoutineNames
, GV
->getName());
266 if (Used
.count(GV
) || IsBuiltinFunc
)
267 Sym
.Flags
|= 1 << storage::Symbol::FB_used
;
268 if (GV
->isThreadLocal())
269 Sym
.Flags
|= 1 << storage::Symbol::FB_tls
;
270 if (GV
->hasGlobalUnnamedAddr())
271 Sym
.Flags
|= 1 << storage::Symbol::FB_unnamed_addr
;
272 if (GV
->canBeOmittedFromSymbolTable())
273 Sym
.Flags
|= 1 << storage::Symbol::FB_may_omit
;
// The visibility value is stored shifted to bit position FB_visibility.
274 Sym
.Flags
|= unsigned(GV
->getVisibility()) << storage::Symbol::FB_visibility
;
// Common symbols record the allocation size of the value type (from the
// parent module's data layout) and the variable's alignment.
276 if (Flags
& object::BasicSymbolRef::SF_Common
) {
277 auto *GVar
= dyn_cast
<GlobalVariable
>(GV
);
279 return make_error
<StringError
>("Only variables can have common linkage!",
280 inconvertibleErrorCode());
281 Uncommon().CommonSize
=
282 GV
->getParent()->getDataLayout().getTypeAllocSize(GV
->getValueType());
283 Uncommon().CommonAlign
= GVar
->getAlignment();
// The comdat and section are read from the base object of GV.
286 const GlobalObject
*Base
= GV
->getBaseObject();
288 return make_error
<StringError
>("Unable to determine comdat of alias!",
289 inconvertibleErrorCode());
290 if (const Comdat
*C
= Base
->getComdat()) {
291 Expected
<int> ComdatIndexOrErr
= getComdatIndex(C
, GV
->getParent());
292 if (!ComdatIndexOrErr
)
293 return ComdatIndexOrErr
.takeError();
294 Sym
.ComdatIndex
= *ComdatIndexOrErr
;
// COFF only: emit linker flags for the global and, for a symbol that is both
// weak and indirect, record the printed name of the alias target (after
// stripping pointer casts) as the weak external fallback name.
297 if (TT
.isOSBinFormatCOFF()) {
298 emitLinkerFlagsForGlobalCOFF(COFFLinkerOptsOS
, GV
, TT
, Mang
);
300 if ((Flags
& object::BasicSymbolRef::SF_Weak
) &&
301 (Flags
& object::BasicSymbolRef::SF_Indirect
)) {
302 auto *Fallback
= dyn_cast
<GlobalValue
>(
303 cast
<GlobalAlias
>(GV
)->getAliasee()->stripPointerCasts());
305 return make_error
<StringError
>("Invalid weak external",
306 inconvertibleErrorCode());
307 std::string FallbackName
;
308 raw_string_ostream
OS(FallbackName
);
309 Msymtab
.printSymbolName(OS
, Fallback
);
311 setStr(Uncommon().COFFWeakExternFallbackName
, Saver
.save(FallbackName
));
// A non-empty section name on the base object is recorded in the Uncommon
// record.
315 if (!Base
->getSection().empty())
316 setStr(Uncommon().SectionName
, Saver
.save(Base
->getSection()));
318 return Error::success();
// Builds the complete symbol table for IRMods and serializes it into Symtab.
//
// IRMods must not be empty (asserted). The header's target triple and source
// file name, as well as TT, are taken from the first module only. Every module
// is then passed to addModule(). Afterwards the accumulated COFF linker
// options are stored in the header, space for the header is reserved at the
// start of Symtab, the tables are appended with writeRange(), and the finished
// header is copied over the reserved space.
// Returns Error::success() when the end of the function is reached.
321 Error
Builder::build(ArrayRef
<Module
*> IRMods
) {
324 assert(!IRMods
.empty());
325 Hdr
.Version
= storage::Header::kCurrentVersion
;
326 setStr(Hdr
.Producer
, kExpectedProducerName
);
327 setStr(Hdr
.TargetTriple
, IRMods
[0]->getTargetTriple());
328 setStr(Hdr
.SourceFileName
, IRMods
[0]->getSourceFileName());
329 TT
= Triple(IRMods
[0]->getTargetTriple());
331 for (auto *M
: IRMods
)
332 if (Error Err
= addModule(M
))
// Flush the stream so that COFFLinkerOpts holds everything written so far,
// then store a Saver-owned copy of it in the header.
335 COFFLinkerOptsOS
.flush();
336 setStr(Hdr
.COFFLinkerOpts
, Saver
.save(COFFLinkerOpts
));
338 // We are about to fill in the header's range fields, so reserve space for it
339 // and copy it in afterwards.
340 Symtab
.resize(sizeof(storage::Header
));
341 writeRange(Hdr
.Modules
, Mods
);
342 writeRange(Hdr
.Comdats
, Comdats
);
343 writeRange(Hdr
.Symbols
, Syms
);
344 writeRange(Hdr
.Uncommons
, Uncommons
);
345 writeRange(Hdr
.DependentLibraries
, DependentLibraries
);
// Every range field is now final, so overwrite the reserved bytes at the
// start of Symtab with the header.
346 *reinterpret_cast<storage::Header
*>(Symtab
.data()) = Hdr
;
347 return Error::success();
350 } // end anonymous namespace
// Public entry point for building an IR symbol table.
// Constructs a temporary Builder over the caller's Symtab, StrtabBuilder and
// Alloc, forwards Mods to Builder::build() and returns the Error it yields.
352 Error
irsymtab::build(ArrayRef
<Module
*> Mods
, SmallVector
<char, 0> &Symtab
,
353 StringTableBuilder
&StrtabBuilder
,
354 BumpPtrAllocator
&Alloc
) {
355 return Builder(Symtab
, StrtabBuilder
, Alloc
).build(Mods
);
358 // Upgrade a vector of bitcode modules created by an old version of LLVM by
359 // creating an irsymtab for them in the current format.
//
// Each BitcodeModule is loaded lazily (metadata lazily loaded, not importing),
// a new symbol table and string table are built into FC, and FC.TheReader is
// pointed at FC's own Symtab and Strtab buffers. A failure to load a module is
// returned as that module's error.
360 static Expected
<FileContents
> upgrade(ArrayRef
<BitcodeModule
> BMs
) {
// Mods holds the raw pointers handed to build(); OwnedMods holds the
// unique_ptrs that own those modules.
364 std::vector
<Module
*> Mods
;
365 std::vector
<std::unique_ptr
<Module
>> OwnedMods
;
366 for (auto BM
: BMs
) {
367 Expected
<std::unique_ptr
<Module
>> MOrErr
=
368 BM
.getLazyModule(Ctx
, /*ShouldLazyLoadMetadata*/ true,
369 /*IsImporting*/ false);
371 return MOrErr
.takeError();
// Take the raw pointer first; the unique_ptr is moved out on the next line.
373 Mods
.push_back(MOrErr
->get());
374 OwnedMods
.push_back(std::move(*MOrErr
));
// The string table is built in RAW mode and finalized in insertion order
// before being written into FC.Strtab.
377 StringTableBuilder
StrtabBuilder(StringTableBuilder::RAW
);
378 BumpPtrAllocator Alloc
;
379 if (Error E
= build(Mods
, FC
.Symtab
, StrtabBuilder
, Alloc
))
382 StrtabBuilder
.finalizeInOrder();
383 FC
.Strtab
.resize(StrtabBuilder
.getSize());
384 StrtabBuilder
.write((uint8_t *)FC
.Strtab
.data());
// The reader refers to the buffers stored inside FC itself.
386 FC
.TheReader
= {{FC
.Symtab
.data(), FC
.Symtab
.size()},
387 {FC
.Strtab
.data(), FC
.Strtab
.size()}};
388 return std::move(FC
);
// Produces FileContents for the bitcode file described by BFC.
//
// Returns a StringError when BFC contains no modules. The symbol table is
// rebuilt with upgrade() when any of the following holds:
//  - the string table for the symbol table is empty, or the symbol table is
//    smaller than a storage::Header;
//  - the recorded version differs from storage::Header::kCurrentVersion or
//    the recorded producer differs from kExpectedProducerName;
//  - the number of modules reported by the reader differs from the number of
//    modules in BFC.
// Otherwise the returned reader refers directly to BFC's Symtab and
// StrtabForSymtab buffers.
391 Expected
<FileContents
> irsymtab::readBitcode(const BitcodeFileContents
&BFC
) {
392 if (BFC
.Mods
.empty())
393 return make_error
<StringError
>("Bitcode file does not contain any modules",
394 inconvertibleErrorCode());
396 if (BFC
.StrtabForSymtab
.empty() ||
397 BFC
.Symtab
.size() < sizeof(storage::Header
))
398 return upgrade(BFC
.Mods
);
400 // We cannot use the regular reader to read the version and producer, because
401 // it will expect the header to be in the current format. The only thing we
402 // can rely on is that the version and producer will be present as the first
// struct elements.
404 auto *Hdr
= reinterpret_cast<const storage::Header
*>(BFC
.Symtab
.data());
405 unsigned Version
= Hdr
->Version
;
406 StringRef Producer
= Hdr
->Producer
.get(BFC
.StrtabForSymtab
);
407 if (Version
!= storage::Header::kCurrentVersion
||
408 Producer
!= kExpectedProducerName
)
409 return upgrade(BFC
.Mods
);
// The reader views BFC's buffers directly; nothing is copied here.
412 FC
.TheReader
= {{BFC
.Symtab
.data(), BFC
.Symtab
.size()},
413 {BFC
.StrtabForSymtab
.data(), BFC
.StrtabForSymtab
.size()}};
415 // Finally, make sure that the number of modules in the symbol table matches
416 // the number of modules in the bitcode file. If they differ, it may mean that
417 // the bitcode file was created by binary concatenation, so we need to create
418 // a new symbol table from scratch.
// NOTE(review): BFC is a const reference, so the std::move below cannot
// transfer ownership of BFC.Mods; the two earlier upgrade(BFC.Mods) calls pass
// the same member without it.
419 if (FC
.TheReader
.getNumModules() != BFC
.Mods
.size())
420 return upgrade(std::move(BFC
.Mods
));
422 return std::move(FC
);