1 //===- IRSymtab.cpp - implementation of IR symbol tables ------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/Object/IRSymtab.h"
10 #include "llvm/ADT/ArrayRef.h"
11 #include "llvm/ADT/DenseMap.h"
12 #include "llvm/ADT/SmallPtrSet.h"
13 #include "llvm/ADT/SmallString.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/Bitcode/BitcodeReader.h"
17 #include "llvm/Config/llvm-config.h"
18 #include "llvm/IR/Comdat.h"
19 #include "llvm/IR/DataLayout.h"
20 #include "llvm/IR/GlobalAlias.h"
21 #include "llvm/IR/GlobalObject.h"
22 #include "llvm/IR/Mangler.h"
23 #include "llvm/IR/Metadata.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/IR/RuntimeLibcalls.h"
26 #include "llvm/MC/StringTableBuilder.h"
27 #include "llvm/Object/ModuleSymbolTable.h"
28 #include "llvm/Object/SymbolicFile.h"
29 #include "llvm/Support/Allocator.h"
30 #include "llvm/Support/Casting.h"
31 #include "llvm/Support/CommandLine.h"
32 #include "llvm/Support/Error.h"
33 #include "llvm/Support/StringSaver.h"
34 #include "llvm/Support/VCSRevision.h"
35 #include "llvm/Support/raw_ostream.h"
36 #include "llvm/TargetParser/Triple.h"
43 using namespace irsymtab
;
// Hidden boolean command-line option "disable-bitcode-version-upgrade".
// readBitcode() only performs its header version/producer checks (which can
// trigger an upgrade of the symbol table) when this option is not set.
45 static cl::opt
<bool> DisableBitcodeVersionUpgrade(
46 "disable-bitcode-version-upgrade", cl::Hidden
,
47 cl::desc("Disable automatic bitcode upgrade for version mismatch"));
// Table of symbol names (C strings) that buildPreservedSymbolsSet() uses to
// seed the preserved-symbol set.
49 static const char *PreservedSymbols
[] = {
50 // These are global variables, so they are listed here instead of in
51 // RuntimeLibcalls.def.
52 // TODO: Are there other such variables?
// Computes the producer name that kExpectedProducerName is initialized with.
// The default is held in a function-local static buffer whose initializer
// begins with LLVM_VERSION_STRING.
59 const char *getExpectedProducerName() {
60 static char DefaultName
[] = LLVM_VERSION_STRING
65 // Allows for testing of the irsymtab writer and upgrade mechanism. This
66 // environment variable should not be set by users.
// NOTE(review): presumably the value of LLVM_OVERRIDE_PRODUCER, when set,
// is returned in place of DefaultName -- confirm against the return paths.
67 if (char *OverrideName
= getenv("LLVM_OVERRIDE_PRODUCER"))
// Result of getExpectedProducerName(), evaluated when this variable is
// initialized. Builder::build() writes it to the header's Producer field and
// readBitcode() compares an existing header's producer string against it.
72 const char *kExpectedProducerName
= getExpectedProducerName();
74 /// Stores the temporary state that is required to build an IR symbol table.
// Output byte buffer, held by reference; writeRange() appends serialized
// records to it and build() copies the header into its front.
76 SmallVector
<char, 0> &Symtab
;
// String table builder, held by reference; setStr() adds every referenced
// string to it.
77 StringTableBuilder
&StrtabBuilder
;
80 // This ctor initializes a StringSaver using the passed in BumpPtrAllocator.
81 // The StringTableBuilder does not create a copy of any strings added to it,
82 // so this provides somewhere to store any strings that we create.
// Symtab and StrtabBuilder are bound to the caller's objects (the members are
// references); Alloc is only used to construct Saver.
83 Builder(SmallVector
<char, 0> &Symtab
, StringTableBuilder
&StrtabBuilder
,
84 BumpPtrAllocator
&Alloc
)
85 : Symtab(Symtab
), StrtabBuilder(StrtabBuilder
), Saver(Alloc
) {}
// Maps each comdat passed to getComdatIndex() to an int: initially the size
// of Comdats at insertion time; getComdatIndex() may overwrite it with -1.
87 DenseMap
<const Comdat
*, int> ComdatMap
;
// Records accumulated while modules are added. build() serializes each of
// these vectors into Symtab via writeRange().
91 std::vector
<storage::Comdat
> Comdats
;
92 std::vector
<storage::Module
> Mods
;
93 std::vector
<storage::Symbol
> Syms
;
94 std::vector
<storage::Uncommon
> Uncommons
;
// Accumulated COFF linker options text. COFFLinkerOptsOS is the stream that
// writes into COFFLinkerOpts; build() flushes it and stores the string in the
// header.
96 std::string COFFLinkerOpts
;
97 raw_string_ostream COFFLinkerOptsOS
{COFFLinkerOpts
};
// String-table references collected by addModule() from the
// "llvm.dependent-libraries" named metadata.
99 std::vector
<storage::Str
> DependentLibraries
;
// Adds Value to the string table and records where it lives in S:
// S.Offset is whatever StrtabBuilder.add() returns for Value and S.Size is
// Value's length. StrtabBuilder does not copy the string (see the ctor
// comment), so Value's storage must stay valid; callers that build temporary
// strings pass them through Saver first.
101 void setStr(storage::Str
&S
, StringRef Value
) {
102 S
.Offset
= StrtabBuilder
.add(Value
);
103 S
.Size
= Value
.size();
// Appends the raw bytes of every element of Objs to the end of Symtab and
// describes the appended region in R: R.Offset is Symtab's size (in bytes)
// before the append, and R.Size is the number of elements in Objs (an element
// count, not a byte count).
106 template <typename T
>
107 void writeRange(storage::Range
<T
> &R
, const std::vector
<T
> &Objs
) {
108 R
.Offset
= Symtab
.size();
109 R
.Size
= Objs
.size();
// The objects are copied byte-for-byte by viewing the vector's storage as a
// range of chars.
110 Symtab
.insert(Symtab
.end(), reinterpret_cast<const char *>(Objs
.data()),
111 reinterpret_cast<const char *>(Objs
.data() + Objs
.size()));
// Returns the int recorded in ComdatMap for comdat C, or an error. M is the
// module searched for the comdat's leader on COFF targets.
114 Expected
<int> getComdatIndex(const Comdat
*C
, const Module
*M
);
// Adds all symbols and module-level metadata of M to the tables being built.
116 Error
addModule(Module
*M
);
// Fills in the symbol table entry for one symbol of a module. Used is the set
// of globals collected from llvm.used / llvm.compiler.used.
117 Error
addSymbol(const ModuleSymbolTable
&Msymtab
,
118 const SmallPtrSet
<GlobalValue
*, 4> &Used
,
119 ModuleSymbolTable::Symbol Sym
);
// Builds the whole symbol table for the given modules into Symtab. Note that
// this parameter has the same name as the Mods member declared above.
121 Error
build(ArrayRef
<Module
*> Mods
);
// Adds module M to the symbol table being built. Visible steps: reject a
// module with an empty data layout string, gather the llvm.used and
// llvm.compiler.used globals, compute the module's symbol range, collect
// COFF linker options or ELF dependent libraries from named metadata, and
// pass every symbol reported by ModuleSymbolTable to addSymbol().
124 Error
Builder::addModule(Module
*M
) {
// A module without a data layout string is rejected with a StringError.
125 if (M
->getDataLayoutStr().empty())
126 return make_error
<StringError
>("input module has no datalayout",
127 inconvertibleErrorCode());
129 // Symbols in the llvm.used list will get the FB_Used bit and will not be
130 // internalized. We do this for llvm.compiler.used as well:
132 // IR symbol table tracks module-level asm symbol references but not inline
133 // asm. A symbol only referenced by inline asm is not in the IR symbol table,
134 // so we may not know that the definition (in another translation unit) is
135 // referenced. That definition may have __attribute__((used)) (which lowers to
136 // llvm.compiler.used on ELF targets) to communicate to the compiler that it
137 // may be used by inline asm. The usage is perfectly fine, so we treat
138 // llvm.compiler.used conservatively as llvm.used to work around our own
// limitation (inline asm references are not tracked, as described above).
// Both lists are collected into UsedV and then copied into the Used set that
// is handed to addSymbol().
140 SmallVector
<GlobalValue
*, 4> UsedV
;
141 collectUsedGlobalVariables(*M
, UsedV
, /*CompilerUsed=*/false);
142 collectUsedGlobalVariables(*M
, UsedV
, /*CompilerUsed=*/true);
143 SmallPtrSet
<GlobalValue
*, 4> Used(UsedV
.begin(), UsedV
.end());
145 ModuleSymbolTable Msymtab
;
146 Msymtab
.addModule(M
);
// Mod.Begin/Mod.End delimit the slice of Syms that this module's symbols will
// occupy (current size of Syms plus the number of symbols Msymtab reports);
// Mod.UncBegin is the current size of Uncommons.
149 Mod
.Begin
= Syms
.size();
150 Mod
.End
= Syms
.size() + Msymtab
.symbols().size();
151 Mod
.UncBegin
= Uncommons
.size();
// COFF: after materializing metadata, append every string operand found under
// "llvm.linker.options" to the linker-options stream, each preceded by a
// space.
154 if (TT
.isOSBinFormatCOFF()) {
155 if (auto E
= M
->materializeMetadata())
157 if (NamedMDNode
*LinkerOptions
=
158 M
->getNamedMetadata("llvm.linker.options")) {
159 for (MDNode
*MDOptions
: LinkerOptions
->operands())
160 for (const MDOperand
&MDOption
: cast
<MDNode
>(MDOptions
)->operands())
161 COFFLinkerOptsOS
<< " " << cast
<MDString
>(MDOption
)->getString();
// ELF: after materializing metadata, record operand 0 of every
// "llvm.dependent-libraries" entry as a string-table reference in
// DependentLibraries.
165 if (TT
.isOSBinFormatELF()) {
166 if (auto E
= M
->materializeMetadata())
168 if (NamedMDNode
*N
= M
->getNamedMetadata("llvm.dependent-libraries")) {
169 for (MDNode
*MDOptions
: N
->operands()) {
170 const auto OperandStr
=
171 cast
<MDString
>(cast
<MDNode
>(MDOptions
)->getOperand(0))->getString();
172 storage::Str Specifier
;
173 setStr(Specifier
, OperandStr
);
174 DependentLibraries
.emplace_back(Specifier
);
// Hand each symbol of the module to addSymbol().
179 for (ModuleSymbolTable::Symbol Msym
: Msymtab
.symbols())
180 if (Error Err
= addSymbol(Msymtab
, Used
, Msym
))
183 return Error::success();
// Looks up comdat C in ComdatMap, registering it in Comdats when needed, and
// returns the int stored for it. M is the module searched for the comdat's
// leader symbol on COFF targets.
186 Expected
<int> Builder::getComdatIndex(const Comdat
*C
, const Module
*M
) {
// Try to insert C with the next free index in Comdats. P.first refers to the
// map entry for C whether or not the insertion took place.
187 auto P
= ComdatMap
.insert(std::make_pair(C
, Comdats
.size()));
// COFF: the leader is looked up as the global value in M that has the same
// name as the comdat.
190 if (TT
.isOSBinFormatCOFF()) {
191 const GlobalValue
*GV
= M
->getNamedValue(C
->getName());
193 return make_error
<StringError
>("Could not find leader",
194 inconvertibleErrorCode());
195 // Internal leaders do not affect symbol resolution, therefore they do not
196 // appear in the symbol table.
197 if (GV
->hasLocalLinkage()) {
198 P
.first
->second
= -1;
// The leader's name is written into Name by the Mangler.
201 llvm::raw_string_ostream
OS(Name
);
202 Mang
.getNameWithPrefix(OS
, GV
, false);
// NOTE(review): presumably the non-COFF path -- the comdat's own name is used
// unchanged.
204 Name
= std::string(C
->getName());
// Register the comdat: its name is copied through Saver before being added to
// the string table, and its selection kind is stored alongside.
207 storage::Comdat Comdat
;
208 setStr(Comdat
.Name
, Saver
.save(Name
));
209 Comdat
.SelectionKind
= C
->getSelectionKind();
210 Comdats
.push_back(Comdat
);
// The int stored in the map entry for C.
213 return P
.first
->second
;
// Builds the set of symbol names that addSymbol() marks as used regardless of
// the llvm.used lists: the entries of PreservedSymbols plus names returned by
// RTLIB::RuntimeLibcallsInfo::getLibcallNames() for target triple TT.
216 static DenseSet
<StringRef
> buildPreservedSymbolsSet(const Triple
&TT
) {
// Seed the set with the fixed table.
217 DenseSet
<StringRef
> PreservedSymbolSet(std::begin(PreservedSymbols
),
218 std::end(PreservedSymbols
));
// Add the runtime library call names for this triple.
220 RTLIB::RuntimeLibcallsInfo
Libcalls(TT
);
221 for (const char *Name
: Libcalls
.getLibcallNames()) {
223 PreservedSymbolSet
.insert(Name
);
225 return PreservedSymbolSet
;
// Fills in the symbol table entry for Msym. Msymtab supplies the symbol's
// printed name and flags; Used is the set of globals that addModule()
// collected from llvm.used / llvm.compiler.used.
228 Error
Builder::addSymbol(const ModuleSymbolTable
&Msymtab
,
229 const SmallPtrSet
<GlobalValue
*, 4> &Used
,
230 ModuleSymbolTable::Symbol Msym
) {
// Sym refers to the last element of Syms; all writes below go to that entry.
232 storage::Symbol
&Sym
= Syms
.back();
// Uncommon() yields the storage::Uncommon record for this symbol. The visible
// code sets FB_has_uncommon on the symbol, appends a record to Uncommons,
// remembers it in Unc, and sets the record's string fields to "".
235 storage::Uncommon
*Unc
= nullptr;
236 auto Uncommon
= [&]() -> storage::Uncommon
& {
239 Sym
.Flags
|= 1 << storage::Symbol::FB_has_uncommon
;
240 Uncommons
.emplace_back();
241 Unc
= &Uncommons
.back();
243 setStr(Unc
->COFFWeakExternFallbackName
, "");
244 setStr(Unc
->SectionName
, "");
// Print the symbol's name into a local buffer, then copy it through Saver so
// the string table can refer to stable storage.
248 SmallString
<64> Name
;
250 raw_svector_ostream
OS(Name
);
251 Msymtab
.printSymbolName(OS
, Msym
);
253 setStr(Sym
.Name
, Saver
.save(Name
.str()));
// Translate each object-file symbol flag into the matching storage::Symbol
// flag bit.
255 auto Flags
= Msymtab
.getSymbolFlags(Msym
);
256 if (Flags
& object::BasicSymbolRef::SF_Undefined
)
257 Sym
.Flags
|= 1 << storage::Symbol::FB_undefined
;
258 if (Flags
& object::BasicSymbolRef::SF_Weak
)
259 Sym
.Flags
|= 1 << storage::Symbol::FB_weak
;
260 if (Flags
& object::BasicSymbolRef::SF_Common
)
261 Sym
.Flags
|= 1 << storage::Symbol::FB_common
;
262 if (Flags
& object::BasicSymbolRef::SF_Indirect
)
263 Sym
.Flags
|= 1 << storage::Symbol::FB_indirect
;
264 if (Flags
& object::BasicSymbolRef::SF_Global
)
265 Sym
.Flags
|= 1 << storage::Symbol::FB_global
;
266 if (Flags
& object::BasicSymbolRef::SF_FormatSpecific
)
267 Sym
.Flags
|= 1 << storage::Symbol::FB_format_specific
;
268 if (Flags
& object::BasicSymbolRef::SF_Executable
)
269 Sym
.Flags
|= 1 << storage::Symbol::FB_executable
;
// ComdatIndex starts at -1 and is only overwritten further down when the
// symbol's underlying object has a comdat.
271 Sym
.ComdatIndex
= -1;
// GV is the GlobalValue behind Msym, if there is one.
272 auto *GV
= dyn_cast_if_present
<GlobalValue
*>(Msym
);
274 // Undefined module asm symbols act as GC roots and are implicitly used.
275 if (Flags
& object::BasicSymbolRef::SF_Undefined
)
276 Sym
.Flags
|= 1 << storage::Symbol::FB_used
;
// On this path the IR name is set to the empty string and the function
// returns successfully.
277 setStr(Sym
.IRName
, "");
278 return Error::success();
// From here on GV is dereferenced: record its IR-level name.
281 setStr(Sym
.IRName
, GV
->getName());
// NOTE(review): this function-local static is initialized exactly once, from
// the target triple of the first GlobalValue that reaches this point; later
// calls reuse that set even if their module has a different triple.
283 static const DenseSet
<StringRef
> PreservedSymbolsSet
=
284 buildPreservedSymbolsSet(
285 llvm::Triple(GV
->getParent()->getTargetTriple()));
286 bool IsPreservedSymbol
= PreservedSymbolsSet
.contains(GV
->getName());
// Globals listed in Used, or whose name is in the preserved set, get FB_used.
288 if (Used
.count(GV
) || IsPreservedSymbol
)
289 Sym
.Flags
|= 1 << storage::Symbol::FB_used
;
// Remaining per-global bits: thread-local, unnamed_addr, may-omit, and the
// visibility value shifted into place at FB_visibility.
290 if (GV
->isThreadLocal())
291 Sym
.Flags
|= 1 << storage::Symbol::FB_tls
;
292 if (GV
->hasGlobalUnnamedAddr())
293 Sym
.Flags
|= 1 << storage::Symbol::FB_unnamed_addr
;
294 if (GV
->canBeOmittedFromSymbolTable())
295 Sym
.Flags
|= 1 << storage::Symbol::FB_may_omit
;
296 Sym
.Flags
|= unsigned(GV
->getVisibility()) << storage::Symbol::FB_visibility
;
// Common symbols: record the allocation size of the value type and the
// variable's alignment (0 when getAlign() holds no value).
298 if (Flags
& object::BasicSymbolRef::SF_Common
) {
299 auto *GVar
= dyn_cast
<GlobalVariable
>(GV
);
301 return make_error
<StringError
>("Only variables can have common linkage!",
302 inconvertibleErrorCode());
303 Uncommon().CommonSize
=
304 GV
->getDataLayout().getTypeAllocSize(GV
->getValueType());
305 Uncommon().CommonAlign
= GVar
->getAlign() ? GVar
->getAlign()->value() : 0;
// Find the GlobalObject whose comdat and section apply to this symbol: the
// aliasee object, or the resolver function when GV is an ifunc.
308 const GlobalObject
*GO
= GV
->getAliaseeObject();
310 if (isa
<GlobalIFunc
>(GV
))
311 GO
= cast
<GlobalIFunc
>(GV
)->getResolverFunction();
313 return make_error
<StringError
>("Unable to determine comdat of alias!",
314 inconvertibleErrorCode());
// If that object is in a comdat, store the value returned by getComdatIndex()
// on the symbol; an error from getComdatIndex() is propagated.
316 if (const Comdat
*C
= GO
->getComdat()) {
317 Expected
<int> ComdatIndexOrErr
= getComdatIndex(C
, GV
->getParent());
318 if (!ComdatIndexOrErr
)
319 return ComdatIndexOrErr
.takeError();
320 Sym
.ComdatIndex
= *ComdatIndexOrErr
;
// COFF only: emit linker flags for the global into the linker-options stream,
// and for a symbol that is both weak and indirect record the printed name of
// the alias target as the weak external fallback name.
323 if (TT
.isOSBinFormatCOFF()) {
324 emitLinkerFlagsForGlobalCOFF(COFFLinkerOptsOS
, GV
, TT
, Mang
);
326 if ((Flags
& object::BasicSymbolRef::SF_Weak
) &&
327 (Flags
& object::BasicSymbolRef::SF_Indirect
)) {
328 auto *Fallback
= dyn_cast
<GlobalValue
>(
329 cast
<GlobalAlias
>(GV
)->getAliasee()->stripPointerCasts());
331 return make_error
<StringError
>("Invalid weak external",
332 inconvertibleErrorCode());
333 std::string FallbackName
;
334 raw_string_ostream
OS(FallbackName
);
335 Msymtab
.printSymbolName(OS
, Fallback
);
337 setStr(Uncommon().COFFWeakExternFallbackName
, Saver
.save(FallbackName
));
// A non-empty section name is stored in the symbol's Uncommon record.
341 if (!GO
->getSection().empty())
342 setStr(Uncommon().SectionName
, Saver
.save(GO
->getSection()));
344 return Error::success();
// Builds the complete symbol table for IRMods into Symtab. The header's
// target triple and source file name, and the TT member, are taken from the
// first module, so IRMods must not be empty (asserted below).
347 Error
Builder::build(ArrayRef
<Module
*> IRMods
) {
350 assert(!IRMods
.empty());
// Fill in the fixed header fields.
351 Hdr
.Version
= storage::Header::kCurrentVersion
;
352 setStr(Hdr
.Producer
, kExpectedProducerName
);
353 setStr(Hdr
.TargetTriple
, IRMods
[0]->getTargetTriple());
354 setStr(Hdr
.SourceFileName
, IRMods
[0]->getSourceFileName());
355 TT
= Triple(IRMods
[0]->getTargetTriple());
// Add every module in order.
357 for (auto *M
: IRMods
)
358 if (Error Err
= addModule(M
))
// Flush the stream so COFFLinkerOpts holds everything written so far, then
// store a saved copy of it in the header.
361 COFFLinkerOptsOS
.flush();
362 setStr(Hdr
.COFFLinkerOpts
, Saver
.save(COFFLinkerOpts
));
364 // We are about to fill in the header's range fields, so reserve space for it
365 // and copy it in afterwards.
366 Symtab
.resize(sizeof(storage::Header
));
// Each writeRange() call appends one table after the reserved header space
// and records its offset/size in the corresponding header field.
367 writeRange(Hdr
.Modules
, Mods
);
368 writeRange(Hdr
.Comdats
, Comdats
);
369 writeRange(Hdr
.Symbols
, Syms
);
370 writeRange(Hdr
.Uncommons
, Uncommons
);
371 writeRange(Hdr
.DependentLibraries
, DependentLibraries
);
// Copy the now-complete header into the space reserved at the start of
// Symtab.
372 *reinterpret_cast<storage::Header
*>(Symtab
.data()) = Hdr
;
373 return Error::success();
376 } // end anonymous namespace
// Public entry point: constructs a temporary Builder over the caller's
// Symtab, StrtabBuilder and Alloc, runs Builder::build() on Mods, and returns
// its result.
378 Error
irsymtab::build(ArrayRef
<Module
*> Mods
, SmallVector
<char, 0> &Symtab
,
379 StringTableBuilder
&StrtabBuilder
,
380 BumpPtrAllocator
&Alloc
) {
381 return Builder(Symtab
, StrtabBuilder
, Alloc
).build(Mods
);
384 // Upgrade a vector of bitcode modules created by an old version of LLVM by
385 // creating an irsymtab for them in the current format.
// The resulting FileContents owns the new symbol table and string table, and
// its reader refers to those buffers.
386 static Expected
<FileContents
> upgrade(ArrayRef
<BitcodeModule
> BMs
) {
// Mods holds the raw pointers that build() consumes; OwnedMods holds the
// owning unique_ptrs for the same modules.
390 std::vector
<Module
*> Mods
;
391 std::vector
<std::unique_ptr
<Module
>> OwnedMods
;
// Load each bitcode module lazily (BM is a by-value copy of the element); a
// load error is returned via takeError().
392 for (auto BM
: BMs
) {
393 Expected
<std::unique_ptr
<Module
>> MOrErr
=
394 BM
.getLazyModule(Ctx
, /*ShouldLazyLoadMetadata*/ true,
395 /*IsImporting*/ false);
397 return MOrErr
.takeError();
// Take the raw pointer before ownership is moved into OwnedMods.
399 Mods
.push_back(MOrErr
->get());
400 OwnedMods
.push_back(std::move(*MOrErr
));
// Build the symbol table into FC.Symtab using a RAW-kind string table
// builder and a local allocator for saved strings.
403 StringTableBuilder
StrtabBuilder(StringTableBuilder::RAW
);
404 BumpPtrAllocator Alloc
;
405 if (Error E
= build(Mods
, FC
.Symtab
, StrtabBuilder
, Alloc
))
// Finalize the string table via finalizeInOrder() and write it into
// FC.Strtab, sized to the builder's reported size.
408 StrtabBuilder
.finalizeInOrder();
409 FC
.Strtab
.resize(StrtabBuilder
.getSize());
410 StrtabBuilder
.write((uint8_t *)FC
.Strtab
.data());
// Point the reader at the freshly built symbol table and string table.
412 FC
.TheReader
= {{FC
.Symtab
.data(), FC
.Symtab
.size()},
413 {FC
.Strtab
.data(), FC
.Strtab
.size()}};
414 return std::move(FC
);
// Produces FileContents for a bitcode file. The symbol table embedded in BFC
// is reused when it passes the checks below; otherwise upgrade() rebuilds it
// from BFC.Mods. Returns a StringError when BFC contains no modules.
417 Expected
<FileContents
> irsymtab::readBitcode(const BitcodeFileContents
&BFC
) {
418 if (BFC
.Mods
.empty())
419 return make_error
<StringError
>("Bitcode file does not contain any modules",
420 inconvertibleErrorCode());
// The header checks below only run when -disable-bitcode-version-upgrade is
// not set.
422 if (!DisableBitcodeVersionUpgrade
) {
// No string table for the symbol table, or a symbol table too small to hold
// a header: rebuild.
423 if (BFC
.StrtabForSymtab
.empty() ||
424 BFC
.Symtab
.size() < sizeof(storage::Header
))
425 return upgrade(BFC
.Mods
);
427 // We cannot use the regular reader to read the version and producer,
428 // because it will expect the header to be in the current format. The only
429 // thing we can rely on is that the version and producer will be present as
430 // the first struct elements.
431 auto *Hdr
= reinterpret_cast<const storage::Header
*>(BFC
.Symtab
.data());
432 unsigned Version
= Hdr
->Version
;
433 StringRef Producer
= Hdr
->Producer
.get(BFC
.StrtabForSymtab
);
// Rebuild when the version or producer differs from what this build writes.
434 if (Version
!= storage::Header::kCurrentVersion
||
435 Producer
!= kExpectedProducerName
)
436 return upgrade(BFC
.Mods
);
// Reuse the embedded tables: the reader refers directly to BFC's symbol table
// and string table buffers.
440 FC
.TheReader
= {{BFC
.Symtab
.data(), BFC
.Symtab
.size()},
441 {BFC
.StrtabForSymtab
.data(), BFC
.StrtabForSymtab
.size()}};
443 // Finally, make sure that the number of modules in the symbol table matches
444 // the number of modules in the bitcode file. If they differ, it may mean that
445 // the bitcode file was created by binary concatenation, so we need to create
446 // a new symbol table from scratch.
// NOTE(review): BFC is a const reference, so std::move(BFC.Mods) yields a
// const rvalue and nothing is actually moved here.
447 if (FC
.TheReader
.getNumModules() != BFC
.Mods
.size())
448 return upgrade(std::move(BFC
.Mods
));
450 return std::move(FC
);