1 //===- IRSymtab.cpp - implementation of IR symbol tables ------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/Object/IRSymtab.h"
10 #include "llvm/ADT/ArrayRef.h"
11 #include "llvm/ADT/DenseMap.h"
12 #include "llvm/ADT/SmallPtrSet.h"
13 #include "llvm/ADT/SmallString.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/Bitcode/BitcodeReader.h"
17 #include "llvm/Config/llvm-config.h"
18 #include "llvm/IR/Comdat.h"
19 #include "llvm/IR/DataLayout.h"
20 #include "llvm/IR/GlobalAlias.h"
21 #include "llvm/IR/GlobalObject.h"
22 #include "llvm/IR/Mangler.h"
23 #include "llvm/IR/Metadata.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/MC/StringTableBuilder.h"
26 #include "llvm/Object/ModuleSymbolTable.h"
27 #include "llvm/Object/SymbolicFile.h"
28 #include "llvm/Support/Allocator.h"
29 #include "llvm/Support/Casting.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/Error.h"
32 #include "llvm/Support/StringSaver.h"
33 #include "llvm/Support/VCSRevision.h"
34 #include "llvm/Support/raw_ostream.h"
35 #include "llvm/TargetParser/Triple.h"
42 using namespace irsymtab
;
// Hidden boolean command-line option "disable-bitcode-version-upgrade".
// readBitcode() tests this flag before it compares the stored symbol table's
// version and producer against the values this build expects.
44 static cl::opt
<bool> DisableBitcodeVersionUpgrade(
45 "disable-bitcode-version-upgrade", cl::Hidden
,
46 cl::desc("Disable automatic bitcode upgrade for version mismatch"));
// Table of symbol names. buildPreservedSymbolsSet() turns it into a set, and
// addSymbol() sets FB_used on every global whose name is found in that set.
// HANDLE_LIBCALL expands each entry of RuntimeLibcalls.def to its `name`
// argument followed by a comma, so the .def file supplies the leading entries.
48 static const char *PreservedSymbols
[] = {
49 #define HANDLE_LIBCALL(code, name) name,
50 #include "llvm/IR/RuntimeLibcalls.def"
52 // There are global variables, so put it here instead of in
53 // RuntimeLibcalls.def.
54 // TODO: Are there similar such variables?
// NOTE(review): the extra entries the comment above introduces, and the end of
// the initializer, are not visible in this excerpt.
// Returns the producer string this build expects to find in a symbol table.
// Visible here: a function-local static default seeded from
// LLVM_VERSION_STRING, and a lookup of the LLVM_OVERRIDE_PRODUCER environment
// variable.
// NOTE(review): the remainder of the default's initializer and both return
// statements are not visible in this excerpt; presumably the environment value
// is returned when set and the default otherwise -- confirm.
61 const char *getExpectedProducerName() {
62 static char DefaultName
[] = LLVM_VERSION_STRING
67 // Allows for testing of the irsymtab writer and upgrade mechanism. This
68 // environment variable should not be set by users.
69 if (char *OverrideName
= getenv("LLVM_OVERRIDE_PRODUCER"))
// Producer name obtained from getExpectedProducerName() when this variable is
// initialized. Builder::build() stores it in the header, and readBitcode()
// compares it with the producer recorded in an existing symbol table.
74 const char *kExpectedProducerName
= getExpectedProducerName();
76 /// Stores the temporary state that is required to build an IR symbol table.
// NOTE(review): the struct/class header and some members used elsewhere in
// this file (Saver, Mang, TT) are not visible in this excerpt.

// Output byte buffer; build() sizes it for the header and writeRange() appends
// the record arrays to it.
78 SmallVector
<char, 0> &Symtab
;
// String table builder that setStr() adds every stored string to.
79 StringTableBuilder
&StrtabBuilder
;
82 // This ctor initializes a StringSaver using the passed in BumpPtrAllocator.
83 // The StringTableBuilder does not create a copy of any strings added to it,
84 // so this provides somewhere to store any strings that we create.
85 Builder(SmallVector
<char, 0> &Symtab
, StringTableBuilder
&StrtabBuilder
,
86 BumpPtrAllocator
&Alloc
)
87 : Symtab(Symtab
), StrtabBuilder(StrtabBuilder
), Saver(Alloc
) {}
// Remembers the value getComdatIndex() returns for each Comdat already seen:
// either a position in Comdats or -1.
89 DenseMap
<const Comdat
*, int> ComdatMap
;
// Records accumulated while modules are added; build() serializes each of
// these vectors with writeRange().
93 std::vector
<storage::Comdat
> Comdats
;
94 std::vector
<storage::Module
> Mods
;
95 std::vector
<storage::Symbol
> Syms
;
96 std::vector
<storage::Uncommon
> Uncommons
;
// Text of the COFF linker options, written through COFFLinkerOptsOS. build()
// flushes the stream and stores the text in the header's COFFLinkerOpts field.
98 std::string COFFLinkerOpts
;
99 raw_string_ostream COFFLinkerOptsOS
{COFFLinkerOpts
};
// One entry per operand of "llvm.dependent-libraries", filled in by
// addModule().
101 std::vector
<storage::Str
> DependentLibraries
;
// Adds Value to the string table and records it in S: S.Offset receives the
// result of StrtabBuilder.add() and S.Size the length of Value.
// Per the constructor comment, StringTableBuilder does not copy strings, so
// Value must refer to storage that stays alive (callers in this file pass
// literals, IR-owned names, or strings copied with Saver.save()).
103 void setStr(storage::Str
&S
, StringRef Value
) {
104 S
.Offset
= StrtabBuilder
.add(Value
);
105 S
.Size
= Value
.size();
// Appends the raw bytes of every element of Objs to Symtab and describes the
// appended region in R.
//   R.Offset - size of Symtab before the append, i.e. where the data starts.
//   R.Size   - number of elements in Objs (an element count, not a byte count).
108 template <typename T
>
109 void writeRange(storage::Range
<T
> &R
, const std::vector
<T
> &Objs
) {
110 R
.Offset
= Symtab
.size();
111 R
.Size
= Objs
.size();
// The objects are copied byte-for-byte through char pointers.
112 Symtab
.insert(Symtab
.end(), reinterpret_cast<const char *>(Objs
.data()),
113 reinterpret_cast<const char *>(Objs
.data() + Objs
.size()));
// Returns the value recorded in ComdatMap for C, adding a storage::Comdat
// record when one is needed; M is the module searched for the comdat leader
// on COFF targets.
116 Expected
<int> getComdatIndex(const Comdat
*C
, const Module
*M
);
// Collects the used-globals set, symbols, COFF linker options and ELF
// dependent libraries of one module.
118 Error
addModule(Module
*M
);
// Fills in the storage::Symbol record for a single module symbol.
119 Error
addSymbol(const ModuleSymbolTable
&Msymtab
,
120 const SmallPtrSet
<GlobalValue
*, 4> &Used
,
121 ModuleSymbolTable::Symbol Sym
);
// Entry point: fills in the header, adds every module and writes the
// serialized table into Symtab.
123 Error
build(ArrayRef
<Module
*> Mods
);
// Adds one module to the symbol table being built.
//
// Fails with "input module has no datalayout" when M has an empty data layout
// string. Otherwise it gathers the globals listed in llvm.used and
// llvm.compiler.used, records the module's symbol and uncommon ranges, collects
// COFF linker options or ELF dependent libraries depending on the target, and
// calls addSymbol() for every symbol of the module.
// NOTE(review): the declaration of `Mod`, the statements that propagate `E` and
// `Err`, and the closing braces are not visible in this excerpt.
126 Error
Builder::addModule(Module
*M
) {
127 if (M
->getDataLayoutStr().empty())
128 return make_error
<StringError
>("input module has no datalayout",
129 inconvertibleErrorCode());
131 // Symbols in the llvm.used list will get the FB_Used bit and will not be
132 // internalized. We do this for llvm.compiler.used as well:
134 // IR symbol table tracks module-level asm symbol references but not inline
135 // asm. A symbol only referenced by inline asm is not in the IR symbol table,
136 // so we may not know that the definition (in another translation unit) is
137 // referenced. That definition may have __attribute__((used)) (which lowers to
138 // llvm.compiler.used on ELF targets) to communicate to the compiler that it
139 // may be used by inline asm. The usage is perfectly fine, so we treat
140 // llvm.compiler.used conservatively as llvm.used to work around our own
// limitation (inline asm references are not tracked, as explained above).
142 SmallVector
<GlobalValue
*, 4> UsedV
;
// Both lists are collected into the same vector, then turned into a set that
// addSymbol() queries.
143 collectUsedGlobalVariables(*M
, UsedV
, /*CompilerUsed=*/false);
144 collectUsedGlobalVariables(*M
, UsedV
, /*CompilerUsed=*/true);
145 SmallPtrSet
<GlobalValue
*, 4> Used(UsedV
.begin(), UsedV
.end());
147 ModuleSymbolTable Msymtab
;
148 Msymtab
.addModule(M
);
// This module's symbols occupy [Begin, End) in Syms; its uncommon records start
// at UncBegin in Uncommons.
151 Mod
.Begin
= Syms
.size();
152 Mod
.End
= Syms
.size() + Msymtab
.symbols().size();
153 Mod
.UncBegin
= Uncommons
.size();
// COFF: append every string operand of "llvm.linker.options" to the linker
// options text, each preceded by a single space.
156 if (TT
.isOSBinFormatCOFF()) {
157 if (auto E
= M
->materializeMetadata())
159 if (NamedMDNode
*LinkerOptions
=
160 M
->getNamedMetadata("llvm.linker.options")) {
161 for (MDNode
*MDOptions
: LinkerOptions
->operands())
162 for (const MDOperand
&MDOption
: cast
<MDNode
>(MDOptions
)->operands())
163 COFFLinkerOptsOS
<< " " << cast
<MDString
>(MDOption
)->getString();
// ELF: record the first operand of each "llvm.dependent-libraries" entry as a
// dependent library specifier.
167 if (TT
.isOSBinFormatELF()) {
168 if (auto E
= M
->materializeMetadata())
170 if (NamedMDNode
*N
= M
->getNamedMetadata("llvm.dependent-libraries")) {
171 for (MDNode
*MDOptions
: N
->operands()) {
172 const auto OperandStr
=
173 cast
<MDString
>(cast
<MDNode
>(MDOptions
)->getOperand(0))->getString();
174 storage::Str Specifier
;
175 setStr(Specifier
, OperandStr
);
176 DependentLibraries
.emplace_back(Specifier
);
// Add one symbol record per entry of the module symbol table.
181 for (ModuleSymbolTable::Symbol Msym
: Msymtab
.symbols())
182 if (Error Err
= addSymbol(Msymtab
, Used
, Msym
))
185 return Error::success();
// Returns the value stored in ComdatMap for comdat C.
//
// The map entry is created with the current size of Comdats as its value. On
// COFF targets the comdat's leader is looked up by name in M: the lookup can
// fail with "Could not find leader", a leader with local linkage changes the
// stored value to -1, and the leader's mangled name is written to Name. The
// other visible path takes Name from C->getName(). A storage::Comdat holding
// the name and the selection kind is then appended to Comdats.
// NOTE(review): the declaration of `Name`, the check on whether the map
// insertion took place, the null check on `GV`, the else/return lines and the
// closing braces are not visible in this excerpt.
188 Expected
<int> Builder::getComdatIndex(const Comdat
*C
, const Module
*M
) {
189 auto P
= ComdatMap
.insert(std::make_pair(C
, Comdats
.size()));
192 if (TT
.isOSBinFormatCOFF()) {
193 const GlobalValue
*GV
= M
->getNamedValue(C
->getName());
195 return make_error
<StringError
>("Could not find leader",
196 inconvertibleErrorCode());
197 // Internal leaders do not affect symbol resolution, therefore they do not
198 // appear in the symbol table.
199 if (GV
->hasLocalLinkage()) {
200 P
.first
->second
= -1;
203 llvm::raw_string_ostream
OS(Name
);
204 Mang
.getNameWithPrefix(OS
, GV
, false);
206 Name
= std::string(C
->getName());
// The name is copied with Saver so the string table can keep referring to it.
209 storage::Comdat Comdat
;
210 setStr(Comdat
.Name
, Saver
.save(Name
));
211 Comdat
.SelectionKind
= C
->getSelectionKind();
212 Comdats
.push_back(Comdat
);
215 return P
.first
->second
;
// Builds a set of StringRefs from all entries of the PreservedSymbols array.
// addSymbol() calls this once to initialize its function-local static set.
218 static DenseSet
<StringRef
> buildPreservedSymbolsSet() {
219 return DenseSet
<StringRef
>(std::begin(PreservedSymbols
),
220 std::end(PreservedSymbols
));
// Fills in the storage::Symbol record for one entry of a module symbol table.
//
//   Msymtab - symbol table that owns Msym; used to print names and query flags.
//   Used    - globals collected from llvm.used / llvm.compiler.used.
//   Msym    - the symbol to describe.
//
// Visible error returns: "Only variables can have common linkage!",
// "Unable to determine comdat of alias!", "Invalid weak external", and any
// error produced by getComdatIndex().
// NOTE(review): several lines are not visible in this excerpt, including the
// statement that appends the record to Syms, the guards in front of the error
// returns, and the closing braces.
223 Error
Builder::addSymbol(const ModuleSymbolTable
&Msymtab
,
224 const SmallPtrSet
<GlobalValue
*, 4> &Used
,
225 ModuleSymbolTable::Symbol Msym
) {
// Sym refers to the last element of Syms.
227 storage::Symbol
&Sym
= Syms
.back();
// Helper yielding the storage::Uncommon record for this symbol. The visible
// lines set FB_has_uncommon, append a new record and give both of its string
// fields an empty value.
// NOTE(review): the check on `Unc` and the return statements are not visible;
// presumably a record is created only on the first call -- confirm.
230 storage::Uncommon
*Unc
= nullptr;
231 auto Uncommon
= [&]() -> storage::Uncommon
& {
234 Sym
.Flags
|= 1 << storage::Symbol::FB_has_uncommon
;
235 Uncommons
.emplace_back();
236 Unc
= &Uncommons
.back();
238 setStr(Unc
->COFFWeakExternFallbackName
, "");
239 setStr(Unc
->SectionName
, "");
// Print the symbol name into a local buffer, then store a Saver-owned copy.
243 SmallString
<64> Name
;
245 raw_svector_ostream
OS(Name
);
246 Msymtab
.printSymbolName(OS
, Msym
);
248 setStr(Sym
.Name
, Saver
.save(Name
.str()));
// Translate object-file style symbol flags into storage::Symbol flag bits.
250 auto Flags
= Msymtab
.getSymbolFlags(Msym
);
251 if (Flags
& object::BasicSymbolRef::SF_Undefined
)
252 Sym
.Flags
|= 1 << storage::Symbol::FB_undefined
;
253 if (Flags
& object::BasicSymbolRef::SF_Weak
)
254 Sym
.Flags
|= 1 << storage::Symbol::FB_weak
;
255 if (Flags
& object::BasicSymbolRef::SF_Common
)
256 Sym
.Flags
|= 1 << storage::Symbol::FB_common
;
257 if (Flags
& object::BasicSymbolRef::SF_Indirect
)
258 Sym
.Flags
|= 1 << storage::Symbol::FB_indirect
;
259 if (Flags
& object::BasicSymbolRef::SF_Global
)
260 Sym
.Flags
|= 1 << storage::Symbol::FB_global
;
261 if (Flags
& object::BasicSymbolRef::SF_FormatSpecific
)
262 Sym
.Flags
|= 1 << storage::Symbol::FB_format_specific
;
263 if (Flags
& object::BasicSymbolRef::SF_Executable
)
264 Sym
.Flags
|= 1 << storage::Symbol::FB_executable
;
// -1 is the initial comdat index; it is replaced further down when a comdat
// is found for the symbol.
266 Sym
.ComdatIndex
= -1;
267 auto *GV
= dyn_cast_if_present
<GlobalValue
*>(Msym
);
// NOTE(review): the guard selecting the next few lines is not visible;
// presumably they run when Msym is not a GlobalValue (GV is null) -- confirm.
// On that path the IR name is empty and the function returns early.
269 // Undefined module asm symbols act as GC roots and are implicitly used.
270 if (Flags
& object::BasicSymbolRef::SF_Undefined
)
271 Sym
.Flags
|= 1 << storage::Symbol::FB_used
;
272 setStr(Sym
.IRName
, "");
273 return Error::success();
276 setStr(Sym
.IRName
, GV
->getName());
// The preserved-symbol set is built once, on the first call.
278 static const DenseSet
<StringRef
> PreservedSymbolsSet
=
279 buildPreservedSymbolsSet();
280 bool IsPreservedSymbol
= PreservedSymbolsSet
.contains(GV
->getName());
// Globals in the used set and globals named in PreservedSymbols get FB_used.
282 if (Used
.count(GV
) || IsPreservedSymbol
)
283 Sym
.Flags
|= 1 << storage::Symbol::FB_used
;
284 if (GV
->isThreadLocal())
285 Sym
.Flags
|= 1 << storage::Symbol::FB_tls
;
286 if (GV
->hasGlobalUnnamedAddr())
287 Sym
.Flags
|= 1 << storage::Symbol::FB_unnamed_addr
;
288 if (GV
->canBeOmittedFromSymbolTable())
289 Sym
.Flags
|= 1 << storage::Symbol::FB_may_omit
;
// The visibility value is shifted into place starting at bit FB_visibility.
290 Sym
.Flags
|= unsigned(GV
->getVisibility()) << storage::Symbol::FB_visibility
;
// Common symbols record their allocation size and alignment in the uncommon
// record; the alignment is 0 when the variable has no explicit alignment.
292 if (Flags
& object::BasicSymbolRef::SF_Common
) {
293 auto *GVar
= dyn_cast
<GlobalVariable
>(GV
);
295 return make_error
<StringError
>("Only variables can have common linkage!",
296 inconvertibleErrorCode());
297 Uncommon().CommonSize
=
298 GV
->getParent()->getDataLayout().getTypeAllocSize(GV
->getValueType());
299 Uncommon().CommonAlign
= GVar
->getAlign() ? GVar
->getAlign()->value() : 0;
// GO is the object consulted below for the comdat and the section name: the
// aliasee object, or the resolver function when GV is an ifunc.
302 const GlobalObject
*GO
= GV
->getAliaseeObject();
304 if (isa
<GlobalIFunc
>(GV
))
305 GO
= cast
<GlobalIFunc
>(GV
)->getResolverFunction();
307 return make_error
<StringError
>("Unable to determine comdat of alias!",
308 inconvertibleErrorCode());
310 if (const Comdat
*C
= GO
->getComdat()) {
311 Expected
<int> ComdatIndexOrErr
= getComdatIndex(C
, GV
->getParent());
312 if (!ComdatIndexOrErr
)
313 return ComdatIndexOrErr
.takeError();
314 Sym
.ComdatIndex
= *ComdatIndexOrErr
;
// COFF only: emit per-global linker flags, and for a symbol that is both weak
// and indirect record the printed name of the aliasee as the weak-external
// fallback name.
317 if (TT
.isOSBinFormatCOFF()) {
318 emitLinkerFlagsForGlobalCOFF(COFFLinkerOptsOS
, GV
, TT
, Mang
);
320 if ((Flags
& object::BasicSymbolRef::SF_Weak
) &&
321 (Flags
& object::BasicSymbolRef::SF_Indirect
)) {
322 auto *Fallback
= dyn_cast
<GlobalValue
>(
323 cast
<GlobalAlias
>(GV
)->getAliasee()->stripPointerCasts());
325 return make_error
<StringError
>("Invalid weak external",
326 inconvertibleErrorCode());
327 std::string FallbackName
;
328 raw_string_ostream
OS(FallbackName
);
329 Msymtab
.printSymbolName(OS
, Fallback
);
331 setStr(Uncommon().COFFWeakExternFallbackName
, Saver
.save(FallbackName
));
// A non-empty section name is stored in the uncommon record.
335 if (!GO
->getSection().empty())
336 setStr(Uncommon().SectionName
, Saver
.save(GO
->getSection()));
338 return Error::success();
// Builds the complete symbol table for IRMods and writes it into Symtab.
//
// IRMods must not be empty (asserted). The target triple and source file name
// stored in the header, and the triple kept in TT, all come from the first
// module.
// NOTE(review): the declaration of `Hdr`, the statement that propagates `Err`
// and the closing brace are not visible in this excerpt.
341 Error
Builder::build(ArrayRef
<Module
*> IRMods
) {
344 assert(!IRMods
.empty());
345 Hdr
.Version
= storage::Header::kCurrentVersion
;
346 setStr(Hdr
.Producer
, kExpectedProducerName
);
347 setStr(Hdr
.TargetTriple
, IRMods
[0]->getTargetTriple());
348 setStr(Hdr
.SourceFileName
, IRMods
[0]->getSourceFileName());
349 TT
= Triple(IRMods
[0]->getTargetTriple());
351 for (auto *M
: IRMods
)
352 if (Error Err
= addModule(M
))
// Flush the stream so COFFLinkerOpts holds everything written so far, then
// store a Saver-owned copy of the text in the header.
355 COFFLinkerOptsOS
.flush();
356 setStr(Hdr
.COFFLinkerOpts
, Saver
.save(COFFLinkerOpts
));
358 // We are about to fill in the header's range fields, so reserve space for it
359 // and copy it in afterwards.
360 Symtab
.resize(sizeof(storage::Header
));
// Each call appends one record array after the header and fills in the
// matching range field of Hdr.
361 writeRange(Hdr
.Modules
, Mods
);
362 writeRange(Hdr
.Comdats
, Comdats
);
363 writeRange(Hdr
.Symbols
, Syms
);
364 writeRange(Hdr
.Uncommons
, Uncommons
);
365 writeRange(Hdr
.DependentLibraries
, DependentLibraries
);
// With all ranges known, copy the finished header over the reserved bytes at
// the start of Symtab.
366 *reinterpret_cast<storage::Header
*>(Symtab
.data()) = Hdr
;
367 return Error::success();
370 } // end anonymous namespace
// Public entry point: constructs a temporary Builder over Symtab,
// StrtabBuilder and Alloc, and returns the result of its build() for Mods.
// Strings are only added to StrtabBuilder here; upgrade() in this file shows a
// caller finalizing and writing the string table afterwards.
372 Error
irsymtab::build(ArrayRef
<Module
*> Mods
, SmallVector
<char, 0> &Symtab
,
373 StringTableBuilder
&StrtabBuilder
,
374 BumpPtrAllocator
&Alloc
) {
375 return Builder(Symtab
, StrtabBuilder
, Alloc
).build(Mods
);
378 // Upgrade a vector of bitcode modules created by an old version of LLVM by
379 // creating an irsymtab for them in the current format.
//
// Each bitcode module is loaded lazily (with lazily loaded metadata, not
// importing), a symbol table and string table are built from the loaded
// modules into FC, and FC.TheReader is pointed at those two buffers.
// A failure to load a module is returned through MOrErr.takeError().
// NOTE(review): the declarations of `FC` and `Ctx`, the check in front of the
// takeError() return, the statement that propagates `E` and the closing braces
// are not visible in this excerpt.
380 static Expected
<FileContents
> upgrade(ArrayRef
<BitcodeModule
> BMs
) {
// Mods holds raw pointers for build(); OwnedMods holds the owning pointers of
// the same modules.
384 std::vector
<Module
*> Mods
;
385 std::vector
<std::unique_ptr
<Module
>> OwnedMods
;
// NOTE(review): `auto BM` copies each BitcodeModule per iteration; whether a
// reference would work depends on the constness of getLazyModule -- confirm.
386 for (auto BM
: BMs
) {
387 Expected
<std::unique_ptr
<Module
>> MOrErr
=
388 BM
.getLazyModule(Ctx
, /*ShouldLazyLoadMetadata*/ true,
389 /*IsImporting*/ false);
391 return MOrErr
.takeError();
// Take the raw pointer before the unique_ptr is moved into OwnedMods.
393 Mods
.push_back(MOrErr
->get());
394 OwnedMods
.push_back(std::move(*MOrErr
));
397 StringTableBuilder
StrtabBuilder(StringTableBuilder::RAW
);
398 BumpPtrAllocator Alloc
;
399 if (Error E
= build(Mods
, FC
.Symtab
, StrtabBuilder
, Alloc
))
// Finalize the string table, size FC.Strtab to match and write the table into
// it.
402 StrtabBuilder
.finalizeInOrder();
403 FC
.Strtab
.resize(StrtabBuilder
.getSize());
404 StrtabBuilder
.write((uint8_t *)FC
.Strtab
.data());
// The reader is given (pointer, size) views of FC's own Symtab and Strtab
// buffers.
406 FC
.TheReader
= {{FC
.Symtab
.data(), FC
.Symtab
.size()},
407 {FC
.Strtab
.data(), FC
.Strtab
.size()}};
408 return std::move(FC
);
// Produces FileContents for a bitcode file, reusing the symbol table stored in
// the file when it is usable and rebuilding it with upgrade() otherwise.
//
// Fails with "Bitcode file does not contain any modules" when BFC.Mods is
// empty. upgrade() is called when the string table for the symbol table is
// empty, when the symbol table is smaller than a storage::Header, when the
// stored version or producer differs from this build's, or when the reader's
// module count differs from the number of bitcode modules.
// NOTE(review): the declaration of `FC` and the closing braces are not visible
// in this excerpt, so the exact extent of the block guarded by
// DisableBitcodeVersionUpgrade cannot be confirmed from here.
411 Expected
<FileContents
> irsymtab::readBitcode(const BitcodeFileContents
&BFC
) {
412 if (BFC
.Mods
.empty())
413 return make_error
<StringError
>("Bitcode file does not contain any modules",
414 inconvertibleErrorCode());
416 if (!DisableBitcodeVersionUpgrade
) {
// The size check also guarantees the header read below stays inside the
// Symtab buffer.
417 if (BFC
.StrtabForSymtab
.empty() ||
418 BFC
.Symtab
.size() < sizeof(storage::Header
))
419 return upgrade(BFC
.Mods
);
421 // We cannot use the regular reader to read the version and producer,
422 // because it will expect the header to be in the current format. The only
423 // thing we can rely on is that the version and producer will be present as
424 // the first struct elements.
425 auto *Hdr
= reinterpret_cast<const storage::Header
*>(BFC
.Symtab
.data());
426 unsigned Version
= Hdr
->Version
;
427 StringRef Producer
= Hdr
->Producer
.get(BFC
.StrtabForSymtab
);
428 if (Version
!= storage::Header::kCurrentVersion
||
429 Producer
!= kExpectedProducerName
)
430 return upgrade(BFC
.Mods
);
// Here the reader views the buffers in BFC directly; nothing is copied into
// FC on this path.
434 FC
.TheReader
= {{BFC
.Symtab
.data(), BFC
.Symtab
.size()},
435 {BFC
.StrtabForSymtab
.data(), BFC
.StrtabForSymtab
.size()}};
437 // Finally, make sure that the number of modules in the symbol table matches
438 // the number of modules in the bitcode file. If they differ, it may mean that
439 // the bitcode file was created by binary concatenation, so we need to create
440 // a new symbol table from scratch.
// NOTE(review): BFC is a const reference, so std::move below yields a const
// rvalue and cannot move anything; the call appears to behave like the plain
// upgrade(BFC.Mods) calls above -- confirm.
441 if (FC
.TheReader
.getNumModules() != BFC
.Mods
.size())
442 return upgrade(std::move(BFC
.Mods
));
444 return std::move(FC
);