//===- IRSymtab.cpp - implementation of IR symbol tables ------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "llvm/Object/IRSymtab.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/Triple.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/Comdat.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/GlobalAlias.h"
#include "llvm/IR/GlobalObject.h"
#include "llvm/IR/Mangler.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/MC/StringTableBuilder.h"
#include "llvm/Object/IRObjectFile.h"
#include "llvm/Object/ModuleSymbolTable.h"
#include "llvm/Object/SymbolicFile.h"
#include "llvm/Support/Allocator.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/StringSaver.h"
#include "llvm/Support/VCSRevision.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <memory>
#include <string>
#include <utility>
#include <vector>

42 using namespace irsymtab
;
44 static const char *LibcallRoutineNames
[] = {
45 #define HANDLE_LIBCALL(code, name) name,
46 #include "llvm/IR/RuntimeLibcalls.def"
52 const char *getExpectedProducerName() {
53 static char DefaultName
[] = LLVM_VERSION_STRING
58 // Allows for testing of the irsymtab writer and upgrade mechanism. This
59 // environment variable should not be set by users.
60 if (char *OverrideName
= getenv("LLVM_OVERRIDE_PRODUCER"))
65 const char *kExpectedProducerName
= getExpectedProducerName();
67 /// Stores the temporary state that is required to build an IR symbol table.
69 SmallVector
<char, 0> &Symtab
;
70 StringTableBuilder
&StrtabBuilder
;
73 // This ctor initializes a StringSaver using the passed in BumpPtrAllocator.
74 // The StringTableBuilder does not create a copy of any strings added to it,
75 // so this provides somewhere to store any strings that we create.
76 Builder(SmallVector
<char, 0> &Symtab
, StringTableBuilder
&StrtabBuilder
,
77 BumpPtrAllocator
&Alloc
)
78 : Symtab(Symtab
), StrtabBuilder(StrtabBuilder
), Saver(Alloc
) {}
80 DenseMap
<const Comdat
*, int> ComdatMap
;
84 std::vector
<storage::Comdat
> Comdats
;
85 std::vector
<storage::Module
> Mods
;
86 std::vector
<storage::Symbol
> Syms
;
87 std::vector
<storage::Uncommon
> Uncommons
;
89 std::string COFFLinkerOpts
;
90 raw_string_ostream COFFLinkerOptsOS
{COFFLinkerOpts
};
92 std::vector
<storage::Str
> DependentLibraries
;
94 void setStr(storage::Str
&S
, StringRef Value
) {
95 S
.Offset
= StrtabBuilder
.add(Value
);
96 S
.Size
= Value
.size();
100 void writeRange(storage::Range
<T
> &R
, const std::vector
<T
> &Objs
) {
101 R
.Offset
= Symtab
.size();
102 R
.Size
= Objs
.size();
103 Symtab
.insert(Symtab
.end(), reinterpret_cast<const char *>(Objs
.data()),
104 reinterpret_cast<const char *>(Objs
.data() + Objs
.size()));
107 Expected
<int> getComdatIndex(const Comdat
*C
, const Module
*M
);
109 Error
addModule(Module
*M
);
110 Error
addSymbol(const ModuleSymbolTable
&Msymtab
,
111 const SmallPtrSet
<GlobalValue
*, 8> &Used
,
112 ModuleSymbolTable::Symbol Sym
);
114 Error
build(ArrayRef
<Module
*> Mods
);
117 Error
Builder::addModule(Module
*M
) {
118 if (M
->getDataLayoutStr().empty())
119 return make_error
<StringError
>("input module has no datalayout",
120 inconvertibleErrorCode());
122 SmallPtrSet
<GlobalValue
*, 8> Used
;
123 collectUsedGlobalVariables(*M
, Used
, /*CompilerUsed*/ false);
125 ModuleSymbolTable Msymtab
;
126 Msymtab
.addModule(M
);
129 Mod
.Begin
= Syms
.size();
130 Mod
.End
= Syms
.size() + Msymtab
.symbols().size();
131 Mod
.UncBegin
= Uncommons
.size();
134 if (TT
.isOSBinFormatCOFF()) {
135 if (auto E
= M
->materializeMetadata())
137 if (NamedMDNode
*LinkerOptions
=
138 M
->getNamedMetadata("llvm.linker.options")) {
139 for (MDNode
*MDOptions
: LinkerOptions
->operands())
140 for (const MDOperand
&MDOption
: cast
<MDNode
>(MDOptions
)->operands())
141 COFFLinkerOptsOS
<< " " << cast
<MDString
>(MDOption
)->getString();
145 if (TT
.isOSBinFormatELF()) {
146 if (auto E
= M
->materializeMetadata())
148 if (NamedMDNode
*N
= M
->getNamedMetadata("llvm.dependent-libraries")) {
149 for (MDNode
*MDOptions
: N
->operands()) {
150 const auto OperandStr
=
151 cast
<MDString
>(cast
<MDNode
>(MDOptions
)->getOperand(0))->getString();
152 storage::Str Specifier
;
153 setStr(Specifier
, OperandStr
);
154 DependentLibraries
.emplace_back(Specifier
);
159 for (ModuleSymbolTable::Symbol Msym
: Msymtab
.symbols())
160 if (Error Err
= addSymbol(Msymtab
, Used
, Msym
))
163 return Error::success();
166 Expected
<int> Builder::getComdatIndex(const Comdat
*C
, const Module
*M
) {
167 auto P
= ComdatMap
.insert(std::make_pair(C
, Comdats
.size()));
170 if (TT
.isOSBinFormatCOFF()) {
171 const GlobalValue
*GV
= M
->getNamedValue(C
->getName());
173 return make_error
<StringError
>("Could not find leader",
174 inconvertibleErrorCode());
175 // Internal leaders do not affect symbol resolution, therefore they do not
176 // appear in the symbol table.
177 if (GV
->hasLocalLinkage()) {
178 P
.first
->second
= -1;
181 llvm::raw_string_ostream
OS(Name
);
182 Mang
.getNameWithPrefix(OS
, GV
, false);
187 storage::Comdat Comdat
;
188 setStr(Comdat
.Name
, Saver
.save(Name
));
189 Comdats
.push_back(Comdat
);
192 return P
.first
->second
;
195 Error
Builder::addSymbol(const ModuleSymbolTable
&Msymtab
,
196 const SmallPtrSet
<GlobalValue
*, 8> &Used
,
197 ModuleSymbolTable::Symbol Msym
) {
199 storage::Symbol
&Sym
= Syms
.back();
202 storage::Uncommon
*Unc
= nullptr;
203 auto Uncommon
= [&]() -> storage::Uncommon
& {
206 Sym
.Flags
|= 1 << storage::Symbol::FB_has_uncommon
;
207 Uncommons
.emplace_back();
208 Unc
= &Uncommons
.back();
210 setStr(Unc
->COFFWeakExternFallbackName
, "");
211 setStr(Unc
->SectionName
, "");
215 SmallString
<64> Name
;
217 raw_svector_ostream
OS(Name
);
218 Msymtab
.printSymbolName(OS
, Msym
);
220 setStr(Sym
.Name
, Saver
.save(StringRef(Name
)));
222 auto Flags
= Msymtab
.getSymbolFlags(Msym
);
223 if (Flags
& object::BasicSymbolRef::SF_Undefined
)
224 Sym
.Flags
|= 1 << storage::Symbol::FB_undefined
;
225 if (Flags
& object::BasicSymbolRef::SF_Weak
)
226 Sym
.Flags
|= 1 << storage::Symbol::FB_weak
;
227 if (Flags
& object::BasicSymbolRef::SF_Common
)
228 Sym
.Flags
|= 1 << storage::Symbol::FB_common
;
229 if (Flags
& object::BasicSymbolRef::SF_Indirect
)
230 Sym
.Flags
|= 1 << storage::Symbol::FB_indirect
;
231 if (Flags
& object::BasicSymbolRef::SF_Global
)
232 Sym
.Flags
|= 1 << storage::Symbol::FB_global
;
233 if (Flags
& object::BasicSymbolRef::SF_FormatSpecific
)
234 Sym
.Flags
|= 1 << storage::Symbol::FB_format_specific
;
235 if (Flags
& object::BasicSymbolRef::SF_Executable
)
236 Sym
.Flags
|= 1 << storage::Symbol::FB_executable
;
238 Sym
.ComdatIndex
= -1;
239 auto *GV
= Msym
.dyn_cast
<GlobalValue
*>();
241 // Undefined module asm symbols act as GC roots and are implicitly used.
242 if (Flags
& object::BasicSymbolRef::SF_Undefined
)
243 Sym
.Flags
|= 1 << storage::Symbol::FB_used
;
244 setStr(Sym
.IRName
, "");
245 return Error::success();
248 setStr(Sym
.IRName
, GV
->getName());
250 bool IsBuiltinFunc
= false;
252 for (const char *LibcallName
: LibcallRoutineNames
)
253 if (GV
->getName() == LibcallName
)
254 IsBuiltinFunc
= true;
256 if (Used
.count(GV
) || IsBuiltinFunc
)
257 Sym
.Flags
|= 1 << storage::Symbol::FB_used
;
258 if (GV
->isThreadLocal())
259 Sym
.Flags
|= 1 << storage::Symbol::FB_tls
;
260 if (GV
->hasGlobalUnnamedAddr())
261 Sym
.Flags
|= 1 << storage::Symbol::FB_unnamed_addr
;
262 if (GV
->canBeOmittedFromSymbolTable())
263 Sym
.Flags
|= 1 << storage::Symbol::FB_may_omit
;
264 Sym
.Flags
|= unsigned(GV
->getVisibility()) << storage::Symbol::FB_visibility
;
266 if (Flags
& object::BasicSymbolRef::SF_Common
) {
267 Uncommon().CommonSize
= GV
->getParent()->getDataLayout().getTypeAllocSize(
268 GV
->getType()->getElementType());
269 Uncommon().CommonAlign
= GV
->getAlignment();
272 const GlobalObject
*Base
= GV
->getBaseObject();
274 return make_error
<StringError
>("Unable to determine comdat of alias!",
275 inconvertibleErrorCode());
276 if (const Comdat
*C
= Base
->getComdat()) {
277 Expected
<int> ComdatIndexOrErr
= getComdatIndex(C
, GV
->getParent());
278 if (!ComdatIndexOrErr
)
279 return ComdatIndexOrErr
.takeError();
280 Sym
.ComdatIndex
= *ComdatIndexOrErr
;
283 if (TT
.isOSBinFormatCOFF()) {
284 emitLinkerFlagsForGlobalCOFF(COFFLinkerOptsOS
, GV
, TT
, Mang
);
286 if ((Flags
& object::BasicSymbolRef::SF_Weak
) &&
287 (Flags
& object::BasicSymbolRef::SF_Indirect
)) {
288 auto *Fallback
= dyn_cast
<GlobalValue
>(
289 cast
<GlobalAlias
>(GV
)->getAliasee()->stripPointerCasts());
291 return make_error
<StringError
>("Invalid weak external",
292 inconvertibleErrorCode());
293 std::string FallbackName
;
294 raw_string_ostream
OS(FallbackName
);
295 Msymtab
.printSymbolName(OS
, Fallback
);
297 setStr(Uncommon().COFFWeakExternFallbackName
, Saver
.save(FallbackName
));
301 if (!Base
->getSection().empty())
302 setStr(Uncommon().SectionName
, Saver
.save(Base
->getSection()));
304 return Error::success();
307 Error
Builder::build(ArrayRef
<Module
*> IRMods
) {
310 assert(!IRMods
.empty());
311 Hdr
.Version
= storage::Header::kCurrentVersion
;
312 setStr(Hdr
.Producer
, kExpectedProducerName
);
313 setStr(Hdr
.TargetTriple
, IRMods
[0]->getTargetTriple());
314 setStr(Hdr
.SourceFileName
, IRMods
[0]->getSourceFileName());
315 TT
= Triple(IRMods
[0]->getTargetTriple());
317 for (auto *M
: IRMods
)
318 if (Error Err
= addModule(M
))
321 COFFLinkerOptsOS
.flush();
322 setStr(Hdr
.COFFLinkerOpts
, Saver
.save(COFFLinkerOpts
));
324 // We are about to fill in the header's range fields, so reserve space for it
325 // and copy it in afterwards.
326 Symtab
.resize(sizeof(storage::Header
));
327 writeRange(Hdr
.Modules
, Mods
);
328 writeRange(Hdr
.Comdats
, Comdats
);
329 writeRange(Hdr
.Symbols
, Syms
);
330 writeRange(Hdr
.Uncommons
, Uncommons
);
331 writeRange(Hdr
.DependentLibraries
, DependentLibraries
);
332 *reinterpret_cast<storage::Header
*>(Symtab
.data()) = Hdr
;
333 return Error::success();
336 } // end anonymous namespace
338 Error
irsymtab::build(ArrayRef
<Module
*> Mods
, SmallVector
<char, 0> &Symtab
,
339 StringTableBuilder
&StrtabBuilder
,
340 BumpPtrAllocator
&Alloc
) {
341 return Builder(Symtab
, StrtabBuilder
, Alloc
).build(Mods
);
344 // Upgrade a vector of bitcode modules created by an old version of LLVM by
345 // creating an irsymtab for them in the current format.
346 static Expected
<FileContents
> upgrade(ArrayRef
<BitcodeModule
> BMs
) {
350 std::vector
<Module
*> Mods
;
351 std::vector
<std::unique_ptr
<Module
>> OwnedMods
;
352 for (auto BM
: BMs
) {
353 Expected
<std::unique_ptr
<Module
>> MOrErr
=
354 BM
.getLazyModule(Ctx
, /*ShouldLazyLoadMetadata*/ true,
355 /*IsImporting*/ false);
357 return MOrErr
.takeError();
359 Mods
.push_back(MOrErr
->get());
360 OwnedMods
.push_back(std::move(*MOrErr
));
363 StringTableBuilder
StrtabBuilder(StringTableBuilder::RAW
);
364 BumpPtrAllocator Alloc
;
365 if (Error E
= build(Mods
, FC
.Symtab
, StrtabBuilder
, Alloc
))
368 StrtabBuilder
.finalizeInOrder();
369 FC
.Strtab
.resize(StrtabBuilder
.getSize());
370 StrtabBuilder
.write((uint8_t *)FC
.Strtab
.data());
372 FC
.TheReader
= {{FC
.Symtab
.data(), FC
.Symtab
.size()},
373 {FC
.Strtab
.data(), FC
.Strtab
.size()}};
374 return std::move(FC
);
377 Expected
<FileContents
> irsymtab::readBitcode(const BitcodeFileContents
&BFC
) {
378 if (BFC
.Mods
.empty())
379 return make_error
<StringError
>("Bitcode file does not contain any modules",
380 inconvertibleErrorCode());
382 if (BFC
.StrtabForSymtab
.empty() ||
383 BFC
.Symtab
.size() < sizeof(storage::Header
))
384 return upgrade(BFC
.Mods
);
386 // We cannot use the regular reader to read the version and producer, because
387 // it will expect the header to be in the current format. The only thing we
388 // can rely on is that the version and producer will be present as the first
390 auto *Hdr
= reinterpret_cast<const storage::Header
*>(BFC
.Symtab
.data());
391 unsigned Version
= Hdr
->Version
;
392 StringRef Producer
= Hdr
->Producer
.get(BFC
.StrtabForSymtab
);
393 if (Version
!= storage::Header::kCurrentVersion
||
394 Producer
!= kExpectedProducerName
)
395 return upgrade(BFC
.Mods
);
398 FC
.TheReader
= {{BFC
.Symtab
.data(), BFC
.Symtab
.size()},
399 {BFC
.StrtabForSymtab
.data(), BFC
.StrtabForSymtab
.size()}};
401 // Finally, make sure that the number of modules in the symbol table matches
402 // the number of modules in the bitcode file. If they differ, it may mean that
403 // the bitcode file was created by binary concatenation, so we need to create
404 // a new symbol table from scratch.
405 if (FC
.TheReader
.getNumModules() != BFC
.Mods
.size())
406 return upgrade(std::move(BFC
.Mods
));
408 return std::move(FC
);