1 //===- IRSymtab.cpp - implementation of IR symbol tables ------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #include "llvm/Object/IRSymtab.h"
10 #include "llvm/ADT/ArrayRef.h"
11 #include "llvm/ADT/DenseMap.h"
12 #include "llvm/ADT/SmallPtrSet.h"
13 #include "llvm/ADT/SmallString.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/ADT/StringRef.h"
16 #include "llvm/ADT/Triple.h"
17 #include "llvm/Config/llvm-config.h"
18 #include "llvm/IR/Comdat.h"
19 #include "llvm/IR/DataLayout.h"
20 #include "llvm/IR/GlobalAlias.h"
21 #include "llvm/IR/GlobalObject.h"
22 #include "llvm/IR/Mangler.h"
23 #include "llvm/IR/Metadata.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Bitcode/BitcodeReader.h"
26 #include "llvm/MC/StringTableBuilder.h"
27 #include "llvm/Object/IRObjectFile.h"
28 #include "llvm/Object/ModuleSymbolTable.h"
29 #include "llvm/Object/SymbolicFile.h"
30 #include "llvm/Support/Allocator.h"
31 #include "llvm/Support/Casting.h"
32 #include "llvm/Support/Error.h"
33 #include "llvm/Support/StringSaver.h"
34 #include "llvm/Support/VCSRevision.h"
35 #include "llvm/Support/raw_ostream.h"
42 using namespace irsymtab
;
// Table of runtime library routine names. Every HANDLE_LIBCALL(code, name)
// entry of RuntimeLibcalls.def expands to its `name` argument, so the array
// holds one element per entry of that file.
static const char *LibcallRoutineNames[] = {
#define HANDLE_LIBCALL(code, name) name,
#include "llvm/IR/RuntimeLibcalls.def"
#undef HANDLE_LIBCALL
};
52 const char *getExpectedProducerName() {
53 static char DefaultName
[] = LLVM_VERSION_STRING
58 // Allows for testing of the irsymtab writer and upgrade mechanism. This
59 // environment variable should not be set by users.
60 if (char *OverrideName
= getenv("LLVM_OVERRIDE_PRODUCER"))
// Producer string for this build, obtained from getExpectedProducerName()
// when this variable is initialized. It is written into new symbol tables and
// compared against the producer recorded in existing ones.
const char *kExpectedProducerName = getExpectedProducerName();
67 /// Stores the temporary state that is required to build an IR symbol table.
69 SmallVector
<char, 0> &Symtab
;
70 StringTableBuilder
&StrtabBuilder
;
73 // This ctor initializes a StringSaver using the passed in BumpPtrAllocator.
74 // The StringTableBuilder does not create a copy of any strings added to it,
75 // so this provides somewhere to store any strings that we create.
76 Builder(SmallVector
<char, 0> &Symtab
, StringTableBuilder
&StrtabBuilder
,
77 BumpPtrAllocator
&Alloc
)
78 : Symtab(Symtab
), StrtabBuilder(StrtabBuilder
), Saver(Alloc
) {}
80 DenseMap
<const Comdat
*, int> ComdatMap
;
84 std::vector
<storage::Comdat
> Comdats
;
85 std::vector
<storage::Module
> Mods
;
86 std::vector
<storage::Symbol
> Syms
;
87 std::vector
<storage::Uncommon
> Uncommons
;
89 std::string COFFLinkerOpts
;
90 raw_string_ostream COFFLinkerOptsOS
{COFFLinkerOpts
};
92 void setStr(storage::Str
&S
, StringRef Value
) {
93 S
.Offset
= StrtabBuilder
.add(Value
);
94 S
.Size
= Value
.size();
98 void writeRange(storage::Range
<T
> &R
, const std::vector
<T
> &Objs
) {
99 R
.Offset
= Symtab
.size();
100 R
.Size
= Objs
.size();
101 Symtab
.insert(Symtab
.end(), reinterpret_cast<const char *>(Objs
.data()),
102 reinterpret_cast<const char *>(Objs
.data() + Objs
.size()));
105 Expected
<int> getComdatIndex(const Comdat
*C
, const Module
*M
);
107 Error
addModule(Module
*M
);
108 Error
addSymbol(const ModuleSymbolTable
&Msymtab
,
109 const SmallPtrSet
<GlobalValue
*, 8> &Used
,
110 ModuleSymbolTable::Symbol Sym
);
112 Error
build(ArrayRef
<Module
*> Mods
);
115 Error
Builder::addModule(Module
*M
) {
116 if (M
->getDataLayoutStr().empty())
117 return make_error
<StringError
>("input module has no datalayout",
118 inconvertibleErrorCode());
120 SmallPtrSet
<GlobalValue
*, 8> Used
;
121 collectUsedGlobalVariables(*M
, Used
, /*CompilerUsed*/ false);
123 ModuleSymbolTable Msymtab
;
124 Msymtab
.addModule(M
);
127 Mod
.Begin
= Syms
.size();
128 Mod
.End
= Syms
.size() + Msymtab
.symbols().size();
129 Mod
.UncBegin
= Uncommons
.size();
132 if (TT
.isOSBinFormatCOFF()) {
133 if (auto E
= M
->materializeMetadata())
135 if (NamedMDNode
*LinkerOptions
=
136 M
->getNamedMetadata("llvm.linker.options")) {
137 for (MDNode
*MDOptions
: LinkerOptions
->operands())
138 for (const MDOperand
&MDOption
: cast
<MDNode
>(MDOptions
)->operands())
139 COFFLinkerOptsOS
<< " " << cast
<MDString
>(MDOption
)->getString();
143 for (ModuleSymbolTable::Symbol Msym
: Msymtab
.symbols())
144 if (Error Err
= addSymbol(Msymtab
, Used
, Msym
))
147 return Error::success();
150 Expected
<int> Builder::getComdatIndex(const Comdat
*C
, const Module
*M
) {
151 auto P
= ComdatMap
.insert(std::make_pair(C
, Comdats
.size()));
154 if (TT
.isOSBinFormatCOFF()) {
155 const GlobalValue
*GV
= M
->getNamedValue(C
->getName());
157 return make_error
<StringError
>("Could not find leader",
158 inconvertibleErrorCode());
159 // Internal leaders do not affect symbol resolution, therefore they do not
160 // appear in the symbol table.
161 if (GV
->hasLocalLinkage()) {
162 P
.first
->second
= -1;
165 llvm::raw_string_ostream
OS(Name
);
166 Mang
.getNameWithPrefix(OS
, GV
, false);
171 storage::Comdat Comdat
;
172 setStr(Comdat
.Name
, Saver
.save(Name
));
173 Comdats
.push_back(Comdat
);
176 return P
.first
->second
;
179 Error
Builder::addSymbol(const ModuleSymbolTable
&Msymtab
,
180 const SmallPtrSet
<GlobalValue
*, 8> &Used
,
181 ModuleSymbolTable::Symbol Msym
) {
183 storage::Symbol
&Sym
= Syms
.back();
186 storage::Uncommon
*Unc
= nullptr;
187 auto Uncommon
= [&]() -> storage::Uncommon
& {
190 Sym
.Flags
|= 1 << storage::Symbol::FB_has_uncommon
;
191 Uncommons
.emplace_back();
192 Unc
= &Uncommons
.back();
194 setStr(Unc
->COFFWeakExternFallbackName
, "");
195 setStr(Unc
->SectionName
, "");
199 SmallString
<64> Name
;
201 raw_svector_ostream
OS(Name
);
202 Msymtab
.printSymbolName(OS
, Msym
);
204 setStr(Sym
.Name
, Saver
.save(StringRef(Name
)));
206 auto Flags
= Msymtab
.getSymbolFlags(Msym
);
207 if (Flags
& object::BasicSymbolRef::SF_Undefined
)
208 Sym
.Flags
|= 1 << storage::Symbol::FB_undefined
;
209 if (Flags
& object::BasicSymbolRef::SF_Weak
)
210 Sym
.Flags
|= 1 << storage::Symbol::FB_weak
;
211 if (Flags
& object::BasicSymbolRef::SF_Common
)
212 Sym
.Flags
|= 1 << storage::Symbol::FB_common
;
213 if (Flags
& object::BasicSymbolRef::SF_Indirect
)
214 Sym
.Flags
|= 1 << storage::Symbol::FB_indirect
;
215 if (Flags
& object::BasicSymbolRef::SF_Global
)
216 Sym
.Flags
|= 1 << storage::Symbol::FB_global
;
217 if (Flags
& object::BasicSymbolRef::SF_FormatSpecific
)
218 Sym
.Flags
|= 1 << storage::Symbol::FB_format_specific
;
219 if (Flags
& object::BasicSymbolRef::SF_Executable
)
220 Sym
.Flags
|= 1 << storage::Symbol::FB_executable
;
222 Sym
.ComdatIndex
= -1;
223 auto *GV
= Msym
.dyn_cast
<GlobalValue
*>();
225 // Undefined module asm symbols act as GC roots and are implicitly used.
226 if (Flags
& object::BasicSymbolRef::SF_Undefined
)
227 Sym
.Flags
|= 1 << storage::Symbol::FB_used
;
228 setStr(Sym
.IRName
, "");
229 return Error::success();
232 setStr(Sym
.IRName
, GV
->getName());
234 bool IsBuiltinFunc
= false;
236 for (const char *LibcallName
: LibcallRoutineNames
)
237 if (GV
->getName() == LibcallName
)
238 IsBuiltinFunc
= true;
240 if (Used
.count(GV
) || IsBuiltinFunc
)
241 Sym
.Flags
|= 1 << storage::Symbol::FB_used
;
242 if (GV
->isThreadLocal())
243 Sym
.Flags
|= 1 << storage::Symbol::FB_tls
;
244 if (GV
->hasGlobalUnnamedAddr())
245 Sym
.Flags
|= 1 << storage::Symbol::FB_unnamed_addr
;
246 if (GV
->canBeOmittedFromSymbolTable())
247 Sym
.Flags
|= 1 << storage::Symbol::FB_may_omit
;
248 Sym
.Flags
|= unsigned(GV
->getVisibility()) << storage::Symbol::FB_visibility
;
250 if (Flags
& object::BasicSymbolRef::SF_Common
) {
251 Uncommon().CommonSize
= GV
->getParent()->getDataLayout().getTypeAllocSize(
252 GV
->getType()->getElementType());
253 Uncommon().CommonAlign
= GV
->getAlignment();
256 const GlobalObject
*Base
= GV
->getBaseObject();
258 return make_error
<StringError
>("Unable to determine comdat of alias!",
259 inconvertibleErrorCode());
260 if (const Comdat
*C
= Base
->getComdat()) {
261 Expected
<int> ComdatIndexOrErr
= getComdatIndex(C
, GV
->getParent());
262 if (!ComdatIndexOrErr
)
263 return ComdatIndexOrErr
.takeError();
264 Sym
.ComdatIndex
= *ComdatIndexOrErr
;
267 if (TT
.isOSBinFormatCOFF()) {
268 emitLinkerFlagsForGlobalCOFF(COFFLinkerOptsOS
, GV
, TT
, Mang
);
270 if ((Flags
& object::BasicSymbolRef::SF_Weak
) &&
271 (Flags
& object::BasicSymbolRef::SF_Indirect
)) {
272 auto *Fallback
= dyn_cast
<GlobalValue
>(
273 cast
<GlobalAlias
>(GV
)->getAliasee()->stripPointerCasts());
275 return make_error
<StringError
>("Invalid weak external",
276 inconvertibleErrorCode());
277 std::string FallbackName
;
278 raw_string_ostream
OS(FallbackName
);
279 Msymtab
.printSymbolName(OS
, Fallback
);
281 setStr(Uncommon().COFFWeakExternFallbackName
, Saver
.save(FallbackName
));
285 if (!Base
->getSection().empty())
286 setStr(Uncommon().SectionName
, Saver
.save(Base
->getSection()));
288 return Error::success();
291 Error
Builder::build(ArrayRef
<Module
*> IRMods
) {
294 assert(!IRMods
.empty());
295 Hdr
.Version
= storage::Header::kCurrentVersion
;
296 setStr(Hdr
.Producer
, kExpectedProducerName
);
297 setStr(Hdr
.TargetTriple
, IRMods
[0]->getTargetTriple());
298 setStr(Hdr
.SourceFileName
, IRMods
[0]->getSourceFileName());
299 TT
= Triple(IRMods
[0]->getTargetTriple());
301 for (auto *M
: IRMods
)
302 if (Error Err
= addModule(M
))
305 COFFLinkerOptsOS
.flush();
306 setStr(Hdr
.COFFLinkerOpts
, Saver
.save(COFFLinkerOpts
));
308 // We are about to fill in the header's range fields, so reserve space for it
309 // and copy it in afterwards.
310 Symtab
.resize(sizeof(storage::Header
));
311 writeRange(Hdr
.Modules
, Mods
);
312 writeRange(Hdr
.Comdats
, Comdats
);
313 writeRange(Hdr
.Symbols
, Syms
);
314 writeRange(Hdr
.Uncommons
, Uncommons
);
316 *reinterpret_cast<storage::Header
*>(Symtab
.data()) = Hdr
;
317 return Error::success();
320 } // end anonymous namespace
322 Error
irsymtab::build(ArrayRef
<Module
*> Mods
, SmallVector
<char, 0> &Symtab
,
323 StringTableBuilder
&StrtabBuilder
,
324 BumpPtrAllocator
&Alloc
) {
325 return Builder(Symtab
, StrtabBuilder
, Alloc
).build(Mods
);
328 // Upgrade a vector of bitcode modules created by an old version of LLVM by
329 // creating an irsymtab for them in the current format.
330 static Expected
<FileContents
> upgrade(ArrayRef
<BitcodeModule
> BMs
) {
334 std::vector
<Module
*> Mods
;
335 std::vector
<std::unique_ptr
<Module
>> OwnedMods
;
336 for (auto BM
: BMs
) {
337 Expected
<std::unique_ptr
<Module
>> MOrErr
=
338 BM
.getLazyModule(Ctx
, /*ShouldLazyLoadMetadata*/ true,
339 /*IsImporting*/ false);
341 return MOrErr
.takeError();
343 Mods
.push_back(MOrErr
->get());
344 OwnedMods
.push_back(std::move(*MOrErr
));
347 StringTableBuilder
StrtabBuilder(StringTableBuilder::RAW
);
348 BumpPtrAllocator Alloc
;
349 if (Error E
= build(Mods
, FC
.Symtab
, StrtabBuilder
, Alloc
))
352 StrtabBuilder
.finalizeInOrder();
353 FC
.Strtab
.resize(StrtabBuilder
.getSize());
354 StrtabBuilder
.write((uint8_t *)FC
.Strtab
.data());
356 FC
.TheReader
= {{FC
.Symtab
.data(), FC
.Symtab
.size()},
357 {FC
.Strtab
.data(), FC
.Strtab
.size()}};
358 return std::move(FC
);
361 Expected
<FileContents
> irsymtab::readBitcode(const BitcodeFileContents
&BFC
) {
362 if (BFC
.Mods
.empty())
363 return make_error
<StringError
>("Bitcode file does not contain any modules",
364 inconvertibleErrorCode());
366 if (BFC
.StrtabForSymtab
.empty() ||
367 BFC
.Symtab
.size() < sizeof(storage::Header
))
368 return upgrade(BFC
.Mods
);
370 // We cannot use the regular reader to read the version and producer, because
371 // it will expect the header to be in the current format. The only thing we
372 // can rely on is that the version and producer will be present as the first
374 auto *Hdr
= reinterpret_cast<const storage::Header
*>(BFC
.Symtab
.data());
375 unsigned Version
= Hdr
->Version
;
376 StringRef Producer
= Hdr
->Producer
.get(BFC
.StrtabForSymtab
);
377 if (Version
!= storage::Header::kCurrentVersion
||
378 Producer
!= kExpectedProducerName
)
379 return upgrade(BFC
.Mods
);
382 FC
.TheReader
= {{BFC
.Symtab
.data(), BFC
.Symtab
.size()},
383 {BFC
.StrtabForSymtab
.data(), BFC
.StrtabForSymtab
.size()}};
385 // Finally, make sure that the number of modules in the symbol table matches
386 // the number of modules in the bitcode file. If they differ, it may mean that
387 // the bitcode file was created by binary concatenation, so we need to create
388 // a new symbol table from scratch.
389 if (FC
.TheReader
.getNumModules() != BFC
.Mods
.size())
390 return upgrade(std::move(BFC
.Mods
));
392 return std::move(FC
);