1 //===- IRSymtab.h - data definitions for IR symbol tables -------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file contains data definitions and a reader and builder for a symbol
10 // table for LLVM IR. Its purpose is to allow linkers and other consumers of
11 // bitcode files to efficiently read the symbol table for symbol resolution
12 // purposes without needing to construct a module in memory.
14 // As with most object files the symbol table has two parts: the symbol table
15 // itself and a string table which is referenced by the symbol table.
17 // A symbol table corresponds to a single bitcode file, which may consist of
18 // multiple modules, so symbol tables may likewise contain symbols for multiple modules.
21 //===----------------------------------------------------------------------===//
23 #ifndef LLVM_OBJECT_IRSYMTAB_H
24 #define LLVM_OBJECT_IRSYMTAB_H
26 #include "llvm/ADT/ArrayRef.h"
27 #include "llvm/ADT/StringRef.h"
28 #include "llvm/ADT/iterator_range.h"
29 #include "llvm/IR/GlobalValue.h"
30 #include "llvm/Object/SymbolicFile.h"
31 #include "llvm/Support/Endian.h"
32 #include "llvm/Support/Error.h"
// Forward declaration: this header only uses the type by const reference, in
// the readBitcode() declaration.
39 struct BitcodeFileContents
;
// Forward declaration: this header only uses the type by reference, in the
// build() declaration.
40 class StringTableBuilder
;
46 // The data structures in this namespace define the low-level serialization
47 // format. Clients that just want to read a symbol table should use the
48 // irsymtab::Reader class.
// Alias for support::ulittle32_t, used for integer fields of the serialized
// structures (e.g. Uncommon::CommonSize / CommonAlign).
// NOTE(review): presumably an unsigned 32-bit little-endian integer, going by
// the type's name -- confirm in llvm/Support/Endian.h.
50 using Word
= support::ulittle32_t
;
52 /// A reference to a string in the string table.
56 StringRef
get(StringRef Strtab
) const {
57 return {Strtab
.data() + Offset
, Size
};
61 /// A reference to a range of objects in the symbol table.
62 template <typename T
> struct Range
{
65 ArrayRef
<T
> get(StringRef Symtab
) const {
66 return {reinterpret_cast<const T
*>(Symtab
.data() + Offset
), Size
};
70 /// Describes the range of a particular module's symbols within the symbol table.
75 /// The index of the first Uncommon for this Module.
79 /// This is equivalent to an IR comdat.
84 /// Contains the information needed by linkers for symbol resolution, as well as
85 /// by the LTO implementation itself.
87 /// The mangled symbol name.
90 /// The unmangled symbol name, or the empty string if this is not an IR symbol.
94 /// The index into Header::Comdats, or -1 if not a comdat member.
99 FB_visibility
, // 2 bits
// The visibility field is two bits wide, so the next flag is placed two bit
// positions after FB_visibility.
100 FB_has_uncommon
= FB_visibility
+ 2,
115 /// This data structure contains rarely used symbol fields and is optionally
116 /// referenced by a Symbol.
// Copied into the reader-side Symbol's CommonSize / CommonAlign by
// Reader::SymbolRef when the symbol has FB_has_uncommon set.
118 Word CommonSize
, CommonAlign
;
120 /// COFF-specific: the name of the symbol that a weak external resolves to
/// if it remains undefined. Stored as a string-table reference; the reader
/// resolves it with Reader::str().
122 Str COFFWeakExternFallbackName
;
124 /// Specified section name, if any.
130 /// Version number of the symtab format. This number should be incremented
131 /// when the format changes, but it does not need to be incremented if a
132 /// change to LLVM would cause it to create a different symbol table.
// Declared as an enumerator of an anonymous enum, so it is a compile-time
// constant.
134 enum { kCurrentVersion
= 2 };
136 /// The producer's version string (LLVM_VERSION_STRING " " LLVM_REVISION).
137 /// Consumers should rebuild the symbol table from IR if the producer's
138 /// version does not match the consumer's version due to potential differences
139 /// in symbol table format, symbol enumeration order and so on.
// Locations of the per-file tables. The Reader constructor resolves each of
// these Range fields against the symbol table buffer via Range::get().
142 Range
<Module
> Modules
;
143 Range
<Comdat
> Comdats
;
144 Range
<Symbol
> Symbols
;
145 Range
<Uncommon
> Uncommons
;
// String-table references, exposed through Reader::getTargetTriple() and
// Reader::getSourceFileName().
147 Str TargetTriple
, SourceFileName
;
149 /// COFF-specific: linker directives.
152 /// Dependent Library Specifiers
// Each element is a string-table reference; Reader::getDependentLibraries()
// resolves them to StringRefs.
153 Range
<Str
> DependentLibraries
;
156 } // end namespace storage
158 /// Fills in Symtab and StrtabBuilder with a valid symbol and string table for Mods.
// Mods is the list of input modules. Symtab and StrtabBuilder are taken by
// non-const reference and are the outputs that get filled in. The result is
// an llvm::Error.
// NOTE(review): the role of Alloc is not visible from this declaration --
// confirm against the implementation.
160 Error
build(ArrayRef
<Module
*> Mods
, SmallVector
<char, 0> &Symtab
,
161 StringTableBuilder
&StrtabBuilder
, BumpPtrAllocator
&Alloc
);
163 /// This represents a symbol that has been read from a storage::Symbol and
164 /// possibly a storage::Uncommon.
166 // Copied from storage::Symbol.
// Mangled and unmangled names, returned by getName() and getIRName().
167 StringRef Name
, IRName
;
171 // Copied from storage::Uncommon.
// Reader::SymbolRef assigns these from the Uncommon record only when the
// symbol's FB_has_uncommon flag is set.
172 uint32_t CommonSize
, CommonAlign
;
173 StringRef COFFWeakExternFallbackName
;
174 StringRef SectionName
;
176 /// Returns the mangled symbol name.
177 StringRef
getName() const { return Name
; }
179 /// Returns the unmangled symbol name, or the empty string if this is not an
181 StringRef
getIRName() const { return IRName
; }
183 /// Returns the index into the comdat table (see Reader::getComdatTable()), or
184 /// -1 if not a comdat member.
185 int getComdatIndex() const { return ComdatIndex
; }
// Shorthand used by the flag accessors below to name the S::FB_* bit
// positions.
187 using S
= storage::Symbol
;
189 GlobalValue::VisibilityTypes
getVisibility() const {
190 return GlobalValue::VisibilityTypes((Flags
>> S::FB_visibility
) & 3);
193 bool isUndefined() const { return (Flags
>> S::FB_undefined
) & 1; }
194 bool isWeak() const { return (Flags
>> S::FB_weak
) & 1; }
195 bool isCommon() const { return (Flags
>> S::FB_common
) & 1; }
196 bool isIndirect() const { return (Flags
>> S::FB_indirect
) & 1; }
197 bool isUsed() const { return (Flags
>> S::FB_used
) & 1; }
198 bool isTLS() const { return (Flags
>> S::FB_tls
) & 1; }
200 bool canBeOmittedFromSymbolTable() const {
201 return (Flags
>> S::FB_may_omit
) & 1;
204 bool isGlobal() const { return (Flags
>> S::FB_global
) & 1; }
205 bool isFormatSpecific() const { return (Flags
>> S::FB_format_specific
) & 1; }
206 bool isUnnamedAddr() const { return (Flags
>> S::FB_unnamed_addr
) & 1; }
207 bool isExecutable() const { return (Flags
>> S::FB_executable
) & 1; }
// NOTE(review): the body of this accessor is not visible in this excerpt;
// presumably it returns the CommonSize field -- confirm against the full file.
209 uint64_t getCommonSize() const {
// NOTE(review): the body of this accessor is not visible in this excerpt;
// presumably it returns the CommonAlign field -- confirm against the full
// file.
214 uint32_t getCommonAlignment() const {
219 /// COFF-specific: for weak externals, returns the name of the symbol that is
220 /// used as a fallback if the weak external remains undefined.
221 StringRef
getCOFFWeakExternalFallback() const {
222 assert(isWeak() && isIndirect());
223 return COFFWeakExternFallbackName
;
226 StringRef
getSectionName() const { return SectionName
; }
229 /// This class can be used to read a Symtab and Strtab produced by irsymtab::build.
// The symbol table and string table buffers handed to the constructor.
232 StringRef Symtab
, Strtab
;
// Typed views of the tables inside Symtab. The constructor fills each one in
// from the corresponding Range field of the header.
234 ArrayRef
<storage::Module
> Modules
;
235 ArrayRef
<storage::Comdat
> Comdats
;
236 ArrayRef
<storage::Symbol
> Symbols
;
237 ArrayRef
<storage::Uncommon
> Uncommons
;
238 ArrayRef
<storage::Str
> DependentLibraries
;
240 StringRef
str(storage::Str S
) const { return S
.get(Strtab
); }
242 template <typename T
> ArrayRef
<T
> range(storage::Range
<T
> R
) const {
243 return R
.get(Symtab
);
246 const storage::Header
&header() const {
247 return *reinterpret_cast<const storage::Header
*>(Symtab
.data());
254 Reader(StringRef Symtab
, StringRef Strtab
) : Symtab(Symtab
), Strtab(Strtab
) {
255 Modules
= range(header().Modules
);
256 Comdats
= range(header().Comdats
);
257 Symbols
= range(header().Symbols
);
258 Uncommons
= range(header().Uncommons
);
259 DependentLibraries
= range(header().DependentLibraries
);
// Range type returned by symbols() and module_symbols(); its iterators are
// object::content_iterator instances wrapping SymbolRef.
262 using symbol_range
= iterator_range
<object::content_iterator
<SymbolRef
>>;
264 /// Returns the symbol table for the entire bitcode file.
265 /// The symbols enumerated by this method are ephemeral, but they can be
266 /// copied into an irsymtab::Symbol object.
// Declaration only; the inline definition follows the Reader::SymbolRef class
// later in this header.
267 symbol_range
symbols() const;
269 size_t getNumModules() const { return Modules
.size(); }
271 /// Returns a slice of the symbol table for the I'th module in the file.
272 /// The symbols enumerated by this method are ephemeral, but they can be
273 /// copied into an irsymtab::Symbol object.
// I indexes the Modules table (see getNumModules()). Declaration only; the
// inline definition follows the Reader::SymbolRef class later in this header.
274 symbol_range
module_symbols(unsigned I
) const;
276 StringRef
getTargetTriple() const { return str(header().TargetTriple
); }
278 /// Returns the source file path specified at compile time.
279 StringRef
getSourceFileName() const { return str(header().SourceFileName
); }
281 /// Returns a table with all the comdats used by this file.
282 std::vector
<StringRef
> getComdatTable() const {
283 std::vector
<StringRef
> ComdatTable
;
// Reserve up front: the loop appends exactly one entry per comdat.
284 ComdatTable
.reserve(Comdats
.size());
// Each comdat's Name is a string-table reference; str() resolves it.
285 for (auto C
: Comdats
)
286 ComdatTable
.push_back(str(C
.Name
));
// NOTE(review): the return statement and closing brace are not visible in
// this excerpt; presumably ComdatTable is returned -- confirm.
290 /// COFF-specific: returns linker options specified in the input file.
291 StringRef
getCOFFLinkerOpts() const { return str(header().COFFLinkerOpts
); }
293 /// Returns dependent library specifiers
294 std::vector
<StringRef
> getDependentLibraries() const {
295 std::vector
<StringRef
> Specifiers
;
// Reserve up front: the loop appends exactly one entry per specifier.
296 Specifiers
.reserve(DependentLibraries
.size());
// Each element is a string-table reference; str() resolves it.
297 for (auto S
: DependentLibraries
) {
298 Specifiers
.push_back(str(S
));
// NOTE(review): the end of the loop, the return statement and the closing
// brace are not visible in this excerpt; presumably Specifiers is returned --
// confirm.
304 /// Ephemeral symbols produced by Reader::symbols() and
305 /// Reader::module_symbols().
// NOTE(review): several lines of this class (member function headers, some
// statements, closing braces) are not visible in this excerpt; the comments
// below describe only what the visible statements do.
306 class Reader::SymbolRef
: public Symbol
{
// SymI is the storage::Symbol currently referred to (operator== compares it);
// SymE is the end pointer passed by Reader::symbols()/module_symbols().
307 const storage::Symbol
*SymI
, *SymE
;
// Uncommon record consulted when the current symbol has FB_has_uncommon set.
308 const storage::Uncommon
*UncI
;
// The statements below copy the fields of *SymI into the inherited Symbol
// members, resolving string references through the Reader (R->str()).
315 Name
= R
->str(SymI
->Name
);
316 IRName
= R
->str(SymI
->IRName
);
317 ComdatIndex
= SymI
->ComdatIndex
;
// The Uncommon-derived fields are only taken from *UncI when the symbol's
// flags say it has an Uncommon record.
320 if (Flags
& (1 << storage::Symbol::FB_has_uncommon
)) {
321 CommonSize
= UncI
->CommonSize
;
322 CommonAlign
= UncI
->CommonAlign
;
323 COFFWeakExternFallbackName
= R
->str(UncI
->COFFWeakExternFallbackName
);
324 SectionName
= R
->str(UncI
->SectionName
);
326 // Reset this field so it can be queried unconditionally for all symbols.
// Constructor: stores the symbol cursor, its end, the Uncommon cursor and the
// owning Reader.
// NOTE(review): the constructor body is not visible in this excerpt.
331 SymbolRef(const storage::Symbol
*SymI
, const storage::Symbol
*SymE
,
332 const storage::Uncommon
*UncI
, const Reader
*R
)
333 : SymI(SymI
), SymE(SymE
), UncI(UncI
), R(R
) {
// NOTE(review): neither the function containing this condition nor the
// statement it guards is visible in this excerpt.
339 if (Flags
& (1 << storage::Symbol::FB_has_uncommon
))
344 bool operator==(const SymbolRef
&Other
) const { return SymI
== Other
.SymI
; }
347 inline Reader::symbol_range
Reader::symbols() const {
348 return {SymbolRef(Symbols
.begin(), Symbols
.end(), Uncommons
.begin(), this),
349 SymbolRef(Symbols
.end(), Symbols
.end(), nullptr, this)};
352 inline Reader::symbol_range
Reader::module_symbols(unsigned I
) const {
353 const storage::Module
&M
= Modules
[I
];
354 const storage::Symbol
*MBegin
= Symbols
.begin() + M
.Begin
,
355 *MEnd
= Symbols
.begin() + M
.End
;
356 return {SymbolRef(MBegin
, MEnd
, Uncommons
.begin() + M
.UncBegin
, this),
357 SymbolRef(MEnd
, MEnd
, nullptr, this)};
360 /// The contents of the irsymtab in a bitcode file. Any underlying data for the
361 /// irsymtab are owned by Symtab and Strtab.
362 struct FileContents
{
// Byte buffers holding the symbol table and the string table.
// NOTE(review): any further members and the closing brace of this struct are
// not visible in this excerpt.
363 SmallVector
<char, 0> Symtab
, Strtab
;
367 /// Reads the contents of a bitcode file, creating its irsymtab if necessary.
// BFC is taken by const reference. The result is an Expected<FileContents>,
// i.e. either the FileContents or an error.
368 Expected
<FileContents
> readBitcode(const BitcodeFileContents
&BFC
);
370 } // end namespace irsymtab
371 } // end namespace llvm
373 #endif // LLVM_OBJECT_IRSYMTAB_H