1 //===- LinkerScript.h -------------------------------------------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 #ifndef LLD_ELF_LINKER_SCRIPT_H
10 #define LLD_ELF_LINKER_SCRIPT_H
#include "InputSection.h"
#include "lld/Common/LLVM.h"
#include "lld/Common/Strings.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Compiler.h"
#include <utility>
33 class InputSectionBase
;
39 struct SectionClassDesc
;
41 // This represents an r-value in the linker script.
43 ExprValue(SectionBase
*sec
, bool forceAbsolute
, uint64_t val
,
45 : sec(sec
), val(val
), forceAbsolute(forceAbsolute
), loc(loc
.str()) {}
47 ExprValue(uint64_t val
) : ExprValue(nullptr, false, val
, "") {}
49 bool isAbsolute() const { return forceAbsolute
|| sec
== nullptr; }
50 uint64_t getValue() const;
51 uint64_t getSecAddr() const;
52 uint64_t getSectionOffset() const;
54 // If a value is relative to a section, it has a non-null Sec.
58 uint64_t alignment
= 1;
60 // The original st_type if the expression represents a symbol. Any operation
61 // resets type to STT_NOTYPE.
62 uint8_t type
= llvm::ELF::STT_NOTYPE
;
64 // True if this expression is enclosed in ABSOLUTE().
65 // This flag affects the return value of getValue().
68 // Original source location. Used for error messages.
72 // This represents an expression in the linker script.
73 // ScriptParser::readExpr reads an expression and returns an Expr.
74 // Later, we evaluate the expression by calling the function.
75 using Expr
= std::function
<ExprValue()>;
77 // This enum is used to implement linker script SECTIONS command.
78 // https://sourceware.org/binutils/docs/ld/SECTIONS.html#SECTIONS
79 enum SectionsCommandKind
{
80 AssignmentKind
, // . = expr or <sym> = expr
83 ByteKind
, // BYTE(expr), SHORT(expr), LONG(expr) or QUAD(expr)
84 ClassKind
, // CLASS(class_name)
87 struct SectionCommand
{
88 SectionCommand(int k
) : kind(k
) {}
92 // This represents ". = <expr>" or "<symbol> = <expr>".
93 struct SymbolAssignment
: SectionCommand
{
94 SymbolAssignment(StringRef name
, Expr e
, unsigned symOrder
, std::string loc
)
95 : SectionCommand(AssignmentKind
), name(name
), expression(e
),
96 symOrder(symOrder
), location(loc
) {}
98 static bool classof(const SectionCommand
*c
) {
99 return c
->kind
== AssignmentKind
;
102 // The LHS of an expression. Name is either a symbol name or ".".
104 Defined
*sym
= nullptr;
106 // The RHS of an expression.
109 // Command attributes for PROVIDE, HIDDEN and PROVIDE_HIDDEN.
110 bool provide
= false;
113 // This assignment references DATA_SEGMENT_RELRO_END.
114 bool dataSegmentRelroEnd
= false;
118 // Holds file name and line number for error reporting.
119 std::string location
;
121 // A string representation of this command. We use this for -Map.
122 std::string commandString
;
124 // Address of this assignment command.
127 // Size of this assignment command. This is usually 0, but if
128 // you move '.' this may be greater than 0.
// Optional constraints that a linker script may place on an output section.
// With ONLY_IF_RO the output section is created only if its input sections
// are read-only; with ONLY_IF_RW it is created only if all of its input
// sections are RW.
enum class ConstraintKind {
  NoConstraint,
  ReadOnly,
  ReadWrite,
};
// This struct is used to represent the location and size of regions of
// target memory. Instances of the struct are created by parsing the
// MEMORY command.
141 struct MemoryRegion
{
142 MemoryRegion(StringRef name
, Expr origin
, Expr length
, uint32_t flags
,
143 uint32_t invFlags
, uint32_t negFlags
, uint32_t negInvFlags
)
144 : name(std::string(name
)), origin(origin
), length(length
), flags(flags
),
145 invFlags(invFlags
), negFlags(negFlags
), negInvFlags(negInvFlags
) {}
150 // A section can be assigned to the region if any of these ELF section flags
153 // ... or any of these flags are not set.
154 // For example, the memory region attribute "r" maps to SHF_WRITE.
156 // A section cannot be assigned to the region if any of these ELF section
159 // ... or any of these flags are not set.
160 // For example, the memory region attribute "!r" maps to SHF_WRITE.
161 uint32_t negInvFlags
;
164 uint64_t getOrigin() const { return origin().getValue(); }
165 uint64_t getLength() const { return length().getValue(); }
167 bool compatibleWith(uint32_t secFlags
) const {
168 if ((secFlags
& negFlags
) || (~secFlags
& negInvFlags
))
170 return (secFlags
& flags
) || (~secFlags
& invFlags
);
174 // This struct represents one section match pattern in SECTIONS() command.
175 // It can optionally have negative match pattern for EXCLUDED_FILE command.
176 // Also it may be surrounded with SORT() command, so contains sorting rules.
177 class SectionPattern
{
178 StringMatcher excludedFilePat
;
180 // Cache of the most recent input argument and result of excludesFile().
181 mutable std::optional
<std::pair
<const InputFile
*, bool>> excludesFileCache
;
184 SectionPattern(StringMatcher
&&pat1
, StringMatcher
&&pat2
)
185 : excludedFilePat(pat1
), sectionPat(pat2
),
186 sortOuter(SortSectionPolicy::Default
),
187 sortInner(SortSectionPolicy::Default
) {}
189 bool excludesFile(const InputFile
&file
) const;
191 StringMatcher sectionPat
;
192 SortSectionPolicy sortOuter
;
193 SortSectionPolicy sortInner
;
196 class InputSectionDescription
: public SectionCommand
{
197 SingleStringMatcher filePat
;
199 // Cache of the most recent input argument and result of matchesFile().
200 mutable std::optional
<std::pair
<const InputFile
*, bool>> matchesFileCache
;
203 InputSectionDescription(StringRef filePattern
, uint64_t withFlags
= 0,
204 uint64_t withoutFlags
= 0, StringRef classRef
= {})
205 : SectionCommand(InputSectionKind
), filePat(filePattern
),
206 classRef(classRef
), withFlags(withFlags
), withoutFlags(withoutFlags
) {
207 assert((filePattern
.empty() || classRef
.empty()) &&
208 "file pattern and class reference are mutually exclusive");
211 static bool classof(const SectionCommand
*c
) {
212 return c
->kind
== InputSectionKind
;
215 bool matchesFile(const InputFile
&file
) const;
// Input sections that match at least one of the SectionPatterns
// will be associated with this InputSectionDescription.
219 SmallVector
<SectionPattern
, 0> sectionPatterns
;
221 // If present, input section matching uses class membership instead of file
222 // and section patterns (mutually exclusive).
225 // Includes InputSections and MergeInputSections. Used temporarily during
226 // assignment of input sections to output sections.
227 SmallVector
<InputSectionBase
*, 0> sectionBases
;
229 // Used after the finalizeInputSections() pass. MergeInputSections have been
230 // merged into MergeSyntheticSections.
231 SmallVector
<InputSection
*, 0> sections
;
233 // Temporary record of synthetic ThunkSection instances and the pass that
234 // they were created in. This is used to insert newly created ThunkSections
235 // into Sections at the end of a createThunks() pass.
236 SmallVector
<std::pair
<ThunkSection
*, uint32_t>, 0> thunkSections
;
238 // SectionPatterns can be filtered with the INPUT_SECTION_FLAGS command.
240 uint64_t withoutFlags
;
243 // Represents BYTE(), SHORT(), LONG(), or QUAD().
244 struct ByteCommand
: SectionCommand
{
245 ByteCommand(Expr e
, unsigned size
, std::string commandString
)
246 : SectionCommand(ByteKind
), commandString(commandString
), expression(e
),
249 static bool classof(const SectionCommand
*c
) { return c
->kind
== ByteKind
; }
// A string representation of this command. Used for -Map.
252 std::string commandString
;
256 // This is just an offset of this assignment command in the output section.
259 // Size of this data command.
263 struct InsertCommand
{
264 SmallVector
<StringRef
, 0> names
;
269 // A NOCROSSREFS/NOCROSSREFS_TO command that prohibits references between
270 // certain output sections.
271 struct NoCrossRefCommand
{
272 SmallVector
<StringRef
, 0> outputSections
;
// When true, this describes a NOCROSSREFS_TO command that prohibits references
// to the first output section from any of the other sections.
276 bool toFirst
= false;
279 struct PhdrsCommand
{
281 unsigned type
= llvm::ELF::PT_NULL
;
282 bool hasFilehdr
= false;
283 bool hasPhdrs
= false;
284 std::optional
<unsigned> flags
;
285 Expr lmaExpr
= nullptr;
288 class LinkerScript final
{
289 // Temporary state used in processSectionCommands() and assignAddresses()
290 // that must be reinitialized for each call to the above functions, and must
291 // not be used outside of the scope of a call to the above functions.
292 struct AddressState
{
293 AddressState(const LinkerScript
&);
294 OutputSection
*outSec
= nullptr;
295 MemoryRegion
*memRegion
= nullptr;
296 MemoryRegion
*lmaRegion
= nullptr;
297 uint64_t lmaOffset
= 0;
298 uint64_t tbssAddr
= 0;
302 SmallVector
<std::unique_ptr
<OutputDesc
>, 0> descPool
;
303 llvm::DenseMap
<llvm::CachedHashStringRef
, OutputDesc
*> nameToOutputSection
;
305 StringRef
getOutputSectionName(const InputSectionBase
*s
) const;
306 void addSymbol(SymbolAssignment
*cmd
);
307 void declareSymbol(SymbolAssignment
*cmd
);
308 void assignSymbol(SymbolAssignment
*cmd
, bool inSec
);
309 void setDot(Expr e
, const Twine
&loc
, bool inSec
);
310 void expandOutputSection(uint64_t size
);
311 void expandMemoryRegions(uint64_t size
);
313 SmallVector
<InputSectionBase
*, 0>
314 computeInputSections(const InputSectionDescription
*,
315 ArrayRef
<InputSectionBase
*>, const SectionBase
&outCmd
);
317 SmallVector
<InputSectionBase
*, 0> createInputSectionList(OutputSection
&cmd
);
319 void discardSynthetic(OutputSection
&);
321 SmallVector
<size_t, 0> getPhdrIndices(OutputSection
*sec
);
323 std::pair
<MemoryRegion
*, MemoryRegion
*>
324 findMemoryRegion(OutputSection
*sec
, MemoryRegion
*hint
);
326 bool assignOffsets(OutputSection
*sec
);
328 // This captures the local AddressState and makes it accessible
329 // deliberately. This is needed as there are some cases where we cannot just
330 // thread the current state through to a lambda function created by the
332 // This should remain a plain pointer as its lifetime is smaller than
334 AddressState
*state
= nullptr;
336 std::unique_ptr
<OutputSection
> aether
;
341 // OutputSection may be incomplete. Avoid inline ctor/dtor.
342 LinkerScript(Ctx
&ctx
);
345 OutputDesc
*createOutputSection(StringRef name
, StringRef location
);
346 OutputDesc
*getOrCreateOutputSection(StringRef name
);
348 bool hasPhdrsCommands() { return !phdrsCommands
.empty(); }
349 uint64_t getDot() { return dot
; }
350 void discard(InputSectionBase
&s
);
352 ExprValue
getSymbolValue(StringRef name
, const Twine
&loc
);
354 void addOrphanSections();
355 void diagnoseOrphanHandling() const;
356 void diagnoseMissingSGSectionAddress() const;
357 void adjustOutputSections();
358 void adjustSectionsAfterSorting();
360 SmallVector
<std::unique_ptr
<PhdrEntry
>, 0> createPhdrs();
361 bool needsInterpSection();
363 bool shouldKeep(InputSectionBase
*s
);
364 std::pair
<const OutputSection
*, const Defined
*> assignAddresses();
365 bool spillSections();
366 void erasePotentialSpillSections();
367 void allocateHeaders(SmallVector
<std::unique_ptr
<PhdrEntry
>, 0> &phdrs
);
368 void processSectionCommands();
369 void processSymbolAssignments();
370 void declareSymbols();
372 // Used to handle INSERT AFTER statements.
373 void processInsertCommands();
375 // Describe memory region usage.
376 void printMemoryUsage(raw_ostream
&os
);
378 // Record a pending error during an assignAddresses invocation.
379 // assignAddresses is executed more than once. Therefore, lld::error should be
380 // avoided to not report duplicate errors.
381 void recordError(const Twine
&msg
);
383 // Check backward location counter assignment and memory region/LMA overflows.
384 void checkFinalScriptConditions() const;
386 // Add symbols that are referenced in the linker script to the symbol table.
387 // Symbols referenced in a PROVIDE command are only added to the symbol table
388 // if the PROVIDE command actually provides the symbol.
389 // It also adds the symbols referenced by the used PROVIDE symbols to the
390 // linker script referenced symbols list.
391 void addScriptReferencedSymbolsToSymTable();
393 // Returns true if the PROVIDE symbol should be added to the link.
394 // A PROVIDE symbol is added to the link only if it satisfies an
395 // undefined reference.
396 bool shouldAddProvideSym(StringRef symName
);
398 // SECTIONS command list.
399 SmallVector
<SectionCommand
*, 0> sectionCommands
;
401 // PHDRS command list.
402 SmallVector
<PhdrsCommand
, 0> phdrsCommands
;
404 bool hasSectionsCommand
= false;
405 bool seenDataAlign
= false;
406 bool seenRelroEnd
= false;
407 bool errorOnMissingSection
= false;
408 SmallVector
<SmallString
<0>, 0> recordedErrors
;
410 // List of section patterns specified with KEEP commands. They will
411 // be kept even if they are unused and --gc-sections is specified.
412 SmallVector
<InputSectionDescription
*, 0> keptSections
;
414 // A map from memory region name to a memory region descriptor.
415 llvm::MapVector
<llvm::StringRef
, MemoryRegion
*> memoryRegions
;
417 // A list of symbols referenced by the script.
418 SmallVector
<llvm::StringRef
, 0> referencedSymbols
;
420 // Used to implement INSERT [AFTER|BEFORE]. Contains output sections that need
422 SmallVector
<InsertCommand
, 0> insertCommands
;
424 // OutputSections specified by OVERWRITE_SECTIONS.
425 SmallVector
<OutputDesc
*, 0> overwriteSections
;
427 // NOCROSSREFS(_TO) commands.
428 SmallVector
<NoCrossRefCommand
, 0> noCrossRefs
;
430 // Sections that will be warned/errored by --orphan-handling.
431 SmallVector
<const InputSectionBase
*, 0> orphanSections
;
433 // Stores the mapping: PROVIDE symbol -> symbols referred in the PROVIDE
434 // expression. For example, if the PROVIDE command is:
436 // PROVIDE(v = a + b + c);
438 // then provideMap should contain the mapping: 'v' -> ['a', 'b', 'c']
439 llvm::MapVector
<StringRef
, SmallVector
<StringRef
, 0>> provideMap
;
440 // Store defined symbols that should ignore PROVIDE commands.
441 llvm::DenseSet
<Symbol
*> unusedProvideSyms
;
443 // List of potential spill locations (PotentialSpillSection) for an input
445 struct PotentialSpillList
{
447 PotentialSpillSection
*head
;
448 PotentialSpillSection
*tail
;
450 llvm::DenseMap
<InputSectionBase
*, PotentialSpillList
> potentialSpillLists
;
452 // Named lists of input sections that can be collectively referenced in output
453 // section descriptions. Multiple references allow for sections to spill from
454 // one output section to another.
455 llvm::DenseMap
<llvm::CachedHashStringRef
, SectionClassDesc
*> sectionClasses
;
458 } // end namespace lld::elf
460 #endif // LLD_ELF_LINKER_SCRIPT_H