//===- LoongArch.cpp ------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "InputFiles.h"
#include "OutputSections.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "llvm/Support/LEB128.h"

using namespace llvm;
using namespace llvm::object;
using namespace llvm::support::endian;
using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf;
24 class LoongArch final
: public TargetInfo
{
27 uint32_t calcEFlags() const override
;
28 int64_t getImplicitAddend(const uint8_t *buf
, RelType type
) const override
;
29 void writeGotPlt(uint8_t *buf
, const Symbol
&s
) const override
;
30 void writeIgotPlt(uint8_t *buf
, const Symbol
&s
) const override
;
31 void writePltHeader(uint8_t *buf
) const override
;
32 void writePlt(uint8_t *buf
, const Symbol
&sym
,
33 uint64_t pltEntryAddr
) const override
;
34 RelType
getDynRel(RelType type
) const override
;
35 RelExpr
getRelExpr(RelType type
, const Symbol
&s
,
36 const uint8_t *loc
) const override
;
37 bool usesOnlyLowPageBits(RelType type
) const override
;
38 void relocate(uint8_t *loc
, const Relocation
&rel
,
39 uint64_t val
) const override
;
40 bool relaxOnce(int pass
) const override
;
41 void finalizeRelax(int passes
) const override
;
43 } // end anonymous namespace
// Opcode words (operand fields zeroed) for the instructions assembled by hand
// below, in the PLT writer and the trap filler.
enum Op {
  SUB_W = 0x00110000,
  SUB_D = 0x00118000,
  BREAK = 0x002a0000,
  SRLI_W = 0x00448000,
  SRLI_D = 0x00450000,
  ADDI_W = 0x02800000,
  ADDI_D = 0x02c00000,
  ANDI = 0x03400000,
  PCADDU12I = 0x1c000000,
  LD_W = 0x28800000,
  LD_D = 0x28c00000,
  JIRL = 0x4c000000,
};

// ABI register numbers used in the synthesized PLT code.
enum Reg {
  R_ZERO = 0,
  R_RA = 1,
  R_TP = 2,
  R_T0 = 12,
  R_T1 = 13,
  R_T2 = 14,
  R_T3 = 15,
};
72 // Mask out the input's lowest 12 bits for use with `pcalau12i`, in sequences
73 // like `pcalau12i + addi.[wd]` or `pcalau12i + {ld,st}.*` where the `pcalau12i`
74 // produces a PC-relative intermediate value with the lowest 12 bits zeroed (the
75 // "page") for the next instruction to add in the "page offset". (`pcalau12i`
76 // stands for something like "PC ALigned Add Upper that starts from the 12th
79 // Here a "page" is in fact just another way to refer to the 12-bit range
80 // allowed by the immediate field of the addi/ld/st instructions, and not
81 // related to the system or the kernel's actual page size. The semantics happen
82 // to match the AArch64 `adrp`, so the concept of "page" is borrowed here.
83 static uint64_t getLoongArchPage(uint64_t p
) {
84 return p
& ~static_cast<uint64_t>(0xfff);
87 static uint32_t lo12(uint32_t val
) { return val
& 0xfff; }
89 // Calculate the adjusted page delta between dest and PC.
90 uint64_t elf::getLoongArchPageDelta(uint64_t dest
, uint64_t pc
, RelType type
) {
91 // Note that if the sequence being relocated is `pcalau12i + addi.d + lu32i.d
92 // + lu52i.d`, they must be adjacent so that we can infer the PC of
93 // `pcalau12i` when calculating the page delta for the other two instructions
94 // (lu32i.d and lu52i.d). Compensate all the sign-extensions is a bit
95 // complicated. Just use psABI recommended algorithm.
96 uint64_t pcalau12i_pc
;
98 case R_LARCH_PCALA64_LO20
:
99 case R_LARCH_GOT64_PC_LO20
:
100 case R_LARCH_TLS_IE64_PC_LO20
:
101 case R_LARCH_TLS_DESC64_PC_LO20
:
102 pcalau12i_pc
= pc
- 8;
104 case R_LARCH_PCALA64_HI12
:
105 case R_LARCH_GOT64_PC_HI12
:
106 case R_LARCH_TLS_IE64_PC_HI12
:
107 case R_LARCH_TLS_DESC64_PC_HI12
:
108 pcalau12i_pc
= pc
- 12;
114 uint64_t result
= getLoongArchPage(dest
) - getLoongArchPage(pcalau12i_pc
);
116 result
+= 0x1000 - 0x1'0000'0000;
117 if (result
& 0x8000'0000)
118 result
+= 0x1'0000'0000;
122 static uint32_t hi20(uint32_t val
) { return (val
+ 0x800) >> 12; }
124 static uint32_t insn(uint32_t op
, uint32_t d
, uint32_t j
, uint32_t k
) {
125 return op
| d
| (j
<< 5) | (k
<< 10);
128 // Extract bits v[begin:end], where range is inclusive.
129 static uint32_t extractBits(uint64_t v
, uint32_t begin
, uint32_t end
) {
130 return begin
== 63 ? v
>> end
: (v
& ((1ULL << (begin
+ 1)) - 1)) >> end
;
133 static uint32_t setD5k16(uint32_t insn
, uint32_t imm
) {
134 uint32_t immLo
= extractBits(imm
, 15, 0);
135 uint32_t immHi
= extractBits(imm
, 20, 16);
136 return (insn
& 0xfc0003e0) | (immLo
<< 10) | immHi
;
139 static uint32_t setD10k16(uint32_t insn
, uint32_t imm
) {
140 uint32_t immLo
= extractBits(imm
, 15, 0);
141 uint32_t immHi
= extractBits(imm
, 25, 16);
142 return (insn
& 0xfc000000) | (immLo
<< 10) | immHi
;
145 static uint32_t setJ20(uint32_t insn
, uint32_t imm
) {
146 return (insn
& 0xfe00001f) | (extractBits(imm
, 19, 0) << 5);
149 static uint32_t setK12(uint32_t insn
, uint32_t imm
) {
150 return (insn
& 0xffc003ff) | (extractBits(imm
, 11, 0) << 10);
153 static uint32_t setK16(uint32_t insn
, uint32_t imm
) {
154 return (insn
& 0xfc0003ff) | (extractBits(imm
, 15, 0) << 10);
157 static bool isJirl(uint32_t insn
) {
158 return (insn
& 0xfc000000) == JIRL
;
161 static void handleUleb128(uint8_t *loc
, uint64_t val
) {
162 const uint32_t maxcount
= 1 + 64 / 7;
164 const char *error
= nullptr;
165 uint64_t orig
= decodeULEB128(loc
, &count
, nullptr, &error
);
166 if (count
> maxcount
|| (count
== maxcount
&& error
))
167 errorOrWarn(getErrorLocation(loc
) + "extra space for uleb128");
168 uint64_t mask
= count
< maxcount
? (1ULL << 7 * count
) - 1 : -1ULL;
169 encodeULEB128((orig
+ val
) & mask
, loc
, count
);
172 LoongArch::LoongArch() {
173 // The LoongArch ISA itself does not have a limit on page sizes. According to
174 // the ISA manual, the PS (page size) field in MTLB entries and CSR.STLBPS is
175 // 6 bits wide, meaning the maximum page size is 2^63 which is equivalent to
177 // However, practically the maximum usable page size is constrained by the
178 // kernel implementation, and 64KiB is the biggest non-huge page size
179 // supported by Linux as of v6.4. The most widespread page size in use,
181 defaultCommonPageSize
= 16384;
182 defaultMaxPageSize
= 65536;
183 write32le(trapInstr
.data(), BREAK
); // break 0
185 copyRel
= R_LARCH_COPY
;
186 pltRel
= R_LARCH_JUMP_SLOT
;
187 relativeRel
= R_LARCH_RELATIVE
;
188 iRelativeRel
= R_LARCH_IRELATIVE
;
191 symbolicRel
= R_LARCH_64
;
192 tlsModuleIndexRel
= R_LARCH_TLS_DTPMOD64
;
193 tlsOffsetRel
= R_LARCH_TLS_DTPREL64
;
194 tlsGotRel
= R_LARCH_TLS_TPREL64
;
195 tlsDescRel
= R_LARCH_TLS_DESC64
;
197 symbolicRel
= R_LARCH_32
;
198 tlsModuleIndexRel
= R_LARCH_TLS_DTPMOD32
;
199 tlsOffsetRel
= R_LARCH_TLS_DTPREL32
;
200 tlsGotRel
= R_LARCH_TLS_TPREL32
;
201 tlsDescRel
= R_LARCH_TLS_DESC32
;
204 gotRel
= symbolicRel
;
206 // .got.plt[0] = _dl_runtime_resolve, .got.plt[1] = link_map
207 gotPltHeaderEntriesNum
= 2;
214 static uint32_t getEFlags(const InputFile
*f
) {
216 return cast
<ObjFile
<ELF64LE
>>(f
)->getObj().getHeader().e_flags
;
217 return cast
<ObjFile
<ELF32LE
>>(f
)->getObj().getHeader().e_flags
;
220 static bool inputFileHasCode(const InputFile
*f
) {
221 for (const auto *sec
: f
->getSections())
222 if (sec
&& sec
->flags
& SHF_EXECINSTR
)
228 uint32_t LoongArch::calcEFlags() const {
229 // If there are only binary input files (from -b binary), use a
230 // value of 0 for the ELF header flags.
231 if (ctx
.objectFiles
.empty())
235 const InputFile
*targetFile
;
236 for (const InputFile
*f
: ctx
.objectFiles
) {
237 // Do not enforce ABI compatibility if the input file does not contain code.
238 // This is useful for allowing linkage with data-only object files produced
239 // with tools like objcopy, that have zero e_flags.
240 if (!inputFileHasCode(f
))
243 // Take the first non-zero e_flags as the reference.
244 uint32_t flags
= getEFlags(f
);
245 if (target
== 0 && flags
!= 0) {
250 if ((flags
& EF_LOONGARCH_ABI_MODIFIER_MASK
) !=
251 (target
& EF_LOONGARCH_ABI_MODIFIER_MASK
))
253 ": cannot link object files with different ABI from " +
254 toString(targetFile
));
256 // We cannot process psABI v1.x / object ABI v0 files (containing stack
257 // relocations), unlike ld.bfd.
259 // Instead of blindly accepting every v0 object and only failing at
260 // relocation processing time, just disallow interlink altogether. We
261 // don't expect significant usage of object ABI v0 in the wild (the old
262 // world may continue using object ABI v0 for a while, but as it's not
263 // binary-compatible with the upstream i.e. new-world ecosystem, it's not
264 // being considered here).
266 // There are briefly some new-world systems with object ABI v0 binaries too.
267 // It is because these systems were built before the new ABI was finalized.
268 // These are not supported either due to the extremely small number of them,
269 // and the few impacted users are advised to simply rebuild world or
270 // reinstall a recent system.
271 if ((flags
& EF_LOONGARCH_OBJABI_MASK
) != EF_LOONGARCH_OBJABI_V1
)
272 error(toString(f
) + ": unsupported object file ABI version");
278 int64_t LoongArch::getImplicitAddend(const uint8_t *buf
, RelType type
) const {
281 internalLinkerError(getErrorLocation(buf
),
282 "cannot read addend for relocation " + toString(type
));
285 case R_LARCH_TLS_DTPMOD32
:
286 case R_LARCH_TLS_DTPREL32
:
287 case R_LARCH_TLS_TPREL32
:
288 return SignExtend64
<32>(read32le(buf
));
290 case R_LARCH_TLS_DTPMOD64
:
291 case R_LARCH_TLS_DTPREL64
:
292 case R_LARCH_TLS_TPREL64
:
293 return read64le(buf
);
294 case R_LARCH_RELATIVE
:
295 case R_LARCH_IRELATIVE
:
296 return config
->is64
? read64le(buf
) : read32le(buf
);
298 case R_LARCH_JUMP_SLOT
:
299 // These relocations are defined as not having an implicit addend.
301 case R_LARCH_TLS_DESC32
:
302 return read32le(buf
+ 4);
303 case R_LARCH_TLS_DESC64
:
304 return read64le(buf
+ 8);
308 void LoongArch::writeGotPlt(uint8_t *buf
, const Symbol
&s
) const {
310 write64le(buf
, in
.plt
->getVA());
312 write32le(buf
, in
.plt
->getVA());
315 void LoongArch::writeIgotPlt(uint8_t *buf
, const Symbol
&s
) const {
316 if (config
->writeAddends
) {
318 write64le(buf
, s
.getVA());
320 write32le(buf
, s
.getVA());
324 void LoongArch::writePltHeader(uint8_t *buf
) const {
325 // The LoongArch PLT is currently structured just like that of RISCV.
326 // Annoyingly, this means the PLT is still using `pcaddu12i` to perform
327 // PC-relative addressing (because `pcaddu12i` is the same as RISCV `auipc`),
328 // in contrast to the AArch64-like page-offset scheme with `pcalau12i` that
329 // is used everywhere else involving PC-relative operations in the LoongArch
332 // The `pcrel_{hi20,lo12}` operators are illustrative only and not really
333 // supported by LoongArch assemblers.
335 // pcaddu12i $t2, %pcrel_hi20(.got.plt)
336 // sub.[wd] $t1, $t1, $t3
337 // ld.[wd] $t3, $t2, %pcrel_lo12(.got.plt) ; t3 = _dl_runtime_resolve
338 // addi.[wd] $t1, $t1, -pltHeaderSize-12 ; t1 = &.plt[i] - &.plt[0]
339 // addi.[wd] $t0, $t2, %pcrel_lo12(.got.plt)
340 // srli.[wd] $t1, $t1, (is64?1:2) ; t1 = &.got.plt[i] - &.got.plt[0]
341 // ld.[wd] $t0, $t0, Wordsize ; t0 = link_map
343 uint32_t offset
= in
.gotPlt
->getVA() - in
.plt
->getVA();
344 uint32_t sub
= config
->is64
? SUB_D
: SUB_W
;
345 uint32_t ld
= config
->is64
? LD_D
: LD_W
;
346 uint32_t addi
= config
->is64
? ADDI_D
: ADDI_W
;
347 uint32_t srli
= config
->is64
? SRLI_D
: SRLI_W
;
348 write32le(buf
+ 0, insn(PCADDU12I
, R_T2
, hi20(offset
), 0));
349 write32le(buf
+ 4, insn(sub
, R_T1
, R_T1
, R_T3
));
350 write32le(buf
+ 8, insn(ld
, R_T3
, R_T2
, lo12(offset
)));
351 write32le(buf
+ 12, insn(addi
, R_T1
, R_T1
, lo12(-target
->pltHeaderSize
- 12)));
352 write32le(buf
+ 16, insn(addi
, R_T0
, R_T2
, lo12(offset
)));
353 write32le(buf
+ 20, insn(srli
, R_T1
, R_T1
, config
->is64
? 1 : 2));
354 write32le(buf
+ 24, insn(ld
, R_T0
, R_T0
, config
->wordsize
));
355 write32le(buf
+ 28, insn(JIRL
, R_ZERO
, R_T3
, 0));
358 void LoongArch::writePlt(uint8_t *buf
, const Symbol
&sym
,
359 uint64_t pltEntryAddr
) const {
360 // See the comment in writePltHeader for reason why pcaddu12i is used instead
361 // of the pcalau12i that's more commonly seen in the ELF psABI v2.0 days.
363 // pcaddu12i $t3, %pcrel_hi20(f@.got.plt)
364 // ld.[wd] $t3, $t3, %pcrel_lo12(f@.got.plt)
367 uint32_t offset
= sym
.getGotPltVA() - pltEntryAddr
;
368 write32le(buf
+ 0, insn(PCADDU12I
, R_T3
, hi20(offset
), 0));
370 insn(config
->is64
? LD_D
: LD_W
, R_T3
, R_T3
, lo12(offset
)));
371 write32le(buf
+ 8, insn(JIRL
, R_T1
, R_T3
, 0));
372 write32le(buf
+ 12, insn(ANDI
, R_ZERO
, R_ZERO
, 0));
375 RelType
LoongArch::getDynRel(RelType type
) const {
376 return type
== target
->symbolicRel
? type
377 : static_cast<RelType
>(R_LARCH_NONE
);
380 RelExpr
LoongArch::getRelExpr(const RelType type
, const Symbol
&s
,
381 const uint8_t *loc
) const {
384 case R_LARCH_MARK_LA
:
385 case R_LARCH_MARK_PCREL
:
389 case R_LARCH_ABS_HI20
:
390 case R_LARCH_ABS_LO12
:
391 case R_LARCH_ABS64_LO20
:
392 case R_LARCH_ABS64_HI12
:
394 case R_LARCH_PCALA_LO12
:
395 // We could just R_ABS, but the JIRL instruction reuses the relocation type
396 // for a different purpose. The questionable usage is part of glibc 2.37
397 // libc_nonshared.a [1], which is linked into user programs, so we have to
398 // work around it for a while, even if a new relocation type may be
399 // introduced in the future [2].
401 // [1]: https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=9f482b73f41a9a1bbfb173aad0733d1c824c788a
402 // [2]: https://github.com/loongson/la-abi-specs/pull/3
403 return isJirl(read32le(loc
)) ? R_PLT
: R_ABS
;
404 case R_LARCH_TLS_DTPREL32
:
405 case R_LARCH_TLS_DTPREL64
:
407 case R_LARCH_TLS_TPREL32
:
408 case R_LARCH_TLS_TPREL64
:
409 case R_LARCH_TLS_LE_HI20
:
410 case R_LARCH_TLS_LE_LO12
:
411 case R_LARCH_TLS_LE64_LO20
:
412 case R_LARCH_TLS_LE64_HI12
:
419 case R_LARCH_ADD_ULEB128
:
425 case R_LARCH_SUB_ULEB128
:
426 // The LoongArch add/sub relocs behave like the RISCV counterparts; reuse
427 // the RelExpr to avoid code duplication.
429 case R_LARCH_32_PCREL
:
430 case R_LARCH_64_PCREL
:
431 case R_LARCH_PCREL20_S2
:
438 case R_LARCH_GOT_PC_HI20
:
439 case R_LARCH_GOT64_PC_LO20
:
440 case R_LARCH_GOT64_PC_HI12
:
441 case R_LARCH_TLS_IE_PC_HI20
:
442 case R_LARCH_TLS_IE64_PC_LO20
:
443 case R_LARCH_TLS_IE64_PC_HI12
:
444 return R_LOONGARCH_GOT_PAGE_PC
;
445 case R_LARCH_GOT_PC_LO12
:
446 case R_LARCH_TLS_IE_PC_LO12
:
447 return R_LOONGARCH_GOT
;
448 case R_LARCH_TLS_LD_PC_HI20
:
449 case R_LARCH_TLS_GD_PC_HI20
:
450 return R_LOONGARCH_TLSGD_PAGE_PC
;
451 case R_LARCH_PCALA_HI20
:
452 // Why not R_LOONGARCH_PAGE_PC, majority of references don't go through PLT
453 // anyway so why waste time checking only to get everything relaxed back to
456 // This is again due to the R_LARCH_PCALA_LO12 on JIRL case, where we want
457 // both the HI20 and LO12 to potentially refer to the PLT. But in reality
458 // the HI20 reloc appears earlier, and the relocs don't contain enough
459 // information to let us properly resolve semantics per symbol.
460 // Unlike RISCV, our LO12 relocs *do not* point to their corresponding HI20
461 // relocs, hence it is nearly impossible to 100% accurately determine each
462 // HI20's "flavor" without taking big performance hits, in the presence of
463 // edge cases (e.g. HI20 without pairing LO12; paired LO12 placed so far
464 // apart that relationship is not certain anymore), and programmer mistakes
465 // (e.g. as outlined in https://github.com/loongson/la-abi-specs/pull/3).
467 // Ideally we would scan in an extra pass for all LO12s on JIRL, then mark
468 // every HI20 reloc referring to the same symbol differently; this is not
469 // feasible with the current function signature of getRelExpr that doesn't
470 // allow for such inter-pass state.
472 // So, unfortunately we have to again workaround this quirk the same way as
473 // BFD: assuming every R_LARCH_PCALA_HI20 is potentially PLT-needing, only
474 // relaxing back to R_LOONGARCH_PAGE_PC if it's known not so at a later
476 return R_LOONGARCH_PLT_PAGE_PC
;
477 case R_LARCH_PCALA64_LO20
:
478 case R_LARCH_PCALA64_HI12
:
479 return R_LOONGARCH_PAGE_PC
;
480 case R_LARCH_GOT_HI20
:
481 case R_LARCH_GOT_LO12
:
482 case R_LARCH_GOT64_LO20
:
483 case R_LARCH_GOT64_HI12
:
484 case R_LARCH_TLS_IE_HI20
:
485 case R_LARCH_TLS_IE_LO12
:
486 case R_LARCH_TLS_IE64_LO20
:
487 case R_LARCH_TLS_IE64_HI12
:
489 case R_LARCH_TLS_LD_HI20
:
491 case R_LARCH_TLS_GD_HI20
:
494 return config
->relax
? R_RELAX_HINT
: R_NONE
;
497 case R_LARCH_TLS_DESC_PC_HI20
:
498 case R_LARCH_TLS_DESC64_PC_LO20
:
499 case R_LARCH_TLS_DESC64_PC_HI12
:
500 return R_LOONGARCH_TLSDESC_PAGE_PC
;
501 case R_LARCH_TLS_DESC_PC_LO12
:
502 case R_LARCH_TLS_DESC_LD
:
503 case R_LARCH_TLS_DESC_HI20
:
504 case R_LARCH_TLS_DESC_LO12
:
505 case R_LARCH_TLS_DESC64_LO20
:
506 case R_LARCH_TLS_DESC64_HI12
:
508 case R_LARCH_TLS_DESC_CALL
:
509 return R_TLSDESC_CALL
;
511 // Other known relocs that are explicitly unimplemented:
513 // - psABI v1 relocs that need a stateful stack machine to work, and not
514 // required when implementing psABI v2;
515 // - relocs that are not used anywhere (R_LARCH_{ADD,SUB}_24 [1], and the
516 // two GNU vtable-related relocs).
518 // [1]: https://web.archive.org/web/20230709064026/https://github.com/loongson/LoongArch-Documentation/issues/51
520 error(getErrorLocation(loc
) + "unknown relocation (" + Twine(type
) +
521 ") against symbol " + toString(s
));
526 bool LoongArch::usesOnlyLowPageBits(RelType type
) const {
530 case R_LARCH_PCALA_LO12
:
531 case R_LARCH_GOT_LO12
:
532 case R_LARCH_GOT_PC_LO12
:
533 case R_LARCH_TLS_IE_PC_LO12
:
534 case R_LARCH_TLS_DESC_LO12
:
535 case R_LARCH_TLS_DESC_PC_LO12
:
540 void LoongArch::relocate(uint8_t *loc
, const Relocation
&rel
,
541 uint64_t val
) const {
543 case R_LARCH_32_PCREL
:
544 checkInt(loc
, val
, 32, rel
);
547 case R_LARCH_TLS_DTPREL32
:
551 case R_LARCH_TLS_DTPREL64
:
552 case R_LARCH_64_PCREL
:
556 case R_LARCH_PCREL20_S2
:
557 checkInt(loc
, val
, 22, rel
);
558 checkAlignment(loc
, val
, 4, rel
);
559 write32le(loc
, setJ20(read32le(loc
), val
>> 2));
563 checkInt(loc
, val
, 18, rel
);
564 checkAlignment(loc
, val
, 4, rel
);
565 write32le(loc
, setK16(read32le(loc
), val
>> 2));
569 checkInt(loc
, val
, 23, rel
);
570 checkAlignment(loc
, val
, 4, rel
);
571 write32le(loc
, setD5k16(read32le(loc
), val
>> 2));
575 checkInt(loc
, val
, 28, rel
);
576 checkAlignment(loc
, val
, 4, rel
);
577 write32le(loc
, setD10k16(read32le(loc
), val
>> 2));
580 case R_LARCH_CALL36
: {
581 // This relocation is designed for adjacent pcaddu18i+jirl pairs that
582 // are patched in one time. Because of sign extension of these insns'
583 // immediate fields, the relocation range is [-128G - 0x20000, +128G -
584 // 0x20000) (of course must be 4-byte aligned).
585 if (((int64_t)val
+ 0x20000) != llvm::SignExtend64(val
+ 0x20000, 38))
586 reportRangeError(loc
, rel
, Twine(val
), llvm::minIntN(38) - 0x20000,
587 llvm::maxIntN(38) - 0x20000);
588 checkAlignment(loc
, val
, 4, rel
);
589 // Since jirl performs sign extension on the offset immediate, adds (1<<17)
590 // to original val to get the correct hi20.
591 uint32_t hi20
= extractBits(val
+ (1 << 17), 37, 18);
592 // Despite the name, the lower part is actually 18 bits with 4-byte aligned.
593 uint32_t lo16
= extractBits(val
, 17, 2);
594 write32le(loc
, setJ20(read32le(loc
), hi20
));
595 write32le(loc
+ 4, setK16(read32le(loc
+ 4), lo16
));
599 // Relocs intended for `addi`, `ld` or `st`.
600 case R_LARCH_PCALA_LO12
:
601 // We have to again inspect the insn word to handle the R_LARCH_PCALA_LO12
602 // on JIRL case: firstly JIRL wants its immediate's 2 lowest zeroes
603 // removed by us (in contrast to regular R_LARCH_PCALA_LO12), secondly
604 // its immediate slot width is different too (16, not 12).
605 // In this case, process like an R_LARCH_B16, but without overflow checking
606 // and only taking the value's lowest 12 bits.
607 if (isJirl(read32le(loc
))) {
608 checkAlignment(loc
, val
, 4, rel
);
609 val
= SignExtend64
<12>(val
);
610 write32le(loc
, setK16(read32le(loc
), val
>> 2));
614 case R_LARCH_ABS_LO12
:
615 case R_LARCH_GOT_PC_LO12
:
616 case R_LARCH_GOT_LO12
:
617 case R_LARCH_TLS_LE_LO12
:
618 case R_LARCH_TLS_IE_PC_LO12
:
619 case R_LARCH_TLS_IE_LO12
:
620 case R_LARCH_TLS_DESC_PC_LO12
:
621 case R_LARCH_TLS_DESC_LO12
:
622 write32le(loc
, setK12(read32le(loc
), extractBits(val
, 11, 0)));
625 // Relocs intended for `lu12i.w` or `pcalau12i`.
626 case R_LARCH_ABS_HI20
:
627 case R_LARCH_PCALA_HI20
:
628 case R_LARCH_GOT_PC_HI20
:
629 case R_LARCH_GOT_HI20
:
630 case R_LARCH_TLS_LE_HI20
:
631 case R_LARCH_TLS_IE_PC_HI20
:
632 case R_LARCH_TLS_IE_HI20
:
633 case R_LARCH_TLS_LD_PC_HI20
:
634 case R_LARCH_TLS_LD_HI20
:
635 case R_LARCH_TLS_GD_PC_HI20
:
636 case R_LARCH_TLS_GD_HI20
:
637 case R_LARCH_TLS_DESC_PC_HI20
:
638 case R_LARCH_TLS_DESC_HI20
:
639 write32le(loc
, setJ20(read32le(loc
), extractBits(val
, 31, 12)));
642 // Relocs intended for `lu32i.d`.
643 case R_LARCH_ABS64_LO20
:
644 case R_LARCH_PCALA64_LO20
:
645 case R_LARCH_GOT64_PC_LO20
:
646 case R_LARCH_GOT64_LO20
:
647 case R_LARCH_TLS_LE64_LO20
:
648 case R_LARCH_TLS_IE64_PC_LO20
:
649 case R_LARCH_TLS_IE64_LO20
:
650 case R_LARCH_TLS_DESC64_PC_LO20
:
651 case R_LARCH_TLS_DESC64_LO20
:
652 write32le(loc
, setJ20(read32le(loc
), extractBits(val
, 51, 32)));
655 // Relocs intended for `lu52i.d`.
656 case R_LARCH_ABS64_HI12
:
657 case R_LARCH_PCALA64_HI12
:
658 case R_LARCH_GOT64_PC_HI12
:
659 case R_LARCH_GOT64_HI12
:
660 case R_LARCH_TLS_LE64_HI12
:
661 case R_LARCH_TLS_IE64_PC_HI12
:
662 case R_LARCH_TLS_IE64_HI12
:
663 case R_LARCH_TLS_DESC64_PC_HI12
:
664 case R_LARCH_TLS_DESC64_HI12
:
665 write32le(loc
, setK12(read32le(loc
), extractBits(val
, 63, 52)));
669 *loc
= (*loc
& 0xc0) | ((*loc
+ val
) & 0x3f);
675 write16le(loc
, read16le(loc
) + val
);
678 write32le(loc
, read32le(loc
) + val
);
681 write64le(loc
, read64le(loc
) + val
);
683 case R_LARCH_ADD_ULEB128
:
684 handleUleb128(loc
, val
);
687 *loc
= (*loc
& 0xc0) | ((*loc
- val
) & 0x3f);
693 write16le(loc
, read16le(loc
) - val
);
696 write32le(loc
, read32le(loc
) - val
);
699 write64le(loc
, read64le(loc
) - val
);
701 case R_LARCH_SUB_ULEB128
:
702 handleUleb128(loc
, -val
);
705 case R_LARCH_MARK_LA
:
706 case R_LARCH_MARK_PCREL
:
711 return; // Ignored (for now)
713 case R_LARCH_TLS_DESC_LD
:
714 return; // nothing to do.
715 case R_LARCH_TLS_DESC32
:
716 write32le(loc
+ 4, val
);
718 case R_LARCH_TLS_DESC64
:
719 write64le(loc
+ 8, val
);
723 llvm_unreachable("unknown relocation");
727 static bool relax(InputSection
&sec
) {
728 const uint64_t secAddr
= sec
.getVA();
729 const MutableArrayRef
<Relocation
> relocs
= sec
.relocs();
730 auto &aux
= *sec
.relaxAux
;
731 bool changed
= false;
732 ArrayRef
<SymbolAnchor
> sa
= ArrayRef(aux
.anchors
);
735 std::fill_n(aux
.relocTypes
.get(), relocs
.size(), R_LARCH_NONE
);
737 for (auto [i
, r
] : llvm::enumerate(relocs
)) {
738 const uint64_t loc
= secAddr
+ r
.offset
- delta
;
739 uint32_t &cur
= aux
.relocDeltas
[i
], remove
= 0;
741 case R_LARCH_ALIGN
: {
742 const uint64_t addend
=
743 r
.sym
->isUndefined() ? Log2_64(r
.addend
) + 1 : r
.addend
;
744 const uint64_t allBytes
= (1ULL << (addend
& 0xff)) - 4;
745 const uint64_t align
= 1ULL << (addend
& 0xff);
746 const uint64_t maxBytes
= addend
>> 8;
747 const uint64_t off
= loc
& (align
- 1);
748 const uint64_t curBytes
= off
== 0 ? 0 : align
- off
;
749 // All bytes beyond the alignment boundary should be removed.
750 // If emit bytes more than max bytes to emit, remove all.
751 if (maxBytes
!= 0 && curBytes
> maxBytes
)
754 remove
= allBytes
- curBytes
;
755 // If we can't satisfy this alignment, we've found a bad input.
756 if (LLVM_UNLIKELY(static_cast<int32_t>(remove
) < 0)) {
757 errorOrWarn(getErrorLocation((const uint8_t *)loc
) +
758 "insufficient padding bytes for " + lld::toString(r
.type
) +
759 ": " + Twine(allBytes
) + " bytes available for " +
760 "requested alignment of " + Twine(align
) + " bytes");
767 // For all anchors whose offsets are <= r.offset, they are preceded by
768 // the previous relocation whose `relocDeltas` value equals `delta`.
769 // Decrease their st_value and update their st_size.
770 for (; sa
.size() && sa
[0].offset
<= r
.offset
; sa
= sa
.slice(1)) {
772 sa
[0].d
->size
= sa
[0].offset
- delta
- sa
[0].d
->value
;
774 sa
[0].d
->value
= sa
[0].offset
- delta
;
783 for (const SymbolAnchor
&a
: sa
) {
785 a
.d
->size
= a
.offset
- delta
- a
.d
->value
;
787 a
.d
->value
= a
.offset
- delta
;
789 // Inform assignAddresses that the size has changed.
790 if (!isUInt
<32>(delta
))
791 fatal("section size decrease is too large: " + Twine(delta
));
792 sec
.bytesDropped
= delta
;
796 // When relaxing just R_LARCH_ALIGN, relocDeltas is usually changed only once in
797 // the absence of a linker script. For call and load/store R_LARCH_RELAX, code
798 // shrinkage may reduce displacement and make more relocations eligible for
799 // relaxation. Code shrinkage may increase displacement to a call/load/store
800 // target at a higher fixed address, invalidating an earlier relaxation. Any
801 // change in section sizes can have cascading effect and require another
803 bool LoongArch::relaxOnce(int pass
) const {
804 if (config
->relocatable
)
810 SmallVector
<InputSection
*, 0> storage
;
811 bool changed
= false;
812 for (OutputSection
*osec
: outputSections
) {
813 if (!(osec
->flags
& SHF_EXECINSTR
))
815 for (InputSection
*sec
: getInputSections(*osec
, storage
))
816 changed
|= relax(*sec
);
821 void LoongArch::finalizeRelax(int passes
) const {
822 log("relaxation passes: " + Twine(passes
));
823 SmallVector
<InputSection
*, 0> storage
;
824 for (OutputSection
*osec
: outputSections
) {
825 if (!(osec
->flags
& SHF_EXECINSTR
))
827 for (InputSection
*sec
: getInputSections(*osec
, storage
)) {
828 RelaxAux
&aux
= *sec
->relaxAux
;
829 if (!aux
.relocDeltas
)
832 MutableArrayRef
<Relocation
> rels
= sec
->relocs();
833 ArrayRef
<uint8_t> old
= sec
->content();
834 size_t newSize
= old
.size() - aux
.relocDeltas
[rels
.size() - 1];
835 uint8_t *p
= context().bAlloc
.Allocate
<uint8_t>(newSize
);
840 sec
->bytesDropped
= 0;
842 // Update section content: remove NOPs for R_LARCH_ALIGN and rewrite
843 // instructions for relaxed relocations.
844 for (size_t i
= 0, e
= rels
.size(); i
!= e
; ++i
) {
845 uint32_t remove
= aux
.relocDeltas
[i
] - delta
;
846 delta
= aux
.relocDeltas
[i
];
847 if (remove
== 0 && aux
.relocTypes
[i
] == R_LARCH_NONE
)
850 // Copy from last location to the current relocated location.
851 const Relocation
&r
= rels
[i
];
852 uint64_t size
= r
.offset
- offset
;
853 memcpy(p
, old
.data() + offset
, size
);
855 offset
= r
.offset
+ remove
;
857 memcpy(p
, old
.data() + offset
, old
.size() - offset
);
859 // Subtract the previous relocDeltas value from the relocation offset.
860 // For a pair of R_LARCH_XXX/R_LARCH_RELAX with the same offset, decrease
861 // their r_offset by the same delta.
863 for (size_t i
= 0, e
= rels
.size(); i
!= e
;) {
864 uint64_t cur
= rels
[i
].offset
;
866 rels
[i
].offset
-= delta
;
867 if (aux
.relocTypes
[i
] != R_LARCH_NONE
)
868 rels
[i
].type
= aux
.relocTypes
[i
];
869 } while (++i
!= e
&& rels
[i
].offset
== cur
);
870 delta
= aux
.relocDeltas
[i
- 1];
876 TargetInfo
*elf::getLoongArchTargetInfo() {
877 static LoongArch target
;