// From llvm-project.git: lld/ELF/Arch/LoongArch.cpp
// (blob c6ee73f23d471ac1bc46471cd8dda79e40a34030; commit subject:
//  "[NFC][opt] Improve help message (#97805)")
//===- LoongArch.cpp ------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
#include "InputFiles.h"
#include "OutputSections.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"
#include "llvm/Support/LEB128.h"

using namespace llvm;
using namespace llvm::object;
using namespace llvm::support::endian;
using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf;
23 namespace {
24 class LoongArch final : public TargetInfo {
25 public:
26 LoongArch();
27 uint32_t calcEFlags() const override;
28 int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
29 void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
30 void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
31 void writePltHeader(uint8_t *buf) const override;
32 void writePlt(uint8_t *buf, const Symbol &sym,
33 uint64_t pltEntryAddr) const override;
34 RelType getDynRel(RelType type) const override;
35 RelExpr getRelExpr(RelType type, const Symbol &s,
36 const uint8_t *loc) const override;
37 bool usesOnlyLowPageBits(RelType type) const override;
38 void relocate(uint8_t *loc, const Relocation &rel,
39 uint64_t val) const override;
40 bool relaxOnce(int pass) const override;
41 void finalizeRelax(int passes) const override;
43 } // end anonymous namespace
namespace {
// Opcode words (register/immediate fields zeroed) for the instructions the
// linker synthesizes, e.g. in PLT entries.
enum Op {
  SUB_W = 0x00110000,
  SUB_D = 0x00118000,
  BREAK = 0x002a0000,
  SRLI_W = 0x00448000,
  SRLI_D = 0x00450000,
  ADDI_W = 0x02800000,
  ADDI_D = 0x02c00000,
  ANDI = 0x03400000,
  PCADDU12I = 0x1c000000,
  LD_W = 0x28800000,
  LD_D = 0x28c00000,
  JIRL = 0x4c000000,
};

// Register numbers used in linker-synthesized code.
enum Reg {
  R_ZERO = 0,
  R_RA = 1,
  R_TP = 2,
  R_T0 = 12,
  R_T1 = 13,
  R_T2 = 14,
  R_T3 = 15,
};
} // namespace
// Mask out the input's lowest 12 bits for use with `pcalau12i`, in sequences
// like `pcalau12i + addi.[wd]` or `pcalau12i + {ld,st}.*` where the `pcalau12i`
// produces a PC-relative intermediate value with the lowest 12 bits zeroed (the
// "page") for the next instruction to add in the "page offset". (`pcalau12i`
// stands for something like "PC ALigned Add Upper that starts from the 12th
// bit, Immediate".)
//
// Here a "page" is in fact just another way to refer to the 12-bit range
// allowed by the immediate field of the addi/ld/st instructions, and not
// related to the system or the kernel's actual page size. The semantics happen
// to match the AArch64 `adrp`, so the concept of "page" is borrowed here.
static uint64_t getLoongArchPage(uint64_t p) {
  return p & ~static_cast<uint64_t>(0xfff);
}
// Return the lowest 12 bits of val (the "page offset").
static uint32_t lo12(uint32_t val) { return val & 0xfff; }
89 // Calculate the adjusted page delta between dest and PC.
90 uint64_t elf::getLoongArchPageDelta(uint64_t dest, uint64_t pc, RelType type) {
91 // Note that if the sequence being relocated is `pcalau12i + addi.d + lu32i.d
92 // + lu52i.d`, they must be adjacent so that we can infer the PC of
93 // `pcalau12i` when calculating the page delta for the other two instructions
94 // (lu32i.d and lu52i.d). Compensate all the sign-extensions is a bit
95 // complicated. Just use psABI recommended algorithm.
96 uint64_t pcalau12i_pc;
97 switch (type) {
98 case R_LARCH_PCALA64_LO20:
99 case R_LARCH_GOT64_PC_LO20:
100 case R_LARCH_TLS_IE64_PC_LO20:
101 case R_LARCH_TLS_DESC64_PC_LO20:
102 pcalau12i_pc = pc - 8;
103 break;
104 case R_LARCH_PCALA64_HI12:
105 case R_LARCH_GOT64_PC_HI12:
106 case R_LARCH_TLS_IE64_PC_HI12:
107 case R_LARCH_TLS_DESC64_PC_HI12:
108 pcalau12i_pc = pc - 12;
109 break;
110 default:
111 pcalau12i_pc = pc;
112 break;
114 uint64_t result = getLoongArchPage(dest) - getLoongArchPage(pcalau12i_pc);
115 if (dest & 0x800)
116 result += 0x1000 - 0x1'0000'0000;
117 if (result & 0x8000'0000)
118 result += 0x1'0000'0000;
119 return result;
// Return bits [31:12] of val, rounded so a following sign-extended lo12
// addend reconstructs the full value.
static uint32_t hi20(uint32_t val) { return (val + 0x800) >> 12; }
// Assemble a 3-register-format instruction word: opcode `op` with destination
// register d, source register j, and register/immediate field k.
static uint32_t insn(uint32_t op, uint32_t d, uint32_t j, uint32_t k) {
  return op | d | (j << 5) | (k << 10);
}
// Extract bits v[begin:end], where range is inclusive.
static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) {
  // begin == 63 is special-cased to avoid shifting by 64 (UB).
  return begin == 63 ? v >> end : (v & ((1ULL << (begin + 1)) - 1)) >> end;
}

// Patch the D5k16 immediate (imm[15:0] at bits 25:10, imm[20:16] at bits 4:0)
// into insn, used by b21-style branches.
static uint32_t setD5k16(uint32_t insn, uint32_t imm) {
  uint32_t immLo = extractBits(imm, 15, 0);
  uint32_t immHi = extractBits(imm, 20, 16);
  return (insn & 0xfc0003e0) | (immLo << 10) | immHi;
}

// Patch the D10k16 immediate (imm[15:0] at bits 25:10, imm[25:16] at bits 9:0)
// into insn, used by b26-style branches.
static uint32_t setD10k16(uint32_t insn, uint32_t imm) {
  uint32_t immLo = extractBits(imm, 15, 0);
  uint32_t immHi = extractBits(imm, 25, 16);
  return (insn & 0xfc000000) | (immLo << 10) | immHi;
}

// Patch a 20-bit immediate at bits 24:5 into insn (lu12i.w/pcalau12i class).
static uint32_t setJ20(uint32_t insn, uint32_t imm) {
  return (insn & 0xfe00001f) | (extractBits(imm, 19, 0) << 5);
}

// Patch a 12-bit immediate at bits 21:10 into insn (addi/ld/st class).
static uint32_t setK12(uint32_t insn, uint32_t imm) {
  return (insn & 0xffc003ff) | (extractBits(imm, 11, 0) << 10);
}

// Patch a 16-bit immediate at bits 25:10 into insn (b16/jirl class).
static uint32_t setK16(uint32_t insn, uint32_t imm) {
  return (insn & 0xfc0003ff) | (extractBits(imm, 15, 0) << 10);
}
157 static bool isJirl(uint32_t insn) {
158 return (insn & 0xfc000000) == JIRL;
161 static void handleUleb128(uint8_t *loc, uint64_t val) {
162 const uint32_t maxcount = 1 + 64 / 7;
163 uint32_t count;
164 const char *error = nullptr;
165 uint64_t orig = decodeULEB128(loc, &count, nullptr, &error);
166 if (count > maxcount || (count == maxcount && error))
167 errorOrWarn(getErrorLocation(loc) + "extra space for uleb128");
168 uint64_t mask = count < maxcount ? (1ULL << 7 * count) - 1 : -1ULL;
169 encodeULEB128((orig + val) & mask, loc, count);
172 LoongArch::LoongArch() {
173 // The LoongArch ISA itself does not have a limit on page sizes. According to
174 // the ISA manual, the PS (page size) field in MTLB entries and CSR.STLBPS is
175 // 6 bits wide, meaning the maximum page size is 2^63 which is equivalent to
176 // "unlimited".
177 // However, practically the maximum usable page size is constrained by the
178 // kernel implementation, and 64KiB is the biggest non-huge page size
179 // supported by Linux as of v6.4. The most widespread page size in use,
180 // though, is 16KiB.
181 defaultCommonPageSize = 16384;
182 defaultMaxPageSize = 65536;
183 write32le(trapInstr.data(), BREAK); // break 0
185 copyRel = R_LARCH_COPY;
186 pltRel = R_LARCH_JUMP_SLOT;
187 relativeRel = R_LARCH_RELATIVE;
188 iRelativeRel = R_LARCH_IRELATIVE;
190 if (config->is64) {
191 symbolicRel = R_LARCH_64;
192 tlsModuleIndexRel = R_LARCH_TLS_DTPMOD64;
193 tlsOffsetRel = R_LARCH_TLS_DTPREL64;
194 tlsGotRel = R_LARCH_TLS_TPREL64;
195 tlsDescRel = R_LARCH_TLS_DESC64;
196 } else {
197 symbolicRel = R_LARCH_32;
198 tlsModuleIndexRel = R_LARCH_TLS_DTPMOD32;
199 tlsOffsetRel = R_LARCH_TLS_DTPREL32;
200 tlsGotRel = R_LARCH_TLS_TPREL32;
201 tlsDescRel = R_LARCH_TLS_DESC32;
204 gotRel = symbolicRel;
206 // .got.plt[0] = _dl_runtime_resolve, .got.plt[1] = link_map
207 gotPltHeaderEntriesNum = 2;
209 pltHeaderSize = 32;
210 pltEntrySize = 16;
211 ipltEntrySize = 16;
214 static uint32_t getEFlags(const InputFile *f) {
215 if (config->is64)
216 return cast<ObjFile<ELF64LE>>(f)->getObj().getHeader().e_flags;
217 return cast<ObjFile<ELF32LE>>(f)->getObj().getHeader().e_flags;
220 static bool inputFileHasCode(const InputFile *f) {
221 for (const auto *sec : f->getSections())
222 if (sec && sec->flags & SHF_EXECINSTR)
223 return true;
225 return false;
228 uint32_t LoongArch::calcEFlags() const {
229 // If there are only binary input files (from -b binary), use a
230 // value of 0 for the ELF header flags.
231 if (ctx.objectFiles.empty())
232 return 0;
234 uint32_t target = 0;
235 const InputFile *targetFile;
236 for (const InputFile *f : ctx.objectFiles) {
237 // Do not enforce ABI compatibility if the input file does not contain code.
238 // This is useful for allowing linkage with data-only object files produced
239 // with tools like objcopy, that have zero e_flags.
240 if (!inputFileHasCode(f))
241 continue;
243 // Take the first non-zero e_flags as the reference.
244 uint32_t flags = getEFlags(f);
245 if (target == 0 && flags != 0) {
246 target = flags;
247 targetFile = f;
250 if ((flags & EF_LOONGARCH_ABI_MODIFIER_MASK) !=
251 (target & EF_LOONGARCH_ABI_MODIFIER_MASK))
252 error(toString(f) +
253 ": cannot link object files with different ABI from " +
254 toString(targetFile));
256 // We cannot process psABI v1.x / object ABI v0 files (containing stack
257 // relocations), unlike ld.bfd.
259 // Instead of blindly accepting every v0 object and only failing at
260 // relocation processing time, just disallow interlink altogether. We
261 // don't expect significant usage of object ABI v0 in the wild (the old
262 // world may continue using object ABI v0 for a while, but as it's not
263 // binary-compatible with the upstream i.e. new-world ecosystem, it's not
264 // being considered here).
266 // There are briefly some new-world systems with object ABI v0 binaries too.
267 // It is because these systems were built before the new ABI was finalized.
268 // These are not supported either due to the extremely small number of them,
269 // and the few impacted users are advised to simply rebuild world or
270 // reinstall a recent system.
271 if ((flags & EF_LOONGARCH_OBJABI_MASK) != EF_LOONGARCH_OBJABI_V1)
272 error(toString(f) + ": unsupported object file ABI version");
275 return target;
278 int64_t LoongArch::getImplicitAddend(const uint8_t *buf, RelType type) const {
279 switch (type) {
280 default:
281 internalLinkerError(getErrorLocation(buf),
282 "cannot read addend for relocation " + toString(type));
283 return 0;
284 case R_LARCH_32:
285 case R_LARCH_TLS_DTPMOD32:
286 case R_LARCH_TLS_DTPREL32:
287 case R_LARCH_TLS_TPREL32:
288 return SignExtend64<32>(read32le(buf));
289 case R_LARCH_64:
290 case R_LARCH_TLS_DTPMOD64:
291 case R_LARCH_TLS_DTPREL64:
292 case R_LARCH_TLS_TPREL64:
293 return read64le(buf);
294 case R_LARCH_RELATIVE:
295 case R_LARCH_IRELATIVE:
296 return config->is64 ? read64le(buf) : read32le(buf);
297 case R_LARCH_NONE:
298 case R_LARCH_JUMP_SLOT:
299 // These relocations are defined as not having an implicit addend.
300 return 0;
301 case R_LARCH_TLS_DESC32:
302 return read32le(buf + 4);
303 case R_LARCH_TLS_DESC64:
304 return read64le(buf + 8);
308 void LoongArch::writeGotPlt(uint8_t *buf, const Symbol &s) const {
309 if (config->is64)
310 write64le(buf, in.plt->getVA());
311 else
312 write32le(buf, in.plt->getVA());
315 void LoongArch::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
316 if (config->writeAddends) {
317 if (config->is64)
318 write64le(buf, s.getVA());
319 else
320 write32le(buf, s.getVA());
324 void LoongArch::writePltHeader(uint8_t *buf) const {
325 // The LoongArch PLT is currently structured just like that of RISCV.
326 // Annoyingly, this means the PLT is still using `pcaddu12i` to perform
327 // PC-relative addressing (because `pcaddu12i` is the same as RISCV `auipc`),
328 // in contrast to the AArch64-like page-offset scheme with `pcalau12i` that
329 // is used everywhere else involving PC-relative operations in the LoongArch
330 // ELF psABI v2.00.
332 // The `pcrel_{hi20,lo12}` operators are illustrative only and not really
333 // supported by LoongArch assemblers.
335 // pcaddu12i $t2, %pcrel_hi20(.got.plt)
336 // sub.[wd] $t1, $t1, $t3
337 // ld.[wd] $t3, $t2, %pcrel_lo12(.got.plt) ; t3 = _dl_runtime_resolve
338 // addi.[wd] $t1, $t1, -pltHeaderSize-12 ; t1 = &.plt[i] - &.plt[0]
339 // addi.[wd] $t0, $t2, %pcrel_lo12(.got.plt)
340 // srli.[wd] $t1, $t1, (is64?1:2) ; t1 = &.got.plt[i] - &.got.plt[0]
341 // ld.[wd] $t0, $t0, Wordsize ; t0 = link_map
342 // jr $t3
343 uint32_t offset = in.gotPlt->getVA() - in.plt->getVA();
344 uint32_t sub = config->is64 ? SUB_D : SUB_W;
345 uint32_t ld = config->is64 ? LD_D : LD_W;
346 uint32_t addi = config->is64 ? ADDI_D : ADDI_W;
347 uint32_t srli = config->is64 ? SRLI_D : SRLI_W;
348 write32le(buf + 0, insn(PCADDU12I, R_T2, hi20(offset), 0));
349 write32le(buf + 4, insn(sub, R_T1, R_T1, R_T3));
350 write32le(buf + 8, insn(ld, R_T3, R_T2, lo12(offset)));
351 write32le(buf + 12, insn(addi, R_T1, R_T1, lo12(-target->pltHeaderSize - 12)));
352 write32le(buf + 16, insn(addi, R_T0, R_T2, lo12(offset)));
353 write32le(buf + 20, insn(srli, R_T1, R_T1, config->is64 ? 1 : 2));
354 write32le(buf + 24, insn(ld, R_T0, R_T0, config->wordsize));
355 write32le(buf + 28, insn(JIRL, R_ZERO, R_T3, 0));
358 void LoongArch::writePlt(uint8_t *buf, const Symbol &sym,
359 uint64_t pltEntryAddr) const {
360 // See the comment in writePltHeader for reason why pcaddu12i is used instead
361 // of the pcalau12i that's more commonly seen in the ELF psABI v2.0 days.
363 // pcaddu12i $t3, %pcrel_hi20(f@.got.plt)
364 // ld.[wd] $t3, $t3, %pcrel_lo12(f@.got.plt)
365 // jirl $t1, $t3, 0
366 // nop
367 uint32_t offset = sym.getGotPltVA() - pltEntryAddr;
368 write32le(buf + 0, insn(PCADDU12I, R_T3, hi20(offset), 0));
369 write32le(buf + 4,
370 insn(config->is64 ? LD_D : LD_W, R_T3, R_T3, lo12(offset)));
371 write32le(buf + 8, insn(JIRL, R_T1, R_T3, 0));
372 write32le(buf + 12, insn(ANDI, R_ZERO, R_ZERO, 0));
375 RelType LoongArch::getDynRel(RelType type) const {
376 return type == target->symbolicRel ? type
377 : static_cast<RelType>(R_LARCH_NONE);
380 RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s,
381 const uint8_t *loc) const {
382 switch (type) {
383 case R_LARCH_NONE:
384 case R_LARCH_MARK_LA:
385 case R_LARCH_MARK_PCREL:
386 return R_NONE;
387 case R_LARCH_32:
388 case R_LARCH_64:
389 case R_LARCH_ABS_HI20:
390 case R_LARCH_ABS_LO12:
391 case R_LARCH_ABS64_LO20:
392 case R_LARCH_ABS64_HI12:
393 return R_ABS;
394 case R_LARCH_PCALA_LO12:
395 // We could just R_ABS, but the JIRL instruction reuses the relocation type
396 // for a different purpose. The questionable usage is part of glibc 2.37
397 // libc_nonshared.a [1], which is linked into user programs, so we have to
398 // work around it for a while, even if a new relocation type may be
399 // introduced in the future [2].
401 // [1]: https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=9f482b73f41a9a1bbfb173aad0733d1c824c788a
402 // [2]: https://github.com/loongson/la-abi-specs/pull/3
403 return isJirl(read32le(loc)) ? R_PLT : R_ABS;
404 case R_LARCH_TLS_DTPREL32:
405 case R_LARCH_TLS_DTPREL64:
406 return R_DTPREL;
407 case R_LARCH_TLS_TPREL32:
408 case R_LARCH_TLS_TPREL64:
409 case R_LARCH_TLS_LE_HI20:
410 case R_LARCH_TLS_LE_LO12:
411 case R_LARCH_TLS_LE64_LO20:
412 case R_LARCH_TLS_LE64_HI12:
413 return R_TPREL;
414 case R_LARCH_ADD6:
415 case R_LARCH_ADD8:
416 case R_LARCH_ADD16:
417 case R_LARCH_ADD32:
418 case R_LARCH_ADD64:
419 case R_LARCH_ADD_ULEB128:
420 case R_LARCH_SUB6:
421 case R_LARCH_SUB8:
422 case R_LARCH_SUB16:
423 case R_LARCH_SUB32:
424 case R_LARCH_SUB64:
425 case R_LARCH_SUB_ULEB128:
426 // The LoongArch add/sub relocs behave like the RISCV counterparts; reuse
427 // the RelExpr to avoid code duplication.
428 return R_RISCV_ADD;
429 case R_LARCH_32_PCREL:
430 case R_LARCH_64_PCREL:
431 case R_LARCH_PCREL20_S2:
432 return R_PC;
433 case R_LARCH_B16:
434 case R_LARCH_B21:
435 case R_LARCH_B26:
436 case R_LARCH_CALL36:
437 return R_PLT_PC;
438 case R_LARCH_GOT_PC_HI20:
439 case R_LARCH_GOT64_PC_LO20:
440 case R_LARCH_GOT64_PC_HI12:
441 case R_LARCH_TLS_IE_PC_HI20:
442 case R_LARCH_TLS_IE64_PC_LO20:
443 case R_LARCH_TLS_IE64_PC_HI12:
444 return R_LOONGARCH_GOT_PAGE_PC;
445 case R_LARCH_GOT_PC_LO12:
446 case R_LARCH_TLS_IE_PC_LO12:
447 return R_LOONGARCH_GOT;
448 case R_LARCH_TLS_LD_PC_HI20:
449 case R_LARCH_TLS_GD_PC_HI20:
450 return R_LOONGARCH_TLSGD_PAGE_PC;
451 case R_LARCH_PCALA_HI20:
452 // Why not R_LOONGARCH_PAGE_PC, majority of references don't go through PLT
453 // anyway so why waste time checking only to get everything relaxed back to
454 // it?
456 // This is again due to the R_LARCH_PCALA_LO12 on JIRL case, where we want
457 // both the HI20 and LO12 to potentially refer to the PLT. But in reality
458 // the HI20 reloc appears earlier, and the relocs don't contain enough
459 // information to let us properly resolve semantics per symbol.
460 // Unlike RISCV, our LO12 relocs *do not* point to their corresponding HI20
461 // relocs, hence it is nearly impossible to 100% accurately determine each
462 // HI20's "flavor" without taking big performance hits, in the presence of
463 // edge cases (e.g. HI20 without pairing LO12; paired LO12 placed so far
464 // apart that relationship is not certain anymore), and programmer mistakes
465 // (e.g. as outlined in https://github.com/loongson/la-abi-specs/pull/3).
467 // Ideally we would scan in an extra pass for all LO12s on JIRL, then mark
468 // every HI20 reloc referring to the same symbol differently; this is not
469 // feasible with the current function signature of getRelExpr that doesn't
470 // allow for such inter-pass state.
472 // So, unfortunately we have to again workaround this quirk the same way as
473 // BFD: assuming every R_LARCH_PCALA_HI20 is potentially PLT-needing, only
474 // relaxing back to R_LOONGARCH_PAGE_PC if it's known not so at a later
475 // stage.
476 return R_LOONGARCH_PLT_PAGE_PC;
477 case R_LARCH_PCALA64_LO20:
478 case R_LARCH_PCALA64_HI12:
479 return R_LOONGARCH_PAGE_PC;
480 case R_LARCH_GOT_HI20:
481 case R_LARCH_GOT_LO12:
482 case R_LARCH_GOT64_LO20:
483 case R_LARCH_GOT64_HI12:
484 case R_LARCH_TLS_IE_HI20:
485 case R_LARCH_TLS_IE_LO12:
486 case R_LARCH_TLS_IE64_LO20:
487 case R_LARCH_TLS_IE64_HI12:
488 return R_GOT;
489 case R_LARCH_TLS_LD_HI20:
490 return R_TLSLD_GOT;
491 case R_LARCH_TLS_GD_HI20:
492 return R_TLSGD_GOT;
493 case R_LARCH_RELAX:
494 return config->relax ? R_RELAX_HINT : R_NONE;
495 case R_LARCH_ALIGN:
496 return R_RELAX_HINT;
497 case R_LARCH_TLS_DESC_PC_HI20:
498 case R_LARCH_TLS_DESC64_PC_LO20:
499 case R_LARCH_TLS_DESC64_PC_HI12:
500 return R_LOONGARCH_TLSDESC_PAGE_PC;
501 case R_LARCH_TLS_DESC_PC_LO12:
502 case R_LARCH_TLS_DESC_LD:
503 case R_LARCH_TLS_DESC_HI20:
504 case R_LARCH_TLS_DESC_LO12:
505 case R_LARCH_TLS_DESC64_LO20:
506 case R_LARCH_TLS_DESC64_HI12:
507 return R_TLSDESC;
508 case R_LARCH_TLS_DESC_CALL:
509 return R_TLSDESC_CALL;
511 // Other known relocs that are explicitly unimplemented:
513 // - psABI v1 relocs that need a stateful stack machine to work, and not
514 // required when implementing psABI v2;
515 // - relocs that are not used anywhere (R_LARCH_{ADD,SUB}_24 [1], and the
516 // two GNU vtable-related relocs).
518 // [1]: https://web.archive.org/web/20230709064026/https://github.com/loongson/LoongArch-Documentation/issues/51
519 default:
520 error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
521 ") against symbol " + toString(s));
522 return R_NONE;
526 bool LoongArch::usesOnlyLowPageBits(RelType type) const {
527 switch (type) {
528 default:
529 return false;
530 case R_LARCH_PCALA_LO12:
531 case R_LARCH_GOT_LO12:
532 case R_LARCH_GOT_PC_LO12:
533 case R_LARCH_TLS_IE_PC_LO12:
534 case R_LARCH_TLS_DESC_LO12:
535 case R_LARCH_TLS_DESC_PC_LO12:
536 return true;
540 void LoongArch::relocate(uint8_t *loc, const Relocation &rel,
541 uint64_t val) const {
542 switch (rel.type) {
543 case R_LARCH_32_PCREL:
544 checkInt(loc, val, 32, rel);
545 [[fallthrough]];
546 case R_LARCH_32:
547 case R_LARCH_TLS_DTPREL32:
548 write32le(loc, val);
549 return;
550 case R_LARCH_64:
551 case R_LARCH_TLS_DTPREL64:
552 case R_LARCH_64_PCREL:
553 write64le(loc, val);
554 return;
556 case R_LARCH_PCREL20_S2:
557 checkInt(loc, val, 22, rel);
558 checkAlignment(loc, val, 4, rel);
559 write32le(loc, setJ20(read32le(loc), val >> 2));
560 return;
562 case R_LARCH_B16:
563 checkInt(loc, val, 18, rel);
564 checkAlignment(loc, val, 4, rel);
565 write32le(loc, setK16(read32le(loc), val >> 2));
566 return;
568 case R_LARCH_B21:
569 checkInt(loc, val, 23, rel);
570 checkAlignment(loc, val, 4, rel);
571 write32le(loc, setD5k16(read32le(loc), val >> 2));
572 return;
574 case R_LARCH_B26:
575 checkInt(loc, val, 28, rel);
576 checkAlignment(loc, val, 4, rel);
577 write32le(loc, setD10k16(read32le(loc), val >> 2));
578 return;
580 case R_LARCH_CALL36: {
581 // This relocation is designed for adjacent pcaddu18i+jirl pairs that
582 // are patched in one time. Because of sign extension of these insns'
583 // immediate fields, the relocation range is [-128G - 0x20000, +128G -
584 // 0x20000) (of course must be 4-byte aligned).
585 if (((int64_t)val + 0x20000) != llvm::SignExtend64(val + 0x20000, 38))
586 reportRangeError(loc, rel, Twine(val), llvm::minIntN(38) - 0x20000,
587 llvm::maxIntN(38) - 0x20000);
588 checkAlignment(loc, val, 4, rel);
589 // Since jirl performs sign extension on the offset immediate, adds (1<<17)
590 // to original val to get the correct hi20.
591 uint32_t hi20 = extractBits(val + (1 << 17), 37, 18);
592 // Despite the name, the lower part is actually 18 bits with 4-byte aligned.
593 uint32_t lo16 = extractBits(val, 17, 2);
594 write32le(loc, setJ20(read32le(loc), hi20));
595 write32le(loc + 4, setK16(read32le(loc + 4), lo16));
596 return;
599 // Relocs intended for `addi`, `ld` or `st`.
600 case R_LARCH_PCALA_LO12:
601 // We have to again inspect the insn word to handle the R_LARCH_PCALA_LO12
602 // on JIRL case: firstly JIRL wants its immediate's 2 lowest zeroes
603 // removed by us (in contrast to regular R_LARCH_PCALA_LO12), secondly
604 // its immediate slot width is different too (16, not 12).
605 // In this case, process like an R_LARCH_B16, but without overflow checking
606 // and only taking the value's lowest 12 bits.
607 if (isJirl(read32le(loc))) {
608 checkAlignment(loc, val, 4, rel);
609 val = SignExtend64<12>(val);
610 write32le(loc, setK16(read32le(loc), val >> 2));
611 return;
613 [[fallthrough]];
614 case R_LARCH_ABS_LO12:
615 case R_LARCH_GOT_PC_LO12:
616 case R_LARCH_GOT_LO12:
617 case R_LARCH_TLS_LE_LO12:
618 case R_LARCH_TLS_IE_PC_LO12:
619 case R_LARCH_TLS_IE_LO12:
620 case R_LARCH_TLS_DESC_PC_LO12:
621 case R_LARCH_TLS_DESC_LO12:
622 write32le(loc, setK12(read32le(loc), extractBits(val, 11, 0)));
623 return;
625 // Relocs intended for `lu12i.w` or `pcalau12i`.
626 case R_LARCH_ABS_HI20:
627 case R_LARCH_PCALA_HI20:
628 case R_LARCH_GOT_PC_HI20:
629 case R_LARCH_GOT_HI20:
630 case R_LARCH_TLS_LE_HI20:
631 case R_LARCH_TLS_IE_PC_HI20:
632 case R_LARCH_TLS_IE_HI20:
633 case R_LARCH_TLS_LD_PC_HI20:
634 case R_LARCH_TLS_LD_HI20:
635 case R_LARCH_TLS_GD_PC_HI20:
636 case R_LARCH_TLS_GD_HI20:
637 case R_LARCH_TLS_DESC_PC_HI20:
638 case R_LARCH_TLS_DESC_HI20:
639 write32le(loc, setJ20(read32le(loc), extractBits(val, 31, 12)));
640 return;
642 // Relocs intended for `lu32i.d`.
643 case R_LARCH_ABS64_LO20:
644 case R_LARCH_PCALA64_LO20:
645 case R_LARCH_GOT64_PC_LO20:
646 case R_LARCH_GOT64_LO20:
647 case R_LARCH_TLS_LE64_LO20:
648 case R_LARCH_TLS_IE64_PC_LO20:
649 case R_LARCH_TLS_IE64_LO20:
650 case R_LARCH_TLS_DESC64_PC_LO20:
651 case R_LARCH_TLS_DESC64_LO20:
652 write32le(loc, setJ20(read32le(loc), extractBits(val, 51, 32)));
653 return;
655 // Relocs intended for `lu52i.d`.
656 case R_LARCH_ABS64_HI12:
657 case R_LARCH_PCALA64_HI12:
658 case R_LARCH_GOT64_PC_HI12:
659 case R_LARCH_GOT64_HI12:
660 case R_LARCH_TLS_LE64_HI12:
661 case R_LARCH_TLS_IE64_PC_HI12:
662 case R_LARCH_TLS_IE64_HI12:
663 case R_LARCH_TLS_DESC64_PC_HI12:
664 case R_LARCH_TLS_DESC64_HI12:
665 write32le(loc, setK12(read32le(loc), extractBits(val, 63, 52)));
666 return;
668 case R_LARCH_ADD6:
669 *loc = (*loc & 0xc0) | ((*loc + val) & 0x3f);
670 return;
671 case R_LARCH_ADD8:
672 *loc += val;
673 return;
674 case R_LARCH_ADD16:
675 write16le(loc, read16le(loc) + val);
676 return;
677 case R_LARCH_ADD32:
678 write32le(loc, read32le(loc) + val);
679 return;
680 case R_LARCH_ADD64:
681 write64le(loc, read64le(loc) + val);
682 return;
683 case R_LARCH_ADD_ULEB128:
684 handleUleb128(loc, val);
685 return;
686 case R_LARCH_SUB6:
687 *loc = (*loc & 0xc0) | ((*loc - val) & 0x3f);
688 return;
689 case R_LARCH_SUB8:
690 *loc -= val;
691 return;
692 case R_LARCH_SUB16:
693 write16le(loc, read16le(loc) - val);
694 return;
695 case R_LARCH_SUB32:
696 write32le(loc, read32le(loc) - val);
697 return;
698 case R_LARCH_SUB64:
699 write64le(loc, read64le(loc) - val);
700 return;
701 case R_LARCH_SUB_ULEB128:
702 handleUleb128(loc, -val);
703 return;
705 case R_LARCH_MARK_LA:
706 case R_LARCH_MARK_PCREL:
707 // no-op
708 return;
710 case R_LARCH_RELAX:
711 return; // Ignored (for now)
713 case R_LARCH_TLS_DESC_LD:
714 return; // nothing to do.
715 case R_LARCH_TLS_DESC32:
716 write32le(loc + 4, val);
717 return;
718 case R_LARCH_TLS_DESC64:
719 write64le(loc + 8, val);
720 return;
722 default:
723 llvm_unreachable("unknown relocation");
727 static bool relax(InputSection &sec) {
728 const uint64_t secAddr = sec.getVA();
729 const MutableArrayRef<Relocation> relocs = sec.relocs();
730 auto &aux = *sec.relaxAux;
731 bool changed = false;
732 ArrayRef<SymbolAnchor> sa = ArrayRef(aux.anchors);
733 uint64_t delta = 0;
735 std::fill_n(aux.relocTypes.get(), relocs.size(), R_LARCH_NONE);
736 aux.writes.clear();
737 for (auto [i, r] : llvm::enumerate(relocs)) {
738 const uint64_t loc = secAddr + r.offset - delta;
739 uint32_t &cur = aux.relocDeltas[i], remove = 0;
740 switch (r.type) {
741 case R_LARCH_ALIGN: {
742 const uint64_t addend =
743 r.sym->isUndefined() ? Log2_64(r.addend) + 1 : r.addend;
744 const uint64_t allBytes = (1ULL << (addend & 0xff)) - 4;
745 const uint64_t align = 1ULL << (addend & 0xff);
746 const uint64_t maxBytes = addend >> 8;
747 const uint64_t off = loc & (align - 1);
748 const uint64_t curBytes = off == 0 ? 0 : align - off;
749 // All bytes beyond the alignment boundary should be removed.
750 // If emit bytes more than max bytes to emit, remove all.
751 if (maxBytes != 0 && curBytes > maxBytes)
752 remove = allBytes;
753 else
754 remove = allBytes - curBytes;
755 // If we can't satisfy this alignment, we've found a bad input.
756 if (LLVM_UNLIKELY(static_cast<int32_t>(remove) < 0)) {
757 errorOrWarn(getErrorLocation((const uint8_t *)loc) +
758 "insufficient padding bytes for " + lld::toString(r.type) +
759 ": " + Twine(allBytes) + " bytes available for " +
760 "requested alignment of " + Twine(align) + " bytes");
761 remove = 0;
763 break;
767 // For all anchors whose offsets are <= r.offset, they are preceded by
768 // the previous relocation whose `relocDeltas` value equals `delta`.
769 // Decrease their st_value and update their st_size.
770 for (; sa.size() && sa[0].offset <= r.offset; sa = sa.slice(1)) {
771 if (sa[0].end)
772 sa[0].d->size = sa[0].offset - delta - sa[0].d->value;
773 else
774 sa[0].d->value = sa[0].offset - delta;
776 delta += remove;
777 if (delta != cur) {
778 cur = delta;
779 changed = true;
783 for (const SymbolAnchor &a : sa) {
784 if (a.end)
785 a.d->size = a.offset - delta - a.d->value;
786 else
787 a.d->value = a.offset - delta;
789 // Inform assignAddresses that the size has changed.
790 if (!isUInt<32>(delta))
791 fatal("section size decrease is too large: " + Twine(delta));
792 sec.bytesDropped = delta;
793 return changed;
796 // When relaxing just R_LARCH_ALIGN, relocDeltas is usually changed only once in
797 // the absence of a linker script. For call and load/store R_LARCH_RELAX, code
798 // shrinkage may reduce displacement and make more relocations eligible for
799 // relaxation. Code shrinkage may increase displacement to a call/load/store
800 // target at a higher fixed address, invalidating an earlier relaxation. Any
801 // change in section sizes can have cascading effect and require another
802 // relaxation pass.
803 bool LoongArch::relaxOnce(int pass) const {
804 if (config->relocatable)
805 return false;
807 if (pass == 0)
808 initSymbolAnchors();
810 SmallVector<InputSection *, 0> storage;
811 bool changed = false;
812 for (OutputSection *osec : outputSections) {
813 if (!(osec->flags & SHF_EXECINSTR))
814 continue;
815 for (InputSection *sec : getInputSections(*osec, storage))
816 changed |= relax(*sec);
818 return changed;
821 void LoongArch::finalizeRelax(int passes) const {
822 log("relaxation passes: " + Twine(passes));
823 SmallVector<InputSection *, 0> storage;
824 for (OutputSection *osec : outputSections) {
825 if (!(osec->flags & SHF_EXECINSTR))
826 continue;
827 for (InputSection *sec : getInputSections(*osec, storage)) {
828 RelaxAux &aux = *sec->relaxAux;
829 if (!aux.relocDeltas)
830 continue;
832 MutableArrayRef<Relocation> rels = sec->relocs();
833 ArrayRef<uint8_t> old = sec->content();
834 size_t newSize = old.size() - aux.relocDeltas[rels.size() - 1];
835 uint8_t *p = context().bAlloc.Allocate<uint8_t>(newSize);
836 uint64_t offset = 0;
837 int64_t delta = 0;
838 sec->content_ = p;
839 sec->size = newSize;
840 sec->bytesDropped = 0;
842 // Update section content: remove NOPs for R_LARCH_ALIGN and rewrite
843 // instructions for relaxed relocations.
844 for (size_t i = 0, e = rels.size(); i != e; ++i) {
845 uint32_t remove = aux.relocDeltas[i] - delta;
846 delta = aux.relocDeltas[i];
847 if (remove == 0 && aux.relocTypes[i] == R_LARCH_NONE)
848 continue;
850 // Copy from last location to the current relocated location.
851 const Relocation &r = rels[i];
852 uint64_t size = r.offset - offset;
853 memcpy(p, old.data() + offset, size);
854 p += size;
855 offset = r.offset + remove;
857 memcpy(p, old.data() + offset, old.size() - offset);
859 // Subtract the previous relocDeltas value from the relocation offset.
860 // For a pair of R_LARCH_XXX/R_LARCH_RELAX with the same offset, decrease
861 // their r_offset by the same delta.
862 delta = 0;
863 for (size_t i = 0, e = rels.size(); i != e;) {
864 uint64_t cur = rels[i].offset;
865 do {
866 rels[i].offset -= delta;
867 if (aux.relocTypes[i] != R_LARCH_NONE)
868 rels[i].type = aux.relocTypes[i];
869 } while (++i != e && rels[i].offset == cur);
870 delta = aux.relocDeltas[i - 1];
876 TargetInfo *elf::getLoongArchTargetInfo() {
877 static LoongArch target;
878 return &target;