//===- LoongArch.cpp ------------------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "InputFiles.h"
#include "OutputSections.h"
#include "Symbols.h"
#include "SyntheticSections.h"
#include "Target.h"

using namespace llvm;
using namespace llvm::object;
using namespace llvm::support::endian;
using namespace llvm::ELF;
using namespace lld;
using namespace lld::elf;
namespace {
class LoongArch final : public TargetInfo {
public:
  LoongArch();
  uint32_t calcEFlags() const override;
  int64_t getImplicitAddend(const uint8_t *buf, RelType type) const override;
  void writeGotPlt(uint8_t *buf, const Symbol &s) const override;
  void writeIgotPlt(uint8_t *buf, const Symbol &s) const override;
  void writePltHeader(uint8_t *buf) const override;
  void writePlt(uint8_t *buf, const Symbol &sym,
                uint64_t pltEntryAddr) const override;
  RelType getDynRel(RelType type) const override;
  RelExpr getRelExpr(RelType type, const Symbol &s,
                     const uint8_t *loc) const override;
  bool usesOnlyLowPageBits(RelType type) const override;
  void relocate(uint8_t *loc, const Relocation &rel,
                uint64_t val) const override;
};
} // end anonymous namespace
enum Op {
  SUB_W = 0x00110000,
  SUB_D = 0x00118000,
  BREAK = 0x002a0000,
  SRLI_W = 0x00448000,
  SRLI_D = 0x00450000,
  ADDI_W = 0x02800000,
  ADDI_D = 0x02c00000,
  ANDI = 0x03400000,
  PCADDU12I = 0x1c000000,
  LD_W = 0x28800000,
  LD_D = 0x28c00000,
  JIRL = 0x4c000000,
};
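// The Op values above are complete instruction words with all operand
// (register/immediate) fields left as zero; insn() further below ORs the
// actual operands into those fields.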
enum Reg {
  R_ZERO = 0,
  R_RA = 1,
  R_TP = 2,
  R_T0 = 12,
  R_T1 = 13,
  R_T2 = 14,
  R_T3 = 15,
};
// Mask out the input's lowest 12 bits for use with `pcalau12i`, in sequences
// like `pcalau12i + addi.[wd]` or `pcalau12i + {ld,st}.*` where the `pcalau12i`
// produces a PC-relative intermediate value with the lowest 12 bits zeroed (the
// "page") for the next instruction to add in the "page offset". (`pcalau12i`
// stands for something like "PC ALigned Add Upper that starts from the 12th
// bit, Immediate".)
//
// Here a "page" is in fact just another way to refer to the 12-bit range
// allowed by the immediate field of the addi/ld/st instructions, and not
// related to the system or the kernel's actual page size. The semantics happen
// to match the AArch64 `adrp`, so the concept of "page" is borrowed here.
static uint64_t getLoongArchPage(uint64_t p) {
  return p & ~static_cast<uint64_t>(0xfff);
}

static uint32_t lo12(uint32_t val) { return val & 0xfff; }
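// For example, for an address 0x12345abc, getLoongArchPage() gives
// 0x12345000 (the "page") and lo12() gives 0xabc (the "page offset").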
// Calculate the adjusted page delta between dest and PC.
uint64_t elf::getLoongArchPageDelta(uint64_t dest, uint64_t pc) {
  // Consider the large code model access pattern, of which the smaller code
  // models' access patterns are a subset:
  //
  //     pcalau12i       U, %foo_hi20(sym)        ; b in [-0x80000, 0x7ffff]
  //     addi.d          T, zero, %foo_lo12(sym)  ; a in [-0x800, 0x7ff]
  //     lu32i.d         T, %foo64_lo20(sym)      ; c in [-0x80000, 0x7ffff]
  //     lu52i.d         T, T, %foo64_hi12(sym)   ; d in [-0x800, 0x7ff]
  //     {ldx,stx,add}.* dest, U, T
  //
  // Let page(pc) = 0xRRR'QQQQQ'PPPPP'000 and dest = 0xZZZ'YYYYY'XXXXX'AAA,
  // with RQ, P, ZY, X and A representing the respective bitfields as unsigned
  // integers. We have:
  //
  //     page(dest) = 0xZZZ'YYYYY'XXXXX'000
  //     - page(pc) = 0xRRR'QQQQQ'PPPPP'000
  //     ----------------------------------
  //                  0xddd'ccccc'bbbbb'000
  //
  // Now consider the above pattern's actual effects:
  //
  //     page(pc)              0xRRR'QQQQQ'PPPPP'000
  //     pcalau12i           + 0xiii'iiiii'bbbbb'000
  //     addi                + 0xjjj'jjjjj'kkkkk'AAA
  //     lu32i.d & lu52i.d   + 0xddd'ccccc'00000'000
  //     --------------------------------------------------
  //     dest = U + T
  //          = ((RQ<<32) + (P<<12) + i + (b<<12)) + (j + k + A + (cd<<32))
  //          = (((RQ+cd)<<32) + i + j) + (((P+b)<<12) + k) + A
  //          = (ZY<<32) + (X<<12) + A
  //
  //     ZY<<32 = (RQ<<32)+(cd<<32)+i+j, X<<12 = (P<<12)+(b<<12)+k
  //     cd<<32 = (ZY<<32)-(RQ<<32)-i-j, b<<12 = (X<<12)-(P<<12)-k
  //
  // where i and k are terms representing the effect of b's and A's sign
  // extension respectively.
  //
  //     i = signed b < 0 ? -0x10000'0000 : 0
  //     k = signed A < 0 ? -0x1000 : 0
  //
  // The j term is a bit complex: it represents the higher half of
  // sign-extended bits from A that are effectively lost if i == 0 but k != 0,
  // due to overwriting by lu32i.d & lu52i.d.
  //
  //     j = signed A < 0 && signed b >= 0 ? 0x10000'0000 : 0
  //
  // The actual effect of the instruction sequence before the final addition,
  // i.e. our desired result value, is thus:
  //
  //     result = (cd<<32) + (b<<12)
  //            = (ZY<<32)-(RQ<<32)-i-j + (X<<12)-(P<<12)-k
  //            = ((ZY<<32)+(X<<12)) - ((RQ<<32)+(P<<12)) - i - j - k
  //            = page(dest) - page(pc) - i - j - k
  //
  // when signed A >= 0 && signed b >= 0:
  //
  //     i = j = k = 0
  //     result = page(dest) - page(pc)
  //
  // when signed A >= 0 && signed b < 0:
  //
  //     i = -0x10000'0000, j = k = 0
  //     result = page(dest) - page(pc) + 0x10000'0000
  //
  // when signed A < 0 && signed b >= 0:
  //
  //     i = 0, j = 0x10000'0000, k = -0x1000
  //     result = page(dest) - page(pc) - 0x10000'0000 + 0x1000
  //
  // when signed A < 0 && signed b < 0:
  //
  //     i = -0x10000'0000, j = 0, k = -0x1000
  //     result = page(dest) - page(pc) + 0x1000
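  //
  // As a worked example (with made-up addresses): getLoongArchPageDelta(
  // 0x1ffc, 0x0) returns 0xffff'ffff'0000'2000. The raw page difference is
  // 0x1000, but A = 0xffc is "negative", so 0x1000 is added and 0x10000'0000
  // subtracted (the third case above). Splitting that result the way the
  // relocation code does gives %pcala_hi20 = 2, and the paired %pcala_lo12
  // (computed from dest itself) is 0xffc, i.e. -4 after sign extension:
  // (2 << 12) - 4 == 0x1ffc as desired.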
  uint64_t result = getLoongArchPage(dest) - getLoongArchPage(pc);
  bool negativeA = lo12(dest) > 0x7ff;
  bool negativeB = (result & 0x8000'0000) != 0;

  if (negativeA)
    result += 0x1000;
  if (negativeA && !negativeB)
    result -= 0x10000'0000;
  else if (!negativeA && negativeB)
    result += 0x10000'0000;

  return result;
}
static uint32_t hi20(uint32_t val) { return (val + 0x800) >> 12; }

static uint32_t insn(uint32_t op, uint32_t d, uint32_t j, uint32_t k) {
  return op | d | (j << 5) | (k << 10);
}
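// For example, hi20(0x1ffc) == 2: the +0x800 rounds the upper 20 bits up
// whenever the low 12 bits exceed 0x7ff, because the consuming addi/ld/st
// sign-extends those low bits into a negative offset. Likewise,
// insn(JIRL, R_ZERO, R_T3, 0) == 0x4c0001e0, i.e. `jirl $zero, $t3, 0`
// (the `jr $t3` written at the end of the PLT header below).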
// Extract bits v[begin:end], where range is inclusive.
static uint32_t extractBits(uint64_t v, uint32_t begin, uint32_t end) {
  return begin == 63 ? v >> end : (v & ((1ULL << (begin + 1)) - 1)) >> end;
}
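// For example, extractBits(0xfedcba9876543210, 51, 32) == 0xcba98 and
// extractBits(0xfedcba9876543210, 63, 52) == 0xfed. The begin == 63 special
// case avoids the undefined behaviour of shifting 1ULL left by 64.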
static uint32_t setD5k16(uint32_t insn, uint32_t imm) {
  uint32_t immLo = extractBits(imm, 15, 0);
  uint32_t immHi = extractBits(imm, 20, 16);
  return (insn & 0xfc0003e0) | (immLo << 10) | immHi;
}

static uint32_t setD10k16(uint32_t insn, uint32_t imm) {
  uint32_t immLo = extractBits(imm, 15, 0);
  uint32_t immHi = extractBits(imm, 25, 16);
  return (insn & 0xfc000000) | (immLo << 10) | immHi;
}

static uint32_t setJ20(uint32_t insn, uint32_t imm) {
  return (insn & 0xfe00001f) | (extractBits(imm, 19, 0) << 5);
}

static uint32_t setK12(uint32_t insn, uint32_t imm) {
  return (insn & 0xffc003ff) | (extractBits(imm, 11, 0) << 10);
}

static uint32_t setK16(uint32_t insn, uint32_t imm) {
  return (insn & 0xfc0003ff) | (extractBits(imm, 15, 0) << 10);
}

static bool isJirl(uint32_t insn) {
  return (insn & 0xfc000000) == JIRL;
}
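// For example, setK12(ADDI_D, 0xabc) == 0x02eaf000 (an `addi.d` with its
// 12-bit immediate field set to 0xabc), and isJirl(0x4c0001e0) is true
// because the top six opcode bits match JIRL.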
LoongArch::LoongArch() {
  // The LoongArch ISA itself does not have a limit on page sizes. According to
  // the ISA manual, the PS (page size) field in MTLB entries and CSR.STLBPS is
  // 6 bits wide, meaning the maximum page size is 2^63 which is equivalent to
  // "unlimited".
  // However, practically the maximum usable page size is constrained by the
  // kernel implementation, and 64KiB is the biggest non-huge page size
  // supported by Linux as of v6.4. The most widespread page size in use,
  // though, is 16KiB.
  defaultCommonPageSize = 16384;
  defaultMaxPageSize = 65536;
  write32le(trapInstr.data(), BREAK); // break 0

  copyRel = R_LARCH_COPY;
  pltRel = R_LARCH_JUMP_SLOT;
  relativeRel = R_LARCH_RELATIVE;
  iRelativeRel = R_LARCH_IRELATIVE;

  if (config->is64) {
    symbolicRel = R_LARCH_64;
    tlsModuleIndexRel = R_LARCH_TLS_DTPMOD64;
    tlsOffsetRel = R_LARCH_TLS_DTPREL64;
    tlsGotRel = R_LARCH_TLS_TPREL64;
  } else {
    symbolicRel = R_LARCH_32;
    tlsModuleIndexRel = R_LARCH_TLS_DTPMOD32;
    tlsOffsetRel = R_LARCH_TLS_DTPREL32;
    tlsGotRel = R_LARCH_TLS_TPREL32;
  }

  gotRel = symbolicRel;

  // .got.plt[0] = _dl_runtime_resolve, .got.plt[1] = link_map
  gotPltHeaderEntriesNum = 2;

  pltHeaderSize = 32;
  pltEntrySize = 16;
  ipltEntrySize = 16;
}
static uint32_t getEFlags(const InputFile *f) {
  if (config->is64)
    return cast<ObjFile<ELF64LE>>(f)->getObj().getHeader().e_flags;
  return cast<ObjFile<ELF32LE>>(f)->getObj().getHeader().e_flags;
}
static bool inputFileHasCode(const InputFile *f) {
  for (const auto *sec : f->getSections())
    if (sec && sec->flags & SHF_EXECINSTR)
      return true;

  return false;
}
uint32_t LoongArch::calcEFlags() const {
  // If there are only binary input files (from -b binary), use a
  // value of 0 for the ELF header flags.
  if (ctx.objectFiles.empty())
    return 0;

  uint32_t target = 0;
  const InputFile *targetFile;
  for (const InputFile *f : ctx.objectFiles) {
    // Do not enforce ABI compatibility if the input file does not contain
    // code. This is useful for allowing linkage with data-only object files
    // produced with tools like objcopy, which have zero e_flags.
    if (!inputFileHasCode(f))
      continue;

    // Take the first non-zero e_flags as the reference.
    uint32_t flags = getEFlags(f);
    if (target == 0 && flags != 0) {
      target = flags;
      targetFile = f;
    }

    if ((flags & EF_LOONGARCH_ABI_MODIFIER_MASK) !=
        (target & EF_LOONGARCH_ABI_MODIFIER_MASK))
      error(toString(f) +
            ": cannot link object files with different ABI from " +
            toString(targetFile));

    // We cannot process psABI v1.x / object ABI v0 files (containing stack
    // relocations), unlike ld.bfd.
    //
    // Instead of blindly accepting every v0 object and only failing at
    // relocation processing time, just disallow interlink altogether. We
    // don't expect significant usage of object ABI v0 in the wild (the old
    // world may continue using object ABI v0 for a while, but as it's not
    // binary-compatible with the upstream i.e. new-world ecosystem, it's not
    // being considered here).
    //
    // Some new-world systems briefly shipped object ABI v0 binaries too,
    // because they were built before the new ABI was finalized. These are not
    // supported either due to the extremely small number of them, and the
    // few impacted users are advised to simply rebuild world or reinstall a
    // recent system.
    if ((flags & EF_LOONGARCH_OBJABI_MASK) != EF_LOONGARCH_OBJABI_V1)
      error(toString(f) + ": unsupported object file ABI version");
  }

  return target;
}
int64_t LoongArch::getImplicitAddend(const uint8_t *buf, RelType type) const {
  switch (type) {
  default:
    internalLinkerError(getErrorLocation(buf),
                        "cannot read addend for relocation " + toString(type));
    return 0;
  case R_LARCH_32:
  case R_LARCH_TLS_DTPMOD32:
  case R_LARCH_TLS_DTPREL32:
  case R_LARCH_TLS_TPREL32:
    return SignExtend64<32>(read32le(buf));
  case R_LARCH_64:
  case R_LARCH_TLS_DTPMOD64:
  case R_LARCH_TLS_DTPREL64:
  case R_LARCH_TLS_TPREL64:
    return read64le(buf);
  case R_LARCH_RELATIVE:
  case R_LARCH_IRELATIVE:
    return config->is64 ? read64le(buf) : read32le(buf);
  case R_LARCH_NONE:
  case R_LARCH_JUMP_SLOT:
    // These relocations are defined as not having an implicit addend.
    return 0;
  }
}
void LoongArch::writeGotPlt(uint8_t *buf, const Symbol &s) const {
  if (config->is64)
    write64le(buf, in.plt->getVA());
  else
    write32le(buf, in.plt->getVA());
}
void LoongArch::writeIgotPlt(uint8_t *buf, const Symbol &s) const {
  if (config->writeAddends) {
    if (config->is64)
      write64le(buf, s.getVA());
    else
      write32le(buf, s.getVA());
  }
}
void LoongArch::writePltHeader(uint8_t *buf) const {
  // The LoongArch PLT is currently structured just like that of RISCV.
  // Annoyingly, this means the PLT is still using `pcaddu12i` to perform
  // PC-relative addressing (because `pcaddu12i` is the same as RISCV `auipc`),
  // in contrast to the AArch64-like page-offset scheme with `pcalau12i` that
  // is used everywhere else involving PC-relative operations in the LoongArch
  // ELF psABI v2.00.
  //
  // The `pcrel_{hi20,lo12}` operators are illustrative only and not really
  // supported by LoongArch assemblers.
  //
  //   pcaddu12i $t2, %pcrel_hi20(.got.plt)
  //   sub.[wd]  $t1, $t1, $t3
  //   ld.[wd]   $t3, $t2, %pcrel_lo12(.got.plt)  ; t3 = _dl_runtime_resolve
  //   addi.[wd] $t1, $t1, -pltHeaderSize-12      ; t1 = &.plt[i] - &.plt[0]
  //   addi.[wd] $t0, $t2, %pcrel_lo12(.got.plt)
  //   srli.[wd] $t1, $t1, (is64?1:2)             ; t1 = &.got.plt[i] - &.got.plt[0]
  //   ld.[wd]   $t0, $t0, Wordsize               ; t0 = link_map
  //   jr        $t3
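  //
  // As a concrete illustration with a hypothetical 0x20010 offset between
  // .plt and .got.plt: hi20(0x20010) == 0x20 and lo12(0x20010) == 0x10, so
  // the first word would be insn(PCADDU12I, R_T2, 0x20, 0) == 0x1c00040e
  // ("pcaddu12i $t2, 32") and the ld at offset 8 (on LA64) would be
  // insn(LD_D, R_T3, R_T2, 0x10) == 0x28c041cf ("ld.d $t3, $t2, 16").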
  uint32_t offset = in.gotPlt->getVA() - in.plt->getVA();
  uint32_t sub = config->is64 ? SUB_D : SUB_W;
  uint32_t ld = config->is64 ? LD_D : LD_W;
  uint32_t addi = config->is64 ? ADDI_D : ADDI_W;
  uint32_t srli = config->is64 ? SRLI_D : SRLI_W;
  write32le(buf + 0, insn(PCADDU12I, R_T2, hi20(offset), 0));
  write32le(buf + 4, insn(sub, R_T1, R_T1, R_T3));
  write32le(buf + 8, insn(ld, R_T3, R_T2, lo12(offset)));
  write32le(buf + 12, insn(addi, R_T1, R_T1, lo12(-target->pltHeaderSize - 12)));
  write32le(buf + 16, insn(addi, R_T0, R_T2, lo12(offset)));
  write32le(buf + 20, insn(srli, R_T1, R_T1, config->is64 ? 1 : 2));
  write32le(buf + 24, insn(ld, R_T0, R_T0, config->wordsize));
  write32le(buf + 28, insn(JIRL, R_ZERO, R_T3, 0));
}
void LoongArch::writePlt(uint8_t *buf, const Symbol &sym,
                         uint64_t pltEntryAddr) const {
  // See the comment in writePltHeader for the reason why pcaddu12i is used
  // instead of the pcalau12i that's more commonly seen in the ELF psABI v2.0
  // days.
  //
  //   pcaddu12i $t3, %pcrel_hi20(f@.got.plt)
  //   ld.[wd]   $t3, $t3, %pcrel_lo12(f@.got.plt)
  //   jirl      $t1, $t3, 0
  //   nop
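  //
  // (The nop is encoded as insn(ANDI, R_ZERO, R_ZERO, 0) == 0x03400000, i.e.
  // `andi $zero, $zero, 0`, the canonical LoongArch nop; it pads the entry
  // out to pltEntrySize == 16 bytes.)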
  uint32_t offset = sym.getGotPltVA() - pltEntryAddr;
  write32le(buf + 0, insn(PCADDU12I, R_T3, hi20(offset), 0));
  write32le(buf + 4,
            insn(config->is64 ? LD_D : LD_W, R_T3, R_T3, lo12(offset)));
  write32le(buf + 8, insn(JIRL, R_T1, R_T3, 0));
  write32le(buf + 12, insn(ANDI, R_ZERO, R_ZERO, 0));
}
RelType LoongArch::getDynRel(RelType type) const {
  return type == target->symbolicRel ? type
                                     : static_cast<RelType>(R_LARCH_NONE);
}
RelExpr LoongArch::getRelExpr(const RelType type, const Symbol &s,
                              const uint8_t *loc) const {
  switch (type) {
  case R_LARCH_NONE:
  case R_LARCH_MARK_LA:
  case R_LARCH_MARK_PCREL:
    return R_NONE;
  case R_LARCH_32:
  case R_LARCH_64:
  case R_LARCH_ABS_HI20:
  case R_LARCH_ABS_LO12:
  case R_LARCH_ABS64_LO20:
  case R_LARCH_ABS64_HI12:
    return R_ABS;
  case R_LARCH_PCALA_LO12:
    // We could just return R_ABS here, but the JIRL instruction reuses this
    // relocation type for a different purpose. The questionable usage is part
    // of glibc 2.37 libc_nonshared.a [1], which is linked into user programs,
    // so we have to work around it for a while, even if a new relocation type
    // may be introduced in the future [2].
    //
    // [1]: https://sourceware.org/git/?p=glibc.git;a=commitdiff;h=9f482b73f41a9a1bbfb173aad0733d1c824c788a
    // [2]: https://github.com/loongson/la-abi-specs/pull/3
    return isJirl(read32le(loc)) ? R_PLT : R_ABS;
  case R_LARCH_TLS_DTPREL32:
  case R_LARCH_TLS_DTPREL64:
    return R_DTPREL;
  case R_LARCH_TLS_TPREL32:
  case R_LARCH_TLS_TPREL64:
  case R_LARCH_TLS_LE_HI20:
  case R_LARCH_TLS_LE_LO12:
  case R_LARCH_TLS_LE64_LO20:
  case R_LARCH_TLS_LE64_HI12:
    return R_TPREL;
  case R_LARCH_ADD8:
  case R_LARCH_ADD16:
  case R_LARCH_ADD32:
  case R_LARCH_ADD64:
  case R_LARCH_SUB8:
  case R_LARCH_SUB16:
  case R_LARCH_SUB32:
  case R_LARCH_SUB64:
    // The LoongArch add/sub relocs behave like the RISCV counterparts; reuse
    // the RelExpr to avoid code duplication.
    return R_RISCV_ADD;
  case R_LARCH_32_PCREL:
  case R_LARCH_64_PCREL:
  case R_LARCH_PCREL20_S2:
    return R_PC;
  case R_LARCH_B16:
  case R_LARCH_B21:
  case R_LARCH_B26:
    return R_PLT_PC;
  case R_LARCH_GOT_PC_HI20:
  case R_LARCH_GOT64_PC_LO20:
  case R_LARCH_GOT64_PC_HI12:
  case R_LARCH_TLS_IE_PC_HI20:
  case R_LARCH_TLS_IE64_PC_LO20:
  case R_LARCH_TLS_IE64_PC_HI12:
    return R_LOONGARCH_GOT_PAGE_PC;
  case R_LARCH_GOT_PC_LO12:
  case R_LARCH_TLS_IE_PC_LO12:
    return R_LOONGARCH_GOT;
  case R_LARCH_TLS_LD_PC_HI20:
  case R_LARCH_TLS_GD_PC_HI20:
    return R_LOONGARCH_TLSGD_PAGE_PC;
  case R_LARCH_PCALA_HI20:
    // Why not R_LOONGARCH_PAGE_PC? The majority of references don't go through
    // the PLT anyway, so why waste time checking only to get everything
    // relaxed back to it?
    //
    // This is again due to the R_LARCH_PCALA_LO12-on-JIRL case, where we want
    // both the HI20 and LO12 to potentially refer to the PLT. But in reality
    // the HI20 reloc appears earlier, and the relocs don't contain enough
    // information to let us properly resolve semantics per symbol.
    // Unlike RISCV, our LO12 relocs *do not* point to their corresponding HI20
    // relocs, hence it is nearly impossible to 100% accurately determine each
    // HI20's "flavor" without taking big performance hits, in the presence of
    // edge cases (e.g. a HI20 without a pairing LO12, or a paired LO12 placed
    // so far apart that the relationship is no longer certain) and programmer
    // mistakes (e.g. as outlined in
    // https://github.com/loongson/la-abi-specs/pull/3).
    //
    // Ideally we would scan in an extra pass for all LO12s on JIRL, then mark
    // every HI20 reloc referring to the same symbol differently; this is not
    // feasible with the current function signature of getRelExpr that doesn't
    // allow for such inter-pass state.
    //
    // So, unfortunately we have to again work around this quirk the same way
    // as BFD: assume every R_LARCH_PCALA_HI20 is potentially PLT-needing, and
    // only relax back to R_LOONGARCH_PAGE_PC if it's known not to be so at a
    // later stage.
    return R_LOONGARCH_PLT_PAGE_PC;
  case R_LARCH_PCALA64_LO20:
  case R_LARCH_PCALA64_HI12:
    return R_LOONGARCH_PAGE_PC;
  case R_LARCH_GOT_HI20:
  case R_LARCH_GOT_LO12:
  case R_LARCH_GOT64_LO20:
  case R_LARCH_GOT64_HI12:
  case R_LARCH_TLS_IE_HI20:
  case R_LARCH_TLS_IE_LO12:
  case R_LARCH_TLS_IE64_LO20:
  case R_LARCH_TLS_IE64_HI12:
    return R_GOT;
  case R_LARCH_TLS_LD_HI20:
    return R_TLSLD_GOT;
  case R_LARCH_TLS_GD_HI20:
    return R_TLSGD_GOT;
  case R_LARCH_RELAX:
    // LoongArch linker relaxation is not implemented yet.
    return R_NONE;

  // Other known relocs that are explicitly unimplemented:
  //
  // - psABI v1 relocs that need a stateful stack machine to work, and not
  //   required when implementing psABI v2;
  // - relocs that are not used anywhere (R_LARCH_{ADD,SUB}_24 [1], and the
  //   two GNU vtable-related relocs).
  //
  // [1]: https://web.archive.org/web/20230709064026/https://github.com/loongson/LoongArch-Documentation/issues/51
  default:
    error(getErrorLocation(loc) + "unknown relocation (" + Twine(type) +
          ") against symbol " + toString(s));
    return R_NONE;
  }
}
bool LoongArch::usesOnlyLowPageBits(RelType type) const {
  switch (type) {
  default:
    return false;
  case R_LARCH_PCALA_LO12:
  case R_LARCH_GOT_LO12:
  case R_LARCH_GOT_PC_LO12:
  case R_LARCH_TLS_IE_PC_LO12:
    return true;
  }
}
void LoongArch::relocate(uint8_t *loc, const Relocation &rel,
                         uint64_t val) const {
  switch (rel.type) {
  case R_LARCH_32_PCREL:
    checkInt(loc, val, 32, rel);
    [[fallthrough]];
  case R_LARCH_32:
  case R_LARCH_TLS_DTPREL32:
    write32le(loc, val);
    return;
  case R_LARCH_64:
  case R_LARCH_TLS_DTPREL64:
  case R_LARCH_64_PCREL:
    write64le(loc, val);
    return;

  case R_LARCH_PCREL20_S2:
    checkInt(loc, val, 22, rel);
    checkAlignment(loc, val, 4, rel);
    write32le(loc, setJ20(read32le(loc), val >> 2));
    return;

  case R_LARCH_B16:
    checkInt(loc, val, 18, rel);
    checkAlignment(loc, val, 4, rel);
    write32le(loc, setK16(read32le(loc), val >> 2));
    return;

  case R_LARCH_B21:
    checkInt(loc, val, 23, rel);
    checkAlignment(loc, val, 4, rel);
    write32le(loc, setD5k16(read32le(loc), val >> 2));
    return;

  case R_LARCH_B26:
    checkInt(loc, val, 28, rel);
    checkAlignment(loc, val, 4, rel);
    write32le(loc, setD10k16(read32le(loc), val >> 2));
    return;
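
  // The three branch relocs above thus cover PC-relative ranges of +/-128KiB
  // (B16), +/-4MiB (B21) and +/-128MiB (B26), matching the 18-, 23- and
  // 28-bit signed, 4-byte-aligned offsets checked by checkInt/checkAlignment.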
  // Relocs intended for `addi`, `ld` or `st`.
  case R_LARCH_PCALA_LO12:
    // We have to again inspect the insn word to handle the R_LARCH_PCALA_LO12
    // on JIRL case: firstly JIRL wants its immediate's 2 lowest zeroes
    // removed by us (in contrast to regular R_LARCH_PCALA_LO12), secondly
    // its immediate slot width is different too (16, not 12).
    // In this case, process like an R_LARCH_B16, but without overflow checking
    // and only taking the value's lowest 12 bits.
    if (isJirl(read32le(loc))) {
      checkAlignment(loc, val, 4, rel);
      val = SignExtend64<12>(val);
      write32le(loc, setK16(read32le(loc), val >> 2));
      return;
    }
    [[fallthrough]];
  case R_LARCH_ABS_LO12:
  case R_LARCH_GOT_PC_LO12:
  case R_LARCH_GOT_LO12:
  case R_LARCH_TLS_LE_LO12:
  case R_LARCH_TLS_IE_PC_LO12:
  case R_LARCH_TLS_IE_LO12:
    write32le(loc, setK12(read32le(loc), extractBits(val, 11, 0)));
    return;

  // Relocs intended for `lu12i.w` or `pcalau12i`.
  case R_LARCH_ABS_HI20:
  case R_LARCH_PCALA_HI20:
  case R_LARCH_GOT_PC_HI20:
  case R_LARCH_GOT_HI20:
  case R_LARCH_TLS_LE_HI20:
  case R_LARCH_TLS_IE_PC_HI20:
  case R_LARCH_TLS_IE_HI20:
  case R_LARCH_TLS_LD_PC_HI20:
  case R_LARCH_TLS_LD_HI20:
  case R_LARCH_TLS_GD_PC_HI20:
  case R_LARCH_TLS_GD_HI20:
    write32le(loc, setJ20(read32le(loc), extractBits(val, 31, 12)));
    return;

  // Relocs intended for `lu32i.d`.
  case R_LARCH_ABS64_LO20:
  case R_LARCH_PCALA64_LO20:
  case R_LARCH_GOT64_PC_LO20:
  case R_LARCH_GOT64_LO20:
  case R_LARCH_TLS_LE64_LO20:
  case R_LARCH_TLS_IE64_PC_LO20:
  case R_LARCH_TLS_IE64_LO20:
    write32le(loc, setJ20(read32le(loc), extractBits(val, 51, 32)));
    return;

  // Relocs intended for `lu52i.d`.
  case R_LARCH_ABS64_HI12:
  case R_LARCH_PCALA64_HI12:
  case R_LARCH_GOT64_PC_HI12:
  case R_LARCH_GOT64_HI12:
  case R_LARCH_TLS_LE64_HI12:
  case R_LARCH_TLS_IE64_PC_HI12:
  case R_LARCH_TLS_IE64_HI12:
    write32le(loc, setK12(read32le(loc), extractBits(val, 63, 52)));
    return;
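
  // To illustrate the four-way split above: materializing a 64-bit absolute
  // address such as 0xfedcba9876543210 (e.g. via `la.abs`, which expands to
  // lu12i.w + ori + lu32i.d + lu52i.d) patches bits [31:12] = 0x76543 for
  // R_LARCH_ABS_HI20, [11:0] = 0x210 for R_LARCH_ABS_LO12, [51:32] = 0xcba98
  // for R_LARCH_ABS64_LO20, and [63:52] = 0xfed for R_LARCH_ABS64_HI12.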
  case R_LARCH_ADD8:
    *loc += val;
    return;
  case R_LARCH_ADD16:
    write16le(loc, read16le(loc) + val);
    return;
  case R_LARCH_ADD32:
    write32le(loc, read32le(loc) + val);
    return;
  case R_LARCH_ADD64:
    write64le(loc, read64le(loc) + val);
    return;
  case R_LARCH_SUB8:
    *loc -= val;
    return;
  case R_LARCH_SUB16:
    write16le(loc, read16le(loc) - val);
    return;
  case R_LARCH_SUB32:
    write32le(loc, read32le(loc) - val);
    return;
  case R_LARCH_SUB64:
    write64le(loc, read64le(loc) - val);
    return;

  case R_LARCH_MARK_LA:
  case R_LARCH_MARK_PCREL:
    // no-op
    return;

  case R_LARCH_RELAX:
    return; // Ignored (for now)

  default:
    llvm_unreachable("unknown relocation");
  }
}
TargetInfo *elf::getLoongArchTargetInfo() {
  static LoongArch target;
  return &target;
}