Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / llvm / lib / ExecutionEngine / Orc / OrcABISupport.cpp
blob6d568199378a025c567114ef894165670adfb611
1 //===------------- OrcABISupport.cpp - ABI specific support code ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
9 #include "llvm/ExecutionEngine/Orc/OrcABISupport.h"
10 #include "llvm/Support/FormatVariadic.h"
11 #include "llvm/Support/Process.h"
12 #include "llvm/Support/raw_ostream.h"
14 #define DEBUG_TYPE "orc"
16 using namespace llvm;
17 using namespace llvm::orc;
19 template <typename ORCABI>
20 static bool stubAndPointerRangesOk(ExecutorAddr StubBlockAddr,
21 ExecutorAddr PointerBlockAddr,
22 unsigned NumStubs) {
23 constexpr unsigned MaxDisp = ORCABI::StubToPointerMaxDisplacement;
24 ExecutorAddr FirstStub = StubBlockAddr;
25 ExecutorAddr LastStub = FirstStub + ((NumStubs - 1) * ORCABI::StubSize);
26 ExecutorAddr FirstPointer = PointerBlockAddr;
27 ExecutorAddr LastPointer = FirstPointer + ((NumStubs - 1) * ORCABI::StubSize);
29 if (FirstStub < FirstPointer) {
30 if (LastStub >= FirstPointer)
31 return false; // Ranges overlap.
32 return (FirstPointer - FirstStub <= MaxDisp) &&
33 (LastPointer - LastStub <= MaxDisp); // out-of-range.
36 if (LastPointer >= FirstStub)
37 return false; // Ranges overlap.
39 return (FirstStub - FirstPointer <= MaxDisp) &&
40 (LastStub - LastPointer <= MaxDisp);
43 namespace llvm {
44 namespace orc {
46 void OrcAArch64::writeResolverCode(char *ResolverWorkingMem,
47 ExecutorAddr ResolverTargetAddress,
48 ExecutorAddr ReentryFnAddr,
49 ExecutorAddr ReentryCtxAddr) {
51 const uint32_t ResolverCode[] = {
52 // resolver_entry:
53 0xa9bf47fd, // 0x000: stp x29, x17, [sp, #-16]!
54 0x910003fd, // 0x004: mov x29, sp
55 0xa9bf73fb, // 0x008: stp x27, x28, [sp, #-16]!
56 0xa9bf6bf9, // 0x00c: stp x25, x26, [sp, #-16]!
57 0xa9bf63f7, // 0x010: stp x23, x24, [sp, #-16]!
58 0xa9bf5bf5, // 0x014: stp x21, x22, [sp, #-16]!
59 0xa9bf53f3, // 0x018: stp x19, x20, [sp, #-16]!
60 0xa9bf3fee, // 0x01c: stp x14, x15, [sp, #-16]!
61 0xa9bf37ec, // 0x020: stp x12, x13, [sp, #-16]!
62 0xa9bf2fea, // 0x024: stp x10, x11, [sp, #-16]!
63 0xa9bf27e8, // 0x028: stp x8, x9, [sp, #-16]!
64 0xa9bf1fe6, // 0x02c: stp x6, x7, [sp, #-16]!
65 0xa9bf17e4, // 0x030: stp x4, x5, [sp, #-16]!
66 0xa9bf0fe2, // 0x034: stp x2, x3, [sp, #-16]!
67 0xa9bf07e0, // 0x038: stp x0, x1, [sp, #-16]!
68 0xadbf7ffe, // 0x03c: stp q30, q31, [sp, #-32]!
69 0xadbf77fc, // 0x040: stp q28, q29, [sp, #-32]!
70 0xadbf6ffa, // 0x044: stp q26, q27, [sp, #-32]!
71 0xadbf67f8, // 0x048: stp q24, q25, [sp, #-32]!
72 0xadbf5ff6, // 0x04c: stp q22, q23, [sp, #-32]!
73 0xadbf57f4, // 0x050: stp q20, q21, [sp, #-32]!
74 0xadbf4ff2, // 0x054: stp q18, q19, [sp, #-32]!
75 0xadbf47f0, // 0x058: stp q16, q17, [sp, #-32]!
76 0xadbf3fee, // 0x05c: stp q14, q15, [sp, #-32]!
77 0xadbf37ec, // 0x060: stp q12, q13, [sp, #-32]!
78 0xadbf2fea, // 0x064: stp q10, q11, [sp, #-32]!
79 0xadbf27e8, // 0x068: stp q8, q9, [sp, #-32]!
80 0xadbf1fe6, // 0x06c: stp q6, q7, [sp, #-32]!
81 0xadbf17e4, // 0x070: stp q4, q5, [sp, #-32]!
82 0xadbf0fe2, // 0x074: stp q2, q3, [sp, #-32]!
83 0xadbf07e0, // 0x078: stp q0, q1, [sp, #-32]!
84 0x580004e0, // 0x07c: ldr x0, Lreentry_ctx_ptr
85 0xaa1e03e1, // 0x080: mov x1, x30
86 0xd1003021, // 0x084: sub x1, x1, #12
87 0x58000442, // 0x088: ldr x2, Lreentry_fn_ptr
88 0xd63f0040, // 0x08c: blr x2
89 0xaa0003f1, // 0x090: mov x17, x0
90 0xacc107e0, // 0x094: ldp q0, q1, [sp], #32
91 0xacc10fe2, // 0x098: ldp q2, q3, [sp], #32
92 0xacc117e4, // 0x09c: ldp q4, q5, [sp], #32
93 0xacc11fe6, // 0x0a0: ldp q6, q7, [sp], #32
94 0xacc127e8, // 0x0a4: ldp q8, q9, [sp], #32
95 0xacc12fea, // 0x0a8: ldp q10, q11, [sp], #32
96 0xacc137ec, // 0x0ac: ldp q12, q13, [sp], #32
97 0xacc13fee, // 0x0b0: ldp q14, q15, [sp], #32
98 0xacc147f0, // 0x0b4: ldp q16, q17, [sp], #32
99 0xacc14ff2, // 0x0b8: ldp q18, q19, [sp], #32
100 0xacc157f4, // 0x0bc: ldp q20, q21, [sp], #32
101 0xacc15ff6, // 0x0c0: ldp q22, q23, [sp], #32
102 0xacc167f8, // 0x0c4: ldp q24, q25, [sp], #32
103 0xacc16ffa, // 0x0c8: ldp q26, q27, [sp], #32
104 0xacc177fc, // 0x0cc: ldp q28, q29, [sp], #32
105 0xacc17ffe, // 0x0d0: ldp q30, q31, [sp], #32
106 0xa8c107e0, // 0x0d4: ldp x0, x1, [sp], #16
107 0xa8c10fe2, // 0x0d8: ldp x2, x3, [sp], #16
108 0xa8c117e4, // 0x0dc: ldp x4, x5, [sp], #16
109 0xa8c11fe6, // 0x0e0: ldp x6, x7, [sp], #16
110 0xa8c127e8, // 0x0e4: ldp x8, x9, [sp], #16
111 0xa8c12fea, // 0x0e8: ldp x10, x11, [sp], #16
112 0xa8c137ec, // 0x0ec: ldp x12, x13, [sp], #16
113 0xa8c13fee, // 0x0f0: ldp x14, x15, [sp], #16
114 0xa8c153f3, // 0x0f4: ldp x19, x20, [sp], #16
115 0xa8c15bf5, // 0x0f8: ldp x21, x22, [sp], #16
116 0xa8c163f7, // 0x0fc: ldp x23, x24, [sp], #16
117 0xa8c16bf9, // 0x100: ldp x25, x26, [sp], #16
118 0xa8c173fb, // 0x104: ldp x27, x28, [sp], #16
119 0xa8c17bfd, // 0x108: ldp x29, x30, [sp], #16
120 0xd65f0220, // 0x10c: ret x17
121 0x01234567, // 0x110: Lreentry_fn_ptr:
122 0xdeadbeef, // 0x114: .quad 0
123 0x98765432, // 0x118: Lreentry_ctx_ptr:
124 0xcafef00d // 0x11c: .quad 0
127 const unsigned ReentryFnAddrOffset = 0x110;
128 const unsigned ReentryCtxAddrOffset = 0x118;
130 memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode));
131 memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr,
132 sizeof(uint64_t));
133 memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr,
134 sizeof(uint64_t));
137 void OrcAArch64::writeTrampolines(char *TrampolineBlockWorkingMem,
138 ExecutorAddr TrampolineBlockTargetAddress,
139 ExecutorAddr ResolverAddr,
140 unsigned NumTrampolines) {
142 unsigned OffsetToPtr = alignTo(NumTrampolines * TrampolineSize, 8);
144 memcpy(TrampolineBlockWorkingMem + OffsetToPtr, &ResolverAddr,
145 sizeof(uint64_t));
147 // OffsetToPtr is actually the offset from the PC for the 2nd instruction, so
148 // subtract 32-bits.
149 OffsetToPtr -= 4;
151 uint32_t *Trampolines =
152 reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem);
154 for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) {
155 Trampolines[3 * I + 0] = 0xaa1e03f1; // mov x17, x30
156 Trampolines[3 * I + 1] = 0x58000010 | (OffsetToPtr << 3); // adr x16, Lptr
157 Trampolines[3 * I + 2] = 0xd63f0200; // blr x16
161 void OrcAArch64::writeIndirectStubsBlock(
162 char *StubsBlockWorkingMem, ExecutorAddr StubsBlockTargetAddress,
163 ExecutorAddr PointersBlockTargetAddress, unsigned NumStubs) {
164 // Stub format is:
166 // .section __orc_stubs
167 // stub1:
168 // ldr x16, ptr1 ; PC-rel load of ptr1
169 // br x16 ; Jump to resolver
170 // stub2:
171 // ldr x16, ptr2 ; PC-rel load of ptr2
172 // br x16 ; Jump to resolver
174 // ...
176 // .section __orc_ptrs
177 // ptr1:
178 // .quad 0x0
179 // ptr2:
180 // .quad 0x0
182 // ...
184 static_assert(StubSize == PointerSize,
185 "Pointer and stub size must match for algorithm below");
186 assert(stubAndPointerRangesOk<OrcAArch64>(
187 StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) &&
188 "PointersBlock is out of range");
189 uint64_t PtrDisplacement =
190 PointersBlockTargetAddress - StubsBlockTargetAddress;
191 assert((PtrDisplacement % 8 == 0) &&
192 "Displacement to pointer is not a multiple of 8");
193 uint64_t *Stub = reinterpret_cast<uint64_t *>(StubsBlockWorkingMem);
194 uint64_t PtrOffsetField = ((PtrDisplacement >> 2) & 0x7ffff) << 5;
196 for (unsigned I = 0; I < NumStubs; ++I)
197 Stub[I] = 0xd61f020058000010 | PtrOffsetField;
200 void OrcX86_64_Base::writeTrampolines(char *TrampolineBlockWorkingMem,
201 ExecutorAddr TrampolineBlockTargetAddress,
202 ExecutorAddr ResolverAddr,
203 unsigned NumTrampolines) {
205 unsigned OffsetToPtr = NumTrampolines * TrampolineSize;
207 memcpy(TrampolineBlockWorkingMem + OffsetToPtr, &ResolverAddr,
208 sizeof(uint64_t));
210 uint64_t *Trampolines =
211 reinterpret_cast<uint64_t *>(TrampolineBlockWorkingMem);
212 uint64_t CallIndirPCRel = 0xf1c40000000015ff;
214 for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize)
215 Trampolines[I] = CallIndirPCRel | ((OffsetToPtr - 6) << 16);
218 void OrcX86_64_Base::writeIndirectStubsBlock(
219 char *StubsBlockWorkingMem, ExecutorAddr StubsBlockTargetAddress,
220 ExecutorAddr PointersBlockTargetAddress, unsigned NumStubs) {
221 // Stub format is:
223 // .section __orc_stubs
224 // stub1:
225 // jmpq *ptr1(%rip)
226 // .byte 0xC4 ; <- Invalid opcode padding.
227 // .byte 0xF1
228 // stub2:
229 // jmpq *ptr2(%rip)
231 // ...
233 // .section __orc_ptrs
234 // ptr1:
235 // .quad 0x0
236 // ptr2:
237 // .quad 0x0
239 // ...
241 // Populate the stubs page stubs and mark it executable.
242 static_assert(StubSize == PointerSize,
243 "Pointer and stub size must match for algorithm below");
244 assert(stubAndPointerRangesOk<OrcX86_64_Base>(
245 StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) &&
246 "PointersBlock is out of range");
247 uint64_t *Stub = reinterpret_cast<uint64_t *>(StubsBlockWorkingMem);
248 uint64_t PtrOffsetField =
249 (PointersBlockTargetAddress - StubsBlockTargetAddress - 6) << 16;
250 for (unsigned I = 0; I < NumStubs; ++I)
251 Stub[I] = 0xF1C40000000025ff | PtrOffsetField;
254 void OrcX86_64_SysV::writeResolverCode(char *ResolverWorkingMem,
255 ExecutorAddr ResolverTargetAddress,
256 ExecutorAddr ReentryFnAddr,
257 ExecutorAddr ReentryCtxAddr) {
259 LLVM_DEBUG({
260 dbgs() << "Writing resolver code to "
261 << formatv("{0:x16}", ResolverTargetAddress) << "\n";
264 const uint8_t ResolverCode[] = {
265 // resolver_entry:
266 0x55, // 0x00: pushq %rbp
267 0x48, 0x89, 0xe5, // 0x01: movq %rsp, %rbp
268 0x50, // 0x04: pushq %rax
269 0x53, // 0x05: pushq %rbx
270 0x51, // 0x06: pushq %rcx
271 0x52, // 0x07: pushq %rdx
272 0x56, // 0x08: pushq %rsi
273 0x57, // 0x09: pushq %rdi
274 0x41, 0x50, // 0x0a: pushq %r8
275 0x41, 0x51, // 0x0c: pushq %r9
276 0x41, 0x52, // 0x0e: pushq %r10
277 0x41, 0x53, // 0x10: pushq %r11
278 0x41, 0x54, // 0x12: pushq %r12
279 0x41, 0x55, // 0x14: pushq %r13
280 0x41, 0x56, // 0x16: pushq %r14
281 0x41, 0x57, // 0x18: pushq %r15
282 0x48, 0x81, 0xec, 0x08, 0x02, 0x00, 0x00, // 0x1a: subq 0x208, %rsp
283 0x48, 0x0f, 0xae, 0x04, 0x24, // 0x21: fxsave64 (%rsp)
284 0x48, 0xbf, // 0x26: movabsq <CBMgr>, %rdi
286 // 0x28: JIT re-entry ctx addr.
287 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
289 0x48, 0x8b, 0x75, 0x08, // 0x30: movq 8(%rbp), %rsi
290 0x48, 0x83, 0xee, 0x06, // 0x34: subq $6, %rsi
291 0x48, 0xb8, // 0x38: movabsq <REntry>, %rax
293 // 0x3a: JIT re-entry fn addr:
294 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
296 0xff, 0xd0, // 0x42: callq *%rax
297 0x48, 0x89, 0x45, 0x08, // 0x44: movq %rax, 8(%rbp)
298 0x48, 0x0f, 0xae, 0x0c, 0x24, // 0x48: fxrstor64 (%rsp)
299 0x48, 0x81, 0xc4, 0x08, 0x02, 0x00, 0x00, // 0x4d: addq 0x208, %rsp
300 0x41, 0x5f, // 0x54: popq %r15
301 0x41, 0x5e, // 0x56: popq %r14
302 0x41, 0x5d, // 0x58: popq %r13
303 0x41, 0x5c, // 0x5a: popq %r12
304 0x41, 0x5b, // 0x5c: popq %r11
305 0x41, 0x5a, // 0x5e: popq %r10
306 0x41, 0x59, // 0x60: popq %r9
307 0x41, 0x58, // 0x62: popq %r8
308 0x5f, // 0x64: popq %rdi
309 0x5e, // 0x65: popq %rsi
310 0x5a, // 0x66: popq %rdx
311 0x59, // 0x67: popq %rcx
312 0x5b, // 0x68: popq %rbx
313 0x58, // 0x69: popq %rax
314 0x5d, // 0x6a: popq %rbp
315 0xc3, // 0x6b: retq
318 const unsigned ReentryFnAddrOffset = 0x3a;
319 const unsigned ReentryCtxAddrOffset = 0x28;
321 memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode));
322 memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr,
323 sizeof(uint64_t));
324 memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr,
325 sizeof(uint64_t));
328 void OrcX86_64_Win32::writeResolverCode(char *ResolverWorkingMem,
329 ExecutorAddr ResolverTargetAddress,
330 ExecutorAddr ReentryFnAddr,
331 ExecutorAddr ReentryCtxAddr) {
333 // resolverCode is similar to OrcX86_64 with differences specific to windows
334 // x64 calling convention: arguments go into rcx, rdx and come in reverse
335 // order, shadow space allocation on stack
336 const uint8_t ResolverCode[] = {
337 // resolver_entry:
338 0x55, // 0x00: pushq %rbp
339 0x48, 0x89, 0xe5, // 0x01: movq %rsp, %rbp
340 0x50, // 0x04: pushq %rax
341 0x53, // 0x05: pushq %rbx
342 0x51, // 0x06: pushq %rcx
343 0x52, // 0x07: pushq %rdx
344 0x56, // 0x08: pushq %rsi
345 0x57, // 0x09: pushq %rdi
346 0x41, 0x50, // 0x0a: pushq %r8
347 0x41, 0x51, // 0x0c: pushq %r9
348 0x41, 0x52, // 0x0e: pushq %r10
349 0x41, 0x53, // 0x10: pushq %r11
350 0x41, 0x54, // 0x12: pushq %r12
351 0x41, 0x55, // 0x14: pushq %r13
352 0x41, 0x56, // 0x16: pushq %r14
353 0x41, 0x57, // 0x18: pushq %r15
354 0x48, 0x81, 0xec, 0x08, 0x02, 0x00, 0x00, // 0x1a: subq 0x208, %rsp
355 0x48, 0x0f, 0xae, 0x04, 0x24, // 0x21: fxsave64 (%rsp)
357 0x48, 0xb9, // 0x26: movabsq <CBMgr>, %rcx
358 // 0x28: JIT re-entry ctx addr.
359 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
361 0x48, 0x8B, 0x55, 0x08, // 0x30: mov rdx, [rbp+0x8]
362 0x48, 0x83, 0xea, 0x06, // 0x34: sub rdx, 0x6
364 0x48, 0xb8, // 0x38: movabsq <REntry>, %rax
365 // 0x3a: JIT re-entry fn addr:
366 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
368 // 0x42: sub rsp, 0x20 (Allocate shadow space)
369 0x48, 0x83, 0xEC, 0x20,
370 0xff, 0xd0, // 0x46: callq *%rax
372 // 0x48: add rsp, 0x20 (Free shadow space)
373 0x48, 0x83, 0xC4, 0x20,
375 0x48, 0x89, 0x45, 0x08, // 0x4C: movq %rax, 8(%rbp)
376 0x48, 0x0f, 0xae, 0x0c, 0x24, // 0x50: fxrstor64 (%rsp)
377 0x48, 0x81, 0xc4, 0x08, 0x02, 0x00, 0x00, // 0x55: addq 0x208, %rsp
378 0x41, 0x5f, // 0x5C: popq %r15
379 0x41, 0x5e, // 0x5E: popq %r14
380 0x41, 0x5d, // 0x60: popq %r13
381 0x41, 0x5c, // 0x62: popq %r12
382 0x41, 0x5b, // 0x64: popq %r11
383 0x41, 0x5a, // 0x66: popq %r10
384 0x41, 0x59, // 0x68: popq %r9
385 0x41, 0x58, // 0x6a: popq %r8
386 0x5f, // 0x6c: popq %rdi
387 0x5e, // 0x6d: popq %rsi
388 0x5a, // 0x6e: popq %rdx
389 0x59, // 0x6f: popq %rcx
390 0x5b, // 0x70: popq %rbx
391 0x58, // 0x71: popq %rax
392 0x5d, // 0x72: popq %rbp
393 0xc3, // 0x73: retq
396 const unsigned ReentryFnAddrOffset = 0x3a;
397 const unsigned ReentryCtxAddrOffset = 0x28;
399 memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode));
400 memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr,
401 sizeof(uint64_t));
402 memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr,
403 sizeof(uint64_t));
406 void OrcI386::writeResolverCode(char *ResolverWorkingMem,
407 ExecutorAddr ResolverTargetAddress,
408 ExecutorAddr ReentryFnAddr,
409 ExecutorAddr ReentryCtxAddr) {
411 assert((ReentryFnAddr.getValue() >> 32) == 0 && "ReentryFnAddr out of range");
412 assert((ReentryCtxAddr.getValue() >> 32) == 0 &&
413 "ReentryCtxAddr out of range");
415 const uint8_t ResolverCode[] = {
416 // resolver_entry:
417 0x55, // 0x00: pushl %ebp
418 0x89, 0xe5, // 0x01: movl %esp, %ebp
419 0x54, // 0x03: pushl %esp
420 0x83, 0xe4, 0xf0, // 0x04: andl $-0x10, %esp
421 0x50, // 0x07: pushl %eax
422 0x53, // 0x08: pushl %ebx
423 0x51, // 0x09: pushl %ecx
424 0x52, // 0x0a: pushl %edx
425 0x56, // 0x0b: pushl %esi
426 0x57, // 0x0c: pushl %edi
427 0x81, 0xec, 0x18, 0x02, 0x00, 0x00, // 0x0d: subl $0x218, %esp
428 0x0f, 0xae, 0x44, 0x24, 0x10, // 0x13: fxsave 0x10(%esp)
429 0x8b, 0x75, 0x04, // 0x18: movl 0x4(%ebp), %esi
430 0x83, 0xee, 0x05, // 0x1b: subl $0x5, %esi
431 0x89, 0x74, 0x24, 0x04, // 0x1e: movl %esi, 0x4(%esp)
432 0xc7, 0x04, 0x24, 0x00, 0x00, 0x00,
433 0x00, // 0x22: movl <cbmgr>, (%esp)
434 0xb8, 0x00, 0x00, 0x00, 0x00, // 0x29: movl <reentry>, %eax
435 0xff, 0xd0, // 0x2e: calll *%eax
436 0x89, 0x45, 0x04, // 0x30: movl %eax, 0x4(%ebp)
437 0x0f, 0xae, 0x4c, 0x24, 0x10, // 0x33: fxrstor 0x10(%esp)
438 0x81, 0xc4, 0x18, 0x02, 0x00, 0x00, // 0x38: addl $0x218, %esp
439 0x5f, // 0x3e: popl %edi
440 0x5e, // 0x3f: popl %esi
441 0x5a, // 0x40: popl %edx
442 0x59, // 0x41: popl %ecx
443 0x5b, // 0x42: popl %ebx
444 0x58, // 0x43: popl %eax
445 0x8b, 0x65, 0xfc, // 0x44: movl -0x4(%ebp), %esp
446 0x5d, // 0x48: popl %ebp
447 0xc3 // 0x49: retl
450 const unsigned ReentryFnAddrOffset = 0x2a;
451 const unsigned ReentryCtxAddrOffset = 0x25;
453 memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode));
454 memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr,
455 sizeof(uint32_t));
456 memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr,
457 sizeof(uint32_t));
460 void OrcI386::writeTrampolines(char *TrampolineWorkingMem,
461 ExecutorAddr TrampolineBlockTargetAddress,
462 ExecutorAddr ResolverAddr,
463 unsigned NumTrampolines) {
464 assert((ResolverAddr.getValue() >> 32) == 0 && "ResolverAddr out of range");
466 uint64_t CallRelImm = 0xF1C4C400000000e8;
467 uint64_t ResolverRel = ResolverAddr - TrampolineBlockTargetAddress - 5;
469 uint64_t *Trampolines = reinterpret_cast<uint64_t *>(TrampolineWorkingMem);
470 for (unsigned I = 0; I < NumTrampolines; ++I, ResolverRel -= TrampolineSize)
471 Trampolines[I] = CallRelImm | (ResolverRel << 8);
474 void OrcI386::writeIndirectStubsBlock(char *StubsBlockWorkingMem,
475 ExecutorAddr StubsBlockTargetAddress,
476 ExecutorAddr PointersBlockTargetAddress,
477 unsigned NumStubs) {
478 assert((StubsBlockTargetAddress.getValue() >> 32) == 0 &&
479 "StubsBlockTargetAddress is out of range");
480 assert((PointersBlockTargetAddress.getValue() >> 32) == 0 &&
481 "PointersBlockTargetAddress is out of range");
483 // Stub format is:
485 // .section __orc_stubs
486 // stub1:
487 // jmpq *ptr1
488 // .byte 0xC4 ; <- Invalid opcode padding.
489 // .byte 0xF1
490 // stub2:
491 // jmpq *ptr2
493 // ...
495 // .section __orc_ptrs
496 // ptr1:
497 // .quad 0x0
498 // ptr2:
499 // .quad 0x0
501 // ...
503 assert(stubAndPointerRangesOk<OrcI386>(
504 StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) &&
505 "PointersBlock is out of range");
507 uint64_t *Stub = reinterpret_cast<uint64_t *>(StubsBlockWorkingMem);
508 uint64_t PtrAddr = PointersBlockTargetAddress.getValue();
509 for (unsigned I = 0; I < NumStubs; ++I, PtrAddr += 4)
510 Stub[I] = 0xF1C40000000025ff | (PtrAddr << 16);
513 void OrcMips32_Base::writeResolverCode(char *ResolverWorkingMem,
514 ExecutorAddr ResolverTargetAddress,
515 ExecutorAddr ReentryFnAddr,
516 ExecutorAddr ReentryCtxAddr,
517 bool isBigEndian) {
519 const uint32_t ResolverCode[] = {
520 // resolver_entry:
521 0x27bdff98, // 0x00: addiu $sp,$sp,-104
522 0xafa20000, // 0x04: sw $v0,0($sp)
523 0xafa30004, // 0x08: sw $v1,4($sp)
524 0xafa40008, // 0x0c: sw $a0,8($sp)
525 0xafa5000c, // 0x10: sw $a1,12($sp)
526 0xafa60010, // 0x14: sw $a2,16($sp)
527 0xafa70014, // 0x18: sw $a3,20($sp)
528 0xafb00018, // 0x1c: sw $s0,24($sp)
529 0xafb1001c, // 0x20: sw $s1,28($sp)
530 0xafb20020, // 0x24: sw $s2,32($sp)
531 0xafb30024, // 0x28: sw $s3,36($sp)
532 0xafb40028, // 0x2c: sw $s4,40($sp)
533 0xafb5002c, // 0x30: sw $s5,44($sp)
534 0xafb60030, // 0x34: sw $s6,48($sp)
535 0xafb70034, // 0x38: sw $s7,52($sp)
536 0xafa80038, // 0x3c: sw $t0,56($sp)
537 0xafa9003c, // 0x40: sw $t1,60($sp)
538 0xafaa0040, // 0x44: sw $t2,64($sp)
539 0xafab0044, // 0x48: sw $t3,68($sp)
540 0xafac0048, // 0x4c: sw $t4,72($sp)
541 0xafad004c, // 0x50: sw $t5,76($sp)
542 0xafae0050, // 0x54: sw $t6,80($sp)
543 0xafaf0054, // 0x58: sw $t7,84($sp)
544 0xafb80058, // 0x5c: sw $t8,88($sp)
545 0xafb9005c, // 0x60: sw $t9,92($sp)
546 0xafbe0060, // 0x64: sw $fp,96($sp)
547 0xafbf0064, // 0x68: sw $ra,100($sp)
549 // JIT re-entry ctx addr.
550 0x00000000, // 0x6c: lui $a0,ctx
551 0x00000000, // 0x70: addiu $a0,$a0,ctx
553 0x03e02825, // 0x74: move $a1, $ra
554 0x24a5ffec, // 0x78: addiu $a1,$a1,-20
556 // JIT re-entry fn addr:
557 0x00000000, // 0x7c: lui $t9,reentry
558 0x00000000, // 0x80: addiu $t9,$t9,reentry
560 0x0320f809, // 0x84: jalr $t9
561 0x00000000, // 0x88: nop
562 0x8fbf0064, // 0x8c: lw $ra,100($sp)
563 0x8fbe0060, // 0x90: lw $fp,96($sp)
564 0x8fb9005c, // 0x94: lw $t9,92($sp)
565 0x8fb80058, // 0x98: lw $t8,88($sp)
566 0x8faf0054, // 0x9c: lw $t7,84($sp)
567 0x8fae0050, // 0xa0: lw $t6,80($sp)
568 0x8fad004c, // 0xa4: lw $t5,76($sp)
569 0x8fac0048, // 0xa8: lw $t4,72($sp)
570 0x8fab0044, // 0xac: lw $t3,68($sp)
571 0x8faa0040, // 0xb0: lw $t2,64($sp)
572 0x8fa9003c, // 0xb4: lw $t1,60($sp)
573 0x8fa80038, // 0xb8: lw $t0,56($sp)
574 0x8fb70034, // 0xbc: lw $s7,52($sp)
575 0x8fb60030, // 0xc0: lw $s6,48($sp)
576 0x8fb5002c, // 0xc4: lw $s5,44($sp)
577 0x8fb40028, // 0xc8: lw $s4,40($sp)
578 0x8fb30024, // 0xcc: lw $s3,36($sp)
579 0x8fb20020, // 0xd0: lw $s2,32($sp)
580 0x8fb1001c, // 0xd4: lw $s1,28($sp)
581 0x8fb00018, // 0xd8: lw $s0,24($sp)
582 0x8fa70014, // 0xdc: lw $a3,20($sp)
583 0x8fa60010, // 0xe0: lw $a2,16($sp)
584 0x8fa5000c, // 0xe4: lw $a1,12($sp)
585 0x8fa40008, // 0xe8: lw $a0,8($sp)
586 0x27bd0068, // 0xec: addiu $sp,$sp,104
587 0x0300f825, // 0xf0: move $ra, $t8
588 0x03200008, // 0xf4: jr $t9
589 0x00000000, // 0xf8: move $t9, $v0/v1
592 const unsigned ReentryFnAddrOffset = 0x7c; // JIT re-entry fn addr lui
593 const unsigned ReentryCtxAddrOffset = 0x6c; // JIT re-entry context addr lui
594 const unsigned Offsett = 0xf8;
596 memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode));
598 // Depending on endian return value will be in v0 or v1.
599 uint32_t MoveVxT9 = isBigEndian ? 0x0060c825 : 0x0040c825;
600 memcpy(ResolverWorkingMem + Offsett, &MoveVxT9, sizeof(MoveVxT9));
602 uint32_t ReentryCtxLUi =
603 0x3c040000 | (((ReentryCtxAddr.getValue() + 0x8000) >> 16) & 0xFFFF);
604 uint32_t ReentryCtxADDiu = 0x24840000 | (ReentryCtxAddr.getValue() & 0xFFFF);
605 memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxLUi,
606 sizeof(ReentryCtxLUi));
607 memcpy(ResolverWorkingMem + ReentryCtxAddrOffset + 4, &ReentryCtxADDiu,
608 sizeof(ReentryCtxADDiu));
610 uint32_t ReentryFnLUi =
611 0x3c190000 | (((ReentryFnAddr.getValue() + 0x8000) >> 16) & 0xFFFF);
612 uint32_t ReentryFnADDiu = 0x27390000 | (ReentryFnAddr.getValue() & 0xFFFF);
613 memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnLUi,
614 sizeof(ReentryFnLUi));
615 memcpy(ResolverWorkingMem + ReentryFnAddrOffset + 4, &ReentryFnADDiu,
616 sizeof(ReentryFnADDiu));
619 void OrcMips32_Base::writeTrampolines(char *TrampolineBlockWorkingMem,
620 ExecutorAddr TrampolineBlockTargetAddress,
621 ExecutorAddr ResolverAddr,
622 unsigned NumTrampolines) {
624 assert((ResolverAddr.getValue() >> 32) == 0 && "ResolverAddr out of range");
626 uint32_t *Trampolines =
627 reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem);
628 uint32_t RHiAddr = ((ResolverAddr.getValue() + 0x8000) >> 16);
630 for (unsigned I = 0; I < NumTrampolines; ++I) {
631 // move $t8,$ra
632 // lui $t9,ResolverAddr
633 // addiu $t9,$t9,ResolverAddr
634 // jalr $t9
635 // nop
636 Trampolines[5 * I + 0] = 0x03e0c025;
637 Trampolines[5 * I + 1] = 0x3c190000 | (RHiAddr & 0xFFFF);
638 Trampolines[5 * I + 2] = 0x27390000 | (ResolverAddr.getValue() & 0xFFFF);
639 Trampolines[5 * I + 3] = 0x0320f809;
640 Trampolines[5 * I + 4] = 0x00000000;
644 void OrcMips32_Base::writeIndirectStubsBlock(
645 char *StubsBlockWorkingMem, ExecutorAddr StubsBlockTargetAddress,
646 ExecutorAddr PointersBlockTargetAddress, unsigned NumStubs) {
647 assert((StubsBlockTargetAddress.getValue() >> 32) == 0 &&
648 "InitialPtrVal is out of range");
650 // Stub format is:
652 // .section __orc_stubs
653 // stub1:
654 // lui $t9, ptr1
655 // lw $t9, %lo(ptr1)($t9)
656 // jr $t9
657 // stub2:
658 // lui $t9, ptr2
659 // lw $t9,%lo(ptr1)($t9)
660 // jr $t9
662 // ...
664 // .section __orc_ptrs
665 // ptr1:
666 // .word 0x0
667 // ptr2:
668 // .word 0x0
670 // i..
672 assert(stubAndPointerRangesOk<OrcMips32_Base>(
673 StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) &&
674 "PointersBlock is out of range");
676 // Populate the stubs page stubs and mark it executable.
677 uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlockWorkingMem);
678 uint64_t PtrAddr = PointersBlockTargetAddress.getValue();
680 for (unsigned I = 0; I < NumStubs; ++I) {
681 uint32_t HiAddr = ((PtrAddr + 0x8000) >> 16);
682 Stub[4 * I + 0] = 0x3c190000 | (HiAddr & 0xFFFF); // lui $t9,ptr1
683 Stub[4 * I + 1] = 0x8f390000 | (PtrAddr & 0xFFFF); // lw $t9,%lo(ptr1)($t9)
684 Stub[4 * I + 2] = 0x03200008; // jr $t9
685 Stub[4 * I + 3] = 0x00000000; // nop
686 PtrAddr += 4;
690 void OrcMips64::writeResolverCode(char *ResolverWorkingMem,
691 ExecutorAddr ResolverTargetAddress,
692 ExecutorAddr ReentryFnAddr,
693 ExecutorAddr ReentryCtxAddr) {
695 const uint32_t ResolverCode[] = {
696 //resolver_entry:
697 0x67bdff30, // 0x00: daddiu $sp,$sp,-208
698 0xffa20000, // 0x04: sd v0,0(sp)
699 0xffa30008, // 0x08: sd v1,8(sp)
700 0xffa40010, // 0x0c: sd a0,16(sp)
701 0xffa50018, // 0x10: sd a1,24(sp)
702 0xffa60020, // 0x14: sd a2,32(sp)
703 0xffa70028, // 0x18: sd a3,40(sp)
704 0xffa80030, // 0x1c: sd a4,48(sp)
705 0xffa90038, // 0x20: sd a5,56(sp)
706 0xffaa0040, // 0x24: sd a6,64(sp)
707 0xffab0048, // 0x28: sd a7,72(sp)
708 0xffac0050, // 0x2c: sd t0,80(sp)
709 0xffad0058, // 0x30: sd t1,88(sp)
710 0xffae0060, // 0x34: sd t2,96(sp)
711 0xffaf0068, // 0x38: sd t3,104(sp)
712 0xffb00070, // 0x3c: sd s0,112(sp)
713 0xffb10078, // 0x40: sd s1,120(sp)
714 0xffb20080, // 0x44: sd s2,128(sp)
715 0xffb30088, // 0x48: sd s3,136(sp)
716 0xffb40090, // 0x4c: sd s4,144(sp)
717 0xffb50098, // 0x50: sd s5,152(sp)
718 0xffb600a0, // 0x54: sd s6,160(sp)
719 0xffb700a8, // 0x58: sd s7,168(sp)
720 0xffb800b0, // 0x5c: sd t8,176(sp)
721 0xffb900b8, // 0x60: sd t9,184(sp)
722 0xffbe00c0, // 0x64: sd fp,192(sp)
723 0xffbf00c8, // 0x68: sd ra,200(sp)
725 // JIT re-entry ctx addr.
726 0x00000000, // 0x6c: lui $a0,heighest(ctx)
727 0x00000000, // 0x70: daddiu $a0,$a0,heigher(ctx)
728 0x00000000, // 0x74: dsll $a0,$a0,16
729 0x00000000, // 0x78: daddiu $a0,$a0,hi(ctx)
730 0x00000000, // 0x7c: dsll $a0,$a0,16
731 0x00000000, // 0x80: daddiu $a0,$a0,lo(ctx)
733 0x03e02825, // 0x84: move $a1, $ra
734 0x64a5ffdc, // 0x88: daddiu $a1,$a1,-36
736 // JIT re-entry fn addr:
737 0x00000000, // 0x8c: lui $t9,reentry
738 0x00000000, // 0x90: daddiu $t9,$t9,reentry
739 0x00000000, // 0x94: dsll $t9,$t9,
740 0x00000000, // 0x98: daddiu $t9,$t9,
741 0x00000000, // 0x9c: dsll $t9,$t9,
742 0x00000000, // 0xa0: daddiu $t9,$t9,
743 0x0320f809, // 0xa4: jalr $t9
744 0x00000000, // 0xa8: nop
745 0xdfbf00c8, // 0xac: ld ra, 200(sp)
746 0xdfbe00c0, // 0xb0: ld fp, 192(sp)
747 0xdfb900b8, // 0xb4: ld t9, 184(sp)
748 0xdfb800b0, // 0xb8: ld t8, 176(sp)
749 0xdfb700a8, // 0xbc: ld s7, 168(sp)
750 0xdfb600a0, // 0xc0: ld s6, 160(sp)
751 0xdfb50098, // 0xc4: ld s5, 152(sp)
752 0xdfb40090, // 0xc8: ld s4, 144(sp)
753 0xdfb30088, // 0xcc: ld s3, 136(sp)
754 0xdfb20080, // 0xd0: ld s2, 128(sp)
755 0xdfb10078, // 0xd4: ld s1, 120(sp)
756 0xdfb00070, // 0xd8: ld s0, 112(sp)
757 0xdfaf0068, // 0xdc: ld t3, 104(sp)
758 0xdfae0060, // 0xe0: ld t2, 96(sp)
759 0xdfad0058, // 0xe4: ld t1, 88(sp)
760 0xdfac0050, // 0xe8: ld t0, 80(sp)
761 0xdfab0048, // 0xec: ld a7, 72(sp)
762 0xdfaa0040, // 0xf0: ld a6, 64(sp)
763 0xdfa90038, // 0xf4: ld a5, 56(sp)
764 0xdfa80030, // 0xf8: ld a4, 48(sp)
765 0xdfa70028, // 0xfc: ld a3, 40(sp)
766 0xdfa60020, // 0x100: ld a2, 32(sp)
767 0xdfa50018, // 0x104: ld a1, 24(sp)
768 0xdfa40010, // 0x108: ld a0, 16(sp)
769 0xdfa30008, // 0x10c: ld v1, 8(sp)
770 0x67bd00d0, // 0x110: daddiu $sp,$sp,208
771 0x0300f825, // 0x114: move $ra, $t8
772 0x03200008, // 0x118: jr $t9
773 0x0040c825, // 0x11c: move $t9, $v0
776 const unsigned ReentryFnAddrOffset = 0x8c; // JIT re-entry fn addr lui
777 const unsigned ReentryCtxAddrOffset = 0x6c; // JIT re-entry ctx addr lui
779 memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode));
781 uint32_t ReentryCtxLUi =
782 0x3c040000 |
783 (((ReentryCtxAddr.getValue() + 0x800080008000) >> 48) & 0xFFFF);
784 uint32_t ReentryCtxDADDiu =
785 0x64840000 | (((ReentryCtxAddr.getValue() + 0x80008000) >> 32) & 0xFFFF);
786 uint32_t ReentryCtxDSLL = 0x00042438;
787 uint32_t ReentryCtxDADDiu2 =
788 0x64840000 | ((((ReentryCtxAddr.getValue() + 0x8000) >> 16) & 0xFFFF));
789 uint32_t ReentryCtxDSLL2 = 0x00042438;
790 uint32_t ReentryCtxDADDiu3 =
791 0x64840000 | (ReentryCtxAddr.getValue() & 0xFFFF);
793 memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxLUi,
794 sizeof(ReentryCtxLUi));
795 memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 4), &ReentryCtxDADDiu,
796 sizeof(ReentryCtxDADDiu));
797 memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 8), &ReentryCtxDSLL,
798 sizeof(ReentryCtxDSLL));
799 memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 12), &ReentryCtxDADDiu2,
800 sizeof(ReentryCtxDADDiu2));
801 memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 16), &ReentryCtxDSLL2,
802 sizeof(ReentryCtxDSLL2));
803 memcpy(ResolverWorkingMem + (ReentryCtxAddrOffset + 20), &ReentryCtxDADDiu3,
804 sizeof(ReentryCtxDADDiu3));
806 uint32_t ReentryFnLUi =
807 0x3c190000 |
808 (((ReentryFnAddr.getValue() + 0x800080008000) >> 48) & 0xFFFF);
810 uint32_t ReentryFnDADDiu =
811 0x67390000 | (((ReentryFnAddr.getValue() + 0x80008000) >> 32) & 0xFFFF);
813 uint32_t ReentryFnDSLL = 0x0019cc38;
815 uint32_t ReentryFnDADDiu2 =
816 0x67390000 | (((ReentryFnAddr.getValue() + 0x8000) >> 16) & 0xFFFF);
818 uint32_t ReentryFnDSLL2 = 0x0019cc38;
820 uint32_t ReentryFnDADDiu3 = 0x67390000 | (ReentryFnAddr.getValue() & 0xFFFF);
822 memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnLUi,
823 sizeof(ReentryFnLUi));
824 memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 4), &ReentryFnDADDiu,
825 sizeof(ReentryFnDADDiu));
826 memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 8), &ReentryFnDSLL,
827 sizeof(ReentryFnDSLL));
828 memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 12), &ReentryFnDADDiu2,
829 sizeof(ReentryFnDADDiu2));
830 memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 16), &ReentryFnDSLL2,
831 sizeof(ReentryFnDSLL2));
832 memcpy(ResolverWorkingMem + (ReentryFnAddrOffset + 20), &ReentryFnDADDiu3,
833 sizeof(ReentryFnDADDiu3));
836 void OrcMips64::writeTrampolines(char *TrampolineBlockWorkingMem,
837 ExecutorAddr TrampolineBlockTargetAddress,
838 ExecutorAddr ResolverAddr,
839 unsigned NumTrampolines) {
841 uint32_t *Trampolines =
842 reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem);
844 uint64_t HeighestAddr = ((ResolverAddr.getValue() + 0x800080008000) >> 48);
845 uint64_t HeigherAddr = ((ResolverAddr.getValue() + 0x80008000) >> 32);
846 uint64_t HiAddr = ((ResolverAddr.getValue() + 0x8000) >> 16);
848 for (unsigned I = 0; I < NumTrampolines; ++I) {
849 Trampolines[10 * I + 0] = 0x03e0c025; // move $t8,$ra
850 Trampolines[10 * I + 1] = 0x3c190000 | (HeighestAddr & 0xFFFF); // lui $t9,resolveAddr
851 Trampolines[10 * I + 2] = 0x67390000 | (HeigherAddr & 0xFFFF); // daddiu $t9,$t9,%higher(resolveAddr)
852 Trampolines[10 * I + 3] = 0x0019cc38; // dsll $t9,$t9,16
853 Trampolines[10 * I + 4] = 0x67390000 | (HiAddr & 0xFFFF); // daddiu $t9,$t9,%hi(ptr)
854 Trampolines[10 * I + 5] = 0x0019cc38; // dsll $t9,$t9,16
855 Trampolines[10 * I + 6] = 0x67390000 | (ResolverAddr.getValue() &
856 0xFFFF); // daddiu $t9,$t9,%lo(ptr)
857 Trampolines[10 * I + 7] = 0x0320f809; // jalr $t9
858 Trampolines[10 * I + 8] = 0x00000000; // nop
859 Trampolines[10 * I + 9] = 0x00000000; // nop
863 void OrcMips64::writeIndirectStubsBlock(char *StubsBlockWorkingMem,
864 ExecutorAddr StubsBlockTargetAddress,
865 ExecutorAddr PointersBlockTargetAddress,
866 unsigned NumStubs) {
867 // Stub format is:
869 // .section __orc_stubs
870 // stub1:
871 // lui $t9,ptr1
872 // dsll $t9,$t9,16
873 // daddiu $t9,$t9,%hi(ptr)
874 // dsll $t9,$t9,16
875 // ld $t9,%lo(ptr)
876 // jr $t9
877 // stub2:
878 // lui $t9,ptr1
879 // dsll $t9,$t9,16
880 // daddiu $t9,$t9,%hi(ptr)
881 // dsll $t9,$t9,16
882 // ld $t9,%lo(ptr)
883 // jr $t9
885 // ...
887 // .section __orc_ptrs
888 // ptr1:
889 // .dword 0x0
890 // ptr2:
891 // .dword 0x0
893 // ...
895 assert(stubAndPointerRangesOk<OrcMips64>(
896 StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) &&
897 "PointersBlock is out of range");
899 // Populate the stubs page stubs and mark it executable.
900 uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlockWorkingMem);
901 uint64_t PtrAddr = PointersBlockTargetAddress.getValue();
903 for (unsigned I = 0; I < NumStubs; ++I, PtrAddr += 8) {
904 uint64_t HeighestAddr = ((PtrAddr + 0x800080008000) >> 48);
905 uint64_t HeigherAddr = ((PtrAddr + 0x80008000) >> 32);
906 uint64_t HiAddr = ((PtrAddr + 0x8000) >> 16);
907 Stub[8 * I + 0] = 0x3c190000 | (HeighestAddr & 0xFFFF); // lui $t9,ptr1
908 Stub[8 * I + 1] = 0x67390000 | (HeigherAddr & 0xFFFF); // daddiu $t9,$t9,%higher(ptr)
909 Stub[8 * I + 2] = 0x0019cc38; // dsll $t9,$t9,16
910 Stub[8 * I + 3] = 0x67390000 | (HiAddr & 0xFFFF); // daddiu $t9,$t9,%hi(ptr)
911 Stub[8 * I + 4] = 0x0019cc38; // dsll $t9,$t9,16
912 Stub[8 * I + 5] = 0xdf390000 | (PtrAddr & 0xFFFF); // ld $t9,%lo(ptr)
913 Stub[8 * I + 6] = 0x03200008; // jr $t9
914 Stub[8 * I + 7] = 0x00000000; // nop
918 void OrcRiscv64::writeResolverCode(char *ResolverWorkingMem,
919 ExecutorAddr ResolverTargetAddress,
920 ExecutorAddr ReentryFnAddr,
921 ExecutorAddr ReentryCtxAddr) {
923 const uint32_t ResolverCode[] = {
924 0xef810113, // 0x00: addi sp,sp,-264
925 0x00813023, // 0x04: sd s0,0(sp)
926 0x00913423, // 0x08: sd s1,8(sp)
927 0x01213823, // 0x0c: sd s2,16(sp)
928 0x01313c23, // 0x10: sd s3,24(sp)
929 0x03413023, // 0x14: sd s4,32(sp)
930 0x03513423, // 0x18: sd s5,40(sp)
931 0x03613823, // 0x1c: sd s6,48(sp)
932 0x03713c23, // 0x20: sd s7,56(sp)
933 0x05813023, // 0x24: sd s8,64(sp)
934 0x05913423, // 0x28: sd s9,72(sp)
935 0x05a13823, // 0x2c: sd s10,80(sp)
936 0x05b13c23, // 0x30: sd s11,88(sp)
937 0x06113023, // 0x34: sd ra,96(sp)
938 0x06a13423, // 0x38: sd a0,104(sp)
939 0x06b13823, // 0x3c: sd a1,112(sp)
940 0x06c13c23, // 0x40: sd a2,120(sp)
941 0x08d13023, // 0x44: sd a3,128(sp)
942 0x08e13423, // 0x48: sd a4,136(sp)
943 0x08f13823, // 0x4c: sd a5,144(sp)
944 0x09013c23, // 0x50: sd a6,152(sp)
945 0x0b113023, // 0x54: sd a7,160(sp)
946 0x0a813427, // 0x58: fsd fs0,168(sp)
947 0x0a913827, // 0x5c: fsd fs1,176(sp)
948 0x0b213c27, // 0x60: fsd fs2,184(sp)
949 0x0d313027, // 0x64: fsd fs3,192(sp)
950 0x0d413427, // 0x68: fsd fs4,200(sp)
951 0x0d513827, // 0x6c: fsd fs5,208(sp)
952 0x0d613c27, // 0x70: fsd fs6,216(sp)
953 0x0f713027, // 0x74: fsd fs7,224(sp)
954 0x0f813427, // 0x78: fsd fs8,232(sp)
955 0x0f913827, // 0x7c: fsd fs9,240(sp)
956 0x0fa13c27, // 0x80: fsd fs10,248(sp)
957 0x11b13027, // 0x84: fsd fs11,256(sp)
958 0x00000517, // 0x88: auipc a0,0x0
959 0x0b053503, // 0x8c: ld a0,176(a0) # 0x138
960 0x00030593, // 0x90: mv a1,t1
961 0xff458593, // 0x94: addi a1,a1,-12
962 0x00000617, // 0x98: auipc a2,0x0
963 0x0a863603, // 0x9c: ld a2,168(a2) # 0x140
964 0x000600e7, // 0xa0: jalr a2
965 0x00050293, // 0xa4: mv t0,a0
966 0x00013403, // 0xa8: ld s0,0(sp)
967 0x00813483, // 0xac: ld s1,8(sp)
968 0x01013903, // 0xb0: ld s2,16(sp)
969 0x01813983, // 0xb4: ld s3,24(sp)
970 0x02013a03, // 0xb8: ld s4,32(sp)
971 0x02813a83, // 0xbc: ld s5,40(sp)
972 0x03013b03, // 0xc0: ld s6,48(sp)
973 0x03813b83, // 0xc4: ld s7,56(sp)
974 0x04013c03, // 0xc8: ld s8,64(sp)
975 0x04813c83, // 0xcc: ld s9,72(sp)
976 0x05013d03, // 0xd0: ld s10,80(sp)
977 0x05813d83, // 0xd4: ld s11,88(sp)
978 0x06013083, // 0xd8: ld ra,96(sp)
979 0x06813503, // 0xdc: ld a0,104(sp)
980 0x07013583, // 0xe0: ld a1,112(sp)
981 0x07813603, // 0xe4: ld a2,120(sp)
982 0x08013683, // 0xe8: ld a3,128(sp)
983 0x08813703, // 0xec: ld a4,136(sp)
984 0x09013783, // 0xf0: ld a5,144(sp)
985 0x09813803, // 0xf4: ld a6,152(sp)
986 0x0a013883, // 0xf8: ld a7,160(sp)
987 0x0a813407, // 0xfc: fld fs0,168(sp)
988 0x0b013487, // 0x100: fld fs1,176(sp)
989 0x0b813907, // 0x104: fld fs2,184(sp)
990 0x0c013987, // 0x108: fld fs3,192(sp)
991 0x0c813a07, // 0x10c: fld fs4,200(sp)
992 0x0d013a87, // 0x110: fld fs5,208(sp)
993 0x0d813b07, // 0x114: fld fs6,216(sp)
994 0x0e013b87, // 0x118: fld fs7,224(sp)
995 0x0e813c07, // 0x11c: fld fs8,232(sp)
996 0x0f013c87, // 0x120: fld fs9,240(sp)
997 0x0f813d07, // 0x124: fld fs10,248(sp)
998 0x10013d87, // 0x128: fld fs11,256(sp)
999 0x10810113, // 0x12c: addi sp,sp,264
1000 0x00028067, // 0x130: jr t0
1001 0x12345678, // 0x134: padding to align at 8 byte
1002 0x12345678, // 0x138: Lreentry_ctx_ptr:
1003 0xdeadbeef, // 0x13c: .quad 0
1004 0x98765432, // 0x140: Lreentry_fn_ptr:
1005 0xcafef00d // 0x144: .quad 0
1008 const unsigned ReentryCtxAddrOffset = 0x138;
1009 const unsigned ReentryFnAddrOffset = 0x140;
1011 memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode));
1012 memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr,
1013 sizeof(uint64_t));
1014 memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr,
1015 sizeof(uint64_t));
1018 void OrcRiscv64::writeTrampolines(char *TrampolineBlockWorkingMem,
1019 ExecutorAddr TrampolineBlockTargetAddress,
1020 ExecutorAddr ResolverAddr,
1021 unsigned NumTrampolines) {
1023 unsigned OffsetToPtr = alignTo(NumTrampolines * TrampolineSize, 8);
1025 memcpy(TrampolineBlockWorkingMem + OffsetToPtr, &ResolverAddr,
1026 sizeof(uint64_t));
1028 uint32_t *Trampolines =
1029 reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem);
1030 for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) {
1031 uint32_t Hi20 = (OffsetToPtr + 0x800) & 0xFFFFF000;
1032 uint32_t Lo12 = OffsetToPtr - Hi20;
1033 Trampolines[4 * I + 0] = 0x00000297 | Hi20; // auipc t0, %hi(Lptr)
1034 Trampolines[4 * I + 1] =
1035 0x0002b283 | ((Lo12 & 0xFFF) << 20); // ld t0, %lo(Lptr)
1036 Trampolines[4 * I + 2] = 0x00028367; // jalr t1, t0
1037 Trampolines[4 * I + 3] = 0xdeadface; // padding
1041 void OrcRiscv64::writeIndirectStubsBlock(
1042 char *StubsBlockWorkingMem, ExecutorAddr StubsBlockTargetAddress,
1043 ExecutorAddr PointersBlockTargetAddress, unsigned NumStubs) {
1044 // Stub format is:
1046 // .section __orc_stubs
1047 // stub1:
1048 // auipc t0, %hi(ptr1) ; PC-rel load of ptr1
1049 // ld t0, %lo(t0)
1050 // jr t0 ; Jump to resolver
1051 // .quad 0 ; Pad to 16 bytes
1052 // stub2:
1053 // auipc t0, %hi(ptr1) ; PC-rel load of ptr1
1054 // ld t0, %lo(t0)
1055 // jr t0 ; Jump to resolver
1056 // .quad 0
1058 // ...
1060 // .section __orc_ptrs
1061 // ptr1:
1062 // .quad 0x0
1063 // ptr2:
1064 // .quad 0x0
1066 // ...
1068 assert(stubAndPointerRangesOk<OrcRiscv64>(
1069 StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) &&
1070 "PointersBlock is out of range");
1072 uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlockWorkingMem);
1074 for (unsigned I = 0; I < NumStubs; ++I) {
1075 uint64_t PtrDisplacement =
1076 PointersBlockTargetAddress - StubsBlockTargetAddress;
1077 uint32_t Hi20 = (PtrDisplacement + 0x800) & 0xFFFFF000;
1078 uint32_t Lo12 = PtrDisplacement - Hi20;
1079 Stub[4 * I + 0] = 0x00000297 | Hi20; // auipc t0, %hi(Lptr)
1080 Stub[4 * I + 1] = 0x0002b283 | ((Lo12 & 0xFFF) << 20); // ld t0, %lo(Lptr)
1081 Stub[4 * I + 2] = 0x00028067; // jr t0
1082 Stub[4 * I + 3] = 0xfeedbeef; // padding
1083 PointersBlockTargetAddress += PointerSize;
1084 StubsBlockTargetAddress += StubSize;
1088 void OrcLoongArch64::writeResolverCode(char *ResolverWorkingMem,
1089 ExecutorAddr ResolverTargetAddress,
1090 ExecutorAddr ReentryFnAddr,
1091 ExecutorAddr ReentryCtxAddr) {
1093 LLVM_DEBUG({
1094 dbgs() << "Writing resolver code to "
1095 << formatv("{0:x16}", ResolverTargetAddress) << "\n";
1098 const uint32_t ResolverCode[] = {
1099 0x02fde063, // 0x0: addi.d $sp, $sp, -136(0xf78)
1100 0x29c00061, // 0x4: st.d $ra, $sp, 0
1101 0x29c02064, // 0x8: st.d $a0, $sp, 8(0x8)
1102 0x29c04065, // 0xc: st.d $a1, $sp, 16(0x10)
1103 0x29c06066, // 0x10: st.d $a2, $sp, 24(0x18)
1104 0x29c08067, // 0x14: st.d $a3, $sp, 32(0x20)
1105 0x29c0a068, // 0x18: st.d $a4, $sp, 40(0x28)
1106 0x29c0c069, // 0x1c: st.d $a5, $sp, 48(0x30)
1107 0x29c0e06a, // 0x20: st.d $a6, $sp, 56(0x38)
1108 0x29c1006b, // 0x24: st.d $a7, $sp, 64(0x40)
1109 0x2bc12060, // 0x28: fst.d $fa0, $sp, 72(0x48)
1110 0x2bc14061, // 0x2c: fst.d $fa1, $sp, 80(0x50)
1111 0x2bc16062, // 0x30: fst.d $fa2, $sp, 88(0x58)
1112 0x2bc18063, // 0x34: fst.d $fa3, $sp, 96(0x60)
1113 0x2bc1a064, // 0x38: fst.d $fa4, $sp, 104(0x68)
1114 0x2bc1c065, // 0x3c: fst.d $fa5, $sp, 112(0x70)
1115 0x2bc1e066, // 0x40: fst.d $fa6, $sp, 120(0x78)
1116 0x2bc20067, // 0x44: fst.d $fa7, $sp, 128(0x80)
1117 0x1c000004, // 0x48: pcaddu12i $a0, 0
1118 0x28c1c084, // 0x4c: ld.d $a0, $a0, 112(0x70)
1119 0x001501a5, // 0x50: move $a1, $t1
1120 0x02ffd0a5, // 0x54: addi.d $a1, $a1, -12(0xff4)
1121 0x1c000006, // 0x58: pcaddu12i $a2, 0
1122 0x28c1a0c6, // 0x5c: ld.d $a2, $a2, 104(0x68)
1123 0x4c0000c1, // 0x60: jirl $ra, $a2, 0
1124 0x0015008c, // 0x64: move $t0, $a0
1125 0x2b820067, // 0x68: fld.d $fa7, $sp, 128(0x80)
1126 0x2b81e066, // 0x6c: fld.d $fa6, $sp, 120(0x78)
1127 0x2b81c065, // 0x70: fld.d $fa5, $sp, 112(0x70)
1128 0x2b81a064, // 0x74: fld.d $fa4, $sp, 104(0x68)
1129 0x2b818063, // 0x78: fld.d $fa3, $sp, 96(0x60)
1130 0x2b816062, // 0x7c: fld.d $fa2, $sp, 88(0x58)
1131 0x2b814061, // 0x80: fld.d $fa1, $sp, 80(0x50)
1132 0x2b812060, // 0x84: fld.d $fa0, $sp, 72(0x48)
1133 0x28c1006b, // 0x88: ld.d $a7, $sp, 64(0x40)
1134 0x28c0e06a, // 0x8c: ld.d $a6, $sp, 56(0x38)
1135 0x28c0c069, // 0x90: ld.d $a5, $sp, 48(0x30)
1136 0x28c0a068, // 0x94: ld.d $a4, $sp, 40(0x28)
1137 0x28c08067, // 0x98: ld.d $a3, $sp, 32(0x20)
1138 0x28c06066, // 0x9c: ld.d $a2, $sp, 24(0x18)
1139 0x28c04065, // 0xa0: ld.d $a1, $sp, 16(0x10)
1140 0x28c02064, // 0xa4: ld.d $a0, $sp, 8(0x8)
1141 0x28c00061, // 0xa8: ld.d $ra, $sp, 0
1142 0x02c22063, // 0xac: addi.d $sp, $sp, 136(0x88)
1143 0x4c000180, // 0xb0: jr $t0
1144 0x00000000, // 0xb4: padding to align at 8 bytes
1145 0x01234567, // 0xb8: Lreentry_ctx_ptr:
1146 0xdeedbeef, // 0xbc: .dword 0
1147 0x98765432, // 0xc0: Lreentry_fn_ptr:
1148 0xcafef00d, // 0xc4: .dword 0
1151 const unsigned ReentryCtxAddrOffset = 0xb8;
1152 const unsigned ReentryFnAddrOffset = 0xc0;
1154 memcpy(ResolverWorkingMem, ResolverCode, sizeof(ResolverCode));
1155 memcpy(ResolverWorkingMem + ReentryFnAddrOffset, &ReentryFnAddr,
1156 sizeof(uint64_t));
1157 memcpy(ResolverWorkingMem + ReentryCtxAddrOffset, &ReentryCtxAddr,
1158 sizeof(uint64_t));
1161 void OrcLoongArch64::writeTrampolines(char *TrampolineBlockWorkingMem,
1162 ExecutorAddr TrampolineBlockTargetAddress,
1163 ExecutorAddr ResolverAddr,
1164 unsigned NumTrampolines) {
1166 LLVM_DEBUG({
1167 dbgs() << "Writing trampoline code to "
1168 << formatv("{0:x16}", TrampolineBlockTargetAddress) << "\n";
1171 unsigned OffsetToPtr = alignTo(NumTrampolines * TrampolineSize, 8);
1173 memcpy(TrampolineBlockWorkingMem + OffsetToPtr, &ResolverAddr,
1174 sizeof(uint64_t));
1176 uint32_t *Trampolines =
1177 reinterpret_cast<uint32_t *>(TrampolineBlockWorkingMem);
1178 for (unsigned I = 0; I < NumTrampolines; ++I, OffsetToPtr -= TrampolineSize) {
1179 uint32_t Hi20 = (OffsetToPtr + 0x800) & 0xfffff000;
1180 uint32_t Lo12 = OffsetToPtr - Hi20;
1181 Trampolines[4 * I + 0] =
1182 0x1c00000c |
1183 (((Hi20 >> 12) & 0xfffff) << 5); // pcaddu12i $t0, %pc_hi20(Lptr)
1184 Trampolines[4 * I + 1] =
1185 0x28c0018c | ((Lo12 & 0xfff) << 10); // ld.d $t0, $t0, %pc_lo12(Lptr)
1186 Trampolines[4 * I + 2] = 0x4c00018d; // jirl $t1, $t0, 0
1187 Trampolines[4 * I + 3] = 0x0; // padding
1191 void OrcLoongArch64::writeIndirectStubsBlock(
1192 char *StubsBlockWorkingMem, ExecutorAddr StubsBlockTargetAddress,
1193 ExecutorAddr PointersBlockTargetAddress, unsigned NumStubs) {
1194 // Stub format is:
1196 // .section __orc_stubs
1197 // stub1:
1198 // pcaddu12i $t0, %pc_hi20(ptr1) ; PC-rel load of ptr1
1199 // ld.d $t0, $t0, %pc_lo12(ptr1)
1200 // jr $t0 ; Jump to resolver
1201 // .dword 0 ; Pad to 16 bytes
1202 // stub2:
1203 // pcaddu12i $t0, %pc_hi20(ptr2) ; PC-rel load of ptr2
1204 // ld.d $t0, $t0, %pc_lo12(ptr2)
1205 // jr $t0 ; Jump to resolver
1206 // .dword 0 ; Pad to 16 bytes
1207 // ...
1209 // .section __orc_ptrs
1210 // ptr1:
1211 // .dword 0x0
1212 // ptr2:
1213 // .dword 0x0
1214 // ...
1215 LLVM_DEBUG({
1216 dbgs() << "Writing stubs code to "
1217 << formatv("{0:x16}", StubsBlockTargetAddress) << "\n";
1219 assert(stubAndPointerRangesOk<OrcLoongArch64>(
1220 StubsBlockTargetAddress, PointersBlockTargetAddress, NumStubs) &&
1221 "PointersBlock is out of range");
1223 uint32_t *Stub = reinterpret_cast<uint32_t *>(StubsBlockWorkingMem);
1225 for (unsigned I = 0; I < NumStubs; ++I) {
1226 uint64_t PtrDisplacement =
1227 PointersBlockTargetAddress - StubsBlockTargetAddress;
1228 uint32_t Hi20 = (PtrDisplacement + 0x800) & 0xfffff000;
1229 uint32_t Lo12 = PtrDisplacement - Hi20;
1230 Stub[4 * I + 0] = 0x1c00000c | (((Hi20 >> 12) & 0xfffff)
1231 << 5); // pcaddu12i $t0, %pc_hi20(Lptr)
1232 Stub[4 * I + 1] =
1233 0x28c0018c | ((Lo12 & 0xfff) << 10); // ld.d $t0, $t0, %pc_lo12(Lptr)
1234 Stub[4 * I + 2] = 0x4c000180; // jr $t0
1235 Stub[4 * I + 3] = 0x0; // padding
1236 PointersBlockTargetAddress += PointerSize;
1237 StubsBlockTargetAddress += StubSize;
1241 } // End namespace orc.
1242 } // End namespace llvm.