2 /*---------------------------------------------------------------*/
3 /*--- begin host_arm64_defs.c ---*/
4 /*---------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2013-2017 OpenWorks
11 info@open-works.net
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, write to the Free Software
25 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
26 02110-1301, USA.
28 The GNU General Public License is contained in the file COPYING.
31 #include "libvex_basictypes.h"
32 #include "libvex.h"
33 #include "libvex_trc_values.h"
35 #include "main_util.h"
36 #include "host_generic_regs.h"
37 #include "host_arm64_defs.h"
40 /* --------- Registers. --------- */
42 /* The usual HReg abstraction. We use the following classes only:
43 X regs (64 bit int)
44 D regs (64 bit float, also used for 32 bit float)
45 Q regs (128 bit vector)  */
48 const RRegUniverse* getRRegUniverse_ARM64 ( void )
50 /* The real-register universe is a big constant, so we just want to
51 initialise it once. */
52 static RRegUniverse rRegUniverse_ARM64;
53 static Bool rRegUniverse_ARM64_initted = False;
55 /* Handy shorthand, nothing more */
56 RRegUniverse* ru = &rRegUniverse_ARM64;
58 /* This isn't thread-safe. Sigh. */
59 if (LIKELY(rRegUniverse_ARM64_initted))
60 return ru;
62 RRegUniverse__init(ru);
64 /* Add the registers. The initial segment of this array must be
65 those available for allocation by reg-alloc, and those that
66 follow are not available for allocation. */
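/* Note on the bookkeeping below: for each register class,
   allocable_start[class] records the regs[] index of the first register
   pushed for that class and allocable_end[class] the index of the last,
   so e.g. the allocatable HRcInt64 registers are exactly
   regs[allocable_start[HRcInt64] .. allocable_end[HRcInt64]] inclusive. */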
67 ru->allocable_start[HRcInt64] = ru->size;
68 ru->regs[ru->size++] = hregARM64_X22();
69 ru->regs[ru->size++] = hregARM64_X23();
70 ru->regs[ru->size++] = hregARM64_X24();
71 ru->regs[ru->size++] = hregARM64_X25();
72 ru->regs[ru->size++] = hregARM64_X26();
73 ru->regs[ru->size++] = hregARM64_X27();
74 ru->regs[ru->size++] = hregARM64_X28();
76 ru->regs[ru->size++] = hregARM64_X0();
77 ru->regs[ru->size++] = hregARM64_X1();
78 ru->regs[ru->size++] = hregARM64_X2();
79 ru->regs[ru->size++] = hregARM64_X3();
80 ru->regs[ru->size++] = hregARM64_X4();
81 ru->regs[ru->size++] = hregARM64_X5();
82 ru->regs[ru->size++] = hregARM64_X6();
83 ru->regs[ru->size++] = hregARM64_X7();
84 ru->allocable_end[HRcInt64] = ru->size - 1;
85 // X8 is used as a ProfInc temporary, not available to regalloc.
86 // X9 is a chaining/spill temporary, not available to regalloc.
88 // Do we really need all these?
89 //ru->regs[ru->size++] = hregARM64_X10();
90 //ru->regs[ru->size++] = hregARM64_X11();
91 //ru->regs[ru->size++] = hregARM64_X12();
92 //ru->regs[ru->size++] = hregARM64_X13();
93 //ru->regs[ru->size++] = hregARM64_X14();
94 //ru->regs[ru->size++] = hregARM64_X15();
95 // X21 is the guest state pointer, not available to regalloc.
97 // vector regs. Unfortunately not callee-saved.
98 ru->allocable_start[HRcVec128] = ru->size;
99 ru->regs[ru->size++] = hregARM64_Q16();
100 ru->regs[ru->size++] = hregARM64_Q17();
101 ru->regs[ru->size++] = hregARM64_Q18();
102 ru->regs[ru->size++] = hregARM64_Q19();
103 ru->regs[ru->size++] = hregARM64_Q20();
104 ru->allocable_end[HRcVec128] = ru->size - 1;
106 // F64 regs, all of which are callee-saved
107 ru->allocable_start[HRcFlt64] = ru->size;
108 ru->regs[ru->size++] = hregARM64_D8();
109 ru->regs[ru->size++] = hregARM64_D9();
110 ru->regs[ru->size++] = hregARM64_D10();
111 ru->regs[ru->size++] = hregARM64_D11();
112 ru->regs[ru->size++] = hregARM64_D12();
113 ru->regs[ru->size++] = hregARM64_D13();
114 ru->allocable_end[HRcFlt64] = ru->size - 1;
116 ru->allocable = ru->size;
117 /* And other regs, not available to the allocator. */
119 // unavail: x21 as GSP
120 // x8 is used as a ProfInc temporary
121 // x9 is used as a spill/reload/chaining/call temporary
122 // x30 as LR
123 // x31 because dealing with the SP-vs-ZR overloading is too
124 // confusing, and we don't need to do so, so let's just avoid
125 // the problem
127 // Currently, we have 15 allocatable integer registers:
128 // 0 1 2 3 4 5 6 7 22 23 24 25 26 27 28
130 // Hence for the allocatable integer registers we have:
132 // callee-saved: 22 23 24 25 26 27 28
133 // caller-saved: 0 1 2 3 4 5 6 7
135 // If the set of available registers changes or if the e/r status
136 // changes, be sure to re-check/sync the definition of
137 // getRegUsage for ARM64Instr_Call too.
139 ru->regs[ru->size++] = hregARM64_X8();
140 ru->regs[ru->size++] = hregARM64_X9();
141 ru->regs[ru->size++] = hregARM64_X21();
143 rRegUniverse_ARM64_initted = True;
145 RRegUniverse__check_is_sane(ru);
146 return ru;
150 UInt ppHRegARM64 ( HReg reg ) {
151 Int r;
152 /* Be generic for all virtual regs. */
153 if (hregIsVirtual(reg)) {
154 return ppHReg(reg);
156 /* But specific for real regs. */
157 switch (hregClass(reg)) {
158 case HRcInt64:
159 r = hregEncoding(reg);
160 vassert(r >= 0 && r < 31);
161 return vex_printf("x%d", r);
162 case HRcFlt64:
163 r = hregEncoding(reg);
164 vassert(r >= 0 && r < 32);
165 return vex_printf("d%d", r);
166 case HRcVec128:
167 r = hregEncoding(reg);
168 vassert(r >= 0 && r < 32);
169 return vex_printf("q%d", r);
170 default:
171 vpanic("ppHRegARM64");
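/* Note: for the real registers handled above this prints "x22", "d9",
   "q16" and so on; the two wrappers below reuse it and append a
   "(S-reg)" or "(H-reg)" tag so that 32-bit and 16-bit FP views of a
   D register remain distinguishable in printed instructions. */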
175 static UInt ppHRegARM64asSreg ( HReg reg ) {
176 UInt written = ppHRegARM64(reg);
177 written += vex_printf("(S-reg)");
178 return written;
181 static UInt ppHRegARM64asHreg ( HReg reg ) {
182 UInt written = ppHRegARM64(reg);
183 written += vex_printf("(H-reg)");
184 return written;
188 /* --------- Condition codes, ARM64 encoding. --------- */
190 static const HChar* showARM64CondCode ( ARM64CondCode cond ) {
191 switch (cond) {
192 case ARM64cc_EQ: return "eq";
193 case ARM64cc_NE: return "ne";
194 case ARM64cc_CS: return "cs";
195 case ARM64cc_CC: return "cc";
196 case ARM64cc_MI: return "mi";
197 case ARM64cc_PL: return "pl";
198 case ARM64cc_VS: return "vs";
199 case ARM64cc_VC: return "vc";
200 case ARM64cc_HI: return "hi";
201 case ARM64cc_LS: return "ls";
202 case ARM64cc_GE: return "ge";
203 case ARM64cc_LT: return "lt";
204 case ARM64cc_GT: return "gt";
205 case ARM64cc_LE: return "le";
206 case ARM64cc_AL: return "al"; // default
207 case ARM64cc_NV: return "nv";
208 default: vpanic("showARM64CondCode");
213 /* --------- Memory address expressions (amodes). --------- */
215 ARM64AMode* ARM64AMode_RI9 ( HReg reg, Int simm9 ) {
216 ARM64AMode* am = LibVEX_Alloc_inline(sizeof(ARM64AMode));
217 am->tag = ARM64am_RI9;
218 am->ARM64am.RI9.reg = reg;
219 am->ARM64am.RI9.simm9 = simm9;
220 vassert(-256 <= simm9 && simm9 <= 255);
221 return am;
224 ARM64AMode* ARM64AMode_RI12 ( HReg reg, Int uimm12, UChar szB ) {
225 ARM64AMode* am = LibVEX_Alloc_inline(sizeof(ARM64AMode));
226 am->tag = ARM64am_RI12;
227 am->ARM64am.RI12.reg = reg;
228 am->ARM64am.RI12.uimm12 = uimm12;
229 am->ARM64am.RI12.szB = szB;
230 vassert(uimm12 >= 0 && uimm12 <= 4095);
231 switch (szB) {
232 case 1: case 2: case 4: case 8: break;
233 default: vassert(0);
235 return am;
238 ARM64AMode* ARM64AMode_RR ( HReg base, HReg index ) {
239 ARM64AMode* am = LibVEX_Alloc_inline(sizeof(ARM64AMode));
240 am->tag = ARM64am_RR;
241 am->ARM64am.RR.base = base;
242 am->ARM64am.RR.index = index;
243 return am;
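/* Worked example of the three amode forms, matching the printer below:
   RI9 is reg + signed 9-bit byte offset, RR is reg + reg, and RI12 is
   reg + (uimm12 scaled by the access size szB), so for instance
   ARM64AMode_RI12(rN, 3, 8) denotes the byte offset 3*8 = 24 from
   whatever register rN is, and is printed as "24(<rN>)". */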
246 static void ppARM64AMode ( ARM64AMode* am ) {
247 switch (am->tag) {
248 case ARM64am_RI9:
249 vex_printf("%d(", am->ARM64am.RI9.simm9);
250 ppHRegARM64(am->ARM64am.RI9.reg);
251 vex_printf(")");
252 break;
253 case ARM64am_RI12:
254 vex_printf("%u(", (UInt)am->ARM64am.RI12.szB
255 * (UInt)am->ARM64am.RI12.uimm12);
256 ppHRegARM64(am->ARM64am.RI12.reg);
257 vex_printf(")");
258 break;
259 case ARM64am_RR:
260 vex_printf("(");
261 ppHRegARM64(am->ARM64am.RR.base);
262 vex_printf(",");
263 ppHRegARM64(am->ARM64am.RR.index);
264 vex_printf(")");
265 break;
266 default:
267 vassert(0);
271 static void addRegUsage_ARM64AMode ( HRegUsage* u, ARM64AMode* am ) {
272 switch (am->tag) {
273 case ARM64am_RI9:
274 addHRegUse(u, HRmRead, am->ARM64am.RI9.reg);
275 return;
276 case ARM64am_RI12:
277 addHRegUse(u, HRmRead, am->ARM64am.RI12.reg);
278 return;
279 case ARM64am_RR:
280 addHRegUse(u, HRmRead, am->ARM64am.RR.base);
281 addHRegUse(u, HRmRead, am->ARM64am.RR.index);
282 return;
283 default:
284 vpanic("addRegUsage_ARM64Amode");
288 static void mapRegs_ARM64AMode ( HRegRemap* m, ARM64AMode* am ) {
289 switch (am->tag) {
290 case ARM64am_RI9:
291 am->ARM64am.RI9.reg = lookupHRegRemap(m, am->ARM64am.RI9.reg);
292 return;
293 case ARM64am_RI12:
294 am->ARM64am.RI12.reg = lookupHRegRemap(m, am->ARM64am.RI12.reg);
295 return;
296 case ARM64am_RR:
297 am->ARM64am.RR.base = lookupHRegRemap(m, am->ARM64am.RR.base);
298 am->ARM64am.RR.index = lookupHRegRemap(m, am->ARM64am.RR.index);
299 return;
300 default:
301 vpanic("mapRegs_ARM64Amode");
306 /* --------- Reg or uimm12<<{0,12} operands --------- */
308 ARM64RIA* ARM64RIA_I12 ( UShort imm12, UChar shift ) {
309 ARM64RIA* riA = LibVEX_Alloc_inline(sizeof(ARM64RIA));
310 riA->tag = ARM64riA_I12;
311 riA->ARM64riA.I12.imm12 = imm12;
312 riA->ARM64riA.I12.shift = shift;
313 vassert(imm12 < 4096);
314 vassert(shift == 0 || shift == 12);
315 return riA;
317 ARM64RIA* ARM64RIA_R ( HReg reg ) {
318 ARM64RIA* riA = LibVEX_Alloc_inline(sizeof(ARM64RIA));
319 riA->tag = ARM64riA_R;
320 riA->ARM64riA.R.reg = reg;
321 return riA;
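/* Example: ARM64RIA_I12(0x123, 12) stands for the immediate
   0x123 << 12 = 0x123000, exactly the value ppARM64RIA prints below;
   only shift amounts 0 and 12 are representable, matching the AArch64
   add/sub-immediate encoding. */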
324 static void ppARM64RIA ( ARM64RIA* riA ) {
325 switch (riA->tag) {
326 case ARM64riA_I12:
327 vex_printf("#%u",(UInt)(riA->ARM64riA.I12.imm12
328 << riA->ARM64riA.I12.shift));
329 break;
330 case ARM64riA_R:
331 ppHRegARM64(riA->ARM64riA.R.reg);
332 break;
333 default:
334 vassert(0);
338 static void addRegUsage_ARM64RIA ( HRegUsage* u, ARM64RIA* riA ) {
339 switch (riA->tag) {
340 case ARM64riA_I12:
341 return;
342 case ARM64riA_R:
343 addHRegUse(u, HRmRead, riA->ARM64riA.R.reg);
344 return;
345 default:
346 vpanic("addRegUsage_ARM64RIA");
350 static void mapRegs_ARM64RIA ( HRegRemap* m, ARM64RIA* riA ) {
351 switch (riA->tag) {
352 case ARM64riA_I12:
353 return;
354 case ARM64riA_R:
355 riA->ARM64riA.R.reg = lookupHRegRemap(m, riA->ARM64riA.R.reg);
356 return;
357 default:
358 vpanic("mapRegs_ARM64RIA");
363 /* --------- Reg or "bitfield" (logic immediate) operands --------- */
365 ARM64RIL* ARM64RIL_I13 ( UChar bitN, UChar immR, UChar immS ) {
366 ARM64RIL* riL = LibVEX_Alloc_inline(sizeof(ARM64RIL));
367 riL->tag = ARM64riL_I13;
368 riL->ARM64riL.I13.bitN = bitN;
369 riL->ARM64riL.I13.immR = immR;
370 riL->ARM64riL.I13.immS = immS;
371 vassert(bitN < 2);
372 vassert(immR < 64);
373 vassert(immS < 64);
374 return riL;
376 ARM64RIL* ARM64RIL_R ( HReg reg ) {
377 ARM64RIL* riL = LibVEX_Alloc_inline(sizeof(ARM64RIL));
378 riL->tag = ARM64riL_R;
379 riL->ARM64riL.R.reg = reg;
380 return riL;
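/* Note: the (bitN, immR, immS) triple is the raw N:immr:imms field of an
   AArch64 "logical immediate" (bitmask immediate).  The constructor only
   range-checks the fields, and ppARM64RIL below prints them symbolically
   as #nrs(N,R,S) rather than decoding the bit pattern they denote. */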
383 static void ppARM64RIL ( ARM64RIL* riL ) {
384 switch (riL->tag) {
385 case ARM64riL_I13:
386 vex_printf("#nrs(%u,%u,%u)",
387 (UInt)riL->ARM64riL.I13.bitN,
388 (UInt)riL->ARM64riL.I13.immR,
389 (UInt)riL->ARM64riL.I13.immS);
390 break;
391 case ARM64riL_R:
392 ppHRegARM64(riL->ARM64riL.R.reg);
393 break;
394 default:
395 vassert(0);
399 static void addRegUsage_ARM64RIL ( HRegUsage* u, ARM64RIL* riL ) {
400 switch (riL->tag) {
401 case ARM64riL_I13:
402 return;
403 case ARM64riL_R:
404 addHRegUse(u, HRmRead, riL->ARM64riL.R.reg);
405 return;
406 default:
407 vpanic("addRegUsage_ARM64RIL");
411 static void mapRegs_ARM64RIL ( HRegRemap* m, ARM64RIL* riL ) {
412 switch (riL->tag) {
413 case ARM64riL_I13:
414 return;
415 case ARM64riL_R:
416 riL->ARM64riL.R.reg = lookupHRegRemap(m, riL->ARM64riL.R.reg);
417 return;
418 default:
419 vpanic("mapRegs_ARM64RIL");
424 /* --------------- Reg or uimm6 operands --------------- */
426 ARM64RI6* ARM64RI6_I6 ( UInt imm6 ) {
427 ARM64RI6* ri6 = LibVEX_Alloc_inline(sizeof(ARM64RI6));
428 ri6->tag = ARM64ri6_I6;
429 ri6->ARM64ri6.I6.imm6 = imm6;
430 vassert(imm6 > 0 && imm6 < 64);
431 return ri6;
433 ARM64RI6* ARM64RI6_R ( HReg reg ) {
434 ARM64RI6* ri6 = LibVEX_Alloc_inline(sizeof(ARM64RI6));
435 ri6->tag = ARM64ri6_R;
436 ri6->ARM64ri6.R.reg = reg;
437 return ri6;
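/* Note: the immediate form of an RI6 is a shift amount restricted to
   1..63 by the vassert above; a shift by zero is presumably never
   generated, since it would just be a register move. */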
440 static void ppARM64RI6 ( ARM64RI6* ri6 ) {
441 switch (ri6->tag) {
442 case ARM64ri6_I6:
443 vex_printf("#%u", ri6->ARM64ri6.I6.imm6);
444 break;
445 case ARM64ri6_R:
446 ppHRegARM64(ri6->ARM64ri6.R.reg);
447 break;
448 default:
449 vassert(0);
453 static void addRegUsage_ARM64RI6 ( HRegUsage* u, ARM64RI6* ri6 ) {
454 switch (ri6->tag) {
455 case ARM64ri6_I6:
456 return;
457 case ARM64ri6_R:
458 addHRegUse(u, HRmRead, ri6->ARM64ri6.R.reg);
459 return;
460 default:
461 vpanic("addRegUsage_ARM64RI6");
465 static void mapRegs_ARM64RI6 ( HRegRemap* m, ARM64RI6* ri6 ) {
466 switch (ri6->tag) {
467 case ARM64ri6_I6:
468 return;
469 case ARM64ri6_R:
470 ri6->ARM64ri6.R.reg = lookupHRegRemap(m, ri6->ARM64ri6.R.reg);
471 return;
472 default:
473 vpanic("mapRegs_ARM64RI6");
478 /* --------- Instructions. --------- */
480 static const HChar* showARM64LogicOp ( ARM64LogicOp op ) {
481 switch (op) {
482 case ARM64lo_AND: return "and";
483 case ARM64lo_OR: return "orr";
484 case ARM64lo_XOR: return "eor";
485 default: vpanic("showARM64LogicOp");
489 static const HChar* showARM64ShiftOp ( ARM64ShiftOp op ) {
490 switch (op) {
491 case ARM64sh_SHL: return "lsl";
492 case ARM64sh_SHR: return "lsr";
493 case ARM64sh_SAR: return "asr";
494 default: vpanic("showARM64ShiftOp");
498 static const HChar* showARM64UnaryOp ( ARM64UnaryOp op ) {
499 switch (op) {
500 case ARM64un_NEG: return "neg";
501 case ARM64un_NOT: return "not";
502 case ARM64un_CLZ: return "clz";
503 default: vpanic("showARM64UnaryOp");
507 static const HChar* showARM64MulOp ( ARM64MulOp op ) {
508 switch (op) {
509 case ARM64mul_PLAIN: return "mul ";
510 case ARM64mul_ZX: return "umulh";
511 case ARM64mul_SX: return "smulh";
512 default: vpanic("showARM64MulOp");
516 static void characteriseARM64CvtOp ( /*OUT*/HChar* syn,
517 /*OUT*/UInt* fszB, /*OUT*/UInt* iszB,
518 ARM64CvtOp op ) {
519 switch (op) {
520 case ARM64cvt_F32_I32S:
521 *syn = 's'; *fszB = 4; *iszB = 4; break;
522 case ARM64cvt_F64_I32S:
523 *syn = 's'; *fszB = 8; *iszB = 4; break;
524 case ARM64cvt_F32_I64S:
525 *syn = 's'; *fszB = 4; *iszB = 8; break;
526 case ARM64cvt_F64_I64S:
527 *syn = 's'; *fszB = 8; *iszB = 8; break;
528 case ARM64cvt_F32_I32U:
529 *syn = 'u'; *fszB = 4; *iszB = 4; break;
530 case ARM64cvt_F64_I32U:
531 *syn = 'u'; *fszB = 8; *iszB = 4; break;
532 case ARM64cvt_F32_I64U:
533 *syn = 'u'; *fszB = 4; *iszB = 8; break;
534 case ARM64cvt_F64_I64U:
535 *syn = 'u'; *fszB = 8; *iszB = 8; break;
536 default:
537 vpanic("characteriseARM64CvtOp");
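/* Example: ARM64cvt_F64_I32S characterises as syn='s', fszB=8, iszB=4.
   ppARM64Instr later uses this to render the int-to-FP form as e.g.
   "scvtf d7(D-reg), x5(W-reg)" and the FP-to-int form as e.g.
   "fcvtzs x5(W-reg), d7(D-reg)", where the 'z' is the rounding-mode
   letter chosen from "npmz". */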
541 static const HChar* showARM64FpBinOp ( ARM64FpBinOp op ) {
542 switch (op) {
543 case ARM64fpb_ADD: return "add";
544 case ARM64fpb_SUB: return "sub";
545 case ARM64fpb_MUL: return "mul";
546 case ARM64fpb_DIV: return "div";
547 default: vpanic("showARM64FpBinOp");
551 static const HChar* showARM64FpUnaryOp ( ARM64FpUnaryOp op ) {
552 switch (op) {
553 case ARM64fpu_NEG: return "neg ";
554 case ARM64fpu_ABS: return "abs ";
555 case ARM64fpu_SQRT: return "sqrt ";
556 case ARM64fpu_RINT: return "rinti";
557 case ARM64fpu_RECPX: return "recpx";
558 default: vpanic("showARM64FpUnaryOp");
562 static void showARM64VecBinOp(/*OUT*/const HChar** nm,
563 /*OUT*/const HChar** ar, ARM64VecBinOp op ) {
564 switch (op) {
565 case ARM64vecb_ADD64x2: *nm = "add "; *ar = "2d"; return;
566 case ARM64vecb_ADD32x4: *nm = "add "; *ar = "4s"; return;
567 case ARM64vecb_ADD16x8: *nm = "add "; *ar = "8h"; return;
568 case ARM64vecb_ADD8x16: *nm = "add "; *ar = "16b"; return;
569 case ARM64vecb_SUB64x2: *nm = "sub "; *ar = "2d"; return;
570 case ARM64vecb_SUB32x4: *nm = "sub "; *ar = "4s"; return;
571 case ARM64vecb_SUB16x8: *nm = "sub "; *ar = "8h"; return;
572 case ARM64vecb_SUB8x16: *nm = "sub "; *ar = "16b"; return;
573 case ARM64vecb_MUL32x4: *nm = "mul "; *ar = "4s"; return;
574 case ARM64vecb_MUL16x8: *nm = "mul "; *ar = "8h"; return;
575 case ARM64vecb_MUL8x16: *nm = "mul "; *ar = "16b"; return;
576 case ARM64vecb_FADD64x2: *nm = "fadd "; *ar = "2d"; return;
577 case ARM64vecb_FSUB64x2: *nm = "fsub "; *ar = "2d"; return;
578 case ARM64vecb_FMUL64x2: *nm = "fmul "; *ar = "2d"; return;
579 case ARM64vecb_FDIV64x2: *nm = "fdiv "; *ar = "2d"; return;
580 case ARM64vecb_FADD32x4: *nm = "fadd "; *ar = "4s"; return;
581 case ARM64vecb_FSUB32x4: *nm = "fsub "; *ar = "4s"; return;
582 case ARM64vecb_FMUL32x4: *nm = "fmul "; *ar = "4s"; return;
583 case ARM64vecb_FDIV32x4: *nm = "fdiv "; *ar = "4s"; return;
584 case ARM64vecb_FMAX64x2: *nm = "fmax "; *ar = "2d"; return;
585 case ARM64vecb_FMAX32x4: *nm = "fmax "; *ar = "4s"; return;
586 case ARM64vecb_FMIN64x2: *nm = "fmin "; *ar = "2d"; return;
587 case ARM64vecb_FMIN32x4: *nm = "fmin "; *ar = "4s"; return;
588 case ARM64vecb_UMAX32x4: *nm = "umax "; *ar = "4s"; return;
589 case ARM64vecb_UMAX16x8: *nm = "umax "; *ar = "8h"; return;
590 case ARM64vecb_UMAX8x16: *nm = "umax "; *ar = "16b"; return;
591 case ARM64vecb_UMIN32x4: *nm = "umin "; *ar = "4s"; return;
592 case ARM64vecb_UMIN16x8: *nm = "umin "; *ar = "8h"; return;
593 case ARM64vecb_UMIN8x16: *nm = "umin "; *ar = "16b"; return;
594 case ARM64vecb_SMAX32x4: *nm = "smax "; *ar = "4s"; return;
595 case ARM64vecb_SMAX16x8: *nm = "smax "; *ar = "8h"; return;
596 case ARM64vecb_SMAX8x16: *nm = "smax "; *ar = "16b"; return;
597 case ARM64vecb_SMIN32x4: *nm = "smin "; *ar = "4s"; return;
598 case ARM64vecb_SMIN16x8: *nm = "smin "; *ar = "8h"; return;
599 case ARM64vecb_SMIN8x16: *nm = "smin "; *ar = "16b"; return;
600 case ARM64vecb_AND: *nm = "and "; *ar = "16b"; return;
601 case ARM64vecb_ORR: *nm = "orr "; *ar = "16b"; return;
602 case ARM64vecb_XOR: *nm = "eor "; *ar = "16b"; return;
603 case ARM64vecb_CMEQ64x2: *nm = "cmeq "; *ar = "2d"; return;
604 case ARM64vecb_CMEQ32x4: *nm = "cmeq "; *ar = "4s"; return;
605 case ARM64vecb_CMEQ16x8: *nm = "cmeq "; *ar = "8h"; return;
606 case ARM64vecb_CMEQ8x16: *nm = "cmeq "; *ar = "16b"; return;
607 case ARM64vecb_CMHI64x2: *nm = "cmhi "; *ar = "2d"; return;
608 case ARM64vecb_CMHI32x4: *nm = "cmhi "; *ar = "4s"; return;
609 case ARM64vecb_CMHI16x8: *nm = "cmhi "; *ar = "8h"; return;
610 case ARM64vecb_CMHI8x16: *nm = "cmhi "; *ar = "16b"; return;
611 case ARM64vecb_CMGT64x2: *nm = "cmgt "; *ar = "2d"; return;
612 case ARM64vecb_CMGT32x4: *nm = "cmgt "; *ar = "4s"; return;
613 case ARM64vecb_CMGT16x8: *nm = "cmgt "; *ar = "8h"; return;
614 case ARM64vecb_CMGT8x16: *nm = "cmgt "; *ar = "16b"; return;
615 case ARM64vecb_FCMEQ64x2: *nm = "fcmeq "; *ar = "2d"; return;
616 case ARM64vecb_FCMEQ32x4: *nm = "fcmeq "; *ar = "4s"; return;
617 case ARM64vecb_FCMGE64x2: *nm = "fcmge "; *ar = "2d"; return;
618 case ARM64vecb_FCMGE32x4: *nm = "fcmge "; *ar = "4s"; return;
619 case ARM64vecb_FCMGT64x2: *nm = "fcmgt "; *ar = "2d"; return;
620 case ARM64vecb_FCMGT32x4: *nm = "fcmgt "; *ar = "4s"; return;
621 case ARM64vecb_TBL1: *nm = "tbl "; *ar = "16b"; return;
622 case ARM64vecb_UZP164x2: *nm = "uzp1 "; *ar = "2d"; return;
623 case ARM64vecb_UZP132x4: *nm = "uzp1 "; *ar = "4s"; return;
624 case ARM64vecb_UZP116x8: *nm = "uzp1 "; *ar = "8h"; return;
625 case ARM64vecb_UZP18x16: *nm = "uzp1 "; *ar = "16b"; return;
626 case ARM64vecb_UZP264x2: *nm = "uzp2 "; *ar = "2d"; return;
627 case ARM64vecb_UZP232x4: *nm = "uzp2 "; *ar = "4s"; return;
628 case ARM64vecb_UZP216x8: *nm = "uzp2 "; *ar = "8h"; return;
629 case ARM64vecb_UZP28x16: *nm = "uzp2 "; *ar = "16b"; return;
630 case ARM64vecb_ZIP132x4: *nm = "zip1 "; *ar = "4s"; return;
631 case ARM64vecb_ZIP116x8: *nm = "zip1 "; *ar = "8h"; return;
632 case ARM64vecb_ZIP18x16: *nm = "zip1 "; *ar = "16b"; return;
633 case ARM64vecb_ZIP232x4: *nm = "zip2 "; *ar = "4s"; return;
634 case ARM64vecb_ZIP216x8: *nm = "zip2 "; *ar = "8h"; return;
635 case ARM64vecb_ZIP28x16: *nm = "zip2 "; *ar = "16b"; return;
636 case ARM64vecb_PMUL8x16: *nm = "pmul "; *ar = "16b"; return;
637 case ARM64vecb_PMULL8x8: *nm = "pmull "; *ar = "8hbb"; return;
638 case ARM64vecb_UMULL2DSS: *nm = "umull "; *ar = "2dss"; return;
639 case ARM64vecb_UMULL4SHH: *nm = "umull "; *ar = "4shh"; return;
640 case ARM64vecb_UMULL8HBB: *nm = "umull "; *ar = "8hbb"; return;
641 case ARM64vecb_SMULL2DSS: *nm = "smull "; *ar = "2dss"; return;
642 case ARM64vecb_SMULL4SHH: *nm = "smull "; *ar = "4shh"; return;
643 case ARM64vecb_SMULL8HBB: *nm = "smull "; *ar = "8hbb"; return;
644 case ARM64vecb_SQADD64x2: *nm = "sqadd "; *ar = "2d"; return;
645 case ARM64vecb_SQADD32x4: *nm = "sqadd "; *ar = "4s"; return;
646 case ARM64vecb_SQADD16x8: *nm = "sqadd "; *ar = "8h"; return;
647 case ARM64vecb_SQADD8x16: *nm = "sqadd "; *ar = "16b"; return;
648 case ARM64vecb_UQADD64x2: *nm = "uqadd "; *ar = "2d"; return;
649 case ARM64vecb_UQADD32x4: *nm = "uqadd "; *ar = "4s"; return;
650 case ARM64vecb_UQADD16x8: *nm = "uqadd "; *ar = "8h"; return;
651 case ARM64vecb_UQADD8x16: *nm = "uqadd "; *ar = "16b"; return;
652 case ARM64vecb_SQSUB64x2: *nm = "sqsub "; *ar = "2d"; return;
653 case ARM64vecb_SQSUB32x4: *nm = "sqsub "; *ar = "4s"; return;
654 case ARM64vecb_SQSUB16x8: *nm = "sqsub "; *ar = "8h"; return;
655 case ARM64vecb_SQSUB8x16: *nm = "sqsub "; *ar = "16b"; return;
656 case ARM64vecb_UQSUB64x2: *nm = "uqsub "; *ar = "2d"; return;
657 case ARM64vecb_UQSUB32x4: *nm = "uqsub "; *ar = "4s"; return;
658 case ARM64vecb_UQSUB16x8: *nm = "uqsub "; *ar = "8h"; return;
659 case ARM64vecb_UQSUB8x16: *nm = "uqsub "; *ar = "16b"; return;
660 case ARM64vecb_SQDMULL2DSS: *nm = "sqdmull"; *ar = "2dss"; return;
661 case ARM64vecb_SQDMULL4SHH: *nm = "sqdmull"; *ar = "4shh"; return;
662 case ARM64vecb_SQDMULH32x4: *nm = "sqdmulh"; *ar = "4s"; return;
663 case ARM64vecb_SQDMULH16x8: *nm = "sqdmulh"; *ar = "8h"; return;
664 case ARM64vecb_SQRDMULH32x4: *nm = "sqrdmulh"; *ar = "4s"; return;
665 case ARM64vecb_SQRDMULH16x8: *nm = "sqrdmulh"; *ar = "8h"; return;
666 case ARM64vecb_SQSHL64x2: *nm = "sqshl "; *ar = "2d"; return;
667 case ARM64vecb_SQSHL32x4: *nm = "sqshl "; *ar = "4s"; return;
668 case ARM64vecb_SQSHL16x8: *nm = "sqshl "; *ar = "8h"; return;
669 case ARM64vecb_SQSHL8x16: *nm = "sqshl "; *ar = "16b"; return;
670 case ARM64vecb_UQSHL64x2: *nm = "uqshl "; *ar = "2d"; return;
671 case ARM64vecb_UQSHL32x4: *nm = "uqshl "; *ar = "4s"; return;
672 case ARM64vecb_UQSHL16x8: *nm = "uqshl "; *ar = "8h"; return;
673 case ARM64vecb_UQSHL8x16: *nm = "uqshl "; *ar = "16b"; return;
674 case ARM64vecb_SQRSHL64x2: *nm = "sqrshl"; *ar = "2d"; return;
675 case ARM64vecb_SQRSHL32x4: *nm = "sqrshl"; *ar = "4s"; return;
676 case ARM64vecb_SQRSHL16x8: *nm = "sqrshl"; *ar = "8h"; return;
677 case ARM64vecb_SQRSHL8x16: *nm = "sqrshl"; *ar = "16b"; return;
678 case ARM64vecb_UQRSHL64x2: *nm = "uqrshl"; *ar = "2d"; return;
679 case ARM64vecb_UQRSHL32x4: *nm = "uqrshl"; *ar = "4s"; return;
680 case ARM64vecb_UQRSHL16x8: *nm = "uqrshl"; *ar = "8h"; return;
681 case ARM64vecb_UQRSHL8x16: *nm = "uqrshl"; *ar = "16b"; return;
682 case ARM64vecb_SSHL64x2: *nm = "sshl "; *ar = "2d"; return;
683 case ARM64vecb_SSHL32x4: *nm = "sshl "; *ar = "4s"; return;
684 case ARM64vecb_SSHL16x8: *nm = "sshl "; *ar = "8h"; return;
685 case ARM64vecb_SSHL8x16: *nm = "sshl "; *ar = "16b"; return;
686 case ARM64vecb_USHL64x2: *nm = "ushl "; *ar = "2d"; return;
687 case ARM64vecb_USHL32x4: *nm = "ushl "; *ar = "4s"; return;
688 case ARM64vecb_USHL16x8: *nm = "ushl "; *ar = "8h"; return;
689 case ARM64vecb_USHL8x16: *nm = "ushl "; *ar = "16b"; return;
690 case ARM64vecb_SRSHL64x2: *nm = "srshl "; *ar = "2d"; return;
691 case ARM64vecb_SRSHL32x4: *nm = "srshl "; *ar = "4s"; return;
692 case ARM64vecb_SRSHL16x8: *nm = "srshl "; *ar = "8h"; return;
693 case ARM64vecb_SRSHL8x16: *nm = "srshl "; *ar = "16b"; return;
694 case ARM64vecb_URSHL64x2: *nm = "urshl "; *ar = "2d"; return;
695 case ARM64vecb_URSHL32x4: *nm = "urshl "; *ar = "4s"; return;
696 case ARM64vecb_URSHL16x8: *nm = "urshl "; *ar = "8h"; return;
697 case ARM64vecb_URSHL8x16: *nm = "urshl "; *ar = "16b"; return;
698 case ARM64vecb_FRECPS64x2: *nm = "frecps"; *ar = "2d"; return;
699 case ARM64vecb_FRECPS32x4: *nm = "frecps"; *ar = "4s"; return;
700 case ARM64vecb_FRSQRTS64x2: *nm = "frsqrts"; *ar = "2d"; return;
701 case ARM64vecb_FRSQRTS32x4: *nm = "frsqrts"; *ar = "4s"; return;
702 default: vpanic("showARM64VecBinOp");
706 static void showARM64VecModifyOp(/*OUT*/const HChar** nm,
707 /*OUT*/const HChar** ar,
708 ARM64VecModifyOp op ) {
709 switch (op) {
710 case ARM64vecmo_SUQADD64x2: *nm = "suqadd"; *ar = "2d"; return;
711 case ARM64vecmo_SUQADD32x4: *nm = "suqadd"; *ar = "4s"; return;
712 case ARM64vecmo_SUQADD16x8: *nm = "suqadd"; *ar = "8h"; return;
713 case ARM64vecmo_SUQADD8x16: *nm = "suqadd"; *ar = "16b"; return;
714 case ARM64vecmo_USQADD64x2: *nm = "usqadd"; *ar = "2d"; return;
715 case ARM64vecmo_USQADD32x4: *nm = "usqadd"; *ar = "4s"; return;
716 case ARM64vecmo_USQADD16x8: *nm = "usqadd"; *ar = "8h"; return;
717 case ARM64vecmo_USQADD8x16: *nm = "usqadd"; *ar = "16b"; return;
718 default: vpanic("showARM64VecModifyOp");
722 static void showARM64VecUnaryOp(/*OUT*/const HChar** nm,
723 /*OUT*/const HChar** ar, ARM64VecUnaryOp op )
725 switch (op) {
726 case ARM64vecu_FNEG64x2: *nm = "fneg "; *ar = "2d"; return;
727 case ARM64vecu_FNEG32x4: *nm = "fneg "; *ar = "4s"; return;
728 case ARM64vecu_FABS64x2: *nm = "fabs "; *ar = "2d"; return;
729 case ARM64vecu_FABS32x4: *nm = "fabs "; *ar = "4s"; return;
730 case ARM64vecu_NOT: *nm = "not "; *ar = "all"; return;
731 case ARM64vecu_ABS64x2: *nm = "abs "; *ar = "2d"; return;
732 case ARM64vecu_ABS32x4: *nm = "abs "; *ar = "4s"; return;
733 case ARM64vecu_ABS16x8: *nm = "abs "; *ar = "8h"; return;
734 case ARM64vecu_ABS8x16: *nm = "abs "; *ar = "16b"; return;
735 case ARM64vecu_CLS32x4: *nm = "cls "; *ar = "4s"; return;
736 case ARM64vecu_CLS16x8: *nm = "cls "; *ar = "8h"; return;
737 case ARM64vecu_CLS8x16: *nm = "cls "; *ar = "16b"; return;
738 case ARM64vecu_CLZ32x4: *nm = "clz "; *ar = "4s"; return;
739 case ARM64vecu_CLZ16x8: *nm = "clz "; *ar = "8h"; return;
740 case ARM64vecu_CLZ8x16: *nm = "clz "; *ar = "16b"; return;
741 case ARM64vecu_CNT8x16: *nm = "cnt "; *ar = "16b"; return;
742 case ARM64vecu_RBIT: *nm = "rbit "; *ar = "16b"; return;
743 case ARM64vecu_REV1616B: *nm = "rev16"; *ar = "16b"; return;
744 case ARM64vecu_REV3216B: *nm = "rev32"; *ar = "16b"; return;
745 case ARM64vecu_REV328H: *nm = "rev32"; *ar = "8h"; return;
746 case ARM64vecu_REV6416B: *nm = "rev64"; *ar = "16b"; return;
747 case ARM64vecu_REV648H: *nm = "rev64"; *ar = "8h"; return;
748 case ARM64vecu_REV644S: *nm = "rev64"; *ar = "4s"; return;
749 case ARM64vecu_URECPE32x4: *nm = "urecpe"; *ar = "4s"; return;
750 case ARM64vecu_URSQRTE32x4: *nm = "ursqrte"; *ar = "4s"; return;
751 case ARM64vecu_FRECPE64x2: *nm = "frecpe"; *ar = "2d"; return;
752 case ARM64vecu_FRECPE32x4: *nm = "frecpe"; *ar = "4s"; return;
753 case ARM64vecu_FRSQRTE64x2: *nm = "frsqrte"; *ar = "2d"; return;
754 case ARM64vecu_FRSQRTE32x4: *nm = "frsqrte"; *ar = "4s"; return;
755 case ARM64vecu_FSQRT64x2: *nm = "fsqrt"; *ar = "2d"; return;
756 case ARM64vecu_FSQRT32x4: *nm = "fsqrt"; *ar = "4s"; return;
757 default: vpanic("showARM64VecUnaryOp");
761 static void showARM64VecShiftImmOp(/*OUT*/const HChar** nm,
762 /*OUT*/const HChar** ar,
763 ARM64VecShiftImmOp op )
765 switch (op) {
766 case ARM64vecshi_USHR64x2: *nm = "ushr "; *ar = "2d"; return;
767 case ARM64vecshi_USHR32x4: *nm = "ushr "; *ar = "4s"; return;
768 case ARM64vecshi_USHR16x8: *nm = "ushr "; *ar = "8h"; return;
769 case ARM64vecshi_USHR8x16: *nm = "ushr "; *ar = "16b"; return;
770 case ARM64vecshi_SSHR64x2: *nm = "sshr "; *ar = "2d"; return;
771 case ARM64vecshi_SSHR32x4: *nm = "sshr "; *ar = "4s"; return;
772 case ARM64vecshi_SSHR16x8: *nm = "sshr "; *ar = "8h"; return;
773 case ARM64vecshi_SSHR8x16: *nm = "sshr "; *ar = "16b"; return;
774 case ARM64vecshi_SHL64x2: *nm = "shl "; *ar = "2d"; return;
775 case ARM64vecshi_SHL32x4: *nm = "shl "; *ar = "4s"; return;
776 case ARM64vecshi_SHL16x8: *nm = "shl "; *ar = "8h"; return;
777 case ARM64vecshi_SHL8x16: *nm = "shl "; *ar = "16b"; return;
778 case ARM64vecshi_SQSHRN2SD: *nm = "sqshrn"; *ar = "2sd"; return;
779 case ARM64vecshi_SQSHRN4HS: *nm = "sqshrn"; *ar = "4hs"; return;
780 case ARM64vecshi_SQSHRN8BH: *nm = "sqshrn"; *ar = "8bh"; return;
781 case ARM64vecshi_UQSHRN2SD: *nm = "uqshrn"; *ar = "2sd"; return;
782 case ARM64vecshi_UQSHRN4HS: *nm = "uqshrn"; *ar = "4hs"; return;
783 case ARM64vecshi_UQSHRN8BH: *nm = "uqshrn"; *ar = "8bh"; return;
784 case ARM64vecshi_SQSHRUN2SD: *nm = "sqshrun"; *ar = "2sd"; return;
785 case ARM64vecshi_SQSHRUN4HS: *nm = "sqshrun"; *ar = "4hs"; return;
786 case ARM64vecshi_SQSHRUN8BH: *nm = "sqshrun"; *ar = "8bh"; return;
787 case ARM64vecshi_SQRSHRN2SD: *nm = "sqrshrn"; *ar = "2sd"; return;
788 case ARM64vecshi_SQRSHRN4HS: *nm = "sqrshrn"; *ar = "4hs"; return;
789 case ARM64vecshi_SQRSHRN8BH: *nm = "sqrshrn"; *ar = "8bh"; return;
790 case ARM64vecshi_UQRSHRN2SD: *nm = "uqrshrn"; *ar = "2sd"; return;
791 case ARM64vecshi_UQRSHRN4HS: *nm = "uqrshrn"; *ar = "4hs"; return;
792 case ARM64vecshi_UQRSHRN8BH: *nm = "uqrshrn"; *ar = "8bh"; return;
793 case ARM64vecshi_SQRSHRUN2SD: *nm = "sqrshrun"; *ar = "2sd"; return;
794 case ARM64vecshi_SQRSHRUN4HS: *nm = "sqrshrun"; *ar = "4hs"; return;
795 case ARM64vecshi_SQRSHRUN8BH: *nm = "sqrshrun"; *ar = "8bh"; return;
796 case ARM64vecshi_UQSHL64x2: *nm = "uqshl "; *ar = "2d"; return;
797 case ARM64vecshi_UQSHL32x4: *nm = "uqshl "; *ar = "4s"; return;
798 case ARM64vecshi_UQSHL16x8: *nm = "uqshl "; *ar = "8h"; return;
799 case ARM64vecshi_UQSHL8x16: *nm = "uqshl "; *ar = "16b"; return;
800 case ARM64vecshi_SQSHL64x2: *nm = "sqshl "; *ar = "2d"; return;
801 case ARM64vecshi_SQSHL32x4: *nm = "sqshl "; *ar = "4s"; return;
802 case ARM64vecshi_SQSHL16x8: *nm = "sqshl "; *ar = "8h"; return;
803 case ARM64vecshi_SQSHL8x16: *nm = "sqshl "; *ar = "16b"; return;
804 case ARM64vecshi_SQSHLU64x2: *nm = "sqshlu"; *ar = "2d"; return;
805 case ARM64vecshi_SQSHLU32x4: *nm = "sqshlu"; *ar = "4s"; return;
806 case ARM64vecshi_SQSHLU16x8: *nm = "sqshlu"; *ar = "8h"; return;
807 case ARM64vecshi_SQSHLU8x16: *nm = "sqshlu"; *ar = "16b"; return;
808 default: vpanic("showARM64VecShiftImmOp");
812 static const HChar* showARM64VecNarrowOp(ARM64VecNarrowOp op) {
813 switch (op) {
814 case ARM64vecna_XTN: return "xtn ";
815 case ARM64vecna_SQXTN: return "sqxtn ";
816 case ARM64vecna_UQXTN: return "uqxtn ";
817 case ARM64vecna_SQXTUN: return "sqxtun";
818 default: vpanic("showARM64VecNarrowOp");
822 ARM64Instr* ARM64Instr_Arith ( HReg dst,
823 HReg argL, ARM64RIA* argR, Bool isAdd ) {
824 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
825 i->tag = ARM64in_Arith;
826 i->ARM64in.Arith.dst = dst;
827 i->ARM64in.Arith.argL = argL;
828 i->ARM64in.Arith.argR = argR;
829 i->ARM64in.Arith.isAdd = isAdd;
830 return i;
832 ARM64Instr* ARM64Instr_Cmp ( HReg argL, ARM64RIA* argR, Bool is64 ) {
833 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
834 i->tag = ARM64in_Cmp;
835 i->ARM64in.Cmp.argL = argL;
836 i->ARM64in.Cmp.argR = argR;
837 i->ARM64in.Cmp.is64 = is64;
838 return i;
840 ARM64Instr* ARM64Instr_Logic ( HReg dst,
841 HReg argL, ARM64RIL* argR, ARM64LogicOp op ) {
842 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
843 i->tag = ARM64in_Logic;
844 i->ARM64in.Logic.dst = dst;
845 i->ARM64in.Logic.argL = argL;
846 i->ARM64in.Logic.argR = argR;
847 i->ARM64in.Logic.op = op;
848 return i;
850 ARM64Instr* ARM64Instr_Test ( HReg argL, ARM64RIL* argR ) {
851 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
852 i->tag = ARM64in_Test;
853 i->ARM64in.Test.argL = argL;
854 i->ARM64in.Test.argR = argR;
855 return i;
857 ARM64Instr* ARM64Instr_Shift ( HReg dst,
858 HReg argL, ARM64RI6* argR, ARM64ShiftOp op ) {
859 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
860 i->tag = ARM64in_Shift;
861 i->ARM64in.Shift.dst = dst;
862 i->ARM64in.Shift.argL = argL;
863 i->ARM64in.Shift.argR = argR;
864 i->ARM64in.Shift.op = op;
865 return i;
867 ARM64Instr* ARM64Instr_Unary ( HReg dst, HReg src, ARM64UnaryOp op ) {
868 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
869 i->tag = ARM64in_Unary;
870 i->ARM64in.Unary.dst = dst;
871 i->ARM64in.Unary.src = src;
872 i->ARM64in.Unary.op = op;
873 return i;
875 ARM64Instr* ARM64Instr_MovI ( HReg dst, HReg src ) {
876 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
877 i->tag = ARM64in_MovI;
878 i->ARM64in.MovI.dst = dst;
879 i->ARM64in.MovI.src = src;
880 vassert(hregClass(src) == HRcInt64);
881 vassert(hregClass(dst) == HRcInt64);
882 return i;
884 ARM64Instr* ARM64Instr_Imm64 ( HReg dst, ULong imm64 ) {
885 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
886 i->tag = ARM64in_Imm64;
887 i->ARM64in.Imm64.dst = dst;
888 i->ARM64in.Imm64.imm64 = imm64;
889 return i;
891 ARM64Instr* ARM64Instr_LdSt64 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
892 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
893 i->tag = ARM64in_LdSt64;
894 i->ARM64in.LdSt64.isLoad = isLoad;
895 i->ARM64in.LdSt64.rD = rD;
896 i->ARM64in.LdSt64.amode = amode;
897 return i;
899 ARM64Instr* ARM64Instr_LdSt32 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
900 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
901 i->tag = ARM64in_LdSt32;
902 i->ARM64in.LdSt32.isLoad = isLoad;
903 i->ARM64in.LdSt32.rD = rD;
904 i->ARM64in.LdSt32.amode = amode;
905 return i;
907 ARM64Instr* ARM64Instr_LdSt16 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
908 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
909 i->tag = ARM64in_LdSt16;
910 i->ARM64in.LdSt16.isLoad = isLoad;
911 i->ARM64in.LdSt16.rD = rD;
912 i->ARM64in.LdSt16.amode = amode;
913 return i;
915 ARM64Instr* ARM64Instr_LdSt8 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
916 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
917 i->tag = ARM64in_LdSt8;
918 i->ARM64in.LdSt8.isLoad = isLoad;
919 i->ARM64in.LdSt8.rD = rD;
920 i->ARM64in.LdSt8.amode = amode;
921 return i;
923 ARM64Instr* ARM64Instr_XDirect ( Addr64 dstGA, ARM64AMode* amPC,
924 ARM64CondCode cond, Bool toFastEP ) {
925 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
926 i->tag = ARM64in_XDirect;
927 i->ARM64in.XDirect.dstGA = dstGA;
928 i->ARM64in.XDirect.amPC = amPC;
929 i->ARM64in.XDirect.cond = cond;
930 i->ARM64in.XDirect.toFastEP = toFastEP;
931 return i;
933 ARM64Instr* ARM64Instr_XIndir ( HReg dstGA, ARM64AMode* amPC,
934 ARM64CondCode cond ) {
935 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
936 i->tag = ARM64in_XIndir;
937 i->ARM64in.XIndir.dstGA = dstGA;
938 i->ARM64in.XIndir.amPC = amPC;
939 i->ARM64in.XIndir.cond = cond;
940 return i;
942 ARM64Instr* ARM64Instr_XAssisted ( HReg dstGA, ARM64AMode* amPC,
943 ARM64CondCode cond, IRJumpKind jk ) {
944 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
945 i->tag = ARM64in_XAssisted;
946 i->ARM64in.XAssisted.dstGA = dstGA;
947 i->ARM64in.XAssisted.amPC = amPC;
948 i->ARM64in.XAssisted.cond = cond;
949 i->ARM64in.XAssisted.jk = jk;
950 return i;
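/* Note: these three constructors cover the usual VEX block-exit forms.
   XDirect is a conditional jump to a known guest address that can later
   be chained/unchained in place (hence the toFastEP flag selecting the
   fast or slow entry point), XIndir jumps to a guest address held in a
   register via the disp_cp_xindir stub, and XAssisted additionally
   records an IRJumpKind so the dispatcher knows why the block exited.
   The corresponding pseudo-assembly is shown in ppARM64Instr below. */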
952 ARM64Instr* ARM64Instr_CSel ( HReg dst, HReg argL, HReg argR,
953 ARM64CondCode cond ) {
954 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
955 i->tag = ARM64in_CSel;
956 i->ARM64in.CSel.dst = dst;
957 i->ARM64in.CSel.argL = argL;
958 i->ARM64in.CSel.argR = argR;
959 i->ARM64in.CSel.cond = cond;
960 return i;
962 ARM64Instr* ARM64Instr_Call ( ARM64CondCode cond, Addr64 target, Int nArgRegs,
963 RetLoc rloc ) {
964 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
965 i->tag = ARM64in_Call;
966 i->ARM64in.Call.cond = cond;
967 i->ARM64in.Call.target = target;
968 i->ARM64in.Call.nArgRegs = nArgRegs;
969 i->ARM64in.Call.rloc = rloc;
970 vassert(is_sane_RetLoc(rloc));
971 return i;
973 extern ARM64Instr* ARM64Instr_AddToSP ( Int simm ) {
974 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
975 i->tag = ARM64in_AddToSP;
976 i->ARM64in.AddToSP.simm = simm;
977 vassert(-4096 < simm && simm < 4096);
978 vassert(0 == (simm & 0xF));
979 return i;
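/* Note: the vassert requires simm to be a multiple of 16 because AArch64
   requires the stack pointer to remain 16-byte aligned whenever it is
   used for a memory access. */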
981 extern ARM64Instr* ARM64Instr_FromSP ( HReg dst ) {
982 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
983 i->tag = ARM64in_FromSP;
984 i->ARM64in.FromSP.dst = dst;
985 return i;
987 ARM64Instr* ARM64Instr_Mul ( HReg dst, HReg argL, HReg argR,
988 ARM64MulOp op ) {
989 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
990 i->tag = ARM64in_Mul;
991 i->ARM64in.Mul.dst = dst;
992 i->ARM64in.Mul.argL = argL;
993 i->ARM64in.Mul.argR = argR;
994 i->ARM64in.Mul.op = op;
995 return i;
997 ARM64Instr* ARM64Instr_LdrEX ( Int szB ) {
998 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
999 i->tag = ARM64in_LdrEX;
1000 i->ARM64in.LdrEX.szB = szB;
1001 vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
1002 return i;
1004 ARM64Instr* ARM64Instr_StrEX ( Int szB ) {
1005 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1006 i->tag = ARM64in_StrEX;
1007 i->ARM64in.StrEX.szB = szB;
1008 vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
1009 return i;
1011 ARM64Instr* ARM64Instr_CAS ( Int szB ) {
1012 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1013 i->tag = ARM64in_CAS;
1014 i->ARM64in.CAS.szB = szB;
1015 vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
1016 return i;
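/* Note: LdrEX, StrEX and CAS take no register operands because they are
   defined over fixed registers; as the printing code below shows, LdrEX
   loads x2/w2 from [x4], StrEX stores x2/w2 to [x4] with the status flag
   in w0, and CAS is the sequence x1 = cas(x3, x5 -> x7).  Only the
   access size szB varies. */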
1018 ARM64Instr* ARM64Instr_MFence ( void ) {
1019 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1020 i->tag = ARM64in_MFence;
1021 return i;
1023 ARM64Instr* ARM64Instr_ClrEX ( void ) {
1024 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1025 i->tag = ARM64in_ClrEX;
1026 return i;
1028 ARM64Instr* ARM64Instr_VLdStH ( Bool isLoad, HReg sD, HReg rN, UInt uimm12 ) {
1029 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1030 i->tag = ARM64in_VLdStH;
1031 i->ARM64in.VLdStH.isLoad = isLoad;
1032 i->ARM64in.VLdStH.hD = sD;
1033 i->ARM64in.VLdStH.rN = rN;
1034 i->ARM64in.VLdStH.uimm12 = uimm12;
1035 vassert(uimm12 < 8192 && 0 == (uimm12 & 1));
1036 return i;
1038 ARM64Instr* ARM64Instr_VLdStS ( Bool isLoad, HReg sD, HReg rN, UInt uimm12 ) {
1039 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1040 i->tag = ARM64in_VLdStS;
1041 i->ARM64in.VLdStS.isLoad = isLoad;
1042 i->ARM64in.VLdStS.sD = sD;
1043 i->ARM64in.VLdStS.rN = rN;
1044 i->ARM64in.VLdStS.uimm12 = uimm12;
1045 vassert(uimm12 < 16384 && 0 == (uimm12 & 3));
1046 return i;
1048 ARM64Instr* ARM64Instr_VLdStD ( Bool isLoad, HReg dD, HReg rN, UInt uimm12 ) {
1049 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1050 i->tag = ARM64in_VLdStD;
1051 i->ARM64in.VLdStD.isLoad = isLoad;
1052 i->ARM64in.VLdStD.dD = dD;
1053 i->ARM64in.VLdStD.rN = rN;
1054 i->ARM64in.VLdStD.uimm12 = uimm12;
1055 vassert(uimm12 < 32768 && 0 == (uimm12 & 7));
1056 return i;
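/* Note: in VLdStH/VLdStS/VLdStD the uimm12 field is a byte offset; the
   vasserts above require it to be aligned to the access size (2, 4 or 8)
   and to fit the scaled 12-bit offset field, i.e. to be at most
   4095 * szB. */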
1058 ARM64Instr* ARM64Instr_VLdStQ ( Bool isLoad, HReg rQ, HReg rN ) {
1059 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1060 i->tag = ARM64in_VLdStQ;
1061 i->ARM64in.VLdStQ.isLoad = isLoad;
1062 i->ARM64in.VLdStQ.rQ = rQ;
1063 i->ARM64in.VLdStQ.rN = rN;
1064 return i;
1066 ARM64Instr* ARM64Instr_VCvtI2F ( ARM64CvtOp how, HReg rD, HReg rS ) {
1067 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1068 i->tag = ARM64in_VCvtI2F;
1069 i->ARM64in.VCvtI2F.how = how;
1070 i->ARM64in.VCvtI2F.rD = rD;
1071 i->ARM64in.VCvtI2F.rS = rS;
1072 return i;
1074 ARM64Instr* ARM64Instr_VCvtF2I ( ARM64CvtOp how, HReg rD, HReg rS,
1075 UChar armRM ) {
1076 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1077 i->tag = ARM64in_VCvtF2I;
1078 i->ARM64in.VCvtF2I.how = how;
1079 i->ARM64in.VCvtF2I.rD = rD;
1080 i->ARM64in.VCvtF2I.rS = rS;
1081 i->ARM64in.VCvtF2I.armRM = armRM;
1082 vassert(armRM <= 3);
1083 return i;
1085 ARM64Instr* ARM64Instr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
1086 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1087 i->tag = ARM64in_VCvtSD;
1088 i->ARM64in.VCvtSD.sToD = sToD;
1089 i->ARM64in.VCvtSD.dst = dst;
1090 i->ARM64in.VCvtSD.src = src;
1091 return i;
1093 ARM64Instr* ARM64Instr_VCvtHS ( Bool hToS, HReg dst, HReg src ) {
1094 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1095 i->tag = ARM64in_VCvtHS;
1096 i->ARM64in.VCvtHS.hToS = hToS;
1097 i->ARM64in.VCvtHS.dst = dst;
1098 i->ARM64in.VCvtHS.src = src;
1099 return i;
1101 ARM64Instr* ARM64Instr_VCvtHD ( Bool hToD, HReg dst, HReg src ) {
1102 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1103 i->tag = ARM64in_VCvtHD;
1104 i->ARM64in.VCvtHD.hToD = hToD;
1105 i->ARM64in.VCvtHD.dst = dst;
1106 i->ARM64in.VCvtHD.src = src;
1107 return i;
1109 ARM64Instr* ARM64Instr_VUnaryD ( ARM64FpUnaryOp op, HReg dst, HReg src ) {
1110 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1111 i->tag = ARM64in_VUnaryD;
1112 i->ARM64in.VUnaryD.op = op;
1113 i->ARM64in.VUnaryD.dst = dst;
1114 i->ARM64in.VUnaryD.src = src;
1115 return i;
1117 ARM64Instr* ARM64Instr_VUnaryS ( ARM64FpUnaryOp op, HReg dst, HReg src ) {
1118 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1119 i->tag = ARM64in_VUnaryS;
1120 i->ARM64in.VUnaryS.op = op;
1121 i->ARM64in.VUnaryS.dst = dst;
1122 i->ARM64in.VUnaryS.src = src;
1123 return i;
1125 ARM64Instr* ARM64Instr_VBinD ( ARM64FpBinOp op,
1126 HReg dst, HReg argL, HReg argR ) {
1127 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1128 i->tag = ARM64in_VBinD;
1129 i->ARM64in.VBinD.op = op;
1130 i->ARM64in.VBinD.dst = dst;
1131 i->ARM64in.VBinD.argL = argL;
1132 i->ARM64in.VBinD.argR = argR;
1133 return i;
1135 ARM64Instr* ARM64Instr_VBinS ( ARM64FpBinOp op,
1136 HReg dst, HReg argL, HReg argR ) {
1137 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1138 i->tag = ARM64in_VBinS;
1139 i->ARM64in.VBinS.op = op;
1140 i->ARM64in.VBinS.dst = dst;
1141 i->ARM64in.VBinS.argL = argL;
1142 i->ARM64in.VBinS.argR = argR;
1143 return i;
1145 ARM64Instr* ARM64Instr_VCmpD ( HReg argL, HReg argR ) {
1146 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1147 i->tag = ARM64in_VCmpD;
1148 i->ARM64in.VCmpD.argL = argL;
1149 i->ARM64in.VCmpD.argR = argR;
1150 return i;
1152 ARM64Instr* ARM64Instr_VCmpS ( HReg argL, HReg argR ) {
1153 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1154 i->tag = ARM64in_VCmpS;
1155 i->ARM64in.VCmpS.argL = argL;
1156 i->ARM64in.VCmpS.argR = argR;
1157 return i;
1159 ARM64Instr* ARM64Instr_VFCSel ( HReg dst, HReg argL, HReg argR,
1160 ARM64CondCode cond, Bool isD ) {
1161 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1162 i->tag = ARM64in_VFCSel;
1163 i->ARM64in.VFCSel.dst = dst;
1164 i->ARM64in.VFCSel.argL = argL;
1165 i->ARM64in.VFCSel.argR = argR;
1166 i->ARM64in.VFCSel.cond = cond;
1167 i->ARM64in.VFCSel.isD = isD;
1168 return i;
1170 ARM64Instr* ARM64Instr_FPCR ( Bool toFPCR, HReg iReg ) {
1171 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1172 i->tag = ARM64in_FPCR;
1173 i->ARM64in.FPCR.toFPCR = toFPCR;
1174 i->ARM64in.FPCR.iReg = iReg;
1175 return i;
1177 ARM64Instr* ARM64Instr_FPSR ( Bool toFPSR, HReg iReg ) {
1178 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1179 i->tag = ARM64in_FPSR;
1180 i->ARM64in.FPSR.toFPSR = toFPSR;
1181 i->ARM64in.FPSR.iReg = iReg;
1182 return i;
1184 ARM64Instr* ARM64Instr_VBinV ( ARM64VecBinOp op,
1185 HReg dst, HReg argL, HReg argR ) {
1186 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1187 i->tag = ARM64in_VBinV;
1188 i->ARM64in.VBinV.op = op;
1189 i->ARM64in.VBinV.dst = dst;
1190 i->ARM64in.VBinV.argL = argL;
1191 i->ARM64in.VBinV.argR = argR;
1192 return i;
1194 ARM64Instr* ARM64Instr_VModifyV ( ARM64VecModifyOp op, HReg mod, HReg arg ) {
1195 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1196 i->tag = ARM64in_VModifyV;
1197 i->ARM64in.VModifyV.op = op;
1198 i->ARM64in.VModifyV.mod = mod;
1199 i->ARM64in.VModifyV.arg = arg;
1200 return i;
1202 ARM64Instr* ARM64Instr_VUnaryV ( ARM64VecUnaryOp op, HReg dst, HReg arg ) {
1203 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1204 i->tag = ARM64in_VUnaryV;
1205 i->ARM64in.VUnaryV.op = op;
1206 i->ARM64in.VUnaryV.dst = dst;
1207 i->ARM64in.VUnaryV.arg = arg;
1208 return i;
1210 ARM64Instr* ARM64Instr_VNarrowV ( ARM64VecNarrowOp op,
1211 UInt dszBlg2, HReg dst, HReg src ) {
1212 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1213 i->tag = ARM64in_VNarrowV;
1214 i->ARM64in.VNarrowV.op = op;
1215 i->ARM64in.VNarrowV.dszBlg2 = dszBlg2;
1216 i->ARM64in.VNarrowV.dst = dst;
1217 i->ARM64in.VNarrowV.src = src;
1218 vassert(dszBlg2 == 0 || dszBlg2 == 1 || dszBlg2 == 2);
1219 return i;
1221 ARM64Instr* ARM64Instr_VShiftImmV ( ARM64VecShiftImmOp op,
1222 HReg dst, HReg src, UInt amt ) {
1223 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1224 i->tag = ARM64in_VShiftImmV;
1225 i->ARM64in.VShiftImmV.op = op;
1226 i->ARM64in.VShiftImmV.dst = dst;
1227 i->ARM64in.VShiftImmV.src = src;
1228 i->ARM64in.VShiftImmV.amt = amt;
1229 UInt minSh = 0;
1230 UInt maxSh = 0;
1231 switch (op) {
1232 /* For right shifts, the allowed shift amounts are 1 .. lane_size.
1233 For left shifts, the allowed shift amounts are 0 .. lane_size-1. */
1235 case ARM64vecshi_USHR64x2: case ARM64vecshi_SSHR64x2:
1236 case ARM64vecshi_UQSHRN2SD: case ARM64vecshi_SQSHRN2SD:
1237 case ARM64vecshi_SQSHRUN2SD:
1238 case ARM64vecshi_UQRSHRN2SD: case ARM64vecshi_SQRSHRN2SD:
1239 case ARM64vecshi_SQRSHRUN2SD:
1240 minSh = 1; maxSh = 64; break;
1241 case ARM64vecshi_SHL64x2:
1242 case ARM64vecshi_UQSHL64x2: case ARM64vecshi_SQSHL64x2:
1243 case ARM64vecshi_SQSHLU64x2:
1244 minSh = 0; maxSh = 63; break;
1245 case ARM64vecshi_USHR32x4: case ARM64vecshi_SSHR32x4:
1246 case ARM64vecshi_UQSHRN4HS: case ARM64vecshi_SQSHRN4HS:
1247 case ARM64vecshi_SQSHRUN4HS:
1248 case ARM64vecshi_UQRSHRN4HS: case ARM64vecshi_SQRSHRN4HS:
1249 case ARM64vecshi_SQRSHRUN4HS:
1250 minSh = 1; maxSh = 32; break;
1251 case ARM64vecshi_SHL32x4:
1252 case ARM64vecshi_UQSHL32x4: case ARM64vecshi_SQSHL32x4:
1253 case ARM64vecshi_SQSHLU32x4:
1254 minSh = 0; maxSh = 31; break;
1255 case ARM64vecshi_USHR16x8: case ARM64vecshi_SSHR16x8:
1256 case ARM64vecshi_UQSHRN8BH: case ARM64vecshi_SQSHRN8BH:
1257 case ARM64vecshi_SQSHRUN8BH:
1258 case ARM64vecshi_UQRSHRN8BH: case ARM64vecshi_SQRSHRN8BH:
1259 case ARM64vecshi_SQRSHRUN8BH:
1260 minSh = 1; maxSh = 16; break;
1261 case ARM64vecshi_SHL16x8:
1262 case ARM64vecshi_UQSHL16x8: case ARM64vecshi_SQSHL16x8:
1263 case ARM64vecshi_SQSHLU16x8:
1264 minSh = 0; maxSh = 15; break;
1265 case ARM64vecshi_USHR8x16: case ARM64vecshi_SSHR8x16:
1266 minSh = 1; maxSh = 8; break;
1267 case ARM64vecshi_SHL8x16:
1268 case ARM64vecshi_UQSHL8x16: case ARM64vecshi_SQSHL8x16:
1269 case ARM64vecshi_SQSHLU8x16:
1270 minSh = 0; maxSh = 7; break;
1271 default:
1272 vassert(0);
1274 vassert(maxSh > 0);
1275 vassert(amt >= minSh && amt <= maxSh);
1276 return i;
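/* Example of the rule above: ARM64vecshi_USHR64x2 (a right shift on
   64-bit lanes) accepts amounts 1..64, while ARM64vecshi_SHL64x2 (a left
   shift on the same lane size) accepts 0..63, mirroring what the AArch64
   shift-by-immediate encodings can express. */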
1278 ARM64Instr* ARM64Instr_VExtV ( HReg dst, HReg srcLo, HReg srcHi, UInt amtB ) {
1279 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1280 i->tag = ARM64in_VExtV;
1281 i->ARM64in.VExtV.dst = dst;
1282 i->ARM64in.VExtV.srcLo = srcLo;
1283 i->ARM64in.VExtV.srcHi = srcHi;
1284 i->ARM64in.VExtV.amtB = amtB;
1285 vassert(amtB >= 1 && amtB <= 15);
1286 return i;
1288 ARM64Instr* ARM64Instr_VImmQ (HReg rQ, UShort imm) {
1289 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1290 i->tag = ARM64in_VImmQ;
1291 i->ARM64in.VImmQ.rQ = rQ;
1292 i->ARM64in.VImmQ.imm = imm;
1293 /* Check that this is something that can actually be emitted. */
1294 switch (imm) {
1295 case 0x0000: case 0x0001: case 0x0003:
1296 case 0x000F: case 0x003F: case 0x00FF: case 0xFFFF:
1297 break;
1298 default:
1299 vassert(0);
1301 return i;
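/* Note: only a handful of 16-bit imm values are accepted here, namely
   those the emitter knows how to materialise.  Each value appears to act
   as a per-byte mask of the 128-bit result (bit i set meaning byte i is
   0xFF), so e.g. 0x00FF would select the low 8 bytes; this is inferred
   from the accepted values, and the emitter is the authority. */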
1303 ARM64Instr* ARM64Instr_VDfromX ( HReg rD, HReg rX ) {
1304 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1305 i->tag = ARM64in_VDfromX;
1306 i->ARM64in.VDfromX.rD = rD;
1307 i->ARM64in.VDfromX.rX = rX;
1308 return i;
1310 ARM64Instr* ARM64Instr_VQfromX ( HReg rQ, HReg rXlo ) {
1311 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1312 i->tag = ARM64in_VQfromX;
1313 i->ARM64in.VQfromX.rQ = rQ;
1314 i->ARM64in.VQfromX.rXlo = rXlo;
1315 return i;
1317 ARM64Instr* ARM64Instr_VQfromXX ( HReg rQ, HReg rXhi, HReg rXlo ) {
1318 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1319 i->tag = ARM64in_VQfromXX;
1320 i->ARM64in.VQfromXX.rQ = rQ;
1321 i->ARM64in.VQfromXX.rXhi = rXhi;
1322 i->ARM64in.VQfromXX.rXlo = rXlo;
1323 return i;
1325 ARM64Instr* ARM64Instr_VXfromQ ( HReg rX, HReg rQ, UInt laneNo ) {
1326 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1327 i->tag = ARM64in_VXfromQ;
1328 i->ARM64in.VXfromQ.rX = rX;
1329 i->ARM64in.VXfromQ.rQ = rQ;
1330 i->ARM64in.VXfromQ.laneNo = laneNo;
1331 vassert(laneNo <= 1);
1332 return i;
1334 ARM64Instr* ARM64Instr_VXfromDorS ( HReg rX, HReg rDorS, Bool fromD ) {
1335 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1336 i->tag = ARM64in_VXfromDorS;
1337 i->ARM64in.VXfromDorS.rX = rX;
1338 i->ARM64in.VXfromDorS.rDorS = rDorS;
1339 i->ARM64in.VXfromDorS.fromD = fromD;
1340 return i;
1342 ARM64Instr* ARM64Instr_VMov ( UInt szB, HReg dst, HReg src ) {
1343 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1344 i->tag = ARM64in_VMov;
1345 i->ARM64in.VMov.szB = szB;
1346 i->ARM64in.VMov.dst = dst;
1347 i->ARM64in.VMov.src = src;
1348 switch (szB) {
1349 case 16:
1350 vassert(hregClass(src) == HRcVec128);
1351 vassert(hregClass(dst) == HRcVec128);
1352 break;
1353 case 8:
1354 vassert(hregClass(src) == HRcFlt64);
1355 vassert(hregClass(dst) == HRcFlt64);
1356 break;
1357 default:
1358 vpanic("ARM64Instr_VMov");
1360 return i;
1362 ARM64Instr* ARM64Instr_EvCheck ( ARM64AMode* amCounter,
1363 ARM64AMode* amFailAddr ) {
1364 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1365 i->tag = ARM64in_EvCheck;
1366 i->ARM64in.EvCheck.amCounter = amCounter;
1367 i->ARM64in.EvCheck.amFailAddr = amFailAddr;
1368 return i;
1370 ARM64Instr* ARM64Instr_ProfInc ( void ) {
1371 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1372 i->tag = ARM64in_ProfInc;
1373 return i;
1376 /* ... */
1378 void ppARM64Instr ( const ARM64Instr* i ) {
1379 switch (i->tag) {
1380 case ARM64in_Arith:
1381 vex_printf("%s ", i->ARM64in.Arith.isAdd ? "add" : "sub");
1382 ppHRegARM64(i->ARM64in.Arith.dst);
1383 vex_printf(", ");
1384 ppHRegARM64(i->ARM64in.Arith.argL);
1385 vex_printf(", ");
1386 ppARM64RIA(i->ARM64in.Arith.argR);
1387 return;
1388 case ARM64in_Cmp:
1389 vex_printf("cmp%s ", i->ARM64in.Cmp.is64 ? " " : "(w)" );
1390 ppHRegARM64(i->ARM64in.Cmp.argL);
1391 vex_printf(", ");
1392 ppARM64RIA(i->ARM64in.Cmp.argR);
1393 return;
1394 case ARM64in_Logic:
1395 vex_printf("%s ", showARM64LogicOp(i->ARM64in.Logic.op));
1396 ppHRegARM64(i->ARM64in.Logic.dst);
1397 vex_printf(", ");
1398 ppHRegARM64(i->ARM64in.Logic.argL);
1399 vex_printf(", ");
1400 ppARM64RIL(i->ARM64in.Logic.argR);
1401 return;
1402 case ARM64in_Test:
1403 vex_printf("tst ");
1404 ppHRegARM64(i->ARM64in.Test.argL);
1405 vex_printf(", ");
1406 ppARM64RIL(i->ARM64in.Test.argR);
1407 return;
1408 case ARM64in_Shift:
1409 vex_printf("%s ", showARM64ShiftOp(i->ARM64in.Shift.op));
1410 ppHRegARM64(i->ARM64in.Shift.dst);
1411 vex_printf(", ");
1412 ppHRegARM64(i->ARM64in.Shift.argL);
1413 vex_printf(", ");
1414 ppARM64RI6(i->ARM64in.Shift.argR);
1415 return;
1416 case ARM64in_Unary:
1417 vex_printf("%s ", showARM64UnaryOp(i->ARM64in.Unary.op));
1418 ppHRegARM64(i->ARM64in.Unary.dst);
1419 vex_printf(", ");
1420 ppHRegARM64(i->ARM64in.Unary.src);
1421 return;
1422 case ARM64in_MovI:
1423 vex_printf("mov ");
1424 ppHRegARM64(i->ARM64in.MovI.dst);
1425 vex_printf(", ");
1426 ppHRegARM64(i->ARM64in.MovI.src);
1427 return;
1428 case ARM64in_Imm64:
1429 vex_printf("imm64 ");
1430 ppHRegARM64(i->ARM64in.Imm64.dst);
1431 vex_printf(", 0x%llx", i->ARM64in.Imm64.imm64);
1432 return;
1433 case ARM64in_LdSt64:
1434 if (i->ARM64in.LdSt64.isLoad) {
1435 vex_printf("ldr ");
1436 ppHRegARM64(i->ARM64in.LdSt64.rD);
1437 vex_printf(", ");
1438 ppARM64AMode(i->ARM64in.LdSt64.amode);
1439 } else {
1440 vex_printf("str ");
1441 ppARM64AMode(i->ARM64in.LdSt64.amode);
1442 vex_printf(", ");
1443 ppHRegARM64(i->ARM64in.LdSt64.rD);
1445 return;
1446 case ARM64in_LdSt32:
1447 if (i->ARM64in.LdSt32.isLoad) {
1448 vex_printf("ldruw ");
1449 ppHRegARM64(i->ARM64in.LdSt32.rD);
1450 vex_printf(", ");
1451 ppARM64AMode(i->ARM64in.LdSt32.amode);
1452 } else {
1453 vex_printf("strw ");
1454 ppARM64AMode(i->ARM64in.LdSt32.amode);
1455 vex_printf(", ");
1456 ppHRegARM64(i->ARM64in.LdSt32.rD);
1458 return;
1459 case ARM64in_LdSt16:
1460 if (i->ARM64in.LdSt16.isLoad) {
1461 vex_printf("ldruh ");
1462 ppHRegARM64(i->ARM64in.LdSt16.rD);
1463 vex_printf(", ");
1464 ppARM64AMode(i->ARM64in.LdSt16.amode);
1465 } else {
1466 vex_printf("strh ");
1467 ppARM64AMode(i->ARM64in.LdSt16.amode);
1468 vex_printf(", ");
1469 ppHRegARM64(i->ARM64in.LdSt16.rD);
1471 return;
1472 case ARM64in_LdSt8:
1473 if (i->ARM64in.LdSt8.isLoad) {
1474 vex_printf("ldrub ");
1475 ppHRegARM64(i->ARM64in.LdSt8.rD);
1476 vex_printf(", ");
1477 ppARM64AMode(i->ARM64in.LdSt8.amode);
1478 } else {
1479 vex_printf("strb ");
1480 ppARM64AMode(i->ARM64in.LdSt8.amode);
1481 vex_printf(", ");
1482 ppHRegARM64(i->ARM64in.LdSt8.rD);
1484 return;
1485 case ARM64in_XDirect:
1486 vex_printf("(xDirect) ");
1487 vex_printf("if (%%pstate.%s) { ",
1488 showARM64CondCode(i->ARM64in.XDirect.cond));
1489 vex_printf("imm64 x9,0x%llx; ", i->ARM64in.XDirect.dstGA);
1490 vex_printf("str x9,");
1491 ppARM64AMode(i->ARM64in.XDirect.amPC);
1492 vex_printf("; imm64-exactly4 x9,$disp_cp_chain_me_to_%sEP; ",
1493 i->ARM64in.XDirect.toFastEP ? "fast" : "slow");
1494 vex_printf("blr x9 }");
1495 return;
1496 case ARM64in_XIndir:
1497 vex_printf("(xIndir) ");
1498 vex_printf("if (%%pstate.%s) { ",
1499 showARM64CondCode(i->ARM64in.XIndir.cond));
1500 vex_printf("str ");
1501 ppHRegARM64(i->ARM64in.XIndir.dstGA);
1502 vex_printf(",");
1503 ppARM64AMode(i->ARM64in.XIndir.amPC);
1504 vex_printf("; imm64 x9,$disp_cp_xindir; ");
1505 vex_printf("br x9 }");
1506 return;
1507 case ARM64in_XAssisted:
1508 vex_printf("(xAssisted) ");
1509 vex_printf("if (%%pstate.%s) { ",
1510 showARM64CondCode(i->ARM64in.XAssisted.cond));
1511 vex_printf("str ");
1512 ppHRegARM64(i->ARM64in.XAssisted.dstGA);
1513 vex_printf(",");
1514 ppARM64AMode(i->ARM64in.XAssisted.amPC);
1515 vex_printf("; movw x21,$IRJumpKind_to_TRCVAL(%d); ",
1516 (Int)i->ARM64in.XAssisted.jk);
1517 vex_printf("imm64 x9,$disp_cp_xassisted; ");
1518 vex_printf("br x9 }");
1519 return;
1520 case ARM64in_CSel:
1521 vex_printf("csel ");
1522 ppHRegARM64(i->ARM64in.CSel.dst);
1523 vex_printf(", ");
1524 ppHRegARM64(i->ARM64in.CSel.argL);
1525 vex_printf(", ");
1526 ppHRegARM64(i->ARM64in.CSel.argR);
1527 vex_printf(", %s", showARM64CondCode(i->ARM64in.CSel.cond));
1528 return;
1529 case ARM64in_Call:
1530 vex_printf("call%s ",
1531 i->ARM64in.Call.cond==ARM64cc_AL
1532 ? " " : showARM64CondCode(i->ARM64in.Call.cond));
1533 vex_printf("0x%llx [nArgRegs=%d, ",
1534 i->ARM64in.Call.target, i->ARM64in.Call.nArgRegs);
1535 ppRetLoc(i->ARM64in.Call.rloc);
1536 vex_printf("]");
1537 return;
1538 case ARM64in_AddToSP: {
1539 Int simm = i->ARM64in.AddToSP.simm;
1540 vex_printf("%s xsp, xsp, #%d", simm < 0 ? "sub" : "add",
1541 simm < 0 ? -simm : simm);
1542 return;
1544 case ARM64in_FromSP:
1545 vex_printf("mov ");
1546 ppHRegARM64(i->ARM64in.FromSP.dst);
1547 vex_printf(", xsp");
1548 return;
1549 case ARM64in_Mul:
1550 vex_printf("%s ", showARM64MulOp(i->ARM64in.Mul.op));
1551 ppHRegARM64(i->ARM64in.Mul.dst);
1552 vex_printf(", ");
1553 ppHRegARM64(i->ARM64in.Mul.argL);
1554 vex_printf(", ");
1555 ppHRegARM64(i->ARM64in.Mul.argR);
1556 return;
1558 case ARM64in_LdrEX: {
1559 const HChar* sz = " ";
1560 switch (i->ARM64in.LdrEX.szB) {
1561 case 1: sz = "b"; break;
1562 case 2: sz = "h"; break;
1563 case 4: case 8: break;
1564 default: vassert(0);
1566 vex_printf("ldxr%s %c2, [x4]",
1567 sz, i->ARM64in.LdrEX.szB == 8 ? 'x' : 'w');
1568 return;
1570 case ARM64in_StrEX: {
1571 const HChar* sz = " ";
1572 switch (i->ARM64in.StrEX.szB) {
1573 case 1: sz = "b"; break;
1574 case 2: sz = "h"; break;
1575 case 4: case 8: break;
1576 default: vassert(0);
1578 vex_printf("stxr%s w0, %c2, [x4]",
1579 sz, i->ARM64in.StrEX.szB == 8 ? 'x' : 'w');
1580 return;
1582 case ARM64in_CAS: {
1583 vex_printf("x1 = cas(%dbit)(x3, x5 -> x7)", 8 * i->ARM64in.CAS.szB);
1584 return;
1586 case ARM64in_MFence:
1587 vex_printf("(mfence) dsb sy; dmb sy; isb");
1588 return;
1589 case ARM64in_ClrEX:
1590 vex_printf("clrex #15");
1591 return;
1592 case ARM64in_VLdStH:
1593 if (i->ARM64in.VLdStH.isLoad) {
1594 vex_printf("ldr ");
1595 ppHRegARM64asHreg(i->ARM64in.VLdStH.hD);
1596 vex_printf(", %u(", i->ARM64in.VLdStH.uimm12);
1597 ppHRegARM64(i->ARM64in.VLdStH.rN);
1598 vex_printf(")");
1599 } else {
1600 vex_printf("str ");
1601 vex_printf("%u(", i->ARM64in.VLdStH.uimm12);
1602 ppHRegARM64(i->ARM64in.VLdStH.rN);
1603 vex_printf("), ");
1604 ppHRegARM64asHreg(i->ARM64in.VLdStH.hD);
1606 return;
1607 case ARM64in_VLdStS:
1608 if (i->ARM64in.VLdStS.isLoad) {
1609 vex_printf("ldr ");
1610 ppHRegARM64asSreg(i->ARM64in.VLdStS.sD);
1611 vex_printf(", %u(", i->ARM64in.VLdStS.uimm12);
1612 ppHRegARM64(i->ARM64in.VLdStS.rN);
1613 vex_printf(")");
1614 } else {
1615 vex_printf("str ");
1616 vex_printf("%u(", i->ARM64in.VLdStS.uimm12);
1617 ppHRegARM64(i->ARM64in.VLdStS.rN);
1618 vex_printf("), ");
1619 ppHRegARM64asSreg(i->ARM64in.VLdStS.sD);
1621 return;
1622 case ARM64in_VLdStD:
1623 if (i->ARM64in.VLdStD.isLoad) {
1624 vex_printf("ldr ");
1625 ppHRegARM64(i->ARM64in.VLdStD.dD);
1626 vex_printf(", %u(", i->ARM64in.VLdStD.uimm12);
1627 ppHRegARM64(i->ARM64in.VLdStD.rN);
1628 vex_printf(")");
1629 } else {
1630 vex_printf("str ");
1631 vex_printf("%u(", i->ARM64in.VLdStD.uimm12);
1632 ppHRegARM64(i->ARM64in.VLdStD.rN);
1633 vex_printf("), ");
1634 ppHRegARM64(i->ARM64in.VLdStD.dD);
1636 return;
1637 case ARM64in_VLdStQ:
1638 if (i->ARM64in.VLdStQ.isLoad)
1639 vex_printf("ld1.2d {");
1640 else
1641 vex_printf("st1.2d {");
1642 ppHRegARM64(i->ARM64in.VLdStQ.rQ);
1643 vex_printf("}, [");
1644 ppHRegARM64(i->ARM64in.VLdStQ.rN);
1645 vex_printf("]");
1646 return;
1647 case ARM64in_VCvtI2F: {
1648 HChar syn = '?';
1649 UInt fszB = 0;
1650 UInt iszB = 0;
1651 characteriseARM64CvtOp(&syn, &fszB, &iszB, i->ARM64in.VCvtI2F.how);
1652 vex_printf("%ccvtf ", syn);
1653 ppHRegARM64(i->ARM64in.VCvtI2F.rD);
1654 vex_printf("(%c-reg), ", fszB == 4 ? 'S' : 'D');
1655 ppHRegARM64(i->ARM64in.VCvtI2F.rS);
1656 vex_printf("(%c-reg)", iszB == 4 ? 'W' : 'X');
1657 return;
1659 case ARM64in_VCvtF2I: {
1660 HChar syn = '?';
1661 UInt fszB = 0;
1662 UInt iszB = 0;
1663 HChar rmo = '?';
1664 characteriseARM64CvtOp(&syn, &fszB, &iszB, i->ARM64in.VCvtF2I.how);
1665 UChar armRM = i->ARM64in.VCvtF2I.armRM;
1666 if (armRM < 4) rmo = "npmz"[armRM];
1667 vex_printf("fcvt%c%c ", rmo, syn);
1668 ppHRegARM64(i->ARM64in.VCvtF2I.rD);
1669 vex_printf("(%c-reg), ", iszB == 4 ? 'W' : 'X');
1670 ppHRegARM64(i->ARM64in.VCvtF2I.rS);
1671 vex_printf("(%c-reg)", fszB == 4 ? 'S' : 'D');
1672 return;
1674 case ARM64in_VCvtSD:
1675 vex_printf("fcvt%s ", i->ARM64in.VCvtSD.sToD ? "s2d" : "d2s");
1676 if (i->ARM64in.VCvtSD.sToD) {
1677 ppHRegARM64(i->ARM64in.VCvtSD.dst);
1678 vex_printf(", ");
1679 ppHRegARM64asSreg(i->ARM64in.VCvtSD.src);
1680 } else {
1681 ppHRegARM64asSreg(i->ARM64in.VCvtSD.dst);
1682 vex_printf(", ");
1683 ppHRegARM64(i->ARM64in.VCvtSD.src);
1685 return;
1686 case ARM64in_VCvtHS:
1687 vex_printf("fcvt%s ", i->ARM64in.VCvtHS.hToS ? "h2s" : "s2h");
1688 if (i->ARM64in.VCvtHS.hToS) {
1689 ppHRegARM64asSreg(i->ARM64in.VCvtHS.dst);
1690 vex_printf(", ");
1691 ppHRegARM64asHreg(i->ARM64in.VCvtHS.src);
1692 } else {
1693 ppHRegARM64asHreg(i->ARM64in.VCvtHS.dst);
1694 vex_printf(", ");
1695 ppHRegARM64asSreg(i->ARM64in.VCvtHS.src);
1697 return;
1698 case ARM64in_VCvtHD:
1699 vex_printf("fcvt%s ", i->ARM64in.VCvtHD.hToD ? "h2d" : "d2h");
1700 if (i->ARM64in.VCvtHD.hToD) {
1701 ppHRegARM64(i->ARM64in.VCvtHD.dst);
1702 vex_printf(", ");
1703 ppHRegARM64asHreg(i->ARM64in.VCvtHD.src);
1704 } else {
1705 ppHRegARM64asHreg(i->ARM64in.VCvtHD.dst);
1706 vex_printf(", ");
1707 ppHRegARM64(i->ARM64in.VCvtHD.src);
1709 return;
1710 case ARM64in_VUnaryD:
1711 vex_printf("f%s ", showARM64FpUnaryOp(i->ARM64in.VUnaryD.op));
1712 ppHRegARM64(i->ARM64in.VUnaryD.dst);
1713 vex_printf(", ");
1714 ppHRegARM64(i->ARM64in.VUnaryD.src);
1715 return;
1716 case ARM64in_VUnaryS:
1717 vex_printf("f%s ", showARM64FpUnaryOp(i->ARM64in.VUnaryS.op));
1718 ppHRegARM64asSreg(i->ARM64in.VUnaryS.dst);
1719 vex_printf(", ");
1720 ppHRegARM64asSreg(i->ARM64in.VUnaryS.src);
1721 return;
1722 case ARM64in_VBinD:
1723 vex_printf("f%s ", showARM64FpBinOp(i->ARM64in.VBinD.op));
1724 ppHRegARM64(i->ARM64in.VBinD.dst);
1725 vex_printf(", ");
1726 ppHRegARM64(i->ARM64in.VBinD.argL);
1727 vex_printf(", ");
1728 ppHRegARM64(i->ARM64in.VBinD.argR);
1729 return;
1730 case ARM64in_VBinS:
1731 vex_printf("f%s ", showARM64FpBinOp(i->ARM64in.VBinS.op));
1732 ppHRegARM64asSreg(i->ARM64in.VBinS.dst);
1733 vex_printf(", ");
1734 ppHRegARM64asSreg(i->ARM64in.VBinS.argL);
1735 vex_printf(", ");
1736 ppHRegARM64asSreg(i->ARM64in.VBinS.argR);
1737 return;
1738 case ARM64in_VCmpD:
1739 vex_printf("fcmp ");
1740 ppHRegARM64(i->ARM64in.VCmpD.argL);
1741 vex_printf(", ");
1742 ppHRegARM64(i->ARM64in.VCmpD.argR);
1743 return;
1744 case ARM64in_VCmpS:
1745 vex_printf("fcmp ");
1746 ppHRegARM64asSreg(i->ARM64in.VCmpS.argL);
1747 vex_printf(", ");
1748 ppHRegARM64asSreg(i->ARM64in.VCmpS.argR);
1749 return;
1750 case ARM64in_VFCSel: {
1751 UInt (*ppHRegARM64fp)(HReg)
1752 = (i->ARM64in.VFCSel.isD ? ppHRegARM64 : ppHRegARM64asSreg);
1753 vex_printf("fcsel ");
1754 ppHRegARM64fp(i->ARM64in.VFCSel.dst);
1755 vex_printf(", ");
1756 ppHRegARM64fp(i->ARM64in.VFCSel.argL);
1757 vex_printf(", ");
1758 ppHRegARM64fp(i->ARM64in.VFCSel.argR);
1759 vex_printf(", %s", showARM64CondCode(i->ARM64in.VFCSel.cond));
1760 return;
1762 case ARM64in_FPCR:
1763 if (i->ARM64in.FPCR.toFPCR) {
1764 vex_printf("msr fpcr, ");
1765 ppHRegARM64(i->ARM64in.FPCR.iReg);
1766 } else {
1767 vex_printf("mrs ");
1768 ppHRegARM64(i->ARM64in.FPCR.iReg);
1769 vex_printf(", fpcr");
1771 return;
1772 case ARM64in_FPSR:
1773 if (i->ARM64in.FPSR.toFPSR) {
1774 vex_printf("msr fpsr, ");
1775 ppHRegARM64(i->ARM64in.FPSR.iReg);
1776 } else {
1777 vex_printf("mrs ");
1778 ppHRegARM64(i->ARM64in.FPSR.iReg);
1779 vex_printf(", fpsr");
1781 return;
1782 case ARM64in_VBinV: {
1783 const HChar* nm = "??";
1784 const HChar* ar = "??";
1785 showARM64VecBinOp(&nm, &ar, i->ARM64in.VBinV.op);
1786 vex_printf("%s ", nm);
1787 ppHRegARM64(i->ARM64in.VBinV.dst);
1788 vex_printf(".%s, ", ar);
1789 ppHRegARM64(i->ARM64in.VBinV.argL);
1790 vex_printf(".%s, ", ar);
1791 ppHRegARM64(i->ARM64in.VBinV.argR);
1792 vex_printf(".%s", ar);
1793 return;
1795 case ARM64in_VModifyV: {
1796 const HChar* nm = "??";
1797 const HChar* ar = "??";
1798 showARM64VecModifyOp(&nm, &ar, i->ARM64in.VModifyV.op);
1799 vex_printf("%s ", nm);
1800 ppHRegARM64(i->ARM64in.VModifyV.mod);
1801 vex_printf(".%s, ", ar);
1802 ppHRegARM64(i->ARM64in.VModifyV.arg);
1803 vex_printf(".%s", ar);
1804 return;
1806 case ARM64in_VUnaryV: {
1807 const HChar* nm = "??";
1808 const HChar* ar = "??";
1809 showARM64VecUnaryOp(&nm, &ar, i->ARM64in.VUnaryV.op);
1810 vex_printf("%s ", nm);
1811 ppHRegARM64(i->ARM64in.VUnaryV.dst);
1812 vex_printf(".%s, ", ar);
1813 ppHRegARM64(i->ARM64in.VUnaryV.arg);
1814 vex_printf(".%s", ar);
1815 return;
1817 case ARM64in_VNarrowV: {
1818 UInt dszBlg2 = i->ARM64in.VNarrowV.dszBlg2;
1819 const HChar* darr[3] = { "8b", "4h", "2s" };
1820 const HChar* sarr[3] = { "8h", "4s", "2d" };
1821 const HChar* nm = showARM64VecNarrowOp(i->ARM64in.VNarrowV.op);
1822 vex_printf("%s ", nm);
1823 ppHRegARM64(i->ARM64in.VNarrowV.dst);
1824 vex_printf(".%s, ", dszBlg2 < 3 ? darr[dszBlg2] : "??");
1825 ppHRegARM64(i->ARM64in.VNarrowV.src);
1826 vex_printf(".%s", dszBlg2 < 3 ? sarr[dszBlg2] : "??");
1827 return;
1829 case ARM64in_VShiftImmV: {
1830 const HChar* nm = "??";
1831 const HChar* ar = "??";
1832 showARM64VecShiftImmOp(&nm, &ar, i->ARM64in.VShiftImmV.op);
1833 vex_printf("%s ", nm);
1834 ppHRegARM64(i->ARM64in.VShiftImmV.dst);
1835 vex_printf(".%s, ", ar);
1836 ppHRegARM64(i->ARM64in.VShiftImmV.src);
1837 vex_printf(".%s, #%u", ar, i->ARM64in.VShiftImmV.amt);
1838 return;
1840 case ARM64in_VExtV: {
1841 vex_printf("ext ");
1842 ppHRegARM64(i->ARM64in.VExtV.dst);
1843 vex_printf(".16b, ");
1844 ppHRegARM64(i->ARM64in.VExtV.srcLo);
1845 vex_printf(".16b, ");
1846 ppHRegARM64(i->ARM64in.VExtV.srcHi);
1847 vex_printf(".16b, #%u", i->ARM64in.VExtV.amtB);
1848 return;
1850 case ARM64in_VImmQ:
1851 vex_printf("qimm ");
1852 ppHRegARM64(i->ARM64in.VImmQ.rQ);
1853 vex_printf(", Bits16toBytes16(0x%x)", (UInt)i->ARM64in.VImmQ.imm);
1854 return;
1855 case ARM64in_VDfromX:
1856 vex_printf("fmov ");
1857 ppHRegARM64(i->ARM64in.VDfromX.rD);
1858 vex_printf(", ");
1859 ppHRegARM64(i->ARM64in.VDfromX.rX);
1860 return;
1861 case ARM64in_VQfromX:
1862 vex_printf("fmov ");
1863 ppHRegARM64(i->ARM64in.VQfromX.rQ);
1864 vex_printf(".d[0], ");
1865 ppHRegARM64(i->ARM64in.VQfromX.rXlo);
1866 return;
1867 case ARM64in_VQfromXX:
1868 vex_printf("qFromXX ");
1869 ppHRegARM64(i->ARM64in.VQfromXX.rQ);
1870 vex_printf(", ");
1871 ppHRegARM64(i->ARM64in.VQfromXX.rXhi);
1872 vex_printf(", ");
1873 ppHRegARM64(i->ARM64in.VQfromXX.rXlo);
1874 return;
1875 case ARM64in_VXfromQ:
1876 vex_printf("fmov ");
1877 ppHRegARM64(i->ARM64in.VXfromQ.rX);
1878 vex_printf(", ");
1879 ppHRegARM64(i->ARM64in.VXfromQ.rQ);
1880 vex_printf(".d[%u]", i->ARM64in.VXfromQ.laneNo);
1881 return;
1882 case ARM64in_VXfromDorS:
1883 vex_printf("fmov ");
1884 ppHRegARM64(i->ARM64in.VXfromDorS.rX);
1885 vex_printf("(%c-reg), ", i->ARM64in.VXfromDorS.fromD ? 'X':'W');
1886 ppHRegARM64(i->ARM64in.VXfromDorS.rDorS);
1887 vex_printf("(%c-reg)", i->ARM64in.VXfromDorS.fromD ? 'D' : 'S');
1888 return;
1889 case ARM64in_VMov: {
1890 UChar aux = '?';
1891 switch (i->ARM64in.VMov.szB) {
1892 case 16: aux = 'q'; break;
1893 case 8: aux = 'd'; break;
1894 case 4: aux = 's'; break;
1895 default: break;
1897 vex_printf("mov(%c) ", aux);
1898 ppHRegARM64(i->ARM64in.VMov.dst);
1899 vex_printf(", ");
1900 ppHRegARM64(i->ARM64in.VMov.src);
1901 return;
1903 case ARM64in_EvCheck:
1904 vex_printf("(evCheck) ldr w9,");
1905 ppARM64AMode(i->ARM64in.EvCheck.amCounter);
1906 vex_printf("; subs w9,w9,$1; str w9,");
1907 ppARM64AMode(i->ARM64in.EvCheck.amCounter);
1908 vex_printf("; bpl nofail; ldr x9,");
1909 ppARM64AMode(i->ARM64in.EvCheck.amFailAddr);
1910 vex_printf("; br x9; nofail:");
1911 return;
1912 case ARM64in_ProfInc:
1913 vex_printf("(profInc) imm64-fixed4 x9,$NotKnownYet; "
1914 "ldr x8,[x9]; add x8,x8,#1, str x8,[x9]");
1915 return;
1916 default:
1917 vex_printf("ppARM64Instr: unhandled case (tag %d)", (Int)i->tag);
1918 vpanic("ppARM64Instr(1)");
1919 return;
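/* Illustrative sketch, not part of the original source: pretty-printing
   a freshly built (hypothetical) instruction.  The constructor and
   condition-code names are assumed to follow host_arm64_defs.h. */
static void example_ppARM64Instr ( void )
{
   ARM64Instr* csel = ARM64Instr_CSel(hregARM64_X0(), hregARM64_X1(),
                                      hregARM64_X2(), ARM64cc_EQ);
   ppARM64Instr(csel);   /* prints roughly: csel x0, x1, x2, eq */
}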
1924 /* --------- Helpers for register allocation. --------- */
1926 void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 )
1928 vassert(mode64 == True);
1929 initHRegUsage(u);
1930 switch (i->tag) {
1931 case ARM64in_Arith:
1932 addHRegUse(u, HRmWrite, i->ARM64in.Arith.dst);
1933 addHRegUse(u, HRmRead, i->ARM64in.Arith.argL);
1934 addRegUsage_ARM64RIA(u, i->ARM64in.Arith.argR);
1935 return;
1936 case ARM64in_Cmp:
1937 addHRegUse(u, HRmRead, i->ARM64in.Cmp.argL);
1938 addRegUsage_ARM64RIA(u, i->ARM64in.Cmp.argR);
1939 return;
1940 case ARM64in_Logic:
1941 addHRegUse(u, HRmWrite, i->ARM64in.Logic.dst);
1942 addHRegUse(u, HRmRead, i->ARM64in.Logic.argL);
1943 addRegUsage_ARM64RIL(u, i->ARM64in.Logic.argR);
1944 return;
1945 case ARM64in_Test:
1946 addHRegUse(u, HRmRead, i->ARM64in.Test.argL);
1947 addRegUsage_ARM64RIL(u, i->ARM64in.Test.argR);
1948 return;
1949 case ARM64in_Shift:
1950 addHRegUse(u, HRmWrite, i->ARM64in.Shift.dst);
1951 addHRegUse(u, HRmRead, i->ARM64in.Shift.argL);
1952 addRegUsage_ARM64RI6(u, i->ARM64in.Shift.argR);
1953 return;
1954 case ARM64in_Unary:
1955 addHRegUse(u, HRmWrite, i->ARM64in.Unary.dst);
1956 addHRegUse(u, HRmRead, i->ARM64in.Unary.src);
1957 return;
1958 case ARM64in_MovI:
1959 addHRegUse(u, HRmWrite, i->ARM64in.MovI.dst);
1960 addHRegUse(u, HRmRead, i->ARM64in.MovI.src);
1961 u->isRegRegMove = True;
1962 u->regMoveSrc = i->ARM64in.MovI.src;
1963 u->regMoveDst = i->ARM64in.MovI.dst;
1964 return;
1965 case ARM64in_Imm64:
1966 addHRegUse(u, HRmWrite, i->ARM64in.Imm64.dst);
1967 return;
1968 case ARM64in_LdSt64:
1969 addRegUsage_ARM64AMode(u, i->ARM64in.LdSt64.amode);
1970 if (i->ARM64in.LdSt64.isLoad) {
1971 addHRegUse(u, HRmWrite, i->ARM64in.LdSt64.rD);
1972 } else {
1973 addHRegUse(u, HRmRead, i->ARM64in.LdSt64.rD);
1975 return;
1976 case ARM64in_LdSt32:
1977 addRegUsage_ARM64AMode(u, i->ARM64in.LdSt32.amode);
1978 if (i->ARM64in.LdSt32.isLoad) {
1979 addHRegUse(u, HRmWrite, i->ARM64in.LdSt32.rD);
1980 } else {
1981 addHRegUse(u, HRmRead, i->ARM64in.LdSt32.rD);
1983 return;
1984 case ARM64in_LdSt16:
1985 addRegUsage_ARM64AMode(u, i->ARM64in.LdSt16.amode);
1986 if (i->ARM64in.LdSt16.isLoad) {
1987 addHRegUse(u, HRmWrite, i->ARM64in.LdSt16.rD);
1988 } else {
1989 addHRegUse(u, HRmRead, i->ARM64in.LdSt16.rD);
1991 return;
1992 case ARM64in_LdSt8:
1993 addRegUsage_ARM64AMode(u, i->ARM64in.LdSt8.amode);
1994 if (i->ARM64in.LdSt8.isLoad) {
1995 addHRegUse(u, HRmWrite, i->ARM64in.LdSt8.rD);
1996 } else {
1997 addHRegUse(u, HRmRead, i->ARM64in.LdSt8.rD);
1999 return;
2000 /* XDirect/XIndir/XAssisted are also a bit subtle. They
2001 conditionally exit the block. Hence we only need to list (1)
2002 the registers that they read, and (2) the registers that they
2003 write in the case where the block is not exited. (2) is
2004 empty, hence only (1) is relevant here. */
2005 case ARM64in_XDirect:
2006 addRegUsage_ARM64AMode(u, i->ARM64in.XDirect.amPC);
2007 return;
2008 case ARM64in_XIndir:
2009 addHRegUse(u, HRmRead, i->ARM64in.XIndir.dstGA);
2010 addRegUsage_ARM64AMode(u, i->ARM64in.XIndir.amPC);
2011 return;
2012 case ARM64in_XAssisted:
2013 addHRegUse(u, HRmRead, i->ARM64in.XAssisted.dstGA);
2014 addRegUsage_ARM64AMode(u, i->ARM64in.XAssisted.amPC);
2015 return;
2016 case ARM64in_CSel:
2017 addHRegUse(u, HRmWrite, i->ARM64in.CSel.dst);
2018 addHRegUse(u, HRmRead, i->ARM64in.CSel.argL);
2019 addHRegUse(u, HRmRead, i->ARM64in.CSel.argR);
2020 return;
2021 case ARM64in_Call:
2022 /* logic and comments copied/modified from x86 back end */
2023 /* This is a bit subtle. */
2024 /* First off, claim it trashes all the caller-saved regs
2025 which fall within the register allocator's jurisdiction.
2026 These I believe to be x0 to x7 and the 128-bit vector
2027 registers in use, q16 .. q20. */
2028 addHRegUse(u, HRmWrite, hregARM64_X0());
2029 addHRegUse(u, HRmWrite, hregARM64_X1());
2030 addHRegUse(u, HRmWrite, hregARM64_X2());
2031 addHRegUse(u, HRmWrite, hregARM64_X3());
2032 addHRegUse(u, HRmWrite, hregARM64_X4());
2033 addHRegUse(u, HRmWrite, hregARM64_X5());
2034 addHRegUse(u, HRmWrite, hregARM64_X6());
2035 addHRegUse(u, HRmWrite, hregARM64_X7());
2036 addHRegUse(u, HRmWrite, hregARM64_Q16());
2037 addHRegUse(u, HRmWrite, hregARM64_Q17());
2038 addHRegUse(u, HRmWrite, hregARM64_Q18());
2039 addHRegUse(u, HRmWrite, hregARM64_Q19());
2040 addHRegUse(u, HRmWrite, hregARM64_Q20());
2041 /* Now we have to state any parameter-carrying registers
2042 which might be read. This depends on nArgRegs. */
2043 switch (i->ARM64in.Call.nArgRegs) {
2044 case 8: addHRegUse(u, HRmRead, hregARM64_X7()); /*fallthru*/
2045 case 7: addHRegUse(u, HRmRead, hregARM64_X6()); /*fallthru*/
2046 case 6: addHRegUse(u, HRmRead, hregARM64_X5()); /*fallthru*/
2047 case 5: addHRegUse(u, HRmRead, hregARM64_X4()); /*fallthru*/
2048 case 4: addHRegUse(u, HRmRead, hregARM64_X3()); /*fallthru*/
2049 case 3: addHRegUse(u, HRmRead, hregARM64_X2()); /*fallthru*/
2050 case 2: addHRegUse(u, HRmRead, hregARM64_X1()); /*fallthru*/
2051 case 1: addHRegUse(u, HRmRead, hregARM64_X0()); break;
2052 case 0: break;
2053 default: vpanic("getRegUsage_ARM64:Call:regparms");
2055 /* Finally, there is the issue that the insn trashes a
2056 register because the literal target address has to be
2057 loaded into a register. However, we reserve x9 for that
2058 purpose so there's no further complexity here. Stating x9
2059 as trashed is pointless since it's not under the control
2060 of the allocator, but what the hell. */
2061 addHRegUse(u, HRmWrite, hregARM64_X9());
2062 return;
2063 case ARM64in_AddToSP:
2064 /* Only changes SP, but regalloc doesn't control that, hence
2065 we don't care. */
2066 return;
2067 case ARM64in_FromSP:
2068 addHRegUse(u, HRmWrite, i->ARM64in.FromSP.dst);
2069 return;
2070 case ARM64in_Mul:
2071 addHRegUse(u, HRmWrite, i->ARM64in.Mul.dst);
2072 addHRegUse(u, HRmRead, i->ARM64in.Mul.argL);
2073 addHRegUse(u, HRmRead, i->ARM64in.Mul.argR);
2074 return;
2075 case ARM64in_LdrEX:
2076 addHRegUse(u, HRmRead, hregARM64_X4());
2077 addHRegUse(u, HRmWrite, hregARM64_X2());
2078 return;
2079 case ARM64in_StrEX:
2080 addHRegUse(u, HRmRead, hregARM64_X4());
2081 addHRegUse(u, HRmWrite, hregARM64_X0());
2082 addHRegUse(u, HRmRead, hregARM64_X2());
2083 return;
2084 case ARM64in_CAS:
2085 addHRegUse(u, HRmRead, hregARM64_X3());
2086 addHRegUse(u, HRmRead, hregARM64_X5());
2087 addHRegUse(u, HRmRead, hregARM64_X7());
2088 addHRegUse(u, HRmWrite, hregARM64_X1());
2089 /* Pointless to state this since X8 is not available to RA. */
2090 addHRegUse(u, HRmWrite, hregARM64_X8());
2091 return;
2092 case ARM64in_MFence:
2093 return;
2094 case ARM64in_ClrEX:
2095 return;
2096 case ARM64in_VLdStH:
2097 addHRegUse(u, HRmRead, i->ARM64in.VLdStH.rN);
2098 if (i->ARM64in.VLdStH.isLoad) {
2099 addHRegUse(u, HRmWrite, i->ARM64in.VLdStH.hD);
2100 } else {
2101 addHRegUse(u, HRmRead, i->ARM64in.VLdStH.hD);
2103 return;
2104 case ARM64in_VLdStS:
2105 addHRegUse(u, HRmRead, i->ARM64in.VLdStS.rN);
2106 if (i->ARM64in.VLdStS.isLoad) {
2107 addHRegUse(u, HRmWrite, i->ARM64in.VLdStS.sD);
2108 } else {
2109 addHRegUse(u, HRmRead, i->ARM64in.VLdStS.sD);
2111 return;
2112 case ARM64in_VLdStD:
2113 addHRegUse(u, HRmRead, i->ARM64in.VLdStD.rN);
2114 if (i->ARM64in.VLdStD.isLoad) {
2115 addHRegUse(u, HRmWrite, i->ARM64in.VLdStD.dD);
2116 } else {
2117 addHRegUse(u, HRmRead, i->ARM64in.VLdStD.dD);
2119 return;
2120 case ARM64in_VLdStQ:
2121 addHRegUse(u, HRmRead, i->ARM64in.VLdStQ.rN);
2122 if (i->ARM64in.VLdStQ.isLoad)
2123 addHRegUse(u, HRmWrite, i->ARM64in.VLdStQ.rQ);
2124 else
2125 addHRegUse(u, HRmRead, i->ARM64in.VLdStQ.rQ);
2126 return;
2127 case ARM64in_VCvtI2F:
2128 addHRegUse(u, HRmRead, i->ARM64in.VCvtI2F.rS);
2129 addHRegUse(u, HRmWrite, i->ARM64in.VCvtI2F.rD);
2130 return;
2131 case ARM64in_VCvtF2I:
2132 addHRegUse(u, HRmRead, i->ARM64in.VCvtF2I.rS);
2133 addHRegUse(u, HRmWrite, i->ARM64in.VCvtF2I.rD);
2134 return;
2135 case ARM64in_VCvtSD:
2136 addHRegUse(u, HRmWrite, i->ARM64in.VCvtSD.dst);
2137 addHRegUse(u, HRmRead, i->ARM64in.VCvtSD.src);
2138 return;
2139 case ARM64in_VCvtHS:
2140 addHRegUse(u, HRmWrite, i->ARM64in.VCvtHS.dst);
2141 addHRegUse(u, HRmRead, i->ARM64in.VCvtHS.src);
2142 return;
2143 case ARM64in_VCvtHD:
2144 addHRegUse(u, HRmWrite, i->ARM64in.VCvtHD.dst);
2145 addHRegUse(u, HRmRead, i->ARM64in.VCvtHD.src);
2146 return;
2147 case ARM64in_VUnaryD:
2148 addHRegUse(u, HRmWrite, i->ARM64in.VUnaryD.dst);
2149 addHRegUse(u, HRmRead, i->ARM64in.VUnaryD.src);
2150 return;
2151 case ARM64in_VUnaryS:
2152 addHRegUse(u, HRmWrite, i->ARM64in.VUnaryS.dst);
2153 addHRegUse(u, HRmRead, i->ARM64in.VUnaryS.src);
2154 return;
2155 case ARM64in_VBinD:
2156 addHRegUse(u, HRmWrite, i->ARM64in.VBinD.dst);
2157 addHRegUse(u, HRmRead, i->ARM64in.VBinD.argL);
2158 addHRegUse(u, HRmRead, i->ARM64in.VBinD.argR);
2159 return;
2160 case ARM64in_VBinS:
2161 addHRegUse(u, HRmWrite, i->ARM64in.VBinS.dst);
2162 addHRegUse(u, HRmRead, i->ARM64in.VBinS.argL);
2163 addHRegUse(u, HRmRead, i->ARM64in.VBinS.argR);
2164 return;
2165 case ARM64in_VCmpD:
2166 addHRegUse(u, HRmRead, i->ARM64in.VCmpD.argL);
2167 addHRegUse(u, HRmRead, i->ARM64in.VCmpD.argR);
2168 return;
2169 case ARM64in_VCmpS:
2170 addHRegUse(u, HRmRead, i->ARM64in.VCmpS.argL);
2171 addHRegUse(u, HRmRead, i->ARM64in.VCmpS.argR);
2172 return;
2173 case ARM64in_VFCSel:
2174 addHRegUse(u, HRmRead, i->ARM64in.VFCSel.argL);
2175 addHRegUse(u, HRmRead, i->ARM64in.VFCSel.argR);
2176 addHRegUse(u, HRmWrite, i->ARM64in.VFCSel.dst);
2177 return;
2178 case ARM64in_FPCR:
2179 if (i->ARM64in.FPCR.toFPCR)
2180 addHRegUse(u, HRmRead, i->ARM64in.FPCR.iReg);
2181 else
2182 addHRegUse(u, HRmWrite, i->ARM64in.FPCR.iReg);
2183 return;
2184 case ARM64in_FPSR:
2185 if (i->ARM64in.FPSR.toFPSR)
2186 addHRegUse(u, HRmRead, i->ARM64in.FPSR.iReg);
2187 else
2188 addHRegUse(u, HRmWrite, i->ARM64in.FPSR.iReg);
2189 return;
2190 case ARM64in_VBinV:
2191 addHRegUse(u, HRmWrite, i->ARM64in.VBinV.dst);
2192 addHRegUse(u, HRmRead, i->ARM64in.VBinV.argL);
2193 addHRegUse(u, HRmRead, i->ARM64in.VBinV.argR);
2194 return;
2195 case ARM64in_VModifyV:
2196 addHRegUse(u, HRmWrite, i->ARM64in.VModifyV.mod);
2197 addHRegUse(u, HRmRead, i->ARM64in.VModifyV.mod);
2198 addHRegUse(u, HRmRead, i->ARM64in.VModifyV.arg);
2199 return;
2200 case ARM64in_VUnaryV:
2201 addHRegUse(u, HRmWrite, i->ARM64in.VUnaryV.dst);
2202 addHRegUse(u, HRmRead, i->ARM64in.VUnaryV.arg);
2203 return;
2204 case ARM64in_VNarrowV:
2205 addHRegUse(u, HRmWrite, i->ARM64in.VNarrowV.dst);
2206 addHRegUse(u, HRmRead, i->ARM64in.VNarrowV.src);
2207 return;
2208 case ARM64in_VShiftImmV:
2209 addHRegUse(u, HRmWrite, i->ARM64in.VShiftImmV.dst);
2210 addHRegUse(u, HRmRead, i->ARM64in.VShiftImmV.src);
2211 return;
2212 case ARM64in_VExtV:
2213 addHRegUse(u, HRmWrite, i->ARM64in.VExtV.dst);
2214 addHRegUse(u, HRmRead, i->ARM64in.VExtV.srcLo);
2215 addHRegUse(u, HRmRead, i->ARM64in.VExtV.srcHi);
2216 return;
2217 case ARM64in_VImmQ:
2218 addHRegUse(u, HRmWrite, i->ARM64in.VImmQ.rQ);
2219 return;
2220 case ARM64in_VDfromX:
2221 addHRegUse(u, HRmWrite, i->ARM64in.VDfromX.rD);
2222 addHRegUse(u, HRmRead, i->ARM64in.VDfromX.rX);
2223 return;
2224 case ARM64in_VQfromX:
2225 addHRegUse(u, HRmWrite, i->ARM64in.VQfromX.rQ);
2226 addHRegUse(u, HRmRead, i->ARM64in.VQfromX.rXlo);
2227 return;
2228 case ARM64in_VQfromXX:
2229 addHRegUse(u, HRmWrite, i->ARM64in.VQfromXX.rQ);
2230 addHRegUse(u, HRmRead, i->ARM64in.VQfromXX.rXhi);
2231 addHRegUse(u, HRmRead, i->ARM64in.VQfromXX.rXlo);
2232 return;
2233 case ARM64in_VXfromQ:
2234 addHRegUse(u, HRmWrite, i->ARM64in.VXfromQ.rX);
2235 addHRegUse(u, HRmRead, i->ARM64in.VXfromQ.rQ);
2236 return;
2237 case ARM64in_VXfromDorS:
2238 addHRegUse(u, HRmWrite, i->ARM64in.VXfromDorS.rX);
2239 addHRegUse(u, HRmRead, i->ARM64in.VXfromDorS.rDorS);
2240 return;
2241 case ARM64in_VMov:
2242 addHRegUse(u, HRmWrite, i->ARM64in.VMov.dst);
2243 addHRegUse(u, HRmRead, i->ARM64in.VMov.src);
2244 u->isRegRegMove = True;
2245 u->regMoveSrc = i->ARM64in.VMov.src;
2246 u->regMoveDst = i->ARM64in.VMov.dst;
2247 return;
2248 case ARM64in_EvCheck:
2249 /* We expect both amodes only to mention x21, so this is in
2250 fact pointless, since x21 isn't allocatable, but
2251 anyway.. */
2252 addRegUsage_ARM64AMode(u, i->ARM64in.EvCheck.amCounter);
2253 addRegUsage_ARM64AMode(u, i->ARM64in.EvCheck.amFailAddr);
2254 addHRegUse(u, HRmWrite, hregARM64_X9()); /* also unavail to RA */
2255 return;
2256 case ARM64in_ProfInc:
2257 /* Again, pointless to actually state these since neither
2258 is available to RA. */
2259 addHRegUse(u, HRmWrite, hregARM64_X9()); /* unavail to RA */
2260 addHRegUse(u, HRmWrite, hregARM64_X8()); /* unavail to RA */
2261 return;
2262 default:
2263 ppARM64Instr(i);
2264 vpanic("getRegUsage_ARM64Instr");
2269 void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 )
2271 vassert(mode64 == True);
2272 switch (i->tag) {
2273 case ARM64in_Arith:
2274 i->ARM64in.Arith.dst = lookupHRegRemap(m, i->ARM64in.Arith.dst);
2275 i->ARM64in.Arith.argL = lookupHRegRemap(m, i->ARM64in.Arith.argL);
2276 mapRegs_ARM64RIA(m, i->ARM64in.Arith.argR);
2277 return;
2278 case ARM64in_Cmp:
2279 i->ARM64in.Cmp.argL = lookupHRegRemap(m, i->ARM64in.Cmp.argL);
2280 mapRegs_ARM64RIA(m, i->ARM64in.Cmp.argR);
2281 return;
2282 case ARM64in_Logic:
2283 i->ARM64in.Logic.dst = lookupHRegRemap(m, i->ARM64in.Logic.dst);
2284 i->ARM64in.Logic.argL = lookupHRegRemap(m, i->ARM64in.Logic.argL);
2285 mapRegs_ARM64RIL(m, i->ARM64in.Logic.argR);
2286 return;
2287 case ARM64in_Test:
2288 i->ARM64in.Test.argL = lookupHRegRemap(m, i->ARM64in.Test.argL);
2289 mapRegs_ARM64RIL(m, i->ARM64in.Test.argR);
2290 return;
2291 case ARM64in_Shift:
2292 i->ARM64in.Shift.dst = lookupHRegRemap(m, i->ARM64in.Shift.dst);
2293 i->ARM64in.Shift.argL = lookupHRegRemap(m, i->ARM64in.Shift.argL);
2294 mapRegs_ARM64RI6(m, i->ARM64in.Shift.argR);
2295 return;
2296 case ARM64in_Unary:
2297 i->ARM64in.Unary.dst = lookupHRegRemap(m, i->ARM64in.Unary.dst);
2298 i->ARM64in.Unary.src = lookupHRegRemap(m, i->ARM64in.Unary.src);
2299 return;
2300 case ARM64in_MovI:
2301 i->ARM64in.MovI.dst = lookupHRegRemap(m, i->ARM64in.MovI.dst);
2302 i->ARM64in.MovI.src = lookupHRegRemap(m, i->ARM64in.MovI.src);
2303 return;
2304 case ARM64in_Imm64:
2305 i->ARM64in.Imm64.dst = lookupHRegRemap(m, i->ARM64in.Imm64.dst);
2306 return;
2307 case ARM64in_LdSt64:
2308 i->ARM64in.LdSt64.rD = lookupHRegRemap(m, i->ARM64in.LdSt64.rD);
2309 mapRegs_ARM64AMode(m, i->ARM64in.LdSt64.amode);
2310 return;
2311 case ARM64in_LdSt32:
2312 i->ARM64in.LdSt32.rD = lookupHRegRemap(m, i->ARM64in.LdSt32.rD);
2313 mapRegs_ARM64AMode(m, i->ARM64in.LdSt32.amode);
2314 return;
2315 case ARM64in_LdSt16:
2316 i->ARM64in.LdSt16.rD = lookupHRegRemap(m, i->ARM64in.LdSt16.rD);
2317 mapRegs_ARM64AMode(m, i->ARM64in.LdSt16.amode);
2318 return;
2319 case ARM64in_LdSt8:
2320 i->ARM64in.LdSt8.rD = lookupHRegRemap(m, i->ARM64in.LdSt8.rD);
2321 mapRegs_ARM64AMode(m, i->ARM64in.LdSt8.amode);
2322 return;
2323 case ARM64in_XDirect:
2324 mapRegs_ARM64AMode(m, i->ARM64in.XDirect.amPC);
2325 return;
2326 case ARM64in_XIndir:
2327 i->ARM64in.XIndir.dstGA
2328 = lookupHRegRemap(m, i->ARM64in.XIndir.dstGA);
2329 mapRegs_ARM64AMode(m, i->ARM64in.XIndir.amPC);
2330 return;
2331 case ARM64in_XAssisted:
2332 i->ARM64in.XAssisted.dstGA
2333 = lookupHRegRemap(m, i->ARM64in.XAssisted.dstGA);
2334 mapRegs_ARM64AMode(m, i->ARM64in.XAssisted.amPC);
2335 return;
2336 case ARM64in_CSel:
2337 i->ARM64in.CSel.dst = lookupHRegRemap(m, i->ARM64in.CSel.dst);
2338 i->ARM64in.CSel.argL = lookupHRegRemap(m, i->ARM64in.CSel.argL);
2339 i->ARM64in.CSel.argR = lookupHRegRemap(m, i->ARM64in.CSel.argR);
2340 return;
2341 case ARM64in_Call:
2342 return;
2343 case ARM64in_AddToSP:
2344 return;
2345 case ARM64in_FromSP:
2346 i->ARM64in.FromSP.dst = lookupHRegRemap(m, i->ARM64in.FromSP.dst);
2347 return;
2348 case ARM64in_Mul:
2349 i->ARM64in.Mul.dst = lookupHRegRemap(m, i->ARM64in.Mul.dst);
2350 i->ARM64in.Mul.argL = lookupHRegRemap(m, i->ARM64in.Mul.argL);
2351 i->ARM64in.Mul.argR = lookupHRegRemap(m, i->ARM64in.Mul.argR);
2352 return;
2353 case ARM64in_LdrEX:
2354 return;
2355 case ARM64in_StrEX:
2356 return;
2357 case ARM64in_CAS:
2358 return;
2359 case ARM64in_MFence:
2360 return;
2361 case ARM64in_ClrEX:
2362 return;
2363 case ARM64in_VLdStH:
2364 i->ARM64in.VLdStH.hD = lookupHRegRemap(m, i->ARM64in.VLdStH.hD);
2365 i->ARM64in.VLdStH.rN = lookupHRegRemap(m, i->ARM64in.VLdStH.rN);
2366 return;
2367 case ARM64in_VLdStS:
2368 i->ARM64in.VLdStS.sD = lookupHRegRemap(m, i->ARM64in.VLdStS.sD);
2369 i->ARM64in.VLdStS.rN = lookupHRegRemap(m, i->ARM64in.VLdStS.rN);
2370 return;
2371 case ARM64in_VLdStD:
2372 i->ARM64in.VLdStD.dD = lookupHRegRemap(m, i->ARM64in.VLdStD.dD);
2373 i->ARM64in.VLdStD.rN = lookupHRegRemap(m, i->ARM64in.VLdStD.rN);
2374 return;
2375 case ARM64in_VLdStQ:
2376 i->ARM64in.VLdStQ.rQ = lookupHRegRemap(m, i->ARM64in.VLdStQ.rQ);
2377 i->ARM64in.VLdStQ.rN = lookupHRegRemap(m, i->ARM64in.VLdStQ.rN);
2378 return;
2379 case ARM64in_VCvtI2F:
2380 i->ARM64in.VCvtI2F.rS = lookupHRegRemap(m, i->ARM64in.VCvtI2F.rS);
2381 i->ARM64in.VCvtI2F.rD = lookupHRegRemap(m, i->ARM64in.VCvtI2F.rD);
2382 return;
2383 case ARM64in_VCvtF2I:
2384 i->ARM64in.VCvtF2I.rS = lookupHRegRemap(m, i->ARM64in.VCvtF2I.rS);
2385 i->ARM64in.VCvtF2I.rD = lookupHRegRemap(m, i->ARM64in.VCvtF2I.rD);
2386 return;
2387 case ARM64in_VCvtSD:
2388 i->ARM64in.VCvtSD.dst = lookupHRegRemap(m, i->ARM64in.VCvtSD.dst);
2389 i->ARM64in.VCvtSD.src = lookupHRegRemap(m, i->ARM64in.VCvtSD.src);
2390 return;
2391 case ARM64in_VCvtHS:
2392 i->ARM64in.VCvtHS.dst = lookupHRegRemap(m, i->ARM64in.VCvtHS.dst);
2393 i->ARM64in.VCvtHS.src = lookupHRegRemap(m, i->ARM64in.VCvtHS.src);
2394 return;
2395 case ARM64in_VCvtHD:
2396 i->ARM64in.VCvtHD.dst = lookupHRegRemap(m, i->ARM64in.VCvtHD.dst);
2397 i->ARM64in.VCvtHD.src = lookupHRegRemap(m, i->ARM64in.VCvtHD.src);
2398 return;
2399 case ARM64in_VUnaryD:
2400 i->ARM64in.VUnaryD.dst = lookupHRegRemap(m, i->ARM64in.VUnaryD.dst);
2401 i->ARM64in.VUnaryD.src = lookupHRegRemap(m, i->ARM64in.VUnaryD.src);
2402 return;
2403 case ARM64in_VUnaryS:
2404 i->ARM64in.VUnaryS.dst = lookupHRegRemap(m, i->ARM64in.VUnaryS.dst);
2405 i->ARM64in.VUnaryS.src = lookupHRegRemap(m, i->ARM64in.VUnaryS.src);
2406 return;
2407 case ARM64in_VBinD:
2408 i->ARM64in.VBinD.dst = lookupHRegRemap(m, i->ARM64in.VBinD.dst);
2409 i->ARM64in.VBinD.argL = lookupHRegRemap(m, i->ARM64in.VBinD.argL);
2410 i->ARM64in.VBinD.argR = lookupHRegRemap(m, i->ARM64in.VBinD.argR);
2411 return;
2412 case ARM64in_VBinS:
2413 i->ARM64in.VBinS.dst = lookupHRegRemap(m, i->ARM64in.VBinS.dst);
2414 i->ARM64in.VBinS.argL = lookupHRegRemap(m, i->ARM64in.VBinS.argL);
2415 i->ARM64in.VBinS.argR = lookupHRegRemap(m, i->ARM64in.VBinS.argR);
2416 return;
2417 case ARM64in_VCmpD:
2418 i->ARM64in.VCmpD.argL = lookupHRegRemap(m, i->ARM64in.VCmpD.argL);
2419 i->ARM64in.VCmpD.argR = lookupHRegRemap(m, i->ARM64in.VCmpD.argR);
2420 return;
2421 case ARM64in_VCmpS:
2422 i->ARM64in.VCmpS.argL = lookupHRegRemap(m, i->ARM64in.VCmpS.argL);
2423 i->ARM64in.VCmpS.argR = lookupHRegRemap(m, i->ARM64in.VCmpS.argR);
2424 return;
2425 case ARM64in_VFCSel:
2426 i->ARM64in.VFCSel.argL = lookupHRegRemap(m, i->ARM64in.VFCSel.argL);
2427 i->ARM64in.VFCSel.argR = lookupHRegRemap(m, i->ARM64in.VFCSel.argR);
2428 i->ARM64in.VFCSel.dst = lookupHRegRemap(m, i->ARM64in.VFCSel.dst);
2429 return;
2430 case ARM64in_FPCR:
2431 i->ARM64in.FPCR.iReg = lookupHRegRemap(m, i->ARM64in.FPCR.iReg);
2432 return;
2433 case ARM64in_FPSR:
2434 i->ARM64in.FPSR.iReg = lookupHRegRemap(m, i->ARM64in.FPSR.iReg);
2435 return;
2436 case ARM64in_VBinV:
2437 i->ARM64in.VBinV.dst = lookupHRegRemap(m, i->ARM64in.VBinV.dst);
2438 i->ARM64in.VBinV.argL = lookupHRegRemap(m, i->ARM64in.VBinV.argL);
2439 i->ARM64in.VBinV.argR = lookupHRegRemap(m, i->ARM64in.VBinV.argR);
2440 return;
2441 case ARM64in_VModifyV:
2442 i->ARM64in.VModifyV.mod = lookupHRegRemap(m, i->ARM64in.VModifyV.mod);
2443 i->ARM64in.VModifyV.arg = lookupHRegRemap(m, i->ARM64in.VModifyV.arg);
2444 return;
2445 case ARM64in_VUnaryV:
2446 i->ARM64in.VUnaryV.dst = lookupHRegRemap(m, i->ARM64in.VUnaryV.dst);
2447 i->ARM64in.VUnaryV.arg = lookupHRegRemap(m, i->ARM64in.VUnaryV.arg);
2448 return;
2449 case ARM64in_VNarrowV:
2450 i->ARM64in.VNarrowV.dst = lookupHRegRemap(m, i->ARM64in.VNarrowV.dst);
2451 i->ARM64in.VNarrowV.src = lookupHRegRemap(m, i->ARM64in.VNarrowV.src);
2452 return;
2453 case ARM64in_VShiftImmV:
2454 i->ARM64in.VShiftImmV.dst
2455 = lookupHRegRemap(m, i->ARM64in.VShiftImmV.dst);
2456 i->ARM64in.VShiftImmV.src
2457 = lookupHRegRemap(m, i->ARM64in.VShiftImmV.src);
2458 return;
2459 case ARM64in_VExtV:
2460 i->ARM64in.VExtV.dst = lookupHRegRemap(m, i->ARM64in.VExtV.dst);
2461 i->ARM64in.VExtV.srcLo = lookupHRegRemap(m, i->ARM64in.VExtV.srcLo);
2462 i->ARM64in.VExtV.srcHi = lookupHRegRemap(m, i->ARM64in.VExtV.srcHi);
2463 return;
2464 case ARM64in_VImmQ:
2465 i->ARM64in.VImmQ.rQ = lookupHRegRemap(m, i->ARM64in.VImmQ.rQ);
2466 return;
2467 case ARM64in_VDfromX:
2468 i->ARM64in.VDfromX.rD
2469 = lookupHRegRemap(m, i->ARM64in.VDfromX.rD);
2470 i->ARM64in.VDfromX.rX
2471 = lookupHRegRemap(m, i->ARM64in.VDfromX.rX);
2472 return;
2473 case ARM64in_VQfromX:
2474 i->ARM64in.VQfromX.rQ
2475 = lookupHRegRemap(m, i->ARM64in.VQfromX.rQ);
2476 i->ARM64in.VQfromX.rXlo
2477 = lookupHRegRemap(m, i->ARM64in.VQfromX.rXlo);
2478 return;
2479 case ARM64in_VQfromXX:
2480 i->ARM64in.VQfromXX.rQ
2481 = lookupHRegRemap(m, i->ARM64in.VQfromXX.rQ);
2482 i->ARM64in.VQfromXX.rXhi
2483 = lookupHRegRemap(m, i->ARM64in.VQfromXX.rXhi);
2484 i->ARM64in.VQfromXX.rXlo
2485 = lookupHRegRemap(m, i->ARM64in.VQfromXX.rXlo);
2486 return;
2487 case ARM64in_VXfromQ:
2488 i->ARM64in.VXfromQ.rX
2489 = lookupHRegRemap(m, i->ARM64in.VXfromQ.rX);
2490 i->ARM64in.VXfromQ.rQ
2491 = lookupHRegRemap(m, i->ARM64in.VXfromQ.rQ);
2492 return;
2493 case ARM64in_VXfromDorS:
2494 i->ARM64in.VXfromDorS.rX
2495 = lookupHRegRemap(m, i->ARM64in.VXfromDorS.rX);
2496 i->ARM64in.VXfromDorS.rDorS
2497 = lookupHRegRemap(m, i->ARM64in.VXfromDorS.rDorS);
2498 return;
2499 case ARM64in_VMov:
2500 i->ARM64in.VMov.dst = lookupHRegRemap(m, i->ARM64in.VMov.dst);
2501 i->ARM64in.VMov.src = lookupHRegRemap(m, i->ARM64in.VMov.src);
2502 return;
2503 case ARM64in_EvCheck:
2504 /* We expect both amodes only to mention x21, so this is in
2505 fact pointless, since x21 isn't allocatable, but
2506 anyway.. */
2507 mapRegs_ARM64AMode(m, i->ARM64in.EvCheck.amCounter);
2508 mapRegs_ARM64AMode(m, i->ARM64in.EvCheck.amFailAddr);
2509 return;
2510 case ARM64in_ProfInc:
2511 /* hardwires x8 and x9 -- nothing to modify. */
2512 return;
2513 default:
2514 ppARM64Instr(i);
2515 vpanic("mapRegs_ARM64Instr");
2519 /* Generate arm spill/reload instructions under the direction of the
2520 register allocator. Note it's critical these don't write the
2521 condition codes. */
2523 void genSpill_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2524 HReg rreg, Int offsetB, Bool mode64 )
2526 HRegClass rclass;
2527 vassert(offsetB >= 0);
2528 vassert(!hregIsVirtual(rreg));
2529 vassert(mode64 == True);
2530 *i1 = *i2 = NULL;
2531 rclass = hregClass(rreg);
2532 switch (rclass) {
2533 case HRcInt64:
2534 vassert(0 == (offsetB & 7));
2535 offsetB >>= 3;
2536 vassert(offsetB < 4096);
2537 *i1 = ARM64Instr_LdSt64(
2538 False/*!isLoad*/,
2539 rreg,
2540 ARM64AMode_RI12(hregARM64_X21(), offsetB, 8)
2542 return;
2543 case HRcFlt64:
2544 vassert(0 == (offsetB & 7));
2545 vassert(offsetB >= 0 && offsetB < 32768);
2546 *i1 = ARM64Instr_VLdStD(False/*!isLoad*/,
2547 rreg, hregARM64_X21(), offsetB);
2548 return;
2549 case HRcVec128: {
2550 HReg x21 = hregARM64_X21(); // baseblock
2551 HReg x9 = hregARM64_X9(); // spill temporary
2552 vassert(0 == (offsetB & 15)); // check sane alignment
2553 vassert(offsetB < 4096);
2554 *i1 = ARM64Instr_Arith(x9, x21, ARM64RIA_I12(offsetB, 0), True);
2555 *i2 = ARM64Instr_VLdStQ(False/*!isLoad*/, rreg, x9);
2556 return;
2558 default:
2559 ppHRegClass(rclass);
2560 vpanic("genSpill_ARM: unimplemented regclass");
2564 void genReload_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2565 HReg rreg, Int offsetB, Bool mode64 )
2567 HRegClass rclass;
2568 vassert(offsetB >= 0);
2569 vassert(!hregIsVirtual(rreg));
2570 vassert(mode64 == True);
2571 *i1 = *i2 = NULL;
2572 rclass = hregClass(rreg);
2573 switch (rclass) {
2574 case HRcInt64:
2575 vassert(0 == (offsetB & 7));
2576 offsetB >>= 3;
2577 vassert(offsetB < 4096);
2578 *i1 = ARM64Instr_LdSt64(
2579 True/*isLoad*/,
2580 rreg,
2581 ARM64AMode_RI12(hregARM64_X21(), offsetB, 8)
2583 return;
2584 case HRcFlt64:
2585 vassert(0 == (offsetB & 7));
2586 vassert(offsetB >= 0 && offsetB < 32768);
2587 *i1 = ARM64Instr_VLdStD(True/*isLoad*/,
2588 rreg, hregARM64_X21(), offsetB);
2589 return;
2590 case HRcVec128: {
2591 HReg x21 = hregARM64_X21(); // baseblock
2592 HReg x9 = hregARM64_X9(); // spill temporary
2593 vassert(0 == (offsetB & 15)); // check sane alignment
2594 vassert(offsetB < 4096);
2595 *i1 = ARM64Instr_Arith(x9, x21, ARM64RIA_I12(offsetB, 0), True);
2596 *i2 = ARM64Instr_VLdStQ(True/*isLoad*/, rreg, x9);
2597 return;
2599 default:
2600 ppHRegClass(rclass);
2601 vpanic("genReload_ARM: unimplemented regclass");
2605 ARM64Instr* genMove_ARM64(HReg from, HReg to, Bool mode64)
2607 switch (hregClass(from)) {
2608 case HRcInt64:
2609 return ARM64Instr_MovI(to, from);
2610 case HRcFlt64:
2611 return ARM64Instr_VMov(8, to, from);
2612 case HRcVec128:
2613 return ARM64Instr_VMov(16, to, from);
2614 default:
2615 ppHRegClass(hregClass(from));
2616 vpanic("genMove_ARM64: unimplemented regclass");
2621 /* Emit an instruction into buf and return the number of bytes used.
2622 Note that buf is not the insn's final place, and therefore it is
2623 imperative to emit position-independent code. */
2625 static inline UInt iregEnc ( HReg r )
2627 UInt n;
2628 vassert(hregClass(r) == HRcInt64);
2629 vassert(!hregIsVirtual(r));
2630 n = hregEncoding(r);
2631 vassert(n <= 30);
2632 return n;
2635 static inline UInt dregEnc ( HReg r )
2637 UInt n;
2638 vassert(hregClass(r) == HRcFlt64);
2639 vassert(!hregIsVirtual(r));
2640 n = hregEncoding(r);
2641 vassert(n <= 31);
2642 return n;
2645 static inline UInt qregEnc ( HReg r )
2647 UInt n;
2648 vassert(hregClass(r) == HRcVec128);
2649 vassert(!hregIsVirtual(r));
2650 n = hregEncoding(r);
2651 vassert(n <= 31);
2652 return n;
2655 #define BITS4(zzb3,zzb2,zzb1,zzb0) \
2656 (((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0))
2658 #define X00 BITS4(0,0, 0,0)
2659 #define X01 BITS4(0,0, 0,1)
2660 #define X10 BITS4(0,0, 1,0)
2661 #define X11 BITS4(0,0, 1,1)
2663 #define X000 BITS4(0, 0,0,0)
2664 #define X001 BITS4(0, 0,0,1)
2665 #define X010 BITS4(0, 0,1,0)
2666 #define X011 BITS4(0, 0,1,1)
2667 #define X100 BITS4(0, 1,0,0)
2668 #define X101 BITS4(0, 1,0,1)
2669 #define X110 BITS4(0, 1,1,0)
2670 #define X111 BITS4(0, 1,1,1)
2672 #define X0000 BITS4(0,0,0,0)
2673 #define X0001 BITS4(0,0,0,1)
2674 #define X0010 BITS4(0,0,1,0)
2675 #define X0011 BITS4(0,0,1,1)
2677 #define BITS8(zzb7,zzb6,zzb5,zzb4,zzb3,zzb2,zzb1,zzb0) \
2678 ((BITS4(zzb7,zzb6,zzb5,zzb4) << 4) | BITS4(zzb3,zzb2,zzb1,zzb0))
2680 #define X00000 BITS8(0,0,0, 0,0,0,0,0)
2681 #define X00001 BITS8(0,0,0, 0,0,0,0,1)
2682 #define X00110 BITS8(0,0,0, 0,0,1,1,0)
2683 #define X00111 BITS8(0,0,0, 0,0,1,1,1)
2684 #define X01000 BITS8(0,0,0, 0,1,0,0,0)
2685 #define X10000 BITS8(0,0,0, 1,0,0,0,0)
2686 #define X11000 BITS8(0,0,0, 1,1,0,0,0)
2687 #define X11110 BITS8(0,0,0, 1,1,1,1,0)
2688 #define X11111 BITS8(0,0,0, 1,1,1,1,1)
2690 #define X000000 BITS8(0,0, 0,0,0,0,0,0)
2691 #define X000001 BITS8(0,0, 0,0,0,0,0,1)
2692 #define X000010 BITS8(0,0, 0,0,0,0,1,0)
2693 #define X000011 BITS8(0,0, 0,0,0,0,1,1)
2694 #define X000100 BITS8(0,0, 0,0,0,1,0,0)
2695 #define X000110 BITS8(0,0, 0,0,0,1,1,0)
2696 #define X000111 BITS8(0,0, 0,0,0,1,1,1)
2697 #define X001000 BITS8(0,0, 0,0,1,0,0,0)
2698 #define X001001 BITS8(0,0, 0,0,1,0,0,1)
2699 #define X001010 BITS8(0,0, 0,0,1,0,1,0)
2700 #define X001011 BITS8(0,0, 0,0,1,0,1,1)
2701 #define X001101 BITS8(0,0, 0,0,1,1,0,1)
2702 #define X001110 BITS8(0,0, 0,0,1,1,1,0)
2703 #define X001111 BITS8(0,0, 0,0,1,1,1,1)
2704 #define X010000 BITS8(0,0, 0,1,0,0,0,0)
2705 #define X010001 BITS8(0,0, 0,1,0,0,0,1)
2706 #define X010010 BITS8(0,0, 0,1,0,0,1,0)
2707 #define X010011 BITS8(0,0, 0,1,0,0,1,1)
2708 #define X010101 BITS8(0,0, 0,1,0,1,0,1)
2709 #define X010110 BITS8(0,0, 0,1,0,1,1,0)
2710 #define X010111 BITS8(0,0, 0,1,0,1,1,1)
2711 #define X011001 BITS8(0,0, 0,1,1,0,0,1)
2712 #define X011010 BITS8(0,0, 0,1,1,0,1,0)
2713 #define X011011 BITS8(0,0, 0,1,1,0,1,1)
2714 #define X011101 BITS8(0,0, 0,1,1,1,0,1)
2715 #define X011110 BITS8(0,0, 0,1,1,1,1,0)
2716 #define X011111 BITS8(0,0, 0,1,1,1,1,1)
2717 #define X100001 BITS8(0,0, 1,0,0,0,0,1)
2718 #define X100011 BITS8(0,0, 1,0,0,0,1,1)
2719 #define X100100 BITS8(0,0, 1,0,0,1,0,0)
2720 #define X100101 BITS8(0,0, 1,0,0,1,0,1)
2721 #define X100110 BITS8(0,0, 1,0,0,1,1,0)
2722 #define X100111 BITS8(0,0, 1,0,0,1,1,1)
2723 #define X101101 BITS8(0,0, 1,0,1,1,0,1)
2724 #define X101110 BITS8(0,0, 1,0,1,1,1,0)
2725 #define X110000 BITS8(0,0, 1,1,0,0,0,0)
2726 #define X110001 BITS8(0,0, 1,1,0,0,0,1)
2727 #define X110010 BITS8(0,0, 1,1,0,0,1,0)
2728 #define X110100 BITS8(0,0, 1,1,0,1,0,0)
2729 #define X110101 BITS8(0,0, 1,1,0,1,0,1)
2730 #define X110110 BITS8(0,0, 1,1,0,1,1,0)
2731 #define X110111 BITS8(0,0, 1,1,0,1,1,1)
2732 #define X111000 BITS8(0,0, 1,1,1,0,0,0)
2733 #define X111001 BITS8(0,0, 1,1,1,0,0,1)
2734 #define X111101 BITS8(0,0, 1,1,1,1,0,1)
2735 #define X111110 BITS8(0,0, 1,1,1,1,1,0)
2736 #define X111111 BITS8(0,0, 1,1,1,1,1,1)
2738 #define X0001000 BITS8(0, 0,0,0,1,0,0,0)
2739 #define X0010000 BITS8(0, 0,0,1,0,0,0,0)
2740 #define X0100000 BITS8(0, 0,1,0,0,0,0,0)
2741 #define X1000000 BITS8(0, 1,0,0,0,0,0,0)
2743 #define X00100000 BITS8(0,0,1,0,0,0,0,0)
2744 #define X00100001 BITS8(0,0,1,0,0,0,0,1)
2745 #define X00100010 BITS8(0,0,1,0,0,0,1,0)
2746 #define X00100011 BITS8(0,0,1,0,0,0,1,1)
2747 #define X01010000 BITS8(0,1,0,1,0,0,0,0)
2748 #define X01010001 BITS8(0,1,0,1,0,0,0,1)
2749 #define X01010100 BITS8(0,1,0,1,0,1,0,0)
2750 #define X01011000 BITS8(0,1,0,1,1,0,0,0)
2751 #define X01100000 BITS8(0,1,1,0,0,0,0,0)
2752 #define X01100001 BITS8(0,1,1,0,0,0,0,1)
2753 #define X01100010 BITS8(0,1,1,0,0,0,1,0)
2754 #define X01100011 BITS8(0,1,1,0,0,0,1,1)
2755 #define X01110000 BITS8(0,1,1,1,0,0,0,0)
2756 #define X01110001 BITS8(0,1,1,1,0,0,0,1)
2757 #define X01110010 BITS8(0,1,1,1,0,0,1,0)
2758 #define X01110011 BITS8(0,1,1,1,0,0,1,1)
2759 #define X01110100 BITS8(0,1,1,1,0,1,0,0)
2760 #define X01110101 BITS8(0,1,1,1,0,1,0,1)
2761 #define X01110110 BITS8(0,1,1,1,0,1,1,0)
2762 #define X01110111 BITS8(0,1,1,1,0,1,1,1)
2763 #define X11000001 BITS8(1,1,0,0,0,0,0,1)
2764 #define X11000011 BITS8(1,1,0,0,0,0,1,1)
2765 #define X11010100 BITS8(1,1,0,1,0,1,0,0)
2766 #define X11010110 BITS8(1,1,0,1,0,1,1,0)
2767 #define X11011000 BITS8(1,1,0,1,1,0,0,0)
2768 #define X11011010 BITS8(1,1,0,1,1,0,1,0)
2769 #define X11011110 BITS8(1,1,0,1,1,1,1,0)
2770 #define X11100010 BITS8(1,1,1,0,0,0,1,0)
2771 #define X11110001 BITS8(1,1,1,1,0,0,0,1)
2772 #define X11110011 BITS8(1,1,1,1,0,0,1,1)
2773 #define X11110101 BITS8(1,1,1,1,0,1,0,1)
2774 #define X11110111 BITS8(1,1,1,1,0,1,1,1)
2777 /* --- 4 fields --- */
2779 static inline UInt X_8_19_1_4 ( UInt f1, UInt f2, UInt f3, UInt f4 ) {
2780 vassert(8+19+1+4 == 32);
2781 vassert(f1 < (1<<8));
2782 vassert(f2 < (1<<19));
2783 vassert(f3 < (1<<1));
2784 vassert(f4 < (1<<4));
2785 UInt w = 0;
2786 w = (w << 8) | f1;
2787 w = (w << 19) | f2;
2788 w = (w << 1) | f3;
2789 w = (w << 4) | f4;
2790 return w;
2793 /* --- 5 fields --- */
2795 static inline UInt X_3_6_2_16_5 ( UInt f1, UInt f2,
2796 UInt f3, UInt f4, UInt f5 ) {
2797 vassert(3+6+2+16+5 == 32);
2798 vassert(f1 < (1<<3));
2799 vassert(f2 < (1<<6));
2800 vassert(f3 < (1<<2));
2801 vassert(f4 < (1<<16));
2802 vassert(f5 < (1<<5));
2803 UInt w = 0;
2804 w = (w << 3) | f1;
2805 w = (w << 6) | f2;
2806 w = (w << 2) | f3;
2807 w = (w << 16) | f4;
2808 w = (w << 5) | f5;
2809 return w;
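/* Illustrative sketch, not part of the original source: the field
   packers assemble an instruction word most-significant field first.
   For example, the MOVZ encoding used later by imm64_to_ireg --
   110 100101 hw imm16 Rd -- for MOVZ x9, #0x1234 (hw = 0) is: */
static UInt example_movz_x9 ( void )
{
   return X_3_6_2_16_5(X110, X100101, X00/*hw*/, 0x1234, /*Rd=*/9);
}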
2812 /* --- 6 fields --- */
2814 static inline UInt X_2_6_2_12_5_5 ( UInt f1, UInt f2, UInt f3,
2815 UInt f4, UInt f5, UInt f6 ) {
2816 vassert(2+6+2+12+5+5 == 32);
2817 vassert(f1 < (1<<2));
2818 vassert(f2 < (1<<6));
2819 vassert(f3 < (1<<2));
2820 vassert(f4 < (1<<12));
2821 vassert(f5 < (1<<5));
2822 vassert(f6 < (1<<5));
2823 UInt w = 0;
2824 w = (w << 2) | f1;
2825 w = (w << 6) | f2;
2826 w = (w << 2) | f3;
2827 w = (w << 12) | f4;
2828 w = (w << 5) | f5;
2829 w = (w << 5) | f6;
2830 return w;
2833 static inline UInt X_3_8_5_6_5_5 ( UInt f1, UInt f2, UInt f3,
2834 UInt f4, UInt f5, UInt f6 ) {
2835 vassert(3+8+5+6+5+5 == 32);
2836 vassert(f1 < (1<<3));
2837 vassert(f2 < (1<<8));
2838 vassert(f3 < (1<<5));
2839 vassert(f4 < (1<<6));
2840 vassert(f5 < (1<<5));
2841 vassert(f6 < (1<<5));
2842 UInt w = 0;
2843 w = (w << 3) | f1;
2844 w = (w << 8) | f2;
2845 w = (w << 5) | f3;
2846 w = (w << 6) | f4;
2847 w = (w << 5) | f5;
2848 w = (w << 5) | f6;
2849 return w;
2852 static inline UInt X_3_5_8_6_5_5 ( UInt f1, UInt f2, UInt f3,
2853 UInt f4, UInt f5, UInt f6 ) {
2854 vassert(3+5+8+6+5+5 == 32);
2855 vassert(f1 < (1<<3));
2856 vassert(f2 < (1<<5));
2857 vassert(f3 < (1<<8));
2858 vassert(f4 < (1<<6));
2859 vassert(f5 < (1<<5));
2860 vassert(f6 < (1<<5));
2861 UInt w = 0;
2862 w = (w << 3) | f1;
2863 w = (w << 5) | f2;
2864 w = (w << 8) | f3;
2865 w = (w << 6) | f4;
2866 w = (w << 5) | f5;
2867 w = (w << 5) | f6;
2868 return w;
2871 static inline UInt X_3_6_7_6_5_5 ( UInt f1, UInt f2, UInt f3,
2872 UInt f4, UInt f5, UInt f6 ) {
2873 vassert(3+6+7+6+5+5 == 32);
2874 vassert(f1 < (1<<3));
2875 vassert(f2 < (1<<6));
2876 vassert(f3 < (1<<7));
2877 vassert(f4 < (1<<6));
2878 vassert(f5 < (1<<5));
2879 vassert(f6 < (1<<5));
2880 UInt w = 0;
2881 w = (w << 3) | f1;
2882 w = (w << 6) | f2;
2883 w = (w << 7) | f3;
2884 w = (w << 6) | f4;
2885 w = (w << 5) | f5;
2886 w = (w << 5) | f6;
2887 return w;
2890 /* --- 7 fields --- */
2892 static inline UInt X_2_6_3_9_2_5_5 ( UInt f1, UInt f2, UInt f3,
2893 UInt f4, UInt f5, UInt f6, UInt f7 ) {
2894 vassert(2+6+3+9+2+5+5 == 32);
2895 vassert(f1 < (1<<2));
2896 vassert(f2 < (1<<6));
2897 vassert(f3 < (1<<3));
2898 vassert(f4 < (1<<9));
2899 vassert(f5 < (1<<2));
2900 vassert(f6 < (1<<5));
2901 vassert(f7 < (1<<5));
2902 UInt w = 0;
2903 w = (w << 2) | f1;
2904 w = (w << 6) | f2;
2905 w = (w << 3) | f3;
2906 w = (w << 9) | f4;
2907 w = (w << 2) | f5;
2908 w = (w << 5) | f6;
2909 w = (w << 5) | f7;
2910 return w;
2913 static inline UInt X_3_6_1_6_6_5_5 ( UInt f1, UInt f2, UInt f3,
2914 UInt f4, UInt f5, UInt f6, UInt f7 ) {
2915 vassert(3+6+1+6+6+5+5 == 32);
2916 vassert(f1 < (1<<3));
2917 vassert(f2 < (1<<6));
2918 vassert(f3 < (1<<1));
2919 vassert(f4 < (1<<6));
2920 vassert(f5 < (1<<6));
2921 vassert(f6 < (1<<5));
2922 vassert(f7 < (1<<5));
2923 UInt w = 0;
2924 w = (w << 3) | f1;
2925 w = (w << 6) | f2;
2926 w = (w << 1) | f3;
2927 w = (w << 6) | f4;
2928 w = (w << 6) | f5;
2929 w = (w << 5) | f6;
2930 w = (w << 5) | f7;
2931 return w;
2935 //ZZ #define X0000 BITS4(0,0,0,0)
2936 //ZZ #define X0001 BITS4(0,0,0,1)
2937 //ZZ #define X0010 BITS4(0,0,1,0)
2938 //ZZ #define X0011 BITS4(0,0,1,1)
2939 //ZZ #define X0100 BITS4(0,1,0,0)
2940 //ZZ #define X0101 BITS4(0,1,0,1)
2941 //ZZ #define X0110 BITS4(0,1,1,0)
2942 //ZZ #define X0111 BITS4(0,1,1,1)
2943 //ZZ #define X1000 BITS4(1,0,0,0)
2944 //ZZ #define X1001 BITS4(1,0,0,1)
2945 //ZZ #define X1010 BITS4(1,0,1,0)
2946 //ZZ #define X1011 BITS4(1,0,1,1)
2947 //ZZ #define X1100 BITS4(1,1,0,0)
2948 //ZZ #define X1101 BITS4(1,1,0,1)
2949 //ZZ #define X1110 BITS4(1,1,1,0)
2950 //ZZ #define X1111 BITS4(1,1,1,1)
2952 #define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
2953 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2954 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2955 (((zzx3) & 0xF) << 12))
2957 #define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2) \
2958 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2959 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2960 (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8))
2962 #define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0) \
2963 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2964 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2965 (((zzx3) & 0xF) << 12) | (((zzx0) & 0xF) << 0))
2967 #define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
2968 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2969 (((zzx5) & 0xF) << 20) | (((zzx1) & 0xF) << 4) | \
2970 (((zzx0) & 0xF) << 0))
2972 #define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0) \
2973 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2974 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2975 (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8) | \
2976 (((zzx1) & 0xF) << 4) | (((zzx0) & 0xF) << 0))
2978 #define XX______(zzx7,zzx6) \
2979 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24))
2983 /* Get an immediate into a register, using only that register. */
2984 static UInt* imm64_to_ireg ( UInt* p, Int xD, ULong imm64 )
2986 if (imm64 == 0) {
2987 // This has to be special-cased, since the logic below
2988 // will leave the register unchanged in this case.
2989 // MOVZ xD, #0, LSL #0
2990 *p++ = X_3_6_2_16_5(X110, X100101, X00, 0/*imm16*/, xD);
2991 return p;
2994 // There must be at least one non-zero halfword. Find the
2995 // lowest nonzero such, and use MOVZ to install it and zero
2996 // out the rest of the register.
2997 UShort h[4];
2998 h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
2999 h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
3000 h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
3001 h[0] = (UShort)((imm64 >> 0) & 0xFFFF);
3003 UInt i;
3004 for (i = 0; i < 4; i++) {
3005 if (h[i] != 0)
3006 break;
3008 vassert(i < 4);
3010 // MOVZ xD, h[i], LSL (16*i)
3011 *p++ = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
3013 // Work on upwards through h[i], using MOVK to stuff in any
3014 // remaining nonzero elements.
3015 i++;
3016 for (; i < 4; i++) {
3017 if (h[i] == 0)
3018 continue;
3019 // MOVK xD, h[i], LSL (16*i)
3020 *p++ = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
3023 return p;
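/* Illustrative sketch, not part of the original source: for the
   hypothetical constant 0x0000001122330000 loaded into x9,
   imm64_to_ireg emits just two instructions, since halfwords 0 and 3
   are zero:
      movz x9, #0x2233, lsl #16
      movk x9, #0x11,   lsl #32                                       */
static UInt* example_imm64_to_x9 ( UInt* p )
{
   return imm64_to_ireg(p, /*xD=*/9, 0x0000001122330000ULL);
}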
3026 /* Get an immediate into a register, using only that register, and
3027 generating exactly 4 instructions, regardless of the value of the
3028 immediate. This is used when generating sections of code that need
3029 to be patched later, so as to guarantee a specific size. */
3030 static UInt* imm64_to_ireg_EXACTLY4 ( UInt* p, Int xD, ULong imm64 )
3032 UShort h[4];
3033 h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
3034 h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
3035 h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
3036 h[0] = (UShort)((imm64 >> 0) & 0xFFFF);
3037 // Work on upwards through h[i], using MOVK to stuff in the
3038 // remaining elements.
3039 UInt i;
3040 for (i = 0; i < 4; i++) {
3041 if (i == 0) {
3042 // MOVZ xD, h[0], LSL (16*0)
3043 *p++ = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
3044 } else {
3045 // MOVK xD, h[i], LSL (16*i)
3046 *p++ = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
3049 return p;
3052 /* Check whether p points at a 4-insn sequence cooked up by
3053 imm64_to_ireg_EXACTLY4(). */
3054 static Bool is_imm64_to_ireg_EXACTLY4 ( UInt* p, Int xD, ULong imm64 )
3056 UShort h[4];
3057 h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
3058 h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
3059 h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
3060 h[0] = (UShort)((imm64 >> 0) & 0xFFFF);
3061 // Check the expected sequence: MOVZ for h[0], then MOVK for
3062 // h[1..3], exactly as imm64_to_ireg_EXACTLY4 would have emitted it.
3063 UInt i;
3064 for (i = 0; i < 4; i++) {
3065 UInt expected;
3066 if (i == 0) {
3067 // MOVZ xD, h[0], LSL (16*0)
3068 expected = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
3069 } else {
3070 // MOVK xD, h[i], LSL (16*i)
3071 expected = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
3073 if (p[i] != expected)
3074 return False;
3076 return True;
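/* Illustrative sketch, not part of the original source: the emit/check
   pair above supports patchable constants -- emit a fixed-length
   MOVZ/MOVK sequence now, and verify it later before overwriting it
   with a new target.  The constant and register are hypothetical. */
static Bool example_emit_and_check ( UInt* p )
{
   UInt* q = imm64_to_ireg_EXACTLY4(p, /*xD=*/9, 0x1122334455667788ULL);
   vassert(q == p + 4);   /* always exactly 4 insns */
   return is_imm64_to_ireg_EXACTLY4(p, 9, 0x1122334455667788ULL);
}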
3080 /* Generate a 8 bit store or 8-to-64 unsigned widening load from/to
3081 rD, using the given amode for the address. */
3082 static UInt* do_load_or_store8 ( UInt* p,
3083 Bool isLoad, UInt wD, ARM64AMode* am )
3085 vassert(wD <= 30);
3086 if (am->tag == ARM64am_RI9) {
3087 /* STURB Wd, [Xn|SP + simm9]: 00 111000 000 simm9 00 n d
3088 LDURB Wd, [Xn|SP + simm9]: 00 111000 010 simm9 00 n d
3090 Int simm9 = am->ARM64am.RI9.simm9;
3091 vassert(-256 <= simm9 && simm9 <= 255);
3092 UInt instr = X_2_6_3_9_2_5_5(X00, X111000, isLoad ? X010 : X000,
3093 simm9 & 0x1FF, X00,
3094 iregEnc(am->ARM64am.RI9.reg), wD);
3095 *p++ = instr;
3096 return p;
3098 if (am->tag == ARM64am_RI12) {
3099 /* STRB Wd, [Xn|SP + uimm12 * 1]: 00 111 001 00 imm12 n d
3100 LDRB Wd, [Xn|SP + uimm12 * 1]: 00 111 001 01 imm12 n d
3102 UInt uimm12 = am->ARM64am.RI12.uimm12;
3103 UInt scale = am->ARM64am.RI12.szB;
3104 vassert(scale == 1); /* failure of this is serious. Do not ignore. */
3105 UInt xN = iregEnc(am->ARM64am.RI12.reg);
3106 vassert(xN <= 30);
3107 UInt instr = X_2_6_2_12_5_5(X00, X111001, isLoad ? X01 : X00,
3108 uimm12, xN, wD);
3109 *p++ = instr;
3110 return p;
3112 if (am->tag == ARM64am_RR) {
3113 /* STRB Wd, [Xn|SP, Xm]: 00 111 000 001 m 011 0 10 n d
3114 LDRB Wd, [Xn|SP, Xm]: 00 111 000 011 m 011 0 10 n d
3116 UInt xN = iregEnc(am->ARM64am.RR.base);
3117 UInt xM = iregEnc(am->ARM64am.RR.index);
3118 vassert(xN <= 30);
3119 UInt instr = X_3_8_5_6_5_5(X001, isLoad ? X11000011 : X11000001,
3120 xM, X011010, xN, wD);
3121 *p++ = instr;
3122 return p;
3124 vpanic("do_load_or_store8");
3125 vassert(0);
3129 /* Generate a 16 bit store or 16-to-64 unsigned widening load from/to
3130 rD, using the given amode for the address. */
3131 static UInt* do_load_or_store16 ( UInt* p,
3132 Bool isLoad, UInt wD, ARM64AMode* am )
3134 vassert(wD <= 30);
3135 if (am->tag == ARM64am_RI9) {
3136 /* STURH Wd, [Xn|SP + simm9]: 01 111000 000 simm9 00 n d
3137 LDURH Wd, [Xn|SP + simm9]: 01 111000 010 simm9 00 n d
3139 Int simm9 = am->ARM64am.RI9.simm9;
3140 vassert(-256 <= simm9 && simm9 <= 255);
3141 UInt instr = X_2_6_3_9_2_5_5(X01, X111000, isLoad ? X010 : X000,
3142 simm9 & 0x1FF, X00,
3143 iregEnc(am->ARM64am.RI9.reg), wD);
3144 *p++ = instr;
3145 return p;
3147 if (am->tag == ARM64am_RI12) {
3148 /* STRH Wd, [Xn|SP + uimm12 * 2]: 01 111 001 00 imm12 n d
3149 LDRH Wd, [Xn|SP + uimm12 * 2]: 01 111 001 01 imm12 n d
3151 UInt uimm12 = am->ARM64am.RI12.uimm12;
3152 UInt scale = am->ARM64am.RI12.szB;
3153 vassert(scale == 2); /* failure of this is serious. Do not ignore. */
3154 UInt xN = iregEnc(am->ARM64am.RI12.reg);
3155 vassert(xN <= 30);
3156 UInt instr = X_2_6_2_12_5_5(X01, X111001, isLoad ? X01 : X00,
3157 uimm12, xN, wD);
3158 *p++ = instr;
3159 return p;
3161 if (am->tag == ARM64am_RR) {
3162 /* STRH Wd, [Xn|SP, Xm]: 01 111 000 001 m 011 0 10 n d
3163 LDRH Wd, [Xn|SP, Xm]: 01 111 000 011 m 011 0 10 n d
3165 UInt xN = iregEnc(am->ARM64am.RR.base);
3166 UInt xM = iregEnc(am->ARM64am.RR.index);
3167 vassert(xN <= 30);
3168 UInt instr = X_3_8_5_6_5_5(X011, isLoad ? X11000011 : X11000001,
3169 xM, X011010, xN, wD);
3170 *p++ = instr;
3171 return p;
3173 vpanic("do_load_or_store16");
3174 vassert(0);
3178 /* Generate a 32 bit store or 32-to-64 unsigned widening load from/to
3179 rD, using the given amode for the address. */
3180 static UInt* do_load_or_store32 ( UInt* p,
3181 Bool isLoad, UInt wD, ARM64AMode* am )
3183 vassert(wD <= 30);
3184 if (am->tag == ARM64am_RI9) {
3185 /* STUR Wd, [Xn|SP + simm9]: 10 111000 000 simm9 00 n d
3186 LDUR Wd, [Xn|SP + simm9]: 10 111000 010 simm9 00 n d
3188 Int simm9 = am->ARM64am.RI9.simm9;
3189 vassert(-256 <= simm9 && simm9 <= 255);
3190 UInt instr = X_2_6_3_9_2_5_5(X10, X111000, isLoad ? X010 : X000,
3191 simm9 & 0x1FF, X00,
3192 iregEnc(am->ARM64am.RI9.reg), wD);
3193 *p++ = instr;
3194 return p;
3196 if (am->tag == ARM64am_RI12) {
3197 /* STR Wd, [Xn|SP + uimm12 * 4]: 10 111 001 00 imm12 n d
3198 LDR Wd, [Xn|SP + uimm12 * 4]: 10 111 001 01 imm12 n d
3200 UInt uimm12 = am->ARM64am.RI12.uimm12;
3201 UInt scale = am->ARM64am.RI12.szB;
3202 vassert(scale == 4); /* failure of this is serious. Do not ignore. */
3203 UInt xN = iregEnc(am->ARM64am.RI12.reg);
3204 vassert(xN <= 30);
3205 UInt instr = X_2_6_2_12_5_5(X10, X111001, isLoad ? X01 : X00,
3206 uimm12, xN, wD);
3207 *p++ = instr;
3208 return p;
3210 if (am->tag == ARM64am_RR) {
3211 /* STR Wd, [Xn|SP, Xm]: 10 111 000 001 m 011 0 10 n d
3212 LDR Wd, [Xn|SP, Xm]: 10 111 000 011 m 011 0 10 n d
3214 UInt xN = iregEnc(am->ARM64am.RR.base);
3215 UInt xM = iregEnc(am->ARM64am.RR.index);
3216 vassert(xN <= 30);
3217 UInt instr = X_3_8_5_6_5_5(X101, isLoad ? X11000011 : X11000001,
3218 xM, X011010, xN, wD);
3219 *p++ = instr;
3220 return p;
3222 vpanic("do_load_or_store32");
3223 vassert(0);
3227 /* Generate a 64 bit load or store to/from xD, using the given amode
3228 for the address. */
3229 static UInt* do_load_or_store64 ( UInt* p,
3230 Bool isLoad, UInt xD, ARM64AMode* am )
3232 /* In all these cases, Rn can't be 31 since that means SP. */
3233 vassert(xD <= 30);
3234 if (am->tag == ARM64am_RI9) {
3235 /* STUR Xd, [Xn|SP + simm9]: 11 111000 000 simm9 00 n d
3236 LDUR Xd, [Xn|SP + simm9]: 11 111000 010 simm9 00 n d
3238 Int simm9 = am->ARM64am.RI9.simm9;
3239 vassert(-256 <= simm9 && simm9 <= 255);
3240 UInt xN = iregEnc(am->ARM64am.RI9.reg);
3241 vassert(xN <= 30);
3242 UInt instr = X_2_6_3_9_2_5_5(X11, X111000, isLoad ? X010 : X000,
3243 simm9 & 0x1FF, X00, xN, xD);
3244 *p++ = instr;
3245 return p;
3247 if (am->tag == ARM64am_RI12) {
3248 /* STR Xd, [Xn|SP + uimm12 * 8]: 11 111 001 00 imm12 n d
3249 LDR Xd, [Xn|SP + uimm12 * 8]: 11 111 001 01 imm12 n d
3251 UInt uimm12 = am->ARM64am.RI12.uimm12;
3252 UInt scale = am->ARM64am.RI12.szB;
3253 vassert(scale == 8); /* failure of this is serious. Do not ignore. */
3254 UInt xN = iregEnc(am->ARM64am.RI12.reg);
3255 vassert(xN <= 30);
3256 UInt instr = X_2_6_2_12_5_5(X11, X111001, isLoad ? X01 : X00,
3257 uimm12, xN, xD);
3258 *p++ = instr;
3259 return p;
3261 if (am->tag == ARM64am_RR) {
3262 /* STR Xd, [Xn|SP, Xm]: 11 111 000 001 m 011 0 10 n d
3263 LDR Xd, [Xn|SP, Xm]: 11 111 000 011 m 011 0 10 n d
3265 UInt xN = iregEnc(am->ARM64am.RR.base);
3266 UInt xM = iregEnc(am->ARM64am.RR.index);
3267 vassert(xN <= 30);
3268 UInt instr = X_3_8_5_6_5_5(X111, isLoad ? X11000011 : X11000001,
3269 xM, X011010, xN, xD);
3270 *p++ = instr;
3271 return p;
3273 vpanic("do_load_or_store64");
3274 vassert(0);
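/* Worked example (editor's addition, illustrative only; the operands are
   arbitrary): for the RI12 case above, LDR x2, [x3, #8] has uimm12 = 8/8 = 1,
   so
      X_2_6_2_12_5_5(X11, X111001, X01, 1, 3, 2)
         = 11 111001 01 000000000001 00011 00010 = 0xF9400462,
   and the matching store STR x2, [x3, #8] flips the opc field to 00,
   giving 0xF9000462. */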
3278 /* Emit an instruction into buf and return the number of bytes used.
3279 Note that buf is not the insn's final place, and therefore it is
3280 imperative to emit position-independent code. If the emitted
3281 instruction was a profiler inc, set *is_profInc to True, else
3282 leave it unchanged. */
3284 Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
3285 UChar* buf, Int nbuf, const ARM64Instr* i,
3286 Bool mode64, VexEndness endness_host,
3287 const void* disp_cp_chain_me_to_slowEP,
3288 const void* disp_cp_chain_me_to_fastEP,
3289 const void* disp_cp_xindir,
3290 const void* disp_cp_xassisted )
3292 UInt* p = (UInt*)buf;
3293 vassert(nbuf >= 32);
3294 vassert(mode64 == True);
3295 vassert(0 == (((HWord)buf) & 3));
3297 switch (i->tag) {
3298 case ARM64in_Arith: {
3299 UInt rD = iregEnc(i->ARM64in.Arith.dst);
3300 UInt rN = iregEnc(i->ARM64in.Arith.argL);
3301 ARM64RIA* argR = i->ARM64in.Arith.argR;
3302 switch (argR->tag) {
3303 case ARM64riA_I12:
3304 *p++ = X_2_6_2_12_5_5(
3305 i->ARM64in.Arith.isAdd ? X10 : X11,
3306 X010001,
3307 argR->ARM64riA.I12.shift == 12 ? X01 : X00,
3308 argR->ARM64riA.I12.imm12, rN, rD
3310 break;
3311 case ARM64riA_R: {
3312 UInt rM = iregEnc(i->ARM64in.Arith.argR->ARM64riA.R.reg);
3313 *p++ = X_3_8_5_6_5_5(
3314 i->ARM64in.Arith.isAdd ? X100 : X110,
3315 X01011000, rM, X000000, rN, rD
3317 break;
3319 default:
3320 goto bad;
3322 goto done;
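/* Worked example (editor's addition, illustrative only): ADD x1, x2, #16
   with no shift is
      X_2_6_2_12_5_5(X10, X010001, X00, 16, 2, 1)
         = 10 010001 00 000000010000 00010 00001 = 0x91004041;
   the isAdd=False path only changes the leading field to X11 (SUB). */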
3324 case ARM64in_Cmp: {
3325 UInt rD = 31; /* XZR, we are going to dump the result */
3326 UInt rN = iregEnc(i->ARM64in.Cmp.argL);
3327 ARM64RIA* argR = i->ARM64in.Cmp.argR;
3328 Bool is64 = i->ARM64in.Cmp.is64;
3329 switch (argR->tag) {
3330 case ARM64riA_I12:
3331 /* 1 11 10001 sh imm12 Rn Rd = SUBS Xd, Xn, #imm */
3332 /* 0 11 10001 sh imm12 Rn Rd = SUBS Wd, Wn, #imm */
3333 *p++ = X_2_6_2_12_5_5(
3334 is64 ? X11 : X01, X110001,
3335 argR->ARM64riA.I12.shift == 12 ? X01 : X00,
3336 argR->ARM64riA.I12.imm12, rN, rD);
3337 break;
3338 case ARM64riA_R: {
3339 /* 1 11 01011 00 0 Rm 000000 Rn Rd = SUBS Xd, Xn, Xm */
3340 /* 0 11 01011 00 0 Rm 000000 Rn Rd = SUBS Wd, Wn, Wm */
3341 UInt rM = iregEnc(i->ARM64in.Cmp.argR->ARM64riA.R.reg);
3342 *p++ = X_3_8_5_6_5_5(is64 ? X111 : X011,
3343 X01011000, rM, X000000, rN, rD);
3344 break;
3346 default:
3347 goto bad;
3349 goto done;
3351 case ARM64in_Logic: {
3352 UInt rD = iregEnc(i->ARM64in.Logic.dst);
3353 UInt rN = iregEnc(i->ARM64in.Logic.argL);
3354 ARM64RIL* argR = i->ARM64in.Logic.argR;
3355 UInt opc = 0; /* invalid */
3356 vassert(rD < 31);
3357 vassert(rN < 31);
3358 switch (i->ARM64in.Logic.op) {
3359 case ARM64lo_OR: opc = X101; break;
3360 case ARM64lo_AND: opc = X100; break;
3361 case ARM64lo_XOR: opc = X110; break;
3362 default: break;
3364 vassert(opc != 0);
3365 switch (argR->tag) {
3366 case ARM64riL_I13: {
3367 /* 1 01 100100 N immR immS Rn Rd = ORR <Xd|Sp>, Xn, #imm */
3368 /* 1 00 100100 N immR immS Rn Rd = AND <Xd|Sp>, Xn, #imm */
3369 /* 1 10 100100 N immR immS Rn Rd = EOR <Xd|Sp>, Xn, #imm */
3370 *p++ = X_3_6_1_6_6_5_5(
3371 opc, X100100, argR->ARM64riL.I13.bitN,
3372 argR->ARM64riL.I13.immR, argR->ARM64riL.I13.immS,
3373 rN, rD
3375 break;
3377 case ARM64riL_R: {
3378 /* 1 01 01010 00 0 m 000000 n d = ORR Xd, Xn, Xm */
3379 /* 1 00 01010 00 0 m 000000 n d = AND Xd, Xn, Xm */
3380 /* 1 10 01010 00 0 m 000000 n d = EOR Xd, Xn, Xm */
3381 UInt rM = iregEnc(argR->ARM64riL.R.reg);
3382 vassert(rM < 31);
3383 *p++ = X_3_8_5_6_5_5(opc, X01010000, rM, X000000, rN, rD);
3384 break;
3386 default:
3387 goto bad;
3389 goto done;
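/* Worked example (editor's addition, illustrative only): the register form
   ORR x0, x1, x2 is
      X_3_8_5_6_5_5(X101, X01010000, 2, X000000, 1, 0)
         = 101 01010000 00010 000000 00001 00000 = 0xAA020020;
   AND and EOR differ only in the 3-bit opc (X100 / X110). */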
3391 case ARM64in_Test: {
3392 UInt rD = 31; /* XZR, we are going to dump the result */
3393 UInt rN = iregEnc(i->ARM64in.Test.argL);
3394 ARM64RIL* argR = i->ARM64in.Test.argR;
3395 switch (argR->tag) {
3396 case ARM64riL_I13: {
3397 /* 1 11 100100 N immR immS Rn Rd = ANDS Xd, Xn, #imm */
3398 *p++ = X_3_6_1_6_6_5_5(
3399 X111, X100100, argR->ARM64riL.I13.bitN,
3400 argR->ARM64riL.I13.immR, argR->ARM64riL.I13.immS,
3401 rN, rD
3403 break;
3405 default:
3406 goto bad;
3408 goto done;
3410 case ARM64in_Shift: {
3411 UInt rD = iregEnc(i->ARM64in.Shift.dst);
3412 UInt rN = iregEnc(i->ARM64in.Shift.argL);
3413 ARM64RI6* argR = i->ARM64in.Shift.argR;
3414 vassert(rD < 31);
3415 vassert(rN < 31);
3416 switch (argR->tag) {
3417 case ARM64ri6_I6: {
3418 /* 110 1001101 (64-sh) (63-sh) nn dd LSL Xd, Xn, sh */
3419 /* 110 1001101 sh 63 nn dd LSR Xd, Xn, sh */
3420 /* 100 1001101 sh 63 nn dd ASR Xd, Xn, sh */
3421 UInt sh = argR->ARM64ri6.I6.imm6;
3422 vassert(sh > 0 && sh < 64);
3423 switch (i->ARM64in.Shift.op) {
3424 case ARM64sh_SHL:
3425 *p++ = X_3_6_1_6_6_5_5(X110, X100110,
3426 1, 64-sh, 63-sh, rN, rD);
3427 break;
3428 case ARM64sh_SHR:
3429 *p++ = X_3_6_1_6_6_5_5(X110, X100110, 1, sh, 63, rN, rD);
3430 break;
3431 case ARM64sh_SAR:
3432 *p++ = X_3_6_1_6_6_5_5(X100, X100110, 1, sh, 63, rN, rD);
3433 break;
3434 default:
3435 vassert(0);
3437 break;
3439 case ARM64ri6_R: {
3440 /* 100 1101 0110 mm 001000 nn dd LSL Xd, Xn, Xm */
3441 /* 100 1101 0110 mm 001001 nn dd LSR Xd, Xn, Xm */
3442 /* 100 1101 0110 mm 001010 nn dd ASR Xd, Xn, Xm */
3443 UInt rM = iregEnc(argR->ARM64ri6.R.reg);
3444 vassert(rM < 31);
3445 UInt subOpc = 0;
3446 switch (i->ARM64in.Shift.op) {
3447 case ARM64sh_SHL: subOpc = X001000; break;
3448 case ARM64sh_SHR: subOpc = X001001; break;
3449 case ARM64sh_SAR: subOpc = X001010; break;
3450 default: vassert(0);
3452 *p++ = X_3_8_5_6_5_5(X100, X11010110, rM, subOpc, rN, rD);
3453 break;
3455 default:
3456 vassert(0);
3458 goto done;
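/* Worked example (editor's addition, illustrative only): LSR x3, x4, #2
   takes the I6 path with sh = 2, giving
      X_3_6_1_6_6_5_5(X110, X100110, 1, 2, 63, 4, 3)
         = 110 100110 1 000010 111111 00100 00011 = 0xD342FC83,
   i.e. the UBFM form with immr = sh and imms = 63. */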
3460 case ARM64in_Unary: {
3461 UInt rDst = iregEnc(i->ARM64in.Unary.dst);
3462 UInt rSrc = iregEnc(i->ARM64in.Unary.src);
3463 switch (i->ARM64in.Unary.op) {
3464 case ARM64un_CLZ:
3465 /* 1 10 1101 0110 00000 00010 0 nn dd CLZ Xd, Xn */
3466 /* 1 10 1101 0110 00000 00010 1 nn dd CLS Xd, Xn (unimp) */
3467 *p++ = X_3_8_5_6_5_5(X110,
3468 X11010110, X00000, X000100, rSrc, rDst);
3469 goto done;
3470 case ARM64un_NEG:
3471 /* 1 10 01011 000 m 000000 11111 d NEG Xd,Xm */
3472 /* 0 10 01011 000 m 000000 11111 d NEG Wd,Wm (unimp) */
3473 *p++ = X_3_8_5_6_5_5(X110,
3474 X01011000, rSrc, X000000, X11111, rDst);
3475 goto done;
3476 case ARM64un_NOT: {
3477 /* 1 01 01010 00 1 m 000000 11111 d MVN Xd,Xm */
3478 *p++ = X_3_8_5_6_5_5(X101,
3479 X01010001, rSrc, X000000, X11111, rDst);
3480 goto done;
3482 default:
3483 break;
3485 goto bad;
3487 case ARM64in_MovI: {
3488 /* We generate the "preferred form", ORR Xd, XZR, Xm
3489 101 01010 00 0 m 000000 11111 d
3491 UInt instr = 0xAA0003E0;
3492 UInt d = iregEnc(i->ARM64in.MovI.dst);
3493 UInt m = iregEnc(i->ARM64in.MovI.src);
3494 *p++ = instr | ((m & 31) << 16) | ((d & 31) << 0);
3495 goto done;
3497 case ARM64in_Imm64: {
3498 p = imm64_to_ireg( p, iregEnc(i->ARM64in.Imm64.dst),
3499 i->ARM64in.Imm64.imm64 );
3500 goto done;
3502 case ARM64in_LdSt64: {
3503 p = do_load_or_store64( p, i->ARM64in.LdSt64.isLoad,
3504 iregEnc(i->ARM64in.LdSt64.rD),
3505 i->ARM64in.LdSt64.amode );
3506 goto done;
3508 case ARM64in_LdSt32: {
3509 p = do_load_or_store32( p, i->ARM64in.LdSt32.isLoad,
3510 iregEnc(i->ARM64in.LdSt32.rD),
3511 i->ARM64in.LdSt32.amode );
3512 goto done;
3514 case ARM64in_LdSt16: {
3515 p = do_load_or_store16( p, i->ARM64in.LdSt16.isLoad,
3516 iregEnc(i->ARM64in.LdSt16.rD),
3517 i->ARM64in.LdSt16.amode );
3518 goto done;
3520 case ARM64in_LdSt8: {
3521 p = do_load_or_store8( p, i->ARM64in.LdSt8.isLoad,
3522 iregEnc(i->ARM64in.LdSt8.rD),
3523 i->ARM64in.LdSt8.amode );
3524 goto done;
3527 case ARM64in_XDirect: {
3528 /* NB: what goes on here has to be very closely coordinated
3529 with chainXDirect_ARM64 and unchainXDirect_ARM64 below. */
3530 /* We're generating chain-me requests here, so we need to be
3531 sure this is actually allowed -- no-redir translations
3532 can't use chain-me's. Hence: */
3533 vassert(disp_cp_chain_me_to_slowEP != NULL);
3534 vassert(disp_cp_chain_me_to_fastEP != NULL);
3536 /* Use ptmp for backpatching conditional jumps. */
3537 UInt* ptmp = NULL;
3539 /* First off, if this is conditional, create a conditional
3540 jump over the rest of it. Or at least, leave a space for
3541 it that we will shortly fill in. */
3542 if (i->ARM64in.XDirect.cond != ARM64cc_AL) {
3543 vassert(i->ARM64in.XDirect.cond != ARM64cc_NV);
3544 ptmp = p;
3545 *p++ = 0;
3548 /* Update the guest PC. */
3549 /* imm64 x9, dstGA */
3550 /* str x9, amPC */
3551 p = imm64_to_ireg(p, /*x*/9, i->ARM64in.XDirect.dstGA);
3552 p = do_load_or_store64(p, False/*!isLoad*/,
3553 /*x*/9, i->ARM64in.XDirect.amPC);
3555 /* --- FIRST PATCHABLE BYTE follows --- */
3556 /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're
3557 calling to) backs up the return address, so as to find the
3558 address of the first patchable byte. So: don't change the
3559 number of instructions (5) below. */
3560 /* movw x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[15:0] */
3561 /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[31:16], lsl 16 */
3562 /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[47:32], lsl 32 */
3563 /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[63:48], lsl 48 */
3564 /* blr x9 */
3565 const void* disp_cp_chain_me
3566 = i->ARM64in.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
3567 : disp_cp_chain_me_to_slowEP;
3568 p = imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)disp_cp_chain_me);
3569 *p++ = 0xD63F0120;
3570 /* --- END of PATCHABLE BYTES --- */
3572 /* Fix up the conditional jump, if there was one. */
3573 if (i->ARM64in.XDirect.cond != ARM64cc_AL) {
3574 Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3575 vassert(delta > 0 && delta <= 40);
3576 vassert((delta & 3) == 0);
3577 UInt notCond = 1 ^ (UInt)i->ARM64in.XDirect.cond;
3578 vassert(notCond <= 13); /* Neither AL nor NV */
3579 vassert(ptmp != NULL);
3580 delta = delta >> 2;
3581 *ptmp = X_8_19_1_4(X01010100, delta & ((1<<19)-1), 0, notCond);
3583 goto done;
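/* Worked example of the backpatched hole (editor's addition, illustrative
   only): with notCond = NE and a byte offset of 8 (imm19 = 2),
      X_8_19_1_4(X01010100, 2, 0, 1) = 0x54000041 = B.NE .+8;
   here the real offset spans the guest-PC update plus the five patchable
   instructions above. */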
3586 case ARM64in_XIndir: {
3587 // XIndir is more or less the same as XAssisted, except
3588 // we don't have a trc value to hand back, so there's no
3589 // write to x21
3590 /* Use ptmp for backpatching conditional jumps. */
3591 //UInt* ptmp = NULL;
3593 /* First off, if this is conditional, create a conditional
3594 jump over the rest of it. Or at least, leave a space for
3595 it that we will shortly fill in. */
3596 if (i->ARM64in.XIndir.cond != ARM64cc_AL) {
3597 vassert(0); //ATC
3598 //ZZ vassert(i->ARMin.XIndir.cond != ARMcc_NV);
3599 //ZZ ptmp = p;
3600 //ZZ *p++ = 0;
3603 /* Update the guest PC. */
3604 /* str r-dstGA, amPC */
3605 p = do_load_or_store64(p, False/*!isLoad*/,
3606 iregEnc(i->ARM64in.XIndir.dstGA),
3607 i->ARM64in.XIndir.amPC);
3609 /* imm64 x9, VG_(disp_cp_xindir) */
3610 /* br x9 */
3611 p = imm64_to_ireg(p, /*x*/9, (Addr)disp_cp_xindir);
3612 *p++ = 0xD61F0120; /* br x9 */
3614 /* Fix up the conditional jump, if there was one. */
3615 if (i->ARM64in.XIndir.cond != ARM64cc_AL) {
3616 vassert(0); //ATC
3617 //ZZ Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3618 //ZZ vassert(delta > 0 && delta < 40);
3619 //ZZ vassert((delta & 3) == 0);
3620 //ZZ UInt notCond = 1 ^ (UInt)i->ARMin.XIndir.cond;
3621 //ZZ vassert(notCond <= 13); /* Neither AL nor NV */
3622 //ZZ delta = (delta >> 2) - 2;
3623 //ZZ *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
3625 goto done;
3628 case ARM64in_XAssisted: {
3629 /* Use ptmp for backpatching conditional jumps. */
3630 UInt* ptmp = NULL;
3632 /* First off, if this is conditional, create a conditional
3633 jump over the rest of it. Or at least, leave a space for
3634 it that we will shortly fill in. I think this can only
3635 ever happen when VEX is driven by the switchbacker. */
3636 if (i->ARM64in.XAssisted.cond != ARM64cc_AL) {
3637 vassert(i->ARM64in.XAssisted.cond != ARM64cc_NV);
3638 ptmp = p;
3639 *p++ = 0;
3642 /* Update the guest PC. */
3643 /* str r-dstGA, amPC */
3644 p = do_load_or_store64(p, False/*!isLoad*/,
3645 iregEnc(i->ARM64in.XAssisted.dstGA),
3646 i->ARM64in.XAssisted.amPC);
3648 /* imm64 x21, $magic_number */
3649 UInt trcval = 0;
3650 switch (i->ARM64in.XAssisted.jk) {
3651 case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
3652 case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
3653 //case Ijk_Sys_int128: trcval = VEX_TRC_JMP_SYS_INT128; break;
3654 case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
3655 //case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
3656 //case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
3657 case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
3658 case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
3659 case Ijk_FlushDCache: trcval = VEX_TRC_JMP_FLUSHDCACHE; break;
3660 case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
3661 case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
3662 //case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
3663 case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
3664 /* We don't expect to see the following being assisted. */
3665 //case Ijk_Ret:
3666 //case Ijk_Call:
3667 /* fallthrough */
3668 default:
3669 ppIRJumpKind(i->ARM64in.XAssisted.jk);
3670 vpanic("emit_ARM64Instr.ARM64in_XAssisted: "
3671 "unexpected jump kind");
3673 vassert(trcval != 0);
3674 p = imm64_to_ireg(p, /*x*/21, (ULong)trcval);
3676 /* imm64 x9, VG_(disp_cp_xassisted) */
3677 /* br x9 */
3678 p = imm64_to_ireg(p, /*x*/9, (Addr)disp_cp_xassisted);
3679 *p++ = 0xD61F0120; /* br x9 */
3681 /* Fix up the conditional jump, if there was one. */
3682 if (i->ARM64in.XAssisted.cond != ARM64cc_AL) {
3683 Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3684 vassert(delta > 0 && delta < 40);
3685 vassert((delta & 3) == 0);
3686 UInt notCond = 1 ^ (UInt)i->ARM64in.XAssisted.cond;
3687 vassert(notCond <= 13); /* Neither AL nor NV */
3688 vassert(ptmp != NULL);
3689 delta = delta >> 2;
3690 *ptmp = X_8_19_1_4(X01010100, delta & ((1<<19)-1), 0, notCond);
3692 goto done;
3695 case ARM64in_CSel: {
3696 /* 100 1101 0100 mm cond 00 nn dd = CSEL Xd, Xn, Xm, cond */
3697 UInt dd = iregEnc(i->ARM64in.CSel.dst);
3698 UInt nn = iregEnc(i->ARM64in.CSel.argL);
3699 UInt mm = iregEnc(i->ARM64in.CSel.argR);
3700 UInt cond = (UInt)i->ARM64in.CSel.cond;
3701 vassert(dd < 31 && nn < 31 && mm < 31 && cond < 16);
3702 *p++ = X_3_8_5_6_5_5(X100, X11010100, mm, cond << 2, nn, dd);
3703 goto done;
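/* Worked example (editor's addition, illustrative only): CSEL x0, x1, x2, EQ
   (cond = 0) is
      X_3_8_5_6_5_5(X100, X11010100, 2, 0 << 2, 1, 0)
         = 100 11010100 00010 000000 00001 00000 = 0x9A820020. */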
3706 case ARM64in_Call: {
3707 /* We'll use x9 as a scratch register to put the target
3708 address in. */
3709 if (i->ARM64in.Call.cond != ARM64cc_AL
3710 && i->ARM64in.Call.rloc.pri != RLPri_None) {
3711 /* The call might not happen (it isn't unconditional) and
3712 it returns a result. In this case we will need to
3713 generate a control flow diamond to put 0x555..555 in
3714 the return register(s) in the case where the call
3715 doesn't happen. If this ever becomes necessary, maybe
3716 copy code from the 32-bit ARM equivalent. Until that
3717 day, just give up. */
3718 goto bad;
3721 UInt* ptmp = NULL;
3722 if (i->ARM64in.Call.cond != ARM64cc_AL) {
3723 /* Create a hole to put a conditional branch in. We'll
3724 patch it once we know the branch length. */
3725 ptmp = p;
3726 *p++ = 0;
3729 // x9 = &target
3730 p = imm64_to_ireg( (UInt*)p, /*x*/9, (ULong)i->ARM64in.Call.target );
3731 // blr x9
3732 *p++ = 0xD63F0120;
3734 // Patch the hole if necessary
3735 if (i->ARM64in.Call.cond != ARM64cc_AL) {
3736 ULong dist = (ULong)(p - ptmp);
3737 /* imm64_to_ireg produces between 1 and 4 insns, and
3738 then there's the BLR itself. Hence: */
3739 vassert(dist >= 2 && dist <= 5);
3740 vassert(ptmp != NULL);
3741 // 01010100 simm19 0 cond = B.cond (here + simm19 << 2)
3742 *ptmp = X_8_19_1_4(X01010100, dist, 0,
3743 1 ^ (UInt)i->ARM64in.Call.cond);
3744 } else {
3745 vassert(ptmp == NULL);
3748 goto done;
3751 case ARM64in_AddToSP: {
3752 /* 10,0 10001 00 imm12 11111 11111 ADD xsp, xsp, #imm12
3753 11,0 10001 00 imm12 11111 11111 SUB xsp, xsp, #imm12
3755 Int simm12 = i->ARM64in.AddToSP.simm;
3756 vassert(-4096 < simm12 && simm12 < 4096);
3757 vassert(0 == (simm12 & 0xF));
3758 if (simm12 >= 0) {
3759 *p++ = X_2_6_2_12_5_5(X10, X010001, X00, simm12, X11111, X11111);
3760 } else {
3761 *p++ = X_2_6_2_12_5_5(X11, X010001, X00, -simm12, X11111, X11111);
3763 goto done;
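/* Worked example (editor's addition, illustrative only): ADD xsp, xsp, #16 is
      X_2_6_2_12_5_5(X10, X010001, X00, 16, X11111, X11111) = 0x910043FF;
   a negative simm is emitted as the SUB form with the magnitude negated. */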
3766 case ARM64in_FromSP: {
3767 /* 10,0 10001 00 0..(12)..0 11111 dd MOV Xd, xsp */
3768 UInt dd = iregEnc(i->ARM64in.FromSP.dst);
3769 vassert(dd < 31);
3770 *p++ = X_2_6_2_12_5_5(X10, X010001, X00, 0, X11111, dd);
3771 goto done;
3774 case ARM64in_Mul: {
3775 /* 100 11011 110 mm 011111 nn dd UMULH Xd, Xn,Xm
3776 100 11011 010 mm 011111 nn dd SMULH Xd, Xn,Xm
3777 100 11011 000 mm 011111 nn dd MUL Xd, Xn,Xm
3779 UInt dd = iregEnc(i->ARM64in.Mul.dst);
3780 UInt nn = iregEnc(i->ARM64in.Mul.argL);
3781 UInt mm = iregEnc(i->ARM64in.Mul.argR);
3782 vassert(dd < 31 && nn < 31 && mm < 31);
3783 switch (i->ARM64in.Mul.op) {
3784 case ARM64mul_ZX:
3785 *p++ = X_3_8_5_6_5_5(X100, X11011110, mm, X011111, nn, dd);
3786 goto done;
3787 case ARM64mul_SX:
3788 *p++ = X_3_8_5_6_5_5(X100, X11011010, mm, X011111, nn, dd);
3789 goto done;
3790 case ARM64mul_PLAIN:
3791 *p++ = X_3_8_5_6_5_5(X100, X11011000, mm, X011111, nn, dd);
3792 goto done;
3793 default:
3794 vassert(0);
3796 goto bad;
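/* Worked example (editor's addition, illustrative only): the PLAIN case for
   MUL x0, x1, x2 is
      X_3_8_5_6_5_5(X100, X11011000, 2, X011111, 1, 0) = 0x9B027C20,
   i.e. MADD x0, x1, x2, xzr. */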
3798 case ARM64in_LdrEX: {
3799 /* 085F7C82 ldxrb w2, [x4]
3800 485F7C82 ldxrh w2, [x4]
3801 885F7C82 ldxr w2, [x4]
3802 C85F7C82 ldxr x2, [x4]
3804 switch (i->ARM64in.LdrEX.szB) {
3805 case 1: *p++ = 0x085F7C82; goto done;
3806 case 2: *p++ = 0x485F7C82; goto done;
3807 case 4: *p++ = 0x885F7C82; goto done;
3808 case 8: *p++ = 0xC85F7C82; goto done;
3809 default: break;
3811 goto bad;
3813 case ARM64in_StrEX: {
3814 /* 08007C82 stxrb w0, w2, [x4]
3815 48007C82 stxrh w0, w2, [x4]
3816 88007C82 stxr w0, w2, [x4]
3817 C8007C82 stxr w0, x2, [x4]
3819 switch (i->ARM64in.StrEX.szB) {
3820 case 1: *p++ = 0x08007C82; goto done;
3821 case 2: *p++ = 0x48007C82; goto done;
3822 case 4: *p++ = 0x88007C82; goto done;
3823 case 8: *p++ = 0xC8007C82; goto done;
3824 default: break;
3826 goto bad;
3828 case ARM64in_CAS: {
3829 /* This isn't simple. For an explanation see the comment in
3830 host_arm64_defs.h on the definition of ARM64Instr case
3831 CAS. */
3832 /* Generate:
3833 -- one of:
3834 mov x8, x5 // AA0503E8
3835 and x8, x5, #0xFFFFFFFF // 92407CA8
3836 and x8, x5, #0xFFFF // 92403CA8
3837 and x8, x5, #0xFF // 92401CA8
3839 -- one of:
3840 ldxr x1, [x3] // C85F7C61
3841 ldxr w1, [x3] // 885F7C61
3842 ldxrh w1, [x3] // 485F7C61
3843 ldxrb w1, [x3] // 085F7C61
3845 -- always:
3846 cmp x1, x8 // EB08003F
3847 bne out // 54000061
3849 -- one of:
3850 stxr w1, x7, [x3] // C8017C67
3851 stxr w1, w7, [x3] // 88017C67
3852 stxrh w1, w7, [x3] // 48017C67
3853 stxrb w1, w7, [x3] // 08017C67
3855 -- always:
3856 eor x1, x5, x1 // CA0100A1
3857 out:
3859 switch (i->ARM64in.CAS.szB) {
3860 case 8: *p++ = 0xAA0503E8; break;
3861 case 4: *p++ = 0x92407CA8; break;
3862 case 2: *p++ = 0x92403CA8; break;
3863 case 1: *p++ = 0x92401CA8; break;
3864 default: vassert(0);
3866 switch (i->ARM64in.CAS.szB) {
3867 case 8: *p++ = 0xC85F7C61; break;
3868 case 4: *p++ = 0x885F7C61; break;
3869 case 2: *p++ = 0x485F7C61; break;
3870 case 1: *p++ = 0x085F7C61; break;
3872 *p++ = 0xEB08003F;
3873 *p++ = 0x54000061;
3874 switch (i->ARM64in.CAS.szB) {
3875 case 8: *p++ = 0xC8017C67; break;
3876 case 4: *p++ = 0x88017C67; break;
3877 case 2: *p++ = 0x48017C67; break;
3878 case 1: *p++ = 0x08017C67; break;
3880 *p++ = 0xCA0100A1;
3881 goto done;
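/* Note (editor's addition): 0x54000061 above decodes as B.NE .+12
   (imm19 = 3, cond = NE), so a failed comparison skips the STXR and the EOR
   and falls straight through to "out". */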
3883 case ARM64in_MFence: {
3884 *p++ = 0xD5033F9F; /* DSB sy */
3885 *p++ = 0xD5033FBF; /* DMB sy */
3886 *p++ = 0xD5033FDF; /* ISB */
3887 goto done;
3889 case ARM64in_ClrEX: {
3890 *p++ = 0xD5033F5F; /* clrex #15 */
3891 goto done;
3893 case ARM64in_VLdStH: {
3894 /* 01 111101 01 imm12 n t LDR Ht, [Xn|SP, #imm12 * 2]
3895 01 111101 00 imm12 n t STR Ht, [Xn|SP, #imm12 * 2]
3897 UInt hD = dregEnc(i->ARM64in.VLdStH.hD);
3898 UInt rN = iregEnc(i->ARM64in.VLdStH.rN);
3899 UInt uimm12 = i->ARM64in.VLdStH.uimm12;
3900 Bool isLD = i->ARM64in.VLdStH.isLoad;
3901 vassert(uimm12 < 8192 && 0 == (uimm12 & 1));
3902 uimm12 >>= 1;
3903 vassert(uimm12 < (1<<12));
3904 vassert(hD < 32);
3905 vassert(rN < 31);
3906 *p++ = X_2_6_2_12_5_5(X01, X111101, isLD ? X01 : X00,
3907 uimm12, rN, hD);
3908 goto done;
3910 case ARM64in_VLdStS: {
3911 /* 10 111101 01 imm12 n t LDR St, [Xn|SP, #imm12 * 4]
3912 10 111101 00 imm12 n t STR St, [Xn|SP, #imm12 * 4]
3914 UInt sD = dregEnc(i->ARM64in.VLdStS.sD);
3915 UInt rN = iregEnc(i->ARM64in.VLdStS.rN);
3916 UInt uimm12 = i->ARM64in.VLdStS.uimm12;
3917 Bool isLD = i->ARM64in.VLdStS.isLoad;
3918 vassert(uimm12 < 16384 && 0 == (uimm12 & 3));
3919 uimm12 >>= 2;
3920 vassert(uimm12 < (1<<12));
3921 vassert(sD < 32);
3922 vassert(rN < 31);
3923 *p++ = X_2_6_2_12_5_5(X10, X111101, isLD ? X01 : X00,
3924 uimm12, rN, sD);
3925 goto done;
3927 case ARM64in_VLdStD: {
3928 /* 11 111101 01 imm12 n t LDR Dt, [Xn|SP, #imm12 * 8]
3929 11 111101 00 imm12 n t STR Dt, [Xn|SP, #imm12 * 8]
3931 UInt dD = dregEnc(i->ARM64in.VLdStD.dD);
3932 UInt rN = iregEnc(i->ARM64in.VLdStD.rN);
3933 UInt uimm12 = i->ARM64in.VLdStD.uimm12;
3934 Bool isLD = i->ARM64in.VLdStD.isLoad;
3935 vassert(uimm12 < 32768 && 0 == (uimm12 & 7));
3936 uimm12 >>= 3;
3937 vassert(uimm12 < (1<<12));
3938 vassert(dD < 32);
3939 vassert(rN < 31);
3940 *p++ = X_2_6_2_12_5_5(X11, X111101, isLD ? X01 : X00,
3941 uimm12, rN, dD);
3942 goto done;
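/* Worked example (editor's addition, illustrative only): LDR d0, [x1, #16]
   scales the offset by 8, so uimm12 = 2 and
      X_2_6_2_12_5_5(X11, X111101, X01, 2, 1, 0) = 0xFD400820. */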
3944 case ARM64in_VLdStQ: {
3945 /* 0100 1100 0000 0000 0111 11 rN rQ st1 {vQ.2d}, [<rN|SP>]
3946 0100 1100 0100 0000 0111 11 rN rQ ld1 {vQ.2d}, [<rN|SP>]
3948 UInt rQ = qregEnc(i->ARM64in.VLdStQ.rQ);
3949 UInt rN = iregEnc(i->ARM64in.VLdStQ.rN);
3950 vassert(rQ < 32);
3951 vassert(rN < 31);
3952 if (i->ARM64in.VLdStQ.isLoad) {
3953 *p++ = 0x4C407C00 | (rN << 5) | rQ;
3954 } else {
3955 *p++ = 0x4C007C00 | (rN << 5) | rQ;
3957 goto done;
3959 case ARM64in_VCvtI2F: {
3960 /* 31 28 23 21 20 18 15 9 4
3961 000 11110 00 1 00 010 000000 n d SCVTF Sd, Wn
3962 000 11110 01 1 00 010 000000 n d SCVTF Dd, Wn
3963 100 11110 00 1 00 010 000000 n d SCVTF Sd, Xn
3964 100 11110 01 1 00 010 000000 n d SCVTF Dd, Xn
3965 000 11110 00 1 00 011 000000 n d UCVTF Sd, Wn
3966 000 11110 01 1 00 011 000000 n d UCVTF Dd, Wn
3967 100 11110 00 1 00 011 000000 n d UCVTF Sd, Xn
3968 100 11110 01 1 00 011 000000 n d UCVTF Dd, Xn
3970 UInt rN = iregEnc(i->ARM64in.VCvtI2F.rS);
3971 UInt rD = dregEnc(i->ARM64in.VCvtI2F.rD);
3972 ARM64CvtOp how = i->ARM64in.VCvtI2F.how;
3973 /* Just handle cases as they show up. */
3974 switch (how) {
3975 case ARM64cvt_F32_I32S: /* SCVTF Sd, Wn */
3976 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100010, X000000, rN, rD);
3977 break;
3978 case ARM64cvt_F64_I32S: /* SCVTF Dd, Wn */
3979 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100010, X000000, rN, rD);
3980 break;
3981 case ARM64cvt_F32_I64S: /* SCVTF Sd, Xn */
3982 *p++ = X_3_5_8_6_5_5(X100, X11110, X00100010, X000000, rN, rD);
3983 break;
3984 case ARM64cvt_F64_I64S: /* SCVTF Dd, Xn */
3985 *p++ = X_3_5_8_6_5_5(X100, X11110, X01100010, X000000, rN, rD);
3986 break;
3987 case ARM64cvt_F32_I32U: /* UCVTF Sd, Wn */
3988 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100011, X000000, rN, rD);
3989 break;
3990 case ARM64cvt_F64_I32U: /* UCVTF Dd, Wn */
3991 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100011, X000000, rN, rD);
3992 break;
3993 case ARM64cvt_F32_I64U: /* UCVTF Sd, Xn */
3994 *p++ = X_3_5_8_6_5_5(X100, X11110, X00100011, X000000, rN, rD);
3995 break;
3996 case ARM64cvt_F64_I64U: /* UCVTF Dd, Xn */
3997 *p++ = X_3_5_8_6_5_5(X100, X11110, X01100011, X000000, rN, rD);
3998 break;
3999 default:
4000 goto bad; //ATC
4002 goto done;
4004 case ARM64in_VCvtF2I: {
4005 /* 30 23 20 18 15 9 4
4006 sf 00,11110,0x 1 00 000,000000 n d FCVTNS Rd, Fn (round to
4007 sf 00,11110,0x 1 00 001,000000 n d FCVTNU Rd, Fn nearest)
4008 ---------------- 01 -------------- FCVTP-------- (round to +inf)
4009 ---------------- 10 -------------- FCVTM-------- (round to -inf)
4010 ---------------- 11 -------------- FCVTZ-------- (round to zero)
4012 Rd is Xd when sf==1, Wd when sf==0
4013 Fn is Dn when x==1, Sn when x==0
4014 20:19 carry the rounding mode, using the same encoding as FPCR
4016 UInt rD = iregEnc(i->ARM64in.VCvtF2I.rD);
4017 UInt rN = dregEnc(i->ARM64in.VCvtF2I.rS);
4018 ARM64CvtOp how = i->ARM64in.VCvtF2I.how;
4019 UChar armRM = i->ARM64in.VCvtF2I.armRM;
4020 /* Just handle cases as they show up. */
4021 switch (how) {
4022 case ARM64cvt_F64_I32S: /* FCVTxS Wd, Dn */
4023 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100000 | (armRM << 3),
4024 X000000, rN, rD);
4025 break;
4026 case ARM64cvt_F64_I32U: /* FCVTxU Wd, Dn */
4027 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100001 | (armRM << 3),
4028 X000000, rN, rD);
4029 break;
4030 case ARM64cvt_F64_I64S: /* FCVTxS Xd, Dn */
4031 *p++ = X_3_5_8_6_5_5(X100, X11110, X01100000 | (armRM << 3),
4032 X000000, rN, rD);
4033 break;
4034 case ARM64cvt_F64_I64U: /* FCVTxU Xd, Dn */
4035 *p++ = X_3_5_8_6_5_5(X100, X11110, X01100001 | (armRM << 3),
4036 X000000, rN, rD);
4037 break;
4038 case ARM64cvt_F32_I32S: /* FCVTxS Wd, Sn */
4039 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100000 | (armRM << 3),
4040 X000000, rN, rD);
4041 break;
4042 case ARM64cvt_F32_I32U: /* FCVTxU Wd, Sn */
4043 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100001 | (armRM << 3),
4044 X000000, rN, rD);
4045 break;
4046 case ARM64cvt_F32_I64S: /* FCVTxS Xd, Sn */
4047 *p++ = X_3_5_8_6_5_5(X100, X11110, X00100000 | (armRM << 3),
4048 X000000, rN, rD);
4049 break;
4050 case ARM64cvt_F32_I64U: /* FCVTxU Xd, Sn */
4051 *p++ = X_3_5_8_6_5_5(X100, X11110, X00100001 | (armRM << 3),
4052 X000000, rN, rD);
4053 break;
4054 default:
4055 goto bad; //ATC
4057 goto done;
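/* Worked example (editor's addition, illustrative only): with armRM = 3
   (round towards zero, FPCR encoding), the F64->I64S case for
   FCVTZS x0, d1 is
      X_3_5_8_6_5_5(X100, X11110, X01100000 | (3 << 3), X000000, 1, 0)
         = 0x9E780020. */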
4059 case ARM64in_VCvtSD: {
4060 /* 31 23 21 16 14 9 4
4061 000,11110, 00 10001 0,1 10000 n d FCVT Dd, Sn (S->D)
4062 ---------- 01 ----- 0,0 --------- FCVT Sd, Dn (D->S)
4063 Rounding, when dst is smaller than src, is per the FPCR.
4065 UInt dd = dregEnc(i->ARM64in.VCvtSD.dst);
4066 UInt nn = dregEnc(i->ARM64in.VCvtSD.src);
4067 if (i->ARM64in.VCvtSD.sToD) {
4068 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100010, X110000, nn, dd);
4069 } else {
4070 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100010, X010000, nn, dd);
4072 goto done;
4074 case ARM64in_VCvtHS: {
4075 /* 31 23 21 16 14 9 4
4076 000,11110, 11 10001 0,0 10000 n d FCVT Sd, Hn (H->S)
4077 ---------- 00 ----- 1,1 --------- FCVT Hd, Sn (S->H)
4078 Rounding, when dst is smaller than src, is per the FPCR.
4080 UInt dd = dregEnc(i->ARM64in.VCvtHS.dst);
4081 UInt nn = dregEnc(i->ARM64in.VCvtHS.src);
4082 if (i->ARM64in.VCvtHS.hToS) {
4083 *p++ = X_3_5_8_6_5_5(X000, X11110, X11100010, X010000, nn, dd);
4084 } else {
4085 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100011, X110000, nn, dd);
4087 goto done;
4089 case ARM64in_VCvtHD: {
4090 /* 31 23 21 16 14 9 4
4091 000,11110, 11 10001 0,1 10000 n d FCVT Dd, Hn (H->D)
4092 ---------- 01 ----- 1,1 --------- FCVT Hd, Dn (D->H)
4093 Rounding, when dst is smaller than src, is per the FPCR.
4095 UInt dd = dregEnc(i->ARM64in.VCvtHD.dst);
4096 UInt nn = dregEnc(i->ARM64in.VCvtHD.src);
4097 if (i->ARM64in.VCvtHD.hToD) {
4098 *p++ = X_3_5_8_6_5_5(X000, X11110, X11100010, X110000, nn, dd);
4099 } else {
4100 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100011, X110000, nn, dd);
4102 goto done;
4104 case ARM64in_VUnaryD: {
4105 /* 31 23 21 16 14 9 4
4106 000,11110 01 1,0000 0,0 10000 n d FMOV Dd, Dn (not handled)
4107 ------------------- 0,1 --------- FABS ------
4108 ------------------- 1,0 --------- FNEG ------
4109 ------------------- 1,1 --------- FSQRT -----
4111 UInt dD = dregEnc(i->ARM64in.VUnaryD.dst);
4112 UInt dN = dregEnc(i->ARM64in.VUnaryD.src);
4113 UInt b16 = 2; /* impossible */
4114 UInt b15 = 2; /* impossible */
4115 switch (i->ARM64in.VUnaryD.op) {
4116 case ARM64fpu_NEG: b16 = 1; b15 = 0; break;
4117 case ARM64fpu_SQRT: b16 = 1; b15 = 1; break;
4118 case ARM64fpu_ABS: b16 = 0; b15 = 1; break;
4119 default: break;
4121 if (b16 < 2 && b15 < 2) {
4122 *p++ = X_3_8_5_6_5_5(X000, X11110011, (X0000 << 1) | b16,
4123 (b15 << 5) | X10000, dN, dD);
4124 goto done;
4127 000, 11110 01 1,001 11,1 10000 n d FRINTI Dd, Dm (round per FPCR)
4129 if (i->ARM64in.VUnaryD.op == ARM64fpu_RINT) {
4130 *p++ = X_3_8_5_6_5_5(X000, X11110011, X00111, X110000, dN, dD);
4131 goto done;
4134 010, 11110 11 1,0000 1,1111 10 n d FRECPX Dd, Dm
4136 if (i->ARM64in.VUnaryD.op == ARM64fpu_RECPX) {
4137 *p++ = X_3_8_5_6_5_5(X010, X11110111, X00001, X111110, dN, dD);
4138 goto done;
4140 goto bad;
4142 case ARM64in_VUnaryS: {
4143 /* 31 23 21 16 14 9 4
4144 000,11110 00 1,0000 0,0 10000 n d FMOV Sd, Sn (not handled)
4145 ------------------- 0,1 --------- FABS ------
4146 ------------------- 1,0 --------- FNEG ------
4147 ------------------- 1,1 --------- FSQRT -----
4149 UInt sD = dregEnc(i->ARM64in.VUnaryS.dst);
4150 UInt sN = dregEnc(i->ARM64in.VUnaryS.src);
4151 UInt b16 = 2; /* impossible */
4152 UInt b15 = 2; /* impossible */
4153 switch (i->ARM64in.VUnaryS.op) {
4154 case ARM64fpu_NEG: b16 = 1; b15 = 0; break;
4155 case ARM64fpu_SQRT: b16 = 1; b15 = 1; break;
4156 case ARM64fpu_ABS: b16 = 0; b15 = 1; break;
4157 default: break;
4159 if (b16 < 2 && b15 < 2) {
4160 *p++ = X_3_8_5_6_5_5(X000, X11110001, (X0000 << 1) | b16,
4161 (b15 << 5) | X10000, sN, sD);
4162 goto done;
4165 000, 11110 00 1,001 11,1 10000 n d FRINTI Sd, Sm (round per FPCR)
4167 if (i->ARM64in.VUnaryS.op == ARM64fpu_RINT) {
4168 *p++ = X_3_8_5_6_5_5(X000, X11110001, X00111, X110000, sN, sD);
4169 goto done;
4172 010, 11110 10 1,0000 1,1111 10 n d FRECPX Sd, Sm
4174 if (i->ARM64in.VUnaryS.op == ARM64fpu_RECPX) {
4175 *p++ = X_3_8_5_6_5_5(X010, X11110101, X00001, X111110, sN, sD);
4176 goto done;
4178 goto bad;
4180 case ARM64in_VBinD: {
4181 /* 31 23 20 15 11 9 4
4182 ---------------- 0000 ------ FMUL --------
4183 000 11110 011 m 0001 10 n d FDIV Dd,Dn,Dm
4184 ---------------- 0010 ------ FADD --------
4185 ---------------- 0011 ------ FSUB --------
4187 UInt dD = dregEnc(i->ARM64in.VBinD.dst);
4188 UInt dN = dregEnc(i->ARM64in.VBinD.argL);
4189 UInt dM = dregEnc(i->ARM64in.VBinD.argR);
4190 UInt b1512 = 16; /* impossible */
4191 switch (i->ARM64in.VBinD.op) {
4192 case ARM64fpb_DIV: b1512 = X0001; break;
4193 case ARM64fpb_MUL: b1512 = X0000; break;
4194 case ARM64fpb_SUB: b1512 = X0011; break;
4195 case ARM64fpb_ADD: b1512 = X0010; break;
4196 default: goto bad;
4198 vassert(b1512 < 16);
4199 *p++
4200 = X_3_8_5_6_5_5(X000, X11110011, dM, (b1512 << 2) | X10, dN, dD);
4201 goto done;
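/* Worked example (editor's addition, illustrative only): FADD d0, d1, d2 has
   b1512 = X0010, so
      X_3_8_5_6_5_5(X000, X11110011, 2, (X0010 << 2) | X10, 1, 0)
         = 0x1E622820. */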
4203 case ARM64in_VBinS: {
4204 /* 31 23 20 15 11 9 4
4205 ---------------- 0000 ------ FMUL --------
4206 000 11110 001 m 0001 10 n d FDIV Sd,Sn,Sm
4207 ---------------- 0010 ------ FADD --------
4208 ---------------- 0011 ------ FSUB --------
4210 UInt sD = dregEnc(i->ARM64in.VBinS.dst);
4211 UInt sN = dregEnc(i->ARM64in.VBinS.argL);
4212 UInt sM = dregEnc(i->ARM64in.VBinS.argR);
4213 UInt b1512 = 16; /* impossible */
4214 switch (i->ARM64in.VBinS.op) {
4215 case ARM64fpb_DIV: b1512 = X0001; break;
4216 case ARM64fpb_MUL: b1512 = X0000; break;
4217 case ARM64fpb_SUB: b1512 = X0011; break;
4218 case ARM64fpb_ADD: b1512 = X0010; break;
4219 default: goto bad;
4221 vassert(b1512 < 16);
4222 *p++
4223 = X_3_8_5_6_5_5(X000, X11110001, sM, (b1512 << 2) | X10, sN, sD);
4224 goto done;
4226 case ARM64in_VCmpD: {
4227 /* 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm */
4228 UInt dN = dregEnc(i->ARM64in.VCmpD.argL);
4229 UInt dM = dregEnc(i->ARM64in.VCmpD.argR);
4230 *p++ = X_3_8_5_6_5_5(X000, X11110011, dM, X001000, dN, X00000);
4231 goto done;
4233 case ARM64in_VCmpS: {
4234 /* 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm */
4235 UInt sN = dregEnc(i->ARM64in.VCmpS.argL);
4236 UInt sM = dregEnc(i->ARM64in.VCmpS.argR);
4237 *p++ = X_3_8_5_6_5_5(X000, X11110001, sM, X001000, sN, X00000);
4238 goto done;
4240 case ARM64in_VFCSel: {
4241 /* 31 23 21 20 15 11 9 5
4242 000 11110 00 1 m cond 11 n d FCSEL Sd,Sn,Sm,cond
4243 000 11110 01 1 m cond 11 n d FCSEL Dd,Dn,Dm,cond
4245 Bool isD = i->ARM64in.VFCSel.isD;
4246 UInt dd = dregEnc(i->ARM64in.VFCSel.dst);
4247 UInt nn = dregEnc(i->ARM64in.VFCSel.argL);
4248 UInt mm = dregEnc(i->ARM64in.VFCSel.argR);
4249 UInt cond = (UInt)i->ARM64in.VFCSel.cond;
4250 vassert(cond < 16);
4251 *p++ = X_3_8_5_6_5_5(X000, isD ? X11110011 : X11110001,
4252 mm, (cond << 2) | X000011, nn, dd);
4253 goto done;
4255 case ARM64in_FPCR: {
4256 Bool toFPCR = i->ARM64in.FPCR.toFPCR;
4257 UInt iReg = iregEnc(i->ARM64in.FPCR.iReg);
4258 if (toFPCR) {
4259 /* 0xD51B44 000 Rt MSR fpcr, rT */
4260 *p++ = 0xD51B4400 | (iReg & 0x1F);
4261 goto done;
4263 goto bad; // FPCR -> iReg case currently ATC
4265 case ARM64in_FPSR: {
4266 Bool toFPSR = i->ARM64in.FPSR.toFPSR;
4267 UInt iReg = iregEnc(i->ARM64in.FPSR.iReg);
4268 if (toFPSR) {
4269 /* 0xD51B44 001 Rt MSR fpsr, rT */
4270 *p++ = 0xD51B4420 | (iReg & 0x1F);
4271 } else {
4272 /* 0xD53B44 001 Rt MRS rT, fpsr */
4273 *p++ = 0xD53B4420 | (iReg & 0x1F);
4275 goto done;
4277 case ARM64in_VBinV: {
4278 /* 31 23 20 15 9 4
4279 010 01110 11 1 m 100001 n d ADD Vd.2d, Vn.2d, Vm.2d
4280 010 01110 10 1 m 100001 n d ADD Vd.4s, Vn.4s, Vm.4s
4281 010 01110 01 1 m 100001 n d ADD Vd.8h, Vn.8h, Vm.8h
4282 010 01110 00 1 m 100001 n d ADD Vd.16b, Vn.16b, Vm.16b
4284 011 01110 11 1 m 100001 n d SUB Vd.2d, Vn.2d, Vm.2d
4285 011 01110 10 1 m 100001 n d SUB Vd.4s, Vn.4s, Vm.4s
4286 011 01110 01 1 m 100001 n d SUB Vd.8h, Vn.8h, Vm.8h
4287 011 01110 00 1 m 100001 n d SUB Vd.16b, Vn.16b, Vm.16b
4289 010 01110 10 1 m 100111 n d MUL Vd.4s, Vn.4s, Vm.4s
4290 010 01110 01 1 m 100111 n d MUL Vd.8h, Vn.8h, Vm.8h
4291 010 01110 00 1 m 100111 n d MUL Vd.16b, Vn.16b, Vm.16b
4293 010 01110 01 1 m 110101 n d FADD Vd.2d, Vn.2d, Vm.2d
4294 010 01110 00 1 m 110101 n d FADD Vd.4s, Vn.4s, Vm.4s
4295 010 01110 11 1 m 110101 n d FSUB Vd.2d, Vn.2d, Vm.2d
4296 010 01110 10 1 m 110101 n d FSUB Vd.4s, Vn.4s, Vm.4s
4298 011 01110 01 1 m 110111 n d FMUL Vd.2d, Vn.2d, Vm.2d
4299 011 01110 00 1 m 110111 n d FMUL Vd.4s, Vn.4s, Vm.4s
4300 011 01110 01 1 m 111111 n d FDIV Vd.2d, Vn.2d, Vm.2d
4301 011 01110 00 1 m 111111 n d FDIV Vd.4s, Vn.4s, Vm.4s
4303 010 01110 01 1 m 111101 n d FMAX Vd.2d, Vn.2d, Vm.2d
4304 010 01110 00 1 m 111101 n d FMAX Vd.4s, Vn.4s, Vm.4s
4305 010 01110 11 1 m 111101 n d FMIN Vd.2d, Vn.2d, Vm.2d
4306 010 01110 10 1 m 111101 n d FMIN Vd.4s, Vn.4s, Vm.4s
4308 011 01110 10 1 m 011001 n d UMAX Vd.4s, Vn.4s, Vm.4s
4309 011 01110 01 1 m 011001 n d UMAX Vd.8h, Vn.8h, Vm.8h
4310 011 01110 00 1 m 011001 n d UMAX Vd.16b, Vn.16b, Vm.16b
4312 011 01110 10 1 m 011011 n d UMIN Vd.4s, Vn.4s, Vm.4s
4313 011 01110 01 1 m 011011 n d UMIN Vd.8h, Vn.8h, Vm.8h
4314 011 01110 00 1 m 011011 n d UMIN Vd.16b, Vn.16b, Vm.16b
4316 010 01110 10 1 m 011001 n d SMAX Vd.4s, Vn.4s, Vm.4s
4317 010 01110 01 1 m 011001 n d SMAX Vd.8h, Vn.8h, Vm.8h
4318 010 01110 00 1 m 011001 n d SMAX Vd.16b, Vn.16b, Vm.16b
4320 010 01110 10 1 m 011011 n d SMIN Vd.4s, Vn.4s, Vm.4s
4321 010 01110 01 1 m 011011 n d SMIN Vd.8h, Vn.8h, Vm.8h
4322 010 01110 00 1 m 011011 n d SMIN Vd.16b, Vn.16b, Vm.16b
4324 010 01110 00 1 m 000111 n d AND Vd, Vn, Vm
4325 010 01110 10 1 m 000111 n d ORR Vd, Vn, Vm
4326 011 01110 00 1 m 000111 n d EOR Vd, Vn, Vm
4328 011 01110 11 1 m 100011 n d CMEQ Vd.2d, Vn.2d, Vm.2d
4329 011 01110 10 1 m 100011 n d CMEQ Vd.4s, Vn.4s, Vm.4s
4330 011 01110 01 1 m 100011 n d CMEQ Vd.8h, Vn.8h, Vm.8h
4331 011 01110 00 1 m 100011 n d CMEQ Vd.16b, Vn.16b, Vm.16b
4333 011 01110 11 1 m 001101 n d CMHI Vd.2d, Vn.2d, Vm.2d
4334 011 01110 10 1 m 001101 n d CMHI Vd.4s, Vn.4s, Vm.4s
4335 011 01110 01 1 m 001101 n d CMHI Vd.8h, Vn.8h, Vm.8h
4336 011 01110 00 1 m 001101 n d CMHI Vd.16b, Vn.16b, Vm.16b
4338 010 01110 11 1 m 001101 n d CMGT Vd.2d, Vn.2d, Vm.2d
4339 010 01110 10 1 m 001101 n d CMGT Vd.4s, Vn.4s, Vm.4s
4340 010 01110 01 1 m 001101 n d CMGT Vd.8h, Vn.8h, Vm.8h
4341 010 01110 00 1 m 001101 n d CMGT Vd.16b, Vn.16b, Vm.16b
4343 010 01110 01 1 m 111001 n d FCMEQ Vd.2d, Vn.2d, Vm.2d
4344 010 01110 00 1 m 111001 n d FCMEQ Vd.4s, Vn.4s, Vm.4s
4346 011 01110 01 1 m 111001 n d FCMGE Vd.2d, Vn.2d, Vm.2d
4347 011 01110 00 1 m 111001 n d FCMGE Vd.4s, Vn.4s, Vm.4s
4349 011 01110 11 1 m 111001 n d FCMGT Vd.2d, Vn.2d, Vm.2d
4350 011 01110 10 1 m 111001 n d FCMGT Vd.4s, Vn.4s, Vm.4s
4352 010 01110 00 0 m 000000 n d TBL Vd.16b, {Vn.16b}, Vm.16b
4354 010 01110 11 0 m 000110 n d UZP1 Vd.2d, Vn.2d, Vm.2d
4355 010 01110 10 0 m 000110 n d UZP1 Vd.4s, Vn.4s, Vm.4s
4356 010 01110 01 0 m 000110 n d UZP1 Vd.8h, Vn.8h, Vm.8h
4357 010 01110 00 0 m 000110 n d UZP1 Vd.16b, Vn.16b, Vm.16b
4359 010 01110 11 0 m 010110 n d UZP2 Vd.2d, Vn.2d, Vm.2d
4360 010 01110 10 0 m 010110 n d UZP2 Vd.4s, Vn.4s, Vm.4s
4361 010 01110 01 0 m 010110 n d UZP2 Vd.8h, Vn.8h, Vm.8h
4362 010 01110 00 0 m 010110 n d UZP2 Vd.16b, Vn.16b, Vm.16b
4364 010 01110 10 0 m 001110 n d ZIP1 Vd.4s, Vn.4s, Vm.4s
4365 010 01110 01 0 m 001110 n d ZIP1 Vd.8h, Vn.8h, Vm.8h
4366 010 01110 00 0 m 001110 n d ZIP1 Vd.16b, Vn.16b, Vm.16b
4368 010 01110 10 0 m 011110 n d ZIP2 Vd.4s, Vn.4s, Vm.4s
4369 010 01110 01 0 m 011110 n d ZIP2 Vd.8h, Vn.8h, Vm.8h
4370 010 01110 00 0 m 011110 n d ZIP2 Vd.16b, Vn.16b, Vm.16b
4372 011 01110 00 1 m 100111 n d PMUL Vd.16b, Vn.16b, Vm.16b
4374 000 01110 00 1 m 111000 n d PMULL Vd.8h, Vn.8b, Vm.8b
4376 001 01110 10 1 m 110000 n d UMULL Vd.2d, Vn.2s, Vm.2s
4377 001 01110 01 1 m 110000 n d UMULL Vd.4s, Vn.4h, Vm.4h
4378 001 01110 00 1 m 110000 n d UMULL Vd.8h, Vn.8b, Vm.8b
4380 000 01110 10 1 m 110000 n d SMULL Vd.2d, Vn.2s, Vm.2s
4381 000 01110 01 1 m 110000 n d SMULL Vd.4s, Vn.4h, Vm.4h
4382 000 01110 00 1 m 110000 n d SMULL Vd.8h, Vn.8b, Vm.8b
4384 010 01110 11 1 m 000011 n d SQADD Vd.2d, Vn.2d, Vm.2d
4385 010 01110 10 1 m 000011 n d SQADD Vd.4s, Vn.4s, Vm.4s
4386 010 01110 01 1 m 000011 n d SQADD Vd.8h, Vn.8h, Vm.8h
4387 010 01110 00 1 m 000011 n d SQADD Vd.16b, Vn.16b, Vm.16b
4389 011 01110 11 1 m 000011 n d UQADD Vd.2d, Vn.2d, Vm.2d
4390 011 01110 10 1 m 000011 n d UQADD Vd.4s, Vn.4s, Vm.4s
4391 011 01110 01 1 m 000011 n d UQADD Vd.8h, Vn.8h, Vm.8h
4392 011 01110 00 1 m 000011 n d UQADD Vd.16b, Vn.16b, Vm.16b
4394 010 01110 11 1 m 001011 n d SQSUB Vd.2d, Vn.2d, Vm.2d
4395 010 01110 10 1 m 001011 n d SQSUB Vd.4s, Vn.4s, Vm.4s
4396 010 01110 01 1 m 001011 n d SQSUB Vd.8h, Vn.8h, Vm.8h
4397 010 01110 00 1 m 001011 n d SQSUB Vd.16b, Vn.16b, Vm.16b
4399 011 01110 11 1 m 001011 n d UQSUB Vd.2d, Vn.2d, Vm.2d
4400 011 01110 10 1 m 001011 n d UQSUB Vd.4s, Vn.4s, Vm.4s
4401 011 01110 01 1 m 001011 n d UQSUB Vd.8h, Vn.8h, Vm.8h
4402 011 01110 00 1 m 001011 n d UQSUB Vd.16b, Vn.16b, Vm.16b
4404 000 01110 10 1 m 110100 n d SQDMULL Vd.2d, Vn.2s, Vm.2s
4405 000 01110 01 1 m 110100 n d SQDMULL Vd.4s, Vn.4h, Vm.4h
4407 010 01110 10 1 m 101101 n d SQDMULH Vd.4s, Vn.4s, Vm.4s
4408 010 01110 01 1 m 101101 n d SQDMULH Vd.8h, Vn.8h, Vm.8h
4409 011 01110 10 1 m 101101 n d SQRDMULH Vd.4s, Vn.4s, Vm.4s
4410 011 01110 01 1 m 101101 n d SQRDMULH Vd.8h, Vn.8h, Vm.8h
4412 010 01110 sz 1 m 010011 n d SQSHL@sz Vd, Vn, Vm
4413 010 01110 sz 1 m 010111 n d SQRSHL@sz Vd, Vn, Vm
4414 011 01110 sz 1 m 010011 n d UQSHL@sz Vd, Vn, Vm
4415 011 01110 sz 1 m 010111 n d UQRSHL@sz Vd, Vn, Vm
4417 010 01110 sz 1 m 010001 n d SSHL@sz Vd, Vn, Vm
4418 010 01110 sz 1 m 010101 n d SRSHL@sz Vd, Vn, Vm
4419 011 01110 sz 1 m 010001 n d USHL@sz Vd, Vn, Vm
4420 011 01110 sz 1 m 010101 n d URSHL@sz Vd, Vn, Vm
4422 010 01110 01 1 m 111111 n d FRECPS Vd.2d, Vn.2d, Vm.2d
4423 010 01110 00 1 m 111111 n d FRECPS Vd.4s, Vn.4s, Vm.4s
4424 010 01110 11 1 m 111111 n d FRSQRTS Vd.2d, Vn.2d, Vm.2d
4425 010 01110 10 1 m 111111 n d FRSQRTS Vd.4s, Vn.4s, Vm.4s
4427 UInt vD = qregEnc(i->ARM64in.VBinV.dst);
4428 UInt vN = qregEnc(i->ARM64in.VBinV.argL);
4429 UInt vM = qregEnc(i->ARM64in.VBinV.argR);
4430 switch (i->ARM64in.VBinV.op) {
4431 case ARM64vecb_ADD64x2:
4432 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X100001, vN, vD);
4433 break;
4434 case ARM64vecb_ADD32x4:
4435 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X100001, vN, vD);
4436 break;
4437 case ARM64vecb_ADD16x8:
4438 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X100001, vN, vD);
4439 break;
4440 case ARM64vecb_ADD8x16:
4441 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X100001, vN, vD);
4442 break;
4443 case ARM64vecb_SUB64x2:
4444 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X100001, vN, vD);
4445 break;
4446 case ARM64vecb_SUB32x4:
4447 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X100001, vN, vD);
4448 break;
4449 case ARM64vecb_SUB16x8:
4450 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X100001, vN, vD);
4451 break;
4452 case ARM64vecb_SUB8x16:
4453 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100001, vN, vD);
4454 break;
4455 case ARM64vecb_MUL32x4:
4456 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X100111, vN, vD);
4457 break;
4458 case ARM64vecb_MUL16x8:
4459 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X100111, vN, vD);
4460 break;
4461 case ARM64vecb_MUL8x16:
4462 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X100111, vN, vD);
4463 break;
4464 case ARM64vecb_FADD64x2:
4465 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X110101, vN, vD);
4466 break;
4467 case ARM64vecb_FADD32x4:
4468 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X110101, vN, vD);
4469 break;
4470 case ARM64vecb_FSUB64x2:
4471 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X110101, vN, vD);
4472 break;
4473 case ARM64vecb_FSUB32x4:
4474 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X110101, vN, vD);
4475 break;
4476 case ARM64vecb_FMUL64x2:
4477 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X110111, vN, vD);
4478 break;
4479 case ARM64vecb_FMUL32x4:
4480 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X110111, vN, vD);
4481 break;
4482 case ARM64vecb_FDIV64x2:
4483 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X111111, vN, vD);
4484 break;
4485 case ARM64vecb_FDIV32x4:
4486 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X111111, vN, vD);
4487 break;
4489 case ARM64vecb_FMAX64x2:
4490 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111101, vN, vD);
4491 break;
4492 case ARM64vecb_FMAX32x4:
4493 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111101, vN, vD);
4494 break;
4495 case ARM64vecb_FMIN64x2:
4496 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X111101, vN, vD);
4497 break;
4498 case ARM64vecb_FMIN32x4:
4499 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X111101, vN, vD);
4500 break;
4502 case ARM64vecb_UMAX32x4:
4503 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X011001, vN, vD);
4504 break;
4505 case ARM64vecb_UMAX16x8:
4506 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X011001, vN, vD);
4507 break;
4508 case ARM64vecb_UMAX8x16:
4509 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X011001, vN, vD);
4510 break;
4512 case ARM64vecb_UMIN32x4:
4513 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X011011, vN, vD);
4514 break;
4515 case ARM64vecb_UMIN16x8:
4516 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X011011, vN, vD);
4517 break;
4518 case ARM64vecb_UMIN8x16:
4519 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X011011, vN, vD);
4520 break;
4522 case ARM64vecb_SMAX32x4:
4523 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X011001, vN, vD);
4524 break;
4525 case ARM64vecb_SMAX16x8:
4526 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X011001, vN, vD);
4527 break;
4528 case ARM64vecb_SMAX8x16:
4529 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X011001, vN, vD);
4530 break;
4532 case ARM64vecb_SMIN32x4:
4533 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X011011, vN, vD);
4534 break;
4535 case ARM64vecb_SMIN16x8:
4536 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X011011, vN, vD);
4537 break;
4538 case ARM64vecb_SMIN8x16:
4539 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X011011, vN, vD);
4540 break;
4542 case ARM64vecb_AND:
4543 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X000111, vN, vD);
4544 break;
4545 case ARM64vecb_ORR:
4546 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X000111, vN, vD);
4547 break;
4548 case ARM64vecb_XOR:
4549 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X000111, vN, vD);
4550 break;
4552 case ARM64vecb_CMEQ64x2:
4553 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X100011, vN, vD);
4554 break;
4555 case ARM64vecb_CMEQ32x4:
4556 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X100011, vN, vD);
4557 break;
4558 case ARM64vecb_CMEQ16x8:
4559 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X100011, vN, vD);
4560 break;
4561 case ARM64vecb_CMEQ8x16:
4562 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100011, vN, vD);
4563 break;
4565 case ARM64vecb_CMHI64x2:
4566 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X001101, vN, vD);
4567 break;
4568 case ARM64vecb_CMHI32x4:
4569 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X001101, vN, vD);
4570 break;
4571 case ARM64vecb_CMHI16x8:
4572 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X001101, vN, vD);
4573 break;
4574 case ARM64vecb_CMHI8x16:
4575 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X001101, vN, vD);
4576 break;
4578 case ARM64vecb_CMGT64x2:
4579 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X001101, vN, vD);
4580 break;
4581 case ARM64vecb_CMGT32x4:
4582 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X001101, vN, vD);
4583 break;
4584 case ARM64vecb_CMGT16x8:
4585 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X001101, vN, vD);
4586 break;
4587 case ARM64vecb_CMGT8x16:
4588 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X001101, vN, vD);
4589 break;
4591 case ARM64vecb_FCMEQ64x2:
4592 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111001, vN, vD);
4593 break;
4594 case ARM64vecb_FCMEQ32x4:
4595 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111001, vN, vD);
4596 break;
4598 case ARM64vecb_FCMGE64x2:
4599 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X111001, vN, vD);
4600 break;
4601 case ARM64vecb_FCMGE32x4:
4602 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X111001, vN, vD);
4603 break;
4605 case ARM64vecb_FCMGT64x2:
4606 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X111001, vN, vD);
4607 break;
4608 case ARM64vecb_FCMGT32x4:
4609 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X111001, vN, vD);
4610 break;
4612 case ARM64vecb_TBL1:
4613 *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X000000, vN, vD);
4614 break;
4616 case ARM64vecb_UZP164x2:
4617 *p++ = X_3_8_5_6_5_5(X010, X01110110, vM, X000110, vN, vD);
4618 break;
4619 case ARM64vecb_UZP132x4:
4620 *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X000110, vN, vD);
4621 break;
4622 case ARM64vecb_UZP116x8:
4623 *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X000110, vN, vD);
4624 break;
4625 case ARM64vecb_UZP18x16:
4626 *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X000110, vN, vD);
4627 break;
4629 case ARM64vecb_UZP264x2:
4630 *p++ = X_3_8_5_6_5_5(X010, X01110110, vM, X010110, vN, vD);
4631 break;
4632 case ARM64vecb_UZP232x4:
4633 *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X010110, vN, vD);
4634 break;
4635 case ARM64vecb_UZP216x8:
4636 *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X010110, vN, vD);
4637 break;
4638 case ARM64vecb_UZP28x16:
4639 *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X010110, vN, vD);
4640 break;
4642 case ARM64vecb_ZIP132x4:
4643 *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X001110, vN, vD);
4644 break;
4645 case ARM64vecb_ZIP116x8:
4646 *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X001110, vN, vD);
4647 break;
4648 case ARM64vecb_ZIP18x16:
4649 *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X001110, vN, vD);
4650 break;
4652 case ARM64vecb_ZIP232x4:
4653 *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X011110, vN, vD);
4654 break;
4655 case ARM64vecb_ZIP216x8:
4656 *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X011110, vN, vD);
4657 break;
4658 case ARM64vecb_ZIP28x16:
4659 *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X011110, vN, vD);
4660 break;
4662 case ARM64vecb_PMUL8x16:
4663 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100111, vN, vD);
4664 break;
4666 case ARM64vecb_PMULL8x8:
4667 *p++ = X_3_8_5_6_5_5(X000, X01110001, vM, X111000, vN, vD);
4668 break;
4670 case ARM64vecb_UMULL2DSS:
4671 *p++ = X_3_8_5_6_5_5(X001, X01110101, vM, X110000, vN, vD);
4672 break;
4673 case ARM64vecb_UMULL4SHH:
4674 *p++ = X_3_8_5_6_5_5(X001, X01110011, vM, X110000, vN, vD);
4675 break;
4676 case ARM64vecb_UMULL8HBB:
4677 *p++ = X_3_8_5_6_5_5(X001, X01110001, vM, X110000, vN, vD);
4678 break;
4680 case ARM64vecb_SMULL2DSS:
4681 *p++ = X_3_8_5_6_5_5(X000, X01110101, vM, X110000, vN, vD);
4682 break;
4683 case ARM64vecb_SMULL4SHH:
4684 *p++ = X_3_8_5_6_5_5(X000, X01110011, vM, X110000, vN, vD);
4685 break;
4686 case ARM64vecb_SMULL8HBB:
4687 *p++ = X_3_8_5_6_5_5(X000, X01110001, vM, X110000, vN, vD);
4688 break;
4690 case ARM64vecb_SQADD64x2:
4691 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X000011, vN, vD);
4692 break;
4693 case ARM64vecb_SQADD32x4:
4694 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X000011, vN, vD);
4695 break;
4696 case ARM64vecb_SQADD16x8:
4697 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X000011, vN, vD);
4698 break;
4699 case ARM64vecb_SQADD8x16:
4700 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X000011, vN, vD);
4701 break;
4703 case ARM64vecb_UQADD64x2:
4704 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X000011, vN, vD);
4705 break;
4706 case ARM64vecb_UQADD32x4:
4707 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X000011, vN, vD);
4708 break;
4709 case ARM64vecb_UQADD16x8:
4710 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X000011, vN, vD);
4711 break;
4712 case ARM64vecb_UQADD8x16:
4713 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X000011, vN, vD);
4714 break;
4716 case ARM64vecb_SQSUB64x2:
4717 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X001011, vN, vD);
4718 break;
4719 case ARM64vecb_SQSUB32x4:
4720 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X001011, vN, vD);
4721 break;
4722 case ARM64vecb_SQSUB16x8:
4723 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X001011, vN, vD);
4724 break;
4725 case ARM64vecb_SQSUB8x16:
4726 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X001011, vN, vD);
4727 break;
4729 case ARM64vecb_UQSUB64x2:
4730 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X001011, vN, vD);
4731 break;
4732 case ARM64vecb_UQSUB32x4:
4733 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X001011, vN, vD);
4734 break;
4735 case ARM64vecb_UQSUB16x8:
4736 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X001011, vN, vD);
4737 break;
4738 case ARM64vecb_UQSUB8x16:
4739 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X001011, vN, vD);
4740 break;
4742 case ARM64vecb_SQDMULL2DSS:
4743 *p++ = X_3_8_5_6_5_5(X000, X01110101, vM, X110100, vN, vD);
4744 break;
4745 case ARM64vecb_SQDMULL4SHH:
4746 *p++ = X_3_8_5_6_5_5(X000, X01110011, vM, X110100, vN, vD);
4747 break;
4749 case ARM64vecb_SQDMULH32x4:
4750 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X101101, vN, vD);
4751 break;
4752 case ARM64vecb_SQDMULH16x8:
4753 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X101101, vN, vD);
4754 break;
4755 case ARM64vecb_SQRDMULH32x4:
4756 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X101101, vN, vD);
4757 break;
4758 case ARM64vecb_SQRDMULH16x8:
4759 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X101101, vN, vD);
4760 break;
4762 case ARM64vecb_SQSHL64x2:
4763 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010011, vN, vD);
4764 break;
4765 case ARM64vecb_SQSHL32x4:
4766 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010011, vN, vD);
4767 break;
4768 case ARM64vecb_SQSHL16x8:
4769 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010011, vN, vD);
4770 break;
4771 case ARM64vecb_SQSHL8x16:
4772 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010011, vN, vD);
4773 break;
4775 case ARM64vecb_SQRSHL64x2:
4776 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010111, vN, vD);
4777 break;
4778 case ARM64vecb_SQRSHL32x4:
4779 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010111, vN, vD);
4780 break;
4781 case ARM64vecb_SQRSHL16x8:
4782 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010111, vN, vD);
4783 break;
4784 case ARM64vecb_SQRSHL8x16:
4785 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010111, vN, vD);
4786 break;
4788 case ARM64vecb_UQSHL64x2:
4789 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010011, vN, vD);
4790 break;
4791 case ARM64vecb_UQSHL32x4:
4792 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010011, vN, vD);
4793 break;
4794 case ARM64vecb_UQSHL16x8:
4795 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010011, vN, vD);
4796 break;
4797 case ARM64vecb_UQSHL8x16:
4798 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010011, vN, vD);
4799 break;
4801 case ARM64vecb_UQRSHL64x2:
4802 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010111, vN, vD);
4803 break;
4804 case ARM64vecb_UQRSHL32x4:
4805 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010111, vN, vD);
4806 break;
4807 case ARM64vecb_UQRSHL16x8:
4808 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010111, vN, vD);
4809 break;
4810 case ARM64vecb_UQRSHL8x16:
4811 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010111, vN, vD);
4812 break;
4814 case ARM64vecb_SSHL64x2:
4815 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010001, vN, vD);
4816 break;
4817 case ARM64vecb_SSHL32x4:
4818 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010001, vN, vD);
4819 break;
4820 case ARM64vecb_SSHL16x8:
4821 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010001, vN, vD);
4822 break;
4823 case ARM64vecb_SSHL8x16:
4824 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010001, vN, vD);
4825 break;
4827 case ARM64vecb_SRSHL64x2:
4828 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010101, vN, vD);
4829 break;
4830 case ARM64vecb_SRSHL32x4:
4831 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010101, vN, vD);
4832 break;
4833 case ARM64vecb_SRSHL16x8:
4834 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010101, vN, vD);
4835 break;
4836 case ARM64vecb_SRSHL8x16:
4837 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010101, vN, vD);
4838 break;
4840 case ARM64vecb_USHL64x2:
4841 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010001, vN, vD);
4842 break;
4843 case ARM64vecb_USHL32x4:
4844 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010001, vN, vD);
4845 break;
4846 case ARM64vecb_USHL16x8:
4847 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010001, vN, vD);
4848 break;
4849 case ARM64vecb_USHL8x16:
4850 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010001, vN, vD);
4851 break;
4853 case ARM64vecb_URSHL64x2:
4854 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010101, vN, vD);
4855 break;
4856 case ARM64vecb_URSHL32x4:
4857 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010101, vN, vD);
4858 break;
4859 case ARM64vecb_URSHL16x8:
4860 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010101, vN, vD);
4861 break;
4862 case ARM64vecb_URSHL8x16:
4863 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010101, vN, vD);
4864 break;
4866 case ARM64vecb_FRECPS64x2:
4867 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111111, vN, vD);
4868 break;
4869 case ARM64vecb_FRECPS32x4:
4870 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111111, vN, vD);
4871 break;
4872 case ARM64vecb_FRSQRTS64x2:
4873 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X111111, vN, vD);
4874 break;
4875 case ARM64vecb_FRSQRTS32x4:
4876 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X111111, vN, vD);
4877 break;
4879 default:
4880 goto bad;
4882 goto done;
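/* Worked example (editor's addition, illustrative only): ADD v0.4s, v1.4s,
   v2.4s is
      X_3_8_5_6_5_5(X010, X01110101, 2, X100001, 1, 0)
         = 010 01110101 00010 100001 00001 00000 = 0x4EA28420. */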
4884 case ARM64in_VModifyV: {
4885 /* 31 23 20 15 9 4
4886 010 01110 sz 1 00000 001110 n d SUQADD@sz Vd, Vn
4887 011 01110 sz 1 00000 001110 n d USQADD@sz Vd, Vn
4889 UInt vD = qregEnc(i->ARM64in.VModifyV.mod);
4890 UInt vN = qregEnc(i->ARM64in.VModifyV.arg);
4891 switch (i->ARM64in.VModifyV.op) {
4892 case ARM64vecmo_SUQADD64x2:
4893 *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X001110, vN, vD);
4894 break;
4895 case ARM64vecmo_SUQADD32x4:
4896 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X001110, vN, vD);
4897 break;
4898 case ARM64vecmo_SUQADD16x8:
4899 *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X001110, vN, vD);
4900 break;
4901 case ARM64vecmo_SUQADD8x16:
4902 *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X001110, vN, vD);
4903 break;
4904 case ARM64vecmo_USQADD64x2:
4905 *p++ = X_3_8_5_6_5_5(X011, X01110111, X00000, X001110, vN, vD);
4906 break;
4907 case ARM64vecmo_USQADD32x4:
4908 *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X001110, vN, vD);
4909 break;
4910 case ARM64vecmo_USQADD16x8:
4911 *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X001110, vN, vD);
4912 break;
4913 case ARM64vecmo_USQADD8x16:
4914 *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X001110, vN, vD);
4915 break;
4916 default:
4917 goto bad;
4919 goto done;
4921 case ARM64in_VUnaryV: {
4922 /* 31 23 20 15 9 4
4923 010 01110 11 1 00000 111110 n d FABS Vd.2d, Vn.2d
4924 010 01110 10 1 00000 111110 n d FABS Vd.4s, Vn.4s
4925 011 01110 11 1 00000 111110 n d FNEG Vd.2d, Vn.2d
4926 011 01110 10 1 00000 111110 n d FNEG Vd.4s, Vn.4s
4927 011 01110 00 1 00000 010110 n d NOT Vd.16b, Vn.16b
4929 010 01110 11 1 00000 101110 n d ABS Vd.2d, Vn.2d
4930 010 01110 10 1 00000 101110 n d ABS Vd.4s, Vn.4s
4931 010 01110 01 1 00000 101110 n d ABS Vd.8h, Vn.8h
4932 010 01110 00 1 00000 101110 n d ABS Vd.16b, Vn.16b
4934 010 01110 10 1 00000 010010 n d CLS Vd.4s, Vn.4s
4935 010 01110 01 1 00000 010010 n d CLS Vd.8h, Vn.8h
4936 010 01110 00 1 00000 010010 n d CLS Vd.16b, Vn.16b
4938 011 01110 10 1 00000 010010 n d CLZ Vd.4s, Vn.4s
4939 011 01110 01 1 00000 010010 n d CLZ Vd.8h, Vn.8h
4940 011 01110 00 1 00000 010010 n d CLZ Vd.16b, Vn.16b
4942 010 01110 00 1 00000 010110 n d CNT Vd.16b, Vn.16b
4944 011 01110 01 1 00000 010110 n d RBIT Vd.16b, Vn.16b
4945 010 01110 00 1 00000 000110 n d REV16 Vd.16b, Vn.16b
4946 011 01110 00 1 00000 000010 n d REV32 Vd.16b, Vn.16b
4947 011 01110 01 1 00000 000010 n d REV32 Vd.8h, Vn.8h
4949 010 01110 00 1 00000 000010 n d REV64 Vd.16b, Vn.16b
4950 010 01110 01 1 00000 000010 n d REV64 Vd.8h, Vn.8h
4951 010 01110 10 1 00000 000010 n d REV64 Vd.4s, Vn.4s
4953 010 01110 10 1 00001 110010 n d URECPE Vd.4s, Vn.4s
4954 011 01110 10 1 00001 110010 n d URSQRTE Vd.4s, Vn.4s
4956 010 01110 11 1 00001 110110 n d FRECPE Vd.2d, Vn.2d
4957 010 01110 10 1 00001 110110 n d FRECPE Vd.4s, Vn.4s
4959 011 01110 11 1 00001 110110 n d FRSQRTE Vd.2d, Vn.2d
4960 011 01110 10 1 00001 110110 n d FRSQRTE Vd.4s, Vn.4s
4962 011 01110 11 1 00001 111110 n d FSQRT Vd.2d, Vn.2d
4963 011 01110 10 1 00001 111110 n d FSQRT Vd.4s, Vn.4s
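(Illustration, added: each encoding line above maps onto an X_3_8_5_6_5_5
call using the same 3/8/5/6/5/5 grouping; e.g. the FABS Vd.2d line becomes
X_3_8_5_6_5_5(X010, X01110111, X00000, X111110, vN, vD), as in the first
case below.)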
4965 UInt vD = qregEnc(i->ARM64in.VUnaryV.dst);
4966 UInt vN = qregEnc(i->ARM64in.VUnaryV.arg);
4967 switch (i->ARM64in.VUnaryV.op) {
4968 case ARM64vecu_FABS64x2:
4969 *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X111110, vN, vD);
4970 break;
4971 case ARM64vecu_FABS32x4:
4972 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X111110, vN, vD);
4973 break;
4974 case ARM64vecu_FNEG64x2:
4975 *p++ = X_3_8_5_6_5_5(X011, X01110111, X00000, X111110, vN, vD);
4976 break;
4977 case ARM64vecu_FNEG32x4:
4978 *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X111110, vN, vD);
4979 break;
4980 case ARM64vecu_NOT:
4981 *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X010110, vN, vD);
4982 break;
4983 case ARM64vecu_ABS64x2:
4984 *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X101110, vN, vD);
4985 break;
4986 case ARM64vecu_ABS32x4:
4987 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X101110, vN, vD);
4988 break;
4989 case ARM64vecu_ABS16x8:
4990 *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X101110, vN, vD);
4991 break;
4992 case ARM64vecu_ABS8x16:
4993 *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X101110, vN, vD);
4994 break;
4995 case ARM64vecu_CLS32x4:
4996 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X010010, vN, vD);
4997 break;
4998 case ARM64vecu_CLS16x8:
4999 *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X010010, vN, vD);
5000 break;
5001 case ARM64vecu_CLS8x16:
5002 *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X010010, vN, vD);
5003 break;
5004 case ARM64vecu_CLZ32x4:
5005 *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X010010, vN, vD);
5006 break;
5007 case ARM64vecu_CLZ16x8:
5008 *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X010010, vN, vD);
5009 break;
5010 case ARM64vecu_CLZ8x16:
5011 *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X010010, vN, vD);
5012 break;
5013 case ARM64vecu_CNT8x16:
5014 *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X010110, vN, vD);
5015 break;
5016 case ARM64vecu_RBIT:
5017 *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X010110, vN, vD);
5018 break;
5019 case ARM64vecu_REV1616B:
5020 *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X000110, vN, vD);
5021 break;
5022 case ARM64vecu_REV3216B:
5023 *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X000010, vN, vD);
5024 break;
5025 case ARM64vecu_REV328H:
5026 *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X000010, vN, vD);
5027 break;
5028 case ARM64vecu_REV6416B:
5029 *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X000010, vN, vD);
5030 break;
5031 case ARM64vecu_REV648H:
5032 *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X000010, vN, vD);
5033 break;
5034 case ARM64vecu_REV644S:
5035 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X000010, vN, vD);
5036 break;
5037 case ARM64vecu_URECPE32x4:
5038 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00001, X110010, vN, vD);
5039 break;
5040 case ARM64vecu_URSQRTE32x4:
5041 *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X110010, vN, vD);
5042 break;
5043 case ARM64vecu_FRECPE64x2:
5044 *p++ = X_3_8_5_6_5_5(X010, X01110111, X00001, X110110, vN, vD);
5045 break;
5046 case ARM64vecu_FRECPE32x4:
5047 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00001, X110110, vN, vD);
5048 break;
5049 case ARM64vecu_FRSQRTE64x2:
5050 *p++ = X_3_8_5_6_5_5(X011, X01110111, X00001, X110110, vN, vD);
5051 break;
5052 case ARM64vecu_FRSQRTE32x4:
5053 *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X110110, vN, vD);
5054 break;
5055 case ARM64vecu_FSQRT64x2:
5056 *p++ = X_3_8_5_6_5_5(X011, X01110111, X00001, X111110, vN, vD);
5057 break;
5058 case ARM64vecu_FSQRT32x4:
5059 *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X111110, vN, vD);
5060 break;
5061 default:
5062 goto bad;
5064 goto done;
5066 case ARM64in_VNarrowV: {
5067 /* 31 23 21 15 9 4
5068 000 01110 00 1,00001 001010 n d XTN Vd.8b, Vn.8h
5069 000 01110 01 1,00001 001010 n d XTN Vd.4h, Vn.4s
5070 000 01110 10 1,00001 001010 n d XTN Vd.2s, Vn.2d
5072 001 01110 00 1,00001 001010 n d SQXTUN Vd.8b, Vn.8h
5073 001 01110 01 1,00001 001010 n d SQXTUN Vd.4h, Vn.4s
5074 001 01110 10 1,00001 001010 n d SQXTUN Vd.2s, Vn.2d
5076 000 01110 00 1,00001 010010 n d SQXTN Vd.8b, Vn.8h
5077 000 01110 01 1,00001 010010 n d SQXTN Vd.4h, Vn.4s
5078 000 01110 10 1,00001 010010 n d SQXTN Vd.2s, Vn.2d
5080 001 01110 00 1,00001 010010 n d UQXTN Vd.8b, Vn.8h
5081 001 01110 01 1,00001 010010 n d UQXTN Vd.4h, Vn.4s
5082 001 01110 10 1,00001 010010 n d UQXTN Vd.2s, Vn.2d
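(Illustration, added: the size field is formed below as
X01110001 | (dszBlg2 << 1), so dszBlg2 == 1 selects the ...01110 01 1...
forms above, i.e. the Vd.4h / Vn.4s narrowings.)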
5084 UInt vD = qregEnc(i->ARM64in.VNarrowV.dst);
5085 UInt vN = qregEnc(i->ARM64in.VNarrowV.src);
5086 UInt dszBlg2 = i->ARM64in.VNarrowV.dszBlg2;
5087 vassert(dszBlg2 >= 0 && dszBlg2 <= 2);
5088 switch (i->ARM64in.VNarrowV.op) {
5089 case ARM64vecna_XTN:
5090 *p++ = X_3_8_5_6_5_5(X000, X01110001 | (dszBlg2 << 1),
5091 X00001, X001010, vN, vD);
5092 goto done;
5093 case ARM64vecna_SQXTUN:
5094 *p++ = X_3_8_5_6_5_5(X001, X01110001 | (dszBlg2 << 1),
5095 X00001, X001010, vN, vD);
5096 goto done;
5097 case ARM64vecna_SQXTN:
5098 *p++ = X_3_8_5_6_5_5(X000, X01110001 | (dszBlg2 << 1),
5099 X00001, X010010, vN, vD);
5100 goto done;
5101 case ARM64vecna_UQXTN:
5102 *p++ = X_3_8_5_6_5_5(X001, X01110001 | (dszBlg2 << 1),
5103 X00001, X010010, vN, vD);
5104 goto done;
5105 default:
5106 break;
5108 goto bad;
5110 case ARM64in_VShiftImmV: {
5112 011 011110 immh immb 000001 n d USHR Vd.T, Vn.T, #sh
5113 010 011110 immh immb 000001 n d SSHR Vd.T, Vn.T, #sh
5115 001 011110 immh immb 100101 n d UQSHRN ,,#sh
5116 000 011110 immh immb 100101 n d SQSHRN ,,#sh
5117 001 011110 immh immb 100001 n d SQSHRUN ,,#sh
5119 001 011110 immh immb 100111 n d UQRSHRN ,,#sh
5120 000 011110 immh immb 100111 n d SQRSHRN ,,#sh
5121 001 011110 immh immb 100011 n d SQRSHRUN ,,#sh
5123 where immh:immb
5124 = case T of
5125 2d | sh in 1..64 -> let xxxxxx = 64-sh in 1xxx:xxx
5126 4s | sh in 1..32 -> let xxxxx = 32-sh in 01xx:xxx
5127 8h | sh in 1..16 -> let xxxx = 16-sh in 001x:xxx
5128 16b | sh in 1..8 -> let xxx = 8-sh in 0001:xxx
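(Worked example, added for illustration: USHR Vd.4s, Vn.4s, #3 needs
xxxxx = 32-3 = 29 = 11101, hence immh:immb = 0111:101; the right32x4
case below builds exactly that as X0100000 | (32-sh).)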
5130 010 011110 immh immb 010101 n d SHL Vd.T, Vn.T, #sh
5132 011 011110 immh immb 011101 n d UQSHL Vd.T, Vn.T, #sh
5133 010 011110 immh immb 011101 n d SQSHL Vd.T, Vn.T, #sh
5134 011 011110 immh immb 011001 n d SQSHLU Vd.T, Vn.T, #sh
5136 where immh:immb
5137 = case T of
5138 2d | sh in 0..63 -> let xxxxxx = sh in 1xxx:xxx
5139 4s | sh in 0..31 -> let xxxxx = sh in 01xx:xxx
5140 8h | sh in 0..15 -> let xxxx = sh in 001x:xxx
5141 16b | sh in 0..7 -> let xxx = sh in 0001:xxx
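(Worked example, added for illustration: SHL Vd.8h, Vn.8h, #5 takes
xxxx = sh = 5 = 0101, hence immh:immb = 0010:101; the left16x8 case
below builds that as X0010000 | sh.)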
5143 UInt vD = qregEnc(i->ARM64in.VShiftImmV.dst);
5144 UInt vN = qregEnc(i->ARM64in.VShiftImmV.src);
5145 UInt sh = i->ARM64in.VShiftImmV.amt;
5146 UInt tmpl = 0; /* invalid */
5148 const UInt tmpl_USHR
5149 = X_3_6_7_6_5_5(X011, X011110, 0, X000001, vN, vD);
5150 const UInt tmpl_SSHR
5151 = X_3_6_7_6_5_5(X010, X011110, 0, X000001, vN, vD);
5153 const UInt tmpl_UQSHRN
5154 = X_3_6_7_6_5_5(X001, X011110, 0, X100101, vN, vD);
5155 const UInt tmpl_SQSHRN
5156 = X_3_6_7_6_5_5(X000, X011110, 0, X100101, vN, vD);
5157 const UInt tmpl_SQSHRUN
5158 = X_3_6_7_6_5_5(X001, X011110, 0, X100001, vN, vD);
5160 const UInt tmpl_UQRSHRN
5161 = X_3_6_7_6_5_5(X001, X011110, 0, X100111, vN, vD);
5162 const UInt tmpl_SQRSHRN
5163 = X_3_6_7_6_5_5(X000, X011110, 0, X100111, vN, vD);
5164 const UInt tmpl_SQRSHRUN
5165 = X_3_6_7_6_5_5(X001, X011110, 0, X100011, vN, vD);
5167 const UInt tmpl_SHL
5168 = X_3_6_7_6_5_5(X010, X011110, 0, X010101, vN, vD);
5170 const UInt tmpl_UQSHL
5171 = X_3_6_7_6_5_5(X011, X011110, 0, X011101, vN, vD);
5172 const UInt tmpl_SQSHL
5173 = X_3_6_7_6_5_5(X010, X011110, 0, X011101, vN, vD);
5174 const UInt tmpl_SQSHLU
5175 = X_3_6_7_6_5_5(X011, X011110, 0, X011001, vN, vD);
5177 switch (i->ARM64in.VShiftImmV.op) {
5178 case ARM64vecshi_SSHR64x2: tmpl = tmpl_SSHR; goto right64x2;
5179 case ARM64vecshi_USHR64x2: tmpl = tmpl_USHR; goto right64x2;
5180 case ARM64vecshi_SHL64x2: tmpl = tmpl_SHL; goto left64x2;
5181 case ARM64vecshi_UQSHL64x2: tmpl = tmpl_UQSHL; goto left64x2;
5182 case ARM64vecshi_SQSHL64x2: tmpl = tmpl_SQSHL; goto left64x2;
5183 case ARM64vecshi_SQSHLU64x2: tmpl = tmpl_SQSHLU; goto left64x2;
5184 case ARM64vecshi_SSHR32x4: tmpl = tmpl_SSHR; goto right32x4;
5185 case ARM64vecshi_USHR32x4: tmpl = tmpl_USHR; goto right32x4;
5186 case ARM64vecshi_UQSHRN2SD: tmpl = tmpl_UQSHRN; goto right32x4;
5187 case ARM64vecshi_SQSHRN2SD: tmpl = tmpl_SQSHRN; goto right32x4;
5188 case ARM64vecshi_SQSHRUN2SD: tmpl = tmpl_SQSHRUN; goto right32x4;
5189 case ARM64vecshi_UQRSHRN2SD: tmpl = tmpl_UQRSHRN; goto right32x4;
5190 case ARM64vecshi_SQRSHRN2SD: tmpl = tmpl_SQRSHRN; goto right32x4;
5191 case ARM64vecshi_SQRSHRUN2SD: tmpl = tmpl_SQRSHRUN; goto right32x4;
5192 case ARM64vecshi_SHL32x4: tmpl = tmpl_SHL; goto left32x4;
5193 case ARM64vecshi_UQSHL32x4: tmpl = tmpl_UQSHL; goto left32x4;
5194 case ARM64vecshi_SQSHL32x4: tmpl = tmpl_SQSHL; goto left32x4;
5195 case ARM64vecshi_SQSHLU32x4: tmpl = tmpl_SQSHLU; goto left32x4;
5196 case ARM64vecshi_SSHR16x8: tmpl = tmpl_SSHR; goto right16x8;
5197 case ARM64vecshi_USHR16x8: tmpl = tmpl_USHR; goto right16x8;
5198 case ARM64vecshi_UQSHRN4HS: tmpl = tmpl_UQSHRN; goto right16x8;
5199 case ARM64vecshi_SQSHRN4HS: tmpl = tmpl_SQSHRN; goto right16x8;
5200 case ARM64vecshi_SQSHRUN4HS: tmpl = tmpl_SQSHRUN; goto right16x8;
5201 case ARM64vecshi_UQRSHRN4HS: tmpl = tmpl_UQRSHRN; goto right16x8;
5202 case ARM64vecshi_SQRSHRN4HS: tmpl = tmpl_SQRSHRN; goto right16x8;
5203 case ARM64vecshi_SQRSHRUN4HS: tmpl = tmpl_SQRSHRUN; goto right16x8;
5204 case ARM64vecshi_SHL16x8: tmpl = tmpl_SHL; goto left16x8;
5205 case ARM64vecshi_UQSHL16x8: tmpl = tmpl_UQSHL; goto left16x8;
5206 case ARM64vecshi_SQSHL16x8: tmpl = tmpl_SQSHL; goto left16x8;
5207 case ARM64vecshi_SQSHLU16x8: tmpl = tmpl_SQSHLU; goto left16x8;
5208 case ARM64vecshi_SSHR8x16: tmpl = tmpl_SSHR; goto right8x16;
5209 case ARM64vecshi_USHR8x16: tmpl = tmpl_USHR; goto right8x16;
5210 case ARM64vecshi_UQSHRN8BH: tmpl = tmpl_UQSHRN; goto right8x16;
5211 case ARM64vecshi_SQSHRN8BH: tmpl = tmpl_SQSHRN; goto right8x16;
5212 case ARM64vecshi_SQSHRUN8BH: tmpl = tmpl_SQSHRUN; goto right8x16;
5213 case ARM64vecshi_UQRSHRN8BH: tmpl = tmpl_UQRSHRN; goto right8x16;
5214 case ARM64vecshi_SQRSHRN8BH: tmpl = tmpl_SQRSHRN; goto right8x16;
5215 case ARM64vecshi_SQRSHRUN8BH: tmpl = tmpl_SQRSHRUN; goto right8x16;
5216 case ARM64vecshi_SHL8x16: tmpl = tmpl_SHL; goto left8x16;
5217 case ARM64vecshi_UQSHL8x16: tmpl = tmpl_UQSHL; goto left8x16;
5218 case ARM64vecshi_SQSHL8x16: tmpl = tmpl_SQSHL; goto left8x16;
5219 case ARM64vecshi_SQSHLU8x16: tmpl = tmpl_SQSHLU; goto left8x16;
5221 default: break;
5223 right64x2:
5224 if (sh >= 1 && sh <= 63) {
5225 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X1000000 | (64-sh), 0,0,0);
5226 goto done;
5228 break;
5229 right32x4:
5230 if (sh >= 1 && sh <= 32) {
5231 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0100000 | (32-sh), 0,0,0);
5232 goto done;
5234 break;
5235 right16x8:
5236 if (sh >= 1 && sh <= 16) {
5237 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0010000 | (16-sh), 0,0,0);
5238 goto done;
5240 break;
5241 right8x16:
5242 if (sh >= 1 && sh <= 8) {
5243 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0001000 | (8-sh), 0,0,0);
5244 goto done;
5246 break;
5248 left64x2:
5249 if (sh >= 0 && sh <= 63) {
5250 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X1000000 | sh, 0,0,0);
5251 goto done;
5253 break;
5254 left32x4:
5255 if (sh >= 0 && sh <= 31) {
5256 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0100000 | sh, 0,0,0);
5257 goto done;
5259 break;
5260 left16x8:
5261 if (sh >= 0 && sh <= 15) {
5262 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0010000 | sh, 0,0,0);
5263 goto done;
5265 break;
5266 left8x16:
5267 if (sh >= 0 && sh <= 7) {
5268 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0001000 | sh, 0,0,0);
5269 goto done;
5271 break;
5273 goto bad;
5275 case ARM64in_VExtV: {
5277 011 01110 000 m 0 imm4 0 n d EXT Vd.16b, Vn.16b, Vm.16b, #imm4
5278 where imm4 = the shift amount, in bytes,
5279 Vn is low operand, Vm is high operand
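(Note, added: imm4 occupies bits 14..11 of the instruction, which is
why the emitter below folds it in as X000000 | (imm4 << 1).)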
5281 UInt vD = qregEnc(i->ARM64in.VExtV.dst);
5282 UInt vN = qregEnc(i->ARM64in.VExtV.srcLo);
5283 UInt vM = qregEnc(i->ARM64in.VExtV.srcHi);
5284 UInt imm4 = i->ARM64in.VExtV.amtB;
5285 vassert(imm4 >= 1 && imm4 <= 15);
5286 *p++ = X_3_8_5_6_5_5(X011, X01110000, vM,
5287 X000000 | (imm4 << 1), vN, vD);
5288 goto done;
5290 case ARM64in_VImmQ: {
5291 UInt rQ = qregEnc(i->ARM64in.VImmQ.rQ);
5292 UShort imm = i->ARM64in.VImmQ.imm;
5293 vassert(rQ < 32);
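/* Note (added): judging from the cases handled below, imm acts as a
   per-byte mask -- each set bit turns the corresponding byte of the
   128-bit result into 0xFF -- and only these mask patterns are
   supported; anything else falls through to 'bad'. */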
5294 switch (imm) {
5295 case 0x0000:
5296 // movi rQ.4s, #0x0 == 0x4F 0x00 0x04 000 rQ
5297 *p++ = 0x4F000400 | rQ;
5298 goto done;
5299 case 0x0001:
5300 // movi rQ, #0xFF == 0x2F 0x00 0xE4 001 rQ
5301 *p++ = 0x2F00E420 | rQ;
5302 goto done;
5303 case 0x0003:
5304 // movi rQ, #0xFFFF == 0x2F 0x00 0xE4 011 rQ
5305 *p++ = 0x2F00E460 | rQ;
5306 goto done;
5307 case 0x000F:
5308 // movi rQ, #0xFFFFFFFF == 0x2F 0x00 0xE5 111 rQ
5309 *p++ = 0x2F00E5E0 | rQ;
5310 goto done;
5311 case 0x003F:
5312 // movi rQ, #0xFFFFFFFFFFFF == 0x2F 0x01 0xE7 111 rQ
5313 *p++ = 0x2F01E7E0 | rQ;
5314 goto done;
5315 case 0x00FF:
5316 // movi rQ, #0xFFFFFFFFFFFFFFFF == 0x2F 0x07 0xE7 111 rQ
5317 *p++ = 0x2F07E7E0 | rQ;
5318 goto done;
5319 case 0xFFFF:
5320 // mvni rQ.4s, #0x0 == 0x6F 0x00 0x04 000 rQ
5321 *p++ = 0x6F000400 | rQ;
5322 goto done;
5323 default:
5324 break;
5326 goto bad; /* no other handled cases right now */
5329 case ARM64in_VDfromX: {
5330 /* INS Vd.D[0], rX
5331 0100 1110 0000 1000 0001 11 nn dd INS Vd.D[0], Xn
5332 This isn't wonderful, in the sense that the upper half of
5333 the vector register stays unchanged and thus the insn is
5334 data dependent on its output register. */
5335 UInt dd = dregEnc(i->ARM64in.VDfromX.rD);
5336 UInt xx = iregEnc(i->ARM64in.VDfromX.rX);
5337 vassert(xx < 31);
5338 *p++ = 0x4E081C00 | X_2_6_2_12_5_5(0,0,0,0,xx,dd);
5339 goto done;
5342 case ARM64in_VQfromX: {
5343 /* FMOV D, X
5344 1001 1110 0110 0111 0000 00 nn dd FMOV Vd.D[0], Xn
5345 FMOV to Dd zeroes out the top half of the destination, which
5346 is what we need. TODO: can we do VDfromX and VQfromXX better? */
5347 UInt dd = qregEnc(i->ARM64in.VQfromX.rQ);
5348 UInt xx = iregEnc(i->ARM64in.VQfromX.rXlo);
5349 vassert(xx < 31);
5350 *p++ = 0x9E670000 | X_2_6_2_12_5_5(0,0,0,0,xx,dd);
5351 goto done;
5354 case ARM64in_VQfromXX: {
5355 /* What we really generate is a two insn sequence:
5356 INS Vd.D[0], Xlo; INS Vd.D[1], Xhi
5357 0100 1110 0000 1000 0001 11 nn dd INS Vd.D[0], Xn
5358 0100 1110 0001 1000 0001 11 nn dd INS Vd.D[1], Xn
5360 UInt qq = qregEnc(i->ARM64in.VQfromXX.rQ);
5361 UInt xhi = iregEnc(i->ARM64in.VQfromXX.rXhi);
5362 UInt xlo = iregEnc(i->ARM64in.VQfromXX.rXlo);
5363 vassert(xhi < 31 && xlo < 31);
5364 *p++ = 0x4E081C00 | X_2_6_2_12_5_5(0,0,0,0,xlo,qq);
5365 *p++ = 0x4E181C00 | X_2_6_2_12_5_5(0,0,0,0,xhi,qq);
5366 goto done;
5369 case ARM64in_VXfromQ: {
5370 /* 010 0111 0000 01000 001111 nn dd UMOV Xd, Vn.D[0]
5371 010 0111 0000 11000 001111 nn dd UMOV Xd, Vn.D[1]
5373 UInt dd = iregEnc(i->ARM64in.VXfromQ.rX);
5374 UInt nn = qregEnc(i->ARM64in.VXfromQ.rQ);
5375 UInt laneNo = i->ARM64in.VXfromQ.laneNo;
5376 vassert(dd < 31);
5377 vassert(laneNo < 2);
5378 *p++ = X_3_8_5_6_5_5(X010, X01110000,
5379 laneNo == 1 ? X11000 : X01000, X001111, nn, dd);
5380 goto done;
5383 case ARM64in_VXfromDorS: {
5384 /* 000 11110001 00110 000000 n d FMOV Wd, Sn
5385 100 11110011 00110 000000 n d FMOV Xd, Dn
5387 UInt dd = iregEnc(i->ARM64in.VXfromDorS.rX);
5388 UInt nn = dregEnc(i->ARM64in.VXfromDorS.rDorS);
5389 Bool fromD = i->ARM64in.VXfromDorS.fromD;
5390 vassert(dd < 31);
5391 *p++ = X_3_8_5_6_5_5(fromD ? X100 : X000,
5392 fromD ? X11110011 : X11110001,
5393 X00110, X000000, nn, dd);
5394 goto done;
5397 case ARM64in_VMov: {
5398 /* 000 11110 00 10000 00 10000 n d FMOV Sd, Sn
5399 000 11110 01 10000 00 10000 n d FMOV Dd, Dn
5400 010 01110 10 1 n 0 00111 n d MOV Vd.16b, Vn.16b
5402 HReg rD = i->ARM64in.VMov.dst;
5403 HReg rN = i->ARM64in.VMov.src;
5404 switch (i->ARM64in.VMov.szB) {
5405 case 16: {
5406 UInt dd = qregEnc(rD);
5407 UInt nn = qregEnc(rN);
5408 *p++ = X_3_8_5_6_5_5(X010, X01110101, nn, X000111, nn, dd);
5409 goto done;
5411 case 8: {
5412 UInt dd = dregEnc(rD);
5413 UInt nn = dregEnc(rN);
5414 *p++ = X_3_8_5_6_5_5(X000, X11110011, X00000, X010000, nn, dd);
5415 goto done;
5417 default:
5418 break;
5420 goto bad;
5423 case ARM64in_EvCheck: {
5424 /* The sequence is fixed (canned) except for the two amodes
5425 supplied by the insn. These don't change the length, though.
5426 We generate:
5427 ldr w9, [x21 + #8] 8 == offsetof(host_EvC_COUNTER)
5428 subs w9, w9, #1
5429 str w9, [x21 + #8] 8 == offsetof(host_EvC_COUNTER)
5430 bpl nofail
5431 ldr x9, [x21 + #0] 0 == offsetof(host_EvC_FAILADDR)
5432 br x9
5433 nofail:
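(Size note, added: that is six 4-byte instructions, i.e. 24 bytes,
which is what evCheckSzB_ARM64() below returns and what the crosscheck
at the end of this case re-verifies.)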
5435 UInt* p0 = p;
5436 p = do_load_or_store32(p, True/*isLoad*/, /*w*/9,
5437 i->ARM64in.EvCheck.amCounter);
5438 *p++ = 0x71000529; /* subs w9, w9, #1 */
5439 p = do_load_or_store32(p, False/*!isLoad*/, /*w*/9,
5440 i->ARM64in.EvCheck.amCounter);
5441 *p++ = 0x54000065; /* bpl nofail */
5442 p = do_load_or_store64(p, True/*isLoad*/, /*x*/9,
5443 i->ARM64in.EvCheck.amFailAddr);
5444 *p++ = 0xD61F0120; /* br x9 */
5445 /* nofail: */
5447 /* Crosscheck */
5448 vassert(evCheckSzB_ARM64() == (UChar*)p - (UChar*)p0);
5449 goto done;
5452 case ARM64in_ProfInc: {
5453 /* We generate:
5454 (ctrP is unknown now, so use 0x6555'7555'8555'9566 in the
5455 expectation that a later call to LibVEX_patchProfCtr
5456 will be used to fill in the immediate fields once the
5457 right value is known.)
5458 imm64-exactly4 x9, 0x6555'7555'8555'9566
5459 ldr x8, [x9]
5460 add x8, x8, #1
5461 str x8, [x9]
5463 p = imm64_to_ireg_EXACTLY4(p, /*x*/9, 0x6555755585559566ULL);
5464 *p++ = 0xF9400128;
5465 *p++ = 0x91000508;
5466 *p++ = 0xF9000128;
5467 /* Tell the caller .. */
5468 vassert(!(*is_profInc));
5469 *is_profInc = True;
5470 goto done;
5473 /* ... */
5474 default:
5475 goto bad;
5478 bad:
5479 ppARM64Instr(i);
5480 vpanic("emit_ARM64Instr");
5481 /*NOTREACHED*/
5483 done:
5484 vassert(((UChar*)p) - &buf[0] <= 40);
5485 return ((UChar*)p) - &buf[0];
5489 /* How big is an event check? See case for ARM64in_EvCheck in
5490 emit_ARM64Instr just above. That crosschecks what this returns, so
5491 we can tell if we're inconsistent. */
5492 Int evCheckSzB_ARM64 (void)
5494 return 24;
5498 /* NB: what goes on here has to be very closely coordinated with the
5499 emitInstr case for XDirect, above. */
5500 VexInvalRange chainXDirect_ARM64 ( VexEndness endness_host,
5501 void* place_to_chain,
5502 const void* disp_cp_chain_me_EXPECTED,
5503 const void* place_to_jump_to )
5505 vassert(endness_host == VexEndnessLE);
5507 /* What we're expecting to see is:
5508 movw x9, disp_cp_chain_me_to_EXPECTED[15:0]
5509 movk x9, disp_cp_chain_me_to_EXPECTED[31:16], lsl 16
5510 movk x9, disp_cp_chain_me_to_EXPECTED[47:32], lsl 32
5511 movk x9, disp_cp_chain_me_to_EXPECTED[63:48], lsl 48
5512 blr x9
5514 <16 bytes generated by imm64_to_ireg_EXACTLY4>
5515 D6 3F 01 20
5517 UInt* p = (UInt*)place_to_chain;
5518 vassert(0 == (3 & (HWord)p));
5519 vassert(is_imm64_to_ireg_EXACTLY4(
5520 p, /*x*/9, (Addr)disp_cp_chain_me_EXPECTED));
5521 vassert(p[4] == 0xD63F0120);
5523 /* And what we want to change it to is:
5524 movw x9, place_to_jump_to[15:0]
5525 movk x9, place_to_jump_to[31:16], lsl 16
5526 movk x9, place_to_jump_to[47:32], lsl 32
5527 movk x9, place_to_jump_to[63:48], lsl 48
5528 br x9
5530 <16 bytes generated by imm64_to_ireg_EXACTLY4>
5531 D6 1F 01 20
5533 The replacement has the same length as the original.
5535 (void)imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)place_to_jump_to);
5536 p[4] = 0xD61F0120;
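/* Note (added): five 32-bit words are (re)written here -- the four
   emitted by imm64_to_ireg_EXACTLY4 plus the final BR -- hence the
   20-byte invalidation range. */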
5538 VexInvalRange vir = {(HWord)p, 20};
5539 return vir;
5543 /* NB: what goes on here has to be very closely coordinated with the
5544 emitInstr case for XDirect, above. */
5545 VexInvalRange unchainXDirect_ARM64 ( VexEndness endness_host,
5546 void* place_to_unchain,
5547 const void* place_to_jump_to_EXPECTED,
5548 const void* disp_cp_chain_me )
5550 vassert(endness_host == VexEndnessLE);
5552 /* What we're expecting to see is:
5553 movw x9, place_to_jump_to_EXPECTED[15:0]
5554 movk x9, place_to_jump_to_EXPECTED[31:16], lsl 16
5555 movk x9, place_to_jump_to_EXPECTED[47:32], lsl 32
5556 movk x9, place_to_jump_to_EXPECTED[63:48], lsl 48
5557 br x9
5559 <16 bytes generated by imm64_to_ireg_EXACTLY4>
5560 D6 1F 01 20
5562 UInt* p = (UInt*)place_to_unchain;
5563 vassert(0 == (3 & (HWord)p));
5564 vassert(is_imm64_to_ireg_EXACTLY4(
5565 p, /*x*/9, (Addr)place_to_jump_to_EXPECTED));
5566 vassert(p[4] == 0xD61F0120);
5568 /* And what we want to change it to is:
5569 movw x9, disp_cp_chain_me_to[15:0]
5570 movk x9, disp_cp_chain_me_to[31:16], lsl 16
5571 movk x9, disp_cp_chain_me_to[47:32], lsl 32
5572 movk x9, disp_cp_chain_me_to[63:48], lsl 48
5573 blr x9
5575 <16 bytes generated by imm64_to_ireg_EXACTLY4>
5576 D6 3F 01 20
5578 (void)imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)disp_cp_chain_me);
5579 p[4] = 0xD63F0120;
5581 VexInvalRange vir = {(HWord)p, 20};
5582 return vir;
5586 /* Patch the counter address into a profile inc point, as previously
5587 created by the ARM64in_ProfInc case for emit_ARM64Instr. */
5588 VexInvalRange patchProfInc_ARM64 ( VexEndness endness_host,
5589 void* place_to_patch,
5590 const ULong* location_of_counter )
5592 vassert(sizeof(ULong*) == 8);
5593 vassert(endness_host == VexEndnessLE);
5594 UInt* p = (UInt*)place_to_patch;
5595 vassert(0 == (3 & (HWord)p));
5596 vassert(is_imm64_to_ireg_EXACTLY4(p, /*x*/9, 0x6555755585559566ULL));
5597 vassert(p[4] == 0xF9400128);
5598 vassert(p[5] == 0x91000508);
5599 vassert(p[6] == 0xF9000128);
5600 imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)location_of_counter);
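/* Note (added): only the four imm64_to_ireg_EXACTLY4 words are
   rewritten; the following ldr/add/str stay as they are, hence the
   16-byte (4*4) invalidation range. */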
5601 VexInvalRange vir = {(HWord)p, 4*4};
5602 return vir;
5605 /*---------------------------------------------------------------*/
5606 /*--- end host_arm64_defs.c ---*/
5607 /*---------------------------------------------------------------*/