2 /*---------------------------------------------------------------*/
3 /*--- begin host_arm64_defs.c ---*/
4 /*---------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2013-2017 OpenWorks
11 info@open-works.net
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
27 */
29 #include "libvex_basictypes.h"
30 #include "libvex.h"
31 #include "libvex_trc_values.h"
33 #include "main_util.h"
34 #include "host_generic_regs.h"
35 #include "host_arm64_defs.h"
38 /* --------- Registers. --------- */
40 /* The usual HReg abstraction. We use the following classes only:
41 X regs (64 bit int)
42 D regs (64 bit float, also used for 32 bit float)
43 Q regs (128 bit vector)
44 */
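/* For example: hregARM64_X22() is a real register of class HRcInt64 with
   encoding 22, which ppHRegARM64 below prints as "x22".  D and Q registers
   carry classes HRcFlt64 and HRcVec128 respectively, again with the
   architectural register number (0..31) as the encoding. */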
46 const RRegUniverse* getRRegUniverse_ARM64 ( void )
48 /* The real-register universe is a big constant, so we just want to
49 initialise it once. */
50 static RRegUniverse rRegUniverse_ARM64;
51 static Bool rRegUniverse_ARM64_initted = False;
53 /* Handy shorthand, nothing more */
54 RRegUniverse* ru = &rRegUniverse_ARM64;
56 /* This isn't thread-safe. Sigh. */
57 if (LIKELY(rRegUniverse_ARM64_initted))
58 return ru;
60 RRegUniverse__init(ru);
62 /* Add the registers. The initial segment of this array must be
63 those available for allocation by reg-alloc, and those that
64 follow are not available for allocation. */
65 ru->allocable_start[HRcInt64] = ru->size;
66 ru->regs[ru->size++] = hregARM64_X22();
67 ru->regs[ru->size++] = hregARM64_X23();
68 ru->regs[ru->size++] = hregARM64_X24();
69 ru->regs[ru->size++] = hregARM64_X25();
70 ru->regs[ru->size++] = hregARM64_X26();
71 ru->regs[ru->size++] = hregARM64_X27();
72 ru->regs[ru->size++] = hregARM64_X28();
74 ru->regs[ru->size++] = hregARM64_X0();
75 ru->regs[ru->size++] = hregARM64_X1();
76 ru->regs[ru->size++] = hregARM64_X2();
77 ru->regs[ru->size++] = hregARM64_X3();
78 ru->regs[ru->size++] = hregARM64_X4();
79 ru->regs[ru->size++] = hregARM64_X5();
80 ru->regs[ru->size++] = hregARM64_X6();
81 ru->regs[ru->size++] = hregARM64_X7();
82 ru->allocable_end[HRcInt64] = ru->size - 1;
83 // X8 is used as a ProfInc temporary, not available to regalloc.
84 // X9 is a chaining/spill temporary, not available to regalloc.
86 // Do we really need all these?
87 //ru->regs[ru->size++] = hregARM64_X10();
88 //ru->regs[ru->size++] = hregARM64_X11();
89 //ru->regs[ru->size++] = hregARM64_X12();
90 //ru->regs[ru->size++] = hregARM64_X13();
91 //ru->regs[ru->size++] = hregARM64_X14();
92 //ru->regs[ru->size++] = hregARM64_X15();
93 // X21 is the guest state pointer, not available to regalloc.
95 // vector regs. Unfortunately not callee-saved.
96 ru->allocable_start[HRcVec128] = ru->size;
97 ru->regs[ru->size++] = hregARM64_Q16();
98 ru->regs[ru->size++] = hregARM64_Q17();
99 ru->regs[ru->size++] = hregARM64_Q18();
100 ru->regs[ru->size++] = hregARM64_Q19();
101 ru->regs[ru->size++] = hregARM64_Q20();
102 ru->allocable_end[HRcVec128] = ru->size - 1;
104 // F64 regs, all of which are callee-saved
105 ru->allocable_start[HRcFlt64] = ru->size;
106 ru->regs[ru->size++] = hregARM64_D8();
107 ru->regs[ru->size++] = hregARM64_D9();
108 ru->regs[ru->size++] = hregARM64_D10();
109 ru->regs[ru->size++] = hregARM64_D11();
110 ru->regs[ru->size++] = hregARM64_D12();
111 ru->regs[ru->size++] = hregARM64_D13();
112 ru->allocable_end[HRcFlt64] = ru->size - 1;
114 ru->allocable = ru->size;
115 /* And other regs, not available to the allocator. */
117 // unavail: x21 as GSP
118 // x8 is used as a ProfInc temporary
119 // x9 is used as a spill/reload/chaining/call temporary
120 // x30 as LR
122 // x31 is mentionable, but not allocatable, and is dangerous to use
123 // because of SP-vs-ZR overloading. Here, we call it `XZR_XSP`. Whether
124 // it denotes the zero register or the stack pointer depends both on what
125 // kind of instruction it appears in and even on the position within the
126 // instruction at which it appears. So be careful. There's absolutely
127 // nothing to prevent shooting oneself in the foot.
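// For example (standard A64 behaviour): in "ldr x0, [sp, #8]" a register
// field of 31 in the base position means SP, whereas in "orr x0, xzr, x1"
// (the canonical MOV encoding) the same field value in the Rn slot means
// the zero register.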
129 // Currently, we have 15 allocatable integer registers:
130 // 0 1 2 3 4 5 6 7 22 23 24 25 26 27 28
132 // Hence for the allocatable integer registers we have:
134 // callee-saved: 22 23 24 25 26 27 28
135 // caller-saved: 0 1 2 3 4 5 6 7
137 // If the set of available registers changes, or if their e/r
138 // (callee-/caller-saved) status changes, be sure to re-check/sync the
139 // definition of getRegUsage for ARM64Instr_Call too.
141 ru->regs[ru->size++] = hregARM64_X8();
142 ru->regs[ru->size++] = hregARM64_X9();
143 ru->regs[ru->size++] = hregARM64_X21();
144 ru->regs[ru->size++] = hregARM64_XZR_XSP();
146 rRegUniverse_ARM64_initted = True;
148 RRegUniverse__check_is_sane(ru);
149 return ru;
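/* With the assignments above, the universe lays out as: slots 0..14 hold the
   allocatable X regs (x22..x28, then x0..x7), slots 15..19 the allocatable
   Q regs (q16..q20), slots 20..25 the allocatable D regs (d8..d13), so
   ru->allocable == 26; slots 26..29 hold the mentionable-but-unallocatable
   x8, x9, x21 and xzr/xsp, giving ru->size == 30.

   Below is a minimal, disabled sketch of how a client might list the
   allocatable registers; the helper name is hypothetical and it relies only
   on declarations from the includes above. */
#if 0
static void ARM64_showAllocatableRegs ( void )
{
   const RRegUniverse* ru = getRRegUniverse_ARM64();
   UInt j;
   for (j = 0; j < ru->allocable; j++) {
      ppHRegARM64(ru->regs[j]);   /* e.g. "x22", "q16", "d8", ... */
      vex_printf(" ");
   }
   vex_printf("\n");
}
#endif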
153 UInt ppHRegARM64 ( HReg reg ) {
154 Int r;
155 /* Be generic for all virtual regs. */
156 if (hregIsVirtual(reg)) {
157 return ppHReg(reg);
159 /* But specific for real regs. */
160 switch (hregClass(reg)) {
161 case HRcInt64:
162 r = hregEncoding(reg);
163 vassert(r >= 0 && r <= 31);
164 return r == 31 ? vex_printf("xzr/xsp") : vex_printf("x%d", r);
165 case HRcFlt64:
166 r = hregEncoding(reg);
167 vassert(r >= 0 && r < 32);
168 return vex_printf("d%d", r);
169 case HRcVec128:
170 r = hregEncoding(reg);
171 vassert(r >= 0 && r < 32);
172 return vex_printf("q%d", r);
173 default:
174 vpanic("ppHRegARM64");
178 static UInt ppHRegARM64asSreg ( HReg reg ) {
179 UInt written = ppHRegARM64(reg);
180 written += vex_printf("(S-reg)");
181 return written;
184 static UInt ppHRegARM64asHreg ( HReg reg ) {
185 UInt written = ppHRegARM64(reg);
186 written += vex_printf("(H-reg)");
187 return written;
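/* The "(S-reg)" / "(H-reg)" suffixes mark places where a D-class register is
   being used to hold a 32-bit or 16-bit float value (see the VLdStS and
   VLdStH instructions below). */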
191 /* --------- Condition codes, ARM64 encoding. --------- */
193 static const HChar* showARM64CondCode ( ARM64CondCode cond ) {
194 switch (cond) {
195 case ARM64cc_EQ: return "eq";
196 case ARM64cc_NE: return "ne";
197 case ARM64cc_CS: return "cs";
198 case ARM64cc_CC: return "cc";
199 case ARM64cc_MI: return "mi";
200 case ARM64cc_PL: return "pl";
201 case ARM64cc_VS: return "vs";
202 case ARM64cc_VC: return "vc";
203 case ARM64cc_HI: return "hi";
204 case ARM64cc_LS: return "ls";
205 case ARM64cc_GE: return "ge";
206 case ARM64cc_LT: return "lt";
207 case ARM64cc_GT: return "gt";
208 case ARM64cc_LE: return "le";
209 case ARM64cc_AL: return "al"; // default
210 case ARM64cc_NV: return "nv";
211 default: vpanic("showARM64CondCode");
216 /* --------- Memory address expressions (amodes). --------- */
218 ARM64AMode* ARM64AMode_RI9 ( HReg reg, Int simm9 ) {
219 ARM64AMode* am = LibVEX_Alloc_inline(sizeof(ARM64AMode));
220 am->tag = ARM64am_RI9;
221 am->ARM64am.RI9.reg = reg;
222 am->ARM64am.RI9.simm9 = simm9;
223 vassert(-256 <= simm9 && simm9 <= 255);
224 return am;
227 ARM64AMode* ARM64AMode_RI12 ( HReg reg, Int uimm12, UChar szB ) {
228 ARM64AMode* am = LibVEX_Alloc_inline(sizeof(ARM64AMode));
229 am->tag = ARM64am_RI12;
230 am->ARM64am.RI12.reg = reg;
231 am->ARM64am.RI12.uimm12 = uimm12;
232 am->ARM64am.RI12.szB = szB;
233 vassert(uimm12 >= 0 && uimm12 <= 4095);
234 switch (szB) {
235 case 1: case 2: case 4: case 8: break;
236 default: vassert(0);
238 return am;
241 ARM64AMode* ARM64AMode_RR ( HReg base, HReg index ) {
242 ARM64AMode* am = LibVEX_Alloc_inline(sizeof(ARM64AMode));
243 am->tag = ARM64am_RR;
244 am->ARM64am.RR.base = base;
245 am->ARM64am.RR.index = index;
246 return am;
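/* Illustrative values: ARM64AMode_RI9(r, -8) denotes the address r - 8 and
   is shown by ppARM64AMode as "-8(rN)"; ARM64AMode_RI12(r, 3, 8) denotes
   r + 3*8 and is shown as "24(rN)"; ARM64AMode_RR(rB, rI) denotes rB + rI
   and is shown as "(rB,rI)". */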
249 static void ppARM64AMode ( ARM64AMode* am ) {
250 switch (am->tag) {
251 case ARM64am_RI9:
252 vex_printf("%d(", am->ARM64am.RI9.simm9);
253 ppHRegARM64(am->ARM64am.RI9.reg);
254 vex_printf(")");
255 break;
256 case ARM64am_RI12:
257 vex_printf("%u(", (UInt)am->ARM64am.RI12.szB
258 * (UInt)am->ARM64am.RI12.uimm12);
259 ppHRegARM64(am->ARM64am.RI12.reg);
260 vex_printf(")");
261 break;
262 case ARM64am_RR:
263 vex_printf("(");
264 ppHRegARM64(am->ARM64am.RR.base);
265 vex_printf(",");
266 ppHRegARM64(am->ARM64am.RR.index);
267 vex_printf(")");
268 break;
269 default:
270 vassert(0);
274 static void addRegUsage_ARM64AMode ( HRegUsage* u, ARM64AMode* am ) {
275 switch (am->tag) {
276 case ARM64am_RI9:
277 addHRegUse(u, HRmRead, am->ARM64am.RI9.reg);
278 return;
279 case ARM64am_RI12:
280 addHRegUse(u, HRmRead, am->ARM64am.RI12.reg);
281 return;
282 case ARM64am_RR:
283 addHRegUse(u, HRmRead, am->ARM64am.RR.base);
284 addHRegUse(u, HRmRead, am->ARM64am.RR.index);
285 return;
286 default:
287 vpanic("addRegUsage_ARM64Amode");
291 static void mapRegs_ARM64AMode ( HRegRemap* m, ARM64AMode* am ) {
292 switch (am->tag) {
293 case ARM64am_RI9:
294 am->ARM64am.RI9.reg = lookupHRegRemap(m, am->ARM64am.RI9.reg);
295 return;
296 case ARM64am_RI12:
297 am->ARM64am.RI12.reg = lookupHRegRemap(m, am->ARM64am.RI12.reg);
298 return;
299 case ARM64am_RR:
300 am->ARM64am.RR.base = lookupHRegRemap(m, am->ARM64am.RR.base);
301 am->ARM64am.RR.index = lookupHRegRemap(m, am->ARM64am.RR.index);
302 return;
303 default:
304 vpanic("mapRegs_ARM64Amode");
309 /* --------- Reg or uimm12<<{0,12} operands --------- */
311 ARM64RIA* ARM64RIA_I12 ( UShort imm12, UChar shift ) {
312 ARM64RIA* riA = LibVEX_Alloc_inline(sizeof(ARM64RIA));
313 riA->tag = ARM64riA_I12;
314 riA->ARM64riA.I12.imm12 = imm12;
315 riA->ARM64riA.I12.shift = shift;
316 vassert(imm12 < 4096);
317 vassert(shift == 0 || shift == 12);
318 return riA;
320 ARM64RIA* ARM64RIA_R ( HReg reg ) {
321 ARM64RIA* riA = LibVEX_Alloc_inline(sizeof(ARM64RIA));
322 riA->tag = ARM64riA_R;
323 riA->ARM64riA.R.reg = reg;
324 return riA;
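/* Examples: ARM64RIA_I12(17, 0) stands for the immediate 17, and
   ARM64RIA_I12(0x123, 12) for 0x123 << 12 = 0x123000; ppARM64RIA below
   prints the already-shifted value. */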
327 static void ppARM64RIA ( ARM64RIA* riA ) {
328 switch (riA->tag) {
329 case ARM64riA_I12:
330 vex_printf("#%u",(UInt)(riA->ARM64riA.I12.imm12
331 << riA->ARM64riA.I12.shift));
332 break;
333 case ARM64riA_R:
334 ppHRegARM64(riA->ARM64riA.R.reg);
335 break;
336 default:
337 vassert(0);
341 static void addRegUsage_ARM64RIA ( HRegUsage* u, ARM64RIA* riA ) {
342 switch (riA->tag) {
343 case ARM64riA_I12:
344 return;
345 case ARM64riA_R:
346 addHRegUse(u, HRmRead, riA->ARM64riA.R.reg);
347 return;
348 default:
349 vpanic("addRegUsage_ARM64RIA");
353 static void mapRegs_ARM64RIA ( HRegRemap* m, ARM64RIA* riA ) {
354 switch (riA->tag) {
355 case ARM64riA_I12:
356 return;
357 case ARM64riA_R:
358 riA->ARM64riA.R.reg = lookupHRegRemap(m, riA->ARM64riA.R.reg);
359 return;
360 default:
361 vpanic("mapRegs_ARM64RIA");
366 /* --------- Reg or "bitfield" (logic immediate) operands --------- */
368 ARM64RIL* ARM64RIL_I13 ( UChar bitN, UChar immR, UChar immS ) {
369 ARM64RIL* riL = LibVEX_Alloc_inline(sizeof(ARM64RIL));
370 riL->tag = ARM64riL_I13;
371 riL->ARM64riL.I13.bitN = bitN;
372 riL->ARM64riL.I13.immR = immR;
373 riL->ARM64riL.I13.immS = immS;
374 vassert(bitN < 2);
375 vassert(immR < 64);
376 vassert(immS < 64);
377 return riL;
379 ARM64RIL* ARM64RIL_R ( HReg reg ) {
380 ARM64RIL* riL = LibVEX_Alloc_inline(sizeof(ARM64RIL));
381 riL->tag = ARM64riL_R;
382 riL->ARM64riL.R.reg = reg;
383 return riL;
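/* These are the N:immr:imms fields of the architectural "logical
   (immediate)" encoding: a repeating element containing a run of ones,
   rotated right by immR.  Note the constructor only range-checks the three
   fields; it does not verify that they denote a representable mask. */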
386 static void ppARM64RIL ( ARM64RIL* riL ) {
387 switch (riL->tag) {
388 case ARM64riL_I13:
389 vex_printf("#nrs(%u,%u,%u)",
390 (UInt)riL->ARM64riL.I13.bitN,
391 (UInt)riL->ARM64riL.I13.immR,
392 (UInt)riL->ARM64riL.I13.immS);
393 break;
394 case ARM64riL_R:
395 ppHRegARM64(riL->ARM64riL.R.reg);
396 break;
397 default:
398 vassert(0);
402 static void addRegUsage_ARM64RIL ( HRegUsage* u, ARM64RIL* riL ) {
403 switch (riL->tag) {
404 case ARM64riL_I13:
405 return;
406 case ARM64riL_R:
407 addHRegUse(u, HRmRead, riL->ARM64riL.R.reg);
408 return;
409 default:
410 vpanic("addRegUsage_ARM64RIL");
414 static void mapRegs_ARM64RIL ( HRegRemap* m, ARM64RIL* riL ) {
415 switch (riL->tag) {
416 case ARM64riL_I13:
417 return;
418 case ARM64riL_R:
419 riL->ARM64riL.R.reg = lookupHRegRemap(m, riL->ARM64riL.R.reg);
420 return;
421 default:
422 vpanic("mapRegs_ARM64RIL");
427 /* --------------- Reg or uimm6 operands --------------- */
429 ARM64RI6* ARM64RI6_I6 ( UInt imm6 ) {
430 ARM64RI6* ri6 = LibVEX_Alloc_inline(sizeof(ARM64RI6));
431 ri6->tag = ARM64ri6_I6;
432 ri6->ARM64ri6.I6.imm6 = imm6;
433 vassert(imm6 > 0 && imm6 < 64);
434 return ri6;
436 ARM64RI6* ARM64RI6_R ( HReg reg ) {
437 ARM64RI6* ri6 = LibVEX_Alloc_inline(sizeof(ARM64RI6));
438 ri6->tag = ARM64ri6_R;
439 ri6->ARM64ri6.R.reg = reg;
440 return ri6;
443 static void ppARM64RI6 ( ARM64RI6* ri6 ) {
444 switch (ri6->tag) {
445 case ARM64ri6_I6:
446 vex_printf("#%u", ri6->ARM64ri6.I6.imm6);
447 break;
448 case ARM64ri6_R:
449 ppHRegARM64(ri6->ARM64ri6.R.reg);
450 break;
451 default:
452 vassert(0);
456 static void addRegUsage_ARM64RI6 ( HRegUsage* u, ARM64RI6* ri6 ) {
457 switch (ri6->tag) {
458 case ARM64ri6_I6:
459 return;
460 case ARM64ri6_R:
461 addHRegUse(u, HRmRead, ri6->ARM64ri6.R.reg);
462 return;
463 default:
464 vpanic("addRegUsage_ARM64RI6");
468 static void mapRegs_ARM64RI6 ( HRegRemap* m, ARM64RI6* ri6 ) {
469 switch (ri6->tag) {
470 case ARM64ri6_I6:
471 return;
472 case ARM64ri6_R:
473 ri6->ARM64ri6.R.reg = lookupHRegRemap(m, ri6->ARM64ri6.R.reg);
474 return;
475 default:
476 vpanic("mapRegs_ARM64RI6");
481 /* --------- Instructions. --------- */
483 static const HChar* showARM64LogicOp ( ARM64LogicOp op ) {
484 switch (op) {
485 case ARM64lo_AND: return "and";
486 case ARM64lo_OR: return "orr";
487 case ARM64lo_XOR: return "eor";
488 default: vpanic("showARM64LogicOp");
492 static const HChar* showARM64ShiftOp ( ARM64ShiftOp op ) {
493 switch (op) {
494 case ARM64sh_SHL: return "lsl";
495 case ARM64sh_SHR: return "lsr";
496 case ARM64sh_SAR: return "asr";
497 default: vpanic("showARM64ShiftOp");
501 static const HChar* showARM64RRSOp ( ARM64RRSOp op ) {
502 switch (op) {
503 case ARM64rrs_ADD: return "add";
504 case ARM64rrs_SUB: return "sub";
505 case ARM64rrs_AND: return "and";
506 case ARM64rrs_OR: return "orr";
507 case ARM64rrs_XOR: return "eor";
508 default: vpanic("showARM64RRSOp");
512 static const HChar* showARM64UnaryOp ( ARM64UnaryOp op ) {
513 switch (op) {
514 case ARM64un_NEG: return "neg";
515 case ARM64un_NOT: return "not";
516 case ARM64un_CLZ: return "clz";
517 default: vpanic("showARM64UnaryOp");
521 static const HChar* showARM64MulOp ( ARM64MulOp op ) {
522 switch (op) {
523 case ARM64mul_PLAIN: return "mul ";
524 case ARM64mul_ZX: return "umulh";
525 case ARM64mul_SX: return "smulh";
526 default: vpanic("showARM64MulOp");
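/* ARM64mul_PLAIN yields the low 64 bits of the product; ARM64mul_ZX and
   ARM64mul_SX yield the high 64 bits of the unsigned resp. signed 128-bit
   product, hence the umulh / smulh mnemonics. */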
530 static void characteriseARM64CvtOp ( /*OUT*/HChar* syn,
531 /*OUT*/UInt* fszB, /*OUT*/UInt* iszB,
532 ARM64CvtOp op ) {
533 switch (op) {
534 case ARM64cvt_F32_I32S:
535 *syn = 's'; *fszB = 4; *iszB = 4; break;
536 case ARM64cvt_F64_I32S:
537 *syn = 's'; *fszB = 8; *iszB = 4; break;
538 case ARM64cvt_F32_I64S:
539 *syn = 's'; *fszB = 4; *iszB = 8; break;
540 case ARM64cvt_F64_I64S:
541 *syn = 's'; *fszB = 8; *iszB = 8; break;
542 case ARM64cvt_F32_I32U:
543 *syn = 'u'; *fszB = 4; *iszB = 4; break;
544 case ARM64cvt_F64_I32U:
545 *syn = 'u'; *fszB = 8; *iszB = 4; break;
546 case ARM64cvt_F32_I64U:
547 *syn = 'u'; *fszB = 4; *iszB = 8; break;
548 case ARM64cvt_F64_I64U:
549 *syn = 'u'; *fszB = 8; *iszB = 8; break;
550 default:
551 vpanic("characteriseARM64CvtOp");
555 static const HChar* showARM64FpBinOp ( ARM64FpBinOp op ) {
556 switch (op) {
557 case ARM64fpb_ADD: return "add";
558 case ARM64fpb_SUB: return "sub";
559 case ARM64fpb_MUL: return "mul";
560 case ARM64fpb_DIV: return "div";
561 default: vpanic("showARM64FpBinOp");
565 static const HChar* showARM64FpTriOp ( ARM64FpTriOp op ) {
566 switch (op) {
567 case ARM64fpt_FMADD: return "fmadd";
568 case ARM64fpt_FMSUB: return "fmsub";
569 default: vpanic("showARM64FpTriOp");
573 static const HChar* showARM64FpUnaryOp ( ARM64FpUnaryOp op ) {
574 switch (op) {
575 case ARM64fpu_NEG: return "neg ";
576 case ARM64fpu_ABS: return "abs ";
577 case ARM64fpu_SQRT: return "sqrt ";
578 case ARM64fpu_RINT: return "rinti";
579 case ARM64fpu_RINTA0: return "rinta";
580 case ARM64fpu_RINTE: return "rintn";
581 case ARM64fpu_RECPX: return "recpx";
582 default: vpanic("showARM64FpUnaryOp");
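/* Mnemonic reminder: "rinti" (FRINTI) rounds using the current FPCR mode,
   "rinta" (FRINTA) rounds to nearest with ties away from zero, "rintn"
   (FRINTN) rounds to nearest with ties to even, and "recpx" is FRECPX,
   the floating-point reciprocal-exponent operation. */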
586 static void showARM64VecBinOp(/*OUT*/const HChar** nm,
587 /*OUT*/const HChar** ar, ARM64VecBinOp op ) {
588 switch (op) {
589 case ARM64vecb_ADD64x2: *nm = "add "; *ar = "2d"; return;
590 case ARM64vecb_ADD32x4: *nm = "add "; *ar = "4s"; return;
591 case ARM64vecb_ADD16x8: *nm = "add "; *ar = "8h"; return;
592 case ARM64vecb_ADD8x16: *nm = "add "; *ar = "16b"; return;
593 case ARM64vecb_SUB64x2: *nm = "sub "; *ar = "2d"; return;
594 case ARM64vecb_SUB32x4: *nm = "sub "; *ar = "4s"; return;
595 case ARM64vecb_SUB16x8: *nm = "sub "; *ar = "8h"; return;
596 case ARM64vecb_SUB8x16: *nm = "sub "; *ar = "16b"; return;
597 case ARM64vecb_MUL32x4: *nm = "mul "; *ar = "4s"; return;
598 case ARM64vecb_MUL16x8: *nm = "mul "; *ar = "8h"; return;
599 case ARM64vecb_MUL8x16: *nm = "mul "; *ar = "16b"; return;
600 case ARM64vecb_FADD64x2: *nm = "fadd "; *ar = "2d"; return;
601 case ARM64vecb_FSUB64x2: *nm = "fsub "; *ar = "2d"; return;
602 case ARM64vecb_FMUL64x2: *nm = "fmul "; *ar = "2d"; return;
603 case ARM64vecb_FDIV64x2: *nm = "fdiv "; *ar = "2d"; return;
604 case ARM64vecb_FADD32x4: *nm = "fadd "; *ar = "4s"; return;
605 case ARM64vecb_FADD16x8: *nm = "fadd "; *ar = "8h"; return;
606 case ARM64vecb_FSUB32x4: *nm = "fsub "; *ar = "4s"; return;
607 case ARM64vecb_FSUB16x8: *nm = "fsub "; *ar = "8h"; return;
608 case ARM64vecb_FMUL32x4: *nm = "fmul "; *ar = "4s"; return;
609 case ARM64vecb_FDIV32x4: *nm = "fdiv "; *ar = "4s"; return;
610 case ARM64vecb_FMAX64x2: *nm = "fmax "; *ar = "2d"; return;
611 case ARM64vecb_FMAX32x4: *nm = "fmax "; *ar = "4s"; return;
612 case ARM64vecb_FMIN64x2: *nm = "fmin "; *ar = "2d"; return;
613 case ARM64vecb_FMIN32x4: *nm = "fmin "; *ar = "4s"; return;
614 case ARM64vecb_UMAX32x4: *nm = "umax "; *ar = "4s"; return;
615 case ARM64vecb_UMAX16x8: *nm = "umax "; *ar = "8h"; return;
616 case ARM64vecb_UMAX8x16: *nm = "umax "; *ar = "16b"; return;
617 case ARM64vecb_UMIN32x4: *nm = "umin "; *ar = "4s"; return;
618 case ARM64vecb_UMIN16x8: *nm = "umin "; *ar = "8h"; return;
619 case ARM64vecb_UMIN8x16: *nm = "umin "; *ar = "16b"; return;
620 case ARM64vecb_SMAX32x4: *nm = "smax "; *ar = "4s"; return;
621 case ARM64vecb_SMAX16x8: *nm = "smax "; *ar = "8h"; return;
622 case ARM64vecb_SMAX8x16: *nm = "smax "; *ar = "16b"; return;
623 case ARM64vecb_SMIN32x4: *nm = "smin "; *ar = "4s"; return;
624 case ARM64vecb_SMIN16x8: *nm = "smin "; *ar = "8h"; return;
625 case ARM64vecb_SMIN8x16: *nm = "smin "; *ar = "16b"; return;
626 case ARM64vecb_AND: *nm = "and "; *ar = "16b"; return;
627 case ARM64vecb_ORR: *nm = "orr "; *ar = "16b"; return;
628 case ARM64vecb_XOR: *nm = "eor "; *ar = "16b"; return;
629 case ARM64vecb_CMEQ64x2: *nm = "cmeq "; *ar = "2d"; return;
630 case ARM64vecb_CMEQ32x4: *nm = "cmeq "; *ar = "4s"; return;
631 case ARM64vecb_CMEQ16x8: *nm = "cmeq "; *ar = "8h"; return;
632 case ARM64vecb_CMEQ8x16: *nm = "cmeq "; *ar = "16b"; return;
633 case ARM64vecb_CMHI64x2: *nm = "cmhi "; *ar = "2d"; return;
634 case ARM64vecb_CMHI32x4: *nm = "cmhi "; *ar = "4s"; return;
635 case ARM64vecb_CMHI16x8: *nm = "cmhi "; *ar = "8h"; return;
636 case ARM64vecb_CMHI8x16: *nm = "cmhi "; *ar = "16b"; return;
637 case ARM64vecb_CMGT64x2: *nm = "cmgt "; *ar = "2d"; return;
638 case ARM64vecb_CMGT32x4: *nm = "cmgt "; *ar = "4s"; return;
639 case ARM64vecb_CMGT16x8: *nm = "cmgt "; *ar = "8h"; return;
640 case ARM64vecb_CMGT8x16: *nm = "cmgt "; *ar = "16b"; return;
641 case ARM64vecb_FCMEQ64x2: *nm = "fcmeq "; *ar = "2d"; return;
642 case ARM64vecb_FCMEQ32x4: *nm = "fcmeq "; *ar = "4s"; return;
643 case ARM64vecb_FCMGE64x2: *nm = "fcmge "; *ar = "2d"; return;
644 case ARM64vecb_FCMGE32x4: *nm = "fcmge "; *ar = "4s"; return;
645 case ARM64vecb_FCMGE16x8: *nm = "fcmge "; *ar = "8h"; return;
646 case ARM64vecb_FCMGT64x2: *nm = "fcmgt "; *ar = "2d"; return;
647 case ARM64vecb_FCMGT32x4: *nm = "fcmgt "; *ar = "4s"; return;
648 case ARM64vecb_FCMGT16x8: *nm = "fcmgt "; *ar = "8h"; return;
649 case ARM64vecb_FCMEQ16x8: *nm = "fcmeq "; *ar = "8h"; return;
650 case ARM64vecb_TBL1: *nm = "tbl "; *ar = "16b"; return;
651 case ARM64vecb_UZP164x2: *nm = "uzp1 "; *ar = "2d"; return;
652 case ARM64vecb_UZP132x4: *nm = "uzp1 "; *ar = "4s"; return;
653 case ARM64vecb_UZP116x8: *nm = "uzp1 "; *ar = "8h"; return;
654 case ARM64vecb_UZP18x16: *nm = "uzp1 "; *ar = "16b"; return;
655 case ARM64vecb_UZP264x2: *nm = "uzp2 "; *ar = "2d"; return;
656 case ARM64vecb_UZP232x4: *nm = "uzp2 "; *ar = "4s"; return;
657 case ARM64vecb_UZP216x8: *nm = "uzp2 "; *ar = "8h"; return;
658 case ARM64vecb_UZP28x16: *nm = "uzp2 "; *ar = "16b"; return;
659 case ARM64vecb_ZIP132x4: *nm = "zip1 "; *ar = "4s"; return;
660 case ARM64vecb_ZIP116x8: *nm = "zip1 "; *ar = "8h"; return;
661 case ARM64vecb_ZIP18x16: *nm = "zip1 "; *ar = "16b"; return;
662 case ARM64vecb_ZIP232x4: *nm = "zip2 "; *ar = "4s"; return;
663 case ARM64vecb_ZIP216x8: *nm = "zip2 "; *ar = "8h"; return;
664 case ARM64vecb_ZIP28x16: *nm = "zip2 "; *ar = "16b"; return;
665 case ARM64vecb_PMUL8x16: *nm = "pmul "; *ar = "16b"; return;
666 case ARM64vecb_PMULL8x8: *nm = "pmull "; *ar = "8hbb"; return;
667 case ARM64vecb_UMULL2DSS: *nm = "umull "; *ar = "2dss"; return;
668 case ARM64vecb_UMULL4SHH: *nm = "umull "; *ar = "4shh"; return;
669 case ARM64vecb_UMULL8HBB: *nm = "umull "; *ar = "8hbb"; return;
670 case ARM64vecb_SMULL2DSS: *nm = "smull "; *ar = "2dss"; return;
671 case ARM64vecb_SMULL4SHH: *nm = "smull "; *ar = "4shh"; return;
672 case ARM64vecb_SMULL8HBB: *nm = "smull "; *ar = "8hbb"; return;
673 case ARM64vecb_SQADD64x2: *nm = "sqadd "; *ar = "2d"; return;
674 case ARM64vecb_SQADD32x4: *nm = "sqadd "; *ar = "4s"; return;
675 case ARM64vecb_SQADD16x8: *nm = "sqadd "; *ar = "8h"; return;
676 case ARM64vecb_SQADD8x16: *nm = "sqadd "; *ar = "16b"; return;
677 case ARM64vecb_UQADD64x2: *nm = "uqadd "; *ar = "2d"; return;
678 case ARM64vecb_UQADD32x4: *nm = "uqadd "; *ar = "4s"; return;
679 case ARM64vecb_UQADD16x8: *nm = "uqadd "; *ar = "8h"; return;
680 case ARM64vecb_UQADD8x16: *nm = "uqadd "; *ar = "16b"; return;
681 case ARM64vecb_SQSUB64x2: *nm = "sqsub "; *ar = "2d"; return;
682 case ARM64vecb_SQSUB32x4: *nm = "sqsub "; *ar = "4s"; return;
683 case ARM64vecb_SQSUB16x8: *nm = "sqsub "; *ar = "8h"; return;
684 case ARM64vecb_SQSUB8x16: *nm = "sqsub "; *ar = "16b"; return;
685 case ARM64vecb_UQSUB64x2: *nm = "uqsub "; *ar = "2d"; return;
686 case ARM64vecb_UQSUB32x4: *nm = "uqsub "; *ar = "4s"; return;
687 case ARM64vecb_UQSUB16x8: *nm = "uqsub "; *ar = "8h"; return;
688 case ARM64vecb_UQSUB8x16: *nm = "uqsub "; *ar = "16b"; return;
689 case ARM64vecb_SQDMULL2DSS: *nm = "sqdmull"; *ar = "2dss"; return;
690 case ARM64vecb_SQDMULL4SHH: *nm = "sqdmull"; *ar = "4shh"; return;
691 case ARM64vecb_SQDMULH32x4: *nm = "sqdmulh"; *ar = "4s"; return;
692 case ARM64vecb_SQDMULH16x8: *nm = "sqdmulh"; *ar = "8h"; return;
693 case ARM64vecb_SQRDMULH32x4: *nm = "sqrdmulh"; *ar = "4s"; return;
694 case ARM64vecb_SQRDMULH16x8: *nm = "sqrdmulh"; *ar = "8h"; return;
695 case ARM64vecb_SQSHL64x2: *nm = "sqshl "; *ar = "2d"; return;
696 case ARM64vecb_SQSHL32x4: *nm = "sqshl "; *ar = "4s"; return;
697 case ARM64vecb_SQSHL16x8: *nm = "sqshl "; *ar = "8h"; return;
698 case ARM64vecb_SQSHL8x16: *nm = "sqshl "; *ar = "16b"; return;
699 case ARM64vecb_UQSHL64x2: *nm = "uqshl "; *ar = "2d"; return;
700 case ARM64vecb_UQSHL32x4: *nm = "uqshl "; *ar = "4s"; return;
701 case ARM64vecb_UQSHL16x8: *nm = "uqshl "; *ar = "8h"; return;
702 case ARM64vecb_UQSHL8x16: *nm = "uqshl "; *ar = "16b"; return;
703 case ARM64vecb_SQRSHL64x2: *nm = "sqrshl"; *ar = "2d"; return;
704 case ARM64vecb_SQRSHL32x4: *nm = "sqrshl"; *ar = "4s"; return;
705 case ARM64vecb_SQRSHL16x8: *nm = "sqrshl"; *ar = "8h"; return;
706 case ARM64vecb_SQRSHL8x16: *nm = "sqrshl"; *ar = "16b"; return;
707 case ARM64vecb_UQRSHL64x2: *nm = "uqrshl"; *ar = "2d"; return;
708 case ARM64vecb_UQRSHL32x4: *nm = "uqrshl"; *ar = "4s"; return;
709 case ARM64vecb_UQRSHL16x8: *nm = "uqrshl"; *ar = "8h"; return;
710 case ARM64vecb_UQRSHL8x16: *nm = "uqrshl"; *ar = "16b"; return;
711 case ARM64vecb_SSHL64x2: *nm = "sshl "; *ar = "2d"; return;
712 case ARM64vecb_SSHL32x4: *nm = "sshl "; *ar = "4s"; return;
713 case ARM64vecb_SSHL16x8: *nm = "sshl "; *ar = "8h"; return;
714 case ARM64vecb_SSHL8x16: *nm = "sshl "; *ar = "16b"; return;
715 case ARM64vecb_USHL64x2: *nm = "ushl "; *ar = "2d"; return;
716 case ARM64vecb_USHL32x4: *nm = "ushl "; *ar = "4s"; return;
717 case ARM64vecb_USHL16x8: *nm = "ushl "; *ar = "8h"; return;
718 case ARM64vecb_USHL8x16: *nm = "ushl "; *ar = "16b"; return;
719 case ARM64vecb_SRSHL64x2: *nm = "srshl "; *ar = "2d"; return;
720 case ARM64vecb_SRSHL32x4: *nm = "srshl "; *ar = "4s"; return;
721 case ARM64vecb_SRSHL16x8: *nm = "srshl "; *ar = "8h"; return;
722 case ARM64vecb_SRSHL8x16: *nm = "srshl "; *ar = "16b"; return;
723 case ARM64vecb_URSHL64x2: *nm = "urshl "; *ar = "2d"; return;
724 case ARM64vecb_URSHL32x4: *nm = "urshl "; *ar = "4s"; return;
725 case ARM64vecb_URSHL16x8: *nm = "urshl "; *ar = "8h"; return;
726 case ARM64vecb_URSHL8x16: *nm = "urshl "; *ar = "16b"; return;
727 case ARM64vecb_FRECPS64x2: *nm = "frecps"; *ar = "2d"; return;
728 case ARM64vecb_FRECPS32x4: *nm = "frecps"; *ar = "4s"; return;
729 case ARM64vecb_FRSQRTS64x2: *nm = "frsqrts"; *ar = "2d"; return;
730 case ARM64vecb_FRSQRTS32x4: *nm = "frsqrts"; *ar = "4s"; return;
731 default: vpanic("showARM64VecBinOp");
735 static void showARM64VecModifyOp(/*OUT*/const HChar** nm,
736 /*OUT*/const HChar** ar,
737 ARM64VecModifyOp op ) {
738 switch (op) {
739 case ARM64vecmo_SUQADD64x2: *nm = "suqadd"; *ar = "2d"; return;
740 case ARM64vecmo_SUQADD32x4: *nm = "suqadd"; *ar = "4s"; return;
741 case ARM64vecmo_SUQADD16x8: *nm = "suqadd"; *ar = "8h"; return;
742 case ARM64vecmo_SUQADD8x16: *nm = "suqadd"; *ar = "16b"; return;
743 case ARM64vecmo_USQADD64x2: *nm = "usqadd"; *ar = "2d"; return;
744 case ARM64vecmo_USQADD32x4: *nm = "usqadd"; *ar = "4s"; return;
745 case ARM64vecmo_USQADD16x8: *nm = "usqadd"; *ar = "8h"; return;
746 case ARM64vecmo_USQADD8x16: *nm = "usqadd"; *ar = "16b"; return;
747 default: vpanic("showARM64VecModifyOp");
751 static void showARM64VecUnaryOp(/*OUT*/const HChar** nm,
752 /*OUT*/const HChar** ar, ARM64VecUnaryOp op )
754 switch (op) {
755 case ARM64vecu_FNEG64x2: *nm = "fneg "; *ar = "2d"; return;
756 case ARM64vecu_FNEG32x4: *nm = "fneg "; *ar = "4s"; return;
757 case ARM64vecu_FNEG16x8: *nm = "fneg "; *ar = "8h"; return;
758 case ARM64vecu_FABS64x2: *nm = "fabs "; *ar = "2d"; return;
759 case ARM64vecu_FABS32x4: *nm = "fabs "; *ar = "4s"; return;
760 case ARM64vecu_FABS16x8: *nm = "fabs "; *ar = "8h"; return;
761 case ARM64vecu_NOT: *nm = "not "; *ar = "all"; return;
762 case ARM64vecu_ABS64x2: *nm = "abs "; *ar = "2d"; return;
763 case ARM64vecu_ABS32x4: *nm = "abs "; *ar = "4s"; return;
764 case ARM64vecu_ABS16x8: *nm = "abs "; *ar = "8h"; return;
765 case ARM64vecu_ABS8x16: *nm = "abs "; *ar = "16b"; return;
766 case ARM64vecu_CLS32x4: *nm = "cls "; *ar = "4s"; return;
767 case ARM64vecu_CLS16x8: *nm = "cls "; *ar = "8h"; return;
768 case ARM64vecu_CLS8x16: *nm = "cls "; *ar = "16b"; return;
769 case ARM64vecu_CLZ32x4: *nm = "clz "; *ar = "4s"; return;
770 case ARM64vecu_CLZ16x8: *nm = "clz "; *ar = "8h"; return;
771 case ARM64vecu_CLZ8x16: *nm = "clz "; *ar = "16b"; return;
772 case ARM64vecu_CNT8x16: *nm = "cnt "; *ar = "16b"; return;
773 case ARM64vecu_RBIT: *nm = "rbit "; *ar = "16b"; return;
774 case ARM64vecu_REV1616B: *nm = "rev16"; *ar = "16b"; return;
775 case ARM64vecu_REV3216B: *nm = "rev32"; *ar = "16b"; return;
776 case ARM64vecu_REV328H: *nm = "rev32"; *ar = "8h"; return;
777 case ARM64vecu_REV6416B: *nm = "rev64"; *ar = "16b"; return;
778 case ARM64vecu_REV648H: *nm = "rev64"; *ar = "8h"; return;
779 case ARM64vecu_REV644S: *nm = "rev64"; *ar = "4s"; return;
780 case ARM64vecu_URECPE32x4: *nm = "urecpe"; *ar = "4s"; return;
781 case ARM64vecu_URSQRTE32x4: *nm = "ursqrte"; *ar = "4s"; return;
782 case ARM64vecu_FRECPE64x2: *nm = "frecpe"; *ar = "2d"; return;
783 case ARM64vecu_FRECPE32x4: *nm = "frecpe"; *ar = "4s"; return;
784 case ARM64vecu_FRSQRTE64x2: *nm = "frsqrte"; *ar = "2d"; return;
785 case ARM64vecu_FRSQRTE32x4: *nm = "frsqrte"; *ar = "4s"; return;
786 case ARM64vecu_FSQRT64x2: *nm = "fsqrt"; *ar = "2d"; return;
787 case ARM64vecu_FSQRT32x4: *nm = "fsqrt"; *ar = "4s"; return;
788 case ARM64vecu_FSQRT16x8: *nm = "fsqrt"; *ar = "8h"; return;
789 default: vpanic("showARM64VecUnaryOp");
793 static void showARM64VecShiftImmOp(/*OUT*/const HChar** nm,
794 /*OUT*/const HChar** ar,
795 ARM64VecShiftImmOp op )
797 switch (op) {
798 case ARM64vecshi_USHR64x2: *nm = "ushr "; *ar = "2d"; return;
799 case ARM64vecshi_USHR32x4: *nm = "ushr "; *ar = "4s"; return;
800 case ARM64vecshi_USHR16x8: *nm = "ushr "; *ar = "8h"; return;
801 case ARM64vecshi_USHR8x16: *nm = "ushr "; *ar = "16b"; return;
802 case ARM64vecshi_SSHR64x2: *nm = "sshr "; *ar = "2d"; return;
803 case ARM64vecshi_SSHR32x4: *nm = "sshr "; *ar = "4s"; return;
804 case ARM64vecshi_SSHR16x8: *nm = "sshr "; *ar = "8h"; return;
805 case ARM64vecshi_SSHR8x16: *nm = "sshr "; *ar = "16b"; return;
806 case ARM64vecshi_SHL64x2: *nm = "shl "; *ar = "2d"; return;
807 case ARM64vecshi_SHL32x4: *nm = "shl "; *ar = "4s"; return;
808 case ARM64vecshi_SHL16x8: *nm = "shl "; *ar = "8h"; return;
809 case ARM64vecshi_SHL8x16: *nm = "shl "; *ar = "16b"; return;
810 case ARM64vecshi_SQSHRN2SD: *nm = "sqshrn"; *ar = "2sd"; return;
811 case ARM64vecshi_SQSHRN4HS: *nm = "sqshrn"; *ar = "4hs"; return;
812 case ARM64vecshi_SQSHRN8BH: *nm = "sqshrn"; *ar = "8bh"; return;
813 case ARM64vecshi_UQSHRN2SD: *nm = "uqshrn"; *ar = "2sd"; return;
814 case ARM64vecshi_UQSHRN4HS: *nm = "uqshrn"; *ar = "4hs"; return;
815 case ARM64vecshi_UQSHRN8BH: *nm = "uqshrn"; *ar = "8bh"; return;
816 case ARM64vecshi_SQSHRUN2SD: *nm = "sqshrun"; *ar = "2sd"; return;
817 case ARM64vecshi_SQSHRUN4HS: *nm = "sqshrun"; *ar = "4hs"; return;
818 case ARM64vecshi_SQSHRUN8BH: *nm = "sqshrun"; *ar = "8bh"; return;
819 case ARM64vecshi_SQRSHRN2SD: *nm = "sqrshrn"; *ar = "2sd"; return;
820 case ARM64vecshi_SQRSHRN4HS: *nm = "sqrshrn"; *ar = "4hs"; return;
821 case ARM64vecshi_SQRSHRN8BH: *nm = "sqrshrn"; *ar = "8bh"; return;
822 case ARM64vecshi_UQRSHRN2SD: *nm = "uqrshrn"; *ar = "2sd"; return;
823 case ARM64vecshi_UQRSHRN4HS: *nm = "uqrshrn"; *ar = "4hs"; return;
824 case ARM64vecshi_UQRSHRN8BH: *nm = "uqrshrn"; *ar = "8bh"; return;
825 case ARM64vecshi_SQRSHRUN2SD: *nm = "sqrshrun"; *ar = "2sd"; return;
826 case ARM64vecshi_SQRSHRUN4HS: *nm = "sqrshrun"; *ar = "4hs"; return;
827 case ARM64vecshi_SQRSHRUN8BH: *nm = "sqrshrun"; *ar = "8bh"; return;
828 case ARM64vecshi_UQSHL64x2: *nm = "uqshl "; *ar = "2d"; return;
829 case ARM64vecshi_UQSHL32x4: *nm = "uqshl "; *ar = "4s"; return;
830 case ARM64vecshi_UQSHL16x8: *nm = "uqshl "; *ar = "8h"; return;
831 case ARM64vecshi_UQSHL8x16: *nm = "uqshl "; *ar = "16b"; return;
832 case ARM64vecshi_SQSHL64x2: *nm = "sqshl "; *ar = "2d"; return;
833 case ARM64vecshi_SQSHL32x4: *nm = "sqshl "; *ar = "4s"; return;
834 case ARM64vecshi_SQSHL16x8: *nm = "sqshl "; *ar = "8h"; return;
835 case ARM64vecshi_SQSHL8x16: *nm = "sqshl "; *ar = "16b"; return;
836 case ARM64vecshi_SQSHLU64x2: *nm = "sqshlu"; *ar = "2d"; return;
837 case ARM64vecshi_SQSHLU32x4: *nm = "sqshlu"; *ar = "4s"; return;
838 case ARM64vecshi_SQSHLU16x8: *nm = "sqshlu"; *ar = "8h"; return;
839 case ARM64vecshi_SQSHLU8x16: *nm = "sqshlu"; *ar = "16b"; return;
840 default: vpanic("showARM64VecShiftImmOp");
844 static const HChar* showARM64VecNarrowOp(ARM64VecNarrowOp op) {
845 switch (op) {
846 case ARM64vecna_XTN: return "xtn ";
847 case ARM64vecna_SQXTN: return "sqxtn ";
848 case ARM64vecna_UQXTN: return "uqxtn ";
849 case ARM64vecna_SQXTUN: return "sqxtun";
850 default: vpanic("showARM64VecNarrowOp");
854 ARM64Instr* ARM64Instr_Arith ( HReg dst,
855 HReg argL, ARM64RIA* argR, Bool isAdd ) {
856 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
857 i->tag = ARM64in_Arith;
858 i->ARM64in.Arith.dst = dst;
859 i->ARM64in.Arith.argL = argL;
860 i->ARM64in.Arith.argR = argR;
861 i->ARM64in.Arith.isAdd = isAdd;
862 return i;
864 ARM64Instr* ARM64Instr_Cmp ( HReg argL, ARM64RIA* argR, Bool is64 ) {
865 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
866 i->tag = ARM64in_Cmp;
867 i->ARM64in.Cmp.argL = argL;
868 i->ARM64in.Cmp.argR = argR;
869 i->ARM64in.Cmp.is64 = is64;
870 return i;
872 ARM64Instr* ARM64Instr_Logic ( HReg dst,
873 HReg argL, ARM64RIL* argR, ARM64LogicOp op ) {
874 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
875 i->tag = ARM64in_Logic;
876 i->ARM64in.Logic.dst = dst;
877 i->ARM64in.Logic.argL = argL;
878 i->ARM64in.Logic.argR = argR;
879 i->ARM64in.Logic.op = op;
880 return i;
882 ARM64Instr* ARM64Instr_RRS ( HReg dst, HReg argL, HReg argR,
883 ARM64ShiftOp shiftOp, UChar amt,
884 ARM64RRSOp mainOp ) {
885 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
886 i->tag = ARM64in_RRS;
887 i->ARM64in.RRS.dst = dst;
888 i->ARM64in.RRS.argL = argL;
889 i->ARM64in.RRS.argR = argR;
890 i->ARM64in.RRS.shiftOp = shiftOp;
891 i->ARM64in.RRS.amt = amt;
892 i->ARM64in.RRS.mainOp = mainOp;
893 vassert(amt >= 1 && amt <= 63);
894 return i;
896 ARM64Instr* ARM64Instr_Test ( HReg argL, ARM64RIL* argR ) {
897 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
898 i->tag = ARM64in_Test;
899 i->ARM64in.Test.argL = argL;
900 i->ARM64in.Test.argR = argR;
901 return i;
903 ARM64Instr* ARM64Instr_Shift ( HReg dst,
904 HReg argL, ARM64RI6* argR, ARM64ShiftOp op ) {
905 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
906 i->tag = ARM64in_Shift;
907 i->ARM64in.Shift.dst = dst;
908 i->ARM64in.Shift.argL = argL;
909 i->ARM64in.Shift.argR = argR;
910 i->ARM64in.Shift.op = op;
911 return i;
913 ARM64Instr* ARM64Instr_Unary ( HReg dst, HReg src, ARM64UnaryOp op ) {
914 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
915 i->tag = ARM64in_Unary;
916 i->ARM64in.Unary.dst = dst;
917 i->ARM64in.Unary.src = src;
918 i->ARM64in.Unary.op = op;
919 return i;
921 ARM64Instr* ARM64Instr_Set64 ( HReg dst, ARM64CondCode cond ) {
922 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
923 i->tag = ARM64in_Set64;
924 i->ARM64in.Set64.dst = dst;
925 i->ARM64in.Set64.cond = cond;
926 return i;
928 ARM64Instr* ARM64Instr_MovI ( HReg dst, HReg src ) {
929 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
930 i->tag = ARM64in_MovI;
931 i->ARM64in.MovI.dst = dst;
932 i->ARM64in.MovI.src = src;
933 vassert(hregClass(src) == HRcInt64);
934 vassert(hregClass(dst) == HRcInt64);
935 return i;
937 ARM64Instr* ARM64Instr_Imm64 ( HReg dst, ULong imm64 ) {
938 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
939 i->tag = ARM64in_Imm64;
940 i->ARM64in.Imm64.dst = dst;
941 i->ARM64in.Imm64.imm64 = imm64;
942 return i;
944 ARM64Instr* ARM64Instr_LdSt64 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
945 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
946 i->tag = ARM64in_LdSt64;
947 i->ARM64in.LdSt64.isLoad = isLoad;
948 i->ARM64in.LdSt64.rD = rD;
949 i->ARM64in.LdSt64.amode = amode;
950 return i;
952 ARM64Instr* ARM64Instr_LdSt32 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
953 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
954 i->tag = ARM64in_LdSt32;
955 i->ARM64in.LdSt32.isLoad = isLoad;
956 i->ARM64in.LdSt32.rD = rD;
957 i->ARM64in.LdSt32.amode = amode;
958 return i;
960 ARM64Instr* ARM64Instr_LdSt16 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
961 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
962 i->tag = ARM64in_LdSt16;
963 i->ARM64in.LdSt16.isLoad = isLoad;
964 i->ARM64in.LdSt16.rD = rD;
965 i->ARM64in.LdSt16.amode = amode;
966 return i;
968 ARM64Instr* ARM64Instr_LdSt8 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
969 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
970 i->tag = ARM64in_LdSt8;
971 i->ARM64in.LdSt8.isLoad = isLoad;
972 i->ARM64in.LdSt8.rD = rD;
973 i->ARM64in.LdSt8.amode = amode;
974 return i;
976 ARM64Instr* ARM64Instr_XDirect ( Addr64 dstGA, ARM64AMode* amPC,
977 ARM64CondCode cond, Bool toFastEP ) {
978 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
979 i->tag = ARM64in_XDirect;
980 i->ARM64in.XDirect.dstGA = dstGA;
981 i->ARM64in.XDirect.amPC = amPC;
982 i->ARM64in.XDirect.cond = cond;
983 i->ARM64in.XDirect.toFastEP = toFastEP;
984 return i;
986 ARM64Instr* ARM64Instr_XIndir ( HReg dstGA, ARM64AMode* amPC,
987 ARM64CondCode cond ) {
988 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
989 i->tag = ARM64in_XIndir;
990 i->ARM64in.XIndir.dstGA = dstGA;
991 i->ARM64in.XIndir.amPC = amPC;
992 i->ARM64in.XIndir.cond = cond;
993 return i;
995 ARM64Instr* ARM64Instr_XAssisted ( HReg dstGA, ARM64AMode* amPC,
996 ARM64CondCode cond, IRJumpKind jk ) {
997 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
998 i->tag = ARM64in_XAssisted;
999 i->ARM64in.XAssisted.dstGA = dstGA;
1000 i->ARM64in.XAssisted.amPC = amPC;
1001 i->ARM64in.XAssisted.cond = cond;
1002 i->ARM64in.XAssisted.jk = jk;
1003 return i;
1005 ARM64Instr* ARM64Instr_CSel ( HReg dst, HReg argL, HReg argR,
1006 ARM64CondCode cond ) {
1007 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1008 i->tag = ARM64in_CSel;
1009 i->ARM64in.CSel.dst = dst;
1010 i->ARM64in.CSel.argL = argL;
1011 i->ARM64in.CSel.argR = argR;
1012 i->ARM64in.CSel.cond = cond;
1013 return i;
1015 ARM64Instr* ARM64Instr_Call ( ARM64CondCode cond, Addr64 target, Int nArgRegs,
1016 RetLoc rloc ) {
1017 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1018 i->tag = ARM64in_Call;
1019 i->ARM64in.Call.cond = cond;
1020 i->ARM64in.Call.target = target;
1021 i->ARM64in.Call.nArgRegs = nArgRegs;
1022 i->ARM64in.Call.rloc = rloc;
1023 vassert(is_sane_RetLoc(rloc));
1024 return i;
1026 extern ARM64Instr* ARM64Instr_AddToSP ( Int simm ) {
1027 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1028 i->tag = ARM64in_AddToSP;
1029 i->ARM64in.AddToSP.simm = simm;
1030 vassert(-4096 < simm && simm < 4096);
1031 vassert(0 == (simm & 0xF));
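/* The 16-byte granularity presumably reflects the AArch64/AAPCS64
   requirement that sp remain 16-byte aligned. */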
1032 return i;
1034 extern ARM64Instr* ARM64Instr_FromSP ( HReg dst ) {
1035 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1036 i->tag = ARM64in_FromSP;
1037 i->ARM64in.FromSP.dst = dst;
1038 return i;
1040 ARM64Instr* ARM64Instr_Mul ( HReg dst, HReg argL, HReg argR,
1041 ARM64MulOp op ) {
1042 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1043 i->tag = ARM64in_Mul;
1044 i->ARM64in.Mul.dst = dst;
1045 i->ARM64in.Mul.argL = argL;
1046 i->ARM64in.Mul.argR = argR;
1047 i->ARM64in.Mul.op = op;
1048 return i;
1050 ARM64Instr* ARM64Instr_LdrEX ( Int szB ) {
1051 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1052 i->tag = ARM64in_LdrEX;
1053 i->ARM64in.LdrEX.szB = szB;
1054 vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
1055 return i;
1057 ARM64Instr* ARM64Instr_StrEX ( Int szB ) {
1058 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1059 i->tag = ARM64in_StrEX;
1060 i->ARM64in.StrEX.szB = szB;
1061 vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
1062 return i;
1064 ARM64Instr* ARM64Instr_LdrEXP ( void ) {
1065 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1066 i->tag = ARM64in_LdrEXP;
1067 return i;
1069 ARM64Instr* ARM64Instr_StrEXP ( void ) {
1070 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1071 i->tag = ARM64in_StrEXP;
1072 return i;
1074 ARM64Instr* ARM64Instr_CAS ( Int szB ) {
1075 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1076 i->tag = ARM64in_CAS;
1077 i->ARM64in.CAS.szB = szB;
1078 vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
1079 return i;
1081 ARM64Instr* ARM64Instr_CASP ( Int szB ) {
1082 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1083 i->tag = ARM64in_CASP;
1084 i->ARM64in.CASP.szB = szB;
1085 vassert(szB == 8 || szB == 4);
1086 return i;
1088 ARM64Instr* ARM64Instr_MFence ( void ) {
1089 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1090 i->tag = ARM64in_MFence;
1091 return i;
1093 ARM64Instr* ARM64Instr_ClrEX ( void ) {
1094 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1095 i->tag = ARM64in_ClrEX;
1096 return i;
1098 ARM64Instr* ARM64Instr_VLdStH ( Bool isLoad, HReg sD, HReg rN, UInt uimm12 ) {
1099 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1100 i->tag = ARM64in_VLdStH;
1101 i->ARM64in.VLdStH.isLoad = isLoad;
1102 i->ARM64in.VLdStH.hD = sD;
1103 i->ARM64in.VLdStH.rN = rN;
1104 i->ARM64in.VLdStH.uimm12 = uimm12;
1105 vassert(uimm12 < 8192 && 0 == (uimm12 & 1));
1106 return i;
1108 ARM64Instr* ARM64Instr_VLdStS ( Bool isLoad, HReg sD, HReg rN, UInt uimm12 ) {
1109 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1110 i->tag = ARM64in_VLdStS;
1111 i->ARM64in.VLdStS.isLoad = isLoad;
1112 i->ARM64in.VLdStS.sD = sD;
1113 i->ARM64in.VLdStS.rN = rN;
1114 i->ARM64in.VLdStS.uimm12 = uimm12;
1115 vassert(uimm12 < 16384 && 0 == (uimm12 & 3));
1116 return i;
1118 ARM64Instr* ARM64Instr_VLdStD ( Bool isLoad, HReg dD, HReg rN, UInt uimm12 ) {
1119 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1120 i->tag = ARM64in_VLdStD;
1121 i->ARM64in.VLdStD.isLoad = isLoad;
1122 i->ARM64in.VLdStD.dD = dD;
1123 i->ARM64in.VLdStD.rN = rN;
1124 i->ARM64in.VLdStD.uimm12 = uimm12;
1125 vassert(uimm12 < 32768 && 0 == (uimm12 & 7));
1126 return i;
1128 ARM64Instr* ARM64Instr_VLdStQ ( Bool isLoad, HReg rQ, HReg rN ) {
1129 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1130 i->tag = ARM64in_VLdStQ;
1131 i->ARM64in.VLdStQ.isLoad = isLoad;
1132 i->ARM64in.VLdStQ.rQ = rQ;
1133 i->ARM64in.VLdStQ.rN = rN;
1134 return i;
1136 ARM64Instr* ARM64Instr_VCvtI2F ( ARM64CvtOp how, HReg rD, HReg rS ) {
1137 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1138 i->tag = ARM64in_VCvtI2F;
1139 i->ARM64in.VCvtI2F.how = how;
1140 i->ARM64in.VCvtI2F.rD = rD;
1141 i->ARM64in.VCvtI2F.rS = rS;
1142 return i;
1144 ARM64Instr* ARM64Instr_VCvtF2I ( ARM64CvtOp how, HReg rD, HReg rS,
1145 UChar armRM, Bool tiesToAway ) {
1146 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1147 i->tag = ARM64in_VCvtF2I;
1148 i->ARM64in.VCvtF2I.how = how;
1149 i->ARM64in.VCvtF2I.rD = rD;
1150 i->ARM64in.VCvtF2I.rS = rS;
1151 i->ARM64in.VCvtF2I.armRM = armRM;
1152 i->ARM64in.VCvtF2I.tiesToAway = tiesToAway;
1153 vassert(armRM <= 3);
1154 return i;
1156 ARM64Instr* ARM64Instr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
1157 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1158 i->tag = ARM64in_VCvtSD;
1159 i->ARM64in.VCvtSD.sToD = sToD;
1160 i->ARM64in.VCvtSD.dst = dst;
1161 i->ARM64in.VCvtSD.src = src;
1162 return i;
1164 ARM64Instr* ARM64Instr_VCvtHS ( Bool hToS, HReg dst, HReg src ) {
1165 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1166 i->tag = ARM64in_VCvtHS;
1167 i->ARM64in.VCvtHS.hToS = hToS;
1168 i->ARM64in.VCvtHS.dst = dst;
1169 i->ARM64in.VCvtHS.src = src;
1170 return i;
1172 ARM64Instr* ARM64Instr_VCvtHD ( Bool hToD, HReg dst, HReg src ) {
1173 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1174 i->tag = ARM64in_VCvtHD;
1175 i->ARM64in.VCvtHD.hToD = hToD;
1176 i->ARM64in.VCvtHD.dst = dst;
1177 i->ARM64in.VCvtHD.src = src;
1178 return i;
1180 ARM64Instr* ARM64Instr_VUnaryD ( ARM64FpUnaryOp op, HReg dst, HReg src ) {
1181 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1182 i->tag = ARM64in_VUnaryD;
1183 i->ARM64in.VUnaryD.op = op;
1184 i->ARM64in.VUnaryD.dst = dst;
1185 i->ARM64in.VUnaryD.src = src;
1186 return i;
1188 ARM64Instr* ARM64Instr_VUnaryS ( ARM64FpUnaryOp op, HReg dst, HReg src ) {
1189 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1190 i->tag = ARM64in_VUnaryS;
1191 i->ARM64in.VUnaryS.op = op;
1192 i->ARM64in.VUnaryS.dst = dst;
1193 i->ARM64in.VUnaryS.src = src;
1194 return i;
1196 ARM64Instr* ARM64Instr_VUnaryH ( ARM64FpUnaryOp op, HReg dst, HReg src ) {
1197 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1198 i->tag = ARM64in_VUnaryH;
1199 i->ARM64in.VUnaryH.op = op;
1200 i->ARM64in.VUnaryH.dst = dst;
1201 i->ARM64in.VUnaryH.src = src;
1202 return i;
1204 ARM64Instr* ARM64Instr_VBinD ( ARM64FpBinOp op,
1205 HReg dst, HReg argL, HReg argR ) {
1206 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1207 i->tag = ARM64in_VBinD;
1208 i->ARM64in.VBinD.op = op;
1209 i->ARM64in.VBinD.dst = dst;
1210 i->ARM64in.VBinD.argL = argL;
1211 i->ARM64in.VBinD.argR = argR;
1212 return i;
1214 ARM64Instr* ARM64Instr_VBinS ( ARM64FpBinOp op,
1215 HReg dst, HReg argL, HReg argR ) {
1216 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1217 i->tag = ARM64in_VBinS;
1218 i->ARM64in.VBinS.op = op;
1219 i->ARM64in.VBinS.dst = dst;
1220 i->ARM64in.VBinS.argL = argL;
1221 i->ARM64in.VBinS.argR = argR;
1222 return i;
1224 ARM64Instr* ARM64Instr_VBinH ( ARM64FpBinOp op,
1225 HReg dst, HReg argL, HReg argR ) {
1226 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1227 i->tag = ARM64in_VBinH;
1228 i->ARM64in.VBinH.op = op;
1229 i->ARM64in.VBinH.dst = dst;
1230 i->ARM64in.VBinH.argL = argL;
1231 i->ARM64in.VBinH.argR = argR;
1232 return i;
1234 ARM64Instr* ARM64Instr_VTriD ( ARM64FpTriOp op,
1235 HReg dst, HReg arg1, HReg arg2, HReg arg3 ) {
1236 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1237 i->tag = ARM64in_VTriD;
1238 i->ARM64in.VTriD.op = op;
1239 i->ARM64in.VTriD.dst = dst;
1240 i->ARM64in.VTriD.arg1 = arg1;
1241 i->ARM64in.VTriD.arg2 = arg2;
1242 i->ARM64in.VTriD.arg3 = arg3;
1243 return i;
1245 ARM64Instr* ARM64Instr_VTriS ( ARM64FpTriOp op,
1246 HReg dst, HReg arg1, HReg arg2, HReg arg3 ) {
1247 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1248 i->tag = ARM64in_VTriS;
1249 i->ARM64in.VTriS.op = op;
1250 i->ARM64in.VTriS.dst = dst;
1251 i->ARM64in.VTriS.arg1 = arg1;
1252 i->ARM64in.VTriS.arg2 = arg2;
1253 i->ARM64in.VTriS.arg3 = arg3;
1254 return i;
1256 ARM64Instr* ARM64Instr_VCmpD ( HReg argL, HReg argR ) {
1257 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1258 i->tag = ARM64in_VCmpD;
1259 i->ARM64in.VCmpD.argL = argL;
1260 i->ARM64in.VCmpD.argR = argR;
1261 return i;
1263 ARM64Instr* ARM64Instr_VCmpS ( HReg argL, HReg argR ) {
1264 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1265 i->tag = ARM64in_VCmpS;
1266 i->ARM64in.VCmpS.argL = argL;
1267 i->ARM64in.VCmpS.argR = argR;
1268 return i;
1270 ARM64Instr* ARM64Instr_VCmpH ( HReg argL, HReg argR ) {
1271 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1272 i->tag = ARM64in_VCmpH;
1273 i->ARM64in.VCmpH.argL = argL;
1274 i->ARM64in.VCmpH.argR = argR;
1275 return i;
1277 ARM64Instr* ARM64Instr_VFCSel ( HReg dst, HReg argL, HReg argR,
1278 ARM64CondCode cond, Bool isD ) {
1279 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1280 i->tag = ARM64in_VFCSel;
1281 i->ARM64in.VFCSel.dst = dst;
1282 i->ARM64in.VFCSel.argL = argL;
1283 i->ARM64in.VFCSel.argR = argR;
1284 i->ARM64in.VFCSel.cond = cond;
1285 i->ARM64in.VFCSel.isD = isD;
1286 return i;
1288 ARM64Instr* ARM64Instr_FPCR ( Bool toFPCR, HReg iReg ) {
1289 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1290 i->tag = ARM64in_FPCR;
1291 i->ARM64in.FPCR.toFPCR = toFPCR;
1292 i->ARM64in.FPCR.iReg = iReg;
1293 return i;
1295 ARM64Instr* ARM64Instr_FPSR ( Bool toFPSR, HReg iReg ) {
1296 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1297 i->tag = ARM64in_FPSR;
1298 i->ARM64in.FPSR.toFPSR = toFPSR;
1299 i->ARM64in.FPSR.iReg = iReg;
1300 return i;
1302 ARM64Instr* ARM64Instr_VBinV ( ARM64VecBinOp op,
1303 HReg dst, HReg argL, HReg argR ) {
1304 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1305 i->tag = ARM64in_VBinV;
1306 i->ARM64in.VBinV.op = op;
1307 i->ARM64in.VBinV.dst = dst;
1308 i->ARM64in.VBinV.argL = argL;
1309 i->ARM64in.VBinV.argR = argR;
1310 return i;
1312 ARM64Instr* ARM64Instr_VModifyV ( ARM64VecModifyOp op, HReg mod, HReg arg ) {
1313 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1314 i->tag = ARM64in_VModifyV;
1315 i->ARM64in.VModifyV.op = op;
1316 i->ARM64in.VModifyV.mod = mod;
1317 i->ARM64in.VModifyV.arg = arg;
1318 return i;
1320 ARM64Instr* ARM64Instr_VUnaryV ( ARM64VecUnaryOp op, HReg dst, HReg arg ) {
1321 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1322 i->tag = ARM64in_VUnaryV;
1323 i->ARM64in.VUnaryV.op = op;
1324 i->ARM64in.VUnaryV.dst = dst;
1325 i->ARM64in.VUnaryV.arg = arg;
1326 return i;
1328 ARM64Instr* ARM64Instr_VNarrowV ( ARM64VecNarrowOp op,
1329 UInt dszBlg2, HReg dst, HReg src ) {
1330 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1331 i->tag = ARM64in_VNarrowV;
1332 i->ARM64in.VNarrowV.op = op;
1333 i->ARM64in.VNarrowV.dszBlg2 = dszBlg2;
1334 i->ARM64in.VNarrowV.dst = dst;
1335 i->ARM64in.VNarrowV.src = src;
1336 vassert(dszBlg2 == 0 || dszBlg2 == 1 || dszBlg2 == 2);
1337 return i;
1339 ARM64Instr* ARM64Instr_VShiftImmV ( ARM64VecShiftImmOp op,
1340 HReg dst, HReg src, UInt amt ) {
1341 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1342 i->tag = ARM64in_VShiftImmV;
1343 i->ARM64in.VShiftImmV.op = op;
1344 i->ARM64in.VShiftImmV.dst = dst;
1345 i->ARM64in.VShiftImmV.src = src;
1346 i->ARM64in.VShiftImmV.amt = amt;
1347 UInt minSh = 0;
1348 UInt maxSh = 0;
1349 switch (op) {
1350 /* For right shifts, the allowed shift amounts are 1 .. lane_size.
1351 For left shifts, the allowed shift amounts are 0 .. lane_size-1.
1352 */
1353 case ARM64vecshi_USHR64x2: case ARM64vecshi_SSHR64x2:
1354 case ARM64vecshi_UQSHRN2SD: case ARM64vecshi_SQSHRN2SD:
1355 case ARM64vecshi_SQSHRUN2SD:
1356 case ARM64vecshi_UQRSHRN2SD: case ARM64vecshi_SQRSHRN2SD:
1357 case ARM64vecshi_SQRSHRUN2SD:
1358 minSh = 1; maxSh = 64; break;
1359 case ARM64vecshi_SHL64x2:
1360 case ARM64vecshi_UQSHL64x2: case ARM64vecshi_SQSHL64x2:
1361 case ARM64vecshi_SQSHLU64x2:
1362 minSh = 0; maxSh = 63; break;
1363 case ARM64vecshi_USHR32x4: case ARM64vecshi_SSHR32x4:
1364 case ARM64vecshi_UQSHRN4HS: case ARM64vecshi_SQSHRN4HS:
1365 case ARM64vecshi_SQSHRUN4HS:
1366 case ARM64vecshi_UQRSHRN4HS: case ARM64vecshi_SQRSHRN4HS:
1367 case ARM64vecshi_SQRSHRUN4HS:
1368 minSh = 1; maxSh = 32; break;
1369 case ARM64vecshi_SHL32x4:
1370 case ARM64vecshi_UQSHL32x4: case ARM64vecshi_SQSHL32x4:
1371 case ARM64vecshi_SQSHLU32x4:
1372 minSh = 0; maxSh = 31; break;
1373 case ARM64vecshi_USHR16x8: case ARM64vecshi_SSHR16x8:
1374 case ARM64vecshi_UQSHRN8BH: case ARM64vecshi_SQSHRN8BH:
1375 case ARM64vecshi_SQSHRUN8BH:
1376 case ARM64vecshi_UQRSHRN8BH: case ARM64vecshi_SQRSHRN8BH:
1377 case ARM64vecshi_SQRSHRUN8BH:
1378 minSh = 1; maxSh = 16; break;
1379 case ARM64vecshi_SHL16x8:
1380 case ARM64vecshi_UQSHL16x8: case ARM64vecshi_SQSHL16x8:
1381 case ARM64vecshi_SQSHLU16x8:
1382 minSh = 0; maxSh = 15; break;
1383 case ARM64vecshi_USHR8x16: case ARM64vecshi_SSHR8x16:
1384 minSh = 1; maxSh = 8; break;
1385 case ARM64vecshi_SHL8x16:
1386 case ARM64vecshi_UQSHL8x16: case ARM64vecshi_SQSHL8x16:
1387 case ARM64vecshi_SQSHLU8x16:
1388 minSh = 0; maxSh = 7; break;
1389 default:
1390 vassert(0);
1392 vassert(maxSh > 0);
1393 vassert(amt >= minSh && amt <= maxSh);
1394 return i;
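/* The asymmetry matches the A64 immediate-shift encodings: a right shift by
   the full lane size is encodable (USHR then produces all zeroes, SSHR a
   copy of the sign bit in every position), whereas an immediate left shift
   only encodes amounts up to lane_size-1. */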
1396 ARM64Instr* ARM64Instr_VExtV ( HReg dst, HReg srcLo, HReg srcHi, UInt amtB ) {
1397 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1398 i->tag = ARM64in_VExtV;
1399 i->ARM64in.VExtV.dst = dst;
1400 i->ARM64in.VExtV.srcLo = srcLo;
1401 i->ARM64in.VExtV.srcHi = srcHi;
1402 i->ARM64in.VExtV.amtB = amtB;
1403 vassert(amtB >= 1 && amtB <= 15);
1404 return i;
1406 ARM64Instr* ARM64Instr_VImmQ (HReg rQ, UShort imm) {
1407 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1408 i->tag = ARM64in_VImmQ;
1409 i->ARM64in.VImmQ.rQ = rQ;
1410 i->ARM64in.VImmQ.imm = imm;
1411 /* Check that this is something that can actually be emitted. */
1412 switch (imm) {
1413 case 0x0000: case 0x0001: case 0x0003:
1414 case 0x000F: case 0x003F: case 0x00FF: case 0xFFFF:
1415 break;
1416 default:
1417 vassert(0);
1419 return i;
1421 ARM64Instr* ARM64Instr_VDfromX ( HReg rD, HReg rX ) {
1422 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1423 i->tag = ARM64in_VDfromX;
1424 i->ARM64in.VDfromX.rD = rD;
1425 i->ARM64in.VDfromX.rX = rX;
1426 return i;
1428 ARM64Instr* ARM64Instr_VQfromX ( HReg rQ, HReg rXlo ) {
1429 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1430 i->tag = ARM64in_VQfromX;
1431 i->ARM64in.VQfromX.rQ = rQ;
1432 i->ARM64in.VQfromX.rXlo = rXlo;
1433 return i;
1435 ARM64Instr* ARM64Instr_VQfromXX ( HReg rQ, HReg rXhi, HReg rXlo ) {
1436 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1437 i->tag = ARM64in_VQfromXX;
1438 i->ARM64in.VQfromXX.rQ = rQ;
1439 i->ARM64in.VQfromXX.rXhi = rXhi;
1440 i->ARM64in.VQfromXX.rXlo = rXlo;
1441 return i;
1443 ARM64Instr* ARM64Instr_VXfromQ ( HReg rX, HReg rQ, UInt laneNo ) {
1444 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1445 i->tag = ARM64in_VXfromQ;
1446 i->ARM64in.VXfromQ.rX = rX;
1447 i->ARM64in.VXfromQ.rQ = rQ;
1448 i->ARM64in.VXfromQ.laneNo = laneNo;
1449 vassert(laneNo <= 1);
1450 return i;
1452 ARM64Instr* ARM64Instr_VXfromDorS ( HReg rX, HReg rDorS, Bool fromD ) {
1453 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1454 i->tag = ARM64in_VXfromDorS;
1455 i->ARM64in.VXfromDorS.rX = rX;
1456 i->ARM64in.VXfromDorS.rDorS = rDorS;
1457 i->ARM64in.VXfromDorS.fromD = fromD;
1458 return i;
1460 ARM64Instr* ARM64Instr_VMov ( UInt szB, HReg dst, HReg src ) {
1461 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1462 i->tag = ARM64in_VMov;
1463 i->ARM64in.VMov.szB = szB;
1464 i->ARM64in.VMov.dst = dst;
1465 i->ARM64in.VMov.src = src;
1466 switch (szB) {
1467 case 16:
1468 vassert(hregClass(src) == HRcVec128);
1469 vassert(hregClass(dst) == HRcVec128);
1470 break;
1471 case 8:
1472 vassert(hregClass(src) == HRcFlt64);
1473 vassert(hregClass(dst) == HRcFlt64);
1474 break;
1475 default:
1476 vpanic("ARM64Instr_VMov");
1478 return i;
1480 ARM64Instr* ARM64Instr_EvCheck ( ARM64AMode* amCounter,
1481 ARM64AMode* amFailAddr ) {
1482 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1483 i->tag = ARM64in_EvCheck;
1484 i->ARM64in.EvCheck.amCounter = amCounter;
1485 i->ARM64in.EvCheck.amFailAddr = amFailAddr;
1486 return i;
1488 ARM64Instr* ARM64Instr_ProfInc ( void ) {
1489 ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
1490 i->tag = ARM64in_ProfInc;
1491 return i;
1494 /* ... */
1496 void ppARM64Instr ( const ARM64Instr* i ) {
1497 switch (i->tag) {
1498 case ARM64in_Arith:
1499 vex_printf("%s ", i->ARM64in.Arith.isAdd ? "add" : "sub");
1500 ppHRegARM64(i->ARM64in.Arith.dst);
1501 vex_printf(", ");
1502 ppHRegARM64(i->ARM64in.Arith.argL);
1503 vex_printf(", ");
1504 ppARM64RIA(i->ARM64in.Arith.argR);
1505 return;
1506 case ARM64in_Cmp:
1507 vex_printf("cmp%s ", i->ARM64in.Cmp.is64 ? " " : "(w)" );
1508 ppHRegARM64(i->ARM64in.Cmp.argL);
1509 vex_printf(", ");
1510 ppARM64RIA(i->ARM64in.Cmp.argR);
1511 return;
1512 case ARM64in_Logic:
1513 vex_printf("%s ", showARM64LogicOp(i->ARM64in.Logic.op));
1514 ppHRegARM64(i->ARM64in.Logic.dst);
1515 vex_printf(", ");
1516 ppHRegARM64(i->ARM64in.Logic.argL);
1517 vex_printf(", ");
1518 ppARM64RIL(i->ARM64in.Logic.argR);
1519 return;
1520 case ARM64in_RRS:
1521 vex_printf("%s ", showARM64RRSOp(i->ARM64in.RRS.mainOp));
1522 ppHRegARM64(i->ARM64in.RRS.dst);
1523 vex_printf(", ");
1524 ppHRegARM64(i->ARM64in.RRS.argL);
1525 vex_printf(", ");
1526 ppHRegARM64(i->ARM64in.RRS.argR);
1527 vex_printf(", %s #%u", showARM64ShiftOp(i->ARM64in.RRS.shiftOp),
1528 i->ARM64in.RRS.amt);
1529 return;
1530 case ARM64in_Test:
1531 vex_printf("tst ");
1532 ppHRegARM64(i->ARM64in.Test.argL);
1533 vex_printf(", ");
1534 ppARM64RIL(i->ARM64in.Test.argR);
1535 return;
1536 case ARM64in_Shift:
1537 vex_printf("%s ", showARM64ShiftOp(i->ARM64in.Shift.op));
1538 ppHRegARM64(i->ARM64in.Shift.dst);
1539 vex_printf(", ");
1540 ppHRegARM64(i->ARM64in.Shift.argL);
1541 vex_printf(", ");
1542 ppARM64RI6(i->ARM64in.Shift.argR);
1543 return;
1544 case ARM64in_Unary:
1545 vex_printf("%s ", showARM64UnaryOp(i->ARM64in.Unary.op));
1546 ppHRegARM64(i->ARM64in.Unary.dst);
1547 vex_printf(", ");
1548 ppHRegARM64(i->ARM64in.Unary.src);
1549 return;
1550 case ARM64in_Set64:
1551 vex_printf("cset ");
1552 ppHRegARM64(i->ARM64in.Set64.dst);
1553 vex_printf(", %s", showARM64CondCode(i->ARM64in.Set64.cond));
1554 return;
1555 case ARM64in_MovI:
1556 vex_printf("mov ");
1557 ppHRegARM64(i->ARM64in.MovI.dst);
1558 vex_printf(", ");
1559 ppHRegARM64(i->ARM64in.MovI.src);
1560 return;
1561 case ARM64in_Imm64:
1562 vex_printf("imm64 ");
1563 ppHRegARM64(i->ARM64in.Imm64.dst);
1564 vex_printf(", 0x%llx", i->ARM64in.Imm64.imm64);
1565 return;
1566 case ARM64in_LdSt64:
1567 if (i->ARM64in.LdSt64.isLoad) {
1568 vex_printf("ldr ");
1569 ppHRegARM64(i->ARM64in.LdSt64.rD);
1570 vex_printf(", ");
1571 ppARM64AMode(i->ARM64in.LdSt64.amode);
1572 } else {
1573 vex_printf("str ");
1574 ppARM64AMode(i->ARM64in.LdSt64.amode);
1575 vex_printf(", ");
1576 ppHRegARM64(i->ARM64in.LdSt64.rD);
1578 return;
1579 case ARM64in_LdSt32:
1580 if (i->ARM64in.LdSt32.isLoad) {
1581 vex_printf("ldruw ");
1582 ppHRegARM64(i->ARM64in.LdSt32.rD);
1583 vex_printf(", ");
1584 ppARM64AMode(i->ARM64in.LdSt32.amode);
1585 } else {
1586 vex_printf("strw ");
1587 ppARM64AMode(i->ARM64in.LdSt32.amode);
1588 vex_printf(", ");
1589 ppHRegARM64(i->ARM64in.LdSt32.rD);
1591 return;
1592 case ARM64in_LdSt16:
1593 if (i->ARM64in.LdSt16.isLoad) {
1594 vex_printf("ldruh ");
1595 ppHRegARM64(i->ARM64in.LdSt16.rD);
1596 vex_printf(", ");
1597 ppARM64AMode(i->ARM64in.LdSt16.amode);
1598 } else {
1599 vex_printf("strh ");
1600 ppARM64AMode(i->ARM64in.LdSt16.amode);
1601 vex_printf(", ");
1602 ppHRegARM64(i->ARM64in.LdSt16.rD);
1604 return;
1605 case ARM64in_LdSt8:
1606 if (i->ARM64in.LdSt8.isLoad) {
1607 vex_printf("ldrub ");
1608 ppHRegARM64(i->ARM64in.LdSt8.rD);
1609 vex_printf(", ");
1610 ppARM64AMode(i->ARM64in.LdSt8.amode);
1611 } else {
1612 vex_printf("strb ");
1613 ppARM64AMode(i->ARM64in.LdSt8.amode);
1614 vex_printf(", ");
1615 ppHRegARM64(i->ARM64in.LdSt8.rD);
1617 return;
1618 case ARM64in_XDirect:
1619 vex_printf("(xDirect) ");
1620 vex_printf("if (%%pstate.%s) { ",
1621 showARM64CondCode(i->ARM64in.XDirect.cond));
1622 vex_printf("imm64 x9,0x%llx; ", i->ARM64in.XDirect.dstGA);
1623 vex_printf("str x9,");
1624 ppARM64AMode(i->ARM64in.XDirect.amPC);
1625 vex_printf("; imm64-exactly4 x9,$disp_cp_chain_me_to_%sEP; ",
1626 i->ARM64in.XDirect.toFastEP ? "fast" : "slow");
1627 vex_printf("blr x9 }");
1628 return;
1629 case ARM64in_XIndir:
1630 vex_printf("(xIndir) ");
1631 vex_printf("if (%%pstate.%s) { ",
1632 showARM64CondCode(i->ARM64in.XIndir.cond));
1633 vex_printf("str ");
1634 ppHRegARM64(i->ARM64in.XIndir.dstGA);
1635 vex_printf(",");
1636 ppARM64AMode(i->ARM64in.XIndir.amPC);
1637 vex_printf("; imm64 x9,$disp_cp_xindir; ");
1638 vex_printf("br x9 }");
1639 return;
1640 case ARM64in_XAssisted:
1641 vex_printf("(xAssisted) ");
1642 vex_printf("if (%%pstate.%s) { ",
1643 showARM64CondCode(i->ARM64in.XAssisted.cond));
1644 vex_printf("str ");
1645 ppHRegARM64(i->ARM64in.XAssisted.dstGA);
1646 vex_printf(",");
1647 ppARM64AMode(i->ARM64in.XAssisted.amPC);
1648 vex_printf("; movw x21,$IRJumpKind_to_TRCVAL(%d); ",
1649 (Int)i->ARM64in.XAssisted.jk);
1650 vex_printf("imm64 x9,$disp_cp_xassisted; ");
1651 vex_printf("br x9 }");
1652 return;
1653 case ARM64in_CSel:
1654 vex_printf("csel ");
1655 ppHRegARM64(i->ARM64in.CSel.dst);
1656 vex_printf(", ");
1657 ppHRegARM64(i->ARM64in.CSel.argL);
1658 vex_printf(", ");
1659 ppHRegARM64(i->ARM64in.CSel.argR);
1660 vex_printf(", %s", showARM64CondCode(i->ARM64in.CSel.cond));
1661 return;
1662 case ARM64in_Call:
1663 vex_printf("call%s ",
1664 i->ARM64in.Call.cond==ARM64cc_AL
1665 ? " " : showARM64CondCode(i->ARM64in.Call.cond));
1666 vex_printf("0x%llx [nArgRegs=%d, ",
1667 i->ARM64in.Call.target, i->ARM64in.Call.nArgRegs);
1668 ppRetLoc(i->ARM64in.Call.rloc);
1669 vex_printf("]");
1670 return;
1671 case ARM64in_AddToSP: {
1672 Int simm = i->ARM64in.AddToSP.simm;
1673 vex_printf("%s xsp, xsp, #%d", simm < 0 ? "sub" : "add",
1674 simm < 0 ? -simm : simm);
1675 return;
1677 case ARM64in_FromSP:
1678 vex_printf("mov ");
1679 ppHRegARM64(i->ARM64in.FromSP.dst);
1680 vex_printf(", xsp");
1681 return;
1682 case ARM64in_Mul:
1683 vex_printf("%s ", showARM64MulOp(i->ARM64in.Mul.op));
1684 ppHRegARM64(i->ARM64in.Mul.dst);
1685 vex_printf(", ");
1686 ppHRegARM64(i->ARM64in.Mul.argL);
1687 vex_printf(", ");
1688 ppHRegARM64(i->ARM64in.Mul.argR);
1689 return;
1691 case ARM64in_LdrEX: {
1692 const HChar* sz = " ";
1693 switch (i->ARM64in.LdrEX.szB) {
1694 case 1: sz = "b"; break;
1695 case 2: sz = "h"; break;
1696 case 4: case 8: break;
1697 default: vassert(0);
1699 vex_printf("ldxr%s %c2, [x4]",
1700 sz, i->ARM64in.LdrEX.szB == 8 ? 'x' : 'w');
1701 return;
1703 case ARM64in_StrEX: {
1704 const HChar* sz = " ";
1705 switch (i->ARM64in.StrEX.szB) {
1706 case 1: sz = "b"; break;
1707 case 2: sz = "h"; break;
1708 case 4: case 8: break;
1709 default: vassert(0);
1711 vex_printf("stxr%s w0, %c2, [x4]",
1712 sz, i->ARM64in.StrEX.szB == 8 ? 'x' : 'w');
1713 return;
1715 case ARM64in_LdrEXP:
1716 vex_printf("ldxp x2, x3, [x4]");
1717 return;
1718 case ARM64in_StrEXP:
1719 vex_printf("stxp w0, x2, x3, [x4]");
1720 return;
1721 case ARM64in_CAS: {
1722 vex_printf("x1 = cas(%dbit)(x3, x5 -> x7)", 8 * i->ARM64in.CAS.szB);
1723 return;
1725 case ARM64in_CASP: {
1726 vex_printf("x0,x1 = casp(2x%dbit)(x2, x4,x5 -> x6,x7)",
1727 8 * i->ARM64in.CASP.szB);
1728 return;
1730 case ARM64in_MFence:
1731 vex_printf("(mfence) dsb sy; dmb sy; isb");
1732 return;
1733 case ARM64in_ClrEX:
1734 vex_printf("clrex #15");
1735 return;
1736 case ARM64in_VLdStH:
1737 if (i->ARM64in.VLdStH.isLoad) {
1738 vex_printf("ldr ");
1739 ppHRegARM64asHreg(i->ARM64in.VLdStH.hD);
1740 vex_printf(", %u(", i->ARM64in.VLdStH.uimm12);
1741 ppHRegARM64(i->ARM64in.VLdStH.rN);
1742 vex_printf(")");
1743 } else {
1744 vex_printf("str ");
1745 vex_printf("%u(", i->ARM64in.VLdStH.uimm12);
1746 ppHRegARM64(i->ARM64in.VLdStH.rN);
1747 vex_printf("), ");
1748 ppHRegARM64asHreg(i->ARM64in.VLdStH.hD);
1750 return;
1751 case ARM64in_VLdStS:
1752 if (i->ARM64in.VLdStS.isLoad) {
1753 vex_printf("ldr ");
1754 ppHRegARM64asSreg(i->ARM64in.VLdStS.sD);
1755 vex_printf(", %u(", i->ARM64in.VLdStS.uimm12);
1756 ppHRegARM64(i->ARM64in.VLdStS.rN);
1757 vex_printf(")");
1758 } else {
1759 vex_printf("str ");
1760 vex_printf("%u(", i->ARM64in.VLdStS.uimm12);
1761 ppHRegARM64(i->ARM64in.VLdStS.rN);
1762 vex_printf("), ");
1763 ppHRegARM64asSreg(i->ARM64in.VLdStS.sD);
1765 return;
1766 case ARM64in_VLdStD:
1767 if (i->ARM64in.VLdStD.isLoad) {
1768 vex_printf("ldr ");
1769 ppHRegARM64(i->ARM64in.VLdStD.dD);
1770 vex_printf(", %u(", i->ARM64in.VLdStD.uimm12);
1771 ppHRegARM64(i->ARM64in.VLdStD.rN);
1772 vex_printf(")");
1773 } else {
1774 vex_printf("str ");
1775 vex_printf("%u(", i->ARM64in.VLdStD.uimm12);
1776 ppHRegARM64(i->ARM64in.VLdStD.rN);
1777 vex_printf("), ");
1778 ppHRegARM64(i->ARM64in.VLdStD.dD);
1780 return;
1781 case ARM64in_VLdStQ:
1782 if (i->ARM64in.VLdStQ.isLoad)
1783 vex_printf("ld1.2d {");
1784 else
1785 vex_printf("st1.2d {");
1786 ppHRegARM64(i->ARM64in.VLdStQ.rQ);
1787 vex_printf("}, [");
1788 ppHRegARM64(i->ARM64in.VLdStQ.rN);
1789 vex_printf("]");
1790 return;
1791 case ARM64in_VCvtI2F: {
1792 HChar syn = '?';
1793 UInt fszB = 0;
1794 UInt iszB = 0;
1795 characteriseARM64CvtOp(&syn, &fszB, &iszB, i->ARM64in.VCvtI2F.how);
1796 vex_printf("%ccvtf ", syn);
1797 ppHRegARM64(i->ARM64in.VCvtI2F.rD);
1798 vex_printf("(%c-reg), ", fszB == 4 ? 'S' : 'D');
1799 ppHRegARM64(i->ARM64in.VCvtI2F.rS);
1800 vex_printf("(%c-reg)", iszB == 4 ? 'W' : 'X');
1801 return;
1803 case ARM64in_VCvtF2I: {
1804 HChar syn = '?';
1805 UInt fszB = 0;
1806 UInt iszB = 0;
1807 HChar rmo = '?';
1808 characteriseARM64CvtOp(&syn, &fszB, &iszB, i->ARM64in.VCvtF2I.how);
1809 UChar armRM = i->ARM64in.VCvtF2I.armRM;
1810 if (armRM < 4) rmo = "npmz"[armRM];
1811 vex_printf("fcvt%c%c ", rmo, syn);
1812 ppHRegARM64(i->ARM64in.VCvtF2I.rD);
1813 vex_printf("(%c-reg), ", iszB == 4 ? 'W' : 'X');
1814 ppHRegARM64(i->ARM64in.VCvtF2I.rS);
1815 vex_printf("(%c-reg)", fszB == 4 ? 'S' : 'D');
1816 return;
1818 case ARM64in_VCvtSD:
1819 vex_printf("fcvt%s ", i->ARM64in.VCvtSD.sToD ? "s2d" : "d2s");
1820 if (i->ARM64in.VCvtSD.sToD) {
1821 ppHRegARM64(i->ARM64in.VCvtSD.dst);
1822 vex_printf(", ");
1823 ppHRegARM64asSreg(i->ARM64in.VCvtSD.src);
1824 } else {
1825 ppHRegARM64asSreg(i->ARM64in.VCvtSD.dst);
1826 vex_printf(", ");
1827 ppHRegARM64(i->ARM64in.VCvtSD.src);
1829 return;
1830 case ARM64in_VCvtHS:
1831 vex_printf("fcvt%s ", i->ARM64in.VCvtHS.hToS ? "h2s" : "s2h");
1832 if (i->ARM64in.VCvtHS.hToS) {
1833 ppHRegARM64asSreg(i->ARM64in.VCvtHS.dst);
1834 vex_printf(", ");
1835 ppHRegARM64asHreg(i->ARM64in.VCvtHS.src);
1836 } else {
1837 ppHRegARM64asHreg(i->ARM64in.VCvtHS.dst);
1838 vex_printf(", ");
1839 ppHRegARM64asSreg(i->ARM64in.VCvtHS.src);
1841 return;
1842 case ARM64in_VCvtHD:
1843 vex_printf("fcvt%s ", i->ARM64in.VCvtHD.hToD ? "h2d" : "d2h");
1844 if (i->ARM64in.VCvtHD.hToD) {
1845 ppHRegARM64(i->ARM64in.VCvtHD.dst);
1846 vex_printf(", ");
1847 ppHRegARM64asHreg(i->ARM64in.VCvtHD.src);
1848 } else {
1849 ppHRegARM64asHreg(i->ARM64in.VCvtHD.dst);
1850 vex_printf(", ");
1851 ppHRegARM64(i->ARM64in.VCvtHD.src);
1853 return;
1854 case ARM64in_VUnaryD:
1855 vex_printf("f%s ", showARM64FpUnaryOp(i->ARM64in.VUnaryD.op));
1856 ppHRegARM64(i->ARM64in.VUnaryD.dst);
1857 vex_printf(", ");
1858 ppHRegARM64(i->ARM64in.VUnaryD.src);
1859 return;
1860 case ARM64in_VUnaryS:
1861 vex_printf("f%s ", showARM64FpUnaryOp(i->ARM64in.VUnaryS.op));
1862 ppHRegARM64asSreg(i->ARM64in.VUnaryS.dst);
1863 vex_printf(", ");
1864 ppHRegARM64asSreg(i->ARM64in.VUnaryS.src);
1865 return;
1866 case ARM64in_VUnaryH:
1867 vex_printf("f%s ", showARM64FpUnaryOp(i->ARM64in.VUnaryH.op));
1868 ppHRegARM64asHreg(i->ARM64in.VUnaryH.dst);
1869 vex_printf(", ");
1870 ppHRegARM64asHreg(i->ARM64in.VUnaryH.src);
1871 return;
1872 case ARM64in_VBinD:
1873 vex_printf("f%s ", showARM64FpBinOp(i->ARM64in.VBinD.op));
1874 ppHRegARM64(i->ARM64in.VBinD.dst);
1875 vex_printf(", ");
1876 ppHRegARM64(i->ARM64in.VBinD.argL);
1877 vex_printf(", ");
1878 ppHRegARM64(i->ARM64in.VBinD.argR);
1879 return;
1880 case ARM64in_VBinS:
1881 vex_printf("f%s ", showARM64FpBinOp(i->ARM64in.VBinS.op));
1882 ppHRegARM64asSreg(i->ARM64in.VBinS.dst);
1883 vex_printf(", ");
1884 ppHRegARM64asSreg(i->ARM64in.VBinS.argL);
1885 vex_printf(", ");
1886 ppHRegARM64asSreg(i->ARM64in.VBinS.argR);
1887 return;
1888 case ARM64in_VBinH:
1889 vex_printf("f%s ", showARM64FpBinOp(i->ARM64in.VBinH.op));
1890 ppHRegARM64asHreg(i->ARM64in.VBinH.dst);
1891 vex_printf(", ");
1892 ppHRegARM64asHreg(i->ARM64in.VBinH.argL);
1893 vex_printf(", ");
1894 ppHRegARM64asHreg(i->ARM64in.VBinH.argR);
1895 return;
1896 case ARM64in_VTriD:
1897 vex_printf("f%s ", showARM64FpTriOp(i->ARM64in.VTriD.op));
1898 ppHRegARM64(i->ARM64in.VTriD.dst);
1899 vex_printf(", ");
1900 ppHRegARM64(i->ARM64in.VTriD.arg1);
1901 vex_printf(", ");
1902 ppHRegARM64(i->ARM64in.VTriD.arg2);
1903 vex_printf(", ");
1904 ppHRegARM64(i->ARM64in.VTriD.arg3);
1905 return;
1906 case ARM64in_VTriS:
1907 vex_printf("f%s ", showARM64FpTriOp(i->ARM64in.VTriS.op));
1908 ppHRegARM64asSreg(i->ARM64in.VTriS.dst);
1909 vex_printf(", ");
1910 ppHRegARM64asSreg(i->ARM64in.VTriS.arg1);
1911 vex_printf(", ");
1912 ppHRegARM64asSreg(i->ARM64in.VTriS.arg2);
1913 vex_printf(", ");
1914 ppHRegARM64asSreg(i->ARM64in.VTriS.arg3);
1915 return;
1916 case ARM64in_VCmpD:
1917 vex_printf("fcmp ");
1918 ppHRegARM64(i->ARM64in.VCmpD.argL);
1919 vex_printf(", ");
1920 ppHRegARM64(i->ARM64in.VCmpD.argR);
1921 return;
1922 case ARM64in_VCmpS:
1923 vex_printf("fcmp ");
1924 ppHRegARM64asSreg(i->ARM64in.VCmpS.argL);
1925 vex_printf(", ");
1926 ppHRegARM64asSreg(i->ARM64in.VCmpS.argR);
1927 return;
1928 case ARM64in_VCmpH:
1929 vex_printf("fcmp ");
1930 ppHRegARM64asHreg(i->ARM64in.VCmpH.argL);
1931 vex_printf(", ");
1932 ppHRegARM64asHreg(i->ARM64in.VCmpH.argR);
1933 return;
1934 case ARM64in_VFCSel: {
1935 UInt (*ppHRegARM64fp)(HReg)
1936 = (i->ARM64in.VFCSel.isD ? ppHRegARM64 : ppHRegARM64asSreg);
1937 vex_printf("fcsel ");
1938 ppHRegARM64fp(i->ARM64in.VFCSel.dst);
1939 vex_printf(", ");
1940 ppHRegARM64fp(i->ARM64in.VFCSel.argL);
1941 vex_printf(", ");
1942 ppHRegARM64fp(i->ARM64in.VFCSel.argR);
1943 vex_printf(", %s", showARM64CondCode(i->ARM64in.VFCSel.cond));
1944 return;
1946 case ARM64in_FPCR:
1947 if (i->ARM64in.FPCR.toFPCR) {
1948 vex_printf("msr fpcr, ");
1949 ppHRegARM64(i->ARM64in.FPCR.iReg);
1950 } else {
1951 vex_printf("mrs ");
1952 ppHRegARM64(i->ARM64in.FPCR.iReg);
1953 vex_printf(", fpcr");
1955 return;
1956 case ARM64in_FPSR:
1957 if (i->ARM64in.FPSR.toFPSR) {
1958 vex_printf("msr fpsr, ");
1959 ppHRegARM64(i->ARM64in.FPSR.iReg);
1960 } else {
1961 vex_printf("mrs ");
1962 ppHRegARM64(i->ARM64in.FPSR.iReg);
1963 vex_printf(", fpsr");
1965 return;
1966 case ARM64in_VBinV: {
1967 const HChar* nm = "??";
1968 const HChar* ar = "??";
1969 showARM64VecBinOp(&nm, &ar, i->ARM64in.VBinV.op);
1970 vex_printf("%s ", nm);
1971 ppHRegARM64(i->ARM64in.VBinV.dst);
1972 vex_printf(".%s, ", ar);
1973 ppHRegARM64(i->ARM64in.VBinV.argL);
1974 vex_printf(".%s, ", ar);
1975 ppHRegARM64(i->ARM64in.VBinV.argR);
1976 vex_printf(".%s", ar);
1977 return;
1979 case ARM64in_VModifyV: {
1980 const HChar* nm = "??";
1981 const HChar* ar = "??";
1982 showARM64VecModifyOp(&nm, &ar, i->ARM64in.VModifyV.op);
1983 vex_printf("%s ", nm);
1984 ppHRegARM64(i->ARM64in.VModifyV.mod);
1985 vex_printf(".%s, ", ar);
1986 ppHRegARM64(i->ARM64in.VModifyV.arg);
1987 vex_printf(".%s", ar);
1988 return;
1990 case ARM64in_VUnaryV: {
1991 const HChar* nm = "??";
1992 const HChar* ar = "??";
1993 showARM64VecUnaryOp(&nm, &ar, i->ARM64in.VUnaryV.op);
1994 vex_printf("%s ", nm);
1995 ppHRegARM64(i->ARM64in.VUnaryV.dst);
1996 vex_printf(".%s, ", ar);
1997 ppHRegARM64(i->ARM64in.VUnaryV.arg);
1998 vex_printf(".%s", ar);
1999 return;
2001 case ARM64in_VNarrowV: {
2002 UInt dszBlg2 = i->ARM64in.VNarrowV.dszBlg2;
2003 const HChar* darr[3] = { "8b", "4h", "2s" };
2004 const HChar* sarr[3] = { "8h", "4s", "2d" };
2005 const HChar* nm = showARM64VecNarrowOp(i->ARM64in.VNarrowV.op);
2006 vex_printf("%s ", nm);
2007 ppHRegARM64(i->ARM64in.VNarrowV.dst);
2008 vex_printf(".%s, ", dszBlg2 < 3 ? darr[dszBlg2] : "??");
2009 ppHRegARM64(i->ARM64in.VNarrowV.src);
2010 vex_printf(".%s", dszBlg2 < 3 ? sarr[dszBlg2] : "??");
2011 return;
2013 case ARM64in_VShiftImmV: {
2014 const HChar* nm = "??";
2015 const HChar* ar = "??";
2016 showARM64VecShiftImmOp(&nm, &ar, i->ARM64in.VShiftImmV.op);
2017 vex_printf("%s ", nm);
2018 ppHRegARM64(i->ARM64in.VShiftImmV.dst);
2019 vex_printf(".%s, ", ar);
2020 ppHRegARM64(i->ARM64in.VShiftImmV.src);
2021 vex_printf(".%s, #%u", ar, i->ARM64in.VShiftImmV.amt);
2022 return;
2024 case ARM64in_VExtV: {
2025 vex_printf("ext ");
2026 ppHRegARM64(i->ARM64in.VExtV.dst);
2027 vex_printf(".16b, ");
2028 ppHRegARM64(i->ARM64in.VExtV.srcLo);
2029 vex_printf(".16b, ");
2030 ppHRegARM64(i->ARM64in.VExtV.srcHi);
2031 vex_printf(".16b, #%u", i->ARM64in.VExtV.amtB);
2032 return;
2034 case ARM64in_VImmQ:
2035 vex_printf("qimm ");
2036 ppHRegARM64(i->ARM64in.VImmQ.rQ);
2037 vex_printf(", Bits16toBytes16(0x%x)", (UInt)i->ARM64in.VImmQ.imm);
2038 return;
2039 case ARM64in_VDfromX:
2040 vex_printf("fmov ");
2041 ppHRegARM64(i->ARM64in.VDfromX.rD);
2042 vex_printf(", ");
2043 ppHRegARM64(i->ARM64in.VDfromX.rX);
2044 return;
2045 case ARM64in_VQfromX:
2046 vex_printf("fmov ");
2047 ppHRegARM64(i->ARM64in.VQfromX.rQ);
2048 vex_printf(".d[0], ");
2049 ppHRegARM64(i->ARM64in.VQfromX.rXlo);
2050 return;
2051 case ARM64in_VQfromXX:
2052 vex_printf("qFromXX ");
2053 ppHRegARM64(i->ARM64in.VQfromXX.rQ);
2054 vex_printf(", ");
2055 ppHRegARM64(i->ARM64in.VQfromXX.rXhi);
2056 vex_printf(", ");
2057 ppHRegARM64(i->ARM64in.VQfromXX.rXlo);
2058 return;
2059 case ARM64in_VXfromQ:
2060 vex_printf("fmov ");
2061 ppHRegARM64(i->ARM64in.VXfromQ.rX);
2062 vex_printf(", ");
2063 ppHRegARM64(i->ARM64in.VXfromQ.rQ);
2064 vex_printf(".d[%u]", i->ARM64in.VXfromQ.laneNo);
2065 return;
2066 case ARM64in_VXfromDorS:
2067 vex_printf("fmov ");
2068 ppHRegARM64(i->ARM64in.VXfromDorS.rX);
2069 vex_printf("(%c-reg), ", i->ARM64in.VXfromDorS.fromD ? 'X':'W');
2070 ppHRegARM64(i->ARM64in.VXfromDorS.rDorS);
2071 vex_printf("(%c-reg)", i->ARM64in.VXfromDorS.fromD ? 'D' : 'S');
2072 return;
2073 case ARM64in_VMov: {
2074 UChar aux = '?';
2075 switch (i->ARM64in.VMov.szB) {
2076 case 16: aux = 'q'; break;
2077 case 8: aux = 'd'; break;
2078 case 4: aux = 's'; break;
2079 default: break;
2081 vex_printf("mov(%c) ", aux);
2082 ppHRegARM64(i->ARM64in.VMov.dst);
2083 vex_printf(", ");
2084 ppHRegARM64(i->ARM64in.VMov.src);
2085 return;
2087 case ARM64in_EvCheck:
2088 vex_printf("(evCheck) ldr w9,");
2089 ppARM64AMode(i->ARM64in.EvCheck.amCounter);
2090 vex_printf("; subs w9,w9,$1; str w9,");
2091 ppARM64AMode(i->ARM64in.EvCheck.amCounter);
2092 vex_printf("; bpl nofail; ldr x9,");
2093 ppARM64AMode(i->ARM64in.EvCheck.amFailAddr);
2094 vex_printf("; br x9; nofail:");
2095 return;
2096 case ARM64in_ProfInc:
2097 vex_printf("(profInc) imm64-fixed4 x9,$NotKnownYet; "
2098 "ldr x8,[x9]; add x8,x8,#1, str x8,[x9]");
2099 return;
2100 default:
2101 vex_printf("ppARM64Instr: unhandled case (tag %d)", (Int)i->tag);
2102 vpanic("ppARM64Instr(1)");
2103 return;
2108 /* --------- Helpers for register allocation. --------- */
2110 void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 )
2112 vassert(mode64 == True);
2113 initHRegUsage(u);
2114 switch (i->tag) {
2115 case ARM64in_Arith:
2116 addHRegUse(u, HRmWrite, i->ARM64in.Arith.dst);
2117 addHRegUse(u, HRmRead, i->ARM64in.Arith.argL);
2118 addRegUsage_ARM64RIA(u, i->ARM64in.Arith.argR);
2119 return;
2120 case ARM64in_Cmp:
2121 addHRegUse(u, HRmRead, i->ARM64in.Cmp.argL);
2122 addRegUsage_ARM64RIA(u, i->ARM64in.Cmp.argR);
2123 return;
2124 case ARM64in_Logic:
2125 addHRegUse(u, HRmWrite, i->ARM64in.Logic.dst);
2126 addHRegUse(u, HRmRead, i->ARM64in.Logic.argL);
2127 addRegUsage_ARM64RIL(u, i->ARM64in.Logic.argR);
2128 return;
2129 case ARM64in_RRS:
2130 addHRegUse(u, HRmWrite, i->ARM64in.RRS.dst);
2131 addHRegUse(u, HRmRead, i->ARM64in.RRS.argL);
2132 addHRegUse(u, HRmRead, i->ARM64in.RRS.argR);
2133 return;
2134 case ARM64in_Test:
2135 addHRegUse(u, HRmRead, i->ARM64in.Test.argL);
2136 addRegUsage_ARM64RIL(u, i->ARM64in.Test.argR);
2137 return;
2138 case ARM64in_Shift:
2139 addHRegUse(u, HRmWrite, i->ARM64in.Shift.dst);
2140 addHRegUse(u, HRmRead, i->ARM64in.Shift.argL);
2141 addRegUsage_ARM64RI6(u, i->ARM64in.Shift.argR);
2142 return;
2143 case ARM64in_Unary:
2144 addHRegUse(u, HRmWrite, i->ARM64in.Unary.dst);
2145 addHRegUse(u, HRmRead, i->ARM64in.Unary.src);
2146 return;
2147 case ARM64in_Set64:
2148 addHRegUse(u, HRmWrite, i->ARM64in.Set64.dst);
2149 return;
2150 case ARM64in_MovI:
2151 addHRegUse(u, HRmWrite, i->ARM64in.MovI.dst);
2152 addHRegUse(u, HRmRead, i->ARM64in.MovI.src);
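/* Presumably these next three fields tell the register allocator that this
   instruction is a plain register-to-register copy whose source and
   destination may be coalesced; ARM64in_VMov below sets the same hint. */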
2153 u->isRegRegMove = True;
2154 u->regMoveSrc = i->ARM64in.MovI.src;
2155 u->regMoveDst = i->ARM64in.MovI.dst;
2156 return;
2157 case ARM64in_Imm64:
2158 addHRegUse(u, HRmWrite, i->ARM64in.Imm64.dst);
2159 return;
2160 case ARM64in_LdSt64:
2161 addRegUsage_ARM64AMode(u, i->ARM64in.LdSt64.amode);
2162 if (i->ARM64in.LdSt64.isLoad) {
2163 addHRegUse(u, HRmWrite, i->ARM64in.LdSt64.rD);
2164 } else {
2165 addHRegUse(u, HRmRead, i->ARM64in.LdSt64.rD);
2167 return;
2168 case ARM64in_LdSt32:
2169 addRegUsage_ARM64AMode(u, i->ARM64in.LdSt32.amode);
2170 if (i->ARM64in.LdSt32.isLoad) {
2171 addHRegUse(u, HRmWrite, i->ARM64in.LdSt32.rD);
2172 } else {
2173 addHRegUse(u, HRmRead, i->ARM64in.LdSt32.rD);
2175 return;
2176 case ARM64in_LdSt16:
2177 addRegUsage_ARM64AMode(u, i->ARM64in.LdSt16.amode);
2178 if (i->ARM64in.LdSt16.isLoad) {
2179 addHRegUse(u, HRmWrite, i->ARM64in.LdSt16.rD);
2180 } else {
2181 addHRegUse(u, HRmRead, i->ARM64in.LdSt16.rD);
2183 return;
2184 case ARM64in_LdSt8:
2185 addRegUsage_ARM64AMode(u, i->ARM64in.LdSt8.amode);
2186 if (i->ARM64in.LdSt8.isLoad) {
2187 addHRegUse(u, HRmWrite, i->ARM64in.LdSt8.rD);
2188 } else {
2189 addHRegUse(u, HRmRead, i->ARM64in.LdSt8.rD);
2191 return;
2192 /* XDirect/XIndir/XAssisted are also a bit subtle. They
2193 conditionally exit the block. Hence we only need to list (1)
2194 the registers that they read, and (2) the registers that they
2195 write in the case where the block is not exited. (2) is
2196 empty, hence only (1) is relevant here. */
2197 case ARM64in_XDirect:
2198 addRegUsage_ARM64AMode(u, i->ARM64in.XDirect.amPC);
2199 return;
2200 case ARM64in_XIndir:
2201 addHRegUse(u, HRmRead, i->ARM64in.XIndir.dstGA);
2202 addRegUsage_ARM64AMode(u, i->ARM64in.XIndir.amPC);
2203 return;
2204 case ARM64in_XAssisted:
2205 addHRegUse(u, HRmRead, i->ARM64in.XAssisted.dstGA);
2206 addRegUsage_ARM64AMode(u, i->ARM64in.XAssisted.amPC);
2207 return;
2208 case ARM64in_CSel:
2209 addHRegUse(u, HRmWrite, i->ARM64in.CSel.dst);
2210 addHRegUse(u, HRmRead, i->ARM64in.CSel.argL);
2211 addHRegUse(u, HRmRead, i->ARM64in.CSel.argR);
2212 return;
2213 case ARM64in_Call:
2214 /* logic and comments copied/modified from x86 back end */
2215 /* This is a bit subtle. */
2216 /* First off, claim it trashes all the caller-saved regs
2217 which fall within the register allocator's jurisdiction.
2218 These I believe to be x0 to x7 and the 128-bit vector
2219 registers in use, q16 .. q20. */
2220 addHRegUse(u, HRmWrite, hregARM64_X0());
2221 addHRegUse(u, HRmWrite, hregARM64_X1());
2222 addHRegUse(u, HRmWrite, hregARM64_X2());
2223 addHRegUse(u, HRmWrite, hregARM64_X3());
2224 addHRegUse(u, HRmWrite, hregARM64_X4());
2225 addHRegUse(u, HRmWrite, hregARM64_X5());
2226 addHRegUse(u, HRmWrite, hregARM64_X6());
2227 addHRegUse(u, HRmWrite, hregARM64_X7());
2228 addHRegUse(u, HRmWrite, hregARM64_Q16());
2229 addHRegUse(u, HRmWrite, hregARM64_Q17());
2230 addHRegUse(u, HRmWrite, hregARM64_Q18());
2231 addHRegUse(u, HRmWrite, hregARM64_Q19());
2232 addHRegUse(u, HRmWrite, hregARM64_Q20());
2233 /* Now we have to state any parameter-carrying registers
2234 which might be read. This depends on nArgRegs. */
2235 switch (i->ARM64in.Call.nArgRegs) {
2236 case 8: addHRegUse(u, HRmRead, hregARM64_X7()); /*fallthru*/
2237 case 7: addHRegUse(u, HRmRead, hregARM64_X6()); /*fallthru*/
2238 case 6: addHRegUse(u, HRmRead, hregARM64_X5()); /*fallthru*/
2239 case 5: addHRegUse(u, HRmRead, hregARM64_X4()); /*fallthru*/
2240 case 4: addHRegUse(u, HRmRead, hregARM64_X3()); /*fallthru*/
2241 case 3: addHRegUse(u, HRmRead, hregARM64_X2()); /*fallthru*/
2242 case 2: addHRegUse(u, HRmRead, hregARM64_X1()); /*fallthru*/
2243 case 1: addHRegUse(u, HRmRead, hregARM64_X0()); break;
2244 case 0: break;
2245 default: vpanic("getRegUsage_ARM64:Call:regparms");
2247 /* Finally, there is the issue that the insn trashes a
2248 register because the literal target address has to be
2249 loaded into a register. However, we reserve x9 for that
2250 purpose so there's no further complexity here. Stating x9
2251 as trashed is pointless since it's not under the control
2252 of the allocator, but what the hell. */
2253 addHRegUse(u, HRmWrite, hregARM64_X9());
2254 return;
2255 case ARM64in_AddToSP:
2256 /* Only changes SP, but regalloc doesn't control that, hence
2257 we don't care. */
2258 return;
2259 case ARM64in_FromSP:
2260 addHRegUse(u, HRmWrite, i->ARM64in.FromSP.dst);
2261 return;
2262 case ARM64in_Mul:
2263 addHRegUse(u, HRmWrite, i->ARM64in.Mul.dst);
2264 addHRegUse(u, HRmRead, i->ARM64in.Mul.argL);
2265 addHRegUse(u, HRmRead, i->ARM64in.Mul.argR);
2266 return;
2267 case ARM64in_LdrEX:
2268 addHRegUse(u, HRmRead, hregARM64_X4());
2269 addHRegUse(u, HRmWrite, hregARM64_X2());
2270 return;
2271 case ARM64in_StrEX:
2272 addHRegUse(u, HRmRead, hregARM64_X4());
2273 addHRegUse(u, HRmWrite, hregARM64_X0());
2274 addHRegUse(u, HRmRead, hregARM64_X2());
2275 return;
2276 case ARM64in_LdrEXP:
2277 addHRegUse(u, HRmRead, hregARM64_X4());
2278 addHRegUse(u, HRmWrite, hregARM64_X2());
2279 addHRegUse(u, HRmWrite, hregARM64_X3());
2280 return;
2281 case ARM64in_StrEXP:
2282 addHRegUse(u, HRmRead, hregARM64_X4());
2283 addHRegUse(u, HRmWrite, hregARM64_X0());
2284 addHRegUse(u, HRmRead, hregARM64_X2());
2285 addHRegUse(u, HRmRead, hregARM64_X3());
2286 return;
2287 case ARM64in_CAS:
2288 addHRegUse(u, HRmRead, hregARM64_X3());
2289 addHRegUse(u, HRmRead, hregARM64_X5());
2290 addHRegUse(u, HRmRead, hregARM64_X7());
2291 addHRegUse(u, HRmWrite, hregARM64_X1());
2292 /* Pointless to state this since X8 is not available to RA. */
2293 addHRegUse(u, HRmWrite, hregARM64_X8());
2294 break;
2295 case ARM64in_CASP:
2296 addHRegUse(u, HRmRead, hregARM64_X2());
2297 addHRegUse(u, HRmRead, hregARM64_X4());
2298 addHRegUse(u, HRmRead, hregARM64_X5());
2299 addHRegUse(u, HRmRead, hregARM64_X6());
2300 addHRegUse(u, HRmRead, hregARM64_X7());
2301 addHRegUse(u, HRmWrite, hregARM64_X0());
2302 addHRegUse(u, HRmWrite, hregARM64_X1());
2303 addHRegUse(u, HRmWrite, hregARM64_X9());
2304 addHRegUse(u, HRmWrite, hregARM64_X8());
2305 addHRegUse(u, HRmWrite, hregARM64_X3());
2306 break;
2307 case ARM64in_MFence:
2308 return;
2309 case ARM64in_ClrEX:
2310 return;
2311 case ARM64in_VLdStH:
2312 addHRegUse(u, HRmRead, i->ARM64in.VLdStH.rN);
2313 if (i->ARM64in.VLdStH.isLoad) {
2314 addHRegUse(u, HRmWrite, i->ARM64in.VLdStH.hD);
2315 } else {
2316 addHRegUse(u, HRmRead, i->ARM64in.VLdStH.hD);
2318 return;
2319 case ARM64in_VLdStS:
2320 addHRegUse(u, HRmRead, i->ARM64in.VLdStS.rN);
2321 if (i->ARM64in.VLdStS.isLoad) {
2322 addHRegUse(u, HRmWrite, i->ARM64in.VLdStS.sD);
2323 } else {
2324 addHRegUse(u, HRmRead, i->ARM64in.VLdStS.sD);
2326 return;
2327 case ARM64in_VLdStD:
2328 addHRegUse(u, HRmRead, i->ARM64in.VLdStD.rN);
2329 if (i->ARM64in.VLdStD.isLoad) {
2330 addHRegUse(u, HRmWrite, i->ARM64in.VLdStD.dD);
2331 } else {
2332 addHRegUse(u, HRmRead, i->ARM64in.VLdStD.dD);
2334 return;
2335 case ARM64in_VLdStQ:
2336 addHRegUse(u, HRmRead, i->ARM64in.VLdStQ.rN);
2337 if (i->ARM64in.VLdStQ.isLoad)
2338 addHRegUse(u, HRmWrite, i->ARM64in.VLdStQ.rQ);
2339 else
2340 addHRegUse(u, HRmRead, i->ARM64in.VLdStQ.rQ);
2341 return;
2342 case ARM64in_VCvtI2F:
2343 addHRegUse(u, HRmRead, i->ARM64in.VCvtI2F.rS);
2344 addHRegUse(u, HRmWrite, i->ARM64in.VCvtI2F.rD);
2345 return;
2346 case ARM64in_VCvtF2I:
2347 addHRegUse(u, HRmRead, i->ARM64in.VCvtF2I.rS);
2348 addHRegUse(u, HRmWrite, i->ARM64in.VCvtF2I.rD);
2349 return;
2350 case ARM64in_VCvtSD:
2351 addHRegUse(u, HRmWrite, i->ARM64in.VCvtSD.dst);
2352 addHRegUse(u, HRmRead, i->ARM64in.VCvtSD.src);
2353 return;
2354 case ARM64in_VCvtHS:
2355 addHRegUse(u, HRmWrite, i->ARM64in.VCvtHS.dst);
2356 addHRegUse(u, HRmRead, i->ARM64in.VCvtHS.src);
2357 return;
2358 case ARM64in_VCvtHD:
2359 addHRegUse(u, HRmWrite, i->ARM64in.VCvtHD.dst);
2360 addHRegUse(u, HRmRead, i->ARM64in.VCvtHD.src);
2361 return;
2362 case ARM64in_VUnaryD:
2363 addHRegUse(u, HRmWrite, i->ARM64in.VUnaryD.dst);
2364 addHRegUse(u, HRmRead, i->ARM64in.VUnaryD.src);
2365 return;
2366 case ARM64in_VUnaryS:
2367 addHRegUse(u, HRmWrite, i->ARM64in.VUnaryS.dst);
2368 addHRegUse(u, HRmRead, i->ARM64in.VUnaryS.src);
2369 return;
2370 case ARM64in_VUnaryH:
2371 addHRegUse(u, HRmWrite, i->ARM64in.VUnaryH.dst);
2372 addHRegUse(u, HRmRead, i->ARM64in.VUnaryH.src);
2373 return;
2374 case ARM64in_VBinD:
2375 addHRegUse(u, HRmWrite, i->ARM64in.VBinD.dst);
2376 addHRegUse(u, HRmRead, i->ARM64in.VBinD.argL);
2377 addHRegUse(u, HRmRead, i->ARM64in.VBinD.argR);
2378 return;
2379 case ARM64in_VBinS:
2380 addHRegUse(u, HRmWrite, i->ARM64in.VBinS.dst);
2381 addHRegUse(u, HRmRead, i->ARM64in.VBinS.argL);
2382 addHRegUse(u, HRmRead, i->ARM64in.VBinS.argR);
2383 return;
2384 case ARM64in_VBinH:
2385 addHRegUse(u, HRmWrite, i->ARM64in.VBinH.dst);
2386 addHRegUse(u, HRmRead, i->ARM64in.VBinH.argL);
2387 addHRegUse(u, HRmRead, i->ARM64in.VBinH.argR);
2388 return;
2389 case ARM64in_VTriD:
2390 addHRegUse(u, HRmWrite, i->ARM64in.VTriD.dst);
2391 addHRegUse(u, HRmRead, i->ARM64in.VTriD.arg1);
2392 addHRegUse(u, HRmRead, i->ARM64in.VTriD.arg2);
2393 addHRegUse(u, HRmRead, i->ARM64in.VTriD.arg3);
2394 return;
2395 case ARM64in_VTriS:
2396 addHRegUse(u, HRmWrite, i->ARM64in.VTriS.dst);
2397 addHRegUse(u, HRmRead, i->ARM64in.VTriS.arg1);
2398 addHRegUse(u, HRmRead, i->ARM64in.VTriS.arg2);
2399 addHRegUse(u, HRmRead, i->ARM64in.VTriS.arg3);
2400 return;
2401 case ARM64in_VCmpD:
2402 addHRegUse(u, HRmRead, i->ARM64in.VCmpD.argL);
2403 addHRegUse(u, HRmRead, i->ARM64in.VCmpD.argR);
2404 return;
2405 case ARM64in_VCmpS:
2406 addHRegUse(u, HRmRead, i->ARM64in.VCmpS.argL);
2407 addHRegUse(u, HRmRead, i->ARM64in.VCmpS.argR);
2408 return;
2409 case ARM64in_VCmpH:
2410 addHRegUse(u, HRmRead, i->ARM64in.VCmpH.argL);
2411 addHRegUse(u, HRmRead, i->ARM64in.VCmpH.argR);
2412 return;
2413 case ARM64in_VFCSel:
2414 addHRegUse(u, HRmRead, i->ARM64in.VFCSel.argL);
2415 addHRegUse(u, HRmRead, i->ARM64in.VFCSel.argR);
2416 addHRegUse(u, HRmWrite, i->ARM64in.VFCSel.dst);
2417 return;
2418 case ARM64in_FPCR:
2419 if (i->ARM64in.FPCR.toFPCR)
2420 addHRegUse(u, HRmRead, i->ARM64in.FPCR.iReg);
2421 else
2422 addHRegUse(u, HRmWrite, i->ARM64in.FPCR.iReg);
2423 return;
2424 case ARM64in_FPSR:
2425 if (i->ARM64in.FPSR.toFPSR)
2426 addHRegUse(u, HRmRead, i->ARM64in.FPSR.iReg);
2427 else
2428 addHRegUse(u, HRmWrite, i->ARM64in.FPSR.iReg);
2429 return;
2430 case ARM64in_VBinV:
2431 addHRegUse(u, HRmWrite, i->ARM64in.VBinV.dst);
2432 addHRegUse(u, HRmRead, i->ARM64in.VBinV.argL);
2433 addHRegUse(u, HRmRead, i->ARM64in.VBinV.argR);
2434 return;
2435 case ARM64in_VModifyV:
2436 addHRegUse(u, HRmWrite, i->ARM64in.VModifyV.mod);
2437 addHRegUse(u, HRmRead, i->ARM64in.VModifyV.mod);
2438 addHRegUse(u, HRmRead, i->ARM64in.VModifyV.arg);
2439 return;
2440 case ARM64in_VUnaryV:
2441 addHRegUse(u, HRmWrite, i->ARM64in.VUnaryV.dst);
2442 addHRegUse(u, HRmRead, i->ARM64in.VUnaryV.arg);
2443 return;
2444 case ARM64in_VNarrowV:
2445 addHRegUse(u, HRmWrite, i->ARM64in.VNarrowV.dst);
2446 addHRegUse(u, HRmRead, i->ARM64in.VNarrowV.src);
2447 return;
2448 case ARM64in_VShiftImmV:
2449 addHRegUse(u, HRmWrite, i->ARM64in.VShiftImmV.dst);
2450 addHRegUse(u, HRmRead, i->ARM64in.VShiftImmV.src);
2451 return;
2452 case ARM64in_VExtV:
2453 addHRegUse(u, HRmWrite, i->ARM64in.VExtV.dst);
2454 addHRegUse(u, HRmRead, i->ARM64in.VExtV.srcLo);
2455 addHRegUse(u, HRmRead, i->ARM64in.VExtV.srcHi);
2456 return;
2457 case ARM64in_VImmQ:
2458 addHRegUse(u, HRmWrite, i->ARM64in.VImmQ.rQ);
2459 return;
2460 case ARM64in_VDfromX:
2461 addHRegUse(u, HRmWrite, i->ARM64in.VDfromX.rD);
2462 addHRegUse(u, HRmRead, i->ARM64in.VDfromX.rX);
2463 return;
2464 case ARM64in_VQfromX:
2465 addHRegUse(u, HRmWrite, i->ARM64in.VQfromX.rQ);
2466 addHRegUse(u, HRmRead, i->ARM64in.VQfromX.rXlo);
2467 return;
2468 case ARM64in_VQfromXX:
2469 addHRegUse(u, HRmWrite, i->ARM64in.VQfromXX.rQ);
2470 addHRegUse(u, HRmRead, i->ARM64in.VQfromXX.rXhi);
2471 addHRegUse(u, HRmRead, i->ARM64in.VQfromXX.rXlo);
2472 return;
2473 case ARM64in_VXfromQ:
2474 addHRegUse(u, HRmWrite, i->ARM64in.VXfromQ.rX);
2475 addHRegUse(u, HRmRead, i->ARM64in.VXfromQ.rQ);
2476 return;
2477 case ARM64in_VXfromDorS:
2478 addHRegUse(u, HRmWrite, i->ARM64in.VXfromDorS.rX);
2479 addHRegUse(u, HRmRead, i->ARM64in.VXfromDorS.rDorS);
2480 return;
2481 case ARM64in_VMov:
2482 addHRegUse(u, HRmWrite, i->ARM64in.VMov.dst);
2483 addHRegUse(u, HRmRead, i->ARM64in.VMov.src);
2484 u->isRegRegMove = True;
2485 u->regMoveSrc = i->ARM64in.VMov.src;
2486 u->regMoveDst = i->ARM64in.VMov.dst;
2487 return;
2488 case ARM64in_EvCheck:
2489 /* We expect both amodes only to mention x21, so this is in
2490 fact pointless, since x21 isn't allocatable, but
2491 anyway.. */
2492 addRegUsage_ARM64AMode(u, i->ARM64in.EvCheck.amCounter);
2493 addRegUsage_ARM64AMode(u, i->ARM64in.EvCheck.amFailAddr);
2494 addHRegUse(u, HRmWrite, hregARM64_X9()); /* also unavail to RA */
2495 return;
2496 case ARM64in_ProfInc:
2497 /* Again, pointless to actually state these since neither
2498 is available to RA. */
2499 addHRegUse(u, HRmWrite, hregARM64_X9()); /* unavail to RA */
2500 addHRegUse(u, HRmWrite, hregARM64_X8()); /* unavail to RA */
2501 return;
2502 default:
2503 ppARM64Instr(i);
2504 vpanic("getRegUsage_ARM64Instr");
2509 void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 )
2511 vassert(mode64 == True);
2512 switch (i->tag) {
2513 case ARM64in_Arith:
2514 i->ARM64in.Arith.dst = lookupHRegRemap(m, i->ARM64in.Arith.dst);
2515 i->ARM64in.Arith.argL = lookupHRegRemap(m, i->ARM64in.Arith.argL);
2516 mapRegs_ARM64RIA(m, i->ARM64in.Arith.argR);
2517 return;
2518 case ARM64in_Cmp:
2519 i->ARM64in.Cmp.argL = lookupHRegRemap(m, i->ARM64in.Cmp.argL);
2520 mapRegs_ARM64RIA(m, i->ARM64in.Cmp.argR);
2521 return;
2522 case ARM64in_Logic:
2523 i->ARM64in.Logic.dst = lookupHRegRemap(m, i->ARM64in.Logic.dst);
2524 i->ARM64in.Logic.argL = lookupHRegRemap(m, i->ARM64in.Logic.argL);
2525 mapRegs_ARM64RIL(m, i->ARM64in.Logic.argR);
2526 return;
2527 case ARM64in_RRS:
2528 i->ARM64in.RRS.dst = lookupHRegRemap(m, i->ARM64in.RRS.dst);
2529 i->ARM64in.RRS.argL = lookupHRegRemap(m, i->ARM64in.RRS.argL);
2530 i->ARM64in.RRS.argR = lookupHRegRemap(m, i->ARM64in.RRS.argR);
2531 return;
2532 case ARM64in_Test:
2533 i->ARM64in.Test.argL = lookupHRegRemap(m, i->ARM64in.Test.argL);
2534 mapRegs_ARM64RIL(m, i->ARM64in.Test.argR);
2535 return;
2536 case ARM64in_Shift:
2537 i->ARM64in.Shift.dst = lookupHRegRemap(m, i->ARM64in.Shift.dst);
2538 i->ARM64in.Shift.argL = lookupHRegRemap(m, i->ARM64in.Shift.argL);
2539 mapRegs_ARM64RI6(m, i->ARM64in.Shift.argR);
2540 return;
2541 case ARM64in_Unary:
2542 i->ARM64in.Unary.dst = lookupHRegRemap(m, i->ARM64in.Unary.dst);
2543 i->ARM64in.Unary.src = lookupHRegRemap(m, i->ARM64in.Unary.src);
2544 return;
2545 case ARM64in_Set64:
2546 i->ARM64in.Set64.dst = lookupHRegRemap(m, i->ARM64in.Set64.dst);
2547 return;
2548 case ARM64in_MovI:
2549 i->ARM64in.MovI.dst = lookupHRegRemap(m, i->ARM64in.MovI.dst);
2550 i->ARM64in.MovI.src = lookupHRegRemap(m, i->ARM64in.MovI.src);
2551 return;
2552 case ARM64in_Imm64:
2553 i->ARM64in.Imm64.dst = lookupHRegRemap(m, i->ARM64in.Imm64.dst);
2554 return;
2555 case ARM64in_LdSt64:
2556 i->ARM64in.LdSt64.rD = lookupHRegRemap(m, i->ARM64in.LdSt64.rD);
2557 mapRegs_ARM64AMode(m, i->ARM64in.LdSt64.amode);
2558 return;
2559 case ARM64in_LdSt32:
2560 i->ARM64in.LdSt32.rD = lookupHRegRemap(m, i->ARM64in.LdSt32.rD);
2561 mapRegs_ARM64AMode(m, i->ARM64in.LdSt32.amode);
2562 return;
2563 case ARM64in_LdSt16:
2564 i->ARM64in.LdSt16.rD = lookupHRegRemap(m, i->ARM64in.LdSt16.rD);
2565 mapRegs_ARM64AMode(m, i->ARM64in.LdSt16.amode);
2566 return;
2567 case ARM64in_LdSt8:
2568 i->ARM64in.LdSt8.rD = lookupHRegRemap(m, i->ARM64in.LdSt8.rD);
2569 mapRegs_ARM64AMode(m, i->ARM64in.LdSt8.amode);
2570 return;
2571 case ARM64in_XDirect:
2572 mapRegs_ARM64AMode(m, i->ARM64in.XDirect.amPC);
2573 return;
2574 case ARM64in_XIndir:
2575 i->ARM64in.XIndir.dstGA
2576 = lookupHRegRemap(m, i->ARM64in.XIndir.dstGA);
2577 mapRegs_ARM64AMode(m, i->ARM64in.XIndir.amPC);
2578 return;
2579 case ARM64in_XAssisted:
2580 i->ARM64in.XAssisted.dstGA
2581 = lookupHRegRemap(m, i->ARM64in.XAssisted.dstGA);
2582 mapRegs_ARM64AMode(m, i->ARM64in.XAssisted.amPC);
2583 return;
2584 case ARM64in_CSel:
2585 i->ARM64in.CSel.dst = lookupHRegRemap(m, i->ARM64in.CSel.dst);
2586 i->ARM64in.CSel.argL = lookupHRegRemap(m, i->ARM64in.CSel.argL);
2587 i->ARM64in.CSel.argR = lookupHRegRemap(m, i->ARM64in.CSel.argR);
2588 return;
2589 case ARM64in_Call:
2590 return;
2591 case ARM64in_AddToSP:
2592 return;
2593 case ARM64in_FromSP:
2594 i->ARM64in.FromSP.dst = lookupHRegRemap(m, i->ARM64in.FromSP.dst);
2595 return;
2596 case ARM64in_Mul:
2597 i->ARM64in.Mul.dst = lookupHRegRemap(m, i->ARM64in.Mul.dst);
2598 i->ARM64in.Mul.argL = lookupHRegRemap(m, i->ARM64in.Mul.argL);
2599 i->ARM64in.Mul.argR = lookupHRegRemap(m, i->ARM64in.Mul.argR);
2600 break;
2601 case ARM64in_LdrEX:
2602 return;
2603 case ARM64in_StrEX:
2604 return;
2605 case ARM64in_LdrEXP:
2606 return;
2607 case ARM64in_StrEXP:
2608 return;
2609 case ARM64in_CAS:
2610 return;
2611 case ARM64in_CASP:
2612 return;
2613 case ARM64in_MFence:
2614 return;
2615 case ARM64in_ClrEX:
2616 return;
2617 case ARM64in_VLdStH:
2618 i->ARM64in.VLdStH.hD = lookupHRegRemap(m, i->ARM64in.VLdStH.hD);
2619 i->ARM64in.VLdStH.rN = lookupHRegRemap(m, i->ARM64in.VLdStH.rN);
2620 return;
2621 case ARM64in_VLdStS:
2622 i->ARM64in.VLdStS.sD = lookupHRegRemap(m, i->ARM64in.VLdStS.sD);
2623 i->ARM64in.VLdStS.rN = lookupHRegRemap(m, i->ARM64in.VLdStS.rN);
2624 return;
2625 case ARM64in_VLdStD:
2626 i->ARM64in.VLdStD.dD = lookupHRegRemap(m, i->ARM64in.VLdStD.dD);
2627 i->ARM64in.VLdStD.rN = lookupHRegRemap(m, i->ARM64in.VLdStD.rN);
2628 return;
2629 case ARM64in_VLdStQ:
2630 i->ARM64in.VLdStQ.rQ = lookupHRegRemap(m, i->ARM64in.VLdStQ.rQ);
2631 i->ARM64in.VLdStQ.rN = lookupHRegRemap(m, i->ARM64in.VLdStQ.rN);
2632 return;
2633 case ARM64in_VCvtI2F:
2634 i->ARM64in.VCvtI2F.rS = lookupHRegRemap(m, i->ARM64in.VCvtI2F.rS);
2635 i->ARM64in.VCvtI2F.rD = lookupHRegRemap(m, i->ARM64in.VCvtI2F.rD);
2636 return;
2637 case ARM64in_VCvtF2I:
2638 i->ARM64in.VCvtF2I.rS = lookupHRegRemap(m, i->ARM64in.VCvtF2I.rS);
2639 i->ARM64in.VCvtF2I.rD = lookupHRegRemap(m, i->ARM64in.VCvtF2I.rD);
2640 return;
2641 case ARM64in_VCvtSD:
2642 i->ARM64in.VCvtSD.dst = lookupHRegRemap(m, i->ARM64in.VCvtSD.dst);
2643 i->ARM64in.VCvtSD.src = lookupHRegRemap(m, i->ARM64in.VCvtSD.src);
2644 return;
2645 case ARM64in_VCvtHS:
2646 i->ARM64in.VCvtHS.dst = lookupHRegRemap(m, i->ARM64in.VCvtHS.dst);
2647 i->ARM64in.VCvtHS.src = lookupHRegRemap(m, i->ARM64in.VCvtHS.src);
2648 return;
2649 case ARM64in_VCvtHD:
2650 i->ARM64in.VCvtHD.dst = lookupHRegRemap(m, i->ARM64in.VCvtHD.dst);
2651 i->ARM64in.VCvtHD.src = lookupHRegRemap(m, i->ARM64in.VCvtHD.src);
2652 return;
2653 case ARM64in_VUnaryD:
2654 i->ARM64in.VUnaryD.dst = lookupHRegRemap(m, i->ARM64in.VUnaryD.dst);
2655 i->ARM64in.VUnaryD.src = lookupHRegRemap(m, i->ARM64in.VUnaryD.src);
2656 return;
2657 case ARM64in_VUnaryS:
2658 i->ARM64in.VUnaryS.dst = lookupHRegRemap(m, i->ARM64in.VUnaryS.dst);
2659 i->ARM64in.VUnaryS.src = lookupHRegRemap(m, i->ARM64in.VUnaryS.src);
2660 return;
2661 case ARM64in_VUnaryH:
2662 i->ARM64in.VUnaryH.dst = lookupHRegRemap(m, i->ARM64in.VUnaryH.dst);
2663 i->ARM64in.VUnaryH.src = lookupHRegRemap(m, i->ARM64in.VUnaryH.src);
2664 return;
2665 case ARM64in_VBinD:
2666 i->ARM64in.VBinD.dst = lookupHRegRemap(m, i->ARM64in.VBinD.dst);
2667 i->ARM64in.VBinD.argL = lookupHRegRemap(m, i->ARM64in.VBinD.argL);
2668 i->ARM64in.VBinD.argR = lookupHRegRemap(m, i->ARM64in.VBinD.argR);
2669 return;
2670 case ARM64in_VBinS:
2671 i->ARM64in.VBinS.dst = lookupHRegRemap(m, i->ARM64in.VBinS.dst);
2672 i->ARM64in.VBinS.argL = lookupHRegRemap(m, i->ARM64in.VBinS.argL);
2673 i->ARM64in.VBinS.argR = lookupHRegRemap(m, i->ARM64in.VBinS.argR);
2674 return;
2675 case ARM64in_VBinH:
2676 i->ARM64in.VBinH.dst = lookupHRegRemap(m, i->ARM64in.VBinH.dst);
2677 i->ARM64in.VBinH.argL = lookupHRegRemap(m, i->ARM64in.VBinH.argL);
2678 i->ARM64in.VBinH.argR = lookupHRegRemap(m, i->ARM64in.VBinH.argR);
2679 return;
2680 case ARM64in_VTriD:
2681 i->ARM64in.VTriD.dst = lookupHRegRemap(m, i->ARM64in.VTriD.dst);
2682 i->ARM64in.VTriD.arg1 = lookupHRegRemap(m, i->ARM64in.VTriD.arg1);
2683 i->ARM64in.VTriD.arg2 = lookupHRegRemap(m, i->ARM64in.VTriD.arg2);
2684 i->ARM64in.VTriD.arg3 = lookupHRegRemap(m, i->ARM64in.VTriD.arg3);
2685 return;
2686 case ARM64in_VTriS:
2687 i->ARM64in.VTriS.dst = lookupHRegRemap(m, i->ARM64in.VTriS.dst);
2688 i->ARM64in.VTriS.arg1 = lookupHRegRemap(m, i->ARM64in.VTriS.arg1);
2689 i->ARM64in.VTriS.arg2 = lookupHRegRemap(m, i->ARM64in.VTriS.arg2);
2690 i->ARM64in.VTriS.arg3 = lookupHRegRemap(m, i->ARM64in.VTriS.arg3);
2691 return;
2692 case ARM64in_VCmpD:
2693 i->ARM64in.VCmpD.argL = lookupHRegRemap(m, i->ARM64in.VCmpD.argL);
2694 i->ARM64in.VCmpD.argR = lookupHRegRemap(m, i->ARM64in.VCmpD.argR);
2695 return;
2696 case ARM64in_VCmpS:
2697 i->ARM64in.VCmpS.argL = lookupHRegRemap(m, i->ARM64in.VCmpS.argL);
2698 i->ARM64in.VCmpS.argR = lookupHRegRemap(m, i->ARM64in.VCmpS.argR);
2699 return;
2700 case ARM64in_VCmpH:
2701 i->ARM64in.VCmpH.argL = lookupHRegRemap(m, i->ARM64in.VCmpH.argL);
2702 i->ARM64in.VCmpH.argR = lookupHRegRemap(m, i->ARM64in.VCmpH.argR);
2703 return;
2704 case ARM64in_VFCSel:
2705 i->ARM64in.VFCSel.argL = lookupHRegRemap(m, i->ARM64in.VFCSel.argL);
2706 i->ARM64in.VFCSel.argR = lookupHRegRemap(m, i->ARM64in.VFCSel.argR);
2707 i->ARM64in.VFCSel.dst = lookupHRegRemap(m, i->ARM64in.VFCSel.dst);
2708 return;
2709 case ARM64in_FPCR:
2710 i->ARM64in.FPCR.iReg = lookupHRegRemap(m, i->ARM64in.FPCR.iReg);
2711 return;
2712 case ARM64in_FPSR:
2713 i->ARM64in.FPSR.iReg = lookupHRegRemap(m, i->ARM64in.FPSR.iReg);
2714 return;
2715 case ARM64in_VBinV:
2716 i->ARM64in.VBinV.dst = lookupHRegRemap(m, i->ARM64in.VBinV.dst);
2717 i->ARM64in.VBinV.argL = lookupHRegRemap(m, i->ARM64in.VBinV.argL);
2718 i->ARM64in.VBinV.argR = lookupHRegRemap(m, i->ARM64in.VBinV.argR);
2719 return;
2720 case ARM64in_VModifyV:
2721 i->ARM64in.VModifyV.mod = lookupHRegRemap(m, i->ARM64in.VModifyV.mod);
2722 i->ARM64in.VModifyV.arg = lookupHRegRemap(m, i->ARM64in.VModifyV.arg);
2723 return;
2724 case ARM64in_VUnaryV:
2725 i->ARM64in.VUnaryV.dst = lookupHRegRemap(m, i->ARM64in.VUnaryV.dst);
2726 i->ARM64in.VUnaryV.arg = lookupHRegRemap(m, i->ARM64in.VUnaryV.arg);
2727 return;
2728 case ARM64in_VNarrowV:
2729 i->ARM64in.VNarrowV.dst = lookupHRegRemap(m, i->ARM64in.VNarrowV.dst);
2730 i->ARM64in.VNarrowV.src = lookupHRegRemap(m, i->ARM64in.VNarrowV.src);
2731 return;
2732 case ARM64in_VShiftImmV:
2733 i->ARM64in.VShiftImmV.dst
2734 = lookupHRegRemap(m, i->ARM64in.VShiftImmV.dst);
2735 i->ARM64in.VShiftImmV.src
2736 = lookupHRegRemap(m, i->ARM64in.VShiftImmV.src);
2737 return;
2738 case ARM64in_VExtV:
2739 i->ARM64in.VExtV.dst = lookupHRegRemap(m, i->ARM64in.VExtV.dst);
2740 i->ARM64in.VExtV.srcLo = lookupHRegRemap(m, i->ARM64in.VExtV.srcLo);
2741 i->ARM64in.VExtV.srcHi = lookupHRegRemap(m, i->ARM64in.VExtV.srcHi);
2742 return;
2743 case ARM64in_VImmQ:
2744 i->ARM64in.VImmQ.rQ = lookupHRegRemap(m, i->ARM64in.VImmQ.rQ);
2745 return;
2746 case ARM64in_VDfromX:
2747 i->ARM64in.VDfromX.rD
2748 = lookupHRegRemap(m, i->ARM64in.VDfromX.rD);
2749 i->ARM64in.VDfromX.rX
2750 = lookupHRegRemap(m, i->ARM64in.VDfromX.rX);
2751 return;
2752 case ARM64in_VQfromX:
2753 i->ARM64in.VQfromX.rQ
2754 = lookupHRegRemap(m, i->ARM64in.VQfromX.rQ);
2755 i->ARM64in.VQfromX.rXlo
2756 = lookupHRegRemap(m, i->ARM64in.VQfromX.rXlo);
2757 return;
2758 case ARM64in_VQfromXX:
2759 i->ARM64in.VQfromXX.rQ
2760 = lookupHRegRemap(m, i->ARM64in.VQfromXX.rQ);
2761 i->ARM64in.VQfromXX.rXhi
2762 = lookupHRegRemap(m, i->ARM64in.VQfromXX.rXhi);
2763 i->ARM64in.VQfromXX.rXlo
2764 = lookupHRegRemap(m, i->ARM64in.VQfromXX.rXlo);
2765 return;
2766 case ARM64in_VXfromQ:
2767 i->ARM64in.VXfromQ.rX
2768 = lookupHRegRemap(m, i->ARM64in.VXfromQ.rX);
2769 i->ARM64in.VXfromQ.rQ
2770 = lookupHRegRemap(m, i->ARM64in.VXfromQ.rQ);
2771 return;
2772 case ARM64in_VXfromDorS:
2773 i->ARM64in.VXfromDorS.rX
2774 = lookupHRegRemap(m, i->ARM64in.VXfromDorS.rX);
2775 i->ARM64in.VXfromDorS.rDorS
2776 = lookupHRegRemap(m, i->ARM64in.VXfromDorS.rDorS);
2777 return;
2778 case ARM64in_VMov:
2779 i->ARM64in.VMov.dst = lookupHRegRemap(m, i->ARM64in.VMov.dst);
2780 i->ARM64in.VMov.src = lookupHRegRemap(m, i->ARM64in.VMov.src);
2781 return;
2782 case ARM64in_EvCheck:
2783 /* We expect both amodes only to mention x21, so this is in
2784 fact pointless, since x21 isn't allocatable, but
2785 anyway.. */
2786 mapRegs_ARM64AMode(m, i->ARM64in.EvCheck.amCounter);
2787 mapRegs_ARM64AMode(m, i->ARM64in.EvCheck.amFailAddr);
2788 return;
2789 case ARM64in_ProfInc:
2790 /* hardwires x8 and x9 -- nothing to modify. */
2791 return;
2792 default:
2793 ppARM64Instr(i);
2794 vpanic("mapRegs_ARM64Instr");
2798 /* Generate arm spill/reload instructions under the direction of the
2799 register allocator. Note it's critical these don't write the
2800 condition codes. */
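/* As a rough sketch (not the exact emitted encodings), the sequences
   generated below are, with offsets taken from the baseblock pointer in
   x21 and x9 used as the spill temporary:
      HRcInt64  : str/ldr  Xreg, [x21, #offsetB]
      HRcFlt64  : str/ldr  Dreg, [x21, #offsetB]
      HRcVec128 : add x9, x21, #offsetB ; st1.2d/ld1.2d {Qreg}, [x9]
   None of these touch NZCV. */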
2802 void genSpill_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2803 HReg rreg, Int offsetB, Bool mode64 )
2804 {
2805 HRegClass rclass;
2806 vassert(offsetB >= 0);
2807 vassert(!hregIsVirtual(rreg));
2808 vassert(mode64 == True);
2809 *i1 = *i2 = NULL;
2810 rclass = hregClass(rreg);
2811 switch (rclass) {
2812 case HRcInt64:
2813 vassert(0 == (offsetB & 7));
2814 offsetB >>= 3;
2815 vassert(offsetB < 4096);
2816 *i1 = ARM64Instr_LdSt64(
2817 False/*!isLoad*/,
2818 rreg,
2819 ARM64AMode_RI12(hregARM64_X21(), offsetB, 8)
2820 );
2821 return;
2822 case HRcFlt64:
2823 vassert(0 == (offsetB & 7));
2824 vassert(offsetB >= 0 && offsetB < 32768);
2825 *i1 = ARM64Instr_VLdStD(False/*!isLoad*/,
2826 rreg, hregARM64_X21(), offsetB);
2827 return;
2828 case HRcVec128: {
2829 HReg x21 = hregARM64_X21(); // baseblock
2830 HReg x9 = hregARM64_X9(); // spill temporary
2831 vassert(0 == (offsetB & 15)); // check sane alignment
2832 vassert(offsetB < 4096);
2833 *i1 = ARM64Instr_Arith(x9, x21, ARM64RIA_I12(offsetB, 0), True);
2834 *i2 = ARM64Instr_VLdStQ(False/*!isLoad*/, rreg, x9);
2835 return;
2836 }
2837 default:
2838 ppHRegClass(rclass);
2839 vpanic("genSpill_ARM: unimplemented regclass");
2843 void genReload_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2844 HReg rreg, Int offsetB, Bool mode64 )
2845 {
2846 HRegClass rclass;
2847 vassert(offsetB >= 0);
2848 vassert(!hregIsVirtual(rreg));
2849 vassert(mode64 == True);
2850 *i1 = *i2 = NULL;
2851 rclass = hregClass(rreg);
2852 switch (rclass) {
2853 case HRcInt64:
2854 vassert(0 == (offsetB & 7));
2855 offsetB >>= 3;
2856 vassert(offsetB < 4096);
2857 *i1 = ARM64Instr_LdSt64(
2858 True/*isLoad*/,
2859 rreg,
2860 ARM64AMode_RI12(hregARM64_X21(), offsetB, 8)
2861 );
2862 return;
2863 case HRcFlt64:
2864 vassert(0 == (offsetB & 7));
2865 vassert(offsetB >= 0 && offsetB < 32768);
2866 *i1 = ARM64Instr_VLdStD(True/*isLoad*/,
2867 rreg, hregARM64_X21(), offsetB);
2868 return;
2869 case HRcVec128: {
2870 HReg x21 = hregARM64_X21(); // baseblock
2871 HReg x9 = hregARM64_X9(); // spill temporary
2872 vassert(0 == (offsetB & 15)); // check sane alignment
2873 vassert(offsetB < 4096);
2874 *i1 = ARM64Instr_Arith(x9, x21, ARM64RIA_I12(offsetB, 0), True);
2875 *i2 = ARM64Instr_VLdStQ(True/*isLoad*/, rreg, x9);
2876 return;
2877 }
2878 default:
2879 ppHRegClass(rclass);
2880 vpanic("genReload_ARM: unimplemented regclass");
2884 ARM64Instr* genMove_ARM64(HReg from, HReg to, Bool mode64)
2885 {
2886 switch (hregClass(from)) {
2887 case HRcInt64:
2888 return ARM64Instr_MovI(to, from);
2889 case HRcFlt64:
2890 return ARM64Instr_VMov(8, to, from);
2891 case HRcVec128:
2892 return ARM64Instr_VMov(16, to, from);
2893 default:
2894 ppHRegClass(hregClass(from));
2895 vpanic("genMove_ARM64: unimplemented regclass");
2900 /* Emit an instruction into buf and return the number of bytes used.
2901 Note that buf is not the insn's final place, and therefore it is
2902 imperative to emit position-independent code. */
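/* One visible consequence: control-flow targets outside the block are not
   encoded PC-relatively here; instead the 64-bit address is materialised
   into x9 and reached via br/blr x9, as shown by the XDirect/XIndir/
   XAssisted and EvCheck cases of ppARM64Instr above. */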
2904 static inline UInt iregEnc ( HReg r )
2906 UInt n;
2907 vassert(hregClass(r) == HRcInt64);
2908 vassert(!hregIsVirtual(r));
2909 n = hregEncoding(r);
2910 vassert(n <= 30);
2911 return n;
2914 static inline UInt iregEncOr31 ( HReg r )
2916 // This is the same as iregEnc() except that we're allowed to use the
2917 // "special" encoding number 31, which means, depending on the context,
2918 // either XZR/WZR or SP.
2919 UInt n;
2920 vassert(hregClass(r) == HRcInt64);
2921 vassert(!hregIsVirtual(r));
2922 n = hregEncoding(r);
2923 vassert(n <= 31);
2924 return n;
2927 static inline UInt dregEnc ( HReg r )
2929 UInt n;
2930 vassert(hregClass(r) == HRcFlt64);
2931 vassert(!hregIsVirtual(r));
2932 n = hregEncoding(r);
2933 vassert(n <= 31);
2934 return n;
2937 static inline UInt qregEnc ( HReg r )
2939 UInt n;
2940 vassert(hregClass(r) == HRcVec128);
2941 vassert(!hregIsVirtual(r));
2942 n = hregEncoding(r);
2943 vassert(n <= 31);
2944 return n;
2947 #define BITS4(zzb3,zzb2,zzb1,zzb0) \
2948 (((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0))
2950 #define X00 BITS4(0,0, 0,0)
2951 #define X01 BITS4(0,0, 0,1)
2952 #define X10 BITS4(0,0, 1,0)
2953 #define X11 BITS4(0,0, 1,1)
2955 #define X000 BITS4(0, 0,0,0)
2956 #define X001 BITS4(0, 0,0,1)
2957 #define X010 BITS4(0, 0,1,0)
2958 #define X011 BITS4(0, 0,1,1)
2959 #define X100 BITS4(0, 1,0,0)
2960 #define X101 BITS4(0, 1,0,1)
2961 #define X110 BITS4(0, 1,1,0)
2962 #define X111 BITS4(0, 1,1,1)
2964 #define X0000 BITS4(0,0,0,0)
2965 #define X0001 BITS4(0,0,0,1)
2966 #define X0010 BITS4(0,0,1,0)
2967 #define X0011 BITS4(0,0,1,1)
2969 #define BITS8(zzb7,zzb6,zzb5,zzb4,zzb3,zzb2,zzb1,zzb0) \
2970 ((BITS4(zzb7,zzb6,zzb5,zzb4) << 4) | BITS4(zzb3,zzb2,zzb1,zzb0))
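/* For example, X01010000 below is BITS8(0,1,0,1,0,0,0,0), i.e. 0x50. */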
2972 #define X00000 BITS8(0,0,0, 0,0,0,0,0)
2973 #define X00001 BITS8(0,0,0, 0,0,0,0,1)
2974 #define X00100 BITS8(0,0,0, 0,0,1,0,0)
2975 #define X00110 BITS8(0,0,0, 0,0,1,1,0)
2976 #define X00111 BITS8(0,0,0, 0,0,1,1,1)
2977 #define X01000 BITS8(0,0,0, 0,1,0,0,0)
2978 #define X10000 BITS8(0,0,0, 1,0,0,0,0)
2979 #define X11000 BITS8(0,0,0, 1,1,0,0,0)
2980 #define X11001 BITS8(0,0,0, 1,1,0,0,1)
2981 #define X11110 BITS8(0,0,0, 1,1,1,1,0)
2982 #define X11111 BITS8(0,0,0, 1,1,1,1,1)
2984 #define X000000 BITS8(0,0, 0,0,0,0,0,0)
2985 #define X000001 BITS8(0,0, 0,0,0,0,0,1)
2986 #define X000010 BITS8(0,0, 0,0,0,0,1,0)
2987 #define X000011 BITS8(0,0, 0,0,0,0,1,1)
2988 #define X000100 BITS8(0,0, 0,0,0,1,0,0)
2989 #define X000101 BITS8(0,0, 0,0,0,1,0,1)
2990 #define X000110 BITS8(0,0, 0,0,0,1,1,0)
2991 #define X000111 BITS8(0,0, 0,0,0,1,1,1)
2992 #define X001000 BITS8(0,0, 0,0,1,0,0,0)
2993 #define X001001 BITS8(0,0, 0,0,1,0,0,1)
2994 #define X001010 BITS8(0,0, 0,0,1,0,1,0)
2995 #define X001011 BITS8(0,0, 0,0,1,0,1,1)
2996 #define X001101 BITS8(0,0, 0,0,1,1,0,1)
2997 #define X001110 BITS8(0,0, 0,0,1,1,1,0)
2998 #define X001111 BITS8(0,0, 0,0,1,1,1,1)
2999 #define X010000 BITS8(0,0, 0,1,0,0,0,0)
3000 #define X010001 BITS8(0,0, 0,1,0,0,0,1)
3001 #define X010010 BITS8(0,0, 0,1,0,0,1,0)
3002 #define X010011 BITS8(0,0, 0,1,0,0,1,1)
3003 #define X010101 BITS8(0,0, 0,1,0,1,0,1)
3004 #define X010110 BITS8(0,0, 0,1,0,1,1,0)
3005 #define X010111 BITS8(0,0, 0,1,0,1,1,1)
3006 #define X011001 BITS8(0,0, 0,1,1,0,0,1)
3007 #define X011010 BITS8(0,0, 0,1,1,0,1,0)
3008 #define X011011 BITS8(0,0, 0,1,1,0,1,1)
3009 #define X011101 BITS8(0,0, 0,1,1,1,0,1)
3010 #define X011110 BITS8(0,0, 0,1,1,1,1,0)
3011 #define X011111 BITS8(0,0, 0,1,1,1,1,1)
3012 #define X100001 BITS8(0,0, 1,0,0,0,0,1)
3013 #define X100011 BITS8(0,0, 1,0,0,0,1,1)
3014 #define X100100 BITS8(0,0, 1,0,0,1,0,0)
3015 #define X100101 BITS8(0,0, 1,0,0,1,0,1)
3016 #define X100110 BITS8(0,0, 1,0,0,1,1,0)
3017 #define X100111 BITS8(0,0, 1,0,0,1,1,1)
3018 #define X101101 BITS8(0,0, 1,0,1,1,0,1)
3019 #define X101110 BITS8(0,0, 1,0,1,1,1,0)
3020 #define X110000 BITS8(0,0, 1,1,0,0,0,0)
3021 #define X110001 BITS8(0,0, 1,1,0,0,0,1)
3022 #define X110010 BITS8(0,0, 1,1,0,0,1,0)
3023 #define X110100 BITS8(0,0, 1,1,0,1,0,0)
3024 #define X110101 BITS8(0,0, 1,1,0,1,0,1)
3025 #define X110110 BITS8(0,0, 1,1,0,1,1,0)
3026 #define X110111 BITS8(0,0, 1,1,0,1,1,1)
3027 #define X111000 BITS8(0,0, 1,1,1,0,0,0)
3028 #define X111001 BITS8(0,0, 1,1,1,0,0,1)
3029 #define X111101 BITS8(0,0, 1,1,1,1,0,1)
3030 #define X111110 BITS8(0,0, 1,1,1,1,1,0)
3031 #define X111111 BITS8(0,0, 1,1,1,1,1,1)
3033 #define X0001000 BITS8(0, 0,0,0,1,0,0,0)
3034 #define X0010000 BITS8(0, 0,0,1,0,0,0,0)
3035 #define X0100000 BITS8(0, 0,1,0,0,0,0,0)
3036 #define X1000000 BITS8(0, 1,0,0,0,0,0,0)
3038 #define X00100000 BITS8(0,0,1,0,0,0,0,0)
3039 #define X00100001 BITS8(0,0,1,0,0,0,0,1)
3040 #define X00100010 BITS8(0,0,1,0,0,0,1,0)
3041 #define X00100011 BITS8(0,0,1,0,0,0,1,1)
3042 #define X01010000 BITS8(0,1,0,1,0,0,0,0)
3043 #define X01010001 BITS8(0,1,0,1,0,0,0,1)
3044 #define X01010100 BITS8(0,1,0,1,0,1,0,0)
3045 #define X01011000 BITS8(0,1,0,1,1,0,0,0)
3046 #define X01100000 BITS8(0,1,1,0,0,0,0,0)
3047 #define X01100001 BITS8(0,1,1,0,0,0,0,1)
3048 #define X01100010 BITS8(0,1,1,0,0,0,1,0)
3049 #define X01100011 BITS8(0,1,1,0,0,0,1,1)
3050 #define X01110000 BITS8(0,1,1,1,0,0,0,0)
3051 #define X01110001 BITS8(0,1,1,1,0,0,0,1)
3052 #define X01110010 BITS8(0,1,1,1,0,0,1,0)
3053 #define X01110011 BITS8(0,1,1,1,0,0,1,1)
3054 #define X01110100 BITS8(0,1,1,1,0,1,0,0)
3055 #define X01110101 BITS8(0,1,1,1,0,1,0,1)
3056 #define X01110110 BITS8(0,1,1,1,0,1,1,0)
3057 #define X01110111 BITS8(0,1,1,1,0,1,1,1)
3058 #define X10001010 BITS8(1,0,0,0,1,0,1,0)
3059 #define X10001011 BITS8(1,0,0,0,1,0,1,1)
3060 #define X10101010 BITS8(1,0,1,0,1,0,1,0)
3061 #define X11000001 BITS8(1,1,0,0,0,0,0,1)
3062 #define X11000011 BITS8(1,1,0,0,0,0,1,1)
3063 #define X11001010 BITS8(1,1,0,0,1,0,1,0)
3064 #define X11001011 BITS8(1,1,0,0,1,0,1,1)
3065 #define X11010100 BITS8(1,1,0,1,0,1,0,0)
3066 #define X11010110 BITS8(1,1,0,1,0,1,1,0)
3067 #define X11011000 BITS8(1,1,0,1,1,0,0,0)
3068 #define X11011010 BITS8(1,1,0,1,1,0,1,0)
3069 #define X11011110 BITS8(1,1,0,1,1,1,1,0)
3070 #define X11100010 BITS8(1,1,1,0,0,0,1,0)
3071 #define X11110001 BITS8(1,1,1,1,0,0,0,1)
3072 #define X11110010 BITS8(1,1,1,1,0,0,1,0)
3073 #define X11110011 BITS8(1,1,1,1,0,0,1,1)
3074 #define X11110101 BITS8(1,1,1,1,0,1,0,1)
3075 #define X11110111 BITS8(1,1,1,1,0,1,1,1)
3076 #define X11111000 BITS8(1,1,1,1,1,0,0,0)
3077 #define X11111010 BITS8(1,1,1,1,1,0,1,0)
3079 /* --- 4 fields --- */
3081 static inline UInt X_8_19_1_4 ( UInt f1, UInt f2, UInt f3, UInt f4 ) {
3082 vassert(8+19+1+4 == 32);
3083 vassert(f1 < (1<<8));
3084 vassert(f2 < (1<<19));
3085 vassert(f3 < (1<<1));
3086 vassert(f4 < (1<<4));
3087 UInt w = 0;
3088 w = (w << 8) | f1;
3089 w = (w << 19) | f2;
3090 w = (w << 1) | f3;
3091 w = (w << 4) | f4;
3092 return w;
3095 /* --- 5 fields --- */
3097 static inline UInt X_3_6_2_16_5 ( UInt f1, UInt f2,
3098 UInt f3, UInt f4, UInt f5 ) {
3099 vassert(3+6+2+16+5 == 32);
3100 vassert(f1 < (1<<3));
3101 vassert(f2 < (1<<6));
3102 vassert(f3 < (1<<2));
3103 vassert(f4 < (1<<16));
3104 vassert(f5 < (1<<5));
3105 UInt w = 0;
3106 w = (w << 3) | f1;
3107 w = (w << 6) | f2;
3108 w = (w << 2) | f3;
3109 w = (w << 16) | f4;
3110 w = (w << 5) | f5;
3111 return w;
3114 /* --- 6 fields --- */
3116 static inline UInt X_2_6_2_12_5_5 ( UInt f1, UInt f2, UInt f3,
3117 UInt f4, UInt f5, UInt f6 ) {
3118 vassert(2+6+2+12+5+5 == 32);
3119 vassert(f1 < (1<<2));
3120 vassert(f2 < (1<<6));
3121 vassert(f3 < (1<<2));
3122 vassert(f4 < (1<<12));
3123 vassert(f5 < (1<<5));
3124 vassert(f6 < (1<<5));
3125 UInt w = 0;
3126 w = (w << 2) | f1;
3127 w = (w << 6) | f2;
3128 w = (w << 2) | f3;
3129 w = (w << 12) | f4;
3130 w = (w << 5) | f5;
3131 w = (w << 5) | f6;
3132 return w;
3135 static inline UInt X_3_8_5_6_5_5 ( UInt f1, UInt f2, UInt f3,
3136 UInt f4, UInt f5, UInt f6 ) {
3137 vassert(3+8+5+6+5+5 == 32);
3138 vassert(f1 < (1<<3));
3139 vassert(f2 < (1<<8));
3140 vassert(f3 < (1<<5));
3141 vassert(f4 < (1<<6));
3142 vassert(f5 < (1<<5));
3143 vassert(f6 < (1<<5));
3144 UInt w = 0;
3145 w = (w << 3) | f1;
3146 w = (w << 8) | f2;
3147 w = (w << 5) | f3;
3148 w = (w << 6) | f4;
3149 w = (w << 5) | f5;
3150 w = (w << 5) | f6;
3151 return w;
3154 static inline UInt X_3_5_8_6_5_5 ( UInt f1, UInt f2, UInt f3,
3155 UInt f4, UInt f5, UInt f6 ) {
3156 vassert(3+5+8+6+5+5 == 32);
3157 vassert(f1 < (1<<3));
3158 vassert(f2 < (1<<5));
3159 vassert(f3 < (1<<8));
3160 vassert(f4 < (1<<6));
3161 vassert(f5 < (1<<5));
3162 vassert(f6 < (1<<5));
3163 UInt w = 0;
3164 w = (w << 3) | f1;
3165 w = (w << 5) | f2;
3166 w = (w << 8) | f3;
3167 w = (w << 6) | f4;
3168 w = (w << 5) | f5;
3169 w = (w << 5) | f6;
3170 return w;
3173 static inline UInt X_3_6_7_6_5_5 ( UInt f1, UInt f2, UInt f3,
3174 UInt f4, UInt f5, UInt f6 ) {
3175 vassert(3+6+7+6+5+5 == 32);
3176 vassert(f1 < (1<<3));
3177 vassert(f2 < (1<<6));
3178 vassert(f3 < (1<<7));
3179 vassert(f4 < (1<<6));
3180 vassert(f5 < (1<<5));
3181 vassert(f6 < (1<<5));
3182 UInt w = 0;
3183 w = (w << 3) | f1;
3184 w = (w << 6) | f2;
3185 w = (w << 7) | f3;
3186 w = (w << 6) | f4;
3187 w = (w << 5) | f5;
3188 w = (w << 5) | f6;
3189 return w;
3192 /* --- 7 fields --- */
3194 static inline UInt X_2_6_3_9_2_5_5 ( UInt f1, UInt f2, UInt f3,
3195 UInt f4, UInt f5, UInt f6, UInt f7 ) {
3196 vassert(2+6+3+9+2+5+5 == 32);
3197 vassert(f1 < (1<<2));
3198 vassert(f2 < (1<<6));
3199 vassert(f3 < (1<<3));
3200 vassert(f4 < (1<<9));
3201 vassert(f5 < (1<<2));
3202 vassert(f6 < (1<<5));
3203 vassert(f7 < (1<<5));
3204 UInt w = 0;
3205 w = (w << 2) | f1;
3206 w = (w << 6) | f2;
3207 w = (w << 3) | f3;
3208 w = (w << 9) | f4;
3209 w = (w << 2) | f5;
3210 w = (w << 5) | f6;
3211 w = (w << 5) | f7;
3212 return w;
3215 static inline UInt X_3_6_1_6_6_5_5 ( UInt f1, UInt f2, UInt f3,
3216 UInt f4, UInt f5, UInt f6, UInt f7 ) {
3217 vassert(3+6+1+6+6+5+5 == 32);
3218 vassert(f1 < (1<<3));
3219 vassert(f2 < (1<<6));
3220 vassert(f3 < (1<<1));
3221 vassert(f4 < (1<<6));
3222 vassert(f5 < (1<<6));
3223 vassert(f6 < (1<<5));
3224 vassert(f7 < (1<<5));
3225 UInt w = 0;
3226 w = (w << 3) | f1;
3227 w = (w << 6) | f2;
3228 w = (w << 1) | f3;
3229 w = (w << 6) | f4;
3230 w = (w << 6) | f5;
3231 w = (w << 5) | f6;
3232 w = (w << 5) | f7;
3233 return w;
3236 static inline UInt X_3_8_5_1_5_5_5 ( UInt f1, UInt f2, UInt f3, UInt f4,
3237 UInt f5, UInt f6, UInt f7 ) {
3238 vassert(3+8+5+1+5+5+5 == 32);
3239 vassert(f1 < (1<<3));
3240 vassert(f2 < (1<<8));
3241 vassert(f3 < (1<<5));
3242 vassert(f4 < (1<<1));
3243 vassert(f5 < (1<<5));
3244 vassert(f6 < (1<<5));
3245 vassert(f7 < (1<<5));
3246 UInt w = 0;
3247 w = (w << 3) | f1;
3248 w = (w << 8) | f2;
3249 w = (w << 5) | f3;
3250 w = (w << 1) | f4;
3251 w = (w << 5) | f5;
3252 w = (w << 5) | f6;
3253 w = (w << 5) | f7;
3254 return w;
3257 static inline UInt X_8_2_1_5_6_5_5 ( UInt f1, UInt f2, UInt f3, UInt f4,
3258 UInt f5, UInt f6, UInt f7 ) {
3259 vassert(8+2+1+5+6+5+5 == 32);
3260 vassert(f1 < (1<<8));
3261 vassert(f2 < (1<<2));
3262 vassert(f3 < (1<<1));
3263 vassert(f4 < (1<<5));
3264 vassert(f5 < (1<<6));
3265 vassert(f6 < (1<<5));
3266 vassert(f7 < (1<<5));
3267 UInt w = 0;
3268 w = (w << 8) | f1;
3269 w = (w << 2) | f2;
3270 w = (w << 1) | f3;
3271 w = (w << 5) | f4;
3272 w = (w << 6) | f5;
3273 w = (w << 5) | f6;
3274 w = (w << 5) | f7;
3275 return w;
3278 //ZZ #define X0000 BITS4(0,0,0,0)
3279 //ZZ #define X0001 BITS4(0,0,0,1)
3280 //ZZ #define X0010 BITS4(0,0,1,0)
3281 //ZZ #define X0011 BITS4(0,0,1,1)
3282 //ZZ #define X0100 BITS4(0,1,0,0)
3283 //ZZ #define X0101 BITS4(0,1,0,1)
3284 //ZZ #define X0110 BITS4(0,1,1,0)
3285 //ZZ #define X0111 BITS4(0,1,1,1)
3286 //ZZ #define X1000 BITS4(1,0,0,0)
3287 //ZZ #define X1001 BITS4(1,0,0,1)
3288 //ZZ #define X1010 BITS4(1,0,1,0)
3289 //ZZ #define X1011 BITS4(1,0,1,1)
3290 //ZZ #define X1100 BITS4(1,1,0,0)
3291 //ZZ #define X1101 BITS4(1,1,0,1)
3292 //ZZ #define X1110 BITS4(1,1,1,0)
3293 //ZZ #define X1111 BITS4(1,1,1,1)
3295 #define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
3296 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
3297 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
3298 (((zzx3) & 0xF) << 12))
3300 #define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2) \
3301 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
3302 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
3303 (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8))
3305 #define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0) \
3306 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
3307 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
3308 (((zzx3) & 0xF) << 12) | (((zzx0) & 0xF) << 0))
3310 #define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
3311 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
3312 (((zzx5) & 0xF) << 20) | (((zzx1) & 0xF) << 4) | \
3313 (((zzx0) & 0xF) << 0))
3315 #define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0) \
3316 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
3317 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
3318 (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8) | \
3319 (((zzx1) & 0xF) << 4) | (((zzx0) & 0xF) << 0))
3321 #define XX______(zzx7,zzx6) \
3322 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24))
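/* Worked example (illustrative, not in the original source): the field
   packers above simply concatenate their arguments MSB-first.  For
   instance, the MOVZ form used by imm64_to_ireg below,
      X_3_6_2_16_5(X110, X100101, X01, 0xABCD, 9)
   concatenates 110 100101 01 1010101111001101 01001, i.e. 0xD2B579A9,
   which is MOVZ x9, #0xABCD, LSL #16. */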
3326 /* Get an immediate into a register, using only that register. */
3327 static UInt* imm64_to_ireg ( UInt* p, Int xD, ULong imm64 )
3329 if (imm64 == 0) {
3330 // This has to be special-cased, since the loop below
3331 // requires at least one nonzero halfword.
3332 // MOVZ xD, #0, LSL #0
3333 *p++ = X_3_6_2_16_5(X110, X100101, X00, 0/*imm16*/, xD);
3334 return p;
3337 // There must be at least one non-zero halfword. Find the
3338 // lowest such halfword, and use MOVZ to install it while
3339 // zeroing out the rest of the register.
3340 UShort h[4];
3341 h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
3342 h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
3343 h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
3344 h[0] = (UShort)((imm64 >> 0) & 0xFFFF);
3346 UInt i;
3347 for (i = 0; i < 4; i++) {
3348 if (h[i] != 0)
3349 break;
3351 vassert(i < 4);
3353 // MOVZ xD, h[i], LSL (16*i)
3354 *p++ = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
3356 // Work upwards through h[], using MOVK to stuff in any
3357 // remaining nonzero halfwords.
3358 i++;
3359 for (; i < 4; i++) {
3360 if (h[i] == 0)
3361 continue;
3362 // MOVK xD, h[i], LSL (16*i)
3363 *p++ = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
3366 return p;
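/* Example (illustrative): for imm64 == 0x10000FFFFULL the halfwords are
   h[] = { 0xFFFF, 0x0000, 0x0001, 0x0000 }, so this routine emits just
   two instructions:
      MOVZ xD, #0xFFFF, LSL #0
      MOVK xD, #0x0001, LSL #32
   Zero halfwords above the lowest nonzero one are skipped. */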
3369 /* Get an immediate into a register, using only that register, and
3370 generating exactly 4 instructions, regardless of the value of the
3371 immediate. This is used when generating sections of code that need
3372 to be patched later, so as to guarantee a specific size. */
3373 static UInt* imm64_to_ireg_EXACTLY4 ( UInt* p, Int xD, ULong imm64 )
3375 UShort h[4];
3376 h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
3377 h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
3378 h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
3379 h[0] = (UShort)((imm64 >> 0) & 0xFFFF);
3380 // Emit MOVZ for h[0], then MOVK for h[1], h[2] and h[3],
3381 // regardless of their values, so that exactly 4 insns result.
3382 UInt i;
3383 for (i = 0; i < 4; i++) {
3384 if (i == 0) {
3385 // MOVZ xD, h[0], LSL (16*0)
3386 *p++ = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
3387 } else {
3388 // MOVK xD, h[i], LSL (16*i)
3389 *p++ = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
3392 return p;
3395 /* Check whether p points at a 4-insn sequence cooked up by
3396 imm64_to_ireg_EXACTLY4(). */
3397 static Bool is_imm64_to_ireg_EXACTLY4 ( UInt* p, Int xD, ULong imm64 )
3399 UShort h[4];
3400 h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
3401 h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
3402 h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
3403 h[0] = (UShort)((imm64 >> 0) & 0xFFFF);
3404 // Check for exactly the MOVZ/MOVK sequence that
3405 // imm64_to_ireg_EXACTLY4 would emit for this immediate.
3406 UInt i;
3407 for (i = 0; i < 4; i++) {
3408 UInt expected;
3409 if (i == 0) {
3410 // MOVZ xD, h[0], LSL (16*0)
3411 expected = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
3412 } else {
3413 // MOVK xD, h[i], LSL (16*i)
3414 expected = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
3416 if (p[i] != expected)
3417 return False;
3419 return True;
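/* Illustrative usage (a sketch, not from the original): the chaining
   code later in this file can verify a patch site and then rewrite it
   in place without changing its size, along the lines of
      vassert(is_imm64_to_ireg_EXACTLY4(p, 9, (ULong)old_target));
      (void)imm64_to_ireg_EXACTLY4(p, 9, (ULong)new_target);
   where old_target/new_target are hypothetical names used here only
   for illustration. */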
3423 /* Generate an 8 bit store or 8-to-64 unsigned widening load from/to
3424 rD, using the given amode for the address. */
3425 static UInt* do_load_or_store8 ( UInt* p,
3426 Bool isLoad, UInt wD, ARM64AMode* am )
3428 vassert(wD <= 30);
3429 if (am->tag == ARM64am_RI9) {
3430 /* STURB Wd, [Xn|SP + simm9]: 00 111000 000 simm9 00 n d
3431 LDURB Wd, [Xn|SP + simm9]: 00 111000 010 simm9 00 n d
3433 Int simm9 = am->ARM64am.RI9.simm9;
3434 vassert(-256 <= simm9 && simm9 <= 255);
3435 UInt instr = X_2_6_3_9_2_5_5(X00, X111000, isLoad ? X010 : X000,
3436 simm9 & 0x1FF, X00,
3437 iregEnc(am->ARM64am.RI9.reg), wD);
3438 *p++ = instr;
3439 return p;
3441 if (am->tag == ARM64am_RI12) {
3442 /* STRB Wd, [Xn|SP + uimm12 * 1]: 00 111 001 00 imm12 n d
3443 LDRB Wd, [Xn|SP + uimm12 * 1]: 00 111 001 01 imm12 n d
3445 UInt uimm12 = am->ARM64am.RI12.uimm12;
3446 UInt scale = am->ARM64am.RI12.szB;
3447 vassert(scale == 1); /* failure of this is serious. Do not ignore. */
3448 UInt xN = iregEnc(am->ARM64am.RI12.reg);
3449 vassert(xN <= 30);
3450 UInt instr = X_2_6_2_12_5_5(X00, X111001, isLoad ? X01 : X00,
3451 uimm12, xN, wD);
3452 *p++ = instr;
3453 return p;
3455 if (am->tag == ARM64am_RR) {
3456 /* STRB Wd, [Xn|SP, Xm]: 00 111 000 001 m 011 0 10 n d
3457 LDRB Wd, [Xn|SP, Xm]: 00 111 000 011 m 011 0 10 n d
3459 UInt xN = iregEnc(am->ARM64am.RR.base);
3460 UInt xM = iregEnc(am->ARM64am.RR.index);
3461 vassert(xN <= 30);
3462 UInt instr = X_3_8_5_6_5_5(X001, isLoad ? X11000011 : X11000001,
3463 xM, X011010, xN, wD);
3464 *p++ = instr;
3465 return p;
3467 vpanic("do_load_or_store8");
3468 vassert(0);
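/* Worked example (illustrative): for the RI9 form, a store of w0 to
   [x21 - 1] is
      X_2_6_3_9_2_5_5(X00, X111000, X000, (-1) & 0x1FF, X00, 21, 0)
   = 0x381FF2A0, i.e. STURB w0, [x21, #-1]. */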
3472 /* Generate a 16 bit store or 16-to-64 unsigned widening load from/to
3473 rD, using the given amode for the address. */
3474 static UInt* do_load_or_store16 ( UInt* p,
3475 Bool isLoad, UInt wD, ARM64AMode* am )
3477 vassert(wD <= 30);
3478 if (am->tag == ARM64am_RI9) {
3479 /* STURH Wd, [Xn|SP + simm9]: 01 111000 000 simm9 00 n d
3480 LDURH Wd, [Xn|SP + simm9]: 01 111000 010 simm9 00 n d
3482 Int simm9 = am->ARM64am.RI9.simm9;
3483 vassert(-256 <= simm9 && simm9 <= 255);
3484 UInt instr = X_2_6_3_9_2_5_5(X01, X111000, isLoad ? X010 : X000,
3485 simm9 & 0x1FF, X00,
3486 iregEnc(am->ARM64am.RI9.reg), wD);
3487 *p++ = instr;
3488 return p;
3490 if (am->tag == ARM64am_RI12) {
3491 /* STRH Wd, [Xn|SP + uimm12 * 2]: 01 111 001 00 imm12 n d
3492 LDRH Wd, [Xn|SP + uimm12 * 2]: 01 111 001 01 imm12 n d
3494 UInt uimm12 = am->ARM64am.RI12.uimm12;
3495 UInt scale = am->ARM64am.RI12.szB;
3496 vassert(scale == 2); /* failure of this is serious. Do not ignore. */
3497 UInt xN = iregEnc(am->ARM64am.RI12.reg);
3498 vassert(xN <= 30);
3499 UInt instr = X_2_6_2_12_5_5(X01, X111001, isLoad ? X01 : X00,
3500 uimm12, xN, wD);
3501 *p++ = instr;
3502 return p;
3504 if (am->tag == ARM64am_RR) {
3505 /* STRH Wd, [Xn|SP, Xm]: 01 111 000 001 m 011 0 10 n d
3506 LDRH Wd, [Xn|SP, Xm]: 01 111 000 011 m 011 0 10 n d
3508 UInt xN = iregEnc(am->ARM64am.RR.base);
3509 UInt xM = iregEnc(am->ARM64am.RR.index);
3510 vassert(xN <= 30);
3511 UInt instr = X_3_8_5_6_5_5(X011, isLoad ? X11000011 : X11000001,
3512 xM, X011010, xN, wD);
3513 *p++ = instr;
3514 return p;
3516 vpanic("do_load_or_store16");
3517 vassert(0);
3521 /* Generate a 32 bit store or 32-to-64 unsigned widening load from/to
3522 rD, using the given amode for the address. */
3523 static UInt* do_load_or_store32 ( UInt* p,
3524 Bool isLoad, UInt wD, ARM64AMode* am )
3526 vassert(wD <= 30);
3527 if (am->tag == ARM64am_RI9) {
3528 /* STUR Wd, [Xn|SP + simm9]: 10 111000 000 simm9 00 n d
3529 LDUR Wd, [Xn|SP + simm9]: 10 111000 010 simm9 00 n d
3531 Int simm9 = am->ARM64am.RI9.simm9;
3532 vassert(-256 <= simm9 && simm9 <= 255);
3533 UInt instr = X_2_6_3_9_2_5_5(X10, X111000, isLoad ? X010 : X000,
3534 simm9 & 0x1FF, X00,
3535 iregEnc(am->ARM64am.RI9.reg), wD);
3536 *p++ = instr;
3537 return p;
3539 if (am->tag == ARM64am_RI12) {
3540 /* STR Wd, [Xn|SP + uimm12 * 4]: 10 111 001 00 imm12 n d
3541 LDR Wd, [Xn|SP + uimm12 * 4]: 10 111 001 01 imm12 n d
3543 UInt uimm12 = am->ARM64am.RI12.uimm12;
3544 UInt scale = am->ARM64am.RI12.szB;
3545 vassert(scale == 4); /* failure of this is serious. Do not ignore. */
3546 UInt xN = iregEnc(am->ARM64am.RI12.reg);
3547 vassert(xN <= 30);
3548 UInt instr = X_2_6_2_12_5_5(X10, X111001, isLoad ? X01 : X00,
3549 uimm12, xN, wD);
3550 *p++ = instr;
3551 return p;
3553 if (am->tag == ARM64am_RR) {
3554 /* STR Wd, [Xn|SP, Xm]: 10 111 000 001 m 011 0 10 n d
3555 LDR Wd, [Xn|SP, Xm]: 10 111 000 011 m 011 0 10 n d
3557 UInt xN = iregEnc(am->ARM64am.RR.base);
3558 UInt xM = iregEnc(am->ARM64am.RR.index);
3559 vassert(xN <= 30);
3560 UInt instr = X_3_8_5_6_5_5(X101, isLoad ? X11000011 : X11000001,
3561 xM, X011010, xN, wD);
3562 *p++ = instr;
3563 return p;
3565 vpanic("do_load_or_store32");
3566 vassert(0);
3570 /* Generate a 64 bit integer load or store to/from xD, using the given amode
3571 for the address. */
3572 static UInt* do_load_or_store64 ( UInt* p,
3573 Bool isLoad, UInt xD, ARM64AMode* am )
3575 /* In all these cases, Rn can't be 31 since that means SP. But Rd can be
3576 31, meaning XZR/WZR. */
3577 vassert(xD <= 31);
3578 if (am->tag == ARM64am_RI9) {
3579 /* STUR Xd, [Xn|SP + simm9]: 11 111000 000 simm9 00 n d
3580 LDUR Xd, [Xn|SP + simm9]: 11 111000 010 simm9 00 n d
3582 Int simm9 = am->ARM64am.RI9.simm9;
3583 vassert(-256 <= simm9 && simm9 <= 255);
3584 UInt xN = iregEnc(am->ARM64am.RI9.reg);
3585 vassert(xN <= 30);
3586 UInt instr = X_2_6_3_9_2_5_5(X11, X111000, isLoad ? X010 : X000,
3587 simm9 & 0x1FF, X00, xN, xD);
3588 *p++ = instr;
3589 return p;
3591 if (am->tag == ARM64am_RI12) {
3592 /* STR Xd, [Xn|SP + uimm12 * 8]: 11 111 001 00 imm12 n d
3593 LDR Xd, [Xn|SP + uimm12 * 8]: 11 111 001 01 imm12 n d
3595 UInt uimm12 = am->ARM64am.RI12.uimm12;
3596 UInt scale = am->ARM64am.RI12.szB;
3597 vassert(scale == 8); /* failure of this is serious. Do not ignore. */
3598 UInt xN = iregEnc(am->ARM64am.RI12.reg);
3599 vassert(xN <= 30);
3600 UInt instr = X_2_6_2_12_5_5(X11, X111001, isLoad ? X01 : X00,
3601 uimm12, xN, xD);
3602 *p++ = instr;
3603 return p;
3605 if (am->tag == ARM64am_RR) {
3606 /* STR Xd, [Xn|SP, Xm]: 11 111 000 001 m 011 0 10 n d
3607 LDR Xd, [Xn|SP, Xm]: 11 111 000 011 m 011 0 10 n d
3609 UInt xN = iregEnc(am->ARM64am.RR.base);
3610 UInt xM = iregEnc(am->ARM64am.RR.index);
3611 vassert(xN <= 30);
3612 UInt instr = X_3_8_5_6_5_5(X111, isLoad ? X11000011 : X11000001,
3613 xM, X011010, xN, xD);
3614 *p++ = instr;
3615 return p;
3617 vpanic("do_load_or_store64");
3618 vassert(0);
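/* Worked example (illustrative): for the RI12 form with scale 8, a
   load of x9 from [x21 + 0] is
      X_2_6_2_12_5_5(X11, X111001, X01, 0, 21, 9)
   = 0xF94002A9, i.e. LDR x9, [x21, #0]. */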
3622 /* Emit an instruction into buf and return the number of bytes used.
3623 Note that buf is not the insn's final place, and therefore it is
3624 imperative to emit position-independent code. If the emitted
3625 instruction was a profiler inc, set *is_profInc to True, else
3626 leave it unchanged. */
3628 Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
3629 UChar* buf, Int nbuf, const ARM64Instr* i,
3630 Bool mode64, VexEndness endness_host,
3631 const void* disp_cp_chain_me_to_slowEP,
3632 const void* disp_cp_chain_me_to_fastEP,
3633 const void* disp_cp_xindir,
3634 const void* disp_cp_xassisted )
3636 UInt* p = (UInt*)buf;
3637 vassert(nbuf >= 32);
3638 vassert(mode64 == True);
3639 vassert(0 == (((HWord)buf) & 3));
3641 switch (i->tag) {
3642 case ARM64in_Arith: {
3643 UInt rD = iregEnc(i->ARM64in.Arith.dst);
3644 UInt rN = iregEnc(i->ARM64in.Arith.argL);
3645 ARM64RIA* argR = i->ARM64in.Arith.argR;
3646 switch (argR->tag) {
3647 case ARM64riA_I12:
3648 *p++ = X_2_6_2_12_5_5(
3649 i->ARM64in.Arith.isAdd ? X10 : X11,
3650 X010001,
3651 argR->ARM64riA.I12.shift == 12 ? X01 : X00,
3652 argR->ARM64riA.I12.imm12, rN, rD
3654 break;
3655 case ARM64riA_R: {
3656 UInt rM = iregEnc(i->ARM64in.Arith.argR->ARM64riA.R.reg);
3657 *p++ = X_3_8_5_6_5_5(
3658 i->ARM64in.Arith.isAdd ? X100 : X110,
3659 X01011000, rM, X000000, rN, rD
3661 break;
3663 default:
3664 goto bad;
3666 goto done;
3668 case ARM64in_Cmp: {
3669 UInt rD = 31; /* XZR, we are going to dump the result */
3670 UInt rN = iregEnc(i->ARM64in.Cmp.argL);
3671 ARM64RIA* argR = i->ARM64in.Cmp.argR;
3672 Bool is64 = i->ARM64in.Cmp.is64;
3673 switch (argR->tag) {
3674 case ARM64riA_I12:
3675 /* 1 11 10001 sh imm12 Rn Rd = SUBS Xd, Xn, #imm */
3676 /* 0 11 10001 sh imm12 Rn Rd = SUBS Wd, Wn, #imm */
3677 *p++ = X_2_6_2_12_5_5(
3678 is64 ? X11 : X01, X110001,
3679 argR->ARM64riA.I12.shift == 12 ? X01 : X00,
3680 argR->ARM64riA.I12.imm12, rN, rD);
3681 break;
3682 case ARM64riA_R: {
3683 /* 1 11 01011 00 0 Rm 000000 Rn Rd = SUBS Xd, Xn, Xm */
3684 /* 0 11 01011 00 0 Rm 000000 Rn Rd = SUBS Wd, Wn, Wm */
3685 UInt rM = iregEnc(i->ARM64in.Cmp.argR->ARM64riA.R.reg);
3686 *p++ = X_3_8_5_6_5_5(is64 ? X111 : X011,
3687 X01011000, rM, X000000, rN, rD);
3688 break;
3690 default:
3691 goto bad;
3693 goto done;
3695 case ARM64in_Logic: {
3696 UInt rD = iregEnc(i->ARM64in.Logic.dst);
3697 UInt rN = iregEnc(i->ARM64in.Logic.argL);
3698 ARM64RIL* argR = i->ARM64in.Logic.argR;
3699 UInt opc = 0; /* invalid */
3700 vassert(rD < 31);
3701 vassert(rN < 31);
3702 switch (i->ARM64in.Logic.op) {
3703 case ARM64lo_OR: opc = X101; break;
3704 case ARM64lo_AND: opc = X100; break;
3705 case ARM64lo_XOR: opc = X110; break;
3706 default: break;
3708 vassert(opc != 0);
3709 switch (argR->tag) {
3710 case ARM64riL_I13: {
3711 /* 1 01 100100 N immR immS Rn Rd = ORR <Xd|Sp>, Xn, #imm */
3712 /* 1 00 100100 N immR immS Rn Rd = AND <Xd|Sp>, Xn, #imm */
3713 /* 1 10 100100 N immR immS Rn Rd = EOR <Xd|Sp>, Xn, #imm */
3714 *p++ = X_3_6_1_6_6_5_5(
3715 opc, X100100, argR->ARM64riL.I13.bitN,
3716 argR->ARM64riL.I13.immR, argR->ARM64riL.I13.immS,
3717 rN, rD
3719 break;
3721 case ARM64riL_R: {
3722 /* 1 01 01010 00 0 m 000000 n d = ORR Xd, Xn, Xm */
3723 /* 1 00 01010 00 0 m 000000 n d = AND Xd, Xn, Xm */
3724 /* 1 10 01010 00 0 m 000000 n d = EOR Xd, Xn, Xm */
3725 UInt rM = iregEnc(argR->ARM64riL.R.reg);
3726 vassert(rM < 31);
3727 *p++ = X_3_8_5_6_5_5(opc, X01010000, rM, X000000, rN, rD);
3728 break;
3730 default:
3731 goto bad;
3733 goto done;
3735 case ARM64in_RRS: {
3736 UInt top8 = 0;
3737 switch (i->ARM64in.RRS.mainOp) {
3738 case ARM64rrs_ADD: top8 = X10001011; break;
3739 case ARM64rrs_SUB: top8 = X11001011; break;
3740 case ARM64rrs_AND: top8 = X10001010; break;
3741 case ARM64rrs_XOR: top8 = X11001010; break;
3742 case ARM64rrs_OR: top8 = X10101010; break;
3743 default: vassert(0); /*NOTREACHED*/
3745 UInt sh = 0;
3746 switch (i->ARM64in.RRS.shiftOp) {
3747 case ARM64sh_SHL: sh = X00; break;
3748 case ARM64sh_SHR: sh = X01; break;
3749 case ARM64sh_SAR: sh = X10; break;
3750 default: vassert(0); /*NOTREACHED*/
3752 UInt amt = i->ARM64in.RRS.amt;
3753 vassert(amt >= 1 && amt <= 63);
3754 *p++ = X_8_2_1_5_6_5_5(top8, sh, 0,
3755 iregEnc(i->ARM64in.RRS.argR), amt,
3756 iregEnc(i->ARM64in.RRS.argL),
3757 iregEnc(i->ARM64in.RRS.dst));
3758 goto done;
3760 case ARM64in_Test: {
3761 UInt rD = 31; /* XZR, we are going to dump the result */
3762 UInt rN = iregEnc(i->ARM64in.Test.argL);
3763 ARM64RIL* argR = i->ARM64in.Test.argR;
3764 switch (argR->tag) {
3765 case ARM64riL_I13: {
3766 /* 1 11 100100 N immR immS Rn Rd = ANDS Xd, Xn, #imm */
3767 *p++ = X_3_6_1_6_6_5_5(
3768 X111, X100100, argR->ARM64riL.I13.bitN,
3769 argR->ARM64riL.I13.immR, argR->ARM64riL.I13.immS,
3770 rN, rD
3772 break;
3774 default:
3775 goto bad;
3777 goto done;
3779 case ARM64in_Shift: {
3780 UInt rD = iregEnc(i->ARM64in.Shift.dst);
3781 UInt rN = iregEnc(i->ARM64in.Shift.argL);
3782 ARM64RI6* argR = i->ARM64in.Shift.argR;
3783 vassert(rD < 31);
3784 vassert(rN < 31);
3785 switch (argR->tag) {
3786 case ARM64ri6_I6: {
3787 /* 110 1001101 (64-sh) (63-sh) nn dd LSL Xd, Xn, sh */
3788 /* 110 1001101 sh 63 nn dd LSR Xd, Xn, sh */
3789 /* 100 1001101 sh 63 nn dd ASR Xd, Xn, sh */
3790 UInt sh = argR->ARM64ri6.I6.imm6;
3791 vassert(sh > 0 && sh < 64);
3792 switch (i->ARM64in.Shift.op) {
3793 case ARM64sh_SHL:
3794 *p++ = X_3_6_1_6_6_5_5(X110, X100110,
3795 1, 64-sh, 63-sh, rN, rD);
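/* Worked example (illustrative): LSL x1, x2, #3 is
   UBFM x1, x2, #61, #60, i.e.
      X_3_6_1_6_6_5_5(X110, X100110, 1, 61, 60, 2, 1)  ==  0xD37DF041. */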
3796 break;
3797 case ARM64sh_SHR:
3798 *p++ = X_3_6_1_6_6_5_5(X110, X100110, 1, sh, 63, rN, rD);
3799 break;
3800 case ARM64sh_SAR:
3801 *p++ = X_3_6_1_6_6_5_5(X100, X100110, 1, sh, 63, rN, rD);
3802 break;
3803 default:
3804 vassert(0);
3806 break;
3808 case ARM64ri6_R: {
3809 /* 100 1101 0110 mm 001000 nn dd LSL Xd, Xn, Xm */
3810 /* 100 1101 0110 mm 001001 nn dd LSR Xd, Xn, Xm */
3811 /* 100 1101 0110 mm 001010 nn dd ASR Xd, Xn, Xm */
3812 UInt rM = iregEnc(argR->ARM64ri6.R.reg);
3813 vassert(rM < 31);
3814 UInt subOpc = 0;
3815 switch (i->ARM64in.Shift.op) {
3816 case ARM64sh_SHL: subOpc = X001000; break;
3817 case ARM64sh_SHR: subOpc = X001001; break;
3818 case ARM64sh_SAR: subOpc = X001010; break;
3819 default: vassert(0);
3821 *p++ = X_3_8_5_6_5_5(X100, X11010110, rM, subOpc, rN, rD);
3822 break;
3824 default:
3825 vassert(0);
3827 goto done;
3829 case ARM64in_Unary: {
3830 UInt rDst = iregEnc(i->ARM64in.Unary.dst);
3831 UInt rSrc = iregEnc(i->ARM64in.Unary.src);
3832 switch (i->ARM64in.Unary.op) {
3833 case ARM64un_CLZ:
3834 /* 1 10 1101 0110 00000 00010 0 nn dd CLZ Xd, Xn */
3835 /* 1 10 1101 0110 00000 00010 1 nn dd CLS Xd, Xn (unimp) */
3836 *p++ = X_3_8_5_6_5_5(X110,
3837 X11010110, X00000, X000100, rSrc, rDst);
3838 goto done;
3839 case ARM64un_NEG:
3840 /* 1 10 01011 000 m 000000 11111 d NEG Xd,Xm */
3841 /* 0 10 01011 000 m 000000 11111 d NEG Wd,Wm (unimp) */
3842 *p++ = X_3_8_5_6_5_5(X110,
3843 X01011000, rSrc, X000000, X11111, rDst);
3844 goto done;
3845 case ARM64un_NOT: {
3846 /* 1 01 01010 00 1 m 000000 11111 d MVN Xd,Xm */
3847 *p++ = X_3_8_5_6_5_5(X101,
3848 X01010001, rSrc, X000000, X11111, rDst);
3849 goto done;
3851 default:
3852 break;
3854 goto bad;
3856 case ARM64in_Set64: {
3857 /* 1 00 1101 0100 11111 invert(cond) 01 11111 Rd CSET Rd, Cond */
3858 UInt rDst = iregEnc(i->ARM64in.Set64.dst);
3859 UInt cc = (UInt)i->ARM64in.Set64.cond;
3860 vassert(cc < 14);
3861 *p++ = X_3_8_5_6_5_5(X100, X11010100, X11111,
3862 ((cc ^ 1) << 2) | X01, X11111, rDst);
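/* Worked example (illustrative, assuming ARM64cc_EQ has the hardware
   encoding 0): CSET x0, EQ is CSINC x0, xzr, xzr, NE, i.e.
      X_3_8_5_6_5_5(X100, X11010100, X11111, (1 << 2) | X01, X11111, 0)
   = 0x9A9F17E0. */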
3863 goto done;
3865 case ARM64in_MovI: {
3866 /* We generate the "preferred form", ORR Xd, XZR, Xm
3867 101 01010 00 0 m 000000 11111 d
3869 UInt instr = 0xAA0003E0;
3870 UInt d = iregEnc(i->ARM64in.MovI.dst);
3871 UInt m = iregEnc(i->ARM64in.MovI.src);
3872 *p++ = instr | ((m & 31) << 16) | ((d & 31) << 0);
3873 goto done;
3875 case ARM64in_Imm64: {
3876 p = imm64_to_ireg( p, iregEnc(i->ARM64in.Imm64.dst),
3877 i->ARM64in.Imm64.imm64 );
3878 goto done;
3880 case ARM64in_LdSt64: {
3881 p = do_load_or_store64( p, i->ARM64in.LdSt64.isLoad,
3882 iregEncOr31(i->ARM64in.LdSt64.rD),
3883 i->ARM64in.LdSt64.amode );
3884 goto done;
3886 case ARM64in_LdSt32: {
3887 p = do_load_or_store32( p, i->ARM64in.LdSt32.isLoad,
3888 iregEnc(i->ARM64in.LdSt32.rD),
3889 i->ARM64in.LdSt32.amode );
3890 goto done;
3892 case ARM64in_LdSt16: {
3893 p = do_load_or_store16( p, i->ARM64in.LdSt16.isLoad,
3894 iregEnc(i->ARM64in.LdSt16.rD),
3895 i->ARM64in.LdSt16.amode );
3896 goto done;
3898 case ARM64in_LdSt8: {
3899 p = do_load_or_store8( p, i->ARM64in.LdSt8.isLoad,
3900 iregEnc(i->ARM64in.LdSt8.rD),
3901 i->ARM64in.LdSt8.amode );
3902 goto done;
3905 case ARM64in_XDirect: {
3906 /* NB: what goes on here has to be very closely coordinated
3907 with chainXDirect_ARM64 and unchainXDirect_ARM64 below. */
3908 /* We're generating chain-me requests here, so we need to be
3909 sure this is actually allowed -- no-redir translations
3910 can't use chain-me's. Hence: */
3911 vassert(disp_cp_chain_me_to_slowEP != NULL);
3912 vassert(disp_cp_chain_me_to_fastEP != NULL);
3914 /* Use ptmp for backpatching conditional jumps. */
3915 UInt* ptmp = NULL;
3917 /* First off, if this is conditional, create a conditional
3918 jump over the rest of it. Or at least, leave a space for
3919 it that we will shortly fill in. */
3920 if (i->ARM64in.XDirect.cond != ARM64cc_AL) {
3921 vassert(i->ARM64in.XDirect.cond != ARM64cc_NV);
3922 ptmp = p;
3923 *p++ = 0;
3926 /* Update the guest PC. */
3927 /* imm64 x9, dstGA */
3928 /* str x9, amPC */
3929 p = imm64_to_ireg(p, /*x*/9, i->ARM64in.XDirect.dstGA);
3930 p = do_load_or_store64(p, False/*!isLoad*/,
3931 /*x*/9, i->ARM64in.XDirect.amPC);
3933 /* --- FIRST PATCHABLE BYTE follows --- */
3934 /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're
3935 calling to) backs up the return address, so as to find the
3936 address of the first patchable byte. So: don't change the
3937 number of instructions (5) below. */
3938 /* movw x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[15:0] */
3939 /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[31:16], lsl 16 */
3940 /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[47:32], lsl 32 */
3941 /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[63:48], lsl 48 */
3942 /* blr x9 */
3943 const void* disp_cp_chain_me
3944 = i->ARM64in.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
3945 : disp_cp_chain_me_to_slowEP;
3946 p = imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)disp_cp_chain_me);
3947 *p++ = 0xD63F0120;
3948 /* --- END of PATCHABLE BYTES --- */
3950 /* Fix up the conditional jump, if there was one. */
3951 if (i->ARM64in.XDirect.cond != ARM64cc_AL) {
3952 Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3953 vassert(delta > 0 && delta <= 40);
3954 vassert((delta & 3) == 0);
3955 UInt notCond = 1 ^ (UInt)i->ARM64in.XDirect.cond;
3956 vassert(notCond <= 13); /* Neither AL nor NV */
3957 vassert(ptmp != NULL);
3958 delta = delta >> 2;
3959 *ptmp = X_8_19_1_4(X01010100, delta & ((1<<19)-1), 0, notCond);
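/* Illustrative example: if the guard is ARM64cc_NE, notCond is EQ
   (0000); with delta == 10 instructions the hole becomes
      X_8_19_1_4(X01010100, 10, 0, 0)  ==  0x54000140
   i.e. B.EQ .+40, skipping the whole chain-me block. */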
3961 goto done;
3964 case ARM64in_XIndir: {
3965 // XIndir is more or less the same as XAssisted, except
3966 // we don't have a trc value to hand back, so there's no
3967 // write to x21
3968 /* Use ptmp for backpatching conditional jumps. */
3969 //UInt* ptmp = NULL;
3971 /* First off, if this is conditional, create a conditional
3972 jump over the rest of it. Or at least, leave a space for
3973 it that we will shortly fill in. */
3974 if (i->ARM64in.XIndir.cond != ARM64cc_AL) {
3975 vassert(0); //ATC
3976 //ZZ vassert(i->ARMin.XIndir.cond != ARMcc_NV);
3977 //ZZ ptmp = p;
3978 //ZZ *p++ = 0;
3981 /* Update the guest PC. */
3982 /* str r-dstGA, amPC */
3983 p = do_load_or_store64(p, False/*!isLoad*/,
3984 iregEnc(i->ARM64in.XIndir.dstGA),
3985 i->ARM64in.XIndir.amPC);
3987 /* imm64 x9, VG_(disp_cp_xindir) */
3988 /* br x9 */
3989 p = imm64_to_ireg(p, /*x*/9, (Addr)disp_cp_xindir);
3990 *p++ = 0xD61F0120; /* br x9 */
3992 /* Fix up the conditional jump, if there was one. */
3993 if (i->ARM64in.XIndir.cond != ARM64cc_AL) {
3994 vassert(0); //ATC
3995 //ZZ Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3996 //ZZ vassert(delta > 0 && delta < 40);
3997 //ZZ vassert((delta & 3) == 0);
3998 //ZZ UInt notCond = 1 ^ (UInt)i->ARMin.XIndir.cond;
3999 //ZZ vassert(notCond <= 13); /* Neither AL nor NV */
4000 //ZZ delta = (delta >> 2) - 2;
4001 //ZZ *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
4003 goto done;
4006 case ARM64in_XAssisted: {
4007 /* Use ptmp for backpatching conditional jumps. */
4008 UInt* ptmp = NULL;
4010 /* First off, if this is conditional, create a conditional
4011 jump over the rest of it. Or at least, leave a space for
4012 it that we will shortly fill in. I think this can only
4013 ever happen when VEX is driven by the switchbacker. */
4014 if (i->ARM64in.XAssisted.cond != ARM64cc_AL) {
4015 vassert(i->ARM64in.XAssisted.cond != ARM64cc_NV);
4016 ptmp = p;
4017 *p++ = 0;
4020 /* Update the guest PC. */
4021 /* str r-dstGA, amPC */
4022 p = do_load_or_store64(p, False/*!isLoad*/,
4023 iregEnc(i->ARM64in.XAssisted.dstGA),
4024 i->ARM64in.XAssisted.amPC);
4026 /* imm64 x21, $magic_number */
4027 UInt trcval = 0;
4028 switch (i->ARM64in.XAssisted.jk) {
4029 case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
4030 case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
4031 //case Ijk_Sys_int128: trcval = VEX_TRC_JMP_SYS_INT128; break;
4032 case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
4033 //case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
4034 //case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
4035 case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
4036 case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
4037 case Ijk_FlushDCache: trcval = VEX_TRC_JMP_FLUSHDCACHE; break;
4038 case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
4039 case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
4040 case Ijk_SigBUS: trcval = VEX_TRC_JMP_SIGBUS; break;
4041 //case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
4042 case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
4043 /* We don't expect to see the following being assisted. */
4044 //case Ijk_Ret:
4045 //case Ijk_Call:
4046 /* fallthrough */
4047 default:
4048 ppIRJumpKind(i->ARM64in.XAssisted.jk);
4049 vpanic("emit_ARM64Instr.ARM64in_XAssisted: "
4050 "unexpected jump kind");
4052 vassert(trcval != 0);
4053 p = imm64_to_ireg(p, /*x*/21, (ULong)trcval);
4055 /* imm64 x9, VG_(disp_cp_xassisted) */
4056 /* br x9 */
4057 p = imm64_to_ireg(p, /*x*/9, (Addr)disp_cp_xassisted);
4058 *p++ = 0xD61F0120; /* br x9 */
4060 /* Fix up the conditional jump, if there was one. */
4061 if (i->ARM64in.XAssisted.cond != ARM64cc_AL) {
4062 Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
4063 vassert(delta > 0 && delta < 40);
4064 vassert((delta & 3) == 0);
4065 UInt notCond = 1 ^ (UInt)i->ARM64in.XAssisted.cond;
4066 vassert(notCond <= 13); /* Neither AL nor NV */
4067 vassert(ptmp != NULL);
4068 delta = delta >> 2;
4069 *ptmp = X_8_19_1_4(X01010100, delta & ((1<<19)-1), 0, notCond);
4071 goto done;
4074 case ARM64in_CSel: {
4075 /* 100 1101 0100 mm cond 00 nn dd = CSEL Xd, Xn, Xm, cond */
4076 UInt dd = iregEnc(i->ARM64in.CSel.dst);
4077 UInt nn = iregEnc(i->ARM64in.CSel.argL);
4078 UInt mm = iregEncOr31(i->ARM64in.CSel.argR); // Can be XZR
4079 UInt cond = (UInt)i->ARM64in.CSel.cond;
4080 vassert(dd < 31 && nn < 31 && mm <= 31 && cond < 16);
4081 *p++ = X_3_8_5_6_5_5(X100, X11010100, mm, cond << 2, nn, dd);
4082 goto done;
4085 case ARM64in_Call: {
4086 /* We'll use x9 as a scratch register to put the target
4087 address in. */
4088 if (i->ARM64in.Call.cond != ARM64cc_AL
4089 && i->ARM64in.Call.rloc.pri != RLPri_None) {
4090 /* The call might not happen (it isn't unconditional) and
4091 it returns a result. In this case we will need to
4092 generate a control flow diamond to put 0x555..555 in
4093 the return register(s) in the case where the call
4094 doesn't happen. If this ever becomes necessary, maybe
4095 copy code from the 32-bit ARM equivalent. Until that
4096 day, just give up. */
4097 goto bad;
4100 UInt* ptmp = NULL;
4101 if (i->ARM64in.Call.cond != ARM64cc_AL) {
4102 /* Create a hole to put a conditional branch in. We'll
4103 patch it once we know the branch length. */
4104 ptmp = p;
4105 *p++ = 0;
4108 // x9 = &target
4109 p = imm64_to_ireg( (UInt*)p, /*x*/9, (ULong)i->ARM64in.Call.target );
4110 // blr x9
4111 *p++ = 0xD63F0120;
4113 // Patch the hole if necessary
4114 if (i->ARM64in.Call.cond != ARM64cc_AL) {
4115 ULong dist = (ULong)(p - ptmp);
4116 /* imm64_to_ireg produces between 1 and 4 insns, and
4117 then there's the BLR itself. Hence: */
4118 vassert(dist >= 2 && dist <= 5);
4119 vassert(ptmp != NULL);
4120 // 01010100 simm19 0 cond = B.cond (here + simm19 << 2)
4121 *ptmp = X_8_19_1_4(X01010100, dist, 0,
4122 1 ^ (UInt)i->ARM64in.Call.cond);
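/* Illustrative example: with dist == 5 and cond == ARM64cc_EQ
   (assumed hardware encoding 0), the hole becomes
      X_8_19_1_4(X01010100, 5, 0, 1)  ==  0x540000A1
   i.e. B.NE .+20, jumping over the imm64/BLR pair. */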
4123 } else {
4124 vassert(ptmp == NULL);
4127 goto done;
4130 case ARM64in_AddToSP: {
4131 /* 10,0 10001 00 imm12 11111 11111 ADD xsp, xsp, #imm12
4132 11,0 10001 00 imm12 11111 11111 SUB xsp, xsp, #imm12
4134 Int simm12 = i->ARM64in.AddToSP.simm;
4135 vassert(-4096 < simm12 && simm12 < 4096);
4136 vassert(0 == (simm12 & 0xF));
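/* Worked example (illustrative): a 16-byte stack drop, SUB xsp, xsp,
   #16, is
      X_2_6_2_12_5_5(X11, X010001, X00, 16, X11111, X11111)  ==  0xD10043FF. */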
4137 if (simm12 >= 0) {
4138 *p++ = X_2_6_2_12_5_5(X10, X010001, X00, simm12, X11111, X11111);
4139 } else {
4140 *p++ = X_2_6_2_12_5_5(X11, X010001, X00, -simm12, X11111, X11111);
4142 goto done;
4145 case ARM64in_FromSP: {
4146 /* 10,0 10001 00 0..(12)..0 11111 dd MOV Xd, xsp */
4147 UInt dd = iregEnc(i->ARM64in.FromSP.dst);
4148 vassert(dd < 31);
4149 *p++ = X_2_6_2_12_5_5(X10, X010001, X00, 0, X11111, dd);
4150 goto done;
4153 case ARM64in_Mul: {
4154 /* 100 11011 110 mm 011111 nn dd UMULH Xd, Xn,Xm
4155 100 11011 010 mm 011111 nn dd SMULH Xd, Xn,Xm
4156 100 11011 000 mm 011111 nn dd MUL Xd, Xn,Xm
4158 UInt dd = iregEnc(i->ARM64in.Mul.dst);
4159 UInt nn = iregEnc(i->ARM64in.Mul.argL);
4160 UInt mm = iregEnc(i->ARM64in.Mul.argR);
4161 vassert(dd < 31 && nn < 31 && mm < 31);
4162 switch (i->ARM64in.Mul.op) {
4163 case ARM64mul_ZX:
4164 *p++ = X_3_8_5_6_5_5(X100, X11011110, mm, X011111, nn, dd);
4165 goto done;
4166 case ARM64mul_SX:
4167 *p++ = X_3_8_5_6_5_5(X100, X11011010, mm, X011111, nn, dd);
4168 goto done;
4169 case ARM64mul_PLAIN:
4170 *p++ = X_3_8_5_6_5_5(X100, X11011000, mm, X011111, nn, dd);
4171 goto done;
4172 default:
4173 vassert(0);
4175 goto bad;
4177 case ARM64in_LdrEX: {
4178 /* 085F7C82 ldxrb w2, [x4]
4179 485F7C82 ldxrh w2, [x4]
4180 885F7C82 ldxr w2, [x4]
4181 C85F7C82 ldxr x2, [x4]
4183 switch (i->ARM64in.LdrEX.szB) {
4184 case 1: *p++ = 0x085F7C82; goto done;
4185 case 2: *p++ = 0x485F7C82; goto done;
4186 case 4: *p++ = 0x885F7C82; goto done;
4187 case 8: *p++ = 0xC85F7C82; goto done;
4188 default: break;
4190 goto bad;
4192 case ARM64in_StrEX: {
4193 /* 08007C82 stxrb w0, w2, [x4]
4194 48007C82 stxrh w0, w2, [x4]
4195 88007C82 stxr w0, w2, [x4]
4196 C8007C82 stxr w0, x2, [x4]
4198 switch (i->ARM64in.StrEX.szB) {
4199 case 1: *p++ = 0x08007C82; goto done;
4200 case 2: *p++ = 0x48007C82; goto done;
4201 case 4: *p++ = 0x88007C82; goto done;
4202 case 8: *p++ = 0xC8007C82; goto done;
4203 default: break;
4205 goto bad;
4207 case ARM64in_LdrEXP: {
4208 // C87F0C82 ldxp x2, x3, [x4]
4209 *p++ = 0xC87F0C82;
4210 goto done;
4212 case ARM64in_StrEXP: {
4213 // C8200C82 stxp w0, x2, x3, [x4]
4214 *p++ = 0xC8200C82;
4215 goto done;
4217 case ARM64in_CAS: {
4218 /* This isn't simple. For an explanation see the comment in
4219 host_arm64_defs.h on the definition of ARM64Instr case CAS.
4221 NOTE: We could place "loop:" after mov/and but then we need
4222 an additional scratch register.
4224 /* Generate:
4226 loop:
4227 -- one of:
4228 mov x8, x5 // AA0503E8
4229 and x8, x5, #0xFFFFFFFF // 92407CA8
4230 and x8, x5, #0xFFFF // 92403CA8
4231 and x8, x5, #0xFF // 92401CA8
4233 -- one of:
4234 ldxr x1, [x3] // C85F7C61
4235 ldxr w1, [x3] // 885F7C61
4236 ldxrh w1, [x3] // 485F7C61
4237 ldxrb w1, [x3] // 085F7C61
4239 -- always:
4240 cmp x1, x8 // EB08003F
4241 bne out // 54000061
4243 -- one of:
4244 stxr w8, x7, [x3] // C8087C67
4245 stxr w8, w7, [x3] // 88087C67
4246 stxrh w8, w7, [x3] // 48087C67
4247 stxrb w8, w7, [x3] // 08087C67
4249 -- always:
4250 cbnz w8, loop // 35FFFF68
4251 out:
4253 switch (i->ARM64in.CAS.szB) {
4254 case 8: *p++ = 0xAA0503E8; break;
4255 case 4: *p++ = 0x92407CA8; break;
4256 case 2: *p++ = 0x92403CA8; break;
4257 case 1: *p++ = 0x92401CA8; break;
4258 default: vassert(0);
4260 switch (i->ARM64in.CAS.szB) {
4261 case 8: *p++ = 0xC85F7C61; break;
4262 case 4: *p++ = 0x885F7C61; break;
4263 case 2: *p++ = 0x485F7C61; break;
4264 case 1: *p++ = 0x085F7C61; break;
4266 *p++ = 0xEB08003F;
4267 *p++ = 0x54000061;
4268 switch (i->ARM64in.CAS.szB) {
4269 case 8: *p++ = 0xC8087C67; break;
4270 case 4: *p++ = 0x88087C67; break;
4271 case 2: *p++ = 0x48087C67; break;
4272 case 1: *p++ = 0x08087C67; break;
4274 *p++ = 0x35FFFF68;
4275 goto done;
4277 case ARM64in_CASP: {
4278 /* Generate:
4279 CASP <Xs>, <X(s+1)>, <Xt>, <X(t+1)>, [<Xn|SP>{,#0}]
4281 Register allocation (see ARM64in_CASP in getRegUsage_ARM64Instr):
4282 Xn: memory address
4283 -> X2 (INPUT)
4284 Xs, X(s+1): values to be compared with value read from address
4285 -> X4,X5 (INPUTS)
4286 -> X0,X1 (OUTPUTS) loaded from memory and compared with
4287 scratch registers X8,X9 (CLOBBERED) which contain
4288 contents of X4,X5
4289 Xt, X(t+1): values to be stored to memory if X0,X1==X8,X9
4290 -> X6,X7 (INPUT)
4292 loop:
4293 -- two of:
4294 mov x8, x4 // AA0403E8
4295 mov x9, x5 // AA0503E9
4296 and x8, x4, #0xFFFFFFFF // 92407C88
4297 and x9, x5, #0xFFFFFFFF // 92407CA9
4299 -- one of:
4300 ldxp x0,x1, [x2] // C87F0440
4301 ldxp w0,w1, [x2] // 887F0440
4303 -- always:
4304 cmp x0, x8 // EB08001F
4305 bne out // 540000A1
4306 cmp x1, x9 // EB09003F
4307 bne out // 54000061
4309 -- one of:
4310 stxp w3, x6, x7, [x2] // C8231C46
4311 stxp w3, w6, w7, [x2] // 88231C46
4313 -- always:
4314 cbnz w3, loop // 35FFFF03
4315 out:
4317 switch (i->ARM64in.CASP.szB) {
4318 case 8: *p++ = 0xAA0403E8; *p++ = 0xAA0503E9; break;
4319 case 4: *p++ = 0x92407C88; *p++ = 0x92407CA9; break;
4320 default: vassert(0);
4322 switch (i->ARM64in.CASP.szB) {
4323 case 8: *p++ = 0xC87F0440; break;
4324 case 4: *p++ = 0x887F0440; break;
4325 default: vassert(0);
4327 *p++ = 0xEB08001F;
4328 *p++ = 0x540000A1;
4329 *p++ = 0xEB09003F;
4330 *p++ = 0x54000061;
4331 switch (i->ARM64in.CASP.szB) {
4332 case 8: *p++ = 0xC8231C46; break;
4333 case 4: *p++ = 0x88231C46; break;
4334 default: vassert(0);
4336 *p++ = 0x35FFFF03;
4337 goto done;
4339 case ARM64in_MFence: {
4340 *p++ = 0xD5033F9F; /* DSB sy */
4341 *p++ = 0xD5033FBF; /* DMB sy */
4342 *p++ = 0xD5033FDF; /* ISB */
4343 goto done;
4345 case ARM64in_ClrEX: {
4346 *p++ = 0xD5033F5F; /* clrex #15 */
4347 goto done;
4349 case ARM64in_VLdStH: {
4350 /* 01 111101 01 imm12 n t LDR Ht, [Xn|SP, #imm12 * 2]
4351 01 111101 00 imm12 n t STR Ht, [Xn|SP, #imm12 * 2]
4353 UInt hD = dregEnc(i->ARM64in.VLdStH.hD);
4354 UInt rN = iregEnc(i->ARM64in.VLdStH.rN);
4355 UInt uimm12 = i->ARM64in.VLdStH.uimm12;
4356 Bool isLD = i->ARM64in.VLdStH.isLoad;
4357 vassert(uimm12 < 8192 && 0 == (uimm12 & 1));
4358 uimm12 >>= 1;
4359 vassert(uimm12 < (1<<12));
4360 vassert(hD < 32);
4361 vassert(rN < 31);
4362 *p++ = X_2_6_2_12_5_5(X01, X111101, isLD ? X01 : X00,
4363 uimm12, rN, hD);
4364 goto done;
4366 case ARM64in_VLdStS: {
4367 /* 10 111101 01 imm12 n t LDR St, [Xn|SP, #imm12 * 4]
4368 10 111101 00 imm12 n t STR St, [Xn|SP, #imm12 * 4]
4370 UInt sD = dregEnc(i->ARM64in.VLdStS.sD);
4371 UInt rN = iregEnc(i->ARM64in.VLdStS.rN);
4372 UInt uimm12 = i->ARM64in.VLdStS.uimm12;
4373 Bool isLD = i->ARM64in.VLdStS.isLoad;
4374 vassert(uimm12 < 16384 && 0 == (uimm12 & 3));
4375 uimm12 >>= 2;
4376 vassert(uimm12 < (1<<12));
4377 vassert(sD < 32);
4378 vassert(rN < 31);
4379 *p++ = X_2_6_2_12_5_5(X10, X111101, isLD ? X01 : X00,
4380 uimm12, rN, sD);
4381 goto done;
4383 case ARM64in_VLdStD: {
4384 /* 11 111101 01 imm12 n t LDR Dt, [Xn|SP, #imm12 * 8]
4385 11 111101 00 imm12 n t STR Dt, [Xn|SP, #imm12 * 8]
4387 UInt dD = dregEnc(i->ARM64in.VLdStD.dD);
4388 UInt rN = iregEnc(i->ARM64in.VLdStD.rN);
4389 UInt uimm12 = i->ARM64in.VLdStD.uimm12;
4390 Bool isLD = i->ARM64in.VLdStD.isLoad;
4391 vassert(uimm12 < 32768 && 0 == (uimm12 & 7));
4392 uimm12 >>= 3;
4393 vassert(uimm12 < (1<<12));
4394 vassert(dD < 32);
4395 vassert(rN < 31);
4396 *p++ = X_2_6_2_12_5_5(X11, X111101, isLD ? X01 : X00,
4397 uimm12, rN, dD);
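/* Worked example (illustrative): LDR d0, [x21, #16] has uimm12 == 16,
   which scales to 2, so the word is
      X_2_6_2_12_5_5(X11, X111101, X01, 2, 21, 0)  ==  0xFD400AA0. */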
4398 goto done;
4400 case ARM64in_VLdStQ: {
4401 /* 0100 1100 0000 0000 0111 11 rN rQ st1 {vQ.2d}, [<rN|SP>]
4402 0100 1100 0100 0000 0111 11 rN rQ ld1 {vQ.2d}, [<rN|SP>]
4404 UInt rQ = qregEnc(i->ARM64in.VLdStQ.rQ);
4405 UInt rN = iregEnc(i->ARM64in.VLdStQ.rN);
4406 vassert(rQ < 32);
4407 vassert(rN < 31);
4408 if (i->ARM64in.VLdStQ.isLoad) {
4409 *p++ = 0x4C407C00 | (rN << 5) | rQ;
4410 } else {
4411 *p++ = 0x4C007C00 | (rN << 5) | rQ;
4413 goto done;
4415 case ARM64in_VCvtI2F: {
4416 /* 31 28 23 21 20 18 15 9 4
4417 000 11110 00 1 00 010 000000 n d SCVTF Sd, Wn
4418 000 11110 01 1 00 010 000000 n d SCVTF Dd, Wn
4419 100 11110 00 1 00 010 000000 n d SCVTF Sd, Xn
4420 100 11110 01 1 00 010 000000 n d SCVTF Dd, Xn
4421 000 11110 00 1 00 011 000000 n d UCVTF Sd, Wn
4422 000 11110 01 1 00 011 000000 n d UCVTF Dd, Wn
4423 100 11110 00 1 00 011 000000 n d UCVTF Sd, Xn
4424 100 11110 01 1 00 011 000000 n d UCVTF Dd, Xn
4426 UInt rN = iregEnc(i->ARM64in.VCvtI2F.rS);
4427 UInt rD = dregEnc(i->ARM64in.VCvtI2F.rD);
4428 ARM64CvtOp how = i->ARM64in.VCvtI2F.how;
4429 /* Just handle cases as they show up. */
4430 switch (how) {
4431 case ARM64cvt_F32_I32S: /* SCVTF Sd, Wn */
4432 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100010, X000000, rN, rD);
4433 break;
4434 case ARM64cvt_F64_I32S: /* SCVTF Dd, Wn */
4435 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100010, X000000, rN, rD);
4436 break;
4437 case ARM64cvt_F32_I64S: /* SCVTF Sd, Xn */
4438 *p++ = X_3_5_8_6_5_5(X100, X11110, X00100010, X000000, rN, rD);
4439 break;
4440 case ARM64cvt_F64_I64S: /* SCVTF Dd, Xn */
4441 *p++ = X_3_5_8_6_5_5(X100, X11110, X01100010, X000000, rN, rD);
4442 break;
4443 case ARM64cvt_F32_I32U: /* UCVTF Sd, Wn */
4444 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100011, X000000, rN, rD);
4445 break;
4446 case ARM64cvt_F64_I32U: /* UCVTF Dd, Wn */
4447 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100011, X000000, rN, rD);
4448 break;
4449 case ARM64cvt_F32_I64U: /* UCVTF Sd, Xn */
4450 *p++ = X_3_5_8_6_5_5(X100, X11110, X00100011, X000000, rN, rD);
4451 break;
4452 case ARM64cvt_F64_I64U: /* UCVTF Dd, Xn */
4453 *p++ = X_3_5_8_6_5_5(X100, X11110, X01100011, X000000, rN, rD);
4454 break;
4455 default:
4456 goto bad; //ATC
4458 goto done;
4460 case ARM64in_VCvtF2I: {
4461 /* 30 23 20 18 15 9 4
4462 sf 00,11110,0x 1 00 000,000000 n d FCVTNS Rd, Fn (round to
4463 sf 00,11110,0x 1 00 001,000000 n d FCVTNU Rd, Fn nearest)
4464 ---------------- 01 -------------- FCVTP-------- (round to +inf)
4465 ---------------- 10 -------------- FCVTM-------- (round to -inf)
4466 ---------------- 11 -------------- FCVTZ-------- (round to zero)
4467 ---------------- 00 100 ---------- FCVTAS------- (nearest, ties away)
4468 ---------------- 00 101 ---------- FCVTAU------- (nearest, ties away)
4470 Rd is Xd when sf==1, Wd when sf==0
4471 Fn is Dn when x==1, Sn when x==0
4472 20:19 carry the rounding mode, using the same encoding as FPCR
4473 18, when set, selects FCVTA{S,U} (round to nearest, ties away)
4475 UInt rD = iregEnc(i->ARM64in.VCvtF2I.rD);
4476 UInt rN = dregEnc(i->ARM64in.VCvtF2I.rS);
4477 ARM64CvtOp how = i->ARM64in.VCvtF2I.how;
4478 UChar armRM = i->ARM64in.VCvtF2I.armRM;
4479 UChar bit18 = i->ARM64in.VCvtF2I.tiesToAway ? 4 : 0;
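/* Worked example (illustrative): FCVTZS x0, d1 (truncating convert)
   has armRM == X11 and tiesToAway == False, so the F64_I64S case
   below produces
      X_3_5_8_6_5_5(X100, X11110, X01100000 | (X11 << 3), X000000, 1, 0)
   = 0x9E780020. */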
4480 /* Just handle cases as they show up. */
4481 switch (how) {
4482 case ARM64cvt_F64_I32S: /* FCVTxS Wd, Dn */
4483 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100000 | (armRM << 3) | bit18,
4484 X000000, rN, rD);
4485 break;
4486 case ARM64cvt_F64_I32U: /* FCVTxU Wd, Dn */
4487 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100001 | (armRM << 3) | bit18,
4488 X000000, rN, rD);
4489 break;
4490 case ARM64cvt_F64_I64S: /* FCVTxS Xd, Dn */
4491 *p++ = X_3_5_8_6_5_5(X100, X11110, X01100000 | (armRM << 3) | bit18,
4492 X000000, rN, rD);
4493 break;
4494 case ARM64cvt_F64_I64U: /* FCVTxU Xd, Dn */
4495 *p++ = X_3_5_8_6_5_5(X100, X11110, X01100001 | (armRM << 3) | bit18,
4496 X000000, rN, rD);
4497 break;
4498 case ARM64cvt_F32_I32S: /* FCVTxS Wd, Sn */
4499 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100000 | (armRM << 3) | bit18,
4500 X000000, rN, rD);
4501 break;
4502 case ARM64cvt_F32_I32U: /* FCVTxU Wd, Sn */
4503 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100001 | (armRM << 3) | bit18,
4504 X000000, rN, rD);
4505 break;
4506 case ARM64cvt_F32_I64S: /* FCVTxS Xd, Sn */
4507 *p++ = X_3_5_8_6_5_5(X100, X11110, X00100000 | (armRM << 3) | bit18,
4508 X000000, rN, rD);
4509 break;
4510 case ARM64cvt_F32_I64U: /* FCVTxU Xd, Sn */
4511 *p++ = X_3_5_8_6_5_5(X100, X11110, X00100001 | (armRM << 3) | bit18,
4512 X000000, rN, rD);
4513 break;
4514 default:
4515 goto bad; //ATC
4517 goto done;
4519 case ARM64in_VCvtSD: {
4520 /* 31 23 21 16 14 9 4
4521 000,11110, 00 10001 0,1 10000 n d FCVT Dd, Sn (S->D)
4522 ---------- 01 ----- 0,0 --------- FCVT Sd, Dn (D->S)
4523 Rounding, when dst is smaller than src, is per the FPCR.
4525 UInt dd = dregEnc(i->ARM64in.VCvtSD.dst);
4526 UInt nn = dregEnc(i->ARM64in.VCvtSD.src);
4527 if (i->ARM64in.VCvtSD.sToD) {
4528 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100010, X110000, nn, dd);
4529 } else {
4530 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100010, X010000, nn, dd);
4532 goto done;
4534 case ARM64in_VCvtHS: {
4535 /* 31 23 21 16 14 9 4
4536 000,11110, 11 10001 0,0 10000 n d FCVT Sd, Hn (H->S)
4537 ---------- 00 ----- 1,1 --------- FCVT Hd, Sn (S->H)
4538 Rounding, when dst is smaller than src, is per the FPCR.
4540 UInt dd = dregEnc(i->ARM64in.VCvtHS.dst);
4541 UInt nn = dregEnc(i->ARM64in.VCvtHS.src);
4542 if (i->ARM64in.VCvtHS.hToS) {
4543 *p++ = X_3_5_8_6_5_5(X000, X11110, X11100010, X010000, nn, dd);
4544 } else {
4545 *p++ = X_3_5_8_6_5_5(X000, X11110, X00100011, X110000, nn, dd);
4547 goto done;
4549 case ARM64in_VCvtHD: {
4550 /* 31 23 21 16 14 9 4
4551 000,11110, 11 10001 0,1 10000 n d FCVT Dd, Hn (H->D)
4552 ---------- 01 ----- 1,1 --------- FCVT Hd, Dn (D->H)
4553 Rounding, when dst is smaller than src, is per the FPCR.
4555 UInt dd = dregEnc(i->ARM64in.VCvtHD.dst);
4556 UInt nn = dregEnc(i->ARM64in.VCvtHD.src);
4557 if (i->ARM64in.VCvtHD.hToD) {
4558 *p++ = X_3_5_8_6_5_5(X000, X11110, X11100010, X110000, nn, dd);
4559 } else {
4560 *p++ = X_3_5_8_6_5_5(X000, X11110, X01100011, X110000, nn, dd);
4562 goto done;
4564 case ARM64in_VUnaryD: {
4565 /* 31 23 21 16 14 9 4
4566 000,11110 01 1,0000 0,0 10000 n d FMOV Dd, Dn (not handled)
4567 ------------------- 0,1 --------- FABS ------
4568 ------------------- 1,0 --------- FNEG ------
4569 ------------------- 1,1 --------- FSQRT -----
4571 UInt dD = dregEnc(i->ARM64in.VUnaryD.dst);
4572 UInt dN = dregEnc(i->ARM64in.VUnaryD.src);
4573 UInt b16 = 2; /* impossible */
4574 UInt b15 = 2; /* impossible */
4575 switch (i->ARM64in.VUnaryD.op) {
4576 case ARM64fpu_NEG: b16 = 1; b15 = 0; break;
4577 case ARM64fpu_SQRT: b16 = 1; b15 = 1; break;
4578 case ARM64fpu_ABS: b16 = 0; b15 = 1; break;
4579 default: break;
4581 if (b16 < 2 && b15 < 2) {
4582 *p++ = X_3_8_5_6_5_5(X000, X11110011, (X0000 << 1) | b16,
4583 (b15 << 5) | X10000, dN, dD);
4584 goto done;
4587 000, 11110 01 1,001 11,1 10000 n d FRINTI Dd, Dm (round per FPCR)
4589 if (i->ARM64in.VUnaryD.op == ARM64fpu_RINT) {
4590 *p++ = X_3_8_5_6_5_5(X000, X11110011, X00111, X110000, dN, dD);
4591 goto done;
4594 000, 11110 01 1,001 10,0 10000 n d FRINTA Dd, Dm (round away from zero)
4596 if (i->ARM64in.VUnaryD.op == ARM64fpu_RINTA0) {
4597 *p++ = X_3_8_5_6_5_5(X000, X11110011, X00110, X010000, dN, dD);
4598 goto done;
4601 000, 11110 01 1,001 00,0 10000 n d FRINTN Dd, Dm (round to even)
4603 if (i->ARM64in.VUnaryD.op == ARM64fpu_RINTE) {
4604 *p++ = X_3_8_5_6_5_5(X000, X11110011, X00100, X010000, dN, dD);
4605 goto done;
4608 010, 11110 11 1,0000 1,1111 10 n d FRECPX Dd, Dm
4610 if (i->ARM64in.VUnaryD.op == ARM64fpu_RECPX) {
4611 *p++ = X_3_8_5_6_5_5(X010, X11110111, X00001, X111110, dN, dD);
4612 goto done;
4614 goto bad;
4616 case ARM64in_VUnaryS: {
4617 /* 31 23 21 16 14 9 4
4618 000,11110 00 1,0000 0,0 10000 n d FMOV Sd, Sn (not handled)
4619 ------------------- 0,1 --------- FABS ------
4620 ------------------- 1,0 --------- FNEG ------
4621 ------------------- 1,1 --------- FSQRT -----
4623 UInt sD = dregEnc(i->ARM64in.VUnaryS.dst);
4624 UInt sN = dregEnc(i->ARM64in.VUnaryS.src);
4625 UInt b16 = 2; /* impossible */
4626 UInt b15 = 2; /* impossible */
4627 switch (i->ARM64in.VUnaryS.op) {
4628 case ARM64fpu_NEG: b16 = 1; b15 = 0; break;
4629 case ARM64fpu_SQRT: b16 = 1; b15 = 1; break;
4630 case ARM64fpu_ABS: b16 = 0; b15 = 1; break;
4631 default: break;
4633 if (b16 < 2 && b15 < 2) {
4634 *p++ = X_3_8_5_6_5_5(X000, X11110001, (X0000 << 1) | b16,
4635 (b15 << 5) | X10000, sN, sD);
4636 goto done;
4639 000, 11110 00 1,001 11,1 10000 n d FRINTI Sd, Sm (round per FPCR)
4641 if (i->ARM64in.VUnaryS.op == ARM64fpu_RINT) {
4642 *p++ = X_3_8_5_6_5_5(X000, X11110001, X00111, X110000, sN, sD);
4643 goto done;
4646 000, 11110 00 1,001 10,0 10000 n d FRINTA Sd, Sm (round away from zero)
4648 if (i->ARM64in.VUnaryS.op == ARM64fpu_RINTA0) {
4649 *p++ = X_3_8_5_6_5_5(X000, X11110001, X00110, X010000, sN, sD);
4650 goto done;
4653 000, 11110 00 1,001 00,0 10000 n d FRINTN Sd, Sm (round to even)
4655 if (i->ARM64in.VUnaryS.op == ARM64fpu_RINTE) {
4656 *p++ = X_3_8_5_6_5_5(X000, X11110001, X00100, X010000, sN, sD);
4657 goto done;
4660 010, 11110 10 1,0000 1,1111 10 n d FRECPX Sd, Sm
4662 if (i->ARM64in.VUnaryS.op == ARM64fpu_RECPX) {
4663 *p++ = X_3_8_5_6_5_5(X010, X11110101, X00001, X111110, sN, sD);
4664 goto done;
4666 goto bad;
4668 case ARM64in_VUnaryH: {
4669 /* 31 23 21 16 14 9 4
4670 000 11110 11 1 0000 0,1 10000 n d FABS Hd, Hn
4671 ------------------- 1,0 --------- FNEG Hd, Hn
4672 ------------------- 1,1 --------- FSQRT Hd, Hn
4674 UInt hD = dregEnc(i->ARM64in.VUnaryH.dst);
4675 UInt hN = dregEnc(i->ARM64in.VUnaryH.src);
4676 /* opc field (bits 15 and 16) */
4677 UInt b16 = 2; /* impossible */
4678 UInt b15 = 2; /* impossible */
4679 switch (i->ARM64in.VUnaryH.op) {
4680 case ARM64fpu_NEG: b16 = 1; b15 = 0; break;
4681 case ARM64fpu_SQRT: b16 = 1; b15 = 1; break;
4682 case ARM64fpu_ABS: b16 = 0; b15 = 1; break;
4683 default: break;
4686 000, 11110 11 1,0000 01,10000 n d FABS Hd, Hn
4687 ---, ----- -- -,---- 10,----- n d FNEG Hd, Hn
4688 ---, ----- -- -,---- 11,----- n d FSQRT Hd, Hn
4690 if (b16 < 2 && b15 < 2) {
4691 *p++ = X_3_8_5_6_5_5(X000, X11110111, (X0000 << 1) | b16,
4692 (b15 << 5) | X10000, hN, hD);
4693 goto done;
4695 goto bad;
4697 case ARM64in_VBinD: {
4698 /* 31 23 20 15 11 9 4
4699 ---------------- 0000 ------ FMUL --------
4700 000 11110 011 m 0001 10 n d FDIV Dd,Dn,Dm
4701 ---------------- 0010 ------ FADD --------
4702 ---------------- 0011 ------ FSUB --------
4704 UInt dD = dregEnc(i->ARM64in.VBinD.dst);
4705 UInt dN = dregEnc(i->ARM64in.VBinD.argL);
4706 UInt dM = dregEnc(i->ARM64in.VBinD.argR);
4707 UInt b1512 = 16; /* impossible */
4708 switch (i->ARM64in.VBinD.op) {
4709 case ARM64fpb_DIV: b1512 = X0001; break;
4710 case ARM64fpb_MUL: b1512 = X0000; break;
4711 case ARM64fpb_SUB: b1512 = X0011; break;
4712 case ARM64fpb_ADD: b1512 = X0010; break;
4713 default: goto bad;
4715 vassert(b1512 < 16);
4716 *p++
4717 = X_3_8_5_6_5_5(X000, X11110011, dM, (b1512 << 2) | X10, dN, dD);
4718 goto done;
4720 case ARM64in_VBinS: {
4721 /* 31 23 20 15 11 9 4
4722 ---------------- 0000 ------ FMUL --------
4723 000 11110 001 m 0001 10 n d FDIV Sd,Sn,Sm
4724 ---------------- 0010 ------ FADD --------
4725 ---------------- 0011 ------ FSUB --------
4727 UInt sD = dregEnc(i->ARM64in.VBinS.dst);
4728 UInt sN = dregEnc(i->ARM64in.VBinS.argL);
4729 UInt sM = dregEnc(i->ARM64in.VBinS.argR);
4730 UInt b1512 = 16; /* impossible */
4731 switch (i->ARM64in.VBinS.op) {
4732 case ARM64fpb_DIV: b1512 = X0001; break;
4733 case ARM64fpb_MUL: b1512 = X0000; break;
4734 case ARM64fpb_SUB: b1512 = X0011; break;
4735 case ARM64fpb_ADD: b1512 = X0010; break;
4736 default: goto bad;
4738 vassert(b1512 < 16);
4739 *p++
4740 = X_3_8_5_6_5_5(X000, X11110001, sM, (b1512 << 2) | X10, sN, sD);
4741 goto done;
4743 case ARM64in_VBinH: {
4744 /* 31 23 20 15 11 9 4
4745 000 11110 111 m 0010 10 n d FADD Hd,Hn,Hm
4746 000 11110 111 m 0011 10 n d FSUB Hd,Hn,Hm
4748 UInt hD = dregEnc(i->ARM64in.VBinH.dst);
4749 UInt hN = dregEnc(i->ARM64in.VBinH.argL);
4750 UInt hM = dregEnc(i->ARM64in.VBinH.argR);
4751 UInt b1512 = 16; /* impossible */
4752 switch (i->ARM64in.VBinH.op) {
4753 case ARM64fpb_ADD: b1512 = X0010; break;
4754 case ARM64fpb_SUB: b1512 = X0011; break;
4755 default: goto bad;
4757 vassert(b1512 < 16);
4758 *p++
4759 = X_3_8_5_6_5_5(X000, X11110111, hM, (b1512 << 2) | X10, hN, hD);
4760 goto done;
4762 case ARM64in_VTriD: {
4763 /* 31 20 15 14 9 4
4764 000 11111 010 m 0 a n d FMADD Dd,Dn,Dm,Da
4765 ---------------- 1 ------ FMSUB -----------
4767 UInt dD = dregEnc(i->ARM64in.VTriD.dst);
4768 UInt dN = dregEnc(i->ARM64in.VTriD.arg1);
4769 UInt dM = dregEnc(i->ARM64in.VTriD.arg2);
4770 UInt dA = dregEnc(i->ARM64in.VTriD.arg3);
4771 UInt b15 = 2; /* impossible */
4772 switch (i->ARM64in.VTriD.op) {
4773 case ARM64fpt_FMADD: b15 = 0; break;
4774 case ARM64fpt_FMSUB: b15 = 1; break;
4775 default: goto bad;
4777 vassert(b15 < 2);
4778 *p++ = X_3_8_5_1_5_5_5(X000, X11111010, dM, b15, dA, dN, dD);
4779 goto done;
4781 case ARM64in_VTriS: {
4782 /* 31 20 15 14 9 4
4783 000 11111 000 m 0 a n d FMADD Sd,Sn,Sm,Sa
4784 ---------------- 1 ------ FMSUB -----------
4786 UInt dD = dregEnc(i->ARM64in.VTriD.dst);
4787 UInt dN = dregEnc(i->ARM64in.VTriD.arg1);
4788 UInt dM = dregEnc(i->ARM64in.VTriD.arg2);
4789 UInt dA = dregEnc(i->ARM64in.VTriD.arg3);
4790 UInt b15 = 2; /* impossible */
4791 switch (i->ARM64in.VTriD.op) {
4792 case ARM64fpt_FMADD: b15 = 0; break;
4793 case ARM64fpt_FMSUB: b15 = 1; break;
4794 default: goto bad;
4796 vassert(b15 < 2);
4797 *p++ = X_3_8_5_1_5_5_5(X000, X11111000, dM, b15, dA, dN, dD);
4798 goto done;
4800 case ARM64in_VCmpD: {
4801 /* 000 11110 01 1 m 00 1000 n 00 000 FCMP Dn, Dm */
4802 UInt dN = dregEnc(i->ARM64in.VCmpD.argL);
4803 UInt dM = dregEnc(i->ARM64in.VCmpD.argR);
4804 *p++ = X_3_8_5_6_5_5(X000, X11110011, dM, X001000, dN, X00000);
4805 goto done;
4807 case ARM64in_VCmpS: {
4808 /* 000 11110 00 1 m 00 1000 n 00 000 FCMP Sn, Sm */
4809 UInt sN = dregEnc(i->ARM64in.VCmpS.argL);
4810 UInt sM = dregEnc(i->ARM64in.VCmpS.argR);
4811 *p++ = X_3_8_5_6_5_5(X000, X11110001, sM, X001000, sN, X00000);
4812 goto done;
4814 case ARM64in_VCmpH: {
4815 /* 000 11110 11 1 m 00 1000 n 00 000 FCMP Hn, Hm */
4816 UInt hN = dregEnc(i->ARM64in.VCmpH.argL);
4817 UInt hM = dregEnc(i->ARM64in.VCmpH.argR);
4818 *p++ = X_3_8_5_6_5_5(X000, X11110111, hM, X001000, hN, X00000);
4819 goto done;
4821 case ARM64in_VFCSel: {
4822 /* 31 23 21 20 15 11 9 5
4823 000 11110 00 1 m cond 11 n d FCSEL Sd,Sn,Sm,cond
4824 000 11110 01 1 m cond 11 n d FCSEL Dd,Dn,Dm,cond
4826 Bool isD = i->ARM64in.VFCSel.isD;
4827 UInt dd = dregEnc(i->ARM64in.VFCSel.dst);
4828 UInt nn = dregEnc(i->ARM64in.VFCSel.argL);
4829 UInt mm = dregEnc(i->ARM64in.VFCSel.argR);
4830 UInt cond = (UInt)i->ARM64in.VFCSel.cond;
4831 vassert(cond < 16);
4832 *p++ = X_3_8_5_6_5_5(X000, isD ? X11110011 : X11110001,
4833 mm, (cond << 2) | X000011, nn, dd);
4834 goto done;
4836 case ARM64in_FPCR: {
4837 Bool toFPCR = i->ARM64in.FPCR.toFPCR;
4838 UInt iReg = iregEnc(i->ARM64in.FPCR.iReg);
4839 if (toFPCR) {
4840 /* 0xD51B44 000 Rt MSR fpcr, rT */
4841 *p++ = 0xD51B4400 | (iReg & 0x1F);
4842 goto done;
4844 goto bad; // FPCR -> iReg case currently ATC
4846 case ARM64in_FPSR: {
4847 Bool toFPSR = i->ARM64in.FPSR.toFPSR;
4848 UInt iReg = iregEnc(i->ARM64in.FPSR.iReg);
4849 if (toFPSR) {
4850 /* 0xD51B44 001 Rt MSR fpsr, rT */
4851 *p++ = 0xD51B4420 | (iReg & 0x1F);
4852 } else {
4853 /* 0xD53B44 001 Rt MRS rT, fpsr */
4854 *p++ = 0xD53B4420 | (iReg & 0x1F);
4856 goto done;
4858 case ARM64in_VBinV: {
4859 /* 31 23 20 15 9 4
4860 010 01110 11 1 m 100001 n d ADD Vd.2d, Vn.2d, Vm.2d
4861 010 01110 10 1 m 100001 n d ADD Vd.4s, Vn.4s, Vm.4s
4862 010 01110 01 1 m 100001 n d ADD Vd.8h, Vn.8h, Vm.8h
4863 010 01110 00 1 m 100001 n d ADD Vd.16b, Vn.16b, Vm.16b
4865 011 01110 11 1 m 100001 n d SUB Vd.2d, Vn.2d, Vm.2d
4866 011 01110 10 1 m 100001 n d SUB Vd.4s, Vn.4s, Vm.4s
4867 011 01110 01 1 m 100001 n d SUB Vd.8h, Vn.8h, Vm.8h
4868 011 01110 00 1 m 100001 n d SUB Vd.16b, Vn.16b, Vm.16b
4870 010 01110 10 1 m 100111 n d MUL Vd.4s, Vn.4s, Vm.4s
4871 010 01110 01 1 m 100111 n d MUL Vd.8h, Vn.8h, Vm.8h
4872 010 01110 00 1 m 100111 n d MUL Vd.16b, Vn.16b, Vm.16b
4874 010 01110 01 1 m 110101 n d FADD Vd.2d, Vn.2d, Vm.2d
4875 010 01110 00 1 m 110101 n d FADD Vd.4s, Vn.4s, Vm.4s
4876 010 01110 11 1 m 110101 n d FSUB Vd.2d, Vn.2d, Vm.2d
4877 010 01110 10 1 m 110101 n d FSUB Vd.4s, Vn.4s, Vm.4s
4879 011 01110 01 1 m 110111 n d FMUL Vd.2d, Vn.2d, Vm.2d
4880 011 01110 00 1 m 110111 n d FMUL Vd.4s, Vn.4s, Vm.4s
4881 011 01110 01 1 m 111111 n d FDIV Vd.2d, Vn.2d, Vm.2d
4882 011 01110 00 1 m 111111 n d FDIV Vd.4s, Vn.4s, Vm.4s
4884 010 01110 01 1 m 111101 n d FMAX Vd.2d, Vn.2d, Vm.2d
4885 010 01110 00 1 m 111101 n d FMAX Vd.4s, Vn.4s, Vm.4s
4886 010 01110 11 1 m 111101 n d FMIN Vd.2d, Vn.2d, Vm.2d
4887 010 01110 10 1 m 111101 n d FMIN Vd.4s, Vn.4s, Vm.4s
4889 011 01110 10 1 m 011001 n d UMAX Vd.4s, Vn.4s, Vm.4s
4890 011 01110 01 1 m 011001 n d UMAX Vd.8h, Vn.8h, Vm.8h
4891 011 01110 00 1 m 011001 n d UMAX Vd.16b, Vn.16b, Vm.16b
4893 011 01110 10 1 m 011011 n d UMIN Vd.4s, Vn.4s, Vm.4s
4894 011 01110 01 1 m 011011 n d UMIN Vd.8h, Vn.8h, Vm.8h
4895 011 01110 00 1 m 011011 n d UMIN Vd.16b, Vn.16b, Vm.16b
4897 010 01110 10 1 m 011001 n d SMAX Vd.4s, Vn.4s, Vm.4s
4898 010 01110 01 1 m 011001 n d SMAX Vd.8h, Vn.8h, Vm.8h
4899 010 01110 00 1 m 011001 n d SMAX Vd.16b, Vn.16b, Vm.16b
4901 010 01110 10 1 m 011011 n d SMIN Vd.4s, Vn.4s, Vm.4s
4902 010 01110 01 1 m 011011 n d SMIN Vd.8h, Vn.8h, Vm.8h
4903 010 01110 00 1 m 011011 n d SMIN Vd.16b, Vn.16b, Vm.16b
4905 010 01110 00 1 m 000111 n d AND Vd, Vn, Vm
4906 010 01110 10 1 m 000111 n d ORR Vd, Vn, Vm
4907 011 01110 00 1 m 000111 n d EOR Vd, Vn, Vm
4909 011 01110 11 1 m 100011 n d CMEQ Vd.2d, Vn.2d, Vm.2d
4910 011 01110 10 1 m 100011 n d CMEQ Vd.4s, Vn.4s, Vm.4s
4911 011 01110 01 1 m 100011 n d CMEQ Vd.8h, Vn.8h, Vm.8h
4912 011 01110 00 1 m 100011 n d CMEQ Vd.16b, Vn.16b, Vm.16b
4914 011 01110 11 1 m 001101 n d CMHI Vd.2d, Vn.2d, Vm.2d
4915 011 01110 10 1 m 001101 n d CMHI Vd.4s, Vn.4s, Vm.4s
4916 011 01110 01 1 m 001101 n d CMHI Vd.8h, Vn.8h, Vm.8h
4917 011 01110 00 1 m 001101 n d CMHI Vd.16b, Vn.16b, Vm.16b
4919 010 01110 11 1 m 001101 n d CMGT Vd.2d, Vn.2d, Vm.2d
4920 010 01110 10 1 m 001101 n d CMGT Vd.4s, Vn.4s, Vm.4s
4921 010 01110 01 1 m 001101 n d CMGT Vd.8h, Vn.8h, Vm.8h
4922 010 01110 00 1 m 001101 n d CMGT Vd.16b, Vn.16b, Vm.16b
4924 010 01110 01 1 m 111001 n d FCMEQ Vd.2d, Vn.2d, Vm.2d
4925 010 01110 00 1 m 111001 n d FCMEQ Vd.4s, Vn.4s, Vm.4s
4927 011 01110 01 1 m 111001 n d FCMGE Vd.2d, Vn.2d, Vm.2d
4928 011 01110 00 1 m 111001 n d FCMGE Vd.4s, Vn.4s, Vm.4s
4930 011 01110 11 1 m 111001 n d FCMGT Vd.2d, Vn.2d, Vm.2d
4931 011 01110 10 1 m 111001 n d FCMGT Vd.4s, Vn.4s, Vm.4s
4933 010 01110 00 0 m 000000 n d TBL Vd.16b, {Vn.16b}, Vm.16b
4935 010 01110 11 0 m 000110 n d UZP1 Vd.2d, Vn.2d, Vm.2d
4936 010 01110 10 0 m 000110 n d UZP1 Vd.4s, Vn.4s, Vm.4s
4937 010 01110 01 0 m 000110 n d UZP1 Vd.8h, Vn.8h, Vm.8h
4938 010 01110 00 0 m 000110 n d UZP1 Vd.16b, Vn.16b, Vm.16b
4940 010 01110 11 0 m 010110 n d UZP2 Vd.2d, Vn.2d, Vm.2d
4941 010 01110 10 0 m 010110 n d UZP2 Vd.4s, Vn.4s, Vm.4s
4942 010 01110 01 0 m 010110 n d UZP2 Vd.8h, Vn.8h, Vm.8h
4943 010 01110 00 0 m 010110 n d UZP2 Vd.16b, Vn.16b, Vm.16b
4945 010 01110 10 0 m 001110 n d ZIP1 Vd.4s, Vn.4s, Vm.4s
4946 010 01110 01 0 m 001110 n d ZIP1 Vd.8h, Vn.8h, Vm.8h
4947 010 01110 00 0 m 001110 n d ZIP1 Vd.16b, Vn.16b, Vm.16b
4949 010 01110 10 0 m 011110 n d ZIP2 Vd.4s, Vn.4s, Vm.4s
4950 010 01110 01 0 m 011110 n d ZIP2 Vd.8h, Vn.8h, Vm.8h
4951 010 01110 00 0 m 011110 n d ZIP2 Vd.16b, Vn.16b, Vm.16b
4953 011 01110 00 1 m 100111 n d PMUL Vd.16b, Vn.16b, Vm.16b
4955 000 01110 00 1 m 111000 n d PMULL Vd.8h, Vn.8b, Vm.8b
4957 001 01110 10 1 m 110000 n d UMULL Vd.2d, Vn.2s, Vm.2s
4958 001 01110 01 1 m 110000 n d UMULL Vd.4s, Vn.4h, Vm.4h
4959 001 01110 00 1 m 110000 n d UMULL Vd.8h, Vn.8b, Vm.8b
4961 000 01110 10 1 m 110000 n d SMULL Vd.2d, Vn.2s, Vm.2s
4962 000 01110 01 1 m 110000 n d SMULL Vd.4s, Vn.4h, Vm.4h
4963 000 01110 00 1 m 110000 n d SMULL Vd.8h, Vn.8b, Vm.8b
4965 010 01110 11 1 m 000011 n d SQADD Vd.2d, Vn.2d, Vm.2d
4966 010 01110 10 1 m 000011 n d SQADD Vd.4s, Vn.4s, Vm.4s
4967 010 01110 01 1 m 000011 n d SQADD Vd.8h, Vn.8h, Vm.8h
4968 010 01110 00 1 m 000011 n d SQADD Vd.16b, Vn.16b, Vm.16b
4970 011 01110 11 1 m 000011 n d UQADD Vd.2d, Vn.2d, Vm.2d
4971 011 01110 10 1 m 000011 n d UQADD Vd.4s, Vn.4s, Vm.4s
4972 011 01110 01 1 m 000011 n d UQADD Vd.8h, Vn.8h, Vm.8h
4973 011 01110 00 1 m 000011 n d UQADD Vd.16b, Vn.16b, Vm.16b
4975 010 01110 11 1 m 001011 n d SQSUB Vd.2d, Vn.2d, Vm.2d
4976 010 01110 10 1 m 001011 n d SQSUB Vd.4s, Vn.4s, Vm.4s
4977 010 01110 01 1 m 001011 n d SQSUB Vd.8h, Vn.8h, Vm.8h
4978 010 01110 00 1 m 001011 n d SQSUB Vd.16b, Vn.16b, Vm.16b
4980 011 01110 11 1 m 001011 n d UQSUB Vd.2d, Vn.2d, Vm.2d
4981 011 01110 10 1 m 001011 n d UQSUB Vd.4s, Vn.4s, Vm.4s
4982 011 01110 01 1 m 001011 n d UQSUB Vd.8h, Vn.8h, Vm.8h
4983 011 01110 00 1 m 001011 n d UQSUB Vd.16b, Vn.16b, Vm.16b
4985 000 01110 10 1 m 110100 n d SQDMULL Vd.2d, Vn.2s, Vm.2s
4986 000 01110 01 1 m 110100 n d SQDMULL Vd.4s, Vn.4h, Vm.4h
4988 010 01110 10 1 m 101101 n d SQDMULH Vd.4s, Vn.4s, Vm.4s
4989 010 01110 01 1 m 101101 n d SQDMULH Vd.8h, Vn.8h, Vm.8h
4990 011 01110 10 1 m 101101 n d SQRDMULH Vd.4s, Vn.4s, Vm.4s
4991 011 01110 01 1 m 101101 n d SQRDMULH Vd.8h, Vn.8h, Vm.8h
4993 010 01110 sz 1 m 010011 n d SQSHL@sz Vd, Vn, Vm
4994 010 01110 sz 1 m 010111 n d SQRSHL@sz Vd, Vn, Vm
4995 011 01110 sz 1 m 010011 n d UQSHL@sz Vd, Vn, Vm
4996 011 01110 sz 1 m 010111 n d UQRSHL@sz Vd, Vn, Vm
4998 010 01110 sz 1 m 010001 n d SSHL@sz Vd, Vn, Vm
4999 010 01110 sz 1 m 010101 n d SRSHL@sz Vd, Vn, Vm
5000 011 01110 sz 1 m 010001 n d USHL@sz Vd, Vn, Vm
5001 011 01110 sz 1 m 010101 n d URSHL@sz Vd, Vn, Vm
5003 010 01110 01 1 m 111111 n d FRECPS Vd.2d, Vn.2d, Vm.2d
5004 010 01110 00 1 m 111111 n d FRECPS Vd.4s, Vn.4s, Vm.4s
5005 010 01110 11 1 m 111111 n d FRSQRTS Vd.2d, Vn.2d, Vm.2d
5006 010 01110 10 1 m 111111 n d FRSQRTS Vd.4s, Vn.4s, Vm.4s
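            (Illustrative note, not from the original source: each row above maps
            one-for-one onto an X_3_8_5_6_5_5 call below -- the three leading bits,
            the next eight bits, Rm, the six opcode bits, Rn, Rd.  For example the
            "SUB Vd.4s" row 011 01110 10 1 m 100001 n d becomes
               X_3_8_5_6_5_5(X011, X01110101, vM, X100001, vN, vD)
            which, with m == n == d == 0, assembles to 0x6EA08400.)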
5008 UInt vD = qregEnc(i->ARM64in.VBinV.dst);
5009 UInt vN = qregEnc(i->ARM64in.VBinV.argL);
5010 UInt vM = qregEnc(i->ARM64in.VBinV.argR);
5011 switch (i->ARM64in.VBinV.op) {
5012 case ARM64vecb_ADD64x2:
5013 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X100001, vN, vD);
5014 break;
5015 case ARM64vecb_ADD32x4:
5016 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X100001, vN, vD);
5017 break;
5018 case ARM64vecb_ADD16x8:
5019 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X100001, vN, vD);
5020 break;
5021 case ARM64vecb_ADD8x16:
5022 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X100001, vN, vD);
5023 break;
5024 case ARM64vecb_SUB64x2:
5025 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X100001, vN, vD);
5026 break;
5027 case ARM64vecb_SUB32x4:
5028 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X100001, vN, vD);
5029 break;
5030 case ARM64vecb_SUB16x8:
5031 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X100001, vN, vD);
5032 break;
5033 case ARM64vecb_SUB8x16:
5034 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100001, vN, vD);
5035 break;
5036 case ARM64vecb_MUL32x4:
5037 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X100111, vN, vD);
5038 break;
5039 case ARM64vecb_MUL16x8:
5040 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X100111, vN, vD);
5041 break;
5042 case ARM64vecb_MUL8x16:
5043 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X100111, vN, vD);
5044 break;
5045 case ARM64vecb_FADD64x2:
5046 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X110101, vN, vD);
5047 break;
5048 case ARM64vecb_FADD32x4:
5049 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X110101, vN, vD);
5050 break;
5051 case ARM64vecb_FADD16x8:
5052 *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X000101, vN, vD);
5053 break;
5054 case ARM64vecb_FSUB64x2:
5055 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X110101, vN, vD);
5056 break;
5057 case ARM64vecb_FSUB32x4:
5058 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X110101, vN, vD);
5059 break;
5060 case ARM64vecb_FSUB16x8:
5061 *p++ = X_3_8_5_6_5_5(X010, X01110110, vM, X000101, vN, vD);
5062 break;
5063 case ARM64vecb_FMUL64x2:
5064 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X110111, vN, vD);
5065 break;
5066 case ARM64vecb_FMUL32x4:
5067 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X110111, vN, vD);
5068 break;
5069 case ARM64vecb_FDIV64x2:
5070 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X111111, vN, vD);
5071 break;
5072 case ARM64vecb_FDIV32x4:
5073 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X111111, vN, vD);
5074 break;
5076 case ARM64vecb_FMAX64x2:
5077 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111101, vN, vD);
5078 break;
5079 case ARM64vecb_FMAX32x4:
5080 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111101, vN, vD);
5081 break;
5082 case ARM64vecb_FMIN64x2:
5083 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X111101, vN, vD);
5084 break;
5085 case ARM64vecb_FMIN32x4:
5086 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X111101, vN, vD);
5087 break;
5089 case ARM64vecb_UMAX32x4:
5090 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X011001, vN, vD);
5091 break;
5092 case ARM64vecb_UMAX16x8:
5093 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X011001, vN, vD);
5094 break;
5095 case ARM64vecb_UMAX8x16:
5096 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X011001, vN, vD);
5097 break;
5099 case ARM64vecb_UMIN32x4:
5100 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X011011, vN, vD);
5101 break;
5102 case ARM64vecb_UMIN16x8:
5103 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X011011, vN, vD);
5104 break;
5105 case ARM64vecb_UMIN8x16:
5106 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X011011, vN, vD);
5107 break;
5109 case ARM64vecb_SMAX32x4:
5110 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X011001, vN, vD);
5111 break;
5112 case ARM64vecb_SMAX16x8:
5113 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X011001, vN, vD);
5114 break;
5115 case ARM64vecb_SMAX8x16:
5116 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X011001, vN, vD);
5117 break;
5119 case ARM64vecb_SMIN32x4:
5120 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X011011, vN, vD);
5121 break;
5122 case ARM64vecb_SMIN16x8:
5123 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X011011, vN, vD);
5124 break;
5125 case ARM64vecb_SMIN8x16:
5126 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X011011, vN, vD);
5127 break;
5129 case ARM64vecb_AND:
5130 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X000111, vN, vD);
5131 break;
5132 case ARM64vecb_ORR:
5133 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X000111, vN, vD);
5134 break;
5135 case ARM64vecb_XOR:
5136 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X000111, vN, vD);
5137 break;
5139 case ARM64vecb_CMEQ64x2:
5140 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X100011, vN, vD);
5141 break;
5142 case ARM64vecb_CMEQ32x4:
5143 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X100011, vN, vD);
5144 break;
5145 case ARM64vecb_CMEQ16x8:
5146 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X100011, vN, vD);
5147 break;
5148 case ARM64vecb_CMEQ8x16:
5149 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100011, vN, vD);
5150 break;
5152 case ARM64vecb_CMHI64x2:
5153 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X001101, vN, vD);
5154 break;
5155 case ARM64vecb_CMHI32x4:
5156 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X001101, vN, vD);
5157 break;
5158 case ARM64vecb_CMHI16x8:
5159 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X001101, vN, vD);
5160 break;
5161 case ARM64vecb_CMHI8x16:
5162 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X001101, vN, vD);
5163 break;
5165 case ARM64vecb_CMGT64x2:
5166 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X001101, vN, vD);
5167 break;
5168 case ARM64vecb_CMGT32x4:
5169 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X001101, vN, vD);
5170 break;
5171 case ARM64vecb_CMGT16x8:
5172 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X001101, vN, vD);
5173 break;
5174 case ARM64vecb_CMGT8x16:
5175 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X001101, vN, vD);
5176 break;
5178 case ARM64vecb_FCMEQ64x2:
5179 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111001, vN, vD);
5180 break;
5181 case ARM64vecb_FCMEQ32x4:
5182 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111001, vN, vD);
5183 break;
5184 case ARM64vecb_FCMEQ16x8:
5185 *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X001001, vN, vD);
5186 break;
5188 case ARM64vecb_FCMGE64x2:
5189 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X111001, vN, vD);
5190 break;
5191 case ARM64vecb_FCMGE32x4:
5192 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X111001, vN, vD);
5193 break;
5194 case ARM64vecb_FCMGE16x8:
5195 *p++ = X_3_8_5_6_5_5(X011, X01110010, vM, X001001, vN, vD);
5196 break;
5198 case ARM64vecb_FCMGT64x2:
5199 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X111001, vN, vD);
5200 break;
5201 case ARM64vecb_FCMGT32x4:
5202 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X111001, vN, vD);
5203 break;
5204 case ARM64vecb_FCMGT16x8:
5205 *p++ = X_3_8_5_6_5_5(X011, X01110110, vM, X001001, vN, vD);
5206 break;
5208 case ARM64vecb_TBL1:
5209 *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X000000, vN, vD);
5210 break;
5212 case ARM64vecb_UZP164x2:
5213 *p++ = X_3_8_5_6_5_5(X010, X01110110, vM, X000110, vN, vD);
5214 break;
5215 case ARM64vecb_UZP132x4:
5216 *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X000110, vN, vD);
5217 break;
5218 case ARM64vecb_UZP116x8:
5219 *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X000110, vN, vD);
5220 break;
5221 case ARM64vecb_UZP18x16:
5222 *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X000110, vN, vD);
5223 break;
5225 case ARM64vecb_UZP264x2:
5226 *p++ = X_3_8_5_6_5_5(X010, X01110110, vM, X010110, vN, vD);
5227 break;
5228 case ARM64vecb_UZP232x4:
5229 *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X010110, vN, vD);
5230 break;
5231 case ARM64vecb_UZP216x8:
5232 *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X010110, vN, vD);
5233 break;
5234 case ARM64vecb_UZP28x16:
5235 *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X010110, vN, vD);
5236 break;
5238 case ARM64vecb_ZIP132x4:
5239 *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X001110, vN, vD);
5240 break;
5241 case ARM64vecb_ZIP116x8:
5242 *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X001110, vN, vD);
5243 break;
5244 case ARM64vecb_ZIP18x16:
5245 *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X001110, vN, vD);
5246 break;
5248 case ARM64vecb_ZIP232x4:
5249 *p++ = X_3_8_5_6_5_5(X010, X01110100, vM, X011110, vN, vD);
5250 break;
5251 case ARM64vecb_ZIP216x8:
5252 *p++ = X_3_8_5_6_5_5(X010, X01110010, vM, X011110, vN, vD);
5253 break;
5254 case ARM64vecb_ZIP28x16:
5255 *p++ = X_3_8_5_6_5_5(X010, X01110000, vM, X011110, vN, vD);
5256 break;
5258 case ARM64vecb_PMUL8x16:
5259 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X100111, vN, vD);
5260 break;
5262 case ARM64vecb_PMULL8x8:
5263 *p++ = X_3_8_5_6_5_5(X000, X01110001, vM, X111000, vN, vD);
5264 break;
5266 case ARM64vecb_UMULL2DSS:
5267 *p++ = X_3_8_5_6_5_5(X001, X01110101, vM, X110000, vN, vD);
5268 break;
5269 case ARM64vecb_UMULL4SHH:
5270 *p++ = X_3_8_5_6_5_5(X001, X01110011, vM, X110000, vN, vD);
5271 break;
5272 case ARM64vecb_UMULL8HBB:
5273 *p++ = X_3_8_5_6_5_5(X001, X01110001, vM, X110000, vN, vD);
5274 break;
5276 case ARM64vecb_SMULL2DSS:
5277 *p++ = X_3_8_5_6_5_5(X000, X01110101, vM, X110000, vN, vD);
5278 break;
5279 case ARM64vecb_SMULL4SHH:
5280 *p++ = X_3_8_5_6_5_5(X000, X01110011, vM, X110000, vN, vD);
5281 break;
5282 case ARM64vecb_SMULL8HBB:
5283 *p++ = X_3_8_5_6_5_5(X000, X01110001, vM, X110000, vN, vD);
5284 break;
5286 case ARM64vecb_SQADD64x2:
5287 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X000011, vN, vD);
5288 break;
5289 case ARM64vecb_SQADD32x4:
5290 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X000011, vN, vD);
5291 break;
5292 case ARM64vecb_SQADD16x8:
5293 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X000011, vN, vD);
5294 break;
5295 case ARM64vecb_SQADD8x16:
5296 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X000011, vN, vD);
5297 break;
5299 case ARM64vecb_UQADD64x2:
5300 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X000011, vN, vD);
5301 break;
5302 case ARM64vecb_UQADD32x4:
5303 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X000011, vN, vD);
5304 break;
5305 case ARM64vecb_UQADD16x8:
5306 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X000011, vN, vD);
5307 break;
5308 case ARM64vecb_UQADD8x16:
5309 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X000011, vN, vD);
5310 break;
5312 case ARM64vecb_SQSUB64x2:
5313 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X001011, vN, vD);
5314 break;
5315 case ARM64vecb_SQSUB32x4:
5316 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X001011, vN, vD);
5317 break;
5318 case ARM64vecb_SQSUB16x8:
5319 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X001011, vN, vD);
5320 break;
5321 case ARM64vecb_SQSUB8x16:
5322 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X001011, vN, vD);
5323 break;
5325 case ARM64vecb_UQSUB64x2:
5326 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X001011, vN, vD);
5327 break;
5328 case ARM64vecb_UQSUB32x4:
5329 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X001011, vN, vD);
5330 break;
5331 case ARM64vecb_UQSUB16x8:
5332 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X001011, vN, vD);
5333 break;
5334 case ARM64vecb_UQSUB8x16:
5335 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X001011, vN, vD);
5336 break;
5338 case ARM64vecb_SQDMULL2DSS:
5339 *p++ = X_3_8_5_6_5_5(X000, X01110101, vM, X110100, vN, vD);
5340 break;
5341 case ARM64vecb_SQDMULL4SHH:
5342 *p++ = X_3_8_5_6_5_5(X000, X01110011, vM, X110100, vN, vD);
5343 break;
5345 case ARM64vecb_SQDMULH32x4:
5346 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X101101, vN, vD);
5347 break;
5348 case ARM64vecb_SQDMULH16x8:
5349 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X101101, vN, vD);
5350 break;
5351 case ARM64vecb_SQRDMULH32x4:
5352 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X101101, vN, vD);
5353 break;
5354 case ARM64vecb_SQRDMULH16x8:
5355 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X101101, vN, vD);
5356 break;
5358 case ARM64vecb_SQSHL64x2:
5359 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010011, vN, vD);
5360 break;
5361 case ARM64vecb_SQSHL32x4:
5362 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010011, vN, vD);
5363 break;
5364 case ARM64vecb_SQSHL16x8:
5365 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010011, vN, vD);
5366 break;
5367 case ARM64vecb_SQSHL8x16:
5368 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010011, vN, vD);
5369 break;
5371 case ARM64vecb_SQRSHL64x2:
5372 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010111, vN, vD);
5373 break;
5374 case ARM64vecb_SQRSHL32x4:
5375 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010111, vN, vD);
5376 break;
5377 case ARM64vecb_SQRSHL16x8:
5378 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010111, vN, vD);
5379 break;
5380 case ARM64vecb_SQRSHL8x16:
5381 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010111, vN, vD);
5382 break;
5384 case ARM64vecb_UQSHL64x2:
5385 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010011, vN, vD);
5386 break;
5387 case ARM64vecb_UQSHL32x4:
5388 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010011, vN, vD);
5389 break;
5390 case ARM64vecb_UQSHL16x8:
5391 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010011, vN, vD);
5392 break;
5393 case ARM64vecb_UQSHL8x16:
5394 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010011, vN, vD);
5395 break;
5397 case ARM64vecb_UQRSHL64x2:
5398 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010111, vN, vD);
5399 break;
5400 case ARM64vecb_UQRSHL32x4:
5401 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010111, vN, vD);
5402 break;
5403 case ARM64vecb_UQRSHL16x8:
5404 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010111, vN, vD);
5405 break;
5406 case ARM64vecb_UQRSHL8x16:
5407 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010111, vN, vD);
5408 break;
5410 case ARM64vecb_SSHL64x2:
5411 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010001, vN, vD);
5412 break;
5413 case ARM64vecb_SSHL32x4:
5414 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010001, vN, vD);
5415 break;
5416 case ARM64vecb_SSHL16x8:
5417 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010001, vN, vD);
5418 break;
5419 case ARM64vecb_SSHL8x16:
5420 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010001, vN, vD);
5421 break;
5423 case ARM64vecb_SRSHL64x2:
5424 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X010101, vN, vD);
5425 break;
5426 case ARM64vecb_SRSHL32x4:
5427 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X010101, vN, vD);
5428 break;
5429 case ARM64vecb_SRSHL16x8:
5430 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X010101, vN, vD);
5431 break;
5432 case ARM64vecb_SRSHL8x16:
5433 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X010101, vN, vD);
5434 break;
5436 case ARM64vecb_USHL64x2:
5437 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010001, vN, vD);
5438 break;
5439 case ARM64vecb_USHL32x4:
5440 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010001, vN, vD);
5441 break;
5442 case ARM64vecb_USHL16x8:
5443 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010001, vN, vD);
5444 break;
5445 case ARM64vecb_USHL8x16:
5446 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010001, vN, vD);
5447 break;
5449 case ARM64vecb_URSHL64x2:
5450 *p++ = X_3_8_5_6_5_5(X011, X01110111, vM, X010101, vN, vD);
5451 break;
5452 case ARM64vecb_URSHL32x4:
5453 *p++ = X_3_8_5_6_5_5(X011, X01110101, vM, X010101, vN, vD);
5454 break;
5455 case ARM64vecb_URSHL16x8:
5456 *p++ = X_3_8_5_6_5_5(X011, X01110011, vM, X010101, vN, vD);
5457 break;
5458 case ARM64vecb_URSHL8x16:
5459 *p++ = X_3_8_5_6_5_5(X011, X01110001, vM, X010101, vN, vD);
5460 break;
5462 case ARM64vecb_FRECPS64x2:
5463 *p++ = X_3_8_5_6_5_5(X010, X01110011, vM, X111111, vN, vD);
5464 break;
5465 case ARM64vecb_FRECPS32x4:
5466 *p++ = X_3_8_5_6_5_5(X010, X01110001, vM, X111111, vN, vD);
5467 break;
5468 case ARM64vecb_FRSQRTS64x2:
5469 *p++ = X_3_8_5_6_5_5(X010, X01110111, vM, X111111, vN, vD);
5470 break;
5471 case ARM64vecb_FRSQRTS32x4:
5472 *p++ = X_3_8_5_6_5_5(X010, X01110101, vM, X111111, vN, vD);
5473 break;
5475 default:
5476 goto bad;
5478 goto done;
5480 case ARM64in_VModifyV: {
5481 /* 31 23 20 15 9 4
5482 010 01110 sz 1 00000 001110 n d SUQADD@sz Vd, Vn
5483 011 01110 sz 1 00000 001110 n d USQADD@sz Vd, Vn
5485 UInt vD = qregEnc(i->ARM64in.VModifyV.mod);
5486 UInt vN = qregEnc(i->ARM64in.VModifyV.arg);
5487 switch (i->ARM64in.VModifyV.op) {
5488 case ARM64vecmo_SUQADD64x2:
5489 *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X001110, vN, vD);
5490 break;
5491 case ARM64vecmo_SUQADD32x4:
5492 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X001110, vN, vD);
5493 break;
5494 case ARM64vecmo_SUQADD16x8:
5495 *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X001110, vN, vD);
5496 break;
5497 case ARM64vecmo_SUQADD8x16:
5498 *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X001110, vN, vD);
5499 break;
5500 case ARM64vecmo_USQADD64x2:
5501 *p++ = X_3_8_5_6_5_5(X011, X01110111, X00000, X001110, vN, vD);
5502 break;
5503 case ARM64vecmo_USQADD32x4:
5504 *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X001110, vN, vD);
5505 break;
5506 case ARM64vecmo_USQADD16x8:
5507 *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X001110, vN, vD);
5508 break;
5509 case ARM64vecmo_USQADD8x16:
5510 *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X001110, vN, vD);
5511 break;
5512 default:
5513 goto bad;
5515 goto done;
5517 case ARM64in_VUnaryV: {
5518 /* 31 23 20 15 9 4
5519 010 01110 11 1 00000 111110 n d FABS Vd.2d, Vn.2d
5520 010 01110 10 1 00000 111110 n d FABS Vd.4s, Vn.4s
5521 010 01110 11 1 11000 111110 n d FABS Vd.8h, Vn.8h
5522 011 01110 11 1 00000 111110 n d FNEG Vd.2d, Vn.2d
5523 011 01110 10 1 00000 111110 n d FNEG Vd.4s, Vn.4s
5524 011 01110 11 1 11000 111110 n d FNEG Vd.8h, Vn.8h
5525 011 01110 00 1 00000 010110 n d NOT Vd.16b, Vn.16b
5527 010 01110 11 1 00000 101110 n d ABS Vd.2d, Vn.2d
5528 010 01110 10 1 00000 101110 n d ABS Vd.4s, Vn.4s
5529 010 01110 01 1 00000 101110 n d ABS Vd.8h, Vn.8h
5530 010 01110 00 1 00000 101110 n d ABS Vd.16b, Vn.16b
5532 010 01110 10 1 00000 010010 n d CLS Vd.4s, Vn.4s
5533 010 01110 01 1 00000 010010 n d CLS Vd.8h, Vn.8h
5534 010 01110 00 1 00000 010010 n d CLS Vd.16b, Vn.16b
5536 011 01110 10 1 00000 010010 n d CLZ Vd.4s, Vn.4s
5537 011 01110 01 1 00000 010010 n d CLZ Vd.8h, Vn.8h
5538 011 01110 00 1 00000 010010 n d CLZ Vd.16b, Vn.16b
5540 010 01110 00 1 00000 010110 n d CNT Vd.16b, Vn.16b
5542 011 01110 01 1 00000 010110 n d RBIT Vd.16b, Vn.16b
5543 010 01110 00 1 00000 000110 n d REV16 Vd.16b, Vn.16b
5544 011 01110 00 1 00000 000010 n d REV32 Vd.16b, Vn.16b
5545 011 01110 01 1 00000 000010 n d REV32 Vd.8h, Vn.8h
5547 010 01110 00 1 00000 000010 n d REV64 Vd.16b, Vn.16b
5548 010 01110 01 1 00000 000010 n d REV64 Vd.8h, Vn.8h
5549 010 01110 10 1 00000 000010 n d REV64 Vd.4s, Vn.4s
5551 010 01110 10 1 00001 110010 n d URECPE Vd.4s, Vn.4s
5552 011 01110 10 1 00001 110010 n d URSQRTE Vd.4s, Vn.4s
5554 010 01110 11 1 00001 110110 n d FRECPE Vd.2d, Vn.2d
5555 010 01110 10 1 00001 110110 n d FRECPE Vd.4s, Vn.4s
5557 011 01110 11 1 00001 110110 n d FRSQRTE Vd.2d, Vn.2d
5558 011 01110 10 1 00001 110110 n d FRSQRTE Vd.4s, Vn.4s
5560 011 01110 11 1 00001 111110 n d FSQRT Vd.2d, Vn.2d
5561 011 01110 10 1 00001 111110 n d FSQRT Vd.4s, Vn.4s
5562 011 01110 11 1 11001 111110 n d FSQRT Vd.8h, Vn.8h
5564 UInt vD = qregEnc(i->ARM64in.VUnaryV.dst);
5565 UInt vN = qregEnc(i->ARM64in.VUnaryV.arg);
5566 switch (i->ARM64in.VUnaryV.op) {
5567 case ARM64vecu_FABS16x8:
5568 *p++ = X_3_8_5_6_5_5(X010, X01110111, X11000, X111110, vN, vD);
5569 break;
5570 case ARM64vecu_FABS64x2:
5571 *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X111110, vN, vD);
5572 break;
5573 case ARM64vecu_FABS32x4:
5574 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X111110, vN, vD);
5575 break;
5576 case ARM64vecu_FNEG16x8:
5577 *p++ = X_3_8_5_6_5_5(X011, X01110111, X11000, X111110, vN, vD);
5578 break;
5579 case ARM64vecu_FNEG64x2:
5580 *p++ = X_3_8_5_6_5_5(X011, X01110111, X00000, X111110, vN, vD);
5581 break;
5582 case ARM64vecu_FNEG32x4:
5583 *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X111110, vN, vD);
5584 break;
5585 case ARM64vecu_NOT:
5586 *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X010110, vN, vD);
5587 break;
5588 case ARM64vecu_ABS64x2:
5589 *p++ = X_3_8_5_6_5_5(X010, X01110111, X00000, X101110, vN, vD);
5590 break;
5591 case ARM64vecu_ABS32x4:
5592 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X101110, vN, vD);
5593 break;
5594 case ARM64vecu_ABS16x8:
5595 *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X101110, vN, vD);
5596 break;
5597 case ARM64vecu_ABS8x16:
5598 *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X101110, vN, vD);
5599 break;
5600 case ARM64vecu_CLS32x4:
5601 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X010010, vN, vD);
5602 break;
5603 case ARM64vecu_CLS16x8:
5604 *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X010010, vN, vD);
5605 break;
5606 case ARM64vecu_CLS8x16:
5607 *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X010010, vN, vD);
5608 break;
5609 case ARM64vecu_CLZ32x4:
5610 *p++ = X_3_8_5_6_5_5(X011, X01110101, X00000, X010010, vN, vD);
5611 break;
5612 case ARM64vecu_CLZ16x8:
5613 *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X010010, vN, vD);
5614 break;
5615 case ARM64vecu_CLZ8x16:
5616 *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X010010, vN, vD);
5617 break;
5618 case ARM64vecu_CNT8x16:
5619 *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X010110, vN, vD);
5620 break;
5621 case ARM64vecu_RBIT:
5622 *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X010110, vN, vD);
5623 break;
5624 case ARM64vecu_REV1616B:
5625 *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X000110, vN, vD);
5626 break;
5627 case ARM64vecu_REV3216B:
5628 *p++ = X_3_8_5_6_5_5(X011, X01110001, X00000, X000010, vN, vD);
5629 break;
5630 case ARM64vecu_REV328H:
5631 *p++ = X_3_8_5_6_5_5(X011, X01110011, X00000, X000010, vN, vD);
5632 break;
5633 case ARM64vecu_REV6416B:
5634 *p++ = X_3_8_5_6_5_5(X010, X01110001, X00000, X000010, vN, vD);
5635 break;
5636 case ARM64vecu_REV648H:
5637 *p++ = X_3_8_5_6_5_5(X010, X01110011, X00000, X000010, vN, vD);
5638 break;
5639 case ARM64vecu_REV644S:
5640 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00000, X000010, vN, vD);
5641 break;
5642 case ARM64vecu_URECPE32x4:
5643 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00001, X110010, vN, vD);
5644 break;
5645 case ARM64vecu_URSQRTE32x4:
5646 *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X110010, vN, vD);
5647 break;
5648 case ARM64vecu_FRECPE64x2:
5649 *p++ = X_3_8_5_6_5_5(X010, X01110111, X00001, X110110, vN, vD);
5650 break;
5651 case ARM64vecu_FRECPE32x4:
5652 *p++ = X_3_8_5_6_5_5(X010, X01110101, X00001, X110110, vN, vD);
5653 break;
5654 case ARM64vecu_FRSQRTE64x2:
5655 *p++ = X_3_8_5_6_5_5(X011, X01110111, X00001, X110110, vN, vD);
5656 break;
5657 case ARM64vecu_FRSQRTE32x4:
5658 *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X110110, vN, vD);
5659 break;
5660 case ARM64vecu_FSQRT64x2:
5661 *p++ = X_3_8_5_6_5_5(X011, X01110111, X00001, X111110, vN, vD);
5662 break;
5663 case ARM64vecu_FSQRT32x4:
5664 *p++ = X_3_8_5_6_5_5(X011, X01110101, X00001, X111110, vN, vD);
5665 break;
5666 case ARM64vecu_FSQRT16x8:
5667 *p++ = X_3_8_5_6_5_5(X011, X01110111, X11001, X111110, vN, vD);
5668 break;
5669 default:
5670 goto bad;
5672 goto done;
5674 case ARM64in_VNarrowV: {
5675 /* 31 23 21 15 9 4
5676 000 01110 00 1,00001 001010 n d XTN Vd.8b, Vn.8h
5677 000 01110 01 1,00001 001010 n d XTN Vd.4h, Vn.4s
5678 000 01110 10 1,00001 001010 n d XTN Vd.2s, Vn.2d
5680 001 01110 00 1,00001 001010 n d SQXTUN Vd.8b, Vn.8h
5681 001 01110 01 1,00001 001010 n d SQXTUN Vd.4h, Vn.4s
5682 001 01110 10 1,00001 001010 n d SQXTUN Vd.2s, Vn.2d
5684 000 01110 00 1,00001 010010 n d SQXTN Vd.8b, Vn.8h
5685 000 01110 01 1,00001 010010 n d SQXTN Vd.4h, Vn.4s
5686 000 01110 10 1,00001 010010 n d SQXTN Vd.2s, Vn.2d
5688 001 01110 00 1,00001 010010 n d UQXTN Vd.8b, Vn.8h
5689 001 01110 01 1,00001 010010 n d UQXTN Vd.4h, Vn.4s
5690 001 01110 10 1,00001 010010 n d UQXTN Vd.2s, Vn.2d
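            (Illustrative note, not from the original source: dszBlg2 below is,
            by its use here, log2 of the destination lane size in bytes, and the
            emitter folds it into the second field as X01110001 | (dszBlg2 << 1).
            E.g. dszBlg2 == 1 gives 01110011, matching the "XTN Vd.4h, Vn.4s" row
            above, and dszBlg2 == 2 gives 01110101, matching "XTN Vd.2s, Vn.2d".)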
5692 UInt vD = qregEnc(i->ARM64in.VNarrowV.dst);
5693 UInt vN = qregEnc(i->ARM64in.VNarrowV.src);
5694 UInt dszBlg2 = i->ARM64in.VNarrowV.dszBlg2;
5695 vassert(dszBlg2 >= 0 && dszBlg2 <= 2);
5696 switch (i->ARM64in.VNarrowV.op) {
5697 case ARM64vecna_XTN:
5698 *p++ = X_3_8_5_6_5_5(X000, X01110001 | (dszBlg2 << 1),
5699 X00001, X001010, vN, vD);
5700 goto done;
5701 case ARM64vecna_SQXTUN:
5702 *p++ = X_3_8_5_6_5_5(X001, X01110001 | (dszBlg2 << 1),
5703 X00001, X001010, vN, vD);
5704 goto done;
5705 case ARM64vecna_SQXTN:
5706 *p++ = X_3_8_5_6_5_5(X000, X01110001 | (dszBlg2 << 1),
5707 X00001, X010010, vN, vD);
5708 goto done;
5709 case ARM64vecna_UQXTN:
5710 *p++ = X_3_8_5_6_5_5(X001, X01110001 | (dszBlg2 << 1),
5711 X00001, X010010, vN, vD);
5712 goto done;
5713 default:
5714 break;
5716 goto bad;
5718 case ARM64in_VShiftImmV: {
5720 011 011110 immh immb 000001 n d USHR Vd.T, Vn.T, #sh
5721 010 011110 immh immb 000001 n d SSHR Vd.T, Vn.T, #sh
5723 001 011110 immh immb 100101 n d UQSHRN ,,#sh
5724 000 011110 immh immb 100101 n d SQSHRN ,,#sh
5725 001 011110 immh immb 100001 n d SQSHRUN ,,#sh
5727 001 011110 immh immb 100111 n d UQRSHRN ,,#sh
5728 000 011110 immh immb 100111 n d SQRSHRN ,,#sh
5729 001 011110 immh immb 100011 n d SQRSHRUN ,,#sh
5731 where immh:immb
5732 = case T of
5733 2d | sh in 1..64 -> let xxxxxx = 64-sh in 1xxx:xxx
5734 4s | sh in 1..32 -> let xxxxx = 32-sh in 01xx:xxx
5735 8h | sh in 1..16 -> let xxxx = 16-sh in 001x:xxx
5736 16b | sh in 1..8 -> let xxx = 8-sh in 0001:xxx
5738 010 011110 immh immb 010101 n d SHL Vd.T, Vn.T, #sh
5740 011 011110 immh immb 011101 n d UQSHL Vd.T, Vn.T, #sh
5741 010 011110 immh immb 011101 n d SQSHL Vd.T, Vn.T, #sh
5742 011 011110 immh immb 011001 n d SQSHLU Vd.T, Vn.T, #sh
5744 where immh:immb
5745 = case T of
5746 2d | sh in 0..63 -> let xxxxxx = sh in 1xxx:xxx
5747 4s | sh in 0..31 -> let xxxxx = sh in 01xx:xxx
5748 8h | sh in 0..15 -> let xxxx = sh in 001x:xxx
5749 16b | sh in 0..7 -> let xxx = sh in 0001:xxx
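            Worked example (illustrative, not from the original source): for
            USHR Vd.4s, Vn.4s, #3 the "4s | sh in 1..32" rule gives
            immh:immb = 0100000 | (32-3) = 0111101, which is exactly what the
            right32x4 case below ORs into the template as X0100000 | (32-sh).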
5751 UInt vD = qregEnc(i->ARM64in.VShiftImmV.dst);
5752 UInt vN = qregEnc(i->ARM64in.VShiftImmV.src);
5753 UInt sh = i->ARM64in.VShiftImmV.amt;
5754 UInt tmpl = 0; /* invalid */
5756 const UInt tmpl_USHR
5757 = X_3_6_7_6_5_5(X011, X011110, 0, X000001, vN, vD);
5758 const UInt tmpl_SSHR
5759 = X_3_6_7_6_5_5(X010, X011110, 0, X000001, vN, vD);
5761 const UInt tmpl_UQSHRN
5762 = X_3_6_7_6_5_5(X001, X011110, 0, X100101, vN, vD);
5763 const UInt tmpl_SQSHRN
5764 = X_3_6_7_6_5_5(X000, X011110, 0, X100101, vN, vD);
5765 const UInt tmpl_SQSHRUN
5766 = X_3_6_7_6_5_5(X001, X011110, 0, X100001, vN, vD);
5768 const UInt tmpl_UQRSHRN
5769 = X_3_6_7_6_5_5(X001, X011110, 0, X100111, vN, vD);
5770 const UInt tmpl_SQRSHRN
5771 = X_3_6_7_6_5_5(X000, X011110, 0, X100111, vN, vD);
5772 const UInt tmpl_SQRSHRUN
5773 = X_3_6_7_6_5_5(X001, X011110, 0, X100011, vN, vD);
5775 const UInt tmpl_SHL
5776 = X_3_6_7_6_5_5(X010, X011110, 0, X010101, vN, vD);
5778 const UInt tmpl_UQSHL
5779 = X_3_6_7_6_5_5(X011, X011110, 0, X011101, vN, vD);
5780 const UInt tmpl_SQSHL
5781 = X_3_6_7_6_5_5(X010, X011110, 0, X011101, vN, vD);
5782 const UInt tmpl_SQSHLU
5783 = X_3_6_7_6_5_5(X011, X011110, 0, X011001, vN, vD);
5785 switch (i->ARM64in.VShiftImmV.op) {
5786 case ARM64vecshi_SSHR64x2: tmpl = tmpl_SSHR; goto right64x2;
5787 case ARM64vecshi_USHR64x2: tmpl = tmpl_USHR; goto right64x2;
5788 case ARM64vecshi_SHL64x2: tmpl = tmpl_SHL; goto left64x2;
5789 case ARM64vecshi_UQSHL64x2: tmpl = tmpl_UQSHL; goto left64x2;
5790 case ARM64vecshi_SQSHL64x2: tmpl = tmpl_SQSHL; goto left64x2;
5791 case ARM64vecshi_SQSHLU64x2: tmpl = tmpl_SQSHLU; goto left64x2;
5792 case ARM64vecshi_SSHR32x4: tmpl = tmpl_SSHR; goto right32x4;
5793 case ARM64vecshi_USHR32x4: tmpl = tmpl_USHR; goto right32x4;
5794 case ARM64vecshi_UQSHRN2SD: tmpl = tmpl_UQSHRN; goto right32x4;
5795 case ARM64vecshi_SQSHRN2SD: tmpl = tmpl_SQSHRN; goto right32x4;
5796 case ARM64vecshi_SQSHRUN2SD: tmpl = tmpl_SQSHRUN; goto right32x4;
5797 case ARM64vecshi_UQRSHRN2SD: tmpl = tmpl_UQRSHRN; goto right32x4;
5798 case ARM64vecshi_SQRSHRN2SD: tmpl = tmpl_SQRSHRN; goto right32x4;
5799 case ARM64vecshi_SQRSHRUN2SD: tmpl = tmpl_SQRSHRUN; goto right32x4;
5800 case ARM64vecshi_SHL32x4: tmpl = tmpl_SHL; goto left32x4;
5801 case ARM64vecshi_UQSHL32x4: tmpl = tmpl_UQSHL; goto left32x4;
5802 case ARM64vecshi_SQSHL32x4: tmpl = tmpl_SQSHL; goto left32x4;
5803 case ARM64vecshi_SQSHLU32x4: tmpl = tmpl_SQSHLU; goto left32x4;
5804 case ARM64vecshi_SSHR16x8: tmpl = tmpl_SSHR; goto right16x8;
5805 case ARM64vecshi_USHR16x8: tmpl = tmpl_USHR; goto right16x8;
5806 case ARM64vecshi_UQSHRN4HS: tmpl = tmpl_UQSHRN; goto right16x8;
5807 case ARM64vecshi_SQSHRN4HS: tmpl = tmpl_SQSHRN; goto right16x8;
5808 case ARM64vecshi_SQSHRUN4HS: tmpl = tmpl_SQSHRUN; goto right16x8;
5809 case ARM64vecshi_UQRSHRN4HS: tmpl = tmpl_UQRSHRN; goto right16x8;
5810 case ARM64vecshi_SQRSHRN4HS: tmpl = tmpl_SQRSHRN; goto right16x8;
5811 case ARM64vecshi_SQRSHRUN4HS: tmpl = tmpl_SQRSHRUN; goto right16x8;
5812 case ARM64vecshi_SHL16x8: tmpl = tmpl_SHL; goto left16x8;
5813 case ARM64vecshi_UQSHL16x8: tmpl = tmpl_UQSHL; goto left16x8;
5814 case ARM64vecshi_SQSHL16x8: tmpl = tmpl_SQSHL; goto left16x8;
5815 case ARM64vecshi_SQSHLU16x8: tmpl = tmpl_SQSHLU; goto left16x8;
5816 case ARM64vecshi_SSHR8x16: tmpl = tmpl_SSHR; goto right8x16;
5817 case ARM64vecshi_USHR8x16: tmpl = tmpl_USHR; goto right8x16;
5818 case ARM64vecshi_UQSHRN8BH: tmpl = tmpl_UQSHRN; goto right8x16;
5819 case ARM64vecshi_SQSHRN8BH: tmpl = tmpl_SQSHRN; goto right8x16;
5820 case ARM64vecshi_SQSHRUN8BH: tmpl = tmpl_SQSHRUN; goto right8x16;
5821 case ARM64vecshi_UQRSHRN8BH: tmpl = tmpl_UQRSHRN; goto right8x16;
5822 case ARM64vecshi_SQRSHRN8BH: tmpl = tmpl_SQRSHRN; goto right8x16;
5823 case ARM64vecshi_SQRSHRUN8BH: tmpl = tmpl_SQRSHRUN; goto right8x16;
5824 case ARM64vecshi_SHL8x16: tmpl = tmpl_SHL; goto left8x16;
5825 case ARM64vecshi_UQSHL8x16: tmpl = tmpl_UQSHL; goto left8x16;
5826 case ARM64vecshi_SQSHL8x16: tmpl = tmpl_SQSHL; goto left8x16;
5827 case ARM64vecshi_SQSHLU8x16: tmpl = tmpl_SQSHLU; goto left8x16;
5829 default: break;
5831 right64x2:
5832 if (sh >= 1 && sh <= 63) {
5833 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X1000000 | (64-sh), 0,0,0);
5834 goto done;
5836 break;
5837 right32x4:
5838 if (sh >= 1 && sh <= 32) {
5839 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0100000 | (32-sh), 0,0,0);
5840 goto done;
5842 break;
5843 right16x8:
5844 if (sh >= 1 && sh <= 16) {
5845 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0010000 | (16-sh), 0,0,0);
5846 goto done;
5848 break;
5849 right8x16:
5850 if (sh >= 1 && sh <= 8) {
5851 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0001000 | (8-sh), 0,0,0);
5852 goto done;
5854 break;
5856 left64x2:
5857 if (sh >= 0 && sh <= 63) {
5858 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X1000000 | sh, 0,0,0);
5859 goto done;
5861 break;
5862 left32x4:
5863 if (sh >= 0 && sh <= 31) {
5864 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0100000 | sh, 0,0,0);
5865 goto done;
5867 break;
5868 left16x8:
5869 if (sh >= 0 && sh <= 15) {
5870 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0010000 | sh, 0,0,0);
5871 goto done;
5873 break;
5874 left8x16:
5875 if (sh >= 0 && sh <= 7) {
5876 *p++ = tmpl | X_3_6_7_6_5_5(0,0, X0001000 | sh, 0,0,0);
5877 goto done;
5879 break;
5881 goto bad;
5883 case ARM64in_VExtV: {
5885 011 01110 000 m 0 imm4 0 n d EXT Vd.16b, Vn.16b, Vm.16b, #imm4
5886 where imm4 = the shift amount, in bytes,
5887 Vn is low operand, Vm is high operand
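            Illustrative example (not in the original source): amtB == 8 places
            imm4 = 8 in bits [14:11], so the six-bit field is
            X000000 | (8 << 1) = 010000, and the result of
            EXT Vd.16b, Vn.16b, Vm.16b, #8 is bytes 8..15 of Vn followed by
            bytes 0..7 of Vm.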
5889 UInt vD = qregEnc(i->ARM64in.VExtV.dst);
5890 UInt vN = qregEnc(i->ARM64in.VExtV.srcLo);
5891 UInt vM = qregEnc(i->ARM64in.VExtV.srcHi);
5892 UInt imm4 = i->ARM64in.VExtV.amtB;
5893 vassert(imm4 >= 1 && imm4 <= 15);
5894 *p++ = X_3_8_5_6_5_5(X011, X01110000, vM,
5895 X000000 | (imm4 << 1), vN, vD);
5896 goto done;
5898 case ARM64in_VImmQ: {
5899 UInt rQ = qregEnc(i->ARM64in.VImmQ.rQ);
5900 UShort imm = i->ARM64in.VImmQ.imm;
5901 vassert(rQ < 32);
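      /* Reader's note, not in the original source: imm appears to act as a
         per-byte mask of the 128-bit constant -- bit i set means byte i of
         the result is 0xFF.  E.g. imm == 0x000F selects the low four bytes,
         which the 0x2F00E5E0 case below materialises as a 64-bit MOVI of
         0xFFFFFFFF into Dd, with the upper 64 bits cleared. */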
5902 switch (imm) {
5903 case 0x0000:
5904 // movi rQ.4s, #0x0 == 0x4F 0x00 0x04 000 rQ
5905 *p++ = 0x4F000400 | rQ;
5906 goto done;
5907 case 0x0001:
5908 // movi rQ, #0xFF == 0x2F 0x00 0xE4 001 rQ
5909 *p++ = 0x2F00E420 | rQ;
5910 goto done;
5911 case 0x0003:
5912 // movi rQ, #0xFFFF == 0x2F 0x00 0xE4 011 rQ
5913 *p++ = 0x2F00E460 | rQ;
5914 goto done;
5915 case 0x000F:
5916 // movi rQ, #0xFFFFFFFF == 0x2F 0x00 0xE5 111 rQ
5917 *p++ = 0x2F00E5E0 | rQ;
5918 goto done;
5919 case 0x003F:
5920 // movi rQ, #0xFFFFFFFFFFFF == 0x2F 0x01 0xE7 111 rQ
5921 *p++ = 0x2F01E7E0 | rQ;
5922 goto done;
5923 case 0x00FF:
5924 // movi rQ, #0xFFFFFFFFFFFFFFFF == 0x2F 0x07 0xE7 111 rQ
5925 *p++ = 0x2F07E7E0 | rQ;
5926 goto done;
5927 case 0xFFFF:
5928 // mvni rQ.4s, #0x0 == 0x6F 0x00 0x04 000 rQ
5929 *p++ = 0x6F000400 | rQ;
5930 goto done;
5931 default:
5932 break;
5934 goto bad; /* no other handled cases right now */
5937 case ARM64in_VDfromX: {
5938 /* INS Vd.D[0], rX
5939 0100 1110 0000 1000 0001 11 nn dd INS Vd.D[0], Xn
5940 This isn't wonderful, in the sense that the upper half of
5941 the vector register stays unchanged and thus the insn is
5942 data dependent on its output register. */
5943 UInt dd = dregEnc(i->ARM64in.VDfromX.rD);
5944 UInt xx = iregEnc(i->ARM64in.VDfromX.rX);
5945 vassert(xx < 31);
5946 *p++ = 0x4E081C00 | X_2_6_2_12_5_5(0,0,0,0,xx,dd);
5947 goto done;
5950 case ARM64in_VQfromX: {
5951 /* FMOV D, X
5952 1001 1110 0110 0111 0000 00 nn dd FMOV Vd.D[0], Xn
5953 I think this zeroes out the top half of the destination, which
5954 is what we need. TODO: can we do VDfromX and VQfromXX better? */
5955 UInt dd = qregEnc(i->ARM64in.VQfromX.rQ);
5956 UInt xx = iregEnc(i->ARM64in.VQfromX.rXlo);
5957 vassert(xx < 31);
5958 *p++ = 0x9E670000 | X_2_6_2_12_5_5(0,0,0,0,xx,dd);
5959 goto done;
5962 case ARM64in_VQfromXX: {
5963 /* What we really generate is a two insn sequence:
5964 INS Vd.D[0], Xlo; INS Vd.D[1], Xhi
5965 0100 1110 0000 1000 0001 11 nn dd INS Vd.D[0], Xn
5966 0100 1110 0001 1000 0001 11 nn dd INS Vd.D[1], Xn
5968 UInt qq = qregEnc(i->ARM64in.VQfromXX.rQ);
5969 UInt xhi = iregEnc(i->ARM64in.VQfromXX.rXhi);
5970 UInt xlo = iregEnc(i->ARM64in.VQfromXX.rXlo);
5971 vassert(xhi < 31 && xlo < 31);
5972 *p++ = 0x4E081C00 | X_2_6_2_12_5_5(0,0,0,0,xlo,qq);
5973 *p++ = 0x4E181C00 | X_2_6_2_12_5_5(0,0,0,0,xhi,qq);
5974 goto done;
5977 case ARM64in_VXfromQ: {
5978 /* 010 0111 0000 01000 001111 nn dd UMOV Xd, Vn.D[0]
5979 010 0111 0000 11000 001111 nn dd UMOV Xd, Vn.D[1]
5981 UInt dd = iregEnc(i->ARM64in.VXfromQ.rX);
5982 UInt nn = qregEnc(i->ARM64in.VXfromQ.rQ);
5983 UInt laneNo = i->ARM64in.VXfromQ.laneNo;
5984 vassert(dd < 31);
5985 vassert(laneNo < 2);
5986 *p++ = X_3_8_5_6_5_5(X010, X01110000,
5987 laneNo == 1 ? X11000 : X01000, X001111, nn, dd);
5988 goto done;
5991 case ARM64in_VXfromDorS: {
5992 /* 000 11110001 00110 000000 n d FMOV Wd, Sn
5993 100 11110011 00110 000000 n d FMOV Xd, Dn
5995 UInt dd = iregEnc(i->ARM64in.VXfromDorS.rX);
5996 UInt nn = dregEnc(i->ARM64in.VXfromDorS.rDorS);
5997 Bool fromD = i->ARM64in.VXfromDorS.fromD;
5998 vassert(dd < 31);
5999 *p++ = X_3_8_5_6_5_5(fromD ? X100 : X000,
6000 fromD ? X11110011 : X11110001,
6001 X00110, X000000, nn, dd);
6002 goto done;
6005 case ARM64in_VMov: {
6006 /* 000 11110 00 10000 00 10000 n d FMOV Sd, Sn
6007 000 11110 01 10000 00 10000 n d FMOV Dd, Dn
6008 010 01110 10 1 n 0 00111 n d MOV Vd.16b, Vn.16b
6010 HReg rD = i->ARM64in.VMov.dst;
6011 HReg rN = i->ARM64in.VMov.src;
6012 switch (i->ARM64in.VMov.szB) {
6013 case 16: {
6014 UInt dd = qregEnc(rD);
6015 UInt nn = qregEnc(rN);
6016 *p++ = X_3_8_5_6_5_5(X010, X01110101, nn, X000111, nn, dd);
6017 goto done;
6019 case 8: {
6020 UInt dd = dregEnc(rD);
6021 UInt nn = dregEnc(rN);
6022 *p++ = X_3_8_5_6_5_5(X000, X11110011, X00000, X010000, nn, dd);
6023 goto done;
6025 default:
6026 break;
6028 goto bad;
6031 case ARM64in_EvCheck: {
6032 /* The sequence is fixed (canned) except for the two amodes
6033 supplied by the insn. These don't change the length, though.
6034 We generate:
6035 ldr w9, [x21 + #8] 8 == offsetof(host_EvC_COUNTER)
6036 subs w9, w9, #1
6037 str w9, [x21 + #8] 8 == offsetof(host_EvC_COUNTER)
6038 bpl nofail
6039 ldr x9, [x21 + #0] 0 == offsetof(host_EvC_FAILADDR)
6040 br x9
6041 nofail:
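            (Illustrative note, not in the original: that is six 4-byte
            instructions, 24 bytes in all, which is what evCheckSzB_ARM64()
            further down returns and what the vassert crosscheck at the end of
            this case confirms.)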
6043 UInt* p0 = p;
6044 p = do_load_or_store32(p, True/*isLoad*/, /*w*/9,
6045 i->ARM64in.EvCheck.amCounter);
6046 *p++ = 0x71000529; /* subs w9, w9, #1 */
6047 p = do_load_or_store32(p, False/*!isLoad*/, /*w*/9,
6048 i->ARM64in.EvCheck.amCounter);
6049 *p++ = 0x54000065; /* bpl nofail */
6050 p = do_load_or_store64(p, True/*isLoad*/, /*x*/9,
6051 i->ARM64in.EvCheck.amFailAddr);
6052 *p++ = 0xD61F0120; /* br x9 */
6053 /* nofail: */
6055 /* Crosscheck */
6056 vassert(evCheckSzB_ARM64() == (UChar*)p - (UChar*)p0);
6057 goto done;
6060 case ARM64in_ProfInc: {
6061 /* We generate:
6062 (ctrP is unknown now, so use 0x6555'7555'8555'9566 in the
6063 expectation that a later call to LibVEX_patchProfCtr
6064 will be used to fill in the immediate fields once the
6065 right value is known.)
6066 imm64-exactly4 x9, 0x6555'7555'8555'9566
6067 ldr x8, [x9]
6068 add x8, x8, #1
6069 str x8, [x9]
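            (Illustrative note, not in the original: the first four words are
            the imm64_to_ireg_EXACTLY4 materialisation of the dummy address;
            patchProfInc_ARM64 below rewrites exactly those four words, hence
            its 16-byte invalidation range.)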
6071 p = imm64_to_ireg_EXACTLY4(p, /*x*/9, 0x6555755585559566ULL);
6072 *p++ = 0xF9400128;
6073 *p++ = 0x91000508;
6074 *p++ = 0xF9000128;
6075 /* Tell the caller .. */
6076 vassert(!(*is_profInc));
6077 *is_profInc = True;
6078 goto done;
6081 /* ... */
6082 default:
6083 goto bad;
6086 bad:
6087 ppARM64Instr(i);
6088 vpanic("emit_ARM64Instr");
6089 /*NOTREACHED*/
6091 done:
6092 vassert(((UChar*)p) - &buf[0] <= 40);
6093 return ((UChar*)p) - &buf[0];
6097 /* How big is an event check? See case for ARM64in_EvCheck in
6098 emit_ARM64Instr just above. That crosschecks what this returns, so
6099 we can tell if we're inconsistent. */
6100 Int evCheckSzB_ARM64 (void)
6102 return 24;
6106 /* NB: what goes on here has to be very closely coordinated with the
6107 emitInstr case for XDirect, above. */
6108 VexInvalRange chainXDirect_ARM64 ( VexEndness endness_host,
6109 void* place_to_chain,
6110 const void* disp_cp_chain_me_EXPECTED,
6111 const void* place_to_jump_to )
6113 vassert(endness_host == VexEndnessLE);
6115 /* What we're expecting to see is:
6116 movw x9, disp_cp_chain_me_to_EXPECTED[15:0]
6117 movk x9, disp_cp_chain_me_to_EXPECTED[31:16], lsl 16
6118 movk x9, disp_cp_chain_me_to_EXPECTED[47:32], lsl 32
6119 movk x9, disp_cp_chain_me_to_EXPECTED[63:48], lsl 48
6120 blr x9
6122 <16 bytes generated by imm64_to_ireg_EXACTLY4>
6123 D6 3F 01 20
6125 UInt* p = (UInt*)place_to_chain;
6126 vassert(0 == (3 & (HWord)p));
6127 vassert(is_imm64_to_ireg_EXACTLY4(
6128 p, /*x*/9, (Addr)disp_cp_chain_me_EXPECTED));
6129 vassert(p[4] == 0xD63F0120);
6131 /* And what we want to change it to is:
6132 movw x9, place_to_jump_to[15:0]
6133 movk x9, place_to_jump_to[31:16], lsl 16
6134 movk x9, place_to_jump_to[47:32], lsl 32
6135 movk x9, place_to_jump_to[63:48], lsl 48
6136 br x9
6138 <16 bytes generated by imm64_to_ireg_EXACTLY4>
6139 D6 1F 01 20
6141 The replacement has the same length as the original.
6143 (void)imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)place_to_jump_to);
6144 p[4] = 0xD61F0120;
6146 VexInvalRange vir = {(HWord)p, 20};
6147 return vir;
6151 /* NB: what goes on here has to be very closely coordinated with the
6152 emitInstr case for XDirect, above. */
6153 VexInvalRange unchainXDirect_ARM64 ( VexEndness endness_host,
6154 void* place_to_unchain,
6155 const void* place_to_jump_to_EXPECTED,
6156 const void* disp_cp_chain_me )
6158 vassert(endness_host == VexEndnessLE);
6160 /* What we're expecting to see is:
6161 movw x9, place_to_jump_to_EXPECTED[15:0]
6162 movk x9, place_to_jump_to_EXPECTED[31:16], lsl 16
6163 movk x9, place_to_jump_to_EXPECTED[47:32], lsl 32
6164 movk x9, place_to_jump_to_EXPECTED[63:48], lsl 48
6165 br x9
6167 <16 bytes generated by imm64_to_ireg_EXACTLY4>
6168 D6 1F 01 20
6170 UInt* p = (UInt*)place_to_unchain;
6171 vassert(0 == (3 & (HWord)p));
6172 vassert(is_imm64_to_ireg_EXACTLY4(
6173 p, /*x*/9, (Addr)place_to_jump_to_EXPECTED));
6174 vassert(p[4] == 0xD61F0120);
6176 /* And what we want to change it to is:
6177 movw x9, disp_cp_chain_me_to[15:0]
6178 movk x9, disp_cp_chain_me_to[31:16], lsl 16
6179 movk x9, disp_cp_chain_me_to[47:32], lsl 32
6180 movk x9, disp_cp_chain_me_to[63:48], lsl 48
6181 blr x9
6183 <16 bytes generated by imm64_to_ireg_EXACTLY4>
6184 D6 3F 01 20
6186 (void)imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)disp_cp_chain_me);
6187 p[4] = 0xD63F0120;
6189 VexInvalRange vir = {(HWord)p, 20};
6190 return vir;
6194 /* Patch the counter address into a profile inc point, as previously
6195 created by the ARM64in_ProfInc case for emit_ARM64Instr. */
6196 VexInvalRange patchProfInc_ARM64 ( VexEndness endness_host,
6197 void* place_to_patch,
6198 const ULong* location_of_counter )
6200 vassert(sizeof(ULong*) == 8);
6201 vassert(endness_host == VexEndnessLE);
6202 UInt* p = (UInt*)place_to_patch;
6203 vassert(0 == (3 & (HWord)p));
6204 vassert(is_imm64_to_ireg_EXACTLY4(p, /*x*/9, 0x6555755585559566ULL));
6205 vassert(p[4] == 0xF9400128);
6206 vassert(p[5] == 0x91000508);
6207 vassert(p[6] == 0xF9000128);
6208 imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)location_of_counter);
6209 VexInvalRange vir = {(HWord)p, 4*4};
6210 return vir;
6213 /*---------------------------------------------------------------*/
6214 /*--- end host_arm64_defs.c ---*/
6215 /*---------------------------------------------------------------*/