/*---------------------------------------------------------------*/
/*--- begin                                 host_arm64_defs.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2013-2017 OpenWorks

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "libvex_basictypes.h"
#include "libvex.h"
#include "libvex_trc_values.h"

#include "main_util.h"
#include "host_generic_regs.h"
#include "host_arm64_defs.h"
/* --------- Registers. --------- */

/* The usual HReg abstraction.  We use the following classes only:
     X regs (64 bit int)
     D regs (64 bit float, also used for 32 bit float)
     Q regs (128 bit vector)
*/

const RRegUniverse* getRRegUniverse_ARM64 ( void )
{
   /* The real-register universe is a big constant, so we just want to
      initialise it once. */
   static RRegUniverse rRegUniverse_ARM64;
   static Bool         rRegUniverse_ARM64_initted = False;

   /* Handy shorthand, nothing more */
   RRegUniverse* ru = &rRegUniverse_ARM64;

   /* This isn't thread-safe.  Sigh. */
   if (LIKELY(rRegUniverse_ARM64_initted))
      return ru;

   RRegUniverse__init(ru);

   /* Add the registers.  The initial segment of this array must be
      those available for allocation by reg-alloc, and those that
      follow are not available for allocation. */
   ru->allocable_start[HRcInt64] = ru->size;
   ru->regs[ru->size++] = hregARM64_X22();
   ru->regs[ru->size++] = hregARM64_X23();
   ru->regs[ru->size++] = hregARM64_X24();
   ru->regs[ru->size++] = hregARM64_X25();
   ru->regs[ru->size++] = hregARM64_X26();
   ru->regs[ru->size++] = hregARM64_X27();
   ru->regs[ru->size++] = hregARM64_X28();

   ru->regs[ru->size++] = hregARM64_X0();
   ru->regs[ru->size++] = hregARM64_X1();
   ru->regs[ru->size++] = hregARM64_X2();
   ru->regs[ru->size++] = hregARM64_X3();
   ru->regs[ru->size++] = hregARM64_X4();
   ru->regs[ru->size++] = hregARM64_X5();
   ru->regs[ru->size++] = hregARM64_X6();
   ru->regs[ru->size++] = hregARM64_X7();
   ru->allocable_end[HRcInt64] = ru->size - 1;
   // X8 is used as a ProfInc temporary, not available to regalloc.
   // X9 is a chaining/spill temporary, not available to regalloc.

   // Do we really need all these?
   //ru->regs[ru->size++] = hregARM64_X10();
   //ru->regs[ru->size++] = hregARM64_X11();
   //ru->regs[ru->size++] = hregARM64_X12();
   //ru->regs[ru->size++] = hregARM64_X13();
   //ru->regs[ru->size++] = hregARM64_X14();
   //ru->regs[ru->size++] = hregARM64_X15();
   // X21 is the guest state pointer, not available to regalloc.

   // vector regs.  Unfortunately not callee-saved.
   ru->allocable_start[HRcVec128] = ru->size;
   ru->regs[ru->size++] = hregARM64_Q16();
   ru->regs[ru->size++] = hregARM64_Q17();
   ru->regs[ru->size++] = hregARM64_Q18();
   ru->regs[ru->size++] = hregARM64_Q19();
   ru->regs[ru->size++] = hregARM64_Q20();
   ru->allocable_end[HRcVec128] = ru->size - 1;

   // F64 regs, all of which are callee-saved
   ru->allocable_start[HRcFlt64] = ru->size;
   ru->regs[ru->size++] = hregARM64_D8();
   ru->regs[ru->size++] = hregARM64_D9();
   ru->regs[ru->size++] = hregARM64_D10();
   ru->regs[ru->size++] = hregARM64_D11();
   ru->regs[ru->size++] = hregARM64_D12();
   ru->regs[ru->size++] = hregARM64_D13();
   ru->allocable_end[HRcFlt64] = ru->size - 1;

   ru->allocable = ru->size;
   /* And other regs, not available to the allocator. */

   // unavail: x21 as GSP
   // x8 is used as a ProfInc temporary
   // x9 is used as a spill/reload/chaining/call temporary
   // x31 because dealing with the SP-vs-ZR overloading is too
   // confusing, and we don't need to do so, so let's just avoid
   // the problem
   //
   // Currently, we have 15 allocatable integer registers:
   // 0 1 2 3 4 5 6 7 22 23 24 25 26 27 28
   //
   // Hence for the allocatable integer registers we have:
   //
   // callee-saved: 22 23 24 25 26 27 28
   // caller-saved: 0 1 2 3 4 5 6 7
   //
   // If the set of available registers changes or if the e/r status
   // changes, be sure to re-check/sync the definition of
   // getRegUsage for ARM64Instr_Call too.

   ru->regs[ru->size++] = hregARM64_X8();
   ru->regs[ru->size++] = hregARM64_X9();
   ru->regs[ru->size++] = hregARM64_X21();

   rRegUniverse_ARM64_initted = True;

   RRegUniverse__check_is_sane(ru);
   return ru;
}
UInt ppHRegARM64 ( HReg reg )  {
   Int r;
   /* Be generic for all virtual regs. */
   if (hregIsVirtual(reg)) {
      return ppHReg(reg);
   }
   /* But specific for real regs. */
   switch (hregClass(reg)) {
      case HRcInt64:
         r = hregEncoding(reg);
         vassert(r >= 0 && r < 31);
         return vex_printf("x%d", r);
      case HRcFlt64:
         r = hregEncoding(reg);
         vassert(r >= 0 && r < 32);
         return vex_printf("d%d", r);
      case HRcVec128:
         r = hregEncoding(reg);
         vassert(r >= 0 && r < 32);
         return vex_printf("q%d", r);
      default:
         vpanic("ppHRegARM64");
   }
}

static UInt ppHRegARM64asSreg ( HReg reg ) {
   UInt written = ppHRegARM64(reg);
   written += vex_printf("(S-reg)");
   return written;
}

static UInt ppHRegARM64asHreg ( HReg reg ) {
   UInt written = ppHRegARM64(reg);
   written += vex_printf("(H-reg)");
   return written;
}
/* --------- Condition codes, ARM64 encoding. --------- */

static const HChar* showARM64CondCode ( ARM64CondCode cond ) {
   switch (cond) {
      case ARM64cc_EQ: return "eq";
      case ARM64cc_NE: return "ne";
      case ARM64cc_CS: return "cs";
      case ARM64cc_CC: return "cc";
      case ARM64cc_MI: return "mi";
      case ARM64cc_PL: return "pl";
      case ARM64cc_VS: return "vs";
      case ARM64cc_VC: return "vc";
      case ARM64cc_HI: return "hi";
      case ARM64cc_LS: return "ls";
      case ARM64cc_GE: return "ge";
      case ARM64cc_LT: return "lt";
      case ARM64cc_GT: return "gt";
      case ARM64cc_LE: return "le";
      case ARM64cc_AL: return "al"; // default
      case ARM64cc_NV: return "nv";
      default: vpanic("showARM64CondCode");
   }
}
/* --------- Memory address expressions (amodes). --------- */

ARM64AMode* ARM64AMode_RI9 ( HReg reg, Int simm9 ) {
   ARM64AMode* am = LibVEX_Alloc_inline(sizeof(ARM64AMode));
   am->tag = ARM64am_RI9;
   am->ARM64am.RI9.reg = reg;
   am->ARM64am.RI9.simm9 = simm9;
   vassert(-256 <= simm9 && simm9 <= 255);
   return am;
}

ARM64AMode* ARM64AMode_RI12 ( HReg reg, Int uimm12, UChar szB ) {
   ARM64AMode* am = LibVEX_Alloc_inline(sizeof(ARM64AMode));
   am->tag = ARM64am_RI12;
   am->ARM64am.RI12.reg = reg;
   am->ARM64am.RI12.uimm12 = uimm12;
   am->ARM64am.RI12.szB = szB;
   vassert(uimm12 >= 0 && uimm12 <= 4095);
   switch (szB) {
      case 1: case 2: case 4: case 8: break;
      default: vassert(0);
   }
   return am;
}

ARM64AMode* ARM64AMode_RR ( HReg base, HReg index ) {
   ARM64AMode* am = LibVEX_Alloc_inline(sizeof(ARM64AMode));
   am->tag = ARM64am_RR;
   am->ARM64am.RR.base = base;
   am->ARM64am.RR.index = index;
   return am;
}
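/* Illustrative examples (editorial note, not part of the original
   comments), going by the pretty-printer below:
      ARM64AMode_RI9(r, -8)      stands for the byte address r - 8
      ARM64AMode_RI12(r, 4, 8)   stands for r + 4*8  (scaled unsigned offset)
      ARM64AMode_RR(rB, rI)      stands for rB + rI                          */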
static void ppARM64AMode ( ARM64AMode* am ) {
   switch (am->tag) {
      case ARM64am_RI9:
         vex_printf("%d(", am->ARM64am.RI9.simm9);
         ppHRegARM64(am->ARM64am.RI9.reg);
         vex_printf(")");
         break;
      case ARM64am_RI12:
         vex_printf("%u(", (UInt)am->ARM64am.RI12.szB
                           * (UInt)am->ARM64am.RI12.uimm12);
         ppHRegARM64(am->ARM64am.RI12.reg);
         vex_printf(")");
         break;
      case ARM64am_RR:
         vex_printf("(");
         ppHRegARM64(am->ARM64am.RR.base);
         vex_printf(",");
         ppHRegARM64(am->ARM64am.RR.index);
         vex_printf(")");
         break;
      default:
         vassert(0);
   }
}

static void addRegUsage_ARM64AMode ( HRegUsage* u, ARM64AMode* am ) {
   switch (am->tag) {
      case ARM64am_RI9:
         addHRegUse(u, HRmRead, am->ARM64am.RI9.reg);
         return;
      case ARM64am_RI12:
         addHRegUse(u, HRmRead, am->ARM64am.RI12.reg);
         return;
      case ARM64am_RR:
         addHRegUse(u, HRmRead, am->ARM64am.RR.base);
         addHRegUse(u, HRmRead, am->ARM64am.RR.index);
         return;
      default:
         vpanic("addRegUsage_ARM64Amode");
   }
}

static void mapRegs_ARM64AMode ( HRegRemap* m, ARM64AMode* am ) {
   switch (am->tag) {
      case ARM64am_RI9:
         am->ARM64am.RI9.reg = lookupHRegRemap(m, am->ARM64am.RI9.reg);
         return;
      case ARM64am_RI12:
         am->ARM64am.RI12.reg = lookupHRegRemap(m, am->ARM64am.RI12.reg);
         return;
      case ARM64am_RR:
         am->ARM64am.RR.base  = lookupHRegRemap(m, am->ARM64am.RR.base);
         am->ARM64am.RR.index = lookupHRegRemap(m, am->ARM64am.RR.index);
         return;
      default:
         vpanic("mapRegs_ARM64Amode");
   }
}
/* --------- Reg or uimm12<<{0,12} operands --------- */

ARM64RIA* ARM64RIA_I12 ( UShort imm12, UChar shift ) {
   ARM64RIA* riA = LibVEX_Alloc_inline(sizeof(ARM64RIA));
   riA->tag = ARM64riA_I12;
   riA->ARM64riA.I12.imm12 = imm12;
   riA->ARM64riA.I12.shift = shift;
   vassert(imm12 < 4096);
   vassert(shift == 0 || shift == 12);
   return riA;
}

ARM64RIA* ARM64RIA_R ( HReg reg ) {
   ARM64RIA* riA = LibVEX_Alloc_inline(sizeof(ARM64RIA));
   riA->tag = ARM64riA_R;
   riA->ARM64riA.R.reg = reg;
   return riA;
}
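/* Illustrative example (editorial note, not part of the original comments):
   per ppARM64RIA below, ARM64RIA_I12(0x123, 12) denotes the immediate
   0x123 << 12 = 0x123000, while ARM64RIA_R(r) denotes register r. */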
static void ppARM64RIA ( ARM64RIA* riA ) {
   switch (riA->tag) {
      case ARM64riA_I12:
         vex_printf("#%u",(UInt)(riA->ARM64riA.I12.imm12
                                 << riA->ARM64riA.I12.shift));
         break;
      case ARM64riA_R:
         ppHRegARM64(riA->ARM64riA.R.reg);
         break;
      default:
         vassert(0);
   }
}

static void addRegUsage_ARM64RIA ( HRegUsage* u, ARM64RIA* riA ) {
   switch (riA->tag) {
      case ARM64riA_I12:
         return;
      case ARM64riA_R:
         addHRegUse(u, HRmRead, riA->ARM64riA.R.reg);
         return;
      default:
         vpanic("addRegUsage_ARM64RIA");
   }
}

static void mapRegs_ARM64RIA ( HRegRemap* m, ARM64RIA* riA ) {
   switch (riA->tag) {
      case ARM64riA_I12:
         return;
      case ARM64riA_R:
         riA->ARM64riA.R.reg = lookupHRegRemap(m, riA->ARM64riA.R.reg);
         return;
      default:
         vpanic("mapRegs_ARM64RIA");
   }
}
/* --------- Reg or "bitfield" (logic immediate) operands --------- */

ARM64RIL* ARM64RIL_I13 ( UChar bitN, UChar immR, UChar immS ) {
   ARM64RIL* riL = LibVEX_Alloc_inline(sizeof(ARM64RIL));
   riL->tag = ARM64riL_I13;
   riL->ARM64riL.I13.bitN = bitN;
   riL->ARM64riL.I13.immR = immR;
   riL->ARM64riL.I13.immS = immS;
   return riL;
}

ARM64RIL* ARM64RIL_R ( HReg reg ) {
   ARM64RIL* riL = LibVEX_Alloc_inline(sizeof(ARM64RIL));
   riL->tag = ARM64riL_R;
   riL->ARM64riL.R.reg = reg;
   return riL;
}
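/* Illustrative note (editorial, not part of the original comments): bitN,
   immR and immS are assumed to be the usual AArch64 N:immr:imms fields of
   a logical immediate, which is what the "#nrs(...)" rendering below
   suggests; for instance N=1, immR=0, immS=7 would describe the 64-bit
   constant 0xFF (8 consecutive ones, unrotated). */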
static void ppARM64RIL ( ARM64RIL* riL ) {
   switch (riL->tag) {
      case ARM64riL_I13:
         vex_printf("#nrs(%u,%u,%u)",
                    (UInt)riL->ARM64riL.I13.bitN,
                    (UInt)riL->ARM64riL.I13.immR,
                    (UInt)riL->ARM64riL.I13.immS);
         break;
      case ARM64riL_R:
         ppHRegARM64(riL->ARM64riL.R.reg);
         break;
      default:
         vassert(0);
   }
}

static void addRegUsage_ARM64RIL ( HRegUsage* u, ARM64RIL* riL ) {
   switch (riL->tag) {
      case ARM64riL_I13:
         return;
      case ARM64riL_R:
         addHRegUse(u, HRmRead, riL->ARM64riL.R.reg);
         return;
      default:
         vpanic("addRegUsage_ARM64RIL");
   }
}

static void mapRegs_ARM64RIL ( HRegRemap* m, ARM64RIL* riL ) {
   switch (riL->tag) {
      case ARM64riL_I13:
         return;
      case ARM64riL_R:
         riL->ARM64riL.R.reg = lookupHRegRemap(m, riL->ARM64riL.R.reg);
         return;
      default:
         vpanic("mapRegs_ARM64RIL");
   }
}
/* --------------- Reg or uimm6 operands --------------- */

ARM64RI6* ARM64RI6_I6 ( UInt imm6 ) {
   ARM64RI6* ri6 = LibVEX_Alloc_inline(sizeof(ARM64RI6));
   ri6->tag = ARM64ri6_I6;
   ri6->ARM64ri6.I6.imm6 = imm6;
   vassert(imm6 > 0 && imm6 < 64);
   return ri6;
}

ARM64RI6* ARM64RI6_R ( HReg reg ) {
   ARM64RI6* ri6 = LibVEX_Alloc_inline(sizeof(ARM64RI6));
   ri6->tag = ARM64ri6_R;
   ri6->ARM64ri6.R.reg = reg;
   return ri6;
}

static void ppARM64RI6 ( ARM64RI6* ri6 ) {
   switch (ri6->tag) {
      case ARM64ri6_I6:
         vex_printf("#%u", ri6->ARM64ri6.I6.imm6);
         break;
      case ARM64ri6_R:
         ppHRegARM64(ri6->ARM64ri6.R.reg);
         break;
      default:
         vassert(0);
   }
}

static void addRegUsage_ARM64RI6 ( HRegUsage* u, ARM64RI6* ri6 ) {
   switch (ri6->tag) {
      case ARM64ri6_I6:
         return;
      case ARM64ri6_R:
         addHRegUse(u, HRmRead, ri6->ARM64ri6.R.reg);
         return;
      default:
         vpanic("addRegUsage_ARM64RI6");
   }
}

static void mapRegs_ARM64RI6 ( HRegRemap* m, ARM64RI6* ri6 ) {
   switch (ri6->tag) {
      case ARM64ri6_I6:
         return;
      case ARM64ri6_R:
         ri6->ARM64ri6.R.reg = lookupHRegRemap(m, ri6->ARM64ri6.R.reg);
         return;
      default:
         vpanic("mapRegs_ARM64RI6");
   }
}
/* --------- Instructions. --------- */

static const HChar* showARM64LogicOp ( ARM64LogicOp op ) {
   switch (op) {
      case ARM64lo_AND: return "and";
      case ARM64lo_OR:  return "orr";
      case ARM64lo_XOR: return "eor";
      default: vpanic("showARM64LogicOp");
   }
}

static const HChar* showARM64ShiftOp ( ARM64ShiftOp op ) {
   switch (op) {
      case ARM64sh_SHL: return "lsl";
      case ARM64sh_SHR: return "lsr";
      case ARM64sh_SAR: return "asr";
      default: vpanic("showARM64ShiftOp");
   }
}

static const HChar* showARM64UnaryOp ( ARM64UnaryOp op ) {
   switch (op) {
      case ARM64un_NEG: return "neg";
      case ARM64un_NOT: return "not";
      case ARM64un_CLZ: return "clz";
      default: vpanic("showARM64UnaryOp");
   }
}

static const HChar* showARM64MulOp ( ARM64MulOp op ) {
   switch (op) {
      case ARM64mul_PLAIN: return "mul  ";
      case ARM64mul_ZX:    return "umulh";
      case ARM64mul_SX:    return "smulh";
      default: vpanic("showARM64MulOp");
   }
}
static void characteriseARM64CvtOp ( /*OUT*/HChar* syn,
                                     /*OUT*/UInt* fszB, /*OUT*/UInt* iszB,
                                     ARM64CvtOp op ) {
   switch (op) {
      case ARM64cvt_F32_I32S:
         *syn = 's'; *fszB = 4; *iszB = 4; break;
      case ARM64cvt_F64_I32S:
         *syn = 's'; *fszB = 8; *iszB = 4; break;
      case ARM64cvt_F32_I64S:
         *syn = 's'; *fszB = 4; *iszB = 8; break;
      case ARM64cvt_F64_I64S:
         *syn = 's'; *fszB = 8; *iszB = 8; break;
      case ARM64cvt_F32_I32U:
         *syn = 'u'; *fszB = 4; *iszB = 4; break;
      case ARM64cvt_F64_I32U:
         *syn = 'u'; *fszB = 8; *iszB = 4; break;
      case ARM64cvt_F32_I64U:
         *syn = 'u'; *fszB = 4; *iszB = 8; break;
      case ARM64cvt_F64_I64U:
         *syn = 'u'; *fszB = 8; *iszB = 8; break;
      default:
         vpanic("characteriseARM64CvtOp");
   }
}
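/* Worked example (editorial note, not part of the original comments): for
   ARM64cvt_F64_I32S this yields *syn = 's', *fszB = 8, *iszB = 4, which the
   instruction printer further down renders as "scvtf <D-reg>, <W-reg>". */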
static const HChar* showARM64FpBinOp ( ARM64FpBinOp op ) {
   switch (op) {
      case ARM64fpb_ADD: return "add";
      case ARM64fpb_SUB: return "sub";
      case ARM64fpb_MUL: return "mul";
      case ARM64fpb_DIV: return "div";
      default: vpanic("showARM64FpBinOp");
   }
}

static const HChar* showARM64FpUnaryOp ( ARM64FpUnaryOp op ) {
   switch (op) {
      case ARM64fpu_NEG:   return "neg  ";
      case ARM64fpu_ABS:   return "abs  ";
      case ARM64fpu_SQRT:  return "sqrt ";
      case ARM64fpu_RINT:  return "rinti";
      case ARM64fpu_RECPX: return "recpx";
      default: vpanic("showARM64FpUnaryOp");
   }
}
static void showARM64VecBinOp(/*OUT*/const HChar** nm,
                              /*OUT*/const HChar** ar, ARM64VecBinOp op ) {
   switch (op) {
      case ARM64vecb_ADD64x2:      *nm = "add ";      *ar = "2d";   return;
      case ARM64vecb_ADD32x4:      *nm = "add ";      *ar = "4s";   return;
      case ARM64vecb_ADD16x8:      *nm = "add ";      *ar = "8h";   return;
      case ARM64vecb_ADD8x16:      *nm = "add ";      *ar = "16b";  return;
      case ARM64vecb_SUB64x2:      *nm = "sub ";      *ar = "2d";   return;
      case ARM64vecb_SUB32x4:      *nm = "sub ";      *ar = "4s";   return;
      case ARM64vecb_SUB16x8:      *nm = "sub ";      *ar = "8h";   return;
      case ARM64vecb_SUB8x16:      *nm = "sub ";      *ar = "16b";  return;
      case ARM64vecb_MUL32x4:      *nm = "mul ";      *ar = "4s";   return;
      case ARM64vecb_MUL16x8:      *nm = "mul ";      *ar = "8h";   return;
      case ARM64vecb_MUL8x16:      *nm = "mul ";      *ar = "16b";  return;
      case ARM64vecb_FADD64x2:     *nm = "fadd ";     *ar = "2d";   return;
      case ARM64vecb_FSUB64x2:     *nm = "fsub ";     *ar = "2d";   return;
      case ARM64vecb_FMUL64x2:     *nm = "fmul ";     *ar = "2d";   return;
      case ARM64vecb_FDIV64x2:     *nm = "fdiv ";     *ar = "2d";   return;
      case ARM64vecb_FADD32x4:     *nm = "fadd ";     *ar = "4s";   return;
      case ARM64vecb_FSUB32x4:     *nm = "fsub ";     *ar = "4s";   return;
      case ARM64vecb_FMUL32x4:     *nm = "fmul ";     *ar = "4s";   return;
      case ARM64vecb_FDIV32x4:     *nm = "fdiv ";     *ar = "4s";   return;
      case ARM64vecb_FMAX64x2:     *nm = "fmax ";     *ar = "2d";   return;
      case ARM64vecb_FMAX32x4:     *nm = "fmax ";     *ar = "4s";   return;
      case ARM64vecb_FMIN64x2:     *nm = "fmin ";     *ar = "2d";   return;
      case ARM64vecb_FMIN32x4:     *nm = "fmin ";     *ar = "4s";   return;
      case ARM64vecb_UMAX32x4:     *nm = "umax ";     *ar = "4s";   return;
      case ARM64vecb_UMAX16x8:     *nm = "umax ";     *ar = "8h";   return;
      case ARM64vecb_UMAX8x16:     *nm = "umax ";     *ar = "16b";  return;
      case ARM64vecb_UMIN32x4:     *nm = "umin ";     *ar = "4s";   return;
      case ARM64vecb_UMIN16x8:     *nm = "umin ";     *ar = "8h";   return;
      case ARM64vecb_UMIN8x16:     *nm = "umin ";     *ar = "16b";  return;
      case ARM64vecb_SMAX32x4:     *nm = "smax ";     *ar = "4s";   return;
      case ARM64vecb_SMAX16x8:     *nm = "smax ";     *ar = "8h";   return;
      case ARM64vecb_SMAX8x16:     *nm = "smax ";     *ar = "16b";  return;
      case ARM64vecb_SMIN32x4:     *nm = "smin ";     *ar = "4s";   return;
      case ARM64vecb_SMIN16x8:     *nm = "smin ";     *ar = "8h";   return;
      case ARM64vecb_SMIN8x16:     *nm = "smin ";     *ar = "16b";  return;
      case ARM64vecb_AND:          *nm = "and ";      *ar = "16b";  return;
      case ARM64vecb_ORR:          *nm = "orr ";      *ar = "16b";  return;
      case ARM64vecb_XOR:          *nm = "eor ";      *ar = "16b";  return;
      case ARM64vecb_CMEQ64x2:     *nm = "cmeq ";     *ar = "2d";   return;
      case ARM64vecb_CMEQ32x4:     *nm = "cmeq ";     *ar = "4s";   return;
      case ARM64vecb_CMEQ16x8:     *nm = "cmeq ";     *ar = "8h";   return;
      case ARM64vecb_CMEQ8x16:     *nm = "cmeq ";     *ar = "16b";  return;
      case ARM64vecb_CMHI64x2:     *nm = "cmhi ";     *ar = "2d";   return;
      case ARM64vecb_CMHI32x4:     *nm = "cmhi ";     *ar = "4s";   return;
      case ARM64vecb_CMHI16x8:     *nm = "cmhi ";     *ar = "8h";   return;
      case ARM64vecb_CMHI8x16:     *nm = "cmhi ";     *ar = "16b";  return;
      case ARM64vecb_CMGT64x2:     *nm = "cmgt ";     *ar = "2d";   return;
      case ARM64vecb_CMGT32x4:     *nm = "cmgt ";     *ar = "4s";   return;
      case ARM64vecb_CMGT16x8:     *nm = "cmgt ";     *ar = "8h";   return;
      case ARM64vecb_CMGT8x16:     *nm = "cmgt ";     *ar = "16b";  return;
      case ARM64vecb_FCMEQ64x2:    *nm = "fcmeq ";    *ar = "2d";   return;
      case ARM64vecb_FCMEQ32x4:    *nm = "fcmeq ";    *ar = "4s";   return;
      case ARM64vecb_FCMGE64x2:    *nm = "fcmge ";    *ar = "2d";   return;
      case ARM64vecb_FCMGE32x4:    *nm = "fcmge ";    *ar = "4s";   return;
      case ARM64vecb_FCMGT64x2:    *nm = "fcmgt ";    *ar = "2d";   return;
      case ARM64vecb_FCMGT32x4:    *nm = "fcmgt ";    *ar = "4s";   return;
      case ARM64vecb_TBL1:         *nm = "tbl ";      *ar = "16b";  return;
      case ARM64vecb_UZP164x2:     *nm = "uzp1 ";     *ar = "2d";   return;
      case ARM64vecb_UZP132x4:     *nm = "uzp1 ";     *ar = "4s";   return;
      case ARM64vecb_UZP116x8:     *nm = "uzp1 ";     *ar = "8h";   return;
      case ARM64vecb_UZP18x16:     *nm = "uzp1 ";     *ar = "16b";  return;
      case ARM64vecb_UZP264x2:     *nm = "uzp2 ";     *ar = "2d";   return;
      case ARM64vecb_UZP232x4:     *nm = "uzp2 ";     *ar = "4s";   return;
      case ARM64vecb_UZP216x8:     *nm = "uzp2 ";     *ar = "8h";   return;
      case ARM64vecb_UZP28x16:     *nm = "uzp2 ";     *ar = "16b";  return;
      case ARM64vecb_ZIP132x4:     *nm = "zip1 ";     *ar = "4s";   return;
      case ARM64vecb_ZIP116x8:     *nm = "zip1 ";     *ar = "8h";   return;
      case ARM64vecb_ZIP18x16:     *nm = "zip1 ";     *ar = "16b";  return;
      case ARM64vecb_ZIP232x4:     *nm = "zip2 ";     *ar = "4s";   return;
      case ARM64vecb_ZIP216x8:     *nm = "zip2 ";     *ar = "8h";   return;
      case ARM64vecb_ZIP28x16:     *nm = "zip2 ";     *ar = "16b";  return;
      case ARM64vecb_PMUL8x16:     *nm = "pmul ";     *ar = "16b";  return;
      case ARM64vecb_PMULL8x8:     *nm = "pmull ";    *ar = "8hbb"; return;
      case ARM64vecb_UMULL2DSS:    *nm = "umull ";    *ar = "2dss"; return;
      case ARM64vecb_UMULL4SHH:    *nm = "umull ";    *ar = "4shh"; return;
      case ARM64vecb_UMULL8HBB:    *nm = "umull ";    *ar = "8hbb"; return;
      case ARM64vecb_SMULL2DSS:    *nm = "smull ";    *ar = "2dss"; return;
      case ARM64vecb_SMULL4SHH:    *nm = "smull ";    *ar = "4shh"; return;
      case ARM64vecb_SMULL8HBB:    *nm = "smull ";    *ar = "8hbb"; return;
      case ARM64vecb_SQADD64x2:    *nm = "sqadd ";    *ar = "2d";   return;
      case ARM64vecb_SQADD32x4:    *nm = "sqadd ";    *ar = "4s";   return;
      case ARM64vecb_SQADD16x8:    *nm = "sqadd ";    *ar = "8h";   return;
      case ARM64vecb_SQADD8x16:    *nm = "sqadd ";    *ar = "16b";  return;
      case ARM64vecb_UQADD64x2:    *nm = "uqadd ";    *ar = "2d";   return;
      case ARM64vecb_UQADD32x4:    *nm = "uqadd ";    *ar = "4s";   return;
      case ARM64vecb_UQADD16x8:    *nm = "uqadd ";    *ar = "8h";   return;
      case ARM64vecb_UQADD8x16:    *nm = "uqadd ";    *ar = "16b";  return;
      case ARM64vecb_SQSUB64x2:    *nm = "sqsub ";    *ar = "2d";   return;
      case ARM64vecb_SQSUB32x4:    *nm = "sqsub ";    *ar = "4s";   return;
      case ARM64vecb_SQSUB16x8:    *nm = "sqsub ";    *ar = "8h";   return;
      case ARM64vecb_SQSUB8x16:    *nm = "sqsub ";    *ar = "16b";  return;
      case ARM64vecb_UQSUB64x2:    *nm = "uqsub ";    *ar = "2d";   return;
      case ARM64vecb_UQSUB32x4:    *nm = "uqsub ";    *ar = "4s";   return;
      case ARM64vecb_UQSUB16x8:    *nm = "uqsub ";    *ar = "8h";   return;
      case ARM64vecb_UQSUB8x16:    *nm = "uqsub ";    *ar = "16b";  return;
      case ARM64vecb_SQDMULL2DSS:  *nm = "sqdmull";   *ar = "2dss"; return;
      case ARM64vecb_SQDMULL4SHH:  *nm = "sqdmull";   *ar = "4shh"; return;
      case ARM64vecb_SQDMULH32x4:  *nm = "sqdmulh";   *ar = "4s";   return;
      case ARM64vecb_SQDMULH16x8:  *nm = "sqdmulh";   *ar = "8h";   return;
      case ARM64vecb_SQRDMULH32x4: *nm = "sqrdmulh";  *ar = "4s";   return;
      case ARM64vecb_SQRDMULH16x8: *nm = "sqrdmulh";  *ar = "8h";   return;
      case ARM64vecb_SQSHL64x2:    *nm = "sqshl ";    *ar = "2d";   return;
      case ARM64vecb_SQSHL32x4:    *nm = "sqshl ";    *ar = "4s";   return;
      case ARM64vecb_SQSHL16x8:    *nm = "sqshl ";    *ar = "8h";   return;
      case ARM64vecb_SQSHL8x16:    *nm = "sqshl ";    *ar = "16b";  return;
      case ARM64vecb_UQSHL64x2:    *nm = "uqshl ";    *ar = "2d";   return;
      case ARM64vecb_UQSHL32x4:    *nm = "uqshl ";    *ar = "4s";   return;
      case ARM64vecb_UQSHL16x8:    *nm = "uqshl ";    *ar = "8h";   return;
      case ARM64vecb_UQSHL8x16:    *nm = "uqshl ";    *ar = "16b";  return;
      case ARM64vecb_SQRSHL64x2:   *nm = "sqrshl";    *ar = "2d";   return;
      case ARM64vecb_SQRSHL32x4:   *nm = "sqrshl";    *ar = "4s";   return;
      case ARM64vecb_SQRSHL16x8:   *nm = "sqrshl";    *ar = "8h";   return;
      case ARM64vecb_SQRSHL8x16:   *nm = "sqrshl";    *ar = "16b";  return;
      case ARM64vecb_UQRSHL64x2:   *nm = "uqrshl";    *ar = "2d";   return;
      case ARM64vecb_UQRSHL32x4:   *nm = "uqrshl";    *ar = "4s";   return;
      case ARM64vecb_UQRSHL16x8:   *nm = "uqrshl";    *ar = "8h";   return;
      case ARM64vecb_UQRSHL8x16:   *nm = "uqrshl";    *ar = "16b";  return;
      case ARM64vecb_SSHL64x2:     *nm = "sshl ";     *ar = "2d";   return;
      case ARM64vecb_SSHL32x4:     *nm = "sshl ";     *ar = "4s";   return;
      case ARM64vecb_SSHL16x8:     *nm = "sshl ";     *ar = "8h";   return;
      case ARM64vecb_SSHL8x16:     *nm = "sshl ";     *ar = "16b";  return;
      case ARM64vecb_USHL64x2:     *nm = "ushl ";     *ar = "2d";   return;
      case ARM64vecb_USHL32x4:     *nm = "ushl ";     *ar = "4s";   return;
      case ARM64vecb_USHL16x8:     *nm = "ushl ";     *ar = "8h";   return;
      case ARM64vecb_USHL8x16:     *nm = "ushl ";     *ar = "16b";  return;
      case ARM64vecb_SRSHL64x2:    *nm = "srshl ";    *ar = "2d";   return;
      case ARM64vecb_SRSHL32x4:    *nm = "srshl ";    *ar = "4s";   return;
      case ARM64vecb_SRSHL16x8:    *nm = "srshl ";    *ar = "8h";   return;
      case ARM64vecb_SRSHL8x16:    *nm = "srshl ";    *ar = "16b";  return;
      case ARM64vecb_URSHL64x2:    *nm = "urshl ";    *ar = "2d";   return;
      case ARM64vecb_URSHL32x4:    *nm = "urshl ";    *ar = "4s";   return;
      case ARM64vecb_URSHL16x8:    *nm = "urshl ";    *ar = "8h";   return;
      case ARM64vecb_URSHL8x16:    *nm = "urshl ";    *ar = "16b";  return;
      case ARM64vecb_FRECPS64x2:   *nm = "frecps";    *ar = "2d";   return;
      case ARM64vecb_FRECPS32x4:   *nm = "frecps";    *ar = "4s";   return;
      case ARM64vecb_FRSQRTS64x2:  *nm = "frsqrts";   *ar = "2d";   return;
      case ARM64vecb_FRSQRTS32x4:  *nm = "frsqrts";   *ar = "4s";   return;
      default: vpanic("showARM64VecBinOp");
   }
}
static void showARM64VecModifyOp(/*OUT*/const HChar** nm,
                                 /*OUT*/const HChar** ar,
                                 ARM64VecModifyOp op ) {
   switch (op) {
      case ARM64vecmo_SUQADD64x2: *nm = "suqadd"; *ar = "2d";  return;
      case ARM64vecmo_SUQADD32x4: *nm = "suqadd"; *ar = "4s";  return;
      case ARM64vecmo_SUQADD16x8: *nm = "suqadd"; *ar = "8h";  return;
      case ARM64vecmo_SUQADD8x16: *nm = "suqadd"; *ar = "16b"; return;
      case ARM64vecmo_USQADD64x2: *nm = "usqadd"; *ar = "2d";  return;
      case ARM64vecmo_USQADD32x4: *nm = "usqadd"; *ar = "4s";  return;
      case ARM64vecmo_USQADD16x8: *nm = "usqadd"; *ar = "8h";  return;
      case ARM64vecmo_USQADD8x16: *nm = "usqadd"; *ar = "16b"; return;
      default: vpanic("showARM64VecModifyOp");
   }
}
static void showARM64VecUnaryOp(/*OUT*/const HChar** nm,
                                /*OUT*/const HChar** ar, ARM64VecUnaryOp op )
{
   switch (op) {
      case ARM64vecu_FNEG64x2:    *nm = "fneg ";   *ar = "2d";  return;
      case ARM64vecu_FNEG32x4:    *nm = "fneg ";   *ar = "4s";  return;
      case ARM64vecu_FABS64x2:    *nm = "fabs ";   *ar = "2d";  return;
      case ARM64vecu_FABS32x4:    *nm = "fabs ";   *ar = "4s";  return;
      case ARM64vecu_NOT:         *nm = "not ";    *ar = "all"; return;
      case ARM64vecu_ABS64x2:     *nm = "abs ";    *ar = "2d";  return;
      case ARM64vecu_ABS32x4:     *nm = "abs ";    *ar = "4s";  return;
      case ARM64vecu_ABS16x8:     *nm = "abs ";    *ar = "8h";  return;
      case ARM64vecu_ABS8x16:     *nm = "abs ";    *ar = "16b"; return;
      case ARM64vecu_CLS32x4:     *nm = "cls ";    *ar = "4s";  return;
      case ARM64vecu_CLS16x8:     *nm = "cls ";    *ar = "8h";  return;
      case ARM64vecu_CLS8x16:     *nm = "cls ";    *ar = "16b"; return;
      case ARM64vecu_CLZ32x4:     *nm = "clz ";    *ar = "4s";  return;
      case ARM64vecu_CLZ16x8:     *nm = "clz ";    *ar = "8h";  return;
      case ARM64vecu_CLZ8x16:     *nm = "clz ";    *ar = "16b"; return;
      case ARM64vecu_CNT8x16:     *nm = "cnt ";    *ar = "16b"; return;
      case ARM64vecu_RBIT:        *nm = "rbit ";   *ar = "16b"; return;
      case ARM64vecu_REV1616B:    *nm = "rev16";   *ar = "16b"; return;
      case ARM64vecu_REV3216B:    *nm = "rev32";   *ar = "16b"; return;
      case ARM64vecu_REV328H:     *nm = "rev32";   *ar = "8h";  return;
      case ARM64vecu_REV6416B:    *nm = "rev64";   *ar = "16b"; return;
      case ARM64vecu_REV648H:     *nm = "rev64";   *ar = "8h";  return;
      case ARM64vecu_REV644S:     *nm = "rev64";   *ar = "4s";  return;
      case ARM64vecu_URECPE32x4:  *nm = "urecpe";  *ar = "4s";  return;
      case ARM64vecu_URSQRTE32x4: *nm = "ursqrte"; *ar = "4s";  return;
      case ARM64vecu_FRECPE64x2:  *nm = "frecpe";  *ar = "2d";  return;
      case ARM64vecu_FRECPE32x4:  *nm = "frecpe";  *ar = "4s";  return;
      case ARM64vecu_FRSQRTE64x2: *nm = "frsqrte"; *ar = "2d";  return;
      case ARM64vecu_FRSQRTE32x4: *nm = "frsqrte"; *ar = "4s";  return;
      case ARM64vecu_FSQRT64x2:   *nm = "fsqrt";   *ar = "2d";  return;
      case ARM64vecu_FSQRT32x4:   *nm = "fsqrt";   *ar = "4s";  return;
      default: vpanic("showARM64VecUnaryOp");
   }
}
static void showARM64VecShiftImmOp(/*OUT*/const HChar** nm,
                                   /*OUT*/const HChar** ar,
                                   ARM64VecShiftImmOp op )
{
   switch (op) {
      case ARM64vecshi_USHR64x2:    *nm = "ushr ";    *ar = "2d";  return;
      case ARM64vecshi_USHR32x4:    *nm = "ushr ";    *ar = "4s";  return;
      case ARM64vecshi_USHR16x8:    *nm = "ushr ";    *ar = "8h";  return;
      case ARM64vecshi_USHR8x16:    *nm = "ushr ";    *ar = "16b"; return;
      case ARM64vecshi_SSHR64x2:    *nm = "sshr ";    *ar = "2d";  return;
      case ARM64vecshi_SSHR32x4:    *nm = "sshr ";    *ar = "4s";  return;
      case ARM64vecshi_SSHR16x8:    *nm = "sshr ";    *ar = "8h";  return;
      case ARM64vecshi_SSHR8x16:    *nm = "sshr ";    *ar = "16b"; return;
      case ARM64vecshi_SHL64x2:     *nm = "shl ";     *ar = "2d";  return;
      case ARM64vecshi_SHL32x4:     *nm = "shl ";     *ar = "4s";  return;
      case ARM64vecshi_SHL16x8:     *nm = "shl ";     *ar = "8h";  return;
      case ARM64vecshi_SHL8x16:     *nm = "shl ";     *ar = "16b"; return;
      case ARM64vecshi_SQSHRN2SD:   *nm = "sqshrn";   *ar = "2sd"; return;
      case ARM64vecshi_SQSHRN4HS:   *nm = "sqshrn";   *ar = "4hs"; return;
      case ARM64vecshi_SQSHRN8BH:   *nm = "sqshrn";   *ar = "8bh"; return;
      case ARM64vecshi_UQSHRN2SD:   *nm = "uqshrn";   *ar = "2sd"; return;
      case ARM64vecshi_UQSHRN4HS:   *nm = "uqshrn";   *ar = "4hs"; return;
      case ARM64vecshi_UQSHRN8BH:   *nm = "uqshrn";   *ar = "8bh"; return;
      case ARM64vecshi_SQSHRUN2SD:  *nm = "sqshrun";  *ar = "2sd"; return;
      case ARM64vecshi_SQSHRUN4HS:  *nm = "sqshrun";  *ar = "4hs"; return;
      case ARM64vecshi_SQSHRUN8BH:  *nm = "sqshrun";  *ar = "8bh"; return;
      case ARM64vecshi_SQRSHRN2SD:  *nm = "sqrshrn";  *ar = "2sd"; return;
      case ARM64vecshi_SQRSHRN4HS:  *nm = "sqrshrn";  *ar = "4hs"; return;
      case ARM64vecshi_SQRSHRN8BH:  *nm = "sqrshrn";  *ar = "8bh"; return;
      case ARM64vecshi_UQRSHRN2SD:  *nm = "uqrshrn";  *ar = "2sd"; return;
      case ARM64vecshi_UQRSHRN4HS:  *nm = "uqrshrn";  *ar = "4hs"; return;
      case ARM64vecshi_UQRSHRN8BH:  *nm = "uqrshrn";  *ar = "8bh"; return;
      case ARM64vecshi_SQRSHRUN2SD: *nm = "sqrshrun"; *ar = "2sd"; return;
      case ARM64vecshi_SQRSHRUN4HS: *nm = "sqrshrun"; *ar = "4hs"; return;
      case ARM64vecshi_SQRSHRUN8BH: *nm = "sqrshrun"; *ar = "8bh"; return;
      case ARM64vecshi_UQSHL64x2:   *nm = "uqshl ";   *ar = "2d";  return;
      case ARM64vecshi_UQSHL32x4:   *nm = "uqshl ";   *ar = "4s";  return;
      case ARM64vecshi_UQSHL16x8:   *nm = "uqshl ";   *ar = "8h";  return;
      case ARM64vecshi_UQSHL8x16:   *nm = "uqshl ";   *ar = "16b"; return;
      case ARM64vecshi_SQSHL64x2:   *nm = "sqshl ";   *ar = "2d";  return;
      case ARM64vecshi_SQSHL32x4:   *nm = "sqshl ";   *ar = "4s";  return;
      case ARM64vecshi_SQSHL16x8:   *nm = "sqshl ";   *ar = "8h";  return;
      case ARM64vecshi_SQSHL8x16:   *nm = "sqshl ";   *ar = "16b"; return;
      case ARM64vecshi_SQSHLU64x2:  *nm = "sqshlu";   *ar = "2d";  return;
      case ARM64vecshi_SQSHLU32x4:  *nm = "sqshlu";   *ar = "4s";  return;
      case ARM64vecshi_SQSHLU16x8:  *nm = "sqshlu";   *ar = "8h";  return;
      case ARM64vecshi_SQSHLU8x16:  *nm = "sqshlu";   *ar = "16b"; return;
      default: vpanic("showARM64VecShiftImmOp");
   }
}
static const HChar* showARM64VecNarrowOp(ARM64VecNarrowOp op) {
   switch (op) {
      case ARM64vecna_XTN:    return "xtn ";
      case ARM64vecna_SQXTN:  return "sqxtn ";
      case ARM64vecna_UQXTN:  return "uqxtn ";
      case ARM64vecna_SQXTUN: return "sqxtun";
      default: vpanic("showARM64VecNarrowOp");
   }
}
ARM64Instr* ARM64Instr_Arith ( HReg dst,
                               HReg argL, ARM64RIA* argR, Bool isAdd ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_Arith;
   i->ARM64in.Arith.dst   = dst;
   i->ARM64in.Arith.argL  = argL;
   i->ARM64in.Arith.argR  = argR;
   i->ARM64in.Arith.isAdd = isAdd;
   return i;
}

ARM64Instr* ARM64Instr_Cmp ( HReg argL, ARM64RIA* argR, Bool is64 ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_Cmp;
   i->ARM64in.Cmp.argL = argL;
   i->ARM64in.Cmp.argR = argR;
   i->ARM64in.Cmp.is64 = is64;
   return i;
}

ARM64Instr* ARM64Instr_Logic ( HReg dst,
                               HReg argL, ARM64RIL* argR, ARM64LogicOp op ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_Logic;
   i->ARM64in.Logic.dst  = dst;
   i->ARM64in.Logic.argL = argL;
   i->ARM64in.Logic.argR = argR;
   i->ARM64in.Logic.op   = op;
   return i;
}

ARM64Instr* ARM64Instr_Test ( HReg argL, ARM64RIL* argR ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_Test;
   i->ARM64in.Test.argL = argL;
   i->ARM64in.Test.argR = argR;
   return i;
}

ARM64Instr* ARM64Instr_Shift ( HReg dst,
                               HReg argL, ARM64RI6* argR, ARM64ShiftOp op ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_Shift;
   i->ARM64in.Shift.dst  = dst;
   i->ARM64in.Shift.argL = argL;
   i->ARM64in.Shift.argR = argR;
   i->ARM64in.Shift.op   = op;
   return i;
}

ARM64Instr* ARM64Instr_Unary ( HReg dst, HReg src, ARM64UnaryOp op ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_Unary;
   i->ARM64in.Unary.dst = dst;
   i->ARM64in.Unary.src = src;
   i->ARM64in.Unary.op  = op;
   return i;
}

ARM64Instr* ARM64Instr_MovI ( HReg dst, HReg src ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_MovI;
   i->ARM64in.MovI.dst = dst;
   i->ARM64in.MovI.src = src;
   vassert(hregClass(src) == HRcInt64);
   vassert(hregClass(dst) == HRcInt64);
   return i;
}

ARM64Instr* ARM64Instr_Imm64 ( HReg dst, ULong imm64 ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_Imm64;
   i->ARM64in.Imm64.dst   = dst;
   i->ARM64in.Imm64.imm64 = imm64;
   return i;
}

ARM64Instr* ARM64Instr_LdSt64 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_LdSt64;
   i->ARM64in.LdSt64.isLoad = isLoad;
   i->ARM64in.LdSt64.rD     = rD;
   i->ARM64in.LdSt64.amode  = amode;
   return i;
}

ARM64Instr* ARM64Instr_LdSt32 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_LdSt32;
   i->ARM64in.LdSt32.isLoad = isLoad;
   i->ARM64in.LdSt32.rD     = rD;
   i->ARM64in.LdSt32.amode  = amode;
   return i;
}

ARM64Instr* ARM64Instr_LdSt16 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_LdSt16;
   i->ARM64in.LdSt16.isLoad = isLoad;
   i->ARM64in.LdSt16.rD     = rD;
   i->ARM64in.LdSt16.amode  = amode;
   return i;
}

ARM64Instr* ARM64Instr_LdSt8 ( Bool isLoad, HReg rD, ARM64AMode* amode ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_LdSt8;
   i->ARM64in.LdSt8.isLoad = isLoad;
   i->ARM64in.LdSt8.rD     = rD;
   i->ARM64in.LdSt8.amode  = amode;
   return i;
}

ARM64Instr* ARM64Instr_XDirect ( Addr64 dstGA, ARM64AMode* amPC,
                                 ARM64CondCode cond, Bool toFastEP ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_XDirect;
   i->ARM64in.XDirect.dstGA    = dstGA;
   i->ARM64in.XDirect.amPC     = amPC;
   i->ARM64in.XDirect.cond     = cond;
   i->ARM64in.XDirect.toFastEP = toFastEP;
   return i;
}

ARM64Instr* ARM64Instr_XIndir ( HReg dstGA, ARM64AMode* amPC,
                                ARM64CondCode cond ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_XIndir;
   i->ARM64in.XIndir.dstGA = dstGA;
   i->ARM64in.XIndir.amPC  = amPC;
   i->ARM64in.XIndir.cond  = cond;
   return i;
}

ARM64Instr* ARM64Instr_XAssisted ( HReg dstGA, ARM64AMode* amPC,
                                   ARM64CondCode cond, IRJumpKind jk ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_XAssisted;
   i->ARM64in.XAssisted.dstGA = dstGA;
   i->ARM64in.XAssisted.amPC  = amPC;
   i->ARM64in.XAssisted.cond  = cond;
   i->ARM64in.XAssisted.jk    = jk;
   return i;
}

ARM64Instr* ARM64Instr_CSel ( HReg dst, HReg argL, HReg argR,
                              ARM64CondCode cond ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_CSel;
   i->ARM64in.CSel.dst  = dst;
   i->ARM64in.CSel.argL = argL;
   i->ARM64in.CSel.argR = argR;
   i->ARM64in.CSel.cond = cond;
   return i;
}

ARM64Instr* ARM64Instr_Call ( ARM64CondCode cond, Addr64 target, Int nArgRegs,
                              RetLoc rloc ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_Call;
   i->ARM64in.Call.cond     = cond;
   i->ARM64in.Call.target   = target;
   i->ARM64in.Call.nArgRegs = nArgRegs;
   i->ARM64in.Call.rloc     = rloc;
   vassert(is_sane_RetLoc(rloc));
   return i;
}

extern ARM64Instr* ARM64Instr_AddToSP ( Int simm ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_AddToSP;
   i->ARM64in.AddToSP.simm = simm;
   vassert(-4096 < simm && simm < 4096);
   vassert(0 == (simm & 0xF));
   return i;
}
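/* Note (editorial, not part of the original comments): the (simm & 0xF) == 0
   check keeps xsp 16-byte aligned, which AArch64 requires for SP-relative
   memory accesses. */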
extern ARM64Instr* ARM64Instr_FromSP ( HReg dst ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_FromSP;
   i->ARM64in.FromSP.dst = dst;
   return i;
}

ARM64Instr* ARM64Instr_Mul ( HReg dst, HReg argL, HReg argR,
                             ARM64MulOp op ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_Mul;
   i->ARM64in.Mul.dst  = dst;
   i->ARM64in.Mul.argL = argL;
   i->ARM64in.Mul.argR = argR;
   i->ARM64in.Mul.op   = op;
   return i;
}

ARM64Instr* ARM64Instr_LdrEX ( Int szB ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_LdrEX;
   i->ARM64in.LdrEX.szB = szB;
   vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
   return i;
}

ARM64Instr* ARM64Instr_StrEX ( Int szB ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_StrEX;
   i->ARM64in.StrEX.szB = szB;
   vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
   return i;
}

ARM64Instr* ARM64Instr_CAS ( Int szB ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_CAS;
   i->ARM64in.CAS.szB = szB;
   vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
   return i;
}

ARM64Instr* ARM64Instr_MFence ( void ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_MFence;
   return i;
}

ARM64Instr* ARM64Instr_ClrEX ( void ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_ClrEX;
   return i;
}

ARM64Instr* ARM64Instr_VLdStH ( Bool isLoad, HReg sD, HReg rN, UInt uimm12 ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VLdStH;
   i->ARM64in.VLdStH.isLoad = isLoad;
   i->ARM64in.VLdStH.hD     = sD;
   i->ARM64in.VLdStH.rN     = rN;
   i->ARM64in.VLdStH.uimm12 = uimm12;
   vassert(uimm12 < 8192 && 0 == (uimm12 & 1));
   return i;
}

ARM64Instr* ARM64Instr_VLdStS ( Bool isLoad, HReg sD, HReg rN, UInt uimm12 ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VLdStS;
   i->ARM64in.VLdStS.isLoad = isLoad;
   i->ARM64in.VLdStS.sD     = sD;
   i->ARM64in.VLdStS.rN     = rN;
   i->ARM64in.VLdStS.uimm12 = uimm12;
   vassert(uimm12 < 16384 && 0 == (uimm12 & 3));
   return i;
}

ARM64Instr* ARM64Instr_VLdStD ( Bool isLoad, HReg dD, HReg rN, UInt uimm12 ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VLdStD;
   i->ARM64in.VLdStD.isLoad = isLoad;
   i->ARM64in.VLdStD.dD     = dD;
   i->ARM64in.VLdStD.rN     = rN;
   i->ARM64in.VLdStD.uimm12 = uimm12;
   vassert(uimm12 < 32768 && 0 == (uimm12 & 7));
   return i;
}

ARM64Instr* ARM64Instr_VLdStQ ( Bool isLoad, HReg rQ, HReg rN ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VLdStQ;
   i->ARM64in.VLdStQ.isLoad = isLoad;
   i->ARM64in.VLdStQ.rQ     = rQ;
   i->ARM64in.VLdStQ.rN     = rN;
   return i;
}

ARM64Instr* ARM64Instr_VCvtI2F ( ARM64CvtOp how, HReg rD, HReg rS ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VCvtI2F;
   i->ARM64in.VCvtI2F.how = how;
   i->ARM64in.VCvtI2F.rD  = rD;
   i->ARM64in.VCvtI2F.rS  = rS;
   return i;
}

ARM64Instr* ARM64Instr_VCvtF2I ( ARM64CvtOp how, HReg rD, HReg rS,
                                 UChar armRM ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VCvtF2I;
   i->ARM64in.VCvtF2I.how   = how;
   i->ARM64in.VCvtF2I.rD    = rD;
   i->ARM64in.VCvtF2I.rS    = rS;
   i->ARM64in.VCvtF2I.armRM = armRM;
   vassert(armRM <= 3);
   return i;
}

ARM64Instr* ARM64Instr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VCvtSD;
   i->ARM64in.VCvtSD.sToD = sToD;
   i->ARM64in.VCvtSD.dst  = dst;
   i->ARM64in.VCvtSD.src  = src;
   return i;
}

ARM64Instr* ARM64Instr_VCvtHS ( Bool hToS, HReg dst, HReg src ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VCvtHS;
   i->ARM64in.VCvtHS.hToS = hToS;
   i->ARM64in.VCvtHS.dst  = dst;
   i->ARM64in.VCvtHS.src  = src;
   return i;
}

ARM64Instr* ARM64Instr_VCvtHD ( Bool hToD, HReg dst, HReg src ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VCvtHD;
   i->ARM64in.VCvtHD.hToD = hToD;
   i->ARM64in.VCvtHD.dst  = dst;
   i->ARM64in.VCvtHD.src  = src;
   return i;
}

ARM64Instr* ARM64Instr_VUnaryD ( ARM64FpUnaryOp op, HReg dst, HReg src ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VUnaryD;
   i->ARM64in.VUnaryD.op  = op;
   i->ARM64in.VUnaryD.dst = dst;
   i->ARM64in.VUnaryD.src = src;
   return i;
}

ARM64Instr* ARM64Instr_VUnaryS ( ARM64FpUnaryOp op, HReg dst, HReg src ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VUnaryS;
   i->ARM64in.VUnaryS.op  = op;
   i->ARM64in.VUnaryS.dst = dst;
   i->ARM64in.VUnaryS.src = src;
   return i;
}

ARM64Instr* ARM64Instr_VBinD ( ARM64FpBinOp op,
                               HReg dst, HReg argL, HReg argR ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VBinD;
   i->ARM64in.VBinD.op   = op;
   i->ARM64in.VBinD.dst  = dst;
   i->ARM64in.VBinD.argL = argL;
   i->ARM64in.VBinD.argR = argR;
   return i;
}

ARM64Instr* ARM64Instr_VBinS ( ARM64FpBinOp op,
                               HReg dst, HReg argL, HReg argR ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VBinS;
   i->ARM64in.VBinS.op   = op;
   i->ARM64in.VBinS.dst  = dst;
   i->ARM64in.VBinS.argL = argL;
   i->ARM64in.VBinS.argR = argR;
   return i;
}

ARM64Instr* ARM64Instr_VCmpD ( HReg argL, HReg argR ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VCmpD;
   i->ARM64in.VCmpD.argL = argL;
   i->ARM64in.VCmpD.argR = argR;
   return i;
}

ARM64Instr* ARM64Instr_VCmpS ( HReg argL, HReg argR ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VCmpS;
   i->ARM64in.VCmpS.argL = argL;
   i->ARM64in.VCmpS.argR = argR;
   return i;
}

ARM64Instr* ARM64Instr_VFCSel ( HReg dst, HReg argL, HReg argR,
                                ARM64CondCode cond, Bool isD ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VFCSel;
   i->ARM64in.VFCSel.dst  = dst;
   i->ARM64in.VFCSel.argL = argL;
   i->ARM64in.VFCSel.argR = argR;
   i->ARM64in.VFCSel.cond = cond;
   i->ARM64in.VFCSel.isD  = isD;
   return i;
}

ARM64Instr* ARM64Instr_FPCR ( Bool toFPCR, HReg iReg ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_FPCR;
   i->ARM64in.FPCR.toFPCR = toFPCR;
   i->ARM64in.FPCR.iReg   = iReg;
   return i;
}

ARM64Instr* ARM64Instr_FPSR ( Bool toFPSR, HReg iReg ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_FPSR;
   i->ARM64in.FPSR.toFPSR = toFPSR;
   i->ARM64in.FPSR.iReg   = iReg;
   return i;
}

ARM64Instr* ARM64Instr_VBinV ( ARM64VecBinOp op,
                               HReg dst, HReg argL, HReg argR ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VBinV;
   i->ARM64in.VBinV.op   = op;
   i->ARM64in.VBinV.dst  = dst;
   i->ARM64in.VBinV.argL = argL;
   i->ARM64in.VBinV.argR = argR;
   return i;
}

ARM64Instr* ARM64Instr_VModifyV ( ARM64VecModifyOp op, HReg mod, HReg arg ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VModifyV;
   i->ARM64in.VModifyV.op  = op;
   i->ARM64in.VModifyV.mod = mod;
   i->ARM64in.VModifyV.arg = arg;
   return i;
}

ARM64Instr* ARM64Instr_VUnaryV ( ARM64VecUnaryOp op, HReg dst, HReg arg ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VUnaryV;
   i->ARM64in.VUnaryV.op  = op;
   i->ARM64in.VUnaryV.dst = dst;
   i->ARM64in.VUnaryV.arg = arg;
   return i;
}

ARM64Instr* ARM64Instr_VNarrowV ( ARM64VecNarrowOp op,
                                  UInt dszBlg2, HReg dst, HReg src ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VNarrowV;
   i->ARM64in.VNarrowV.op      = op;
   i->ARM64in.VNarrowV.dszBlg2 = dszBlg2;
   i->ARM64in.VNarrowV.dst     = dst;
   i->ARM64in.VNarrowV.src     = src;
   vassert(dszBlg2 == 0 || dszBlg2 == 1 || dszBlg2 == 2);
   return i;
}

ARM64Instr* ARM64Instr_VShiftImmV ( ARM64VecShiftImmOp op,
                                    HReg dst, HReg src, UInt amt ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VShiftImmV;
   i->ARM64in.VShiftImmV.op  = op;
   i->ARM64in.VShiftImmV.dst = dst;
   i->ARM64in.VShiftImmV.src = src;
   i->ARM64in.VShiftImmV.amt = amt;
   UInt minSh = 0;
   UInt maxSh = 0;
   switch (op) {
      /* For right shifts, the allowed shift amounts are 1 .. lane_size.
         For left shifts, the allowed shift amounts are 0 .. lane_size-1.
      */
      case ARM64vecshi_USHR64x2: case ARM64vecshi_SSHR64x2:
      case ARM64vecshi_UQSHRN2SD: case ARM64vecshi_SQSHRN2SD:
      case ARM64vecshi_SQSHRUN2SD:
      case ARM64vecshi_UQRSHRN2SD: case ARM64vecshi_SQRSHRN2SD:
      case ARM64vecshi_SQRSHRUN2SD:
         minSh = 1; maxSh = 64; break;
      case ARM64vecshi_SHL64x2:
      case ARM64vecshi_UQSHL64x2: case ARM64vecshi_SQSHL64x2:
      case ARM64vecshi_SQSHLU64x2:
         minSh = 0; maxSh = 63; break;
      case ARM64vecshi_USHR32x4: case ARM64vecshi_SSHR32x4:
      case ARM64vecshi_UQSHRN4HS: case ARM64vecshi_SQSHRN4HS:
      case ARM64vecshi_SQSHRUN4HS:
      case ARM64vecshi_UQRSHRN4HS: case ARM64vecshi_SQRSHRN4HS:
      case ARM64vecshi_SQRSHRUN4HS:
         minSh = 1; maxSh = 32; break;
      case ARM64vecshi_SHL32x4:
      case ARM64vecshi_UQSHL32x4: case ARM64vecshi_SQSHL32x4:
      case ARM64vecshi_SQSHLU32x4:
         minSh = 0; maxSh = 31; break;
      case ARM64vecshi_USHR16x8: case ARM64vecshi_SSHR16x8:
      case ARM64vecshi_UQSHRN8BH: case ARM64vecshi_SQSHRN8BH:
      case ARM64vecshi_SQSHRUN8BH:
      case ARM64vecshi_UQRSHRN8BH: case ARM64vecshi_SQRSHRN8BH:
      case ARM64vecshi_SQRSHRUN8BH:
         minSh = 1; maxSh = 16; break;
      case ARM64vecshi_SHL16x8:
      case ARM64vecshi_UQSHL16x8: case ARM64vecshi_SQSHL16x8:
      case ARM64vecshi_SQSHLU16x8:
         minSh = 0; maxSh = 15; break;
      case ARM64vecshi_USHR8x16: case ARM64vecshi_SSHR8x16:
         minSh = 1; maxSh = 8; break;
      case ARM64vecshi_SHL8x16:
      case ARM64vecshi_UQSHL8x16: case ARM64vecshi_SQSHL8x16:
      case ARM64vecshi_SQSHLU8x16:
         minSh = 0; maxSh = 7; break;
      default:
         vassert(0);
   }
   vassert(amt >= minSh && amt <= maxSh);
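   /* Illustrative note (editorial, not part of the original comments): for
      example, ARM64vecshi_USHR64x2 accepts amt in 1..64, whereas
      ARM64vecshi_SHL64x2 accepts 0..63, mirroring the ranges expressible by
      the immh:immb field of the underlying AArch64 shift-by-immediate
      encodings. */
   return i;
}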
ARM64Instr* ARM64Instr_VExtV ( HReg dst, HReg srcLo, HReg srcHi, UInt amtB ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VExtV;
   i->ARM64in.VExtV.dst   = dst;
   i->ARM64in.VExtV.srcLo = srcLo;
   i->ARM64in.VExtV.srcHi = srcHi;
   i->ARM64in.VExtV.amtB  = amtB;
   vassert(amtB >= 1 && amtB <= 15);
   return i;
}

ARM64Instr* ARM64Instr_VImmQ (HReg rQ, UShort imm) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VImmQ;
   i->ARM64in.VImmQ.rQ  = rQ;
   i->ARM64in.VImmQ.imm = imm;
   /* Check that this is something that can actually be emitted. */
   switch (imm) {
      case 0x0000: case 0x0001: case 0x0003:
      case 0x000F: case 0x003F: case 0x00FF: case 0xFFFF:
         break;
      default:
         vassert(0);
   }
   return i;
}

ARM64Instr* ARM64Instr_VDfromX ( HReg rD, HReg rX ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VDfromX;
   i->ARM64in.VDfromX.rD = rD;
   i->ARM64in.VDfromX.rX = rX;
   return i;
}

ARM64Instr* ARM64Instr_VQfromX ( HReg rQ, HReg rXlo ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VQfromX;
   i->ARM64in.VQfromX.rQ   = rQ;
   i->ARM64in.VQfromX.rXlo = rXlo;
   return i;
}

ARM64Instr* ARM64Instr_VQfromXX ( HReg rQ, HReg rXhi, HReg rXlo ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VQfromXX;
   i->ARM64in.VQfromXX.rQ   = rQ;
   i->ARM64in.VQfromXX.rXhi = rXhi;
   i->ARM64in.VQfromXX.rXlo = rXlo;
   return i;
}

ARM64Instr* ARM64Instr_VXfromQ ( HReg rX, HReg rQ, UInt laneNo ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VXfromQ;
   i->ARM64in.VXfromQ.rX     = rX;
   i->ARM64in.VXfromQ.rQ     = rQ;
   i->ARM64in.VXfromQ.laneNo = laneNo;
   vassert(laneNo <= 1);
   return i;
}

ARM64Instr* ARM64Instr_VXfromDorS ( HReg rX, HReg rDorS, Bool fromD ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VXfromDorS;
   i->ARM64in.VXfromDorS.rX    = rX;
   i->ARM64in.VXfromDorS.rDorS = rDorS;
   i->ARM64in.VXfromDorS.fromD = fromD;
   return i;
}

ARM64Instr* ARM64Instr_VMov ( UInt szB, HReg dst, HReg src ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_VMov;
   i->ARM64in.VMov.szB = szB;
   i->ARM64in.VMov.dst = dst;
   i->ARM64in.VMov.src = src;
   switch (szB) {
      case 16:
         vassert(hregClass(src) == HRcVec128);
         vassert(hregClass(dst) == HRcVec128);
         break;
      case 8:
         vassert(hregClass(src) == HRcFlt64);
         vassert(hregClass(dst) == HRcFlt64);
         break;
      default:
         vpanic("ARM64Instr_VMov");
   }
   return i;
}

ARM64Instr* ARM64Instr_EvCheck ( ARM64AMode* amCounter,
                                 ARM64AMode* amFailAddr ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_EvCheck;
   i->ARM64in.EvCheck.amCounter  = amCounter;
   i->ARM64in.EvCheck.amFailAddr = amFailAddr;
   return i;
}

ARM64Instr* ARM64Instr_ProfInc ( void ) {
   ARM64Instr* i = LibVEX_Alloc_inline(sizeof(ARM64Instr));
   i->tag = ARM64in_ProfInc;
   return i;
}
void ppARM64Instr ( const ARM64Instr* i ) {
   switch (i->tag) {
      case ARM64in_Arith:
         vex_printf("%s ", i->ARM64in.Arith.isAdd ? "add" : "sub");
         ppHRegARM64(i->ARM64in.Arith.dst);
         vex_printf(", ");
         ppHRegARM64(i->ARM64in.Arith.argL);
         vex_printf(", ");
         ppARM64RIA(i->ARM64in.Arith.argR);
         return;
      case ARM64in_Cmp:
         vex_printf("cmp%s ", i->ARM64in.Cmp.is64 ? " " : "(w)" );
         ppHRegARM64(i->ARM64in.Cmp.argL);
         vex_printf(", ");
         ppARM64RIA(i->ARM64in.Cmp.argR);
         return;
      case ARM64in_Logic:
         vex_printf("%s ", showARM64LogicOp(i->ARM64in.Logic.op));
         ppHRegARM64(i->ARM64in.Logic.dst);
         vex_printf(", ");
         ppHRegARM64(i->ARM64in.Logic.argL);
         vex_printf(", ");
         ppARM64RIL(i->ARM64in.Logic.argR);
         return;
      case ARM64in_Test:
         vex_printf("tst ");
         ppHRegARM64(i->ARM64in.Test.argL);
         vex_printf(", ");
         ppARM64RIL(i->ARM64in.Test.argR);
         return;
      case ARM64in_Shift:
         vex_printf("%s ", showARM64ShiftOp(i->ARM64in.Shift.op));
         ppHRegARM64(i->ARM64in.Shift.dst);
         vex_printf(", ");
         ppHRegARM64(i->ARM64in.Shift.argL);
         vex_printf(", ");
         ppARM64RI6(i->ARM64in.Shift.argR);
         return;
      case ARM64in_Unary:
         vex_printf("%s ", showARM64UnaryOp(i->ARM64in.Unary.op));
         ppHRegARM64(i->ARM64in.Unary.dst);
         vex_printf(", ");
         ppHRegARM64(i->ARM64in.Unary.src);
         return;
      case ARM64in_MovI:
         vex_printf("mov ");
         ppHRegARM64(i->ARM64in.MovI.dst);
         vex_printf(", ");
         ppHRegARM64(i->ARM64in.MovI.src);
         return;
      case ARM64in_Imm64:
         vex_printf("imm64 ");
         ppHRegARM64(i->ARM64in.Imm64.dst);
         vex_printf(", 0x%llx", i->ARM64in.Imm64.imm64);
         return;
      case ARM64in_LdSt64:
         if (i->ARM64in.LdSt64.isLoad) {
            vex_printf("ldr ");
            ppHRegARM64(i->ARM64in.LdSt64.rD);
            vex_printf(", ");
            ppARM64AMode(i->ARM64in.LdSt64.amode);
         } else {
            vex_printf("str ");
            ppARM64AMode(i->ARM64in.LdSt64.amode);
            vex_printf(", ");
            ppHRegARM64(i->ARM64in.LdSt64.rD);
         }
         return;
      case ARM64in_LdSt32:
         if (i->ARM64in.LdSt32.isLoad) {
            vex_printf("ldruw ");
            ppHRegARM64(i->ARM64in.LdSt32.rD);
            vex_printf(", ");
            ppARM64AMode(i->ARM64in.LdSt32.amode);
         } else {
            vex_printf("strw ");
            ppARM64AMode(i->ARM64in.LdSt32.amode);
            vex_printf(", ");
            ppHRegARM64(i->ARM64in.LdSt32.rD);
         }
         return;
      case ARM64in_LdSt16:
         if (i->ARM64in.LdSt16.isLoad) {
            vex_printf("ldruh ");
            ppHRegARM64(i->ARM64in.LdSt16.rD);
            vex_printf(", ");
            ppARM64AMode(i->ARM64in.LdSt16.amode);
         } else {
            vex_printf("strh ");
            ppARM64AMode(i->ARM64in.LdSt16.amode);
            vex_printf(", ");
            ppHRegARM64(i->ARM64in.LdSt16.rD);
         }
         return;
      case ARM64in_LdSt8:
         if (i->ARM64in.LdSt8.isLoad) {
            vex_printf("ldrub ");
            ppHRegARM64(i->ARM64in.LdSt8.rD);
            vex_printf(", ");
            ppARM64AMode(i->ARM64in.LdSt8.amode);
         } else {
            vex_printf("strb ");
            ppARM64AMode(i->ARM64in.LdSt8.amode);
            vex_printf(", ");
            ppHRegARM64(i->ARM64in.LdSt8.rD);
         }
         return;
      case ARM64in_XDirect:
         vex_printf("(xDirect) ");
         vex_printf("if (%%pstate.%s) { ",
                    showARM64CondCode(i->ARM64in.XDirect.cond));
         vex_printf("imm64 x9,0x%llx; ", i->ARM64in.XDirect.dstGA);
         vex_printf("str x9,");
         ppARM64AMode(i->ARM64in.XDirect.amPC);
         vex_printf("; imm64-exactly4 x9,$disp_cp_chain_me_to_%sEP; ",
                    i->ARM64in.XDirect.toFastEP ? "fast" : "slow");
         vex_printf("blr x9 }");
         return;
      case ARM64in_XIndir:
         vex_printf("(xIndir) ");
         vex_printf("if (%%pstate.%s) { ",
                    showARM64CondCode(i->ARM64in.XIndir.cond));
         vex_printf("str ");
         ppHRegARM64(i->ARM64in.XIndir.dstGA);
         vex_printf(",");
         ppARM64AMode(i->ARM64in.XIndir.amPC);
         vex_printf("; imm64 x9,$disp_cp_xindir; ");
         vex_printf("br x9 }");
         return;
      case ARM64in_XAssisted:
         vex_printf("(xAssisted) ");
         vex_printf("if (%%pstate.%s) { ",
                    showARM64CondCode(i->ARM64in.XAssisted.cond));
         vex_printf("str ");
         ppHRegARM64(i->ARM64in.XAssisted.dstGA);
         vex_printf(",");
         ppARM64AMode(i->ARM64in.XAssisted.amPC);
         vex_printf("; movw x21,$IRJumpKind_to_TRCVAL(%d); ",
                    (Int)i->ARM64in.XAssisted.jk);
         vex_printf("imm64 x9,$disp_cp_xassisted; ");
         vex_printf("br x9 }");
         return;
      case ARM64in_CSel:
         vex_printf("csel ");
         ppHRegARM64(i->ARM64in.CSel.dst);
         vex_printf(", ");
         ppHRegARM64(i->ARM64in.CSel.argL);
         vex_printf(", ");
         ppHRegARM64(i->ARM64in.CSel.argR);
         vex_printf(", %s", showARM64CondCode(i->ARM64in.CSel.cond));
         return;
      case ARM64in_Call:
         vex_printf("call%s ",
                    i->ARM64in.Call.cond==ARM64cc_AL
                       ? " " : showARM64CondCode(i->ARM64in.Call.cond));
         vex_printf("0x%llx [nArgRegs=%d, ",
                    i->ARM64in.Call.target, i->ARM64in.Call.nArgRegs);
         ppRetLoc(i->ARM64in.Call.rloc);
         vex_printf("]");
         return;
      case ARM64in_AddToSP: {
         Int simm = i->ARM64in.AddToSP.simm;
         vex_printf("%s xsp, xsp, #%d", simm < 0 ? "sub" : "add",
                                        simm < 0 ? -simm : simm);
         return;
      }
      case ARM64in_FromSP:
         vex_printf("mov ");
         ppHRegARM64(i->ARM64in.FromSP.dst);
         vex_printf(", xsp");
         return;
      case ARM64in_Mul:
         vex_printf("%s ", showARM64MulOp(i->ARM64in.Mul.op));
         ppHRegARM64(i->ARM64in.Mul.dst);
         vex_printf(", ");
         ppHRegARM64(i->ARM64in.Mul.argL);
         vex_printf(", ");
         ppHRegARM64(i->ARM64in.Mul.argR);
         return;
      case ARM64in_LdrEX: {
         const HChar* sz = " ";
         switch (i->ARM64in.LdrEX.szB) {
            case 1: sz = "b"; break;
            case 2: sz = "h"; break;
            case 4: case 8: break;
            default: vassert(0);
         }
         vex_printf("ldxr%s %c2, [x4]",
                    sz, i->ARM64in.LdrEX.szB == 8 ? 'x' : 'w');
         return;
      }
      case ARM64in_StrEX: {
         const HChar* sz = " ";
         switch (i->ARM64in.StrEX.szB) {
            case 1: sz = "b"; break;
            case 2: sz = "h"; break;
            case 4: case 8: break;
            default: vassert(0);
         }
         vex_printf("stxr%s w0, %c2, [x4]",
                    sz, i->ARM64in.StrEX.szB == 8 ? 'x' : 'w');
         return;
      }
      case ARM64in_CAS:
         vex_printf("x1 = cas(%dbit)(x3, x5 -> x7)", 8 * i->ARM64in.CAS.szB);
         return;
      case ARM64in_MFence:
         vex_printf("(mfence) dsb sy; dmb sy; isb");
         return;
      case ARM64in_ClrEX:
         vex_printf("clrex #15");
         return;
      case ARM64in_VLdStH:
         if (i->ARM64in.VLdStH.isLoad) {
            vex_printf("ldr ");
            ppHRegARM64asHreg(i->ARM64in.VLdStH.hD);
            vex_printf(", %u(", i->ARM64in.VLdStH.uimm12);
            ppHRegARM64(i->ARM64in.VLdStH.rN);
            vex_printf(")");
         } else {
            vex_printf("str ");
            vex_printf("%u(", i->ARM64in.VLdStH.uimm12);
            ppHRegARM64(i->ARM64in.VLdStH.rN);
            vex_printf("), ");
            ppHRegARM64asHreg(i->ARM64in.VLdStH.hD);
         }
         return;
      case ARM64in_VLdStS:
         if (i->ARM64in.VLdStS.isLoad) {
            vex_printf("ldr ");
            ppHRegARM64asSreg(i->ARM64in.VLdStS.sD);
            vex_printf(", %u(", i->ARM64in.VLdStS.uimm12);
            ppHRegARM64(i->ARM64in.VLdStS.rN);
            vex_printf(")");
         } else {
            vex_printf("str ");
            vex_printf("%u(", i->ARM64in.VLdStS.uimm12);
            ppHRegARM64(i->ARM64in.VLdStS.rN);
            vex_printf("), ");
            ppHRegARM64asSreg(i->ARM64in.VLdStS.sD);
         }
         return;
      case ARM64in_VLdStD:
         if (i->ARM64in.VLdStD.isLoad) {
            vex_printf("ldr ");
            ppHRegARM64(i->ARM64in.VLdStD.dD);
            vex_printf(", %u(", i->ARM64in.VLdStD.uimm12);
            ppHRegARM64(i->ARM64in.VLdStD.rN);
            vex_printf(")");
         } else {
            vex_printf("str ");
            vex_printf("%u(", i->ARM64in.VLdStD.uimm12);
            ppHRegARM64(i->ARM64in.VLdStD.rN);
            vex_printf("), ");
            ppHRegARM64(i->ARM64in.VLdStD.dD);
         }
         return;
      case ARM64in_VLdStQ:
         if (i->ARM64in.VLdStQ.isLoad)
            vex_printf("ld1.2d {");
         else
            vex_printf("st1.2d {");
         ppHRegARM64(i->ARM64in.VLdStQ.rQ);
         vex_printf("}, [");
         ppHRegARM64(i->ARM64in.VLdStQ.rN);
         vex_printf("]");
         return;
      case ARM64in_VCvtI2F: {
         HChar syn  = '?';
         UInt  fszB = 0;
         UInt  iszB = 0;
         characteriseARM64CvtOp(&syn, &fszB, &iszB, i->ARM64in.VCvtI2F.how);
         vex_printf("%ccvtf ", syn);
         ppHRegARM64(i->ARM64in.VCvtI2F.rD);
         vex_printf("(%c-reg), ", fszB == 4 ? 'S' : 'D');
         ppHRegARM64(i->ARM64in.VCvtI2F.rS);
         vex_printf("(%c-reg)", iszB == 4 ? 'W' : 'X');
         return;
      }
      case ARM64in_VCvtF2I: {
         HChar syn  = '?';
         UInt  fszB = 0;
         UInt  iszB = 0;
         HChar rmo  = '?';
         characteriseARM64CvtOp(&syn, &fszB, &iszB, i->ARM64in.VCvtF2I.how);
         UChar armRM = i->ARM64in.VCvtF2I.armRM;
         if (armRM < 4) rmo = "npmz"[armRM];
         vex_printf("fcvt%c%c ", rmo, syn);
         ppHRegARM64(i->ARM64in.VCvtF2I.rD);
         vex_printf("(%c-reg), ", iszB == 4 ? 'W' : 'X');
         ppHRegARM64(i->ARM64in.VCvtF2I.rS);
         vex_printf("(%c-reg)", fszB == 4 ? 'S' : 'D');
         return;
      }
      case ARM64in_VCvtSD:
         vex_printf("fcvt%s ", i->ARM64in.VCvtSD.sToD ? "s2d" : "d2s");
         if (i->ARM64in.VCvtSD.sToD) {
            ppHRegARM64(i->ARM64in.VCvtSD.dst);
            vex_printf(", ");
            ppHRegARM64asSreg(i->ARM64in.VCvtSD.src);
         } else {
            ppHRegARM64asSreg(i->ARM64in.VCvtSD.dst);
            vex_printf(", ");
            ppHRegARM64(i->ARM64in.VCvtSD.src);
         }
         return;
      case ARM64in_VCvtHS:
         vex_printf("fcvt%s ", i->ARM64in.VCvtHS.hToS ? "h2s" : "s2h");
         if (i->ARM64in.VCvtHS.hToS) {
            ppHRegARM64asSreg(i->ARM64in.VCvtHS.dst);
            vex_printf(", ");
            ppHRegARM64asHreg(i->ARM64in.VCvtHS.src);
         } else {
            ppHRegARM64asHreg(i->ARM64in.VCvtHS.dst);
            vex_printf(", ");
            ppHRegARM64asSreg(i->ARM64in.VCvtHS.src);
         }
         return;
      case ARM64in_VCvtHD:
         vex_printf("fcvt%s ", i->ARM64in.VCvtHD.hToD ? "h2d" : "d2h");
         if (i->ARM64in.VCvtHD.hToD) {
            ppHRegARM64(i->ARM64in.VCvtHD.dst);
            vex_printf(", ");
            ppHRegARM64asHreg(i->ARM64in.VCvtHD.src);
         } else {
            ppHRegARM64asHreg(i->ARM64in.VCvtHD.dst);
            vex_printf(", ");
            ppHRegARM64(i->ARM64in.VCvtHD.src);
         }
         return;
      case ARM64in_VUnaryD:
         vex_printf("f%s ", showARM64FpUnaryOp(i->ARM64in.VUnaryD.op));
         ppHRegARM64(i->ARM64in.VUnaryD.dst);
         vex_printf(", ");
         ppHRegARM64(i->ARM64in.VUnaryD.src);
         return;
      case ARM64in_VUnaryS:
         vex_printf("f%s ", showARM64FpUnaryOp(i->ARM64in.VUnaryS.op));
         ppHRegARM64asSreg(i->ARM64in.VUnaryS.dst);
         vex_printf(", ");
         ppHRegARM64asSreg(i->ARM64in.VUnaryS.src);
         return;
      case ARM64in_VBinD:
         vex_printf("f%s ", showARM64FpBinOp(i->ARM64in.VBinD.op));
         ppHRegARM64(i->ARM64in.VBinD.dst);
         vex_printf(", ");
         ppHRegARM64(i->ARM64in.VBinD.argL);
         vex_printf(", ");
         ppHRegARM64(i->ARM64in.VBinD.argR);
         return;
      case ARM64in_VBinS:
         vex_printf("f%s ", showARM64FpBinOp(i->ARM64in.VBinS.op));
         ppHRegARM64asSreg(i->ARM64in.VBinS.dst);
         vex_printf(", ");
         ppHRegARM64asSreg(i->ARM64in.VBinS.argL);
         vex_printf(", ");
         ppHRegARM64asSreg(i->ARM64in.VBinS.argR);
         return;
      case ARM64in_VCmpD:
         vex_printf("fcmp ");
         ppHRegARM64(i->ARM64in.VCmpD.argL);
         vex_printf(", ");
         ppHRegARM64(i->ARM64in.VCmpD.argR);
         return;
      case ARM64in_VCmpS:
         vex_printf("fcmp ");
         ppHRegARM64asSreg(i->ARM64in.VCmpS.argL);
         vex_printf(", ");
         ppHRegARM64asSreg(i->ARM64in.VCmpS.argR);
         return;
      case ARM64in_VFCSel: {
         UInt (*ppHRegARM64fp)(HReg)
            = (i->ARM64in.VFCSel.isD ? ppHRegARM64 : ppHRegARM64asSreg);
         vex_printf("fcsel ");
         ppHRegARM64fp(i->ARM64in.VFCSel.dst);
         vex_printf(", ");
         ppHRegARM64fp(i->ARM64in.VFCSel.argL);
         vex_printf(", ");
         ppHRegARM64fp(i->ARM64in.VFCSel.argR);
         vex_printf(", %s", showARM64CondCode(i->ARM64in.VFCSel.cond));
         return;
      }
      case ARM64in_FPCR:
         if (i->ARM64in.FPCR.toFPCR) {
            vex_printf("msr fpcr, ");
            ppHRegARM64(i->ARM64in.FPCR.iReg);
         } else {
            vex_printf("mrs ");
            ppHRegARM64(i->ARM64in.FPCR.iReg);
            vex_printf(", fpcr");
         }
         return;
      case ARM64in_FPSR:
         if (i->ARM64in.FPSR.toFPSR) {
            vex_printf("msr fpsr, ");
            ppHRegARM64(i->ARM64in.FPSR.iReg);
         } else {
            vex_printf("mrs ");
            ppHRegARM64(i->ARM64in.FPSR.iReg);
            vex_printf(", fpsr");
         }
         return;
      case ARM64in_VBinV: {
         const HChar* nm = "??";
         const HChar* ar = "??";
         showARM64VecBinOp(&nm, &ar, i->ARM64in.VBinV.op);
         vex_printf("%s ", nm);
         ppHRegARM64(i->ARM64in.VBinV.dst);
         vex_printf(".%s, ", ar);
         ppHRegARM64(i->ARM64in.VBinV.argL);
         vex_printf(".%s, ", ar);
         ppHRegARM64(i->ARM64in.VBinV.argR);
         vex_printf(".%s", ar);
         return;
      }
      case ARM64in_VModifyV: {
         const HChar* nm = "??";
         const HChar* ar = "??";
         showARM64VecModifyOp(&nm, &ar, i->ARM64in.VModifyV.op);
         vex_printf("%s ", nm);
         ppHRegARM64(i->ARM64in.VModifyV.mod);
         vex_printf(".%s, ", ar);
         ppHRegARM64(i->ARM64in.VModifyV.arg);
         vex_printf(".%s", ar);
         return;
      }
      case ARM64in_VUnaryV: {
         const HChar* nm = "??";
         const HChar* ar = "??";
         showARM64VecUnaryOp(&nm, &ar, i->ARM64in.VUnaryV.op);
         vex_printf("%s ", nm);
         ppHRegARM64(i->ARM64in.VUnaryV.dst);
         vex_printf(".%s, ", ar);
         ppHRegARM64(i->ARM64in.VUnaryV.arg);
         vex_printf(".%s", ar);
         return;
      }
      case ARM64in_VNarrowV: {
         UInt dszBlg2 = i->ARM64in.VNarrowV.dszBlg2;
         const HChar* darr[3] = { "8b", "4h", "2s" };
         const HChar* sarr[3] = { "8h", "4s", "2d" };
         const HChar* nm = showARM64VecNarrowOp(i->ARM64in.VNarrowV.op);
         vex_printf("%s ", nm);
         ppHRegARM64(i->ARM64in.VNarrowV.dst);
         vex_printf(".%s, ", dszBlg2 < 3 ? darr[dszBlg2] : "??");
< 3 ? darr
[dszBlg2
] : "??");
1825 ppHRegARM64(i
->ARM64in
.VNarrowV
.src
);
1826 vex_printf(".%s", dszBlg2
< 3 ? sarr
[dszBlg2
] : "??");
1829 case ARM64in_VShiftImmV
: {
1830 const HChar
* nm
= "??";
1831 const HChar
* ar
= "??";
1832 showARM64VecShiftImmOp(&nm
, &ar
, i
->ARM64in
.VShiftImmV
.op
);
1833 vex_printf("%s ", nm
);
1834 ppHRegARM64(i
->ARM64in
.VShiftImmV
.dst
);
1835 vex_printf(".%s, ", ar
);
1836 ppHRegARM64(i
->ARM64in
.VShiftImmV
.src
);
1837 vex_printf(".%s, #%u", ar
, i
->ARM64in
.VShiftImmV
.amt
);
1840 case ARM64in_VExtV
: {
1842 ppHRegARM64(i
->ARM64in
.VExtV
.dst
);
1843 vex_printf(".16b, ");
1844 ppHRegARM64(i
->ARM64in
.VExtV
.srcLo
);
1845 vex_printf(".16b, ");
1846 ppHRegARM64(i
->ARM64in
.VExtV
.srcHi
);
1847 vex_printf(".16b, #%u", i
->ARM64in
.VExtV
.amtB
);
1851 vex_printf("qimm ");
1852 ppHRegARM64(i
->ARM64in
.VImmQ
.rQ
);
1853 vex_printf(", Bits16toBytes16(0x%x)", (UInt
)i
->ARM64in
.VImmQ
.imm
);
1855 case ARM64in_VDfromX
:
1856 vex_printf("fmov ");
1857 ppHRegARM64(i
->ARM64in
.VDfromX
.rD
);
1859 ppHRegARM64(i
->ARM64in
.VDfromX
.rX
);
1861 case ARM64in_VQfromX
:
1862 vex_printf("fmov ");
1863 ppHRegARM64(i
->ARM64in
.VQfromX
.rQ
);
1864 vex_printf(".d[0], ");
1865 ppHRegARM64(i
->ARM64in
.VQfromX
.rXlo
);
1867 case ARM64in_VQfromXX
:
1868 vex_printf("qFromXX ");
1869 ppHRegARM64(i
->ARM64in
.VQfromXX
.rQ
);
1871 ppHRegARM64(i
->ARM64in
.VQfromXX
.rXhi
);
1873 ppHRegARM64(i
->ARM64in
.VQfromXX
.rXlo
);
1875 case ARM64in_VXfromQ
:
1876 vex_printf("fmov ");
1877 ppHRegARM64(i
->ARM64in
.VXfromQ
.rX
);
1879 ppHRegARM64(i
->ARM64in
.VXfromQ
.rQ
);
1880 vex_printf(".d[%u]", i
->ARM64in
.VXfromQ
.laneNo
);
1882 case ARM64in_VXfromDorS
:
1883 vex_printf("fmov ");
1884 ppHRegARM64(i
->ARM64in
.VXfromDorS
.rX
);
1885 vex_printf("(%c-reg), ", i
->ARM64in
.VXfromDorS
.fromD
? 'X':'W');
1886 ppHRegARM64(i
->ARM64in
.VXfromDorS
.rDorS
);
1887 vex_printf("(%c-reg)", i
->ARM64in
.VXfromDorS
.fromD
? 'D' : 'S');
1889 case ARM64in_VMov
: {
1891 switch (i
->ARM64in
.VMov
.szB
) {
1892 case 16: aux
= 'q'; break;
1893 case 8: aux
= 'd'; break;
1894 case 4: aux
= 's'; break;
1897 vex_printf("mov(%c) ", aux
);
1898 ppHRegARM64(i
->ARM64in
.VMov
.dst
);
1900 ppHRegARM64(i
->ARM64in
.VMov
.src
);
1903 case ARM64in_EvCheck
:
1904 vex_printf("(evCheck) ldr w9,");
1905 ppARM64AMode(i
->ARM64in
.EvCheck
.amCounter
);
1906 vex_printf("; subs w9,w9,$1; str w9,");
1907 ppARM64AMode(i
->ARM64in
.EvCheck
.amCounter
);
1908 vex_printf("; bpl nofail; ldr x9,");
1909 ppARM64AMode(i
->ARM64in
.EvCheck
.amFailAddr
);
1910 vex_printf("; br x9; nofail:");
      case ARM64in_ProfInc:
         vex_printf("(profInc) imm64-fixed4 x9,$NotKnownYet; "
                    "ldr x8,[x9]; add x8,x8,#1, str x8,[x9]");
         return;
      default:
         vex_printf("ppARM64Instr: unhandled case (tag %d)", (Int)i->tag);
         vpanic("ppARM64Instr(1)");
   }
}

/* --------- Helpers for register allocation. --------- */

void getRegUsage_ARM64Instr ( HRegUsage* u, const ARM64Instr* i, Bool mode64 )
{
   vassert(mode64 == True);
   initHRegUsage(u);
   switch (i->tag) {
      case ARM64in_Arith:
1932 addHRegUse(u
, HRmWrite
, i
->ARM64in
.Arith
.dst
);
1933 addHRegUse(u
, HRmRead
, i
->ARM64in
.Arith
.argL
);
1934 addRegUsage_ARM64RIA(u
, i
->ARM64in
.Arith
.argR
);
1937 addHRegUse(u
, HRmRead
, i
->ARM64in
.Cmp
.argL
);
1938 addRegUsage_ARM64RIA(u
, i
->ARM64in
.Cmp
.argR
);
1941 addHRegUse(u
, HRmWrite
, i
->ARM64in
.Logic
.dst
);
1942 addHRegUse(u
, HRmRead
, i
->ARM64in
.Logic
.argL
);
1943 addRegUsage_ARM64RIL(u
, i
->ARM64in
.Logic
.argR
);
1946 addHRegUse(u
, HRmRead
, i
->ARM64in
.Test
.argL
);
1947 addRegUsage_ARM64RIL(u
, i
->ARM64in
.Test
.argR
);
1950 addHRegUse(u
, HRmWrite
, i
->ARM64in
.Shift
.dst
);
1951 addHRegUse(u
, HRmRead
, i
->ARM64in
.Shift
.argL
);
1952 addRegUsage_ARM64RI6(u
, i
->ARM64in
.Shift
.argR
);
1955 addHRegUse(u
, HRmWrite
, i
->ARM64in
.Unary
.dst
);
1956 addHRegUse(u
, HRmRead
, i
->ARM64in
.Unary
.src
);
1959 addHRegUse(u
, HRmWrite
, i
->ARM64in
.MovI
.dst
);
1960 addHRegUse(u
, HRmRead
, i
->ARM64in
.MovI
.src
);
1961 u
->isRegRegMove
= True
;
1962 u
->regMoveSrc
= i
->ARM64in
.MovI
.src
;
1963 u
->regMoveDst
= i
->ARM64in
.MovI
.dst
;
1966 addHRegUse(u
, HRmWrite
, i
->ARM64in
.Imm64
.dst
);
1968 case ARM64in_LdSt64
:
1969 addRegUsage_ARM64AMode(u
, i
->ARM64in
.LdSt64
.amode
);
1970 if (i
->ARM64in
.LdSt64
.isLoad
) {
1971 addHRegUse(u
, HRmWrite
, i
->ARM64in
.LdSt64
.rD
);
1973 addHRegUse(u
, HRmRead
, i
->ARM64in
.LdSt64
.rD
);
1976 case ARM64in_LdSt32
:
1977 addRegUsage_ARM64AMode(u
, i
->ARM64in
.LdSt32
.amode
);
1978 if (i
->ARM64in
.LdSt32
.isLoad
) {
1979 addHRegUse(u
, HRmWrite
, i
->ARM64in
.LdSt32
.rD
);
1981 addHRegUse(u
, HRmRead
, i
->ARM64in
.LdSt32
.rD
);
1984 case ARM64in_LdSt16
:
1985 addRegUsage_ARM64AMode(u
, i
->ARM64in
.LdSt16
.amode
);
1986 if (i
->ARM64in
.LdSt16
.isLoad
) {
1987 addHRegUse(u
, HRmWrite
, i
->ARM64in
.LdSt16
.rD
);
1989 addHRegUse(u
, HRmRead
, i
->ARM64in
.LdSt16
.rD
);
1993 addRegUsage_ARM64AMode(u
, i
->ARM64in
.LdSt8
.amode
);
1994 if (i
->ARM64in
.LdSt8
.isLoad
) {
1995 addHRegUse(u
, HRmWrite
, i
->ARM64in
.LdSt8
.rD
);
1997 addHRegUse(u
, HRmRead
, i
->ARM64in
.LdSt8
.rD
);
2000 /* XDirect/XIndir/XAssisted are also a bit subtle. They
2001 conditionally exit the block. Hence we only need to list (1)
2002 the registers that they read, and (2) the registers that they
2003 write in the case where the block is not exited. (2) is
2004 empty, hence only (1) is relevant here. */
2005 case ARM64in_XDirect
:
2006 addRegUsage_ARM64AMode(u
, i
->ARM64in
.XDirect
.amPC
);
2008 case ARM64in_XIndir
:
2009 addHRegUse(u
, HRmRead
, i
->ARM64in
.XIndir
.dstGA
);
2010 addRegUsage_ARM64AMode(u
, i
->ARM64in
.XIndir
.amPC
);
2012 case ARM64in_XAssisted
:
2013 addHRegUse(u
, HRmRead
, i
->ARM64in
.XAssisted
.dstGA
);
2014 addRegUsage_ARM64AMode(u
, i
->ARM64in
.XAssisted
.amPC
);
2017 addHRegUse(u
, HRmWrite
, i
->ARM64in
.CSel
.dst
);
2018 addHRegUse(u
, HRmRead
, i
->ARM64in
.CSel
.argL
);
2019 addHRegUse(u
, HRmRead
, i
->ARM64in
.CSel
.argR
);
2022 /* logic and comments copied/modified from x86 back end */
2023 /* This is a bit subtle. */
2024 /* First off, claim it trashes all the caller-saved regs
2025 which fall within the register allocator's jurisdiction.
2026 These I believe to be x0 to x7 and the 128-bit vector
2027 registers in use, q16 .. q20. */
2028 addHRegUse(u
, HRmWrite
, hregARM64_X0());
2029 addHRegUse(u
, HRmWrite
, hregARM64_X1());
2030 addHRegUse(u
, HRmWrite
, hregARM64_X2());
2031 addHRegUse(u
, HRmWrite
, hregARM64_X3());
2032 addHRegUse(u
, HRmWrite
, hregARM64_X4());
2033 addHRegUse(u
, HRmWrite
, hregARM64_X5());
2034 addHRegUse(u
, HRmWrite
, hregARM64_X6());
2035 addHRegUse(u
, HRmWrite
, hregARM64_X7());
2036 addHRegUse(u
, HRmWrite
, hregARM64_Q16());
2037 addHRegUse(u
, HRmWrite
, hregARM64_Q17());
2038 addHRegUse(u
, HRmWrite
, hregARM64_Q18());
2039 addHRegUse(u
, HRmWrite
, hregARM64_Q19());
2040 addHRegUse(u
, HRmWrite
, hregARM64_Q20());
2041 /* Now we have to state any parameter-carrying registers
2042 which might be read. This depends on nArgRegs. */
2043 switch (i
->ARM64in
.Call
.nArgRegs
) {
2044 case 8: addHRegUse(u
, HRmRead
, hregARM64_X7()); /*fallthru*/
2045 case 7: addHRegUse(u
, HRmRead
, hregARM64_X6()); /*fallthru*/
2046 case 6: addHRegUse(u
, HRmRead
, hregARM64_X5()); /*fallthru*/
2047 case 5: addHRegUse(u
, HRmRead
, hregARM64_X4()); /*fallthru*/
2048 case 4: addHRegUse(u
, HRmRead
, hregARM64_X3()); /*fallthru*/
2049 case 3: addHRegUse(u
, HRmRead
, hregARM64_X2()); /*fallthru*/
2050 case 2: addHRegUse(u
, HRmRead
, hregARM64_X1()); /*fallthru*/
2051 case 1: addHRegUse(u
, HRmRead
, hregARM64_X0()); break;
2053 default: vpanic("getRegUsage_ARM64:Call:regparms");
2055 /* Finally, there is the issue that the insn trashes a
2056 register because the literal target address has to be
2057 loaded into a register. However, we reserve x9 for that
2058 purpose so there's no further complexity here. Stating x9
2059 as trashed is pointless since it's not under the control
2060 of the allocator, but what the hell. */
2061 addHRegUse(u
, HRmWrite
, hregARM64_X9());
2063 case ARM64in_AddToSP
:
2064 /* Only changes SP, but regalloc doesn't control that, hence
2067 case ARM64in_FromSP
:
2068 addHRegUse(u
, HRmWrite
, i
->ARM64in
.FromSP
.dst
);
2071 addHRegUse(u
, HRmWrite
, i
->ARM64in
.Mul
.dst
);
2072 addHRegUse(u
, HRmRead
, i
->ARM64in
.Mul
.argL
);
2073 addHRegUse(u
, HRmRead
, i
->ARM64in
.Mul
.argR
);
2076 addHRegUse(u
, HRmRead
, hregARM64_X4());
2077 addHRegUse(u
, HRmWrite
, hregARM64_X2());
2080 addHRegUse(u
, HRmRead
, hregARM64_X4());
2081 addHRegUse(u
, HRmWrite
, hregARM64_X0());
2082 addHRegUse(u
, HRmRead
, hregARM64_X2());
2085 addHRegUse(u
, HRmRead
, hregARM64_X3());
2086 addHRegUse(u
, HRmRead
, hregARM64_X5());
2087 addHRegUse(u
, HRmRead
, hregARM64_X7());
2088 addHRegUse(u
, HRmWrite
, hregARM64_X1());
2089 /* Pointless to state this since X8 is not available to RA. */
2090 addHRegUse(u
, HRmWrite
, hregARM64_X8());
2092 case ARM64in_MFence
:
2096 case ARM64in_VLdStH
:
2097 addHRegUse(u
, HRmRead
, i
->ARM64in
.VLdStH
.rN
);
2098 if (i
->ARM64in
.VLdStH
.isLoad
) {
2099 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VLdStH
.hD
);
2101 addHRegUse(u
, HRmRead
, i
->ARM64in
.VLdStH
.hD
);
2104 case ARM64in_VLdStS
:
2105 addHRegUse(u
, HRmRead
, i
->ARM64in
.VLdStS
.rN
);
2106 if (i
->ARM64in
.VLdStS
.isLoad
) {
2107 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VLdStS
.sD
);
2109 addHRegUse(u
, HRmRead
, i
->ARM64in
.VLdStS
.sD
);
2112 case ARM64in_VLdStD
:
2113 addHRegUse(u
, HRmRead
, i
->ARM64in
.VLdStD
.rN
);
2114 if (i
->ARM64in
.VLdStD
.isLoad
) {
2115 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VLdStD
.dD
);
2117 addHRegUse(u
, HRmRead
, i
->ARM64in
.VLdStD
.dD
);
2120 case ARM64in_VLdStQ
:
2121 addHRegUse(u
, HRmRead
, i
->ARM64in
.VLdStQ
.rN
);
2122 if (i
->ARM64in
.VLdStQ
.isLoad
)
2123 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VLdStQ
.rQ
);
2125 addHRegUse(u
, HRmRead
, i
->ARM64in
.VLdStQ
.rQ
);
2127 case ARM64in_VCvtI2F
:
2128 addHRegUse(u
, HRmRead
, i
->ARM64in
.VCvtI2F
.rS
);
2129 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VCvtI2F
.rD
);
2131 case ARM64in_VCvtF2I
:
2132 addHRegUse(u
, HRmRead
, i
->ARM64in
.VCvtF2I
.rS
);
2133 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VCvtF2I
.rD
);
2135 case ARM64in_VCvtSD
:
2136 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VCvtSD
.dst
);
2137 addHRegUse(u
, HRmRead
, i
->ARM64in
.VCvtSD
.src
);
2139 case ARM64in_VCvtHS
:
2140 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VCvtHS
.dst
);
2141 addHRegUse(u
, HRmRead
, i
->ARM64in
.VCvtHS
.src
);
2143 case ARM64in_VCvtHD
:
2144 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VCvtHD
.dst
);
2145 addHRegUse(u
, HRmRead
, i
->ARM64in
.VCvtHD
.src
);
2147 case ARM64in_VUnaryD
:
2148 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VUnaryD
.dst
);
2149 addHRegUse(u
, HRmRead
, i
->ARM64in
.VUnaryD
.src
);
2151 case ARM64in_VUnaryS
:
2152 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VUnaryS
.dst
);
2153 addHRegUse(u
, HRmRead
, i
->ARM64in
.VUnaryS
.src
);
2156 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VBinD
.dst
);
2157 addHRegUse(u
, HRmRead
, i
->ARM64in
.VBinD
.argL
);
2158 addHRegUse(u
, HRmRead
, i
->ARM64in
.VBinD
.argR
);
2161 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VBinS
.dst
);
2162 addHRegUse(u
, HRmRead
, i
->ARM64in
.VBinS
.argL
);
2163 addHRegUse(u
, HRmRead
, i
->ARM64in
.VBinS
.argR
);
2166 addHRegUse(u
, HRmRead
, i
->ARM64in
.VCmpD
.argL
);
2167 addHRegUse(u
, HRmRead
, i
->ARM64in
.VCmpD
.argR
);
2170 addHRegUse(u
, HRmRead
, i
->ARM64in
.VCmpS
.argL
);
2171 addHRegUse(u
, HRmRead
, i
->ARM64in
.VCmpS
.argR
);
2173 case ARM64in_VFCSel
:
2174 addHRegUse(u
, HRmRead
, i
->ARM64in
.VFCSel
.argL
);
2175 addHRegUse(u
, HRmRead
, i
->ARM64in
.VFCSel
.argR
);
2176 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VFCSel
.dst
);
2179 if (i
->ARM64in
.FPCR
.toFPCR
)
2180 addHRegUse(u
, HRmRead
, i
->ARM64in
.FPCR
.iReg
);
2182 addHRegUse(u
, HRmWrite
, i
->ARM64in
.FPCR
.iReg
);
2185 if (i
->ARM64in
.FPSR
.toFPSR
)
2186 addHRegUse(u
, HRmRead
, i
->ARM64in
.FPSR
.iReg
);
2188 addHRegUse(u
, HRmWrite
, i
->ARM64in
.FPSR
.iReg
);
2191 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VBinV
.dst
);
2192 addHRegUse(u
, HRmRead
, i
->ARM64in
.VBinV
.argL
);
2193 addHRegUse(u
, HRmRead
, i
->ARM64in
.VBinV
.argR
);
2195 case ARM64in_VModifyV
:
2196 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VModifyV
.mod
);
2197 addHRegUse(u
, HRmRead
, i
->ARM64in
.VModifyV
.mod
);
2198 addHRegUse(u
, HRmRead
, i
->ARM64in
.VModifyV
.arg
);
2200 case ARM64in_VUnaryV
:
2201 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VUnaryV
.dst
);
2202 addHRegUse(u
, HRmRead
, i
->ARM64in
.VUnaryV
.arg
);
2204 case ARM64in_VNarrowV
:
2205 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VNarrowV
.dst
);
2206 addHRegUse(u
, HRmRead
, i
->ARM64in
.VNarrowV
.src
);
2208 case ARM64in_VShiftImmV
:
2209 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VShiftImmV
.dst
);
2210 addHRegUse(u
, HRmRead
, i
->ARM64in
.VShiftImmV
.src
);
2213 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VExtV
.dst
);
2214 addHRegUse(u
, HRmRead
, i
->ARM64in
.VExtV
.srcLo
);
2215 addHRegUse(u
, HRmRead
, i
->ARM64in
.VExtV
.srcHi
);
2218 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VImmQ
.rQ
);
2220 case ARM64in_VDfromX
:
2221 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VDfromX
.rD
);
2222 addHRegUse(u
, HRmRead
, i
->ARM64in
.VDfromX
.rX
);
2224 case ARM64in_VQfromX
:
2225 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VQfromX
.rQ
);
2226 addHRegUse(u
, HRmRead
, i
->ARM64in
.VQfromX
.rXlo
);
2228 case ARM64in_VQfromXX
:
2229 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VQfromXX
.rQ
);
2230 addHRegUse(u
, HRmRead
, i
->ARM64in
.VQfromXX
.rXhi
);
2231 addHRegUse(u
, HRmRead
, i
->ARM64in
.VQfromXX
.rXlo
);
2233 case ARM64in_VXfromQ
:
2234 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VXfromQ
.rX
);
2235 addHRegUse(u
, HRmRead
, i
->ARM64in
.VXfromQ
.rQ
);
2237 case ARM64in_VXfromDorS
:
2238 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VXfromDorS
.rX
);
2239 addHRegUse(u
, HRmRead
, i
->ARM64in
.VXfromDorS
.rDorS
);
2242 addHRegUse(u
, HRmWrite
, i
->ARM64in
.VMov
.dst
);
2243 addHRegUse(u
, HRmRead
, i
->ARM64in
.VMov
.src
);
2244 u
->isRegRegMove
= True
;
2245 u
->regMoveSrc
= i
->ARM64in
.VMov
.src
;
2246 u
->regMoveDst
= i
->ARM64in
.VMov
.dst
;
2248 case ARM64in_EvCheck
:
2249 /* We expect both amodes only to mention x21, so this is in
2250 fact pointless, since x21 isn't allocatable, but
2252 addRegUsage_ARM64AMode(u
, i
->ARM64in
.EvCheck
.amCounter
);
2253 addRegUsage_ARM64AMode(u
, i
->ARM64in
.EvCheck
.amFailAddr
);
2254 addHRegUse(u
, HRmWrite
, hregARM64_X9()); /* also unavail to RA */
2256 case ARM64in_ProfInc
:
2257 /* Again, pointless to actually state these since neither
2258 is available to RA. */
2259 addHRegUse(u
, HRmWrite
, hregARM64_X9()); /* unavail to RA */
2260 addHRegUse(u
, HRmWrite
, hregARM64_X8()); /* unavail to RA */
2264 vpanic("getRegUsage_ARM64Instr");
void mapRegs_ARM64Instr ( HRegRemap* m, ARM64Instr* i, Bool mode64 )
{
   vassert(mode64 == True);
   switch (i->tag) {
      case ARM64in_Arith:
2274 i
->ARM64in
.Arith
.dst
= lookupHRegRemap(m
, i
->ARM64in
.Arith
.dst
);
2275 i
->ARM64in
.Arith
.argL
= lookupHRegRemap(m
, i
->ARM64in
.Arith
.argL
);
2276 mapRegs_ARM64RIA(m
, i
->ARM64in
.Arith
.argR
);
2279 i
->ARM64in
.Cmp
.argL
= lookupHRegRemap(m
, i
->ARM64in
.Cmp
.argL
);
2280 mapRegs_ARM64RIA(m
, i
->ARM64in
.Cmp
.argR
);
2283 i
->ARM64in
.Logic
.dst
= lookupHRegRemap(m
, i
->ARM64in
.Logic
.dst
);
2284 i
->ARM64in
.Logic
.argL
= lookupHRegRemap(m
, i
->ARM64in
.Logic
.argL
);
2285 mapRegs_ARM64RIL(m
, i
->ARM64in
.Logic
.argR
);
      case ARM64in_Test:
         i->ARM64in.Test.argL = lookupHRegRemap(m, i->ARM64in.Test.argL);
         mapRegs_ARM64RIL(m, i->ARM64in.Test.argR);
         return;
      case ARM64in_Shift:
2292 i
->ARM64in
.Shift
.dst
= lookupHRegRemap(m
, i
->ARM64in
.Shift
.dst
);
2293 i
->ARM64in
.Shift
.argL
= lookupHRegRemap(m
, i
->ARM64in
.Shift
.argL
);
2294 mapRegs_ARM64RI6(m
, i
->ARM64in
.Shift
.argR
);
2297 i
->ARM64in
.Unary
.dst
= lookupHRegRemap(m
, i
->ARM64in
.Unary
.dst
);
2298 i
->ARM64in
.Unary
.src
= lookupHRegRemap(m
, i
->ARM64in
.Unary
.src
);
2301 i
->ARM64in
.MovI
.dst
= lookupHRegRemap(m
, i
->ARM64in
.MovI
.dst
);
2302 i
->ARM64in
.MovI
.src
= lookupHRegRemap(m
, i
->ARM64in
.MovI
.src
);
2305 i
->ARM64in
.Imm64
.dst
= lookupHRegRemap(m
, i
->ARM64in
.Imm64
.dst
);
2307 case ARM64in_LdSt64
:
2308 i
->ARM64in
.LdSt64
.rD
= lookupHRegRemap(m
, i
->ARM64in
.LdSt64
.rD
);
2309 mapRegs_ARM64AMode(m
, i
->ARM64in
.LdSt64
.amode
);
2311 case ARM64in_LdSt32
:
2312 i
->ARM64in
.LdSt32
.rD
= lookupHRegRemap(m
, i
->ARM64in
.LdSt32
.rD
);
2313 mapRegs_ARM64AMode(m
, i
->ARM64in
.LdSt32
.amode
);
2315 case ARM64in_LdSt16
:
2316 i
->ARM64in
.LdSt16
.rD
= lookupHRegRemap(m
, i
->ARM64in
.LdSt16
.rD
);
2317 mapRegs_ARM64AMode(m
, i
->ARM64in
.LdSt16
.amode
);
2320 i
->ARM64in
.LdSt8
.rD
= lookupHRegRemap(m
, i
->ARM64in
.LdSt8
.rD
);
2321 mapRegs_ARM64AMode(m
, i
->ARM64in
.LdSt8
.amode
);
2323 case ARM64in_XDirect
:
2324 mapRegs_ARM64AMode(m
, i
->ARM64in
.XDirect
.amPC
);
2326 case ARM64in_XIndir
:
2327 i
->ARM64in
.XIndir
.dstGA
2328 = lookupHRegRemap(m
, i
->ARM64in
.XIndir
.dstGA
);
2329 mapRegs_ARM64AMode(m
, i
->ARM64in
.XIndir
.amPC
);
2331 case ARM64in_XAssisted
:
2332 i
->ARM64in
.XAssisted
.dstGA
2333 = lookupHRegRemap(m
, i
->ARM64in
.XAssisted
.dstGA
);
2334 mapRegs_ARM64AMode(m
, i
->ARM64in
.XAssisted
.amPC
);
2337 i
->ARM64in
.CSel
.dst
= lookupHRegRemap(m
, i
->ARM64in
.CSel
.dst
);
2338 i
->ARM64in
.CSel
.argL
= lookupHRegRemap(m
, i
->ARM64in
.CSel
.argL
);
2339 i
->ARM64in
.CSel
.argR
= lookupHRegRemap(m
, i
->ARM64in
.CSel
.argR
);
2343 case ARM64in_AddToSP
:
2345 case ARM64in_FromSP
:
2346 i
->ARM64in
.FromSP
.dst
= lookupHRegRemap(m
, i
->ARM64in
.FromSP
.dst
);
2349 i
->ARM64in
.Mul
.dst
= lookupHRegRemap(m
, i
->ARM64in
.Mul
.dst
);
2350 i
->ARM64in
.Mul
.argL
= lookupHRegRemap(m
, i
->ARM64in
.Mul
.argL
);
2351 i
->ARM64in
.Mul
.argR
= lookupHRegRemap(m
, i
->ARM64in
.Mul
.argR
);
2359 case ARM64in_MFence
:
2363 case ARM64in_VLdStH
:
2364 i
->ARM64in
.VLdStH
.hD
= lookupHRegRemap(m
, i
->ARM64in
.VLdStH
.hD
);
2365 i
->ARM64in
.VLdStH
.rN
= lookupHRegRemap(m
, i
->ARM64in
.VLdStH
.rN
);
2367 case ARM64in_VLdStS
:
2368 i
->ARM64in
.VLdStS
.sD
= lookupHRegRemap(m
, i
->ARM64in
.VLdStS
.sD
);
2369 i
->ARM64in
.VLdStS
.rN
= lookupHRegRemap(m
, i
->ARM64in
.VLdStS
.rN
);
2371 case ARM64in_VLdStD
:
2372 i
->ARM64in
.VLdStD
.dD
= lookupHRegRemap(m
, i
->ARM64in
.VLdStD
.dD
);
2373 i
->ARM64in
.VLdStD
.rN
= lookupHRegRemap(m
, i
->ARM64in
.VLdStD
.rN
);
2375 case ARM64in_VLdStQ
:
2376 i
->ARM64in
.VLdStQ
.rQ
= lookupHRegRemap(m
, i
->ARM64in
.VLdStQ
.rQ
);
2377 i
->ARM64in
.VLdStQ
.rN
= lookupHRegRemap(m
, i
->ARM64in
.VLdStQ
.rN
);
2379 case ARM64in_VCvtI2F
:
2380 i
->ARM64in
.VCvtI2F
.rS
= lookupHRegRemap(m
, i
->ARM64in
.VCvtI2F
.rS
);
2381 i
->ARM64in
.VCvtI2F
.rD
= lookupHRegRemap(m
, i
->ARM64in
.VCvtI2F
.rD
);
2383 case ARM64in_VCvtF2I
:
2384 i
->ARM64in
.VCvtF2I
.rS
= lookupHRegRemap(m
, i
->ARM64in
.VCvtF2I
.rS
);
2385 i
->ARM64in
.VCvtF2I
.rD
= lookupHRegRemap(m
, i
->ARM64in
.VCvtF2I
.rD
);
2387 case ARM64in_VCvtSD
:
2388 i
->ARM64in
.VCvtSD
.dst
= lookupHRegRemap(m
, i
->ARM64in
.VCvtSD
.dst
);
2389 i
->ARM64in
.VCvtSD
.src
= lookupHRegRemap(m
, i
->ARM64in
.VCvtSD
.src
);
2391 case ARM64in_VCvtHS
:
2392 i
->ARM64in
.VCvtHS
.dst
= lookupHRegRemap(m
, i
->ARM64in
.VCvtHS
.dst
);
2393 i
->ARM64in
.VCvtHS
.src
= lookupHRegRemap(m
, i
->ARM64in
.VCvtHS
.src
);
2395 case ARM64in_VCvtHD
:
2396 i
->ARM64in
.VCvtHD
.dst
= lookupHRegRemap(m
, i
->ARM64in
.VCvtHD
.dst
);
2397 i
->ARM64in
.VCvtHD
.src
= lookupHRegRemap(m
, i
->ARM64in
.VCvtHD
.src
);
2399 case ARM64in_VUnaryD
:
2400 i
->ARM64in
.VUnaryD
.dst
= lookupHRegRemap(m
, i
->ARM64in
.VUnaryD
.dst
);
2401 i
->ARM64in
.VUnaryD
.src
= lookupHRegRemap(m
, i
->ARM64in
.VUnaryD
.src
);
2403 case ARM64in_VUnaryS
:
2404 i
->ARM64in
.VUnaryS
.dst
= lookupHRegRemap(m
, i
->ARM64in
.VUnaryS
.dst
);
2405 i
->ARM64in
.VUnaryS
.src
= lookupHRegRemap(m
, i
->ARM64in
.VUnaryS
.src
);
2408 i
->ARM64in
.VBinD
.dst
= lookupHRegRemap(m
, i
->ARM64in
.VBinD
.dst
);
2409 i
->ARM64in
.VBinD
.argL
= lookupHRegRemap(m
, i
->ARM64in
.VBinD
.argL
);
2410 i
->ARM64in
.VBinD
.argR
= lookupHRegRemap(m
, i
->ARM64in
.VBinD
.argR
);
2413 i
->ARM64in
.VBinS
.dst
= lookupHRegRemap(m
, i
->ARM64in
.VBinS
.dst
);
2414 i
->ARM64in
.VBinS
.argL
= lookupHRegRemap(m
, i
->ARM64in
.VBinS
.argL
);
2415 i
->ARM64in
.VBinS
.argR
= lookupHRegRemap(m
, i
->ARM64in
.VBinS
.argR
);
2418 i
->ARM64in
.VCmpD
.argL
= lookupHRegRemap(m
, i
->ARM64in
.VCmpD
.argL
);
2419 i
->ARM64in
.VCmpD
.argR
= lookupHRegRemap(m
, i
->ARM64in
.VCmpD
.argR
);
2422 i
->ARM64in
.VCmpS
.argL
= lookupHRegRemap(m
, i
->ARM64in
.VCmpS
.argL
);
2423 i
->ARM64in
.VCmpS
.argR
= lookupHRegRemap(m
, i
->ARM64in
.VCmpS
.argR
);
2425 case ARM64in_VFCSel
:
2426 i
->ARM64in
.VFCSel
.argL
= lookupHRegRemap(m
, i
->ARM64in
.VFCSel
.argL
);
2427 i
->ARM64in
.VFCSel
.argR
= lookupHRegRemap(m
, i
->ARM64in
.VFCSel
.argR
);
2428 i
->ARM64in
.VFCSel
.dst
= lookupHRegRemap(m
, i
->ARM64in
.VFCSel
.dst
);
2431 i
->ARM64in
.FPCR
.iReg
= lookupHRegRemap(m
, i
->ARM64in
.FPCR
.iReg
);
2434 i
->ARM64in
.FPSR
.iReg
= lookupHRegRemap(m
, i
->ARM64in
.FPSR
.iReg
);
2437 i
->ARM64in
.VBinV
.dst
= lookupHRegRemap(m
, i
->ARM64in
.VBinV
.dst
);
2438 i
->ARM64in
.VBinV
.argL
= lookupHRegRemap(m
, i
->ARM64in
.VBinV
.argL
);
2439 i
->ARM64in
.VBinV
.argR
= lookupHRegRemap(m
, i
->ARM64in
.VBinV
.argR
);
2441 case ARM64in_VModifyV
:
2442 i
->ARM64in
.VModifyV
.mod
= lookupHRegRemap(m
, i
->ARM64in
.VModifyV
.mod
);
2443 i
->ARM64in
.VModifyV
.arg
= lookupHRegRemap(m
, i
->ARM64in
.VModifyV
.arg
);
2445 case ARM64in_VUnaryV
:
2446 i
->ARM64in
.VUnaryV
.dst
= lookupHRegRemap(m
, i
->ARM64in
.VUnaryV
.dst
);
2447 i
->ARM64in
.VUnaryV
.arg
= lookupHRegRemap(m
, i
->ARM64in
.VUnaryV
.arg
);
2449 case ARM64in_VNarrowV
:
2450 i
->ARM64in
.VNarrowV
.dst
= lookupHRegRemap(m
, i
->ARM64in
.VNarrowV
.dst
);
2451 i
->ARM64in
.VNarrowV
.src
= lookupHRegRemap(m
, i
->ARM64in
.VNarrowV
.src
);
2453 case ARM64in_VShiftImmV
:
2454 i
->ARM64in
.VShiftImmV
.dst
2455 = lookupHRegRemap(m
, i
->ARM64in
.VShiftImmV
.dst
);
2456 i
->ARM64in
.VShiftImmV
.src
2457 = lookupHRegRemap(m
, i
->ARM64in
.VShiftImmV
.src
);
2460 i
->ARM64in
.VExtV
.dst
= lookupHRegRemap(m
, i
->ARM64in
.VExtV
.dst
);
2461 i
->ARM64in
.VExtV
.srcLo
= lookupHRegRemap(m
, i
->ARM64in
.VExtV
.srcLo
);
2462 i
->ARM64in
.VExtV
.srcHi
= lookupHRegRemap(m
, i
->ARM64in
.VExtV
.srcHi
);
2465 i
->ARM64in
.VImmQ
.rQ
= lookupHRegRemap(m
, i
->ARM64in
.VImmQ
.rQ
);
2467 case ARM64in_VDfromX
:
2468 i
->ARM64in
.VDfromX
.rD
2469 = lookupHRegRemap(m
, i
->ARM64in
.VDfromX
.rD
);
2470 i
->ARM64in
.VDfromX
.rX
2471 = lookupHRegRemap(m
, i
->ARM64in
.VDfromX
.rX
);
2473 case ARM64in_VQfromX
:
2474 i
->ARM64in
.VQfromX
.rQ
2475 = lookupHRegRemap(m
, i
->ARM64in
.VQfromX
.rQ
);
2476 i
->ARM64in
.VQfromX
.rXlo
2477 = lookupHRegRemap(m
, i
->ARM64in
.VQfromX
.rXlo
);
2479 case ARM64in_VQfromXX
:
2480 i
->ARM64in
.VQfromXX
.rQ
2481 = lookupHRegRemap(m
, i
->ARM64in
.VQfromXX
.rQ
);
2482 i
->ARM64in
.VQfromXX
.rXhi
2483 = lookupHRegRemap(m
, i
->ARM64in
.VQfromXX
.rXhi
);
2484 i
->ARM64in
.VQfromXX
.rXlo
2485 = lookupHRegRemap(m
, i
->ARM64in
.VQfromXX
.rXlo
);
2487 case ARM64in_VXfromQ
:
2488 i
->ARM64in
.VXfromQ
.rX
2489 = lookupHRegRemap(m
, i
->ARM64in
.VXfromQ
.rX
);
2490 i
->ARM64in
.VXfromQ
.rQ
2491 = lookupHRegRemap(m
, i
->ARM64in
.VXfromQ
.rQ
);
2493 case ARM64in_VXfromDorS
:
2494 i
->ARM64in
.VXfromDorS
.rX
2495 = lookupHRegRemap(m
, i
->ARM64in
.VXfromDorS
.rX
);
2496 i
->ARM64in
.VXfromDorS
.rDorS
2497 = lookupHRegRemap(m
, i
->ARM64in
.VXfromDorS
.rDorS
);
2500 i
->ARM64in
.VMov
.dst
= lookupHRegRemap(m
, i
->ARM64in
.VMov
.dst
);
2501 i
->ARM64in
.VMov
.src
= lookupHRegRemap(m
, i
->ARM64in
.VMov
.src
);
2503 case ARM64in_EvCheck
:
2504 /* We expect both amodes only to mention x21, so this is in
2505 fact pointless, since x21 isn't allocatable, but
2507 mapRegs_ARM64AMode(m
, i
->ARM64in
.EvCheck
.amCounter
);
2508 mapRegs_ARM64AMode(m
, i
->ARM64in
.EvCheck
.amFailAddr
);
2510 case ARM64in_ProfInc
:
2511 /* hardwires x8 and x9 -- nothing to modify. */
2515 vpanic("mapRegs_ARM64Instr");
/* Generate arm spill/reload instructions under the direction of the
   register allocator.  Note it's critical these don't write the
   condition codes. */

void genSpill_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                      HReg rreg, Int offsetB, Bool mode64 )
{
   HRegClass rclass;
   vassert(offsetB >= 0);
   vassert(!hregIsVirtual(rreg));
   vassert(mode64 == True);
   *i1 = *i2 = NULL;
   rclass = hregClass(rreg);
   switch (rclass) {
      case HRcInt64:
         vassert(0 == (offsetB & 7));
         offsetB >>= 3;
         vassert(offsetB < 4096);
         *i1 = ARM64Instr_LdSt64(
                  False/*!isLoad*/,
                  rreg,
                  ARM64AMode_RI12(hregARM64_X21(), offsetB, 8)
               );
         return;
      case HRcFlt64:
         vassert(0 == (offsetB & 7));
         vassert(offsetB >= 0 && offsetB < 32768);
         *i1 = ARM64Instr_VLdStD(False/*!isLoad*/,
                                 rreg, hregARM64_X21(), offsetB);
         return;
      case HRcVec128: {
         HReg x21 = hregARM64_X21();  // baseblock
         HReg x9  = hregARM64_X9();   // spill temporary
         vassert(0 == (offsetB & 15)); // check sane alignment
         vassert(offsetB < 4096);
         *i1 = ARM64Instr_Arith(x9, x21, ARM64RIA_I12(offsetB, 0), True);
         *i2 = ARM64Instr_VLdStQ(False/*!isLoad*/, rreg, x9);
         return;
      }
      default:
         ppHRegClass(rclass);
         vpanic("genSpill_ARM: unimplemented regclass");
   }
}

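/* As a worked example of the above (illustration only, not from the
   original source): spilling an HRcInt64 register at offsetB == 24
   produces a single "str <rreg>, [x21, #24]", since the RI12 amode
   scales its uimm12 by 8, so 24 >> 3 == 3 is what gets encoded.
   Spilling an HRcVec128 register takes two instructions,
   "add x9, x21, #offsetB" followed by "st1 {<rreg>.2d}, [x9]", because
   the Q-register store has no reg+imm form here; x9 is usable as the
   address temporary precisely because it is kept out of allocation. */
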
void genReload_ARM64 ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
                       HReg rreg, Int offsetB, Bool mode64 )
{
   HRegClass rclass;
   vassert(offsetB >= 0);
   vassert(!hregIsVirtual(rreg));
   vassert(mode64 == True);
   *i1 = *i2 = NULL;
   rclass = hregClass(rreg);
   switch (rclass) {
      case HRcInt64:
         vassert(0 == (offsetB & 7));
         offsetB >>= 3;
         vassert(offsetB < 4096);
         *i1 = ARM64Instr_LdSt64(
                  True/*isLoad*/,
                  rreg,
                  ARM64AMode_RI12(hregARM64_X21(), offsetB, 8)
               );
         return;
      case HRcFlt64:
         vassert(0 == (offsetB & 7));
         vassert(offsetB >= 0 && offsetB < 32768);
         *i1 = ARM64Instr_VLdStD(True/*isLoad*/,
                                 rreg, hregARM64_X21(), offsetB);
         return;
      case HRcVec128: {
         HReg x21 = hregARM64_X21();  // baseblock
         HReg x9  = hregARM64_X9();   // spill temporary
         vassert(0 == (offsetB & 15)); // check sane alignment
         vassert(offsetB < 4096);
         *i1 = ARM64Instr_Arith(x9, x21, ARM64RIA_I12(offsetB, 0), True);
         *i2 = ARM64Instr_VLdStQ(True/*isLoad*/, rreg, x9);
         return;
      }
      default:
         ppHRegClass(rclass);
         vpanic("genReload_ARM: unimplemented regclass");
   }
}

ARM64Instr* genMove_ARM64(HReg from, HReg to, Bool mode64)
{
   switch (hregClass(from)) {
      case HRcInt64:
         return ARM64Instr_MovI(to, from);
      case HRcFlt64:
         return ARM64Instr_VMov(8, to, from);
      case HRcVec128:
         return ARM64Instr_VMov(16, to, from);
      default:
         ppHRegClass(hregClass(from));
         vpanic("genMove_ARM64: unimplemented regclass");
   }
}

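/* Design note (added for orientation): genMove_ARM64 only has to cover
   the classes the allocator keeps in registers (HRcInt64, HRcFlt64,
   HRcVec128).  The moves it returns are the same ARM64in_MovI and
   ARM64in_VMov forms that getRegUsage_ARM64Instr above marks with
   u->isRegRegMove / regMoveSrc / regMoveDst, which the register
   allocator uses to try to coalesce such copies away when source and
   destination land in the same real register. */
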
/* Emit an instruction into buf and return the number of bytes used.
   Note that buf is not the insn's final place, and therefore it is
   imperative to emit position-independent code. */

static inline UInt iregEnc ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcInt64);
   vassert(!hregIsVirtual(r));
   n = hregEncoding(r);
   vassert(n <= 30);
   return n;
}

static inline UInt dregEnc ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcFlt64);
   vassert(!hregIsVirtual(r));
   n = hregEncoding(r);
   vassert(n <= 31);
   return n;
}

static inline UInt qregEnc ( HReg r )
{
   UInt n;
   vassert(hregClass(r) == HRcVec128);
   vassert(!hregIsVirtual(r));
   n = hregEncoding(r);
   vassert(n <= 31);
   return n;
}

2655 #define BITS4(zzb3,zzb2,zzb1,zzb0) \
2656 (((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0))
2658 #define X00 BITS4(0,0, 0,0)
2659 #define X01 BITS4(0,0, 0,1)
2660 #define X10 BITS4(0,0, 1,0)
2661 #define X11 BITS4(0,0, 1,1)
2663 #define X000 BITS4(0, 0,0,0)
2664 #define X001 BITS4(0, 0,0,1)
2665 #define X010 BITS4(0, 0,1,0)
2666 #define X011 BITS4(0, 0,1,1)
2667 #define X100 BITS4(0, 1,0,0)
2668 #define X101 BITS4(0, 1,0,1)
2669 #define X110 BITS4(0, 1,1,0)
2670 #define X111 BITS4(0, 1,1,1)
2672 #define X0000 BITS4(0,0,0,0)
2673 #define X0001 BITS4(0,0,0,1)
2674 #define X0010 BITS4(0,0,1,0)
2675 #define X0011 BITS4(0,0,1,1)
2677 #define BITS8(zzb7,zzb6,zzb5,zzb4,zzb3,zzb2,zzb1,zzb0) \
2678 ((BITS4(zzb7,zzb6,zzb5,zzb4) << 4) | BITS4(zzb3,zzb2,zzb1,zzb0))
2680 #define X00000 BITS8(0,0,0, 0,0,0,0,0)
2681 #define X00001 BITS8(0,0,0, 0,0,0,0,1)
2682 #define X00110 BITS8(0,0,0, 0,0,1,1,0)
2683 #define X00111 BITS8(0,0,0, 0,0,1,1,1)
2684 #define X01000 BITS8(0,0,0, 0,1,0,0,0)
2685 #define X10000 BITS8(0,0,0, 1,0,0,0,0)
2686 #define X11000 BITS8(0,0,0, 1,1,0,0,0)
2687 #define X11110 BITS8(0,0,0, 1,1,1,1,0)
2688 #define X11111 BITS8(0,0,0, 1,1,1,1,1)
2690 #define X000000 BITS8(0,0, 0,0,0,0,0,0)
2691 #define X000001 BITS8(0,0, 0,0,0,0,0,1)
2692 #define X000010 BITS8(0,0, 0,0,0,0,1,0)
2693 #define X000011 BITS8(0,0, 0,0,0,0,1,1)
2694 #define X000100 BITS8(0,0, 0,0,0,1,0,0)
2695 #define X000110 BITS8(0,0, 0,0,0,1,1,0)
2696 #define X000111 BITS8(0,0, 0,0,0,1,1,1)
2697 #define X001000 BITS8(0,0, 0,0,1,0,0,0)
2698 #define X001001 BITS8(0,0, 0,0,1,0,0,1)
2699 #define X001010 BITS8(0,0, 0,0,1,0,1,0)
2700 #define X001011 BITS8(0,0, 0,0,1,0,1,1)
2701 #define X001101 BITS8(0,0, 0,0,1,1,0,1)
2702 #define X001110 BITS8(0,0, 0,0,1,1,1,0)
2703 #define X001111 BITS8(0,0, 0,0,1,1,1,1)
2704 #define X010000 BITS8(0,0, 0,1,0,0,0,0)
2705 #define X010001 BITS8(0,0, 0,1,0,0,0,1)
2706 #define X010010 BITS8(0,0, 0,1,0,0,1,0)
2707 #define X010011 BITS8(0,0, 0,1,0,0,1,1)
2708 #define X010101 BITS8(0,0, 0,1,0,1,0,1)
2709 #define X010110 BITS8(0,0, 0,1,0,1,1,0)
2710 #define X010111 BITS8(0,0, 0,1,0,1,1,1)
2711 #define X011001 BITS8(0,0, 0,1,1,0,0,1)
2712 #define X011010 BITS8(0,0, 0,1,1,0,1,0)
2713 #define X011011 BITS8(0,0, 0,1,1,0,1,1)
2714 #define X011101 BITS8(0,0, 0,1,1,1,0,1)
2715 #define X011110 BITS8(0,0, 0,1,1,1,1,0)
2716 #define X011111 BITS8(0,0, 0,1,1,1,1,1)
2717 #define X100001 BITS8(0,0, 1,0,0,0,0,1)
2718 #define X100011 BITS8(0,0, 1,0,0,0,1,1)
2719 #define X100100 BITS8(0,0, 1,0,0,1,0,0)
2720 #define X100101 BITS8(0,0, 1,0,0,1,0,1)
2721 #define X100110 BITS8(0,0, 1,0,0,1,1,0)
2722 #define X100111 BITS8(0,0, 1,0,0,1,1,1)
2723 #define X101101 BITS8(0,0, 1,0,1,1,0,1)
2724 #define X101110 BITS8(0,0, 1,0,1,1,1,0)
2725 #define X110000 BITS8(0,0, 1,1,0,0,0,0)
2726 #define X110001 BITS8(0,0, 1,1,0,0,0,1)
2727 #define X110010 BITS8(0,0, 1,1,0,0,1,0)
2728 #define X110100 BITS8(0,0, 1,1,0,1,0,0)
2729 #define X110101 BITS8(0,0, 1,1,0,1,0,1)
2730 #define X110110 BITS8(0,0, 1,1,0,1,1,0)
2731 #define X110111 BITS8(0,0, 1,1,0,1,1,1)
2732 #define X111000 BITS8(0,0, 1,1,1,0,0,0)
2733 #define X111001 BITS8(0,0, 1,1,1,0,0,1)
2734 #define X111101 BITS8(0,0, 1,1,1,1,0,1)
2735 #define X111110 BITS8(0,0, 1,1,1,1,1,0)
2736 #define X111111 BITS8(0,0, 1,1,1,1,1,1)
2738 #define X0001000 BITS8(0, 0,0,0,1,0,0,0)
2739 #define X0010000 BITS8(0, 0,0,1,0,0,0,0)
2740 #define X0100000 BITS8(0, 0,1,0,0,0,0,0)
2741 #define X1000000 BITS8(0, 1,0,0,0,0,0,0)
2743 #define X00100000 BITS8(0,0,1,0,0,0,0,0)
2744 #define X00100001 BITS8(0,0,1,0,0,0,0,1)
2745 #define X00100010 BITS8(0,0,1,0,0,0,1,0)
2746 #define X00100011 BITS8(0,0,1,0,0,0,1,1)
2747 #define X01010000 BITS8(0,1,0,1,0,0,0,0)
2748 #define X01010001 BITS8(0,1,0,1,0,0,0,1)
2749 #define X01010100 BITS8(0,1,0,1,0,1,0,0)
2750 #define X01011000 BITS8(0,1,0,1,1,0,0,0)
2751 #define X01100000 BITS8(0,1,1,0,0,0,0,0)
2752 #define X01100001 BITS8(0,1,1,0,0,0,0,1)
2753 #define X01100010 BITS8(0,1,1,0,0,0,1,0)
2754 #define X01100011 BITS8(0,1,1,0,0,0,1,1)
2755 #define X01110000 BITS8(0,1,1,1,0,0,0,0)
2756 #define X01110001 BITS8(0,1,1,1,0,0,0,1)
2757 #define X01110010 BITS8(0,1,1,1,0,0,1,0)
2758 #define X01110011 BITS8(0,1,1,1,0,0,1,1)
2759 #define X01110100 BITS8(0,1,1,1,0,1,0,0)
2760 #define X01110101 BITS8(0,1,1,1,0,1,0,1)
2761 #define X01110110 BITS8(0,1,1,1,0,1,1,0)
2762 #define X01110111 BITS8(0,1,1,1,0,1,1,1)
2763 #define X11000001 BITS8(1,1,0,0,0,0,0,1)
2764 #define X11000011 BITS8(1,1,0,0,0,0,1,1)
2765 #define X11010100 BITS8(1,1,0,1,0,1,0,0)
2766 #define X11010110 BITS8(1,1,0,1,0,1,1,0)
2767 #define X11011000 BITS8(1,1,0,1,1,0,0,0)
2768 #define X11011010 BITS8(1,1,0,1,1,0,1,0)
2769 #define X11011110 BITS8(1,1,0,1,1,1,1,0)
2770 #define X11100010 BITS8(1,1,1,0,0,0,1,0)
2771 #define X11110001 BITS8(1,1,1,1,0,0,0,1)
2772 #define X11110011 BITS8(1,1,1,1,0,0,1,1)
2773 #define X11110101 BITS8(1,1,1,1,0,1,0,1)
2774 #define X11110111 BITS8(1,1,1,1,0,1,1,1)
2777 /* --- 4 fields --- */
2779 static inline UInt
X_8_19_1_4 ( UInt f1
, UInt f2
, UInt f3
, UInt f4
) {
2780 vassert(8+19+1+4 == 32);
2781 vassert(f1
< (1<<8));
2782 vassert(f2
< (1<<19));
2783 vassert(f3
< (1<<1));
2784 vassert(f4
< (1<<4));
2793 /* --- 5 fields --- */
2795 static inline UInt
X_3_6_2_16_5 ( UInt f1
, UInt f2
,
2796 UInt f3
, UInt f4
, UInt f5
) {
2797 vassert(3+6+2+16+5 == 32);
2798 vassert(f1
< (1<<3));
2799 vassert(f2
< (1<<6));
2800 vassert(f3
< (1<<2));
2801 vassert(f4
< (1<<16));
2802 vassert(f5
< (1<<5));
2812 /* --- 6 fields --- */
2814 static inline UInt
X_2_6_2_12_5_5 ( UInt f1
, UInt f2
, UInt f3
,
2815 UInt f4
, UInt f5
, UInt f6
) {
2816 vassert(2+6+2+12+5+5 == 32);
2817 vassert(f1
< (1<<2));
2818 vassert(f2
< (1<<6));
2819 vassert(f3
< (1<<2));
2820 vassert(f4
< (1<<12));
2821 vassert(f5
< (1<<5));
2822 vassert(f6
< (1<<5));
2833 static inline UInt
X_3_8_5_6_5_5 ( UInt f1
, UInt f2
, UInt f3
,
2834 UInt f4
, UInt f5
, UInt f6
) {
2835 vassert(3+8+5+6+5+5 == 32);
2836 vassert(f1
< (1<<3));
2837 vassert(f2
< (1<<8));
2838 vassert(f3
< (1<<5));
2839 vassert(f4
< (1<<6));
2840 vassert(f5
< (1<<5));
2841 vassert(f6
< (1<<5));
2852 static inline UInt
X_3_5_8_6_5_5 ( UInt f1
, UInt f2
, UInt f3
,
2853 UInt f4
, UInt f5
, UInt f6
) {
2854 vassert(3+8+5+6+5+5 == 32);
2855 vassert(f1
< (1<<3));
2856 vassert(f2
< (1<<5));
2857 vassert(f3
< (1<<8));
2858 vassert(f4
< (1<<6));
2859 vassert(f5
< (1<<5));
2860 vassert(f6
< (1<<5));
2871 static inline UInt
X_3_6_7_6_5_5 ( UInt f1
, UInt f2
, UInt f3
,
2872 UInt f4
, UInt f5
, UInt f6
) {
2873 vassert(3+6+7+6+5+5 == 32);
2874 vassert(f1
< (1<<3));
2875 vassert(f2
< (1<<6));
2876 vassert(f3
< (1<<7));
2877 vassert(f4
< (1<<6));
2878 vassert(f5
< (1<<5));
2879 vassert(f6
< (1<<5));
2890 /* --- 7 fields --- */
2892 static inline UInt
X_2_6_3_9_2_5_5 ( UInt f1
, UInt f2
, UInt f3
,
2893 UInt f4
, UInt f5
, UInt f6
, UInt f7
) {
2894 vassert(2+6+3+9+2+5+5 == 32);
2895 vassert(f1
< (1<<2));
2896 vassert(f2
< (1<<6));
2897 vassert(f3
< (1<<3));
2898 vassert(f4
< (1<<9));
2899 vassert(f5
< (1<<2));
2900 vassert(f6
< (1<<5));
2901 vassert(f7
< (1<<5));
2913 static inline UInt
X_3_6_1_6_6_5_5 ( UInt f1
, UInt f2
, UInt f3
,
2914 UInt f4
, UInt f5
, UInt f6
, UInt f7
) {
2915 vassert(3+6+1+6+6+5+5 == 32);
2916 vassert(f1
< (1<<3));
2917 vassert(f2
< (1<<6));
2918 vassert(f3
< (1<<1));
2919 vassert(f4
< (1<<6));
2920 vassert(f5
< (1<<6));
2921 vassert(f6
< (1<<5));
2922 vassert(f7
< (1<<5));
2935 //ZZ #define X0000 BITS4(0,0,0,0)
2936 //ZZ #define X0001 BITS4(0,0,0,1)
2937 //ZZ #define X0010 BITS4(0,0,1,0)
2938 //ZZ #define X0011 BITS4(0,0,1,1)
2939 //ZZ #define X0100 BITS4(0,1,0,0)
2940 //ZZ #define X0101 BITS4(0,1,0,1)
2941 //ZZ #define X0110 BITS4(0,1,1,0)
2942 //ZZ #define X0111 BITS4(0,1,1,1)
2943 //ZZ #define X1000 BITS4(1,0,0,0)
2944 //ZZ #define X1001 BITS4(1,0,0,1)
2945 //ZZ #define X1010 BITS4(1,0,1,0)
2946 //ZZ #define X1011 BITS4(1,0,1,1)
2947 //ZZ #define X1100 BITS4(1,1,0,0)
2948 //ZZ #define X1101 BITS4(1,1,0,1)
2949 //ZZ #define X1110 BITS4(1,1,1,0)
2950 //ZZ #define X1111 BITS4(1,1,1,1)
2952 #define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
2953 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2954 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2955 (((zzx3) & 0xF) << 12))
2957 #define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2) \
2958 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2959 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2960 (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8))
2962 #define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0) \
2963 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2964 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2965 (((zzx3) & 0xF) << 12) | (((zzx0) & 0xF) << 0))
2967 #define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
2968 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2969 (((zzx5) & 0xF) << 20) | (((zzx1) & 0xF) << 4) | \
2970 (((zzx0) & 0xF) << 0))
2972 #define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0) \
2973 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24) | \
2974 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2975 (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8) | \
2976 (((zzx1) & 0xF) << 4) | (((zzx0) & 0xF) << 0))
2978 #define XX______(zzx7,zzx6) \
2979 ((((zzx7) & 0xF) << 28) | (((zzx6) & 0xF) << 24))
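
/* Worked example of the X_* field packers above (added here as an
   illustration; it assumes, as in the other VEX back ends, that each
   helper packs its fields into the 32-bit word from left to right):
      X_3_6_2_16_5(X110, X100101, X00, 0, 9)
   gives
      (0b110 << 29) | (0b100101 << 23) | (0b00 << 21) | (0 << 5) | 9
   which is 0xD2800009, i.e. "movz x9, #0" -- exactly what
   imm64_to_ireg() below emits for a zero immediate. */
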
/* Get an immediate into a register, using only that register. */
static UInt* imm64_to_ireg ( UInt* p, Int xD, ULong imm64 )
{
   if (imm64 == 0) {
      // This has to be special-cased, since the logic below
      // will leave the register unchanged in this case.
      // MOVZ xD, #0, LSL #0
      *p++ = X_3_6_2_16_5(X110, X100101, X00, 0/*imm16*/, xD);
      return p;
   }

   // There must be at least one non-zero halfword.  Find the
   // lowest nonzero such, and use MOVZ to install it and zero
   // out the rest of the register.
   UShort h[4];
   h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
   h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
   h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
   h[0] = (UShort)((imm64 >>  0) & 0xFFFF);

   UInt i;
   for (i = 0; i < 4; i++) {
      if (h[i] != 0)
         break;
   }
   vassert(i < 4);

   // MOVZ xD, h[i], LSL (16*i)
   *p++ = X_3_6_2_16_5(X110, X100101, i, h[i], xD);

   // Work on upwards through h[i], using MOVK to stuff in any
   // remaining nonzero elements.
   i++;
   for (; i < 4; i++) {
      if (h[i] == 0)
         continue;
      // MOVK xD, h[i], LSL (16*i)
      *p++ = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
   }

   return p;
}

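/* A small worked example (added for illustration): for
   imm64 == 0x10000 the halfwords are h[] = {0,1,0,0}; the lowest
   nonzero one is h[1], so the routine emits just
      movz xD, #1, lsl #16
   and the trailing loop finds nothing further to MOVK in.  The worst
   case, all four halfwords nonzero, costs one MOVZ plus three MOVKs. */
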
/* Get an immediate into a register, using only that register, and
   generating exactly 4 instructions, regardless of the value of the
   immediate. This is used when generating sections of code that need
   to be patched later, so as to guarantee a specific size. */
static UInt* imm64_to_ireg_EXACTLY4 ( UInt* p, Int xD, ULong imm64 )
{
   UShort h[4];
   h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
   h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
   h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
   h[0] = (UShort)((imm64 >>  0) & 0xFFFF);
   // Work on upwards through h[i], using MOVK to stuff in the
   // remaining elements.
   UInt i;
   for (i = 0; i < 4; i++) {
      if (i == 0) {
         // MOVZ xD, h[0], LSL (16*0)
         *p++ = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
      } else {
         // MOVK xD, h[i], LSL (16*i)
         *p++ = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
      }
   }
   return p;
}

/* Check whether p points at a 4-insn sequence cooked up by
   imm64_to_ireg_EXACTLY4(). */
static Bool is_imm64_to_ireg_EXACTLY4 ( UInt* p, Int xD, ULong imm64 )
{
   UShort h[4];
   h[3] = (UShort)((imm64 >> 48) & 0xFFFF);
   h[2] = (UShort)((imm64 >> 32) & 0xFFFF);
   h[1] = (UShort)((imm64 >> 16) & 0xFFFF);
   h[0] = (UShort)((imm64 >>  0) & 0xFFFF);
   // Work on upwards through h[i], using MOVK to stuff in the
   // remaining elements.
   UInt i;
   for (i = 0; i < 4; i++) {
      UInt expected;
      if (i == 0) {
         // MOVZ xD, h[0], LSL (16*0)
         expected = X_3_6_2_16_5(X110, X100101, i, h[i], xD);
      } else {
         // MOVK xD, h[i], LSL (16*i)
         expected = X_3_6_2_16_5(X111, X100101, i, h[i], xD);
      }
      if (p[i] != expected)
         return False;
   }
   return True;
}

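/* Illustration (added; the exact immediate is hypothetical): for
   imm64 == 0x1122334455667788 the fixed-length variant above emits
      movz x9, #0x7788
      movk x9, #0x5566, lsl #16
      movk x9, #0x3344, lsl #32
      movk x9, #0x1122, lsl #48
   Always emitting four instructions is what lets the chaining
   machinery (see the XDirect case in emit_ARM64Instr below, which has
   to stay coordinated with chainXDirect_ARM64/unchainXDirect_ARM64)
   patch a new 64-bit target into the same slot later, and
   is_imm64_to_ireg_EXACTLY4() is how the patcher checks it really is
   looking at such a slot before rewriting it. */
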
3080 /* Generate a 8 bit store or 8-to-64 unsigned widening load from/to
3081 rD, using the given amode for the address. */
3082 static UInt
* do_load_or_store8 ( UInt
* p
,
3083 Bool isLoad
, UInt wD
, ARM64AMode
* am
)
3086 if (am
->tag
== ARM64am_RI9
) {
3087 /* STURB Wd, [Xn|SP + simm9]: 00 111000 000 simm9 00 n d
3088 LDURB Wd, [Xn|SP + simm9]: 00 111000 010 simm9 00 n d
3090 Int simm9
= am
->ARM64am
.RI9
.simm9
;
3091 vassert(-256 <= simm9
&& simm9
<= 255);
3092 UInt instr
= X_2_6_3_9_2_5_5(X00
, X111000
, isLoad
? X010
: X000
,
3094 iregEnc(am
->ARM64am
.RI9
.reg
), wD
);
3098 if (am
->tag
== ARM64am_RI12
) {
3099 /* STRB Wd, [Xn|SP + uimm12 * 1]: 00 111 001 00 imm12 n d
3100 LDRB Wd, [Xn|SP + uimm12 * 1]: 00 111 001 01 imm12 n d
3102 UInt uimm12
= am
->ARM64am
.RI12
.uimm12
;
3103 UInt scale
= am
->ARM64am
.RI12
.szB
;
3104 vassert(scale
== 1); /* failure of this is serious. Do not ignore. */
3105 UInt xN
= iregEnc(am
->ARM64am
.RI12
.reg
);
3107 UInt instr
= X_2_6_2_12_5_5(X00
, X111001
, isLoad
? X01
: X00
,
3112 if (am
->tag
== ARM64am_RR
) {
3113 /* STRB Xd, [Xn|SP, Xm]: 00 111 000 001 m 011 0 10 n d
3114 LDRB Xd, [Xn|SP, Xm]: 00 111 000 011 m 011 0 10 n d
3116 UInt xN
= iregEnc(am
->ARM64am
.RR
.base
);
3117 UInt xM
= iregEnc(am
->ARM64am
.RR
.index
);
3119 UInt instr
= X_3_8_5_6_5_5(X001
, isLoad
? X11000011
: X11000001
,
3120 xM
, X011010
, xN
, wD
);
3124 vpanic("do_load_or_store8");
3129 /* Generate a 16 bit store or 16-to-64 unsigned widening load from/to
3130 rD, using the given amode for the address. */
3131 static UInt
* do_load_or_store16 ( UInt
* p
,
3132 Bool isLoad
, UInt wD
, ARM64AMode
* am
)
3135 if (am
->tag
== ARM64am_RI9
) {
3136 /* STURH Wd, [Xn|SP + simm9]: 01 111000 000 simm9 00 n d
3137 LDURH Wd, [Xn|SP + simm9]: 01 111000 010 simm9 00 n d
3139 Int simm9
= am
->ARM64am
.RI9
.simm9
;
3140 vassert(-256 <= simm9
&& simm9
<= 255);
3141 UInt instr
= X_2_6_3_9_2_5_5(X01
, X111000
, isLoad
? X010
: X000
,
3143 iregEnc(am
->ARM64am
.RI9
.reg
), wD
);
3147 if (am
->tag
== ARM64am_RI12
) {
3148 /* STRH Wd, [Xn|SP + uimm12 * 2]: 01 111 001 00 imm12 n d
3149 LDRH Wd, [Xn|SP + uimm12 * 2]: 01 111 001 01 imm12 n d
3151 UInt uimm12
= am
->ARM64am
.RI12
.uimm12
;
3152 UInt scale
= am
->ARM64am
.RI12
.szB
;
3153 vassert(scale
== 2); /* failure of this is serious. Do not ignore. */
3154 UInt xN
= iregEnc(am
->ARM64am
.RI12
.reg
);
3156 UInt instr
= X_2_6_2_12_5_5(X01
, X111001
, isLoad
? X01
: X00
,
3161 if (am
->tag
== ARM64am_RR
) {
3162 /* STRH Xd, [Xn|SP, Xm]: 01 111 000 001 m 011 0 10 n d
3163 LDRH Xd, [Xn|SP, Xm]: 01 111 000 011 m 011 0 10 n d
3165 UInt xN
= iregEnc(am
->ARM64am
.RR
.base
);
3166 UInt xM
= iregEnc(am
->ARM64am
.RR
.index
);
3168 UInt instr
= X_3_8_5_6_5_5(X011
, isLoad
? X11000011
: X11000001
,
3169 xM
, X011010
, xN
, wD
);
3173 vpanic("do_load_or_store16");
3178 /* Generate a 32 bit store or 32-to-64 unsigned widening load from/to
3179 rD, using the given amode for the address. */
3180 static UInt
* do_load_or_store32 ( UInt
* p
,
3181 Bool isLoad
, UInt wD
, ARM64AMode
* am
)
3184 if (am
->tag
== ARM64am_RI9
) {
3185 /* STUR Wd, [Xn|SP + simm9]: 10 111000 000 simm9 00 n d
3186 LDUR Wd, [Xn|SP + simm9]: 10 111000 010 simm9 00 n d
3188 Int simm9
= am
->ARM64am
.RI9
.simm9
;
3189 vassert(-256 <= simm9
&& simm9
<= 255);
3190 UInt instr
= X_2_6_3_9_2_5_5(X10
, X111000
, isLoad
? X010
: X000
,
3192 iregEnc(am
->ARM64am
.RI9
.reg
), wD
);
3196 if (am
->tag
== ARM64am_RI12
) {
3197 /* STR Wd, [Xn|SP + uimm12 * 4]: 10 111 001 00 imm12 n d
3198 LDR Wd, [Xn|SP + uimm12 * 4]: 10 111 001 01 imm12 n d
3200 UInt uimm12
= am
->ARM64am
.RI12
.uimm12
;
3201 UInt scale
= am
->ARM64am
.RI12
.szB
;
3202 vassert(scale
== 4); /* failure of this is serious. Do not ignore. */
3203 UInt xN
= iregEnc(am
->ARM64am
.RI12
.reg
);
3205 UInt instr
= X_2_6_2_12_5_5(X10
, X111001
, isLoad
? X01
: X00
,
3210 if (am
->tag
== ARM64am_RR
) {
3211 /* STR Wd, [Xn|SP, Xm]: 10 111 000 001 m 011 0 10 n d
3212 LDR Wd, [Xn|SP, Xm]: 10 111 000 011 m 011 0 10 n d
3214 UInt xN
= iregEnc(am
->ARM64am
.RR
.base
);
3215 UInt xM
= iregEnc(am
->ARM64am
.RR
.index
);
3217 UInt instr
= X_3_8_5_6_5_5(X101
, isLoad
? X11000011
: X11000001
,
3218 xM
, X011010
, xN
, wD
);
3222 vpanic("do_load_or_store32");
3227 /* Generate a 64 bit load or store to/from xD, using the given amode
3229 static UInt
* do_load_or_store64 ( UInt
* p
,
3230 Bool isLoad
, UInt xD
, ARM64AMode
* am
)
3232 /* In all these cases, Rn can't be 31 since that means SP. */
3234 if (am
->tag
== ARM64am_RI9
) {
3235 /* STUR Xd, [Xn|SP + simm9]: 11 111000 000 simm9 00 n d
3236 LDUR Xd, [Xn|SP + simm9]: 11 111000 010 simm9 00 n d
3238 Int simm9
= am
->ARM64am
.RI9
.simm9
;
3239 vassert(-256 <= simm9
&& simm9
<= 255);
3240 UInt xN
= iregEnc(am
->ARM64am
.RI9
.reg
);
3242 UInt instr
= X_2_6_3_9_2_5_5(X11
, X111000
, isLoad
? X010
: X000
,
3243 simm9
& 0x1FF, X00
, xN
, xD
);
3247 if (am
->tag
== ARM64am_RI12
) {
3248 /* STR Xd, [Xn|SP + uimm12 * 8]: 11 111 001 00 imm12 n d
3249 LDR Xd, [Xn|SP + uimm12 * 8]: 11 111 001 01 imm12 n d
3251 UInt uimm12
= am
->ARM64am
.RI12
.uimm12
;
3252 UInt scale
= am
->ARM64am
.RI12
.szB
;
3253 vassert(scale
== 8); /* failure of this is serious. Do not ignore. */
3254 UInt xN
= iregEnc(am
->ARM64am
.RI12
.reg
);
3256 UInt instr
= X_2_6_2_12_5_5(X11
, X111001
, isLoad
? X01
: X00
,
3261 if (am
->tag
== ARM64am_RR
) {
3262 /* STR Xd, [Xn|SP, Xm]: 11 111 000 001 m 011 0 10 n d
3263 LDR Xd, [Xn|SP, Xm]: 11 111 000 011 m 011 0 10 n d
3265 UInt xN
= iregEnc(am
->ARM64am
.RR
.base
);
3266 UInt xM
= iregEnc(am
->ARM64am
.RR
.index
);
3268 UInt instr
= X_3_8_5_6_5_5(X111
, isLoad
? X11000011
: X11000001
,
3269 xM
, X011010
, xN
, xD
);
3273 vpanic("do_load_or_store64");
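
/* Pattern note for the four load/store helpers above (added for
   orientation): the byte, halfword, word and doubleword variants
   differ only in the leading size field of the encoding -- 00, 01, 10
   and 11 in the top two bits -- and in the matching scale check on the
   RI12 amode (1, 2, 4 and 8 respectively).  Under that reading,
   "ldr x7, [x21, #16]" would come out of do_load_or_store64 via the
   RI12 path with uimm12 == 2, since the amode's immediate is stored
   already divided by the access size. */
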
/* Emit an instruction into buf and return the number of bytes used.
   Note that buf is not the insn's final place, and therefore it is
   imperative to emit position-independent code.  If the emitted
   instruction was a profiler inc, set *is_profInc to True, else
   leave it unchanged. */

Int emit_ARM64Instr ( /*MB_MOD*/Bool* is_profInc,
                      UChar* buf, Int nbuf, const ARM64Instr* i,
                      Bool mode64, VexEndness endness_host,
                      const void* disp_cp_chain_me_to_slowEP,
                      const void* disp_cp_chain_me_to_fastEP,
                      const void* disp_cp_xindir,
                      const void* disp_cp_xassisted )
{
   UInt* p = (UInt*)buf;
   vassert(nbuf >= 32);
   vassert(mode64 == True);
   vassert(0 == (((HWord)buf) & 3));

   switch (i->tag) {
3298 case ARM64in_Arith
: {
3299 UInt rD
= iregEnc(i
->ARM64in
.Arith
.dst
);
3300 UInt rN
= iregEnc(i
->ARM64in
.Arith
.argL
);
3301 ARM64RIA
* argR
= i
->ARM64in
.Arith
.argR
;
3302 switch (argR
->tag
) {
3304 *p
++ = X_2_6_2_12_5_5(
3305 i
->ARM64in
.Arith
.isAdd
? X10
: X11
,
3307 argR
->ARM64riA
.I12
.shift
== 12 ? X01
: X00
,
3308 argR
->ARM64riA
.I12
.imm12
, rN
, rD
3312 UInt rM
= iregEnc(i
->ARM64in
.Arith
.argR
->ARM64riA
.R
.reg
);
3313 *p
++ = X_3_8_5_6_5_5(
3314 i
->ARM64in
.Arith
.isAdd
? X100
: X110
,
3315 X01011000
, rM
, X000000
, rN
, rD
3325 UInt rD
= 31; /* XZR, we are going to dump the result */
3326 UInt rN
= iregEnc(i
->ARM64in
.Cmp
.argL
);
3327 ARM64RIA
* argR
= i
->ARM64in
.Cmp
.argR
;
3328 Bool is64
= i
->ARM64in
.Cmp
.is64
;
3329 switch (argR
->tag
) {
3331 /* 1 11 10001 sh imm12 Rn Rd = SUBS Xd, Xn, #imm */
3332 /* 0 11 10001 sh imm12 Rn Rd = SUBS Wd, Wn, #imm */
3333 *p
++ = X_2_6_2_12_5_5(
3334 is64
? X11
: X01
, X110001
,
3335 argR
->ARM64riA
.I12
.shift
== 12 ? X01
: X00
,
3336 argR
->ARM64riA
.I12
.imm12
, rN
, rD
);
3339 /* 1 11 01011 00 0 Rm 000000 Rn Rd = SUBS Xd, Xn, Xm */
3340 /* 0 11 01011 00 0 Rm 000000 Rn Rd = SUBS Wd, Wn, Wm */
3341 UInt rM
= iregEnc(i
->ARM64in
.Cmp
.argR
->ARM64riA
.R
.reg
);
3342 *p
++ = X_3_8_5_6_5_5(is64
? X111
: X011
,
3343 X01011000
, rM
, X000000
, rN
, rD
);
3351 case ARM64in_Logic
: {
3352 UInt rD
= iregEnc(i
->ARM64in
.Logic
.dst
);
3353 UInt rN
= iregEnc(i
->ARM64in
.Logic
.argL
);
3354 ARM64RIL
* argR
= i
->ARM64in
.Logic
.argR
;
3355 UInt opc
= 0; /* invalid */
3358 switch (i
->ARM64in
.Logic
.op
) {
3359 case ARM64lo_OR
: opc
= X101
; break;
3360 case ARM64lo_AND
: opc
= X100
; break;
3361 case ARM64lo_XOR
: opc
= X110
; break;
3365 switch (argR
->tag
) {
3366 case ARM64riL_I13
: {
3367 /* 1 01 100100 N immR immS Rn Rd = ORR <Xd|Sp>, Xn, #imm */
3368 /* 1 00 100100 N immR immS Rn Rd = AND <Xd|Sp>, Xn, #imm */
3369 /* 1 10 100100 N immR immS Rn Rd = EOR <Xd|Sp>, Xn, #imm */
3370 *p
++ = X_3_6_1_6_6_5_5(
3371 opc
, X100100
, argR
->ARM64riL
.I13
.bitN
,
3372 argR
->ARM64riL
.I13
.immR
, argR
->ARM64riL
.I13
.immS
,
3378 /* 1 01 01010 00 0 m 000000 n d = ORR Xd, Xn, Xm */
3379 /* 1 00 01010 00 0 m 000000 n d = AND Xd, Xn, Xm */
3380 /* 1 10 01010 00 0 m 000000 n d = EOR Xd, Xn, Xm */
3381 UInt rM
= iregEnc(argR
->ARM64riL
.R
.reg
);
3383 *p
++ = X_3_8_5_6_5_5(opc
, X01010000
, rM
, X000000
, rN
, rD
);
3391 case ARM64in_Test
: {
3392 UInt rD
= 31; /* XZR, we are going to dump the result */
3393 UInt rN
= iregEnc(i
->ARM64in
.Test
.argL
);
3394 ARM64RIL
* argR
= i
->ARM64in
.Test
.argR
;
3395 switch (argR
->tag
) {
3396 case ARM64riL_I13
: {
3397 /* 1 11 100100 N immR immS Rn Rd = ANDS Xd, Xn, #imm */
3398 *p
++ = X_3_6_1_6_6_5_5(
3399 X111
, X100100
, argR
->ARM64riL
.I13
.bitN
,
3400 argR
->ARM64riL
.I13
.immR
, argR
->ARM64riL
.I13
.immS
,
3410 case ARM64in_Shift
: {
3411 UInt rD
= iregEnc(i
->ARM64in
.Shift
.dst
);
3412 UInt rN
= iregEnc(i
->ARM64in
.Shift
.argL
);
3413 ARM64RI6
* argR
= i
->ARM64in
.Shift
.argR
;
3416 switch (argR
->tag
) {
3418 /* 110 1001101 (63-sh) (64-sh) nn dd LSL Xd, Xn, sh */
3419 /* 110 1001101 sh 63 nn dd LSR Xd, Xn, sh */
3420 /* 100 1001101 sh 63 nn dd ASR Xd, Xn, sh */
3421 UInt sh
= argR
->ARM64ri6
.I6
.imm6
;
3422 vassert(sh
> 0 && sh
< 64);
3423 switch (i
->ARM64in
.Shift
.op
) {
3425 *p
++ = X_3_6_1_6_6_5_5(X110
, X100110
,
3426 1, 64-sh
, 63-sh
, rN
, rD
);
3429 *p
++ = X_3_6_1_6_6_5_5(X110
, X100110
, 1, sh
, 63, rN
, rD
);
3432 *p
++ = X_3_6_1_6_6_5_5(X100
, X100110
, 1, sh
, 63, rN
, rD
);
3440 /* 100 1101 0110 mm 001000 nn dd LSL Xd, Xn, Xm */
3441 /* 100 1101 0110 mm 001001 nn dd LSR Xd, Xn, Xm */
3442 /* 100 1101 0110 mm 001010 nn dd ASR Xd, Xn, Xm */
3443 UInt rM
= iregEnc(argR
->ARM64ri6
.R
.reg
);
3446 switch (i
->ARM64in
.Shift
.op
) {
3447 case ARM64sh_SHL
: subOpc
= X001000
; break;
3448 case ARM64sh_SHR
: subOpc
= X001001
; break;
3449 case ARM64sh_SAR
: subOpc
= X001010
; break;
3450 default: vassert(0);
3452 *p
++ = X_3_8_5_6_5_5(X100
, X11010110
, rM
, subOpc
, rN
, rD
);
3460 case ARM64in_Unary
: {
3461 UInt rDst
= iregEnc(i
->ARM64in
.Unary
.dst
);
3462 UInt rSrc
= iregEnc(i
->ARM64in
.Unary
.src
);
3463 switch (i
->ARM64in
.Unary
.op
) {
3465 /* 1 10 1101 0110 00000 00010 0 nn dd CLZ Xd, Xn */
3466 /* 1 10 1101 0110 00000 00010 1 nn dd CLS Xd, Xn (unimp) */
3467 *p
++ = X_3_8_5_6_5_5(X110
,
3468 X11010110
, X00000
, X000100
, rSrc
, rDst
);
3471 /* 1 10 01011 000 m 000000 11111 d NEG Xd,Xm */
3472 /* 0 10 01011 000 m 000000 11111 d NEG Wd,Wm (unimp) */
3473 *p
++ = X_3_8_5_6_5_5(X110
,
3474 X01011000
, rSrc
, X000000
, X11111
, rDst
);
3477 /* 1 01 01010 00 1 m 000000 11111 d MVN Xd,Xm */
3478 *p
++ = X_3_8_5_6_5_5(X101
,
3479 X01010001
, rSrc
, X000000
, X11111
, rDst
);
3487 case ARM64in_MovI
: {
3488 /* We generate the "preferred form", ORR Xd, XZR, Xm
3489 101 01010 00 0 m 000000 11111 d
3491 UInt instr
= 0xAA0003E0;
3492 UInt d
= iregEnc(i
->ARM64in
.MovI
.dst
);
3493 UInt m
= iregEnc(i
->ARM64in
.MovI
.src
);
3494 *p
++ = instr
| ((m
& 31) << 16) | ((d
& 31) << 0);
3497 case ARM64in_Imm64
: {
3498 p
= imm64_to_ireg( p
, iregEnc(i
->ARM64in
.Imm64
.dst
),
3499 i
->ARM64in
.Imm64
.imm64
);
3502 case ARM64in_LdSt64
: {
3503 p
= do_load_or_store64( p
, i
->ARM64in
.LdSt64
.isLoad
,
3504 iregEnc(i
->ARM64in
.LdSt64
.rD
),
3505 i
->ARM64in
.LdSt64
.amode
);
3508 case ARM64in_LdSt32
: {
3509 p
= do_load_or_store32( p
, i
->ARM64in
.LdSt32
.isLoad
,
3510 iregEnc(i
->ARM64in
.LdSt32
.rD
),
3511 i
->ARM64in
.LdSt32
.amode
);
3514 case ARM64in_LdSt16
: {
3515 p
= do_load_or_store16( p
, i
->ARM64in
.LdSt16
.isLoad
,
3516 iregEnc(i
->ARM64in
.LdSt16
.rD
),
3517 i
->ARM64in
.LdSt16
.amode
);
3520 case ARM64in_LdSt8
: {
3521 p
= do_load_or_store8( p
, i
->ARM64in
.LdSt8
.isLoad
,
3522 iregEnc(i
->ARM64in
.LdSt8
.rD
),
3523 i
->ARM64in
.LdSt8
.amode
);
      case ARM64in_XDirect: {
         /* NB: what goes on here has to be very closely coordinated
            with chainXDirect_ARM64 and unchainXDirect_ARM64 below. */
         /* We're generating chain-me requests here, so we need to be
            sure this is actually allowed -- no-redir translations
            can't use chain-me's.  Hence: */
         vassert(disp_cp_chain_me_to_slowEP != NULL);
         vassert(disp_cp_chain_me_to_fastEP != NULL);

         /* Use ptmp for backpatching conditional jumps. */
         UInt* ptmp = NULL;

         /* First off, if this is conditional, create a conditional
            jump over the rest of it.  Or at least, leave a space for
            it that we will shortly fill in. */
         if (i->ARM64in.XDirect.cond != ARM64cc_AL) {
            vassert(i->ARM64in.XDirect.cond != ARM64cc_NV);
            ptmp = p;
            *p++ = 0; /* placeholder; patched below */
         }

         /* Update the guest PC. */
         /* imm64 x9, dstGA */
         /* str   x9, amPC */
         p = imm64_to_ireg(p, /*x*/9, i->ARM64in.XDirect.dstGA);
         p = do_load_or_store64(p, False/*!isLoad*/,
                                /*x*/9, i->ARM64in.XDirect.amPC);

         /* --- FIRST PATCHABLE BYTE follows --- */
         /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're
            calling to) backs up the return address, so as to find the
            address of the first patchable byte.  So: don't change the
            number of instructions (5) below. */
         /* movw x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[15:0] */
         /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[31:16], lsl 16 */
         /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[47:32], lsl 32 */
         /* movk x9, VG_(disp_cp_chain_me_to_{slowEP,fastEP})[63:48], lsl 48 */
         /* blr  x9 */
         const void* disp_cp_chain_me
            = i->ARM64in.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
                                          : disp_cp_chain_me_to_slowEP;
         p = imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)disp_cp_chain_me);
         *p++ = 0xD63F0120; /* blr x9 */
         /* --- END of PATCHABLE BYTES --- */

         /* Fix up the conditional jump, if there was one. */
         if (i->ARM64in.XDirect.cond != ARM64cc_AL) {
            Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
            vassert(delta > 0 && delta <= 40);
            vassert((delta & 3) == 0);
            UInt notCond = 1 ^ (UInt)i->ARM64in.XDirect.cond;
            vassert(notCond <= 13); /* Neither AL nor NV */
            vassert(ptmp != NULL);
            delta = delta >> 2;
            *ptmp = X_8_19_1_4(X01010100, delta & ((1<<19)-1), 0, notCond);
         }
         goto done;
      }
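      /* Note, for illustration: assuming imm64_to_ireg_EXACTLY4 always emits
         exactly four MOVZ/MOVK instructions, the patchable site above is a
         fixed five-insn (20-byte) sequence of the shape
            movz x9, #target[15:0]
            movk x9, #target[31:16], lsl 16
            movk x9, #target[47:32], lsl 32
            movk x9, #target[63:48], lsl 48
            blr  x9
         which is what chainXDirect_ARM64/unchainXDirect_ARM64 rewrite. */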
      case ARM64in_XIndir: {
         // XIndir is more or less the same as XAssisted, except
         // we don't have a trc value to hand back, so there's no
         // write to r21.

         /* Use ptmp for backpatching conditional jumps. */
         //UInt* ptmp = NULL;

         /* First off, if this is conditional, create a conditional
            jump over the rest of it.  Or at least, leave a space for
            it that we will shortly fill in. */
         if (i->ARM64in.XIndir.cond != ARM64cc_AL) {
            //ZZ vassert(i->ARMin.XIndir.cond != ARMcc_NV);
         }

         /* Update the guest PC. */
         /* str r-dstGA, amPC */
         p = do_load_or_store64(p, False/*!isLoad*/,
                                iregEnc(i->ARM64in.XIndir.dstGA),
                                i->ARM64in.XIndir.amPC);

         /* imm64 x9, VG_(disp_cp_xindir) */
         /* br   x9 */
         p = imm64_to_ireg(p, /*x*/9, (Addr)disp_cp_xindir);
         *p++ = 0xD61F0120; /* br x9 */

         /* Fix up the conditional jump, if there was one. */
         if (i->ARM64in.XIndir.cond != ARM64cc_AL) {
            //ZZ Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
            //ZZ vassert(delta > 0 && delta < 40);
            //ZZ vassert((delta & 3) == 0);
            //ZZ UInt notCond = 1 ^ (UInt)i->ARMin.XIndir.cond;
            //ZZ vassert(notCond <= 13); /* Neither AL nor NV */
            //ZZ delta = (delta >> 2) - 2;
            //ZZ *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
         }
         goto done;
      }
      case ARM64in_XAssisted: {
         /* Use ptmp for backpatching conditional jumps. */
         UInt* ptmp = NULL;

         /* First off, if this is conditional, create a conditional
            jump over the rest of it.  Or at least, leave a space for
            it that we will shortly fill in.  I think this can only
            ever happen when VEX is driven by the switchbacker. */
         if (i->ARM64in.XAssisted.cond != ARM64cc_AL) {
            vassert(i->ARM64in.XDirect.cond != ARM64cc_NV);
            ptmp = p;
            *p++ = 0; /* placeholder; patched below */
         }

         /* Update the guest PC. */
         /* str r-dstGA, amPC */
         p = do_load_or_store64(p, False/*!isLoad*/,
                                iregEnc(i->ARM64in.XAssisted.dstGA),
                                i->ARM64in.XAssisted.amPC);

         /* movw r21, $magic_number */
         UInt trcval = 0;
         switch (i->ARM64in.XAssisted.jk) {
            case Ijk_ClientReq:   trcval = VEX_TRC_JMP_CLIENTREQ;   break;
            case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
            //case Ijk_Sys_int128: trcval = VEX_TRC_JMP_SYS_INT128; break;
            case Ijk_Yield:       trcval = VEX_TRC_JMP_YIELD;       break;
            //case Ijk_EmWarn:    trcval = VEX_TRC_JMP_EMWARN;      break;
            //case Ijk_MapFail:   trcval = VEX_TRC_JMP_MAPFAIL;     break;
            case Ijk_NoDecode:    trcval = VEX_TRC_JMP_NODECODE;    break;
            case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
            case Ijk_FlushDCache: trcval = VEX_TRC_JMP_FLUSHDCACHE; break;
            case Ijk_NoRedir:     trcval = VEX_TRC_JMP_NOREDIR;     break;
            case Ijk_SigTRAP:     trcval = VEX_TRC_JMP_SIGTRAP;     break;
            //case Ijk_SigSEGV:   trcval = VEX_TRC_JMP_SIGSEGV;     break;
            case Ijk_Boring:      trcval = VEX_TRC_JMP_BORING;      break;
            /* We don't expect to see the following being assisted. */
            default:
               ppIRJumpKind(i->ARM64in.XAssisted.jk);
               vpanic("emit_ARM64Instr.ARM64in_XAssisted: "
                      "unexpected jump kind");
         }
         vassert(trcval != 0);
         p = imm64_to_ireg(p, /*x*/21, (ULong)trcval);

         /* imm64 x9, VG_(disp_cp_xassisted) */
         /* br   x9 */
         p = imm64_to_ireg(p, /*x*/9, (Addr)disp_cp_xassisted);
         *p++ = 0xD61F0120; /* br x9 */

         /* Fix up the conditional jump, if there was one. */
         if (i->ARM64in.XAssisted.cond != ARM64cc_AL) {
            Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
            vassert(delta > 0 && delta < 40);
            vassert((delta & 3) == 0);
            UInt notCond = 1 ^ (UInt)i->ARM64in.XDirect.cond;
            vassert(notCond <= 13); /* Neither AL nor NV */
            vassert(ptmp != NULL);
            delta = delta >> 2;
            *ptmp = X_8_19_1_4(X01010100, delta & ((1<<19)-1), 0, notCond);
         }
         goto done;
      }
      case ARM64in_CSel: {
         /* 100 1101 0100 mm cond 00 nn dd = CSEL Xd, Xn, Xm, cond */
         UInt dd   = iregEnc(i->ARM64in.CSel.dst);
         UInt nn   = iregEnc(i->ARM64in.CSel.argL);
         UInt mm   = iregEnc(i->ARM64in.CSel.argR);
         UInt cond = (UInt)i->ARM64in.CSel.cond;
         vassert(dd < 31 && nn < 31 && mm < 31 && cond < 16);
         *p++ = X_3_8_5_6_5_5(X100, X11010100, mm, cond << 2, nn, dd);
         goto done;
      }
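      /* Worked example, for illustration: CSEL X0, X1, X2, EQ (cond == 0)
         packs as X_3_8_5_6_5_5(X100, X11010100, 2, 0 << 2, 1, 0)
         = 0x9A820020, the architectural encoding of that instruction. */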
      case ARM64in_Call: {
         /* We'll use x9 as a scratch register to put the target
            address in. */
         if (i->ARM64in.Call.cond != ARM64cc_AL
             && i->ARM64in.Call.rloc.pri != RLPri_None) {
            /* The call might not happen (it isn't unconditional) and
               it returns a result.  In this case we will need to
               generate a control flow diamond to put 0x555..555 in
               the return register(s) in the case where the call
               doesn't happen.  If this ever becomes necessary, maybe
               copy code from the 32-bit ARM equivalent.  Until that
               day, just give up. */
            goto bad;
         }

         UInt* ptmp = NULL;
         if (i->ARM64in.Call.cond != ARM64cc_AL) {
            /* Create a hole to put a conditional branch in.  We'll
               patch it once we know the branch length. */
            ptmp = p;
            *p++ = 0;
         }

         // x9 = &target
         p = imm64_to_ireg( (UInt*)p, /*x*/9, (ULong)i->ARM64in.Call.target );
         // blr x9
         *p++ = 0xD63F0120;

         // Patch the hole if necessary
         if (i->ARM64in.Call.cond != ARM64cc_AL) {
            ULong dist = (ULong)(p - ptmp);
            /* imm64_to_ireg produces between 1 and 4 insns, and
               then there's the BLR itself.  Hence: */
            vassert(dist >= 2 && dist <= 5);
            vassert(ptmp != NULL);
            // 01010100 simm19 0 cond = B.cond (here + simm19 << 2)
            *ptmp = X_8_19_1_4(X01010100, dist, 0,
                               1 ^ (UInt)i->ARM64in.Call.cond);
         } else {
            vassert(ptmp == NULL);
         }

         goto done;
      }
      case ARM64in_AddToSP: {
         /* 10,0 10001 00 imm12 11111 11111  ADD xsp, xsp, #imm12
            11,0 10001 00 imm12 11111 11111  SUB xsp, xsp, #imm12
         */
         Int simm12 = i->ARM64in.AddToSP.simm;
         vassert(-4096 < simm12 && simm12 < 4096);
         vassert(0 == (simm12 & 0xF));
         if (simm12 >= 0) {
            *p++ = X_2_6_2_12_5_5(X10, X010001, X00, simm12, X11111, X11111);
         } else {
            *p++ = X_2_6_2_12_5_5(X11, X010001, X00, -simm12, X11111, X11111);
         }
         goto done;
      }

      case ARM64in_FromSP: {
         /* 10,0 10001 00 0..(12)..0 11111 dd  MOV Xd, xsp */
         UInt dd = iregEnc(i->ARM64in.FromSP.dst);
         vassert(dd < 31);
         *p++ = X_2_6_2_12_5_5(X10, X010001, X00, 0, X11111, dd);
         goto done;
      }
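      /* Worked example, for illustration: in the AddToSP case above,
         ADD SP, SP, #16 packs as
            X_2_6_2_12_5_5(X10, X010001, X00, 16, X11111, X11111)
            = 10 010001 00 000000010000 11111 11111 = 0x910043FF,
         the architectural encoding of ADD SP, SP, #16. */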
      case ARM64in_Mul: {
         /* 100 11011 110 mm 011111 nn dd   UMULH Xd, Xn,Xm
            100 11011 010 mm 011111 nn dd   SMULH Xd, Xn,Xm
            100 11011 000 mm 011111 nn dd   MUL   Xd, Xn,Xm
         */
         UInt dd = iregEnc(i->ARM64in.Mul.dst);
         UInt nn = iregEnc(i->ARM64in.Mul.argL);
         UInt mm = iregEnc(i->ARM64in.Mul.argR);
         vassert(dd < 31 && nn < 31 && mm < 31);
         switch (i->ARM64in.Mul.op) {
            case ARM64mul_ZX:
               *p++ = X_3_8_5_6_5_5(X100, X11011110, mm, X011111, nn, dd);
               goto done;
            case ARM64mul_SX:
               *p++ = X_3_8_5_6_5_5(X100, X11011010, mm, X011111, nn, dd);
               goto done;
            case ARM64mul_PLAIN:
               *p++ = X_3_8_5_6_5_5(X100, X11011000, mm, X011111, nn, dd);
               goto done;
            default:
               vassert(0);
         }
         goto bad;
      }
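      /* Worked example, for illustration: the PLAIN form above is MADD with
         XZR as the addend, so MUL X0, X1, X2 packs as
            X_3_8_5_6_5_5(X100, X11011000, 2, X011111, 1, 0) = 0x9B027C20,
         the architectural encoding of MUL X0, X1, X2. */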
      case ARM64in_LdrEX: {
         /* 085F7C82   ldxrb w2, [x4]
            485F7C82   ldxrh w2, [x4]
            885F7C82   ldxr  w2, [x4]
            C85F7C82   ldxr  x2, [x4]
         */
         switch (i->ARM64in.LdrEX.szB) {
            case 1: *p++ = 0x085F7C82; goto done;
            case 2: *p++ = 0x485F7C82; goto done;
            case 4: *p++ = 0x885F7C82; goto done;
            case 8: *p++ = 0xC85F7C82; goto done;
            default: break;
         }
         goto bad;
      }
      case ARM64in_StrEX: {
         /* 08007C82   stxrb w0, w2, [x4]
            48007C82   stxrh w0, w2, [x4]
            88007C82   stxr  w0, w2, [x4]
            C8007C82   stxr  w0, x2, [x4]
         */
         switch (i->ARM64in.StrEX.szB) {
            case 1: *p++ = 0x08007C82; goto done;
            case 2: *p++ = 0x48007C82; goto done;
            case 4: *p++ = 0x88007C82; goto done;
            case 8: *p++ = 0xC8007C82; goto done;
            default: break;
         }
         goto bad;
      }
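      /* Note, for illustration: these fixed words hard-wire the register
         convention assumed by the surrounding lowering: x4 holds the
         address, x2/w2 the data, and w0 receives the store-exclusive
         status.  E.g. 0xC85F7C82 is LDXR x2, [x4], i.e.
         0xC85F7C00 | (4 << 5) | 2. */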
      case ARM64in_CAS: {
         /* This isn't simple.  For an explanation see the comment in
            host_arm64_defs.h on the definition of the ARM64Instr CAS case.

            Generate:
             -- one of:
              mov     x8, x5                 // AA0503E8
              and     x8, x5, #0xFFFFFFFF    // 92407CA8
              and     x8, x5, #0xFFFF        // 92403CA8
              and     x8, x5, #0xFF          // 92401CA8

             -- one of:
              ldxr    x1, [x3]               // C85F7C61
              ldxr    w1, [x3]               // 885F7C61
              ldxrh   w1, [x3]               // 485F7C61
              ldxrb   w1, [x3]               // 085F7C61

             -- always:
              cmp     x1, x8                 // EB08003F
              bne     out                    // (skip the store if the
                                             //  compare failed)

             -- one of:
              stxr    w1, x7, [x3]           // C8017C67
              stxr    w1, w7, [x3]           // 88017C67
              stxrh   w1, w7, [x3]           // 48017C67
              stxrb   w1, w7, [x3]           // 08017C67

             -- always:
              eor     x1, x5, x1             // CA0100A1
            out:
         */
         switch (i->ARM64in.CAS.szB) {
            case 8:  *p++ = 0xAA0503E8; break;
            case 4:  *p++ = 0x92407CA8; break;
            case 2:  *p++ = 0x92403CA8; break;
            case 1:  *p++ = 0x92401CA8; break;
            default: vassert(0);
         }
         switch (i->ARM64in.CAS.szB) {
            case 8:  *p++ = 0xC85F7C61; break;
            case 4:  *p++ = 0x885F7C61; break;
            case 2:  *p++ = 0x485F7C61; break;
            case 1:  *p++ = 0x085F7C61; break;
            default: vassert(0);
         }
         *p++ = 0xEB08003F; /* cmp x1, x8 */
         *p++ = 0x54000061; /* b.ne out, skipping the store-exclusive */
         switch (i->ARM64in.CAS.szB) {
            case 8:  *p++ = 0xC8017C67; break;
            case 4:  *p++ = 0x88017C67; break;
            case 2:  *p++ = 0x48017C67; break;
            case 1:  *p++ = 0x08017C67; break;
            default: vassert(0);
         }
         *p++ = 0xCA0100A1; /* eor x1, x5, x1 */
         goto done;
      }
      case ARM64in_MFence: {
         *p++ = 0xD5033F9F; /* DSB sy */
         *p++ = 0xD5033FBF; /* DMB sy */
         *p++ = 0xD5033FDF; /* ISB */
         goto done;
      }
      case ARM64in_ClrEX: {
         *p++ = 0xD5033F5F; /* clrex #15 */
         goto done;
      }
      case ARM64in_VLdStH: {
         /* 01 111101 01 imm12 n t   LDR Ht, [Xn|SP, #imm12 * 2]
            01 111101 00 imm12 n t   STR Ht, [Xn|SP, #imm12 * 2]
         */
         UInt hD     = dregEnc(i->ARM64in.VLdStH.hD);
         UInt rN     = iregEnc(i->ARM64in.VLdStH.rN);
         UInt uimm12 = i->ARM64in.VLdStH.uimm12;
         Bool isLD   = i->ARM64in.VLdStH.isLoad;
         vassert(uimm12 < 8192 && 0 == (uimm12 & 1));
         uimm12 >>= 1;
         vassert(uimm12 < (1<<12));
         *p++ = X_2_6_2_12_5_5(X01, X111101, isLD ? X01 : X00,
                               uimm12, rN, hD);
         goto done;
      }
      case ARM64in_VLdStS: {
         /* 10 111101 01 imm12 n t   LDR St, [Xn|SP, #imm12 * 4]
            10 111101 00 imm12 n t   STR St, [Xn|SP, #imm12 * 4]
         */
         UInt sD     = dregEnc(i->ARM64in.VLdStS.sD);
         UInt rN     = iregEnc(i->ARM64in.VLdStS.rN);
         UInt uimm12 = i->ARM64in.VLdStS.uimm12;
         Bool isLD   = i->ARM64in.VLdStS.isLoad;
         vassert(uimm12 < 16384 && 0 == (uimm12 & 3));
         uimm12 >>= 2;
         vassert(uimm12 < (1<<12));
         *p++ = X_2_6_2_12_5_5(X10, X111101, isLD ? X01 : X00,
                               uimm12, rN, sD);
         goto done;
      }
      case ARM64in_VLdStD: {
         /* 11 111101 01 imm12 n t   LDR Dt, [Xn|SP, #imm12 * 8]
            11 111101 00 imm12 n t   STR Dt, [Xn|SP, #imm12 * 8]
         */
         UInt dD     = dregEnc(i->ARM64in.VLdStD.dD);
         UInt rN     = iregEnc(i->ARM64in.VLdStD.rN);
         UInt uimm12 = i->ARM64in.VLdStD.uimm12;
         Bool isLD   = i->ARM64in.VLdStD.isLoad;
         vassert(uimm12 < 32768 && 0 == (uimm12 & 7));
         uimm12 >>= 3;
         vassert(uimm12 < (1<<12));
         *p++ = X_2_6_2_12_5_5(X11, X111101, isLD ? X01 : X00,
                               uimm12, rN, dD);
         goto done;
      }
      case ARM64in_VLdStQ: {
         /* 0100 1100 0000 0000 0111 11 rN rQ   st1 {vQ.2d}, [<rN|SP>]
            0100 1100 0100 0000 0111 11 rN rQ   ld1 {vQ.2d}, [<rN|SP>]
         */
         UInt rQ = qregEnc(i->ARM64in.VLdStQ.rQ);
         UInt rN = iregEnc(i->ARM64in.VLdStQ.rN);
         vassert(rQ < 32);
         vassert(rN < 31);
         if (i->ARM64in.VLdStQ.isLoad) {
            *p++ = 0x4C407C00 | (rN << 5) | rQ;
         } else {
            *p++ = 0x4C007C00 | (rN << 5) | rQ;
         }
         goto done;
      }
      case ARM64in_VCvtI2F: {
         /* 31  28    23 21 20 18  15     9 4
            000 11110 00 1  00 010 000000 n d  SCVTF Sd, Wn
            000 11110 01 1  00 010 000000 n d  SCVTF Dd, Wn
            100 11110 00 1  00 010 000000 n d  SCVTF Sd, Xn
            100 11110 01 1  00 010 000000 n d  SCVTF Dd, Xn
            000 11110 00 1  00 011 000000 n d  UCVTF Sd, Wn
            000 11110 01 1  00 011 000000 n d  UCVTF Dd, Wn
            100 11110 00 1  00 011 000000 n d  UCVTF Sd, Xn
            100 11110 01 1  00 011 000000 n d  UCVTF Dd, Xn
         */
         UInt       rN  = iregEnc(i->ARM64in.VCvtI2F.rS);
         UInt       rD  = dregEnc(i->ARM64in.VCvtI2F.rD);
         ARM64CvtOp how = i->ARM64in.VCvtI2F.how;
         /* Just handle cases as they show up. */
         switch (how) {
            case ARM64cvt_F32_I32S: /* SCVTF Sd, Wn */
               *p++ = X_3_5_8_6_5_5(X000, X11110, X00100010, X000000, rN, rD);
               break;
            case ARM64cvt_F64_I32S: /* SCVTF Dd, Wn */
               *p++ = X_3_5_8_6_5_5(X000, X11110, X01100010, X000000, rN, rD);
               break;
            case ARM64cvt_F32_I64S: /* SCVTF Sd, Xn */
               *p++ = X_3_5_8_6_5_5(X100, X11110, X00100010, X000000, rN, rD);
               break;
            case ARM64cvt_F64_I64S: /* SCVTF Dd, Xn */
               *p++ = X_3_5_8_6_5_5(X100, X11110, X01100010, X000000, rN, rD);
               break;
            case ARM64cvt_F32_I32U: /* UCVTF Sd, Wn */
               *p++ = X_3_5_8_6_5_5(X000, X11110, X00100011, X000000, rN, rD);
               break;
            case ARM64cvt_F64_I32U: /* UCVTF Dd, Wn */
               *p++ = X_3_5_8_6_5_5(X000, X11110, X01100011, X000000, rN, rD);
               break;
            case ARM64cvt_F32_I64U: /* UCVTF Sd, Xn */
               *p++ = X_3_5_8_6_5_5(X100, X11110, X00100011, X000000, rN, rD);
               break;
            case ARM64cvt_F64_I64U: /* UCVTF Dd, Xn */
               *p++ = X_3_5_8_6_5_5(X100, X11110, X01100011, X000000, rN, rD);
               break;
            default:
               goto bad;
         }
         goto done;
      }
      case ARM64in_VCvtF2I: {
         /*    30       23   20 18  15     9 4
            sf 00,11110,0x 1 00 000,000000 n d  FCVTNS Rd, Fn (round to
            sf 00,11110,0x 1 00 001,000000 n d  FCVTNU Rd, Fn  nearest)
            ---------------- 01 --------------  FCVTP-------- (round to +inf)
            ---------------- 10 --------------  FCVTM-------- (round to -inf)
            ---------------- 11 --------------  FCVTZ-------- (round to zero)

            Rd is Xd when sf==1, Wd when sf==0
            Fn is Dn when x==1, Sn when x==0
            20:19 carry the rounding mode, using the same encoding as FPCR
         */
         UInt       rD    = iregEnc(i->ARM64in.VCvtF2I.rD);
         UInt       rN    = dregEnc(i->ARM64in.VCvtF2I.rS);
         ARM64CvtOp how   = i->ARM64in.VCvtF2I.how;
         UChar      armRM = i->ARM64in.VCvtF2I.armRM;
         /* Just handle cases as they show up. */
         switch (how) {
            case ARM64cvt_F64_I32S: /* FCVTxS Wd, Dn */
               *p++ = X_3_5_8_6_5_5(X000, X11110, X01100000 | (armRM << 3),
                                    X000000, rN, rD);
               break;
            case ARM64cvt_F64_I32U: /* FCVTxU Wd, Dn */
               *p++ = X_3_5_8_6_5_5(X000, X11110, X01100001 | (armRM << 3),
                                    X000000, rN, rD);
               break;
            case ARM64cvt_F64_I64S: /* FCVTxS Xd, Dn */
               *p++ = X_3_5_8_6_5_5(X100, X11110, X01100000 | (armRM << 3),
                                    X000000, rN, rD);
               break;
            case ARM64cvt_F64_I64U: /* FCVTxU Xd, Dn */
               *p++ = X_3_5_8_6_5_5(X100, X11110, X01100001 | (armRM << 3),
                                    X000000, rN, rD);
               break;
            case ARM64cvt_F32_I32S: /* FCVTxS Wd, Sn */
               *p++ = X_3_5_8_6_5_5(X000, X11110, X00100000 | (armRM << 3),
                                    X000000, rN, rD);
               break;
            case ARM64cvt_F32_I32U: /* FCVTxU Wd, Sn */
               *p++ = X_3_5_8_6_5_5(X000, X11110, X00100001 | (armRM << 3),
                                    X000000, rN, rD);
               break;
            case ARM64cvt_F32_I64S: /* FCVTxS Xd, Sn */
               *p++ = X_3_5_8_6_5_5(X100, X11110, X00100000 | (armRM << 3),
                                    X000000, rN, rD);
               break;
            case ARM64cvt_F32_I64U: /* FCVTxU Xd, Sn */
               *p++ = X_3_5_8_6_5_5(X100, X11110, X00100001 | (armRM << 3),
                                    X000000, rN, rD);
               break;
            default:
               goto bad;
         }
         goto done;
      }
      case ARM64in_VCvtSD: {
         /* 31         23 21     16  14    9 4
            000,11110, 00 10001 0,1 10000 n d   FCVT Dd, Sn (S->D)
            ---------- 01 ----- 0,0 ---------   FCVT Sd, Dn (D->S)
            Rounding, when dst is smaller than src, is per the FPCR.
         */
         UInt dd = dregEnc(i->ARM64in.VCvtSD.dst);
         UInt nn = dregEnc(i->ARM64in.VCvtSD.src);
         if (i->ARM64in.VCvtSD.sToD) {
            *p++ = X_3_5_8_6_5_5(X000, X11110, X00100010, X110000, nn, dd);
         } else {
            *p++ = X_3_5_8_6_5_5(X000, X11110, X01100010, X010000, nn, dd);
         }
         goto done;
      }
      case ARM64in_VCvtHS: {
         /* 31         23 21     16  14    9 4
            000,11110, 11 10001 0,0 10000 n d   FCVT Sd, Hn (H->S)
            ---------- 00 ----- 1,1 ---------   FCVT Hd, Sn (S->H)
            Rounding, when dst is smaller than src, is per the FPCR.
         */
         UInt dd = dregEnc(i->ARM64in.VCvtHS.dst);
         UInt nn = dregEnc(i->ARM64in.VCvtHS.src);
         if (i->ARM64in.VCvtHS.hToS) {
            *p++ = X_3_5_8_6_5_5(X000, X11110, X11100010, X010000, nn, dd);
         } else {
            *p++ = X_3_5_8_6_5_5(X000, X11110, X00100011, X110000, nn, dd);
         }
         goto done;
      }
      case ARM64in_VCvtHD: {
         /* 31         23 21     16  14    9 4
            000,11110, 11 10001 0,1 10000 n d   FCVT Dd, Hn (H->D)
            ---------- 01 ----- 1,1 ---------   FCVT Hd, Dn (D->H)
            Rounding, when dst is smaller than src, is per the FPCR.
         */
         UInt dd = dregEnc(i->ARM64in.VCvtHD.dst);
         UInt nn = dregEnc(i->ARM64in.VCvtHD.src);
         if (i->ARM64in.VCvtHD.hToD) {
            *p++ = X_3_5_8_6_5_5(X000, X11110, X11100010, X110000, nn, dd);
         } else {
            *p++ = X_3_5_8_6_5_5(X000, X11110, X01100011, X110000, nn, dd);
         }
         goto done;
      }
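      /* Worked example, for illustration: the S->D form in VCvtSD above
         gives, for FCVT D0, S1,
            X_3_5_8_6_5_5(X000, X11110, X00100010, X110000, 1, 0)
            = 000 11110 00100010 110000 00001 00000 = 0x1E22C020,
         the architectural encoding of FCVT D0, S1. */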
      case ARM64in_VUnaryD: {
         /* 31        23 21     16 14    9 4
            000,11110 01 1,0000 0,0 10000 n d  FMOV Dd, Dn (not handled)
            ------------------- 0,1 ---------  FABS ------
            ------------------- 1,0 ---------  FNEG ------
            ------------------- 1,1 ---------  FSQRT -----
         */
         UInt dD  = dregEnc(i->ARM64in.VUnaryD.dst);
         UInt dN  = dregEnc(i->ARM64in.VUnaryD.src);
         UInt b16 = 2; /* impossible */
         UInt b15 = 2; /* impossible */
         switch (i->ARM64in.VUnaryD.op) {
            case ARM64fpu_NEG:  b16 = 1; b15 = 0; break;
            case ARM64fpu_SQRT: b16 = 1; b15 = 1; break;
            case ARM64fpu_ABS:  b16 = 0; b15 = 1; break;
            default: break;
         }
         if (b16 < 2 && b15 < 2) {
            *p++ = X_3_8_5_6_5_5(X000, X11110011, (X0000 << 1) | b16,
                                 (b15 << 5) | X10000, dN, dD);
            goto done;
         }
         /*
            000, 11110 01 1,001 11,1 10000 n d  FRINTI Dd, Dm (round per FPCR)
         */
         if (i->ARM64in.VUnaryD.op == ARM64fpu_RINT) {
            *p++ = X_3_8_5_6_5_5(X000, X11110011, X00111, X110000, dN, dD);
            goto done;
         }
         /*
            010, 11110 11 1,0000 1,1111 10 n d  FRECPX Dd, Dm
         */
         if (i->ARM64in.VUnaryD.op == ARM64fpu_RECPX) {
            *p++ = X_3_8_5_6_5_5(X010, X11110111, X00001, X111110, dN, dD);
            goto done;
         }
         goto bad;
      }
      case ARM64in_VUnaryS: {
         /* 31        23 21     16 14    9 4
            000,11110 00 1,0000 0,0 10000 n d  FMOV Sd, Sn (not handled)
            ------------------- 0,1 ---------  FABS ------
            ------------------- 1,0 ---------  FNEG ------
            ------------------- 1,1 ---------  FSQRT -----
         */
         UInt sD  = dregEnc(i->ARM64in.VUnaryS.dst);
         UInt sN  = dregEnc(i->ARM64in.VUnaryS.src);
         UInt b16 = 2; /* impossible */
         UInt b15 = 2; /* impossible */
         switch (i->ARM64in.VUnaryS.op) {
            case ARM64fpu_NEG:  b16 = 1; b15 = 0; break;
            case ARM64fpu_SQRT: b16 = 1; b15 = 1; break;
            case ARM64fpu_ABS:  b16 = 0; b15 = 1; break;
            default: break;
         }
         if (b16 < 2 && b15 < 2) {
            *p++ = X_3_8_5_6_5_5(X000, X11110001, (X0000 << 1) | b16,
                                 (b15 << 5) | X10000, sN, sD);
            goto done;
         }
         /*
            000, 11110 00 1,001 11,1 10000 n d  FRINTI Sd, Sm (round per FPCR)
         */
         if (i->ARM64in.VUnaryS.op == ARM64fpu_RINT) {
            *p++ = X_3_8_5_6_5_5(X000, X11110001, X00111, X110000, sN, sD);
            goto done;
         }
         /*
            010, 11110 10 1,0000 1,1111 10 n d  FRECPX Sd, Sm
         */
         if (i->ARM64in.VUnaryS.op == ARM64fpu_RECPX) {
            *p++ = X_3_8_5_6_5_5(X010, X11110101, X00001, X111110, sN, sD);
            goto done;
         }
         goto bad;
      }
      case ARM64in_VBinD: {
         /* 31        23  20 15   11 9 4
            ---------------- 0000 ------ FMUL --------
            000 11110 011 m  0001 10 n d FDIV Dd,Dn,Dm
            ---------------- 0010 ------ FADD --------
            ---------------- 0011 ------ FSUB --------
         */
         UInt dD    = dregEnc(i->ARM64in.VBinD.dst);
         UInt dN    = dregEnc(i->ARM64in.VBinD.argL);
         UInt dM    = dregEnc(i->ARM64in.VBinD.argR);
         UInt b1512 = 16; /* impossible */
         switch (i->ARM64in.VBinD.op) {
            case ARM64fpb_DIV: b1512 = X0001; break;
            case ARM64fpb_MUL: b1512 = X0000; break;
            case ARM64fpb_SUB: b1512 = X0011; break;
            case ARM64fpb_ADD: b1512 = X0010; break;
            default: goto bad;
         }
         vassert(b1512 < 16);
         *p++
            = X_3_8_5_6_5_5(X000, X11110011, dM, (b1512 << 2) | X10, dN, dD);
         goto done;
      }
      case ARM64in_VBinS: {
         /* 31        23  20 15   11 9 4
            ---------------- 0000 ------ FMUL --------
            000 11110 001 m  0001 10 n d FDIV Dd,Dn,Dm
            ---------------- 0010 ------ FADD --------
            ---------------- 0011 ------ FSUB --------
         */
         UInt sD    = dregEnc(i->ARM64in.VBinS.dst);
         UInt sN    = dregEnc(i->ARM64in.VBinS.argL);
         UInt sM    = dregEnc(i->ARM64in.VBinS.argR);
         UInt b1512 = 16; /* impossible */
         switch (i->ARM64in.VBinS.op) {
            case ARM64fpb_DIV: b1512 = X0001; break;
            case ARM64fpb_MUL: b1512 = X0000; break;
            case ARM64fpb_SUB: b1512 = X0011; break;
            case ARM64fpb_ADD: b1512 = X0010; break;
            default: goto bad;
         }
         vassert(b1512 < 16);
         *p++
            = X_3_8_5_6_5_5(X000, X11110001, sM, (b1512 << 2) | X10, sN, sD);
         goto done;
      }
      case ARM64in_VCmpD: {
         /* 000 11110 01 1 m 00 1000 n 00 000  FCMP Dn, Dm */
         UInt dN = dregEnc(i->ARM64in.VCmpD.argL);
         UInt dM = dregEnc(i->ARM64in.VCmpD.argR);
         *p++ = X_3_8_5_6_5_5(X000, X11110011, dM, X001000, dN, X00000);
         goto done;
      }
      case ARM64in_VCmpS: {
         /* 000 11110 00 1 m 00 1000 n 00 000  FCMP Sn, Sm */
         UInt sN = dregEnc(i->ARM64in.VCmpS.argL);
         UInt sM = dregEnc(i->ARM64in.VCmpS.argR);
         *p++ = X_3_8_5_6_5_5(X000, X11110001, sM, X001000, sN, X00000);
         goto done;
      }
      case ARM64in_VFCSel: {
         /* 31        23 21 20 15   11 9 5
            000 11110 00 1 m  cond 11 n d  FCSEL Sd,Sn,Sm,cond
            000 11110 01 1 m  cond 11 n d  FCSEL Dd,Dn,Dm,cond
         */
         Bool isD  = i->ARM64in.VFCSel.isD;
         UInt dd   = dregEnc(i->ARM64in.VFCSel.dst);
         UInt nn   = dregEnc(i->ARM64in.VFCSel.argL);
         UInt mm   = dregEnc(i->ARM64in.VFCSel.argR);
         UInt cond = (UInt)i->ARM64in.VFCSel.cond;
         vassert(cond < 16);
         *p++ = X_3_8_5_6_5_5(X000, isD ? X11110011 : X11110001,
                              mm, (cond << 2) | X000011, nn, dd);
         goto done;
      }
      case ARM64in_FPCR: {
         Bool toFPCR = i->ARM64in.FPCR.toFPCR;
         UInt iReg   = iregEnc(i->ARM64in.FPCR.iReg);
         if (toFPCR) {
            /* 0xD51B44 000 Rt  MSR fpcr, rT */
            *p++ = 0xD51B4400 | (iReg & 0x1F);
            goto done;
         }
         goto bad; // FPCR -> iReg case currently ATC
      }
      case ARM64in_FPSR: {
         Bool toFPSR = i->ARM64in.FPSR.toFPSR;
         UInt iReg   = iregEnc(i->ARM64in.FPSR.iReg);
         if (toFPSR) {
            /* 0xD51B44 001 Rt  MSR fpsr, rT */
            *p++ = 0xD51B4420 | (iReg & 0x1F);
         } else {
            /* 0xD53B44 001 Rt  MRS rT, fpsr */
            *p++ = 0xD53B4420 | (iReg & 0x1F);
         }
         goto done;
      }
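      /* Worked example, for illustration: with the fixed opcode bases above,
         MSR FPCR, X3 is emitted as 0xD51B4400 | 3 = 0xD51B4403, and
         MRS X3, FPSR as 0xD53B4420 | 3 = 0xD53B4423. */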
4277 case ARM64in_VBinV
: {
4279 010 01110 11 1 m 100001 n d ADD Vd.2d, Vn.2d, Vm.2d
4280 010 01110 10 1 m 100001 n d ADD Vd.4s, Vn.4s, Vm.4s
4281 010 01110 01 1 m 100001 n d ADD Vd.8h, Vn.8h, Vm.8h
4282 010 01110 00 1 m 100001 n d ADD Vd.16b, Vn.16b, Vm.16b
4284 011 01110 11 1 m 100001 n d SUB Vd.2d, Vn.2d, Vm.2d
4285 011 01110 10 1 m 100001 n d SUB Vd.4s, Vn.4s, Vm.4s
4286 011 01110 01 1 m 100001 n d SUB Vd.8h, Vn.8h, Vm.8h
4287 011 01110 00 1 m 100001 n d SUB Vd.16b, Vn.16b, Vm.16b
4289 010 01110 10 1 m 100111 n d MUL Vd.4s, Vn.4s, Vm.4s
4290 010 01110 01 1 m 100111 n d MUL Vd.8h, Vn.8h, Vm.8h
4291 010 01110 00 1 m 100111 n d MUL Vd.16b, Vn.16b, Vm.16b
4293 010 01110 01 1 m 110101 n d FADD Vd.2d, Vn.2d, Vm.2d
4294 010 01110 00 1 m 110101 n d FADD Vd.4s, Vn.4s, Vm.4s
4295 010 01110 11 1 m 110101 n d FSUB Vd.2d, Vn.2d, Vm.2d
4296 010 01110 10 1 m 110101 n d FSUB Vd.4s, Vn.4s, Vm.4s
4298 011 01110 01 1 m 110111 n d FMUL Vd.2d, Vn.2d, Vm.2d
4299 011 01110 00 1 m 110111 n d FMUL Vd.4s, Vn.4s, Vm.4s
4300 011 01110 01 1 m 111111 n d FDIV Vd.2d, Vn.2d, Vm.2d
4301 011 01110 00 1 m 111111 n d FDIV Vd.4s, Vn.4s, Vm.4s
4303 010 01110 01 1 m 111101 n d FMAX Vd.2d, Vn.2d, Vm.2d
4304 010 01110 00 1 m 111101 n d FMAX Vd.4s, Vn.4s, Vm.4s
4305 010 01110 11 1 m 111101 n d FMIN Vd.2d, Vn.2d, Vm.2d
4306 010 01110 10 1 m 111101 n d FMIN Vd.4s, Vn.4s, Vm.4s
4308 011 01110 10 1 m 011001 n d UMAX Vd.4s, Vn.4s, Vm.4s
4309 011 01110 01 1 m 011001 n d UMAX Vd.8h, Vn.8h, Vm.8h
4310 011 01110 00 1 m 011001 n d UMAX Vd.16b, Vn.16b, Vm.16b
4312 011 01110 10 1 m 011011 n d UMIN Vd.4s, Vn.4s, Vm.4s
4313 011 01110 01 1 m 011011 n d UMIN Vd.8h, Vn.8h, Vm.8h
4314 011 01110 00 1 m 011011 n d UMIN Vd.16b, Vn.16b, Vm.16b
4316 010 01110 10 1 m 011001 n d SMAX Vd.4s, Vn.4s, Vm.4s
4317 010 01110 01 1 m 011001 n d SMAX Vd.8h, Vn.8h, Vm.8h
4318 010 01110 00 1 m 011001 n d SMAX Vd.16b, Vn.16b, Vm.16b
4320 010 01110 10 1 m 011011 n d SMIN Vd.4s, Vn.4s, Vm.4s
4321 010 01110 01 1 m 011011 n d SMIN Vd.8h, Vn.8h, Vm.8h
4322 010 01110 00 1 m 011011 n d SMIN Vd.16b, Vn.16b, Vm.16b
4324 010 01110 00 1 m 000111 n d AND Vd, Vn, Vm
4325 010 01110 10 1 m 000111 n d ORR Vd, Vn, Vm
4326 011 01110 00 1 m 000111 n d EOR Vd, Vn, Vm
4328 011 01110 11 1 m 100011 n d CMEQ Vd.2d, Vn.2d, Vm.2d
4329 011 01110 10 1 m 100011 n d CMEQ Vd.4s, Vn.4s, Vm.4s
4330 011 01110 01 1 m 100011 n d CMEQ Vd.8h, Vn.8h, Vm.8h
4331 011 01110 00 1 m 100011 n d CMEQ Vd.16b, Vn.16b, Vm.16b
4333 011 01110 11 1 m 001101 n d CMHI Vd.2d, Vn.2d, Vm.2d
4334 011 01110 10 1 m 001101 n d CMHI Vd.4s, Vn.4s, Vm.4s
4335 011 01110 01 1 m 001101 n d CMHI Vd.8h, Vn.8h, Vm.8h
4336 011 01110 00 1 m 001101 n d CMHI Vd.16b, Vn.16b, Vm.16b
4338 010 01110 11 1 m 001101 n d CMGT Vd.2d, Vn.2d, Vm.2d
4339 010 01110 10 1 m 001101 n d CMGT Vd.4s, Vn.4s, Vm.4s
4340 010 01110 01 1 m 001101 n d CMGT Vd.8h, Vn.8h, Vm.8h
4341 010 01110 00 1 m 001101 n d CMGT Vd.16b, Vn.16b, Vm.16b
4343 010 01110 01 1 m 111001 n d FCMEQ Vd.2d, Vn.2d, Vm.2d
4344 010 01110 00 1 m 111001 n d FCMEQ Vd.4s, Vn.4s, Vm.4s
4346 011 01110 01 1 m 111001 n d FCMGE Vd.2d, Vn.2d, Vm.2d
4347 011 01110 00 1 m 111001 n d FCMGE Vd.4s, Vn.4s, Vm.4s
4349 011 01110 11 1 m 111001 n d FCMGT Vd.2d, Vn.2d, Vm.2d
4350 011 01110 10 1 m 111001 n d FCMGT Vd.4s, Vn.4s, Vm.4s
4352 010 01110 00 0 m 000000 n d TBL Vd.16b, {Vn.16b}, Vm.16b
4354 010 01110 11 0 m 000110 n d UZP1 Vd.2d, Vn.2d, Vm.2d
4355 010 01110 10 0 m 000110 n d UZP1 Vd.4s, Vn.4s, Vm.4s
4356 010 01110 01 0 m 000110 n d UZP1 Vd.8h, Vn.8h, Vm.8h
4357 010 01110 00 0 m 000110 n d UZP1 Vd.16b, Vn.16b, Vm.16b
4359 010 01110 11 0 m 010110 n d UZP2 Vd.2d, Vn.2d, Vm.2d
4360 010 01110 10 0 m 010110 n d UZP2 Vd.4s, Vn.4s, Vm.4s
4361 010 01110 01 0 m 010110 n d UZP2 Vd.8h, Vn.8h, Vm.8h
4362 010 01110 00 0 m 010110 n d UZP2 Vd.16b, Vn.16b, Vm.16b
4364 010 01110 10 0 m 001110 n d ZIP1 Vd.4s, Vn.4s, Vm.4s
4365 010 01110 01 0 m 001110 n d ZIP1 Vd.8h, Vn.8h, Vm.8h
4366 010 01110 10 0 m 001110 n d ZIP1 Vd.16b, Vn.16b, Vm.16b
4368 010 01110 10 0 m 011110 n d ZIP2 Vd.4s, Vn.4s, Vm.4s
4369 010 01110 01 0 m 011110 n d ZIP2 Vd.8h, Vn.8h, Vm.8h
4370 010 01110 10 0 m 011110 n d ZIP2 Vd.16b, Vn.16b, Vm.16b
4372 011 01110 00 1 m 100111 n d PMUL Vd.16b, Vn.16b, Vm.16b
4374 000 01110 00 1 m 111000 n d PMULL Vd.8h, Vn.8b, Vm.8b
4376 001 01110 10 1 m 110000 n d UMULL Vd.2d, Vn.2s, Vm.2s
4377 001 01110 01 1 m 110000 n d UMULL Vd.4s, Vn.4h, Vm.4h
4378 001 01110 00 1 m 110000 n d UMULL Vd.8h, Vn.8b, Vm.8b
4380 000 01110 10 1 m 110000 n d SMULL Vd.2d, Vn.2s, Vm.2s
4381 000 01110 01 1 m 110000 n d SMULL Vd.4s, Vn.4h, Vm.4h
4382 000 01110 00 1 m 110000 n d SMULL Vd.8h, Vn.8b, Vm.8b
4384 010 01110 11 1 m 000011 n d SQADD Vd.2d, Vn.2d, Vm.2d
4385 010 01110 10 1 m 000011 n d SQADD Vd.4s, Vn.4s, Vm.4s
4386 010 01110 01 1 m 000011 n d SQADD Vd.8h, Vn.8h, Vm.8h
4387 010 01110 00 1 m 000011 n d SQADD Vd.16b, Vn.16b, Vm.16b
4389 011 01110 11 1 m 000011 n d UQADD Vd.2d, Vn.2d, Vm.2d
4390 011 01110 10 1 m 000011 n d UQADD Vd.4s, Vn.4s, Vm.4s
4391 011 01110 01 1 m 000011 n d UQADD Vd.8h, Vn.8h, Vm.8h
4392 011 01110 00 1 m 000011 n d UQADD Vd.16b, Vn.16b, Vm.16b
4394 010 01110 11 1 m 001011 n d SQSUB Vd.2d, Vn.2d, Vm.2d
4395 010 01110 10 1 m 001011 n d SQSUB Vd.4s, Vn.4s, Vm.4s
4396 010 01110 01 1 m 001011 n d SQSUB Vd.8h, Vn.8h, Vm.8h
4397 010 01110 00 1 m 001011 n d SQSUB Vd.16b, Vn.16b, Vm.16b
4399 011 01110 11 1 m 001011 n d UQSUB Vd.2d, Vn.2d, Vm.2d
4400 011 01110 10 1 m 001011 n d UQSUB Vd.4s, Vn.4s, Vm.4s
4401 011 01110 01 1 m 001011 n d UQSUB Vd.8h, Vn.8h, Vm.8h
4402 011 01110 00 1 m 001011 n d UQSUB Vd.16b, Vn.16b, Vm.16b
4404 000 01110 10 1 m 110100 n d SQDMULL Vd.2d, Vn.2s, Vm.2s
4405 000 01110 01 1 m 110100 n d SQDMULL Vd.4s, Vn.4h, Vm.4h
4407 010 01110 10 1 m 101101 n d SQDMULH Vd.4s, Vn.4s, Vm.4s
4408 010 01110 01 1 m 101101 n d SQDMULH Vd.8h, Vn.8h, Vm.8h
4409 011 01110 10 1 m 101101 n d SQRDMULH Vd.4s, Vn.4s, Vm.4s
4410 011 01110 10 1 m 101101 n d SQRDMULH Vd.8h, Vn.8h, Vm.8h
4412 010 01110 sz 1 m 010011 n d SQSHL@sz Vd, Vn, Vm
4413 010 01110 sz 1 m 010111 n d SQRSHL@sz Vd, Vn, Vm
4414 011 01110 sz 1 m 010011 n d UQSHL@sz Vd, Vn, Vm
4415 011 01110 sz 1 m 010111 n d URQSHL@sz Vd, Vn, Vm
4417 010 01110 sz 1 m 010001 n d SSHL@sz Vd, Vn, Vm
4418 010 01110 sz 1 m 010101 n d SRSHL@sz Vd, Vn, Vm
4419 011 01110 sz 1 m 010001 n d USHL@sz Vd, Vn, Vm
4420 011 01110 sz 1 m 010101 n d URSHL@sz Vd, Vn, Vm
4422 010 01110 01 1 m 111111 n d FRECPS Vd.2d, Vn.2d, Vm.2d
4423 010 01110 00 1 m 111111 n d FRECPS Vd.4s, Vn.4s, Vm.4s
4424 010 01110 11 1 m 111111 n d FRSQRTS Vd.2d, Vn.2d, Vm.2d
4425 010 01110 10 1 m 111111 n d FRSQRTS Vd.4s, Vn.4s, Vm.4s
4427 UInt vD
= qregEnc(i
->ARM64in
.VBinV
.dst
);
4428 UInt vN
= qregEnc(i
->ARM64in
.VBinV
.argL
);
4429 UInt vM
= qregEnc(i
->ARM64in
.VBinV
.argR
);
4430 switch (i
->ARM64in
.VBinV
.op
) {
4431 case ARM64vecb_ADD64x2
:
4432 *p
++ = X_3_8_5_6_5_5(X010
, X01110111
, vM
, X100001
, vN
, vD
);
4434 case ARM64vecb_ADD32x4
:
4435 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, vM
, X100001
, vN
, vD
);
4437 case ARM64vecb_ADD16x8
:
4438 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, vM
, X100001
, vN
, vD
);
4440 case ARM64vecb_ADD8x16
:
4441 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, vM
, X100001
, vN
, vD
);
4443 case ARM64vecb_SUB64x2
:
4444 *p
++ = X_3_8_5_6_5_5(X011
, X01110111
, vM
, X100001
, vN
, vD
);
4446 case ARM64vecb_SUB32x4
:
4447 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, vM
, X100001
, vN
, vD
);
4449 case ARM64vecb_SUB16x8
:
4450 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, vM
, X100001
, vN
, vD
);
4452 case ARM64vecb_SUB8x16
:
4453 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, vM
, X100001
, vN
, vD
);
4455 case ARM64vecb_MUL32x4
:
4456 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, vM
, X100111
, vN
, vD
);
4458 case ARM64vecb_MUL16x8
:
4459 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, vM
, X100111
, vN
, vD
);
4461 case ARM64vecb_MUL8x16
:
4462 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, vM
, X100111
, vN
, vD
);
4464 case ARM64vecb_FADD64x2
:
4465 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, vM
, X110101
, vN
, vD
);
4467 case ARM64vecb_FADD32x4
:
4468 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, vM
, X110101
, vN
, vD
);
4470 case ARM64vecb_FSUB64x2
:
4471 *p
++ = X_3_8_5_6_5_5(X010
, X01110111
, vM
, X110101
, vN
, vD
);
4473 case ARM64vecb_FSUB32x4
:
4474 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, vM
, X110101
, vN
, vD
);
4476 case ARM64vecb_FMUL64x2
:
4477 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, vM
, X110111
, vN
, vD
);
4479 case ARM64vecb_FMUL32x4
:
4480 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, vM
, X110111
, vN
, vD
);
4482 case ARM64vecb_FDIV64x2
:
4483 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, vM
, X111111
, vN
, vD
);
4485 case ARM64vecb_FDIV32x4
:
4486 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, vM
, X111111
, vN
, vD
);
4489 case ARM64vecb_FMAX64x2
:
4490 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, vM
, X111101
, vN
, vD
);
4492 case ARM64vecb_FMAX32x4
:
4493 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, vM
, X111101
, vN
, vD
);
4495 case ARM64vecb_FMIN64x2
:
4496 *p
++ = X_3_8_5_6_5_5(X010
, X01110111
, vM
, X111101
, vN
, vD
);
4498 case ARM64vecb_FMIN32x4
:
4499 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, vM
, X111101
, vN
, vD
);
4502 case ARM64vecb_UMAX32x4
:
4503 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, vM
, X011001
, vN
, vD
);
4505 case ARM64vecb_UMAX16x8
:
4506 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, vM
, X011001
, vN
, vD
);
4508 case ARM64vecb_UMAX8x16
:
4509 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, vM
, X011001
, vN
, vD
);
4512 case ARM64vecb_UMIN32x4
:
4513 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, vM
, X011011
, vN
, vD
);
4515 case ARM64vecb_UMIN16x8
:
4516 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, vM
, X011011
, vN
, vD
);
4518 case ARM64vecb_UMIN8x16
:
4519 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, vM
, X011011
, vN
, vD
);
4522 case ARM64vecb_SMAX32x4
:
4523 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, vM
, X011001
, vN
, vD
);
4525 case ARM64vecb_SMAX16x8
:
4526 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, vM
, X011001
, vN
, vD
);
4528 case ARM64vecb_SMAX8x16
:
4529 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, vM
, X011001
, vN
, vD
);
4532 case ARM64vecb_SMIN32x4
:
4533 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, vM
, X011011
, vN
, vD
);
4535 case ARM64vecb_SMIN16x8
:
4536 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, vM
, X011011
, vN
, vD
);
4538 case ARM64vecb_SMIN8x16
:
4539 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, vM
, X011011
, vN
, vD
);
4543 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, vM
, X000111
, vN
, vD
);
4546 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, vM
, X000111
, vN
, vD
);
4549 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, vM
, X000111
, vN
, vD
);
4552 case ARM64vecb_CMEQ64x2
:
4553 *p
++ = X_3_8_5_6_5_5(X011
, X01110111
, vM
, X100011
, vN
, vD
);
4555 case ARM64vecb_CMEQ32x4
:
4556 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, vM
, X100011
, vN
, vD
);
4558 case ARM64vecb_CMEQ16x8
:
4559 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, vM
, X100011
, vN
, vD
);
4561 case ARM64vecb_CMEQ8x16
:
4562 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, vM
, X100011
, vN
, vD
);
4565 case ARM64vecb_CMHI64x2
:
4566 *p
++ = X_3_8_5_6_5_5(X011
, X01110111
, vM
, X001101
, vN
, vD
);
4568 case ARM64vecb_CMHI32x4
:
4569 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, vM
, X001101
, vN
, vD
);
4571 case ARM64vecb_CMHI16x8
:
4572 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, vM
, X001101
, vN
, vD
);
4574 case ARM64vecb_CMHI8x16
:
4575 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, vM
, X001101
, vN
, vD
);
4578 case ARM64vecb_CMGT64x2
:
4579 *p
++ = X_3_8_5_6_5_5(X010
, X01110111
, vM
, X001101
, vN
, vD
);
4581 case ARM64vecb_CMGT32x4
:
4582 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, vM
, X001101
, vN
, vD
);
4584 case ARM64vecb_CMGT16x8
:
4585 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, vM
, X001101
, vN
, vD
);
4587 case ARM64vecb_CMGT8x16
:
4588 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, vM
, X001101
, vN
, vD
);
4591 case ARM64vecb_FCMEQ64x2
:
4592 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, vM
, X111001
, vN
, vD
);
4594 case ARM64vecb_FCMEQ32x4
:
4595 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, vM
, X111001
, vN
, vD
);
4598 case ARM64vecb_FCMGE64x2
:
4599 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, vM
, X111001
, vN
, vD
);
4601 case ARM64vecb_FCMGE32x4
:
4602 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, vM
, X111001
, vN
, vD
);
4605 case ARM64vecb_FCMGT64x2
:
4606 *p
++ = X_3_8_5_6_5_5(X011
, X01110111
, vM
, X111001
, vN
, vD
);
4608 case ARM64vecb_FCMGT32x4
:
4609 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, vM
, X111001
, vN
, vD
);
4612 case ARM64vecb_TBL1
:
4613 *p
++ = X_3_8_5_6_5_5(X010
, X01110000
, vM
, X000000
, vN
, vD
);
4616 case ARM64vecb_UZP164x2
:
4617 *p
++ = X_3_8_5_6_5_5(X010
, X01110110
, vM
, X000110
, vN
, vD
);
4619 case ARM64vecb_UZP132x4
:
4620 *p
++ = X_3_8_5_6_5_5(X010
, X01110100
, vM
, X000110
, vN
, vD
);
4622 case ARM64vecb_UZP116x8
:
4623 *p
++ = X_3_8_5_6_5_5(X010
, X01110010
, vM
, X000110
, vN
, vD
);
4625 case ARM64vecb_UZP18x16
:
4626 *p
++ = X_3_8_5_6_5_5(X010
, X01110000
, vM
, X000110
, vN
, vD
);
4629 case ARM64vecb_UZP264x2
:
4630 *p
++ = X_3_8_5_6_5_5(X010
, X01110110
, vM
, X010110
, vN
, vD
);
4632 case ARM64vecb_UZP232x4
:
4633 *p
++ = X_3_8_5_6_5_5(X010
, X01110100
, vM
, X010110
, vN
, vD
);
4635 case ARM64vecb_UZP216x8
:
4636 *p
++ = X_3_8_5_6_5_5(X010
, X01110010
, vM
, X010110
, vN
, vD
);
4638 case ARM64vecb_UZP28x16
:
4639 *p
++ = X_3_8_5_6_5_5(X010
, X01110000
, vM
, X010110
, vN
, vD
);
4642 case ARM64vecb_ZIP132x4
:
4643 *p
++ = X_3_8_5_6_5_5(X010
, X01110100
, vM
, X001110
, vN
, vD
);
4645 case ARM64vecb_ZIP116x8
:
4646 *p
++ = X_3_8_5_6_5_5(X010
, X01110010
, vM
, X001110
, vN
, vD
);
4648 case ARM64vecb_ZIP18x16
:
4649 *p
++ = X_3_8_5_6_5_5(X010
, X01110000
, vM
, X001110
, vN
, vD
);
4652 case ARM64vecb_ZIP232x4
:
4653 *p
++ = X_3_8_5_6_5_5(X010
, X01110100
, vM
, X011110
, vN
, vD
);
4655 case ARM64vecb_ZIP216x8
:
4656 *p
++ = X_3_8_5_6_5_5(X010
, X01110010
, vM
, X011110
, vN
, vD
);
4658 case ARM64vecb_ZIP28x16
:
4659 *p
++ = X_3_8_5_6_5_5(X010
, X01110000
, vM
, X011110
, vN
, vD
);
4662 case ARM64vecb_PMUL8x16
:
4663 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, vM
, X100111
, vN
, vD
);
4666 case ARM64vecb_PMULL8x8
:
4667 *p
++ = X_3_8_5_6_5_5(X000
, X01110001
, vM
, X111000
, vN
, vD
);
4670 case ARM64vecb_UMULL2DSS
:
4671 *p
++ = X_3_8_5_6_5_5(X001
, X01110101
, vM
, X110000
, vN
, vD
);
4673 case ARM64vecb_UMULL4SHH
:
4674 *p
++ = X_3_8_5_6_5_5(X001
, X01110011
, vM
, X110000
, vN
, vD
);
4676 case ARM64vecb_UMULL8HBB
:
4677 *p
++ = X_3_8_5_6_5_5(X001
, X01110001
, vM
, X110000
, vN
, vD
);
4680 case ARM64vecb_SMULL2DSS
:
4681 *p
++ = X_3_8_5_6_5_5(X000
, X01110101
, vM
, X110000
, vN
, vD
);
4683 case ARM64vecb_SMULL4SHH
:
4684 *p
++ = X_3_8_5_6_5_5(X000
, X01110011
, vM
, X110000
, vN
, vD
);
4686 case ARM64vecb_SMULL8HBB
:
4687 *p
++ = X_3_8_5_6_5_5(X000
, X01110001
, vM
, X110000
, vN
, vD
);
4690 case ARM64vecb_SQADD64x2
:
4691 *p
++ = X_3_8_5_6_5_5(X010
, X01110111
, vM
, X000011
, vN
, vD
);
4693 case ARM64vecb_SQADD32x4
:
4694 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, vM
, X000011
, vN
, vD
);
4696 case ARM64vecb_SQADD16x8
:
4697 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, vM
, X000011
, vN
, vD
);
4699 case ARM64vecb_SQADD8x16
:
4700 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, vM
, X000011
, vN
, vD
);
4703 case ARM64vecb_UQADD64x2
:
4704 *p
++ = X_3_8_5_6_5_5(X011
, X01110111
, vM
, X000011
, vN
, vD
);
4706 case ARM64vecb_UQADD32x4
:
4707 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, vM
, X000011
, vN
, vD
);
4709 case ARM64vecb_UQADD16x8
:
4710 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, vM
, X000011
, vN
, vD
);
4712 case ARM64vecb_UQADD8x16
:
4713 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, vM
, X000011
, vN
, vD
);
4716 case ARM64vecb_SQSUB64x2
:
4717 *p
++ = X_3_8_5_6_5_5(X010
, X01110111
, vM
, X001011
, vN
, vD
);
4719 case ARM64vecb_SQSUB32x4
:
4720 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, vM
, X001011
, vN
, vD
);
4722 case ARM64vecb_SQSUB16x8
:
4723 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, vM
, X001011
, vN
, vD
);
4725 case ARM64vecb_SQSUB8x16
:
4726 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, vM
, X001011
, vN
, vD
);
4729 case ARM64vecb_UQSUB64x2
:
4730 *p
++ = X_3_8_5_6_5_5(X011
, X01110111
, vM
, X001011
, vN
, vD
);
4732 case ARM64vecb_UQSUB32x4
:
4733 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, vM
, X001011
, vN
, vD
);
4735 case ARM64vecb_UQSUB16x8
:
4736 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, vM
, X001011
, vN
, vD
);
4738 case ARM64vecb_UQSUB8x16
:
4739 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, vM
, X001011
, vN
, vD
);
4742 case ARM64vecb_SQDMULL2DSS
:
4743 *p
++ = X_3_8_5_6_5_5(X000
, X01110101
, vM
, X110100
, vN
, vD
);
4745 case ARM64vecb_SQDMULL4SHH
:
4746 *p
++ = X_3_8_5_6_5_5(X000
, X01110011
, vM
, X110100
, vN
, vD
);
4749 case ARM64vecb_SQDMULH32x4
:
4750 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, vM
, X101101
, vN
, vD
);
4752 case ARM64vecb_SQDMULH16x8
:
4753 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, vM
, X101101
, vN
, vD
);
4755 case ARM64vecb_SQRDMULH32x4
:
4756 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, vM
, X101101
, vN
, vD
);
4758 case ARM64vecb_SQRDMULH16x8
:
4759 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, vM
, X101101
, vN
, vD
);
4762 case ARM64vecb_SQSHL64x2
:
4763 *p
++ = X_3_8_5_6_5_5(X010
, X01110111
, vM
, X010011
, vN
, vD
);
4765 case ARM64vecb_SQSHL32x4
:
4766 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, vM
, X010011
, vN
, vD
);
4768 case ARM64vecb_SQSHL16x8
:
4769 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, vM
, X010011
, vN
, vD
);
4771 case ARM64vecb_SQSHL8x16
:
4772 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, vM
, X010011
, vN
, vD
);
4775 case ARM64vecb_SQRSHL64x2
:
4776 *p
++ = X_3_8_5_6_5_5(X010
, X01110111
, vM
, X010111
, vN
, vD
);
4778 case ARM64vecb_SQRSHL32x4
:
4779 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, vM
, X010111
, vN
, vD
);
4781 case ARM64vecb_SQRSHL16x8
:
4782 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, vM
, X010111
, vN
, vD
);
4784 case ARM64vecb_SQRSHL8x16
:
4785 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, vM
, X010111
, vN
, vD
);
4788 case ARM64vecb_UQSHL64x2
:
4789 *p
++ = X_3_8_5_6_5_5(X011
, X01110111
, vM
, X010011
, vN
, vD
);
4791 case ARM64vecb_UQSHL32x4
:
4792 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, vM
, X010011
, vN
, vD
);
4794 case ARM64vecb_UQSHL16x8
:
4795 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, vM
, X010011
, vN
, vD
);
4797 case ARM64vecb_UQSHL8x16
:
4798 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, vM
, X010011
, vN
, vD
);
4801 case ARM64vecb_UQRSHL64x2
:
4802 *p
++ = X_3_8_5_6_5_5(X011
, X01110111
, vM
, X010111
, vN
, vD
);
4804 case ARM64vecb_UQRSHL32x4
:
4805 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, vM
, X010111
, vN
, vD
);
4807 case ARM64vecb_UQRSHL16x8
:
4808 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, vM
, X010111
, vN
, vD
);
4810 case ARM64vecb_UQRSHL8x16
:
4811 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, vM
, X010111
, vN
, vD
);
4814 case ARM64vecb_SSHL64x2
:
4815 *p
++ = X_3_8_5_6_5_5(X010
, X01110111
, vM
, X010001
, vN
, vD
);
4817 case ARM64vecb_SSHL32x4
:
4818 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, vM
, X010001
, vN
, vD
);
4820 case ARM64vecb_SSHL16x8
:
4821 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, vM
, X010001
, vN
, vD
);
4823 case ARM64vecb_SSHL8x16
:
4824 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, vM
, X010001
, vN
, vD
);
4827 case ARM64vecb_SRSHL64x2
:
4828 *p
++ = X_3_8_5_6_5_5(X010
, X01110111
, vM
, X010101
, vN
, vD
);
4830 case ARM64vecb_SRSHL32x4
:
4831 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, vM
, X010101
, vN
, vD
);
4833 case ARM64vecb_SRSHL16x8
:
4834 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, vM
, X010101
, vN
, vD
);
4836 case ARM64vecb_SRSHL8x16
:
4837 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, vM
, X010101
, vN
, vD
);
4840 case ARM64vecb_USHL64x2
:
4841 *p
++ = X_3_8_5_6_5_5(X011
, X01110111
, vM
, X010001
, vN
, vD
);
4843 case ARM64vecb_USHL32x4
:
4844 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, vM
, X010001
, vN
, vD
);
4846 case ARM64vecb_USHL16x8
:
4847 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, vM
, X010001
, vN
, vD
);
4849 case ARM64vecb_USHL8x16
:
4850 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, vM
, X010001
, vN
, vD
);
4853 case ARM64vecb_URSHL64x2
:
4854 *p
++ = X_3_8_5_6_5_5(X011
, X01110111
, vM
, X010101
, vN
, vD
);
4856 case ARM64vecb_URSHL32x4
:
4857 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, vM
, X010101
, vN
, vD
);
4859 case ARM64vecb_URSHL16x8
:
4860 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, vM
, X010101
, vN
, vD
);
4862 case ARM64vecb_URSHL8x16
:
4863 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, vM
, X010101
, vN
, vD
);
4866 case ARM64vecb_FRECPS64x2
:
4867 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, vM
, X111111
, vN
, vD
);
4869 case ARM64vecb_FRECPS32x4
:
4870 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, vM
, X111111
, vN
, vD
);
4872 case ARM64vecb_FRSQRTS64x2
:
4873 *p
++ = X_3_8_5_6_5_5(X010
, X01110111
, vM
, X111111
, vN
, vD
);
4875 case ARM64vecb_FRSQRTS32x4
:
4876 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, vM
, X111111
, vN
, vD
);
4884 case ARM64in_VModifyV
: {
4886 010 01110 sz 1 00000 001110 n d SUQADD@sz Vd, Vn
4887 011 01110 sz 1 00000 001110 n d USQADD@sz Vd, Vn
4889 UInt vD
= qregEnc(i
->ARM64in
.VModifyV
.mod
);
4890 UInt vN
= qregEnc(i
->ARM64in
.VModifyV
.arg
);
4891 switch (i
->ARM64in
.VModifyV
.op
) {
4892 case ARM64vecmo_SUQADD64x2
:
4893 *p
++ = X_3_8_5_6_5_5(X010
, X01110111
, X00000
, X001110
, vN
, vD
);
4895 case ARM64vecmo_SUQADD32x4
:
4896 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, X00000
, X001110
, vN
, vD
);
4898 case ARM64vecmo_SUQADD16x8
:
4899 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, X00000
, X001110
, vN
, vD
);
4901 case ARM64vecmo_SUQADD8x16
:
4902 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, X00000
, X001110
, vN
, vD
);
4904 case ARM64vecmo_USQADD64x2
:
4905 *p
++ = X_3_8_5_6_5_5(X011
, X01110111
, X00000
, X001110
, vN
, vD
);
4907 case ARM64vecmo_USQADD32x4
:
4908 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, X00000
, X001110
, vN
, vD
);
4910 case ARM64vecmo_USQADD16x8
:
4911 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, X00000
, X001110
, vN
, vD
);
4913 case ARM64vecmo_USQADD8x16
:
4914 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, X00000
, X001110
, vN
, vD
);
4921 case ARM64in_VUnaryV
: {
4923 010 01110 11 1 00000 111110 n d FABS Vd.2d, Vn.2d
4924 010 01110 10 1 00000 111110 n d FABS Vd.4s, Vn.4s
4925 011 01110 11 1 00000 111110 n d FNEG Vd.2d, Vn.2d
4926 011 01110 10 1 00000 111110 n d FNEG Vd.4s, Vn.4s
4927 011 01110 00 1 00000 010110 n d NOT Vd.16b, Vn.16b
4929 010 01110 11 1 00000 101110 n d ABS Vd.2d, Vn.2d
4930 010 01110 10 1 00000 101110 n d ABS Vd.4s, Vn.4s
4931 010 01110 01 1 00000 101110 n d ABS Vd.8h, Vn.8h
4932 010 01110 00 1 00000 101110 n d ABS Vd.16b, Vn.16b
4934 010 01110 10 1 00000 010010 n d CLS Vd.4s, Vn.4s
4935 010 01110 01 1 00000 010010 n d CLS Vd.8h, Vn.8h
4936 010 01110 00 1 00000 010010 n d CLS Vd.16b, Vn.16b
4938 011 01110 10 1 00000 010010 n d CLZ Vd.4s, Vn.4s
4939 011 01110 01 1 00000 010010 n d CLZ Vd.8h, Vn.8h
4940 011 01110 00 1 00000 010010 n d CLZ Vd.16b, Vn.16b
4942 010 01110 00 1 00000 010110 n d CNT Vd.16b, Vn.16b
4944 011 01110 01 1 00000 010110 n d RBIT Vd.16b, Vn.16b
4945 010 01110 00 1 00000 000110 n d REV16 Vd.16b, Vn.16b
4946 011 01110 00 1 00000 000010 n d REV32 Vd.16b, Vn.16b
4947 011 01110 01 1 00000 000010 n d REV32 Vd.8h, Vn.8h
4949 010 01110 00 1 00000 000010 n d REV64 Vd.16b, Vn.16b
4950 010 01110 01 1 00000 000010 n d REV64 Vd.8h, Vn.8h
4951 010 01110 10 1 00000 000010 n d REV64 Vd.4s, Vn.4s
4953 010 01110 10 1 00001 110010 n d URECPE Vd.4s, Vn.4s
4954 011 01110 10 1 00001 110010 n d URSQRTE Vd.4s, Vn.4s
4956 010 01110 11 1 00001 110110 n d FRECPE Vd.2d, Vn.2d
4957 010 01110 10 1 00001 110110 n d FRECPE Vd.4s, Vn.4s
4959 011 01110 11 1 00001 110110 n d FRECPE Vd.2d, Vn.2d
4960 011 01110 10 1 00001 110110 n d FRECPE Vd.4s, Vn.4s
4962 011 01110 11 1 00001 111110 n d FSQRT Vd.2d, Vn.2d
4963 011 01110 10 1 00001 111110 n d FSQRT Vd.4s, Vn.4s
4965 UInt vD
= qregEnc(i
->ARM64in
.VUnaryV
.dst
);
4966 UInt vN
= qregEnc(i
->ARM64in
.VUnaryV
.arg
);
4967 switch (i
->ARM64in
.VUnaryV
.op
) {
4968 case ARM64vecu_FABS64x2
:
4969 *p
++ = X_3_8_5_6_5_5(X010
, X01110111
, X00000
, X111110
, vN
, vD
);
4971 case ARM64vecu_FABS32x4
:
4972 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, X00000
, X111110
, vN
, vD
);
4974 case ARM64vecu_FNEG64x2
:
4975 *p
++ = X_3_8_5_6_5_5(X011
, X01110111
, X00000
, X111110
, vN
, vD
);
4977 case ARM64vecu_FNEG32x4
:
4978 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, X00000
, X111110
, vN
, vD
);
4981 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, X00000
, X010110
, vN
, vD
);
4983 case ARM64vecu_ABS64x2
:
4984 *p
++ = X_3_8_5_6_5_5(X010
, X01110111
, X00000
, X101110
, vN
, vD
);
4986 case ARM64vecu_ABS32x4
:
4987 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, X00000
, X101110
, vN
, vD
);
4989 case ARM64vecu_ABS16x8
:
4990 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, X00000
, X101110
, vN
, vD
);
4992 case ARM64vecu_ABS8x16
:
4993 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, X00000
, X101110
, vN
, vD
);
4995 case ARM64vecu_CLS32x4
:
4996 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, X00000
, X010010
, vN
, vD
);
4998 case ARM64vecu_CLS16x8
:
4999 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, X00000
, X010010
, vN
, vD
);
5001 case ARM64vecu_CLS8x16
:
5002 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, X00000
, X010010
, vN
, vD
);
5004 case ARM64vecu_CLZ32x4
:
5005 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, X00000
, X010010
, vN
, vD
);
5007 case ARM64vecu_CLZ16x8
:
5008 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, X00000
, X010010
, vN
, vD
);
5010 case ARM64vecu_CLZ8x16
:
5011 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, X00000
, X010010
, vN
, vD
);
5013 case ARM64vecu_CNT8x16
:
5014 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, X00000
, X010110
, vN
, vD
);
5016 case ARM64vecu_RBIT
:
5017 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, X00000
, X010110
, vN
, vD
);
5019 case ARM64vecu_REV1616B
:
5020 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, X00000
, X000110
, vN
, vD
);
5022 case ARM64vecu_REV3216B
:
5023 *p
++ = X_3_8_5_6_5_5(X011
, X01110001
, X00000
, X000010
, vN
, vD
);
5025 case ARM64vecu_REV328H
:
5026 *p
++ = X_3_8_5_6_5_5(X011
, X01110011
, X00000
, X000010
, vN
, vD
);
5028 case ARM64vecu_REV6416B
:
5029 *p
++ = X_3_8_5_6_5_5(X010
, X01110001
, X00000
, X000010
, vN
, vD
);
5031 case ARM64vecu_REV648H
:
5032 *p
++ = X_3_8_5_6_5_5(X010
, X01110011
, X00000
, X000010
, vN
, vD
);
5034 case ARM64vecu_REV644S
:
5035 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, X00000
, X000010
, vN
, vD
);
5037 case ARM64vecu_URECPE32x4
:
5038 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, X00001
, X110010
, vN
, vD
);
5040 case ARM64vecu_URSQRTE32x4
:
5041 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, X00001
, X110010
, vN
, vD
);
5043 case ARM64vecu_FRECPE64x2
:
5044 *p
++ = X_3_8_5_6_5_5(X010
, X01110111
, X00001
, X110110
, vN
, vD
);
5046 case ARM64vecu_FRECPE32x4
:
5047 *p
++ = X_3_8_5_6_5_5(X010
, X01110101
, X00001
, X110110
, vN
, vD
);
5049 case ARM64vecu_FRSQRTE64x2
:
5050 *p
++ = X_3_8_5_6_5_5(X011
, X01110111
, X00001
, X110110
, vN
, vD
);
5052 case ARM64vecu_FRSQRTE32x4
:
5053 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, X00001
, X110110
, vN
, vD
);
5055 case ARM64vecu_FSQRT64x2
:
5056 *p
++ = X_3_8_5_6_5_5(X011
, X01110111
, X00001
, X111110
, vN
, vD
);
5058 case ARM64vecu_FSQRT32x4
:
5059 *p
++ = X_3_8_5_6_5_5(X011
, X01110101
, X00001
, X111110
, vN
, vD
);
5066 case ARM64in_VNarrowV
: {
5068 000 01110 00 1,00001 001010 n d XTN Vd.8b, Vn.8h
5069 000 01110 01 1,00001 001010 n d XTN Vd.4h, Vn.4s
5070 000 01110 10 1,00001 001010 n d XTN Vd.2s, Vn.2d
5072 001 01110 00 1,00001 001010 n d SQXTUN Vd.8b, Vn.8h
5073 001 01110 01 1,00001 001010 n d SQXTUN Vd.4h, Vn.4s
5074 001 01110 10 1,00001 001010 n d SQXTUN Vd.2s, Vn.2d
5076 000 01110 00 1,00001 010010 n d SQXTN Vd.8b, Vn.8h
5077 000 01110 01 1,00001 010010 n d SQXTN Vd.4h, Vn.4s
5078 000 01110 10 1,00001 010010 n d SQXTN Vd.2s, Vn.2d
5080 001 01110 00 1,00001 010010 n d UQXTN Vd.8b, Vn.8h
5081 001 01110 01 1,00001 010010 n d UQXTN Vd.4h, Vn.4s
5082 001 01110 10 1,00001 010010 n d UQXTN Vd.2s, Vn.2d
5084 UInt vD
= qregEnc(i
->ARM64in
.VNarrowV
.dst
);
5085 UInt vN
= qregEnc(i
->ARM64in
.VNarrowV
.src
);
5086 UInt dszBlg2
= i
->ARM64in
.VNarrowV
.dszBlg2
;
5087 vassert(dszBlg2
>= 0 && dszBlg2
<= 2);
5088 switch (i
->ARM64in
.VNarrowV
.op
) {
5089 case ARM64vecna_XTN
:
5090 *p
++ = X_3_8_5_6_5_5(X000
, X01110001
| (dszBlg2
<< 1),
5091 X00001
, X001010
, vN
, vD
);
5093 case ARM64vecna_SQXTUN
:
5094 *p
++ = X_3_8_5_6_5_5(X001
, X01110001
| (dszBlg2
<< 1),
5095 X00001
, X001010
, vN
, vD
);
5097 case ARM64vecna_SQXTN
:
5098 *p
++ = X_3_8_5_6_5_5(X000
, X01110001
| (dszBlg2
<< 1),
5099 X00001
, X010010
, vN
, vD
);
5101 case ARM64vecna_UQXTN
:
5102 *p
++ = X_3_8_5_6_5_5(X001
, X01110001
| (dszBlg2
<< 1),
5103 X00001
, X010010
, vN
, vD
);
5110 case ARM64in_VShiftImmV
: {
5112 011 011110 immh immb 000001 n d USHR Vd.T, Vn.T, #sh
5113 010 011110 immh immb 000001 n d SSHR Vd.T, Vn.T, #sh
5115 001 011110 immh immb 100101 n d UQSHRN ,,#sh
5116 000 011110 immh immb 100101 n d SQSHRN ,,#sh
5117 001 011110 immh immb 100001 n d SQSHRUN ,,#sh
5119 001 011110 immh immb 100111 n d UQRSHRN ,,#sh
5120 000 011110 immh immb 100111 n d SQRSHRN ,,#sh
5121 001 011110 immh immb 100011 n d SQRSHRUN ,,#sh
5125 2d | sh in 1..64 -> let xxxxxx = 64-sh in 1xxx:xxx
5126 4s | sh in 1..32 -> let xxxxx = 32-sh in 01xx:xxx
5127 8h | sh in 1..16 -> let xxxx = 16-sh in 001x:xxx
5128 16b | sh in 1..8 -> let xxx = 8-sh in 0001:xxx
5130 010 011110 immh immb 010101 n d SHL Vd.T, Vn.T, #sh
5132 011 011110 immh immb 011101 n d UQSHL Vd.T, Vn.T, #sh
5133 010 011110 immh immb 011101 n d SQSHL Vd.T, Vn.T, #sh
5134 011 011110 immh immb 011001 n d SQSHLU Vd.T, Vn.T, #sh
5138 2d | sh in 0..63 -> let xxxxxx = sh in 1xxx:xxx
5139 4s | sh in 0..31 -> let xxxxx = sh in 01xx:xxx
5140 8h | sh in 0..15 -> let xxxx = sh in 001x:xxx
5141 16b | sh in 0..7 -> let xxx = sh in 0001:xxx
5143 UInt vD
= qregEnc(i
->ARM64in
.VShiftImmV
.dst
);
5144 UInt vN
= qregEnc(i
->ARM64in
.VShiftImmV
.src
);
5145 UInt sh
= i
->ARM64in
.VShiftImmV
.amt
;
5146 UInt tmpl
= 0; /* invalid */
5148 const UInt tmpl_USHR
5149 = X_3_6_7_6_5_5(X011
, X011110
, 0, X000001
, vN
, vD
);
5150 const UInt tmpl_SSHR
5151 = X_3_6_7_6_5_5(X010
, X011110
, 0, X000001
, vN
, vD
);
5153 const UInt tmpl_UQSHRN
5154 = X_3_6_7_6_5_5(X001
, X011110
, 0, X100101
, vN
, vD
);
5155 const UInt tmpl_SQSHRN
5156 = X_3_6_7_6_5_5(X000
, X011110
, 0, X100101
, vN
, vD
);
5157 const UInt tmpl_SQSHRUN
5158 = X_3_6_7_6_5_5(X001
, X011110
, 0, X100001
, vN
, vD
);
5160 const UInt tmpl_UQRSHRN
5161 = X_3_6_7_6_5_5(X001
, X011110
, 0, X100111
, vN
, vD
);
5162 const UInt tmpl_SQRSHRN
5163 = X_3_6_7_6_5_5(X000
, X011110
, 0, X100111
, vN
, vD
);
5164 const UInt tmpl_SQRSHRUN
5165 = X_3_6_7_6_5_5(X001
, X011110
, 0, X100011
, vN
, vD
);
5168 = X_3_6_7_6_5_5(X010
, X011110
, 0, X010101
, vN
, vD
);
5170 const UInt tmpl_UQSHL
5171 = X_3_6_7_6_5_5(X011
, X011110
, 0, X011101
, vN
, vD
);
5172 const UInt tmpl_SQSHL
5173 = X_3_6_7_6_5_5(X010
, X011110
, 0, X011101
, vN
, vD
);
5174 const UInt tmpl_SQSHLU
5175 = X_3_6_7_6_5_5(X011
, X011110
, 0, X011001
, vN
, vD
);
5177 switch (i
->ARM64in
.VShiftImmV
.op
) {
5178 case ARM64vecshi_SSHR64x2
: tmpl
= tmpl_SSHR
; goto right64x2
;
5179 case ARM64vecshi_USHR64x2
: tmpl
= tmpl_USHR
; goto right64x2
;
5180 case ARM64vecshi_SHL64x2
: tmpl
= tmpl_SHL
; goto left64x2
;
5181 case ARM64vecshi_UQSHL64x2
: tmpl
= tmpl_UQSHL
; goto left64x2
;
5182 case ARM64vecshi_SQSHL64x2
: tmpl
= tmpl_SQSHL
; goto left64x2
;
5183 case ARM64vecshi_SQSHLU64x2
: tmpl
= tmpl_SQSHLU
; goto left64x2
;
5184 case ARM64vecshi_SSHR32x4
: tmpl
= tmpl_SSHR
; goto right32x4
;
5185 case ARM64vecshi_USHR32x4
: tmpl
= tmpl_USHR
; goto right32x4
;
5186 case ARM64vecshi_UQSHRN2SD
: tmpl
= tmpl_UQSHRN
; goto right32x4
;
5187 case ARM64vecshi_SQSHRN2SD
: tmpl
= tmpl_SQSHRN
; goto right32x4
;
5188 case ARM64vecshi_SQSHRUN2SD
: tmpl
= tmpl_SQSHRUN
; goto right32x4
;
5189 case ARM64vecshi_UQRSHRN2SD
: tmpl
= tmpl_UQRSHRN
; goto right32x4
;
5190 case ARM64vecshi_SQRSHRN2SD
: tmpl
= tmpl_SQRSHRN
; goto right32x4
;
5191 case ARM64vecshi_SQRSHRUN2SD
: tmpl
= tmpl_SQRSHRUN
; goto right32x4
;
5192 case ARM64vecshi_SHL32x4
: tmpl
= tmpl_SHL
; goto left32x4
;
5193 case ARM64vecshi_UQSHL32x4
: tmpl
= tmpl_UQSHL
; goto left32x4
;
5194 case ARM64vecshi_SQSHL32x4
: tmpl
= tmpl_SQSHL
; goto left32x4
;
5195 case ARM64vecshi_SQSHLU32x4
: tmpl
= tmpl_SQSHLU
; goto left32x4
;
5196 case ARM64vecshi_SSHR16x8
: tmpl
= tmpl_SSHR
; goto right16x8
;
5197 case ARM64vecshi_USHR16x8
: tmpl
= tmpl_USHR
; goto right16x8
;
5198 case ARM64vecshi_UQSHRN4HS
: tmpl
= tmpl_UQSHRN
; goto right16x8
;
5199 case ARM64vecshi_SQSHRN4HS
: tmpl
= tmpl_SQSHRN
; goto right16x8
;
5200 case ARM64vecshi_SQSHRUN4HS
: tmpl
= tmpl_SQSHRUN
; goto right16x8
;
5201 case ARM64vecshi_UQRSHRN4HS
: tmpl
= tmpl_UQRSHRN
; goto right16x8
;
5202 case ARM64vecshi_SQRSHRN4HS
: tmpl
= tmpl_SQRSHRN
; goto right16x8
;
5203 case ARM64vecshi_SQRSHRUN4HS
: tmpl
= tmpl_SQRSHRUN
; goto right16x8
;
5204 case ARM64vecshi_SHL16x8
: tmpl
= tmpl_SHL
; goto left16x8
;
5205 case ARM64vecshi_UQSHL16x8
: tmpl
= tmpl_UQSHL
; goto left16x8
;
5206 case ARM64vecshi_SQSHL16x8
: tmpl
= tmpl_SQSHL
; goto left16x8
;
5207 case ARM64vecshi_SQSHLU16x8
: tmpl
= tmpl_SQSHLU
; goto left16x8
;
5208 case ARM64vecshi_SSHR8x16
: tmpl
= tmpl_SSHR
; goto right8x16
;
5209 case ARM64vecshi_USHR8x16
: tmpl
= tmpl_USHR
; goto right8x16
;
5210 case ARM64vecshi_UQSHRN8BH
: tmpl
= tmpl_UQSHRN
; goto right8x16
;
5211 case ARM64vecshi_SQSHRN8BH
: tmpl
= tmpl_SQSHRN
; goto right8x16
;
5212 case ARM64vecshi_SQSHRUN8BH
: tmpl
= tmpl_SQSHRUN
; goto right8x16
;
5213 case ARM64vecshi_UQRSHRN8BH
: tmpl
= tmpl_UQRSHRN
; goto right8x16
;
5214 case ARM64vecshi_SQRSHRN8BH
: tmpl
= tmpl_SQRSHRN
; goto right8x16
;
5215 case ARM64vecshi_SQRSHRUN8BH
: tmpl
= tmpl_SQRSHRUN
; goto right8x16
;
5216 case ARM64vecshi_SHL8x16
: tmpl
= tmpl_SHL
; goto left8x16
;
5217 case ARM64vecshi_UQSHL8x16
: tmpl
= tmpl_UQSHL
; goto left8x16
;
5218 case ARM64vecshi_SQSHL8x16
: tmpl
= tmpl_SQSHL
; goto left8x16
;
5219 case ARM64vecshi_SQSHLU8x16
: tmpl
= tmpl_SQSHLU
; goto left8x16
;
5224 if (sh
>= 1 && sh
<= 63) {
5225 *p
++ = tmpl
| X_3_6_7_6_5_5(0,0, X1000000
| (64-sh
), 0,0,0);
5230 if (sh
>= 1 && sh
<= 32) {
5231 *p
++ = tmpl
| X_3_6_7_6_5_5(0,0, X0100000
| (32-sh
), 0,0,0);
5236 if (sh
>= 1 && sh
<= 16) {
5237 *p
++ = tmpl
| X_3_6_7_6_5_5(0,0, X0010000
| (16-sh
), 0,0,0);
5242 if (sh
>= 1 && sh
<= 8) {
5243 *p
++ = tmpl
| X_3_6_7_6_5_5(0,0, X0001000
| (8-sh
), 0,0,0);
5249 if (sh
>= 0 && sh
<= 63) {
5250 *p
++ = tmpl
| X_3_6_7_6_5_5(0,0, X1000000
| sh
, 0,0,0);
5255 if (sh
>= 0 && sh
<= 31) {
5256 *p
++ = tmpl
| X_3_6_7_6_5_5(0,0, X0100000
| sh
, 0,0,0);
5261 if (sh
>= 0 && sh
<= 15) {
5262 *p
++ = tmpl
| X_3_6_7_6_5_5(0,0, X0010000
| sh
, 0,0,0);
5267 if (sh
>= 0 && sh
<= 7) {
5268 *p
++ = tmpl
| X_3_6_7_6_5_5(0,0, X0001000
| sh
, 0,0,0);
      case ARM64in_VExtV: {
         /*
            011 01110 000 m 0 imm4 0 n d  EXT Vd.16b, Vn.16b, Vm.16b, #imm4
            where imm4 = the shift amount, in bytes,
                  Vn is low operand, Vm is high operand
         */
         UInt vD   = qregEnc(i->ARM64in.VExtV.dst);
         UInt vN   = qregEnc(i->ARM64in.VExtV.srcLo);
         UInt vM   = qregEnc(i->ARM64in.VExtV.srcHi);
         UInt imm4 = i->ARM64in.VExtV.amtB;
         vassert(imm4 >= 1 && imm4 <= 15);
         *p++ = X_3_8_5_6_5_5(X011, X01110000, vM,
                              X000000 | (imm4 << 1), vN, vD);
         goto done;
      }
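      /* The EXT above forms the 32-byte value srcHi:srcLo and the
         destination gets the 16 bytes starting at byte offset amtB,
         that is, Vd.byte[j] = (srcHi:srcLo).byte[amtB + j].  amtB == 0
         would simply copy srcLo, which is presumably why it is excluded
         by the vassert. */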
      case ARM64in_VImmQ: {
         UInt   rQ  = qregEnc(i->ARM64in.VImmQ.rQ);
         UShort imm = i->ARM64in.VImmQ.imm;
         switch (imm) {
            case 0x0000:
               // movi rQ.4s, #0x0 == 0x4F 0x00 0x04 000 rQ
               *p++ = 0x4F000400 | rQ;
               goto done;
            case 0x0001:
               // movi rQ, #0xFF == 0x2F 0x00 0xE4 001 rQ
               *p++ = 0x2F00E420 | rQ;
               goto done;
            case 0x0003:
               // movi rQ, #0xFFFF == 0x2F 0x00 0xE4 011 rQ
               *p++ = 0x2F00E460 | rQ;
               goto done;
            case 0x000F:
               // movi rQ, #0xFFFFFFFF == 0x2F 0x00 0xE5 111 rQ
               *p++ = 0x2F00E5E0 | rQ;
               goto done;
            case 0x003F:
               // movi rQ, #0xFFFFFFFFFFFF == 0x2F 0x01 0xE7 111 rQ
               *p++ = 0x2F01E7E0 | rQ;
               goto done;
            case 0x00FF:
               // movi rQ, #0xFFFFFFFFFFFFFFFF == 0x2F 0x07 0xE7 111 rQ
               *p++ = 0x2F07E7E0 | rQ;
               goto done;
            case 0xFFFF:
               // mvni rQ.4s, #0x0 == 0x6F 0x00 0x04 000 rQ
               *p++ = 0x6F000400 | rQ;
               goto done;
            default:
               break;
         }
         goto bad; /* no other handled cases right now */
      }
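      /* The .imm field is a byte mask: bit k set means byte k of the
         128-bit immediate is 0xFF, clear means 0x00.  So 0x000F is
         0x00000000FFFFFFFF, 0xFFFF is all-ones, and so on.  Only the
         masks handled above can be done with a single MOVI/MVNI;
         anything else falls out to "bad". */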
      case ARM64in_VDfromX: {
         /*
            0100 1110 0000 1000 0001 11 nn dd   INS Vd.D[0], Xn
            This isn't wonderful, in the sense that the upper half of
            the vector register stays unchanged and thus the insn is
            data dependent on its output register. */
         UInt dd = dregEnc(i->ARM64in.VDfromX.rD);
         UInt xx = iregEnc(i->ARM64in.VDfromX.rX);
         *p++ = 0x4E081C00 | X_2_6_2_12_5_5(0,0,0,0,xx,dd);
         goto done;
      }
      case ARM64in_VQfromX: {
         /*
            1001 1110 0110 0111 0000 00 nn dd   FMOV Vd.D[0], Xn
            I think this zeroes out the top half of the destination, which
            is what we need.  TODO: can we do VDfromX and VQfromXX better? */
         UInt dd = qregEnc(i->ARM64in.VQfromX.rQ);
         UInt xx = iregEnc(i->ARM64in.VQfromX.rXlo);
         *p++ = 0x9E670000 | X_2_6_2_12_5_5(0,0,0,0,xx,dd);
         goto done;
      }
      case ARM64in_VQfromXX: {
         /* What we really generate is a two insn sequence:
               INS Vd.D[0], Xlo; INS Vd.D[1], Xhi
            0100 1110 0000 1000 0001 11 nn dd   INS Vd.D[0], Xn
            0100 1110 0001 1000 0001 11 nn dd   INS Vd.D[1], Xn
         */
         UInt qq  = qregEnc(i->ARM64in.VQfromXX.rQ);
         UInt xhi = iregEnc(i->ARM64in.VQfromXX.rXhi);
         UInt xlo = iregEnc(i->ARM64in.VQfromXX.rXlo);
         vassert(xhi < 31 && xlo < 31);
         *p++ = 0x4E081C00 | X_2_6_2_12_5_5(0,0,0,0,xlo,qq);
         *p++ = 0x4E181C00 | X_2_6_2_12_5_5(0,0,0,0,xhi,qq);
         goto done;
      }
      case ARM64in_VXfromQ: {
         /* 010 0111 0000 01000 001111 nn dd  UMOV Xd, Vn.D[0]
            010 0111 0000 11000 001111 nn dd  UMOV Xd, Vn.D[1]
         */
         UInt dd     = iregEnc(i->ARM64in.VXfromQ.rX);
         UInt nn     = qregEnc(i->ARM64in.VXfromQ.rQ);
         UInt laneNo = i->ARM64in.VXfromQ.laneNo;
         vassert(laneNo < 2);
         *p++ = X_3_8_5_6_5_5(X010, X01110000,
                              laneNo == 1 ? X11000 : X01000, X001111, nn, dd);
         goto done;
      }
      case ARM64in_VXfromDorS: {
         /* 000 11110001 00110 000000 n d     FMOV Wd, Sn
            100 11110011 00110 000000 n d     FMOV Xd, Dn
         */
         UInt dd    = iregEnc(i->ARM64in.VXfromDorS.rX);
         UInt nn    = dregEnc(i->ARM64in.VXfromDorS.rDorS);
         Bool fromD = i->ARM64in.VXfromDorS.fromD;
         *p++ = X_3_8_5_6_5_5(fromD ? X100 : X000,
                              fromD ? X11110011 : X11110001,
                              X00110, X000000, nn, dd);
         goto done;
      }
      case ARM64in_VMov: {
         /* 000 11110 00 10000 00 10000 n d   FMOV Sd, Sn
            000 11110 01 10000 00 10000 n d   FMOV Dd, Dn
            010 01110 10 1 n 0 00111 n d      MOV Vd.16b, Vn.16b
         */
         HReg rD = i->ARM64in.VMov.dst;
         HReg rN = i->ARM64in.VMov.src;
         switch (i->ARM64in.VMov.szB) {
            case 16: {
               UInt dd = qregEnc(rD);
               UInt nn = qregEnc(rN);
               *p++ = X_3_8_5_6_5_5(X010, X01110101, nn, X000111, nn, dd);
               goto done;
            }
            case 8: {
               UInt dd = dregEnc(rD);
               UInt nn = dregEnc(rN);
               *p++ = X_3_8_5_6_5_5(X000, X11110011, X00000, X010000, nn, dd);
               goto done;
            }
            default:
               break;
         }
         goto bad;
      }
      case ARM64in_EvCheck: {
         /* The sequence is fixed (canned) except for the two amodes
            supplied by the insn.  These don't change the length, though.
            We generate this:
               ldr  w9, [x21 + #8]   8 == offsetof(host_EvC_COUNTER)
               subs w9, w9, #1
               str  w9, [x21 + #8]   8 == offsetof(host_EvC_COUNTER)
               bpl  nofail
               ldr  x9, [x21 + #0]   0 == offsetof(host_EvC_FAILADDR)
               br   x9
              nofail:
         */
         UInt* p0 = p;
         p = do_load_or_store32(p, True/*isLoad*/, /*w*/9,
                                i->ARM64in.EvCheck.amCounter);
         *p++ = 0x71000529; /* subs w9, w9, #1 */
         p = do_load_or_store32(p, False/*!isLoad*/, /*w*/9,
                                i->ARM64in.EvCheck.amCounter);
         *p++ = 0x54000065; /* bpl nofail */
         p = do_load_or_store64(p, True/*isLoad*/, /*x*/9,
                                i->ARM64in.EvCheck.amFailAddr);
         *p++ = 0xD61F0120; /* br x9 */
         /* nofail: */

         /* Crosscheck */
         vassert(evCheckSzB_ARM64() == (UChar*)p - (UChar*)p0);
         goto done;
      }
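      /* In other words, the event check decrements the 32-bit downcount
         in the guest state and, if it goes negative, loads
         host_EvC_FAILADDR and jumps there; otherwise it falls through.
         That is always 6 insns (24 bytes), which is what
         evCheckSzB_ARM64 returns and what the vassert just above
         crosschecks. */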
      case ARM64in_ProfInc: {
         /* We generate:
              (ctrP is unknown now, so use 0x6555'7555'8555'9566 in the
              expectation that a later call to LibVEX_patchProfCtr
              will be used to fill in the immediate fields once the
              right value is known.)
            imm64-exactly4 x9, 0x6555'7555'8555'9566
            ldr  x8, [x9]
            add  x8, x8, #1
            str  x8, [x9]
         */
         p = imm64_to_ireg_EXACTLY4(p, /*x*/9, 0x6555755585559566ULL);
         *p++ = 0xF9400128; /* ldr x8, [x9] */
         *p++ = 0x91000508; /* add x8, x8, #1 */
         *p++ = 0xF9000128; /* str x8, [x9] */
         /* Tell the caller .. */
         vassert(!(*is_profInc));
         *is_profInc = True;
         goto done;
      }
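      /* The 0x6555'7555'8555'9566 value is only a recognisable
         placeholder: patchProfInc_ARM64 below checks that the 4-insn
         immediate load materialises exactly that constant, and that the
         ldr/add/str follow it, before overwriting the immediate fields
         with the real counter address. */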
      default:
         goto bad;
   }

  bad:
   ppARM64Instr(i);
   vpanic("emit_ARM64Instr");
   /*NOTREACHED*/

  done:
   vassert(((UChar*)p) - &buf[0] <= 40);
   return ((UChar*)p) - &buf[0];
}

/* How big is an event check?  See case for ARM64in_EvCheck in
   emit_ARM64Instr just above.  That crosschecks what this returns, so
   we can tell if we're inconsistent. */
Int evCheckSzB_ARM64 (void)
{
   return 24;  /* 6 insns of 4 bytes each */
}

/* NB: what goes on here has to be very closely coordinated with the
   emitInstr case for XDirect, above. */
VexInvalRange chainXDirect_ARM64 ( VexEndness endness_host,
                                   void* place_to_chain,
                                   const void* disp_cp_chain_me_EXPECTED,
                                   const void* place_to_jump_to )
{
   vassert(endness_host == VexEndnessLE);

   /* What we're expecting to see is:
        movw x9, disp_cp_chain_me_to_EXPECTED[15:0]
        movk x9, disp_cp_chain_me_to_EXPECTED[31:16], lsl 16
        movk x9, disp_cp_chain_me_to_EXPECTED[47:32], lsl 32
        movk x9, disp_cp_chain_me_to_EXPECTED[63:48], lsl 48
        blr  x9
      viz
        <16 bytes generated by imm64_to_ireg_EXACTLY4>
        D6 3F 01 20
   */
   UInt* p = (UInt*)place_to_chain;
   vassert(0 == (3 & (HWord)p));
   vassert(is_imm64_to_ireg_EXACTLY4(
              p, /*x*/9, (Addr)disp_cp_chain_me_EXPECTED));
   vassert(p[4] == 0xD63F0120);

   /* And what we want to change it to is:
        movw x9, place_to_jump_to[15:0]
        movk x9, place_to_jump_to[31:16], lsl 16
        movk x9, place_to_jump_to[47:32], lsl 32
        movk x9, place_to_jump_to[63:48], lsl 48
        br   x9
      viz
        <16 bytes generated by imm64_to_ireg_EXACTLY4>
        D6 1F 01 20

      The replacement has the same length as the original.
   */
   (void)imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)place_to_jump_to);
   p[4] = 0xD61F0120;

   VexInvalRange vir = {(HWord)p, 20};
   return vir;
}
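
/* Chaining and unchaining therefore only ever toggle a jump site
   between the two 5-word forms shown above: four insns that materialise
   a 64-bit target in x9, followed by either blr x9 (call the
   disp_cp_chain_me helper) or br x9 (jump straight to the chained-to
   block).  Hence the 20-byte invalidation range in both directions. */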

/* NB: what goes on here has to be very closely coordinated with the
   emitInstr case for XDirect, above. */
VexInvalRange unchainXDirect_ARM64 ( VexEndness endness_host,
                                     void* place_to_unchain,
                                     const void* place_to_jump_to_EXPECTED,
                                     const void* disp_cp_chain_me )
{
   vassert(endness_host == VexEndnessLE);

   /* What we're expecting to see is:
        movw x9, place_to_jump_to_EXPECTED[15:0]
        movk x9, place_to_jump_to_EXPECTED[31:16], lsl 16
        movk x9, place_to_jump_to_EXPECTED[47:32], lsl 32
        movk x9, place_to_jump_to_EXPECTED[63:48], lsl 48
        br   x9
      viz
        <16 bytes generated by imm64_to_ireg_EXACTLY4>
        D6 1F 01 20
   */
   UInt* p = (UInt*)place_to_unchain;
   vassert(0 == (3 & (HWord)p));
   vassert(is_imm64_to_ireg_EXACTLY4(
              p, /*x*/9, (Addr)place_to_jump_to_EXPECTED));
   vassert(p[4] == 0xD61F0120);

   /* And what we want to change it to is:
        movw x9, disp_cp_chain_me_to[15:0]
        movk x9, disp_cp_chain_me_to[31:16], lsl 16
        movk x9, disp_cp_chain_me_to[47:32], lsl 32
        movk x9, disp_cp_chain_me_to[63:48], lsl 48
        blr  x9
      viz
        <16 bytes generated by imm64_to_ireg_EXACTLY4>
        D6 3F 01 20
   */
   (void)imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)disp_cp_chain_me);
   p[4] = 0xD63F0120;

   VexInvalRange vir = {(HWord)p, 20};
   return vir;
}

/* Patch the counter address into a profile inc point, as previously
   created by the ARM64in_ProfInc case for emit_ARM64Instr. */
VexInvalRange patchProfInc_ARM64 ( VexEndness endness_host,
                                   void*  place_to_patch,
                                   const ULong* location_of_counter )
{
   vassert(sizeof(ULong*) == 8);
   vassert(endness_host == VexEndnessLE);
   UInt* p = (UInt*)place_to_patch;
   vassert(0 == (3 & (HWord)p));
   vassert(is_imm64_to_ireg_EXACTLY4(p, /*x*/9, 0x6555755585559566ULL));
   vassert(p[4] == 0xF9400128);
   vassert(p[5] == 0x91000508);
   vassert(p[6] == 0xF9000128);
   imm64_to_ireg_EXACTLY4(p, /*x*/9, (Addr)location_of_counter);
   VexInvalRange vir = {(HWord)p, 4*4};
   return vir;
}
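
/* Note that only the first four words (the immediate-materialising
   sequence) are rewritten when patching; the ldr/add/str that follow
   are left untouched, hence the 16-byte (4*4) invalidation range. */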

/*---------------------------------------------------------------*/
/*--- end                                   host_arm64_defs.c ---*/
/*---------------------------------------------------------------*/