2 /*---------------------------------------------------------------*/
3 /*--- begin host_arm_defs.c ---*/
4 /*---------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2004-2017 OpenWorks LLP
11 info@open-works.net
13 NEON support is
14 Copyright (C) 2010-2017 Samsung Electronics
15 contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16 and Kirill Batuzov <batuzovk@ispras.ru>
18 This program is free software; you can redistribute it and/or
19 modify it under the terms of the GNU General Public License as
20 published by the Free Software Foundation; either version 2 of the
21 License, or (at your option) any later version.
23 This program is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 General Public License for more details.
28 You should have received a copy of the GNU General Public License
29 along with this program; if not, write to the Free Software
30 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
31 02110-1301, USA.
33 The GNU General Public License is contained in the file COPYING.
36 #include "libvex_basictypes.h"
37 #include "libvex.h"
38 #include "libvex_trc_values.h"
40 #include "main_util.h"
41 #include "host_generic_regs.h"
42 #include "host_arm_defs.h"
44 UInt arm_hwcaps = 0;
47 /* --------- Registers. --------- */
49 const RRegUniverse* getRRegUniverse_ARM ( void )
51 /* The real-register universe is a big constant, so we just want to
52 initialise it once. */
53 static RRegUniverse rRegUniverse_ARM;
54 static Bool rRegUniverse_ARM_initted = False;
56 /* Handy shorthand, nothing more */
57 RRegUniverse* ru = &rRegUniverse_ARM;
59 /* This isn't thread-safe. Sigh. */
60 if (LIKELY(rRegUniverse_ARM_initted))
61 return ru;
63 RRegUniverse__init(ru);
65 /* Add the registers. The initial segment of this array must be
66 those available for allocation by reg-alloc, and those that
67 follow are not available for allocation. */
69 /* Callee-saved ones are listed first, since we prefer them
70 if they're available. */
71 ru->allocable_start[HRcInt32] = ru->size;
72 ru->regs[ru->size++] = hregARM_R4();
73 ru->regs[ru->size++] = hregARM_R5();
74 ru->regs[ru->size++] = hregARM_R6();
75 ru->regs[ru->size++] = hregARM_R7();
76 ru->regs[ru->size++] = hregARM_R10();
77 ru->regs[ru->size++] = hregARM_R11();
78 /* Otherwise we'll have to slum it out with caller-saved ones. */
79 ru->regs[ru->size++] = hregARM_R0();
80 ru->regs[ru->size++] = hregARM_R1();
81 ru->regs[ru->size++] = hregARM_R2();
82 ru->regs[ru->size++] = hregARM_R3();
83 ru->regs[ru->size++] = hregARM_R9();
84 ru->allocable_end[HRcInt32] = ru->size - 1;
86 /* FP registers. Note: these are all callee-save. Yay! Hence we
87 don't need to mention them as trashed in getHRegUsage for
88 ARMInstr_Call. */
89 ru->allocable_start[HRcFlt64] = ru->size;
90 ru->regs[ru->size++] = hregARM_D8();
91 ru->regs[ru->size++] = hregARM_D9();
92 ru->regs[ru->size++] = hregARM_D10();
93 ru->regs[ru->size++] = hregARM_D11();
94 ru->regs[ru->size++] = hregARM_D12();
95 ru->allocable_end[HRcFlt64] = ru->size - 1;
97 ru->allocable_start[HRcFlt32] = ru->size;
98 ru->regs[ru->size++] = hregARM_S26();
99 ru->regs[ru->size++] = hregARM_S27();
100 ru->regs[ru->size++] = hregARM_S28();
101 ru->regs[ru->size++] = hregARM_S29();
102 ru->regs[ru->size++] = hregARM_S30();
103 ru->allocable_end[HRcFlt32] = ru->size - 1;
105 ru->allocable_start[HRcVec128] = ru->size;
106 ru->regs[ru->size++] = hregARM_Q8();
107 ru->regs[ru->size++] = hregARM_Q9();
108 ru->regs[ru->size++] = hregARM_Q10();
109 ru->regs[ru->size++] = hregARM_Q11();
110 ru->regs[ru->size++] = hregARM_Q12();
111 ru->allocable_end[HRcVec128] = ru->size - 1;
112 ru->allocable = ru->size;
114 /* And other regs, not available to the allocator. */
116 // unavail: r8 as GSP
117 // r12 is used as a spill/reload temporary
118 // r13 as SP
119 // r14 as LR
120 // r15 as PC
122 // All in all, we have 11 allocatable integer registers:
123 // 0 1 2 3 4 5 6 7 9 10 11, with r8 dedicated as GSP
124 // and r12 dedicated as a spill temporary.
125 // 13 14 and 15 are not under the allocator's control.
127 // Hence for the allocatable registers we have:
129 // callee-saved: 4 5 6 7 (8) 9 10 11
130 // caller-saved: 0 1 2 3
131 // Note 9 is ambiguous: the base EABI does not give an e/r-saved
132 // designation for it, but the Linux instantiation of the ABI
133 // specifies it as callee-saved.
135 // If the set of available registers changes or if the e/r status
136 // changes, be sure to re-check/sync the definition of
137 // getHRegUsage for ARMInstr_Call too.
138 ru->regs[ru->size++] = hregARM_R8();
139 ru->regs[ru->size++] = hregARM_R12();
140 ru->regs[ru->size++] = hregARM_R13();
141 ru->regs[ru->size++] = hregARM_R14();
142 ru->regs[ru->size++] = hregARM_R15();
143 ru->regs[ru->size++] = hregARM_Q13();
144 ru->regs[ru->size++] = hregARM_Q14();
145 ru->regs[ru->size++] = hregARM_Q15();
147 rRegUniverse_ARM_initted = True;
149 RRegUniverse__check_is_sane(ru);
150 return ru;
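/* Editor's sketch, not part of the original file: how a client of the
   universe returned above might walk it.  regs[0 .. allocable-1] are the
   registers the allocator may use, grouped per class by allocable_start/
   allocable_end; the trailing entries (r8, r12, r13-r15, q13-q15) exist
   only so they can be pretty-printed.  example_dump_universe is a
   hypothetical name. */
#if 0
static void example_dump_universe ( void )
{
   const RRegUniverse* ru = getRRegUniverse_ARM();
   UInt k;
   for (k = 0; k < ru->allocable; k++) {
      ppHRegARM(ru->regs[k]);   /* e.g. "r4", "d8", "s26", "q8" */
      vex_printf("\n");
   }
}
#endif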
154 UInt ppHRegARM ( HReg reg ) {
155 Int r;
156 /* Be generic for all virtual regs. */
157 if (hregIsVirtual(reg)) {
158 return ppHReg(reg);
160 /* But specific for real regs. */
161 switch (hregClass(reg)) {
162 case HRcInt32:
163 r = hregEncoding(reg);
164 vassert(r >= 0 && r < 16);
165 return vex_printf("r%d", r);
166 case HRcFlt64:
167 r = hregEncoding(reg);
168 vassert(r >= 0 && r < 32);
169 return vex_printf("d%d", r);
170 case HRcFlt32:
171 r = hregEncoding(reg);
172 vassert(r >= 0 && r < 32);
173 return vex_printf("s%d", r);
174 case HRcVec128:
175 r = hregEncoding(reg);
176 vassert(r >= 0 && r < 16);
177 return vex_printf("q%d", r);
178 default:
179 vpanic("ppHRegARM");
184 /* --------- Condition codes, ARM encoding. --------- */
186 const HChar* showARMCondCode ( ARMCondCode cond ) {
187 switch (cond) {
188 case ARMcc_EQ: return "eq";
189 case ARMcc_NE: return "ne";
190 case ARMcc_HS: return "hs";
191 case ARMcc_LO: return "lo";
192 case ARMcc_MI: return "mi";
193 case ARMcc_PL: return "pl";
194 case ARMcc_VS: return "vs";
195 case ARMcc_VC: return "vc";
196 case ARMcc_HI: return "hi";
197 case ARMcc_LS: return "ls";
198 case ARMcc_GE: return "ge";
199 case ARMcc_LT: return "lt";
200 case ARMcc_GT: return "gt";
201 case ARMcc_LE: return "le";
202 case ARMcc_AL: return "al"; // default
203 case ARMcc_NV: return "nv";
204 default: vpanic("showARMCondCode");
209 /* --------- Mem AModes: Addressing Mode 1 --------- */
211 ARMAMode1* ARMAMode1_RI ( HReg reg, Int simm13 ) {
212 ARMAMode1* am = LibVEX_Alloc_inline(sizeof(ARMAMode1));
213 am->tag = ARMam1_RI;
214 am->ARMam1.RI.reg = reg;
215 am->ARMam1.RI.simm13 = simm13;
216 vassert(-4095 <= simm13 && simm13 <= 4095);
217 return am;
219 ARMAMode1* ARMAMode1_RRS ( HReg base, HReg index, UInt shift ) {
220 ARMAMode1* am = LibVEX_Alloc_inline(sizeof(ARMAMode1));
221 am->tag = ARMam1_RRS;
222 am->ARMam1.RRS.base = base;
223 am->ARMam1.RRS.index = index;
224 am->ARMam1.RRS.shift = shift;
225 vassert(0 <= shift && shift <= 3);
226 return am;
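/* Illustrative note (an editor's sketch, not from the original source):
   an RI amode denotes reg + simm13 and an RRS amode denotes
   base + (index << shift).  So, assuming the GSP convention described
   below, ARMAMode1_RI(hregARM_R8(), 16) prints as "16(r8)" and
   ARMAMode1_RRS(hregARM_R0(), hregARM_R1(), 2) as "(r0,r1,2)" via
   ppARMAMode1 below. */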
229 void ppARMAMode1 ( ARMAMode1* am ) {
230 switch (am->tag) {
231 case ARMam1_RI:
232 vex_printf("%d(", am->ARMam1.RI.simm13);
233 ppHRegARM(am->ARMam1.RI.reg);
234 vex_printf(")");
235 break;
236 case ARMam1_RRS:
237 vex_printf("(");
238 ppHRegARM(am->ARMam1.RRS.base);
239 vex_printf(",");
240 ppHRegARM(am->ARMam1.RRS.index);
241 vex_printf(",%u)", am->ARMam1.RRS.shift);
242 break;
243 default:
244 vassert(0);
248 static void addRegUsage_ARMAMode1 ( HRegUsage* u, ARMAMode1* am ) {
249 switch (am->tag) {
250 case ARMam1_RI:
251 addHRegUse(u, HRmRead, am->ARMam1.RI.reg);
252 return;
253 case ARMam1_RRS:
254 // addHRegUse(u, HRmRead, am->ARMam1.RRS.base);
255 // addHRegUse(u, HRmRead, am->ARMam1.RRS.index);
256 // return;
257 default:
258 vpanic("addRegUsage_ARMAmode1");
262 static void mapRegs_ARMAMode1 ( HRegRemap* m, ARMAMode1* am ) {
263 switch (am->tag) {
264 case ARMam1_RI:
265 am->ARMam1.RI.reg = lookupHRegRemap(m, am->ARMam1.RI.reg);
266 return;
267 case ARMam1_RRS:
268 //am->ARMam1.RR.base =lookupHRegRemap(m, am->ARMam1.RR.base);
269 //am->ARMam1.RR.index = lookupHRegRemap(m, am->ARMam1.RR.index);
270 //return;
271 default:
272 vpanic("mapRegs_ARMAmode1");
277 /* --------- Mem AModes: Addressing Mode 2 --------- */
279 ARMAMode2* ARMAMode2_RI ( HReg reg, Int simm9 ) {
280 ARMAMode2* am = LibVEX_Alloc_inline(sizeof(ARMAMode2));
281 am->tag = ARMam2_RI;
282 am->ARMam2.RI.reg = reg;
283 am->ARMam2.RI.simm9 = simm9;
284 vassert(-255 <= simm9 && simm9 <= 255);
285 return am;
287 ARMAMode2* ARMAMode2_RR ( HReg base, HReg index ) {
288 ARMAMode2* am = LibVEX_Alloc_inline(sizeof(ARMAMode2));
289 am->tag = ARMam2_RR;
290 am->ARMam2.RR.base = base;
291 am->ARMam2.RR.index = index;
292 return am;
295 void ppARMAMode2 ( ARMAMode2* am ) {
296 switch (am->tag) {
297 case ARMam2_RI:
298 vex_printf("%d(", am->ARMam2.RI.simm9);
299 ppHRegARM(am->ARMam2.RI.reg);
300 vex_printf(")");
301 break;
302 case ARMam2_RR:
303 vex_printf("(");
304 ppHRegARM(am->ARMam2.RR.base);
305 vex_printf(",");
306 ppHRegARM(am->ARMam2.RR.index);
307 vex_printf(")");
308 break;
309 default:
310 vassert(0);
314 static void addRegUsage_ARMAMode2 ( HRegUsage* u, ARMAMode2* am ) {
315 switch (am->tag) {
316 case ARMam2_RI:
317 addHRegUse(u, HRmRead, am->ARMam2.RI.reg);
318 return;
319 case ARMam2_RR:
320 // addHRegUse(u, HRmRead, am->ARMam2.RR.base);
321 // addHRegUse(u, HRmRead, am->ARMam2.RR.index);
322 // return;
323 default:
324 vpanic("addRegUsage_ARMAmode2");
328 static void mapRegs_ARMAMode2 ( HRegRemap* m, ARMAMode2* am ) {
329 switch (am->tag) {
330 case ARMam2_RI:
331 am->ARMam2.RI.reg = lookupHRegRemap(m, am->ARMam2.RI.reg);
332 return;
333 case ARMam2_RR:
334 //am->ARMam2.RR.base =lookupHRegRemap(m, am->ARMam2.RR.base);
335 //am->ARMam2.RR.index = lookupHRegRemap(m, am->ARMam2.RR.index);
336 //return;
337 default:
338 vpanic("mapRegs_ARMAmode2");
343 /* --------- Mem AModes: Addressing Mode VFP --------- */
345 ARMAModeV* mkARMAModeV ( HReg reg, Int simm11 ) {
346 ARMAModeV* am = LibVEX_Alloc_inline(sizeof(ARMAModeV));
347 vassert(simm11 >= -1020 && simm11 <= 1020);
348 vassert(0 == (simm11 & 3));
349 am->reg = reg;
350 am->simm11 = simm11;
351 return am;
354 void ppARMAModeV ( ARMAModeV* am ) {
355 vex_printf("%d(", am->simm11);
356 ppHRegARM(am->reg);
357 vex_printf(")");
360 static void addRegUsage_ARMAModeV ( HRegUsage* u, ARMAModeV* am ) {
361 addHRegUse(u, HRmRead, am->reg);
364 static void mapRegs_ARMAModeV ( HRegRemap* m, ARMAModeV* am ) {
365 am->reg = lookupHRegRemap(m, am->reg);
369 /* --------- Mem AModes: Addressing Mode Neon ------- */
371 ARMAModeN *mkARMAModeN_RR ( HReg rN, HReg rM ) {
372 ARMAModeN* am = LibVEX_Alloc_inline(sizeof(ARMAModeN));
373 am->tag = ARMamN_RR;
374 am->ARMamN.RR.rN = rN;
375 am->ARMamN.RR.rM = rM;
376 return am;
379 ARMAModeN *mkARMAModeN_R ( HReg rN ) {
380 ARMAModeN* am = LibVEX_Alloc_inline(sizeof(ARMAModeN));
381 am->tag = ARMamN_R;
382 am->ARMamN.R.rN = rN;
383 return am;
386 static void addRegUsage_ARMAModeN ( HRegUsage* u, ARMAModeN* am ) {
387 if (am->tag == ARMamN_R) {
388 addHRegUse(u, HRmRead, am->ARMamN.R.rN);
389 } else {
390 addHRegUse(u, HRmRead, am->ARMamN.RR.rN);
391 addHRegUse(u, HRmRead, am->ARMamN.RR.rM);
395 static void mapRegs_ARMAModeN ( HRegRemap* m, ARMAModeN* am ) {
396 if (am->tag == ARMamN_R) {
397 am->ARMamN.R.rN = lookupHRegRemap(m, am->ARMamN.R.rN);
398 } else {
399 am->ARMamN.RR.rN = lookupHRegRemap(m, am->ARMamN.RR.rN);
400 am->ARMamN.RR.rM = lookupHRegRemap(m, am->ARMamN.RR.rM);
404 void ppARMAModeN ( ARMAModeN* am ) {
405 vex_printf("[");
406 if (am->tag == ARMamN_R) {
407 ppHRegARM(am->ARMamN.R.rN);
408 } else {
409 ppHRegARM(am->ARMamN.RR.rN);
411 vex_printf("]");
412 if (am->tag == ARMamN_RR) {
413 vex_printf(", ");
414 ppHRegARM(am->ARMamN.RR.rM);
419 /* --------- Reg or imm-8x4 operands --------- */
421 static UInt ROR32 ( UInt x, UInt sh ) {
422 vassert(sh >= 0 && sh < 32);
423 if (sh == 0)
424 return x;
425 else
426 return (x << (32-sh)) | (x >> sh);
429 ARMRI84* ARMRI84_I84 ( UShort imm8, UShort imm4 ) {
430 ARMRI84* ri84 = LibVEX_Alloc_inline(sizeof(ARMRI84));
431 ri84->tag = ARMri84_I84;
432 ri84->ARMri84.I84.imm8 = imm8;
433 ri84->ARMri84.I84.imm4 = imm4;
434 vassert(imm8 >= 0 && imm8 <= 255);
435 vassert(imm4 >= 0 && imm4 <= 15);
436 return ri84;
438 ARMRI84* ARMRI84_R ( HReg reg ) {
439 ARMRI84* ri84 = LibVEX_Alloc_inline(sizeof(ARMRI84));
440 ri84->tag = ARMri84_R;
441 ri84->ARMri84.R.reg = reg;
442 return ri84;
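/* Worked example (editor's illustration, not in the original file): the
   value denoted by an I84 operand is ROR32(imm8, 2*imm4).  For instance
   ARMRI84_I84(0x2A, 4) stands for ROR32(0x2A, 8) = 0x2A000000, which is
   exactly what ppARMRI84 below prints. */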
445 void ppARMRI84 ( ARMRI84* ri84 ) {
446 switch (ri84->tag) {
447 case ARMri84_I84:
448 vex_printf("0x%x", ROR32(ri84->ARMri84.I84.imm8,
449 2 * ri84->ARMri84.I84.imm4));
450 break;
451 case ARMri84_R:
452 ppHRegARM(ri84->ARMri84.R.reg);
453 break;
454 default:
455 vassert(0);
459 static void addRegUsage_ARMRI84 ( HRegUsage* u, ARMRI84* ri84 ) {
460 switch (ri84->tag) {
461 case ARMri84_I84:
462 return;
463 case ARMri84_R:
464 addHRegUse(u, HRmRead, ri84->ARMri84.R.reg);
465 return;
466 default:
467 vpanic("addRegUsage_ARMRI84");
471 static void mapRegs_ARMRI84 ( HRegRemap* m, ARMRI84* ri84 ) {
472 switch (ri84->tag) {
473 case ARMri84_I84:
474 return;
475 case ARMri84_R:
476 ri84->ARMri84.R.reg = lookupHRegRemap(m, ri84->ARMri84.R.reg);
477 return;
478 default:
479 vpanic("mapRegs_ARMRI84");
484 /* --------- Reg or imm5 operands --------- */
486 ARMRI5* ARMRI5_I5 ( UInt imm5 ) {
487 ARMRI5* ri5 = LibVEX_Alloc_inline(sizeof(ARMRI5));
488 ri5->tag = ARMri5_I5;
489 ri5->ARMri5.I5.imm5 = imm5;
490 vassert(imm5 > 0 && imm5 <= 31); // zero is not allowed
491 return ri5;
493 ARMRI5* ARMRI5_R ( HReg reg ) {
494 ARMRI5* ri5 = LibVEX_Alloc_inline(sizeof(ARMRI5));
495 ri5->tag = ARMri5_R;
496 ri5->ARMri5.R.reg = reg;
497 return ri5;
500 void ppARMRI5 ( ARMRI5* ri5 ) {
501 switch (ri5->tag) {
502 case ARMri5_I5:
503 vex_printf("%u", ri5->ARMri5.I5.imm5);
504 break;
505 case ARMri5_R:
506 ppHRegARM(ri5->ARMri5.R.reg);
507 break;
508 default:
509 vassert(0);
513 static void addRegUsage_ARMRI5 ( HRegUsage* u, ARMRI5* ri5 ) {
514 switch (ri5->tag) {
515 case ARMri5_I5:
516 return;
517 case ARMri5_R:
518 addHRegUse(u, HRmRead, ri5->ARMri5.R.reg);
519 return;
520 default:
521 vpanic("addRegUsage_ARMRI5");
525 static void mapRegs_ARMRI5 ( HRegRemap* m, ARMRI5* ri5 ) {
526 switch (ri5->tag) {
527 case ARMri5_I5:
528 return;
529 case ARMri5_R:
530 ri5->ARMri5.R.reg = lookupHRegRemap(m, ri5->ARMri5.R.reg);
531 return;
532 default:
533 vpanic("mapRegs_ARMRI5");
537 /* -------- Neon Immediate operand --------- */
539 ARMNImm* ARMNImm_TI ( UInt type, UInt imm8 ) {
540 ARMNImm* i = LibVEX_Alloc_inline(sizeof(ARMNImm));
541 i->type = type;
542 i->imm8 = imm8;
543 return i;
546 ULong ARMNImm_to_Imm64 ( ARMNImm* imm ) {
547 int i, j;
548 ULong y, x = imm->imm8;
549 switch (imm->type) {
550 case 3:
551 x = x << 8; /* fallthrough */
552 case 2:
553 x = x << 8; /* fallthrough */
554 case 1:
555 x = x << 8; /* fallthrough */
556 case 0:
557 return (x << 32) | x;
558 case 5:
559 case 6:
560 if (imm->type == 5)
561 x = x << 8;
562 else
563 x = (x << 8) | x;
564 /* fallthrough */
565 case 4:
566 x = (x << 16) | x;
567 return (x << 32) | x;
568 case 8:
569 x = (x << 8) | 0xFF;
570 /* fallthrough */
571 case 7:
572 x = (x << 8) | 0xFF;
573 return (x << 32) | x;
574 case 9:
575 x = 0;
576 for (i = 7; i >= 0; i--) {
577 y = ((ULong)imm->imm8 >> i) & 1;
578 for (j = 0; j < 8; j++) {
579 x = (x << 1) | y;
582 return x;
583 case 10:
584 x |= (x & 0x80) << 5;
585 x |= (~x & 0x40) << 5;
586 x &= 0x187F; /* 0001 1000 0111 1111 */
587 x |= (x & 0x40) << 4;
588 x |= (x & 0x40) << 3;
589 x |= (x & 0x40) << 2;
590 x |= (x & 0x40) << 1;
591 x = x << 19;
592 x = (x << 32) | x;
593 return x;
594 default:
595 vpanic("ARMNImm_to_Imm64");
599 ARMNImm* Imm64_to_ARMNImm ( ULong x ) {
600 ARMNImm tmp;
601 if ((x & 0xFFFFFFFF) == (x >> 32)) {
602 if ((x & 0xFFFFFF00) == 0)
603 return ARMNImm_TI(0, x & 0xFF);
604 if ((x & 0xFFFF00FF) == 0)
605 return ARMNImm_TI(1, (x >> 8) & 0xFF);
606 if ((x & 0xFF00FFFF) == 0)
607 return ARMNImm_TI(2, (x >> 16) & 0xFF);
608 if ((x & 0x00FFFFFF) == 0)
609 return ARMNImm_TI(3, (x >> 24) & 0xFF);
610 if ((x & 0xFFFF00FF) == 0xFF)
611 return ARMNImm_TI(7, (x >> 8) & 0xFF);
612 if ((x & 0xFF00FFFF) == 0xFFFF)
613 return ARMNImm_TI(8, (x >> 16) & 0xFF);
614 if ((x & 0xFFFF) == ((x >> 16) & 0xFFFF)) {
615 if ((x & 0xFF00) == 0)
616 return ARMNImm_TI(4, x & 0xFF);
617 if ((x & 0x00FF) == 0)
618 return ARMNImm_TI(5, (x >> 8) & 0xFF);
619 if ((x & 0xFF) == ((x >> 8) & 0xFF))
620 return ARMNImm_TI(6, x & 0xFF);
622 if ((x & 0x7FFFF) == 0) {
623 tmp.type = 10;
624 tmp.imm8 = ((x >> 19) & 0x7F) | ((x >> 24) & 0x80);
625 if (ARMNImm_to_Imm64(&tmp) == x)
626 return ARMNImm_TI(tmp.type, tmp.imm8);
628 } else {
629 /* This can only be type 9. */
630 tmp.imm8 = (((x >> 56) & 1) << 7)
631 | (((x >> 48) & 1) << 6)
632 | (((x >> 40) & 1) << 5)
633 | (((x >> 32) & 1) << 4)
634 | (((x >> 24) & 1) << 3)
635 | (((x >> 16) & 1) << 2)
636 | (((x >> 8) & 1) << 1)
637 | (((x >> 0) & 1) << 0);
638 tmp.type = 9;
639 if (ARMNImm_to_Imm64 (&tmp) == x)
640 return ARMNImm_TI(tmp.type, tmp.imm8);
642 return NULL;
645 void ppARMNImm (ARMNImm* i) {
646 ULong x = ARMNImm_to_Imm64(i);
647 vex_printf("0x%llX%llX", x, x);
650 /* -- Register or scalar operand --- */
652 ARMNRS* mkARMNRS(ARMNRS_tag tag, HReg reg, UInt index)
654 ARMNRS *p = LibVEX_Alloc_inline(sizeof(ARMNRS));
655 p->tag = tag;
656 p->reg = reg;
657 p->index = index;
658 return p;
661 void ppARMNRS(ARMNRS *p)
663 ppHRegARM(p->reg);
664 if (p->tag == ARMNRS_Scalar) {
665 vex_printf("[%u]", p->index);
669 /* --------- Instructions. --------- */
671 const HChar* showARMAluOp ( ARMAluOp op ) {
672 switch (op) {
673 case ARMalu_ADD: return "add";
674 case ARMalu_ADDS: return "adds";
675 case ARMalu_ADC: return "adc";
676 case ARMalu_SUB: return "sub";
677 case ARMalu_SUBS: return "subs";
678 case ARMalu_SBC: return "sbc";
679 case ARMalu_AND: return "and";
680 case ARMalu_BIC: return "bic";
681 case ARMalu_OR: return "orr";
682 case ARMalu_XOR: return "xor";
683 default: vpanic("showARMAluOp");
687 const HChar* showARMShiftOp ( ARMShiftOp op ) {
688 switch (op) {
689 case ARMsh_SHL: return "shl";
690 case ARMsh_SHR: return "shr";
691 case ARMsh_SAR: return "sar";
692 default: vpanic("showARMShiftOp");
696 const HChar* showARMUnaryOp ( ARMUnaryOp op ) {
697 switch (op) {
698 case ARMun_NEG: return "neg";
699 case ARMun_NOT: return "not";
700 case ARMun_CLZ: return "clz";
701 default: vpanic("showARMUnaryOp");
705 const HChar* showARMMulOp ( ARMMulOp op ) {
706 switch (op) {
707 case ARMmul_PLAIN: return "mul";
708 case ARMmul_ZX: return "umull";
709 case ARMmul_SX: return "smull";
710 default: vpanic("showARMMulOp");
714 const HChar* showARMVfpOp ( ARMVfpOp op ) {
715 switch (op) {
716 case ARMvfp_ADD: return "add";
717 case ARMvfp_SUB: return "sub";
718 case ARMvfp_MUL: return "mul";
719 case ARMvfp_DIV: return "div";
720 default: vpanic("showARMVfpOp");
724 const HChar* showARMVfpUnaryOp ( ARMVfpUnaryOp op ) {
725 switch (op) {
726 case ARMvfpu_COPY: return "cpy";
727 case ARMvfpu_NEG: return "neg";
728 case ARMvfpu_ABS: return "abs";
729 case ARMvfpu_SQRT: return "sqrt";
730 default: vpanic("showARMVfpUnaryOp");
734 const HChar* showARMNeonBinOp ( ARMNeonBinOp op ) {
735 switch (op) {
736 case ARMneon_VAND: return "vand";
737 case ARMneon_VORR: return "vorr";
738 case ARMneon_VXOR: return "veor";
739 case ARMneon_VADD: return "vadd";
740 case ARMneon_VRHADDS: return "vrhadd";
741 case ARMneon_VRHADDU: return "vrhadd";
742 case ARMneon_VADDFP: return "vadd";
743 case ARMneon_VPADDFP: return "vpadd";
744 case ARMneon_VABDFP: return "vabd";
745 case ARMneon_VSUB: return "vsub";
746 case ARMneon_VSUBFP: return "vsub";
747 case ARMneon_VMINU: return "vmin";
748 case ARMneon_VMINS: return "vmin";
749 case ARMneon_VMINF: return "vmin";
750 case ARMneon_VMAXU: return "vmax";
751 case ARMneon_VMAXS: return "vmax";
752 case ARMneon_VMAXF: return "vmax";
753 case ARMneon_VQADDU: return "vqadd";
754 case ARMneon_VQADDS: return "vqadd";
755 case ARMneon_VQSUBU: return "vqsub";
756 case ARMneon_VQSUBS: return "vqsub";
757 case ARMneon_VCGTU: return "vcgt";
758 case ARMneon_VCGTS: return "vcgt";
759 case ARMneon_VCGTF: return "vcgt";
760 case ARMneon_VCGEF: return "vcge";
761 case ARMneon_VCGEU: return "vcge";
762 case ARMneon_VCGES: return "vcge";
763 case ARMneon_VCEQ: return "vceq";
764 case ARMneon_VCEQF: return "vceq";
765 case ARMneon_VPADD: return "vpadd";
766 case ARMneon_VPMINU: return "vpmin";
767 case ARMneon_VPMINS: return "vpmin";
768 case ARMneon_VPMINF: return "vpmin";
769 case ARMneon_VPMAXU: return "vpmax";
770 case ARMneon_VPMAXS: return "vpmax";
771 case ARMneon_VPMAXF: return "vpmax";
772 case ARMneon_VEXT: return "vext";
773 case ARMneon_VMUL: return "vmuli";
774 case ARMneon_VMULLU: return "vmull";
775 case ARMneon_VMULLS: return "vmull";
776 case ARMneon_VMULP: return "vmul";
777 case ARMneon_VMULFP: return "vmul";
778 case ARMneon_VMULLP: return "vmull";
779 case ARMneon_VQDMULH: return "vqdmulh";
780 case ARMneon_VQRDMULH: return "vqrdmulh";
781 case ARMneon_VQDMULL: return "vqdmull";
782 case ARMneon_VTBL: return "vtbl";
783 case ARMneon_VRECPS: return "vrecps";
784 case ARMneon_VRSQRTS: return "vrsqrts";
785 case ARMneon_INVALID: return "??invalid??";
786 /* ... */
787 default: vpanic("showARMNeonBinOp");
791 const HChar* showARMNeonBinOpDataType ( ARMNeonBinOp op ) {
792 switch (op) {
793 case ARMneon_VAND:
794 case ARMneon_VORR:
795 case ARMneon_VXOR:
796 return "";
797 case ARMneon_VADD:
798 case ARMneon_VSUB:
799 case ARMneon_VEXT:
800 case ARMneon_VMUL:
801 case ARMneon_VPADD:
802 case ARMneon_VTBL:
803 case ARMneon_VCEQ:
804 return ".i";
805 case ARMneon_VRHADDU:
806 case ARMneon_VMINU:
807 case ARMneon_VMAXU:
808 case ARMneon_VQADDU:
809 case ARMneon_VQSUBU:
810 case ARMneon_VCGTU:
811 case ARMneon_VCGEU:
812 case ARMneon_VMULLU:
813 case ARMneon_VPMINU:
814 case ARMneon_VPMAXU:
815 return ".u";
816 case ARMneon_VRHADDS:
817 case ARMneon_VMINS:
818 case ARMneon_VMAXS:
819 case ARMneon_VQADDS:
820 case ARMneon_VQSUBS:
821 case ARMneon_VCGTS:
822 case ARMneon_VCGES:
823 case ARMneon_VQDMULL:
824 case ARMneon_VMULLS:
825 case ARMneon_VPMINS:
826 case ARMneon_VPMAXS:
827 case ARMneon_VQDMULH:
828 case ARMneon_VQRDMULH:
829 return ".s";
830 case ARMneon_VMULP:
831 case ARMneon_VMULLP:
832 return ".p";
833 case ARMneon_VADDFP:
834 case ARMneon_VABDFP:
835 case ARMneon_VPADDFP:
836 case ARMneon_VSUBFP:
837 case ARMneon_VMULFP:
838 case ARMneon_VMINF:
839 case ARMneon_VMAXF:
840 case ARMneon_VPMINF:
841 case ARMneon_VPMAXF:
842 case ARMneon_VCGTF:
843 case ARMneon_VCGEF:
844 case ARMneon_VCEQF:
845 case ARMneon_VRECPS:
846 case ARMneon_VRSQRTS:
847 return ".f";
848 /* ... */
849 default: vpanic("showARMNeonBinOpDataType");
853 const HChar* showARMNeonUnOp ( ARMNeonUnOp op ) {
854 switch (op) {
855 case ARMneon_COPY: return "vmov";
856 case ARMneon_COPYLS: return "vmov";
857 case ARMneon_COPYLU: return "vmov";
858 case ARMneon_COPYN: return "vmov";
859 case ARMneon_COPYQNSS: return "vqmovn";
860 case ARMneon_COPYQNUS: return "vqmovun";
861 case ARMneon_COPYQNUU: return "vqmovn";
862 case ARMneon_NOT: return "vmvn";
863 case ARMneon_EQZ: return "vceq";
864 case ARMneon_CNT: return "vcnt";
865 case ARMneon_CLS: return "vcls";
866 case ARMneon_CLZ: return "vclz";
867 case ARMneon_DUP: return "vdup";
868 case ARMneon_PADDLS: return "vpaddl";
869 case ARMneon_PADDLU: return "vpaddl";
870 case ARMneon_VQSHLNSS: return "vqshl";
871 case ARMneon_VQSHLNUU: return "vqshl";
872 case ARMneon_VQSHLNUS: return "vqshlu";
873 case ARMneon_REV16: return "vrev16";
874 case ARMneon_REV32: return "vrev32";
875 case ARMneon_REV64: return "vrev64";
876 case ARMneon_VCVTFtoU: return "vcvt";
877 case ARMneon_VCVTFtoS: return "vcvt";
878 case ARMneon_VCVTUtoF: return "vcvt";
879 case ARMneon_VCVTStoF: return "vcvt";
880 case ARMneon_VCVTFtoFixedU: return "vcvt";
881 case ARMneon_VCVTFtoFixedS: return "vcvt";
882 case ARMneon_VCVTFixedUtoF: return "vcvt";
883 case ARMneon_VCVTFixedStoF: return "vcvt";
884 case ARMneon_VCVTF32toF16: return "vcvt";
885 case ARMneon_VCVTF16toF32: return "vcvt";
886 case ARMneon_VRECIP: return "vrecip";
887 case ARMneon_VRECIPF: return "vrecipf";
888 case ARMneon_VNEGF: return "vneg";
889 case ARMneon_ABS: return "vabs";
890 case ARMneon_VABSFP: return "vabsfp";
891 case ARMneon_VRSQRTEFP: return "vrsqrtefp";
892 case ARMneon_VRSQRTE: return "vrsqrte";
893 /* ... */
894 default: vpanic("showARMNeonUnOp");
898 const HChar* showARMNeonUnOpDataType ( ARMNeonUnOp op ) {
899 switch (op) {
900 case ARMneon_COPY:
901 case ARMneon_NOT:
902 return "";
903 case ARMneon_COPYN:
904 case ARMneon_EQZ:
905 case ARMneon_CNT:
906 case ARMneon_DUP:
907 case ARMneon_REV16:
908 case ARMneon_REV32:
909 case ARMneon_REV64:
910 return ".i";
911 case ARMneon_COPYLU:
912 case ARMneon_PADDLU:
913 case ARMneon_COPYQNUU:
914 case ARMneon_VQSHLNUU:
915 case ARMneon_VRECIP:
916 case ARMneon_VRSQRTE:
917 return ".u";
918 case ARMneon_CLS:
919 case ARMneon_CLZ:
920 case ARMneon_COPYLS:
921 case ARMneon_PADDLS:
922 case ARMneon_COPYQNSS:
923 case ARMneon_COPYQNUS:
924 case ARMneon_VQSHLNSS:
925 case ARMneon_VQSHLNUS:
926 case ARMneon_ABS:
927 return ".s";
928 case ARMneon_VRECIPF:
929 case ARMneon_VNEGF:
930 case ARMneon_VABSFP:
931 case ARMneon_VRSQRTEFP:
932 return ".f";
933 case ARMneon_VCVTFtoU: return ".u32.f32";
934 case ARMneon_VCVTFtoS: return ".s32.f32";
935 case ARMneon_VCVTUtoF: return ".f32.u32";
936 case ARMneon_VCVTStoF: return ".f32.s32";
937 case ARMneon_VCVTF16toF32: return ".f32.f16";
938 case ARMneon_VCVTF32toF16: return ".f16.f32";
939 case ARMneon_VCVTFtoFixedU: return ".u32.f32";
940 case ARMneon_VCVTFtoFixedS: return ".s32.f32";
941 case ARMneon_VCVTFixedUtoF: return ".f32.u32";
942 case ARMneon_VCVTFixedStoF: return ".f32.s32";
943 /* ... */
944 default: vpanic("showARMNeonUnOpDataType");
948 const HChar* showARMNeonUnOpS ( ARMNeonUnOpS op ) {
949 switch (op) {
950 case ARMneon_SETELEM: return "vmov";
951 case ARMneon_GETELEMU: return "vmov";
952 case ARMneon_GETELEMS: return "vmov";
953 case ARMneon_VDUP: return "vdup";
954 /* ... */
955 default: vpanic("showARMNeonUnarySOp");
959 const HChar* showARMNeonUnOpSDataType ( ARMNeonUnOpS op ) {
960 switch (op) {
961 case ARMneon_SETELEM:
962 case ARMneon_VDUP:
963 return ".i";
964 case ARMneon_GETELEMS:
965 return ".s";
966 case ARMneon_GETELEMU:
967 return ".u";
968 /* ... */
969 default: vpanic("showARMNeonUnarySOp");
973 const HChar* showARMNeonShiftOp ( ARMNeonShiftOp op ) {
974 switch (op) {
975 case ARMneon_VSHL: return "vshl";
976 case ARMneon_VSAL: return "vshl";
977 case ARMneon_VQSHL: return "vqshl";
978 case ARMneon_VQSAL: return "vqshl";
979 /* ... */
980 default: vpanic("showARMNeonShiftOp");
984 const HChar* showARMNeonShiftOpDataType ( ARMNeonShiftOp op ) {
985 switch (op) {
986 case ARMneon_VSHL:
987 case ARMneon_VQSHL:
988 return ".u";
989 case ARMneon_VSAL:
990 case ARMneon_VQSAL:
991 return ".s";
992 /* ... */
993 default: vpanic("showARMNeonShiftOpDataType");
997 const HChar* showARMNeonDualOp ( ARMNeonDualOp op ) {
998 switch (op) {
999 case ARMneon_TRN: return "vtrn";
1000 case ARMneon_ZIP: return "vzip";
1001 case ARMneon_UZP: return "vuzp";
1002 /* ... */
1003 default: vpanic("showARMNeonDualOp");
1007 const HChar* showARMNeonDualOpDataType ( ARMNeonDualOp op ) {
1008 switch (op) {
1009 case ARMneon_TRN:
1010 case ARMneon_ZIP:
1011 case ARMneon_UZP:
1012 return "i";
1013 /* ... */
1014 default: vpanic("showARMNeonDualOp");
1018 static const HChar* showARMNeonDataSize_wrk ( UInt size )
1020 switch (size) {
1021 case 0: return "8";
1022 case 1: return "16";
1023 case 2: return "32";
1024 case 3: return "64";
1025 default: vpanic("showARMNeonDataSize");
1029 static const HChar* showARMNeonDataSize ( const ARMInstr* i )
1031 switch (i->tag) {
1032 case ARMin_NBinary:
1033 if (i->ARMin.NBinary.op == ARMneon_VEXT)
1034 return "8";
1035 if (i->ARMin.NBinary.op == ARMneon_VAND ||
1036 i->ARMin.NBinary.op == ARMneon_VORR ||
1037 i->ARMin.NBinary.op == ARMneon_VXOR)
1038 return "";
1039 return showARMNeonDataSize_wrk(i->ARMin.NBinary.size);
1040 case ARMin_NUnary:
1041 if (i->ARMin.NUnary.op == ARMneon_COPY ||
1042 i->ARMin.NUnary.op == ARMneon_NOT ||
1043 i->ARMin.NUnary.op == ARMneon_VCVTF32toF16||
1044 i->ARMin.NUnary.op == ARMneon_VCVTF16toF32||
1045 i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
1046 i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
1047 i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
1048 i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF ||
1049 i->ARMin.NUnary.op == ARMneon_VCVTFtoS ||
1050 i->ARMin.NUnary.op == ARMneon_VCVTFtoU ||
1051 i->ARMin.NUnary.op == ARMneon_VCVTStoF ||
1052 i->ARMin.NUnary.op == ARMneon_VCVTUtoF)
1053 return "";
1054 if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
1055 i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
1056 i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
1057 UInt size;
1058 size = i->ARMin.NUnary.size;
1059 if (size & 0x40)
1060 return "64";
1061 if (size & 0x20)
1062 return "32";
1063 if (size & 0x10)
1064 return "16";
1065 if (size & 0x08)
1066 return "8";
1067 vpanic("showARMNeonDataSize");
1069 return showARMNeonDataSize_wrk(i->ARMin.NUnary.size);
1070 case ARMin_NUnaryS:
1071 if (i->ARMin.NUnaryS.op == ARMneon_VDUP) {
1072 int size;
1073 size = i->ARMin.NUnaryS.size;
1074 if ((size & 1) == 1)
1075 return "8";
1076 if ((size & 3) == 2)
1077 return "16";
1078 if ((size & 7) == 4)
1079 return "32";
1080 vpanic("showARMNeonDataSize");
1082 return showARMNeonDataSize_wrk(i->ARMin.NUnaryS.size);
1083 case ARMin_NShift:
1084 return showARMNeonDataSize_wrk(i->ARMin.NShift.size);
1085 case ARMin_NDual:
1086 return showARMNeonDataSize_wrk(i->ARMin.NDual.size);
1087 default:
1088 vpanic("showARMNeonDataSize");
1092 ARMInstr* ARMInstr_Alu ( ARMAluOp op,
1093 HReg dst, HReg argL, ARMRI84* argR ) {
1094 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1095 i->tag = ARMin_Alu;
1096 i->ARMin.Alu.op = op;
1097 i->ARMin.Alu.dst = dst;
1098 i->ARMin.Alu.argL = argL;
1099 i->ARMin.Alu.argR = argR;
1100 return i;
1102 ARMInstr* ARMInstr_Shift ( ARMShiftOp op,
1103 HReg dst, HReg argL, ARMRI5* argR ) {
1104 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1105 i->tag = ARMin_Shift;
1106 i->ARMin.Shift.op = op;
1107 i->ARMin.Shift.dst = dst;
1108 i->ARMin.Shift.argL = argL;
1109 i->ARMin.Shift.argR = argR;
1110 return i;
1112 ARMInstr* ARMInstr_Unary ( ARMUnaryOp op, HReg dst, HReg src ) {
1113 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1114 i->tag = ARMin_Unary;
1115 i->ARMin.Unary.op = op;
1116 i->ARMin.Unary.dst = dst;
1117 i->ARMin.Unary.src = src;
1118 return i;
1120 ARMInstr* ARMInstr_CmpOrTst ( Bool isCmp, HReg argL, ARMRI84* argR ) {
1121 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1122 i->tag = ARMin_CmpOrTst;
1123 i->ARMin.CmpOrTst.isCmp = isCmp;
1124 i->ARMin.CmpOrTst.argL = argL;
1125 i->ARMin.CmpOrTst.argR = argR;
1126 return i;
1128 ARMInstr* ARMInstr_Mov ( HReg dst, ARMRI84* src ) {
1129 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1130 i->tag = ARMin_Mov;
1131 i->ARMin.Mov.dst = dst;
1132 i->ARMin.Mov.src = src;
1133 return i;
1135 ARMInstr* ARMInstr_Imm32 ( HReg dst, UInt imm32 ) {
1136 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1137 i->tag = ARMin_Imm32;
1138 i->ARMin.Imm32.dst = dst;
1139 i->ARMin.Imm32.imm32 = imm32;
1140 return i;
1142 ARMInstr* ARMInstr_LdSt32 ( ARMCondCode cc,
1143 Bool isLoad, HReg rD, ARMAMode1* amode ) {
1144 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1145 i->tag = ARMin_LdSt32;
1146 i->ARMin.LdSt32.cc = cc;
1147 i->ARMin.LdSt32.isLoad = isLoad;
1148 i->ARMin.LdSt32.rD = rD;
1149 i->ARMin.LdSt32.amode = amode;
1150 vassert(cc != ARMcc_NV);
1151 return i;
1153 ARMInstr* ARMInstr_LdSt16 ( ARMCondCode cc,
1154 Bool isLoad, Bool signedLoad,
1155 HReg rD, ARMAMode2* amode ) {
1156 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1157 i->tag = ARMin_LdSt16;
1158 i->ARMin.LdSt16.cc = cc;
1159 i->ARMin.LdSt16.isLoad = isLoad;
1160 i->ARMin.LdSt16.signedLoad = signedLoad;
1161 i->ARMin.LdSt16.rD = rD;
1162 i->ARMin.LdSt16.amode = amode;
1163 vassert(cc != ARMcc_NV);
1164 return i;
1166 ARMInstr* ARMInstr_LdSt8U ( ARMCondCode cc,
1167 Bool isLoad, HReg rD, ARMAMode1* amode ) {
1168 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1169 i->tag = ARMin_LdSt8U;
1170 i->ARMin.LdSt8U.cc = cc;
1171 i->ARMin.LdSt8U.isLoad = isLoad;
1172 i->ARMin.LdSt8U.rD = rD;
1173 i->ARMin.LdSt8U.amode = amode;
1174 vassert(cc != ARMcc_NV);
1175 return i;
1177 ARMInstr* ARMInstr_Ld8S ( ARMCondCode cc, HReg rD, ARMAMode2* amode ) {
1178 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1179 i->tag = ARMin_Ld8S;
1180 i->ARMin.Ld8S.cc = cc;
1181 i->ARMin.Ld8S.rD = rD;
1182 i->ARMin.Ld8S.amode = amode;
1183 vassert(cc != ARMcc_NV);
1184 return i;
1186 ARMInstr* ARMInstr_XDirect ( Addr32 dstGA, ARMAMode1* amR15T,
1187 ARMCondCode cond, Bool toFastEP ) {
1188 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1189 i->tag = ARMin_XDirect;
1190 i->ARMin.XDirect.dstGA = dstGA;
1191 i->ARMin.XDirect.amR15T = amR15T;
1192 i->ARMin.XDirect.cond = cond;
1193 i->ARMin.XDirect.toFastEP = toFastEP;
1194 return i;
1196 ARMInstr* ARMInstr_XIndir ( HReg dstGA, ARMAMode1* amR15T,
1197 ARMCondCode cond ) {
1198 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1199 i->tag = ARMin_XIndir;
1200 i->ARMin.XIndir.dstGA = dstGA;
1201 i->ARMin.XIndir.amR15T = amR15T;
1202 i->ARMin.XIndir.cond = cond;
1203 return i;
1205 ARMInstr* ARMInstr_XAssisted ( HReg dstGA, ARMAMode1* amR15T,
1206 ARMCondCode cond, IRJumpKind jk ) {
1207 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1208 i->tag = ARMin_XAssisted;
1209 i->ARMin.XAssisted.dstGA = dstGA;
1210 i->ARMin.XAssisted.amR15T = amR15T;
1211 i->ARMin.XAssisted.cond = cond;
1212 i->ARMin.XAssisted.jk = jk;
1213 return i;
1215 ARMInstr* ARMInstr_CMov ( ARMCondCode cond, HReg dst, ARMRI84* src ) {
1216 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1217 i->tag = ARMin_CMov;
1218 i->ARMin.CMov.cond = cond;
1219 i->ARMin.CMov.dst = dst;
1220 i->ARMin.CMov.src = src;
1221 vassert(cond != ARMcc_AL);
1222 return i;
1224 ARMInstr* ARMInstr_Call ( ARMCondCode cond, Addr32 target, Int nArgRegs,
1225 RetLoc rloc ) {
1226 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1227 i->tag = ARMin_Call;
1228 i->ARMin.Call.cond = cond;
1229 i->ARMin.Call.target = target;
1230 i->ARMin.Call.nArgRegs = nArgRegs;
1231 i->ARMin.Call.rloc = rloc;
1232 vassert(is_sane_RetLoc(rloc));
1233 return i;
1235 ARMInstr* ARMInstr_Mul ( ARMMulOp op ) {
1236 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1237 i->tag = ARMin_Mul;
1238 i->ARMin.Mul.op = op;
1239 return i;
1241 ARMInstr* ARMInstr_LdrEX ( Int szB ) {
1242 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1243 i->tag = ARMin_LdrEX;
1244 i->ARMin.LdrEX.szB = szB;
1245 vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
1246 return i;
1248 ARMInstr* ARMInstr_StrEX ( Int szB ) {
1249 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1250 i->tag = ARMin_StrEX;
1251 i->ARMin.StrEX.szB = szB;
1252 vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
1253 return i;
1255 ARMInstr* ARMInstr_VLdStD ( Bool isLoad, HReg dD, ARMAModeV* am ) {
1256 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1257 i->tag = ARMin_VLdStD;
1258 i->ARMin.VLdStD.isLoad = isLoad;
1259 i->ARMin.VLdStD.dD = dD;
1260 i->ARMin.VLdStD.amode = am;
1261 return i;
1263 ARMInstr* ARMInstr_VLdStS ( Bool isLoad, HReg fD, ARMAModeV* am ) {
1264 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1265 i->tag = ARMin_VLdStS;
1266 i->ARMin.VLdStS.isLoad = isLoad;
1267 i->ARMin.VLdStS.fD = fD;
1268 i->ARMin.VLdStS.amode = am;
1269 return i;
1271 ARMInstr* ARMInstr_VAluD ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
1272 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1273 i->tag = ARMin_VAluD;
1274 i->ARMin.VAluD.op = op;
1275 i->ARMin.VAluD.dst = dst;
1276 i->ARMin.VAluD.argL = argL;
1277 i->ARMin.VAluD.argR = argR;
1278 return i;
1280 ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
1281 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1282 i->tag = ARMin_VAluS;
1283 i->ARMin.VAluS.op = op;
1284 i->ARMin.VAluS.dst = dst;
1285 i->ARMin.VAluS.argL = argL;
1286 i->ARMin.VAluS.argR = argR;
1287 return i;
1289 ARMInstr* ARMInstr_VUnaryD ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
1290 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1291 i->tag = ARMin_VUnaryD;
1292 i->ARMin.VUnaryD.op = op;
1293 i->ARMin.VUnaryD.dst = dst;
1294 i->ARMin.VUnaryD.src = src;
1295 return i;
1297 ARMInstr* ARMInstr_VUnaryS ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
1298 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1299 i->tag = ARMin_VUnaryS;
1300 i->ARMin.VUnaryS.op = op;
1301 i->ARMin.VUnaryS.dst = dst;
1302 i->ARMin.VUnaryS.src = src;
1303 return i;
1305 ARMInstr* ARMInstr_VCmpD ( HReg argL, HReg argR ) {
1306 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1307 i->tag = ARMin_VCmpD;
1308 i->ARMin.VCmpD.argL = argL;
1309 i->ARMin.VCmpD.argR = argR;
1310 return i;
1312 ARMInstr* ARMInstr_VCMovD ( ARMCondCode cond, HReg dst, HReg src ) {
1313 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1314 i->tag = ARMin_VCMovD;
1315 i->ARMin.VCMovD.cond = cond;
1316 i->ARMin.VCMovD.dst = dst;
1317 i->ARMin.VCMovD.src = src;
1318 vassert(cond != ARMcc_AL);
1319 return i;
1321 ARMInstr* ARMInstr_VCMovS ( ARMCondCode cond, HReg dst, HReg src ) {
1322 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1323 i->tag = ARMin_VCMovS;
1324 i->ARMin.VCMovS.cond = cond;
1325 i->ARMin.VCMovS.dst = dst;
1326 i->ARMin.VCMovS.src = src;
1327 vassert(cond != ARMcc_AL);
1328 return i;
1330 ARMInstr* ARMInstr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
1331 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1332 i->tag = ARMin_VCvtSD;
1333 i->ARMin.VCvtSD.sToD = sToD;
1334 i->ARMin.VCvtSD.dst = dst;
1335 i->ARMin.VCvtSD.src = src;
1336 return i;
1338 ARMInstr* ARMInstr_VXferQ ( Bool toQ, HReg qD, HReg dHi, HReg dLo ) {
1339 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1340 i->tag = ARMin_VXferQ;
1341 i->ARMin.VXferQ.toQ = toQ;
1342 i->ARMin.VXferQ.qD = qD;
1343 i->ARMin.VXferQ.dHi = dHi;
1344 i->ARMin.VXferQ.dLo = dLo;
1345 return i;
1347 ARMInstr* ARMInstr_VXferD ( Bool toD, HReg dD, HReg rHi, HReg rLo ) {
1348 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1349 i->tag = ARMin_VXferD;
1350 i->ARMin.VXferD.toD = toD;
1351 i->ARMin.VXferD.dD = dD;
1352 i->ARMin.VXferD.rHi = rHi;
1353 i->ARMin.VXferD.rLo = rLo;
1354 return i;
1356 ARMInstr* ARMInstr_VXferS ( Bool toS, HReg fD, HReg rLo ) {
1357 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1358 i->tag = ARMin_VXferS;
1359 i->ARMin.VXferS.toS = toS;
1360 i->ARMin.VXferS.fD = fD;
1361 i->ARMin.VXferS.rLo = rLo;
1362 return i;
1364 ARMInstr* ARMInstr_VCvtID ( Bool iToD, Bool syned,
1365 HReg dst, HReg src ) {
1366 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1367 i->tag = ARMin_VCvtID;
1368 i->ARMin.VCvtID.iToD = iToD;
1369 i->ARMin.VCvtID.syned = syned;
1370 i->ARMin.VCvtID.dst = dst;
1371 i->ARMin.VCvtID.src = src;
1372 return i;
1374 ARMInstr* ARMInstr_VRIntR ( Bool isF64, HReg dst, HReg src )
1376 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1377 i->tag = ARMin_VRIntR;
1378 i->ARMin.VRIntR.isF64 = isF64;
1379 i->ARMin.VRIntR.dst = dst ;
1380 i->ARMin.VRIntR.src = src;
1381 return i;
1383 ARMInstr* ARMInstr_VMinMaxNum ( Bool isF64, Bool isMax,
1384 HReg dst, HReg srcL, HReg srcR )
1386 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1387 i->tag = ARMin_VMinMaxNum;
1388 i->ARMin.VMinMaxNum.isF64 = isF64;
1389 i->ARMin.VMinMaxNum.isMax = isMax;
1390 i->ARMin.VMinMaxNum.dst = dst ;
1391 i->ARMin.VMinMaxNum.srcL = srcL;
1392 i->ARMin.VMinMaxNum.srcR = srcR;
1393 return i;
1395 ARMInstr* ARMInstr_FPSCR ( Bool toFPSCR, HReg iReg ) {
1396 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1397 i->tag = ARMin_FPSCR;
1398 i->ARMin.FPSCR.toFPSCR = toFPSCR;
1399 i->ARMin.FPSCR.iReg = iReg;
1400 return i;
1402 ARMInstr* ARMInstr_MFence ( void ) {
1403 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1404 i->tag = ARMin_MFence;
1405 return i;
1407 ARMInstr* ARMInstr_CLREX( void ) {
1408 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1409 i->tag = ARMin_CLREX;
1410 return i;
1413 ARMInstr* ARMInstr_NLdStQ ( Bool isLoad, HReg dQ, ARMAModeN *amode ) {
1414 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1415 i->tag = ARMin_NLdStQ;
1416 i->ARMin.NLdStQ.isLoad = isLoad;
1417 i->ARMin.NLdStQ.dQ = dQ;
1418 i->ARMin.NLdStQ.amode = amode;
1419 return i;
1422 ARMInstr* ARMInstr_NLdStD ( Bool isLoad, HReg dD, ARMAModeN *amode ) {
1423 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1424 i->tag = ARMin_NLdStD;
1425 i->ARMin.NLdStD.isLoad = isLoad;
1426 i->ARMin.NLdStD.dD = dD;
1427 i->ARMin.NLdStD.amode = amode;
1428 return i;
1431 ARMInstr* ARMInstr_NUnary ( ARMNeonUnOp op, HReg dQ, HReg nQ,
1432 UInt size, Bool Q ) {
1433 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1434 i->tag = ARMin_NUnary;
1435 i->ARMin.NUnary.op = op;
1436 i->ARMin.NUnary.src = nQ;
1437 i->ARMin.NUnary.dst = dQ;
1438 i->ARMin.NUnary.size = size;
1439 i->ARMin.NUnary.Q = Q;
1440 return i;
1443 ARMInstr* ARMInstr_NUnaryS ( ARMNeonUnOpS op, ARMNRS* dst, ARMNRS* src,
1444 UInt size, Bool Q ) {
1445 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1446 i->tag = ARMin_NUnaryS;
1447 i->ARMin.NUnaryS.op = op;
1448 i->ARMin.NUnaryS.src = src;
1449 i->ARMin.NUnaryS.dst = dst;
1450 i->ARMin.NUnaryS.size = size;
1451 i->ARMin.NUnaryS.Q = Q;
1452 return i;
1455 ARMInstr* ARMInstr_NDual ( ARMNeonDualOp op, HReg nQ, HReg mQ,
1456 UInt size, Bool Q ) {
1457 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1458 i->tag = ARMin_NDual;
1459 i->ARMin.NDual.op = op;
1460 i->ARMin.NDual.arg1 = nQ;
1461 i->ARMin.NDual.arg2 = mQ;
1462 i->ARMin.NDual.size = size;
1463 i->ARMin.NDual.Q = Q;
1464 return i;
1467 ARMInstr* ARMInstr_NBinary ( ARMNeonBinOp op,
1468 HReg dst, HReg argL, HReg argR,
1469 UInt size, Bool Q ) {
1470 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1471 i->tag = ARMin_NBinary;
1472 i->ARMin.NBinary.op = op;
1473 i->ARMin.NBinary.argL = argL;
1474 i->ARMin.NBinary.argR = argR;
1475 i->ARMin.NBinary.dst = dst;
1476 i->ARMin.NBinary.size = size;
1477 i->ARMin.NBinary.Q = Q;
1478 return i;
1481 ARMInstr* ARMInstr_NeonImm (HReg dst, ARMNImm* imm ) {
1482 ARMInstr *i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1483 i->tag = ARMin_NeonImm;
1484 i->ARMin.NeonImm.dst = dst;
1485 i->ARMin.NeonImm.imm = imm;
1486 return i;
1489 ARMInstr* ARMInstr_NCMovQ ( ARMCondCode cond, HReg dst, HReg src ) {
1490 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1491 i->tag = ARMin_NCMovQ;
1492 i->ARMin.NCMovQ.cond = cond;
1493 i->ARMin.NCMovQ.dst = dst;
1494 i->ARMin.NCMovQ.src = src;
1495 vassert(cond != ARMcc_AL);
1496 return i;
1499 ARMInstr* ARMInstr_NShift ( ARMNeonShiftOp op,
1500 HReg dst, HReg argL, HReg argR,
1501 UInt size, Bool Q ) {
1502 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1503 i->tag = ARMin_NShift;
1504 i->ARMin.NShift.op = op;
1505 i->ARMin.NShift.argL = argL;
1506 i->ARMin.NShift.argR = argR;
1507 i->ARMin.NShift.dst = dst;
1508 i->ARMin.NShift.size = size;
1509 i->ARMin.NShift.Q = Q;
1510 return i;
1513 ARMInstr* ARMInstr_NShl64 ( HReg dst, HReg src, UInt amt )
1515 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1516 i->tag = ARMin_NShl64;
1517 i->ARMin.NShl64.dst = dst;
1518 i->ARMin.NShl64.src = src;
1519 i->ARMin.NShl64.amt = amt;
1520 vassert(amt >= 1 && amt <= 63);
1521 return i;
1524 /* Helper copy-pasted from isel.c */
1525 static Bool fitsIn8x4 ( UInt* u8, UInt* u4, UInt u )
1527 UInt i;
1528 for (i = 0; i < 16; i++) {
1529 if (0 == (u & 0xFFFFFF00)) {
1530 *u8 = u;
1531 *u4 = i;
1532 return True;
1534 u = ROR32(u, 30);
1536 vassert(i == 16);
1537 return False;
1540 ARMInstr* ARMInstr_Add32 ( HReg rD, HReg rN, UInt imm32 ) {
1541 UInt u8, u4;
1542 ARMInstr *i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1543 /* Try to generate single ADD if possible */
1544 if (fitsIn8x4(&u8, &u4, imm32)) {
1545 i->tag = ARMin_Alu;
1546 i->ARMin.Alu.op = ARMalu_ADD;
1547 i->ARMin.Alu.dst = rD;
1548 i->ARMin.Alu.argL = rN;
1549 i->ARMin.Alu.argR = ARMRI84_I84(u8, u4);
1550 } else {
1551 i->tag = ARMin_Add32;
1552 i->ARMin.Add32.rD = rD;
1553 i->ARMin.Add32.rN = rN;
1554 i->ARMin.Add32.imm32 = imm32;
1556 return i;
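/* Illustrative check (editor's sketch, not from the original file):
   fitsIn8x4 succeeds exactly when the constant is an 8-bit value rotated
   right by an even amount, and the (u8,u4) it finds satisfies
   ROR32(u8, 2*u4) == imm32.  Hence ARMInstr_Add32 above emits a single
   ADD for, say, 0x2A000000, but falls back to the ARMin_Add32 form for a
   constant such as 0x00102030.  example_add32_selection is a
   hypothetical name. */
#if 0
static void example_add32_selection ( void )
{
   UInt u8, u4;
   vassert(fitsIn8x4(&u8, &u4, 0x2A000000));   /* encodable as 8x4 */
   vassert(ROR32(u8, 2 * u4) == 0x2A000000);   /* round-trips */
   vassert(!fitsIn8x4(&u8, &u4, 0x00102030));  /* not encodable as 8x4 */
}
#endif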
1559 ARMInstr* ARMInstr_EvCheck ( ARMAMode1* amCounter,
1560 ARMAMode1* amFailAddr ) {
1561 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1562 i->tag = ARMin_EvCheck;
1563 i->ARMin.EvCheck.amCounter = amCounter;
1564 i->ARMin.EvCheck.amFailAddr = amFailAddr;
1565 return i;
1568 ARMInstr* ARMInstr_ProfInc ( void ) {
1569 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1570 i->tag = ARMin_ProfInc;
1571 return i;
1574 /* ... */
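/* Editor's sketch (not part of the original file): the constructors
   above are normally driven by the instruction selector; a minimal
   example builds "add r4, r4, #0x2a000000" and pretty-prints it with
   ppARMInstr below.  example_build_and_print is a hypothetical name. */
#if 0
static void example_build_and_print ( void )
{
   ARMInstr* ins
      = ARMInstr_Alu(ARMalu_ADD, hregARM_R4(), hregARM_R4(),
                     ARMRI84_I84(0x2A, 4));
   ppARMInstr(ins);   /* prints roughly: add  r4, r4, 0x2a000000 */
   vex_printf("\n");
}
#endif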
1576 void ppARMInstr ( const ARMInstr* i ) {
1577 switch (i->tag) {
1578 case ARMin_Alu:
1579 vex_printf("%-4s ", showARMAluOp(i->ARMin.Alu.op));
1580 ppHRegARM(i->ARMin.Alu.dst);
1581 vex_printf(", ");
1582 ppHRegARM(i->ARMin.Alu.argL);
1583 vex_printf(", ");
1584 ppARMRI84(i->ARMin.Alu.argR);
1585 return;
1586 case ARMin_Shift:
1587 vex_printf("%s ", showARMShiftOp(i->ARMin.Shift.op));
1588 ppHRegARM(i->ARMin.Shift.dst);
1589 vex_printf(", ");
1590 ppHRegARM(i->ARMin.Shift.argL);
1591 vex_printf(", ");
1592 ppARMRI5(i->ARMin.Shift.argR);
1593 return;
1594 case ARMin_Unary:
1595 vex_printf("%s ", showARMUnaryOp(i->ARMin.Unary.op));
1596 ppHRegARM(i->ARMin.Unary.dst);
1597 vex_printf(", ");
1598 ppHRegARM(i->ARMin.Unary.src);
1599 return;
1600 case ARMin_CmpOrTst:
1601 vex_printf("%s ", i->ARMin.CmpOrTst.isCmp ? "cmp" : "tst");
1602 ppHRegARM(i->ARMin.CmpOrTst.argL);
1603 vex_printf(", ");
1604 ppARMRI84(i->ARMin.CmpOrTst.argR);
1605 return;
1606 case ARMin_Mov:
1607 vex_printf("mov ");
1608 ppHRegARM(i->ARMin.Mov.dst);
1609 vex_printf(", ");
1610 ppARMRI84(i->ARMin.Mov.src);
1611 return;
1612 case ARMin_Imm32:
1613 vex_printf("imm ");
1614 ppHRegARM(i->ARMin.Imm32.dst);
1615 vex_printf(", 0x%x", i->ARMin.Imm32.imm32);
1616 return;
1617 case ARMin_LdSt32:
1618 if (i->ARMin.LdSt32.isLoad) {
1619 vex_printf("ldr%s ", i->ARMin.LdSt32.cc == ARMcc_AL ? " "
1620 : showARMCondCode(i->ARMin.LdSt32.cc));
1621 ppHRegARM(i->ARMin.LdSt32.rD);
1622 vex_printf(", ");
1623 ppARMAMode1(i->ARMin.LdSt32.amode);
1624 } else {
1625 vex_printf("str%s ", i->ARMin.LdSt32.cc == ARMcc_AL ? " "
1626 : showARMCondCode(i->ARMin.LdSt32.cc));
1627 ppARMAMode1(i->ARMin.LdSt32.amode);
1628 vex_printf(", ");
1629 ppHRegARM(i->ARMin.LdSt32.rD);
1631 return;
1632 case ARMin_LdSt16:
1633 if (i->ARMin.LdSt16.isLoad) {
1634 vex_printf("%s%s%s",
1635 i->ARMin.LdSt16.signedLoad ? "ldrsh" : "ldrh",
1636 i->ARMin.LdSt16.cc == ARMcc_AL ? " "
1637 : showARMCondCode(i->ARMin.LdSt16.cc),
1638 i->ARMin.LdSt16.signedLoad ? " " : " ");
1639 ppHRegARM(i->ARMin.LdSt16.rD);
1640 vex_printf(", ");
1641 ppARMAMode2(i->ARMin.LdSt16.amode);
1642 } else {
1643 vex_printf("strh%s ",
1644 i->ARMin.LdSt16.cc == ARMcc_AL ? " "
1645 : showARMCondCode(i->ARMin.LdSt16.cc));
1646 ppARMAMode2(i->ARMin.LdSt16.amode);
1647 vex_printf(", ");
1648 ppHRegARM(i->ARMin.LdSt16.rD);
1650 return;
1651 case ARMin_LdSt8U:
1652 if (i->ARMin.LdSt8U.isLoad) {
1653 vex_printf("ldrb%s ", i->ARMin.LdSt8U.cc == ARMcc_AL ? " "
1654 : showARMCondCode(i->ARMin.LdSt8U.cc));
1655 ppHRegARM(i->ARMin.LdSt8U.rD);
1656 vex_printf(", ");
1657 ppARMAMode1(i->ARMin.LdSt8U.amode);
1658 } else {
1659 vex_printf("strb%s ", i->ARMin.LdSt8U.cc == ARMcc_AL ? " "
1660 : showARMCondCode(i->ARMin.LdSt8U.cc));
1661 ppARMAMode1(i->ARMin.LdSt8U.amode);
1662 vex_printf(", ");
1663 ppHRegARM(i->ARMin.LdSt8U.rD);
1665 return;
1666 case ARMin_Ld8S:
1667 vex_printf("ldrsb%s ", i->ARMin.Ld8S.cc == ARMcc_AL ? " "
1668 : showARMCondCode(i->ARMin.Ld8S.cc));
1669 ppARMAMode2(i->ARMin.Ld8S.amode);
1670 vex_printf(", ");
1671 ppHRegARM(i->ARMin.Ld8S.rD);
1672 return;
1673 case ARMin_XDirect:
1674 vex_printf("(xDirect) ");
1675 vex_printf("if (%%cpsr.%s) { ",
1676 showARMCondCode(i->ARMin.XDirect.cond));
1677 vex_printf("movw r12,0x%x; ",
1678 (UInt)(i->ARMin.XDirect.dstGA & 0xFFFF));
1679 vex_printf("movt r12,0x%x; ",
1680 (UInt)((i->ARMin.XDirect.dstGA >> 16) & 0xFFFF));
1681 vex_printf("str r12,");
1682 ppARMAMode1(i->ARMin.XDirect.amR15T);
1683 vex_printf("; movw r12,LO16($disp_cp_chain_me_to_%sEP); ",
1684 i->ARMin.XDirect.toFastEP ? "fast" : "slow");
1685 vex_printf("movt r12,HI16($disp_cp_chain_me_to_%sEP); ",
1686 i->ARMin.XDirect.toFastEP ? "fast" : "slow");
1687 vex_printf("blx r12 }");
1688 return;
1689 case ARMin_XIndir:
1690 vex_printf("(xIndir) ");
1691 vex_printf("if (%%cpsr.%s) { ",
1692 showARMCondCode(i->ARMin.XIndir.cond));
1693 vex_printf("str ");
1694 ppHRegARM(i->ARMin.XIndir.dstGA);
1695 vex_printf(",");
1696 ppARMAMode1(i->ARMin.XIndir.amR15T);
1697 vex_printf("; movw r12,LO16($disp_cp_xindir); ");
1698 vex_printf("movt r12,HI16($disp_cp_xindir); ");
1699 vex_printf("blx r12 }");
1700 return;
1701 case ARMin_XAssisted:
1702 vex_printf("(xAssisted) ");
1703 vex_printf("if (%%cpsr.%s) { ",
1704 showARMCondCode(i->ARMin.XAssisted.cond));
1705 vex_printf("str ");
1706 ppHRegARM(i->ARMin.XAssisted.dstGA);
1707 vex_printf(",");
1708 ppARMAMode1(i->ARMin.XAssisted.amR15T);
1709 vex_printf("movw r8,$IRJumpKind_to_TRCVAL(%d); ",
1710 (Int)i->ARMin.XAssisted.jk);
1711 vex_printf("movw r12,LO16($disp_cp_xassisted); ");
1712 vex_printf("movt r12,HI16($disp_cp_xassisted); ");
1713 vex_printf("blx r12 }");
1714 return;
1715 case ARMin_CMov:
1716 vex_printf("mov%s ", showARMCondCode(i->ARMin.CMov.cond));
1717 ppHRegARM(i->ARMin.CMov.dst);
1718 vex_printf(", ");
1719 ppARMRI84(i->ARMin.CMov.src);
1720 return;
1721 case ARMin_Call:
1722 vex_printf("call%s ",
1723 i->ARMin.Call.cond==ARMcc_AL
1724 ? "" : showARMCondCode(i->ARMin.Call.cond));
1725 vex_printf("0x%x [nArgRegs=%d, ",
1726 i->ARMin.Call.target, i->ARMin.Call.nArgRegs);
1727 ppRetLoc(i->ARMin.Call.rloc);
1728 vex_printf("]");
1729 return;
1730 case ARMin_Mul:
1731 vex_printf("%-5s ", showARMMulOp(i->ARMin.Mul.op));
1732 if (i->ARMin.Mul.op == ARMmul_PLAIN) {
1733 vex_printf("r0, r2, r3");
1734 } else {
1735 vex_printf("r1:r0, r2, r3");
1737 return;
1738 case ARMin_LdrEX: {
1739 const HChar* sz = "";
1740 switch (i->ARMin.LdrEX.szB) {
1741 case 1: sz = "b"; break; case 2: sz = "h"; break;
1742 case 8: sz = "d"; break; case 4: break;
1743 default: vassert(0);
1745 vex_printf("ldrex%s %sr2, [r4]",
1746 sz, i->ARMin.LdrEX.szB == 8 ? "r3:" : "");
1747 return;
1749 case ARMin_StrEX: {
1750 const HChar* sz = "";
1751 switch (i->ARMin.StrEX.szB) {
1752 case 1: sz = "b"; break; case 2: sz = "h"; break;
1753 case 8: sz = "d"; break; case 4: break;
1754 default: vassert(0);
1756 vex_printf("strex%s r0, %sr2, [r4]",
1757 sz, i->ARMin.StrEX.szB == 8 ? "r3:" : "");
1758 return;
1760 case ARMin_VLdStD:
1761 if (i->ARMin.VLdStD.isLoad) {
1762 vex_printf("fldd ");
1763 ppHRegARM(i->ARMin.VLdStD.dD);
1764 vex_printf(", ");
1765 ppARMAModeV(i->ARMin.VLdStD.amode);
1766 } else {
1767 vex_printf("fstd ");
1768 ppARMAModeV(i->ARMin.VLdStD.amode);
1769 vex_printf(", ");
1770 ppHRegARM(i->ARMin.VLdStD.dD);
1772 return;
1773 case ARMin_VLdStS:
1774 if (i->ARMin.VLdStS.isLoad) {
1775 vex_printf("flds ");
1776 ppHRegARM(i->ARMin.VLdStS.fD);
1777 vex_printf(", ");
1778 ppARMAModeV(i->ARMin.VLdStS.amode);
1779 } else {
1780 vex_printf("fsts ");
1781 ppARMAModeV(i->ARMin.VLdStS.amode);
1782 vex_printf(", ");
1783 ppHRegARM(i->ARMin.VLdStS.fD);
1785 return;
1786 case ARMin_VAluD:
1787 vex_printf("f%-3sd ", showARMVfpOp(i->ARMin.VAluD.op));
1788 ppHRegARM(i->ARMin.VAluD.dst);
1789 vex_printf(", ");
1790 ppHRegARM(i->ARMin.VAluD.argL);
1791 vex_printf(", ");
1792 ppHRegARM(i->ARMin.VAluD.argR);
1793 return;
1794 case ARMin_VAluS:
1795 vex_printf("f%-3ss ", showARMVfpOp(i->ARMin.VAluS.op));
1796 ppHRegARM(i->ARMin.VAluS.dst);
1797 vex_printf(", ");
1798 ppHRegARM(i->ARMin.VAluS.argL);
1799 vex_printf(", ");
1800 ppHRegARM(i->ARMin.VAluS.argR);
1801 return;
1802 case ARMin_VUnaryD:
1803 vex_printf("f%-3sd ", showARMVfpUnaryOp(i->ARMin.VUnaryD.op));
1804 ppHRegARM(i->ARMin.VUnaryD.dst);
1805 vex_printf(", ");
1806 ppHRegARM(i->ARMin.VUnaryD.src);
1807 return;
1808 case ARMin_VUnaryS:
1809 vex_printf("f%-3ss ", showARMVfpUnaryOp(i->ARMin.VUnaryS.op));
1810 ppHRegARM(i->ARMin.VUnaryS.dst);
1811 vex_printf(", ");
1812 ppHRegARM(i->ARMin.VUnaryS.src);
1813 return;
1814 case ARMin_VCmpD:
1815 vex_printf("fcmpd ");
1816 ppHRegARM(i->ARMin.VCmpD.argL);
1817 vex_printf(", ");
1818 ppHRegARM(i->ARMin.VCmpD.argR);
1819 vex_printf(" ; fmstat");
1820 return;
1821 case ARMin_VCMovD:
1822 vex_printf("fcpyd%s ", showARMCondCode(i->ARMin.VCMovD.cond));
1823 ppHRegARM(i->ARMin.VCMovD.dst);
1824 vex_printf(", ");
1825 ppHRegARM(i->ARMin.VCMovD.src);
1826 return;
1827 case ARMin_VCMovS:
1828 vex_printf("fcpys%s ", showARMCondCode(i->ARMin.VCMovS.cond));
1829 ppHRegARM(i->ARMin.VCMovS.dst);
1830 vex_printf(", ");
1831 ppHRegARM(i->ARMin.VCMovS.src);
1832 return;
1833 case ARMin_VCvtSD:
1834 vex_printf("fcvt%s ", i->ARMin.VCvtSD.sToD ? "ds" : "sd");
1835 ppHRegARM(i->ARMin.VCvtSD.dst);
1836 vex_printf(", ");
1837 ppHRegARM(i->ARMin.VCvtSD.src);
1838 return;
1839 case ARMin_VXferQ:
1840 if (i->ARMin.VXferQ.toQ) {
1841 vex_printf("vmov ");
1842 ppHRegARM(i->ARMin.VXferQ.qD);
1843 vex_printf("-lo64, ");
1844 ppHRegARM(i->ARMin.VXferQ.dLo);
1845 vex_printf(" ; vmov ");
1846 ppHRegARM(i->ARMin.VXferQ.qD);
1847 vex_printf("-hi64, ");
1848 ppHRegARM(i->ARMin.VXferQ.dHi);
1849 } else {
1850 vex_printf("vmov ");
1851 ppHRegARM(i->ARMin.VXferQ.dLo);
1852 vex_printf(", ");
1853 ppHRegARM(i->ARMin.VXferQ.qD);
1854 vex_printf("-lo64");
1855 vex_printf(" ; vmov ");
1856 ppHRegARM(i->ARMin.VXferQ.dHi);
1857 vex_printf(", ");
1858 ppHRegARM(i->ARMin.VXferQ.qD);
1859 vex_printf("-hi64");
1861 return;
1862 case ARMin_VXferD:
1863 vex_printf("vmov ");
1864 if (i->ARMin.VXferD.toD) {
1865 ppHRegARM(i->ARMin.VXferD.dD);
1866 vex_printf(", ");
1867 ppHRegARM(i->ARMin.VXferD.rLo);
1868 vex_printf(", ");
1869 ppHRegARM(i->ARMin.VXferD.rHi);
1870 } else {
1871 ppHRegARM(i->ARMin.VXferD.rLo);
1872 vex_printf(", ");
1873 ppHRegARM(i->ARMin.VXferD.rHi);
1874 vex_printf(", ");
1875 ppHRegARM(i->ARMin.VXferD.dD);
1877 return;
1878 case ARMin_VXferS:
1879 vex_printf("vmov ");
1880 if (i->ARMin.VXferS.toS) {
1881 ppHRegARM(i->ARMin.VXferS.fD);
1882 vex_printf(", ");
1883 ppHRegARM(i->ARMin.VXferS.rLo);
1884 } else {
1885 ppHRegARM(i->ARMin.VXferS.rLo);
1886 vex_printf(", ");
1887 ppHRegARM(i->ARMin.VXferS.fD);
1889 return;
1890 case ARMin_VCvtID: {
1891 const HChar* nm = "?";
1892 if (i->ARMin.VCvtID.iToD) {
1893 nm = i->ARMin.VCvtID.syned ? "fsitod" : "fuitod";
1894 } else {
1895 nm = i->ARMin.VCvtID.syned ? "ftosid" : "ftouid";
1897 vex_printf("%s ", nm);
1898 ppHRegARM(i->ARMin.VCvtID.dst);
1899 vex_printf(", ");
1900 ppHRegARM(i->ARMin.VCvtID.src);
1901 return;
1903 case ARMin_VRIntR: {
1904 const HChar* sz = i->ARMin.VRIntR.isF64 ? "f64" : "f32";
1905 vex_printf("vrintr.%s.%s ", sz, sz);
1906 ppHRegARM(i->ARMin.VRIntR.dst);
1907 vex_printf(", ");
1908 ppHRegARM(i->ARMin.VRIntR.src);
1909 return;
1911 case ARMin_VMinMaxNum: {
1912 const HChar* sz = i->ARMin.VMinMaxNum.isF64 ? "f64" : "f32";
1913 const HChar* nm = i->ARMin.VMinMaxNum.isMax ? "vmaxnm" : "vminnm";
1914 vex_printf("%s.%s ", nm, sz);
1915 ppHRegARM(i->ARMin.VMinMaxNum.dst);
1916 vex_printf(", ");
1917 ppHRegARM(i->ARMin.VMinMaxNum.srcL);
1918 vex_printf(", ");
1919 ppHRegARM(i->ARMin.VMinMaxNum.srcR);
1920 return;
1922 case ARMin_FPSCR:
1923 if (i->ARMin.FPSCR.toFPSCR) {
1924 vex_printf("fmxr fpscr, ");
1925 ppHRegARM(i->ARMin.FPSCR.iReg);
1926 } else {
1927 vex_printf("fmrx ");
1928 ppHRegARM(i->ARMin.FPSCR.iReg);
1929 vex_printf(", fpscr");
1931 return;
1932 case ARMin_MFence:
1933 vex_printf("(mfence) dsb sy; dmb sy; isb");
1934 return;
1935 case ARMin_CLREX:
1936 vex_printf("clrex");
1937 return;
1938 case ARMin_NLdStQ:
1939 if (i->ARMin.NLdStQ.isLoad)
1940 vex_printf("vld1.32 {");
1941 else
1942 vex_printf("vst1.32 {");
1943 ppHRegARM(i->ARMin.NLdStQ.dQ);
1944 vex_printf("} ");
1945 ppARMAModeN(i->ARMin.NLdStQ.amode);
1946 return;
1947 case ARMin_NLdStD:
1948 if (i->ARMin.NLdStD.isLoad)
1949 vex_printf("vld1.32 {");
1950 else
1951 vex_printf("vst1.32 {");
1952 ppHRegARM(i->ARMin.NLdStD.dD);
1953 vex_printf("} ");
1954 ppARMAModeN(i->ARMin.NLdStD.amode);
1955 return;
1956 case ARMin_NUnary:
1957 vex_printf("%s%s%s ",
1958 showARMNeonUnOp(i->ARMin.NUnary.op),
1959 showARMNeonUnOpDataType(i->ARMin.NUnary.op),
1960 showARMNeonDataSize(i));
1961 ppHRegARM(i->ARMin.NUnary.dst);
1962 vex_printf(", ");
1963 ppHRegARM(i->ARMin.NUnary.src);
1964 if (i->ARMin.NUnary.op == ARMneon_EQZ)
1965 vex_printf(", #0");
1966 if (i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
1967 i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
1968 i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
1969 i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF) {
1970 vex_printf(", #%u", i->ARMin.NUnary.size);
1972 if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
1973 i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
1974 i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
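                /* Editor's note: for the VQSHLN ops the 'size' field packs both
                   the element width and the shift amount -- the highest set
                   marker bit (0x40/0x20/0x10/0x08 for 64/32/16/8-bit lanes)
                   selects the width, and the bits below it hold the shift,
                   which is what the subtractions below recover. */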
1975 UInt size;
1976 size = i->ARMin.NUnary.size;
1977 if (size & 0x40) {
1978 vex_printf(", #%u", size - 64);
1979 } else if (size & 0x20) {
1980 vex_printf(", #%u", size - 32);
1981 } else if (size & 0x10) {
1982 vex_printf(", #%u", size - 16);
1983 } else if (size & 0x08) {
1984 vex_printf(", #%u", size - 8);
1987 return;
1988 case ARMin_NUnaryS:
1989 vex_printf("%s%s%s ",
1990 showARMNeonUnOpS(i->ARMin.NUnaryS.op),
1991 showARMNeonUnOpSDataType(i->ARMin.NUnaryS.op),
1992 showARMNeonDataSize(i));
1993 ppARMNRS(i->ARMin.NUnaryS.dst);
1994 vex_printf(", ");
1995 ppARMNRS(i->ARMin.NUnaryS.src);
1996 return;
1997 case ARMin_NShift:
1998 vex_printf("%s%s%s ",
1999 showARMNeonShiftOp(i->ARMin.NShift.op),
2000 showARMNeonShiftOpDataType(i->ARMin.NShift.op),
2001 showARMNeonDataSize(i));
2002 ppHRegARM(i->ARMin.NShift.dst);
2003 vex_printf(", ");
2004 ppHRegARM(i->ARMin.NShift.argL);
2005 vex_printf(", ");
2006 ppHRegARM(i->ARMin.NShift.argR);
2007 return;
2008 case ARMin_NShl64:
2009 vex_printf("vshl.i64 ");
2010 ppHRegARM(i->ARMin.NShl64.dst);
2011 vex_printf(", ");
2012 ppHRegARM(i->ARMin.NShl64.src);
2013 vex_printf(", #%u", i->ARMin.NShl64.amt);
2014 return;
2015 case ARMin_NDual:
2016 vex_printf("%s%s%s ",
2017 showARMNeonDualOp(i->ARMin.NDual.op),
2018 showARMNeonDualOpDataType(i->ARMin.NDual.op),
2019 showARMNeonDataSize(i));
2020 ppHRegARM(i->ARMin.NDual.arg1);
2021 vex_printf(", ");
2022 ppHRegARM(i->ARMin.NDual.arg2);
2023 return;
2024 case ARMin_NBinary:
2025 vex_printf("%s%s%s",
2026 showARMNeonBinOp(i->ARMin.NBinary.op),
2027 showARMNeonBinOpDataType(i->ARMin.NBinary.op),
2028 showARMNeonDataSize(i));
2029 vex_printf(" ");
2030 ppHRegARM(i->ARMin.NBinary.dst);
2031 vex_printf(", ");
2032 ppHRegARM(i->ARMin.NBinary.argL);
2033 vex_printf(", ");
2034 ppHRegARM(i->ARMin.NBinary.argR);
2035 return;
2036 case ARMin_NeonImm:
2037 vex_printf("vmov ");
2038 ppHRegARM(i->ARMin.NeonImm.dst);
2039 vex_printf(", ");
2040 ppARMNImm(i->ARMin.NeonImm.imm);
2041 return;
2042 case ARMin_NCMovQ:
2043 vex_printf("vmov%s ", showARMCondCode(i->ARMin.NCMovQ.cond));
2044 ppHRegARM(i->ARMin.NCMovQ.dst);
2045 vex_printf(", ");
2046 ppHRegARM(i->ARMin.NCMovQ.src);
2047 return;
2048 case ARMin_Add32:
2049 vex_printf("add32 ");
2050 ppHRegARM(i->ARMin.Add32.rD);
2051 vex_printf(", ");
2052 ppHRegARM(i->ARMin.Add32.rN);
2053 vex_printf(", ");
2054 vex_printf("%u", i->ARMin.Add32.imm32);
2055 return;
2056 case ARMin_EvCheck:
2057 vex_printf("(evCheck) ldr r12,");
2058 ppARMAMode1(i->ARMin.EvCheck.amCounter);
2059 vex_printf("; subs r12,r12,$1; str r12,");
2060 ppARMAMode1(i->ARMin.EvCheck.amCounter);
2061 vex_printf("; bpl nofail; ldr r12,");
2062 ppARMAMode1(i->ARMin.EvCheck.amFailAddr);
2063 vex_printf("; bx r12; nofail:");
2064 return;
2065 case ARMin_ProfInc:
2066 vex_printf("(profInc) movw r12,LO16($NotKnownYet); "
2067 "movw r12,HI16($NotKnownYet); "
2068 "ldr r11,[r12]; "
2069 "adds r11,r11,$1; "
2070 "str r11,[r12]; "
2071 "ldr r11,[r12+4]; "
2072 "adc r11,r11,$0; "
2073 "str r11,[r12+4]");
2074 return;
2075 default:
2076 vex_printf("ppARMInstr: unhandled case (tag %d)", (Int)i->tag);
2077 vpanic("ppARMInstr(1)");
2078          return;
2079    }
2080 }
2083 /* --------- Helpers for register allocation. --------- */
2085 void getRegUsage_ARMInstr ( HRegUsage* u, const ARMInstr* i, Bool mode64 )
2086 {
2087 vassert(mode64 == False);
2088 initHRegUsage(u);
2089 switch (i->tag) {
2090 case ARMin_Alu:
2091 addHRegUse(u, HRmWrite, i->ARMin.Alu.dst);
2092 addHRegUse(u, HRmRead, i->ARMin.Alu.argL);
2093 addRegUsage_ARMRI84(u, i->ARMin.Alu.argR);
2094 return;
2095 case ARMin_Shift:
2096 addHRegUse(u, HRmWrite, i->ARMin.Shift.dst);
2097 addHRegUse(u, HRmRead, i->ARMin.Shift.argL);
2098 addRegUsage_ARMRI5(u, i->ARMin.Shift.argR);
2099 return;
2100 case ARMin_Unary:
2101 addHRegUse(u, HRmWrite, i->ARMin.Unary.dst);
2102 addHRegUse(u, HRmRead, i->ARMin.Unary.src);
2103 return;
2104 case ARMin_CmpOrTst:
2105 addHRegUse(u, HRmRead, i->ARMin.CmpOrTst.argL);
2106 addRegUsage_ARMRI84(u, i->ARMin.CmpOrTst.argR);
2107 return;
2108 case ARMin_Mov:
2109 addHRegUse(u, HRmWrite, i->ARMin.Mov.dst);
2110 addRegUsage_ARMRI84(u, i->ARMin.Mov.src);
2112 if (i->ARMin.Mov.src->tag == ARMri84_R) {
2113 u->isRegRegMove = True;
2114 u->regMoveSrc = i->ARMin.Mov.src->ARMri84.R.reg;
2115 u->regMoveDst = i->ARMin.Mov.dst;
2117 return;
2118 case ARMin_Imm32:
2119 addHRegUse(u, HRmWrite, i->ARMin.Imm32.dst);
2120 return;
2121 case ARMin_LdSt32:
2122 addRegUsage_ARMAMode1(u, i->ARMin.LdSt32.amode);
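            /* Editor's note: a conditional load leaves rD unchanged when the
               condition fails, so rD's old value must stay live across the
               instruction; hence below it is marked as read in addition to
               written. */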
2123 if (i->ARMin.LdSt32.isLoad) {
2124 addHRegUse(u, HRmWrite, i->ARMin.LdSt32.rD);
2125 if (i->ARMin.LdSt32.cc != ARMcc_AL)
2126 addHRegUse(u, HRmRead, i->ARMin.LdSt32.rD);
2127 } else {
2128 addHRegUse(u, HRmRead, i->ARMin.LdSt32.rD);
2130 return;
2131 case ARMin_LdSt16:
2132 addRegUsage_ARMAMode2(u, i->ARMin.LdSt16.amode);
2133 if (i->ARMin.LdSt16.isLoad) {
2134 addHRegUse(u, HRmWrite, i->ARMin.LdSt16.rD);
2135 if (i->ARMin.LdSt16.cc != ARMcc_AL)
2136 addHRegUse(u, HRmRead, i->ARMin.LdSt16.rD);
2137 } else {
2138 addHRegUse(u, HRmRead, i->ARMin.LdSt16.rD);
2140 return;
2141 case ARMin_LdSt8U:
2142 addRegUsage_ARMAMode1(u, i->ARMin.LdSt8U.amode);
2143 if (i->ARMin.LdSt8U.isLoad) {
2144 addHRegUse(u, HRmWrite, i->ARMin.LdSt8U.rD);
2145 if (i->ARMin.LdSt8U.cc != ARMcc_AL)
2146 addHRegUse(u, HRmRead, i->ARMin.LdSt8U.rD);
2147 } else {
2148 addHRegUse(u, HRmRead, i->ARMin.LdSt8U.rD);
2150 return;
2151 case ARMin_Ld8S:
2152 addRegUsage_ARMAMode2(u, i->ARMin.Ld8S.amode);
2153 addHRegUse(u, HRmWrite, i->ARMin.Ld8S.rD);
2154 if (i->ARMin.Ld8S.cc != ARMcc_AL)
2155 addHRegUse(u, HRmRead, i->ARMin.Ld8S.rD);
2156 return;
2157 /* XDirect/XIndir/XAssisted are also a bit subtle. They
2158 conditionally exit the block. Hence we only need to list (1)
2159 the registers that they read, and (2) the registers that they
2160 write in the case where the block is not exited. (2) is
2161 empty, hence only (1) is relevant here. */
2162 case ARMin_XDirect:
2163 addRegUsage_ARMAMode1(u, i->ARMin.XDirect.amR15T);
2164 return;
2165 case ARMin_XIndir:
2166 addHRegUse(u, HRmRead, i->ARMin.XIndir.dstGA);
2167 addRegUsage_ARMAMode1(u, i->ARMin.XIndir.amR15T);
2168 return;
2169 case ARMin_XAssisted:
2170 addHRegUse(u, HRmRead, i->ARMin.XAssisted.dstGA);
2171 addRegUsage_ARMAMode1(u, i->ARMin.XAssisted.amR15T);
2172 return;
2173 case ARMin_CMov:
2174 addHRegUse(u, HRmWrite, i->ARMin.CMov.dst);
2175 addHRegUse(u, HRmRead, i->ARMin.CMov.dst);
2176 addRegUsage_ARMRI84(u, i->ARMin.CMov.src);
2177 return;
2178 case ARMin_Call:
2179 /* logic and comments copied/modified from x86 back end */
2180 /* This is a bit subtle. */
2181 /* First off, claim it trashes all the caller-saved regs
2182 which fall within the register allocator's jurisdiction.
2183 These I believe to be r0,1,2,3. If it turns out that r9
2184 is also caller-saved, then we'll have to add that here
2185 too. */
2186 addHRegUse(u, HRmWrite, hregARM_R0());
2187 addHRegUse(u, HRmWrite, hregARM_R1());
2188 addHRegUse(u, HRmWrite, hregARM_R2());
2189 addHRegUse(u, HRmWrite, hregARM_R3());
2190 /* Now we have to state any parameter-carrying registers
2191 which might be read. This depends on nArgRegs. */
2192 switch (i->ARMin.Call.nArgRegs) {
2193 case 4: addHRegUse(u, HRmRead, hregARM_R3()); /*fallthru*/
2194 case 3: addHRegUse(u, HRmRead, hregARM_R2()); /*fallthru*/
2195 case 2: addHRegUse(u, HRmRead, hregARM_R1()); /*fallthru*/
2196 case 1: addHRegUse(u, HRmRead, hregARM_R0()); break;
2197 case 0: break;
2198 default: vpanic("getRegUsage_ARM:Call:regparms");
2200 /* Finally, there is the issue that the insn trashes a
2201 register because the literal target address has to be
2202 loaded into a register. Fortunately, for the nArgRegs=
2203 0/1/2/3 case, we can use r0, r1, r2 or r3 respectively, so
2204 this does not cause any further damage. For the
2205 nArgRegs=4 case, we'll have to choose another register
2206 arbitrarily since all the caller saved regs are used for
2207          parameters, and so we might as well choose r11.
2208       */
2209 if (i->ARMin.Call.nArgRegs == 4)
2210 addHRegUse(u, HRmWrite, hregARM_R11());
2211 /* Upshot of this is that the assembler really must observe
2212 the here-stated convention of which register to use as an
2213 address temporary, depending on nArgRegs: 0==r0,
2214 1==r1, 2==r2, 3==r3, 4==r11 */
2215 return;
2216 case ARMin_Mul:
2217 addHRegUse(u, HRmRead, hregARM_R2());
2218 addHRegUse(u, HRmRead, hregARM_R3());
2219 addHRegUse(u, HRmWrite, hregARM_R0());
2220 if (i->ARMin.Mul.op != ARMmul_PLAIN)
2221 addHRegUse(u, HRmWrite, hregARM_R1());
2222 return;
2223 case ARMin_LdrEX:
2224 addHRegUse(u, HRmRead, hregARM_R4());
2225 addHRegUse(u, HRmWrite, hregARM_R2());
2226 if (i->ARMin.LdrEX.szB == 8)
2227 addHRegUse(u, HRmWrite, hregARM_R3());
2228 return;
2229 case ARMin_StrEX:
2230 addHRegUse(u, HRmRead, hregARM_R4());
2231 addHRegUse(u, HRmWrite, hregARM_R0());
2232 addHRegUse(u, HRmRead, hregARM_R2());
2233 if (i->ARMin.StrEX.szB == 8)
2234 addHRegUse(u, HRmRead, hregARM_R3());
2235 return;
2236 case ARMin_VLdStD:
2237 addRegUsage_ARMAModeV(u, i->ARMin.VLdStD.amode);
2238 if (i->ARMin.VLdStD.isLoad) {
2239 addHRegUse(u, HRmWrite, i->ARMin.VLdStD.dD);
2240 } else {
2241 addHRegUse(u, HRmRead, i->ARMin.VLdStD.dD);
2243 return;
2244 case ARMin_VLdStS:
2245 addRegUsage_ARMAModeV(u, i->ARMin.VLdStS.amode);
2246 if (i->ARMin.VLdStS.isLoad) {
2247 addHRegUse(u, HRmWrite, i->ARMin.VLdStS.fD);
2248 } else {
2249 addHRegUse(u, HRmRead, i->ARMin.VLdStS.fD);
2251 return;
2252 case ARMin_VAluD:
2253 addHRegUse(u, HRmWrite, i->ARMin.VAluD.dst);
2254 addHRegUse(u, HRmRead, i->ARMin.VAluD.argL);
2255 addHRegUse(u, HRmRead, i->ARMin.VAluD.argR);
2256 return;
2257 case ARMin_VAluS:
2258 addHRegUse(u, HRmWrite, i->ARMin.VAluS.dst);
2259 addHRegUse(u, HRmRead, i->ARMin.VAluS.argL);
2260 addHRegUse(u, HRmRead, i->ARMin.VAluS.argR);
2261 return;
2262 case ARMin_VUnaryD:
2263 addHRegUse(u, HRmWrite, i->ARMin.VUnaryD.dst);
2264 addHRegUse(u, HRmRead, i->ARMin.VUnaryD.src);
2266 if (i->ARMin.VUnaryD.op == ARMvfpu_COPY) {
2267 u->isRegRegMove = True;
2268 u->regMoveSrc = i->ARMin.VUnaryD.src;
2269 u->regMoveDst = i->ARMin.VUnaryD.dst;
2271 return;
2272 case ARMin_VUnaryS:
2273 addHRegUse(u, HRmWrite, i->ARMin.VUnaryS.dst);
2274 addHRegUse(u, HRmRead, i->ARMin.VUnaryS.src);
2276 if (i->ARMin.VUnaryS.op == ARMvfpu_COPY) {
2277 u->isRegRegMove = True;
2278 u->regMoveSrc = i->ARMin.VUnaryS.src;
2279 u->regMoveDst = i->ARMin.VUnaryS.dst;
2281 return;
2282 case ARMin_VCmpD:
2283 addHRegUse(u, HRmRead, i->ARMin.VCmpD.argL);
2284 addHRegUse(u, HRmRead, i->ARMin.VCmpD.argR);
2285 return;
2286 case ARMin_VCMovD:
2287 addHRegUse(u, HRmWrite, i->ARMin.VCMovD.dst);
2288 addHRegUse(u, HRmRead, i->ARMin.VCMovD.dst);
2289 addHRegUse(u, HRmRead, i->ARMin.VCMovD.src);
2290 return;
2291 case ARMin_VCMovS:
2292 addHRegUse(u, HRmWrite, i->ARMin.VCMovS.dst);
2293 addHRegUse(u, HRmRead, i->ARMin.VCMovS.dst);
2294 addHRegUse(u, HRmRead, i->ARMin.VCMovS.src);
2295 return;
2296 case ARMin_VCvtSD:
2297 addHRegUse(u, HRmWrite, i->ARMin.VCvtSD.dst);
2298 addHRegUse(u, HRmRead, i->ARMin.VCvtSD.src);
2299 return;
2300 case ARMin_VXferQ:
2301 if (i->ARMin.VXferQ.toQ) {
2302 addHRegUse(u, HRmWrite, i->ARMin.VXferQ.qD);
2303 addHRegUse(u, HRmRead, i->ARMin.VXferQ.dHi);
2304 addHRegUse(u, HRmRead, i->ARMin.VXferQ.dLo);
2305 } else {
2306 addHRegUse(u, HRmRead, i->ARMin.VXferQ.qD);
2307 addHRegUse(u, HRmWrite, i->ARMin.VXferQ.dHi);
2308 addHRegUse(u, HRmWrite, i->ARMin.VXferQ.dLo);
2310 return;
2311 case ARMin_VXferD:
2312 if (i->ARMin.VXferD.toD) {
2313 addHRegUse(u, HRmWrite, i->ARMin.VXferD.dD);
2314 addHRegUse(u, HRmRead, i->ARMin.VXferD.rHi);
2315 addHRegUse(u, HRmRead, i->ARMin.VXferD.rLo);
2316 } else {
2317 addHRegUse(u, HRmRead, i->ARMin.VXferD.dD);
2318 addHRegUse(u, HRmWrite, i->ARMin.VXferD.rHi);
2319 addHRegUse(u, HRmWrite, i->ARMin.VXferD.rLo);
2321 return;
2322 case ARMin_VXferS:
2323 if (i->ARMin.VXferS.toS) {
2324 addHRegUse(u, HRmWrite, i->ARMin.VXferS.fD);
2325 addHRegUse(u, HRmRead, i->ARMin.VXferS.rLo);
2326 } else {
2327 addHRegUse(u, HRmRead, i->ARMin.VXferS.fD);
2328 addHRegUse(u, HRmWrite, i->ARMin.VXferS.rLo);
2330 return;
2331 case ARMin_VCvtID:
2332 addHRegUse(u, HRmWrite, i->ARMin.VCvtID.dst);
2333 addHRegUse(u, HRmRead, i->ARMin.VCvtID.src);
2334 return;
2335 case ARMin_VRIntR:
2336 addHRegUse(u, HRmWrite, i->ARMin.VRIntR.dst);
2337 addHRegUse(u, HRmRead, i->ARMin.VRIntR.src);
2338 return;
2339 case ARMin_VMinMaxNum:
2340 addHRegUse(u, HRmWrite, i->ARMin.VMinMaxNum.dst);
2341 addHRegUse(u, HRmRead, i->ARMin.VMinMaxNum.srcL);
2342 addHRegUse(u, HRmRead, i->ARMin.VMinMaxNum.srcR);
2343 return;
2344 case ARMin_FPSCR:
2345 if (i->ARMin.FPSCR.toFPSCR)
2346 addHRegUse(u, HRmRead, i->ARMin.FPSCR.iReg);
2347 else
2348 addHRegUse(u, HRmWrite, i->ARMin.FPSCR.iReg);
2349 return;
2350 case ARMin_MFence:
2351 return;
2352 case ARMin_CLREX:
2353 return;
2354 case ARMin_NLdStQ:
2355 if (i->ARMin.NLdStQ.isLoad)
2356 addHRegUse(u, HRmWrite, i->ARMin.NLdStQ.dQ);
2357 else
2358 addHRegUse(u, HRmRead, i->ARMin.NLdStQ.dQ);
2359 addRegUsage_ARMAModeN(u, i->ARMin.NLdStQ.amode);
2360 return;
2361 case ARMin_NLdStD:
2362 if (i->ARMin.NLdStD.isLoad)
2363 addHRegUse(u, HRmWrite, i->ARMin.NLdStD.dD);
2364 else
2365 addHRegUse(u, HRmRead, i->ARMin.NLdStD.dD);
2366 addRegUsage_ARMAModeN(u, i->ARMin.NLdStD.amode);
2367 return;
2368 case ARMin_NUnary:
2369 addHRegUse(u, HRmWrite, i->ARMin.NUnary.dst);
2370 addHRegUse(u, HRmRead, i->ARMin.NUnary.src);
2372 if (i->ARMin.NUnary.op == ARMneon_COPY) {
2373 u->isRegRegMove = True;
2374 u->regMoveSrc = i->ARMin.NUnary.src;
2375 u->regMoveDst = i->ARMin.NUnary.dst;
2377 return;
2378 case ARMin_NUnaryS:
2379 addHRegUse(u, HRmWrite, i->ARMin.NUnaryS.dst->reg);
2380 addHRegUse(u, HRmRead, i->ARMin.NUnaryS.src->reg);
2381 return;
2382 case ARMin_NShift:
2383 addHRegUse(u, HRmWrite, i->ARMin.NShift.dst);
2384 addHRegUse(u, HRmRead, i->ARMin.NShift.argL);
2385 addHRegUse(u, HRmRead, i->ARMin.NShift.argR);
2386 return;
2387 case ARMin_NShl64:
2388 addHRegUse(u, HRmWrite, i->ARMin.NShl64.dst);
2389 addHRegUse(u, HRmRead, i->ARMin.NShl64.src);
2390 return;
2391 case ARMin_NDual:
2392 addHRegUse(u, HRmWrite, i->ARMin.NDual.arg1);
2393 addHRegUse(u, HRmWrite, i->ARMin.NDual.arg2);
2394 addHRegUse(u, HRmRead, i->ARMin.NDual.arg1);
2395 addHRegUse(u, HRmRead, i->ARMin.NDual.arg2);
2396 return;
2397 case ARMin_NBinary:
2398 addHRegUse(u, HRmWrite, i->ARMin.NBinary.dst);
2399 /* TODO: sometimes dst is also being read! */
2400 // XXX fix this
2401 addHRegUse(u, HRmRead, i->ARMin.NBinary.argL);
2402 addHRegUse(u, HRmRead, i->ARMin.NBinary.argR);
2403 return;
2404 case ARMin_NeonImm:
2405 addHRegUse(u, HRmWrite, i->ARMin.NeonImm.dst);
2406 return;
2407 case ARMin_NCMovQ:
2408 addHRegUse(u, HRmWrite, i->ARMin.NCMovQ.dst);
2409 addHRegUse(u, HRmRead, i->ARMin.NCMovQ.dst);
2410 addHRegUse(u, HRmRead, i->ARMin.NCMovQ.src);
2411 return;
2412 case ARMin_Add32:
2413 addHRegUse(u, HRmWrite, i->ARMin.Add32.rD);
2414 addHRegUse(u, HRmRead, i->ARMin.Add32.rN);
2415 return;
2416 case ARMin_EvCheck:
2417 /* We expect both amodes only to mention r8, so this is in
2418 fact pointless, since r8 isn't allocatable, but
2419 anyway.. */
2420 addRegUsage_ARMAMode1(u, i->ARMin.EvCheck.amCounter);
2421 addRegUsage_ARMAMode1(u, i->ARMin.EvCheck.amFailAddr);
2422 addHRegUse(u, HRmWrite, hregARM_R12()); /* also unavail to RA */
2423 return;
2424 case ARMin_ProfInc:
2425 addHRegUse(u, HRmWrite, hregARM_R12());
2426 addHRegUse(u, HRmWrite, hregARM_R11());
2427 return;
2428 default:
2429 ppARMInstr(i);
2430          vpanic("getRegUsage_ARMInstr");
2431    }
2432 }
2435 void mapRegs_ARMInstr ( HRegRemap* m, ARMInstr* i, Bool mode64 )
2436 {
2437 vassert(mode64 == False);
2438 switch (i->tag) {
2439 case ARMin_Alu:
2440 i->ARMin.Alu.dst = lookupHRegRemap(m, i->ARMin.Alu.dst);
2441 i->ARMin.Alu.argL = lookupHRegRemap(m, i->ARMin.Alu.argL);
2442 mapRegs_ARMRI84(m, i->ARMin.Alu.argR);
2443 return;
2444 case ARMin_Shift:
2445 i->ARMin.Shift.dst = lookupHRegRemap(m, i->ARMin.Shift.dst);
2446 i->ARMin.Shift.argL = lookupHRegRemap(m, i->ARMin.Shift.argL);
2447 mapRegs_ARMRI5(m, i->ARMin.Shift.argR);
2448 return;
2449 case ARMin_Unary:
2450 i->ARMin.Unary.dst = lookupHRegRemap(m, i->ARMin.Unary.dst);
2451 i->ARMin.Unary.src = lookupHRegRemap(m, i->ARMin.Unary.src);
2452 return;
2453 case ARMin_CmpOrTst:
2454 i->ARMin.CmpOrTst.argL = lookupHRegRemap(m, i->ARMin.CmpOrTst.argL);
2455 mapRegs_ARMRI84(m, i->ARMin.CmpOrTst.argR);
2456 return;
2457 case ARMin_Mov:
2458 i->ARMin.Mov.dst = lookupHRegRemap(m, i->ARMin.Mov.dst);
2459 mapRegs_ARMRI84(m, i->ARMin.Mov.src);
2460 return;
2461 case ARMin_Imm32:
2462 i->ARMin.Imm32.dst = lookupHRegRemap(m, i->ARMin.Imm32.dst);
2463 return;
2464 case ARMin_LdSt32:
2465 i->ARMin.LdSt32.rD = lookupHRegRemap(m, i->ARMin.LdSt32.rD);
2466 mapRegs_ARMAMode1(m, i->ARMin.LdSt32.amode);
2467 return;
2468 case ARMin_LdSt16:
2469 i->ARMin.LdSt16.rD = lookupHRegRemap(m, i->ARMin.LdSt16.rD);
2470 mapRegs_ARMAMode2(m, i->ARMin.LdSt16.amode);
2471 return;
2472 case ARMin_LdSt8U:
2473 i->ARMin.LdSt8U.rD = lookupHRegRemap(m, i->ARMin.LdSt8U.rD);
2474 mapRegs_ARMAMode1(m, i->ARMin.LdSt8U.amode);
2475 return;
2476 case ARMin_Ld8S:
2477 i->ARMin.Ld8S.rD = lookupHRegRemap(m, i->ARMin.Ld8S.rD);
2478 mapRegs_ARMAMode2(m, i->ARMin.Ld8S.amode);
2479 return;
2480 case ARMin_XDirect:
2481 mapRegs_ARMAMode1(m, i->ARMin.XDirect.amR15T);
2482 return;
2483 case ARMin_XIndir:
2484 i->ARMin.XIndir.dstGA
2485 = lookupHRegRemap(m, i->ARMin.XIndir.dstGA);
2486 mapRegs_ARMAMode1(m, i->ARMin.XIndir.amR15T);
2487 return;
2488 case ARMin_XAssisted:
2489 i->ARMin.XAssisted.dstGA
2490 = lookupHRegRemap(m, i->ARMin.XAssisted.dstGA);
2491 mapRegs_ARMAMode1(m, i->ARMin.XAssisted.amR15T);
2492 return;
2493 case ARMin_CMov:
2494 i->ARMin.CMov.dst = lookupHRegRemap(m, i->ARMin.CMov.dst);
2495 mapRegs_ARMRI84(m, i->ARMin.CMov.src);
2496 return;
2497 case ARMin_Call:
2498 return;
2499 case ARMin_Mul:
2500 return;
2501 case ARMin_LdrEX:
2502 return;
2503 case ARMin_StrEX:
2504 return;
2505 case ARMin_VLdStD:
2506 i->ARMin.VLdStD.dD = lookupHRegRemap(m, i->ARMin.VLdStD.dD);
2507 mapRegs_ARMAModeV(m, i->ARMin.VLdStD.amode);
2508 return;
2509 case ARMin_VLdStS:
2510 i->ARMin.VLdStS.fD = lookupHRegRemap(m, i->ARMin.VLdStS.fD);
2511 mapRegs_ARMAModeV(m, i->ARMin.VLdStS.amode);
2512 return;
2513 case ARMin_VAluD:
2514 i->ARMin.VAluD.dst = lookupHRegRemap(m, i->ARMin.VAluD.dst);
2515 i->ARMin.VAluD.argL = lookupHRegRemap(m, i->ARMin.VAluD.argL);
2516 i->ARMin.VAluD.argR = lookupHRegRemap(m, i->ARMin.VAluD.argR);
2517 return;
2518 case ARMin_VAluS:
2519 i->ARMin.VAluS.dst = lookupHRegRemap(m, i->ARMin.VAluS.dst);
2520 i->ARMin.VAluS.argL = lookupHRegRemap(m, i->ARMin.VAluS.argL);
2521 i->ARMin.VAluS.argR = lookupHRegRemap(m, i->ARMin.VAluS.argR);
2522 return;
2523 case ARMin_VUnaryD:
2524 i->ARMin.VUnaryD.dst = lookupHRegRemap(m, i->ARMin.VUnaryD.dst);
2525 i->ARMin.VUnaryD.src = lookupHRegRemap(m, i->ARMin.VUnaryD.src);
2526 return;
2527 case ARMin_VUnaryS:
2528 i->ARMin.VUnaryS.dst = lookupHRegRemap(m, i->ARMin.VUnaryS.dst);
2529 i->ARMin.VUnaryS.src = lookupHRegRemap(m, i->ARMin.VUnaryS.src);
2530 return;
2531 case ARMin_VCmpD:
2532 i->ARMin.VCmpD.argL = lookupHRegRemap(m, i->ARMin.VCmpD.argL);
2533 i->ARMin.VCmpD.argR = lookupHRegRemap(m, i->ARMin.VCmpD.argR);
2534 return;
2535 case ARMin_VCMovD:
2536 i->ARMin.VCMovD.dst = lookupHRegRemap(m, i->ARMin.VCMovD.dst);
2537 i->ARMin.VCMovD.src = lookupHRegRemap(m, i->ARMin.VCMovD.src);
2538 return;
2539 case ARMin_VCMovS:
2540 i->ARMin.VCMovS.dst = lookupHRegRemap(m, i->ARMin.VCMovS.dst);
2541 i->ARMin.VCMovS.src = lookupHRegRemap(m, i->ARMin.VCMovS.src);
2542 return;
2543 case ARMin_VCvtSD:
2544 i->ARMin.VCvtSD.dst = lookupHRegRemap(m, i->ARMin.VCvtSD.dst);
2545 i->ARMin.VCvtSD.src = lookupHRegRemap(m, i->ARMin.VCvtSD.src);
2546 return;
2547 case ARMin_VXferQ:
2548 i->ARMin.VXferQ.qD = lookupHRegRemap(m, i->ARMin.VXferQ.qD);
2549 i->ARMin.VXferQ.dHi = lookupHRegRemap(m, i->ARMin.VXferQ.dHi);
2550 i->ARMin.VXferQ.dLo = lookupHRegRemap(m, i->ARMin.VXferQ.dLo);
2551 return;
2552 case ARMin_VXferD:
2553 i->ARMin.VXferD.dD = lookupHRegRemap(m, i->ARMin.VXferD.dD);
2554 i->ARMin.VXferD.rHi = lookupHRegRemap(m, i->ARMin.VXferD.rHi);
2555 i->ARMin.VXferD.rLo = lookupHRegRemap(m, i->ARMin.VXferD.rLo);
2556 return;
2557 case ARMin_VXferS:
2558 i->ARMin.VXferS.fD = lookupHRegRemap(m, i->ARMin.VXferS.fD);
2559 i->ARMin.VXferS.rLo = lookupHRegRemap(m, i->ARMin.VXferS.rLo);
2560 return;
2561 case ARMin_VCvtID:
2562 i->ARMin.VCvtID.dst = lookupHRegRemap(m, i->ARMin.VCvtID.dst);
2563 i->ARMin.VCvtID.src = lookupHRegRemap(m, i->ARMin.VCvtID.src);
2564 return;
2565 case ARMin_VRIntR:
2566 i->ARMin.VRIntR.dst = lookupHRegRemap(m, i->ARMin.VRIntR.dst);
2567 i->ARMin.VRIntR.src = lookupHRegRemap(m, i->ARMin.VRIntR.src);
2568 return;
2569 case ARMin_VMinMaxNum:
2570 i->ARMin.VMinMaxNum.dst
2571 = lookupHRegRemap(m, i->ARMin.VMinMaxNum.dst);
2572 i->ARMin.VMinMaxNum.srcL
2573 = lookupHRegRemap(m, i->ARMin.VMinMaxNum.srcL);
2574 i->ARMin.VMinMaxNum.srcR
2575 = lookupHRegRemap(m, i->ARMin.VMinMaxNum.srcR);
2576 return;
2577 case ARMin_FPSCR:
2578 i->ARMin.FPSCR.iReg = lookupHRegRemap(m, i->ARMin.FPSCR.iReg);
2579 return;
2580 case ARMin_MFence:
2581 return;
2582 case ARMin_CLREX:
2583 return;
2584 case ARMin_NLdStQ:
2585 i->ARMin.NLdStQ.dQ = lookupHRegRemap(m, i->ARMin.NLdStQ.dQ);
2586 mapRegs_ARMAModeN(m, i->ARMin.NLdStQ.amode);
2587 return;
2588 case ARMin_NLdStD:
2589 i->ARMin.NLdStD.dD = lookupHRegRemap(m, i->ARMin.NLdStD.dD);
2590 mapRegs_ARMAModeN(m, i->ARMin.NLdStD.amode);
2591 return;
2592 case ARMin_NUnary:
2593 i->ARMin.NUnary.src = lookupHRegRemap(m, i->ARMin.NUnary.src);
2594 i->ARMin.NUnary.dst = lookupHRegRemap(m, i->ARMin.NUnary.dst);
2595 return;
2596 case ARMin_NUnaryS:
2597 i->ARMin.NUnaryS.src->reg
2598 = lookupHRegRemap(m, i->ARMin.NUnaryS.src->reg);
2599 i->ARMin.NUnaryS.dst->reg
2600 = lookupHRegRemap(m, i->ARMin.NUnaryS.dst->reg);
2601 return;
2602 case ARMin_NShift:
2603 i->ARMin.NShift.dst = lookupHRegRemap(m, i->ARMin.NShift.dst);
2604 i->ARMin.NShift.argL = lookupHRegRemap(m, i->ARMin.NShift.argL);
2605 i->ARMin.NShift.argR = lookupHRegRemap(m, i->ARMin.NShift.argR);
2606 return;
2607 case ARMin_NShl64:
2608 i->ARMin.NShl64.dst = lookupHRegRemap(m, i->ARMin.NShl64.dst);
2609 i->ARMin.NShl64.src = lookupHRegRemap(m, i->ARMin.NShl64.src);
2610 return;
2611 case ARMin_NDual:
2612 i->ARMin.NDual.arg1 = lookupHRegRemap(m, i->ARMin.NDual.arg1);
2613 i->ARMin.NDual.arg2 = lookupHRegRemap(m, i->ARMin.NDual.arg2);
2614 return;
2615 case ARMin_NBinary:
2616 i->ARMin.NBinary.argL = lookupHRegRemap(m, i->ARMin.NBinary.argL);
2617 i->ARMin.NBinary.argR = lookupHRegRemap(m, i->ARMin.NBinary.argR);
2618 i->ARMin.NBinary.dst = lookupHRegRemap(m, i->ARMin.NBinary.dst);
2619 return;
2620 case ARMin_NeonImm:
2621 i->ARMin.NeonImm.dst = lookupHRegRemap(m, i->ARMin.NeonImm.dst);
2622 return;
2623 case ARMin_NCMovQ:
2624 i->ARMin.NCMovQ.dst = lookupHRegRemap(m, i->ARMin.NCMovQ.dst);
2625 i->ARMin.NCMovQ.src = lookupHRegRemap(m, i->ARMin.NCMovQ.src);
2626 return;
2627 case ARMin_Add32:
2628 i->ARMin.Add32.rD = lookupHRegRemap(m, i->ARMin.Add32.rD);
2629 i->ARMin.Add32.rN = lookupHRegRemap(m, i->ARMin.Add32.rN);
2630 return;
2631 case ARMin_EvCheck:
2632 /* We expect both amodes only to mention r8, so this is in
2633 fact pointless, since r8 isn't allocatable, but
2634 anyway.. */
2635 mapRegs_ARMAMode1(m, i->ARMin.EvCheck.amCounter);
2636 mapRegs_ARMAMode1(m, i->ARMin.EvCheck.amFailAddr);
2637 return;
2638 case ARMin_ProfInc:
2639 /* hardwires r11 and r12 -- nothing to modify. */
2640 return;
2641 default:
2642 ppARMInstr(i);
2643          vpanic("mapRegs_ARMInstr");
2644    }
2645 }
2647 /* Generate arm spill/reload instructions under the direction of the
2648 register allocator. Note it's critical these don't write the
2649 condition codes. */
2651 void genSpill_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2652                     HReg rreg, Int offsetB, Bool mode64 )
2653 {
2654 HRegClass rclass;
2655 vassert(offsetB >= 0);
2656 vassert(!hregIsVirtual(rreg));
2657 vassert(mode64 == False);
2658 *i1 = *i2 = NULL;
2659 rclass = hregClass(rreg);
2660 switch (rclass) {
2661 case HRcInt32:
2662 vassert(offsetB <= 4095);
2663 *i1 = ARMInstr_LdSt32( ARMcc_AL, False/*!isLoad*/,
2664 rreg,
2665 ARMAMode1_RI(hregARM_R8(), offsetB) );
2666 return;
2667 case HRcFlt32:
2668 case HRcFlt64: {
2669 HReg r8 = hregARM_R8(); /* baseblock */
2670 HReg r12 = hregARM_R12(); /* spill temp */
2671 HReg base = r8;
2672 vassert(0 == (offsetB & 3));
2673 if (offsetB >= 1024) {
2674 Int offsetKB = offsetB / 1024;
2675 /* r12 = r8 + (1024 * offsetKB) */
2676 *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
2677 ARMRI84_I84(offsetKB, 11));
2678 offsetB -= (1024 * offsetKB);
2679 base = r12;
2681 vassert(offsetB <= 1020);
2682 if (rclass == HRcFlt32) {
2683 *i2 = ARMInstr_VLdStS( False/*!isLoad*/,
2684 rreg,
2685 mkARMAModeV(base, offsetB) );
2686 } else {
2687 *i2 = ARMInstr_VLdStD( False/*!isLoad*/,
2688 rreg,
2689 mkARMAModeV(base, offsetB) );
2691 return;
2693 case HRcVec128: {
2694 HReg r8 = hregARM_R8();
2695 HReg r12 = hregARM_R12();
2696 *i1 = ARMInstr_Add32(r12, r8, offsetB);
2697 *i2 = ARMInstr_NLdStQ(False, rreg, mkARMAModeN_R(r12));
2698 return;
2700 default:
2701 ppHRegClass(rclass);
2702          vpanic("genSpill_ARM: unimplemented regclass");
2703    }
2704 }
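/* Editor's note (illustrative, not part of the original file): a worked
   example of the FP spill addressing scheme above.  VFP loads and stores
   only reach offsets up to 1020 from their base register, so a large
   baseblock offset is split into a multiple of 1024 added into r12 plus a
   small residue.  For a hypothetical offsetB of 2204:

      offsetKB = 2204 / 1024 = 2
      i1: add r12, r8, #2048          -- ARMRI84_I84(2, 11) is 2 ror 22 = 2048
      residue  = 2204 - 2*1024 = 156  -- 4-aligned and <= 1020
      i2: store/load the D or S register at [r12, #156]
*/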
2706 void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2707                      HReg rreg, Int offsetB, Bool mode64 )
2708 {
2709 HRegClass rclass;
2710 vassert(offsetB >= 0);
2711 vassert(!hregIsVirtual(rreg));
2712 vassert(mode64 == False);
2713 *i1 = *i2 = NULL;
2714 rclass = hregClass(rreg);
2715 switch (rclass) {
2716 case HRcInt32:
2717 vassert(offsetB <= 4095);
2718 *i1 = ARMInstr_LdSt32( ARMcc_AL, True/*isLoad*/,
2719 rreg,
2720 ARMAMode1_RI(hregARM_R8(), offsetB) );
2721 return;
2722 case HRcFlt32:
2723 case HRcFlt64: {
2724 HReg r8 = hregARM_R8(); /* baseblock */
2725 HReg r12 = hregARM_R12(); /* spill temp */
2726 HReg base = r8;
2727 vassert(0 == (offsetB & 3));
2728 if (offsetB >= 1024) {
2729 Int offsetKB = offsetB / 1024;
2730 /* r12 = r8 + (1024 * offsetKB) */
2731 *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
2732 ARMRI84_I84(offsetKB, 11));
2733 offsetB -= (1024 * offsetKB);
2734 base = r12;
2736 vassert(offsetB <= 1020);
2737 if (rclass == HRcFlt32) {
2738 *i2 = ARMInstr_VLdStS( True/*isLoad*/,
2739 rreg,
2740 mkARMAModeV(base, offsetB) );
2741 } else {
2742 *i2 = ARMInstr_VLdStD( True/*isLoad*/,
2743 rreg,
2744 mkARMAModeV(base, offsetB) );
2746 return;
2748 case HRcVec128: {
2749 HReg r8 = hregARM_R8();
2750 HReg r12 = hregARM_R12();
2751 *i1 = ARMInstr_Add32(r12, r8, offsetB);
2752 *i2 = ARMInstr_NLdStQ(True, rreg, mkARMAModeN_R(r12));
2753 return;
2755 default:
2756 ppHRegClass(rclass);
2757          vpanic("genReload_ARM: unimplemented regclass");
2758    }
2759 }
2761 ARMInstr* genMove_ARM(HReg from, HReg to, Bool mode64)
2762 {
2763 switch (hregClass(from)) {
2764 case HRcInt32:
2765 return ARMInstr_Mov(to, ARMRI84_R(from));
2766 case HRcFlt32:
2767 return ARMInstr_VUnaryS(ARMvfpu_COPY, to, from);
2768 case HRcFlt64:
2769 return ARMInstr_VUnaryD(ARMvfpu_COPY, to, from);
2770 case HRcVec128:
2771 return ARMInstr_NUnary(ARMneon_COPY, to, from, 4, False);
2772 default:
2773 ppHRegClass(hregClass(from));
2774          vpanic("genMove_ARM: unimplemented regclass");
2775    }
2776 }
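/* Editor's note: genMove_ARM is the register allocator's hook for
   synthesising a fresh register-to-register copy in whichever class it
   needs; it complements the isRegRegMove/regMoveSrc/regMoveDst hints set
   in getRegUsage_ARMInstr above, which let the allocator recognise copies
   that already exist in the generated code. */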
2778 /* Emit an instruction into buf and return the number of bytes used.
2779 Note that buf is not the insn's final place, and therefore it is
2780 imperative to emit position-independent code. */
2782 static inline UInt iregEnc ( HReg r )
2783 {
2784 UInt n;
2785 vassert(hregClass(r) == HRcInt32);
2786 vassert(!hregIsVirtual(r));
2787 n = hregEncoding(r);
2788 vassert(n <= 15);
2789    return n;
2790 }
2792 static inline UInt dregEnc ( HReg r )
2793 {
2794 UInt n;
2795 vassert(hregClass(r) == HRcFlt64);
2796 vassert(!hregIsVirtual(r));
2797 n = hregEncoding(r);
2798 vassert(n <= 31);
2799    return n;
2800 }
2802 static inline UInt fregEnc ( HReg r )
2803 {
2804 UInt n;
2805 vassert(hregClass(r) == HRcFlt32);
2806 vassert(!hregIsVirtual(r));
2807 n = hregEncoding(r);
2808 vassert(n <= 31);
2809    return n;
2810 }
2812 static inline UInt qregEnc ( HReg r )
2813 {
2814 UInt n;
2815 vassert(hregClass(r) == HRcVec128);
2816 vassert(!hregIsVirtual(r));
2817 n = hregEncoding(r);
2818 vassert(n <= 15);
2819    return n;
2820 }
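/* Editor's note: D and Q registers encode directly from these helpers, but
   the 5-bit S-register number returned by fregEnc is split when emitted --
   the low bit goes into the instruction's D/N/M bit and the remaining four
   bits into the register field, hence the (fD >> 1) / (fD & 1) pairs in the
   VFP cases of emit_ARMInstr below. */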
2822 #define BITS4(zzb3,zzb2,zzb1,zzb0) \
2823 (((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0))
2824 #define X0000 BITS4(0,0,0,0)
2825 #define X0001 BITS4(0,0,0,1)
2826 #define X0010 BITS4(0,0,1,0)
2827 #define X0011 BITS4(0,0,1,1)
2828 #define X0100 BITS4(0,1,0,0)
2829 #define X0101 BITS4(0,1,0,1)
2830 #define X0110 BITS4(0,1,1,0)
2831 #define X0111 BITS4(0,1,1,1)
2832 #define X1000 BITS4(1,0,0,0)
2833 #define X1001 BITS4(1,0,0,1)
2834 #define X1010 BITS4(1,0,1,0)
2835 #define X1011 BITS4(1,0,1,1)
2836 #define X1100 BITS4(1,1,0,0)
2837 #define X1101 BITS4(1,1,0,1)
2838 #define X1110 BITS4(1,1,1,0)
2839 #define X1111 BITS4(1,1,1,1)
2841 #define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
2842 (((((UInt)(zzx7)) & 0xF) << 28) | \
2843 (((zzx6) & 0xF) << 24) | \
2844 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2845 (((zzx3) & 0xF) << 12))
2847 #define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2) \
2848 (((((UInt)(zzx7)) & 0xF) << 28) | \
2849 (((zzx6) & 0xF) << 24) | \
2850 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2851 (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8))
2853 #define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0) \
2854 (((((UInt)(zzx7)) & 0xF) << 28) | \
2855 (((zzx6) & 0xF) << 24) | \
2856 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2857 (((zzx3) & 0xF) << 12) | (((zzx0) & 0xF) << 0))
2859 #define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
2860 (((((UInt)(zzx7)) & 0xF) << 28) | \
2861 (((zzx6) & 0xF) << 24) | \
2862 (((zzx5) & 0xF) << 20) | (((zzx1) & 0xF) << 4) | \
2863 (((zzx0) & 0xF) << 0))
2865 #define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0) \
2866 (((((UInt)(zzx7)) & 0xF) << 28) | \
2867 (((zzx6) & 0xF) << 24) | \
2868 (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
2869 (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) << 8) | \
2870 (((zzx1) & 0xF) << 4) | (((zzx0) & 0xF) << 0))
2872 #define XX______(zzx7,zzx6) \
2873 (((((UInt)(zzx7)) & 0xF) << 28) | (((zzx6) & 0xF) << 24))
2875 /* Generate a skeletal insn that involves an RI84 shifter operand.
2876 Returns a word which is all zeroes apart from bits 25 and 11..0,
2877 since it is those that encode the shifter operand (at least to the
2878 extent that we care about it.) */
2879 static UInt skeletal_RI84 ( ARMRI84* ri )
2880 {
2881 UInt instr;
2882 if (ri->tag == ARMri84_I84) {
2883 vassert(0 == (ri->ARMri84.I84.imm4 & ~0x0F));
2884 vassert(0 == (ri->ARMri84.I84.imm8 & ~0xFF));
2885 instr = 1 << 25;
2886 instr |= (ri->ARMri84.I84.imm4 << 8);
2887 instr |= ri->ARMri84.I84.imm8;
2888 } else {
2889 instr = 0 << 25;
2890 instr |= iregEnc(ri->ARMri84.R.reg);
2891    }
2892    return instr;
2893 }
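/* Editor's note (illustrative): the standard ARM immediate-operand rule is
   imm8 rotated right by 2*imm4, so for example

      ARMri84_I84 { imm8 = 0xFF, imm4 = 4  }  encodes  #0xFF ror 8  = 0xFF000000
      ARMri84_I84 { imm8 = 2,    imm4 = 11 }  encodes  #2    ror 22 = 0x00000800

   while the register form leaves bit 25 clear and puts the register number
   in bits 3..0. */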
2895 /* Ditto for RI5. Resulting word is zeroes apart from bit 4 and bits
2896 11..7. */
2897 static UInt skeletal_RI5 ( ARMRI5* ri )
2898 {
2899 UInt instr;
2900 if (ri->tag == ARMri5_I5) {
2901 UInt imm5 = ri->ARMri5.I5.imm5;
2902 vassert(imm5 >= 1 && imm5 <= 31);
2903 instr = 0 << 4;
2904 instr |= imm5 << 7;
2905 } else {
2906 instr = 1 << 4;
2907 instr |= iregEnc(ri->ARMri5.R.reg) << 8;
2908    }
2909    return instr;
2910 }
2913 /* Get an immediate into a register, using only that
2914 register. (very lame..) */
2915 static UInt* imm32_to_ireg ( UInt* p, Int rD, UInt imm32 )
2916 {
2917 UInt instr;
2918 vassert(rD >= 0 && rD <= 14); // r15 not good to mess with!
2919 #if 0
2920 if (0 == (imm32 & ~0xFF)) {
2921    /* mov with an immediate shifter operand of (0, imm32) (??) */
2922 instr = XXXXXX__(X1110,X0011,X1010,X0000,rD,X0000);
2923 instr |= imm32;
2924 *p++ = instr;
2925 } else {
2926 // this is very bad; causes Dcache pollution
2927 // ldr rD, [pc]
2928 instr = XXXXX___(X1110,X0101,X1001,X1111,rD);
2929 *p++ = instr;
2930 // b .+8
2931 instr = 0xEA000000;
2932 *p++ = instr;
2933 // .word imm32
2934 *p++ = imm32;
2936 #else
2937 if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
2938 /* Generate movw rD, #low16. Then, if the high 16 are
2939 nonzero, generate movt rD, #high16. */
2940 UInt lo16 = imm32 & 0xFFFF;
2941 UInt hi16 = (imm32 >> 16) & 0xFFFF;
2942 instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
2943 (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
2944 lo16 & 0xF);
2945 *p++ = instr;
2946 if (hi16 != 0) {
2947 instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
2948 (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
2949 hi16 & 0xF);
2950 *p++ = instr;
2952 } else {
2953 UInt imm, rot;
2954 UInt op = X1010;
2955 UInt rN = 0;
2956 if ((imm32 & 0xFF) || (imm32 == 0)) {
2957 imm = imm32 & 0xFF;
2958 rot = 0;
2959 instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2960 *p++ = instr;
2961 op = X1000;
2962 rN = rD;
2964 if (imm32 & 0xFF000000) {
2965 imm = (imm32 >> 24) & 0xFF;
2966 rot = 4;
2967 instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2968 *p++ = instr;
2969 op = X1000;
2970 rN = rD;
2972 if (imm32 & 0xFF0000) {
2973 imm = (imm32 >> 16) & 0xFF;
2974 rot = 8;
2975 instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2976 *p++ = instr;
2977 op = X1000;
2978 rN = rD;
2980 if (imm32 & 0xFF00) {
2981 imm = (imm32 >> 8) & 0xFF;
2982 rot = 12;
2983 instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2984 *p++ = instr;
2985 op = X1000;
2986 rN = rD;
2989 #endif
2990    return p;
2991 }
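/* Editor's note (illustrative): two examples of what the synthesis above
   emits.  On ARMv7 and later (movw/movt available):

      imm32 = 0xDEADBEEF   ->   movw rD, #0xBEEF ; movt rD, #0xDEAD

   On older architectures the constant is built from rotated 8-bit chunks,
   where rot fields 4, 8 and 12 select the bytes at bits 31..24, 23..16 and
   15..8 respectively:

      imm32 = 0x00AB00CD   ->   mov rD, #0xCD ; orr rD, rD, #0xAB0000
*/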
2993 /* Get an immediate into a register, using only that register, and
2994 generating exactly 2 instructions, regardless of the value of the
2995 immediate. This is used when generating sections of code that need
2996 to be patched later, so as to guarantee a specific size. */
2997 static UInt* imm32_to_ireg_EXACTLY2 ( UInt* p, Int rD, UInt imm32 )
2998 {
2999 if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
3000 /* Generate movw rD, #low16 ; movt rD, #high16. */
3001 UInt lo16 = imm32 & 0xFFFF;
3002 UInt hi16 = (imm32 >> 16) & 0xFFFF;
3003 UInt instr;
3004 instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
3005 (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
3006 lo16 & 0xF);
3007 *p++ = instr;
3008 instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
3009 (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
3010 hi16 & 0xF);
3011 *p++ = instr;
3012 } else {
3013 vassert(0); /* lose */
3014    }
3015    return p;
3016 }
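/* Editor's note: the fixed two-instruction movw/movt shape matters because
   ARMin_XDirect (in emit_ARMInstr below) uses it to load the
   disp_cp_chain_me target, and the chaining/unchaining machinery later
   locates and rewrites those words in place -- see the "FIRST PATCHABLE
   BYTE" comments further down. */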
3018 /* Check whether p points at a 2-insn sequence cooked up by
3019 imm32_to_ireg_EXACTLY2(). */
3020 static Bool is_imm32_to_ireg_EXACTLY2 ( UInt* p, Int rD, UInt imm32 )
3021 {
3022 if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
3023 /* Generate movw rD, #low16 ; movt rD, #high16. */
3024 UInt lo16 = imm32 & 0xFFFF;
3025 UInt hi16 = (imm32 >> 16) & 0xFFFF;
3026 UInt i0, i1;
3027 i0 = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
3028 (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
3029 lo16 & 0xF);
3030 i1 = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
3031 (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
3032 hi16 & 0xF);
3033 return p[0] == i0 && p[1] == i1;
3034 } else {
3035       vassert(0); /* lose */
3036    }
3037 }
3040 static UInt* do_load_or_store32 ( UInt* p,
3041                                    Bool isLoad, UInt rD, ARMAMode1* am )
3042 {
3043 vassert(rD <= 12);
3044 vassert(am->tag == ARMam1_RI); // RR case is not handled
3045 UInt bB = 0;
3046 UInt bL = isLoad ? 1 : 0;
3047 Int simm12;
3048 UInt instr, bP;
3049 if (am->ARMam1.RI.simm13 < 0) {
3050 bP = 0;
3051 simm12 = -am->ARMam1.RI.simm13;
3052 } else {
3053 bP = 1;
3054 simm12 = am->ARMam1.RI.simm13;
3056 vassert(simm12 >= 0 && simm12 <= 4095);
3057 instr = XXXXX___(X1110,X0101,BITS4(bP,bB,0,bL),
3058 iregEnc(am->ARMam1.RI.reg),
3059 rD);
3060 instr |= simm12;
3061 *p++ = instr;
3062 return p;
3066 /* Emit an instruction into buf and return the number of bytes used.
3067 Note that buf is not the insn's final place, and therefore it is
3068 imperative to emit position-independent code. If the emitted
3069 instruction was a profiler inc, set *is_profInc to True, else
3070 leave it unchanged. */
3072 Int emit_ARMInstr ( /*MB_MOD*/Bool* is_profInc,
3073 UChar* buf, Int nbuf, const ARMInstr* i,
3074 Bool mode64, VexEndness endness_host,
3075 const void* disp_cp_chain_me_to_slowEP,
3076 const void* disp_cp_chain_me_to_fastEP,
3077 const void* disp_cp_xindir,
3078                     const void* disp_cp_xassisted )
3079 {
3080 UInt* p = (UInt*)buf;
3081 vassert(nbuf >= 32);
3082 vassert(mode64 == False);
3083 vassert(0 == (((HWord)buf) & 3));
3085 switch (i->tag) {
3086 case ARMin_Alu: {
3087 UInt instr, subopc;
3088 UInt rD = iregEnc(i->ARMin.Alu.dst);
3089 UInt rN = iregEnc(i->ARMin.Alu.argL);
3090 ARMRI84* argR = i->ARMin.Alu.argR;
3091 switch (i->ARMin.Alu.op) {
3092 case ARMalu_ADDS: /* fallthru */
3093 case ARMalu_ADD: subopc = X0100; break;
3094 case ARMalu_ADC: subopc = X0101; break;
3095 case ARMalu_SUBS: /* fallthru */
3096 case ARMalu_SUB: subopc = X0010; break;
3097 case ARMalu_SBC: subopc = X0110; break;
3098 case ARMalu_AND: subopc = X0000; break;
3099 case ARMalu_BIC: subopc = X1110; break;
3100 case ARMalu_OR: subopc = X1100; break;
3101 case ARMalu_XOR: subopc = X0001; break;
3102 default: goto bad;
3104 instr = skeletal_RI84(argR);
3105 instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
3106 (subopc << 1) & 0xF, rN, rD);
3107 if (i->ARMin.Alu.op == ARMalu_ADDS
3108 || i->ARMin.Alu.op == ARMalu_SUBS) {
3109 instr |= 1<<20; /* set the S bit */
3111 *p++ = instr;
3112 goto done;
3114 case ARMin_Shift: {
3115 UInt instr, subopc;
3116 UInt rD = iregEnc(i->ARMin.Shift.dst);
3117 UInt rM = iregEnc(i->ARMin.Shift.argL);
3118 ARMRI5* argR = i->ARMin.Shift.argR;
3119 switch (i->ARMin.Shift.op) {
3120 case ARMsh_SHL: subopc = X0000; break;
3121 case ARMsh_SHR: subopc = X0001; break;
3122 case ARMsh_SAR: subopc = X0010; break;
3123 default: goto bad;
3125 instr = skeletal_RI5(argR);
3126 instr |= XXXXX__X(X1110,X0001,X1010,X0000,rD, /* _ _ */ rM);
3127 instr |= (subopc & 3) << 5;
3128 *p++ = instr;
3129 goto done;
3131 case ARMin_Unary: {
3132 UInt instr;
3133 UInt rDst = iregEnc(i->ARMin.Unary.dst);
3134 UInt rSrc = iregEnc(i->ARMin.Unary.src);
3135 switch (i->ARMin.Unary.op) {
3136 case ARMun_CLZ:
3137 instr = XXXXXXXX(X1110,X0001,X0110,X1111,
3138 rDst,X1111,X0001,rSrc);
3139 *p++ = instr;
3140 goto done;
3141 case ARMun_NEG: /* RSB rD,rS,#0 */
3142 instr = XXXXX___(X1110,0x2,0x6,rSrc,rDst);
3143 *p++ = instr;
3144 goto done;
3145 case ARMun_NOT: {
3146 UInt subopc = X1111; /* MVN */
3147 instr = rSrc;
3148 instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
3149 (subopc << 1) & 0xF, 0, rDst);
3150 *p++ = instr;
3151 goto done;
3153 default:
3154 break;
3156 goto bad;
3158 case ARMin_CmpOrTst: {
3159 UInt instr = skeletal_RI84(i->ARMin.CmpOrTst.argR);
3160 UInt subopc = i->ARMin.CmpOrTst.isCmp ? X1010 : X1000;
3161 UInt SBZ = 0;
3162 instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
3163 ((subopc << 1) & 0xF) | 1,
3164 iregEnc(i->ARMin.CmpOrTst.argL), SBZ );
3165 *p++ = instr;
3166 goto done;
3168 case ARMin_Mov: {
3169 UInt instr = skeletal_RI84(i->ARMin.Mov.src);
3170 UInt subopc = X1101; /* MOV */
3171 UInt SBZ = 0;
3172 instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
3173 (subopc << 1) & 0xF, SBZ,
3174 iregEnc(i->ARMin.Mov.dst));
3175 *p++ = instr;
3176 goto done;
3178 case ARMin_Imm32: {
3179 p = imm32_to_ireg( (UInt*)p, iregEnc(i->ARMin.Imm32.dst),
3180 i->ARMin.Imm32.imm32 );
3181 goto done;
3183 case ARMin_LdSt32:
3184 case ARMin_LdSt8U: {
3185 UInt bL, bB;
3186 HReg rD;
3187 ARMAMode1* am;
3188 ARMCondCode cc;
3189 if (i->tag == ARMin_LdSt32) {
3190 bB = 0;
3191 bL = i->ARMin.LdSt32.isLoad ? 1 : 0;
3192 am = i->ARMin.LdSt32.amode;
3193 rD = i->ARMin.LdSt32.rD;
3194 cc = i->ARMin.LdSt32.cc;
3195 } else {
3196 bB = 1;
3197 bL = i->ARMin.LdSt8U.isLoad ? 1 : 0;
3198 am = i->ARMin.LdSt8U.amode;
3199 rD = i->ARMin.LdSt8U.rD;
3200 cc = i->ARMin.LdSt8U.cc;
3202 vassert(cc != ARMcc_NV);
3203 if (am->tag == ARMam1_RI) {
3204 Int simm12;
3205 UInt instr, bP;
3206 if (am->ARMam1.RI.simm13 < 0) {
3207 bP = 0;
3208 simm12 = -am->ARMam1.RI.simm13;
3209 } else {
3210 bP = 1;
3211 simm12 = am->ARMam1.RI.simm13;
3213 vassert(simm12 >= 0 && simm12 <= 4095);
3214 instr = XXXXX___(cc,X0101,BITS4(bP,bB,0,bL),
3215 iregEnc(am->ARMam1.RI.reg),
3216 iregEnc(rD));
3217 instr |= simm12;
3218 *p++ = instr;
3219 goto done;
3220 } else {
3221 // RR case
3222 goto bad;
3225 case ARMin_LdSt16: {
3226 HReg rD = i->ARMin.LdSt16.rD;
3227 UInt bS = i->ARMin.LdSt16.signedLoad ? 1 : 0;
3228 UInt bL = i->ARMin.LdSt16.isLoad ? 1 : 0;
3229 ARMAMode2* am = i->ARMin.LdSt16.amode;
3230 ARMCondCode cc = i->ARMin.LdSt16.cc;
3231 vassert(cc != ARMcc_NV);
3232 if (am->tag == ARMam2_RI) {
3233 HReg rN = am->ARMam2.RI.reg;
3234 Int simm8;
3235 UInt bP, imm8hi, imm8lo, instr;
3236 if (am->ARMam2.RI.simm9 < 0) {
3237 bP = 0;
3238 simm8 = -am->ARMam2.RI.simm9;
3239 } else {
3240 bP = 1;
3241 simm8 = am->ARMam2.RI.simm9;
3243 vassert(simm8 >= 0 && simm8 <= 255);
3244 imm8hi = (simm8 >> 4) & 0xF;
3245 imm8lo = simm8 & 0xF;
3246 vassert(!(bL == 0 && bS == 1)); // "! signed store"
3247 /**/ if (bL == 0 && bS == 0) {
3248 // strh
3249 instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,0), iregEnc(rN),
3250 iregEnc(rD), imm8hi, X1011, imm8lo);
3251 *p++ = instr;
3252 goto done;
3254 else if (bL == 1 && bS == 0) {
3255 // ldrh
3256 instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregEnc(rN),
3257 iregEnc(rD), imm8hi, X1011, imm8lo);
3258 *p++ = instr;
3259 goto done;
3261 else if (bL == 1 && bS == 1) {
3262 // ldrsh
3263 instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregEnc(rN),
3264 iregEnc(rD), imm8hi, X1111, imm8lo);
3265 *p++ = instr;
3266 goto done;
3268 else vassert(0); // ill-constructed insn
3269 } else {
3270 // RR case
3271 goto bad;
3274 case ARMin_Ld8S: {
3275 HReg rD = i->ARMin.Ld8S.rD;
3276 ARMAMode2* am = i->ARMin.Ld8S.amode;
3277 ARMCondCode cc = i->ARMin.Ld8S.cc;
3278 vassert(cc != ARMcc_NV);
3279 if (am->tag == ARMam2_RI) {
3280 HReg rN = am->ARMam2.RI.reg;
3281 Int simm8;
3282 UInt bP, imm8hi, imm8lo, instr;
3283 if (am->ARMam2.RI.simm9 < 0) {
3284 bP = 0;
3285 simm8 = -am->ARMam2.RI.simm9;
3286 } else {
3287 bP = 1;
3288 simm8 = am->ARMam2.RI.simm9;
3290 vassert(simm8 >= 0 && simm8 <= 255);
3291 imm8hi = (simm8 >> 4) & 0xF;
3292 imm8lo = simm8 & 0xF;
3293 // ldrsb
3294 instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregEnc(rN),
3295 iregEnc(rD), imm8hi, X1101, imm8lo);
3296 *p++ = instr;
3297 goto done;
3298 } else {
3299 // RR case
3300 goto bad;
3304 case ARMin_XDirect: {
3305 /* NB: what goes on here has to be very closely coordinated
3306 with the chainXDirect_ARM and unchainXDirect_ARM below. */
3307 /* We're generating chain-me requests here, so we need to be
3308 sure this is actually allowed -- no-redir translations
3309 can't use chain-me's. Hence: */
3310 vassert(disp_cp_chain_me_to_slowEP != NULL);
3311 vassert(disp_cp_chain_me_to_fastEP != NULL);
3313 /* Use ptmp for backpatching conditional jumps. */
3314 UInt* ptmp = NULL;
3316 /* First off, if this is conditional, create a conditional
3317 jump over the rest of it. Or at least, leave a space for
3318 it that we will shortly fill in. */
3319 if (i->ARMin.XDirect.cond != ARMcc_AL) {
3320 vassert(i->ARMin.XDirect.cond != ARMcc_NV);
3321 ptmp = p;
3322 *p++ = 0;
3325 /* Update the guest R15T. */
3326 /* movw r12, lo16(dstGA) */
3327 /* movt r12, hi16(dstGA) */
3328 /* str r12, amR15T */
3329 p = imm32_to_ireg(p, /*r*/12, i->ARMin.XDirect.dstGA);
3330 p = do_load_or_store32(p, False/*!isLoad*/,
3331 /*r*/12, i->ARMin.XDirect.amR15T);
3333 /* --- FIRST PATCHABLE BYTE follows --- */
3334 /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're
3335 calling to) backs up the return address, so as to find the
3336 address of the first patchable byte. So: don't change the
3337 number of instructions (3) below. */
3338 /* movw r12, lo16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */
3339 /* movt r12, hi16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */
3340 /* blx r12 (A1) */
3341 const void* disp_cp_chain_me
3342 = i->ARMin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
3343 : disp_cp_chain_me_to_slowEP;
3344 p = imm32_to_ireg_EXACTLY2(p, /*r*/12,
3345 (UInt)(Addr)disp_cp_chain_me);
3346 *p++ = 0xE12FFF3C;
3347 /* --- END of PATCHABLE BYTES --- */
3349 /* Fix up the conditional jump, if there was one. */
3350 if (i->ARMin.XDirect.cond != ARMcc_AL) {
3351 Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3352 vassert(delta > 0 && delta < 40);
3353 vassert((delta & 3) == 0);
3354 UInt notCond = 1 ^ (UInt)i->ARMin.XDirect.cond;
3355 vassert(notCond <= 13); /* Neither AL nor NV */
3356 delta = (delta >> 2) - 2;
3357 *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
3359          goto done;
3360       }
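      /* Editor's note (illustrative): how the conditional branch-over fixup
         above works.  Suppose the placeholder-to-end distance 'delta' comes
         out at 24 bytes.  ARM B offsets are counted in words, relative to
         the branch's own address plus 8, so the 24-bit field written is
         (24 >> 2) - 2 = 4, i.e. "b{!cond} .+24".  The same fixup pattern is
         used by XIndir, XAssisted and Call below. */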
3362 case ARMin_XIndir: {
3363 /* We're generating transfers that could lead indirectly to a
3364 chain-me, so we need to be sure this is actually allowed
3365 -- no-redir translations are not allowed to reach normal
3366 translations without going through the scheduler. That
3367 means no XDirects or XIndirs out from no-redir
3368 translations. Hence: */
3369 vassert(disp_cp_xindir != NULL);
3371 /* Use ptmp for backpatching conditional jumps. */
3372 UInt* ptmp = NULL;
3374 /* First off, if this is conditional, create a conditional
3375 jump over the rest of it. Or at least, leave a space for
3376 it that we will shortly fill in. */
3377 if (i->ARMin.XIndir.cond != ARMcc_AL) {
3378 vassert(i->ARMin.XIndir.cond != ARMcc_NV);
3379 ptmp = p;
3380 *p++ = 0;
3383 /* Update the guest R15T. */
3384 /* str r-dstGA, amR15T */
3385 p = do_load_or_store32(p, False/*!isLoad*/,
3386 iregEnc(i->ARMin.XIndir.dstGA),
3387 i->ARMin.XIndir.amR15T);
3389 /* movw r12, lo16(VG_(disp_cp_xindir)) */
3390 /* movt r12, hi16(VG_(disp_cp_xindir)) */
3391 /* bx r12 (A1) */
3392 p = imm32_to_ireg(p, /*r*/12, (UInt)(Addr)disp_cp_xindir);
3393 *p++ = 0xE12FFF1C;
3395 /* Fix up the conditional jump, if there was one. */
3396 if (i->ARMin.XIndir.cond != ARMcc_AL) {
3397 Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3398 vassert(delta > 0 && delta < 40);
3399 vassert((delta & 3) == 0);
3400 UInt notCond = 1 ^ (UInt)i->ARMin.XIndir.cond;
3401 vassert(notCond <= 13); /* Neither AL nor NV */
3402 delta = (delta >> 2) - 2;
3403 *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
3405 goto done;
3408 case ARMin_XAssisted: {
3409 /* Use ptmp for backpatching conditional jumps. */
3410 UInt* ptmp = NULL;
3412 /* First off, if this is conditional, create a conditional
3413 jump over the rest of it. Or at least, leave a space for
3414 it that we will shortly fill in. */
3415 if (i->ARMin.XAssisted.cond != ARMcc_AL) {
3416 vassert(i->ARMin.XAssisted.cond != ARMcc_NV);
3417 ptmp = p;
3418 *p++ = 0;
3421 /* Update the guest R15T. */
3422 /* str r-dstGA, amR15T */
3423 p = do_load_or_store32(p, False/*!isLoad*/,
3424 iregEnc(i->ARMin.XAssisted.dstGA),
3425 i->ARMin.XAssisted.amR15T);
3427 /* movw r8, $magic_number */
3428 UInt trcval = 0;
3429 switch (i->ARMin.XAssisted.jk) {
3430 case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
3431 case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
3432 //case Ijk_Sys_int128: trcval = VEX_TRC_JMP_SYS_INT128; break;
3433 case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
3434 //case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
3435 //case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
3436 case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
3437 case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
3438 case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
3439 //case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
3440 //case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
3441 case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
3442 /* We don't expect to see the following being assisted. */
3443 //case Ijk_Ret:
3444 //case Ijk_Call:
3445 /* fallthrough */
3446 default:
3447 ppIRJumpKind(i->ARMin.XAssisted.jk);
3448 vpanic("emit_ARMInstr.ARMin_XAssisted: unexpected jump kind");
3450 vassert(trcval != 0);
3451 p = imm32_to_ireg(p, /*r*/8, trcval);
3453 /* movw r12, lo16(VG_(disp_cp_xassisted)) */
3454 /* movt r12, hi16(VG_(disp_cp_xassisted)) */
3455 /* bx r12 (A1) */
3456 p = imm32_to_ireg(p, /*r*/12, (UInt)(Addr)disp_cp_xassisted);
3457 *p++ = 0xE12FFF1C;
3459 /* Fix up the conditional jump, if there was one. */
3460 if (i->ARMin.XAssisted.cond != ARMcc_AL) {
3461 Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3462 vassert(delta > 0 && delta < 40);
3463 vassert((delta & 3) == 0);
3464 UInt notCond = 1 ^ (UInt)i->ARMin.XAssisted.cond;
3465 vassert(notCond <= 13); /* Neither AL nor NV */
3466 delta = (delta >> 2) - 2;
3467 *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
3469 goto done;
3472 case ARMin_CMov: {
3473 UInt instr = skeletal_RI84(i->ARMin.CMov.src);
3474 UInt subopc = X1101; /* MOV */
3475 UInt SBZ = 0;
3476 instr |= XXXXX___(i->ARMin.CMov.cond, (1 & (subopc >> 3)),
3477 (subopc << 1) & 0xF, SBZ,
3478 iregEnc(i->ARMin.CMov.dst));
3479 *p++ = instr;
3480 goto done;
3483 case ARMin_Call: {
3484 UInt instr;
3485          /* Decide on a scratch reg used to hold the call address.
3486 This has to be done as per the comments in getRegUsage. */
3487 Int scratchNo;
3488 switch (i->ARMin.Call.nArgRegs) {
3489 case 0: scratchNo = 0; break;
3490 case 1: scratchNo = 1; break;
3491 case 2: scratchNo = 2; break;
3492 case 3: scratchNo = 3; break;
3493 case 4: scratchNo = 11; break;
3494 default: vassert(0);
3496 /* If we don't need to do any fixup actions in the case that
3497 the call doesn't happen, just do the simple thing and emit
3498 straight-line code. We hope this is the common case. */
3499 if (i->ARMin.Call.cond == ARMcc_AL/*call always happens*/
3500 || i->ARMin.Call.rloc.pri == RLPri_None/*no fixup action*/) {
3501 // r"scratchNo" = &target
3502 p = imm32_to_ireg( (UInt*)p,
3503 scratchNo, (UInt)i->ARMin.Call.target );
3504 // blx{cond} r"scratchNo"
3505 instr = XXX___XX(i->ARMin.Call.cond, X0001, X0010, /*___*/
3506 X0011, scratchNo);
3507 instr |= 0xFFF << 8; // stick in the SBOnes
3508 *p++ = instr;
3509 } else {
3510 Int delta;
3511 /* Complex case. We have to generate an if-then-else
3512 diamond. */
3513 // before:
3514 // b{!cond} else:
3515 // r"scratchNo" = &target
3516 // blx{AL} r"scratchNo"
3517 // preElse:
3518 // b after:
3519 // else:
3520 // mov r0, #0x55555555 // possibly
3521 // mov r1, r0 // possibly
3522 // after:
3524 // before:
3525 UInt* pBefore = p;
3527 // b{!cond} else: // ptmp1 points here
3528 *p++ = 0; // filled in later
3530 // r"scratchNo" = &target
3531 p = imm32_to_ireg( (UInt*)p,
3532 scratchNo, (UInt)i->ARMin.Call.target );
3534 // blx{AL} r"scratchNo"
3535 instr = XXX___XX(ARMcc_AL, X0001, X0010, /*___*/
3536 X0011, scratchNo);
3537 instr |= 0xFFF << 8; // stick in the SBOnes
3538 *p++ = instr;
3540 // preElse:
3541 UInt* pPreElse = p;
3543 // b after:
3544 *p++ = 0; // filled in later
3546 // else:
3547 delta = (UChar*)p - (UChar*)pBefore;
3548 delta = (delta >> 2) - 2;
3549 *pBefore
3550 = XX______(1 ^ i->ARMin.Call.cond, X1010) | (delta & 0xFFFFFF);
3552 /* Do the 'else' actions */
3553 switch (i->ARMin.Call.rloc.pri) {
3554 case RLPri_Int:
3555 p = imm32_to_ireg_EXACTLY2(p, /*r*/0, 0x55555555);
3556 break;
3557 case RLPri_2Int:
3558 vassert(0); //ATC
3559 p = imm32_to_ireg_EXACTLY2(p, /*r*/0, 0x55555555);
3560 /* mov r1, r0 */
3561 *p++ = 0xE1A01000;
3562 break;
3563 case RLPri_None: case RLPri_INVALID: default:
3564 vassert(0);
3567 // after:
3568 delta = (UChar*)p - (UChar*)pPreElse;
3569 delta = (delta >> 2) - 2;
3570 *pPreElse = XX______(ARMcc_AL, X1010) | (delta & 0xFFFFFF);
3573          goto done;
3574       }
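      /* Editor's (interpretive) note: the 0x55555555 writes in the 'else'
         leg above appear to park a recognisably bogus value in the return
         register(s) when the guarded call is skipped, so that the declared
         return location still holds a defined value. */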
3576 case ARMin_Mul: {
3577 /* E0000392 mul r0, r2, r3
3578 E0810392 umull r0(LO), r1(HI), r2, r3
3579          E0C10392   smull r0(LO), r1(HI), r2, r3
3580       */
3581 switch (i->ARMin.Mul.op) {
3582 case ARMmul_PLAIN: *p++ = 0xE0000392; goto done;
3583 case ARMmul_ZX: *p++ = 0xE0810392; goto done;
3584 case ARMmul_SX: *p++ = 0xE0C10392; goto done;
3585 default: vassert(0);
3587 goto bad;
3589 case ARMin_LdrEX: {
3590 /* E1D42F9F ldrexb r2, [r4]
3591 E1F42F9F ldrexh r2, [r4]
3592 E1942F9F ldrex r2, [r4]
3593          E1B42F9F   ldrexd r2, r3, [r4]
3594       */
3595 switch (i->ARMin.LdrEX.szB) {
3596 case 1: *p++ = 0xE1D42F9F; goto done;
3597 case 2: *p++ = 0xE1F42F9F; goto done;
3598 case 4: *p++ = 0xE1942F9F; goto done;
3599 case 8: *p++ = 0xE1B42F9F; goto done;
3600 default: break;
3602 goto bad;
3604 case ARMin_StrEX: {
3605 /* E1C40F92 strexb r0, r2, [r4]
3606 E1E40F92 strexh r0, r2, [r4]
3607 E1840F92 strex r0, r2, [r4]
3608          E1A40F92   strexd r0, r2, r3, [r4]
3609       */
3610 switch (i->ARMin.StrEX.szB) {
3611 case 1: *p++ = 0xE1C40F92; goto done;
3612 case 2: *p++ = 0xE1E40F92; goto done;
3613 case 4: *p++ = 0xE1840F92; goto done;
3614 case 8: *p++ = 0xE1A40F92; goto done;
3615 default: break;
3617          goto bad;
3618       }
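      /* Editor's note: the Mul, LdrEX and StrEX cases above emit fixed
         encodings with hardwired registers (operands in r2/r3/r4, results
         in r0..r3), matching the read/write sets declared for them in
         getRegUsage_ARMInstr. */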
3619 case ARMin_VLdStD: {
3620 UInt dD = dregEnc(i->ARMin.VLdStD.dD);
3621 UInt rN = iregEnc(i->ARMin.VLdStD.amode->reg);
3622 Int simm11 = i->ARMin.VLdStD.amode->simm11;
3623 UInt off8 = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
3624 UInt bU = simm11 >= 0 ? 1 : 0;
3625 UInt bL = i->ARMin.VLdStD.isLoad ? 1 : 0;
3626 UInt insn;
3627 vassert(0 == (off8 & 3));
3628 off8 >>= 2;
3629 vassert(0 == (off8 & 0xFFFFFF00));
3630 insn = XXXXXX__(0xE,X1101,BITS4(bU,0,0,bL),rN,dD,X1011);
3631 insn |= off8;
3632 *p++ = insn;
3633 goto done;
3635 case ARMin_VLdStS: {
3636 UInt fD = fregEnc(i->ARMin.VLdStS.fD);
3637 UInt rN = iregEnc(i->ARMin.VLdStS.amode->reg);
3638 Int simm11 = i->ARMin.VLdStS.amode->simm11;
3639 UInt off8 = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
3640 UInt bU = simm11 >= 0 ? 1 : 0;
3641 UInt bL = i->ARMin.VLdStS.isLoad ? 1 : 0;
3642 UInt bD = fD & 1;
3643 UInt insn;
3644 vassert(0 == (off8 & 3));
3645 off8 >>= 2;
3646 vassert(0 == (off8 & 0xFFFFFF00));
3647 insn = XXXXXX__(0xE,X1101,BITS4(bU,bD,0,bL),rN, (fD >> 1), X1010);
3648 insn |= off8;
3649 *p++ = insn;
3650 goto done;
3652 case ARMin_VAluD: {
3653 UInt dN = dregEnc(i->ARMin.VAluD.argL);
3654 UInt dD = dregEnc(i->ARMin.VAluD.dst);
3655 UInt dM = dregEnc(i->ARMin.VAluD.argR);
3656 UInt pqrs = X1111; /* undefined */
3657 switch (i->ARMin.VAluD.op) {
3658 case ARMvfp_ADD: pqrs = X0110; break;
3659 case ARMvfp_SUB: pqrs = X0111; break;
3660 case ARMvfp_MUL: pqrs = X0100; break;
3661 case ARMvfp_DIV: pqrs = X1000; break;
3662 default: goto bad;
3664 vassert(pqrs != X1111);
3665 UInt bP = (pqrs >> 3) & 1;
3666 UInt bQ = (pqrs >> 2) & 1;
3667 UInt bR = (pqrs >> 1) & 1;
3668 UInt bS = (pqrs >> 0) & 1;
3669 UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,0,bQ,bR), dN, dD,
3670 X1011, BITS4(0,bS,0,0), dM);
3671 *p++ = insn;
3672 goto done;
3674 case ARMin_VAluS: {
3675 UInt dN = fregEnc(i->ARMin.VAluS.argL);
3676 UInt dD = fregEnc(i->ARMin.VAluS.dst);
3677 UInt dM = fregEnc(i->ARMin.VAluS.argR);
3678 UInt bN = dN & 1;
3679 UInt bD = dD & 1;
3680 UInt bM = dM & 1;
3681 UInt pqrs = X1111; /* undefined */
3682 switch (i->ARMin.VAluS.op) {
3683 case ARMvfp_ADD: pqrs = X0110; break;
3684 case ARMvfp_SUB: pqrs = X0111; break;
3685 case ARMvfp_MUL: pqrs = X0100; break;
3686 case ARMvfp_DIV: pqrs = X1000; break;
3687 default: goto bad;
3689 vassert(pqrs != X1111);
3690 UInt bP = (pqrs >> 3) & 1;
3691 UInt bQ = (pqrs >> 2) & 1;
3692 UInt bR = (pqrs >> 1) & 1;
3693 UInt bS = (pqrs >> 0) & 1;
3694 UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,bD,bQ,bR),
3695 (dN >> 1), (dD >> 1),
3696 X1010, BITS4(bN,bS,bM,0), (dM >> 1));
3697 *p++ = insn;
3698 goto done;
3700 case ARMin_VUnaryD: {
3701 UInt dD = dregEnc(i->ARMin.VUnaryD.dst);
3702 UInt dM = dregEnc(i->ARMin.VUnaryD.src);
3703 UInt insn = 0;
3704 switch (i->ARMin.VUnaryD.op) {
3705 case ARMvfpu_COPY:
3706 insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X0100,dM);
3707 break;
3708 case ARMvfpu_ABS:
3709 insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X1100,dM);
3710 break;
3711 case ARMvfpu_NEG:
3712 insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X0100,dM);
3713 break;
3714 case ARMvfpu_SQRT:
3715 insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X1100,dM);
3716 break;
3717 default:
3718 goto bad;
3720 *p++ = insn;
3721 goto done;
3723 case ARMin_VUnaryS: {
3724 UInt fD = fregEnc(i->ARMin.VUnaryS.dst);
3725 UInt fM = fregEnc(i->ARMin.VUnaryS.src);
3726 UInt insn = 0;
3727 switch (i->ARMin.VUnaryS.op) {
3728 case ARMvfpu_COPY:
3729 insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
3730 (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
3731 (fM >> 1));
3732 break;
3733 case ARMvfpu_ABS:
3734 insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
3735 (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
3736 (fM >> 1));
3737 break;
3738 case ARMvfpu_NEG:
3739 insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
3740 (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
3741 (fM >> 1));
3742 break;
3743 case ARMvfpu_SQRT:
3744 insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
3745 (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
3746 (fM >> 1));
3747 break;
3748 default:
3749 goto bad;
3750 }
3751 *p++ = insn;
3752 goto done;
3754 case ARMin_VCmpD: {
3755 UInt dD = dregEnc(i->ARMin.VCmpD.argL);
3756 UInt dM = dregEnc(i->ARMin.VCmpD.argR);
3757 UInt insn = XXXXXXXX(0xE, X1110, X1011, X0100, dD, X1011, X0100, dM);
3758 *p++ = insn; /* FCMPD dD, dM */
3759 *p++ = 0xEEF1FA10; /* FMSTAT */
3760 goto done;
3761 }
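/* How this pair works: the first word compares the operands and
   sets NZCV in the FPSCR (with argL=d8, argR=d9, purely as an
   illustration, it assembles to 0xEEB48B49, vcmp.f64 d8, d9); the
   FMSTAT word 0xEEF1FA10 is vmrs APSR_nzcv, fpscr, which copies
   those flags into the integer APSR so that ordinary conditional
   instructions can test the comparison result. */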
3762 case ARMin_VCMovD: {
3763 UInt cc = (UInt)i->ARMin.VCMovD.cond;
3764 UInt dD = dregEnc(i->ARMin.VCMovD.dst);
3765 UInt dM = dregEnc(i->ARMin.VCMovD.src);
3766 vassert(cc < 16 && cc != ARMcc_AL);
3767 UInt insn = XXXXXXXX(cc, X1110,X1011,X0000,dD,X1011,X0100,dM);
3768 *p++ = insn;
3769 goto done;
3771 case ARMin_VCMovS: {
3772 UInt cc = (UInt)i->ARMin.VCMovS.cond;
3773 UInt fD = fregEnc(i->ARMin.VCMovS.dst);
3774 UInt fM = fregEnc(i->ARMin.VCMovS.src);
3775 vassert(cc < 16 && cc != ARMcc_AL);
3776 UInt insn = XXXXXXXX(cc, X1110, BITS4(1,(fD & 1),1,1),
3777 X0000,(fD >> 1),X1010,
3778 BITS4(0,1,(fM & 1),0), (fM >> 1));
3779 *p++ = insn;
3780 goto done;
3782 case ARMin_VCvtSD: {
3783 if (i->ARMin.VCvtSD.sToD) {
3784 UInt dD = dregEnc(i->ARMin.VCvtSD.dst);
3785 UInt fM = fregEnc(i->ARMin.VCvtSD.src);
3786 UInt insn = XXXXXXXX(0xE, X1110, X1011, X0111, dD, X1010,
3787 BITS4(1,1, (fM & 1), 0),
3788 (fM >> 1));
3789 *p++ = insn;
3790 goto done;
3791 } else {
3792 UInt fD = fregEnc(i->ARMin.VCvtSD.dst);
3793 UInt dM = dregEnc(i->ARMin.VCvtSD.src);
3794 UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1),
3795 X0111, (fD >> 1),
3796 X1011, X1100, dM);
3797 *p++ = insn;
3798 goto done;
3801 case ARMin_VXferQ: {
3802 UInt insn;
3803 UInt qD = qregEnc(i->ARMin.VXferQ.qD);
3804 UInt dHi = dregEnc(i->ARMin.VXferQ.dHi);
3805 UInt dLo = dregEnc(i->ARMin.VXferQ.dLo);
3806 /* This is a bit tricky. We need to make 2 D-D moves and we rely
3807 on the fact that the Q register can be treated as two D registers.
3808 We also rely on the fact that the register allocator will allocate
3809 the two D's and the Q to disjoint parts of the register file,
3810 and so we don't have to worry about the first move's destination
3811 being the same as the second move's source, etc. We do have
3812 assertions though. */
3813 /* The ARM ARM specifies that
3814 D<2n> maps to the least significant half of Q<n>
3815 D<2n+1> maps to the most significant half of Q<n>
3816 So there are no issues with endianness here.
3817 */
3818 UInt qDlo = 2 * qD + 0;
3819 UInt qDhi = 2 * qD + 1;
3820 /* Stay sane .. */
3821 vassert(qDhi != dHi && qDhi != dLo);
3822 vassert(qDlo != dHi && qDlo != dLo);
3823 /* vmov dX, dY is
3824 F 2 (0,dX[4],1,0) dY[3:0] dX[3:0] 1 (dY[4],0,dY[4],1) dY[3:0]
3825 */
3826 # define VMOV_D_D(_xx,_yy) \
3827 XXXXXXXX( 0xF, 0x2, BITS4(0, (((_xx) >> 4) & 1), 1, 0), \
3828 ((_yy) & 0xF), ((_xx) & 0xF), 0x1, \
3829 BITS4( (((_yy) >> 4) & 1), 0, (((_yy) >> 4) & 1), 1), \
3830 ((_yy) & 0xF) )
3831 if (i->ARMin.VXferQ.toQ) {
3832 insn = VMOV_D_D(qDlo, dLo); *p++ = insn;
3833 insn = VMOV_D_D(qDhi, dHi); *p++ = insn;
3834 } else {
3835 insn = VMOV_D_D(dLo, qDlo); *p++ = insn;
3836 insn = VMOV_D_D(dHi, qDhi); *p++ = insn;
3837 }
3838 # undef VMOV_D_D
3839 goto done;
3840 }
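/* Worked expansion of the macro above (illustrative registers):
   VMOV_D_D(3, 5) assembles to 0xF2253115, which is
   vorr d3, d5, d5, the NEON encoding that implements vmov d3, d5. */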
3841 case ARMin_VXferD: {
3842 UInt dD = dregEnc(i->ARMin.VXferD.dD);
3843 UInt rHi = iregEnc(i->ARMin.VXferD.rHi);
3844 UInt rLo = iregEnc(i->ARMin.VXferD.rLo);
3845 /* vmov dD, rLo, rHi is
3846 E C 4 rHi rLo B (0,0,dD[4],1) dD[3:0]
3847 vmov rLo, rHi, dD is
3848 E C 5 rHi rLo B (0,0,dD[4],1) dD[3:0]
3849 */
3850 UInt insn
3851 = XXXXXXXX(0xE, 0xC, i->ARMin.VXferD.toD ? 4 : 5,
3852 rHi, rLo, 0xB,
3853 BITS4(0,0, ((dD >> 4) & 1), 1), (dD & 0xF));
3854 *p++ = insn;
3855 goto done;
3856 }
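/* Worked example (illustrative registers): moving r3:r2 into d0
   (toD, rHi=3, rLo=2, dD=0) assembles to 0xEC432B10, i.e.
   vmov d0, r2, r3; the opposite direction only changes the third
   nibble from 4 to 5, giving 0xEC532B10 for vmov r2, r3, d0. */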
3857 case ARMin_VXferS: {
3858 UInt fD = fregEnc(i->ARMin.VXferS.fD);
3859 UInt rLo = iregEnc(i->ARMin.VXferS.rLo);
3860 /* vmov fD, rLo is
3861 E E 0 fD[4:1] rLo A (fD[0],0,0,1) 0
3862 vmov rLo, fD is
3863 E E 1 fD[4:1] rLo A (fD[0],0,0,1) 0
3864 */
3865 UInt insn
3866 = XXXXXXXX(0xE, 0xE, i->ARMin.VXferS.toS ? 0 : 1,
3867 (fD >> 1) & 0xF, rLo, 0xA,
3868 BITS4((fD & 1),0,0,1), 0);
3869 *p++ = insn;
3870 goto done;
3872 case ARMin_VCvtID: {
3873 Bool iToD = i->ARMin.VCvtID.iToD;
3874 Bool syned = i->ARMin.VCvtID.syned;
3875 if (iToD && syned) {
3876 // FSITOD: I32S-in-freg to F64-in-dreg
3877 UInt regF = fregEnc(i->ARMin.VCvtID.src);
3878 UInt regD = dregEnc(i->ARMin.VCvtID.dst);
3879 UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
3880 X1011, BITS4(1,1,(regF & 1),0),
3881 (regF >> 1) & 0xF);
3882 *p++ = insn;
3883 goto done;
3885 if (iToD && (!syned)) {
3886 // FUITOD: I32U-in-freg to F64-in-dreg
3887 UInt regF = fregEnc(i->ARMin.VCvtID.src);
3888 UInt regD = dregEnc(i->ARMin.VCvtID.dst);
3889 UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
3890 X1011, BITS4(0,1,(regF & 1),0),
3891 (regF >> 1) & 0xF);
3892 *p++ = insn;
3893 goto done;
3895 if ((!iToD) && syned) {
3896 // FTOSID: F64-in-dreg to I32S-in-freg
3897 UInt regD = dregEnc(i->ARMin.VCvtID.src);
3898 UInt regF = fregEnc(i->ARMin.VCvtID.dst);
3899 UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
3900 X1101, (regF >> 1) & 0xF,
3901 X1011, X0100, regD);
3902 *p++ = insn;
3903 goto done;
3905 if ((!iToD) && (!syned)) {
3906 // FTOUID: F64-in-dreg to I32U-in-freg
3907 UInt regD = dregEnc(i->ARMin.VCvtID.src);
3908 UInt regF = fregEnc(i->ARMin.VCvtID.dst);
3909 UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
3910 X1100, (regF >> 1) & 0xF,
3911 X1011, X0100, regD);
3912 *p++ = insn;
3913 goto done;
3915 /*UNREACHED*/
3916 vassert(0);
3917 }
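/* Worked example for the FSITOD path (illustrative registers):
   src=s1, dst=d0 gives regF=1, regD=0, and the word assembles to
   0xEEB80BE0, i.e. vcvt.f64.s32 d0, s1. */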
3918 case ARMin_VRIntR: { /* NB: ARM v8 and above only */
3919 Bool isF64 = i->ARMin.VRIntR.isF64;
3920 UInt rDst = (isF64 ? dregEnc : fregEnc)(i->ARMin.VRIntR.dst);
3921 UInt rSrc = (isF64 ? dregEnc : fregEnc)(i->ARMin.VRIntR.src);
3922 /* The encoding of registers here differs strangely for the
3923 F32 and F64 cases. */
3924 UInt D, Vd, M, Vm;
3925 if (isF64) {
3926 D = (rDst >> 4) & 1;
3927 Vd = rDst & 0xF;
3928 M = (rSrc >> 4) & 1;
3929 Vm = rSrc & 0xF;
3930 } else {
3931 Vd = (rDst >> 1) & 0xF;
3932 D = rDst & 1;
3933 Vm = (rSrc >> 1) & 0xF;
3934 M = rSrc & 1;
3935 }
3936 vassert(D <= 1 && Vd <= 15 && M <= 1 && Vm <= 15);
3937 *p++ = XXXXXXXX(0xE, X1110, X1011 | (D << 2), X0110, Vd,
3938 isF64 ? X1011 : X1010, X0100 | (M << 1), Vm);
3939 goto done;
3941 case ARMin_VMinMaxNum: {
3942 Bool isF64 = i->ARMin.VMinMaxNum.isF64;
3943 Bool isMax = i->ARMin.VMinMaxNum.isMax;
3944 UInt rDst = (isF64 ? dregEnc : fregEnc)(i->ARMin.VMinMaxNum.dst);
3945 UInt rSrcL = (isF64 ? dregEnc : fregEnc)(i->ARMin.VMinMaxNum.srcL);
3946 UInt rSrcR = (isF64 ? dregEnc : fregEnc)(i->ARMin.VMinMaxNum.srcR);
3947 /* The encoding of registers here differs strangely for the
3948 F32 and F64 cases. */
3949 UInt D, Vd, N, Vn, M, Vm;
3950 if (isF64) {
3951 D = (rDst >> 4) & 1;
3952 Vd = rDst & 0xF;
3953 N = (rSrcL >> 4) & 1;
3954 Vn = rSrcL & 0xF;
3955 M = (rSrcR >> 4) & 1;
3956 Vm = rSrcR & 0xF;
3957 } else {
3958 Vd = (rDst >> 1) & 0xF;
3959 D = rDst & 1;
3960 Vn = (rSrcL >> 1) & 0xF;
3961 N = rSrcL & 1;
3962 Vm = (rSrcR >> 1) & 0xF;
3963 M = rSrcR & 1;
3964 }
3965 vassert(D <= 1 && Vd <= 15 && M <= 1 && Vm <= 15 && N <= 1
3966 && Vn <= 15);
3967 *p++ = XXXXXXXX(X1111,X1110, X1000 | (D << 2), Vn, Vd,
3968 X1010 | (isF64 ? 1 : 0),
3969 (N << 3) | ((isMax ? 0 : 1) << 2) | (M << 1) | 0,
3970 Vm);
3971 goto done;
3973 case ARMin_FPSCR: {
3974 Bool toFPSCR = i->ARMin.FPSCR.toFPSCR;
3975 UInt iReg = iregEnc(i->ARMin.FPSCR.iReg);
3976 if (toFPSCR) {
3977 /* fmxr fpscr, iReg is EEE1 iReg A10 */
3978 *p++ = 0xEEE10A10 | ((iReg & 0xF) << 12);
3979 goto done;
3980 }
3981 goto bad; // FPSCR -> iReg case currently ATC
3983 case ARMin_MFence: {
3984 // It's not clear (to me) how the pre-ARMv7 CP15 'mcr' barrier
3985 // forms below relate to the ARMv7 versions, so let's just use
3986 // the v7 versions as they are at least well documented.
3987 //*p++ = 0xEE070F9A; /* mcr 15,0,r0,c7,c10,4 (DSB) */
3988 //*p++ = 0xEE070FBA; /* mcr 15,0,r0,c7,c10,5 (DMB) */
3989 //*p++ = 0xEE070F95; /* mcr 15,0,r0,c7,c5,4 (ISB) */
3990 *p++ = 0xF57FF04F; /* DSB sy */
3991 *p++ = 0xF57FF05F; /* DMB sy */
3992 *p++ = 0xF57FF06F; /* ISB */
3993 goto done;
3995 case ARMin_CLREX: {
3996 *p++ = 0xF57FF01F; /* clrex */
3997 goto done;
4000 case ARMin_NLdStQ: {
4001 UInt regD = qregEnc(i->ARMin.NLdStQ.dQ) << 1;
4002 UInt regN, regM;
4003 UInt D = regD >> 4;
4004 UInt bL = i->ARMin.NLdStQ.isLoad ? 1 : 0;
4005 UInt insn;
4006 vassert(hregClass(i->ARMin.NLdStQ.dQ) == HRcVec128);
4007 regD &= 0xF;
4008 if (i->ARMin.NLdStQ.amode->tag == ARMamN_RR) {
4009 regN = iregEnc(i->ARMin.NLdStQ.amode->ARMamN.RR.rN);
4010 regM = iregEnc(i->ARMin.NLdStQ.amode->ARMamN.RR.rM);
4011 } else {
4012 regN = iregEnc(i->ARMin.NLdStQ.amode->ARMamN.R.rN);
4013 regM = 15;
4014 }
4015 insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
4016 regN, regD, X1010, X1000, regM);
4017 *p++ = insn;
4018 goto done;
4020 case ARMin_NLdStD: {
4021 UInt regD = dregEnc(i->ARMin.NLdStD.dD);
4022 UInt regN, regM;
4023 UInt D = regD >> 4;
4024 UInt bL = i->ARMin.NLdStD.isLoad ? 1 : 0;
4025 UInt insn;
4026 vassert(hregClass(i->ARMin.NLdStD.dD) == HRcFlt64);
4027 regD &= 0xF;
4028 if (i->ARMin.NLdStD.amode->tag == ARMamN_RR) {
4029 regN = iregEnc(i->ARMin.NLdStD.amode->ARMamN.RR.rN);
4030 regM = iregEnc(i->ARMin.NLdStD.amode->ARMamN.RR.rM);
4031 } else {
4032 regN = iregEnc(i->ARMin.NLdStD.amode->ARMamN.R.rN);
4033 regM = 15;
4034 }
4035 insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
4036 regN, regD, X0111, X1000, regM);
4037 *p++ = insn;
4038 goto done;
4040 case ARMin_NUnaryS: {
4041 UInt Q = i->ARMin.NUnaryS.Q ? 1 : 0;
4042 UInt regD, D;
4043 UInt regM, M;
4044 UInt size = i->ARMin.NUnaryS.size;
4045 UInt insn;
4046 UInt opc, opc1, opc2;
4047 switch (i->ARMin.NUnaryS.op) {
4048 case ARMneon_VDUP:
4049 if (i->ARMin.NUnaryS.size >= 16)
4050 goto bad;
4051 if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Reg)
4052 goto bad;
4053 if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
4054 goto bad;
4055 regD = (hregClass(i->ARMin.NUnaryS.dst->reg) == HRcVec128)
4056 ? (qregEnc(i->ARMin.NUnaryS.dst->reg) << 1)
4057 : dregEnc(i->ARMin.NUnaryS.dst->reg);
4058 regM = (hregClass(i->ARMin.NUnaryS.src->reg) == HRcVec128)
4059 ? (qregEnc(i->ARMin.NUnaryS.src->reg) << 1)
4060 : dregEnc(i->ARMin.NUnaryS.src->reg);
4061 D = regD >> 4;
4062 M = regM >> 4;
4063 regD &= 0xf;
4064 regM &= 0xf;
4065 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1),
4066 (i->ARMin.NUnaryS.size & 0xf), regD,
4067 X1100, BITS4(0,Q,M,0), regM);
4068 *p++ = insn;
4069 goto done;
4070 case ARMneon_SETELEM:
4071 regD = Q ? (qregEnc(i->ARMin.NUnaryS.dst->reg) << 1) :
4072 dregEnc(i->ARMin.NUnaryS.dst->reg);
4073 regM = iregEnc(i->ARMin.NUnaryS.src->reg);
4074 M = regM >> 4;
4075 D = regD >> 4;
4076 regM &= 0xF;
4077 regD &= 0xF;
4078 if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Scalar)
4079 goto bad;
4080 switch (size) {
4081 case 0:
4082 if (i->ARMin.NUnaryS.dst->index > 7)
4083 goto bad;
4084 opc = X1000 | i->ARMin.NUnaryS.dst->index;
4085 break;
4086 case 1:
4087 if (i->ARMin.NUnaryS.dst->index > 3)
4088 goto bad;
4089 opc = X0001 | (i->ARMin.NUnaryS.dst->index << 1);
4090 break;
4091 case 2:
4092 if (i->ARMin.NUnaryS.dst->index > 1)
4093 goto bad;
4094 opc = X0000 | (i->ARMin.NUnaryS.dst->index << 2);
4095 break;
4096 default:
4097 goto bad;
4099 opc1 = (opc >> 2) & 3;
4100 opc2 = opc & 3;
4101 insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),0),
4102 regD, regM, X1011,
4103 BITS4(D,(opc2 >> 1),(opc2 & 1),1), X0000);
4104 *p++ = insn;
4105 goto done;
4106 case ARMneon_GETELEMU:
4107 regM = Q ? (qregEnc(i->ARMin.NUnaryS.src->reg) << 1) :
4108 dregEnc(i->ARMin.NUnaryS.src->reg);
4109 regD = iregEnc(i->ARMin.NUnaryS.dst->reg);
4110 M = regM >> 4;
4111 D = regD >> 4;
4112 regM &= 0xF;
4113 regD &= 0xF;
4114 if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
4115 goto bad;
4116 switch (size) {
4117 case 0:
4118 if (Q && i->ARMin.NUnaryS.src->index > 7) {
4119 regM++;
4120 i->ARMin.NUnaryS.src->index -= 8;
4122 if (i->ARMin.NUnaryS.src->index > 7)
4123 goto bad;
4124 opc = X1000 | i->ARMin.NUnaryS.src->index;
4125 break;
4126 case 1:
4127 if (Q && i->ARMin.NUnaryS.src->index > 3) {
4128 regM++;
4129 i->ARMin.NUnaryS.src->index -= 4;
4131 if (i->ARMin.NUnaryS.src->index > 3)
4132 goto bad;
4133 opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
4134 break;
4135 case 2:
4136 goto bad;
4137 default:
4138 goto bad;
4140 opc1 = (opc >> 2) & 3;
4141 opc2 = opc & 3;
4142 insn = XXXXXXXX(0xE, X1110, BITS4(1,(opc1 >> 1),(opc1 & 1),1),
4143 regM, regD, X1011,
4144 BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
4145 *p++ = insn;
4146 goto done;
4147 case ARMneon_GETELEMS:
4148 regM = Q ? (qregEnc(i->ARMin.NUnaryS.src->reg) << 1) :
4149 dregEnc(i->ARMin.NUnaryS.src->reg);
4150 regD = iregEnc(i->ARMin.NUnaryS.dst->reg);
4151 M = regM >> 4;
4152 D = regD >> 4;
4153 regM &= 0xF;
4154 regD &= 0xF;
4155 if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
4156 goto bad;
4157 switch (size) {
4158 case 0:
4159 if (Q && i->ARMin.NUnaryS.src->index > 7) {
4160 regM++;
4161 i->ARMin.NUnaryS.src->index -= 8;
4163 if (i->ARMin.NUnaryS.src->index > 7)
4164 goto bad;
4165 opc = X1000 | i->ARMin.NUnaryS.src->index;
4166 break;
4167 case 1:
4168 if (Q && i->ARMin.NUnaryS.src->index > 3) {
4169 regM++;
4170 i->ARMin.NUnaryS.src->index -= 4;
4172 if (i->ARMin.NUnaryS.src->index > 3)
4173 goto bad;
4174 opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
4175 break;
4176 case 2:
4177 if (Q && i->ARMin.NUnaryS.src->index > 1) {
4178 regM++;
4179 i->ARMin.NUnaryS.src->index -= 2;
4181 if (i->ARMin.NUnaryS.src->index > 1)
4182 goto bad;
4183 opc = X0000 | (i->ARMin.NUnaryS.src->index << 2);
4184 break;
4185 default:
4186 goto bad;
4188 opc1 = (opc >> 2) & 3;
4189 opc2 = opc & 3;
4190 insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),1),
4191 regM, regD, X1011,
4192 BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
4193 *p++ = insn;
4194 goto done;
4195 default:
4196 goto bad;
4197 }
4198 }
4199 case ARMin_NUnary: {
4200 UInt Q = i->ARMin.NUnary.Q ? 1 : 0;
4201 UInt regD = (hregClass(i->ARMin.NUnary.dst) == HRcVec128)
4202 ? (qregEnc(i->ARMin.NUnary.dst) << 1)
4203 : dregEnc(i->ARMin.NUnary.dst);
4204 UInt regM, M;
4205 UInt D = regD >> 4;
4206 UInt sz1 = i->ARMin.NUnary.size >> 1;
4207 UInt sz2 = i->ARMin.NUnary.size & 1;
4208 UInt sz = i->ARMin.NUnary.size;
4209 UInt insn;
4210 UInt F = 0; /* TODO: floating point EQZ ??? */
4211 if (i->ARMin.NUnary.op != ARMneon_DUP) {
4212 regM = (hregClass(i->ARMin.NUnary.src) == HRcVec128)
4213 ? (qregEnc(i->ARMin.NUnary.src) << 1)
4214 : dregEnc(i->ARMin.NUnary.src);
4215 M = regM >> 4;
4216 } else {
4217 regM = iregEnc(i->ARMin.NUnary.src);
4218 M = regM >> 4;
4219 }
4220 regD &= 0xF;
4221 regM &= 0xF;
4222 switch (i->ARMin.NUnary.op) {
4223 case ARMneon_COPY: /* VMOV reg, reg */
4224 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regM, regD, X0001,
4225 BITS4(M,Q,M,1), regM);
4226 break;
4227 case ARMneon_COPYN: /* VMOVN regD, regQ */
4228 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4229 regD, X0010, BITS4(0,0,M,0), regM);
4230 break;
4231 case ARMneon_COPYQNSS: /* VQMOVN regD, regQ */
4232 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4233 regD, X0010, BITS4(1,0,M,0), regM);
4234 break;
4235 case ARMneon_COPYQNUS: /* VQMOVUN regD, regQ */
4236 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4237 regD, X0010, BITS4(0,1,M,0), regM);
4238 break;
4239 case ARMneon_COPYQNUU: /* VQMOVN regD, regQ */
4240 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4241 regD, X0010, BITS4(1,1,M,0), regM);
4242 break;
4243 case ARMneon_COPYLS: /* VMOVL regQ, regD */
4244 if (sz >= 3)
4245 goto bad;
4246 insn = XXXXXXXX(0xF, X0010,
4247 BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
4248 BITS4((sz == 0) ? 1 : 0,0,0,0),
4249 regD, X1010, BITS4(0,0,M,1), regM);
4250 break;
4251 case ARMneon_COPYLU: /* VMOVL regQ, regD */
4252 if (sz >= 3)
4253 goto bad;
4254 insn = XXXXXXXX(0xF, X0011,
4255 BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
4256 BITS4((sz == 0) ? 1 : 0,0,0,0),
4257 regD, X1010, BITS4(0,0,M,1), regM);
4258 break;
4259 case ARMneon_NOT: /* VMVN reg, reg*/
4260 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
4261 BITS4(1,Q,M,0), regM);
4262 break;
4263 case ARMneon_EQZ:
4264 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
4265 regD, BITS4(0,F,0,1), BITS4(0,Q,M,0), regM);
4266 break;
4267 case ARMneon_CNT:
4268 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
4269 BITS4(0,Q,M,0), regM);
4270 break;
4271 case ARMneon_CLZ:
4272 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4273 regD, X0100, BITS4(1,Q,M,0), regM);
4274 break;
4275 case ARMneon_CLS:
4276 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4277 regD, X0100, BITS4(0,Q,M,0), regM);
4278 break;
4279 case ARMneon_ABS:
4280 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
4281 regD, X0011, BITS4(0,Q,M,0), regM);
4282 break;
4283 case ARMneon_DUP:
4284 sz1 = i->ARMin.NUnary.size == 0 ? 1 : 0;
4285 sz2 = i->ARMin.NUnary.size == 1 ? 1 : 0;
4286 vassert(sz1 + sz2 < 2);
4287 insn = XXXXXXXX(0xE, X1110, BITS4(1, sz1, Q, 0), regD, regM,
4288 X1011, BITS4(D,0,sz2,1), X0000);
4289 break;
4290 case ARMneon_REV16:
4291 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4292 regD, BITS4(0,0,0,1), BITS4(0,Q,M,0), regM);
4293 break;
4294 case ARMneon_REV32:
4295 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4296 regD, BITS4(0,0,0,0), BITS4(1,Q,M,0), regM);
4297 break;
4298 case ARMneon_REV64:
4299 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4300 regD, BITS4(0,0,0,0), BITS4(0,Q,M,0), regM);
4301 break;
4302 case ARMneon_PADDLU:
4303 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4304 regD, X0010, BITS4(1,Q,M,0), regM);
4305 break;
4306 case ARMneon_PADDLS:
4307 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4308 regD, X0010, BITS4(0,Q,M,0), regM);
4309 break;
4310 case ARMneon_VQSHLNUU:
4311 insn = XXXXXXXX(0xF, X0011,
4312 (1 << 3) | (D << 2) | ((sz >> 4) & 3),
4313 sz & 0xf, regD, X0111,
4314 BITS4(sz >> 6,Q,M,1), regM);
4315 break;
4316 case ARMneon_VQSHLNSS:
4317 insn = XXXXXXXX(0xF, X0010,
4318 (1 << 3) | (D << 2) | ((sz >> 4) & 3),
4319 sz & 0xf, regD, X0111,
4320 BITS4(sz >> 6,Q,M,1), regM);
4321 break;
4322 case ARMneon_VQSHLNUS:
4323 insn = XXXXXXXX(0xF, X0011,
4324 (1 << 3) | (D << 2) | ((sz >> 4) & 3),
4325 sz & 0xf, regD, X0110,
4326 BITS4(sz >> 6,Q,M,1), regM);
4327 break;
4328 case ARMneon_VCVTFtoS:
4329 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
4330 BITS4(0,Q,M,0), regM);
4331 break;
4332 case ARMneon_VCVTFtoU:
4333 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
4334 BITS4(1,Q,M,0), regM);
4335 break;
4336 case ARMneon_VCVTStoF:
4337 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
4338 BITS4(0,Q,M,0), regM);
4339 break;
4340 case ARMneon_VCVTUtoF:
4341 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
4342 BITS4(1,Q,M,0), regM);
4343 break;
4344 case ARMneon_VCVTFtoFixedU:
4345 sz1 = (sz >> 5) & 1;
4346 sz2 = (sz >> 4) & 1;
4347 sz &= 0xf;
4348 insn = XXXXXXXX(0xF, X0011,
4349 BITS4(1,D,sz1,sz2), sz, regD, X1111,
4350 BITS4(0,Q,M,1), regM);
4351 break;
4352 case ARMneon_VCVTFtoFixedS:
4353 sz1 = (sz >> 5) & 1;
4354 sz2 = (sz >> 4) & 1;
4355 sz &= 0xf;
4356 insn = XXXXXXXX(0xF, X0010,
4357 BITS4(1,D,sz1,sz2), sz, regD, X1111,
4358 BITS4(0,Q,M,1), regM);
4359 break;
4360 case ARMneon_VCVTFixedUtoF:
4361 sz1 = (sz >> 5) & 1;
4362 sz2 = (sz >> 4) & 1;
4363 sz &= 0xf;
4364 insn = XXXXXXXX(0xF, X0011,
4365 BITS4(1,D,sz1,sz2), sz, regD, X1110,
4366 BITS4(0,Q,M,1), regM);
4367 break;
4368 case ARMneon_VCVTFixedStoF:
4369 sz1 = (sz >> 5) & 1;
4370 sz2 = (sz >> 4) & 1;
4371 sz &= 0xf;
4372 insn = XXXXXXXX(0xF, X0010,
4373 BITS4(1,D,sz1,sz2), sz, regD, X1110,
4374 BITS4(0,Q,M,1), regM);
4375 break;
4376 case ARMneon_VCVTF32toF16:
4377 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0110,
4378 BITS4(0,0,M,0), regM);
4379 break;
4380 case ARMneon_VCVTF16toF32:
4381 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0111,
4382 BITS4(0,0,M,0), regM);
4383 break;
4384 case ARMneon_VRECIP:
4385 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
4386 BITS4(0,Q,M,0), regM);
4387 break;
4388 case ARMneon_VRECIPF:
4389 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
4390 BITS4(0,Q,M,0), regM);
4391 break;
4392 case ARMneon_VABSFP:
4393 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
4394 BITS4(0,Q,M,0), regM);
4395 break;
4396 case ARMneon_VRSQRTEFP:
4397 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
4398 BITS4(1,Q,M,0), regM);
4399 break;
4400 case ARMneon_VRSQRTE:
4401 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
4402 BITS4(1,Q,M,0), regM);
4403 break;
4404 case ARMneon_VNEGF:
4405 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
4406 BITS4(1,Q,M,0), regM);
4407 break;
4409 default:
4410 goto bad;
4411 }
4412 *p++ = insn;
4413 goto done;
4415 case ARMin_NDual: {
4416 UInt Q = i->ARMin.NDual.Q ? 1 : 0;
4417 UInt regD = (hregClass(i->ARMin.NDual.arg1) == HRcVec128)
4418 ? (qregEnc(i->ARMin.NDual.arg1) << 1)
4419 : dregEnc(i->ARMin.NDual.arg1);
4420 UInt regM = (hregClass(i->ARMin.NDual.arg2) == HRcVec128)
4421 ? (qregEnc(i->ARMin.NDual.arg2) << 1)
4422 : dregEnc(i->ARMin.NDual.arg2);
4423 UInt D = regD >> 4;
4424 UInt M = regM >> 4;
4425 UInt sz1 = i->ARMin.NDual.size >> 1;
4426 UInt sz2 = i->ARMin.NDual.size & 1;
4427 UInt insn;
4428 regD &= 0xF;
4429 regM &= 0xF;
4430 switch (i->ARMin.NDual.op) {
4431 case ARMneon_TRN: /* VTRN reg, reg */
4432 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4433 regD, X0000, BITS4(1,Q,M,0), regM);
4434 break;
4435 case ARMneon_ZIP: /* VZIP reg, reg */
4436 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4437 regD, X0001, BITS4(1,Q,M,0), regM);
4438 break;
4439 case ARMneon_UZP: /* VUZP reg, reg */
4440 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4441 regD, X0001, BITS4(0,Q,M,0), regM);
4442 break;
4443 default:
4444 goto bad;
4445 }
4446 *p++ = insn;
4447 goto done;
4449 case ARMin_NBinary: {
4450 UInt Q = i->ARMin.NBinary.Q ? 1 : 0;
4451 UInt regD = (hregClass(i->ARMin.NBinary.dst) == HRcVec128)
4452 ? (qregEnc(i->ARMin.NBinary.dst) << 1)
4453 : dregEnc(i->ARMin.NBinary.dst);
4454 UInt regN = (hregClass(i->ARMin.NBinary.argL) == HRcVec128)
4455 ? (qregEnc(i->ARMin.NBinary.argL) << 1)
4456 : dregEnc(i->ARMin.NBinary.argL);
4457 UInt regM = (hregClass(i->ARMin.NBinary.argR) == HRcVec128)
4458 ? (qregEnc(i->ARMin.NBinary.argR) << 1)
4459 : dregEnc(i->ARMin.NBinary.argR);
4460 UInt sz1 = i->ARMin.NBinary.size >> 1;
4461 UInt sz2 = i->ARMin.NBinary.size & 1;
4462 UInt D = regD >> 4;
4463 UInt N = regN >> 4;
4464 UInt M = regM >> 4;
4465 UInt insn;
4466 regD &= 0xF;
4467 regM &= 0xF;
4468 regN &= 0xF;
4469 switch (i->ARMin.NBinary.op) {
4470 case ARMneon_VAND: /* VAND reg, reg, reg */
4471 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X0001,
4472 BITS4(N,Q,M,1), regM);
4473 break;
4474 case ARMneon_VORR: /* VORR reg, reg, reg*/
4475 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X0001,
4476 BITS4(N,Q,M,1), regM);
4477 break;
4478 case ARMneon_VXOR: /* VEOR reg, reg, reg */
4479 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X0001,
4480 BITS4(N,Q,M,1), regM);
4481 break;
4482 case ARMneon_VADD: /* VADD reg, reg, reg */
4483 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4484 X1000, BITS4(N,Q,M,0), regM);
4485 break;
4486 case ARMneon_VSUB: /* VSUB reg, reg, reg */
4487 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4488 X1000, BITS4(N,Q,M,0), regM);
4489 break;
4490 case ARMneon_VMINU: /* VMIN.Uxx reg, reg, reg */
4491 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4492 X0110, BITS4(N,Q,M,1), regM);
4493 break;
4494 case ARMneon_VMINS: /* VMIN.Sxx reg, reg, reg */
4495 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4496 X0110, BITS4(N,Q,M,1), regM);
4497 break;
4498 case ARMneon_VMAXU: /* VMAX.Uxx reg, reg, reg */
4499 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4500 X0110, BITS4(N,Q,M,0), regM);
4501 break;
4502 case ARMneon_VMAXS: /* VMAX.Sxx reg, reg, reg */
4503 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4504 X0110, BITS4(N,Q,M,0), regM);
4505 break;
4506 case ARMneon_VRHADDS: /* VRHADD.Sxx reg, reg, reg */
4507 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4508 X0001, BITS4(N,Q,M,0), regM);
4509 break;
4510 case ARMneon_VRHADDU: /* VRHADD.Uxx reg, reg, reg */
4511 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4512 X0001, BITS4(N,Q,M,0), regM);
4513 break;
4514 case ARMneon_VQADDU: /* VQADD unsigned reg, reg, reg */
4515 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4516 X0000, BITS4(N,Q,M,1), regM);
4517 break;
4518 case ARMneon_VQADDS: /* VQADD signed reg, reg, reg */
4519 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4520 X0000, BITS4(N,Q,M,1), regM);
4521 break;
4522 case ARMneon_VQSUBU: /* VQSUB unsigned reg, reg, reg */
4523 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4524 X0010, BITS4(N,Q,M,1), regM);
4525 break;
4526 case ARMneon_VQSUBS: /* VQSUB signed reg, reg, reg */
4527 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4528 X0010, BITS4(N,Q,M,1), regM);
4529 break;
4530 case ARMneon_VCGTU: /* VCGT unsigned reg, reg, reg */
4531 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4532 X0011, BITS4(N,Q,M,0), regM);
4533 break;
4534 case ARMneon_VCGTS: /* VCGT signed reg, reg, reg */
4535 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4536 X0011, BITS4(N,Q,M,0), regM);
4537 break;
4538 case ARMneon_VCGEU: /* VCGE unsigned reg, reg, reg */
4539 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4540 X0011, BITS4(N,Q,M,1), regM);
4541 break;
4542 case ARMneon_VCGES: /* VCGE signed reg, reg, reg */
4543 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4544 X0011, BITS4(N,Q,M,1), regM);
4545 break;
4546 case ARMneon_VCEQ: /* VCEQ reg, reg, reg */
4547 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4548 X1000, BITS4(N,Q,M,1), regM);
4549 break;
4550 case ARMneon_VEXT: /* VEXT.8 reg, reg, #imm4*/
4551 if (i->ARMin.NBinary.size >= 16)
4552 goto bad;
4553 insn = XXXXXXXX(0xF, X0010, BITS4(1,D,1,1), regN, regD,
4554 i->ARMin.NBinary.size & 0xf, BITS4(N,Q,M,0),
4555 regM);
4556 break;
4557 case ARMneon_VMUL:
4558 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4559 X1001, BITS4(N,Q,M,1), regM);
4560 break;
4561 case ARMneon_VMULLU:
4562 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,sz1,sz2), regN, regD,
4563 X1100, BITS4(N,0,M,0), regM);
4564 break;
4565 case ARMneon_VMULLS:
4566 insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
4567 X1100, BITS4(N,0,M,0), regM);
4568 break;
4569 case ARMneon_VMULP:
4570 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4571 X1001, BITS4(N,Q,M,1), regM);
4572 break;
4573 case ARMneon_VMULFP:
4574 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
4575 X1101, BITS4(N,Q,M,1), regM);
4576 break;
4577 case ARMneon_VMULLP:
4578 insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
4579 X1110, BITS4(N,0,M,0), regM);
4580 break;
4581 case ARMneon_VQDMULH:
4582 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4583 X1011, BITS4(N,Q,M,0), regM);
4584 break;
4585 case ARMneon_VQRDMULH:
4586 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4587 X1011, BITS4(N,Q,M,0), regM);
4588 break;
4589 case ARMneon_VQDMULL:
4590 insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
4591 X1101, BITS4(N,0,M,0), regM);
4592 break;
4593 case ARMneon_VTBL:
4594 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), regN, regD,
4595 X1000, BITS4(N,0,M,0), regM);
4596 break;
4597 case ARMneon_VPADD:
4598 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4599 X1011, BITS4(N,Q,M,1), regM);
4600 break;
4601 case ARMneon_VPADDFP:
4602 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
4603 X1101, BITS4(N,Q,M,0), regM);
4604 break;
4605 case ARMneon_VPMINU:
4606 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4607 X1010, BITS4(N,Q,M,1), regM);
4608 break;
4609 case ARMneon_VPMINS:
4610 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4611 X1010, BITS4(N,Q,M,1), regM);
4612 break;
4613 case ARMneon_VPMAXU:
4614 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4615 X1010, BITS4(N,Q,M,0), regM);
4616 break;
4617 case ARMneon_VPMAXS:
4618 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4619 X1010, BITS4(N,Q,M,0), regM);
4620 break;
4621 case ARMneon_VADDFP: /* VADD reg, reg, reg */
4622 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
4623 X1101, BITS4(N,Q,M,0), regM);
4624 break;
4625 case ARMneon_VSUBFP: /* VSUB reg, reg, reg */
4626 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
4627 X1101, BITS4(N,Q,M,0), regM);
4628 break;
4629 case ARMneon_VABDFP: /* VABD reg, reg, reg */
4630 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
4631 X1101, BITS4(N,Q,M,0), regM);
4632 break;
4633 case ARMneon_VMINF:
4634 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
4635 X1111, BITS4(N,Q,M,0), regM);
4636 break;
4637 case ARMneon_VMAXF:
4638 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
4639 X1111, BITS4(N,Q,M,0), regM);
4640 break;
4641 case ARMneon_VPMINF:
4642 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
4643 X1111, BITS4(N,Q,M,0), regM);
4644 break;
4645 case ARMneon_VPMAXF:
4646 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
4647 X1111, BITS4(N,Q,M,0), regM);
4648 break;
4649 case ARMneon_VRECPS:
4650 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1111,
4651 BITS4(N,Q,M,1), regM);
4652 break;
4653 case ARMneon_VCGTF:
4654 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD, X1110,
4655 BITS4(N,Q,M,0), regM);
4656 break;
4657 case ARMneon_VCGEF:
4658 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X1110,
4659 BITS4(N,Q,M,0), regM);
4660 break;
4661 case ARMneon_VCEQF:
4662 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1110,
4663 BITS4(N,Q,M,0), regM);
4664 break;
4665 case ARMneon_VRSQRTS:
4666 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X1111,
4667 BITS4(N,Q,M,1), regM);
4668 break;
4669 default:
4670 goto bad;
4671 }
4672 *p++ = insn;
4673 goto done;
4675 case ARMin_NShift: {
4676 UInt Q = i->ARMin.NShift.Q ? 1 : 0;
4677 UInt regD = (hregClass(i->ARMin.NShift.dst) == HRcVec128)
4678 ? (qregEnc(i->ARMin.NShift.dst) << 1)
4679 : dregEnc(i->ARMin.NShift.dst);
4680 UInt regM = (hregClass(i->ARMin.NShift.argL) == HRcVec128)
4681 ? (qregEnc(i->ARMin.NShift.argL) << 1)
4682 : dregEnc(i->ARMin.NShift.argL);
4683 UInt regN = (hregClass(i->ARMin.NShift.argR) == HRcVec128)
4684 ? (qregEnc(i->ARMin.NShift.argR) << 1)
4685 : dregEnc(i->ARMin.NShift.argR);
4686 UInt sz1 = i->ARMin.NShift.size >> 1;
4687 UInt sz2 = i->ARMin.NShift.size & 1;
4688 UInt D = regD >> 4;
4689 UInt N = regN >> 4;
4690 UInt M = regM >> 4;
4691 UInt insn;
4692 regD &= 0xF;
4693 regM &= 0xF;
4694 regN &= 0xF;
4695 switch (i->ARMin.NShift.op) {
4696 case ARMneon_VSHL:
4697 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4698 X0100, BITS4(N,Q,M,0), regM);
4699 break;
4700 case ARMneon_VSAL:
4701 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4702 X0100, BITS4(N,Q,M,0), regM);
4703 break;
4704 case ARMneon_VQSHL:
4705 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4706 X0100, BITS4(N,Q,M,1), regM);
4707 break;
4708 case ARMneon_VQSAL:
4709 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4710 X0100, BITS4(N,Q,M,1), regM);
4711 break;
4712 default:
4713 goto bad;
4714 }
4715 *p++ = insn;
4716 goto done;
4718 case ARMin_NShl64: {
4719 HReg regDreg = i->ARMin.NShl64.dst;
4720 HReg regMreg = i->ARMin.NShl64.src;
4721 UInt amt = i->ARMin.NShl64.amt;
4722 vassert(amt >= 1 && amt <= 63);
4723 vassert(hregClass(regDreg) == HRcFlt64);
4724 vassert(hregClass(regMreg) == HRcFlt64);
4725 UInt regD = dregEnc(regDreg);
4726 UInt regM = dregEnc(regMreg);
4727 UInt D = (regD >> 4) & 1;
4728 UInt Vd = regD & 0xF;
4729 UInt L = 1;
4730 UInt Q = 0; /* always 64-bit */
4731 UInt M = (regM >> 4) & 1;
4732 UInt Vm = regM & 0xF;
4733 UInt insn = XXXXXXXX(X1111,X0010, BITS4(1,D,(amt>>5)&1,(amt>>4)&1),
4734 amt & 0xF, Vd, X0101, BITS4(L,Q,M,1), Vm);
4735 *p++ = insn;
4736 goto done;
4738 case ARMin_NeonImm: {
4739 UInt Q = (hregClass(i->ARMin.NeonImm.dst) == HRcVec128) ? 1 : 0;
4740 UInt regD = Q ? (qregEnc(i->ARMin.NeonImm.dst) << 1) :
4741 dregEnc(i->ARMin.NeonImm.dst);
4742 UInt D = regD >> 4;
4743 UInt imm = i->ARMin.NeonImm.imm->imm8;
4744 UInt tp = i->ARMin.NeonImm.imm->type;
4745 UInt j = imm >> 7;
4746 UInt imm3 = (imm >> 4) & 0x7;
4747 UInt imm4 = imm & 0xF;
4748 UInt cmode, op;
4749 UInt insn;
4750 regD &= 0xF;
4751 if (tp == 9)
4752 op = 1;
4753 else
4754 op = 0;
4755 switch (tp) {
4756 case 0:
4757 case 1:
4758 case 2:
4759 case 3:
4760 case 4:
4761 case 5:
4762 cmode = tp << 1;
4763 break;
4764 case 9:
4765 case 6:
4766 cmode = 14;
4767 break;
4768 case 7:
4769 cmode = 12;
4770 break;
4771 case 8:
4772 cmode = 13;
4773 break;
4774 case 10:
4775 cmode = 15;
4776 break;
4777 default:
4778 vpanic("ARMin_NeonImm");
4779 }
4781 insn = XXXXXXXX(0xF, BITS4(0,0,1,j), BITS4(1,D,0,0), imm3, regD,
4782 cmode, BITS4(0,Q,op,1), imm4);
4783 *p++ = insn;
4784 goto done;
4786 case ARMin_NCMovQ: {
4787 UInt cc = (UInt)i->ARMin.NCMovQ.cond;
4788 UInt qM = qregEnc(i->ARMin.NCMovQ.src) << 1;
4789 UInt qD = qregEnc(i->ARMin.NCMovQ.dst) << 1;
4790 UInt vM = qM & 0xF;
4791 UInt vD = qD & 0xF;
4792 UInt M = (qM >> 4) & 1;
4793 UInt D = (qD >> 4) & 1;
4794 vassert(cc < 16 && cc != ARMcc_AL && cc != ARMcc_NV);
4795 /* b!cc here+8: !cc A00 0000 */
4796 UInt insn = XXXXXXXX(cc ^ 1, 0xA, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0);
4797 *p++ = insn;
4798 /* vmov qD, qM */
4799 insn = XXXXXXXX(0xF, 0x2, BITS4(0,D,1,0),
4800 vM, vD, BITS4(0,0,0,1), BITS4(M,1,M,1), vM);
4801 *p++ = insn;
4802 goto done;
4804 case ARMin_Add32: {
4805 UInt regD = iregEnc(i->ARMin.Add32.rD);
4806 UInt regN = iregEnc(i->ARMin.Add32.rN);
4807 UInt imm32 = i->ARMin.Add32.imm32;
4808 vassert(regD != regN);
4809 /* MOV regD, imm32 */
4810 p = imm32_to_ireg((UInt *)p, regD, imm32);
4811 /* ADD regD, regN, regD */
4812 UInt insn = XXXXXXXX(0xE, 0, X1000, regN, regD, 0, 0, regD);
4813 *p++ = insn;
4814 goto done;
4815 }
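/* Worked example (illustrative registers): with rD=r2 and rN=r3
   the ADD word assembles to 0xE0832002, i.e. add r2, r3, r2; the
   preceding imm32_to_ireg call has already placed imm32 in r2, so
   the overall effect is r2 = r3 + imm32. */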
4817 case ARMin_EvCheck: {
4818 /* We generate:
4819 ldr r12, [r8 + #4] 4 == offsetof(host_EvC_COUNTER)
4820 subs r12, r12, #1 (A1)
4821 str r12, [r8 + #4] 4 == offsetof(host_EvC_COUNTER)
4822 bpl nofail
4823 ldr r12, [r8 + #0] 0 == offsetof(host_EvC_FAILADDR)
4824 bx r12
4825 nofail:
4826 */
4827 UInt* p0 = p;
4828 p = do_load_or_store32(p, True/*isLoad*/, /*r*/12,
4829 i->ARMin.EvCheck.amCounter);
4830 *p++ = 0xE25CC001; /* subs r12, r12, #1 */
4831 p = do_load_or_store32(p, False/*!isLoad*/, /*r*/12,
4832 i->ARMin.EvCheck.amCounter);
4833 *p++ = 0x5A000001; /* bpl nofail */
4834 p = do_load_or_store32(p, True/*isLoad*/, /*r*/12,
4835 i->ARMin.EvCheck.amFailAddr);
4836 *p++ = 0xE12FFF1C; /* bx r12 */
4837 /* nofail: */
4839 /* Crosscheck */
4840 vassert(evCheckSzB_ARM() == (UChar*)p - (UChar*)p0);
4841 goto done;
4842 }
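/* Size note: the sequence is three single-word loads/stores from
   do_load_or_store32 (the amodes used here must fit in one word)
   plus the subs, bpl and bx words, i.e. six words or 24 bytes,
   which is exactly what evCheckSzB_ARM() returns and what the
   vassert above crosschecks. */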
4844 case ARMin_ProfInc: {
4845 /* We generate:
4846 (ctrP is unknown now, so use 0x65556555 in the
4847 expectation that a later call to LibVEX_patchProfCtr
4848 will be used to fill in the immediate fields once the
4849 right value is known.)
4850 movw r12, lo16(0x65556555)
4851 movt r12, hi16(0x65556555)
4852 ldr r11, [r12]
4853 adds r11, r11, #1
4854 str r11, [r12]
4855 ldr r11, [r12+4]
4856 adc r11, r11, #0
4857 str r11, [r12+4]
4858 */
4859 p = imm32_to_ireg_EXACTLY2(p, /*r*/12, 0x65556555);
4860 *p++ = 0xE59CB000; /* ldr r11, [r12] */
4861 *p++ = 0xE29BB001; /* adds r11, r11, #1 */
4862 *p++ = 0xE58CB000; /* str r11, [r12] */
4863 *p++ = 0xE59CB004; /* ldr r11, [r12+4] */
4864 *p++ = 0xE2ABB000; /* adc r11, r11, #0 */
4865 *p++ = 0xE58CB004; /* str r11, [r12+4] */
4866 /* Tell the caller .. */
4867 vassert(!(*is_profInc));
4868 *is_profInc = True;
4869 goto done;
4870 }
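/* The adds/adc pair performs a 64-bit increment: the low word at
   [r12] is bumped with adds and any carry is folded into the high
   word at [r12+4] with adc.  The 0x65556555 placeholder loaded by
   imm32_to_ireg_EXACTLY2 is what patchProfInc_ARM below looks for
   when it later rewrites the movw/movt pair with the real counter
   address. */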
4872 /* ... */
4873 default:
4874 goto bad;
4875 }
4877 bad:
4878 ppARMInstr(i);
4879 vpanic("emit_ARMInstr");
4880 /*NOTREACHED*/
4882 done:
4883 vassert(((UChar*)p) - &buf[0] <= 32);
4884 return ((UChar*)p) - &buf[0];
4885 }
4888 /* How big is an event check? See case for ARMin_EvCheck in
4889 emit_ARMInstr just above. That crosschecks what this returns, so
4890 we can tell if we're inconsistent. */
4891 Int evCheckSzB_ARM (void)
4892 {
4893 return 24;
4894 }
4897 /* NB: what goes on here has to be very closely coordinated with the
4898 emitInstr case for XDirect, above. */
4899 VexInvalRange chainXDirect_ARM ( VexEndness endness_host,
4900 void* place_to_chain,
4901 const void* disp_cp_chain_me_EXPECTED,
4902 const void* place_to_jump_to )
4903 {
4904 vassert(endness_host == VexEndnessLE);
4906 /* What we're expecting to see is:
4907 movw r12, lo16(disp_cp_chain_me_to_EXPECTED)
4908 movt r12, hi16(disp_cp_chain_me_to_EXPECTED)
4909 blx r12
4911 <8 bytes generated by imm32_to_ireg_EXACTLY2>
4912 E1 2F FF 3C
4913 */
4914 UInt* p = (UInt*)place_to_chain;
4915 vassert(0 == (3 & (HWord)p));
4916 vassert(is_imm32_to_ireg_EXACTLY2(
4917 p, /*r*/12, (UInt)(Addr)disp_cp_chain_me_EXPECTED));
4918 vassert(p[2] == 0xE12FFF3C);
4919 /* And what we want to change it to is either:
4920 (general case)
4921 movw r12, lo16(place_to_jump_to)
4922 movt r12, hi16(place_to_jump_to)
4923 bx r12
4925 <8 bytes generated by imm32_to_ireg_EXACTLY2>
4926 E1 2F FF 1C
4927 ---OR---
4928 in the case where the displacement falls within 26 bits
4929 b disp24; undef; undef
4931 EA <3 bytes == disp24>
4932 FF 00 00 00
4933 FF 00 00 00
4935 In both cases the replacement has the same length as the original.
4936 To remain sane & verifiable,
4937 (1) limit the displacement for the short form to
4938 (say) +/- 30 million, so as to avoid wraparound
4939 off-by-ones
4940 (2) even if the short form is applicable, once every (say)
4941 1024 times use the long form anyway, so as to maintain
4942 verifiability
4943 */
4945 /* This is the delta we need to put into a B insn. It's relative
4946 to the start of the next-but-one insn, hence the -8. */
4947 Long delta = (Long)((const UChar *)place_to_jump_to - (const UChar*)p) - 8;
4948 Bool shortOK = delta >= -30*1000*1000 && delta < 30*1000*1000;
4949 vassert(0 == (delta & (Long)3));
4951 static UInt shortCTR = 0; /* DO NOT MAKE NON-STATIC */
4952 if (shortOK) {
4953 shortCTR++; // thread safety bleh
4954 if (0 == (shortCTR & 0x3FF)) {
4955 shortOK = False;
4956 if (0)
4957 vex_printf("QQQ chainXDirect_ARM: shortCTR = %u, "
4958 "using long form\n", shortCTR);
4959 }
4960 }
4962 /* And make the modifications. */
4963 if (shortOK) {
4964 UInt uimm24 = (UInt)(delta >> 2);
4965 UInt uimm24_shl8 = uimm24 << 8;
4966 Int simm24 = (Int)uimm24_shl8;
4967 simm24 >>= 8;
4968 vassert(uimm24 == simm24);
4969 p[0] = 0xEA000000 | (simm24 & 0x00FFFFFF);
4970 p[1] = 0xFF000000;
4971 p[2] = 0xFF000000;
4972 } else {
4973 (void)imm32_to_ireg_EXACTLY2(
4974 p, /*r*/12, (UInt)(Addr)place_to_jump_to);
4975 p[2] = 0xE12FFF1C;
4976 }
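/* Worked example of the short form (illustrative displacement):
   if place_to_jump_to is 0x1008 bytes past p, then delta = 0x1000
   and uimm24 = 0x400, so p[0] becomes 0xEA000400, an unconditional
   B whose target (PC+8 plus the shifted immediate) is exactly
   place_to_jump_to.  p[1] and p[2] are set to 0xFF000000 filler
   words that are never executed, so both forms occupy the same
   12 bytes. */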
4978 VexInvalRange vir = {(HWord)p, 12};
4979 return vir;
4980 }
4983 /* NB: what goes on here has to be very closely coordinated with the
4984 emitInstr case for XDirect, above. */
4985 VexInvalRange unchainXDirect_ARM ( VexEndness endness_host,
4986 void* place_to_unchain,
4987 const void* place_to_jump_to_EXPECTED,
4988 const void* disp_cp_chain_me )
4989 {
4990 vassert(endness_host == VexEndnessLE);
4992 /* What we're expecting to see is:
4993 (general case)
4994 movw r12, lo16(place_to_jump_to_EXPECTED)
4995 movt r12, hi16(place_to_jump_to_EXPECTED)
4996 bx r12
4998 <8 bytes generated by imm32_to_ireg_EXACTLY2>
4999 E1 2F FF 1C
5000 ---OR---
5001 in the case where the displacement falls within 26 bits
5002 b disp24; undef; undef
5004 EA <3 bytes == disp24>
5005 FF 00 00 00
5006 FF 00 00 00
5007 */
5008 UInt* p = (UInt*)place_to_unchain;
5009 vassert(0 == (3 & (HWord)p));
5011 Bool valid = False;
5012 if (is_imm32_to_ireg_EXACTLY2(
5013 p, /*r*/12, (UInt)(Addr)place_to_jump_to_EXPECTED)
5014 && p[2] == 0xE12FFF1C) {
5015 valid = True; /* it's the long form */
5016 if (0)
5017 vex_printf("QQQ unchainXDirect_ARM: found long form\n");
5018 } else
5019 if ((p[0] >> 24) == 0xEA && p[1] == 0xFF000000 && p[2] == 0xFF000000) {
5020 /* It's the short form. Check the displacement is right. */
5021 Int simm24 = p[0] & 0x00FFFFFF;
5022 simm24 <<= 8; simm24 >>= 8;
5023 if ((UChar*)p + (simm24 << 2) + 8 == place_to_jump_to_EXPECTED) {
5024 valid = True;
5025 if (0)
5026 vex_printf("QQQ unchainXDirect_ARM: found short form\n");
5027 }
5028 }
5029 vassert(valid);
5031 /* And what we want to change it to is:
5032 movw r12, lo16(disp_cp_chain_me)
5033 movt r12, hi16(disp_cp_chain_me)
5034 blx r12
5036 <8 bytes generated by imm32_to_ireg_EXACTLY2>
5037 E1 2F FF 3C
5038 */
5039 (void)imm32_to_ireg_EXACTLY2(
5040 p, /*r*/12, (UInt)(Addr)disp_cp_chain_me);
5041 p[2] = 0xE12FFF3C;
5042 VexInvalRange vir = {(HWord)p, 12};
5043 return vir;
5044 }
5047 /* Patch the counter address into a profile inc point, as previously
5048 created by the ARMin_ProfInc case for emit_ARMInstr. */
5049 VexInvalRange patchProfInc_ARM ( VexEndness endness_host,
5050 void* place_to_patch,
5051 const ULong* location_of_counter )
5052 {
5053 vassert(endness_host == VexEndnessLE);
5054 vassert(sizeof(ULong*) == 4);
5055 UInt* p = (UInt*)place_to_patch;
5056 vassert(0 == (3 & (HWord)p));
5057 vassert(is_imm32_to_ireg_EXACTLY2(p, /*r*/12, 0x65556555));
5058 vassert(p[2] == 0xE59CB000);
5059 vassert(p[3] == 0xE29BB001);
5060 vassert(p[4] == 0xE58CB000);
5061 vassert(p[5] == 0xE59CB004);
5062 vassert(p[6] == 0xE2ABB000);
5063 vassert(p[7] == 0xE58CB004);
5064 imm32_to_ireg_EXACTLY2(p, /*r*/12, (UInt)(Addr)location_of_counter);
5065 VexInvalRange vir = {(HWord)p, 8};
5066 return vir;
5067 }
5070 #undef BITS4
5071 #undef X0000
5072 #undef X0001
5073 #undef X0010
5074 #undef X0011
5075 #undef X0100
5076 #undef X0101
5077 #undef X0110
5078 #undef X0111
5079 #undef X1000
5080 #undef X1001
5081 #undef X1010
5082 #undef X1011
5083 #undef X1100
5084 #undef X1101
5085 #undef X1110
5086 #undef X1111
5087 #undef XXXXX___
5088 #undef XXXXXX__
5089 #undef XXX___XX
5090 #undef XXXXX__X
5091 #undef XXXXXXXX
5092 #undef XX______
5094 /*---------------------------------------------------------------*/
5095 /*--- end host_arm_defs.c ---*/
5096 /*---------------------------------------------------------------*/