Bug 497723 - forgot to restore callgrind output cleanup
[valgrind.git] / VEX / priv / host_arm_defs.c
blobe30336583dcc020e674f883596619a12a35d037d
2 /*---------------------------------------------------------------*/
3 /*--- begin host_arm_defs.c ---*/
4 /*---------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2004-2017 OpenWorks LLP
11 info@open-works.net
13 NEON support is
14 Copyright (C) 2010-2017 Samsung Electronics
15 contributed by Dmitry Zhurikhin <zhur@ispras.ru>
16 and Kirill Batuzov <batuzovk@ispras.ru>
18 This program is free software; you can redistribute it and/or
19 modify it under the terms of the GNU General Public License as
20 published by the Free Software Foundation; either version 2 of the
21 License, or (at your option) any later version.
23 This program is distributed in the hope that it will be useful, but
24 WITHOUT ANY WARRANTY; without even the implied warranty of
25 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
26 General Public License for more details.
28 You should have received a copy of the GNU General Public License
29 along with this program; if not, see <http://www.gnu.org/licenses/>.
31 The GNU General Public License is contained in the file COPYING.
34 #include "libvex_basictypes.h"
35 #include "libvex.h"
36 #include "libvex_trc_values.h"
38 #include "main_util.h"
39 #include "host_generic_regs.h"
40 #include "host_arm_defs.h"
/* Global ARM hardware-capabilities word, initialised to zero here.
   NOTE(review): presumably assigned by the translation driver before
   any ARM code is generated -- confirm against the callers. */
42 UInt arm_hwcaps = 0;
45 /* --------- Registers. --------- */
47 const RRegUniverse* getRRegUniverse_ARM ( void )
49 /* The real-register universe is a big constant, so we just want to
50 initialise it once. */
51 static RRegUniverse rRegUniverse_ARM;
52 static Bool rRegUniverse_ARM_initted = False;
54 /* Handy shorthand, nothing more */
55 RRegUniverse* ru = &rRegUniverse_ARM;
57 /* This isn't thread-safe. Sigh. */
58 if (LIKELY(rRegUniverse_ARM_initted))
59 return ru;
61 RRegUniverse__init(ru);
63 /* Add the registers. The initial segment of this array must be
64 those available for allocation by reg-alloc, and those that
65 follow are not available for allocation. */
67 /* Callee saves ones are listed first, since we prefer them
68 if they're available. */
69 ru->allocable_start[HRcInt32] = ru->size;
70 ru->regs[ru->size++] = hregARM_R4();
71 ru->regs[ru->size++] = hregARM_R5();
72 ru->regs[ru->size++] = hregARM_R6();
73 ru->regs[ru->size++] = hregARM_R7();
74 ru->regs[ru->size++] = hregARM_R10();
75 ru->regs[ru->size++] = hregARM_R11();
76 /* Otherwise we'll have to slum it out with caller-saves ones. */
77 ru->regs[ru->size++] = hregARM_R0();
78 ru->regs[ru->size++] = hregARM_R1();
79 ru->regs[ru->size++] = hregARM_R2();
80 ru->regs[ru->size++] = hregARM_R3();
81 ru->regs[ru->size++] = hregARM_R9();
82 ru->allocable_end[HRcInt32] = ru->size - 1;
84 /* FP registers. Note: these are all callee-save. Yay! Hence we
85 don't need to mention them as trashed in getHRegUsage for
86 ARMInstr_Call. */
87 ru->allocable_start[HRcFlt64] = ru->size;
88 ru->regs[ru->size++] = hregARM_D8();
89 ru->regs[ru->size++] = hregARM_D9();
90 ru->regs[ru->size++] = hregARM_D10();
91 ru->regs[ru->size++] = hregARM_D11();
92 ru->regs[ru->size++] = hregARM_D12();
93 ru->allocable_end[HRcFlt64] = ru->size - 1;
95 ru->allocable_start[HRcFlt32] = ru->size;
96 ru->regs[ru->size++] = hregARM_S26();
97 ru->regs[ru->size++] = hregARM_S27();
98 ru->regs[ru->size++] = hregARM_S28();
99 ru->regs[ru->size++] = hregARM_S29();
100 ru->regs[ru->size++] = hregARM_S30();
101 ru->allocable_end[HRcFlt32] = ru->size - 1;
103 ru->allocable_start[HRcVec128] = ru->size;
104 ru->regs[ru->size++] = hregARM_Q8();
105 ru->regs[ru->size++] = hregARM_Q9();
106 ru->regs[ru->size++] = hregARM_Q10();
107 ru->regs[ru->size++] = hregARM_Q11();
108 ru->regs[ru->size++] = hregARM_Q12();
109 ru->allocable_end[HRcVec128] = ru->size - 1;
110 ru->allocable = ru->size;
112 /* And other regs, not available to the allocator. */
114 // unavail: r8 as GSP
115 // r12 is used as a spill/reload temporary
116 // r13 as SP
117 // r14 as LR
118 // r15 as PC
120 // All in all, we have 11 allocatable integer registers:
121 // 0 1 2 3 4 5 6 7 9 10 11, with r8 dedicated as GSP
122 // and r12 dedicated as a spill temporary.
123 // 13 14 and 15 are not under the allocator's control.
125 // Hence for the allocatable registers we have:
127 // callee-saved: 4 5 6 7 (8) 9 10 11
128 // caller-saved: 0 1 2 3
129 // Note 9 is ambiguous: the base EABI does not give an e/r-saved
130 // designation for it, but the Linux instantiation of the ABI
131 // specifies it as callee-saved.
133 // If the set of available registers changes or if the e/r status
134 // changes, be sure to re-check/sync the definition of
135 // getHRegUsage for ARMInstr_Call too.
136 ru->regs[ru->size++] = hregARM_R8();
137 ru->regs[ru->size++] = hregARM_R12();
138 ru->regs[ru->size++] = hregARM_R13();
139 ru->regs[ru->size++] = hregARM_R14();
140 ru->regs[ru->size++] = hregARM_R15();
141 ru->regs[ru->size++] = hregARM_Q13();
142 ru->regs[ru->size++] = hregARM_Q14();
143 ru->regs[ru->size++] = hregARM_Q15();
145 rRegUniverse_ARM_initted = True;
147 RRegUniverse__check_is_sane(ru);
148 return ru;
152 UInt ppHRegARM ( HReg reg ) {
153 Int r;
154 /* Be generic for all virtual regs. */
155 if (hregIsVirtual(reg)) {
156 return ppHReg(reg);
158 /* But specific for real regs. */
159 switch (hregClass(reg)) {
160 case HRcInt32:
161 r = hregEncoding(reg);
162 vassert(r >= 0 && r < 16);
163 return vex_printf("r%d", r);
164 case HRcFlt64:
165 r = hregEncoding(reg);
166 vassert(r >= 0 && r < 32);
167 return vex_printf("d%d", r);
168 case HRcFlt32:
169 r = hregEncoding(reg);
170 vassert(r >= 0 && r < 32);
171 return vex_printf("s%d", r);
172 case HRcVec128:
173 r = hregEncoding(reg);
174 vassert(r >= 0 && r < 16);
175 return vex_printf("q%d", r);
176 default:
177 vpanic("ppHRegARM");
182 /* --------- Condition codes, ARM encoding. --------- */
184 const HChar* showARMCondCode ( ARMCondCode cond ) {
185 switch (cond) {
186 case ARMcc_EQ: return "eq";
187 case ARMcc_NE: return "ne";
188 case ARMcc_HS: return "hs";
189 case ARMcc_LO: return "lo";
190 case ARMcc_MI: return "mi";
191 case ARMcc_PL: return "pl";
192 case ARMcc_VS: return "vs";
193 case ARMcc_VC: return "vc";
194 case ARMcc_HI: return "hi";
195 case ARMcc_LS: return "ls";
196 case ARMcc_GE: return "ge";
197 case ARMcc_LT: return "lt";
198 case ARMcc_GT: return "gt";
199 case ARMcc_LE: return "le";
200 case ARMcc_AL: return "al"; // default
201 case ARMcc_NV: return "nv";
202 default: vpanic("showARMCondCode");
207 /* --------- Mem AModes: Addressing Mode 1 --------- */
209 ARMAMode1* ARMAMode1_RI ( HReg reg, Int simm13 ) {
210 ARMAMode1* am = LibVEX_Alloc_inline(sizeof(ARMAMode1));
211 am->tag = ARMam1_RI;
212 am->ARMam1.RI.reg = reg;
213 am->ARMam1.RI.simm13 = simm13;
214 vassert(-4095 <= simm13 && simm13 <= 4095);
215 return am;
217 ARMAMode1* ARMAMode1_RRS ( HReg base, HReg index, UInt shift ) {
218 ARMAMode1* am = LibVEX_Alloc_inline(sizeof(ARMAMode1));
219 am->tag = ARMam1_RRS;
220 am->ARMam1.RRS.base = base;
221 am->ARMam1.RRS.index = index;
222 am->ARMam1.RRS.shift = shift;
223 vassert(shift <= 3);
224 return am;
227 void ppARMAMode1 ( ARMAMode1* am ) {
228 switch (am->tag) {
229 case ARMam1_RI:
230 vex_printf("%d(", am->ARMam1.RI.simm13);
231 ppHRegARM(am->ARMam1.RI.reg);
232 vex_printf(")");
233 break;
234 case ARMam1_RRS:
235 vex_printf("(");
236 ppHRegARM(am->ARMam1.RRS.base);
237 vex_printf(",");
238 ppHRegARM(am->ARMam1.RRS.index);
239 vex_printf(",%u)", am->ARMam1.RRS.shift);
240 break;
241 default:
242 vassert(0);
246 static void addRegUsage_ARMAMode1 ( HRegUsage* u, ARMAMode1* am ) {
247 switch (am->tag) {
248 case ARMam1_RI:
249 addHRegUse(u, HRmRead, am->ARMam1.RI.reg);
250 return;
251 case ARMam1_RRS:
252 // addHRegUse(u, HRmRead, am->ARMam1.RRS.base);
253 // addHRegUse(u, HRmRead, am->ARMam1.RRS.index);
254 // return;
255 default:
256 vpanic("addRegUsage_ARMAmode1");
260 static void mapRegs_ARMAMode1 ( HRegRemap* m, ARMAMode1* am ) {
261 switch (am->tag) {
262 case ARMam1_RI:
263 am->ARMam1.RI.reg = lookupHRegRemap(m, am->ARMam1.RI.reg);
264 return;
265 case ARMam1_RRS:
266 //am->ARMam1.RR.base =lookupHRegRemap(m, am->ARMam1.RR.base);
267 //am->ARMam1.RR.index = lookupHRegRemap(m, am->ARMam1.RR.index);
268 //return;
269 default:
270 vpanic("mapRegs_ARMAmode1");
275 /* --------- Mem AModes: Addressing Mode 2 --------- */
277 ARMAMode2* ARMAMode2_RI ( HReg reg, Int simm9 ) {
278 ARMAMode2* am = LibVEX_Alloc_inline(sizeof(ARMAMode2));
279 am->tag = ARMam2_RI;
280 am->ARMam2.RI.reg = reg;
281 am->ARMam2.RI.simm9 = simm9;
282 vassert(-255 <= simm9 && simm9 <= 255);
283 return am;
285 ARMAMode2* ARMAMode2_RR ( HReg base, HReg index ) {
286 ARMAMode2* am = LibVEX_Alloc_inline(sizeof(ARMAMode2));
287 am->tag = ARMam2_RR;
288 am->ARMam2.RR.base = base;
289 am->ARMam2.RR.index = index;
290 return am;
293 void ppARMAMode2 ( ARMAMode2* am ) {
294 switch (am->tag) {
295 case ARMam2_RI:
296 vex_printf("%d(", am->ARMam2.RI.simm9);
297 ppHRegARM(am->ARMam2.RI.reg);
298 vex_printf(")");
299 break;
300 case ARMam2_RR:
301 vex_printf("(");
302 ppHRegARM(am->ARMam2.RR.base);
303 vex_printf(",");
304 ppHRegARM(am->ARMam2.RR.index);
305 vex_printf(")");
306 break;
307 default:
308 vassert(0);
312 static void addRegUsage_ARMAMode2 ( HRegUsage* u, ARMAMode2* am ) {
313 switch (am->tag) {
314 case ARMam2_RI:
315 addHRegUse(u, HRmRead, am->ARMam2.RI.reg);
316 return;
317 case ARMam2_RR:
318 // addHRegUse(u, HRmRead, am->ARMam2.RR.base);
319 // addHRegUse(u, HRmRead, am->ARMam2.RR.index);
320 // return;
321 default:
322 vpanic("addRegUsage_ARMAmode2");
326 static void mapRegs_ARMAMode2 ( HRegRemap* m, ARMAMode2* am ) {
327 switch (am->tag) {
328 case ARMam2_RI:
329 am->ARMam2.RI.reg = lookupHRegRemap(m, am->ARMam2.RI.reg);
330 return;
331 case ARMam2_RR:
332 //am->ARMam2.RR.base =lookupHRegRemap(m, am->ARMam2.RR.base);
333 //am->ARMam2.RR.index = lookupHRegRemap(m, am->ARMam2.RR.index);
334 //return;
335 default:
336 vpanic("mapRegs_ARMAmode2");
341 /* --------- Mem AModes: Addressing Mode VFP --------- */
343 ARMAModeV* mkARMAModeV ( HReg reg, Int simm11 ) {
344 ARMAModeV* am = LibVEX_Alloc_inline(sizeof(ARMAModeV));
345 vassert(simm11 >= -1020 && simm11 <= 1020);
346 vassert(0 == (simm11 & 3));
347 am->reg = reg;
348 am->simm11 = simm11;
349 return am;
352 void ppARMAModeV ( ARMAModeV* am ) {
353 vex_printf("%d(", am->simm11);
354 ppHRegARM(am->reg);
355 vex_printf(")");
358 static void addRegUsage_ARMAModeV ( HRegUsage* u, ARMAModeV* am ) {
359 addHRegUse(u, HRmRead, am->reg);
362 static void mapRegs_ARMAModeV ( HRegRemap* m, ARMAModeV* am ) {
363 am->reg = lookupHRegRemap(m, am->reg);
367 /* --------- Mem AModes: Addressing Mode Neon ------- */
369 ARMAModeN *mkARMAModeN_RR ( HReg rN, HReg rM ) {
370 ARMAModeN* am = LibVEX_Alloc_inline(sizeof(ARMAModeN));
371 am->tag = ARMamN_RR;
372 am->ARMamN.RR.rN = rN;
373 am->ARMamN.RR.rM = rM;
374 return am;
377 ARMAModeN *mkARMAModeN_R ( HReg rN ) {
378 ARMAModeN* am = LibVEX_Alloc_inline(sizeof(ARMAModeN));
379 am->tag = ARMamN_R;
380 am->ARMamN.R.rN = rN;
381 return am;
384 static void addRegUsage_ARMAModeN ( HRegUsage* u, ARMAModeN* am ) {
385 if (am->tag == ARMamN_R) {
386 addHRegUse(u, HRmRead, am->ARMamN.R.rN);
387 } else {
388 addHRegUse(u, HRmRead, am->ARMamN.RR.rN);
389 addHRegUse(u, HRmRead, am->ARMamN.RR.rM);
393 static void mapRegs_ARMAModeN ( HRegRemap* m, ARMAModeN* am ) {
394 if (am->tag == ARMamN_R) {
395 am->ARMamN.R.rN = lookupHRegRemap(m, am->ARMamN.R.rN);
396 } else {
397 am->ARMamN.RR.rN = lookupHRegRemap(m, am->ARMamN.RR.rN);
398 am->ARMamN.RR.rM = lookupHRegRemap(m, am->ARMamN.RR.rM);
402 void ppARMAModeN ( ARMAModeN* am ) {
403 vex_printf("[");
404 if (am->tag == ARMamN_R) {
405 ppHRegARM(am->ARMamN.R.rN);
406 } else {
407 ppHRegARM(am->ARMamN.RR.rN);
409 vex_printf("]");
410 if (am->tag == ARMamN_RR) {
411 vex_printf(", ");
412 ppHRegARM(am->ARMamN.RR.rM);
417 /* --------- Reg or imm-8x4 operands --------- */
419 static UInt ROR32 ( UInt x, UInt sh ) {
420 vassert(sh < 32);
421 if (sh == 0)
422 return x;
423 else
424 return (x << (32-sh)) | (x >> sh);
427 ARMRI84* ARMRI84_I84 ( UShort imm8, UShort imm4 ) {
428 ARMRI84* ri84 = LibVEX_Alloc_inline(sizeof(ARMRI84));
429 ri84->tag = ARMri84_I84;
430 ri84->ARMri84.I84.imm8 = imm8;
431 ri84->ARMri84.I84.imm4 = imm4;
432 vassert(imm8 <= 255);
433 vassert(imm4 <= 15);
434 return ri84;
436 ARMRI84* ARMRI84_R ( HReg reg ) {
437 ARMRI84* ri84 = LibVEX_Alloc_inline(sizeof(ARMRI84));
438 ri84->tag = ARMri84_R;
439 ri84->ARMri84.R.reg = reg;
440 return ri84;
443 void ppARMRI84 ( ARMRI84* ri84 ) {
444 switch (ri84->tag) {
445 case ARMri84_I84:
446 vex_printf("0x%x", ROR32(ri84->ARMri84.I84.imm8,
447 2 * ri84->ARMri84.I84.imm4));
448 break;
449 case ARMri84_R:
450 ppHRegARM(ri84->ARMri84.R.reg);
451 break;
452 default:
453 vassert(0);
457 static void addRegUsage_ARMRI84 ( HRegUsage* u, ARMRI84* ri84 ) {
458 switch (ri84->tag) {
459 case ARMri84_I84:
460 return;
461 case ARMri84_R:
462 addHRegUse(u, HRmRead, ri84->ARMri84.R.reg);
463 return;
464 default:
465 vpanic("addRegUsage_ARMRI84");
469 static void mapRegs_ARMRI84 ( HRegRemap* m, ARMRI84* ri84 ) {
470 switch (ri84->tag) {
471 case ARMri84_I84:
472 return;
473 case ARMri84_R:
474 ri84->ARMri84.R.reg = lookupHRegRemap(m, ri84->ARMri84.R.reg);
475 return;
476 default:
477 vpanic("mapRegs_ARMRI84");
482 /* --------- Reg or imm5 operands --------- */
484 ARMRI5* ARMRI5_I5 ( UInt imm5 ) {
485 ARMRI5* ri5 = LibVEX_Alloc_inline(sizeof(ARMRI5));
486 ri5->tag = ARMri5_I5;
487 ri5->ARMri5.I5.imm5 = imm5;
488 vassert(imm5 > 0 && imm5 <= 31); // zero is not allowed
489 return ri5;
491 ARMRI5* ARMRI5_R ( HReg reg ) {
492 ARMRI5* ri5 = LibVEX_Alloc_inline(sizeof(ARMRI5));
493 ri5->tag = ARMri5_R;
494 ri5->ARMri5.R.reg = reg;
495 return ri5;
498 void ppARMRI5 ( ARMRI5* ri5 ) {
499 switch (ri5->tag) {
500 case ARMri5_I5:
501 vex_printf("%u", ri5->ARMri5.I5.imm5);
502 break;
503 case ARMri5_R:
504 ppHRegARM(ri5->ARMri5.R.reg);
505 break;
506 default:
507 vassert(0);
511 static void addRegUsage_ARMRI5 ( HRegUsage* u, ARMRI5* ri5 ) {
512 switch (ri5->tag) {
513 case ARMri5_I5:
514 return;
515 case ARMri5_R:
516 addHRegUse(u, HRmRead, ri5->ARMri5.R.reg);
517 return;
518 default:
519 vpanic("addRegUsage_ARMRI5");
523 static void mapRegs_ARMRI5 ( HRegRemap* m, ARMRI5* ri5 ) {
524 switch (ri5->tag) {
525 case ARMri5_I5:
526 return;
527 case ARMri5_R:
528 ri5->ARMri5.R.reg = lookupHRegRemap(m, ri5->ARMri5.R.reg);
529 return;
530 default:
531 vpanic("mapRegs_ARMRI5");
535 /* -------- Neon Immediate operand --------- */
537 ARMNImm* ARMNImm_TI ( UInt type, UInt imm8 ) {
538 ARMNImm* i = LibVEX_Alloc_inline(sizeof(ARMNImm));
539 i->type = type;
540 i->imm8 = imm8;
541 return i;
544 ULong ARMNImm_to_Imm64 ( ARMNImm* imm ) {
545 int i, j;
546 ULong y, x = imm->imm8;
547 switch (imm->type) {
548 case 3:
549 x = x << 8; /* fallthrough */
550 case 2:
551 x = x << 8; /* fallthrough */
552 case 1:
553 x = x << 8; /* fallthrough */
554 case 0:
555 return (x << 32) | x;
556 case 5:
557 case 6:
558 if (imm->type == 5)
559 x = x << 8;
560 else
561 x = (x << 8) | x;
562 /* fallthrough */
563 case 4:
564 x = (x << 16) | x;
565 return (x << 32) | x;
566 case 8:
567 x = (x << 8) | 0xFF;
568 /* fallthrough */
569 case 7:
570 x = (x << 8) | 0xFF;
571 return (x << 32) | x;
572 case 9:
573 x = 0;
574 for (i = 7; i >= 0; i--) {
575 y = ((ULong)imm->imm8 >> i) & 1;
576 for (j = 0; j < 8; j++) {
577 x = (x << 1) | y;
580 return x;
581 case 10:
582 x |= (x & 0x80) << 5;
583 x |= (~x & 0x40) << 5;
584 x &= 0x187F; /* 0001 1000 0111 1111 */
585 x |= (x & 0x40) << 4;
586 x |= (x & 0x40) << 3;
587 x |= (x & 0x40) << 2;
588 x |= (x & 0x40) << 1;
589 x = x << 19;
590 x = (x << 32) | x;
591 return x;
592 default:
593 vpanic("ARMNImm_to_Imm64");
597 ARMNImm* Imm64_to_ARMNImm ( ULong x ) {
598 ARMNImm tmp;
599 if ((x & 0xFFFFFFFF) == (x >> 32)) {
600 if ((x & 0xFFFFFF00) == 0)
601 return ARMNImm_TI(0, x & 0xFF);
602 if ((x & 0xFFFF00FF) == 0)
603 return ARMNImm_TI(1, (x >> 8) & 0xFF);
604 if ((x & 0xFF00FFFF) == 0)
605 return ARMNImm_TI(2, (x >> 16) & 0xFF);
606 if ((x & 0x00FFFFFF) == 0)
607 return ARMNImm_TI(3, (x >> 24) & 0xFF);
608 if ((x & 0xFFFF00FF) == 0xFF)
609 return ARMNImm_TI(7, (x >> 8) & 0xFF);
610 if ((x & 0xFF00FFFF) == 0xFFFF)
611 return ARMNImm_TI(8, (x >> 16) & 0xFF);
612 if ((x & 0xFFFF) == ((x >> 16) & 0xFFFF)) {
613 if ((x & 0xFF00) == 0)
614 return ARMNImm_TI(4, x & 0xFF);
615 if ((x & 0x00FF) == 0)
616 return ARMNImm_TI(5, (x >> 8) & 0xFF);
617 if ((x & 0xFF) == ((x >> 8) & 0xFF))
618 return ARMNImm_TI(6, x & 0xFF);
620 if ((x & 0x7FFFF) == 0) {
621 tmp.type = 10;
622 tmp.imm8 = ((x >> 19) & 0x7F) | ((x >> 24) & 0x80);
623 if (ARMNImm_to_Imm64(&tmp) == x)
624 return ARMNImm_TI(tmp.type, tmp.imm8);
626 } else {
627 /* This can only be type 9. */
628 tmp.imm8 = (((x >> 56) & 1) << 7)
629 | (((x >> 48) & 1) << 6)
630 | (((x >> 40) & 1) << 5)
631 | (((x >> 32) & 1) << 4)
632 | (((x >> 24) & 1) << 3)
633 | (((x >> 16) & 1) << 2)
634 | (((x >> 8) & 1) << 1)
635 | (((x >> 0) & 1) << 0);
636 tmp.type = 9;
637 if (ARMNImm_to_Imm64 (&tmp) == x)
638 return ARMNImm_TI(tmp.type, tmp.imm8);
640 return NULL;
643 void ppARMNImm (ARMNImm* i) {
644 ULong x = ARMNImm_to_Imm64(i);
645 vex_printf("0x%llX%llX", x, x);
648 /* -- Register or scalar operand --- */
650 ARMNRS* mkARMNRS(ARMNRS_tag tag, HReg reg, UInt index)
652 ARMNRS *p = LibVEX_Alloc_inline(sizeof(ARMNRS));
653 p->tag = tag;
654 p->reg = reg;
655 p->index = index;
656 return p;
659 void ppARMNRS(ARMNRS *p)
661 ppHRegARM(p->reg);
662 if (p->tag == ARMNRS_Scalar) {
663 vex_printf("[%u]", p->index);
667 /* --------- Instructions. --------- */
669 const HChar* showARMAluOp ( ARMAluOp op ) {
670 switch (op) {
671 case ARMalu_ADD: return "add";
672 case ARMalu_ADDS: return "adds";
673 case ARMalu_ADC: return "adc";
674 case ARMalu_SUB: return "sub";
675 case ARMalu_SUBS: return "subs";
676 case ARMalu_SBC: return "sbc";
677 case ARMalu_AND: return "and";
678 case ARMalu_BIC: return "bic";
679 case ARMalu_OR: return "orr";
680 case ARMalu_XOR: return "xor";
681 default: vpanic("showARMAluOp");
685 const HChar* showARMShiftOp ( ARMShiftOp op ) {
686 switch (op) {
687 case ARMsh_SHL: return "shl";
688 case ARMsh_SHR: return "shr";
689 case ARMsh_SAR: return "sar";
690 default: vpanic("showARMShiftOp");
694 const HChar* showARMUnaryOp ( ARMUnaryOp op ) {
695 switch (op) {
696 case ARMun_NEG: return "neg";
697 case ARMun_NOT: return "not";
698 case ARMun_CLZ: return "clz";
699 default: vpanic("showARMUnaryOp");
703 const HChar* showARMMulOp ( ARMMulOp op ) {
704 switch (op) {
705 case ARMmul_PLAIN: return "mul";
706 case ARMmul_ZX: return "umull";
707 case ARMmul_SX: return "smull";
708 default: vpanic("showARMMulOp");
712 const HChar* showARMVfpOp ( ARMVfpOp op ) {
713 switch (op) {
714 case ARMvfp_ADD: return "add";
715 case ARMvfp_SUB: return "sub";
716 case ARMvfp_MUL: return "mul";
717 case ARMvfp_DIV: return "div";
718 default: vpanic("showARMVfpOp");
722 const HChar* showARMVfpUnaryOp ( ARMVfpUnaryOp op ) {
723 switch (op) {
724 case ARMvfpu_COPY: return "cpy";
725 case ARMvfpu_NEG: return "neg";
726 case ARMvfpu_ABS: return "abs";
727 case ARMvfpu_SQRT: return "sqrt";
728 default: vpanic("showARMVfpUnaryOp");
732 const HChar* showARMNeonBinOp ( ARMNeonBinOp op ) {
733 switch (op) {
734 case ARMneon_VAND: return "vand";
735 case ARMneon_VORR: return "vorr";
736 case ARMneon_VXOR: return "veor";
737 case ARMneon_VADD: return "vadd";
738 case ARMneon_VRHADDS: return "vrhadd";
739 case ARMneon_VRHADDU: return "vrhadd";
740 case ARMneon_VADDFP: return "vadd";
741 case ARMneon_VPADDFP: return "vpadd";
742 case ARMneon_VABDFP: return "vabd";
743 case ARMneon_VSUB: return "vsub";
744 case ARMneon_VSUBFP: return "vsub";
745 case ARMneon_VMINU: return "vmin";
746 case ARMneon_VMINS: return "vmin";
747 case ARMneon_VMINF: return "vmin";
748 case ARMneon_VMAXU: return "vmax";
749 case ARMneon_VMAXS: return "vmax";
750 case ARMneon_VMAXF: return "vmax";
751 case ARMneon_VQADDU: return "vqadd";
752 case ARMneon_VQADDS: return "vqadd";
753 case ARMneon_VQSUBU: return "vqsub";
754 case ARMneon_VQSUBS: return "vqsub";
755 case ARMneon_VCGTU: return "vcgt";
756 case ARMneon_VCGTS: return "vcgt";
757 case ARMneon_VCGTF: return "vcgt";
758 case ARMneon_VCGEF: return "vcgt";
759 case ARMneon_VCGEU: return "vcge";
760 case ARMneon_VCGES: return "vcge";
761 case ARMneon_VCEQ: return "vceq";
762 case ARMneon_VCEQF: return "vceq";
763 case ARMneon_VPADD: return "vpadd";
764 case ARMneon_VPMINU: return "vpmin";
765 case ARMneon_VPMINS: return "vpmin";
766 case ARMneon_VPMINF: return "vpmin";
767 case ARMneon_VPMAXU: return "vpmax";
768 case ARMneon_VPMAXS: return "vpmax";
769 case ARMneon_VPMAXF: return "vpmax";
770 case ARMneon_VEXT: return "vext";
771 case ARMneon_VMUL: return "vmuli";
772 case ARMneon_VMULLU: return "vmull";
773 case ARMneon_VMULLS: return "vmull";
774 case ARMneon_VMULP: return "vmul";
775 case ARMneon_VMULFP: return "vmul";
776 case ARMneon_VMULLP: return "vmul";
777 case ARMneon_VQDMULH: return "vqdmulh";
778 case ARMneon_VQRDMULH: return "vqrdmulh";
779 case ARMneon_VQDMULL: return "vqdmull";
780 case ARMneon_VTBL: return "vtbl";
781 case ARMneon_VRECPS: return "vrecps";
782 case ARMneon_VRSQRTS: return "vrecps";
783 case ARMneon_INVALID: return "??invalid??";
784 /* ... */
785 default: vpanic("showARMNeonBinOp");
789 const HChar* showARMNeonBinOpDataType ( ARMNeonBinOp op ) {
790 switch (op) {
791 case ARMneon_VAND:
792 case ARMneon_VORR:
793 case ARMneon_VXOR:
794 return "";
795 case ARMneon_VADD:
796 case ARMneon_VSUB:
797 case ARMneon_VEXT:
798 case ARMneon_VMUL:
799 case ARMneon_VPADD:
800 case ARMneon_VTBL:
801 case ARMneon_VCEQ:
802 return ".i";
803 case ARMneon_VRHADDU:
804 case ARMneon_VMINU:
805 case ARMneon_VMAXU:
806 case ARMneon_VQADDU:
807 case ARMneon_VQSUBU:
808 case ARMneon_VCGTU:
809 case ARMneon_VCGEU:
810 case ARMneon_VMULLU:
811 case ARMneon_VPMINU:
812 case ARMneon_VPMAXU:
813 return ".u";
814 case ARMneon_VRHADDS:
815 case ARMneon_VMINS:
816 case ARMneon_VMAXS:
817 case ARMneon_VQADDS:
818 case ARMneon_VQSUBS:
819 case ARMneon_VCGTS:
820 case ARMneon_VCGES:
821 case ARMneon_VQDMULL:
822 case ARMneon_VMULLS:
823 case ARMneon_VPMINS:
824 case ARMneon_VPMAXS:
825 case ARMneon_VQDMULH:
826 case ARMneon_VQRDMULH:
827 return ".s";
828 case ARMneon_VMULP:
829 case ARMneon_VMULLP:
830 return ".p";
831 case ARMneon_VADDFP:
832 case ARMneon_VABDFP:
833 case ARMneon_VPADDFP:
834 case ARMneon_VSUBFP:
835 case ARMneon_VMULFP:
836 case ARMneon_VMINF:
837 case ARMneon_VMAXF:
838 case ARMneon_VPMINF:
839 case ARMneon_VPMAXF:
840 case ARMneon_VCGTF:
841 case ARMneon_VCGEF:
842 case ARMneon_VCEQF:
843 case ARMneon_VRECPS:
844 case ARMneon_VRSQRTS:
845 return ".f";
846 /* ... */
847 default: vpanic("showARMNeonBinOpDataType");
851 const HChar* showARMNeonUnOp ( ARMNeonUnOp op ) {
852 switch (op) {
853 case ARMneon_COPY: return "vmov";
854 case ARMneon_COPYLS: return "vmov";
855 case ARMneon_COPYLU: return "vmov";
856 case ARMneon_COPYN: return "vmov";
857 case ARMneon_COPYQNSS: return "vqmovn";
858 case ARMneon_COPYQNUS: return "vqmovun";
859 case ARMneon_COPYQNUU: return "vqmovn";
860 case ARMneon_NOT: return "vmvn";
861 case ARMneon_EQZ: return "vceq";
862 case ARMneon_CNT: return "vcnt";
863 case ARMneon_CLS: return "vcls";
864 case ARMneon_CLZ: return "vclz";
865 case ARMneon_DUP: return "vdup";
866 case ARMneon_PADDLS: return "vpaddl";
867 case ARMneon_PADDLU: return "vpaddl";
868 case ARMneon_VQSHLNSS: return "vqshl";
869 case ARMneon_VQSHLNUU: return "vqshl";
870 case ARMneon_VQSHLNUS: return "vqshlu";
871 case ARMneon_REV16: return "vrev16";
872 case ARMneon_REV32: return "vrev32";
873 case ARMneon_REV64: return "vrev64";
874 case ARMneon_VCVTFtoU: return "vcvt";
875 case ARMneon_VCVTFtoS: return "vcvt";
876 case ARMneon_VCVTUtoF: return "vcvt";
877 case ARMneon_VCVTStoF: return "vcvt";
878 case ARMneon_VCVTFtoFixedU: return "vcvt";
879 case ARMneon_VCVTFtoFixedS: return "vcvt";
880 case ARMneon_VCVTFixedUtoF: return "vcvt";
881 case ARMneon_VCVTFixedStoF: return "vcvt";
882 case ARMneon_VCVTF32toF16: return "vcvt";
883 case ARMneon_VCVTF16toF32: return "vcvt";
884 case ARMneon_VRECIP: return "vrecip";
885 case ARMneon_VRECIPF: return "vrecipf";
886 case ARMneon_VNEGF: return "vneg";
887 case ARMneon_ABS: return "vabs";
888 case ARMneon_VABSFP: return "vabsfp";
889 case ARMneon_VRSQRTEFP: return "vrsqrtefp";
890 case ARMneon_VRSQRTE: return "vrsqrte";
891 /* ... */
892 default: vpanic("showARMNeonUnOp");
896 const HChar* showARMNeonUnOpDataType ( ARMNeonUnOp op ) {
897 switch (op) {
898 case ARMneon_COPY:
899 case ARMneon_NOT:
900 return "";
901 case ARMneon_COPYN:
902 case ARMneon_EQZ:
903 case ARMneon_CNT:
904 case ARMneon_DUP:
905 case ARMneon_REV16:
906 case ARMneon_REV32:
907 case ARMneon_REV64:
908 return ".i";
909 case ARMneon_COPYLU:
910 case ARMneon_PADDLU:
911 case ARMneon_COPYQNUU:
912 case ARMneon_VQSHLNUU:
913 case ARMneon_VRECIP:
914 case ARMneon_VRSQRTE:
915 return ".u";
916 case ARMneon_CLS:
917 case ARMneon_CLZ:
918 case ARMneon_COPYLS:
919 case ARMneon_PADDLS:
920 case ARMneon_COPYQNSS:
921 case ARMneon_COPYQNUS:
922 case ARMneon_VQSHLNSS:
923 case ARMneon_VQSHLNUS:
924 case ARMneon_ABS:
925 return ".s";
926 case ARMneon_VRECIPF:
927 case ARMneon_VNEGF:
928 case ARMneon_VABSFP:
929 case ARMneon_VRSQRTEFP:
930 return ".f";
931 case ARMneon_VCVTFtoU: return ".u32.f32";
932 case ARMneon_VCVTFtoS: return ".s32.f32";
933 case ARMneon_VCVTUtoF: return ".f32.u32";
934 case ARMneon_VCVTStoF: return ".f32.s32";
935 case ARMneon_VCVTF16toF32: return ".f32.f16";
936 case ARMneon_VCVTF32toF16: return ".f16.f32";
937 case ARMneon_VCVTFtoFixedU: return ".u32.f32";
938 case ARMneon_VCVTFtoFixedS: return ".s32.f32";
939 case ARMneon_VCVTFixedUtoF: return ".f32.u32";
940 case ARMneon_VCVTFixedStoF: return ".f32.s32";
941 /* ... */
942 default: vpanic("showARMNeonUnOpDataType");
946 const HChar* showARMNeonUnOpS ( ARMNeonUnOpS op ) {
947 switch (op) {
948 case ARMneon_SETELEM: return "vmov";
949 case ARMneon_GETELEMU: return "vmov";
950 case ARMneon_GETELEMS: return "vmov";
951 case ARMneon_VDUP: return "vdup";
952 /* ... */
953 default: vpanic("showARMNeonUnarySOp");
957 const HChar* showARMNeonUnOpSDataType ( ARMNeonUnOpS op ) {
958 switch (op) {
959 case ARMneon_SETELEM:
960 case ARMneon_VDUP:
961 return ".i";
962 case ARMneon_GETELEMS:
963 return ".s";
964 case ARMneon_GETELEMU:
965 return ".u";
966 /* ... */
967 default: vpanic("showARMNeonUnarySOp");
971 const HChar* showARMNeonShiftOp ( ARMNeonShiftOp op ) {
972 switch (op) {
973 case ARMneon_VSHL: return "vshl";
974 case ARMneon_VSAL: return "vshl";
975 case ARMneon_VQSHL: return "vqshl";
976 case ARMneon_VQSAL: return "vqshl";
977 /* ... */
978 default: vpanic("showARMNeonShiftOp");
982 const HChar* showARMNeonShiftOpDataType ( ARMNeonShiftOp op ) {
983 switch (op) {
984 case ARMneon_VSHL:
985 case ARMneon_VQSHL:
986 return ".u";
987 case ARMneon_VSAL:
988 case ARMneon_VQSAL:
989 return ".s";
990 /* ... */
991 default: vpanic("showARMNeonShiftOpDataType");
995 const HChar* showARMNeonDualOp ( ARMNeonDualOp op ) {
996 switch (op) {
997 case ARMneon_TRN: return "vtrn";
998 case ARMneon_ZIP: return "vzip";
999 case ARMneon_UZP: return "vuzp";
1000 /* ... */
1001 default: vpanic("showARMNeonDualOp");
1005 const HChar* showARMNeonDualOpDataType ( ARMNeonDualOp op ) {
1006 switch (op) {
1007 case ARMneon_TRN:
1008 case ARMneon_ZIP:
1009 case ARMneon_UZP:
1010 return "i";
1011 /* ... */
1012 default: vpanic("showARMNeonDualOp");
1016 static const HChar* showARMNeonDataSize_wrk ( UInt size )
1018 switch (size) {
1019 case 0: return "8";
1020 case 1: return "16";
1021 case 2: return "32";
1022 case 3: return "64";
1023 default: vpanic("showARMNeonDataSize");
1027 static const HChar* showARMNeonDataSize ( const ARMInstr* i )
1029 switch (i->tag) {
1030 case ARMin_NBinary:
1031 if (i->ARMin.NBinary.op == ARMneon_VEXT)
1032 return "8";
1033 if (i->ARMin.NBinary.op == ARMneon_VAND ||
1034 i->ARMin.NBinary.op == ARMneon_VORR ||
1035 i->ARMin.NBinary.op == ARMneon_VXOR)
1036 return "";
1037 return showARMNeonDataSize_wrk(i->ARMin.NBinary.size);
1038 case ARMin_NUnary:
1039 if (i->ARMin.NUnary.op == ARMneon_COPY ||
1040 i->ARMin.NUnary.op == ARMneon_NOT ||
1041 i->ARMin.NUnary.op == ARMneon_VCVTF32toF16||
1042 i->ARMin.NUnary.op == ARMneon_VCVTF16toF32||
1043 i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
1044 i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
1045 i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
1046 i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF ||
1047 i->ARMin.NUnary.op == ARMneon_VCVTFtoS ||
1048 i->ARMin.NUnary.op == ARMneon_VCVTFtoU ||
1049 i->ARMin.NUnary.op == ARMneon_VCVTStoF ||
1050 i->ARMin.NUnary.op == ARMneon_VCVTUtoF)
1051 return "";
1052 if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
1053 i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
1054 i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
1055 UInt size;
1056 size = i->ARMin.NUnary.size;
1057 if (size & 0x40)
1058 return "64";
1059 if (size & 0x20)
1060 return "32";
1061 if (size & 0x10)
1062 return "16";
1063 if (size & 0x08)
1064 return "8";
1065 vpanic("showARMNeonDataSize");
1067 return showARMNeonDataSize_wrk(i->ARMin.NUnary.size);
1068 case ARMin_NUnaryS:
1069 if (i->ARMin.NUnaryS.op == ARMneon_VDUP) {
1070 int size;
1071 size = i->ARMin.NUnaryS.size;
1072 if ((size & 1) == 1)
1073 return "8";
1074 if ((size & 3) == 2)
1075 return "16";
1076 if ((size & 7) == 4)
1077 return "32";
1078 vpanic("showARMNeonDataSize");
1080 return showARMNeonDataSize_wrk(i->ARMin.NUnaryS.size);
1081 case ARMin_NShift:
1082 return showARMNeonDataSize_wrk(i->ARMin.NShift.size);
1083 case ARMin_NDual:
1084 return showARMNeonDataSize_wrk(i->ARMin.NDual.size);
1085 default:
1086 vpanic("showARMNeonDataSize");
1090 ARMInstr* ARMInstr_Alu ( ARMAluOp op,
1091 HReg dst, HReg argL, ARMRI84* argR ) {
1092 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1093 i->tag = ARMin_Alu;
1094 i->ARMin.Alu.op = op;
1095 i->ARMin.Alu.dst = dst;
1096 i->ARMin.Alu.argL = argL;
1097 i->ARMin.Alu.argR = argR;
1098 return i;
1100 ARMInstr* ARMInstr_Shift ( ARMShiftOp op,
1101 HReg dst, HReg argL, ARMRI5* argR ) {
1102 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1103 i->tag = ARMin_Shift;
1104 i->ARMin.Shift.op = op;
1105 i->ARMin.Shift.dst = dst;
1106 i->ARMin.Shift.argL = argL;
1107 i->ARMin.Shift.argR = argR;
1108 return i;
1110 ARMInstr* ARMInstr_Unary ( ARMUnaryOp op, HReg dst, HReg src ) {
1111 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1112 i->tag = ARMin_Unary;
1113 i->ARMin.Unary.op = op;
1114 i->ARMin.Unary.dst = dst;
1115 i->ARMin.Unary.src = src;
1116 return i;
1118 ARMInstr* ARMInstr_CmpOrTst ( Bool isCmp, HReg argL, ARMRI84* argR ) {
1119 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1120 i->tag = ARMin_CmpOrTst;
1121 i->ARMin.CmpOrTst.isCmp = isCmp;
1122 i->ARMin.CmpOrTst.argL = argL;
1123 i->ARMin.CmpOrTst.argR = argR;
1124 return i;
1126 ARMInstr* ARMInstr_Mov ( HReg dst, ARMRI84* src ) {
1127 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1128 i->tag = ARMin_Mov;
1129 i->ARMin.Mov.dst = dst;
1130 i->ARMin.Mov.src = src;
1131 return i;
1133 ARMInstr* ARMInstr_Imm32 ( HReg dst, UInt imm32 ) {
1134 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1135 i->tag = ARMin_Imm32;
1136 i->ARMin.Imm32.dst = dst;
1137 i->ARMin.Imm32.imm32 = imm32;
1138 return i;
1140 ARMInstr* ARMInstr_LdSt32 ( ARMCondCode cc,
1141 Bool isLoad, HReg rD, ARMAMode1* amode ) {
1142 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1143 i->tag = ARMin_LdSt32;
1144 i->ARMin.LdSt32.cc = cc;
1145 i->ARMin.LdSt32.isLoad = isLoad;
1146 i->ARMin.LdSt32.rD = rD;
1147 i->ARMin.LdSt32.amode = amode;
1148 vassert(cc != ARMcc_NV);
1149 return i;
1151 ARMInstr* ARMInstr_LdSt16 ( ARMCondCode cc,
1152 Bool isLoad, Bool signedLoad,
1153 HReg rD, ARMAMode2* amode ) {
1154 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1155 i->tag = ARMin_LdSt16;
1156 i->ARMin.LdSt16.cc = cc;
1157 i->ARMin.LdSt16.isLoad = isLoad;
1158 i->ARMin.LdSt16.signedLoad = signedLoad;
1159 i->ARMin.LdSt16.rD = rD;
1160 i->ARMin.LdSt16.amode = amode;
1161 vassert(cc != ARMcc_NV);
1162 return i;
1164 ARMInstr* ARMInstr_LdSt8U ( ARMCondCode cc,
1165 Bool isLoad, HReg rD, ARMAMode1* amode ) {
1166 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1167 i->tag = ARMin_LdSt8U;
1168 i->ARMin.LdSt8U.cc = cc;
1169 i->ARMin.LdSt8U.isLoad = isLoad;
1170 i->ARMin.LdSt8U.rD = rD;
1171 i->ARMin.LdSt8U.amode = amode;
1172 vassert(cc != ARMcc_NV);
1173 return i;
1175 ARMInstr* ARMInstr_Ld8S ( ARMCondCode cc, HReg rD, ARMAMode2* amode ) {
1176 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1177 i->tag = ARMin_Ld8S;
1178 i->ARMin.Ld8S.cc = cc;
1179 i->ARMin.Ld8S.rD = rD;
1180 i->ARMin.Ld8S.amode = amode;
1181 vassert(cc != ARMcc_NV);
1182 return i;
1184 ARMInstr* ARMInstr_XDirect ( Addr32 dstGA, ARMAMode1* amR15T,
1185 ARMCondCode cond, Bool toFastEP ) {
1186 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1187 i->tag = ARMin_XDirect;
1188 i->ARMin.XDirect.dstGA = dstGA;
1189 i->ARMin.XDirect.amR15T = amR15T;
1190 i->ARMin.XDirect.cond = cond;
1191 i->ARMin.XDirect.toFastEP = toFastEP;
1192 return i;
1194 ARMInstr* ARMInstr_XIndir ( HReg dstGA, ARMAMode1* amR15T,
1195 ARMCondCode cond ) {
1196 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1197 i->tag = ARMin_XIndir;
1198 i->ARMin.XIndir.dstGA = dstGA;
1199 i->ARMin.XIndir.amR15T = amR15T;
1200 i->ARMin.XIndir.cond = cond;
1201 return i;
1203 ARMInstr* ARMInstr_XAssisted ( HReg dstGA, ARMAMode1* amR15T,
1204 ARMCondCode cond, IRJumpKind jk ) {
1205 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1206 i->tag = ARMin_XAssisted;
1207 i->ARMin.XAssisted.dstGA = dstGA;
1208 i->ARMin.XAssisted.amR15T = amR15T;
1209 i->ARMin.XAssisted.cond = cond;
1210 i->ARMin.XAssisted.jk = jk;
1211 return i;
1213 ARMInstr* ARMInstr_CMov ( ARMCondCode cond, HReg dst, ARMRI84* src ) {
1214 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1215 i->tag = ARMin_CMov;
1216 i->ARMin.CMov.cond = cond;
1217 i->ARMin.CMov.dst = dst;
1218 i->ARMin.CMov.src = src;
1219 vassert(cond != ARMcc_AL);
1220 return i;
1222 ARMInstr* ARMInstr_Call ( ARMCondCode cond, Addr32 target, Int nArgRegs,
1223 RetLoc rloc ) {
1224 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1225 i->tag = ARMin_Call;
1226 i->ARMin.Call.cond = cond;
1227 i->ARMin.Call.target = target;
1228 i->ARMin.Call.nArgRegs = nArgRegs;
1229 i->ARMin.Call.rloc = rloc;
1230 vassert(is_sane_RetLoc(rloc));
1231 return i;
1233 ARMInstr* ARMInstr_Mul ( ARMMulOp op ) {
1234 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1235 i->tag = ARMin_Mul;
1236 i->ARMin.Mul.op = op;
1237 return i;
1239 ARMInstr* ARMInstr_LdrEX ( Int szB ) {
1240 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1241 i->tag = ARMin_LdrEX;
1242 i->ARMin.LdrEX.szB = szB;
1243 vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
1244 return i;
1246 ARMInstr* ARMInstr_StrEX ( Int szB ) {
1247 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1248 i->tag = ARMin_StrEX;
1249 i->ARMin.StrEX.szB = szB;
1250 vassert(szB == 8 || szB == 4 || szB == 2 || szB == 1);
1251 return i;
1253 ARMInstr* ARMInstr_VLdStD ( Bool isLoad, HReg dD, ARMAModeV* am ) {
1254 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1255 i->tag = ARMin_VLdStD;
1256 i->ARMin.VLdStD.isLoad = isLoad;
1257 i->ARMin.VLdStD.dD = dD;
1258 i->ARMin.VLdStD.amode = am;
1259 return i;
1261 ARMInstr* ARMInstr_VLdStS ( Bool isLoad, HReg fD, ARMAModeV* am ) {
1262 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1263 i->tag = ARMin_VLdStS;
1264 i->ARMin.VLdStS.isLoad = isLoad;
1265 i->ARMin.VLdStS.fD = fD;
1266 i->ARMin.VLdStS.amode = am;
1267 return i;
1269 ARMInstr* ARMInstr_VAluD ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
1270 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1271 i->tag = ARMin_VAluD;
1272 i->ARMin.VAluD.op = op;
1273 i->ARMin.VAluD.dst = dst;
1274 i->ARMin.VAluD.argL = argL;
1275 i->ARMin.VAluD.argR = argR;
1276 return i;
1278 ARMInstr* ARMInstr_VAluS ( ARMVfpOp op, HReg dst, HReg argL, HReg argR ) {
1279 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1280 i->tag = ARMin_VAluS;
1281 i->ARMin.VAluS.op = op;
1282 i->ARMin.VAluS.dst = dst;
1283 i->ARMin.VAluS.argL = argL;
1284 i->ARMin.VAluS.argR = argR;
1285 return i;
1287 ARMInstr* ARMInstr_VUnaryD ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
1288 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1289 i->tag = ARMin_VUnaryD;
1290 i->ARMin.VUnaryD.op = op;
1291 i->ARMin.VUnaryD.dst = dst;
1292 i->ARMin.VUnaryD.src = src;
1293 return i;
1295 ARMInstr* ARMInstr_VUnaryS ( ARMVfpUnaryOp op, HReg dst, HReg src ) {
1296 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1297 i->tag = ARMin_VUnaryS;
1298 i->ARMin.VUnaryS.op = op;
1299 i->ARMin.VUnaryS.dst = dst;
1300 i->ARMin.VUnaryS.src = src;
1301 return i;
1303 ARMInstr* ARMInstr_VCmpD ( HReg argL, HReg argR ) {
1304 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1305 i->tag = ARMin_VCmpD;
1306 i->ARMin.VCmpD.argL = argL;
1307 i->ARMin.VCmpD.argR = argR;
1308 return i;
1310 ARMInstr* ARMInstr_VCMovD ( ARMCondCode cond, HReg dst, HReg src ) {
1311 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1312 i->tag = ARMin_VCMovD;
1313 i->ARMin.VCMovD.cond = cond;
1314 i->ARMin.VCMovD.dst = dst;
1315 i->ARMin.VCMovD.src = src;
1316 vassert(cond != ARMcc_AL);
1317 return i;
1319 ARMInstr* ARMInstr_VCMovS ( ARMCondCode cond, HReg dst, HReg src ) {
1320 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1321 i->tag = ARMin_VCMovS;
1322 i->ARMin.VCMovS.cond = cond;
1323 i->ARMin.VCMovS.dst = dst;
1324 i->ARMin.VCMovS.src = src;
1325 vassert(cond != ARMcc_AL);
1326 return i;
1328 ARMInstr* ARMInstr_VCvtSD ( Bool sToD, HReg dst, HReg src ) {
1329 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1330 i->tag = ARMin_VCvtSD;
1331 i->ARMin.VCvtSD.sToD = sToD;
1332 i->ARMin.VCvtSD.dst = dst;
1333 i->ARMin.VCvtSD.src = src;
1334 return i;
1336 ARMInstr* ARMInstr_VXferQ ( Bool toQ, HReg qD, HReg dHi, HReg dLo ) {
1337 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1338 i->tag = ARMin_VXferQ;
1339 i->ARMin.VXferQ.toQ = toQ;
1340 i->ARMin.VXferQ.qD = qD;
1341 i->ARMin.VXferQ.dHi = dHi;
1342 i->ARMin.VXferQ.dLo = dLo;
1343 return i;
1345 ARMInstr* ARMInstr_VXferD ( Bool toD, HReg dD, HReg rHi, HReg rLo ) {
1346 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1347 i->tag = ARMin_VXferD;
1348 i->ARMin.VXferD.toD = toD;
1349 i->ARMin.VXferD.dD = dD;
1350 i->ARMin.VXferD.rHi = rHi;
1351 i->ARMin.VXferD.rLo = rLo;
1352 return i;
1354 ARMInstr* ARMInstr_VXferS ( Bool toS, HReg fD, HReg rLo ) {
1355 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1356 i->tag = ARMin_VXferS;
1357 i->ARMin.VXferS.toS = toS;
1358 i->ARMin.VXferS.fD = fD;
1359 i->ARMin.VXferS.rLo = rLo;
1360 return i;
1362 ARMInstr* ARMInstr_VCvtID ( Bool iToD, Bool syned,
1363 HReg dst, HReg src ) {
1364 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1365 i->tag = ARMin_VCvtID;
1366 i->ARMin.VCvtID.iToD = iToD;
1367 i->ARMin.VCvtID.syned = syned;
1368 i->ARMin.VCvtID.dst = dst;
1369 i->ARMin.VCvtID.src = src;
1370 return i;
1372 ARMInstr* ARMInstr_VRIntR ( Bool isF64, HReg dst, HReg src )
1374 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1375 i->tag = ARMin_VRIntR;
1376 i->ARMin.VRIntR.isF64 = isF64;
1377 i->ARMin.VRIntR.dst = dst ;
1378 i->ARMin.VRIntR.src = src;
1379 return i;
1381 ARMInstr* ARMInstr_VMinMaxNum ( Bool isF64, Bool isMax,
1382 HReg dst, HReg srcL, HReg srcR )
1384 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1385 i->tag = ARMin_VMinMaxNum;
1386 i->ARMin.VMinMaxNum.isF64 = isF64;
1387 i->ARMin.VMinMaxNum.isMax = isMax;
1388 i->ARMin.VMinMaxNum.dst = dst ;
1389 i->ARMin.VMinMaxNum.srcL = srcL;
1390 i->ARMin.VMinMaxNum.srcR = srcR;
1391 return i;
1393 ARMInstr* ARMInstr_FPSCR ( Bool toFPSCR, HReg iReg ) {
1394 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1395 i->tag = ARMin_FPSCR;
1396 i->ARMin.FPSCR.toFPSCR = toFPSCR;
1397 i->ARMin.FPSCR.iReg = iReg;
1398 return i;
1400 ARMInstr* ARMInstr_MFence ( void ) {
1401 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1402 i->tag = ARMin_MFence;
1403 return i;
1405 ARMInstr* ARMInstr_CLREX( void ) {
1406 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1407 i->tag = ARMin_CLREX;
1408 return i;
1411 ARMInstr* ARMInstr_NLdStQ ( Bool isLoad, HReg dQ, ARMAModeN *amode ) {
1412 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1413 i->tag = ARMin_NLdStQ;
1414 i->ARMin.NLdStQ.isLoad = isLoad;
1415 i->ARMin.NLdStQ.dQ = dQ;
1416 i->ARMin.NLdStQ.amode = amode;
1417 return i;
1420 ARMInstr* ARMInstr_NLdStD ( Bool isLoad, HReg dD, ARMAModeN *amode ) {
1421 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1422 i->tag = ARMin_NLdStD;
1423 i->ARMin.NLdStD.isLoad = isLoad;
1424 i->ARMin.NLdStD.dD = dD;
1425 i->ARMin.NLdStD.amode = amode;
1426 return i;
1429 ARMInstr* ARMInstr_NUnary ( ARMNeonUnOp op, HReg dQ, HReg nQ,
1430 UInt size, Bool Q ) {
1431 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1432 i->tag = ARMin_NUnary;
1433 i->ARMin.NUnary.op = op;
1434 i->ARMin.NUnary.src = nQ;
1435 i->ARMin.NUnary.dst = dQ;
1436 i->ARMin.NUnary.size = size;
1437 i->ARMin.NUnary.Q = Q;
1438 return i;
1441 ARMInstr* ARMInstr_NUnaryS ( ARMNeonUnOpS op, ARMNRS* dst, ARMNRS* src,
1442 UInt size, Bool Q ) {
1443 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1444 i->tag = ARMin_NUnaryS;
1445 i->ARMin.NUnaryS.op = op;
1446 i->ARMin.NUnaryS.src = src;
1447 i->ARMin.NUnaryS.dst = dst;
1448 i->ARMin.NUnaryS.size = size;
1449 i->ARMin.NUnaryS.Q = Q;
1450 return i;
1453 ARMInstr* ARMInstr_NDual ( ARMNeonDualOp op, HReg nQ, HReg mQ,
1454 UInt size, Bool Q ) {
1455 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1456 i->tag = ARMin_NDual;
1457 i->ARMin.NDual.op = op;
1458 i->ARMin.NDual.arg1 = nQ;
1459 i->ARMin.NDual.arg2 = mQ;
1460 i->ARMin.NDual.size = size;
1461 i->ARMin.NDual.Q = Q;
1462 return i;
1465 ARMInstr* ARMInstr_NBinary ( ARMNeonBinOp op,
1466 HReg dst, HReg argL, HReg argR,
1467 UInt size, Bool Q ) {
1468 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1469 i->tag = ARMin_NBinary;
1470 i->ARMin.NBinary.op = op;
1471 i->ARMin.NBinary.argL = argL;
1472 i->ARMin.NBinary.argR = argR;
1473 i->ARMin.NBinary.dst = dst;
1474 i->ARMin.NBinary.size = size;
1475 i->ARMin.NBinary.Q = Q;
1476 return i;
1479 ARMInstr* ARMInstr_NeonImm (HReg dst, ARMNImm* imm ) {
1480 ARMInstr *i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1481 i->tag = ARMin_NeonImm;
1482 i->ARMin.NeonImm.dst = dst;
1483 i->ARMin.NeonImm.imm = imm;
1484 return i;
1487 ARMInstr* ARMInstr_NCMovQ ( ARMCondCode cond, HReg dst, HReg src ) {
1488 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1489 i->tag = ARMin_NCMovQ;
1490 i->ARMin.NCMovQ.cond = cond;
1491 i->ARMin.NCMovQ.dst = dst;
1492 i->ARMin.NCMovQ.src = src;
1493 vassert(cond != ARMcc_AL);
1494 return i;
1497 ARMInstr* ARMInstr_NShift ( ARMNeonShiftOp op,
1498 HReg dst, HReg argL, HReg argR,
1499 UInt size, Bool Q ) {
1500 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1501 i->tag = ARMin_NShift;
1502 i->ARMin.NShift.op = op;
1503 i->ARMin.NShift.argL = argL;
1504 i->ARMin.NShift.argR = argR;
1505 i->ARMin.NShift.dst = dst;
1506 i->ARMin.NShift.size = size;
1507 i->ARMin.NShift.Q = Q;
1508 return i;
1511 ARMInstr* ARMInstr_NShl64 ( HReg dst, HReg src, UInt amt )
1513 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1514 i->tag = ARMin_NShl64;
1515 i->ARMin.NShl64.dst = dst;
1516 i->ARMin.NShl64.src = src;
1517 i->ARMin.NShl64.amt = amt;
1518 vassert(amt >= 1 && amt <= 63);
1519 return i;
1522 /* Helper copy-pasted from isel.c */
1523 static Bool fitsIn8x4 ( UInt* u8, UInt* u4, UInt u )
1525 UInt i;
1526 for (i = 0; i < 16; i++) {
1527 if (0 == (u & 0xFFFFFF00)) {
1528 *u8 = u;
1529 *u4 = i;
1530 return True;
1532 u = ROR32(u, 30);
1534 vassert(i == 16);
1535 return False;
1538 ARMInstr* ARMInstr_Add32 ( HReg rD, HReg rN, UInt imm32 ) {
1539 UInt u8, u4;
1540 ARMInstr *i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1541 /* Try to generate single ADD if possible */
1542 if (fitsIn8x4(&u8, &u4, imm32)) {
1543 i->tag = ARMin_Alu;
1544 i->ARMin.Alu.op = ARMalu_ADD;
1545 i->ARMin.Alu.dst = rD;
1546 i->ARMin.Alu.argL = rN;
1547 i->ARMin.Alu.argR = ARMRI84_I84(u8, u4);
1548 } else {
1549 i->tag = ARMin_Add32;
1550 i->ARMin.Add32.rD = rD;
1551 i->ARMin.Add32.rN = rN;
1552 i->ARMin.Add32.imm32 = imm32;
1554 return i;
1557 ARMInstr* ARMInstr_EvCheck ( ARMAMode1* amCounter,
1558 ARMAMode1* amFailAddr ) {
1559 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1560 i->tag = ARMin_EvCheck;
1561 i->ARMin.EvCheck.amCounter = amCounter;
1562 i->ARMin.EvCheck.amFailAddr = amFailAddr;
1563 return i;
1566 ARMInstr* ARMInstr_ProfInc ( void ) {
1567 ARMInstr* i = LibVEX_Alloc_inline(sizeof(ARMInstr));
1568 i->tag = ARMin_ProfInc;
1569 return i;
1572 /* ... */
1574 void ppARMInstr ( const ARMInstr* i ) {
1575 switch (i->tag) {
1576 case ARMin_Alu:
1577 vex_printf("%-4s ", showARMAluOp(i->ARMin.Alu.op));
1578 ppHRegARM(i->ARMin.Alu.dst);
1579 vex_printf(", ");
1580 ppHRegARM(i->ARMin.Alu.argL);
1581 vex_printf(", ");
1582 ppARMRI84(i->ARMin.Alu.argR);
1583 return;
1584 case ARMin_Shift:
1585 vex_printf("%s ", showARMShiftOp(i->ARMin.Shift.op));
1586 ppHRegARM(i->ARMin.Shift.dst);
1587 vex_printf(", ");
1588 ppHRegARM(i->ARMin.Shift.argL);
1589 vex_printf(", ");
1590 ppARMRI5(i->ARMin.Shift.argR);
1591 return;
1592 case ARMin_Unary:
1593 vex_printf("%s ", showARMUnaryOp(i->ARMin.Unary.op));
1594 ppHRegARM(i->ARMin.Unary.dst);
1595 vex_printf(", ");
1596 ppHRegARM(i->ARMin.Unary.src);
1597 return;
1598 case ARMin_CmpOrTst:
1599 vex_printf("%s ", i->ARMin.CmpOrTst.isCmp ? "cmp" : "tst");
1600 ppHRegARM(i->ARMin.CmpOrTst.argL);
1601 vex_printf(", ");
1602 ppARMRI84(i->ARMin.CmpOrTst.argR);
1603 return;
1604 case ARMin_Mov:
1605 vex_printf("mov ");
1606 ppHRegARM(i->ARMin.Mov.dst);
1607 vex_printf(", ");
1608 ppARMRI84(i->ARMin.Mov.src);
1609 return;
1610 case ARMin_Imm32:
1611 vex_printf("imm ");
1612 ppHRegARM(i->ARMin.Imm32.dst);
1613 vex_printf(", 0x%x", i->ARMin.Imm32.imm32);
1614 return;
1615 case ARMin_LdSt32:
1616 if (i->ARMin.LdSt32.isLoad) {
1617 vex_printf("ldr%s ", i->ARMin.LdSt32.cc == ARMcc_AL ? " "
1618 : showARMCondCode(i->ARMin.LdSt32.cc));
1619 ppHRegARM(i->ARMin.LdSt32.rD);
1620 vex_printf(", ");
1621 ppARMAMode1(i->ARMin.LdSt32.amode);
1622 } else {
1623 vex_printf("str%s ", i->ARMin.LdSt32.cc == ARMcc_AL ? " "
1624 : showARMCondCode(i->ARMin.LdSt32.cc));
1625 ppARMAMode1(i->ARMin.LdSt32.amode);
1626 vex_printf(", ");
1627 ppHRegARM(i->ARMin.LdSt32.rD);
1629 return;
1630 case ARMin_LdSt16:
1631 if (i->ARMin.LdSt16.isLoad) {
1632 vex_printf("%s%s%s",
1633 i->ARMin.LdSt16.signedLoad ? "ldrsh" : "ldrh",
1634 i->ARMin.LdSt16.cc == ARMcc_AL ? " "
1635 : showARMCondCode(i->ARMin.LdSt16.cc),
1636 i->ARMin.LdSt16.signedLoad ? " " : " ");
1637 ppHRegARM(i->ARMin.LdSt16.rD);
1638 vex_printf(", ");
1639 ppARMAMode2(i->ARMin.LdSt16.amode);
1640 } else {
1641 vex_printf("strh%s ",
1642 i->ARMin.LdSt16.cc == ARMcc_AL ? " "
1643 : showARMCondCode(i->ARMin.LdSt16.cc));
1644 ppARMAMode2(i->ARMin.LdSt16.amode);
1645 vex_printf(", ");
1646 ppHRegARM(i->ARMin.LdSt16.rD);
1648 return;
1649 case ARMin_LdSt8U:
1650 if (i->ARMin.LdSt8U.isLoad) {
1651 vex_printf("ldrb%s ", i->ARMin.LdSt8U.cc == ARMcc_AL ? " "
1652 : showARMCondCode(i->ARMin.LdSt8U.cc));
1653 ppHRegARM(i->ARMin.LdSt8U.rD);
1654 vex_printf(", ");
1655 ppARMAMode1(i->ARMin.LdSt8U.amode);
1656 } else {
1657 vex_printf("strb%s ", i->ARMin.LdSt8U.cc == ARMcc_AL ? " "
1658 : showARMCondCode(i->ARMin.LdSt8U.cc));
1659 ppARMAMode1(i->ARMin.LdSt8U.amode);
1660 vex_printf(", ");
1661 ppHRegARM(i->ARMin.LdSt8U.rD);
1663 return;
1664 case ARMin_Ld8S:
1665 vex_printf("ldrsb%s ", i->ARMin.Ld8S.cc == ARMcc_AL ? " "
1666 : showARMCondCode(i->ARMin.Ld8S.cc));
1667 ppARMAMode2(i->ARMin.Ld8S.amode);
1668 vex_printf(", ");
1669 ppHRegARM(i->ARMin.Ld8S.rD);
1670 return;
1671 case ARMin_XDirect:
1672 vex_printf("(xDirect) ");
1673 vex_printf("if (%%cpsr.%s) { ",
1674 showARMCondCode(i->ARMin.XDirect.cond));
1675 vex_printf("movw r12,0x%x; ",
1676 (UInt)(i->ARMin.XDirect.dstGA & 0xFFFF));
1677 vex_printf("movt r12,0x%x; ",
1678 (UInt)((i->ARMin.XDirect.dstGA >> 16) & 0xFFFF));
1679 vex_printf("str r12,");
1680 ppARMAMode1(i->ARMin.XDirect.amR15T);
1681 vex_printf("; movw r12,LO16($disp_cp_chain_me_to_%sEP); ",
1682 i->ARMin.XDirect.toFastEP ? "fast" : "slow");
1683 vex_printf("movt r12,HI16($disp_cp_chain_me_to_%sEP); ",
1684 i->ARMin.XDirect.toFastEP ? "fast" : "slow");
1685 vex_printf("blx r12 }");
1686 return;
1687 case ARMin_XIndir:
1688 vex_printf("(xIndir) ");
1689 vex_printf("if (%%cpsr.%s) { ",
1690 showARMCondCode(i->ARMin.XIndir.cond));
1691 vex_printf("str ");
1692 ppHRegARM(i->ARMin.XIndir.dstGA);
1693 vex_printf(",");
1694 ppARMAMode1(i->ARMin.XIndir.amR15T);
1695 vex_printf("; movw r12,LO16($disp_cp_xindir); ");
1696 vex_printf("movt r12,HI16($disp_cp_xindir); ");
1697 vex_printf("blx r12 }");
1698 return;
1699 case ARMin_XAssisted:
1700 vex_printf("(xAssisted) ");
1701 vex_printf("if (%%cpsr.%s) { ",
1702 showARMCondCode(i->ARMin.XAssisted.cond));
1703 vex_printf("str ");
1704 ppHRegARM(i->ARMin.XAssisted.dstGA);
1705 vex_printf(",");
1706 ppARMAMode1(i->ARMin.XAssisted.amR15T);
1707 vex_printf("movw r8,$IRJumpKind_to_TRCVAL(%d); ",
1708 (Int)i->ARMin.XAssisted.jk);
1709 vex_printf("movw r12,LO16($disp_cp_xassisted); ");
1710 vex_printf("movt r12,HI16($disp_cp_xassisted); ");
1711 vex_printf("blx r12 }");
1712 return;
1713 case ARMin_CMov:
1714 vex_printf("mov%s ", showARMCondCode(i->ARMin.CMov.cond));
1715 ppHRegARM(i->ARMin.CMov.dst);
1716 vex_printf(", ");
1717 ppARMRI84(i->ARMin.CMov.src);
1718 return;
1719 case ARMin_Call:
1720 vex_printf("call%s ",
1721 i->ARMin.Call.cond==ARMcc_AL
1722 ? "" : showARMCondCode(i->ARMin.Call.cond));
1723 vex_printf("0x%x [nArgRegs=%d, ",
1724 i->ARMin.Call.target, i->ARMin.Call.nArgRegs);
1725 ppRetLoc(i->ARMin.Call.rloc);
1726 vex_printf("]");
1727 return;
1728 case ARMin_Mul:
1729 vex_printf("%-5s ", showARMMulOp(i->ARMin.Mul.op));
1730 if (i->ARMin.Mul.op == ARMmul_PLAIN) {
1731 vex_printf("r0, r2, r3");
1732 } else {
1733 vex_printf("r1:r0, r2, r3");
1735 return;
1736 case ARMin_LdrEX: {
1737 const HChar* sz = "";
1738 switch (i->ARMin.LdrEX.szB) {
1739 case 1: sz = "b"; break; case 2: sz = "h"; break;
1740 case 8: sz = "d"; break; case 4: break;
1741 default: vassert(0);
1743 vex_printf("ldrex%s %sr2, [r4]",
1744 sz, i->ARMin.LdrEX.szB == 8 ? "r3:" : "");
1745 return;
1747 case ARMin_StrEX: {
1748 const HChar* sz = "";
1749 switch (i->ARMin.StrEX.szB) {
1750 case 1: sz = "b"; break; case 2: sz = "h"; break;
1751 case 8: sz = "d"; break; case 4: break;
1752 default: vassert(0);
1754 vex_printf("strex%s r0, %sr2, [r4]",
1755 sz, i->ARMin.StrEX.szB == 8 ? "r3:" : "");
1756 return;
1758 case ARMin_VLdStD:
1759 if (i->ARMin.VLdStD.isLoad) {
1760 vex_printf("fldd ");
1761 ppHRegARM(i->ARMin.VLdStD.dD);
1762 vex_printf(", ");
1763 ppARMAModeV(i->ARMin.VLdStD.amode);
1764 } else {
1765 vex_printf("fstd ");
1766 ppARMAModeV(i->ARMin.VLdStD.amode);
1767 vex_printf(", ");
1768 ppHRegARM(i->ARMin.VLdStD.dD);
1770 return;
1771 case ARMin_VLdStS:
1772 if (i->ARMin.VLdStS.isLoad) {
1773 vex_printf("flds ");
1774 ppHRegARM(i->ARMin.VLdStS.fD);
1775 vex_printf(", ");
1776 ppARMAModeV(i->ARMin.VLdStS.amode);
1777 } else {
1778 vex_printf("fsts ");
1779 ppARMAModeV(i->ARMin.VLdStS.amode);
1780 vex_printf(", ");
1781 ppHRegARM(i->ARMin.VLdStS.fD);
1783 return;
1784 case ARMin_VAluD:
1785 vex_printf("f%-3sd ", showARMVfpOp(i->ARMin.VAluD.op));
1786 ppHRegARM(i->ARMin.VAluD.dst);
1787 vex_printf(", ");
1788 ppHRegARM(i->ARMin.VAluD.argL);
1789 vex_printf(", ");
1790 ppHRegARM(i->ARMin.VAluD.argR);
1791 return;
1792 case ARMin_VAluS:
1793 vex_printf("f%-3ss ", showARMVfpOp(i->ARMin.VAluS.op));
1794 ppHRegARM(i->ARMin.VAluS.dst);
1795 vex_printf(", ");
1796 ppHRegARM(i->ARMin.VAluS.argL);
1797 vex_printf(", ");
1798 ppHRegARM(i->ARMin.VAluS.argR);
1799 return;
1800 case ARMin_VUnaryD:
1801 vex_printf("f%-3sd ", showARMVfpUnaryOp(i->ARMin.VUnaryD.op));
1802 ppHRegARM(i->ARMin.VUnaryD.dst);
1803 vex_printf(", ");
1804 ppHRegARM(i->ARMin.VUnaryD.src);
1805 return;
1806 case ARMin_VUnaryS:
1807 vex_printf("f%-3ss ", showARMVfpUnaryOp(i->ARMin.VUnaryS.op));
1808 ppHRegARM(i->ARMin.VUnaryS.dst);
1809 vex_printf(", ");
1810 ppHRegARM(i->ARMin.VUnaryS.src);
1811 return;
1812 case ARMin_VCmpD:
1813 vex_printf("fcmpd ");
1814 ppHRegARM(i->ARMin.VCmpD.argL);
1815 vex_printf(", ");
1816 ppHRegARM(i->ARMin.VCmpD.argR);
1817 vex_printf(" ; fmstat");
1818 return;
1819 case ARMin_VCMovD:
1820 vex_printf("fcpyd%s ", showARMCondCode(i->ARMin.VCMovD.cond));
1821 ppHRegARM(i->ARMin.VCMovD.dst);
1822 vex_printf(", ");
1823 ppHRegARM(i->ARMin.VCMovD.src);
1824 return;
1825 case ARMin_VCMovS:
1826 vex_printf("fcpys%s ", showARMCondCode(i->ARMin.VCMovS.cond));
1827 ppHRegARM(i->ARMin.VCMovS.dst);
1828 vex_printf(", ");
1829 ppHRegARM(i->ARMin.VCMovS.src);
1830 return;
1831 case ARMin_VCvtSD:
1832 vex_printf("fcvt%s ", i->ARMin.VCvtSD.sToD ? "ds" : "sd");
1833 ppHRegARM(i->ARMin.VCvtSD.dst);
1834 vex_printf(", ");
1835 ppHRegARM(i->ARMin.VCvtSD.src);
1836 return;
1837 case ARMin_VXferQ:
1838 if (i->ARMin.VXferQ.toQ) {
1839 vex_printf("vmov ");
1840 ppHRegARM(i->ARMin.VXferQ.qD);
1841 vex_printf("-lo64, ");
1842 ppHRegARM(i->ARMin.VXferQ.dLo);
1843 vex_printf(" ; vmov ");
1844 ppHRegARM(i->ARMin.VXferQ.qD);
1845 vex_printf("-hi64, ");
1846 ppHRegARM(i->ARMin.VXferQ.dHi);
1847 } else {
1848 vex_printf("vmov ");
1849 ppHRegARM(i->ARMin.VXferQ.dLo);
1850 vex_printf(", ");
1851 ppHRegARM(i->ARMin.VXferQ.qD);
1852 vex_printf("-lo64");
1853 vex_printf(" ; vmov ");
1854 ppHRegARM(i->ARMin.VXferQ.dHi);
1855 vex_printf(", ");
1856 ppHRegARM(i->ARMin.VXferQ.qD);
1857 vex_printf("-hi64");
1859 return;
1860 case ARMin_VXferD:
1861 vex_printf("vmov ");
1862 if (i->ARMin.VXferD.toD) {
1863 ppHRegARM(i->ARMin.VXferD.dD);
1864 vex_printf(", ");
1865 ppHRegARM(i->ARMin.VXferD.rLo);
1866 vex_printf(", ");
1867 ppHRegARM(i->ARMin.VXferD.rHi);
1868 } else {
1869 ppHRegARM(i->ARMin.VXferD.rLo);
1870 vex_printf(", ");
1871 ppHRegARM(i->ARMin.VXferD.rHi);
1872 vex_printf(", ");
1873 ppHRegARM(i->ARMin.VXferD.dD);
1875 return;
1876 case ARMin_VXferS:
1877 vex_printf("vmov ");
1878 if (i->ARMin.VXferS.toS) {
1879 ppHRegARM(i->ARMin.VXferS.fD);
1880 vex_printf(", ");
1881 ppHRegARM(i->ARMin.VXferS.rLo);
1882 } else {
1883 ppHRegARM(i->ARMin.VXferS.rLo);
1884 vex_printf(", ");
1885 ppHRegARM(i->ARMin.VXferS.fD);
1887 return;
1888 case ARMin_VCvtID: {
1889 const HChar* nm = "?";
1890 if (i->ARMin.VCvtID.iToD) {
1891 nm = i->ARMin.VCvtID.syned ? "fsitod" : "fuitod";
1892 } else {
1893 nm = i->ARMin.VCvtID.syned ? "ftosid" : "ftouid";
1895 vex_printf("%s ", nm);
1896 ppHRegARM(i->ARMin.VCvtID.dst);
1897 vex_printf(", ");
1898 ppHRegARM(i->ARMin.VCvtID.src);
1899 return;
1901 case ARMin_VRIntR: {
1902 const HChar* sz = i->ARMin.VRIntR.isF64 ? "f64" : "f32";
1903 vex_printf("vrintr.%s.%s ", sz, sz);
1904 ppHRegARM(i->ARMin.VRIntR.dst);
1905 vex_printf(", ");
1906 ppHRegARM(i->ARMin.VRIntR.src);
1907 return;
1909 case ARMin_VMinMaxNum: {
1910 const HChar* sz = i->ARMin.VMinMaxNum.isF64 ? "f64" : "f32";
1911 const HChar* nm = i->ARMin.VMinMaxNum.isMax ? "vmaxnm" : "vminnm";
1912 vex_printf("%s.%s ", nm, sz);
1913 ppHRegARM(i->ARMin.VMinMaxNum.dst);
1914 vex_printf(", ");
1915 ppHRegARM(i->ARMin.VMinMaxNum.srcL);
1916 vex_printf(", ");
1917 ppHRegARM(i->ARMin.VMinMaxNum.srcR);
1918 return;
1920 case ARMin_FPSCR:
1921 if (i->ARMin.FPSCR.toFPSCR) {
1922 vex_printf("fmxr fpscr, ");
1923 ppHRegARM(i->ARMin.FPSCR.iReg);
1924 } else {
1925 vex_printf("fmrx ");
1926 ppHRegARM(i->ARMin.FPSCR.iReg);
1927 vex_printf(", fpscr");
1929 return;
1930 case ARMin_MFence:
1931 vex_printf("(mfence) dsb sy; dmb sy; isb");
1932 return;
1933 case ARMin_CLREX:
1934 vex_printf("clrex");
1935 return;
1936 case ARMin_NLdStQ:
1937 if (i->ARMin.NLdStQ.isLoad)
1938 vex_printf("vld1.32 {");
1939 else
1940 vex_printf("vst1.32 {");
1941 ppHRegARM(i->ARMin.NLdStQ.dQ);
1942 vex_printf("} ");
1943 ppARMAModeN(i->ARMin.NLdStQ.amode);
1944 return;
1945 case ARMin_NLdStD:
1946 if (i->ARMin.NLdStD.isLoad)
1947 vex_printf("vld1.32 {");
1948 else
1949 vex_printf("vst1.32 {");
1950 ppHRegARM(i->ARMin.NLdStD.dD);
1951 vex_printf("} ");
1952 ppARMAModeN(i->ARMin.NLdStD.amode);
1953 return;
1954 case ARMin_NUnary:
1955 vex_printf("%s%s%s ",
1956 showARMNeonUnOp(i->ARMin.NUnary.op),
1957 showARMNeonUnOpDataType(i->ARMin.NUnary.op),
1958 showARMNeonDataSize(i));
1959 ppHRegARM(i->ARMin.NUnary.dst);
1960 vex_printf(", ");
1961 ppHRegARM(i->ARMin.NUnary.src);
1962 if (i->ARMin.NUnary.op == ARMneon_EQZ)
1963 vex_printf(", #0");
1964 if (i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedS ||
1965 i->ARMin.NUnary.op == ARMneon_VCVTFtoFixedU ||
1966 i->ARMin.NUnary.op == ARMneon_VCVTFixedStoF ||
1967 i->ARMin.NUnary.op == ARMneon_VCVTFixedUtoF) {
1968 vex_printf(", #%u", i->ARMin.NUnary.size);
1970 if (i->ARMin.NUnary.op == ARMneon_VQSHLNSS ||
1971 i->ARMin.NUnary.op == ARMneon_VQSHLNUU ||
1972 i->ARMin.NUnary.op == ARMneon_VQSHLNUS) {
1973 UInt size;
1974 size = i->ARMin.NUnary.size;
1975 if (size & 0x40) {
1976 vex_printf(", #%u", size - 64);
1977 } else if (size & 0x20) {
1978 vex_printf(", #%u", size - 32);
1979 } else if (size & 0x10) {
1980 vex_printf(", #%u", size - 16);
1981 } else if (size & 0x08) {
1982 vex_printf(", #%u", size - 8);
1985 return;
1986 case ARMin_NUnaryS:
1987 vex_printf("%s%s%s ",
1988 showARMNeonUnOpS(i->ARMin.NUnaryS.op),
1989 showARMNeonUnOpSDataType(i->ARMin.NUnaryS.op),
1990 showARMNeonDataSize(i));
1991 ppARMNRS(i->ARMin.NUnaryS.dst);
1992 vex_printf(", ");
1993 ppARMNRS(i->ARMin.NUnaryS.src);
1994 return;
1995 case ARMin_NShift:
1996 vex_printf("%s%s%s ",
1997 showARMNeonShiftOp(i->ARMin.NShift.op),
1998 showARMNeonShiftOpDataType(i->ARMin.NShift.op),
1999 showARMNeonDataSize(i));
2000 ppHRegARM(i->ARMin.NShift.dst);
2001 vex_printf(", ");
2002 ppHRegARM(i->ARMin.NShift.argL);
2003 vex_printf(", ");
2004 ppHRegARM(i->ARMin.NShift.argR);
2005 return;
2006 case ARMin_NShl64:
2007 vex_printf("vshl.i64 ");
2008 ppHRegARM(i->ARMin.NShl64.dst);
2009 vex_printf(", ");
2010 ppHRegARM(i->ARMin.NShl64.src);
2011 vex_printf(", #%u", i->ARMin.NShl64.amt);
2012 return;
2013 case ARMin_NDual:
2014 vex_printf("%s%s%s ",
2015 showARMNeonDualOp(i->ARMin.NDual.op),
2016 showARMNeonDualOpDataType(i->ARMin.NDual.op),
2017 showARMNeonDataSize(i));
2018 ppHRegARM(i->ARMin.NDual.arg1);
2019 vex_printf(", ");
2020 ppHRegARM(i->ARMin.NDual.arg2);
2021 return;
2022 case ARMin_NBinary:
2023 vex_printf("%s%s%s",
2024 showARMNeonBinOp(i->ARMin.NBinary.op),
2025 showARMNeonBinOpDataType(i->ARMin.NBinary.op),
2026 showARMNeonDataSize(i));
2027 vex_printf(" ");
2028 ppHRegARM(i->ARMin.NBinary.dst);
2029 vex_printf(", ");
2030 ppHRegARM(i->ARMin.NBinary.argL);
2031 vex_printf(", ");
2032 ppHRegARM(i->ARMin.NBinary.argR);
2033 return;
2034 case ARMin_NeonImm:
2035 vex_printf("vmov ");
2036 ppHRegARM(i->ARMin.NeonImm.dst);
2037 vex_printf(", ");
2038 ppARMNImm(i->ARMin.NeonImm.imm);
2039 return;
2040 case ARMin_NCMovQ:
2041 vex_printf("vmov%s ", showARMCondCode(i->ARMin.NCMovQ.cond));
2042 ppHRegARM(i->ARMin.NCMovQ.dst);
2043 vex_printf(", ");
2044 ppHRegARM(i->ARMin.NCMovQ.src);
2045 return;
2046 case ARMin_Add32:
2047 vex_printf("add32 ");
2048 ppHRegARM(i->ARMin.Add32.rD);
2049 vex_printf(", ");
2050 ppHRegARM(i->ARMin.Add32.rN);
2051 vex_printf(", ");
2052 vex_printf("%u", i->ARMin.Add32.imm32);
2053 return;
2054 case ARMin_EvCheck:
2055 vex_printf("(evCheck) ldr r12,");
2056 ppARMAMode1(i->ARMin.EvCheck.amCounter);
2057 vex_printf("; subs r12,r12,$1; str r12,");
2058 ppARMAMode1(i->ARMin.EvCheck.amCounter);
2059 vex_printf("; bpl nofail; ldr r12,");
2060 ppARMAMode1(i->ARMin.EvCheck.amFailAddr);
2061 vex_printf("; bx r12; nofail:");
2062 return;
2063 case ARMin_ProfInc:
2064 vex_printf("(profInc) movw r12,LO16($NotKnownYet); "
2065 "movw r12,HI16($NotKnownYet); "
2066 "ldr r11,[r12]; "
2067 "adds r11,r11,$1; "
2068 "str r11,[r12]; "
2069 "ldr r11,[r12+4]; "
2070 "adc r11,r11,$0; "
2071 "str r11,[r12+4]");
2072 return;
2073 default:
2074 vex_printf("ppARMInstr: unhandled case (tag %d)", (Int)i->tag);
2075 vpanic("ppARMInstr(1)");
2076 return;
2081 /* --------- Helpers for register allocation. --------- */
2083 void getRegUsage_ARMInstr ( HRegUsage* u, const ARMInstr* i, Bool mode64 )
2085 vassert(mode64 == False);
2086 initHRegUsage(u);
2087 switch (i->tag) {
2088 case ARMin_Alu:
2089 addHRegUse(u, HRmWrite, i->ARMin.Alu.dst);
2090 addHRegUse(u, HRmRead, i->ARMin.Alu.argL);
2091 addRegUsage_ARMRI84(u, i->ARMin.Alu.argR);
2092 return;
2093 case ARMin_Shift:
2094 addHRegUse(u, HRmWrite, i->ARMin.Shift.dst);
2095 addHRegUse(u, HRmRead, i->ARMin.Shift.argL);
2096 addRegUsage_ARMRI5(u, i->ARMin.Shift.argR);
2097 return;
2098 case ARMin_Unary:
2099 addHRegUse(u, HRmWrite, i->ARMin.Unary.dst);
2100 addHRegUse(u, HRmRead, i->ARMin.Unary.src);
2101 return;
2102 case ARMin_CmpOrTst:
2103 addHRegUse(u, HRmRead, i->ARMin.CmpOrTst.argL);
2104 addRegUsage_ARMRI84(u, i->ARMin.CmpOrTst.argR);
2105 return;
2106 case ARMin_Mov:
2107 addHRegUse(u, HRmWrite, i->ARMin.Mov.dst);
2108 addRegUsage_ARMRI84(u, i->ARMin.Mov.src);
2110 if (i->ARMin.Mov.src->tag == ARMri84_R) {
2111 u->isRegRegMove = True;
2112 u->regMoveSrc = i->ARMin.Mov.src->ARMri84.R.reg;
2113 u->regMoveDst = i->ARMin.Mov.dst;
2115 return;
2116 case ARMin_Imm32:
2117 addHRegUse(u, HRmWrite, i->ARMin.Imm32.dst);
2118 return;
2119 case ARMin_LdSt32:
2120 addRegUsage_ARMAMode1(u, i->ARMin.LdSt32.amode);
2121 if (i->ARMin.LdSt32.isLoad) {
2122 addHRegUse(u, HRmWrite, i->ARMin.LdSt32.rD);
2123 if (i->ARMin.LdSt32.cc != ARMcc_AL)
2124 addHRegUse(u, HRmRead, i->ARMin.LdSt32.rD);
2125 } else {
2126 addHRegUse(u, HRmRead, i->ARMin.LdSt32.rD);
2128 return;
2129 case ARMin_LdSt16:
2130 addRegUsage_ARMAMode2(u, i->ARMin.LdSt16.amode);
2131 if (i->ARMin.LdSt16.isLoad) {
2132 addHRegUse(u, HRmWrite, i->ARMin.LdSt16.rD);
2133 if (i->ARMin.LdSt16.cc != ARMcc_AL)
2134 addHRegUse(u, HRmRead, i->ARMin.LdSt16.rD);
2135 } else {
2136 addHRegUse(u, HRmRead, i->ARMin.LdSt16.rD);
2138 return;
2139 case ARMin_LdSt8U:
2140 addRegUsage_ARMAMode1(u, i->ARMin.LdSt8U.amode);
2141 if (i->ARMin.LdSt8U.isLoad) {
2142 addHRegUse(u, HRmWrite, i->ARMin.LdSt8U.rD);
2143 if (i->ARMin.LdSt8U.cc != ARMcc_AL)
2144 addHRegUse(u, HRmRead, i->ARMin.LdSt8U.rD);
2145 } else {
2146 addHRegUse(u, HRmRead, i->ARMin.LdSt8U.rD);
2148 return;
2149 case ARMin_Ld8S:
2150 addRegUsage_ARMAMode2(u, i->ARMin.Ld8S.amode);
2151 addHRegUse(u, HRmWrite, i->ARMin.Ld8S.rD);
2152 if (i->ARMin.Ld8S.cc != ARMcc_AL)
2153 addHRegUse(u, HRmRead, i->ARMin.Ld8S.rD);
2154 return;
2155 /* XDirect/XIndir/XAssisted are also a bit subtle. They
2156 conditionally exit the block. Hence we only need to list (1)
2157 the registers that they read, and (2) the registers that they
2158 write in the case where the block is not exited. (2) is
2159 empty, hence only (1) is relevant here. */
2160 case ARMin_XDirect:
2161 addRegUsage_ARMAMode1(u, i->ARMin.XDirect.amR15T);
2162 return;
2163 case ARMin_XIndir:
2164 addHRegUse(u, HRmRead, i->ARMin.XIndir.dstGA);
2165 addRegUsage_ARMAMode1(u, i->ARMin.XIndir.amR15T);
2166 return;
2167 case ARMin_XAssisted:
2168 addHRegUse(u, HRmRead, i->ARMin.XAssisted.dstGA);
2169 addRegUsage_ARMAMode1(u, i->ARMin.XAssisted.amR15T);
2170 return;
2171 case ARMin_CMov:
2172 addHRegUse(u, HRmWrite, i->ARMin.CMov.dst);
2173 addHRegUse(u, HRmRead, i->ARMin.CMov.dst);
2174 addRegUsage_ARMRI84(u, i->ARMin.CMov.src);
2175 return;
2176 case ARMin_Call:
2177 /* logic and comments copied/modified from x86 back end */
2178 /* This is a bit subtle. */
2179 /* First off, claim it trashes all the caller-saved regs
2180 which fall within the register allocator's jurisdiction.
2181 These I believe to be r0,1,2,3. If it turns out that r9
2182 is also caller-saved, then we'll have to add that here
2183 too. */
2184 addHRegUse(u, HRmWrite, hregARM_R0());
2185 addHRegUse(u, HRmWrite, hregARM_R1());
2186 addHRegUse(u, HRmWrite, hregARM_R2());
2187 addHRegUse(u, HRmWrite, hregARM_R3());
2188 /* Now we have to state any parameter-carrying registers
2189 which might be read. This depends on nArgRegs. */
2190 switch (i->ARMin.Call.nArgRegs) {
2191 case 4: addHRegUse(u, HRmRead, hregARM_R3()); /*fallthru*/
2192 case 3: addHRegUse(u, HRmRead, hregARM_R2()); /*fallthru*/
2193 case 2: addHRegUse(u, HRmRead, hregARM_R1()); /*fallthru*/
2194 case 1: addHRegUse(u, HRmRead, hregARM_R0()); break;
2195 case 0: break;
2196 default: vpanic("getRegUsage_ARM:Call:regparms");
2198 /* Finally, there is the issue that the insn trashes a
2199 register because the literal target address has to be
2200 loaded into a register. Fortunately, for the nArgRegs=
2201 0/1/2/3 case, we can use r0, r1, r2 or r3 respectively, so
2202 this does not cause any further damage. For the
2203 nArgRegs=4 case, we'll have to choose another register
2204 arbitrarily since all the caller saved regs are used for
2205 parameters, and so we might as well choose r11.
2207 if (i->ARMin.Call.nArgRegs == 4)
2208 addHRegUse(u, HRmWrite, hregARM_R11());
2209 /* Upshot of this is that the assembler really must observe
2210 the here-stated convention of which register to use as an
2211 address temporary, depending on nArgRegs: 0==r0,
2212 1==r1, 2==r2, 3==r3, 4==r11 */
2213 return;
2214 case ARMin_Mul:
2215 addHRegUse(u, HRmRead, hregARM_R2());
2216 addHRegUse(u, HRmRead, hregARM_R3());
2217 addHRegUse(u, HRmWrite, hregARM_R0());
2218 if (i->ARMin.Mul.op != ARMmul_PLAIN)
2219 addHRegUse(u, HRmWrite, hregARM_R1());
2220 return;
2221 case ARMin_LdrEX:
2222 addHRegUse(u, HRmRead, hregARM_R4());
2223 addHRegUse(u, HRmWrite, hregARM_R2());
2224 if (i->ARMin.LdrEX.szB == 8)
2225 addHRegUse(u, HRmWrite, hregARM_R3());
2226 return;
2227 case ARMin_StrEX:
2228 addHRegUse(u, HRmRead, hregARM_R4());
2229 addHRegUse(u, HRmWrite, hregARM_R0());
2230 addHRegUse(u, HRmRead, hregARM_R2());
2231 if (i->ARMin.StrEX.szB == 8)
2232 addHRegUse(u, HRmRead, hregARM_R3());
2233 return;
2234 case ARMin_VLdStD:
2235 addRegUsage_ARMAModeV(u, i->ARMin.VLdStD.amode);
2236 if (i->ARMin.VLdStD.isLoad) {
2237 addHRegUse(u, HRmWrite, i->ARMin.VLdStD.dD);
2238 } else {
2239 addHRegUse(u, HRmRead, i->ARMin.VLdStD.dD);
2241 return;
2242 case ARMin_VLdStS:
2243 addRegUsage_ARMAModeV(u, i->ARMin.VLdStS.amode);
2244 if (i->ARMin.VLdStS.isLoad) {
2245 addHRegUse(u, HRmWrite, i->ARMin.VLdStS.fD);
2246 } else {
2247 addHRegUse(u, HRmRead, i->ARMin.VLdStS.fD);
2249 return;
2250 case ARMin_VAluD:
2251 addHRegUse(u, HRmWrite, i->ARMin.VAluD.dst);
2252 addHRegUse(u, HRmRead, i->ARMin.VAluD.argL);
2253 addHRegUse(u, HRmRead, i->ARMin.VAluD.argR);
2254 return;
2255 case ARMin_VAluS:
2256 addHRegUse(u, HRmWrite, i->ARMin.VAluS.dst);
2257 addHRegUse(u, HRmRead, i->ARMin.VAluS.argL);
2258 addHRegUse(u, HRmRead, i->ARMin.VAluS.argR);
2259 return;
2260 case ARMin_VUnaryD:
2261 addHRegUse(u, HRmWrite, i->ARMin.VUnaryD.dst);
2262 addHRegUse(u, HRmRead, i->ARMin.VUnaryD.src);
2264 if (i->ARMin.VUnaryD.op == ARMvfpu_COPY) {
2265 u->isRegRegMove = True;
2266 u->regMoveSrc = i->ARMin.VUnaryD.src;
2267 u->regMoveDst = i->ARMin.VUnaryD.dst;
2269 return;
2270 case ARMin_VUnaryS:
2271 addHRegUse(u, HRmWrite, i->ARMin.VUnaryS.dst);
2272 addHRegUse(u, HRmRead, i->ARMin.VUnaryS.src);
2274 if (i->ARMin.VUnaryS.op == ARMvfpu_COPY) {
2275 u->isRegRegMove = True;
2276 u->regMoveSrc = i->ARMin.VUnaryS.src;
2277 u->regMoveDst = i->ARMin.VUnaryS.dst;
2279 return;
2280 case ARMin_VCmpD:
2281 addHRegUse(u, HRmRead, i->ARMin.VCmpD.argL);
2282 addHRegUse(u, HRmRead, i->ARMin.VCmpD.argR);
2283 return;
2284 case ARMin_VCMovD:
2285 addHRegUse(u, HRmWrite, i->ARMin.VCMovD.dst);
2286 addHRegUse(u, HRmRead, i->ARMin.VCMovD.dst);
2287 addHRegUse(u, HRmRead, i->ARMin.VCMovD.src);
2288 return;
2289 case ARMin_VCMovS:
2290 addHRegUse(u, HRmWrite, i->ARMin.VCMovS.dst);
2291 addHRegUse(u, HRmRead, i->ARMin.VCMovS.dst);
2292 addHRegUse(u, HRmRead, i->ARMin.VCMovS.src);
2293 return;
2294 case ARMin_VCvtSD:
2295 addHRegUse(u, HRmWrite, i->ARMin.VCvtSD.dst);
2296 addHRegUse(u, HRmRead, i->ARMin.VCvtSD.src);
2297 return;
2298 case ARMin_VXferQ:
2299 if (i->ARMin.VXferQ.toQ) {
2300 addHRegUse(u, HRmWrite, i->ARMin.VXferQ.qD);
2301 addHRegUse(u, HRmRead, i->ARMin.VXferQ.dHi);
2302 addHRegUse(u, HRmRead, i->ARMin.VXferQ.dLo);
2303 } else {
2304 addHRegUse(u, HRmRead, i->ARMin.VXferQ.qD);
2305 addHRegUse(u, HRmWrite, i->ARMin.VXferQ.dHi);
2306 addHRegUse(u, HRmWrite, i->ARMin.VXferQ.dLo);
2308 return;
2309 case ARMin_VXferD:
2310 if (i->ARMin.VXferD.toD) {
2311 addHRegUse(u, HRmWrite, i->ARMin.VXferD.dD);
2312 addHRegUse(u, HRmRead, i->ARMin.VXferD.rHi);
2313 addHRegUse(u, HRmRead, i->ARMin.VXferD.rLo);
2314 } else {
2315 addHRegUse(u, HRmRead, i->ARMin.VXferD.dD);
2316 addHRegUse(u, HRmWrite, i->ARMin.VXferD.rHi);
2317 addHRegUse(u, HRmWrite, i->ARMin.VXferD.rLo);
2319 return;
2320 case ARMin_VXferS:
2321 if (i->ARMin.VXferS.toS) {
2322 addHRegUse(u, HRmWrite, i->ARMin.VXferS.fD);
2323 addHRegUse(u, HRmRead, i->ARMin.VXferS.rLo);
2324 } else {
2325 addHRegUse(u, HRmRead, i->ARMin.VXferS.fD);
2326 addHRegUse(u, HRmWrite, i->ARMin.VXferS.rLo);
2328 return;
2329 case ARMin_VCvtID:
2330 addHRegUse(u, HRmWrite, i->ARMin.VCvtID.dst);
2331 addHRegUse(u, HRmRead, i->ARMin.VCvtID.src);
2332 return;
2333 case ARMin_VRIntR:
2334 addHRegUse(u, HRmWrite, i->ARMin.VRIntR.dst);
2335 addHRegUse(u, HRmRead, i->ARMin.VRIntR.src);
2336 return;
2337 case ARMin_VMinMaxNum:
2338 addHRegUse(u, HRmWrite, i->ARMin.VMinMaxNum.dst);
2339 addHRegUse(u, HRmRead, i->ARMin.VMinMaxNum.srcL);
2340 addHRegUse(u, HRmRead, i->ARMin.VMinMaxNum.srcR);
2341 return;
2342 case ARMin_FPSCR:
2343 if (i->ARMin.FPSCR.toFPSCR)
2344 addHRegUse(u, HRmRead, i->ARMin.FPSCR.iReg);
2345 else
2346 addHRegUse(u, HRmWrite, i->ARMin.FPSCR.iReg);
2347 return;
2348 case ARMin_MFence:
2349 return;
2350 case ARMin_CLREX:
2351 return;
2352 case ARMin_NLdStQ:
2353 if (i->ARMin.NLdStQ.isLoad)
2354 addHRegUse(u, HRmWrite, i->ARMin.NLdStQ.dQ);
2355 else
2356 addHRegUse(u, HRmRead, i->ARMin.NLdStQ.dQ);
2357 addRegUsage_ARMAModeN(u, i->ARMin.NLdStQ.amode);
2358 return;
2359 case ARMin_NLdStD:
2360 if (i->ARMin.NLdStD.isLoad)
2361 addHRegUse(u, HRmWrite, i->ARMin.NLdStD.dD);
2362 else
2363 addHRegUse(u, HRmRead, i->ARMin.NLdStD.dD);
2364 addRegUsage_ARMAModeN(u, i->ARMin.NLdStD.amode);
2365 return;
2366 case ARMin_NUnary:
2367 addHRegUse(u, HRmWrite, i->ARMin.NUnary.dst);
2368 addHRegUse(u, HRmRead, i->ARMin.NUnary.src);
2370 if (i->ARMin.NUnary.op == ARMneon_COPY) {
2371 u->isRegRegMove = True;
2372 u->regMoveSrc = i->ARMin.NUnary.src;
2373 u->regMoveDst = i->ARMin.NUnary.dst;
2375 return;
2376 case ARMin_NUnaryS:
2377 addHRegUse(u, HRmWrite, i->ARMin.NUnaryS.dst->reg);
2378 addHRegUse(u, HRmRead, i->ARMin.NUnaryS.src->reg);
2379 return;
2380 case ARMin_NShift:
2381 addHRegUse(u, HRmWrite, i->ARMin.NShift.dst);
2382 addHRegUse(u, HRmRead, i->ARMin.NShift.argL);
2383 addHRegUse(u, HRmRead, i->ARMin.NShift.argR);
2384 return;
2385 case ARMin_NShl64:
2386 addHRegUse(u, HRmWrite, i->ARMin.NShl64.dst);
2387 addHRegUse(u, HRmRead, i->ARMin.NShl64.src);
2388 return;
2389 case ARMin_NDual:
2390 addHRegUse(u, HRmWrite, i->ARMin.NDual.arg1);
2391 addHRegUse(u, HRmWrite, i->ARMin.NDual.arg2);
2392 addHRegUse(u, HRmRead, i->ARMin.NDual.arg1);
2393 addHRegUse(u, HRmRead, i->ARMin.NDual.arg2);
2394 return;
2395 case ARMin_NBinary:
2396 addHRegUse(u, HRmWrite, i->ARMin.NBinary.dst);
2397 /* TODO: sometimes dst is also being read! */
2398 // XXX fix this
2399 addHRegUse(u, HRmRead, i->ARMin.NBinary.argL);
2400 addHRegUse(u, HRmRead, i->ARMin.NBinary.argR);
2401 return;
2402 case ARMin_NeonImm:
2403 addHRegUse(u, HRmWrite, i->ARMin.NeonImm.dst);
2404 return;
2405 case ARMin_NCMovQ:
2406 addHRegUse(u, HRmWrite, i->ARMin.NCMovQ.dst);
2407 addHRegUse(u, HRmRead, i->ARMin.NCMovQ.dst);
2408 addHRegUse(u, HRmRead, i->ARMin.NCMovQ.src);
2409 return;
2410 case ARMin_Add32:
2411 addHRegUse(u, HRmWrite, i->ARMin.Add32.rD);
2412 addHRegUse(u, HRmRead, i->ARMin.Add32.rN);
2413 return;
2414 case ARMin_EvCheck:
2415 /* We expect both amodes only to mention r8, so this is in
2416 fact pointless, since r8 isn't allocatable, but
2417 anyway.. */
2418 addRegUsage_ARMAMode1(u, i->ARMin.EvCheck.amCounter);
2419 addRegUsage_ARMAMode1(u, i->ARMin.EvCheck.amFailAddr);
2420 addHRegUse(u, HRmWrite, hregARM_R12()); /* also unavail to RA */
2421 return;
2422 case ARMin_ProfInc:
2423 addHRegUse(u, HRmWrite, hregARM_R12());
2424 addHRegUse(u, HRmWrite, hregARM_R11());
2425 return;
2426 default:
2427 ppARMInstr(i);
2428 vpanic("getRegUsage_ARMInstr");
2433 void mapRegs_ARMInstr ( HRegRemap* m, ARMInstr* i, Bool mode64 )
2435 vassert(mode64 == False);
2436 switch (i->tag) {
2437 case ARMin_Alu:
2438 i->ARMin.Alu.dst = lookupHRegRemap(m, i->ARMin.Alu.dst);
2439 i->ARMin.Alu.argL = lookupHRegRemap(m, i->ARMin.Alu.argL);
2440 mapRegs_ARMRI84(m, i->ARMin.Alu.argR);
2441 return;
2442 case ARMin_Shift:
2443 i->ARMin.Shift.dst = lookupHRegRemap(m, i->ARMin.Shift.dst);
2444 i->ARMin.Shift.argL = lookupHRegRemap(m, i->ARMin.Shift.argL);
2445 mapRegs_ARMRI5(m, i->ARMin.Shift.argR);
2446 return;
2447 case ARMin_Unary:
2448 i->ARMin.Unary.dst = lookupHRegRemap(m, i->ARMin.Unary.dst);
2449 i->ARMin.Unary.src = lookupHRegRemap(m, i->ARMin.Unary.src);
2450 return;
2451 case ARMin_CmpOrTst:
2452 i->ARMin.CmpOrTst.argL = lookupHRegRemap(m, i->ARMin.CmpOrTst.argL);
2453 mapRegs_ARMRI84(m, i->ARMin.CmpOrTst.argR);
2454 return;
2455 case ARMin_Mov:
2456 i->ARMin.Mov.dst = lookupHRegRemap(m, i->ARMin.Mov.dst);
2457 mapRegs_ARMRI84(m, i->ARMin.Mov.src);
2458 return;
2459 case ARMin_Imm32:
2460 i->ARMin.Imm32.dst = lookupHRegRemap(m, i->ARMin.Imm32.dst);
2461 return;
2462 case ARMin_LdSt32:
2463 i->ARMin.LdSt32.rD = lookupHRegRemap(m, i->ARMin.LdSt32.rD);
2464 mapRegs_ARMAMode1(m, i->ARMin.LdSt32.amode);
2465 return;
2466 case ARMin_LdSt16:
2467 i->ARMin.LdSt16.rD = lookupHRegRemap(m, i->ARMin.LdSt16.rD);
2468 mapRegs_ARMAMode2(m, i->ARMin.LdSt16.amode);
2469 return;
2470 case ARMin_LdSt8U:
2471 i->ARMin.LdSt8U.rD = lookupHRegRemap(m, i->ARMin.LdSt8U.rD);
2472 mapRegs_ARMAMode1(m, i->ARMin.LdSt8U.amode);
2473 return;
2474 case ARMin_Ld8S:
2475 i->ARMin.Ld8S.rD = lookupHRegRemap(m, i->ARMin.Ld8S.rD);
2476 mapRegs_ARMAMode2(m, i->ARMin.Ld8S.amode);
2477 return;
2478 case ARMin_XDirect:
2479 mapRegs_ARMAMode1(m, i->ARMin.XDirect.amR15T);
2480 return;
2481 case ARMin_XIndir:
2482 i->ARMin.XIndir.dstGA
2483 = lookupHRegRemap(m, i->ARMin.XIndir.dstGA);
2484 mapRegs_ARMAMode1(m, i->ARMin.XIndir.amR15T);
2485 return;
2486 case ARMin_XAssisted:
2487 i->ARMin.XAssisted.dstGA
2488 = lookupHRegRemap(m, i->ARMin.XAssisted.dstGA);
2489 mapRegs_ARMAMode1(m, i->ARMin.XAssisted.amR15T);
2490 return;
2491 case ARMin_CMov:
2492 i->ARMin.CMov.dst = lookupHRegRemap(m, i->ARMin.CMov.dst);
2493 mapRegs_ARMRI84(m, i->ARMin.CMov.src);
2494 return;
2495 case ARMin_Call:
2496 return;
2497 case ARMin_Mul:
2498 return;
2499 case ARMin_LdrEX:
2500 return;
2501 case ARMin_StrEX:
2502 return;
2503 case ARMin_VLdStD:
2504 i->ARMin.VLdStD.dD = lookupHRegRemap(m, i->ARMin.VLdStD.dD);
2505 mapRegs_ARMAModeV(m, i->ARMin.VLdStD.amode);
2506 return;
2507 case ARMin_VLdStS:
2508 i->ARMin.VLdStS.fD = lookupHRegRemap(m, i->ARMin.VLdStS.fD);
2509 mapRegs_ARMAModeV(m, i->ARMin.VLdStS.amode);
2510 return;
2511 case ARMin_VAluD:
2512 i->ARMin.VAluD.dst = lookupHRegRemap(m, i->ARMin.VAluD.dst);
2513 i->ARMin.VAluD.argL = lookupHRegRemap(m, i->ARMin.VAluD.argL);
2514 i->ARMin.VAluD.argR = lookupHRegRemap(m, i->ARMin.VAluD.argR);
2515 return;
2516 case ARMin_VAluS:
2517 i->ARMin.VAluS.dst = lookupHRegRemap(m, i->ARMin.VAluS.dst);
2518 i->ARMin.VAluS.argL = lookupHRegRemap(m, i->ARMin.VAluS.argL);
2519 i->ARMin.VAluS.argR = lookupHRegRemap(m, i->ARMin.VAluS.argR);
2520 return;
2521 case ARMin_VUnaryD:
2522 i->ARMin.VUnaryD.dst = lookupHRegRemap(m, i->ARMin.VUnaryD.dst);
2523 i->ARMin.VUnaryD.src = lookupHRegRemap(m, i->ARMin.VUnaryD.src);
2524 return;
2525 case ARMin_VUnaryS:
2526 i->ARMin.VUnaryS.dst = lookupHRegRemap(m, i->ARMin.VUnaryS.dst);
2527 i->ARMin.VUnaryS.src = lookupHRegRemap(m, i->ARMin.VUnaryS.src);
2528 return;
2529 case ARMin_VCmpD:
2530 i->ARMin.VCmpD.argL = lookupHRegRemap(m, i->ARMin.VCmpD.argL);
2531 i->ARMin.VCmpD.argR = lookupHRegRemap(m, i->ARMin.VCmpD.argR);
2532 return;
2533 case ARMin_VCMovD:
2534 i->ARMin.VCMovD.dst = lookupHRegRemap(m, i->ARMin.VCMovD.dst);
2535 i->ARMin.VCMovD.src = lookupHRegRemap(m, i->ARMin.VCMovD.src);
2536 return;
2537 case ARMin_VCMovS:
2538 i->ARMin.VCMovS.dst = lookupHRegRemap(m, i->ARMin.VCMovS.dst);
2539 i->ARMin.VCMovS.src = lookupHRegRemap(m, i->ARMin.VCMovS.src);
2540 return;
2541 case ARMin_VCvtSD:
2542 i->ARMin.VCvtSD.dst = lookupHRegRemap(m, i->ARMin.VCvtSD.dst);
2543 i->ARMin.VCvtSD.src = lookupHRegRemap(m, i->ARMin.VCvtSD.src);
2544 return;
2545 case ARMin_VXferQ:
2546 i->ARMin.VXferQ.qD = lookupHRegRemap(m, i->ARMin.VXferQ.qD);
2547 i->ARMin.VXferQ.dHi = lookupHRegRemap(m, i->ARMin.VXferQ.dHi);
2548 i->ARMin.VXferQ.dLo = lookupHRegRemap(m, i->ARMin.VXferQ.dLo);
2549 return;
2550 case ARMin_VXferD:
2551 i->ARMin.VXferD.dD = lookupHRegRemap(m, i->ARMin.VXferD.dD);
2552 i->ARMin.VXferD.rHi = lookupHRegRemap(m, i->ARMin.VXferD.rHi);
2553 i->ARMin.VXferD.rLo = lookupHRegRemap(m, i->ARMin.VXferD.rLo);
2554 return;
2555 case ARMin_VXferS:
2556 i->ARMin.VXferS.fD = lookupHRegRemap(m, i->ARMin.VXferS.fD);
2557 i->ARMin.VXferS.rLo = lookupHRegRemap(m, i->ARMin.VXferS.rLo);
2558 return;
2559 case ARMin_VCvtID:
2560 i->ARMin.VCvtID.dst = lookupHRegRemap(m, i->ARMin.VCvtID.dst);
2561 i->ARMin.VCvtID.src = lookupHRegRemap(m, i->ARMin.VCvtID.src);
2562 return;
2563 case ARMin_VRIntR:
2564 i->ARMin.VRIntR.dst = lookupHRegRemap(m, i->ARMin.VRIntR.dst);
2565 i->ARMin.VRIntR.src = lookupHRegRemap(m, i->ARMin.VRIntR.src);
2566 return;
2567 case ARMin_VMinMaxNum:
2568 i->ARMin.VMinMaxNum.dst
2569 = lookupHRegRemap(m, i->ARMin.VMinMaxNum.dst);
2570 i->ARMin.VMinMaxNum.srcL
2571 = lookupHRegRemap(m, i->ARMin.VMinMaxNum.srcL);
2572 i->ARMin.VMinMaxNum.srcR
2573 = lookupHRegRemap(m, i->ARMin.VMinMaxNum.srcR);
2574 return;
2575 case ARMin_FPSCR:
2576 i->ARMin.FPSCR.iReg = lookupHRegRemap(m, i->ARMin.FPSCR.iReg);
2577 return;
2578 case ARMin_MFence:
2579 return;
2580 case ARMin_CLREX:
2581 return;
2582 case ARMin_NLdStQ:
2583 i->ARMin.NLdStQ.dQ = lookupHRegRemap(m, i->ARMin.NLdStQ.dQ);
2584 mapRegs_ARMAModeN(m, i->ARMin.NLdStQ.amode);
2585 return;
2586 case ARMin_NLdStD:
2587 i->ARMin.NLdStD.dD = lookupHRegRemap(m, i->ARMin.NLdStD.dD);
2588 mapRegs_ARMAModeN(m, i->ARMin.NLdStD.amode);
2589 return;
2590 case ARMin_NUnary:
2591 i->ARMin.NUnary.src = lookupHRegRemap(m, i->ARMin.NUnary.src);
2592 i->ARMin.NUnary.dst = lookupHRegRemap(m, i->ARMin.NUnary.dst);
2593 return;
2594 case ARMin_NUnaryS:
2595 i->ARMin.NUnaryS.src->reg
2596 = lookupHRegRemap(m, i->ARMin.NUnaryS.src->reg);
2597 i->ARMin.NUnaryS.dst->reg
2598 = lookupHRegRemap(m, i->ARMin.NUnaryS.dst->reg);
2599 return;
2600 case ARMin_NShift:
2601 i->ARMin.NShift.dst = lookupHRegRemap(m, i->ARMin.NShift.dst);
2602 i->ARMin.NShift.argL = lookupHRegRemap(m, i->ARMin.NShift.argL);
2603 i->ARMin.NShift.argR = lookupHRegRemap(m, i->ARMin.NShift.argR);
2604 return;
2605 case ARMin_NShl64:
2606 i->ARMin.NShl64.dst = lookupHRegRemap(m, i->ARMin.NShl64.dst);
2607 i->ARMin.NShl64.src = lookupHRegRemap(m, i->ARMin.NShl64.src);
2608 return;
2609 case ARMin_NDual:
2610 i->ARMin.NDual.arg1 = lookupHRegRemap(m, i->ARMin.NDual.arg1);
2611 i->ARMin.NDual.arg2 = lookupHRegRemap(m, i->ARMin.NDual.arg2);
2612 return;
2613 case ARMin_NBinary:
2614 i->ARMin.NBinary.argL = lookupHRegRemap(m, i->ARMin.NBinary.argL);
2615 i->ARMin.NBinary.argR = lookupHRegRemap(m, i->ARMin.NBinary.argR);
2616 i->ARMin.NBinary.dst = lookupHRegRemap(m, i->ARMin.NBinary.dst);
2617 return;
2618 case ARMin_NeonImm:
2619 i->ARMin.NeonImm.dst = lookupHRegRemap(m, i->ARMin.NeonImm.dst);
2620 return;
2621 case ARMin_NCMovQ:
2622 i->ARMin.NCMovQ.dst = lookupHRegRemap(m, i->ARMin.NCMovQ.dst);
2623 i->ARMin.NCMovQ.src = lookupHRegRemap(m, i->ARMin.NCMovQ.src);
2624 return;
2625 case ARMin_Add32:
2626 i->ARMin.Add32.rD = lookupHRegRemap(m, i->ARMin.Add32.rD);
2627 i->ARMin.Add32.rN = lookupHRegRemap(m, i->ARMin.Add32.rN);
2628 return;
2629 case ARMin_EvCheck:
2630 /* We expect both amodes only to mention r8, so this is in
2631 fact pointless, since r8 isn't allocatable, but
2632 anyway.. */
2633 mapRegs_ARMAMode1(m, i->ARMin.EvCheck.amCounter);
2634 mapRegs_ARMAMode1(m, i->ARMin.EvCheck.amFailAddr);
2635 return;
2636 case ARMin_ProfInc:
2637 /* hardwires r11 and r12 -- nothing to modify. */
2638 return;
2639 default:
2640 ppARMInstr(i);
2641 vpanic("mapRegs_ARMInstr");
2645 /* Generate arm spill/reload instructions under the direction of the
2646 register allocator. Note it's critical these don't write the
2647 condition codes. */
2649 void genSpill_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2650 HReg rreg, Int offsetB, Bool mode64 )
2652 HRegClass rclass;
2653 vassert(offsetB >= 0);
2654 vassert(!hregIsVirtual(rreg));
2655 vassert(mode64 == False);
2656 *i1 = *i2 = NULL;
2657 rclass = hregClass(rreg);
2658 switch (rclass) {
2659 case HRcInt32:
2660 vassert(offsetB <= 4095);
2661 *i1 = ARMInstr_LdSt32( ARMcc_AL, False/*!isLoad*/,
2662 rreg,
2663 ARMAMode1_RI(hregARM_R8(), offsetB) );
2664 return;
2665 case HRcFlt32:
2666 case HRcFlt64: {
2667 HReg r8 = hregARM_R8(); /* baseblock */
2668 HReg r12 = hregARM_R12(); /* spill temp */
2669 HReg base = r8;
2670 vassert(0 == (offsetB & 3));
2671 if (offsetB >= 1024) {
2672 Int offsetKB = offsetB / 1024;
2673 /* r12 = r8 + (1024 * offsetKB) */
2674 *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
2675 ARMRI84_I84(offsetKB, 11));
2676 offsetB -= (1024 * offsetKB);
2677 base = r12;
2679 vassert(offsetB <= 1020);
2680 if (rclass == HRcFlt32) {
2681 *i2 = ARMInstr_VLdStS( False/*!isLoad*/,
2682 rreg,
2683 mkARMAModeV(base, offsetB) );
2684 } else {
2685 *i2 = ARMInstr_VLdStD( False/*!isLoad*/,
2686 rreg,
2687 mkARMAModeV(base, offsetB) );
2689 return;
2691 case HRcVec128: {
2692 HReg r8 = hregARM_R8();
2693 HReg r12 = hregARM_R12();
2694 *i1 = ARMInstr_Add32(r12, r8, offsetB);
2695 *i2 = ARMInstr_NLdStQ(False, rreg, mkARMAModeN_R(r12));
2696 return;
2698 default:
2699 ppHRegClass(rclass);
2700 vpanic("genSpill_ARM: unimplemented regclass");
2704 void genReload_ARM ( /*OUT*/HInstr** i1, /*OUT*/HInstr** i2,
2705 HReg rreg, Int offsetB, Bool mode64 )
2707 HRegClass rclass;
2708 vassert(offsetB >= 0);
2709 vassert(!hregIsVirtual(rreg));
2710 vassert(mode64 == False);
2711 *i1 = *i2 = NULL;
2712 rclass = hregClass(rreg);
2713 switch (rclass) {
2714 case HRcInt32:
2715 vassert(offsetB <= 4095);
2716 *i1 = ARMInstr_LdSt32( ARMcc_AL, True/*isLoad*/,
2717 rreg,
2718 ARMAMode1_RI(hregARM_R8(), offsetB) );
2719 return;
2720 case HRcFlt32:
2721 case HRcFlt64: {
2722 HReg r8 = hregARM_R8(); /* baseblock */
2723 HReg r12 = hregARM_R12(); /* spill temp */
2724 HReg base = r8;
2725 vassert(0 == (offsetB & 3));
2726 if (offsetB >= 1024) {
2727 Int offsetKB = offsetB / 1024;
2728 /* r12 = r8 + (1024 * offsetKB) */
2729 *i1 = ARMInstr_Alu(ARMalu_ADD, r12, r8,
2730 ARMRI84_I84(offsetKB, 11));
2731 offsetB -= (1024 * offsetKB);
2732 base = r12;
2734 vassert(offsetB <= 1020);
2735 if (rclass == HRcFlt32) {
2736 *i2 = ARMInstr_VLdStS( True/*isLoad*/,
2737 rreg,
2738 mkARMAModeV(base, offsetB) );
2739 } else {
2740 *i2 = ARMInstr_VLdStD( True/*isLoad*/,
2741 rreg,
2742 mkARMAModeV(base, offsetB) );
2744 return;
2746 case HRcVec128: {
2747 HReg r8 = hregARM_R8();
2748 HReg r12 = hregARM_R12();
2749 *i1 = ARMInstr_Add32(r12, r8, offsetB);
2750 *i2 = ARMInstr_NLdStQ(True, rreg, mkARMAModeN_R(r12));
2751 return;
2753 default:
2754 ppHRegClass(rclass);
2755 vpanic("genReload_ARM: unimplemented regclass");
2759 ARMInstr* genMove_ARM(HReg from, HReg to, Bool mode64)
2761 switch (hregClass(from)) {
2762 case HRcInt32:
2763 return ARMInstr_Mov(to, ARMRI84_R(from));
2764 case HRcFlt32:
2765 return ARMInstr_VUnaryS(ARMvfpu_COPY, to, from);
2766 case HRcFlt64:
2767 return ARMInstr_VUnaryD(ARMvfpu_COPY, to, from);
2768 case HRcVec128:
2769 return ARMInstr_NUnary(ARMneon_COPY, to, from, 4, False);
2770 default:
2771 ppHRegClass(hregClass(from));
2772 vpanic("genMove_ARM: unimplemented regclass");
/* Emit an instruction into buf and return the number of bytes used.
   Note that buf is not the insn's final place, and therefore it is
   imperative to emit position-independent code. */

2780 static inline UInt iregEnc ( HReg r )
2782 UInt n;
2783 vassert(hregClass(r) == HRcInt32);
2784 vassert(!hregIsVirtual(r));
2785 n = hregEncoding(r);
2786 vassert(n <= 15);
2787 return n;
2790 static inline UInt dregEnc ( HReg r )
2792 UInt n;
2793 vassert(hregClass(r) == HRcFlt64);
2794 vassert(!hregIsVirtual(r));
2795 n = hregEncoding(r);
2796 vassert(n <= 31);
2797 return n;
2800 static inline UInt fregEnc ( HReg r )
2802 UInt n;
2803 vassert(hregClass(r) == HRcFlt32);
2804 vassert(!hregIsVirtual(r));
2805 n = hregEncoding(r);
2806 vassert(n <= 31);
2807 return n;
2810 static inline UInt qregEnc ( HReg r )
2812 UInt n;
2813 vassert(hregClass(r) == HRcVec128);
2814 vassert(!hregIsVirtual(r));
2815 n = hregEncoding(r);
2816 vassert(n <= 15);
2817 return n;
/* Assemble a 4-bit value from individual bits, most significant
   first. */
#define BITS4(zzb3,zzb2,zzb1,zzb0) \
   (((zzb3) << 3) | ((zzb2) << 2) | ((zzb1) << 1) | (zzb0))

/* Named constants for every 4-bit pattern, spelled in binary. */
#define X0000  BITS4(0,0,0,0)
#define X0001  BITS4(0,0,0,1)
#define X0010  BITS4(0,0,1,0)
#define X0011  BITS4(0,0,1,1)
#define X0100  BITS4(0,1,0,0)
#define X0101  BITS4(0,1,0,1)
#define X0110  BITS4(0,1,1,0)
#define X0111  BITS4(0,1,1,1)
#define X1000  BITS4(1,0,0,0)
#define X1001  BITS4(1,0,0,1)
#define X1010  BITS4(1,0,1,0)
#define X1011  BITS4(1,0,1,1)
#define X1100  BITS4(1,1,0,0)
#define X1101  BITS4(1,1,0,1)
#define X1110  BITS4(1,1,1,0)
#define X1111  BITS4(1,1,1,1)

/* The macros below build a 32-bit word out of 4-bit fields.  Nibble 7
   occupies bits 31..28 and nibble 0 occupies bits 3..0.  In each macro
   name an 'X' marks a nibble supplied as an argument and a '_' marks a
   nibble left as zero.  Every argument is masked with 0xF before being
   shifted; the top nibble is cast to UInt first so that the shift by
   28 is done on an unsigned value. */

/* Nibbles 7,6,5,4,3. */
#define XXXXX___(zzx7,zzx6,zzx5,zzx4,zzx3) \
   (((((UInt)(zzx7)) & 0xF) << 28) | \
    (((zzx6) & 0xF) << 24) | \
    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
    (((zzx3) & 0xF) << 12))

/* Nibbles 7,6,5,4,3,2. */
#define XXXXXX__(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2) \
   (((((UInt)(zzx7)) & 0xF) << 28) | \
    (((zzx6) & 0xF) << 24) | \
    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
    (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) <<  8))

/* Nibbles 7,6,5,4,3 and 0. */
#define XXXXX__X(zzx7,zzx6,zzx5,zzx4,zzx3,zzx0) \
   (((((UInt)(zzx7)) & 0xF) << 28) | \
    (((zzx6) & 0xF) << 24) | \
    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
    (((zzx3) & 0xF) << 12) | (((zzx0) & 0xF) <<  0))

/* Nibbles 7,6,5 and 1,0. */
#define XXX___XX(zzx7,zzx6,zzx5,zzx1,zzx0) \
   (((((UInt)(zzx7)) & 0xF) << 28) | \
    (((zzx6) & 0xF) << 24) | \
    (((zzx5) & 0xF) << 20) | (((zzx1) & 0xF) << 4) | \
    (((zzx0) & 0xF) << 0))

/* All eight nibbles. */
#define XXXXXXXX(zzx7,zzx6,zzx5,zzx4,zzx3,zzx2,zzx1,zzx0) \
   (((((UInt)(zzx7)) & 0xF) << 28) | \
    (((zzx6) & 0xF) << 24) | \
    (((zzx5) & 0xF) << 20) | (((zzx4) & 0xF) << 16) | \
    (((zzx3) & 0xF) << 12) | (((zzx2) & 0xF) <<  8) | \
    (((zzx1) & 0xF) <<  4) | (((zzx0) & 0xF) <<  0))

/* Nibbles 7,6 only. */
#define XX______(zzx7,zzx6) \
   (((((UInt)(zzx7)) & 0xF) << 28) | (((zzx6) & 0xF) << 24))

2873 /* Generate a skeletal insn that involves an a RI84 shifter operand.
2874 Returns a word which is all zeroes apart from bits 25 and 11..0,
2875 since it is those that encode the shifter operand (at least to the
2876 extent that we care about it.) */
2877 static UInt skeletal_RI84 ( ARMRI84* ri )
2879 UInt instr;
2880 if (ri->tag == ARMri84_I84) {
2881 vassert(0 == (ri->ARMri84.I84.imm4 & ~0x0F));
2882 vassert(0 == (ri->ARMri84.I84.imm8 & ~0xFF));
2883 instr = 1 << 25;
2884 instr |= (ri->ARMri84.I84.imm4 << 8);
2885 instr |= ri->ARMri84.I84.imm8;
2886 } else {
2887 instr = 0 << 25;
2888 instr |= iregEnc(ri->ARMri84.R.reg);
2890 return instr;
2893 /* Ditto for RI5. Resulting word is zeroes apart from bit 4 and bits
2894 11..7. */
2895 static UInt skeletal_RI5 ( ARMRI5* ri )
2897 UInt instr;
2898 if (ri->tag == ARMri5_I5) {
2899 UInt imm5 = ri->ARMri5.I5.imm5;
2900 vassert(imm5 >= 1 && imm5 <= 31);
2901 instr = 0 << 4;
2902 instr |= imm5 << 7;
2903 } else {
2904 instr = 1 << 4;
2905 instr |= iregEnc(ri->ARMri5.R.reg) << 8;
2907 return instr;
2911 /* Get an immediate into a register, using only that
2912 register. (very lame..) */
2913 static UInt* imm32_to_ireg ( UInt* p, Int rD, UInt imm32 )
2915 UInt instr;
2916 vassert(rD >= 0 && rD <= 14); // r15 not good to mess with!
2917 #if 0
2918 if (0 == (imm32 & ~0xFF)) {
2919 /* mov with a immediate shifter operand of (0, imm32) (??) */
2920 instr = XXXXXX__(X1110,X0011,X1010,X0000,rD,X0000);
2921 instr |= imm32;
2922 *p++ = instr;
2923 } else {
2924 // this is very bad; causes Dcache pollution
2925 // ldr rD, [pc]
2926 instr = XXXXX___(X1110,X0101,X1001,X1111,rD);
2927 *p++ = instr;
2928 // b .+8
2929 instr = 0xEA000000;
2930 *p++ = instr;
2931 // .word imm32
2932 *p++ = imm32;
2934 #else
2935 if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
2936 /* Generate movw rD, #low16. Then, if the high 16 are
2937 nonzero, generate movt rD, #high16. */
2938 UInt lo16 = imm32 & 0xFFFF;
2939 UInt hi16 = (imm32 >> 16) & 0xFFFF;
2940 instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
2941 (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
2942 lo16 & 0xF);
2943 *p++ = instr;
2944 if (hi16 != 0) {
2945 instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
2946 (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
2947 hi16 & 0xF);
2948 *p++ = instr;
2950 } else {
2951 UInt imm, rot;
2952 UInt op = X1010;
2953 UInt rN = 0;
2954 if ((imm32 & 0xFF) || (imm32 == 0)) {
2955 imm = imm32 & 0xFF;
2956 rot = 0;
2957 instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2958 *p++ = instr;
2959 op = X1000;
2960 rN = rD;
2962 if (imm32 & 0xFF000000) {
2963 imm = (imm32 >> 24) & 0xFF;
2964 rot = 4;
2965 instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2966 *p++ = instr;
2967 op = X1000;
2968 rN = rD;
2970 if (imm32 & 0xFF0000) {
2971 imm = (imm32 >> 16) & 0xFF;
2972 rot = 8;
2973 instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2974 *p++ = instr;
2975 op = X1000;
2976 rN = rD;
2978 if (imm32 & 0xFF00) {
2979 imm = (imm32 >> 8) & 0xFF;
2980 rot = 12;
2981 instr = XXXXXXXX(0xE, 0x3, op, rN, rD, rot, imm >> 4, imm & 0xF);
2982 *p++ = instr;
2983 op = X1000;
2984 rN = rD;
2987 #endif
2988 return p;
2991 /* Get an immediate into a register, using only that register, and
2992 generating exactly 2 instructions, regardless of the value of the
2993 immediate. This is used when generating sections of code that need
2994 to be patched later, so as to guarantee a specific size. */
2995 static UInt* imm32_to_ireg_EXACTLY2 ( UInt* p, Int rD, UInt imm32 )
2997 if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
2998 /* Generate movw rD, #low16 ; movt rD, #high16. */
2999 UInt lo16 = imm32 & 0xFFFF;
3000 UInt hi16 = (imm32 >> 16) & 0xFFFF;
3001 UInt instr;
3002 instr = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
3003 (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
3004 lo16 & 0xF);
3005 *p++ = instr;
3006 instr = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
3007 (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
3008 hi16 & 0xF);
3009 *p++ = instr;
3010 } else {
3011 vassert(0); /* lose */
3013 return p;
3016 /* Check whether p points at a 2-insn sequence cooked up by
3017 imm32_to_ireg_EXACTLY2(). */
3018 static Bool is_imm32_to_ireg_EXACTLY2 ( UInt* p, Int rD, UInt imm32 )
3020 if (VEX_ARM_ARCHLEVEL(arm_hwcaps) > 6) {
3021 /* Generate movw rD, #low16 ; movt rD, #high16. */
3022 UInt lo16 = imm32 & 0xFFFF;
3023 UInt hi16 = (imm32 >> 16) & 0xFFFF;
3024 UInt i0, i1;
3025 i0 = XXXXXXXX(0xE, 0x3, 0x0, (lo16 >> 12) & 0xF, rD,
3026 (lo16 >> 8) & 0xF, (lo16 >> 4) & 0xF,
3027 lo16 & 0xF);
3028 i1 = XXXXXXXX(0xE, 0x3, 0x4, (hi16 >> 12) & 0xF, rD,
3029 (hi16 >> 8) & 0xF, (hi16 >> 4) & 0xF,
3030 hi16 & 0xF);
3031 return p[0] == i0 && p[1] == i1;
3032 } else {
3033 vassert(0); /* lose */
3038 static UInt* do_load_or_store32 ( UInt* p,
3039 Bool isLoad, UInt rD, ARMAMode1* am )
3041 vassert(rD <= 12);
3042 vassert(am->tag == ARMam1_RI); // RR case is not handled
3043 UInt bB = 0;
3044 UInt bL = isLoad ? 1 : 0;
3045 Int simm12;
3046 UInt instr, bP;
3047 if (am->ARMam1.RI.simm13 < 0) {
3048 bP = 0;
3049 simm12 = -am->ARMam1.RI.simm13;
3050 } else {
3051 bP = 1;
3052 simm12 = am->ARMam1.RI.simm13;
3054 vassert(simm12 >= 0 && simm12 <= 4095);
3055 instr = XXXXX___(X1110,X0101,BITS4(bP,bB,0,bL),
3056 iregEnc(am->ARMam1.RI.reg),
3057 rD);
3058 instr |= simm12;
3059 *p++ = instr;
3060 return p;
3064 /* Emit an instruction into buf and return the number of bytes used.
3065 Note that buf is not the insn's final place, and therefore it is
3066 imperative to emit position-independent code. If the emitted
3067 instruction was a profiler inc, set *is_profInc to True, else
3068 leave it unchanged. */
3070 Int emit_ARMInstr ( /*MB_MOD*/Bool* is_profInc,
3071 UChar* buf, Int nbuf, const ARMInstr* i,
3072 Bool mode64, VexEndness endness_host,
3073 const void* disp_cp_chain_me_to_slowEP,
3074 const void* disp_cp_chain_me_to_fastEP,
3075 const void* disp_cp_xindir,
3076 const void* disp_cp_xassisted )
3078 UInt* p = (UInt*)buf;
3079 vassert(nbuf >= 32);
3080 vassert(mode64 == False);
3081 vassert(0 == (((HWord)buf) & 3));
3083 switch (i->tag) {
3084 case ARMin_Alu: {
3085 UInt instr, subopc;
3086 UInt rD = iregEnc(i->ARMin.Alu.dst);
3087 UInt rN = iregEnc(i->ARMin.Alu.argL);
3088 ARMRI84* argR = i->ARMin.Alu.argR;
3089 switch (i->ARMin.Alu.op) {
3090 case ARMalu_ADDS: /* fallthru */
3091 case ARMalu_ADD: subopc = X0100; break;
3092 case ARMalu_ADC: subopc = X0101; break;
3093 case ARMalu_SUBS: /* fallthru */
3094 case ARMalu_SUB: subopc = X0010; break;
3095 case ARMalu_SBC: subopc = X0110; break;
3096 case ARMalu_AND: subopc = X0000; break;
3097 case ARMalu_BIC: subopc = X1110; break;
3098 case ARMalu_OR: subopc = X1100; break;
3099 case ARMalu_XOR: subopc = X0001; break;
3100 default: goto bad;
3102 instr = skeletal_RI84(argR);
3103 instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
3104 (subopc << 1) & 0xF, rN, rD);
3105 if (i->ARMin.Alu.op == ARMalu_ADDS
3106 || i->ARMin.Alu.op == ARMalu_SUBS) {
3107 instr |= 1<<20; /* set the S bit */
3109 *p++ = instr;
3110 goto done;
3112 case ARMin_Shift: {
3113 UInt instr, subopc;
3114 UInt rD = iregEnc(i->ARMin.Shift.dst);
3115 UInt rM = iregEnc(i->ARMin.Shift.argL);
3116 ARMRI5* argR = i->ARMin.Shift.argR;
3117 switch (i->ARMin.Shift.op) {
3118 case ARMsh_SHL: subopc = X0000; break;
3119 case ARMsh_SHR: subopc = X0001; break;
3120 case ARMsh_SAR: subopc = X0010; break;
3121 default: goto bad;
3123 instr = skeletal_RI5(argR);
3124 instr |= XXXXX__X(X1110,X0001,X1010,X0000,rD, /* _ _ */ rM);
3125 instr |= (subopc & 3) << 5;
3126 *p++ = instr;
3127 goto done;
3129 case ARMin_Unary: {
3130 UInt instr;
3131 UInt rDst = iregEnc(i->ARMin.Unary.dst);
3132 UInt rSrc = iregEnc(i->ARMin.Unary.src);
3133 switch (i->ARMin.Unary.op) {
3134 case ARMun_CLZ:
3135 instr = XXXXXXXX(X1110,X0001,X0110,X1111,
3136 rDst,X1111,X0001,rSrc);
3137 *p++ = instr;
3138 goto done;
3139 case ARMun_NEG: /* RSB rD,rS,#0 */
3140 instr = XXXXX___(X1110,0x2,0x6,rSrc,rDst);
3141 *p++ = instr;
3142 goto done;
3143 case ARMun_NOT: {
3144 UInt subopc = X1111; /* MVN */
3145 instr = rSrc;
3146 instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
3147 (subopc << 1) & 0xF, 0, rDst);
3148 *p++ = instr;
3149 goto done;
3151 default:
3152 break;
3154 goto bad;
3156 case ARMin_CmpOrTst: {
3157 UInt instr = skeletal_RI84(i->ARMin.CmpOrTst.argR);
3158 UInt subopc = i->ARMin.CmpOrTst.isCmp ? X1010 : X1000;
3159 UInt SBZ = 0;
3160 instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
3161 ((subopc << 1) & 0xF) | 1,
3162 iregEnc(i->ARMin.CmpOrTst.argL), SBZ );
3163 *p++ = instr;
3164 goto done;
3166 case ARMin_Mov: {
3167 UInt instr = skeletal_RI84(i->ARMin.Mov.src);
3168 UInt subopc = X1101; /* MOV */
3169 UInt SBZ = 0;
3170 instr |= XXXXX___(X1110, (1 & (subopc >> 3)),
3171 (subopc << 1) & 0xF, SBZ,
3172 iregEnc(i->ARMin.Mov.dst));
3173 *p++ = instr;
3174 goto done;
3176 case ARMin_Imm32: {
3177 p = imm32_to_ireg( (UInt*)p, iregEnc(i->ARMin.Imm32.dst),
3178 i->ARMin.Imm32.imm32 );
3179 goto done;
3181 case ARMin_LdSt32:
3182 case ARMin_LdSt8U: {
3183 UInt bL, bB;
3184 HReg rD;
3185 ARMAMode1* am;
3186 ARMCondCode cc;
3187 if (i->tag == ARMin_LdSt32) {
3188 bB = 0;
3189 bL = i->ARMin.LdSt32.isLoad ? 1 : 0;
3190 am = i->ARMin.LdSt32.amode;
3191 rD = i->ARMin.LdSt32.rD;
3192 cc = i->ARMin.LdSt32.cc;
3193 } else {
3194 bB = 1;
3195 bL = i->ARMin.LdSt8U.isLoad ? 1 : 0;
3196 am = i->ARMin.LdSt8U.amode;
3197 rD = i->ARMin.LdSt8U.rD;
3198 cc = i->ARMin.LdSt8U.cc;
3200 vassert(cc != ARMcc_NV);
3201 if (am->tag == ARMam1_RI) {
3202 Int simm12;
3203 UInt instr, bP;
3204 if (am->ARMam1.RI.simm13 < 0) {
3205 bP = 0;
3206 simm12 = -am->ARMam1.RI.simm13;
3207 } else {
3208 bP = 1;
3209 simm12 = am->ARMam1.RI.simm13;
3211 vassert(simm12 >= 0 && simm12 <= 4095);
3212 instr = XXXXX___(cc,X0101,BITS4(bP,bB,0,bL),
3213 iregEnc(am->ARMam1.RI.reg),
3214 iregEnc(rD));
3215 instr |= simm12;
3216 *p++ = instr;
3217 goto done;
3218 } else {
3219 // RR case
3220 goto bad;
3223 case ARMin_LdSt16: {
3224 HReg rD = i->ARMin.LdSt16.rD;
3225 UInt bS = i->ARMin.LdSt16.signedLoad ? 1 : 0;
3226 UInt bL = i->ARMin.LdSt16.isLoad ? 1 : 0;
3227 ARMAMode2* am = i->ARMin.LdSt16.amode;
3228 ARMCondCode cc = i->ARMin.LdSt16.cc;
3229 vassert(cc != ARMcc_NV);
3230 if (am->tag == ARMam2_RI) {
3231 HReg rN = am->ARMam2.RI.reg;
3232 Int simm8;
3233 UInt bP, imm8hi, imm8lo, instr;
3234 if (am->ARMam2.RI.simm9 < 0) {
3235 bP = 0;
3236 simm8 = -am->ARMam2.RI.simm9;
3237 } else {
3238 bP = 1;
3239 simm8 = am->ARMam2.RI.simm9;
3241 vassert(simm8 >= 0 && simm8 <= 255);
3242 imm8hi = (simm8 >> 4) & 0xF;
3243 imm8lo = simm8 & 0xF;
3244 vassert(!(bL == 0 && bS == 1)); // "! signed store"
3245 /**/ if (bL == 0 && bS == 0) {
3246 // strh
3247 instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,0), iregEnc(rN),
3248 iregEnc(rD), imm8hi, X1011, imm8lo);
3249 *p++ = instr;
3250 goto done;
3252 else if (bL == 1 && bS == 0) {
3253 // ldrh
3254 instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregEnc(rN),
3255 iregEnc(rD), imm8hi, X1011, imm8lo);
3256 *p++ = instr;
3257 goto done;
3259 else if (bL == 1 && bS == 1) {
3260 // ldrsh
3261 instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregEnc(rN),
3262 iregEnc(rD), imm8hi, X1111, imm8lo);
3263 *p++ = instr;
3264 goto done;
3266 else vassert(0); // ill-constructed insn
3267 } else {
3268 // RR case
3269 goto bad;
3272 case ARMin_Ld8S: {
3273 HReg rD = i->ARMin.Ld8S.rD;
3274 ARMAMode2* am = i->ARMin.Ld8S.amode;
3275 ARMCondCode cc = i->ARMin.Ld8S.cc;
3276 vassert(cc != ARMcc_NV);
3277 if (am->tag == ARMam2_RI) {
3278 HReg rN = am->ARMam2.RI.reg;
3279 Int simm8;
3280 UInt bP, imm8hi, imm8lo, instr;
3281 if (am->ARMam2.RI.simm9 < 0) {
3282 bP = 0;
3283 simm8 = -am->ARMam2.RI.simm9;
3284 } else {
3285 bP = 1;
3286 simm8 = am->ARMam2.RI.simm9;
3288 vassert(simm8 >= 0 && simm8 <= 255);
3289 imm8hi = (simm8 >> 4) & 0xF;
3290 imm8lo = simm8 & 0xF;
3291 // ldrsb
3292 instr = XXXXXXXX(cc,X0001, BITS4(bP,1,0,1), iregEnc(rN),
3293 iregEnc(rD), imm8hi, X1101, imm8lo);
3294 *p++ = instr;
3295 goto done;
3296 } else {
3297 // RR case
3298 goto bad;
3302 case ARMin_XDirect: {
3303 /* NB: what goes on here has to be very closely coordinated
3304 with the chainXDirect_ARM and unchainXDirect_ARM below. */
3305 /* We're generating chain-me requests here, so we need to be
3306 sure this is actually allowed -- no-redir translations
3307 can't use chain-me's. Hence: */
3308 vassert(disp_cp_chain_me_to_slowEP != NULL);
3309 vassert(disp_cp_chain_me_to_fastEP != NULL);
3311 /* Use ptmp for backpatching conditional jumps. */
3312 UInt* ptmp = NULL;
3314 /* First off, if this is conditional, create a conditional
3315 jump over the rest of it. Or at least, leave a space for
3316 it that we will shortly fill in. */
3317 if (i->ARMin.XDirect.cond != ARMcc_AL) {
3318 vassert(i->ARMin.XDirect.cond != ARMcc_NV);
3319 ptmp = p;
3320 *p++ = 0;
3323 /* Update the guest R15T. */
3324 /* movw r12, lo16(dstGA) */
3325 /* movt r12, hi16(dstGA) */
3326 /* str r12, amR15T */
3327 p = imm32_to_ireg(p, /*r*/12, i->ARMin.XDirect.dstGA);
3328 p = do_load_or_store32(p, False/*!isLoad*/,
3329 /*r*/12, i->ARMin.XDirect.amR15T);
3331 /* --- FIRST PATCHABLE BYTE follows --- */
3332 /* VG_(disp_cp_chain_me_to_{slowEP,fastEP}) (where we're
3333 calling to) backs up the return address, so as to find the
3334 address of the first patchable byte. So: don't change the
3335 number of instructions (3) below. */
3336 /* movw r12, lo16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */
3337 /* movt r12, hi16(VG_(disp_cp_chain_me_to_{slowEP,fastEP})) */
3338 /* blx r12 (A1) */
3339 const void* disp_cp_chain_me
3340 = i->ARMin.XDirect.toFastEP ? disp_cp_chain_me_to_fastEP
3341 : disp_cp_chain_me_to_slowEP;
3342 p = imm32_to_ireg_EXACTLY2(p, /*r*/12,
3343 (UInt)(Addr)disp_cp_chain_me);
3344 *p++ = 0xE12FFF3C;
3345 /* --- END of PATCHABLE BYTES --- */
3347 /* Fix up the conditional jump, if there was one. */
3348 if (i->ARMin.XDirect.cond != ARMcc_AL) {
3349 Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3350 vassert(delta > 0 && delta < 40);
3351 vassert((delta & 3) == 0);
3352 UInt notCond = 1 ^ (UInt)i->ARMin.XDirect.cond;
3353 vassert(notCond <= 13); /* Neither AL nor NV */
3354 delta = (delta >> 2) - 2;
3355 *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
3357 goto done;
3360 case ARMin_XIndir: {
3361 /* We're generating transfers that could lead indirectly to a
3362 chain-me, so we need to be sure this is actually allowed
3363 -- no-redir translations are not allowed to reach normal
3364 translations without going through the scheduler. That
3365 means no XDirects or XIndirs out from no-redir
3366 translations. Hence: */
3367 vassert(disp_cp_xindir != NULL);
3369 /* Use ptmp for backpatching conditional jumps. */
3370 UInt* ptmp = NULL;
3372 /* First off, if this is conditional, create a conditional
3373 jump over the rest of it. Or at least, leave a space for
3374 it that we will shortly fill in. */
3375 if (i->ARMin.XIndir.cond != ARMcc_AL) {
3376 vassert(i->ARMin.XIndir.cond != ARMcc_NV);
3377 ptmp = p;
3378 *p++ = 0;
3381 /* Update the guest R15T. */
3382 /* str r-dstGA, amR15T */
3383 p = do_load_or_store32(p, False/*!isLoad*/,
3384 iregEnc(i->ARMin.XIndir.dstGA),
3385 i->ARMin.XIndir.amR15T);
3387 /* movw r12, lo16(VG_(disp_cp_xindir)) */
3388 /* movt r12, hi16(VG_(disp_cp_xindir)) */
3389 /* bx r12 (A1) */
3390 p = imm32_to_ireg(p, /*r*/12, (UInt)(Addr)disp_cp_xindir);
3391 *p++ = 0xE12FFF1C;
3393 /* Fix up the conditional jump, if there was one. */
3394 if (i->ARMin.XIndir.cond != ARMcc_AL) {
3395 Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3396 vassert(delta > 0 && delta < 40);
3397 vassert((delta & 3) == 0);
3398 UInt notCond = 1 ^ (UInt)i->ARMin.XIndir.cond;
3399 vassert(notCond <= 13); /* Neither AL nor NV */
3400 delta = (delta >> 2) - 2;
3401 *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
3403 goto done;
3406 case ARMin_XAssisted: {
3407 /* Use ptmp for backpatching conditional jumps. */
3408 UInt* ptmp = NULL;
3410 /* First off, if this is conditional, create a conditional
3411 jump over the rest of it. Or at least, leave a space for
3412 it that we will shortly fill in. */
3413 if (i->ARMin.XAssisted.cond != ARMcc_AL) {
3414 vassert(i->ARMin.XAssisted.cond != ARMcc_NV);
3415 ptmp = p;
3416 *p++ = 0;
3419 /* Update the guest R15T. */
3420 /* str r-dstGA, amR15T */
3421 p = do_load_or_store32(p, False/*!isLoad*/,
3422 iregEnc(i->ARMin.XAssisted.dstGA),
3423 i->ARMin.XAssisted.amR15T);
3425 /* movw r8, $magic_number */
3426 UInt trcval = 0;
3427 switch (i->ARMin.XAssisted.jk) {
3428 case Ijk_ClientReq: trcval = VEX_TRC_JMP_CLIENTREQ; break;
3429 case Ijk_Sys_syscall: trcval = VEX_TRC_JMP_SYS_SYSCALL; break;
3430 //case Ijk_Sys_int128: trcval = VEX_TRC_JMP_SYS_INT128; break;
3431 case Ijk_Yield: trcval = VEX_TRC_JMP_YIELD; break;
3432 //case Ijk_EmWarn: trcval = VEX_TRC_JMP_EMWARN; break;
3433 //case Ijk_MapFail: trcval = VEX_TRC_JMP_MAPFAIL; break;
3434 case Ijk_NoDecode: trcval = VEX_TRC_JMP_NODECODE; break;
3435 case Ijk_InvalICache: trcval = VEX_TRC_JMP_INVALICACHE; break;
3436 case Ijk_NoRedir: trcval = VEX_TRC_JMP_NOREDIR; break;
3437 //case Ijk_SigTRAP: trcval = VEX_TRC_JMP_SIGTRAP; break;
3438 //case Ijk_SigSEGV: trcval = VEX_TRC_JMP_SIGSEGV; break;
3439 case Ijk_Boring: trcval = VEX_TRC_JMP_BORING; break;
3440 /* We don't expect to see the following being assisted. */
3441 //case Ijk_Ret:
3442 //case Ijk_Call:
3443 /* fallthrough */
3444 default:
3445 ppIRJumpKind(i->ARMin.XAssisted.jk);
3446 vpanic("emit_ARMInstr.ARMin_XAssisted: unexpected jump kind");
3448 vassert(trcval != 0);
3449 p = imm32_to_ireg(p, /*r*/8, trcval);
3451 /* movw r12, lo16(VG_(disp_cp_xassisted)) */
3452 /* movt r12, hi16(VG_(disp_cp_xassisted)) */
3453 /* bx r12 (A1) */
3454 p = imm32_to_ireg(p, /*r*/12, (UInt)(Addr)disp_cp_xassisted);
3455 *p++ = 0xE12FFF1C;
3457 /* Fix up the conditional jump, if there was one. */
3458 if (i->ARMin.XAssisted.cond != ARMcc_AL) {
3459 Int delta = (UChar*)p - (UChar*)ptmp; /* must be signed */
3460 vassert(delta > 0 && delta < 40);
3461 vassert((delta & 3) == 0);
3462 UInt notCond = 1 ^ (UInt)i->ARMin.XAssisted.cond;
3463 vassert(notCond <= 13); /* Neither AL nor NV */
3464 delta = (delta >> 2) - 2;
3465 *ptmp = XX______(notCond, X1010) | (delta & 0xFFFFFF);
3467 goto done;
3470 case ARMin_CMov: {
3471 UInt instr = skeletal_RI84(i->ARMin.CMov.src);
3472 UInt subopc = X1101; /* MOV */
3473 UInt SBZ = 0;
3474 instr |= XXXXX___(i->ARMin.CMov.cond, (1 & (subopc >> 3)),
3475 (subopc << 1) & 0xF, SBZ,
3476 iregEnc(i->ARMin.CMov.dst));
3477 *p++ = instr;
3478 goto done;
3481 case ARMin_Call: {
3482 UInt instr;
3483 /* Decide on a scratch reg used to hold the call address.
3484 This has to be done as per the comments in getRegUsage. */
3485 Int scratchNo;
3486 switch (i->ARMin.Call.nArgRegs) {
3487 case 0: scratchNo = 0; break;
3488 case 1: scratchNo = 1; break;
3489 case 2: scratchNo = 2; break;
3490 case 3: scratchNo = 3; break;
3491 case 4: scratchNo = 11; break;
3492 default: vassert(0);
3494 /* If we don't need to do any fixup actions in the case that
3495 the call doesn't happen, just do the simple thing and emit
3496 straight-line code. We hope this is the common case. */
3497 if (i->ARMin.Call.cond == ARMcc_AL/*call always happens*/
3498 || i->ARMin.Call.rloc.pri == RLPri_None/*no fixup action*/) {
3499 // r"scratchNo" = &target
3500 p = imm32_to_ireg( (UInt*)p,
3501 scratchNo, (UInt)i->ARMin.Call.target );
3502 // blx{cond} r"scratchNo"
3503 instr = XXX___XX(i->ARMin.Call.cond, X0001, X0010, /*___*/
3504 X0011, scratchNo);
3505 instr |= 0xFFF << 8; // stick in the SBOnes
3506 *p++ = instr;
3507 } else {
3508 Int delta;
3509 /* Complex case. We have to generate an if-then-else
3510 diamond. */
3511 // before:
3512 // b{!cond} else:
3513 // r"scratchNo" = &target
3514 // blx{AL} r"scratchNo"
3515 // preElse:
3516 // b after:
3517 // else:
3518 // mov r0, #0x55555555 // possibly
3519 // mov r1, r0 // possibly
3520 // after:
3522 // before:
3523 UInt* pBefore = p;
3525 // b{!cond} else: // pBefore points here
3526 *p++ = 0; // filled in later
3528 // r"scratchNo" = &target
3529 p = imm32_to_ireg( (UInt*)p,
3530 scratchNo, (UInt)i->ARMin.Call.target );
3532 // blx{AL} r"scratchNo"
3533 instr = XXX___XX(ARMcc_AL, X0001, X0010, /*___*/
3534 X0011, scratchNo);
3535 instr |= 0xFFF << 8; // stick in the SBOnes
3536 *p++ = instr;
3538 // preElse:
3539 UInt* pPreElse = p;
3541 // b after:
3542 *p++ = 0; // filled in later
3544 // else:
3545 delta = (UChar*)p - (UChar*)pBefore;
3546 delta = (delta >> 2) - 2;
3547 *pBefore
3548 = XX______(1 ^ i->ARMin.Call.cond, X1010) | (delta & 0xFFFFFF);
3550 /* Do the 'else' actions */
3551 switch (i->ARMin.Call.rloc.pri) {
3552 case RLPri_Int:
3553 p = imm32_to_ireg_EXACTLY2(p, /*r*/0, 0x55555555);
3554 break;
3555 case RLPri_2Int:
3556 vassert(0); //ATC
3557 p = imm32_to_ireg_EXACTLY2(p, /*r*/0, 0x55555555);
3558 /* mov r1, r0 */
3559 *p++ = 0xE1A01000;
3560 break;
3561 case RLPri_None: case RLPri_INVALID: default:
3562 vassert(0);
3565 // after:
3566 delta = (UChar*)p - (UChar*)pPreElse;
3567 delta = (delta >> 2) - 2;
3568 *pPreElse = XX______(ARMcc_AL, X1010) | (delta & 0xFFFFFF);
3571 goto done;
3574 case ARMin_Mul: {
3575 /* E0000392 mul r0, r2, r3
3576 E0810392 umull r0(LO), r1(HI), r2, r3
3577 E0C10392 smull r0(LO), r1(HI), r2, r3
3579 switch (i->ARMin.Mul.op) {
3580 case ARMmul_PLAIN: *p++ = 0xE0000392; goto done;
3581 case ARMmul_ZX: *p++ = 0xE0810392; goto done;
3582 case ARMmul_SX: *p++ = 0xE0C10392; goto done;
3583 default: vassert(0);
3585 goto bad;
3587 case ARMin_LdrEX: {
3588 /* E1D42F9F ldrexb r2, [r4]
3589 E1F42F9F ldrexh r2, [r4]
3590 E1942F9F ldrex r2, [r4]
3591 E1B42F9F ldrexd r2, r3, [r4]
3593 switch (i->ARMin.LdrEX.szB) {
3594 case 1: *p++ = 0xE1D42F9F; goto done;
3595 case 2: *p++ = 0xE1F42F9F; goto done;
3596 case 4: *p++ = 0xE1942F9F; goto done;
3597 case 8: *p++ = 0xE1B42F9F; goto done;
3598 default: break;
3600 goto bad;
3602 case ARMin_StrEX: {
3603 /* E1C40F92 strexb r0, r2, [r4]
3604 E1E40F92 strexh r0, r2, [r4]
3605 E1840F92 strex r0, r2, [r4]
3606 E1A40F92 strexd r0, r2, r3, [r4]
3608 switch (i->ARMin.StrEX.szB) {
3609 case 1: *p++ = 0xE1C40F92; goto done;
3610 case 2: *p++ = 0xE1E40F92; goto done;
3611 case 4: *p++ = 0xE1840F92; goto done;
3612 case 8: *p++ = 0xE1A40F92; goto done;
3613 default: break;
3615 goto bad;
3617 case ARMin_VLdStD: {
3618 UInt dD = dregEnc(i->ARMin.VLdStD.dD);
3619 UInt rN = iregEnc(i->ARMin.VLdStD.amode->reg);
3620 Int simm11 = i->ARMin.VLdStD.amode->simm11;
3621 UInt off8 = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
3622 UInt bU = simm11 >= 0 ? 1 : 0;
3623 UInt bL = i->ARMin.VLdStD.isLoad ? 1 : 0;
3624 UInt insn;
3625 vassert(0 == (off8 & 3));
3626 off8 >>= 2;
3627 vassert(0 == (off8 & 0xFFFFFF00));
3628 insn = XXXXXX__(0xE,X1101,BITS4(bU,0,0,bL),rN,dD,X1011);
3629 insn |= off8;
3630 *p++ = insn;
3631 goto done;
3633 case ARMin_VLdStS: {
3634 UInt fD = fregEnc(i->ARMin.VLdStS.fD);
3635 UInt rN = iregEnc(i->ARMin.VLdStS.amode->reg);
3636 Int simm11 = i->ARMin.VLdStS.amode->simm11;
3637 UInt off8 = simm11 >= 0 ? simm11 : ((UInt)(-simm11));
3638 UInt bU = simm11 >= 0 ? 1 : 0;
3639 UInt bL = i->ARMin.VLdStS.isLoad ? 1 : 0;
3640 UInt bD = fD & 1;
3641 UInt insn;
3642 vassert(0 == (off8 & 3));
3643 off8 >>= 2;
3644 vassert(0 == (off8 & 0xFFFFFF00));
3645 insn = XXXXXX__(0xE,X1101,BITS4(bU,bD,0,bL),rN, (fD >> 1), X1010);
3646 insn |= off8;
3647 *p++ = insn;
3648 goto done;
3650 case ARMin_VAluD: {
3651 UInt dN = dregEnc(i->ARMin.VAluD.argL);
3652 UInt dD = dregEnc(i->ARMin.VAluD.dst);
3653 UInt dM = dregEnc(i->ARMin.VAluD.argR);
3654 UInt pqrs = X1111; /* undefined */
3655 switch (i->ARMin.VAluD.op) {
3656 case ARMvfp_ADD: pqrs = X0110; break;
3657 case ARMvfp_SUB: pqrs = X0111; break;
3658 case ARMvfp_MUL: pqrs = X0100; break;
3659 case ARMvfp_DIV: pqrs = X1000; break;
3660 default: goto bad;
3662 vassert(pqrs != X1111);
3663 UInt bP = (pqrs >> 3) & 1;
3664 UInt bQ = (pqrs >> 2) & 1;
3665 UInt bR = (pqrs >> 1) & 1;
3666 UInt bS = (pqrs >> 0) & 1;
3667 UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,0,bQ,bR), dN, dD,
3668 X1011, BITS4(0,bS,0,0), dM);
3669 *p++ = insn;
3670 goto done;
3672 case ARMin_VAluS: {
3673 UInt dN = fregEnc(i->ARMin.VAluS.argL);
3674 UInt dD = fregEnc(i->ARMin.VAluS.dst);
3675 UInt dM = fregEnc(i->ARMin.VAluS.argR);
3676 UInt bN = dN & 1;
3677 UInt bD = dD & 1;
3678 UInt bM = dM & 1;
3679 UInt pqrs = X1111; /* undefined */
3680 switch (i->ARMin.VAluS.op) {
3681 case ARMvfp_ADD: pqrs = X0110; break;
3682 case ARMvfp_SUB: pqrs = X0111; break;
3683 case ARMvfp_MUL: pqrs = X0100; break;
3684 case ARMvfp_DIV: pqrs = X1000; break;
3685 default: goto bad;
3687 vassert(pqrs != X1111);
3688 UInt bP = (pqrs >> 3) & 1;
3689 UInt bQ = (pqrs >> 2) & 1;
3690 UInt bR = (pqrs >> 1) & 1;
3691 UInt bS = (pqrs >> 0) & 1;
3692 UInt insn = XXXXXXXX(0xE, X1110, BITS4(bP,bD,bQ,bR),
3693 (dN >> 1), (dD >> 1),
3694 X1010, BITS4(bN,bS,bM,0), (dM >> 1));
3695 *p++ = insn;
3696 goto done;
3698 case ARMin_VUnaryD: {
3699 UInt dD = dregEnc(i->ARMin.VUnaryD.dst);
3700 UInt dM = dregEnc(i->ARMin.VUnaryD.src);
3701 UInt insn = 0;
3702 switch (i->ARMin.VUnaryD.op) {
3703 case ARMvfpu_COPY:
3704 insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X0100,dM);
3705 break;
3706 case ARMvfpu_ABS:
3707 insn = XXXXXXXX(0xE, X1110,X1011,X0000,dD,X1011,X1100,dM);
3708 break;
3709 case ARMvfpu_NEG:
3710 insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X0100,dM);
3711 break;
3712 case ARMvfpu_SQRT:
3713 insn = XXXXXXXX(0xE, X1110,X1011,X0001,dD,X1011,X1100,dM);
3714 break;
3715 default:
3716 goto bad;
3718 *p++ = insn;
3719 goto done;
3721 case ARMin_VUnaryS: {
3722 UInt fD = fregEnc(i->ARMin.VUnaryS.dst);
3723 UInt fM = fregEnc(i->ARMin.VUnaryS.src);
3724 UInt insn = 0;
3725 switch (i->ARMin.VUnaryS.op) {
3726 case ARMvfpu_COPY:
3727 insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
3728 (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
3729 (fM >> 1));
3730 break;
3731 case ARMvfpu_ABS:
3732 insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0000,
3733 (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
3734 (fM >> 1));
3735 break;
3736 case ARMvfpu_NEG:
3737 insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
3738 (fD >> 1), X1010, BITS4(0,1,(fM & 1),0),
3739 (fM >> 1));
3740 break;
3741 case ARMvfpu_SQRT:
3742 insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1), X0001,
3743 (fD >> 1), X1010, BITS4(1,1,(fM & 1),0),
3744 (fM >> 1));
3745 break;
3746 default:
3747 goto bad;
3749 *p++ = insn;
3750 goto done;
3752 case ARMin_VCmpD: {
3753 UInt dD = dregEnc(i->ARMin.VCmpD.argL);
3754 UInt dM = dregEnc(i->ARMin.VCmpD.argR);
3755 UInt insn = XXXXXXXX(0xE, X1110, X1011, X0100, dD, X1011, X0100, dM);
3756 *p++ = insn; /* FCMPD dD, dM */
3757 *p++ = 0xEEF1FA10; /* FMSTAT */
3758 goto done;
3760 case ARMin_VCMovD: {
3761 UInt cc = (UInt)i->ARMin.VCMovD.cond;
3762 UInt dD = dregEnc(i->ARMin.VCMovD.dst);
3763 UInt dM = dregEnc(i->ARMin.VCMovD.src);
3764 vassert(cc < 16 && cc != ARMcc_AL);
3765 UInt insn = XXXXXXXX(cc, X1110,X1011,X0000,dD,X1011,X0100,dM);
3766 *p++ = insn;
3767 goto done;
3769 case ARMin_VCMovS: {
3770 UInt cc = (UInt)i->ARMin.VCMovS.cond;
3771 UInt fD = fregEnc(i->ARMin.VCMovS.dst);
3772 UInt fM = fregEnc(i->ARMin.VCMovS.src);
3773 vassert(cc < 16 && cc != ARMcc_AL);
3774 UInt insn = XXXXXXXX(cc, X1110, BITS4(1,(fD & 1),1,1),
3775 X0000,(fD >> 1),X1010,
3776 BITS4(0,1,(fM & 1),0), (fM >> 1));
3777 *p++ = insn;
3778 goto done;
3780 case ARMin_VCvtSD: {
3781 if (i->ARMin.VCvtSD.sToD) {
3782 UInt dD = dregEnc(i->ARMin.VCvtSD.dst);
3783 UInt fM = fregEnc(i->ARMin.VCvtSD.src);
3784 UInt insn = XXXXXXXX(0xE, X1110, X1011, X0111, dD, X1010,
3785 BITS4(1,1, (fM & 1), 0),
3786 (fM >> 1));
3787 *p++ = insn;
3788 goto done;
3789 } else {
3790 UInt fD = fregEnc(i->ARMin.VCvtSD.dst);
3791 UInt dM = dregEnc(i->ARMin.VCvtSD.src);
3792 UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(fD & 1),1,1),
3793 X0111, (fD >> 1),
3794 X1011, X1100, dM);
3795 *p++ = insn;
3796 goto done;
3799 case ARMin_VXferQ: {
3800 UInt insn;
3801 UInt qD = qregEnc(i->ARMin.VXferQ.qD);
3802 UInt dHi = dregEnc(i->ARMin.VXferQ.dHi);
3803 UInt dLo = dregEnc(i->ARMin.VXferQ.dLo);
3804 /* This is a bit tricky. We need to make 2 D-D moves and we rely
3805 on the fact that the Q register can be treated as two D registers.
3806 We also rely on the fact that the register allocator will allocate
3807 the two D's and the Q to disjoint parts of the register file,
3808 and so we don't have to worry about the first move's destination
3809 being the same as the second move's source, etc. We do have
3810 assertions though. */
3811 /* The ARM ARM specifies that
3812 D<2n> maps to the least significant half of Q<n>
3813 D<2n+1> maps to the most significant half of Q<n>
3814 So there are no issues with endianness here.
3816 UInt qDlo = 2 * qD + 0;
3817 UInt qDhi = 2 * qD + 1;
3818 /* Stay sane .. */
3819 vassert(qDhi != dHi && qDhi != dLo);
3820 vassert(qDlo != dHi && qDlo != dLo);
3821 /* vmov dX, dY is
3822 F 2 (0,dX[4],1,0) dY[3:0] dX[3:0] 1 (dY[4],0,dY[4],1) dY[3:0]
3824 # define VMOV_D_D(_xx,_yy) \
3825 XXXXXXXX( 0xF, 0x2, BITS4(0, (((_xx) >> 4) & 1), 1, 0), \
3826 ((_yy) & 0xF), ((_xx) & 0xF), 0x1, \
3827 BITS4( (((_yy) >> 4) & 1), 0, (((_yy) >> 4) & 1), 1), \
3828 ((_yy) & 0xF) )
3829 if (i->ARMin.VXferQ.toQ) {
3830 insn = VMOV_D_D(qDlo, dLo); *p++ = insn;
3831 insn = VMOV_D_D(qDhi, dHi); *p++ = insn;
3832 } else {
3833 insn = VMOV_D_D(dLo, qDlo); *p++ = insn;
3834 insn = VMOV_D_D(dHi, qDhi); *p++ = insn;
3836 # undef VMOV_D_D
3837 goto done;
3839 case ARMin_VXferD: {
3840 UInt dD = dregEnc(i->ARMin.VXferD.dD);
3841 UInt rHi = iregEnc(i->ARMin.VXferD.rHi);
3842 UInt rLo = iregEnc(i->ARMin.VXferD.rLo);
3843 /* vmov dD, rLo, rHi is
3844 E C 4 rHi rLo B (0,0,dD[4],1) dD[3:0]
3845 vmov rLo, rHi, dD is
3846 E C 5 rHi rLo B (0,0,dD[4],1) dD[3:0]
3848 UInt insn
3849 = XXXXXXXX(0xE, 0xC, i->ARMin.VXferD.toD ? 4 : 5,
3850 rHi, rLo, 0xB,
3851 BITS4(0,0, ((dD >> 4) & 1), 1), (dD & 0xF));
3852 *p++ = insn;
3853 goto done;
3855 case ARMin_VXferS: {
3856 UInt fD = fregEnc(i->ARMin.VXferS.fD);
3857 UInt rLo = iregEnc(i->ARMin.VXferS.rLo);
3858 /* vmov fD, rLo is
3859 E E 0 fD[4:1] rLo A (fD[0],0,0,1) 0
3860 vmov rLo, fD is
3861 E E 1 fD[4:1] rLo A (fD[0],0,0,1) 0
3863 UInt insn
3864 = XXXXXXXX(0xE, 0xE, i->ARMin.VXferS.toS ? 0 : 1,
3865 (fD >> 1) & 0xF, rLo, 0xA,
3866 BITS4((fD & 1),0,0,1), 0);
3867 *p++ = insn;
3868 goto done;
3870 case ARMin_VCvtID: {
3871 Bool iToD = i->ARMin.VCvtID.iToD;
3872 Bool syned = i->ARMin.VCvtID.syned;
3873 if (iToD && syned) {
3874 // FSITOD: I32S-in-freg to F64-in-dreg
3875 UInt regF = fregEnc(i->ARMin.VCvtID.src);
3876 UInt regD = dregEnc(i->ARMin.VCvtID.dst);
3877 UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
3878 X1011, BITS4(1,1,(regF & 1),0),
3879 (regF >> 1) & 0xF);
3880 *p++ = insn;
3881 goto done;
3883 if (iToD && (!syned)) {
3884 // FUITOD: I32U-in-freg to F64-in-dreg
3885 UInt regF = fregEnc(i->ARMin.VCvtID.src);
3886 UInt regD = dregEnc(i->ARMin.VCvtID.dst);
3887 UInt insn = XXXXXXXX(0xE, X1110, X1011, X1000, regD,
3888 X1011, BITS4(0,1,(regF & 1),0),
3889 (regF >> 1) & 0xF);
3890 *p++ = insn;
3891 goto done;
3893 if ((!iToD) && syned) {
3894 // FTOSID: F64-in-dreg to I32S-in-freg
3895 UInt regD = dregEnc(i->ARMin.VCvtID.src);
3896 UInt regF = fregEnc(i->ARMin.VCvtID.dst);
3897 UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
3898 X1101, (regF >> 1) & 0xF,
3899 X1011, X0100, regD);
3900 *p++ = insn;
3901 goto done;
3903 if ((!iToD) && (!syned)) {
3904 // FTOUID: F64-in-dreg to I32U-in-freg
3905 UInt regD = dregEnc(i->ARMin.VCvtID.src);
3906 UInt regF = fregEnc(i->ARMin.VCvtID.dst);
3907 UInt insn = XXXXXXXX(0xE, X1110, BITS4(1,(regF & 1),1,1),
3908 X1100, (regF >> 1) & 0xF,
3909 X1011, X0100, regD);
3910 *p++ = insn;
3911 goto done;
3913 /*UNREACHED*/
3914 vassert(0);
3916 case ARMin_VRIntR: { /* NB: ARM v8 and above only */
3917 Bool isF64 = i->ARMin.VRIntR.isF64;
3918 UInt rDst = (isF64 ? dregEnc : fregEnc)(i->ARMin.VRIntR.dst);
3919 UInt rSrc = (isF64 ? dregEnc : fregEnc)(i->ARMin.VRIntR.src);
3920 /* The encoding of registers here differs strangely for the
3921 F32 and F64 cases. */
3922 UInt D, Vd, M, Vm;
3923 if (isF64) {
3924 D = (rDst >> 4) & 1;
3925 Vd = rDst & 0xF;
3926 M = (rSrc >> 4) & 1;
3927 Vm = rSrc & 0xF;
3928 } else {
3929 Vd = (rDst >> 1) & 0xF;
3930 D = rDst & 1;
3931 Vm = (rSrc >> 1) & 0xF;
3932 M = rSrc & 1;
3934 vassert(D <= 1 && Vd <= 15 && M <= 1 && Vm <= 15);
3935 *p++ = XXXXXXXX(0xE, X1110, X1011 | (D << 2), X0110, Vd,
3936 isF64 ? X1011 : X1010, X0100 | (M << 1), Vm);
3937 goto done;
3939 case ARMin_VMinMaxNum: {
3940 Bool isF64 = i->ARMin.VMinMaxNum.isF64;
3941 Bool isMax = i->ARMin.VMinMaxNum.isMax;
3942 UInt rDst = (isF64 ? dregEnc : fregEnc)(i->ARMin.VMinMaxNum.dst);
3943 UInt rSrcL = (isF64 ? dregEnc : fregEnc)(i->ARMin.VMinMaxNum.srcL);
3944 UInt rSrcR = (isF64 ? dregEnc : fregEnc)(i->ARMin.VMinMaxNum.srcR);
3945 /* The encoding of registers here differs strangely for the
3946 F32 and F64 cases. */
3947 UInt D, Vd, N, Vn, M, Vm;
3948 if (isF64) {
3949 D = (rDst >> 4) & 1;
3950 Vd = rDst & 0xF;
3951 N = (rSrcL >> 4) & 1;
3952 Vn = rSrcL & 0xF;
3953 M = (rSrcR >> 4) & 1;
3954 Vm = rSrcR & 0xF;
3955 } else {
3956 Vd = (rDst >> 1) & 0xF;
3957 D = rDst & 1;
3958 Vn = (rSrcL >> 1) & 0xF;
3959 N = rSrcL & 1;
3960 Vm = (rSrcR >> 1) & 0xF;
3961 M = rSrcR & 1;
3963 vassert(D <= 1 && Vd <= 15 && M <= 1 && Vm <= 15 && N <= 1
3964 && Vn <= 15);
3965 *p++ = XXXXXXXX(X1111,X1110, X1000 | (D << 2), Vn, Vd,
3966 X1010 | (isF64 ? 1 : 0),
3967 (N << 3) | ((isMax ? 0 : 1) << 2) | (M << 1) | 0,
3968 Vm);
3969 goto done;
3971 case ARMin_FPSCR: {
3972 Bool toFPSCR = i->ARMin.FPSCR.toFPSCR;
3973 UInt iReg = iregEnc(i->ARMin.FPSCR.iReg);
3974 if (toFPSCR) {
3975 /* fmxr fpscr, iReg is EEE1 iReg A10 */
3976 *p++ = 0xEEE10A10 | ((iReg & 0xF) << 12);
3977 goto done;
3979 goto bad; // FPSCR -> iReg case currently ATC
3981 case ARMin_MFence: {
3982 // It's not clear (to me) how these relate to the ARMv7
3983 // versions, so let's just use the v7 versions as they
3984 // are at least well documented.
3985 //*p++ = 0xEE070F9A; /* mcr 15,0,r0,c7,c10,4 (DSB) */
3986 //*p++ = 0xEE070FBA; /* mcr 15,0,r0,c7,c10,5 (DMB) */
3987 //*p++ = 0xEE070F95; /* mcr 15,0,r0,c7,c5,4 (ISB) */
3988 *p++ = 0xF57FF04F; /* DSB sy */
3989 *p++ = 0xF57FF05F; /* DMB sy */
3990 *p++ = 0xF57FF06F; /* ISB */
3991 goto done;
3993 case ARMin_CLREX: {
3994 *p++ = 0xF57FF01F; /* clrex */
3995 goto done;
3998 case ARMin_NLdStQ: {
3999 UInt regD = qregEnc(i->ARMin.NLdStQ.dQ) << 1;
4000 UInt regN, regM;
4001 UInt D = regD >> 4;
4002 UInt bL = i->ARMin.NLdStQ.isLoad ? 1 : 0;
4003 UInt insn;
4004 vassert(hregClass(i->ARMin.NLdStQ.dQ) == HRcVec128);
4005 regD &= 0xF;
4006 if (i->ARMin.NLdStQ.amode->tag == ARMamN_RR) {
4007 regN = iregEnc(i->ARMin.NLdStQ.amode->ARMamN.RR.rN);
4008 regM = iregEnc(i->ARMin.NLdStQ.amode->ARMamN.RR.rM);
4009 } else {
4010 regN = iregEnc(i->ARMin.NLdStQ.amode->ARMamN.R.rN);
4011 regM = 15;
4013 insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
4014 regN, regD, X1010, X1000, regM);
4015 *p++ = insn;
4016 goto done;
4018 case ARMin_NLdStD: {
4019 UInt regD = dregEnc(i->ARMin.NLdStD.dD);
4020 UInt regN, regM;
4021 UInt D = regD >> 4;
4022 UInt bL = i->ARMin.NLdStD.isLoad ? 1 : 0;
4023 UInt insn;
4024 vassert(hregClass(i->ARMin.NLdStD.dD) == HRcFlt64);
4025 regD &= 0xF;
4026 if (i->ARMin.NLdStD.amode->tag == ARMamN_RR) {
4027 regN = iregEnc(i->ARMin.NLdStD.amode->ARMamN.RR.rN);
4028 regM = iregEnc(i->ARMin.NLdStD.amode->ARMamN.RR.rM);
4029 } else {
4030 regN = iregEnc(i->ARMin.NLdStD.amode->ARMamN.R.rN);
4031 regM = 15;
4033 insn = XXXXXXXX(0xF, X0100, BITS4(0, D, bL, 0),
4034 regN, regD, X0111, X1000, regM);
4035 *p++ = insn;
4036 goto done;
4038 case ARMin_NUnaryS: {
4039 UInt Q = i->ARMin.NUnaryS.Q ? 1 : 0;
4040 UInt regD, D;
4041 UInt regM, M;
4042 UInt size = i->ARMin.NUnaryS.size;
4043 UInt insn;
4044 UInt opc, opc1, opc2;
4045 switch (i->ARMin.NUnaryS.op) {
4046 case ARMneon_VDUP:
4047 if (i->ARMin.NUnaryS.size >= 16)
4048 goto bad;
4049 if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Reg)
4050 goto bad;
4051 if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
4052 goto bad;
4053 regD = (hregClass(i->ARMin.NUnaryS.dst->reg) == HRcVec128)
4054 ? (qregEnc(i->ARMin.NUnaryS.dst->reg) << 1)
4055 : dregEnc(i->ARMin.NUnaryS.dst->reg);
4056 regM = (hregClass(i->ARMin.NUnaryS.src->reg) == HRcVec128)
4057 ? (qregEnc(i->ARMin.NUnaryS.src->reg) << 1)
4058 : dregEnc(i->ARMin.NUnaryS.src->reg);
4059 D = regD >> 4;
4060 M = regM >> 4;
4061 regD &= 0xf;
4062 regM &= 0xf;
4063 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1),
4064 (i->ARMin.NUnaryS.size & 0xf), regD,
4065 X1100, BITS4(0,Q,M,0), regM);
4066 *p++ = insn;
4067 goto done;
4068 case ARMneon_SETELEM:
4069 regD = Q ? (qregEnc(i->ARMin.NUnaryS.dst->reg) << 1) :
4070 dregEnc(i->ARMin.NUnaryS.dst->reg);
4071 regM = iregEnc(i->ARMin.NUnaryS.src->reg);
4072 M = regM >> 4;
4073 D = regD >> 4;
4074 regM &= 0xF;
4075 regD &= 0xF;
4076 if (i->ARMin.NUnaryS.dst->tag != ARMNRS_Scalar)
4077 goto bad;
4078 switch (size) {
4079 case 0:
4080 if (i->ARMin.NUnaryS.dst->index > 7)
4081 goto bad;
4082 opc = X1000 | i->ARMin.NUnaryS.dst->index;
4083 break;
4084 case 1:
4085 if (i->ARMin.NUnaryS.dst->index > 3)
4086 goto bad;
4087 opc = X0001 | (i->ARMin.NUnaryS.dst->index << 1);
4088 break;
4089 case 2:
4090 if (i->ARMin.NUnaryS.dst->index > 1)
4091 goto bad;
4092 opc = X0000 | (i->ARMin.NUnaryS.dst->index << 2);
4093 break;
4094 default:
4095 goto bad;
4097 opc1 = (opc >> 2) & 3;
4098 opc2 = opc & 3;
4099 insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),0),
4100 regD, regM, X1011,
4101 BITS4(D,(opc2 >> 1),(opc2 & 1),1), X0000);
4102 *p++ = insn;
4103 goto done;
4104 case ARMneon_GETELEMU:
4105 regM = Q ? (qregEnc(i->ARMin.NUnaryS.src->reg) << 1) :
4106 dregEnc(i->ARMin.NUnaryS.src->reg);
4107 regD = iregEnc(i->ARMin.NUnaryS.dst->reg);
4108 M = regM >> 4;
4109 D = regD >> 4;
4110 regM &= 0xF;
4111 regD &= 0xF;
4112 if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
4113 goto bad;
4114 switch (size) {
4115 case 0:
4116 if (Q && i->ARMin.NUnaryS.src->index > 7) {
4117 regM++;
4118 i->ARMin.NUnaryS.src->index -= 8;
4120 if (i->ARMin.NUnaryS.src->index > 7)
4121 goto bad;
4122 opc = X1000 | i->ARMin.NUnaryS.src->index;
4123 break;
4124 case 1:
4125 if (Q && i->ARMin.NUnaryS.src->index > 3) {
4126 regM++;
4127 i->ARMin.NUnaryS.src->index -= 4;
4129 if (i->ARMin.NUnaryS.src->index > 3)
4130 goto bad;
4131 opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
4132 break;
4133 case 2:
4134 goto bad;
4135 default:
4136 goto bad;
4138 opc1 = (opc >> 2) & 3;
4139 opc2 = opc & 3;
4140 insn = XXXXXXXX(0xE, X1110, BITS4(1,(opc1 >> 1),(opc1 & 1),1),
4141 regM, regD, X1011,
4142 BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
4143 *p++ = insn;
4144 goto done;
4145 case ARMneon_GETELEMS:
4146 regM = Q ? (qregEnc(i->ARMin.NUnaryS.src->reg) << 1) :
4147 dregEnc(i->ARMin.NUnaryS.src->reg);
4148 regD = iregEnc(i->ARMin.NUnaryS.dst->reg);
4149 M = regM >> 4;
4150 D = regD >> 4;
4151 regM &= 0xF;
4152 regD &= 0xF;
4153 if (i->ARMin.NUnaryS.src->tag != ARMNRS_Scalar)
4154 goto bad;
4155 switch (size) {
4156 case 0:
4157 if (Q && i->ARMin.NUnaryS.src->index > 7) {
4158 regM++;
4159 i->ARMin.NUnaryS.src->index -= 8;
4161 if (i->ARMin.NUnaryS.src->index > 7)
4162 goto bad;
4163 opc = X1000 | i->ARMin.NUnaryS.src->index;
4164 break;
4165 case 1:
4166 if (Q && i->ARMin.NUnaryS.src->index > 3) {
4167 regM++;
4168 i->ARMin.NUnaryS.src->index -= 4;
4170 if (i->ARMin.NUnaryS.src->index > 3)
4171 goto bad;
4172 opc = X0001 | (i->ARMin.NUnaryS.src->index << 1);
4173 break;
4174 case 2:
4175 if (Q && i->ARMin.NUnaryS.src->index > 1) {
4176 regM++;
4177 i->ARMin.NUnaryS.src->index -= 2;
4179 if (i->ARMin.NUnaryS.src->index > 1)
4180 goto bad;
4181 opc = X0000 | (i->ARMin.NUnaryS.src->index << 2);
4182 break;
4183 default:
4184 goto bad;
4186 opc1 = (opc >> 2) & 3;
4187 opc2 = opc & 3;
4188 insn = XXXXXXXX(0xE, X1110, BITS4(0,(opc1 >> 1),(opc1 & 1),1),
4189 regM, regD, X1011,
4190 BITS4(M,(opc2 >> 1),(opc2 & 1),1), X0000);
4191 *p++ = insn;
4192 goto done;
4193 default:
4194 goto bad;
4197 case ARMin_NUnary: {
4198 UInt Q = i->ARMin.NUnary.Q ? 1 : 0;
4199 UInt regD = (hregClass(i->ARMin.NUnary.dst) == HRcVec128)
4200 ? (qregEnc(i->ARMin.NUnary.dst) << 1)
4201 : dregEnc(i->ARMin.NUnary.dst);
4202 UInt regM, M;
4203 UInt D = regD >> 4;
4204 UInt sz1 = i->ARMin.NUnary.size >> 1;
4205 UInt sz2 = i->ARMin.NUnary.size & 1;
4206 UInt sz = i->ARMin.NUnary.size;
4207 UInt insn;
4208 UInt F = 0; /* TODO: floating point EQZ ??? */
4209 if (i->ARMin.NUnary.op != ARMneon_DUP) {
4210 regM = (hregClass(i->ARMin.NUnary.src) == HRcVec128)
4211 ? (qregEnc(i->ARMin.NUnary.src) << 1)
4212 : dregEnc(i->ARMin.NUnary.src);
4213 M = regM >> 4;
4214 } else {
4215 regM = iregEnc(i->ARMin.NUnary.src);
4216 M = regM >> 4;
4218 regD &= 0xF;
4219 regM &= 0xF;
4220 switch (i->ARMin.NUnary.op) {
4221 case ARMneon_COPY: /* VMOV reg, reg */
4222 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regM, regD, X0001,
4223 BITS4(M,Q,M,1), regM);
4224 break;
4225 case ARMneon_COPYN: /* VMOVN regD, regQ */
4226 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4227 regD, X0010, BITS4(0,0,M,0), regM);
4228 break;
4229 case ARMneon_COPYQNSS: /* VQMOVN regD, regQ */
4230 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4231 regD, X0010, BITS4(1,0,M,0), regM);
4232 break;
4233 case ARMneon_COPYQNUS: /* VQMOVUN regD, regQ */
4234 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4235 regD, X0010, BITS4(0,1,M,0), regM);
4236 break;
4237 case ARMneon_COPYQNUU: /* VQMOVN regD, regQ */
4238 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4239 regD, X0010, BITS4(1,1,M,0), regM);
4240 break;
4241 case ARMneon_COPYLS: /* VMOVL regQ, regD */
4242 if (sz >= 3)
4243 goto bad;
4244 insn = XXXXXXXX(0xF, X0010,
4245 BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
4246 BITS4((sz == 0) ? 1 : 0,0,0,0),
4247 regD, X1010, BITS4(0,0,M,1), regM);
4248 break;
4249 case ARMneon_COPYLU: /* VMOVL regQ, regD */
4250 if (sz >= 3)
4251 goto bad;
4252 insn = XXXXXXXX(0xF, X0011,
4253 BITS4(1,D,(sz == 2) ? 1 : 0,(sz == 1) ? 1 : 0),
4254 BITS4((sz == 0) ? 1 : 0,0,0,0),
4255 regD, X1010, BITS4(0,0,M,1), regM);
4256 break;
4257 case ARMneon_NOT: /* VMVN reg, reg*/
4258 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
4259 BITS4(1,Q,M,0), regM);
4260 break;
4261 case ARMneon_EQZ:
4262 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
4263 regD, BITS4(0,F,0,1), BITS4(0,Q,M,0), regM);
4264 break;
4265 case ARMneon_CNT:
4266 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0000, regD, X0101,
4267 BITS4(0,Q,M,0), regM);
4268 break;
4269 case ARMneon_CLZ:
4270 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4271 regD, X0100, BITS4(1,Q,M,0), regM);
4272 break;
4273 case ARMneon_CLS:
4274 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4275 regD, X0100, BITS4(0,Q,M,0), regM);
4276 break;
4277 case ARMneon_ABS:
4278 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,1),
4279 regD, X0011, BITS4(0,Q,M,0), regM);
4280 break;
4281 case ARMneon_DUP:
4282 sz1 = i->ARMin.NUnary.size == 0 ? 1 : 0;
4283 sz2 = i->ARMin.NUnary.size == 1 ? 1 : 0;
4284 vassert(sz1 + sz2 < 2);
4285 insn = XXXXXXXX(0xE, X1110, BITS4(1, sz1, Q, 0), regD, regM,
4286 X1011, BITS4(D,0,sz2,1), X0000);
4287 break;
4288 case ARMneon_REV16:
4289 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4290 regD, BITS4(0,0,0,1), BITS4(0,Q,M,0), regM);
4291 break;
4292 case ARMneon_REV32:
4293 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4294 regD, BITS4(0,0,0,0), BITS4(1,Q,M,0), regM);
4295 break;
4296 case ARMneon_REV64:
4297 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4298 regD, BITS4(0,0,0,0), BITS4(0,Q,M,0), regM);
4299 break;
4300 case ARMneon_PADDLU:
4301 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4302 regD, X0010, BITS4(1,Q,M,0), regM);
4303 break;
4304 case ARMneon_PADDLS:
4305 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,0,0),
4306 regD, X0010, BITS4(0,Q,M,0), regM);
4307 break;
4308 case ARMneon_VQSHLNUU:
4309 insn = XXXXXXXX(0xF, X0011,
4310 (1 << 3) | (D << 2) | ((sz >> 4) & 3),
4311 sz & 0xf, regD, X0111,
4312 BITS4(sz >> 6,Q,M,1), regM);
4313 break;
4314 case ARMneon_VQSHLNSS:
4315 insn = XXXXXXXX(0xF, X0010,
4316 (1 << 3) | (D << 2) | ((sz >> 4) & 3),
4317 sz & 0xf, regD, X0111,
4318 BITS4(sz >> 6,Q,M,1), regM);
4319 break;
4320 case ARMneon_VQSHLNUS:
4321 insn = XXXXXXXX(0xF, X0011,
4322 (1 << 3) | (D << 2) | ((sz >> 4) & 3),
4323 sz & 0xf, regD, X0110,
4324 BITS4(sz >> 6,Q,M,1), regM);
4325 break;
4326 case ARMneon_VCVTFtoS:
4327 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
4328 BITS4(0,Q,M,0), regM);
4329 break;
4330 case ARMneon_VCVTFtoU:
4331 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0111,
4332 BITS4(1,Q,M,0), regM);
4333 break;
4334 case ARMneon_VCVTStoF:
4335 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
4336 BITS4(0,Q,M,0), regM);
4337 break;
4338 case ARMneon_VCVTUtoF:
4339 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0110,
4340 BITS4(1,Q,M,0), regM);
4341 break;
4342 case ARMneon_VCVTFtoFixedU:
4343 sz1 = (sz >> 5) & 1;
4344 sz2 = (sz >> 4) & 1;
4345 sz &= 0xf;
4346 insn = XXXXXXXX(0xF, X0011,
4347 BITS4(1,D,sz1,sz2), sz, regD, X1111,
4348 BITS4(0,Q,M,1), regM);
4349 break;
4350 case ARMneon_VCVTFtoFixedS:
4351 sz1 = (sz >> 5) & 1;
4352 sz2 = (sz >> 4) & 1;
4353 sz &= 0xf;
4354 insn = XXXXXXXX(0xF, X0010,
4355 BITS4(1,D,sz1,sz2), sz, regD, X1111,
4356 BITS4(0,Q,M,1), regM);
4357 break;
4358 case ARMneon_VCVTFixedUtoF:
4359 sz1 = (sz >> 5) & 1;
4360 sz2 = (sz >> 4) & 1;
4361 sz &= 0xf;
4362 insn = XXXXXXXX(0xF, X0011,
4363 BITS4(1,D,sz1,sz2), sz, regD, X1110,
4364 BITS4(0,Q,M,1), regM);
4365 break;
4366 case ARMneon_VCVTFixedStoF:
4367 sz1 = (sz >> 5) & 1;
4368 sz2 = (sz >> 4) & 1;
4369 sz &= 0xf;
4370 insn = XXXXXXXX(0xF, X0010,
4371 BITS4(1,D,sz1,sz2), sz, regD, X1110,
4372 BITS4(0,Q,M,1), regM);
4373 break;
4374 case ARMneon_VCVTF32toF16:
4375 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0110,
4376 BITS4(0,0,M,0), regM);
4377 break;
4378 case ARMneon_VCVTF16toF32:
4379 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X0110, regD, X0111,
4380 BITS4(0,0,M,0), regM);
4381 break;
4382 case ARMneon_VRECIP:
4383 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
4384 BITS4(0,Q,M,0), regM);
4385 break;
4386 case ARMneon_VRECIPF:
4387 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
4388 BITS4(0,Q,M,0), regM);
4389 break;
4390 case ARMneon_VABSFP:
4391 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
4392 BITS4(0,Q,M,0), regM);
4393 break;
4394 case ARMneon_VRSQRTEFP:
4395 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0101,
4396 BITS4(1,Q,M,0), regM);
4397 break;
4398 case ARMneon_VRSQRTE:
4399 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1011, regD, X0100,
4400 BITS4(1,Q,M,0), regM);
4401 break;
4402 case ARMneon_VNEGF:
4403 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), X1001, regD, X0111,
4404 BITS4(1,Q,M,0), regM);
4405 break;
4407 default:
4408 goto bad;
4410 *p++ = insn;
4411 goto done;
4413 case ARMin_NDual: {
4414 UInt Q = i->ARMin.NDual.Q ? 1 : 0;
4415 UInt regD = (hregClass(i->ARMin.NDual.arg1) == HRcVec128)
4416 ? (qregEnc(i->ARMin.NDual.arg1) << 1)
4417 : dregEnc(i->ARMin.NDual.arg1);
4418 UInt regM = (hregClass(i->ARMin.NDual.arg2) == HRcVec128)
4419 ? (qregEnc(i->ARMin.NDual.arg2) << 1)
4420 : dregEnc(i->ARMin.NDual.arg2);
4421 UInt D = regD >> 4;
4422 UInt M = regM >> 4;
4423 UInt sz1 = i->ARMin.NDual.size >> 1;
4424 UInt sz2 = i->ARMin.NDual.size & 1;
4425 UInt insn;
4426 regD &= 0xF;
4427 regM &= 0xF;
4428 switch (i->ARMin.NDual.op) {
4429 case ARMneon_TRN: /* VTRN reg, reg */
4430 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4431 regD, X0000, BITS4(1,Q,M,0), regM);
4432 break;
4433 case ARMneon_ZIP: /* VZIP reg, reg */
4434 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4435 regD, X0001, BITS4(1,Q,M,0), regM);
4436 break;
4437 case ARMneon_UZP: /* VUZP reg, reg */
4438 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), BITS4(sz1,sz2,1,0),
4439 regD, X0001, BITS4(0,Q,M,0), regM);
4440 break;
4441 default:
4442 goto bad;
4444 *p++ = insn;
4445 goto done;
4447 case ARMin_NBinary: {
4448 UInt Q = i->ARMin.NBinary.Q ? 1 : 0;
4449 UInt regD = (hregClass(i->ARMin.NBinary.dst) == HRcVec128)
4450 ? (qregEnc(i->ARMin.NBinary.dst) << 1)
4451 : dregEnc(i->ARMin.NBinary.dst);
4452 UInt regN = (hregClass(i->ARMin.NBinary.argL) == HRcVec128)
4453 ? (qregEnc(i->ARMin.NBinary.argL) << 1)
4454 : dregEnc(i->ARMin.NBinary.argL);
4455 UInt regM = (hregClass(i->ARMin.NBinary.argR) == HRcVec128)
4456 ? (qregEnc(i->ARMin.NBinary.argR) << 1)
4457 : dregEnc(i->ARMin.NBinary.argR);
4458 UInt sz1 = i->ARMin.NBinary.size >> 1;
4459 UInt sz2 = i->ARMin.NBinary.size & 1;
4460 UInt D = regD >> 4;
4461 UInt N = regN >> 4;
4462 UInt M = regM >> 4;
4463 UInt insn;
4464 regD &= 0xF;
4465 regM &= 0xF;
4466 regN &= 0xF;
4467 switch (i->ARMin.NBinary.op) {
4468 case ARMneon_VAND: /* VAND reg, reg, reg */
4469 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X0001,
4470 BITS4(N,Q,M,1), regM);
4471 break;
4472 case ARMneon_VORR: /* VORR reg, reg, reg*/
4473 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X0001,
4474 BITS4(N,Q,M,1), regM);
4475 break;
4476 case ARMneon_VXOR: /* VEOR reg, reg, reg */
4477 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X0001,
4478 BITS4(N,Q,M,1), regM);
4479 break;
4480 case ARMneon_VADD: /* VADD reg, reg, reg */
4481 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4482 X1000, BITS4(N,Q,M,0), regM);
4483 break;
4484 case ARMneon_VSUB: /* VSUB reg, reg, reg */
4485 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4486 X1000, BITS4(N,Q,M,0), regM);
4487 break;
4488 case ARMneon_VMINU: /* VMIN.Uxx reg, reg, reg */
4489 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4490 X0110, BITS4(N,Q,M,1), regM);
4491 break;
4492 case ARMneon_VMINS: /* VMIN.Sxx reg, reg, reg */
4493 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4494 X0110, BITS4(N,Q,M,1), regM);
4495 break;
4496 case ARMneon_VMAXU: /* VMAX.Uxx reg, reg, reg */
4497 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4498 X0110, BITS4(N,Q,M,0), regM);
4499 break;
4500 case ARMneon_VMAXS: /* VMAX.Sxx reg, reg, reg */
4501 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4502 X0110, BITS4(N,Q,M,0), regM);
4503 break;
4504 case ARMneon_VRHADDS: /* VRHADD.Sxx reg, reg, reg */
4505 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4506 X0001, BITS4(N,Q,M,0), regM);
4507 break;
4508 case ARMneon_VRHADDU: /* VRHADD.Uxx reg, reg, reg */
4509 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4510 X0001, BITS4(N,Q,M,0), regM);
4511 break;
4512 case ARMneon_VQADDU: /* VQADD unsigned reg, reg, reg */
4513 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4514 X0000, BITS4(N,Q,M,1), regM);
4515 break;
4516 case ARMneon_VQADDS: /* VQADD signed reg, reg, reg */
4517 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4518 X0000, BITS4(N,Q,M,1), regM);
4519 break;
4520 case ARMneon_VQSUBU: /* VQSUB unsigned reg, reg, reg */
4521 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4522 X0010, BITS4(N,Q,M,1), regM);
4523 break;
4524 case ARMneon_VQSUBS: /* VQSUB signed reg, reg, reg */
4525 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4526 X0010, BITS4(N,Q,M,1), regM);
4527 break;
4528 case ARMneon_VCGTU: /* VCGT unsigned reg, reg, reg */
4529 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4530 X0011, BITS4(N,Q,M,0), regM);
4531 break;
4532 case ARMneon_VCGTS: /* VCGT signed reg, reg, reg */
4533 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4534 X0011, BITS4(N,Q,M,0), regM);
4535 break;
4536 case ARMneon_VCGEU: /* VCGE unsigned reg, reg, reg */
4537 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4538 X0011, BITS4(N,Q,M,1), regM);
4539 break;
4540 case ARMneon_VCGES: /* VCGE signed reg, reg, reg */
4541 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4542 X0011, BITS4(N,Q,M,1), regM);
4543 break;
4544 case ARMneon_VCEQ: /* VCEQ reg, reg, reg */
4545 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4546 X1000, BITS4(N,Q,M,1), regM);
4547 break;
4548 case ARMneon_VEXT: /* VEXT.8 reg, reg, #imm4*/
4549 if (i->ARMin.NBinary.size >= 16)
4550 goto bad;
4551 insn = XXXXXXXX(0xF, X0010, BITS4(1,D,1,1), regN, regD,
4552 i->ARMin.NBinary.size & 0xf, BITS4(N,Q,M,0),
4553 regM);
4554 break;
4555 case ARMneon_VMUL:
4556 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4557 X1001, BITS4(N,Q,M,1), regM);
4558 break;
4559 case ARMneon_VMULLU:
4560 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,sz1,sz2), regN, regD,
4561 X1100, BITS4(N,0,M,0), regM);
4562 break;
4563 case ARMneon_VMULLS:
4564 insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
4565 X1100, BITS4(N,0,M,0), regM);
4566 break;
4567 case ARMneon_VMULP:
4568 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4569 X1001, BITS4(N,Q,M,1), regM);
4570 break;
4571 case ARMneon_VMULFP:
4572 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
4573 X1101, BITS4(N,Q,M,1), regM);
4574 break;
4575 case ARMneon_VMULLP:
4576 insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
4577 X1110, BITS4(N,0,M,0), regM);
4578 break;
4579 case ARMneon_VQDMULH:
4580 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4581 X1011, BITS4(N,Q,M,0), regM);
4582 break;
4583 case ARMneon_VQRDMULH:
4584 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4585 X1011, BITS4(N,Q,M,0), regM);
4586 break;
4587 case ARMneon_VQDMULL:
4588 insn = XXXXXXXX(0xF, X0010, BITS4(1,D,sz1,sz2), regN, regD,
4589 X1101, BITS4(N,0,M,0), regM);
4590 break;
4591 case ARMneon_VTBL:
4592 insn = XXXXXXXX(0xF, X0011, BITS4(1,D,1,1), regN, regD,
4593 X1000, BITS4(N,0,M,0), regM);
4594 break;
4595 case ARMneon_VPADD:
4596 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4597 X1011, BITS4(N,Q,M,1), regM);
4598 break;
4599 case ARMneon_VPADDFP:
4600 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
4601 X1101, BITS4(N,Q,M,0), regM);
4602 break;
4603 case ARMneon_VPMINU:
4604 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4605 X1010, BITS4(N,Q,M,1), regM);
4606 break;
4607 case ARMneon_VPMINS:
4608 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4609 X1010, BITS4(N,Q,M,1), regM);
4610 break;
4611 case ARMneon_VPMAXU:
4612 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4613 X1010, BITS4(N,Q,M,0), regM);
4614 break;
4615 case ARMneon_VPMAXS:
4616 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4617 X1010, BITS4(N,Q,M,0), regM);
4618 break;
4619 case ARMneon_VADDFP: /* VADD reg, reg, reg */
4620 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
4621 X1101, BITS4(N,Q,M,0), regM);
4622 break;
4623 case ARMneon_VSUBFP: /* VADD reg, reg, reg */
4624 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
4625 X1101, BITS4(N,Q,M,0), regM);
4626 break;
4627 case ARMneon_VABDFP: /* VABD reg, reg, reg */
4628 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
4629 X1101, BITS4(N,Q,M,0), regM);
4630 break;
4631 case ARMneon_VMINF:
4632 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD,
4633 X1111, BITS4(N,Q,M,0), regM);
4634 break;
4635 case ARMneon_VMAXF:
4636 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD,
4637 X1111, BITS4(N,Q,M,0), regM);
4638 break;
4639 case ARMneon_VPMINF:
4640 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD,
4641 X1111, BITS4(N,Q,M,0), regM);
4642 break;
4643 case ARMneon_VPMAXF:
4644 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD,
4645 X1111, BITS4(N,Q,M,0), regM);
4646 break;
4647 case ARMneon_VRECPS:
4648 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1111,
4649 BITS4(N,Q,M,1), regM);
4650 break;
4651 case ARMneon_VCGTF:
4652 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,1,0), regN, regD, X1110,
4653 BITS4(N,Q,M,0), regM);
4654 break;
4655 case ARMneon_VCGEF:
4656 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,0,0), regN, regD, X1110,
4657 BITS4(N,Q,M,0), regM);
4658 break;
4659 case ARMneon_VCEQF:
4660 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,0,0), regN, regD, X1110,
4661 BITS4(N,Q,M,0), regM);
4662 break;
4663 case ARMneon_VRSQRTS:
4664 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,1,0), regN, regD, X1111,
4665 BITS4(N,Q,M,1), regM);
4666 break;
4667 default:
4668 goto bad;
4670 *p++ = insn;
4671 goto done;
4673 case ARMin_NShift: {
4674 UInt Q = i->ARMin.NShift.Q ? 1 : 0;
4675 UInt regD = (hregClass(i->ARMin.NShift.dst) == HRcVec128)
4676 ? (qregEnc(i->ARMin.NShift.dst) << 1)
4677 : dregEnc(i->ARMin.NShift.dst);
4678 UInt regM = (hregClass(i->ARMin.NShift.argL) == HRcVec128)
4679 ? (qregEnc(i->ARMin.NShift.argL) << 1)
4680 : dregEnc(i->ARMin.NShift.argL);
4681 UInt regN = (hregClass(i->ARMin.NShift.argR) == HRcVec128)
4682 ? (qregEnc(i->ARMin.NShift.argR) << 1)
4683 : dregEnc(i->ARMin.NShift.argR);
4684 UInt sz1 = i->ARMin.NShift.size >> 1;
4685 UInt sz2 = i->ARMin.NShift.size & 1;
4686 UInt D = regD >> 4;
4687 UInt N = regN >> 4;
4688 UInt M = regM >> 4;
4689 UInt insn;
4690 regD &= 0xF;
4691 regM &= 0xF;
4692 regN &= 0xF;
4693 switch (i->ARMin.NShift.op) {
4694 case ARMneon_VSHL:
4695 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4696 X0100, BITS4(N,Q,M,0), regM);
4697 break;
4698 case ARMneon_VSAL:
4699 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4700 X0100, BITS4(N,Q,M,0), regM);
4701 break;
4702 case ARMneon_VQSHL:
4703 insn = XXXXXXXX(0xF, X0011, BITS4(0,D,sz1,sz2), regN, regD,
4704 X0100, BITS4(N,Q,M,1), regM);
4705 break;
4706 case ARMneon_VQSAL:
4707 insn = XXXXXXXX(0xF, X0010, BITS4(0,D,sz1,sz2), regN, regD,
4708 X0100, BITS4(N,Q,M,1), regM);
4709 break;
4710 default:
4711 goto bad;
4713 *p++ = insn;
4714 goto done;
4716 case ARMin_NShl64: {
4717 HReg regDreg = i->ARMin.NShl64.dst;
4718 HReg regMreg = i->ARMin.NShl64.src;
4719 UInt amt = i->ARMin.NShl64.amt;
4720 vassert(amt >= 1 && amt <= 63);
4721 vassert(hregClass(regDreg) == HRcFlt64);
4722 vassert(hregClass(regMreg) == HRcFlt64);
4723 UInt regD = dregEnc(regDreg);
4724 UInt regM = dregEnc(regMreg);
4725 UInt D = (regD >> 4) & 1;
4726 UInt Vd = regD & 0xF;
4727 UInt L = 1;
4728 UInt Q = 0; /* always 64-bit */
4729 UInt M = (regM >> 4) & 1;
4730 UInt Vm = regM & 0xF;
4731 UInt insn = XXXXXXXX(X1111,X0010, BITS4(1,D,(amt>>5)&1,(amt>>4)&1),
4732 amt & 0xF, Vd, X0101, BITS4(L,Q,M,1), Vm);
4733 *p++ = insn;
4734 goto done;
4736 case ARMin_NeonImm: {
4737 UInt Q = (hregClass(i->ARMin.NeonImm.dst) == HRcVec128) ? 1 : 0;
4738 UInt regD = Q ? (qregEnc(i->ARMin.NeonImm.dst) << 1) :
4739 dregEnc(i->ARMin.NeonImm.dst);
4740 UInt D = regD >> 4;
4741 UInt imm = i->ARMin.NeonImm.imm->imm8;
4742 UInt tp = i->ARMin.NeonImm.imm->type;
4743 UInt j = imm >> 7;
4744 UInt imm3 = (imm >> 4) & 0x7;
4745 UInt imm4 = imm & 0xF;
4746 UInt cmode, op;
4747 UInt insn;
4748 regD &= 0xF;
4749 if (tp == 9)
4750 op = 1;
4751 else
4752 op = 0;
4753 switch (tp) {
4754 case 0:
4755 case 1:
4756 case 2:
4757 case 3:
4758 case 4:
4759 case 5:
4760 cmode = tp << 1;
4761 break;
4762 case 9:
4763 case 6:
4764 cmode = 14;
4765 break;
4766 case 7:
4767 cmode = 12;
4768 break;
4769 case 8:
4770 cmode = 13;
4771 break;
4772 case 10:
4773 cmode = 15;
4774 break;
4775 default:
4776 vpanic("ARMin_NeonImm");
4779 insn = XXXXXXXX(0xF, BITS4(0,0,1,j), BITS4(1,D,0,0), imm3, regD,
4780 cmode, BITS4(0,Q,op,1), imm4);
4781 *p++ = insn;
4782 goto done;
4784 case ARMin_NCMovQ: {
4785 UInt cc = (UInt)i->ARMin.NCMovQ.cond;
4786 UInt qM = qregEnc(i->ARMin.NCMovQ.src) << 1;
4787 UInt qD = qregEnc(i->ARMin.NCMovQ.dst) << 1;
4788 UInt vM = qM & 0xF;
4789 UInt vD = qD & 0xF;
4790 UInt M = (qM >> 4) & 1;
4791 UInt D = (qD >> 4) & 1;
4792 vassert(cc < 16 && cc != ARMcc_AL && cc != ARMcc_NV);
4793 /* b!cc here+8: !cc A00 0000 */
4794 UInt insn = XXXXXXXX(cc ^ 1, 0xA, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0);
4795 *p++ = insn;
4796 /* vmov qD, qM */
4797 insn = XXXXXXXX(0xF, 0x2, BITS4(0,D,1,0),
4798 vM, vD, BITS4(0,0,0,1), BITS4(M,1,M,1), vM);
4799 *p++ = insn;
4800 goto done;
4802 case ARMin_Add32: {
4803 UInt regD = iregEnc(i->ARMin.Add32.rD);
4804 UInt regN = iregEnc(i->ARMin.Add32.rN);
4805 UInt imm32 = i->ARMin.Add32.imm32;
4806 vassert(regD != regN);
4807 /* MOV regD, imm32 */
4808 p = imm32_to_ireg((UInt *)p, regD, imm32);
4809 /* ADD regD, regN, regD */
4810 UInt insn = XXXXXXXX(0xE, 0, X1000, regN, regD, 0, 0, regD);
4811 *p++ = insn;
4812 goto done;
4815 case ARMin_EvCheck: {
4816 /* We generate:
4817 ldr r12, [r8 + #4] 4 == offsetof(host_EvC_COUNTER)
4818 subs r12, r12, #1 (A1)
4819 str r12, [r8 + #4] 4 == offsetof(host_EvC_COUNTER)
4820 bpl nofail
4821 ldr r12, [r8 + #0] 0 == offsetof(host_EvC_FAILADDR)
4822 bx r12
4823 nofail:
4825 UInt* p0 = p;
4826 p = do_load_or_store32(p, True/*isLoad*/, /*r*/12,
4827 i->ARMin.EvCheck.amCounter);
4828 *p++ = 0xE25CC001; /* subs r12, r12, #1 */
4829 p = do_load_or_store32(p, False/*!isLoad*/, /*r*/12,
4830 i->ARMin.EvCheck.amCounter);
4831 *p++ = 0x5A000001; /* bpl nofail */
4832 p = do_load_or_store32(p, True/*isLoad*/, /*r*/12,
4833 i->ARMin.EvCheck.amFailAddr);
4834 *p++ = 0xE12FFF1C; /* bx r12 */
4835 /* nofail: */
4837 /* Crosscheck */
4838 vassert(evCheckSzB_ARM() == (UChar*)p - (UChar*)p0);
4839 goto done;
4842 case ARMin_ProfInc: {
4843 /* We generate:
4844 (ctrP is unknown now, so use 0x65556555 in the
4845 expectation that a later call to LibVEX_patchProfCtr
4846 will be used to fill in the immediate fields once the
4847 right value is known.)
4848 movw r12, lo16(0x65556555)
4849 movt r12, hi16(0x65556555)
4850 ldr r11, [r12]
4851 adds r11, r11, #1
4852 str r11, [r12]
4853 ldr r11, [r12+4]
4854 adc r11, r11, #0
4855 str r11, [r12+4]
4857 p = imm32_to_ireg_EXACTLY2(p, /*r*/12, 0x65556555);
4858 *p++ = 0xE59CB000;
4859 *p++ = 0xE29BB001;
4860 *p++ = 0xE58CB000;
4861 *p++ = 0xE59CB004;
4862 *p++ = 0xE2ABB000;
4863 *p++ = 0xE58CB004;
4864 /* Tell the caller .. */
4865 vassert(!(*is_profInc));
4866 *is_profInc = True;
4867 goto done;
4870 /* ... */
4871 default:
4872 goto bad;
4875 bad:
4876 ppARMInstr(i);
4877 vpanic("emit_ARMInstr");
4878 /*NOTREACHED*/
4880 done:
4881 vassert(((UChar*)p) - &buf[0] <= 32);
4882 return ((UChar*)p) - &buf[0];
4886 /* How big is an event check? See case for ARMin_EvCheck in
4887 emit_ARMInstr just above. That crosschecks what this returns, so
4888 we can tell if we're inconsistent. */
4889 Int evCheckSzB_ARM (void)
4891 return 24;
4895 /* NB: what goes on here has to be very closely coordinated with the
4896 emitInstr case for XDirect, above. */
4897 VexInvalRange chainXDirect_ARM ( VexEndness endness_host,
4898 void* place_to_chain,
4899 const void* disp_cp_chain_me_EXPECTED,
4900 const void* place_to_jump_to )
4902 vassert(endness_host == VexEndnessLE);
4904 /* What we're expecting to see is:
4905 movw r12, lo16(disp_cp_chain_me_to_EXPECTED)
4906 movt r12, hi16(disp_cp_chain_me_to_EXPECTED)
4907 blx r12
4909 <8 bytes generated by imm32_to_ireg_EXACTLY2>
4910 E1 2F FF 3C
4912 UInt* p = (UInt*)place_to_chain;
4913 vassert(0 == (3 & (HWord)p));
4914 vassert(is_imm32_to_ireg_EXACTLY2(
4915 p, /*r*/12, (UInt)(Addr)disp_cp_chain_me_EXPECTED));
4916 vassert(p[2] == 0xE12FFF3C);
4917 /* And what we want to change it to is either:
4918 (general case)
4919 movw r12, lo16(place_to_jump_to)
4920 movt r12, hi16(place_to_jump_to)
4921 bx r12
4923 <8 bytes generated by imm32_to_ireg_EXACTLY2>
4924 E1 2F FF 1C
4925 ---OR---
4926 in the case where the displacement falls within 26 bits
4927 b disp24; undef; undef
4929 EA <3 bytes == disp24>
4930 FF 00 00 00
4931 FF 00 00 00
4933 In both cases the replacement has the same length as the original.
4934 To remain sane & verifiable,
4935 (1) limit the displacement for the short form to
4936 (say) +/- 30 million, so as to avoid wraparound
4937 off-by-ones
4938 (2) even if the short form is applicable, once every (say)
4939 1024 times use the long form anyway, so as to maintain
4940 verifiability
4943 /* This is the delta we need to put into a B insn. It's relative
4944 to the start of the next-but-one insn, hence the -8. */
4945 Long delta = (Long)((const UChar *)place_to_jump_to - (const UChar*)p) - 8;
4946 Bool shortOK = delta >= -30*1000*1000 && delta < 30*1000*1000;
4947 vassert(0 == (delta & (Long)3));
4949 static UInt shortCTR = 0; /* DO NOT MAKE NON-STATIC */
4950 if (shortOK) {
4951 shortCTR++; // thread safety bleh
4952 if (0 == (shortCTR & 0x3FF)) {
4953 shortOK = False;
4954 if (0)
4955 vex_printf("QQQ chainXDirect_ARM: shortCTR = %u, "
4956 "using long form\n", shortCTR);
4960 /* And make the modifications. */
4961 if (shortOK) {
4962 UInt uimm24 = (UInt)(delta >> 2);
4963 UInt uimm24_shl8 = uimm24 << 8;
4964 Int simm24 = (Int)uimm24_shl8;
4965 simm24 >>= 8;
4966 vassert(uimm24 == simm24);
4967 p[0] = 0xEA000000 | (simm24 & 0x00FFFFFF);
4968 p[1] = 0xFF000000;
4969 p[2] = 0xFF000000;
4970 } else {
4971 (void)imm32_to_ireg_EXACTLY2(
4972 p, /*r*/12, (UInt)(Addr)place_to_jump_to);
4973 p[2] = 0xE12FFF1C;
4976 VexInvalRange vir = {(HWord)p, 12};
4977 return vir;
4981 /* NB: what goes on here has to be very closely coordinated with the
4982 emitInstr case for XDirect, above. */
4983 VexInvalRange unchainXDirect_ARM ( VexEndness endness_host,
4984 void* place_to_unchain,
4985 const void* place_to_jump_to_EXPECTED,
4986 const void* disp_cp_chain_me )
4988 vassert(endness_host == VexEndnessLE);
4990 /* What we're expecting to see is:
4991 (general case)
4992 movw r12, lo16(place_to_jump_to_EXPECTED)
4993 movt r12, lo16(place_to_jump_to_EXPECTED)
4994 bx r12
4996 <8 bytes generated by imm32_to_ireg_EXACTLY2>
4997 E1 2F FF 1C
4998 ---OR---
4999 in the case where the displacement falls within 26 bits
5000 b disp24; undef; undef
5002 EA <3 bytes == disp24>
5003 FF 00 00 00
5004 FF 00 00 00
5006 UInt* p = (UInt*)place_to_unchain;
5007 vassert(0 == (3 & (HWord)p));
5009 Bool valid = False;
5010 if (is_imm32_to_ireg_EXACTLY2(
5011 p, /*r*/12, (UInt)(Addr)place_to_jump_to_EXPECTED)
5012 && p[2] == 0xE12FFF1C) {
5013 valid = True; /* it's the long form */
5014 if (0)
5015 vex_printf("QQQ unchainXDirect_ARM: found long form\n");
5016 } else
5017 if ((p[0] >> 24) == 0xEA && p[1] == 0xFF000000 && p[2] == 0xFF000000) {
5018 /* It's the short form. Check the displacement is right. */
5019 Int simm24 = p[0] & 0x00FFFFFF;
5020 simm24 <<= 8; simm24 >>= 8;
5021 if ((UChar*)p + (simm24 << 2) + 8 == place_to_jump_to_EXPECTED) {
5022 valid = True;
5023 if (0)
5024 vex_printf("QQQ unchainXDirect_ARM: found short form\n");
5027 vassert(valid);
5029 /* And what we want to change it to is:
5030 movw r12, lo16(disp_cp_chain_me)
5031 movt r12, hi16(disp_cp_chain_me)
5032 blx r12
5034 <8 bytes generated by imm32_to_ireg_EXACTLY2>
5035 E1 2F FF 3C
5037 (void)imm32_to_ireg_EXACTLY2(
5038 p, /*r*/12, (UInt)(Addr)disp_cp_chain_me);
5039 p[2] = 0xE12FFF3C;
5040 VexInvalRange vir = {(HWord)p, 12};
5041 return vir;
5045 /* Patch the counter address into a profile inc point, as previously
5046 created by the ARMin_ProfInc case for emit_ARMInstr. */
5047 VexInvalRange patchProfInc_ARM ( VexEndness endness_host,
5048 void* place_to_patch,
5049 const ULong* location_of_counter )
5051 vassert(endness_host == VexEndnessLE);
5052 vassert(sizeof(ULong*) == 4);
5053 UInt* p = (UInt*)place_to_patch;
5054 vassert(0 == (3 & (HWord)p));
5055 vassert(is_imm32_to_ireg_EXACTLY2(p, /*r*/12, 0x65556555));
5056 vassert(p[2] == 0xE59CB000);
5057 vassert(p[3] == 0xE29BB001);
5058 vassert(p[4] == 0xE58CB000);
5059 vassert(p[5] == 0xE59CB004);
5060 vassert(p[6] == 0xE2ABB000);
5061 vassert(p[7] == 0xE58CB004);
5062 imm32_to_ireg_EXACTLY2(p, /*r*/12, (UInt)(Addr)location_of_counter);
5063 VexInvalRange vir = {(HWord)p, 8};
5064 return vir;
/* Retire the instruction-encoding helper macros so that none of them
   remains defined past this point. */

/* Field builder. */
#undef BITS4

/* Word-packing helpers. */
#undef XXXXXXXX
#undef XXXXX__X
#undef XXX___XX
#undef XXXXXX__
#undef XXXXX___
#undef XX______

/* Named 4-bit constants. */
#undef X0000
#undef X0001
#undef X0010
#undef X0011
#undef X0100
#undef X0101
#undef X0110
#undef X0111
#undef X1000
#undef X1001
#undef X1010
#undef X1011
#undef X1100
#undef X1101
#undef X1110
#undef X1111
5092 /*---------------------------------------------------------------*/
5093 /*--- end host_arm_defs.c ---*/
5094 /*---------------------------------------------------------------*/