2 /*---------------------------------------------------------------*/
3 /*--- begin guest_arm64_helpers.c ---*/
4 /*---------------------------------------------------------------*/
6 /*
7 This file is part of Valgrind, a dynamic binary instrumentation
8 framework.
10 Copyright (C) 2013-2017 OpenWorks
11 info@open-works.net
13 This program is free software; you can redistribute it and/or
14 modify it under the terms of the GNU General Public License as
15 published by the Free Software Foundation; either version 2 of the
16 License, or (at your option) any later version.
18 This program is distributed in the hope that it will be useful, but
19 WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
21 General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with this program; if not, see <http://www.gnu.org/licenses/>.
26 The GNU General Public License is contained in the file COPYING.
29 #include "libvex_basictypes.h"
30 #include "libvex_emnote.h"
31 #include "libvex_guest_arm64.h"
32 #include "libvex_ir.h"
33 #include "libvex.h"
35 #include "main_util.h"
36 #include "main_globals.h"
37 #include "guest_generic_bb_to_IR.h"
38 #include "guest_arm64_defs.h"
41 /* This file contains helper functions for arm guest code. Calls to
42 these functions are generated by the back end. These calls are of
43 course in the host machine code and this file will be compiled to
44 host machine code, so that all makes sense.
46 Only change the signatures of these helper functions very
47 carefully. If you change the signature here, you'll have to change
48 the parameters passed to it in the IR calls constructed by
49 guest_arm64_toIR.c.
53 /* Set to 1 to get detailed profiling info about individual N, Z, C
54 and V flag evaluation. */
55 #define PROFILE_NZCV_FLAGS 0
57 #if PROFILE_NZCV_FLAGS
59 static UInt tab_eval[ARM64G_CC_OP_NUMBER][16];
60 static UInt initted = 0;
61 static UInt tot_evals = 0;
63 static void initCounts ( void )
65 UInt i, j;
66 for (i = 0; i < ARM64G_CC_OP_NUMBER; i++) {
67 for (j = 0; j < 16; j++) {
68 tab_eval[i][j] = 0;
71 initted = 1;
74 static void showCounts ( void )
76 const HChar* nameCC[16]
77 = { "EQ", "NE", "CS", "CC", "MI", "PL", "VS", "VC",
78 "HI", "LS", "GE", "LT", "GT", "LE", "AL", "NV" };
79 UInt i, j;
80 ULong sum = 0;
81 vex_printf("\nCC_OP 0 1 2 3 "
82 " 4 5 6\n");
83 vex_printf( "--------------------------------------------------"
84 "--------------------------\n");
85 for (j = 0; j < 16; j++) {
86 vex_printf("%2d %s ", j, nameCC[j]);
87 for (i = 0; i < ARM64G_CC_OP_NUMBER; i++) {
88 vex_printf("%9d ", tab_eval[i][j]);
89 sum += tab_eval[i][j];
91 vex_printf("\n");
93 vex_printf("(In total %llu calls)\n", sum);
96 #define NOTE_EVAL(_cc_op, _cond) \
97 do { \
98 if (!initted) initCounts(); \
99 vassert( ((UInt)(_cc_op)) < ARM64G_CC_OP_NUMBER); \
100 vassert( ((UInt)(_cond)) < 16); \
tab_eval[(UInt)(_cc_op)][(UInt)(_cond)]++; \
102 tot_evals++; \
103 if (0 == (tot_evals & 0x7FFF)) \
104 showCounts(); \
105 } while (0)
107 #endif /* PROFILE_NZCV_FLAGS */
110 /* Calculate the N flag from the supplied thunk components, in the
111 least significant bit of the word. Returned bits 63:1 are zero. */
112 static
113 ULong arm64g_calculate_flag_n ( ULong cc_op, ULong cc_dep1,
114 ULong cc_dep2, ULong cc_dep3 )
116 switch (cc_op) {
117 case ARM64G_CC_OP_COPY: {
118 /* (nzcv:28x0, unused, unused) */
119 ULong nf = (cc_dep1 >> ARM64G_CC_SHIFT_N) & 1;
120 return nf;
122 case ARM64G_CC_OP_ADD32: {
123 /* (argL, argR, unused) */
124 UInt argL = (UInt)cc_dep1;
125 UInt argR = (UInt)cc_dep2;
126 UInt res = argL + argR;
127 ULong nf = (ULong)(res >> 31);
128 return nf;
130 case ARM64G_CC_OP_ADD64: {
131 /* (argL, argR, unused) */
132 ULong argL = cc_dep1;
133 ULong argR = cc_dep2;
134 ULong res = argL + argR;
135 ULong nf = (ULong)(res >> 63);
136 return nf;
138 case ARM64G_CC_OP_SUB32: {
139 /* (argL, argR, unused) */
140 UInt argL = (UInt)cc_dep1;
141 UInt argR = (UInt)cc_dep2;
142 UInt res = argL - argR;
143 ULong nf = (ULong)(res >> 31);
144 return nf;
146 case ARM64G_CC_OP_SUB64: {
147 /* (argL, argR, unused) */
148 ULong argL = cc_dep1;
149 ULong argR = cc_dep2;
150 ULong res = argL - argR;
151 ULong nf = res >> 63;
152 return nf;
154 case ARM64G_CC_OP_ADC32: {
155 /* (argL, argR, oldC) */
156 UInt argL = cc_dep1;
157 UInt argR = cc_dep2;
158 UInt oldC = cc_dep3;
159 vassert((oldC & ~1) == 0);
160 UInt res = argL + argR + oldC;
161 ULong nf = (ULong)(res >> 31);
162 return nf;
164 case ARM64G_CC_OP_ADC64: {
165 /* (argL, argR, oldC) */
166 ULong argL = cc_dep1;
167 ULong argR = cc_dep2;
168 ULong oldC = cc_dep3;
169 vassert((oldC & ~1) == 0);
170 ULong res = argL + argR + oldC;
171 ULong nf = res >> 63;
172 return nf;
174 case ARM64G_CC_OP_SBC32: {
175 /* (argL, argR, oldC) */
176 UInt argL = cc_dep1;
177 UInt argR = cc_dep2;
178 UInt oldC = cc_dep3;
179 vassert((oldC & ~1) == 0);
180 UInt res = argL - argR - (oldC ^ 1);
181 ULong nf = (ULong)(res >> 31);
182 return nf;
184 case ARM64G_CC_OP_SBC64: {
185 /* (argL, argR, oldC) */
186 ULong argL = cc_dep1;
187 ULong argR = cc_dep2;
188 ULong oldC = cc_dep3;
189 vassert((oldC & ~1) == 0);
190 ULong res = argL - argR - (oldC ^ 1);
191 ULong nf = res >> 63;
192 return nf;
194 case ARM64G_CC_OP_LOGIC32: {
195 /* (res, unused, unused) */
196 UInt res = (UInt)cc_dep1;
197 ULong nf = res >> 31;
198 return nf;
200 case ARM64G_CC_OP_LOGIC64: {
201 /* (res, unused, unused) */
202 ULong res = cc_dep1;
203 ULong nf = res >> 63;
204 return nf;
206 //ZZ case ARMG_CC_OP_MUL: {
207 //ZZ /* (res, unused, oldC:oldV) */
208 //ZZ UInt res = cc_dep1;
209 //ZZ UInt nf = res >> 31;
210 //ZZ return nf;
211 //ZZ }
212 //ZZ case ARMG_CC_OP_MULL: {
213 //ZZ /* (resLo32, resHi32, oldC:oldV) */
214 //ZZ UInt resHi32 = cc_dep2;
215 //ZZ UInt nf = resHi32 >> 31;
216 //ZZ return nf;
217 //ZZ }
218 default:
219 /* shouldn't really make these calls from generated code */
220 vex_printf("arm64g_calculate_flag_n"
221 "( op=%llu, dep1=0x%llx, dep2=0x%llx, dep3=0x%llx )\n",
222 cc_op, cc_dep1, cc_dep2, cc_dep3 );
223 vpanic("arm64g_calculate_flag_n");
228 /* Calculate the Z flag from the supplied thunk components, in the
229 least significant bit of the word. Returned bits 63:1 are zero. */
230 static
231 ULong arm64g_calculate_flag_z ( ULong cc_op, ULong cc_dep1,
232 ULong cc_dep2, ULong cc_dep3 )
234 switch (cc_op) {
235 case ARM64G_CC_OP_COPY: {
236 /* (nzcv:28x0, unused, unused) */
237 ULong zf = (cc_dep1 >> ARM64G_CC_SHIFT_Z) & 1;
238 return zf;
240 case ARM64G_CC_OP_ADD32: {
241 /* (argL, argR, unused) */
242 UInt argL = (UInt)cc_dep1;
243 UInt argR = (UInt)cc_dep2;
244 UInt res = argL + argR;
245 ULong zf = res == 0;
246 return zf;
248 case ARM64G_CC_OP_ADD64: {
249 /* (argL, argR, unused) */
250 ULong argL = cc_dep1;
251 ULong argR = cc_dep2;
252 ULong res = argL + argR;
253 ULong zf = res == 0;
254 return zf;
256 case ARM64G_CC_OP_SUB32: {
257 /* (argL, argR, unused) */
258 UInt argL = (UInt)cc_dep1;
259 UInt argR = (UInt)cc_dep2;
260 UInt res = argL - argR;
261 ULong zf = res == 0;
262 return zf;
264 case ARM64G_CC_OP_SUB64: {
265 /* (argL, argR, unused) */
266 ULong argL = cc_dep1;
267 ULong argR = cc_dep2;
268 ULong res = argL - argR;
269 ULong zf = res == 0;
270 return zf;
272 case ARM64G_CC_OP_ADC32: {
273 /* (argL, argR, oldC) */
274 UInt argL = cc_dep1;
275 UInt argR = cc_dep2;
276 UInt oldC = cc_dep3;
277 vassert((oldC & ~1) == 0);
278 UInt res = argL + argR + oldC;
279 ULong zf = res == 0;
280 return zf;
282 case ARM64G_CC_OP_ADC64: {
283 /* (argL, argR, oldC) */
284 ULong argL = cc_dep1;
285 ULong argR = cc_dep2;
286 ULong oldC = cc_dep3;
287 vassert((oldC & ~1) == 0);
288 ULong res = argL + argR + oldC;
289 ULong zf = res == 0;
290 return zf;
292 case ARM64G_CC_OP_SBC32: {
293 /* (argL, argR, oldC) */
294 UInt argL = cc_dep1;
295 UInt argR = cc_dep2;
296 UInt oldC = cc_dep3;
297 vassert((oldC & ~1) == 0);
298 UInt res = argL - argR - (oldC ^ 1);
299 ULong zf = res == 0;
300 return zf;
302 case ARM64G_CC_OP_SBC64: {
303 /* (argL, argR, oldC) */
304 ULong argL = cc_dep1;
305 ULong argR = cc_dep2;
306 ULong oldC = cc_dep3;
307 vassert((oldC & ~1) == 0);
308 ULong res = argL - argR - (oldC ^ 1);
309 ULong zf = res == 0;
310 return zf;
312 case ARM64G_CC_OP_LOGIC32: {
313 /* (res, unused, unused) */
314 UInt res = (UInt)cc_dep1;
315 ULong zf = res == 0;
316 return zf;
318 case ARM64G_CC_OP_LOGIC64: {
319 /* (res, unused, unused) */
320 ULong res = cc_dep1;
321 ULong zf = res == 0;
322 return zf;
324 //ZZ case ARMG_CC_OP_MUL: {
325 //ZZ /* (res, unused, oldC:oldV) */
326 //ZZ UInt res = cc_dep1;
327 //ZZ UInt zf = res == 0;
328 //ZZ return zf;
329 //ZZ }
330 //ZZ case ARMG_CC_OP_MULL: {
331 //ZZ /* (resLo32, resHi32, oldC:oldV) */
332 //ZZ UInt resLo32 = cc_dep1;
333 //ZZ UInt resHi32 = cc_dep2;
334 //ZZ UInt zf = (resHi32|resLo32) == 0;
335 //ZZ return zf;
336 //ZZ }
337 default:
338 /* shouldn't really make these calls from generated code */
339 vex_printf("arm64g_calculate_flag_z"
340 "( op=%llu, dep1=0x%llx, dep2=0x%llx, dep3=0x%llx )\n",
341 cc_op, cc_dep1, cc_dep2, cc_dep3 );
342 vpanic("arm64g_calculate_flag_z");
347 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
348 /* Calculate the C flag from the supplied thunk components, in the
349 least significant bit of the word. Returned bits 63:1 are zero. */
350 ULong arm64g_calculate_flag_c ( ULong cc_op, ULong cc_dep1,
351 ULong cc_dep2, ULong cc_dep3 )
353 switch (cc_op) {
354 case ARM64G_CC_OP_COPY: {
355 /* (nzcv:28x0, unused, unused) */
356 ULong cf = (cc_dep1 >> ARM64G_CC_SHIFT_C) & 1;
357 return cf;
359 case ARM64G_CC_OP_ADD32: {
360 /* (argL, argR, unused) */
361 UInt argL = (UInt)cc_dep1;
362 UInt argR = (UInt)cc_dep2;
363 UInt res = argL + argR;
364 ULong cf = res < argL;
365 return cf;
367 case ARM64G_CC_OP_ADD64: {
368 /* (argL, argR, unused) */
369 ULong argL = cc_dep1;
370 ULong argR = cc_dep2;
371 ULong res = argL + argR;
372 ULong cf = res < argL;
373 return cf;
375 case ARM64G_CC_OP_SUB32: {
376 /* (argL, argR, unused) */
377 UInt argL = (UInt)cc_dep1;
378 UInt argR = (UInt)cc_dep2;
379 ULong cf = argL >= argR;
380 return cf;
382 case ARM64G_CC_OP_SUB64: {
383 /* (argL, argR, unused) */
384 ULong argL = cc_dep1;
385 ULong argR = cc_dep2;
386 ULong cf = argL >= argR;
387 return cf;
389 case ARM64G_CC_OP_ADC32: {
390 /* (argL, argR, oldC) */
391 UInt argL = cc_dep1;
392 UInt argR = cc_dep2;
393 UInt oldC = cc_dep3;
394 vassert((oldC & ~1) == 0);
395 UInt res = argL + argR + oldC;
396 ULong cf = oldC ? (res <= argL) : (res < argL);
397 return cf;
399 case ARM64G_CC_OP_ADC64: {
400 /* (argL, argR, oldC) */
401 ULong argL = cc_dep1;
402 ULong argR = cc_dep2;
403 ULong oldC = cc_dep3;
404 vassert((oldC & ~1) == 0);
405 ULong res = argL + argR + oldC;
406 ULong cf = oldC ? (res <= argL) : (res < argL);
407 return cf;
409 case ARM64G_CC_OP_SBC32: {
410 /* (argL, argR, oldC) */
411 UInt argL = cc_dep1;
412 UInt argR = cc_dep2;
413 UInt oldC = cc_dep3;
414 vassert((oldC & ~1) == 0);
415 ULong cf = oldC ? (argL >= argR) : (argL > argR);
416 return cf;
418 case ARM64G_CC_OP_SBC64: {
419 /* (argL, argR, oldC) */
420 ULong argL = cc_dep1;
421 ULong argR = cc_dep2;
422 ULong oldC = cc_dep3;
423 vassert((oldC & ~1) == 0);
424 ULong cf = oldC ? (argL >= argR) : (argL > argR);
425 return cf;
427 case ARM64G_CC_OP_LOGIC32:
428 case ARM64G_CC_OP_LOGIC64: {
429 /* (res, unused, unused) */
430 return 0; // C after logic is zero on arm64
432 //ZZ case ARMG_CC_OP_MUL: {
433 //ZZ /* (res, unused, oldC:oldV) */
434 //ZZ UInt oldC = (cc_dep3 >> 1) & 1;
435 //ZZ vassert((cc_dep3 & ~3) == 0);
436 //ZZ UInt cf = oldC;
437 //ZZ return cf;
438 //ZZ }
439 //ZZ case ARMG_CC_OP_MULL: {
440 //ZZ /* (resLo32, resHi32, oldC:oldV) */
441 //ZZ UInt oldC = (cc_dep3 >> 1) & 1;
442 //ZZ vassert((cc_dep3 & ~3) == 0);
443 //ZZ UInt cf = oldC;
444 //ZZ return cf;
445 //ZZ }
446 default:
447 /* shouldn't really make these calls from generated code */
448 vex_printf("arm64g_calculate_flag_c"
449 "( op=%llu, dep1=0x%llx, dep2=0x%llx, dep3=0x%llx )\n",
450 cc_op, cc_dep1, cc_dep2, cc_dep3 );
451 vpanic("arm64g_calculate_flag_c");
456 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
457 /* Calculate the V flag from the supplied thunk components, in the
458 least significant bit of the word. Returned bits 63:1 are zero. */
459 static
460 ULong arm64g_calculate_flag_v ( ULong cc_op, ULong cc_dep1,
461 ULong cc_dep2, ULong cc_dep3 )
463 switch (cc_op) {
464 case ARM64G_CC_OP_COPY: {
465 /* (nzcv:28x0, unused, unused) */
466 ULong vf = (cc_dep1 >> ARM64G_CC_SHIFT_V) & 1;
467 return vf;
469 case ARM64G_CC_OP_ADD32: {
470 /* (argL, argR, unused) */
471 UInt argL = (UInt)cc_dep1;
472 UInt argR = (UInt)cc_dep2;
473 UInt res = argL + argR;
474 ULong vf = (ULong)(((res ^ argL) & (res ^ argR)) >> 31);
475 return vf;
477 case ARM64G_CC_OP_ADD64: {
478 /* (argL, argR, unused) */
479 ULong argL = cc_dep1;
480 ULong argR = cc_dep2;
481 ULong res = argL + argR;
482 ULong vf = ((res ^ argL) & (res ^ argR)) >> 63;
483 return vf;
485 case ARM64G_CC_OP_SUB32: {
486 /* (argL, argR, unused) */
487 UInt argL = (UInt)cc_dep1;
488 UInt argR = (UInt)cc_dep2;
489 UInt res = argL - argR;
490 ULong vf = (ULong)(((argL ^ argR) & (argL ^ res)) >> 31);
491 return vf;
493 case ARM64G_CC_OP_SUB64: {
494 /* (argL, argR, unused) */
495 ULong argL = cc_dep1;
496 ULong argR = cc_dep2;
497 ULong res = argL - argR;
498 ULong vf = (((argL ^ argR) & (argL ^ res))) >> 63;
499 return vf;
501 case ARM64G_CC_OP_ADC32: {
502 /* (argL, argR, oldC) */
503 UInt argL = cc_dep1;
504 UInt argR = cc_dep2;
505 UInt oldC = cc_dep3;
506 vassert((oldC & ~1) == 0);
507 UInt res = argL + argR + oldC;
508 ULong vf = (ULong)(((res ^ argL) & (res ^ argR)) >> 31);
509 return vf;
511 case ARM64G_CC_OP_ADC64: {
512 /* (argL, argR, oldC) */
513 ULong argL = cc_dep1;
514 ULong argR = cc_dep2;
515 ULong oldC = cc_dep3;
516 vassert((oldC & ~1) == 0);
517 ULong res = argL + argR + oldC;
518 ULong vf = ((res ^ argL) & (res ^ argR)) >> 63;
519 return vf;
521 case ARM64G_CC_OP_SBC32: {
522 /* (argL, argR, oldC) */
523 UInt argL = cc_dep1;
524 UInt argR = cc_dep2;
525 UInt oldC = cc_dep3;
526 vassert((oldC & ~1) == 0);
527 UInt res = argL - argR - (oldC ^ 1);
528 ULong vf = (ULong)(((argL ^ argR) & (argL ^ res)) >> 31);
529 return vf;
531 case ARM64G_CC_OP_SBC64: {
532 /* (argL, argR, oldC) */
533 ULong argL = cc_dep1;
534 ULong argR = cc_dep2;
535 ULong oldC = cc_dep3;
536 vassert((oldC & ~1) == 0);
537 ULong res = argL - argR - (oldC ^ 1);
538 ULong vf = ((argL ^ argR) & (argL ^ res)) >> 63;
539 return vf;
541 case ARM64G_CC_OP_LOGIC32:
542 case ARM64G_CC_OP_LOGIC64: {
543 /* (res, unused, unused) */
544 return 0; // V after logic is zero on arm64
546 //ZZ case ARMG_CC_OP_MUL: {
547 //ZZ /* (res, unused, oldC:oldV) */
548 //ZZ UInt oldV = (cc_dep3 >> 0) & 1;
549 //ZZ vassert((cc_dep3 & ~3) == 0);
550 //ZZ UInt vf = oldV;
551 //ZZ return vf;
552 //ZZ }
553 //ZZ case ARMG_CC_OP_MULL: {
554 //ZZ /* (resLo32, resHi32, oldC:oldV) */
555 //ZZ UInt oldV = (cc_dep3 >> 0) & 1;
556 //ZZ vassert((cc_dep3 & ~3) == 0);
557 //ZZ UInt vf = oldV;
558 //ZZ return vf;
559 //ZZ }
560 default:
561 /* shouldn't really make these calls from generated code */
562 vex_printf("arm64g_calculate_flag_v"
563 "( op=%llu, dep1=0x%llx, dep2=0x%llx, dep3=0x%llx )\n",
564 cc_op, cc_dep1, cc_dep2, cc_dep3 );
565 vpanic("arm64g_calculate_flag_v");
570 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
571 /* Calculate NZCV from the supplied thunk components, in the positions
they appear in PSTATE's NZCV register, viz bits 31:28 for N Z C V respectively.
573 Returned bits 27:0 are zero. */
574 ULong arm64g_calculate_flags_nzcv ( ULong cc_op, ULong cc_dep1,
575 ULong cc_dep2, ULong cc_dep3 )
577 ULong f;
578 ULong res = 0;
579 f = 1 & arm64g_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
580 res |= (f << ARM64G_CC_SHIFT_N);
581 f = 1 & arm64g_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
582 res |= (f << ARM64G_CC_SHIFT_Z);
583 f = 1 & arm64g_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
584 res |= (f << ARM64G_CC_SHIFT_C);
585 f = 1 & arm64g_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
586 res |= (f << ARM64G_CC_SHIFT_V);
587 return res;
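/* Illustrative sketch, not part of VEX and not built: how the lazy flag
   thunk above is consumed.  A 64-bit compare leaves
   (cc_op=ARM64G_CC_OP_SUB64, dep1=argL, dep2=argR) in the thunk, and the
   NZCV bits are only computed when they are actually needed.  The
   example function name and operand values below are made up. */
#if 0
static void example_flags_for_sub64 ( void )
{
   ULong nzcv = arm64g_calculate_flags_nzcv(ARM64G_CC_OP_SUB64,
                                            5 /*argL*/, 7 /*argR*/, 0);
   /* 5 - 7 is negative and borrows, so N=1, Z=0, C=0, V=0. */
   vassert(((nzcv >> ARM64G_CC_SHIFT_N) & 1) == 1);
   vassert(((nzcv >> ARM64G_CC_SHIFT_Z) & 1) == 0);
   vassert(((nzcv >> ARM64G_CC_SHIFT_C) & 1) == 0);
   vassert(((nzcv >> ARM64G_CC_SHIFT_V) & 1) == 0);
}
#endif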
590 void LibVEX_GuestARM64_put_nzcv_c ( ULong new_carry_flag,
591 /*MOD*/VexGuestARM64State* vex_state )
593 ULong nzcv = arm64g_calculate_flags_nzcv(
594 vex_state->guest_CC_OP,
595 vex_state->guest_CC_DEP1,
596 vex_state->guest_CC_DEP2,
597 vex_state->guest_CC_NDEP
599 if (new_carry_flag & 1) {
600 nzcv |= ARM64G_CC_MASK_C;
601 } else {
602 nzcv &= ~ARM64G_CC_MASK_C;
604 vex_state->guest_CC_OP = ARM64G_CC_OP_COPY;
605 vex_state->guest_CC_DEP1 = nzcv;
606 vex_state->guest_CC_DEP2 = 0;
607 vex_state->guest_CC_NDEP = 0;
610 //ZZ
611 //ZZ /* CALLED FROM GENERATED CODE: CLEAN HELPER */
612 //ZZ /* Calculate the QC flag from the arguments, in the lowest bit
613 //ZZ of the word (bit 0). Urr, having this out of line is bizarre.
614 //ZZ Push back inline. */
615 //ZZ UInt armg_calculate_flag_qc ( UInt resL1, UInt resL2,
616 //ZZ UInt resR1, UInt resR2 )
617 //ZZ {
618 //ZZ if (resL1 != resR1 || resL2 != resR2)
619 //ZZ return 1;
620 //ZZ else
621 //ZZ return 0;
622 //ZZ }
624 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
625 /* Calculate the specified condition from the thunk components, in the
626 lowest bit of the word (bit 0). Returned bits 63:1 are zero. */
627 ULong arm64g_calculate_condition ( /* ARM64Condcode << 4 | cc_op */
628 ULong cond_n_op ,
629 ULong cc_dep1,
630 ULong cc_dep2, ULong cc_dep3 )
632 ULong cond = cond_n_op >> 4;
633 ULong cc_op = cond_n_op & 0xF;
634 ULong inv = cond & 1;
635 ULong nf, zf, vf, cf;
637 # if PROFILE_NZCV_FLAGS
638 NOTE_EVAL(cc_op, cond);
639 # endif
641 // vex_printf("XXXXXXXX %llx %llx %llx %llx\n",
642 // cond_n_op, cc_dep1, cc_dep2, cc_dep3);
644 switch (cond) {
645 case ARM64CondEQ: // Z=1 => z
646 case ARM64CondNE: // Z=0
647 zf = arm64g_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
648 return inv ^ zf;
650 case ARM64CondCS: // C=1 => c
651 case ARM64CondCC: // C=0
652 cf = arm64g_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
653 return inv ^ cf;
655 case ARM64CondMI: // N=1 => n
656 case ARM64CondPL: // N=0
657 nf = arm64g_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
658 return inv ^ nf;
660 case ARM64CondVS: // V=1 => v
661 case ARM64CondVC: // V=0
662 vf = arm64g_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
663 return inv ^ vf;
665 case ARM64CondHI: // C=1 && Z=0 => c & ~z
666 case ARM64CondLS: // C=0 || Z=1
667 cf = arm64g_calculate_flag_c(cc_op, cc_dep1, cc_dep2, cc_dep3);
668 zf = arm64g_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
669 return inv ^ (1 & (cf & ~zf));
671 case ARM64CondGE: // N=V => ~(n^v)
672 case ARM64CondLT: // N!=V
673 nf = arm64g_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
674 vf = arm64g_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
675 return inv ^ (1 & ~(nf ^ vf));
677 case ARM64CondGT: // Z=0 && N=V => ~z & ~(n^v) => ~(z | (n^v))
678 case ARM64CondLE: // Z=1 || N!=V
679 nf = arm64g_calculate_flag_n(cc_op, cc_dep1, cc_dep2, cc_dep3);
680 vf = arm64g_calculate_flag_v(cc_op, cc_dep1, cc_dep2, cc_dep3);
681 zf = arm64g_calculate_flag_z(cc_op, cc_dep1, cc_dep2, cc_dep3);
682 return inv ^ (1 & ~(zf | (nf ^ vf)));
684 case ARM64CondAL: // 1
685 case ARM64CondNV: // 1
686 return 1;
688 default:
689 /* shouldn't really make these calls from generated code */
690 vex_printf("arm64g_calculate_condition(ARM64)"
691 "( %llu, %llu, 0x%llx, 0x%llx, 0x%llx )\n",
692 cond, cc_op, cc_dep1, cc_dep2, cc_dep3 );
693 vpanic("armg_calculate_condition(ARM64)");
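/* Illustrative sketch, not part of VEX and not built: the condition codes
   are encoded in pairs whose values differ only in bit 0 and are logical
   negations of each other, which is why the switch above evaluates the
   even member and XORs the result with "inv".  The example function name
   and operand values below are made up. */
#if 0
static void example_condition_pairs ( void )
{
   /* EQ/NE after a 64-bit compare of equal values. */
   ULong eq = arm64g_calculate_condition(
                 (ARM64CondEQ << 4) | ARM64G_CC_OP_SUB64, 42, 42, 0);
   ULong ne = arm64g_calculate_condition(
                 (ARM64CondNE << 4) | ARM64G_CC_OP_SUB64, 42, 42, 0);
   vassert(eq == 1 && ne == 0);
}
#endif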
698 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
699 ULong arm64g_calc_crc32b ( ULong acc, ULong bits )
701 UInt i;
702 ULong crc = (bits & 0xFFULL) ^ acc;
703 for (i = 0; i < 8; i++)
704 crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320ULL : 0);
705 return crc;
708 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
709 ULong arm64g_calc_crc32h ( ULong acc, ULong bits )
711 UInt i;
712 ULong crc = (bits & 0xFFFFULL) ^ acc;
713 for (i = 0; i < 16; i++)
714 crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320ULL : 0);
715 return crc;
718 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
719 ULong arm64g_calc_crc32w ( ULong acc, ULong bits )
721 UInt i;
722 ULong crc = (bits & 0xFFFFFFFFULL) ^ acc;
723 for (i = 0; i < 32; i++)
724 crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320ULL : 0);
725 return crc;
728 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
729 ULong arm64g_calc_crc32x ( ULong acc, ULong bits )
731 UInt i;
732 ULong crc = bits ^ acc;
733 for (i = 0; i < 64; i++)
734 crc = (crc >> 1) ^ ((crc & 1) ? 0xEDB88320ULL : 0);
735 return crc;
739 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
740 ULong arm64g_calc_crc32cb ( ULong acc, ULong bits )
742 UInt i;
743 ULong crc = (bits & 0xFFULL) ^ acc;
744 for (i = 0; i < 8; i++)
745 crc = (crc >> 1) ^ ((crc & 1) ? 0x82F63B78ULL : 0);
746 return crc;
749 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
750 ULong arm64g_calc_crc32ch ( ULong acc, ULong bits )
752 UInt i;
753 ULong crc = (bits & 0xFFFFULL) ^ acc;
754 for (i = 0; i < 16; i++)
755 crc = (crc >> 1) ^ ((crc & 1) ? 0x82F63B78ULL : 0);
756 return crc;
759 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
760 ULong arm64g_calc_crc32cw ( ULong acc, ULong bits )
762 UInt i;
763 ULong crc = (bits & 0xFFFFFFFFULL) ^ acc;
764 for (i = 0; i < 32; i++)
765 crc = (crc >> 1) ^ ((crc & 1) ? 0x82F63B78ULL : 0);
766 return crc;
769 /* CALLED FROM GENERATED CODE: CLEAN HELPER */
770 ULong arm64g_calc_crc32cx ( ULong acc, ULong bits )
772 UInt i;
773 ULong crc = bits ^ acc;
774 for (i = 0; i < 64; i++)
775 crc = (crc >> 1) ^ ((crc & 1) ? 0x82F63B78ULL : 0);
776 return crc;
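/* Illustrative sketch, not part of VEX and not built: the helpers above
   implement the standard reflected (LSB-first) CRC update.  0xEDB88320 is
   the bit-reflected CRC-32 polynomial and 0x82F63B78 the bit-reflected
   CRC-32C (Castagnoli) polynomial.  Like the hardware CRC32* instructions,
   the helpers do no pre/post inversion; callers supply the inverted seed
   and invert the result.  The example below reproduces the well-known
   CRC-32 check value for "123456789"; the function name is made up. */
#if 0
static void example_crc32_check_value ( void )
{
   const UChar* msg = (const UChar*)"123456789";
   ULong crc = 0xFFFFFFFFULL;
   UInt  i;
   for (i = 0; i < 9; i++)
      crc = arm64g_calc_crc32b(crc, msg[i]);
   vassert((crc ^ 0xFFFFFFFFULL) == 0xCBF43926ULL);
}
#endif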
779 /* CALLED FROM GENERATED CODE */
780 /* DIRTY HELPER (non-referentially-transparent) */
781 /* Horrible hack. On non-arm64 platforms, return 0. */
782 ULong arm64g_dirtyhelper_MRS_DCZID_EL0 ( void )
784 # if defined(__aarch64__) && !defined(__arm__)
785 ULong w = 0x5555555555555555ULL; /* overwritten */
786 __asm__ __volatile__("mrs %0, dczid_el0" : "=r"(w));
787 return w;
788 # else
789 return 0ULL;
790 # endif
793 /* CALLED FROM GENERATED CODE */
794 /* DIRTY HELPER (non-referentially-transparent) */
795 /* Horrible hack. On non-arm64 platforms, return 0. */
796 ULong arm64g_dirtyhelper_MRS_CNTVCT_EL0 ( void )
798 # if defined(__aarch64__) && !defined(__arm__)
799 ULong w = 0x5555555555555555ULL; /* overwritten */
800 __asm__ __volatile__("mrs %0, cntvct_el0" : "=r"(w));
801 return w;
802 # else
803 return 0ULL;
804 # endif
808 /* CALLED FROM GENERATED CODE */
809 /* DIRTY HELPER (non-referentially-transparent) */
810 /* Horrible hack. On non-arm64 platforms, return 0. */
811 ULong arm64g_dirtyhelper_MRS_CNTFRQ_EL0 ( void )
813 # if defined(__aarch64__) && !defined(__arm__)
814 ULong w = 0x5555555555555555ULL; /* overwritten */
815 __asm__ __volatile__("mrs %0, cntfrq_el0" : "=r"(w));
816 return w;
817 # else
818 return 0ULL;
819 # endif
822 /* CALLED FROM GENERATED CODE */
823 /* DIRTY HELPER (non-referentially-transparent) */
824 /* Horrible hack. On non-arm64 platforms, return 0. */
825 ULong arm64g_dirtyhelper_MRS_MIDR_EL1 ( void )
827 # if defined(__aarch64__) && !defined(__arm__)
828 ULong w = 0x5555555555555555ULL; /* overwritten */
829 __asm__ __volatile__("mrs %0, midr_el1" : "=r"(w));
830 return w;
831 # else
832 return 0ULL;
833 # endif
836 /* CALLED FROM GENERATED CODE */
837 /* DIRTY HELPER (non-referentially-transparent) */
838 /* Horrible hack. On non-arm64 platforms, return 0. */
839 ULong arm64g_dirtyhelper_MRS_ID_AA64PFR0_EL1 ( void )
841 # if defined(__aarch64__) && !defined(__arm__)
842 ULong w = 0x5555555555555555ULL; /* overwritten */
843 __asm__ __volatile__("mrs %0, id_aa64pfr0_el1" : "=r"(w));
845 // The control word uses the following nibbles (as seen on RPi)
846 // unsupported unless indicated
847 // 0 to 3 - EL0 to EL3 exception level handling
848 // 4 - FP includes half-precision (partial support)
// 5 - AdvSIMD, also includes half-precision
/* If half-precision FP is indicated, report plain FP/AdvSIMD support
   (without half-precision) instead, since the emulation lacks half-precision
   support.  If neither FP nor AdvSIMD is implemented, preserve that value. */
854 w = (w >> 16);
855 w &= 0xff;
856 switch(w) {
857 case 0x01:
858 w = 0x0;
859 break;
860 case 0xff:
861 w = (0xFF<<16);
862 break;
863 default:
864 w = 0x0;
865 break;
868 return w;
869 # else
870 return 0ULL;
871 # endif
874 /* CALLED FROM GENERATED CODE */
875 /* DIRTY HELPER (non-referentially-transparent) */
876 /* Horrible hack. On non-arm64 platforms, return 0. */
877 ULong arm64g_dirtyhelper_MRS_ID_AA64MMFR0_EL1 ( void )
879 # if defined(__aarch64__) && !defined(__arm__)
880 ULong w = 0x5555555555555555ULL; /* overwritten */
881 __asm__ __volatile__("mrs %0, id_aa64mmfr0_el1" : "=r"(w));
882 return w;
883 # else
884 return 0ULL;
885 # endif
888 /* CALLED FROM GENERATED CODE */
889 /* DIRTY HELPER (non-referentially-transparent) */
890 /* Horrible hack. On non-arm64 platforms, return 0. */
891 ULong arm64g_dirtyhelper_MRS_ID_AA64MMFR1_EL1 ( void )
893 # if defined(__aarch64__) && !defined(__arm__)
894 ULong w = 0x5555555555555555ULL; /* overwritten */
895 __asm__ __volatile__("mrs %0, id_aa64mmfr1_el1" : "=r"(w));
897 /* Clear VH and HAFDBS bits */
898 w &= ~(0xF0F);
899 return w;
900 # else
901 return 0ULL;
902 # endif
905 /* CALLED FROM GENERATED CODE */
906 /* DIRTY HELPER (non-referentially-transparent) */
907 /* Horrible hack. On non-arm64 platforms, return 0. */
908 ULong arm64g_dirtyhelper_MRS_ID_AA64ISAR0_EL1 ( void )
910 # if defined(__aarch64__) && !defined(__arm__)
911 ULong w = 0x5555555555555555ULL; /* overwritten */
912 __asm__ __volatile__("mrs %0, id_aa64isar0_el1" : "=r"(w));
914 // In the mask below, nibbles are (higher nibbles all unsupported)
915 // 0 - RES0
916 // 1 - AES
917 // 2 - SHA1
918 // 3 - SHA2
919 // 4 - CRC32
920 // 5 - Atomic bits
921 // 6 - TME (unsupported)
922 // 7 - RDM
923 // 8 - SHA3 (unsupported)
924 // 9 - SM3 (unsupported)
925 // 10 - SM4 (unsupported)
926 // 11 - DP
928 // 10
929 // 109876543210
930 w &= 0xF000F0FFFFFF;
932 return w;
933 # else
934 return 0ULL;
935 # endif
938 /* CALLED FROM GENERATED CODE */
939 /* DIRTY HELPER (non-referentially-transparent) */
940 /* Horrible hack. On non-arm64 platforms, return 0. */
941 ULong arm64g_dirtyhelper_MRS_ID_AA64ISAR1_EL1 ( void )
943 # if defined(__aarch64__) && !defined(__arm__)
944 ULong w = 0x5555555555555555ULL; /* overwritten */
945 __asm__ __volatile__("mrs %0, id_aa64isar1_el1" : "=r"(w));
// only nibble 0 (DPB) is kept
948 w &= 0xF;
950 return w;
951 # else
952 return 0ULL;
953 # endif
956 void arm64g_dirtyhelper_PMULLQ ( /*OUT*/V128* res, ULong arg1, ULong arg2 )
958 /* This doesn't need to be a dirty helper, except for the fact that
959 a clean helper can't return a 128 bit value. This is a pretty
960 lame implementation of PMULLQ, but at least it doesn't contain any
961 data dependent branches, and has lots of ILP. I guess we could unroll
962 the loop completely and offer extensive prayers to the gods of ILP
963 if more performance is needed. */
964 UInt i;
965 ULong accHi = 0, accLo = 0;
966 ULong op2Hi = 0, op2Lo = arg2;
967 for (i = 0; i < 64; i++) {
968 /* Make |mask| be all 0s or all 1s, a copy of arg1[i] */
969 Long mask = arg1 << (63-i);
970 mask >>= 63;
971 accHi ^= (op2Hi & mask);
972 accLo ^= (op2Lo & mask);
973 /* do: op2Hi:op2Lo <<=u 1 */
974 op2Hi <<= 1;
975 op2Hi |= ((op2Lo >> 63) & 1);
976 op2Lo <<= 1;
978 res->w64[1] = accHi;
979 res->w64[0] = accLo;
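/* Illustrative sketch, not part of VEX and not built: PMULLQ is a
   64x64 -> 128 bit carry-less (GF(2) polynomial) multiply.  A tiny worked
   case: 0b101 * 0b011 = 0b1111, i.e. 5 "pmull" 3 = 15, because no carries
   propagate between bit positions.  The example function name is made up. */
#if 0
static void example_pmullq_small ( void )
{
   V128 r;
   arm64g_dirtyhelper_PMULLQ(&r, 5, 3);
   vassert(r.w64[0] == 15 && r.w64[1] == 0);
}
#endif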
983 /*---------------------------------------------------------------*/
984 /*--- Crypto instruction helpers ---*/
985 /*---------------------------------------------------------------*/
987 /* DIRTY HELPERS for doing AES support:
988 * AESE (SubBytes, then ShiftRows)
989 * AESD (InvShiftRows, then InvSubBytes)
990 * AESMC (MixColumns)
991 * AESIMC (InvMixColumns)
992 These don't actually have to be dirty helpers -- they could be
993 clean, but for the fact that they return a V128 and a clean helper
994 can't do that.
996 The ARMv8 manual seems to imply that AESE first performs ShiftRows,
997 then SubBytes. This seems to contradict FIPS 197, so the
998 implementation below is consistent with FIPS 197. One can observe
999 that the two transformations commute -- the order in which they
1000 happen makes no difference to the result. So the ambiguity doesn't
1001 actually matter, but it is confusing. The v8 manual looks correct
1002 about AESD, though.
Only the three functions rj_xtime, aesMixColumn and aesInvMixColumn
are taken from "A byte-oriented AES-256 implementation" and are subject
1006 to the following usage terms:
1008 Byte-oriented AES-256 implementation.
1009 All lookup tables replaced with 'on the fly' calculations.
1011 Copyright (c) 2007-2011 Ilya O. Levin, http://www.literatecode.com
1012 Other contributors: Hal Finney
1014 Permission to use, copy, modify, and distribute this software for any
1015 purpose with or without fee is hereby granted, provided that the above
1016 copyright notice and this permission notice appear in all copies.
1018 THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
1019 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
1020 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
1021 ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
1022 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
1023 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
1024 OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
1027 const UChar aesMapSubBytes[256]
1028 = { 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5,
1029 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76,
1030 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0,
1031 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0,
1032 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc,
1033 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15,
1034 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a,
1035 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75,
1036 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0,
1037 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84,
1038 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b,
1039 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf,
1040 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85,
1041 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8,
1042 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5,
1043 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2,
1044 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17,
1045 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73,
1046 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88,
1047 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb,
1048 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c,
1049 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79,
1050 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9,
1051 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08,
1052 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6,
1053 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a,
1054 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e,
1055 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e,
1056 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94,
1057 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf,
1058 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68,
1059 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16
1062 const UChar aesMapInvSubBytes[256]
1063 = { 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38,
1064 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb,
1065 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87,
1066 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb,
1067 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d,
1068 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e,
1069 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2,
1070 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25,
1071 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16,
1072 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92,
1073 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda,
1074 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84,
1075 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a,
1076 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06,
1077 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02,
1078 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b,
1079 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea,
1080 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73,
1081 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85,
1082 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e,
1083 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89,
1084 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b,
1085 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20,
1086 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4,
1087 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31,
1088 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f,
1089 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d,
1090 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef,
1091 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0,
1092 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61,
1093 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26,
1094 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d
1097 static inline UChar rj_xtime ( UChar x )
1099 UChar y = (UChar)(x << 1);
1100 return (x & 0x80) ? (y ^ 0x1b) : y;
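/* Illustrative note, not part of VEX and not built: rj_xtime is
   multiplication by x (0x02) in GF(2^8) modulo the AES polynomial
   x^8 + x^4 + x^3 + x + 1 (0x11B).  Multiplication by 0x03, which
   MixColumns uses, then falls out as xtime(a) ^ a.  The example function
   name is made up. */
#if 0
static UChar example_gf256_mul_by_3 ( UChar a )
{
   return (UChar)(rj_xtime(a) ^ a);   /* a * 0x03 in GF(2^8) */
}
#endif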
1103 static void aesMixColumn ( /*MOD*/UChar* r )
1105 UChar a = r[0];
1106 UChar b = r[1];
1107 UChar c = r[2];
1108 UChar d = r[3];
1109 UChar e = a ^ b ^ c ^ d;
1110 r[0] ^= e ^ rj_xtime(a ^ b);
1111 r[1] ^= e ^ rj_xtime(b ^ c);
1112 r[2] ^= e ^ rj_xtime(c ^ d);
1113 r[3] ^= e ^ rj_xtime(d ^ a);
1116 static void aesInvMixColumn ( /*MOD*/UChar* r )
1118 UChar a = r[0];
1119 UChar b = r[1];
1120 UChar c = r[2];
1121 UChar d = r[3];
1122 UChar e = a ^ b ^ c ^ d;
1123 UChar z = rj_xtime(e);
1124 UChar x = e ^ rj_xtime(rj_xtime(z ^ a ^ c));
1125 UChar y = e ^ rj_xtime(rj_xtime(z ^ b ^ d));
1126 r[0] ^= x ^ rj_xtime(a ^ b);
1127 r[1] ^= y ^ rj_xtime(b ^ c);
1128 r[2] ^= x ^ rj_xtime(c ^ d);
1129 r[3] ^= y ^ rj_xtime(d ^ a);
1133 /* CALLED FROM GENERATED CODE */
1134 void arm64g_dirtyhelper_AESE ( /*OUT*/V128* res, ULong argHi, ULong argLo )
1136 res->w64[1] = argHi;
1137 res->w64[0] = argLo;
1139 /* First do SubBytes on the State. */
1140 UInt i;
1141 for (i = 0; i < 16; i++) {
1142 res->w8[i] = aesMapSubBytes[res->w8[i] & 0xFF];
1145 /* Then do ShiftRows on the State. */
1146 # define XX(_ix) res->w8[_ix]
1147 { UChar old1 = XX(1);
1148 XX(1) = XX(5); XX(5) = XX(9); XX(9) = XX(13); XX(13) = old1;
1150 { UChar old2 = XX(2); UChar old6 = XX(6);
1151 XX(2) = XX(10); XX(6) = XX(14); XX(10) = old2; XX(14) = old6;
1153 { UChar old15 = XX(15);
1154 XX(15) = XX(11); XX(11) = XX(7); XX(7) = XX(3); XX(3) = old15;
1156 # undef XX
1160 /* CALLED FROM GENERATED CODE */
1161 void arm64g_dirtyhelper_AESD ( /*OUT*/V128* res, ULong argHi, ULong argLo )
1163 res->w64[1] = argHi;
1164 res->w64[0] = argLo;
1166 /* First do InvShiftRows on the State. */
1167 # define XX(_ix) res->w8[_ix]
1168 { UChar old13 = XX(13);
1169 XX(13) = XX(9); XX(9) = XX(5); XX(5) = XX(1); XX(1) = old13;
1171 { UChar old14 = XX(14); UChar old10 = XX(10);
1172 XX(14) = XX(6); XX(10) = XX(2); XX(6) = old14; XX(2) = old10;
1174 { UChar old3 = XX(3);
1175 XX(3) = XX(7); XX(7) = XX(11); XX(11) = XX(15); XX(15) = old3;
1177 # undef XX
1179 /* Then do InvSubBytes on the State. */
1180 UInt i;
1181 for (i = 0; i < 16; i++) {
1182 res->w8[i] = aesMapInvSubBytes[res->w8[i] & 0xFF];
1187 /* CALLED FROM GENERATED CODE */
1188 void arm64g_dirtyhelper_AESMC ( /*OUT*/V128* res, ULong argHi, ULong argLo )
1190 res->w64[1] = argHi;
1191 res->w64[0] = argLo;
1192 aesMixColumn(&res->w8[0]);
1193 aesMixColumn(&res->w8[4]);
1194 aesMixColumn(&res->w8[8]);
1195 aesMixColumn(&res->w8[12]);
1199 /* CALLED FROM GENERATED CODE */
1200 void arm64g_dirtyhelper_AESIMC ( /*OUT*/V128* res, ULong argHi, ULong argLo )
1202 res->w64[1] = argHi;
1203 res->w64[0] = argLo;
1204 aesInvMixColumn(&res->w8[0]);
1205 aesInvMixColumn(&res->w8[4]);
1206 aesInvMixColumn(&res->w8[8]);
1207 aesInvMixColumn(&res->w8[12]);
1211 /* DIRTY HELPERS for SHA instruction support. As with the AES helpers
1212 above, these are actually pure functions and are only dirty because
1213 clean helpers can't return a V128. */
1215 static inline UInt ROL32 ( UInt x, UInt sh ) {
1216 vassert(sh > 0 && sh < 32);
1217 return (x << sh) | (x >> (32 - sh));
1220 static inline UInt ROR32 ( UInt x, UInt sh ) {
1221 vassert(sh > 0 && sh < 32);
1222 return (x >> sh) | (x << (32 - sh));
1225 static inline ULong ROR64 ( ULong x, ULong sh ) {
1226 vassert(sh > 0 && sh < 64);
1227 return (x >> sh) | (x << (64 - sh));
1230 static inline UInt SHAchoose ( UInt x, UInt y, UInt z ) {
1231 return ((y ^ z) & x) ^ z;
1234 static inline UInt SHAmajority ( UInt x, UInt y, UInt z ) {
1235 return (x & y) | ((x | y) & z);
1238 static inline UInt SHAparity ( UInt x, UInt y, UInt z ) {
1239 return x ^ y ^ z;
1242 static inline UInt SHAhashSIGMA0 ( UInt x ) {
1243 return ROR32(x, 2) ^ ROR32(x, 13) ^ ROR32(x, 22);
1246 static inline UInt SHAhashSIGMA1 ( UInt x ) {
1247 return ROR32(x, 6) ^ ROR32(x, 11) ^ ROR32(x, 25);
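/* The small functions above are the FIPS 180-4 primitives: SHAchoose is
   Ch(x,y,z), SHAmajority is Maj(x,y,z), SHAparity is the SHA-1 Parity
   function, and SHAhashSIGMA0/SHAhashSIGMA1 are the SHA-256 Sigma0
   (ROTR 2,13,22) and Sigma1 (ROTR 6,11,25) functions. */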
1250 static void SHA256hash ( /*MOD*/V128* X, /*MOD*/V128* Y, const V128* W )
1252 UInt e;
1253 for (e = 0; e <= 3; e++) {
1254 UInt chs = SHAchoose(Y->w32[0], Y->w32[1], Y->w32[2]);
1255 UInt maj = SHAmajority(X->w32[0], X->w32[1], X->w32[2]);
1256 UInt t = Y->w32[3] + SHAhashSIGMA1(Y->w32[0]) + chs + W->w32[e];
1257 X->w32[3] = t + X->w32[3];
1258 Y->w32[3] = t + SHAhashSIGMA0(X->w32[0]) + maj;
1259 UInt ts = Y->w32[3];
1260 Y->w32[3] = Y->w32[2];
1261 Y->w32[2] = Y->w32[1];
1262 Y->w32[1] = Y->w32[0];
1263 Y->w32[0] = X->w32[3];
1264 X->w32[3] = X->w32[2];
1265 X->w32[2] = X->w32[1];
1266 X->w32[1] = X->w32[0];
1267 X->w32[0] = ts;
1271 /* CALLED FROM GENERATED CODE */
1272 void arm64g_dirtyhelper_SHA1C ( /*OUT*/V128* res, ULong dHi, ULong dLo,
1273 ULong nHi, ULong nLo, ULong mHi, ULong mLo )
1275 vassert(nHi == 0);
1276 vassert((nLo >> 32) == 0);
1277 V128 X; X.w64[1] = dHi; X.w64[0] = dLo;
1278 UInt Y; Y = (UInt)nLo;
1279 V128 W; W.w64[1] = mHi; W.w64[0] = mLo;
1280 UInt e;
1281 for (e = 0; e <= 3; e++) {
1282 UInt t = SHAchoose(X.w32[1], X.w32[2], X.w32[3]);
1283 Y = Y + ROL32(X.w32[0], 5) + t + W.w32[e];
1284 X.w32[1] = ROL32(X.w32[1], 30);
1285 UInt oldY = Y;
1286 Y = X.w32[3];
1287 X.w32[3] = X.w32[2];
1288 X.w32[2] = X.w32[1];
1289 X.w32[1] = X.w32[0];
1290 X.w32[0] = oldY;
1292 res->w64[1] = X.w64[1];
1293 res->w64[0] = X.w64[0];
1296 /* CALLED FROM GENERATED CODE */
1297 void arm64g_dirtyhelper_SHA1H ( /*OUT*/V128* res, ULong nHi, ULong nLo )
1299 vassert(nHi == 0);
1300 vassert((nLo >> 32) == 0);
1301 res->w32[3] = res->w32[2] = res->w32[1] = 0;
1302 res->w32[0] = ROL32((UInt)nLo, 30);
1305 /* CALLED FROM GENERATED CODE */
1306 void arm64g_dirtyhelper_SHA1M ( /*OUT*/V128* res, ULong dHi, ULong dLo,
1307 ULong nHi, ULong nLo, ULong mHi, ULong mLo )
1309 vassert(nHi == 0);
1310 vassert((nLo >> 32) == 0);
1311 V128 X; X.w64[1] = dHi; X.w64[0] = dLo;
1312 UInt Y; Y = (UInt)nLo;
1313 V128 W; W.w64[1] = mHi; W.w64[0] = mLo;
1314 UInt e;
1315 for (e = 0; e <= 3; e++) {
1316 UInt t = SHAmajority(X.w32[1], X.w32[2], X.w32[3]);
1317 Y = Y + ROL32(X.w32[0], 5) + t + W.w32[e];
1318 X.w32[1] = ROL32(X.w32[1], 30);
1319 UInt oldY = Y;
1320 Y = X.w32[3];
1321 X.w32[3] = X.w32[2];
1322 X.w32[2] = X.w32[1];
1323 X.w32[1] = X.w32[0];
1324 X.w32[0] = oldY;
1326 res->w64[1] = X.w64[1];
1327 res->w64[0] = X.w64[0];
1330 /* CALLED FROM GENERATED CODE */
1331 void arm64g_dirtyhelper_SHA1P ( /*OUT*/V128* res, ULong dHi, ULong dLo,
1332 ULong nHi, ULong nLo, ULong mHi, ULong mLo )
1334 vassert(nHi == 0);
1335 vassert((nLo >> 32) == 0);
1336 V128 X; X.w64[1] = dHi; X.w64[0] = dLo;
1337 UInt Y; Y = (UInt)nLo;
1338 V128 W; W.w64[1] = mHi; W.w64[0] = mLo;
1339 UInt e;
1340 for (e = 0; e <= 3; e++) {
1341 UInt t = SHAparity(X.w32[1], X.w32[2], X.w32[3]);
1342 Y = Y + ROL32(X.w32[0], 5) + t + W.w32[e];
1343 X.w32[1] = ROL32(X.w32[1], 30);
1344 UInt oldY = Y;
1345 Y = X.w32[3];
1346 X.w32[3] = X.w32[2];
1347 X.w32[2] = X.w32[1];
1348 X.w32[1] = X.w32[0];
1349 X.w32[0] = oldY;
1351 res->w64[1] = X.w64[1];
1352 res->w64[0] = X.w64[0];
1355 /* CALLED FROM GENERATED CODE */
1356 void arm64g_dirtyhelper_SHA1SU0 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
1357 ULong nHi, ULong nLo, ULong mHi, ULong mLo )
1359 res->w64[1] = nLo;
1360 res->w64[0] = dHi;
1361 res->w64[1] ^= dHi ^ mHi;
1362 res->w64[0] ^= dLo ^ mLo;
1365 /* CALLED FROM GENERATED CODE */
1366 void arm64g_dirtyhelper_SHA1SU1 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
1367 ULong nHi, ULong nLo )
1369 /* This computes "T = Vd ^ (Vn >>u 32)" */
1370 V128 T; T.w64[1] = nHi; T.w64[0] = nLo;
1371 T.w32[0] = T.w32[1];
1372 T.w32[1] = T.w32[2];
1373 T.w32[2] = T.w32[3];
1374 T.w32[3] = 0;
1375 T.w64[1] ^= dHi;
1376 T.w64[0] ^= dLo;
1377 /* */
1378 res->w32[0] = ROL32(T.w32[0], 1);
1379 res->w32[1] = ROL32(T.w32[1], 1);
1380 res->w32[2] = ROL32(T.w32[2], 1);
1381 res->w32[3] = ROL32(T.w32[3], 1) ^ ROL32(T.w32[0], 2);
1384 /* CALLED FROM GENERATED CODE */
1385 void arm64g_dirtyhelper_SHA256H2 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
1386 ULong nHi, ULong nLo, ULong mHi, ULong mLo )
1388 V128 X; X.w64[1] = nHi; X.w64[0] = nLo;
1389 V128 Y; Y.w64[1] = dHi; Y.w64[0] = dLo;
1390 V128 W; W.w64[1] = mHi; W.w64[0] = mLo;
1391 SHA256hash(&X, &Y, &W);
1392 res->w64[1] = Y.w64[1];
1393 res->w64[0] = Y.w64[0];
1396 /* CALLED FROM GENERATED CODE */
1397 void arm64g_dirtyhelper_SHA256H ( /*OUT*/V128* res, ULong dHi, ULong dLo,
1398 ULong nHi, ULong nLo, ULong mHi, ULong mLo )
1400 V128 X; X.w64[1] = dHi; X.w64[0] = dLo;
1401 V128 Y; Y.w64[1] = nHi; Y.w64[0] = nLo;
1402 V128 W; W.w64[1] = mHi; W.w64[0] = mLo;
1403 SHA256hash(&X, &Y, &W);
1404 res->w64[1] = X.w64[1];
1405 res->w64[0] = X.w64[0];
1408 /* CALLED FROM GENERATED CODE */
1409 void arm64g_dirtyhelper_SHA256SU0 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
1410 ULong nHi, ULong nLo )
1413 res->w64[1] = res->w64[0] = 0;
1414 V128 op1; op1.w64[1] = dHi; op1.w64[0] = dLo;
1415 V128 op2; op2.w64[1] = nHi; op2.w64[0] = nLo;
1416 V128 T;
1417 T.w32[3] = op2.w32[0];
1418 T.w32[2] = op1.w32[3];
1419 T.w32[1] = op1.w32[2];
1420 T.w32[0] = op1.w32[1];
1421 UInt e;
1422 for (e = 0; e <= 3; e++) {
1423 UInt elt = T.w32[e];
1424 elt = ROR32(elt, 7) ^ ROR32(elt, 18) ^ (elt >> 3);
1425 res->w32[e] = elt + op1.w32[e];
1429 /* CALLED FROM GENERATED CODE */
1430 void arm64g_dirtyhelper_SHA256SU1 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
1431 ULong nHi, ULong nLo,
1432 ULong mHi, ULong mLo )
1434 res->w64[0] = res->w64[1] = 0;
1435 V128 op1; op1.w64[1] = dHi; op1.w64[0] = dLo;
1436 V128 op2; op2.w64[1] = nHi; op2.w64[0] = nLo;
1437 V128 op3; op3.w64[1] = mHi; op3.w64[0] = mLo;
1438 V128 T0;
1439 T0.w32[3] = op3.w32[0];
1440 T0.w32[2] = op2.w32[3];
1441 T0.w32[1] = op2.w32[2];
1442 T0.w32[0] = op2.w32[1];
1443 UInt T1[2];
1444 UInt e;
1445 T1[1] = op3.w32[3];
1446 T1[0] = op3.w32[2];
1447 for (e = 0; e <= 1; e++) {
1448 UInt elt = T1[e];
1449 elt = ROR32(elt, 17) ^ ROR32(elt, 19) ^ (elt >> 10);
1450 elt = elt + op1.w32[e] + T0.w32[e];
1451 res->w32[e] = elt;
1453 T1[1] = res->w32[1];
1454 T1[0] = res->w32[0];
1455 for (e = 2; e <= 3; e++) {
1456 UInt elt = T1[e-2];
1457 elt = ROR32(elt, 17) ^ ROR32(elt, 19) ^ (elt >> 10);
1458 elt = elt + op1.w32[e] + T0.w32[e];
1459 res->w32[e] = elt;
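/* The four SHA512* helpers below compute the FIPS 180-4 SHA-512 functions
   inline with ROR64: Sigma0 is ROR 28/34/39, Sigma1 is ROR 14/18/41,
   sigma0 is ROR 1/8 plus a shift by 7, and sigma1 is ROR 19/61 plus a
   shift by 6. */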
1463 /* CALLED FROM GENERATED CODE */
1464 void arm64g_dirtyhelper_SHA512H2 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
1465 ULong nHi, ULong nLo, ULong mHi, ULong mLo )
1467 vassert(nHi == 0);
1468 ULong X = nLo;
1469 V128 Y; Y.w64[1] = mHi; Y.w64[0] = mLo;
1470 V128 W; W.w64[1] = dHi; W.w64[0] = dLo;
1471 ULong NSigma0 = ROR64(Y.w64[0], 28) ^ ROR64(Y.w64[0], 34)
1472 ^ ROR64(Y.w64[0], 39);
1473 res->w64[1] = (X & Y.w64[1]) ^ (X & Y.w64[0]) ^ (Y.w64[1] & Y.w64[0]);
1474 res->w64[1] += NSigma0 + W.w64[1];
1475 NSigma0 = ROR64(res->w64[1], 28) ^ ROR64(res->w64[1], 34)
1476 ^ ROR64(res->w64[1], 39);
1477 res->w64[0] = (res->w64[1] & Y.w64[0]) ^ (res->w64[1] & Y.w64[1])
1478 ^ (Y.w64[0] & Y.w64[1]);
1479 res->w64[0] += NSigma0 + W.w64[0];
1482 /* CALLED FROM GENERATED CODE */
1483 void arm64g_dirtyhelper_SHA512H ( /*OUT*/V128* res, ULong dHi, ULong dLo,
1484 ULong nHi, ULong nLo, ULong mHi, ULong mLo )
1486 V128 X; X.w64[1] = nHi; X.w64[0] = nLo;
1487 V128 Y; Y.w64[1] = mHi; Y.w64[0] = mLo;
1488 V128 W; W.w64[1] = dHi; W.w64[0] = dLo;
1489 ULong MSigma1 = ROR64(Y.w64[1], 14) ^ ROR64(Y.w64[1], 18)
1490 ^ ROR64(Y.w64[1], 41);
1491 res->w64[1] = (Y.w64[1] & X.w64[0]) ^ (~Y.w64[1] & X.w64[1]);
1492 res->w64[1] += MSigma1 + W.w64[1];
1493 ULong tmp = res->w64[1] + Y.w64[0];
1494 MSigma1 = ROR64(tmp, 14) ^ ROR64(tmp, 18) ^ ROR64(tmp, 41);
1495 res->w64[0] = (tmp & Y.w64[1]) ^ (~tmp & X.w64[0]);
1496 res->w64[0] += MSigma1 + W.w64[0];
1499 /* CALLED FROM GENERATED CODE */
1500 void arm64g_dirtyhelper_SHA512SU0 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
1501 ULong nHi, ULong nLo )
1504 vassert(nHi == 0);
1505 ULong X = nLo;
1506 V128 W; W.w64[1] = dHi; W.w64[0] = dLo;
1507 ULong sig0 = ROR64(W.w64[1], 1) ^ ROR64(W.w64[1], 8) ^ (W.w64[1] >> 7);
1508 res->w64[0] = W.w64[0] + sig0;
1509 sig0 = ROR64(X, 1) ^ ROR64(X, 8) ^ (X >> 7);
1510 res->w64[1] = W.w64[1] + sig0;
1513 /* CALLED FROM GENERATED CODE */
1514 void arm64g_dirtyhelper_SHA512SU1 ( /*OUT*/V128* res, ULong dHi, ULong dLo,
1515 ULong nHi, ULong nLo,
1516 ULong mHi, ULong mLo )
1518 V128 X; X.w64[1] = nHi; X.w64[0] = nLo;
1519 V128 Y; Y.w64[1] = mHi; Y.w64[0] = mLo;
1520 V128 W; W.w64[1] = dHi; W.w64[0] = dLo;
1521 ULong sig1 = ROR64(X.w64[1], 19) ^ ROR64(X.w64[1], 61) ^ (X.w64[1] >> 6);
1522 res->w64[1] = W.w64[1] + sig1 + Y.w64[1];
1523 sig1 = ROR64(X.w64[0], 19) ^ ROR64(X.w64[0], 61) ^ (X.w64[0] >> 6);
1524 res->w64[0] = W.w64[0] + sig1 + Y.w64[0];
1528 /*---------------------------------------------------------------*/
1529 /*--- Flag-helpers translation-time function specialisers. ---*/
1530 /*--- These help iropt specialise calls the above run-time ---*/
1531 /*--- flags functions. ---*/
1532 /*---------------------------------------------------------------*/
1534 /* Used by the optimiser to try specialisations. Returns an
1535 equivalent expression, or NULL if none. */
1537 static Bool isU64 ( IRExpr* e, ULong n )
1539 return
1540 toBool( e->tag == Iex_Const
1541 && e->Iex.Const.con->tag == Ico_U64
1542 && e->Iex.Const.con->Ico.U64 == n );
1545 IRExpr* guest_arm64_spechelper ( const HChar* function_name,
1546 IRExpr** args,
1547 IRStmt** precedingStmts,
1548 Int n_precedingStmts )
1550 # define unop(_op,_a1) IRExpr_Unop((_op),(_a1))
1551 # define binop(_op,_a1,_a2) IRExpr_Binop((_op),(_a1),(_a2))
1552 # define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
1553 # define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
1554 # define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
1556 Int i, arity = 0;
1557 for (i = 0; args[i]; i++)
1558 arity++;
1559 //ZZ # if 0
1560 //ZZ vex_printf("spec request:\n");
1561 //ZZ vex_printf(" %s ", function_name);
1562 //ZZ for (i = 0; i < arity; i++) {
1563 //ZZ vex_printf(" ");
1564 //ZZ ppIRExpr(args[i]);
1565 //ZZ }
1566 //ZZ vex_printf("\n");
1567 //ZZ # endif
1569 /* --------- specialising "arm64g_calculate_condition" --------- */
1571 if (vex_streq(function_name, "arm64g_calculate_condition")) {
1573 /* specialise calls to the "arm64g_calculate_condition" function.
1574 Not sure whether this is strictly necessary, but: the
1575 replacement IR must produce only the values 0 or 1. Bits
1576 63:1 are required to be zero. */
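/* For example, a call of the form
      arm64g_calculate_condition((ARM64CondEQ << 4) | ARM64G_CC_OP_SUB64,
                                 argL, argR, 0)
   is rewritten below into the flat expression 1Uto64(CmpEQ64(argL, argR)),
   which iropt can then fold and propagate. */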
1577 IRExpr *cond_n_op, *cc_dep1, *cc_dep2 ; //, *cc_ndep;
1578 vassert(arity == 4);
1579 cond_n_op = args[0]; /* (ARM64Condcode << 4) | ARM64G_CC_OP_* */
1580 cc_dep1 = args[1];
1581 cc_dep2 = args[2];
1582 //cc_ndep = args[3];
1584 /*---------------- SUB64 ----------------*/
1586 /* 0, 1 */
1587 if (isU64(cond_n_op, (ARM64CondEQ << 4) | ARM64G_CC_OP_SUB64)) {
1588 /* EQ after SUB --> test argL == argR */
1589 return unop(Iop_1Uto64,
1590 binop(Iop_CmpEQ64, cc_dep1, cc_dep2));
1592 if (isU64(cond_n_op, (ARM64CondNE << 4) | ARM64G_CC_OP_SUB64)) {
1593 /* NE after SUB --> test argL != argR */
1594 return unop(Iop_1Uto64,
1595 binop(Iop_CmpNE64, cc_dep1, cc_dep2));
1598 /* 2, 3 */
1599 if (isU64(cond_n_op, (ARM64CondCS << 4) | ARM64G_CC_OP_SUB64)) {
1600 /* CS after SUB --> test argL >=u argR
1601 --> test argR <=u argL */
1602 return unop(Iop_1Uto64,
1603 binop(Iop_CmpLE64U, cc_dep2, cc_dep1));
1605 if (isU64(cond_n_op, (ARM64CondCC << 4) | ARM64G_CC_OP_SUB64)) {
1606 /* CC after SUB --> test argL <u argR */
1607 return unop(Iop_1Uto64,
1608 binop(Iop_CmpLT64U, cc_dep1, cc_dep2));
1611 /* 8, 9 */
1612 if (isU64(cond_n_op, (ARM64CondLS << 4) | ARM64G_CC_OP_SUB64)) {
1613 /* LS after SUB --> test argL <=u argR */
1614 return unop(Iop_1Uto64,
1615 binop(Iop_CmpLE64U, cc_dep1, cc_dep2));
1617 if (isU64(cond_n_op, (ARM64CondHI << 4) | ARM64G_CC_OP_SUB64)) {
1618 /* HI after SUB --> test argL >u argR
1619 --> test argR <u argL */
1620 return unop(Iop_1Uto64,
1621 binop(Iop_CmpLT64U, cc_dep2, cc_dep1));
1624 /* 10, 11 */
1625 if (isU64(cond_n_op, (ARM64CondLT << 4) | ARM64G_CC_OP_SUB64)) {
1626 /* LT after SUB --> test argL <s argR */
1627 return unop(Iop_1Uto64,
1628 binop(Iop_CmpLT64S, cc_dep1, cc_dep2));
1630 if (isU64(cond_n_op, (ARM64CondGE << 4) | ARM64G_CC_OP_SUB64)) {
1631 /* GE after SUB --> test argL >=s argR
1632 --> test argR <=s argL */
1633 return unop(Iop_1Uto64,
1634 binop(Iop_CmpLE64S, cc_dep2, cc_dep1));
1637 /* 12, 13 */
1638 if (isU64(cond_n_op, (ARM64CondGT << 4) | ARM64G_CC_OP_SUB64)) {
1639 /* GT after SUB --> test argL >s argR
1640 --> test argR <s argL */
1641 return unop(Iop_1Uto64,
1642 binop(Iop_CmpLT64S, cc_dep2, cc_dep1));
1644 if (isU64(cond_n_op, (ARM64CondLE << 4) | ARM64G_CC_OP_SUB64)) {
1645 /* LE after SUB --> test argL <=s argR */
1646 return unop(Iop_1Uto64,
1647 binop(Iop_CmpLE64S, cc_dep1, cc_dep2));
1650 /*---------------- SUB32 ----------------*/
1652 /* 0, 1 */
1653 if (isU64(cond_n_op, (ARM64CondEQ << 4) | ARM64G_CC_OP_SUB32)) {
1654 /* EQ after SUB --> test argL == argR */
1655 return unop(Iop_1Uto64,
1656 binop(Iop_CmpEQ32, unop(Iop_64to32, cc_dep1),
1657 unop(Iop_64to32, cc_dep2)));
1659 if (isU64(cond_n_op, (ARM64CondNE << 4) | ARM64G_CC_OP_SUB32)) {
1660 /* NE after SUB --> test argL != argR */
1661 return unop(Iop_1Uto64,
1662 binop(Iop_CmpNE32, unop(Iop_64to32, cc_dep1),
1663 unop(Iop_64to32, cc_dep2)));
1666 /* 2, 3 */
1667 if (isU64(cond_n_op, (ARM64CondCS << 4) | ARM64G_CC_OP_SUB32)) {
1668 /* CS after SUB --> test argL >=u argR
1669 --> test argR <=u argL */
1670 return unop(Iop_1Uto64,
1671 binop(Iop_CmpLE32U, unop(Iop_64to32, cc_dep2),
1672 unop(Iop_64to32, cc_dep1)));
1674 if (isU64(cond_n_op, (ARM64CondCC << 4) | ARM64G_CC_OP_SUB32)) {
1675 /* CC after SUB --> test argL <u argR */
1676 return unop(Iop_1Uto64,
1677 binop(Iop_CmpLT32U, unop(Iop_64to32, cc_dep1),
1678 unop(Iop_64to32, cc_dep2)));
1681 /* 8, 9 */
1682 if (isU64(cond_n_op, (ARM64CondLS << 4) | ARM64G_CC_OP_SUB32)) {
1683 /* LS after SUB --> test argL <=u argR */
1684 return unop(Iop_1Uto64,
1685 binop(Iop_CmpLE32U, unop(Iop_64to32, cc_dep1),
1686 unop(Iop_64to32, cc_dep2)));
1688 if (isU64(cond_n_op, (ARM64CondHI << 4) | ARM64G_CC_OP_SUB32)) {
1689 /* HI after SUB --> test argL >u argR
1690 --> test argR <u argL */
1691 return unop(Iop_1Uto64,
1692 binop(Iop_CmpLT32U, unop(Iop_64to32, cc_dep2),
1693 unop(Iop_64to32, cc_dep1)));
1696 /* 10, 11 */
1697 if (isU64(cond_n_op, (ARM64CondLT << 4) | ARM64G_CC_OP_SUB32)) {
1698 /* LT after SUB --> test argL <s argR */
1699 return unop(Iop_1Uto64,
1700 binop(Iop_CmpLT32S, unop(Iop_64to32, cc_dep1),
1701 unop(Iop_64to32, cc_dep2)));
1703 if (isU64(cond_n_op, (ARM64CondGE << 4) | ARM64G_CC_OP_SUB32)) {
1704 /* GE after SUB --> test argL >=s argR
1705 --> test argR <=s argL */
1706 return unop(Iop_1Uto64,
1707 binop(Iop_CmpLE32S, unop(Iop_64to32, cc_dep2),
1708 unop(Iop_64to32, cc_dep1)));
1711 /* 12, 13 */
1712 if (isU64(cond_n_op, (ARM64CondGT << 4) | ARM64G_CC_OP_SUB32)) {
1713 /* GT after SUB --> test argL >s argR
1714 --> test argR <s argL */
1715 return unop(Iop_1Uto64,
1716 binop(Iop_CmpLT32S, unop(Iop_64to32, cc_dep2),
1717 unop(Iop_64to32, cc_dep1)));
1719 if (isU64(cond_n_op, (ARM64CondLE << 4) | ARM64G_CC_OP_SUB32)) {
1720 /* LE after SUB --> test argL <=s argR */
1721 return unop(Iop_1Uto64,
1722 binop(Iop_CmpLE32S, unop(Iop_64to32, cc_dep1),
1723 unop(Iop_64to32, cc_dep2)));
1726 //ZZ /*---------------- SBB ----------------*/
1727 //ZZ
1728 //ZZ if (isU32(cond_n_op, (ARMCondHS << 4) | ARMG_CC_OP_SBB)) {
1729 //ZZ /* This seems to happen a lot in softfloat code, eg __divdf3+140 */
1730 //ZZ /* thunk is: (dep1=argL, dep2=argR, ndep=oldC) */
1731 //ZZ /* HS after SBB (same as C after SBB below)
1732 //ZZ --> oldC ? (argL >=u argR) : (argL >u argR)
1733 //ZZ --> oldC ? (argR <=u argL) : (argR <u argL)
1734 //ZZ */
1735 //ZZ return
1736 //ZZ IRExpr_ITE(
1737 //ZZ binop(Iop_CmpNE32, cc_ndep, mkU32(0)),
1738 //ZZ /* case oldC != 0 */
1739 //ZZ unop(Iop_1Uto32, binop(Iop_CmpLE32U, cc_dep2, cc_dep1)),
1740 //ZZ /* case oldC == 0 */
1741 //ZZ unop(Iop_1Uto32, binop(Iop_CmpLT32U, cc_dep2, cc_dep1))
1742 //ZZ );
1743 //ZZ }
1745 /*---------------- LOGIC32 ----------------*/
1747 if (isU64(cond_n_op, (ARM64CondEQ << 4) | ARM64G_CC_OP_LOGIC32)) {
1748 /* EQ after LOGIC32 --> test res[31:0] == 0 */
1749 return unop(Iop_1Uto64,
1750 binop(Iop_CmpEQ32,
1751 unop(Iop_64to32, cc_dep1), mkU32(0)));
1753 if (isU64(cond_n_op, (ARM64CondNE << 4) | ARM64G_CC_OP_LOGIC32)) {
1754 /* NE after LOGIC32 --> test res[31:0] != 0 */
1755 return unop(Iop_1Uto64,
1756 binop(Iop_CmpNE32,
1757 unop(Iop_64to32, cc_dep1), mkU32(0)));
1760 /*---------------- LOGIC64 ----------------*/
1762 if (isU64(cond_n_op, (ARM64CondEQ << 4) | ARM64G_CC_OP_LOGIC64)) {
1763 /* EQ after LOGIC64 --> test res[63:0] == 0 */
1764 return unop(Iop_1Uto64,
1765 binop(Iop_CmpEQ64, cc_dep1, mkU64(0)));
1767 if (isU64(cond_n_op, (ARM64CondNE << 4) | ARM64G_CC_OP_LOGIC64)) {
1768 /* NE after LOGIC64 --> test res[63:0] != 0 */
1769 return unop(Iop_1Uto64,
1770 binop(Iop_CmpNE64, cc_dep1, mkU64(0)));
1773 //ZZ if (isU32(cond_n_op, (ARMCondNE << 4) | ARMG_CC_OP_LOGIC)) {
1774 //ZZ /* NE after LOGIC --> test res != 0 */
1775 //ZZ return unop(Iop_1Uto32,
1776 //ZZ binop(Iop_CmpNE32, cc_dep1, mkU32(0)));
1777 //ZZ }
1778 //ZZ
1779 //ZZ if (isU32(cond_n_op, (ARMCondPL << 4) | ARMG_CC_OP_LOGIC)) {
1780 //ZZ /* PL after LOGIC --> test (res >> 31) == 0 */
1781 //ZZ return unop(Iop_1Uto32,
1782 //ZZ binop(Iop_CmpEQ32,
1783 //ZZ binop(Iop_Shr32, cc_dep1, mkU8(31)),
1784 //ZZ mkU32(0)));
1785 //ZZ }
1786 //ZZ if (isU32(cond_n_op, (ARMCondMI << 4) | ARMG_CC_OP_LOGIC)) {
1787 //ZZ /* MI after LOGIC --> test (res >> 31) == 1 */
1788 //ZZ return unop(Iop_1Uto32,
1789 //ZZ binop(Iop_CmpEQ32,
1790 //ZZ binop(Iop_Shr32, cc_dep1, mkU8(31)),
1791 //ZZ mkU32(1)));
1792 //ZZ }
1794 /*---------------- COPY ----------------*/
1796 if (isU64(cond_n_op, (ARM64CondEQ << 4) | ARM64G_CC_OP_COPY)) {
1797 /* EQ after COPY --> (cc_dep1 >> ARM64G_CC_SHIFT_Z) & 1 */
1798 return binop(Iop_And64,
1799 binop(Iop_Shr64, cc_dep1,
1800 mkU8(ARM64G_CC_SHIFT_Z)),
1801 mkU64(1));
1803 if (isU64(cond_n_op, (ARM64CondNE << 4) | ARM64G_CC_OP_COPY)) {
1804 /* NE after COPY --> ((cc_dep1 >> ARM64G_CC_SHIFT_Z) ^ 1) & 1 */
1805 return binop(Iop_And64,
1806 binop(Iop_Xor64,
1807 binop(Iop_Shr64, cc_dep1,
1808 mkU8(ARM64G_CC_SHIFT_Z)),
1809 mkU64(1)),
1810 mkU64(1));
1813 //ZZ /*----------------- AL -----------------*/
1814 //ZZ
1815 //ZZ /* A critically important case for Thumb code.
1816 //ZZ
1817 //ZZ What we're trying to spot is the case where cond_n_op is an
1818 //ZZ expression of the form Or32(..., 0xE0) since that means the
1819 //ZZ caller is asking for CondAL and we can simply return 1
1820 //ZZ without caring what the ... part is. This is a potentially
1821 //ZZ dodgy kludge in that it assumes that the ... part has zeroes
1822 //ZZ in bits 7:4, so that the result of the Or32 is guaranteed to
1823 //ZZ be 0xE in bits 7:4. Given that the places where this first
1824 //ZZ arg are constructed (in guest_arm_toIR.c) are very
1825 //ZZ constrained, we can get away with this. To make this
1826 //ZZ guaranteed safe would require to have a new primop, Slice44
1827 //ZZ or some such, thusly
1828 //ZZ
1829 //ZZ Slice44(arg1, arg2) = 0--(24)--0 arg1[7:4] arg2[3:0]
1830 //ZZ
1831 //ZZ and we would then look for Slice44(0xE0, ...)
1832 //ZZ which would give the required safety property.
1833 //ZZ
1834 //ZZ It would be infeasibly expensive to scan backwards through
1835 //ZZ the entire block looking for an assignment to the temp, so
1836 //ZZ just look at the previous 16 statements. That should find it
1837 //ZZ if it is an interesting case, as a result of how the
1838 //ZZ boilerplate guff at the start of each Thumb insn translation
1839 //ZZ is made.
1840 //ZZ */
1841 //ZZ if (cond_n_op->tag == Iex_RdTmp) {
1842 //ZZ Int j;
1843 //ZZ IRTemp look_for = cond_n_op->Iex.RdTmp.tmp;
1844 //ZZ Int limit = n_precedingStmts - 16;
1845 //ZZ if (limit < 0) limit = 0;
1846 //ZZ if (0) vex_printf("scanning %d .. %d\n", n_precedingStmts-1, limit);
1847 //ZZ for (j = n_precedingStmts - 1; j >= limit; j--) {
1848 //ZZ IRStmt* st = precedingStmts[j];
1849 //ZZ if (st->tag == Ist_WrTmp
1850 //ZZ && st->Ist.WrTmp.tmp == look_for
1851 //ZZ && st->Ist.WrTmp.data->tag == Iex_Binop
1852 //ZZ && st->Ist.WrTmp.data->Iex.Binop.op == Iop_Or32
1853 //ZZ && isU32(st->Ist.WrTmp.data->Iex.Binop.arg2, (ARMCondAL << 4)))
1854 //ZZ return mkU32(1);
1855 //ZZ }
1856 //ZZ /* Didn't find any useful binding to the first arg
1857 //ZZ in the previous 16 stmts. */
1858 //ZZ }
1861 //ZZ /* --------- specialising "armg_calculate_flag_c" --------- */
1862 //ZZ
1863 //ZZ else
1864 //ZZ if (vex_streq(function_name, "armg_calculate_flag_c")) {
1865 //ZZ
1866 //ZZ /* specialise calls to the "armg_calculate_flag_c" function.
1867 //ZZ Note that the returned value must be either 0 or 1; nonzero
1868 //ZZ bits 31:1 are not allowed. In turn, incoming oldV and oldC
1869 //ZZ values (from the thunk) are assumed to have bits 31:1
1870 //ZZ clear. */
1871 //ZZ IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
1872 //ZZ vassert(arity == 4);
1873 //ZZ cc_op = args[0]; /* ARMG_CC_OP_* */
1874 //ZZ cc_dep1 = args[1];
1875 //ZZ cc_dep2 = args[2];
1876 //ZZ cc_ndep = args[3];
1877 //ZZ
1878 //ZZ if (isU32(cc_op, ARMG_CC_OP_LOGIC)) {
1879 //ZZ /* Thunk args are (result, shco, oldV) */
1880 //ZZ /* C after LOGIC --> shco */
1881 //ZZ return cc_dep2;
1882 //ZZ }
1883 //ZZ
1884 //ZZ if (isU32(cc_op, ARMG_CC_OP_SUB)) {
1885 //ZZ /* Thunk args are (argL, argR, unused) */
1886 //ZZ /* C after SUB --> argL >=u argR
1887 //ZZ --> argR <=u argL */
1888 //ZZ return unop(Iop_1Uto32,
1889 //ZZ binop(Iop_CmpLE32U, cc_dep2, cc_dep1));
1890 //ZZ }
1891 //ZZ
1892 //ZZ if (isU32(cc_op, ARMG_CC_OP_SBB)) {
1893 //ZZ /* This happens occasionally in softfloat code, eg __divdf3+140 */
1894 //ZZ /* thunk is: (dep1=argL, dep2=argR, ndep=oldC) */
1895 //ZZ /* C after SBB (same as HS after SBB above)
1896 //ZZ --> oldC ? (argL >=u argR) : (argL >u argR)
1897 //ZZ --> oldC ? (argR <=u argL) : (argR <u argL)
1898 //ZZ */
1899 //ZZ return
1900 //ZZ IRExpr_ITE(
1901 //ZZ binop(Iop_CmpNE32, cc_ndep, mkU32(0)),
1902 //ZZ /* case oldC != 0 */
1903 //ZZ unop(Iop_1Uto32, binop(Iop_CmpLE32U, cc_dep2, cc_dep1)),
1904 //ZZ /* case oldC == 0 */
1905 //ZZ unop(Iop_1Uto32, binop(Iop_CmpLT32U, cc_dep2, cc_dep1))
1906 //ZZ );
1907 //ZZ }
1908 //ZZ
1909 //ZZ }
1910 //ZZ
1911 //ZZ /* --------- specialising "armg_calculate_flag_v" --------- */
1912 //ZZ
1913 //ZZ else
1914 //ZZ if (vex_streq(function_name, "armg_calculate_flag_v")) {
1915 //ZZ
1916 //ZZ /* specialise calls to the "armg_calculate_flag_v" function.
1917 //ZZ Note that the returned value must be either 0 or 1; nonzero
1918 //ZZ bits 31:1 are not allowed. In turn, incoming oldV and oldC
1919 //ZZ values (from the thunk) are assumed to have bits 31:1
1920 //ZZ clear. */
1921 //ZZ IRExpr *cc_op, *cc_dep1, *cc_dep2, *cc_ndep;
1922 //ZZ vassert(arity == 4);
1923 //ZZ cc_op = args[0]; /* ARMG_CC_OP_* */
1924 //ZZ cc_dep1 = args[1];
1925 //ZZ cc_dep2 = args[2];
1926 //ZZ cc_ndep = args[3];
1927 //ZZ
1928 //ZZ if (isU32(cc_op, ARMG_CC_OP_LOGIC)) {
1929 //ZZ /* Thunk args are (result, shco, oldV) */
1930 //ZZ /* V after LOGIC --> oldV */
1931 //ZZ return cc_ndep;
1932 //ZZ }
1933 //ZZ
1934 //ZZ if (isU32(cc_op, ARMG_CC_OP_SUB)) {
1935 //ZZ /* Thunk args are (argL, argR, unused) */
1936 //ZZ /* V after SUB
1937 //ZZ --> let res = argL - argR
1938 //ZZ in ((argL ^ argR) & (argL ^ res)) >> 31
1939 //ZZ --> ((argL ^ argR) & (argL ^ (argL - argR))) >> 31
1940 //ZZ */
1941 //ZZ IRExpr* argL = cc_dep1;
1942 //ZZ IRExpr* argR = cc_dep2;
1943 //ZZ return
1944 //ZZ binop(Iop_Shr32,
1945 //ZZ binop(Iop_And32,
1946 //ZZ binop(Iop_Xor32, argL, argR),
1947 //ZZ binop(Iop_Xor32, argL, binop(Iop_Sub32, argL, argR))
1948 //ZZ ),
1949 //ZZ mkU8(31)
1950 //ZZ );
1951 //ZZ }
1952 //ZZ
1953 //ZZ if (isU32(cc_op, ARMG_CC_OP_SBB)) {
1954 //ZZ /* This happens occasionally in softfloat code, eg __divdf3+140 */
1955 //ZZ /* thunk is: (dep1=argL, dep2=argR, ndep=oldC) */
1956 //ZZ /* V after SBB
1957 //ZZ --> let res = argL - argR - (oldC ^ 1)
1958 //ZZ in (argL ^ argR) & (argL ^ res) & 1
1959 //ZZ */
1960 //ZZ return
1961 //ZZ binop(
1962 //ZZ Iop_And32,
1963 //ZZ binop(
1964 //ZZ Iop_And32,
1965 //ZZ // argL ^ argR
1966 //ZZ binop(Iop_Xor32, cc_dep1, cc_dep2),
1967 //ZZ // argL ^ (argL - argR - (oldC ^ 1))
1968 //ZZ binop(Iop_Xor32,
1969 //ZZ cc_dep1,
1970 //ZZ binop(Iop_Sub32,
1971 //ZZ binop(Iop_Sub32, cc_dep1, cc_dep2),
1972 //ZZ binop(Iop_Xor32, cc_ndep, mkU32(1)))
1973 //ZZ )
1974 //ZZ ),
1975 //ZZ mkU32(1)
1976 //ZZ );
1977 //ZZ }
1978 //ZZ
1979 //ZZ }
1981 # undef unop
1982 # undef binop
1983 # undef mkU64
1984 # undef mkU8
1986 return NULL;
1987 }
1990 /*----------------------------------------------*/
1991 /*--- The exported fns .. ---*/
1992 /*----------------------------------------------*/
1994 //ZZ /* VISIBLE TO LIBVEX CLIENT */
1995 //ZZ #if 0
1996 //ZZ void LibVEX_GuestARM_put_flags ( UInt flags_native,
1997 //ZZ /*OUT*/VexGuestARMState* vex_state )
1998 //ZZ {
1999 //ZZ vassert(0); // FIXME
2000 //ZZ
2001 //ZZ /* Mask out everything except N Z V C. */
2002 //ZZ flags_native
2003 //ZZ &= (ARMG_CC_MASK_N | ARMG_CC_MASK_Z | ARMG_CC_MASK_V | ARMG_CC_MASK_C);
2004 //ZZ
2005 //ZZ vex_state->guest_CC_OP = ARMG_CC_OP_COPY;
2006 //ZZ vex_state->guest_CC_DEP1 = flags_native;
2007 //ZZ vex_state->guest_CC_DEP2 = 0;
2008 //ZZ vex_state->guest_CC_NDEP = 0;
2009 //ZZ }
2010 //ZZ #endif
2012 /* negative zero carry overflow flags */
2013 /* VISIBLE TO LIBVEX CLIENT */
2014 ULong LibVEX_GuestARM64_get_nzcv ( /*IN*/const VexGuestARM64State* vex_state )
2015 {
2016 ULong nzcv = 0;
2017 // NZCV
2018 nzcv |= arm64g_calculate_flags_nzcv(
2019 vex_state->guest_CC_OP,
2020 vex_state->guest_CC_DEP1,
2021 vex_state->guest_CC_DEP2,
2022 vex_state->guest_CC_NDEP
2023    );
2024 vassert(0 == (nzcv & 0xFFFFFFFF0FFFFFFFULL));
2025 //ZZ // Q
2026 //ZZ if (vex_state->guest_QFLAG32 > 0)
2027 //ZZ cpsr |= (1 << 27);
2028 //ZZ // GE
2029 //ZZ if (vex_state->guest_GEFLAG0 > 0)
2030 //ZZ cpsr |= (1 << 16);
2031 //ZZ if (vex_state->guest_GEFLAG1 > 0)
2032 //ZZ cpsr |= (1 << 17);
2033 //ZZ if (vex_state->guest_GEFLAG2 > 0)
2034 //ZZ cpsr |= (1 << 18);
2035 //ZZ if (vex_state->guest_GEFLAG3 > 0)
2036 //ZZ cpsr |= (1 << 19);
2037 //ZZ // M
2038 //ZZ cpsr |= (1 << 4); // 0b10000 means user-mode
2039 //ZZ // J,T J (bit 24) is zero by initialisation above
2040 //ZZ // T we copy from R15T[0]
2041 //ZZ if (vex_state->guest_R15T & 1)
2042 //ZZ cpsr |= (1 << 5);
2043 //ZZ // ITSTATE we punt on for the time being. Could compute it
2044 //ZZ // if needed though.
2045 //ZZ // E, endianness, 0 (littleendian) from initialisation above
2046 //ZZ // A,I,F disable some async exceptions. Not sure about these.
2047 //ZZ // Leave as zero for the time being.
2048 return nzcv;
2049 }
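/* Hedged usage sketch for the getter above; the client-side code below is
   illustrative only and not part of this file:

      ULong nzcv = LibVEX_GuestARM64_get_nzcv(&vex_state);
      UInt n = (nzcv >> 31) & 1;
      UInt z = (nzcv >> 30) & 1;
      UInt c = (nzcv >> 29) & 1;
      UInt v = (nzcv >> 28) & 1;

   The vassert above guarantees that only bits 31:28 of the returned value
   can be nonzero. */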
2051 /* floating point status register */
2052 /* VISIBLE TO LIBVEX CLIENT */
2053 ULong LibVEX_GuestARM64_get_fpsr ( const VexGuestARM64State* vex_state )
2054 {
2055 UInt w32 = vex_state->guest_QCFLAG[0] | vex_state->guest_QCFLAG[1]
2056 | vex_state->guest_QCFLAG[2] | vex_state->guest_QCFLAG[3];
2057 ULong fpsr = 0;
2058 // QC
2059 if (w32 != 0)
2060 fpsr |= (1 << 27);
2061 return fpsr;
2062 }
2064 void LibVEX_GuestARM64_set_fpsr ( /*MOD*/VexGuestARM64State* vex_state,
2065 ULong fpsr )
2066 {
2067 // QC
2068 vex_state->guest_QCFLAG[0] = (UInt)((fpsr >> 27) & 1);
2069 vex_state->guest_QCFLAG[1] = 0;
2070 vex_state->guest_QCFLAG[2] = 0;
2071 vex_state->guest_QCFLAG[3] = 0;
2072 }
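/* The two FPSR helpers above model only the cumulative saturation flag QC,
   which lives in FPSR bit 27.  A hedged round-trip sketch (client side,
   illustrative only):

      LibVEX_GuestARM64_set_fpsr(&vex_state, 1ULL << 27);        // set QC
      vassert(LibVEX_GuestARM64_get_fpsr(&vex_state) == (1ULL << 27));
      LibVEX_GuestARM64_set_fpsr(&vex_state, 0);                 // clear QC
      vassert(LibVEX_GuestARM64_get_fpsr(&vex_state) == 0);

   All other FPSR bits are ignored by both helpers. */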
2074 /* VISIBLE TO LIBVEX CLIENT */
2075 void LibVEX_GuestARM64_initialise ( /*OUT*/VexGuestARM64State* vex_state )
2076 {
2077 vex_bzero(vex_state, sizeof(*vex_state));
2078 //ZZ vex_state->host_EvC_FAILADDR = 0;
2079 //ZZ vex_state->host_EvC_COUNTER = 0;
2080 //ZZ
2081 //ZZ vex_state->guest_R0 = 0;
2082 //ZZ vex_state->guest_R1 = 0;
2083 //ZZ vex_state->guest_R2 = 0;
2084 //ZZ vex_state->guest_R3 = 0;
2085 //ZZ vex_state->guest_R4 = 0;
2086 //ZZ vex_state->guest_R5 = 0;
2087 //ZZ vex_state->guest_R6 = 0;
2088 //ZZ vex_state->guest_R7 = 0;
2089 //ZZ vex_state->guest_R8 = 0;
2090 //ZZ vex_state->guest_R9 = 0;
2091 //ZZ vex_state->guest_R10 = 0;
2092 //ZZ vex_state->guest_R11 = 0;
2093 //ZZ vex_state->guest_R12 = 0;
2094 //ZZ vex_state->guest_R13 = 0;
2095 //ZZ vex_state->guest_R14 = 0;
2096 //ZZ vex_state->guest_R15T = 0; /* NB: implies ARM mode */
2097 //ZZ
2098 vex_state->guest_CC_OP = ARM64G_CC_OP_COPY;
2099 //ZZ vex_state->guest_CC_DEP1 = 0;
2100 //ZZ vex_state->guest_CC_DEP2 = 0;
2101 //ZZ vex_state->guest_CC_NDEP = 0;
2102 //ZZ vex_state->guest_QFLAG32 = 0;
2103 //ZZ vex_state->guest_GEFLAG0 = 0;
2104 //ZZ vex_state->guest_GEFLAG1 = 0;
2105 //ZZ vex_state->guest_GEFLAG2 = 0;
2106 //ZZ vex_state->guest_GEFLAG3 = 0;
2107 //ZZ
2108 //ZZ vex_state->guest_EMNOTE = EmNote_NONE;
2109 //ZZ vex_state->guest_CMSTART = 0;
2110 //ZZ vex_state->guest_CMLEN = 0;
2111 //ZZ vex_state->guest_NRADDR = 0;
2112 //ZZ vex_state->guest_IP_AT_SYSCALL = 0;
2113 //ZZ
2114 //ZZ vex_state->guest_D0 = 0;
2115 //ZZ vex_state->guest_D1 = 0;
2116 //ZZ vex_state->guest_D2 = 0;
2117 //ZZ vex_state->guest_D3 = 0;
2118 //ZZ vex_state->guest_D4 = 0;
2119 //ZZ vex_state->guest_D5 = 0;
2120 //ZZ vex_state->guest_D6 = 0;
2121 //ZZ vex_state->guest_D7 = 0;
2122 //ZZ vex_state->guest_D8 = 0;
2123 //ZZ vex_state->guest_D9 = 0;
2124 //ZZ vex_state->guest_D10 = 0;
2125 //ZZ vex_state->guest_D11 = 0;
2126 //ZZ vex_state->guest_D12 = 0;
2127 //ZZ vex_state->guest_D13 = 0;
2128 //ZZ vex_state->guest_D14 = 0;
2129 //ZZ vex_state->guest_D15 = 0;
2130 //ZZ vex_state->guest_D16 = 0;
2131 //ZZ vex_state->guest_D17 = 0;
2132 //ZZ vex_state->guest_D18 = 0;
2133 //ZZ vex_state->guest_D19 = 0;
2134 //ZZ vex_state->guest_D20 = 0;
2135 //ZZ vex_state->guest_D21 = 0;
2136 //ZZ vex_state->guest_D22 = 0;
2137 //ZZ vex_state->guest_D23 = 0;
2138 //ZZ vex_state->guest_D24 = 0;
2139 //ZZ vex_state->guest_D25 = 0;
2140 //ZZ vex_state->guest_D26 = 0;
2141 //ZZ vex_state->guest_D27 = 0;
2142 //ZZ vex_state->guest_D28 = 0;
2143 //ZZ vex_state->guest_D29 = 0;
2144 //ZZ vex_state->guest_D30 = 0;
2145 //ZZ vex_state->guest_D31 = 0;
2146 //ZZ
2147 //ZZ /* ARM encoded; zero is the default as it happens (result flags
2148 //ZZ (NZCV) cleared, FZ disabled, round to nearest, non-vector mode,
2149 //ZZ all exns masked, all exn sticky bits cleared). */
2150 //ZZ vex_state->guest_FPSCR = 0;
2151 //ZZ
2152 //ZZ vex_state->guest_TPIDRURO = 0;
2153 //ZZ
2154 //ZZ /* Not in a Thumb IT block. */
2155 //ZZ vex_state->guest_ITSTATE = 0;
2156 //ZZ
2157 //ZZ vex_state->padding1 = 0;
2158 //ZZ vex_state->padding2 = 0;
2159 //ZZ vex_state->padding3 = 0;
2160 //ZZ vex_state->padding4 = 0;
2161 //ZZ vex_state->padding5 = 0;
2162 }
2165 /*-----------------------------------------------------------*/
2166 /*--- Describing the arm guest state, for the benefit ---*/
2167 /*--- of iropt and instrumenters. ---*/
2168 /*-----------------------------------------------------------*/
2170 /* Figure out if any part of the guest state contained in minoff
2171 .. maxoff requires precise memory exceptions. If in doubt return
2172 True (but this generates significantly slower code).
2174 We enforce precise exns for guest SP, PC, 29(FP), 30(LR).
2175 That might be overkill (for 29 and 30); I don't know.
2176 */
2177 Bool guest_arm64_state_requires_precise_mem_exns (
2178 Int minoff, Int maxoff, VexRegisterUpdates pxControl
2179 )
2180 {
2181 Int xsp_min = offsetof(VexGuestARM64State, guest_XSP);
2182 Int xsp_max = xsp_min + 8 - 1;
2183 Int pc_min = offsetof(VexGuestARM64State, guest_PC);
2184 Int pc_max = pc_min + 8 - 1;
2186 if (maxoff < xsp_min || minoff > xsp_max) {
2187 /* no overlap with xsp */
2188 if (pxControl == VexRegUpdSpAtMemAccess)
2189 return False; // We only need to check stack pointer.
2190 } else {
2191 return True;
2192    }
2194 if (maxoff < pc_min || minoff > pc_max) {
2195 /* no overlap with pc */
2196 } else {
2197 return True;
2198    }
2200 /* Guessing that we need PX for FP, but I don't really know. */
2201 Int x29_min = offsetof(VexGuestARM64State, guest_X29);
2202 Int x29_max = x29_min + 8 - 1;
2204 if (maxoff < x29_min || minoff > x29_max) {
2205 /* no overlap with x29 */
2206 } else {
2207 return True;
2208    }
2210 /* Guessing that we need PX for LR, but I don't really know. */
2211 Int x30_min = offsetof(VexGuestARM64State, guest_X30);
2212 Int x30_max = x30_min + 8 - 1;
2214 if (maxoff < x30_min || minoff > x30_max) {
2215 /* no overlap with x30 */
2216 } else {
2217 return True;
2218    }
2220 return False;
2221 }
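/* The overlap test used repeatedly above, shown in isolation (a minimal
   sketch; 'lo' and 'hi' are the first and last byte offsets of one guest
   register within VexGuestARM64State):

      static inline Bool overlaps ( Int minoff, Int maxoff, Int lo, Int hi )
      {
         return !(maxoff < lo || minoff > hi);   // closed-interval overlap
      }

   Precise exceptions are demanded as soon as the modified range overlaps
   XSP, PC, X29 or X30; under VexRegUpdSpAtMemAccess only XSP is
   considered. */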
2224 #define ALWAYSDEFD(field) \
2225 { offsetof(VexGuestARM64State, field), \
2226 (sizeof ((VexGuestARM64State*)0)->field) }
2227 VexGuestLayout
2228 arm64Guest_layout
2229 = {
2230 /* Total size of the guest state, in bytes. */
2231 .total_sizeB = sizeof(VexGuestARM64State),
2233 /* Describe the stack pointer. */
2234 .offset_SP = offsetof(VexGuestARM64State,guest_XSP),
2235 .sizeof_SP = 8,
2237 /* Describe the instruction pointer. */
2238 .offset_IP = offsetof(VexGuestARM64State,guest_PC),
2239 .sizeof_IP = 8,
2241 /* Describe any sections to be regarded by Memcheck as
2242 'always-defined'. */
2243 .n_alwaysDefd = 9,
2245 /* flags thunk: OP is always defd, whereas DEP1 and DEP2
2246 have to be tracked. See detailed comment in gdefs.h on
2247 meaning of thunk fields. */
2248 .alwaysDefd
2249 = { /* 0 */ ALWAYSDEFD(guest_PC),
2250 /* 1 */ ALWAYSDEFD(guest_CC_OP),
2251 /* 2 */ ALWAYSDEFD(guest_CC_NDEP),
2252 /* 3 */ ALWAYSDEFD(guest_EMNOTE),
2253 /* 4 */ ALWAYSDEFD(guest_CMSTART),
2254 /* 5 */ ALWAYSDEFD(guest_CMLEN),
2255 /* 6 */ ALWAYSDEFD(guest_NRADDR),
2256 /* 7 */ ALWAYSDEFD(guest_IP_AT_SYSCALL),
2257 /* 8 */ ALWAYSDEFD(guest_TPIDR_EL0)
2258         }
2259      };
2262 /*---------------------------------------------------------------*/
2263 /*--- end guest_arm64_helpers.c ---*/
2264 /*---------------------------------------------------------------*/