/*--------------------------------------------------------------------*/
/*--- Instrument IR to perform memory checking operations.         ---*/
/*---                                               mc_translate.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2000-2017 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/
#include "pub_tool_basics.h"
#include "pub_tool_poolalloc.h"     // For mc_include.h
#include "pub_tool_hashtable.h"     // For mc_include.h
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_machine.h"       // VG_(fnptr_to_fnentry)
#include "pub_tool_xarray.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_libcbase.h"

#include "mc_include.h"
/* FIXMEs JRS 2011-June-16.

   Check the interpretation for vector narrowing and widening ops,
   particularly the saturating ones.  I suspect they are either overly
   pessimistic and/or wrong.

   Iop_QandSQsh64x2 and friends (vector-by-vector bidirectional
   saturating shifts): the interpretation is overly pessimistic.
   See comments on the relevant cases below for details.

   Iop_Sh64Sx2 and friends (vector-by-vector bidirectional shifts,
   both rounding and non-rounding variants): ditto
*/
/* This file implements the Memcheck instrumentation, and in
   particular contains the core of its undefined value detection
   machinery.  For a comprehensive background of the terminology,
   algorithms and rationale used herein, read:

     Using Valgrind to detect undefined value errors with
     bit-precision

     Julian Seward and Nicholas Nethercote

     2005 USENIX Annual Technical Conference (General Track),
     Anaheim, CA, USA, April 10-15, 2005.

   ----

   Here is as good a place as any to record exactly when V bits are and
   should be checked, why, and what function is responsible.

   Memcheck complains when an undefined value is used:

   1. In the condition of a conditional branch.  Because it could cause
      incorrect control flow, and thus cause incorrect externally-visible
      behaviour.  [mc_translate.c:complainIfUndefined]

   2. As an argument to a system call, or as the value that specifies
      the system call number.  Because it could cause an incorrect
      externally-visible side effect.  [mc_translate.c:mc_pre_reg_read]

   3. As the address in a load or store.  Because it could cause an
      incorrect value to be used later, which could cause externally-visible
      behaviour (eg. via incorrect control flow or an incorrect system call
      argument)  [complainIfUndefined]

   4. As the target address of a branch.  Because it could cause incorrect
      control flow.  [complainIfUndefined]

   5. As an argument to setenv, unsetenv, or putenv.  Because it could put
      an incorrect value into the external environment.
      [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]

   6. As the index in a GETI or PUTI operation.  I'm not sure why... (njn).
      [complainIfUndefined]

   7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
      VALGRIND_CHECK_VALUE_IS_DEFINED client requests.  Because the user
      requested it.  [in memcheck.h]

   Memcheck also complains, but should not, when an undefined value is used:

   8. As the shift value in certain SIMD shift operations (but not in the
      standard integer shift operations).  This inconsistency is due to
      historical reasons.  [complainIfUndefined]

   Memcheck does not complain, but should, when an undefined value is used:

   9. As an input to a client request.  Because the client request may
      affect the visible behaviour -- see bug #144362 for an example
      involving the malloc replacements in vg_replace_malloc.c and
      VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
      isn't identified.  That bug report also has some info on how to solve
      the problem.  [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]

   In practice, 1 and 2 account for the vast majority of cases.
*/
/* Generation of addr-definedness, addr-validity and
   guard-definedness checks pertaining to loads and stores (Iex_Load,
   Ist_Store, IRLoadG, IRStoreG, LLSC, CAS and Dirty memory
   loads/stores) was re-checked 11 May 2013. */


/*------------------------------------------------------------*/
/*--- Forward decls                                        ---*/
/*------------------------------------------------------------*/
struct _MCEnv;

// See below for comments explaining what this is for.
typedef
   enum __attribute__((packed)) { HuUnU=0, HuPCa=1, HuOth=2 }
   HowUsed;

static IRType  shadowTypeV ( IRType ty );
static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e,
                            HowUsed hu/*use HuOth if unknown*/ );
static IRTemp  findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );

static IRExpr *i128_const_zero(void);
/*------------------------------------------------------------*/
/*--- Memcheck running state, and tmp management.          ---*/
/*------------------------------------------------------------*/

/* For a few (maybe 1%) IROps, we have both a cheaper, less exact vbit
   propagation scheme, and a more expensive, more precise vbit propagation
   scheme.  This enum describes, for such an IROp, which scheme to use. */
typedef
   enum {
      // Use the cheaper, less-exact variant.
      DLcheap=4,
      // Choose between cheap and expensive based on analysis of the block
      // to be instrumented.  Note that the choice may be done on a
      // per-instance basis of the IROp that this DetailLevel describes.
      DLauto,
      // Use the more expensive, more-exact variant.
      DLexpensive
   }
   DetailLevel;
/* A readonly part of the running state.  For IROps that have both a
   less-exact and more-exact interpretation, records which interpretation is
   to be used. */
typedef
   struct {
      // For Add32/64 and Sub32/64, all 3 settings are allowed.  For the
      // DLauto case, a per-instance decision is to be made by inspecting
      // the associated tmp's entry in MCEnv.tmpHowUsed.
      DetailLevel dl_Add32;
      DetailLevel dl_Add64;
      DetailLevel dl_Sub32;
      DetailLevel dl_Sub64;
      // For Cmp{EQ,NE}{64,32,16,8}, only DLcheap and DLexpensive are
      // allowed.
      DetailLevel dl_CmpEQ64_CmpNE64;
      DetailLevel dl_CmpEQ32_CmpNE32;
      DetailLevel dl_CmpEQ16_CmpNE16;
      DetailLevel dl_CmpEQ8_CmpNE8;
   }
   DetailLevelByOp;
static void DetailLevelByOp__set_all ( /*OUT*/DetailLevelByOp* dlbo,
                                       DetailLevel dl )
{
   dlbo->dl_Add32           = dl;
   dlbo->dl_Add64           = dl;
   dlbo->dl_Sub32           = dl;
   dlbo->dl_Sub64           = dl;
   dlbo->dl_CmpEQ64_CmpNE64 = dl;
   dlbo->dl_CmpEQ32_CmpNE32 = dl;
   dlbo->dl_CmpEQ16_CmpNE16 = dl;
   dlbo->dl_CmpEQ8_CmpNE8   = dl;
}

static void DetailLevelByOp__check_sanity ( const DetailLevelByOp* dlbo )
{
   tl_assert(dlbo->dl_Add32 >= DLcheap && dlbo->dl_Add32 <= DLexpensive);
   tl_assert(dlbo->dl_Add64 >= DLcheap && dlbo->dl_Add64 <= DLexpensive);
   tl_assert(dlbo->dl_Sub32 >= DLcheap && dlbo->dl_Sub32 <= DLexpensive);
   tl_assert(dlbo->dl_Sub64 >= DLcheap && dlbo->dl_Sub64 <= DLexpensive);
   tl_assert(dlbo->dl_CmpEQ64_CmpNE64 == DLcheap
             || dlbo->dl_CmpEQ64_CmpNE64 == DLexpensive);
   tl_assert(dlbo->dl_CmpEQ32_CmpNE32 == DLcheap
             || dlbo->dl_CmpEQ32_CmpNE32 == DLexpensive);
   tl_assert(dlbo->dl_CmpEQ16_CmpNE16 == DLcheap
             || dlbo->dl_CmpEQ16_CmpNE16 == DLexpensive);
   tl_assert(dlbo->dl_CmpEQ8_CmpNE8 == DLcheap
             || dlbo->dl_CmpEQ8_CmpNE8 == DLexpensive);
}

static UInt DetailLevelByOp__count ( const DetailLevelByOp* dlbo,
                                     DetailLevel dl )
{
   UInt n = 0;
   n += (dlbo->dl_Add32 == dl ? 1 : 0);
   n += (dlbo->dl_Add64 == dl ? 1 : 0);
   n += (dlbo->dl_Sub32 == dl ? 1 : 0);
   n += (dlbo->dl_Sub64 == dl ? 1 : 0);
   n += (dlbo->dl_CmpEQ64_CmpNE64 == dl ? 1 : 0);
   n += (dlbo->dl_CmpEQ32_CmpNE32 == dl ? 1 : 0);
   n += (dlbo->dl_CmpEQ16_CmpNE16 == dl ? 1 : 0);
   n += (dlbo->dl_CmpEQ8_CmpNE8 == dl ? 1 : 0);
   return n;
}
/* Carries info about a particular tmp.  The tmp's number is not
   recorded, as this is implied by (equal to) its index in the tmpMap
   in MCEnv.  The tmp's type is also not recorded, as this is present
   in MCEnv.sb->tyenv.

   When .kind is Orig, .shadowV and .shadowB may give the identities
   of the temps currently holding the associated definedness (shadowV)
   and origin (shadowB) values, or these may be IRTemp_INVALID if code
   to compute such values has not yet been emitted.

   When .kind is VSh or BSh then the tmp holds a V- or B- value,
   and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
   illogical for a shadow tmp itself to be shadowed.
*/
typedef
   enum { Orig=1, VSh=2, BSh=3 }
   TempKind;

typedef
   struct {
      TempKind kind;
      IRTemp   shadowV;
      IRTemp   shadowB;
   }
   TempMapEnt;
/* A |HowUsed| value carries analysis results about how values are used,
   pertaining to whether we need to instrument integer adds expensively or
   not.  The running state carries a (readonly) mapping from original tmp to
   a HowUsed value for it.  A usage value can be one of three values,
   forming a 3-point chain lattice.

      HuOth  ("Other") used in some arbitrary way
       |
      HuPCa  ("PCast") used *only* in effectively a PCast, in which all
       |     we care about is the all-defined vs not-all-defined distinction
       |
      HuUnU  ("Unused") not used at all.

   The "safe" (don't-know) end of the lattice is "HuOth".  See comments
   below in |preInstrumentationAnalysis| for further details.
*/
/* DECLARED ABOVE:
   typedef
      enum __attribute__((packed)) { HuUnU=0, HuPCa=1, HuOth=2 }
      HowUsed;
*/

// Not actually necessary, but we don't want to waste D1 space.
STATIC_ASSERT(sizeof(HowUsed) == 1);
/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the superblock being constructed.  IRStmts are
         added. */
      IRSB* sb;
      Bool  trace;

      /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
         current kind and possibly shadow temps for each temp in the
         IRSB being constructed.  Note that it does not contain the
         type of each tmp.  If you want to know the type, look at the
         relevant entry in sb->tyenv.  It follows that at all times
         during the instrumentation process, the valid indices for
         tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
         the total number of Orig, V- and B- temps allocated so far.

         The reason for this strange split (types in one place, all
         other info in another) is that we need the types to be
         attached to sb so as to make it possible to do
         "typeOfIRExpr(mce->bb->tyenv, ...)" at various places in the
         instrumentation process. */
      XArray* /* of TempMapEnt */ tmpMap;

      /* READONLY: contains details of which ops should be expensively
         instrumented. */
      DetailLevelByOp dlbo;

      /* READONLY: for each original tmp, how the tmp is used.  This is
         computed by |preInstrumentationAnalysis|.  Valid indices are
         0 .. #temps_in_sb-1 (same as for tmpMap). */
      HowUsed* tmpHowUsed;

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      const VexGuestLayout* layout;

      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;
/* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
   demand), as they are encountered.  This is for two reasons.

   (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in
   tables tracking them.

   Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
   table indexed [0 .. n_types-1], which gives the current shadow for
   each original tmp, or INVALID_IRTEMP if none is so far assigned.
   It is necessary to support making multiple assignments to a shadow
   -- specifically, after testing a shadow for definedness, it needs
   to be made defined.  But IR's SSA property disallows this.

   (2) (more important reason): Therefore, when a shadow needs to get
   a new value, a new temporary is created, the value is assigned to
   that, and the tmpMap is updated to reflect the new binding.

   A corollary is that if the tmpMap maps a given tmp to
   IRTemp_INVALID and we are hoping to read that shadow tmp, it means
   there's a read-before-write error in the original tmps.  The IR
   sanity checker should catch all such anomalies, however.
*/
/* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
   both the table in mce->sb and to our auxiliary mapping.  Note that
   newTemp may cause mce->tmpMap to resize, hence previous results
   from VG_(indexXA)(mce->tmpMap) are invalidated. */
static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
{
   Word       newIx;
   TempMapEnt ent;
   IRTemp     tmp = newIRTemp(mce->sb->tyenv, ty);
   ent.kind    = kind;
   ent.shadowV = IRTemp_INVALID;
   ent.shadowB = IRTemp_INVALID;
   newIx = VG_(addToXA)( mce->tmpMap, &ent );
   tl_assert(newIx == (Word)tmp);
   return tmp;
}
/* Find the tmp currently shadowing the given original tmp.  If none
   so far exists, allocate one.  */
static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (ent->shadowV == IRTemp_INVALID) {
      IRTemp tmpV
        = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      tl_assert(ent->shadowV == IRTemp_INVALID);
      ent->shadowV = tmpV;
   }
   return ent->shadowV;
}
/* Allocate a new shadow for the given original tmp.  This means any
   previous shadow is abandoned.  This is needed because it is
   necessary to give a new value to a shadow once it has been tested
   for undefinedness, but unfortunately IR's SSA property disallows
   this.  Instead we must abandon the old shadow, allocate a new one
   and use that instead.

   This is the same as findShadowTmpV, except we don't bother to see
   if a shadow temp already existed -- we simply allocate a new one
   regardless. */
static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (1) {
      IRTemp tmpV
        = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      ent->shadowV = tmpV;
   }
}
/*------------------------------------------------------------*/
/*--- IRAtoms -- a subset of IRExprs                       ---*/
/*------------------------------------------------------------*/

/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
   isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
   input, most of this code deals in atoms.  Usefully, a value atom
   always has a V-value which is also an atom: constants are shadowed
   by constants, and temps are shadowed by the corresponding shadow
   temporary. */

typedef  IRExpr  IRAtom;

/* (used for sanity checks only): is this an atom which looks
   like it's from original code? */
static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp) {
      TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
      return ent->kind == Orig;
   }
   return False;
}

/* (used for sanity checks only): is this an atom which looks
   like it's from shadow code? */
static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp) {
      TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
      return ent->kind == VSh || ent->kind == BSh;
   }
   return False;
}

/* (used for sanity checks only): check that both args are atoms and
   are identically-kinded. */
static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
{
   if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
      return True;
   if (a1->tag == Iex_Const && a2->tag == Iex_Const)
      return True;
   return False;
}
/*------------------------------------------------------------*/
/*--- Type management                                      ---*/
/*------------------------------------------------------------*/

/* Shadow state is always accessed using integer types.  This returns
   an integer type with the same size (as per sizeofIRType) as the
   given type.  The only valid shadow types are Bit, I8, I16, I32,
   I64, I128, V128, V256. */

static IRType shadowTypeV ( IRType ty )
{
   switch (ty) {
      case Ity_I1:
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:
      case Ity_I128: return ty;
      case Ity_F16:  return Ity_I16;
      case Ity_F32:  return Ity_I32;
      case Ity_D32:  return Ity_I32;
      case Ity_F64:  return Ity_I64;
      case Ity_D64:  return Ity_I64;
      case Ity_F128: return Ity_I128;
      case Ity_D128: return Ity_I128;
      case Ity_V128: return Ity_V128;
      case Ity_V256: return Ity_V256;
      default: ppIRType(ty);
               VG_(tool_panic)("memcheck:shadowTypeV");
   }
}

/* Produce a 'defined' value of the given shadow type.  Should only be
   supplied shadow types (Bit/I8/I16/I32/I64/I128/V128/V256). */
static IRExpr* definedOfType ( IRType ty ) {
   switch (ty) {
      case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
      case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
      case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
      case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
      case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
      case Ity_I128: return i128_const_zero();
      case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
      case Ity_V256: return IRExpr_Const(IRConst_V256(0x00000000));
      default:       VG_(tool_panic)("memcheck:definedOfType");
   }
}
/*------------------------------------------------------------*/
/*--- Constructing IR fragments                            ---*/
/*------------------------------------------------------------*/

/* add stmt to a bb */
static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
   if (mce->trace) {
      VG_(printf)("  %c: ", cat);
      ppIRStmt(st);
      VG_(printf)("\n");
   }
   addStmtToIRSB(mce->sb, st);
}

/* assign value to tmp */
static inline
void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
   stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
}

/* build various kinds of expressions */
#define triop(_op, _arg1, _arg2, _arg3) \
                                 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
#define mkU1(_n)                 IRExpr_Const(IRConst_U1(_n))
#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))

/* Bind the given expression to a new temporary, and return the
   temporary.  This effectively converts an arbitrary expression into
   an atom.

   'ty' is the type of 'e' and hence the type that the new temporary
   needs to be.  But passing it in is redundant, since we can deduce
   the type merely by inspecting 'e'.  So at least use that fact to
   assert that the two types agree. */
static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
{
   TempKind k;
   IRTemp   t;
   IRType   tyE = typeOfIRExpr(mce->sb->tyenv, e);

   tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
   switch (cat) {
      case 'V': k = VSh;  break;
      case 'B': k = BSh;  break;
      case 'C': k = Orig; break;
                /* happens when we are making up new "orig"
                   expressions, for IRCAS handling */
      default: tl_assert(0);
   }
   t = newTemp(mce, ty, k);
   assign(cat, mce, t, e);
   return mkexpr(t);
}
/*------------------------------------------------------------*/
/*--- Helper functions for 128-bit ops                     ---*/
/*------------------------------------------------------------*/

static IRExpr *i128_const_zero(void)
{
   IRAtom* z64 = IRExpr_Const(IRConst_U64(0));
   return binop(Iop_64HLto128, z64, z64);
}

/* There are no I128-bit loads and/or stores [as generated by any
   current front ends].  So we do not need to worry about that in
   expr2vbits_Load */
/*------------------------------------------------------------*/
/*--- Constructing definedness primitive ops               ---*/
/*------------------------------------------------------------*/

/* --------- Defined-if-either-defined --------- */

static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
}

static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
}

static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
}

static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
}

static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
}

static IRAtom* mkDifDV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V256, binop(Iop_AndV256, a1, a2));
}
/* --------- Undefined-if-either-undefined --------- */

static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
}

static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
}

static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
}

static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
}

static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6;
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a1));
   tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1));
   tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2));
   tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2));
   tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3));
   tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4));

   return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5));
}

static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
}

static IRAtom* mkUifUV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, a1, a2));
}

static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
   switch (vty) {
      case Ity_I8:   return mkUifU8(mce, a1, a2);
      case Ity_I16:  return mkUifU16(mce, a1, a2);
      case Ity_I32:  return mkUifU32(mce, a1, a2);
      case Ity_I64:  return mkUifU64(mce, a1, a2);
      case Ity_I128: return mkUifU128(mce, a1, a2);
      case Ity_V128: return mkUifUV128(mce, a1, a2);
      case Ity_V256: return mkUifUV256(mce, a1, a2);
      default:
         VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
         VG_(tool_panic)("memcheck:mkUifU");
   }
}
/* --------- The Left-family of operations. --------- */

static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
}

static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
}

static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
}

static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
}
/* --------- 'Improvement' functions for AND/OR. --------- */

/* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
   defined (0); all other -> undefined (1).
*/
static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
}

static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
}

static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
}

static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
}

static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
}

static IRAtom* mkImproveANDV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, data, vbits));
}
/* ImproveOR(data, vbits) = ~data OR vbits.  Defined (0) data 1s give
   defined (0); all other -> undefined (1).
*/
static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I8,
             binop(Iop_Or8,
                   assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
                   vbits) );
}

static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I16,
             binop(Iop_Or16,
                   assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
                   vbits) );
}

static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I32,
             binop(Iop_Or32,
                   assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
                   vbits) );
}

static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I64,
             binop(Iop_Or64,
                   assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
                   vbits) );
}

static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_V128,
             binop(Iop_OrV128,
                   assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
                   vbits) );
}

static IRAtom* mkImproveORV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_V256,
             binop(Iop_OrV256,
                   assignNew('V', mce, Ity_V256, unop(Iop_NotV256, data)),
                   vbits) );
}
/* --------- Pessimising casts. --------- */

/* The function returns an expression of type DST_TY.  If any of the VBITS
   is undefined (value == 1) the resulting expression has all bits set to
   1.  Otherwise, all bits are 0. */
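/* For example: a pessimising cast I8 -> I8 maps the vbits 0x00 (all
   defined) to 0x00, but maps 0x04 (just one undefined bit) to 0xFF; a
   widening cast such as I32 -> I64 likewise yields either 64 zero bits
   or 64 one bits, never a mixture.  (Illustrative note only.) */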
static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  src_ty;
   IRAtom* tmp1;

   /* Note, dst_ty is a shadow type, not an original type. */
   tl_assert(isShadowAtom(mce,vbits));
   src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);

   /* Fast-track some common cases */
   if (src_ty == Ity_I32 && dst_ty == Ity_I32)
      return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));

   if (src_ty == Ity_I64 && dst_ty == Ity_I64)
      return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));

   if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
      /* PCast the arg, then clone it. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
   }

   if (src_ty == Ity_I32 && dst_ty == Ity_V128) {
      /* PCast the arg, then clone it 4 times. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      tmp = assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
      return assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp, tmp));
   }

   if (src_ty == Ity_I32 && dst_ty == Ity_V256) {
      /* PCast the arg, then clone it 8 times. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      tmp = assignNew('V', mce, Ity_I64,  binop(Iop_32HLto64, tmp, tmp));
      tmp = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp, tmp));
      return assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256, tmp, tmp));
   }

   if (src_ty == Ity_I64 && dst_ty == Ity_I32) {
      /* PCast the arg.  This gives all 0s or all 1s.  Then throw away
         the top half. */
      IRAtom* tmp = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
      return assignNew('V', mce, Ity_I32, unop(Iop_64to32, tmp));
   }

   if (src_ty == Ity_V128 && dst_ty == Ity_I64) {
      /* Use InterleaveHI64x2 to copy the top half of the vector into
         the bottom half.  Then we can UifU it with the original, throw
         away the upper half of the result, and PCast-I64-to-I64
         the lower half. */
      // Generates vbits[127:64] : vbits[127:64]
      IRAtom* hi64hi64
         = assignNew('V', mce, Ity_V128,
                     binop(Iop_InterleaveHI64x2, vbits, vbits));
      // Generates
      //   UifU(vbits[127:64],vbits[127:64]) : UifU(vbits[127:64],vbits[63:0])
      //   == vbits[127:64] : UifU(vbits[127:64],vbits[63:0])
      IRAtom* lohi64
         = mkUifUV128(mce, hi64hi64, vbits);
      // Generates UifU(vbits[127:64],vbits[63:0])
      IRAtom* lo64
         = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, lohi64));
      // Generates
      //   PCast-to-I64( UifU(vbits[127:64], vbits[63:0] )
      //   == PCast-to-I64( vbits[127:0] )
      IRAtom* res
         = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, lo64));
      return res;
   }

   /* Else do it the slow way .. */
   /* First of all, collapse vbits down to a single bit. */
   tmp1 = NULL;
   switch (src_ty) {
      case Ity_I1:
         tmp1 = vbits;
         break;
      case Ity_I8:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
         break;
      case Ity_I16:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
         break;
      case Ity_I32:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
         break;
      case Ity_I64:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
         break;
      case Ity_I128: {
         /* Gah.  Chop it in half, OR the halves together, and compare
            that with zero. */
         IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
         IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
         IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
         tmp1         = assignNew('V', mce, Ity_I1,
                                       unop(Iop_CmpNEZ64, tmp4));
         break;
      }
      case Ity_V128: {
         /* Chop it in half, OR the halves together, and compare that
          * with zero.
          */
         IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vbits));
         IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vbits));
         IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
         tmp1         = assignNew('V', mce, Ity_I1,
                                       unop(Iop_CmpNEZ64, tmp4));
         break;
      }
      default:
         ppIRType(src_ty);
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Now widen up to the dst type. */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      case Ity_I128:
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
         return tmp1;
      case Ity_V256:
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128,
                                                    tmp1, tmp1));
         tmp1 = assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256,
                                                    tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}
/* This is a minor variant.  It takes an arg of some type and returns
   a value of the same type.  The result consists entirely of Defined
   (zero) bits except its least significant bit, which is a PCast of
   the entire argument down to a single bit. */
static IRAtom* mkPCastXXtoXXlsb ( MCEnv* mce, IRAtom* varg, IRType ty )
{
   if (ty == Ity_V128) {
      /* --- Case for V128 --- */
      IRAtom* varg128 = varg;
      // generates: PCast-to-I64(varg128)
      IRAtom* pcdTo64 = mkPCastTo(mce, Ity_I64, varg128);
      // Now introduce zeros (defined bits) in the top 63 places
      // generates: Def--(63)--Def PCast-to-I1(varg128)
      IRAtom* d63pc
         = assignNew('V', mce, Ity_I64, binop(Iop_And64, pcdTo64, mkU64(1)));
      // generates: Def--(64)--Def
      IRAtom* d64
         = definedOfType(Ity_I64);
      // generates: Def--(127)--Def PCast-to-I1(varg128)
      IRAtom* res
         = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, d64, d63pc));
      return res;
   }
   if (ty == Ity_I64) {
      /* --- Case for I64 --- */
      // PCast to 64
      IRAtom* pcd = mkPCastTo(mce, Ity_I64, varg);
      // Zero (Def) out the top 63 bits
      IRAtom* res
         = assignNew('V', mce, Ity_I64, binop(Iop_And64, pcd, mkU64(1)));
      return res;
   }
   /*NOTREACHED*/
   tl_assert(0);
}
/* --------- Optimistic casts. --------- */

/* The function takes and returns an expression of type TY. If any of the
   VBITS indicate defined (value == 0) the resulting expression has all bits
   set to 0. Otherwise, all bits are 1.  In words, if any bits are defined
   then all bits are made to be defined.

   In short we compute (vbits - (vbits >>u 1)) >>s (bitsize(vbits)-1).
*/
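/* Worked example of the identity above, using 8-bit lanes (illustrative
   only; the instrumenter emits the equivalent IR in mkOCastAt below):

      static inline unsigned char ocast8_ref ( unsigned char vbits )
      {
         // (vbits - (vbits >>u 1)) >>s 7
         return (unsigned char)( (signed char)(vbits - (vbits >> 1)) >> 7 );
      }

   ocast8_ref(0xFF) == 0xFF   -- no bit is defined, so the result stays
                                 all-undefined
   ocast8_ref(0xFE) == 0x00   -- bit 0 is defined, so the whole result
                                 becomes defined
*/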
static IRAtom* mkOCastAt( MCEnv* mce, IRType ty, IRAtom* vbits )
{
   IROp opSUB, opSHR, opSAR;
   UInt sh;

   switch (ty) {
      case Ity_I64:
         opSUB = Iop_Sub64; opSHR = Iop_Shr64; opSAR = Iop_Sar64; sh = 63;
         break;
      case Ity_I32:
         opSUB = Iop_Sub32; opSHR = Iop_Shr32; opSAR = Iop_Sar32; sh = 31;
         break;
      case Ity_I16:
         opSUB = Iop_Sub16; opSHR = Iop_Shr16; opSAR = Iop_Sar16; sh = 15;
         break;
      case Ity_I8:
         opSUB = Iop_Sub8; opSHR = Iop_Shr8; opSAR = Iop_Sar8; sh = 7;
         break;
      default:
         ppIRType(ty);
         VG_(tool_panic)("mkOCastTo");
   }

   IRAtom *shr1, *at;
   shr1 = assignNew('V', mce,ty, binop(opSHR, vbits, mkU8(1)));
   at   = assignNew('V', mce,ty, binop(opSUB, vbits, shr1));
   at   = assignNew('V', mce,ty, binop(opSAR, at, mkU8(sh)));
   return at;
}
/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
/*
   Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
   PCasting to Ity_U1.  However, sometimes it is necessary to be more
   accurate.  The insight is that the result is defined if two
   corresponding bits can be found, one from each argument, so that
   both bits are defined but are different -- that makes EQ say "No"
   and NE say "Yes".  Hence, we compute an improvement term and DifD
   it onto the "normal" (UifU) result.

   The result is:

   PCastTo<1> (
      -- naive version
      UifU<sz>(vxx, vyy)

      `DifD<sz>`

      -- improvement term
      OCast<sz>(vec)
   )

   where
     vec contains 0 (defined) bits where the corresponding arg bits
     are defined but different, and 1 bits otherwise.

     vec = Or<sz>( vxx,   // 0 iff bit defined
                   vyy,   // 0 iff bit defined
                   Not<sz>(Xor<sz>( xx, yy ))  // 0 iff bits different
                 )

     If any bit of vec is 0, the result is defined and so the
     improvement term should produce 0...0, else it should produce
     1...1.

     Hence require for the improvement term:

        OCast(vec) = if vec == 1...1 then 1...1 else 0...0

     which you can think of as an "optimistic cast" (OCast), the opposite
     of the normal "pessimistic cast" (PCast) family.  An OCast says all
     bits are defined if any bit is defined.

     It is possible to show that

        if vec == 1...1 then 1...1 else 0...0

     can be implemented in straight-line code as

        (vec - (vec >>u 1)) >>s (word-size-in-bits - 1)

   We note that vec contains the sub-term Or<sz>(vxx, vyy).  Since UifU is
   implemented with Or (since 1 signifies undefinedness), this is a
   duplicate of the UifU<sz>(vxx, vyy) term and so we can CSE it out, giving
   a final version of:

   let naive = UifU<sz>(vxx, vyy)
       vec   = Or<sz>(naive, Not<sz>(Xor<sz>(xx, yy)))

   PCastTo<1>( DifD<sz>(naive, OCast<sz>(vec)) )

   This was extensively re-analysed and checked on 6 July 05 and again
   in July 2017.
*/
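/* A small worked example of the scheme above, for sz = 8 (added for
   illustration only).  Let

      xx = 0x0F, vxx = 0x01   -- bit 0 of xx is undefined
      yy = 0xF0, vyy = 0x00   -- yy is fully defined

   Bits 4..7 of xx and yy are defined in both and differ, so the
   CmpEQ/CmpNE result is knowable despite xx's undefined bit 0.  The
   naive term alone would still flag it, but the improvement term
   rescues it:

      naive    = UifU<8>(vxx, vyy)                  = 0x01
      vec      = Or<8>(naive, Not<8>(0x0F ^ 0xF0))  = Or<8>(0x01, 0x00) = 0x01
      OCast<8>(vec)                                 = 0x00   (vec != 0xFF)
      improved = DifD<8>(naive, 0x00)               = 0x00
      PCastTo<1>(improved)                          = 0      -- i.e. defined
*/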
static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
                                    IRType  ty,
                                    IRAtom* vxx, IRAtom* vyy,
                                    IRAtom* xx,  IRAtom* yy )
{
   IRAtom *naive, *vec, *improved, *final_cast;
   IROp   opDIFD, opUIFU, opOR, opXOR, opNOT;

   tl_assert(isShadowAtom(mce,vxx));
   tl_assert(isShadowAtom(mce,vyy));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(vxx,xx));
   tl_assert(sameKindedAtoms(vyy,yy));

   switch (ty) {
      case Ity_I8:
         opDIFD = Iop_And8;
         opUIFU = Iop_Or8;
         opOR   = Iop_Or8;
         opXOR  = Iop_Xor8;
         opNOT  = Iop_Not8;
         break;
      case Ity_I16:
         opDIFD = Iop_And16;
         opUIFU = Iop_Or16;
         opOR   = Iop_Or16;
         opXOR  = Iop_Xor16;
         opNOT  = Iop_Not16;
         break;
      case Ity_I32:
         opDIFD = Iop_And32;
         opUIFU = Iop_Or32;
         opOR   = Iop_Or32;
         opXOR  = Iop_Xor32;
         opNOT  = Iop_Not32;
         break;
      case Ity_I64:
         opDIFD = Iop_And64;
         opUIFU = Iop_Or64;
         opOR   = Iop_Or64;
         opXOR  = Iop_Xor64;
         opNOT  = Iop_Not64;
         break;
      default:
         VG_(tool_panic)("expensiveCmpEQorNE");
   }

   naive
      = assignNew('V', mce, ty, binop(opUIFU, vxx, vyy));

   vec
      = assignNew(
           'V', mce,ty,
           binop( opOR,
                  naive,
                  assignNew(
                     'V', mce,ty,
                     unop(opNOT,
                          assignNew('V', mce,ty, binop(opXOR, xx, yy))))));

   improved
      = assignNew( 'V', mce,ty,
                   binop(opDIFD, naive, mkOCastAt(mce, ty, vec)));

   final_cast
      = mkPCastTo( mce, Ity_I1, improved );

   return final_cast;
}
/* --------- Semi-accurate interpretation of CmpORD. --------- */

/* CmpORD32{S,U} does PowerPC-style 3-way comparisons:

      CmpORD32S(x,y) = 1<<3   if  x <s y
                     = 1<<2   if  x >s y
                     = 1<<1   if  x == y

   and similarly the unsigned variant.  The default interpretation is:

      CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
                                  & (7<<1)

   The "& (7<<1)" reflects the fact that all result bits except 3,2,1
   are zero and therefore defined (viz, zero).

   Also deal with a special case better:

      CmpORD32S(x,0)

   Here, bit 3 (LT) of the result is a copy of the top bit of x and
   will be defined even if the rest of x isn't.  In which case we do:

      CmpORD32S#(x,x#,0,{impliedly 0}#)
         = PCast(x#) & (3<<1)      -- standard interp for GT#,EQ#
           | (x# >>u 31) << 3      -- LT# = x#[31]

   Analogous handling for CmpORD64{S,U}.
*/
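/* Illustration of the special case (added for clarity): suppose
   x# = 0x7FFFFFFF, i.e. only the sign bit of x is defined.  Then

      PCast(x#) & (3<<1)  = 0xFFFFFFFF & 0x6 = 0x6   -- GT#,EQ# undefined
      (x# >>u 31) << 3    = 0 << 3           = 0x0   -- LT# defined

   so CmpORD32S(x,0) gets a defined LT bit, which is exactly what the
   underlying comparison can actually deliver. */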
static Bool isZeroU32 ( IRAtom* e )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U32
              && e->Iex.Const.con->Ico.U32 == 0 );
}

static Bool isZeroU64 ( IRAtom* e )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U64
              && e->Iex.Const.con->Ico.U64 == 0 );
}

static IRAtom* doCmpORD ( MCEnv*  mce,
                          IROp    cmp_op,
                          IRAtom* xxhash, IRAtom* yyhash,
                          IRAtom* xx,     IRAtom* yy )
{
   Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
   Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
   IROp   opOR   = m64 ? Iop_Or64   : Iop_Or32;
   IROp   opAND  = m64 ? Iop_And64  : Iop_And32;
   IROp   opSHL  = m64 ? Iop_Shl64  : Iop_Shl32;
   IROp   opSHR  = m64 ? Iop_Shr64  : Iop_Shr32;
   IRType ty     = m64 ? Ity_I64    : Ity_I32;
   Int    width  = m64 ? 64         : 32;

   Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;

   IRAtom* threeLeft1 = NULL;
   IRAtom* sevenLeft1 = NULL;

   tl_assert(isShadowAtom(mce,xxhash));
   tl_assert(isShadowAtom(mce,yyhash));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(xxhash,xx));
   tl_assert(sameKindedAtoms(yyhash,yy));
   tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
             || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);

   if (0) {
      ppIROp(cmp_op); VG_(printf)(" ");
      ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
   }

   if (syned && isZero(yy)) {
      /* fancy interpretation */
      /* if yy is zero, then it must be fully defined (zero#). */
      tl_assert(isZero(yyhash));
      threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
      return
         binop(
            opOR,
            assignNew(
               'V', mce,ty,
               binop(
                  opAND,
                  mkPCastTo(mce,ty, xxhash),
                  threeLeft1
               )),
            assignNew(
               'V', mce,ty,
               binop(
                  opSHL,
                  assignNew(
                     'V', mce,ty,
                     binop(opSHR, xxhash, mkU8(width-1))),
                  mkU8(3)
               ))
         );
   } else {
      /* standard interpretation */
      sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
      return
         binop(
            opAND,
            mkPCastTo( mce,ty,
                       mkUifU(mce,ty, xxhash,yyhash)),
            sevenLeft1
         );
   }
}
/*------------------------------------------------------------*/
/*--- Emit a test and complaint if something is undefined. ---*/
/*------------------------------------------------------------*/

static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */


/* Set the annotations on a dirty helper to indicate that the stack
   pointer and instruction pointers might be read.  This is the
   behaviour of all 'emit-a-complaint' style functions we might
   call. */

static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
   di->nFxState = 2;
   di->fxState[0].fx        = Ifx_Read;
   di->fxState[0].offset    = mce->layout->offset_SP;
   di->fxState[0].size      = mce->layout->sizeof_SP;
   di->fxState[0].nRepeats  = 0;
   di->fxState[0].repeatLen = 0;
   di->fxState[1].fx        = Ifx_Read;
   di->fxState[1].offset    = mce->layout->offset_IP;
   di->fxState[1].size      = mce->layout->sizeof_IP;
   di->fxState[1].nRepeats  = 0;
   di->fxState[1].repeatLen = 0;
}
/* Check the supplied *original* |atom| for undefinedness, and emit a
   complaint if so.  Once that happens, mark it as defined.  This is
   possible because the atom is either a tmp or literal.  If it's a
   tmp, it will be shadowed by a tmp, and so we can set the shadow to
   be defined.  In fact as mentioned above, we will have to allocate a
   new tmp to carry the new 'defined' shadow value, and update the
   original->tmp mapping accordingly; we cannot simply assign a new
   value to an existing shadow tmp as this breaks SSAness.

   The checks are performed, any resulting complaint emitted, and
   |atom|'s shadow temp set to 'defined', ONLY in the case that
   |guard| evaluates to True at run-time.  If it evaluates to False
   then no action is performed.  If |guard| is NULL (the usual case)
   then it is assumed to be always-true, and hence these actions are
   performed unconditionally.

   This routine does not generate code to check the definedness of
   |guard|.  The caller is assumed to have taken care of that already.
*/
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom, IRExpr *guard )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;
   IRAtom*  origin;
   void*    fn;
   const HChar* nm;
   IRExpr** args;
   Int      nargs;

   // Don't do V bit tests if we're not reporting undefined value errors.
   if (MC_(clo_mc_level) == 1)
      return;

   if (guard)
      tl_assert(isOriginalAtom(mce, guard));

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretion for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom, HuOth );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   /* Get the origin info for the value we are about to check.  At
      least, if we are doing origin tracking.  If not, use a dummy
      zero origin. */
   if (MC_(clo_mc_level) == 3) {
      origin = schemeE( mce, atom );
      if (mce->hWordTy == Ity_I64) {
         origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
      }
   } else {
      origin = NULL;
   }

   fn    = NULL;
   nm    = NULL;
   args  = NULL;
   nargs = -1;

   switch (sz) {
      case 0:
         if (origin) {
            fn    = &MC_(helperc_value_check0_fail_w_o);
            nm    = "MC_(helperc_value_check0_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check0_fail_no_o);
            nm    = "MC_(helperc_value_check0_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 1:
         if (origin) {
            fn    = &MC_(helperc_value_check1_fail_w_o);
            nm    = "MC_(helperc_value_check1_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check1_fail_no_o);
            nm    = "MC_(helperc_value_check1_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 4:
         if (origin) {
            fn    = &MC_(helperc_value_check4_fail_w_o);
            nm    = "MC_(helperc_value_check4_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check4_fail_no_o);
            nm    = "MC_(helperc_value_check4_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 8:
         if (origin) {
            fn    = &MC_(helperc_value_check8_fail_w_o);
            nm    = "MC_(helperc_value_check8_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check8_fail_no_o);
            nm    = "MC_(helperc_value_check8_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 2:
      case 16:
         if (origin) {
            fn    = &MC_(helperc_value_checkN_fail_w_o);
            nm    = "MC_(helperc_value_checkN_fail_w_o)";
            args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
            nargs = 2;
         } else {
            fn    = &MC_(helperc_value_checkN_fail_no_o);
            nm    = "MC_(helperc_value_checkN_fail_no_o)";
            args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
            nargs = 1;
         }
         break;
      default:
         VG_(tool_panic)("unexpected szB");
   }

   tl_assert(fn);
   tl_assert(nm);
   tl_assert(args);
   tl_assert(nargs >= 0 && nargs <= 2);
   tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
              || (MC_(clo_mc_level) == 2 && origin == NULL) );

   di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
                           VG_(fnptr_to_fnentry)( fn ), args );
   di->guard = cond; // and cond is PCast-to-1(atom#)

   /* If the complaint is to be issued under a guard condition, AND
      that into the guard condition for the helper call. */
   if (guard) {
      IRAtom *g1 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, di->guard));
      IRAtom *g2 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, guard));
      IRAtom *e  = assignNew('V', mce, Ity_I32, binop(Iop_And32, g1, g2));
      di->guard  = assignNew('V', mce, Ity_I1,  unop(Iop_32to1, e));
   }

   setHelperAnns( mce, di );
   stmt( 'V', mce, IRStmt_Dirty(di));

   /* If |atom| is shadowed by an IRTemp, set the shadow tmp to be
      defined -- but only in the case where the guard evaluates to
      True at run-time.  Do the update by setting the orig->shadow
      mapping for tmp to reflect the fact that this shadow is getting
      a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      if (guard == NULL) {
         // guard is 'always True', hence update unconditionally
         newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
                          definedOfType(ty));
      } else {
         // update the temp only conditionally.  Do this by copying
         // its old value when the guard is False.
         // The old value ..
         IRTemp old_tmpV = findShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         IRAtom* new_tmpV
            = assignNew('V', mce, shadowTypeV(ty),
                        IRExpr_ITE(guard, definedOfType(ty),
                                          mkexpr(old_tmpV)));
         assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp), new_tmpV);
      }
   }
}
/*------------------------------------------------------------*/
/*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
/*------------------------------------------------------------*/

/* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
   partially fall into such a region: (offset,size) should either be
   completely in such a region or completely not-in such a region.
*/
static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
{
   Int minoffD, maxoffD, i;
   Int minoff = offset;
   Int maxoff = minoff + size - 1;
   tl_assert((minoff & ~0xFFFF) == 0);
   tl_assert((maxoff & ~0xFFFF) == 0);

   for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
      minoffD = mce->layout->alwaysDefd[i].offset;
      maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
      tl_assert((minoffD & ~0xFFFF) == 0);
      tl_assert((maxoffD & ~0xFFFF) == 0);

      if (maxoff < minoffD || maxoffD < minoff)
         continue; /* no overlap */
      if (minoff >= minoffD && maxoff <= maxoffD)
         return True; /* completely contained in an always-defd section */

      VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
   }
   return False; /* could not find any containing section */
}
/* Generate into bb suitable actions to shadow this Put.  If the state
   slice is marked 'always defined', do nothing.  Otherwise, write the
   supplied V bits to the shadow state.  We can pass in either an
   original atom or a V-atom, but not both.  In the former case the
   relevant V-bits are then generated from the original.
   We assume here, that the definedness of GUARD has already been checked.
*/
static
void do_shadow_PUT ( MCEnv* mce,  Int offset,
                     IRAtom* atom, IRAtom* vatom, IRExpr *guard )
{
   IRType ty;

   // Don't do shadow PUTs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom, HuOth );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put. */
      if (guard) {
         /* If the guard expression evaluates to false we simply Put the value
            that is already stored in the guest state slot */
         IRAtom *cond, *iffalse;

         cond    = assignNew('V', mce, Ity_I1, guard);
         iffalse = assignNew('V', mce, ty,
                             IRExpr_Get(offset + mce->layout->total_sizeB, ty));
         vatom   = assignNew('V', mce, ty, IRExpr_ITE(cond, vatom, iffalse));
      }
      stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ));
   }
}
/* Generate into bb suitable actions to shadow this PUTI (passed in
   in pieces).
*/
static
void do_shadow_PUTI ( MCEnv* mce, IRPutI *puti)
{
   IRAtom*     vatom;
   IRType      ty, tyS;
   Int         arrSize;
   IRRegArray* descr = puti->descr;
   IRAtom*     ix    = puti->ix;
   Int         bias  = puti->bias;
   IRAtom*     atom  = puti->data;

   // Don't do shadow PUTIs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETIs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   tl_assert(isOriginalAtom(mce,atom));
   vatom = expr2vbits( mce, atom, HuOth );
   tl_assert(sameKindedAtoms(atom, vatom));
   ty   = descr->elemTy;
   tyS  = shadowTypeV(ty);
   arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce, ix, NULL);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a cloned version of the Put that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      stmt( 'V', mce, IRStmt_PutI( mkIRPutI(new_descr, ix, bias, vatom) ));
   }
}
1703 /* Return an expression which contains the V bits corresponding to the
1704 given GET (passed in in pieces).
1706 static
1707 IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
1709 IRType tyS = shadowTypeV(ty);
1710 tl_assert(ty != Ity_I1);
1711 tl_assert(ty != Ity_I128);
1712 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1713 /* Always defined, return all zeroes of the relevant type */
1714 return definedOfType(tyS);
1715 } else {
1716 /* return a cloned version of the Get that refers to the shadow
1717 area. */
1718 /* FIXME: this isn't an atom! */
1719 return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
1724 /* Return an expression which contains the V bits corresponding to the
1725 given GETI (passed in in pieces).
1727 static
1728 IRExpr* shadow_GETI ( MCEnv* mce,
1729 IRRegArray* descr, IRAtom* ix, Int bias )
1731 IRType ty = descr->elemTy;
1732 IRType tyS = shadowTypeV(ty);
1733 Int arrSize = descr->nElems * sizeofIRType(ty);
1734 tl_assert(ty != Ity_I1);
1735 tl_assert(isOriginalAtom(mce,ix));
1736 complainIfUndefined(mce, ix, NULL);
1737 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1738 /* Always defined, return all zeroes of the relevant type */
1739 return definedOfType(tyS);
1740 } else {
1741 /* return a cloned version of the Get that refers to the shadow
1742 area. */
1743 IRRegArray* new_descr
1744 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1745 tyS, descr->nElems);
1746 return IRExpr_GetI( new_descr, ix, bias );
1751 /*------------------------------------------------------------*/
1752 /*--- Generating approximations for unknown operations, ---*/
1753 /*--- using lazy-propagate semantics ---*/
1754 /*------------------------------------------------------------*/
1756 /* Lazy propagation of undefinedness from two values, resulting in the
1757 specified shadow type.
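A value-level sketch of what the cases below compute, with 1-bits in a
shadow meaning "undefined" (the shadow words are invented for
illustration only):

finalVty == Ity_I32, va1 == 0x00000000, va2 == 0x00000000
-> result 0x00000000  (both args fully defined)
finalVty == Ity_I32, va1 == 0x00000000, va2 == 0x00000100
-> result 0xFFFFFFFF  (one doubtful input bit makes every bit of
the result doubtful)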
1759 static
1760 IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
1762 IRAtom* at;
1763 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1764 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1765 tl_assert(isShadowAtom(mce,va1));
1766 tl_assert(isShadowAtom(mce,va2));
1768 /* The general case is inefficient because PCast is an expensive
1769 operation. Here are some special cases which use PCast only
1770 once rather than twice. */
1772 /* I64 x I64 -> I64 */
1773 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
1774 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
1775 at = mkUifU(mce, Ity_I64, va1, va2);
1776 at = mkPCastTo(mce, Ity_I64, at);
1777 return at;
1780 /* I64 x I64 -> I32 */
1781 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
1782 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
1783 at = mkUifU(mce, Ity_I64, va1, va2);
1784 at = mkPCastTo(mce, Ity_I32, at);
1785 return at;
1788 /* I32 x I32 -> I32 */
1789 if (t1 == Ity_I32 && t2 == Ity_I32 && finalVty == Ity_I32) {
1790 if (0) VG_(printf)("mkLazy2: I32 x I32 -> I32\n");
1791 at = mkUifU(mce, Ity_I32, va1, va2);
1792 at = mkPCastTo(mce, Ity_I32, at);
1793 return at;
1796 if (0) {
1797 VG_(printf)("mkLazy2 ");
1798 ppIRType(t1);
1799 VG_(printf)("_");
1800 ppIRType(t2);
1801 VG_(printf)("_");
1802 ppIRType(finalVty);
1803 VG_(printf)("\n");
1806 /* General case: force everything via 32-bit intermediaries. */
1807 at = mkPCastTo(mce, Ity_I32, va1);
1808 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1809 at = mkPCastTo(mce, finalVty, at);
1810 return at;
1814 /* 3-arg version of the above. */
1815 static
1816 IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
1817 IRAtom* va1, IRAtom* va2, IRAtom* va3 )
1819 IRAtom* at;
1820 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1821 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1822 IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
1823 tl_assert(isShadowAtom(mce,va1));
1824 tl_assert(isShadowAtom(mce,va2));
1825 tl_assert(isShadowAtom(mce,va3));
1827 /* The general case is inefficient because PCast is an expensive
1828 operation. Here are some special cases which use PCast only
1829 twice rather than three times. */
1831 /* I32 x I64 x I64 -> I64 */
1832 /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1833 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
1834 && finalVty == Ity_I64) {
1835 if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
1836 /* Widen 1st arg to I64. Since 1st arg is typically a rounding
1837 mode indication which is fully defined, this should get
1838 folded out later. */
1839 at = mkPCastTo(mce, Ity_I64, va1);
1840 /* Now fold in 2nd and 3rd args. */
1841 at = mkUifU(mce, Ity_I64, at, va2);
1842 at = mkUifU(mce, Ity_I64, at, va3);
1843 /* and PCast once again. */
1844 at = mkPCastTo(mce, Ity_I64, at);
1845 return at;
1848 /* I32 x I8 x I64 -> I64 */
1849 if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I64
1850 && finalVty == Ity_I64) {
1851 if (0) VG_(printf)("mkLazy3: I32 x I8 x I64 -> I64\n");
1852 /* Widen 1st and 2nd args to I64. Since 1st arg is typically a
1853 * rounding mode indication which is fully defined, this should
1854 * get folded out later.
1856 IRAtom* at1 = mkPCastTo(mce, Ity_I64, va1);
1857 IRAtom* at2 = mkPCastTo(mce, Ity_I64, va2);
1858 at = mkUifU(mce, Ity_I64, at1, at2); // UifU(PCast(va1), PCast(va2))
1859 at = mkUifU(mce, Ity_I64, at, va3);
1860 /* and PCast once again. */
1861 at = mkPCastTo(mce, Ity_I64, at);
1862 return at;
1865 /* I32 x I64 x I64 -> I32 */
1866 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
1867 && finalVty == Ity_I32) {
1868 if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
1869 at = mkPCastTo(mce, Ity_I64, va1);
1870 at = mkUifU(mce, Ity_I64, at, va2);
1871 at = mkUifU(mce, Ity_I64, at, va3);
1872 at = mkPCastTo(mce, Ity_I32, at);
1873 return at;
1876 /* I32 x I32 x I32 -> I32 */
1877 /* 32-bit FP idiom, as (eg) happens on ARM */
1878 if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
1879 && finalVty == Ity_I32) {
1880 if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
1881 at = va1;
1882 at = mkUifU(mce, Ity_I32, at, va2);
1883 at = mkUifU(mce, Ity_I32, at, va3);
1884 at = mkPCastTo(mce, Ity_I32, at);
1885 return at;
1888 /* I32 x I128 x I128 -> I128 */
1889 /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1890 if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128
1891 && finalVty == Ity_I128) {
1892 if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n");
1893 /* Widen 1st arg to I128. Since 1st arg is typically a rounding
1894 mode indication which is fully defined, this should get
1895 folded out later. */
1896 at = mkPCastTo(mce, Ity_I128, va1);
1897 /* Now fold in 2nd and 3rd args. */
1898 at = mkUifU(mce, Ity_I128, at, va2);
1899 at = mkUifU(mce, Ity_I128, at, va3);
1900 /* and PCast once again. */
1901 at = mkPCastTo(mce, Ity_I128, at);
1902 return at;
1905 /* I32 x I8 x I128 -> I128 */
1906 /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1907 if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I128
1908 && finalVty == Ity_I128) {
1909 if (0) VG_(printf)("mkLazy3: I32 x I8 x I128 -> I128\n");
1910 /* Use I64 as an intermediate type, which means PCasting all 3
1911 args to I64 to start with. 1st arg is typically a rounding
1912 mode indication which is fully defined, so we hope that it
1913 will get folded out later. */
1914 IRAtom* at1 = mkPCastTo(mce, Ity_I64, va1);
1915 IRAtom* at2 = mkPCastTo(mce, Ity_I64, va2);
1916 IRAtom* at3 = mkPCastTo(mce, Ity_I64, va3);
1917 /* Now UifU all three together. */
1918 at = mkUifU(mce, Ity_I64, at1, at2); // UifU(PCast(va1), PCast(va2))
1919 at = mkUifU(mce, Ity_I64, at, at3); // ... `UifU` PCast(va3)
1920 /* and PCast once again. */
1921 at = mkPCastTo(mce, Ity_I128, at);
1922 return at;
1924 if (1) {
1925 VG_(printf)("mkLazy3: ");
1926 ppIRType(t1);
1927 VG_(printf)(" x ");
1928 ppIRType(t2);
1929 VG_(printf)(" x ");
1930 ppIRType(t3);
1931 VG_(printf)(" -> ");
1932 ppIRType(finalVty);
1933 VG_(printf)("\n");
1936 tl_assert(0);
1937 /* General case: force everything via 32-bit intermediaries. */
1939 at = mkPCastTo(mce, Ity_I32, va1);
1940 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1941 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
1942 at = mkPCastTo(mce, finalVty, at);
1943 return at;
1948 /* 4-arg version of the above. */
1949 static
1950 IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
1951 IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
1953 IRAtom* at;
1954 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1955 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1956 IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
1957 IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
1958 tl_assert(isShadowAtom(mce,va1));
1959 tl_assert(isShadowAtom(mce,va2));
1960 tl_assert(isShadowAtom(mce,va3));
1961 tl_assert(isShadowAtom(mce,va4));
1963 /* The general case is inefficient because PCast is an expensive
1964 operation. Here are some special cases which use PCast only
1965 once or twice rather than once per argument. */
1967 /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
1969 if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128 && t4 == Ity_I128
1970 && finalVty == Ity_I128) {
1971 if (0) VG_(printf)("mkLazy4: I32 x I128 x I128 x I128 -> I128\n");
1972 /* Widen 1st arg to I128. Since 1st arg is typically a rounding
1973 mode indication which is fully defined, this should get
1974 folded out later. */
1975 at = mkPCastTo(mce, Ity_I128, va1);
1976 /* Now fold in 2nd, 3rd, 4th args. */
1977 at = mkUifU(mce, Ity_I128, at, va2);
1978 at = mkUifU(mce, Ity_I128, at, va3);
1979 at = mkUifU(mce, Ity_I128, at, va4);
1980 /* and PCast once again. */
1981 at = mkPCastTo(mce, Ity_I128, at);
1982 return at;
1985 /* I32 x I64 x I64 x I64 -> I64 */
1986 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
1987 && finalVty == Ity_I64) {
1988 if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
1989 /* Widen 1st arg to I64. Since 1st arg is typically a rounding
1990 mode indication which is fully defined, this should get
1991 folded out later. */
1992 at = mkPCastTo(mce, Ity_I64, va1);
1993 /* Now fold in 2nd, 3rd, 4th args. */
1994 at = mkUifU(mce, Ity_I64, at, va2);
1995 at = mkUifU(mce, Ity_I64, at, va3);
1996 at = mkUifU(mce, Ity_I64, at, va4);
1997 /* and PCast once again. */
1998 at = mkPCastTo(mce, Ity_I64, at);
1999 return at;
2001 /* I32 x I32 x I32 x I32 -> I32 */
2002 /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
2003 if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32
2004 && finalVty == Ity_I32) {
2005 if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n");
2006 at = va1;
2007 /* Now fold in 2nd, 3rd, 4th args. */
2008 at = mkUifU(mce, Ity_I32, at, va2);
2009 at = mkUifU(mce, Ity_I32, at, va3);
2010 at = mkUifU(mce, Ity_I32, at, va4);
2011 at = mkPCastTo(mce, Ity_I32, at);
2012 return at;
2015 if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I8 && t4 == Ity_I8
2016 && finalVty == Ity_I32) {
2017 if (0) VG_(printf)("mkLazy4: I32 x I8 x I8 x I8 -> I32\n");
2018 at = mkPCastTo(mce, Ity_I8, va1);
2019 /* Now fold in 2nd, 3rd, 4th args. */
2020 at = mkUifU(mce, Ity_I8, at, va2);
2021 at = mkUifU(mce, Ity_I8, at, va3);
2022 at = mkUifU(mce, Ity_I8, at, va4);
2023 at = mkPCastTo(mce, Ity_I32, at);
2024 return at;
2027 if (t1 == Ity_I64 && t2 == Ity_I8 && t3 == Ity_I8 && t4 == Ity_I8
2028 && finalVty == Ity_I64) {
2029 if (0) VG_(printf)("mkLazy4: I64 x I8 x I8 x I8 -> I64\n");
2030 at = mkPCastTo(mce, Ity_I8, va1);
2031 /* Now fold in 2nd, 3rd, 4th args. */
2032 at = mkUifU(mce, Ity_I8, at, va2);
2033 at = mkUifU(mce, Ity_I8, at, va3);
2034 at = mkUifU(mce, Ity_I8, at, va4);
2035 at = mkPCastTo(mce, Ity_I64, at);
2036 return at;
2039 if (1) {
2040 VG_(printf)("mkLazy4: ");
2041 ppIRType(t1);
2042 VG_(printf)(" x ");
2043 ppIRType(t2);
2044 VG_(printf)(" x ");
2045 ppIRType(t3);
2046 VG_(printf)(" x ");
2047 ppIRType(t4);
2048 VG_(printf)(" -> ");
2049 ppIRType(finalVty);
2050 VG_(printf)("\n");
2053 tl_assert(0);
2057 /* Do the lazy propagation game from a null-terminated vector of
2058 atoms. These are presumably the arguments to a helper call, so the
2059 IRCallee info is also supplied in order that we can know which
2060 arguments should be ignored (via the .mcx_mask field).
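For example (the mask value is invented for illustration): a callee with
mcx_mask == 5 (binary 101) has args 0 and 2 skipped entirely, while the
definedness of every other arg is PCast-ed to the merge type and UifU-ed
into the final result.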
2062 static
2063 IRAtom* mkLazyN ( MCEnv* mce,
2064 IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
2066 Int i;
2067 IRAtom* here;
2068 IRAtom* curr;
2069 IRType mergeTy;
2070 Bool mergeTy64 = True;
2072 /* Decide on the type of the merge intermediary. If all relevant
2073 args are I64, then it's I64. In all other circumstances, use
2074 I32. */
2075 for (i = 0; exprvec[i]; i++) {
2076 tl_assert(i < 32);
2077 tl_assert(isOriginalAtom(mce, exprvec[i]));
2078 if (cee->mcx_mask & (1<<i))
2079 continue;
2080 if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
2081 mergeTy64 = False;
2084 mergeTy = mergeTy64 ? Ity_I64 : Ity_I32;
2085 curr = definedOfType(mergeTy);
2087 for (i = 0; exprvec[i]; i++) {
2088 tl_assert(i < 32);
2089 tl_assert(isOriginalAtom(mce, exprvec[i]));
2090 /* Only take notice of this arg if the callee's mc-exclusion
2091 mask does not say it is to be excluded. */
2092 if (cee->mcx_mask & (1<<i)) {
2093 /* the arg is to be excluded from definedness checking. Do
2094 nothing. */
2095 if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
2096 } else {
2097 /* calculate the arg's definedness, and pessimistically merge
2098 it in. */
2099 here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i], HuOth) );
2100 curr = mergeTy64
2101 ? mkUifU64(mce, here, curr)
2102 : mkUifU32(mce, here, curr);
2105 return mkPCastTo(mce, finalVtype, curr );
2109 /*------------------------------------------------------------*/
2110 /*--- Generating expensive sequences for exact carry-chain ---*/
2111 /*--- propagation in add/sub and related operations. ---*/
2112 /*------------------------------------------------------------*/
2114 static
2115 IRAtom* expensiveAddSub ( MCEnv* mce,
2116 Bool add,
2117 IRType ty,
2118 IRAtom* qaa, IRAtom* qbb,
2119 IRAtom* aa, IRAtom* bb )
2121 IRAtom *a_min, *b_min, *a_max, *b_max;
2122 IROp opAND, opOR, opXOR, opNOT, opADD, opSUB;
2124 tl_assert(isShadowAtom(mce,qaa));
2125 tl_assert(isShadowAtom(mce,qbb));
2126 tl_assert(isOriginalAtom(mce,aa));
2127 tl_assert(isOriginalAtom(mce,bb));
2128 tl_assert(sameKindedAtoms(qaa,aa));
2129 tl_assert(sameKindedAtoms(qbb,bb));
2131 switch (ty) {
2132 case Ity_I32:
2133 opAND = Iop_And32;
2134 opOR = Iop_Or32;
2135 opXOR = Iop_Xor32;
2136 opNOT = Iop_Not32;
2137 opADD = Iop_Add32;
2138 opSUB = Iop_Sub32;
2139 break;
2140 case Ity_I64:
2141 opAND = Iop_And64;
2142 opOR = Iop_Or64;
2143 opXOR = Iop_Xor64;
2144 opNOT = Iop_Not64;
2145 opADD = Iop_Add64;
2146 opSUB = Iop_Sub64;
2147 break;
2148 default:
2149 VG_(tool_panic)("expensiveAddSub");
2152 // a_min = aa & ~qaa
2153 a_min = assignNew('V', mce,ty,
2154 binop(opAND, aa,
2155 assignNew('V', mce,ty, unop(opNOT, qaa))));
2157 // b_min = bb & ~qbb
2158 b_min = assignNew('V', mce,ty,
2159 binop(opAND, bb,
2160 assignNew('V', mce,ty, unop(opNOT, qbb))));
2162 // a_max = aa | qaa
2163 a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));
2165 // b_max = bb | qbb
2166 b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));
2168 if (add) {
2169 // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
2170 return
2171 assignNew('V', mce,ty,
2172 binop( opOR,
2173 assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
2174 assignNew('V', mce,ty,
2175 binop( opXOR,
2176 assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
2177 assignNew('V', mce,ty, binop(opADD, a_max, b_max))
2182 } else {
2183 // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
2184 return
2185 assignNew('V', mce,ty,
2186 binop( opOR,
2187 assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
2188 assignNew('V', mce,ty,
2189 binop( opXOR,
2190 assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
2191 assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
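// A worked 4-bit example of the 'add' case (the values are invented for
// illustration; 1-bits in qaa/qbb mean "undefined"):
//
//    aa = 0011, qaa = 0100  (bit 2 unknown, so aa is really 0011 or 0111)
//    bb = 0001, qbb = 0000  (fully defined)
//
//    a_min = 0011, a_max = 0111, b_min = b_max = 0001
//    (a_min + b_min) ^ (a_max + b_max) = 0100 ^ 1000 = 1100
//    result = (qaa | qbb) | 1100 = 1100
//
// That is, bits 2 and 3 of the sum are reported as undefined -- exactly
// the bits in which the two possible true sums (0100 and 1000) differ --
// while bits 0 and 1 remain defined.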
2201 static
2202 IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
2203 IRAtom* atom, IRAtom* vatom )
2205 IRType ty;
2206 IROp xorOp, subOp, andOp;
2207 IRExpr *one;
2208 IRAtom *improver, *improved;
2209 tl_assert(isShadowAtom(mce,vatom));
2210 tl_assert(isOriginalAtom(mce,atom));
2211 tl_assert(sameKindedAtoms(atom,vatom));
2213 switch (czop) {
2214 case Iop_Ctz32:
2215 ty = Ity_I32;
2216 xorOp = Iop_Xor32;
2217 subOp = Iop_Sub32;
2218 andOp = Iop_And32;
2219 one = mkU32(1);
2220 break;
2221 case Iop_Ctz64:
2222 ty = Ity_I64;
2223 xorOp = Iop_Xor64;
2224 subOp = Iop_Sub64;
2225 andOp = Iop_And64;
2226 one = mkU64(1);
2227 break;
2228 default:
2229 ppIROp(czop);
2230 VG_(tool_panic)("memcheck:expensiveCountTrailingZeroes");
2233 // improver = atom ^ (atom - 1)
2235 // That is, improver has its low ctz(atom)+1 bits equal to one;
2236 // higher bits (if any) equal to zero.
2237 improver = assignNew('V', mce,ty,
2238 binop(xorOp,
2239 atom,
2240 assignNew('V', mce, ty,
2241 binop(subOp, atom, one))));
2243 // improved = vatom & improver
2245 // That is, treat any V bits above bit position ctz(atom) as
2246 // "defined" -- bits above the lowest set bit cannot change the count.
2247 improved = assignNew('V', mce, ty,
2248 binop(andOp, vatom, improver));
2250 // Return pessimizing cast of improved.
2251 return mkPCastTo(mce, ty, improved);
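// A worked example (the operand values are invented for illustration):
// if atom == 0b1100 then atom - 1 == 0b1011 and improver == 0b0111, so
// doubtful bits at positions 3 and above are masked out of vatom.  If
// the only doubtful bit is bit 3, the two possible true operands 0b0100
// and 0b1100 both have two trailing zeroes, and the result is correctly
// reported as fully defined.  A doubtful bit at position 2 or below
// survives the mask and, via the PCast, makes the whole result
// undefined.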
2255 /*------------------------------------------------------------*/
2256 /*--- Scalar shifts. ---*/
2257 /*------------------------------------------------------------*/
2259 /* Produce an interpretation for (aa << bb) (or >>s, >>u). The basic
2260 idea is to shift the definedness bits by the original shift amount.
2261 This introduces 0s ("defined") in new positions for left shifts and
2262 unsigned right shifts, and copies the top definedness bit for
2263 signed right shifts. So, conveniently, applying the original shift
2264 operator to the definedness bits for the left arg is exactly the
2265 right thing to do:
2267 (qaa << bb)
2269 However if the shift amount is undefined then the whole result
2270 is undefined. Hence need:
2272 (qaa << bb) `UifU` PCast(qbb)
2274 If the shift amount bb is a literal, then qbb will say 'all defined'
2275 and the UifU and PCast will get folded out by post-instrumentation
2276 optimisation.
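A small worked example (8-bit values, invented for illustration; 1-bits
in a shadow mean "undefined"):

aa = 00001011, qaa = 00000010 (bit 1 doubtful), bb = 2 (defined)

qaa << 2      = 00001000
PCast(qbb)    = 00000000
shadow result = 00001000

so only bit 3 of the shifted result is doubtful, which is exactly where
the doubtful source bit lands.  If bb itself had any doubtful bits,
PCast(qbb) would be all ones and the whole result would be reported
undefined.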
2278 static IRAtom* scalarShift ( MCEnv* mce,
2279 IRType ty,
2280 IROp original_op,
2281 IRAtom* qaa, IRAtom* qbb,
2282 IRAtom* aa, IRAtom* bb )
2284 tl_assert(isShadowAtom(mce,qaa));
2285 tl_assert(isShadowAtom(mce,qbb));
2286 tl_assert(isOriginalAtom(mce,aa));
2287 tl_assert(isOriginalAtom(mce,bb));
2288 tl_assert(sameKindedAtoms(qaa,aa));
2289 tl_assert(sameKindedAtoms(qbb,bb));
2290 return
2291 assignNew(
2292 'V', mce, ty,
2293 mkUifU( mce, ty,
2294 assignNew('V', mce, ty, binop(original_op, qaa, bb)),
2295 mkPCastTo(mce, ty, qbb)
2301 /*------------------------------------------------------------*/
2302 /*--- Helpers for dealing with vector primops. ---*/
2303 /*------------------------------------------------------------*/
2305 /* Vector pessimisation -- pessimise within each lane individually. */
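// For example (the shadow values are invented for illustration):
// feeding a V128 shadow whose 32-bit lanes are
// { 0, 0x00000400, 0, 0xFFFFFFFF } through mkPCast32x4 yields
// { 0, 0xFFFFFFFF, 0, 0xFFFFFFFF } -- each lane becomes either "all
// defined" or "all undefined", and lanes never contaminate each other.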
2307 static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
2309 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
2312 static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
2314 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
2317 static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
2319 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
2322 static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
2324 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
2327 static IRAtom* mkPCast128x1 ( MCEnv* mce, IRAtom* at )
2329 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ128x1, at));
2332 static IRAtom* mkPCast64x4 ( MCEnv* mce, IRAtom* at )
2334 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ64x4, at));
2337 static IRAtom* mkPCast32x8 ( MCEnv* mce, IRAtom* at )
2339 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ32x8, at));
2342 static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
2344 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
2347 static IRAtom* mkPCast16x16 ( MCEnv* mce, IRAtom* at )
2349 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ16x16, at));
2352 static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
2354 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
2357 static IRAtom* mkPCast8x32 ( MCEnv* mce, IRAtom* at )
2359 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ8x32, at));
2362 static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
2364 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
2367 static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
2369 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at));
2372 static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
2374 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at));
2378 /* Here's a simple scheme capable of handling ops derived from SSE1
2379 code while only generating ops that can be efficiently
2380 implemented in SSE1. */
2382 /* All-lanes versions are straightforward:
2384 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#))
2386 unary32Fx4(x,y) ==> PCast32x4(x#)
2388 Lowest-lane-only versions are more complex:
2390 binary32F0x4(x,y) ==> SetV128lo32(
2391 x#,
2392 PCast32(V128to32(UifUV128(x#,y#)))
2395 This is perhaps not so obvious. In particular, it's faster to
2396 do a V128-bit UifU and then take the bottom 32 bits than the more
2397 obvious scheme of taking the bottom 32 bits of each operand
2398 and doing a 32-bit UifU, basically because UifU is fast and
2399 chopping lanes off vector values is slow.
2401 Finally:
2403 unary32F0x4(x) ==> SetV128lo32(
2404 x#,
2405 PCast32(V128to32(x#))
2408 Where:
2410 PCast32(v#) = 1Sto32(CmpNE32(v#,0))
2411 PCast32x4(v#) = CmpNEZ32x4(v#)
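A lane-level sketch (the shadow values are invented for illustration):
suppose x# is fully defined and y# has doubtful bits only in lane 2.
Then binary32Fx4 marks exactly lane 2 of the result as fully undefined
and the other three lanes as defined.  binary32F0x4 on the same inputs
gives a fully defined lane 0 (lane 0 of the UifU is zero) and simply
copies lanes 1..3 of the shadow from x#, mirroring the way the
underlying op copies the upper lanes of its first operand.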
2414 static
2415 IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2417 IRAtom* at;
2418 tl_assert(isShadowAtom(mce, vatomX));
2419 tl_assert(isShadowAtom(mce, vatomY));
2420 at = mkUifUV128(mce, vatomX, vatomY);
2421 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
2422 return at;
2425 static
2426 IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
2428 IRAtom* at;
2429 tl_assert(isShadowAtom(mce, vatomX));
2430 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
2431 return at;
2434 static
2435 IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2437 IRAtom* at;
2438 tl_assert(isShadowAtom(mce, vatomX));
2439 tl_assert(isShadowAtom(mce, vatomY));
2440 at = mkUifUV128(mce, vatomX, vatomY);
2441 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
2442 at = mkPCastTo(mce, Ity_I32, at);
2443 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
2444 return at;
2447 static
2448 IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
2450 IRAtom* at;
2451 tl_assert(isShadowAtom(mce, vatomX));
2452 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
2453 at = mkPCastTo(mce, Ity_I32, at);
2454 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
2455 return at;
2458 /* --- ... and ... 64Fx2 versions of the same ... --- */
2460 static
2461 IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2463 IRAtom* at;
2464 tl_assert(isShadowAtom(mce, vatomX));
2465 tl_assert(isShadowAtom(mce, vatomY));
2466 at = mkUifUV128(mce, vatomX, vatomY);
2467 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
2468 return at;
2471 static
2472 IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
2474 IRAtom* at;
2475 tl_assert(isShadowAtom(mce, vatomX));
2476 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
2477 return at;
2480 static
2481 IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2483 IRAtom* at;
2484 tl_assert(isShadowAtom(mce, vatomX));
2485 tl_assert(isShadowAtom(mce, vatomY));
2486 at = mkUifUV128(mce, vatomX, vatomY);
2487 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
2488 at = mkPCastTo(mce, Ity_I64, at);
2489 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
2490 return at;
2493 static
2494 IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
2496 IRAtom* at;
2497 tl_assert(isShadowAtom(mce, vatomX));
2498 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
2499 at = mkPCastTo(mce, Ity_I64, at);
2500 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
2501 return at;
2504 /* --- --- ... and ... 32Fx2 versions of the same --- --- */
2506 static
2507 IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2509 IRAtom* at;
2510 tl_assert(isShadowAtom(mce, vatomX));
2511 tl_assert(isShadowAtom(mce, vatomY));
2512 at = mkUifU64(mce, vatomX, vatomY);
2513 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at));
2514 return at;
2517 static
2518 IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX )
2520 IRAtom* at;
2521 tl_assert(isShadowAtom(mce, vatomX));
2522 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX));
2523 return at;
2526 /* --- ... and ... 64Fx4 versions of the same ... --- */
2528 static
2529 IRAtom* binary64Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2531 IRAtom* at;
2532 tl_assert(isShadowAtom(mce, vatomX));
2533 tl_assert(isShadowAtom(mce, vatomY));
2534 at = mkUifUV256(mce, vatomX, vatomY);
2535 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, at));
2536 return at;
2539 static
2540 IRAtom* unary64Fx4 ( MCEnv* mce, IRAtom* vatomX )
2542 IRAtom* at;
2543 tl_assert(isShadowAtom(mce, vatomX));
2544 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, vatomX));
2545 return at;
2548 /* --- ... and ... 32Fx8 versions of the same ... --- */
2550 static
2551 IRAtom* binary32Fx8 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2553 IRAtom* at;
2554 tl_assert(isShadowAtom(mce, vatomX));
2555 tl_assert(isShadowAtom(mce, vatomY));
2556 at = mkUifUV256(mce, vatomX, vatomY);
2557 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, at));
2558 return at;
2561 static
2562 IRAtom* unary32Fx8 ( MCEnv* mce, IRAtom* vatomX )
2564 IRAtom* at;
2565 tl_assert(isShadowAtom(mce, vatomX));
2566 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, vatomX));
2567 return at;
2570 /* --- 64Fx2 binary FP ops, with rounding mode --- */
2572 static
2573 IRAtom* binary64Fx2_w_rm ( MCEnv* mce, IRAtom* vRM,
2574 IRAtom* vatomX, IRAtom* vatomY )
2576 /* This is the same as binary64Fx2, except that we subsequently
2577 pessimise vRM (definedness of the rounding mode), widen to 128
2578 bits and UifU it into the result. As with the scalar cases, if
2579 the RM is a constant then it is defined and so this extra bit
2580 will get constant-folded out later. */
2581 // "do" the vector args
2582 IRAtom* t1 = binary64Fx2(mce, vatomX, vatomY);
2583 // PCast the RM, and widen it to 128 bits
2584 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
2585 // Roll it into the result
2586 t1 = mkUifUV128(mce, t1, t2);
2587 return t1;
2590 /* --- ... and ... 32Fx4 versions of the same --- */
2592 static
2593 IRAtom* binary32Fx4_w_rm ( MCEnv* mce, IRAtom* vRM,
2594 IRAtom* vatomX, IRAtom* vatomY )
2596 IRAtom* t1 = binary32Fx4(mce, vatomX, vatomY);
2597 // PCast the RM, and widen it to 128 bits
2598 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
2599 // Roll it into the result
2600 t1 = mkUifUV128(mce, t1, t2);
2601 return t1;
2604 /* --- ... and ... 64Fx4 versions of the same --- */
2606 static
2607 IRAtom* binary64Fx4_w_rm ( MCEnv* mce, IRAtom* vRM,
2608 IRAtom* vatomX, IRAtom* vatomY )
2610 IRAtom* t1 = binary64Fx4(mce, vatomX, vatomY);
2611 // PCast the RM, and widen it to 256 bits
2612 IRAtom* t2 = mkPCastTo(mce, Ity_V256, vRM);
2613 // Roll it into the result
2614 t1 = mkUifUV256(mce, t1, t2);
2615 return t1;
2618 /* --- ... and ... 32Fx8 versions of the same --- */
2620 static
2621 IRAtom* binary32Fx8_w_rm ( MCEnv* mce, IRAtom* vRM,
2622 IRAtom* vatomX, IRAtom* vatomY )
2624 IRAtom* t1 = binary32Fx8(mce, vatomX, vatomY);
2625 // PCast the RM, and widen it to 256 bits
2626 IRAtom* t2 = mkPCastTo(mce, Ity_V256, vRM);
2627 // Roll it into the result
2628 t1 = mkUifUV256(mce, t1, t2);
2629 return t1;
2632 /* --- 64Fx2 unary FP ops, with rounding mode --- */
2634 static
2635 IRAtom* unary64Fx2_w_rm ( MCEnv* mce, IRAtom* vRM, IRAtom* vatomX )
2637 /* Same scheme as binary64Fx2_w_rm. */
2638 // "do" the vector arg
2639 IRAtom* t1 = unary64Fx2(mce, vatomX);
2640 // PCast the RM, and widen it to 128 bits
2641 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
2642 // Roll it into the result
2643 t1 = mkUifUV128(mce, t1, t2);
2644 return t1;
2647 /* --- ... and ... 32Fx4 versions of the same --- */
2649 static
2650 IRAtom* unary32Fx4_w_rm ( MCEnv* mce, IRAtom* vRM, IRAtom* vatomX )
2652 /* Same scheme as unary64Fx2_w_rm. */
2653 IRAtom* t1 = unary32Fx4(mce, vatomX);
2654 // PCast the RM, and widen it to 128 bits
2655 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
2656 // Roll it into the result
2657 t1 = mkUifUV128(mce, t1, t2);
2658 return t1;
2662 /* --- --- Vector saturated narrowing --- --- */
2664 /* We used to do something very clever here, but on closer inspection
2665 (2011-Jun-15), and in particular bug #279698, it turns out to be
2666 wrong. Part of the problem came from the fact that for a long
2667 time, the IR primops to do with saturated narrowing were
2668 underspecified and managed to confuse multiple cases which needed
2669 to be separate: the op names had a signedness qualifier, but in
2670 fact the source and destination signednesses needed to be specified
2671 independently, so the op names really need two independent
2672 signedness specifiers.
2674 As of 2011-Jun-15 (ish) the underspecification was sorted out
2675 properly. The incorrect instrumentation remained, though. That
2676 has now (2011-Oct-22) been fixed.
2678 What we now do is simple:
2680 Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a
2681 number of lanes, X is the source lane width and signedness, and Y
2682 is the destination lane width and signedness. In all cases the
2683 destination lane width is half the source lane width, so the names
2684 have a bit of redundancy, but are at least easy to read.
2686 For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s
2687 to unsigned 16s.
2689 Let Vanilla(OP) be a function that takes OP, one of these
2690 saturating narrowing ops, and produces the same "shaped" narrowing
2691 op which is not saturating, but merely dumps the most significant
2692 bits. "same shape" means that the lane numbers and widths are the
2693 same as with OP.
2695 For example, Vanilla(Iop_QNarrowBin32Sto16Ux8)
2696 = Iop_NarrowBin32to16x8,
2697 that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by
2698 dumping the top half of each lane.
2700 So, with that in place, the scheme is simple, and it is simple to
2701 pessimise each lane individually and then apply Vanilla(OP) so as
2702 to get the result in the right "shape". If the original OP is
2703 QNarrowBinXtoYxZ then we produce
2705 Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) )
2707 or for the case when OP is unary (Iop_QNarrowUn*)
2709 Vanilla(OP)( PCast-X-to-X-x-Z(vatom) )
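A concrete sketch (the op is one of those handled below): for
Iop_QNarrowBin32Sto16Sx8, Vanilla(OP) is Iop_NarrowBin32to16x8.  A
doubtful bit anywhere in a 32-bit source lane can change whether that
lane saturates, and hence can affect every bit of the corresponding
16-bit result lane; so each operand's shadow is first pessimised with
PCast32x4 and only then narrowed with the vanilla op, leaving the
affected result lane fully undefined and the others untouched.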
2711 static
2712 IROp vanillaNarrowingOpOfShape ( IROp qnarrowOp )
2714 switch (qnarrowOp) {
2715 /* Binary: (128, 128) -> 128 */
2716 case Iop_QNarrowBin16Sto8Ux16:
2717 case Iop_QNarrowBin16Sto8Sx16:
2718 case Iop_QNarrowBin16Uto8Ux16:
2719 case Iop_QNarrowBin64Sto32Sx4:
2720 case Iop_QNarrowBin64Uto32Ux4:
2721 return Iop_NarrowBin16to8x16;
2722 case Iop_QNarrowBin32Sto16Ux8:
2723 case Iop_QNarrowBin32Sto16Sx8:
2724 case Iop_QNarrowBin32Uto16Ux8:
2725 return Iop_NarrowBin32to16x8;
2726 /* Binary: (64, 64) -> 64 */
2727 case Iop_QNarrowBin32Sto16Sx4:
2728 return Iop_NarrowBin32to16x4;
2729 case Iop_QNarrowBin16Sto8Ux8:
2730 case Iop_QNarrowBin16Sto8Sx8:
2731 return Iop_NarrowBin16to8x8;
2732 /* Unary: 128 -> 64 */
2733 case Iop_QNarrowUn64Uto32Ux2:
2734 case Iop_QNarrowUn64Sto32Sx2:
2735 case Iop_QNarrowUn64Sto32Ux2:
2736 return Iop_NarrowUn64to32x2;
2737 case Iop_QNarrowUn32Uto16Ux4:
2738 case Iop_QNarrowUn32Sto16Sx4:
2739 case Iop_QNarrowUn32Sto16Ux4:
2740 case Iop_F32toF16x4:
2741 return Iop_NarrowUn32to16x4;
2742 case Iop_QNarrowUn16Uto8Ux8:
2743 case Iop_QNarrowUn16Sto8Sx8:
2744 case Iop_QNarrowUn16Sto8Ux8:
2745 return Iop_NarrowUn16to8x8;
2746 default:
2747 ppIROp(qnarrowOp);
2748 VG_(tool_panic)("vanillaNarrowOpOfShape");
2752 static
2753 IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op,
2754 IRAtom* vatom1, IRAtom* vatom2)
2756 IRAtom *at1, *at2, *at3;
2757 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2758 switch (narrow_op) {
2759 case Iop_QNarrowBin64Sto32Sx4: pcast = mkPCast32x4; break;
2760 case Iop_QNarrowBin64Uto32Ux4: pcast = mkPCast32x4; break;
2761 case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
2762 case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break;
2763 case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break;
2764 case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
2765 case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break;
2766 case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
2767 default: VG_(tool_panic)("vectorNarrowBinV128");
2769 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2770 tl_assert(isShadowAtom(mce,vatom1));
2771 tl_assert(isShadowAtom(mce,vatom2));
2772 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
2773 at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
2774 at3 = assignNew('V', mce, Ity_V128, binop(vanilla_narrow, at1, at2));
2775 return at3;
2778 static
2779 IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op,
2780 IRAtom* vatom1, IRAtom* vatom2)
2782 IRAtom *at1, *at2, *at3;
2783 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2784 switch (narrow_op) {
2785 case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break;
2786 case Iop_QNarrowBin16Sto8Sx8: pcast = mkPCast16x4; break;
2787 case Iop_QNarrowBin16Sto8Ux8: pcast = mkPCast16x4; break;
2788 default: VG_(tool_panic)("vectorNarrowBin64");
2790 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2791 tl_assert(isShadowAtom(mce,vatom1));
2792 tl_assert(isShadowAtom(mce,vatom2));
2793 at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
2794 at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
2795 at3 = assignNew('V', mce, Ity_I64, binop(vanilla_narrow, at1, at2));
2796 return at3;
2799 static
2800 IRAtom* vectorNarrowUnV128 ( MCEnv* mce, IROp narrow_op,
2801 IRAtom* vatom1)
2803 IRAtom *at1, *at2;
2804 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2805 tl_assert(isShadowAtom(mce,vatom1));
2806 /* For vanilla narrowing (non-saturating), we can just apply
2807 the op directly to the V bits. */
2808 switch (narrow_op) {
2809 case Iop_NarrowUn16to8x8:
2810 case Iop_NarrowUn32to16x4:
2811 case Iop_NarrowUn64to32x2:
2812 case Iop_F32toF16x4:
2813 at1 = assignNew('V', mce, Ity_I64, unop(narrow_op, vatom1));
2814 return at1;
2815 default:
2816 break; /* Do Plan B */
2818 /* Plan B: for ops that involve a saturation operation on the args,
2819 we must PCast before the vanilla narrow. */
2820 switch (narrow_op) {
2821 case Iop_QNarrowUn16Sto8Sx8: pcast = mkPCast16x8; break;
2822 case Iop_QNarrowUn16Sto8Ux8: pcast = mkPCast16x8; break;
2823 case Iop_QNarrowUn16Uto8Ux8: pcast = mkPCast16x8; break;
2824 case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break;
2825 case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break;
2826 case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break;
2827 case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break;
2828 case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break;
2829 case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break;
2830 default: VG_(tool_panic)("vectorNarrowUnV128");
2832 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2833 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
2834 at2 = assignNew('V', mce, Ity_I64, unop(vanilla_narrow, at1));
2835 return at2;
2838 static
2839 IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op,
2840 IRAtom* vatom1)
2842 IRAtom *at1, *at2;
2843 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2844 switch (longen_op) {
2845 case Iop_Widen8Uto16x8: pcast = mkPCast16x8; break;
2846 case Iop_Widen8Sto16x8: pcast = mkPCast16x8; break;
2847 case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break;
2848 case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break;
2849 case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break;
2850 case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break;
2851 case Iop_F16toF32x4: pcast = mkPCast32x4; break;
2852 default: VG_(tool_panic)("vectorWidenI64");
2854 tl_assert(isShadowAtom(mce,vatom1));
2855 at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1));
2856 at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1));
2857 return at2;
2861 /* --- --- Vector integer arithmetic --- --- */
2863 /* Simple ... UifU the args and per-lane pessimise the results. */
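// For example (the values are invented for illustration): an Iop_Add32x4
// whose first operand is fully defined and whose second operand has a
// single doubtful bit in lane 1 gets, via binary32Ix4 below, a result
// shadow in which lane 1 is entirely undefined and lanes 0, 2 and 3 are
// defined -- no attempt is made to track which bits of the doubtful lane
// could really be affected.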
2865 /* --- V256-bit versions --- */
2867 static
2868 IRAtom* binary8Ix32 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2870 IRAtom* at;
2871 at = mkUifUV256(mce, vatom1, vatom2);
2872 at = mkPCast8x32(mce, at);
2873 return at;
2876 static
2877 IRAtom* binary16Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2879 IRAtom* at;
2880 at = mkUifUV256(mce, vatom1, vatom2);
2881 at = mkPCast16x16(mce, at);
2882 return at;
2885 static
2886 IRAtom* binary32Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2888 IRAtom* at;
2889 at = mkUifUV256(mce, vatom1, vatom2);
2890 at = mkPCast32x8(mce, at);
2891 return at;
2894 static
2895 IRAtom* binary64Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2897 IRAtom* at;
2898 at = mkUifUV256(mce, vatom1, vatom2);
2899 at = mkPCast64x4(mce, at);
2900 return at;
2903 /* --- V128-bit versions --- */
2905 static
2906 IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2908 IRAtom* at;
2909 at = mkUifUV128(mce, vatom1, vatom2);
2910 at = mkPCast8x16(mce, at);
2911 return at;
2914 static
2915 IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2917 IRAtom* at;
2918 at = mkUifUV128(mce, vatom1, vatom2);
2919 at = mkPCast16x8(mce, at);
2920 return at;
2923 static
2924 IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2926 IRAtom* at;
2927 at = mkUifUV128(mce, vatom1, vatom2);
2928 at = mkPCast32x4(mce, at);
2929 return at;
2932 static
2933 IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2935 IRAtom* at;
2936 at = mkUifUV128(mce, vatom1, vatom2);
2937 at = mkPCast64x2(mce, at);
2938 return at;
2941 static
2942 IRAtom* binary128Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2944 IRAtom* at;
2945 at = mkUifUV128(mce, vatom1, vatom2);
2946 at = mkPCast128x1(mce, at);
2947 return at;
2950 /* --- 64-bit versions --- */
2952 static
2953 IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2955 IRAtom* at;
2956 at = mkUifU64(mce, vatom1, vatom2);
2957 at = mkPCast8x8(mce, at);
2958 return at;
2961 static
2962 IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2964 IRAtom* at;
2965 at = mkUifU64(mce, vatom1, vatom2);
2966 at = mkPCast16x4(mce, at);
2967 return at;
2970 static
2971 IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2973 IRAtom* at;
2974 at = mkUifU64(mce, vatom1, vatom2);
2975 at = mkPCast32x2(mce, at);
2976 return at;
2979 static
2980 IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2982 IRAtom* at;
2983 at = mkUifU64(mce, vatom1, vatom2);
2984 at = mkPCastTo(mce, Ity_I64, at);
2985 return at;
2988 /* --- 32-bit versions --- */
2990 static
2991 IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2993 IRAtom* at;
2994 at = mkUifU32(mce, vatom1, vatom2);
2995 at = mkPCast8x4(mce, at);
2996 return at;
2999 static
3000 IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
3002 IRAtom* at;
3003 at = mkUifU32(mce, vatom1, vatom2);
3004 at = mkPCast16x2(mce, at);
3005 return at;
3009 /*------------------------------------------------------------*/
3010 /*--- Generate shadow values from all kinds of IRExprs. ---*/
3011 /*------------------------------------------------------------*/
3013 static
3014 IRAtom* expr2vbits_Qop ( MCEnv* mce,
3015 IROp op,
3016 IRAtom* atom1, IRAtom* atom2,
3017 IRAtom* atom3, IRAtom* atom4 )
3019 IRAtom* vatom1 = expr2vbits( mce, atom1, HuOth );
3020 IRAtom* vatom2 = expr2vbits( mce, atom2, HuOth );
3021 IRAtom* vatom3 = expr2vbits( mce, atom3, HuOth );
3022 IRAtom* vatom4 = expr2vbits( mce, atom4, HuOth );
3024 tl_assert(isOriginalAtom(mce,atom1));
3025 tl_assert(isOriginalAtom(mce,atom2));
3026 tl_assert(isOriginalAtom(mce,atom3));
3027 tl_assert(isOriginalAtom(mce,atom4));
3028 tl_assert(isShadowAtom(mce,vatom1));
3029 tl_assert(isShadowAtom(mce,vatom2));
3030 tl_assert(isShadowAtom(mce,vatom3));
3031 tl_assert(isShadowAtom(mce,vatom4));
3032 tl_assert(sameKindedAtoms(atom1,vatom1));
3033 tl_assert(sameKindedAtoms(atom2,vatom2));
3034 tl_assert(sameKindedAtoms(atom3,vatom3));
3035 tl_assert(sameKindedAtoms(atom4,vatom4));
3036 switch (op) {
3037 case Iop_MAddF64:
3038 case Iop_MAddF64r32:
3039 case Iop_MSubF64:
3040 case Iop_MSubF64r32:
3041 /* I32(rm) x F64 x F64 x F64 -> F64 */
3042 return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);
3044 case Iop_MAddF32:
3045 case Iop_MSubF32:
3046 /* I32(rm) x F32 x F32 x F32 -> F32 */
3047 return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);
3049 case Iop_MAddF128:
3050 case Iop_MSubF128:
3051 case Iop_NegMAddF128:
3052 case Iop_NegMSubF128:
3053 /* I32(rm) x F128 x F128 x F128 -> F128 */
3054 return mkLazy4(mce, Ity_I128, vatom1, vatom2, vatom3, vatom4);
3056 /* V256-bit data-steering */
3057 case Iop_64x4toV256:
3058 return assignNew('V', mce, Ity_V256,
3059 IRExpr_Qop(op, vatom1, vatom2, vatom3, vatom4));
3061 /* I32/I64 x I8 x I8 x I8 -> I32/I64 */
3062 case Iop_Rotx32:
3063 return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);
3064 case Iop_Rotx64:
3065 return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);
3066 default:
3067 ppIROp(op);
3068 VG_(tool_panic)("memcheck:expr2vbits_Qop");
3073 static
3074 IRAtom* expr2vbits_Triop ( MCEnv* mce,
3075 IROp op,
3076 IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
3078 IRAtom* vatom1 = expr2vbits( mce, atom1, HuOth );
3079 IRAtom* vatom2 = expr2vbits( mce, atom2, HuOth );
3080 IRAtom* vatom3 = expr2vbits( mce, atom3, HuOth );
3082 tl_assert(isOriginalAtom(mce,atom1));
3083 tl_assert(isOriginalAtom(mce,atom2));
3084 tl_assert(isOriginalAtom(mce,atom3));
3085 tl_assert(isShadowAtom(mce,vatom1));
3086 tl_assert(isShadowAtom(mce,vatom2));
3087 tl_assert(isShadowAtom(mce,vatom3));
3088 tl_assert(sameKindedAtoms(atom1,vatom1));
3089 tl_assert(sameKindedAtoms(atom2,vatom2));
3090 tl_assert(sameKindedAtoms(atom3,vatom3));
3091 switch (op) {
3092 case Iop_AddF128:
3093 case Iop_SubF128:
3094 case Iop_MulF128:
3095 case Iop_DivF128:
3096 case Iop_AddD128:
3097 case Iop_SubD128:
3098 case Iop_MulD128:
3099 case Iop_DivD128:
3100 case Iop_QuantizeD128:
3101 /* I32(rm) x F128/D128 x F128/D128 -> F128/D128 */
3102 return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
3103 case Iop_AddF64:
3104 case Iop_AddD64:
3105 case Iop_AddF64r32:
3106 case Iop_SubF64:
3107 case Iop_SubD64:
3108 case Iop_SubF64r32:
3109 case Iop_MulF64:
3110 case Iop_MulD64:
3111 case Iop_MulF64r32:
3112 case Iop_DivF64:
3113 case Iop_DivD64:
3114 case Iop_DivF64r32:
3115 case Iop_ScaleF64:
3116 case Iop_Yl2xF64:
3117 case Iop_Yl2xp1F64:
3118 case Iop_AtanF64:
3119 case Iop_PRemF64:
3120 case Iop_PRem1F64:
3121 case Iop_QuantizeD64:
3122 /* I32(rm) x F64/D64 x F64/D64 -> F64/D64 */
3123 return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
3124 case Iop_PRemC3210F64:
3125 case Iop_PRem1C3210F64:
3126 /* I32(rm) x F64 x F64 -> I32 */
3127 return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
3128 case Iop_AddF32:
3129 case Iop_SubF32:
3130 case Iop_MulF32:
3131 case Iop_DivF32:
3132 /* I32(rm) x F32 x F32 -> F32 */
3133 return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
3134 case Iop_SignificanceRoundD64:
3135 /* IRRoundingMode(I32) x I8 x D64 -> D64 */
3136 return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
3137 case Iop_SignificanceRoundD128:
3138 /* IRRoundingMode(I32) x I8 x D128 -> D128 */
3139 return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
3140 case Iop_SliceV128:
3141 /* (V128, V128, I8) -> V128 */
3142 complainIfUndefined(mce, atom3, NULL);
3143 return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
3144 case Iop_Slice64:
3145 /* (I64, I64, I8) -> I64 */
3146 complainIfUndefined(mce, atom3, NULL);
3147 return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3));
3148 case Iop_SetElem8x8:
3149 case Iop_SetElem16x4:
3150 case Iop_SetElem32x2:
3151 complainIfUndefined(mce, atom2, NULL);
3152 return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3));
3154 case Iop_SetElem8x16:
3155 case Iop_SetElem16x8:
3156 case Iop_SetElem32x4:
3157 case Iop_SetElem64x2:
3158 complainIfUndefined(mce, atom2, NULL);
3159 return assignNew('V', mce, Ity_V128, triop(op, vatom1, atom2, vatom3));
3161 case Iop_Perm8x16x2:
3162 /* (V128, V128, V128) -> V128 */
3163 complainIfUndefined(mce, atom3, NULL);
3164 return mkUifUV128(
3165 mce,
3166 assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3)),
3167 mkPCast8x16(mce, vatom3)
3170 /* Vector FP with rounding mode as the first arg */
3171 case Iop_Add64Fx2:
3172 case Iop_Sub64Fx2:
3173 case Iop_Mul64Fx2:
3174 case Iop_Div64Fx2:
3175 case Iop_Scale2_64Fx2:
3176 return binary64Fx2_w_rm(mce, vatom1, vatom2, vatom3);
3178 case Iop_Add32Fx4:
3179 case Iop_Sub32Fx4:
3180 case Iop_Mul32Fx4:
3181 case Iop_Div32Fx4:
3182 case Iop_Scale2_32Fx4:
3183 return binary32Fx4_w_rm(mce, vatom1, vatom2, vatom3);
3185 case Iop_Add64Fx4:
3186 case Iop_Sub64Fx4:
3187 case Iop_Mul64Fx4:
3188 case Iop_Div64Fx4:
3189 return binary64Fx4_w_rm(mce, vatom1, vatom2, vatom3);
3191 case Iop_Add32Fx8:
3192 case Iop_Sub32Fx8:
3193 case Iop_Mul32Fx8:
3194 case Iop_Div32Fx8:
3195 return binary32Fx8_w_rm(mce, vatom1, vatom2, vatom3);
3197 case Iop_F32x4_2toQ16x8:
3198 return assignNew('V', mce, Ity_V128,
3199 binop(Iop_PackEvenLanes16x8,
3200 unary32Fx4_w_rm(mce, vatom1, vatom2),
3201 unary32Fx4_w_rm(mce, vatom1, vatom3)));
3202 case Iop_F64x2_2toQ32x4:
3203 return assignNew('V', mce, Ity_V128,
3204 binop(Iop_PackEvenLanes32x4,
3205 unary64Fx2_w_rm(mce, vatom1, vatom2),
3206 unary64Fx2_w_rm(mce, vatom1, vatom3)));
3209 default:
3210 ppIROp(op);
3211 VG_(tool_panic)("memcheck:expr2vbits_Triop");
3216 static
3217 IRAtom* expr2vbits_Binop ( MCEnv* mce,
3218 IROp op,
3219 IRAtom* atom1, IRAtom* atom2,
3220 HowUsed hu/*use HuOth if unknown*/ )
3222 IRType and_or_ty;
3223 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*);
3224 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*);
3225 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
3227 IRAtom* vatom1 = expr2vbits( mce, atom1, HuOth );
3228 IRAtom* vatom2 = expr2vbits( mce, atom2, HuOth );
3230 tl_assert(isOriginalAtom(mce,atom1));
3231 tl_assert(isOriginalAtom(mce,atom2));
3232 tl_assert(isShadowAtom(mce,vatom1));
3233 tl_assert(isShadowAtom(mce,vatom2));
3234 tl_assert(sameKindedAtoms(atom1,vatom1));
3235 tl_assert(sameKindedAtoms(atom2,vatom2));
3236 switch (op) {
3238 /* 32-bit SIMD */
3240 case Iop_Add16x2:
3241 case Iop_HAdd16Ux2:
3242 case Iop_HAdd16Sx2:
3243 case Iop_Sub16x2:
3244 case Iop_HSub16Ux2:
3245 case Iop_HSub16Sx2:
3246 case Iop_QAdd16Sx2:
3247 case Iop_QSub16Sx2:
3248 case Iop_QSub16Ux2:
3249 case Iop_QAdd16Ux2:
3250 return binary16Ix2(mce, vatom1, vatom2);
3252 case Iop_Add8x4:
3253 case Iop_HAdd8Ux4:
3254 case Iop_HAdd8Sx4:
3255 case Iop_Sub8x4:
3256 case Iop_HSub8Ux4:
3257 case Iop_HSub8Sx4:
3258 case Iop_QSub8Ux4:
3259 case Iop_QAdd8Ux4:
3260 case Iop_QSub8Sx4:
3261 case Iop_QAdd8Sx4:
3262 return binary8Ix4(mce, vatom1, vatom2);
3264 /* 64-bit SIMD */
3266 case Iop_ShrN8x8:
3267 case Iop_ShrN16x4:
3268 case Iop_ShrN32x2:
3269 case Iop_SarN8x8:
3270 case Iop_SarN16x4:
3271 case Iop_SarN32x2:
3272 case Iop_ShlN16x4:
3273 case Iop_ShlN32x2:
3274 case Iop_ShlN8x8:
3275 /* Same scheme as with all other shifts. */
3276 complainIfUndefined(mce, atom2, NULL);
3277 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
3279 case Iop_QNarrowBin32Sto16Sx4:
3280 case Iop_QNarrowBin16Sto8Sx8:
3281 case Iop_QNarrowBin16Sto8Ux8:
3282 return vectorNarrowBin64(mce, op, vatom1, vatom2);
3284 case Iop_Min8Ux8:
3285 case Iop_Min8Sx8:
3286 case Iop_Max8Ux8:
3287 case Iop_Max8Sx8:
3288 case Iop_Avg8Ux8:
3289 case Iop_QSub8Sx8:
3290 case Iop_QSub8Ux8:
3291 case Iop_Sub8x8:
3292 case Iop_CmpGT8Sx8:
3293 case Iop_CmpGT8Ux8:
3294 case Iop_CmpEQ8x8:
3295 case Iop_QAdd8Sx8:
3296 case Iop_QAdd8Ux8:
3297 case Iop_QSal8x8:
3298 case Iop_QShl8x8:
3299 case Iop_Add8x8:
3300 case Iop_Mul8x8:
3301 case Iop_PolynomialMul8x8:
3302 return binary8Ix8(mce, vatom1, vatom2);
3304 case Iop_Min16Sx4:
3305 case Iop_Min16Ux4:
3306 case Iop_Max16Sx4:
3307 case Iop_Max16Ux4:
3308 case Iop_Avg16Ux4:
3309 case Iop_QSub16Ux4:
3310 case Iop_QSub16Sx4:
3311 case Iop_Sub16x4:
3312 case Iop_Mul16x4:
3313 case Iop_MulHi16Sx4:
3314 case Iop_MulHi16Ux4:
3315 case Iop_CmpGT16Sx4:
3316 case Iop_CmpGT16Ux4:
3317 case Iop_CmpEQ16x4:
3318 case Iop_QAdd16Sx4:
3319 case Iop_QAdd16Ux4:
3320 case Iop_QSal16x4:
3321 case Iop_QShl16x4:
3322 case Iop_Add16x4:
3323 case Iop_QDMulHi16Sx4:
3324 case Iop_QRDMulHi16Sx4:
3325 return binary16Ix4(mce, vatom1, vatom2);
3327 case Iop_Sub32x2:
3328 case Iop_Mul32x2:
3329 case Iop_Max32Sx2:
3330 case Iop_Max32Ux2:
3331 case Iop_Min32Sx2:
3332 case Iop_Min32Ux2:
3333 case Iop_CmpGT32Sx2:
3334 case Iop_CmpGT32Ux2:
3335 case Iop_CmpEQ32x2:
3336 case Iop_Add32x2:
3337 case Iop_QAdd32Ux2:
3338 case Iop_QAdd32Sx2:
3339 case Iop_QSub32Ux2:
3340 case Iop_QSub32Sx2:
3341 case Iop_QSal32x2:
3342 case Iop_QShl32x2:
3343 case Iop_QDMulHi32Sx2:
3344 case Iop_QRDMulHi32Sx2:
3345 return binary32Ix2(mce, vatom1, vatom2);
3347 case Iop_QSub64Ux1:
3348 case Iop_QSub64Sx1:
3349 case Iop_QAdd64Ux1:
3350 case Iop_QAdd64Sx1:
3351 case Iop_QSal64x1:
3352 case Iop_QShl64x1:
3353 case Iop_Sal64x1:
3354 return binary64Ix1(mce, vatom1, vatom2);
3356 case Iop_QShlNsatSU8x8:
3357 case Iop_QShlNsatUU8x8:
3358 case Iop_QShlNsatSS8x8:
3359 complainIfUndefined(mce, atom2, NULL);
3360 return mkPCast8x8(mce, vatom1);
3362 case Iop_QShlNsatSU16x4:
3363 case Iop_QShlNsatUU16x4:
3364 case Iop_QShlNsatSS16x4:
3365 complainIfUndefined(mce, atom2, NULL);
3366 return mkPCast16x4(mce, vatom1);
3368 case Iop_QShlNsatSU32x2:
3369 case Iop_QShlNsatUU32x2:
3370 case Iop_QShlNsatSS32x2:
3371 complainIfUndefined(mce, atom2, NULL);
3372 return mkPCast32x2(mce, vatom1);
3374 case Iop_QShlNsatSU64x1:
3375 case Iop_QShlNsatUU64x1:
3376 case Iop_QShlNsatSS64x1:
3377 complainIfUndefined(mce, atom2, NULL);
3378 return mkPCast32x2(mce, vatom1);
3380 case Iop_PwMax32Sx2:
3381 case Iop_PwMax32Ux2:
3382 case Iop_PwMin32Sx2:
3383 case Iop_PwMin32Ux2:
3384 case Iop_PwMax32Fx2:
3385 case Iop_PwMin32Fx2:
3386 return assignNew('V', mce, Ity_I64,
3387 binop(Iop_PwMax32Ux2,
3388 mkPCast32x2(mce, vatom1),
3389 mkPCast32x2(mce, vatom2)));
3391 case Iop_PwMax16Sx4:
3392 case Iop_PwMax16Ux4:
3393 case Iop_PwMin16Sx4:
3394 case Iop_PwMin16Ux4:
3395 return assignNew('V', mce, Ity_I64,
3396 binop(Iop_PwMax16Ux4,
3397 mkPCast16x4(mce, vatom1),
3398 mkPCast16x4(mce, vatom2)));
3400 case Iop_PwMax8Sx8:
3401 case Iop_PwMax8Ux8:
3402 case Iop_PwMin8Sx8:
3403 case Iop_PwMin8Ux8:
3404 return assignNew('V', mce, Ity_I64,
3405 binop(Iop_PwMax8Ux8,
3406 mkPCast8x8(mce, vatom1),
3407 mkPCast8x8(mce, vatom2)));
3409 case Iop_PwAdd32x2:
3410 case Iop_PwAdd32Fx2:
3411 return mkPCast32x2(mce,
3412 assignNew('V', mce, Ity_I64,
3413 binop(Iop_PwAdd32x2,
3414 mkPCast32x2(mce, vatom1),
3415 mkPCast32x2(mce, vatom2))));
3417 case Iop_PwAdd16x4:
3418 return mkPCast16x4(mce,
3419 assignNew('V', mce, Ity_I64,
3420 binop(op, mkPCast16x4(mce, vatom1),
3421 mkPCast16x4(mce, vatom2))));
3423 case Iop_PwAdd8x8:
3424 return mkPCast8x8(mce,
3425 assignNew('V', mce, Ity_I64,
3426 binop(op, mkPCast8x8(mce, vatom1),
3427 mkPCast8x8(mce, vatom2))));
3429 case Iop_Shl8x8:
3430 case Iop_Shr8x8:
3431 case Iop_Sar8x8:
3432 case Iop_Sal8x8:
3433 return mkUifU64(mce,
3434 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3435 mkPCast8x8(mce,vatom2)
3438 case Iop_Shl16x4:
3439 case Iop_Shr16x4:
3440 case Iop_Sar16x4:
3441 case Iop_Sal16x4:
3442 return mkUifU64(mce,
3443 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3444 mkPCast16x4(mce,vatom2)
3447 case Iop_Shl32x2:
3448 case Iop_Shr32x2:
3449 case Iop_Sar32x2:
3450 case Iop_Sal32x2:
3451 return mkUifU64(mce,
3452 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3453 mkPCast32x2(mce,vatom2)
3456 /* 64-bit data-steering */
3457 case Iop_InterleaveLO32x2:
3458 case Iop_InterleaveLO16x4:
3459 case Iop_InterleaveLO8x8:
3460 case Iop_InterleaveHI32x2:
3461 case Iop_InterleaveHI16x4:
3462 case Iop_InterleaveHI8x8:
3463 case Iop_CatOddLanes8x8:
3464 case Iop_CatEvenLanes8x8:
3465 case Iop_CatOddLanes16x4:
3466 case Iop_CatEvenLanes16x4:
3467 case Iop_InterleaveOddLanes8x8:
3468 case Iop_InterleaveEvenLanes8x8:
3469 case Iop_InterleaveOddLanes16x4:
3470 case Iop_InterleaveEvenLanes16x4:
3471 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
3473 case Iop_GetElem8x8:
3474 complainIfUndefined(mce, atom2, NULL);
3475 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
3476 case Iop_GetElem16x4:
3477 complainIfUndefined(mce, atom2, NULL);
3478 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
3479 case Iop_GetElem32x2:
3480 complainIfUndefined(mce, atom2, NULL);
3481 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
3483 /* Perm8x8: rearrange values in left arg using steering values
3484 from right arg. So rearrange the vbits in the same way but
3485 pessimise wrt steering values. */
3486 case Iop_Perm8x8:
3487 return mkUifU64(
3488 mce,
3489 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3490 mkPCast8x8(mce, vatom2)
3493 /* V128-bit SIMD */
3495 case Iop_Sqrt32Fx4:
3496 return unary32Fx4_w_rm(mce, vatom1, vatom2);
3497 case Iop_Sqrt64Fx2:
3498 return unary64Fx2_w_rm(mce, vatom1, vatom2);
3500 case Iop_ShrN8x16:
3501 case Iop_ShrN16x8:
3502 case Iop_ShrN32x4:
3503 case Iop_ShrN64x2:
3504 case Iop_SarN8x16:
3505 case Iop_SarN16x8:
3506 case Iop_SarN32x4:
3507 case Iop_SarN64x2:
3508 case Iop_ShlN8x16:
3509 case Iop_ShlN16x8:
3510 case Iop_ShlN32x4:
3511 case Iop_ShlN64x2:
3512 /* Same scheme as with all other shifts. Note: 22 Oct 05:
3513 this is wrong now; scalar shifts are done properly lazily.
3514 Vector shifts should be fixed too. */
3515 complainIfUndefined(mce, atom2, NULL);
3516 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
3518 /* V x V shifts/rotates are done using the standard lazy scheme. */
3519 /* For the non-rounding variants of bi-di vector x vector
3520 shifts (the Iop_Sh.. ops, that is) we use the lazy scheme.
3521 But note that this is overly pessimistic, because in fact only
3522 the bottom 8 bits of each lane of the second argument are taken
3523 into account when shifting. So really we ought to ignore
3524 undefinedness in bits 8 and above of each lane in the
3525 second argument. */
3526 case Iop_Shl8x16:
3527 case Iop_Shr8x16:
3528 case Iop_Sar8x16:
3529 case Iop_Sal8x16:
3530 case Iop_Rol8x16:
3531 case Iop_Sh8Sx16:
3532 case Iop_Sh8Ux16:
3533 return mkUifUV128(mce,
3534 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3535 mkPCast8x16(mce,vatom2)
3538 case Iop_Shl16x8:
3539 case Iop_Shr16x8:
3540 case Iop_Sar16x8:
3541 case Iop_Sal16x8:
3542 case Iop_Rol16x8:
3543 case Iop_Sh16Sx8:
3544 case Iop_Sh16Ux8:
3545 return mkUifUV128(mce,
3546 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3547 mkPCast16x8(mce,vatom2)
3550 case Iop_Shl32x4:
3551 case Iop_Shr32x4:
3552 case Iop_Sar32x4:
3553 case Iop_Sal32x4:
3554 case Iop_Rol32x4:
3555 case Iop_Sh32Sx4:
3556 case Iop_Sh32Ux4:
3557 return mkUifUV128(mce,
3558 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3559 mkPCast32x4(mce,vatom2)
3562 case Iop_Shl64x2:
3563 case Iop_Shr64x2:
3564 case Iop_Sar64x2:
3565 case Iop_Sal64x2:
3566 case Iop_Rol64x2:
3567 case Iop_Sh64Sx2:
3568 case Iop_Sh64Ux2:
3569 return mkUifUV128(mce,
3570 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3571 mkPCast64x2(mce,vatom2)
3574 /* For the rounding variants of bi-di vector x vector shifts, the
3575 rounding adjustment can cause undefinedness to propagate through
3576 the entire lane, in the worst case. Too complex to handle
3577 properly .. just UifU the arguments and then PCast them.
3578 Suboptimal but safe. */
3579 case Iop_Rsh8Sx16:
3580 case Iop_Rsh8Ux16:
3581 return binary8Ix16(mce, vatom1, vatom2);
3582 case Iop_Rsh16Sx8:
3583 case Iop_Rsh16Ux8:
3584 return binary16Ix8(mce, vatom1, vatom2);
3585 case Iop_Rsh32Sx4:
3586 case Iop_Rsh32Ux4:
3587 return binary32Ix4(mce, vatom1, vatom2);
3588 case Iop_Rsh64Sx2:
3589 case Iop_Rsh64Ux2:
3590 return binary64Ix2(mce, vatom1, vatom2);
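/* Editor's sketch (illustrative only, not compiled): what binary32Ix4 and
   friends amount to for a single lane -- UifU the two inputs, then smear any
   undefinedness across the whole lane.  Name invented. */
#if 0
#include <stdint.h>
static uint32_t example_binary32I_lane_vbits ( uint32_t v1, uint32_t v2 )
{
   uint32_t uifu = v1 | v2;                  /* undefined if either input is */
   return (uifu != 0) ? 0xFFFFFFFFu : 0u;    /* PCast across the lane        */
}
#endif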
3592 case Iop_F32ToFixed32Ux4_RZ:
3593 case Iop_F32ToFixed32Sx4_RZ:
3594 case Iop_Fixed32UToF32x4_RN:
3595 case Iop_Fixed32SToF32x4_RN:
3596 complainIfUndefined(mce, atom2, NULL);
3597 return mkPCast32x4(mce, vatom1);
3599 case Iop_F32ToFixed32Ux2_RZ:
3600 case Iop_F32ToFixed32Sx2_RZ:
3601 case Iop_Fixed32UToF32x2_RN:
3602 case Iop_Fixed32SToF32x2_RN:
3603 complainIfUndefined(mce, atom2, NULL);
3604 return mkPCast32x2(mce, vatom1);
3606 case Iop_QSub8Ux16:
3607 case Iop_QSub8Sx16:
3608 case Iop_Sub8x16:
3609 case Iop_Min8Ux16:
3610 case Iop_Min8Sx16:
3611 case Iop_Max8Ux16:
3612 case Iop_Max8Sx16:
3613 case Iop_CmpGT8Sx16:
3614 case Iop_CmpGT8Ux16:
3615 case Iop_CmpEQ8x16:
3616 case Iop_Avg8Ux16:
3617 case Iop_Avg8Sx16:
3618 case Iop_QAdd8Ux16:
3619 case Iop_QAdd8Sx16:
3620 case Iop_QAddExtUSsatSS8x16:
3621 case Iop_QAddExtSUsatUU8x16:
3622 case Iop_QSal8x16:
3623 case Iop_QShl8x16:
3624 case Iop_Add8x16:
3625 case Iop_Mul8x16:
3626 case Iop_MulHi8Sx16:
3627 case Iop_MulHi8Ux16:
3628 case Iop_PolynomialMul8x16:
3629 case Iop_PolynomialMulAdd8x16:
3630 return binary8Ix16(mce, vatom1, vatom2);
3632 case Iop_QSub16Ux8:
3633 case Iop_QSub16Sx8:
3634 case Iop_Sub16x8:
3635 case Iop_Mul16x8:
3636 case Iop_MulHi16Sx8:
3637 case Iop_MulHi16Ux8:
3638 case Iop_Min16Sx8:
3639 case Iop_Min16Ux8:
3640 case Iop_Max16Sx8:
3641 case Iop_Max16Ux8:
3642 case Iop_CmpGT16Sx8:
3643 case Iop_CmpGT16Ux8:
3644 case Iop_CmpEQ16x8:
3645 case Iop_Avg16Ux8:
3646 case Iop_Avg16Sx8:
3647 case Iop_QAdd16Ux8:
3648 case Iop_QAdd16Sx8:
3649 case Iop_QAddExtUSsatSS16x8:
3650 case Iop_QAddExtSUsatUU16x8:
3651 case Iop_QSal16x8:
3652 case Iop_QShl16x8:
3653 case Iop_Add16x8:
3654 case Iop_QDMulHi16Sx8:
3655 case Iop_QRDMulHi16Sx8:
3656 case Iop_PolynomialMulAdd16x8:
3657 return binary16Ix8(mce, vatom1, vatom2);
3659 case Iop_Sub32x4:
3660 case Iop_CmpGT32Sx4:
3661 case Iop_CmpGT32Ux4:
3662 case Iop_CmpEQ32x4:
3663 case Iop_QAdd32Sx4:
3664 case Iop_QAdd32Ux4:
3665 case Iop_QSub32Sx4:
3666 case Iop_QSub32Ux4:
3667 case Iop_QAddExtUSsatSS32x4:
3668 case Iop_QAddExtSUsatUU32x4:
3669 case Iop_QSal32x4:
3670 case Iop_QShl32x4:
3671 case Iop_Avg32Ux4:
3672 case Iop_Avg32Sx4:
3673 case Iop_Add32x4:
3674 case Iop_Max32Ux4:
3675 case Iop_Max32Sx4:
3676 case Iop_Min32Ux4:
3677 case Iop_Min32Sx4:
3678 case Iop_Mul32x4:
3679 case Iop_MulHi32Sx4:
3680 case Iop_MulHi32Ux4:
3681 case Iop_QDMulHi32Sx4:
3682 case Iop_QRDMulHi32Sx4:
3683 case Iop_PolynomialMulAdd32x4:
3684 return binary32Ix4(mce, vatom1, vatom2);
3686 case Iop_Sub64x2:
3687 case Iop_Add64x2:
3688 case Iop_Avg64Ux2:
3689 case Iop_Avg64Sx2:
3690 case Iop_Max64Sx2:
3691 case Iop_Max64Ux2:
3692 case Iop_Min64Sx2:
3693 case Iop_Min64Ux2:
3694 case Iop_CmpEQ64x2:
3695 case Iop_CmpGT64Sx2:
3696 case Iop_CmpGT64Ux2:
3697 case Iop_QSal64x2:
3698 case Iop_QShl64x2:
3699 case Iop_QAdd64Ux2:
3700 case Iop_QAdd64Sx2:
3701 case Iop_QSub64Ux2:
3702 case Iop_QSub64Sx2:
3703 case Iop_QAddExtUSsatSS64x2:
3704 case Iop_QAddExtSUsatUU64x2:
3705 case Iop_PolynomialMulAdd64x2:
3706 case Iop_CipherV128:
3707 case Iop_CipherLV128:
3708 case Iop_NCipherV128:
3709 case Iop_NCipherLV128:
3710 case Iop_MulI128by10E:
3711 case Iop_MulI128by10ECarry:
3712 return binary64Ix2(mce, vatom1, vatom2);
3714 case Iop_Add128x1:
3715 case Iop_Sub128x1:
3716 case Iop_CmpNEZ128x1:
3717 return binary128Ix1(mce, vatom1, vatom2);
3719 case Iop_QNarrowBin64Sto32Sx4:
3720 case Iop_QNarrowBin64Uto32Ux4:
3721 case Iop_QNarrowBin32Sto16Sx8:
3722 case Iop_QNarrowBin32Uto16Ux8:
3723 case Iop_QNarrowBin32Sto16Ux8:
3724 case Iop_QNarrowBin16Sto8Sx16:
3725 case Iop_QNarrowBin16Uto8Ux16:
3726 case Iop_QNarrowBin16Sto8Ux16:
3727 return vectorNarrowBinV128(mce, op, vatom1, vatom2);
3729 case Iop_Min64Fx2:
3730 case Iop_Max64Fx2:
3731 case Iop_CmpLT64Fx2:
3732 case Iop_CmpLE64Fx2:
3733 case Iop_CmpEQ64Fx2:
3734 case Iop_CmpUN64Fx2:
3735 case Iop_RecipStep64Fx2:
3736 case Iop_RSqrtStep64Fx2:
3737 return binary64Fx2(mce, vatom1, vatom2);
3739 case Iop_Sub64F0x2:
3740 case Iop_Mul64F0x2:
3741 case Iop_Min64F0x2:
3742 case Iop_Max64F0x2:
3743 case Iop_Div64F0x2:
3744 case Iop_CmpLT64F0x2:
3745 case Iop_CmpLE64F0x2:
3746 case Iop_CmpEQ64F0x2:
3747 case Iop_CmpUN64F0x2:
3748 case Iop_Add64F0x2:
3749 return binary64F0x2(mce, vatom1, vatom2);
3751 case Iop_Min32Fx4:
3752 case Iop_Max32Fx4:
3753 case Iop_CmpLT32Fx4:
3754 case Iop_CmpLE32Fx4:
3755 case Iop_CmpEQ32Fx4:
3756 case Iop_CmpUN32Fx4:
3757 case Iop_CmpGT32Fx4:
3758 case Iop_CmpGE32Fx4:
3759 case Iop_RecipStep32Fx4:
3760 case Iop_RSqrtStep32Fx4:
3761 return binary32Fx4(mce, vatom1, vatom2);
3763 case Iop_Sub32Fx2:
3764 case Iop_Mul32Fx2:
3765 case Iop_Min32Fx2:
3766 case Iop_Max32Fx2:
3767 case Iop_CmpEQ32Fx2:
3768 case Iop_CmpGT32Fx2:
3769 case Iop_CmpGE32Fx2:
3770 case Iop_Add32Fx2:
3771 case Iop_RecipStep32Fx2:
3772 case Iop_RSqrtStep32Fx2:
3773 return binary32Fx2(mce, vatom1, vatom2);
3775 case Iop_Sub32F0x4:
3776 case Iop_Mul32F0x4:
3777 case Iop_Min32F0x4:
3778 case Iop_Max32F0x4:
3779 case Iop_Div32F0x4:
3780 case Iop_CmpLT32F0x4:
3781 case Iop_CmpLE32F0x4:
3782 case Iop_CmpEQ32F0x4:
3783 case Iop_CmpUN32F0x4:
3784 case Iop_Add32F0x4:
3785 return binary32F0x4(mce, vatom1, vatom2);
3787 case Iop_QShlNsatSU8x16:
3788 case Iop_QShlNsatUU8x16:
3789 case Iop_QShlNsatSS8x16:
3790 complainIfUndefined(mce, atom2, NULL);
3791 return mkPCast8x16(mce, vatom1);
3793 case Iop_QShlNsatSU16x8:
3794 case Iop_QShlNsatUU16x8:
3795 case Iop_QShlNsatSS16x8:
3796 complainIfUndefined(mce, atom2, NULL);
3797 return mkPCast16x8(mce, vatom1);
3799 case Iop_QShlNsatSU32x4:
3800 case Iop_QShlNsatUU32x4:
3801 case Iop_QShlNsatSS32x4:
3802 complainIfUndefined(mce, atom2, NULL);
3803 return mkPCast32x4(mce, vatom1);
3805 case Iop_QShlNsatSU64x2:
3806 case Iop_QShlNsatUU64x2:
3807 case Iop_QShlNsatSS64x2:
3808 complainIfUndefined(mce, atom2, NULL);
3809 return mkPCast64x2(mce, vatom1);
3811 /* Q-and-Qshift-by-imm-and-narrow of the form (V128, I8) -> V128.
3812 To make this simpler, do the following:
3813 * complain if the shift amount (the I8) is undefined
3814 * pcast each lane at the wide width
3815 * truncate each lane to half width
3816 * pcast the resulting 64-bit value to a single bit and use
3817 that as the least significant bit of the upper half of the
3818 result. */
3819 case Iop_QandQShrNnarrow64Uto32Ux2:
3820 case Iop_QandQSarNnarrow64Sto32Sx2:
3821 case Iop_QandQSarNnarrow64Sto32Ux2:
3822 case Iop_QandQRShrNnarrow64Uto32Ux2:
3823 case Iop_QandQRSarNnarrow64Sto32Sx2:
3824 case Iop_QandQRSarNnarrow64Sto32Ux2:
3825 case Iop_QandQShrNnarrow32Uto16Ux4:
3826 case Iop_QandQSarNnarrow32Sto16Sx4:
3827 case Iop_QandQSarNnarrow32Sto16Ux4:
3828 case Iop_QandQRShrNnarrow32Uto16Ux4:
3829 case Iop_QandQRSarNnarrow32Sto16Sx4:
3830 case Iop_QandQRSarNnarrow32Sto16Ux4:
3831 case Iop_QandQShrNnarrow16Uto8Ux8:
3832 case Iop_QandQSarNnarrow16Sto8Sx8:
3833 case Iop_QandQSarNnarrow16Sto8Ux8:
3834 case Iop_QandQRShrNnarrow16Uto8Ux8:
3835 case Iop_QandQRSarNnarrow16Sto8Sx8:
3836 case Iop_QandQRSarNnarrow16Sto8Ux8:
3838 IRAtom* (*fnPessim) (MCEnv*, IRAtom*) = NULL;
3839 IROp opNarrow = Iop_INVALID;
3840 switch (op) {
3841 case Iop_QandQShrNnarrow64Uto32Ux2:
3842 case Iop_QandQSarNnarrow64Sto32Sx2:
3843 case Iop_QandQSarNnarrow64Sto32Ux2:
3844 case Iop_QandQRShrNnarrow64Uto32Ux2:
3845 case Iop_QandQRSarNnarrow64Sto32Sx2:
3846 case Iop_QandQRSarNnarrow64Sto32Ux2:
3847 fnPessim = mkPCast64x2;
3848 opNarrow = Iop_NarrowUn64to32x2;
3849 break;
3850 case Iop_QandQShrNnarrow32Uto16Ux4:
3851 case Iop_QandQSarNnarrow32Sto16Sx4:
3852 case Iop_QandQSarNnarrow32Sto16Ux4:
3853 case Iop_QandQRShrNnarrow32Uto16Ux4:
3854 case Iop_QandQRSarNnarrow32Sto16Sx4:
3855 case Iop_QandQRSarNnarrow32Sto16Ux4:
3856 fnPessim = mkPCast32x4;
3857 opNarrow = Iop_NarrowUn32to16x4;
3858 break;
3859 case Iop_QandQShrNnarrow16Uto8Ux8:
3860 case Iop_QandQSarNnarrow16Sto8Sx8:
3861 case Iop_QandQSarNnarrow16Sto8Ux8:
3862 case Iop_QandQRShrNnarrow16Uto8Ux8:
3863 case Iop_QandQRSarNnarrow16Sto8Sx8:
3864 case Iop_QandQRSarNnarrow16Sto8Ux8:
3865 fnPessim = mkPCast16x8;
3866 opNarrow = Iop_NarrowUn16to8x8;
3867 break;
3868 default:
3869 tl_assert(0);
3871 complainIfUndefined(mce, atom2, NULL);
3872 // Pessimised shift result
3873 IRAtom* shV
3874 = fnPessim(mce, vatom1);
3875 // Narrowed, pessimised shift result
3876 IRAtom* shVnarrowed
3877 = assignNew('V', mce, Ity_I64, unop(opNarrow, shV));
3878 // Generates: Def--(63)--Def PCast-to-I1(narrowed)
3879 IRAtom* qV = mkPCastXXtoXXlsb(mce, shVnarrowed, Ity_I64);
3880 // and assemble the result
3881 return assignNew('V', mce, Ity_V128,
3882 binop(Iop_64HLtoV128, qV, shVnarrowed));
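/* Editor's sketch (illustrative only, not compiled): how the final V128
   shadow is assembled just above, with its two 64-bit halves modelled as
   separate integers.  Names invented. */
#if 0
#include <stdint.h>
static void example_QandQNarrow_vbits ( uint64_t narrowedV, /* pessimised, narrowed lanes */
                                        uint64_t* hi64,     /* upper half of result V128  */
                                        uint64_t* lo64 )    /* lower half of result V128  */
{
   *lo64 = narrowedV;                          /* narrowed, pessimised shift result */
   *hi64 = (narrowedV != 0) ? 1ULL : 0ULL;     /* Def--(63)--Def PCast-to-I1 in lsb */
}
#endif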
3885 case Iop_Mull32Sx2:
3886 case Iop_Mull32Ux2:
3887 case Iop_QDMull32Sx2:
3888 return vectorWidenI64(mce, Iop_Widen32Sto64x2,
3889 mkUifU64(mce, vatom1, vatom2));
3891 case Iop_Mull16Sx4:
3892 case Iop_Mull16Ux4:
3893 case Iop_QDMull16Sx4:
3894 return vectorWidenI64(mce, Iop_Widen16Sto32x4,
3895 mkUifU64(mce, vatom1, vatom2));
3897 case Iop_Mull8Sx8:
3898 case Iop_Mull8Ux8:
3899 case Iop_PolynomialMull8x8:
3900 return vectorWidenI64(mce, Iop_Widen8Sto16x8,
3901 mkUifU64(mce, vatom1, vatom2));
3903 case Iop_PwAdd32x4:
3904 return mkPCast32x4(mce,
3905 assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1),
3906 mkPCast32x4(mce, vatom2))));
3908 case Iop_PwAdd16x8:
3909 return mkPCast16x8(mce,
3910 assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1),
3911 mkPCast16x8(mce, vatom2))));
3913 case Iop_PwAdd8x16:
3914 return mkPCast8x16(mce,
3915 assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1),
3916 mkPCast8x16(mce, vatom2))));
3918 /* V128-bit data-steering */
3919 case Iop_SetV128lo32:
3920 case Iop_SetV128lo64:
3921 case Iop_64HLtoV128:
3922 case Iop_InterleaveLO64x2:
3923 case Iop_InterleaveLO32x4:
3924 case Iop_InterleaveLO16x8:
3925 case Iop_InterleaveLO8x16:
3926 case Iop_InterleaveHI64x2:
3927 case Iop_InterleaveHI32x4:
3928 case Iop_InterleaveHI16x8:
3929 case Iop_InterleaveHI8x16:
3930 case Iop_CatOddLanes8x16:
3931 case Iop_CatOddLanes16x8:
3932 case Iop_CatOddLanes32x4:
3933 case Iop_CatEvenLanes8x16:
3934 case Iop_CatEvenLanes16x8:
3935 case Iop_CatEvenLanes32x4:
3936 case Iop_InterleaveOddLanes8x16:
3937 case Iop_InterleaveOddLanes16x8:
3938 case Iop_InterleaveOddLanes32x4:
3939 case Iop_InterleaveEvenLanes8x16:
3940 case Iop_InterleaveEvenLanes16x8:
3941 case Iop_InterleaveEvenLanes32x4:
3942 case Iop_PackOddLanes8x16:
3943 case Iop_PackOddLanes16x8:
3944 case Iop_PackOddLanes32x4:
3945 case Iop_PackEvenLanes8x16:
3946 case Iop_PackEvenLanes16x8:
3947 case Iop_PackEvenLanes32x4:
3948 return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));
3950 case Iop_GetElem8x16:
3951 complainIfUndefined(mce, atom2, NULL);
3952 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
3953 case Iop_GetElem16x8:
3954 complainIfUndefined(mce, atom2, NULL);
3955 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
3956 case Iop_GetElem32x4:
3957 complainIfUndefined(mce, atom2, NULL);
3958 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
3959 case Iop_GetElem64x2:
3960 complainIfUndefined(mce, atom2, NULL);
3961 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
3963 /* Perm8x16: rearrange values in left arg using steering values
3964 from right arg. So rearrange the vbits in the same way but
3965 pessimise wrt steering values. Perm32x4 ditto. */
3966 case Iop_Perm8x16:
3967 return mkUifUV128(
3968 mce,
3969 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3970 mkPCast8x16(mce, vatom2)
3972 case Iop_Perm32x4:
3973 return mkUifUV128(
3974 mce,
3975 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3976 mkPCast32x4(mce, vatom2)
3979 /* These two take the lower half of each 16-bit lane, sign/zero
3980 extend it to 32, and multiply together, producing a 32x4
3981 result (and implicitly ignoring half the operand bits). So
3982 treat it as a bunch of independent 16x8 operations, but then
3983 do 32-bit shifts left-right to copy the lower half results
3984 (which are all 0s or all 1s due to PCasting in binary16Ix8)
3985 into the upper half of each result lane. */
3986 case Iop_MullEven16Ux8:
3987 case Iop_MullEven16Sx8: {
3988 IRAtom* at;
3989 at = binary16Ix8(mce,vatom1,vatom2);
3990 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
3991 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
3992 return at;
3995 /* Same deal as Iop_MullEven16{S,U}x8 */
3996 case Iop_MullEven8Ux16:
3997 case Iop_MullEven8Sx16: {
3998 IRAtom* at;
3999 at = binary8Ix16(mce,vatom1,vatom2);
4000 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
4001 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
4002 return at;
4005 /* Same deal as Iop_MullEven16{S,U}x8 */
4006 case Iop_MullEven32Ux4:
4007 case Iop_MullEven32Sx4: {
4008 IRAtom* at;
4009 at = binary32Ix4(mce,vatom1,vatom2);
4010 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN64x2, at, mkU8(32)));
4011 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN64x2, at, mkU8(32)));
4012 return at;
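/* Editor's sketch (illustrative only, not compiled): the shift trick used by
   the MullEven cases above, shown on one 32-bit result lane.  After
   binary16Ix8 the low 16 bits are already all-0s or all-1s; the shl/sar pair
   copies that value into the upper 16 bits as well.  Name invented. */
#if 0
#include <stdint.h>
static uint32_t example_mulleven_lane_vbits ( uint32_t lane )
{
   uint32_t shl = lane << 16;               /* ShlN32x4 by 16 */
   return (uint32_t)((int32_t)shl >> 16);   /* SarN32x4 by 16 */
}
#endif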
4015 /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
4016 32x4 -> 16x8 laneage, discarding the upper half of each lane.
4017 Simply apply the same op to the V bits, since this is really no more
4018 than a data steering operation. */
4019 case Iop_NarrowBin32to16x8:
4020 case Iop_NarrowBin16to8x16:
4021 case Iop_NarrowBin64to32x4:
4022 return assignNew('V', mce, Ity_V128,
4023 binop(op, vatom1, vatom2));
4025 case Iop_ShrV128:
4026 case Iop_SarV128:
4027 case Iop_ShlV128:
4028 case Iop_I128StoBCD128:
4029 /* Same scheme as with all other shifts. Note: 10 Nov 05:
4030 this is wrong now, scalar shifts are done properly lazily.
4031 Vector shifts should be fixed too. */
4032 complainIfUndefined(mce, atom2, NULL);
4033 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
4035 case Iop_BCDAdd:
4036 case Iop_BCDSub:
4037 return mkLazy2(mce, Ity_V128, vatom1, vatom2);
4039 /* SHA Iops */
4040 case Iop_SHA256:
4041 case Iop_SHA512:
4042 complainIfUndefined(mce, atom2, NULL);
4043 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
4045 /* I128-bit data-steering */
4046 case Iop_64HLto128:
4047 return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));
4049 /* V256-bit SIMD */
4051 case Iop_Max64Fx4:
4052 case Iop_Min64Fx4:
4053 return binary64Fx4(mce, vatom1, vatom2);
4055 case Iop_Max32Fx8:
4056 case Iop_Min32Fx8:
4057 return binary32Fx8(mce, vatom1, vatom2);
4059 /* V256-bit data-steering */
4060 case Iop_V128HLtoV256:
4061 return assignNew('V', mce, Ity_V256, binop(op, vatom1, vatom2));
4063 /* Scalar floating point */
4065 case Iop_F32toI64S:
4066 case Iop_F32toI64U:
4067 /* I32(rm) x F32 -> I64 */
4068 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
4070 case Iop_I64StoF32:
4071 /* I32(rm) x I64 -> F32 */
4072 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
4074 case Iop_RoundF64toInt:
4075 case Iop_RoundF64toF32:
4076 case Iop_F64toI64S:
4077 case Iop_F64toI64U:
4078 case Iop_I64StoF64:
4079 case Iop_I64UtoF64:
4080 case Iop_SinF64:
4081 case Iop_CosF64:
4082 case Iop_TanF64:
4083 case Iop_2xm1F64:
4084 case Iop_SqrtF64:
4085 case Iop_RecpExpF64:
4086 /* I32(rm) x I64/F64 -> I64/F64 */
4087 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
4089 case Iop_ShlD64:
4090 case Iop_ShrD64:
4091 case Iop_RoundD64toInt:
4092 /* I32(rm) x D64 -> D64 */
4093 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
4095 case Iop_ShlD128:
4096 case Iop_ShrD128:
4097 case Iop_RoundD128toInt:
4098 /* I32(rm) x D128 -> D128 */
4099 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
4101 case Iop_RoundF128toInt:
4102 /* I32(rm) x F128 -> F128 */
4103 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
4105 case Iop_D64toI64S:
4106 case Iop_D64toI64U:
4107 case Iop_I64StoD64:
4108 case Iop_I64UtoD64:
4109 /* I32(rm) x I64/D64 -> D64/I64 */
4110 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
4112 case Iop_F32toD32:
4113 case Iop_F64toD32:
4114 case Iop_F128toD32:
4115 case Iop_D32toF32:
4116 case Iop_D64toF32:
4117 case Iop_D128toF32:
4118 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D32/F32 */
4119 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
4121 case Iop_F32toD64:
4122 case Iop_F64toD64:
4123 case Iop_F128toD64:
4124 case Iop_D32toF64:
4125 case Iop_D64toF64:
4126 case Iop_D128toF64:
4127 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D64/F64 */
4128 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
4130 case Iop_F32toD128:
4131 case Iop_F64toD128:
4132 case Iop_F128toD128:
4133 case Iop_D32toF128:
4134 case Iop_D64toF128:
4135 case Iop_D128toF128:
4136 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D128/F128 */
4137 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
4139 case Iop_RoundF32toInt:
4140 case Iop_SqrtF32:
4141 case Iop_RecpExpF32:
4142 /* I32(rm) x I32/F32 -> I32/F32 */
4143 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
4145 case Iop_SqrtF128:
4146 /* I32(rm) x F128 -> F128 */
4147 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
4149 case Iop_I32StoF32:
4150 case Iop_I32UtoF32:
4151 case Iop_F32toI32S:
4152 case Iop_F32toI32U:
4153 /* First arg is I32 (rounding mode), second is F32/I32 (data). */
4154 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
4156 case Iop_F64toF16:
4157 case Iop_F32toF16:
4158 /* First arg is I32 (rounding mode), second is F64/F32 (data). */
4159 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
4161 case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32 */
4162 case Iop_F128toI32U: /* IRRoundingMode(I32) x F128 -> unsigned I32 */
4163 case Iop_F128toF32: /* IRRoundingMode(I32) x F128 -> F32 */
4164 case Iop_D128toI32S: /* IRRoundingMode(I32) x D128 -> signed I32 */
4165 case Iop_D128toI32U: /* IRRoundingMode(I32) x D128 -> unsigned I32 */
4166 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
4168 case Iop_F128toI128S: /* IRRoundingMode(I32) x F128 -> signed I128 */
4169 case Iop_RndF128: /* IRRoundingMode(I32) x F128 -> F128 */
4170 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
4172 case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64 */
4173 case Iop_F128toI64U: /* IRRoundingMode(I32) x F128 -> unsigned I64 */
4174 case Iop_F128toF64: /* IRRoundingMode(I32) x F128 -> F64 */
4175 case Iop_D128toD64: /* IRRoundingMode(I64) x D128 -> D64 */
4176 case Iop_D128toI64S: /* IRRoundingMode(I64) x D128 -> signed I64 */
4177 case Iop_D128toI64U: /* IRRoundingMode(I32) x D128 -> unsigned I64 */
4178 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
4180 case Iop_F64HLtoF128:
4181 case Iop_D64HLtoD128:
4182 return assignNew('V', mce, Ity_I128,
4183 binop(Iop_64HLto128, vatom1, vatom2));
4185 case Iop_F64toI32U:
4186 case Iop_F64toI32S:
4187 case Iop_F64toF32:
4188 case Iop_I64UtoF32:
4189 case Iop_D64toI32U:
4190 case Iop_D64toI32S:
4191 /* First arg is I32 (rounding mode), second is F64/I64/D64 (data). */
4192 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
4194 case Iop_D64toD32:
4195 /* First arg is I32 (rounding mode), second is D64 (data). */
4196 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
4198 case Iop_F64toI16S:
4199 /* First arg is I32 (rounding mode), second is F64 (data). */
4200 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
4202 case Iop_InsertExpD64:
4203 /* I64 x I64 -> D64 */
4204 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
4206 case Iop_InsertExpD128:
4207 /* I64 x I128 -> D128 */
4208 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
4210 case Iop_CmpF32:
4211 case Iop_CmpF64:
4212 case Iop_CmpF128:
4213 case Iop_CmpD64:
4214 case Iop_CmpD128:
4215 case Iop_CmpExpD64:
4216 case Iop_CmpExpD128:
4217 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
4219 case Iop_MaxNumF32:
4220 case Iop_MinNumF32:
4221 /* F32 x F32 -> F32 */
4222 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
4224 case Iop_MaxNumF64:
4225 case Iop_MinNumF64:
4226 /* F64 x F64 -> F64 */
4227 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
4229 /* non-FP after here */
4231 case Iop_DivModU64to32:
4232 case Iop_DivModS64to32:
4233 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
4235 case Iop_DivModU128to64:
4236 case Iop_DivModS128to64:
4237 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
4239 case Iop_8HLto16:
4240 return assignNew('V', mce, Ity_I16, binop(op, vatom1, vatom2));
4241 case Iop_16HLto32:
4242 return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
4243 case Iop_32HLto64:
4244 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
4246 case Iop_DivModU64to64:
4247 case Iop_DivModS64to64: {
4248 IRAtom* vTmp64 = mkLazy2(mce, Ity_I64, vatom1, vatom2);
4249 return assignNew('V', mce, Ity_I128,
4250 binop(Iop_64HLto128, vTmp64, vTmp64));
4253 case Iop_MullS64:
4254 case Iop_MullU64: {
4255 IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
4256 IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
4257 return assignNew('V', mce, Ity_I128,
4258 binop(Iop_64HLto128, vHi64, vLo64));
4261 case Iop_DivModU32to32:
4262 case Iop_DivModS32to32: {
4263 IRAtom* vTmp32 = mkLazy2(mce, Ity_I32, vatom1, vatom2);
4264 return assignNew('V', mce, Ity_I64,
4265 binop(Iop_32HLto64, vTmp32, vTmp32));
4268 case Iop_MullS32:
4269 case Iop_MullU32: {
4270 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
4271 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
4272 return assignNew('V', mce, Ity_I64,
4273 binop(Iop_32HLto64, vHi32, vLo32));
4276 case Iop_MullS16:
4277 case Iop_MullU16: {
4278 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
4279 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
4280 return assignNew('V', mce, Ity_I32,
4281 binop(Iop_16HLto32, vHi16, vLo16));
4284 case Iop_MullS8:
4285 case Iop_MullU8: {
4286 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
4287 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
4288 return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
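/* Editor's sketch (illustrative only, not compiled): the MullU32/MullS32
   rule above in scalar terms.  mkLeft smears undefinedness from the lowest
   undefined bit upwards (x | -x), and the high half of the product is then
   wholly defined or wholly undefined.  Name invented. */
#if 0
#include <stdint.h>
static uint64_t example_mull32_vbits ( uint32_t v1, uint32_t v2 )
{
   uint32_t uifu = v1 | v2;                        /* UifU32       */
   uint32_t vLo  = uifu | (uint32_t)(0u - uifu);   /* mkLeft32     */
   uint32_t vHi  = (vLo != 0) ? 0xFFFFFFFFu : 0u;  /* PCast to I32 */
   return ((uint64_t)vHi << 32) | vLo;             /* Iop_32HLto64 */
}
#endif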
4291 case Iop_Sad8Ux4: /* maybe we could do better? ftm, do mkLazy2. */
4292 case Iop_DivS32:
4293 case Iop_DivU32:
4294 case Iop_DivU32E:
4295 case Iop_DivS32E:
4296 case Iop_QAdd32S: /* could probably do better */
4297 case Iop_QSub32S: /* could probably do better */
4298 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
4300 case Iop_DivS64:
4301 case Iop_DivU64:
4302 case Iop_DivS64E:
4303 case Iop_DivU64E:
4304 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
4306 case Iop_Add32:
4307 if (mce->dlbo.dl_Add32 == DLexpensive
4308 || (mce->dlbo.dl_Add32 == DLauto && hu == HuOth)) {
4309 return expensiveAddSub(mce,True,Ity_I32,
4310 vatom1,vatom2, atom1,atom2);
4311 } else {
4312 goto cheap_AddSub32;
4314 case Iop_Sub32:
4315 if (mce->dlbo.dl_Sub32 == DLexpensive
4316 || (mce->dlbo.dl_Sub32 == DLauto && hu == HuOth)) {
4317 return expensiveAddSub(mce,False,Ity_I32,
4318 vatom1,vatom2, atom1,atom2);
4319 } else {
4320 goto cheap_AddSub32;
4323 cheap_AddSub32:
4324 case Iop_Mul32:
4325 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
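/* Editor's sketch (illustrative only, not compiled): the "cheap" rule used
   above for Add32/Sub32/Mul32 -- an undefined input bit can, via carries,
   disturb its own and all higher result bits, which is what mkLeft (x | -x)
   expresses.  Name invented. */
#if 0
#include <stdint.h>
static uint32_t example_cheap_add32_vbits ( uint32_t v1, uint32_t v2 )
{
   uint32_t uifu = v1 | v2;               /* UifU32   */
   return uifu | (uint32_t)(0u - uifu);   /* mkLeft32 */
}
#endif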
4327 case Iop_CmpORD32S:
4328 case Iop_CmpORD32U:
4329 case Iop_CmpORD64S:
4330 case Iop_CmpORD64U:
4331 return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);
4333 case Iop_Add64:
4334 if (mce->dlbo.dl_Add64 == DLexpensive
4335 || (mce->dlbo.dl_Add64 == DLauto && hu == HuOth)) {
4336 return expensiveAddSub(mce,True,Ity_I64,
4337 vatom1,vatom2, atom1,atom2);
4338 } else {
4339 goto cheap_AddSub64;
4341 case Iop_Sub64:
4342 if (mce->dlbo.dl_Sub64 == DLexpensive
4343 || (mce->dlbo.dl_Sub64 == DLauto && hu == HuOth)) {
4344 return expensiveAddSub(mce,False,Ity_I64,
4345 vatom1,vatom2, atom1,atom2);
4346 } else {
4347 goto cheap_AddSub64;
4350 cheap_AddSub64:
4351 case Iop_Mul64:
4352 return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
4354 case Iop_Mul16:
4355 case Iop_Add16:
4356 case Iop_Sub16:
4357 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
4359 case Iop_Mul8:
4360 case Iop_Sub8:
4361 case Iop_Add8:
4362 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
4364 ////---- CmpXX64
4365 case Iop_CmpEQ64: case Iop_CmpNE64:
4366 if (mce->dlbo.dl_CmpEQ64_CmpNE64 == DLexpensive)
4367 goto expensive_cmp64;
4368 else
4369 goto cheap_cmp64;
4371 expensive_cmp64:
4372 case Iop_ExpCmpNE64:
4373 return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
4375 cheap_cmp64:
4376 case Iop_CmpLE64S: case Iop_CmpLE64U:
4377 case Iop_CmpLT64U: case Iop_CmpLT64S:
4378 return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
4380 ////---- CmpXX32
4381 case Iop_CmpEQ32: case Iop_CmpNE32:
4382 if (mce->dlbo.dl_CmpEQ32_CmpNE32 == DLexpensive)
4383 goto expensive_cmp32;
4384 else
4385 goto cheap_cmp32;
4387 expensive_cmp32:
4388 case Iop_ExpCmpNE32:
4389 return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
4391 cheap_cmp32:
4392 case Iop_CmpLE32S: case Iop_CmpLE32U:
4393 case Iop_CmpLT32U: case Iop_CmpLT32S:
4394 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
4396 ////---- CmpXX16
4397 case Iop_CmpEQ16: case Iop_CmpNE16:
4398 if (mce->dlbo.dl_CmpEQ16_CmpNE16 == DLexpensive)
4399 goto expensive_cmp16;
4400 else
4401 goto cheap_cmp16;
4403 expensive_cmp16:
4404 case Iop_ExpCmpNE16:
4405 return expensiveCmpEQorNE(mce,Ity_I16, vatom1,vatom2, atom1,atom2 );
4407 cheap_cmp16:
4408 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
4410 ////---- CmpXX8
4411 case Iop_CmpEQ8: case Iop_CmpNE8:
4412 if (mce->dlbo.dl_CmpEQ8_CmpNE8 == DLexpensive)
4413 goto expensive_cmp8;
4414 else
4415 goto cheap_cmp8;
4417 expensive_cmp8:
4418 return expensiveCmpEQorNE(mce,Ity_I8, vatom1,vatom2, atom1,atom2 );
4420 cheap_cmp8:
4421 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
4423 ////---- end CmpXX{64,32,16,8}
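/* Editor's sketch (illustrative only, not compiled): the "cheap" comparison
   rule used above -- the 1-bit result is undefined if any bit of either
   operand is undefined.  Name invented. */
#if 0
#include <stdint.h>
static int example_cheap_cmp_vbit ( uint64_t v1, uint64_t v2 )
{
   return ((v1 | v2) != 0) ? 1 : 0;   /* PCast-to-I1 of UifU */
}
#endif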
4425 case Iop_CasCmpEQ8: case Iop_CasCmpNE8:
4426 case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
4427 case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
4428 case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
4429 /* Just say these all produce a defined result, regardless
4430 of their arguments. See COMMENT_ON_CasCmpEQ in this file. */
4431 return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));
4433 case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
4434 return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );
4436 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
4437 return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );
4439 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
4440 return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );
4442 case Iop_Shl8: case Iop_Shr8: case Iop_Sar8:
4443 return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
4445 case Iop_AndV256:
4446 uifu = mkUifUV256; difd = mkDifDV256;
4447 and_or_ty = Ity_V256; improve = mkImproveANDV256; goto do_And_Or;
4448 case Iop_AndV128:
4449 uifu = mkUifUV128; difd = mkDifDV128;
4450 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
4451 case Iop_And64:
4452 uifu = mkUifU64; difd = mkDifD64;
4453 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
4454 case Iop_And32:
4455 uifu = mkUifU32; difd = mkDifD32;
4456 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
4457 case Iop_And16:
4458 uifu = mkUifU16; difd = mkDifD16;
4459 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
4460 case Iop_And8:
4461 uifu = mkUifU8; difd = mkDifD8;
4462 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
4464 case Iop_OrV256:
4465 uifu = mkUifUV256; difd = mkDifDV256;
4466 and_or_ty = Ity_V256; improve = mkImproveORV256; goto do_And_Or;
4467 case Iop_OrV128:
4468 uifu = mkUifUV128; difd = mkDifDV128;
4469 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
4470 case Iop_Or64:
4471 uifu = mkUifU64; difd = mkDifD64;
4472 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
4473 case Iop_Or32:
4474 uifu = mkUifU32; difd = mkDifD32;
4475 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
4476 case Iop_Or16:
4477 uifu = mkUifU16; difd = mkDifD16;
4478 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
4479 case Iop_Or8:
4480 uifu = mkUifU8; difd = mkDifD8;
4481 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
4483 do_And_Or:
4484 return
4485 assignNew(
4486 'V', mce,
4487 and_or_ty,
4488 difd(mce, uifu(mce, vatom1, vatom2),
4489 difd(mce, improve(mce, atom1, vatom1),
4490 improve(mce, atom2, vatom2) ) ) );
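/* Editor's sketch (illustrative only, not compiled): the And case of the
   expression assembled above, in scalar terms.  A defined 0 in either
   operand forces the corresponding And result bit to be defined, which is
   what the 'improve' terms capture; DifD is a bitwise And of V bits.  Name
   invented. */
#if 0
#include <stdint.h>
static uint32_t example_and32_vbits ( uint32_t a1, uint32_t v1,
                                      uint32_t a2, uint32_t v2 )
{
   uint32_t uifu = v1 | v2;      /* worst case: undefined if either is   */
   uint32_t imp1 = a1 | v1;      /* 0 exactly where a1 is a defined 0    */
   uint32_t imp2 = a2 | v2;      /* 0 exactly where a2 is a defined 0    */
   return uifu & imp1 & imp2;    /* DifD(UifU, DifD(improve1, improve2)) */
}
#endif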
4492 case Iop_Xor8:
4493 return mkUifU8(mce, vatom1, vatom2);
4494 case Iop_Xor16:
4495 return mkUifU16(mce, vatom1, vatom2);
4496 case Iop_Xor32:
4497 return mkUifU32(mce, vatom1, vatom2);
4498 case Iop_Xor64:
4499 return mkUifU64(mce, vatom1, vatom2);
4500 case Iop_XorV128:
4501 return mkUifUV128(mce, vatom1, vatom2);
4502 case Iop_XorV256:
4503 return mkUifUV256(mce, vatom1, vatom2);
4505 /* V256-bit SIMD */
4507 case Iop_ShrN16x16:
4508 case Iop_ShrN32x8:
4509 case Iop_ShrN64x4:
4510 case Iop_SarN16x16:
4511 case Iop_SarN32x8:
4512 case Iop_ShlN16x16:
4513 case Iop_ShlN32x8:
4514 case Iop_ShlN64x4:
4515 /* Same scheme as with all other shifts. Note: 22 Oct 05:
4516 this is wrong now, scalar shifts are done properly lazily.
4517 Vector shifts should be fixed too. */
4518 complainIfUndefined(mce, atom2, NULL);
4519 return assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2));
4521 case Iop_QSub8Ux32:
4522 case Iop_QSub8Sx32:
4523 case Iop_Sub8x32:
4524 case Iop_Min8Ux32:
4525 case Iop_Min8Sx32:
4526 case Iop_Max8Ux32:
4527 case Iop_Max8Sx32:
4528 case Iop_CmpGT8Sx32:
4529 case Iop_CmpEQ8x32:
4530 case Iop_Avg8Ux32:
4531 case Iop_QAdd8Ux32:
4532 case Iop_QAdd8Sx32:
4533 case Iop_Add8x32:
4534 return binary8Ix32(mce, vatom1, vatom2);
4536 case Iop_QSub16Ux16:
4537 case Iop_QSub16Sx16:
4538 case Iop_Sub16x16:
4539 case Iop_Mul16x16:
4540 case Iop_MulHi16Sx16:
4541 case Iop_MulHi16Ux16:
4542 case Iop_Min16Sx16:
4543 case Iop_Min16Ux16:
4544 case Iop_Max16Sx16:
4545 case Iop_Max16Ux16:
4546 case Iop_CmpGT16Sx16:
4547 case Iop_CmpEQ16x16:
4548 case Iop_Avg16Ux16:
4549 case Iop_QAdd16Ux16:
4550 case Iop_QAdd16Sx16:
4551 case Iop_Add16x16:
4552 return binary16Ix16(mce, vatom1, vatom2);
4554 case Iop_Sub32x8:
4555 case Iop_CmpGT32Sx8:
4556 case Iop_CmpEQ32x8:
4557 case Iop_Add32x8:
4558 case Iop_Max32Ux8:
4559 case Iop_Max32Sx8:
4560 case Iop_Min32Ux8:
4561 case Iop_Min32Sx8:
4562 case Iop_Mul32x8:
4563 return binary32Ix8(mce, vatom1, vatom2);
4565 case Iop_Sub64x4:
4566 case Iop_Add64x4:
4567 case Iop_CmpEQ64x4:
4568 case Iop_CmpGT64Sx4:
4569 return binary64Ix4(mce, vatom1, vatom2);
4571 /* Perm32x8: rearrange values in left arg using steering values
4572 from right arg. So rearrange the vbits in the same way but
4573 pessimise wrt steering values. */
4574 case Iop_Perm32x8:
4575 return mkUifUV256(
4576 mce,
4577 assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2)),
4578 mkPCast32x8(mce, vatom2)
4581 /* Q-and-Qshift-by-vector of the form (V128, V128) -> V256.
4582 Handle the shifted results in the same way that other
4583 binary Q ops are handled, eg QSub: UifU the two args,
4584 then pessimise -- which is binaryNIxM. But for the upper
4585 V128, we require to generate just 1 bit which is the
4586 pessimised shift result, with 127 defined zeroes above it.
4588 Note that this is overly pessimistic in that in fact only the
4589 bottom 8 bits of each lane of the second arg determine the shift
4590 amount. Really we ought to ignore any undefinedness in the
4591 rest of the lanes of the second arg. */
4592 case Iop_QandSQsh64x2: case Iop_QandUQsh64x2:
4593 case Iop_QandSQRsh64x2: case Iop_QandUQRsh64x2:
4594 case Iop_QandSQsh32x4: case Iop_QandUQsh32x4:
4595 case Iop_QandSQRsh32x4: case Iop_QandUQRsh32x4:
4596 case Iop_QandSQsh16x8: case Iop_QandUQsh16x8:
4597 case Iop_QandSQRsh16x8: case Iop_QandUQRsh16x8:
4598 case Iop_QandSQsh8x16: case Iop_QandUQsh8x16:
4599 case Iop_QandSQRsh8x16: case Iop_QandUQRsh8x16:
4601 // The function to generate the pessimised shift result
4602 IRAtom* (*binaryNIxM)(MCEnv*,IRAtom*,IRAtom*) = NULL;
4603 switch (op) {
4604 case Iop_QandSQsh64x2:
4605 case Iop_QandUQsh64x2:
4606 case Iop_QandSQRsh64x2:
4607 case Iop_QandUQRsh64x2:
4608 binaryNIxM = binary64Ix2;
4609 break;
4610 case Iop_QandSQsh32x4:
4611 case Iop_QandUQsh32x4:
4612 case Iop_QandSQRsh32x4:
4613 case Iop_QandUQRsh32x4:
4614 binaryNIxM = binary32Ix4;
4615 break;
4616 case Iop_QandSQsh16x8:
4617 case Iop_QandUQsh16x8:
4618 case Iop_QandSQRsh16x8:
4619 case Iop_QandUQRsh16x8:
4620 binaryNIxM = binary16Ix8;
4621 break;
4622 case Iop_QandSQsh8x16:
4623 case Iop_QandUQsh8x16:
4624 case Iop_QandSQRsh8x16:
4625 case Iop_QandUQRsh8x16:
4626 binaryNIxM = binary8Ix16;
4627 break;
4628 default:
4629 tl_assert(0);
4631 tl_assert(binaryNIxM);
4632 // Pessimised shift result, shV[127:0]
4633 IRAtom* shV = binaryNIxM(mce, vatom1, vatom2);
4634 // Generates: Def--(127)--Def PCast-to-I1(shV)
4635 IRAtom* qV = mkPCastXXtoXXlsb(mce, shV, Ity_V128);
4636 // and assemble the result
4637 return assignNew('V', mce, Ity_V256,
4638 binop(Iop_V128HLtoV256, qV, shV));
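/* Editor's sketch (illustrative only, not compiled): how the V256 shadow is
   assembled just above, with each V128 half modelled as a pair of 64-bit
   words.  Type and function names are invented. */
#if 0
#include <stdint.h>
typedef struct { uint64_t hi, lo; } ExampleV128;
static void example_QandQsh_vbits ( ExampleV128 shV,        /* pessimised shift result */
                                    ExampleV128* upperOut,  /* upper V128 of result    */
                                    ExampleV128* lowerOut ) /* lower V128 of result    */
{
   *lowerOut = shV;                                        /* shifted lanes, pessimised */
   upperOut->hi = 0;
   upperOut->lo = ((shV.hi | shV.lo) != 0) ? 1ULL : 0ULL;  /* Def--(127)--Def lsb       */
}
#endif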
4641 default:
4642 ppIROp(op);
4643 VG_(tool_panic)("memcheck:expr2vbits_Binop");
4648 static
4649 IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
4651 /* For the widening operations {8,16,32}{U,S}to{16,32,64}, the
4652 selection of shadow operation implicitly duplicates the logic in
4653 do_shadow_LoadG and should be kept in sync (in the very unlikely
4654 event that the interpretation of such widening ops changes in
4655 future). See comment in do_shadow_LoadG. */
4656 IRAtom* vatom = expr2vbits( mce, atom, HuOth );
4657 tl_assert(isOriginalAtom(mce,atom));
4658 switch (op) {
4660 case Iop_Abs64Fx2:
4661 case Iop_Neg64Fx2:
4662 case Iop_RSqrtEst64Fx2:
4663 case Iop_RecipEst64Fx2:
4664 case Iop_Log2_64Fx2:
4665 return unary64Fx2(mce, vatom);
4667 case Iop_Sqrt64F0x2:
4668 return unary64F0x2(mce, vatom);
4670 case Iop_Sqrt32Fx8:
4671 case Iop_RSqrtEst32Fx8:
4672 case Iop_RecipEst32Fx8:
4673 return unary32Fx8(mce, vatom);
4675 case Iop_Sqrt64Fx4:
4676 return unary64Fx4(mce, vatom);
4678 case Iop_RecipEst32Fx4:
4679 case Iop_I32UtoFx4:
4680 case Iop_I32StoFx4:
4681 case Iop_QFtoI32Ux4_RZ:
4682 case Iop_QFtoI32Sx4_RZ:
4683 case Iop_RoundF32x4_RM:
4684 case Iop_RoundF32x4_RP:
4685 case Iop_RoundF32x4_RN:
4686 case Iop_RoundF32x4_RZ:
4687 case Iop_RecipEst32Ux4:
4688 case Iop_Abs32Fx4:
4689 case Iop_Neg32Fx4:
4690 case Iop_RSqrtEst32Fx4:
4691 case Iop_Log2_32Fx4:
4692 return unary32Fx4(mce, vatom);
4694 case Iop_I32UtoFx2:
4695 case Iop_I32StoFx2:
4696 case Iop_RecipEst32Fx2:
4697 case Iop_RecipEst32Ux2:
4698 case Iop_Abs32Fx2:
4699 case Iop_Neg32Fx2:
4700 case Iop_RSqrtEst32Fx2:
4701 return unary32Fx2(mce, vatom);
4703 case Iop_Sqrt32F0x4:
4704 case Iop_RSqrtEst32F0x4:
4705 case Iop_RecipEst32F0x4:
4706 return unary32F0x4(mce, vatom);
4708 case Iop_32UtoV128:
4709 case Iop_64UtoV128:
4710 case Iop_Dup8x16:
4711 case Iop_Dup16x8:
4712 case Iop_Dup32x4:
4713 case Iop_Reverse1sIn8_x16:
4714 case Iop_Reverse8sIn16_x8:
4715 case Iop_Reverse8sIn32_x4:
4716 case Iop_Reverse16sIn32_x4:
4717 case Iop_Reverse8sIn64_x2:
4718 case Iop_Reverse16sIn64_x2:
4719 case Iop_Reverse32sIn64_x2:
4720 case Iop_V256toV128_1: case Iop_V256toV128_0:
4721 case Iop_ZeroHI64ofV128:
4722 case Iop_ZeroHI96ofV128:
4723 case Iop_ZeroHI112ofV128:
4724 case Iop_ZeroHI120ofV128:
4725 return assignNew('V', mce, Ity_V128, unop(op, vatom));
4727 case Iop_F128HItoF64: /* F128 -> high half of F128 */
4728 case Iop_D128HItoD64: /* D128 -> high half of D128 */
4729 return assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vatom));
4730 case Iop_F128LOtoF64: /* F128 -> low half of F128 */
4731 case Iop_D128LOtoD64: /* D128 -> low half of D128 */
4732 return assignNew('V', mce, Ity_I64, unop(Iop_128to64, vatom));
4734 case Iop_NegF128:
4735 case Iop_AbsF128:
4736 case Iop_RndF128:
4737 case Iop_TruncF128toI64S: /* F128 -> I64S */
4738 case Iop_TruncF128toI32S: /* F128 -> I32S (result stored in 64-bits) */
4739 case Iop_TruncF128toI64U: /* F128 -> I64U */
4740 case Iop_TruncF128toI32U: /* F128 -> I32U (result stored in 64-bits) */
4741 return mkPCastTo(mce, Ity_I128, vatom);
4743 case Iop_BCD128toI128S:
4744 case Iop_MulI128by10:
4745 case Iop_MulI128by10Carry:
4746 case Iop_F16toF64x2:
4747 case Iop_F64toF16x2:
4748 return vatom;
4750 case Iop_I32StoF128: /* signed I32 -> F128 */
4751 case Iop_I64StoF128: /* signed I64 -> F128 */
4752 case Iop_I32UtoF128: /* unsigned I32 -> F128 */
4753 case Iop_I64UtoF128: /* unsigned I64 -> F128 */
4754 case Iop_F32toF128: /* F32 -> F128 */
4755 case Iop_F64toF128: /* F64 -> F128 */
4756 case Iop_I32StoD128: /* signed I32 -> D128 */
4757 case Iop_I64StoD128: /* signed I64 -> D128 */
4758 case Iop_I32UtoD128: /* unsigned I32 -> D128 */
4759 case Iop_I64UtoD128: /* unsigned I64 -> D128 */
4760 return mkPCastTo(mce, Ity_I128, vatom);
4762 case Iop_F16toF64:
4763 case Iop_F32toF64:
4764 case Iop_I32StoF64:
4765 case Iop_I32UtoF64:
4766 case Iop_NegF64:
4767 case Iop_AbsF64:
4768 case Iop_RSqrtEst5GoodF64:
4769 case Iop_RoundF64toF64_NEAREST:
4770 case Iop_RoundF64toF64_NegINF:
4771 case Iop_RoundF64toF64_PosINF:
4772 case Iop_RoundF64toF64_ZERO:
4773 case Iop_Clz64:
4774 case Iop_D32toD64:
4775 case Iop_I32StoD64:
4776 case Iop_I32UtoD64:
4777 case Iop_ExtractExpD64: /* D64 -> I64 */
4778 case Iop_ExtractExpD128: /* D128 -> I64 */
4779 case Iop_ExtractSigD64: /* D64 -> I64 */
4780 case Iop_ExtractSigD128: /* D128 -> I64 */
4781 case Iop_DPBtoBCD:
4782 case Iop_BCDtoDPB:
4783 return mkPCastTo(mce, Ity_I64, vatom);
4785 case Iop_D64toD128:
4786 return mkPCastTo(mce, Ity_I128, vatom);
4788 case Iop_Clz32:
4789 case Iop_TruncF64asF32:
4790 case Iop_NegF32:
4791 case Iop_AbsF32:
4792 case Iop_F16toF32:
4793 return mkPCastTo(mce, Ity_I32, vatom);
4795 case Iop_Ctz32:
4796 case Iop_Ctz64:
4797 return expensiveCountTrailingZeroes(mce, op, atom, vatom);
4799 case Iop_1Uto64:
4800 case Iop_1Sto64:
4801 case Iop_8Uto64:
4802 case Iop_8Sto64:
4803 case Iop_16Uto64:
4804 case Iop_16Sto64:
4805 case Iop_32Sto64:
4806 case Iop_32Uto64:
4807 case Iop_V128to64:
4808 case Iop_V128HIto64:
4809 case Iop_128HIto64:
4810 case Iop_128to64:
4811 case Iop_Dup8x8:
4812 case Iop_Dup16x4:
4813 case Iop_Dup32x2:
4814 case Iop_Reverse8sIn16_x4:
4815 case Iop_Reverse8sIn32_x2:
4816 case Iop_Reverse16sIn32_x2:
4817 case Iop_Reverse8sIn64_x1:
4818 case Iop_Reverse16sIn64_x1:
4819 case Iop_Reverse32sIn64_x1:
4820 case Iop_V256to64_0: case Iop_V256to64_1:
4821 case Iop_V256to64_2: case Iop_V256to64_3:
4822 return assignNew('V', mce, Ity_I64, unop(op, vatom));
4824 case Iop_64to32:
4825 case Iop_64HIto32:
4826 case Iop_1Uto32:
4827 case Iop_1Sto32:
4828 case Iop_8Uto32:
4829 case Iop_16Uto32:
4830 case Iop_16Sto32:
4831 case Iop_8Sto32:
4832 case Iop_V128to32:
4833 return assignNew('V', mce, Ity_I32, unop(op, vatom));
4835 case Iop_8Sto16:
4836 case Iop_8Uto16:
4837 case Iop_32to16:
4838 case Iop_32HIto16:
4839 case Iop_64to16:
4840 case Iop_GetMSBs8x16:
4841 return assignNew('V', mce, Ity_I16, unop(op, vatom));
4843 case Iop_1Uto8:
4844 case Iop_1Sto8:
4845 case Iop_16to8:
4846 case Iop_16HIto8:
4847 case Iop_32to8:
4848 case Iop_64to8:
4849 case Iop_GetMSBs8x8:
4850 return assignNew('V', mce, Ity_I8, unop(op, vatom));
4852 case Iop_32to1:
4853 return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));
4855 case Iop_64to1:
4856 return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));
4858 case Iop_ReinterpF64asI64:
4859 case Iop_ReinterpI64asF64:
4860 case Iop_ReinterpI32asF32:
4861 case Iop_ReinterpF32asI32:
4862 case Iop_ReinterpI64asD64:
4863 case Iop_ReinterpD64asI64:
4864 case Iop_NotV256:
4865 case Iop_NotV128:
4866 case Iop_Not64:
4867 case Iop_Not32:
4868 case Iop_Not16:
4869 case Iop_Not8:
4870 case Iop_Not1:
4871 return vatom;
4873 case Iop_CmpNEZ8x8:
4874 case Iop_Cnt8x8:
4875 case Iop_Clz8x8:
4876 case Iop_Cls8x8:
4877 case Iop_Abs8x8:
4878 return mkPCast8x8(mce, vatom);
4880 case Iop_CmpNEZ8x16:
4881 case Iop_Cnt8x16:
4882 case Iop_Clz8x16:
4883 case Iop_Cls8x16:
4884 case Iop_Abs8x16:
4885 case Iop_Ctz8x16:
4886 return mkPCast8x16(mce, vatom);
4888 case Iop_CmpNEZ16x4:
4889 case Iop_Clz16x4:
4890 case Iop_Cls16x4:
4891 case Iop_Abs16x4:
4892 return mkPCast16x4(mce, vatom);
4894 case Iop_CmpNEZ16x8:
4895 case Iop_Clz16x8:
4896 case Iop_Cls16x8:
4897 case Iop_Abs16x8:
4898 case Iop_Ctz16x8:
4899 return mkPCast16x8(mce, vatom);
4901 case Iop_CmpNEZ32x2:
4902 case Iop_Clz32x2:
4903 case Iop_Cls32x2:
4904 case Iop_FtoI32Ux2_RZ:
4905 case Iop_FtoI32Sx2_RZ:
4906 case Iop_Abs32x2:
4907 return mkPCast32x2(mce, vatom);
4909 case Iop_CmpNEZ32x4:
4910 case Iop_Clz32x4:
4911 case Iop_Cls32x4:
4912 case Iop_FtoI32Ux4_RZ:
4913 case Iop_FtoI32Sx4_RZ:
4914 case Iop_Abs32x4:
4915 case Iop_RSqrtEst32Ux4:
4916 case Iop_Ctz32x4:
4917 return mkPCast32x4(mce, vatom);
4919 case Iop_CmpwNEZ32:
4920 return mkPCastTo(mce, Ity_I32, vatom);
4922 case Iop_CmpwNEZ64:
4923 return mkPCastTo(mce, Ity_I64, vatom);
4925 case Iop_CmpNEZ64x2:
4926 case Iop_CipherSV128:
4927 case Iop_Clz64x2:
4928 case Iop_Abs64x2:
4929 case Iop_Ctz64x2:
4930 return mkPCast64x2(mce, vatom);
4932 case Iop_PwBitMtxXpose64x2:
4933 return assignNew('V', mce, Ity_V128, unop(op, vatom));
4935 case Iop_NarrowUn16to8x8:
4936 case Iop_NarrowUn32to16x4:
4937 case Iop_NarrowUn64to32x2:
4938 case Iop_QNarrowUn16Sto8Sx8:
4939 case Iop_QNarrowUn16Sto8Ux8:
4940 case Iop_QNarrowUn16Uto8Ux8:
4941 case Iop_QNarrowUn32Sto16Sx4:
4942 case Iop_QNarrowUn32Sto16Ux4:
4943 case Iop_QNarrowUn32Uto16Ux4:
4944 case Iop_QNarrowUn64Sto32Sx2:
4945 case Iop_QNarrowUn64Sto32Ux2:
4946 case Iop_QNarrowUn64Uto32Ux2:
4947 case Iop_F32toF16x4:
4948 return vectorNarrowUnV128(mce, op, vatom);
4950 case Iop_Widen8Sto16x8:
4951 case Iop_Widen8Uto16x8:
4952 case Iop_Widen16Sto32x4:
4953 case Iop_Widen16Uto32x4:
4954 case Iop_Widen32Sto64x2:
4955 case Iop_Widen32Uto64x2:
4956 case Iop_F16toF32x4:
4957 return vectorWidenI64(mce, op, vatom);
4959 case Iop_PwAddL32Ux2:
4960 case Iop_PwAddL32Sx2:
4961 return mkPCastTo(mce, Ity_I64,
4962 assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom))));
4964 case Iop_PwAddL16Ux4:
4965 case Iop_PwAddL16Sx4:
4966 return mkPCast32x2(mce,
4967 assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom))));
4969 case Iop_PwAddL8Ux8:
4970 case Iop_PwAddL8Sx8:
4971 return mkPCast16x4(mce,
4972 assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom))));
4974 case Iop_PwAddL32Ux4:
4975 case Iop_PwAddL32Sx4:
4976 return mkPCast64x2(mce,
4977 assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom))));
4979 case Iop_PwAddL64Ux2:
4980 return mkPCast128x1(mce,
4981 assignNew('V', mce, Ity_V128, unop(op, mkPCast64x2(mce, vatom))));
4983 case Iop_PwAddL16Ux8:
4984 case Iop_PwAddL16Sx8:
4985 return mkPCast32x4(mce,
4986 assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom))));
4988 case Iop_PwAddL8Ux16:
4989 case Iop_PwAddL8Sx16:
4990 return mkPCast16x8(mce,
4991 assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom))));
4993 case Iop_I64UtoF32:
4994 default:
4995 ppIROp(op);
4996 VG_(tool_panic)("memcheck:expr2vbits_Unop");
5001 /* Worker function -- do not call directly. See comments on
5002 expr2vbits_Load for the meaning of |guard|.
5004 Generates IR to (1) perform a definedness test of |addr|, (2)
5005 perform a validity test of |addr|, and (3) return the Vbits for the
5006 location indicated by |addr|. All of this only happens when
5007 |guard| is NULL or |guard| evaluates to True at run time.
5009 If |guard| evaluates to False at run time, the returned value is
5010 the IR-mandated 0x55..55 value, and no checks nor shadow loads are
5011 performed.
5013 The definedness of |guard| itself is not checked. That is assumed
5014 to have been done before this point, by the caller. */
5015 static
5016 IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
5017 IREndness end, IRType ty,
5018 IRAtom* addr, UInt bias, IRAtom* guard )
5020 tl_assert(isOriginalAtom(mce,addr));
5021 tl_assert(end == Iend_LE || end == Iend_BE);
5023 /* First, emit a definedness test for the address. This also sets
5024 the address (shadow) to 'defined' following the test. */
5025 complainIfUndefined( mce, addr, guard );
5027 /* Now cook up a call to the relevant helper function, to read the
5028 data V bits from shadow memory. */
5029 ty = shadowTypeV(ty);
5031 void* helper = NULL;
5032 const HChar* hname = NULL;
5033 Bool ret_via_outparam = False;
5035 if (end == Iend_LE) {
5036 switch (ty) {
5037 case Ity_V256: helper = &MC_(helperc_LOADV256le);
5038 hname = "MC_(helperc_LOADV256le)";
5039 ret_via_outparam = True;
5040 break;
5041 case Ity_V128: helper = &MC_(helperc_LOADV128le);
5042 hname = "MC_(helperc_LOADV128le)";
5043 ret_via_outparam = True;
5044 break;
5045 case Ity_I64: helper = &MC_(helperc_LOADV64le);
5046 hname = "MC_(helperc_LOADV64le)";
5047 break;
5048 case Ity_I32: helper = &MC_(helperc_LOADV32le);
5049 hname = "MC_(helperc_LOADV32le)";
5050 break;
5051 case Ity_I16: helper = &MC_(helperc_LOADV16le);
5052 hname = "MC_(helperc_LOADV16le)";
5053 break;
5054 case Ity_I8: helper = &MC_(helperc_LOADV8);
5055 hname = "MC_(helperc_LOADV8)";
5056 break;
5057 default: ppIRType(ty);
5058 VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(LE)");
5060 } else {
5061 switch (ty) {
5062 case Ity_V256: helper = &MC_(helperc_LOADV256be);
5063 hname = "MC_(helperc_LOADV256be)";
5064 ret_via_outparam = True;
5065 break;
5066 case Ity_V128: helper = &MC_(helperc_LOADV128be);
5067 hname = "MC_(helperc_LOADV128be)";
5068 ret_via_outparam = True;
5069 break;
5070 case Ity_I64: helper = &MC_(helperc_LOADV64be);
5071 hname = "MC_(helperc_LOADV64be)";
5072 break;
5073 case Ity_I32: helper = &MC_(helperc_LOADV32be);
5074 hname = "MC_(helperc_LOADV32be)";
5075 break;
5076 case Ity_I16: helper = &MC_(helperc_LOADV16be);
5077 hname = "MC_(helperc_LOADV16be)";
5078 break;
5079 case Ity_I8: helper = &MC_(helperc_LOADV8);
5080 hname = "MC_(helperc_LOADV8)";
5081 break;
5082 default: ppIRType(ty);
5083 VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(BE)");
5087 tl_assert(helper);
5088 tl_assert(hname);
5090 /* Generate the actual address into addrAct. */
5091 IRAtom* addrAct;
5092 if (bias == 0) {
5093 addrAct = addr;
5094 } else {
5095 IROp mkAdd;
5096 IRAtom* eBias;
5097 IRType tyAddr = mce->hWordTy;
5098 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
5099 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
5100 eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
5101 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );
5104 /* We need to have a place to park the V bits we're just about to
5105 read. */
5106 IRTemp datavbits = newTemp(mce, ty, VSh);
5108 /* Here's the call. */
5109 IRDirty* di;
5110 if (ret_via_outparam) {
5111 di = unsafeIRDirty_1_N( datavbits,
5112 2/*regparms*/,
5113 hname, VG_(fnptr_to_fnentry)( helper ),
5114 mkIRExprVec_2( IRExpr_VECRET(), addrAct ) );
5115 } else {
5116 di = unsafeIRDirty_1_N( datavbits,
5117 1/*regparms*/,
5118 hname, VG_(fnptr_to_fnentry)( helper ),
5119 mkIRExprVec_1( addrAct ) );
5122 setHelperAnns( mce, di );
5123 if (guard) {
5124 di->guard = guard;
5125 /* Ideally the didn't-happen return value here would be all-ones
5126 (all-undefined), so it'd be obvious if it got used
5127 inadvertently. We can get by with the IR-mandated default
5128 value (0b01 repeating, 0x55 etc) as that'll still look pretty
5129 undefined if it ever leaks out. */
5131 stmt( 'V', mce, IRStmt_Dirty(di) );
5133 return mkexpr(datavbits);
5137 /* Generate IR to do a shadow load. The helper is expected to check
5138 the validity of the address and return the V bits for that address.
5139 This can optionally be controlled by a guard, which is assumed to
5140 be True if NULL. In the case where the guard is False at runtime,
5141 the helper will return the didn't-do-the-call value of 0x55..55.
5142 Since that means "completely undefined result", the caller of
5143 this function will need to fix up the result somehow in that
5144 case.
5146 Caller of this function is also expected to have checked the
5147 definedness of |guard| before this point.
5149 static
5150 IRAtom* expr2vbits_Load ( MCEnv* mce,
5151 IREndness end, IRType ty,
5152 IRAtom* addr, UInt bias,
5153 IRAtom* guard )
5155 tl_assert(end == Iend_LE || end == Iend_BE);
5156 switch (shadowTypeV(ty)) {
5157 case Ity_I8:
5158 case Ity_I16:
5159 case Ity_I32:
5160 case Ity_I64:
5161 case Ity_V128:
5162 case Ity_V256:
5163 return expr2vbits_Load_WRK(mce, end, ty, addr, bias, guard);
5164 default:
5165 VG_(tool_panic)("expr2vbits_Load");
5170 /* The most general handler for guarded loads. Assumes the
5171 definedness of GUARD has already been checked by the caller. A
5172 GUARD of NULL is assumed to mean "always True". Generates code to
5173 check the definedness and validity of ADDR.
5175 Generate IR to do a shadow load from ADDR and return the V bits.
5176 The loaded type is TY. The loaded data is then (shadow) widened by
5177 using VWIDEN, which can be Iop_INVALID to denote a no-op. If GUARD
5178 evaluates to False at run time then the returned Vbits are simply
5179 VALT instead. Note therefore that the argument type of VWIDEN must
5180 be TY and the result type of VWIDEN must equal the type of VALT.
5182 static
5183 IRAtom* expr2vbits_Load_guarded_General ( MCEnv* mce,
5184 IREndness end, IRType ty,
5185 IRAtom* addr, UInt bias,
5186 IRAtom* guard,
5187 IROp vwiden, IRAtom* valt )
5189 /* Sanity check the conversion operation, and also set TYWIDE. */
5190 IRType tyWide = Ity_INVALID;
5191 switch (vwiden) {
5192 case Iop_INVALID:
5193 tyWide = ty;
5194 break;
5195 case Iop_16Uto32: case Iop_16Sto32: case Iop_8Uto32: case Iop_8Sto32:
5196 tyWide = Ity_I32;
5197 break;
5198 default:
5199 VG_(tool_panic)("memcheck:expr2vbits_Load_guarded_General");
5202 /* If the guard evaluates to True, this will hold the loaded V bits
5203 at TY. If the guard evaluates to False, this will be the IR-mandated
5204 default value (0x55..55, hence mostly undefined), in which case we
5205 will have to replace it using an ITE below. */
5206 IRAtom* iftrue1
5207 = assignNew('V', mce, ty,
5208 expr2vbits_Load(mce, end, ty, addr, bias, guard));
5209 /* Now (shadow-) widen the loaded V bits to the desired width. In
5210 the guard-is-False case, the allowable widening operators will
5211 in the worst case (unsigned widening) at least leave the
5212 pre-widened part as being marked all-undefined, and in the best
5213 case (signed widening) mark the whole widened result as
5214 undefined. Anyway, it doesn't matter really, since in this case
5215 we will replace said value with the default value |valt| using an
5216 ITE. */
5217 IRAtom* iftrue2
5218 = vwiden == Iop_INVALID
5219 ? iftrue1
5220 : assignNew('V', mce, tyWide, unop(vwiden, iftrue1));
5221 /* These are the V bits we will return if the load doesn't take
5222 place. */
5223 IRAtom* iffalse
5224 = valt;
5225 /* Prepare the cond for the ITE. Convert a NULL cond into
5226 something that iropt knows how to fold out later. */
5227 IRAtom* cond
5228 = guard == NULL ? mkU1(1) : guard;
5229 /* And assemble the final result. */
5230 return assignNew('V', mce, tyWide, IRExpr_ITE(cond, iftrue2, iffalse));
5234 /* A simpler handler for guarded loads, in which there is no
5235 conversion operation, and the default V bit return (when the guard
5236 evaluates to False at runtime) is "all defined". If there is no
5237 guard expression or the guard is always TRUE this function behaves
5238 like expr2vbits_Load. It is assumed that definedness of GUARD has
5239 already been checked at the call site. */
5240 static
5241 IRAtom* expr2vbits_Load_guarded_Simple ( MCEnv* mce,
5242 IREndness end, IRType ty,
5243 IRAtom* addr, UInt bias,
5244 IRAtom *guard )
5246 return expr2vbits_Load_guarded_General(
5247 mce, end, ty, addr, bias, guard, Iop_INVALID, definedOfType(ty)
5252 static
5253 IRAtom* expr2vbits_ITE ( MCEnv* mce,
5254 IRAtom* cond, IRAtom* iftrue, IRAtom* iffalse )
5256 IRAtom *vbitsC, *vbits0, *vbits1;
5257 IRType ty;
5258 /* Given ITE(cond, iftrue, iffalse), generate
5259 ITE(cond, iftrue#, iffalse#) `UifU` PCast(cond#)
5260 That is, steer the V bits like the originals, but trash the
5261 result if the steering value is undefined. This gives
5262 lazy propagation. */
5263 tl_assert(isOriginalAtom(mce, cond));
5264 tl_assert(isOriginalAtom(mce, iftrue));
5265 tl_assert(isOriginalAtom(mce, iffalse));
5267 vbitsC = expr2vbits(mce, cond, HuOth); // could we use HuPCa here?
5268 vbits1 = expr2vbits(mce, iftrue, HuOth);
5269 vbits0 = expr2vbits(mce, iffalse, HuOth);
5270 ty = typeOfIRExpr(mce->sb->tyenv, vbits0);
5272 return
5273 mkUifU(mce, ty, assignNew('V', mce, ty,
5274 IRExpr_ITE(cond, vbits1, vbits0)),
5275 mkPCastTo(mce, ty, vbitsC) );
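/* Editor's sketch (illustrative only, not compiled): the ITE rule above in
   scalar terms -- steer the shadow values like the originals, then trash the
   whole result if the condition itself is undefined.  Name invented. */
#if 0
#include <stdint.h>
static uint32_t example_ite_vbits ( int cond, uint32_t vcond,
                                    uint32_t vtrue, uint32_t vfalse )
{
   uint32_t steered = cond ? vtrue : vfalse;           /* ITE on the shadows */
   uint32_t pcast   = (vcond != 0) ? 0xFFFFFFFFu : 0u; /* PCast(cond#) to ty */
   return steered | pcast;                             /* UifU               */
}
#endif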
5278 /* --------- This is the main expression-handling function. --------- */
5280 static
5281 IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e,
5282 HowUsed hu/*use HuOth if unknown*/ )
5284 switch (e->tag) {
5286 case Iex_Get:
5287 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
5289 case Iex_GetI:
5290 return shadow_GETI( mce, e->Iex.GetI.descr,
5291 e->Iex.GetI.ix, e->Iex.GetI.bias );
5293 case Iex_RdTmp:
5294 return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );
5296 case Iex_Const:
5297 return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));
5299 case Iex_Qop:
5300 return expr2vbits_Qop(
5301 mce,
5302 e->Iex.Qop.details->op,
5303 e->Iex.Qop.details->arg1, e->Iex.Qop.details->arg2,
5304 e->Iex.Qop.details->arg3, e->Iex.Qop.details->arg4
5307 case Iex_Triop:
5308 return expr2vbits_Triop(
5309 mce,
5310 e->Iex.Triop.details->op,
5311 e->Iex.Triop.details->arg1, e->Iex.Triop.details->arg2,
5312 e->Iex.Triop.details->arg3
5315 case Iex_Binop:
5316 return expr2vbits_Binop(
5317 mce,
5318 e->Iex.Binop.op,
5319 e->Iex.Binop.arg1, e->Iex.Binop.arg2,
5323 case Iex_Unop:
5324 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
5326 case Iex_Load:
5327 return expr2vbits_Load( mce, e->Iex.Load.end,
5328 e->Iex.Load.ty,
5329 e->Iex.Load.addr, 0/*addr bias*/,
5330 NULL/* guard == "always True"*/ );
5332 case Iex_CCall:
5333 return mkLazyN( mce, e->Iex.CCall.args,
5334 e->Iex.CCall.retty,
5335 e->Iex.CCall.cee );
5337 case Iex_ITE:
5338 return expr2vbits_ITE( mce, e->Iex.ITE.cond, e->Iex.ITE.iftrue,
5339 e->Iex.ITE.iffalse);
5341 default:
5342 VG_(printf)("\n");
5343 ppIRExpr(e);
5344 VG_(printf)("\n");
5345 VG_(tool_panic)("memcheck: expr2vbits");
5350 /*------------------------------------------------------------*/
5351 /*--- Generate shadow stmts from all kinds of IRStmts. ---*/
5352 /*------------------------------------------------------------*/
5354 /* Widen a value to the host word size. */
5356 static
5357 IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
5359 IRType ty, tyH;
5361 /* vatom is vbits-value and as such can only have a shadow type. */
5362 tl_assert(isShadowAtom(mce,vatom));
5364 ty = typeOfIRExpr(mce->sb->tyenv, vatom);
5365 tyH = mce->hWordTy;
5367 if (tyH == Ity_I32) {
5368 switch (ty) {
5369 case Ity_I32:
5370 return vatom;
5371 case Ity_I16:
5372 return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
5373 case Ity_I8:
5374 return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
5375 default:
5376 goto unhandled;
5378 } else
5379 if (tyH == Ity_I64) {
5380 switch (ty) {
5381 case Ity_I32:
5382 return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
5383 case Ity_I16:
5384 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
5385 assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
5386 case Ity_I8:
5387 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
5388 assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
5389 default:
5390 goto unhandled;
5392 } else {
5393 goto unhandled;
5395 unhandled:
5396 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
5397 VG_(tool_panic)("zwidenToHostWord");
5401 /* Generate a shadow store. |addr| is always the original address
5402 atom. You can pass in either originals or V-bits for the data
5403 atom, but obviously not both. This function generates a check for
5404 the definedness and (indirectly) the validity of |addr|, but only
5405 when |guard| evaluates to True at run time (or is NULL).
5407 |guard| :: Ity_I1 controls whether the store really happens; NULL
5408 means it unconditionally does. Note that |guard| itself is not
5409 checked for definedness; the caller of this function must do that
5410 if necessary.
5412 static
5413 void do_shadow_Store ( MCEnv* mce,
5414 IREndness end,
5415 IRAtom* addr, UInt bias,
5416 IRAtom* data, IRAtom* vdata,
5417 IRAtom* guard )
5419 IROp mkAdd;
5420 IRType ty, tyAddr;
5421 void* helper = NULL;
5422 const HChar* hname = NULL;
5423 IRConst* c;
5425 tyAddr = mce->hWordTy;
5426 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
5427 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
5428 tl_assert( end == Iend_LE || end == Iend_BE );
5430 if (data) {
5431 tl_assert(!vdata);
5432 tl_assert(isOriginalAtom(mce, data));
5433 tl_assert(bias == 0);
5434 vdata = expr2vbits( mce, data, HuOth );
5435 } else {
5436 tl_assert(vdata);
5439 tl_assert(isOriginalAtom(mce,addr));
5440 tl_assert(isShadowAtom(mce,vdata));
5442 if (guard) {
5443 tl_assert(isOriginalAtom(mce, guard));
5444 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
5447 ty = typeOfIRExpr(mce->sb->tyenv, vdata);
5449 // If we're not doing undefined value checking, pretend that this value
5450 // is "all valid". That lets Vex's optimiser remove some of the V bit
5451 // shadow computation ops that precede it.
5452 if (MC_(clo_mc_level) == 1) {
5453 switch (ty) {
5454 case Ity_V256: // V256 weirdness -- used four times
5455 c = IRConst_V256(V_BITS32_DEFINED); break;
5456 case Ity_V128: // V128 weirdness -- used twice
5457 c = IRConst_V128(V_BITS16_DEFINED); break;
5458 case Ity_I64: c = IRConst_U64 (V_BITS64_DEFINED); break;
5459 case Ity_I32: c = IRConst_U32 (V_BITS32_DEFINED); break;
5460 case Ity_I16: c = IRConst_U16 (V_BITS16_DEFINED); break;
5461 case Ity_I8: c = IRConst_U8 (V_BITS8_DEFINED); break;
5462 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
5464 vdata = IRExpr_Const( c );
5467 /* First, emit a definedness test for the address. This also sets
5468 the address (shadow) to 'defined' following the test. Both of
5469 those actions are gated on |guard|. */
5470 complainIfUndefined( mce, addr, guard );
5472 /* Now decide which helper function to call to write the data V
5473 bits into shadow memory. */
5474 if (end == Iend_LE) {
5475 switch (ty) {
5476 case Ity_V256: /* we'll use the helper four times */
5477 case Ity_V128: /* we'll use the helper twice */
5478 case Ity_I64: helper = &MC_(helperc_STOREV64le);
5479 hname = "MC_(helperc_STOREV64le)";
5480 break;
5481 case Ity_I32: helper = &MC_(helperc_STOREV32le);
5482 hname = "MC_(helperc_STOREV32le)";
5483 break;
5484 case Ity_I16: helper = &MC_(helperc_STOREV16le);
5485 hname = "MC_(helperc_STOREV16le)";
5486 break;
5487 case Ity_I8: helper = &MC_(helperc_STOREV8);
5488 hname = "MC_(helperc_STOREV8)";
5489 break;
5490 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
5492 } else {
5493 switch (ty) {
5494 case Ity_V128: /* we'll use the helper twice */
5495 case Ity_I64: helper = &MC_(helperc_STOREV64be);
5496 hname = "MC_(helperc_STOREV64be)";
5497 break;
5498 case Ity_I32: helper = &MC_(helperc_STOREV32be);
5499 hname = "MC_(helperc_STOREV32be)";
5500 break;
5501 case Ity_I16: helper = &MC_(helperc_STOREV16be);
5502 hname = "MC_(helperc_STOREV16be)";
5503 break;
5504 case Ity_I8: helper = &MC_(helperc_STOREV8);
5505 hname = "MC_(helperc_STOREV8)";
5506 break;
5507 /* Note, no V256 case here, because no big-endian target that
5508 we support has 256-bit vectors. */
5509 default: VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
5513 if (UNLIKELY(ty == Ity_V256)) {
5515 /* V256-bit case -- phrased in terms of 64 bit units (Qs), with
5516 Q3 being the most significant lane. */
5517 /* These are the offsets of the Qs in memory. */
5518 Int offQ0, offQ1, offQ2, offQ3;
5520 /* Various bits for constructing the 4 lane helper calls */
5521 IRDirty *diQ0, *diQ1, *diQ2, *diQ3;
5522 IRAtom *addrQ0, *addrQ1, *addrQ2, *addrQ3;
5523 IRAtom *vdataQ0, *vdataQ1, *vdataQ2, *vdataQ3;
5524 IRAtom *eBiasQ0, *eBiasQ1, *eBiasQ2, *eBiasQ3;
5526 if (end == Iend_LE) {
5527 offQ0 = 0; offQ1 = 8; offQ2 = 16; offQ3 = 24;
5528 } else {
5529 offQ3 = 0; offQ2 = 8; offQ1 = 16; offQ0 = 24;
5532 eBiasQ0 = tyAddr==Ity_I32 ? mkU32(bias+offQ0) : mkU64(bias+offQ0);
5533 addrQ0 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ0) );
5534 vdataQ0 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_0, vdata));
5535 diQ0 = unsafeIRDirty_0_N(
5536 1/*regparms*/,
5537 hname, VG_(fnptr_to_fnentry)( helper ),
5538 mkIRExprVec_2( addrQ0, vdataQ0 )
5541 eBiasQ1 = tyAddr==Ity_I32 ? mkU32(bias+offQ1) : mkU64(bias+offQ1);
5542 addrQ1 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ1) );
5543 vdataQ1 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_1, vdata));
5544 diQ1 = unsafeIRDirty_0_N(
5545 1/*regparms*/,
5546 hname, VG_(fnptr_to_fnentry)( helper ),
5547 mkIRExprVec_2( addrQ1, vdataQ1 )
5550 eBiasQ2 = tyAddr==Ity_I32 ? mkU32(bias+offQ2) : mkU64(bias+offQ2);
5551 addrQ2 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ2) );
5552 vdataQ2 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_2, vdata));
5553 diQ2 = unsafeIRDirty_0_N(
5554 1/*regparms*/,
5555 hname, VG_(fnptr_to_fnentry)( helper ),
5556 mkIRExprVec_2( addrQ2, vdataQ2 )
5559 eBiasQ3 = tyAddr==Ity_I32 ? mkU32(bias+offQ3) : mkU64(bias+offQ3);
5560 addrQ3 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ3) );
5561 vdataQ3 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_3, vdata));
5562 diQ3 = unsafeIRDirty_0_N(
5563 1/*regparms*/,
5564 hname, VG_(fnptr_to_fnentry)( helper ),
5565 mkIRExprVec_2( addrQ3, vdataQ3 )
5568 if (guard)
5569 diQ0->guard = diQ1->guard = diQ2->guard = diQ3->guard = guard;
5571 setHelperAnns( mce, diQ0 );
5572 setHelperAnns( mce, diQ1 );
5573 setHelperAnns( mce, diQ2 );
5574 setHelperAnns( mce, diQ3 );
5575 stmt( 'V', mce, IRStmt_Dirty(diQ0) );
5576 stmt( 'V', mce, IRStmt_Dirty(diQ1) );
5577 stmt( 'V', mce, IRStmt_Dirty(diQ2) );
5578 stmt( 'V', mce, IRStmt_Dirty(diQ3) );
5581 else if (UNLIKELY(ty == Ity_V128)) {
5583 /* V128-bit case */
5584 /* See comment in next clause re 64-bit regparms */
5585 /* also, need to be careful about endianness */
5587 Int offLo64, offHi64;
5588 IRDirty *diLo64, *diHi64;
5589 IRAtom *addrLo64, *addrHi64;
5590 IRAtom *vdataLo64, *vdataHi64;
5591 IRAtom *eBiasLo64, *eBiasHi64;
5593 if (end == Iend_LE) {
5594 offLo64 = 0;
5595 offHi64 = 8;
5596 } else {
5597 offLo64 = 8;
5598 offHi64 = 0;
5601 eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
5602 addrLo64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
5603 vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
5604 diLo64 = unsafeIRDirty_0_N(
5605 1/*regparms*/,
5606 hname, VG_(fnptr_to_fnentry)( helper ),
5607 mkIRExprVec_2( addrLo64, vdataLo64 )
5609 eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
5610 addrHi64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
5611 vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
5612 diHi64 = unsafeIRDirty_0_N(
5613 1/*regparms*/,
5614 hname, VG_(fnptr_to_fnentry)( helper ),
5615 mkIRExprVec_2( addrHi64, vdataHi64 )
5617 if (guard) diLo64->guard = guard;
5618 if (guard) diHi64->guard = guard;
5619 setHelperAnns( mce, diLo64 );
5620 setHelperAnns( mce, diHi64 );
5621 stmt( 'V', mce, IRStmt_Dirty(diLo64) );
5622 stmt( 'V', mce, IRStmt_Dirty(diHi64) );
5624 } else {
5626 IRDirty *di;
5627 IRAtom *addrAct;
5629 /* 8/16/32/64-bit cases */
5630 /* Generate the actual address into addrAct. */
5631 if (bias == 0) {
5632 addrAct = addr;
5633 } else {
5634 IRAtom* eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
5635 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));
5638 if (ty == Ity_I64) {
5639 /* We can't do this with regparm 2 on 32-bit platforms, since
5640 the back ends aren't clever enough to handle 64-bit
5641 regparm args. Therefore be different. */
5642 di = unsafeIRDirty_0_N(
5643 1/*regparms*/,
5644 hname, VG_(fnptr_to_fnentry)( helper ),
5645 mkIRExprVec_2( addrAct, vdata )
5647 } else {
5648 di = unsafeIRDirty_0_N(
5649 2/*regparms*/,
5650 hname, VG_(fnptr_to_fnentry)( helper ),
5651 mkIRExprVec_2( addrAct,
5652 zwidenToHostWord( mce, vdata ))
5655 if (guard) di->guard = guard;
5656 setHelperAnns( mce, di );
5657 stmt( 'V', mce, IRStmt_Dirty(di) );
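/* Rough sketch of the net effect in a simple case (illustrative
   only): for a 32-bit little-endian store with bias 0 and no guard,
   the code above reduces to a single helper call along the lines of

      di = unsafeIRDirty_0_N(
              2/*regparms*/, "MC_(helperc_STOREV32le)",
              VG_(fnptr_to_fnentry)( &MC_(helperc_STOREV32le) ),
              mkIRExprVec_2( addr, zwidenToHostWord(mce, vdata) ) );
      setHelperAnns( mce, di );
      stmt( 'V', mce, IRStmt_Dirty(di) );

   that is, one call which writes the V bits for the four addressed
   bytes into shadow memory. */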
5663 /* Do lazy pessimistic propagation through a dirty helper call, by
5664 looking at the annotations on it. This is the most complex part of
5665 Memcheck. */
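/* Outline of the scheme used below (added commentary, not original):
   every input to the dirty call -- its guard, its arguments, any
   guest state it reads and any memory it reads -- is PCast-ed down to
   a single Ity_I32 definedness summary and UifU-ed into |curr|, e.g.

      here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, arg, HuOth) );
      curr = mkUifU32 ( mce, here, curr );

   That one pessimistic summary is then PCast-ed back up to the width
   of every output: the destination temporary, any guest state written
   and any memory written.  Hence if any input is even partially
   undefined, all outputs are treated as wholly undefined. */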
5667 static IRType szToITy ( Int n )
5669 switch (n) {
5670 case 1: return Ity_I8;
5671 case 2: return Ity_I16;
5672 case 4: return Ity_I32;
5673 case 8: return Ity_I64;
5674 default: VG_(tool_panic)("szToITy(memcheck)");
5678 static
5679 void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
5681 Int i, k, n, toDo, gSz, gOff;
5682 IRAtom *src, *here, *curr;
5683 IRType tySrc, tyDst;
5684 IRTemp dst;
5685 IREndness end;
5687 /* What's the native endianness? We need to know this. */
5688 # if defined(VG_BIGENDIAN)
5689 end = Iend_BE;
5690 # elif defined(VG_LITTLEENDIAN)
5691 end = Iend_LE;
5692 # else
5693 # error "Unknown endianness"
5694 # endif
5696 /* First check the guard. */
5697 complainIfUndefined(mce, d->guard, NULL);
5699 /* Now round up all inputs and PCast over them. */
5700 curr = definedOfType(Ity_I32);
5702 /* Inputs: unmasked args
5703 Note: arguments are evaluated REGARDLESS of the guard expression */
5704 for (i = 0; d->args[i]; i++) {
5705 IRAtom* arg = d->args[i];
5706 if ( (d->cee->mcx_mask & (1<<i))
5707 || UNLIKELY(is_IRExpr_VECRET_or_GSPTR(arg)) ) {
5708 /* ignore this arg */
5709 } else {
5710 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, arg, HuOth) );
5711 curr = mkUifU32(mce, here, curr);
5715 /* Inputs: guest state that we read. */
5716 for (i = 0; i < d->nFxState; i++) {
5717 tl_assert(d->fxState[i].fx != Ifx_None);
5718 if (d->fxState[i].fx == Ifx_Write)
5719 continue;
5721 /* Enumerate the described state segments */
5722 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
5723 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
5724 gSz = d->fxState[i].size;
5726 /* Ignore any sections marked as 'always defined'. */
5727 if (isAlwaysDefd(mce, gOff, gSz)) {
5728 if (0)
5729 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
5730 gOff, gSz);
5731 continue;
5734 /* This state element is read or modified. So we need to
5735 consider it. If larger than 8 bytes, deal with it in
5736 8-byte chunks. */
5737 while (True) {
5738 tl_assert(gSz >= 0);
5739 if (gSz == 0) break;
5740 n = gSz <= 8 ? gSz : 8;
5741 /* update 'curr' with UifU of the state slice
5742 gOff .. gOff+n-1 */
5743 tySrc = szToITy( n );
5745 /* Observe the guard expression. If it is false use an
5746 all-bits-defined bit pattern */
5747 IRAtom *cond, *iffalse, *iftrue;
5749 cond = assignNew('V', mce, Ity_I1, d->guard);
5750 iftrue = assignNew('V', mce, tySrc, shadow_GET(mce, gOff, tySrc));
5751 iffalse = assignNew('V', mce, tySrc, definedOfType(tySrc));
5752 src = assignNew('V', mce, tySrc,
5753 IRExpr_ITE(cond, iftrue, iffalse));
5755 here = mkPCastTo( mce, Ity_I32, src );
5756 curr = mkUifU32(mce, here, curr);
5757 gSz -= n;
5758 gOff += n;
5763 /* Inputs: memory. First set up some info needed regardless of
5764 whether we're doing reads or writes. */
5766 if (d->mFx != Ifx_None) {
5767 /* Because we may do multiple shadow loads/stores from the same
5768 base address, it's best to do a single test of its
5769 definedness right now. Post-instrumentation optimisation
5770 should remove all but this test. */
5771 IRType tyAddr;
5772 tl_assert(d->mAddr);
5773 complainIfUndefined(mce, d->mAddr, d->guard);
5775 tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
5776 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
5777 tl_assert(tyAddr == mce->hWordTy); /* not really right */
5780 /* Deal with memory inputs (reads or modifies) */
5781 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
5782 toDo = d->mSize;
5783 /* chew off 32-bit chunks. We don't care about the endianness
5784 since it's all going to be condensed down to a single bit,
5785 but nevertheless choose an endianness which is hopefully
5786 native to the platform. */
5787 while (toDo >= 4) {
5788 here = mkPCastTo(
5789 mce, Ity_I32,
5790 expr2vbits_Load_guarded_Simple(
5791 mce, end, Ity_I32, d->mAddr, d->mSize - toDo, d->guard )
5793 curr = mkUifU32(mce, here, curr);
5794 toDo -= 4;
5796 /* chew off 16-bit chunks */
5797 while (toDo >= 2) {
5798 here = mkPCastTo(
5799 mce, Ity_I32,
5800 expr2vbits_Load_guarded_Simple(
5801 mce, end, Ity_I16, d->mAddr, d->mSize - toDo, d->guard )
5803 curr = mkUifU32(mce, here, curr);
5804 toDo -= 2;
5806 /* chew off the remaining 8-bit chunk, if any */
5807 if (toDo == 1) {
5808 here = mkPCastTo(
5809 mce, Ity_I32,
5810 expr2vbits_Load_guarded_Simple(
5811 mce, end, Ity_I8, d->mAddr, d->mSize - toDo, d->guard )
5813 curr = mkUifU32(mce, here, curr);
5814 toDo -= 1;
5816 tl_assert(toDo == 0);
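/* Worked example (added commentary): for d->mSize == 7, the loops
   above issue a guarded 4-byte load at offset 0, a 2-byte load at
   offset 4 and a 1-byte load at offset 6, PCast each result to
   Ity_I32 and UifU it into |curr|, leaving toDo == 0 as asserted. */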
5819 /* Whew! So curr is a 32-bit V-value summarising pessimistically
5820 all the inputs to the helper. Now we need to re-distribute the
5821 results to all destinations. */
5823 /* Outputs: the destination temporary, if there is one. */
5824 if (d->tmp != IRTemp_INVALID) {
5825 dst = findShadowTmpV(mce, d->tmp);
5826 tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
5827 assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
5830 /* Outputs: guest state that we write or modify. */
5831 for (i = 0; i < d->nFxState; i++) {
5832 tl_assert(d->fxState[i].fx != Ifx_None);
5833 if (d->fxState[i].fx == Ifx_Read)
5834 continue;
5836 /* Enumerate the described state segments */
5837 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
5838 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
5839 gSz = d->fxState[i].size;
5841 /* Ignore any sections marked as 'always defined'. */
5842 if (isAlwaysDefd(mce, gOff, gSz))
5843 continue;
5845 /* This state element is written or modified. So we need to
5846 consider it. If larger than 8 bytes, deal with it in
5847 8-byte chunks. */
5848 while (True) {
5849 tl_assert(gSz >= 0);
5850 if (gSz == 0) break;
5851 n = gSz <= 8 ? gSz : 8;
5852 /* Write suitably-casted 'curr' to the state slice
5853 gOff .. gOff+n-1 */
5854 tyDst = szToITy( n );
5855 do_shadow_PUT( mce, gOff,
5856 NULL, /* original atom */
5857 mkPCastTo( mce, tyDst, curr ), d->guard );
5858 gSz -= n;
5859 gOff += n;
5864 /* Outputs: memory that we write or modify. Same comments about
5865 endianness as above apply. */
5866 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
5867 toDo = d->mSize;
5868 /* chew off 32-bit chunks */
5869 while (toDo >= 4) {
5870 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
5871 NULL, /* original data */
5872 mkPCastTo( mce, Ity_I32, curr ),
5873 d->guard );
5874 toDo -= 4;
5876 /* chew off 16-bit chunks */
5877 while (toDo >= 2) {
5878 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
5879 NULL, /* original data */
5880 mkPCastTo( mce, Ity_I16, curr ),
5881 d->guard );
5882 toDo -= 2;
5884 /* chew off the remaining 8-bit chunk, if any */
5885 if (toDo == 1) {
5886 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
5887 NULL, /* original data */
5888 mkPCastTo( mce, Ity_I8, curr ),
5889 d->guard );
5890 toDo -= 1;
5892 tl_assert(toDo == 0);
5898 /* We have an ABI hint telling us that [base .. base+len-1] is to
5899 become undefined ("writable"). Generate code to call a helper to
5900 notify the A/V bit machinery of this fact.
5902 We call
5903 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
5904 Addr nia );
5906 static
5907 void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
5909 IRDirty* di;
5911 if (MC_(clo_mc_level) == 3) {
5912 di = unsafeIRDirty_0_N(
5913 3/*regparms*/,
5914 "MC_(helperc_MAKE_STACK_UNINIT_w_o)",
5915 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT_w_o) ),
5916 mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
5918 } else {
5919 /* We ignore the supplied nia, since it is irrelevant. */
5920 tl_assert(MC_(clo_mc_level) == 2 || MC_(clo_mc_level) == 1);
5921 /* Special-case the len==128 case, since that is for amd64-ELF,
5922 which is a very common target. */
5923 if (len == 128) {
5924 di = unsafeIRDirty_0_N(
5925 1/*regparms*/,
5926 "MC_(helperc_MAKE_STACK_UNINIT_128_no_o)",
5927 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT_128_no_o)),
5928 mkIRExprVec_1( base )
5930 } else {
5931 di = unsafeIRDirty_0_N(
5932 2/*regparms*/,
5933 "MC_(helperc_MAKE_STACK_UNINIT_no_o)",
5934 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT_no_o) ),
5935 mkIRExprVec_2( base, mkIRExpr_HWord( (UInt)len) )
5940 stmt( 'V', mce, IRStmt_Dirty(di) );
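/* Added note: the len == 128 fast path presumably pays off because an
   AbiHint of exactly that size corresponds to the amd64 ELF ABI's
   128-byte stack redzone, which the amd64 front end reports around
   calls and returns, making it by far the most common case and worth
   its own one-argument helper. */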
5944 /* ------ Dealing with IRCAS (big and complex) ------ */
5946 /* FWDS */
5947 static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
5948 IRAtom* baseaddr, Int offset );
5949 static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
5950 static void gen_store_b ( MCEnv* mce, Int szB,
5951 IRAtom* baseaddr, Int offset, IRAtom* dataB,
5952 IRAtom* guard );
5954 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
5955 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
5958 /* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
5959 IRExpr.Consts, else this asserts. If they are both Consts, it
5960 doesn't do anything. So that just leaves the RdTmp case.
5962 In which case: this assigns the shadow value SHADOW to the IR
5963 shadow temporary associated with ORIG. That is, ORIG, being an
5964 original temporary, will have a shadow temporary associated with
5965 it. However, in the case envisaged here, there will so far have
5966 been no IR emitted to actually write a shadow value into that
5967 temporary. What this routine does is to (emit IR to) copy the
5968 value in SHADOW into said temporary, so that after this call,
5969 IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
5970 value in SHADOW.
5972 Point is to allow callers to compute "by hand" a shadow value for
5973 ORIG, and force it to be associated with ORIG.
5975 How do we know that the shadow associated with ORIG has not so far
5976 been assigned to? Well, we don't per se know that, but supposing
5977 it had. Then this routine would create a second assignment to it,
5978 and later the IR sanity checker would barf. But that never
5979 happens. QED.
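/* Illustrative use (taken from the CAS handling below, not new
   functionality): after computing the loaded V bits |voldLo| by hand,
   do_shadow_CAS_single associates them with the CAS's .oldLo
   temporary via

      bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);

   and, when origin tracking is enabled, does the same with 'B' for
   the origin (B) shadow. */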
5981 static void bind_shadow_tmp_to_orig ( UChar how,
5982 MCEnv* mce,
5983 IRAtom* orig, IRAtom* shadow )
5985 tl_assert(isOriginalAtom(mce, orig));
5986 tl_assert(isShadowAtom(mce, shadow));
5987 switch (orig->tag) {
5988 case Iex_Const:
5989 tl_assert(shadow->tag == Iex_Const);
5990 break;
5991 case Iex_RdTmp:
5992 tl_assert(shadow->tag == Iex_RdTmp);
5993 if (how == 'V') {
5994 assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
5995 shadow);
5996 } else {
5997 tl_assert(how == 'B');
5998 assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
5999 shadow);
6001 break;
6002 default:
6003 tl_assert(0);
6008 static
6009 void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
6011 /* Scheme is (both single- and double- cases):
6013 1. fetch data#,dataB (the proposed new value)
6015 2. fetch expd#,expdB (what we expect to see at the address)
6017 3. check definedness of address
6019 4. load old#,oldB from shadow memory; this also checks
6020 addressibility of the address
6022 5. the CAS itself
6024 6. compute "expected == old". See COMMENT_ON_CasCmpEQ below.
6026 7. if "expected == old" (as computed by (6))
6027 store data#,dataB to shadow memory
6029 Note that 5 reads 'old' but 4 reads 'old#'. Similarly, 5 stores
6030 'data' but 7 stores 'data#'. Hence it is possible for the
6031 shadow data to be incorrectly checked and/or updated:
6033 * 7 is at least gated correctly, since the 'expected == old'
6034 condition is derived from outputs of 5. However, the shadow
6035 write could happen too late: imagine after 5 we are
6036 descheduled, a different thread runs, writes a different
6037 (shadow) value at the address, and then we resume, hence
6038 overwriting the shadow value written by the other thread.
6040 Because the original memory access is atomic, there's no way to
6041 make both the original and shadow accesses into a single atomic
6042 thing, hence this is unavoidable.
6044 At least as Valgrind stands, I don't think it's a problem, since
6045 we're single threaded *and* we guarantee that there are no
6046 context switches during the execution of any specific superblock
6047 -- context switches can only happen at superblock boundaries.
6049 If Valgrind ever becomes MT in the future, then it might be more
6050 of a problem. A possible kludge would be to artificially
6051 associate with the location, a lock, which we must acquire and
6052 release around the transaction as a whole. Hmm, that probably
6053 wouldn't work properly, since it only guards us against other
6054 threads doing CASs on the same location, not against other
6055 threads doing normal reads and writes.
6057 ------------------------------------------------------------
6059 COMMENT_ON_CasCmpEQ:
6061 Note two things. Firstly, in the sequence above, we compute
6062 "expected == old", but we don't check definedness of it. Why
6063 not? Also, the x86 and amd64 front ends use
6064 Iop_CasCmp{EQ,NE}{8,16,32,64} comparisons to make the equivalent
6065 determination (expected == old ?) for themselves, and we also
6066 don't check definedness for those primops; we just say that the
6067 result is defined. Why? Details follow.
6069 x86/amd64 contains various forms of locked insns:
6070 * lock prefix before all basic arithmetic insn;
6071 eg lock xorl %reg1,(%reg2)
6072 * atomic exchange reg-mem
6073 * compare-and-swaps
6075 Rather than attempt to represent them all, which would be a
6076 royal PITA, I used a result from Maurice Herlihy
6077 (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
6078 demonstrates that compare-and-swap is a primitive more general
6079 than the other two, and so can be used to represent all of them.
6080 So the translation scheme for (eg) lock incl (%reg) is as
6081 follows:
6083 again:
6084 old = * %reg
6085 new = old + 1
6086 atomically { if (* %reg == old) { * %reg = new } else { goto again } }
6088 The "atomically" is the CAS bit. The scheme is always the same:
6089 get old value from memory, compute new value, atomically stuff
6090 new value back in memory iff the old value has not changed (iow,
6091 no other thread modified it in the meantime). If it has changed
6092 then we've been out-raced and we have to start over.
6094 Now that's all very neat, but it has the bad side effect of
6095 introducing an explicit equality test into the translation.
6096 Consider the behaviour of said code on a memory location which
6097 is uninitialised. We will wind up doing a comparison on
6098 uninitialised data, and mc duly complains.
6100 What's difficult about this is, the common case is that the
6101 location is uncontended, and so we're usually comparing the same
6102 value (* %reg) with itself. So we shouldn't complain even if it
6103 is undefined. But mc doesn't know that.
6105 My solution is to mark the == in the IR specially, so as to tell
6106 mc that it almost certainly compares a value with itself, and we
6107 should just regard the result as always defined. Rather than
6108 add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
6109 Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.
6111 So there's always the question of, can this give a false
6112 negative? eg, imagine that initially, * %reg is defined; and we
6113 read that; but then in the gap between the read and the CAS, a
6114 different thread writes an undefined (and different) value at
6115 the location. Then the CAS in this thread will fail and we will
6116 go back to "again:", but without knowing that the trip back
6117 there was based on an undefined comparison. No matter; at least
6118 the other thread won the race and the location is correctly
6119 marked as undefined. What if it wrote an uninitialised version
6120 of the same value that was there originally, though?
6122 etc etc. Seems like there's a small corner case in which we
6123 might lose the fact that something's defined -- we're out-raced
6124 in between the "old = * reg" and the "atomically {", _and_ the
6125 other thread is writing in an undefined version of what's
6126 already there. Well, that seems pretty unlikely.
6130 If we ever need to reinstate it .. code which generates a
6131 definedness test for "expected == old" was removed at r10432 of
6132 this file.
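/* Added worked sketch, single CAS on Ity_I32, little-endian
   (illustrative only) -- steps 1..7 above come out roughly as:

      vdataLo     = V bits of cas->dataLo                     (step 1)
      vexpdLo     = V bits of cas->expdLo                     (step 2)
      voldLo      = shadow load from cas->addr         (steps 3 and 4)
      bind voldLo to the shadow temp of cas->oldLo
      IRStmt_CAS(cas)                                         (step 5)
      expd_eq_old = CasCmpEQ32(cas->expdLo, mkexpr(cas->oldLo))    (6)
      shadow store of vdataLo to cas->addr,
         guarded by expd_eq_old                               (step 7)

   which is exactly what do_shadow_CAS_single below emits. */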
6134 if (cas->oldHi == IRTemp_INVALID) {
6135 do_shadow_CAS_single( mce, cas );
6136 } else {
6137 do_shadow_CAS_double( mce, cas );
6142 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
6144 IRAtom *vdataLo = NULL, *bdataLo = NULL;
6145 IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
6146 IRAtom *voldLo = NULL, *boldLo = NULL;
6147 IRAtom *expd_eq_old = NULL;
6148 IROp opCasCmpEQ;
6149 Int elemSzB;
6150 IRType elemTy;
6151 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
6153 /* single CAS */
6154 tl_assert(cas->oldHi == IRTemp_INVALID);
6155 tl_assert(cas->expdHi == NULL);
6156 tl_assert(cas->dataHi == NULL);
6158 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
6159 switch (elemTy) {
6160 case Ity_I8: elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8; break;
6161 case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
6162 case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
6163 case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
6164 default: tl_assert(0); /* IR defn disallows any other types */
6167 /* 1. fetch data# (the proposed new value) */
6168 tl_assert(isOriginalAtom(mce, cas->dataLo));
6169 vdataLo
6170 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo, HuOth));
6171 tl_assert(isShadowAtom(mce, vdataLo));
6172 if (otrak) {
6173 bdataLo
6174 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
6175 tl_assert(isShadowAtom(mce, bdataLo));
6178 /* 2. fetch expected# (what we expect to see at the address) */
6179 tl_assert(isOriginalAtom(mce, cas->expdLo));
6180 vexpdLo
6181 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo, HuOth));
6182 tl_assert(isShadowAtom(mce, vexpdLo));
6183 if (otrak) {
6184 bexpdLo
6185 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
6186 tl_assert(isShadowAtom(mce, bexpdLo));
6189 /* 3. check definedness of address */
6190 /* 4. fetch old# from shadow memory; this also checks
6191 addressability of the address */
6192 voldLo
6193 = assignNew(
6194 'V', mce, elemTy,
6195 expr2vbits_Load(
6196 mce,
6197 cas->end, elemTy, cas->addr, 0/*Addr bias*/,
6198 NULL/*always happens*/
6200 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
6201 if (otrak) {
6202 boldLo
6203 = assignNew('B', mce, Ity_I32,
6204 gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
6205 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
6208 /* 5. the CAS itself */
6209 stmt( 'C', mce, IRStmt_CAS(cas) );
6211 /* 6. compute "expected == old" */
6212 /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
6213 /* Note that 'C' is kinda faking it; it is indeed a non-shadow
6214 tree, but it's not copied from the input block. */
6215 expd_eq_old
6216 = assignNew('C', mce, Ity_I1,
6217 binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));
6219 /* 7. if "expected == old"
6220 store data# to shadow memory */
6221 do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
6222 NULL/*data*/, vdataLo/*vdata*/,
6223 expd_eq_old/*guard for store*/ );
6224 if (otrak) {
6225 gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
6226 bdataLo/*bdata*/,
6227 expd_eq_old/*guard for store*/ );
6232 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
6234 IRAtom *vdataHi = NULL, *bdataHi = NULL;
6235 IRAtom *vdataLo = NULL, *bdataLo = NULL;
6236 IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
6237 IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
6238 IRAtom *voldHi = NULL, *boldHi = NULL;
6239 IRAtom *voldLo = NULL, *boldLo = NULL;
6240 IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
6241 IRAtom *expd_eq_old = NULL, *zero = NULL;
6242 IROp opCasCmpEQ, opOr, opXor;
6243 Int elemSzB, memOffsLo, memOffsHi;
6244 IRType elemTy;
6245 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
6247 /* double CAS */
6248 tl_assert(cas->oldHi != IRTemp_INVALID);
6249 tl_assert(cas->expdHi != NULL);
6250 tl_assert(cas->dataHi != NULL);
6252 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
6253 switch (elemTy) {
6254 case Ity_I8:
6255 opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
6256 elemSzB = 1; zero = mkU8(0);
6257 break;
6258 case Ity_I16:
6259 opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
6260 elemSzB = 2; zero = mkU16(0);
6261 break;
6262 case Ity_I32:
6263 opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
6264 elemSzB = 4; zero = mkU32(0);
6265 break;
6266 case Ity_I64:
6267 opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
6268 elemSzB = 8; zero = mkU64(0);
6269 break;
6270 default:
6271 tl_assert(0); /* IR defn disallows any other types */
6274 /* 1. fetch data# (the proposed new value) */
6275 tl_assert(isOriginalAtom(mce, cas->dataHi));
6276 tl_assert(isOriginalAtom(mce, cas->dataLo));
6277 vdataHi
6278 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi, HuOth));
6279 vdataLo
6280 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo, HuOth));
6281 tl_assert(isShadowAtom(mce, vdataHi));
6282 tl_assert(isShadowAtom(mce, vdataLo));
6283 if (otrak) {
6284 bdataHi
6285 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
6286 bdataLo
6287 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
6288 tl_assert(isShadowAtom(mce, bdataHi));
6289 tl_assert(isShadowAtom(mce, bdataLo));
6292 /* 2. fetch expected# (what we expect to see at the address) */
6293 tl_assert(isOriginalAtom(mce, cas->expdHi));
6294 tl_assert(isOriginalAtom(mce, cas->expdLo));
6295 vexpdHi
6296 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi, HuOth));
6297 vexpdLo
6298 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo, HuOth));
6299 tl_assert(isShadowAtom(mce, vexpdHi));
6300 tl_assert(isShadowAtom(mce, vexpdLo));
6301 if (otrak) {
6302 bexpdHi
6303 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
6304 bexpdLo
6305 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
6306 tl_assert(isShadowAtom(mce, bexpdHi));
6307 tl_assert(isShadowAtom(mce, bexpdLo));
6310 /* 3. check definedness of address */
6311 /* 4. fetch old# from shadow memory; this also checks
6312 addressability of the address */
6313 if (cas->end == Iend_LE) {
6314 memOffsLo = 0;
6315 memOffsHi = elemSzB;
6316 } else {
6317 tl_assert(cas->end == Iend_BE);
6318 memOffsLo = elemSzB;
6319 memOffsHi = 0;
6321 voldHi
6322 = assignNew(
6323 'V', mce, elemTy,
6324 expr2vbits_Load(
6325 mce,
6326 cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/,
6327 NULL/*always happens*/
6329 voldLo
6330 = assignNew(
6331 'V', mce, elemTy,
6332 expr2vbits_Load(
6333 mce,
6334 cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/,
6335 NULL/*always happens*/
6337 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
6338 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
6339 if (otrak) {
6340 boldHi
6341 = assignNew('B', mce, Ity_I32,
6342 gen_load_b(mce, elemSzB, cas->addr,
6343 memOffsHi/*addr bias*/));
6344 boldLo
6345 = assignNew('B', mce, Ity_I32,
6346 gen_load_b(mce, elemSzB, cas->addr,
6347 memOffsLo/*addr bias*/));
6348 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
6349 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
6352 /* 5. the CAS itself */
6353 stmt( 'C', mce, IRStmt_CAS(cas) );
6355 /* 6. compute "expected == old" */
6356 /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
6357 /* Note that 'C' is kinda faking it; it is indeed a non-shadow
6358 tree, but it's not copied from the input block. */
6359 /*
6360 xHi = oldHi ^ expdHi;
6361 xLo = oldLo ^ expdLo;
6362 xHL = xHi | xLo;
6363 expd_eq_old = xHL == 0;
6364 */
6365 xHi = assignNew('C', mce, elemTy,
6366 binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
6367 xLo = assignNew('C', mce, elemTy,
6368 binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
6369 xHL = assignNew('C', mce, elemTy,
6370 binop(opOr, xHi, xLo));
6371 expd_eq_old
6372 = assignNew('C', mce, Ity_I1,
6373 binop(opCasCmpEQ, xHL, zero));
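/* Added note: the XOR/OR reduction lets the double-width
   "expected == old" test be phrased as a single CasCmpEQ against
   zero.  For example, if expdHi == oldHi and expdLo == oldLo then
   xHi and xLo are both zero, so xHL is zero and expd_eq_old is 1;
   any mismatching bit in either half makes xHL nonzero and
   expd_eq_old 0.  Using CasCmpEQ rather than plain CmpEQ keeps the
   COMMENT_ON_CasCmpEQ treatment: the comparison result is regarded
   as defined. */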
6375 /* 7. if "expected == old"
6376 store data# to shadow memory */
6377 do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
6378 NULL/*data*/, vdataHi/*vdata*/,
6379 expd_eq_old/*guard for store*/ );
6380 do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
6381 NULL/*data*/, vdataLo/*vdata*/,
6382 expd_eq_old/*guard for store*/ );
6383 if (otrak) {
6384 gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
6385 bdataHi/*bdata*/,
6386 expd_eq_old/*guard for store*/ );
6387 gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
6388 bdataLo/*bdata*/,
6389 expd_eq_old/*guard for store*/ );
6394 /* ------ Dealing with LL/SC (not difficult) ------ */
6396 static void do_shadow_LLSC ( MCEnv* mce,
6397 IREndness stEnd,
6398 IRTemp stResult,
6399 IRExpr* stAddr,
6400 IRExpr* stStoredata )
6402 /* In short: treat a load-linked like a normal load followed by an
6403 assignment of the loaded (shadow) data to the result temporary.
6404 Treat a store-conditional like a normal store, and mark the
6405 result temporary as defined. */
6406 IRType resTy = typeOfIRTemp(mce->sb->tyenv, stResult);
6407 IRTemp resTmp = findShadowTmpV(mce, stResult);
6409 tl_assert(isIRAtom(stAddr));
6410 if (stStoredata)
6411 tl_assert(isIRAtom(stStoredata));
6413 if (stStoredata == NULL) {
6414 /* Load Linked */
6415 /* Just treat this as a normal load, followed by an assignment of
6416 the value to .result. */
6417 /* Stay sane */
6418 tl_assert(resTy == Ity_I64 || resTy == Ity_I32
6419 || resTy == Ity_I16 || resTy == Ity_I8);
6420 assign( 'V', mce, resTmp,
6421 expr2vbits_Load(
6422 mce, stEnd, resTy, stAddr, 0/*addr bias*/,
6423 NULL/*always happens*/) );
6424 } else {
6425 /* Store Conditional */
6426 /* Stay sane */
6427 IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
6428 stStoredata);
6429 tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
6430 || dataTy == Ity_I16 || dataTy == Ity_I8);
6431 do_shadow_Store( mce, stEnd,
6432 stAddr, 0/* addr bias */,
6433 stStoredata,
6434 NULL /* shadow data */,
6435 NULL/*guard*/ );
6436 /* This is a store conditional, so it writes to .result a value
6437 indicating whether or not the store succeeded. Just claim
6438 this value is always defined. In the PowerPC interpretation
6439 of store-conditional, definedness of the success indication
6440 depends on whether the address of the store matches the
6441 reservation address. But we can't tell that here (and
6442 anyway, we're not being PowerPC-specific). At least we are
6443 guaranteed that the definedness of the store address, and its
6444 addressability, will be checked as per normal. So it seems
6445 pretty safe to just say that the success indication is always
6446 defined.
6448 In schemeS, for origin tracking, we must correspondingly set
6449 a no-origin value for the origin shadow of .result.
6451 tl_assert(resTy == Ity_I1);
6452 assign( 'V', mce, resTmp, definedOfType(resTy) );
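/* Added example (hedged): for a typical LL/SC pair such as ARM's
   ldrex/strex, the effect of the above is that the load-linked is
   instrumented exactly like an ordinary load whose V bits land in
   .result's shadow, while the store-conditional is instrumented like
   an ordinary store of the data's V bits, with the Ity_I1 success
   flag's shadow simply forced to "defined". */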
6457 /* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */
6459 static void do_shadow_StoreG ( MCEnv* mce, IRStoreG* sg )
6461 complainIfUndefined(mce, sg->guard, NULL);
6462 /* do_shadow_Store will generate code to check the definedness and
6463 validity of sg->addr, in the case where sg->guard evaluates to
6464 True at run-time. */
6465 do_shadow_Store( mce, sg->end,
6466 sg->addr, 0/* addr bias */,
6467 sg->data,
6468 NULL /* shadow data */,
6469 sg->guard );
6472 static void do_shadow_LoadG ( MCEnv* mce, IRLoadG* lg )
6474 complainIfUndefined(mce, lg->guard, NULL);
6475 /* expr2vbits_Load_guarded_General will generate code to check the
6476 definedness and validity of lg->addr, in the case where
6477 lg->guard evaluates to True at run-time. */
6479 /* Look at the LoadG's built-in conversion operation, to determine
6480 the source (actual loaded data) type, and the equivalent IROp.
6481 NOTE that implicitly we are taking a widening operation to be
6482 applied to original atoms and producing one that applies to V
6483 bits. Since signed and unsigned widening are self-shadowing,
6484 this is a straight copy of the op (modulo swapping from the
6485 IRLoadGOp form to the IROp form). Note also therefore that this
6486 implicitly duplicates the logic to do with said widening ops in
6487 expr2vbits_Unop. See comment at the start of expr2vbits_Unop. */
6488 IROp vwiden = Iop_INVALID;
6489 IRType loadedTy = Ity_INVALID;
6490 switch (lg->cvt) {
6491 case ILGop_IdentV128: loadedTy = Ity_V128; vwiden = Iop_INVALID; break;
6492 case ILGop_Ident64: loadedTy = Ity_I64; vwiden = Iop_INVALID; break;
6493 case ILGop_Ident32: loadedTy = Ity_I32; vwiden = Iop_INVALID; break;
6494 case ILGop_16Uto32: loadedTy = Ity_I16; vwiden = Iop_16Uto32; break;
6495 case ILGop_16Sto32: loadedTy = Ity_I16; vwiden = Iop_16Sto32; break;
6496 case ILGop_8Uto32: loadedTy = Ity_I8; vwiden = Iop_8Uto32; break;
6497 case ILGop_8Sto32: loadedTy = Ity_I8; vwiden = Iop_8Sto32; break;
6498 default: VG_(tool_panic)("do_shadow_LoadG");
6501 IRAtom* vbits_alt
6502 = expr2vbits( mce, lg->alt, HuOth );
6503 IRAtom* vbits_final
6504 = expr2vbits_Load_guarded_General(mce, lg->end, loadedTy,
6505 lg->addr, 0/*addr bias*/,
6506 lg->guard, vwiden, vbits_alt );
6507 /* And finally, bind the V bits to the destination temporary. */
6508 assign( 'V', mce, findShadowTmpV(mce, lg->dst), vbits_final );
6512 /*------------------------------------------------------------*/
6513 /*--- Origin tracking stuff ---*/
6514 /*------------------------------------------------------------*/
6516 /* Almost identical to findShadowTmpV. */
6517 static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
6519 TempMapEnt* ent;
6520 /* VG_(indexXA) range-checks 'orig', hence no need to check
6521 here. */
6522 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
6523 tl_assert(ent->kind == Orig);
6524 if (ent->shadowB == IRTemp_INVALID) {
6525 IRTemp tmpB
6526 = newTemp( mce, Ity_I32, BSh );
6527 /* newTemp may cause mce->tmpMap to resize, hence previous results
6528 from VG_(indexXA) are invalid. */
6529 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
6530 tl_assert(ent->kind == Orig);
6531 tl_assert(ent->shadowB == IRTemp_INVALID);
6532 ent->shadowB = tmpB;
6534 return ent->shadowB;
6537 static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
6539 return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
6543 /* Make a guarded origin load, with no special handling in the
6544 didn't-happen case. A GUARD of NULL is assumed to mean "always
6545 True".
6547 Generate IR to do a shadow origins load from BASEADDR+OFFSET and
6548 return the otag. The loaded size is SZB. If GUARD evaluates to
6549 False at run time then the returned otag is zero.
6551 static IRAtom* gen_guarded_load_b ( MCEnv* mce, Int szB,
6552 IRAtom* baseaddr,
6553 Int offset, IRExpr* guard )
6555 void* hFun;
6556 const HChar* hName;
6557 IRTemp bTmp;
6558 IRDirty* di;
6559 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr );
6560 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
6561 IRAtom* ea = baseaddr;
6562 if (offset != 0) {
6563 IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
6564 : mkU64( (Long)(Int)offset );
6565 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
6567 bTmp = newTemp(mce, mce->hWordTy, BSh);
6569 switch (szB) {
6570 case 1: hFun = (void*)&MC_(helperc_b_load1);
6571 hName = "MC_(helperc_b_load1)";
6572 break;
6573 case 2: hFun = (void*)&MC_(helperc_b_load2);
6574 hName = "MC_(helperc_b_load2)";
6575 break;
6576 case 4: hFun = (void*)&MC_(helperc_b_load4);
6577 hName = "MC_(helperc_b_load4)";
6578 break;
6579 case 8: hFun = (void*)&MC_(helperc_b_load8);
6580 hName = "MC_(helperc_b_load8)";
6581 break;
6582 case 16: hFun = (void*)&MC_(helperc_b_load16);
6583 hName = "MC_(helperc_b_load16)";
6584 break;
6585 case 32: hFun = (void*)&MC_(helperc_b_load32);
6586 hName = "MC_(helperc_b_load32)";
6587 break;
6588 default:
6589 VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
6590 tl_assert(0);
6592 di = unsafeIRDirty_1_N(
6593 bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
6594 mkIRExprVec_1( ea )
6596 if (guard) {
6597 di->guard = guard;
6598 /* Ideally the didn't-happen return value here would be
6599 all-zeroes (unknown-origin), so it'd be harmless if it got
6600 used inadvertently. We slum it out with the IR-mandated
6601 default value (0b01 repeating, 0x55 etc) as that'll probably
6602 trump all legitimate otags via Max32, and it's pretty
6603 obviously bogus. */
6605 /* no need to mess with any annotations. This call accesses
6606 neither guest state nor guest memory. */
6607 stmt( 'B', mce, IRStmt_Dirty(di) );
6608 if (mce->hWordTy == Ity_I64) {
6609 /* 64-bit host */
6610 IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
6611 assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
6612 return mkexpr(bTmp32);
6613 } else {
6614 /* 32-bit host */
6615 return mkexpr(bTmp);
6620 /* Generate IR to do a shadow origins load from BASEADDR+OFFSET. The
6621 loaded size is SZB. The load is regarded as unconditional (always
6622 happens).
6624 static IRAtom* gen_load_b ( MCEnv* mce, Int szB, IRAtom* baseaddr,
6625 Int offset )
6627 return gen_guarded_load_b(mce, szB, baseaddr, offset, NULL/*guard*/);
6631 /* The most general handler for guarded origin loads. A GUARD of NULL
6632 is assumed to mean "always True".
6634 Generate IR to do a shadow origin load from ADDR+BIAS and return
6635 the B bits. The loaded type is TY. If GUARD evaluates to False at
6636 run time then the returned B bits are simply BALT instead.
6638 static
6639 IRAtom* expr2ori_Load_guarded_General ( MCEnv* mce,
6640 IRType ty,
6641 IRAtom* addr, UInt bias,
6642 IRAtom* guard, IRAtom* balt )
6644 /* If the guard evaluates to True, this will hold the loaded
6645 origin. If the guard evaluates to False, this will be zero,
6646 meaning "unknown origin", in which case we will have to replace
6647 it using an ITE below. */
6648 IRAtom* iftrue
6649 = assignNew('B', mce, Ity_I32,
6650 gen_guarded_load_b(mce, sizeofIRType(ty),
6651 addr, bias, guard));
6652 /* These are the bits we will return if the load doesn't take
6653 place. */
6654 IRAtom* iffalse
6655 = balt;
6656 /* Prepare the cond for the ITE. Convert a NULL cond into
6657 something that iropt knows how to fold out later. */
6658 IRAtom* cond
6659 = guard == NULL ? mkU1(1) : guard;
6660 /* And assemble the final result. */
6661 return assignNew('B', mce, Ity_I32, IRExpr_ITE(cond, iftrue, iffalse));
6665 /* Generate a shadow origins store. guard :: Ity_I1 controls whether
6666 the store really happens; NULL means it unconditionally does. */
6667 static void gen_store_b ( MCEnv* mce, Int szB,
6668 IRAtom* baseaddr, Int offset, IRAtom* dataB,
6669 IRAtom* guard )
6671 void* hFun;
6672 const HChar* hName;
6673 IRDirty* di;
6674 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr );
6675 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
6676 IRAtom* ea = baseaddr;
6677 if (guard) {
6678 tl_assert(isOriginalAtom(mce, guard));
6679 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
6681 if (offset != 0) {
6682 IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
6683 : mkU64( (Long)(Int)offset );
6684 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
6686 if (mce->hWordTy == Ity_I64)
6687 dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));
6689 switch (szB) {
6690 case 1: hFun = (void*)&MC_(helperc_b_store1);
6691 hName = "MC_(helperc_b_store1)";
6692 break;
6693 case 2: hFun = (void*)&MC_(helperc_b_store2);
6694 hName = "MC_(helperc_b_store2)";
6695 break;
6696 case 4: hFun = (void*)&MC_(helperc_b_store4);
6697 hName = "MC_(helperc_b_store4)";
6698 break;
6699 case 8: hFun = (void*)&MC_(helperc_b_store8);
6700 hName = "MC_(helperc_b_store8)";
6701 break;
6702 case 16: hFun = (void*)&MC_(helperc_b_store16);
6703 hName = "MC_(helperc_b_store16)";
6704 break;
6705 case 32: hFun = (void*)&MC_(helperc_b_store32);
6706 hName = "MC_(helperc_b_store32)";
6707 break;
6708 default:
6709 tl_assert(0);
6711 di = unsafeIRDirty_0_N( 2/*regparms*/,
6712 hName, VG_(fnptr_to_fnentry)( hFun ),
6713 mkIRExprVec_2( ea, dataB )
6715 /* no need to mess with any annotations. This call accesses
6716 neither guest state nor guest memory. */
6717 if (guard) di->guard = guard;
6718 stmt( 'B', mce, IRStmt_Dirty(di) );
6721 static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
6722 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
6723 if (eTy == Ity_I64)
6724 return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
6725 if (eTy == Ity_I32)
6726 return e;
6727 tl_assert(0);
6730 static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
6731 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
6732 tl_assert(eTy == Ity_I32);
6733 if (dstTy == Ity_I64)
6734 return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
6735 tl_assert(0);
6739 static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
6741 tl_assert(MC_(clo_mc_level) == 3);
6743 switch (e->tag) {
6745 case Iex_GetI: {
6746 IRRegArray* descr_b;
6747 IRAtom *t1, *t2, *t3, *t4;
6748 IRRegArray* descr = e->Iex.GetI.descr;
6749 IRType equivIntTy
6750 = MC_(get_otrack_reg_array_equiv_int_type)(descr);
6751 /* If this array is unshadowable for whatever reason, use the
6752 usual approximation. */
6753 if (equivIntTy == Ity_INVALID)
6754 return mkU32(0);
6755 tl_assert(sizeofIRType(equivIntTy) >= 4);
6756 tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
6757 descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
6758 equivIntTy, descr->nElems );
6759 /* Do a shadow indexed get of the same size, giving t1. Take
6760 the bottom 32 bits of it, giving t2. Compute into t3 the
6761 origin for the index (almost certainly zero, but there's
6762 no harm in being completely general here, since iropt will
6763 remove any useless code), and fold it in, giving a final
6764 value t4. */
6765 t1 = assignNew( 'B', mce, equivIntTy,
6766 IRExpr_GetI( descr_b, e->Iex.GetI.ix,
6767 e->Iex.GetI.bias ));
6768 t2 = narrowTo32( mce, t1 );
6769 t3 = schemeE( mce, e->Iex.GetI.ix );
6770 t4 = gen_maxU32( mce, t2, t3 );
6771 return t4;
6773 case Iex_CCall: {
6774 Int i;
6775 IRAtom* here;
6776 IRExpr** args = e->Iex.CCall.args;
6777 IRAtom* curr = mkU32(0);
6778 for (i = 0; args[i]; i++) {
6779 tl_assert(i < 32);
6780 tl_assert(isOriginalAtom(mce, args[i]));
6781 /* Only take notice of this arg if the callee's
6782 mc-exclusion mask does not say it is to be excluded. */
6783 if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
6784 /* the arg is to be excluded from definedness checking.
6785 Do nothing. */
6786 if (0) VG_(printf)("excluding %s(%d)\n",
6787 e->Iex.CCall.cee->name, i);
6788 } else {
6789 /* calculate the arg's definedness, and pessimistically
6790 merge it in. */
6791 here = schemeE( mce, args[i] );
6792 curr = gen_maxU32( mce, curr, here );
6795 return curr;
6797 case Iex_Load: {
6798 Int dszB;
6799 dszB = sizeofIRType(e->Iex.Load.ty);
6800 /* assert that the B value for the address is already
6801 available (somewhere) */
6802 tl_assert(isIRAtom(e->Iex.Load.addr));
6803 tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
6804 return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
6806 case Iex_ITE: {
6807 IRAtom* b1 = schemeE( mce, e->Iex.ITE.cond );
6808 IRAtom* b3 = schemeE( mce, e->Iex.ITE.iftrue );
6809 IRAtom* b2 = schemeE( mce, e->Iex.ITE.iffalse );
6810 return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
6812 case Iex_Qop: {
6813 IRAtom* b1 = schemeE( mce, e->Iex.Qop.details->arg1 );
6814 IRAtom* b2 = schemeE( mce, e->Iex.Qop.details->arg2 );
6815 IRAtom* b3 = schemeE( mce, e->Iex.Qop.details->arg3 );
6816 IRAtom* b4 = schemeE( mce, e->Iex.Qop.details->arg4 );
6817 return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
6818 gen_maxU32( mce, b3, b4 ) );
6820 case Iex_Triop: {
6821 IRAtom* b1 = schemeE( mce, e->Iex.Triop.details->arg1 );
6822 IRAtom* b2 = schemeE( mce, e->Iex.Triop.details->arg2 );
6823 IRAtom* b3 = schemeE( mce, e->Iex.Triop.details->arg3 );
6824 return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
6826 case Iex_Binop: {
6827 switch (e->Iex.Binop.op) {
6828 case Iop_CasCmpEQ8: case Iop_CasCmpNE8:
6829 case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
6830 case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
6831 case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
6832 /* Just say these all produce a defined result,
6833 regardless of their arguments. See
6834 COMMENT_ON_CasCmpEQ in this file. */
6835 return mkU32(0);
6836 default: {
6837 IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
6838 IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
6839 return gen_maxU32( mce, b1, b2 );
6842 tl_assert(0);
6843 /*NOTREACHED*/
6845 case Iex_Unop: {
6846 IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
6847 return b1;
6849 case Iex_Const:
6850 return mkU32(0);
6851 case Iex_RdTmp:
6852 return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
6853 case Iex_Get: {
6854 Int b_offset = MC_(get_otrack_shadow_offset)(
6855 e->Iex.Get.offset,
6856 sizeofIRType(e->Iex.Get.ty)
6858 tl_assert(b_offset >= -1
6859 && b_offset <= mce->layout->total_sizeB -4);
6860 if (b_offset >= 0) {
6861 /* FIXME: this isn't an atom! */
6862 return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
6863 Ity_I32 );
6865 return mkU32(0);
6867 default:
6868 VG_(printf)("mc_translate.c: schemeE: unhandled: ");
6869 ppIRExpr(e);
6870 VG_(tool_panic)("memcheck:schemeE");
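/* Added worked examples: for a flat binary expression such as
   Add32(t7, t9) the dispatch above produces, roughly,

      gen_maxU32( mce,
                  mkexpr( findShadowTmpB(mce, t7) ),
                  mkexpr( findShadowTmpB(mce, t9) ) )

   and for a 32-bit load expression LDle:I32(t9) (as constructed, for
   instance, by the LLSC handling in schemeS below) it produces

      gen_load_b( mce, 4, t9, 0 )

   i.e. the origin of a compound expression is the Max32U of the
   origins of its inputs, and a load contributes the otag held in
   shadow memory for the addressed bytes.  (t7 and t9 are hypothetical
   temporaries, used only for this illustration.) */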
6875 static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
6877 // This is a hacked version of do_shadow_Dirty
6878 Int i, k, n, toDo, gSz, gOff;
6879 IRAtom *here, *curr;
6880 IRTemp dst;
6882 /* First check the guard. */
6883 curr = schemeE( mce, d->guard );
6885 /* Now round up all inputs and maxU32 over them. */
6887 /* Inputs: unmasked args
6888 Note: arguments are evaluated REGARDLESS of the guard expression */
6889 for (i = 0; d->args[i]; i++) {
6890 IRAtom* arg = d->args[i];
6891 if ( (d->cee->mcx_mask & (1<<i))
6892 || UNLIKELY(is_IRExpr_VECRET_or_GSPTR(arg)) ) {
6893 /* ignore this arg */
6894 } else {
6895 here = schemeE( mce, arg );
6896 curr = gen_maxU32( mce, curr, here );
6900 /* Inputs: guest state that we read. */
6901 for (i = 0; i < d->nFxState; i++) {
6902 tl_assert(d->fxState[i].fx != Ifx_None);
6903 if (d->fxState[i].fx == Ifx_Write)
6904 continue;
6906 /* Enumerate the described state segments */
6907 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
6908 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
6909 gSz = d->fxState[i].size;
6911 /* Ignore any sections marked as 'always defined'. */
6912 if (isAlwaysDefd(mce, gOff, gSz)) {
6913 if (0)
6914 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
6915 gOff, gSz);
6916 continue;
6919 /* This state element is read or modified. So we need to
6920 consider it. If larger than 4 bytes, deal with it in
6921 4-byte chunks. */
6922 while (True) {
6923 Int b_offset;
6924 tl_assert(gSz >= 0);
6925 if (gSz == 0) break;
6926 n = gSz <= 4 ? gSz : 4;
6927 /* update 'curr' with maxU32 of the state slice
6928 gOff .. gOff+n-1 */
6929 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
6930 if (b_offset != -1) {
6931 /* Observe the guard expression. If it is false use 0, i.e.
6932 nothing is known about the origin */
6933 IRAtom *cond, *iffalse, *iftrue;
6935 cond = assignNew( 'B', mce, Ity_I1, d->guard);
6936 iffalse = mkU32(0);
6937 iftrue = assignNew( 'B', mce, Ity_I32,
6938 IRExpr_Get(b_offset
6939 + 2*mce->layout->total_sizeB,
6940 Ity_I32));
6941 here = assignNew( 'B', mce, Ity_I32,
6942 IRExpr_ITE(cond, iftrue, iffalse));
6943 curr = gen_maxU32( mce, curr, here );
6945 gSz -= n;
6946 gOff += n;
6951 /* Inputs: memory */
6953 if (d->mFx != Ifx_None) {
6954 /* Because we may do multiple shadow loads/stores from the same
6955 base address, it's best to do a single test of its
6956 definedness right now. Post-instrumentation optimisation
6957 should remove all but this test. */
6958 tl_assert(d->mAddr);
6959 here = schemeE( mce, d->mAddr );
6960 curr = gen_maxU32( mce, curr, here );
6963 /* Deal with memory inputs (reads or modifies) */
6964 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
6965 toDo = d->mSize;
6966 /* chew off 32-bit chunks. We don't care about the endianness
6967 since it's all going to be condensed down to a single bit,
6968 but nevertheless choose an endianness which is hopefully
6969 native to the platform. */
6970 while (toDo >= 4) {
6971 here = gen_guarded_load_b( mce, 4, d->mAddr, d->mSize - toDo,
6972 d->guard );
6973 curr = gen_maxU32( mce, curr, here );
6974 toDo -= 4;
6976 /* handle possible 16-bit excess */
6977 while (toDo >= 2) {
6978 here = gen_guarded_load_b( mce, 2, d->mAddr, d->mSize - toDo,
6979 d->guard );
6980 curr = gen_maxU32( mce, curr, here );
6981 toDo -= 2;
6983 /* chew off the remaining 8-bit chunk, if any */
6984 if (toDo == 1) {
6985 here = gen_guarded_load_b( mce, 1, d->mAddr, d->mSize - toDo,
6986 d->guard );
6987 curr = gen_maxU32( mce, curr, here );
6988 toDo -= 1;
6990 tl_assert(toDo == 0);
6993 /* Whew! So curr is a 32-bit B-value which should give an origin
6994 of some use if any of the inputs to the helper are undefined.
6995 Now we need to re-distribute the results to all destinations. */
6997 /* Outputs: the destination temporary, if there is one. */
6998 if (d->tmp != IRTemp_INVALID) {
6999 dst = findShadowTmpB(mce, d->tmp);
7000 assign( 'V', mce, dst, curr );
7003 /* Outputs: guest state that we write or modify. */
7004 for (i = 0; i < d->nFxState; i++) {
7005 tl_assert(d->fxState[i].fx != Ifx_None);
7006 if (d->fxState[i].fx == Ifx_Read)
7007 continue;
7009 /* Enumerate the described state segments */
7010 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
7011 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
7012 gSz = d->fxState[i].size;
7014 /* Ignore any sections marked as 'always defined'. */
7015 if (isAlwaysDefd(mce, gOff, gSz))
7016 continue;
7018 /* This state element is written or modified. So we need to
7019 consider it. If larger than 4 bytes, deal with it in
7020 4-byte chunks. */
7021 while (True) {
7022 Int b_offset;
7023 tl_assert(gSz >= 0);
7024 if (gSz == 0) break;
7025 n = gSz <= 4 ? gSz : 4;
7026 /* Write 'curr' to the state slice gOff .. gOff+n-1 */
7027 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
7028 if (b_offset != -1) {
7030 /* If the guard expression evaluates to false we simply Put
7031 the value that is already stored in the guest state slot */
7032 IRAtom *cond, *iffalse;
7034 cond = assignNew('B', mce, Ity_I1,
7035 d->guard);
7036 iffalse = assignNew('B', mce, Ity_I32,
7037 IRExpr_Get(b_offset +
7038 2*mce->layout->total_sizeB,
7039 Ity_I32));
7040 curr = assignNew('V', mce, Ity_I32,
7041 IRExpr_ITE(cond, curr, iffalse));
7043 stmt( 'B', mce, IRStmt_Put(b_offset
7044 + 2*mce->layout->total_sizeB,
7045 curr ));
7047 gSz -= n;
7048 gOff += n;
7053 /* Outputs: memory that we write or modify. Same comments about
7054 endianness as above apply. */
7055 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
7056 toDo = d->mSize;
7057 /* chew off 32-bit chunks */
7058 while (toDo >= 4) {
7059 gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
7060 d->guard );
7061 toDo -= 4;
7063 /* handle possible 16-bit excess */
7064 while (toDo >= 2) {
7065 gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
7066 d->guard );
7067 toDo -= 2;
7069 /* chew off the remaining 8-bit chunk, if any */
7070 if (toDo == 1) {
7071 gen_store_b( mce, 1, d->mAddr, d->mSize - toDo, curr,
7072 d->guard );
7073 toDo -= 1;
7075 tl_assert(toDo == 0);
7080 /* Generate IR for origin shadowing for a general guarded store. */
7081 static void do_origins_Store_guarded ( MCEnv* mce,
7082 IREndness stEnd,
7083 IRExpr* stAddr,
7084 IRExpr* stData,
7085 IRExpr* guard )
7087 Int dszB;
7088 IRAtom* dataB;
7089 /* assert that the B value for the address is already available
7090 (somewhere), since the call to schemeE will want to see it.
7091 XXXX how does this actually ensure that?? */
7092 tl_assert(isIRAtom(stAddr));
7093 tl_assert(isIRAtom(stData));
7094 dszB = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
7095 dataB = schemeE( mce, stData );
7096 gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB, guard );
7100 /* Generate IR for origin shadowing for a plain store. */
7101 static void do_origins_Store_plain ( MCEnv* mce,
7102 IREndness stEnd,
7103 IRExpr* stAddr,
7104 IRExpr* stData )
7106 do_origins_Store_guarded ( mce, stEnd, stAddr, stData,
7107 NULL/*guard*/ );
7111 /* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */
7113 static void do_origins_StoreG ( MCEnv* mce, IRStoreG* sg )
7115 do_origins_Store_guarded( mce, sg->end, sg->addr,
7116 sg->data, sg->guard );
7119 static void do_origins_LoadG ( MCEnv* mce, IRLoadG* lg )
7121 IRType loadedTy = Ity_INVALID;
7122 switch (lg->cvt) {
7123 case ILGop_IdentV128: loadedTy = Ity_V128; break;
7124 case ILGop_Ident64: loadedTy = Ity_I64; break;
7125 case ILGop_Ident32: loadedTy = Ity_I32; break;
7126 case ILGop_16Uto32: loadedTy = Ity_I16; break;
7127 case ILGop_16Sto32: loadedTy = Ity_I16; break;
7128 case ILGop_8Uto32: loadedTy = Ity_I8; break;
7129 case ILGop_8Sto32: loadedTy = Ity_I8; break;
7130 default: VG_(tool_panic)("schemeS.IRLoadG");
7132 IRAtom* ori_alt
7133 = schemeE( mce,lg->alt );
7134 IRAtom* ori_final
7135 = expr2ori_Load_guarded_General(mce, loadedTy,
7136 lg->addr, 0/*addr bias*/,
7137 lg->guard, ori_alt );
7138 /* And finally, bind the origin to the destination temporary. */
7139 assign( 'B', mce, findShadowTmpB(mce, lg->dst), ori_final );
7143 static void schemeS ( MCEnv* mce, IRStmt* st )
7145 tl_assert(MC_(clo_mc_level) == 3);
7147 switch (st->tag) {
7149 case Ist_AbiHint:
7150 /* The value-check instrumenter handles this - by arranging
7151 to pass the address of the next instruction to
7152 MC_(helperc_MAKE_STACK_UNINIT). This is all that needs to
7153 happen for origin tracking w.r.t. AbiHints. So there is
7154 nothing to do here. */
7155 break;
7157 case Ist_PutI: {
7158 IRPutI *puti = st->Ist.PutI.details;
7159 IRRegArray* descr_b;
7160 IRAtom *t1, *t2, *t3, *t4;
7161 IRRegArray* descr = puti->descr;
7162 IRType equivIntTy
7163 = MC_(get_otrack_reg_array_equiv_int_type)(descr);
7164 /* If this array is unshadowable for whatever reason,
7165 generate no code. */
7166 if (equivIntTy == Ity_INVALID)
7167 break;
7168 tl_assert(sizeofIRType(equivIntTy) >= 4);
7169 tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
7170 descr_b
7171 = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
7172 equivIntTy, descr->nElems );
7173 /* Compute a value to Put - the conjoinment of the origin for
7174 the data to be Put-ted (obviously) and of the index value
7175 (not so obviously). */
7176 t1 = schemeE( mce, puti->data );
7177 t2 = schemeE( mce, puti->ix );
7178 t3 = gen_maxU32( mce, t1, t2 );
7179 t4 = zWidenFrom32( mce, equivIntTy, t3 );
7180 stmt( 'B', mce, IRStmt_PutI( mkIRPutI(descr_b, puti->ix,
7181 puti->bias, t4) ));
7182 break;
7185 case Ist_Dirty:
7186 do_origins_Dirty( mce, st->Ist.Dirty.details );
7187 break;
7189 case Ist_Store:
7190 do_origins_Store_plain( mce, st->Ist.Store.end,
7191 st->Ist.Store.addr,
7192 st->Ist.Store.data );
7193 break;
7195 case Ist_StoreG:
7196 do_origins_StoreG( mce, st->Ist.StoreG.details );
7197 break;
7199 case Ist_LoadG:
7200 do_origins_LoadG( mce, st->Ist.LoadG.details );
7201 break;
7203 case Ist_LLSC: {
7204 /* In short: treat a load-linked like a normal load followed
7205 by an assignment of the loaded (shadow) data to the result
7206 temporary. Treat a store-conditional like a normal store,
7207 and mark the result temporary as defined. */
7208 if (st->Ist.LLSC.storedata == NULL) {
7209 /* Load Linked */
7210 IRType resTy
7211 = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
7212 IRExpr* vanillaLoad
7213 = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
7214 tl_assert(resTy == Ity_I64 || resTy == Ity_I32
7215 || resTy == Ity_I16 || resTy == Ity_I8);
7216 assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
7217 schemeE(mce, vanillaLoad));
7218 } else {
7219 /* Store conditional */
7220 do_origins_Store_plain( mce, st->Ist.LLSC.end,
7221 st->Ist.LLSC.addr,
7222 st->Ist.LLSC.storedata );
7223 /* For the rationale behind this, see comments at the
7224 place where the V-shadow for .result is constructed, in
7225 do_shadow_LLSC. In short, we regard .result as
7226 always-defined. */
7227 assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
7228 mkU32(0) );
7230 break;
7233 case Ist_Put: {
7234 Int b_offset
7235 = MC_(get_otrack_shadow_offset)(
7236 st->Ist.Put.offset,
7237 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
7239 if (b_offset >= 0) {
7240 /* FIXME: this isn't an atom! */
7241 stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
7242 schemeE( mce, st->Ist.Put.data )) );
7244 break;
7247 case Ist_WrTmp:
7248 assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
7249 schemeE(mce, st->Ist.WrTmp.data) );
7250 break;
7252 case Ist_MBE:
7253 case Ist_NoOp:
7254 case Ist_Exit:
7255 case Ist_IMark:
7256 break;
7258 default:
7259 VG_(printf)("mc_translate.c: schemeS: unhandled: ");
7260 ppIRStmt(st);
7261 VG_(tool_panic)("memcheck:schemeS");
7266 /*------------------------------------------------------------*/
7267 /*--- Post-tree-build final tidying ---*/
7268 /*------------------------------------------------------------*/
7270 /* This exploits the observation that Memcheck often produces
7271 repeated conditional calls of the form
7273 Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)
7275 with the same guard expression G guarding the same helper call.
7276 The second and subsequent calls are redundant. This usually
7277 results from instrumentation of guest code containing multiple
7278 memory references at different constant offsets from the same base
7279 register. After optimisation of the instrumentation, you get a
7280 test for the definedness of the base register for each memory
7281 reference, which is kinda pointless. MC_(final_tidy) therefore
7282 looks for such repeated calls and removes all but the first. */
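/* Purely illustrative sketch of the redundancy being removed (not taken
   from any particular guest block).  After instrumenting two loads at
   constant offsets from the same base register, the IR can contain

      Dirty G MC_(helperc_value_check8_fail_no_o)()
      ... first memory reference ...
      Dirty G MC_(helperc_value_check8_fail_no_o)()   <-- redundant
      ... second memory reference ...

   with the same guard expression G.  MC_(final_tidy) keeps the first
   call and rewrites the later duplicate into an Ist_NoOp. */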
7285 /* With some testing on perf/bz2.c, on amd64 and x86, compiled with
7286 gcc-5.3.1 -O2, it appears that 16 entries in the array are enough to
7287 get almost all the benefits of this transformation whilst causing
7288 the slide-back case to occur just often enough to be verifiably
7289 correct. For posterity, the numbers are:
7291 bz2-32
7293 1 4,336 (112,212 -> 1,709,473; ratio 15.2)
7294 2 4,336 (112,194 -> 1,669,895; ratio 14.9)
7295 3 4,336 (112,194 -> 1,660,713; ratio 14.8)
7296 4 4,336 (112,194 -> 1,658,555; ratio 14.8)
7297 5 4,336 (112,194 -> 1,655,447; ratio 14.8)
7298 6 4,336 (112,194 -> 1,655,101; ratio 14.8)
7299 7 4,336 (112,194 -> 1,654,858; ratio 14.7)
7300 8 4,336 (112,194 -> 1,654,810; ratio 14.7)
7301 10 4,336 (112,194 -> 1,654,621; ratio 14.7)
7302 12 4,336 (112,194 -> 1,654,678; ratio 14.7)
7303 16 4,336 (112,194 -> 1,654,494; ratio 14.7)
7304 32 4,336 (112,194 -> 1,654,602; ratio 14.7)
7305 inf 4,336 (112,194 -> 1,654,602; ratio 14.7)
7307 bz2-64
7309 1 4,113 (107,329 -> 1,822,171; ratio 17.0)
7310 2 4,113 (107,329 -> 1,806,443; ratio 16.8)
7311 3 4,113 (107,329 -> 1,803,967; ratio 16.8)
7312 4 4,113 (107,329 -> 1,802,785; ratio 16.8)
7313 5 4,113 (107,329 -> 1,802,412; ratio 16.8)
7314 6 4,113 (107,329 -> 1,802,062; ratio 16.8)
7315 7 4,113 (107,329 -> 1,801,976; ratio 16.8)
7316 8 4,113 (107,329 -> 1,801,886; ratio 16.8)
7317 10 4,113 (107,329 -> 1,801,653; ratio 16.8)
7318 12 4,113 (107,329 -> 1,801,526; ratio 16.8)
7319 16 4,113 (107,329 -> 1,801,298; ratio 16.8)
7320 32 4,113 (107,329 -> 1,800,827; ratio 16.8)
7321 inf 4,113 (107,329 -> 1,800,827; ratio 16.8)
7324 /* Structs for recording which (helper, guard) pairs we have already
7325 seen. */
7327 #define N_TIDYING_PAIRS 16
7329 typedef
7330 struct { void* entry; IRExpr* guard; }
7331 Pair;
7333 typedef
7334 struct {
7335 Pair pairs[N_TIDYING_PAIRS +1/*for bounds checking*/];
7336 UInt pairsUsed;
7338 Pairs;
7341 /* Return True if e1 and e2 definitely denote the same value (used to
7342 compare guards). Return False if unknown; False is the safe
7343 answer. Since guest registers and guest memory do not have the
7344 SSA property we must return False if any Gets or Loads appear in
7345 the expression. This implicitly assumes that e1 and e2 have the
7346 same IR type, which is always true here -- the type is Ity_I1. */
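/* As an illustration (hypothetical tmp number): two guards both of the
   form CmpNE32(t7,0x0:I32) are judged to denote the same value here,
   since they are built from the same RdTmp and an equal constant.  By
   contrast a guard containing a Get or a Load is never judged equal to
   anything, even a textually identical guard, because the underlying
   register or memory may have changed in between. */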
7348 static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
7350 if (e1->tag != e2->tag)
7351 return False;
7352 switch (e1->tag) {
7353 case Iex_Const:
7354 return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
7355 case Iex_Binop:
7356 return e1->Iex.Binop.op == e2->Iex.Binop.op
7357 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
7358 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
7359 case Iex_Unop:
7360 return e1->Iex.Unop.op == e2->Iex.Unop.op
7361 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
7362 case Iex_RdTmp:
7363 return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
7364 case Iex_ITE:
7365 return sameIRValue( e1->Iex.ITE.cond, e2->Iex.ITE.cond )
7366 && sameIRValue( e1->Iex.ITE.iftrue, e2->Iex.ITE.iftrue )
7367 && sameIRValue( e1->Iex.ITE.iffalse, e2->Iex.ITE.iffalse );
7368 case Iex_Qop:
7369 case Iex_Triop:
7370 case Iex_CCall:
7371 /* be lazy. Could define equality for these, but they never
7372 appear to be used. */
7373 return False;
7374 case Iex_Get:
7375 case Iex_GetI:
7376 case Iex_Load:
7377 /* be conservative - these may not give the same value each
7378 time */
7379 return False;
7380 case Iex_Binder:
7381 /* should never see this */
7382 /* fallthrough */
7383 default:
7384 VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
7385 ppIRExpr(e1);
7386 VG_(tool_panic)("memcheck:sameIRValue");
7387 return False;
7391 /* See if 'pairs' already has an entry for (entry, guard). Return
7392 True if so. If not, add an entry. */
7394 static
7395 Bool check_or_add ( Pairs* tidyingEnv, IRExpr* guard, void* entry )
7397 UInt i, n = tidyingEnv->pairsUsed;
7398 tl_assert(n <= N_TIDYING_PAIRS);
7399 for (i = 0; i < n; i++) {
7400 if (tidyingEnv->pairs[i].entry == entry
7401 && sameIRValue(tidyingEnv->pairs[i].guard, guard))
7402 return True;
7404 /* (guard, entry) wasn't found in the array. Add it at the end.
7405 If the array is already full, slide the entries one slot
7406 backwards. This means we will lose the ability to detect
7407 duplicates from the pair in slot zero, but that happens so
7408 rarely that it's unlikely to have much effect on overall code
7409 quality. Also, this strategy loses the check for the oldest
7410 tracked exit (memory reference, basically), which is (I'd
7411 guess) the one least likely to be re-used after this point. */
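/* Illustrative sketch of the slide (with N_TIDYING_PAIRS == 16, as
   defined above): when the array is full, pairs[1..15] are copied down
   to pairs[0..14] and the new (entry, guard) goes into pairs[15], so
   the previously oldest pair, in slot zero, is forgotten. */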
7412 tl_assert(i == n);
7413 if (n == N_TIDYING_PAIRS) {
7414 for (i = 1; i < N_TIDYING_PAIRS; i++) {
7415 tidyingEnv->pairs[i-1] = tidyingEnv->pairs[i];
7417 tidyingEnv->pairs[N_TIDYING_PAIRS-1].entry = entry;
7418 tidyingEnv->pairs[N_TIDYING_PAIRS-1].guard = guard;
7419 } else {
7420 tl_assert(n < N_TIDYING_PAIRS);
7421 tidyingEnv->pairs[n].entry = entry;
7422 tidyingEnv->pairs[n].guard = guard;
7423 n++;
7424 tidyingEnv->pairsUsed = n;
7426 return False;
7429 static Bool is_helperc_value_checkN_fail ( const HChar* name )
7431 /* This is expensive because it happens a lot. We are checking to
7432 see whether |name| is one of the following 8 strings:
7434 MC_(helperc_value_check8_fail_no_o)
7435 MC_(helperc_value_check4_fail_no_o)
7436 MC_(helperc_value_check0_fail_no_o)
7437 MC_(helperc_value_check1_fail_no_o)
7438 MC_(helperc_value_check8_fail_w_o)
7439 MC_(helperc_value_check0_fail_w_o)
7440 MC_(helperc_value_check1_fail_w_o)
7441 MC_(helperc_value_check4_fail_w_o)
7443 To speed it up, check the common prefix just once, rather than
7444 all 8 times.
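 
   For example (just tracing the code below): "MC_(helperc_b_load4)" is
   rejected inside the prefix loop as soon as its 'b' fails to match the
   'v' in the prefix, with no VG_(strcmp) calls at all, whereas a
   genuine target falls out of the loop and has its short suffix
   matched against the eight possibilities.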
7446 const HChar* prefix = "MC_(helperc_value_check";
7448 HChar n, p;
7449 while (True) {
7450 n = *name;
7451 p = *prefix;
7452 if (p == 0) break; /* ran off the end of the prefix */
7453 /* We still have some prefix to use */
7454 if (n == 0) return False; /* have prefix, but name ran out */
7455 if (n != p) return False; /* have both pfx and name, but no match */
7456 name++;
7457 prefix++;
7460 /* Check the part after the prefix. */
7461 tl_assert(*prefix == 0 && *name != 0);
7462 return 0==VG_(strcmp)(name, "8_fail_no_o)")
7463 || 0==VG_(strcmp)(name, "4_fail_no_o)")
7464 || 0==VG_(strcmp)(name, "0_fail_no_o)")
7465 || 0==VG_(strcmp)(name, "1_fail_no_o)")
7466 || 0==VG_(strcmp)(name, "8_fail_w_o)")
7467 || 0==VG_(strcmp)(name, "4_fail_w_o)")
7468 || 0==VG_(strcmp)(name, "0_fail_w_o)")
7469 || 0==VG_(strcmp)(name, "1_fail_w_o)");
7472 IRSB* MC_(final_tidy) ( IRSB* sb_in )
7474 Int i;
7475 IRStmt* st;
7476 IRDirty* di;
7477 IRExpr* guard;
7478 IRCallee* cee;
7479 Bool alreadyPresent;
7480 Pairs pairs;
7482 pairs.pairsUsed = 0;
7484 pairs.pairs[N_TIDYING_PAIRS].entry = (void*)0x123;
7485 pairs.pairs[N_TIDYING_PAIRS].guard = (IRExpr*)0x456;
7487 /* Scan forwards through the statements. Each time a call to one
7488 of the relevant helpers is seen, check if we have made a
7489 previous call to the same helper using the same guard
7490 expression, and if so, delete the call. */
7491 for (i = 0; i < sb_in->stmts_used; i++) {
7492 st = sb_in->stmts[i];
7493 tl_assert(st);
7494 if (st->tag != Ist_Dirty)
7495 continue;
7496 di = st->Ist.Dirty.details;
7497 guard = di->guard;
7498 tl_assert(guard);
7499 if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
7500 cee = di->cee;
7501 if (!is_helperc_value_checkN_fail( cee->name ))
7502 continue;
7503 /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
7504 guard 'guard'. Check if we have already seen a call to this
7505 function with the same guard. If so, delete it. If not,
7506 add it to the set of calls we do know about. */
7507 alreadyPresent = check_or_add( &pairs, guard, cee->addr );
7508 if (alreadyPresent) {
7509 sb_in->stmts[i] = IRStmt_NoOp();
7510 if (0) VG_(printf)("XX\n");
7514 tl_assert(pairs.pairs[N_TIDYING_PAIRS].entry == (void*)0x123);
7515 tl_assert(pairs.pairs[N_TIDYING_PAIRS].guard == (IRExpr*)0x456);
7517 return sb_in;
7520 #undef N_TIDYING_PAIRS
7523 /*------------------------------------------------------------*/
7524 /*--- Startup assertion checking ---*/
7525 /*------------------------------------------------------------*/
7527 void MC_(do_instrumentation_startup_checks)( void )
7529 /* Make a best-effort check to see that is_helperc_value_checkN_fail
7530 is working as we expect. */
7532 # define CHECK(_expected, _string) \
7533 tl_assert((_expected) == is_helperc_value_checkN_fail(_string))
7535 /* It should identify these 8, and no others, as targets. */
7536 CHECK(True, "MC_(helperc_value_check8_fail_no_o)");
7537 CHECK(True, "MC_(helperc_value_check4_fail_no_o)");
7538 CHECK(True, "MC_(helperc_value_check0_fail_no_o)");
7539 CHECK(True, "MC_(helperc_value_check1_fail_no_o)");
7540 CHECK(True, "MC_(helperc_value_check8_fail_w_o)");
7541 CHECK(True, "MC_(helperc_value_check0_fail_w_o)");
7542 CHECK(True, "MC_(helperc_value_check1_fail_w_o)");
7543 CHECK(True, "MC_(helperc_value_check4_fail_w_o)");
7545 /* Ad-hoc selection of other strings gathered via a quick test. */
7546 CHECK(False, "amd64g_dirtyhelper_CPUID_avx2");
7547 CHECK(False, "amd64g_dirtyhelper_RDTSC");
7548 CHECK(False, "MC_(helperc_b_load1)");
7549 CHECK(False, "MC_(helperc_b_load2)");
7550 CHECK(False, "MC_(helperc_b_load4)");
7551 CHECK(False, "MC_(helperc_b_load8)");
7552 CHECK(False, "MC_(helperc_b_load16)");
7553 CHECK(False, "MC_(helperc_b_load32)");
7554 CHECK(False, "MC_(helperc_b_store1)");
7555 CHECK(False, "MC_(helperc_b_store2)");
7556 CHECK(False, "MC_(helperc_b_store4)");
7557 CHECK(False, "MC_(helperc_b_store8)");
7558 CHECK(False, "MC_(helperc_b_store16)");
7559 CHECK(False, "MC_(helperc_b_store32)");
7560 CHECK(False, "MC_(helperc_LOADV8)");
7561 CHECK(False, "MC_(helperc_LOADV16le)");
7562 CHECK(False, "MC_(helperc_LOADV32le)");
7563 CHECK(False, "MC_(helperc_LOADV64le)");
7564 CHECK(False, "MC_(helperc_LOADV128le)");
7565 CHECK(False, "MC_(helperc_LOADV256le)");
7566 CHECK(False, "MC_(helperc_STOREV16le)");
7567 CHECK(False, "MC_(helperc_STOREV32le)");
7568 CHECK(False, "MC_(helperc_STOREV64le)");
7569 CHECK(False, "MC_(helperc_STOREV8)");
7570 CHECK(False, "track_die_mem_stack_8");
7571 CHECK(False, "track_new_mem_stack_8_w_ECU");
7572 CHECK(False, "MC_(helperc_MAKE_STACK_UNINIT_w_o)");
7573 CHECK(False, "VG_(unknown_SP_update_w_ECU)");
7575 # undef CHECK
7579 /*------------------------------------------------------------*/
7580 /*--- Memcheck main ---*/
7581 /*------------------------------------------------------------*/
7583 static Bool isBogusAtom ( IRAtom* at )
7585 if (at->tag == Iex_RdTmp)
7586 return False;
7587 tl_assert(at->tag == Iex_Const);
7589 ULong n = 0;
7590 IRConst* con = at->Iex.Const.con;
7591 switch (con->tag) {
7592 case Ico_U1: return False;
7593 case Ico_U8: n = (ULong)con->Ico.U8; break;
7594 case Ico_U16: n = (ULong)con->Ico.U16; break;
7595 case Ico_U32: n = (ULong)con->Ico.U32; break;
7596 case Ico_U64: n = (ULong)con->Ico.U64; break;
7597 case Ico_F32: return False;
7598 case Ico_F64: return False;
7599 case Ico_F32i: return False;
7600 case Ico_F64i: return False;
7601 case Ico_V128: return False;
7602 case Ico_V256: return False;
7603 default: ppIRExpr(at); tl_assert(0);
7605 /* VG_(printf)("%llx\n", n); */
7606 /* Shortcuts */
7607 if (LIKELY(n <= 0x0000000000001000ULL)) return False;
7608 if (LIKELY(n >= 0xFFFFFFFFFFFFF000ULL)) return False;
7609 /* The list of bogus atoms is: */
7610 return (/*32*/ n == 0xFEFEFEFFULL
7611 /*32*/ || n == 0x80808080ULL
7612 /*32*/ || n == 0x7F7F7F7FULL
7613 /*32*/ || n == 0x7EFEFEFFULL
7614 /*32*/ || n == 0x81010100ULL
7615 /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
7616 /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
7617 /*64*/ || n == 0x0000000000008080ULL
7618 /*64*/ || n == 0x8080808080808080ULL
7619 /*64*/ || n == 0x0101010101010101ULL
7624 /* Does 'st' mention any of the literals identified/listed in
7625 isBogusAtom()? */
7626 static inline Bool containsBogusLiterals ( /*FLAT*/ IRStmt* st )
7628 Int i;
7629 IRExpr* e;
7630 IRDirty* d;
7631 IRCAS* cas;
7632 switch (st->tag) {
7633 case Ist_WrTmp:
7634 e = st->Ist.WrTmp.data;
7635 switch (e->tag) {
7636 case Iex_Get:
7637 case Iex_RdTmp:
7638 return False;
7639 case Iex_Const:
7640 return isBogusAtom(e);
7641 case Iex_Unop:
7642 return isBogusAtom(e->Iex.Unop.arg)
7643 || e->Iex.Unop.op == Iop_GetMSBs8x16;
7644 case Iex_GetI:
7645 return isBogusAtom(e->Iex.GetI.ix);
7646 case Iex_Binop:
7647 return isBogusAtom(e->Iex.Binop.arg1)
7648 || isBogusAtom(e->Iex.Binop.arg2);
7649 case Iex_Triop:
7650 return isBogusAtom(e->Iex.Triop.details->arg1)
7651 || isBogusAtom(e->Iex.Triop.details->arg2)
7652 || isBogusAtom(e->Iex.Triop.details->arg3);
7653 case Iex_Qop:
7654 return isBogusAtom(e->Iex.Qop.details->arg1)
7655 || isBogusAtom(e->Iex.Qop.details->arg2)
7656 || isBogusAtom(e->Iex.Qop.details->arg3)
7657 || isBogusAtom(e->Iex.Qop.details->arg4);
7658 case Iex_ITE:
7659 return isBogusAtom(e->Iex.ITE.cond)
7660 || isBogusAtom(e->Iex.ITE.iftrue)
7661 || isBogusAtom(e->Iex.ITE.iffalse);
7662 case Iex_Load:
7663 return isBogusAtom(e->Iex.Load.addr);
7664 case Iex_CCall:
7665 for (i = 0; e->Iex.CCall.args[i]; i++)
7666 if (isBogusAtom(e->Iex.CCall.args[i]))
7667 return True;
7668 return False;
7669 default:
7670 goto unhandled;
7672 case Ist_Dirty:
7673 d = st->Ist.Dirty.details;
7674 for (i = 0; d->args[i]; i++) {
7675 IRAtom* atom = d->args[i];
7676 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(atom))) {
7677 if (isBogusAtom(atom))
7678 return True;
7681 if (isBogusAtom(d->guard))
7682 return True;
7683 if (d->mAddr && isBogusAtom(d->mAddr))
7684 return True;
7685 return False;
7686 case Ist_Put:
7687 return isBogusAtom(st->Ist.Put.data);
7688 case Ist_PutI:
7689 return isBogusAtom(st->Ist.PutI.details->ix)
7690 || isBogusAtom(st->Ist.PutI.details->data);
7691 case Ist_Store:
7692 return isBogusAtom(st->Ist.Store.addr)
7693 || isBogusAtom(st->Ist.Store.data);
7694 case Ist_StoreG: {
7695 IRStoreG* sg = st->Ist.StoreG.details;
7696 return isBogusAtom(sg->addr) || isBogusAtom(sg->data)
7697 || isBogusAtom(sg->guard);
7699 case Ist_LoadG: {
7700 IRLoadG* lg = st->Ist.LoadG.details;
7701 return isBogusAtom(lg->addr) || isBogusAtom(lg->alt)
7702 || isBogusAtom(lg->guard);
7704 case Ist_Exit:
7705 return isBogusAtom(st->Ist.Exit.guard);
7706 case Ist_AbiHint:
7707 return isBogusAtom(st->Ist.AbiHint.base)
7708 || isBogusAtom(st->Ist.AbiHint.nia);
7709 case Ist_NoOp:
7710 case Ist_IMark:
7711 case Ist_MBE:
7712 return False;
7713 case Ist_CAS:
7714 cas = st->Ist.CAS.details;
7715 return isBogusAtom(cas->addr)
7716 || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
7717 || isBogusAtom(cas->expdLo)
7718 || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
7719 || isBogusAtom(cas->dataLo);
7720 case Ist_LLSC:
7721 return isBogusAtom(st->Ist.LLSC.addr)
7722 || (st->Ist.LLSC.storedata
7723 ? isBogusAtom(st->Ist.LLSC.storedata)
7724 : False);
7725 default:
7726 unhandled:
7727 ppIRStmt(st);
7728 VG_(tool_panic)("hasBogusLiterals");
7733 /* This is the pre-instrumentation analysis. It does a backwards pass over
7734 the stmts in |sb_in| to determine a HowUsed value for each tmp defined in
7735 the block.
7737 Unrelatedly, it also checks all literals in the block with |isBogusAtom|,
7738 as a positive result from that is a strong indication that we need to
7739 expensively instrument add/sub in the block. We do both analyses in one
7740 pass, even though they are independent, so as to avoid the overhead of
7741 having to traverse the whole block twice.
7743 The usage pass proceeds as follows. Let max= be the max operation in the
7744 HowUsed lattice, hence
7746 X max= Y means X = max(X, Y)
7748 then
7750 for t in original tmps . useEnv[t] = HuUnU
7752 for t used in the block's .next field
7753 useEnv[t] max= HuPCa // because jmp targets are PCast-tested
7755 for st iterating *backwards* in the block
7757 match st
7759 case "t1 = load(t2)" // case 1
7760 useEnv[t2] max= HuPCa
7762 case "t1 = add(t2, t3)" // case 2
7763 useEnv[t2] max= useEnv[t1]
7764 useEnv[t3] max= useEnv[t1]
7766 other
7767 for t in st.usedTmps // case 3
7768 useEnv[t] max= HuOth
7769 // same as useEnv[t] = HuOth
7771 The general idea is that we accumulate, in useEnv[], information about
7772 how each tmp is used. That can be updated as we work further back
7773 through the block and find more uses of it, but its HowUsed value can
7774 only ascend the lattice, not descend.
7776 Initially we mark all tmps as unused. In case (1), if a tmp is seen to
7777 be used as a memory address, then its use is at least HuPCa. The point
7778 is that for a memory address we will add instrumentation to check if any
7779 bit of the address is undefined, which means that we won't need expensive
7780 V-bit propagation through an add expression that computed the address --
7781 cheap add instrumentation will be equivalent.
7783 Note in case (1) that if we have previously seen a non-memory-address use
7784 of the tmp, then its use will already be HuOth and will be unchanged by
7785 the max= operation. And if it turns out that the source of the tmp was
7786 an add, then we'll have to expensively instrument the add, because we
7787 can't prove that, for the previous non-memory-address use of the tmp,
7788 cheap and expensive instrumentation will be equivalent.
7790 In case 2, we propagate the usage-mode of the result of an add back
7791 through to its operands. Again, we use max= so as to take account of the
7792 fact that t2 or t3 might later in the block (viz, earlier in the
7793 iteration) have been used in a way that requires expensive add
7794 instrumentation.
7796 In case 3, we deal with all other tmp uses. We assume that we'll need a
7797 result that is as accurate as possible, so we max= HuOth into its use
7798 mode. Since HuOth is the top of the lattice, that's equivalent to just
7799 setting its use to HuOth.
7801 The net result of all this is that:
7803 tmps that are used either
7804 - only as a memory address, or
7805 - only as part of a tree of adds that computes a memory address,
7806 and has no other use
7807 are marked as HuPCa, and so we can instrument their generating Add
7808 nodes cheaply, which is the whole point of this analysis
7810 tmps that are used any other way at all are marked as HuOth
7812 tmps that are unused are marked as HuUnU. We don't expect to see any
7813 since we expect that the incoming IR has had all dead assignments
7814 removed by previous optimisation passes. Nevertheless the analysis is
7815 correct even in the presence of dead tmps.
7817 A final comment on dead tmps. In case 1 and case 2, we could actually
7818 conditionalise the updates thusly:
7820 if (useEnv[t1] > HuUnU) { useEnv[t2] max= HuPCa } // case 1
7822 if (useEnv[t1] > HuUnU) { useEnv[t2] max= useEnv[t1] } // case 2
7823 if (useEnv[t1] > HuUnU) { useEnv[t3] max= useEnv[t1] } // case 2
7825 In other words, if the assigned-to tmp |t1| is never used, then there's
7826 no point in propagating any use through to its operands. That won't
7827 change the final HuPCa-vs-HuOth results, which is what we care about.
7828 Given that we expect to get dead-code-free inputs, there's no point in
7829 adding this extra refinement.
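 
   A small worked example (illustrative only; the tmp numbers are made
   up).  For the flat IR fragment

      t3 = Add64(t1,t2)
      t4 = LDle:I64(t3)
      PUT(16) = t4

   the backwards pass sees the PUT first and marks t4 as HuOth (case 3),
   the load then marks t3 as HuPCa (case 1), and finally the Add64
   propagates useEnv[t3] == HuPCa into t1 and t2 (case 2).  Since t3 is
   used only as a memory address, the Add64 that defines it can be given
   the cheap instrumentation; t4 being HuOth costs nothing extra,
   because only the Add/Sub cases consult this information.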
7832 /* Helper for |preInstrumentationAnalysis|. */
7833 static inline void noteTmpUsesIn ( /*MOD*/HowUsed* useEnv,
7834 UInt tyenvUsed,
7835 HowUsed newUse, IRAtom* at )
7837 /* For the atom |at|, declare that for any tmp |t| in |at|, we will have
7838 seen a use of |newUse|. So, merge that info into |t|'s accumulated
7839 use info. */
7840 switch (at->tag) {
7841 case Iex_GSPTR:
7842 case Iex_Const:
7843 return;
7844 case Iex_RdTmp: {
7845 IRTemp t = at->Iex.RdTmp.tmp;
7846 tl_assert(t < tyenvUsed); // "is an original tmp"
7847 // The "max" operation in the lattice
7848 if (newUse > useEnv[t]) useEnv[t] = newUse;
7849 return;
7851 default:
7852 // We should never get here -- it implies non-flat IR
7853 ppIRExpr(at);
7854 VG_(tool_panic)("noteTmpUsesIn");
7856 /*NOTREACHED*/
7857 tl_assert(0);
7861 static void preInstrumentationAnalysis ( /*OUT*/HowUsed** useEnvP,
7862 /*OUT*/Bool* hasBogusLiteralsP,
7863 const IRSB* sb_in )
7865 const UInt nOrigTmps = (UInt)sb_in->tyenv->types_used;
7867 // We've seen no bogus literals so far.
7868 Bool bogus = False;
7870 // This is calloc'd, so implicitly all entries are initialised to HuUnU.
7871 HowUsed* useEnv = VG_(calloc)("mc.preInstrumentationAnalysis.1",
7872 nOrigTmps, sizeof(HowUsed));
7874 // Firstly, roll in contributions from the final dst address.
7875 bogus = isBogusAtom(sb_in->next);
7876 noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, sb_in->next);
7878 // Now work backwards through the stmts.
7879 for (Int i = sb_in->stmts_used-1; i >= 0; i--) {
7880 IRStmt* st = sb_in->stmts[i];
7882 // Deal with literals.
7883 if (LIKELY(!bogus)) {
7884 bogus = containsBogusLiterals(st);
7887 // Deal with tmp uses.
7888 switch (st->tag) {
7889 case Ist_WrTmp: {
7890 IRTemp dst = st->Ist.WrTmp.tmp;
7891 IRExpr* rhs = st->Ist.WrTmp.data;
7892 // This is the one place where we have to consider all possible
7893 // tags for |rhs|, and can't just assume it is a tmp or a const.
7894 switch (rhs->tag) {
7895 case Iex_RdTmp:
7896 // just propagate demand for |dst| into this tmp use.
7897 noteTmpUsesIn(useEnv, nOrigTmps, useEnv[dst], rhs);
7898 break;
7899 case Iex_Unop:
7900 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, rhs->Iex.Unop.arg);
7901 break;
7902 case Iex_Binop:
7903 if (rhs->Iex.Binop.op == Iop_Add64
7904 || rhs->Iex.Binop.op == Iop_Add32) {
7905 // propagate demand for |dst| through to the operands.
7906 noteTmpUsesIn(useEnv, nOrigTmps,
7907 useEnv[dst], rhs->Iex.Binop.arg1);
7908 noteTmpUsesIn(useEnv, nOrigTmps,
7909 useEnv[dst], rhs->Iex.Binop.arg2);
7910 } else {
7911 // just say that the operands are used in some unknown way.
7912 noteTmpUsesIn(useEnv, nOrigTmps,
7913 HuOth, rhs->Iex.Binop.arg1);
7914 noteTmpUsesIn(useEnv, nOrigTmps,
7915 HuOth, rhs->Iex.Binop.arg2);
7917 break;
7918 case Iex_Triop: {
7919 // All operands are used in some unknown way.
7920 IRTriop* tri = rhs->Iex.Triop.details;
7921 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, tri->arg1);
7922 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, tri->arg2);
7923 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, tri->arg3);
7924 break;
7926 case Iex_Qop: {
7927 // All operands are used in some unknown way.
7928 IRQop* qop = rhs->Iex.Qop.details;
7929 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, qop->arg1);
7930 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, qop->arg2);
7931 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, qop->arg3);
7932 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, qop->arg4);
7933 break;
7935 case Iex_Load:
7936 // The address will be checked (== PCasted).
7937 noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, rhs->Iex.Load.addr);
7938 break;
7939 case Iex_ITE:
7940 // The condition is PCasted, the then- and else-values
7941 // aren't.
7942 noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, rhs->Iex.ITE.cond);
7943 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, rhs->Iex.ITE.iftrue);
7944 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, rhs->Iex.ITE.iffalse);
7945 break;
7946 case Iex_CCall:
7947 // The args are used in unknown ways.
7948 for (IRExpr** args = rhs->Iex.CCall.args; *args; args++) {
7949 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, *args);
7951 break;
7952 case Iex_GetI: {
7953 // The index will be checked/PCasted (see do_shadow_GETI)
7954 noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, rhs->Iex.GetI.ix);
7955 break;
7957 case Iex_Const:
7958 case Iex_Get:
7959 break;
7960 default:
7961 ppIRExpr(rhs);
7962 VG_(tool_panic)("preInstrumentationAnalysis:"
7963 " unhandled IRExpr");
7965 break;
7967 case Ist_Store:
7968 // The address will be checked (== PCasted). The data will be
7969 // used in some unknown way.
7970 noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, st->Ist.Store.addr);
7971 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, st->Ist.Store.data);
7972 break;
7973 case Ist_Exit:
7974 // The guard will be checked (== PCasted)
7975 noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, st->Ist.Exit.guard);
7976 break;
7977 case Ist_Put:
7978 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, st->Ist.Put.data);
7979 break;
7980 case Ist_PutI: {
7981 IRPutI* putI = st->Ist.PutI.details;
7982 // The index will be checked/PCasted (see do_shadow_PUTI). The
7983 // data will be used in an unknown way.
7984 noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, putI->ix);
7985 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, putI->data);
7986 break;
7988 case Ist_Dirty: {
7989 IRDirty* d = st->Ist.Dirty.details;
7990 // The guard will be checked (== PCasted)
7991 noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, d->guard);
7992 // The args will be used in unknown ways.
7993 for (IRExpr** args = d->args; *args; args++) {
7994 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, *args);
7996 break;
7998 case Ist_CAS: {
7999 IRCAS* cas = st->Ist.CAS.details;
8000 // Address will be pcasted, everything else used as unknown
8001 noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, cas->addr);
8002 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, cas->expdLo);
8003 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, cas->dataLo);
8004 if (cas->expdHi)
8005 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, cas->expdHi);
8006 if (cas->dataHi)
8007 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, cas->dataHi);
8008 break;
8010 case Ist_AbiHint:
8011 // Both exprs are used in unknown ways. TODO: can we safely
8012 // just ignore AbiHints?
8013 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, st->Ist.AbiHint.base);
8014 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, st->Ist.AbiHint.nia);
8015 break;
8016 case Ist_StoreG: {
8017 // We might be able to do better, and use HuPCa for the addr.
8018 // It's not immediately obvious that we can, because the address
8019 // is regarded as "used" only when the guard is true.
8020 IRStoreG* sg = st->Ist.StoreG.details;
8021 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, sg->addr);
8022 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, sg->data);
8023 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, sg->guard);
8024 break;
8026 case Ist_LoadG: {
8027 // Per similar comments to Ist_StoreG .. not sure whether this
8028 // is really optimal.
8029 IRLoadG* lg = st->Ist.LoadG.details;
8030 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, lg->addr);
8031 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, lg->alt);
8032 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, lg->guard);
8033 break;
8035 case Ist_LLSC: {
8036 noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, st->Ist.LLSC.addr);
8037 if (st->Ist.LLSC.storedata)
8038 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, st->Ist.LLSC.storedata);
8039 break;
8041 case Ist_MBE:
8042 case Ist_IMark:
8043 case Ist_NoOp:
8044 break;
8045 default: {
8046 ppIRStmt(st);
8047 VG_(tool_panic)("preInstrumentationAnalysis: unhandled IRStmt");
8050 } // Now work backwards through the stmts.
8052 // Return the computed use env and the bogus-atom flag.
8053 tl_assert(*useEnvP == NULL);
8054 *useEnvP = useEnv;
8056 tl_assert(*hasBogusLiteralsP == False);
8057 *hasBogusLiteralsP = bogus;
8061 IRSB* MC_(instrument) ( VgCallbackClosure* closure,
8062 IRSB* sb_in,
8063 const VexGuestLayout* layout,
8064 const VexGuestExtents* vge,
8065 const VexArchInfo* archinfo_host,
8066 IRType gWordTy, IRType hWordTy )
8068 Bool verboze = 0||False;
8069 Int i, j, first_stmt;
8070 IRStmt* st;
8071 MCEnv mce;
8072 IRSB* sb_out;
8074 if (gWordTy != hWordTy) {
8075 /* We don't currently support this case. */
8076 VG_(tool_panic)("host/guest word size mismatch");
8079 /* Check we're not completely nuts */
8080 tl_assert(sizeof(UWord) == sizeof(void*));
8081 tl_assert(sizeof(Word) == sizeof(void*));
8082 tl_assert(sizeof(Addr) == sizeof(void*));
8083 tl_assert(sizeof(ULong) == 8);
8084 tl_assert(sizeof(Long) == 8);
8085 tl_assert(sizeof(UInt) == 4);
8086 tl_assert(sizeof(Int) == 4);
8088 tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);
8090 /* Set up SB */
8091 sb_out = deepCopyIRSBExceptStmts(sb_in);
8093 /* Set up the running environment. Both .sb and .tmpMap are
8094 modified as we go along. Note that tmps are added to both
8095 .sb->tyenv and .tmpMap together, so the valid index-set for
8096 those two arrays should always be identical. */
8097 VG_(memset)(&mce, 0, sizeof(mce));
8098 mce.sb = sb_out;
8099 mce.trace = verboze;
8100 mce.layout = layout;
8101 mce.hWordTy = hWordTy;
8102 mce.tmpHowUsed = NULL;
8104 /* BEGIN decide on expense levels for instrumentation. */
8106 /* Initially, select the cheap version of everything for which we have an
8107 option. */
8108 DetailLevelByOp__set_all( &mce.dlbo, DLcheap );
8110 /* Take account of the --expensive-definedness-checks= flag. */
8111 if (MC_(clo_expensive_definedness_checks) == EdcNO) {
8112 /* We just selected 'cheap for everything', so we don't need to do
8113 anything here. mce.tmpHowUsed remains NULL. */
8115 else if (MC_(clo_expensive_definedness_checks) == EdcYES) {
8116 /* Select 'expensive for everything'. mce.tmpHowUsed remains NULL. */
8117 DetailLevelByOp__set_all( &mce.dlbo, DLexpensive );
8119 else {
8120 tl_assert(MC_(clo_expensive_definedness_checks) == EdcAUTO);
8121 /* We'll make our own selection, based on known per-target constraints
8122 and also on analysis of the block to be instrumented. First, set
8123 up default values for detail levels.
8125 On x86 and amd64, we'll routinely encounter code optimised by LLVM
8126 5 and above. Enable accurate interpretation of the following.
8127 LLVM uses adds for some bitfield inserts, and we get a lot of false
8128 errors if the cheap interpretation is used, alas. Could solve this
8129 much better if we knew which of such adds came from x86/amd64 LEA
8130 instructions, since these are the only ones really needing the
8131 expensive interpretation, but that would require some way to tag
8132 them in the _toIR.c front ends, which is a lot of faffing around.
8133 So for now we use preInstrumentationAnalysis() to detect adds which
8134 are used only to construct memory addresses, which is an
8135 approximation to the above, and is self-contained.*/
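/* Concrete effect of the settings just below (a summary, not an extra
   policy): under EdcAUTO on amd64, Iop_Add64 is DLauto, so an Add64
   whose result tmp the analysis marked HuPCa is instrumented cheaply,
   while one marked HuOth gets the expensive interpretation; and
   CmpEQ32/CmpNE32 are unconditionally DLexpensive on both x86 and
   amd64.  On x86 it is Iop_Add32 that is DLauto. */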
8136 # if defined(VGA_x86)
8137 mce.dlbo.dl_Add32 = DLauto;
8138 mce.dlbo.dl_CmpEQ32_CmpNE32 = DLexpensive;
8139 # elif defined(VGA_amd64)
8140 mce.dlbo.dl_Add64 = DLauto;
8141 mce.dlbo.dl_CmpEQ32_CmpNE32 = DLexpensive;
8142 # endif
8144 /* preInstrumentationAnalysis() will allocate &mce.tmpHowUsed and then
8145 fill it in. */
8146 Bool hasBogusLiterals = False;
8147 preInstrumentationAnalysis( &mce.tmpHowUsed, &hasBogusLiterals, sb_in );
8149 if (hasBogusLiterals) {
8150 /* This happens very rarely. In this case just select expensive
8151 for everything, and throw away the tmp-use analysis results. */
8152 DetailLevelByOp__set_all( &mce.dlbo, DLexpensive );
8153 VG_(free)( mce.tmpHowUsed );
8154 mce.tmpHowUsed = NULL;
8155 } else {
8156 /* Nothing. mce.tmpHowUsed contains tmp-use analysis results,
8157 which will be used for some subset of Iop_{Add,Sub}{32,64},
8158 based on which ones are set to DLauto for this target. */
8162 DetailLevelByOp__check_sanity( &mce.dlbo );
8164 if (0) {
8165 // Debug printing: which tmps have been identified as PCast-only use
8166 if (mce.tmpHowUsed) {
8167 VG_(printf)("Cheapies: ");
8168 for (UInt q = 0; q < sb_in->tyenv->types_used; q++) {
8169 if (mce.tmpHowUsed[q] == HuPCa) {
8170 VG_(printf)("t%u ", q);
8173 VG_(printf)("\n");
8176 // Debug printing: number of ops by detail level
8177 UChar nCheap = DetailLevelByOp__count( &mce.dlbo, DLcheap );
8178 UChar nAuto = DetailLevelByOp__count( &mce.dlbo, DLauto );
8179 UChar nExpensive = DetailLevelByOp__count( &mce.dlbo, DLexpensive );
8180 tl_assert(nCheap + nAuto + nExpensive == 8);
8182 VG_(printf)("%u,%u,%u ", nCheap, nAuto, nExpensive);
8184 /* END decide on expense levels for instrumentation. */
8186 /* Initialise the running tmp environment. */
8188 mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
8189 sizeof(TempMapEnt));
8190 VG_(hintSizeXA) (mce.tmpMap, sb_in->tyenv->types_used);
8191 for (i = 0; i < sb_in->tyenv->types_used; i++) {
8192 TempMapEnt ent;
8193 ent.kind = Orig;
8194 ent.shadowV = IRTemp_INVALID;
8195 ent.shadowB = IRTemp_INVALID;
8196 VG_(addToXA)( mce.tmpMap, &ent );
8198 tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );
8200 /* Finally, begin instrumentation. */
8201 /* Copy verbatim any IR preamble preceding the first IMark */
8203 tl_assert(mce.sb == sb_out);
8204 tl_assert(mce.sb != sb_in);
8206 i = 0;
8207 while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {
8209 st = sb_in->stmts[i];
8210 tl_assert(st);
8211 tl_assert(isFlatIRStmt(st));
8213 stmt( 'C', &mce, sb_in->stmts[i] );
8214 i++;
8217 /* Nasty problem. IR optimisation of the pre-instrumented IR may
8218 cause the IR following the preamble to contain references to IR
8219 temporaries defined in the preamble. Because the preamble isn't
8220 instrumented, these temporaries don't have any shadows.
8221 Nevertheless uses of them following the preamble will cause
8222 memcheck to generate references to their shadows. End effect is
8223 to cause IR sanity check failures, due to references to
8224 non-existent shadows. This is only evident for the complex
8225 preambles used for function wrapping on TOC-afflicted platforms
8226 (ppc64-linux).
8228 The following loop therefore scans the preamble looking for
8229 assignments to temporaries. For each one found it creates an
8230 assignment to the corresponding (V) shadow temp, marking it as
8231 'defined'. This is the same resulting IR as if the main
8232 instrumentation loop below had been applied to the statement
8233 'tmp = CONSTANT'.
8235 Similarly, if origin tracking is enabled, we must generate an
8236 assignment for the corresponding origin (B) shadow, claiming
8237 no-origin, as appropriate for a defined value.
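 
   As an illustration (hypothetical tmp and offset): for a preamble
   statement 't5 = GET:I64(OFFB_something)' the loop below emits a V
   shadow assignment 't5_v = <all defined>' and, when origin tracking
   is enabled, a B shadow assignment 't5_b = 0x0:I32' meaning
   no-origin.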
8239 for (j = 0; j < i; j++) {
8240 if (sb_in->stmts[j]->tag == Ist_WrTmp) {
8241 /* findShadowTmpV checks its arg is an original tmp;
8242 no need to assert that here. */
8243 IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
8244 IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
8245 IRType ty_v = typeOfIRTemp(sb_out->tyenv, tmp_v);
8246 assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
8247 if (MC_(clo_mc_level) == 3) {
8248 IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
8249 tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
8250 assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
8252 if (0) {
8253 VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
8254 ppIRType( ty_v );
8255 VG_(printf)("\n");
8260 /* Iterate over the remaining stmts to generate instrumentation. */
8262 tl_assert(sb_in->stmts_used > 0);
8263 tl_assert(i >= 0);
8264 tl_assert(i < sb_in->stmts_used);
8265 tl_assert(sb_in->stmts[i]->tag == Ist_IMark);
8267 for (/* use current i*/; i < sb_in->stmts_used; i++) {
8269 st = sb_in->stmts[i];
8270 first_stmt = sb_out->stmts_used;
8272 if (verboze) {
8273 VG_(printf)("\n");
8274 ppIRStmt(st);
8275 VG_(printf)("\n");
8278 if (MC_(clo_mc_level) == 3) {
8279 /* See comments on case Ist_CAS below. */
8280 if (st->tag != Ist_CAS)
8281 schemeS( &mce, st );
8284 /* Generate instrumentation code for each stmt ... */
8286 switch (st->tag) {
8288 case Ist_WrTmp: {
8289 IRTemp dst = st->Ist.WrTmp.tmp;
8290 tl_assert(dst < (UInt)sb_in->tyenv->types_used);
8291 HowUsed hu = mce.tmpHowUsed ? mce.tmpHowUsed[dst]
8292 : HuOth/*we don't know, so play safe*/;
8293 assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
8294 expr2vbits( &mce, st->Ist.WrTmp.data, hu ));
8295 break;
8298 case Ist_Put:
8299 do_shadow_PUT( &mce,
8300 st->Ist.Put.offset,
8301 st->Ist.Put.data,
8302 NULL /* shadow atom */, NULL /* guard */ );
8303 break;
8305 case Ist_PutI:
8306 do_shadow_PUTI( &mce, st->Ist.PutI.details);
8307 break;
8309 case Ist_Store:
8310 do_shadow_Store( &mce, st->Ist.Store.end,
8311 st->Ist.Store.addr, 0/* addr bias */,
8312 st->Ist.Store.data,
8313 NULL /* shadow data */,
8314 NULL/*guard*/ );
8315 break;
8317 case Ist_StoreG:
8318 do_shadow_StoreG( &mce, st->Ist.StoreG.details );
8319 break;
8321 case Ist_LoadG:
8322 do_shadow_LoadG( &mce, st->Ist.LoadG.details );
8323 break;
8325 case Ist_Exit:
8326 complainIfUndefined( &mce, st->Ist.Exit.guard, NULL );
8327 break;
8329 case Ist_IMark:
8330 break;
8332 case Ist_NoOp:
8333 case Ist_MBE:
8334 break;
8336 case Ist_Dirty:
8337 do_shadow_Dirty( &mce, st->Ist.Dirty.details );
8338 break;
8340 case Ist_AbiHint:
8341 do_AbiHint( &mce, st->Ist.AbiHint.base,
8342 st->Ist.AbiHint.len,
8343 st->Ist.AbiHint.nia );
8344 break;
8346 case Ist_CAS:
8347 do_shadow_CAS( &mce, st->Ist.CAS.details );
8348 /* Note, do_shadow_CAS copies the CAS itself to the output
8349 block, because it needs to add instrumentation both
8350 before and after it. Hence skip the copy below. Also
8351 skip the origin-tracking stuff (call to schemeS) above,
8352 since that's all tangled up with it too; do_shadow_CAS
8353 does it all. */
8354 break;
8356 case Ist_LLSC:
8357 do_shadow_LLSC( &mce,
8358 st->Ist.LLSC.end,
8359 st->Ist.LLSC.result,
8360 st->Ist.LLSC.addr,
8361 st->Ist.LLSC.storedata );
8362 break;
8364 default:
8365 VG_(printf)("\n");
8366 ppIRStmt(st);
8367 VG_(printf)("\n");
8368 VG_(tool_panic)("memcheck: unhandled IRStmt");
8370 } /* switch (st->tag) */
8372 if (0 && verboze) {
8373 for (j = first_stmt; j < sb_out->stmts_used; j++) {
8374 VG_(printf)(" ");
8375 ppIRStmt(sb_out->stmts[j]);
8376 VG_(printf)("\n");
8378 VG_(printf)("\n");
8381 /* ... and finally copy the stmt itself to the output. Except,
8382 skip the copy of IRCASs; see comments on case Ist_CAS
8383 above. */
8384 if (st->tag != Ist_CAS)
8385 stmt('C', &mce, st);
8388 /* Now we need to complain if the jump target is undefined. */
8389 first_stmt = sb_out->stmts_used;
8391 if (verboze) {
8392 VG_(printf)("sb_in->next = ");
8393 ppIRExpr(sb_in->next);
8394 VG_(printf)("\n\n");
8397 complainIfUndefined( &mce, sb_in->next, NULL );
8399 if (0 && verboze) {
8400 for (j = first_stmt; j < sb_out->stmts_used; j++) {
8401 VG_(printf)(" ");
8402 ppIRStmt(sb_out->stmts[j]);
8403 VG_(printf)("\n");
8405 VG_(printf)("\n");
8408 /* If this fails, there's been some serious snafu with tmp management
8409 that should be investigated. */
8410 tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
8411 VG_(deleteXA)( mce.tmpMap );
8413 if (mce.tmpHowUsed) {
8414 VG_(free)( mce.tmpHowUsed );
8417 tl_assert(mce.sb == sb_out);
8418 return sb_out;
8422 /*--------------------------------------------------------------------*/
8423 /*--- end mc_translate.c ---*/
8424 /*--------------------------------------------------------------------*/