Add DRD suppression patterns for races triggered by std::ostream
[valgrind.git] / memcheck / mc_translate.c
blob666719160092d44d9e054e0fffb9ef0949e29aa4
2 /*--------------------------------------------------------------------*/
3 /*--- Instrument IR to perform memory checking operations. ---*/
4 /*--- mc_translate.c ---*/
5 /*--------------------------------------------------------------------*/
7 /*
8 This file is part of MemCheck, a heavyweight Valgrind tool for
9 detecting memory errors.
11 Copyright (C) 2000-2017 Julian Seward
12 jseward@acm.org
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27 02111-1307, USA.
29 The GNU General Public License is contained in the file COPYING.
32 #include "pub_tool_basics.h"
33 #include "pub_tool_poolalloc.h" // For mc_include.h
34 #include "pub_tool_hashtable.h" // For mc_include.h
35 #include "pub_tool_libcassert.h"
36 #include "pub_tool_libcprint.h"
37 #include "pub_tool_tooliface.h"
38 #include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
39 #include "pub_tool_xarray.h"
40 #include "pub_tool_mallocfree.h"
41 #include "pub_tool_libcbase.h"
43 #include "mc_include.h"
46 /* FIXMEs JRS 2011-June-16.
48 Check the interpretation for vector narrowing and widening ops,
49 particularly the saturating ones. I suspect they are either overly
50 pessimistic and/or wrong.
52 Iop_QandSQsh64x2 and friends (vector-by-vector bidirectional
53 saturating shifts): the interpretation is overly pessimistic.
54 See comments on the relevant cases below for details.
56 Iop_Sh64Sx2 and friends (vector-by-vector bidirectional shifts,
57 both rounding and non-rounding variants): ditto
60 /* This file implements the Memcheck instrumentation, and in
61 particular contains the core of its undefined value detection
62 machinery. For a comprehensive background of the terminology,
63 algorithms and rationale used herein, read:
65 Using Valgrind to detect undefined value errors with
66 bit-precision
68 Julian Seward and Nicholas Nethercote
70 2005 USENIX Annual Technical Conference (General Track),
71 Anaheim, CA, USA, April 10-15, 2005.
73 ----
75 Here is as good a place as any to record exactly when V bits are and
76 should be checked, why, and what function is responsible.
79 Memcheck complains when an undefined value is used:
81 1. In the condition of a conditional branch. Because it could cause
82 incorrect control flow, and thus cause incorrect externally-visible
83 behaviour. [mc_translate.c:complainIfUndefined]
85 2. As an argument to a system call, or as the value that specifies
86 the system call number. Because it could cause an incorrect
87 externally-visible side effect. [mc_translate.c:mc_pre_reg_read]
89 3. As the address in a load or store. Because it could cause an
90 incorrect value to be used later, which could cause externally-visible
91 behaviour (eg. via incorrect control flow or an incorrect system call
92 argument) [complainIfUndefined]
94 4. As the target address of a branch. Because it could cause incorrect
95 control flow. [complainIfUndefined]
97 5. As an argument to setenv, unsetenv, or putenv. Because it could put
98 an incorrect value into the external environment.
99 [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]
101 6. As the index in a GETI or PUTI operation. I'm not sure why... (njn).
102 [complainIfUndefined]
104 7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
105 VALGRIND_CHECK_VALUE_IS_DEFINED client requests. Because the user
106 requested it. [in memcheck.h]
109 Memcheck also complains, but should not, when an undefined value is used:
111 8. As the shift value in certain SIMD shift operations (but not in the
112 standard integer shift operations). This inconsistency is due to
113 historical reasons.) [complainIfUndefined]
116 Memcheck does not complain, but should, when an undefined value is used:
118 9. As an input to a client request. Because the client request may
119 affect the visible behaviour -- see bug #144362 for an example
120 involving the malloc replacements in vg_replace_malloc.c and
121 VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
122 isn't identified. That bug report also has some info on how to solve
123 the problem. [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]
126 In practice, 1 and 2 account for the vast majority of cases.
129 /* Generation of addr-definedness, addr-validity and
130 guard-definedness checks pertaining to loads and stores (Iex_Load,
131 Ist_Store, IRLoadG, IRStoreG, LLSC, CAS and Dirty memory
132 loads/stores) was re-checked 11 May 2013. */
135 /*------------------------------------------------------------*/
136 /*--- Forward decls ---*/
137 /*------------------------------------------------------------*/
139 struct _MCEnv;
141 // See below for comments explaining what this is for.
142 typedef
143 enum __attribute__((packed)) { HuUnU=0, HuPCa=1, HuOth=2 }
144 HowUsed;
146 static IRType shadowTypeV ( IRType ty );
147 static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e,
148 HowUsed hu/*use HuOth if unknown*/ );
149 static IRTemp findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );
151 static IRExpr *i128_const_zero(void);
154 /*------------------------------------------------------------*/
155 /*--- Memcheck running state, and tmp management. ---*/
156 /*------------------------------------------------------------*/
158 /* For a few (maybe 1%) IROps, we have both a cheaper, less exact vbit
159 propagation scheme, and a more expensive, more precise vbit propagation
160 scheme. This enum describes, for such an IROp, which scheme to use. */
161 typedef
162 enum {
163 // Use the cheaper, less-exact variant.
164 DLcheap=4,
165 // Choose between cheap and expensive based on analysis of the block
166 // to be instrumented. Note that the choice may be done on a
167 // per-instance basis of the IROp that this DetailLevel describes.
168 DLauto,
169 // Use the more expensive, more-exact variant.
170 DLexpensive
172 DetailLevel;
175 /* A readonly part of the running state. For IROps that have both a
176 less-exact and more-exact interpretation, records which interpretation is
177 to be used. */
178 typedef
179 struct {
180 // For Add32/64 and Sub32/64, all 3 settings are allowed. For the
181 // DLauto case, a per-instance decision is to be made by inspecting
182 // the associated tmp's entry in MCEnv.tmpHowUsed.
183 DetailLevel dl_Add32;
184 DetailLevel dl_Add64;
185 DetailLevel dl_Sub32;
186 DetailLevel dl_Sub64;
187 // For Cmp{EQ,NE}{64,32,16,8}, only DLcheap and DLexpensive are
188 // allowed.
189 DetailLevel dl_CmpEQ64_CmpNE64;
190 DetailLevel dl_CmpEQ32_CmpNE32;
191 DetailLevel dl_CmpEQ16_CmpNE16;
192 DetailLevel dl_CmpEQ8_CmpNE8;
194 DetailLevelByOp;
196 static void DetailLevelByOp__set_all ( /*OUT*/DetailLevelByOp* dlbo,
197 DetailLevel dl )
199 dlbo->dl_Add32 = dl;
200 dlbo->dl_Add64 = dl;
201 dlbo->dl_Sub32 = dl;
202 dlbo->dl_Sub64 = dl;
203 dlbo->dl_CmpEQ64_CmpNE64 = dl;
204 dlbo->dl_CmpEQ32_CmpNE32 = dl;
205 dlbo->dl_CmpEQ16_CmpNE16 = dl;
206 dlbo->dl_CmpEQ8_CmpNE8 = dl;
209 static void DetailLevelByOp__check_sanity ( const DetailLevelByOp* dlbo )
211 tl_assert(dlbo->dl_Add32 >= DLcheap && dlbo->dl_Add32 <= DLexpensive);
212 tl_assert(dlbo->dl_Add64 >= DLcheap && dlbo->dl_Add64 <= DLexpensive);
213 tl_assert(dlbo->dl_Sub32 >= DLcheap && dlbo->dl_Sub32 <= DLexpensive);
214 tl_assert(dlbo->dl_Sub64 >= DLcheap && dlbo->dl_Sub64 <= DLexpensive);
215 tl_assert(dlbo->dl_CmpEQ64_CmpNE64 == DLcheap
216 || dlbo->dl_CmpEQ64_CmpNE64 == DLexpensive);
217 tl_assert(dlbo->dl_CmpEQ32_CmpNE32 == DLcheap
218 || dlbo->dl_CmpEQ32_CmpNE32 == DLexpensive);
219 tl_assert(dlbo->dl_CmpEQ16_CmpNE16 == DLcheap
220 || dlbo->dl_CmpEQ16_CmpNE16 == DLexpensive);
221 tl_assert(dlbo->dl_CmpEQ8_CmpNE8 == DLcheap
222 || dlbo->dl_CmpEQ8_CmpNE8 == DLexpensive);
225 static UInt DetailLevelByOp__count ( const DetailLevelByOp* dlbo,
226 DetailLevel dl )
228 UInt n = 0;
229 n += (dlbo->dl_Add32 == dl ? 1 : 0);
230 n += (dlbo->dl_Add64 == dl ? 1 : 0);
231 n += (dlbo->dl_Sub32 == dl ? 1 : 0);
232 n += (dlbo->dl_Sub64 == dl ? 1 : 0);
233 n += (dlbo->dl_CmpEQ64_CmpNE64 == dl ? 1 : 0);
234 n += (dlbo->dl_CmpEQ32_CmpNE32 == dl ? 1 : 0);
235 n += (dlbo->dl_CmpEQ16_CmpNE16 == dl ? 1 : 0);
236 n += (dlbo->dl_CmpEQ8_CmpNE8 == dl ? 1 : 0);
237 return n;
241 /* Carries info about a particular tmp. The tmp's number is not
242 recorded, as this is implied by (equal to) its index in the tmpMap
243 in MCEnv. The tmp's type is also not recorded, as this is present
244 in MCEnv.sb->tyenv.
246 When .kind is Orig, .shadowV and .shadowB may give the identities
247 of the temps currently holding the associated definedness (shadowV)
248 and origin (shadowB) values, or these may be IRTemp_INVALID if code
249 to compute such values has not yet been emitted.
251 When .kind is VSh or BSh then the tmp is holds a V- or B- value,
252 and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
253 illogical for a shadow tmp itself to be shadowed.
255 typedef
256 enum { Orig=1, VSh=2, BSh=3 }
257 TempKind;
259 typedef
260 struct {
261 TempKind kind;
262 IRTemp shadowV;
263 IRTemp shadowB;
265 TempMapEnt;
268 /* A |HowUsed| value carries analysis results about how values are used,
269 pertaining to whether we need to instrument integer adds expensively or
270 not. The running state carries a (readonly) mapping from original tmp to
271 a HowUsed value for it. A usage value can be one of three values,
272 forming a 3-point chain lattice.
274 HuOth ("Other") used in some arbitrary way
276 HuPCa ("PCast") used *only* in effectively a PCast, in which all
277 | we care about is the all-defined vs not-all-defined distinction
279 HuUnU ("Unused") not used at all.
281 The "safe" (don't-know) end of the lattice is "HuOth". See comments
282 below in |preInstrumentationAnalysis| for further details.
284 /* DECLARED ABOVE:
285 typedef
286 enum __attribute__((packed)) { HuUnU=0, HuPCa=1, HuOth=2 }
287 HowUsed;
290 // Not actually necessary, but we don't want to waste D1 space.
291 STATIC_ASSERT(sizeof(HowUsed) == 1);
294 /* Carries around state during memcheck instrumentation. */
295 typedef
296 struct _MCEnv {
297 /* MODIFIED: the superblock being constructed. IRStmts are
298 added. */
299 IRSB* sb;
300 Bool trace;
302 /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
303 current kind and possibly shadow temps for each temp in the
304 IRSB being constructed. Note that it does not contain the
305 type of each tmp. If you want to know the type, look at the
306 relevant entry in sb->tyenv. It follows that at all times
307 during the instrumentation process, the valid indices for
308 tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
309 total number of Orig, V- and B- temps allocated so far.
311 The reason for this strange split (types in one place, all
312 other info in another) is that we need the types to be
313 attached to sb so as to make it possible to do
314 "typeOfIRExpr(mce->bb->tyenv, ...)" at various places in the
315 instrumentation process. */
316 XArray* /* of TempMapEnt */ tmpMap;
318 /* READONLY: contains details of which ops should be expensively
319 instrumented. */
320 DetailLevelByOp dlbo;
322 /* READONLY: for each original tmp, how the tmp is used. This is
323 computed by |preInstrumentationAnalysis|. Valid indices are
324 0 .. #temps_in_sb-1 (same as for tmpMap). */
325 HowUsed* tmpHowUsed;
327 /* READONLY: the guest layout. This indicates which parts of
328 the guest state should be regarded as 'always defined'. */
329 const VexGuestLayout* layout;
331 /* READONLY: the host word type. Needed for constructing
332 arguments of type 'HWord' to be passed to helper functions.
333 Ity_I32 or Ity_I64 only. */
334 IRType hWordTy;
336 MCEnv;
339 /* SHADOW TMP MANAGEMENT. Shadow tmps are allocated lazily (on
340 demand), as they are encountered. This is for two reasons.
342 (1) (less important reason): Many original tmps are unused due to
343 initial IR optimisation, and we do not want to spaces in tables
344 tracking them.
346 Shadow IRTemps are therefore allocated on demand. mce.tmpMap is a
347 table indexed [0 .. n_types-1], which gives the current shadow for
348 each original tmp, or INVALID_IRTEMP if none is so far assigned.
349 It is necessary to support making multiple assignments to a shadow
350 -- specifically, after testing a shadow for definedness, it needs
351 to be made defined. But IR's SSA property disallows this.
353 (2) (more important reason): Therefore, when a shadow needs to get
354 a new value, a new temporary is created, the value is assigned to
355 that, and the tmpMap is updated to reflect the new binding.
357 A corollary is that if the tmpMap maps a given tmp to
358 IRTemp_INVALID and we are hoping to read that shadow tmp, it means
359 there's a read-before-write error in the original tmps. The IR
360 sanity checker should catch all such anomalies, however.
363 /* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
364 both the table in mce->sb and to our auxiliary mapping. Note that
365 newTemp may cause mce->tmpMap to resize, hence previous results
366 from VG_(indexXA)(mce->tmpMap) are invalidated. */
367 static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
369 Word newIx;
370 TempMapEnt ent;
371 IRTemp tmp = newIRTemp(mce->sb->tyenv, ty);
372 ent.kind = kind;
373 ent.shadowV = IRTemp_INVALID;
374 ent.shadowB = IRTemp_INVALID;
375 newIx = VG_(addToXA)( mce->tmpMap, &ent );
376 tl_assert(newIx == (Word)tmp);
377 return tmp;
381 /* Find the tmp currently shadowing the given original tmp. If none
382 so far exists, allocate one. */
383 static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
385 TempMapEnt* ent;
386 /* VG_(indexXA) range-checks 'orig', hence no need to check
387 here. */
388 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
389 tl_assert(ent->kind == Orig);
390 if (ent->shadowV == IRTemp_INVALID) {
391 IRTemp tmpV
392 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
393 /* newTemp may cause mce->tmpMap to resize, hence previous results
394 from VG_(indexXA) are invalid. */
395 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
396 tl_assert(ent->kind == Orig);
397 tl_assert(ent->shadowV == IRTemp_INVALID);
398 ent->shadowV = tmpV;
400 return ent->shadowV;
403 /* Allocate a new shadow for the given original tmp. This means any
404 previous shadow is abandoned. This is needed because it is
405 necessary to give a new value to a shadow once it has been tested
406 for undefinedness, but unfortunately IR's SSA property disallows
407 this. Instead we must abandon the old shadow, allocate a new one
408 and use that instead.
410 This is the same as findShadowTmpV, except we don't bother to see
411 if a shadow temp already existed -- we simply allocate a new one
412 regardless. */
413 static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
415 TempMapEnt* ent;
416 /* VG_(indexXA) range-checks 'orig', hence no need to check
417 here. */
418 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
419 tl_assert(ent->kind == Orig);
420 if (1) {
421 IRTemp tmpV
422 = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
423 /* newTemp may cause mce->tmpMap to resize, hence previous results
424 from VG_(indexXA) are invalid. */
425 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
426 tl_assert(ent->kind == Orig);
427 ent->shadowV = tmpV;
432 /*------------------------------------------------------------*/
433 /*--- IRAtoms -- a subset of IRExprs ---*/
434 /*------------------------------------------------------------*/
436 /* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
437 isIRAtom() in libvex_ir.h. Because this instrumenter expects flat
438 input, most of this code deals in atoms. Usefully, a value atom
439 always has a V-value which is also an atom: constants are shadowed
440 by constants, and temps are shadowed by the corresponding shadow
441 temporary. */
443 typedef IRExpr IRAtom;
445 /* (used for sanity checks only): is this an atom which looks
446 like it's from original code? */
447 static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
449 if (a1->tag == Iex_Const)
450 return True;
451 if (a1->tag == Iex_RdTmp) {
452 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
453 return ent->kind == Orig;
455 return False;
458 /* (used for sanity checks only): is this an atom which looks
459 like it's from shadow code? */
460 static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
462 if (a1->tag == Iex_Const)
463 return True;
464 if (a1->tag == Iex_RdTmp) {
465 TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
466 return ent->kind == VSh || ent->kind == BSh;
468 return False;
471 /* (used for sanity checks only): check that both args are atoms and
472 are identically-kinded. */
473 static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
475 if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
476 return True;
477 if (a1->tag == Iex_Const && a2->tag == Iex_Const)
478 return True;
479 return False;
483 /*------------------------------------------------------------*/
484 /*--- Type management ---*/
485 /*------------------------------------------------------------*/
487 /* Shadow state is always accessed using integer types. This returns
488 an integer type with the same size (as per sizeofIRType) as the
489 given type. The only valid shadow types are Bit, I8, I16, I32,
490 I64, I128, V128, V256. */
492 static IRType shadowTypeV ( IRType ty )
494 switch (ty) {
495 case Ity_I1:
496 case Ity_I8:
497 case Ity_I16:
498 case Ity_I32:
499 case Ity_I64:
500 case Ity_I128: return ty;
501 case Ity_F16: return Ity_I16;
502 case Ity_F32: return Ity_I32;
503 case Ity_D32: return Ity_I32;
504 case Ity_F64: return Ity_I64;
505 case Ity_D64: return Ity_I64;
506 case Ity_F128: return Ity_I128;
507 case Ity_D128: return Ity_I128;
508 case Ity_V128: return Ity_V128;
509 case Ity_V256: return Ity_V256;
510 default: ppIRType(ty);
511 VG_(tool_panic)("memcheck:shadowTypeV");
515 /* Produce a 'defined' value of the given shadow type. Should only be
516 supplied shadow types (Bit/I8/I16/I32/UI64). */
517 static IRExpr* definedOfType ( IRType ty ) {
518 switch (ty) {
519 case Ity_I1: return IRExpr_Const(IRConst_U1(False));
520 case Ity_I8: return IRExpr_Const(IRConst_U8(0));
521 case Ity_I16: return IRExpr_Const(IRConst_U16(0));
522 case Ity_I32: return IRExpr_Const(IRConst_U32(0));
523 case Ity_I64: return IRExpr_Const(IRConst_U64(0));
524 case Ity_I128: return i128_const_zero();
525 case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
526 case Ity_V256: return IRExpr_Const(IRConst_V256(0x00000000));
527 default: VG_(tool_panic)("memcheck:definedOfType");
532 /*------------------------------------------------------------*/
533 /*--- Constructing IR fragments ---*/
534 /*------------------------------------------------------------*/
536 /* add stmt to a bb */
537 static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
538 if (mce->trace) {
539 VG_(printf)(" %c: ", cat);
540 ppIRStmt(st);
541 VG_(printf)("\n");
543 addStmtToIRSB(mce->sb, st);
546 /* assign value to tmp */
547 static inline
548 void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
549 stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
552 /* build various kinds of expressions */
553 #define triop(_op, _arg1, _arg2, _arg3) \
554 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
555 #define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
556 #define unop(_op, _arg) IRExpr_Unop((_op),(_arg))
557 #define mkU1(_n) IRExpr_Const(IRConst_U1(_n))
558 #define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
559 #define mkU16(_n) IRExpr_Const(IRConst_U16(_n))
560 #define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
561 #define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
562 #define mkV128(_n) IRExpr_Const(IRConst_V128(_n))
563 #define mkexpr(_tmp) IRExpr_RdTmp((_tmp))
565 /* Bind the given expression to a new temporary, and return the
566 temporary. This effectively converts an arbitrary expression into
567 an atom.
569 'ty' is the type of 'e' and hence the type that the new temporary
570 needs to be. But passing it in is redundant, since we can deduce
571 the type merely by inspecting 'e'. So at least use that fact to
572 assert that the two types agree. */
573 static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
575 TempKind k;
576 IRTemp t;
577 IRType tyE = typeOfIRExpr(mce->sb->tyenv, e);
579 tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
580 switch (cat) {
581 case 'V': k = VSh; break;
582 case 'B': k = BSh; break;
583 case 'C': k = Orig; break;
584 /* happens when we are making up new "orig"
585 expressions, for IRCAS handling */
586 default: tl_assert(0);
588 t = newTemp(mce, ty, k);
589 assign(cat, mce, t, e);
590 return mkexpr(t);
594 /*------------------------------------------------------------*/
595 /*--- Helper functions for 128-bit ops ---*/
596 /*------------------------------------------------------------*/
598 static IRExpr *i128_const_zero(void)
600 IRAtom* z64 = IRExpr_Const(IRConst_U64(0));
601 return binop(Iop_64HLto128, z64, z64);
604 /* There are no I128-bit loads and/or stores [as generated by any
605 current front ends]. So we do not need to worry about that in
606 expr2vbits_Load */
609 /*------------------------------------------------------------*/
610 /*--- Constructing definedness primitive ops ---*/
611 /*------------------------------------------------------------*/
613 /* --------- Defined-if-either-defined --------- */
615 static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
616 tl_assert(isShadowAtom(mce,a1));
617 tl_assert(isShadowAtom(mce,a2));
618 return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
621 static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
622 tl_assert(isShadowAtom(mce,a1));
623 tl_assert(isShadowAtom(mce,a2));
624 return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
627 static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
628 tl_assert(isShadowAtom(mce,a1));
629 tl_assert(isShadowAtom(mce,a2));
630 return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
633 static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
634 tl_assert(isShadowAtom(mce,a1));
635 tl_assert(isShadowAtom(mce,a2));
636 return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
639 static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
640 tl_assert(isShadowAtom(mce,a1));
641 tl_assert(isShadowAtom(mce,a2));
642 return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
645 static IRAtom* mkDifDV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
646 tl_assert(isShadowAtom(mce,a1));
647 tl_assert(isShadowAtom(mce,a2));
648 return assignNew('V', mce, Ity_V256, binop(Iop_AndV256, a1, a2));
651 /* --------- Undefined-if-either-undefined --------- */
653 static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
654 tl_assert(isShadowAtom(mce,a1));
655 tl_assert(isShadowAtom(mce,a2));
656 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
659 static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
660 tl_assert(isShadowAtom(mce,a1));
661 tl_assert(isShadowAtom(mce,a2));
662 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
665 static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
666 tl_assert(isShadowAtom(mce,a1));
667 tl_assert(isShadowAtom(mce,a2));
668 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
671 static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
672 tl_assert(isShadowAtom(mce,a1));
673 tl_assert(isShadowAtom(mce,a2));
674 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
677 static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
678 IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6;
679 tl_assert(isShadowAtom(mce,a1));
680 tl_assert(isShadowAtom(mce,a2));
681 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a1));
682 tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1));
683 tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2));
684 tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2));
685 tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3));
686 tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4));
688 return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5));
691 static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
692 tl_assert(isShadowAtom(mce,a1));
693 tl_assert(isShadowAtom(mce,a2));
694 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
697 static IRAtom* mkUifUV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
698 tl_assert(isShadowAtom(mce,a1));
699 tl_assert(isShadowAtom(mce,a2));
700 return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, a1, a2));
703 static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
704 switch (vty) {
705 case Ity_I8: return mkUifU8(mce, a1, a2);
706 case Ity_I16: return mkUifU16(mce, a1, a2);
707 case Ity_I32: return mkUifU32(mce, a1, a2);
708 case Ity_I64: return mkUifU64(mce, a1, a2);
709 case Ity_I128: return mkUifU128(mce, a1, a2);
710 case Ity_V128: return mkUifUV128(mce, a1, a2);
711 case Ity_V256: return mkUifUV256(mce, a1, a2);
712 default:
713 VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
714 VG_(tool_panic)("memcheck:mkUifU");
718 /* --------- The Left-family of operations. --------- */
720 static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
721 tl_assert(isShadowAtom(mce,a1));
722 return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
725 static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
726 tl_assert(isShadowAtom(mce,a1));
727 return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
730 static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
731 tl_assert(isShadowAtom(mce,a1));
732 return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
735 static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
736 tl_assert(isShadowAtom(mce,a1));
737 return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
740 /* --------- 'Improvement' functions for AND/OR. --------- */
742 /* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give
743 defined (0); all other -> undefined (1).
745 static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
747 tl_assert(isOriginalAtom(mce, data));
748 tl_assert(isShadowAtom(mce, vbits));
749 tl_assert(sameKindedAtoms(data, vbits));
750 return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
753 static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
755 tl_assert(isOriginalAtom(mce, data));
756 tl_assert(isShadowAtom(mce, vbits));
757 tl_assert(sameKindedAtoms(data, vbits));
758 return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
761 static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
763 tl_assert(isOriginalAtom(mce, data));
764 tl_assert(isShadowAtom(mce, vbits));
765 tl_assert(sameKindedAtoms(data, vbits));
766 return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
769 static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
771 tl_assert(isOriginalAtom(mce, data));
772 tl_assert(isShadowAtom(mce, vbits));
773 tl_assert(sameKindedAtoms(data, vbits));
774 return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
777 static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
779 tl_assert(isOriginalAtom(mce, data));
780 tl_assert(isShadowAtom(mce, vbits));
781 tl_assert(sameKindedAtoms(data, vbits));
782 return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
785 static IRAtom* mkImproveANDV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
787 tl_assert(isOriginalAtom(mce, data));
788 tl_assert(isShadowAtom(mce, vbits));
789 tl_assert(sameKindedAtoms(data, vbits));
790 return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, data, vbits));
793 /* ImproveOR(data, vbits) = ~data OR vbits. Defined (0) data 1s give
794 defined (0); all other -> undefined (1).
796 static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
798 tl_assert(isOriginalAtom(mce, data));
799 tl_assert(isShadowAtom(mce, vbits));
800 tl_assert(sameKindedAtoms(data, vbits));
801 return assignNew(
802 'V', mce, Ity_I8,
803 binop(Iop_Or8,
804 assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
805 vbits) );
808 static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
810 tl_assert(isOriginalAtom(mce, data));
811 tl_assert(isShadowAtom(mce, vbits));
812 tl_assert(sameKindedAtoms(data, vbits));
813 return assignNew(
814 'V', mce, Ity_I16,
815 binop(Iop_Or16,
816 assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
817 vbits) );
820 static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
822 tl_assert(isOriginalAtom(mce, data));
823 tl_assert(isShadowAtom(mce, vbits));
824 tl_assert(sameKindedAtoms(data, vbits));
825 return assignNew(
826 'V', mce, Ity_I32,
827 binop(Iop_Or32,
828 assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
829 vbits) );
832 static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
834 tl_assert(isOriginalAtom(mce, data));
835 tl_assert(isShadowAtom(mce, vbits));
836 tl_assert(sameKindedAtoms(data, vbits));
837 return assignNew(
838 'V', mce, Ity_I64,
839 binop(Iop_Or64,
840 assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
841 vbits) );
844 static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
846 tl_assert(isOriginalAtom(mce, data));
847 tl_assert(isShadowAtom(mce, vbits));
848 tl_assert(sameKindedAtoms(data, vbits));
849 return assignNew(
850 'V', mce, Ity_V128,
851 binop(Iop_OrV128,
852 assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
853 vbits) );
856 static IRAtom* mkImproveORV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
858 tl_assert(isOriginalAtom(mce, data));
859 tl_assert(isShadowAtom(mce, vbits));
860 tl_assert(sameKindedAtoms(data, vbits));
861 return assignNew(
862 'V', mce, Ity_V256,
863 binop(Iop_OrV256,
864 assignNew('V', mce, Ity_V256, unop(Iop_NotV256, data)),
865 vbits) );
868 /* --------- Pessimising casts. --------- */
870 /* The function returns an expression of type DST_TY. If any of the VBITS
871 is undefined (value == 1) the resulting expression has all bits set to
872 1. Otherwise, all bits are 0. */
874 static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
876 IRType src_ty;
877 IRAtom* tmp1;
879 /* Note, dst_ty is a shadow type, not an original type. */
880 tl_assert(isShadowAtom(mce,vbits));
881 src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);
883 /* Fast-track some common cases */
884 if (src_ty == Ity_I32 && dst_ty == Ity_I32)
885 return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
887 if (src_ty == Ity_I64 && dst_ty == Ity_I64)
888 return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
890 if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
891 /* PCast the arg, then clone it. */
892 IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
893 return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
896 if (src_ty == Ity_I32 && dst_ty == Ity_V128) {
897 /* PCast the arg, then clone it 4 times. */
898 IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
899 tmp = assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
900 return assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp, tmp));
903 if (src_ty == Ity_I32 && dst_ty == Ity_V256) {
904 /* PCast the arg, then clone it 8 times. */
905 IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
906 tmp = assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
907 tmp = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp, tmp));
908 return assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256, tmp, tmp));
911 if (src_ty == Ity_I64 && dst_ty == Ity_I32) {
912 /* PCast the arg. This gives all 0s or all 1s. Then throw away
913 the top half. */
914 IRAtom* tmp = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
915 return assignNew('V', mce, Ity_I32, unop(Iop_64to32, tmp));
918 if (src_ty == Ity_V128 && dst_ty == Ity_I64) {
919 /* Use InterleaveHI64x2 to copy the top half of the vector into
920 the bottom half. Then we can UifU it with the original, throw
921 away the upper half of the result, and PCast-I64-to-I64
922 the lower half. */
923 // Generates vbits[127:64] : vbits[127:64]
924 IRAtom* hi64hi64
925 = assignNew('V', mce, Ity_V128,
926 binop(Iop_InterleaveHI64x2, vbits, vbits));
927 // Generates
928 // UifU(vbits[127:64],vbits[127:64]) : UifU(vbits[127:64],vbits[63:0])
929 // == vbits[127:64] : UifU(vbits[127:64],vbits[63:0])
930 IRAtom* lohi64
931 = mkUifUV128(mce, hi64hi64, vbits);
932 // Generates UifU(vbits[127:64],vbits[63:0])
933 IRAtom* lo64
934 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, lohi64));
935 // Generates
936 // PCast-to-I64( UifU(vbits[127:64], vbits[63:0] )
937 // == PCast-to-I64( vbits[127:0] )
938 IRAtom* res
939 = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, lo64));
940 return res;
943 /* Else do it the slow way .. */
944 /* First of all, collapse vbits down to a single bit. */
945 tmp1 = NULL;
946 switch (src_ty) {
947 case Ity_I1:
948 tmp1 = vbits;
949 break;
950 case Ity_I8:
951 tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
952 break;
953 case Ity_I16:
954 tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
955 break;
956 case Ity_I32:
957 tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
958 break;
959 case Ity_I64:
960 tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
961 break;
962 case Ity_I128: {
963 /* Gah. Chop it in half, OR the halves together, and compare
964 that with zero. */
965 IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
966 IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
967 IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
968 tmp1 = assignNew('V', mce, Ity_I1,
969 unop(Iop_CmpNEZ64, tmp4));
970 break;
972 case Ity_V128: {
973 /* Chop it in half, OR the halves together, and compare that
974 * with zero.
976 IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vbits));
977 IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vbits));
978 IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
979 tmp1 = assignNew('V', mce, Ity_I1,
980 unop(Iop_CmpNEZ64, tmp4));
981 break;
983 default:
984 ppIRType(src_ty);
985 VG_(tool_panic)("mkPCastTo(1)");
987 tl_assert(tmp1);
988 /* Now widen up to the dst type. */
989 switch (dst_ty) {
990 case Ity_I1:
991 return tmp1;
992 case Ity_I8:
993 return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
994 case Ity_I16:
995 return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
996 case Ity_I32:
997 return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
998 case Ity_I64:
999 return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
1000 case Ity_V128:
1001 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
1002 tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
1003 return tmp1;
1004 case Ity_I128:
1005 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
1006 tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
1007 return tmp1;
1008 case Ity_V256:
1009 tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
1010 tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128,
1011 tmp1, tmp1));
1012 tmp1 = assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256,
1013 tmp1, tmp1));
1014 return tmp1;
1015 default:
1016 ppIRType(dst_ty);
1017 VG_(tool_panic)("mkPCastTo(2)");
1021 /* This is a minor variant. It takes an arg of some type and returns
1022 a value of the same type. The result consists entirely of Defined
1023 (zero) bits except its least significant bit, which is a PCast of
1024 the entire argument down to a single bit. */
1025 static IRAtom* mkPCastXXtoXXlsb ( MCEnv* mce, IRAtom* varg, IRType ty )
1027 if (ty == Ity_V128) {
1028 /* --- Case for V128 --- */
1029 IRAtom* varg128 = varg;
1030 // generates: PCast-to-I64(varg128)
1031 IRAtom* pcdTo64 = mkPCastTo(mce, Ity_I64, varg128);
1032 // Now introduce zeros (defined bits) in the top 63 places
1033 // generates: Def--(63)--Def PCast-to-I1(varg128)
1034 IRAtom* d63pc
1035 = assignNew('V', mce, Ity_I64, binop(Iop_And64, pcdTo64, mkU64(1)));
1036 // generates: Def--(64)--Def
1037 IRAtom* d64
1038 = definedOfType(Ity_I64);
1039 // generates: Def--(127)--Def PCast-to-I1(varg128)
1040 IRAtom* res
1041 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, d64, d63pc));
1042 return res;
1044 if (ty == Ity_I64) {
1045 /* --- Case for I64 --- */
1046 // PCast to 64
1047 IRAtom* pcd = mkPCastTo(mce, Ity_I64, varg);
1048 // Zero (Def) out the top 63 bits
1049 IRAtom* res
1050 = assignNew('V', mce, Ity_I64, binop(Iop_And64, pcd, mkU64(1)));
1051 return res;
1053 /*NOTREACHED*/
1054 tl_assert(0);
1057 /* --------- Optimistic casts. --------- */
1059 /* The function takes and returns an expression of type TY. If any of the
1060 VBITS indicate defined (value == 0) the resulting expression has all bits
1061 set to 0. Otherwise, all bits are 1. In words, if any bits are defined
1062 then all bits are made to be defined.
1064 In short we compute (vbits - (vbits >>u 1)) >>s (bitsize(vbits)-1).
1066 static IRAtom* mkOCastAt( MCEnv* mce, IRType ty, IRAtom* vbits )
1068 IROp opSUB, opSHR, opSAR;
1069 UInt sh;
1071 switch (ty) {
1072 case Ity_I64:
1073 opSUB = Iop_Sub64; opSHR = Iop_Shr64; opSAR = Iop_Sar64; sh = 63;
1074 break;
1075 case Ity_I32:
1076 opSUB = Iop_Sub32; opSHR = Iop_Shr32; opSAR = Iop_Sar32; sh = 31;
1077 break;
1078 case Ity_I16:
1079 opSUB = Iop_Sub16; opSHR = Iop_Shr16; opSAR = Iop_Sar16; sh = 15;
1080 break;
1081 case Ity_I8:
1082 opSUB = Iop_Sub8; opSHR = Iop_Shr8; opSAR = Iop_Sar8; sh = 7;
1083 break;
1084 default:
1085 ppIRType(ty);
1086 VG_(tool_panic)("mkOCastTo");
1089 IRAtom *shr1, *at;
1090 shr1 = assignNew('V', mce,ty, binop(opSHR, vbits, mkU8(1)));
1091 at = assignNew('V', mce,ty, binop(opSUB, vbits, shr1));
1092 at = assignNew('V', mce,ty, binop(opSAR, at, mkU8(sh)));
1093 return at;
1097 /* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
1099 Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
1100 PCasting to Ity_U1. However, sometimes it is necessary to be more
1101 accurate. The insight is that the result is defined if two
1102 corresponding bits can be found, one from each argument, so that
1103 both bits are defined but are different -- that makes EQ say "No"
1104 and NE say "Yes". Hence, we compute an improvement term and DifD
1105 it onto the "normal" (UifU) result.
1107 The result is:
1109 PCastTo<1> (
1110 -- naive version
1111 UifU<sz>(vxx, vyy)
1113 `DifD<sz>`
1115 -- improvement term
1116 OCast<sz>(vec)
1119 where
1120 vec contains 0 (defined) bits where the corresponding arg bits
1121 are defined but different, and 1 bits otherwise.
1123 vec = Or<sz>( vxx, // 0 iff bit defined
1124 vyy, // 0 iff bit defined
1125 Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
1128 If any bit of vec is 0, the result is defined and so the
1129 improvement term should produce 0...0, else it should produce
1130 1...1.
1132 Hence require for the improvement term:
1134 OCast(vec) = if vec == 1...1 then 1...1 else 0...0
1136 which you can think of as an "optimistic cast" (OCast, the opposite of
1137 the normal "pessimistic cast" (PCast) family. An OCast says all bits
1138 are defined if any bit is defined.
1140 It is possible to show that
1142 if vec == 1...1 then 1...1 else 0...0
1144 can be implemented in straight-line code as
1146 (vec - (vec >>u 1)) >>s (word-size-in-bits - 1)
1148 We note that vec contains the sub-term Or<sz>(vxx, vyy). Since UifU is
1149 implemented with Or (since 1 signifies undefinedness), this is a
1150 duplicate of the UifU<sz>(vxx, vyy) term and so we can CSE it out, giving
1151 a final version of:
1153 let naive = UifU<sz>(vxx, vyy)
1154 vec = Or<sz>(naive, Not<sz>(Xor<sz)(xx, yy))
1156 PCastTo<1>( DifD<sz>(naive, OCast<sz>(vec)) )
1158 This was extensively re-analysed and checked on 6 July 05 and again
1159 in July 2017.
1161 static IRAtom* expensiveCmpEQorNE ( MCEnv* mce,
1162 IRType ty,
1163 IRAtom* vxx, IRAtom* vyy,
1164 IRAtom* xx, IRAtom* yy )
1166 IRAtom *naive, *vec, *improved, *final_cast;
1167 IROp opDIFD, opUIFU, opOR, opXOR, opNOT;
1169 tl_assert(isShadowAtom(mce,vxx));
1170 tl_assert(isShadowAtom(mce,vyy));
1171 tl_assert(isOriginalAtom(mce,xx));
1172 tl_assert(isOriginalAtom(mce,yy));
1173 tl_assert(sameKindedAtoms(vxx,xx));
1174 tl_assert(sameKindedAtoms(vyy,yy));
1176 switch (ty) {
1177 case Ity_I8:
1178 opDIFD = Iop_And8;
1179 opUIFU = Iop_Or8;
1180 opOR = Iop_Or8;
1181 opXOR = Iop_Xor8;
1182 opNOT = Iop_Not8;
1183 break;
1184 case Ity_I16:
1185 opDIFD = Iop_And16;
1186 opUIFU = Iop_Or16;
1187 opOR = Iop_Or16;
1188 opXOR = Iop_Xor16;
1189 opNOT = Iop_Not16;
1190 break;
1191 case Ity_I32:
1192 opDIFD = Iop_And32;
1193 opUIFU = Iop_Or32;
1194 opOR = Iop_Or32;
1195 opXOR = Iop_Xor32;
1196 opNOT = Iop_Not32;
1197 break;
1198 case Ity_I64:
1199 opDIFD = Iop_And64;
1200 opUIFU = Iop_Or64;
1201 opOR = Iop_Or64;
1202 opXOR = Iop_Xor64;
1203 opNOT = Iop_Not64;
1204 break;
1205 default:
1206 VG_(tool_panic)("expensiveCmpEQorNE");
1209 naive
1210 = assignNew('V', mce, ty, binop(opUIFU, vxx, vyy));
1212 vec
1213 = assignNew(
1214 'V', mce,ty,
1215 binop( opOR,
1216 naive,
1217 assignNew(
1218 'V', mce,ty,
1219 unop(opNOT,
1220 assignNew('V', mce,ty, binop(opXOR, xx, yy))))));
1222 improved
1223 = assignNew( 'V', mce,ty,
1224 binop(opDIFD, naive, mkOCastAt(mce, ty, vec)));
1226 final_cast
1227 = mkPCastTo( mce, Ity_I1, improved );
1229 return final_cast;
1233 /* --------- Semi-accurate interpretation of CmpORD. --------- */
1235 /* CmpORD32{S,U} does PowerPC-style 3-way comparisons:
1237 CmpORD32S(x,y) = 1<<3 if x <s y
1238 = 1<<2 if x >s y
1239 = 1<<1 if x == y
1241 and similarly the unsigned variant. The default interpretation is:
1243 CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
1244 & (7<<1)
1246 The "& (7<<1)" reflects the fact that all result bits except 3,2,1
1247 are zero and therefore defined (viz, zero).
1249 Also deal with a special case better:
1251 CmpORD32S(x,0)
1253 Here, bit 3 (LT) of the result is a copy of the top bit of x and
1254 will be defined even if the rest of x isn't. In which case we do:
1256 CmpORD32S#(x,x#,0,{impliedly 0}#)
1257 = PCast(x#) & (3<<1) -- standard interp for GT#,EQ#
1258 | (x# >>u 31) << 3 -- LT# = x#[31]
1260 Analogous handling for CmpORD64{S,U}.
1262 static Bool isZeroU32 ( IRAtom* e )
1264 return
1265 toBool( e->tag == Iex_Const
1266 && e->Iex.Const.con->tag == Ico_U32
1267 && e->Iex.Const.con->Ico.U32 == 0 );
1270 static Bool isZeroU64 ( IRAtom* e )
1272 return
1273 toBool( e->tag == Iex_Const
1274 && e->Iex.Const.con->tag == Ico_U64
1275 && e->Iex.Const.con->Ico.U64 == 0 );
1278 static IRAtom* doCmpORD ( MCEnv* mce,
1279 IROp cmp_op,
1280 IRAtom* xxhash, IRAtom* yyhash,
1281 IRAtom* xx, IRAtom* yy )
1283 Bool m64 = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
1284 Bool syned = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
1285 IROp opOR = m64 ? Iop_Or64 : Iop_Or32;
1286 IROp opAND = m64 ? Iop_And64 : Iop_And32;
1287 IROp opSHL = m64 ? Iop_Shl64 : Iop_Shl32;
1288 IROp opSHR = m64 ? Iop_Shr64 : Iop_Shr32;
1289 IRType ty = m64 ? Ity_I64 : Ity_I32;
1290 Int width = m64 ? 64 : 32;
1292 Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;
1294 IRAtom* threeLeft1 = NULL;
1295 IRAtom* sevenLeft1 = NULL;
1297 tl_assert(isShadowAtom(mce,xxhash));
1298 tl_assert(isShadowAtom(mce,yyhash));
1299 tl_assert(isOriginalAtom(mce,xx));
1300 tl_assert(isOriginalAtom(mce,yy));
1301 tl_assert(sameKindedAtoms(xxhash,xx));
1302 tl_assert(sameKindedAtoms(yyhash,yy));
1303 tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
1304 || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);
1306 if (0) {
1307 ppIROp(cmp_op); VG_(printf)(" ");
1308 ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
1311 if (syned && isZero(yy)) {
1312 /* fancy interpretation */
1313 /* if yy is zero, then it must be fully defined (zero#). */
1314 tl_assert(isZero(yyhash));
1315 threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
1316 return
1317 binop(
1318 opOR,
1319 assignNew(
1320 'V', mce,ty,
1321 binop(
1322 opAND,
1323 mkPCastTo(mce,ty, xxhash),
1324 threeLeft1
1326 assignNew(
1327 'V', mce,ty,
1328 binop(
1329 opSHL,
1330 assignNew(
1331 'V', mce,ty,
1332 binop(opSHR, xxhash, mkU8(width-1))),
1333 mkU8(3)
1336 } else {
1337 /* standard interpretation */
1338 sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
1339 return
1340 binop(
1341 opAND,
1342 mkPCastTo( mce,ty,
1343 mkUifU(mce,ty, xxhash,yyhash)),
1344 sevenLeft1
1350 /*------------------------------------------------------------*/
1351 /*--- Emit a test and complaint if something is undefined. ---*/
1352 /*------------------------------------------------------------*/
1354 static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */
1357 /* Set the annotations on a dirty helper to indicate that the stack
1358 pointer and instruction pointers might be read. This is the
1359 behaviour of all 'emit-a-complaint' style functions we might
1360 call. */
1362 static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
1363 di->nFxState = 2;
1364 di->fxState[0].fx = Ifx_Read;
1365 di->fxState[0].offset = mce->layout->offset_SP;
1366 di->fxState[0].size = mce->layout->sizeof_SP;
1367 di->fxState[0].nRepeats = 0;
1368 di->fxState[0].repeatLen = 0;
1369 di->fxState[1].fx = Ifx_Read;
1370 di->fxState[1].offset = mce->layout->offset_IP;
1371 di->fxState[1].size = mce->layout->sizeof_IP;
1372 di->fxState[1].nRepeats = 0;
1373 di->fxState[1].repeatLen = 0;
1377 /* Check the supplied *original* |atom| for undefinedness, and emit a
1378 complaint if so. Once that happens, mark it as defined. This is
1379 possible because the atom is either a tmp or literal. If it's a
1380 tmp, it will be shadowed by a tmp, and so we can set the shadow to
1381 be defined. In fact as mentioned above, we will have to allocate a
1382 new tmp to carry the new 'defined' shadow value, and update the
1383 original->tmp mapping accordingly; we cannot simply assign a new
1384 value to an existing shadow tmp as this breaks SSAness.
1386 The checks are performed, any resulting complaint emitted, and
1387 |atom|'s shadow temp set to 'defined', ONLY in the case that
1388 |guard| evaluates to True at run-time. If it evaluates to False
1389 then no action is performed. If |guard| is NULL (the usual case)
1390 then it is assumed to be always-true, and hence these actions are
1391 performed unconditionally.
1393 This routine does not generate code to check the definedness of
1394 |guard|. The caller is assumed to have taken care of that already.
1396 static void complainIfUndefined ( MCEnv* mce, IRAtom* atom, IRExpr *guard )
1398 IRAtom* vatom;
1399 IRType ty;
1400 Int sz;
1401 IRDirty* di;
1402 IRAtom* cond;
1403 IRAtom* origin;
1404 void* fn;
1405 const HChar* nm;
1406 IRExpr** args;
1407 Int nargs;
1409 // Don't do V bit tests if we're not reporting undefined value errors.
1410 if (MC_(clo_mc_level) == 1)
1411 return;
1413 if (guard)
1414 tl_assert(isOriginalAtom(mce, guard));
1416 /* Since the original expression is atomic, there's no duplicated
1417 work generated by making multiple V-expressions for it. So we
1418 don't really care about the possibility that someone else may
1419 also create a V-interpretion for it. */
1420 tl_assert(isOriginalAtom(mce, atom));
1421 vatom = expr2vbits( mce, atom, HuOth );
1422 tl_assert(isShadowAtom(mce, vatom));
1423 tl_assert(sameKindedAtoms(atom, vatom));
1425 ty = typeOfIRExpr(mce->sb->tyenv, vatom);
1427 /* sz is only used for constructing the error message */
1428 sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);
1430 cond = mkPCastTo( mce, Ity_I1, vatom );
1431 /* cond will be 0 if all defined, and 1 if any not defined. */
1433 /* Get the origin info for the value we are about to check. At
1434 least, if we are doing origin tracking. If not, use a dummy
1435 zero origin. */
1436 if (MC_(clo_mc_level) == 3) {
1437 origin = schemeE( mce, atom );
1438 if (mce->hWordTy == Ity_I64) {
1439 origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
1441 } else {
1442 origin = NULL;
1445 fn = NULL;
1446 nm = NULL;
1447 args = NULL;
1448 nargs = -1;
1450 switch (sz) {
1451 case 0:
1452 if (origin) {
1453 fn = &MC_(helperc_value_check0_fail_w_o);
1454 nm = "MC_(helperc_value_check0_fail_w_o)";
1455 args = mkIRExprVec_1(origin);
1456 nargs = 1;
1457 } else {
1458 fn = &MC_(helperc_value_check0_fail_no_o);
1459 nm = "MC_(helperc_value_check0_fail_no_o)";
1460 args = mkIRExprVec_0();
1461 nargs = 0;
1463 break;
1464 case 1:
1465 if (origin) {
1466 fn = &MC_(helperc_value_check1_fail_w_o);
1467 nm = "MC_(helperc_value_check1_fail_w_o)";
1468 args = mkIRExprVec_1(origin);
1469 nargs = 1;
1470 } else {
1471 fn = &MC_(helperc_value_check1_fail_no_o);
1472 nm = "MC_(helperc_value_check1_fail_no_o)";
1473 args = mkIRExprVec_0();
1474 nargs = 0;
1476 break;
1477 case 4:
1478 if (origin) {
1479 fn = &MC_(helperc_value_check4_fail_w_o);
1480 nm = "MC_(helperc_value_check4_fail_w_o)";
1481 args = mkIRExprVec_1(origin);
1482 nargs = 1;
1483 } else {
1484 fn = &MC_(helperc_value_check4_fail_no_o);
1485 nm = "MC_(helperc_value_check4_fail_no_o)";
1486 args = mkIRExprVec_0();
1487 nargs = 0;
1489 break;
1490 case 8:
1491 if (origin) {
1492 fn = &MC_(helperc_value_check8_fail_w_o);
1493 nm = "MC_(helperc_value_check8_fail_w_o)";
1494 args = mkIRExprVec_1(origin);
1495 nargs = 1;
1496 } else {
1497 fn = &MC_(helperc_value_check8_fail_no_o);
1498 nm = "MC_(helperc_value_check8_fail_no_o)";
1499 args = mkIRExprVec_0();
1500 nargs = 0;
1502 break;
1503 case 2:
1504 case 16:
1505 if (origin) {
1506 fn = &MC_(helperc_value_checkN_fail_w_o);
1507 nm = "MC_(helperc_value_checkN_fail_w_o)";
1508 args = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
1509 nargs = 2;
1510 } else {
1511 fn = &MC_(helperc_value_checkN_fail_no_o);
1512 nm = "MC_(helperc_value_checkN_fail_no_o)";
1513 args = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
1514 nargs = 1;
1516 break;
1517 default:
1518 VG_(tool_panic)("unexpected szB");
1521 tl_assert(fn);
1522 tl_assert(nm);
1523 tl_assert(args);
1524 tl_assert(nargs >= 0 && nargs <= 2);
1525 tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
1526 || (MC_(clo_mc_level) == 2 && origin == NULL) );
1528 di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
1529 VG_(fnptr_to_fnentry)( fn ), args );
1530 di->guard = cond; // and cond is PCast-to-1(atom#)
1532 /* If the complaint is to be issued under a guard condition, AND
1533 that into the guard condition for the helper call. */
1534 if (guard) {
1535 IRAtom *g1 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, di->guard));
1536 IRAtom *g2 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, guard));
1537 IRAtom *e = assignNew('V', mce, Ity_I32, binop(Iop_And32, g1, g2));
1538 di->guard = assignNew('V', mce, Ity_I1, unop(Iop_32to1, e));
1541 setHelperAnns( mce, di );
1542 stmt( 'V', mce, IRStmt_Dirty(di));
1544 /* If |atom| is shadowed by an IRTemp, set the shadow tmp to be
1545 defined -- but only in the case where the guard evaluates to
1546 True at run-time. Do the update by setting the orig->shadow
1547 mapping for tmp to reflect the fact that this shadow is getting
1548 a new value. */
1549 tl_assert(isIRAtom(vatom));
1550 /* sameKindedAtoms ... */
1551 if (vatom->tag == Iex_RdTmp) {
1552 tl_assert(atom->tag == Iex_RdTmp);
1553 if (guard == NULL) {
1554 // guard is 'always True', hence update unconditionally
1555 newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
1556 assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
1557 definedOfType(ty));
1558 } else {
1559 // update the temp only conditionally. Do this by copying
1560 // its old value when the guard is False.
1561 // The old value ..
1562 IRTemp old_tmpV = findShadowTmpV(mce, atom->Iex.RdTmp.tmp);
1563 newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
1564 IRAtom* new_tmpV
1565 = assignNew('V', mce, shadowTypeV(ty),
1566 IRExpr_ITE(guard, definedOfType(ty),
1567 mkexpr(old_tmpV)));
1568 assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp), new_tmpV);
1574 /*------------------------------------------------------------*/
1575 /*--- Shadowing PUTs/GETs, and indexed variants thereof ---*/
1576 /*------------------------------------------------------------*/
1578 /* Examine the always-defined sections declared in layout to see if
1579 the (offset,size) section is within one. Note, is is an error to
1580 partially fall into such a region: (offset,size) should either be
1581 completely in such a region or completely not-in such a region.
1583 static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
1585 Int minoffD, maxoffD, i;
1586 Int minoff = offset;
1587 Int maxoff = minoff + size - 1;
1588 tl_assert((minoff & ~0xFFFF) == 0);
1589 tl_assert((maxoff & ~0xFFFF) == 0);
1591 for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
1592 minoffD = mce->layout->alwaysDefd[i].offset;
1593 maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
1594 tl_assert((minoffD & ~0xFFFF) == 0);
1595 tl_assert((maxoffD & ~0xFFFF) == 0);
1597 if (maxoff < minoffD || maxoffD < minoff)
1598 continue; /* no overlap */
1599 if (minoff >= minoffD && maxoff <= maxoffD)
1600 return True; /* completely contained in an always-defd section */
1602 VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
1604 return False; /* could not find any containing section */
1608 /* Generate into bb suitable actions to shadow this Put. If the state
1609 slice is marked 'always defined', do nothing. Otherwise, write the
1610 supplied V bits to the shadow state. We can pass in either an
1611 original atom or a V-atom, but not both. In the former case the
1612 relevant V-bits are then generated from the original.
1613 We assume here, that the definedness of GUARD has already been checked.
1615 static
1616 void do_shadow_PUT ( MCEnv* mce, Int offset,
1617 IRAtom* atom, IRAtom* vatom, IRExpr *guard )
1619 IRType ty;
1621 // Don't do shadow PUTs if we're not doing undefined value checking.
1622 // Their absence lets Vex's optimiser remove all the shadow computation
1623 // that they depend on, which includes GETs of the shadow registers.
1624 if (MC_(clo_mc_level) == 1)
1625 return;
1627 if (atom) {
1628 tl_assert(!vatom);
1629 tl_assert(isOriginalAtom(mce, atom));
1630 vatom = expr2vbits( mce, atom, HuOth );
1631 } else {
1632 tl_assert(vatom);
1633 tl_assert(isShadowAtom(mce, vatom));
1636 ty = typeOfIRExpr(mce->sb->tyenv, vatom);
1637 tl_assert(ty != Ity_I1);
1638 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1639 /* later: no ... */
1640 /* emit code to emit a complaint if any of the vbits are 1. */
1641 /* complainIfUndefined(mce, atom); */
1642 } else {
1643 /* Do a plain shadow Put. */
1644 if (guard) {
1645 /* If the guard expression evaluates to false we simply Put the value
1646 that is already stored in the guest state slot */
1647 IRAtom *cond, *iffalse;
1649 cond = assignNew('V', mce, Ity_I1, guard);
1650 iffalse = assignNew('V', mce, ty,
1651 IRExpr_Get(offset + mce->layout->total_sizeB, ty));
1652 vatom = assignNew('V', mce, ty, IRExpr_ITE(cond, vatom, iffalse));
1654 stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ));
1659 /* Return an expression which contains the V bits corresponding to the
1660 given GETI (passed in in pieces).
1662 static
1663 void do_shadow_PUTI ( MCEnv* mce, IRPutI *puti)
1665 IRAtom* vatom;
1666 IRType ty, tyS;
1667 Int arrSize;;
1668 IRRegArray* descr = puti->descr;
1669 IRAtom* ix = puti->ix;
1670 Int bias = puti->bias;
1671 IRAtom* atom = puti->data;
1673 // Don't do shadow PUTIs if we're not doing undefined value checking.
1674 // Their absence lets Vex's optimiser remove all the shadow computation
1675 // that they depend on, which includes GETIs of the shadow registers.
1676 if (MC_(clo_mc_level) == 1)
1677 return;
1679 tl_assert(isOriginalAtom(mce,atom));
1680 vatom = expr2vbits( mce, atom, HuOth );
1681 tl_assert(sameKindedAtoms(atom, vatom));
1682 ty = descr->elemTy;
1683 tyS = shadowTypeV(ty);
1684 arrSize = descr->nElems * sizeofIRType(ty);
1685 tl_assert(ty != Ity_I1);
1686 tl_assert(isOriginalAtom(mce,ix));
1687 complainIfUndefined(mce, ix, NULL);
1688 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1689 /* later: no ... */
1690 /* emit code to emit a complaint if any of the vbits are 1. */
1691 /* complainIfUndefined(mce, atom); */
1692 } else {
1693 /* Do a cloned version of the Put that refers to the shadow
1694 area. */
1695 IRRegArray* new_descr
1696 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1697 tyS, descr->nElems);
1698 stmt( 'V', mce, IRStmt_PutI( mkIRPutI(new_descr, ix, bias, vatom) ));
1703 /* Return an expression which contains the V bits corresponding to the
1704 given GET (passed in in pieces).
1706 static
1707 IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
1709 IRType tyS = shadowTypeV(ty);
1710 tl_assert(ty != Ity_I1);
1711 tl_assert(ty != Ity_I128);
1712 if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1713 /* Always defined, return all zeroes of the relevant type */
1714 return definedOfType(tyS);
1715 } else {
1716 /* return a cloned version of the Get that refers to the shadow
1717 area. */
1718 /* FIXME: this isn't an atom! */
1719 return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
1724 /* Return an expression which contains the V bits corresponding to the
1725 given GETI (passed in in pieces).
1727 static
1728 IRExpr* shadow_GETI ( MCEnv* mce,
1729 IRRegArray* descr, IRAtom* ix, Int bias )
1731 IRType ty = descr->elemTy;
1732 IRType tyS = shadowTypeV(ty);
1733 Int arrSize = descr->nElems * sizeofIRType(ty);
1734 tl_assert(ty != Ity_I1);
1735 tl_assert(isOriginalAtom(mce,ix));
1736 complainIfUndefined(mce, ix, NULL);
1737 if (isAlwaysDefd(mce, descr->base, arrSize)) {
1738 /* Always defined, return all zeroes of the relevant type */
1739 return definedOfType(tyS);
1740 } else {
1741 /* return a cloned version of the Get that refers to the shadow
1742 area. */
1743 IRRegArray* new_descr
1744 = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1745 tyS, descr->nElems);
1746 return IRExpr_GetI( new_descr, ix, bias );
1751 /*------------------------------------------------------------*/
1752 /*--- Generating approximations for unknown operations, ---*/
1753 /*--- using lazy-propagate semantics ---*/
1754 /*------------------------------------------------------------*/
1756 /* Lazy propagation of undefinedness from two values, resulting in the
1757 specified shadow type.
1759 static
1760 IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
1762 IRAtom* at;
1763 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1764 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1765 tl_assert(isShadowAtom(mce,va1));
1766 tl_assert(isShadowAtom(mce,va2));
1768 /* The general case is inefficient because PCast is an expensive
1769 operation. Here are some special cases which use PCast only
1770 once rather than twice. */
1772 /* I64 x I64 -> I64 */
1773 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
1774 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
1775 at = mkUifU(mce, Ity_I64, va1, va2);
1776 at = mkPCastTo(mce, Ity_I64, at);
1777 return at;
1780 /* I64 x I64 -> I32 */
1781 if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
1782 if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
1783 at = mkUifU(mce, Ity_I64, va1, va2);
1784 at = mkPCastTo(mce, Ity_I32, at);
1785 return at;
1788 /* I32 x I32 -> I32 */
1789 if (t1 == Ity_I32 && t2 == Ity_I32 && finalVty == Ity_I32) {
1790 if (0) VG_(printf)("mkLazy2: I32 x I32 -> I32\n");
1791 at = mkUifU(mce, Ity_I32, va1, va2);
1792 at = mkPCastTo(mce, Ity_I32, at);
1793 return at;
1796 if (0) {
1797 VG_(printf)("mkLazy2 ");
1798 ppIRType(t1);
1799 VG_(printf)("_");
1800 ppIRType(t2);
1801 VG_(printf)("_");
1802 ppIRType(finalVty);
1803 VG_(printf)("\n");
1806 /* General case: force everything via 32-bit intermediaries. */
1807 at = mkPCastTo(mce, Ity_I32, va1);
1808 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1809 at = mkPCastTo(mce, finalVty, at);
1810 return at;
1814 /* 3-arg version of the above. */
1815 static
1816 IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
1817 IRAtom* va1, IRAtom* va2, IRAtom* va3 )
1819 IRAtom* at;
1820 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1821 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1822 IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
1823 tl_assert(isShadowAtom(mce,va1));
1824 tl_assert(isShadowAtom(mce,va2));
1825 tl_assert(isShadowAtom(mce,va3));
1827 /* The general case is inefficient because PCast is an expensive
1828 operation. Here are some special cases which use PCast only
1829 twice rather than three times. */
1831 /* I32 x I64 x I64 -> I64 */
1832 /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1833 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
1834 && finalVty == Ity_I64) {
1835 if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
1836 /* Widen 1st arg to I64. Since 1st arg is typically a rounding
1837 mode indication which is fully defined, this should get
1838 folded out later. */
1839 at = mkPCastTo(mce, Ity_I64, va1);
1840 /* Now fold in 2nd and 3rd args. */
1841 at = mkUifU(mce, Ity_I64, at, va2);
1842 at = mkUifU(mce, Ity_I64, at, va3);
1843 /* and PCast once again. */
1844 at = mkPCastTo(mce, Ity_I64, at);
1845 return at;
1848 /* I32 x I8 x I64 -> I64 */
1849 if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I64
1850 && finalVty == Ity_I64) {
1851 if (0) VG_(printf)("mkLazy3: I32 x I8 x I64 -> I64\n");
1852 /* Widen 1st and 2nd args to I64. Since 1st arg is typically a
1853 * rounding mode indication which is fully defined, this should
1854 * get folded out later.
1856 IRAtom* at1 = mkPCastTo(mce, Ity_I64, va1);
1857 IRAtom* at2 = mkPCastTo(mce, Ity_I64, va2);
1858 at = mkUifU(mce, Ity_I64, at1, at2); // UifU(PCast(va1), PCast(va2))
1859 at = mkUifU(mce, Ity_I64, at, va3);
1860 /* and PCast once again. */
1861 at = mkPCastTo(mce, Ity_I64, at);
1862 return at;
1865 /* I32 x I64 x I64 -> I32 */
1866 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
1867 && finalVty == Ity_I32) {
1868 if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
1869 at = mkPCastTo(mce, Ity_I64, va1);
1870 at = mkUifU(mce, Ity_I64, at, va2);
1871 at = mkUifU(mce, Ity_I64, at, va3);
1872 at = mkPCastTo(mce, Ity_I32, at);
1873 return at;
1876 /* I32 x I32 x I32 -> I32 */
1877 /* 32-bit FP idiom, as (eg) happens on ARM */
1878 if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
1879 && finalVty == Ity_I32) {
1880 if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
1881 at = va1;
1882 at = mkUifU(mce, Ity_I32, at, va2);
1883 at = mkUifU(mce, Ity_I32, at, va3);
1884 at = mkPCastTo(mce, Ity_I32, at);
1885 return at;
1888 /* I32 x I128 x I128 -> I128 */
1889 /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1890 if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128
1891 && finalVty == Ity_I128) {
1892 if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n");
1893 /* Widen 1st arg to I128. Since 1st arg is typically a rounding
1894 mode indication which is fully defined, this should get
1895 folded out later. */
1896 at = mkPCastTo(mce, Ity_I128, va1);
1897 /* Now fold in 2nd and 3rd args. */
1898 at = mkUifU(mce, Ity_I128, at, va2);
1899 at = mkUifU(mce, Ity_I128, at, va3);
1900 /* and PCast once again. */
1901 at = mkPCastTo(mce, Ity_I128, at);
1902 return at;
1905 /* I32 x I8 x I128 -> I128 */
1906 /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1907 if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I128
1908 && finalVty == Ity_I128) {
1909 if (0) VG_(printf)("mkLazy3: I32 x I8 x I128 -> I128\n");
1910 /* Use I64 as an intermediate type, which means PCasting all 3
1911 args to I64 to start with. 1st arg is typically a rounding
1912 mode indication which is fully defined, so we hope that it
1913 will get folded out later. */
1914 IRAtom* at1 = mkPCastTo(mce, Ity_I64, va1);
1915 IRAtom* at2 = mkPCastTo(mce, Ity_I64, va2);
1916 IRAtom* at3 = mkPCastTo(mce, Ity_I64, va3);
1917 /* Now UifU all three together. */
1918 at = mkUifU(mce, Ity_I64, at1, at2); // UifU(PCast(va1), PCast(va2))
1919 at = mkUifU(mce, Ity_I64, at, at3); // ... `UifU` PCast(va3)
1920 /* and PCast once again. */
1921 at = mkPCastTo(mce, Ity_I128, at);
1922 return at;
1924 if (1) {
1925 VG_(printf)("mkLazy3: ");
1926 ppIRType(t1);
1927 VG_(printf)(" x ");
1928 ppIRType(t2);
1929 VG_(printf)(" x ");
1930 ppIRType(t3);
1931 VG_(printf)(" -> ");
1932 ppIRType(finalVty);
1933 VG_(printf)("\n");
1936 tl_assert(0);
1937 /* General case: force everything via 32-bit intermediaries. */
1939 at = mkPCastTo(mce, Ity_I32, va1);
1940 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1941 at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
1942 at = mkPCastTo(mce, finalVty, at);
1943 return at;
1948 /* 4-arg version of the above. */
1949 static
1950 IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
1951 IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
1953 IRAtom* at;
1954 IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1955 IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1956 IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
1957 IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
1958 tl_assert(isShadowAtom(mce,va1));
1959 tl_assert(isShadowAtom(mce,va2));
1960 tl_assert(isShadowAtom(mce,va3));
1961 tl_assert(isShadowAtom(mce,va4));
1963 /* The general case is inefficient because PCast is an expensive
1964 operation. Here are some special cases which use PCast only
1965 twice rather than three times. */
1967 /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
1969 if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128 && t4 == Ity_I128
1970 && finalVty == Ity_I128) {
1971 if (0) VG_(printf)("mkLazy4: I32 x I128 x I128 x I128 -> I128\n");
1972 /* Widen 1st arg to I128. Since 1st arg is typically a rounding
1973 mode indication which is fully defined, this should get
1974 folded out later. */
1975 at = mkPCastTo(mce, Ity_I128, va1);
1976 /* Now fold in 2nd, 3rd, 4th args. */
1977 at = mkUifU(mce, Ity_I128, at, va2);
1978 at = mkUifU(mce, Ity_I128, at, va3);
1979 at = mkUifU(mce, Ity_I128, at, va4);
1980 /* and PCast once again. */
1981 at = mkPCastTo(mce, Ity_I128, at);
1982 return at;
1985 /* I32 x I64 x I64 x I64 -> I64 */
1986 if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
1987 && finalVty == Ity_I64) {
1988 if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
1989 /* Widen 1st arg to I64. Since 1st arg is typically a rounding
1990 mode indication which is fully defined, this should get
1991 folded out later. */
1992 at = mkPCastTo(mce, Ity_I64, va1);
1993 /* Now fold in 2nd, 3rd, 4th args. */
1994 at = mkUifU(mce, Ity_I64, at, va2);
1995 at = mkUifU(mce, Ity_I64, at, va3);
1996 at = mkUifU(mce, Ity_I64, at, va4);
1997 /* and PCast once again. */
1998 at = mkPCastTo(mce, Ity_I64, at);
1999 return at;
2001 /* I32 x I32 x I32 x I32 -> I32 */
2002 /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
2003 if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32
2004 && finalVty == Ity_I32) {
2005 if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n");
2006 at = va1;
2007 /* Now fold in 2nd, 3rd, 4th args. */
2008 at = mkUifU(mce, Ity_I32, at, va2);
2009 at = mkUifU(mce, Ity_I32, at, va3);
2010 at = mkUifU(mce, Ity_I32, at, va4);
2011 at = mkPCastTo(mce, Ity_I32, at);
2012 return at;
2015 if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I8 && t4 == Ity_I8
2016 && finalVty == Ity_I32) {
2017 if (0) VG_(printf)("mkLazy4: I32 x I8 x I8 x I8 -> I32\n");
2018 at = mkPCastTo(mce, Ity_I8, va1);
2019 /* Now fold in 2nd, 3rd, 4th args. */
2020 at = mkUifU(mce, Ity_I8, at, va2);
2021 at = mkUifU(mce, Ity_I8, at, va3);
2022 at = mkUifU(mce, Ity_I8, at, va4);
2023 at = mkPCastTo(mce, Ity_I32, at);
2024 return at;
2027 if (t1 == Ity_I64 && t2 == Ity_I8 && t3 == Ity_I8 && t4 == Ity_I8
2028 && finalVty == Ity_I64) {
2029 if (0) VG_(printf)("mkLazy4: I64 x I8 x I8 x I8 -> I64\n");
2030 at = mkPCastTo(mce, Ity_I8, va1);
2031 /* Now fold in 2nd, 3rd, 4th args. */
2032 at = mkUifU(mce, Ity_I8, at, va2);
2033 at = mkUifU(mce, Ity_I8, at, va3);
2034 at = mkUifU(mce, Ity_I8, at, va4);
2035 at = mkPCastTo(mce, Ity_I64, at);
2036 return at;
2039 if (1) {
2040 VG_(printf)("mkLazy4: ");
2041 ppIRType(t1);
2042 VG_(printf)(" x ");
2043 ppIRType(t2);
2044 VG_(printf)(" x ");
2045 ppIRType(t3);
2046 VG_(printf)(" x ");
2047 ppIRType(t4);
2048 VG_(printf)(" -> ");
2049 ppIRType(finalVty);
2050 VG_(printf)("\n");
2053 tl_assert(0);
2057 /* Do the lazy propagation game from a null-terminated vector of
2058 atoms. This is presumably the arguments to a helper call, so the
2059 IRCallee info is also supplied in order that we can know which
2060 arguments should be ignored (via the .mcx_mask field).
2062 static
2063 IRAtom* mkLazyN ( MCEnv* mce,
2064 IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
2066 Int i;
2067 IRAtom* here;
2068 IRAtom* curr;
2069 IRType mergeTy;
2070 Bool mergeTy64 = True;
2072 /* Decide on the type of the merge intermediary. If all relevant
2073 args are I64, then it's I64. In all other circumstances, use
2074 I32. */
2075 for (i = 0; exprvec[i]; i++) {
2076 tl_assert(i < 32);
2077 tl_assert(isOriginalAtom(mce, exprvec[i]));
2078 if (cee->mcx_mask & (1<<i))
2079 continue;
2080 if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
2081 mergeTy64 = False;
2084 mergeTy = mergeTy64 ? Ity_I64 : Ity_I32;
2085 curr = definedOfType(mergeTy);
2087 for (i = 0; exprvec[i]; i++) {
2088 tl_assert(i < 32);
2089 tl_assert(isOriginalAtom(mce, exprvec[i]));
2090 /* Only take notice of this arg if the callee's mc-exclusion
2091 mask does not say it is to be excluded. */
2092 if (cee->mcx_mask & (1<<i)) {
2093 /* the arg is to be excluded from definedness checking. Do
2094 nothing. */
2095 if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
2096 } else {
2097 /* calculate the arg's definedness, and pessimistically merge
2098 it in. */
2099 here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i], HuOth) );
2100 curr = mergeTy64
2101 ? mkUifU64(mce, here, curr)
2102 : mkUifU32(mce, here, curr);
2105 return mkPCastTo(mce, finalVtype, curr );
2109 /*------------------------------------------------------------*/
2110 /*--- Generating expensive sequences for exact carry-chain ---*/
2111 /*--- propagation in add/sub and related operations. ---*/
2112 /*------------------------------------------------------------*/
2114 static
2115 IRAtom* expensiveAddSub ( MCEnv* mce,
2116 Bool add,
2117 IRType ty,
2118 IRAtom* qaa, IRAtom* qbb,
2119 IRAtom* aa, IRAtom* bb )
2121 IRAtom *a_min, *b_min, *a_max, *b_max;
2122 IROp opAND, opOR, opXOR, opNOT, opADD, opSUB;
2124 tl_assert(isShadowAtom(mce,qaa));
2125 tl_assert(isShadowAtom(mce,qbb));
2126 tl_assert(isOriginalAtom(mce,aa));
2127 tl_assert(isOriginalAtom(mce,bb));
2128 tl_assert(sameKindedAtoms(qaa,aa));
2129 tl_assert(sameKindedAtoms(qbb,bb));
2131 switch (ty) {
2132 case Ity_I32:
2133 opAND = Iop_And32;
2134 opOR = Iop_Or32;
2135 opXOR = Iop_Xor32;
2136 opNOT = Iop_Not32;
2137 opADD = Iop_Add32;
2138 opSUB = Iop_Sub32;
2139 break;
2140 case Ity_I64:
2141 opAND = Iop_And64;
2142 opOR = Iop_Or64;
2143 opXOR = Iop_Xor64;
2144 opNOT = Iop_Not64;
2145 opADD = Iop_Add64;
2146 opSUB = Iop_Sub64;
2147 break;
2148 default:
2149 VG_(tool_panic)("expensiveAddSub");
2152 // a_min = aa & ~qaa
2153 a_min = assignNew('V', mce,ty,
2154 binop(opAND, aa,
2155 assignNew('V', mce,ty, unop(opNOT, qaa))));
2157 // b_min = bb & ~qbb
2158 b_min = assignNew('V', mce,ty,
2159 binop(opAND, bb,
2160 assignNew('V', mce,ty, unop(opNOT, qbb))));
2162 // a_max = aa | qaa
2163 a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));
2165 // b_max = bb | qbb
2166 b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));
2168 if (add) {
2169 // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
2170 return
2171 assignNew('V', mce,ty,
2172 binop( opOR,
2173 assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
2174 assignNew('V', mce,ty,
2175 binop( opXOR,
2176 assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
2177 assignNew('V', mce,ty, binop(opADD, a_max, b_max))
2182 } else {
2183 // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
2184 return
2185 assignNew('V', mce,ty,
2186 binop( opOR,
2187 assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
2188 assignNew('V', mce,ty,
2189 binop( opXOR,
2190 assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
2191 assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
2201 static
2202 IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
2203 IRAtom* atom, IRAtom* vatom )
2205 IRType ty;
2206 IROp xorOp, subOp, andOp;
2207 IRExpr *one;
2208 IRAtom *improver, *improved;
2209 tl_assert(isShadowAtom(mce,vatom));
2210 tl_assert(isOriginalAtom(mce,atom));
2211 tl_assert(sameKindedAtoms(atom,vatom));
2213 switch (czop) {
2214 case Iop_Ctz32:
2215 ty = Ity_I32;
2216 xorOp = Iop_Xor32;
2217 subOp = Iop_Sub32;
2218 andOp = Iop_And32;
2219 one = mkU32(1);
2220 break;
2221 case Iop_Ctz64:
2222 ty = Ity_I64;
2223 xorOp = Iop_Xor64;
2224 subOp = Iop_Sub64;
2225 andOp = Iop_And64;
2226 one = mkU64(1);
2227 break;
2228 default:
2229 ppIROp(czop);
2230 VG_(tool_panic)("memcheck:expensiveCountTrailingZeroes");
2233 // improver = atom ^ (atom - 1)
2235 // That is, improver has its low ctz(atom) bits equal to one;
2236 // higher bits (if any) equal to zero.
2237 improver = assignNew('V', mce,ty,
2238 binop(xorOp,
2239 atom,
2240 assignNew('V', mce, ty,
2241 binop(subOp, atom, one))));
2243 // improved = vatom & improver
2245 // That is, treat any V bits above the first ctz(atom) bits as
2246 // "defined".
2247 improved = assignNew('V', mce, ty,
2248 binop(andOp, vatom, improver));
2250 // Return pessimizing cast of improved.
2251 return mkPCastTo(mce, ty, improved);
2255 /*------------------------------------------------------------*/
2256 /*--- Scalar shifts. ---*/
2257 /*------------------------------------------------------------*/
2259 /* Produce an interpretation for (aa << bb) (or >>s, >>u). The basic
2260 idea is to shift the definedness bits by the original shift amount.
2261 This introduces 0s ("defined") in new positions for left shifts and
2262 unsigned right shifts, and copies the top definedness bit for
2263 signed right shifts. So, conveniently, applying the original shift
2264 operator to the definedness bits for the left arg is exactly the
2265 right thing to do:
2267 (qaa << bb)
2269 However if the shift amount is undefined then the whole result
2270 is undefined. Hence need:
2272 (qaa << bb) `UifU` PCast(qbb)
2274 If the shift amount bb is a literal than qbb will say 'all defined'
2275 and the UifU and PCast will get folded out by post-instrumentation
2276 optimisation.
2278 static IRAtom* scalarShift ( MCEnv* mce,
2279 IRType ty,
2280 IROp original_op,
2281 IRAtom* qaa, IRAtom* qbb,
2282 IRAtom* aa, IRAtom* bb )
2284 tl_assert(isShadowAtom(mce,qaa));
2285 tl_assert(isShadowAtom(mce,qbb));
2286 tl_assert(isOriginalAtom(mce,aa));
2287 tl_assert(isOriginalAtom(mce,bb));
2288 tl_assert(sameKindedAtoms(qaa,aa));
2289 tl_assert(sameKindedAtoms(qbb,bb));
2290 return
2291 assignNew(
2292 'V', mce, ty,
2293 mkUifU( mce, ty,
2294 assignNew('V', mce, ty, binop(original_op, qaa, bb)),
2295 mkPCastTo(mce, ty, qbb)
2301 /*------------------------------------------------------------*/
2302 /*--- Helpers for dealing with vector primops. ---*/
2303 /*------------------------------------------------------------*/
2305 /* Vector pessimisation -- pessimise within each lane individually. */
2307 static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
2309 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
2312 static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
2314 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
2317 static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
2319 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
2322 static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
2324 return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
2327 static IRAtom* mkPCast64x4 ( MCEnv* mce, IRAtom* at )
2329 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ64x4, at));
2332 static IRAtom* mkPCast32x8 ( MCEnv* mce, IRAtom* at )
2334 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ32x8, at));
2337 static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
2339 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
2342 static IRAtom* mkPCast16x16 ( MCEnv* mce, IRAtom* at )
2344 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ16x16, at));
2347 static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
2349 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
2352 static IRAtom* mkPCast8x32 ( MCEnv* mce, IRAtom* at )
2354 return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ8x32, at));
2357 static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
2359 return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
2362 static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
2364 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at));
2367 static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
2369 return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at));
2373 /* Here's a simple scheme capable of handling ops derived from SSE1
2374 code and while only generating ops that can be efficiently
2375 implemented in SSE1. */
2377 /* All-lanes versions are straightforward:
2379 binary32Fx4(x,y) ==> PCast32x4(UifUV128(x#,y#))
2381 unary32Fx4(x,y) ==> PCast32x4(x#)
2383 Lowest-lane-only versions are more complex:
2385 binary32F0x4(x,y) ==> SetV128lo32(
2386 x#,
2387 PCast32(V128to32(UifUV128(x#,y#)))
2390 This is perhaps not so obvious. In particular, it's faster to
2391 do a V128-bit UifU and then take the bottom 32 bits than the more
2392 obvious scheme of taking the bottom 32 bits of each operand
2393 and doing a 32-bit UifU. Basically since UifU is fast and
2394 chopping lanes off vector values is slow.
2396 Finally:
2398 unary32F0x4(x) ==> SetV128lo32(
2399 x#,
2400 PCast32(V128to32(x#))
2403 Where:
2405 PCast32(v#) = 1Sto32(CmpNE32(v#,0))
2406 PCast32x4(v#) = CmpNEZ32x4(v#)
2409 static
2410 IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2412 IRAtom* at;
2413 tl_assert(isShadowAtom(mce, vatomX));
2414 tl_assert(isShadowAtom(mce, vatomY));
2415 at = mkUifUV128(mce, vatomX, vatomY);
2416 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
2417 return at;
2420 static
2421 IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
2423 IRAtom* at;
2424 tl_assert(isShadowAtom(mce, vatomX));
2425 at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
2426 return at;
2429 static
2430 IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2432 IRAtom* at;
2433 tl_assert(isShadowAtom(mce, vatomX));
2434 tl_assert(isShadowAtom(mce, vatomY));
2435 at = mkUifUV128(mce, vatomX, vatomY);
2436 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
2437 at = mkPCastTo(mce, Ity_I32, at);
2438 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
2439 return at;
2442 static
2443 IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
2445 IRAtom* at;
2446 tl_assert(isShadowAtom(mce, vatomX));
2447 at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
2448 at = mkPCastTo(mce, Ity_I32, at);
2449 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
2450 return at;
2453 /* --- ... and ... 64Fx2 versions of the same ... --- */
2455 static
2456 IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2458 IRAtom* at;
2459 tl_assert(isShadowAtom(mce, vatomX));
2460 tl_assert(isShadowAtom(mce, vatomY));
2461 at = mkUifUV128(mce, vatomX, vatomY);
2462 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
2463 return at;
2466 static
2467 IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
2469 IRAtom* at;
2470 tl_assert(isShadowAtom(mce, vatomX));
2471 at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
2472 return at;
2475 static
2476 IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2478 IRAtom* at;
2479 tl_assert(isShadowAtom(mce, vatomX));
2480 tl_assert(isShadowAtom(mce, vatomY));
2481 at = mkUifUV128(mce, vatomX, vatomY);
2482 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
2483 at = mkPCastTo(mce, Ity_I64, at);
2484 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
2485 return at;
2488 static
2489 IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
2491 IRAtom* at;
2492 tl_assert(isShadowAtom(mce, vatomX));
2493 at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
2494 at = mkPCastTo(mce, Ity_I64, at);
2495 at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
2496 return at;
2499 /* --- --- ... and ... 32Fx2 versions of the same --- --- */
2501 static
2502 IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2504 IRAtom* at;
2505 tl_assert(isShadowAtom(mce, vatomX));
2506 tl_assert(isShadowAtom(mce, vatomY));
2507 at = mkUifU64(mce, vatomX, vatomY);
2508 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at));
2509 return at;
2512 static
2513 IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX )
2515 IRAtom* at;
2516 tl_assert(isShadowAtom(mce, vatomX));
2517 at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX));
2518 return at;
2521 /* --- ... and ... 64Fx4 versions of the same ... --- */
2523 static
2524 IRAtom* binary64Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2526 IRAtom* at;
2527 tl_assert(isShadowAtom(mce, vatomX));
2528 tl_assert(isShadowAtom(mce, vatomY));
2529 at = mkUifUV256(mce, vatomX, vatomY);
2530 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, at));
2531 return at;
2534 static
2535 IRAtom* unary64Fx4 ( MCEnv* mce, IRAtom* vatomX )
2537 IRAtom* at;
2538 tl_assert(isShadowAtom(mce, vatomX));
2539 at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, vatomX));
2540 return at;
2543 /* --- ... and ... 32Fx8 versions of the same ... --- */
2545 static
2546 IRAtom* binary32Fx8 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2548 IRAtom* at;
2549 tl_assert(isShadowAtom(mce, vatomX));
2550 tl_assert(isShadowAtom(mce, vatomY));
2551 at = mkUifUV256(mce, vatomX, vatomY);
2552 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, at));
2553 return at;
2556 static
2557 IRAtom* unary32Fx8 ( MCEnv* mce, IRAtom* vatomX )
2559 IRAtom* at;
2560 tl_assert(isShadowAtom(mce, vatomX));
2561 at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, vatomX));
2562 return at;
2565 /* --- 64Fx2 binary FP ops, with rounding mode --- */
2567 static
2568 IRAtom* binary64Fx2_w_rm ( MCEnv* mce, IRAtom* vRM,
2569 IRAtom* vatomX, IRAtom* vatomY )
2571 /* This is the same as binary64Fx2, except that we subsequently
2572 pessimise vRM (definedness of the rounding mode), widen to 128
2573 bits and UifU it into the result. As with the scalar cases, if
2574 the RM is a constant then it is defined and so this extra bit
2575 will get constant-folded out later. */
2576 // "do" the vector args
2577 IRAtom* t1 = binary64Fx2(mce, vatomX, vatomY);
2578 // PCast the RM, and widen it to 128 bits
2579 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
2580 // Roll it into the result
2581 t1 = mkUifUV128(mce, t1, t2);
2582 return t1;
2585 /* --- ... and ... 32Fx4 versions of the same --- */
2587 static
2588 IRAtom* binary32Fx4_w_rm ( MCEnv* mce, IRAtom* vRM,
2589 IRAtom* vatomX, IRAtom* vatomY )
2591 IRAtom* t1 = binary32Fx4(mce, vatomX, vatomY);
2592 // PCast the RM, and widen it to 128 bits
2593 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
2594 // Roll it into the result
2595 t1 = mkUifUV128(mce, t1, t2);
2596 return t1;
2599 /* --- ... and ... 64Fx4 versions of the same --- */
2601 static
2602 IRAtom* binary64Fx4_w_rm ( MCEnv* mce, IRAtom* vRM,
2603 IRAtom* vatomX, IRAtom* vatomY )
2605 IRAtom* t1 = binary64Fx4(mce, vatomX, vatomY);
2606 // PCast the RM, and widen it to 256 bits
2607 IRAtom* t2 = mkPCastTo(mce, Ity_V256, vRM);
2608 // Roll it into the result
2609 t1 = mkUifUV256(mce, t1, t2);
2610 return t1;
2613 /* --- ... and ... 32Fx8 versions of the same --- */
2615 static
2616 IRAtom* binary32Fx8_w_rm ( MCEnv* mce, IRAtom* vRM,
2617 IRAtom* vatomX, IRAtom* vatomY )
2619 IRAtom* t1 = binary32Fx8(mce, vatomX, vatomY);
2620 // PCast the RM, and widen it to 256 bits
2621 IRAtom* t2 = mkPCastTo(mce, Ity_V256, vRM);
2622 // Roll it into the result
2623 t1 = mkUifUV256(mce, t1, t2);
2624 return t1;
2627 /* --- 64Fx2 unary FP ops, with rounding mode --- */
2629 static
2630 IRAtom* unary64Fx2_w_rm ( MCEnv* mce, IRAtom* vRM, IRAtom* vatomX )
2632 /* Same scheme as binary64Fx2_w_rm. */
2633 // "do" the vector arg
2634 IRAtom* t1 = unary64Fx2(mce, vatomX);
2635 // PCast the RM, and widen it to 128 bits
2636 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
2637 // Roll it into the result
2638 t1 = mkUifUV128(mce, t1, t2);
2639 return t1;
2642 /* --- ... and ... 32Fx4 versions of the same --- */
2644 static
2645 IRAtom* unary32Fx4_w_rm ( MCEnv* mce, IRAtom* vRM, IRAtom* vatomX )
2647 /* Same scheme as unary32Fx4_w_rm. */
2648 IRAtom* t1 = unary32Fx4(mce, vatomX);
2649 // PCast the RM, and widen it to 128 bits
2650 IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
2651 // Roll it into the result
2652 t1 = mkUifUV128(mce, t1, t2);
2653 return t1;
2657 /* --- --- Vector saturated narrowing --- --- */
2659 /* We used to do something very clever here, but on closer inspection
2660 (2011-Jun-15), and in particular bug #279698, it turns out to be
2661 wrong. Part of the problem came from the fact that for a long
2662 time, the IR primops to do with saturated narrowing were
2663 underspecified and managed to confuse multiple cases which needed
2664 to be separate: the op names had a signedness qualifier, but in
2665 fact the source and destination signednesses needed to be specified
2666 independently, so the op names really need two independent
2667 signedness specifiers.
2669 As of 2011-Jun-15 (ish) the underspecification was sorted out
2670 properly. The incorrect instrumentation remained, though. That
2671 has now (2011-Oct-22) been fixed.
2673 What we now do is simple:
2675 Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a
2676 number of lanes, X is the source lane width and signedness, and Y
2677 is the destination lane width and signedness. In all cases the
2678 destination lane width is half the source lane width, so the names
2679 have a bit of redundancy, but are at least easy to read.
2681 For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s
2682 to unsigned 16s.
2684 Let Vanilla(OP) be a function that takes OP, one of these
2685 saturating narrowing ops, and produces the same "shaped" narrowing
2686 op which is not saturating, but merely dumps the most significant
2687 bits. "same shape" means that the lane numbers and widths are the
2688 same as with OP.
2690 For example, Vanilla(Iop_QNarrowBin32Sto16Ux8)
2691 = Iop_NarrowBin32to16x8,
2692 that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by
2693 dumping the top half of each lane.
2695 So, with that in place, the scheme is simple, and it is simple to
2696 pessimise each lane individually and then apply Vanilla(OP) so as
2697 to get the result in the right "shape". If the original OP is
2698 QNarrowBinXtoYxZ then we produce
2700 Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) )
2702 or for the case when OP is unary (Iop_QNarrowUn*)
2704 Vanilla(OP)( PCast-X-to-X-x-Z(vatom) )
2706 static
2707 IROp vanillaNarrowingOpOfShape ( IROp qnarrowOp )
2709 switch (qnarrowOp) {
2710 /* Binary: (128, 128) -> 128 */
2711 case Iop_QNarrowBin16Sto8Ux16:
2712 case Iop_QNarrowBin16Sto8Sx16:
2713 case Iop_QNarrowBin16Uto8Ux16:
2714 case Iop_QNarrowBin64Sto32Sx4:
2715 case Iop_QNarrowBin64Uto32Ux4:
2716 return Iop_NarrowBin16to8x16;
2717 case Iop_QNarrowBin32Sto16Ux8:
2718 case Iop_QNarrowBin32Sto16Sx8:
2719 case Iop_QNarrowBin32Uto16Ux8:
2720 return Iop_NarrowBin32to16x8;
2721 /* Binary: (64, 64) -> 64 */
2722 case Iop_QNarrowBin32Sto16Sx4:
2723 return Iop_NarrowBin32to16x4;
2724 case Iop_QNarrowBin16Sto8Ux8:
2725 case Iop_QNarrowBin16Sto8Sx8:
2726 return Iop_NarrowBin16to8x8;
2727 /* Unary: 128 -> 64 */
2728 case Iop_QNarrowUn64Uto32Ux2:
2729 case Iop_QNarrowUn64Sto32Sx2:
2730 case Iop_QNarrowUn64Sto32Ux2:
2731 return Iop_NarrowUn64to32x2;
2732 case Iop_QNarrowUn32Uto16Ux4:
2733 case Iop_QNarrowUn32Sto16Sx4:
2734 case Iop_QNarrowUn32Sto16Ux4:
2735 case Iop_F32toF16x4:
2736 return Iop_NarrowUn32to16x4;
2737 case Iop_QNarrowUn16Uto8Ux8:
2738 case Iop_QNarrowUn16Sto8Sx8:
2739 case Iop_QNarrowUn16Sto8Ux8:
2740 return Iop_NarrowUn16to8x8;
2741 default:
2742 ppIROp(qnarrowOp);
2743 VG_(tool_panic)("vanillaNarrowOpOfShape");
2747 static
2748 IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op,
2749 IRAtom* vatom1, IRAtom* vatom2)
2751 IRAtom *at1, *at2, *at3;
2752 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2753 switch (narrow_op) {
2754 case Iop_QNarrowBin64Sto32Sx4: pcast = mkPCast32x4; break;
2755 case Iop_QNarrowBin64Uto32Ux4: pcast = mkPCast32x4; break;
2756 case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
2757 case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break;
2758 case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break;
2759 case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
2760 case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break;
2761 case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
2762 default: VG_(tool_panic)("vectorNarrowBinV128");
2764 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2765 tl_assert(isShadowAtom(mce,vatom1));
2766 tl_assert(isShadowAtom(mce,vatom2));
2767 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
2768 at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
2769 at3 = assignNew('V', mce, Ity_V128, binop(vanilla_narrow, at1, at2));
2770 return at3;
2773 static
2774 IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op,
2775 IRAtom* vatom1, IRAtom* vatom2)
2777 IRAtom *at1, *at2, *at3;
2778 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2779 switch (narrow_op) {
2780 case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break;
2781 case Iop_QNarrowBin16Sto8Sx8: pcast = mkPCast16x4; break;
2782 case Iop_QNarrowBin16Sto8Ux8: pcast = mkPCast16x4; break;
2783 default: VG_(tool_panic)("vectorNarrowBin64");
2785 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2786 tl_assert(isShadowAtom(mce,vatom1));
2787 tl_assert(isShadowAtom(mce,vatom2));
2788 at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
2789 at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
2790 at3 = assignNew('V', mce, Ity_I64, binop(vanilla_narrow, at1, at2));
2791 return at3;
2794 static
2795 IRAtom* vectorNarrowUnV128 ( MCEnv* mce, IROp narrow_op,
2796 IRAtom* vatom1)
2798 IRAtom *at1, *at2;
2799 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2800 tl_assert(isShadowAtom(mce,vatom1));
2801 /* For vanilla narrowing (non-saturating), we can just apply
2802 the op directly to the V bits. */
2803 switch (narrow_op) {
2804 case Iop_NarrowUn16to8x8:
2805 case Iop_NarrowUn32to16x4:
2806 case Iop_NarrowUn64to32x2:
2807 case Iop_F32toF16x4:
2808 at1 = assignNew('V', mce, Ity_I64, unop(narrow_op, vatom1));
2809 return at1;
2810 default:
2811 break; /* Do Plan B */
2813 /* Plan B: for ops that involve a saturation operation on the args,
2814 we must PCast before the vanilla narrow. */
2815 switch (narrow_op) {
2816 case Iop_QNarrowUn16Sto8Sx8: pcast = mkPCast16x8; break;
2817 case Iop_QNarrowUn16Sto8Ux8: pcast = mkPCast16x8; break;
2818 case Iop_QNarrowUn16Uto8Ux8: pcast = mkPCast16x8; break;
2819 case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break;
2820 case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break;
2821 case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break;
2822 case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break;
2823 case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break;
2824 case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break;
2825 default: VG_(tool_panic)("vectorNarrowUnV128");
2827 IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2828 at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
2829 at2 = assignNew('V', mce, Ity_I64, unop(vanilla_narrow, at1));
2830 return at2;
2833 static
2834 IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op,
2835 IRAtom* vatom1)
2837 IRAtom *at1, *at2;
2838 IRAtom* (*pcast)( MCEnv*, IRAtom* );
2839 switch (longen_op) {
2840 case Iop_Widen8Uto16x8: pcast = mkPCast16x8; break;
2841 case Iop_Widen8Sto16x8: pcast = mkPCast16x8; break;
2842 case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break;
2843 case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break;
2844 case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break;
2845 case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break;
2846 case Iop_F16toF32x4: pcast = mkPCast32x4; break;
2847 default: VG_(tool_panic)("vectorWidenI64");
2849 tl_assert(isShadowAtom(mce,vatom1));
2850 at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1));
2851 at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1));
2852 return at2;
2856 /* --- --- Vector integer arithmetic --- --- */
2858 /* Simple ... UifU the args and per-lane pessimise the results. */
2860 /* --- V256-bit versions --- */
2862 static
2863 IRAtom* binary8Ix32 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2865 IRAtom* at;
2866 at = mkUifUV256(mce, vatom1, vatom2);
2867 at = mkPCast8x32(mce, at);
2868 return at;
2871 static
2872 IRAtom* binary16Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2874 IRAtom* at;
2875 at = mkUifUV256(mce, vatom1, vatom2);
2876 at = mkPCast16x16(mce, at);
2877 return at;
2880 static
2881 IRAtom* binary32Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2883 IRAtom* at;
2884 at = mkUifUV256(mce, vatom1, vatom2);
2885 at = mkPCast32x8(mce, at);
2886 return at;
2889 static
2890 IRAtom* binary64Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2892 IRAtom* at;
2893 at = mkUifUV256(mce, vatom1, vatom2);
2894 at = mkPCast64x4(mce, at);
2895 return at;
2898 /* --- V128-bit versions --- */
2900 static
2901 IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2903 IRAtom* at;
2904 at = mkUifUV128(mce, vatom1, vatom2);
2905 at = mkPCast8x16(mce, at);
2906 return at;
2909 static
2910 IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2912 IRAtom* at;
2913 at = mkUifUV128(mce, vatom1, vatom2);
2914 at = mkPCast16x8(mce, at);
2915 return at;
2918 static
2919 IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2921 IRAtom* at;
2922 at = mkUifUV128(mce, vatom1, vatom2);
2923 at = mkPCast32x4(mce, at);
2924 return at;
2927 static
2928 IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2930 IRAtom* at;
2931 at = mkUifUV128(mce, vatom1, vatom2);
2932 at = mkPCast64x2(mce, at);
2933 return at;
2936 /* --- 64-bit versions --- */
2938 static
2939 IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2941 IRAtom* at;
2942 at = mkUifU64(mce, vatom1, vatom2);
2943 at = mkPCast8x8(mce, at);
2944 return at;
2947 static
2948 IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2950 IRAtom* at;
2951 at = mkUifU64(mce, vatom1, vatom2);
2952 at = mkPCast16x4(mce, at);
2953 return at;
2956 static
2957 IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2959 IRAtom* at;
2960 at = mkUifU64(mce, vatom1, vatom2);
2961 at = mkPCast32x2(mce, at);
2962 return at;
2965 static
2966 IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2968 IRAtom* at;
2969 at = mkUifU64(mce, vatom1, vatom2);
2970 at = mkPCastTo(mce, Ity_I64, at);
2971 return at;
2974 /* --- 32-bit versions --- */
2976 static
2977 IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2979 IRAtom* at;
2980 at = mkUifU32(mce, vatom1, vatom2);
2981 at = mkPCast8x4(mce, at);
2982 return at;
2985 static
2986 IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2988 IRAtom* at;
2989 at = mkUifU32(mce, vatom1, vatom2);
2990 at = mkPCast16x2(mce, at);
2991 return at;
2995 /*------------------------------------------------------------*/
2996 /*--- Generate shadow values from all kinds of IRExprs. ---*/
2997 /*------------------------------------------------------------*/
2999 static
3000 IRAtom* expr2vbits_Qop ( MCEnv* mce,
3001 IROp op,
3002 IRAtom* atom1, IRAtom* atom2,
3003 IRAtom* atom3, IRAtom* atom4 )
3005 IRAtom* vatom1 = expr2vbits( mce, atom1, HuOth );
3006 IRAtom* vatom2 = expr2vbits( mce, atom2, HuOth );
3007 IRAtom* vatom3 = expr2vbits( mce, atom3, HuOth );
3008 IRAtom* vatom4 = expr2vbits( mce, atom4, HuOth );
3010 tl_assert(isOriginalAtom(mce,atom1));
3011 tl_assert(isOriginalAtom(mce,atom2));
3012 tl_assert(isOriginalAtom(mce,atom3));
3013 tl_assert(isOriginalAtom(mce,atom4));
3014 tl_assert(isShadowAtom(mce,vatom1));
3015 tl_assert(isShadowAtom(mce,vatom2));
3016 tl_assert(isShadowAtom(mce,vatom3));
3017 tl_assert(isShadowAtom(mce,vatom4));
3018 tl_assert(sameKindedAtoms(atom1,vatom1));
3019 tl_assert(sameKindedAtoms(atom2,vatom2));
3020 tl_assert(sameKindedAtoms(atom3,vatom3));
3021 tl_assert(sameKindedAtoms(atom4,vatom4));
3022 switch (op) {
3023 case Iop_MAddF64:
3024 case Iop_MAddF64r32:
3025 case Iop_MSubF64:
3026 case Iop_MSubF64r32:
3027 /* I32(rm) x F64 x F64 x F64 -> F64 */
3028 return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);
3030 case Iop_MAddF32:
3031 case Iop_MSubF32:
3032 /* I32(rm) x F32 x F32 x F32 -> F32 */
3033 return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);
3035 case Iop_MAddF128:
3036 case Iop_MSubF128:
3037 case Iop_NegMAddF128:
3038 case Iop_NegMSubF128:
3039 /* I32(rm) x F128 x F128 x F128 -> F128 */
3040 return mkLazy4(mce, Ity_I128, vatom1, vatom2, vatom3, vatom4);
3042 /* V256-bit data-steering */
3043 case Iop_64x4toV256:
3044 return assignNew('V', mce, Ity_V256,
3045 IRExpr_Qop(op, vatom1, vatom2, vatom3, vatom4));
3047 /* I32/I64 x I8 x I8 x I8 -> I32/I64 */
3048 case Iop_Rotx32:
3049 return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);
3050 case Iop_Rotx64:
3051 return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);
3052 default:
3053 ppIROp(op);
3054 VG_(tool_panic)("memcheck:expr2vbits_Qop");
3059 static
3060 IRAtom* expr2vbits_Triop ( MCEnv* mce,
3061 IROp op,
3062 IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
3064 IRAtom* vatom1 = expr2vbits( mce, atom1, HuOth );
3065 IRAtom* vatom2 = expr2vbits( mce, atom2, HuOth );
3066 IRAtom* vatom3 = expr2vbits( mce, atom3, HuOth );
3068 tl_assert(isOriginalAtom(mce,atom1));
3069 tl_assert(isOriginalAtom(mce,atom2));
3070 tl_assert(isOriginalAtom(mce,atom3));
3071 tl_assert(isShadowAtom(mce,vatom1));
3072 tl_assert(isShadowAtom(mce,vatom2));
3073 tl_assert(isShadowAtom(mce,vatom3));
3074 tl_assert(sameKindedAtoms(atom1,vatom1));
3075 tl_assert(sameKindedAtoms(atom2,vatom2));
3076 tl_assert(sameKindedAtoms(atom3,vatom3));
3077 switch (op) {
3078 case Iop_AddF128:
3079 case Iop_SubF128:
3080 case Iop_MulF128:
3081 case Iop_DivF128:
3082 case Iop_AddD128:
3083 case Iop_SubD128:
3084 case Iop_MulD128:
3085 case Iop_DivD128:
3086 case Iop_QuantizeD128:
3087 /* I32(rm) x F128/D128 x F128/D128 -> F128/D128 */
3088 return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
3089 case Iop_AddF64:
3090 case Iop_AddD64:
3091 case Iop_AddF64r32:
3092 case Iop_SubF64:
3093 case Iop_SubD64:
3094 case Iop_SubF64r32:
3095 case Iop_MulF64:
3096 case Iop_MulD64:
3097 case Iop_MulF64r32:
3098 case Iop_DivF64:
3099 case Iop_DivD64:
3100 case Iop_DivF64r32:
3101 case Iop_ScaleF64:
3102 case Iop_Yl2xF64:
3103 case Iop_Yl2xp1F64:
3104 case Iop_AtanF64:
3105 case Iop_PRemF64:
3106 case Iop_PRem1F64:
3107 case Iop_QuantizeD64:
3108 /* I32(rm) x F64/D64 x F64/D64 -> F64/D64 */
3109 return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
3110 case Iop_PRemC3210F64:
3111 case Iop_PRem1C3210F64:
3112 /* I32(rm) x F64 x F64 -> I32 */
3113 return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
3114 case Iop_AddF32:
3115 case Iop_SubF32:
3116 case Iop_MulF32:
3117 case Iop_DivF32:
3118 /* I32(rm) x F32 x F32 -> I32 */
3119 return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
3120 case Iop_SignificanceRoundD64:
3121 /* IRRoundingMode(I32) x I8 x D64 -> D64 */
3122 return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
3123 case Iop_SignificanceRoundD128:
3124 /* IRRoundingMode(I32) x I8 x D128 -> D128 */
3125 return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
3126 case Iop_SliceV128:
3127 /* (V128, V128, I8) -> V128 */
3128 complainIfUndefined(mce, atom3, NULL);
3129 return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
3130 case Iop_Slice64:
3131 /* (I64, I64, I8) -> I64 */
3132 complainIfUndefined(mce, atom3, NULL);
3133 return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3));
3134 case Iop_SetElem8x8:
3135 case Iop_SetElem16x4:
3136 case Iop_SetElem32x2:
3137 complainIfUndefined(mce, atom2, NULL);
3138 return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3));
3140 case Iop_SetElem8x16:
3141 case Iop_SetElem16x8:
3142 case Iop_SetElem32x4:
3143 case Iop_SetElem64x2:
3144 complainIfUndefined(mce, atom2, NULL);
3145 return assignNew('V', mce, Ity_V128, triop(op, vatom1, atom2, vatom3));
3147 case Iop_Perm8x16x2:
3148 /* (V128, V128, V128) -> V128 */
3149 complainIfUndefined(mce, atom3, NULL);
3150 return mkUifUV128(
3151 mce,
3152 assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3)),
3153 mkPCast8x16(mce, vatom3)
3156 /* Vector FP with rounding mode as the first arg */
3157 case Iop_Add64Fx2:
3158 case Iop_Sub64Fx2:
3159 case Iop_Mul64Fx2:
3160 case Iop_Div64Fx2:
3161 case Iop_Scale2_64Fx2:
3162 return binary64Fx2_w_rm(mce, vatom1, vatom2, vatom3);
3164 case Iop_Add32Fx4:
3165 case Iop_Sub32Fx4:
3166 case Iop_Mul32Fx4:
3167 case Iop_Div32Fx4:
3168 case Iop_Scale2_32Fx4:
3169 return binary32Fx4_w_rm(mce, vatom1, vatom2, vatom3);
3171 case Iop_Add64Fx4:
3172 case Iop_Sub64Fx4:
3173 case Iop_Mul64Fx4:
3174 case Iop_Div64Fx4:
3175 return binary64Fx4_w_rm(mce, vatom1, vatom2, vatom3);
3177 case Iop_Add32Fx8:
3178 case Iop_Sub32Fx8:
3179 case Iop_Mul32Fx8:
3180 case Iop_Div32Fx8:
3181 return binary32Fx8_w_rm(mce, vatom1, vatom2, vatom3);
3183 case Iop_F32x4_2toQ16x8:
3184 return assignNew('V', mce, Ity_V128,
3185 binop(Iop_PackEvenLanes16x8,
3186 unary32Fx4_w_rm(mce, vatom1, vatom2),
3187 unary32Fx4_w_rm(mce, vatom1, vatom3)));
3188 case Iop_F64x2_2toQ32x4:
3189 return assignNew('V', mce, Ity_V128,
3190 binop(Iop_PackEvenLanes32x4,
3191 unary64Fx2_w_rm(mce, vatom1, vatom2),
3192 unary64Fx2_w_rm(mce, vatom1, vatom3)));
3195 default:
3196 ppIROp(op);
3197 VG_(tool_panic)("memcheck:expr2vbits_Triop");
3202 static
3203 IRAtom* expr2vbits_Binop ( MCEnv* mce,
3204 IROp op,
3205 IRAtom* atom1, IRAtom* atom2,
3206 HowUsed hu/*use HuOth if unknown*/ )
3208 IRType and_or_ty;
3209 IRAtom* (*uifu) (MCEnv*, IRAtom*, IRAtom*);
3210 IRAtom* (*difd) (MCEnv*, IRAtom*, IRAtom*);
3211 IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
3213 IRAtom* vatom1 = expr2vbits( mce, atom1, HuOth );
3214 IRAtom* vatom2 = expr2vbits( mce, atom2, HuOth );
3216 tl_assert(isOriginalAtom(mce,atom1));
3217 tl_assert(isOriginalAtom(mce,atom2));
3218 tl_assert(isShadowAtom(mce,vatom1));
3219 tl_assert(isShadowAtom(mce,vatom2));
3220 tl_assert(sameKindedAtoms(atom1,vatom1));
3221 tl_assert(sameKindedAtoms(atom2,vatom2));
3222 switch (op) {
3224 /* 32-bit SIMD */
3226 case Iop_Add16x2:
3227 case Iop_HAdd16Ux2:
3228 case Iop_HAdd16Sx2:
3229 case Iop_Sub16x2:
3230 case Iop_HSub16Ux2:
3231 case Iop_HSub16Sx2:
3232 case Iop_QAdd16Sx2:
3233 case Iop_QSub16Sx2:
3234 case Iop_QSub16Ux2:
3235 case Iop_QAdd16Ux2:
3236 return binary16Ix2(mce, vatom1, vatom2);
3238 case Iop_Add8x4:
3239 case Iop_HAdd8Ux4:
3240 case Iop_HAdd8Sx4:
3241 case Iop_Sub8x4:
3242 case Iop_HSub8Ux4:
3243 case Iop_HSub8Sx4:
3244 case Iop_QSub8Ux4:
3245 case Iop_QAdd8Ux4:
3246 case Iop_QSub8Sx4:
3247 case Iop_QAdd8Sx4:
3248 return binary8Ix4(mce, vatom1, vatom2);
3250 /* 64-bit SIMD */
3252 case Iop_ShrN8x8:
3253 case Iop_ShrN16x4:
3254 case Iop_ShrN32x2:
3255 case Iop_SarN8x8:
3256 case Iop_SarN16x4:
3257 case Iop_SarN32x2:
3258 case Iop_ShlN16x4:
3259 case Iop_ShlN32x2:
3260 case Iop_ShlN8x8:
3261 /* Same scheme as with all other shifts. */
3262 complainIfUndefined(mce, atom2, NULL);
3263 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
3265 case Iop_QNarrowBin32Sto16Sx4:
3266 case Iop_QNarrowBin16Sto8Sx8:
3267 case Iop_QNarrowBin16Sto8Ux8:
3268 return vectorNarrowBin64(mce, op, vatom1, vatom2);
3270 case Iop_Min8Ux8:
3271 case Iop_Min8Sx8:
3272 case Iop_Max8Ux8:
3273 case Iop_Max8Sx8:
3274 case Iop_Avg8Ux8:
3275 case Iop_QSub8Sx8:
3276 case Iop_QSub8Ux8:
3277 case Iop_Sub8x8:
3278 case Iop_CmpGT8Sx8:
3279 case Iop_CmpGT8Ux8:
3280 case Iop_CmpEQ8x8:
3281 case Iop_QAdd8Sx8:
3282 case Iop_QAdd8Ux8:
3283 case Iop_QSal8x8:
3284 case Iop_QShl8x8:
3285 case Iop_Add8x8:
3286 case Iop_Mul8x8:
3287 case Iop_PolynomialMul8x8:
3288 return binary8Ix8(mce, vatom1, vatom2);
3290 case Iop_Min16Sx4:
3291 case Iop_Min16Ux4:
3292 case Iop_Max16Sx4:
3293 case Iop_Max16Ux4:
3294 case Iop_Avg16Ux4:
3295 case Iop_QSub16Ux4:
3296 case Iop_QSub16Sx4:
3297 case Iop_Sub16x4:
3298 case Iop_Mul16x4:
3299 case Iop_MulHi16Sx4:
3300 case Iop_MulHi16Ux4:
3301 case Iop_CmpGT16Sx4:
3302 case Iop_CmpGT16Ux4:
3303 case Iop_CmpEQ16x4:
3304 case Iop_QAdd16Sx4:
3305 case Iop_QAdd16Ux4:
3306 case Iop_QSal16x4:
3307 case Iop_QShl16x4:
3308 case Iop_Add16x4:
3309 case Iop_QDMulHi16Sx4:
3310 case Iop_QRDMulHi16Sx4:
3311 return binary16Ix4(mce, vatom1, vatom2);
3313 case Iop_Sub32x2:
3314 case Iop_Mul32x2:
3315 case Iop_Max32Sx2:
3316 case Iop_Max32Ux2:
3317 case Iop_Min32Sx2:
3318 case Iop_Min32Ux2:
3319 case Iop_CmpGT32Sx2:
3320 case Iop_CmpGT32Ux2:
3321 case Iop_CmpEQ32x2:
3322 case Iop_Add32x2:
3323 case Iop_QAdd32Ux2:
3324 case Iop_QAdd32Sx2:
3325 case Iop_QSub32Ux2:
3326 case Iop_QSub32Sx2:
3327 case Iop_QSal32x2:
3328 case Iop_QShl32x2:
3329 case Iop_QDMulHi32Sx2:
3330 case Iop_QRDMulHi32Sx2:
3331 return binary32Ix2(mce, vatom1, vatom2);
3333 case Iop_QSub64Ux1:
3334 case Iop_QSub64Sx1:
3335 case Iop_QAdd64Ux1:
3336 case Iop_QAdd64Sx1:
3337 case Iop_QSal64x1:
3338 case Iop_QShl64x1:
3339 case Iop_Sal64x1:
3340 return binary64Ix1(mce, vatom1, vatom2);
3342 case Iop_QShlNsatSU8x8:
3343 case Iop_QShlNsatUU8x8:
3344 case Iop_QShlNsatSS8x8:
3345 complainIfUndefined(mce, atom2, NULL);
3346 return mkPCast8x8(mce, vatom1);
3348 case Iop_QShlNsatSU16x4:
3349 case Iop_QShlNsatUU16x4:
3350 case Iop_QShlNsatSS16x4:
3351 complainIfUndefined(mce, atom2, NULL);
3352 return mkPCast16x4(mce, vatom1);
3354 case Iop_QShlNsatSU32x2:
3355 case Iop_QShlNsatUU32x2:
3356 case Iop_QShlNsatSS32x2:
3357 complainIfUndefined(mce, atom2, NULL);
3358 return mkPCast32x2(mce, vatom1);
3360 case Iop_QShlNsatSU64x1:
3361 case Iop_QShlNsatUU64x1:
3362 case Iop_QShlNsatSS64x1:
3363 complainIfUndefined(mce, atom2, NULL);
3364 return mkPCast32x2(mce, vatom1);
3366 case Iop_PwMax32Sx2:
3367 case Iop_PwMax32Ux2:
3368 case Iop_PwMin32Sx2:
3369 case Iop_PwMin32Ux2:
3370 case Iop_PwMax32Fx2:
3371 case Iop_PwMin32Fx2:
3372 return assignNew('V', mce, Ity_I64,
3373 binop(Iop_PwMax32Ux2,
3374 mkPCast32x2(mce, vatom1),
3375 mkPCast32x2(mce, vatom2)));
3377 case Iop_PwMax16Sx4:
3378 case Iop_PwMax16Ux4:
3379 case Iop_PwMin16Sx4:
3380 case Iop_PwMin16Ux4:
3381 return assignNew('V', mce, Ity_I64,
3382 binop(Iop_PwMax16Ux4,
3383 mkPCast16x4(mce, vatom1),
3384 mkPCast16x4(mce, vatom2)));
3386 case Iop_PwMax8Sx8:
3387 case Iop_PwMax8Ux8:
3388 case Iop_PwMin8Sx8:
3389 case Iop_PwMin8Ux8:
3390 return assignNew('V', mce, Ity_I64,
3391 binop(Iop_PwMax8Ux8,
3392 mkPCast8x8(mce, vatom1),
3393 mkPCast8x8(mce, vatom2)));
3395 case Iop_PwAdd32x2:
3396 case Iop_PwAdd32Fx2:
3397 return mkPCast32x2(mce,
3398 assignNew('V', mce, Ity_I64,
3399 binop(Iop_PwAdd32x2,
3400 mkPCast32x2(mce, vatom1),
3401 mkPCast32x2(mce, vatom2))));
3403 case Iop_PwAdd16x4:
3404 return mkPCast16x4(mce,
3405 assignNew('V', mce, Ity_I64,
3406 binop(op, mkPCast16x4(mce, vatom1),
3407 mkPCast16x4(mce, vatom2))));
3409 case Iop_PwAdd8x8:
3410 return mkPCast8x8(mce,
3411 assignNew('V', mce, Ity_I64,
3412 binop(op, mkPCast8x8(mce, vatom1),
3413 mkPCast8x8(mce, vatom2))));
3415 case Iop_Shl8x8:
3416 case Iop_Shr8x8:
3417 case Iop_Sar8x8:
3418 case Iop_Sal8x8:
3419 return mkUifU64(mce,
3420 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3421 mkPCast8x8(mce,vatom2)
3424 case Iop_Shl16x4:
3425 case Iop_Shr16x4:
3426 case Iop_Sar16x4:
3427 case Iop_Sal16x4:
3428 return mkUifU64(mce,
3429 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3430 mkPCast16x4(mce,vatom2)
3433 case Iop_Shl32x2:
3434 case Iop_Shr32x2:
3435 case Iop_Sar32x2:
3436 case Iop_Sal32x2:
3437 return mkUifU64(mce,
3438 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3439 mkPCast32x2(mce,vatom2)
3442 /* 64-bit data-steering */
3443 case Iop_InterleaveLO32x2:
3444 case Iop_InterleaveLO16x4:
3445 case Iop_InterleaveLO8x8:
3446 case Iop_InterleaveHI32x2:
3447 case Iop_InterleaveHI16x4:
3448 case Iop_InterleaveHI8x8:
3449 case Iop_CatOddLanes8x8:
3450 case Iop_CatEvenLanes8x8:
3451 case Iop_CatOddLanes16x4:
3452 case Iop_CatEvenLanes16x4:
3453 case Iop_InterleaveOddLanes8x8:
3454 case Iop_InterleaveEvenLanes8x8:
3455 case Iop_InterleaveOddLanes16x4:
3456 case Iop_InterleaveEvenLanes16x4:
3457 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
3459 case Iop_GetElem8x8:
3460 complainIfUndefined(mce, atom2, NULL);
3461 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
3462 case Iop_GetElem16x4:
3463 complainIfUndefined(mce, atom2, NULL);
3464 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
3465 case Iop_GetElem32x2:
3466 complainIfUndefined(mce, atom2, NULL);
3467 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
3469 /* Perm8x8: rearrange values in left arg using steering values
3470 from right arg. So rearrange the vbits in the same way but
3471 pessimise wrt steering values. */
3472 case Iop_Perm8x8:
3473 return mkUifU64(
3474 mce,
3475 assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
3476 mkPCast8x8(mce, vatom2)
3479 /* V128-bit SIMD */
3481 case Iop_Sqrt32Fx4:
3482 return unary32Fx4_w_rm(mce, vatom1, vatom2);
3483 case Iop_Sqrt64Fx2:
3484 return unary64Fx2_w_rm(mce, vatom1, vatom2);
3486 case Iop_ShrN8x16:
3487 case Iop_ShrN16x8:
3488 case Iop_ShrN32x4:
3489 case Iop_ShrN64x2:
3490 case Iop_SarN8x16:
3491 case Iop_SarN16x8:
3492 case Iop_SarN32x4:
3493 case Iop_SarN64x2:
3494 case Iop_ShlN8x16:
3495 case Iop_ShlN16x8:
3496 case Iop_ShlN32x4:
3497 case Iop_ShlN64x2:
3498 /* Same scheme as with all other shifts. Note: 22 Oct 05:
3499 this is wrong now, scalar shifts are done properly lazily.
3500 Vector shifts should be fixed too. */
3501 complainIfUndefined(mce, atom2, NULL);
3502 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
3504 /* V x V shifts/rotates are done using the standard lazy scheme. */
3505 /* For the non-rounding variants of bi-di vector x vector
3506 shifts (the Iop_Sh.. ops, that is) we use the lazy scheme.
3507 But note that this is overly pessimistic, because in fact only
3508 the bottom 8 bits of each lane of the second argument are taken
3509 into account when shifting. So really we ought to ignore
3510 undefinedness in bits 8 and above of each lane in the
3511 second argument. */
3512 case Iop_Shl8x16:
3513 case Iop_Shr8x16:
3514 case Iop_Sar8x16:
3515 case Iop_Sal8x16:
3516 case Iop_Rol8x16:
3517 case Iop_Sh8Sx16:
3518 case Iop_Sh8Ux16:
3519 return mkUifUV128(mce,
3520 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3521 mkPCast8x16(mce,vatom2)
3524 case Iop_Shl16x8:
3525 case Iop_Shr16x8:
3526 case Iop_Sar16x8:
3527 case Iop_Sal16x8:
3528 case Iop_Rol16x8:
3529 case Iop_Sh16Sx8:
3530 case Iop_Sh16Ux8:
3531 return mkUifUV128(mce,
3532 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3533 mkPCast16x8(mce,vatom2)
3536 case Iop_Shl32x4:
3537 case Iop_Shr32x4:
3538 case Iop_Sar32x4:
3539 case Iop_Sal32x4:
3540 case Iop_Rol32x4:
3541 case Iop_Sh32Sx4:
3542 case Iop_Sh32Ux4:
3543 return mkUifUV128(mce,
3544 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3545 mkPCast32x4(mce,vatom2)
3548 case Iop_Shl64x2:
3549 case Iop_Shr64x2:
3550 case Iop_Sar64x2:
3551 case Iop_Sal64x2:
3552 case Iop_Rol64x2:
3553 case Iop_Sh64Sx2:
3554 case Iop_Sh64Ux2:
3555 return mkUifUV128(mce,
3556 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3557 mkPCast64x2(mce,vatom2)
3560 /* For the rounding variants of bi-di vector x vector shifts, the
3561 rounding adjustment can cause undefinedness to propagate through
3562 the entire lane, in the worst case. Too complex to handle
3563 properly .. just UifU the arguments and then PCast them.
3564 Suboptimal but safe. */
3565 case Iop_Rsh8Sx16:
3566 case Iop_Rsh8Ux16:
3567 return binary8Ix16(mce, vatom1, vatom2);
3568 case Iop_Rsh16Sx8:
3569 case Iop_Rsh16Ux8:
3570 return binary16Ix8(mce, vatom1, vatom2);
3571 case Iop_Rsh32Sx4:
3572 case Iop_Rsh32Ux4:
3573 return binary32Ix4(mce, vatom1, vatom2);
3574 case Iop_Rsh64Sx2:
3575 case Iop_Rsh64Ux2:
3576 return binary64Ix2(mce, vatom1, vatom2);
3578 case Iop_F32ToFixed32Ux4_RZ:
3579 case Iop_F32ToFixed32Sx4_RZ:
3580 case Iop_Fixed32UToF32x4_RN:
3581 case Iop_Fixed32SToF32x4_RN:
3582 complainIfUndefined(mce, atom2, NULL);
3583 return mkPCast32x4(mce, vatom1);
3585 case Iop_F32ToFixed32Ux2_RZ:
3586 case Iop_F32ToFixed32Sx2_RZ:
3587 case Iop_Fixed32UToF32x2_RN:
3588 case Iop_Fixed32SToF32x2_RN:
3589 complainIfUndefined(mce, atom2, NULL);
3590 return mkPCast32x2(mce, vatom1);
3592 case Iop_QSub8Ux16:
3593 case Iop_QSub8Sx16:
3594 case Iop_Sub8x16:
3595 case Iop_Min8Ux16:
3596 case Iop_Min8Sx16:
3597 case Iop_Max8Ux16:
3598 case Iop_Max8Sx16:
3599 case Iop_CmpGT8Sx16:
3600 case Iop_CmpGT8Ux16:
3601 case Iop_CmpEQ8x16:
3602 case Iop_Avg8Ux16:
3603 case Iop_Avg8Sx16:
3604 case Iop_QAdd8Ux16:
3605 case Iop_QAdd8Sx16:
3606 case Iop_QAddExtUSsatSS8x16:
3607 case Iop_QAddExtSUsatUU8x16:
3608 case Iop_QSal8x16:
3609 case Iop_QShl8x16:
3610 case Iop_Add8x16:
3611 case Iop_Mul8x16:
3612 case Iop_PolynomialMul8x16:
3613 case Iop_PolynomialMulAdd8x16:
3614 return binary8Ix16(mce, vatom1, vatom2);
3616 case Iop_QSub16Ux8:
3617 case Iop_QSub16Sx8:
3618 case Iop_Sub16x8:
3619 case Iop_Mul16x8:
3620 case Iop_MulHi16Sx8:
3621 case Iop_MulHi16Ux8:
3622 case Iop_Min16Sx8:
3623 case Iop_Min16Ux8:
3624 case Iop_Max16Sx8:
3625 case Iop_Max16Ux8:
3626 case Iop_CmpGT16Sx8:
3627 case Iop_CmpGT16Ux8:
3628 case Iop_CmpEQ16x8:
3629 case Iop_Avg16Ux8:
3630 case Iop_Avg16Sx8:
3631 case Iop_QAdd16Ux8:
3632 case Iop_QAdd16Sx8:
3633 case Iop_QAddExtUSsatSS16x8:
3634 case Iop_QAddExtSUsatUU16x8:
3635 case Iop_QSal16x8:
3636 case Iop_QShl16x8:
3637 case Iop_Add16x8:
3638 case Iop_QDMulHi16Sx8:
3639 case Iop_QRDMulHi16Sx8:
3640 case Iop_PolynomialMulAdd16x8:
3641 return binary16Ix8(mce, vatom1, vatom2);
3643 case Iop_Sub32x4:
3644 case Iop_CmpGT32Sx4:
3645 case Iop_CmpGT32Ux4:
3646 case Iop_CmpEQ32x4:
3647 case Iop_QAdd32Sx4:
3648 case Iop_QAdd32Ux4:
3649 case Iop_QSub32Sx4:
3650 case Iop_QSub32Ux4:
3651 case Iop_QAddExtUSsatSS32x4:
3652 case Iop_QAddExtSUsatUU32x4:
3653 case Iop_QSal32x4:
3654 case Iop_QShl32x4:
3655 case Iop_Avg32Ux4:
3656 case Iop_Avg32Sx4:
3657 case Iop_Add32x4:
3658 case Iop_Max32Ux4:
3659 case Iop_Max32Sx4:
3660 case Iop_Min32Ux4:
3661 case Iop_Min32Sx4:
3662 case Iop_Mul32x4:
3663 case Iop_QDMulHi32Sx4:
3664 case Iop_QRDMulHi32Sx4:
3665 case Iop_PolynomialMulAdd32x4:
3666 return binary32Ix4(mce, vatom1, vatom2);
3668 case Iop_Sub64x2:
3669 case Iop_Add64x2:
3670 case Iop_Max64Sx2:
3671 case Iop_Max64Ux2:
3672 case Iop_Min64Sx2:
3673 case Iop_Min64Ux2:
3674 case Iop_CmpEQ64x2:
3675 case Iop_CmpGT64Sx2:
3676 case Iop_CmpGT64Ux2:
3677 case Iop_QSal64x2:
3678 case Iop_QShl64x2:
3679 case Iop_QAdd64Ux2:
3680 case Iop_QAdd64Sx2:
3681 case Iop_QSub64Ux2:
3682 case Iop_QSub64Sx2:
3683 case Iop_QAddExtUSsatSS64x2:
3684 case Iop_QAddExtSUsatUU64x2:
3685 case Iop_PolynomialMulAdd64x2:
3686 case Iop_CipherV128:
3687 case Iop_CipherLV128:
3688 case Iop_NCipherV128:
3689 case Iop_NCipherLV128:
3690 case Iop_MulI128by10E:
3691 case Iop_MulI128by10ECarry:
3692 return binary64Ix2(mce, vatom1, vatom2);
3694 case Iop_QNarrowBin64Sto32Sx4:
3695 case Iop_QNarrowBin64Uto32Ux4:
3696 case Iop_QNarrowBin32Sto16Sx8:
3697 case Iop_QNarrowBin32Uto16Ux8:
3698 case Iop_QNarrowBin32Sto16Ux8:
3699 case Iop_QNarrowBin16Sto8Sx16:
3700 case Iop_QNarrowBin16Uto8Ux16:
3701 case Iop_QNarrowBin16Sto8Ux16:
3702 return vectorNarrowBinV128(mce, op, vatom1, vatom2);
3704 case Iop_Min64Fx2:
3705 case Iop_Max64Fx2:
3706 case Iop_CmpLT64Fx2:
3707 case Iop_CmpLE64Fx2:
3708 case Iop_CmpEQ64Fx2:
3709 case Iop_CmpUN64Fx2:
3710 case Iop_RecipStep64Fx2:
3711 case Iop_RSqrtStep64Fx2:
3712 return binary64Fx2(mce, vatom1, vatom2);
3714 case Iop_Sub64F0x2:
3715 case Iop_Mul64F0x2:
3716 case Iop_Min64F0x2:
3717 case Iop_Max64F0x2:
3718 case Iop_Div64F0x2:
3719 case Iop_CmpLT64F0x2:
3720 case Iop_CmpLE64F0x2:
3721 case Iop_CmpEQ64F0x2:
3722 case Iop_CmpUN64F0x2:
3723 case Iop_Add64F0x2:
3724 return binary64F0x2(mce, vatom1, vatom2);
3726 case Iop_Min32Fx4:
3727 case Iop_Max32Fx4:
3728 case Iop_CmpLT32Fx4:
3729 case Iop_CmpLE32Fx4:
3730 case Iop_CmpEQ32Fx4:
3731 case Iop_CmpUN32Fx4:
3732 case Iop_CmpGT32Fx4:
3733 case Iop_CmpGE32Fx4:
3734 case Iop_RecipStep32Fx4:
3735 case Iop_RSqrtStep32Fx4:
3736 return binary32Fx4(mce, vatom1, vatom2);
3738 case Iop_Sub32Fx2:
3739 case Iop_Mul32Fx2:
3740 case Iop_Min32Fx2:
3741 case Iop_Max32Fx2:
3742 case Iop_CmpEQ32Fx2:
3743 case Iop_CmpGT32Fx2:
3744 case Iop_CmpGE32Fx2:
3745 case Iop_Add32Fx2:
3746 case Iop_RecipStep32Fx2:
3747 case Iop_RSqrtStep32Fx2:
3748 return binary32Fx2(mce, vatom1, vatom2);
3750 case Iop_Sub32F0x4:
3751 case Iop_Mul32F0x4:
3752 case Iop_Min32F0x4:
3753 case Iop_Max32F0x4:
3754 case Iop_Div32F0x4:
3755 case Iop_CmpLT32F0x4:
3756 case Iop_CmpLE32F0x4:
3757 case Iop_CmpEQ32F0x4:
3758 case Iop_CmpUN32F0x4:
3759 case Iop_Add32F0x4:
3760 return binary32F0x4(mce, vatom1, vatom2);
3762 case Iop_QShlNsatSU8x16:
3763 case Iop_QShlNsatUU8x16:
3764 case Iop_QShlNsatSS8x16:
3765 complainIfUndefined(mce, atom2, NULL);
3766 return mkPCast8x16(mce, vatom1);
3768 case Iop_QShlNsatSU16x8:
3769 case Iop_QShlNsatUU16x8:
3770 case Iop_QShlNsatSS16x8:
3771 complainIfUndefined(mce, atom2, NULL);
3772 return mkPCast16x8(mce, vatom1);
3774 case Iop_QShlNsatSU32x4:
3775 case Iop_QShlNsatUU32x4:
3776 case Iop_QShlNsatSS32x4:
3777 complainIfUndefined(mce, atom2, NULL);
3778 return mkPCast32x4(mce, vatom1);
3780 case Iop_QShlNsatSU64x2:
3781 case Iop_QShlNsatUU64x2:
3782 case Iop_QShlNsatSS64x2:
3783 complainIfUndefined(mce, atom2, NULL);
3784 return mkPCast32x4(mce, vatom1);
3786 /* Q-and-Qshift-by-imm-and-narrow of the form (V128, I8) -> V128.
3787 To make this simpler, do the following:
3788 * complain if the shift amount (the I8) is undefined
3789 * pcast each lane at the wide width
3790 * truncate each lane to half width
3791 * pcast the resulting 64-bit value to a single bit and use
3792 that as the least significant bit of the upper half of the
3793 result. */
3794 case Iop_QandQShrNnarrow64Uto32Ux2:
3795 case Iop_QandQSarNnarrow64Sto32Sx2:
3796 case Iop_QandQSarNnarrow64Sto32Ux2:
3797 case Iop_QandQRShrNnarrow64Uto32Ux2:
3798 case Iop_QandQRSarNnarrow64Sto32Sx2:
3799 case Iop_QandQRSarNnarrow64Sto32Ux2:
3800 case Iop_QandQShrNnarrow32Uto16Ux4:
3801 case Iop_QandQSarNnarrow32Sto16Sx4:
3802 case Iop_QandQSarNnarrow32Sto16Ux4:
3803 case Iop_QandQRShrNnarrow32Uto16Ux4:
3804 case Iop_QandQRSarNnarrow32Sto16Sx4:
3805 case Iop_QandQRSarNnarrow32Sto16Ux4:
3806 case Iop_QandQShrNnarrow16Uto8Ux8:
3807 case Iop_QandQSarNnarrow16Sto8Sx8:
3808 case Iop_QandQSarNnarrow16Sto8Ux8:
3809 case Iop_QandQRShrNnarrow16Uto8Ux8:
3810 case Iop_QandQRSarNnarrow16Sto8Sx8:
3811 case Iop_QandQRSarNnarrow16Sto8Ux8:
3813 IRAtom* (*fnPessim) (MCEnv*, IRAtom*) = NULL;
3814 IROp opNarrow = Iop_INVALID;
3815 switch (op) {
3816 case Iop_QandQShrNnarrow64Uto32Ux2:
3817 case Iop_QandQSarNnarrow64Sto32Sx2:
3818 case Iop_QandQSarNnarrow64Sto32Ux2:
3819 case Iop_QandQRShrNnarrow64Uto32Ux2:
3820 case Iop_QandQRSarNnarrow64Sto32Sx2:
3821 case Iop_QandQRSarNnarrow64Sto32Ux2:
3822 fnPessim = mkPCast64x2;
3823 opNarrow = Iop_NarrowUn64to32x2;
3824 break;
3825 case Iop_QandQShrNnarrow32Uto16Ux4:
3826 case Iop_QandQSarNnarrow32Sto16Sx4:
3827 case Iop_QandQSarNnarrow32Sto16Ux4:
3828 case Iop_QandQRShrNnarrow32Uto16Ux4:
3829 case Iop_QandQRSarNnarrow32Sto16Sx4:
3830 case Iop_QandQRSarNnarrow32Sto16Ux4:
3831 fnPessim = mkPCast32x4;
3832 opNarrow = Iop_NarrowUn32to16x4;
3833 break;
3834 case Iop_QandQShrNnarrow16Uto8Ux8:
3835 case Iop_QandQSarNnarrow16Sto8Sx8:
3836 case Iop_QandQSarNnarrow16Sto8Ux8:
3837 case Iop_QandQRShrNnarrow16Uto8Ux8:
3838 case Iop_QandQRSarNnarrow16Sto8Sx8:
3839 case Iop_QandQRSarNnarrow16Sto8Ux8:
3840 fnPessim = mkPCast16x8;
3841 opNarrow = Iop_NarrowUn16to8x8;
3842 break;
3843 default:
3844 tl_assert(0);
3846 complainIfUndefined(mce, atom2, NULL);
3847 // Pessimised shift result
3848 IRAtom* shV
3849 = fnPessim(mce, vatom1);
3850 // Narrowed, pessimised shift result
3851 IRAtom* shVnarrowed
3852 = assignNew('V', mce, Ity_I64, unop(opNarrow, shV));
3853 // Generates: Def--(63)--Def PCast-to-I1(narrowed)
3854 IRAtom* qV = mkPCastXXtoXXlsb(mce, shVnarrowed, Ity_I64);
3855 // and assemble the result
3856 return assignNew('V', mce, Ity_V128,
3857 binop(Iop_64HLtoV128, qV, shVnarrowed));
3860 case Iop_Mull32Sx2:
3861 case Iop_Mull32Ux2:
3862 case Iop_QDMull32Sx2:
3863 return vectorWidenI64(mce, Iop_Widen32Sto64x2,
3864 mkUifU64(mce, vatom1, vatom2));
3866 case Iop_Mull16Sx4:
3867 case Iop_Mull16Ux4:
3868 case Iop_QDMull16Sx4:
3869 return vectorWidenI64(mce, Iop_Widen16Sto32x4,
3870 mkUifU64(mce, vatom1, vatom2));
3872 case Iop_Mull8Sx8:
3873 case Iop_Mull8Ux8:
3874 case Iop_PolynomialMull8x8:
3875 return vectorWidenI64(mce, Iop_Widen8Sto16x8,
3876 mkUifU64(mce, vatom1, vatom2));
3878 case Iop_PwAdd32x4:
3879 return mkPCast32x4(mce,
3880 assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1),
3881 mkPCast32x4(mce, vatom2))));
3883 case Iop_PwAdd16x8:
3884 return mkPCast16x8(mce,
3885 assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1),
3886 mkPCast16x8(mce, vatom2))));
3888 case Iop_PwAdd8x16:
3889 return mkPCast8x16(mce,
3890 assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1),
3891 mkPCast8x16(mce, vatom2))));
3893 /* V128-bit data-steering */
3894 case Iop_SetV128lo32:
3895 case Iop_SetV128lo64:
3896 case Iop_64HLtoV128:
3897 case Iop_InterleaveLO64x2:
3898 case Iop_InterleaveLO32x4:
3899 case Iop_InterleaveLO16x8:
3900 case Iop_InterleaveLO8x16:
3901 case Iop_InterleaveHI64x2:
3902 case Iop_InterleaveHI32x4:
3903 case Iop_InterleaveHI16x8:
3904 case Iop_InterleaveHI8x16:
3905 case Iop_CatOddLanes8x16:
3906 case Iop_CatOddLanes16x8:
3907 case Iop_CatOddLanes32x4:
3908 case Iop_CatEvenLanes8x16:
3909 case Iop_CatEvenLanes16x8:
3910 case Iop_CatEvenLanes32x4:
3911 case Iop_InterleaveOddLanes8x16:
3912 case Iop_InterleaveOddLanes16x8:
3913 case Iop_InterleaveOddLanes32x4:
3914 case Iop_InterleaveEvenLanes8x16:
3915 case Iop_InterleaveEvenLanes16x8:
3916 case Iop_InterleaveEvenLanes32x4:
3917 case Iop_PackOddLanes8x16:
3918 case Iop_PackOddLanes16x8:
3919 case Iop_PackOddLanes32x4:
3920 case Iop_PackEvenLanes8x16:
3921 case Iop_PackEvenLanes16x8:
3922 case Iop_PackEvenLanes32x4:
3923 return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));
3925 case Iop_GetElem8x16:
3926 complainIfUndefined(mce, atom2, NULL);
3927 return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
3928 case Iop_GetElem16x8:
3929 complainIfUndefined(mce, atom2, NULL);
3930 return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
3931 case Iop_GetElem32x4:
3932 complainIfUndefined(mce, atom2, NULL);
3933 return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
3934 case Iop_GetElem64x2:
3935 complainIfUndefined(mce, atom2, NULL);
3936 return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
3938 /* Perm8x16: rearrange values in left arg using steering values
3939 from right arg. So rearrange the vbits in the same way but
3940 pessimise wrt steering values. Perm32x4 ditto. */
3941 case Iop_Perm8x16:
3942 return mkUifUV128(
3943 mce,
3944 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3945 mkPCast8x16(mce, vatom2)
3947 case Iop_Perm32x4:
3948 return mkUifUV128(
3949 mce,
3950 assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3951 mkPCast32x4(mce, vatom2)
3954 /* These two take the lower half of each 16-bit lane, sign/zero
3955 extend it to 32, and multiply together, producing a 32x4
3956 result (and implicitly ignoring half the operand bits). So
3957 treat it as a bunch of independent 16x8 operations, but then
3958 do 32-bit shifts left-right to copy the lower half results
3959 (which are all 0s or all 1s due to PCasting in binary16Ix8)
3960 into the upper half of each result lane. */
3961 case Iop_MullEven16Ux8:
3962 case Iop_MullEven16Sx8: {
3963 IRAtom* at;
3964 at = binary16Ix8(mce,vatom1,vatom2);
3965 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
3966 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
3967 return at;
3970 /* Same deal as Iop_MullEven16{S,U}x8 */
3971 case Iop_MullEven8Ux16:
3972 case Iop_MullEven8Sx16: {
3973 IRAtom* at;
3974 at = binary8Ix16(mce,vatom1,vatom2);
3975 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
3976 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
3977 return at;
3980 /* Same deal as Iop_MullEven16{S,U}x8 */
3981 case Iop_MullEven32Ux4:
3982 case Iop_MullEven32Sx4: {
3983 IRAtom* at;
3984 at = binary32Ix4(mce,vatom1,vatom2);
3985 at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN64x2, at, mkU8(32)));
3986 at = assignNew('V', mce, Ity_V128, binop(Iop_SarN64x2, at, mkU8(32)));
3987 return at;
3990 /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
3991 32x4 -> 16x8 laneage, discarding the upper half of each lane.
3992 Simply apply same op to the V bits, since this really no more
3993 than a data steering operation. */
3994 case Iop_NarrowBin32to16x8:
3995 case Iop_NarrowBin16to8x16:
3996 case Iop_NarrowBin64to32x4:
3997 return assignNew('V', mce, Ity_V128,
3998 binop(op, vatom1, vatom2));
4000 case Iop_ShrV128:
4001 case Iop_ShlV128:
4002 case Iop_I128StoBCD128:
4003 /* Same scheme as with all other shifts. Note: 10 Nov 05:
4004 this is wrong now, scalar shifts are done properly lazily.
4005 Vector shifts should be fixed too. */
4006 complainIfUndefined(mce, atom2, NULL);
4007 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
4009 case Iop_BCDAdd:
4010 case Iop_BCDSub:
4011 return mkLazy2(mce, Ity_V128, vatom1, vatom2);
4013 /* SHA Iops */
4014 case Iop_SHA256:
4015 case Iop_SHA512:
4016 complainIfUndefined(mce, atom2, NULL);
4017 return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
4019 /* I128-bit data-steering */
4020 case Iop_64HLto128:
4021 return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));
4023 /* V256-bit SIMD */
4025 case Iop_Max64Fx4:
4026 case Iop_Min64Fx4:
4027 return binary64Fx4(mce, vatom1, vatom2);
4029 case Iop_Max32Fx8:
4030 case Iop_Min32Fx8:
4031 return binary32Fx8(mce, vatom1, vatom2);
4033 /* V256-bit data-steering */
4034 case Iop_V128HLtoV256:
4035 return assignNew('V', mce, Ity_V256, binop(op, vatom1, vatom2));
4037 /* Scalar floating point */
4039 case Iop_F32toI64S:
4040 case Iop_F32toI64U:
4041 /* I32(rm) x F32 -> I64 */
4042 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
4044 case Iop_I64StoF32:
4045 /* I32(rm) x I64 -> F32 */
4046 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
4048 case Iop_RoundF64toInt:
4049 case Iop_RoundF64toF32:
4050 case Iop_F64toI64S:
4051 case Iop_F64toI64U:
4052 case Iop_I64StoF64:
4053 case Iop_I64UtoF64:
4054 case Iop_SinF64:
4055 case Iop_CosF64:
4056 case Iop_TanF64:
4057 case Iop_2xm1F64:
4058 case Iop_SqrtF64:
4059 case Iop_RecpExpF64:
4060 /* I32(rm) x I64/F64 -> I64/F64 */
4061 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
4063 case Iop_ShlD64:
4064 case Iop_ShrD64:
4065 case Iop_RoundD64toInt:
4066 /* I32(rm) x D64 -> D64 */
4067 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
4069 case Iop_ShlD128:
4070 case Iop_ShrD128:
4071 case Iop_RoundD128toInt:
4072 /* I32(rm) x D128 -> D128 */
4073 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
4075 case Iop_RoundF128toInt:
4076 /* I32(rm) x F128 -> F128 */
4077 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
4079 case Iop_D64toI64S:
4080 case Iop_D64toI64U:
4081 case Iop_I64StoD64:
4082 case Iop_I64UtoD64:
4083 /* I32(rm) x I64/D64 -> D64/I64 */
4084 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
4086 case Iop_F32toD32:
4087 case Iop_F64toD32:
4088 case Iop_F128toD32:
4089 case Iop_D32toF32:
4090 case Iop_D64toF32:
4091 case Iop_D128toF32:
4092 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D32/F32 */
4093 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
4095 case Iop_F32toD64:
4096 case Iop_F64toD64:
4097 case Iop_F128toD64:
4098 case Iop_D32toF64:
4099 case Iop_D64toF64:
4100 case Iop_D128toF64:
4101 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D64/F64 */
4102 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
4104 case Iop_F32toD128:
4105 case Iop_F64toD128:
4106 case Iop_F128toD128:
4107 case Iop_D32toF128:
4108 case Iop_D64toF128:
4109 case Iop_D128toF128:
4110 /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D128/F128 */
4111 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
4113 case Iop_RoundF32toInt:
4114 case Iop_SqrtF32:
4115 case Iop_RecpExpF32:
4116 /* I32(rm) x I32/F32 -> I32/F32 */
4117 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
4119 case Iop_SqrtF128:
4120 /* I32(rm) x F128 -> F128 */
4121 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
4123 case Iop_I32StoF32:
4124 case Iop_I32UtoF32:
4125 case Iop_F32toI32S:
4126 case Iop_F32toI32U:
4127 /* First arg is I32 (rounding mode), second is F32/I32 (data). */
4128 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
4130 case Iop_F64toF16:
4131 case Iop_F32toF16:
4132 /* First arg is I32 (rounding mode), second is F64/F32 (data). */
4133 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
4135 case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32 */
4136 case Iop_F128toI32U: /* IRRoundingMode(I32) x F128 -> unsigned I32 */
4137 case Iop_F128toF32: /* IRRoundingMode(I32) x F128 -> F32 */
4138 case Iop_D128toI32S: /* IRRoundingMode(I32) x D128 -> signed I32 */
4139 case Iop_D128toI32U: /* IRRoundingMode(I32) x D128 -> unsigned I32 */
4140 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
4142 case Iop_F128toI128S: /* IRRoundingMode(I32) x F128 -> signed I128 */
4143 case Iop_RndF128: /* IRRoundingMode(I32) x F128 -> F128 */
4144 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
4146 case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64 */
4147 case Iop_F128toI64U: /* IRRoundingMode(I32) x F128 -> unsigned I64 */
4148 case Iop_F128toF64: /* IRRoundingMode(I32) x F128 -> F64 */
4149 case Iop_D128toD64: /* IRRoundingMode(I64) x D128 -> D64 */
4150 case Iop_D128toI64S: /* IRRoundingMode(I64) x D128 -> signed I64 */
4151 case Iop_D128toI64U: /* IRRoundingMode(I32) x D128 -> unsigned I64 */
4152 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
4154 case Iop_F64HLtoF128:
4155 case Iop_D64HLtoD128:
4156 return assignNew('V', mce, Ity_I128,
4157 binop(Iop_64HLto128, vatom1, vatom2));
4159 case Iop_F64toI32U:
4160 case Iop_F64toI32S:
4161 case Iop_F64toF32:
4162 case Iop_I64UtoF32:
4163 case Iop_D64toI32U:
4164 case Iop_D64toI32S:
4165 /* First arg is I32 (rounding mode), second is F64/D64 (data). */
4166 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
4168 case Iop_D64toD32:
4169 /* First arg is I32 (rounding mode), second is D64 (data). */
4170 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
4172 case Iop_F64toI16S:
4173 /* First arg is I32 (rounding mode), second is F64 (data). */
4174 return mkLazy2(mce, Ity_I16, vatom1, vatom2);
4176 case Iop_InsertExpD64:
4177 /* I64 x I64 -> D64 */
4178 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
4180 case Iop_InsertExpD128:
4181 /* I64 x I128 -> D128 */
4182 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
4184 case Iop_CmpF32:
4185 case Iop_CmpF64:
4186 case Iop_CmpF128:
4187 case Iop_CmpD64:
4188 case Iop_CmpD128:
4189 case Iop_CmpExpD64:
4190 case Iop_CmpExpD128:
4191 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
4193 case Iop_MaxNumF32:
4194 case Iop_MinNumF32:
4195 /* F32 x F32 -> F32 */
4196 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
4198 case Iop_MaxNumF64:
4199 case Iop_MinNumF64:
4200 /* F64 x F64 -> F64 */
4201 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
4203 /* non-FP after here */
4205 case Iop_DivModU64to32:
4206 case Iop_DivModS64to32:
4207 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
4209 case Iop_DivModU128to64:
4210 case Iop_DivModS128to64:
4211 return mkLazy2(mce, Ity_I128, vatom1, vatom2);
4213 case Iop_8HLto16:
4214 return assignNew('V', mce, Ity_I16, binop(op, vatom1, vatom2));
4215 case Iop_16HLto32:
4216 return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
4217 case Iop_32HLto64:
4218 return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
4220 case Iop_DivModU64to64:
4221 case Iop_DivModS64to64: {
4222 IRAtom* vTmp64 = mkLazy2(mce, Ity_I64, vatom1, vatom2);
4223 return assignNew('V', mce, Ity_I128,
4224 binop(Iop_64HLto128, vTmp64, vTmp64));
4227 case Iop_MullS64:
4228 case Iop_MullU64: {
4229 IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
4230 IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
4231 return assignNew('V', mce, Ity_I128,
4232 binop(Iop_64HLto128, vHi64, vLo64));
4235 case Iop_DivModU32to32:
4236 case Iop_DivModS32to32: {
4237 IRAtom* vTmp32 = mkLazy2(mce, Ity_I32, vatom1, vatom2);
4238 return assignNew('V', mce, Ity_I64,
4239 binop(Iop_32HLto64, vTmp32, vTmp32));
4242 case Iop_MullS32:
4243 case Iop_MullU32: {
4244 IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
4245 IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
4246 return assignNew('V', mce, Ity_I64,
4247 binop(Iop_32HLto64, vHi32, vLo32));
4250 case Iop_MullS16:
4251 case Iop_MullU16: {
4252 IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
4253 IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
4254 return assignNew('V', mce, Ity_I32,
4255 binop(Iop_16HLto32, vHi16, vLo16));
4258 case Iop_MullS8:
4259 case Iop_MullU8: {
4260 IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
4261 IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
4262 return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
4265 case Iop_Sad8Ux4: /* maybe we could do better? ftm, do mkLazy2. */
4266 case Iop_DivS32:
4267 case Iop_DivU32:
4268 case Iop_DivU32E:
4269 case Iop_DivS32E:
4270 case Iop_QAdd32S: /* could probably do better */
4271 case Iop_QSub32S: /* could probably do better */
4272 return mkLazy2(mce, Ity_I32, vatom1, vatom2);
4274 case Iop_DivS64:
4275 case Iop_DivU64:
4276 case Iop_DivS64E:
4277 case Iop_DivU64E:
4278 return mkLazy2(mce, Ity_I64, vatom1, vatom2);
4280 case Iop_Add32:
4281 if (mce->dlbo.dl_Add32 == DLexpensive
4282 || (mce->dlbo.dl_Add32 == DLauto && hu == HuOth)) {
4283 return expensiveAddSub(mce,True,Ity_I32,
4284 vatom1,vatom2, atom1,atom2);
4285 } else {
4286 goto cheap_AddSub32;
4288 case Iop_Sub32:
4289 if (mce->dlbo.dl_Sub32 == DLexpensive
4290 || (mce->dlbo.dl_Sub32 == DLauto && hu == HuOth)) {
4291 return expensiveAddSub(mce,False,Ity_I32,
4292 vatom1,vatom2, atom1,atom2);
4293 } else {
4294 goto cheap_AddSub32;
4297 cheap_AddSub32:
4298 case Iop_Mul32:
4299 return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
4301 case Iop_CmpORD32S:
4302 case Iop_CmpORD32U:
4303 case Iop_CmpORD64S:
4304 case Iop_CmpORD64U:
4305 return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);
4307 case Iop_Add64:
4308 if (mce->dlbo.dl_Add64 == DLexpensive
4309 || (mce->dlbo.dl_Add64 == DLauto && hu == HuOth)) {
4310 return expensiveAddSub(mce,True,Ity_I64,
4311 vatom1,vatom2, atom1,atom2);
4312 } else {
4313 goto cheap_AddSub64;
4315 case Iop_Sub64:
4316 if (mce->dlbo.dl_Sub64 == DLexpensive
4317 || (mce->dlbo.dl_Sub64 == DLauto && hu == HuOth)) {
4318 return expensiveAddSub(mce,False,Ity_I64,
4319 vatom1,vatom2, atom1,atom2);
4320 } else {
4321 goto cheap_AddSub64;
4324 cheap_AddSub64:
4325 case Iop_Mul64:
4326 return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
4328 case Iop_Mul16:
4329 case Iop_Add16:
4330 case Iop_Sub16:
4331 return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
4333 case Iop_Mul8:
4334 case Iop_Sub8:
4335 case Iop_Add8:
4336 return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
4338 ////---- CmpXX64
4339 case Iop_CmpEQ64: case Iop_CmpNE64:
4340 if (mce->dlbo.dl_CmpEQ64_CmpNE64 == DLexpensive)
4341 goto expensive_cmp64;
4342 else
4343 goto cheap_cmp64;
4345 expensive_cmp64:
4346 case Iop_ExpCmpNE64:
4347 return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
4349 cheap_cmp64:
4350 case Iop_CmpLE64S: case Iop_CmpLE64U:
4351 case Iop_CmpLT64U: case Iop_CmpLT64S:
4352 return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
4354 ////---- CmpXX32
4355 case Iop_CmpEQ32: case Iop_CmpNE32:
4356 if (mce->dlbo.dl_CmpEQ32_CmpNE32 == DLexpensive)
4357 goto expensive_cmp32;
4358 else
4359 goto cheap_cmp32;
4361 expensive_cmp32:
4362 case Iop_ExpCmpNE32:
4363 return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
4365 cheap_cmp32:
4366 case Iop_CmpLE32S: case Iop_CmpLE32U:
4367 case Iop_CmpLT32U: case Iop_CmpLT32S:
4368 return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
4370 ////---- CmpXX16
4371 case Iop_CmpEQ16: case Iop_CmpNE16:
4372 if (mce->dlbo.dl_CmpEQ16_CmpNE16 == DLexpensive)
4373 goto expensive_cmp16;
4374 else
4375 goto cheap_cmp16;
4377 expensive_cmp16:
4378 case Iop_ExpCmpNE16:
4379 return expensiveCmpEQorNE(mce,Ity_I16, vatom1,vatom2, atom1,atom2 );
4381 cheap_cmp16:
4382 return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
4384 ////---- CmpXX8
4385 case Iop_CmpEQ8: case Iop_CmpNE8:
4386 if (mce->dlbo.dl_CmpEQ8_CmpNE8 == DLexpensive)
4387 goto expensive_cmp8;
4388 else
4389 goto cheap_cmp8;
4391 expensive_cmp8:
4392 return expensiveCmpEQorNE(mce,Ity_I8, vatom1,vatom2, atom1,atom2 );
4394 cheap_cmp8:
4395 return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
4397 ////---- end CmpXX{64,32,16,8}
4399 case Iop_CasCmpEQ8: case Iop_CasCmpNE8:
4400 case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
4401 case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
4402 case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
4403 /* Just say these all produce a defined result, regardless
4404 of their arguments. See COMMENT_ON_CasCmpEQ in this file. */
4405 return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));
4407 case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
4408 return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );
4410 case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
4411 return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );
4413 case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
4414 return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );
4416 case Iop_Shl8: case Iop_Shr8: case Iop_Sar8:
4417 return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
4419 case Iop_AndV256:
4420 uifu = mkUifUV256; difd = mkDifDV256;
4421 and_or_ty = Ity_V256; improve = mkImproveANDV256; goto do_And_Or;
4422 case Iop_AndV128:
4423 uifu = mkUifUV128; difd = mkDifDV128;
4424 and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
4425 case Iop_And64:
4426 uifu = mkUifU64; difd = mkDifD64;
4427 and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
4428 case Iop_And32:
4429 uifu = mkUifU32; difd = mkDifD32;
4430 and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
4431 case Iop_And16:
4432 uifu = mkUifU16; difd = mkDifD16;
4433 and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
4434 case Iop_And8:
4435 uifu = mkUifU8; difd = mkDifD8;
4436 and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
4438 case Iop_OrV256:
4439 uifu = mkUifUV256; difd = mkDifDV256;
4440 and_or_ty = Ity_V256; improve = mkImproveORV256; goto do_And_Or;
4441 case Iop_OrV128:
4442 uifu = mkUifUV128; difd = mkDifDV128;
4443 and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
4444 case Iop_Or64:
4445 uifu = mkUifU64; difd = mkDifD64;
4446 and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
4447 case Iop_Or32:
4448 uifu = mkUifU32; difd = mkDifD32;
4449 and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
4450 case Iop_Or16:
4451 uifu = mkUifU16; difd = mkDifD16;
4452 and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
4453 case Iop_Or8:
4454 uifu = mkUifU8; difd = mkDifD8;
4455 and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
4457 do_And_Or:
4458 return
4459 assignNew(
4460 'V', mce,
4461 and_or_ty,
4462 difd(mce, uifu(mce, vatom1, vatom2),
4463 difd(mce, improve(mce, atom1, vatom1),
4464 improve(mce, atom2, vatom2) ) ) );
4466 case Iop_Xor8:
4467 return mkUifU8(mce, vatom1, vatom2);
4468 case Iop_Xor16:
4469 return mkUifU16(mce, vatom1, vatom2);
4470 case Iop_Xor32:
4471 return mkUifU32(mce, vatom1, vatom2);
4472 case Iop_Xor64:
4473 return mkUifU64(mce, vatom1, vatom2);
4474 case Iop_XorV128:
4475 return mkUifUV128(mce, vatom1, vatom2);
4476 case Iop_XorV256:
4477 return mkUifUV256(mce, vatom1, vatom2);
4479 /* V256-bit SIMD */
4481 case Iop_ShrN16x16:
4482 case Iop_ShrN32x8:
4483 case Iop_ShrN64x4:
4484 case Iop_SarN16x16:
4485 case Iop_SarN32x8:
4486 case Iop_ShlN16x16:
4487 case Iop_ShlN32x8:
4488 case Iop_ShlN64x4:
4489 /* Same scheme as with all other shifts. Note: 22 Oct 05:
4490 this is wrong now, scalar shifts are done properly lazily.
4491 Vector shifts should be fixed too. */
4492 complainIfUndefined(mce, atom2, NULL);
4493 return assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2));
4495 case Iop_QSub8Ux32:
4496 case Iop_QSub8Sx32:
4497 case Iop_Sub8x32:
4498 case Iop_Min8Ux32:
4499 case Iop_Min8Sx32:
4500 case Iop_Max8Ux32:
4501 case Iop_Max8Sx32:
4502 case Iop_CmpGT8Sx32:
4503 case Iop_CmpEQ8x32:
4504 case Iop_Avg8Ux32:
4505 case Iop_QAdd8Ux32:
4506 case Iop_QAdd8Sx32:
4507 case Iop_Add8x32:
4508 return binary8Ix32(mce, vatom1, vatom2);
4510 case Iop_QSub16Ux16:
4511 case Iop_QSub16Sx16:
4512 case Iop_Sub16x16:
4513 case Iop_Mul16x16:
4514 case Iop_MulHi16Sx16:
4515 case Iop_MulHi16Ux16:
4516 case Iop_Min16Sx16:
4517 case Iop_Min16Ux16:
4518 case Iop_Max16Sx16:
4519 case Iop_Max16Ux16:
4520 case Iop_CmpGT16Sx16:
4521 case Iop_CmpEQ16x16:
4522 case Iop_Avg16Ux16:
4523 case Iop_QAdd16Ux16:
4524 case Iop_QAdd16Sx16:
4525 case Iop_Add16x16:
4526 return binary16Ix16(mce, vatom1, vatom2);
4528 case Iop_Sub32x8:
4529 case Iop_CmpGT32Sx8:
4530 case Iop_CmpEQ32x8:
4531 case Iop_Add32x8:
4532 case Iop_Max32Ux8:
4533 case Iop_Max32Sx8:
4534 case Iop_Min32Ux8:
4535 case Iop_Min32Sx8:
4536 case Iop_Mul32x8:
4537 return binary32Ix8(mce, vatom1, vatom2);
4539 case Iop_Sub64x4:
4540 case Iop_Add64x4:
4541 case Iop_CmpEQ64x4:
4542 case Iop_CmpGT64Sx4:
4543 return binary64Ix4(mce, vatom1, vatom2);
4545 /* Perm32x8: rearrange values in left arg using steering values
4546 from right arg. So rearrange the vbits in the same way but
4547 pessimise wrt steering values. */
4548 case Iop_Perm32x8:
4549 return mkUifUV256(
4550 mce,
4551 assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2)),
4552 mkPCast32x8(mce, vatom2)
4555 /* Q-and-Qshift-by-vector of the form (V128, V128) -> V256.
4556 Handle the shifted results in the same way that other
4557 binary Q ops are handled, eg QSub: UifU the two args,
4558 then pessimise -- which is binaryNIxM. But for the upper
4559 V128, we require to generate just 1 bit which is the
4560 pessimised shift result, with 127 defined zeroes above it.
4562 Note that this overly pessimistic in that in fact only the
4563 bottom 8 bits of each lane of the second arg determine the shift
4564 amount. Really we ought to ignore any undefinedness in the
4565 rest of the lanes of the second arg. */
4566 case Iop_QandSQsh64x2: case Iop_QandUQsh64x2:
4567 case Iop_QandSQRsh64x2: case Iop_QandUQRsh64x2:
4568 case Iop_QandSQsh32x4: case Iop_QandUQsh32x4:
4569 case Iop_QandSQRsh32x4: case Iop_QandUQRsh32x4:
4570 case Iop_QandSQsh16x8: case Iop_QandUQsh16x8:
4571 case Iop_QandSQRsh16x8: case Iop_QandUQRsh16x8:
4572 case Iop_QandSQsh8x16: case Iop_QandUQsh8x16:
4573 case Iop_QandSQRsh8x16: case Iop_QandUQRsh8x16:
4575 // The function to generate the pessimised shift result
4576 IRAtom* (*binaryNIxM)(MCEnv*,IRAtom*,IRAtom*) = NULL;
4577 switch (op) {
4578 case Iop_QandSQsh64x2:
4579 case Iop_QandUQsh64x2:
4580 case Iop_QandSQRsh64x2:
4581 case Iop_QandUQRsh64x2:
4582 binaryNIxM = binary64Ix2;
4583 break;
4584 case Iop_QandSQsh32x4:
4585 case Iop_QandUQsh32x4:
4586 case Iop_QandSQRsh32x4:
4587 case Iop_QandUQRsh32x4:
4588 binaryNIxM = binary32Ix4;
4589 break;
4590 case Iop_QandSQsh16x8:
4591 case Iop_QandUQsh16x8:
4592 case Iop_QandSQRsh16x8:
4593 case Iop_QandUQRsh16x8:
4594 binaryNIxM = binary16Ix8;
4595 break;
4596 case Iop_QandSQsh8x16:
4597 case Iop_QandUQsh8x16:
4598 case Iop_QandSQRsh8x16:
4599 case Iop_QandUQRsh8x16:
4600 binaryNIxM = binary8Ix16;
4601 break;
4602 default:
4603 tl_assert(0);
4605 tl_assert(binaryNIxM);
4606 // Pessimised shift result, shV[127:0]
4607 IRAtom* shV = binaryNIxM(mce, vatom1, vatom2);
4608 // Generates: Def--(127)--Def PCast-to-I1(shV)
4609 IRAtom* qV = mkPCastXXtoXXlsb(mce, shV, Ity_V128);
4610 // and assemble the result
4611 return assignNew('V', mce, Ity_V256,
4612 binop(Iop_V128HLtoV256, qV, shV));
4615 default:
4616 ppIROp(op);
4617 VG_(tool_panic)("memcheck:expr2vbits_Binop");
4622 static
4623 IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
4625 /* For the widening operations {8,16,32}{U,S}to{16,32,64}, the
4626 selection of shadow operation implicitly duplicates the logic in
4627 do_shadow_LoadG and should be kept in sync (in the very unlikely
4628 event that the interpretation of such widening ops changes in
4629 future). See comment in do_shadow_LoadG. */
4630 IRAtom* vatom = expr2vbits( mce, atom, HuOth );
4631 tl_assert(isOriginalAtom(mce,atom));
4632 switch (op) {
4634 case Iop_Abs64Fx2:
4635 case Iop_Neg64Fx2:
4636 case Iop_RSqrtEst64Fx2:
4637 case Iop_RecipEst64Fx2:
4638 case Iop_Log2_64Fx2:
4639 return unary64Fx2(mce, vatom);
4641 case Iop_Sqrt64F0x2:
4642 return unary64F0x2(mce, vatom);
4644 case Iop_Sqrt32Fx8:
4645 case Iop_RSqrtEst32Fx8:
4646 case Iop_RecipEst32Fx8:
4647 return unary32Fx8(mce, vatom);
4649 case Iop_Sqrt64Fx4:
4650 return unary64Fx4(mce, vatom);
4652 case Iop_RecipEst32Fx4:
4653 case Iop_I32UtoFx4:
4654 case Iop_I32StoFx4:
4655 case Iop_QFtoI32Ux4_RZ:
4656 case Iop_QFtoI32Sx4_RZ:
4657 case Iop_RoundF32x4_RM:
4658 case Iop_RoundF32x4_RP:
4659 case Iop_RoundF32x4_RN:
4660 case Iop_RoundF32x4_RZ:
4661 case Iop_RecipEst32Ux4:
4662 case Iop_Abs32Fx4:
4663 case Iop_Neg32Fx4:
4664 case Iop_RSqrtEst32Fx4:
4665 case Iop_Log2_32Fx4:
4666 return unary32Fx4(mce, vatom);
4668 case Iop_I32UtoFx2:
4669 case Iop_I32StoFx2:
4670 case Iop_RecipEst32Fx2:
4671 case Iop_RecipEst32Ux2:
4672 case Iop_Abs32Fx2:
4673 case Iop_Neg32Fx2:
4674 case Iop_RSqrtEst32Fx2:
4675 return unary32Fx2(mce, vatom);
4677 case Iop_Sqrt32F0x4:
4678 case Iop_RSqrtEst32F0x4:
4679 case Iop_RecipEst32F0x4:
4680 return unary32F0x4(mce, vatom);
4682 case Iop_32UtoV128:
4683 case Iop_64UtoV128:
4684 case Iop_Dup8x16:
4685 case Iop_Dup16x8:
4686 case Iop_Dup32x4:
4687 case Iop_Reverse1sIn8_x16:
4688 case Iop_Reverse8sIn16_x8:
4689 case Iop_Reverse8sIn32_x4:
4690 case Iop_Reverse16sIn32_x4:
4691 case Iop_Reverse8sIn64_x2:
4692 case Iop_Reverse16sIn64_x2:
4693 case Iop_Reverse32sIn64_x2:
4694 case Iop_V256toV128_1: case Iop_V256toV128_0:
4695 case Iop_ZeroHI64ofV128:
4696 case Iop_ZeroHI96ofV128:
4697 case Iop_ZeroHI112ofV128:
4698 case Iop_ZeroHI120ofV128:
4699 return assignNew('V', mce, Ity_V128, unop(op, vatom));
4701 case Iop_F128HItoF64: /* F128 -> high half of F128 */
4702 case Iop_D128HItoD64: /* D128 -> high half of D128 */
4703 return assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vatom));
4704 case Iop_F128LOtoF64: /* F128 -> low half of F128 */
4705 case Iop_D128LOtoD64: /* D128 -> low half of D128 */
4706 return assignNew('V', mce, Ity_I64, unop(Iop_128to64, vatom));
4708 case Iop_NegF128:
4709 case Iop_AbsF128:
4710 case Iop_RndF128:
4711 case Iop_TruncF128toI64S: /* F128 -> I64S */
4712 case Iop_TruncF128toI32S: /* F128 -> I32S (result stored in 64-bits) */
4713 case Iop_TruncF128toI64U: /* F128 -> I64U */
4714 case Iop_TruncF128toI32U: /* F128 -> I32U (result stored in 64-bits) */
4715 return mkPCastTo(mce, Ity_I128, vatom);
4717 case Iop_BCD128toI128S:
4718 case Iop_MulI128by10:
4719 case Iop_MulI128by10Carry:
4720 case Iop_F16toF64x2:
4721 case Iop_F64toF16x2:
4722 return vatom;
4724 case Iop_I32StoF128: /* signed I32 -> F128 */
4725 case Iop_I64StoF128: /* signed I64 -> F128 */
4726 case Iop_I32UtoF128: /* unsigned I32 -> F128 */
4727 case Iop_I64UtoF128: /* unsigned I64 -> F128 */
4728 case Iop_F32toF128: /* F32 -> F128 */
4729 case Iop_F64toF128: /* F64 -> F128 */
4730 case Iop_I32StoD128: /* signed I64 -> D128 */
4731 case Iop_I64StoD128: /* signed I64 -> D128 */
4732 case Iop_I32UtoD128: /* unsigned I32 -> D128 */
4733 case Iop_I64UtoD128: /* unsigned I64 -> D128 */
4734 return mkPCastTo(mce, Ity_I128, vatom);
4736 case Iop_F16toF64:
4737 case Iop_F32toF64:
4738 case Iop_I32StoF64:
4739 case Iop_I32UtoF64:
4740 case Iop_NegF64:
4741 case Iop_AbsF64:
4742 case Iop_RSqrtEst5GoodF64:
4743 case Iop_RoundF64toF64_NEAREST:
4744 case Iop_RoundF64toF64_NegINF:
4745 case Iop_RoundF64toF64_PosINF:
4746 case Iop_RoundF64toF64_ZERO:
4747 case Iop_Clz64:
4748 case Iop_D32toD64:
4749 case Iop_I32StoD64:
4750 case Iop_I32UtoD64:
4751 case Iop_ExtractExpD64: /* D64 -> I64 */
4752 case Iop_ExtractExpD128: /* D128 -> I64 */
4753 case Iop_ExtractSigD64: /* D64 -> I64 */
4754 case Iop_ExtractSigD128: /* D128 -> I64 */
4755 case Iop_DPBtoBCD:
4756 case Iop_BCDtoDPB:
4757 return mkPCastTo(mce, Ity_I64, vatom);
4759 case Iop_D64toD128:
4760 return mkPCastTo(mce, Ity_I128, vatom);
4762 case Iop_Clz32:
4763 case Iop_TruncF64asF32:
4764 case Iop_NegF32:
4765 case Iop_AbsF32:
4766 case Iop_F16toF32:
4767 return mkPCastTo(mce, Ity_I32, vatom);
4769 case Iop_Ctz32:
4770 case Iop_Ctz64:
4771 return expensiveCountTrailingZeroes(mce, op, atom, vatom);
4773 case Iop_1Uto64:
4774 case Iop_1Sto64:
4775 case Iop_8Uto64:
4776 case Iop_8Sto64:
4777 case Iop_16Uto64:
4778 case Iop_16Sto64:
4779 case Iop_32Sto64:
4780 case Iop_32Uto64:
4781 case Iop_V128to64:
4782 case Iop_V128HIto64:
4783 case Iop_128HIto64:
4784 case Iop_128to64:
4785 case Iop_Dup8x8:
4786 case Iop_Dup16x4:
4787 case Iop_Dup32x2:
4788 case Iop_Reverse8sIn16_x4:
4789 case Iop_Reverse8sIn32_x2:
4790 case Iop_Reverse16sIn32_x2:
4791 case Iop_Reverse8sIn64_x1:
4792 case Iop_Reverse16sIn64_x1:
4793 case Iop_Reverse32sIn64_x1:
4794 case Iop_V256to64_0: case Iop_V256to64_1:
4795 case Iop_V256to64_2: case Iop_V256to64_3:
4796 return assignNew('V', mce, Ity_I64, unop(op, vatom));
4798 case Iop_64to32:
4799 case Iop_64HIto32:
4800 case Iop_1Uto32:
4801 case Iop_1Sto32:
4802 case Iop_8Uto32:
4803 case Iop_16Uto32:
4804 case Iop_16Sto32:
4805 case Iop_8Sto32:
4806 case Iop_V128to32:
4807 return assignNew('V', mce, Ity_I32, unop(op, vatom));
4809 case Iop_8Sto16:
4810 case Iop_8Uto16:
4811 case Iop_32to16:
4812 case Iop_32HIto16:
4813 case Iop_64to16:
4814 case Iop_GetMSBs8x16:
4815 return assignNew('V', mce, Ity_I16, unop(op, vatom));
4817 case Iop_1Uto8:
4818 case Iop_1Sto8:
4819 case Iop_16to8:
4820 case Iop_16HIto8:
4821 case Iop_32to8:
4822 case Iop_64to8:
4823 case Iop_GetMSBs8x8:
4824 return assignNew('V', mce, Ity_I8, unop(op, vatom));
4826 case Iop_32to1:
4827 return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));
4829 case Iop_64to1:
4830 return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));
4832 case Iop_ReinterpF64asI64:
4833 case Iop_ReinterpI64asF64:
4834 case Iop_ReinterpI32asF32:
4835 case Iop_ReinterpF32asI32:
4836 case Iop_ReinterpI64asD64:
4837 case Iop_ReinterpD64asI64:
4838 case Iop_NotV256:
4839 case Iop_NotV128:
4840 case Iop_Not64:
4841 case Iop_Not32:
4842 case Iop_Not16:
4843 case Iop_Not8:
4844 case Iop_Not1:
4845 return vatom;
4847 case Iop_CmpNEZ8x8:
4848 case Iop_Cnt8x8:
4849 case Iop_Clz8x8:
4850 case Iop_Cls8x8:
4851 case Iop_Abs8x8:
4852 return mkPCast8x8(mce, vatom);
4854 case Iop_CmpNEZ8x16:
4855 case Iop_Cnt8x16:
4856 case Iop_Clz8x16:
4857 case Iop_Cls8x16:
4858 case Iop_Abs8x16:
4859 case Iop_Ctz8x16:
4860 return mkPCast8x16(mce, vatom);
4862 case Iop_CmpNEZ16x4:
4863 case Iop_Clz16x4:
4864 case Iop_Cls16x4:
4865 case Iop_Abs16x4:
4866 return mkPCast16x4(mce, vatom);
4868 case Iop_CmpNEZ16x8:
4869 case Iop_Clz16x8:
4870 case Iop_Cls16x8:
4871 case Iop_Abs16x8:
4872 case Iop_Ctz16x8:
4873 return mkPCast16x8(mce, vatom);
4875 case Iop_CmpNEZ32x2:
4876 case Iop_Clz32x2:
4877 case Iop_Cls32x2:
4878 case Iop_FtoI32Ux2_RZ:
4879 case Iop_FtoI32Sx2_RZ:
4880 case Iop_Abs32x2:
4881 return mkPCast32x2(mce, vatom);
4883 case Iop_CmpNEZ32x4:
4884 case Iop_Clz32x4:
4885 case Iop_Cls32x4:
4886 case Iop_FtoI32Ux4_RZ:
4887 case Iop_FtoI32Sx4_RZ:
4888 case Iop_Abs32x4:
4889 case Iop_RSqrtEst32Ux4:
4890 case Iop_Ctz32x4:
4891 return mkPCast32x4(mce, vatom);
4893 case Iop_CmpwNEZ32:
4894 return mkPCastTo(mce, Ity_I32, vatom);
4896 case Iop_CmpwNEZ64:
4897 return mkPCastTo(mce, Ity_I64, vatom);
4899 case Iop_CmpNEZ64x2:
4900 case Iop_CipherSV128:
4901 case Iop_Clz64x2:
4902 case Iop_Abs64x2:
4903 case Iop_Ctz64x2:
4904 return mkPCast64x2(mce, vatom);
4906 case Iop_PwBitMtxXpose64x2:
4907 return assignNew('V', mce, Ity_V128, unop(op, vatom));
4909 case Iop_NarrowUn16to8x8:
4910 case Iop_NarrowUn32to16x4:
4911 case Iop_NarrowUn64to32x2:
4912 case Iop_QNarrowUn16Sto8Sx8:
4913 case Iop_QNarrowUn16Sto8Ux8:
4914 case Iop_QNarrowUn16Uto8Ux8:
4915 case Iop_QNarrowUn32Sto16Sx4:
4916 case Iop_QNarrowUn32Sto16Ux4:
4917 case Iop_QNarrowUn32Uto16Ux4:
4918 case Iop_QNarrowUn64Sto32Sx2:
4919 case Iop_QNarrowUn64Sto32Ux2:
4920 case Iop_QNarrowUn64Uto32Ux2:
4921 case Iop_F32toF16x4:
4922 return vectorNarrowUnV128(mce, op, vatom);
4924 case Iop_Widen8Sto16x8:
4925 case Iop_Widen8Uto16x8:
4926 case Iop_Widen16Sto32x4:
4927 case Iop_Widen16Uto32x4:
4928 case Iop_Widen32Sto64x2:
4929 case Iop_Widen32Uto64x2:
4930 case Iop_F16toF32x4:
4931 return vectorWidenI64(mce, op, vatom);
4933 case Iop_PwAddL32Ux2:
4934 case Iop_PwAddL32Sx2:
4935 return mkPCastTo(mce, Ity_I64,
4936 assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom))));
4938 case Iop_PwAddL16Ux4:
4939 case Iop_PwAddL16Sx4:
4940 return mkPCast32x2(mce,
4941 assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom))));
4943 case Iop_PwAddL8Ux8:
4944 case Iop_PwAddL8Sx8:
4945 return mkPCast16x4(mce,
4946 assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom))));
4948 case Iop_PwAddL32Ux4:
4949 case Iop_PwAddL32Sx4:
4950 return mkPCast64x2(mce,
4951 assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom))));
4953 case Iop_PwAddL16Ux8:
4954 case Iop_PwAddL16Sx8:
4955 return mkPCast32x4(mce,
4956 assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom))));
4958 case Iop_PwAddL8Ux16:
4959 case Iop_PwAddL8Sx16:
4960 return mkPCast16x8(mce,
4961 assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom))));
4963 case Iop_I64UtoF32:
4964 default:
4965 ppIROp(op);
4966 VG_(tool_panic)("memcheck:expr2vbits_Unop");
4971 /* Worker function -- do not call directly. See comments on
4972 expr2vbits_Load for the meaning of |guard|.
4974 Generates IR to (1) perform a definedness test of |addr|, (2)
4975 perform a validity test of |addr|, and (3) return the Vbits for the
4976 location indicated by |addr|. All of this only happens when
4977 |guard| is NULL or |guard| evaluates to True at run time.
4979 If |guard| evaluates to False at run time, the returned value is
4980 the IR-mandated 0x55..55 value, and no checks nor shadow loads are
4981 performed.
4983 The definedness of |guard| itself is not checked. That is assumed
4984 to have been done before this point, by the caller. */
4985 static
4986 IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
4987 IREndness end, IRType ty,
4988 IRAtom* addr, UInt bias, IRAtom* guard )
4990 tl_assert(isOriginalAtom(mce,addr));
4991 tl_assert(end == Iend_LE || end == Iend_BE);
4993 /* First, emit a definedness test for the address. This also sets
4994 the address (shadow) to 'defined' following the test. */
4995 complainIfUndefined( mce, addr, guard );
4997 /* Now cook up a call to the relevant helper function, to read the
4998 data V bits from shadow memory. */
4999 ty = shadowTypeV(ty);
5001 void* helper = NULL;
5002 const HChar* hname = NULL;
5003 Bool ret_via_outparam = False;
5005 if (end == Iend_LE) {
5006 switch (ty) {
5007 case Ity_V256: helper = &MC_(helperc_LOADV256le);
5008 hname = "MC_(helperc_LOADV256le)";
5009 ret_via_outparam = True;
5010 break;
5011 case Ity_V128: helper = &MC_(helperc_LOADV128le);
5012 hname = "MC_(helperc_LOADV128le)";
5013 ret_via_outparam = True;
5014 break;
5015 case Ity_I64: helper = &MC_(helperc_LOADV64le);
5016 hname = "MC_(helperc_LOADV64le)";
5017 break;
5018 case Ity_I32: helper = &MC_(helperc_LOADV32le);
5019 hname = "MC_(helperc_LOADV32le)";
5020 break;
5021 case Ity_I16: helper = &MC_(helperc_LOADV16le);
5022 hname = "MC_(helperc_LOADV16le)";
5023 break;
5024 case Ity_I8: helper = &MC_(helperc_LOADV8);
5025 hname = "MC_(helperc_LOADV8)";
5026 break;
5027 default: ppIRType(ty);
5028 VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(LE)");
5030 } else {
5031 switch (ty) {
5032 case Ity_V256: helper = &MC_(helperc_LOADV256be);
5033 hname = "MC_(helperc_LOADV256be)";
5034 ret_via_outparam = True;
5035 break;
5036 case Ity_V128: helper = &MC_(helperc_LOADV128be);
5037 hname = "MC_(helperc_LOADV128be)";
5038 ret_via_outparam = True;
5039 break;
5040 case Ity_I64: helper = &MC_(helperc_LOADV64be);
5041 hname = "MC_(helperc_LOADV64be)";
5042 break;
5043 case Ity_I32: helper = &MC_(helperc_LOADV32be);
5044 hname = "MC_(helperc_LOADV32be)";
5045 break;
5046 case Ity_I16: helper = &MC_(helperc_LOADV16be);
5047 hname = "MC_(helperc_LOADV16be)";
5048 break;
5049 case Ity_I8: helper = &MC_(helperc_LOADV8);
5050 hname = "MC_(helperc_LOADV8)";
5051 break;
5052 default: ppIRType(ty);
5053 VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(BE)");
5057 tl_assert(helper);
5058 tl_assert(hname);
5060 /* Generate the actual address into addrAct. */
5061 IRAtom* addrAct;
5062 if (bias == 0) {
5063 addrAct = addr;
5064 } else {
5065 IROp mkAdd;
5066 IRAtom* eBias;
5067 IRType tyAddr = mce->hWordTy;
5068 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
5069 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
5070 eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
5071 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );
5074 /* We need to have a place to park the V bits we're just about to
5075 read. */
5076 IRTemp datavbits = newTemp(mce, ty, VSh);
5078 /* Here's the call. */
5079 IRDirty* di;
5080 if (ret_via_outparam) {
5081 di = unsafeIRDirty_1_N( datavbits,
5082 2/*regparms*/,
5083 hname, VG_(fnptr_to_fnentry)( helper ),
5084 mkIRExprVec_2( IRExpr_VECRET(), addrAct ) );
5085 } else {
5086 di = unsafeIRDirty_1_N( datavbits,
5087 1/*regparms*/,
5088 hname, VG_(fnptr_to_fnentry)( helper ),
5089 mkIRExprVec_1( addrAct ) );
5092 setHelperAnns( mce, di );
5093 if (guard) {
5094 di->guard = guard;
5095 /* Ideally the didn't-happen return value here would be all-ones
5096 (all-undefined), so it'd be obvious if it got used
5097 inadvertently. We can get by with the IR-mandated default
5098 value (0b01 repeating, 0x55 etc) as that'll still look pretty
5099 undefined if it ever leaks out. */
5101 stmt( 'V', mce, IRStmt_Dirty(di) );
5103 return mkexpr(datavbits);
5107 /* Generate IR to do a shadow load. The helper is expected to check
5108 the validity of the address and return the V bits for that address.
5109 This can optionally be controlled by a guard, which is assumed to
5110 be True if NULL. In the case where the guard is False at runtime,
5111 the helper will return the didn't-do-the-call value of 0x55..55.
5112 Since that means "completely undefined result", the caller of
5113 this function will need to fix up the result somehow in that
5114 case.
5116 Caller of this function is also expected to have checked the
5117 definedness of |guard| before this point.
5119 static
5120 IRAtom* expr2vbits_Load ( MCEnv* mce,
5121 IREndness end, IRType ty,
5122 IRAtom* addr, UInt bias,
5123 IRAtom* guard )
5125 tl_assert(end == Iend_LE || end == Iend_BE);
5126 switch (shadowTypeV(ty)) {
5127 case Ity_I8:
5128 case Ity_I16:
5129 case Ity_I32:
5130 case Ity_I64:
5131 case Ity_V128:
5132 case Ity_V256:
5133 return expr2vbits_Load_WRK(mce, end, ty, addr, bias, guard);
5134 default:
5135 VG_(tool_panic)("expr2vbits_Load");
5140 /* The most general handler for guarded loads. Assumes the
5141 definedness of GUARD has already been checked by the caller. A
5142 GUARD of NULL is assumed to mean "always True". Generates code to
5143 check the definedness and validity of ADDR.
5145 Generate IR to do a shadow load from ADDR and return the V bits.
5146 The loaded type is TY. The loaded data is then (shadow) widened by
5147 using VWIDEN, which can be Iop_INVALID to denote a no-op. If GUARD
5148 evaluates to False at run time then the returned Vbits are simply
5149 VALT instead. Note therefore that the argument type of VWIDEN must
5150 be TY and the result type of VWIDEN must equal the type of VALT.
5152 static
5153 IRAtom* expr2vbits_Load_guarded_General ( MCEnv* mce,
5154 IREndness end, IRType ty,
5155 IRAtom* addr, UInt bias,
5156 IRAtom* guard,
5157 IROp vwiden, IRAtom* valt )
5159 /* Sanity check the conversion operation, and also set TYWIDE. */
5160 IRType tyWide = Ity_INVALID;
5161 switch (vwiden) {
5162 case Iop_INVALID:
5163 tyWide = ty;
5164 break;
5165 case Iop_16Uto32: case Iop_16Sto32: case Iop_8Uto32: case Iop_8Sto32:
5166 tyWide = Ity_I32;
5167 break;
5168 default:
5169 VG_(tool_panic)("memcheck:expr2vbits_Load_guarded_General");
5172 /* If the guard evaluates to True, this will hold the loaded V bits
5173 at TY. If the guard evaluates to False, this will be all
5174 ones, meaning "all undefined", in which case we will have to
5175 replace it using an ITE below. */
5176 IRAtom* iftrue1
5177 = assignNew('V', mce, ty,
5178 expr2vbits_Load(mce, end, ty, addr, bias, guard));
5179 /* Now (shadow-) widen the loaded V bits to the desired width. In
5180 the guard-is-False case, the allowable widening operators will
5181 in the worst case (unsigned widening) at least leave the
5182 pre-widened part as being marked all-undefined, and in the best
5183 case (signed widening) mark the whole widened result as
5184 undefined. Anyway, it doesn't matter really, since in this case
5185 we will replace said value with the default value |valt| using an
5186 ITE. */
5187 IRAtom* iftrue2
5188 = vwiden == Iop_INVALID
5189 ? iftrue1
5190 : assignNew('V', mce, tyWide, unop(vwiden, iftrue1));
5191 /* These are the V bits we will return if the load doesn't take
5192 place. */
5193 IRAtom* iffalse
5194 = valt;
5195 /* Prepare the cond for the ITE. Convert a NULL cond into
5196 something that iropt knows how to fold out later. */
5197 IRAtom* cond
5198 = guard == NULL ? mkU1(1) : guard;
5199 /* And assemble the final result. */
5200 return assignNew('V', mce, tyWide, IRExpr_ITE(cond, iftrue2, iffalse));
5204 /* A simpler handler for guarded loads, in which there is no
5205 conversion operation, and the default V bit return (when the guard
5206 evaluates to False at runtime) is "all defined". If there is no
5207 guard expression or the guard is always TRUE this function behaves
5208 like expr2vbits_Load. It is assumed that definedness of GUARD has
5209 already been checked at the call site. */
5210 static
5211 IRAtom* expr2vbits_Load_guarded_Simple ( MCEnv* mce,
5212 IREndness end, IRType ty,
5213 IRAtom* addr, UInt bias,
5214 IRAtom *guard )
5216 return expr2vbits_Load_guarded_General(
5217 mce, end, ty, addr, bias, guard, Iop_INVALID, definedOfType(ty)
5222 static
5223 IRAtom* expr2vbits_ITE ( MCEnv* mce,
5224 IRAtom* cond, IRAtom* iftrue, IRAtom* iffalse )
5226 IRAtom *vbitsC, *vbits0, *vbits1;
5227 IRType ty;
5228 /* Given ITE(cond, iftrue, iffalse), generate
5229 ITE(cond, iftrue#, iffalse#) `UifU` PCast(cond#)
5230 That is, steer the V bits like the originals, but trash the
5231 result if the steering value is undefined. This gives
5232 lazy propagation. */
5233 tl_assert(isOriginalAtom(mce, cond));
5234 tl_assert(isOriginalAtom(mce, iftrue));
5235 tl_assert(isOriginalAtom(mce, iffalse));
5237 vbitsC = expr2vbits(mce, cond, HuOth); // could we use HuPCa here?
5238 vbits1 = expr2vbits(mce, iftrue, HuOth);
5239 vbits0 = expr2vbits(mce, iffalse, HuOth);
5240 ty = typeOfIRExpr(mce->sb->tyenv, vbits0);
5242 return
5243 mkUifU(mce, ty, assignNew('V', mce, ty,
5244 IRExpr_ITE(cond, vbits1, vbits0)),
5245 mkPCastTo(mce, ty, vbitsC) );
5248 /* --------- This is the main expression-handling function. --------- */
5250 static
5251 IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e,
5252 HowUsed hu/*use HuOth if unknown*/ )
5254 switch (e->tag) {
5256 case Iex_Get:
5257 return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
5259 case Iex_GetI:
5260 return shadow_GETI( mce, e->Iex.GetI.descr,
5261 e->Iex.GetI.ix, e->Iex.GetI.bias );
5263 case Iex_RdTmp:
5264 return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );
5266 case Iex_Const:
5267 return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));
5269 case Iex_Qop:
5270 return expr2vbits_Qop(
5271 mce,
5272 e->Iex.Qop.details->op,
5273 e->Iex.Qop.details->arg1, e->Iex.Qop.details->arg2,
5274 e->Iex.Qop.details->arg3, e->Iex.Qop.details->arg4
5277 case Iex_Triop:
5278 return expr2vbits_Triop(
5279 mce,
5280 e->Iex.Triop.details->op,
5281 e->Iex.Triop.details->arg1, e->Iex.Triop.details->arg2,
5282 e->Iex.Triop.details->arg3
5285 case Iex_Binop:
5286 return expr2vbits_Binop(
5287 mce,
5288 e->Iex.Binop.op,
5289 e->Iex.Binop.arg1, e->Iex.Binop.arg2,
5293 case Iex_Unop:
5294 return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
5296 case Iex_Load:
5297 return expr2vbits_Load( mce, e->Iex.Load.end,
5298 e->Iex.Load.ty,
5299 e->Iex.Load.addr, 0/*addr bias*/,
5300 NULL/* guard == "always True"*/ );
5302 case Iex_CCall:
5303 return mkLazyN( mce, e->Iex.CCall.args,
5304 e->Iex.CCall.retty,
5305 e->Iex.CCall.cee );
5307 case Iex_ITE:
5308 return expr2vbits_ITE( mce, e->Iex.ITE.cond, e->Iex.ITE.iftrue,
5309 e->Iex.ITE.iffalse);
5311 default:
5312 VG_(printf)("\n");
5313 ppIRExpr(e);
5314 VG_(printf)("\n");
5315 VG_(tool_panic)("memcheck: expr2vbits");
5320 /*------------------------------------------------------------*/
5321 /*--- Generate shadow stmts from all kinds of IRStmts. ---*/
5322 /*------------------------------------------------------------*/
5324 /* Widen a value to the host word size. */
5326 static
5327 IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
5329 IRType ty, tyH;
5331 /* vatom is vbits-value and as such can only have a shadow type. */
5332 tl_assert(isShadowAtom(mce,vatom));
5334 ty = typeOfIRExpr(mce->sb->tyenv, vatom);
5335 tyH = mce->hWordTy;
5337 if (tyH == Ity_I32) {
5338 switch (ty) {
5339 case Ity_I32:
5340 return vatom;
5341 case Ity_I16:
5342 return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
5343 case Ity_I8:
5344 return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
5345 default:
5346 goto unhandled;
5348 } else
5349 if (tyH == Ity_I64) {
5350 switch (ty) {
5351 case Ity_I32:
5352 return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
5353 case Ity_I16:
5354 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
5355 assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
5356 case Ity_I8:
5357 return assignNew('V', mce, tyH, unop(Iop_32Uto64,
5358 assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
5359 default:
5360 goto unhandled;
5362 } else {
5363 goto unhandled;
5365 unhandled:
5366 VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
5367 VG_(tool_panic)("zwidenToHostWord");
5371 /* Generate a shadow store. |addr| is always the original address
5372 atom. You can pass in either originals or V-bits for the data
5373 atom, but obviously not both. This function generates a check for
5374 the definedness and (indirectly) the validity of |addr|, but only
5375 when |guard| evaluates to True at run time (or is NULL).
5377 |guard| :: Ity_I1 controls whether the store really happens; NULL
5378 means it unconditionally does. Note that |guard| itself is not
5379 checked for definedness; the caller of this function must do that
5380 if necessary.
5382 static
5383 void do_shadow_Store ( MCEnv* mce,
5384 IREndness end,
5385 IRAtom* addr, UInt bias,
5386 IRAtom* data, IRAtom* vdata,
5387 IRAtom* guard )
5389 IROp mkAdd;
5390 IRType ty, tyAddr;
5391 void* helper = NULL;
5392 const HChar* hname = NULL;
5393 IRConst* c;
5395 tyAddr = mce->hWordTy;
5396 mkAdd = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
5397 tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
5398 tl_assert( end == Iend_LE || end == Iend_BE );
5400 if (data) {
5401 tl_assert(!vdata);
5402 tl_assert(isOriginalAtom(mce, data));
5403 tl_assert(bias == 0);
5404 vdata = expr2vbits( mce, data, HuOth );
5405 } else {
5406 tl_assert(vdata);
5409 tl_assert(isOriginalAtom(mce,addr));
5410 tl_assert(isShadowAtom(mce,vdata));
5412 if (guard) {
5413 tl_assert(isOriginalAtom(mce, guard));
5414 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
5417 ty = typeOfIRExpr(mce->sb->tyenv, vdata);
5419 // If we're not doing undefined value checking, pretend that this value
5420 // is "all valid". That lets Vex's optimiser remove some of the V bit
5421 // shadow computation ops that precede it.
5422 if (MC_(clo_mc_level) == 1) {
5423 switch (ty) {
5424 case Ity_V256: // V256 weirdness -- used four times
5425 c = IRConst_V256(V_BITS32_DEFINED); break;
5426 case Ity_V128: // V128 weirdness -- used twice
5427 c = IRConst_V128(V_BITS16_DEFINED); break;
5428 case Ity_I64: c = IRConst_U64 (V_BITS64_DEFINED); break;
5429 case Ity_I32: c = IRConst_U32 (V_BITS32_DEFINED); break;
5430 case Ity_I16: c = IRConst_U16 (V_BITS16_DEFINED); break;
5431 case Ity_I8: c = IRConst_U8 (V_BITS8_DEFINED); break;
5432 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
5434 vdata = IRExpr_Const( c );
5437 /* First, emit a definedness test for the address. This also sets
5438 the address (shadow) to 'defined' following the test. Both of
5439 those actions are gated on |guard|. */
5440 complainIfUndefined( mce, addr, guard );
5442 /* Now decide which helper function to call to write the data V
5443 bits into shadow memory. */
5444 if (end == Iend_LE) {
5445 switch (ty) {
5446 case Ity_V256: /* we'll use the helper four times */
5447 case Ity_V128: /* we'll use the helper twice */
5448 case Ity_I64: helper = &MC_(helperc_STOREV64le);
5449 hname = "MC_(helperc_STOREV64le)";
5450 break;
5451 case Ity_I32: helper = &MC_(helperc_STOREV32le);
5452 hname = "MC_(helperc_STOREV32le)";
5453 break;
5454 case Ity_I16: helper = &MC_(helperc_STOREV16le);
5455 hname = "MC_(helperc_STOREV16le)";
5456 break;
5457 case Ity_I8: helper = &MC_(helperc_STOREV8);
5458 hname = "MC_(helperc_STOREV8)";
5459 break;
5460 default: VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
5462 } else {
5463 switch (ty) {
5464 case Ity_V128: /* we'll use the helper twice */
5465 case Ity_I64: helper = &MC_(helperc_STOREV64be);
5466 hname = "MC_(helperc_STOREV64be)";
5467 break;
5468 case Ity_I32: helper = &MC_(helperc_STOREV32be);
5469 hname = "MC_(helperc_STOREV32be)";
5470 break;
5471 case Ity_I16: helper = &MC_(helperc_STOREV16be);
5472 hname = "MC_(helperc_STOREV16be)";
5473 break;
5474 case Ity_I8: helper = &MC_(helperc_STOREV8);
5475 hname = "MC_(helperc_STOREV8)";
5476 break;
5477 /* Note, no V256 case here, because no big-endian target that
5478 we support, has 256 vectors. */
5479 default: VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
5483 if (UNLIKELY(ty == Ity_V256)) {
5485 /* V256-bit case -- phrased in terms of 64 bit units (Qs), with
5486 Q3 being the most significant lane. */
5487 /* These are the offsets of the Qs in memory. */
5488 Int offQ0, offQ1, offQ2, offQ3;
5490 /* Various bits for constructing the 4 lane helper calls */
5491 IRDirty *diQ0, *diQ1, *diQ2, *diQ3;
5492 IRAtom *addrQ0, *addrQ1, *addrQ2, *addrQ3;
5493 IRAtom *vdataQ0, *vdataQ1, *vdataQ2, *vdataQ3;
5494 IRAtom *eBiasQ0, *eBiasQ1, *eBiasQ2, *eBiasQ3;
5496 if (end == Iend_LE) {
5497 offQ0 = 0; offQ1 = 8; offQ2 = 16; offQ3 = 24;
5498 } else {
5499 offQ3 = 0; offQ2 = 8; offQ1 = 16; offQ0 = 24;
5502 eBiasQ0 = tyAddr==Ity_I32 ? mkU32(bias+offQ0) : mkU64(bias+offQ0);
5503 addrQ0 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ0) );
5504 vdataQ0 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_0, vdata));
5505 diQ0 = unsafeIRDirty_0_N(
5506 1/*regparms*/,
5507 hname, VG_(fnptr_to_fnentry)( helper ),
5508 mkIRExprVec_2( addrQ0, vdataQ0 )
5511 eBiasQ1 = tyAddr==Ity_I32 ? mkU32(bias+offQ1) : mkU64(bias+offQ1);
5512 addrQ1 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ1) );
5513 vdataQ1 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_1, vdata));
5514 diQ1 = unsafeIRDirty_0_N(
5515 1/*regparms*/,
5516 hname, VG_(fnptr_to_fnentry)( helper ),
5517 mkIRExprVec_2( addrQ1, vdataQ1 )
5520 eBiasQ2 = tyAddr==Ity_I32 ? mkU32(bias+offQ2) : mkU64(bias+offQ2);
5521 addrQ2 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ2) );
5522 vdataQ2 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_2, vdata));
5523 diQ2 = unsafeIRDirty_0_N(
5524 1/*regparms*/,
5525 hname, VG_(fnptr_to_fnentry)( helper ),
5526 mkIRExprVec_2( addrQ2, vdataQ2 )
5529 eBiasQ3 = tyAddr==Ity_I32 ? mkU32(bias+offQ3) : mkU64(bias+offQ3);
5530 addrQ3 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ3) );
5531 vdataQ3 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_3, vdata));
5532 diQ3 = unsafeIRDirty_0_N(
5533 1/*regparms*/,
5534 hname, VG_(fnptr_to_fnentry)( helper ),
5535 mkIRExprVec_2( addrQ3, vdataQ3 )
5538 if (guard)
5539 diQ0->guard = diQ1->guard = diQ2->guard = diQ3->guard = guard;
5541 setHelperAnns( mce, diQ0 );
5542 setHelperAnns( mce, diQ1 );
5543 setHelperAnns( mce, diQ2 );
5544 setHelperAnns( mce, diQ3 );
5545 stmt( 'V', mce, IRStmt_Dirty(diQ0) );
5546 stmt( 'V', mce, IRStmt_Dirty(diQ1) );
5547 stmt( 'V', mce, IRStmt_Dirty(diQ2) );
5548 stmt( 'V', mce, IRStmt_Dirty(diQ3) );
5551 else if (UNLIKELY(ty == Ity_V128)) {
5553 /* V128-bit case */
5554 /* See comment in next clause re 64-bit regparms */
5555 /* also, need to be careful about endianness */
5557 Int offLo64, offHi64;
5558 IRDirty *diLo64, *diHi64;
5559 IRAtom *addrLo64, *addrHi64;
5560 IRAtom *vdataLo64, *vdataHi64;
5561 IRAtom *eBiasLo64, *eBiasHi64;
5563 if (end == Iend_LE) {
5564 offLo64 = 0;
5565 offHi64 = 8;
5566 } else {
5567 offLo64 = 8;
5568 offHi64 = 0;
5571 eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
5572 addrLo64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
5573 vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
5574 diLo64 = unsafeIRDirty_0_N(
5575 1/*regparms*/,
5576 hname, VG_(fnptr_to_fnentry)( helper ),
5577 mkIRExprVec_2( addrLo64, vdataLo64 )
5579 eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
5580 addrHi64 = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
5581 vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
5582 diHi64 = unsafeIRDirty_0_N(
5583 1/*regparms*/,
5584 hname, VG_(fnptr_to_fnentry)( helper ),
5585 mkIRExprVec_2( addrHi64, vdataHi64 )
5587 if (guard) diLo64->guard = guard;
5588 if (guard) diHi64->guard = guard;
5589 setHelperAnns( mce, diLo64 );
5590 setHelperAnns( mce, diHi64 );
5591 stmt( 'V', mce, IRStmt_Dirty(diLo64) );
5592 stmt( 'V', mce, IRStmt_Dirty(diHi64) );
5594 } else {
5596 IRDirty *di;
5597 IRAtom *addrAct;
5599 /* 8/16/32/64-bit cases */
5600 /* Generate the actual address into addrAct. */
5601 if (bias == 0) {
5602 addrAct = addr;
5603 } else {
5604 IRAtom* eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
5605 addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));
5608 if (ty == Ity_I64) {
5609 /* We can't do this with regparm 2 on 32-bit platforms, since
5610 the back ends aren't clever enough to handle 64-bit
5611 regparm args. Therefore be different. */
5612 di = unsafeIRDirty_0_N(
5613 1/*regparms*/,
5614 hname, VG_(fnptr_to_fnentry)( helper ),
5615 mkIRExprVec_2( addrAct, vdata )
5617 } else {
5618 di = unsafeIRDirty_0_N(
5619 2/*regparms*/,
5620 hname, VG_(fnptr_to_fnentry)( helper ),
5621 mkIRExprVec_2( addrAct,
5622 zwidenToHostWord( mce, vdata ))
5625 if (guard) di->guard = guard;
5626 setHelperAnns( mce, di );
5627 stmt( 'V', mce, IRStmt_Dirty(di) );
5633 /* Do lazy pessimistic propagation through a dirty helper call, by
5634 looking at the annotations on it. This is the most complex part of
5635 Memcheck. */
5637 static IRType szToITy ( Int n )
5639 switch (n) {
5640 case 1: return Ity_I8;
5641 case 2: return Ity_I16;
5642 case 4: return Ity_I32;
5643 case 8: return Ity_I64;
5644 default: VG_(tool_panic)("szToITy(memcheck)");
5648 static
5649 void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
5651 Int i, k, n, toDo, gSz, gOff;
5652 IRAtom *src, *here, *curr;
5653 IRType tySrc, tyDst;
5654 IRTemp dst;
5655 IREndness end;
5657 /* What's the native endianness? We need to know this. */
5658 # if defined(VG_BIGENDIAN)
5659 end = Iend_BE;
5660 # elif defined(VG_LITTLEENDIAN)
5661 end = Iend_LE;
5662 # else
5663 # error "Unknown endianness"
5664 # endif
5666 /* First check the guard. */
5667 complainIfUndefined(mce, d->guard, NULL);
5669 /* Now round up all inputs and PCast over them. */
5670 curr = definedOfType(Ity_I32);
5672 /* Inputs: unmasked args
5673 Note: arguments are evaluated REGARDLESS of the guard expression */
5674 for (i = 0; d->args[i]; i++) {
5675 IRAtom* arg = d->args[i];
5676 if ( (d->cee->mcx_mask & (1<<i))
5677 || UNLIKELY(is_IRExpr_VECRET_or_GSPTR(arg)) ) {
5678 /* ignore this arg */
5679 } else {
5680 here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, arg, HuOth) );
5681 curr = mkUifU32(mce, here, curr);
5685 /* Inputs: guest state that we read. */
5686 for (i = 0; i < d->nFxState; i++) {
5687 tl_assert(d->fxState[i].fx != Ifx_None);
5688 if (d->fxState[i].fx == Ifx_Write)
5689 continue;
5691 /* Enumerate the described state segments */
5692 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
5693 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
5694 gSz = d->fxState[i].size;
5696 /* Ignore any sections marked as 'always defined'. */
5697 if (isAlwaysDefd(mce, gOff, gSz)) {
5698 if (0)
5699 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
5700 gOff, gSz);
5701 continue;
5704 /* This state element is read or modified. So we need to
5705 consider it. If larger than 8 bytes, deal with it in
5706 8-byte chunks. */
5707 while (True) {
5708 tl_assert(gSz >= 0);
5709 if (gSz == 0) break;
5710 n = gSz <= 8 ? gSz : 8;
5711 /* update 'curr' with UifU of the state slice
5712 gOff .. gOff+n-1 */
5713 tySrc = szToITy( n );
5715 /* Observe the guard expression. If it is false use an
5716 all-bits-defined bit pattern */
5717 IRAtom *cond, *iffalse, *iftrue;
5719 cond = assignNew('V', mce, Ity_I1, d->guard);
5720 iftrue = assignNew('V', mce, tySrc, shadow_GET(mce, gOff, tySrc));
5721 iffalse = assignNew('V', mce, tySrc, definedOfType(tySrc));
5722 src = assignNew('V', mce, tySrc,
5723 IRExpr_ITE(cond, iftrue, iffalse));
5725 here = mkPCastTo( mce, Ity_I32, src );
5726 curr = mkUifU32(mce, here, curr);
5727 gSz -= n;
5728 gOff += n;
5733 /* Inputs: memory. First set up some info needed regardless of
5734 whether we're doing reads or writes. */
5736 if (d->mFx != Ifx_None) {
5737 /* Because we may do multiple shadow loads/stores from the same
5738 base address, it's best to do a single test of its
5739 definedness right now. Post-instrumentation optimisation
5740 should remove all but this test. */
5741 IRType tyAddr;
5742 tl_assert(d->mAddr);
5743 complainIfUndefined(mce, d->mAddr, d->guard);
5745 tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
5746 tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
5747 tl_assert(tyAddr == mce->hWordTy); /* not really right */
5750 /* Deal with memory inputs (reads or modifies) */
5751 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
5752 toDo = d->mSize;
5753 /* chew off 32-bit chunks. We don't care about the endianness
5754 since it's all going to be condensed down to a single bit,
5755 but nevertheless choose an endianness which is hopefully
5756 native to the platform. */
5757 while (toDo >= 4) {
5758 here = mkPCastTo(
5759 mce, Ity_I32,
5760 expr2vbits_Load_guarded_Simple(
5761 mce, end, Ity_I32, d->mAddr, d->mSize - toDo, d->guard )
5763 curr = mkUifU32(mce, here, curr);
5764 toDo -= 4;
5766 /* chew off 16-bit chunks */
5767 while (toDo >= 2) {
5768 here = mkPCastTo(
5769 mce, Ity_I32,
5770 expr2vbits_Load_guarded_Simple(
5771 mce, end, Ity_I16, d->mAddr, d->mSize - toDo, d->guard )
5773 curr = mkUifU32(mce, here, curr);
5774 toDo -= 2;
5776 /* chew off the remaining 8-bit chunk, if any */
5777 if (toDo == 1) {
5778 here = mkPCastTo(
5779 mce, Ity_I32,
5780 expr2vbits_Load_guarded_Simple(
5781 mce, end, Ity_I8, d->mAddr, d->mSize - toDo, d->guard )
5783 curr = mkUifU32(mce, here, curr);
5784 toDo -= 1;
5786 tl_assert(toDo == 0);
5789 /* Whew! So curr is a 32-bit V-value summarising pessimistically
5790 all the inputs to the helper. Now we need to re-distribute the
5791 results to all destinations. */
5793 /* Outputs: the destination temporary, if there is one. */
5794 if (d->tmp != IRTemp_INVALID) {
5795 dst = findShadowTmpV(mce, d->tmp);
5796 tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
5797 assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
5800 /* Outputs: guest state that we write or modify. */
5801 for (i = 0; i < d->nFxState; i++) {
5802 tl_assert(d->fxState[i].fx != Ifx_None);
5803 if (d->fxState[i].fx == Ifx_Read)
5804 continue;
5806 /* Enumerate the described state segments */
5807 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
5808 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
5809 gSz = d->fxState[i].size;
5811 /* Ignore any sections marked as 'always defined'. */
5812 if (isAlwaysDefd(mce, gOff, gSz))
5813 continue;
5815 /* This state element is written or modified. So we need to
5816 consider it. If larger than 8 bytes, deal with it in
5817 8-byte chunks. */
5818 while (True) {
5819 tl_assert(gSz >= 0);
5820 if (gSz == 0) break;
5821 n = gSz <= 8 ? gSz : 8;
5822 /* Write suitably-casted 'curr' to the state slice
5823 gOff .. gOff+n-1 */
5824 tyDst = szToITy( n );
5825 do_shadow_PUT( mce, gOff,
5826 NULL, /* original atom */
5827 mkPCastTo( mce, tyDst, curr ), d->guard );
5828 gSz -= n;
5829 gOff += n;
5834 /* Outputs: memory that we write or modify. Same comments about
5835 endianness as above apply. */
5836 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
5837 toDo = d->mSize;
5838 /* chew off 32-bit chunks */
5839 while (toDo >= 4) {
5840 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
5841 NULL, /* original data */
5842 mkPCastTo( mce, Ity_I32, curr ),
5843 d->guard );
5844 toDo -= 4;
5846 /* chew off 16-bit chunks */
5847 while (toDo >= 2) {
5848 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
5849 NULL, /* original data */
5850 mkPCastTo( mce, Ity_I16, curr ),
5851 d->guard );
5852 toDo -= 2;
5854 /* chew off the remaining 8-bit chunk, if any */
5855 if (toDo == 1) {
5856 do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
5857 NULL, /* original data */
5858 mkPCastTo( mce, Ity_I8, curr ),
5859 d->guard );
5860 toDo -= 1;
5862 tl_assert(toDo == 0);
5868 /* We have an ABI hint telling us that [base .. base+len-1] is to
5869 become undefined ("writable"). Generate code to call a helper to
5870 notify the A/V bit machinery of this fact.
5872 We call
5873 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
5874 Addr nia );
5876 static
5877 void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
5879 IRDirty* di;
5881 if (MC_(clo_mc_level) == 3) {
5882 di = unsafeIRDirty_0_N(
5883 3/*regparms*/,
5884 "MC_(helperc_MAKE_STACK_UNINIT_w_o)",
5885 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT_w_o) ),
5886 mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
5888 } else {
5889 /* We ignore the supplied nia, since it is irrelevant. */
5890 tl_assert(MC_(clo_mc_level) == 2 || MC_(clo_mc_level) == 1);
5891 /* Special-case the len==128 case, since that is for amd64-ELF,
5892 which is a very common target. */
5893 if (len == 128) {
5894 di = unsafeIRDirty_0_N(
5895 1/*regparms*/,
5896 "MC_(helperc_MAKE_STACK_UNINIT_128_no_o)",
5897 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT_128_no_o)),
5898 mkIRExprVec_1( base )
5900 } else {
5901 di = unsafeIRDirty_0_N(
5902 2/*regparms*/,
5903 "MC_(helperc_MAKE_STACK_UNINIT_no_o)",
5904 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT_no_o) ),
5905 mkIRExprVec_2( base, mkIRExpr_HWord( (UInt)len) )
5910 stmt( 'V', mce, IRStmt_Dirty(di) );
5914 /* ------ Dealing with IRCAS (big and complex) ------ */
5916 /* FWDS */
5917 static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
5918 IRAtom* baseaddr, Int offset );
5919 static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
5920 static void gen_store_b ( MCEnv* mce, Int szB,
5921 IRAtom* baseaddr, Int offset, IRAtom* dataB,
5922 IRAtom* guard );
5924 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
5925 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
5928 /* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
5929 IRExpr.Consts, else this asserts. If they are both Consts, it
5930 doesn't do anything. So that just leaves the RdTmp case.
5932 In which case: this assigns the shadow value SHADOW to the IR
5933 shadow temporary associated with ORIG. That is, ORIG, being an
5934 original temporary, will have a shadow temporary associated with
5935 it. However, in the case envisaged here, there will so far have
5936 been no IR emitted to actually write a shadow value into that
5937 temporary. What this routine does is to (emit IR to) copy the
5938 value in SHADOW into said temporary, so that after this call,
5939 IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
5940 value in SHADOW.
5942 Point is to allow callers to compute "by hand" a shadow value for
5943 ORIG, and force it to be associated with ORIG.
5945 How do we know that that shadow associated with ORIG has not so far
5946 been assigned to? Well, we don't per se know that, but supposing
5947 it had. Then this routine would create a second assignment to it,
5948 and later the IR sanity checker would barf. But that never
5949 happens. QED.
5951 static void bind_shadow_tmp_to_orig ( UChar how,
5952 MCEnv* mce,
5953 IRAtom* orig, IRAtom* shadow )
5955 tl_assert(isOriginalAtom(mce, orig));
5956 tl_assert(isShadowAtom(mce, shadow));
5957 switch (orig->tag) {
5958 case Iex_Const:
5959 tl_assert(shadow->tag == Iex_Const);
5960 break;
5961 case Iex_RdTmp:
5962 tl_assert(shadow->tag == Iex_RdTmp);
5963 if (how == 'V') {
5964 assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
5965 shadow);
5966 } else {
5967 tl_assert(how == 'B');
5968 assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
5969 shadow);
5971 break;
5972 default:
5973 tl_assert(0);
5978 static
5979 void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
5981 /* Scheme is (both single- and double- cases):
5983 1. fetch data#,dataB (the proposed new value)
5985 2. fetch expd#,expdB (what we expect to see at the address)
5987 3. check definedness of address
5989 4. load old#,oldB from shadow memory; this also checks
5990 addressibility of the address
5992 5. the CAS itself
5994 6. compute "expected == old". See COMMENT_ON_CasCmpEQ below.
5996 7. if "expected == old" (as computed by (6))
5997 store data#,dataB to shadow memory
5999 Note that 5 reads 'old' but 4 reads 'old#'. Similarly, 5 stores
6000 'data' but 7 stores 'data#'. Hence it is possible for the
6001 shadow data to be incorrectly checked and/or updated:
6003 * 7 is at least gated correctly, since the 'expected == old'
6004 condition is derived from outputs of 5. However, the shadow
6005 write could happen too late: imagine after 5 we are
6006 descheduled, a different thread runs, writes a different
6007 (shadow) value at the address, and then we resume, hence
6008 overwriting the shadow value written by the other thread.
6010 Because the original memory access is atomic, there's no way to
6011 make both the original and shadow accesses into a single atomic
6012 thing, hence this is unavoidable.
6014 At least as Valgrind stands, I don't think it's a problem, since
6015 we're single threaded *and* we guarantee that there are no
6016 context switches during the execution of any specific superblock
6017 -- context switches can only happen at superblock boundaries.
6019 If Valgrind ever becomes MT in the future, then it might be more
6020 of a problem. A possible kludge would be to artificially
6021 associate with the location, a lock, which we must acquire and
6022 release around the transaction as a whole. Hmm, that probably
6023 would't work properly since it only guards us against other
6024 threads doing CASs on the same location, not against other
6025 threads doing normal reads and writes.
6027 ------------------------------------------------------------
6029 COMMENT_ON_CasCmpEQ:
6031 Note two things. Firstly, in the sequence above, we compute
6032 "expected == old", but we don't check definedness of it. Why
6033 not? Also, the x86 and amd64 front ends use
6034 Iop_CasCmp{EQ,NE}{8,16,32,64} comparisons to make the equivalent
6035 determination (expected == old ?) for themselves, and we also
6036 don't check definedness for those primops; we just say that the
6037 result is defined. Why? Details follow.
6039 x86/amd64 contains various forms of locked insns:
6040 * lock prefix before all basic arithmetic insn;
6041 eg lock xorl %reg1,(%reg2)
6042 * atomic exchange reg-mem
6043 * compare-and-swaps
6045 Rather than attempt to represent them all, which would be a
6046 royal PITA, I used a result from Maurice Herlihy
6047 (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
6048 demonstrates that compare-and-swap is a primitive more general
6049 than the other two, and so can be used to represent all of them.
6050 So the translation scheme for (eg) lock incl (%reg) is as
6051 follows:
6053 again:
6054 old = * %reg
6055 new = old + 1
6056 atomically { if (* %reg == old) { * %reg = new } else { goto again } }
6058 The "atomically" is the CAS bit. The scheme is always the same:
6059 get old value from memory, compute new value, atomically stuff
6060 new value back in memory iff the old value has not changed (iow,
6061 no other thread modified it in the meantime). If it has changed
6062 then we've been out-raced and we have to start over.
6064 Now that's all very neat, but it has the bad side effect of
6065 introducing an explicit equality test into the translation.
6066 Consider the behaviour of said code on a memory location which
6067 is uninitialised. We will wind up doing a comparison on
6068 uninitialised data, and mc duly complains.
6070 What's difficult about this is, the common case is that the
6071 location is uncontended, and so we're usually comparing the same
6072 value (* %reg) with itself. So we shouldn't complain even if it
6073 is undefined. But mc doesn't know that.
6075 My solution is to mark the == in the IR specially, so as to tell
6076 mc that it almost certainly compares a value with itself, and we
6077 should just regard the result as always defined. Rather than
6078 add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
6079 Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.
6081 So there's always the question of, can this give a false
6082 negative? eg, imagine that initially, * %reg is defined; and we
6083 read that; but then in the gap between the read and the CAS, a
6084 different thread writes an undefined (and different) value at
6085 the location. Then the CAS in this thread will fail and we will
6086 go back to "again:", but without knowing that the trip back
6087 there was based on an undefined comparison. No matter; at least
6088 the other thread won the race and the location is correctly
6089 marked as undefined. What if it wrote an uninitialised version
6090 of the same value that was there originally, though?
6092 etc etc. Seems like there's a small corner case in which we
6093 might lose the fact that something's defined -- we're out-raced
6094 in between the "old = * reg" and the "atomically {", _and_ the
6095 other thread is writing in an undefined version of what's
6096 already there. Well, that seems pretty unlikely.
6100 If we ever need to reinstate it .. code which generates a
6101 definedness test for "expected == old" was removed at r10432 of
6102 this file.
6104 if (cas->oldHi == IRTemp_INVALID) {
6105 do_shadow_CAS_single( mce, cas );
6106 } else {
6107 do_shadow_CAS_double( mce, cas );
6112 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
6114 IRAtom *vdataLo = NULL, *bdataLo = NULL;
6115 IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
6116 IRAtom *voldLo = NULL, *boldLo = NULL;
6117 IRAtom *expd_eq_old = NULL;
6118 IROp opCasCmpEQ;
6119 Int elemSzB;
6120 IRType elemTy;
6121 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
6123 /* single CAS */
6124 tl_assert(cas->oldHi == IRTemp_INVALID);
6125 tl_assert(cas->expdHi == NULL);
6126 tl_assert(cas->dataHi == NULL);
6128 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
6129 switch (elemTy) {
6130 case Ity_I8: elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8; break;
6131 case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
6132 case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
6133 case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
6134 default: tl_assert(0); /* IR defn disallows any other types */
6137 /* 1. fetch data# (the proposed new value) */
6138 tl_assert(isOriginalAtom(mce, cas->dataLo));
6139 vdataLo
6140 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo, HuOth));
6141 tl_assert(isShadowAtom(mce, vdataLo));
6142 if (otrak) {
6143 bdataLo
6144 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
6145 tl_assert(isShadowAtom(mce, bdataLo));
6148 /* 2. fetch expected# (what we expect to see at the address) */
6149 tl_assert(isOriginalAtom(mce, cas->expdLo));
6150 vexpdLo
6151 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo, HuOth));
6152 tl_assert(isShadowAtom(mce, vexpdLo));
6153 if (otrak) {
6154 bexpdLo
6155 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
6156 tl_assert(isShadowAtom(mce, bexpdLo));
6159 /* 3. check definedness of address */
6160 /* 4. fetch old# from shadow memory; this also checks
6161 addressibility of the address */
6162 voldLo
6163 = assignNew(
6164 'V', mce, elemTy,
6165 expr2vbits_Load(
6166 mce,
6167 cas->end, elemTy, cas->addr, 0/*Addr bias*/,
6168 NULL/*always happens*/
6170 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
6171 if (otrak) {
6172 boldLo
6173 = assignNew('B', mce, Ity_I32,
6174 gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
6175 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
6178 /* 5. the CAS itself */
6179 stmt( 'C', mce, IRStmt_CAS(cas) );
6181 /* 6. compute "expected == old" */
6182 /* See COMMENT_ON_CasCmpEQ in this file background/rationale. */
6183 /* Note that 'C' is kinda faking it; it is indeed a non-shadow
6184 tree, but it's not copied from the input block. */
6185 expd_eq_old
6186 = assignNew('C', mce, Ity_I1,
6187 binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));
6189 /* 7. if "expected == old"
6190 store data# to shadow memory */
6191 do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
6192 NULL/*data*/, vdataLo/*vdata*/,
6193 expd_eq_old/*guard for store*/ );
6194 if (otrak) {
6195 gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
6196 bdataLo/*bdata*/,
6197 expd_eq_old/*guard for store*/ );
6202 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
6204 IRAtom *vdataHi = NULL, *bdataHi = NULL;
6205 IRAtom *vdataLo = NULL, *bdataLo = NULL;
6206 IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
6207 IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
6208 IRAtom *voldHi = NULL, *boldHi = NULL;
6209 IRAtom *voldLo = NULL, *boldLo = NULL;
6210 IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
6211 IRAtom *expd_eq_old = NULL, *zero = NULL;
6212 IROp opCasCmpEQ, opOr, opXor;
6213 Int elemSzB, memOffsLo, memOffsHi;
6214 IRType elemTy;
6215 Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
6217 /* double CAS */
6218 tl_assert(cas->oldHi != IRTemp_INVALID);
6219 tl_assert(cas->expdHi != NULL);
6220 tl_assert(cas->dataHi != NULL);
6222 elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
6223 switch (elemTy) {
6224 case Ity_I8:
6225 opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
6226 elemSzB = 1; zero = mkU8(0);
6227 break;
6228 case Ity_I16:
6229 opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
6230 elemSzB = 2; zero = mkU16(0);
6231 break;
6232 case Ity_I32:
6233 opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
6234 elemSzB = 4; zero = mkU32(0);
6235 break;
6236 case Ity_I64:
6237 opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
6238 elemSzB = 8; zero = mkU64(0);
6239 break;
6240 default:
6241 tl_assert(0); /* IR defn disallows any other types */
6244 /* 1. fetch data# (the proposed new value) */
6245 tl_assert(isOriginalAtom(mce, cas->dataHi));
6246 tl_assert(isOriginalAtom(mce, cas->dataLo));
6247 vdataHi
6248 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi, HuOth));
6249 vdataLo
6250 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo, HuOth));
6251 tl_assert(isShadowAtom(mce, vdataHi));
6252 tl_assert(isShadowAtom(mce, vdataLo));
6253 if (otrak) {
6254 bdataHi
6255 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
6256 bdataLo
6257 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
6258 tl_assert(isShadowAtom(mce, bdataHi));
6259 tl_assert(isShadowAtom(mce, bdataLo));
6262 /* 2. fetch expected# (what we expect to see at the address) */
6263 tl_assert(isOriginalAtom(mce, cas->expdHi));
6264 tl_assert(isOriginalAtom(mce, cas->expdLo));
6265 vexpdHi
6266 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi, HuOth));
6267 vexpdLo
6268 = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo, HuOth));
6269 tl_assert(isShadowAtom(mce, vexpdHi));
6270 tl_assert(isShadowAtom(mce, vexpdLo));
6271 if (otrak) {
6272 bexpdHi
6273 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
6274 bexpdLo
6275 = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
6276 tl_assert(isShadowAtom(mce, bexpdHi));
6277 tl_assert(isShadowAtom(mce, bexpdLo));
6280 /* 3. check definedness of address */
6281 /* 4. fetch old# from shadow memory; this also checks
6282 addressibility of the address */
6283 if (cas->end == Iend_LE) {
6284 memOffsLo = 0;
6285 memOffsHi = elemSzB;
6286 } else {
6287 tl_assert(cas->end == Iend_BE);
6288 memOffsLo = elemSzB;
6289 memOffsHi = 0;
6291 voldHi
6292 = assignNew(
6293 'V', mce, elemTy,
6294 expr2vbits_Load(
6295 mce,
6296 cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/,
6297 NULL/*always happens*/
6299 voldLo
6300 = assignNew(
6301 'V', mce, elemTy,
6302 expr2vbits_Load(
6303 mce,
6304 cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/,
6305 NULL/*always happens*/
6307 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
6308 bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
6309 if (otrak) {
6310 boldHi
6311 = assignNew('B', mce, Ity_I32,
6312 gen_load_b(mce, elemSzB, cas->addr,
6313 memOffsHi/*addr bias*/));
6314 boldLo
6315 = assignNew('B', mce, Ity_I32,
6316 gen_load_b(mce, elemSzB, cas->addr,
6317 memOffsLo/*addr bias*/));
6318 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
6319 bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
6322 /* 5. the CAS itself */
6323 stmt( 'C', mce, IRStmt_CAS(cas) );
6325 /* 6. compute "expected == old" */
6326 /* See COMMENT_ON_CasCmpEQ in this file background/rationale. */
6327 /* Note that 'C' is kinda faking it; it is indeed a non-shadow
6328 tree, but it's not copied from the input block. */
6330 xHi = oldHi ^ expdHi;
6331 xLo = oldLo ^ expdLo;
6332 xHL = xHi | xLo;
6333 expd_eq_old = xHL == 0;
6335 xHi = assignNew('C', mce, elemTy,
6336 binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
6337 xLo = assignNew('C', mce, elemTy,
6338 binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
6339 xHL = assignNew('C', mce, elemTy,
6340 binop(opOr, xHi, xLo));
6341 expd_eq_old
6342 = assignNew('C', mce, Ity_I1,
6343 binop(opCasCmpEQ, xHL, zero));
6345 /* 7. if "expected == old"
6346 store data# to shadow memory */
6347 do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
6348 NULL/*data*/, vdataHi/*vdata*/,
6349 expd_eq_old/*guard for store*/ );
6350 do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
6351 NULL/*data*/, vdataLo/*vdata*/,
6352 expd_eq_old/*guard for store*/ );
6353 if (otrak) {
6354 gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
6355 bdataHi/*bdata*/,
6356 expd_eq_old/*guard for store*/ );
6357 gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
6358 bdataLo/*bdata*/,
6359 expd_eq_old/*guard for store*/ );
6364 /* ------ Dealing with LL/SC (not difficult) ------ */
6366 static void do_shadow_LLSC ( MCEnv* mce,
6367 IREndness stEnd,
6368 IRTemp stResult,
6369 IRExpr* stAddr,
6370 IRExpr* stStoredata )
6372 /* In short: treat a load-linked like a normal load followed by an
6373 assignment of the loaded (shadow) data to the result temporary.
6374 Treat a store-conditional like a normal store, and mark the
6375 result temporary as defined. */
6376 IRType resTy = typeOfIRTemp(mce->sb->tyenv, stResult);
6377 IRTemp resTmp = findShadowTmpV(mce, stResult);
6379 tl_assert(isIRAtom(stAddr));
6380 if (stStoredata)
6381 tl_assert(isIRAtom(stStoredata));
6383 if (stStoredata == NULL) {
6384 /* Load Linked */
6385 /* Just treat this as a normal load, followed by an assignment of
6386 the value to .result. */
6387 /* Stay sane */
6388 tl_assert(resTy == Ity_I64 || resTy == Ity_I32
6389 || resTy == Ity_I16 || resTy == Ity_I8);
6390 assign( 'V', mce, resTmp,
6391 expr2vbits_Load(
6392 mce, stEnd, resTy, stAddr, 0/*addr bias*/,
6393 NULL/*always happens*/) );
6394 } else {
6395 /* Store Conditional */
6396 /* Stay sane */
6397 IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
6398 stStoredata);
6399 tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
6400 || dataTy == Ity_I16 || dataTy == Ity_I8);
6401 do_shadow_Store( mce, stEnd,
6402 stAddr, 0/* addr bias */,
6403 stStoredata,
6404 NULL /* shadow data */,
6405 NULL/*guard*/ );
6406 /* This is a store conditional, so it writes to .result a value
6407 indicating whether or not the store succeeded. Just claim
6408 this value is always defined. In the PowerPC interpretation
6409 of store-conditional, definedness of the success indication
6410 depends on whether the address of the store matches the
6411 reservation address. But we can't tell that here (and
6412 anyway, we're not being PowerPC-specific). At least we are
6413 guaranteed that the definedness of the store address, and its
6414 addressibility, will be checked as per normal. So it seems
6415 pretty safe to just say that the success indication is always
6416 defined.
6418 In schemeS, for origin tracking, we must correspondingly set
6419 a no-origin value for the origin shadow of .result.
6421 tl_assert(resTy == Ity_I1);
6422 assign( 'V', mce, resTmp, definedOfType(resTy) );
6427 /* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */
6429 static void do_shadow_StoreG ( MCEnv* mce, IRStoreG* sg )
6431 complainIfUndefined(mce, sg->guard, NULL);
6432 /* do_shadow_Store will generate code to check the definedness and
6433 validity of sg->addr, in the case where sg->guard evaluates to
6434 True at run-time. */
6435 do_shadow_Store( mce, sg->end,
6436 sg->addr, 0/* addr bias */,
6437 sg->data,
6438 NULL /* shadow data */,
6439 sg->guard );
6442 static void do_shadow_LoadG ( MCEnv* mce, IRLoadG* lg )
6444 complainIfUndefined(mce, lg->guard, NULL);
6445 /* expr2vbits_Load_guarded_General will generate code to check the
6446 definedness and validity of lg->addr, in the case where
6447 lg->guard evaluates to True at run-time. */
6449 /* Look at the LoadG's built-in conversion operation, to determine
6450 the source (actual loaded data) type, and the equivalent IROp.
6451 NOTE that implicitly we are taking a widening operation to be
6452 applied to original atoms and producing one that applies to V
6453 bits. Since signed and unsigned widening are self-shadowing,
6454 this is a straight copy of the op (modulo swapping from the
6455 IRLoadGOp form to the IROp form). Note also therefore that this
6456 implicitly duplicates the logic to do with said widening ops in
6457 expr2vbits_Unop. See comment at the start of expr2vbits_Unop. */
6458 IROp vwiden = Iop_INVALID;
6459 IRType loadedTy = Ity_INVALID;
6460 switch (lg->cvt) {
6461 case ILGop_IdentV128: loadedTy = Ity_V128; vwiden = Iop_INVALID; break;
6462 case ILGop_Ident64: loadedTy = Ity_I64; vwiden = Iop_INVALID; break;
6463 case ILGop_Ident32: loadedTy = Ity_I32; vwiden = Iop_INVALID; break;
6464 case ILGop_16Uto32: loadedTy = Ity_I16; vwiden = Iop_16Uto32; break;
6465 case ILGop_16Sto32: loadedTy = Ity_I16; vwiden = Iop_16Sto32; break;
6466 case ILGop_8Uto32: loadedTy = Ity_I8; vwiden = Iop_8Uto32; break;
6467 case ILGop_8Sto32: loadedTy = Ity_I8; vwiden = Iop_8Sto32; break;
6468 default: VG_(tool_panic)("do_shadow_LoadG");
6471 IRAtom* vbits_alt
6472 = expr2vbits( mce, lg->alt, HuOth );
6473 IRAtom* vbits_final
6474 = expr2vbits_Load_guarded_General(mce, lg->end, loadedTy,
6475 lg->addr, 0/*addr bias*/,
6476 lg->guard, vwiden, vbits_alt );
6477 /* And finally, bind the V bits to the destination temporary. */
6478 assign( 'V', mce, findShadowTmpV(mce, lg->dst), vbits_final );
6482 /*------------------------------------------------------------*/
6483 /*--- Origin tracking stuff ---*/
6484 /*------------------------------------------------------------*/
6486 /* Almost identical to findShadowTmpV. */
6487 static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
6489 TempMapEnt* ent;
6490 /* VG_(indexXA) range-checks 'orig', hence no need to check
6491 here. */
6492 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
6493 tl_assert(ent->kind == Orig);
6494 if (ent->shadowB == IRTemp_INVALID) {
6495 IRTemp tmpB
6496 = newTemp( mce, Ity_I32, BSh );
6497 /* newTemp may cause mce->tmpMap to resize, hence previous results
6498 from VG_(indexXA) are invalid. */
6499 ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
6500 tl_assert(ent->kind == Orig);
6501 tl_assert(ent->shadowB == IRTemp_INVALID);
6502 ent->shadowB = tmpB;
6504 return ent->shadowB;
6507 static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
6509 return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
6513 /* Make a guarded origin load, with no special handling in the
6514 didn't-happen case. A GUARD of NULL is assumed to mean "always
6515 True".
6517 Generate IR to do a shadow origins load from BASEADDR+OFFSET and
6518 return the otag. The loaded size is SZB. If GUARD evaluates to
6519 False at run time then the returned otag is zero.
6521 static IRAtom* gen_guarded_load_b ( MCEnv* mce, Int szB,
6522 IRAtom* baseaddr,
6523 Int offset, IRExpr* guard )
6525 void* hFun;
6526 const HChar* hName;
6527 IRTemp bTmp;
6528 IRDirty* di;
6529 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr );
6530 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
6531 IRAtom* ea = baseaddr;
6532 if (offset != 0) {
6533 IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
6534 : mkU64( (Long)(Int)offset );
6535 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
6537 bTmp = newTemp(mce, mce->hWordTy, BSh);
6539 switch (szB) {
6540 case 1: hFun = (void*)&MC_(helperc_b_load1);
6541 hName = "MC_(helperc_b_load1)";
6542 break;
6543 case 2: hFun = (void*)&MC_(helperc_b_load2);
6544 hName = "MC_(helperc_b_load2)";
6545 break;
6546 case 4: hFun = (void*)&MC_(helperc_b_load4);
6547 hName = "MC_(helperc_b_load4)";
6548 break;
6549 case 8: hFun = (void*)&MC_(helperc_b_load8);
6550 hName = "MC_(helperc_b_load8)";
6551 break;
6552 case 16: hFun = (void*)&MC_(helperc_b_load16);
6553 hName = "MC_(helperc_b_load16)";
6554 break;
6555 case 32: hFun = (void*)&MC_(helperc_b_load32);
6556 hName = "MC_(helperc_b_load32)";
6557 break;
6558 default:
6559 VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
6560 tl_assert(0);
6562 di = unsafeIRDirty_1_N(
6563 bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
6564 mkIRExprVec_1( ea )
6566 if (guard) {
6567 di->guard = guard;
6568 /* Ideally the didn't-happen return value here would be
6569 all-zeroes (unknown-origin), so it'd be harmless if it got
6570 used inadvertently. We slum it out with the IR-mandated
6571 default value (0b01 repeating, 0x55 etc) as that'll probably
6572 trump all legitimate otags via Max32, and it's pretty
6573 obviously bogus. */
6575 /* no need to mess with any annotations. This call accesses
6576 neither guest state nor guest memory. */
6577 stmt( 'B', mce, IRStmt_Dirty(di) );
6578 if (mce->hWordTy == Ity_I64) {
6579 /* 64-bit host */
6580 IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
6581 assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
6582 return mkexpr(bTmp32);
6583 } else {
6584 /* 32-bit host */
6585 return mkexpr(bTmp);
6590 /* Generate IR to do a shadow origins load from BASEADDR+OFFSET. The
6591 loaded size is SZB. The load is regarded as unconditional (always
6592 happens).
6594 static IRAtom* gen_load_b ( MCEnv* mce, Int szB, IRAtom* baseaddr,
6595 Int offset )
6597 return gen_guarded_load_b(mce, szB, baseaddr, offset, NULL/*guard*/);
6601 /* The most general handler for guarded origin loads. A GUARD of NULL
6602 is assumed to mean "always True".
6604 Generate IR to do a shadow origin load from ADDR+BIAS and return
6605 the B bits. The loaded type is TY. If GUARD evaluates to False at
6606 run time then the returned B bits are simply BALT instead.
6608 static
6609 IRAtom* expr2ori_Load_guarded_General ( MCEnv* mce,
6610 IRType ty,
6611 IRAtom* addr, UInt bias,
6612 IRAtom* guard, IRAtom* balt )
6614 /* If the guard evaluates to True, this will hold the loaded
6615 origin. If the guard evaluates to False, this will be zero,
6616 meaning "unknown origin", in which case we will have to replace
6617 it using an ITE below. */
6618 IRAtom* iftrue
6619 = assignNew('B', mce, Ity_I32,
6620 gen_guarded_load_b(mce, sizeofIRType(ty),
6621 addr, bias, guard));
6622 /* These are the bits we will return if the load doesn't take
6623 place. */
6624 IRAtom* iffalse
6625 = balt;
6626 /* Prepare the cond for the ITE. Convert a NULL cond into
6627 something that iropt knows how to fold out later. */
6628 IRAtom* cond
6629 = guard == NULL ? mkU1(1) : guard;
6630 /* And assemble the final result. */
6631 return assignNew('B', mce, Ity_I32, IRExpr_ITE(cond, iftrue, iffalse));
6635 /* Generate a shadow origins store. guard :: Ity_I1 controls whether
6636 the store really happens; NULL means it unconditionally does. */
6637 static void gen_store_b ( MCEnv* mce, Int szB,
6638 IRAtom* baseaddr, Int offset, IRAtom* dataB,
6639 IRAtom* guard )
6641 void* hFun;
6642 const HChar* hName;
6643 IRDirty* di;
6644 IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr );
6645 IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
6646 IRAtom* ea = baseaddr;
6647 if (guard) {
6648 tl_assert(isOriginalAtom(mce, guard));
6649 tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
6651 if (offset != 0) {
6652 IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
6653 : mkU64( (Long)(Int)offset );
6654 ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
6656 if (mce->hWordTy == Ity_I64)
6657 dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));
6659 switch (szB) {
6660 case 1: hFun = (void*)&MC_(helperc_b_store1);
6661 hName = "MC_(helperc_b_store1)";
6662 break;
6663 case 2: hFun = (void*)&MC_(helperc_b_store2);
6664 hName = "MC_(helperc_b_store2)";
6665 break;
6666 case 4: hFun = (void*)&MC_(helperc_b_store4);
6667 hName = "MC_(helperc_b_store4)";
6668 break;
6669 case 8: hFun = (void*)&MC_(helperc_b_store8);
6670 hName = "MC_(helperc_b_store8)";
6671 break;
6672 case 16: hFun = (void*)&MC_(helperc_b_store16);
6673 hName = "MC_(helperc_b_store16)";
6674 break;
6675 case 32: hFun = (void*)&MC_(helperc_b_store32);
6676 hName = "MC_(helperc_b_store32)";
6677 break;
6678 default:
6679 tl_assert(0);
6681 di = unsafeIRDirty_0_N( 2/*regparms*/,
6682 hName, VG_(fnptr_to_fnentry)( hFun ),
6683 mkIRExprVec_2( ea, dataB )
6685 /* no need to mess with any annotations. This call accesses
6686 neither guest state nor guest memory. */
6687 if (guard) di->guard = guard;
6688 stmt( 'B', mce, IRStmt_Dirty(di) );
6691 static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
6692 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
6693 if (eTy == Ity_I64)
6694 return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
6695 if (eTy == Ity_I32)
6696 return e;
6697 tl_assert(0);
6700 static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
6701 IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
6702 tl_assert(eTy == Ity_I32);
6703 if (dstTy == Ity_I64)
6704 return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
6705 tl_assert(0);
6709 static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
6711 tl_assert(MC_(clo_mc_level) == 3);
6713 switch (e->tag) {
6715 case Iex_GetI: {
6716 IRRegArray* descr_b;
6717 IRAtom *t1, *t2, *t3, *t4;
6718 IRRegArray* descr = e->Iex.GetI.descr;
6719 IRType equivIntTy
6720 = MC_(get_otrack_reg_array_equiv_int_type)(descr);
6721 /* If this array is unshadowable for whatever reason, use the
6722 usual approximation. */
6723 if (equivIntTy == Ity_INVALID)
6724 return mkU32(0);
6725 tl_assert(sizeofIRType(equivIntTy) >= 4);
6726 tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
6727 descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
6728 equivIntTy, descr->nElems );
6729 /* Do a shadow indexed get of the same size, giving t1. Take
6730 the bottom 32 bits of it, giving t2. Compute into t3 the
6731 origin for the index (almost certainly zero, but there's
6732 no harm in being completely general here, since iropt will
6733 remove any useless code), and fold it in, giving a final
6734 value t4. */
6735 t1 = assignNew( 'B', mce, equivIntTy,
6736 IRExpr_GetI( descr_b, e->Iex.GetI.ix,
6737 e->Iex.GetI.bias ));
6738 t2 = narrowTo32( mce, t1 );
6739 t3 = schemeE( mce, e->Iex.GetI.ix );
6740 t4 = gen_maxU32( mce, t2, t3 );
6741 return t4;
6743 case Iex_CCall: {
6744 Int i;
6745 IRAtom* here;
6746 IRExpr** args = e->Iex.CCall.args;
6747 IRAtom* curr = mkU32(0);
6748 for (i = 0; args[i]; i++) {
6749 tl_assert(i < 32);
6750 tl_assert(isOriginalAtom(mce, args[i]));
6751 /* Only take notice of this arg if the callee's
6752 mc-exclusion mask does not say it is to be excluded. */
6753 if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
6754 /* the arg is to be excluded from definedness checking.
6755 Do nothing. */
6756 if (0) VG_(printf)("excluding %s(%d)\n",
6757 e->Iex.CCall.cee->name, i);
6758 } else {
6759 /* calculate the arg's definedness, and pessimistically
6760 merge it in. */
6761 here = schemeE( mce, args[i] );
6762 curr = gen_maxU32( mce, curr, here );
6765 return curr;
6767 case Iex_Load: {
6768 Int dszB;
6769 dszB = sizeofIRType(e->Iex.Load.ty);
6770 /* assert that the B value for the address is already
6771 available (somewhere) */
6772 tl_assert(isIRAtom(e->Iex.Load.addr));
6773 tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
6774 return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
6776 case Iex_ITE: {
6777 IRAtom* b1 = schemeE( mce, e->Iex.ITE.cond );
6778 IRAtom* b3 = schemeE( mce, e->Iex.ITE.iftrue );
6779 IRAtom* b2 = schemeE( mce, e->Iex.ITE.iffalse );
6780 return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
6782 case Iex_Qop: {
6783 IRAtom* b1 = schemeE( mce, e->Iex.Qop.details->arg1 );
6784 IRAtom* b2 = schemeE( mce, e->Iex.Qop.details->arg2 );
6785 IRAtom* b3 = schemeE( mce, e->Iex.Qop.details->arg3 );
6786 IRAtom* b4 = schemeE( mce, e->Iex.Qop.details->arg4 );
6787 return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
6788 gen_maxU32( mce, b3, b4 ) );
6790 case Iex_Triop: {
6791 IRAtom* b1 = schemeE( mce, e->Iex.Triop.details->arg1 );
6792 IRAtom* b2 = schemeE( mce, e->Iex.Triop.details->arg2 );
6793 IRAtom* b3 = schemeE( mce, e->Iex.Triop.details->arg3 );
6794 return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
6796 case Iex_Binop: {
6797 switch (e->Iex.Binop.op) {
6798 case Iop_CasCmpEQ8: case Iop_CasCmpNE8:
6799 case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
6800 case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
6801 case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
6802 /* Just say these all produce a defined result,
6803 regardless of their arguments. See
6804 COMMENT_ON_CasCmpEQ in this file. */
6805 return mkU32(0);
6806 default: {
6807 IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
6808 IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
6809 return gen_maxU32( mce, b1, b2 );
6812 tl_assert(0);
6813 /*NOTREACHED*/
6815 case Iex_Unop: {
6816 IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
6817 return b1;
6819 case Iex_Const:
6820 return mkU32(0);
6821 case Iex_RdTmp:
6822 return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
6823 case Iex_Get: {
6824 Int b_offset = MC_(get_otrack_shadow_offset)(
6825 e->Iex.Get.offset,
6826 sizeofIRType(e->Iex.Get.ty)
6828 tl_assert(b_offset >= -1
6829 && b_offset <= mce->layout->total_sizeB -4);
6830 if (b_offset >= 0) {
6831 /* FIXME: this isn't an atom! */
6832 return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
6833 Ity_I32 );
6835 return mkU32(0);
6837 default:
6838 VG_(printf)("mc_translate.c: schemeE: unhandled: ");
6839 ppIRExpr(e);
6840 VG_(tool_panic)("memcheck:schemeE");
6845 static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
6847 // This is a hacked version of do_shadow_Dirty
6848 Int i, k, n, toDo, gSz, gOff;
6849 IRAtom *here, *curr;
6850 IRTemp dst;
6852 /* First check the guard. */
6853 curr = schemeE( mce, d->guard );
6855 /* Now round up all inputs and maxU32 over them. */
6857 /* Inputs: unmasked args
6858 Note: arguments are evaluated REGARDLESS of the guard expression */
6859 for (i = 0; d->args[i]; i++) {
6860 IRAtom* arg = d->args[i];
6861 if ( (d->cee->mcx_mask & (1<<i))
6862 || UNLIKELY(is_IRExpr_VECRET_or_GSPTR(arg)) ) {
6863 /* ignore this arg */
6864 } else {
6865 here = schemeE( mce, arg );
6866 curr = gen_maxU32( mce, curr, here );
6870 /* Inputs: guest state that we read. */
6871 for (i = 0; i < d->nFxState; i++) {
6872 tl_assert(d->fxState[i].fx != Ifx_None);
6873 if (d->fxState[i].fx == Ifx_Write)
6874 continue;
6876 /* Enumerate the described state segments */
6877 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
6878 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
6879 gSz = d->fxState[i].size;
6881 /* Ignore any sections marked as 'always defined'. */
6882 if (isAlwaysDefd(mce, gOff, gSz)) {
6883 if (0)
6884 VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
6885 gOff, gSz);
6886 continue;
6889 /* This state element is read or modified. So we need to
6890 consider it. If larger than 4 bytes, deal with it in
6891 4-byte chunks. */
6892 while (True) {
6893 Int b_offset;
6894 tl_assert(gSz >= 0);
6895 if (gSz == 0) break;
6896 n = gSz <= 4 ? gSz : 4;
6897 /* update 'curr' with maxU32 of the state slice
6898 gOff .. gOff+n-1 */
6899 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
6900 if (b_offset != -1) {
6901 /* Observe the guard expression. If it is false use 0, i.e.
6902 nothing is known about the origin */
6903 IRAtom *cond, *iffalse, *iftrue;
6905 cond = assignNew( 'B', mce, Ity_I1, d->guard);
6906 iffalse = mkU32(0);
6907 iftrue = assignNew( 'B', mce, Ity_I32,
6908 IRExpr_Get(b_offset
6909 + 2*mce->layout->total_sizeB,
6910 Ity_I32));
6911 here = assignNew( 'B', mce, Ity_I32,
6912 IRExpr_ITE(cond, iftrue, iffalse));
6913 curr = gen_maxU32( mce, curr, here );
6915 gSz -= n;
6916 gOff += n;
6921 /* Inputs: memory */
6923 if (d->mFx != Ifx_None) {
6924 /* Because we may do multiple shadow loads/stores from the same
6925 base address, it's best to do a single test of its
6926 definedness right now. Post-instrumentation optimisation
6927 should remove all but this test. */
6928 tl_assert(d->mAddr);
6929 here = schemeE( mce, d->mAddr );
6930 curr = gen_maxU32( mce, curr, here );
6933 /* Deal with memory inputs (reads or modifies) */
6934 if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
6935 toDo = d->mSize;
6936 /* chew off 32-bit chunks. We don't care about the endianness
6937 since it's all going to be condensed down to a single bit,
6938 but nevertheless choose an endianness which is hopefully
6939 native to the platform. */
6940 while (toDo >= 4) {
6941 here = gen_guarded_load_b( mce, 4, d->mAddr, d->mSize - toDo,
6942 d->guard );
6943 curr = gen_maxU32( mce, curr, here );
6944 toDo -= 4;
6946 /* handle possible 16-bit excess */
6947 while (toDo >= 2) {
6948 here = gen_guarded_load_b( mce, 2, d->mAddr, d->mSize - toDo,
6949 d->guard );
6950 curr = gen_maxU32( mce, curr, here );
6951 toDo -= 2;
6953 /* chew off the remaining 8-bit chunk, if any */
6954 if (toDo == 1) {
6955 here = gen_guarded_load_b( mce, 1, d->mAddr, d->mSize - toDo,
6956 d->guard );
6957 curr = gen_maxU32( mce, curr, here );
6958 toDo -= 1;
6960 tl_assert(toDo == 0);
6963 /* Whew! So curr is a 32-bit B-value which should give an origin
6964 of some use if any of the inputs to the helper are undefined.
6965 Now we need to re-distribute the results to all destinations. */
6967 /* Outputs: the destination temporary, if there is one. */
6968 if (d->tmp != IRTemp_INVALID) {
6969 dst = findShadowTmpB(mce, d->tmp);
6970 assign( 'V', mce, dst, curr );
6973 /* Outputs: guest state that we write or modify. */
6974 for (i = 0; i < d->nFxState; i++) {
6975 tl_assert(d->fxState[i].fx != Ifx_None);
6976 if (d->fxState[i].fx == Ifx_Read)
6977 continue;
6979 /* Enumerate the described state segments */
6980 for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
6981 gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
6982 gSz = d->fxState[i].size;
6984 /* Ignore any sections marked as 'always defined'. */
6985 if (isAlwaysDefd(mce, gOff, gSz))
6986 continue;
6988 /* This state element is written or modified. So we need to
6989 consider it. If larger than 4 bytes, deal with it in
6990 4-byte chunks. */
6991 while (True) {
6992 Int b_offset;
6993 tl_assert(gSz >= 0);
6994 if (gSz == 0) break;
6995 n = gSz <= 4 ? gSz : 4;
6996 /* Write 'curr' to the state slice gOff .. gOff+n-1 */
6997 b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
6998 if (b_offset != -1) {
7000 /* If the guard expression evaluates to false we simply Put
7001 the value that is already stored in the guest state slot */
7002 IRAtom *cond, *iffalse;
7004 cond = assignNew('B', mce, Ity_I1,
7005 d->guard);
7006 iffalse = assignNew('B', mce, Ity_I32,
7007 IRExpr_Get(b_offset +
7008 2*mce->layout->total_sizeB,
7009 Ity_I32));
7010 curr = assignNew('V', mce, Ity_I32,
7011 IRExpr_ITE(cond, curr, iffalse));
7013 stmt( 'B', mce, IRStmt_Put(b_offset
7014 + 2*mce->layout->total_sizeB,
7015 curr ));
7017 gSz -= n;
7018 gOff += n;
7023 /* Outputs: memory that we write or modify. Same comments about
7024 endianness as above apply. */
7025 if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
7026 toDo = d->mSize;
7027 /* chew off 32-bit chunks */
7028 while (toDo >= 4) {
7029 gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
7030 d->guard );
7031 toDo -= 4;
7033 /* handle possible 16-bit excess */
7034 while (toDo >= 2) {
7035 gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
7036 d->guard );
7037 toDo -= 2;
7039 /* chew off the remaining 8-bit chunk, if any */
7040 if (toDo == 1) {
7041 gen_store_b( mce, 1, d->mAddr, d->mSize - toDo, curr,
7042 d->guard );
7043 toDo -= 1;
7045 tl_assert(toDo == 0);
7050 /* Generate IR for origin shadowing for a general guarded store. */
7051 static void do_origins_Store_guarded ( MCEnv* mce,
7052 IREndness stEnd,
7053 IRExpr* stAddr,
7054 IRExpr* stData,
7055 IRExpr* guard )
7057 Int dszB;
7058 IRAtom* dataB;
7059 /* assert that the B value for the address is already available
7060 (somewhere), since the call to schemeE will want to see it.
7061 XXXX how does this actually ensure that?? */
7062 tl_assert(isIRAtom(stAddr));
7063 tl_assert(isIRAtom(stData));
7064 dszB = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
7065 dataB = schemeE( mce, stData );
7066 gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB, guard );
7070 /* Generate IR for origin shadowing for a plain store. */
7071 static void do_origins_Store_plain ( MCEnv* mce,
7072 IREndness stEnd,
7073 IRExpr* stAddr,
7074 IRExpr* stData )
7076 do_origins_Store_guarded ( mce, stEnd, stAddr, stData,
7077 NULL/*guard*/ );
7081 /* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */
7083 static void do_origins_StoreG ( MCEnv* mce, IRStoreG* sg )
7085 do_origins_Store_guarded( mce, sg->end, sg->addr,
7086 sg->data, sg->guard );
7089 static void do_origins_LoadG ( MCEnv* mce, IRLoadG* lg )
7091 IRType loadedTy = Ity_INVALID;
7092 switch (lg->cvt) {
7093 case ILGop_IdentV128: loadedTy = Ity_V128; break;
7094 case ILGop_Ident64: loadedTy = Ity_I64; break;
7095 case ILGop_Ident32: loadedTy = Ity_I32; break;
7096 case ILGop_16Uto32: loadedTy = Ity_I16; break;
7097 case ILGop_16Sto32: loadedTy = Ity_I16; break;
7098 case ILGop_8Uto32: loadedTy = Ity_I8; break;
7099 case ILGop_8Sto32: loadedTy = Ity_I8; break;
7100 default: VG_(tool_panic)("schemeS.IRLoadG");
7102 IRAtom* ori_alt
7103 = schemeE( mce,lg->alt );
7104 IRAtom* ori_final
7105 = expr2ori_Load_guarded_General(mce, loadedTy,
7106 lg->addr, 0/*addr bias*/,
7107 lg->guard, ori_alt );
7108 /* And finally, bind the origin to the destination temporary. */
7109 assign( 'B', mce, findShadowTmpB(mce, lg->dst), ori_final );
7113 static void schemeS ( MCEnv* mce, IRStmt* st )
7115 tl_assert(MC_(clo_mc_level) == 3);
7117 switch (st->tag) {
7119 case Ist_AbiHint:
7120 /* The value-check instrumenter handles this - by arranging
7121 to pass the address of the next instruction to
7122 MC_(helperc_MAKE_STACK_UNINIT). This is all that needs to
7123 happen for origin tracking w.r.t. AbiHints. So there is
7124 nothing to do here. */
7125 break;
7127 case Ist_PutI: {
7128 IRPutI *puti = st->Ist.PutI.details;
7129 IRRegArray* descr_b;
7130 IRAtom *t1, *t2, *t3, *t4;
7131 IRRegArray* descr = puti->descr;
7132 IRType equivIntTy
7133 = MC_(get_otrack_reg_array_equiv_int_type)(descr);
7134 /* If this array is unshadowable for whatever reason,
7135 generate no code. */
7136 if (equivIntTy == Ity_INVALID)
7137 break;
7138 tl_assert(sizeofIRType(equivIntTy) >= 4);
7139 tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
7140 descr_b
7141 = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
7142 equivIntTy, descr->nElems );
7143 /* Compute a value to Put - the conjoinment of the origin for
7144 the data to be Put-ted (obviously) and of the index value
7145 (not so obviously). */
7146 t1 = schemeE( mce, puti->data );
7147 t2 = schemeE( mce, puti->ix );
7148 t3 = gen_maxU32( mce, t1, t2 );
7149 t4 = zWidenFrom32( mce, equivIntTy, t3 );
7150 stmt( 'B', mce, IRStmt_PutI( mkIRPutI(descr_b, puti->ix,
7151 puti->bias, t4) ));
7152 break;
7155 case Ist_Dirty:
7156 do_origins_Dirty( mce, st->Ist.Dirty.details );
7157 break;
7159 case Ist_Store:
7160 do_origins_Store_plain( mce, st->Ist.Store.end,
7161 st->Ist.Store.addr,
7162 st->Ist.Store.data );
7163 break;
7165 case Ist_StoreG:
7166 do_origins_StoreG( mce, st->Ist.StoreG.details );
7167 break;
7169 case Ist_LoadG:
7170 do_origins_LoadG( mce, st->Ist.LoadG.details );
7171 break;
7173 case Ist_LLSC: {
7174 /* In short: treat a load-linked like a normal load followed
7175 by an assignment of the loaded (shadow) data the result
7176 temporary. Treat a store-conditional like a normal store,
7177 and mark the result temporary as defined. */
7178 if (st->Ist.LLSC.storedata == NULL) {
7179 /* Load Linked */
7180 IRType resTy
7181 = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
7182 IRExpr* vanillaLoad
7183 = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
7184 tl_assert(resTy == Ity_I64 || resTy == Ity_I32
7185 || resTy == Ity_I16 || resTy == Ity_I8);
7186 assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
7187 schemeE(mce, vanillaLoad));
7188 } else {
7189 /* Store conditional */
7190 do_origins_Store_plain( mce, st->Ist.LLSC.end,
7191 st->Ist.LLSC.addr,
7192 st->Ist.LLSC.storedata );
7193 /* For the rationale behind this, see comments at the
7194 place where the V-shadow for .result is constructed, in
7195 do_shadow_LLSC. In short, we regard .result as
7196 always-defined. */
7197 assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
7198 mkU32(0) );
7200 break;
7203 case Ist_Put: {
7204 Int b_offset
7205 = MC_(get_otrack_shadow_offset)(
7206 st->Ist.Put.offset,
7207 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
7209 if (b_offset >= 0) {
7210 /* FIXME: this isn't an atom! */
7211 stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
7212 schemeE( mce, st->Ist.Put.data )) );
7214 break;
7217 case Ist_WrTmp:
7218 assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
7219 schemeE(mce, st->Ist.WrTmp.data) );
7220 break;
7222 case Ist_MBE:
7223 case Ist_NoOp:
7224 case Ist_Exit:
7225 case Ist_IMark:
7226 break;
7228 default:
7229 VG_(printf)("mc_translate.c: schemeS: unhandled: ");
7230 ppIRStmt(st);
7231 VG_(tool_panic)("memcheck:schemeS");
7236 /*------------------------------------------------------------*/
7237 /*--- Post-tree-build final tidying ---*/
7238 /*------------------------------------------------------------*/
7240 /* This exploits the observation that Memcheck often produces
7241 repeated conditional calls of the form
7243 Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)
7245 with the same guard expression G guarding the same helper call.
7246 The second and subsequent calls are redundant. This usually
7247 results from instrumentation of guest code containing multiple
7248 memory references at different constant offsets from the same base
7249 register. After optimisation of the instrumentation, you get a
7250 test for the definedness of the base register for each memory
7251 reference, which is kinda pointless. MC_(final_tidy) therefore
7252 looks for such repeated calls and removes all but the first. */
7255 /* With some testing on perf/bz2.c, on amd64 and x86, compiled with
7256 gcc-5.3.1 -O2, it appears that 16 entries in the array are enough to
7257 get almost all the benefits of this transformation whilst causing
7258 the slide-back case to just often enough to be verifiably
7259 correct. For posterity, the numbers are:
7261 bz2-32
7263 1 4,336 (112,212 -> 1,709,473; ratio 15.2)
7264 2 4,336 (112,194 -> 1,669,895; ratio 14.9)
7265 3 4,336 (112,194 -> 1,660,713; ratio 14.8)
7266 4 4,336 (112,194 -> 1,658,555; ratio 14.8)
7267 5 4,336 (112,194 -> 1,655,447; ratio 14.8)
7268 6 4,336 (112,194 -> 1,655,101; ratio 14.8)
7269 7 4,336 (112,194 -> 1,654,858; ratio 14.7)
7270 8 4,336 (112,194 -> 1,654,810; ratio 14.7)
7271 10 4,336 (112,194 -> 1,654,621; ratio 14.7)
7272 12 4,336 (112,194 -> 1,654,678; ratio 14.7)
7273 16 4,336 (112,194 -> 1,654,494; ratio 14.7)
7274 32 4,336 (112,194 -> 1,654,602; ratio 14.7)
7275 inf 4,336 (112,194 -> 1,654,602; ratio 14.7)
7277 bz2-64
7279 1 4,113 (107,329 -> 1,822,171; ratio 17.0)
7280 2 4,113 (107,329 -> 1,806,443; ratio 16.8)
7281 3 4,113 (107,329 -> 1,803,967; ratio 16.8)
7282 4 4,113 (107,329 -> 1,802,785; ratio 16.8)
7283 5 4,113 (107,329 -> 1,802,412; ratio 16.8)
7284 6 4,113 (107,329 -> 1,802,062; ratio 16.8)
7285 7 4,113 (107,329 -> 1,801,976; ratio 16.8)
7286 8 4,113 (107,329 -> 1,801,886; ratio 16.8)
7287 10 4,113 (107,329 -> 1,801,653; ratio 16.8)
7288 12 4,113 (107,329 -> 1,801,526; ratio 16.8)
7289 16 4,113 (107,329 -> 1,801,298; ratio 16.8)
7290 32 4,113 (107,329 -> 1,800,827; ratio 16.8)
7291 inf 4,113 (107,329 -> 1,800,827; ratio 16.8)
7294 /* Structs for recording which (helper, guard) pairs we have already
7295 seen. */
7297 #define N_TIDYING_PAIRS 16
7299 typedef
7300 struct { void* entry; IRExpr* guard; }
7301 Pair;
7303 typedef
7304 struct {
7305 Pair pairs[N_TIDYING_PAIRS +1/*for bounds checking*/];
7306 UInt pairsUsed;
7308 Pairs;
7311 /* Return True if e1 and e2 definitely denote the same value (used to
7312 compare guards). Return False if unknown; False is the safe
7313 answer. Since guest registers and guest memory do not have the
7314 SSA property we must return False if any Gets or Loads appear in
7315 the expression. This implicitly assumes that e1 and e2 have the
7316 same IR type, which is always true here -- the type is Ity_I1. */
7318 static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
7320 if (e1->tag != e2->tag)
7321 return False;
7322 switch (e1->tag) {
7323 case Iex_Const:
7324 return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
7325 case Iex_Binop:
7326 return e1->Iex.Binop.op == e2->Iex.Binop.op
7327 && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
7328 && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
7329 case Iex_Unop:
7330 return e1->Iex.Unop.op == e2->Iex.Unop.op
7331 && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
7332 case Iex_RdTmp:
7333 return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
7334 case Iex_ITE:
7335 return sameIRValue( e1->Iex.ITE.cond, e2->Iex.ITE.cond )
7336 && sameIRValue( e1->Iex.ITE.iftrue, e2->Iex.ITE.iftrue )
7337 && sameIRValue( e1->Iex.ITE.iffalse, e2->Iex.ITE.iffalse );
7338 case Iex_Qop:
7339 case Iex_Triop:
7340 case Iex_CCall:
7341 /* be lazy. Could define equality for these, but they never
7342 appear to be used. */
7343 return False;
7344 case Iex_Get:
7345 case Iex_GetI:
7346 case Iex_Load:
7347 /* be conservative - these may not give the same value each
7348 time */
7349 return False;
7350 case Iex_Binder:
7351 /* should never see this */
7352 /* fallthrough */
7353 default:
7354 VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
7355 ppIRExpr(e1);
7356 VG_(tool_panic)("memcheck:sameIRValue");
7357 return False;
7361 /* See if 'pairs' already has an entry for (entry, guard). Return
7362 True if so. If not, add an entry. */
7364 static
7365 Bool check_or_add ( Pairs* tidyingEnv, IRExpr* guard, void* entry )
7367 UInt i, n = tidyingEnv->pairsUsed;
7368 tl_assert(n <= N_TIDYING_PAIRS);
7369 for (i = 0; i < n; i++) {
7370 if (tidyingEnv->pairs[i].entry == entry
7371 && sameIRValue(tidyingEnv->pairs[i].guard, guard))
7372 return True;
7374 /* (guard, entry) wasn't found in the array. Add it at the end.
7375 If the array is already full, slide the entries one slot
7376 backwards. This means we will lose to ability to detect
7377 duplicates from the pair in slot zero, but that happens so
7378 rarely that it's unlikely to have much effect on overall code
7379 quality. Also, this strategy loses the check for the oldest
7380 tracked exit (memory reference, basically) and so that is (I'd
7381 guess) least likely to be re-used after this point. */
7382 tl_assert(i == n);
7383 if (n == N_TIDYING_PAIRS) {
7384 for (i = 1; i < N_TIDYING_PAIRS; i++) {
7385 tidyingEnv->pairs[i-1] = tidyingEnv->pairs[i];
7387 tidyingEnv->pairs[N_TIDYING_PAIRS-1].entry = entry;
7388 tidyingEnv->pairs[N_TIDYING_PAIRS-1].guard = guard;
7389 } else {
7390 tl_assert(n < N_TIDYING_PAIRS);
7391 tidyingEnv->pairs[n].entry = entry;
7392 tidyingEnv->pairs[n].guard = guard;
7393 n++;
7394 tidyingEnv->pairsUsed = n;
7396 return False;
7399 static Bool is_helperc_value_checkN_fail ( const HChar* name )
7401 /* This is expensive because it happens a lot. We are checking to
7402 see whether |name| is one of the following 8 strings:
7404 MC_(helperc_value_check8_fail_no_o)
7405 MC_(helperc_value_check4_fail_no_o)
7406 MC_(helperc_value_check0_fail_no_o)
7407 MC_(helperc_value_check1_fail_no_o)
7408 MC_(helperc_value_check8_fail_w_o)
7409 MC_(helperc_value_check0_fail_w_o)
7410 MC_(helperc_value_check1_fail_w_o)
7411 MC_(helperc_value_check4_fail_w_o)
7413 To speed it up, check the common prefix just once, rather than
7414 all 8 times.
7416 const HChar* prefix = "MC_(helperc_value_check";
7418 HChar n, p;
7419 while (True) {
7420 n = *name;
7421 p = *prefix;
7422 if (p == 0) break; /* ran off the end of the prefix */
7423 /* We still have some prefix to use */
7424 if (n == 0) return False; /* have prefix, but name ran out */
7425 if (n != p) return False; /* have both pfx and name, but no match */
7426 name++;
7427 prefix++;
7430 /* Check the part after the prefix. */
7431 tl_assert(*prefix == 0 && *name != 0);
7432 return 0==VG_(strcmp)(name, "8_fail_no_o)")
7433 || 0==VG_(strcmp)(name, "4_fail_no_o)")
7434 || 0==VG_(strcmp)(name, "0_fail_no_o)")
7435 || 0==VG_(strcmp)(name, "1_fail_no_o)")
7436 || 0==VG_(strcmp)(name, "8_fail_w_o)")
7437 || 0==VG_(strcmp)(name, "4_fail_w_o)")
7438 || 0==VG_(strcmp)(name, "0_fail_w_o)")
7439 || 0==VG_(strcmp)(name, "1_fail_w_o)");
7442 IRSB* MC_(final_tidy) ( IRSB* sb_in )
7444 Int i;
7445 IRStmt* st;
7446 IRDirty* di;
7447 IRExpr* guard;
7448 IRCallee* cee;
7449 Bool alreadyPresent;
7450 Pairs pairs;
7452 pairs.pairsUsed = 0;
7454 pairs.pairs[N_TIDYING_PAIRS].entry = (void*)0x123;
7455 pairs.pairs[N_TIDYING_PAIRS].guard = (IRExpr*)0x456;
7457 /* Scan forwards through the statements. Each time a call to one
7458 of the relevant helpers is seen, check if we have made a
7459 previous call to the same helper using the same guard
7460 expression, and if so, delete the call. */
7461 for (i = 0; i < sb_in->stmts_used; i++) {
7462 st = sb_in->stmts[i];
7463 tl_assert(st);
7464 if (st->tag != Ist_Dirty)
7465 continue;
7466 di = st->Ist.Dirty.details;
7467 guard = di->guard;
7468 tl_assert(guard);
7469 if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
7470 cee = di->cee;
7471 if (!is_helperc_value_checkN_fail( cee->name ))
7472 continue;
7473 /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
7474 guard 'guard'. Check if we have already seen a call to this
7475 function with the same guard. If so, delete it. If not,
7476 add it to the set of calls we do know about. */
7477 alreadyPresent = check_or_add( &pairs, guard, cee->addr );
7478 if (alreadyPresent) {
7479 sb_in->stmts[i] = IRStmt_NoOp();
7480 if (0) VG_(printf)("XX\n");
7484 tl_assert(pairs.pairs[N_TIDYING_PAIRS].entry == (void*)0x123);
7485 tl_assert(pairs.pairs[N_TIDYING_PAIRS].guard == (IRExpr*)0x456);
7487 return sb_in;
7490 #undef N_TIDYING_PAIRS
7493 /*------------------------------------------------------------*/
7494 /*--- Startup assertion checking ---*/
7495 /*------------------------------------------------------------*/
7497 void MC_(do_instrumentation_startup_checks)( void )
7499 /* Make a best-effort check to see that is_helperc_value_checkN_fail
7500 is working as we expect. */
7502 # define CHECK(_expected, _string) \
7503 tl_assert((_expected) == is_helperc_value_checkN_fail(_string))
7505 /* It should identify these 8, and no others, as targets. */
7506 CHECK(True, "MC_(helperc_value_check8_fail_no_o)");
7507 CHECK(True, "MC_(helperc_value_check4_fail_no_o)");
7508 CHECK(True, "MC_(helperc_value_check0_fail_no_o)");
7509 CHECK(True, "MC_(helperc_value_check1_fail_no_o)");
7510 CHECK(True, "MC_(helperc_value_check8_fail_w_o)");
7511 CHECK(True, "MC_(helperc_value_check0_fail_w_o)");
7512 CHECK(True, "MC_(helperc_value_check1_fail_w_o)");
7513 CHECK(True, "MC_(helperc_value_check4_fail_w_o)");
7515 /* Ad-hoc selection of other strings gathered via a quick test. */
7516 CHECK(False, "amd64g_dirtyhelper_CPUID_avx2");
7517 CHECK(False, "amd64g_dirtyhelper_RDTSC");
7518 CHECK(False, "MC_(helperc_b_load1)");
7519 CHECK(False, "MC_(helperc_b_load2)");
7520 CHECK(False, "MC_(helperc_b_load4)");
7521 CHECK(False, "MC_(helperc_b_load8)");
7522 CHECK(False, "MC_(helperc_b_load16)");
7523 CHECK(False, "MC_(helperc_b_load32)");
7524 CHECK(False, "MC_(helperc_b_store1)");
7525 CHECK(False, "MC_(helperc_b_store2)");
7526 CHECK(False, "MC_(helperc_b_store4)");
7527 CHECK(False, "MC_(helperc_b_store8)");
7528 CHECK(False, "MC_(helperc_b_store16)");
7529 CHECK(False, "MC_(helperc_b_store32)");
7530 CHECK(False, "MC_(helperc_LOADV8)");
7531 CHECK(False, "MC_(helperc_LOADV16le)");
7532 CHECK(False, "MC_(helperc_LOADV32le)");
7533 CHECK(False, "MC_(helperc_LOADV64le)");
7534 CHECK(False, "MC_(helperc_LOADV128le)");
7535 CHECK(False, "MC_(helperc_LOADV256le)");
7536 CHECK(False, "MC_(helperc_STOREV16le)");
7537 CHECK(False, "MC_(helperc_STOREV32le)");
7538 CHECK(False, "MC_(helperc_STOREV64le)");
7539 CHECK(False, "MC_(helperc_STOREV8)");
7540 CHECK(False, "track_die_mem_stack_8");
7541 CHECK(False, "track_new_mem_stack_8_w_ECU");
7542 CHECK(False, "MC_(helperc_MAKE_STACK_UNINIT_w_o)");
7543 CHECK(False, "VG_(unknown_SP_update_w_ECU)");
7545 # undef CHECK
7549 /*------------------------------------------------------------*/
7550 /*--- Memcheck main ---*/
7551 /*------------------------------------------------------------*/
7553 static Bool isBogusAtom ( IRAtom* at )
7555 if (at->tag == Iex_RdTmp)
7556 return False;
7557 tl_assert(at->tag == Iex_Const);
7559 ULong n = 0;
7560 IRConst* con = at->Iex.Const.con;
7561 switch (con->tag) {
7562 case Ico_U1: return False;
7563 case Ico_U8: n = (ULong)con->Ico.U8; break;
7564 case Ico_U16: n = (ULong)con->Ico.U16; break;
7565 case Ico_U32: n = (ULong)con->Ico.U32; break;
7566 case Ico_U64: n = (ULong)con->Ico.U64; break;
7567 case Ico_F32: return False;
7568 case Ico_F64: return False;
7569 case Ico_F32i: return False;
7570 case Ico_F64i: return False;
7571 case Ico_V128: return False;
7572 case Ico_V256: return False;
7573 default: ppIRExpr(at); tl_assert(0);
7575 /* VG_(printf)("%llx\n", n); */
7576 /* Shortcuts */
7577 if (LIKELY(n <= 0x0000000000001000ULL)) return False;
7578 if (LIKELY(n >= 0xFFFFFFFFFFFFF000ULL)) return False;
7579 /* The list of bogus atoms is: */
7580 return (/*32*/ n == 0xFEFEFEFFULL
7581 /*32*/ || n == 0x80808080ULL
7582 /*32*/ || n == 0x7F7F7F7FULL
7583 /*32*/ || n == 0x7EFEFEFFULL
7584 /*32*/ || n == 0x81010100ULL
7585 /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
7586 /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
7587 /*64*/ || n == 0x0000000000008080ULL
7588 /*64*/ || n == 0x8080808080808080ULL
7589 /*64*/ || n == 0x0101010101010101ULL
7594 /* Does 'st' mention any of the literals identified/listed in
7595 isBogusAtom()? */
7596 static inline Bool containsBogusLiterals ( /*FLAT*/ IRStmt* st )
7598 Int i;
7599 IRExpr* e;
7600 IRDirty* d;
7601 IRCAS* cas;
7602 switch (st->tag) {
7603 case Ist_WrTmp:
7604 e = st->Ist.WrTmp.data;
7605 switch (e->tag) {
7606 case Iex_Get:
7607 case Iex_RdTmp:
7608 return False;
7609 case Iex_Const:
7610 return isBogusAtom(e);
7611 case Iex_Unop:
7612 return isBogusAtom(e->Iex.Unop.arg)
7613 || e->Iex.Unop.op == Iop_GetMSBs8x16;
7614 case Iex_GetI:
7615 return isBogusAtom(e->Iex.GetI.ix);
7616 case Iex_Binop:
7617 return isBogusAtom(e->Iex.Binop.arg1)
7618 || isBogusAtom(e->Iex.Binop.arg2);
7619 case Iex_Triop:
7620 return isBogusAtom(e->Iex.Triop.details->arg1)
7621 || isBogusAtom(e->Iex.Triop.details->arg2)
7622 || isBogusAtom(e->Iex.Triop.details->arg3);
7623 case Iex_Qop:
7624 return isBogusAtom(e->Iex.Qop.details->arg1)
7625 || isBogusAtom(e->Iex.Qop.details->arg2)
7626 || isBogusAtom(e->Iex.Qop.details->arg3)
7627 || isBogusAtom(e->Iex.Qop.details->arg4);
7628 case Iex_ITE:
7629 return isBogusAtom(e->Iex.ITE.cond)
7630 || isBogusAtom(e->Iex.ITE.iftrue)
7631 || isBogusAtom(e->Iex.ITE.iffalse);
7632 case Iex_Load:
7633 return isBogusAtom(e->Iex.Load.addr);
7634 case Iex_CCall:
7635 for (i = 0; e->Iex.CCall.args[i]; i++)
7636 if (isBogusAtom(e->Iex.CCall.args[i]))
7637 return True;
7638 return False;
7639 default:
7640 goto unhandled;
7642 case Ist_Dirty:
7643 d = st->Ist.Dirty.details;
7644 for (i = 0; d->args[i]; i++) {
7645 IRAtom* atom = d->args[i];
7646 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(atom))) {
7647 if (isBogusAtom(atom))
7648 return True;
7651 if (isBogusAtom(d->guard))
7652 return True;
7653 if (d->mAddr && isBogusAtom(d->mAddr))
7654 return True;
7655 return False;
7656 case Ist_Put:
7657 return isBogusAtom(st->Ist.Put.data);
7658 case Ist_PutI:
7659 return isBogusAtom(st->Ist.PutI.details->ix)
7660 || isBogusAtom(st->Ist.PutI.details->data);
7661 case Ist_Store:
7662 return isBogusAtom(st->Ist.Store.addr)
7663 || isBogusAtom(st->Ist.Store.data);
7664 case Ist_StoreG: {
7665 IRStoreG* sg = st->Ist.StoreG.details;
7666 return isBogusAtom(sg->addr) || isBogusAtom(sg->data)
7667 || isBogusAtom(sg->guard);
7669 case Ist_LoadG: {
7670 IRLoadG* lg = st->Ist.LoadG.details;
7671 return isBogusAtom(lg->addr) || isBogusAtom(lg->alt)
7672 || isBogusAtom(lg->guard);
7674 case Ist_Exit:
7675 return isBogusAtom(st->Ist.Exit.guard);
7676 case Ist_AbiHint:
7677 return isBogusAtom(st->Ist.AbiHint.base)
7678 || isBogusAtom(st->Ist.AbiHint.nia);
7679 case Ist_NoOp:
7680 case Ist_IMark:
7681 case Ist_MBE:
7682 return False;
7683 case Ist_CAS:
7684 cas = st->Ist.CAS.details;
7685 return isBogusAtom(cas->addr)
7686 || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
7687 || isBogusAtom(cas->expdLo)
7688 || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
7689 || isBogusAtom(cas->dataLo);
7690 case Ist_LLSC:
7691 return isBogusAtom(st->Ist.LLSC.addr)
7692 || (st->Ist.LLSC.storedata
7693 ? isBogusAtom(st->Ist.LLSC.storedata)
7694 : False);
7695 default:
7696 unhandled:
7697 ppIRStmt(st);
7698 VG_(tool_panic)("hasBogusLiterals");
7703 /* This is the pre-instrumentation analysis. It does a backwards pass over
7704 the stmts in |sb_in| to determine a HowUsed value for each tmp defined in
7705 the block.
7707 Unrelatedly, it also checks all literals in the block with |isBogusAtom|,
7708 as a positive result from that is a strong indication that we need to
7709 expensively instrument add/sub in the block. We do both analyses in one
7710 pass, even though they are independent, so as to avoid the overhead of
7711 having to traverse the whole block twice.
7713 The usage pass proceeds as follows. Let max= be the max operation in the
7714 HowUsed lattice, hence
7716 X max= Y means X = max(X, Y)
7718 then
7720 for t in original tmps . useEnv[t] = HuUnU
7722 for t used in the block's . next field
7723 useEnv[t] max= HuPCa // because jmp targets are PCast-tested
7725 for st iterating *backwards* in the block
7727 match st
7729 case "t1 = load(t2)" // case 1
7730 useEnv[t2] max= HuPCa
7732 case "t1 = add(t2, t3)" // case 2
7733 useEnv[t2] max= useEnv[t1]
7734 useEnv[t3] max= useEnv[t1]
7736 other
7737 for t in st.usedTmps // case 3
7738 useEnv[t] max= HuOth
7739 // same as useEnv[t] = HuOth
7741 The general idea is that we accumulate, in useEnv[], information about
7742 how each tmp is used. That can be updated as we work further back
7743 through the block and find more uses of it, but its HowUsed value can
7744 only ascend the lattice, not descend.
7746 Initially we mark all tmps as unused. In case (1), if a tmp is seen to
7747 be used as a memory address, then its use is at least HuPCa. The point
7748 is that for a memory address we will add instrumentation to check if any
7749 bit of the address is undefined, which means that we won't need expensive
7750 V-bit propagation through an add expression that computed the address --
7751 cheap add instrumentation will be equivalent.
7753 Note in case (1) that if we have previously seen a non-memory-address use
7754 of the tmp, then its use will already be HuOth and will be unchanged by
7755 the max= operation. And if it turns out that the source of the tmp was
7756 an add, then we'll have to expensively instrument the add, because we
7757 can't prove that, for the previous non-memory-address use of the tmp,
7758 cheap and expensive instrumentation will be equivalent.
7760 In case 2, we propagate the usage-mode of the result of an add back
7761 through to its operands. Again, we use max= so as to take account of the
7762 fact that t2 or t3 might later in the block (viz, earlier in the
7763 iteration) have been used in a way that requires expensive add
7764 instrumentation.
7766 In case 3, we deal with all other tmp uses. We assume that we'll need a
7767 result that is as accurate as possible, so we max= HuOth into its use
7768 mode. Since HuOth is the top of the lattice, that's equivalent to just
7769 setting its use to HuOth.
7771 The net result of all this is that:
7773 tmps that are used either
7774 - only as a memory address, or
7775 - only as part of a tree of adds that computes a memory address,
7776 and has no other use
7777 are marked as HuPCa, and so we can instrument their generating Add
7778 nodes cheaply, which is the whole point of this analysis
7780 tmps that are used any other way at all are marked as HuOth
7782 tmps that are unused are marked as HuUnU. We don't expect to see any
7783 since we expect that the incoming IR has had all dead assignments
7784 removed by previous optimisation passes. Nevertheless the analysis is
7785 correct even in the presence of dead tmps.
7787 A final comment on dead tmps. In case 1 and case 2, we could actually
7788 conditionalise the updates thusly:
7790 if (useEnv[t1] > HuUnU) { useEnv[t2] max= HuPCa } // case 1
7792 if (useEnv[t1] > HuUnU) { useEnv[t2] max= useEnv[t1] } // case 2
7793 if (useEnv[t1] > HuUnU) { useEnv[t3] max= useEnv[t1] } // case 2
7795 In other words, if the assigned-to tmp |t1| is never used, then there's
7796 no point in propagating any use through to its operands. That won't
7797 change the final HuPCa-vs-HuOth results, which is what we care about.
7798 Given that we expect to get dead-code-free inputs, there's no point in
7799 adding this extra refinement.
7802 /* Helper for |preInstrumentationAnalysis|. */
7803 static inline void noteTmpUsesIn ( /*MOD*/HowUsed* useEnv,
7804 UInt tyenvUsed,
7805 HowUsed newUse, IRAtom* at )
7807 /* For the atom |at|, declare that for any tmp |t| in |at|, we will have
7808 seen a use of |newUse|. So, merge that info into |t|'s accumulated
7809 use info. */
7810 switch (at->tag) {
7811 case Iex_GSPTR:
7812 case Iex_Const:
7813 return;
7814 case Iex_RdTmp: {
7815 IRTemp t = at->Iex.RdTmp.tmp;
7816 tl_assert(t < tyenvUsed); // "is an original tmp"
7817 // The "max" operation in the lattice
7818 if (newUse > useEnv[t]) useEnv[t] = newUse;
7819 return;
7821 default:
7822 // We should never get here -- it implies non-flat IR
7823 ppIRExpr(at);
7824 VG_(tool_panic)("noteTmpUsesIn");
7826 /*NOTREACHED*/
7827 tl_assert(0);
7831 static void preInstrumentationAnalysis ( /*OUT*/HowUsed** useEnvP,
7832 /*OUT*/Bool* hasBogusLiteralsP,
7833 const IRSB* sb_in )
7835 const UInt nOrigTmps = (UInt)sb_in->tyenv->types_used;
7837 // We've seen no bogus literals so far.
7838 Bool bogus = False;
7840 // This is calloc'd, so implicitly all entries are initialised to HuUnU.
7841 HowUsed* useEnv = VG_(calloc)("mc.preInstrumentationAnalysis.1",
7842 nOrigTmps, sizeof(HowUsed));
7844 // Firstly, roll in contributions from the final dst address.
7845 bogus = isBogusAtom(sb_in->next);
7846 noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, sb_in->next);
7848 // Now work backwards through the stmts.
7849 for (Int i = sb_in->stmts_used-1; i >= 0; i--) {
7850 IRStmt* st = sb_in->stmts[i];
7852 // Deal with literals.
7853 if (LIKELY(!bogus)) {
7854 bogus = containsBogusLiterals(st);
7857 // Deal with tmp uses.
7858 switch (st->tag) {
7859 case Ist_WrTmp: {
7860 IRTemp dst = st->Ist.WrTmp.tmp;
7861 IRExpr* rhs = st->Ist.WrTmp.data;
7862 // This is the one place where we have to consider all possible
7863 // tags for |rhs|, and can't just assume it is a tmp or a const.
7864 switch (rhs->tag) {
7865 case Iex_RdTmp:
7866 // just propagate demand for |dst| into this tmp use.
7867 noteTmpUsesIn(useEnv, nOrigTmps, useEnv[dst], rhs);
7868 break;
7869 case Iex_Unop:
7870 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, rhs->Iex.Unop.arg);
7871 break;
7872 case Iex_Binop:
7873 if (rhs->Iex.Binop.op == Iop_Add64
7874 || rhs->Iex.Binop.op == Iop_Add32) {
7875 // propagate demand for |dst| through to the operands.
7876 noteTmpUsesIn(useEnv, nOrigTmps,
7877 useEnv[dst], rhs->Iex.Binop.arg1);
7878 noteTmpUsesIn(useEnv, nOrigTmps,
7879 useEnv[dst], rhs->Iex.Binop.arg2);
7880 } else {
7881 // just say that the operands are used in some unknown way.
7882 noteTmpUsesIn(useEnv, nOrigTmps,
7883 HuOth, rhs->Iex.Binop.arg1);
7884 noteTmpUsesIn(useEnv, nOrigTmps,
7885 HuOth, rhs->Iex.Binop.arg2);
7887 break;
7888 case Iex_Triop: {
7889 // All operands are used in some unknown way.
7890 IRTriop* tri = rhs->Iex.Triop.details;
7891 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, tri->arg1);
7892 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, tri->arg2);
7893 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, tri->arg3);
7894 break;
7896 case Iex_Qop: {
7897 // All operands are used in some unknown way.
7898 IRQop* qop = rhs->Iex.Qop.details;
7899 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, qop->arg1);
7900 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, qop->arg2);
7901 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, qop->arg3);
7902 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, qop->arg4);
7903 break;
7905 case Iex_Load:
7906 // The address will be checked (== PCasted).
7907 noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, rhs->Iex.Load.addr);
7908 break;
7909 case Iex_ITE:
7910 // The condition is PCasted, the then- and else-values
7911 // aren't.
7912 noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, rhs->Iex.ITE.cond);
7913 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, rhs->Iex.ITE.iftrue);
7914 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, rhs->Iex.ITE.iffalse);
7915 break;
7916 case Iex_CCall:
7917 // The args are used in unknown ways.
7918 for (IRExpr** args = rhs->Iex.CCall.args; *args; args++) {
7919 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, *args);
7921 break;
7922 case Iex_GetI: {
7923 // The index will be checked/PCasted (see do_shadow_GETI)
7924 noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, rhs->Iex.GetI.ix);
7925 break;
7927 case Iex_Const:
7928 case Iex_Get:
7929 break;
7930 default:
7931 ppIRExpr(rhs);
7932 VG_(tool_panic)("preInstrumentationAnalysis:"
7933 " unhandled IRExpr");
7935 break;
7937 case Ist_Store:
7938 // The address will be checked (== PCasted). The data will be
7939 // used in some unknown way.
7940 noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, st->Ist.Store.addr);
7941 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, st->Ist.Store.data);
7942 break;
7943 case Ist_Exit:
7944 // The guard will be checked (== PCasted)
7945 noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, st->Ist.Exit.guard);
7946 break;
7947 case Ist_Put:
7948 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, st->Ist.Put.data);
7949 break;
7950 case Ist_PutI: {
7951 IRPutI* putI = st->Ist.PutI.details;
7952 // The index will be checked/PCasted (see do_shadow_PUTI). The
7953 // data will be used in an unknown way.
7954 noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, putI->ix);
7955 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, putI->data);
7956 break;
7958 case Ist_Dirty: {
7959 IRDirty* d = st->Ist.Dirty.details;
7960 // The guard will be checked (== PCasted)
7961 noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, d->guard);
7962 // The args will be used in unknown ways.
7963 for (IRExpr** args = d->args; *args; args++) {
7964 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, *args);
7966 break;
7968 case Ist_CAS: {
7969 IRCAS* cas = st->Ist.CAS.details;
7970 // Address will be pcasted, everything else used as unknown
7971 noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, cas->addr);
7972 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, cas->expdLo);
7973 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, cas->dataLo);
7974 if (cas->expdHi)
7975 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, cas->expdHi);
7976 if (cas->dataHi)
7977 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, cas->dataHi);
7978 break;
7980 case Ist_AbiHint:
7981 // Both exprs are used in unknown ways. TODO: can we safely
7982 // just ignore AbiHints?
7983 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, st->Ist.AbiHint.base);
7984 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, st->Ist.AbiHint.nia);
7985 break;
7986 case Ist_StoreG: {
7987 // We might be able to do better, and use HuPCa for the addr.
7988 // It's not immediately obvious that we can, because the address
7989 // is regarded as "used" only when the guard is true.
7990 IRStoreG* sg = st->Ist.StoreG.details;
7991 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, sg->addr);
7992 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, sg->data);
7993 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, sg->guard);
7994 break;
7996 case Ist_LoadG: {
7997 // Per similar comments to Ist_StoreG .. not sure whether this
7998 // is really optimal.
7999 IRLoadG* lg = st->Ist.LoadG.details;
8000 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, lg->addr);
8001 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, lg->alt);
8002 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, lg->guard);
8003 break;
8005 case Ist_LLSC: {
8006 noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, st->Ist.LLSC.addr);
8007 if (st->Ist.LLSC.storedata)
8008 noteTmpUsesIn(useEnv, nOrigTmps, HuOth, st->Ist.LLSC.storedata);
8009 break;
8011 case Ist_MBE:
8012 case Ist_IMark:
8013 case Ist_NoOp:
8014 break;
8015 default: {
8016 ppIRStmt(st);
8017 VG_(tool_panic)("preInstrumentationAnalysis: unhandled IRStmt");
8020 } // Now work backwards through the stmts.
8022 // Return the computed use env and the bogus-atom flag.
8023 tl_assert(*useEnvP == NULL);
8024 *useEnvP = useEnv;
8026 tl_assert(*hasBogusLiteralsP == False);
8027 *hasBogusLiteralsP = bogus;
8031 IRSB* MC_(instrument) ( VgCallbackClosure* closure,
8032 IRSB* sb_in,
8033 const VexGuestLayout* layout,
8034 const VexGuestExtents* vge,
8035 const VexArchInfo* archinfo_host,
8036 IRType gWordTy, IRType hWordTy )
8038 Bool verboze = 0||False;
8039 Int i, j, first_stmt;
8040 IRStmt* st;
8041 MCEnv mce;
8042 IRSB* sb_out;
8044 if (gWordTy != hWordTy) {
8045 /* We don't currently support this case. */
8046 VG_(tool_panic)("host/guest word size mismatch");
8049 /* Check we're not completely nuts */
8050 tl_assert(sizeof(UWord) == sizeof(void*));
8051 tl_assert(sizeof(Word) == sizeof(void*));
8052 tl_assert(sizeof(Addr) == sizeof(void*));
8053 tl_assert(sizeof(ULong) == 8);
8054 tl_assert(sizeof(Long) == 8);
8055 tl_assert(sizeof(UInt) == 4);
8056 tl_assert(sizeof(Int) == 4);
8058 tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);
8060 /* Set up SB */
8061 sb_out = deepCopyIRSBExceptStmts(sb_in);
8063 /* Set up the running environment. Both .sb and .tmpMap are
8064 modified as we go along. Note that tmps are added to both
8065 .sb->tyenv and .tmpMap together, so the valid index-set for
8066 those two arrays should always be identical. */
8067 VG_(memset)(&mce, 0, sizeof(mce));
8068 mce.sb = sb_out;
8069 mce.trace = verboze;
8070 mce.layout = layout;
8071 mce.hWordTy = hWordTy;
8072 mce.tmpHowUsed = NULL;
8074 /* BEGIN decide on expense levels for instrumentation. */
8076 /* Initially, select the cheap version of everything for which we have an
8077 option. */
8078 DetailLevelByOp__set_all( &mce.dlbo, DLcheap );
8080 /* Take account of the --expensive-definedness-checks= flag. */
8081 if (MC_(clo_expensive_definedness_checks) == EdcNO) {
8082 /* We just selected 'cheap for everything', so we don't need to do
8083 anything here. mce.tmpHowUsed remains NULL. */
8085 else if (MC_(clo_expensive_definedness_checks) == EdcYES) {
8086 /* Select 'expensive for everything'. mce.tmpHowUsed remains NULL. */
8087 DetailLevelByOp__set_all( &mce.dlbo, DLexpensive );
8089 else {
8090 tl_assert(MC_(clo_expensive_definedness_checks) == EdcAUTO);
8091 /* We'll make our own selection, based on known per-target constraints
8092 and also on analysis of the block to be instrumented. First, set
8093 up default values for detail levels.
8095 On x86 and amd64, we'll routinely encounter code optimised by LLVM
8096 5 and above. Enable accurate interpretation of the following.
8097 LLVM uses adds for some bitfield inserts, and we get a lot of false
8098 errors if the cheap interpretation is used, alas. Could solve this
8099 much better if we knew which of such adds came from x86/amd64 LEA
8100 instructions, since these are the only ones really needing the
8101 expensive interpretation, but that would require some way to tag
8102 them in the _toIR.c front ends, which is a lot of faffing around.
8103 So for now we use preInstrumentationAnalysis() to detect adds which
8104 are used only to construct memory addresses, which is an
8105 approximation to the above, and is self-contained.*/
8106 # if defined(VGA_x86)
8107 mce.dlbo.dl_CmpEQ32_CmpNE32 = DLexpensive;
8108 # elif defined(VGA_amd64)
8109 mce.dlbo.dl_Add64 = DLauto;
8110 mce.dlbo.dl_CmpEQ32_CmpNE32 = DLexpensive;
8111 # endif
8113 /* preInstrumentationAnalysis() will allocate &mce.tmpHowUsed and then
8114 fill it in. */
8115 Bool hasBogusLiterals = False;
8116 preInstrumentationAnalysis( &mce.tmpHowUsed, &hasBogusLiterals, sb_in );
8118 if (hasBogusLiterals) {
8119 /* This happens very rarely. In this case just select expensive
8120 for everything, and throw away the tmp-use analysis results. */
8121 DetailLevelByOp__set_all( &mce.dlbo, DLexpensive );
8122 VG_(free)( mce.tmpHowUsed );
8123 mce.tmpHowUsed = NULL;
8124 } else {
8125 /* Nothing. mce.tmpHowUsed contains tmp-use analysis results,
8126 which will be used for some subset of Iop_{Add,Sub}{32,64},
8127 based on which ones are set to DLauto for this target. */
8131 DetailLevelByOp__check_sanity( &mce.dlbo );
8133 if (0) {
8134 // Debug printing: which tmps have been identified as PCast-only use
8135 if (mce.tmpHowUsed) {
8136 VG_(printf)("Cheapies: ");
8137 for (UInt q = 0; q < sb_in->tyenv->types_used; q++) {
8138 if (mce.tmpHowUsed[q] == HuPCa) {
8139 VG_(printf)("t%u ", q);
8142 VG_(printf)("\n");
8145 // Debug printing: number of ops by detail level
8146 UChar nCheap = DetailLevelByOp__count( &mce.dlbo, DLcheap );
8147 UChar nAuto = DetailLevelByOp__count( &mce.dlbo, DLauto );
8148 UChar nExpensive = DetailLevelByOp__count( &mce.dlbo, DLexpensive );
8149 tl_assert(nCheap + nAuto + nExpensive == 8);
8151 VG_(printf)("%u,%u,%u ", nCheap, nAuto, nExpensive);
8153 /* END decide on expense levels for instrumentation. */
8155 /* Initialise the running the tmp environment. */
8157 mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
8158 sizeof(TempMapEnt));
8159 VG_(hintSizeXA) (mce.tmpMap, sb_in->tyenv->types_used);
8160 for (i = 0; i < sb_in->tyenv->types_used; i++) {
8161 TempMapEnt ent;
8162 ent.kind = Orig;
8163 ent.shadowV = IRTemp_INVALID;
8164 ent.shadowB = IRTemp_INVALID;
8165 VG_(addToXA)( mce.tmpMap, &ent );
8167 tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );
8169 /* Finally, begin instrumentation. */
8170 /* Copy verbatim any IR preamble preceding the first IMark */
8172 tl_assert(mce.sb == sb_out);
8173 tl_assert(mce.sb != sb_in);
8175 i = 0;
8176 while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {
8178 st = sb_in->stmts[i];
8179 tl_assert(st);
8180 tl_assert(isFlatIRStmt(st));
8182 stmt( 'C', &mce, sb_in->stmts[i] );
8183 i++;
8186 /* Nasty problem. IR optimisation of the pre-instrumented IR may
8187 cause the IR following the preamble to contain references to IR
8188 temporaries defined in the preamble. Because the preamble isn't
8189 instrumented, these temporaries don't have any shadows.
8190 Nevertheless uses of them following the preamble will cause
8191 memcheck to generate references to their shadows. End effect is
8192 to cause IR sanity check failures, due to references to
8193 non-existent shadows. This is only evident for the complex
8194 preambles used for function wrapping on TOC-afflicted platforms
8195 (ppc64-linux).
8197 The following loop therefore scans the preamble looking for
8198 assignments to temporaries. For each one found it creates an
8199 assignment to the corresponding (V) shadow temp, marking it as
8200 'defined'. This is the same resulting IR as if the main
8201 instrumentation loop before had been applied to the statement
8202 'tmp = CONSTANT'.
8204 Similarly, if origin tracking is enabled, we must generate an
8205 assignment for the corresponding origin (B) shadow, claiming
8206 no-origin, as appropriate for a defined value.
8208 for (j = 0; j < i; j++) {
8209 if (sb_in->stmts[j]->tag == Ist_WrTmp) {
8210 /* findShadowTmpV checks its arg is an original tmp;
8211 no need to assert that here. */
8212 IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
8213 IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
8214 IRType ty_v = typeOfIRTemp(sb_out->tyenv, tmp_v);
8215 assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
8216 if (MC_(clo_mc_level) == 3) {
8217 IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
8218 tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
8219 assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
8221 if (0) {
8222 VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
8223 ppIRType( ty_v );
8224 VG_(printf)("\n");
8229 /* Iterate over the remaining stmts to generate instrumentation. */
8231 tl_assert(sb_in->stmts_used > 0);
8232 tl_assert(i >= 0);
8233 tl_assert(i < sb_in->stmts_used);
8234 tl_assert(sb_in->stmts[i]->tag == Ist_IMark);
8236 for (/* use current i*/; i < sb_in->stmts_used; i++) {
8238 st = sb_in->stmts[i];
8239 first_stmt = sb_out->stmts_used;
8241 if (verboze) {
8242 VG_(printf)("\n");
8243 ppIRStmt(st);
8244 VG_(printf)("\n");
8247 if (MC_(clo_mc_level) == 3) {
8248 /* See comments on case Ist_CAS below. */
8249 if (st->tag != Ist_CAS)
8250 schemeS( &mce, st );
8253 /* Generate instrumentation code for each stmt ... */
8255 switch (st->tag) {
8257 case Ist_WrTmp: {
8258 IRTemp dst = st->Ist.WrTmp.tmp;
8259 tl_assert(dst < (UInt)sb_in->tyenv->types_used);
8260 HowUsed hu = mce.tmpHowUsed ? mce.tmpHowUsed[dst]
8261 : HuOth/*we don't know, so play safe*/;
8262 assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
8263 expr2vbits( &mce, st->Ist.WrTmp.data, hu ));
8264 break;
8267 case Ist_Put:
8268 do_shadow_PUT( &mce,
8269 st->Ist.Put.offset,
8270 st->Ist.Put.data,
8271 NULL /* shadow atom */, NULL /* guard */ );
8272 break;
8274 case Ist_PutI:
8275 do_shadow_PUTI( &mce, st->Ist.PutI.details);
8276 break;
8278 case Ist_Store:
8279 do_shadow_Store( &mce, st->Ist.Store.end,
8280 st->Ist.Store.addr, 0/* addr bias */,
8281 st->Ist.Store.data,
8282 NULL /* shadow data */,
8283 NULL/*guard*/ );
8284 break;
8286 case Ist_StoreG:
8287 do_shadow_StoreG( &mce, st->Ist.StoreG.details );
8288 break;
8290 case Ist_LoadG:
8291 do_shadow_LoadG( &mce, st->Ist.LoadG.details );
8292 break;
8294 case Ist_Exit:
8295 complainIfUndefined( &mce, st->Ist.Exit.guard, NULL );
8296 break;
8298 case Ist_IMark:
8299 break;
8301 case Ist_NoOp:
8302 case Ist_MBE:
8303 break;
8305 case Ist_Dirty:
8306 do_shadow_Dirty( &mce, st->Ist.Dirty.details );
8307 break;
8309 case Ist_AbiHint:
8310 do_AbiHint( &mce, st->Ist.AbiHint.base,
8311 st->Ist.AbiHint.len,
8312 st->Ist.AbiHint.nia );
8313 break;
8315 case Ist_CAS:
8316 do_shadow_CAS( &mce, st->Ist.CAS.details );
8317 /* Note, do_shadow_CAS copies the CAS itself to the output
8318 block, because it needs to add instrumentation both
8319 before and after it. Hence skip the copy below. Also
8320 skip the origin-tracking stuff (call to schemeS) above,
8321 since that's all tangled up with it too; do_shadow_CAS
8322 does it all. */
8323 break;
8325 case Ist_LLSC:
8326 do_shadow_LLSC( &mce,
8327 st->Ist.LLSC.end,
8328 st->Ist.LLSC.result,
8329 st->Ist.LLSC.addr,
8330 st->Ist.LLSC.storedata );
8331 break;
8333 default:
8334 VG_(printf)("\n");
8335 ppIRStmt(st);
8336 VG_(printf)("\n");
8337 VG_(tool_panic)("memcheck: unhandled IRStmt");
8339 } /* switch (st->tag) */
8341 if (0 && verboze) {
8342 for (j = first_stmt; j < sb_out->stmts_used; j++) {
8343 VG_(printf)(" ");
8344 ppIRStmt(sb_out->stmts[j]);
8345 VG_(printf)("\n");
8347 VG_(printf)("\n");
8350 /* ... and finally copy the stmt itself to the output. Except,
8351 skip the copy of IRCASs; see comments on case Ist_CAS
8352 above. */
8353 if (st->tag != Ist_CAS)
8354 stmt('C', &mce, st);
8357 /* Now we need to complain if the jump target is undefined. */
8358 first_stmt = sb_out->stmts_used;
8360 if (verboze) {
8361 VG_(printf)("sb_in->next = ");
8362 ppIRExpr(sb_in->next);
8363 VG_(printf)("\n\n");
8366 complainIfUndefined( &mce, sb_in->next, NULL );
8368 if (0 && verboze) {
8369 for (j = first_stmt; j < sb_out->stmts_used; j++) {
8370 VG_(printf)(" ");
8371 ppIRStmt(sb_out->stmts[j]);
8372 VG_(printf)("\n");
8374 VG_(printf)("\n");
8377 /* If this fails, there's been some serious snafu with tmp management,
8378 that should be investigated. */
8379 tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
8380 VG_(deleteXA)( mce.tmpMap );
8382 if (mce.tmpHowUsed) {
8383 VG_(free)( mce.tmpHowUsed );
8386 tl_assert(mce.sb == sb_out);
8387 return sb_out;
8391 /*--------------------------------------------------------------------*/
8392 /*--- end mc_translate.c ---*/
8393 /*--------------------------------------------------------------------*/