/*--------------------------------------------------------------------*/
/*--- Instrument IR to perform memory checking operations.        ---*/
/*---                                               mc_translate.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2000-2017 Julian Seward

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/
#include "pub_tool_basics.h"
#include "pub_tool_poolalloc.h"     // For mc_include.h
#include "pub_tool_hashtable.h"     // For mc_include.h
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_machine.h"       // VG_(fnptr_to_fnentry)
#include "pub_tool_xarray.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_libcbase.h"

#include "mc_include.h"
/* FIXMEs JRS 2011-June-16.

   Check the interpretation for vector narrowing and widening ops,
   particularly the saturating ones.  I suspect they are either overly
   pessimistic and/or wrong.

   Iop_QandSQsh64x2 and friends (vector-by-vector bidirectional
   saturating shifts): the interpretation is overly pessimistic.
   See comments on the relevant cases below for details.

   Iop_Sh64Sx2 and friends (vector-by-vector bidirectional shifts,
   both rounding and non-rounding variants): ditto
*/
/* This file implements the Memcheck instrumentation, and in
   particular contains the core of its undefined value detection
   machinery.  For a comprehensive background of the terminology,
   algorithms and rationale used herein, read:

     Using Valgrind to detect undefined value errors with
     bit-precision

     Julian Seward and Nicholas Nethercote

     2005 USENIX Annual Technical Conference (General Track),
     Anaheim, CA, USA, April 10-15, 2005.

   Here is as good a place as any to record exactly when V bits are and
   should be checked, why, and what function is responsible.

   Memcheck complains when an undefined value is used:

   1. In the condition of a conditional branch.  Because it could cause
      incorrect control flow, and thus cause incorrect externally-visible
      behaviour.  [mc_translate.c:complainIfUndefined]

   2. As an argument to a system call, or as the value that specifies
      the system call number.  Because it could cause an incorrect
      externally-visible side effect.  [mc_translate.c:mc_pre_reg_read]

   3. As the address in a load or store.  Because it could cause an
      incorrect value to be used later, which could cause externally-visible
      behaviour (eg. via incorrect control flow or an incorrect system call
      argument)  [complainIfUndefined]

   4. As the target address of a branch.  Because it could cause incorrect
      control flow.  [complainIfUndefined]

   5. As an argument to setenv, unsetenv, or putenv.  Because it could put
      an incorrect value into the external environment.
      [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]

   6. As the index in a GETI or PUTI operation.  I'm not sure why... (njn).
      [complainIfUndefined]

   7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
      VALGRIND_CHECK_VALUE_IS_DEFINED client requests.  Because the user
      requested it.  [in memcheck.h]
   Memcheck also complains, but should not, when an undefined value is used:

   8. As the shift value in certain SIMD shift operations (but not in the
      standard integer shift operations).  This inconsistency is due to
      historical reasons.  [complainIfUndefined]
   Memcheck does not complain, but should, when an undefined value is used:

   9. As an input to a client request.  Because the client request may
      affect the visible behaviour -- see bug #144362 for an example
      involving the malloc replacements in vg_replace_malloc.c and
      VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
      isn't identified.  That bug report also has some info on how to solve
      the problem.  [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]

   In practice, 1 and 2 account for the vast majority of cases.
*/

/* Generation of addr-definedness, addr-validity and
   guard-definedness checks pertaining to loads and stores (Iex_Load,
   Ist_Store, IRLoadG, IRStoreG, LLSC, CAS and Dirty memory
   loads/stores) was re-checked 11 May 2013. */
/*------------------------------------------------------------*/
/*--- Forward decls                                        ---*/
/*------------------------------------------------------------*/

struct _MCEnv;

// See below for comments explaining what this is for.
typedef
   enum __attribute__((packed)) { HuUnU=0, HuPCa=1, HuOth=2 }
   HowUsed;

static IRType  shadowTypeV ( IRType ty );
static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e,
                            HowUsed hu/*use HuOth if unknown*/ );
static IRTemp  findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );

static IRExpr *i128_const_zero(void);
/*------------------------------------------------------------*/
/*--- Memcheck running state, and tmp management.          ---*/
/*------------------------------------------------------------*/

/* For a few (maybe 1%) IROps, we have both a cheaper, less exact vbit
   propagation scheme, and a more expensive, more precise vbit propagation
   scheme.  This enum describes, for such an IROp, which scheme to use. */
typedef
   enum {
      // Use the cheaper, less-exact variant.
      DLcheap,
      // Choose between cheap and expensive based on analysis of the block
      // to be instrumented.  Note that the choice may be done on a
      // per-instance basis of the IROp that this DetailLevel describes.
      DLauto,
      // Use the more expensive, more-exact variant.
      DLexpensive
   }
   DetailLevel;


/* A readonly part of the running state.  For IROps that have both a
   less-exact and more-exact interpretation, records which interpretation is
   to be used. */
typedef
   struct {
      // For Add32/64 and Sub32/64, all 3 settings are allowed.  For the
      // DLauto case, a per-instance decision is to be made by inspecting
      // the associated tmp's entry in MCEnv.tmpHowUsed.
      DetailLevel dl_Add32;
      DetailLevel dl_Add64;
      DetailLevel dl_Sub32;
      DetailLevel dl_Sub64;
      // For Cmp{EQ,NE}{64,32,16,8}, only DLcheap and DLexpensive are
      // allowed.
      DetailLevel dl_CmpEQ64_CmpNE64;
      DetailLevel dl_CmpEQ32_CmpNE32;
      DetailLevel dl_CmpEQ16_CmpNE16;
      DetailLevel dl_CmpEQ8_CmpNE8;
   }
   DetailLevelByOp;
static void DetailLevelByOp__set_all ( /*OUT*/DetailLevelByOp* dlbo,
                                       DetailLevel dl )
{
   dlbo->dl_Add32           = dl;
   dlbo->dl_Add64           = dl;
   dlbo->dl_Sub32           = dl;
   dlbo->dl_Sub64           = dl;
   dlbo->dl_CmpEQ64_CmpNE64 = dl;
   dlbo->dl_CmpEQ32_CmpNE32 = dl;
   dlbo->dl_CmpEQ16_CmpNE16 = dl;
   dlbo->dl_CmpEQ8_CmpNE8   = dl;
}

static void DetailLevelByOp__check_sanity ( const DetailLevelByOp* dlbo )
{
   tl_assert(dlbo->dl_Add32 >= DLcheap && dlbo->dl_Add32 <= DLexpensive);
   tl_assert(dlbo->dl_Add64 >= DLcheap && dlbo->dl_Add64 <= DLexpensive);
   tl_assert(dlbo->dl_Sub32 >= DLcheap && dlbo->dl_Sub32 <= DLexpensive);
   tl_assert(dlbo->dl_Sub64 >= DLcheap && dlbo->dl_Sub64 <= DLexpensive);
   tl_assert(dlbo->dl_CmpEQ64_CmpNE64 == DLcheap
             || dlbo->dl_CmpEQ64_CmpNE64 == DLexpensive);
   tl_assert(dlbo->dl_CmpEQ32_CmpNE32 == DLcheap
             || dlbo->dl_CmpEQ32_CmpNE32 == DLexpensive);
   tl_assert(dlbo->dl_CmpEQ16_CmpNE16 == DLcheap
             || dlbo->dl_CmpEQ16_CmpNE16 == DLexpensive);
   tl_assert(dlbo->dl_CmpEQ8_CmpNE8 == DLcheap
             || dlbo->dl_CmpEQ8_CmpNE8 == DLexpensive);
}

static UInt DetailLevelByOp__count ( const DetailLevelByOp* dlbo,
                                     DetailLevel dl )
{
   UInt n = 0;
   n += (dlbo->dl_Add32 == dl ? 1 : 0);
   n += (dlbo->dl_Add64 == dl ? 1 : 0);
   n += (dlbo->dl_Sub32 == dl ? 1 : 0);
   n += (dlbo->dl_Sub64 == dl ? 1 : 0);
   n += (dlbo->dl_CmpEQ64_CmpNE64 == dl ? 1 : 0);
   n += (dlbo->dl_CmpEQ32_CmpNE32 == dl ? 1 : 0);
   n += (dlbo->dl_CmpEQ16_CmpNE16 == dl ? 1 : 0);
   n += (dlbo->dl_CmpEQ8_CmpNE8 == dl ? 1 : 0);
   return n;
}
/* Carries info about a particular tmp.  The tmp's number is not
   recorded, as this is implied by (equal to) its index in the tmpMap
   in MCEnv.  The tmp's type is also not recorded, as this is present
   in MCEnv.sb->tyenv.

   When .kind is Orig, .shadowV and .shadowB may give the identities
   of the temps currently holding the associated definedness (shadowV)
   and origin (shadowB) values, or these may be IRTemp_INVALID if code
   to compute such values has not yet been emitted.

   When .kind is VSh or BSh then the tmp holds a V- or B- value,
   and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
   illogical for a shadow tmp itself to be shadowed.
*/
typedef
   enum { Orig=1, VSh=2, BSh=3 }
   TempKind;

typedef
   struct {
      TempKind kind;
      IRTemp   shadowV;
      IRTemp   shadowB;
   }
   TempMapEnt;
/* A |HowUsed| value carries analysis results about how values are used,
   pertaining to whether we need to instrument integer adds expensively or
   not.  The running state carries a (readonly) mapping from original tmp to
   a HowUsed value for it.  A usage value can be one of three values,
   forming a 3-point chain lattice.

      HuOth ("Other") used in some arbitrary way
       |
      HuPCa ("PCast") used *only* in effectively a PCast, in which all
       |    we care about is the all-defined vs not-all-defined distinction
       |
      HuUnU ("Unused") not used at all.

   The "safe" (don't-know) end of the lattice is "HuOth".  See comments
   below in |preInstrumentationAnalysis| for further details.
*/
/* (the definition, repeated from the Forward decls section above:)
typedef
   enum __attribute__((packed)) { HuUnU=0, HuPCa=1, HuOth=2 }
   HowUsed;
*/

// Not actually necessary, but we don't want to waste D1 space.
STATIC_ASSERT(sizeof(HowUsed) == 1);
/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the superblock being constructed.  IRStmts are
         added. */
      IRSB* sb;
      Bool  trace;

      /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
         current kind and possibly shadow temps for each temp in the
         IRSB being constructed.  Note that it does not contain the
         type of each tmp.  If you want to know the type, look at the
         relevant entry in sb->tyenv.  It follows that at all times
         during the instrumentation process, the valid indices for
         tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
         the total number of Orig, V- and B- temps allocated so far.

         The reason for this strange split (types in one place, all
         other info in another) is that we need the types to be
         attached to sb so as to make it possible to do
         "typeOfIRExpr(mce->bb->tyenv, ...)" at various places in the
         instrumentation process. */
      XArray* /* of TempMapEnt */ tmpMap;

      /* READONLY: contains details of which ops should be expensively
         instrumented. */
      DetailLevelByOp dlbo;

      /* READONLY: for each original tmp, how the tmp is used.  This is
         computed by |preInstrumentationAnalysis|.  Valid indices are
         0 .. #temps_in_sb-1 (same as for tmpMap). */
      HowUsed* tmpHowUsed;

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      const VexGuestLayout* layout;

      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;
/* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
   demand), as they are encountered.  This is for two reasons.

   (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in
   tables tracking them.

   Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
   table indexed [0 .. n_types-1], which gives the current shadow for
   each original tmp, or INVALID_IRTEMP if none is so far assigned.
   It is necessary to support making multiple assignments to a shadow
   -- specifically, after testing a shadow for definedness, it needs
   to be made defined.  But IR's SSA property disallows this.

   (2) (more important reason): Therefore, when a shadow needs to get
   a new value, a new temporary is created, the value is assigned to
   that, and the tmpMap is updated to reflect the new binding.

   A corollary is that if the tmpMap maps a given tmp to
   IRTemp_INVALID and we are hoping to read that shadow tmp, it means
   there's a read-before-write error in the original tmps.  The IR
   sanity checker should catch all such anomalies, however.
*/
/* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
   both the table in mce->sb and to our auxiliary mapping.  Note that
   newTemp may cause mce->tmpMap to resize, hence previous results
   from VG_(indexXA)(mce->tmpMap) are invalidated. */
static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
{
   Word       newIx;
   TempMapEnt ent;
   IRTemp     tmp = newIRTemp(mce->sb->tyenv, ty);
   ent.kind    = kind;
   ent.shadowV = IRTemp_INVALID;
   ent.shadowB = IRTemp_INVALID;
   newIx = VG_(addToXA)( mce->tmpMap, &ent );
   tl_assert(newIx == (Word)tmp);
   return tmp;
}
/* Find the tmp currently shadowing the given original tmp.  If none
   so far exists, allocate one.  */
static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (ent->shadowV == IRTemp_INVALID) {
      IRTemp tmpV
        = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      tl_assert(ent->shadowV == IRTemp_INVALID);
      ent->shadowV = tmpV;
   }
   return ent->shadowV;
}
/* Allocate a new shadow for the given original tmp.  This means any
   previous shadow is abandoned.  This is needed because it is
   necessary to give a new value to a shadow once it has been tested
   for undefinedness, but unfortunately IR's SSA property disallows
   this.  Instead we must abandon the old shadow, allocate a new one
   and use that instead.

   This is the same as findShadowTmpV, except we don't bother to see
   if a shadow temp already existed -- we simply allocate a new one.
*/
static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (1) {
      IRTemp tmpV
        = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      ent->shadowV = tmpV;
   }
}
/*------------------------------------------------------------*/
/*--- IRAtoms -- a subset of IRExprs                       ---*/
/*------------------------------------------------------------*/

/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
   isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
   input, most of this code deals in atoms.  Usefully, a value atom
   always has a V-value which is also an atom: constants are shadowed
   by constants, and temps are shadowed by the corresponding shadow
   temporary. */

typedef  IRExpr  IRAtom;

/* (used for sanity checks only): is this an atom which looks
   like it's from original code? */
static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp) {
      TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
      return ent->kind == Orig;
   }
   return False;
}

/* (used for sanity checks only): is this an atom which looks
   like it's from shadow code? */
static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp) {
      TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
      return ent->kind == VSh || ent->kind == BSh;
   }
   return False;
}

/* (used for sanity checks only): check that both args are atoms and
   are identically-kinded. */
static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
{
   if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
      return True;
   if (a1->tag == Iex_Const && a2->tag == Iex_Const)
      return True;
   return False;
}
/*------------------------------------------------------------*/
/*--- Type management                                      ---*/
/*------------------------------------------------------------*/

/* Shadow state is always accessed using integer types.  This returns
   an integer type with the same size (as per sizeofIRType) as the
   given type.  The only valid shadow types are Bit, I8, I16, I32,
   I64, I128, V128, V256. */

static IRType shadowTypeV ( IRType ty )
{
   switch (ty) {
      case Ity_I1:
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:
      case Ity_I128: return ty;
      case Ity_F16:  return Ity_I16;
      case Ity_F32:  return Ity_I32;
      case Ity_D32:  return Ity_I32;
      case Ity_F64:  return Ity_I64;
      case Ity_D64:  return Ity_I64;
      case Ity_F128: return Ity_I128;
      case Ity_D128: return Ity_I128;
      case Ity_V128: return Ity_V128;
      case Ity_V256: return Ity_V256;
      default: ppIRType(ty);
               VG_(tool_panic)("memcheck:shadowTypeV");
   }
}
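/* Illustrative sketch (not part of the original file): the effect of the
   mapping above on a few common guest types. */
#if 0
static void example_shadowTypeV_props ( void )
{
   tl_assert(shadowTypeV(Ity_F64)  == Ity_I64);   /* FP is shadowed as integer  */
   tl_assert(shadowTypeV(Ity_F128) == Ity_I128);
   tl_assert(shadowTypeV(Ity_V128) == Ity_V128);  /* vectors shadow themselves  */
}
#endif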
/* Produce a 'defined' value of the given shadow type.  Should only be
   supplied shadow types (Bit/I8/I16/I32/UI64). */
static IRExpr* definedOfType ( IRType ty ) {
   switch (ty) {
      case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
      case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
      case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
      case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
      case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
      case Ity_I128: return i128_const_zero();
      case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
      case Ity_V256: return IRExpr_Const(IRConst_V256(0x00000000));
      default:       VG_(tool_panic)("memcheck:definedOfType");
   }
}
/*------------------------------------------------------------*/
/*--- Constructing IR fragments                            ---*/
/*------------------------------------------------------------*/

/* add stmt to a bb */
static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
   if (mce->trace) {
      VG_(printf)(" %c: ", cat);
      ppIRStmt(st);
      VG_(printf)("\n");
   }
   addStmtToIRSB(mce->sb, st);
}

/* assign value to tmp */
static inline
void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
   stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
}

/* build various kinds of expressions */
#define triop(_op, _arg1, _arg2, _arg3) \
                                 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
#define mkU1(_n)                 IRExpr_Const(IRConst_U1(_n))
#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
/* Bind the given expression to a new temporary, and return the
   temporary.  This effectively converts an arbitrary expression into
   an atom.

   'ty' is the type of 'e' and hence the type that the new temporary
   needs to be.  But passing it in is redundant, since we can deduce
   the type merely by inspecting 'e'.  So at least use that fact to
   assert that the two types agree. */
static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
{
   TempKind k;
   IRTemp   t;
   IRType   tyE = typeOfIRExpr(mce->sb->tyenv, e);

   tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
   switch (cat) {
      case 'V': k = VSh;  break;
      case 'B': k = BSh;  break;
      case 'C': k = Orig; break;
                /* happens when we are making up new "orig"
                   expressions, for IRCAS handling */
      default: tl_assert(0);
   }
   t = newTemp(mce, ty, k);
   assign(cat, mce, t, e);
   return mkexpr(t);
}
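/* Illustrative sketch (not part of the original file): how the expression
   macros and assignNew() combine.  The names |a| and |b| are hypothetical
   I32-typed atoms. */
#if 0
static IRAtom* example_build_and32 ( MCEnv* mce, IRAtom* a, IRAtom* b )
{
   /* binop() merely builds the expression tree; assignNew() binds it to a
      fresh 'V' (shadow) temp and hands back an atom naming that temp. */
   return assignNew('V', mce, Ity_I32, binop(Iop_And32, a, b));
}
#endif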
/*------------------------------------------------------------*/
/*--- Helper functions for 128-bit ops                     ---*/
/*------------------------------------------------------------*/

static IRExpr *i128_const_zero(void)
{
   IRAtom* z64 = IRExpr_Const(IRConst_U64(0));
   return binop(Iop_64HLto128, z64, z64);
}

/* There are no I128-bit loads and/or stores [as generated by any
   current front ends].  So we do not need to worry about that in
   expr2vbits_Load. */
/*------------------------------------------------------------*/
/*--- Constructing definedness primitive ops               ---*/
/*------------------------------------------------------------*/

/* --------- Defined-if-either-defined --------- */

static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
}

static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
}

static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
}

static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
}

static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
}

static IRAtom* mkDifDV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V256, binop(Iop_AndV256, a1, a2));
}
/* --------- Undefined-if-either-undefined --------- */

static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
}

static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
}

static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
}

static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
}

static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6;
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a1));
   tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1));
   tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2));
   tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2));
   tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3));
   tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4));

   return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5));
}

static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
}

static IRAtom* mkUifUV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, a1, a2));
}

static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
   switch (vty) {
      case Ity_I8:   return mkUifU8(mce, a1, a2);
      case Ity_I16:  return mkUifU16(mce, a1, a2);
      case Ity_I32:  return mkUifU32(mce, a1, a2);
      case Ity_I64:  return mkUifU64(mce, a1, a2);
      case Ity_I128: return mkUifU128(mce, a1, a2);
      case Ity_V128: return mkUifUV128(mce, a1, a2);
      case Ity_V256: return mkUifUV256(mce, a1, a2);
      default:
         VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
         VG_(tool_panic)("memcheck:mkUifU");
   }
}
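/* Illustrative sketch (not part of the original file): a plain-C model of
   the two vbit primitives, on 8-bit words where a 1 bit means "undefined".
   UifU is Or (undefinedness accumulates), DifD is And (definedness
   accumulates). */
#if 0
#include <assert.h>
#include <stdint.h>
static uint8_t model_UifU8 ( uint8_t v1, uint8_t v2 ) { return v1 | v2; }
static uint8_t model_DifD8 ( uint8_t v1, uint8_t v2 ) { return v1 & v2; }
static void model_UifU_DifD_check ( void )
{
   assert(model_UifU8(0x0F, 0x03) == 0x0F);   /* undefined if either is undefined */
   assert(model_DifD8(0x0F, 0x03) == 0x03);   /* defined if either is defined     */
}
#endif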
/* --------- The Left-family of operations. --------- */

static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
}

static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
}

static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
}

static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
}
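/* Illustrative note (not part of the original file): VEX's Iop_LeftN family
   computes x | -x, so the lowest undefined (1) bit is kept and smeared
   towards the most significant end.  A plain-C model of the 8-bit case: */
#if 0
#include <stdint.h>
static uint8_t model_Left8 ( uint8_t vbits )
{
   return (uint8_t)(vbits | (uint8_t)-vbits);   /* e.g. 0x04 -> 0xFC */
}
#endif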
/* --------- 'Improvement' functions for AND/OR. --------- */

/* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
   defined (0); all other -> undefined (1).
*/
static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
}

static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
}

static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
}

static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
}

static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
}

static IRAtom* mkImproveANDV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, data, vbits));
}

/* ImproveOR(data, vbits) = ~data OR vbits.  Defined (0) data 1s give
   defined (0); all other -> undefined (1).
*/
static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I8,
             binop(Iop_Or8,
                   assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
                   vbits) );
}

static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I16,
             binop(Iop_Or16,
                   assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
                   vbits) );
}

static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I32,
             binop(Iop_Or32,
                   assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
                   vbits) );
}

static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_I64,
             binop(Iop_Or64,
                   assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
                   vbits) );
}

static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_V128,
             binop(Iop_OrV128,
                   assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
                   vbits) );
}

static IRAtom* mkImproveORV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             'V', mce, Ity_V256,
             binop(Iop_OrV256,
                   assignNew('V', mce, Ity_V256, unop(Iop_NotV256, data)),
                   vbits) );
}
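/* Illustrative sketch (not part of the original file): why the improvement
   terms help for And.  In "res = x & y", a *defined 0* bit in either operand
   forces the corresponding res bit to a defined 0, even if the other
   operand's bit is undefined.  A plain-C model on 8-bit words (1 == an
   undefined vbit), combining UifU with the two ImproveAND terms via DifD: */
#if 0
#include <stdint.h>
static uint8_t model_vbits_of_And8 ( uint8_t x, uint8_t vx,
                                     uint8_t y, uint8_t vy )
{
   uint8_t naive    = (uint8_t)(vx | vy);   /* UifU                      */
   uint8_t improveX = (uint8_t)(x | vx);    /* 0 where x is a defined 0  */
   uint8_t improveY = (uint8_t)(y | vy);    /* 0 where y is a defined 0  */
   return (uint8_t)(naive & improveX & improveY);   /* DifD them together */
}
/* e.g. x wholly undefined, y a defined 0x0F:
   model_vbits_of_And8(0x00,0xFF, 0x0F,0x00) == 0x0F -- the top four result
   bits are a known 0 and hence defined. */
#endif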
/* --------- Pessimising casts. --------- */

/* The function returns an expression of type DST_TY.  If any of the VBITS
   is undefined (value == 1) the resulting expression has all bits set to
   1.  Otherwise, all bits are 0. */

static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  src_ty;
   IRAtom* tmp1;

   /* Note, dst_ty is a shadow type, not an original type. */
   tl_assert(isShadowAtom(mce,vbits));
   src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);

   /* Fast-track some common cases */
   if (src_ty == Ity_I32 && dst_ty == Ity_I32)
      return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));

   if (src_ty == Ity_I64 && dst_ty == Ity_I64)
      return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));

   if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
      /* PCast the arg, then clone it. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
   }

   if (src_ty == Ity_I32 && dst_ty == Ity_V128) {
      /* PCast the arg, then clone it 4 times. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      tmp = assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
      return assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp, tmp));
   }

   if (src_ty == Ity_I32 && dst_ty == Ity_V256) {
      /* PCast the arg, then clone it 8 times. */
      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
      tmp = assignNew('V', mce, Ity_I64,  binop(Iop_32HLto64, tmp, tmp));
      tmp = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp, tmp));
      return assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256, tmp, tmp));
   }

   if (src_ty == Ity_I64 && dst_ty == Ity_I32) {
      /* PCast the arg.  This gives all 0s or all 1s.  Then throw away
         the top half. */
      IRAtom* tmp = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
      return assignNew('V', mce, Ity_I32, unop(Iop_64to32, tmp));
   }

   if (src_ty == Ity_V128 && dst_ty == Ity_I64) {
      /* Use InterleaveHI64x2 to copy the top half of the vector into
         the bottom half.  Then we can UifU it with the original, throw
         away the upper half of the result, and PCast-I64-to-I64
         the lower half. */
      // Generates vbits[127:64] : vbits[127:64]
      IRAtom* hi64hi64
         = assignNew('V', mce, Ity_V128,
                     binop(Iop_InterleaveHI64x2, vbits, vbits));
      // UifU(vbits[127:64],vbits[127:64]) : UifU(vbits[127:64],vbits[63:0])
      //    == vbits[127:64] : UifU(vbits[127:64],vbits[63:0])
      IRAtom* lohi64
         = mkUifUV128(mce, hi64hi64, vbits);
      // Generates UifU(vbits[127:64],vbits[63:0])
      IRAtom* lo64
         = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, lohi64));
      // PCast-to-I64( UifU(vbits[127:64], vbits[63:0] )
      //    == PCast-to-I64( vbits[127:0] )
      IRAtom* res
         = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, lo64));
      return res;
   }

   /* Else do it the slow way .. */
   /* First of all, collapse vbits down to a single bit. */
   tmp1 = NULL;
   switch (src_ty) {
      case Ity_I1:
         tmp1 = vbits;
         break;
      case Ity_I8:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
         break;
      case Ity_I16:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
         break;
      case Ity_I32:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
         break;
      case Ity_I64:
         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
         break;
      case Ity_I128: {
         /* Gah.  Chop it in half, OR the halves together, and compare
            that with zero. */
         IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
         IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
         IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
         tmp1         = assignNew('V', mce, Ity_I1,
                                       unop(Iop_CmpNEZ64, tmp4));
         break;
      }
      case Ity_V128: {
         /* Chop it in half, OR the halves together, and compare that
            with zero. */
         IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vbits));
         IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vbits));
         IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
         tmp1         = assignNew('V', mce, Ity_I1,
                                       unop(Iop_CmpNEZ64, tmp4));
         break;
      }
      default:
         ppIRType(src_ty);
         VG_(tool_panic)("mkPCastTo(1)");
   }

   /* Now widen up to the dst type. */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      case Ity_I128:
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
         return tmp1;
      case Ity_V256:
         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128,
                                                    tmp1, tmp1));
         tmp1 = assignNew('V', mce, Ity_V256, binop(Iop_V128HLtoV256,
                                                    tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}
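/* Illustrative sketch (not part of the original file): a plain-C model of a
   pessimising cast from I32 vbits to I64 vbits -- any undefined bit anywhere
   in the source makes every bit of the result undefined. */
#if 0
#include <stdint.h>
static uint64_t model_PCast_I32_to_I64 ( uint32_t vbits32 )
{
   return vbits32 == 0 ? 0ULL : ~0ULL;
}
#endif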
/* This is a minor variant.  It takes an arg of some type and returns
   a value of the same type.  The result consists entirely of Defined
   (zero) bits except its least significant bit, which is a PCast of
   the entire argument down to a single bit. */
static IRAtom* mkPCastXXtoXXlsb ( MCEnv* mce, IRAtom* varg, IRType ty )
{
   if (ty == Ity_V128) {
      /* --- Case for V128 --- */
      IRAtom* varg128 = varg;
      // generates: PCast-to-I64(varg128)
      IRAtom* pcdTo64 = mkPCastTo(mce, Ity_I64, varg128);
      // Now introduce zeros (defined bits) in the top 63 places
      // generates: Def--(63)--Def PCast-to-I1(varg128)
      IRAtom* d63pc
         = assignNew('V', mce, Ity_I64, binop(Iop_And64, pcdTo64, mkU64(1)));
      // generates: Def--(64)--Def
      IRAtom* d64
         = definedOfType(Ity_I64);
      // generates: Def--(127)--Def PCast-to-I1(varg128)
      IRAtom* res
         = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, d64, d63pc));
      return res;
   }
   if (ty == Ity_I64) {
      /* --- Case for I64 --- */
      // PCast to 64
      IRAtom* pcd = mkPCastTo(mce, Ity_I64, varg);
      // Zero (Def) out the top 63 bits
      IRAtom* res
         = assignNew('V', mce, Ity_I64, binop(Iop_And64, pcd, mkU64(1)));
      return res;
   }
   /*NOTREACHED*/
   tl_assert(0);
}
/* --------- Optimistic casts. --------- */

/* The function takes and returns an expression of type TY.  If any of the
   VBITS indicate defined (value == 0) the resulting expression has all bits
   set to 0.  Otherwise, all bits are 1.  In words, if any bits are defined
   then all bits are made to be defined.

   In short we compute (vbits - (vbits >>u 1)) >>s (bitsize(vbits)-1).
*/
static IRAtom* mkOCastAt( MCEnv* mce, IRType ty, IRAtom* vbits )
{
   IROp opSUB, opSHR, opSAR;
   UInt sh;

   switch (ty) {
      case Ity_I64:
         opSUB = Iop_Sub64; opSHR = Iop_Shr64; opSAR = Iop_Sar64; sh = 63;
         break;
      case Ity_I32:
         opSUB = Iop_Sub32; opSHR = Iop_Shr32; opSAR = Iop_Sar32; sh = 31;
         break;
      case Ity_I16:
         opSUB = Iop_Sub16; opSHR = Iop_Shr16; opSAR = Iop_Sar16; sh = 15;
         break;
      case Ity_I8:
         opSUB = Iop_Sub8; opSHR = Iop_Shr8; opSAR = Iop_Sar8; sh = 7;
         break;
      default:
         ppIRType(ty);
         VG_(tool_panic)("mkOCastTo");
   }

   IRAtom *shr1, *at;
   shr1 = assignNew('V', mce,ty, binop(opSHR, vbits, mkU8(1)));
   at   = assignNew('V', mce,ty, binop(opSUB, vbits, shr1));
   at   = assignNew('V', mce,ty, binop(opSAR, at, mkU8(sh)));
   return at;
}
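/* Illustrative sketch (not part of the original file): checking the
   straight-line formula above on 8-bit values.  The right shift of a
   negative signed value is implementation-defined in ISO C but arithmetic
   on the targets Valgrind supports. */
#if 0
#include <stdint.h>
static uint8_t model_OCast8 ( uint8_t vbits )
{
   uint8_t t = (uint8_t)(vbits - (vbits >> 1));
   return (uint8_t)((int8_t)t >> 7);
}
/* model_OCast8(0xFF) == 0xFF  (no bit defined -> all undefined)
   model_OCast8(0xFE) == 0x00  (at least one defined bit -> all defined)
   model_OCast8(0x00) == 0x00 */
#endif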
/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */

/* Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
   PCasting to Ity_U1.  However, sometimes it is necessary to be more
   accurate.  The insight is that the result is defined if two
   corresponding bits can be found, one from each argument, so that
   both bits are defined but are different -- that makes EQ say "No"
   and NE say "Yes".  Hence, we compute an improvement term and DifD
   it onto the "normal" (UifU) result.

   The improvement term is built from a value "vec", where

     vec contains 0 (defined) bits where the corresponding arg bits
     are defined but different, and 1 bits otherwise.

     vec = Or<sz>( vxx,   // 0 iff bit defined
                   vyy,   // 0 iff bit defined
                   Not<sz>(Xor<sz>( xx, yy ))  // 0 iff bits different
                 )

     If any bit of vec is 0, the result is defined and so the
     improvement term should produce 0...0, else it should produce
     1...1.

     Hence require for the improvement term:

        OCast(vec) = if vec == 1...1 then 1...1 else 0...0

     which you can think of as an "optimistic cast" (OCast), the opposite of
     the normal "pessimistic cast" (PCast) family.  An OCast says all bits
     are defined if any bit is defined.

     It is possible to show that

        if vec == 1...1 then 1...1 else 0...0

     can be implemented in straight-line code as

        (vec - (vec >>u 1)) >>s (word-size-in-bits - 1)

   We note that vec contains the sub-term Or<sz>(vxx, vyy).  Since UifU is
   implemented with Or (since 1 signifies undefinedness), this is a
   duplicate of the UifU<sz>(vxx, vyy) term and so we can CSE it out, giving
   a final version of:

     let naive = UifU<sz>(vxx, vyy)
         vec   = Or<sz>(naive, Not<sz>(Xor<sz>(xx, yy)))
     in
         PCastTo<1>( DifD<sz>(naive, OCast<sz>(vec)) )

   This was extensively re-analysed and checked on 6 July 05 and again
   in July 2017.
*/
static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
                                    IRType  ty,
                                    IRAtom* vxx, IRAtom* vyy,
                                    IRAtom* xx,  IRAtom* yy )
{
   IRAtom *naive, *vec, *improved, *final_cast;
   IROp   opDIFD, opUIFU, opOR, opXOR, opNOT;

   tl_assert(isShadowAtom(mce,vxx));
   tl_assert(isShadowAtom(mce,vyy));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(vxx,xx));
   tl_assert(sameKindedAtoms(vyy,yy));

   switch (ty) {
      case Ity_I8:
         opDIFD = Iop_And8;
         opUIFU = Iop_Or8;
         opOR   = Iop_Or8;
         opXOR  = Iop_Xor8;
         opNOT  = Iop_Not8;
         break;
      case Ity_I16:
         opDIFD = Iop_And16;
         opUIFU = Iop_Or16;
         opOR   = Iop_Or16;
         opXOR  = Iop_Xor16;
         opNOT  = Iop_Not16;
         break;
      case Ity_I32:
         opDIFD = Iop_And32;
         opUIFU = Iop_Or32;
         opOR   = Iop_Or32;
         opXOR  = Iop_Xor32;
         opNOT  = Iop_Not32;
         break;
      case Ity_I64:
         opDIFD = Iop_And64;
         opUIFU = Iop_Or64;
         opOR   = Iop_Or64;
         opXOR  = Iop_Xor64;
         opNOT  = Iop_Not64;
         break;
      default:
         VG_(tool_panic)("expensiveCmpEQorNE");
   }

   naive
      = assignNew('V', mce, ty, binop(opUIFU, vxx, vyy));

   vec
      = assignNew(
           'V', mce, ty,
           binop( opOR,
                  naive,
                  assignNew(
                     'V', mce, ty,
                     unop(opNOT,
                          assignNew('V', mce, ty, binop(opXOR, xx, yy))))));

   improved
      = assignNew( 'V', mce,ty,
                   binop(opDIFD, naive, mkOCastAt(mce, ty, vec)));

   final_cast
      = mkPCastTo( mce, Ity_I1, improved );

   return final_cast;
}
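/* Illustrative sketch (not part of the original file): a plain-C model of
   the scheme above on 8-bit operands, returning the single result vbit
   (1 == the CmpEQ/CmpNE outcome is undefined). */
#if 0
#include <stdint.h>
static uint8_t model_ocast8 ( uint8_t vec )
{
   uint8_t t = (uint8_t)(vec - (vec >> 1));
   return (uint8_t)((int8_t)t >> 7);
}
static int model_cmpEQ8_result_is_undefined ( uint8_t xx, uint8_t vxx,
                                              uint8_t yy, uint8_t vyy )
{
   uint8_t naive    = (uint8_t)(vxx | vyy);                   /* UifU    */
   uint8_t vec      = (uint8_t)(naive | (uint8_t)~(xx ^ yy));
   uint8_t improved = (uint8_t)(naive & model_ocast8(vec));   /* DifD    */
   return improved != 0;                                      /* PCast<1> */
}
/* e.g. xx = 0x00 fully defined, yy has only bit 0 defined, and that bit is 1:
   the bottom bits are defined and differ, so the comparison is defined:
   model_cmpEQ8_result_is_undefined(0x00,0x00, 0x01,0xFE) == 0 */
#endif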
/* --------- Semi-accurate interpretation of CmpORD. --------- */

/* CmpORD32{S,U} does PowerPC-style 3-way comparisons:

      CmpORD32S(x,y) = 1<<3   if  x <s y
                     = 1<<2   if  x >s y
                     = 1<<1   if  x == y

   and similarly the unsigned variant.  The default interpretation is:

      CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
                                  & (7<<1)

   The "& (7<<1)" reflects the fact that all result bits except 3,2,1
   are zero and therefore defined (viz, zero).

   Also deal with a special case better:

      CmpORD32S(x,0)

   Here, bit 3 (LT) of the result is a copy of the top bit of x and
   will be defined even if the rest of x isn't.  In which case we do:

      CmpORD32S#(x,x#,0,{impliedly 0}#)
         = PCast(x#) & (3<<1)      -- standard interp for GT#,EQ#
           | (x# >>u 31) << 3      -- LT# = x#[31]

   Analogous handling for CmpORD64{S,U}.
*/
static Bool isZeroU32 ( IRAtom* e )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U32
              && e->Iex.Const.con->Ico.U32 == 0 );
}

static Bool isZeroU64 ( IRAtom* e )
{
   return
      toBool( e->tag == Iex_Const
              && e->Iex.Const.con->tag == Ico_U64
              && e->Iex.Const.con->Ico.U64 == 0 );
}

static IRAtom* doCmpORD ( MCEnv*  mce,
                          IROp    cmp_op,
                          IRAtom* xxhash, IRAtom* yyhash,
                          IRAtom* xx,     IRAtom* yy )
{
   Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
   Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
   IROp   opOR   = m64 ? Iop_Or64   : Iop_Or32;
   IROp   opAND  = m64 ? Iop_And64  : Iop_And32;
   IROp   opSHL  = m64 ? Iop_Shl64  : Iop_Shl32;
   IROp   opSHR  = m64 ? Iop_Shr64  : Iop_Shr32;
   IRType ty     = m64 ? Ity_I64    : Ity_I32;
   Int    width  = m64 ? 64         : 32;

   Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;

   IRAtom* threeLeft1 = NULL;
   IRAtom* sevenLeft1 = NULL;

   tl_assert(isShadowAtom(mce,xxhash));
   tl_assert(isShadowAtom(mce,yyhash));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(xxhash,xx));
   tl_assert(sameKindedAtoms(yyhash,yy));
   tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
             || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);

   if (0) {
      ppIROp(cmp_op); VG_(printf)(" ");
      ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
   }

   if (syned && isZero(yy)) {
      /* fancy interpretation */
      /* if yy is zero, then it must be fully defined (zero#). */
      tl_assert(isZero(yyhash));
      threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
      return
         binop(
            opOR,
            assignNew(
               'V', mce,ty,
               binop(
                  opAND,
                  mkPCastTo(mce,ty, xxhash),
                  threeLeft1
               )),
            assignNew(
               'V', mce,ty,
               binop(
                  opSHL,
                  assignNew(
                     'V', mce,ty,
                     binop(opSHR, xxhash, mkU8(width-1))),
                  mkU8(3)
               ))
         );
   } else {
      /* standard interpretation */
      sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
      return
         binop(
            opAND,
            mkPCastTo( mce,ty,
                       mkUifU(mce,ty, xxhash,yyhash)),
            sevenLeft1
         );
   }
}
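/* Illustrative sketch (not part of the original file): a plain-C model of
   the special-cased CmpORD32S(x,0) vbit computation described above -- only
   the LT bit (bit 3) can be salvaged from a partially-defined x. */
#if 0
#include <stdint.h>
static uint32_t model_CmpORD32S_vs0_vbits ( uint32_t vx )
{
   uint32_t pcast = (vx == 0) ? 0u : ~0u;    /* PCast(x#)                 */
   uint32_t gt_eq = pcast & (3u << 1);       /* GT#,EQ#: pessimistic      */
   uint32_t lt    = (vx >> 31) << 3;         /* LT# is just x#'s top bit  */
   return gt_eq | lt;
}
#endif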
/*------------------------------------------------------------*/
/*--- Emit a test and complaint if something is undefined. ---*/
/*------------------------------------------------------------*/

static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */


/* Set the annotations on a dirty helper to indicate that the stack
   pointer and instruction pointers might be read.  This is the
   behaviour of all 'emit-a-complaint' style functions we might
   call. */
static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
   di->nFxState = 2;
   di->fxState[0].fx        = Ifx_Read;
   di->fxState[0].offset    = mce->layout->offset_SP;
   di->fxState[0].size      = mce->layout->sizeof_SP;
   di->fxState[0].nRepeats  = 0;
   di->fxState[0].repeatLen = 0;
   di->fxState[1].fx        = Ifx_Read;
   di->fxState[1].offset    = mce->layout->offset_IP;
   di->fxState[1].size      = mce->layout->sizeof_IP;
   di->fxState[1].nRepeats  = 0;
   di->fxState[1].repeatLen = 0;
}
/* Check the supplied *original* |atom| for undefinedness, and emit a
   complaint if so.  Once that happens, mark it as defined.  This is
   possible because the atom is either a tmp or literal.  If it's a
   tmp, it will be shadowed by a tmp, and so we can set the shadow to
   be defined.  In fact as mentioned above, we will have to allocate a
   new tmp to carry the new 'defined' shadow value, and update the
   original->tmp mapping accordingly; we cannot simply assign a new
   value to an existing shadow tmp as this breaks SSAness.

   The checks are performed, any resulting complaint emitted, and
   |atom|'s shadow temp set to 'defined', ONLY in the case that
   |guard| evaluates to True at run-time.  If it evaluates to False
   then no action is performed.  If |guard| is NULL (the usual case)
   then it is assumed to be always-true, and hence these actions are
   performed unconditionally.

   This routine does not generate code to check the definedness of
   |guard|.  The caller is assumed to have taken care of that already.
*/
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom, IRExpr *guard )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;
   IRAtom*  origin;
   void*    fn;
   const HChar* nm;
   IRExpr** args;
   Int      nargs;

   // Don't do V bit tests if we're not reporting undefined value errors.
   if (MC_(clo_mc_level) == 1)
      return;

   if (guard)
      tl_assert(isOriginalAtom(mce, guard));

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretion for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom, HuOth );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   /* Get the origin info for the value we are about to check.  At
      least, if we are doing origin tracking.  If not, use a dummy
      zero origin. */
   if (MC_(clo_mc_level) == 3) {
      origin = schemeE( mce, atom );
      if (mce->hWordTy == Ity_I64) {
         origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
      }
   } else {
      origin = NULL;
   }

   fn    = NULL;
   nm    = NULL;
   args  = NULL;
   nargs = -1;

   switch (sz) {
      case 0:
         if (origin) {
            fn    = &MC_(helperc_value_check0_fail_w_o);
            nm    = "MC_(helperc_value_check0_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check0_fail_no_o);
            nm    = "MC_(helperc_value_check0_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 1:
         if (origin) {
            fn    = &MC_(helperc_value_check1_fail_w_o);
            nm    = "MC_(helperc_value_check1_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check1_fail_no_o);
            nm    = "MC_(helperc_value_check1_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 4:
         if (origin) {
            fn    = &MC_(helperc_value_check4_fail_w_o);
            nm    = "MC_(helperc_value_check4_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check4_fail_no_o);
            nm    = "MC_(helperc_value_check4_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 8:
         if (origin) {
            fn    = &MC_(helperc_value_check8_fail_w_o);
            nm    = "MC_(helperc_value_check8_fail_w_o)";
            args  = mkIRExprVec_1(origin);
            nargs = 1;
         } else {
            fn    = &MC_(helperc_value_check8_fail_no_o);
            nm    = "MC_(helperc_value_check8_fail_no_o)";
            args  = mkIRExprVec_0();
            nargs = 0;
         }
         break;
      case 2:
      case 16:
         if (origin) {
            fn    = &MC_(helperc_value_checkN_fail_w_o);
            nm    = "MC_(helperc_value_checkN_fail_w_o)";
            args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin );
            nargs = 2;
         } else {
            fn    = &MC_(helperc_value_checkN_fail_no_o);
            nm    = "MC_(helperc_value_checkN_fail_no_o)";
            args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
            nargs = 1;
         }
         break;
      default:
         VG_(tool_panic)("unexpected szB");
   }

   tl_assert(fn);
   tl_assert(nm);
   tl_assert(args);
   tl_assert(nargs >= 0 && nargs <= 2);
   tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
              || (MC_(clo_mc_level) == 2 && origin == NULL) );

   di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
                           VG_(fnptr_to_fnentry)( fn ), args );
   di->guard = cond; // and cond is PCast-to-1(atom#)

   /* If the complaint is to be issued under a guard condition, AND
      that into the guard condition for the helper call. */
   if (guard) {
      IRAtom *g1 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, di->guard));
      IRAtom *g2 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, guard));
      IRAtom *e  = assignNew('V', mce, Ity_I32, binop(Iop_And32, g1, g2));
      di->guard  = assignNew('V', mce, Ity_I1,  unop(Iop_32to1, e));
   }

   setHelperAnns( mce, di );
   stmt( 'V', mce, IRStmt_Dirty(di));

   /* If |atom| is shadowed by an IRTemp, set the shadow tmp to be
      defined -- but only in the case where the guard evaluates to
      True at run-time.  Do the update by setting the orig->shadow
      mapping for tmp to reflect the fact that this shadow is getting
      a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      if (guard == NULL) {
         // guard is 'always True', hence update unconditionally
         newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
                          definedOfType(ty));
      } else {
         // update the temp only conditionally.  Do this by copying
         // its old value when the guard is False.
         IRTemp old_tmpV = findShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
         IRAtom* new_tmpV
            = assignNew('V', mce, shadowTypeV(ty),
                        IRExpr_ITE(guard, definedOfType(ty),
                                          mkexpr(old_tmpV)));
         assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp), new_tmpV);
      }
   }
}
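/* Illustrative sketch (not part of the original file): how an
   instrumentation site typically uses the routine above.  For a conditional
   exit, the (original, atomic) guard is checked; afterwards its shadow is
   treated as defined. */
#if 0
static void example_check_exit_guard ( MCEnv* mce, IRStmt* st )
{
   /* In flattened IR, st->Ist.Exit.guard is an Ity_I1 atom. */
   complainIfUndefined(mce, st->Ist.Exit.guard, NULL);
}
#endif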
/*------------------------------------------------------------*/
/*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
/*------------------------------------------------------------*/

/* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
   partially fall into such a region: (offset,size) should either be
   completely in such a region or completely not-in such a region.
*/
static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
{
   Int minoffD, maxoffD, i;
   Int minoff = offset;
   Int maxoff = minoff + size - 1;
   tl_assert((minoff & ~0xFFFF) == 0);
   tl_assert((maxoff & ~0xFFFF) == 0);

   for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
      minoffD = mce->layout->alwaysDefd[i].offset;
      maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
      tl_assert((minoffD & ~0xFFFF) == 0);
      tl_assert((maxoffD & ~0xFFFF) == 0);

      if (maxoff < minoffD || maxoffD < minoff)
         continue; /* no overlap */
      if (minoff >= minoffD && maxoff <= maxoffD)
         return True; /* completely contained in an always-defd section */

      VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
   }
   return False; /* could not find any containing section */
}
/* Generate into bb suitable actions to shadow this Put.  If the state
   slice is marked 'always defined', do nothing.  Otherwise, write the
   supplied V bits to the shadow state.  We can pass in either an
   original atom or a V-atom, but not both.  In the former case the
   relevant V-bits are then generated from the original.
   We assume here, that the definedness of GUARD has already been checked.
*/
static
void do_shadow_PUT ( MCEnv* mce,  Int offset,
                     IRAtom* atom, IRAtom* vatom, IRExpr *guard )
{
   IRType ty;

   // Don't do shadow PUTs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom, HuOth );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put. */
      if (guard) {
         /* If the guard expression evaluates to false we simply Put the value
            that is already stored in the guest state slot */
         IRAtom *cond, *iffalse;

         cond    = assignNew('V', mce, Ity_I1, guard);
         iffalse = assignNew('V', mce, ty,
                             IRExpr_Get(offset + mce->layout->total_sizeB, ty));
         vatom   = assignNew('V', mce, ty, IRExpr_ITE(cond, vatom, iffalse));
      }
      stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ));
   }
}
/* Generate into bb suitable actions to shadow this PutI (passed in
   in pieces).
*/
static
void do_shadow_PUTI ( MCEnv* mce, IRPutI *puti )
{
   IRAtom*     vatom;
   IRType      ty, tyS;
   Int         arrSize;
   IRRegArray* descr = puti->descr;
   IRAtom*     ix    = puti->ix;
   Int         bias  = puti->bias;
   IRAtom*     atom  = puti->data;

   // Don't do shadow PUTIs if we're not doing undefined value checking.
   // Their absence lets Vex's optimiser remove all the shadow computation
   // that they depend on, which includes GETIs of the shadow registers.
   if (MC_(clo_mc_level) == 1)
      return;

   tl_assert(isOriginalAtom(mce,atom));
   vatom = expr2vbits( mce, atom, HuOth );
   tl_assert(sameKindedAtoms(atom, vatom));
   ty  = descr->elemTy;
   tyS = shadowTypeV(ty);
   arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce, ix, NULL);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a cloned version of the Put that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      stmt( 'V', mce, IRStmt_PutI( mkIRPutI(new_descr, ix, bias, vatom) ));
   }
}
/* Return an expression which contains the V bits corresponding to the
   given GET (passed in in pieces).
*/
static
IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
{
   IRType tyS = shadowTypeV(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(ty != Ity_I128);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      /* FIXME: this isn't an atom! */
      return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
   }
}
/* Return an expression which contains the V bits corresponding to the
   given GETI (passed in in pieces).
*/
static
IRExpr* shadow_GETI ( MCEnv* mce,
                      IRRegArray* descr, IRAtom* ix, Int bias )
{
   IRType ty   = descr->elemTy;
   IRType tyS  = shadowTypeV(ty);
   Int arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce, ix, NULL);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      return IRExpr_GetI( new_descr, ix, bias );
   }
}
/*------------------------------------------------------------*/
/*--- Generating approximations for unknown operations,    ---*/
/*--- using lazy-propagate semantics                       ---*/
/*------------------------------------------------------------*/

/* Lazy propagation of undefinedness from two values, resulting in the
   specified shadow type.
*/
static
IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      once rather than twice. */

   /* I64 x I64 -> I64 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I64 x I64 -> I32 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I32 -> I32 */
   if (t1 == Ity_I32 && t2 == Ity_I32 && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy2: I32 x I32 -> I32\n");
      at = mkUifU(mce, Ity_I32, va1, va2);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   if (0) {
      VG_(printf)("mkLazy2 ");
      ppIRType(t1); VG_(printf)(" x ");
      ppIRType(t2); VG_(printf)(" -> ");
      ppIRType(finalVty); VG_(printf)("\n");
   }

   /* General case: force everything via 32-bit intermediaries. */
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkPCastTo(mce, finalVty, at);
   return at;
}
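/* Illustrative sketch (not part of the original file): approximating an
   otherwise-unknown I64 x I64 -> I32 operation with the helper above.  The
   shadow-atom names are hypothetical. */
#if 0
   IRAtom* vbitsOfResult = mkLazy2(mce, Ity_I32, vatom1, vatom2);
#endif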
/* 3-arg version of the above. */
static
IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than three times. */

   /* I32 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I32 x I8 x I64 -> I64 */
   if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy3: I32 x I8 x I64 -> I64\n");
      /* Widen 1st and 2nd args to I64.  Since 1st arg is typically a
       * rounding mode indication which is fully defined, this should
       * get folded out later.
       */
      IRAtom* at1 = mkPCastTo(mce, Ity_I64, va1);
      IRAtom* at2 = mkPCastTo(mce, Ity_I64, va2);
      at = mkUifU(mce, Ity_I64, at1, at2);  // UifU(PCast(va1), PCast(va2))
      at = mkUifU(mce, Ity_I64, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I32 x I64 x I64 -> I32 */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
      at = mkPCastTo(mce, Ity_I64, va1);
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I32 x I32 -> I32 */
   /* 32-bit FP idiom, as (eg) happens on ARM */
   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
      at = va1;
      at = mkUifU(mce, Ity_I32, at, va2);
      at = mkUifU(mce, Ity_I32, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I128 x I128 -> I128 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128
       && finalVty == Ity_I128) {
      if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n");
      /* Widen 1st arg to I128.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I128, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I128, at, va2);
      at = mkUifU(mce, Ity_I128, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I128, at);
      return at;
   }

   /* I32 x I8 x I128 -> I128 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I128
       && finalVty == Ity_I128) {
      if (0) VG_(printf)("mkLazy3: I32 x I8 x I128 -> I128\n");
      /* Use I64 as an intermediate type, which means PCasting all 3
         args to I64 to start with.  1st arg is typically a rounding
         mode indication which is fully defined, so we hope that it
         will get folded out later. */
      IRAtom* at1 = mkPCastTo(mce, Ity_I64, va1);
      IRAtom* at2 = mkPCastTo(mce, Ity_I64, va2);
      IRAtom* at3 = mkPCastTo(mce, Ity_I64, va3);
      /* Now UifU all three together. */
      at = mkUifU(mce, Ity_I64, at1, at2);  // UifU(PCast(va1), PCast(va2))
      at = mkUifU(mce, Ity_I64, at, at3);   // ... `UifU` PCast(va3)
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I128, at);
      return at;
   }

   if (1) {
      VG_(printf)("mkLazy3: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   tl_assert(0);
   /* General case: force everything via 32-bit intermediaries. */
   /*
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
   at = mkPCastTo(mce, finalVty, at);
   return at;
   */
}
/* 4-arg version of the above. */
static
IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));
   tl_assert(isShadowAtom(mce,va4));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than three times. */

   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */

   /* I32 x I128 x I128 x I128 -> I128 */
   if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128 && t4 == Ity_I128
       && finalVty == Ity_I128) {
      if (0) VG_(printf)("mkLazy4: I32 x I128 x I128 x I128 -> I128\n");
      /* Widen 1st arg to I128.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I128, va1);
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I128, at, va2);
      at = mkUifU(mce, Ity_I128, at, va3);
      at = mkUifU(mce, Ity_I128, at, va4);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I128, at);
      return at;
   }

   /* I32 x I64 x I64 x I64 -> I64 */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkUifU(mce, Ity_I64, at, va4);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I32 x I32 x I32 x I32 -> I32 */
   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n");
      at = va1;
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I32, at, va2);
      at = mkUifU(mce, Ity_I32, at, va3);
      at = mkUifU(mce, Ity_I32, at, va4);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I8 x I8 x I8 -> I32 */
   if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I8 && t4 == Ity_I8
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy4: I32 x I8 x I8 x I8 -> I32\n");
      at = mkPCastTo(mce, Ity_I8, va1);
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I8, at, va2);
      at = mkUifU(mce, Ity_I8, at, va3);
      at = mkUifU(mce, Ity_I8, at, va4);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I64 x I8 x I8 x I8 -> I64 */
   if (t1 == Ity_I64 && t2 == Ity_I8 && t3 == Ity_I8 && t4 == Ity_I8
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy4: I64 x I8 x I8 x I8 -> I64\n");
      at = mkPCastTo(mce, Ity_I8, va1);
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I8, at, va2);
      at = mkUifU(mce, Ity_I8, at, va3);
      at = mkUifU(mce, Ity_I8, at, va4);
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   if (1) {
      VG_(printf)("mkLazy4: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" x ");
      ppIRType(t4);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   tl_assert(0);
}
/* Do the lazy propagation game from a null-terminated vector of
   atoms.  This is presumably the arguments to a helper call, so the
   IRCallee info is also supplied in order that we can know which
   arguments should be ignored (via the .mcx_mask field).
*/
static
IRAtom* mkLazyN ( MCEnv* mce,
                  IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
{
   Int     i;
   IRAtom* here;
   IRAtom* curr;
   IRType  mergeTy;
   Bool    mergeTy64 = True;

   /* Decide on the type of the merge intermediary.  If all relevant
      args are I64, then it's I64.  In all other circumstances, use
      I32. */
   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      if (cee->mcx_mask & (1<<i))
         continue;
      if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
         mergeTy64 = False;
   }

   mergeTy = mergeTy64  ? Ity_I64  : Ity_I32;
   curr    = definedOfType(mergeTy);

   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      /* Only take notice of this arg if the callee's mc-exclusion
         mask does not say it is to be excluded. */
      if (cee->mcx_mask & (1<<i)) {
         /* the arg is to be excluded from definedness checking.  Do
            nothing. */
         if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
      } else {
         /* calculate the arg's definedness, and pessimistically merge
            it in. */
         here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i], HuOth) );
         curr = mergeTy64
                   ? mkUifU64(mce, here, curr)
                   : mkUifU32(mce, here, curr);
      }
   }
   return mkPCastTo(mce, finalVtype, curr );
}
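
/* Illustrative note (added commentary, not from the original source):
   for a helper call with three arguments and cee->mcx_mask == 1,
   argument 0 is excluded from checking, so the returned V bits are

      PCastTo(finalVtype, UifU( PCast(arg1#), PCast(arg2#) ))

   computed at either I64 or I32 granularity, depending on the merge
   type chosen above. */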
/*------------------------------------------------------------*/
/*--- Generating expensive sequences for exact carry-chain  ---*/
/*--- propagation in add/sub and related operations.        ---*/
/*------------------------------------------------------------*/

static
IRAtom* expensiveAddSub ( MCEnv*  mce,
                          Bool    add,
                          IRType  ty,
                          IRAtom* qaa, IRAtom* qbb,
                          IRAtom* aa,  IRAtom* bb )
{
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   switch (ty) {
      case Ity_I32:
         opAND = Iop_And32;
         opOR  = Iop_Or32;
         opXOR = Iop_Xor32;
         opNOT = Iop_Not32;
         opADD = Iop_Add32;
         opSUB = Iop_Sub32;
         break;
      case Ity_I64:
         opAND = Iop_And64;
         opOR  = Iop_Or64;
         opXOR = Iop_Xor64;
         opNOT = Iop_Not64;
         opADD = Iop_Add64;
         opSUB = Iop_Sub64;
         break;
      default:
         VG_(tool_panic)("expensiveAddSub");
   }

   // a_min = aa & ~qaa
   a_min = assignNew('V', mce,ty,
                     binop(opAND, aa,
                           assignNew('V', mce,ty, unop(opNOT, qaa))));

   // b_min = bb & ~qbb
   b_min = assignNew('V', mce,ty,
                     binop(opAND, bb,
                           assignNew('V', mce,ty, unop(opNOT, qbb))));

   // a_max = aa | qaa
   a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));

   // b_max = bb | qbb
   b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));

   if (add) {
      // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
      return
      assignNew('V', mce,ty,
         binop( opOR,
                assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                assignNew('V', mce,ty,
                   binop( opXOR,
                          assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
                          assignNew('V', mce,ty, binop(opADD, a_max, b_max))
                   )
                )
         )
      );
   } else {
      // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
      return
      assignNew('V', mce,ty,
         binop( opOR,
                assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                assignNew('V', mce,ty,
                   binop( opXOR,
                          assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
                          assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
                   )
                )
         )
      );
   }
}
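
/* Worked example (added commentary, not from the original source):
   with ty == Ity_I32, aa == 0x8 and qaa == 0x1 (only bit 0 of aa is
   undefined), bb == 0x3 and qbb == 0x0 (bb fully defined):

      a_min = aa & ~qaa = 0x8      a_max = aa | qaa = 0x9
      b_min = b_max     = 0x3

      a_min + b_min = 0xB          a_max + b_max = 0xC
      (0xB ^ 0xC) = 0x7, OR-ed with (qaa | qbb) = 0x1, gives 0x7:

   bits 0..2 of the sum are flagged undefined, because the unknown
   carry out of bit 0 can affect them; all higher bits stay defined. */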
static
IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
                                       IRAtom* atom, IRAtom* vatom )
{
   IRType ty;
   IROp   xorOp, subOp, andOp;
   IRAtom *one;
   IRAtom *improver, *improved;
   tl_assert(isShadowAtom(mce,vatom));
   tl_assert(isOriginalAtom(mce,atom));
   tl_assert(sameKindedAtoms(atom,vatom));

   switch (czop) {
      case Iop_Ctz32:
         ty = Ity_I32;
         xorOp = Iop_Xor32;
         subOp = Iop_Sub32;
         andOp = Iop_And32;
         one = mkU32(1);
         break;
      case Iop_Ctz64:
         ty = Ity_I64;
         xorOp = Iop_Xor64;
         subOp = Iop_Sub64;
         andOp = Iop_And64;
         one = mkU64(1);
         break;
      default:
         ty = Ity_INVALID;
         VG_(tool_panic)("memcheck:expensiveCountTrailingZeroes");
   }

   // improver = atom ^ (atom - 1)
   //
   // That is, improver has its low ctz(atom) bits equal to one;
   // higher bits (if any) equal to zero.
   improver = assignNew('V', mce,ty,
                        binop(xorOp,
                              atom,
                              assignNew('V', mce, ty,
                                        binop(subOp, atom, one))));

   // improved = vatom & improver
   //
   // That is, treat any V bits above the first ctz(atom) bits as
   // defined.
   improved = assignNew('V', mce, ty,
                        binop(andOp, vatom, improver));

   // Return pessimizing cast of improved.
   return mkPCastTo(mce, ty, improved);
}
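
/* Worked example (added commentary, not from the original source):
   atom == 0b...10100, so ctz(atom) == 2.
      atom - 1 == 0b...10011
      improver == atom ^ (atom - 1) == 0b...00111
   improved == vatom & improver therefore keeps only the V bits of the
   low bits up to and including the lowest set bit of atom;
   undefinedness in higher bits is ignored, since flipping those bits
   cannot change the count of trailing zeroes.  The final PCast then
   reports the whole result as undefined only if one of those low
   bits is undefined. */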
/*------------------------------------------------------------*/
/*--- Scalar shifts.                                        ---*/
/*------------------------------------------------------------*/

/* Produce an interpretation for (aa << bb) (or >>s, >>u).  The basic
   idea is to shift the definedness bits by the original shift amount.
   This introduces 0s ("defined") in new positions for left shifts and
   unsigned right shifts, and copies the top definedness bit for
   signed right shifts.  So, conveniently, applying the original shift
   operator to the definedness bits for the left arg is exactly the
   right thing to do:

      (qaa << bb)

   However if the shift amount is undefined then the whole result
   is undefined.  Hence need:

      (qaa << bb) `UifU` PCast(qbb)

   If the shift amount bb is a literal then qbb will say 'all defined'
   and the UifU and PCast will get folded out by post-instrumentation
   optimisation.
*/
static IRAtom* scalarShift ( MCEnv*  mce,
                             IRType  ty,
                             IROp    original_op,
                             IRAtom* qaa, IRAtom* qbb,
                             IRAtom* aa,  IRAtom* bb )
{
   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));
   return
      assignNew(
         'V', mce, ty,
         mkUifU( mce, ty,
                 assignNew('V', mce, ty, binop(original_op, qaa, bb)),
                 mkPCastTo(mce, ty, qbb)
         )
      );
}
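
/* Illustrative note (added commentary, not from the original source):
   for (aa << 3) with a literal shift amount, qbb is a defined
   constant, so PCast(qbb) folds to 'all defined' and the result is
   simply (qaa << 3) -- the definedness bits move with the data bits.
   If the shift amount itself is undefined, PCast(qbb) is all-ones and
   the whole result is flagged undefined. */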
/*------------------------------------------------------------*/
/*--- Helpers for dealing with vector primops.              ---*/
/*------------------------------------------------------------*/

/* Vector pessimisation -- pessimise within each lane individually. */

static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
}

static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
}

static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
}

static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
}

static IRAtom* mkPCast64x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ64x4, at));
}

static IRAtom* mkPCast32x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ32x8, at));
}

static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
}

static IRAtom* mkPCast16x16 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ16x16, at));
}

static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
}

static IRAtom* mkPCast8x32 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ8x32, at));
}

static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
}

static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at));
}

static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at));
}
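
/* Illustrative note (added commentary, not from the original source):
   these helpers implement PCast at lane granularity.  E.g. for a V128
   shadow value with a single undefined bit in lane 5, mkPCast16x8
   yields a V128 value whose lane 5 is 0xFFFF (all undefined) and
   whose other seven lanes are 0x0000 (all defined), via
   Iop_CmpNEZ16x8. */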
/* Here's a simple scheme capable of handling ops derived from SSE1
   code, while only generating ops that can be efficiently
   implemented in SSE1. */

/* All-lanes versions are straightforward:

   binary32Fx4(x,y)   ==> PCast32x4(UifUV128(x#,y#))

   unary32Fx4(x)      ==> PCast32x4(x#)

   Lowest-lane-only versions are more complex:

   binary32F0x4(x,y)  ==> SetV128lo32(
                             x#,
                             PCast32(V128to32(UifUV128(x#,y#)))
                          )

   This is perhaps not so obvious.  In particular, it's faster to
   do a V128-bit UifU and then take the bottom 32 bits than the more
   obvious scheme of taking the bottom 32 bits of each operand
   and doing a 32-bit UifU.  Basically since UifU is fast and
   chopping lanes off vector values is slow.

   Finally:

   unary32F0x4(x)     ==> SetV128lo32(
                             x#,
                             PCast32(V128to32(x#))
                          )

   Where:

   PCast32(v#)   = 1Sto32(CmpNE32(v#,0))
   PCast32x4(v#) = CmpNEZ32x4(v#)
*/
static
IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
   return at;
}

static
IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
   return at;
}

static
IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
   at = mkPCastTo(mce, Ity_I32, at);
   at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   return at;
}

static
IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
   at = mkPCastTo(mce, Ity_I32, at);
   at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   return at;
}
/* --- ... and ... 64Fx2 versions of the same ... --- */

static
IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
   return at;
}

static
IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
   return at;
}

static
IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
   at = mkPCastTo(mce, Ity_I64, at);
   at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   return at;
}

static
IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
   at = mkPCastTo(mce, Ity_I64, at);
   at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   return at;
}
/* --- --- ... and ... 32Fx2 versions of the same --- --- */

static
IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifU64(mce, vatomX, vatomY);
   at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at));
   return at;
}

static
IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX));
   return at;
}
/* --- ... and ... 64Fx4 versions of the same ... --- */

static
IRAtom* binary64Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV256(mce, vatomX, vatomY);
   at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, at));
   return at;
}

static
IRAtom* unary64Fx4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, vatomX));
   return at;
}

/* --- ... and ... 32Fx8 versions of the same ... --- */

static
IRAtom* binary32Fx8 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV256(mce, vatomX, vatomY);
   at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, at));
   return at;
}

static
IRAtom* unary32Fx8 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, vatomX));
   return at;
}
/* --- 64Fx2 binary FP ops, with rounding mode --- */

static
IRAtom* binary64Fx2_w_rm ( MCEnv* mce, IRAtom* vRM,
                           IRAtom* vatomX, IRAtom* vatomY )
{
   /* This is the same as binary64Fx2, except that we subsequently
      pessimise vRM (definedness of the rounding mode), widen to 128
      bits and UifU it into the result.  As with the scalar cases, if
      the RM is a constant then it is defined and so this extra bit
      will get constant-folded out later. */
   // "do" the vector args
   IRAtom* t1 = binary64Fx2(mce, vatomX, vatomY);
   // PCast the RM, and widen it to 128 bits
   IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
   // Roll it into the result
   t1 = mkUifUV128(mce, t1, t2);
   return t1;
}

/* --- ... and ... 32Fx4 versions of the same --- */

static
IRAtom* binary32Fx4_w_rm ( MCEnv* mce, IRAtom* vRM,
                           IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* t1 = binary32Fx4(mce, vatomX, vatomY);
   // PCast the RM, and widen it to 128 bits
   IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
   // Roll it into the result
   t1 = mkUifUV128(mce, t1, t2);
   return t1;
}

/* --- ... and ... 64Fx4 versions of the same --- */

static
IRAtom* binary64Fx4_w_rm ( MCEnv* mce, IRAtom* vRM,
                           IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* t1 = binary64Fx4(mce, vatomX, vatomY);
   // PCast the RM, and widen it to 256 bits
   IRAtom* t2 = mkPCastTo(mce, Ity_V256, vRM);
   // Roll it into the result
   t1 = mkUifUV256(mce, t1, t2);
   return t1;
}

/* --- ... and ... 32Fx8 versions of the same --- */

static
IRAtom* binary32Fx8_w_rm ( MCEnv* mce, IRAtom* vRM,
                           IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* t1 = binary32Fx8(mce, vatomX, vatomY);
   // PCast the RM, and widen it to 256 bits
   IRAtom* t2 = mkPCastTo(mce, Ity_V256, vRM);
   // Roll it into the result
   t1 = mkUifUV256(mce, t1, t2);
   return t1;
}

/* --- 64Fx2 unary FP ops, with rounding mode --- */

static
IRAtom* unary64Fx2_w_rm ( MCEnv* mce, IRAtom* vRM, IRAtom* vatomX )
{
   /* Same scheme as binary64Fx2_w_rm. */
   // "do" the vector arg
   IRAtom* t1 = unary64Fx2(mce, vatomX);
   // PCast the RM, and widen it to 128 bits
   IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
   // Roll it into the result
   t1 = mkUifUV128(mce, t1, t2);
   return t1;
}

/* --- ... and ... 32Fx4 versions of the same --- */

static
IRAtom* unary32Fx4_w_rm ( MCEnv* mce, IRAtom* vRM, IRAtom* vatomX )
{
   /* Same scheme as unary64Fx2_w_rm. */
   IRAtom* t1 = unary32Fx4(mce, vatomX);
   // PCast the RM, and widen it to 128 bits
   IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
   // Roll it into the result
   t1 = mkUifUV128(mce, t1, t2);
   return t1;
}
/* --- --- Vector saturated narrowing --- --- */

/* We used to do something very clever here, but on closer inspection
   (2011-Jun-15), and in particular bug #279698, it turns out to be
   wrong.  Part of the problem came from the fact that for a long
   time, the IR primops to do with saturated narrowing were
   underspecified and managed to confuse multiple cases which needed
   to be separate: the op names had a signedness qualifier, but in
   fact the source and destination signednesses needed to be specified
   independently, so the op names really need two independent
   signedness specifiers.

   As of 2011-Jun-15 (ish) the underspecification was sorted out
   properly.  The incorrect instrumentation remained, though.  That
   has now (2011-Oct-22) been fixed.

   What we now do is simple:

   Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a
   number of lanes, X is the source lane width and signedness, and Y
   is the destination lane width and signedness.  In all cases the
   destination lane width is half the source lane width, so the names
   have a bit of redundancy, but are at least easy to read.

   For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s
   to unsigned 16s.

   Let Vanilla(OP) be a function that takes OP, one of these
   saturating narrowing ops, and produces the same "shaped" narrowing
   op which is not saturating, but merely dumps the most significant
   bits.  "same shape" means that the lane numbers and widths are the
   same as in the original.

   For example, Vanilla(Iop_QNarrowBin32Sto16Ux8)
                  = Iop_NarrowBin32to16x8,
   that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by
   dumping the top half of each lane.

   So, with that in place, the scheme is simple, and it is simple to
   pessimise each lane individually and then apply Vanilla(OP) so as
   to get the result in the right "shape".  If the original OP is
   QNarrowBinXtoYxZ then we produce

   Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) )

   or for the case when OP is unary (Iop_QNarrowUn*)

   Vanilla(OP)( PCast-X-to-X-x-Z(vatom) )
*/
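
/* Illustrative instantiation (added commentary, not from the original
   source): for OP = Iop_QNarrowBin32Sto16Ux8 the scheme below
   produces

      NarrowBin32to16x8( CmpNEZ32x4(vatom1), CmpNEZ32x4(vatom2) )

   i.e. each 32-bit source lane is first pessimised to all-0s or
   all-1s, and the vanilla narrowing then simply keeps the (identical)
   low halves, giving a 16-bit result lane that is undefined whenever
   the corresponding source lane had any undefined bit. */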
static
IROp vanillaNarrowingOpOfShape ( IROp qnarrowOp )
{
   switch (qnarrowOp) {
      /* Binary: (128, 128) -> 128 */
      case Iop_QNarrowBin16Sto8Ux16:
      case Iop_QNarrowBin16Sto8Sx16:
      case Iop_QNarrowBin16Uto8Ux16:
      case Iop_QNarrowBin64Sto32Sx4:
      case Iop_QNarrowBin64Uto32Ux4:
         return Iop_NarrowBin16to8x16;
      case Iop_QNarrowBin32Sto16Ux8:
      case Iop_QNarrowBin32Sto16Sx8:
      case Iop_QNarrowBin32Uto16Ux8:
         return Iop_NarrowBin32to16x8;
      /* Binary: (64, 64) -> 64 */
      case Iop_QNarrowBin32Sto16Sx4:
         return Iop_NarrowBin32to16x4;
      case Iop_QNarrowBin16Sto8Ux8:
      case Iop_QNarrowBin16Sto8Sx8:
         return Iop_NarrowBin16to8x8;
      /* Unary: 128 -> 64 */
      case Iop_QNarrowUn64Uto32Ux2:
      case Iop_QNarrowUn64Sto32Sx2:
      case Iop_QNarrowUn64Sto32Ux2:
         return Iop_NarrowUn64to32x2;
      case Iop_QNarrowUn32Uto16Ux4:
      case Iop_QNarrowUn32Sto16Sx4:
      case Iop_QNarrowUn32Sto16Ux4:
      case Iop_F32toF16x4:
         return Iop_NarrowUn32to16x4;
      case Iop_QNarrowUn16Uto8Ux8:
      case Iop_QNarrowUn16Sto8Sx8:
      case Iop_QNarrowUn16Sto8Ux8:
         return Iop_NarrowUn16to8x8;
      default:
         ppIROp(qnarrowOp);
         VG_(tool_panic)("vanillaNarrowOpOfShape");
   }
}
static
IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op,
                              IRAtom* vatom1, IRAtom* vatom2)
{
   IRAtom *at1, *at2, *at3;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (narrow_op) {
      case Iop_QNarrowBin64Sto32Sx4: pcast = mkPCast32x4; break;
      case Iop_QNarrowBin64Uto32Ux4: pcast = mkPCast32x4; break;
      case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
      case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break;
      case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break;
      case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
      case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break;
      case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
      default: VG_(tool_panic)("vectorNarrowBinV128");
   }
   IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
   at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
   at3 = assignNew('V', mce, Ity_V128, binop(vanilla_narrow, at1, at2));
   return at3;
}
static
IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op,
                            IRAtom* vatom1, IRAtom* vatom2)
{
   IRAtom *at1, *at2, *at3;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (narrow_op) {
      case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break;
      case Iop_QNarrowBin16Sto8Sx8:  pcast = mkPCast16x4; break;
      case Iop_QNarrowBin16Sto8Ux8:  pcast = mkPCast16x4; break;
      default: VG_(tool_panic)("vectorNarrowBin64");
   }
   IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
   at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
   at3 = assignNew('V', mce, Ity_I64, binop(vanilla_narrow, at1, at2));
   return at3;
}
static
IRAtom* vectorNarrowUnV128 ( MCEnv* mce, IROp narrow_op,
                             IRAtom* vatom1)
{
   IRAtom *at1, *at2;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   tl_assert(isShadowAtom(mce,vatom1));
   /* For vanilla narrowing (non-saturating), we can just apply
      the op directly to the V bits. */
   switch (narrow_op) {
      case Iop_NarrowUn16to8x8:
      case Iop_NarrowUn32to16x4:
      case Iop_NarrowUn64to32x2:
      case Iop_F32toF16x4:
         at1 = assignNew('V', mce, Ity_I64, unop(narrow_op, vatom1));
         return at1;
      default:
         break; /* Do Plan B */
   }
   /* Plan B: for ops that involve a saturation operation on the args,
      we must PCast before the vanilla narrow. */
   switch (narrow_op) {
      case Iop_QNarrowUn16Sto8Sx8:  pcast = mkPCast16x8; break;
      case Iop_QNarrowUn16Sto8Ux8:  pcast = mkPCast16x8; break;
      case Iop_QNarrowUn16Uto8Ux8:  pcast = mkPCast16x8; break;
      case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break;
      case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break;
      case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break;
      case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break;
      case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break;
      case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break;
      default: VG_(tool_panic)("vectorNarrowUnV128");
   }
   IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
   at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
   at2 = assignNew('V', mce, Ity_I64, unop(vanilla_narrow, at1));
   return at2;
}
static
IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op,
                         IRAtom* vatom1)
{
   IRAtom *at1, *at2;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (longen_op) {
      case Iop_Widen8Uto16x8:  pcast = mkPCast16x8; break;
      case Iop_Widen8Sto16x8:  pcast = mkPCast16x8; break;
      case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break;
      case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break;
      case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break;
      case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break;
      case Iop_F16toF32x4:     pcast = mkPCast32x4; break;
      default: VG_(tool_panic)("vectorWidenI64");
   }
   tl_assert(isShadowAtom(mce,vatom1));
   at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1));
   at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1));
   return at2;
}
/* --- --- Vector integer arithmetic --- --- */

/* Simple ... UifU the args and per-lane pessimise the results. */

/* --- V256-bit versions --- */

static
IRAtom* binary8Ix32 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV256(mce, vatom1, vatom2);
   at = mkPCast8x32(mce, at);
   return at;
}

static
IRAtom* binary16Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV256(mce, vatom1, vatom2);
   at = mkPCast16x16(mce, at);
   return at;
}

static
IRAtom* binary32Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV256(mce, vatom1, vatom2);
   at = mkPCast32x8(mce, at);
   return at;
}

static
IRAtom* binary64Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV256(mce, vatom1, vatom2);
   at = mkPCast64x4(mce, at);
   return at;
}

/* --- V128-bit versions --- */

static
IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast8x16(mce, at);
   return at;
}

static
IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast16x8(mce, at);
   return at;
}

static
IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast32x4(mce, at);
   return at;
}

static
IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast64x2(mce, at);
   return at;
}

/* --- 64-bit versions --- */

static
IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifU64(mce, vatom1, vatom2);
   at = mkPCast8x8(mce, at);
   return at;
}

static
IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifU64(mce, vatom1, vatom2);
   at = mkPCast16x4(mce, at);
   return at;
}

static
IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifU64(mce, vatom1, vatom2);
   at = mkPCast32x2(mce, at);
   return at;
}

static
IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifU64(mce, vatom1, vatom2);
   at = mkPCastTo(mce, Ity_I64, at);
   return at;
}

/* --- 32-bit versions --- */

static
IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifU32(mce, vatom1, vatom2);
   at = mkPCast8x4(mce, at);
   return at;
}

static
IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifU32(mce, vatom1, vatom2);
   at = mkPCast16x2(mce, at);
   return at;
}
/*------------------------------------------------------------*/
/*--- Generate shadow values from all kinds of IRExprs.     ---*/
/*------------------------------------------------------------*/

static
IRAtom* expr2vbits_Qop ( MCEnv* mce,
                         IROp op,
                         IRAtom* atom1, IRAtom* atom2,
                         IRAtom* atom3, IRAtom* atom4 )
{
   IRAtom* vatom1 = expr2vbits( mce, atom1, HuOth );
   IRAtom* vatom2 = expr2vbits( mce, atom2, HuOth );
   IRAtom* vatom3 = expr2vbits( mce, atom3, HuOth );
   IRAtom* vatom4 = expr2vbits( mce, atom4, HuOth );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isOriginalAtom(mce,atom4));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(isShadowAtom(mce,vatom4));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   tl_assert(sameKindedAtoms(atom4,vatom4));
   switch (op) {
      case Iop_MAddF64:
      case Iop_MAddF64r32:
      case Iop_MSubF64:
      case Iop_MSubF64r32:
         /* I32(rm) x F64 x F64 x F64 -> F64 */
         return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);

      case Iop_MAddF32:
      case Iop_MSubF32:
         /* I32(rm) x F32 x F32 x F32 -> F32 */
         return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);

      case Iop_MAddF128:
      case Iop_MSubF128:
      case Iop_NegMAddF128:
      case Iop_NegMSubF128:
         /* I32(rm) x F128 x F128 x F128 -> F128 */
         return mkLazy4(mce, Ity_I128, vatom1, vatom2, vatom3, vatom4);

      /* V256-bit data-steering */
      case Iop_64x4toV256:
         return assignNew('V', mce, Ity_V256,
                          IRExpr_Qop(op, vatom1, vatom2, vatom3, vatom4));

      /* I32/I64 x I8 x I8 x I8 -> I32/I64 */
      case Iop_Rotx32:
         return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);
      case Iop_Rotx64:
         return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);
      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Qop");
   }
}
static
IRAtom* expr2vbits_Triop ( MCEnv* mce,
                           IROp op,
                           IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
{
   IRAtom* vatom1 = expr2vbits( mce, atom1, HuOth );
   IRAtom* vatom2 = expr2vbits( mce, atom2, HuOth );
   IRAtom* vatom3 = expr2vbits( mce, atom3, HuOth );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   switch (op) {
      case Iop_AddF128:
      case Iop_SubF128:
      case Iop_MulF128:
      case Iop_DivF128:
      case Iop_AddD128:
      case Iop_SubD128:
      case Iop_MulD128:
      case Iop_DivD128:
      case Iop_QuantizeD128:
         /* I32(rm) x F128/D128 x F128/D128 -> F128/D128 */
         return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
      case Iop_AddF64:
      case Iop_AddD64:
      case Iop_AddF64r32:
      case Iop_SubF64:
      case Iop_SubD64:
      case Iop_SubF64r32:
      case Iop_MulF64:
      case Iop_MulD64:
      case Iop_MulF64r32:
      case Iop_DivF64:
      case Iop_DivD64:
      case Iop_DivF64r32:
      case Iop_ScaleF64:
      case Iop_Yl2xF64:
      case Iop_Yl2xp1F64:
      case Iop_AtanF64:
      case Iop_PRemF64:
      case Iop_PRem1F64:
      case Iop_QuantizeD64:
         /* I32(rm) x F64/D64 x F64/D64 -> F64/D64 */
         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
      case Iop_PRemC3210F64:
      case Iop_PRem1C3210F64:
         /* I32(rm) x F64 x F64 -> I32 */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      case Iop_AddF32:
      case Iop_SubF32:
      case Iop_MulF32:
      case Iop_DivF32:
         /* I32(rm) x F32 x F32 -> I32 */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      case Iop_SignificanceRoundD64:
         /* IRRoundingMode(I32) x I8 x D64 -> D64 */
         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
      case Iop_SignificanceRoundD128:
         /* IRRoundingMode(I32) x I8 x D128 -> D128 */
         return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
      case Iop_SliceV128:
         /* (V128, V128, I8) -> V128 */
         complainIfUndefined(mce, atom3, NULL);
         return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
      case Iop_Slice64:
         /* (I64, I64, I8) -> I64 */
         complainIfUndefined(mce, atom3, NULL);
         return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3));
      case Iop_SetElem8x8:
      case Iop_SetElem16x4:
      case Iop_SetElem32x2:
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3));

      case Iop_SetElem8x16:
      case Iop_SetElem16x8:
      case Iop_SetElem32x4:
      case Iop_SetElem64x2:
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_V128, triop(op, vatom1, atom2, vatom3));

      case Iop_Perm8x16x2:
         /* (V128, V128, V128) -> V128 */
         complainIfUndefined(mce, atom3, NULL);
         return mkUifUV128(
                   mce,
                   assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3)),
                   mkPCast8x16(mce, vatom3)
                );

      /* Vector FP with rounding mode as the first arg */
      case Iop_Add64Fx2:
      case Iop_Sub64Fx2:
      case Iop_Mul64Fx2:
      case Iop_Div64Fx2:
      case Iop_Scale2_64Fx2:
         return binary64Fx2_w_rm(mce, vatom1, vatom2, vatom3);

      case Iop_Add32Fx4:
      case Iop_Sub32Fx4:
      case Iop_Mul32Fx4:
      case Iop_Div32Fx4:
      case Iop_Scale2_32Fx4:
         return binary32Fx4_w_rm(mce, vatom1, vatom2, vatom3);

      case Iop_Add64Fx4:
      case Iop_Sub64Fx4:
      case Iop_Mul64Fx4:
      case Iop_Div64Fx4:
         return binary64Fx4_w_rm(mce, vatom1, vatom2, vatom3);

      case Iop_Add32Fx8:
      case Iop_Sub32Fx8:
      case Iop_Mul32Fx8:
      case Iop_Div32Fx8:
         return binary32Fx8_w_rm(mce, vatom1, vatom2, vatom3);

      case Iop_F32x4_2toQ16x8:
         return assignNew('V', mce, Ity_V128,
                          binop(Iop_PackEvenLanes16x8,
                                unary32Fx4_w_rm(mce, vatom1, vatom2),
                                unary32Fx4_w_rm(mce, vatom1, vatom3)));
      case Iop_F64x2_2toQ32x4:
         return assignNew('V', mce, Ity_V128,
                          binop(Iop_PackEvenLanes32x4,
                                unary64Fx2_w_rm(mce, vatom1, vatom2),
                                unary64Fx2_w_rm(mce, vatom1, vatom3)));

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Triop");
   }
}
static
IRAtom* expr2vbits_Binop ( MCEnv* mce,
                           IROp op,
                           IRAtom* atom1, IRAtom* atom2,
                           HowUsed hu/*use HuOth if unknown*/ )
{
   IRType  and_or_ty;
   IRAtom* (*uifu)    (MCEnv*, IRAtom*, IRAtom*);
   IRAtom* (*difd)    (MCEnv*, IRAtom*, IRAtom*);
   IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);

   IRAtom* vatom1 = expr2vbits( mce, atom1, HuOth );
   IRAtom* vatom2 = expr2vbits( mce, atom2, HuOth );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   switch (op) {

      /* 32-bit SIMD */

      case Iop_Add16x2:
      case Iop_QAdd16Sx2:
      case Iop_QAdd16Ux2:
      case Iop_QSub16Sx2:
      case Iop_QSub16Ux2:
      case Iop_Sub16x2:
         return binary16Ix2(mce, vatom1, vatom2);

      case Iop_Add8x4:
      case Iop_QAdd8Sx4:
      case Iop_QAdd8Ux4:
      case Iop_QSub8Sx4:
      case Iop_QSub8Ux4:
      case Iop_Sub8x4:
         return binary8Ix4(mce, vatom1, vatom2);

      /* 64-bit SIMD */

      case Iop_ShlN8x8:
      case Iop_ShlN16x4:
      case Iop_ShlN32x2:
      case Iop_ShrN8x8:
      case Iop_ShrN16x4:
      case Iop_ShrN32x2:
      case Iop_SarN8x8:
      case Iop_SarN16x4:
      case Iop_SarN32x2:
         /* Same scheme as with all other shifts. */
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));

      case Iop_QNarrowBin32Sto16Sx4:
      case Iop_QNarrowBin16Sto8Sx8:
      case Iop_QNarrowBin16Sto8Ux8:
         return vectorNarrowBin64(mce, op, vatom1, vatom2);

      case Iop_PolynomialMul8x8:
         return binary8Ix8(mce, vatom1, vatom2);
      case Iop_MulHi16Sx4:
      case Iop_MulHi16Ux4:
      case Iop_CmpGT16Sx4:
      case Iop_CmpGT16Ux4:
      case Iop_QDMulHi16Sx4:
      case Iop_QRDMulHi16Sx4:
         return binary16Ix4(mce, vatom1, vatom2);

      case Iop_CmpGT32Sx2:
      case Iop_CmpGT32Ux2:
      case Iop_QDMulHi32Sx2:
      case Iop_QRDMulHi32Sx2:
         return binary32Ix2(mce, vatom1, vatom2);

      case Iop_QAdd64Sx1:
      case Iop_QAdd64Ux1:
      case Iop_QSub64Sx1:
      case Iop_QSub64Ux1:
         return binary64Ix1(mce, vatom1, vatom2);

      case Iop_QShlNsatSU8x8:
      case Iop_QShlNsatUU8x8:
      case Iop_QShlNsatSS8x8:
         complainIfUndefined(mce, atom2, NULL);
         return mkPCast8x8(mce, vatom1);

      case Iop_QShlNsatSU16x4:
      case Iop_QShlNsatUU16x4:
      case Iop_QShlNsatSS16x4:
         complainIfUndefined(mce, atom2, NULL);
         return mkPCast16x4(mce, vatom1);

      case Iop_QShlNsatSU32x2:
      case Iop_QShlNsatUU32x2:
      case Iop_QShlNsatSS32x2:
         complainIfUndefined(mce, atom2, NULL);
         return mkPCast32x2(mce, vatom1);

      case Iop_QShlNsatSU64x1:
      case Iop_QShlNsatUU64x1:
      case Iop_QShlNsatSS64x1:
         complainIfUndefined(mce, atom2, NULL);
         return mkPCast32x2(mce, vatom1);

      case Iop_PwMax32Sx2:
      case Iop_PwMax32Ux2:
      case Iop_PwMin32Sx2:
      case Iop_PwMin32Ux2:
      case Iop_PwMax32Fx2:
      case Iop_PwMin32Fx2:
         return assignNew('V', mce, Ity_I64,
                          binop(Iop_PwMax32Ux2,
                                mkPCast32x2(mce, vatom1),
                                mkPCast32x2(mce, vatom2)));

      case Iop_PwMax16Sx4:
      case Iop_PwMax16Ux4:
      case Iop_PwMin16Sx4:
      case Iop_PwMin16Ux4:
         return assignNew('V', mce, Ity_I64,
                          binop(Iop_PwMax16Ux4,
                                mkPCast16x4(mce, vatom1),
                                mkPCast16x4(mce, vatom2)));

      case Iop_PwMax8Sx8:
      case Iop_PwMax8Ux8:
      case Iop_PwMin8Sx8:
      case Iop_PwMin8Ux8:
         return assignNew('V', mce, Ity_I64,
                          binop(Iop_PwMax8Ux8,
                                mkPCast8x8(mce, vatom1),
                                mkPCast8x8(mce, vatom2)));
      case Iop_PwAdd32Fx2:
         return mkPCast32x2(mce,
                   assignNew('V', mce, Ity_I64,
                             binop(Iop_PwAdd32x2,
                                   mkPCast32x2(mce, vatom1),
                                   mkPCast32x2(mce, vatom2))));

      case Iop_PwAdd16x4:
         return mkPCast16x4(mce,
                   assignNew('V', mce, Ity_I64,
                             binop(op, mkPCast16x4(mce, vatom1),
                                       mkPCast16x4(mce, vatom2))));

      case Iop_PwAdd8x8:
         return mkPCast8x8(mce,
                   assignNew('V', mce, Ity_I64,
                             binop(op, mkPCast8x8(mce, vatom1),
                                       mkPCast8x8(mce, vatom2))));

      case Iop_Shl8x8:
      case Iop_Shr8x8:
      case Iop_Sar8x8:
      case Iop_Sal8x8:
         return mkUifU64(mce,
                   assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
                   mkPCast8x8(mce,vatom2)
                );

      case Iop_Shl16x4:
      case Iop_Shr16x4:
      case Iop_Sar16x4:
      case Iop_Sal16x4:
         return mkUifU64(mce,
                   assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
                   mkPCast16x4(mce,vatom2)
                );

      case Iop_Shl32x2:
      case Iop_Shr32x2:
      case Iop_Sar32x2:
      case Iop_Sal32x2:
         return mkUifU64(mce,
                   assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
                   mkPCast32x2(mce,vatom2)
                );
      /* 64-bit data-steering */
      case Iop_InterleaveLO32x2:
      case Iop_InterleaveLO16x4:
      case Iop_InterleaveLO8x8:
      case Iop_InterleaveHI32x2:
      case Iop_InterleaveHI16x4:
      case Iop_InterleaveHI8x8:
      case Iop_CatOddLanes8x8:
      case Iop_CatEvenLanes8x8:
      case Iop_CatOddLanes16x4:
      case Iop_CatEvenLanes16x4:
      case Iop_InterleaveOddLanes8x8:
      case Iop_InterleaveEvenLanes8x8:
      case Iop_InterleaveOddLanes16x4:
      case Iop_InterleaveEvenLanes16x4:
         return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));

      case Iop_GetElem8x8:
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
      case Iop_GetElem16x4:
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
      case Iop_GetElem32x2:
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));

      /* Perm8x8: rearrange values in left arg using steering values
         from right arg.  So rearrange the vbits in the same way but
         pessimise wrt steering values. */
      case Iop_Perm8x8:
         return mkUifU64(
                   mce,
                   assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
                   mkPCast8x8(mce, vatom2)
                );

      case Iop_Sqrt32Fx4:
         return unary32Fx4_w_rm(mce, vatom1, vatom2);
      case Iop_Sqrt64Fx2:
         return unary64Fx2_w_rm(mce, vatom1, vatom2);
      case Iop_ShlN8x16:
      case Iop_ShlN16x8:
      case Iop_ShlN32x4:
      case Iop_ShlN64x2:
      case Iop_ShrN8x16:
      case Iop_ShrN16x8:
      case Iop_ShrN32x4:
      case Iop_ShrN64x2:
      case Iop_SarN8x16:
      case Iop_SarN16x8:
      case Iop_SarN32x4:
      case Iop_SarN64x2:
         /* Same scheme as with all other shifts.  Note: 22 Oct 05:
            this is wrong now, scalar shifts are done properly lazily.
            Vector shifts should be fixed too. */
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));

      /* V x V shifts/rotates are done using the standard lazy scheme. */
      /* For the non-rounding variants of bi-di vector x vector
         shifts (the Iop_Sh.. ops, that is) we use the lazy scheme.
         But note that this is overly pessimistic, because in fact only
         the bottom 8 bits of each lane of the second argument are taken
         into account when shifting.  So really we ought to ignore
         undefinedness in bits 8 and above of each lane in the
         second argument. */
      case Iop_Shl8x16:
      case Iop_Shr8x16:
      case Iop_Sar8x16:
      case Iop_Sal8x16:
      case Iop_Rol8x16:
      case Iop_Sh8Sx16:
      case Iop_Sh8Ux16:
         return mkUifUV128(mce,
                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
                   mkPCast8x16(mce,vatom2)
                );

      case Iop_Shl16x8:
      case Iop_Shr16x8:
      case Iop_Sar16x8:
      case Iop_Sal16x8:
      case Iop_Rol16x8:
      case Iop_Sh16Sx8:
      case Iop_Sh16Ux8:
         return mkUifUV128(mce,
                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
                   mkPCast16x8(mce,vatom2)
                );

      case Iop_Shl32x4:
      case Iop_Shr32x4:
      case Iop_Sar32x4:
      case Iop_Sal32x4:
      case Iop_Rol32x4:
      case Iop_Sh32Sx4:
      case Iop_Sh32Ux4:
         return mkUifUV128(mce,
                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
                   mkPCast32x4(mce,vatom2)
                );

      case Iop_Shl64x2:
      case Iop_Shr64x2:
      case Iop_Sar64x2:
      case Iop_Sal64x2:
      case Iop_Rol64x2:
      case Iop_Sh64Sx2:
      case Iop_Sh64Ux2:
         return mkUifUV128(mce,
                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
                   mkPCast64x2(mce,vatom2)
                );

      /* For the rounding variants of bi-di vector x vector shifts, the
         rounding adjustment can cause undefinedness to propagate through
         the entire lane, in the worst case.  Too complex to handle
         properly .. just UifU the arguments and then PCast them.
         Suboptimal but safe. */
      case Iop_Rsh8Sx16:
      case Iop_Rsh8Ux16:
         return binary8Ix16(mce, vatom1, vatom2);
      case Iop_Rsh16Sx8:
      case Iop_Rsh16Ux8:
         return binary16Ix8(mce, vatom1, vatom2);
      case Iop_Rsh32Sx4:
      case Iop_Rsh32Ux4:
         return binary32Ix4(mce, vatom1, vatom2);
      case Iop_Rsh64Sx2:
      case Iop_Rsh64Ux2:
         return binary64Ix2(mce, vatom1, vatom2);
      case Iop_F32ToFixed32Ux4_RZ:
      case Iop_F32ToFixed32Sx4_RZ:
      case Iop_Fixed32UToF32x4_RN:
      case Iop_Fixed32SToF32x4_RN:
         complainIfUndefined(mce, atom2, NULL);
         return mkPCast32x4(mce, vatom1);

      case Iop_F32ToFixed32Ux2_RZ:
      case Iop_F32ToFixed32Sx2_RZ:
      case Iop_Fixed32UToF32x2_RN:
      case Iop_Fixed32SToF32x2_RN:
         complainIfUndefined(mce, atom2, NULL);
         return mkPCast32x2(mce, vatom1);
      case Iop_CmpGT8Sx16:
      case Iop_CmpGT8Ux16:
      case Iop_QAddExtUSsatSS8x16:
      case Iop_QAddExtSUsatUU8x16:
      case Iop_PolynomialMul8x16:
      case Iop_PolynomialMulAdd8x16:
         return binary8Ix16(mce, vatom1, vatom2);

      case Iop_MulHi16Sx8:
      case Iop_MulHi16Ux8:
      case Iop_CmpGT16Sx8:
      case Iop_CmpGT16Ux8:
      case Iop_QAddExtUSsatSS16x8:
      case Iop_QAddExtSUsatUU16x8:
      case Iop_QDMulHi16Sx8:
      case Iop_QRDMulHi16Sx8:
      case Iop_PolynomialMulAdd16x8:
         return binary16Ix8(mce, vatom1, vatom2);

      case Iop_CmpGT32Sx4:
      case Iop_CmpGT32Ux4:
      case Iop_QAddExtUSsatSS32x4:
      case Iop_QAddExtSUsatUU32x4:
      case Iop_QDMulHi32Sx4:
      case Iop_QRDMulHi32Sx4:
      case Iop_PolynomialMulAdd32x4:
         return binary32Ix4(mce, vatom1, vatom2);

      case Iop_CmpGT64Sx2:
      case Iop_CmpGT64Ux2:
      case Iop_QAddExtUSsatSS64x2:
      case Iop_QAddExtSUsatUU64x2:
      case Iop_PolynomialMulAdd64x2:
      case Iop_CipherV128:
      case Iop_CipherLV128:
      case Iop_NCipherV128:
      case Iop_NCipherLV128:
      case Iop_MulI128by10E:
      case Iop_MulI128by10ECarry:
         return binary64Ix2(mce, vatom1, vatom2);

      case Iop_QNarrowBin64Sto32Sx4:
      case Iop_QNarrowBin64Uto32Ux4:
      case Iop_QNarrowBin32Sto16Sx8:
      case Iop_QNarrowBin32Uto16Ux8:
      case Iop_QNarrowBin32Sto16Ux8:
      case Iop_QNarrowBin16Sto8Sx16:
      case Iop_QNarrowBin16Uto8Ux16:
      case Iop_QNarrowBin16Sto8Ux16:
         return vectorNarrowBinV128(mce, op, vatom1, vatom2);
      case Iop_CmpLT64Fx2:
      case Iop_CmpLE64Fx2:
      case Iop_CmpEQ64Fx2:
      case Iop_CmpUN64Fx2:
      case Iop_RecipStep64Fx2:
      case Iop_RSqrtStep64Fx2:
         return binary64Fx2(mce, vatom1, vatom2);

      case Iop_CmpLT64F0x2:
      case Iop_CmpLE64F0x2:
      case Iop_CmpEQ64F0x2:
      case Iop_CmpUN64F0x2:
         return binary64F0x2(mce, vatom1, vatom2);

      case Iop_CmpLT32Fx4:
      case Iop_CmpLE32Fx4:
      case Iop_CmpEQ32Fx4:
      case Iop_CmpUN32Fx4:
      case Iop_CmpGT32Fx4:
      case Iop_CmpGE32Fx4:
      case Iop_RecipStep32Fx4:
      case Iop_RSqrtStep32Fx4:
         return binary32Fx4(mce, vatom1, vatom2);

      case Iop_CmpEQ32Fx2:
      case Iop_CmpGT32Fx2:
      case Iop_CmpGE32Fx2:
      case Iop_RecipStep32Fx2:
      case Iop_RSqrtStep32Fx2:
         return binary32Fx2(mce, vatom1, vatom2);

      case Iop_CmpLT32F0x4:
      case Iop_CmpLE32F0x4:
      case Iop_CmpEQ32F0x4:
      case Iop_CmpUN32F0x4:
         return binary32F0x4(mce, vatom1, vatom2);

      case Iop_QShlNsatSU8x16:
      case Iop_QShlNsatUU8x16:
      case Iop_QShlNsatSS8x16:
         complainIfUndefined(mce, atom2, NULL);
         return mkPCast8x16(mce, vatom1);

      case Iop_QShlNsatSU16x8:
      case Iop_QShlNsatUU16x8:
      case Iop_QShlNsatSS16x8:
         complainIfUndefined(mce, atom2, NULL);
         return mkPCast16x8(mce, vatom1);

      case Iop_QShlNsatSU32x4:
      case Iop_QShlNsatUU32x4:
      case Iop_QShlNsatSS32x4:
         complainIfUndefined(mce, atom2, NULL);
         return mkPCast32x4(mce, vatom1);

      case Iop_QShlNsatSU64x2:
      case Iop_QShlNsatUU64x2:
      case Iop_QShlNsatSS64x2:
         complainIfUndefined(mce, atom2, NULL);
         return mkPCast32x4(mce, vatom1);
      /* Q-and-Qshift-by-imm-and-narrow of the form (V128, I8) -> V128.
         To make this simpler, do the following:
         * complain if the shift amount (the I8) is undefined
         * pcast each lane at the wide width
         * truncate each lane to half width
         * pcast the resulting 64-bit value to a single bit and use
           that as the least significant bit of the upper half of the
           result. */
      case Iop_QandQShrNnarrow64Uto32Ux2:
      case Iop_QandQSarNnarrow64Sto32Sx2:
      case Iop_QandQSarNnarrow64Sto32Ux2:
      case Iop_QandQRShrNnarrow64Uto32Ux2:
      case Iop_QandQRSarNnarrow64Sto32Sx2:
      case Iop_QandQRSarNnarrow64Sto32Ux2:
      case Iop_QandQShrNnarrow32Uto16Ux4:
      case Iop_QandQSarNnarrow32Sto16Sx4:
      case Iop_QandQSarNnarrow32Sto16Ux4:
      case Iop_QandQRShrNnarrow32Uto16Ux4:
      case Iop_QandQRSarNnarrow32Sto16Sx4:
      case Iop_QandQRSarNnarrow32Sto16Ux4:
      case Iop_QandQShrNnarrow16Uto8Ux8:
      case Iop_QandQSarNnarrow16Sto8Sx8:
      case Iop_QandQSarNnarrow16Sto8Ux8:
      case Iop_QandQRShrNnarrow16Uto8Ux8:
      case Iop_QandQRSarNnarrow16Sto8Sx8:
      case Iop_QandQRSarNnarrow16Sto8Ux8:
      {
         IRAtom* (*fnPessim) (MCEnv*, IRAtom*) = NULL;
         IROp opNarrow = Iop_INVALID;
         switch (op) {
            case Iop_QandQShrNnarrow64Uto32Ux2:
            case Iop_QandQSarNnarrow64Sto32Sx2:
            case Iop_QandQSarNnarrow64Sto32Ux2:
            case Iop_QandQRShrNnarrow64Uto32Ux2:
            case Iop_QandQRSarNnarrow64Sto32Sx2:
            case Iop_QandQRSarNnarrow64Sto32Ux2:
               fnPessim = mkPCast64x2;
               opNarrow = Iop_NarrowUn64to32x2;
               break;
            case Iop_QandQShrNnarrow32Uto16Ux4:
            case Iop_QandQSarNnarrow32Sto16Sx4:
            case Iop_QandQSarNnarrow32Sto16Ux4:
            case Iop_QandQRShrNnarrow32Uto16Ux4:
            case Iop_QandQRSarNnarrow32Sto16Sx4:
            case Iop_QandQRSarNnarrow32Sto16Ux4:
               fnPessim = mkPCast32x4;
               opNarrow = Iop_NarrowUn32to16x4;
               break;
            case Iop_QandQShrNnarrow16Uto8Ux8:
            case Iop_QandQSarNnarrow16Sto8Sx8:
            case Iop_QandQSarNnarrow16Sto8Ux8:
            case Iop_QandQRShrNnarrow16Uto8Ux8:
            case Iop_QandQRSarNnarrow16Sto8Sx8:
            case Iop_QandQRSarNnarrow16Sto8Ux8:
               fnPessim = mkPCast16x8;
               opNarrow = Iop_NarrowUn16to8x8;
               break;
            default:
               tl_assert(0);
         }
         complainIfUndefined(mce, atom2, NULL);
         // Pessimised shift result
         IRAtom* shV
            = fnPessim(mce, vatom1);
         // Narrowed, pessimised shift result
         IRAtom* shVnarrowed
            = assignNew('V', mce, Ity_I64, unop(opNarrow, shV));
         // Generates: Def--(63)--Def PCast-to-I1(narrowed)
         IRAtom* qV = mkPCastXXtoXXlsb(mce, shVnarrowed, Ity_I64);
         // and assemble the result
         return assignNew('V', mce, Ity_V128,
                          binop(Iop_64HLtoV128, qV, shVnarrowed));
      }
      case Iop_QDMull32Sx2:
         return vectorWidenI64(mce, Iop_Widen32Sto64x2,
                                    mkUifU64(mce, vatom1, vatom2));

      case Iop_QDMull16Sx4:
         return vectorWidenI64(mce, Iop_Widen16Sto32x4,
                                    mkUifU64(mce, vatom1, vatom2));

      case Iop_PolynomialMull8x8:
         return vectorWidenI64(mce, Iop_Widen8Sto16x8,
                                    mkUifU64(mce, vatom1, vatom2));

      case Iop_PwAdd32x4:
         return mkPCast32x4(mce,
                   assignNew('V', mce, Ity_V128,
                             binop(op, mkPCast32x4(mce, vatom1),
                                       mkPCast32x4(mce, vatom2))));

      case Iop_PwAdd16x8:
         return mkPCast16x8(mce,
                   assignNew('V', mce, Ity_V128,
                             binop(op, mkPCast16x8(mce, vatom1),
                                       mkPCast16x8(mce, vatom2))));

      case Iop_PwAdd8x16:
         return mkPCast8x16(mce,
                   assignNew('V', mce, Ity_V128,
                             binop(op, mkPCast8x16(mce, vatom1),
                                       mkPCast8x16(mce, vatom2))));
      /* V128-bit data-steering */
      case Iop_SetV128lo32:
      case Iop_SetV128lo64:
      case Iop_64HLtoV128:
      case Iop_InterleaveLO64x2:
      case Iop_InterleaveLO32x4:
      case Iop_InterleaveLO16x8:
      case Iop_InterleaveLO8x16:
      case Iop_InterleaveHI64x2:
      case Iop_InterleaveHI32x4:
      case Iop_InterleaveHI16x8:
      case Iop_InterleaveHI8x16:
      case Iop_CatOddLanes8x16:
      case Iop_CatOddLanes16x8:
      case Iop_CatOddLanes32x4:
      case Iop_CatEvenLanes8x16:
      case Iop_CatEvenLanes16x8:
      case Iop_CatEvenLanes32x4:
      case Iop_InterleaveOddLanes8x16:
      case Iop_InterleaveOddLanes16x8:
      case Iop_InterleaveOddLanes32x4:
      case Iop_InterleaveEvenLanes8x16:
      case Iop_InterleaveEvenLanes16x8:
      case Iop_InterleaveEvenLanes32x4:
      case Iop_PackOddLanes8x16:
      case Iop_PackOddLanes16x8:
      case Iop_PackOddLanes32x4:
      case Iop_PackEvenLanes8x16:
      case Iop_PackEvenLanes16x8:
      case Iop_PackEvenLanes32x4:
         return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));

      case Iop_GetElem8x16:
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
      case Iop_GetElem16x8:
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
      case Iop_GetElem32x4:
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
      case Iop_GetElem64x2:
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));

      /* Perm8x16: rearrange values in left arg using steering values
         from right arg.  So rearrange the vbits in the same way but
         pessimise wrt steering values.  Perm32x4 ditto. */
      case Iop_Perm8x16:
         return mkUifUV128(
                   mce,
                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
                   mkPCast8x16(mce, vatom2)
                );
      case Iop_Perm32x4:
         return mkUifUV128(
                   mce,
                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
                   mkPCast32x4(mce, vatom2)
                );

      /* These two take the lower half of each 16-bit lane, sign/zero
         extend it to 32, and multiply together, producing a 32x4
         result (and implicitly ignoring half the operand bits).  So
         treat it as a bunch of independent 16x8 operations, but then
         do 32-bit shifts left-right to copy the lower half results
         (which are all 0s or all 1s due to PCasting in binary16Ix8)
         into the upper half of each result lane. */
      case Iop_MullEven16Ux8:
      case Iop_MullEven16Sx8: {
         IRAtom* at;
         at = binary16Ix8(mce,vatom1,vatom2);
         at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
         at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
         return at;
      }

      /* Same deal as Iop_MullEven16{S,U}x8 */
      case Iop_MullEven8Ux16:
      case Iop_MullEven8Sx16: {
         IRAtom* at;
         at = binary8Ix16(mce,vatom1,vatom2);
         at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
         at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
         return at;
      }

      /* Same deal as Iop_MullEven16{S,U}x8 */
      case Iop_MullEven32Ux4:
      case Iop_MullEven32Sx4: {
         IRAtom* at;
         at = binary32Ix4(mce,vatom1,vatom2);
         at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN64x2, at, mkU8(32)));
         at = assignNew('V', mce, Ity_V128, binop(Iop_SarN64x2, at, mkU8(32)));
         return at;
      }

      /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
         32x4 -> 16x8 laneage, discarding the upper half of each lane.
         Simply apply same op to the V bits, since this really no more
         than a data steering operation. */
      case Iop_NarrowBin32to16x8:
      case Iop_NarrowBin16to8x16:
      case Iop_NarrowBin64to32x4:
         return assignNew('V', mce, Ity_V128,
                          binop(op, vatom1, vatom2));

      case Iop_ShlV128:
      case Iop_ShrV128:
      case Iop_I128StoBCD128:
         /* Same scheme as with all other shifts.  Note: 10 Nov 05:
            this is wrong now, scalar shifts are done properly lazily.
            Vector shifts should be fixed too. */
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));

      case Iop_BCDAdd:
      case Iop_BCDSub:
         return mkLazy2(mce, Ity_V128, vatom1, vatom2);

      /* SHA Iops */
      case Iop_SHA256:
      case Iop_SHA512:
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));

      /* I128-bit data-steering */
      case Iop_64HLto128:
         return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));

      /* V256-bit SIMD */

      case Iop_Max64Fx4:
      case Iop_Min64Fx4:
         return binary64Fx4(mce, vatom1, vatom2);

      case Iop_Max32Fx8:
      case Iop_Min32Fx8:
         return binary32Fx8(mce, vatom1, vatom2);

      /* V256-bit data-steering */
      case Iop_V128HLtoV256:
         return assignNew('V', mce, Ity_V256, binop(op, vatom1, vatom2));
      /* Scalar floating point */

         /* I32(rm) x F32 -> I64 */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

         /* I32(rm) x I64 -> F32 */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      case Iop_RoundF64toInt:
      case Iop_RoundF64toF32:
      case Iop_RecpExpF64:
         /* I32(rm) x I64/F64 -> I64/F64 */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_RoundD64toInt:
         /* I32(rm) x D64 -> D64 */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_RoundD128toInt:
         /* I32(rm) x D128 -> D128 */
         return mkLazy2(mce, Ity_I128, vatom1, vatom2);

      case Iop_RoundF128toInt:
         /* I32(rm) x F128 -> F128 */
         return mkLazy2(mce, Ity_I128, vatom1, vatom2);

         /* I32(rm) x I64/D64 -> D64/I64 */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

         /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D32/F32 */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

         /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D64/F64 */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_F128toD128:
      case Iop_D128toF128:
         /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D128/F128 */
         return mkLazy2(mce, Ity_I128, vatom1, vatom2);

      case Iop_RoundF32toInt:
      case Iop_RecpExpF32:
         /* I32(rm) x I32/F32 -> I32/F32 */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

         /* I32(rm) x F128 -> F128 */
         return mkLazy2(mce, Ity_I128, vatom1, vatom2);

         /* First arg is I32 (rounding mode), second is F32/I32 (data). */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

         /* First arg is I32 (rounding mode), second is F64/F32 (data). */
         return mkLazy2(mce, Ity_I16, vatom1, vatom2);

      case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32   */
      case Iop_F128toI32U: /* IRRoundingMode(I32) x F128 -> unsigned I32 */
      case Iop_F128toF32:  /* IRRoundingMode(I32) x F128 -> F32          */
      case Iop_D128toI32S: /* IRRoundingMode(I32) x D128 -> signed I32   */
      case Iop_D128toI32U: /* IRRoundingMode(I32) x D128 -> unsigned I32 */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      case Iop_F128toI128S: /* IRRoundingMode(I32) x F128 -> signed I128 */
      case Iop_RndF128:     /* IRRoundingMode(I32) x F128 -> F128        */
         return mkLazy2(mce, Ity_I128, vatom1, vatom2);

      case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64   */
      case Iop_F128toI64U: /* IRRoundingMode(I32) x F128 -> unsigned I64 */
      case Iop_F128toF64:  /* IRRoundingMode(I32) x F128 -> F64          */
      case Iop_D128toD64:  /* IRRoundingMode(I64) x D128 -> D64          */
      case Iop_D128toI64S: /* IRRoundingMode(I64) x D128 -> signed I64   */
      case Iop_D128toI64U: /* IRRoundingMode(I32) x D128 -> unsigned I64 */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_F64HLtoF128:
      case Iop_D64HLtoD128:
         return assignNew('V', mce, Ity_I128,
                          binop(Iop_64HLto128, vatom1, vatom2));
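      /* A note on the pattern above (an editorial observation, not
         generated code): mkLazy2 produces a worst-case result whose shadow
         type is the integer type of the same width as the real result, so
         an F32 result is shadowed as Ity_I32, F64/D64 as Ity_I64 and
         F128/D128 as Ity_I128.  For instance a hypothetical
         "I32(rm) x F64 -> F64" op would be handled as
            return mkLazy2(mce, Ity_I64, vatom1, vatom2);
         meaning: if any bit of either argument's V bits is undefined, all
         64 shadow bits of the result are marked undefined. */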
         /* First arg is I32 (rounding mode), second is F64/D64 (data). */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

         /* First arg is I32 (rounding mode), second is D64 (data). */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

         /* First arg is I32 (rounding mode), second is F64 (data). */
         return mkLazy2(mce, Ity_I16, vatom1, vatom2);

      case Iop_InsertExpD64:
         /* I64 x I64 -> D64 */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_InsertExpD128:
         /* I64 x I128 -> D128 */
         return mkLazy2(mce, Ity_I128, vatom1, vatom2);

      case Iop_CmpExpD128:
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

         /* F32 x F32 -> F32 */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

         /* F64 x F64 -> F64 */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
      /* non-FP after here */

      case Iop_DivModU64to32:
      case Iop_DivModS64to32:
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_DivModU128to64:
      case Iop_DivModS128to64:
         return mkLazy2(mce, Ity_I128, vatom1, vatom2);

         return assignNew('V', mce, Ity_I16, binop(op, vatom1, vatom2));
         return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
         return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));

      case Iop_DivModU64to64:
      case Iop_DivModS64to64: {
         IRAtom* vTmp64 = mkLazy2(mce, Ity_I64, vatom1, vatom2);
         return assignNew('V', mce, Ity_I128,
                          binop(Iop_64HLto128, vTmp64, vTmp64));
      }
         IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
         IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
         return assignNew('V', mce, Ity_I128,
                          binop(Iop_64HLto128, vHi64, vLo64));

      case Iop_DivModU32to32:
      case Iop_DivModS32to32: {
         IRAtom* vTmp32 = mkLazy2(mce, Ity_I32, vatom1, vatom2);
         return assignNew('V', mce, Ity_I64,
                          binop(Iop_32HLto64, vTmp32, vTmp32));
      }

         IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
         IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
         return assignNew('V', mce, Ity_I64,
                          binop(Iop_32HLto64, vHi32, vLo32));

         IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
         IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
         return assignNew('V', mce, Ity_I32,
                          binop(Iop_16HLto32, vHi16, vLo16));

         IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
         IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
         return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
      case Iop_Sad8Ux4: /* maybe we could do better?  ftm, do mkLazy2. */
      case Iop_QAdd32S: /* could probably do better */
      case Iop_QSub32S: /* could probably do better */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
         if (mce->dlbo.dl_Add32 == DLexpensive
             || (mce->dlbo.dl_Add32 == DLauto && hu == HuOth)) {
            return expensiveAddSub(mce,True,Ity_I32,
                                   vatom1,vatom2, atom1,atom2);
         } else {
            goto cheap_AddSub32;
         }

         if (mce->dlbo.dl_Sub32 == DLexpensive
             || (mce->dlbo.dl_Sub32 == DLauto && hu == HuOth)) {
            return expensiveAddSub(mce,False,Ity_I32,
                                   vatom1,vatom2, atom1,atom2);
         } else {
            goto cheap_AddSub32;
         }

      cheap_AddSub32:
         return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));

         return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);

         if (mce->dlbo.dl_Add64 == DLexpensive
             || (mce->dlbo.dl_Add64 == DLauto && hu == HuOth)) {
            return expensiveAddSub(mce,True,Ity_I64,
                                   vatom1,vatom2, atom1,atom2);
         } else {
            goto cheap_AddSub64;
         }

         if (mce->dlbo.dl_Sub64 == DLexpensive
             || (mce->dlbo.dl_Sub64 == DLauto && hu == HuOth)) {
            return expensiveAddSub(mce,False,Ity_I64,
                                   vatom1,vatom2, atom1,atom2);
         } else {
            goto cheap_AddSub64;
         }

      cheap_AddSub64:
         return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));

         return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));

         return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
      case Iop_CmpEQ64: case Iop_CmpNE64:
         if (mce->dlbo.dl_CmpEQ64_CmpNE64 == DLexpensive)
            goto expensive_cmp64;

      case Iop_ExpCmpNE64:
         return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );

      case Iop_CmpLE64S: case Iop_CmpLE64U:
      case Iop_CmpLT64U: case Iop_CmpLT64S:
         return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));

      case Iop_CmpEQ32: case Iop_CmpNE32:
         if (mce->dlbo.dl_CmpEQ32_CmpNE32 == DLexpensive)
            goto expensive_cmp32;

      case Iop_ExpCmpNE32:
         return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );

      case Iop_CmpLE32S: case Iop_CmpLE32U:
      case Iop_CmpLT32U: case Iop_CmpLT32S:
         return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));

      case Iop_CmpEQ16: case Iop_CmpNE16:
         if (mce->dlbo.dl_CmpEQ16_CmpNE16 == DLexpensive)
            goto expensive_cmp16;

      case Iop_ExpCmpNE16:
         return expensiveCmpEQorNE(mce,Ity_I16, vatom1,vatom2, atom1,atom2 );

         return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));

      case Iop_CmpEQ8: case Iop_CmpNE8:
         if (mce->dlbo.dl_CmpEQ8_CmpNE8 == DLexpensive)
            goto expensive_cmp8;

         return expensiveCmpEQorNE(mce,Ity_I8, vatom1,vatom2, atom1,atom2 );

         return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));

      ////---- end CmpXX{64,32,16,8}
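      /* Worked example of the cheap comparison scheme above (an editorial
         sketch, not generated verbatim): for CmpLE32S(a, b) the shadow
         result is PCastTo(Ity_I1, UifU32(a#, b#)), i.e. the single result
         bit is flagged undefined as soon as any bit of either operand is
         undefined.  That is deliberately cruder than the
         expensiveCmpEQorNE path, which, roughly, looks at whether the
         defined parts of the operands already differ before deciding
         whether the outcome could depend on the undefined bits. */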
      case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
      case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
      case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
      case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
         /* Just say these all produce a defined result, regardless
            of their arguments.  See COMMENT_ON_CasCmpEQ in this file. */
         return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));

      case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
         return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );

      case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
         return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );

      case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
         return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );

      case Iop_Shl8: case Iop_Shr8: case Iop_Sar8:
         return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
      case Iop_AndV256:
         uifu = mkUifUV256; difd = mkDifDV256;
         and_or_ty = Ity_V256; improve = mkImproveANDV256; goto do_And_Or;
      case Iop_AndV128:
         uifu = mkUifUV128; difd = mkDifDV128;
         and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
      case Iop_And64:
         uifu = mkUifU64; difd = mkDifD64;
         and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
      case Iop_And32:
         uifu = mkUifU32; difd = mkDifD32;
         and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
      case Iop_And16:
         uifu = mkUifU16; difd = mkDifD16;
         and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
      case Iop_And8:
         uifu = mkUifU8; difd = mkDifD8;
         and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;

      case Iop_OrV256:
         uifu = mkUifUV256; difd = mkDifDV256;
         and_or_ty = Ity_V256; improve = mkImproveORV256; goto do_And_Or;
      case Iop_OrV128:
         uifu = mkUifUV128; difd = mkDifDV128;
         and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
      case Iop_Or64:
         uifu = mkUifU64; difd = mkDifD64;
         and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
      case Iop_Or32:
         uifu = mkUifU32; difd = mkDifD32;
         and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
      case Iop_Or16:
         uifu = mkUifU16; difd = mkDifD16;
         and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
      case Iop_Or8:
         uifu = mkUifU8; difd = mkDifD8;
         and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;

      do_And_Or:
         return
         assignNew(
            'V', mce, and_or_ty,
            difd(mce, uifu(mce, vatom1, vatom2),
                      difd(mce, improve(mce, atom1, vatom1),
                                improve(mce, atom2, vatom2) ) ) );
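      /* Worked example for the And/Or scheme above (an editorial sketch
         using concrete byte values, not generated code).  For And8 with
         atom1 = 0x00 fully defined and atom2 completely undefined: the
         uifu term alone would report the result as all-undefined, but
         improve(atom1, vatom1) records the bits that are known to be a
         defined 0, and the surrounding difd removes the corresponding
         undefinedness, so the result is reported as fully defined -- which
         is right, because 0 AND anything is 0.  The OR variant improves on
         bits known to be a defined 1, for the symmetric reason. */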
      case Iop_Xor8:
         return mkUifU8(mce, vatom1, vatom2);
      case Iop_Xor16:
         return mkUifU16(mce, vatom1, vatom2);
      case Iop_Xor32:
         return mkUifU32(mce, vatom1, vatom2);
      case Iop_Xor64:
         return mkUifU64(mce, vatom1, vatom2);
      case Iop_XorV128:
         return mkUifUV128(mce, vatom1, vatom2);
      case Iop_XorV256:
         return mkUifUV256(mce, vatom1, vatom2);
         /* Same scheme as with all other shifts.  Note: 22 Oct 05:
            this is wrong now, scalar shifts are done properly lazily.
            Vector shifts should be fixed too. */
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2));

      case Iop_CmpGT8Sx32:
         return binary8Ix32(mce, vatom1, vatom2);

      case Iop_QSub16Ux16:
      case Iop_QSub16Sx16:
      case Iop_MulHi16Sx16:
      case Iop_MulHi16Ux16:
      case Iop_CmpGT16Sx16:
      case Iop_CmpEQ16x16:
      case Iop_QAdd16Ux16:
      case Iop_QAdd16Sx16:
         return binary16Ix16(mce, vatom1, vatom2);

      case Iop_CmpGT32Sx8:
         return binary32Ix8(mce, vatom1, vatom2);

      case Iop_CmpGT64Sx4:
         return binary64Ix4(mce, vatom1, vatom2);

      /* Perm32x8: rearrange values in left arg using steering values
         from right arg.  So rearrange the vbits in the same way but
         pessimise wrt steering values. */
      case Iop_Perm32x8:
         return mkUifUV256(
                   mce,
                   assignNew('V', mce, Ity_V256, binop(op, vatom1, atom2)),
                   mkPCast32x8(mce, vatom2)
                );
      /* Q-and-Qshift-by-vector of the form (V128, V128) -> V256.
         Handle the shifted results in the same way that other
         binary Q ops are handled, eg QSub: UifU the two args,
         then pessimise -- which is binaryNIxM.  But for the upper
         V128, we need to generate just 1 bit which is the
         pessimised shift result, with 127 defined zeroes above it.

         Note that this is overly pessimistic in that in fact only the
         bottom 8 bits of each lane of the second arg determine the shift
         amount.  Really we ought to ignore any undefinedness in the
         rest of the lanes of the second arg. */
      case Iop_QandSQsh64x2:  case Iop_QandUQsh64x2:
      case Iop_QandSQRsh64x2: case Iop_QandUQRsh64x2:
      case Iop_QandSQsh32x4:  case Iop_QandUQsh32x4:
      case Iop_QandSQRsh32x4: case Iop_QandUQRsh32x4:
      case Iop_QandSQsh16x8:  case Iop_QandUQsh16x8:
      case Iop_QandSQRsh16x8: case Iop_QandUQRsh16x8:
      case Iop_QandSQsh8x16:  case Iop_QandUQsh8x16:
      case Iop_QandSQRsh8x16: case Iop_QandUQRsh8x16:
      {
         // The function to generate the pessimised shift result
         IRAtom* (*binaryNIxM)(MCEnv*,IRAtom*,IRAtom*) = NULL;
         switch (op) {
            case Iop_QandSQsh64x2:
            case Iop_QandUQsh64x2:
            case Iop_QandSQRsh64x2:
            case Iop_QandUQRsh64x2:
               binaryNIxM = binary64Ix2;
               break;
            case Iop_QandSQsh32x4:
            case Iop_QandUQsh32x4:
            case Iop_QandSQRsh32x4:
            case Iop_QandUQRsh32x4:
               binaryNIxM = binary32Ix4;
               break;
            case Iop_QandSQsh16x8:
            case Iop_QandUQsh16x8:
            case Iop_QandSQRsh16x8:
            case Iop_QandUQRsh16x8:
               binaryNIxM = binary16Ix8;
               break;
            case Iop_QandSQsh8x16:
            case Iop_QandUQsh8x16:
            case Iop_QandSQRsh8x16:
            case Iop_QandUQRsh8x16:
               binaryNIxM = binary8Ix16;
               break;
            default:
               tl_assert(0);
         }
         tl_assert(binaryNIxM);
         // Pessimised shift result, shV[127:0]
         IRAtom* shV = binaryNIxM(mce, vatom1, vatom2);
         // Generates: Def--(127)--Def PCast-to-I1(shV)
         IRAtom* qV = mkPCastXXtoXXlsb(mce, shV, Ity_V128);
         // and assemble the result
         return assignNew('V', mce, Ity_V256,
                          binop(Iop_V128HLtoV256, qV, shV));
      }
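      /* Shape of the result built above (an editorial sketch, not verbatim
         output): the returned V256 is V128HLtoV256(qV, shV), where the low
         half shV carries the lane-wise pessimised V bits of the shifted
         data, and the high half qV is 127 defined zero bits on top of a
         single bit that is the PCast-to-I1 of shV -- so the saturation
         flag in the upper half is marked undefined whenever any bit of the
         shifted result's shadow is undefined. */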
         VG_(tool_panic)("memcheck:expr2vbits_Binop");
   }
}


static
IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
{
   /* For the widening operations {8,16,32}{U,S}to{16,32,64}, the
      selection of shadow operation implicitly duplicates the logic in
      do_shadow_LoadG and should be kept in sync (in the very unlikely
      event that the interpretation of such widening ops changes in
      future).  See comment in do_shadow_LoadG. */
   IRAtom* vatom = expr2vbits( mce, atom, HuOth );
   tl_assert(isOriginalAtom(mce,atom));
   switch (op) {

      case Iop_RSqrtEst64Fx2:
      case Iop_RecipEst64Fx2:
      case Iop_Log2_64Fx2:
         return unary64Fx2(mce, vatom);

      case Iop_Sqrt64F0x2:
         return unary64F0x2(mce, vatom);

      case Iop_RSqrtEst32Fx8:
      case Iop_RecipEst32Fx8:
         return unary32Fx8(mce, vatom);

         return unary64Fx4(mce, vatom);

      case Iop_RecipEst32Fx4:
      case Iop_QFtoI32Ux4_RZ:
      case Iop_QFtoI32Sx4_RZ:
      case Iop_RoundF32x4_RM:
      case Iop_RoundF32x4_RP:
      case Iop_RoundF32x4_RN:
      case Iop_RoundF32x4_RZ:
      case Iop_RecipEst32Ux4:
      case Iop_RSqrtEst32Fx4:
      case Iop_Log2_32Fx4:
         return unary32Fx4(mce, vatom);

      case Iop_RecipEst32Fx2:
      case Iop_RecipEst32Ux2:
      case Iop_RSqrtEst32Fx2:
         return unary32Fx2(mce, vatom);

      case Iop_Sqrt32F0x4:
      case Iop_RSqrtEst32F0x4:
      case Iop_RecipEst32F0x4:
         return unary32F0x4(mce, vatom);

      case Iop_Reverse1sIn8_x16:
      case Iop_Reverse8sIn16_x8:
      case Iop_Reverse8sIn32_x4:
      case Iop_Reverse16sIn32_x4:
      case Iop_Reverse8sIn64_x2:
      case Iop_Reverse16sIn64_x2:
      case Iop_Reverse32sIn64_x2:
      case Iop_V256toV128_1: case Iop_V256toV128_0:
      case Iop_ZeroHI64ofV128:
      case Iop_ZeroHI96ofV128:
      case Iop_ZeroHI112ofV128:
      case Iop_ZeroHI120ofV128:
         return assignNew('V', mce, Ity_V128, unop(op, vatom));

      case Iop_F128HItoF64:  /* F128 -> high half of F128 */
      case Iop_D128HItoD64:  /* D128 -> high half of D128 */
         return assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vatom));
      case Iop_F128LOtoF64:  /* F128 -> low  half of F128 */
      case Iop_D128LOtoD64:  /* D128 -> low  half of D128 */
         return assignNew('V', mce, Ity_I64, unop(Iop_128to64, vatom));

      case Iop_TruncF128toI64S: /* F128 -> I64S */
      case Iop_TruncF128toI32S: /* F128 -> I32S (result stored in 64-bits) */
      case Iop_TruncF128toI64U: /* F128 -> I64U */
      case Iop_TruncF128toI32U: /* F128 -> I32U (result stored in 64-bits) */
         return mkPCastTo(mce, Ity_I128, vatom);

      case Iop_BCD128toI128S:
      case Iop_MulI128by10:
      case Iop_MulI128by10Carry:
      case Iop_F16toF64x2:
      case Iop_F64toF16x2:

      case Iop_I32StoF128: /* signed I32 -> F128 */
      case Iop_I64StoF128: /* signed I64 -> F128 */
      case Iop_I32UtoF128: /* unsigned I32 -> F128 */
      case Iop_I64UtoF128: /* unsigned I64 -> F128 */
      case Iop_F32toF128:  /* F32 -> F128 */
      case Iop_F64toF128:  /* F64 -> F128 */
      case Iop_I32StoD128: /* signed I32 -> D128 */
      case Iop_I64StoD128: /* signed I64 -> D128 */
      case Iop_I32UtoD128: /* unsigned I32 -> D128 */
      case Iop_I64UtoD128: /* unsigned I64 -> D128 */
         return mkPCastTo(mce, Ity_I128, vatom);
      case Iop_RSqrtEst5GoodF64:
      case Iop_RoundF64toF64_NEAREST:
      case Iop_RoundF64toF64_NegINF:
      case Iop_RoundF64toF64_PosINF:
      case Iop_RoundF64toF64_ZERO:

      case Iop_ExtractExpD64:    /* D64  -> I64 */
      case Iop_ExtractExpD128:   /* D128 -> I64 */
      case Iop_ExtractSigD64:    /* D64  -> I64 */
      case Iop_ExtractSigD128:   /* D128 -> I64 */
         return mkPCastTo(mce, Ity_I64, vatom);

         return mkPCastTo(mce, Ity_I128, vatom);

      case Iop_TruncF64asF32:
         return mkPCastTo(mce, Ity_I32, vatom);

         return expensiveCountTrailingZeroes(mce, op, atom, vatom);

      case Iop_V128HIto64:
      case Iop_Reverse8sIn16_x4:
      case Iop_Reverse8sIn32_x2:
      case Iop_Reverse16sIn32_x2:
      case Iop_Reverse8sIn64_x1:
      case Iop_Reverse16sIn64_x1:
      case Iop_Reverse32sIn64_x1:
      case Iop_V256to64_0: case Iop_V256to64_1:
      case Iop_V256to64_2: case Iop_V256to64_3:
         return assignNew('V', mce, Ity_I64, unop(op, vatom));

         return assignNew('V', mce, Ity_I32, unop(op, vatom));

      case Iop_GetMSBs8x16:
         return assignNew('V', mce, Ity_I16, unop(op, vatom));

      case Iop_GetMSBs8x8:
         return assignNew('V', mce, Ity_I8, unop(op, vatom));

         return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));

         return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));

      case Iop_ReinterpF64asI64:
      case Iop_ReinterpI64asF64:
      case Iop_ReinterpI32asF32:
      case Iop_ReinterpF32asI32:
      case Iop_ReinterpI64asD64:
      case Iop_ReinterpD64asI64:

         return mkPCast8x8(mce, vatom);

      case Iop_CmpNEZ8x16:
         return mkPCast8x16(mce, vatom);

      case Iop_CmpNEZ16x4:
         return mkPCast16x4(mce, vatom);

      case Iop_CmpNEZ16x8:
         return mkPCast16x8(mce, vatom);

      case Iop_CmpNEZ32x2:
      case Iop_FtoI32Ux2_RZ:
      case Iop_FtoI32Sx2_RZ:
         return mkPCast32x2(mce, vatom);

      case Iop_CmpNEZ32x4:
      case Iop_FtoI32Ux4_RZ:
      case Iop_FtoI32Sx4_RZ:
      case Iop_RSqrtEst32Ux4:
         return mkPCast32x4(mce, vatom);

         return mkPCastTo(mce, Ity_I32, vatom);

         return mkPCastTo(mce, Ity_I64, vatom);

      case Iop_CmpNEZ64x2:
      case Iop_CipherSV128:
         return mkPCast64x2(mce, vatom);

      case Iop_PwBitMtxXpose64x2:
         return assignNew('V', mce, Ity_V128, unop(op, vatom));

      case Iop_NarrowUn16to8x8:
      case Iop_NarrowUn32to16x4:
      case Iop_NarrowUn64to32x2:
      case Iop_QNarrowUn16Sto8Sx8:
      case Iop_QNarrowUn16Sto8Ux8:
      case Iop_QNarrowUn16Uto8Ux8:
      case Iop_QNarrowUn32Sto16Sx4:
      case Iop_QNarrowUn32Sto16Ux4:
      case Iop_QNarrowUn32Uto16Ux4:
      case Iop_QNarrowUn64Sto32Sx2:
      case Iop_QNarrowUn64Sto32Ux2:
      case Iop_QNarrowUn64Uto32Ux2:
      case Iop_F32toF16x4:
         return vectorNarrowUnV128(mce, op, vatom);

      case Iop_Widen8Sto16x8:
      case Iop_Widen8Uto16x8:
      case Iop_Widen16Sto32x4:
      case Iop_Widen16Uto32x4:
      case Iop_Widen32Sto64x2:
      case Iop_Widen32Uto64x2:
      case Iop_F16toF32x4:
         return vectorWidenI64(mce, op, vatom);

      case Iop_PwAddL32Ux2:
      case Iop_PwAddL32Sx2:
         return mkPCastTo(mce, Ity_I64,
               assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom))));

      case Iop_PwAddL16Ux4:
      case Iop_PwAddL16Sx4:
         return mkPCast32x2(mce,
               assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom))));

      case Iop_PwAddL8Ux8:
      case Iop_PwAddL8Sx8:
         return mkPCast16x4(mce,
               assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom))));

      case Iop_PwAddL32Ux4:
      case Iop_PwAddL32Sx4:
         return mkPCast64x2(mce,
               assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom))));

      case Iop_PwAddL16Ux8:
      case Iop_PwAddL16Sx8:
         return mkPCast32x4(mce,
               assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom))));

      case Iop_PwAddL8Ux16:
      case Iop_PwAddL8Sx16:
         return mkPCast16x8(mce,
               assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom))));

         VG_(tool_panic)("memcheck:expr2vbits_Unop");
   }
}
/* Worker function -- do not call directly.  See comments on
   expr2vbits_Load for the meaning of |guard|.

   Generates IR to (1) perform a definedness test of |addr|, (2)
   perform a validity test of |addr|, and (3) return the Vbits for the
   location indicated by |addr|.  All of this only happens when
   |guard| is NULL or |guard| evaluates to True at run time.

   If |guard| evaluates to False at run time, the returned value is
   the IR-mandated 0x55..55 value, and no checks nor shadow loads are
   performed.

   The definedness of |guard| itself is not checked.  That is assumed
   to have been done before this point, by the caller. */
static
IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
                              IREndness end, IRType ty,
                              IRAtom* addr, UInt bias, IRAtom* guard )
{
   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(end == Iend_LE || end == Iend_BE);

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr, guard );

   /* Now cook up a call to the relevant helper function, to read the
      data V bits from shadow memory. */
   ty = shadowTypeV(ty);

   void*        helper           = NULL;
   const HChar* hname            = NULL;
   Bool         ret_via_outparam = False;

   if (end == Iend_LE) {
      switch (ty) {
         case Ity_V256: helper = &MC_(helperc_LOADV256le);
                        hname = "MC_(helperc_LOADV256le)";
                        ret_via_outparam = True;
                        break;
         case Ity_V128: helper = &MC_(helperc_LOADV128le);
                        hname = "MC_(helperc_LOADV128le)";
                        ret_via_outparam = True;
                        break;
         case Ity_I64:  helper = &MC_(helperc_LOADV64le);
                        hname = "MC_(helperc_LOADV64le)";
                        break;
         case Ity_I32:  helper = &MC_(helperc_LOADV32le);
                        hname = "MC_(helperc_LOADV32le)";
                        break;
         case Ity_I16:  helper = &MC_(helperc_LOADV16le);
                        hname = "MC_(helperc_LOADV16le)";
                        break;
         case Ity_I8:   helper = &MC_(helperc_LOADV8);
                        hname = "MC_(helperc_LOADV8)";
                        break;
         default:       ppIRType(ty);
                        VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(LE)");
      }
   } else {
      switch (ty) {
         case Ity_V256: helper = &MC_(helperc_LOADV256be);
                        hname = "MC_(helperc_LOADV256be)";
                        ret_via_outparam = True;
                        break;
         case Ity_V128: helper = &MC_(helperc_LOADV128be);
                        hname = "MC_(helperc_LOADV128be)";
                        ret_via_outparam = True;
                        break;
         case Ity_I64:  helper = &MC_(helperc_LOADV64be);
                        hname = "MC_(helperc_LOADV64be)";
                        break;
         case Ity_I32:  helper = &MC_(helperc_LOADV32be);
                        hname = "MC_(helperc_LOADV32be)";
                        break;
         case Ity_I16:  helper = &MC_(helperc_LOADV16be);
                        hname = "MC_(helperc_LOADV16be)";
                        break;
         case Ity_I8:   helper = &MC_(helperc_LOADV8);
                        hname = "MC_(helperc_LOADV8)";
                        break;
         default:       ppIRType(ty);
                        VG_(tool_panic)("memcheck:expr2vbits_Load_WRK(BE)");
      }
   }

   /* Generate the actual address into addrAct. */
   IROp    mkAdd;
   IRAtom* eBias;
   IRAtom* addrAct;
   IRType  tyAddr  = mce->hWordTy;
   tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
   mkAdd   = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
   addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );

   /* We need to have a place to park the V bits we're just about to
      read. */
   IRTemp datavbits = newTemp(mce, ty, VSh);

   /* Here's the call. */
   IRDirty* di;
   if (ret_via_outparam) {
      di = unsafeIRDirty_1_N( datavbits,
                              hname, VG_(fnptr_to_fnentry)( helper ),
                              mkIRExprVec_2( IRExpr_VECRET(), addrAct ) );
   } else {
      di = unsafeIRDirty_1_N( datavbits,
                              hname, VG_(fnptr_to_fnentry)( helper ),
                              mkIRExprVec_1( addrAct ) );
   }
   setHelperAnns( mce, di );

   /* Ideally the didn't-happen return value here would be all-ones
      (all-undefined), so it'd be obvious if it got used
      inadvertently.  We can get by with the IR-mandated default
      value (0b01 repeating, 0x55 etc) as that'll still look pretty
      undefined if it ever leaks out. */
   stmt( 'V', mce, IRStmt_Dirty(di) );

   return mkexpr(datavbits);
}
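/* Illustrative sketch of what the code above emits for a little-endian
   I32 load (not verbatim tool output): compute t_addr = Add64(addr, bias)
   on a 64-bit host, then issue one dirty helper call
      t_vbits = MC_(helperc_LOADV32le)(t_addr)
   whose result is the 32 V bits for the addressed location.  The V128 and
   V256 cases differ only in returning the V bits through a vector
   out-parameter (IRExpr_VECRET) instead of the normal return value. */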
/* Generate IR to do a shadow load.  The helper is expected to check
   the validity of the address and return the V bits for that address.
   This can optionally be controlled by a guard, which is assumed to
   be True if NULL.  In the case where the guard is False at runtime,
   the helper will return the didn't-do-the-call value of 0x55..55.
   Since that means "completely undefined result", the caller of
   this function will need to fix up the result somehow in that
   case.

   Caller of this function is also expected to have checked the
   definedness of |guard| before this point.
*/
static
IRAtom* expr2vbits_Load ( MCEnv* mce,
                          IREndness end, IRType ty,
                          IRAtom* addr, UInt bias,
                          IRAtom* guard )
{
   tl_assert(end == Iend_LE || end == Iend_BE);
   switch (shadowTypeV(ty)) {
         return expr2vbits_Load_WRK(mce, end, ty, addr, bias, guard);
      default:
         VG_(tool_panic)("expr2vbits_Load");
   }
}
/* The most general handler for guarded loads.  Assumes the
   definedness of GUARD has already been checked by the caller.  A
   GUARD of NULL is assumed to mean "always True".  Generates code to
   check the definedness and validity of ADDR.

   Generate IR to do a shadow load from ADDR and return the V bits.
   The loaded type is TY.  The loaded data is then (shadow) widened by
   using VWIDEN, which can be Iop_INVALID to denote a no-op.  If GUARD
   evaluates to False at run time then the returned Vbits are simply
   VALT instead.  Note therefore that the argument type of VWIDEN must
   be TY and the result type of VWIDEN must equal the type of VALT.
*/
static
IRAtom* expr2vbits_Load_guarded_General ( MCEnv* mce,
                                          IREndness end, IRType ty,
                                          IRAtom* addr, UInt bias,
                                          IRAtom* guard,
                                          IROp vwiden, IRAtom* valt )
{
   /* Sanity check the conversion operation, and also set TYWIDE. */
   IRType tyWide = Ity_INVALID;
   switch (vwiden) {
      case Iop_INVALID:
         tyWide = ty;
         break;
      case Iop_16Uto32: case Iop_16Sto32: case Iop_8Uto32: case Iop_8Sto32:
         tyWide = Ity_I32;
         break;
      default:
         VG_(tool_panic)("memcheck:expr2vbits_Load_guarded_General");
   }

   /* If the guard evaluates to True, this will hold the loaded V bits
      at TY.  If the guard evaluates to False, this will be all
      ones, meaning "all undefined", in which case we will have to
      replace it using an ITE below. */
   IRAtom* iftrue1
      = assignNew('V', mce, ty,
                  expr2vbits_Load(mce, end, ty, addr, bias, guard));
   /* Now (shadow-) widen the loaded V bits to the desired width.  In
      the guard-is-False case, the allowable widening operators will
      in the worst case (unsigned widening) at least leave the
      pre-widened part as being marked all-undefined, and in the best
      case (signed widening) mark the whole widened result as
      undefined.  Anyway, it doesn't matter really, since in this case
      we will replace said value with the default value |valt| using an
      ITE below. */
   IRAtom* iftrue2
      = vwiden == Iop_INVALID
           ? iftrue1
           : assignNew('V', mce, tyWide, unop(vwiden, iftrue1));
   /* These are the V bits we will return if the load doesn't take
      place. */
   IRAtom* iffalse
      = valt;
   /* Prepare the cond for the ITE.  Convert a NULL cond into
      something that iropt knows how to fold out later. */
   IRAtom* cond
      = guard == NULL  ? mkU1(1)  : guard;
   /* And assemble the final result. */
   return assignNew('V', mce, tyWide, IRExpr_ITE(cond, iftrue2, iffalse));
}
/* A simpler handler for guarded loads, in which there is no
   conversion operation, and the default V bit return (when the guard
   evaluates to False at runtime) is "all defined".  If there is no
   guard expression or the guard is always TRUE this function behaves
   like expr2vbits_Load.  It is assumed that definedness of GUARD has
   already been checked at the call site. */
static
IRAtom* expr2vbits_Load_guarded_Simple ( MCEnv* mce,
                                         IREndness end, IRType ty,
                                         IRAtom* addr, UInt bias,
                                         IRAtom* guard )
{
   return expr2vbits_Load_guarded_General(
             mce, end, ty, addr, bias, guard, Iop_INVALID, definedOfType(ty)
          );
}
static
IRAtom* expr2vbits_ITE ( MCEnv* mce,
                         IRAtom* cond, IRAtom* iftrue, IRAtom* iffalse )
{
   IRAtom *vbitsC, *vbits0, *vbits1;
   IRType ty;
   /* Given ITE(cond, iftrue, iffalse), generate
         ITE(cond, iftrue#, iffalse#) `UifU` PCast(cond#)
      That is, steer the V bits like the originals, but trash the
      result if the steering value is undefined.  This gives
      lazy propagation. */
   tl_assert(isOriginalAtom(mce, cond));
   tl_assert(isOriginalAtom(mce, iftrue));
   tl_assert(isOriginalAtom(mce, iffalse));

   vbitsC = expr2vbits(mce, cond, HuOth); // could we use HuPCa here?
   vbits1 = expr2vbits(mce, iftrue, HuOth);
   vbits0 = expr2vbits(mce, iffalse, HuOth);
   ty = typeOfIRExpr(mce->sb->tyenv, vbits0);

   return
      mkUifU(mce, ty, assignNew('V', mce, ty,
                                IRExpr_ITE(cond, vbits1, vbits0)),
                      mkPCastTo(mce, ty, vbitsC) );
}
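/* Illustrative example of the scheme above (a sketch, not generated
   verbatim).  For x = ITE(c, a, b) with fully defined a and b but an
   undefined condition c, the shadow expression is
      UifU( ITE(c, a#, b#), PCastTo(ty, c#) )
   and since c# is nonzero (undefined) the PCast term is all-ones, so x#
   comes out all-undefined -- exactly the "trash the result if the
   steering value is undefined" behaviour described in the comment.  With
   a defined c the PCast term is all zeroes and x# is just the selected
   branch's V bits. */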
/* --------- This is the main expression-handling function. --------- */

static
IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e,
                     HowUsed hu/*use HuOth if unknown*/ )
{
   switch (e->tag) {

         return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );

         return shadow_GETI( mce, e->Iex.GetI.descr,
                                  e->Iex.GetI.ix, e->Iex.GetI.bias );

         return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );

         return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));

         return expr2vbits_Qop(
                   mce,
                   e->Iex.Qop.details->op,
                   e->Iex.Qop.details->arg1, e->Iex.Qop.details->arg2,
                   e->Iex.Qop.details->arg3, e->Iex.Qop.details->arg4
                );

         return expr2vbits_Triop(
                   mce,
                   e->Iex.Triop.details->op,
                   e->Iex.Triop.details->arg1, e->Iex.Triop.details->arg2,
                   e->Iex.Triop.details->arg3
                );

         return expr2vbits_Binop(
                   mce,
                   e->Iex.Binop.op,
                   e->Iex.Binop.arg1, e->Iex.Binop.arg2,
                   hu
                );

         return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );

         return expr2vbits_Load( mce, e->Iex.Load.end,
                                 e->Iex.Load.ty,
                                 e->Iex.Load.addr, 0/*addr bias*/,
                                 NULL/* guard == "always True"*/ );

         return mkLazyN( mce, e->Iex.CCall.args,
                              e->Iex.CCall.retty,
                              e->Iex.CCall.cee );

         return expr2vbits_ITE( mce, e->Iex.ITE.cond, e->Iex.ITE.iftrue,
                                e->Iex.ITE.iffalse);

      default:
         VG_(tool_panic)("memcheck: expr2vbits");
   }
}
/*------------------------------------------------------------*/
/*--- Generate shadow stmts from all kinds of IRStmts.      ---*/
/*------------------------------------------------------------*/

/* Widen a value to the host word size. */

static
IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
{
   IRType ty, tyH;

   /* vatom is vbits-value and as such can only have a shadow type. */
   tl_assert(isShadowAtom(mce,vatom));

   ty  = typeOfIRExpr(mce->sb->tyenv, vatom);
   tyH = mce->hWordTy;

   if (tyH == Ity_I32) {
         return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
         return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
   } else
   if (tyH == Ity_I64) {
         return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
         return assignNew('V', mce, tyH, unop(Iop_32Uto64,
                          assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
         return assignNew('V', mce, tyH, unop(Iop_32Uto64,
                          assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
   }

   VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
   VG_(tool_panic)("zwidenToHostWord");
}
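/* Example of the widening above (an editorial sketch, assuming a 64-bit
   host): an Ity_I16 shadow value with V bits 0x00FF is first zero-widened
   to Ity_I32 (0x000000FF) and then to Ity_I64 (0x00000000000000FF).  Zero
   rather than sign extension is the sensible choice, because the added
   high bits correspond to data the store helper never looks at, and the
   value 0 means "defined". */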
/* Generate a shadow store.  |addr| is always the original address
   atom.  You can pass in either originals or V-bits for the data
   atom, but obviously not both.  This function generates a check for
   the definedness and (indirectly) the validity of |addr|, but only
   when |guard| evaluates to True at run time (or is NULL).

   |guard| :: Ity_I1 controls whether the store really happens; NULL
   means it unconditionally does.  Note that |guard| itself is not
   checked for definedness; the caller of this function must do that
   if necessary. */
static
void do_shadow_Store ( MCEnv* mce,
                       IREndness end,
                       IRAtom* addr, UInt bias,
                       IRAtom* data, IRAtom* vdata,
                       IRAtom* guard )
{
   IROp     mkAdd;
   IRType   ty, tyAddr;
   void*        helper = NULL;
   const HChar* hname = NULL;
   IRConst* c;
   IRDirty* di;

   tyAddr = mce->hWordTy;
   mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
   tl_assert( end == Iend_LE || end == Iend_BE );

   if (data) {
      tl_assert(!vdata);
      tl_assert(isOriginalAtom(mce, data));
      tl_assert(bias == 0);
      vdata = expr2vbits( mce, data, HuOth );
   } else {
      tl_assert(vdata);
   }

   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(isShadowAtom(mce,vdata));

   if (guard) {
      tl_assert(isOriginalAtom(mce, guard));
      tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
   }

   ty = typeOfIRExpr(mce->sb->tyenv, vdata);

   // If we're not doing undefined value checking, pretend that this value
   // is "all valid".  That lets Vex's optimiser remove some of the V bit
   // shadow computation ops that precede it.
   if (MC_(clo_mc_level) == 1) {
      switch (ty) {
         case Ity_V256: // V256 weirdness -- used four times
                        c = IRConst_V256(V_BITS32_DEFINED); break;
         case Ity_V128: // V128 weirdness -- used twice
                        c = IRConst_V128(V_BITS16_DEFINED); break;
         case Ity_I64:  c = IRConst_U64 (V_BITS64_DEFINED); break;
         case Ity_I32:  c = IRConst_U32 (V_BITS32_DEFINED); break;
         case Ity_I16:  c = IRConst_U16 (V_BITS16_DEFINED); break;
         case Ity_I8:   c = IRConst_U8  (V_BITS8_DEFINED);  break;
         default:       VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
      }
      vdata = IRExpr_Const( c );
   }

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test.  Both of
      those actions are gated on |guard|. */
   complainIfUndefined( mce, addr, guard );

   /* Now decide which helper function to call to write the data V
      bits into shadow memory. */
   if (end == Iend_LE) {
      switch (ty) {
         case Ity_V256: /* we'll use the helper four times */
         case Ity_V128: /* we'll use the helper twice */
         case Ity_I64:  helper = &MC_(helperc_STOREV64le);
                        hname = "MC_(helperc_STOREV64le)";
                        break;
         case Ity_I32:  helper = &MC_(helperc_STOREV32le);
                        hname = "MC_(helperc_STOREV32le)";
                        break;
         case Ity_I16:  helper = &MC_(helperc_STOREV16le);
                        hname = "MC_(helperc_STOREV16le)";
                        break;
         case Ity_I8:   helper = &MC_(helperc_STOREV8);
                        hname = "MC_(helperc_STOREV8)";
                        break;
         default:       VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
      }
   } else {
      switch (ty) {
         case Ity_V128: /* we'll use the helper twice */
         case Ity_I64:  helper = &MC_(helperc_STOREV64be);
                        hname = "MC_(helperc_STOREV64be)";
                        break;
         case Ity_I32:  helper = &MC_(helperc_STOREV32be);
                        hname = "MC_(helperc_STOREV32be)";
                        break;
         case Ity_I16:  helper = &MC_(helperc_STOREV16be);
                        hname = "MC_(helperc_STOREV16be)";
                        break;
         case Ity_I8:   helper = &MC_(helperc_STOREV8);
                        hname = "MC_(helperc_STOREV8)";
                        break;
         /* Note, no V256 case here, because no big-endian target that
            we support has 256-bit vectors. */
         default:       VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
      }
   }
   if (UNLIKELY(ty == Ity_V256)) {

      /* V256-bit case -- phrased in terms of 64 bit units (Qs), with
         Q3 being the most significant lane. */
      /* These are the offsets of the Qs in memory. */
      Int     offQ0, offQ1, offQ2, offQ3;

      /* Various bits for constructing the 4 lane helper calls */
      IRDirty *diQ0,    *diQ1,    *diQ2,    *diQ3;
      IRAtom  *addrQ0,  *addrQ1,  *addrQ2,  *addrQ3;
      IRAtom  *vdataQ0, *vdataQ1, *vdataQ2, *vdataQ3;
      IRAtom  *eBiasQ0, *eBiasQ1, *eBiasQ2, *eBiasQ3;

      if (end == Iend_LE) {
         offQ0 = 0; offQ1 = 8; offQ2 = 16; offQ3 = 24;
      } else {
         offQ3 = 0; offQ2 = 8; offQ1 = 16; offQ0 = 24;
      }

      eBiasQ0 = tyAddr==Ity_I32 ? mkU32(bias+offQ0) : mkU64(bias+offQ0);
      addrQ0  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ0) );
      vdataQ0 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_0, vdata));
      diQ0    = unsafeIRDirty_0_N(
                   hname, VG_(fnptr_to_fnentry)( helper ),
                   mkIRExprVec_2( addrQ0, vdataQ0 )
                );

      eBiasQ1 = tyAddr==Ity_I32 ? mkU32(bias+offQ1) : mkU64(bias+offQ1);
      addrQ1  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ1) );
      vdataQ1 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_1, vdata));
      diQ1    = unsafeIRDirty_0_N(
                   hname, VG_(fnptr_to_fnentry)( helper ),
                   mkIRExprVec_2( addrQ1, vdataQ1 )
                );

      eBiasQ2 = tyAddr==Ity_I32 ? mkU32(bias+offQ2) : mkU64(bias+offQ2);
      addrQ2  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ2) );
      vdataQ2 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_2, vdata));
      diQ2    = unsafeIRDirty_0_N(
                   hname, VG_(fnptr_to_fnentry)( helper ),
                   mkIRExprVec_2( addrQ2, vdataQ2 )
                );

      eBiasQ3 = tyAddr==Ity_I32 ? mkU32(bias+offQ3) : mkU64(bias+offQ3);
      addrQ3  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ3) );
      vdataQ3 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_3, vdata));
      diQ3    = unsafeIRDirty_0_N(
                   hname, VG_(fnptr_to_fnentry)( helper ),
                   mkIRExprVec_2( addrQ3, vdataQ3 )
                );

      diQ0->guard = diQ1->guard = diQ2->guard = diQ3->guard = guard;

      setHelperAnns( mce, diQ0 );
      setHelperAnns( mce, diQ1 );
      setHelperAnns( mce, diQ2 );
      setHelperAnns( mce, diQ3 );
      stmt( 'V', mce, IRStmt_Dirty(diQ0) );
      stmt( 'V', mce, IRStmt_Dirty(diQ1) );
      stmt( 'V', mce, IRStmt_Dirty(diQ2) );
      stmt( 'V', mce, IRStmt_Dirty(diQ3) );
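      /* Concrete picture of the four calls built above (an editorial
         sketch, little-endian, bias 0): a V256 store to address A becomes
            STOREV64le(A+0,  V256to64_0(vdata))
            STOREV64le(A+8,  V256to64_1(vdata))
            STOREV64le(A+16, V256to64_2(vdata))
            STOREV64le(A+24, V256to64_3(vdata))
         all sharing the same guard, so either the whole shadow store
         happens or none of it does. */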
   }
   else if (UNLIKELY(ty == Ity_V128)) {

      /* See comment in next clause re 64-bit regparms */
      /* also, need to be careful about endianness */

      Int     offLo64, offHi64;
      IRDirty *diLo64, *diHi64;
      IRAtom  *addrLo64, *addrHi64;
      IRAtom  *vdataLo64, *vdataHi64;
      IRAtom  *eBiasLo64, *eBiasHi64;

      if (end == Iend_LE) {
         offLo64 = 0; offHi64 = 8;
      } else {
         offLo64 = 8; offHi64 = 0;
      }

      eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
      addrLo64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
      vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
      diLo64    = unsafeIRDirty_0_N(
                     hname, VG_(fnptr_to_fnentry)( helper ),
                     mkIRExprVec_2( addrLo64, vdataLo64 )
                  );
      eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
      addrHi64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
      vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
      diHi64    = unsafeIRDirty_0_N(
                     hname, VG_(fnptr_to_fnentry)( helper ),
                     mkIRExprVec_2( addrHi64, vdataHi64 )
                  );
      if (guard) diLo64->guard = guard;
      if (guard) diHi64->guard = guard;
      setHelperAnns( mce, diLo64 );
      setHelperAnns( mce, diHi64 );
      stmt( 'V', mce, IRStmt_Dirty(diLo64) );
      stmt( 'V', mce, IRStmt_Dirty(diHi64) );

   } else {

      /* 8/16/32/64-bit cases */
      /* Generate the actual address into addrAct. */
      IRAtom* addrAct;
      IRAtom* eBias = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
      addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));

      if (ty == Ity_I64) {
         /* We can't do this with regparm 2 on 32-bit platforms, since
            the back ends aren't clever enough to handle 64-bit
            regparm args.  Therefore be different. */
         di = unsafeIRDirty_0_N(
                 hname, VG_(fnptr_to_fnentry)( helper ),
                 mkIRExprVec_2( addrAct, vdata )
              );
      } else {
         di = unsafeIRDirty_0_N(
                 hname, VG_(fnptr_to_fnentry)( helper ),
                 mkIRExprVec_2( addrAct,
                                zwidenToHostWord( mce, vdata ))
              );
      }
      if (guard) di->guard = guard;
      setHelperAnns( mce, di );
      stmt( 'V', mce, IRStmt_Dirty(di) );
   }
}
/* Do lazy pessimistic propagation through a dirty helper call, by
   looking at the annotations on it.  This is the most complex part of
   Memcheck. */

static IRType szToITy ( Int n )
{
   switch (n) {
      case 1: return Ity_I8;
      case 2: return Ity_I16;
      case 4: return Ity_I32;
      case 8: return Ity_I64;
      default: VG_(tool_panic)("szToITy(memcheck)");
   }
}
static
void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
{
   Int       i, k, n, toDo, gSz, gOff;
   IRAtom    *src, *here, *curr;
   IRType    tySrc, tyDst;
   IRTemp    dst;
   IREndness end;

   /* What's the native endianness?  We need to know this. */
#  if defined(VG_BIGENDIAN)
   end = Iend_BE;
#  elif defined(VG_LITTLEENDIAN)
   end = Iend_LE;
#  else
#    error "Unknown endianness"
#  endif

   /* First check the guard. */
   complainIfUndefined(mce, d->guard, NULL);

   /* Now round up all inputs and PCast over them. */
   curr = definedOfType(Ity_I32);

   /* Inputs: unmasked args
      Note: arguments are evaluated REGARDLESS of the guard expression */
   for (i = 0; d->args[i]; i++) {
      IRAtom* arg = d->args[i];
      if ( (d->cee->mcx_mask & (1<<i))
           || UNLIKELY(is_IRExpr_VECRET_or_GSPTR(arg)) ) {
         /* ignore this arg */
      } else {
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, arg, HuOth) );
         curr = mkUifU32(mce, here, curr);
      }
   }

   /* Inputs: guest state that we read. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Write)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz  = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz)) {
            if (0)
            VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
                        gOff, gSz);
            continue;
         }

         /* This state element is read or modified.  So we need to
            consider it.  If larger than 8 bytes, deal with it in
            8-byte chunks. */
         while (True) {
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 8 ? gSz : 8;
            /* update 'curr' with UifU of the state slice
               gOff .. gOff+n-1 */
            tySrc = szToITy( n );

            /* Observe the guard expression. If it is false use an
               all-bits-defined bit pattern */
            IRAtom *cond, *iffalse, *iftrue;

            cond    = assignNew('V', mce, Ity_I1, d->guard);
            iftrue  = assignNew('V', mce, tySrc, shadow_GET(mce, gOff, tySrc));
            iffalse = assignNew('V', mce, tySrc, definedOfType(tySrc));
            src     = assignNew('V', mce, tySrc,
                                IRExpr_ITE(cond, iftrue, iffalse));

            here = mkPCastTo( mce, Ity_I32, src );
            curr = mkUifU32(mce, here, curr);
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Inputs: memory.  First set up some info needed regardless of
      whether we're doing reads or writes. */

   if (d->mFx != Ifx_None) {
      /* Because we may do multiple shadow loads/stores from the same
         base address, it's best to do a single test of its
         definedness right now.  Post-instrumentation optimisation
         should remove all but this test. */
      IRType tyAddr;
      tl_assert(d->mAddr);
      complainIfUndefined(mce, d->mAddr, d->guard);

      tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
      tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
      tl_assert(tyAddr == mce->hWordTy); /* not really right */
   }

   /* Deal with memory inputs (reads or modifies) */
   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
      toDo = d->mSize;
      /* chew off 32-bit chunks.  We don't care about the endianness
         since it's all going to be condensed down to a single bit,
         but nevertheless choose an endianness which is hopefully
         native to the platform. */
      while (toDo >= 4) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_Load_guarded_Simple(
                      mce, end, Ity_I32, d->mAddr, d->mSize - toDo, d->guard )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_Load_guarded_Simple(
                      mce, end, Ity_I16, d->mAddr, d->mSize - toDo, d->guard )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_Load_guarded_Simple(
                      mce, end, Ity_I8, d->mAddr, d->mSize - toDo, d->guard )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }
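   /* Worked example of the chunking above (an editorial note, not code):
      for a helper annotated with mFx == Ifx_Read and mSize == 7, the loop
      issues guarded shadow loads of 4 bytes at offset 0, 2 bytes at offset
      4 and 1 byte at offset 6; each result is PCast to a single "any bit
      undefined?" summary at Ity_I32 and UifU'd into 'curr', so the precise
      layout of the region does not matter, only whether any of it is
      undefined. */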
   /* Whew!  So curr is a 32-bit V-value summarising pessimistically
      all the inputs to the helper.  Now we need to re-distribute the
      results to all destinations. */

   /* Outputs: the destination temporary, if there is one. */
   if (d->tmp != IRTemp_INVALID) {
      dst   = findShadowTmpV(mce, d->tmp);
      tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
      assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
   }

   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz  = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz))
            continue;

         /* This state element is written or modified.  So we need to
            consider it.  If larger than 8 bytes, deal with it in
            8-byte chunks. */
         while (True) {
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 8 ? gSz : 8;
            /* Write suitably-casted 'curr' to the state slice
               gOff .. gOff+n-1 */
            tyDst = szToITy( n );
            do_shadow_PUT( mce, gOff,
                                NULL, /* original atom */
                                mkPCastTo( mce, tyDst, curr ), d->guard );
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Outputs: memory that we write or modify.  Same comments about
      endianness as above apply. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      toDo = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
                          NULL, /* original data */
                          mkPCastTo( mce, Ity_I32, curr ),
                          d->guard );
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
                          NULL, /* original data */
                          mkPCastTo( mce, Ity_I16, curr ),
                          d->guard );
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
                          NULL, /* original data */
                          mkPCastTo( mce, Ity_I8, curr ),
                          d->guard );
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }
}
/* We have an ABI hint telling us that [base .. base+len-1] is to
   become undefined ("writable").  Generate code to call a helper to
   notify the A/V bit machinery of this fact.

   void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
                                         Addr nia );
*/
static
void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
{
   IRDirty* di;

   if (MC_(clo_mc_level) == 3) {
      di = unsafeIRDirty_0_N(
              "MC_(helperc_MAKE_STACK_UNINIT_w_o)",
              VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT_w_o) ),
              mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
           );
   } else {
      /* We ignore the supplied nia, since it is irrelevant. */
      tl_assert(MC_(clo_mc_level) == 2 || MC_(clo_mc_level) == 1);
      /* Special-case the len==128 case, since that is for amd64-ELF,
         which is a very common target. */
      if (len == 128) {
         di = unsafeIRDirty_0_N(
                 "MC_(helperc_MAKE_STACK_UNINIT_128_no_o)",
                 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT_128_no_o)),
                 mkIRExprVec_1( base )
              );
      } else {
         di = unsafeIRDirty_0_N(
                 "MC_(helperc_MAKE_STACK_UNINIT_no_o)",
                 VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT_no_o) ),
                 mkIRExprVec_2( base, mkIRExpr_HWord( (UInt)len) )
              );
      }
   }

   stmt( 'V', mce, IRStmt_Dirty(di) );
}
/* ------ Dealing with IRCAS (big and complex) ------ */

static IRAtom* gen_load_b  ( MCEnv* mce, Int szB,
                             IRAtom* baseaddr, Int offset );
static IRAtom* gen_maxU32  ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
static void    gen_store_b ( MCEnv* mce, Int szB,
                             IRAtom* baseaddr, Int offset, IRAtom* dataB,
                             IRAtom* guard );

static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
/* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
   IRExpr.Consts, else this asserts.  If they are both Consts, it
   doesn't do anything.  So that just leaves the RdTmp case.

   In which case: this assigns the shadow value SHADOW to the IR
   shadow temporary associated with ORIG.  That is, ORIG, being an
   original temporary, will have a shadow temporary associated with
   it.  However, in the case envisaged here, there will so far have
   been no IR emitted to actually write a shadow value into that
   temporary.  What this routine does is to (emit IR to) copy the
   value in SHADOW into said temporary, so that after this call,
   IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
   value in SHADOW.

   Point is to allow callers to compute "by hand" a shadow value for
   ORIG, and force it to be associated with ORIG.

   How do we know that that shadow associated with ORIG has not so far
   been assigned to?  Well, we don't per se know that, but supposing
   it had.  Then this routine would create a second assignment to it,
   and later the IR sanity checker would barf.  But that never
   happens. */
static void bind_shadow_tmp_to_orig ( UChar how,
                                      MCEnv* mce,
                                      IRAtom* orig, IRAtom* shadow )
{
   tl_assert(isOriginalAtom(mce, orig));
   tl_assert(isShadowAtom(mce, shadow));
   switch (orig->tag) {
      case Iex_Const:
         tl_assert(shadow->tag == Iex_Const);
         break;
      case Iex_RdTmp:
         tl_assert(shadow->tag == Iex_RdTmp);
         if (how == 'V') {
            assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
                   shadow);
         } else {
            tl_assert(how == 'B');
            assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
                   shadow);
         }
         break;
      default:
         tl_assert(0);
   }
}
static
void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
{
   /* Scheme is (both single- and double- cases):

      1. fetch data#,dataB (the proposed new value)

      2. fetch expd#,expdB (what we expect to see at the address)

      3. check definedness of address

      4. load old#,oldB from shadow memory; this also checks
         addressibility of the address

      5. the CAS itself

      6. compute "expected == old".  See COMMENT_ON_CasCmpEQ below.

      7. if "expected == old" (as computed by (6))
            store data#,dataB to shadow memory

      Note that 5 reads 'old' but 4 reads 'old#'.  Similarly, 5 stores
      'data' but 7 stores 'data#'.  Hence it is possible for the
      shadow data to be incorrectly checked and/or updated:

      * 7 is at least gated correctly, since the 'expected == old'
        condition is derived from outputs of 5.  However, the shadow
        write could happen too late: imagine after 5 we are
        descheduled, a different thread runs, writes a different
        (shadow) value at the address, and then we resume, hence
        overwriting the shadow value written by the other thread.

      Because the original memory access is atomic, there's no way to
      make both the original and shadow accesses into a single atomic
      thing, hence this is unavoidable.

      At least as Valgrind stands, I don't think it's a problem, since
      we're single threaded *and* we guarantee that there are no
      context switches during the execution of any specific superblock
      -- context switches can only happen at superblock boundaries.

      If Valgrind ever becomes MT in the future, then it might be more
      of a problem.  A possible kludge would be to artificially
      associate with the location, a lock, which we must acquire and
      release around the transaction as a whole.  Hmm, that probably
      wouldn't work properly since it only guards us against other
      threads doing CASs on the same location, not against other
      threads doing normal reads and writes.

      ------------------------------------------------------------

      COMMENT_ON_CasCmpEQ:

      Note two things.  Firstly, in the sequence above, we compute
      "expected == old", but we don't check definedness of it.  Why
      not?  Also, the x86 and amd64 front ends use
      Iop_CasCmp{EQ,NE}{8,16,32,64} comparisons to make the equivalent
      determination (expected == old ?) for themselves, and we also
      don't check definedness for those primops; we just say that the
      result is defined.  Why?  Details follow.

      x86/amd64 contains various forms of locked insns:
      * lock prefix before all basic arithmetic insn;
        eg lock xorl %reg1,(%reg2)
      * atomic exchange reg-mem

      Rather than attempt to represent them all, which would be a
      royal PITA, I used a result from Maurice Herlihy
      (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
      demonstrates that compare-and-swap is a primitive more general
      than the other two, and so can be used to represent all of them.
      So the translation scheme for (eg) lock incl (%reg) is as
      follows:

        again:
         old = * %reg
         new = old + 1
         atomically { if (* %reg == old) { * %reg = new } else { goto again } }

      The "atomically" is the CAS bit.  The scheme is always the same:
      get old value from memory, compute new value, atomically stuff
      new value back in memory iff the old value has not changed (iow,
      no other thread modified it in the meantime).  If it has changed
      then we've been out-raced and we have to start over.

      Now that's all very neat, but it has the bad side effect of
      introducing an explicit equality test into the translation.
      Consider the behaviour of said code on a memory location which
      is uninitialised.  We will wind up doing a comparison on
      uninitialised data, and mc duly complains.

      What's difficult about this is, the common case is that the
      location is uncontended, and so we're usually comparing the same
      value (* %reg) with itself.  So we shouldn't complain even if it
      is undefined.  But mc doesn't know that.

      My solution is to mark the == in the IR specially, so as to tell
      mc that it almost certainly compares a value with itself, and we
      should just regard the result as always defined.  Rather than
      add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
      Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.

      So there's always the question of, can this give a false
      negative?  eg, imagine that initially, * %reg is defined; and we
      read that; but then in the gap between the read and the CAS, a
      different thread writes an undefined (and different) value at
      the location.  Then the CAS in this thread will fail and we will
      go back to "again:", but without knowing that the trip back
      there was based on an undefined comparison.  No matter; at least
      the other thread won the race and the location is correctly
      marked as undefined.  What if it wrote an uninitialised version
      of the same value that was there originally, though?

      etc etc.  Seems like there's a small corner case in which we
      might lose the fact that something's defined -- we're out-raced
      in between the "old = * reg" and the "atomically {", _and_ the
      other thread is writing in an undefined version of what's
      already there.  Well, that seems pretty unlikely.

      If we ever need to reinstate it .. code which generates a
      definedness test for "expected == old" was removed at r10432 of
      this file. */
   if (cas->oldHi == IRTemp_INVALID) {
      do_shadow_CAS_single( mce, cas );
   } else {
      do_shadow_CAS_double( mce, cas );
   }
}
static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
{
   IRAtom *vdataLo = NULL, *bdataLo = NULL;
   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   IRAtom *voldLo  = NULL, *boldLo  = NULL;
   IRAtom *expd_eq_old = NULL;
   IROp   opCasCmpEQ;
   Int    elemSzB;
   IRType elemTy;
   Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */

   tl_assert(cas->oldHi == IRTemp_INVALID);
   tl_assert(cas->expdHi == NULL);
   tl_assert(cas->dataHi == NULL);

   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   switch (elemTy) {
      case Ity_I8:  elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8;  break;
      case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
      case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
      case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
      default: tl_assert(0); /* IR defn disallows any other types */
   }

   /* 1. fetch data# (the proposed new value) */
   tl_assert(isOriginalAtom(mce, cas->dataLo));
   vdataLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo, HuOth));
   tl_assert(isShadowAtom(mce, vdataLo));
   if (otrak) {
      bdataLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
      tl_assert(isShadowAtom(mce, bdataLo));
   }

   /* 2. fetch expected# (what we expect to see at the address) */
   tl_assert(isOriginalAtom(mce, cas->expdLo));
   vexpdLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo, HuOth));
   tl_assert(isShadowAtom(mce, vexpdLo));
   if (otrak) {
      bexpdLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
      tl_assert(isShadowAtom(mce, bexpdLo));
   }

   /* 3. check definedness of address */
   /* 4. fetch old# from shadow memory; this also checks
         addressibility of the address */
   voldLo
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, 0/*Addr bias*/,
              NULL/*always happens*/
        ));
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   if (otrak) {
      boldLo
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   }

   /* 5. the CAS itself */
   stmt( 'C', mce, IRStmt_CAS(cas) );

   /* 6. compute "expected == old" */
   /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
      tree, but it's not copied from the input block. */
   expd_eq_old
      = assignNew('C', mce, Ity_I1,
                  binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));

   /* 7. if "expected == old"
            store data# to shadow memory */
   do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
                    NULL/*data*/, vdataLo/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   if (otrak) {
      gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
                   bdataLo/*dataB*/,
                   expd_eq_old/*guard for store*/ );
   }
}
static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
{
   IRAtom *vdataHi = NULL, *bdataHi = NULL;
   IRAtom *vdataLo = NULL, *bdataLo = NULL;
   IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
   IRAtom *voldHi  = NULL, *boldHi  = NULL;
   IRAtom *voldLo  = NULL, *boldLo  = NULL;
   IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
   IRAtom *expd_eq_old = NULL, *zero = NULL;
   IROp   opCasCmpEQ, opOr, opXor;
   Int    elemSzB, memOffsLo, memOffsHi;
   IRType elemTy;
   Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */

   /* double CAS */
   tl_assert(cas->oldHi != IRTemp_INVALID);
   tl_assert(cas->expdHi != NULL);
   tl_assert(cas->dataHi != NULL);

   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
   switch (elemTy) {
      case Ity_I8:
         opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
         elemSzB = 1; zero = mkU8(0);
         break;
      case Ity_I16:
         opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
         elemSzB = 2; zero = mkU16(0);
         break;
      case Ity_I32:
         opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
         elemSzB = 4; zero = mkU32(0);
         break;
      case Ity_I64:
         opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
         elemSzB = 8; zero = mkU64(0);
         break;
      default:
         tl_assert(0); /* IR defn disallows any other types */
   }

   /* 1. fetch data# (the proposed new value) */
   tl_assert(isOriginalAtom(mce, cas->dataHi));
   tl_assert(isOriginalAtom(mce, cas->dataLo));
   vdataHi
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi, HuOth));
   vdataLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo, HuOth));
   tl_assert(isShadowAtom(mce, vdataHi));
   tl_assert(isShadowAtom(mce, vdataLo));
   if (otrak) {
      bdataHi
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
      bdataLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
      tl_assert(isShadowAtom(mce, bdataHi));
      tl_assert(isShadowAtom(mce, bdataLo));
   }

   /* 2. fetch expected# (what we expect to see at the address) */
   tl_assert(isOriginalAtom(mce, cas->expdHi));
   tl_assert(isOriginalAtom(mce, cas->expdLo));
   vexpdHi
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi, HuOth));
   vexpdLo
      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo, HuOth));
   tl_assert(isShadowAtom(mce, vexpdHi));
   tl_assert(isShadowAtom(mce, vexpdLo));
   if (otrak) {
      bexpdHi
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
      bexpdLo
         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
      tl_assert(isShadowAtom(mce, bexpdHi));
      tl_assert(isShadowAtom(mce, bexpdLo));
   }

   /* 3. check definedness of address */
   /* 4. fetch old# from shadow memory; this also checks
         addressibility of the address */
   if (cas->end == Iend_LE) {
      memOffsLo = 0;
      memOffsHi = elemSzB;
   } else {
      tl_assert(cas->end == Iend_BE);
      memOffsLo = elemSzB;
      memOffsHi = 0;
   }
   voldHi
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/,
              NULL/*always happens*/
        ));
   voldLo
      = assignNew(
           'V', mce, elemTy,
           expr2vbits_Load(
              mce,
              cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/,
              NULL/*always happens*/
        ));
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
   if (otrak) {
      boldHi
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr,
                                memOffsHi/*addr bias*/));
      boldLo
         = assignNew('B', mce, Ity_I32,
                     gen_load_b(mce, elemSzB, cas->addr,
                                memOffsLo/*addr bias*/));
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
   }

   /* 5. the CAS itself */
   stmt( 'C', mce, IRStmt_CAS(cas) );

   /* 6. compute "expected == old" */
   /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
      tree, but it's not copied from the input block. */
   /*
      xHi = oldHi ^ expdHi;
      xLo = oldLo ^ expdLo;
      xHL = xHi | xLo;
      expd_eq_old = xHL == 0;
   */
   xHi = assignNew('C', mce, elemTy,
                   binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
   xLo = assignNew('C', mce, elemTy,
                   binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
   xHL = assignNew('C', mce, elemTy,
                   binop(opOr, xHi, xLo));
   expd_eq_old
      = assignNew('C', mce, Ity_I1,
                  binop(opCasCmpEQ, xHL, zero));

   /* 7. if "expected == old"
            store data# to shadow memory */
   do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
                    NULL/*data*/, vdataHi/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
                    NULL/*data*/, vdataLo/*vdata*/,
                    expd_eq_old/*guard for store*/ );
   if (otrak) {
      gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
                   bdataHi/*bdata*/,
                   expd_eq_old/*guard for store*/ );
      gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
                   bdataLo/*bdata*/,
                   expd_eq_old/*guard for store*/ );
   }
}
/* ------ Dealing with LL/SC (not difficult) ------ */

static void do_shadow_LLSC ( MCEnv*    mce,
                             IREndness stEnd,
                             IRTemp    stResult,
                             IRExpr*   stAddr,
                             IRExpr*   stStoredata )
{
   /* In short: treat a load-linked like a normal load followed by an
      assignment of the loaded (shadow) data to the result temporary.
      Treat a store-conditional like a normal store, and mark the
      result temporary as defined. */
   IRType resTy  = typeOfIRTemp(mce->sb->tyenv, stResult);
   IRTemp resTmp = findShadowTmpV(mce, stResult);

   tl_assert(isIRAtom(stAddr));
   if (stStoredata)
      tl_assert(isIRAtom(stStoredata));

   if (stStoredata == NULL) {
      /* Load Linked */
      /* Just treat this as a normal load, followed by an assignment of
         the value to .result. */
      /* Stay sane */
      tl_assert(resTy == Ity_I64 || resTy == Ity_I32
                || resTy == Ity_I16 || resTy == Ity_I8);
      assign( 'V', mce, resTmp,
              expr2vbits_Load(
                 mce, stEnd, resTy, stAddr, 0/*addr bias*/,
                 NULL/*always happens*/) );
   } else {
      /* Store Conditional */
      /* Stay sane */
      IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
                                   stStoredata);
      tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
                || dataTy == Ity_I16 || dataTy == Ity_I8);
      do_shadow_Store( mce, stEnd,
                       stAddr, 0/* addr bias */,
                       stStoredata,
                       NULL /* shadow data */,
                       NULL/*guard*/ );
      /* This is a store conditional, so it writes to .result a value
         indicating whether or not the store succeeded.  Just claim
         this value is always defined.  In the PowerPC interpretation
         of store-conditional, definedness of the success indication
         depends on whether the address of the store matches the
         reservation address.  But we can't tell that here (and
         anyway, we're not being PowerPC-specific).  At least we are
         guaranteed that the definedness of the store address, and its
         addressibility, will be checked as per normal.  So it seems
         pretty safe to just say that the success indication is always
         defined.

         In schemeS, for origin tracking, we must correspondingly set
         a no-origin value for the origin shadow of .result.
      */
      tl_assert(resTy == Ity_I1);
      assign( 'V', mce, resTmp, definedOfType(resTy) );
   }
}
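/* A minimal sketch of the treatment above (the IR spelling here is
   only illustrative, not taken from a real block):

      t_ll = LL(addr)        -- shadowed exactly like a plain load
      t_sc = SC(addr, data)  -- shadowed like a plain store of data,
                             -- and t_sc's V bits are simply "defined"

   so the success/failure flag of a store-conditional can never by
   itself provoke an undefined-value complaint. */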
/* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */

static void do_shadow_StoreG ( MCEnv* mce, IRStoreG* sg )
{
   complainIfUndefined(mce, sg->guard, NULL);
   /* do_shadow_Store will generate code to check the definedness and
      validity of sg->addr, in the case where sg->guard evaluates to
      True at run-time. */
   do_shadow_Store( mce, sg->end,
                    sg->addr, 0/* addr bias */,
                    sg->data,
                    NULL /* shadow data */,
                    sg->guard );
}
static void do_shadow_LoadG ( MCEnv* mce, IRLoadG* lg )
{
   complainIfUndefined(mce, lg->guard, NULL);
   /* expr2vbits_Load_guarded_General will generate code to check the
      definedness and validity of lg->addr, in the case where
      lg->guard evaluates to True at run-time. */

   /* Look at the LoadG's built-in conversion operation, to determine
      the source (actual loaded data) type, and the equivalent IROp.
      NOTE that implicitly we are taking a widening operation to be
      applied to original atoms and producing one that applies to V
      bits.  Since signed and unsigned widening are self-shadowing,
      this is a straight copy of the op (modulo swapping from the
      IRLoadGOp form to the IROp form).  Note also therefore that this
      implicitly duplicates the logic to do with said widening ops in
      expr2vbits_Unop.  See comment at the start of expr2vbits_Unop. */
   IROp   vwiden   = Iop_INVALID;
   IRType loadedTy = Ity_INVALID;
   switch (lg->cvt) {
      case ILGop_IdentV128: loadedTy = Ity_V128; vwiden = Iop_INVALID; break;
      case ILGop_Ident64:   loadedTy = Ity_I64;  vwiden = Iop_INVALID; break;
      case ILGop_Ident32:   loadedTy = Ity_I32;  vwiden = Iop_INVALID; break;
      case ILGop_16Uto32:   loadedTy = Ity_I16;  vwiden = Iop_16Uto32; break;
      case ILGop_16Sto32:   loadedTy = Ity_I16;  vwiden = Iop_16Sto32; break;
      case ILGop_8Uto32:    loadedTy = Ity_I8;   vwiden = Iop_8Uto32;  break;
      case ILGop_8Sto32:    loadedTy = Ity_I8;   vwiden = Iop_8Sto32;  break;
      default: VG_(tool_panic)("do_shadow_LoadG");
   }

   IRAtom* vbits_alt
      = expr2vbits( mce, lg->alt, HuOth );
   IRAtom* vbits_final
      = expr2vbits_Load_guarded_General(mce, lg->end, loadedTy,
                                        lg->addr, 0/*addr bias*/,
                                        lg->guard, vwiden, vbits_alt );
   /* And finally, bind the V bits to the destination temporary. */
   assign( 'V', mce, findShadowTmpV(mce, lg->dst), vbits_final );
}
/*------------------------------------------------------------*/
/*--- Origin tracking stuff                                ---*/
/*------------------------------------------------------------*/

/* Almost identical to findShadowTmpV. */
static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
{
   TempMapEnt* ent;
   /* VG_(indexXA) range-checks 'orig', hence no need to check
      it here. */
   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
   tl_assert(ent->kind == Orig);
   if (ent->shadowB == IRTemp_INVALID) {
      IRTemp tmpB
         = newTemp( mce, Ity_I32, BSh );
      /* newTemp may cause mce->tmpMap to resize, hence previous results
         from VG_(indexXA) are invalid. */
      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
      tl_assert(ent->kind == Orig);
      tl_assert(ent->shadowB == IRTemp_INVALID);
      ent->shadowB = tmpB;
   }
   return ent->shadowB;
}
static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
{
   return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
}
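/* Worked example of how origin (B) values combine (the otag numbers
   here are invented for illustration): if t1 carries otag 0x11000 and
   t2 carries otag 0x22000, then an expression using both, eg
   Add32(t1,t2), gets origin Max32U(0x11000, 0x22000) = 0x22000 --
   i.e. one plausible origin is kept rather than both.  schemeE below
   uses gen_maxU32 for exactly this purpose. */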
/* Make a guarded origin load, with no special handling in the
   didn't-happen case.  A GUARD of NULL is assumed to mean "always
   True".

   Generate IR to do a shadow origins load from BASEADDR+OFFSET and
   return the otag.  The loaded size is SZB.  If GUARD evaluates to
   False at run time then the returned otag is zero.
*/
static IRAtom* gen_guarded_load_b ( MCEnv* mce, Int szB,
                                    IRAtom* baseaddr,
                                    Int offset, IRExpr* guard )
{
   void*    hFun;
   const HChar* hName;
   IRTemp   bTmp;
   IRDirty* di;
   IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
   IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
   IRAtom*  ea    = baseaddr;
   if (offset != 0) {
      IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
                                   : mkU64( (Long)(Int)offset );
      ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
   }
   bTmp = newTemp(mce, mce->hWordTy, BSh);

   switch (szB) {
      case 1:  hFun  = (void*)&MC_(helperc_b_load1);
               hName = "MC_(helperc_b_load1)";
               break;
      case 2:  hFun  = (void*)&MC_(helperc_b_load2);
               hName = "MC_(helperc_b_load2)";
               break;
      case 4:  hFun  = (void*)&MC_(helperc_b_load4);
               hName = "MC_(helperc_b_load4)";
               break;
      case 8:  hFun  = (void*)&MC_(helperc_b_load8);
               hName = "MC_(helperc_b_load8)";
               break;
      case 16: hFun  = (void*)&MC_(helperc_b_load16);
               hName = "MC_(helperc_b_load16)";
               break;
      case 32: hFun  = (void*)&MC_(helperc_b_load32);
               hName = "MC_(helperc_b_load32)";
               break;
      default:
         VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
         tl_assert(0);
   }
   di = unsafeIRDirty_1_N(
           bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
           mkIRExprVec_1( ea )
        );
   if (guard) {
      di->guard = guard;
      /* Ideally the didn't-happen return value here would be
         all-zeroes (unknown-origin), so it'd be harmless if it got
         used inadvertently.  We slum it out with the IR-mandated
         default value (0b01 repeating, 0x55 etc) as that'll probably
         trump all legitimate otags via Max32, and it's pretty
         obviously bogus. */
   }
   /* no need to mess with any annotations.  This call accesses
      neither guest state nor guest memory. */
   stmt( 'B', mce, IRStmt_Dirty(di) );
   if (mce->hWordTy == Ity_I64) {
      /* 64-bit host */
      IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
      assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
      return mkexpr(bTmp32);
   } else {
      /* 32-bit host */
      return mkexpr(bTmp);
   }
}
/* Generate IR to do a shadow origins load from BASEADDR+OFFSET.  The
   loaded size is SZB.  The load is regarded as unconditional (always
   happens).
*/
static IRAtom* gen_load_b ( MCEnv* mce, Int szB, IRAtom* baseaddr,
                            Int offset )
{
   return gen_guarded_load_b(mce, szB, baseaddr, offset, NULL/*guard*/);
}
/* The most general handler for guarded origin loads.  A GUARD of NULL
   is assumed to mean "always True".

   Generate IR to do a shadow origin load from ADDR+BIAS and return
   the B bits.  The loaded type is TY.  If GUARD evaluates to False at
   run time then the returned B bits are simply BALT instead.
*/
static
IRAtom* expr2ori_Load_guarded_General ( MCEnv* mce,
                                        IRType ty,
                                        IRAtom* addr, UInt bias,
                                        IRAtom* guard, IRAtom* balt )
{
   /* If the guard evaluates to True, this will hold the loaded
      origin.  If the guard evaluates to False, this will be zero,
      meaning "unknown origin", in which case we will have to replace
      it using an ITE below. */
   IRAtom* iftrue
      = assignNew('B', mce, Ity_I32,
                  gen_guarded_load_b(mce, sizeofIRType(ty),
                                     addr, bias, guard));
   /* These are the bits we will return if the load doesn't take
      place. */
   IRAtom* iffalse
      = balt;
   /* Prepare the cond for the ITE.  Convert a NULL cond into
      something that iropt knows how to fold out later. */
   IRAtom* cond
      = guard == NULL  ? mkU1(1)  : guard;
   /* And assemble the final result. */
   return assignNew('B', mce, Ity_I32, IRExpr_ITE(cond, iftrue, iffalse));
}
/* Generate a shadow origins store.  guard :: Ity_I1 controls whether
   the store really happens; NULL means it unconditionally does. */
static void gen_store_b ( MCEnv* mce, Int szB,
                          IRAtom* baseaddr, Int offset, IRAtom* dataB,
                          IRAtom* guard )
{
   void*    hFun;
   const HChar* hName;
   IRDirty* di;
   IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
   IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
   IRAtom*  ea    = baseaddr;
   if (guard) {
      tl_assert(isOriginalAtom(mce, guard));
      tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
   }
   if (offset != 0) {
      IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
                                   : mkU64( (Long)(Int)offset );
      ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
   }
   if (mce->hWordTy == Ity_I64)
      dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));

   switch (szB) {
      case 1:  hFun  = (void*)&MC_(helperc_b_store1);
               hName = "MC_(helperc_b_store1)";
               break;
      case 2:  hFun  = (void*)&MC_(helperc_b_store2);
               hName = "MC_(helperc_b_store2)";
               break;
      case 4:  hFun  = (void*)&MC_(helperc_b_store4);
               hName = "MC_(helperc_b_store4)";
               break;
      case 8:  hFun  = (void*)&MC_(helperc_b_store8);
               hName = "MC_(helperc_b_store8)";
               break;
      case 16: hFun  = (void*)&MC_(helperc_b_store16);
               hName = "MC_(helperc_b_store16)";
               break;
      case 32: hFun  = (void*)&MC_(helperc_b_store32);
               hName = "MC_(helperc_b_store32)";
               break;
      default:
         tl_assert(0);
   }
   di = unsafeIRDirty_0_N( 2/*regparms*/,
                           hName, VG_(fnptr_to_fnentry)( hFun ),
                           mkIRExprVec_2( ea, dataB )
        );
   /* no need to mess with any annotations.  This call accesses
      neither guest state nor guest memory. */
   if (guard) di->guard = guard;
   stmt( 'B', mce, IRStmt_Dirty(di) );
}
static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
   IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
   if (eTy == Ity_I64)
      return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
   if (eTy == Ity_I32)
      return e;
   tl_assert(0);
}
static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
   IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
   tl_assert(eTy == Ity_I32);
   if (dstTy == Ity_I64)
      return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
   if (dstTy == Ity_I32)
      return e;
   tl_assert(0);
}
static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
{
   tl_assert(MC_(clo_mc_level) == 3);

   switch (e->tag) {

      case Iex_GetI: {
         IRRegArray* descr_b;
         IRAtom      *t1, *t2, *t3, *t4;
         IRRegArray* descr = e->Iex.GetI.descr;
         IRType equivIntTy
            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
         /* If this array is unshadowable for whatever reason, use the
            usual approximation. */
         if (equivIntTy == Ity_INVALID)
            return mkU32(0);
         tl_assert(sizeofIRType(equivIntTy) >= 4);
         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
         descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
                                 equivIntTy, descr->nElems );
         /* Do a shadow indexed get of the same size, giving t1.  Take
            the bottom 32 bits of it, giving t2.  Compute into t3 the
            origin for the index (almost certainly zero, but there's
            no harm in being completely general here, since iropt will
            remove any useless code), and fold it in, giving a final
            value t4. */
         t1 = assignNew( 'B', mce, equivIntTy,
                         IRExpr_GetI( descr_b, e->Iex.GetI.ix,
                                      e->Iex.GetI.bias ));
         t2 = narrowTo32( mce, t1 );
         t3 = schemeE( mce, e->Iex.GetI.ix );
         t4 = gen_maxU32( mce, t2, t3 );
         return t4;
      }
      case Iex_CCall: {
         Int i;
         IRAtom*  here;
         IRExpr** args = e->Iex.CCall.args;
         IRAtom*  curr = mkU32(0);
         for (i = 0; args[i]; i++) {
            tl_assert(i < 32);
            tl_assert(isOriginalAtom(mce, args[i]));
            /* Only take notice of this arg if the callee's
               mc-exclusion mask does not say it is to be excluded. */
            if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
               /* the arg is to be excluded from definedness checking.
                  Do nothing. */
               if (0) VG_(printf)("excluding %s(%d)\n",
                                  e->Iex.CCall.cee->name, i);
            } else {
               /* calculate the arg's definedness, and pessimistically
                  merge it in. */
               here = schemeE( mce, args[i] );
               curr = gen_maxU32( mce, curr, here );
            }
         }
         return curr;
      }
      case Iex_Load: {
         Int dszB;
         dszB = sizeofIRType(e->Iex.Load.ty);
         /* assert that the B value for the address is already
            available (somewhere) */
         tl_assert(isIRAtom(e->Iex.Load.addr));
         tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
         return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
      }
      case Iex_ITE: {
         IRAtom* b1 = schemeE( mce, e->Iex.ITE.cond );
         IRAtom* b3 = schemeE( mce, e->Iex.ITE.iftrue );
         IRAtom* b2 = schemeE( mce, e->Iex.ITE.iffalse );
         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
      }
      case Iex_Qop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Qop.details->arg1 );
         IRAtom* b2 = schemeE( mce, e->Iex.Qop.details->arg2 );
         IRAtom* b3 = schemeE( mce, e->Iex.Qop.details->arg3 );
         IRAtom* b4 = schemeE( mce, e->Iex.Qop.details->arg4 );
         return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
                                 gen_maxU32( mce, b3, b4 ) );
      }
      case Iex_Triop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Triop.details->arg1 );
         IRAtom* b2 = schemeE( mce, e->Iex.Triop.details->arg2 );
         IRAtom* b3 = schemeE( mce, e->Iex.Triop.details->arg3 );
         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
      }
      case Iex_Binop: {
         switch (e->Iex.Binop.op) {
            case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
            case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
            case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
            case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
               /* Just say these all produce a defined result,
                  regardless of their arguments.  See
                  COMMENT_ON_CasCmpEQ in this file. */
               return mkU32(0);
            default: {
               IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
               IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
               return gen_maxU32( mce, b1, b2 );
            }
         }
         tl_assert(0);
         /*NOTREACHED*/
      }
      case Iex_Unop: {
         IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
         return b1;
      }
      case Iex_Const:
         return mkU32(0);
      case Iex_RdTmp:
         return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
      case Iex_Get: {
         Int b_offset = MC_(get_otrack_shadow_offset)(
                           e->Iex.Get.offset,
                           sizeofIRType(e->Iex.Get.ty)
                        );
         tl_assert(b_offset >= -1
                   && b_offset <= mce->layout->total_sizeB -4);
         if (b_offset >= 0) {
            /* FIXME: this isn't an atom! */
            return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
                               Ity_I32 );
         }
         return mkU32(0);
      }
      default:
         VG_(printf)("mc_translate.c: schemeE: unhandled: ");
         ppIRExpr(e);
         VG_(tool_panic)("memcheck:schemeE");
   }
}
static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
{
   // This is a hacked version of do_shadow_Dirty
   Int       i, k, n, toDo, gSz, gOff;
   IRAtom    *here, *curr;
   IRTemp    dst;

   /* First check the guard. */
   curr = schemeE( mce, d->guard );

   /* Now round up all inputs and maxU32 over them. */

   /* Inputs: unmasked args
      Note: arguments are evaluated REGARDLESS of the guard expression */
   for (i = 0; d->args[i]; i++) {
      IRAtom* arg = d->args[i];
      if ( (d->cee->mcx_mask & (1<<i))
           || UNLIKELY(is_IRExpr_VECRET_or_GSPTR(arg)) ) {
         /* ignore this arg */
      } else {
         here = schemeE( mce, arg );
         curr = gen_maxU32( mce, curr, here );
      }
   }

   /* Inputs: guest state that we read. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Write)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz  = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz)) {
            if (0)
            VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
                        gOff, gSz);
            continue;
         }

         /* This state element is read or modified.  So we need to
            consider it.  If larger than 4 bytes, deal with it in
            4-byte chunks. */
         while (True) {
            Int b_offset;
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 4 ? gSz : 4;
            /* update 'curr' with maxU32 of the state slice
               gOff .. gOff+n-1 */
            b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
            if (b_offset != -1) {
               /* Observe the guard expression.  If it is false use 0, i.e.
                  nothing is known about the origin */
               IRAtom *cond, *iffalse, *iftrue;

               cond = assignNew( 'B', mce, Ity_I1, d->guard);
               iffalse = mkU32(0);
               iftrue  = assignNew( 'B', mce, Ity_I32,
                                    IRExpr_Get(b_offset
                                                  + 2*mce->layout->total_sizeB,
                                               Ity_I32));
               here = assignNew( 'B', mce, Ity_I32,
                                 IRExpr_ITE(cond, iftrue, iffalse));
               curr = gen_maxU32( mce, curr, here );
            }
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Inputs: memory */

   if (d->mFx != Ifx_None) {
      /* Because we may do multiple shadow loads/stores from the same
         base address, it's best to do a single test of its
         definedness right now.  Post-instrumentation optimisation
         should remove all but this test. */
      tl_assert(d->mAddr);
      here = schemeE( mce, d->mAddr );
      curr = gen_maxU32( mce, curr, here );
   }

   /* Deal with memory inputs (reads or modifies) */
   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
      toDo = d->mSize;
      /* chew off 32-bit chunks.  We don't care about the endianness
         since it's all going to be condensed down to a single bit,
         but nevertheless choose an endianness which is hopefully
         native to the platform. */
      while (toDo >= 4) {
         here = gen_guarded_load_b( mce, 4, d->mAddr, d->mSize - toDo,
                                    d->guard );
         curr = gen_maxU32( mce, curr, here );
         toDo -= 4;
      }
      /* handle possible 16-bit excess */
      while (toDo >= 2) {
         here = gen_guarded_load_b( mce, 2, d->mAddr, d->mSize - toDo,
                                    d->guard );
         curr = gen_maxU32( mce, curr, here );
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         here = gen_guarded_load_b( mce, 1, d->mAddr, d->mSize - toDo,
                                    d->guard );
         curr = gen_maxU32( mce, curr, here );
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }

   /* Whew!  So curr is a 32-bit B-value which should give an origin
      of some use if any of the inputs to the helper are undefined.
      Now we need to re-distribute the results to all destinations. */

   /* Outputs: the destination temporary, if there is one. */
   if (d->tmp != IRTemp_INVALID) {
      dst = findShadowTmpB(mce, d->tmp);
      assign( 'V', mce, dst, curr );
   }

   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;

      /* Enumerate the described state segments */
      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
         gSz  = d->fxState[i].size;

         /* Ignore any sections marked as 'always defined'. */
         if (isAlwaysDefd(mce, gOff, gSz))
            continue;

         /* This state element is written or modified.  So we need to
            consider it.  If larger than 4 bytes, deal with it in
            4-byte chunks. */
         while (True) {
            Int b_offset;
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 4 ? gSz : 4;
            /* Write 'curr' to the state slice gOff .. gOff+n-1 */
            b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
            if (b_offset != -1) {

               /* If the guard expression evaluates to false we simply Put
                  the value that is already stored in the guest state slot */
               IRAtom *cond, *iffalse;

               cond    = assignNew('B', mce, Ity_I1,
                                   d->guard);
               iffalse = assignNew('B', mce, Ity_I32,
                                   IRExpr_Get(b_offset +
                                              2*mce->layout->total_sizeB,
                                              Ity_I32));
               curr = assignNew('V', mce, Ity_I32,
                                IRExpr_ITE(cond, curr, iffalse));

               stmt( 'B', mce, IRStmt_Put(b_offset
                                          + 2*mce->layout->total_sizeB,
                                          curr ));
            }
            gSz -= n;
            gOff += n;
         }
      }
   }

   /* Outputs: memory that we write or modify.  Same comments about
      endianness as above apply. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      toDo = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
                      d->guard );
         toDo -= 4;
      }
      /* handle possible 16-bit excess */
      while (toDo >= 2) {
         gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
                      d->guard );
         toDo -= 2;
      }
      /* chew off the remaining 8-bit chunk, if any */
      if (toDo == 1) {
         gen_store_b( mce, 1, d->mAddr, d->mSize - toDo, curr,
                      d->guard );
         toDo -= 1;
      }
      tl_assert(toDo == 0);
   }
}
/* Generate IR for origin shadowing for a general guarded store. */
static void do_origins_Store_guarded ( MCEnv* mce,
                                       IREndness stEnd,
                                       IRExpr* stAddr,
                                       IRExpr* stData,
                                       IRExpr* guard )
{
   Int     dszB;
   IRAtom* dataB;
   /* assert that the B value for the address is already available
      (somewhere), since the call to schemeE will want to see it.
      XXXX how does this actually ensure that?? */
   tl_assert(isIRAtom(stAddr));
   tl_assert(isIRAtom(stData));
   dszB  = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
   dataB = schemeE( mce, stData );
   gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB, guard );
}
/* Generate IR for origin shadowing for a plain store. */
static void do_origins_Store_plain ( MCEnv* mce,
                                     IREndness stEnd,
                                     IRExpr* stAddr,
                                     IRExpr* stData )
{
   do_origins_Store_guarded ( mce, stEnd, stAddr, stData,
                              NULL/*guard*/ );
}
/* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */

static void do_origins_StoreG ( MCEnv* mce, IRStoreG* sg )
{
   do_origins_Store_guarded( mce, sg->end, sg->addr,
                             sg->data, sg->guard );
}
static void do_origins_LoadG ( MCEnv* mce, IRLoadG* lg )
{
   IRType loadedTy = Ity_INVALID;
   switch (lg->cvt) {
      case ILGop_IdentV128: loadedTy = Ity_V128; break;
      case ILGop_Ident64:   loadedTy = Ity_I64;  break;
      case ILGop_Ident32:   loadedTy = Ity_I32;  break;
      case ILGop_16Uto32:   loadedTy = Ity_I16;  break;
      case ILGop_16Sto32:   loadedTy = Ity_I16;  break;
      case ILGop_8Uto32:    loadedTy = Ity_I8;   break;
      case ILGop_8Sto32:    loadedTy = Ity_I8;   break;
      default: VG_(tool_panic)("schemeS.IRLoadG");
   }
   IRAtom* ori_alt
      = schemeE( mce,lg->alt );
   IRAtom* ori_final
      = expr2ori_Load_guarded_General(mce, loadedTy,
                                      lg->addr, 0/*addr bias*/,
                                      lg->guard, ori_alt );
   /* And finally, bind the origin to the destination temporary. */
   assign( 'B', mce, findShadowTmpB(mce, lg->dst), ori_final );
}
static void schemeS ( MCEnv* mce, IRStmt* st )
{
   tl_assert(MC_(clo_mc_level) == 3);

   switch (st->tag) {

      case Ist_AbiHint:
         /* The value-check instrumenter handles this - by arranging
            to pass the address of the next instruction to
            MC_(helperc_MAKE_STACK_UNINIT).  This is all that needs to
            happen for origin tracking w.r.t. AbiHints.  So there is
            nothing to do here. */
         break;

      case Ist_PutI: {
         IRPutI *puti = st->Ist.PutI.details;
         IRRegArray* descr_b;
         IRAtom      *t1, *t2, *t3, *t4;
         IRRegArray* descr = puti->descr;
         IRType equivIntTy
            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
         /* If this array is unshadowable for whatever reason,
            generate no code. */
         if (equivIntTy == Ity_INVALID)
            break;
         tl_assert(sizeofIRType(equivIntTy) >= 4);
         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
         descr_b
            = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
                            equivIntTy, descr->nElems );
         /* Compute a value to Put - the conjoinment of the origin for
            the data to be Put-ted (obviously) and of the index value
            (not so obviously). */
         t1 = schemeE( mce, puti->data );
         t2 = schemeE( mce, puti->ix );
         t3 = gen_maxU32( mce, t1, t2 );
         t4 = zWidenFrom32( mce, equivIntTy, t3 );
         stmt( 'B', mce, IRStmt_PutI( mkIRPutI(descr_b, puti->ix,
                                               puti->bias, t4) ));
         break;
      }

      case Ist_Dirty:
         do_origins_Dirty( mce, st->Ist.Dirty.details );
         break;

      case Ist_Store:
         do_origins_Store_plain( mce, st->Ist.Store.end,
                                      st->Ist.Store.addr,
                                      st->Ist.Store.data );
         break;

      case Ist_StoreG:
         do_origins_StoreG( mce, st->Ist.StoreG.details );
         break;

      case Ist_LoadG:
         do_origins_LoadG( mce, st->Ist.LoadG.details );
         break;

      case Ist_LLSC: {
         /* In short: treat a load-linked like a normal load followed
            by an assignment of the loaded (shadow) data to the result
            temporary.  Treat a store-conditional like a normal store,
            and mark the result temporary as defined. */
         if (st->Ist.LLSC.storedata == NULL) {
            /* Load Linked */
            IRType resTy
               = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
            IRExpr* vanillaLoad
               = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
            tl_assert(resTy == Ity_I64 || resTy == Ity_I32
                      || resTy == Ity_I16 || resTy == Ity_I8);
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                              schemeE(mce, vanillaLoad));
         } else {
            /* Store conditional */
            do_origins_Store_plain( mce, st->Ist.LLSC.end,
                                    st->Ist.LLSC.addr,
                                    st->Ist.LLSC.storedata );
            /* For the rationale behind this, see comments at the
               place where the V-shadow for .result is constructed, in
               do_shadow_LLSC.  In short, we regard .result as
               always-defined. */
            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
                              mkU32(0) );
         }
         break;
      }

      case Ist_Put: {
         Int b_offset
            = MC_(get_otrack_shadow_offset)(
                 st->Ist.Put.offset,
                 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
              );
         if (b_offset >= 0) {
            /* FIXME: this isn't an atom! */
            stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
                                       schemeE( mce, st->Ist.Put.data )) );
         }
         break;
      }

      case Ist_WrTmp:
         assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
                           schemeE(mce, st->Ist.WrTmp.data) );
         break;

      case Ist_MBE:
      case Ist_NoOp:
      case Ist_Exit:
      case Ist_IMark:
         break;

      default:
         VG_(printf)("mc_translate.c: schemeS: unhandled: ");
         ppIRStmt(st);
         VG_(tool_panic)("memcheck:schemeS");
   }
}
/*------------------------------------------------------------*/
/*--- Post-tree-build final tidying                        ---*/
/*------------------------------------------------------------*/

/* This exploits the observation that Memcheck often produces
   repeated conditional calls of the form

   Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)

   with the same guard expression G guarding the same helper call.
   The second and subsequent calls are redundant.  This usually
   results from instrumentation of guest code containing multiple
   memory references at different constant offsets from the same base
   register.  After optimisation of the instrumentation, you get a
   test for the definedness of the base register for each memory
   reference, which is kinda pointless.  MC_(final_tidy) therefore
   looks for such repeated calls and removes all but the first. */
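/* A minimal sketch of the redundancy being removed (guard and otag
   names here are invented for illustration):

      if (t_guard) MC_(helperc_value_check4_fail_w_o)(otag)   -- kept
      ... other statements ...
      if (t_guard) MC_(helperc_value_check4_fail_w_o)(otag)   -- same
                       -- helper with the same guard: MC_(final_tidy)
                       -- rewrites this statement to a no-op
*/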
/* With some testing on perf/bz2.c, on amd64 and x86, compiled with
   gcc-5.3.1 -O2, it appears that 16 entries in the array are enough to
   get almost all the benefits of this transformation whilst causing
   the slide-back case to happen just often enough to be verifiably
   correct.  For posterity, the numbers are:

     1    4,336  (112,212 -> 1,709,473; ratio 15.2)
     2    4,336  (112,194 -> 1,669,895; ratio 14.9)
     3    4,336  (112,194 -> 1,660,713; ratio 14.8)
     4    4,336  (112,194 -> 1,658,555; ratio 14.8)
     5    4,336  (112,194 -> 1,655,447; ratio 14.8)
     6    4,336  (112,194 -> 1,655,101; ratio 14.8)
     7    4,336  (112,194 -> 1,654,858; ratio 14.7)
     8    4,336  (112,194 -> 1,654,810; ratio 14.7)
    10    4,336  (112,194 -> 1,654,621; ratio 14.7)
    12    4,336  (112,194 -> 1,654,678; ratio 14.7)
    16    4,336  (112,194 -> 1,654,494; ratio 14.7)
    32    4,336  (112,194 -> 1,654,602; ratio 14.7)
   inf    4,336  (112,194 -> 1,654,602; ratio 14.7)

     1    4,113  (107,329 -> 1,822,171; ratio 17.0)
     2    4,113  (107,329 -> 1,806,443; ratio 16.8)
     3    4,113  (107,329 -> 1,803,967; ratio 16.8)
     4    4,113  (107,329 -> 1,802,785; ratio 16.8)
     5    4,113  (107,329 -> 1,802,412; ratio 16.8)
     6    4,113  (107,329 -> 1,802,062; ratio 16.8)
     7    4,113  (107,329 -> 1,801,976; ratio 16.8)
     8    4,113  (107,329 -> 1,801,886; ratio 16.8)
    10    4,113  (107,329 -> 1,801,653; ratio 16.8)
    12    4,113  (107,329 -> 1,801,526; ratio 16.8)
    16    4,113  (107,329 -> 1,801,298; ratio 16.8)
    32    4,113  (107,329 -> 1,800,827; ratio 16.8)
   inf    4,113  (107,329 -> 1,800,827; ratio 16.8)
*/
/* Structs for recording which (helper, guard) pairs we have already
   seen. */

#define N_TIDYING_PAIRS 16

typedef
   struct { void* entry; IRExpr* guard; }
   Pair;

typedef
   struct {
      Pair pairs[N_TIDYING_PAIRS +1/*for bounds checking*/];
      UInt pairsUsed;
   }
   Pairs;
/* Return True if e1 and e2 definitely denote the same value (used to
   compare guards).  Return False if unknown; False is the safe
   answer.  Since guest registers and guest memory do not have the
   SSA property we must return False if any Gets or Loads appear in
   the expression.  This implicitly assumes that e1 and e2 have the
   same IR type, which is always true here -- the type is Ity_I1. */

static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
{
   if (e1->tag != e2->tag)
      return False;
   switch (e1->tag) {
      case Iex_Const:
         return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
      case Iex_Binop:
         return e1->Iex.Binop.op == e2->Iex.Binop.op
                && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
                && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
      case Iex_Unop:
         return e1->Iex.Unop.op == e2->Iex.Unop.op
                && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
      case Iex_RdTmp:
         return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
      case Iex_ITE:
         return sameIRValue( e1->Iex.ITE.cond, e2->Iex.ITE.cond )
                && sameIRValue( e1->Iex.ITE.iftrue, e2->Iex.ITE.iftrue )
                && sameIRValue( e1->Iex.ITE.iffalse, e2->Iex.ITE.iffalse );
      case Iex_Qop:
      case Iex_Triop:
      case Iex_CCall:
         /* be lazy.  Could define equality for these, but they never
            appear to be used. */
         return False;
      case Iex_Get:
      case Iex_GetI:
      case Iex_Load:
         /* be conservative - these may not give the same value each
            time */
         return False;
      case Iex_Binder:
         /* should never see this */
         /* fallthrough */
      default:
         VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
         ppIRExpr(e1);
         VG_(tool_panic)("memcheck:sameIRValue");
         return False;
   }
}
/* See if 'pairs' already has an entry for (entry, guard).  Return
   True if so.  If not, add an entry. */

static
Bool check_or_add ( Pairs* tidyingEnv, IRExpr* guard, void* entry )
{
   UInt i, n = tidyingEnv->pairsUsed;
   tl_assert(n <= N_TIDYING_PAIRS);
   for (i = 0; i < n; i++) {
      if (tidyingEnv->pairs[i].entry == entry
          && sameIRValue(tidyingEnv->pairs[i].guard, guard))
         return True;
   }
   /* (guard, entry) wasn't found in the array.  Add it at the end.
      If the array is already full, slide the entries one slot
      backwards.  This means we will lose the ability to detect
      duplicates from the pair in slot zero, but that happens so
      rarely that it's unlikely to have much effect on overall code
      quality.  Also, this strategy loses the check for the oldest
      tracked exit (memory reference, basically) and so that is (I'd
      guess) least likely to be re-used after this point. */
   tl_assert(n <= N_TIDYING_PAIRS);
   if (n == N_TIDYING_PAIRS) {
      for (i = 1; i < N_TIDYING_PAIRS; i++) {
         tidyingEnv->pairs[i-1] = tidyingEnv->pairs[i];
      }
      tidyingEnv->pairs[N_TIDYING_PAIRS-1].entry = entry;
      tidyingEnv->pairs[N_TIDYING_PAIRS-1].guard = guard;
   } else {
      tl_assert(n < N_TIDYING_PAIRS);
      tidyingEnv->pairs[n].entry = entry;
      tidyingEnv->pairs[n].guard = guard;
      n++;
      tidyingEnv->pairsUsed = n;
   }
   return False;
}
static Bool is_helperc_value_checkN_fail ( const HChar* name )
{
   /* This is expensive because it happens a lot.  We are checking to
      see whether |name| is one of the following 8 strings:

         MC_(helperc_value_check8_fail_no_o)
         MC_(helperc_value_check4_fail_no_o)
         MC_(helperc_value_check0_fail_no_o)
         MC_(helperc_value_check1_fail_no_o)
         MC_(helperc_value_check8_fail_w_o)
         MC_(helperc_value_check0_fail_w_o)
         MC_(helperc_value_check1_fail_w_o)
         MC_(helperc_value_check4_fail_w_o)

      To speed it up, check the common prefix just once, rather than
      (implicitly) 8 times. */
   const HChar* prefix = "MC_(helperc_value_check";

   HChar n, p;
   while (True) {
      n = *name;
      p = *prefix;
      if (p == 0) break; /* ran off the end of the prefix */
      /* We still have some prefix to use */
      if (n == 0) return False; /* have prefix, but name ran out */
      if (n != p) return False; /* have both pfx and name, but no match */
      name++;
      prefix++;
   }

   /* Check the part after the prefix. */
   tl_assert(*prefix == 0 && *name != 0);
   return    0==VG_(strcmp)(name, "8_fail_no_o)")
          || 0==VG_(strcmp)(name, "4_fail_no_o)")
          || 0==VG_(strcmp)(name, "0_fail_no_o)")
          || 0==VG_(strcmp)(name, "1_fail_no_o)")
          || 0==VG_(strcmp)(name, "8_fail_w_o)")
          || 0==VG_(strcmp)(name, "4_fail_w_o)")
          || 0==VG_(strcmp)(name, "0_fail_w_o)")
          || 0==VG_(strcmp)(name, "1_fail_w_o)");
}
IRSB* MC_(final_tidy) ( IRSB* sb_in )
{
   Int       i;
   IRStmt*   st;
   IRDirty*  di;
   IRExpr*   guard;
   IRCallee* cee;
   Bool      alreadyPresent;
   Pairs     pairs;

   pairs.pairsUsed = 0;

   pairs.pairs[N_TIDYING_PAIRS].entry = (void*)0x123;
   pairs.pairs[N_TIDYING_PAIRS].guard = (IRExpr*)0x456;

   /* Scan forwards through the statements.  Each time a call to one
      of the relevant helpers is seen, check if we have made a
      previous call to the same helper using the same guard
      expression, and if so, delete the call. */
   for (i = 0; i < sb_in->stmts_used; i++) {
      st = sb_in->stmts[i];
      tl_assert(st);
      if (st->tag != Ist_Dirty)
         continue;
      di = st->Ist.Dirty.details;
      guard = di->guard;
      tl_assert(guard);
      if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
      cee = di->cee;
      if (!is_helperc_value_checkN_fail( cee->name ))
         continue;
      /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
         guard 'guard'.  Check if we have already seen a call to this
         function with the same guard.  If so, delete it.  If not,
         add it to the set of calls we do know about. */
      alreadyPresent = check_or_add( &pairs, guard, cee->addr );
      if (alreadyPresent) {
         sb_in->stmts[i] = IRStmt_NoOp();
         if (0) VG_(printf)("XX\n");
      }
   }

   tl_assert(pairs.pairs[N_TIDYING_PAIRS].entry == (void*)0x123);
   tl_assert(pairs.pairs[N_TIDYING_PAIRS].guard == (IRExpr*)0x456);

   return sb_in;
}

#undef N_TIDYING_PAIRS
/*------------------------------------------------------------*/
/*--- Startup assertion checking                           ---*/
/*------------------------------------------------------------*/

void MC_(do_instrumentation_startup_checks)( void )
{
   /* Make a best-effort check to see that is_helperc_value_checkN_fail
      is working as we expect. */

#  define CHECK(_expected, _string) \
      tl_assert((_expected) == is_helperc_value_checkN_fail(_string))

   /* It should identify these 8, and no others, as targets. */
   CHECK(True, "MC_(helperc_value_check8_fail_no_o)");
   CHECK(True, "MC_(helperc_value_check4_fail_no_o)");
   CHECK(True, "MC_(helperc_value_check0_fail_no_o)");
   CHECK(True, "MC_(helperc_value_check1_fail_no_o)");
   CHECK(True, "MC_(helperc_value_check8_fail_w_o)");
   CHECK(True, "MC_(helperc_value_check0_fail_w_o)");
   CHECK(True, "MC_(helperc_value_check1_fail_w_o)");
   CHECK(True, "MC_(helperc_value_check4_fail_w_o)");

   /* Ad-hoc selection of other strings gathered via a quick test. */
   CHECK(False, "amd64g_dirtyhelper_CPUID_avx2");
   CHECK(False, "amd64g_dirtyhelper_RDTSC");
   CHECK(False, "MC_(helperc_b_load1)");
   CHECK(False, "MC_(helperc_b_load2)");
   CHECK(False, "MC_(helperc_b_load4)");
   CHECK(False, "MC_(helperc_b_load8)");
   CHECK(False, "MC_(helperc_b_load16)");
   CHECK(False, "MC_(helperc_b_load32)");
   CHECK(False, "MC_(helperc_b_store1)");
   CHECK(False, "MC_(helperc_b_store2)");
   CHECK(False, "MC_(helperc_b_store4)");
   CHECK(False, "MC_(helperc_b_store8)");
   CHECK(False, "MC_(helperc_b_store16)");
   CHECK(False, "MC_(helperc_b_store32)");
   CHECK(False, "MC_(helperc_LOADV8)");
   CHECK(False, "MC_(helperc_LOADV16le)");
   CHECK(False, "MC_(helperc_LOADV32le)");
   CHECK(False, "MC_(helperc_LOADV64le)");
   CHECK(False, "MC_(helperc_LOADV128le)");
   CHECK(False, "MC_(helperc_LOADV256le)");
   CHECK(False, "MC_(helperc_STOREV16le)");
   CHECK(False, "MC_(helperc_STOREV32le)");
   CHECK(False, "MC_(helperc_STOREV64le)");
   CHECK(False, "MC_(helperc_STOREV8)");
   CHECK(False, "track_die_mem_stack_8");
   CHECK(False, "track_new_mem_stack_8_w_ECU");
   CHECK(False, "MC_(helperc_MAKE_STACK_UNINIT_w_o)");
   CHECK(False, "VG_(unknown_SP_update_w_ECU)");

#  undef CHECK
}
/*------------------------------------------------------------*/
/*--- Memcheck main                                        ---*/
/*------------------------------------------------------------*/

static Bool isBogusAtom ( IRAtom* at )
{
   if (at->tag == Iex_RdTmp)
      return False;
   tl_assert(at->tag == Iex_Const);

   ULong n = 0;
   IRConst* con = at->Iex.Const.con;
   switch (con->tag) {
      case Ico_U1:   return False;
      case Ico_U8:   n = (ULong)con->Ico.U8; break;
      case Ico_U16:  n = (ULong)con->Ico.U16; break;
      case Ico_U32:  n = (ULong)con->Ico.U32; break;
      case Ico_U64:  n = (ULong)con->Ico.U64; break;
      case Ico_F32:  return False;
      case Ico_F64:  return False;
      case Ico_F32i: return False;
      case Ico_F64i: return False;
      case Ico_V128: return False;
      case Ico_V256: return False;
      default: ppIRExpr(at); tl_assert(0);
   }
   /* VG_(printf)("%llx\n", n); */
   /* Shortcuts */
   if (LIKELY(n <= 0x0000000000001000ULL)) return False;
   if (LIKELY(n >= 0xFFFFFFFFFFFFF000ULL)) return False;
   /* The list of bogus atoms is: */
   return (/*32*/    n == 0xFEFEFEFFULL
           /*32*/ || n == 0x80808080ULL
           /*32*/ || n == 0x7F7F7F7FULL
           /*32*/ || n == 0x7EFEFEFFULL
           /*32*/ || n == 0x81010100ULL
           /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
           /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
           /*64*/ || n == 0x0000000000008080ULL
           /*64*/ || n == 0x8080808080808080ULL
           /*64*/ || n == 0x0101010101010101ULL
          );
}
/* Does 'st' mention any of the literals identified/listed in
   isBogusAtom()? */
static inline Bool containsBogusLiterals ( /*FLAT*/ IRStmt* st )
{
   Int      i;
   IRExpr*  e;
   IRDirty* d;
   IRCAS*   cas;
   switch (st->tag) {
      case Ist_WrTmp:
         e = st->Ist.WrTmp.data;
         switch (e->tag) {
            case Iex_Get:
            case Iex_RdTmp:
               return False;
            case Iex_Const:
               return isBogusAtom(e);
            case Iex_Unop:
               return isBogusAtom(e->Iex.Unop.arg)
                      || e->Iex.Unop.op == Iop_GetMSBs8x16;
            case Iex_GetI:
               return isBogusAtom(e->Iex.GetI.ix);
            case Iex_Binop:
               return isBogusAtom(e->Iex.Binop.arg1)
                      || isBogusAtom(e->Iex.Binop.arg2);
            case Iex_Triop:
               return isBogusAtom(e->Iex.Triop.details->arg1)
                      || isBogusAtom(e->Iex.Triop.details->arg2)
                      || isBogusAtom(e->Iex.Triop.details->arg3);
            case Iex_Qop:
               return isBogusAtom(e->Iex.Qop.details->arg1)
                      || isBogusAtom(e->Iex.Qop.details->arg2)
                      || isBogusAtom(e->Iex.Qop.details->arg3)
                      || isBogusAtom(e->Iex.Qop.details->arg4);
            case Iex_ITE:
               return isBogusAtom(e->Iex.ITE.cond)
                      || isBogusAtom(e->Iex.ITE.iftrue)
                      || isBogusAtom(e->Iex.ITE.iffalse);
            case Iex_Load:
               return isBogusAtom(e->Iex.Load.addr);
            case Iex_CCall:
               for (i = 0; e->Iex.CCall.args[i]; i++)
                  if (isBogusAtom(e->Iex.CCall.args[i]))
                     return True;
               return False;
            default:
               goto unhandled;
         }
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         for (i = 0; d->args[i]; i++) {
            IRAtom* atom = d->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(atom))) {
               if (isBogusAtom(atom))
                  return True;
            }
         }
         if (isBogusAtom(d->guard))
            return True;
         if (d->mAddr && isBogusAtom(d->mAddr))
            return True;
         return False;
      case Ist_Put:
         return isBogusAtom(st->Ist.Put.data);
      case Ist_PutI:
         return isBogusAtom(st->Ist.PutI.details->ix)
                || isBogusAtom(st->Ist.PutI.details->data);
      case Ist_Store:
         return isBogusAtom(st->Ist.Store.addr)
                || isBogusAtom(st->Ist.Store.data);
      case Ist_StoreG: {
         IRStoreG* sg = st->Ist.StoreG.details;
         return isBogusAtom(sg->addr) || isBogusAtom(sg->data)
                || isBogusAtom(sg->guard);
      }
      case Ist_LoadG: {
         IRLoadG* lg = st->Ist.LoadG.details;
         return isBogusAtom(lg->addr) || isBogusAtom(lg->alt)
                || isBogusAtom(lg->guard);
      }
      case Ist_Exit:
         return isBogusAtom(st->Ist.Exit.guard);
      case Ist_AbiHint:
         return isBogusAtom(st->Ist.AbiHint.base)
                || isBogusAtom(st->Ist.AbiHint.nia);
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MBE:
         return False;
      case Ist_CAS:
         cas = st->Ist.CAS.details;
         return isBogusAtom(cas->addr)
                || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
                || isBogusAtom(cas->expdLo)
                || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
                || isBogusAtom(cas->dataLo);
      case Ist_LLSC:
         return isBogusAtom(st->Ist.LLSC.addr)
                || (st->Ist.LLSC.storedata
                       ? isBogusAtom(st->Ist.LLSC.storedata)
                       : False);
      default:
      unhandled:
         ppIRStmt(st);
         VG_(tool_panic)("hasBogusLiterals");
   }
}
/* This is the pre-instrumentation analysis.  It does a backwards pass over
   the stmts in |sb_in| to determine a HowUsed value for each tmp defined in
   the block.

   Unrelatedly, it also checks all literals in the block with |isBogusAtom|,
   as a positive result from that is a strong indication that we need to
   expensively instrument add/sub in the block.  We do both analyses in one
   pass, even though they are independent, so as to avoid the overhead of
   having to traverse the whole block twice.

   The usage pass proceeds as follows.  Let max= be the max operation in the
   HowUsed lattice, hence

      X max= Y means X = max(X, Y)

   then

      for t in original tmps, useEnv[t] = HuUnU

      for t used in the block's .next field
         useEnv[t] max= HuPCa // because jmp targets are PCast-tested

      for st iterating *backwards* in the block

         match st

            case "t1 = load(t2)" // case 1
               useEnv[t2] max= HuPCa

            case "t1 = add(t2, t3)" // case 2
               useEnv[t2] max= useEnv[t1]
               useEnv[t3] max= useEnv[t1]

            other
               for t in st.usedTmps // case 3
                  useEnv[t] max= HuOth
                  // same as useEnv[t] = HuOth

   The general idea is that we accumulate, in useEnv[], information about
   how each tmp is used.  That can be updated as we work further back
   through the block and find more uses of it, but its HowUsed value can
   only ascend the lattice, not descend.

   Initially we mark all tmps as unused.  In case (1), if a tmp is seen to
   be used as a memory address, then its use is at least HuPCa.  The point
   is that for a memory address we will add instrumentation to check if any
   bit of the address is undefined, which means that we won't need expensive
   V-bit propagation through an add expression that computed the address --
   cheap add instrumentation will be equivalent.

   Note in case (1) that if we have previously seen a non-memory-address use
   of the tmp, then its use will already be HuOth and will be unchanged by
   the max= operation.  And if it turns out that the source of the tmp was
   an add, then we'll have to expensively instrument the add, because we
   can't prove that, for the previous non-memory-address use of the tmp,
   cheap and expensive instrumentation will be equivalent.

   In case 2, we propagate the usage-mode of the result of an add back
   through to its operands.  Again, we use max= so as to take account of the
   fact that t2 or t3 might later in the block (viz, earlier in the
   iteration) have been used in a way that requires expensive add
   instrumentation.

   In case 3, we deal with all other tmp uses.  We assume that we'll need a
   result that is as accurate as possible, so we max= HuOth into its use
   mode.  Since HuOth is the top of the lattice, that's equivalent to just
   setting its use to HuOth.

   The net result of all this is that:

   tmps that are used either
      - only as a memory address, or
      - only as part of a tree of adds that computes a memory address,
        and has no other use
   are marked as HuPCa, and so we can instrument their generating Add
   nodes cheaply, which is the whole point of this analysis

   tmps that are used any other way at all are marked as HuOth

   tmps that are unused are marked as HuUnU.  We don't expect to see any
   since we expect that the incoming IR has had all dead assignments
   removed by previous optimisation passes.  Nevertheless the analysis is
   correct even in the presence of dead tmps.

   A final comment on dead tmps.  In case 1 and case 2, we could actually
   conditionalise the updates thusly:

      if (useEnv[t1] > HuUnU) { useEnv[t2] max= HuPCa }       // case 1

      if (useEnv[t1] > HuUnU) { useEnv[t2] max= useEnv[t1] }  // case 2
      if (useEnv[t1] > HuUnU) { useEnv[t3] max= useEnv[t1] }  // case 2

   In other words, if the assigned-to tmp |t1| is never used, then there's
   no point in propagating any use through to its operands.  That won't
   change the final HuPCa-vs-HuOth results, which is what we care about.
   Given that we expect to get dead-code-free inputs, there's no point in
   adding this extra refinement.
*/
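/* A small worked example of the pass above (tmp numbers invented for
   illustration):

      t3 = Add64(t1, 0x10:I64)
      t4 = LDle:I64(t3)
      PUT(16) = t4

   Working backwards: the PUT gives useEnv[t4] = HuOth (case 3); the
   load gives useEnv[t3] max= HuPCa (case 1); the add then propagates
   useEnv[t3], that is HuPCa, into useEnv[t1] (case 2).  Since t3 and
   t1 end up no higher than HuPCa, the Add64 qualifies for the cheap
   instrumentation. */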
/* Helper for |preInstrumentationAnalysis|. */
static inline void noteTmpUsesIn ( /*MOD*/HowUsed* useEnv,
                                   UInt tyenvUsed,
                                   HowUsed newUse, IRAtom* at )
{
   /* For the atom |at|, declare that for any tmp |t| in |at|, we will have
      seen a use of |newUse|.  So, merge that info into |t|'s accumulated
      use info. */
   switch (at->tag) {
      case Iex_Const:
         return;
      case Iex_RdTmp: {
         IRTemp t = at->Iex.RdTmp.tmp;
         tl_assert(t < tyenvUsed); // "is an original tmp"
         // The "max" operation in the lattice
         if (newUse > useEnv[t]) useEnv[t] = newUse;
         return;
      }
      default:
         // We should never get here -- it implies non-flat IR
         ppIRExpr(at);
         VG_(tool_panic)("noteTmpUsesIn");
   }
   /*NOTREACHED*/
   tl_assert(0);
}
7831 static void preInstrumentationAnalysis ( /*OUT*/HowUsed
** useEnvP
,
7832 /*OUT*/Bool
* hasBogusLiteralsP
,
7835 const UInt nOrigTmps
= (UInt
)sb_in
->tyenv
->types_used
;
7837 // We've seen no bogus literals so far.
7840 // This is calloc'd, so implicitly all entries are initialised to HuUnU.
7841 HowUsed
* useEnv
= VG_(calloc
)("mc.preInstrumentationAnalysis.1",
7842 nOrigTmps
, sizeof(HowUsed
));
7844 // Firstly, roll in contributions from the final dst address.
7845 bogus
= isBogusAtom(sb_in
->next
);
7846 noteTmpUsesIn(useEnv
, nOrigTmps
, HuPCa
, sb_in
->next
);
7848 // Now work backwards through the stmts.
7849 for (Int i
= sb_in
->stmts_used
-1; i
>= 0; i
--) {
7850 IRStmt
* st
= sb_in
->stmts
[i
];
7852 // Deal with literals.
7853 if (LIKELY(!bogus
)) {
7854 bogus
= containsBogusLiterals(st
);
7857 // Deal with tmp uses.
7860 IRTemp dst
= st
->Ist
.WrTmp
.tmp
;
7861 IRExpr
* rhs
= st
->Ist
.WrTmp
.data
;
7862 // This is the one place where we have to consider all possible
7863 // tags for |rhs|, and can't just assume it is a tmp or a const.
7866 // just propagate demand for |dst| into this tmp use.
7867 noteTmpUsesIn(useEnv
, nOrigTmps
, useEnv
[dst
], rhs
);
7870 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, rhs
->Iex
.Unop
.arg
);
7873 if (rhs
->Iex
.Binop
.op
== Iop_Add64
7874 || rhs
->Iex
.Binop
.op
== Iop_Add32
) {
7875 // propagate demand for |dst| through to the operands.
7876 noteTmpUsesIn(useEnv
, nOrigTmps
,
7877 useEnv
[dst
], rhs
->Iex
.Binop
.arg1
);
7878 noteTmpUsesIn(useEnv
, nOrigTmps
,
7879 useEnv
[dst
], rhs
->Iex
.Binop
.arg2
);
7881 // just say that the operands are used in some unknown way.
7882 noteTmpUsesIn(useEnv
, nOrigTmps
,
7883 HuOth
, rhs
->Iex
.Binop
.arg1
);
7884 noteTmpUsesIn(useEnv
, nOrigTmps
,
7885 HuOth
, rhs
->Iex
.Binop
.arg2
);
7889 // All operands are used in some unknown way.
7890 IRTriop
* tri
= rhs
->Iex
.Triop
.details
;
7891 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, tri
->arg1
);
7892 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, tri
->arg2
);
7893 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, tri
->arg3
);
7897 // All operands are used in some unknown way.
7898 IRQop
* qop
= rhs
->Iex
.Qop
.details
;
7899 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, qop
->arg1
);
7900 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, qop
->arg2
);
7901 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, qop
->arg3
);
7902 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, qop
->arg4
);
7906 // The address will be checked (== PCasted).
7907 noteTmpUsesIn(useEnv
, nOrigTmps
, HuPCa
, rhs
->Iex
.Load
.addr
);
7910 // The condition is PCasted, the then- and else-values
7912 noteTmpUsesIn(useEnv
, nOrigTmps
, HuPCa
, rhs
->Iex
.ITE
.cond
);
7913 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, rhs
->Iex
.ITE
.iftrue
);
7914 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, rhs
->Iex
.ITE
.iffalse
);
7917 // The args are used in unknown ways.
7918 for (IRExpr
** args
= rhs
->Iex
.CCall
.args
; *args
; args
++) {
7919 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, *args
);
7923 // The index will be checked/PCasted (see do_shadow_GETI)
7924 noteTmpUsesIn(useEnv
, nOrigTmps
, HuPCa
, rhs
->Iex
.GetI
.ix
);
7932 VG_(tool_panic
)("preInstrumentationAnalysis:"
7933 " unhandled IRExpr");
         // The address will be checked (== PCasted).  The data will be
         // used in some unknown way.
         noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, st->Ist.Store.addr);
         noteTmpUsesIn(useEnv, nOrigTmps, HuOth, st->Ist.Store.data);

         // The guard will be checked (== PCasted)
         noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, st->Ist.Exit.guard);

         noteTmpUsesIn(useEnv, nOrigTmps, HuOth, st->Ist.Put.data);

         IRPutI* putI = st->Ist.PutI.details;
         // The index will be checked/PCasted (see do_shadow_PUTI).  The
         // data will be used in an unknown way.
         noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, putI->ix);
         noteTmpUsesIn(useEnv, nOrigTmps, HuOth, putI->data);

         IRDirty* d = st->Ist.Dirty.details;
         // The guard will be checked (== PCasted)
         noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, d->guard);
         // The args will be used in unknown ways.
         for (IRExpr** args = d->args; *args; args++) {
            noteTmpUsesIn(useEnv, nOrigTmps, HuOth, *args);
         }

         IRCAS* cas = st->Ist.CAS.details;
         // Address will be PCasted, everything else used as unknown
         noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, cas->addr);
         noteTmpUsesIn(useEnv, nOrigTmps, HuOth, cas->expdLo);
         noteTmpUsesIn(useEnv, nOrigTmps, HuOth, cas->dataLo);
         noteTmpUsesIn(useEnv, nOrigTmps, HuOth, cas->expdHi);
         noteTmpUsesIn(useEnv, nOrigTmps, HuOth, cas->dataHi);

         // Both exprs are used in unknown ways.  TODO: can we safely
         // just ignore AbiHints?
         noteTmpUsesIn(useEnv, nOrigTmps, HuOth, st->Ist.AbiHint.base);
         noteTmpUsesIn(useEnv, nOrigTmps, HuOth, st->Ist.AbiHint.nia);

         // We might be able to do better, and use HuPCa for the addr.
         // It's not immediately obvious that we can, because the address
         // is regarded as "used" only when the guard is true.
         IRStoreG* sg = st->Ist.StoreG.details;
         noteTmpUsesIn(useEnv, nOrigTmps, HuOth, sg->addr);
         noteTmpUsesIn(useEnv, nOrigTmps, HuOth, sg->data);
         noteTmpUsesIn(useEnv, nOrigTmps, HuOth, sg->guard);

         // Per similar comments to Ist_StoreG .. not sure whether this
         // is really optimal.
         IRLoadG* lg = st->Ist.LoadG.details;
         noteTmpUsesIn(useEnv, nOrigTmps, HuOth, lg->addr);
         noteTmpUsesIn(useEnv, nOrigTmps, HuOth, lg->alt);
         noteTmpUsesIn(useEnv, nOrigTmps, HuOth, lg->guard);

         noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, st->Ist.LLSC.addr);
         if (st->Ist.LLSC.storedata)
            noteTmpUsesIn(useEnv, nOrigTmps, HuOth, st->Ist.LLSC.storedata);
         VG_(tool_panic)("preInstrumentationAnalysis: unhandled IRStmt");

   } // Now work backwards through the stmts.

   // Return the computed use env and the bogus-atom flag.
   tl_assert(*useEnvP == NULL);
   *useEnvP = useEnv;

   tl_assert(*hasBogusLiteralsP == False);
   *hasBogusLiteralsP = bogus;
}
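
/* The net effect of the analysis above: given flat IR along the lines of

      t3 = Add64(t1,t2)
      t4 = LDle:I64(t3)

   the only use of t3 is as a load address, which is checked/PCasted, so t3
   is marked HuPCa in the returned array.  Had t3 also been used in some
   other way, say as a CmpEQ64 operand, that use would demote it to HuOth,
   since the exact shadow value of the temp would then matter.  (The example
   is illustrative only, not taken from any particular block.) */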
IRSB* MC_(instrument) ( VgCallbackClosure* closure,
                        IRSB* sb_in,
                        const VexGuestLayout* layout,
                        const VexGuestExtents* vge,
                        const VexArchInfo* archinfo_host,
                        IRType gWordTy, IRType hWordTy )
{
   Bool    verboze = 0||False;
   Int     i, j, first_stmt;
   IRStmt* st;
   MCEnv   mce;
   IRSB*   sb_out;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   /* Check we're not completely nuts */
   tl_assert(sizeof(UWord) == sizeof(void*));
   tl_assert(sizeof(Word)  == sizeof(void*));
   tl_assert(sizeof(Addr)  == sizeof(void*));
   tl_assert(sizeof(ULong) == 8);
   tl_assert(sizeof(Long)  == 8);
   tl_assert(sizeof(UInt)  == 4);
   tl_assert(sizeof(Int)   == 4);

   tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);

   sb_out = deepCopyIRSBExceptStmts(sb_in);

   /* Set up the running environment.  Both .sb and .tmpMap are
      modified as we go along.  Note that tmps are added to both
      .sb->tyenv and .tmpMap together, so the valid index-set for
      those two arrays should always be identical. */
   VG_(memset)(&mce, 0, sizeof(mce));
   mce.sb         = sb_out;
   mce.trace      = verboze;
   mce.layout     = layout;
   mce.hWordTy    = hWordTy;
   mce.tmpHowUsed = NULL;
   /* BEGIN decide on expense levels for instrumentation. */

   /* Initially, select the cheap version of everything for which we have an
      alternative. */
   DetailLevelByOp__set_all( &mce.dlbo, DLcheap );

   /* Take account of the --expensive-definedness-checks= flag. */
   if (MC_(clo_expensive_definedness_checks) == EdcNO) {
      /* We just selected 'cheap for everything', so we don't need to do
         anything here.  mce.tmpHowUsed remains NULL. */
   }
   else if (MC_(clo_expensive_definedness_checks) == EdcYES) {
      /* Select 'expensive for everything'.  mce.tmpHowUsed remains NULL. */
      DetailLevelByOp__set_all( &mce.dlbo, DLexpensive );
   }
   else {
      tl_assert(MC_(clo_expensive_definedness_checks) == EdcAUTO);
      /* We'll make our own selection, based on known per-target constraints
         and also on analysis of the block to be instrumented.  First, set
         up default values for detail levels.

         On x86 and amd64, we'll routinely encounter code optimised by LLVM
         5 and above.  Enable accurate interpretation of the following.
         LLVM uses adds for some bitfield inserts, and we get a lot of false
         errors if the cheap interpretation is used, alas.  Could solve this
         much better if we knew which of such adds came from x86/amd64 LEA
         instructions, since these are the only ones really needing the
         expensive interpretation, but that would require some way to tag
         them in the _toIR.c front ends, which is a lot of faffing around.
         So for now we use preInstrumentationAnalysis() to detect adds which
         are used only to construct memory addresses, which is an
         approximation to the above, and is self-contained. */
#     if defined(VGA_x86)
      mce.dlbo.dl_CmpEQ32_CmpNE32 = DLexpensive;
#     elif defined(VGA_amd64)
      mce.dlbo.dl_Add64           = DLauto;
      mce.dlbo.dl_CmpEQ32_CmpNE32 = DLexpensive;
#     endif
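
      /* Roughly speaking: an op set to DLauto gets the expensive
         interpretation only when the tmp-use analysis says its result temp
         is HuOth; a result used solely in a PCast-style way (HuPCa) keeps
         the cheap scheme.  DLcheap and DLexpensive force the choice
         unconditionally; see the DLauto handling in expr2vbits_Binop for
         the exact rule. */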
      /* preInstrumentationAnalysis() will allocate &mce.tmpHowUsed and then
         fill it in. */
      Bool hasBogusLiterals = False;
      preInstrumentationAnalysis( &mce.tmpHowUsed, &hasBogusLiterals, sb_in );

      if (hasBogusLiterals) {
         /* This happens very rarely.  In this case just select expensive
            for everything, and throw away the tmp-use analysis results. */
         DetailLevelByOp__set_all( &mce.dlbo, DLexpensive );
         VG_(free)( mce.tmpHowUsed );
         mce.tmpHowUsed = NULL;
      } else {
         /* Nothing.  mce.tmpHowUsed contains tmp-use analysis results,
            which will be used for some subset of Iop_{Add,Sub}{32,64},
            based on which ones are set to DLauto for this target. */
      }
   }
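
   /* (The bogus-literal check typically fires for blocks containing the
      kind of magic constants that hand-optimised, word-at-a-time string
      code uses while deliberately operating on partially-defined words;
      only the expensive interpretations keep such blocks free of false
      positives.  See the bogus-literal checking earlier in this file.) */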
   DetailLevelByOp__check_sanity( &mce.dlbo );

   // Debug printing: which tmps have been identified as PCast-only use
   if (mce.tmpHowUsed) {
      VG_(printf)("Cheapies: ");
      for (UInt q = 0; q < sb_in->tyenv->types_used; q++) {
         if (mce.tmpHowUsed[q] == HuPCa) {
            VG_(printf)("t%u ", q);
         }
      }
   }
   // Debug printing: number of ops by detail level
   UChar nCheap     = DetailLevelByOp__count( &mce.dlbo, DLcheap );
   UChar nAuto      = DetailLevelByOp__count( &mce.dlbo, DLauto );
   UChar nExpensive = DetailLevelByOp__count( &mce.dlbo, DLexpensive );
   tl_assert(nCheap + nAuto + nExpensive == 8);
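   /* The 8 is the number of op groups whose detail level DetailLevelByOp
      tracks; every group must have ended up at exactly one of the three
      levels by this point. */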
   VG_(printf)("%u,%u,%u ", nCheap, nAuto, nExpensive);

   /* END decide on expense levels for instrumentation. */
   /* Initialise the running tmp environment. */

   mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
                            sizeof(TempMapEnt));
   VG_(hintSizeXA) (mce.tmpMap, sb_in->tyenv->types_used);
   for (i = 0; i < sb_in->tyenv->types_used; i++) {
      TempMapEnt ent;
      ent.kind    = Orig;
      ent.shadowV = IRTemp_INVALID;
      ent.shadowB = IRTemp_INVALID;
      VG_(addToXA)( mce.tmpMap, &ent );
   }
   tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );
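
   /* tmpMap gives, for each original temp in sb_in, its V-bits shadow
      (shadowV) and, when origin tracking is enabled, its origin shadow
      (shadowB).  Both start out as IRTemp_INVALID and are created on
      demand by findShadowTmpV / findShadowTmpB. */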
   /* Finally, begin instrumentation. */

   /* Copy verbatim any IR preamble preceding the first IMark */

   tl_assert(mce.sb == sb_out);
   tl_assert(mce.sb != sb_in);

   i = 0;
   while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {

      st = sb_in->stmts[i];
      tl_assert(isFlatIRStmt(st));

      stmt( 'C', &mce, sb_in->stmts[i] );
      i++;
   }
   /* Nasty problem.  IR optimisation of the pre-instrumented IR may
      cause the IR following the preamble to contain references to IR
      temporaries defined in the preamble.  Because the preamble isn't
      instrumented, these temporaries don't have any shadows.
      Nevertheless uses of them following the preamble will cause
      memcheck to generate references to their shadows.  End effect is
      to cause IR sanity check failures, due to references to
      non-existent shadows.  This is only evident for the complex
      preambles used for function wrapping on TOC-afflicted platforms.

      The following loop therefore scans the preamble looking for
      assignments to temporaries.  For each one found it creates an
      assignment to the corresponding (V) shadow temp, marking it as
      'defined'.  This is the same resulting IR as if the main
      instrumentation loop below had been applied to the statement.

      Similarly, if origin tracking is enabled, we must generate an
      assignment for the corresponding origin (B) shadow, claiming
      no-origin, as appropriate for a defined value. */
   for (j = 0; j < i; j++) {
      if (sb_in->stmts[j]->tag == Ist_WrTmp) {
         /* findShadowTmpV checks its arg is an original tmp;
            no need to assert that here. */
         IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
         IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
         IRType ty_v  = typeOfIRTemp(sb_out->tyenv, tmp_v);
         assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
         if (MC_(clo_mc_level) == 3) {
            IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
            tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
            assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
         }
         VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
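
   /* For instance, if the preamble contains "t5 = GET:I64(...)", the loop
      above emits a V-shadow assignment whose RHS is a 64-bit all-zeroes
      constant (all bits defined), and at --track-origins=yes also a
      B-shadow assignment of 0:I32 (no origin).  The temp numbering here is
      purely illustrative. */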
   /* Iterate over the remaining stmts to generate instrumentation. */

   tl_assert(sb_in->stmts_used > 0);
   tl_assert(i < sb_in->stmts_used);
   tl_assert(sb_in->stmts[i]->tag == Ist_IMark);

   for (/* use current i */; i < sb_in->stmts_used; i++) {

      st = sb_in->stmts[i];
      first_stmt = sb_out->stmts_used;

      if (MC_(clo_mc_level) == 3) {
         /* See comments on case Ist_CAS below. */
         if (st->tag != Ist_CAS)
            schemeS( &mce, st );
      }

      /* Generate instrumentation code for each stmt ... */
         IRTemp dst = st->Ist.WrTmp.tmp;
         tl_assert(dst < (UInt)sb_in->tyenv->types_used);
         HowUsed hu = mce.tmpHowUsed ? mce.tmpHowUsed[dst]
                                     : HuOth/*we don't know, so play safe*/;
         assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
                 expr2vbits( &mce, st->Ist.WrTmp.data, hu ));
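         /* For a concrete feel: under the cheap scheme, "t7 = Add32(t1,t2)"
            gets a V-bits assignment along the lines of
            mkLeft32(mkUifU32(shadow-of-t1, shadow-of-t2)), i.e. union the
            operands' undefinedness and smear it towards the MSB.
            DLexpensive, or DLauto with hu == HuOth, picks the costlier but
            more precise add/sub interpretation instead. */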
         do_shadow_PUT( &mce,
                        st->Ist.Put.offset,
                        st->Ist.Put.data,
                        NULL /* shadow atom */, NULL /* guard */ );

         do_shadow_PUTI( &mce, st->Ist.PutI.details );

         do_shadow_Store( &mce, st->Ist.Store.end,
                          st->Ist.Store.addr, 0/* addr bias */,
                          st->Ist.Store.data,
                          NULL /* shadow data */,
                          NULL /* guard */ );

         do_shadow_StoreG( &mce, st->Ist.StoreG.details );

         do_shadow_LoadG( &mce, st->Ist.LoadG.details );

         complainIfUndefined( &mce, st->Ist.Exit.guard, NULL );

         do_shadow_Dirty( &mce, st->Ist.Dirty.details );

         do_AbiHint( &mce, st->Ist.AbiHint.base,
                     st->Ist.AbiHint.len,
                     st->Ist.AbiHint.nia );

         do_shadow_CAS( &mce, st->Ist.CAS.details );
         /* Note, do_shadow_CAS copies the CAS itself to the output
            block, because it needs to add instrumentation both
            before and after it.  Hence skip the copy below.  Also
            skip the origin-tracking stuff (call to schemeS) above,
            since that's all tangled up with it too; do_shadow_CAS
            takes care of it. */

         do_shadow_LLSC( &mce,
                         st->Ist.LLSC.end,
                         st->Ist.LLSC.result,
                         st->Ist.LLSC.addr,
                         st->Ist.LLSC.storedata );
         VG_(tool_panic)("memcheck: unhandled IRStmt");

      } /* switch (st->tag) */

      for (j = first_stmt; j < sb_out->stmts_used; j++) {
         ppIRStmt(sb_out->stmts[j]);
      }

      /* ... and finally copy the stmt itself to the output.  Except,
         skip the copy of IRCASs; see comments on case Ist_CAS
         above. */
      if (st->tag != Ist_CAS)
         stmt('C', &mce, st);

   }
   /* Now we need to complain if the jump target is undefined. */
   first_stmt = sb_out->stmts_used;

   VG_(printf)("sb_in->next = ");
   ppIRExpr(sb_in->next);
   VG_(printf)("\n\n");

   complainIfUndefined( &mce, sb_in->next, NULL );

   for (j = first_stmt; j < sb_out->stmts_used; j++) {
      ppIRStmt(sb_out->stmts[j]);
   }
   /* If this fails, there's been some serious snafu with tmp management,
      that should be investigated. */
   tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
   VG_(deleteXA)( mce.tmpMap );

   if (mce.tmpHowUsed) {
      VG_(free)( mce.tmpHowUsed );
   }

   tl_assert(mce.sb == sb_out);
   return sb_out;
}

/*--------------------------------------------------------------------*/
/*--- end                                           mc_translate.c ---*/
/*--------------------------------------------------------------------*/