2 /*--------------------------------------------------------------------*/
3 /*--- Instrument IR to perform memory checking operations. ---*/
4 /*--- mc_translate.c ---*/
5 /*--------------------------------------------------------------------*/
8 This file is part of MemCheck, a heavyweight Valgrind tool for
9 detecting memory errors.
11 Copyright (C) 2000-2017 Julian Seward
14 This program is free software; you can redistribute it and/or
15 modify it under the terms of the GNU General Public License as
16 published by the Free Software Foundation; either version 2 of the
17 License, or (at your option) any later version.
19 This program is distributed in the hope that it will be useful, but
20 WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
22 General Public License for more details.
24 You should have received a copy of the GNU General Public License
25 along with this program; if not, write to the Free Software
26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
29 The GNU General Public License is contained in the file COPYING.
32 #include "pub_tool_basics.h"
33 #include "pub_tool_poolalloc.h" // For mc_include.h
34 #include "pub_tool_hashtable.h" // For mc_include.h
35 #include "pub_tool_libcassert.h"
36 #include "pub_tool_libcprint.h"
37 #include "pub_tool_tooliface.h"
38 #include "pub_tool_machine.h" // VG_(fnptr_to_fnentry)
39 #include "pub_tool_xarray.h"
40 #include "pub_tool_mallocfree.h"
41 #include "pub_tool_libcbase.h"
43 #include "mc_include.h"
46 /* FIXMEs JRS 2011-June-16.
48 Check the interpretation for vector narrowing and widening ops,
49 particularly the saturating ones. I suspect they are either overly
50 pessimistic and/or wrong.
52 Iop_QandSQsh64x2 and friends (vector-by-vector bidirectional
53 saturating shifts): the interpretation is overly pessimistic.
54 See comments on the relevant cases below for details.
56 Iop_Sh64Sx2 and friends (vector-by-vector bidirectional shifts,
57 both rounding and non-rounding variants): ditto
60 /* This file implements the Memcheck instrumentation, and in
61 particular contains the core of its undefined value detection
62 machinery. For a comprehensive background of the terminology,
63 algorithms and rationale used herein, read:
65 Using Valgrind to detect undefined value errors with
68 Julian Seward and Nicholas Nethercote
70 2005 USENIX Annual Technical Conference (General Track),
71 Anaheim, CA, USA, April 10-15, 2005.
75 Here is as good a place as any to record exactly when V bits are and
76 should be checked, why, and what function is responsible.
79 Memcheck complains when an undefined value is used:
81 1. In the condition of a conditional branch. Because it could cause
82 incorrect control flow, and thus cause incorrect externally-visible
83 behaviour. [mc_translate.c:complainIfUndefined]
85 2. As an argument to a system call, or as the value that specifies
86 the system call number. Because it could cause an incorrect
87 externally-visible side effect. [mc_translate.c:mc_pre_reg_read]
89 3. As the address in a load or store. Because it could cause an
90 incorrect value to be used later, which could cause externally-visible
91 behaviour (eg. via incorrect control flow or an incorrect system call
92 argument) [complainIfUndefined]
94 4. As the target address of a branch. Because it could cause incorrect
95 control flow. [complainIfUndefined]
97 5. As an argument to setenv, unsetenv, or putenv. Because it could put
98 an incorrect value into the external environment.
99 [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]
101 6. As the index in a GETI or PUTI operation. I'm not sure why... (njn).
102 [complainIfUndefined]
104 7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
105 VALGRIND_CHECK_VALUE_IS_DEFINED client requests. Because the user
106 requested it. [in memcheck.h]
109 Memcheck also complains, but should not, when an undefined value is used:
111 8. As the shift value in certain SIMD shift operations (but not in the
112 standard integer shift operations). This inconsistency is due to
113 historical reasons.) [complainIfUndefined]
116 Memcheck does not complain, but should, when an undefined value is used:
118 9. As an input to a client request. Because the client request may
119 affect the visible behaviour -- see bug #144362 for an example
120 involving the malloc replacements in vg_replace_malloc.c and
121 VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
122 isn't identified. That bug report also has some info on how to solve
123 the problem. [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]
126 In practice, 1 and 2 account for the vast majority of cases.
129 /* Generation of addr-definedness, addr-validity and
130 guard-definedness checks pertaining to loads and stores (Iex_Load,
131 Ist_Store, IRLoadG, IRStoreG, LLSC, CAS and Dirty memory
132 loads/stores) was re-checked 11 May 2013. */
135 /*------------------------------------------------------------*/
136 /*--- Forward decls ---*/
137 /*------------------------------------------------------------*/
141 // See below for comments explaining what this is for.
143 enum __attribute__((packed
)) { HuUnU
=0, HuPCa
=1, HuOth
=2 }
146 static IRType
shadowTypeV ( IRType ty
);
147 static IRExpr
* expr2vbits ( struct _MCEnv
* mce
, IRExpr
* e
,
148 HowUsed hu
/*use HuOth if unknown*/ );
149 static IRTemp
findShadowTmpB ( struct _MCEnv
* mce
, IRTemp orig
);
151 static IRExpr
*i128_const_zero(void);
154 /*------------------------------------------------------------*/
155 /*--- Memcheck running state, and tmp management. ---*/
156 /*------------------------------------------------------------*/
158 /* For a few (maybe 1%) IROps, we have both a cheaper, less exact vbit
159 propagation scheme, and a more expensive, more precise vbit propagation
160 scheme. This enum describes, for such an IROp, which scheme to use. */
163 // Use the cheaper, less-exact variant.
165 // Choose between cheap and expensive based on analysis of the block
166 // to be instrumented. Note that the choice may be done on a
167 // per-instance basis of the IROp that this DetailLevel describes.
169 // Use the more expensive, more-exact variant.
175 /* A readonly part of the running state. For IROps that have both a
176 less-exact and more-exact interpretation, records which interpretation is
180 // For Add32/64 and Sub32/64, all 3 settings are allowed. For the
181 // DLauto case, a per-instance decision is to be made by inspecting
182 // the associated tmp's entry in MCEnv.tmpHowUsed.
183 DetailLevel dl_Add32
;
184 DetailLevel dl_Add64
;
185 DetailLevel dl_Sub32
;
186 DetailLevel dl_Sub64
;
187 // For Cmp{EQ,NE}{64,32,16,8}, only DLcheap and DLexpensive are
189 DetailLevel dl_CmpEQ64_CmpNE64
;
190 DetailLevel dl_CmpEQ32_CmpNE32
;
191 DetailLevel dl_CmpEQ16_CmpNE16
;
192 DetailLevel dl_CmpEQ8_CmpNE8
;
196 static void DetailLevelByOp__set_all ( /*OUT*/DetailLevelByOp
* dlbo
,
203 dlbo
->dl_CmpEQ64_CmpNE64
= dl
;
204 dlbo
->dl_CmpEQ32_CmpNE32
= dl
;
205 dlbo
->dl_CmpEQ16_CmpNE16
= dl
;
206 dlbo
->dl_CmpEQ8_CmpNE8
= dl
;
209 static void DetailLevelByOp__check_sanity ( const DetailLevelByOp
* dlbo
)
211 tl_assert(dlbo
->dl_Add32
>= DLcheap
&& dlbo
->dl_Add32
<= DLexpensive
);
212 tl_assert(dlbo
->dl_Add64
>= DLcheap
&& dlbo
->dl_Add64
<= DLexpensive
);
213 tl_assert(dlbo
->dl_Sub32
>= DLcheap
&& dlbo
->dl_Sub32
<= DLexpensive
);
214 tl_assert(dlbo
->dl_Sub64
>= DLcheap
&& dlbo
->dl_Sub64
<= DLexpensive
);
215 tl_assert(dlbo
->dl_CmpEQ64_CmpNE64
== DLcheap
216 || dlbo
->dl_CmpEQ64_CmpNE64
== DLexpensive
);
217 tl_assert(dlbo
->dl_CmpEQ32_CmpNE32
== DLcheap
218 || dlbo
->dl_CmpEQ32_CmpNE32
== DLexpensive
);
219 tl_assert(dlbo
->dl_CmpEQ16_CmpNE16
== DLcheap
220 || dlbo
->dl_CmpEQ16_CmpNE16
== DLexpensive
);
221 tl_assert(dlbo
->dl_CmpEQ8_CmpNE8
== DLcheap
222 || dlbo
->dl_CmpEQ8_CmpNE8
== DLexpensive
);
225 static UInt
DetailLevelByOp__count ( const DetailLevelByOp
* dlbo
,
229 n
+= (dlbo
->dl_Add32
== dl
? 1 : 0);
230 n
+= (dlbo
->dl_Add64
== dl
? 1 : 0);
231 n
+= (dlbo
->dl_Sub32
== dl
? 1 : 0);
232 n
+= (dlbo
->dl_Sub64
== dl
? 1 : 0);
233 n
+= (dlbo
->dl_CmpEQ64_CmpNE64
== dl
? 1 : 0);
234 n
+= (dlbo
->dl_CmpEQ32_CmpNE32
== dl
? 1 : 0);
235 n
+= (dlbo
->dl_CmpEQ16_CmpNE16
== dl
? 1 : 0);
236 n
+= (dlbo
->dl_CmpEQ8_CmpNE8
== dl
? 1 : 0);
241 /* Carries info about a particular tmp. The tmp's number is not
242 recorded, as this is implied by (equal to) its index in the tmpMap
243 in MCEnv. The tmp's type is also not recorded, as this is present
246 When .kind is Orig, .shadowV and .shadowB may give the identities
247 of the temps currently holding the associated definedness (shadowV)
248 and origin (shadowB) values, or these may be IRTemp_INVALID if code
249 to compute such values has not yet been emitted.
251 When .kind is VSh or BSh then the tmp is holds a V- or B- value,
252 and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
253 illogical for a shadow tmp itself to be shadowed.
256 enum { Orig
=1, VSh
=2, BSh
=3 }
268 /* A |HowUsed| value carries analysis results about how values are used,
269 pertaining to whether we need to instrument integer adds expensively or
270 not. The running state carries a (readonly) mapping from original tmp to
271 a HowUsed value for it. A usage value can be one of three values,
272 forming a 3-point chain lattice.
274 HuOth ("Other") used in some arbitrary way
276 HuPCa ("PCast") used *only* in effectively a PCast, in which all
277 | we care about is the all-defined vs not-all-defined distinction
279 HuUnU ("Unused") not used at all.
281 The "safe" (don't-know) end of the lattice is "HuOth". See comments
282 below in |preInstrumentationAnalysis| for further details.
286 enum __attribute__((packed)) { HuUnU=0, HuPCa=1, HuOth=2 }
290 // Not actually necessary, but we don't want to waste D1 space.
291 STATIC_ASSERT(sizeof(HowUsed
) == 1);
294 /* Carries around state during memcheck instrumentation. */
297 /* MODIFIED: the superblock being constructed. IRStmts are
302 /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
303 current kind and possibly shadow temps for each temp in the
304 IRSB being constructed. Note that it does not contain the
305 type of each tmp. If you want to know the type, look at the
306 relevant entry in sb->tyenv. It follows that at all times
307 during the instrumentation process, the valid indices for
308 tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
309 total number of Orig, V- and B- temps allocated so far.
311 The reason for this strange split (types in one place, all
312 other info in another) is that we need the types to be
313 attached to sb so as to make it possible to do
314 "typeOfIRExpr(mce->bb->tyenv, ...)" at various places in the
315 instrumentation process. */
316 XArray
* /* of TempMapEnt */ tmpMap
;
318 /* READONLY: contains details of which ops should be expensively
320 DetailLevelByOp dlbo
;
322 /* READONLY: for each original tmp, how the tmp is used. This is
323 computed by |preInstrumentationAnalysis|. Valid indices are
324 0 .. #temps_in_sb-1 (same as for tmpMap). */
327 /* READONLY: the guest layout. This indicates which parts of
328 the guest state should be regarded as 'always defined'. */
329 const VexGuestLayout
* layout
;
331 /* READONLY: the host word type. Needed for constructing
332 arguments of type 'HWord' to be passed to helper functions.
333 Ity_I32 or Ity_I64 only. */
339 /* SHADOW TMP MANAGEMENT. Shadow tmps are allocated lazily (on
340 demand), as they are encountered. This is for two reasons.
342 (1) (less important reason): Many original tmps are unused due to
343 initial IR optimisation, and we do not want to spaces in tables
346 Shadow IRTemps are therefore allocated on demand. mce.tmpMap is a
347 table indexed [0 .. n_types-1], which gives the current shadow for
348 each original tmp, or INVALID_IRTEMP if none is so far assigned.
349 It is necessary to support making multiple assignments to a shadow
350 -- specifically, after testing a shadow for definedness, it needs
351 to be made defined. But IR's SSA property disallows this.
353 (2) (more important reason): Therefore, when a shadow needs to get
354 a new value, a new temporary is created, the value is assigned to
355 that, and the tmpMap is updated to reflect the new binding.
357 A corollary is that if the tmpMap maps a given tmp to
358 IRTemp_INVALID and we are hoping to read that shadow tmp, it means
359 there's a read-before-write error in the original tmps. The IR
360 sanity checker should catch all such anomalies, however.
363 /* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
364 both the table in mce->sb and to our auxiliary mapping. Note that
365 newTemp may cause mce->tmpMap to resize, hence previous results
366 from VG_(indexXA)(mce->tmpMap) are invalidated. */
367 static IRTemp
newTemp ( MCEnv
* mce
, IRType ty
, TempKind kind
)
371 IRTemp tmp
= newIRTemp(mce
->sb
->tyenv
, ty
);
373 ent
.shadowV
= IRTemp_INVALID
;
374 ent
.shadowB
= IRTemp_INVALID
;
375 newIx
= VG_(addToXA
)( mce
->tmpMap
, &ent
);
376 tl_assert(newIx
== (Word
)tmp
);
381 /* Find the tmp currently shadowing the given original tmp. If none
382 so far exists, allocate one. */
383 static IRTemp
findShadowTmpV ( MCEnv
* mce
, IRTemp orig
)
386 /* VG_(indexXA) range-checks 'orig', hence no need to check
388 ent
= (TempMapEnt
*)VG_(indexXA
)( mce
->tmpMap
, (Word
)orig
);
389 tl_assert(ent
->kind
== Orig
);
390 if (ent
->shadowV
== IRTemp_INVALID
) {
392 = newTemp( mce
, shadowTypeV(mce
->sb
->tyenv
->types
[orig
]), VSh
);
393 /* newTemp may cause mce->tmpMap to resize, hence previous results
394 from VG_(indexXA) are invalid. */
395 ent
= (TempMapEnt
*)VG_(indexXA
)( mce
->tmpMap
, (Word
)orig
);
396 tl_assert(ent
->kind
== Orig
);
397 tl_assert(ent
->shadowV
== IRTemp_INVALID
);
403 /* Allocate a new shadow for the given original tmp. This means any
404 previous shadow is abandoned. This is needed because it is
405 necessary to give a new value to a shadow once it has been tested
406 for undefinedness, but unfortunately IR's SSA property disallows
407 this. Instead we must abandon the old shadow, allocate a new one
408 and use that instead.
410 This is the same as findShadowTmpV, except we don't bother to see
411 if a shadow temp already existed -- we simply allocate a new one
413 static void newShadowTmpV ( MCEnv
* mce
, IRTemp orig
)
416 /* VG_(indexXA) range-checks 'orig', hence no need to check
418 ent
= (TempMapEnt
*)VG_(indexXA
)( mce
->tmpMap
, (Word
)orig
);
419 tl_assert(ent
->kind
== Orig
);
422 = newTemp( mce
, shadowTypeV(mce
->sb
->tyenv
->types
[orig
]), VSh
);
423 /* newTemp may cause mce->tmpMap to resize, hence previous results
424 from VG_(indexXA) are invalid. */
425 ent
= (TempMapEnt
*)VG_(indexXA
)( mce
->tmpMap
, (Word
)orig
);
426 tl_assert(ent
->kind
== Orig
);
432 /*------------------------------------------------------------*/
433 /*--- IRAtoms -- a subset of IRExprs ---*/
434 /*------------------------------------------------------------*/
436 /* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
437 isIRAtom() in libvex_ir.h. Because this instrumenter expects flat
438 input, most of this code deals in atoms. Usefully, a value atom
439 always has a V-value which is also an atom: constants are shadowed
440 by constants, and temps are shadowed by the corresponding shadow
443 typedef IRExpr IRAtom
;
445 /* (used for sanity checks only): is this an atom which looks
446 like it's from original code? */
447 static Bool
isOriginalAtom ( MCEnv
* mce
, IRAtom
* a1
)
449 if (a1
->tag
== Iex_Const
)
451 if (a1
->tag
== Iex_RdTmp
) {
452 TempMapEnt
* ent
= VG_(indexXA
)( mce
->tmpMap
, a1
->Iex
.RdTmp
.tmp
);
453 return ent
->kind
== Orig
;
458 /* (used for sanity checks only): is this an atom which looks
459 like it's from shadow code? */
460 static Bool
isShadowAtom ( MCEnv
* mce
, IRAtom
* a1
)
462 if (a1
->tag
== Iex_Const
)
464 if (a1
->tag
== Iex_RdTmp
) {
465 TempMapEnt
* ent
= VG_(indexXA
)( mce
->tmpMap
, a1
->Iex
.RdTmp
.tmp
);
466 return ent
->kind
== VSh
|| ent
->kind
== BSh
;
471 /* (used for sanity checks only): check that both args are atoms and
472 are identically-kinded. */
473 static Bool
sameKindedAtoms ( IRAtom
* a1
, IRAtom
* a2
)
475 if (a1
->tag
== Iex_RdTmp
&& a2
->tag
== Iex_RdTmp
)
477 if (a1
->tag
== Iex_Const
&& a2
->tag
== Iex_Const
)
483 /*------------------------------------------------------------*/
484 /*--- Type management ---*/
485 /*------------------------------------------------------------*/
487 /* Shadow state is always accessed using integer types. This returns
488 an integer type with the same size (as per sizeofIRType) as the
489 given type. The only valid shadow types are Bit, I8, I16, I32,
490 I64, I128, V128, V256. */
492 static IRType
shadowTypeV ( IRType ty
)
500 case Ity_I128
: return ty
;
501 case Ity_F16
: return Ity_I16
;
502 case Ity_F32
: return Ity_I32
;
503 case Ity_D32
: return Ity_I32
;
504 case Ity_F64
: return Ity_I64
;
505 case Ity_D64
: return Ity_I64
;
506 case Ity_F128
: return Ity_I128
;
507 case Ity_D128
: return Ity_I128
;
508 case Ity_V128
: return Ity_V128
;
509 case Ity_V256
: return Ity_V256
;
510 default: ppIRType(ty
);
511 VG_(tool_panic
)("memcheck:shadowTypeV");
515 /* Produce a 'defined' value of the given shadow type. Should only be
516 supplied shadow types (Bit/I8/I16/I32/UI64). */
517 static IRExpr
* definedOfType ( IRType ty
) {
519 case Ity_I1
: return IRExpr_Const(IRConst_U1(False
));
520 case Ity_I8
: return IRExpr_Const(IRConst_U8(0));
521 case Ity_I16
: return IRExpr_Const(IRConst_U16(0));
522 case Ity_I32
: return IRExpr_Const(IRConst_U32(0));
523 case Ity_I64
: return IRExpr_Const(IRConst_U64(0));
524 case Ity_I128
: return i128_const_zero();
525 case Ity_V128
: return IRExpr_Const(IRConst_V128(0x0000));
526 case Ity_V256
: return IRExpr_Const(IRConst_V256(0x00000000));
527 default: VG_(tool_panic
)("memcheck:definedOfType");
532 /*------------------------------------------------------------*/
533 /*--- Constructing IR fragments ---*/
534 /*------------------------------------------------------------*/
536 /* add stmt to a bb */
537 static inline void stmt ( HChar cat
, MCEnv
* mce
, IRStmt
* st
) {
539 VG_(printf
)(" %c: ", cat
);
543 addStmtToIRSB(mce
->sb
, st
);
546 /* assign value to tmp */
548 void assign ( HChar cat
, MCEnv
* mce
, IRTemp tmp
, IRExpr
* expr
) {
549 stmt(cat
, mce
, IRStmt_WrTmp(tmp
,expr
));
552 /* build various kinds of expressions */
553 #define triop(_op, _arg1, _arg2, _arg3) \
554 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
555 #define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
556 #define unop(_op, _arg) IRExpr_Unop((_op),(_arg))
557 #define mkU1(_n) IRExpr_Const(IRConst_U1(_n))
558 #define mkU8(_n) IRExpr_Const(IRConst_U8(_n))
559 #define mkU16(_n) IRExpr_Const(IRConst_U16(_n))
560 #define mkU32(_n) IRExpr_Const(IRConst_U32(_n))
561 #define mkU64(_n) IRExpr_Const(IRConst_U64(_n))
562 #define mkV128(_n) IRExpr_Const(IRConst_V128(_n))
563 #define mkexpr(_tmp) IRExpr_RdTmp((_tmp))
565 /* Bind the given expression to a new temporary, and return the
566 temporary. This effectively converts an arbitrary expression into
569 'ty' is the type of 'e' and hence the type that the new temporary
570 needs to be. But passing it in is redundant, since we can deduce
571 the type merely by inspecting 'e'. So at least use that fact to
572 assert that the two types agree. */
573 static IRAtom
* assignNew ( HChar cat
, MCEnv
* mce
, IRType ty
, IRExpr
* e
)
577 IRType tyE
= typeOfIRExpr(mce
->sb
->tyenv
, e
);
579 tl_assert(tyE
== ty
); /* so 'ty' is redundant (!) */
581 case 'V': k
= VSh
; break;
582 case 'B': k
= BSh
; break;
583 case 'C': k
= Orig
; break;
584 /* happens when we are making up new "orig"
585 expressions, for IRCAS handling */
586 default: tl_assert(0);
588 t
= newTemp(mce
, ty
, k
);
589 assign(cat
, mce
, t
, e
);
594 /*------------------------------------------------------------*/
595 /*--- Helper functions for 128-bit ops ---*/
596 /*------------------------------------------------------------*/
598 static IRExpr
*i128_const_zero(void)
600 IRAtom
* z64
= IRExpr_Const(IRConst_U64(0));
601 return binop(Iop_64HLto128
, z64
, z64
);
604 /* There are no I128-bit loads and/or stores [as generated by any
605 current front ends]. So we do not need to worry about that in
609 /*------------------------------------------------------------*/
610 /*--- Constructing definedness primitive ops ---*/
611 /*------------------------------------------------------------*/
613 /* --------- Defined-if-either-defined --------- */
615 static IRAtom
* mkDifD8 ( MCEnv
* mce
, IRAtom
* a1
, IRAtom
* a2
) {
616 tl_assert(isShadowAtom(mce
,a1
));
617 tl_assert(isShadowAtom(mce
,a2
));
618 return assignNew('V', mce
, Ity_I8
, binop(Iop_And8
, a1
, a2
));
621 static IRAtom
* mkDifD16 ( MCEnv
* mce
, IRAtom
* a1
, IRAtom
* a2
) {
622 tl_assert(isShadowAtom(mce
,a1
));
623 tl_assert(isShadowAtom(mce
,a2
));
624 return assignNew('V', mce
, Ity_I16
, binop(Iop_And16
, a1
, a2
));
627 static IRAtom
* mkDifD32 ( MCEnv
* mce
, IRAtom
* a1
, IRAtom
* a2
) {
628 tl_assert(isShadowAtom(mce
,a1
));
629 tl_assert(isShadowAtom(mce
,a2
));
630 return assignNew('V', mce
, Ity_I32
, binop(Iop_And32
, a1
, a2
));
633 static IRAtom
* mkDifD64 ( MCEnv
* mce
, IRAtom
* a1
, IRAtom
* a2
) {
634 tl_assert(isShadowAtom(mce
,a1
));
635 tl_assert(isShadowAtom(mce
,a2
));
636 return assignNew('V', mce
, Ity_I64
, binop(Iop_And64
, a1
, a2
));
639 static IRAtom
* mkDifDV128 ( MCEnv
* mce
, IRAtom
* a1
, IRAtom
* a2
) {
640 tl_assert(isShadowAtom(mce
,a1
));
641 tl_assert(isShadowAtom(mce
,a2
));
642 return assignNew('V', mce
, Ity_V128
, binop(Iop_AndV128
, a1
, a2
));
645 static IRAtom
* mkDifDV256 ( MCEnv
* mce
, IRAtom
* a1
, IRAtom
* a2
) {
646 tl_assert(isShadowAtom(mce
,a1
));
647 tl_assert(isShadowAtom(mce
,a2
));
648 return assignNew('V', mce
, Ity_V256
, binop(Iop_AndV256
, a1
, a2
));
651 /* --------- Undefined-if-either-undefined --------- */
653 static IRAtom
* mkUifU8 ( MCEnv
* mce
, IRAtom
* a1
, IRAtom
* a2
) {
654 tl_assert(isShadowAtom(mce
,a1
));
655 tl_assert(isShadowAtom(mce
,a2
));
656 return assignNew('V', mce
, Ity_I8
, binop(Iop_Or8
, a1
, a2
));
659 static IRAtom
* mkUifU16 ( MCEnv
* mce
, IRAtom
* a1
, IRAtom
* a2
) {
660 tl_assert(isShadowAtom(mce
,a1
));
661 tl_assert(isShadowAtom(mce
,a2
));
662 return assignNew('V', mce
, Ity_I16
, binop(Iop_Or16
, a1
, a2
));
665 static IRAtom
* mkUifU32 ( MCEnv
* mce
, IRAtom
* a1
, IRAtom
* a2
) {
666 tl_assert(isShadowAtom(mce
,a1
));
667 tl_assert(isShadowAtom(mce
,a2
));
668 return assignNew('V', mce
, Ity_I32
, binop(Iop_Or32
, a1
, a2
));
671 static IRAtom
* mkUifU64 ( MCEnv
* mce
, IRAtom
* a1
, IRAtom
* a2
) {
672 tl_assert(isShadowAtom(mce
,a1
));
673 tl_assert(isShadowAtom(mce
,a2
));
674 return assignNew('V', mce
, Ity_I64
, binop(Iop_Or64
, a1
, a2
));
677 static IRAtom
* mkUifU128 ( MCEnv
* mce
, IRAtom
* a1
, IRAtom
* a2
) {
678 IRAtom
*tmp1
, *tmp2
, *tmp3
, *tmp4
, *tmp5
, *tmp6
;
679 tl_assert(isShadowAtom(mce
,a1
));
680 tl_assert(isShadowAtom(mce
,a2
));
681 tmp1
= assignNew('V', mce
, Ity_I64
, unop(Iop_128to64
, a1
));
682 tmp2
= assignNew('V', mce
, Ity_I64
, unop(Iop_128HIto64
, a1
));
683 tmp3
= assignNew('V', mce
, Ity_I64
, unop(Iop_128to64
, a2
));
684 tmp4
= assignNew('V', mce
, Ity_I64
, unop(Iop_128HIto64
, a2
));
685 tmp5
= assignNew('V', mce
, Ity_I64
, binop(Iop_Or64
, tmp1
, tmp3
));
686 tmp6
= assignNew('V', mce
, Ity_I64
, binop(Iop_Or64
, tmp2
, tmp4
));
688 return assignNew('V', mce
, Ity_I128
, binop(Iop_64HLto128
, tmp6
, tmp5
));
691 static IRAtom
* mkUifUV128 ( MCEnv
* mce
, IRAtom
* a1
, IRAtom
* a2
) {
692 tl_assert(isShadowAtom(mce
,a1
));
693 tl_assert(isShadowAtom(mce
,a2
));
694 return assignNew('V', mce
, Ity_V128
, binop(Iop_OrV128
, a1
, a2
));
697 static IRAtom
* mkUifUV256 ( MCEnv
* mce
, IRAtom
* a1
, IRAtom
* a2
) {
698 tl_assert(isShadowAtom(mce
,a1
));
699 tl_assert(isShadowAtom(mce
,a2
));
700 return assignNew('V', mce
, Ity_V256
, binop(Iop_OrV256
, a1
, a2
));
703 static IRAtom
* mkUifU ( MCEnv
* mce
, IRType vty
, IRAtom
* a1
, IRAtom
* a2
) {
705 case Ity_I8
: return mkUifU8(mce
, a1
, a2
);
706 case Ity_I16
: return mkUifU16(mce
, a1
, a2
);
707 case Ity_I32
: return mkUifU32(mce
, a1
, a2
);
708 case Ity_I64
: return mkUifU64(mce
, a1
, a2
);
709 case Ity_I128
: return mkUifU128(mce
, a1
, a2
);
710 case Ity_V128
: return mkUifUV128(mce
, a1
, a2
);
711 case Ity_V256
: return mkUifUV256(mce
, a1
, a2
);
713 VG_(printf
)("\n"); ppIRType(vty
); VG_(printf
)("\n");
714 VG_(tool_panic
)("memcheck:mkUifU");
718 /* --------- The Left-family of operations. --------- */
720 static IRAtom
* mkLeft8 ( MCEnv
* mce
, IRAtom
* a1
) {
721 tl_assert(isShadowAtom(mce
,a1
));
722 return assignNew('V', mce
, Ity_I8
, unop(Iop_Left8
, a1
));
725 static IRAtom
* mkLeft16 ( MCEnv
* mce
, IRAtom
* a1
) {
726 tl_assert(isShadowAtom(mce
,a1
));
727 return assignNew('V', mce
, Ity_I16
, unop(Iop_Left16
, a1
));
730 static IRAtom
* mkLeft32 ( MCEnv
* mce
, IRAtom
* a1
) {
731 tl_assert(isShadowAtom(mce
,a1
));
732 return assignNew('V', mce
, Ity_I32
, unop(Iop_Left32
, a1
));
735 static IRAtom
* mkLeft64 ( MCEnv
* mce
, IRAtom
* a1
) {
736 tl_assert(isShadowAtom(mce
,a1
));
737 return assignNew('V', mce
, Ity_I64
, unop(Iop_Left64
, a1
));
740 /* --------- 'Improvement' functions for AND/OR. --------- */
742 /* ImproveAND(data, vbits) = data OR vbits. Defined (0) data 0s give
743 defined (0); all other -> undefined (1).
745 static IRAtom
* mkImproveAND8 ( MCEnv
* mce
, IRAtom
* data
, IRAtom
* vbits
)
747 tl_assert(isOriginalAtom(mce
, data
));
748 tl_assert(isShadowAtom(mce
, vbits
));
749 tl_assert(sameKindedAtoms(data
, vbits
));
750 return assignNew('V', mce
, Ity_I8
, binop(Iop_Or8
, data
, vbits
));
753 static IRAtom
* mkImproveAND16 ( MCEnv
* mce
, IRAtom
* data
, IRAtom
* vbits
)
755 tl_assert(isOriginalAtom(mce
, data
));
756 tl_assert(isShadowAtom(mce
, vbits
));
757 tl_assert(sameKindedAtoms(data
, vbits
));
758 return assignNew('V', mce
, Ity_I16
, binop(Iop_Or16
, data
, vbits
));
761 static IRAtom
* mkImproveAND32 ( MCEnv
* mce
, IRAtom
* data
, IRAtom
* vbits
)
763 tl_assert(isOriginalAtom(mce
, data
));
764 tl_assert(isShadowAtom(mce
, vbits
));
765 tl_assert(sameKindedAtoms(data
, vbits
));
766 return assignNew('V', mce
, Ity_I32
, binop(Iop_Or32
, data
, vbits
));
769 static IRAtom
* mkImproveAND64 ( MCEnv
* mce
, IRAtom
* data
, IRAtom
* vbits
)
771 tl_assert(isOriginalAtom(mce
, data
));
772 tl_assert(isShadowAtom(mce
, vbits
));
773 tl_assert(sameKindedAtoms(data
, vbits
));
774 return assignNew('V', mce
, Ity_I64
, binop(Iop_Or64
, data
, vbits
));
777 static IRAtom
* mkImproveANDV128 ( MCEnv
* mce
, IRAtom
* data
, IRAtom
* vbits
)
779 tl_assert(isOriginalAtom(mce
, data
));
780 tl_assert(isShadowAtom(mce
, vbits
));
781 tl_assert(sameKindedAtoms(data
, vbits
));
782 return assignNew('V', mce
, Ity_V128
, binop(Iop_OrV128
, data
, vbits
));
785 static IRAtom
* mkImproveANDV256 ( MCEnv
* mce
, IRAtom
* data
, IRAtom
* vbits
)
787 tl_assert(isOriginalAtom(mce
, data
));
788 tl_assert(isShadowAtom(mce
, vbits
));
789 tl_assert(sameKindedAtoms(data
, vbits
));
790 return assignNew('V', mce
, Ity_V256
, binop(Iop_OrV256
, data
, vbits
));
793 /* ImproveOR(data, vbits) = ~data OR vbits. Defined (0) data 1s give
794 defined (0); all other -> undefined (1).
796 static IRAtom
* mkImproveOR8 ( MCEnv
* mce
, IRAtom
* data
, IRAtom
* vbits
)
798 tl_assert(isOriginalAtom(mce
, data
));
799 tl_assert(isShadowAtom(mce
, vbits
));
800 tl_assert(sameKindedAtoms(data
, vbits
));
804 assignNew('V', mce
, Ity_I8
, unop(Iop_Not8
, data
)),
808 static IRAtom
* mkImproveOR16 ( MCEnv
* mce
, IRAtom
* data
, IRAtom
* vbits
)
810 tl_assert(isOriginalAtom(mce
, data
));
811 tl_assert(isShadowAtom(mce
, vbits
));
812 tl_assert(sameKindedAtoms(data
, vbits
));
816 assignNew('V', mce
, Ity_I16
, unop(Iop_Not16
, data
)),
820 static IRAtom
* mkImproveOR32 ( MCEnv
* mce
, IRAtom
* data
, IRAtom
* vbits
)
822 tl_assert(isOriginalAtom(mce
, data
));
823 tl_assert(isShadowAtom(mce
, vbits
));
824 tl_assert(sameKindedAtoms(data
, vbits
));
828 assignNew('V', mce
, Ity_I32
, unop(Iop_Not32
, data
)),
832 static IRAtom
* mkImproveOR64 ( MCEnv
* mce
, IRAtom
* data
, IRAtom
* vbits
)
834 tl_assert(isOriginalAtom(mce
, data
));
835 tl_assert(isShadowAtom(mce
, vbits
));
836 tl_assert(sameKindedAtoms(data
, vbits
));
840 assignNew('V', mce
, Ity_I64
, unop(Iop_Not64
, data
)),
844 static IRAtom
* mkImproveORV128 ( MCEnv
* mce
, IRAtom
* data
, IRAtom
* vbits
)
846 tl_assert(isOriginalAtom(mce
, data
));
847 tl_assert(isShadowAtom(mce
, vbits
));
848 tl_assert(sameKindedAtoms(data
, vbits
));
852 assignNew('V', mce
, Ity_V128
, unop(Iop_NotV128
, data
)),
856 static IRAtom
* mkImproveORV256 ( MCEnv
* mce
, IRAtom
* data
, IRAtom
* vbits
)
858 tl_assert(isOriginalAtom(mce
, data
));
859 tl_assert(isShadowAtom(mce
, vbits
));
860 tl_assert(sameKindedAtoms(data
, vbits
));
864 assignNew('V', mce
, Ity_V256
, unop(Iop_NotV256
, data
)),
868 /* --------- Pessimising casts. --------- */
870 /* The function returns an expression of type DST_TY. If any of the VBITS
871 is undefined (value == 1) the resulting expression has all bits set to
872 1. Otherwise, all bits are 0. */
874 static IRAtom
* mkPCastTo( MCEnv
* mce
, IRType dst_ty
, IRAtom
* vbits
)
879 /* Note, dst_ty is a shadow type, not an original type. */
880 tl_assert(isShadowAtom(mce
,vbits
));
881 src_ty
= typeOfIRExpr(mce
->sb
->tyenv
, vbits
);
883 /* Fast-track some common cases */
884 if (src_ty
== Ity_I32
&& dst_ty
== Ity_I32
)
885 return assignNew('V', mce
, Ity_I32
, unop(Iop_CmpwNEZ32
, vbits
));
887 if (src_ty
== Ity_I64
&& dst_ty
== Ity_I64
)
888 return assignNew('V', mce
, Ity_I64
, unop(Iop_CmpwNEZ64
, vbits
));
890 if (src_ty
== Ity_I32
&& dst_ty
== Ity_I64
) {
891 /* PCast the arg, then clone it. */
892 IRAtom
* tmp
= assignNew('V', mce
, Ity_I32
, unop(Iop_CmpwNEZ32
, vbits
));
893 return assignNew('V', mce
, Ity_I64
, binop(Iop_32HLto64
, tmp
, tmp
));
896 if (src_ty
== Ity_I32
&& dst_ty
== Ity_V128
) {
897 /* PCast the arg, then clone it 4 times. */
898 IRAtom
* tmp
= assignNew('V', mce
, Ity_I32
, unop(Iop_CmpwNEZ32
, vbits
));
899 tmp
= assignNew('V', mce
, Ity_I64
, binop(Iop_32HLto64
, tmp
, tmp
));
900 return assignNew('V', mce
, Ity_V128
, binop(Iop_64HLtoV128
, tmp
, tmp
));
903 if (src_ty
== Ity_I32
&& dst_ty
== Ity_V256
) {
904 /* PCast the arg, then clone it 8 times. */
905 IRAtom
* tmp
= assignNew('V', mce
, Ity_I32
, unop(Iop_CmpwNEZ32
, vbits
));
906 tmp
= assignNew('V', mce
, Ity_I64
, binop(Iop_32HLto64
, tmp
, tmp
));
907 tmp
= assignNew('V', mce
, Ity_V128
, binop(Iop_64HLtoV128
, tmp
, tmp
));
908 return assignNew('V', mce
, Ity_V256
, binop(Iop_V128HLtoV256
, tmp
, tmp
));
911 if (src_ty
== Ity_I64
&& dst_ty
== Ity_I32
) {
912 /* PCast the arg. This gives all 0s or all 1s. Then throw away
914 IRAtom
* tmp
= assignNew('V', mce
, Ity_I64
, unop(Iop_CmpwNEZ64
, vbits
));
915 return assignNew('V', mce
, Ity_I32
, unop(Iop_64to32
, tmp
));
918 if (src_ty
== Ity_V128
&& dst_ty
== Ity_I64
) {
919 /* Use InterleaveHI64x2 to copy the top half of the vector into
920 the bottom half. Then we can UifU it with the original, throw
921 away the upper half of the result, and PCast-I64-to-I64
923 // Generates vbits[127:64] : vbits[127:64]
925 = assignNew('V', mce
, Ity_V128
,
926 binop(Iop_InterleaveHI64x2
, vbits
, vbits
));
928 // UifU(vbits[127:64],vbits[127:64]) : UifU(vbits[127:64],vbits[63:0])
929 // == vbits[127:64] : UifU(vbits[127:64],vbits[63:0])
931 = mkUifUV128(mce
, hi64hi64
, vbits
);
932 // Generates UifU(vbits[127:64],vbits[63:0])
934 = assignNew('V', mce
, Ity_I64
, unop(Iop_V128to64
, lohi64
));
936 // PCast-to-I64( UifU(vbits[127:64], vbits[63:0] )
937 // == PCast-to-I64( vbits[127:0] )
939 = assignNew('V', mce
, Ity_I64
, unop(Iop_CmpwNEZ64
, lo64
));
943 /* Else do it the slow way .. */
944 /* First of all, collapse vbits down to a single bit. */
951 tmp1
= assignNew('V', mce
, Ity_I1
, unop(Iop_CmpNEZ8
, vbits
));
954 tmp1
= assignNew('V', mce
, Ity_I1
, unop(Iop_CmpNEZ16
, vbits
));
957 tmp1
= assignNew('V', mce
, Ity_I1
, unop(Iop_CmpNEZ32
, vbits
));
960 tmp1
= assignNew('V', mce
, Ity_I1
, unop(Iop_CmpNEZ64
, vbits
));
963 /* Gah. Chop it in half, OR the halves together, and compare
965 IRAtom
* tmp2
= assignNew('V', mce
, Ity_I64
, unop(Iop_128HIto64
, vbits
));
966 IRAtom
* tmp3
= assignNew('V', mce
, Ity_I64
, unop(Iop_128to64
, vbits
));
967 IRAtom
* tmp4
= assignNew('V', mce
, Ity_I64
, binop(Iop_Or64
, tmp2
, tmp3
));
968 tmp1
= assignNew('V', mce
, Ity_I1
,
969 unop(Iop_CmpNEZ64
, tmp4
));
973 /* Chop it in half, OR the halves together, and compare that
976 IRAtom
* tmp2
= assignNew('V', mce
, Ity_I64
, unop(Iop_V128HIto64
, vbits
));
977 IRAtom
* tmp3
= assignNew('V', mce
, Ity_I64
, unop(Iop_V128to64
, vbits
));
978 IRAtom
* tmp4
= assignNew('V', mce
, Ity_I64
, binop(Iop_Or64
, tmp2
, tmp3
));
979 tmp1
= assignNew('V', mce
, Ity_I1
,
980 unop(Iop_CmpNEZ64
, tmp4
));
985 VG_(tool_panic
)("mkPCastTo(1)");
988 /* Now widen up to the dst type. */
993 return assignNew('V', mce
, Ity_I8
, unop(Iop_1Sto8
, tmp1
));
995 return assignNew('V', mce
, Ity_I16
, unop(Iop_1Sto16
, tmp1
));
997 return assignNew('V', mce
, Ity_I32
, unop(Iop_1Sto32
, tmp1
));
999 return assignNew('V', mce
, Ity_I64
, unop(Iop_1Sto64
, tmp1
));
1001 tmp1
= assignNew('V', mce
, Ity_I64
, unop(Iop_1Sto64
, tmp1
));
1002 tmp1
= assignNew('V', mce
, Ity_V128
, binop(Iop_64HLtoV128
, tmp1
, tmp1
));
1005 tmp1
= assignNew('V', mce
, Ity_I64
, unop(Iop_1Sto64
, tmp1
));
1006 tmp1
= assignNew('V', mce
, Ity_I128
, binop(Iop_64HLto128
, tmp1
, tmp1
));
1009 tmp1
= assignNew('V', mce
, Ity_I64
, unop(Iop_1Sto64
, tmp1
));
1010 tmp1
= assignNew('V', mce
, Ity_V128
, binop(Iop_64HLtoV128
,
1012 tmp1
= assignNew('V', mce
, Ity_V256
, binop(Iop_V128HLtoV256
,
1017 VG_(tool_panic
)("mkPCastTo(2)");
1021 /* This is a minor variant. It takes an arg of some type and returns
1022 a value of the same type. The result consists entirely of Defined
1023 (zero) bits except its least significant bit, which is a PCast of
1024 the entire argument down to a single bit. */
1025 static IRAtom
* mkPCastXXtoXXlsb ( MCEnv
* mce
, IRAtom
* varg
, IRType ty
)
1027 if (ty
== Ity_V128
) {
1028 /* --- Case for V128 --- */
1029 IRAtom
* varg128
= varg
;
1030 // generates: PCast-to-I64(varg128)
1031 IRAtom
* pcdTo64
= mkPCastTo(mce
, Ity_I64
, varg128
);
1032 // Now introduce zeros (defined bits) in the top 63 places
1033 // generates: Def--(63)--Def PCast-to-I1(varg128)
1035 = assignNew('V', mce
, Ity_I64
, binop(Iop_And64
, pcdTo64
, mkU64(1)));
1036 // generates: Def--(64)--Def
1038 = definedOfType(Ity_I64
);
1039 // generates: Def--(127)--Def PCast-to-I1(varg128)
1041 = assignNew('V', mce
, Ity_V128
, binop(Iop_64HLtoV128
, d64
, d63pc
));
1044 if (ty
== Ity_I64
) {
1045 /* --- Case for I64 --- */
1047 IRAtom
* pcd
= mkPCastTo(mce
, Ity_I64
, varg
);
1048 // Zero (Def) out the top 63 bits
1050 = assignNew('V', mce
, Ity_I64
, binop(Iop_And64
, pcd
, mkU64(1)));
1057 /* --------- Optimistic casts. --------- */
1059 /* The function takes and returns an expression of type TY. If any of the
1060 VBITS indicate defined (value == 0) the resulting expression has all bits
1061 set to 0. Otherwise, all bits are 1. In words, if any bits are defined
1062 then all bits are made to be defined.
1064 In short we compute (vbits - (vbits >>u 1)) >>s (bitsize(vbits)-1).
1066 static IRAtom
* mkOCastAt( MCEnv
* mce
, IRType ty
, IRAtom
* vbits
)
1068 IROp opSUB
, opSHR
, opSAR
;
1073 opSUB
= Iop_Sub64
; opSHR
= Iop_Shr64
; opSAR
= Iop_Sar64
; sh
= 63;
1076 opSUB
= Iop_Sub32
; opSHR
= Iop_Shr32
; opSAR
= Iop_Sar32
; sh
= 31;
1079 opSUB
= Iop_Sub16
; opSHR
= Iop_Shr16
; opSAR
= Iop_Sar16
; sh
= 15;
1082 opSUB
= Iop_Sub8
; opSHR
= Iop_Shr8
; opSAR
= Iop_Sar8
; sh
= 7;
1086 VG_(tool_panic
)("mkOCastTo");
1090 shr1
= assignNew('V', mce
,ty
, binop(opSHR
, vbits
, mkU8(1)));
1091 at
= assignNew('V', mce
,ty
, binop(opSUB
, vbits
, shr1
));
1092 at
= assignNew('V', mce
,ty
, binop(opSAR
, at
, mkU8(sh
)));
1097 /* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
1099 Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
1100 PCasting to Ity_U1. However, sometimes it is necessary to be more
1101 accurate. The insight is that the result is defined if two
1102 corresponding bits can be found, one from each argument, so that
1103 both bits are defined but are different -- that makes EQ say "No"
1104 and NE say "Yes". Hence, we compute an improvement term and DifD
1105 it onto the "normal" (UifU) result.
1120 vec contains 0 (defined) bits where the corresponding arg bits
1121 are defined but different, and 1 bits otherwise.
1123 vec = Or<sz>( vxx, // 0 iff bit defined
1124 vyy, // 0 iff bit defined
1125 Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
1128 If any bit of vec is 0, the result is defined and so the
1129 improvement term should produce 0...0, else it should produce
1132 Hence require for the improvement term:
1134 OCast(vec) = if vec == 1...1 then 1...1 else 0...0
1136 which you can think of as an "optimistic cast" (OCast, the opposite of
1137 the normal "pessimistic cast" (PCast) family. An OCast says all bits
1138 are defined if any bit is defined.
1140 It is possible to show that
1142 if vec == 1...1 then 1...1 else 0...0
1144 can be implemented in straight-line code as
1146 (vec - (vec >>u 1)) >>s (word-size-in-bits - 1)
1148 We note that vec contains the sub-term Or<sz>(vxx, vyy). Since UifU is
1149 implemented with Or (since 1 signifies undefinedness), this is a
1150 duplicate of the UifU<sz>(vxx, vyy) term and so we can CSE it out, giving
1153 let naive = UifU<sz>(vxx, vyy)
1154 vec = Or<sz>(naive, Not<sz>(Xor<sz)(xx, yy))
1156 PCastTo<1>( DifD<sz>(naive, OCast<sz>(vec)) )
1158 This was extensively re-analysed and checked on 6 July 05 and again
1161 static IRAtom
* expensiveCmpEQorNE ( MCEnv
* mce
,
1163 IRAtom
* vxx
, IRAtom
* vyy
,
1164 IRAtom
* xx
, IRAtom
* yy
)
1166 IRAtom
*naive
, *vec
, *improved
, *final_cast
;
1167 IROp opDIFD
, opUIFU
, opOR
, opXOR
, opNOT
;
1169 tl_assert(isShadowAtom(mce
,vxx
));
1170 tl_assert(isShadowAtom(mce
,vyy
));
1171 tl_assert(isOriginalAtom(mce
,xx
));
1172 tl_assert(isOriginalAtom(mce
,yy
));
1173 tl_assert(sameKindedAtoms(vxx
,xx
));
1174 tl_assert(sameKindedAtoms(vyy
,yy
));
1206 VG_(tool_panic
)("expensiveCmpEQorNE");
1210 = assignNew('V', mce
, ty
, binop(opUIFU
, vxx
, vyy
));
1220 assignNew('V', mce
,ty
, binop(opXOR
, xx
, yy
))))));
1223 = assignNew( 'V', mce
,ty
,
1224 binop(opDIFD
, naive
, mkOCastAt(mce
, ty
, vec
)));
1227 = mkPCastTo( mce
, Ity_I1
, improved
);
1233 /* --------- Semi-accurate interpretation of CmpORD. --------- */
1235 /* CmpORD32{S,U} does PowerPC-style 3-way comparisons:
1237 CmpORD32S(x,y) = 1<<3 if x <s y
1241 and similarly the unsigned variant. The default interpretation is:
1243 CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
1246 The "& (7<<1)" reflects the fact that all result bits except 3,2,1
1247 are zero and therefore defined (viz, zero).
1249 Also deal with a special case better:
1253 Here, bit 3 (LT) of the result is a copy of the top bit of x and
1254 will be defined even if the rest of x isn't. In which case we do:
1256 CmpORD32S#(x,x#,0,{impliedly 0}#)
1257 = PCast(x#) & (3<<1) -- standard interp for GT#,EQ#
1258 | (x# >>u 31) << 3 -- LT# = x#[31]
1260 Analogous handling for CmpORD64{S,U}.
1262 static Bool
isZeroU32 ( IRAtom
* e
)
1265 toBool( e
->tag
== Iex_Const
1266 && e
->Iex
.Const
.con
->tag
== Ico_U32
1267 && e
->Iex
.Const
.con
->Ico
.U32
== 0 );
1270 static Bool
isZeroU64 ( IRAtom
* e
)
1273 toBool( e
->tag
== Iex_Const
1274 && e
->Iex
.Const
.con
->tag
== Ico_U64
1275 && e
->Iex
.Const
.con
->Ico
.U64
== 0 );
1278 static IRAtom
* doCmpORD ( MCEnv
* mce
,
1280 IRAtom
* xxhash
, IRAtom
* yyhash
,
1281 IRAtom
* xx
, IRAtom
* yy
)
1283 Bool m64
= cmp_op
== Iop_CmpORD64S
|| cmp_op
== Iop_CmpORD64U
;
1284 Bool syned
= cmp_op
== Iop_CmpORD64S
|| cmp_op
== Iop_CmpORD32S
;
1285 IROp opOR
= m64
? Iop_Or64
: Iop_Or32
;
1286 IROp opAND
= m64
? Iop_And64
: Iop_And32
;
1287 IROp opSHL
= m64
? Iop_Shl64
: Iop_Shl32
;
1288 IROp opSHR
= m64
? Iop_Shr64
: Iop_Shr32
;
1289 IRType ty
= m64
? Ity_I64
: Ity_I32
;
1290 Int width
= m64
? 64 : 32;
1292 Bool (*isZero
)(IRAtom
*) = m64
? isZeroU64
: isZeroU32
;
1294 IRAtom
* threeLeft1
= NULL
;
1295 IRAtom
* sevenLeft1
= NULL
;
1297 tl_assert(isShadowAtom(mce
,xxhash
));
1298 tl_assert(isShadowAtom(mce
,yyhash
));
1299 tl_assert(isOriginalAtom(mce
,xx
));
1300 tl_assert(isOriginalAtom(mce
,yy
));
1301 tl_assert(sameKindedAtoms(xxhash
,xx
));
1302 tl_assert(sameKindedAtoms(yyhash
,yy
));
1303 tl_assert(cmp_op
== Iop_CmpORD32S
|| cmp_op
== Iop_CmpORD32U
1304 || cmp_op
== Iop_CmpORD64S
|| cmp_op
== Iop_CmpORD64U
);
1307 ppIROp(cmp_op
); VG_(printf
)(" ");
1308 ppIRExpr(xx
); VG_(printf
)(" "); ppIRExpr( yy
); VG_(printf
)("\n");
1311 if (syned
&& isZero(yy
)) {
1312 /* fancy interpretation */
1313 /* if yy is zero, then it must be fully defined (zero#). */
1314 tl_assert(isZero(yyhash
));
1315 threeLeft1
= m64
? mkU64(3<<1) : mkU32(3<<1);
1323 mkPCastTo(mce
,ty
, xxhash
),
1332 binop(opSHR
, xxhash
, mkU8(width
-1))),
1337 /* standard interpretation */
1338 sevenLeft1
= m64
? mkU64(7<<1) : mkU32(7<<1);
1343 mkUifU(mce
,ty
, xxhash
,yyhash
)),
1350 /*------------------------------------------------------------*/
1351 /*--- Emit a test and complaint if something is undefined. ---*/
1352 /*------------------------------------------------------------*/
1354 static IRAtom
* schemeE ( MCEnv
* mce
, IRExpr
* e
); /* fwds */
1357 /* Set the annotations on a dirty helper to indicate that the stack
1358 pointer and instruction pointers might be read. This is the
1359 behaviour of all 'emit-a-complaint' style functions we might
1362 static void setHelperAnns ( MCEnv
* mce
, IRDirty
* di
) {
1364 di
->fxState
[0].fx
= Ifx_Read
;
1365 di
->fxState
[0].offset
= mce
->layout
->offset_SP
;
1366 di
->fxState
[0].size
= mce
->layout
->sizeof_SP
;
1367 di
->fxState
[0].nRepeats
= 0;
1368 di
->fxState
[0].repeatLen
= 0;
1369 di
->fxState
[1].fx
= Ifx_Read
;
1370 di
->fxState
[1].offset
= mce
->layout
->offset_IP
;
1371 di
->fxState
[1].size
= mce
->layout
->sizeof_IP
;
1372 di
->fxState
[1].nRepeats
= 0;
1373 di
->fxState
[1].repeatLen
= 0;
1377 /* Check the supplied *original* |atom| for undefinedness, and emit a
1378 complaint if so. Once that happens, mark it as defined. This is
1379 possible because the atom is either a tmp or literal. If it's a
1380 tmp, it will be shadowed by a tmp, and so we can set the shadow to
1381 be defined. In fact as mentioned above, we will have to allocate a
1382 new tmp to carry the new 'defined' shadow value, and update the
1383 original->tmp mapping accordingly; we cannot simply assign a new
1384 value to an existing shadow tmp as this breaks SSAness.
1386 The checks are performed, any resulting complaint emitted, and
1387 |atom|'s shadow temp set to 'defined', ONLY in the case that
1388 |guard| evaluates to True at run-time. If it evaluates to False
1389 then no action is performed. If |guard| is NULL (the usual case)
1390 then it is assumed to be always-true, and hence these actions are
1391 performed unconditionally.
1393 This routine does not generate code to check the definedness of
1394 |guard|. The caller is assumed to have taken care of that already.
1396 static void complainIfUndefined ( MCEnv
* mce
, IRAtom
* atom
, IRExpr
*guard
)
1409 // Don't do V bit tests if we're not reporting undefined value errors.
1410 if (MC_(clo_mc_level
) == 1)
1414 tl_assert(isOriginalAtom(mce
, guard
));
1416 /* Since the original expression is atomic, there's no duplicated
1417 work generated by making multiple V-expressions for it. So we
1418 don't really care about the possibility that someone else may
1419 also create a V-interpretion for it. */
1420 tl_assert(isOriginalAtom(mce
, atom
));
1421 vatom
= expr2vbits( mce
, atom
, HuOth
);
1422 tl_assert(isShadowAtom(mce
, vatom
));
1423 tl_assert(sameKindedAtoms(atom
, vatom
));
1425 ty
= typeOfIRExpr(mce
->sb
->tyenv
, vatom
);
1427 /* sz is only used for constructing the error message */
1428 sz
= ty
==Ity_I1
? 0 : sizeofIRType(ty
);
1430 cond
= mkPCastTo( mce
, Ity_I1
, vatom
);
1431 /* cond will be 0 if all defined, and 1 if any not defined. */
1433 /* Get the origin info for the value we are about to check. At
1434 least, if we are doing origin tracking. If not, use a dummy
1436 if (MC_(clo_mc_level
) == 3) {
1437 origin
= schemeE( mce
, atom
);
1438 if (mce
->hWordTy
== Ity_I64
) {
1439 origin
= assignNew( 'B', mce
, Ity_I64
, unop(Iop_32Uto64
, origin
) );
1453 fn
= &MC_(helperc_value_check0_fail_w_o
);
1454 nm
= "MC_(helperc_value_check0_fail_w_o)";
1455 args
= mkIRExprVec_1(origin
);
1458 fn
= &MC_(helperc_value_check0_fail_no_o
);
1459 nm
= "MC_(helperc_value_check0_fail_no_o)";
1460 args
= mkIRExprVec_0();
1466 fn
= &MC_(helperc_value_check1_fail_w_o
);
1467 nm
= "MC_(helperc_value_check1_fail_w_o)";
1468 args
= mkIRExprVec_1(origin
);
1471 fn
= &MC_(helperc_value_check1_fail_no_o
);
1472 nm
= "MC_(helperc_value_check1_fail_no_o)";
1473 args
= mkIRExprVec_0();
1479 fn
= &MC_(helperc_value_check4_fail_w_o
);
1480 nm
= "MC_(helperc_value_check4_fail_w_o)";
1481 args
= mkIRExprVec_1(origin
);
1484 fn
= &MC_(helperc_value_check4_fail_no_o
);
1485 nm
= "MC_(helperc_value_check4_fail_no_o)";
1486 args
= mkIRExprVec_0();
1492 fn
= &MC_(helperc_value_check8_fail_w_o
);
1493 nm
= "MC_(helperc_value_check8_fail_w_o)";
1494 args
= mkIRExprVec_1(origin
);
1497 fn
= &MC_(helperc_value_check8_fail_no_o
);
1498 nm
= "MC_(helperc_value_check8_fail_no_o)";
1499 args
= mkIRExprVec_0();
1506 fn
= &MC_(helperc_value_checkN_fail_w_o
);
1507 nm
= "MC_(helperc_value_checkN_fail_w_o)";
1508 args
= mkIRExprVec_2( mkIRExpr_HWord( sz
), origin
);
1511 fn
= &MC_(helperc_value_checkN_fail_no_o
);
1512 nm
= "MC_(helperc_value_checkN_fail_no_o)";
1513 args
= mkIRExprVec_1( mkIRExpr_HWord( sz
) );
1518 VG_(tool_panic
)("unexpected szB");
1524 tl_assert(nargs
>= 0 && nargs
<= 2);
1525 tl_assert( (MC_(clo_mc_level
) == 3 && origin
!= NULL
)
1526 || (MC_(clo_mc_level
) == 2 && origin
== NULL
) );
1528 di
= unsafeIRDirty_0_N( nargs
/*regparms*/, nm
,
1529 VG_(fnptr_to_fnentry
)( fn
), args
);
1530 di
->guard
= cond
; // and cond is PCast-to-1(atom#)
1532 /* If the complaint is to be issued under a guard condition, AND
1533 that into the guard condition for the helper call. */
1535 IRAtom
*g1
= assignNew('V', mce
, Ity_I32
, unop(Iop_1Uto32
, di
->guard
));
1536 IRAtom
*g2
= assignNew('V', mce
, Ity_I32
, unop(Iop_1Uto32
, guard
));
1537 IRAtom
*e
= assignNew('V', mce
, Ity_I32
, binop(Iop_And32
, g1
, g2
));
1538 di
->guard
= assignNew('V', mce
, Ity_I1
, unop(Iop_32to1
, e
));
1541 setHelperAnns( mce
, di
);
1542 stmt( 'V', mce
, IRStmt_Dirty(di
));
1544 /* If |atom| is shadowed by an IRTemp, set the shadow tmp to be
1545 defined -- but only in the case where the guard evaluates to
1546 True at run-time. Do the update by setting the orig->shadow
1547 mapping for tmp to reflect the fact that this shadow is getting
1549 tl_assert(isIRAtom(vatom
));
1550 /* sameKindedAtoms ... */
1551 if (vatom
->tag
== Iex_RdTmp
) {
1552 tl_assert(atom
->tag
== Iex_RdTmp
);
1553 if (guard
== NULL
) {
1554 // guard is 'always True', hence update unconditionally
1555 newShadowTmpV(mce
, atom
->Iex
.RdTmp
.tmp
);
1556 assign('V', mce
, findShadowTmpV(mce
, atom
->Iex
.RdTmp
.tmp
),
1559 // update the temp only conditionally. Do this by copying
1560 // its old value when the guard is False.
1562 IRTemp old_tmpV
= findShadowTmpV(mce
, atom
->Iex
.RdTmp
.tmp
);
1563 newShadowTmpV(mce
, atom
->Iex
.RdTmp
.tmp
);
1565 = assignNew('V', mce
, shadowTypeV(ty
),
1566 IRExpr_ITE(guard
, definedOfType(ty
),
1568 assign('V', mce
, findShadowTmpV(mce
, atom
->Iex
.RdTmp
.tmp
), new_tmpV
);
1574 /*------------------------------------------------------------*/
1575 /*--- Shadowing PUTs/GETs, and indexed variants thereof ---*/
1576 /*------------------------------------------------------------*/
1578 /* Examine the always-defined sections declared in layout to see if
1579 the (offset,size) section is within one. Note, is is an error to
1580 partially fall into such a region: (offset,size) should either be
1581 completely in such a region or completely not-in such a region.
1583 static Bool
isAlwaysDefd ( MCEnv
* mce
, Int offset
, Int size
)
1585 Int minoffD
, maxoffD
, i
;
1586 Int minoff
= offset
;
1587 Int maxoff
= minoff
+ size
- 1;
1588 tl_assert((minoff
& ~0xFFFF) == 0);
1589 tl_assert((maxoff
& ~0xFFFF) == 0);
1591 for (i
= 0; i
< mce
->layout
->n_alwaysDefd
; i
++) {
1592 minoffD
= mce
->layout
->alwaysDefd
[i
].offset
;
1593 maxoffD
= minoffD
+ mce
->layout
->alwaysDefd
[i
].size
- 1;
1594 tl_assert((minoffD
& ~0xFFFF) == 0);
1595 tl_assert((maxoffD
& ~0xFFFF) == 0);
1597 if (maxoff
< minoffD
|| maxoffD
< minoff
)
1598 continue; /* no overlap */
1599 if (minoff
>= minoffD
&& maxoff
<= maxoffD
)
1600 return True
; /* completely contained in an always-defd section */
1602 VG_(tool_panic
)("memcheck:isAlwaysDefd:partial overlap");
1604 return False
; /* could not find any containing section */
1608 /* Generate into bb suitable actions to shadow this Put. If the state
1609 slice is marked 'always defined', do nothing. Otherwise, write the
1610 supplied V bits to the shadow state. We can pass in either an
1611 original atom or a V-atom, but not both. In the former case the
1612 relevant V-bits are then generated from the original.
1613 We assume here, that the definedness of GUARD has already been checked.
1616 void do_shadow_PUT ( MCEnv
* mce
, Int offset
,
1617 IRAtom
* atom
, IRAtom
* vatom
, IRExpr
*guard
)
1621 // Don't do shadow PUTs if we're not doing undefined value checking.
1622 // Their absence lets Vex's optimiser remove all the shadow computation
1623 // that they depend on, which includes GETs of the shadow registers.
1624 if (MC_(clo_mc_level
) == 1)
1629 tl_assert(isOriginalAtom(mce
, atom
));
1630 vatom
= expr2vbits( mce
, atom
, HuOth
);
1633 tl_assert(isShadowAtom(mce
, vatom
));
1636 ty
= typeOfIRExpr(mce
->sb
->tyenv
, vatom
);
1637 tl_assert(ty
!= Ity_I1
);
1638 if (isAlwaysDefd(mce
, offset
, sizeofIRType(ty
))) {
1640 /* emit code to emit a complaint if any of the vbits are 1. */
1641 /* complainIfUndefined(mce, atom); */
1643 /* Do a plain shadow Put. */
1645 /* If the guard expression evaluates to false we simply Put the value
1646 that is already stored in the guest state slot */
1647 IRAtom
*cond
, *iffalse
;
1649 cond
= assignNew('V', mce
, Ity_I1
, guard
);
1650 iffalse
= assignNew('V', mce
, ty
,
1651 IRExpr_Get(offset
+ mce
->layout
->total_sizeB
, ty
));
1652 vatom
= assignNew('V', mce
, ty
, IRExpr_ITE(cond
, vatom
, iffalse
));
1654 stmt( 'V', mce
, IRStmt_Put( offset
+ mce
->layout
->total_sizeB
, vatom
));
1659 /* Return an expression which contains the V bits corresponding to the
1660 given GETI (passed in in pieces).
1663 void do_shadow_PUTI ( MCEnv
* mce
, IRPutI
*puti
)
1668 IRRegArray
* descr
= puti
->descr
;
1669 IRAtom
* ix
= puti
->ix
;
1670 Int bias
= puti
->bias
;
1671 IRAtom
* atom
= puti
->data
;
1673 // Don't do shadow PUTIs if we're not doing undefined value checking.
1674 // Their absence lets Vex's optimiser remove all the shadow computation
1675 // that they depend on, which includes GETIs of the shadow registers.
1676 if (MC_(clo_mc_level
) == 1)
1679 tl_assert(isOriginalAtom(mce
,atom
));
1680 vatom
= expr2vbits( mce
, atom
, HuOth
);
1681 tl_assert(sameKindedAtoms(atom
, vatom
));
1683 tyS
= shadowTypeV(ty
);
1684 arrSize
= descr
->nElems
* sizeofIRType(ty
);
1685 tl_assert(ty
!= Ity_I1
);
1686 tl_assert(isOriginalAtom(mce
,ix
));
1687 complainIfUndefined(mce
, ix
, NULL
);
1688 if (isAlwaysDefd(mce
, descr
->base
, arrSize
)) {
1690 /* emit code to emit a complaint if any of the vbits are 1. */
1691 /* complainIfUndefined(mce, atom); */
1693 /* Do a cloned version of the Put that refers to the shadow
1695 IRRegArray
* new_descr
1696 = mkIRRegArray( descr
->base
+ mce
->layout
->total_sizeB
,
1697 tyS
, descr
->nElems
);
1698 stmt( 'V', mce
, IRStmt_PutI( mkIRPutI(new_descr
, ix
, bias
, vatom
) ));
1703 /* Return an expression which contains the V bits corresponding to the
1704 given GET (passed in in pieces).
1707 IRExpr
* shadow_GET ( MCEnv
* mce
, Int offset
, IRType ty
)
1709 IRType tyS
= shadowTypeV(ty
);
1710 tl_assert(ty
!= Ity_I1
);
1711 tl_assert(ty
!= Ity_I128
);
1712 if (isAlwaysDefd(mce
, offset
, sizeofIRType(ty
))) {
1713 /* Always defined, return all zeroes of the relevant type */
1714 return definedOfType(tyS
);
1716 /* return a cloned version of the Get that refers to the shadow
1718 /* FIXME: this isn't an atom! */
1719 return IRExpr_Get( offset
+ mce
->layout
->total_sizeB
, tyS
);
1724 /* Return an expression which contains the V bits corresponding to the
1725 given GETI (passed in in pieces).
1728 IRExpr
* shadow_GETI ( MCEnv
* mce
,
1729 IRRegArray
* descr
, IRAtom
* ix
, Int bias
)
1731 IRType ty
= descr
->elemTy
;
1732 IRType tyS
= shadowTypeV(ty
);
1733 Int arrSize
= descr
->nElems
* sizeofIRType(ty
);
1734 tl_assert(ty
!= Ity_I1
);
1735 tl_assert(isOriginalAtom(mce
,ix
));
1736 complainIfUndefined(mce
, ix
, NULL
);
1737 if (isAlwaysDefd(mce
, descr
->base
, arrSize
)) {
1738 /* Always defined, return all zeroes of the relevant type */
1739 return definedOfType(tyS
);
1741 /* return a cloned version of the Get that refers to the shadow
1743 IRRegArray
* new_descr
1744 = mkIRRegArray( descr
->base
+ mce
->layout
->total_sizeB
,
1745 tyS
, descr
->nElems
);
1746 return IRExpr_GetI( new_descr
, ix
, bias
);
1751 /*------------------------------------------------------------*/
1752 /*--- Generating approximations for unknown operations, ---*/
1753 /*--- using lazy-propagate semantics ---*/
1754 /*------------------------------------------------------------*/
1756 /* Lazy propagation of undefinedness from two values, resulting in the
1757 specified shadow type.
1760 IRAtom
* mkLazy2 ( MCEnv
* mce
, IRType finalVty
, IRAtom
* va1
, IRAtom
* va2
)
1763 IRType t1
= typeOfIRExpr(mce
->sb
->tyenv
, va1
);
1764 IRType t2
= typeOfIRExpr(mce
->sb
->tyenv
, va2
);
1765 tl_assert(isShadowAtom(mce
,va1
));
1766 tl_assert(isShadowAtom(mce
,va2
));
1768 /* The general case is inefficient because PCast is an expensive
1769 operation. Here are some special cases which use PCast only
1770 once rather than twice. */
1772 /* I64 x I64 -> I64 */
1773 if (t1
== Ity_I64
&& t2
== Ity_I64
&& finalVty
== Ity_I64
) {
1774 if (0) VG_(printf
)("mkLazy2: I64 x I64 -> I64\n");
1775 at
= mkUifU(mce
, Ity_I64
, va1
, va2
);
1776 at
= mkPCastTo(mce
, Ity_I64
, at
);
1780 /* I64 x I64 -> I32 */
1781 if (t1
== Ity_I64
&& t2
== Ity_I64
&& finalVty
== Ity_I32
) {
1782 if (0) VG_(printf
)("mkLazy2: I64 x I64 -> I32\n");
1783 at
= mkUifU(mce
, Ity_I64
, va1
, va2
);
1784 at
= mkPCastTo(mce
, Ity_I32
, at
);
1788 /* I32 x I32 -> I32 */
1789 if (t1
== Ity_I32
&& t2
== Ity_I32
&& finalVty
== Ity_I32
) {
1790 if (0) VG_(printf
)("mkLazy2: I32 x I32 -> I32\n");
1791 at
= mkUifU(mce
, Ity_I32
, va1
, va2
);
1792 at
= mkPCastTo(mce
, Ity_I32
, at
);
1797 VG_(printf
)("mkLazy2 ");
1806 /* General case: force everything via 32-bit intermediaries. */
1807 at
= mkPCastTo(mce
, Ity_I32
, va1
);
1808 at
= mkUifU(mce
, Ity_I32
, at
, mkPCastTo(mce
, Ity_I32
, va2
));
1809 at
= mkPCastTo(mce
, finalVty
, at
);
/* 3-arg version of the above. */
static
IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than three times. */

   /* I32 x I64 x I64 -> I64 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I32 x I8 x I64 -> I64 */
   if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy3: I32 x I8 x I64 -> I64\n");
      /* Widen 1st and 2nd args to I64.  Since 1st arg is typically a
       * rounding mode indication which is fully defined, this should
       * get folded out later.
       */
      IRAtom* at1 = mkPCastTo(mce, Ity_I64, va1);
      IRAtom* at2 = mkPCastTo(mce, Ity_I64, va2);
      at = mkUifU(mce, Ity_I64, at1, at2);  // UifU(PCast(va1), PCast(va2))
      at = mkUifU(mce, Ity_I64, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I32 x I64 x I64 -> I32 */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
      at = mkPCastTo(mce, Ity_I64, va1);
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I32 x I32 -> I32 */
   /* 32-bit FP idiom, as (eg) happens on ARM */
   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
      at = va1;
      at = mkUifU(mce, Ity_I32, at, va2);
      at = mkUifU(mce, Ity_I32, at, va3);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   /* I32 x I128 x I128 -> I128 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128
       && finalVty == Ity_I128) {
      if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n");
      /* Widen 1st arg to I128.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I128, va1);
      /* Now fold in 2nd and 3rd args. */
      at = mkUifU(mce, Ity_I128, at, va2);
      at = mkUifU(mce, Ity_I128, at, va3);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I128, at);
      return at;
   }

   /* I32 x I8 x I128 -> I128 */
   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I128
       && finalVty == Ity_I128) {
      if (0) VG_(printf)("mkLazy3: I32 x I8 x I128 -> I128\n");
      /* Use I64 as an intermediate type, which means PCasting all 3
         args to I64 to start with.  1st arg is typically a rounding
         mode indication which is fully defined, so we hope that it
         will get folded out later. */
      IRAtom* at1 = mkPCastTo(mce, Ity_I64, va1);
      IRAtom* at2 = mkPCastTo(mce, Ity_I64, va2);
      IRAtom* at3 = mkPCastTo(mce, Ity_I64, va3);
      /* Now UifU all three together. */
      at = mkUifU(mce, Ity_I64, at1, at2);  // UifU(PCast(va1), PCast(va2))
      at = mkUifU(mce, Ity_I64, at, at3);   // ... `UifU` PCast(va3)
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I128, at);
      return at;
   }

   if (1) {
      VG_(printf)("mkLazy3: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
      VG_(tool_panic)("mkLazy3: unhandled 32-bit case");
   }

   /* General case: force everything via 32-bit intermediaries. */
   /*
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
   at = mkPCastTo(mce, finalVty, at);
   return at;
   */
}
/* 4-arg version of the above. */
static
IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
                  IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
   IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   tl_assert(isShadowAtom(mce,va3));
   tl_assert(isShadowAtom(mce,va4));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      twice rather than three times. */

   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */

   if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128 && t4 == Ity_I128
       && finalVty == Ity_I128) {
      if (0) VG_(printf)("mkLazy4: I32 x I128 x I128 x I128 -> I128\n");
      /* Widen 1st arg to I128.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I128, va1);
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I128, at, va2);
      at = mkUifU(mce, Ity_I128, at, va3);
      at = mkUifU(mce, Ity_I128, at, va4);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I128, at);
      return at;
   }

   /* I32 x I64 x I64 x I64 -> I64 */
   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
         mode indication which is fully defined, this should get
         folded out later. */
      at = mkPCastTo(mce, Ity_I64, va1);
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I64, at, va2);
      at = mkUifU(mce, Ity_I64, at, va3);
      at = mkUifU(mce, Ity_I64, at, va4);
      /* and PCast once again. */
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I32 x I32 x I32 x I32 -> I32 */
   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n");
      at = va1;
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I32, at, va2);
      at = mkUifU(mce, Ity_I32, at, va3);
      at = mkUifU(mce, Ity_I32, at, va4);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   if (t1 == Ity_I32 && t2 == Ity_I8 && t3 == Ity_I8 && t4 == Ity_I8
       && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy4: I32 x I8 x I8 x I8 -> I32\n");
      at = mkPCastTo(mce, Ity_I8, va1);
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I8, at, va2);
      at = mkUifU(mce, Ity_I8, at, va3);
      at = mkUifU(mce, Ity_I8, at, va4);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   if (t1 == Ity_I64 && t2 == Ity_I8 && t3 == Ity_I8 && t4 == Ity_I8
       && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy4: I64 x I8 x I8 x I8 -> I64\n");
      at = mkPCastTo(mce, Ity_I8, va1);
      /* Now fold in 2nd, 3rd, 4th args. */
      at = mkUifU(mce, Ity_I8, at, va2);
      at = mkUifU(mce, Ity_I8, at, va3);
      at = mkUifU(mce, Ity_I8, at, va4);
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   if (1) {
      VG_(printf)("mkLazy4: ");
      ppIRType(t1);
      VG_(printf)(" x ");
      ppIRType(t2);
      VG_(printf)(" x ");
      ppIRType(t3);
      VG_(printf)(" x ");
      ppIRType(t4);
      VG_(printf)(" -> ");
      ppIRType(finalVty);
      VG_(printf)("\n");
      VG_(tool_panic)("mkLazy4: unhandled type combination");
   }
}
/* Do the lazy propagation game from a null-terminated vector of
   atoms.  This is presumably the arguments to a helper call, so the
   IRCallee info is also supplied in order that we can know which
   arguments should be ignored (via the .mcx_mask field).
*/
static
IRAtom* mkLazyN ( MCEnv* mce,
                  IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
{
   Int     i;
   IRAtom* here;
   IRAtom* curr;
   IRType  mergeTy;
   Bool    mergeTy64 = True;

   /* Decide on the type of the merge intermediary.  If all relevant
      args are I64, then it's I64.  In all other circumstances, use
      I32. */
   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      if (cee->mcx_mask & (1<<i))
         continue;
      if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
         mergeTy64 = False;
   }

   mergeTy = mergeTy64  ? Ity_I64  : Ity_I32;
   curr    = definedOfType(mergeTy);

   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      /* Only take notice of this arg if the callee's mc-exclusion
         mask does not say it is to be excluded. */
      if (cee->mcx_mask & (1<<i)) {
         /* the arg is to be excluded from definedness checking.  Do
            nothing. */
         if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
      } else {
         /* calculate the arg's definedness, and pessimistically merge
            it in. */
         here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i], HuOth) );
         curr = mergeTy64
                   ? mkUifU64(mce, here, curr)
                   : mkUifU32(mce, here, curr);
      }
   }
   return mkPCastTo(mce, finalVtype, curr );
}
/*------------------------------------------------------------*/
/*--- Generating expensive sequences for exact carry-chain ---*/
/*--- propagation in add/sub and related operations.       ---*/
/*------------------------------------------------------------*/
IRAtom* expensiveAddSub ( MCEnv*  mce,
                          Bool    add,
                          IRType  ty,
                          IRAtom* qaa, IRAtom* qbb,
                          IRAtom* aa,  IRAtom* bb )
{
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   switch (ty) {
      case Ity_I32:
         opAND = Iop_And32;
         opOR  = Iop_Or32;
         opXOR = Iop_Xor32;
         opNOT = Iop_Not32;
         opADD = Iop_Add32;
         opSUB = Iop_Sub32;
         break;
      case Ity_I64:
         opAND = Iop_And64;
         opOR  = Iop_Or64;
         opXOR = Iop_Xor64;
         opNOT = Iop_Not64;
         opADD = Iop_Add64;
         opSUB = Iop_Sub64;
         break;
      default:
         VG_(tool_panic)("expensiveAddSub");
   }

   // a_min = aa & ~qaa
   a_min = assignNew('V', mce,ty,
                     binop(opAND, aa,
                           assignNew('V', mce,ty, unop(opNOT, qaa))));

   // b_min = bb & ~qbb
   b_min = assignNew('V', mce,ty,
                     binop(opAND, bb,
                           assignNew('V', mce,ty, unop(opNOT, qbb))));

   // a_max = aa | qaa
   a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));

   // b_max = bb | qbb
   b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));

   if (add) {
      // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
      return
      assignNew('V', mce,ty,
                binop( opOR,
                       assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                       assignNew('V', mce,ty,
                                 binop( opXOR,
                                        assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
                                        assignNew('V', mce,ty, binop(opADD, a_max, b_max))
                                 )
                       )
                )
      );
   } else {
      // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
      return
      assignNew('V', mce,ty,
                binop( opOR,
                       assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
                       assignNew('V', mce,ty,
                                 binop( opXOR,
                                        assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
                                        assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
                                 )
                       )
                )
      );
   }
}
IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
                                       IRAtom* atom, IRAtom* vatom )
{
   IRType ty;
   IROp xorOp, subOp, andOp;
   IRExpr *one;
   IRAtom *improver, *improved;
   tl_assert(isShadowAtom(mce,vatom));
   tl_assert(isOriginalAtom(mce,atom));
   tl_assert(sameKindedAtoms(atom,vatom));

   switch (czop) {
      case Iop_Ctz32:
         ty = Ity_I32;
         xorOp = Iop_Xor32;
         subOp = Iop_Sub32;
         andOp = Iop_And32;
         one = mkU32(1);
         break;
      case Iop_Ctz64:
         ty = Ity_I64;
         xorOp = Iop_Xor64;
         subOp = Iop_Sub64;
         andOp = Iop_And64;
         one = mkU64(1);
         break;
      default:
         ty = Ity_INVALID;
         VG_(tool_panic)("memcheck:expensiveCountTrailingZeroes");
   }

   // improver = atom ^ (atom - 1)
   //
   // That is, improver has its low ctz(atom) bits equal to one;
   // higher bits (if any) equal to zero.
   improver = assignNew('V', mce,ty,
                        binop(xorOp,
                              atom,
                              assignNew('V', mce, ty,
                                        binop(subOp, atom, one))));

   // improved = vatom & improver
   //
   // That is, treat any V bits above the first ctz(atom) bits as
   // defined.
   improved = assignNew('V', mce, ty,
                        binop(andOp, vatom, improver));

   // Return pessimizing cast of improved.
   return mkPCastTo(mce, ty, improved);
}
/*------------------------------------------------------------*/
/*--- Scalar shifts.                                        ---*/
/*------------------------------------------------------------*/
/* Produce an interpretation for (aa << bb) (or >>s, >>u).  The basic
   idea is to shift the definedness bits by the original shift amount.
   This introduces 0s ("defined") in new positions for left shifts and
   unsigned right shifts, and copies the top definedness bit for
   signed right shifts.  So, conveniently, applying the original shift
   operator to the definedness bits for the left arg is exactly the
   right thing to do:

      (qaa << bb)

   However if the shift amount is undefined then the whole result
   is undefined.  Hence need:

      (qaa << bb) `UifU` PCast(qbb)

   If the shift amount bb is a literal then qbb will say 'all defined'
   and the UifU and PCast will get folded out by post-instrumentation
   optimisation.
*/
static IRAtom* scalarShift ( MCEnv*  mce,
                             IRType  ty,
                             IROp    original_op,
                             IRAtom* qaa, IRAtom* qbb,
                             IRAtom* aa,  IRAtom* bb )
{
   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));
   return
      assignNew(
         'V', mce, ty,
         mkUifU( mce, ty,
                 assignNew('V', mce, ty, binop(original_op, qaa, bb)),
                 mkPCastTo(mce, ty, qbb)
         )
      );
}
/*------------------------------------------------------------*/
/*--- Helpers for dealing with vector primops.              ---*/
/*------------------------------------------------------------*/
/* Vector pessimisation -- pessimise within each lane individually. */

static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
}

static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
}

static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
}

static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
}

static IRAtom* mkPCast128x1 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ128x1, at));
}

static IRAtom* mkPCast64x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ64x4, at));
}

static IRAtom* mkPCast32x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ32x8, at));
}

static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
}

static IRAtom* mkPCast16x16 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ16x16, at));
}

static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
}

static IRAtom* mkPCast8x32 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ8x32, at));
}

static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
}

static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at));
}

static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at));
}
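/* Lane-wise pessimisation, illustrated (values are made up).  For a
   64-bit shadow holding two 32-bit lanes,

      at           = 0x00000001_00000000
      mkPCast32x2  = 0xFFFFFFFF_00000000

   i.e. any undefined bit in a lane marks that whole lane undefined,
   but leaves the other lanes alone -- unlike the scalar mkPCastTo,
   which would poison all 64 bits. */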
/* Here's a simple scheme capable of handling ops derived from SSE1
   code while only generating ops that can be efficiently implemented
   in SSE1. */

/* All-lanes versions are straightforward:

   binary32Fx4(x,y)   ==> PCast32x4(UifUV128(x#,y#))

   unary32Fx4(x,y)    ==> PCast32x4(x#)

   Lowest-lane-only versions are more complex:

   binary32F0x4(x,y)  ==> SetV128lo32(
                             x#,
                             PCast32(V128to32(UifUV128(x#,y#)))
                          )

   This is perhaps not so obvious.  In particular, it's faster to
   do a V128-bit UifU and then take the bottom 32 bits than the more
   obvious scheme of taking the bottom 32 bits of each operand
   and doing a 32-bit UifU.  Basically since UifU is fast and
   chopping lanes off vector values is slow.

   Finally:

   unary32F0x4(x)     ==> SetV128lo32(
                             x#,
                             PCast32(V128to32(x#))
                          )

   Where:

   PCast32(v#)   = 1Sto32(CmpNE32(v#,0))
   PCast32x4(v#) = CmpNEZ32x4(v#)
*/
static
IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
   return at;
}

static
IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
   return at;
}

static
IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
   at = mkPCastTo(mce, Ity_I32, at);
   at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   return at;
}

static
IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
   at = mkPCastTo(mce, Ity_I32, at);
   at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   return at;
}
/* --- ... and ... 64Fx2 versions of the same ... --- */

static
IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
   return at;
}

static
IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
   return at;
}

static
IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
   at = mkPCastTo(mce, Ity_I64, at);
   at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   return at;
}

static
IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
   at = mkPCastTo(mce, Ity_I64, at);
   at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   return at;
}
/* --- --- ... and ... 32Fx2 versions of the same --- --- */

static
IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifU64(mce, vatomX, vatomY);
   at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at));
   return at;
}

static
IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX));
   return at;
}
/* --- ... and ... 64Fx4 versions of the same ... --- */

static
IRAtom* binary64Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV256(mce, vatomX, vatomY);
   at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, at));
   return at;
}

static
IRAtom* unary64Fx4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, vatomX));
   return at;
}
/* --- ... and ... 32Fx8 versions of the same ... --- */

static
IRAtom* binary32Fx8 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV256(mce, vatomX, vatomY);
   at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, at));
   return at;
}

static
IRAtom* unary32Fx8 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, vatomX));
   return at;
}
/* --- 64Fx2 binary FP ops, with rounding mode --- */

static
IRAtom* binary64Fx2_w_rm ( MCEnv* mce, IRAtom* vRM,
                           IRAtom* vatomX, IRAtom* vatomY )
{
   /* This is the same as binary64Fx2, except that we subsequently
      pessimise vRM (definedness of the rounding mode), widen to 128
      bits and UifU it into the result.  As with the scalar cases, if
      the RM is a constant then it is defined and so this extra bit
      will get constant-folded out later. */
   // "do" the vector args
   IRAtom* t1 = binary64Fx2(mce, vatomX, vatomY);
   // PCast the RM, and widen it to 128 bits
   IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
   // Roll it into the result
   t1 = mkUifUV128(mce, t1, t2);
   return t1;
}
/* --- ... and ... 32Fx4 versions of the same --- */

static
IRAtom* binary32Fx4_w_rm ( MCEnv* mce, IRAtom* vRM,
                           IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* t1 = binary32Fx4(mce, vatomX, vatomY);
   // PCast the RM, and widen it to 128 bits
   IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
   // Roll it into the result
   t1 = mkUifUV128(mce, t1, t2);
   return t1;
}
/* --- ... and ... 64Fx4 versions of the same --- */

static
IRAtom* binary64Fx4_w_rm ( MCEnv* mce, IRAtom* vRM,
                           IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* t1 = binary64Fx4(mce, vatomX, vatomY);
   // PCast the RM, and widen it to 256 bits
   IRAtom* t2 = mkPCastTo(mce, Ity_V256, vRM);
   // Roll it into the result
   t1 = mkUifUV256(mce, t1, t2);
   return t1;
}
/* --- ... and ... 32Fx8 versions of the same --- */

static
IRAtom* binary32Fx8_w_rm ( MCEnv* mce, IRAtom* vRM,
                           IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* t1 = binary32Fx8(mce, vatomX, vatomY);
   // PCast the RM, and widen it to 256 bits
   IRAtom* t2 = mkPCastTo(mce, Ity_V256, vRM);
   // Roll it into the result
   t1 = mkUifUV256(mce, t1, t2);
   return t1;
}
/* --- 64Fx2 unary FP ops, with rounding mode --- */

static
IRAtom* unary64Fx2_w_rm ( MCEnv* mce, IRAtom* vRM, IRAtom* vatomX )
{
   /* Same scheme as binary64Fx2_w_rm. */
   // "do" the vector arg
   IRAtom* t1 = unary64Fx2(mce, vatomX);
   // PCast the RM, and widen it to 128 bits
   IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
   // Roll it into the result
   t1 = mkUifUV128(mce, t1, t2);
   return t1;
}
/* --- ... and ... 32Fx4 versions of the same --- */

static
IRAtom* unary32Fx4_w_rm ( MCEnv* mce, IRAtom* vRM, IRAtom* vatomX )
{
   /* Same scheme as unary64Fx2_w_rm. */
   IRAtom* t1 = unary32Fx4(mce, vatomX);
   // PCast the RM, and widen it to 128 bits
   IRAtom* t2 = mkPCastTo(mce, Ity_V128, vRM);
   // Roll it into the result
   t1 = mkUifUV128(mce, t1, t2);
   return t1;
}
/* --- --- Vector saturated narrowing --- --- */

/* We used to do something very clever here, but on closer inspection
   (2011-Jun-15), and in particular bug #279698, it turns out to be
   wrong.  Part of the problem came from the fact that for a long
   time, the IR primops to do with saturated narrowing were
   underspecified and managed to confuse multiple cases which needed
   to be separate: the op names had a signedness qualifier, but in
   fact the source and destination signednesses needed to be specified
   independently, so the op names really need two independent
   signedness specifiers.

   As of 2011-Jun-15 (ish) the underspecification was sorted out
   properly.  The incorrect instrumentation remained, though.  That
   has now (2011-Oct-22) been fixed.

   What we now do is simple:

   Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a
   number of lanes, X is the source lane width and signedness, and Y
   is the destination lane width and signedness.  In all cases the
   destination lane width is half the source lane width, so the names
   have a bit of redundancy, but are at least easy to read.

   For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s
   to unsigned 16s.

   Let Vanilla(OP) be a function that takes OP, one of these
   saturating narrowing ops, and produces the same "shaped" narrowing
   op which is not saturating, but merely dumps the most significant
   bits.  "same shape" means that the lane numbers and widths are the
   same as with OP.

   For example, Vanilla(Iop_QNarrowBin32Sto16Ux8)
                  = Iop_NarrowBin32to16x8,
   that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by
   dumping the top half of each lane.

   So, with that in place, the scheme is simple, and it is simple to
   pessimise each lane individually and then apply Vanilla(OP) so as
   to get the result in the right "shape".  If the original OP is
   QNarrowBinXtoYxZ then we produce

   Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) )

   or for the case when OP is unary (Iop_QNarrowUn*)

   Vanilla(OP)( PCast-X-to-X-x-Z(vatom) )
*/
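/* Worked example of the scheme above (illustrative only).  Suppose the
   op is Iop_QNarrowBin32Sto16Ux8 and lane 2 of vatom1 has some
   undefined bits:

      PCast32x4(vatom1)  ->  lane 2 becomes 0xFFFFFFFF, others 0
      PCast32x4(vatom2)  ->  all lanes 0 (fully defined)

   Applying Vanilla(OP) = Iop_NarrowBin32to16x8 to the two pessimised
   values just drops the top half of each 32-bit lane, so the 16-bit
   result lane coming from lane 2 of vatom1 is 0xFFFF (wholly
   undefined) and every other result lane is 0 (defined).  Saturation
   can make any bit of an output lane depend on any bit of its input
   lane, which is why the per-lane PCast is done first. */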
static
IROp vanillaNarrowingOpOfShape ( IROp qnarrowOp )
{
   switch (qnarrowOp) {
      /* Binary: (128, 128) -> 128 */
      case Iop_QNarrowBin16Sto8Ux16:
      case Iop_QNarrowBin16Sto8Sx16:
      case Iop_QNarrowBin16Uto8Ux16:
      case Iop_QNarrowBin64Sto32Sx4:
      case Iop_QNarrowBin64Uto32Ux4:
         return Iop_NarrowBin16to8x16;
      case Iop_QNarrowBin32Sto16Ux8:
      case Iop_QNarrowBin32Sto16Sx8:
      case Iop_QNarrowBin32Uto16Ux8:
         return Iop_NarrowBin32to16x8;
      /* Binary: (64, 64) -> 64 */
      case Iop_QNarrowBin32Sto16Sx4:
         return Iop_NarrowBin32to16x4;
      case Iop_QNarrowBin16Sto8Ux8:
      case Iop_QNarrowBin16Sto8Sx8:
         return Iop_NarrowBin16to8x8;
      /* Unary: 128 -> 64 */
      case Iop_QNarrowUn64Uto32Ux2:
      case Iop_QNarrowUn64Sto32Sx2:
      case Iop_QNarrowUn64Sto32Ux2:
         return Iop_NarrowUn64to32x2;
      case Iop_QNarrowUn32Uto16Ux4:
      case Iop_QNarrowUn32Sto16Sx4:
      case Iop_QNarrowUn32Sto16Ux4:
      case Iop_F32toF16x4:
         return Iop_NarrowUn32to16x4;
      case Iop_QNarrowUn16Uto8Ux8:
      case Iop_QNarrowUn16Sto8Sx8:
      case Iop_QNarrowUn16Sto8Ux8:
         return Iop_NarrowUn16to8x8;
      default:
         ppIROp(qnarrowOp);
         VG_(tool_panic)("vanillaNarrowOpOfShape");
   }
}
static
IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op,
                              IRAtom* vatom1, IRAtom* vatom2)
{
   IRAtom *at1, *at2, *at3;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (narrow_op) {
      case Iop_QNarrowBin64Sto32Sx4: pcast = mkPCast32x4; break;
      case Iop_QNarrowBin64Uto32Ux4: pcast = mkPCast32x4; break;
      case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
      case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break;
      case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break;
      case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
      case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break;
      case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
      default: VG_(tool_panic)("vectorNarrowBinV128");
   }
   IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
   at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
   at3 = assignNew('V', mce, Ity_V128, binop(vanilla_narrow, at1, at2));
   return at3;
}
static
IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op,
                            IRAtom* vatom1, IRAtom* vatom2)
{
   IRAtom *at1, *at2, *at3;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (narrow_op) {
      case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break;
      case Iop_QNarrowBin16Sto8Sx8:  pcast = mkPCast16x4; break;
      case Iop_QNarrowBin16Sto8Ux8:  pcast = mkPCast16x4; break;
      default: VG_(tool_panic)("vectorNarrowBin64");
   }
   IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
   at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
   at3 = assignNew('V', mce, Ity_I64, binop(vanilla_narrow, at1, at2));
   return at3;
}
static
IRAtom* vectorNarrowUnV128 ( MCEnv* mce, IROp narrow_op,
                             IRAtom* vatom1)
{
   IRAtom *at1, *at2;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   tl_assert(isShadowAtom(mce,vatom1));
   /* For vanilla narrowing (non-saturating), we can just apply
      the op directly to the V bits. */
   switch (narrow_op) {
      case Iop_NarrowUn16to8x8:
      case Iop_NarrowUn32to16x4:
      case Iop_NarrowUn64to32x2:
      case Iop_F32toF16x4:
         at1 = assignNew('V', mce, Ity_I64, unop(narrow_op, vatom1));
         return at1;
      default:
         break; /* Do Plan B */
   }
   /* Plan B: for ops that involve a saturation operation on the args,
      we must PCast before the vanilla narrow. */
   switch (narrow_op) {
      case Iop_QNarrowUn16Sto8Sx8:  pcast = mkPCast16x8; break;
      case Iop_QNarrowUn16Sto8Ux8:  pcast = mkPCast16x8; break;
      case Iop_QNarrowUn16Uto8Ux8:  pcast = mkPCast16x8; break;
      case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break;
      case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break;
      case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break;
      case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break;
      case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break;
      case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break;
      default: VG_(tool_panic)("vectorNarrowUnV128");
   }
   IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
   at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
   at2 = assignNew('V', mce, Ity_I64, unop(vanilla_narrow, at1));
   return at2;
}
static
IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op,
                         IRAtom* vatom1)
{
   IRAtom *at1, *at2;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (longen_op) {
      case Iop_Widen8Uto16x8:  pcast = mkPCast16x8; break;
      case Iop_Widen8Sto16x8:  pcast = mkPCast16x8; break;
      case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break;
      case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break;
      case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break;
      case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break;
      case Iop_F16toF32x4:     pcast = mkPCast32x4; break;
      default: VG_(tool_panic)("vectorWidenI64");
   }
   tl_assert(isShadowAtom(mce,vatom1));
   at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1));
   at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1));
   return at2;
}
/* --- --- Vector integer arithmetic --- --- */

/* Simple ... UifU the args and per-lane pessimise the results. */

/* --- V256-bit versions --- */

static
IRAtom* binary8Ix32 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV256(mce, vatom1, vatom2);
   at = mkPCast8x32(mce, at);
   return at;
}

static
IRAtom* binary16Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV256(mce, vatom1, vatom2);
   at = mkPCast16x16(mce, at);
   return at;
}

static
IRAtom* binary32Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV256(mce, vatom1, vatom2);
   at = mkPCast32x8(mce, at);
   return at;
}

static
IRAtom* binary64Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV256(mce, vatom1, vatom2);
   at = mkPCast64x4(mce, at);
   return at;
}
/* --- V128-bit versions --- */

static
IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast8x16(mce, at);
   return at;
}

static
IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast16x8(mce, at);
   return at;
}

static
IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast32x4(mce, at);
   return at;
}

static
IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast64x2(mce, at);
   return at;
}

static
IRAtom* binary128Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast128x1(mce, at);
   return at;
}
/* --- 64-bit versions --- */

static
IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifU64(mce, vatom1, vatom2);
   at = mkPCast8x8(mce, at);
   return at;
}

static
IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifU64(mce, vatom1, vatom2);
   at = mkPCast16x4(mce, at);
   return at;
}

static
IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifU64(mce, vatom1, vatom2);
   at = mkPCast32x2(mce, at);
   return at;
}

static
IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifU64(mce, vatom1, vatom2);
   at = mkPCastTo(mce, Ity_I64, at);
   return at;
}
/* --- 32-bit versions --- */

static
IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifU32(mce, vatom1, vatom2);
   at = mkPCast8x4(mce, at);
   return at;
}

static
IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifU32(mce, vatom1, vatom2);
   at = mkPCast16x2(mce, at);
   return at;
}
/*------------------------------------------------------------*/
/*--- Generate shadow values from all kinds of IRExprs.    ---*/
/*------------------------------------------------------------*/
static
IRAtom* expr2vbits_Qop ( MCEnv* mce,
                         IROp op,
                         IRAtom* atom1, IRAtom* atom2,
                         IRAtom* atom3, IRAtom* atom4 )
{
   IRAtom* vatom1 = expr2vbits( mce, atom1, HuOth );
   IRAtom* vatom2 = expr2vbits( mce, atom2, HuOth );
   IRAtom* vatom3 = expr2vbits( mce, atom3, HuOth );
   IRAtom* vatom4 = expr2vbits( mce, atom4, HuOth );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isOriginalAtom(mce,atom4));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(isShadowAtom(mce,vatom4));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   tl_assert(sameKindedAtoms(atom4,vatom4));
   switch (op) {
      case Iop_MAddF64:
      case Iop_MAddF64r32:
      case Iop_MSubF64:
      case Iop_MSubF64r32:
         /* I32(rm) x F64 x F64 x F64 -> F64 */
         return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);

      case Iop_MAddF32:
      case Iop_MSubF32:
         /* I32(rm) x F32 x F32 x F32 -> F32 */
         return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);

      case Iop_MAddF128:
      case Iop_MSubF128:
      case Iop_NegMAddF128:
      case Iop_NegMSubF128:
         /* I32(rm) x F128 x F128 x F128 -> F128 */
         return mkLazy4(mce, Ity_I128, vatom1, vatom2, vatom3, vatom4);

      /* V256-bit data-steering */
      case Iop_64x4toV256:
         return assignNew('V', mce, Ity_V256,
                          IRExpr_Qop(op, vatom1, vatom2, vatom3, vatom4));

      /* I32/I64 x I8 x I8 x I8 -> I32/I64 */
      case Iop_Rotx32:
         return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);
      case Iop_Rotx64:
         return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Qop");
   }
}
static
IRAtom* expr2vbits_Triop ( MCEnv* mce,
                           IROp op,
                           IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
{
   IRAtom* vatom1 = expr2vbits( mce, atom1, HuOth );
   IRAtom* vatom2 = expr2vbits( mce, atom2, HuOth );
   IRAtom* vatom3 = expr2vbits( mce, atom3, HuOth );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isOriginalAtom(mce,atom3));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(isShadowAtom(mce,vatom3));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   tl_assert(sameKindedAtoms(atom3,vatom3));
   switch (op) {
      case Iop_AddF128:
      case Iop_SubF128:
      case Iop_MulF128:
      case Iop_DivF128:
      case Iop_AddD128:
      case Iop_SubD128:
      case Iop_MulD128:
      case Iop_DivD128:
      case Iop_QuantizeD128:
         /* I32(rm) x F128/D128 x F128/D128 -> F128/D128 */
         return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
      case Iop_AddF64:
      case Iop_AddD64:
      case Iop_SubF64:
      case Iop_SubD64:
      case Iop_MulF64:
      case Iop_MulD64:
      case Iop_DivF64:
      case Iop_DivD64:
      case Iop_ScaleF64:
      case Iop_Yl2xF64:
      case Iop_Yl2xp1F64:
      case Iop_AtanF64:
      case Iop_PRemF64:
      case Iop_PRem1F64:
      case Iop_QuantizeD64:
         /* I32(rm) x F64/D64 x F64/D64 -> F64/D64 */
         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
      case Iop_PRemC3210F64:
      case Iop_PRem1C3210F64:
         /* I32(rm) x F64 x F64 -> I32 */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      case Iop_AddF32:
      case Iop_SubF32:
      case Iop_MulF32:
      case Iop_DivF32:
         /* I32(rm) x F32 x F32 -> I32 */
         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
      case Iop_SignificanceRoundD64:
         /* IRRoundingMode(I32) x I8 x D64 -> D64 */
         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
      case Iop_SignificanceRoundD128:
         /* IRRoundingMode(I32) x I8 x D128 -> D128 */
         return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
      case Iop_SliceV128:
         /* (V128, V128, I8) -> V128 */
         complainIfUndefined(mce, atom3, NULL);
         return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
      case Iop_Slice64:
         /* (I64, I64, I8) -> I64 */
         complainIfUndefined(mce, atom3, NULL);
         return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3));
      case Iop_SetElem8x8:
      case Iop_SetElem16x4:
      case Iop_SetElem32x2:
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3));

      case Iop_SetElem8x16:
      case Iop_SetElem16x8:
      case Iop_SetElem32x4:
      case Iop_SetElem64x2:
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_V128, triop(op, vatom1, atom2, vatom3));

      case Iop_Perm8x16x2:
         /* (V128, V128, V128) -> V128 */
         complainIfUndefined(mce, atom3, NULL);
         return mkUifUV128(
                   mce,
                   assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3)),
                   mkPCast8x16(mce, vatom3)
                );

      /* Vector FP with rounding mode as the first arg */
      case Iop_Add64Fx2:
      case Iop_Sub64Fx2:
      case Iop_Mul64Fx2:
      case Iop_Div64Fx2:
      case Iop_Scale2_64Fx2:
         return binary64Fx2_w_rm(mce, vatom1, vatom2, vatom3);

      case Iop_Add32Fx4:
      case Iop_Sub32Fx4:
      case Iop_Mul32Fx4:
      case Iop_Div32Fx4:
      case Iop_Scale2_32Fx4:
         return binary32Fx4_w_rm(mce, vatom1, vatom2, vatom3);

      case Iop_Add64Fx4:
      case Iop_Sub64Fx4:
      case Iop_Mul64Fx4:
      case Iop_Div64Fx4:
         return binary64Fx4_w_rm(mce, vatom1, vatom2, vatom3);

      case Iop_Add32Fx8:
      case Iop_Sub32Fx8:
      case Iop_Mul32Fx8:
      case Iop_Div32Fx8:
         return binary32Fx8_w_rm(mce, vatom1, vatom2, vatom3);

      case Iop_F32x4_2toQ16x8:
         return assignNew('V', mce, Ity_V128,
                          binop(Iop_PackEvenLanes16x8,
                                unary32Fx4_w_rm(mce, vatom1, vatom2),
                                unary32Fx4_w_rm(mce, vatom1, vatom3)));
      case Iop_F64x2_2toQ32x4:
         return assignNew('V', mce, Ity_V128,
                          binop(Iop_PackEvenLanes32x4,
                                unary64Fx2_w_rm(mce, vatom1, vatom2),
                                unary64Fx2_w_rm(mce, vatom1, vatom3)));

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Triop");
   }
}
static
IRAtom* expr2vbits_Binop ( MCEnv* mce,
                           IROp op,
                           IRAtom* atom1, IRAtom* atom2,
                           HowUsed hu/*use HuOth if unknown*/ )
{
   IRType  and_or_ty;
   IRAtom* (*uifu)    (MCEnv*, IRAtom*, IRAtom*);
   IRAtom* (*difd)    (MCEnv*, IRAtom*, IRAtom*);
   IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);

   IRAtom* vatom1 = expr2vbits( mce, atom1, HuOth );
   IRAtom* vatom2 = expr2vbits( mce, atom2, HuOth );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));

   switch (op) {

      /* 32-bit SIMD */

      case Iop_Add16x2:
      case Iop_HAdd16Ux2:
      case Iop_HAdd16Sx2:
      case Iop_Sub16x2:
      case Iop_HSub16Ux2:
      case Iop_HSub16Sx2:
      case Iop_QAdd16Sx2:
      case Iop_QSub16Sx2:
      case Iop_QSub16Ux2:
      case Iop_QAdd16Ux2:
         return binary16Ix2(mce, vatom1, vatom2);

      case Iop_Add8x4:
      case Iop_HAdd8Ux4:
      case Iop_HAdd8Sx4:
      case Iop_Sub8x4:
      case Iop_HSub8Ux4:
      case Iop_HSub8Sx4:
      case Iop_QSub8Ux4:
      case Iop_QAdd8Ux4:
      case Iop_QSub8Sx4:
      case Iop_QAdd8Sx4:
         return binary8Ix4(mce, vatom1, vatom2);

      /* 64-bit SIMD */

      case Iop_ShrN8x8:
      case Iop_ShrN16x4:
      case Iop_ShrN32x2:
      case Iop_SarN8x8:
      case Iop_SarN16x4:
      case Iop_SarN32x2:
      case Iop_ShlN16x4:
      case Iop_ShlN32x2:
      case Iop_ShlN8x8:
         /* Same scheme as with all other shifts. */
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
      case Iop_QNarrowBin32Sto16Sx4:
      case Iop_QNarrowBin16Sto8Sx8:
      case Iop_QNarrowBin16Sto8Ux8:
         return vectorNarrowBin64(mce, op, vatom1, vatom2);

      case Iop_PolynomialMul8x8:
         return binary8Ix8(mce, vatom1, vatom2);

      case Iop_MulHi16Sx4:
      case Iop_MulHi16Ux4:
      case Iop_CmpGT16Sx4:
      case Iop_CmpGT16Ux4:
      case Iop_QDMulHi16Sx4:
      case Iop_QRDMulHi16Sx4:
         return binary16Ix4(mce, vatom1, vatom2);

      case Iop_CmpGT32Sx2:
      case Iop_CmpGT32Ux2:
      case Iop_QDMulHi32Sx2:
      case Iop_QRDMulHi32Sx2:
         return binary32Ix2(mce, vatom1, vatom2);

      case Iop_QAdd64Sx1:
      case Iop_QAdd64Ux1:
      case Iop_QSub64Sx1:
      case Iop_QSub64Ux1:
         return binary64Ix1(mce, vatom1, vatom2);

      case Iop_QShlNsatSU8x8:
      case Iop_QShlNsatUU8x8:
      case Iop_QShlNsatSS8x8:
         complainIfUndefined(mce, atom2, NULL);
         return mkPCast8x8(mce, vatom1);

      case Iop_QShlNsatSU16x4:
      case Iop_QShlNsatUU16x4:
      case Iop_QShlNsatSS16x4:
         complainIfUndefined(mce, atom2, NULL);
         return mkPCast16x4(mce, vatom1);

      case Iop_QShlNsatSU32x2:
      case Iop_QShlNsatUU32x2:
      case Iop_QShlNsatSS32x2:
         complainIfUndefined(mce, atom2, NULL);
         return mkPCast32x2(mce, vatom1);

      case Iop_QShlNsatSU64x1:
      case Iop_QShlNsatUU64x1:
      case Iop_QShlNsatSS64x1:
         complainIfUndefined(mce, atom2, NULL);
         return mkPCast32x2(mce, vatom1);

      case Iop_PwMax32Sx2:
      case Iop_PwMax32Ux2:
      case Iop_PwMin32Sx2:
      case Iop_PwMin32Ux2:
      case Iop_PwMax32Fx2:
      case Iop_PwMin32Fx2:
         return assignNew('V', mce, Ity_I64,
                          binop(Iop_PwMax32Ux2,
                                mkPCast32x2(mce, vatom1),
                                mkPCast32x2(mce, vatom2)));

      case Iop_PwMax16Sx4:
      case Iop_PwMax16Ux4:
      case Iop_PwMin16Sx4:
      case Iop_PwMin16Ux4:
         return assignNew('V', mce, Ity_I64,
                          binop(Iop_PwMax16Ux4,
                                mkPCast16x4(mce, vatom1),
                                mkPCast16x4(mce, vatom2)));

      case Iop_PwMax8Sx8:
      case Iop_PwMax8Ux8:
      case Iop_PwMin8Sx8:
      case Iop_PwMin8Ux8:
         return assignNew('V', mce, Ity_I64,
                          binop(Iop_PwMax8Ux8,
                                mkPCast8x8(mce, vatom1),
                                mkPCast8x8(mce, vatom2)));

      case Iop_PwAdd32Fx2:
         return mkPCast32x2(mce,
                   assignNew('V', mce, Ity_I64,
                             binop(Iop_PwAdd32x2,
                                   mkPCast32x2(mce, vatom1),
                                   mkPCast32x2(mce, vatom2))));

      case Iop_PwAdd16x4:
         return mkPCast16x4(mce,
                   assignNew('V', mce, Ity_I64,
                             binop(op, mkPCast16x4(mce, vatom1),
                                   mkPCast16x4(mce, vatom2))));

      case Iop_PwAdd8x8:
         return mkPCast8x8(mce,
                   assignNew('V', mce, Ity_I64,
                             binop(op, mkPCast8x8(mce, vatom1),
                                   mkPCast8x8(mce, vatom2))));

      case Iop_Shl8x8:
      case Iop_Shr8x8:
      case Iop_Sar8x8:
      case Iop_Sal8x8:
         return mkUifU64(mce,
                   assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
                   mkPCast8x8(mce,vatom2)
                );

      case Iop_Shl16x4:
      case Iop_Shr16x4:
      case Iop_Sar16x4:
      case Iop_Sal16x4:
         return mkUifU64(mce,
                   assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
                   mkPCast16x4(mce,vatom2)
                );

      case Iop_Shl32x2:
      case Iop_Shr32x2:
      case Iop_Sar32x2:
      case Iop_Sal32x2:
         return mkUifU64(mce,
                   assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
                   mkPCast32x2(mce,vatom2)
                );
      /* 64-bit data-steering */
      case Iop_InterleaveLO32x2:
      case Iop_InterleaveLO16x4:
      case Iop_InterleaveLO8x8:
      case Iop_InterleaveHI32x2:
      case Iop_InterleaveHI16x4:
      case Iop_InterleaveHI8x8:
      case Iop_CatOddLanes8x8:
      case Iop_CatEvenLanes8x8:
      case Iop_CatOddLanes16x4:
      case Iop_CatEvenLanes16x4:
      case Iop_InterleaveOddLanes8x8:
      case Iop_InterleaveEvenLanes8x8:
      case Iop_InterleaveOddLanes16x4:
      case Iop_InterleaveEvenLanes16x4:
         return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));

      case Iop_GetElem8x8:
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
      case Iop_GetElem16x4:
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
      case Iop_GetElem32x2:
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));

      /* Perm8x8: rearrange values in left arg using steering values
         from right arg.  So rearrange the vbits in the same way but
         pessimise wrt steering values. */
      case Iop_Perm8x8:
         return mkUifU64(
                   mce,
                   assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
                   mkPCast8x8(mce, vatom2)
                );
      case Iop_Sqrt32Fx4:
         return unary32Fx4_w_rm(mce, vatom1, vatom2);
      case Iop_Sqrt64Fx2:
         return unary64Fx2_w_rm(mce, vatom1, vatom2);

      case Iop_ShrN8x16:
      case Iop_ShrN16x8:
      case Iop_ShrN32x4:
      case Iop_ShrN64x2:
      case Iop_SarN8x16:
      case Iop_SarN16x8:
      case Iop_SarN32x4:
      case Iop_SarN64x2:
      case Iop_ShlN8x16:
      case Iop_ShlN16x8:
      case Iop_ShlN32x4:
      case Iop_ShlN64x2:
         /* Same scheme as with all other shifts.  Note: 22 Oct 05:
            this is wrong now, scalar shifts are done properly lazily.
            Vector shifts should be fixed too. */
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));

      /* V x V shifts/rotates are done using the standard lazy scheme. */
      /* For the non-rounding variants of bi-di vector x vector
         shifts (the Iop_Sh.. ops, that is) we use the lazy scheme.
         But note that this is overly pessimistic, because in fact only
         the bottom 8 bits of each lane of the second argument are taken
         into account when shifting.  So really we ought to ignore
         undefinedness in bits 8 and above of each lane in the
         second argument. */
      case Iop_Shl8x16:
      case Iop_Shr8x16:
      case Iop_Sar8x16:
      case Iop_Sal8x16:
      case Iop_Rol8x16:
      case Iop_Sh8Sx16:
      case Iop_Sh8Ux16:
         return mkUifUV128(mce,
                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
                   mkPCast8x16(mce,vatom2)
                );

      case Iop_Shl16x8:
      case Iop_Shr16x8:
      case Iop_Sar16x8:
      case Iop_Sal16x8:
      case Iop_Rol16x8:
      case Iop_Sh16Sx8:
      case Iop_Sh16Ux8:
         return mkUifUV128(mce,
                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
                   mkPCast16x8(mce,vatom2)
                );

      case Iop_Shl32x4:
      case Iop_Shr32x4:
      case Iop_Sar32x4:
      case Iop_Sal32x4:
      case Iop_Rol32x4:
      case Iop_Sh32Sx4:
      case Iop_Sh32Ux4:
         return mkUifUV128(mce,
                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
                   mkPCast32x4(mce,vatom2)
                );

      case Iop_Shl64x2:
      case Iop_Shr64x2:
      case Iop_Sar64x2:
      case Iop_Sal64x2:
      case Iop_Rol64x2:
      case Iop_Sh64Sx2:
      case Iop_Sh64Ux2:
         return mkUifUV128(mce,
                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
                   mkPCast64x2(mce,vatom2)
                );
      /* For the rounding variants of bi-di vector x vector shifts, the
         rounding adjustment can cause undefinedness to propagate through
         the entire lane, in the worst case.  Too complex to handle
         properly .. just UifU the arguments and then PCast them.
         Suboptimal but safe. */
      case Iop_Rsh8Sx16:
      case Iop_Rsh8Ux16:
         return binary8Ix16(mce, vatom1, vatom2);
      case Iop_Rsh16Sx8:
      case Iop_Rsh16Ux8:
         return binary16Ix8(mce, vatom1, vatom2);
      case Iop_Rsh32Sx4:
      case Iop_Rsh32Ux4:
         return binary32Ix4(mce, vatom1, vatom2);
      case Iop_Rsh64Sx2:
      case Iop_Rsh64Ux2:
         return binary64Ix2(mce, vatom1, vatom2);

      case Iop_F32ToFixed32Ux4_RZ:
      case Iop_F32ToFixed32Sx4_RZ:
      case Iop_Fixed32UToF32x4_RN:
      case Iop_Fixed32SToF32x4_RN:
         complainIfUndefined(mce, atom2, NULL);
         return mkPCast32x4(mce, vatom1);

      case Iop_F32ToFixed32Ux2_RZ:
      case Iop_F32ToFixed32Sx2_RZ:
      case Iop_Fixed32UToF32x2_RN:
      case Iop_Fixed32SToF32x2_RN:
         complainIfUndefined(mce, atom2, NULL);
         return mkPCast32x2(mce, vatom1);
      case Iop_CmpGT8Sx16:
      case Iop_CmpGT8Ux16:
      case Iop_QAddExtUSsatSS8x16:
      case Iop_QAddExtSUsatUU8x16:
      case Iop_MulHi8Sx16:
      case Iop_MulHi8Ux16:
      case Iop_PolynomialMul8x16:
      case Iop_PolynomialMulAdd8x16:
         return binary8Ix16(mce, vatom1, vatom2);

      case Iop_MulHi16Sx8:
      case Iop_MulHi16Ux8:
      case Iop_CmpGT16Sx8:
      case Iop_CmpGT16Ux8:
      case Iop_QAddExtUSsatSS16x8:
      case Iop_QAddExtSUsatUU16x8:
      case Iop_QDMulHi16Sx8:
      case Iop_QRDMulHi16Sx8:
      case Iop_PolynomialMulAdd16x8:
         return binary16Ix8(mce, vatom1, vatom2);

      case Iop_CmpGT32Sx4:
      case Iop_CmpGT32Ux4:
      case Iop_QAddExtUSsatSS32x4:
      case Iop_QAddExtSUsatUU32x4:
      case Iop_MulHi32Sx4:
      case Iop_MulHi32Ux4:
      case Iop_QDMulHi32Sx4:
      case Iop_QRDMulHi32Sx4:
      case Iop_PolynomialMulAdd32x4:
         return binary32Ix4(mce, vatom1, vatom2);

      case Iop_CmpGT64Sx2:
      case Iop_CmpGT64Ux2:
      case Iop_QAddExtUSsatSS64x2:
      case Iop_QAddExtSUsatUU64x2:
      case Iop_PolynomialMulAdd64x2:
      case Iop_CipherV128:
      case Iop_CipherLV128:
      case Iop_NCipherV128:
      case Iop_NCipherLV128:
      case Iop_MulI128by10E:
      case Iop_MulI128by10ECarry:
         return binary64Ix2(mce, vatom1, vatom2);

      case Iop_CmpNEZ128x1:
         return binary128Ix1(mce, vatom1, vatom2);

      case Iop_QNarrowBin64Sto32Sx4:
      case Iop_QNarrowBin64Uto32Ux4:
      case Iop_QNarrowBin32Sto16Sx8:
      case Iop_QNarrowBin32Uto16Ux8:
      case Iop_QNarrowBin32Sto16Ux8:
      case Iop_QNarrowBin16Sto8Sx16:
      case Iop_QNarrowBin16Uto8Ux16:
      case Iop_QNarrowBin16Sto8Ux16:
         return vectorNarrowBinV128(mce, op, vatom1, vatom2);

      case Iop_Max64Fx2:
      case Iop_Min64Fx2:
      case Iop_CmpLT64Fx2:
      case Iop_CmpLE64Fx2:
      case Iop_CmpEQ64Fx2:
      case Iop_CmpUN64Fx2:
      case Iop_RecipStep64Fx2:
      case Iop_RSqrtStep64Fx2:
         return binary64Fx2(mce, vatom1, vatom2);

      case Iop_Add64F0x2:
      case Iop_Sub64F0x2:
      case Iop_Mul64F0x2:
      case Iop_Div64F0x2:
      case Iop_Max64F0x2:
      case Iop_Min64F0x2:
      case Iop_CmpLT64F0x2:
      case Iop_CmpLE64F0x2:
      case Iop_CmpEQ64F0x2:
      case Iop_CmpUN64F0x2:
         return binary64F0x2(mce, vatom1, vatom2);

      case Iop_Max32Fx4:
      case Iop_Min32Fx4:
      case Iop_CmpLT32Fx4:
      case Iop_CmpLE32Fx4:
      case Iop_CmpEQ32Fx4:
      case Iop_CmpUN32Fx4:
      case Iop_CmpGT32Fx4:
      case Iop_CmpGE32Fx4:
      case Iop_RecipStep32Fx4:
      case Iop_RSqrtStep32Fx4:
         return binary32Fx4(mce, vatom1, vatom2);

      case Iop_Max32Fx2:
      case Iop_Min32Fx2:
      case Iop_CmpEQ32Fx2:
      case Iop_CmpGT32Fx2:
      case Iop_CmpGE32Fx2:
      case Iop_RecipStep32Fx2:
      case Iop_RSqrtStep32Fx2:
         return binary32Fx2(mce, vatom1, vatom2);
      case Iop_Add32F0x4:
      case Iop_Sub32F0x4:
      case Iop_Mul32F0x4:
      case Iop_Div32F0x4:
      case Iop_Max32F0x4:
      case Iop_Min32F0x4:
      case Iop_CmpLT32F0x4:
      case Iop_CmpLE32F0x4:
      case Iop_CmpEQ32F0x4:
      case Iop_CmpUN32F0x4:
         return binary32F0x4(mce, vatom1, vatom2);

      case Iop_QShlNsatSU8x16:
      case Iop_QShlNsatUU8x16:
      case Iop_QShlNsatSS8x16:
         complainIfUndefined(mce, atom2, NULL);
         return mkPCast8x16(mce, vatom1);

      case Iop_QShlNsatSU16x8:
      case Iop_QShlNsatUU16x8:
      case Iop_QShlNsatSS16x8:
         complainIfUndefined(mce, atom2, NULL);
         return mkPCast16x8(mce, vatom1);

      case Iop_QShlNsatSU32x4:
      case Iop_QShlNsatUU32x4:
      case Iop_QShlNsatSS32x4:
         complainIfUndefined(mce, atom2, NULL);
         return mkPCast32x4(mce, vatom1);

      case Iop_QShlNsatSU64x2:
      case Iop_QShlNsatUU64x2:
      case Iop_QShlNsatSS64x2:
         complainIfUndefined(mce, atom2, NULL);
         return mkPCast32x4(mce, vatom1);
      /* Q-and-Qshift-by-imm-and-narrow of the form (V128, I8) -> V128.
         To make this simpler, do the following:
         * complain if the shift amount (the I8) is undefined
         * pcast each lane at the wide width
         * truncate each lane to half width
         * pcast the resulting 64-bit value to a single bit and use
           that as the least significant bit of the upper half of the
           result. */
      case Iop_QandQShrNnarrow64Uto32Ux2:
      case Iop_QandQSarNnarrow64Sto32Sx2:
      case Iop_QandQSarNnarrow64Sto32Ux2:
      case Iop_QandQRShrNnarrow64Uto32Ux2:
      case Iop_QandQRSarNnarrow64Sto32Sx2:
      case Iop_QandQRSarNnarrow64Sto32Ux2:
      case Iop_QandQShrNnarrow32Uto16Ux4:
      case Iop_QandQSarNnarrow32Sto16Sx4:
      case Iop_QandQSarNnarrow32Sto16Ux4:
      case Iop_QandQRShrNnarrow32Uto16Ux4:
      case Iop_QandQRSarNnarrow32Sto16Sx4:
      case Iop_QandQRSarNnarrow32Sto16Ux4:
      case Iop_QandQShrNnarrow16Uto8Ux8:
      case Iop_QandQSarNnarrow16Sto8Sx8:
      case Iop_QandQSarNnarrow16Sto8Ux8:
      case Iop_QandQRShrNnarrow16Uto8Ux8:
      case Iop_QandQRSarNnarrow16Sto8Sx8:
      case Iop_QandQRSarNnarrow16Sto8Ux8:
      {
         IRAtom* (*fnPessim) (MCEnv*, IRAtom*) = NULL;
         IROp opNarrow = Iop_INVALID;
         switch (op) {
            case Iop_QandQShrNnarrow64Uto32Ux2:
            case Iop_QandQSarNnarrow64Sto32Sx2:
            case Iop_QandQSarNnarrow64Sto32Ux2:
            case Iop_QandQRShrNnarrow64Uto32Ux2:
            case Iop_QandQRSarNnarrow64Sto32Sx2:
            case Iop_QandQRSarNnarrow64Sto32Ux2:
               fnPessim = mkPCast64x2;
               opNarrow = Iop_NarrowUn64to32x2;
               break;
            case Iop_QandQShrNnarrow32Uto16Ux4:
            case Iop_QandQSarNnarrow32Sto16Sx4:
            case Iop_QandQSarNnarrow32Sto16Ux4:
            case Iop_QandQRShrNnarrow32Uto16Ux4:
            case Iop_QandQRSarNnarrow32Sto16Sx4:
            case Iop_QandQRSarNnarrow32Sto16Ux4:
               fnPessim = mkPCast32x4;
               opNarrow = Iop_NarrowUn32to16x4;
               break;
            case Iop_QandQShrNnarrow16Uto8Ux8:
            case Iop_QandQSarNnarrow16Sto8Sx8:
            case Iop_QandQSarNnarrow16Sto8Ux8:
            case Iop_QandQRShrNnarrow16Uto8Ux8:
            case Iop_QandQRSarNnarrow16Sto8Sx8:
            case Iop_QandQRSarNnarrow16Sto8Ux8:
               fnPessim = mkPCast16x8;
               opNarrow = Iop_NarrowUn16to8x8;
               break;
            default:
               tl_assert(0);
         }
         complainIfUndefined(mce, atom2, NULL);
         // Pessimised shift result
         IRAtom* shV
            = fnPessim(mce, vatom1);
         // Narrowed, pessimised shift result
         IRAtom* shVnarrowed
            = assignNew('V', mce, Ity_I64, unop(opNarrow, shV));
         // Generates: Def--(63)--Def PCast-to-I1(narrowed)
         IRAtom* qV = mkPCastXXtoXXlsb(mce, shVnarrowed, Ity_I64);
         // and assemble the result
         return assignNew('V', mce, Ity_V128,
                          binop(Iop_64HLtoV128, qV, shVnarrowed));
      }
      case Iop_Mull32Sx2:
      case Iop_Mull32Ux2:
      case Iop_QDMull32Sx2:
         return vectorWidenI64(mce, Iop_Widen32Sto64x2,
                               mkUifU64(mce, vatom1, vatom2));

      case Iop_Mull16Sx4:
      case Iop_Mull16Ux4:
      case Iop_QDMull16Sx4:
         return vectorWidenI64(mce, Iop_Widen16Sto32x4,
                               mkUifU64(mce, vatom1, vatom2));

      case Iop_Mull8Sx8:
      case Iop_Mull8Ux8:
      case Iop_PolynomialMull8x8:
         return vectorWidenI64(mce, Iop_Widen8Sto16x8,
                               mkUifU64(mce, vatom1, vatom2));

      case Iop_PwAdd32x4:
         return mkPCast32x4(mce,
                   assignNew('V', mce, Ity_V128,
                             binop(op, mkPCast32x4(mce, vatom1),
                                   mkPCast32x4(mce, vatom2))));

      case Iop_PwAdd16x8:
         return mkPCast16x8(mce,
                   assignNew('V', mce, Ity_V128,
                             binop(op, mkPCast16x8(mce, vatom1),
                                   mkPCast16x8(mce, vatom2))));

      case Iop_PwAdd8x16:
         return mkPCast8x16(mce,
                   assignNew('V', mce, Ity_V128,
                             binop(op, mkPCast8x16(mce, vatom1),
                                   mkPCast8x16(mce, vatom2))));

      /* V128-bit data-steering */
      case Iop_SetV128lo32:
      case Iop_SetV128lo64:
      case Iop_64HLtoV128:
      case Iop_InterleaveLO64x2:
      case Iop_InterleaveLO32x4:
      case Iop_InterleaveLO16x8:
      case Iop_InterleaveLO8x16:
      case Iop_InterleaveHI64x2:
      case Iop_InterleaveHI32x4:
      case Iop_InterleaveHI16x8:
      case Iop_InterleaveHI8x16:
      case Iop_CatOddLanes8x16:
      case Iop_CatOddLanes16x8:
      case Iop_CatOddLanes32x4:
      case Iop_CatEvenLanes8x16:
      case Iop_CatEvenLanes16x8:
      case Iop_CatEvenLanes32x4:
      case Iop_InterleaveOddLanes8x16:
      case Iop_InterleaveOddLanes16x8:
      case Iop_InterleaveOddLanes32x4:
      case Iop_InterleaveEvenLanes8x16:
      case Iop_InterleaveEvenLanes16x8:
      case Iop_InterleaveEvenLanes32x4:
      case Iop_PackOddLanes8x16:
      case Iop_PackOddLanes16x8:
      case Iop_PackOddLanes32x4:
      case Iop_PackEvenLanes8x16:
      case Iop_PackEvenLanes16x8:
      case Iop_PackEvenLanes32x4:
         return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));

      case Iop_GetElem8x16:
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
      case Iop_GetElem16x8:
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
      case Iop_GetElem32x4:
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
      case Iop_GetElem64x2:
         complainIfUndefined(mce, atom2, NULL);
         return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));

      /* Perm8x16: rearrange values in left arg using steering values
         from right arg.  So rearrange the vbits in the same way but
         pessimise wrt steering values.  Perm32x4 ditto. */
      case Iop_Perm8x16:
         return mkUifUV128(
                   mce,
                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
                   mkPCast8x16(mce, vatom2)
                );
      case Iop_Perm32x4:
         return mkUifUV128(
                   mce,
                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
                   mkPCast32x4(mce, vatom2)
                );

      /* These two take the lower half of each 16-bit lane, sign/zero
         extend it to 32, and multiply together, producing a 32x4
         result (and implicitly ignoring half the operand bits).  So
         treat it as a bunch of independent 16x8 operations, but then
         do 32-bit shifts left-right to copy the lower half results
         (which are all 0s or all 1s due to PCasting in binary16Ix8)
         into the upper half of each result lane. */
      case Iop_MullEven16Ux8:
      case Iop_MullEven16Sx8: {
         IRAtom* at;
         at = binary16Ix8(mce,vatom1,vatom2);
         at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
         at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
         return at;
      }
      /* Same deal as Iop_MullEven16{S,U}x8 */
      case Iop_MullEven8Ux16:
      case Iop_MullEven8Sx16: {
         IRAtom* at;
         at = binary8Ix16(mce,vatom1,vatom2);
         at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
         at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
         return at;
      }

      /* Same deal as Iop_MullEven16{S,U}x8 */
      case Iop_MullEven32Ux4:
      case Iop_MullEven32Sx4: {
         IRAtom* at;
         at = binary32Ix4(mce,vatom1,vatom2);
         at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN64x2, at, mkU8(32)));
         at = assignNew('V', mce, Ity_V128, binop(Iop_SarN64x2, at, mkU8(32)));
         return at;
      }

      /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
         32x4 -> 16x8 laneage, discarding the upper half of each lane.
         Simply apply same op to the V bits, since this is really no
         more than a data steering operation. */
      case Iop_NarrowBin32to16x8:
      case Iop_NarrowBin16to8x16:
      case Iop_NarrowBin64to32x4:
         return assignNew('V', mce, Ity_V128,
                          binop(op, vatom1, vatom2));
4028 case Iop_I128StoBCD128
:
4029 /* Same scheme as with all other shifts. Note: 10 Nov 05:
4030 this is wrong now, scalar shifts are done properly lazily.
4031 Vector shifts should be fixed too. */
4032 complainIfUndefined(mce
, atom2
, NULL
);
4033 return assignNew('V', mce
, Ity_V128
, binop(op
, vatom1
, atom2
));
4037 return mkLazy2(mce
, Ity_V128
, vatom1
, vatom2
);
4042 complainIfUndefined(mce
, atom2
, NULL
);
4043 return assignNew('V', mce
, Ity_V128
, binop(op
, vatom1
, atom2
));
4045 /* I128-bit data-steering */
4047 return assignNew('V', mce
, Ity_I128
, binop(op
, vatom1
, vatom2
));
         return binary64Fx4(mce, vatom1, vatom2);

         return binary32Fx8(mce, vatom1, vatom2);

      /* V256-bit data-steering */
      case Iop_V128HLtoV256:
         return assignNew('V', mce, Ity_V256, binop(op, vatom1, vatom2));
      /* Scalar floating point */

         /* I32(rm) x F32 -> I64 */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

         /* I32(rm) x I64 -> F32 */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      case Iop_RoundF64toInt:
      case Iop_RoundF64toF32:
      case Iop_RecpExpF64:
         /* I32(rm) x I64/F64 -> I64/F64 */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_RoundD64toInt:
         /* I32(rm) x D64 -> D64 */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_RoundD128toInt:
         /* I32(rm) x D128 -> D128 */
         return mkLazy2(mce, Ity_I128, vatom1, vatom2);

      case Iop_RoundF128toInt:
         /* I32(rm) x F128 -> F128 */
         return mkLazy2(mce, Ity_I128, vatom1, vatom2);

         /* I32(rm) x I64/D64 -> D64/I64 */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

         /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D32/F32 */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

         /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D64/F64 */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_F128toD128:
      case Iop_D128toF128:
         /* I32(rm) x F32/F64/F128/D32/D64/D128 -> D128/F128 */
         return mkLazy2(mce, Ity_I128, vatom1, vatom2);

      case Iop_RoundF32toInt:
      case Iop_RecpExpF32:
         /* I32(rm) x I32/F32 -> I32/F32 */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

         /* I32(rm) x F128 -> F128 */
         return mkLazy2(mce, Ity_I128, vatom1, vatom2);
         /* First arg is I32 (rounding mode), second is F32/I32 (data). */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

         /* First arg is I32 (rounding mode), second is F64/F32 (data). */
         return mkLazy2(mce, Ity_I16, vatom1, vatom2);

      case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32 */
      case Iop_F128toI32U: /* IRRoundingMode(I32) x F128 -> unsigned I32 */
      case Iop_F128toF32:  /* IRRoundingMode(I32) x F128 -> F32 */
      case Iop_D128toI32S: /* IRRoundingMode(I32) x D128 -> signed I32 */
      case Iop_D128toI32U: /* IRRoundingMode(I32) x D128 -> unsigned I32 */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      case Iop_F128toI128S: /* IRRoundingMode(I32) x F128 -> signed I128 */
      case Iop_RndF128:     /* IRRoundingMode(I32) x F128 -> F128 */
         return mkLazy2(mce, Ity_I128, vatom1, vatom2);

      case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64 */
      case Iop_F128toI64U: /* IRRoundingMode(I32) x F128 -> unsigned I64 */
      case Iop_F128toF64:  /* IRRoundingMode(I32) x F128 -> F64 */
      case Iop_D128toD64:  /* IRRoundingMode(I64) x D128 -> D64 */
      case Iop_D128toI64S: /* IRRoundingMode(I64) x D128 -> signed I64 */
      case Iop_D128toI64U: /* IRRoundingMode(I32) x D128 -> unsigned I64 */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_F64HLtoF128:
      case Iop_D64HLtoD128:
         return assignNew('V', mce, Ity_I128,
                          binop(Iop_64HLto128, vatom1, vatom2));

         /* First arg is I32 (rounding mode), second is F64/D64 (data). */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

         /* First arg is I32 (rounding mode), second is D64 (data). */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

         /* First arg is I32 (rounding mode), second is F64 (data). */
         return mkLazy2(mce, Ity_I16, vatom1, vatom2);

      case Iop_InsertExpD64:
         /* I64 x I64 -> D64 */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_InsertExpD128:
         /* I64 x I128 -> D128 */
         return mkLazy2(mce, Ity_I128, vatom1, vatom2);

      case Iop_CmpExpD128:
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

         /* F32 x F32 -> F32 */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

         /* F64 x F64 -> F64 */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
      /* non-FP after here */

      case Iop_DivModU64to32:
      case Iop_DivModS64to32:
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_DivModU128to64:
      case Iop_DivModS128to64:
         return mkLazy2(mce, Ity_I128, vatom1, vatom2);

      case Iop_8HLto16:
         return assignNew('V', mce, Ity_I16, binop(op, vatom1, vatom2));
      case Iop_16HLto32:
         return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
      case Iop_32HLto64:
         return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));

      case Iop_DivModU64to64:
      case Iop_DivModS64to64: {
         IRAtom* vTmp64 = mkLazy2(mce, Ity_I64, vatom1, vatom2);
         return assignNew('V', mce, Ity_I128,
                          binop(Iop_64HLto128, vTmp64, vTmp64));
      }
4255 IRAtom
* vLo64
= mkLeft64(mce
, mkUifU64(mce
, vatom1
,vatom2
));
4256 IRAtom
* vHi64
= mkPCastTo(mce
, Ity_I64
, vLo64
);
4257 return assignNew('V', mce
, Ity_I128
,
4258 binop(Iop_64HLto128
, vHi64
, vLo64
));
4261 case Iop_DivModU32to32
:
4262 case Iop_DivModS32to32
: {
4263 IRAtom
* vTmp32
= mkLazy2(mce
, Ity_I32
, vatom1
, vatom2
);
4264 return assignNew('V', mce
, Ity_I64
,
4265 binop(Iop_32HLto64
, vTmp32
, vTmp32
));
4270 IRAtom
* vLo32
= mkLeft32(mce
, mkUifU32(mce
, vatom1
,vatom2
));
4271 IRAtom
* vHi32
= mkPCastTo(mce
, Ity_I32
, vLo32
);
4272 return assignNew('V', mce
, Ity_I64
,
4273 binop(Iop_32HLto64
, vHi32
, vLo32
));
4278 IRAtom
* vLo16
= mkLeft16(mce
, mkUifU16(mce
, vatom1
,vatom2
));
4279 IRAtom
* vHi16
= mkPCastTo(mce
, Ity_I16
, vLo16
);
4280 return assignNew('V', mce
, Ity_I32
,
4281 binop(Iop_16HLto32
, vHi16
, vLo16
));
4286 IRAtom
* vLo8
= mkLeft8(mce
, mkUifU8(mce
, vatom1
,vatom2
));
4287 IRAtom
* vHi8
= mkPCastTo(mce
, Ity_I8
, vLo8
);
4288 return assignNew('V', mce
, Ity_I16
, binop(Iop_8HLto16
, vHi8
, vLo8
));
      case Iop_Sad8Ux4: /* maybe we could do better?  ftm, do mkLazy2. */
      case Iop_QAdd32S: /* could probably do better */
      case Iop_QSub32S: /* could probably do better */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
4307 if (mce
->dlbo
.dl_Add32
== DLexpensive
4308 || (mce
->dlbo
.dl_Add32
== DLauto
&& hu
== HuOth
)) {
4309 return expensiveAddSub(mce
,True
,Ity_I32
,
4310 vatom1
,vatom2
, atom1
,atom2
);
4312 goto cheap_AddSub32
;
4315 if (mce
->dlbo
.dl_Sub32
== DLexpensive
4316 || (mce
->dlbo
.dl_Sub32
== DLauto
&& hu
== HuOth
)) {
4317 return expensiveAddSub(mce
,False
,Ity_I32
,
4318 vatom1
,vatom2
, atom1
,atom2
);
4320 goto cheap_AddSub32
;
4325 return mkLeft32(mce
, mkUifU32(mce
, vatom1
,vatom2
));
4331 return doCmpORD(mce
, op
, vatom1
,vatom2
, atom1
,atom2
);
4334 if (mce
->dlbo
.dl_Add64
== DLexpensive
4335 || (mce
->dlbo
.dl_Add64
== DLauto
&& hu
== HuOth
)) {
4336 return expensiveAddSub(mce
,True
,Ity_I64
,
4337 vatom1
,vatom2
, atom1
,atom2
);
4339 goto cheap_AddSub64
;
4342 if (mce
->dlbo
.dl_Sub64
== DLexpensive
4343 || (mce
->dlbo
.dl_Sub64
== DLauto
&& hu
== HuOth
)) {
4344 return expensiveAddSub(mce
,False
,Ity_I64
,
4345 vatom1
,vatom2
, atom1
,atom2
);
4347 goto cheap_AddSub64
;
4352 return mkLeft64(mce
, mkUifU64(mce
, vatom1
,vatom2
));
4357 return mkLeft16(mce
, mkUifU16(mce
, vatom1
,vatom2
));
4362 return mkLeft8(mce
, mkUifU8(mce
, vatom1
,vatom2
));
4365 case Iop_CmpEQ64
: case Iop_CmpNE64
:
4366 if (mce
->dlbo
.dl_CmpEQ64_CmpNE64
== DLexpensive
)
4367 goto expensive_cmp64
;
4372 case Iop_ExpCmpNE64
:
4373 return expensiveCmpEQorNE(mce
,Ity_I64
, vatom1
,vatom2
, atom1
,atom2
);
4376 case Iop_CmpLE64S
: case Iop_CmpLE64U
:
4377 case Iop_CmpLT64U
: case Iop_CmpLT64S
:
4378 return mkPCastTo(mce
, Ity_I1
, mkUifU64(mce
, vatom1
,vatom2
));
4381 case Iop_CmpEQ32
: case Iop_CmpNE32
:
4382 if (mce
->dlbo
.dl_CmpEQ32_CmpNE32
== DLexpensive
)
4383 goto expensive_cmp32
;
4388 case Iop_ExpCmpNE32
:
4389 return expensiveCmpEQorNE(mce
,Ity_I32
, vatom1
,vatom2
, atom1
,atom2
);
4392 case Iop_CmpLE32S
: case Iop_CmpLE32U
:
4393 case Iop_CmpLT32U
: case Iop_CmpLT32S
:
4394 return mkPCastTo(mce
, Ity_I1
, mkUifU32(mce
, vatom1
,vatom2
));
4397 case Iop_CmpEQ16
: case Iop_CmpNE16
:
4398 if (mce
->dlbo
.dl_CmpEQ16_CmpNE16
== DLexpensive
)
4399 goto expensive_cmp16
;
4404 case Iop_ExpCmpNE16
:
4405 return expensiveCmpEQorNE(mce
,Ity_I16
, vatom1
,vatom2
, atom1
,atom2
);
4408 return mkPCastTo(mce
, Ity_I1
, mkUifU16(mce
, vatom1
,vatom2
));
4411 case Iop_CmpEQ8
: case Iop_CmpNE8
:
4412 if (mce
->dlbo
.dl_CmpEQ8_CmpNE8
== DLexpensive
)
4413 goto expensive_cmp8
;
4418 return expensiveCmpEQorNE(mce
,Ity_I8
, vatom1
,vatom2
, atom1
,atom2
);
4421 return mkPCastTo(mce
, Ity_I1
, mkUifU8(mce
, vatom1
,vatom2
));
      ////---- end CmpXX{64,32,16,8}

      case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
      case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
      case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
      case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
         /* Just say these all produce a defined result, regardless
            of their arguments.  See COMMENT_ON_CasCmpEQ in this file. */
         return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));
      case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
         return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );

      case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
         return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );

      case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
         return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );

      case Iop_Shl8: case Iop_Shr8: case Iop_Sar8:
         return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
4446 uifu
= mkUifUV256
; difd
= mkDifDV256
;
4447 and_or_ty
= Ity_V256
; improve
= mkImproveANDV256
; goto do_And_Or
;
4449 uifu
= mkUifUV128
; difd
= mkDifDV128
;
4450 and_or_ty
= Ity_V128
; improve
= mkImproveANDV128
; goto do_And_Or
;
4452 uifu
= mkUifU64
; difd
= mkDifD64
;
4453 and_or_ty
= Ity_I64
; improve
= mkImproveAND64
; goto do_And_Or
;
4455 uifu
= mkUifU32
; difd
= mkDifD32
;
4456 and_or_ty
= Ity_I32
; improve
= mkImproveAND32
; goto do_And_Or
;
4458 uifu
= mkUifU16
; difd
= mkDifD16
;
4459 and_or_ty
= Ity_I16
; improve
= mkImproveAND16
; goto do_And_Or
;
4461 uifu
= mkUifU8
; difd
= mkDifD8
;
4462 and_or_ty
= Ity_I8
; improve
= mkImproveAND8
; goto do_And_Or
;
4465 uifu
= mkUifUV256
; difd
= mkDifDV256
;
4466 and_or_ty
= Ity_V256
; improve
= mkImproveORV256
; goto do_And_Or
;
4468 uifu
= mkUifUV128
; difd
= mkDifDV128
;
4469 and_or_ty
= Ity_V128
; improve
= mkImproveORV128
; goto do_And_Or
;
4471 uifu
= mkUifU64
; difd
= mkDifD64
;
4472 and_or_ty
= Ity_I64
; improve
= mkImproveOR64
; goto do_And_Or
;
4474 uifu
= mkUifU32
; difd
= mkDifD32
;
4475 and_or_ty
= Ity_I32
; improve
= mkImproveOR32
; goto do_And_Or
;
4477 uifu
= mkUifU16
; difd
= mkDifD16
;
4478 and_or_ty
= Ity_I16
; improve
= mkImproveOR16
; goto do_And_Or
;
4480 uifu
= mkUifU8
; difd
= mkDifD8
;
4481 and_or_ty
= Ity_I8
; improve
= mkImproveOR8
; goto do_And_Or
;
4488 difd(mce
, uifu(mce
, vatom1
, vatom2
),
4489 difd(mce
, improve(mce
, atom1
, vatom1
),
4490 improve(mce
, atom2
, vatom2
) ) ) );
4493 return mkUifU8(mce
, vatom1
, vatom2
);
4495 return mkUifU16(mce
, vatom1
, vatom2
);
4497 return mkUifU32(mce
, vatom1
, vatom2
);
4499 return mkUifU64(mce
, vatom1
, vatom2
);
4501 return mkUifUV128(mce
, vatom1
, vatom2
);
4503 return mkUifUV256(mce
, vatom1
, vatom2
);
4515 /* Same scheme as with all other shifts. Note: 22 Oct 05:
4516 this is wrong now, scalar shifts are done properly lazily.
4517 Vector shifts should be fixed too. */
4518 complainIfUndefined(mce
, atom2
, NULL
);
4519 return assignNew('V', mce
, Ity_V256
, binop(op
, vatom1
, atom2
));
4528 case Iop_CmpGT8Sx32
:
4534 return binary8Ix32(mce
, vatom1
, vatom2
);
4536 case Iop_QSub16Ux16
:
4537 case Iop_QSub16Sx16
:
4540 case Iop_MulHi16Sx16
:
4541 case Iop_MulHi16Ux16
:
4546 case Iop_CmpGT16Sx16
:
4547 case Iop_CmpEQ16x16
:
4549 case Iop_QAdd16Ux16
:
4550 case Iop_QAdd16Sx16
:
4552 return binary16Ix16(mce
, vatom1
, vatom2
);
4555 case Iop_CmpGT32Sx8
:
4563 return binary32Ix8(mce
, vatom1
, vatom2
);
4568 case Iop_CmpGT64Sx4
:
4569 return binary64Ix4(mce
, vatom1
, vatom2
);
4571 /* Perm32x8: rearrange values in left arg using steering values
4572 from right arg. So rearrange the vbits in the same way but
4573 pessimise wrt steering values. */
4577 assignNew('V', mce
, Ity_V256
, binop(op
, vatom1
, atom2
)),
4578 mkPCast32x8(mce
, vatom2
)
      /* Q-and-Qshift-by-vector of the form (V128, V128) -> V256.
         Handle the shifted results in the same way that other
         binary Q ops are handled, eg QSub: UifU the two args,
         then pessimise -- which is binaryNIxM.  But for the upper
         V128, we need to generate just 1 bit which is the
         pessimised shift result, with 127 defined zeroes above it.

         Note that this is overly pessimistic in that in fact only the
         bottom 8 bits of each lane of the second arg determine the shift
         amount.  Really we ought to ignore any undefinedness in the
         rest of the lanes of the second arg. */
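      /* For illustration (a restatement of the scheme above): for,
         say, Iop_QandUQsh32x4, the lower V128 of the shadow result is
         binary32Ix4(vatom1, vatom2) -- the usual pessimised per-lane
         result -- and the upper V128 is mkPCastXXtoXXlsb of that:
         127 defined zero bits above a single bit which is defined
         only if the whole shifted result is defined. */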
4592 case Iop_QandSQsh64x2
: case Iop_QandUQsh64x2
:
4593 case Iop_QandSQRsh64x2
: case Iop_QandUQRsh64x2
:
4594 case Iop_QandSQsh32x4
: case Iop_QandUQsh32x4
:
4595 case Iop_QandSQRsh32x4
: case Iop_QandUQRsh32x4
:
4596 case Iop_QandSQsh16x8
: case Iop_QandUQsh16x8
:
4597 case Iop_QandSQRsh16x8
: case Iop_QandUQRsh16x8
:
4598 case Iop_QandSQsh8x16
: case Iop_QandUQsh8x16
:
4599 case Iop_QandSQRsh8x16
: case Iop_QandUQRsh8x16
:
4601 // The function to generate the pessimised shift result
4602 IRAtom
* (*binaryNIxM
)(MCEnv
*,IRAtom
*,IRAtom
*) = NULL
;
4604 case Iop_QandSQsh64x2
:
4605 case Iop_QandUQsh64x2
:
4606 case Iop_QandSQRsh64x2
:
4607 case Iop_QandUQRsh64x2
:
4608 binaryNIxM
= binary64Ix2
;
4610 case Iop_QandSQsh32x4
:
4611 case Iop_QandUQsh32x4
:
4612 case Iop_QandSQRsh32x4
:
4613 case Iop_QandUQRsh32x4
:
4614 binaryNIxM
= binary32Ix4
;
4616 case Iop_QandSQsh16x8
:
4617 case Iop_QandUQsh16x8
:
4618 case Iop_QandSQRsh16x8
:
4619 case Iop_QandUQRsh16x8
:
4620 binaryNIxM
= binary16Ix8
;
4622 case Iop_QandSQsh8x16
:
4623 case Iop_QandUQsh8x16
:
4624 case Iop_QandSQRsh8x16
:
4625 case Iop_QandUQRsh8x16
:
4626 binaryNIxM
= binary8Ix16
;
4631 tl_assert(binaryNIxM
);
4632 // Pessimised shift result, shV[127:0]
4633 IRAtom
* shV
= binaryNIxM(mce
, vatom1
, vatom2
);
4634 // Generates: Def--(127)--Def PCast-to-I1(shV)
4635 IRAtom
* qV
= mkPCastXXtoXXlsb(mce
, shV
, Ity_V128
);
4636 // and assemble the result
4637 return assignNew('V', mce
, Ity_V256
,
4638 binop(Iop_V128HLtoV256
, qV
, shV
));
4643 VG_(tool_panic
)("memcheck:expr2vbits_Binop");
4649 IRExpr
* expr2vbits_Unop ( MCEnv
* mce
, IROp op
, IRAtom
* atom
)
4651 /* For the widening operations {8,16,32}{U,S}to{16,32,64}, the
4652 selection of shadow operation implicitly duplicates the logic in
4653 do_shadow_LoadG and should be kept in sync (in the very unlikely
4654 event that the interpretation of such widening ops changes in
4655 future). See comment in do_shadow_LoadG. */
4656 IRAtom
* vatom
= expr2vbits( mce
, atom
, HuOth
);
4657 tl_assert(isOriginalAtom(mce
,atom
));
4662 case Iop_RSqrtEst64Fx2
:
4663 case Iop_RecipEst64Fx2
:
4664 case Iop_Log2_64Fx2
:
4665 return unary64Fx2(mce
, vatom
);
4667 case Iop_Sqrt64F0x2
:
4668 return unary64F0x2(mce
, vatom
);
4671 case Iop_RSqrtEst32Fx8
:
4672 case Iop_RecipEst32Fx8
:
4673 return unary32Fx8(mce
, vatom
);
4676 return unary64Fx4(mce
, vatom
);
4678 case Iop_RecipEst32Fx4
:
4681 case Iop_QFtoI32Ux4_RZ
:
4682 case Iop_QFtoI32Sx4_RZ
:
4683 case Iop_RoundF32x4_RM
:
4684 case Iop_RoundF32x4_RP
:
4685 case Iop_RoundF32x4_RN
:
4686 case Iop_RoundF32x4_RZ
:
4687 case Iop_RecipEst32Ux4
:
4690 case Iop_RSqrtEst32Fx4
:
4691 case Iop_Log2_32Fx4
:
4692 return unary32Fx4(mce
, vatom
);
4696 case Iop_RecipEst32Fx2
:
4697 case Iop_RecipEst32Ux2
:
4700 case Iop_RSqrtEst32Fx2
:
4701 return unary32Fx2(mce
, vatom
);
4703 case Iop_Sqrt32F0x4
:
4704 case Iop_RSqrtEst32F0x4
:
4705 case Iop_RecipEst32F0x4
:
4706 return unary32F0x4(mce
, vatom
);
4713 case Iop_Reverse1sIn8_x16
:
4714 case Iop_Reverse8sIn16_x8
:
4715 case Iop_Reverse8sIn32_x4
:
4716 case Iop_Reverse16sIn32_x4
:
4717 case Iop_Reverse8sIn64_x2
:
4718 case Iop_Reverse16sIn64_x2
:
4719 case Iop_Reverse32sIn64_x2
:
4720 case Iop_V256toV128_1
: case Iop_V256toV128_0
:
4721 case Iop_ZeroHI64ofV128
:
4722 case Iop_ZeroHI96ofV128
:
4723 case Iop_ZeroHI112ofV128
:
4724 case Iop_ZeroHI120ofV128
:
4725 return assignNew('V', mce
, Ity_V128
, unop(op
, vatom
));
4727 case Iop_F128HItoF64
: /* F128 -> high half of F128 */
4728 case Iop_D128HItoD64
: /* D128 -> high half of D128 */
4729 return assignNew('V', mce
, Ity_I64
, unop(Iop_128HIto64
, vatom
));
4730 case Iop_F128LOtoF64
: /* F128 -> low half of F128 */
4731 case Iop_D128LOtoD64
: /* D128 -> low half of D128 */
4732 return assignNew('V', mce
, Ity_I64
, unop(Iop_128to64
, vatom
));
4737 case Iop_TruncF128toI64S
: /* F128 -> I64S */
4738 case Iop_TruncF128toI32S
: /* F128 -> I32S (result stored in 64-bits) */
4739 case Iop_TruncF128toI64U
: /* F128 -> I64U */
4740 case Iop_TruncF128toI32U
: /* F128 -> I32U (result stored in 64-bits) */
4741 return mkPCastTo(mce
, Ity_I128
, vatom
);
4743 case Iop_BCD128toI128S
:
4744 case Iop_MulI128by10
:
4745 case Iop_MulI128by10Carry
:
4746 case Iop_F16toF64x2
:
4747 case Iop_F64toF16x2
:
4750 case Iop_I32StoF128
: /* signed I32 -> F128 */
4751 case Iop_I64StoF128
: /* signed I64 -> F128 */
4752 case Iop_I32UtoF128
: /* unsigned I32 -> F128 */
4753 case Iop_I64UtoF128
: /* unsigned I64 -> F128 */
4754 case Iop_F32toF128
: /* F32 -> F128 */
4755 case Iop_F64toF128
: /* F64 -> F128 */
      case Iop_I32StoD128: /* signed I32 -> D128 */
      case Iop_I64StoD128: /* signed I64 -> D128 */
4758 case Iop_I32UtoD128
: /* unsigned I32 -> D128 */
4759 case Iop_I64UtoD128
: /* unsigned I64 -> D128 */
4760 return mkPCastTo(mce
, Ity_I128
, vatom
);
4768 case Iop_RSqrtEst5GoodF64
:
4769 case Iop_RoundF64toF64_NEAREST
:
4770 case Iop_RoundF64toF64_NegINF
:
4771 case Iop_RoundF64toF64_PosINF
:
4772 case Iop_RoundF64toF64_ZERO
:
4777 case Iop_ExtractExpD64
: /* D64 -> I64 */
4778 case Iop_ExtractExpD128
: /* D128 -> I64 */
4779 case Iop_ExtractSigD64
: /* D64 -> I64 */
4780 case Iop_ExtractSigD128
: /* D128 -> I64 */
4783 return mkPCastTo(mce
, Ity_I64
, vatom
);
4786 return mkPCastTo(mce
, Ity_I128
, vatom
);
4789 case Iop_TruncF64asF32
:
4793 return mkPCastTo(mce
, Ity_I32
, vatom
);
4797 return expensiveCountTrailingZeroes(mce
, op
, atom
, vatom
);
4808 case Iop_V128HIto64
:
4814 case Iop_Reverse8sIn16_x4
:
4815 case Iop_Reverse8sIn32_x2
:
4816 case Iop_Reverse16sIn32_x2
:
4817 case Iop_Reverse8sIn64_x1
:
4818 case Iop_Reverse16sIn64_x1
:
4819 case Iop_Reverse32sIn64_x1
:
4820 case Iop_V256to64_0
: case Iop_V256to64_1
:
4821 case Iop_V256to64_2
: case Iop_V256to64_3
:
4822 return assignNew('V', mce
, Ity_I64
, unop(op
, vatom
));
4833 return assignNew('V', mce
, Ity_I32
, unop(op
, vatom
));
4840 case Iop_GetMSBs8x16
:
4841 return assignNew('V', mce
, Ity_I16
, unop(op
, vatom
));
4849 case Iop_GetMSBs8x8
:
4850 return assignNew('V', mce
, Ity_I8
, unop(op
, vatom
));
4853 return assignNew('V', mce
, Ity_I1
, unop(Iop_32to1
, vatom
));
4856 return assignNew('V', mce
, Ity_I1
, unop(Iop_64to1
, vatom
));
4858 case Iop_ReinterpF64asI64
:
4859 case Iop_ReinterpI64asF64
:
4860 case Iop_ReinterpI32asF32
:
4861 case Iop_ReinterpF32asI32
:
4862 case Iop_ReinterpI64asD64
:
4863 case Iop_ReinterpD64asI64
:
4878 return mkPCast8x8(mce
, vatom
);
4880 case Iop_CmpNEZ8x16
:
4886 return mkPCast8x16(mce
, vatom
);
4888 case Iop_CmpNEZ16x4
:
4892 return mkPCast16x4(mce
, vatom
);
4894 case Iop_CmpNEZ16x8
:
4899 return mkPCast16x8(mce
, vatom
);
4901 case Iop_CmpNEZ32x2
:
4904 case Iop_FtoI32Ux2_RZ
:
4905 case Iop_FtoI32Sx2_RZ
:
4907 return mkPCast32x2(mce
, vatom
);
4909 case Iop_CmpNEZ32x4
:
4912 case Iop_FtoI32Ux4_RZ
:
4913 case Iop_FtoI32Sx4_RZ
:
4915 case Iop_RSqrtEst32Ux4
:
4917 return mkPCast32x4(mce
, vatom
);
4920 return mkPCastTo(mce
, Ity_I32
, vatom
);
4923 return mkPCastTo(mce
, Ity_I64
, vatom
);
4925 case Iop_CmpNEZ64x2
:
4926 case Iop_CipherSV128
:
4930 return mkPCast64x2(mce
, vatom
);
4932 case Iop_PwBitMtxXpose64x2
:
4933 return assignNew('V', mce
, Ity_V128
, unop(op
, vatom
));
4935 case Iop_NarrowUn16to8x8
:
4936 case Iop_NarrowUn32to16x4
:
4937 case Iop_NarrowUn64to32x2
:
4938 case Iop_QNarrowUn16Sto8Sx8
:
4939 case Iop_QNarrowUn16Sto8Ux8
:
4940 case Iop_QNarrowUn16Uto8Ux8
:
4941 case Iop_QNarrowUn32Sto16Sx4
:
4942 case Iop_QNarrowUn32Sto16Ux4
:
4943 case Iop_QNarrowUn32Uto16Ux4
:
4944 case Iop_QNarrowUn64Sto32Sx2
:
4945 case Iop_QNarrowUn64Sto32Ux2
:
4946 case Iop_QNarrowUn64Uto32Ux2
:
4947 case Iop_F32toF16x4
:
4948 return vectorNarrowUnV128(mce
, op
, vatom
);
4950 case Iop_Widen8Sto16x8
:
4951 case Iop_Widen8Uto16x8
:
4952 case Iop_Widen16Sto32x4
:
4953 case Iop_Widen16Uto32x4
:
4954 case Iop_Widen32Sto64x2
:
4955 case Iop_Widen32Uto64x2
:
4956 case Iop_F16toF32x4
:
4957 return vectorWidenI64(mce
, op
, vatom
);
4959 case Iop_PwAddL32Ux2
:
4960 case Iop_PwAddL32Sx2
:
4961 return mkPCastTo(mce
, Ity_I64
,
4962 assignNew('V', mce
, Ity_I64
, unop(op
, mkPCast32x2(mce
, vatom
))));
4964 case Iop_PwAddL16Ux4
:
4965 case Iop_PwAddL16Sx4
:
4966 return mkPCast32x2(mce
,
4967 assignNew('V', mce
, Ity_I64
, unop(op
, mkPCast16x4(mce
, vatom
))));
4969 case Iop_PwAddL8Ux8
:
4970 case Iop_PwAddL8Sx8
:
4971 return mkPCast16x4(mce
,
4972 assignNew('V', mce
, Ity_I64
, unop(op
, mkPCast8x8(mce
, vatom
))));
4974 case Iop_PwAddL32Ux4
:
4975 case Iop_PwAddL32Sx4
:
4976 return mkPCast64x2(mce
,
4977 assignNew('V', mce
, Ity_V128
, unop(op
, mkPCast32x4(mce
, vatom
))));
4979 case Iop_PwAddL64Ux2
:
4980 return mkPCast128x1(mce
,
4981 assignNew('V', mce
, Ity_V128
, unop(op
, mkPCast64x2(mce
, vatom
))));
4983 case Iop_PwAddL16Ux8
:
4984 case Iop_PwAddL16Sx8
:
4985 return mkPCast32x4(mce
,
4986 assignNew('V', mce
, Ity_V128
, unop(op
, mkPCast16x8(mce
, vatom
))));
4988 case Iop_PwAddL8Ux16
:
4989 case Iop_PwAddL8Sx16
:
4990 return mkPCast16x8(mce
,
4991 assignNew('V', mce
, Ity_V128
, unop(op
, mkPCast8x16(mce
, vatom
))));
4996 VG_(tool_panic
)("memcheck:expr2vbits_Unop");
/* Worker function -- do not call directly.  See comments on
   expr2vbits_Load for the meaning of |guard|.

   Generates IR to (1) perform a definedness test of |addr|, (2)
   perform a validity test of |addr|, and (3) return the Vbits for the
   location indicated by |addr|.  All of this only happens when
   |guard| is NULL or |guard| evaluates to True at run time.

   If |guard| evaluates to False at run time, the returned value is
   the IR-mandated 0x55..55 value, and no checks nor shadow loads are
   performed.

   The definedness of |guard| itself is not checked.  That is assumed
   to have been done before this point, by the caller. */
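/* Purely illustrative sketch of the output (not normative): for a
   little-endian Ity_I32 load with zero bias, the code below emits
   roughly

      addrAct   = Add32/Add64(addr, 0)
      datavbits = DIRTY ... ::: MC_(helperc_LOADV32le)(addrAct)

   preceded by whatever definedness/validity checks
   complainIfUndefined() generates for |addr|.  For V128/V256 loads
   the helper instead returns its result via a vector out-parameter
   (ret_via_outparam). */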
IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
                              IREndness end, IRType ty,
                              IRAtom* addr, UInt bias, IRAtom* guard )
{
   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(end == Iend_LE || end == Iend_BE);

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr, guard );

   /* Now cook up a call to the relevant helper function, to read the
      data V bits from shadow memory. */
   ty = shadowTypeV(ty);

   void*        helper           = NULL;
   const HChar* hname            = NULL;
   Bool         ret_via_outparam = False;
5035 if (end
== Iend_LE
) {
5037 case Ity_V256
: helper
= &MC_(helperc_LOADV256le
);
5038 hname
= "MC_(helperc_LOADV256le)";
5039 ret_via_outparam
= True
;
5041 case Ity_V128
: helper
= &MC_(helperc_LOADV128le
);
5042 hname
= "MC_(helperc_LOADV128le)";
5043 ret_via_outparam
= True
;
5045 case Ity_I64
: helper
= &MC_(helperc_LOADV64le
);
5046 hname
= "MC_(helperc_LOADV64le)";
5048 case Ity_I32
: helper
= &MC_(helperc_LOADV32le
);
5049 hname
= "MC_(helperc_LOADV32le)";
5051 case Ity_I16
: helper
= &MC_(helperc_LOADV16le
);
5052 hname
= "MC_(helperc_LOADV16le)";
5054 case Ity_I8
: helper
= &MC_(helperc_LOADV8
);
5055 hname
= "MC_(helperc_LOADV8)";
5057 default: ppIRType(ty
);
5058 VG_(tool_panic
)("memcheck:expr2vbits_Load_WRK(LE)");
5062 case Ity_V256
: helper
= &MC_(helperc_LOADV256be
);
5063 hname
= "MC_(helperc_LOADV256be)";
5064 ret_via_outparam
= True
;
5066 case Ity_V128
: helper
= &MC_(helperc_LOADV128be
);
5067 hname
= "MC_(helperc_LOADV128be)";
5068 ret_via_outparam
= True
;
5070 case Ity_I64
: helper
= &MC_(helperc_LOADV64be
);
5071 hname
= "MC_(helperc_LOADV64be)";
5073 case Ity_I32
: helper
= &MC_(helperc_LOADV32be
);
5074 hname
= "MC_(helperc_LOADV32be)";
5076 case Ity_I16
: helper
= &MC_(helperc_LOADV16be
);
5077 hname
= "MC_(helperc_LOADV16be)";
5079 case Ity_I8
: helper
= &MC_(helperc_LOADV8
);
5080 hname
= "MC_(helperc_LOADV8)";
5082 default: ppIRType(ty
);
5083 VG_(tool_panic
)("memcheck:expr2vbits_Load_WRK(BE)");
5090 /* Generate the actual address into addrAct. */
5097 IRType tyAddr
= mce
->hWordTy
;
5098 tl_assert( tyAddr
== Ity_I32
|| tyAddr
== Ity_I64
);
5099 mkAdd
= tyAddr
==Ity_I32
? Iop_Add32
: Iop_Add64
;
5100 eBias
= tyAddr
==Ity_I32
? mkU32(bias
) : mkU64(bias
);
5101 addrAct
= assignNew('V', mce
, tyAddr
, binop(mkAdd
, addr
, eBias
) );
   /* We need to have a place to park the V bits we're just about to
      read. */
   IRTemp datavbits = newTemp(mce, ty, VSh);
5108 /* Here's the call. */
5110 if (ret_via_outparam
) {
5111 di
= unsafeIRDirty_1_N( datavbits
,
5113 hname
, VG_(fnptr_to_fnentry
)( helper
),
5114 mkIRExprVec_2( IRExpr_VECRET(), addrAct
) );
5116 di
= unsafeIRDirty_1_N( datavbits
,
5118 hname
, VG_(fnptr_to_fnentry
)( helper
),
5119 mkIRExprVec_1( addrAct
) );
5122 setHelperAnns( mce
, di
);
5125 /* Ideally the didn't-happen return value here would be all-ones
5126 (all-undefined), so it'd be obvious if it got used
5127 inadvertently. We can get by with the IR-mandated default
5128 value (0b01 repeating, 0x55 etc) as that'll still look pretty
5129 undefined if it ever leaks out. */
5131 stmt( 'V', mce
, IRStmt_Dirty(di
) );
5133 return mkexpr(datavbits
);
/* Generate IR to do a shadow load.  The helper is expected to check
   the validity of the address and return the V bits for that address.
   This can optionally be controlled by a guard, which is assumed to
   be True if NULL.  In the case where the guard is False at runtime,
   the helper will return the didn't-do-the-call value of 0x55..55.
   Since that means "completely undefined result", the caller of
   this function will need to fix up the result somehow in that
   case.

   Caller of this function is also expected to have checked the
   definedness of |guard| before this point.
*/
5150 IRAtom
* expr2vbits_Load ( MCEnv
* mce
,
5151 IREndness end
, IRType ty
,
5152 IRAtom
* addr
, UInt bias
,
5155 tl_assert(end
== Iend_LE
|| end
== Iend_BE
);
5156 switch (shadowTypeV(ty
)) {
5163 return expr2vbits_Load_WRK(mce
, end
, ty
, addr
, bias
, guard
);
5165 VG_(tool_panic
)("expr2vbits_Load");
/* The most general handler for guarded loads.  Assumes the
   definedness of GUARD has already been checked by the caller.  A
   GUARD of NULL is assumed to mean "always True".  Generates code to
   check the definedness and validity of ADDR.

   Generate IR to do a shadow load from ADDR and return the V bits.
   The loaded type is TY.  The loaded data is then (shadow) widened by
   using VWIDEN, which can be Iop_INVALID to denote a no-op.  If GUARD
   evaluates to False at run time then the returned Vbits are simply
   VALT instead.  Note therefore that the argument type of VWIDEN must
   be TY and the result type of VWIDEN must equal the type of VALT.
*/
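/* Hypothetical call site, for illustration only (the names 'addr',
   'guard' and 'valt' here are placeholders): a guarded 8-bit load
   whose result is zero-widened to 32 bits might be instrumented as

      IRAtom* vbits
         = expr2vbits_Load_guarded_General(
              mce, Iend_LE, Ity_I8, addr, 0/*bias*/,
              guard, Iop_8Uto32, valt );

   where TY is Ity_I8, VWIDEN is Iop_8Uto32, and valt must already
   have the widened type Ity_I32. */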
5183 IRAtom
* expr2vbits_Load_guarded_General ( MCEnv
* mce
,
5184 IREndness end
, IRType ty
,
5185 IRAtom
* addr
, UInt bias
,
5187 IROp vwiden
, IRAtom
* valt
)
5189 /* Sanity check the conversion operation, and also set TYWIDE. */
5190 IRType tyWide
= Ity_INVALID
;
5195 case Iop_16Uto32
: case Iop_16Sto32
: case Iop_8Uto32
: case Iop_8Sto32
:
5199 VG_(tool_panic
)("memcheck:expr2vbits_Load_guarded_General");
5202 /* If the guard evaluates to True, this will hold the loaded V bits
5203 at TY. If the guard evaluates to False, this will be all
5204 ones, meaning "all undefined", in which case we will have to
5205 replace it using an ITE below. */
5207 = assignNew('V', mce
, ty
,
5208 expr2vbits_Load(mce
, end
, ty
, addr
, bias
, guard
));
5209 /* Now (shadow-) widen the loaded V bits to the desired width. In
5210 the guard-is-False case, the allowable widening operators will
5211 in the worst case (unsigned widening) at least leave the
5212 pre-widened part as being marked all-undefined, and in the best
5213 case (signed widening) mark the whole widened result as
5214 undefined. Anyway, it doesn't matter really, since in this case
5215 we will replace said value with the default value |valt| using an
5218 = vwiden
== Iop_INVALID
5220 : assignNew('V', mce
, tyWide
, unop(vwiden
, iftrue1
));
5221 /* These are the V bits we will return if the load doesn't take
5225 /* Prepare the cond for the ITE. Convert a NULL cond into
5226 something that iropt knows how to fold out later. */
5228 = guard
== NULL
? mkU1(1) : guard
;
5229 /* And assemble the final result. */
5230 return assignNew('V', mce
, tyWide
, IRExpr_ITE(cond
, iftrue2
, iffalse
));
5234 /* A simpler handler for guarded loads, in which there is no
5235 conversion operation, and the default V bit return (when the guard
5236 evaluates to False at runtime) is "all defined". If there is no
5237 guard expression or the guard is always TRUE this function behaves
5238 like expr2vbits_Load. It is assumed that definedness of GUARD has
5239 already been checked at the call site. */
5241 IRAtom
* expr2vbits_Load_guarded_Simple ( MCEnv
* mce
,
5242 IREndness end
, IRType ty
,
5243 IRAtom
* addr
, UInt bias
,
5246 return expr2vbits_Load_guarded_General(
5247 mce
, end
, ty
, addr
, bias
, guard
, Iop_INVALID
, definedOfType(ty
)
5253 IRAtom
* expr2vbits_ITE ( MCEnv
* mce
,
5254 IRAtom
* cond
, IRAtom
* iftrue
, IRAtom
* iffalse
)
5256 IRAtom
*vbitsC
, *vbits0
, *vbits1
;
5258 /* Given ITE(cond, iftrue, iffalse), generate
5259 ITE(cond, iftrue#, iffalse#) `UifU` PCast(cond#)
5260 That is, steer the V bits like the originals, but trash the
5261 result if the steering value is undefined. This gives
5262 lazy propagation. */
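   /* Concretely (restating the scheme above): for ITE(c, x, y) with
      shadows c#, x#, y# and result type ty, we return roughly

         UifU( ITE(c, x#, y#), PCastTo(ty, c#) )

      so a defined condition steers the V bits normally, while an
      undefined condition forces the whole result to undefined. */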
5263 tl_assert(isOriginalAtom(mce
, cond
));
5264 tl_assert(isOriginalAtom(mce
, iftrue
));
5265 tl_assert(isOriginalAtom(mce
, iffalse
));
5267 vbitsC
= expr2vbits(mce
, cond
, HuOth
); // could we use HuPCa here?
5268 vbits1
= expr2vbits(mce
, iftrue
, HuOth
);
5269 vbits0
= expr2vbits(mce
, iffalse
, HuOth
);
5270 ty
= typeOfIRExpr(mce
->sb
->tyenv
, vbits0
);
5273 mkUifU(mce
, ty
, assignNew('V', mce
, ty
,
5274 IRExpr_ITE(cond
, vbits1
, vbits0
)),
5275 mkPCastTo(mce
, ty
, vbitsC
) );
5278 /* --------- This is the main expression-handling function. --------- */
5281 IRExpr
* expr2vbits ( MCEnv
* mce
, IRExpr
* e
,
5282 HowUsed hu
/*use HuOth if unknown*/ )
5287 return shadow_GET( mce
, e
->Iex
.Get
.offset
, e
->Iex
.Get
.ty
);
5290 return shadow_GETI( mce
, e
->Iex
.GetI
.descr
,
5291 e
->Iex
.GetI
.ix
, e
->Iex
.GetI
.bias
);
5294 return IRExpr_RdTmp( findShadowTmpV(mce
, e
->Iex
.RdTmp
.tmp
) );
5297 return definedOfType(shadowTypeV(typeOfIRExpr(mce
->sb
->tyenv
, e
)));
5300 return expr2vbits_Qop(
5302 e
->Iex
.Qop
.details
->op
,
5303 e
->Iex
.Qop
.details
->arg1
, e
->Iex
.Qop
.details
->arg2
,
5304 e
->Iex
.Qop
.details
->arg3
, e
->Iex
.Qop
.details
->arg4
5308 return expr2vbits_Triop(
5310 e
->Iex
.Triop
.details
->op
,
5311 e
->Iex
.Triop
.details
->arg1
, e
->Iex
.Triop
.details
->arg2
,
5312 e
->Iex
.Triop
.details
->arg3
5316 return expr2vbits_Binop(
5319 e
->Iex
.Binop
.arg1
, e
->Iex
.Binop
.arg2
,
5324 return expr2vbits_Unop( mce
, e
->Iex
.Unop
.op
, e
->Iex
.Unop
.arg
);
5327 return expr2vbits_Load( mce
, e
->Iex
.Load
.end
,
5329 e
->Iex
.Load
.addr
, 0/*addr bias*/,
5330 NULL
/* guard == "always True"*/ );
5333 return mkLazyN( mce
, e
->Iex
.CCall
.args
,
5338 return expr2vbits_ITE( mce
, e
->Iex
.ITE
.cond
, e
->Iex
.ITE
.iftrue
,
5339 e
->Iex
.ITE
.iffalse
);
5345 VG_(tool_panic
)("memcheck: expr2vbits");
5350 /*------------------------------------------------------------*/
5351 /*--- Generate shadow stmts from all kinds of IRStmts. ---*/
5352 /*------------------------------------------------------------*/
5354 /* Widen a value to the host word size. */
5357 IRExpr
* zwidenToHostWord ( MCEnv
* mce
, IRAtom
* vatom
)
5361 /* vatom is vbits-value and as such can only have a shadow type. */
5362 tl_assert(isShadowAtom(mce
,vatom
));
5364 ty
= typeOfIRExpr(mce
->sb
->tyenv
, vatom
);
5367 if (tyH
== Ity_I32
) {
5372 return assignNew('V', mce
, tyH
, unop(Iop_16Uto32
, vatom
));
5374 return assignNew('V', mce
, tyH
, unop(Iop_8Uto32
, vatom
));
5379 if (tyH
== Ity_I64
) {
5382 return assignNew('V', mce
, tyH
, unop(Iop_32Uto64
, vatom
));
5384 return assignNew('V', mce
, tyH
, unop(Iop_32Uto64
,
5385 assignNew('V', mce
, Ity_I32
, unop(Iop_16Uto32
, vatom
))));
5387 return assignNew('V', mce
, tyH
, unop(Iop_32Uto64
,
5388 assignNew('V', mce
, Ity_I32
, unop(Iop_8Uto32
, vatom
))));
5396 VG_(printf
)("\nty = "); ppIRType(ty
); VG_(printf
)("\n");
5397 VG_(tool_panic
)("zwidenToHostWord");
/* Generate a shadow store.  |addr| is always the original address
   atom.  You can pass in either originals or V-bits for the data
   atom, but obviously not both.  This function generates a check for
   the definedness and (indirectly) the validity of |addr|, but only
   when |guard| evaluates to True at run time (or is NULL).

   |guard| :: Ity_I1 controls whether the store really happens; NULL
   means it unconditionally does.  Note that |guard| itself is not
   checked for definedness; the caller of this function must do that
   if necessary. */
5413 void do_shadow_Store ( MCEnv
* mce
,
5415 IRAtom
* addr
, UInt bias
,
5416 IRAtom
* data
, IRAtom
* vdata
,
5421 void* helper
= NULL
;
5422 const HChar
* hname
= NULL
;
5425 tyAddr
= mce
->hWordTy
;
5426 mkAdd
= tyAddr
==Ity_I32
? Iop_Add32
: Iop_Add64
;
5427 tl_assert( tyAddr
== Ity_I32
|| tyAddr
== Ity_I64
);
5428 tl_assert( end
== Iend_LE
|| end
== Iend_BE
);
5432 tl_assert(isOriginalAtom(mce
, data
));
5433 tl_assert(bias
== 0);
5434 vdata
= expr2vbits( mce
, data
, HuOth
);
5439 tl_assert(isOriginalAtom(mce
,addr
));
5440 tl_assert(isShadowAtom(mce
,vdata
));
5443 tl_assert(isOriginalAtom(mce
, guard
));
5444 tl_assert(typeOfIRExpr(mce
->sb
->tyenv
, guard
) == Ity_I1
);
5447 ty
= typeOfIRExpr(mce
->sb
->tyenv
, vdata
);
5449 // If we're not doing undefined value checking, pretend that this value
5450 // is "all valid". That lets Vex's optimiser remove some of the V bit
5451 // shadow computation ops that precede it.
5452 if (MC_(clo_mc_level
) == 1) {
5454 case Ity_V256
: // V256 weirdness -- used four times
5455 c
= IRConst_V256(V_BITS32_DEFINED
); break;
5456 case Ity_V128
: // V128 weirdness -- used twice
5457 c
= IRConst_V128(V_BITS16_DEFINED
); break;
5458 case Ity_I64
: c
= IRConst_U64 (V_BITS64_DEFINED
); break;
5459 case Ity_I32
: c
= IRConst_U32 (V_BITS32_DEFINED
); break;
5460 case Ity_I16
: c
= IRConst_U16 (V_BITS16_DEFINED
); break;
5461 case Ity_I8
: c
= IRConst_U8 (V_BITS8_DEFINED
); break;
5462 default: VG_(tool_panic
)("memcheck:do_shadow_Store(LE)");
5464 vdata
= IRExpr_Const( c
);
5467 /* First, emit a definedness test for the address. This also sets
5468 the address (shadow) to 'defined' following the test. Both of
5469 those actions are gated on |guard|. */
5470 complainIfUndefined( mce
, addr
, guard
);
5472 /* Now decide which helper function to call to write the data V
5473 bits into shadow memory. */
5474 if (end
== Iend_LE
) {
5476 case Ity_V256
: /* we'll use the helper four times */
5477 case Ity_V128
: /* we'll use the helper twice */
5478 case Ity_I64
: helper
= &MC_(helperc_STOREV64le
);
5479 hname
= "MC_(helperc_STOREV64le)";
5481 case Ity_I32
: helper
= &MC_(helperc_STOREV32le
);
5482 hname
= "MC_(helperc_STOREV32le)";
5484 case Ity_I16
: helper
= &MC_(helperc_STOREV16le
);
5485 hname
= "MC_(helperc_STOREV16le)";
5487 case Ity_I8
: helper
= &MC_(helperc_STOREV8
);
5488 hname
= "MC_(helperc_STOREV8)";
5490 default: VG_(tool_panic
)("memcheck:do_shadow_Store(LE)");
5494 case Ity_V128
: /* we'll use the helper twice */
5495 case Ity_I64
: helper
= &MC_(helperc_STOREV64be
);
5496 hname
= "MC_(helperc_STOREV64be)";
5498 case Ity_I32
: helper
= &MC_(helperc_STOREV32be
);
5499 hname
= "MC_(helperc_STOREV32be)";
5501 case Ity_I16
: helper
= &MC_(helperc_STOREV16be
);
5502 hname
= "MC_(helperc_STOREV16be)";
5504 case Ity_I8
: helper
= &MC_(helperc_STOREV8
);
5505 hname
= "MC_(helperc_STOREV8)";
         /* Note, no V256 case here, because no big-endian target that
            we support has 256-bit vectors. */
         default: VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
5513 if (UNLIKELY(ty
== Ity_V256
)) {
5515 /* V256-bit case -- phrased in terms of 64 bit units (Qs), with
5516 Q3 being the most significant lane. */
5517 /* These are the offsets of the Qs in memory. */
5518 Int offQ0
, offQ1
, offQ2
, offQ3
;
5520 /* Various bits for constructing the 4 lane helper calls */
5521 IRDirty
*diQ0
, *diQ1
, *diQ2
, *diQ3
;
5522 IRAtom
*addrQ0
, *addrQ1
, *addrQ2
, *addrQ3
;
5523 IRAtom
*vdataQ0
, *vdataQ1
, *vdataQ2
, *vdataQ3
;
5524 IRAtom
*eBiasQ0
, *eBiasQ1
, *eBiasQ2
, *eBiasQ3
;
5526 if (end
== Iend_LE
) {
5527 offQ0
= 0; offQ1
= 8; offQ2
= 16; offQ3
= 24;
5529 offQ3
= 0; offQ2
= 8; offQ1
= 16; offQ0
= 24;
5532 eBiasQ0
= tyAddr
==Ity_I32
? mkU32(bias
+offQ0
) : mkU64(bias
+offQ0
);
5533 addrQ0
= assignNew('V', mce
, tyAddr
, binop(mkAdd
, addr
, eBiasQ0
) );
5534 vdataQ0
= assignNew('V', mce
, Ity_I64
, unop(Iop_V256to64_0
, vdata
));
5535 diQ0
= unsafeIRDirty_0_N(
5537 hname
, VG_(fnptr_to_fnentry
)( helper
),
5538 mkIRExprVec_2( addrQ0
, vdataQ0
)
5541 eBiasQ1
= tyAddr
==Ity_I32
? mkU32(bias
+offQ1
) : mkU64(bias
+offQ1
);
5542 addrQ1
= assignNew('V', mce
, tyAddr
, binop(mkAdd
, addr
, eBiasQ1
) );
5543 vdataQ1
= assignNew('V', mce
, Ity_I64
, unop(Iop_V256to64_1
, vdata
));
5544 diQ1
= unsafeIRDirty_0_N(
5546 hname
, VG_(fnptr_to_fnentry
)( helper
),
5547 mkIRExprVec_2( addrQ1
, vdataQ1
)
5550 eBiasQ2
= tyAddr
==Ity_I32
? mkU32(bias
+offQ2
) : mkU64(bias
+offQ2
);
5551 addrQ2
= assignNew('V', mce
, tyAddr
, binop(mkAdd
, addr
, eBiasQ2
) );
5552 vdataQ2
= assignNew('V', mce
, Ity_I64
, unop(Iop_V256to64_2
, vdata
));
5553 diQ2
= unsafeIRDirty_0_N(
5555 hname
, VG_(fnptr_to_fnentry
)( helper
),
5556 mkIRExprVec_2( addrQ2
, vdataQ2
)
5559 eBiasQ3
= tyAddr
==Ity_I32
? mkU32(bias
+offQ3
) : mkU64(bias
+offQ3
);
5560 addrQ3
= assignNew('V', mce
, tyAddr
, binop(mkAdd
, addr
, eBiasQ3
) );
5561 vdataQ3
= assignNew('V', mce
, Ity_I64
, unop(Iop_V256to64_3
, vdata
));
5562 diQ3
= unsafeIRDirty_0_N(
5564 hname
, VG_(fnptr_to_fnentry
)( helper
),
5565 mkIRExprVec_2( addrQ3
, vdataQ3
)
5569 diQ0
->guard
= diQ1
->guard
= diQ2
->guard
= diQ3
->guard
= guard
;
5571 setHelperAnns( mce
, diQ0
);
5572 setHelperAnns( mce
, diQ1
);
5573 setHelperAnns( mce
, diQ2
);
5574 setHelperAnns( mce
, diQ3
);
5575 stmt( 'V', mce
, IRStmt_Dirty(diQ0
) );
5576 stmt( 'V', mce
, IRStmt_Dirty(diQ1
) );
5577 stmt( 'V', mce
, IRStmt_Dirty(diQ2
) );
5578 stmt( 'V', mce
, IRStmt_Dirty(diQ3
) );
5581 else if (UNLIKELY(ty
== Ity_V128
)) {
5584 /* See comment in next clause re 64-bit regparms */
5585 /* also, need to be careful about endianness */
5587 Int offLo64
, offHi64
;
5588 IRDirty
*diLo64
, *diHi64
;
5589 IRAtom
*addrLo64
, *addrHi64
;
5590 IRAtom
*vdataLo64
, *vdataHi64
;
5591 IRAtom
*eBiasLo64
, *eBiasHi64
;
5593 if (end
== Iend_LE
) {
5601 eBiasLo64
= tyAddr
==Ity_I32
? mkU32(bias
+offLo64
) : mkU64(bias
+offLo64
);
5602 addrLo64
= assignNew('V', mce
, tyAddr
, binop(mkAdd
, addr
, eBiasLo64
) );
5603 vdataLo64
= assignNew('V', mce
, Ity_I64
, unop(Iop_V128to64
, vdata
));
5604 diLo64
= unsafeIRDirty_0_N(
5606 hname
, VG_(fnptr_to_fnentry
)( helper
),
5607 mkIRExprVec_2( addrLo64
, vdataLo64
)
5609 eBiasHi64
= tyAddr
==Ity_I32
? mkU32(bias
+offHi64
) : mkU64(bias
+offHi64
);
5610 addrHi64
= assignNew('V', mce
, tyAddr
, binop(mkAdd
, addr
, eBiasHi64
) );
5611 vdataHi64
= assignNew('V', mce
, Ity_I64
, unop(Iop_V128HIto64
, vdata
));
5612 diHi64
= unsafeIRDirty_0_N(
5614 hname
, VG_(fnptr_to_fnentry
)( helper
),
5615 mkIRExprVec_2( addrHi64
, vdataHi64
)
5617 if (guard
) diLo64
->guard
= guard
;
5618 if (guard
) diHi64
->guard
= guard
;
5619 setHelperAnns( mce
, diLo64
);
5620 setHelperAnns( mce
, diHi64
);
5621 stmt( 'V', mce
, IRStmt_Dirty(diLo64
) );
5622 stmt( 'V', mce
, IRStmt_Dirty(diHi64
) );
5629 /* 8/16/32/64-bit cases */
5630 /* Generate the actual address into addrAct. */
5634 IRAtom
* eBias
= tyAddr
==Ity_I32
? mkU32(bias
) : mkU64(bias
);
5635 addrAct
= assignNew('V', mce
, tyAddr
, binop(mkAdd
, addr
, eBias
));
5638 if (ty
== Ity_I64
) {
5639 /* We can't do this with regparm 2 on 32-bit platforms, since
5640 the back ends aren't clever enough to handle 64-bit
5641 regparm args. Therefore be different. */
5642 di
= unsafeIRDirty_0_N(
5644 hname
, VG_(fnptr_to_fnentry
)( helper
),
5645 mkIRExprVec_2( addrAct
, vdata
)
5648 di
= unsafeIRDirty_0_N(
5650 hname
, VG_(fnptr_to_fnentry
)( helper
),
5651 mkIRExprVec_2( addrAct
,
5652 zwidenToHostWord( mce
, vdata
))
5655 if (guard
) di
->guard
= guard
;
5656 setHelperAnns( mce
, di
);
5657 stmt( 'V', mce
, IRStmt_Dirty(di
) );
/* Do lazy pessimistic propagation through a dirty helper call, by
   looking at the annotations on it.  This is the most complex part of
   Memcheck. */

static IRType szToITy ( Int n )
{
   switch (n) {
      case 1: return Ity_I8;
      case 2: return Ity_I16;
      case 4: return Ity_I32;
      case 8: return Ity_I64;
      default: VG_(tool_panic)("szToITy(memcheck)");
   }
}
5679 void do_shadow_Dirty ( MCEnv
* mce
, IRDirty
* d
)
5681 Int i
, k
, n
, toDo
, gSz
, gOff
;
5682 IRAtom
*src
, *here
, *curr
;
5683 IRType tySrc
, tyDst
;
5687 /* What's the native endianness? We need to know this. */
5688 # if defined(VG_BIGENDIAN)
5690 # elif defined(VG_LITTLEENDIAN)
5693 # error "Unknown endianness"
5696 /* First check the guard. */
5697 complainIfUndefined(mce
, d
->guard
, NULL
);
5699 /* Now round up all inputs and PCast over them. */
5700 curr
= definedOfType(Ity_I32
);
5702 /* Inputs: unmasked args
5703 Note: arguments are evaluated REGARDLESS of the guard expression */
5704 for (i
= 0; d
->args
[i
]; i
++) {
5705 IRAtom
* arg
= d
->args
[i
];
5706 if ( (d
->cee
->mcx_mask
& (1<<i
))
5707 || UNLIKELY(is_IRExpr_VECRET_or_GSPTR(arg
)) ) {
5708 /* ignore this arg */
5710 here
= mkPCastTo( mce
, Ity_I32
, expr2vbits(mce
, arg
, HuOth
) );
5711 curr
= mkUifU32(mce
, here
, curr
);
5715 /* Inputs: guest state that we read. */
5716 for (i
= 0; i
< d
->nFxState
; i
++) {
5717 tl_assert(d
->fxState
[i
].fx
!= Ifx_None
);
5718 if (d
->fxState
[i
].fx
== Ifx_Write
)
5721 /* Enumerate the described state segments */
5722 for (k
= 0; k
< 1 + d
->fxState
[i
].nRepeats
; k
++) {
5723 gOff
= d
->fxState
[i
].offset
+ k
* d
->fxState
[i
].repeatLen
;
5724 gSz
= d
->fxState
[i
].size
;
5726 /* Ignore any sections marked as 'always defined'. */
5727 if (isAlwaysDefd(mce
, gOff
, gSz
)) {
5729 VG_(printf
)("memcheck: Dirty gst: ignored off %d, sz %d\n",
            /* This state element is read or modified.  So we need to
               consider it.  If larger than 8 bytes, deal with it in
               8-byte chunks. */
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 8 ? gSz : 8;
            /* update 'curr' with UifU of the state slice
               gOff .. gOff+n-1 */
            tySrc = szToITy( n );
5745 /* Observe the guard expression. If it is false use an
5746 all-bits-defined bit pattern */
5747 IRAtom
*cond
, *iffalse
, *iftrue
;
5749 cond
= assignNew('V', mce
, Ity_I1
, d
->guard
);
5750 iftrue
= assignNew('V', mce
, tySrc
, shadow_GET(mce
, gOff
, tySrc
));
5751 iffalse
= assignNew('V', mce
, tySrc
, definedOfType(tySrc
));
5752 src
= assignNew('V', mce
, tySrc
,
5753 IRExpr_ITE(cond
, iftrue
, iffalse
));
5755 here
= mkPCastTo( mce
, Ity_I32
, src
);
5756 curr
= mkUifU32(mce
, here
, curr
);
5763 /* Inputs: memory. First set up some info needed regardless of
5764 whether we're doing reads or writes. */
5766 if (d
->mFx
!= Ifx_None
) {
5767 /* Because we may do multiple shadow loads/stores from the same
5768 base address, it's best to do a single test of its
5769 definedness right now. Post-instrumentation optimisation
5770 should remove all but this test. */
5772 tl_assert(d
->mAddr
);
5773 complainIfUndefined(mce
, d
->mAddr
, d
->guard
);
5775 tyAddr
= typeOfIRExpr(mce
->sb
->tyenv
, d
->mAddr
);
5776 tl_assert(tyAddr
== Ity_I32
|| tyAddr
== Ity_I64
);
5777 tl_assert(tyAddr
== mce
->hWordTy
); /* not really right */
5780 /* Deal with memory inputs (reads or modifies) */
5781 if (d
->mFx
== Ifx_Read
|| d
->mFx
== Ifx_Modify
) {
5783 /* chew off 32-bit chunks. We don't care about the endianness
5784 since it's all going to be condensed down to a single bit,
5785 but nevertheless choose an endianness which is hopefully
5786 native to the platform. */
5790 expr2vbits_Load_guarded_Simple(
5791 mce
, end
, Ity_I32
, d
->mAddr
, d
->mSize
- toDo
, d
->guard
)
5793 curr
= mkUifU32(mce
, here
, curr
);
5796 /* chew off 16-bit chunks */
5800 expr2vbits_Load_guarded_Simple(
5801 mce
, end
, Ity_I16
, d
->mAddr
, d
->mSize
- toDo
, d
->guard
)
5803 curr
= mkUifU32(mce
, here
, curr
);
5806 /* chew off the remaining 8-bit chunk, if any */
5810 expr2vbits_Load_guarded_Simple(
5811 mce
, end
, Ity_I8
, d
->mAddr
, d
->mSize
- toDo
, d
->guard
)
5813 curr
= mkUifU32(mce
, here
, curr
);
5816 tl_assert(toDo
== 0);
5819 /* Whew! So curr is a 32-bit V-value summarising pessimistically
5820 all the inputs to the helper. Now we need to re-distribute the
5821 results to all destinations. */
5823 /* Outputs: the destination temporary, if there is one. */
5824 if (d
->tmp
!= IRTemp_INVALID
) {
5825 dst
= findShadowTmpV(mce
, d
->tmp
);
5826 tyDst
= typeOfIRTemp(mce
->sb
->tyenv
, d
->tmp
);
5827 assign( 'V', mce
, dst
, mkPCastTo( mce
, tyDst
, curr
) );
5830 /* Outputs: guest state that we write or modify. */
5831 for (i
= 0; i
< d
->nFxState
; i
++) {
5832 tl_assert(d
->fxState
[i
].fx
!= Ifx_None
);
5833 if (d
->fxState
[i
].fx
== Ifx_Read
)
5836 /* Enumerate the described state segments */
5837 for (k
= 0; k
< 1 + d
->fxState
[i
].nRepeats
; k
++) {
5838 gOff
= d
->fxState
[i
].offset
+ k
* d
->fxState
[i
].repeatLen
;
5839 gSz
= d
->fxState
[i
].size
;
5841 /* Ignore any sections marked as 'always defined'. */
5842 if (isAlwaysDefd(mce
, gOff
, gSz
))
            /* This state element is written or modified.  So we need to
               consider it.  If larger than 8 bytes, deal with it in
               8-byte chunks. */
            tl_assert(gSz >= 0);
            if (gSz == 0) break;
            n = gSz <= 8 ? gSz : 8;
            /* Write suitably-casted 'curr' to the state slice
               gOff .. gOff+n-1 */
            tyDst = szToITy( n );
5855 do_shadow_PUT( mce
, gOff
,
5856 NULL
, /* original atom */
5857 mkPCastTo( mce
, tyDst
, curr
), d
->guard
);
5864 /* Outputs: memory that we write or modify. Same comments about
5865 endianness as above apply. */
5866 if (d
->mFx
== Ifx_Write
|| d
->mFx
== Ifx_Modify
) {
5868 /* chew off 32-bit chunks */
5870 do_shadow_Store( mce
, end
, d
->mAddr
, d
->mSize
- toDo
,
5871 NULL
, /* original data */
5872 mkPCastTo( mce
, Ity_I32
, curr
),
5876 /* chew off 16-bit chunks */
5878 do_shadow_Store( mce
, end
, d
->mAddr
, d
->mSize
- toDo
,
5879 NULL
, /* original data */
5880 mkPCastTo( mce
, Ity_I16
, curr
),
5884 /* chew off the remaining 8-bit chunk, if any */
5886 do_shadow_Store( mce
, end
, d
->mAddr
, d
->mSize
- toDo
,
5887 NULL
, /* original data */
5888 mkPCastTo( mce
, Ity_I8
, curr
),
5892 tl_assert(toDo
== 0);
5898 /* We have an ABI hint telling us that [base .. base+len-1] is to
5899 become undefined ("writable"). Generate code to call a helper to
5900 notify the A/V bit machinery of this fact.
5903 void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
5907 void do_AbiHint ( MCEnv
* mce
, IRExpr
* base
, Int len
, IRExpr
* nia
)
5911 if (MC_(clo_mc_level
) == 3) {
5912 di
= unsafeIRDirty_0_N(
5914 "MC_(helperc_MAKE_STACK_UNINIT_w_o)",
5915 VG_(fnptr_to_fnentry
)( &MC_(helperc_MAKE_STACK_UNINIT_w_o
) ),
5916 mkIRExprVec_3( base
, mkIRExpr_HWord( (UInt
)len
), nia
)
5919 /* We ignore the supplied nia, since it is irrelevant. */
5920 tl_assert(MC_(clo_mc_level
) == 2 || MC_(clo_mc_level
) == 1);
5921 /* Special-case the len==128 case, since that is for amd64-ELF,
5922 which is a very common target. */
5924 di
= unsafeIRDirty_0_N(
5926 "MC_(helperc_MAKE_STACK_UNINIT_128_no_o)",
5927 VG_(fnptr_to_fnentry
)( &MC_(helperc_MAKE_STACK_UNINIT_128_no_o
)),
5928 mkIRExprVec_1( base
)
5931 di
= unsafeIRDirty_0_N(
5933 "MC_(helperc_MAKE_STACK_UNINIT_no_o)",
5934 VG_(fnptr_to_fnentry
)( &MC_(helperc_MAKE_STACK_UNINIT_no_o
) ),
5935 mkIRExprVec_2( base
, mkIRExpr_HWord( (UInt
)len
) )
5940 stmt( 'V', mce
, IRStmt_Dirty(di
) );
5944 /* ------ Dealing with IRCAS (big and complex) ------ */
5947 static IRAtom
* gen_load_b ( MCEnv
* mce
, Int szB
,
5948 IRAtom
* baseaddr
, Int offset
);
5949 static IRAtom
* gen_maxU32 ( MCEnv
* mce
, IRAtom
* b1
, IRAtom
* b2
);
5950 static void gen_store_b ( MCEnv
* mce
, Int szB
,
5951 IRAtom
* baseaddr
, Int offset
, IRAtom
* dataB
,
5954 static void do_shadow_CAS_single ( MCEnv
* mce
, IRCAS
* cas
);
5955 static void do_shadow_CAS_double ( MCEnv
* mce
, IRCAS
* cas
);
5958 /* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
5959 IRExpr.Consts, else this asserts. If they are both Consts, it
5960 doesn't do anything. So that just leaves the RdTmp case.
5962 In which case: this assigns the shadow value SHADOW to the IR
5963 shadow temporary associated with ORIG. That is, ORIG, being an
5964 original temporary, will have a shadow temporary associated with
5965 it. However, in the case envisaged here, there will so far have
5966 been no IR emitted to actually write a shadow value into that
5967 temporary. What this routine does is to (emit IR to) copy the
5968 value in SHADOW into said temporary, so that after this call,
5969 IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
5972 Point is to allow callers to compute "by hand" a shadow value for
5973 ORIG, and force it to be associated with ORIG.
5975 How do we know that that shadow associated with ORIG has not so far
5976 been assigned to? Well, we don't per se know that, but supposing
5977 it had. Then this routine would create a second assignment to it,
5978 and later the IR sanity checker would barf. But that never
5981 static void bind_shadow_tmp_to_orig ( UChar how
,
5983 IRAtom
* orig
, IRAtom
* shadow
)
5985 tl_assert(isOriginalAtom(mce
, orig
));
5986 tl_assert(isShadowAtom(mce
, shadow
));
5987 switch (orig
->tag
) {
5989 tl_assert(shadow
->tag
== Iex_Const
);
5992 tl_assert(shadow
->tag
== Iex_RdTmp
);
5994 assign('V', mce
, findShadowTmpV(mce
,orig
->Iex
.RdTmp
.tmp
),
5997 tl_assert(how
== 'B');
5998 assign('B', mce
, findShadowTmpB(mce
,orig
->Iex
.RdTmp
.tmp
),
6009 void do_shadow_CAS ( MCEnv
* mce
, IRCAS
* cas
)
6011 /* Scheme is (both single- and double- cases):
6013 1. fetch data#,dataB (the proposed new value)
6015 2. fetch expd#,expdB (what we expect to see at the address)
6017 3. check definedness of address
      4. load old#,oldB from shadow memory; this also checks
         addressability of the address
6024 6. compute "expected == old". See COMMENT_ON_CasCmpEQ below.
6026 7. if "expected == old" (as computed by (6))
6027 store data#,dataB to shadow memory
6029 Note that 5 reads 'old' but 4 reads 'old#'. Similarly, 5 stores
6030 'data' but 7 stores 'data#'. Hence it is possible for the
6031 shadow data to be incorrectly checked and/or updated:
6033 * 7 is at least gated correctly, since the 'expected == old'
6034 condition is derived from outputs of 5. However, the shadow
6035 write could happen too late: imagine after 5 we are
6036 descheduled, a different thread runs, writes a different
6037 (shadow) value at the address, and then we resume, hence
6038 overwriting the shadow value written by the other thread.
6040 Because the original memory access is atomic, there's no way to
6041 make both the original and shadow accesses into a single atomic
6042 thing, hence this is unavoidable.
6044 At least as Valgrind stands, I don't think it's a problem, since
6045 we're single threaded *and* we guarantee that there are no
6046 context switches during the execution of any specific superblock
6047 -- context switches can only happen at superblock boundaries.
6049 If Valgrind ever becomes MT in the future, then it might be more
6050 of a problem. A possible kludge would be to artificially
6051 associate with the location, a lock, which we must acquire and
6052 release around the transaction as a whole. Hmm, that probably
      wouldn't work properly since it only guards us against other
6054 threads doing CASs on the same location, not against other
6055 threads doing normal reads and writes.
6057 ------------------------------------------------------------
6059 COMMENT_ON_CasCmpEQ:
6061 Note two things. Firstly, in the sequence above, we compute
6062 "expected == old", but we don't check definedness of it. Why
6063 not? Also, the x86 and amd64 front ends use
6064 Iop_CasCmp{EQ,NE}{8,16,32,64} comparisons to make the equivalent
6065 determination (expected == old ?) for themselves, and we also
6066 don't check definedness for those primops; we just say that the
6067 result is defined. Why? Details follow.
6069 x86/amd64 contains various forms of locked insns:
6070 * lock prefix before all basic arithmetic insn;
6071 eg lock xorl %reg1,(%reg2)
6072 * atomic exchange reg-mem
6075 Rather than attempt to represent them all, which would be a
6076 royal PITA, I used a result from Maurice Herlihy
6077 (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
6078 demonstrates that compare-and-swap is a primitive more general
6079 than the other two, and so can be used to represent all of them.
6080 So the translation scheme for (eg) lock incl (%reg) is as
6086 atomically { if (* %reg == old) { * %reg = new } else { goto again } }
6088 The "atomically" is the CAS bit. The scheme is always the same:
6089 get old value from memory, compute new value, atomically stuff
6090 new value back in memory iff the old value has not changed (iow,
6091 no other thread modified it in the meantime). If it has changed
6092 then we've been out-raced and we have to start over.
6094 Now that's all very neat, but it has the bad side effect of
6095 introducing an explicit equality test into the translation.
6096 Consider the behaviour of said code on a memory location which
6097 is uninitialised. We will wind up doing a comparison on
6098 uninitialised data, and mc duly complains.
6100 What's difficult about this is, the common case is that the
6101 location is uncontended, and so we're usually comparing the same
6102 value (* %reg) with itself. So we shouldn't complain even if it
6103 is undefined. But mc doesn't know that.
6105 My solution is to mark the == in the IR specially, so as to tell
6106 mc that it almost certainly compares a value with itself, and we
6107 should just regard the result as always defined. Rather than
6108 add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
6109 Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.
6111 So there's always the question of, can this give a false
6112 negative? eg, imagine that initially, * %reg is defined; and we
6113 read that; but then in the gap between the read and the CAS, a
6114 different thread writes an undefined (and different) value at
6115 the location. Then the CAS in this thread will fail and we will
6116 go back to "again:", but without knowing that the trip back
6117 there was based on an undefined comparison. No matter; at least
6118 the other thread won the race and the location is correctly
6119 marked as undefined. What if it wrote an uninitialised version
6120 of the same value that was there originally, though?
6122 etc etc. Seems like there's a small corner case in which we
6123 might lose the fact that something's defined -- we're out-raced
6124 in between the "old = * reg" and the "atomically {", _and_ the
6125 other thread is writing in an undefined version of what's
6126 already there. Well, that seems pretty unlikely.
6130 If we ever need to reinstate it .. code which generates a
6131 definedness test for "expected == old" was removed at r10432 of
6134 if (cas
->oldHi
== IRTemp_INVALID
) {
6135 do_shadow_CAS_single( mce
, cas
);
6137 do_shadow_CAS_double( mce
, cas
);
6142 static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
6144    IRAtom *vdataLo = NULL, *bdataLo = NULL;
6145    IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
6146    IRAtom *voldLo  = NULL, *boldLo  = NULL;
6147    IRAtom *expd_eq_old = NULL;
6151    Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
6154    tl_assert(cas->oldHi == IRTemp_INVALID);
6155    tl_assert(cas->expdHi == NULL);
6156    tl_assert(cas->dataHi == NULL);
6158    elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
6160       case Ity_I8:  elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8;  break;
6161       case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
6162       case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
6163       case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
6164       default: tl_assert(0); /* IR defn disallows any other types */
6167    /* 1. fetch data# (the proposed new value) */
6168    tl_assert(isOriginalAtom(mce, cas->dataLo));
6170       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo, HuOth));
6171    tl_assert(isShadowAtom(mce, vdataLo));
6174          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
6175       tl_assert(isShadowAtom(mce, bdataLo));
6178    /* 2. fetch expected# (what we expect to see at the address) */
6179    tl_assert(isOriginalAtom(mce, cas->expdLo));
6181       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo, HuOth));
6182    tl_assert(isShadowAtom(mce, vexpdLo));
6185          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
6186       tl_assert(isShadowAtom(mce, bexpdLo));
6189    /* 3. check definedness of address */
6190    /* 4. fetch old# from shadow memory; this also checks
6191       addressibility of the address */
6197                    cas->end, elemTy, cas->addr, 0/*Addr bias*/,
6198                    NULL/*always happens*/
6200    bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
6203          = assignNew('B', mce, Ity_I32,
6204                      gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
6205       bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
6208    /* 5. the CAS itself */
6209    stmt( 'C', mce, IRStmt_CAS(cas) );
6211    /* 6. compute "expected == old" */
6212    /* See COMMENT_ON_CasCmpEQ in this file background/rationale. */
6213    /* Note that 'C' is kinda faking it; it is indeed a non-shadow
6214       tree, but it's not copied from the input block. */
6216       = assignNew('C', mce, Ity_I1,
6217                   binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));
6219    /* 7. if "expected == old"
6220          store data# to shadow memory */
6221    do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
6222                     NULL/*data*/, vdataLo/*vdata*/,
6223                     expd_eq_old/*guard for store*/ );
6225       gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
6227                    expd_eq_old/*guard for store*/ );
6232 static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
6234    IRAtom *vdataHi = NULL, *bdataHi = NULL;
6235    IRAtom *vdataLo = NULL, *bdataLo = NULL;
6236    IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
6237    IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
6238    IRAtom *voldHi  = NULL, *boldHi  = NULL;
6239    IRAtom *voldLo  = NULL, *boldLo  = NULL;
6240    IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
6241    IRAtom *expd_eq_old = NULL, *zero = NULL;
6242    IROp opCasCmpEQ, opOr, opXor;
6243    Int elemSzB, memOffsLo, memOffsHi;
6245    Bool otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
6248    tl_assert(cas->oldHi != IRTemp_INVALID);
6249    tl_assert(cas->expdHi != NULL);
6250    tl_assert(cas->dataHi != NULL);
6252    elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
6255          opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
6256          elemSzB = 1; zero = mkU8(0);
6259          opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
6260          elemSzB = 2; zero = mkU16(0);
6263          opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
6264          elemSzB = 4; zero = mkU32(0);
6267          opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
6268          elemSzB = 8; zero = mkU64(0);
6271          tl_assert(0); /* IR defn disallows any other types */
6274    /* 1. fetch data# (the proposed new value) */
6275    tl_assert(isOriginalAtom(mce, cas->dataHi));
6276    tl_assert(isOriginalAtom(mce, cas->dataLo));
6278       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi, HuOth));
6280       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo, HuOth));
6281    tl_assert(isShadowAtom(mce, vdataHi));
6282    tl_assert(isShadowAtom(mce, vdataLo));
6285          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
6287          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
6288       tl_assert(isShadowAtom(mce, bdataHi));
6289       tl_assert(isShadowAtom(mce, bdataLo));
6292    /* 2. fetch expected# (what we expect to see at the address) */
6293    tl_assert(isOriginalAtom(mce, cas->expdHi));
6294    tl_assert(isOriginalAtom(mce, cas->expdLo));
6296       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi, HuOth));
6298       = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo, HuOth));
6299    tl_assert(isShadowAtom(mce, vexpdHi));
6300    tl_assert(isShadowAtom(mce, vexpdLo));
6303          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
6305          = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
6306       tl_assert(isShadowAtom(mce, bexpdHi));
6307       tl_assert(isShadowAtom(mce, bexpdLo));
6310    /* 3. check definedness of address */
6311    /* 4. fetch old# from shadow memory; this also checks
6312       addressibility of the address */
6313    if (cas->end == Iend_LE) {
6315       memOffsHi = elemSzB;
6317       tl_assert(cas->end == Iend_BE);
6318       memOffsLo = elemSzB;
6326                    cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/,
6327                    NULL/*always happens*/
6334                    cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/,
6335                    NULL/*always happens*/
6337    bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
6338    bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
6341          = assignNew('B', mce, Ity_I32,
6342                      gen_load_b(mce, elemSzB, cas->addr,
6343                                 memOffsHi/*addr bias*/));
6345          = assignNew('B', mce, Ity_I32,
6346                      gen_load_b(mce, elemSzB, cas->addr,
6347                                 memOffsLo/*addr bias*/));
6348       bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
6349       bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
6352    /* 5. the CAS itself */
6353    stmt( 'C', mce, IRStmt_CAS(cas) );
6355    /* 6. compute "expected == old" */
6356    /* See COMMENT_ON_CasCmpEQ in this file background/rationale. */
6357    /* Note that 'C' is kinda faking it; it is indeed a non-shadow
6358       tree, but it's not copied from the input block. */
6360       xHi = oldHi ^ expdHi;
6361       xLo = oldLo ^ expdLo;
6363       expd_eq_old = xHL == 0;
6365    xHi = assignNew('C', mce, elemTy,
6366                    binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
6367    xLo = assignNew('C', mce, elemTy,
6368                    binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
6369    xHL = assignNew('C', mce, elemTy,
6370                    binop(opOr, xHi, xLo));
6372       = assignNew('C', mce, Ity_I1,
6373                   binop(opCasCmpEQ, xHL, zero));
6375    /* 7. if "expected == old"
6376          store data# to shadow memory */
6377    do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
6378                     NULL/*data*/, vdataHi/*vdata*/,
6379                     expd_eq_old/*guard for store*/ );
6380    do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
6381                     NULL/*data*/, vdataLo/*vdata*/,
6382                     expd_eq_old/*guard for store*/ );
6384       gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
6386                    expd_eq_old/*guard for store*/ );
6387       gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
6389                    expd_eq_old/*guard for store*/ );
6394 /* ------ Dealing with LL/SC (not difficult) ------ */
6396 static void do_shadow_LLSC ( MCEnv* mce,
6400                              IRExpr* stStoredata )
6402    /* In short: treat a load-linked like a normal load followed by an
6403       assignment of the loaded (shadow) data to the result temporary.
6404       Treat a store-conditional like a normal store, and mark the
6405       result temporary as defined. */
6406    IRType resTy  = typeOfIRTemp(mce->sb->tyenv, stResult);
6407    IRTemp resTmp = findShadowTmpV(mce, stResult);
6409    tl_assert(isIRAtom(stAddr));
6411       tl_assert(isIRAtom(stStoredata));
6413    if (stStoredata == NULL) {
6415       /* Just treat this as a normal load, followed by an assignment of
6416          the value to .result. */
6418       tl_assert(resTy == Ity_I64 || resTy == Ity_I32
6419                 || resTy == Ity_I16 || resTy == Ity_I8);
6420       assign( 'V', mce, resTmp,
6422                  mce, stEnd, resTy, stAddr, 0/*addr bias*/,
6423                  NULL/*always happens*/) );
6425       /* Store Conditional */
6427       IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
6429       tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
6430                 || dataTy == Ity_I16 || dataTy == Ity_I8);
6431       do_shadow_Store( mce, stEnd,
6432                        stAddr, 0/* addr bias */,
6434                        NULL /* shadow data */,
6436       /* This is a store conditional, so it writes to .result a value
6437          indicating whether or not the store succeeded. Just claim
6438          this value is always defined. In the PowerPC interpretation
6439          of store-conditional, definedness of the success indication
6440          depends on whether the address of the store matches the
6441          reservation address. But we can't tell that here (and
6442          anyway, we're not being PowerPC-specific). At least we are
6443          guaranteed that the definedness of the store address, and its
6444          addressibility, will be checked as per normal. So it seems
6445          pretty safe to just say that the success indication is always
6448          In schemeS, for origin tracking, we must correspondingly set
6449          a no-origin value for the origin shadow of .result.
6451       tl_assert(resTy == Ity_I1);
6452       assign( 'V', mce, resTmp, definedOfType(resTy) );
6457 /* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */
6459 static void do_shadow_StoreG ( MCEnv* mce, IRStoreG* sg )
6461    complainIfUndefined(mce, sg->guard, NULL);
6462    /* do_shadow_Store will generate code to check the definedness and
6463       validity of sg->addr, in the case where sg->guard evaluates to
6464       True at run-time. */
6465    do_shadow_Store( mce, sg->end,
6466                     sg->addr, 0/* addr bias */,
6468                     NULL /* shadow data */,
6472 static void do_shadow_LoadG ( MCEnv* mce, IRLoadG* lg )
6474    complainIfUndefined(mce, lg->guard, NULL);
6475    /* expr2vbits_Load_guarded_General will generate code to check the
6476       definedness and validity of lg->addr, in the case where
6477       lg->guard evaluates to True at run-time. */
6479    /* Look at the LoadG's built-in conversion operation, to determine
6480       the source (actual loaded data) type, and the equivalent IROp.
6481       NOTE that implicitly we are taking a widening operation to be
6482       applied to original atoms and producing one that applies to V
6483       bits. Since signed and unsigned widening are self-shadowing,
6484       this is a straight copy of the op (modulo swapping from the
6485       IRLoadGOp form to the IROp form). Note also therefore that this
6486       implicitly duplicates the logic to do with said widening ops in
6487       expr2vbits_Unop. See comment at the start of expr2vbits_Unop. */
6488    IROp   vwiden   = Iop_INVALID;
6489    IRType loadedTy = Ity_INVALID;
6491       case ILGop_IdentV128: loadedTy = Ity_V128; vwiden = Iop_INVALID; break;
6492       case ILGop_Ident64:   loadedTy = Ity_I64;  vwiden = Iop_INVALID; break;
6493       case ILGop_Ident32:   loadedTy = Ity_I32;  vwiden = Iop_INVALID; break;
6494       case ILGop_16Uto32:   loadedTy = Ity_I16;  vwiden = Iop_16Uto32; break;
6495       case ILGop_16Sto32:   loadedTy = Ity_I16;  vwiden = Iop_16Sto32; break;
6496       case ILGop_8Uto32:    loadedTy = Ity_I8;   vwiden = Iop_8Uto32;  break;
6497       case ILGop_8Sto32:    loadedTy = Ity_I8;   vwiden = Iop_8Sto32;  break;
6498       default: VG_(tool_panic)("do_shadow_LoadG");
6502       = expr2vbits( mce, lg->alt, HuOth );
6504       = expr2vbits_Load_guarded_General(mce, lg->end, loadedTy,
6505                                         lg->addr, 0/*addr bias*/,
6506                                         lg->guard, vwiden, vbits_alt );
6507    /* And finally, bind the V bits to the destination temporary. */
6508    assign( 'V', mce, findShadowTmpV(mce, lg->dst), vbits_final );
6512 /*------------------------------------------------------------*/
6513 /*--- Origin tracking stuff ---*/
6514 /*------------------------------------------------------------*/
6516 /* Almost identical to findShadowTmpV. */
6517 static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
6520    /* VG_(indexXA) range-checks 'orig', hence no need to check
6522    ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
6523    tl_assert(ent->kind == Orig);
6524    if (ent->shadowB == IRTemp_INVALID) {
6526          = newTemp( mce, Ity_I32, BSh );
6527       /* newTemp may cause mce->tmpMap to resize, hence previous results
6528          from VG_(indexXA) are invalid. */
6529       ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
6530       tl_assert(ent->kind == Orig);
6531       tl_assert(ent->shadowB == IRTemp_INVALID);
6532       ent->shadowB = tmpB;
6534    return ent->shadowB;
6537 static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
6539    return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
6543 /* Make a guarded origin load, with no special handling in the
6544 didn't-happen case. A GUARD of NULL is assumed to mean "always
6547 Generate IR to do a shadow origins load from BASEADDR+OFFSET and
6548 return the otag. The loaded size is SZB. If GUARD evaluates to
6549 False at run time then the returned otag is zero.
6551 static IRAtom* gen_guarded_load_b ( MCEnv* mce, Int szB,
6553                                     Int offset, IRExpr* guard )
6559    IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr );
6560    IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
6561    IRAtom* ea = baseaddr;
6563       IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
6564                                    : mkU64( (Long)(Int)offset );
6565       ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
6567    bTmp = newTemp(mce, mce->hWordTy, BSh);
6570       case 1: hFun  = (void*)&MC_(helperc_b_load1);
6571               hName = "MC_(helperc_b_load1)";
6573       case 2: hFun  = (void*)&MC_(helperc_b_load2);
6574               hName = "MC_(helperc_b_load2)";
6576       case 4: hFun  = (void*)&MC_(helperc_b_load4);
6577               hName = "MC_(helperc_b_load4)";
6579       case 8: hFun  = (void*)&MC_(helperc_b_load8);
6580               hName = "MC_(helperc_b_load8)";
6582       case 16: hFun  = (void*)&MC_(helperc_b_load16);
6583                hName = "MC_(helperc_b_load16)";
6585       case 32: hFun  = (void*)&MC_(helperc_b_load32);
6586                hName = "MC_(helperc_b_load32)";
6589          VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
6592    di = unsafeIRDirty_1_N(
6593            bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
6598       /* Ideally the didn't-happen return value here would be
6599          all-zeroes (unknown-origin), so it'd be harmless if it got
6600          used inadvertently. We slum it out with the IR-mandated
6601          default value (0b01 repeating, 0x55 etc) as that'll probably
6602          trump all legitimate otags via Max32, and it's pretty
6605    /* no need to mess with any annotations. This call accesses
6606       neither guest state nor guest memory. */
6607    stmt( 'B', mce, IRStmt_Dirty(di) );
6608    if (mce->hWordTy == Ity_I64) {
6610       IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
6611       assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
6612       return mkexpr(bTmp32);
6615       return mkexpr(bTmp);
6620 /* Generate IR to do a shadow origins load from BASEADDR+OFFSET. The
6621 loaded size is SZB. The load is regarded as unconditional (always
6624 static IRAtom* gen_load_b ( MCEnv* mce, Int szB, IRAtom* baseaddr,
6627    return gen_guarded_load_b(mce, szB, baseaddr, offset, NULL/*guard*/);
6631 /* The most general handler for guarded origin loads. A GUARD of NULL
6632 is assumed to mean "always True".
6634 Generate IR to do a shadow origin load from ADDR+BIAS and return
6635 the B bits. The loaded type is TY. If GUARD evaluates to False at
6636 run time then the returned B bits are simply BALT instead.
6639 IRAtom* expr2ori_Load_guarded_General ( MCEnv* mce,
6641                                         IRAtom* addr, UInt bias,
6642                                         IRAtom* guard, IRAtom* balt )
6644    /* If the guard evaluates to True, this will hold the loaded
6645       origin. If the guard evaluates to False, this will be zero,
6646       meaning "unknown origin", in which case we will have to replace
6647       it using an ITE below. */
6649       = assignNew('B', mce, Ity_I32,
6650                   gen_guarded_load_b(mce, sizeofIRType(ty),
6651                                      addr, bias, guard));
6652    /* These are the bits we will return if the load doesn't take
6656    /* Prepare the cond for the ITE. Convert a NULL cond into
6657       something that iropt knows how to fold out later. */
6659       = guard == NULL ? mkU1(1) : guard;
6660    /* And assemble the final result. */
6661    return assignNew('B', mce, Ity_I32, IRExpr_ITE(cond, iftrue, iffalse));
6665 /* Generate a shadow origins store. guard :: Ity_I1 controls whether
6666 the store really happens; NULL means it unconditionally does. */
6667 static void gen_store_b ( MCEnv* mce, Int szB,
6668                           IRAtom* baseaddr, Int offset, IRAtom* dataB,
6674    IRType aTy = typeOfIRExpr( mce->sb->tyenv, baseaddr );
6675    IROp opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
6676    IRAtom* ea = baseaddr;
6678       tl_assert(isOriginalAtom(mce, guard));
6679       tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
6682       IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
6683                                    : mkU64( (Long)(Int)offset );
6684       ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
6686    if (mce->hWordTy == Ity_I64)
6687       dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));
6690       case 1: hFun = (void*)&MC_(helperc_b_store1);
6691               hName = "MC_(helperc_b_store1)";
6693       case 2: hFun = (void*)&MC_(helperc_b_store2);
6694               hName = "MC_(helperc_b_store2)";
6696       case 4: hFun = (void*)&MC_(helperc_b_store4);
6697               hName = "MC_(helperc_b_store4)";
6699       case 8: hFun = (void*)&MC_(helperc_b_store8);
6700               hName = "MC_(helperc_b_store8)";
6702       case 16: hFun = (void*)&MC_(helperc_b_store16);
6703                hName = "MC_(helperc_b_store16)";
6705       case 32: hFun = (void*)&MC_(helperc_b_store32);
6706                hName = "MC_(helperc_b_store32)";
6711    di = unsafeIRDirty_0_N( 2/*regparms*/,
6712                            hName, VG_(fnptr_to_fnentry)( hFun ),
6713                            mkIRExprVec_2( ea, dataB )
6715    /* no need to mess with any annotations. This call accesses
6716       neither guest state nor guest memory. */
6717    if (guard) di->guard = guard;
6718    stmt( 'B', mce, IRStmt_Dirty(di) );
6721 static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
6722    IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
6724       return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
6730 static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
6731    IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
6732    tl_assert(eTy == Ity_I32);
6733    if (dstTy == Ity_I64)
6734       return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
6739 static IRAtom
* schemeE ( MCEnv
* mce
, IRExpr
* e
)
6741 tl_assert(MC_(clo_mc_level
) == 3);
6746 IRRegArray
* descr_b
;
6747 IRAtom
*t1
, *t2
, *t3
, *t4
;
6748 IRRegArray
* descr
= e
->Iex
.GetI
.descr
;
6750 = MC_(get_otrack_reg_array_equiv_int_type
)(descr
);
6751 /* If this array is unshadowable for whatever reason, use the
6752 usual approximation. */
6753 if (equivIntTy
== Ity_INVALID
)
6755 tl_assert(sizeofIRType(equivIntTy
) >= 4);
6756 tl_assert(sizeofIRType(equivIntTy
) == sizeofIRType(descr
->elemTy
));
6757 descr_b
= mkIRRegArray( descr
->base
+ 2*mce
->layout
->total_sizeB
,
6758 equivIntTy
, descr
->nElems
);
6759 /* Do a shadow indexed get of the same size, giving t1. Take
6760 the bottom 32 bits of it, giving t2. Compute into t3 the
6761 origin for the index (almost certainly zero, but there's
6762 no harm in being completely general here, since iropt will
6763 remove any useless code), and fold it in, giving a final
6765 t1
= assignNew( 'B', mce
, equivIntTy
,
6766 IRExpr_GetI( descr_b
, e
->Iex
.GetI
.ix
,
6767 e
->Iex
.GetI
.bias
));
6768 t2
= narrowTo32( mce
, t1
);
6769 t3
= schemeE( mce
, e
->Iex
.GetI
.ix
);
6770 t4
= gen_maxU32( mce
, t2
, t3
);
6776 IRExpr
** args
= e
->Iex
.CCall
.args
;
6777 IRAtom
* curr
= mkU32(0);
6778 for (i
= 0; args
[i
]; i
++) {
6780 tl_assert(isOriginalAtom(mce
, args
[i
]));
6781 /* Only take notice of this arg if the callee's
6782 mc-exclusion mask does not say it is to be excluded. */
6783 if (e
->Iex
.CCall
.cee
->mcx_mask
& (1<<i
)) {
6784 /* the arg is to be excluded from definedness checking.
6786 if (0) VG_(printf
)("excluding %s(%d)\n",
6787 e
->Iex
.CCall
.cee
->name
, i
);
6789 /* calculate the arg's definedness, and pessimistically
6791 here
= schemeE( mce
, args
[i
] );
6792 curr
= gen_maxU32( mce
, curr
, here
);
6799 dszB
= sizeofIRType(e
->Iex
.Load
.ty
);
6800 /* assert that the B value for the address is already
6801 available (somewhere) */
6802 tl_assert(isIRAtom(e
->Iex
.Load
.addr
));
6803 tl_assert(mce
->hWordTy
== Ity_I32
|| mce
->hWordTy
== Ity_I64
);
6804 return gen_load_b( mce
, dszB
, e
->Iex
.Load
.addr
, 0 );
6807 IRAtom
* b1
= schemeE( mce
, e
->Iex
.ITE
.cond
);
6808 IRAtom
* b3
= schemeE( mce
, e
->Iex
.ITE
.iftrue
);
6809 IRAtom
* b2
= schemeE( mce
, e
->Iex
.ITE
.iffalse
);
6810 return gen_maxU32( mce
, b1
, gen_maxU32( mce
, b2
, b3
));
6813 IRAtom
* b1
= schemeE( mce
, e
->Iex
.Qop
.details
->arg1
);
6814 IRAtom
* b2
= schemeE( mce
, e
->Iex
.Qop
.details
->arg2
);
6815 IRAtom
* b3
= schemeE( mce
, e
->Iex
.Qop
.details
->arg3
);
6816 IRAtom
* b4
= schemeE( mce
, e
->Iex
.Qop
.details
->arg4
);
6817 return gen_maxU32( mce
, gen_maxU32( mce
, b1
, b2
),
6818 gen_maxU32( mce
, b3
, b4
) );
6821 IRAtom
* b1
= schemeE( mce
, e
->Iex
.Triop
.details
->arg1
);
6822 IRAtom
* b2
= schemeE( mce
, e
->Iex
.Triop
.details
->arg2
);
6823 IRAtom
* b3
= schemeE( mce
, e
->Iex
.Triop
.details
->arg3
);
6824 return gen_maxU32( mce
, b1
, gen_maxU32( mce
, b2
, b3
) );
6827 switch (e
->Iex
.Binop
.op
) {
6828 case Iop_CasCmpEQ8
: case Iop_CasCmpNE8
:
6829 case Iop_CasCmpEQ16
: case Iop_CasCmpNE16
:
6830 case Iop_CasCmpEQ32
: case Iop_CasCmpNE32
:
6831 case Iop_CasCmpEQ64
: case Iop_CasCmpNE64
:
6832 /* Just say these all produce a defined result,
6833 regardless of their arguments. See
6834 COMMENT_ON_CasCmpEQ in this file. */
6837 IRAtom
* b1
= schemeE( mce
, e
->Iex
.Binop
.arg1
);
6838 IRAtom
* b2
= schemeE( mce
, e
->Iex
.Binop
.arg2
);
6839 return gen_maxU32( mce
, b1
, b2
);
6846 IRAtom
* b1
= schemeE( mce
, e
->Iex
.Unop
.arg
);
6852 return mkexpr( findShadowTmpB( mce
, e
->Iex
.RdTmp
.tmp
));
6854 Int b_offset
= MC_(get_otrack_shadow_offset
)(
6856 sizeofIRType(e
->Iex
.Get
.ty
)
6858 tl_assert(b_offset
>= -1
6859 && b_offset
<= mce
->layout
->total_sizeB
-4);
6860 if (b_offset
>= 0) {
6861 /* FIXME: this isn't an atom! */
6862 return IRExpr_Get( b_offset
+ 2*mce
->layout
->total_sizeB
,
6868 VG_(printf
)("mc_translate.c: schemeE: unhandled: ");
6870 VG_(tool_panic
)("memcheck:schemeE");
6875 static void do_origins_Dirty ( MCEnv
* mce
, IRDirty
* d
)
6877 // This is a hacked version of do_shadow_Dirty
6878 Int i
, k
, n
, toDo
, gSz
, gOff
;
6879 IRAtom
*here
, *curr
;
6882 /* First check the guard. */
6883 curr
= schemeE( mce
, d
->guard
);
6885 /* Now round up all inputs and maxU32 over them. */
6887 /* Inputs: unmasked args
6888 Note: arguments are evaluated REGARDLESS of the guard expression */
6889 for (i
= 0; d
->args
[i
]; i
++) {
6890 IRAtom
* arg
= d
->args
[i
];
6891 if ( (d
->cee
->mcx_mask
& (1<<i
))
6892 || UNLIKELY(is_IRExpr_VECRET_or_GSPTR(arg
)) ) {
6893 /* ignore this arg */
6895 here
= schemeE( mce
, arg
);
6896 curr
= gen_maxU32( mce
, curr
, here
);
6900 /* Inputs: guest state that we read. */
6901 for (i
= 0; i
< d
->nFxState
; i
++) {
6902 tl_assert(d
->fxState
[i
].fx
!= Ifx_None
);
6903 if (d
->fxState
[i
].fx
== Ifx_Write
)
6906 /* Enumerate the described state segments */
6907 for (k
= 0; k
< 1 + d
->fxState
[i
].nRepeats
; k
++) {
6908 gOff
= d
->fxState
[i
].offset
+ k
* d
->fxState
[i
].repeatLen
;
6909 gSz
= d
->fxState
[i
].size
;
6911 /* Ignore any sections marked as 'always defined'. */
6912 if (isAlwaysDefd(mce
, gOff
, gSz
)) {
6914 VG_(printf
)("memcheck: Dirty gst: ignored off %d, sz %d\n",
6919 /* This state element is read or modified. So we need to
6920 consider it. If larger than 4 bytes, deal with it in
6924 tl_assert(gSz
>= 0);
6925 if (gSz
== 0) break;
6926 n
= gSz
<= 4 ? gSz
: 4;
6927 /* update 'curr' with maxU32 of the state slice
6929 b_offset
= MC_(get_otrack_shadow_offset
)(gOff
, 4);
6930 if (b_offset
!= -1) {
6931 /* Observe the guard expression. If it is false use 0, i.e.
6932 nothing is known about the origin */
6933 IRAtom
*cond
, *iffalse
, *iftrue
;
6935 cond
= assignNew( 'B', mce
, Ity_I1
, d
->guard
);
6937 iftrue
= assignNew( 'B', mce
, Ity_I32
,
6939 + 2*mce
->layout
->total_sizeB
,
6941 here
= assignNew( 'B', mce
, Ity_I32
,
6942 IRExpr_ITE(cond
, iftrue
, iffalse
));
6943 curr
= gen_maxU32( mce
, curr
, here
);
6951 /* Inputs: memory */
6953 if (d
->mFx
!= Ifx_None
) {
6954 /* Because we may do multiple shadow loads/stores from the same
6955 base address, it's best to do a single test of its
6956 definedness right now. Post-instrumentation optimisation
6957 should remove all but this test. */
6958 tl_assert(d
->mAddr
);
6959 here
= schemeE( mce
, d
->mAddr
);
6960 curr
= gen_maxU32( mce
, curr
, here
);
6963 /* Deal with memory inputs (reads or modifies) */
6964 if (d
->mFx
== Ifx_Read
|| d
->mFx
== Ifx_Modify
) {
6966 /* chew off 32-bit chunks. We don't care about the endianness
6967 since it's all going to be condensed down to a single bit,
6968 but nevertheless choose an endianness which is hopefully
6969 native to the platform. */
6971 here
= gen_guarded_load_b( mce
, 4, d
->mAddr
, d
->mSize
- toDo
,
6973 curr
= gen_maxU32( mce
, curr
, here
);
6976 /* handle possible 16-bit excess */
6978 here
= gen_guarded_load_b( mce
, 2, d
->mAddr
, d
->mSize
- toDo
,
6980 curr
= gen_maxU32( mce
, curr
, here
);
6983 /* chew off the remaining 8-bit chunk, if any */
6985 here
= gen_guarded_load_b( mce
, 1, d
->mAddr
, d
->mSize
- toDo
,
6987 curr
= gen_maxU32( mce
, curr
, here
);
6990 tl_assert(toDo
== 0);
6993 /* Whew! So curr is a 32-bit B-value which should give an origin
6994 of some use if any of the inputs to the helper are undefined.
6995 Now we need to re-distribute the results to all destinations. */
6997 /* Outputs: the destination temporary, if there is one. */
6998 if (d
->tmp
!= IRTemp_INVALID
) {
6999 dst
= findShadowTmpB(mce
, d
->tmp
);
7000 assign( 'V', mce
, dst
, curr
);
7003 /* Outputs: guest state that we write or modify. */
7004 for (i
= 0; i
< d
->nFxState
; i
++) {
7005 tl_assert(d
->fxState
[i
].fx
!= Ifx_None
);
7006 if (d
->fxState
[i
].fx
== Ifx_Read
)
7009 /* Enumerate the described state segments */
7010 for (k
= 0; k
< 1 + d
->fxState
[i
].nRepeats
; k
++) {
7011 gOff
= d
->fxState
[i
].offset
+ k
* d
->fxState
[i
].repeatLen
;
7012 gSz
= d
->fxState
[i
].size
;
7014 /* Ignore any sections marked as 'always defined'. */
7015 if (isAlwaysDefd(mce
, gOff
, gSz
))
7018 /* This state element is written or modified. So we need to
7019 consider it. If larger than 4 bytes, deal with it in
7023 tl_assert(gSz
>= 0);
7024 if (gSz
== 0) break;
7025 n
= gSz
<= 4 ? gSz
: 4;
7026 /* Write 'curr' to the state slice gOff .. gOff+n-1 */
7027 b_offset
= MC_(get_otrack_shadow_offset
)(gOff
, 4);
7028 if (b_offset
!= -1) {
7030 /* If the guard expression evaluates to false we simply Put
7031 the value that is already stored in the guest state slot */
7032 IRAtom
*cond
, *iffalse
;
7034 cond
= assignNew('B', mce
, Ity_I1
,
7036 iffalse
= assignNew('B', mce
, Ity_I32
,
7037 IRExpr_Get(b_offset
+
7038 2*mce
->layout
->total_sizeB
,
7040 curr
= assignNew('V', mce
, Ity_I32
,
7041 IRExpr_ITE(cond
, curr
, iffalse
));
7043 stmt( 'B', mce
, IRStmt_Put(b_offset
7044 + 2*mce
->layout
->total_sizeB
,
7053 /* Outputs: memory that we write or modify. Same comments about
7054 endianness as above apply. */
7055 if (d
->mFx
== Ifx_Write
|| d
->mFx
== Ifx_Modify
) {
7057 /* chew off 32-bit chunks */
7059 gen_store_b( mce
, 4, d
->mAddr
, d
->mSize
- toDo
, curr
,
7063 /* handle possible 16-bit excess */
7065 gen_store_b( mce
, 2, d
->mAddr
, d
->mSize
- toDo
, curr
,
7069 /* chew off the remaining 8-bit chunk, if any */
7071 gen_store_b( mce
, 1, d
->mAddr
, d
->mSize
- toDo
, curr
,
7075 tl_assert(toDo
== 0);
7080 /* Generate IR for origin shadowing for a general guarded store. */
7081 static void do_origins_Store_guarded ( MCEnv
* mce
,
7089 /* assert that the B value for the address is already available
7090 (somewhere), since the call to schemeE will want to see it.
7091 XXXX how does this actually ensure that?? */
7092 tl_assert(isIRAtom(stAddr
));
7093 tl_assert(isIRAtom(stData
));
7094 dszB
= sizeofIRType( typeOfIRExpr(mce
->sb
->tyenv
, stData
) );
7095 dataB
= schemeE( mce
, stData
);
7096 gen_store_b( mce
, dszB
, stAddr
, 0/*offset*/, dataB
, guard
);
7100 /* Generate IR for origin shadowing for a plain store. */
7101 static void do_origins_Store_plain ( MCEnv
* mce
,
7106 do_origins_Store_guarded ( mce
, stEnd
, stAddr
, stData
,
7111 /* ---- Dealing with LoadG/StoreG (not entirely simple) ---- */
7113 static void do_origins_StoreG ( MCEnv
* mce
, IRStoreG
* sg
)
7115 do_origins_Store_guarded( mce
, sg
->end
, sg
->addr
,
7116 sg
->data
, sg
->guard
);
7119 static void do_origins_LoadG ( MCEnv
* mce
, IRLoadG
* lg
)
7121 IRType loadedTy
= Ity_INVALID
;
7123 case ILGop_IdentV128
: loadedTy
= Ity_V128
; break;
7124 case ILGop_Ident64
: loadedTy
= Ity_I64
; break;
7125 case ILGop_Ident32
: loadedTy
= Ity_I32
; break;
7126 case ILGop_16Uto32
: loadedTy
= Ity_I16
; break;
7127 case ILGop_16Sto32
: loadedTy
= Ity_I16
; break;
7128 case ILGop_8Uto32
: loadedTy
= Ity_I8
; break;
7129 case ILGop_8Sto32
: loadedTy
= Ity_I8
; break;
7130 default: VG_(tool_panic
)("schemeS.IRLoadG");
7133 = schemeE( mce
,lg
->alt
);
7135 = expr2ori_Load_guarded_General(mce
, loadedTy
,
7136 lg
->addr
, 0/*addr bias*/,
7137 lg
->guard
, ori_alt
);
7138 /* And finally, bind the origin to the destination temporary. */
7139 assign( 'B', mce
, findShadowTmpB(mce
, lg
->dst
), ori_final
);
7143 static void schemeS ( MCEnv
* mce
, IRStmt
* st
)
7145 tl_assert(MC_(clo_mc_level
) == 3);
7150 /* The value-check instrumenter handles this - by arranging
7151 to pass the address of the next instruction to
7152 MC_(helperc_MAKE_STACK_UNINIT). This is all that needs to
7153 happen for origin tracking w.r.t. AbiHints. So there is
7154 nothing to do here. */
7158 IRPutI
*puti
= st
->Ist
.PutI
.details
;
7159 IRRegArray
* descr_b
;
7160 IRAtom
*t1
, *t2
, *t3
, *t4
;
7161 IRRegArray
* descr
= puti
->descr
;
7163 = MC_(get_otrack_reg_array_equiv_int_type
)(descr
);
7164 /* If this array is unshadowable for whatever reason,
7165 generate no code. */
7166 if (equivIntTy
== Ity_INVALID
)
7168 tl_assert(sizeofIRType(equivIntTy
) >= 4);
7169 tl_assert(sizeofIRType(equivIntTy
) == sizeofIRType(descr
->elemTy
));
7171 = mkIRRegArray( descr
->base
+ 2*mce
->layout
->total_sizeB
,
7172 equivIntTy
, descr
->nElems
);
7173 /* Compute a value to Put - the conjoinment of the origin for
7174 the data to be Put-ted (obviously) and of the index value
7175 (not so obviously). */
7176 t1
= schemeE( mce
, puti
->data
);
7177 t2
= schemeE( mce
, puti
->ix
);
7178 t3
= gen_maxU32( mce
, t1
, t2
);
7179 t4
= zWidenFrom32( mce
, equivIntTy
, t3
);
7180 stmt( 'B', mce
, IRStmt_PutI( mkIRPutI(descr_b
, puti
->ix
,
7186 do_origins_Dirty( mce
, st
->Ist
.Dirty
.details
);
7190 do_origins_Store_plain( mce
, st
->Ist
.Store
.end
,
7192 st
->Ist
.Store
.data
);
7196 do_origins_StoreG( mce
, st
->Ist
.StoreG
.details
);
7200 do_origins_LoadG( mce
, st
->Ist
.LoadG
.details
);
7204 /* In short: treat a load-linked like a normal load followed
7205 by an assignment of the loaded (shadow) data the result
7206 temporary. Treat a store-conditional like a normal store,
7207 and mark the result temporary as defined. */
7208 if (st
->Ist
.LLSC
.storedata
== NULL
) {
7211 = typeOfIRTemp(mce
->sb
->tyenv
, st
->Ist
.LLSC
.result
);
7213 = IRExpr_Load(st
->Ist
.LLSC
.end
, resTy
, st
->Ist
.LLSC
.addr
);
7214 tl_assert(resTy
== Ity_I64
|| resTy
== Ity_I32
7215 || resTy
== Ity_I16
|| resTy
== Ity_I8
);
7216 assign( 'B', mce
, findShadowTmpB(mce
, st
->Ist
.LLSC
.result
),
7217 schemeE(mce
, vanillaLoad
));
7219 /* Store conditional */
7220 do_origins_Store_plain( mce
, st
->Ist
.LLSC
.end
,
7222 st
->Ist
.LLSC
.storedata
);
7223 /* For the rationale behind this, see comments at the
7224 place where the V-shadow for .result is constructed, in
7225 do_shadow_LLSC. In short, we regard .result as
7227 assign( 'B', mce
, findShadowTmpB(mce
, st
->Ist
.LLSC
.result
),
7235 = MC_(get_otrack_shadow_offset
)(
7237 sizeofIRType(typeOfIRExpr(mce
->sb
->tyenv
, st
->Ist
.Put
.data
))
7239 if (b_offset
>= 0) {
7240 /* FIXME: this isn't an atom! */
7241 stmt( 'B', mce
, IRStmt_Put(b_offset
+ 2*mce
->layout
->total_sizeB
,
7242 schemeE( mce
, st
->Ist
.Put
.data
)) );
7248 assign( 'B', mce
, findShadowTmpB(mce
, st
->Ist
.WrTmp
.tmp
),
7249 schemeE(mce
, st
->Ist
.WrTmp
.data
) );
7259 VG_(printf
)("mc_translate.c: schemeS: unhandled: ");
7261 VG_(tool_panic
)("memcheck:schemeS");
7266 /*------------------------------------------------------------*/
7267 /*--- Post-tree-build final tidying ---*/
7268 /*------------------------------------------------------------*/
7270 /* This exploits the observation that Memcheck often produces
7271 repeated conditional calls of the form
7273 Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)
7275 with the same guard expression G guarding the same helper call.
7276 The second and subsequent calls are redundant. This usually
7277 results from instrumentation of guest code containing multiple
7278 memory references at different constant offsets from the same base
7279 register. After optimisation of the instrumentation, you get a
7280 test for the definedness of the base register for each memory
7281 reference, which is kinda pointless. MC_(final_tidy) therefore
7282 looks for such repeated calls and removes all but the first. */
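/* Illustrative sketch, not from the original sources, of the kind of
   redundancy this pass removes. After instrumentation of two loads at
   different constant offsets from the same base register, the block
   may contain (in pseudo-IR) something like:

      t9 = CmpNEZ64(t_base#)          # definedness test of the base reg
      DIRTY t9 ::: MC_(helperc_value_check8_fail_no_o)()
      ... = LDle:I64(Add64(t_base,0x0))
      DIRTY t9 ::: MC_(helperc_value_check8_fail_no_o)()
      ... = LDle:I64(Add64(t_base,0x8))

   The second helper call adds nothing; check_or_add below recognises
   the repeated (helper, guard) pair and MC_(final_tidy) replaces the
   call with IRStmt_NoOp. The temporary names and exact IR shape here
   are made up for illustration. */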
7285 /* With some testing on perf/bz2.c, on amd64 and x86, compiled with
7286 gcc-5.3.1 -O2, it appears that 16 entries in the array are enough to
7287 get almost all the benefits of this transformation whilst causing
7288 the slide-back case to just often enough to be verifiably
7289 correct. For posterity, the numbers are:
7293 1 4,336 (112,212 -> 1,709,473; ratio 15.2)
7294 2 4,336 (112,194 -> 1,669,895; ratio 14.9)
7295 3 4,336 (112,194 -> 1,660,713; ratio 14.8)
7296 4 4,336 (112,194 -> 1,658,555; ratio 14.8)
7297 5 4,336 (112,194 -> 1,655,447; ratio 14.8)
7298 6 4,336 (112,194 -> 1,655,101; ratio 14.8)
7299 7 4,336 (112,194 -> 1,654,858; ratio 14.7)
7300 8 4,336 (112,194 -> 1,654,810; ratio 14.7)
7301 10 4,336 (112,194 -> 1,654,621; ratio 14.7)
7302 12 4,336 (112,194 -> 1,654,678; ratio 14.7)
7303 16 4,336 (112,194 -> 1,654,494; ratio 14.7)
7304 32 4,336 (112,194 -> 1,654,602; ratio 14.7)
7305 inf 4,336 (112,194 -> 1,654,602; ratio 14.7)
7309 1 4,113 (107,329 -> 1,822,171; ratio 17.0)
7310 2 4,113 (107,329 -> 1,806,443; ratio 16.8)
7311 3 4,113 (107,329 -> 1,803,967; ratio 16.8)
7312 4 4,113 (107,329 -> 1,802,785; ratio 16.8)
7313 5 4,113 (107,329 -> 1,802,412; ratio 16.8)
7314 6 4,113 (107,329 -> 1,802,062; ratio 16.8)
7315 7 4,113 (107,329 -> 1,801,976; ratio 16.8)
7316 8 4,113 (107,329 -> 1,801,886; ratio 16.8)
7317 10 4,113 (107,329 -> 1,801,653; ratio 16.8)
7318 12 4,113 (107,329 -> 1,801,526; ratio 16.8)
7319 16 4,113 (107,329 -> 1,801,298; ratio 16.8)
7320 32 4,113 (107,329 -> 1,800,827; ratio 16.8)
7321 inf 4,113 (107,329 -> 1,800,827; ratio 16.8)
7324 /* Structs for recording which (helper, guard) pairs we have already
7327 #define N_TIDYING_PAIRS 16
7330    struct { void* entry; IRExpr* guard; }
7335       Pair pairs[N_TIDYING_PAIRS +1/*for bounds checking*/];
7341 /* Return True if e1 and e2 definitely denote the same value (used to
7342 compare guards). Return False if unknown; False is the safe
7343 answer. Since guest registers and guest memory do not have the
7344 SSA property we must return False if any Gets or Loads appear in
7345 the expression. This implicitly assumes that e1 and e2 have the
7346 same IR type, which is always true here -- the type is Ity_I1. */
7348 static Bool
sameIRValue ( IRExpr
* e1
, IRExpr
* e2
)
7350 if (e1
->tag
!= e2
->tag
)
7354 return eqIRConst( e1
->Iex
.Const
.con
, e2
->Iex
.Const
.con
);
7356 return e1
->Iex
.Binop
.op
== e2
->Iex
.Binop
.op
7357 && sameIRValue(e1
->Iex
.Binop
.arg1
, e2
->Iex
.Binop
.arg1
)
7358 && sameIRValue(e1
->Iex
.Binop
.arg2
, e2
->Iex
.Binop
.arg2
);
7360 return e1
->Iex
.Unop
.op
== e2
->Iex
.Unop
.op
7361 && sameIRValue(e1
->Iex
.Unop
.arg
, e2
->Iex
.Unop
.arg
);
7363 return e1
->Iex
.RdTmp
.tmp
== e2
->Iex
.RdTmp
.tmp
;
7365 return sameIRValue( e1
->Iex
.ITE
.cond
, e2
->Iex
.ITE
.cond
)
7366 && sameIRValue( e1
->Iex
.ITE
.iftrue
, e2
->Iex
.ITE
.iftrue
)
7367 && sameIRValue( e1
->Iex
.ITE
.iffalse
, e2
->Iex
.ITE
.iffalse
);
7371 /* be lazy. Could define equality for these, but they never
7372 appear to be used. */
7377 /* be conservative - these may not give the same value each
7381 /* should never see this */
7384 VG_(printf
)("mc_translate.c: sameIRValue: unhandled: ");
7386 VG_(tool_panic
)("memcheck:sameIRValue");
7391 /* See if 'pairs' already has an entry for (entry, guard). Return
7392 True if so. If not, add an entry. */
7395 Bool check_or_add ( Pairs* tidyingEnv, IRExpr* guard, void* entry )
7397    UInt i, n = tidyingEnv->pairsUsed;
7398    tl_assert(n <= N_TIDYING_PAIRS);
7399    for (i = 0; i < n; i++) {
7400       if (tidyingEnv->pairs[i].entry == entry
7401           && sameIRValue(tidyingEnv->pairs[i].guard, guard))
7404    /* (guard, entry) wasn't found in the array. Add it at the end.
7405       If the array is already full, slide the entries one slot
7406       backwards. This means we will lose the ability to detect
7407       duplicates from the pair in slot zero, but that happens so
7408       rarely that it's unlikely to have much effect on overall code
7409       quality. Also, this strategy loses the check for the oldest
7410       tracked exit (memory reference, basically) and so that is (I'd
7411       guess) least likely to be re-used after this point. */
7413    if (n == N_TIDYING_PAIRS) {
7414       for (i = 1; i < N_TIDYING_PAIRS; i++) {
7415          tidyingEnv->pairs[i-1] = tidyingEnv->pairs[i];
7417       tidyingEnv->pairs[N_TIDYING_PAIRS-1].entry = entry;
7418       tidyingEnv->pairs[N_TIDYING_PAIRS-1].guard = guard;
7420       tl_assert(n < N_TIDYING_PAIRS);
7421       tidyingEnv->pairs[n].entry = entry;
7422       tidyingEnv->pairs[n].guard = guard;
7424       tidyingEnv->pairsUsed = n;
7429 static Bool is_helperc_value_checkN_fail ( const HChar* name )
7431    /* This is expensive because it happens a lot. We are checking to
7432       see whether |name| is one of the following 8 strings:
7434          MC_(helperc_value_check8_fail_no_o)
7435          MC_(helperc_value_check4_fail_no_o)
7436          MC_(helperc_value_check0_fail_no_o)
7437          MC_(helperc_value_check1_fail_no_o)
7438          MC_(helperc_value_check8_fail_w_o)
7439          MC_(helperc_value_check0_fail_w_o)
7440          MC_(helperc_value_check1_fail_w_o)
7441          MC_(helperc_value_check4_fail_w_o)
7443       To speed it up, check the common prefix just once, rather than
7446    const HChar* prefix = "MC_(helperc_value_check";
7452       if (p == 0) break; /* ran off the end of the prefix */
7453       /* We still have some prefix to use */
7454       if (n == 0) return False; /* have prefix, but name ran out */
7455       if (n != p) return False; /* have both pfx and name, but no match */
7460    /* Check the part after the prefix. */
7461    tl_assert(*prefix == 0 && *name != 0);
7462    return 0==VG_(strcmp)(name, "8_fail_no_o)")
7463           || 0==VG_(strcmp)(name, "4_fail_no_o)")
7464           || 0==VG_(strcmp)(name, "0_fail_no_o)")
7465           || 0==VG_(strcmp)(name, "1_fail_no_o)")
7466           || 0==VG_(strcmp)(name, "8_fail_w_o)")
7467           || 0==VG_(strcmp)(name, "4_fail_w_o)")
7468           || 0==VG_(strcmp)(name, "0_fail_w_o)")
7469           || 0==VG_(strcmp)(name, "1_fail_w_o)");
7472 IRSB* MC_(final_tidy) ( IRSB* sb_in )
7479    Bool alreadyPresent;
7482    pairs.pairsUsed = 0;
7484    pairs.pairs[N_TIDYING_PAIRS].entry = (void*)0x123;
7485    pairs.pairs[N_TIDYING_PAIRS].guard = (IRExpr*)0x456;
7487    /* Scan forwards through the statements. Each time a call to one
7488       of the relevant helpers is seen, check if we have made a
7489       previous call to the same helper using the same guard
7490       expression, and if so, delete the call. */
7491    for (i = 0; i < sb_in->stmts_used; i++) {
7492       st = sb_in->stmts[i];
7494       if (st->tag != Ist_Dirty)
7496       di = st->Ist.Dirty.details;
7499       if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
7501       if (!is_helperc_value_checkN_fail( cee->name ))
7503       /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
7504          guard 'guard'. Check if we have already seen a call to this
7505          function with the same guard. If so, delete it. If not,
7506          add it to the set of calls we do know about. */
7507       alreadyPresent = check_or_add( &pairs, guard, cee->addr );
7508       if (alreadyPresent) {
7509          sb_in->stmts[i] = IRStmt_NoOp();
7510          if (0) VG_(printf)("XX\n");
7514    tl_assert(pairs.pairs[N_TIDYING_PAIRS].entry == (void*)0x123);
7515    tl_assert(pairs.pairs[N_TIDYING_PAIRS].guard == (IRExpr*)0x456);
7520 #undef N_TIDYING_PAIRS
7523 /*------------------------------------------------------------*/
7524 /*--- Startup assertion checking ---*/
7525 /*------------------------------------------------------------*/
7527 void MC_(do_instrumentation_startup_checks)( void )
7529    /* Make a best-effort check to see that is_helperc_value_checkN_fail
7530       is working as we expect. */
7532 #  define CHECK(_expected, _string) \
7533       tl_assert((_expected) == is_helperc_value_checkN_fail(_string))
7535    /* It should identify these 8, and no others, as targets. */
7536    CHECK(True, "MC_(helperc_value_check8_fail_no_o)");
7537    CHECK(True, "MC_(helperc_value_check4_fail_no_o)");
7538    CHECK(True, "MC_(helperc_value_check0_fail_no_o)");
7539    CHECK(True, "MC_(helperc_value_check1_fail_no_o)");
7540    CHECK(True, "MC_(helperc_value_check8_fail_w_o)");
7541    CHECK(True, "MC_(helperc_value_check0_fail_w_o)");
7542    CHECK(True, "MC_(helperc_value_check1_fail_w_o)");
7543    CHECK(True, "MC_(helperc_value_check4_fail_w_o)");
7545    /* Ad-hoc selection of other strings gathered via a quick test. */
7546    CHECK(False, "amd64g_dirtyhelper_CPUID_avx2");
7547    CHECK(False, "amd64g_dirtyhelper_RDTSC");
7548    CHECK(False, "MC_(helperc_b_load1)");
7549    CHECK(False, "MC_(helperc_b_load2)");
7550    CHECK(False, "MC_(helperc_b_load4)");
7551    CHECK(False, "MC_(helperc_b_load8)");
7552    CHECK(False, "MC_(helperc_b_load16)");
7553    CHECK(False, "MC_(helperc_b_load32)");
7554    CHECK(False, "MC_(helperc_b_store1)");
7555    CHECK(False, "MC_(helperc_b_store2)");
7556    CHECK(False, "MC_(helperc_b_store4)");
7557    CHECK(False, "MC_(helperc_b_store8)");
7558    CHECK(False, "MC_(helperc_b_store16)");
7559    CHECK(False, "MC_(helperc_b_store32)");
7560    CHECK(False, "MC_(helperc_LOADV8)");
7561    CHECK(False, "MC_(helperc_LOADV16le)");
7562    CHECK(False, "MC_(helperc_LOADV32le)");
7563    CHECK(False, "MC_(helperc_LOADV64le)");
7564    CHECK(False, "MC_(helperc_LOADV128le)");
7565    CHECK(False, "MC_(helperc_LOADV256le)");
7566    CHECK(False, "MC_(helperc_STOREV16le)");
7567    CHECK(False, "MC_(helperc_STOREV32le)");
7568    CHECK(False, "MC_(helperc_STOREV64le)");
7569    CHECK(False, "MC_(helperc_STOREV8)");
7570    CHECK(False, "track_die_mem_stack_8");
7571    CHECK(False, "track_new_mem_stack_8_w_ECU");
7572    CHECK(False, "MC_(helperc_MAKE_STACK_UNINIT_w_o)");
7573    CHECK(False, "VG_(unknown_SP_update_w_ECU)");
7579 /*------------------------------------------------------------*/
7580 /*--- Memcheck main ---*/
7581 /*------------------------------------------------------------*/
7583 static Bool
isBogusAtom ( IRAtom
* at
)
7585 if (at
->tag
== Iex_RdTmp
)
7587 tl_assert(at
->tag
== Iex_Const
);
7590 IRConst
* con
= at
->Iex
.Const
.con
;
7592 case Ico_U1
: return False
;
7593 case Ico_U8
: n
= (ULong
)con
->Ico
.U8
; break;
7594 case Ico_U16
: n
= (ULong
)con
->Ico
.U16
; break;
7595 case Ico_U32
: n
= (ULong
)con
->Ico
.U32
; break;
7596 case Ico_U64
: n
= (ULong
)con
->Ico
.U64
; break;
7597 case Ico_F32
: return False
;
7598 case Ico_F64
: return False
;
7599 case Ico_F32i
: return False
;
7600 case Ico_F64i
: return False
;
7601 case Ico_V128
: return False
;
7602 case Ico_V256
: return False
;
7603 default: ppIRExpr(at
); tl_assert(0);
7605 /* VG_(printf)("%llx\n", n); */
7607 if (LIKELY(n
<= 0x0000000000001000ULL
)) return False
;
7608 if (LIKELY(n
>= 0xFFFFFFFFFFFFF000ULL
)) return False
;
7609 /* The list of bogus atoms is: */
7610 return (/*32*/ n
== 0xFEFEFEFFULL
7611 /*32*/ || n
== 0x80808080ULL
7612 /*32*/ || n
== 0x7F7F7F7FULL
7613 /*32*/ || n
== 0x7EFEFEFFULL
7614 /*32*/ || n
== 0x81010100ULL
7615 /*64*/ || n
== 0xFFFFFFFFFEFEFEFFULL
7616 /*64*/ || n
== 0xFEFEFEFEFEFEFEFFULL
7617 /*64*/ || n
== 0x0000000000008080ULL
7618 /*64*/ || n
== 0x8080808080808080ULL
7619 /*64*/ || n
== 0x0101010101010101ULL
7624 /* Does 'st' mention any of the literals identified/listed in
7626 static inline Bool
containsBogusLiterals ( /*FLAT*/ IRStmt
* st
)
7634 e
= st
->Ist
.WrTmp
.data
;
7640 return isBogusAtom(e
);
7642 return isBogusAtom(e
->Iex
.Unop
.arg
)
7643 || e
->Iex
.Unop
.op
== Iop_GetMSBs8x16
;
7645 return isBogusAtom(e
->Iex
.GetI
.ix
);
7647 return isBogusAtom(e
->Iex
.Binop
.arg1
)
7648 || isBogusAtom(e
->Iex
.Binop
.arg2
);
7650 return isBogusAtom(e
->Iex
.Triop
.details
->arg1
)
7651 || isBogusAtom(e
->Iex
.Triop
.details
->arg2
)
7652 || isBogusAtom(e
->Iex
.Triop
.details
->arg3
);
7654 return isBogusAtom(e
->Iex
.Qop
.details
->arg1
)
7655 || isBogusAtom(e
->Iex
.Qop
.details
->arg2
)
7656 || isBogusAtom(e
->Iex
.Qop
.details
->arg3
)
7657 || isBogusAtom(e
->Iex
.Qop
.details
->arg4
);
7659 return isBogusAtom(e
->Iex
.ITE
.cond
)
7660 || isBogusAtom(e
->Iex
.ITE
.iftrue
)
7661 || isBogusAtom(e
->Iex
.ITE
.iffalse
);
7663 return isBogusAtom(e
->Iex
.Load
.addr
);
7665 for (i
= 0; e
->Iex
.CCall
.args
[i
]; i
++)
7666 if (isBogusAtom(e
->Iex
.CCall
.args
[i
]))
7673 d
= st
->Ist
.Dirty
.details
;
7674 for (i
= 0; d
->args
[i
]; i
++) {
7675 IRAtom
* atom
= d
->args
[i
];
7676 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(atom
))) {
7677 if (isBogusAtom(atom
))
7681 if (isBogusAtom(d
->guard
))
7683 if (d
->mAddr
&& isBogusAtom(d
->mAddr
))
7687 return isBogusAtom(st
->Ist
.Put
.data
);
7689 return isBogusAtom(st
->Ist
.PutI
.details
->ix
)
7690 || isBogusAtom(st
->Ist
.PutI
.details
->data
);
7692 return isBogusAtom(st
->Ist
.Store
.addr
)
7693 || isBogusAtom(st
->Ist
.Store
.data
);
7695 IRStoreG
* sg
= st
->Ist
.StoreG
.details
;
7696 return isBogusAtom(sg
->addr
) || isBogusAtom(sg
->data
)
7697 || isBogusAtom(sg
->guard
);
7700 IRLoadG
* lg
= st
->Ist
.LoadG
.details
;
7701 return isBogusAtom(lg
->addr
) || isBogusAtom(lg
->alt
)
7702 || isBogusAtom(lg
->guard
);
7705 return isBogusAtom(st
->Ist
.Exit
.guard
);
7707 return isBogusAtom(st
->Ist
.AbiHint
.base
)
7708 || isBogusAtom(st
->Ist
.AbiHint
.nia
);
7714 cas
= st
->Ist
.CAS
.details
;
7715 return isBogusAtom(cas
->addr
)
7716 || (cas
->expdHi
? isBogusAtom(cas
->expdHi
) : False
)
7717 || isBogusAtom(cas
->expdLo
)
7718 || (cas
->dataHi
? isBogusAtom(cas
->dataHi
) : False
)
7719 || isBogusAtom(cas
->dataLo
);
7721 return isBogusAtom(st
->Ist
.LLSC
.addr
)
7722 || (st
->Ist
.LLSC
.storedata
7723 ? isBogusAtom(st
->Ist
.LLSC
.storedata
)
7728 VG_(tool_panic
)("hasBogusLiterals");
7733 /* This is the pre-instrumentation analysis. It does a backwards pass over
7734 the stmts in |sb_in| to determine a HowUsed value for each tmp defined in
7737 Unrelatedly, it also checks all literals in the block with |isBogusAtom|,
7738 as a positive result from that is a strong indication that we need to
7739 expensively instrument add/sub in the block. We do both analyses in one
7740 pass, even though they are independent, so as to avoid the overhead of
7741 having to traverse the whole block twice.
7743 The usage pass proceeds as follows. Let max= be the max operation in the
7744 HowUsed lattice, hence
7746 X max= Y means X = max(X, Y)
7750 for t in original tmps . useEnv[t] = HuUnU
7752 for t used in the block's . next field
7753 useEnv[t] max= HuPCa // because jmp targets are PCast-tested
7755 for st iterating *backwards* in the block
7759 case "t1 = load(t2)" // case 1
7760 useEnv[t2] max= HuPCa
7762 case "t1 = add(t2, t3)" // case 2
7763 useEnv[t2] max= useEnv[t1]
7764 useEnv[t3] max= useEnv[t1]
7767 for t in st.usedTmps // case 3
7768 useEnv[t] max= HuOth
7769 // same as useEnv[t] = HuOth
7771 The general idea is that we accumulate, in useEnv[], information about
7772 how each tmp is used. That can be updated as we work further back
7773 through the block and find more uses of it, but its HowUsed value can
7774 only ascend the lattice, not descend.
7776 Initially we mark all tmps as unused. In case (1), if a tmp is seen to
7777 be used as a memory address, then its use is at least HuPCa. The point
7778 is that for a memory address we will add instrumentation to check if any
7779 bit of the address is undefined, which means that we won't need expensive
7780 V-bit propagation through an add expression that computed the address --
7781 cheap add instrumentation will be equivalent.
7783 Note in case (1) that if we have previously seen a non-memory-address use
7784 of the tmp, then its use will already be HuOth and will be unchanged by
7785 the max= operation. And if it turns out that the source of the tmp was
7786 an add, then we'll have to expensively instrument the add, because we
7787 can't prove that, for the previous non-memory-address use of the tmp,
7788 cheap and expensive instrumentation will be equivalent.
7790 In case 2, we propagate the usage-mode of the result of an add back
7791 through to its operands. Again, we use max= so as to take account of the
7792 fact that t2 or t3 might later in the block (viz, earlier in the
7793 iteration) have been used in a way that requires expensive add
7796 In case 3, we deal with all other tmp uses. We assume that we'll need a
7797 result that is as accurate as possible, so we max= HuOth into its use
7798 mode. Since HuOth is the top of the lattice, that's equivalent to just
7799 setting its use to HuOth.
7801 The net result of all this is that:
7803 tmps that are used either
7804 - only as a memory address, or
7805 - only as part of a tree of adds that computes a memory address,
7806 and has no other use
7807 are marked as HuPCa, and so we can instrument their generating Add
7808 nodes cheaply, which is the whole point of this analysis
7810 tmps that are used any other way at all are marked as HuOth
7812 tmps that are unused are marked as HuUnU. We don't expect to see any
7813 since we expect that the incoming IR has had all dead assignments
7814 removed by previous optimisation passes. Nevertheless the analysis is
7815 correct even in the presence of dead tmps.
7817 A final comment on dead tmps. In case 1 and case 2, we could actually
7818 conditionalise the updates thusly:
7820 if (useEnv[t1] > HuUnU) { useEnv[t2] max= HuPCa } // case 1
7822 if (useEnv[t1] > HuUnU) { useEnv[t2] max= useEnv[t1] } // case 2
7823 if (useEnv[t1] > HuUnU) { useEnv[t3] max= useEnv[t1] } // case 2
7825 In other words, if the assigned-to tmp |t1| is never used, then there's
7826 no point in propagating any use through to its operands. That won't
7827 change the final HuPCa-vs-HuOth results, which is what we care about.
7828 Given that we expect to get dead-code-free inputs, there's no point in
7829 adding this extra refinement.
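/* A minimal sketch, not from the original sources, of the "max="
   update described above, assuming the HowUsed values are ordered
   HuUnU < HuPCa < HuOth as the lattice suggests. noteTmpUsesIn below
   applies exactly this update to every tmp occurring in an atom:

      static inline void useEnv_maxEq ( HowUsed* useEnv, IRTemp t,
                                        HowUsed newUse )
      {
         // useEnv[t] max= newUse, i.e. useEnv[t] = max(useEnv[t], newUse)
         if (newUse > useEnv[t]) useEnv[t] = newUse;
      }

   The helper name useEnv_maxEq is hypothetical and used only for
   illustration. */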
7832 /* Helper for |preInstrumentationAnalysis|. */
7833 static inline void noteTmpUsesIn ( /*MOD*/HowUsed
* useEnv
,
7835 HowUsed newUse
, IRAtom
* at
)
7837 /* For the atom |at|, declare that for any tmp |t| in |at|, we will have
7838 seen a use of |newUse|. So, merge that info into |t|'s accumulated
7845 IRTemp t
= at
->Iex
.RdTmp
.tmp
;
7846 tl_assert(t
< tyenvUsed
); // "is an original tmp"
7847 // The "max" operation in the lattice
7848 if (newUse
> useEnv
[t
]) useEnv
[t
] = newUse
;
7852 // We should never get here -- it implies non-flat IR
7854 VG_(tool_panic
)("noteTmpUsesIn");
7861 static void preInstrumentationAnalysis ( /*OUT*/HowUsed
** useEnvP
,
7862 /*OUT*/Bool
* hasBogusLiteralsP
,
7865 const UInt nOrigTmps
= (UInt
)sb_in
->tyenv
->types_used
;
7867 // We've seen no bogus literals so far.
7870 // This is calloc'd, so implicitly all entries are initialised to HuUnU.
7871 HowUsed
* useEnv
= VG_(calloc
)("mc.preInstrumentationAnalysis.1",
7872 nOrigTmps
, sizeof(HowUsed
));
7874 // Firstly, roll in contributions from the final dst address.
7875 bogus
= isBogusAtom(sb_in
->next
);
7876 noteTmpUsesIn(useEnv
, nOrigTmps
, HuPCa
, sb_in
->next
);
7878 // Now work backwards through the stmts.
7879 for (Int i
= sb_in
->stmts_used
-1; i
>= 0; i
--) {
7880 IRStmt
* st
= sb_in
->stmts
[i
];
7882 // Deal with literals.
7883 if (LIKELY(!bogus
)) {
7884 bogus
= containsBogusLiterals(st
);
7887 // Deal with tmp uses.
7890 IRTemp dst
= st
->Ist
.WrTmp
.tmp
;
7891 IRExpr
* rhs
= st
->Ist
.WrTmp
.data
;
7892 // This is the one place where we have to consider all possible
7893 // tags for |rhs|, and can't just assume it is a tmp or a const.
7896 // just propagate demand for |dst| into this tmp use.
7897 noteTmpUsesIn(useEnv
, nOrigTmps
, useEnv
[dst
], rhs
);
7900 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, rhs
->Iex
.Unop
.arg
);
7903 if (rhs
->Iex
.Binop
.op
== Iop_Add64
7904 || rhs
->Iex
.Binop
.op
== Iop_Add32
) {
7905 // propagate demand for |dst| through to the operands.
7906 noteTmpUsesIn(useEnv
, nOrigTmps
,
7907 useEnv
[dst
], rhs
->Iex
.Binop
.arg1
);
7908 noteTmpUsesIn(useEnv
, nOrigTmps
,
7909 useEnv
[dst
], rhs
->Iex
.Binop
.arg2
);
7911 // just say that the operands are used in some unknown way.
7912 noteTmpUsesIn(useEnv
, nOrigTmps
,
7913 HuOth
, rhs
->Iex
.Binop
.arg1
);
7914 noteTmpUsesIn(useEnv
, nOrigTmps
,
7915 HuOth
, rhs
->Iex
.Binop
.arg2
);
7919 // All operands are used in some unknown way.
7920 IRTriop
* tri
= rhs
->Iex
.Triop
.details
;
7921 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, tri
->arg1
);
7922 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, tri
->arg2
);
7923 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, tri
->arg3
);
7927 // All operands are used in some unknown way.
7928 IRQop
* qop
= rhs
->Iex
.Qop
.details
;
7929 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, qop
->arg1
);
7930 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, qop
->arg2
);
7931 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, qop
->arg3
);
7932 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, qop
->arg4
);
7936 // The address will be checked (== PCasted).
7937 noteTmpUsesIn(useEnv
, nOrigTmps
, HuPCa
, rhs
->Iex
.Load
.addr
);
7940 // The condition is PCasted, the then- and else-values
7942 noteTmpUsesIn(useEnv
, nOrigTmps
, HuPCa
, rhs
->Iex
.ITE
.cond
);
7943 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, rhs
->Iex
.ITE
.iftrue
);
7944 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, rhs
->Iex
.ITE
.iffalse
);
7947 // The args are used in unknown ways.
7948 for (IRExpr
** args
= rhs
->Iex
.CCall
.args
; *args
; args
++) {
7949 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, *args
);
7953 // The index will be checked/PCasted (see do_shadow_GETI)
7954 noteTmpUsesIn(useEnv
, nOrigTmps
, HuPCa
, rhs
->Iex
.GetI
.ix
);
7962 VG_(tool_panic
)("preInstrumentationAnalysis:"
7963 " unhandled IRExpr");
7968 // The address will be checked (== PCasted). The data will be
7969 // used in some unknown way.
7970 noteTmpUsesIn(useEnv
, nOrigTmps
, HuPCa
, st
->Ist
.Store
.addr
);
7971 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, st
->Ist
.Store
.data
);
7974 // The guard will be checked (== PCasted)
7975 noteTmpUsesIn(useEnv
, nOrigTmps
, HuPCa
, st
->Ist
.Exit
.guard
);
7978 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, st
->Ist
.Put
.data
);
7981 IRPutI
* putI
= st
->Ist
.PutI
.details
;
7982 // The index will be checked/PCasted (see do_shadow_PUTI). The
7983 // data will be used in an unknown way.
7984 noteTmpUsesIn(useEnv
, nOrigTmps
, HuPCa
, putI
->ix
);
7985 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, putI
->data
);
7989 IRDirty
* d
= st
->Ist
.Dirty
.details
;
7990 // The guard will be checked (== PCasted)
7991 noteTmpUsesIn(useEnv
, nOrigTmps
, HuPCa
, d
->guard
);
7992 // The args will be used in unknown ways.
7993 for (IRExpr
** args
= d
->args
; *args
; args
++) {
7994 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, *args
);
7999 IRCAS
* cas
= st
->Ist
.CAS
.details
;
8000 // Address will be pcasted, everything else used as unknown
8001 noteTmpUsesIn(useEnv
, nOrigTmps
, HuPCa
, cas
->addr
);
8002 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, cas
->expdLo
);
8003 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, cas
->dataLo
);
8005 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, cas
->expdHi
);
8007 noteTmpUsesIn(useEnv
, nOrigTmps
, HuOth
, cas
->dataHi
);
         case Ist_AbiHint:
            // Both exprs are used in unknown ways.  TODO: can we safely
            // just ignore AbiHints?
            noteTmpUsesIn(useEnv, nOrigTmps, HuOth, st->Ist.AbiHint.base);
            noteTmpUsesIn(useEnv, nOrigTmps, HuOth, st->Ist.AbiHint.nia);
            break;
         case Ist_StoreG: {
            // We might be able to do better, and use HuPCa for the addr.
            // It's not immediately obvious that we can, because the address
            // is regarded as "used" only when the guard is true.
            IRStoreG* sg = st->Ist.StoreG.details;
            noteTmpUsesIn(useEnv, nOrigTmps, HuOth, sg->addr);
            noteTmpUsesIn(useEnv, nOrigTmps, HuOth, sg->data);
            noteTmpUsesIn(useEnv, nOrigTmps, HuOth, sg->guard);
            break;
         }
         case Ist_LoadG: {
            // Per similar comments to Ist_StoreG .. not sure whether this
            // is really optimal.
            IRLoadG* lg = st->Ist.LoadG.details;
            noteTmpUsesIn(useEnv, nOrigTmps, HuOth, lg->addr);
            noteTmpUsesIn(useEnv, nOrigTmps, HuOth, lg->alt);
            noteTmpUsesIn(useEnv, nOrigTmps, HuOth, lg->guard);
            break;
         }
         case Ist_LLSC:
            // The address will be checked (== PCasted).
            noteTmpUsesIn(useEnv, nOrigTmps, HuPCa, st->Ist.LLSC.addr);
            if (st->Ist.LLSC.storedata)
               noteTmpUsesIn(useEnv, nOrigTmps, HuOth,
                             st->Ist.LLSC.storedata);
            break;
         case Ist_IMark:
         case Ist_NoOp:
         case Ist_MBE:
            // These mention no tmps.
            break;
         default:
            VG_(tool_panic)("preInstrumentationAnalysis: unhandled IRStmt");
      } // end of switch (st->tag)
   } // end of the backwards pass through the stmts.

   // Return the computed use env and the bogus-atom flag.
   tl_assert(*useEnvP == NULL);
   *useEnvP = useEnv;

   tl_assert(*hasBogusLiteralsP == False);
   *hasBogusLiteralsP = bogus;
}
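
/* Informal sketch of the contract, as it is consumed by MC_(instrument)
   below (a summary, not authoritative): on return, *useEnvP points at a
   heap-allocated array holding one HowUsed entry per original tmp of sb_in
   (the caller later releases it with VG_(free)), and *hasBogusLiteralsP
   reports whether the block contains literals that make the caller fall
   back to all-expensive detail levels and discard the tmp-use results. */
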
IRSB* MC_(instrument) ( VgCallbackClosure* closure,
                        IRSB* sb_in,
                        const VexGuestLayout* layout,
                        const VexGuestExtents* vge,
                        const VexArchInfo* archinfo_host,
                        IRType gWordTy, IRType hWordTy )
{
   Bool    verboze = 0||False;
   Int     i, j, first_stmt;
   IRStmt* st;
   MCEnv   mce;
   IRSB*   sb_out;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   /* Check we're not completely nuts */
   tl_assert(sizeof(UWord)  == sizeof(void*));
   tl_assert(sizeof(Word)   == sizeof(void*));
   tl_assert(sizeof(Addr)   == sizeof(void*));
   tl_assert(sizeof(ULong)  == 8);
   tl_assert(sizeof(Long)   == 8);
   tl_assert(sizeof(UInt)   == 4);
   tl_assert(sizeof(Int)    == 4);

   tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);

   sb_out = deepCopyIRSBExceptStmts(sb_in);
   /* Set up the running environment.  Both .sb and .tmpMap are
      modified as we go along.  Note that tmps are added to both
      .sb->tyenv and .tmpMap together, so the valid index-set for
      those two arrays should always be identical. */
   VG_(memset)(&mce, 0, sizeof(mce));
   mce.sb         = sb_out;
   mce.trace      = verboze;
   mce.layout     = layout;
   mce.hWordTy    = hWordTy;
   mce.tmpHowUsed = NULL;
   /* BEGIN decide on expense levels for instrumentation. */

   /* Initially, select the cheap version of everything for which we have
      an alternative. */
   DetailLevelByOp__set_all( &mce.dlbo, DLcheap );

   /* Take account of the --expensive-definedness-checks= flag. */
   if (MC_(clo_expensive_definedness_checks) == EdcNO) {
      /* We just selected 'cheap for everything', so we don't need to do
         anything here.  mce.tmpHowUsed remains NULL. */
   }
   else if (MC_(clo_expensive_definedness_checks) == EdcYES) {
      /* Select 'expensive for everything'.  mce.tmpHowUsed remains NULL. */
      DetailLevelByOp__set_all( &mce.dlbo, DLexpensive );
   }
   else {
      tl_assert(MC_(clo_expensive_definedness_checks) == EdcAUTO);
      /* We'll make our own selection, based on known per-target constraints
         and also on analysis of the block to be instrumented.  First, set
         up default values for detail levels.

         On x86 and amd64, we'll routinely encounter code optimised by LLVM
         5 and above.  Enable accurate interpretation of the following.
         LLVM uses adds for some bitfield inserts, and we get a lot of false
         errors if the cheap interpretation is used, alas.  Could solve this
         much better if we knew which of such adds came from x86/amd64 LEA
         instructions, since these are the only ones really needing the
         expensive interpretation, but that would require some way to tag
         them in the _toIR.c front ends, which is a lot of faffing around.
         So for now we use preInstrumentationAnalysis() to detect adds which
         are used only to construct memory addresses, which is an
         approximation to the above, and is self-contained. */
#     if defined(VGA_x86)
      mce.dlbo.dl_Add32           = DLauto;
      mce.dlbo.dl_CmpEQ32_CmpNE32 = DLexpensive;
#     elif defined(VGA_amd64)
      mce.dlbo.dl_Add64           = DLauto;
      mce.dlbo.dl_CmpEQ32_CmpNE32 = DLexpensive;
#     endif
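
      /* Interpretive sketch (an assumed reading, not taken from the original
         source): for an op whose detail level is DLauto, the per-tmp
         classification in mce.tmpHowUsed decides the cost at each use site --
         destinations marked HuPCa (address-construction only) can keep the
         cheap PCast-style shadow, while all other uses get the expensive
         exact interpretation. */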
      /* preInstrumentationAnalysis() will allocate &mce.tmpHowUsed and then
         fill it in. */
      Bool hasBogusLiterals = False;
      preInstrumentationAnalysis( &mce.tmpHowUsed, &hasBogusLiterals, sb_in );

      if (hasBogusLiterals) {
         /* This happens very rarely.  In this case just select expensive
            for everything, and throw away the tmp-use analysis results. */
         DetailLevelByOp__set_all( &mce.dlbo, DLexpensive );
         VG_(free)( mce.tmpHowUsed );
         mce.tmpHowUsed = NULL;
      } else {
         /* Nothing.  mce.tmpHowUsed contains tmp-use analysis results,
            which will be used for some subset of Iop_{Add,Sub}{32,64},
            based on which ones are set to DLauto for this target. */
      }
   }

   DetailLevelByOp__check_sanity( &mce.dlbo );
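
   /* Assumed mapping from the command-line flag to the branches above (the
      option spellings are an assumption, not taken from this file):
        --expensive-definedness-checks=no   -> EdcNO   (all ops DLcheap)
        --expensive-definedness-checks=yes  -> EdcYES  (all ops DLexpensive)
        --expensive-definedness-checks=auto -> EdcAUTO (per-target defaults
                                               plus the tmp-use analysis) */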
   // Debug printing: which tmps have been identified as PCast-only use
   if (mce.tmpHowUsed) {
      VG_(printf)("Cheapies: ");
      for (UInt q = 0; q < sb_in->tyenv->types_used; q++) {
         if (mce.tmpHowUsed[q] == HuPCa) {
            VG_(printf)("t%u ", q);
         }
      }
   }

   // Debug printing: number of ops by detail level
   UChar nCheap     = DetailLevelByOp__count( &mce.dlbo, DLcheap     );
   UChar nAuto      = DetailLevelByOp__count( &mce.dlbo, DLauto      );
   UChar nExpensive = DetailLevelByOp__count( &mce.dlbo, DLexpensive );
   tl_assert(nCheap + nAuto + nExpensive == 8);
   VG_(printf)("%u,%u,%u ", nCheap, nAuto, nExpensive);
   /* END decide on expense levels for instrumentation. */

   /* Initialise the running tmp environment. */

   mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
                            sizeof(TempMapEnt));
   VG_(hintSizeXA) (mce.tmpMap, sb_in->tyenv->types_used);
   for (i = 0; i < sb_in->tyenv->types_used; i++) {
      TempMapEnt ent;
      ent.kind    = Orig;
      ent.shadowV = IRTemp_INVALID;
      ent.shadowB = IRTemp_INVALID;
      VG_(addToXA)( mce.tmpMap, &ent );
   }
   tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );
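
   /* Brief sketch of how the map is used (informal, inferred from the code
      that follows): tmpMap[t] records the V-bits shadow and the origin (B)
      shadow tmps for original tmp t.  Both start out IRTemp_INVALID;
      findShadowTmpV and findShadowTmpB presumably create a shadow tmp on
      first request, adding it to sb_out->tyenv and to tmpMap together,
      which is why the XArray length must keep tracking tyenv->types_used
      (see the asserts above and at the end of this function). */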
   /* Finally, begin instrumentation. */
   /* Copy verbatim any IR preamble preceding the first IMark */

   tl_assert(mce.sb == sb_out);
   tl_assert(mce.sb != sb_in);

   i = 0;
   while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {

      st = sb_in->stmts[i];
      tl_assert(isFlatIRStmt(st));

      stmt( 'C', &mce, sb_in->stmts[i] );
      i++;
   }
   /* Nasty problem.  IR optimisation of the pre-instrumented IR may
      cause the IR following the preamble to contain references to IR
      temporaries defined in the preamble.  Because the preamble isn't
      instrumented, these temporaries don't have any shadows.
      Nevertheless uses of them following the preamble will cause
      memcheck to generate references to their shadows.  End effect is
      to cause IR sanity check failures, due to references to
      non-existent shadows.  This is only evident for the complex
      preambles used for function wrapping on TOC-afflicted platforms
      (ppc64-linux).

      The following loop therefore scans the preamble looking for
      assignments to temporaries.  For each one found it creates an
      assignment to the corresponding (V) shadow temp, marking it as
      'defined'.  This is the same resulting IR as if the main
      instrumentation loop before had been applied to the statement
      'tmp = CONSTANT'.

      Similarly, if origin tracking is enabled, we must generate an
      assignment for the corresponding origin (B) shadow, claiming
      no-origin, as appropriate for a defined value. */
   for (j = 0; j < i; j++) {
      if (sb_in->stmts[j]->tag == Ist_WrTmp) {
         /* findShadowTmpV checks its arg is an original tmp;
            no need to assert that here. */
         IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
         IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
         IRType ty_v  = typeOfIRTemp(sb_out->tyenv, tmp_v);
         assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
         if (MC_(clo_mc_level) == 3) {
            IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
            tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
            assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
         }
         VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
      }
   }
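
   /* Sketch of the net effect (the example statement is an assumption): for
      a preamble statement such as
         t5 = GET:I64(72)
      the loop above assigns definedOfType(Ity_I64) -- the all-defined
      V-bits value -- to t5's V shadow, and, when MC_(clo_mc_level) == 3,
      assigns mkU32(0) (no origin) to its B shadow, so that instrumented
      code after the preamble refers to shadow tmps that actually exist. */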
   /* Iterate over the remaining stmts to generate instrumentation. */

   tl_assert(sb_in->stmts_used > 0);
   tl_assert(i < sb_in->stmts_used);
   tl_assert(sb_in->stmts[i]->tag == Ist_IMark);
   for (/* use current i*/; i < sb_in->stmts_used; i++) {

      st = sb_in->stmts[i];
      first_stmt = sb_out->stmts_used;

      if (MC_(clo_mc_level) == 3) {
         /* See comments on case Ist_CAS below. */
         if (st->tag != Ist_CAS)
            schemeS( &mce, st );
      }

      /* Generate instrumentation code for each stmt ... */

      switch (st->tag) {

         case Ist_WrTmp: {
            IRTemp dst = st->Ist.WrTmp.tmp;
            tl_assert(dst < (UInt)sb_in->tyenv->types_used);
            HowUsed hu = mce.tmpHowUsed ? mce.tmpHowUsed[dst]
                                        : HuOth/*we don't know, so play safe*/;
            assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
                               expr2vbits( &mce, st->Ist.WrTmp.data, hu ));
            break;
         }
         case Ist_Put:
            do_shadow_PUT( &mce,
                           st->Ist.Put.offset,
                           st->Ist.Put.data,
                           NULL /* shadow atom */, NULL /* guard */ );
            break;

         case Ist_PutI:
            do_shadow_PUTI( &mce, st->Ist.PutI.details);
            break;
         case Ist_Store:
            do_shadow_Store( &mce, st->Ist.Store.end,
                                   st->Ist.Store.addr, 0/* addr bias */,
                                   st->Ist.Store.data,
                                   NULL /* shadow data */,
                                   NULL /* guard */ );
            break;

         case Ist_StoreG:
            do_shadow_StoreG( &mce, st->Ist.StoreG.details );
            break;

         case Ist_LoadG:
            do_shadow_LoadG( &mce, st->Ist.LoadG.details );
            break;
         case Ist_Exit:
            complainIfUndefined( &mce, st->Ist.Exit.guard, NULL );
            break;

         case Ist_IMark:
         case Ist_NoOp:
         case Ist_MBE:
            // No shadow code needed; the statement itself is copied
            // to the output below.
            break;

         case Ist_Dirty:
            do_shadow_Dirty( &mce, st->Ist.Dirty.details );
            break;

         case Ist_AbiHint:
            do_AbiHint( &mce, st->Ist.AbiHint.base,
                              st->Ist.AbiHint.len,
                              st->Ist.AbiHint.nia );
            break;
         case Ist_CAS:
            do_shadow_CAS( &mce, st->Ist.CAS.details );
            /* Note, do_shadow_CAS copies the CAS itself to the output
               block, because it needs to add instrumentation both
               before and after it.  Hence skip the copy below.  Also
               skip the origin-tracking stuff (call to schemeS) above,
               since that's all tangled up with it too; do_shadow_CAS
               handles it all. */
            break;

         case Ist_LLSC:
            do_shadow_LLSC( &mce,
                            st->Ist.LLSC.end,
                            st->Ist.LLSC.result,
                            st->Ist.LLSC.addr,
                            st->Ist.LLSC.storedata );
            break;

         default:
            VG_(tool_panic)("memcheck: unhandled IRStmt");

      } /* switch (st->tag) */
      for (j = first_stmt; j < sb_out->stmts_used; j++) {
         ppIRStmt(sb_out->stmts[j]);
      }

      /* ... and finally copy the stmt itself to the output.  Except,
         skip the copy of IRCASs; see comments on case Ist_CAS
         above. */
      if (st->tag != Ist_CAS)
         stmt('C', &mce, st);
   }
   /* Now we need to complain if the jump target is undefined. */
   first_stmt = sb_out->stmts_used;

   VG_(printf)("sb_in->next = ");
   ppIRExpr(sb_in->next);
   VG_(printf)("\n\n");

   complainIfUndefined( &mce, sb_in->next, NULL );

   for (j = first_stmt; j < sb_out->stmts_used; j++) {
      ppIRStmt(sb_out->stmts[j]);
   }
   /* If this fails, there's been some serious snafu with tmp management,
      that should be investigated. */
   tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
   VG_(deleteXA)( mce.tmpMap );

   if (mce.tmpHowUsed) {
      VG_(free)( mce.tmpHowUsed );
   }

   tl_assert(mce.sb == sb_out);
   return sb_out;
}
/*--------------------------------------------------------------------*/
/*--- end                                           mc_translate.c ---*/
/*--------------------------------------------------------------------*/