/* -*- mode: C; c-basic-offset: 3; -*- */

/*---------------------------------------------------------------*/
/*--- begin                                          ir_opt.c ---*/
/*---------------------------------------------------------------*/

   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
#include "libvex_basictypes.h"
#include "libvex_ir.h"

#include "main_util.h"
#include "main_globals.h"

/* Set to 1 for lots of debugging output. */

/* Set to 1 to gather some statistics.  Currently only for sameIRExprs. */
/* What iropt does, 29 Dec 04.

   It takes an IRSB and produces a new one with the same meaning,
   defined thus:

   After execution of the new BB, all guest state and guest memory is
   the same as after execution of the original.  This is true
   regardless of how the block was exited (at the end vs side exit).

   In addition, parts of the guest state will be identical to that
   created by execution of the original at the following observation
   points:

   * In a dirty helper call, any parts of the guest state that the
     helper states that it reads or modifies will be up to date.
     Also, guest memory will be up to date.  Parts of the guest state
     not marked as being read or modified by the helper cannot be
     assumed to be up-to-date at the point where the helper is called.

   * If iropt_register_updates == VexRegUpdSpAtMemAccess :
     The guest state is only up to date as explained above
     (i.e. at SB exits and as specified by dirty helper calls).
     Also, the stack pointer register is up to date at memory
     exception points (as this is needed for the stack extension
     logic in m_signals.c).

   * If iropt_register_updates == VexRegUpdUnwindregsAtMemAccess :
     Immediately prior to any load or store, those parts of the guest
     state marked as requiring precise exceptions will be up to date.
     Also, guest memory will be up to date.  Parts of the guest state
     not marked as requiring precise exceptions cannot be assumed to
     be up-to-date at the point of the load/store.

   * If iropt_register_updates == VexRegUpdAllregsAtMemAccess :
     Same as minimal, but all the guest state is up to date at memory
     exception points.

   * If iropt_register_updates == VexRegUpdAllregsAtEachInsn :
     Guest state is up to date at each instruction.

   The relative order of loads and stores (including loads/stores of
   guest memory done by dirty helpers annotated as such) is not
   changed.  However, the relative order of loads with no intervening
   stores/modifies may be changed.
   There are three levels of optimisation, controlled by
   vex_control.iropt_level.  Define first:

   "Cheap transformations" are the following sequence:
      * Redundant-Get removal
      * Redundant-Put removal
      * Constant propagation/folding
      * Specialisation of clean helper functions

   "Expensive transformations" are the following sequence:
      * Folding of add/sub chains
      * Redundant-GetI removal
      * Redundant-PutI removal

   Then the transformations are as follows, as defined by
   vex_control.iropt_level:

   Level 0:
      * Flatten into atomic form.

   Level 1: the following sequence:
      * Flatten into atomic form.
      * Cheap transformations.

   Level 2: the following sequence:
      * Flatten into atomic form.
      * Cheap transformations.
      * If block contains any floating or vector types, CSE.
      * If block contains GetI or PutI, Expensive transformations.
      * Try unrolling loops.  Three possible outcomes:
        - No effect: do nothing more.
        - Unrolled a loop, and block does not contain GetI or PutI:
          Do: * CSE
              * Cheap transformations
        - Unrolled a loop, and block contains GetI or PutI:
          Do: * Expensive transformations
              * Cheap transformations
/* Implementation notes, 29 Dec 04.

   TODO (important): I think rPutI removal ignores precise exceptions
   and is therefore, in a sense, wrong: PutIs are assumed not to write
   parts of the guest state that we need to have up-to-date at
   loads/stores.  So far on the x86 guest that has not mattered, since
   indeed only the x87 FP registers and tags are accessed using
   GetI/PutI, and there is no need so far for them to be up to date at
   mem exception points.  The rPutI pass should be fixed.

   TODO: improve pessimistic handling of precise exceptions

   TODO: check interaction of rGetI and dirty helpers.

   F64i constants are treated differently from other constants.
   They are not regarded as atoms, and instead lifted off and
   bound to temps.  This allows them to participate in CSE, which
   is important for getting good performance for x86 guest code.

   CSE up F64 literals (already doing F64is)

   CSE: consider carefully the requirement for precise exns
     prior to making CSE any more aggressive.  */
/*---------------------------------------------------------------*/
/*--- Finite mappery, of a sort                               ---*/
/*---------------------------------------------------------------*/

/* General map from HWord-sized thing to HWord-sized thing.  Could be
   done by hashing, but it's not clear whether or not this would really
   be any faster. */
static HashHW* newHHW ( void )
   HashHW* h = LibVEX_Alloc_inline(sizeof(HashHW));
   h->inuse  = LibVEX_Alloc_inline(h->size * sizeof(Bool));
   h->key    = LibVEX_Alloc_inline(h->size * sizeof(HWord));
   h->val    = LibVEX_Alloc_inline(h->size * sizeof(HWord));
/* Look up key in the map. */
static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key )
   /* vex_printf("lookupHHW(%llx)\n", key ); */
   for (i = 0; i < h->used; i++) {
      if (h->inuse[i] && h->key[i] == key) {
/* Add key->val to the map.  Replaces any existing binding for key. */
static void addToHHW ( HashHW* h, HWord key, HWord val )
   /* vex_printf("addToHHW(%llx, %llx)\n", key, val); */

   /* Find and replace existing binding, if any. */
   for (i = 0; i < h->used; i++) {
      if (h->inuse[i] && h->key[i] == key) {

   /* Ensure a space is available. */
   if (h->used == h->size) {
      /* Copy into arrays twice the size. */
      Bool*  inuse2 = LibVEX_Alloc_inline(2 * h->size * sizeof(Bool));
      HWord* key2   = LibVEX_Alloc_inline(2 * h->size * sizeof(HWord));
      HWord* val2   = LibVEX_Alloc_inline(2 * h->size * sizeof(HWord));
      for (i = j = 0; i < h->size; i++) {
         if (!h->inuse[i]) continue;

   /* Finally, add it. */
   vassert(h->used < h->size);
   h->inuse[h->used] = True;
   h->key[h->used]   = key;
   h->val[h->used]   = val;
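/* Illustrative usage sketch (not itself one of the passes): values of any
   pointer-sized type are stored by casting to HWord, for example
      addToHHW(env, (HWord)key, (HWord)(void*)someExpr);
      HWord v;
      if (lookupHHW(env, &v, (HWord)key)) { ...use (IRExpr*)v... }
   which is exactly the pattern that the redundant Get/Put removal passes
   below rely on. */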
/*---------------------------------------------------------------*/
/*--- Flattening out a BB into atomic SSA form                ---*/
/*---------------------------------------------------------------*/
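/* For example (illustrative), a non-flat statement such as
      PUT(60) = Add32(GET:I32(0), 0x4:I32)
   is rewritten by this phase into
      t1 = GET:I32(0)
      t2 = Add32(t1,0x4:I32)
      PUT(60) = t2
   so that every operator, load and call argument is an atom, that is,
   an IRTemp or a constant. */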
/* Non-critical helper, heuristic for reducing the number of tmp-tmp
   copies made by flattening.  If in doubt return False. */
static Bool isFlat ( IRExpr* e )
   if (e->tag == Iex_Get)
   if (e->tag == Iex_Binop)
      return toBool( isIRAtom(e->Iex.Binop.arg1)
                     && isIRAtom(e->Iex.Binop.arg2) );
   if (e->tag == Iex_Load)
      return isIRAtom(e->Iex.Load.addr);
/* Flatten out 'ex' so it is atomic, returning a new expression with
   the same value, after having appended extra IRTemp assignments to
   'bb'. */
static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex )
   IRType ty = typeOfIRExpr(bb->tyenv, ex);

      t1 = newIRTemp(bb->tyenv, ty);
      addStmtToIRSB(bb, IRStmt_WrTmp(t1,
         IRExpr_GetI(ex->Iex.GetI.descr,
                     flatten_Expr(bb, ex->Iex.GetI.ix),
                     ex->Iex.GetI.bias)));
      return IRExpr_RdTmp(t1);

      t1 = newIRTemp(bb->tyenv, ty);
         IRStmt_WrTmp(t1, ex));
      return IRExpr_RdTmp(t1);

      IRQop* qop = ex->Iex.Qop.details;
      t1 = newIRTemp(bb->tyenv, ty);
      addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            flatten_Expr(bb, qop->arg1),
            flatten_Expr(bb, qop->arg2),
            flatten_Expr(bb, qop->arg3),
            flatten_Expr(bb, qop->arg4))));
      return IRExpr_RdTmp(t1);

      IRTriop* triop = ex->Iex.Triop.details;
      t1 = newIRTemp(bb->tyenv, ty);
      addStmtToIRSB(bb, IRStmt_WrTmp(t1,
         IRExpr_Triop(triop->op,
                      flatten_Expr(bb, triop->arg1),
                      flatten_Expr(bb, triop->arg2),
                      flatten_Expr(bb, triop->arg3))));
      return IRExpr_RdTmp(t1);

      t1 = newIRTemp(bb->tyenv, ty);
      addStmtToIRSB(bb, IRStmt_WrTmp(t1,
         IRExpr_Binop(ex->Iex.Binop.op,
                      flatten_Expr(bb, ex->Iex.Binop.arg1),
                      flatten_Expr(bb, ex->Iex.Binop.arg2))));
      return IRExpr_RdTmp(t1);

      t1 = newIRTemp(bb->tyenv, ty);
      addStmtToIRSB(bb, IRStmt_WrTmp(t1,
         IRExpr_Unop(ex->Iex.Unop.op,
                     flatten_Expr(bb, ex->Iex.Unop.arg))));
      return IRExpr_RdTmp(t1);

      t1 = newIRTemp(bb->tyenv, ty);
      addStmtToIRSB(bb, IRStmt_WrTmp(t1,
         IRExpr_Load(ex->Iex.Load.end,
                     flatten_Expr(bb, ex->Iex.Load.addr))));
      return IRExpr_RdTmp(t1);

      newargs = shallowCopyIRExprVec(ex->Iex.CCall.args);
      for (i = 0; newargs[i]; i++)
         newargs[i] = flatten_Expr(bb, newargs[i]);
      t1 = newIRTemp(bb->tyenv, ty);
      addStmtToIRSB(bb, IRStmt_WrTmp(t1,
         IRExpr_CCall(ex->Iex.CCall.cee,
      return IRExpr_RdTmp(t1);

      t1 = newIRTemp(bb->tyenv, ty);
      addStmtToIRSB(bb, IRStmt_WrTmp(t1,
         IRExpr_ITE(flatten_Expr(bb, ex->Iex.ITE.cond),
                    flatten_Expr(bb, ex->Iex.ITE.iftrue),
                    flatten_Expr(bb, ex->Iex.ITE.iffalse))));
      return IRExpr_RdTmp(t1);

      /* Lift F64i constants out onto temps so they can be CSEd
         later. */
      if (ex->Iex.Const.con->tag == Ico_F64i) {
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Const(ex->Iex.Const.con)));
         return IRExpr_RdTmp(t1);
      /* Leave all other constants alone. */

      vpanic("flatten_Expr");
/* Append a completely flattened form of 'st' to the end of 'bb'. */
static void flatten_Stmt ( IRSB* bb, IRStmt* st )
   IRExpr *e1, *e2, *e3, *e4, *e5;
   IRPutI *puti, *puti2;

      if (isIRAtom(st->Ist.Put.data)) {
         /* optimisation to reduce the amount of heap wasted
            by this flattening */
         addStmtToIRSB(bb, st);
         /* general case, always correct */
         e1 = flatten_Expr(bb, st->Ist.Put.data);
         addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1));

      puti  = st->Ist.PutI.details;
      e1    = flatten_Expr(bb, puti->ix);
      e2    = flatten_Expr(bb, puti->data);
      puti2 = mkIRPutI(puti->descr, e1, puti->bias, e2);
      addStmtToIRSB(bb, IRStmt_PutI(puti2));

      if (isFlat(st->Ist.WrTmp.data)) {
         /* optimisation, to reduce the number of tmp-tmp
            copies generated */
         addStmtToIRSB(bb, st);
         /* general case, always correct */
         e1 = flatten_Expr(bb, st->Ist.WrTmp.data);
         addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1));

      e1 = flatten_Expr(bb, st->Ist.Store.addr);
      e2 = flatten_Expr(bb, st->Ist.Store.data);
      addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1,e2));

      sg = st->Ist.StoreG.details;
      e1 = flatten_Expr(bb, sg->addr);
      e2 = flatten_Expr(bb, sg->data);
      e3 = flatten_Expr(bb, sg->guard);
      addStmtToIRSB(bb, IRStmt_StoreG(sg->end, e1, e2, e3));

      lg = st->Ist.LoadG.details;
      e1 = flatten_Expr(bb, lg->addr);
      e2 = flatten_Expr(bb, lg->alt);
      e3 = flatten_Expr(bb, lg->guard);
      addStmtToIRSB(bb, IRStmt_LoadG(lg->end, lg->cvt, lg->dst,

      cas  = st->Ist.CAS.details;
      e1   = flatten_Expr(bb, cas->addr);
      e2   = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
      e3   = flatten_Expr(bb, cas->expdLo);
      e4   = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
      e5   = flatten_Expr(bb, cas->dataLo);
      cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
                      e1, e2, e3, e4, e5 );
      addStmtToIRSB(bb, IRStmt_CAS(cas2));

      e1 = flatten_Expr(bb, st->Ist.LLSC.addr);
      e2 = st->Ist.LLSC.storedata
              ? flatten_Expr(bb, st->Ist.LLSC.storedata)
      addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end,
                                    st->Ist.LLSC.result, e1, e2));

      d = st->Ist.Dirty.details;
      d2->args = shallowCopyIRExprVec(d2->args);
      if (d2->mFx != Ifx_None) {
         d2->mAddr = flatten_Expr(bb, d2->mAddr);
         vassert(d2->mAddr == NULL);
      d2->guard = flatten_Expr(bb, d2->guard);
      for (i = 0; d2->args[i]; i++) {
         IRExpr* arg = d2->args[i];
         if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
            d2->args[i] = flatten_Expr(bb, arg);
      addStmtToIRSB(bb, IRStmt_Dirty(d2));

      addStmtToIRSB(bb, st);

      e1 = flatten_Expr(bb, st->Ist.AbiHint.base);
      e2 = flatten_Expr(bb, st->Ist.AbiHint.nia);
      addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2));

      e1 = flatten_Expr(bb, st->Ist.Exit.guard);
      addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
                                    st->Ist.Exit.offsIP));

      vpanic("flatten_Stmt");
static IRSB* flatten_BB ( IRSB* in )
   out->tyenv = deepCopyIRTypeEnv( in->tyenv );
   for (i = 0; i < in->stmts_used; i++)
         flatten_Stmt( out, in->stmts[i] );
   out->next     = flatten_Expr( out, in->next );
   out->jumpkind = in->jumpkind;
   out->offsIP   = in->offsIP;
/*---------------------------------------------------------------*/
/*--- In-place removal of redundant GETs                      ---*/
/*---------------------------------------------------------------*/

/* Scan forwards, building up an environment binding (min offset, max
   offset) pairs to values, which will either be temps or constants.

   On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
   env and if it matches, replace the Get with the stored value.  If
   there is no match, add a (minoff,maxoff) :-> t binding.

   On seeing 'Put (minoff,maxoff) = t or c', first remove in the env
   any binding which fully or partially overlaps with (minoff,maxoff).
   Then add a new (minoff,maxoff) :-> t or c binding. */
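/* Example (illustrative): in the flat sequence
      t1 = GET:I32(16)
      t2 = Add32(t1,t1)
      t3 = GET:I32(16)
   the second Get matches the (16,19) binding created by the first, so
   the statement becomes  t3 = t1.  A PUT(16) = ... in between would
   first have invalidated that binding, and the second Get would then be
   left alone (and would create a fresh binding for t3). */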
/* Extract the min/max offsets from a guest state array descriptor. */

static void getArrayBounds ( IRRegArray* descr,
                             UInt* minoff, UInt* maxoff )
   *minoff = descr->base;
   *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
   vassert((*minoff & ~0xFFFF) == 0);
   vassert((*maxoff & ~0xFFFF) == 0);
   vassert(*minoff <= *maxoff);

/* Create keys, of the form ((minoffset << 16) | maxoffset). */

static UInt mk_key_GetPut ( Int offset, IRType ty )
   /* offset should fit in 16 bits. */
   UInt minoff = offset;
   UInt maxoff = minoff + sizeofIRType(ty) - 1;
   vassert((minoff & ~0xFFFF) == 0);
   vassert((maxoff & ~0xFFFF) == 0);
   return (minoff << 16) | maxoff;
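/* Worked example (illustrative): mk_key_GetPut(24, Ity_I64) covers guest
   bytes 24..31, so minoff = 24, maxoff = 31 and the key is
   (24 << 16) | 31 == 0x18001F.  Packing both ends into one UInt lets
   invalidateOverlaps() below recover them again with shifts and masks. */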
static UInt mk_key_GetIPutI ( IRRegArray* descr )
   getArrayBounds( descr, &minoff, &maxoff );
   vassert((minoff & ~0xFFFF) == 0);
   vassert((maxoff & ~0xFFFF) == 0);
   return (minoff << 16) | maxoff;
/* Supposing h has keys of the form generated by mk_key_GetPut and
   mk_key_GetIPutI, invalidate any key which overlaps (k_lo, k_hi). */

static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
   vassert(k_lo <= k_hi);
   /* invalidate any env entries which in any way overlap (k_lo .. k_hi) */
   /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */

   for (j = 0; j < h->used; j++) {
      e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF;
      e_hi = ((UInt)h->key[j]) & 0xFFFF;
      vassert(e_lo <= e_hi);
      if (e_hi < k_lo || k_hi < e_lo)
         continue; /* no overlap possible */
         /* overlap; invalidate */
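/* The test above is the usual closed-interval disjointness check:
   [e_lo,e_hi] and [k_lo,k_hi] overlap iff !(e_hi < k_lo || k_hi < e_lo).
   So (illustrative) an entry for bytes (16,19) survives an invalidation
   of (24,31) but is removed by an invalidation of (18,21). */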
static void redundant_get_removal_BB ( IRSB* bb )
   HashHW* env = newHHW();
   UInt    key = 0; /* keep gcc -O happy */

   for (i = 0; i < bb->stmts_used; i++) {
      IRStmt* st = bb->stmts[i];

      if (st->tag == Ist_NoOp)

      if (st->tag == Ist_WrTmp
          && st->Ist.WrTmp.data->tag == Iex_Get) {
         /* st is 't = Get(...)'.  Look up in the environment and see
            if the Get can be replaced. */
         IRExpr* get = st->Ist.WrTmp.data;
         key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
         if (lookupHHW(env, &val, (HWord)key)) {
            /* Note, we could do better here.  If the types are
               different we don't do the substitution, since doing so
               could lead to invalidly-typed IR.  An improvement would
               be to stick in a reinterpret-style cast, although that
               would make maintaining flatness more difficult. */
            IRExpr* valE    = (IRExpr*)val;
            Bool    typesOK = toBool( typeOfIRExpr(bb->tyenv,valE)
                                      == st->Ist.WrTmp.data->Iex.Get.ty );
            if (typesOK && DEBUG_IROPT) {
               vex_printf("rGET: "); ppIRExpr(get);
               vex_printf("  ->  "); ppIRExpr(valE);
            bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE);
            /* Not found, but at least we know that t and the Get(...)
               are now associated.  So add a binding to reflect that
               fact. */
            addToHHW( env, (HWord)key,
                      (HWord)(void*)(IRExpr_RdTmp(st->Ist.WrTmp.tmp)) );

      /* Deal with Puts: invalidate any env entries overlapped by this
         Put. */
      if (st->tag == Ist_Put || st->tag == Ist_PutI) {
         if (st->tag == Ist_Put) {
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
            vassert(st->tag == Ist_PutI);
            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
         k_lo = (key >> 16) & 0xFFFF;
         invalidateOverlaps(env, k_lo, k_hi);

      if (st->tag == Ist_Dirty) {
         /* Deal with dirty helpers which write or modify guest state.
            Invalidate the entire env.  We could do a lot better
            here. */
         IRDirty* d = st->Ist.Dirty.details;
         for (j = 0; j < d->nFxState; j++) {
            if (d->fxState[j].fx == Ifx_Modify
                || d->fxState[j].fx == Ifx_Write)
         /* dump the entire env (not clever, but correct ...) */
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         if (0) vex_printf("rGET: trash env due to dirty helper\n");

      /* add this one to the env, if appropriate */
      if (st->tag == Ist_Put) {
         vassert(isIRAtom(st->Ist.Put.data));
         addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));

   } /* for (i = 0; i < bb->stmts_used; i++) */
/*---------------------------------------------------------------*/
/*--- In-place removal of redundant PUTs                      ---*/
/*---------------------------------------------------------------*/

/* Find any Get uses in st and invalidate any partially or fully
   overlapping ranges listed in env.  Due to the flattening phase, the
   only stmt kind we expect to find a Get on is IRStmt_WrTmp. */

static void handle_gets_Stmt (
   Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
   VexRegisterUpdates pxControl
   UInt key = 0; /* keep gcc -O happy */

   /* This is the only interesting case.  Deal with Gets in the RHS
      expression. */
   e = st->Ist.WrTmp.data;
      key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
      key = mk_key_GetIPutI ( e->Iex.GetI.descr );
   k_lo = (key >> 16) & 0xFFFF;
   invalidateOverlaps(env, k_lo, k_hi);

   /* Be very conservative for dirty helper calls; dump the entire
      environment.  The helper might read guest state, in which
      case it needs to be flushed first.  Also, the helper might
      access guest memory, in which case all parts of the guest
      state requiring precise exceptions need to be flushed.  The
      crude solution is just to flush everything; we could easily
      enough do a lot better if needed. */
   /* Probably also overly-conservative, but also dump everything
      if we hit a memory bus event (fence, lock, unlock).  Ditto
      AbiHints, CASs, LLs and SCs. */
      vassert(isIRAtom(st->Ist.AbiHint.base));
      vassert(isIRAtom(st->Ist.AbiHint.nia));
      for (j = 0; j < env->used; j++)
         env->inuse[j] = False;

   /* all other cases are boring. */
      vassert(isIRAtom(st->Ist.Store.addr));
      vassert(isIRAtom(st->Ist.Store.data));

      IRStoreG* sg = st->Ist.StoreG.details;
      vassert(isIRAtom(sg->addr));
      vassert(isIRAtom(sg->data));
      vassert(isIRAtom(sg->guard));

      IRLoadG* lg = st->Ist.LoadG.details;
      vassert(isIRAtom(lg->addr));
      vassert(isIRAtom(lg->alt));
      vassert(isIRAtom(lg->guard));

      vassert(isIRAtom(st->Ist.Exit.guard));

      vassert(isIRAtom(st->Ist.Put.data));

      vassert(isIRAtom(st->Ist.PutI.details->ix));
      vassert(isIRAtom(st->Ist.PutI.details->data));

      vpanic("handle_gets_Stmt");
   /* This statement accesses memory.  So we might need to dump all parts
      of the environment corresponding to guest state that may not
      be reordered with respect to memory references.  That means
      at least the stack pointer. */

      case VexRegUpdAllregsAtMemAccess:
         /* Precise exceptions required at mem access.
            Flush all guest state. */
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
      case VexRegUpdSpAtMemAccess:
         /* We need to dump the stack pointer
            (needed for stack extension in m_signals.c).
            preciseMemExnsFn will use vex_control.iropt_register_updates
            to verify that only the sp is to be checked. */
      case VexRegUpdUnwindregsAtMemAccess:
         for (j = 0; j < env->used; j++) {
            /* Just flush the minimal amount required, as computed by
               preciseMemExnsFn. */
            HWord k_lo = (env->key[j] >> 16) & 0xFFFF;
            HWord k_hi = env->key[j] & 0xFFFF;
            if (preciseMemExnsFn( k_lo, k_hi, pxControl ))
               env->inuse[j] = False;
      case VexRegUpdAllregsAtEachInsn:
         // VexRegUpdAllregsAtEachInsn cannot happen here.
      case VexRegUpd_INVALID:
/* Scan backwards, building up a set of (min offset, max
   offset) pairs, indicating those parts of the guest state
   for which the next event is a write.

   On seeing a conditional exit, empty the set.

   On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
   completely within the set, remove the Put.  Otherwise, add
   (minoff,maxoff) to the set.

   On seeing 'Get (minoff,maxoff)', remove any part of the set
   overlapping (minoff,maxoff).  The same has to happen for any events
   which implicitly read parts of the guest state: dirty helper calls
   and loads/stores. */
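/* Example (illustrative): scanning backwards through
      PUT(16) = t1
      t2 = GET:I32(0)
      PUT(16) = t3
   the final PUT(16) puts (16,19) into the set; the intervening Get only
   reads bytes (0,3), so when the earlier PUT(16) is reached it finds an
   exact match and is rewritten to a no-op, since the later Put overwrites
   the same guest bytes on every path.  A conditional exit or an
   overlapping Get in between would have removed (16,19) from the set
   first, and the earlier Put would be kept. */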
static void redundant_put_removal_BB (
   Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
   VexRegisterUpdates pxControl
   UInt key = 0; /* keep gcc -O happy */

   // vassert(pxControl < VexRegUpdAllregsAtEachInsn);

   HashHW* env = newHHW();

   /* Initialise the running env with the fact that the final exit
      writes the IP (or, whatever it claims to write.  We don't
      care.) */
   key = mk_key_GetPut(bb->offsIP, typeOfIRExpr(bb->tyenv, bb->next));
   addToHHW(env, (HWord)key, 0);

   /* And now scan backwards through the statements. */
   for (i = bb->stmts_used-1; i >= 0; i--) {

      if (st->tag == Ist_NoOp)

      /* Deal with conditional exits. */
      if (st->tag == Ist_Exit) {
         /* Need to throw out from the env, any part of it which
            doesn't overlap with the guest state written by this exit.
            Since the exit only writes one section, it's simplest to
            do this: (1) check whether env contains a write that
            completely overlaps the write done by this exit; (2) empty
            out env; and (3) if (1) was true, add the write done by
            this exit.

            To make (1) a bit simpler, merely search for a write that
            exactly matches the one done by this exit.  That's safe
            because it will fail as often or more often than a full
            overlap check, and failure to find an overlapping write in
            env is the safe case (we just nuke env if that
            happens). */
         //vassert(isIRAtom(st->Ist.Exit.guard));
         //key = mk_key_GetPut(st->Ist.Exit.offsIP,
         //                    typeOfIRConst(st->Ist.Exit.dst));
         //re_add = lookupHHW(env, NULL, key);
         for (j = 0; j < env->used; j++)
            env->inuse[j] = False;
         // addToHHW(env, (HWord)key, 0);

         key = mk_key_GetPut( st->Ist.Put.offset,
                              typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
         vassert(isIRAtom(st->Ist.Put.data));

         key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
         vassert(isIRAtom(st->Ist.PutI.details->ix));
         vassert(isIRAtom(st->Ist.PutI.details->data));

      if (isPut && st->tag != Ist_PutI) {
         /* See if any single entry in env overlaps this Put.  This is
            simplistic in that the transformation is valid if, say, two
            or more entries in the env overlap this Put, but the use of
            lookupHHW will only find a single entry which exactly
            overlaps this Put.  This is suboptimal but safe. */
         if (lookupHHW(env, NULL, (HWord)key)) {
            /* This Put is redundant because a later one will overwrite
               it.  So NULL (nop) it out. */
               vex_printf("rPUT: "); ppIRStmt(st);
            bb->stmts[i] = IRStmt_NoOp();
            /* We can't demonstrate that this Put is redundant, so add it
               to the running collection. */
            addToHHW(env, (HWord)key, 0);

      /* Deal with Gets.  These remove bits of the environment since
         appearance of a Get means that the next event for that slice
         of the guest state is no longer a write, but a read.  Also
         deals with implicit reads of guest state needed to maintain
         precise exceptions. */
      handle_gets_Stmt( env, st, preciseMemExnsFn, pxControl );
/*---------------------------------------------------------------*/
/*--- Constant propagation and folding                        ---*/
/*---------------------------------------------------------------*/

/* How often sameIRExprs was invoked */
static UInt invocation_count;
/* How often sameIRExprs recursed through IRTemp assignments */
static UInt recursion_count;
/* How often sameIRExprs found identical IRExprs */
static UInt success_count;
/* How often recursing through assignments to IRTemps helped
   establish equality. */
static UInt recursion_success_count;
/* Whether or not recursing through an IRTemp assignment helped
   establish IRExpr equality for a given sameIRExprs invocation. */
static Bool recursion_helped;
/* Whether or not a given sameIRExprs invocation recursed through an
   IRTemp assignment */
static Bool recursed;
/* Maximum number of nodes ever visited when comparing two IRExprs. */
static UInt max_nodes_visited;
#endif /* STATS_IROPT */

/* Count the number of nodes visited for a given sameIRExprs invocation. */
static UInt num_nodes_visited;

/* Do not visit more than NODE_LIMIT nodes when comparing two IRExprs.
   This is to guard against performance degradation by visiting large
   trees without success. */
#define NODE_LIMIT 30
/* The env in this section is a map from IRTemp to IRExpr*,
   that is, an array indexed by IRTemp. */

/* Do both expressions compute the same value? The answer is generally
   conservative, i.e. it will report that the expressions do not compute
   the same value when in fact they do.  The reason is that we do not
   keep track of changes in the guest state and memory.  Thus, two
   Get's, GetI's or Load's, even when accessing the same location, will
   be assumed to compute different values.  After all, the accesses may
   happen at different times and the guest state / memory can have
   changed in the meantime.

   XXX IMPORTANT XXX the two expressions must have the same IR type.
   DO NOT CALL HERE WITH DIFFERENTLY-TYPED EXPRESSIONS. */

/* JRS 20-Mar-2012: split sameIRExprs_aux into a fast inlineable
   wrapper that deals with the common tags-don't-match case, and a
   slower out of line general case.  Saves a few insns. */
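/* Example of the conservatism (illustrative): with
      env[t1] = Add32(t9,0x4:I32)   and   env[t2] = Add32(t9,0x4:I32)
   sameIRExprs(env, RdTmp(t1), RdTmp(t2)) recurses through both bindings
   and answers True.  But two syntactically identical GET:I32(16)
   expressions answer False, since the guest state may have been updated
   between the two reads. */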
__attribute__((noinline))
static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 );

static Bool sameIRExprs_aux ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
   if (e1->tag != e2->tag) return False;
   return sameIRExprs_aux2(env, e1, e2);

__attribute__((noinline))
static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
   if (num_nodes_visited++ > NODE_LIMIT) return False;

      if (e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp) return True;

      if (env[e1->Iex.RdTmp.tmp] && env[e2->Iex.RdTmp.tmp]) {
         Bool same = sameIRExprs_aux(env, env[e1->Iex.RdTmp.tmp],
                                          env[e2->Iex.RdTmp.tmp]);
         if (same) recursion_helped = True;

      /* Guest state / memory could have changed in the meantime. */
      return toBool( e1->Iex.Binop.op == e2->Iex.Binop.op
                     && sameIRExprs_aux( env, e1->Iex.Binop.arg1,
                                              e2->Iex.Binop.arg1 )
                     && sameIRExprs_aux( env, e1->Iex.Binop.arg2,
                                              e2->Iex.Binop.arg2 ));

      return toBool( e1->Iex.Unop.op == e2->Iex.Unop.op
                     && sameIRExprs_aux( env, e1->Iex.Unop.arg,
                                              e2->Iex.Unop.arg ));

      IRConst *c1 = e1->Iex.Const.con;
      IRConst *c2 = e2->Iex.Const.con;
      vassert(c1->tag == c2->tag);
         case Ico_U1:  return toBool( c1->Ico.U1  == c2->Ico.U1 );
         case Ico_U8:  return toBool( c1->Ico.U8  == c2->Ico.U8 );
         case Ico_U16: return toBool( c1->Ico.U16 == c2->Ico.U16 );
         case Ico_U32: return toBool( c1->Ico.U32 == c2->Ico.U32 );
         case Ico_U64: return toBool( c1->Ico.U64 == c2->Ico.U64 );

      IRTriop *tri1 = e1->Iex.Triop.details;
      IRTriop *tri2 = e2->Iex.Triop.details;
      return toBool( tri1->op == tri2->op
                     && sameIRExprs_aux( env, tri1->arg1, tri2->arg1 )
                     && sameIRExprs_aux( env, tri1->arg2, tri2->arg2 )
                     && sameIRExprs_aux( env, tri1->arg3, tri2->arg3 ));

      return toBool( sameIRExprs_aux( env, e1->Iex.ITE.cond,
                     && sameIRExprs_aux( env, e1->Iex.ITE.iftrue,
                                              e2->Iex.ITE.iftrue )
                     && sameIRExprs_aux( env, e1->Iex.ITE.iffalse,
                                              e2->Iex.ITE.iffalse ));

      /* Not very likely to be "same". */
static Bool sameIRExprs ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
   num_nodes_visited = 0;
   same = sameIRExprs_aux(env, e1, e2);

   if (recursed) ++recursion_count;
   success_count += same;
   if (same && recursion_helped)
      ++recursion_success_count;
   if (num_nodes_visited > max_nodes_visited)
      max_nodes_visited = num_nodes_visited;
   recursed = False; /* reset */
   recursion_helped = False; /* reset */
#endif /* STATS_IROPT */
/* Debugging-only hack (not used in production runs): make a guess
   whether sameIRExprs might assert due to the two args being of
   different types.  If in doubt return False.  Is only used when
   --vex-iropt-level > 0, that is, vex_control.iropt_verbosity > 0.
   Bad because it duplicates functionality from typeOfIRExpr.  See
   comment on the single use point below for rationale. */
Bool debug_only_hack_sameIRExprs_might_assert ( IRExpr* e1, IRExpr* e2 )
   if (e1->tag != e2->tag) return False;
      /* The only interesting case */
      IRConst *c1 = e1->Iex.Const.con;
      IRConst *c2 = e2->Iex.Const.con;
      return c1->tag != c2->tag;
/* Is this literally IRExpr_Const(IRConst_U32(0)) ? */
static Bool isZeroU32 ( IRExpr* e )
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_U32
                  && e->Iex.Const.con->Ico.U32 == 0);

/* Is this literally IRExpr_Const(IRConst_U64(0)) ?
   Currently unused; commented out to avoid compiler warning */
static Bool isZeroU64 ( IRExpr* e )
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_U64
                  && e->Iex.Const.con->Ico.U64 == 0);

/* Is this literally IRExpr_Const(IRConst_V128(0)) ? */
static Bool isZeroV128 ( IRExpr* e )
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_V128
                  && e->Iex.Const.con->Ico.V128 == 0x0000);

/* Is this literally IRExpr_Const(IRConst_V128(1...1)) ? */
static Bool isOnesV128 ( IRExpr* e )
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_V128
                  && e->Iex.Const.con->Ico.V128 == 0xFFFF);

/* Is this literally IRExpr_Const(IRConst_V256(0)) ? */
static Bool isZeroV256 ( IRExpr* e )
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_V256
                  && e->Iex.Const.con->Ico.V256 == 0x00000000);
/* Is this an integer constant with value 0 ? */
static Bool isZeroU ( IRExpr* e )
   if (e->tag != Iex_Const) return False;
   switch (e->Iex.Const.con->tag) {
      case Ico_U1:   return toBool( e->Iex.Const.con->Ico.U1  == 0);
      case Ico_U8:   return toBool( e->Iex.Const.con->Ico.U8  == 0);
      case Ico_U16:  return toBool( e->Iex.Const.con->Ico.U16 == 0);
      case Ico_U32:  return toBool( e->Iex.Const.con->Ico.U32 == 0);
      case Ico_U64:  return toBool( e->Iex.Const.con->Ico.U64 == 0);
      case Ico_V256: return toBool( e->Iex.Const.con->Ico.V256 == 0x00000000);
      default: vpanic("isZeroU");
/* Is this an integer constant with value 1---1b ? */
static Bool isOnesU ( IRExpr* e )
   if (e->tag != Iex_Const) return False;
   switch (e->Iex.Const.con->tag) {
      case Ico_U1:   return toBool( e->Iex.Const.con->Ico.U1  == True);
      case Ico_U8:   return toBool( e->Iex.Const.con->Ico.U8  == 0xFF);
      case Ico_U16:  return toBool( e->Iex.Const.con->Ico.U16 == 0xFFFF);
      case Ico_U32:  return toBool( e->Iex.Const.con->Ico.U32
                                    == 0xFFFFFFFF);
      case Ico_U64:  return toBool( e->Iex.Const.con->Ico.U64
                                    == 0xFFFFFFFFFFFFFFFFULL);
      default: ppIRExpr(e); vpanic("isOnesU");

static Bool notBool ( Bool b )
   if (b == True) return False;
   if (b == False) return True;
/* Make a zero which has the same type as the result of the given
   primop. */
static IRExpr* mkZeroOfPrimopResultType ( IROp op )
      case Iop_CmpNE32: return IRExpr_Const(IRConst_U1(toBool(0)));
      case Iop_Xor8:    return IRExpr_Const(IRConst_U8(0));
      case Iop_Xor16:   return IRExpr_Const(IRConst_U16(0));
      case Iop_Xor32:   return IRExpr_Const(IRConst_U32(0));
      case Iop_Xor64:   return IRExpr_Const(IRConst_U64(0));
      case Iop_AndV128: return IRExpr_Const(IRConst_V128(0));
      case Iop_AndV256: return IRExpr_Const(IRConst_V256(0));
      default: vpanic("mkZeroOfPrimopResultType: bad primop");

/* Make a value containing all 1-bits, which has the same type as the
   result of the given primop. */
static IRExpr* mkOnesOfPrimopResultType ( IROp op )
         return IRExpr_Const(IRConst_U1(toBool(1)));
         return IRExpr_Const(IRConst_U8(0xFF));
         return IRExpr_Const(IRConst_U16(0xFFFF));
         return IRExpr_Const(IRConst_U32(0xFFFFFFFF));
         return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL));
         return IRExpr_Const(IRConst_V128(0xFFFF));
      case Iop_CmpEQ16x16:
         return IRExpr_Const(IRConst_V256(0xFFFFFFFF));
         vpanic("mkOnesOfPrimopResultType: bad primop");
/* Helpers for folding Clz32/64. */
static UInt fold_Clz64 ( ULong value )
   vassert(value != 0ULL); /* no defined semantics for arg==0 */
   for (i = 0; i < 64; ++i) {
      if (0ULL != (value & (((ULong)1) << (63 - i)))) return i;

static UInt fold_Clz32 ( UInt value )
   vassert(value != 0); /* no defined semantics for arg==0 */
   for (i = 0; i < 32; ++i) {
      if (0 != (value & (((UInt)1) << (31 - i)))) return i;
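/* Worked example (illustrative): fold_Clz32(0x00010000) scans from the
   top bit downwards; the first set bit it finds is bit 16, at iteration
   i = 15, so 15 is returned, i.e. a count of 15 leading zero bits. */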
/* V64 holds 8 summary-constant bits in V128/V256 style.  Convert to
   the corresponding real constant. */
//XXX re-check this before use
//static ULong de_summarise_V64 ( UChar v64 )
//   if (v64 & (1<<0)) r |= 0x00000000000000FFULL;
//   if (v64 & (1<<1)) r |= 0x000000000000FF00ULL;
//   if (v64 & (1<<2)) r |= 0x0000000000FF0000ULL;
//   if (v64 & (1<<3)) r |= 0x00000000FF000000ULL;
//   if (v64 & (1<<4)) r |= 0x000000FF00000000ULL;
//   if (v64 & (1<<5)) r |= 0x0000FF0000000000ULL;
//   if (v64 & (1<<6)) r |= 0x00FF000000000000ULL;
//   if (v64 & (1<<7)) r |= 0xFF00000000000000ULL;
/* Helper for arbitrary expression pattern matching in flat IR.  If
   'e' is a reference to a tmp, look it up in env -- repeatedly, if
   necessary -- until it resolves to a non-tmp.  Note that this can
   return NULL if it can't resolve 'e' to a new expression, which will
   be the case if 'e' is instead defined by an IRStmt (IRDirty or
   LLSC). */
static IRExpr* chase ( IRExpr** env, IRExpr* e )
   /* Why is this loop guaranteed to terminate?  Because all tmps must
      have definitions before use, hence a tmp cannot be bound
      (directly or indirectly) to itself. */
   while (e->tag == Iex_RdTmp) {
      if (0) { vex_printf("chase "); ppIRExpr(e); vex_printf("\n"); }
      e = env[(Int)e->Iex.RdTmp.tmp];
      if (e == NULL) break;
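/* For instance (illustrative), if the block contains  t7 = Not64(t3),
   then chase(env, IRExpr_RdTmp(t7)) follows the binding and returns the
   Not64 node; this is what lets the PopCount64 pattern match further
   below look through intermediate temporaries. */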
/* Similar to |chase|, but follows at most one level of tmp reference. */
static IRExpr* chase1 ( IRExpr** env, IRExpr* e )
   if (e == NULL || e->tag != Iex_RdTmp)
   return env[(Int)e->Iex.RdTmp.tmp];
__attribute__((noinline))
static IRExpr* fold_Expr_WRK ( IRExpr** env, IRExpr* e )
   IRExpr* e2 = e; /* e2 is the result of folding e, if possible */

      if (e->Iex.Unop.arg->tag == Iex_Const) {

         /* cases where the arg is a const */
         switch (e->Iex.Unop.op) {
            e2 = IRExpr_Const(IRConst_U8(toUChar(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
            e2 = IRExpr_Const(IRConst_U32(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
            e2 = IRExpr_Const(IRConst_U64(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
            e2 = IRExpr_Const(IRConst_U8(toUChar(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
            e2 = IRExpr_Const(IRConst_U16(toUShort(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
            e2 = IRExpr_Const(IRConst_U32(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
            e2 = IRExpr_Const(IRConst_U64(
                    e->Iex.Unop.arg->Iex.Const.con->Ico.U1
                    ? 0xFFFFFFFFFFFFFFFFULL : 0));

            UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
            u32 = (Int)u32 >> 24;   /* signed shift */
            e2 = IRExpr_Const(IRConst_U32(u32));

            UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
            u32 = (Int)u32 >> 16;   /* signed shift */
            e2 = IRExpr_Const(IRConst_U32(u32));

            e2 = IRExpr_Const(IRConst_U64(
                    0xFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
            e2 = IRExpr_Const(IRConst_U64(
                    0xFFFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
            e2 = IRExpr_Const(IRConst_U32(
                    0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));

            UShort u16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
            u16 = (Short)u16 >> 8;  /* signed shift */
            e2 = IRExpr_Const(IRConst_U16(u16));

            e2 = IRExpr_Const(IRConst_U16(
                    0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
            e2 = IRExpr_Const(IRConst_U32(
                    0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
            e2 = IRExpr_Const(IRConst_U16(toUShort(
                    0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
            e2 = IRExpr_Const(IRConst_U8(toUChar(
                    0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U64)

            e2 = IRExpr_Const(IRConst_V128(
                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.V128)));
            e2 = IRExpr_Const(IRConst_U64(
                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64)));
            e2 = IRExpr_Const(IRConst_U32(
                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
            e2 = IRExpr_Const(IRConst_U16(toUShort(
                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16))));
            e2 = IRExpr_Const(IRConst_U8(toUChar(
                    ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8))));
            e2 = IRExpr_Const(IRConst_U1(
                    notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1)));
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            e2 = IRExpr_Const(IRConst_U8( (UChar)w64 ));

            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            e2 = IRExpr_Const(IRConst_U16( (UShort)w64 ));

            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            w64 &= 0x00000000FFFFFFFFULL;
            e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));

         case Iop_64HIto32: {
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));

            e2 = IRExpr_Const(IRConst_U64(
                    & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));

            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
            u64 = (Long)u64 >> 48;  /* signed shift */
            e2 = IRExpr_Const(IRConst_U64(u64));

            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
            u64 = (Long)u64 >> 32;  /* signed shift */
            e2 = IRExpr_Const(IRConst_U64(u64));

            UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
            e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));

            UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
            e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));

            e2 = IRExpr_Const(IRConst_U1(toBool(
                    (0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    (0xFFFFFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    0ULL != e->Iex.Unop.arg->Iex.Const.con->Ico.U64
         case Iop_CmpwNEZ32: {
            UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
               e2 = IRExpr_Const(IRConst_U32( 0 ));
               e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF ));

         case Iop_CmpwNEZ64: {
            ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
               e2 = IRExpr_Const(IRConst_U64( 0 ));
               e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL ));

            UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
            Int  s32 = (Int)(u32 & 0xFFFFFFFF);
            s32 = (s32 | (-s32));
            e2 = IRExpr_Const( IRConst_U32( (UInt)s32 ));

            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            Long  s64 = (Long)u64;
            s64 = (s64 | (-s64));
            e2 = IRExpr_Const( IRConst_U64( (ULong)s64 ));

            UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
            e2 = IRExpr_Const(IRConst_U32(fold_Clz32(u32)));

            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
            e2 = IRExpr_Const(IRConst_U64(fold_Clz64(u64)));
         /* For these vector ones, can't fold all cases, but at least
            do the most obvious one.  Could do better here using
            summarise/desummarise of vector constants, but too
            difficult to verify; hence just handle the zero cases. */
         case Iop_32UtoV128: {
            UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
               e2 = IRExpr_Const(IRConst_V128(0x0000));

         case Iop_V128to64: {
            UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
            if (0 == ((v128 >> 0) & 0xFF)) {
               e2 = IRExpr_Const(IRConst_U64(0));

         case Iop_V128HIto64: {
            UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
            if (0 == ((v128 >> 8) & 0xFF)) {
               e2 = IRExpr_Const(IRConst_U64(0));

         case Iop_64UtoV128: {
            ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
               e2 = IRExpr_Const(IRConst_V128(0x0000));

         /* Even stupider (although still correct ..) */
         case Iop_V256to64_0: case Iop_V256to64_1:
         case Iop_V256to64_2: case Iop_V256to64_3: {
            UInt v256 = e->Iex.Unop.arg->Iex.Const.con->Ico.V256;
            if (v256 == 0x00000000) {
               e2 = IRExpr_Const(IRConst_U64(0));

         case Iop_V256toV128_0: case Iop_V256toV128_1: {
            UInt v256 = e->Iex.Unop.arg->Iex.Const.con->Ico.V256;
            if (v256 == 0x00000000) {
               e2 = IRExpr_Const(IRConst_V128(0x0000));

         case Iop_ZeroHI64ofV128: {
            /* Could do better here -- only need to look at the bottom 64 bits
               of the argument, really. */
            UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
            if (v128 == 0x0000) {
               e2 = IRExpr_Const(IRConst_V128(0x0000));
1726 /* other cases (identities, etc) */
1727 switch (e
->Iex
.Unop
.op
) {
1728 case Iop_PopCount64
: {
1729 // PopCount64( And64( Add64(x,-1), Not64(x) ) ) ==> CtzNat64(x)
1731 // a1:And64( a11:Add64(a111:x,a112:-1), a12:Not64(a121:x) )
1732 IRExpr
* a1
= chase(env
, e
->Iex
.Unop
.arg
);
1735 if (a1
->tag
!= Iex_Binop
|| a1
->Iex
.Binop
.op
!= Iop_And64
)
1737 // a1 is established
1738 IRExpr
* a11
= chase(env
, a1
->Iex
.Binop
.arg1
);
1741 if (a11
->tag
!= Iex_Binop
|| a11
->Iex
.Binop
.op
!= Iop_Add64
)
1743 // a11 is established
1744 IRExpr
* a12
= chase(env
, a1
->Iex
.Binop
.arg2
);
1747 if (a12
->tag
!= Iex_Unop
|| a12
->Iex
.Unop
.op
!= Iop_Not64
)
1749 // a12 is established
1750 IRExpr
* a111
= a11
->Iex
.Binop
.arg1
;
1751 IRExpr
* a112
= chase(env
, a11
->Iex
.Binop
.arg2
);
1752 IRExpr
* a121
= a12
->Iex
.Unop
.arg
;
1753 if (!a111
|| !a112
|| !a121
)
1755 // a111 and a121 need to be the same temp.
1756 if (!eqIRAtom(a111
, a121
))
1758 // Finally, a112 must be a 64-bit version of -1.
1761 // Match established. Transform.
1762 e2
= IRExpr_Unop(Iop_CtzNat64
, a111
);
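         // Why this is sound (illustrative): if x ends in k zero bits,
         // Add64(x,-1) turns exactly those k bits into ones and clears
         // the lowest set bit, while Not64(x) also has those k low bits
         // set; the And64 therefore produces a mask of exactly k
         // one-bits, and counting them with PopCount64 gives the same
         // answer as CtzNat64(x).  (For x == 0 the mask is all ones and
         // both sides yield 64.)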
      } // switch (e->Iex.Unop.op)
      } // if (e->Iex.Unop.arg->tag == Iex_Const)
      if (e->Iex.Binop.arg1->tag == Iex_Const
          && e->Iex.Binop.arg2->tag == Iex_Const) {
         /* cases where both args are consts */
         switch (e->Iex.Binop.op) {

            e2 = IRExpr_Const(IRConst_U8(toUChar(
                    (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                     | e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
            e2 = IRExpr_Const(IRConst_U16(toUShort(
                    (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                     | e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
            e2 = IRExpr_Const(IRConst_U32(
                    (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                     | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
            e2 = IRExpr_Const(IRConst_U64(
                    (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                     | e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
            e2 = IRExpr_Const(IRConst_V128(
                    (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
                     | e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));

            e2 = IRExpr_Const(IRConst_U8(toUChar(
                    (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                     ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
            e2 = IRExpr_Const(IRConst_U16(toUShort(
                    (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                     ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
            e2 = IRExpr_Const(IRConst_U32(
                    (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                     ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
            e2 = IRExpr_Const(IRConst_U64(
                    (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                     ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
            e2 = IRExpr_Const(IRConst_V128(
                    (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
                     ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));

            e2 = IRExpr_Const(IRConst_U1(toBool(
                    (e->Iex.Binop.arg1->Iex.Const.con->Ico.U1
                     & e->Iex.Binop.arg2->Iex.Const.con->Ico.U1))));
            e2 = IRExpr_Const(IRConst_U8(toUChar(
                    (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                     & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
            e2 = IRExpr_Const(IRConst_U16(toUShort(
                    (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
                     & e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
            e2 = IRExpr_Const(IRConst_U32(
                    (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                     & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
            e2 = IRExpr_Const(IRConst_U64(
                    (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                     & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
            e2 = IRExpr_Const(IRConst_V128(
                    (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
                     & e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
            e2 = IRExpr_Const(IRConst_U8(toUChar(
                    (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                     + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
            e2 = IRExpr_Const(IRConst_U32(
                    (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                     + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
            e2 = IRExpr_Const(IRConst_U64(
                    (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                     + e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));

            e2 = IRExpr_Const(IRConst_U8(toUChar(
                    (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
                     - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
            e2 = IRExpr_Const(IRConst_U32(
                    (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                     - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
            e2 = IRExpr_Const(IRConst_U64(
                    (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                     - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));

            UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
            UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
            UInt res = u32a > u32b ? u32a : u32b;
            e2 = IRExpr_Const(IRConst_U32(res));

            e2 = IRExpr_Const(IRConst_U32(
                    (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                     * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
            e2 = IRExpr_Const(IRConst_U64(
                    (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                     * e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));

            UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
            UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
            Int   s32a = (Int)u32a;
            Int   s32b = (Int)u32b;
            Long  s64a = (Long)s32a;
            Long  s64b = (Long)s32b;
            Long  sres = s64a * s64b;
            ULong ures = (ULong)sres;
            e2 = IRExpr_Const(IRConst_U64(ures));
            vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
            shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
            if (shift >= 0 && shift <= 31)
               e2 = IRExpr_Const(IRConst_U32(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32

            vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
            shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
            if (shift >= 0 && shift <= 63)
               e2 = IRExpr_Const(IRConst_U64(
                       (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64

            vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
            s32   = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
            shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
            if (shift >= 0 && shift <= 31) {
               s32 >>=/*signed*/ shift;
               e2 = IRExpr_Const(IRConst_U32((UInt)s32));

            /*signed*/ Long s64;
            vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
            s64   = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
            shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
            if (shift >= 0 && shift <= 63) {
               s64 >>=/*signed*/ shift;
               e2 = IRExpr_Const(IRConst_U64((ULong)s64));

            /*unsigned*/ UInt u32;
            vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
            u32   = (UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
            shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
            if (shift >= 0 && shift <= 31) {
               u32 >>=/*unsigned*/ shift;
               e2 = IRExpr_Const(IRConst_U32(u32));

            /*unsigned*/ ULong u64;
            vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
            u64   = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
            shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
            if (shift >= 0 && shift <= 63) {
               u64 >>=/*unsigned*/ shift;
               e2 = IRExpr_Const(IRConst_U64(u64));
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                     == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                     == e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));

            e2 = IRExpr_Const(IRConst_U1(toBool(
                    ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8)
                     != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))));

         case Iop_CasCmpNE32:
         case Iop_ExpCmpNE32:
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
                     != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));

         case Iop_CasCmpNE64:
         case Iop_ExpCmpNE64:
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
                     != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));

            e2 = IRExpr_Const(IRConst_U1(toBool(
                    ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                     <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
                     <= (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));

            e2 = IRExpr_Const(IRConst_U1(toBool(
                    ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                     <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
                     <= (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));

            e2 = IRExpr_Const(IRConst_U1(toBool(
                    ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                     < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
                     < (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));

            e2 = IRExpr_Const(IRConst_U1(toBool(
                    ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
                     < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
            e2 = IRExpr_Const(IRConst_U1(toBool(
                    ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
                     < (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
         case Iop_CmpORD32S: {
            UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
            UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
            Int s32a = (Int)u32a;
            Int s32b = (Int)u32b;
            Int r = 0x2; /* EQ */
            else if (s32a > s32b) {
            e2 = IRExpr_Const(IRConst_U32(r));

            e2 = IRExpr_Const(IRConst_U64(
                    (((ULong)(e->Iex.Binop.arg1
                                 ->Iex.Const.con->Ico.U32)) << 32)
                    | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))

            /* We can't fold this, because there is no way to
               express the result in IR, but at least pretend to
               handle it, so as to stop getting blasted with
               no-rule-for-this-primop messages. */

            /* For this vector one, can't fold all cases, but at
               least do the most obvious one.  Could do better here
               using summarise/desummarise of vector constants, but
               too difficult to verify; hence just handle the zero
               cases. */
            case Iop_64HLtoV128: {
               ULong argHi = e->Iex.Binop.arg1->Iex.Const.con->Ico.U64;
               ULong argLo = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
               if (0 == argHi && 0 == argLo) {
                  e2 = IRExpr_Const(IRConst_V128(0));

            /* Same reasoning for the 256-bit version. */
            case Iop_V128HLtoV256: {
               IRExpr* argHi = e->Iex.Binop.arg1;
               IRExpr* argLo = e->Iex.Binop.arg2;
               if (isZeroV128(argHi) && isZeroV128(argLo)) {
                  e2 = IRExpr_Const(IRConst_V256(0));

            /* -- V128 stuff -- */
            case Iop_InterleaveLO64x2: // This, and the HI64, are created
            case Iop_InterleaveHI64x2: // by the amd64 PTEST translation
            case Iop_InterleaveLO8x16: {
               /* This turns up a lot in Memcheck instrumentation of
                  Icc generated code.  I don't know why. */
               UShort arg1 = e->Iex.Binop.arg1->Iex.Const.con->Ico.V128;
               UShort arg2 = e->Iex.Binop.arg2->Iex.Const.con->Ico.V128;
               if (0 == arg1 && 0 == arg2) {
                  e2 = IRExpr_Const(IRConst_V128(0));
2170 /* other cases (identities, etc) */
2171 switch (e
->Iex
.Binop
.op
) {
2177 /* Shl32/Shl64/Shr64/Sar64(x,0) ==> x */
2178 if (isZeroU(e
->Iex
.Binop
.arg2
)) {
2179 e2
= e
->Iex
.Binop
.arg1
;
2182 /* Shl32/Shl64/Shr64(0,x) ==> 0 */
2183 if (isZeroU(e
->Iex
.Binop
.arg1
)) {
2184 e2
= e
->Iex
.Binop
.arg1
;
2191 /* Shr32/Sar32(x,0) ==> x */
2192 if (isZeroU(e
->Iex
.Binop
.arg2
)) {
2193 e2
= e
->Iex
.Binop
.arg1
;
2203 /* Or8/Or16/Or32/Or64/Max32U(x,0) ==> x */
2204 if (isZeroU(e
->Iex
.Binop
.arg2
)) {
2205 e2
= e
->Iex
.Binop
.arg1
;
2208 /* Or8/Or16/Or32/Or64/Max32U(0,x) ==> x */
2209 if (isZeroU(e
->Iex
.Binop
.arg1
)) {
2210 e2
= e
->Iex
.Binop
.arg2
;
2213 /* Or8/Or16/Or32/Or64/Max32U(x,1---1b) ==> 1---1b */
2214 /* Or8/Or16/Or32/Or64/Max32U(1---1b,x) ==> 1---1b */
2215 if (isOnesU(e
->Iex
.Binop
.arg1
) || isOnesU(e
->Iex
.Binop
.arg2
)) {
2216 e2
= mkOnesOfPrimopResultType(e
->Iex
.Binop
.op
);
2219 /* Or8/Or16/Or32/Or64/Max32U(t,t) ==> t, for some IRTemp t */
2220 if (sameIRExprs(env
, e
->Iex
.Binop
.arg1
, e
->Iex
.Binop
.arg2
)) {
2221 e2
= e
->Iex
.Binop
.arg1
;
2227 /* Add8(t,t) ==> t << 1.
2228 Memcheck doesn't understand that
2229 x+x produces a defined least significant bit, and it seems
2230 simplest just to get rid of the problem by rewriting it
2231 out, since the opportunity to do so exists. */
2232 if (sameIRExprs(env
, e
->Iex
.Binop
.arg1
, e
->Iex
.Binop
.arg2
)) {
2233 e2
= IRExpr_Binop(Iop_Shl8
, e
->Iex
.Binop
.arg1
,
2234 IRExpr_Const(IRConst_U8(1)));
2239 /* NB no Add16(t,t) case yet as no known test case exists */
2243 /* Add32/Add64(x,0) ==> x */
2244 if (isZeroU(e
->Iex
.Binop
.arg2
)) {
2245 e2
= e
->Iex
.Binop
.arg1
;
2248 /* Add32/Add64(0,x) ==> x */
2249 if (isZeroU(e
->Iex
.Binop
.arg1
)) {
2250 e2
= e
->Iex
.Binop
.arg2
;
2253 /* Add32/Add64(t,t) ==> t << 1. Same rationale as for Add8. */
2254 if (sameIRExprs(env
, e
->Iex
.Binop
.arg1
, e
->Iex
.Binop
.arg2
)) {
2256 e
->Iex
.Binop
.op
== Iop_Add32
? Iop_Shl32
: Iop_Shl64
,
2257 e
->Iex
.Binop
.arg1
, IRExpr_Const(IRConst_U8(1)));
2264 /* Sub32/Sub64(x,0) ==> x */
2265 if (isZeroU(e
->Iex
.Binop
.arg2
)) {
2266 e2
= e
->Iex
.Binop
.arg1
;
2269 /* Sub32/Sub64(t,t) ==> 0, for some IRTemp t */
2270 if (sameIRExprs(env
, e
->Iex
.Binop
.arg1
, e
->Iex
.Binop
.arg2
)) {
2271 e2
= mkZeroOfPrimopResultType(e
->Iex
.Binop
.op
);
2276 /* Sub8x16(x,0) ==> x */
2277 if (isZeroV128(e
->Iex
.Binop
.arg2
)) {
2278 e2
= e
->Iex
.Binop
.arg1
;
2288 /* And1/And8/And16/And32/And64(x,1---1b) ==> x */
2289 if (isOnesU(e
->Iex
.Binop
.arg2
)) {
2290 e2
= e
->Iex
.Binop
.arg1
;
2293 /* And1/And8/And16/And32/And64(1---1b,x) ==> x */
2294 if (isOnesU(e
->Iex
.Binop
.arg1
)) {
2295 e2
= e
->Iex
.Binop
.arg2
;
2298 /* And1/And8/And16/And32/And64(x,0) ==> 0 */
2299 if (isZeroU(e
->Iex
.Binop
.arg2
)) {
2300 e2
= e
->Iex
.Binop
.arg2
;
2303 /* And1/And8/And16/And32/And64(0,x) ==> 0 */
2304 if (isZeroU(e
->Iex
.Binop
.arg1
)) {
2305 e2
= e
->Iex
.Binop
.arg1
;
2308 /* And1/And8/And16/And32/And64(t,t) ==> t, for some IRTemp t */
2309 if (sameIRExprs(env
, e
->Iex
.Binop
.arg1
, e
->Iex
.Binop
.arg2
)) {
2310 e2
= e
->Iex
.Binop
.arg1
;
2317 /* And8/And16/AndV128/AndV256(t,t)
2318 ==> t, for some IRTemp t */
2319 if (sameIRExprs(env
, e
->Iex
.Binop
.arg1
, e
->Iex
.Binop
.arg2
)) {
2320 e2
= e
->Iex
.Binop
.arg1
;
2323 /* Deal with either arg zero. Could handle other And
2325 if (e
->Iex
.Binop
.op
== Iop_AndV256
2326 && (isZeroV256(e
->Iex
.Binop
.arg1
)
2327 || isZeroV256(e
->Iex
.Binop
.arg2
))) {
2328 e2
= mkZeroOfPrimopResultType(e
->Iex
.Binop
.op
);
2330 } else if (e
->Iex
.Binop
.op
== Iop_AndV128
2331 && (isZeroV128(e
->Iex
.Binop
.arg1
)
2332 || isZeroV128(e
->Iex
.Binop
.arg2
))) {
2333 e2
= mkZeroOfPrimopResultType(e
->Iex
.Binop
.op
);
2336 /* AndV128(t,1...1) ==> t. The amd64 front end generates these
2337 for *CMP{P,S}{S,D} etc. */
2338 if (e
->Iex
.Binop
.op
== Iop_AndV128
) {
2339 if (isOnesV128(e
->Iex
.Binop
.arg2
)) {
2340 e2
= e
->Iex
.Binop
.arg1
;
2344 /* AndV128( x, NotV128( x ) ) ==> 0...0 and
2345 AndV256( x, NotV256( x ) ) ==> 0...0
2346 This is generated by amd64 `ptest %xmmReg, %xmmReg`
2347 (the same reg both times)
2348 See https://bugzilla.redhat.com/show_bug.cgi?id=2257546 */
2349 if (e
->Iex
.Binop
.op
== Iop_AndV128
2350 || e
->Iex
.Binop
.op
== Iop_AndV256
) {
2351 Bool isV256
= e
->Iex
.Binop
.op
== Iop_AndV256
;
2352 IRExpr
* x1
= chase(env
, e
->Iex
.Binop
.arg1
);
2353 IRExpr
* rhs
= chase(env
, e
->Iex
.Binop
.arg2
);
2355 && rhs
->tag
== Iex_Unop
2356 && rhs
->Iex
.Unop
.op
== (isV256
? Iop_NotV256
2358 IRExpr
* x2
= chase(env
, rhs
->Iex
.Unop
.arg
);
2359 if (x2
&& sameIRExprs(env
, x1
, x2
)) {
2360 e2
= mkZeroOfPrimopResultType(e
->Iex
.Binop
.op
);
2369 /* V128/V256(t,t) ==> t, for some IRTemp t */
2370 if (sameIRExprs(env
, e
->Iex
.Binop
.arg1
, e
->Iex
.Binop
.arg2
)) {
2371 e2
= e
->Iex
.Binop
.arg1
;
2374 /* OrV128(t,0) ==> t */
2375 if (e
->Iex
.Binop
.op
== Iop_OrV128
) {
2376 if (isZeroV128(e
->Iex
.Binop
.arg2
)) {
2377 e2
= e
->Iex
.Binop
.arg1
;
2380 if (isZeroV128(e
->Iex
.Binop
.arg1
)) {
2381 e2
= e
->Iex
.Binop
.arg2
;
2385 /* OrV256(t,0) ==> t */
2386 if (e
->Iex
.Binop
.op
== Iop_OrV256
) {
2387 if (isZeroV256(e
->Iex
.Binop
.arg2
)) {
2388 e2
= e
->Iex
.Binop
.arg1
;
2391 //Disabled because there's no known test case right now.
2392 //if (isZeroV256(e->Iex.Binop.arg1)) {
2393 // e2 = e->Iex.Binop.arg2;
2405 /* Xor8/16/32/64/V128/V256(t,t) ==> 0, for some IRTemp t */
2406 if (sameIRExprs(env
, e
->Iex
.Binop
.arg1
, e
->Iex
.Binop
.arg2
)) {
2407 e2
= mkZeroOfPrimopResultType(e
->Iex
.Binop
.op
);
2410 /* XorV128(t,0) ==> t */
2411 if (e
->Iex
.Binop
.op
== Iop_XorV128
) {
2412 if (isZeroV128(e
->Iex
.Binop
.arg2
)) {
2413 e2
= e
->Iex
.Binop
.arg1
;
2416 //Disabled because there's no known test case right now.
2417 //if (isZeroV128(e->Iex.Binop.arg1)) {
2418 // e2 = e->Iex.Binop.arg2;
2422 /* Xor8/16/32/64(0,t) ==> t */
2423 if (isZeroU(e
->Iex
.Binop
.arg1
)) {
2424 e2
= e
->Iex
.Binop
.arg2
;
2427 /* Xor8/16/32/64(t,0) ==> t */
2428 if (isZeroU(e
->Iex
.Binop
.arg2
)) {
2429 e2
= e
->Iex
.Binop
.arg1
;
2436 /* CmpNE32(t,t) ==> 0, for some IRTemp t */
2437 if (sameIRExprs(env
, e
->Iex
.Binop
.arg1
, e
->Iex
.Binop
.arg2
)) {
2438 e2
= mkZeroOfPrimopResultType(e
->Iex
.Binop
.op
);
2441 /* CmpNE32(1Uto32(b), 0) ==> b */
2442 if (isZeroU32(e
->Iex
.Binop
.arg2
)) {
2443 IRExpr
* a1
= chase(env
, e
->Iex
.Binop
.arg1
);
2444 if (a1
&& a1
->tag
== Iex_Unop
2445 && a1
->Iex
.Unop
.op
== Iop_1Uto32
) {
2446 e2
= a1
->Iex
.Unop
.arg
;
2459 // in total 128 bits
2464 // in total 256 bits
2466 case Iop_CmpEQ16x16
:
2469 if (sameIRExprs(env
, e
->Iex
.Binop
.arg1
, e
->Iex
.Binop
.arg2
)) {
2470 e2
= mkOnesOfPrimopResultType(e
->Iex
.Binop
.op
);
      /* is the discriminant a constant? */
2484 if (e
->Iex
.ITE
.cond
->tag
== Iex_Const
) {
2485 /* assured us by the IR type rules */
2486 vassert(e
->Iex
.ITE
.cond
->Iex
.Const
.con
->tag
== Ico_U1
);
2487 e2
= e
->Iex
.ITE
.cond
->Iex
.Const
.con
->Ico
.U1
2488 ? e
->Iex
.ITE
.iftrue
: e
->Iex
.ITE
.iffalse
;
2491 /* are the arms identical? (pretty weedy test) */
2492 if (sameIRExprs(env
, e
->Iex
.ITE
.iftrue
,
2493 e
->Iex
.ITE
.iffalse
)) {
2494 e2
= e
->Iex
.ITE
.iffalse
;
2499 /* not considered */
   /* Show cases where we've found but not folded 'op(t,t)'.  Be
      careful not to call sameIRExprs with values of different types,
      though, else it will assert (and so it should!).  We can't
      conveniently call typeOfIRExpr on the two args without a whole
      bunch of extra plumbing to pass in a type env, so just use a
      hacky test to check the arguments are not anything that might
      cause sameIRExprs to assert.  This is only OK because this kludge
      is only used for debug printing, not for "real" operation.  For
      "real" operation (ie, all other calls to sameIRExprs), it is
      essential that the two args have the same type.

      The "right" solution is to plumb the containing block's
      IRTypeEnv through to here and use typeOfIRExpr to be sure.  But
      that's a bunch of extra parameter passing which will just slow
      down the normal case, for no purpose. */
2518 if (vex_control
.iropt_verbosity
> 0
2520 && e
->tag
== Iex_Binop
2521 && !debug_only_hack_sameIRExprs_might_assert(e
->Iex
.Binop
.arg1
,
2523 && sameIRExprs(env
, e
->Iex
.Binop
.arg1
, e
->Iex
.Binop
.arg2
)) {
2524 vex_printf("vex iropt: fold_Expr_WRK: no ident rule for: ");
2529 /* Show the overall results of folding. */
2530 if (DEBUG_IROPT
&& e2
!= e
) {
2531 vex_printf("FOLD: ");
2532 ppIRExpr(e
); vex_printf(" -> ");
2533 ppIRExpr(e2
); vex_printf("\n");
2542 vpanic("fold_Expr: no rule for the above");
2544 if (vex_control
.iropt_verbosity
> 0) {
2545 vex_printf("vex iropt: fold_Expr_WRK: no const rule for: ");
/* Fold |e| as much as possible, given the bindings in |env|.  If no folding is
   possible, just return |e|.  Also, if |env| is NULL, don't even try to
   fold; just return |e| directly. */
static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e )
{
   return env == NULL ? e : fold_Expr_WRK(env, e);
}
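/* Illustrative sketch only (this helper is hypothetical and not used by
   the pass): fold_Expr can reduce a binop over two constants to a single
   constant, and with a NULL environment it simply hands back the
   expression unchanged. */
static IRExpr* example_fold_const_add32 ( IRExpr** env )
{
   /* Build Add32(0x20:I32, 0x3:I32) and ask the folder to reduce it. */
   IRExpr* sum = IRExpr_Binop(Iop_Add32,
                              IRExpr_Const(IRConst_U32(0x20)),
                              IRExpr_Const(IRConst_U32(3)));
   return fold_Expr(env, sum);   /* 0x23:I32 when env is non-NULL */
}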
/* Apply the subst to a simple 1-level expression -- guaranteed to be
   1-level due to previous flattening pass. */

static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex )
2569 if (env
[(Int
)ex
->Iex
.RdTmp
.tmp
] != NULL
) {
2570 IRExpr
*rhs
= env
[(Int
)ex
->Iex
.RdTmp
.tmp
];
2571 if (rhs
->tag
== Iex_RdTmp
)
2573 if (rhs
->tag
== Iex_Const
2574 && rhs
->Iex
.Const
.con
->tag
!= Ico_F64i
)
2577 /* not bound in env */
2585 vassert(isIRAtom(ex
->Iex
.GetI
.ix
));
2588 subst_Expr(env
, ex
->Iex
.GetI
.ix
),
2593 IRQop
* qop
= ex
->Iex
.Qop
.details
;
2594 vassert(isIRAtom(qop
->arg1
));
2595 vassert(isIRAtom(qop
->arg2
));
2596 vassert(isIRAtom(qop
->arg3
));
2597 vassert(isIRAtom(qop
->arg4
));
2600 subst_Expr(env
, qop
->arg1
),
2601 subst_Expr(env
, qop
->arg2
),
2602 subst_Expr(env
, qop
->arg3
),
2603 subst_Expr(env
, qop
->arg4
)
2608 IRTriop
* triop
= ex
->Iex
.Triop
.details
;
2609 vassert(isIRAtom(triop
->arg1
));
2610 vassert(isIRAtom(triop
->arg2
));
2611 vassert(isIRAtom(triop
->arg3
));
2612 return IRExpr_Triop(
2614 subst_Expr(env
, triop
->arg1
),
2615 subst_Expr(env
, triop
->arg2
),
2616 subst_Expr(env
, triop
->arg3
)
2621 vassert(isIRAtom(ex
->Iex
.Binop
.arg1
));
2622 vassert(isIRAtom(ex
->Iex
.Binop
.arg2
));
2623 return IRExpr_Binop(
2625 subst_Expr(env
, ex
->Iex
.Binop
.arg1
),
2626 subst_Expr(env
, ex
->Iex
.Binop
.arg2
)
2630 vassert(isIRAtom(ex
->Iex
.Unop
.arg
));
2633 subst_Expr(env
, ex
->Iex
.Unop
.arg
)
2637 vassert(isIRAtom(ex
->Iex
.Load
.addr
));
2641 subst_Expr(env
, ex
->Iex
.Load
.addr
)
2646 IRExpr
** args2
= shallowCopyIRExprVec(ex
->Iex
.CCall
.args
);
2647 for (i
= 0; args2
[i
]; i
++) {
2648 vassert(isIRAtom(args2
[i
]));
2649 args2
[i
] = subst_Expr(env
, args2
[i
]);
2651 return IRExpr_CCall(
2653 ex
->Iex
.CCall
.retty
,
2659 vassert(isIRAtom(ex
->Iex
.ITE
.cond
));
2660 vassert(isIRAtom(ex
->Iex
.ITE
.iftrue
));
2661 vassert(isIRAtom(ex
->Iex
.ITE
.iffalse
));
2663 subst_Expr(env
, ex
->Iex
.ITE
.cond
),
2664 subst_Expr(env
, ex
->Iex
.ITE
.iftrue
),
2665 subst_Expr(env
, ex
->Iex
.ITE
.iffalse
)
2669 vex_printf("\n\n"); ppIRExpr(ex
);
2670 vpanic("subst_Expr");
2676 /* Apply the subst to stmt, then, if |doFolding| is |True|, fold the result as
2677 much as possible. Much simplified due to stmt being previously flattened.
2678 As a result of this, the stmt may wind up being turned into a no-op.
2680 static IRStmt
* subst_and_maybe_fold_Stmt ( Bool doFolding
,
2681 IRExpr
** env
, IRStmt
* st
)
2684 vex_printf("\nsubst and maybe fold stmt\n");
2689 IRExpr
** s_env
= env
;
2690 IRExpr
** f_env
= doFolding
? env
: NULL
;
2694 vassert(isIRAtom(st
->Ist
.AbiHint
.base
));
2695 vassert(isIRAtom(st
->Ist
.AbiHint
.nia
));
2696 return IRStmt_AbiHint(
2697 fold_Expr(f_env
, subst_Expr(s_env
, st
->Ist
.AbiHint
.base
)),
2698 st
->Ist
.AbiHint
.len
,
2699 fold_Expr(f_env
, subst_Expr(s_env
, st
->Ist
.AbiHint
.nia
))
2702 vassert(isIRAtom(st
->Ist
.Put
.data
));
2705 fold_Expr(f_env
, subst_Expr(s_env
, st
->Ist
.Put
.data
))
2709 IRPutI
*puti
, *puti2
;
2710 puti
= st
->Ist
.PutI
.details
;
2711 vassert(isIRAtom(puti
->ix
));
2712 vassert(isIRAtom(puti
->data
));
2713 puti2
= mkIRPutI(puti
->descr
,
2714 fold_Expr(f_env
, subst_Expr(s_env
, puti
->ix
)),
2716 fold_Expr(f_env
, subst_Expr(s_env
, puti
->data
)));
2717 return IRStmt_PutI(puti2
);
2721 /* This is the one place where an expr (st->Ist.WrTmp.data) is
2722 allowed to be more than just a constant or a tmp. */
2723 return IRStmt_WrTmp(
2725 fold_Expr(f_env
, subst_Expr(s_env
, st
->Ist
.WrTmp
.data
))
2729 vassert(isIRAtom(st
->Ist
.Store
.addr
));
2730 vassert(isIRAtom(st
->Ist
.Store
.data
));
2731 return IRStmt_Store(
2733 fold_Expr(f_env
, subst_Expr(s_env
, st
->Ist
.Store
.addr
)),
2734 fold_Expr(f_env
, subst_Expr(s_env
, st
->Ist
.Store
.data
))
2738 IRStoreG
* sg
= st
->Ist
.StoreG
.details
;
2739 vassert(isIRAtom(sg
->addr
));
2740 vassert(isIRAtom(sg
->data
));
2741 vassert(isIRAtom(sg
->guard
));
2742 IRExpr
* faddr
= fold_Expr(f_env
, subst_Expr(s_env
, sg
->addr
));
2743 IRExpr
* fdata
= fold_Expr(f_env
, subst_Expr(s_env
, sg
->data
));
2744 IRExpr
* fguard
= fold_Expr(f_env
, subst_Expr(s_env
, sg
->guard
));
2745 if (fguard
->tag
== Iex_Const
) {
2746 /* The condition on this store has folded down to a constant. */
2747 vassert(fguard
->Iex
.Const
.con
->tag
== Ico_U1
);
2748 if (fguard
->Iex
.Const
.con
->Ico
.U1
== False
) {
2749 return IRStmt_NoOp();
2751 vassert(fguard
->Iex
.Const
.con
->Ico
.U1
== True
);
2752 return IRStmt_Store(sg
->end
, faddr
, fdata
);
2755 return IRStmt_StoreG(sg
->end
, faddr
, fdata
, fguard
);
2759 /* This is complicated. If the guard folds down to 'false',
2760 we can replace it with an assignment 'dst := alt', but if
2761 the guard folds down to 'true', we can't conveniently
2762 replace it with an unconditional load, because doing so
2763 requires generating a new temporary, and that is not easy
2764 to do at this point. */
2765 IRLoadG
* lg
= st
->Ist
.LoadG
.details
;
2766 vassert(isIRAtom(lg
->addr
));
2767 vassert(isIRAtom(lg
->alt
));
2768 vassert(isIRAtom(lg
->guard
));
2769 IRExpr
* faddr
= fold_Expr(f_env
, subst_Expr(s_env
, lg
->addr
));
2770 IRExpr
* falt
= fold_Expr(f_env
, subst_Expr(s_env
, lg
->alt
));
2771 IRExpr
* fguard
= fold_Expr(f_env
, subst_Expr(s_env
, lg
->guard
));
2772 if (fguard
->tag
== Iex_Const
) {
2773 /* The condition on this load has folded down to a constant. */
2774 vassert(fguard
->Iex
.Const
.con
->tag
== Ico_U1
);
2775 if (fguard
->Iex
.Const
.con
->Ico
.U1
== False
) {
2776 /* The load is not going to happen -- instead 'alt' is
2777 assigned to 'dst'. */
2778 return IRStmt_WrTmp(lg
->dst
, falt
);
2780 vassert(fguard
->Iex
.Const
.con
->Ico
.U1
== True
);
2781 /* The load is always going to happen. We want to
2782 convert to an unconditional load and assign to 'dst'
2783 (IRStmt_WrTmp). Problem is we need an extra temp to
2784 hold the loaded value, but none is available.
2785 Instead, reconstitute the conditional load (with
2786 folded args, of course) and let the caller of this
2787 routine deal with the problem. */
2790 return IRStmt_LoadG(lg
->end
, lg
->cvt
, lg
->dst
, faddr
, falt
, fguard
);
2795 cas
= st
->Ist
.CAS
.details
;
2796 vassert(isIRAtom(cas
->addr
));
2797 vassert(cas
->expdHi
== NULL
|| isIRAtom(cas
->expdHi
));
2798 vassert(isIRAtom(cas
->expdLo
));
2799 vassert(cas
->dataHi
== NULL
|| isIRAtom(cas
->dataHi
));
2800 vassert(isIRAtom(cas
->dataLo
));
2802 cas
->oldHi
, cas
->oldLo
, cas
->end
,
2803 fold_Expr(f_env
, subst_Expr(s_env
, cas
->addr
)),
2804 cas
->expdHi
? fold_Expr(f_env
,
2805 subst_Expr(s_env
, cas
->expdHi
))
2807 fold_Expr(f_env
, subst_Expr(s_env
, cas
->expdLo
)),
2808 cas
->dataHi
? fold_Expr(f_env
,
2809 subst_Expr(s_env
, cas
->dataHi
))
2811 fold_Expr(f_env
, subst_Expr(s_env
, cas
->dataLo
))
2813 return IRStmt_CAS(cas2
);
2817 vassert(isIRAtom(st
->Ist
.LLSC
.addr
));
2818 if (st
->Ist
.LLSC
.storedata
)
2819 vassert(isIRAtom(st
->Ist
.LLSC
.storedata
));
2822 st
->Ist
.LLSC
.result
,
2823 fold_Expr(f_env
, subst_Expr(s_env
, st
->Ist
.LLSC
.addr
)),
2824 st
->Ist
.LLSC
.storedata
2826 subst_Expr(s_env
, st
->Ist
.LLSC
.storedata
))
2833 d
= st
->Ist
.Dirty
.details
;
2834 d2
= emptyIRDirty();
2836 d2
->args
= shallowCopyIRExprVec(d2
->args
);
2837 if (d2
->mFx
!= Ifx_None
) {
2838 vassert(isIRAtom(d2
->mAddr
));
2839 d2
->mAddr
= fold_Expr(f_env
, subst_Expr(s_env
, d2
->mAddr
));
2841 vassert(isIRAtom(d2
->guard
));
2842 d2
->guard
= fold_Expr(f_env
, subst_Expr(s_env
, d2
->guard
));
2843 for (i
= 0; d2
->args
[i
]; i
++) {
2844 IRExpr
* arg
= d2
->args
[i
];
2845 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg
))) {
2846 vassert(isIRAtom(arg
));
2847 d2
->args
[i
] = fold_Expr(f_env
, subst_Expr(s_env
, arg
));
2850 return IRStmt_Dirty(d2
);
2854 return IRStmt_IMark(st
->Ist
.IMark
.addr
,
2856 st
->Ist
.IMark
.delta
);
2859 return IRStmt_NoOp();
2862 return IRStmt_MBE(st
->Ist
.MBE
.event
);
2866 vassert(isIRAtom(st
->Ist
.Exit
.guard
));
2867 fcond
= fold_Expr(f_env
, subst_Expr(s_env
, st
->Ist
.Exit
.guard
));
2868 if (fcond
->tag
== Iex_Const
) {
2869 /* Interesting. The condition on this exit has folded down to
2871 vassert(fcond
->Iex
.Const
.con
->tag
== Ico_U1
);
2872 if (fcond
->Iex
.Const
.con
->Ico
.U1
== False
) {
2873 /* exit is never going to happen, so dump the statement. */
2874 return IRStmt_NoOp();
2876 vassert(fcond
->Iex
.Const
.con
->Ico
.U1
== True
);
2877 /* Hmmm. The exit has become unconditional. Leave it
2878 as it is for now, since we'd have to truncate the BB
2879 at this point, which is tricky. Such truncation is
2880 done later by the dead-code elimination pass. */
2881 /* fall out into the reconstruct-the-exit code. */
2882 if (vex_control
.iropt_verbosity
> 0)
2883 /* really a misuse of vex_control.iropt_verbosity */
2884 vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
2887 return IRStmt_Exit(fcond
, st
->Ist
.Exit
.jk
,
2888 st
->Ist
.Exit
.dst
, st
->Ist
.Exit
.offsIP
);
2892 vex_printf("\n"); ppIRStmt(st
);
2893 vpanic("subst_and_fold_Stmt");
__attribute__((noinline))
static IRSB* cprop_BB_WRK ( IRSB* in, Bool mustRetainNoOps, Bool doFolding )
2904 Int n_tmps
= in
->tyenv
->types_used
;
2905 IRExpr
** env
= LibVEX_Alloc_inline(n_tmps
* sizeof(IRExpr
*));
2906 /* Keep track of IRStmt_LoadGs that we need to revisit after
2907 processing all the other statements. */
2908 const Int N_FIXUPS
= 16;
2909 Int fixups
[N_FIXUPS
]; /* indices in the stmt array of 'out' */
2913 out
->tyenv
= deepCopyIRTypeEnv( in
->tyenv
);
2915 /* Set up the env with which travels forward. This holds a
2916 substitution, mapping IRTemps to IRExprs. The environment
2917 is to be applied as we move along. Keys are IRTemps.
2918 Values are IRExpr*s.
2920 for (i
= 0; i
< n_tmps
; i
++)
2923 /* For each original SSA-form stmt ... */
2924 for (i
= 0; i
< in
->stmts_used
; i
++) {
2926 /* First apply the substitution to the current stmt. This
2927 propagates in any constants and tmp-tmp assignments
2928 accumulated prior to this point. As part of the subst_Stmt
2929 call, also then fold any constant expressions resulting. */
2933 /* perhaps st2 is already a no-op? */
2934 if (st2
->tag
== Ist_NoOp
&& !mustRetainNoOps
) continue;
2936 st2
= subst_and_maybe_fold_Stmt( doFolding
, env
, st2
);
2938 /* Deal with some post-folding special cases. */
2941 /* If the statement has been folded into a no-op, forget
2944 if (mustRetainNoOps
) {
2950 /* If the statement assigns to an IRTemp add it to the
2951 running environment. This is for the benefit of copy
2952 propagation and to allow sameIRExpr look through
2955 vassert(env
[(Int
)(st2
->Ist
.WrTmp
.tmp
)] == NULL
);
2956 env
[(Int
)(st2
->Ist
.WrTmp
.tmp
)] = st2
->Ist
.WrTmp
.data
;
2958 /* 't1 = t2' -- don't add to BB; will be optimized out */
2959 if (st2
->Ist
.WrTmp
.data
->tag
== Iex_RdTmp
)
2962 /* 't = const' && 'const != F64i' -- don't add to BB
2963 Note, we choose not to propagate const when const is an
2964 F64i, so that F64i literals can be CSE'd later. This
2965 helps x86 floating point code generation. */
2966 if (st2
->Ist
.WrTmp
.data
->tag
== Iex_Const
2967 && st2
->Ist
.WrTmp
.data
->Iex
.Const
.con
->tag
!= Ico_F64i
) {
2970 /* else add it to the output, as normal */
2975 IRLoadG
* lg
= st2
->Ist
.LoadG
.details
;
2976 IRExpr
* guard
= lg
->guard
;
2977 if (guard
->tag
== Iex_Const
) {
2978 /* The guard has folded to a constant, and that
2979 constant must be 1:I1, since subst_and_maybe_fold_Stmt
2980 folds out the case 0:I1 by itself. */
2981 vassert(guard
->Iex
.Const
.con
->tag
== Ico_U1
);
2982 vassert(guard
->Iex
.Const
.con
->Ico
.U1
== True
);
2983 /* Add a NoOp here as a placeholder, and make a note of
2984 where it is in the output block. Afterwards we'll
2985 come back here and transform the NoOp and the LoadG
2986 into a load-convert pair. The fixups[] entry
2987 refers to the inserted NoOp, and we expect to find
2988 the relevant LoadG immediately after it. */
2989 vassert(n_fixups
>= 0 && n_fixups
<= N_FIXUPS
);
2990 if (n_fixups
< N_FIXUPS
) {
2991 fixups
[n_fixups
++] = out
->stmts_used
;
2992 addStmtToIRSB( out
, IRStmt_NoOp() );
2995 /* And always add the LoadG to the output, regardless. */
3003 /* Not interesting, copy st2 into the output block. */
3004 addStmtToIRSB( out
, st2
);
3008 vex_printf("sameIRExpr: invoked = %u/%u equal = %u/%u max_nodes = %u\n",
3009 invocation_count
, recursion_count
, success_count
,
3010 recursion_success_count
, max_nodes_visited
);
3013 out
->next
= subst_Expr( env
, in
->next
);
3014 out
->jumpkind
= in
->jumpkind
;
3015 out
->offsIP
= in
->offsIP
;
3017 /* Process any leftover unconditional LoadGs that we noticed
3018 in the main pass. */
3019 vassert(n_fixups
>= 0 && n_fixups
<= N_FIXUPS
);
3020 for (i
= 0; i
< n_fixups
; i
++) {
3022 /* Carefully verify that the LoadG has the expected form. */
3023 vassert(ix
>= 0 && ix
+1 < out
->stmts_used
);
3024 IRStmt
* nop
= out
->stmts
[ix
];
3025 IRStmt
* lgu
= out
->stmts
[ix
+1];
3026 vassert(nop
->tag
== Ist_NoOp
);
3027 vassert(lgu
->tag
== Ist_LoadG
);
3028 IRLoadG
* lg
= lgu
->Ist
.LoadG
.details
;
3029 IRExpr
* guard
= lg
->guard
;
3030 vassert(guard
->Iex
.Const
.con
->tag
== Ico_U1
);
3031 vassert(guard
->Iex
.Const
.con
->Ico
.U1
== True
);
3032 /* Figure out the load and result types, and the implied
3033 conversion operation. */
3034 IRType cvtRes
= Ity_INVALID
, cvtArg
= Ity_INVALID
;
3035 typeOfIRLoadGOp(lg
->cvt
, &cvtRes
, &cvtArg
);
3036 IROp cvtOp
= Iop_INVALID
;
3038 case ILGop_IdentV128
:
3040 case ILGop_Ident32
: break;
3041 case ILGop_8Uto32
: cvtOp
= Iop_8Uto32
; break;
3042 case ILGop_8Sto32
: cvtOp
= Iop_8Sto32
; break;
3043 case ILGop_16Uto32
: cvtOp
= Iop_16Uto32
; break;
3044 case ILGop_16Sto32
: cvtOp
= Iop_16Sto32
; break;
3045 default: vpanic("cprop_BB: unhandled ILGOp");
3047 /* Replace the placeholder NoOp by the required unconditional
3049 IRTemp tLoaded
= newIRTemp(out
->tyenv
, cvtArg
);
3051 = IRStmt_WrTmp(tLoaded
,
3052 IRExpr_Load(lg
->end
, cvtArg
, lg
->addr
));
3053 /* Replace the LoadG by a conversion from the loaded value's
3054 type to the required result type. */
3057 lg
->dst
, cvtOp
== Iop_INVALID
3058 ? IRExpr_RdTmp(tLoaded
)
3059 : IRExpr_Unop(cvtOp
, IRExpr_RdTmp(tLoaded
)));
IRSB* cprop_BB ( IRSB* in ) {
   return cprop_BB_WRK(in, /*mustRetainNoOps=*/False, /*doFolding=*/True);
}
/*---------------------------------------------------------------*/
/*--- Dead code (t = E) removal                               ---*/
/*---------------------------------------------------------------*/

/* As a side effect, also removes all code following an unconditional
   side exit. */

/* The type of the HashHW map is: a map from IRTemp to nothing
   -- really just representing a set of IRTemps.
*/

static void addUses_Temp ( Bool* set, IRTemp tmp )
{
   set[(Int)tmp] = True;
}
3088 static void addUses_Expr ( Bool
* set
, IRExpr
* e
)
3093 addUses_Expr(set
, e
->Iex
.GetI
.ix
);
3096 addUses_Expr(set
, e
->Iex
.ITE
.cond
);
3097 addUses_Expr(set
, e
->Iex
.ITE
.iftrue
);
3098 addUses_Expr(set
, e
->Iex
.ITE
.iffalse
);
3101 for (i
= 0; e
->Iex
.CCall
.args
[i
]; i
++)
3102 addUses_Expr(set
, e
->Iex
.CCall
.args
[i
]);
3105 addUses_Expr(set
, e
->Iex
.Load
.addr
);
3108 addUses_Expr(set
, e
->Iex
.Qop
.details
->arg1
);
3109 addUses_Expr(set
, e
->Iex
.Qop
.details
->arg2
);
3110 addUses_Expr(set
, e
->Iex
.Qop
.details
->arg3
);
3111 addUses_Expr(set
, e
->Iex
.Qop
.details
->arg4
);
3114 addUses_Expr(set
, e
->Iex
.Triop
.details
->arg1
);
3115 addUses_Expr(set
, e
->Iex
.Triop
.details
->arg2
);
3116 addUses_Expr(set
, e
->Iex
.Triop
.details
->arg3
);
3119 addUses_Expr(set
, e
->Iex
.Binop
.arg1
);
3120 addUses_Expr(set
, e
->Iex
.Binop
.arg2
);
3123 addUses_Expr(set
, e
->Iex
.Unop
.arg
);
3126 addUses_Temp(set
, e
->Iex
.RdTmp
.tmp
);
3134 vpanic("addUses_Expr");
3138 static void addUses_Stmt ( Bool
* set
, IRStmt
* st
)
3145 addUses_Expr(set
, st
->Ist
.AbiHint
.base
);
3146 addUses_Expr(set
, st
->Ist
.AbiHint
.nia
);
3149 addUses_Expr(set
, st
->Ist
.PutI
.details
->ix
);
3150 addUses_Expr(set
, st
->Ist
.PutI
.details
->data
);
3153 addUses_Expr(set
, st
->Ist
.WrTmp
.data
);
3156 addUses_Expr(set
, st
->Ist
.Put
.data
);
3159 addUses_Expr(set
, st
->Ist
.Store
.addr
);
3160 addUses_Expr(set
, st
->Ist
.Store
.data
);
3163 IRStoreG
* sg
= st
->Ist
.StoreG
.details
;
3164 addUses_Expr(set
, sg
->addr
);
3165 addUses_Expr(set
, sg
->data
);
3166 addUses_Expr(set
, sg
->guard
);
3170 IRLoadG
* lg
= st
->Ist
.LoadG
.details
;
3171 addUses_Expr(set
, lg
->addr
);
3172 addUses_Expr(set
, lg
->alt
);
3173 addUses_Expr(set
, lg
->guard
);
3177 cas
= st
->Ist
.CAS
.details
;
3178 addUses_Expr(set
, cas
->addr
);
3180 addUses_Expr(set
, cas
->expdHi
);
3181 addUses_Expr(set
, cas
->expdLo
);
3183 addUses_Expr(set
, cas
->dataHi
);
3184 addUses_Expr(set
, cas
->dataLo
);
3187 addUses_Expr(set
, st
->Ist
.LLSC
.addr
);
3188 if (st
->Ist
.LLSC
.storedata
)
3189 addUses_Expr(set
, st
->Ist
.LLSC
.storedata
);
3192 d
= st
->Ist
.Dirty
.details
;
3193 if (d
->mFx
!= Ifx_None
)
3194 addUses_Expr(set
, d
->mAddr
);
3195 addUses_Expr(set
, d
->guard
);
3196 for (i
= 0; d
->args
[i
] != NULL
; i
++) {
3197 IRExpr
* arg
= d
->args
[i
];
3198 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg
)))
3199 addUses_Expr(set
, arg
);
3207 addUses_Expr(set
, st
->Ist
.Exit
.guard
);
3212 vpanic("addUses_Stmt");
/* Is this literally IRExpr_Const(IRConst_U1(False)) ? */
static Bool isZeroU1 ( IRExpr* e )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_U1
                  && e->Iex.Const.con->Ico.U1 == False );
}

/* Is this literally IRExpr_Const(IRConst_U1(True)) ? */
static Bool isOneU1 ( IRExpr* e )
{
   return toBool( e->tag == Iex_Const
                  && e->Iex.Const.con->tag == Ico_U1
                  && e->Iex.Const.con->Ico.U1 == True );
}
/* Note, this destructively modifies the given IRSB. */

/* Scan backwards through statements, carrying a set of IRTemps which
   are known to be used after the current point.  On encountering 't =
   E', delete the binding if it is not used.  Otherwise, add any temp
   uses to the set and keep on moving backwards.

   As an enhancement, the first (backwards) pass searches for IR exits
   with always-taken conditions and notes the location of the earliest
   one in the block.  If any such are found, a second pass copies the
   exit destination and jump kind to the bb-end.  Then, the exit and
   all statements following it are turned into no-ops.
*/
3248 /* notstatic */ void do_deadcode_BB ( IRSB
* bb
)
3250 Int i
, i_unconditional_exit
;
3251 Int n_tmps
= bb
->tyenv
->types_used
;
3252 Bool
* set
= LibVEX_Alloc_inline(n_tmps
* sizeof(Bool
));
3255 for (i
= 0; i
< n_tmps
; i
++)
3258 /* start off by recording IRTemp uses in the next field. */
3259 addUses_Expr(set
, bb
->next
);
3263 /* Work backwards through the stmts */
3264 i_unconditional_exit
= -1;
3265 for (i
= bb
->stmts_used
-1; i
>= 0; i
--) {
3267 if (st
->tag
== Ist_NoOp
)
3269 /* take note of any unconditional exits */
3270 if (st
->tag
== Ist_Exit
3271 && isOneU1(st
->Ist
.Exit
.guard
))
3272 i_unconditional_exit
= i
;
3273 if (st
->tag
== Ist_WrTmp
3274 && set
[(Int
)(st
->Ist
.WrTmp
.tmp
)] == False
) {
3275 /* it's an IRTemp which never got used. Delete it. */
3277 vex_printf("DEAD: ");
3281 bb
->stmts
[i
] = IRStmt_NoOp();
3284 if (st
->tag
== Ist_Dirty
3285 && st
->Ist
.Dirty
.details
->guard
3286 && isZeroU1(st
->Ist
.Dirty
.details
->guard
)) {
3287 /* This is a dirty helper which will never get called.
3289 bb
->stmts
[i
] = IRStmt_NoOp();
3292 /* Note any IRTemp uses made by the current statement. */
3293 addUses_Stmt(set
, st
);
3297 /* Optional second pass: if any unconditional exits were found,
3298 delete them and all following statements. */
3300 if (i_unconditional_exit
!= -1) {
3301 if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n",
3302 i_unconditional_exit
);
3303 vassert(i_unconditional_exit
>= 0
3304 && i_unconditional_exit
< bb
->stmts_used
);
3306 = IRExpr_Const( bb
->stmts
[i_unconditional_exit
]->Ist
.Exit
.dst
);
3308 = bb
->stmts
[i_unconditional_exit
]->Ist
.Exit
.jk
;
3310 = bb
->stmts
[i_unconditional_exit
]->Ist
.Exit
.offsIP
;
3311 for (i
= i_unconditional_exit
; i
< bb
->stmts_used
; i
++)
3312 bb
->stmts
[i
] = IRStmt_NoOp();
3317 /*---------------------------------------------------------------*/
3318 /*--- Specialisation of helper function calls, in ---*/
3319 /*--- collaboration with the front end ---*/
3320 /*---------------------------------------------------------------*/
3323 IRSB
* spec_helpers_BB(
3325 IRExpr
* (*specHelper
) (const HChar
*, IRExpr
**, IRStmt
**, Int
)
3333 for (i
= bb
->stmts_used
-1; i
>= 0; i
--) {
3336 if (st
->tag
!= Ist_WrTmp
3337 || st
->Ist
.WrTmp
.data
->tag
!= Iex_CCall
)
3340 ex
= (*specHelper
)( st
->Ist
.WrTmp
.data
->Iex
.CCall
.cee
->name
,
3341 st
->Ist
.WrTmp
.data
->Iex
.CCall
.args
,
3344 /* the front end can't think of a suitable replacement */
3347 /* We got something better. Install it in the bb. */
3350 = IRStmt_WrTmp(st
->Ist
.WrTmp
.tmp
, ex
);
3353 vex_printf("SPEC: ");
3354 ppIRExpr(st
->Ist
.WrTmp
.data
);
3355 vex_printf(" --> ");
3362 bb
= flatten_BB(bb
);
/*---------------------------------------------------------------*/
/*--- Determination of guest state aliasing relationships     ---*/
/*---------------------------------------------------------------*/

/* These are helper functions for CSE and GetI/PutI transformations.

   Determine, to the extent possible, the relationship between two
   guest state accesses.  The possible outcomes are:

   * Exact alias.  These two accesses denote precisely the same
     piece of the guest state.

   * Definitely no alias.  These two accesses are guaranteed not to
     overlap any part of the guest state.

   * Unknown -- if neither of the above can be established.

   If in doubt, return Unknown.  */

typedef
   enum { ExactAlias, NoAlias, UnknownAlias }
   GSAliasing;
/* Produces the alias relation between an indexed guest
   state access and a non-indexed access. */

static
GSAliasing getAliasingRelation_IC ( IRRegArray* descr1, IRExpr* ix1,
                                    Int offset2, IRType ty2 )
{
   UInt minoff1, maxoff1, minoff2, maxoff2;

   getArrayBounds( descr1, &minoff1, &maxoff1 );
   minoff2 = offset2;
   maxoff2 = minoff2 + sizeofIRType(ty2) - 1;

   if (maxoff1 < minoff2 || maxoff2 < minoff1)
      return NoAlias;

   /* Could probably do better here if required.  For the moment
      however just claim not to know anything more. */
   return UnknownAlias;
}
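/* Worked example only (hypothetical offsets): a Put of an I32 at guest
   offset 64 covers bytes [64..67]; if the array descriptor spans bytes
   [128..191], the two byte ranges are disjoint, so getAliasingRelation_IC
   can safely answer NoAlias.  This helper just re-runs that interval test. */
static Bool example_intervals_disjoint ( void )
{
   UInt minoff1 = 128, maxoff1 = 191;                   /* indexed area  */
   UInt minoff2 = 64;                                   /* the Put ...   */
   UInt maxoff2 = minoff2 + sizeofIRType(Ity_I32) - 1;  /* ... 4 bytes   */
   return toBool(maxoff1 < minoff2 || maxoff2 < minoff1);
}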
3413 /* Produces the alias relation between two indexed guest state
3417 GSAliasing
getAliasingRelation_II (
3418 IRRegArray
* descr1
, IRExpr
* ix1
, Int bias1
,
3419 IRRegArray
* descr2
, IRExpr
* ix2
, Int bias2
3422 UInt minoff1
, maxoff1
, minoff2
, maxoff2
;
3425 /* First try hard to show they don't alias. */
3426 getArrayBounds( descr1
, &minoff1
, &maxoff1
);
3427 getArrayBounds( descr2
, &minoff2
, &maxoff2
);
3428 if (maxoff1
< minoff2
|| maxoff2
< minoff1
)
3431 /* So the two arrays at least partially overlap. To get any
3432 further we'll have to be sure that the descriptors are
3434 if (!eqIRRegArray(descr1
, descr2
))
3435 return UnknownAlias
;
3437 /* The descriptors are identical. Now the only difference can be
3438 in the index expressions. If they cannot be shown to be
3439 identical, we have to say we don't know what the aliasing
3440 relation will be. Now, since the IR is flattened, the index
3441 expressions should be atoms -- either consts or tmps. So that
3442 makes the comparison simple. */
3443 vassert(isIRAtom(ix1
));
3444 vassert(isIRAtom(ix2
));
3445 if (!eqIRAtom(ix1
,ix2
))
3446 return UnknownAlias
;
3448 /* Ok, the index expressions are identical. So now the only way
3449 they can be different is in the bias. Normalise this
3450 paranoidly, to reliably establish equality/non-equality. */
3452 /* So now we know that the GetI and PutI index the same array
3453 with the same base. Are the offsets the same, modulo the
3454 array size? Do this paranoidly. */
3455 vassert(descr1
->nElems
== descr2
->nElems
);
3456 vassert(descr1
->elemTy
== descr2
->elemTy
);
3457 vassert(descr1
->base
== descr2
->base
);
3459 while (bias1
< 0 || bias2
< 0) {
3460 bias1
+= descr1
->nElems
;
3461 bias2
+= descr1
->nElems
;
3464 vpanic("getAliasingRelation: iters");
3466 vassert(bias1
>= 0 && bias2
>= 0);
3467 bias1
%= descr1
->nElems
;
3468 bias2
%= descr1
->nElems
;
3469 vassert(bias1
>= 0 && bias1
< descr1
->nElems
);
3470 vassert(bias2
>= 0 && bias2
< descr1
->nElems
);
   /* Finally, bias1 and bias2 are normalised into the range
      0 .. descr1->nElems - 1.  And so we can establish
      equality/non-equality. */
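   /* Worked example (hypothetical numbers): with nElems == 8, bias1 == -3
      and bias2 == 5, the loop above adds nElems to both until neither is
      negative (giving 5 and 13), the mod-nElems step reduces these to
      5 and 5, and the two accesses are therefore reported as ExactAlias. */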
3476 return bias1
==bias2
? ExactAlias
: NoAlias
;
/*---------------------------------------------------------------*/
/*--- Common Subexpression Elimination                        ---*/
/*---------------------------------------------------------------*/

/* Expensive in time and space. */

/* Uses two environments:
   a IRTemp -> IRTemp mapping
   a mapping from AvailExpr* to IRTemp
*/

typedef
   struct {
      enum { TCc, TCt } tag;
      union { IRTemp tmp; IRConst* con; } u;
   }
   TmpOrConst;
static Bool eqTmpOrConst ( TmpOrConst* tc1, TmpOrConst* tc2 )
{
   if (tc1->tag != tc2->tag)
      return False;
   switch (tc1->tag) {
      case TCc:
         return eqIRConst(tc1->u.con, tc2->u.con);
      case TCt:
         return tc1->u.tmp == tc2->u.tmp;
      default:
         vpanic("eqTmpOrConst");
   }
}
3512 static Bool
eqIRCallee ( IRCallee
* cee1
, IRCallee
* cee2
)
3514 Bool eq
= cee1
->addr
== cee2
->addr
;
3516 vassert(cee1
->regparms
== cee2
->regparms
);
3517 vassert(cee1
->mcx_mask
== cee2
->mcx_mask
);
3518 /* Names should be the same too, but we don't bother to
/* Convert an atomic IRExpr* to a TmpOrConst. */
static void irExpr_to_TmpOrConst ( /*OUT*/TmpOrConst* tc, IRExpr* e )
{
   switch (e->tag) {
      case Iex_RdTmp:
         tc->tag   = TCt;
         tc->u.tmp = e->Iex.RdTmp.tmp;
         break;
      case Iex_Const:
         tc->tag   = TCc;
         tc->u.con = e->Iex.Const.con;
         break;
      default:
         /* Getting here is a serious error.  It means that the
            presented arg isn't an IR atom, as it should be. */
         vpanic("irExpr_to_TmpOrConst");
   }
}

/* Convert a TmpOrConst to an atomic IRExpr*. */
static IRExpr* tmpOrConst_to_IRExpr ( TmpOrConst* tc )
{
   switch (tc->tag) {
      case TCc: return IRExpr_Const(tc->u.con);
      case TCt: return IRExpr_RdTmp(tc->u.tmp);
      default: vpanic("tmpOrConst_to_IRExpr");
   }
}
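/* Illustrative sketch only (hypothetical helper, not used by the pass):
   an atomic expression survives a round trip through the TmpOrConst
   representation unchanged in meaning.  The temp number 7 is made up. */
static IRExpr* example_tmpOrConst_roundtrip ( void )
{
   TmpOrConst tc;
   irExpr_to_TmpOrConst(&tc, IRExpr_RdTmp((IRTemp)7));
   return tmpOrConst_to_IRExpr(&tc);   /* yields RdTmp(t7) again */
}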
3553 /* Convert a NULL terminated IRExpr* vector to an array of
3554 TmpOrConsts, and a length. */
3555 static void irExprVec_to_TmpOrConsts ( /*OUT*/TmpOrConst
** outs
,
3560 /* We have to make two passes, one to count, one to copy. */
3561 for (n
= 0; ins
[n
]; n
++)
3563 *outs
= LibVEX_Alloc_inline(n
* sizeof(TmpOrConst
));
3565 /* and now copy .. */
3566 for (i
= 0; i
< n
; i
++) {
3567 IRExpr
* arg
= ins
[i
];
3568 TmpOrConst
* dst
= &(*outs
)[i
];
3569 irExpr_to_TmpOrConst(dst
, arg
);
3575 enum { Ut
, Btt
, Btc
, Bct
, Cf64i
, Ittt
, Itct
, Ittc
, Itcc
, GetIt
,
3584 /* binop(tmp,tmp) */
3590 /* binop(tmp,const) */
3596 /* binop(const,tmp) */
3602 /* F64i-style const */
3606 /* ITE(tmp,tmp,tmp) */
3612 /* ITE(tmp,tmp,const) */
3618 /* ITE(tmp,const,tmp) */
3624 /* ITE(tmp,const,const) */
3630 /* GetI(descr,tmp,bias)*/
3636 /* Clean helper call */
3643 /* Load(end,ty,addr) */
3653 static Bool
eq_AvailExpr ( AvailExpr
* a1
, AvailExpr
* a2
)
3655 if (LIKELY(a1
->tag
!= a2
->tag
))
3660 a1
->u
.Ut
.op
== a2
->u
.Ut
.op
3661 && a1
->u
.Ut
.arg
== a2
->u
.Ut
.arg
);
3664 a1
->u
.Btt
.op
== a2
->u
.Btt
.op
3665 && a1
->u
.Btt
.arg1
== a2
->u
.Btt
.arg1
3666 && a1
->u
.Btt
.arg2
== a2
->u
.Btt
.arg2
);
3669 a1
->u
.Btc
.op
== a2
->u
.Btc
.op
3670 && a1
->u
.Btc
.arg1
== a2
->u
.Btc
.arg1
3671 && eqIRConst(&a1
->u
.Btc
.con2
, &a2
->u
.Btc
.con2
));
3674 a1
->u
.Bct
.op
== a2
->u
.Bct
.op
3675 && a1
->u
.Bct
.arg2
== a2
->u
.Bct
.arg2
3676 && eqIRConst(&a1
->u
.Bct
.con1
, &a2
->u
.Bct
.con1
));
3678 return toBool(a1
->u
.Cf64i
.f64i
== a2
->u
.Cf64i
.f64i
);
3680 return toBool(a1
->u
.Ittt
.co
== a2
->u
.Ittt
.co
3681 && a1
->u
.Ittt
.e1
== a2
->u
.Ittt
.e1
3682 && a1
->u
.Ittt
.e0
== a2
->u
.Ittt
.e0
);
3684 return toBool(a1
->u
.Ittc
.co
== a2
->u
.Ittc
.co
3685 && a1
->u
.Ittc
.e1
== a2
->u
.Ittc
.e1
3686 && eqIRConst(&a1
->u
.Ittc
.con0
, &a2
->u
.Ittc
.con0
));
3688 return toBool(a1
->u
.Itct
.co
== a2
->u
.Itct
.co
3689 && eqIRConst(&a1
->u
.Itct
.con1
, &a2
->u
.Itct
.con1
)
3690 && a1
->u
.Itct
.e0
== a2
->u
.Itct
.e0
);
3692 return toBool(a1
->u
.Itcc
.co
== a2
->u
.Itcc
.co
3693 && eqIRConst(&a1
->u
.Itcc
.con1
, &a2
->u
.Itcc
.con1
)
3694 && eqIRConst(&a1
->u
.Itcc
.con0
, &a2
->u
.Itcc
.con0
));
3696 return toBool(eqIRRegArray(a1
->u
.GetIt
.descr
, a2
->u
.GetIt
.descr
)
3697 && a1
->u
.GetIt
.ix
== a2
->u
.GetIt
.ix
3698 && a1
->u
.GetIt
.bias
== a2
->u
.GetIt
.bias
);
3701 Bool eq
= a1
->u
.CCall
.nArgs
== a2
->u
.CCall
.nArgs
3702 && eqIRCallee(a1
->u
.CCall
.cee
, a2
->u
.CCall
.cee
);
3704 n
= a1
->u
.CCall
.nArgs
;
3705 for (i
= 0; i
< n
; i
++) {
3706 if (!eqTmpOrConst( &a1
->u
.CCall
.args
[i
],
3707 &a2
->u
.CCall
.args
[i
] )) {
3713 if (eq
) vassert(a1
->u
.CCall
.retty
== a2
->u
.CCall
.retty
);
3717 Bool eq
= toBool(a1
->u
.Load
.end
== a2
->u
.Load
.end
3718 && a1
->u
.Load
.ty
== a2
->u
.Load
.ty
3719 && eqTmpOrConst(&a1
->u
.Load
.addr
, &a2
->u
.Load
.addr
));
3723 vpanic("eq_AvailExpr");
3727 static IRExpr
* availExpr_to_IRExpr ( AvailExpr
* ae
)
3729 IRConst
*con
, *con0
, *con1
;
3732 return IRExpr_Unop( ae
->u
.Ut
.op
, IRExpr_RdTmp(ae
->u
.Ut
.arg
) );
3734 return IRExpr_Binop( ae
->u
.Btt
.op
,
3735 IRExpr_RdTmp(ae
->u
.Btt
.arg1
),
3736 IRExpr_RdTmp(ae
->u
.Btt
.arg2
) );
3738 con
= LibVEX_Alloc_inline(sizeof(IRConst
));
3739 *con
= ae
->u
.Btc
.con2
;
3740 return IRExpr_Binop( ae
->u
.Btc
.op
,
3741 IRExpr_RdTmp(ae
->u
.Btc
.arg1
),
3742 IRExpr_Const(con
) );
3744 con
= LibVEX_Alloc_inline(sizeof(IRConst
));
3745 *con
= ae
->u
.Bct
.con1
;
3746 return IRExpr_Binop( ae
->u
.Bct
.op
,
3748 IRExpr_RdTmp(ae
->u
.Bct
.arg2
) );
3750 return IRExpr_Const(IRConst_F64i(ae
->u
.Cf64i
.f64i
));
3752 return IRExpr_ITE(IRExpr_RdTmp(ae
->u
.Ittt
.co
),
3753 IRExpr_RdTmp(ae
->u
.Ittt
.e1
),
3754 IRExpr_RdTmp(ae
->u
.Ittt
.e0
));
3756 con0
= LibVEX_Alloc_inline(sizeof(IRConst
));
3757 *con0
= ae
->u
.Ittc
.con0
;
3758 return IRExpr_ITE(IRExpr_RdTmp(ae
->u
.Ittc
.co
),
3759 IRExpr_RdTmp(ae
->u
.Ittc
.e1
),
3760 IRExpr_Const(con0
));
3762 con1
= LibVEX_Alloc_inline(sizeof(IRConst
));
3763 *con1
= ae
->u
.Itct
.con1
;
3764 return IRExpr_ITE(IRExpr_RdTmp(ae
->u
.Itct
.co
),
3766 IRExpr_RdTmp(ae
->u
.Itct
.e0
));
3769 con0
= LibVEX_Alloc_inline(sizeof(IRConst
));
3770 con1
= LibVEX_Alloc_inline(sizeof(IRConst
));
3771 *con0
= ae
->u
.Itcc
.con0
;
3772 *con1
= ae
->u
.Itcc
.con1
;
3773 return IRExpr_ITE(IRExpr_RdTmp(ae
->u
.Itcc
.co
),
3775 IRExpr_Const(con0
));
3777 return IRExpr_GetI(ae
->u
.GetIt
.descr
,
3778 IRExpr_RdTmp(ae
->u
.GetIt
.ix
),
3781 Int i
, n
= ae
->u
.CCall
.nArgs
;
3783 IRExpr
** vec
= LibVEX_Alloc_inline((n
+1) * sizeof(IRExpr
*));
3785 for (i
= 0; i
< n
; i
++) {
3786 vec
[i
] = tmpOrConst_to_IRExpr(&ae
->u
.CCall
.args
[i
]);
3788 return IRExpr_CCall(ae
->u
.CCall
.cee
,
3793 return IRExpr_Load(ae
->u
.Load
.end
, ae
->u
.Load
.ty
,
3794 tmpOrConst_to_IRExpr(&ae
->u
.Load
.addr
));
3796 vpanic("availExpr_to_IRExpr");
static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp )
{
   HWord res;
   /* env :: IRTemp -> IRTemp */
   if (lookupHHW( env, &res, (HWord)tmp ))
      return (IRTemp)res;
   else
      return tmp;
}

static void subst_AvailExpr_TmpOrConst ( /*MB_MOD*/TmpOrConst* tc,
                                          HashHW* env )
{
   /* env :: IRTemp -> IRTemp */
   if (tc->tag == TCt) {
      tc->u.tmp = subst_AvailExpr_Temp( env, tc->u.tmp );
   }
}
3821 static void subst_AvailExpr ( HashHW
* env
, AvailExpr
* ae
)
3823 /* env :: IRTemp -> IRTemp */
3826 ae
->u
.Ut
.arg
= subst_AvailExpr_Temp( env
, ae
->u
.Ut
.arg
);
3829 ae
->u
.Btt
.arg1
= subst_AvailExpr_Temp( env
, ae
->u
.Btt
.arg1
);
3830 ae
->u
.Btt
.arg2
= subst_AvailExpr_Temp( env
, ae
->u
.Btt
.arg2
);
3833 ae
->u
.Btc
.arg1
= subst_AvailExpr_Temp( env
, ae
->u
.Btc
.arg1
);
3836 ae
->u
.Bct
.arg2
= subst_AvailExpr_Temp( env
, ae
->u
.Bct
.arg2
);
3841 ae
->u
.Ittt
.co
= subst_AvailExpr_Temp( env
, ae
->u
.Ittt
.co
);
3842 ae
->u
.Ittt
.e1
= subst_AvailExpr_Temp( env
, ae
->u
.Ittt
.e1
);
3843 ae
->u
.Ittt
.e0
= subst_AvailExpr_Temp( env
, ae
->u
.Ittt
.e0
);
3846 ae
->u
.Ittc
.co
= subst_AvailExpr_Temp( env
, ae
->u
.Ittc
.co
);
3847 ae
->u
.Ittc
.e1
= subst_AvailExpr_Temp( env
, ae
->u
.Ittc
.e1
);
3850 ae
->u
.Itct
.co
= subst_AvailExpr_Temp( env
, ae
->u
.Itct
.co
);
3851 ae
->u
.Itct
.e0
= subst_AvailExpr_Temp( env
, ae
->u
.Itct
.e0
);
3854 ae
->u
.Itcc
.co
= subst_AvailExpr_Temp( env
, ae
->u
.Itcc
.co
);
3857 ae
->u
.GetIt
.ix
= subst_AvailExpr_Temp( env
, ae
->u
.GetIt
.ix
);
3860 Int i
, n
= ae
->u
.CCall
.nArgs
;;
3861 for (i
= 0; i
< n
; i
++) {
3862 subst_AvailExpr_TmpOrConst(&ae
->u
.CCall
.args
[i
], env
);
3867 subst_AvailExpr_TmpOrConst(&ae
->u
.Load
.addr
, env
);
3870 vpanic("subst_AvailExpr");
3874 static AvailExpr
* irExpr_to_AvailExpr ( IRExpr
* e
, Bool allowLoadsToBeCSEd
)
3880 if (e
->Iex
.Unop
.arg
->tag
== Iex_RdTmp
) {
3881 ae
= LibVEX_Alloc_inline(sizeof(AvailExpr
));
3883 ae
->u
.Ut
.op
= e
->Iex
.Unop
.op
;
3884 ae
->u
.Ut
.arg
= e
->Iex
.Unop
.arg
->Iex
.RdTmp
.tmp
;
3890 if (e
->Iex
.Binop
.arg1
->tag
== Iex_RdTmp
) {
3891 if (e
->Iex
.Binop
.arg2
->tag
== Iex_RdTmp
) {
3892 ae
= LibVEX_Alloc_inline(sizeof(AvailExpr
));
3894 ae
->u
.Btt
.op
= e
->Iex
.Binop
.op
;
3895 ae
->u
.Btt
.arg1
= e
->Iex
.Binop
.arg1
->Iex
.RdTmp
.tmp
;
3896 ae
->u
.Btt
.arg2
= e
->Iex
.Binop
.arg2
->Iex
.RdTmp
.tmp
;
3899 if (e
->Iex
.Binop
.arg2
->tag
== Iex_Const
) {
3900 ae
= LibVEX_Alloc_inline(sizeof(AvailExpr
));
3902 ae
->u
.Btc
.op
= e
->Iex
.Binop
.op
;
3903 ae
->u
.Btc
.arg1
= e
->Iex
.Binop
.arg1
->Iex
.RdTmp
.tmp
;
3904 ae
->u
.Btc
.con2
= *(e
->Iex
.Binop
.arg2
->Iex
.Const
.con
);
3907 } else if (e
->Iex
.Binop
.arg1
->tag
== Iex_Const
3908 && e
->Iex
.Binop
.arg2
->tag
== Iex_RdTmp
) {
3909 ae
= LibVEX_Alloc_inline(sizeof(AvailExpr
));
3911 ae
->u
.Bct
.op
= e
->Iex
.Binop
.op
;
3912 ae
->u
.Bct
.arg2
= e
->Iex
.Binop
.arg2
->Iex
.RdTmp
.tmp
;
3913 ae
->u
.Bct
.con1
= *(e
->Iex
.Binop
.arg1
->Iex
.Const
.con
);
3919 if (e
->Iex
.Const
.con
->tag
== Ico_F64i
) {
3920 ae
= LibVEX_Alloc_inline(sizeof(AvailExpr
));
3922 ae
->u
.Cf64i
.f64i
= e
->Iex
.Const
.con
->Ico
.F64i
;
3928 if (e
->Iex
.ITE
.cond
->tag
== Iex_RdTmp
) {
3929 if (e
->Iex
.ITE
.iffalse
->tag
== Iex_RdTmp
) {
3930 if (e
->Iex
.ITE
.iftrue
->tag
== Iex_RdTmp
) {
3931 ae
= LibVEX_Alloc_inline(sizeof(AvailExpr
));
3933 ae
->u
.Ittt
.co
= e
->Iex
.ITE
.cond
->Iex
.RdTmp
.tmp
;
3934 ae
->u
.Ittt
.e1
= e
->Iex
.ITE
.iftrue
->Iex
.RdTmp
.tmp
;
3935 ae
->u
.Ittt
.e0
= e
->Iex
.ITE
.iffalse
->Iex
.RdTmp
.tmp
;
3938 if (e
->Iex
.ITE
.iftrue
->tag
== Iex_Const
) {
3939 ae
= LibVEX_Alloc_inline(sizeof(AvailExpr
));
3941 ae
->u
.Itct
.co
= e
->Iex
.ITE
.cond
->Iex
.RdTmp
.tmp
;
3942 ae
->u
.Itct
.con1
= *(e
->Iex
.ITE
.iftrue
->Iex
.Const
.con
);
3943 ae
->u
.Itct
.e0
= e
->Iex
.ITE
.iffalse
->Iex
.RdTmp
.tmp
;
3946 } else if (e
->Iex
.ITE
.iffalse
->tag
== Iex_Const
) {
3947 if (e
->Iex
.ITE
.iftrue
->tag
== Iex_RdTmp
) {
3948 ae
= LibVEX_Alloc_inline(sizeof(AvailExpr
));
3950 ae
->u
.Ittc
.co
= e
->Iex
.ITE
.cond
->Iex
.RdTmp
.tmp
;
3951 ae
->u
.Ittc
.e1
= e
->Iex
.ITE
.iftrue
->Iex
.RdTmp
.tmp
;
3952 ae
->u
.Ittc
.con0
= *(e
->Iex
.ITE
.iffalse
->Iex
.Const
.con
);
3955 if (e
->Iex
.ITE
.iftrue
->tag
== Iex_Const
) {
3956 ae
= LibVEX_Alloc_inline(sizeof(AvailExpr
));
3958 ae
->u
.Itcc
.co
= e
->Iex
.ITE
.cond
->Iex
.RdTmp
.tmp
;
3959 ae
->u
.Itcc
.con1
= *(e
->Iex
.ITE
.iftrue
->Iex
.Const
.con
);
3960 ae
->u
.Itcc
.con0
= *(e
->Iex
.ITE
.iffalse
->Iex
.Const
.con
);
3968 if (e
->Iex
.GetI
.ix
->tag
== Iex_RdTmp
) {
3969 ae
= LibVEX_Alloc_inline(sizeof(AvailExpr
));
3971 ae
->u
.GetIt
.descr
= e
->Iex
.GetI
.descr
;
3972 ae
->u
.GetIt
.ix
= e
->Iex
.GetI
.ix
->Iex
.RdTmp
.tmp
;
3973 ae
->u
.GetIt
.bias
= e
->Iex
.GetI
.bias
;
3979 ae
= LibVEX_Alloc_inline(sizeof(AvailExpr
));
3981 /* Ok to share only the cee, since it is immutable. */
3982 ae
->u
.CCall
.cee
= e
->Iex
.CCall
.cee
;
3983 ae
->u
.CCall
.retty
= e
->Iex
.CCall
.retty
;
3984 /* irExprVec_to_TmpOrConsts will assert if the args are
3985 neither tmps nor constants, but that's ok .. that's all they
3987 irExprVec_to_TmpOrConsts(
3988 &ae
->u
.CCall
.args
, &ae
->u
.CCall
.nArgs
,
3994 /* If the caller of do_cse_BB has requested that loads also
3995 be CSEd, convert them into AvailExprs. If not, we'll just
3996 return NULL here, and the load never becomes considered
3997 "available", which effectively disables CSEing of them, as
3999 if (allowLoadsToBeCSEd
) {
4000 ae
= LibVEX_Alloc_inline(sizeof(AvailExpr
));
4002 ae
->u
.Load
.end
= e
->Iex
.Load
.end
;
4003 ae
->u
.Load
.ty
= e
->Iex
.Load
.ty
;
4004 irExpr_to_TmpOrConst(&ae
->u
.Load
.addr
, e
->Iex
.Load
.addr
);
/* The BB is modified in-place.  Returns True if any changes were
   made.  The caller can choose whether or not loads should be CSEd.
   In the normal course of things we don't do that, since CSEing loads
   is something of a dodgy proposition if the guest program is doing
   some screwy stuff to do with races and spinloops. */

static Bool do_cse_BB ( IRSB* bb, Bool allowLoadsToBeCSEd )
4031 Bool anyDone
= False
;
4033 HashHW
* tenv
= newHHW(); /* :: IRTemp -> IRTemp */
4034 HashHW
* aenv
= newHHW(); /* :: AvailExpr* -> IRTemp */
4036 vassert(sizeof(IRTemp
) <= sizeof(HWord
));
4038 if (0) { ppIRSB(bb
); vex_printf("\n\n"); }
   /* Iterate forwards over the stmts.
      On seeing "t = E", where E is one of the AvailExpr forms:
         let E' = apply tenv substitution to E
         search aenv for E'
            if a mapping E' -> q is found,
               replace this stmt by "t = q"
               and add binding t -> q to tenv
            else
               add binding E' -> t to aenv
               replace this stmt by "t = E'"

      Other statements are only interesting to the extent that they
      might invalidate some of the expressions in aenv.  So there is
      an invalidate-bindings check for each statement seen.
   */
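   /* Worked example (hypothetical temps only): given
           t12 = Add32(t3,t4)
           ...
           t19 = Add32(t3,t4)
      the first statement adds the binding Add32(t3,t4) -> t12 to aenv.
      When the second is reached, the lookup finds that binding, so the
      statement is rewritten as
           t19 = t12
      and t19 -> t12 is recorded in tenv, so that later uses of t19 are
      substituted through to t12. */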
4055 for (i
= 0; i
< bb
->stmts_used
; i
++) {
4058 /* ------ BEGIN invalidate aenv bindings ------ */
4059 /* This is critical: remove from aenv any E' -> .. bindings
4060 which might be invalidated by this statement. The only
4061 vulnerable kind of bindings are the GetI and Load kinds.
4062 Dirty call - dump (paranoia level -> 2)
4063 Store - dump (ditto)
4064 Put, PutI - dump unless no-overlap is proven (.. -> 1)
4065 Uses getAliasingRelation_IC and getAliasingRelation_II
4066 to do the no-overlap assessments needed for Put/PutI.
4069 case Ist_Dirty
: case Ist_Store
: case Ist_MBE
:
4070 case Ist_CAS
: case Ist_LLSC
:
4072 paranoia
= 2; break;
4073 case Ist_Put
: case Ist_PutI
:
4074 paranoia
= 1; break;
4075 case Ist_NoOp
: case Ist_IMark
: case Ist_AbiHint
:
4076 case Ist_WrTmp
: case Ist_Exit
: case Ist_LoadG
:
4077 paranoia
= 0; break;
4079 vpanic("do_cse_BB(1)");
4083 for (j
= 0; j
< aenv
->used
; j
++) {
4084 if (!aenv
->inuse
[j
])
4086 ae
= (AvailExpr
*)aenv
->key
[j
];
4087 if (ae
->tag
!= GetIt
&& ae
->tag
!= Load
)
4090 if (paranoia
>= 2) {
4093 vassert(paranoia
== 1);
4094 if (ae
->tag
== Load
) {
4095 /* Loads can be invalidated by anything that could
4096 possibly touch memory. But in that case we
4097 should have |paranoia| == 2 and we won't get
4098 here. So there's nothing to do; we don't have to
4099 invalidate the load. */
4102 if (st
->tag
== Ist_Put
) {
4103 if (getAliasingRelation_IC(
4105 IRExpr_RdTmp(ae
->u
.GetIt
.ix
),
4107 typeOfIRExpr(bb
->tyenv
,st
->Ist
.Put
.data
)
4112 if (st
->tag
== Ist_PutI
) {
4113 IRPutI
*puti
= st
->Ist
.PutI
.details
;
4114 if (getAliasingRelation_II(
4116 IRExpr_RdTmp(ae
->u
.GetIt
.ix
),
4125 vpanic("do_cse_BB(2)");
4129 aenv
->inuse
[j
] = False
;
4130 aenv
->key
[j
] = (HWord
)NULL
; /* be sure */
4133 } /* paranoia > 0 */
4135 /* ------ ENV invalidate aenv bindings ------ */
4137 /* ignore not-interestings */
4138 if (st
->tag
!= Ist_WrTmp
)
4141 t
= st
->Ist
.WrTmp
.tmp
;
4142 eprime
= irExpr_to_AvailExpr(st
->Ist
.WrTmp
.data
, allowLoadsToBeCSEd
);
4143 /* ignore if not of AvailExpr form */
4147 /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */
4150 subst_AvailExpr( tenv
, eprime
);
4152 /* search aenv for eprime, unfortunately the hard way */
4153 for (j
= 0; j
< aenv
->used
; j
++)
4154 if (aenv
->inuse
[j
] && eq_AvailExpr(eprime
, (AvailExpr
*)aenv
->key
[j
]))
4157 if (j
< aenv
->used
) {
4158 /* A binding E' -> q was found. Replace stmt by "t = q" and
4159 note the t->q binding in tenv. */
4160 /* (this is the core of the CSE action) */
4161 q
= (IRTemp
)aenv
->val
[j
];
4162 bb
->stmts
[i
] = IRStmt_WrTmp( t
, IRExpr_RdTmp(q
) );
4163 addToHHW( tenv
, (HWord
)t
, (HWord
)q
);
4166 /* No binding was found, so instead we add E' -> t to our
4167 collection of available expressions, replace this stmt
4168 with "t = E'", and move on. */
4169 bb
->stmts
[i
] = IRStmt_WrTmp( t
, availExpr_to_IRExpr(eprime
) );
4170 addToHHW( aenv
, (HWord
)eprime
, (HWord
)t
);
4176 sanityCheckIRSB(bb, Ity_I32);
/*---------------------------------------------------------------*/
/*--- Add32/Sub32 chain collapsing                            ---*/
/*---------------------------------------------------------------*/

/* ----- Helper functions for Add32/Sub32 chain collapsing ----- */

/* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ?  If
   yes, set *tmp and *i32 appropriately.  *i32 is set as if the
   root node is Add32, not Sub32. */

static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 )
{
   if (e->tag != Iex_Binop)
      return False;
   if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32)
      return False;
   if (e->Iex.Binop.arg1->tag != Iex_RdTmp)
      return False;
   if (e->Iex.Binop.arg2->tag != Iex_Const)
      return False;
   *tmp = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
   *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32);
   if (e->Iex.Binop.op == Iop_Sub32)
      *i32 = -*i32;
   return True;
}
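/* Worked example (hypothetical temps): with
        t2 = Add32(t1, 0x8:I32)
        t3 = Sub32(t2, 0x3:I32)
   isAdd32OrSub32 reports t3 as (t2, -3); collapseChain below then walks
   backwards, finds t2 = (t1, +8), and the collapser rewrites t3 as
   Add32(t1, 0x5:I32), eliminating the intermediate dependency on t2. */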
4211 /* Figure out if tmp can be expressed as tmp2 +32 const, for some
4212 other tmp2. Scan backwards from the specified start point -- an
4215 static Bool
collapseChain ( IRSB
* bb
, Int startHere
,
4217 IRTemp
* tmp2
, Int
* i32
)
4224 /* the (var, con) pair contain the current 'representation' for
4225 'tmp'. We start with 'tmp + 0'. */
4229 /* Scan backwards to see if tmp can be replaced by some other tmp
4231 for (j
= startHere
; j
>= 0; j
--) {
4233 if (st
->tag
!= Ist_WrTmp
)
4235 if (st
->Ist
.WrTmp
.tmp
!= var
)
4237 e
= st
->Ist
.WrTmp
.data
;
4238 if (!isAdd32OrSub32(e
, &vv
, &ii
))
4244 /* no earlier binding for var .. ill-formed IR */
4245 vpanic("collapseChain");
4247 /* so, did we find anything interesting? */
4249 return False
; /* no .. */
4257 /* ------- Main function for Add32/Sub32 chain collapsing ------ */
4259 static void collapse_AddSub_chains_BB ( IRSB
* bb
)
4265 for (i
= bb
->stmts_used
-1; i
>= 0; i
--) {
4267 if (st
->tag
== Ist_NoOp
)
4270 /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */
4272 if (st
->tag
== Ist_WrTmp
4273 && isAdd32OrSub32(st
->Ist
.WrTmp
.data
, &var
, &con
)) {
4275 /* So e1 is of the form Add32(var,con) or Sub32(var,-con).
4276 Find out if var can be expressed as var2 + con2. */
4277 if (collapseChain(bb
, i
-1, var
, &var2
, &con2
)) {
4279 vex_printf("replacing1 ");
4281 vex_printf(" with ");
4288 ? IRExpr_Binop(Iop_Add32
,
4290 IRExpr_Const(IRConst_U32(con2
)))
4291 : IRExpr_Binop(Iop_Sub32
,
4293 IRExpr_Const(IRConst_U32(-con2
)))
4296 ppIRStmt(bb
->stmts
[i
]);
4304 /* Try to collapse 't1 = GetI[t2, con]'. */
4306 if (st
->tag
== Ist_WrTmp
4307 && st
->Ist
.WrTmp
.data
->tag
== Iex_GetI
4308 && st
->Ist
.WrTmp
.data
->Iex
.GetI
.ix
->tag
== Iex_RdTmp
4309 && collapseChain(bb
, i
-1, st
->Ist
.WrTmp
.data
->Iex
.GetI
.ix
4310 ->Iex
.RdTmp
.tmp
, &var2
, &con2
)) {
4312 vex_printf("replacing3 ");
4314 vex_printf(" with ");
4316 con2
+= st
->Ist
.WrTmp
.data
->Iex
.GetI
.bias
;
4320 IRExpr_GetI(st
->Ist
.WrTmp
.data
->Iex
.GetI
.descr
,
4324 ppIRStmt(bb
->stmts
[i
]);
4330 /* Perhaps st is PutI[t, con] ? */
4331 IRPutI
*puti
= st
->Ist
.PutI
.details
;
4332 if (st
->tag
== Ist_PutI
4333 && puti
->ix
->tag
== Iex_RdTmp
4334 && collapseChain(bb
, i
-1, puti
->ix
->Iex
.RdTmp
.tmp
,
4337 vex_printf("replacing2 ");
4339 vex_printf(" with ");
4343 = IRStmt_PutI(mkIRPutI(puti
->descr
,
4348 ppIRStmt(bb
->stmts
[i
]);
4358 /*---------------------------------------------------------------*/
4359 /*--- PutI/GetI transformations ---*/
4360 /*---------------------------------------------------------------*/
4362 /* Given the parts (descr, tmp, bias) for a GetI, scan backwards from
4363 the given starting point to find, if any, a PutI which writes
4364 exactly the same piece of guest state, and so return the expression
4365 that the PutI writes. This is the core of PutI-GetI forwarding. */
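/* Worked example (hypothetical guest array and temps): given
        PutI[descr, t7, 3] = t20
        ...
        t33 = GetI[descr, t7, 3]
   with no intervening statement that might overlap descr, findPutI
   returns t20, and do_redundant_GetI_elimination below rewrites the
   second statement as "t33 = t20". */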
4368 IRExpr
* findPutI ( IRSB
* bb
, Int startHere
,
4369 IRRegArray
* descrG
, IRExpr
* ixG
, Int biasG
)
4373 GSAliasing relation
;
4376 vex_printf("\nfindPutI ");
4377 ppIRRegArray(descrG
);
4380 vex_printf(" %d\n", biasG
);
4383 /* Scan backwards in bb from startHere to find a suitable PutI
4384 binding for (descrG, ixG, biasG), if any. */
4386 for (j
= startHere
; j
>= 0; j
--) {
4388 if (st
->tag
== Ist_NoOp
)
4391 if (st
->tag
== Ist_Put
) {
4392 /* Non-indexed Put. This can't give a binding, but we do
4393 need to check it doesn't invalidate the search by
4394 overlapping any part of the indexed guest state. */
4397 = getAliasingRelation_IC(
4400 typeOfIRExpr(bb
->tyenv
,st
->Ist
.Put
.data
) );
4402 if (relation
== NoAlias
) {
4403 /* we're OK; keep going */
4406 /* relation == UnknownAlias || relation == ExactAlias */
4407 /* If this assertion fails, we've found a Put which writes
4408 an area of guest state which is read by a GetI. Which
4409 is unlikely (although not per se wrong). */
4410 vassert(relation
!= ExactAlias
);
4411 /* This Put potentially writes guest state that the GetI
4412 reads; we must fail. */
4417 if (st
->tag
== Ist_PutI
) {
4418 IRPutI
*puti
= st
->Ist
.PutI
.details
;
4420 relation
= getAliasingRelation_II(
4427 if (relation
== NoAlias
) {
4428 /* This PutI definitely doesn't overlap. Ignore it and
4430 continue; /* the for j loop */
4433 if (relation
== UnknownAlias
) {
4434 /* We don't know if this PutI writes to the same guest
4435 state that the GetI, or not. So we have to give up. */
4439 /* Otherwise, we've found what we're looking for. */
4440 vassert(relation
== ExactAlias
);
4443 } /* if (st->tag == Ist_PutI) */
4445 if (st
->tag
== Ist_Dirty
) {
4446 /* Be conservative. If the dirty call has any guest effects at
4447 all, give up. We could do better -- only give up if there
4448 are any guest writes/modifies. */
4449 if (st
->Ist
.Dirty
.details
->nFxState
> 0)
4455 /* No valid replacement was found. */
/* Assuming pi is a PutI stmt, is s2 identical to it (in the sense
   that it writes exactly the same piece of guest state) ?  Safe
   answer: False. */

static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 )
{
   vassert(pi->tag == Ist_PutI);
   if (s2->tag != Ist_PutI)
      return False;

   IRPutI *p1 = pi->Ist.PutI.details;
   IRPutI *p2 = s2->Ist.PutI.details;

   return toBool(
          getAliasingRelation_II(
             p1->descr, p1->ix, p1->bias,
             p2->descr, p2->ix, p2->bias
          )
          == ExactAlias
       );
}
/* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might
   overlap it?  Safe answer: True.  Note, we could do a lot better
   than this if needed. */

Bool guestAccessWhichMightOverlapPutI (
        IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2

   GSAliasing relation;
   UInt       minoffP, maxoffP;

   vassert(pi->tag == Ist_PutI);

   IRPutI *p1 = pi->Ist.PutI.details;

   getArrayBounds(p1->descr, &minoffP, &maxoffP);

         /* just be paranoid ... these should be rare. */

         /* This is unbelievably lame, but it's probably not
            significant from a performance point of view.  Really, a
            CAS is a load-store op, so it should be safe to say False. */

         /* If the dirty call has any guest effects at all, give up.
            Probably could do better. */
         if (s2->Ist.Dirty.details->nFxState > 0)

         vassert(isIRAtom(s2->Ist.Put.data));
            = getAliasingRelation_IC(
                 typeOfIRExpr(tyenv,s2->Ist.Put.data)

         IRPutI *p2 = s2->Ist.PutI.details;
         vassert(isIRAtom(p2->ix));
         vassert(isIRAtom(p2->data));
            = getAliasingRelation_II(
                 p1->descr, p1->ix, p1->bias,
                 p2->descr, p2->ix, p2->bias

         if (s2->Ist.WrTmp.data->tag == Iex_GetI) {
               = getAliasingRelation_II(
                    p1->descr, p1->ix, p1->bias,
                    s2->Ist.WrTmp.data->Iex.GetI.descr,
                    s2->Ist.WrTmp.data->Iex.GetI.ix,
                    s2->Ist.WrTmp.data->Iex.GetI.bias

         if (s2->Ist.WrTmp.data->tag == Iex_Get) {
               = getAliasingRelation_IC(
                    s2->Ist.WrTmp.data->Iex.Get.offset,
                    s2->Ist.WrTmp.data->Iex.Get.ty

         vassert(isIRAtom(s2->Ist.Store.addr));
         vassert(isIRAtom(s2->Ist.Store.data));

         vex_printf("\n"); ppIRStmt(s2); vex_printf("\n");
         vpanic("guestAccessWhichMightOverlapPutI");

   if (relation == NoAlias)

   return True; /* ExactAlias or UnknownAlias */
/* ---------- PutI/GetI transformations main functions --------- */

/* Remove redundant GetIs, to the extent that they can be detected.
   bb is modified in-place. */

void do_redundant_GetI_elimination ( IRSB* bb )

   for (i = bb->stmts_used-1; i >= 0; i--) {

      if (st->tag == Ist_NoOp)

      if (st->tag == Ist_WrTmp
          && st->Ist.WrTmp.data->tag == Iex_GetI
          && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp) {
         IRRegArray* descr = st->Ist.WrTmp.data->Iex.GetI.descr;
         IRExpr*     ix    = st->Ist.WrTmp.data->Iex.GetI.ix;
         Int         bias  = st->Ist.WrTmp.data->Iex.GetI.bias;
         IRExpr*     replacement = findPutI(bb, i-1, descr, ix, bias);

             && isIRAtom(replacement)
             /* Make sure we're doing a type-safe transformation! */
             && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) {

               vex_printf("rGI:  ");
               ppIRExpr(st->Ist.WrTmp.data);
               ppIRExpr(replacement);

            bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, replacement);
/* Remove redundant PutIs, to the extent that they can be detected.
   bb is modified in-place. */

void do_redundant_PutI_elimination ( IRSB* bb, VexRegisterUpdates pxControl )

   vassert(pxControl < VexRegUpdAllregsAtEachInsn);

   for (i = 0; i < bb->stmts_used; i++) {

      if (st->tag != Ist_PutI)

      /* Ok, search forwards from here to see if we can find another
         PutI which makes this one redundant, and dodging various
         hazards.  Search forwards:
         * If conditional exit, give up (because anything after that
           does not postdominate this put).
         * If a Get which might overlap, give up (because this PutI is
           not necessarily dead).
         * If a Put which is identical, stop with success.
         * If a Put which might overlap, but is not identical, give up.
         * If a dirty helper call which might write guest state, give up.
         * If a Put which definitely doesn't overlap, or any other
           kind of stmt, continue. */

      for (j = i+1; j < bb->stmts_used; j++) {

         if (stj->tag == Ist_NoOp)

         if (identicalPutIs(st, stj)) {

         if (stj->tag == Ist_Exit)

         if (stj->tag == Ist_Dirty)
            /* give up; could do better here */

         if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj))

         vex_printf("rPI:  ");

      bb->stmts[i] = IRStmt_NoOp();
/*---------------------------------------------------------------*/
/*--- Loop unrolling                                          ---*/
/*---------------------------------------------------------------*/

/* Adjust all tmp values (names) in e by delta.  e is destructively
   modified. */

static void deltaIRExpr ( IRExpr* e, Int delta )

         e->Iex.RdTmp.tmp += delta;

         deltaIRExpr(e->Iex.GetI.ix, delta);

         deltaIRExpr(e->Iex.Qop.details->arg1, delta);
         deltaIRExpr(e->Iex.Qop.details->arg2, delta);
         deltaIRExpr(e->Iex.Qop.details->arg3, delta);
         deltaIRExpr(e->Iex.Qop.details->arg4, delta);

         deltaIRExpr(e->Iex.Triop.details->arg1, delta);
         deltaIRExpr(e->Iex.Triop.details->arg2, delta);
         deltaIRExpr(e->Iex.Triop.details->arg3, delta);

         deltaIRExpr(e->Iex.Binop.arg1, delta);
         deltaIRExpr(e->Iex.Binop.arg2, delta);

         deltaIRExpr(e->Iex.Unop.arg, delta);

         deltaIRExpr(e->Iex.Load.addr, delta);

         for (i = 0; e->Iex.CCall.args[i]; i++)
            deltaIRExpr(e->Iex.CCall.args[i], delta);

         deltaIRExpr(e->Iex.ITE.cond, delta);
         deltaIRExpr(e->Iex.ITE.iftrue, delta);
         deltaIRExpr(e->Iex.ITE.iffalse, delta);

         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("deltaIRExpr");
/* Adjust all tmp values (names) in st by delta.  st is destructively
   modified. */

/*static*/ void deltaIRStmt ( IRStmt* st, Int delta )

         deltaIRExpr(st->Ist.AbiHint.base, delta);
         deltaIRExpr(st->Ist.AbiHint.nia, delta);

         deltaIRExpr(st->Ist.Put.data, delta);

         deltaIRExpr(st->Ist.PutI.details->ix, delta);
         deltaIRExpr(st->Ist.PutI.details->data, delta);

         st->Ist.WrTmp.tmp += delta;
         deltaIRExpr(st->Ist.WrTmp.data, delta);

         deltaIRExpr(st->Ist.Exit.guard, delta);

         deltaIRExpr(st->Ist.Store.addr, delta);
         deltaIRExpr(st->Ist.Store.data, delta);

         IRStoreG* sg = st->Ist.StoreG.details;
         deltaIRExpr(sg->addr, delta);
         deltaIRExpr(sg->data, delta);
         deltaIRExpr(sg->guard, delta);

         IRLoadG* lg = st->Ist.LoadG.details;
         deltaIRExpr(lg->addr, delta);
         deltaIRExpr(lg->alt, delta);
         deltaIRExpr(lg->guard, delta);

         if (st->Ist.CAS.details->oldHi != IRTemp_INVALID)
            st->Ist.CAS.details->oldHi += delta;
         st->Ist.CAS.details->oldLo += delta;
         deltaIRExpr(st->Ist.CAS.details->addr, delta);
         if (st->Ist.CAS.details->expdHi)
            deltaIRExpr(st->Ist.CAS.details->expdHi, delta);
         deltaIRExpr(st->Ist.CAS.details->expdLo, delta);
         if (st->Ist.CAS.details->dataHi)
            deltaIRExpr(st->Ist.CAS.details->dataHi, delta);
         deltaIRExpr(st->Ist.CAS.details->dataLo, delta);

         st->Ist.LLSC.result += delta;
         deltaIRExpr(st->Ist.LLSC.addr, delta);
         if (st->Ist.LLSC.storedata)
            deltaIRExpr(st->Ist.LLSC.storedata, delta);

         d = st->Ist.Dirty.details;
         deltaIRExpr(d->guard, delta);
         for (i = 0; d->args[i]; i++) {
            IRExpr* arg = d->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
               deltaIRExpr(arg, delta);
         if (d->tmp != IRTemp_INVALID)
         deltaIRExpr(d->mAddr, delta);

         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("deltaIRStmt");
/* If possible, return a loop-unrolled version of bb0.  The original
   is changed.  If not possible, return NULL. */

/* The two schemas considered are:

     which unrolls to (eg)  X: BODY;BODY; goto X

     X: BODY; if (c) goto X; goto Y
   which trivially transforms to
     X: BODY; if (!c) goto Y; goto X;
   so it falls in the scope of the first case.

   X and Y must be literal (guest) addresses.
*/

static Int calc_unroll_factor( IRSB* bb )

   for (i = 0; i < bb->stmts_used; i++) {
      if (bb->stmts[i]->tag != Ist_NoOp)

   if (n_stmts <= vex_control.iropt_unroll_thresh/8) {
      if (vex_control.iropt_verbosity > 0)
         vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n",
                    n_stmts, 8 * n_stmts);

   if (n_stmts <= vex_control.iropt_unroll_thresh/4) {
      if (vex_control.iropt_verbosity > 0)
         vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n",
                    n_stmts, 4 * n_stmts);

   if (n_stmts <= vex_control.iropt_unroll_thresh/2) {
      if (vex_control.iropt_verbosity > 0)
         vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n",
                    n_stmts, 2 * n_stmts);

   if (vex_control.iropt_verbosity > 0)
      vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts);
static IRSB* maybe_loop_unroll_BB ( IRSB* bb0, Addr my_addr )

   Int i, j, jmax, n_vars;
   Addr64 xxx_value, yyy_value;

   if (vex_control.iropt_unroll_thresh <= 0)

   /* First off, figure out if we can unroll this loop.  Do this
      without modifying bb0. */

   if (bb0->jumpkind != Ijk_Boring)

   /* Extract the next-guest address.  If it isn't a literal, we
      have to give up. */

   if (udst->tag == Iex_Const
       && (udst->Iex.Const.con->tag == Ico_U32
           || udst->Iex.Const.con->tag == Ico_U64)) {
      /* The BB ends in a jump to a literal location. */
      xxx_value = udst->Iex.Const.con->tag == Ico_U64
                     ? udst->Iex.Const.con->Ico.U64
                     : (Addr64)(udst->Iex.Const.con->Ico.U32);

   /* Now we know the BB ends in a jump to a literal location.  If
      it's a jump to itself (viz, idiom #1), move directly to the
      unrolling stage, first cloning the bb so the original isn't
      modified. */
   if (xxx_value == my_addr) {
      unroll_factor = calc_unroll_factor( bb0 );
      if (unroll_factor < 2)
      bb1 = deepCopyIRSB( bb0 );
      udst = NULL; /* is now invalid */

   /* Search for the second idiomatic form:
        X: BODY; if (c) goto X; goto Y
      We know Y, but need to establish that the last stmt
      is a conditional exit back to X. */
   yyy_value = xxx_value;
   for (i = bb0->stmts_used-1; i >= 0; i--)

      return NULL; /* block with no stmts.  Strange. */

   if (st->tag != Ist_Exit)
   if (st->Ist.Exit.jk != Ijk_Boring)

   con = st->Ist.Exit.dst;
   vassert(con->tag == Ico_U32 || con->tag == Ico_U64);

   xxx_value = con->tag == Ico_U64
                  ? st->Ist.Exit.dst->Ico.U64
                  : (Addr64)(st->Ist.Exit.dst->Ico.U32);

   /* If this assertion fails, we have some kind of type error. */
   vassert(con->tag == udst->Iex.Const.con->tag);

   if (xxx_value != my_addr)
      /* We didn't find either idiom.  Give up. */

   /* Ok, we found idiom #2.  Copy the BB, switch around the xxx and
      yyy values (which makes it look like idiom #1), and go into
      unrolling proper.  This means finding (again) the last stmt, in
      the copy. */

   unroll_factor = calc_unroll_factor( bb0 );
   if (unroll_factor < 2)
   bb1 = deepCopyIRSB( bb0 );
   udst = NULL; /* is now invalid */
   for (i = bb1->stmts_used-1; i >= 0; i--)

   /* The next bunch of assertions should be true since we already
      found and checked the last stmt in the original bb. */

   vassert(st->tag == Ist_Exit);

   con = st->Ist.Exit.dst;
   vassert(con->tag == Ico_U32 || con->tag == Ico_U64);

   vassert(udst->tag == Iex_Const);
   vassert(udst->Iex.Const.con->tag == Ico_U32
           || udst->Iex.Const.con->tag == Ico_U64);
   vassert(con->tag == udst->Iex.Const.con->tag);

   /* switch the xxx and yyy fields around */
   if (con->tag == Ico_U64) {
      udst->Iex.Const.con->Ico.U64 = xxx_value;
      con->Ico.U64 = yyy_value;
      udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value;
      con->Ico.U32 = (UInt)yyy_value;

   /* negate the test condition */
      = IRExpr_Unop(Iop_Not1,deepCopyIRExpr(st->Ist.Exit.guard));

   /* --- The unroller proper.  Both idioms are by now --- */
   /* --- now converted to idiom 1. --- */

   vassert(unroll_factor == 2
           || unroll_factor == 4
           || unroll_factor == 8);

   jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1);
   for (j = 1; j <= jmax; j++) {

      n_vars = bb1->tyenv->types_used;

      bb2 = deepCopyIRSB(bb1);
      for (i = 0; i < n_vars; i++)
         (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]);

      for (i = 0; i < bb2->stmts_used; i++) {
         /* deltaIRStmt destructively modifies the stmt, but
            that's OK since bb2 is a complete fresh copy of bb1. */
         deltaIRStmt(bb2->stmts[i], n_vars);
         addStmtToIRSB(bb1, bb2->stmts[i]);

      vex_printf("\nUNROLLED (%lx)\n", my_addr);

   /* Flattening; sigh.  The unroller succeeds in breaking flatness
      by negating the test condition.  This should be fixed properly.
      For the moment use this shotgun approach. */
   return flatten_BB(bb1);
/*---------------------------------------------------------------*/
/*--- The tree builder                                        ---*/
/*---------------------------------------------------------------*/

/* This isn't part of IR optimisation.  Really it's a pass done prior
   to instruction selection, which improves the code that the
   instruction selector can produce. */

/* --- The 'tmp' environment is the central data structure here --- */

/* The number of outstanding bindings we're prepared to track.
   The number of times the env becomes full and we have to dump
   the oldest binding (hence reducing code quality) falls very
   rapidly as the env size increases.  8 gives reasonable performance
   under most circumstances. */

/* An interval.  Used to record the bytes in the guest state accessed
   by a Put[I] statement or by (one or more) Get[I] expression(s).  In
   case of several Get[I] expressions, the lower/upper bounds are recorded.
   This is conservative but cheap.
   E.g. a Put of 8 bytes at address 100 would be recorded as [100,107].
   E.g. an expression that reads 8 bytes at offset 100 and 4 bytes at
   offset 200 would be recorded as [100,203] */
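/* The struct definition itself is missing from this excerpt; judging
   from the uses below (.present, .low, .high) it is presumably
   something along the lines of the following sketch: */
typedef
   struct {
      Bool present;  /* True iff the interval describes any bytes at all */
      Int  low;      /* lowest guest-state offset touched, inclusive */
      Int  high;     /* highest guest-state offset touched, inclusive */
   }
   Interval;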
intervals_overlap(Interval i1, Interval i2)

   return (i1.low >= i2.low && i1.low <= i2.high) ||
          (i2.low >= i1.low && i2.low <= i1.high);

update_interval(Interval *i, Int low, Int high)

   vassert(low <= high);

      if (low  < i->low)  i->low  = low;
      if (high > i->high) i->high = high;

   /* bindee == NULL === slot is not in use
      bindee != NULL === slot is in use */

   /* Record the bytes of the guest state BINDEE reads from. */
   Interval getInterval;

__attribute__((unused))
static void ppAEnv ( ATmpInfo* env )

   for (i = 0; i < A_NENV; i++) {
      vex_printf("%d  tmp %d  val ", i, (Int)env[i].binder);
         ppIRExpr(env[i].bindee);
         vex_printf("(null)");
/* --- Tree-traversal fns --- */

/* Traverse an expr, and detect if any part of it reads memory or does
   a Get.  Be careful ... this really controls how much the
   tree-builder can reorder the code, so getting it right is critical. */

static void setHints_Expr (Bool* doesLoad, Interval* getInterval, IRExpr* e )

         for (i = 0; e->Iex.CCall.args[i]; i++)
            setHints_Expr(doesLoad, getInterval, e->Iex.CCall.args[i]);

         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.cond);
         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iftrue);
         setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iffalse);

         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg2);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg3);
         setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg4);

         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg2);
         setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg3);

         setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg1);
         setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg2);

         setHints_Expr(doesLoad, getInterval, e->Iex.Unop.arg);

         setHints_Expr(doesLoad, getInterval, e->Iex.Load.addr);

         Int low = e->Iex.Get.offset;
         Int high = low + sizeofIRType(e->Iex.Get.ty) - 1;
         update_interval(getInterval, low, high);

         IRRegArray *descr = e->Iex.GetI.descr;
         Int size = sizeofIRType(descr->elemTy);
         Int low  = descr->base;
         Int high = low + descr->nElems * size - 1;
         update_interval(getInterval, low, high);
         setHints_Expr(doesLoad, getInterval, e->Iex.GetI.ix);

         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("setHints_Expr");
/* Add a binding to the front of the env and slide all the rest
   backwards.  It should be the case that the last slot is free. */
static void addToEnvFront ( ATmpInfo* env, IRTemp binder, IRExpr* bindee )

   vassert(env[A_NENV-1].bindee == NULL);
   for (i = A_NENV-1; i >= 1; i--)
   env[0].binder   = binder;
   env[0].bindee   = bindee;
   env[0].doesLoad = False; /* filled in later */
   env[0].getInterval.present = False; /* filled in later */
   env[0].getInterval.low  = -1; /* filled in later */
   env[0].getInterval.high = -1; /* filled in later */
/* Given uses :: array of UShort, indexed by IRTemp
   Add the use-occurrences of temps in this expression. */

static void aoccCount_Expr ( UShort* uses, IRExpr* e )

      case Iex_RdTmp: /* the only interesting case */
         uses[e->Iex.RdTmp.tmp]++;

         aoccCount_Expr(uses, e->Iex.ITE.cond);
         aoccCount_Expr(uses, e->Iex.ITE.iftrue);
         aoccCount_Expr(uses, e->Iex.ITE.iffalse);

         aoccCount_Expr(uses, e->Iex.Qop.details->arg1);
         aoccCount_Expr(uses, e->Iex.Qop.details->arg2);
         aoccCount_Expr(uses, e->Iex.Qop.details->arg3);
         aoccCount_Expr(uses, e->Iex.Qop.details->arg4);

         aoccCount_Expr(uses, e->Iex.Triop.details->arg1);
         aoccCount_Expr(uses, e->Iex.Triop.details->arg2);
         aoccCount_Expr(uses, e->Iex.Triop.details->arg3);

         aoccCount_Expr(uses, e->Iex.Binop.arg1);
         aoccCount_Expr(uses, e->Iex.Binop.arg2);

         aoccCount_Expr(uses, e->Iex.Unop.arg);

         aoccCount_Expr(uses, e->Iex.Load.addr);

         for (i = 0; e->Iex.CCall.args[i]; i++)
            aoccCount_Expr(uses, e->Iex.CCall.args[i]);

         aoccCount_Expr(uses, e->Iex.GetI.ix);

         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("aoccCount_Expr");
/* Given uses :: array of UShort, indexed by IRTemp
   Add the use-occurrences of temps in this statement. */

static void aoccCount_Stmt ( UShort* uses, IRStmt* st )

         aoccCount_Expr(uses, st->Ist.AbiHint.base);
         aoccCount_Expr(uses, st->Ist.AbiHint.nia);

         aoccCount_Expr(uses, st->Ist.WrTmp.data);

         aoccCount_Expr(uses, st->Ist.Put.data);

         aoccCount_Expr(uses, st->Ist.PutI.details->ix);
         aoccCount_Expr(uses, st->Ist.PutI.details->data);

         aoccCount_Expr(uses, st->Ist.Store.addr);
         aoccCount_Expr(uses, st->Ist.Store.data);

         IRStoreG* sg = st->Ist.StoreG.details;
         aoccCount_Expr(uses, sg->addr);
         aoccCount_Expr(uses, sg->data);
         aoccCount_Expr(uses, sg->guard);

         IRLoadG* lg = st->Ist.LoadG.details;
         aoccCount_Expr(uses, lg->addr);
         aoccCount_Expr(uses, lg->alt);
         aoccCount_Expr(uses, lg->guard);

         cas = st->Ist.CAS.details;
         aoccCount_Expr(uses, cas->addr);
            aoccCount_Expr(uses, cas->expdHi);
         aoccCount_Expr(uses, cas->expdLo);
            aoccCount_Expr(uses, cas->dataHi);
         aoccCount_Expr(uses, cas->dataLo);

         aoccCount_Expr(uses, st->Ist.LLSC.addr);
         if (st->Ist.LLSC.storedata)
            aoccCount_Expr(uses, st->Ist.LLSC.storedata);

         d = st->Ist.Dirty.details;
         if (d->mFx != Ifx_None)
            aoccCount_Expr(uses, d->mAddr);
         aoccCount_Expr(uses, d->guard);
         for (i = 0; d->args[i]; i++) {
            IRExpr* arg = d->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
               aoccCount_Expr(uses, arg);

         aoccCount_Expr(uses, st->Ist.Exit.guard);

         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("aoccCount_Stmt");
/* Look up a binding for tmp in the env.  If found, return the bound
   expression, and set the env's binding to NULL so it is marked as
   used.  If not found, return NULL. */

static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp )

   for (i = 0; i < A_NENV; i++) {
      if (env[i].binder == tmp && env[i].bindee != NULL) {
         IRExpr* bindee = env[i].bindee;
         env[i].bindee = NULL;
/* Traverse e, looking for temps.  For each observed temp, see if env
   contains a binding for the temp, and if so return the bound value.
   The env has the property that any binding it holds is
   'single-shot', so once a binding is used, it is marked as no longer
   available, by setting its .bindee field to NULL. */

static inline Bool is_Unop ( IRExpr* e, IROp op ) {
   return e->tag == Iex_Unop && e->Iex.Unop.op == op;

static inline Bool is_Binop ( IRExpr* e, IROp op ) {
   return e->tag == Iex_Binop && e->Iex.Binop.op == op;

static IRExpr* fold_IRExpr_Binop ( IROp op, IRExpr* a1, IRExpr* a2 )

      /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) ) */
      if (is_Unop(a1, Iop_CmpwNEZ32) && is_Unop(a2, Iop_CmpwNEZ32))
         return IRExpr_Unop( Iop_CmpwNEZ32,
                             IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg,
                                                     a2->Iex.Unop.arg ) );

      /* Since X has type Ity_I1 we can simplify:
         CmpNE32(1Uto32(X),0) ==> X */
      if (is_Unop(a1, Iop_1Uto32) && isZeroU32(a2))
         return a1->Iex.Unop.arg;

   /* no reduction rule applies */
   return IRExpr_Binop( op, a1, a2 );
static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa )

      /* CmpwNEZ64( CmpwNEZ64 ( x ) ) --> CmpwNEZ64 ( x ) */
      if (is_Unop(aa, Iop_CmpwNEZ64))
         return IRExpr_Unop( Iop_CmpwNEZ64, aa->Iex.Unop.arg );
      /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */
      if (is_Binop(aa, Iop_Or64)
          && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64))
         return fold_IRExpr_Unop(
                   IRExpr_Binop(Iop_Or64,
                                aa->Iex.Binop.arg1->Iex.Unop.arg,
                                aa->Iex.Binop.arg2));
      /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */
      if (is_Binop(aa, Iop_Or64)
          && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64))
         return fold_IRExpr_Unop(
                   IRExpr_Binop(Iop_Or64,
                                aa->Iex.Binop.arg2->Iex.Unop.arg));

      /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */
      if (is_Unop(aa, Iop_Left64))
         return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg);
      /* CmpNEZ64( 1Uto64(X) ) --> X */
      if (is_Unop(aa, Iop_1Uto64))
         return aa->Iex.Unop.arg;

      /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */
      if (is_Unop(aa, Iop_CmpwNEZ32))
         return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg );

      /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */
      if (is_Unop(aa, Iop_Left32))
         return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg);
      /* CmpNEZ32( 1Uto32(X) ) --> X */
      if (is_Unop(aa, Iop_1Uto32))
         return aa->Iex.Unop.arg;
      /* CmpNEZ32( 64to32( CmpwNEZ64(X) ) ) --> CmpNEZ64(X) */
      if (is_Unop(aa, Iop_64to32) && is_Unop(aa->Iex.Unop.arg, Iop_CmpwNEZ64))
         return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg->Iex.Unop.arg);

      /* CmpNEZ8( 1Uto8(X) ) --> X */
      if (is_Unop(aa, Iop_1Uto8))
         return aa->Iex.Unop.arg;

      /* Left32( Left32(x) ) --> Left32(x) */
      if (is_Unop(aa, Iop_Left32))
         return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg );

      /* Left64( Left64(x) ) --> Left64(x) */
      if (is_Unop(aa, Iop_Left64))
         return IRExpr_Unop( Iop_Left64, aa->Iex.Unop.arg );

      case Iop_ZeroHI64ofV128:
         /* ZeroHI64ofV128( ZeroHI64ofV128(x) ) --> ZeroHI64ofV128(x) */
         if (is_Unop(aa, Iop_ZeroHI64ofV128))
            return IRExpr_Unop( Iop_ZeroHI64ofV128, aa->Iex.Unop.arg );

      /* 32to1( 1Uto32 ( x ) ) --> x */
      if (is_Unop(aa, Iop_1Uto32))
         return aa->Iex.Unop.arg;
      /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */
      if (is_Unop(aa, Iop_CmpwNEZ32))
         return IRExpr_Unop( Iop_CmpNEZ32, aa->Iex.Unop.arg );

      /* 64to1( 1Uto64 ( x ) ) --> x */
      if (is_Unop(aa, Iop_1Uto64))
         return aa->Iex.Unop.arg;
      /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */
      if (is_Unop(aa, Iop_CmpwNEZ64))
         return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg );

      /* 64to32( 32Uto64 ( x )) --> x */
      if (is_Unop(aa, Iop_32Uto64))
         return aa->Iex.Unop.arg;
      /* 64to32( 8Uto64 ( x )) --> 8Uto32(x) */
      if (is_Unop(aa, Iop_8Uto64))
         return IRExpr_Unop(Iop_8Uto32, aa->Iex.Unop.arg);

      /* 64to16( 16Uto64 ( x )) --> x */
      if (is_Unop(aa, Iop_16Uto64))
         return aa->Iex.Unop.arg;
      /* 64to16( 32Uto64 ( x )) --> 32to16(x) */
      if (is_Unop(aa, Iop_32Uto64))
         return IRExpr_Unop(Iop_32to16, aa->Iex.Unop.arg);

      /* 32Uto64( 8Uto32( x )) --> 8Uto64(x) */
      if (is_Unop(aa, Iop_8Uto32))
         return IRExpr_Unop(Iop_8Uto64, aa->Iex.Unop.arg);
      /* 32Uto64( 16Uto32( x )) --> 16Uto64(x) */
      if (is_Unop(aa, Iop_16Uto32))
         return IRExpr_Unop(Iop_16Uto64, aa->Iex.Unop.arg);

      /* 32Uto64(64to32( Shr64( 32Uto64(64to32(x)), sh ))
         --> Shr64( 32Uto64(64to32(x)), sh )) */
      if (is_Unop(aa, Iop_64to32)
          && is_Binop(aa->Iex.Unop.arg, Iop_Shr64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
         return aa->Iex.Unop.arg;

      /* 32Uto64(64to32( Shl64( 32Uto64(64to32(x)), sh ))
         --> 32Uto64(64to32( Shl64( x, sh )) */
      if (is_Unop(aa, Iop_64to32)
          && is_Binop(aa->Iex.Unop.arg, Iop_Shl64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
          && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
                  aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg->Iex.Unop.arg,
                  aa->Iex.Unop.arg->Iex.Binop.arg2

      /* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) -> CmpwNEZ32(x) */
      if (is_Unop(aa, Iop_CmpNEZ8)
          && is_Unop(aa->Iex.Unop.arg, Iop_32to8)
          && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg, Iop_1Uto32)
          && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg->Iex.Unop.arg,
         return IRExpr_Unop( Iop_CmpwNEZ32,
                             aa->Iex.Unop.arg->Iex.Unop.arg
                               ->Iex.Unop.arg->Iex.Unop.arg );

   /* no reduction rule applies */
   return IRExpr_Unop( op, aa );
static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e )

         args2 = shallowCopyIRExprVec(e->Iex.CCall.args);
         for (i = 0; args2[i]; i++)
            args2[i] = atbSubst_Expr(env,args2[i]);
         return IRExpr_CCall(

         e2 = atbSubst_Temp(env, e->Iex.RdTmp.tmp);

                   atbSubst_Expr(env, e->Iex.ITE.cond),
                   atbSubst_Expr(env, e->Iex.ITE.iftrue),
                   atbSubst_Expr(env, e->Iex.ITE.iffalse)

                   e->Iex.Qop.details->op,
                   atbSubst_Expr(env, e->Iex.Qop.details->arg1),
                   atbSubst_Expr(env, e->Iex.Qop.details->arg2),
                   atbSubst_Expr(env, e->Iex.Qop.details->arg3),
                   atbSubst_Expr(env, e->Iex.Qop.details->arg4)

         return IRExpr_Triop(
                   e->Iex.Triop.details->op,
                   atbSubst_Expr(env, e->Iex.Triop.details->arg1),
                   atbSubst_Expr(env, e->Iex.Triop.details->arg2),
                   atbSubst_Expr(env, e->Iex.Triop.details->arg3)

         return fold_IRExpr_Binop(
                   atbSubst_Expr(env, e->Iex.Binop.arg1),
                   atbSubst_Expr(env, e->Iex.Binop.arg2)

         return fold_IRExpr_Unop(
                   atbSubst_Expr(env, e->Iex.Unop.arg)

                   atbSubst_Expr(env, e->Iex.Load.addr)

                   atbSubst_Expr(env, e->Iex.GetI.ix),

         vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
         vpanic("atbSubst_Expr");
/* Same deal as atbSubst_Expr, except for stmts. */

static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )

   IRPutI *puti, *puti2;

         return IRStmt_AbiHint(
                   atbSubst_Expr(env, st->Ist.AbiHint.base),
                   st->Ist.AbiHint.len,
                   atbSubst_Expr(env, st->Ist.AbiHint.nia)

         return IRStmt_Store(
                   atbSubst_Expr(env, st->Ist.Store.addr),
                   atbSubst_Expr(env, st->Ist.Store.data)

         IRStoreG* sg = st->Ist.StoreG.details;
         return IRStmt_StoreG(sg->end,
                              atbSubst_Expr(env, sg->addr),
                              atbSubst_Expr(env, sg->data),
                              atbSubst_Expr(env, sg->guard));

         IRLoadG* lg = st->Ist.LoadG.details;
         return IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
                             atbSubst_Expr(env, lg->addr),
                             atbSubst_Expr(env, lg->alt),
                             atbSubst_Expr(env, lg->guard));

         return IRStmt_WrTmp(
                   atbSubst_Expr(env, st->Ist.WrTmp.data)

                   atbSubst_Expr(env, st->Ist.Put.data)

         puti  = st->Ist.PutI.details;
         puti2 = mkIRPutI(puti->descr,
                          atbSubst_Expr(env, puti->ix),
                          atbSubst_Expr(env, puti->data));
         return IRStmt_PutI(puti2);

                   atbSubst_Expr(env, st->Ist.Exit.guard),

         return IRStmt_IMark(st->Ist.IMark.addr,
                             st->Ist.IMark.delta);

         return IRStmt_NoOp();

         return IRStmt_MBE(st->Ist.MBE.event);

         cas = st->Ist.CAS.details;
                   cas->oldHi, cas->oldLo, cas->end,
                   atbSubst_Expr(env, cas->addr),
                   cas->expdHi ? atbSubst_Expr(env, cas->expdHi) : NULL,
                   atbSubst_Expr(env, cas->expdLo),
                   cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL,
                   atbSubst_Expr(env, cas->dataLo)
         return IRStmt_CAS(cas2);

                   st->Ist.LLSC.result,
                   atbSubst_Expr(env, st->Ist.LLSC.addr),
                   st->Ist.LLSC.storedata
                      ? atbSubst_Expr(env, st->Ist.LLSC.storedata) : NULL

         d  = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         if (d2->mFx != Ifx_None)
            d2->mAddr = atbSubst_Expr(env, d2->mAddr);
         d2->guard = atbSubst_Expr(env, d2->guard);
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
               d2->args[i] = atbSubst_Expr(env, arg);
         return IRStmt_Dirty(d2);

         vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
         vpanic("atbSubst_Stmt");
static Bool dirty_helper_stores ( const IRDirty *d )

   return d->mFx == Ifx_Write || d->mFx == Ifx_Modify;

static Interval dirty_helper_puts (
                   Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
                   VexRegisterUpdates pxControl,
                   /*OUT*/Bool *requiresPreciseMemExns

   /* Passing the guest state pointer opens the door to modifying the
      guest state under the covers.  It's not allowed, but let's be
      extra conservative and assume the worst. */
   for (i = 0; d->args[i]; i++) {
      if (UNLIKELY(d->args[i]->tag == Iex_GSPTR)) {
         *requiresPreciseMemExns = True;
         /* Assume all guest state is written. */
         interval.present = True;
         interval.high = 0x7FFFFFFF;

   /* Check the side effects on the guest state */
   interval.present = False;
   interval.low = interval.high = -1;
   *requiresPreciseMemExns = False;

   for (i = 0; i < d->nFxState; ++i) {
      if (d->fxState[i].fx != Ifx_Read) {
         Int offset = d->fxState[i].offset;
         Int size = d->fxState[i].size;
         Int nRepeats = d->fxState[i].nRepeats;
         Int repeatLen = d->fxState[i].repeatLen;

         if (preciseMemExnsFn(offset,
                              offset + nRepeats * repeatLen + size - 1,
            *requiresPreciseMemExns = True;
         update_interval(&interval, offset,
                         offset + nRepeats * repeatLen + size - 1);
/* Return an interval if st modifies the guest state.  Via
   requiresPreciseMemExns return whether or not that modification
   requires precise exceptions. */
static Interval stmt_modifies_guest_state (
                   IRSB *bb, const IRStmt *st,
                   Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
                   VexRegisterUpdates pxControl,
                   /*OUT*/Bool *requiresPreciseMemExns

      Int offset = st->Ist.Put.offset;
      Int size = sizeofIRType(typeOfIRExpr(bb->tyenv, st->Ist.Put.data));

      *requiresPreciseMemExns
         = preciseMemExnsFn(offset, offset + size - 1, pxControl);
      interval.present = True;
      interval.low  = offset;
      interval.high = offset + size - 1;

      IRRegArray *descr = st->Ist.PutI.details->descr;
      Int offset = descr->base;
      Int size = sizeofIRType(descr->elemTy);

      /* We quietly assume here that all segments are contiguous and there
         are no holes.  This is to avoid a loop.  The assumption is conservative
         in the sense that we might report that precise memory exceptions are
         needed when in fact they are not. */
      *requiresPreciseMemExns
         = preciseMemExnsFn(offset, offset + descr->nElems * size - 1,
                            pxControl);
      interval.present = True;
      interval.low  = offset;
      interval.high = offset + descr->nElems * size - 1;

      return dirty_helper_puts(st->Ist.Dirty.details,
                               preciseMemExnsFn, pxControl,
                               requiresPreciseMemExns);

      *requiresPreciseMemExns = False;
      interval.present = False;
/* notstatic */ Addr ado_treebuild_BB (
                        Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
                        VexRegisterUpdates pxControl

   Bool stmtStores, invalidateMe;
   Interval putInterval;
   ATmpInfo env[A_NENV];

   Bool max_ga_known = False;

   Int     n_tmps = bb->tyenv->types_used;
   UShort* uses   = LibVEX_Alloc_inline(n_tmps * sizeof(UShort));

   /* Phase 1.  Scan forwards in bb, counting use occurrences of each
      temp.  Also count occurrences in the bb->next field.  Take the
      opportunity to also find the maximum guest address in the block,
      since that will be needed later for deciding when we can safely
      elide event checks. */

   for (i = 0; i < n_tmps; i++)

   for (i = 0; i < bb->stmts_used; i++) {

         UInt len = st->Ist.IMark.len;
         Addr mga = st->Ist.IMark.addr + (len < 1 ? 1 : len) - 1;
         max_ga_known = True;

      aoccCount_Stmt( uses, st );

   aoccCount_Expr(uses, bb->next);

      for (i = 0; i < n_tmps; i++) {
         ppIRTemp( (IRTemp)i );
         vex_printf("  used %d\n", (Int)uses[i] );

   /* Phase 2.  Scan forwards in bb.  For each statement in turn:

         If the env is full, emit the end element.  This guarantees
         there is at least one free slot in the following.

         On seeing 't = E', occ(t)==1,
            add t -> E' to the front of the env
            Examine E' and set the hints for E' appropriately
            (doesLoad? doesGet?)

         On seeing any other stmt,
            let stmt' = env(stmt)
            remove from env any 't=E' binds invalidated by stmt
            emit the invalidated stmts
            compact any holes in env
            by sliding entries towards the front

      Finally, apply env to bb->next.
   */
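   /* A tiny illustration (not taken from this file): given
         t1 = Add32(t0,0x4:I32)        with occ(t1) == 1
         PUT(0) = t1
      the binding t1 -> Add32(t0,0x4:I32) is held in the env and then
      used up by the PUT, so what gets emitted is just
         PUT(0) = Add32(t0,0x4:I32)
      i.e. the tree for t1 is built back into its single use. */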
   for (i = 0; i < A_NENV; i++) {
      env[i].bindee = NULL;
      env[i].binder = IRTemp_INVALID;

   /* The stmts in bb are being reordered, and we are guaranteed to
      end up with no more than the number we started with.  Use i to
      be the cursor of the current stmt examined and j <= i to be that
      for the current stmt being written. */

   for (i = 0; i < bb->stmts_used; i++) {

      if (st->tag == Ist_NoOp)

      /* Ensure there's at least one space in the env, by emitting
         the oldest binding if necessary. */
      if (env[A_NENV-1].bindee != NULL) {
         bb->stmts[j] = IRStmt_WrTmp( env[A_NENV-1].binder,
                                      env[A_NENV-1].bindee );
         env[A_NENV-1].bindee = NULL;

      /* Consider current stmt. */
      if (st->tag == Ist_WrTmp && uses[st->Ist.WrTmp.tmp] <= 1) {

         /* optional extra: dump dead bindings as we find them.
            Removes the need for a prior dead-code removal pass. */
         if (uses[st->Ist.WrTmp.tmp] == 0) {
            if (0) vex_printf("DEAD binding\n");
            continue; /* for (i = 0; i < bb->stmts_used; i++) loop */

         vassert(uses[st->Ist.WrTmp.tmp] == 1);

         /* ok, we have 't = E', occ(t)==1.  Do the abovementioned. */
         e  = st->Ist.WrTmp.data;
         e2 = atbSubst_Expr(env, e);
         addToEnvFront(env, st->Ist.WrTmp.tmp, e2);
         setHints_Expr(&env[0].doesLoad, &env[0].getInterval, e2);
         /* don't advance j, as we are deleting this stmt and instead
            holding it temporarily in the env. */
         continue; /* for (i = 0; i < bb->stmts_used; i++) loop */

      /* we get here for any other kind of statement. */
      /* 'use up' any bindings required by the current statement. */
      st2 = atbSubst_Stmt(env, st);

      /* Now, before this stmt, dump any bindings in env that it
         invalidates.  These need to be dumped in the order in which
         they originally entered env -- that means from oldest to
         youngest. */

      /* putInterval/stmtStores characterise what the stmt under
         consideration does, or might do (sidely safe @ True). */

      Bool putRequiresPreciseMemExns;
      putInterval = stmt_modifies_guest_state(
                       bb, st, preciseMemExnsFn, pxControl,
                       &putRequiresPreciseMemExns

      /* be True if this stmt writes memory or might do (==> we don't
         want to reorder other loads or stores relative to it).  Also,
         both LL and SC fall under this classification, since we
         really ought to be conservative and not reorder any other
         memory transactions relative to them. */
         = toBool( st->tag == Ist_Store
                   || (st->tag == Ist_Dirty
                       && dirty_helper_stores(st->Ist.Dirty.details))
                   || st->tag == Ist_LLSC
                   || st->tag == Ist_CAS );

      for (k = A_NENV-1; k >= 0; k--) {
         if (env[k].bindee == NULL)

         /* Compare the actions of this stmt with the actions of
            binding 'k', to see if they invalidate the binding. */

               /* a store invalidates loaded data */
               (env[k].doesLoad && stmtStores)
               /* a put invalidates get'd data, if they overlap */
               || ((env[k].getInterval.present && putInterval.present) &&
                   intervals_overlap(env[k].getInterval, putInterval))
               /* a put invalidates loaded data.  That means, in essence, that
                  a load expression cannot be substituted into a statement
                  that follows the put.  But there is nothing wrong with doing
                  so except when the put statement requires precise exceptions.
                  Think of a load that is moved past a put where the put
                  updates the IP in the guest state.  If the load generates
                  a segfault, the wrong address (line number) would be
                  reported. */
               || (env[k].doesLoad && putInterval.present &&
                   putRequiresPreciseMemExns)
               /* probably overly conservative: a memory bus event
                  invalidates absolutely everything, so that all
                  computation prior to it is forced to complete before
                  proceeding with the event (fence,lock,unlock). */
               || st->tag == Ist_MBE
               /* also be (probably overly) paranoid re AbiHints */
               || st->tag == Ist_AbiHint

            bb->stmts[j] = IRStmt_WrTmp( env[k].binder, env[k].bindee );
            env[k].bindee = NULL;

      /* Slide in-use entries in env up to the front */
      for (k = 0; k < A_NENV; k++) {
         if (env[k].bindee != NULL) {
      for (m = m; m < A_NENV; m++) {
         env[m].bindee = NULL;

      /* finally, emit the substituted statement */
      /* vex_printf("**2 "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */

   } /* for each stmt in the original bb ... */

   /* Finally ... substitute the ->next field as much as possible, and
      dump any left-over bindings.  Hmm.  Perhaps there should be no
      left over bindings?  Or any left-over bindings are
      by definition dead? */
   bb->next = atbSubst_Expr(env, bb->next);

   return max_ga_known ? max_ga : ~(Addr)0;
/*---------------------------------------------------------------*/
/*--- MSVC specific transformation hacks                      ---*/
/*---------------------------------------------------------------*/

/* The purpose of all this is to find MSVC's idiom for non-constant
   bitfield assignment, "a ^ ((a ^ b) & c)", and transform it into
   gcc's idiom "(a & ~c) | (b & c)".  Motivation is that Memcheck
   generates a lot of false positives from the MSVC version because it
   doesn't understand that XORing an undefined bit with itself gives a
   defined result.

   This isn't a problem for the simple case "x ^ x", because iropt
   folds it to a constant zero before Memcheck ever sees it.  But in
   this case we have an intervening "& c" which defeats the simple
   case.  So we have to carefully inspect all expressions rooted at an
   XOR to see if any of them match "a ^ ((a ^ b) & c)", or any of the
   7 other variants resulting from swapping the order of arguments to
   the three binary operations.  If we get a match, we then replace
   the tree with "(a & ~c) | (b & c)", and Memcheck is happy.

   The key difficulty is to spot the two uses of "a".  To normalise
   the IR to maximise the chances of success, we first do a CSE pass,
   with CSEing of loads enabled, since the two "a" expressions may be
   loads, which need to be commoned up.  Then we do a constant folding
   pass, so as to remove any tmp-to-tmp assignment chains that would
   make matching the original expression more difficult.
*/
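/* Why the rewrite is sound: for every bit position, if the
   corresponding bit of c is 1 the expression selects the bit of b,
   and if it is 0 it keeps the bit of a, so the MSVC and gcc forms
   agree for all inputs.  The brute-force check below demonstrates
   this for 8-bit values; it is only an illustrative sketch and is
   not used by the optimiser, hence it is disabled. */
#if 0
static void demo_msvc_bitfield_identity ( void )
{
   for (UInt a = 0; a < 256; a++)
      for (UInt b = 0; b < 256; b++)
         for (UInt c = 0; c < 256; c++) {
            UInt msvc_form = (a ^ ((a ^ b) & c)) & 0xFF;
            UInt gcc_form  = ((a & ~c) | (b & c)) & 0xFF;
            vassert(msvc_form == gcc_form);
         }
}
#endif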
/* Helper function for debug printing */
__attribute__((unused))
static void print_flat_expr ( IRExpr** env, IRExpr* e )

         ppIROp(e->Iex.Binop.op);
         print_flat_expr(env, e->Iex.Binop.arg1);
         print_flat_expr(env, e->Iex.Binop.arg2);

         ppIROp(e->Iex.Unop.op);
         print_flat_expr(env, e->Iex.Unop.arg);

         ppIRTemp(e->Iex.RdTmp.tmp);
         print_flat_expr(env, chase1(env, e));

         vex_printf("FAIL: "); ppIRExpr(e); vex_printf("\n");
/* Spot   a ^ ((a ^ b) & c)   for a, b and c tmp-or-const (atoms)
   or any of the other 7 variants generated by switching the order
   of arguments to the outer ^, the inner ^ and the &. */

static UInt spotBitfieldAssignment ( /*OUT*/IRExpr** aa, /*OUT*/IRExpr** bb,
                                     IRExpr** env, IRExpr* e,
                                     IROp opAND, IROp opXOR)

#  define ISBIN(_e,_op) ((_e) && (_e)->tag == Iex_Binop \
                         && (_e)->Iex.Binop.op == (_op))
#  define ISATOM(_e)    isIRAtom(_e)
#  define STEP(_e)      chase1(env, (_e))
#  define LL(_e)        ((_e)->Iex.Binop.arg1)
#  define RR(_e)        ((_e)->Iex.Binop.arg2)

   IRExpr *a1, *and, *xor, *c, *a2bL, *a2bR;

   /* This is common to all 8 cases */
   if (!ISBIN(e, opXOR)) goto fail;

   /*                    -----and------        */
   /* find variant 1:  a1 ^ ((a2 ^ b) & c) */
   /* find variant 2:  a1 ^ ((b ^ a2) & c) */
   a1 = and = xor = c = a2bL = a2bR = NULL;

   if (!ISBIN(and, opAND)) goto v34;
   xor = STEP(LL(and));
   if (!ISBIN(xor, opXOR)) goto v34;

   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {

   /*                 -----and------           */
   /* find variant 3:  ((a2 ^ b) & c) ^ a1 */
   /* find variant 4:  ((b ^ a2) & c) ^ a1 */
   a1 = and = xor = c = a2bL = a2bR = NULL;

   if (!ISBIN(and, opAND)) goto v56;
   xor = STEP(LL(and));
   if (!ISBIN(xor, opXOR)) goto v56;

   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {

   /*                    -----and------        */
   /* find variant 5:  a1 ^ (c & (a2 ^ b)) */
   /* find variant 6:  a1 ^ (c & (b ^ a2)) */
   a1 = and = xor = c = a2bL = a2bR = NULL;

   if (!ISBIN(and, opAND)) goto v78;
   xor = STEP(RR(and));
   if (!ISBIN(xor, opXOR)) goto v78;

   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
      vassert(5-5); // ATC
   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
      vassert(6-6); // ATC

   /*                 -----and------           */
   /* find variant 7:  (c & (a2 ^ b)) ^ a1 */
   /* find variant 8:  (c & (b ^ a2)) ^ a1 */
   a1 = and = xor = c = a2bL = a2bR = NULL;

   if (!ISBIN(and, opAND)) goto fail;
   xor = STEP(RR(and));
   if (!ISBIN(xor, opXOR)) goto fail;

   if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
   if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
/* If |e| is of the form a ^ ((a ^ b) & c) (or any of the 7 other
   variants thereof generated by switching arguments around), return
   the IRExpr* for (a & ~c) | (b & c).  Else return NULL. */
static IRExpr* do_XOR_TRANSFORMS_IRExpr ( IRExpr** env, IRExpr* e )

   if (e->tag != Iex_Binop)

   const HChar* tyNm = NULL;
   IROp opOR  = Iop_INVALID;
   IROp opAND = Iop_INVALID;
   IROp opNOT = Iop_INVALID;
   IROp opXOR = Iop_INVALID;
   switch (e->Iex.Binop.op) {

         opOR  = Iop_Or32;  opAND = Iop_And32;
         opNOT = Iop_Not32; opXOR = Iop_Xor32;

         opOR  = Iop_Or16;  opAND = Iop_And16;
         opNOT = Iop_Not16; opXOR = Iop_Xor16;

         opOR  = Iop_Or8;   opAND = Iop_And8;
         opNOT = Iop_Not8;  opXOR = Iop_Xor8;

   UInt variant = spotBitfieldAssignment(&aa, &bb, &cc, env, e, opAND, opXOR);

      static UInt ctr = 0;
      vex_printf("XXXXXXXXXX Bitfield Assignment number %u, "
                 "type %s, variant %u\n",
                 ++ctr, tyNm, variant);
      /* It's vitally important that the returned aa, bb and cc are
         atoms -- either constants or tmps.  If it's anything else
         (eg, a GET) then incorporating them in a tree at this point
         in the SB may erroneously pull them forwards (eg of a PUT
         that originally was after the GET) and so transform the IR
         wrongly.  spotBitfieldAssignment should guarantee only to
         give us atoms, but we check here anyway. */
      vassert(aa && isIRAtom(aa));
      vassert(bb && isIRAtom(bb));
      vassert(cc && isIRAtom(cc));
      return IRExpr_Binop(
                IRExpr_Binop(opAND, aa, IRExpr_Unop(opNOT, cc)),
                IRExpr_Binop(opAND, bb, cc)
/* SB is modified in-place.  Visit all the IRExprs and, for those
   which are allowed to be non-atomic, perform the XOR transform if
   possible.  This makes |sb| be non-flat, but that's ok, the caller
   can re-flatten it.  Returns True iff any changes were made. */
static Bool do_XOR_TRANSFORM_IRSB ( IRSB* sb )

   Bool changed = False;

   /* Make the tmp->expr environment, so we can use it for
      chasing expressions. */
   Int      n_tmps = sb->tyenv->types_used;
   IRExpr** env = LibVEX_Alloc_inline(n_tmps * sizeof(IRExpr*));
   for (i = 0; i < n_tmps; i++)

   for (i = 0; i < sb->stmts_used; i++) {
      IRStmt* st = sb->stmts[i];
      if (st->tag != Ist_WrTmp)
      IRTemp t = st->Ist.WrTmp.tmp;
      vassert(t < n_tmps);
      env[t] = st->Ist.WrTmp.data;

   for (i = 0; i < sb->stmts_used; i++) {
      IRStmt* st = sb->stmts[i];

         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));

         vassert(isIRAtom(st->Ist.Put.data));

         IRPutI* puti = st->Ist.PutI.details;
         vassert(isIRAtom(puti->ix));
         vassert(isIRAtom(puti->data));

         /* This is the one place where an expr (st->Ist.WrTmp.data) is
            allowed to be more than just a constant or a tmp. */
            = do_XOR_TRANSFORMS_IRExpr(env, st->Ist.WrTmp.data);
            st->Ist.WrTmp.data = mb_new_data;

         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));

         IRStoreG* sg = st->Ist.StoreG.details;
         vassert(isIRAtom(sg->addr));
         vassert(isIRAtom(sg->data));
         vassert(isIRAtom(sg->guard));

         IRLoadG* lg = st->Ist.LoadG.details;
         vassert(isIRAtom(lg->addr));
         vassert(isIRAtom(lg->alt));
         vassert(isIRAtom(lg->guard));

         IRCAS* cas = st->Ist.CAS.details;
         vassert(isIRAtom(cas->addr));
         vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
         vassert(isIRAtom(cas->expdLo));
         vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
         vassert(isIRAtom(cas->dataLo));

         vassert(isIRAtom(st->Ist.LLSC.addr));
         if (st->Ist.LLSC.storedata)
            vassert(isIRAtom(st->Ist.LLSC.storedata));

         IRDirty* d = st->Ist.Dirty.details;
         if (d->mFx != Ifx_None) {
            vassert(isIRAtom(d->mAddr));
         vassert(isIRAtom(d->guard));
         for (Int j = 0; d->args[j]; j++) {
            IRExpr* arg = d->args[j];
            if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg))) {
               vassert(isIRAtom(arg));

         vassert(isIRAtom(st->Ist.Exit.guard));

         vex_printf("\n"); ppIRStmt(st);
         vpanic("do_XOR_TRANSFORMS_IRSB");

   vassert(isIRAtom(sb->next));
static IRSB* do_MSVC_HACKS ( IRSB* sb )

   // Normalise as much as we can.  This is the one-and-only place
   // where we call do_cse_BB with allowLoadsToBeCSEd set to True.
   Bool any_cse_changes = do_cse_BB( sb, True/*allowLoadsToBeCSEd*/ );
   if (any_cse_changes) {
      // CSEing might have created dead code.  Remove it.
      sb = cprop_BB ( sb );

   // Visit all atoms, do the transformation proper.  bb is modified
   // in-place.
   Bool changed = do_XOR_TRANSFORM_IRSB(sb);

   // The transformation generates non-flat expressions, so we now
   // need to re-flatten the block.
   sb = flatten_BB(sb);
/*---------------------------------------------------------------*/
/*--- iropt main                                              ---*/
/*---------------------------------------------------------------*/

static Bool iropt_verbose = False; /* True; */

/* Do a simple cleanup pass on bb.  This is: redundant Get removal,
   redundant Put removal, constant propagation, dead code removal,
   clean helper specialisation, and dead code removal (again). */

IRSB* cheap_transformations (
         IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
         Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
         VexRegisterUpdates pxControl

   redundant_get_removal_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= REDUNDANT GET\n\n" );

   if (pxControl < VexRegUpdAllregsAtEachInsn) {
      redundant_put_removal_BB ( bb, preciseMemExnsFn, pxControl );
   if (iropt_verbose) {
      vex_printf("\n========= REDUNDANT PUT\n\n" );

   bb = cprop_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= CPROPD\n\n" );

   do_deadcode_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= DEAD\n\n" );

   bb = spec_helpers_BB ( bb, specHelper );
   do_deadcode_BB ( bb );
   if (iropt_verbose) {
      vex_printf("\n========= SPECd \n\n" );
/* Do some more expensive transformations on bb, which are aimed at
   optimising as much as possible in the presence of GetI and PutI. */

IRSB* expensive_transformations( IRSB* bb, VexRegisterUpdates pxControl )

   (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
   collapse_AddSub_chains_BB( bb );
   do_redundant_GetI_elimination( bb );
   if (pxControl < VexRegUpdAllregsAtEachInsn) {
      do_redundant_PutI_elimination( bb, pxControl );
   do_deadcode_BB( bb );
/* Scan a flattened BB to look for signs that more expensive
   optimisations might be useful:
   - find out if there are any GetIs and PutIs
   - find out if there are any floating or vector-typed temporaries */

static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI,
                                 /*OUT*/Bool* hasVorFtemps,

   *hasGetIorPutI = False;
   *hasVorFtemps  = False;

   for (i = 0; i < bb->stmts_used; i++) {

         vassert(isIRAtom(st->Ist.AbiHint.base));
         vassert(isIRAtom(st->Ist.AbiHint.nia));

         *hasGetIorPutI = True;

         if (st->Ist.WrTmp.data->tag == Iex_GetI)
            *hasGetIorPutI = True;
         switch (typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp)) {
            case Ity_I1: case Ity_I8: case Ity_I16:
            case Ity_I32: case Ity_I64: case Ity_I128:
            case Ity_F16: case Ity_F32: case Ity_F64: case Ity_F128:
            case Ity_V128: case Ity_V256:
               *hasVorFtemps = True;
            case Ity_D32: case Ity_D64: case Ity_D128:
               *hasVorFtemps = True;

         vassert(isIRAtom(st->Ist.Put.data));

         vassert(isIRAtom(st->Ist.Store.addr));
         vassert(isIRAtom(st->Ist.Store.data));

         IRStoreG* sg = st->Ist.StoreG.details;
         vassert(isIRAtom(sg->addr));
         vassert(isIRAtom(sg->data));
         vassert(isIRAtom(sg->guard));

         IRLoadG* lg = st->Ist.LoadG.details;
         vassert(isIRAtom(lg->addr));
         vassert(isIRAtom(lg->alt));
         vassert(isIRAtom(lg->guard));

         cas = st->Ist.CAS.details;
         vassert(isIRAtom(cas->addr));
         vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
         vassert(isIRAtom(cas->expdLo));
         vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
         vassert(isIRAtom(cas->dataLo));

         vassert(isIRAtom(st->Ist.LLSC.addr));
         if (st->Ist.LLSC.storedata)
            vassert(isIRAtom(st->Ist.LLSC.storedata));

         d = st->Ist.Dirty.details;
         vassert(isIRAtom(d->guard));
         for (j = 0; d->args[j]; j++) {
            IRExpr* arg = d->args[j];
            if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
               vassert(isIRAtom(arg));
         if (d->mFx != Ifx_None)
            vassert(isIRAtom(d->mAddr));

         vassert(isIRAtom(st->Ist.Exit.guard));

         vpanic("considerExpensives");
/* ---------------- The main iropt entry point. ---------------- */

/* exported from this file */
/* Rules of the game:

   - IRExpr/IRStmt trees should be treated as immutable, as they
     may get shared.  So never change a field of such a tree node;
     instead construct and return a new one if needed.
*/

      IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
      Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
      VexRegisterUpdates pxControl,

   static Int n_total     = 0;
   static Int n_expensive = 0;

   Bool hasGetIorPutI, hasVorFtemps;

   /* Flatness: this function assumes that the incoming block is already flat.
      That's because all blocks that arrive here should already have been
      processed by do_minimal_initial_iropt_BB.  And that will have flattened
      them. */
   // FIXME Remove this assertion once the 'grail' machinery seems stable
   // FIXME2 The TOC-redirect-hacks generators in m_translate.c -- gen_PUSH()
   //        and gen_PO() -- don't generate flat IR, and so cause this assertion
   //        to fail.  For the time being, hack around this by flattening,
   //        rather than asserting for flatness, on the afflicted platforms.
   //        This is a kludge, yes.
   if (guest_arch == VexArchPPC64) {
      bb0 = flatten_BB(bb0); // Kludge!
   vassert(isFlatIRSB(bb0)); // How it Really Should Be (tm).

   /* If at level 0, stop now. */
   if (vex_control.iropt_level <= 0) return bb0;

   /* Now do a preliminary cleanup pass, and figure out if we also
      need to do 'expensive' optimisations.  Expensive optimisations
      are deemed necessary if the block contains any GetIs or PutIs.
      If needed, do expensive transformations and then another cheap
      cleanup pass. */
   IRSB* bb = cheap_transformations( bb0, specHelper,
                                     preciseMemExnsFn, pxControl );

   if (guest_arch == VexArchARM) {
      /* Translating Thumb2 code produces a lot of chaff.  We have to
         work extra hard to get rid of it. */
      bb = spec_helpers_BB ( bb, specHelper );
      if (pxControl < VexRegUpdAllregsAtEachInsn) {
         redundant_put_removal_BB ( bb, preciseMemExnsFn, pxControl );
      do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
      do_deadcode_BB( bb );

   if (vex_control.iropt_level > 1) {

      /* Peer at what we have, to decide how much more effort to throw
         at it. */
      considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb );

      if (hasVorFtemps && !hasGetIorPutI) {
         /* If any evidence of FP or Vector activity, CSE, as that
            tends to mop up all manner of lardy code to do with
            rounding modes.  Don't bother if hasGetIorPutI since that
            case leads into the expensive transformations, which do
            CSE anyway. */
         (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
         do_deadcode_BB( bb );

      if (hasGetIorPutI) {
            vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
         bb = expensive_transformations( bb, pxControl );
         bb = cheap_transformations( bb, specHelper,
                                     preciseMemExnsFn, pxControl );
         /* Potentially common up GetIs */
         cses = do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
            bb = cheap_transformations( bb, specHelper,
                                        preciseMemExnsFn, pxControl );

      ///////////////////////////////////////////////////////////
      // BEGIN MSVC optimised code transformation hacks
         bb = do_MSVC_HACKS(bb);
      // END MSVC optimised code transformation hacks
      ///////////////////////////////////////////////////////////

      /* Now have a go at unrolling simple (single-BB) loops.  If
         successful, clean up the results as much as possible. */

      IRSB* bb2 = maybe_loop_unroll_BB( bb, guest_addr );
         bb = cheap_transformations( bb2, specHelper,
                                     preciseMemExnsFn, pxControl );
         if (hasGetIorPutI) {
            bb = expensive_transformations( bb, pxControl );
            bb = cheap_transformations( bb, specHelper,
                                        preciseMemExnsFn, pxControl );
            /* at least do CSE and dead code removal */
            do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
            do_deadcode_BB( bb );
         if (0) vex_printf("vex iropt: unrolled a loop\n");
IRSB* do_minimal_initial_iropt_BB(IRSB* bb0) {
   /* First flatten the block out, since all other phases assume flat code. */
   IRSB* bb = flatten_BB ( bb0 );

   if (iropt_verbose) {
      vex_printf("\n========= FLAT\n\n" );

   // Remove redundant GETs
   redundant_get_removal_BB ( bb );

   // Do minimal constant prop: copy prop and constant prop only.  No folding.
   // JRS FIXME 2019Nov25: this is too weak to be effective on arm32.  For that,
   // specifying doFolding=True makes a huge difference.
   bb = cprop_BB_WRK ( bb, /*mustRetainNoOps=*/True,
                           /*doFolding=*/False );

   // Minor tidying of the block end, to remove a redundant Put of the IP right
   // at the end of the block, for example:
   /*
      ------ IMark(0x401FEC9, 2, 0) ------
      t19 = amd64g_calculate_condition[mcx=0x13]{0x58155130}(0x4:I64,0x5:I64,..
      if (t14) { PUT(184) = 0x401FED6:I64; exit-Boring }
      PUT(184) = 0x401FECB:I64   <--------------------------------
      PUT(184) = 0x401FECB:I64; exit-Boring
   */
   if (bb->stmts_used > 0) {
      const IRStmt* last = bb->stmts[bb->stmts_used - 1];
      if (last->tag == Ist_Put && last->Ist.Put.offset == bb->offsIP
          && eqIRAtom(last->Ist.Put.data, bb->next)) {
/* Copy the contents of |src| to the end of |dst|.  This entails fixing up the
   tmp numbers in |src| accordingly.  The final destination of |dst| is thrown
   away and replaced by the final destination of |src|.  This function doesn't
   make any assessment of whether it's meaningful or valid to concatenate the
   two IRSBs; it just *does* the concatenation. */
void concatenate_irsbs ( IRSB* dst, IRSB* src )

   // FIXME this is almost identical to code at the end of maybe_loop_unroll_BB.
   // Maybe try to common it up.
   Int delta = dst->tyenv->types_used;
   for (Int i = 0; i < src->tyenv->types_used; i++) {
      (void)newIRTemp(dst->tyenv, src->tyenv->types[i]);

   for (Int i = 0; i < src->stmts_used; i++) {
      IRStmt* s = deepCopyIRStmt(src->stmts[i]);
      deltaIRStmt(s, delta);
      addStmtToIRSB(dst, s);

   deltaIRExpr(src->next, delta);

   dst->next     = src->next;
   dst->jumpkind = src->jumpkind;
   vassert(dst->offsIP == src->offsIP);
/*---------------------------------------------------------------*/
/*--- end                                            ir_opt.c ---*/
/*---------------------------------------------------------------*/