/* -*- mode: C; c-basic-offset: 3; -*- */

/*---------------------------------------------------------------*/
/*--- begin                                          ir_opt.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "libvex_basictypes.h"
#include "libvex_ir.h"
#include "libvex.h"

#include "main_util.h"
#include "main_globals.h"
#include "ir_opt.h"


/* Set to 1 for lots of debugging output. */
#define DEBUG_IROPT 0

/* Set to 1 to gather some statistics.  Currently only for sameIRExprs. */
#define STATS_IROPT 0


/* What iropt does, 29 Dec 04.

   It takes an IRSB and produces a new one with the same meaning,
   defined thus:

   After execution of the new BB, all guest state and guest memory is
   the same as after execution of the original.  This is true
   regardless of how the block was exited (at the end vs side exit).

   In addition, parts of the guest state will be identical to that
   created by execution of the original at the following observation
   points:

   * In a dirty helper call, any parts of the guest state that the
     helper states that it reads or modifies will be up to date.
     Also, guest memory will be up to date.  Parts of the guest state
     not marked as being read or modified by the helper cannot be
     assumed to be up-to-date at the point where the helper is called.

   * If iropt_register_updates == VexRegUpdSpAtMemAccess :
     The guest state is only up to date as explained above
     (i.e. at SB exits and as specified by dirty helper call).
     Also, the stack pointer register is up to date at memory
     exception points (as this is needed for the stack extension
     logic in m_signals.c).

   * If iropt_register_updates == VexRegUpdUnwindregsAtMemAccess :
     Immediately prior to any load or store, those parts of the guest
     state marked as requiring precise exceptions will be up to date.
     Also, guest memory will be up to date.  Parts of the guest state
     not marked as requiring precise exceptions cannot be assumed to
     be up-to-date at the point of the load/store.

   * If iropt_register_updates == VexRegUpdAllregsAtMemAccess:
     Same as minimal, but all the guest state is up to date at memory
     exception points.

   * If iropt_register_updates == VexRegUpdAllregsAtEachInsn :
     Guest state is up to date at each instruction.

   The relative order of loads and stores (including loads/stores of
   guest memory done by dirty helpers annotated as such) is not
   changed.  However, the relative order of loads with no intervening
   stores/modifies may be changed.

   Transformation order
   ~~~~~~~~~~~~~~~~~~~~

   There are three levels of optimisation, controlled by
   vex_control.iropt_level.  Define first:

   "Cheap transformations" are the following sequence:
      * Redundant-Get removal
      * Redundant-Put removal
      * Constant propagation/folding
      * Dead code removal
      * Specialisation of clean helper functions
      * Dead code removal

   "Expensive transformations" are the following sequence:
      * CSE
      * Folding of add/sub chains
      * Redundant-GetI removal
      * Redundant-PutI removal
      * Dead code removal

   Then the transformations are as follows, as defined by
   vex_control.iropt_level:

   Level 0:
      * Flatten into atomic form.

   Level 1: the following sequence:
      * Flatten into atomic form.
      * Cheap transformations.

   Level 2: the following sequence:
      * Flatten into atomic form.
      * Cheap transformations.
      * If block contains any floating or vector types, CSE.
      * If block contains GetI or PutI, Expensive transformations.
      * Try unrolling loops.  Three possible outcomes:
        - No effect: do nothing more.
        - Unrolled a loop, and block does not contain GetI or PutI:
          Do: * CSE
              * Dead code removal
        - Unrolled a loop, and block contains GetI or PutI:
          Do: * Expensive transformations
              * Cheap transformations
*/

/* Implementation notes, 29 Dec 04.

   TODO (important): I think rPutI removal ignores precise exceptions
   and is therefore in a sense, wrong.  In the sense that PutIs are
   assumed not to write parts of the guest state that we need to have
   up-to-date at loads/stores.  So far on x86 guest that has not
   mattered since indeed only the x87 FP registers and tags are
   accessed using GetI/PutI, and there is no need so far for them to
   be up to date at mem exception points.  The rPutI pass should be
   fixed.

   TODO: improve pessimistic handling of precise exceptions
     in the tree builder.

   TODO: check interaction of rGetI and dirty helpers.

   F64i constants are treated differently from other constants.
   They are not regarded as atoms, and instead lifted off and
   bound to temps.  This allows them to participate in CSE, which
   is important for getting good performance for x86 guest code.

   CSE up F64 literals (already doing F64is)

   CSE: consider carefully the requirement for precise exns
     prior to making CSE any more aggressive. */


/*---------------------------------------------------------------*/
/*--- Finite mappery, of a sort                               ---*/
/*---------------------------------------------------------------*/

/* General map from HWord-sized thing to HWord-sized thing.  Could be
   by hashing, but it's not clear whether or not this would really be
   any faster. */

typedef
   struct {
      Bool*  inuse;
      HWord* key;
      HWord* val;
      Int    size;
      Int    used;
   }
   HashHW;

static HashHW* newHHW ( void )
{
   HashHW* h = LibVEX_Alloc_inline(sizeof(HashHW));
   h->size   = 8;
   h->used   = 0;
   h->inuse  = LibVEX_Alloc_inline(h->size * sizeof(Bool));
   h->key    = LibVEX_Alloc_inline(h->size * sizeof(HWord));
   h->val    = LibVEX_Alloc_inline(h->size * sizeof(HWord));
   return h;
}

/* Look up key in the map. */

static Bool lookupHHW ( HashHW* h, /*OUT*/HWord* val, HWord key )
{
   Int i;
   /* vex_printf("lookupHHW(%llx)\n", key ); */
   for (i = 0; i < h->used; i++) {
      if (h->inuse[i] && h->key[i] == key) {
         if (val)
            *val = h->val[i];
         return True;
      }
   }
   return False;
}

/* Add key->val to the map.  Replaces any existing binding for key. */

static void addToHHW ( HashHW* h, HWord key, HWord val )
{
   Int i, j;
   /* vex_printf("addToHHW(%llx, %llx)\n", key, val); */

   /* Find and replace existing binding, if any. */
   for (i = 0; i < h->used; i++) {
      if (h->inuse[i] && h->key[i] == key) {
         h->val[i] = val;
         return;
      }
   }

   /* Ensure a space is available. */
   if (h->used == h->size) {
      /* Copy into arrays twice the size. */
      Bool*  inuse2 = LibVEX_Alloc_inline(2 * h->size * sizeof(Bool));
      HWord* key2   = LibVEX_Alloc_inline(2 * h->size * sizeof(HWord));
      HWord* val2   = LibVEX_Alloc_inline(2 * h->size * sizeof(HWord));
      for (i = j = 0; i < h->size; i++) {
         if (!h->inuse[i]) continue;
         inuse2[j] = True;
         key2[j] = h->key[i];
         val2[j] = h->val[i];
         j++;
      }
      h->used = j;
      h->size *= 2;
      h->inuse = inuse2;
      h->key   = key2;
      h->val   = val2;
   }

   /* Finally, add it. */
   vassert(h->used < h->size);
   h->inuse[h->used] = True;
   h->key[h->used]   = key;
   h->val[h->used]   = val;
   h->used++;
}
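
/* Illustrative sketch (not part of the original file): how the three
   HashHW operations above compose.  The map is a simple linear-scan
   association list; addToHHW overwrites an existing binding in place,
   and lookupHHW tolerates a NULL 'val' out-parameter. */
#if 0
static void example_HashHW_usage ( void )
{
   HashHW* env = newHHW();
   HWord   v   = 0;
   addToHHW(env, (HWord)0x10, (HWord)0xAA);     /* bind 0x10 -> 0xAA */
   addToHHW(env, (HWord)0x10, (HWord)0xBB);     /* rebinding replaces it */
   if (lookupHHW(env, &v, (HWord)0x10))
      vassert(v == 0xBB);
   vassert(!lookupHHW(env, NULL, (HWord)0x20)); /* absent key */
}
#endif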


/*---------------------------------------------------------------*/
/*--- Flattening out a BB into atomic SSA form                ---*/
/*---------------------------------------------------------------*/

/* Non-critical helper, heuristic for reducing the number of tmp-tmp
   copies made by flattening.  If in doubt return False. */

static Bool isFlat ( IRExpr* e )
{
   if (e->tag == Iex_Get)
      return True;
   if (e->tag == Iex_Binop)
      return toBool( isIRAtom(e->Iex.Binop.arg1)
                     && isIRAtom(e->Iex.Binop.arg2) );
   if (e->tag == Iex_Load)
      return isIRAtom(e->Iex.Load.addr);
   return False;
}

/* Flatten out 'ex' so it is atomic, returning a new expression with
   the same value, after having appended extra IRTemp assignments to
   the end of 'bb'. */

static IRExpr* flatten_Expr ( IRSB* bb, IRExpr* ex )
{
   Int i;
   IRExpr** newargs;
   IRType ty = typeOfIRExpr(bb->tyenv, ex);
   IRTemp t1;

   switch (ex->tag) {

      case Iex_GetI:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_GetI(ex->Iex.GetI.descr,
                        flatten_Expr(bb, ex->Iex.GetI.ix),
                        ex->Iex.GetI.bias)));
         return IRExpr_RdTmp(t1);

      case Iex_Get:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb,
            IRStmt_WrTmp(t1, ex));
         return IRExpr_RdTmp(t1);

      case Iex_Qop: {
         IRQop* qop = ex->Iex.Qop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Qop(qop->op,
                       flatten_Expr(bb, qop->arg1),
                       flatten_Expr(bb, qop->arg2),
                       flatten_Expr(bb, qop->arg3),
                       flatten_Expr(bb, qop->arg4))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Triop: {
         IRTriop* triop = ex->Iex.Triop.details;
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Triop(triop->op,
                         flatten_Expr(bb, triop->arg1),
                         flatten_Expr(bb, triop->arg2),
                         flatten_Expr(bb, triop->arg3))));
         return IRExpr_RdTmp(t1);
      }

      case Iex_Binop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Binop(ex->Iex.Binop.op,
                         flatten_Expr(bb, ex->Iex.Binop.arg1),
                         flatten_Expr(bb, ex->Iex.Binop.arg2))));
         return IRExpr_RdTmp(t1);

      case Iex_Unop:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Unop(ex->Iex.Unop.op,
                        flatten_Expr(bb, ex->Iex.Unop.arg))));
         return IRExpr_RdTmp(t1);

      case Iex_Load:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_Load(ex->Iex.Load.end,
                        ex->Iex.Load.ty,
                        flatten_Expr(bb, ex->Iex.Load.addr))));
         return IRExpr_RdTmp(t1);

      case Iex_CCall:
         newargs = shallowCopyIRExprVec(ex->Iex.CCall.args);
         for (i = 0; newargs[i]; i++)
            newargs[i] = flatten_Expr(bb, newargs[i]);
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_CCall(ex->Iex.CCall.cee,
                         ex->Iex.CCall.retty,
                         newargs)));
         return IRExpr_RdTmp(t1);

      case Iex_ITE:
         t1 = newIRTemp(bb->tyenv, ty);
         addStmtToIRSB(bb, IRStmt_WrTmp(t1,
            IRExpr_ITE(flatten_Expr(bb, ex->Iex.ITE.cond),
                       flatten_Expr(bb, ex->Iex.ITE.iftrue),
                       flatten_Expr(bb, ex->Iex.ITE.iffalse))));
         return IRExpr_RdTmp(t1);

      case Iex_Const:
         /* Lift F64i constants out onto temps so they can be CSEd
            later. */
         if (ex->Iex.Const.con->tag == Ico_F64i) {
            t1 = newIRTemp(bb->tyenv, ty);
            addStmtToIRSB(bb, IRStmt_WrTmp(t1,
               IRExpr_Const(ex->Iex.Const.con)));
            return IRExpr_RdTmp(t1);
         } else {
            /* Leave all other constants alone. */
            return ex;
         }

      case Iex_RdTmp:
         return ex;

      default:
         vex_printf("\n");
         ppIRExpr(ex);
         vex_printf("\n");
         vpanic("flatten_Expr");
   }
}

/* Append a completely flattened form of 'st' to the end of 'bb'. */

static void flatten_Stmt ( IRSB* bb, IRStmt* st )
{
   Int i;
   IRExpr   *e1, *e2, *e3, *e4, *e5;
   IRDirty  *d, *d2;
   IRCAS    *cas, *cas2;
   IRPutI   *puti, *puti2;
   IRLoadG  *lg;
   IRStoreG *sg;
   switch (st->tag) {
      case Ist_Put:
         if (isIRAtom(st->Ist.Put.data)) {
            /* optimisation to reduce the amount of heap wasted
               by the flattener */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.Put.data);
            addStmtToIRSB(bb, IRStmt_Put(st->Ist.Put.offset, e1));
         }
         break;
      case Ist_PutI:
         puti = st->Ist.PutI.details;
         e1 = flatten_Expr(bb, puti->ix);
         e2 = flatten_Expr(bb, puti->data);
         puti2 = mkIRPutI(puti->descr, e1, puti->bias, e2);
         addStmtToIRSB(bb, IRStmt_PutI(puti2));
         break;
      case Ist_WrTmp:
         if (isFlat(st->Ist.WrTmp.data)) {
            /* optimisation, to reduce the number of tmp-tmp
               copies generated */
            addStmtToIRSB(bb, st);
         } else {
            /* general case, always correct */
            e1 = flatten_Expr(bb, st->Ist.WrTmp.data);
            addStmtToIRSB(bb, IRStmt_WrTmp(st->Ist.WrTmp.tmp, e1));
         }
         break;
      case Ist_Store:
         e1 = flatten_Expr(bb, st->Ist.Store.addr);
         e2 = flatten_Expr(bb, st->Ist.Store.data);
         addStmtToIRSB(bb, IRStmt_Store(st->Ist.Store.end, e1,e2));
         break;
      case Ist_StoreG:
         sg = st->Ist.StoreG.details;
         e1 = flatten_Expr(bb, sg->addr);
         e2 = flatten_Expr(bb, sg->data);
         e3 = flatten_Expr(bb, sg->guard);
         addStmtToIRSB(bb, IRStmt_StoreG(sg->end, e1, e2, e3));
         break;
      case Ist_LoadG:
         lg = st->Ist.LoadG.details;
         e1 = flatten_Expr(bb, lg->addr);
         e2 = flatten_Expr(bb, lg->alt);
         e3 = flatten_Expr(bb, lg->guard);
         addStmtToIRSB(bb, IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
                                        e1, e2, e3));
         break;
      case Ist_CAS:
         cas  = st->Ist.CAS.details;
         e1   = flatten_Expr(bb, cas->addr);
         e2   = cas->expdHi ? flatten_Expr(bb, cas->expdHi) : NULL;
         e3   = flatten_Expr(bb, cas->expdLo);
         e4   = cas->dataHi ? flatten_Expr(bb, cas->dataHi) : NULL;
         e5   = flatten_Expr(bb, cas->dataLo);
         cas2 = mkIRCAS( cas->oldHi, cas->oldLo, cas->end,
                         e1, e2, e3, e4, e5 );
         addStmtToIRSB(bb, IRStmt_CAS(cas2));
         break;
      case Ist_LLSC:
         e1 = flatten_Expr(bb, st->Ist.LLSC.addr);
         e2 = st->Ist.LLSC.storedata
                 ? flatten_Expr(bb, st->Ist.LLSC.storedata)
                 : NULL;
         addStmtToIRSB(bb, IRStmt_LLSC(st->Ist.LLSC.end,
                                       st->Ist.LLSC.result, e1, e2));
         break;
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         d2 = emptyIRDirty();
         *d2 = *d;
         d2->args = shallowCopyIRExprVec(d2->args);
         if (d2->mFx != Ifx_None) {
            d2->mAddr = flatten_Expr(bb, d2->mAddr);
         } else {
            vassert(d2->mAddr == NULL);
         }
         d2->guard = flatten_Expr(bb, d2->guard);
         for (i = 0; d2->args[i]; i++) {
            IRExpr* arg = d2->args[i];
            if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
               d2->args[i] = flatten_Expr(bb, arg);
         }
         addStmtToIRSB(bb, IRStmt_Dirty(d2));
         break;
      case Ist_NoOp:
      case Ist_MBE:
      case Ist_IMark:
         addStmtToIRSB(bb, st);
         break;
      case Ist_AbiHint:
         e1 = flatten_Expr(bb, st->Ist.AbiHint.base);
         e2 = flatten_Expr(bb, st->Ist.AbiHint.nia);
         addStmtToIRSB(bb, IRStmt_AbiHint(e1, st->Ist.AbiHint.len, e2));
         break;
      case Ist_Exit:
         e1 = flatten_Expr(bb, st->Ist.Exit.guard);
         addStmtToIRSB(bb, IRStmt_Exit(e1, st->Ist.Exit.jk,
                                       st->Ist.Exit.dst,
                                       st->Ist.Exit.offsIP));
         break;
      default:
         vex_printf("\n");
         ppIRStmt(st);
         vex_printf("\n");
         vpanic("flatten_Stmt");
   }
}

static IRSB* flatten_BB ( IRSB* in )
{
   Int   i;
   IRSB* out;
   out = emptyIRSB();
   out->tyenv = deepCopyIRTypeEnv( in->tyenv );
   for (i = 0; i < in->stmts_used; i++)
      if (in->stmts[i])
         flatten_Stmt( out, in->stmts[i] );
   out->next     = flatten_Expr( out, in->next );
   out->jumpkind = in->jumpkind;
   out->offsIP   = in->offsIP;
   return out;
}
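
/* Illustrative sketch (not part of the original file): the effect of
   flattening on one nested statement.  A statement such as

      PUT(0) = Add32(GET:I32(8), Shl32(GET:I32(12), 0x1:I8))

   is rewritten by flatten_Stmt/flatten_Expr into a sequence whose
   operands are all atoms (RdTmp or Const), roughly:

      t1 = GET:I32(8)
      t2 = GET:I32(12)
      t3 = Shl32(t2, 0x1:I8)
      t4 = Add32(t1, t3)
      PUT(0) = t4

   so that every subsequent pass can rely on the flat invariant. */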


/*---------------------------------------------------------------*/
/*--- In-place removal of redundant GETs                      ---*/
/*---------------------------------------------------------------*/

/* Scan forwards, building up an environment binding (min offset, max
   offset) pairs to values, which will either be temps or constants.

   On seeing 't = Get(minoff,maxoff)', look up (minoff,maxoff) in the
   env and if it matches, replace the Get with the stored value.  If
   there is no match, add a (minoff,maxoff) :-> t binding.

   On seeing 'Put (minoff,maxoff) = t or c', first remove in the env
   any binding which fully or partially overlaps with (minoff,maxoff).
   Then add a new (minoff,maxoff) :-> t or c binding. */

/* Extract the min/max offsets from a guest state array descriptor. */

inline
static void getArrayBounds ( IRRegArray* descr,
                             UInt* minoff, UInt* maxoff )
{
   *minoff = descr->base;
   *maxoff = *minoff + descr->nElems*sizeofIRType(descr->elemTy) - 1;
   vassert((*minoff & ~0xFFFF) == 0);
   vassert((*maxoff & ~0xFFFF) == 0);
   vassert(*minoff <= *maxoff);
}

/* Create keys, of the form ((minoffset << 16) | maxoffset). */

static UInt mk_key_GetPut ( Int offset, IRType ty )
{
   /* offset should fit in 16 bits. */
   UInt minoff = offset;
   UInt maxoff = minoff + sizeofIRType(ty) - 1;
   vassert((minoff & ~0xFFFF) == 0);
   vassert((maxoff & ~0xFFFF) == 0);
   return (minoff << 16) | maxoff;
}

static UInt mk_key_GetIPutI ( IRRegArray* descr )
{
   UInt minoff, maxoff;
   getArrayBounds( descr, &minoff, &maxoff );
   vassert((minoff & ~0xFFFF) == 0);
   vassert((maxoff & ~0xFFFF) == 0);
   return (minoff << 16) | maxoff;
}
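
/* Worked example (illustrative, not part of the original file): a Put
   of an I32 at guest offset 16 covers bytes 16..19, so
   mk_key_GetPut(16, Ity_I32) == (16 << 16) | 19 == 0x00100013.  A Get
   of an I64 at offset 18 covers bytes 18..25, giving key 0x00120019.
   The two ranges overlap, which invalidateOverlaps() below detects by
   unpacking and comparing the 16-bit halves of the keys. */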

/* Supposing h has keys of the form generated by mk_key_GetPut and
   mk_key_GetIPutI, invalidate any key which overlaps (k_lo
   .. k_hi). */

static void invalidateOverlaps ( HashHW* h, UInt k_lo, UInt k_hi )
{
   Int  j;
   UInt e_lo, e_hi;
   vassert(k_lo <= k_hi);
   /* invalidate any env entries which in any way overlap (k_lo
      .. k_hi) */
   /* vex_printf("invalidate %d .. %d\n", k_lo, k_hi ); */

   for (j = 0; j < h->used; j++) {
      if (!h->inuse[j])
         continue;
      e_lo = (((UInt)h->key[j]) >> 16) & 0xFFFF;
      e_hi = ((UInt)h->key[j]) & 0xFFFF;
      vassert(e_lo <= e_hi);
      if (e_hi < k_lo || k_hi < e_lo)
         continue; /* no overlap possible */
      else
         /* overlap; invalidate */
         h->inuse[j] = False;
   }
}

static void redundant_get_removal_BB ( IRSB* bb )
{
   HashHW* env = newHHW();
   UInt    key = 0; /* keep gcc -O happy */
   Int     i, j;
   HWord   val;

   for (i = 0; i < bb->stmts_used; i++) {
      IRStmt* st = bb->stmts[i];

      if (st->tag == Ist_NoOp)
         continue;

      /* Deal with Gets */
      if (st->tag == Ist_WrTmp
          && st->Ist.WrTmp.data->tag == Iex_Get) {
         /* st is 't = Get(...)'.  Look up in the environment and see
            if the Get can be replaced. */
         IRExpr* get = st->Ist.WrTmp.data;
         key = (HWord)mk_key_GetPut( get->Iex.Get.offset,
                                     get->Iex.Get.ty );
         if (lookupHHW(env, &val, (HWord)key)) {
            /* found it */
            /* Note, we could do better here.  If the types are
               different we don't do the substitution, since doing so
               could lead to invalidly-typed IR.  An improvement would
               be to stick in a reinterpret-style cast, although that
               would make maintaining flatness more difficult. */
            IRExpr* valE    = (IRExpr*)val;
            Bool    typesOK = toBool( typeOfIRExpr(bb->tyenv,valE)
                                      == st->Ist.WrTmp.data->Iex.Get.ty );
            if (typesOK && DEBUG_IROPT) {
               vex_printf("rGET: "); ppIRExpr(get);
               vex_printf("  ->  "); ppIRExpr(valE);
               vex_printf("\n");
            }
            if (typesOK)
               bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, valE);
         } else {
            /* Not found, but at least we know that t and the Get(...)
               are now associated.  So add a binding to reflect that
               fact. */
            addToHHW( env, (HWord)key,
                           (HWord)(void*)(IRExpr_RdTmp(st->Ist.WrTmp.tmp)) );
         }
      }

      /* Deal with Puts: invalidate any env entries overlapped by this
         Put */
      if (st->tag == Ist_Put || st->tag == Ist_PutI) {
         UInt k_lo, k_hi;
         if (st->tag == Ist_Put) {
            key = mk_key_GetPut( st->Ist.Put.offset,
                                 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
         } else {
            vassert(st->tag == Ist_PutI);
            key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
         }

         k_lo = (key >> 16) & 0xFFFF;
         k_hi = key & 0xFFFF;
         invalidateOverlaps(env, k_lo, k_hi);
      }
      else
      if (st->tag == Ist_Dirty) {
         /* Deal with dirty helpers which write or modify guest state.
            Invalidate the entire env.  We could do a lot better
            here. */
         IRDirty* d      = st->Ist.Dirty.details;
         Bool     writes = False;
         for (j = 0; j < d->nFxState; j++) {
            if (d->fxState[j].fx == Ifx_Modify
                || d->fxState[j].fx == Ifx_Write)
               writes = True;
         }
         if (writes) {
            /* dump the entire env (not clever, but correct ...) */
            for (j = 0; j < env->used; j++)
               env->inuse[j] = False;
            if (0) vex_printf("rGET: trash env due to dirty helper\n");
         }
      }

      /* add this one to the env, if appropriate */
      if (st->tag == Ist_Put) {
         vassert(isIRAtom(st->Ist.Put.data));
         addToHHW( env, (HWord)key, (HWord)(st->Ist.Put.data));
      }

   } /* for (i = 0; i < bb->stmts_used; i++) */

}
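
/* Illustrative sketch (not part of the original file): the effect of
   redundant_get_removal_BB on a flat block.  Two reads of the same
   guest slice with no intervening overlapping write share one value:

      t1 = GET:I32(8)            t1 = GET:I32(8)
      t2 = GET:I32(8)     ==>    t2 = t1
      PUT(10) = t3               PUT(10) = t3
      t4 = GET:I32(8)            t4 = GET:I32(8)

   The Put at offsets 10..13 partially overlaps 8..11, so it invalidates
   the binding and the last Get is kept.  A Put of an I32 at exactly
   offset 8 would instead let t4 pick up t3 directly. */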
706 /*---------------------------------------------------------------*/
707 /*--- In-place removal of redundant PUTs ---*/
708 /*---------------------------------------------------------------*/
710 /* Find any Get uses in st and invalidate any partially or fully
711 overlapping ranges listed in env. Due to the flattening phase, the
712 only stmt kind we expect to find a Get on is IRStmt_WrTmp. */
714 static void handle_gets_Stmt (
715 HashHW* env,
716 IRStmt* st,
717 Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
718 VexRegisterUpdates pxControl
721 Int j;
722 UInt key = 0; /* keep gcc -O happy */
723 Bool isGet;
724 Bool memRW = False;
725 IRExpr* e;
727 switch (st->tag) {
729 /* This is the only interesting case. Deal with Gets in the RHS
730 expression. */
731 case Ist_WrTmp:
732 e = st->Ist.WrTmp.data;
733 switch (e->tag) {
734 case Iex_Get:
735 isGet = True;
736 key = mk_key_GetPut ( e->Iex.Get.offset, e->Iex.Get.ty );
737 break;
738 case Iex_GetI:
739 isGet = True;
740 key = mk_key_GetIPutI ( e->Iex.GetI.descr );
741 break;
742 case Iex_Load:
743 isGet = False;
744 memRW = True;
745 break;
746 default:
747 isGet = False;
749 if (isGet) {
750 UInt k_lo, k_hi;
751 k_lo = (key >> 16) & 0xFFFF;
752 k_hi = key & 0xFFFF;
753 invalidateOverlaps(env, k_lo, k_hi);
755 break;
757 /* Be very conservative for dirty helper calls; dump the entire
758 environment. The helper might read guest state, in which
759 case it needs to be flushed first. Also, the helper might
760 access guest memory, in which case all parts of the guest
761 state requiring precise exceptions needs to be flushed. The
762 crude solution is just to flush everything; we could easily
763 enough do a lot better if needed. */
764 /* Probably also overly-conservative, but also dump everything
765 if we hit a memory bus event (fence, lock, unlock). Ditto
766 AbiHints, CASs, LLs and SCs. */
767 case Ist_AbiHint:
768 vassert(isIRAtom(st->Ist.AbiHint.base));
769 vassert(isIRAtom(st->Ist.AbiHint.nia));
770 /* fall through */
771 case Ist_MBE:
772 case Ist_Dirty:
773 case Ist_CAS:
774 case Ist_LLSC:
775 for (j = 0; j < env->used; j++)
776 env->inuse[j] = False;
777 break;
779 /* all other cases are boring. */
780 case Ist_Store:
781 vassert(isIRAtom(st->Ist.Store.addr));
782 vassert(isIRAtom(st->Ist.Store.data));
783 memRW = True;
784 break;
785 case Ist_StoreG: {
786 IRStoreG* sg = st->Ist.StoreG.details;
787 vassert(isIRAtom(sg->addr));
788 vassert(isIRAtom(sg->data));
789 vassert(isIRAtom(sg->guard));
790 memRW = True;
791 break;
793 case Ist_LoadG: {
794 IRLoadG* lg = st->Ist.LoadG.details;
795 vassert(isIRAtom(lg->addr));
796 vassert(isIRAtom(lg->alt));
797 vassert(isIRAtom(lg->guard));
798 memRW = True;
799 break;
801 case Ist_Exit:
802 vassert(isIRAtom(st->Ist.Exit.guard));
803 break;
805 case Ist_Put:
806 vassert(isIRAtom(st->Ist.Put.data));
807 break;
809 case Ist_PutI:
810 vassert(isIRAtom(st->Ist.PutI.details->ix));
811 vassert(isIRAtom(st->Ist.PutI.details->data));
812 break;
814 case Ist_NoOp:
815 case Ist_IMark:
816 break;
818 default:
819 vex_printf("\n");
820 ppIRStmt(st);
821 vex_printf("\n");
822 vpanic("handle_gets_Stmt");
825 if (memRW) {
826 /* This statement accesses memory. So we might need to dump all parts
827 of the environment corresponding to guest state that may not
828 be reordered with respect to memory references. That means
829 at least the stack pointer. */
830 switch (pxControl) {
831 case VexRegUpdAllregsAtMemAccess:
832 /* Precise exceptions required at mem access.
833 Flush all guest state. */
834 for (j = 0; j < env->used; j++)
835 env->inuse[j] = False;
836 break;
837 case VexRegUpdSpAtMemAccess:
838 /* We need to dump the stack pointer
839 (needed for stack extension in m_signals.c).
840 preciseMemExnsFn will use vex_control.iropt_register_updates
841 to verify only the sp is to be checked. */
842 /* fallthrough */
843 case VexRegUpdUnwindregsAtMemAccess:
844 for (j = 0; j < env->used; j++) {
845 if (!env->inuse[j])
846 continue;
847 /* Just flush the minimal amount required, as computed by
848 preciseMemExnsFn. */
849 HWord k_lo = (env->key[j] >> 16) & 0xFFFF;
850 HWord k_hi = env->key[j] & 0xFFFF;
851 if (preciseMemExnsFn( k_lo, k_hi, pxControl ))
852 env->inuse[j] = False;
854 break;
855 case VexRegUpdAllregsAtEachInsn:
856 // VexRegUpdAllregsAtEachInsn cannot happen here.
857 // fall through
858 case VexRegUpd_INVALID:
859 default:
860 vassert(0);
862 } /* if (memRW) */

/* Scan backwards, building up a set of (min offset, max
   offset) pairs, indicating those parts of the guest state
   for which the next event is a write.

   On seeing a conditional exit, empty the set.

   On seeing 'Put (minoff,maxoff) = t or c', if (minoff,maxoff) is
   completely within the set, remove the Put.  Otherwise, add
   (minoff,maxoff) to the set.

   On seeing 'Get (minoff,maxoff)', remove any part of the set
   overlapping (minoff,maxoff).  The same has to happen for any events
   which implicitly read parts of the guest state: dirty helper calls
   and loads/stores.
*/
883 static void redundant_put_removal_BB (
884 IRSB* bb,
885 Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
886 VexRegisterUpdates pxControl
889 Int i, j;
890 Bool isPut;
891 IRStmt* st;
892 UInt key = 0; /* keep gcc -O happy */
894 // vassert(pxControl < VexRegUpdAllregsAtEachInsn);
896 HashHW* env = newHHW();
898 /* Initialise the running env with the fact that the final exit
899 writes the IP (or, whatever it claims to write. We don't
900 care.) */
901 key = mk_key_GetPut(bb->offsIP, typeOfIRExpr(bb->tyenv, bb->next));
902 addToHHW(env, (HWord)key, 0);
904 /* And now scan backwards through the statements. */
905 for (i = bb->stmts_used-1; i >= 0; i--) {
906 st = bb->stmts[i];
908 if (st->tag == Ist_NoOp)
909 continue;
911 /* Deal with conditional exits. */
912 if (st->tag == Ist_Exit) {
913 //Bool re_add;
914 /* Need to throw out from the env, any part of it which
915 doesn't overlap with the guest state written by this exit.
916 Since the exit only writes one section, it's simplest to
917 do this: (1) check whether env contains a write that
918 completely overlaps the write done by this exit; (2) empty
919 out env; and (3) if (1) was true, add the write done by
920 this exit.
922 To make (1) a bit simpler, merely search for a write that
923 exactly matches the one done by this exit. That's safe
924 because it will fail as often or more often than a full
925 overlap check, and failure to find an overlapping write in
926 env is the safe case (we just nuke env if that
927 happens). */
928 //vassert(isIRAtom(st->Ist.Exit.guard));
929 /* (1) */
930 //key = mk_key_GetPut(st->Ist.Exit.offsIP,
931 // typeOfIRConst(st->Ist.Exit.dst));
932 //re_add = lookupHHW(env, NULL, key);
933 /* (2) */
934 for (j = 0; j < env->used; j++)
935 env->inuse[j] = False;
936 /* (3) */
937 //if (0 && re_add)
938 // addToHHW(env, (HWord)key, 0);
939 continue;
942 /* Deal with Puts */
943 switch (st->tag) {
944 case Ist_Put:
945 isPut = True;
946 key = mk_key_GetPut( st->Ist.Put.offset,
947 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
948 vassert(isIRAtom(st->Ist.Put.data));
949 break;
950 case Ist_PutI:
951 isPut = True;
952 key = mk_key_GetIPutI( st->Ist.PutI.details->descr );
953 vassert(isIRAtom(st->Ist.PutI.details->ix));
954 vassert(isIRAtom(st->Ist.PutI.details->data));
955 break;
956 default:
957 isPut = False;
959 if (isPut && st->tag != Ist_PutI) {
960 /* See if any single entry in env overlaps this Put. This is
961 simplistic in that the transformation is valid if, say, two
962 or more entries in the env overlap this Put, but the use of
963 lookupHHW will only find a single entry which exactly
964 overlaps this Put. This is suboptimal but safe. */
965 if (lookupHHW(env, NULL, (HWord)key)) {
966 /* This Put is redundant because a later one will overwrite
967 it. So NULL (nop) it out. */
968 if (DEBUG_IROPT) {
969 vex_printf("rPUT: "); ppIRStmt(st);
970 vex_printf("\n");
972 bb->stmts[i] = IRStmt_NoOp();
973 } else {
974 /* We can't demonstrate that this Put is redundant, so add it
975 to the running collection. */
976 addToHHW(env, (HWord)key, 0);
978 continue;
981 /* Deal with Gets. These remove bits of the environment since
982 appearance of a Get means that the next event for that slice
983 of the guest state is no longer a write, but a read. Also
984 deals with implicit reads of guest state needed to maintain
985 precise exceptions. */
986 handle_gets_Stmt( env, st, preciseMemExnsFn, pxControl );
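
/* Illustrative sketch (not part of the original file): the effect of
   redundant_put_removal_BB.  Scanning backwards, a Put whose exact
   (minoff,maxoff) range is written again later, with no intervening
   read, conditional exit, or memory access that requires it to be up
   to date, is turned into a no-op:

      PUT(0) = t1         ==>    IR-NoOp
      t2 = Add32(t1,t1)          t2 = Add32(t1,t1)
      PUT(0) = t2                PUT(0) = t2

   An intervening GET:I32(0), exit, or (depending on pxControl) a
   load/store covering a precise-exception slice would keep the first
   Put alive. */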
991 /*---------------------------------------------------------------*/
992 /*--- Constant propagation and folding ---*/
993 /*---------------------------------------------------------------*/
995 #if STATS_IROPT
996 /* How often sameIRExprs was invoked */
997 static UInt invocation_count;
998 /* How often sameIRExprs recursed through IRTemp assignments */
999 static UInt recursion_count;
1000 /* How often sameIRExprs found identical IRExprs */
1001 static UInt success_count;
1002 /* How often recursing through assignments to IRTemps helped
1003 establishing equality. */
1004 static UInt recursion_success_count;
1005 /* Whether or not recursing through an IRTemp assignment helped
1006 establishing IRExpr equality for a given sameIRExprs invocation. */
1007 static Bool recursion_helped;
1008 /* Whether or not a given sameIRExprs invocation recursed through an
1009 IRTemp assignment */
1010 static Bool recursed;
1011 /* Maximum number of nodes ever visited when comparing two IRExprs. */
1012 static UInt max_nodes_visited;
1013 #endif /* STATS_IROPT */
1015 /* Count the number of nodes visited for a given sameIRExprs invocation. */
1016 static UInt num_nodes_visited;
1018 /* Do not visit more than NODE_LIMIT nodes when comparing two IRExprs.
1019 This is to guard against performance degradation by visiting large
1020 trees without success. */
1021 #define NODE_LIMIT 30

/* The env in this section is a map from IRTemp to IRExpr*,
   that is, an array indexed by IRTemp. */

/* Do both expressions compute the same value? The answer is generally
   conservative, i.e. it will report that the expressions do not compute
   the same value when in fact they do. The reason is that we do not
   keep track of changes in the guest state and memory. Thusly, two
   Get's, GetI's or Load's, even when accessing the same location, will be
   assumed to compute different values. After all the accesses may happen
   at different times and the guest state / memory can have changed in
   the meantime.

   XXX IMPORTANT XXX the two expressions must have the same IR type.
   DO NOT CALL HERE WITH DIFFERENTLY-TYPED EXPRESSIONS. */

/* JRS 20-Mar-2012: split sameIRExprs_aux into a fast inlineable
   wrapper that deals with the common tags-don't-match case, and a
   slower out of line general case.  Saves a few insns. */
1043 __attribute__((noinline))
1044 static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 );
1046 inline
1047 static Bool sameIRExprs_aux ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
1049 if (e1->tag != e2->tag) return False;
1050 return sameIRExprs_aux2(env, e1, e2);
1053 __attribute__((noinline))
1054 static Bool sameIRExprs_aux2 ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
1056 if (num_nodes_visited++ > NODE_LIMIT) return False;
1058 switch (e1->tag) {
1059 case Iex_RdTmp:
1060 if (e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp) return True;
1062 if (env[e1->Iex.RdTmp.tmp] && env[e2->Iex.RdTmp.tmp]) {
1063 Bool same = sameIRExprs_aux(env, env[e1->Iex.RdTmp.tmp],
1064 env[e2->Iex.RdTmp.tmp]);
1065 #if STATS_IROPT
1066 recursed = True;
1067 if (same) recursion_helped = True;
1068 #endif
1069 return same;
1071 return False;
1073 case Iex_Get:
1074 case Iex_GetI:
1075 case Iex_Load:
1076 /* Guest state / memory could have changed in the meantime. */
1077 return False;
1079 case Iex_Binop:
1080 return toBool( e1->Iex.Binop.op == e2->Iex.Binop.op
1081 && sameIRExprs_aux( env, e1->Iex.Binop.arg1,
1082 e2->Iex.Binop.arg1 )
1083 && sameIRExprs_aux( env, e1->Iex.Binop.arg2,
1084 e2->Iex.Binop.arg2 ));
1086 case Iex_Unop:
1087 return toBool( e1->Iex.Unop.op == e2->Iex.Unop.op
1088 && sameIRExprs_aux( env, e1->Iex.Unop.arg,
1089 e2->Iex.Unop.arg ));
1091 case Iex_Const: {
1092 IRConst *c1 = e1->Iex.Const.con;
1093 IRConst *c2 = e2->Iex.Const.con;
1094 vassert(c1->tag == c2->tag);
1095 switch (c1->tag) {
1096 case Ico_U1: return toBool( c1->Ico.U1 == c2->Ico.U1 );
1097 case Ico_U8: return toBool( c1->Ico.U8 == c2->Ico.U8 );
1098 case Ico_U16: return toBool( c1->Ico.U16 == c2->Ico.U16 );
1099 case Ico_U32: return toBool( c1->Ico.U32 == c2->Ico.U32 );
1100 case Ico_U64: return toBool( c1->Ico.U64 == c2->Ico.U64 );
1101 default: break;
1103 return False;
1106 case Iex_Triop: {
1107 IRTriop *tri1 = e1->Iex.Triop.details;
1108 IRTriop *tri2 = e2->Iex.Triop.details;
1109 return toBool( tri1->op == tri2->op
1110 && sameIRExprs_aux( env, tri1->arg1, tri2->arg1 )
1111 && sameIRExprs_aux( env, tri1->arg2, tri2->arg2 )
1112 && sameIRExprs_aux( env, tri1->arg3, tri2->arg3 ));
1115 case Iex_ITE:
1116 return toBool( sameIRExprs_aux( env, e1->Iex.ITE.cond,
1117 e2->Iex.ITE.cond )
1118 && sameIRExprs_aux( env, e1->Iex.ITE.iftrue,
1119 e2->Iex.ITE.iftrue )
1120 && sameIRExprs_aux( env, e1->Iex.ITE.iffalse,
1121 e2->Iex.ITE.iffalse ));
1123 default:
1124 /* Not very likely to be "same". */
1125 break;
1128 return False;
1131 inline
1132 static Bool sameIRExprs ( IRExpr** env, IRExpr* e1, IRExpr* e2 )
1134 Bool same;
1136 num_nodes_visited = 0;
1137 same = sameIRExprs_aux(env, e1, e2);
1139 #if STATS_IROPT
1140 ++invocation_count;
1141 if (recursed) ++recursion_count;
1142 success_count += same;
1143 if (same && recursion_helped)
1144 ++recursion_success_count;
1145 if (num_nodes_visited > max_nodes_visited)
1146 max_nodes_visited = num_nodes_visited;
1147 recursed = False; /* reset */
1148 recursion_helped = False; /* reset */
1149 #endif /* STATS_IROPT */
1151 return same;
1155 /* Debugging-only hack (not used in production runs): make a guess
1156 whether sameIRExprs might assert due to the two args being of
1157 different types. If in doubt return False. Is only used when
1158 --vex-iropt-level > 0, that is, vex_control.iropt_verbosity > 0.
1159 Bad because it duplicates functionality from typeOfIRExpr. See
1160 comment on the single use point below for rationale. */
1161 static
1162 Bool debug_only_hack_sameIRExprs_might_assert ( IRExpr* e1, IRExpr* e2 )
1164 if (e1->tag != e2->tag) return False;
1165 switch (e1->tag) {
1166 case Iex_Const: {
1167 /* The only interesting case */
1168 IRConst *c1 = e1->Iex.Const.con;
1169 IRConst *c2 = e2->Iex.Const.con;
1170 return c1->tag != c2->tag;
1172 default:
1173 break;
1175 return False;
1179 /* Is this literally IRExpr_Const(IRConst_U32(0)) ? */
1180 static Bool isZeroU32 ( IRExpr* e )
1182 return toBool( e->tag == Iex_Const
1183 && e->Iex.Const.con->tag == Ico_U32
1184 && e->Iex.Const.con->Ico.U32 == 0);
1187 /* Is this literally IRExpr_Const(IRConst_U64(0)) ?
1188 Currently unused; commented out to avoid compiler warning */
1189 #if 0
1190 static Bool isZeroU64 ( IRExpr* e )
1192 return toBool( e->tag == Iex_Const
1193 && e->Iex.Const.con->tag == Ico_U64
1194 && e->Iex.Const.con->Ico.U64 == 0);
1196 #endif
1198 /* Is this literally IRExpr_Const(IRConst_V128(0)) ? */
1199 static Bool isZeroV128 ( IRExpr* e )
1201 return toBool( e->tag == Iex_Const
1202 && e->Iex.Const.con->tag == Ico_V128
1203 && e->Iex.Const.con->Ico.V128 == 0x0000);
1206 /* Is this literally IRExpr_Const(IRConst_V128(1...1)) ? */
1207 static Bool isOnesV128 ( IRExpr* e )
1209 return toBool( e->tag == Iex_Const
1210 && e->Iex.Const.con->tag == Ico_V128
1211 && e->Iex.Const.con->Ico.V128 == 0xFFFF);
1214 /* Is this literally IRExpr_Const(IRConst_V256(0)) ? */
1215 static Bool isZeroV256 ( IRExpr* e )
1217 return toBool( e->tag == Iex_Const
1218 && e->Iex.Const.con->tag == Ico_V256
1219 && e->Iex.Const.con->Ico.V256 == 0x00000000);
1222 /* Is this an integer constant with value 0 ? */
1223 static Bool isZeroU ( IRExpr* e )
1225 if (e->tag != Iex_Const) return False;
1226 switch (e->Iex.Const.con->tag) {
1227 case Ico_U1: return toBool( e->Iex.Const.con->Ico.U1 == 0);
1228 case Ico_U8: return toBool( e->Iex.Const.con->Ico.U8 == 0);
1229 case Ico_U16: return toBool( e->Iex.Const.con->Ico.U16 == 0);
1230 case Ico_U32: return toBool( e->Iex.Const.con->Ico.U32 == 0);
1231 case Ico_U64: return toBool( e->Iex.Const.con->Ico.U64 == 0);
1232 case Ico_V256: return toBool( e->Iex.Const.con->Ico.V256 == 0x00000000);
1233 default: vpanic("isZeroU");
1237 /* Is this an integer constant with value 1---1b ? */
1238 static Bool isOnesU ( IRExpr* e )
1240 if (e->tag != Iex_Const) return False;
1241 switch (e->Iex.Const.con->tag) {
1242 case Ico_U1: return toBool( e->Iex.Const.con->Ico.U1 == True);
1243 case Ico_U8: return toBool( e->Iex.Const.con->Ico.U8 == 0xFF);
1244 case Ico_U16: return toBool( e->Iex.Const.con->Ico.U16 == 0xFFFF);
1245 case Ico_U32: return toBool( e->Iex.Const.con->Ico.U32
1246 == 0xFFFFFFFF);
1247 case Ico_U64: return toBool( e->Iex.Const.con->Ico.U64
1248 == 0xFFFFFFFFFFFFFFFFULL);
1249 default: ppIRExpr(e); vpanic("isOnesU");
1253 static Bool notBool ( Bool b )
1255 if (b == True) return False;
1256 if (b == False) return True;
1257 vpanic("notBool");
1260 /* Make a zero which has the same type as the result of the given
1261 primop. */
1262 static IRExpr* mkZeroOfPrimopResultType ( IROp op )
1264 switch (op) {
1265 case Iop_CmpNE32: return IRExpr_Const(IRConst_U1(toBool(0)));
1266 case Iop_Xor8: return IRExpr_Const(IRConst_U8(0));
1267 case Iop_Xor16: return IRExpr_Const(IRConst_U16(0));
1268 case Iop_Sub32:
1269 case Iop_Xor32: return IRExpr_Const(IRConst_U32(0));
1270 case Iop_And64:
1271 case Iop_Sub64:
1272 case Iop_Xor64: return IRExpr_Const(IRConst_U64(0));
1273 case Iop_XorV128:
1274 case Iop_AndV128: return IRExpr_Const(IRConst_V128(0));
1275 case Iop_XorV256:
1276 case Iop_AndV256: return IRExpr_Const(IRConst_V256(0));
1277 default: vpanic("mkZeroOfPrimopResultType: bad primop");
1281 /* Make a value containing all 1-bits, which has the same type as the
1282 result of the given primop. */
1283 static IRExpr* mkOnesOfPrimopResultType ( IROp op )
1285 switch (op) {
1286 case Iop_CmpEQ32:
1287 case Iop_CmpEQ64:
1288 return IRExpr_Const(IRConst_U1(toBool(1)));
1289 case Iop_Or8:
1290 return IRExpr_Const(IRConst_U8(0xFF));
1291 case Iop_Or16:
1292 return IRExpr_Const(IRConst_U16(0xFFFF));
1293 case Iop_Or32:
1294 return IRExpr_Const(IRConst_U32(0xFFFFFFFF));
1295 case Iop_CmpEQ8x8:
1296 case Iop_CmpEQ16x4:
1297 case Iop_CmpEQ32x2:
1298 case Iop_Or64:
1299 return IRExpr_Const(IRConst_U64(0xFFFFFFFFFFFFFFFFULL));
1300 case Iop_CmpEQ8x16:
1301 case Iop_CmpEQ16x8:
1302 case Iop_CmpEQ32x4:
1303 case Iop_CmpEQ64x2:
1304 return IRExpr_Const(IRConst_V128(0xFFFF));
1305 case Iop_CmpEQ8x32:
1306 case Iop_CmpEQ16x16:
1307 case Iop_CmpEQ32x8:
1308 case Iop_CmpEQ64x4:
1309 return IRExpr_Const(IRConst_V256(0xFFFFFFFF));
1310 default:
1311 ppIROp(op);
1312 vpanic("mkOnesOfPrimopResultType: bad primop");
1316 /* Helpers for folding Clz32/64. */
1317 static UInt fold_Clz64 ( ULong value )
1319 UInt i;
1320 vassert(value != 0ULL); /* no defined semantics for arg==0 */
1321 for (i = 0; i < 64; ++i) {
1322 if (0ULL != (value & (((ULong)1) << (63 - i)))) return i;
1324 vassert(0);
1325 /*NOTREACHED*/
1326 return 0;
1329 static UInt fold_Clz32 ( UInt value )
1331 UInt i;
1332 vassert(value != 0); /* no defined semantics for arg==0 */
1333 for (i = 0; i < 32; ++i) {
1334 if (0 != (value & (((UInt)1) << (31 - i)))) return i;
1336 vassert(0);
1337 /*NOTREACHED*/
1338 return 0;
1341 /* V64 holds 8 summary-constant bits in V128/V256 style. Convert to
1342 the corresponding real constant. */
1343 //XXX re-check this before use
1344 //static ULong de_summarise_V64 ( UChar v64 )
1346 // ULong r = 0;
1347 // if (v64 & (1<<0)) r |= 0x00000000000000FFULL;
1348 // if (v64 & (1<<1)) r |= 0x000000000000FF00ULL;
1349 // if (v64 & (1<<2)) r |= 0x0000000000FF0000ULL;
1350 // if (v64 & (1<<3)) r |= 0x00000000FF000000ULL;
1351 // if (v64 & (1<<4)) r |= 0x000000FF00000000ULL;
1352 // if (v64 & (1<<5)) r |= 0x0000FF0000000000ULL;
1353 // if (v64 & (1<<6)) r |= 0x00FF000000000000ULL;
1354 // if (v64 & (1<<7)) r |= 0xFF00000000000000ULL;
1355 // return r;
1358 /* Helper for arbitrary expression pattern matching in flat IR. If
1359 'e' is a reference to a tmp, look it up in env -- repeatedly, if
1360 necessary -- until it resolves to a non-tmp. Note that this can
1361 return NULL if it can't resolve 'e' to a new expression, which will
1362 be the case if 'e' is instead defined by an IRStmt (IRDirty or
1363 LLSC). */
1364 static IRExpr* chase ( IRExpr** env, IRExpr* e )
1366 /* Why is this loop guaranteed to terminate? Because all tmps must
1367 have definitions before use, hence a tmp cannot be bound
1368 (directly or indirectly) to itself. */
1369 while (e->tag == Iex_RdTmp) {
1370 if (0) { vex_printf("chase "); ppIRExpr(e); vex_printf("\n"); }
1371 e = env[(Int)e->Iex.RdTmp.tmp];
1372 if (e == NULL) break;
1374 return e;
1377 /* Similar to |chase|, but follows at most one level of tmp reference. */
1378 static IRExpr* chase1 ( IRExpr** env, IRExpr* e )
1380 if (e == NULL || e->tag != Iex_RdTmp)
1381 return e;
1382 else
1383 return env[(Int)e->Iex.RdTmp.tmp];
1386 __attribute__((noinline))
1387 static IRExpr* fold_Expr_WRK ( IRExpr** env, IRExpr* e )
1389 Int shift;
1390 IRExpr* e2 = e; /* e2 is the result of folding e, if possible */
1392 switch (e->tag) {
1393 case Iex_Unop:
1394 /* UNARY ops */
1395 if (e->Iex.Unop.arg->tag == Iex_Const) {
1397 /* cases where the arg is a const */
1398 switch (e->Iex.Unop.op) {
1399 case Iop_1Uto8:
1400 e2 = IRExpr_Const(IRConst_U8(toUChar(
1401 e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1402 ? 1 : 0)));
1403 break;
1404 case Iop_1Uto32:
1405 e2 = IRExpr_Const(IRConst_U32(
1406 e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1407 ? 1 : 0));
1408 break;
1409 case Iop_1Uto64:
1410 e2 = IRExpr_Const(IRConst_U64(
1411 e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1412 ? 1 : 0));
1413 break;
1415 case Iop_1Sto8:
1416 e2 = IRExpr_Const(IRConst_U8(toUChar(
1417 e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1418 ? 0xFF : 0)));
1419 break;
1420 case Iop_1Sto16:
1421 e2 = IRExpr_Const(IRConst_U16(toUShort(
1422 e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1423 ? 0xFFFF : 0)));
1424 break;
1425 case Iop_1Sto32:
1426 e2 = IRExpr_Const(IRConst_U32(
1427 e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1428 ? 0xFFFFFFFF : 0));
1429 break;
1430 case Iop_1Sto64:
1431 e2 = IRExpr_Const(IRConst_U64(
1432 e->Iex.Unop.arg->Iex.Const.con->Ico.U1
1433 ? 0xFFFFFFFFFFFFFFFFULL : 0));
1434 break;
1436 case Iop_8Sto32: {
1437 UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
1438 u32 <<= 24;
1439 u32 = (Int)u32 >> 24; /* signed shift */
1440 e2 = IRExpr_Const(IRConst_U32(u32));
1441 break;
1443 case Iop_16Sto32: {
1444 UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1445 u32 <<= 16;
1446 u32 = (Int)u32 >> 16; /* signed shift */
1447 e2 = IRExpr_Const(IRConst_U32(u32));
1448 break;
1450 case Iop_8Uto64:
1451 e2 = IRExpr_Const(IRConst_U64(
1452 0xFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
1453 break;
1454 case Iop_16Uto64:
1455 e2 = IRExpr_Const(IRConst_U64(
1456 0xFFFFULL & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
1457 break;
1458 case Iop_8Uto32:
1459 e2 = IRExpr_Const(IRConst_U32(
1460 0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
1461 break;
1462 case Iop_8Sto16: {
1463 UShort u16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U8;
1464 u16 <<= 8;
1465 u16 = (Short)u16 >> 8; /* signed shift */
1466 e2 = IRExpr_Const(IRConst_U16(u16));
1467 break;
1469 case Iop_8Uto16:
1470 e2 = IRExpr_Const(IRConst_U16(
1471 0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8));
1472 break;
1473 case Iop_16Uto32:
1474 e2 = IRExpr_Const(IRConst_U32(
1475 0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U16));
1476 break;
1477 case Iop_32to16:
1478 e2 = IRExpr_Const(IRConst_U16(toUShort(
1479 0xFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
1480 break;
1481 case Iop_32to8:
1482 e2 = IRExpr_Const(IRConst_U8(toUChar(
1483 0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
1484 break;
1485 case Iop_32to1:
1486 e2 = IRExpr_Const(IRConst_U1(toBool(
1487 1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
1488 )));
1489 break;
1490 case Iop_64to1:
1491 e2 = IRExpr_Const(IRConst_U1(toBool(
1492 1 == (1 & e->Iex.Unop.arg->Iex.Const.con->Ico.U64)
1493 )));
1494 break;
1496 case Iop_NotV128:
1497 e2 = IRExpr_Const(IRConst_V128(
1498 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.V128)));
1499 break;
1500 case Iop_Not64:
1501 e2 = IRExpr_Const(IRConst_U64(
1502 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U64)));
1503 break;
1504 case Iop_Not32:
1505 e2 = IRExpr_Const(IRConst_U32(
1506 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U32)));
1507 break;
1508 case Iop_Not16:
1509 e2 = IRExpr_Const(IRConst_U16(toUShort(
1510 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U16))));
1511 break;
1512 case Iop_Not8:
1513 e2 = IRExpr_Const(IRConst_U8(toUChar(
1514 ~ (e->Iex.Unop.arg->Iex.Const.con->Ico.U8))));
1515 break;
1517 case Iop_Not1:
1518 e2 = IRExpr_Const(IRConst_U1(
1519 notBool(e->Iex.Unop.arg->Iex.Const.con->Ico.U1)));
1520 break;
1522 case Iop_64to8: {
1523 ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1524 w64 &= 0xFFULL;
1525 e2 = IRExpr_Const(IRConst_U8( (UChar)w64 ));
1526 break;
1528 case Iop_64to16: {
1529 ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1530 w64 &= 0xFFFFULL;
1531 e2 = IRExpr_Const(IRConst_U16( (UShort)w64 ));
1532 break;
1534 case Iop_64to32: {
1535 ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1536 w64 &= 0x00000000FFFFFFFFULL;
1537 e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
1538 break;
1540 case Iop_64HIto32: {
1541 ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1542 w64 >>= 32;
1543 e2 = IRExpr_Const(IRConst_U32( (UInt)w64 ));
1544 break;
1546 case Iop_32Uto64:
1547 e2 = IRExpr_Const(IRConst_U64(
1548 0xFFFFFFFFULL
1549 & e->Iex.Unop.arg->Iex.Const.con->Ico.U32));
1550 break;
1551 case Iop_16Sto64: {
1552 ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1553 u64 <<= 48;
1554 u64 = (Long)u64 >> 48; /* signed shift */
1555 e2 = IRExpr_Const(IRConst_U64(u64));
1556 break;
1558 case Iop_32Sto64: {
1559 ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1560 u64 <<= 32;
1561 u64 = (Long)u64 >> 32; /* signed shift */
1562 e2 = IRExpr_Const(IRConst_U64(u64));
1563 break;
1566 case Iop_16to8: {
1567 UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1568 w16 &= 0xFF;
1569 e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
1570 break;
1572 case Iop_16HIto8: {
1573 UShort w16 = e->Iex.Unop.arg->Iex.Const.con->Ico.U16;
1574 w16 >>= 8;
1575 w16 &= 0xFF;
1576 e2 = IRExpr_Const(IRConst_U8( (UChar)w16 ));
1577 break;
1580 case Iop_CmpNEZ8:
1581 e2 = IRExpr_Const(IRConst_U1(toBool(
1582 0 !=
1583 (0xFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U8)
1584 )));
1585 break;
1586 case Iop_CmpNEZ32:
1587 e2 = IRExpr_Const(IRConst_U1(toBool(
1588 0 !=
1589 (0xFFFFFFFF & e->Iex.Unop.arg->Iex.Const.con->Ico.U32)
1590 )));
1591 break;
1592 case Iop_CmpNEZ64:
1593 e2 = IRExpr_Const(IRConst_U1(toBool(
1594 0ULL != e->Iex.Unop.arg->Iex.Const.con->Ico.U64
1595 )));
1596 break;
1598 case Iop_CmpwNEZ32: {
1599 UInt w32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1600 if (w32 == 0)
1601 e2 = IRExpr_Const(IRConst_U32( 0 ));
1602 else
1603 e2 = IRExpr_Const(IRConst_U32( 0xFFFFFFFF ));
1604 break;
1606 case Iop_CmpwNEZ64: {
1607 ULong w64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1608 if (w64 == 0)
1609 e2 = IRExpr_Const(IRConst_U64( 0 ));
1610 else
1611 e2 = IRExpr_Const(IRConst_U64( 0xFFFFFFFFFFFFFFFFULL ));
1612 break;
1615 case Iop_Left32: {
1616 UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1617 Int s32 = (Int)(u32 & 0xFFFFFFFF);
1618 s32 = (s32 | (-s32));
1619 e2 = IRExpr_Const( IRConst_U32( (UInt)s32 ));
1620 break;
1623 case Iop_Left64: {
1624 ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1625 Long s64 = (Long)u64;
1626 s64 = (s64 | (-s64));
1627 e2 = IRExpr_Const( IRConst_U64( (ULong)s64 ));
1628 break;
1631 case Iop_Clz32: {
1632 UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1633 if (u32 != 0)
1634 e2 = IRExpr_Const(IRConst_U32(fold_Clz32(u32)));
1635 break;
1637 case Iop_Clz64: {
1638 ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1639 if (u64 != 0ULL)
1640 e2 = IRExpr_Const(IRConst_U64(fold_Clz64(u64)));
1641 break;
1644 /* For these vector ones, can't fold all cases, but at least
1645 do the most obvious one. Could do better here using
1646 summarise/desummarise of vector constants, but too
1647 difficult to verify; hence just handle the zero cases. */
1648 case Iop_32UtoV128: {
1649 UInt u32 = e->Iex.Unop.arg->Iex.Const.con->Ico.U32;
1650 if (0 == u32) {
1651 e2 = IRExpr_Const(IRConst_V128(0x0000));
1652 } else {
1653 goto unhandled;
1655 break;
1657 case Iop_V128to64: {
1658 UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
1659 if (0 == ((v128 >> 0) & 0xFF)) {
1660 e2 = IRExpr_Const(IRConst_U64(0));
1661 } else {
1662 goto unhandled;
1664 break;
1666 case Iop_V128HIto64: {
1667 UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
1668 if (0 == ((v128 >> 8) & 0xFF)) {
1669 e2 = IRExpr_Const(IRConst_U64(0));
1670 } else {
1671 goto unhandled;
1673 break;
1675 case Iop_64UtoV128: {
1676 ULong u64 = e->Iex.Unop.arg->Iex.Const.con->Ico.U64;
1677 if (0 == u64) {
1678 e2 = IRExpr_Const(IRConst_V128(0x0000));
1679 } else {
1680 goto unhandled;
1682 break;
1685 /* Even stupider (although still correct ..) */
1686 case Iop_V256to64_0: case Iop_V256to64_1:
1687 case Iop_V256to64_2: case Iop_V256to64_3: {
1688 UInt v256 = e->Iex.Unop.arg->Iex.Const.con->Ico.V256;
1689 if (v256 == 0x00000000) {
1690 e2 = IRExpr_Const(IRConst_U64(0));
1691 } else {
1692 goto unhandled;
1694 break;
1697 /* Similarly .. */
1698 case Iop_V256toV128_0: case Iop_V256toV128_1: {
1699 UInt v256 = e->Iex.Unop.arg->Iex.Const.con->Ico.V256;
1700 if (v256 == 0x00000000) {
1701 e2 = IRExpr_Const(IRConst_V128(0x0000));
1702 } else {
1703 goto unhandled;
1705 break;
1708 case Iop_ZeroHI64ofV128: {
1709 /* Could do better here -- only need to look at the bottom 64 bits
1710 of the argument, really. */
1711 UShort v128 = e->Iex.Unop.arg->Iex.Const.con->Ico.V128;
1712 if (v128 == 0x0000) {
1713 e2 = IRExpr_Const(IRConst_V128(0x0000));
1714 } else {
1715 goto unhandled;
1717 break;
1720 default:
1721 goto unhandled;
1722 } // switch (e->Iex.Unop.op)
1724 } else {
1726 /* other cases (identities, etc) */
1727 switch (e->Iex.Unop.op) {
1728 case Iop_PopCount64: {
1729 // PopCount64( And64( Add64(x,-1), Not64(x) ) ) ==> CtzNat64(x)
1730 // bindings:
1731 // a1:And64( a11:Add64(a111:x,a112:-1), a12:Not64(a121:x) )
1732 IRExpr* a1 = chase(env, e->Iex.Unop.arg);
1733 if (!a1)
1734 goto nomatch;
1735 if (a1->tag != Iex_Binop || a1->Iex.Binop.op != Iop_And64)
1736 goto nomatch;
1737 // a1 is established
1738 IRExpr* a11 = chase(env, a1->Iex.Binop.arg1);
1739 if (!a11)
1740 goto nomatch;
1741 if (a11->tag != Iex_Binop || a11->Iex.Binop.op != Iop_Add64)
1742 goto nomatch;
1743 // a11 is established
1744 IRExpr* a12 = chase(env, a1->Iex.Binop.arg2);
1745 if (!a12)
1746 goto nomatch;
1747 if (a12->tag != Iex_Unop || a12->Iex.Unop.op != Iop_Not64)
1748 goto nomatch;
1749 // a12 is established
1750 IRExpr* a111 = a11->Iex.Binop.arg1;
1751 IRExpr* a112 = chase(env, a11->Iex.Binop.arg2);
1752 IRExpr* a121 = a12->Iex.Unop.arg;
1753 if (!a111 || !a112 || !a121)
1754 goto nomatch;
1755 // a111 and a121 need to be the same temp.
1756 if (!eqIRAtom(a111, a121))
1757 goto nomatch;
1758 // Finally, a112 must be a 64-bit version of -1.
1759 if (!isOnesU(a112))
1760 goto nomatch;
1761 // Match established. Transform.
1762 e2 = IRExpr_Unop(Iop_CtzNat64, a111);
1763 break;
1764 nomatch:
1765 break;
1767 default:
1768 break;
1769 } // switch (e->Iex.Unop.op)
1771 } // if (e->Iex.Unop.arg->tag == Iex_Const)
1772 break;
1774 case Iex_Binop:
1775 /* BINARY ops */
1776 if (e->Iex.Binop.arg1->tag == Iex_Const
1777 && e->Iex.Binop.arg2->tag == Iex_Const) {
1778 /* cases where both args are consts */
1779 switch (e->Iex.Binop.op) {
1781 /* -- Or -- */
1782 case Iop_Or8:
1783 e2 = IRExpr_Const(IRConst_U8(toUChar(
1784 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1785 | e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1786 break;
1787 case Iop_Or16:
1788 e2 = IRExpr_Const(IRConst_U16(toUShort(
1789 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
1790 | e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
1791 break;
1792 case Iop_Or32:
1793 e2 = IRExpr_Const(IRConst_U32(
1794 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1795 | e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1796 break;
1797 case Iop_Or64:
1798 e2 = IRExpr_Const(IRConst_U64(
1799 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1800 | e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1801 break;
1802 case Iop_OrV128:
1803 e2 = IRExpr_Const(IRConst_V128(
1804 (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
1805 | e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
1806 break;
1808 /* -- Xor -- */
1809 case Iop_Xor8:
1810 e2 = IRExpr_Const(IRConst_U8(toUChar(
1811 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1812 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1813 break;
1814 case Iop_Xor16:
1815 e2 = IRExpr_Const(IRConst_U16(toUShort(
1816 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
1817 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
1818 break;
1819 case Iop_Xor32:
1820 e2 = IRExpr_Const(IRConst_U32(
1821 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1822 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1823 break;
1824 case Iop_Xor64:
1825 e2 = IRExpr_Const(IRConst_U64(
1826 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1827 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1828 break;
1829 case Iop_XorV128:
1830 e2 = IRExpr_Const(IRConst_V128(
1831 (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
1832 ^ e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
1833 break;
1835 /* -- And -- */
1836 case Iop_And1:
1837 e2 = IRExpr_Const(IRConst_U1(toBool(
1838 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U1
1839 & e->Iex.Binop.arg2->Iex.Const.con->Ico.U1))));
1840 break;
1841 case Iop_And8:
1842 e2 = IRExpr_Const(IRConst_U8(toUChar(
1843 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1844 & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1845 break;
1846 case Iop_And16:
1847 e2 = IRExpr_Const(IRConst_U16(toUShort(
1848 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U16
1849 & e->Iex.Binop.arg2->Iex.Const.con->Ico.U16))));
1850 break;
1851 case Iop_And32:
1852 e2 = IRExpr_Const(IRConst_U32(
1853 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1854 & e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1855 break;
1856 case Iop_And64:
1857 e2 = IRExpr_Const(IRConst_U64(
1858 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1859 & e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1860 break;
1861 case Iop_AndV128:
1862 e2 = IRExpr_Const(IRConst_V128(
1863 (e->Iex.Binop.arg1->Iex.Const.con->Ico.V128
1864 & e->Iex.Binop.arg2->Iex.Const.con->Ico.V128)));
1865 break;
1867 /* -- Add -- */
1868 case Iop_Add8:
1869 e2 = IRExpr_Const(IRConst_U8(toUChar(
1870 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1871 + e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1872 break;
1873 case Iop_Add32:
1874 e2 = IRExpr_Const(IRConst_U32(
1875 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1876 + e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1877 break;
1878 case Iop_Add64:
1879 e2 = IRExpr_Const(IRConst_U64(
1880 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1881 + e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1882 break;
1884 /* -- Sub -- */
1885 case Iop_Sub8:
1886 e2 = IRExpr_Const(IRConst_U8(toUChar(
1887 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U8
1888 - e->Iex.Binop.arg2->Iex.Const.con->Ico.U8))));
1889 break;
1890 case Iop_Sub32:
1891 e2 = IRExpr_Const(IRConst_U32(
1892 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1893 - e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1894 break;
1895 case Iop_Sub64:
1896 e2 = IRExpr_Const(IRConst_U64(
1897 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1898 - e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1899 break;
1901 /* -- Max32U -- */
1902 case Iop_Max32U: {
1903 UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
1904 UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1905 UInt res = u32a > u32b ? u32a : u32b;
1906 e2 = IRExpr_Const(IRConst_U32(res));
1907 break;
1910 /* -- Mul -- */
1911 case Iop_Mul32:
1912 e2 = IRExpr_Const(IRConst_U32(
1913 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1914 * e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)));
1915 break;
1916 case Iop_Mul64:
1917 e2 = IRExpr_Const(IRConst_U64(
1918 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1919 * e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)));
1920 break;
1922 case Iop_MullS32: {
1923 /* very paranoid */
1924 UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
1925 UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
1926 Int s32a = (Int)u32a;
1927 Int s32b = (Int)u32b;
1928 Long s64a = (Long)s32a;
1929 Long s64b = (Long)s32b;
1930 Long sres = s64a * s64b;
1931 ULong ures = (ULong)sres;
1932 e2 = IRExpr_Const(IRConst_U64(ures));
1933 break;
1936 /* -- Shl -- */
1937 case Iop_Shl32:
1938 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1939 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1940 if (shift >= 0 && shift <= 31)
1941 e2 = IRExpr_Const(IRConst_U32(
1942 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
1943 << shift)));
1944 break;
1945 case Iop_Shl64:
1946 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1947 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1948 if (shift >= 0 && shift <= 63)
1949 e2 = IRExpr_Const(IRConst_U64(
1950 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
1951 << shift)));
1952 break;
1954 /* -- Sar -- */
1955 case Iop_Sar32: {
1956 /* paranoid ... */
1957 /*signed*/ Int s32;
1958 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1959 s32 = (Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
1960 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1961 if (shift >= 0 && shift <= 31) {
1962 s32 >>=/*signed*/ shift;
1963 e2 = IRExpr_Const(IRConst_U32((UInt)s32));
1965 break;
1967 case Iop_Sar64: {
1968 /* paranoid ... */
1969 /*signed*/ Long s64;
1970 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1971 s64 = (Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
1972 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1973 if (shift >= 0 && shift <= 63) {
1974 s64 >>=/*signed*/ shift;
1975 e2 = IRExpr_Const(IRConst_U64((ULong)s64));
1977 break;
1980 /* -- Shr -- */
1981 case Iop_Shr32: {
1982 /* paranoid ... */
1983 /*unsigned*/ UInt u32;
1984 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1985 u32 = (UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32);
1986 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1987 if (shift >= 0 && shift <= 31) {
1988 u32 >>=/*unsigned*/ shift;
1989 e2 = IRExpr_Const(IRConst_U32(u32));
1991 break;
1993 case Iop_Shr64: {
1994 /* paranoid ... */
1995 /*unsigned*/ ULong u64;
1996 vassert(e->Iex.Binop.arg2->Iex.Const.con->tag == Ico_U8);
1997 u64 = (ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64);
1998 shift = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U8);
1999 if (shift >= 0 && shift <= 63) {
2000 u64 >>=/*unsigned*/ shift;
2001 e2 = IRExpr_Const(IRConst_U64(u64));
2003 break;
2006 /* -- CmpEQ -- */
2007 case Iop_CmpEQ32:
2008 e2 = IRExpr_Const(IRConst_U1(toBool(
2009 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
2010 == e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
2011 break;
2012 case Iop_CmpEQ64:
2013 e2 = IRExpr_Const(IRConst_U1(toBool(
2014 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
2015 == e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
2016 break;
2018 /* -- CmpNE -- */
2019 case Iop_CmpNE8:
2020 case Iop_CasCmpNE8:
2021 case Iop_ExpCmpNE8:
2022 e2 = IRExpr_Const(IRConst_U1(toBool(
2023 ((0xFF & e->Iex.Binop.arg1->Iex.Const.con->Ico.U8)
2024 != (0xFF & e->Iex.Binop.arg2->Iex.Const.con->Ico.U8)))));
2025 break;
2026 case Iop_CmpNE32:
2027 case Iop_CasCmpNE32:
2028 case Iop_ExpCmpNE32:
2029 e2 = IRExpr_Const(IRConst_U1(toBool(
2030 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U32
2031 != e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))));
2032 break;
2033 case Iop_CmpNE64:
2034 case Iop_CasCmpNE64:
2035 case Iop_ExpCmpNE64:
2036 e2 = IRExpr_Const(IRConst_U1(toBool(
2037 (e->Iex.Binop.arg1->Iex.Const.con->Ico.U64
2038 != e->Iex.Binop.arg2->Iex.Const.con->Ico.U64))));
2039 break;
2041 /* -- CmpLEU -- */
2042 case Iop_CmpLE32U:
2043 e2 = IRExpr_Const(IRConst_U1(toBool(
2044 ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
2045 <= (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
2046 break;
2047 case Iop_CmpLE64U:
2048 e2 = IRExpr_Const(IRConst_U1(toBool(
2049 ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
2050 <= (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
2051 break;
2053 /* -- CmpLES -- */
2054 case Iop_CmpLE32S:
2055 e2 = IRExpr_Const(IRConst_U1(toBool(
2056 ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
2057 <= (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
2058 break;
2059 case Iop_CmpLE64S:
2060 e2 = IRExpr_Const(IRConst_U1(toBool(
2061 ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
2062 <= (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
2063 break;
2065 /* -- CmpLTS -- */
2066 case Iop_CmpLT32S:
2067 e2 = IRExpr_Const(IRConst_U1(toBool(
2068 ((Int)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
2069 < (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
2070 break;
2071 case Iop_CmpLT64S:
2072 e2 = IRExpr_Const(IRConst_U1(toBool(
2073 ((Long)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
2074 < (Long)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
2075 break;
2077 /* -- CmpLTU -- */
2078 case Iop_CmpLT32U:
2079 e2 = IRExpr_Const(IRConst_U1(toBool(
2080 ((UInt)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U32)
2081 < (UInt)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32)))));
2082 break;
2083 case Iop_CmpLT64U:
2084 e2 = IRExpr_Const(IRConst_U1(toBool(
2085 ((ULong)(e->Iex.Binop.arg1->Iex.Const.con->Ico.U64)
2086 < (ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U64)))));
2087 break;
2089 /* -- CmpORD -- */
2090 case Iop_CmpORD32S: {
2091 /* very paranoid */
2092 UInt u32a = e->Iex.Binop.arg1->Iex.Const.con->Ico.U32;
2093 UInt u32b = e->Iex.Binop.arg2->Iex.Const.con->Ico.U32;
2094 Int s32a = (Int)u32a;
2095 Int s32b = (Int)u32b;
2096 Int r = 0x2; /* EQ */
2097 if (s32a < s32b) {
2098 r = 0x8; /* LT */
2100 else if (s32a > s32b) {
2101 r = 0x4; /* GT */
2103 e2 = IRExpr_Const(IRConst_U32(r));
2104 break;
2107 /* -- nHLto2n -- */
2108 case Iop_32HLto64:
2109 e2 = IRExpr_Const(IRConst_U64(
2110 (((ULong)(e->Iex.Binop.arg1
2111 ->Iex.Const.con->Ico.U32)) << 32)
2112 | ((ULong)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32))
2114 break;
2115 case Iop_64HLto128:
2116 /* We can't fold this, because there is no way to
2117                            express the result in IR, but at least pretend to
2118 handle it, so as to stop getting blasted with
2119 no-rule-for-this-primop messages. */
2120 break;
2121 /* For this vector one, can't fold all cases, but at
2122 least do the most obvious one. Could do better here
2123 using summarise/desummarise of vector constants, but
2124 too difficult to verify; hence just handle the zero
2125 cases. */
2126 case Iop_64HLtoV128: {
2127 ULong argHi = e->Iex.Binop.arg1->Iex.Const.con->Ico.U64;
2128 ULong argLo = e->Iex.Binop.arg2->Iex.Const.con->Ico.U64;
2129 if (0 == argHi && 0 == argLo) {
2130 e2 = IRExpr_Const(IRConst_V128(0));
2131 } else {
2132 goto unhandled;
2134 break;
2136 /* Same reasoning for the 256-bit version. */
2137 case Iop_V128HLtoV256: {
2138 IRExpr* argHi = e->Iex.Binop.arg1;
2139 IRExpr* argLo = e->Iex.Binop.arg2;
2140 if (isZeroV128(argHi) && isZeroV128(argLo)) {
2141 e2 = IRExpr_Const(IRConst_V256(0));
2142 } else {
2143 goto unhandled;
2145 break;
2148 /* -- V128 stuff -- */
2149 case Iop_InterleaveLO64x2: // This, and the HI64, are created
2150 case Iop_InterleaveHI64x2: // by the amd64 PTEST translation
2151 case Iop_InterleaveLO8x16: {
2152 /* This turns up a lot in Memcheck instrumentation of
2153 Icc generated code. I don't know why. */
2154 UShort arg1 = e->Iex.Binop.arg1->Iex.Const.con->Ico.V128;
2155 UShort arg2 = e->Iex.Binop.arg2->Iex.Const.con->Ico.V128;
2156 if (0 == arg1 && 0 == arg2) {
2157 e2 = IRExpr_Const(IRConst_V128(0));
2158 } else {
2159 goto unhandled;
2161 break;
2164 default:
2165 goto unhandled;
2168 } else {
2170 /* other cases (identities, etc) */
2171 switch (e->Iex.Binop.op) {
2173 case Iop_Shl32:
2174 case Iop_Shl64:
2175 case Iop_Shr64:
2176 case Iop_Sar64:
2177 /* Shl32/Shl64/Shr64/Sar64(x,0) ==> x */
2178 if (isZeroU(e->Iex.Binop.arg2)) {
2179 e2 = e->Iex.Binop.arg1;
2180 break;
2182                      /* Shl32/Shl64/Shr64/Sar64(0,x) ==> 0 */
2183 if (isZeroU(e->Iex.Binop.arg1)) {
2184 e2 = e->Iex.Binop.arg1;
2185 break;
2187 break;
2189 case Iop_Sar32:
2190 case Iop_Shr32:
2191 /* Shr32/Sar32(x,0) ==> x */
2192 if (isZeroU(e->Iex.Binop.arg2)) {
2193 e2 = e->Iex.Binop.arg1;
2194 break;
2196 break;
2198 case Iop_Or8:
2199 case Iop_Or16:
2200 case Iop_Or32:
2201 case Iop_Or64:
2202 case Iop_Max32U:
2203 /* Or8/Or16/Or32/Or64/Max32U(x,0) ==> x */
2204 if (isZeroU(e->Iex.Binop.arg2)) {
2205 e2 = e->Iex.Binop.arg1;
2206 break;
2208 /* Or8/Or16/Or32/Or64/Max32U(0,x) ==> x */
2209 if (isZeroU(e->Iex.Binop.arg1)) {
2210 e2 = e->Iex.Binop.arg2;
2211 break;
2213 /* Or8/Or16/Or32/Or64/Max32U(x,1---1b) ==> 1---1b */
2214 /* Or8/Or16/Or32/Or64/Max32U(1---1b,x) ==> 1---1b */
2215 if (isOnesU(e->Iex.Binop.arg1) || isOnesU(e->Iex.Binop.arg2)) {
2216 e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
2217 break;
2219 /* Or8/Or16/Or32/Or64/Max32U(t,t) ==> t, for some IRTemp t */
2220 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2221 e2 = e->Iex.Binop.arg1;
2222 break;
2224 break;
2226 case Iop_Add8:
2227 /* Add8(t,t) ==> t << 1.
2228 Memcheck doesn't understand that
2229 x+x produces a defined least significant bit, and it seems
2230 simplest just to get rid of the problem by rewriting it
2231 out, since the opportunity to do so exists. */
2232 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2233 e2 = IRExpr_Binop(Iop_Shl8, e->Iex.Binop.arg1,
2234 IRExpr_Const(IRConst_U8(1)));
2235 break;
2237 break;
2239 /* NB no Add16(t,t) case yet as no known test case exists */
2241 case Iop_Add32:
2242 case Iop_Add64:
2243 /* Add32/Add64(x,0) ==> x */
2244 if (isZeroU(e->Iex.Binop.arg2)) {
2245 e2 = e->Iex.Binop.arg1;
2246 break;
2248 /* Add32/Add64(0,x) ==> x */
2249 if (isZeroU(e->Iex.Binop.arg1)) {
2250 e2 = e->Iex.Binop.arg2;
2251 break;
2253 /* Add32/Add64(t,t) ==> t << 1. Same rationale as for Add8. */
2254 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2255 e2 = IRExpr_Binop(
2256 e->Iex.Binop.op == Iop_Add32 ? Iop_Shl32 : Iop_Shl64,
2257 e->Iex.Binop.arg1, IRExpr_Const(IRConst_U8(1)));
2258 break;
2260 break;
2262 case Iop_Sub32:
2263 case Iop_Sub64:
2264 /* Sub32/Sub64(x,0) ==> x */
2265 if (isZeroU(e->Iex.Binop.arg2)) {
2266 e2 = e->Iex.Binop.arg1;
2267 break;
2269 /* Sub32/Sub64(t,t) ==> 0, for some IRTemp t */
2270 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2271 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2272 break;
2274 break;
2275 case Iop_Sub8x16:
2276 /* Sub8x16(x,0) ==> x */
2277 if (isZeroV128(e->Iex.Binop.arg2)) {
2278 e2 = e->Iex.Binop.arg1;
2279 break;
2281 break;
2283 case Iop_And1:
2284 case Iop_And8:
2285 case Iop_And16:
2286 case Iop_And32:
2287 case Iop_And64:
2288 /* And1/And8/And16/And32/And64(x,1---1b) ==> x */
2289 if (isOnesU(e->Iex.Binop.arg2)) {
2290 e2 = e->Iex.Binop.arg1;
2291 break;
2293 /* And1/And8/And16/And32/And64(1---1b,x) ==> x */
2294 if (isOnesU(e->Iex.Binop.arg1)) {
2295 e2 = e->Iex.Binop.arg2;
2296 break;
2298 /* And1/And8/And16/And32/And64(x,0) ==> 0 */
2299 if (isZeroU(e->Iex.Binop.arg2)) {
2300 e2 = e->Iex.Binop.arg2;
2301 break;
2303 /* And1/And8/And16/And32/And64(0,x) ==> 0 */
2304 if (isZeroU(e->Iex.Binop.arg1)) {
2305 e2 = e->Iex.Binop.arg1;
2306 break;
2308 /* And1/And8/And16/And32/And64(t,t) ==> t, for some IRTemp t */
2309 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2310 e2 = e->Iex.Binop.arg1;
2311 break;
2313 break;
2315 case Iop_AndV128:
2316 case Iop_AndV256:
2317                      /* AndV128/AndV256(t,t)
2318                         ==> t, for some IRTemp t */
2319 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2320 e2 = e->Iex.Binop.arg1;
2321 break;
2323 /* Deal with either arg zero. Could handle other And
2324 cases here too. */
2325 if (e->Iex.Binop.op == Iop_AndV256
2326 && (isZeroV256(e->Iex.Binop.arg1)
2327 || isZeroV256(e->Iex.Binop.arg2))) {
2328 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2329 break;
2330 } else if (e->Iex.Binop.op == Iop_AndV128
2331 && (isZeroV128(e->Iex.Binop.arg1)
2332 || isZeroV128(e->Iex.Binop.arg2))) {
2333 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2334 break;
2336 /* AndV128(t,1...1) ==> t. The amd64 front end generates these
2337 for *CMP{P,S}{S,D} etc. */
2338 if (e->Iex.Binop.op == Iop_AndV128) {
2339 if (isOnesV128(e->Iex.Binop.arg2)) {
2340 e2 = e->Iex.Binop.arg1;
2341 break;
2344 /* AndV128( x, NotV128( x ) ) ==> 0...0 and
2345 AndV256( x, NotV256( x ) ) ==> 0...0
2346 This is generated by amd64 `ptest %xmmReg, %xmmReg`
2347 (the same reg both times)
2348 See https://bugzilla.redhat.com/show_bug.cgi?id=2257546 */
2349 if (e->Iex.Binop.op == Iop_AndV128
2350 || e->Iex.Binop.op == Iop_AndV256) {
2351 Bool isV256 = e->Iex.Binop.op == Iop_AndV256;
2352 IRExpr* x1 = chase(env, e->Iex.Binop.arg1);
2353 IRExpr* rhs = chase(env, e->Iex.Binop.arg2);
2354 if (x1 && rhs
2355 && rhs->tag == Iex_Unop
2356 && rhs->Iex.Unop.op == (isV256 ? Iop_NotV256
2357 : Iop_NotV128)) {
2358 IRExpr* x2 = chase(env, rhs->Iex.Unop.arg);
2359 if (x2 && sameIRExprs(env, x1, x2)) {
2360 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2361 break;
2365 break;
2367 case Iop_OrV128:
2368 case Iop_OrV256:
2369                      /* OrV128/OrV256(t,t) ==> t, for some IRTemp t */
2370 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2371 e2 = e->Iex.Binop.arg1;
2372 break;
2374 /* OrV128(t,0) ==> t */
2375 if (e->Iex.Binop.op == Iop_OrV128) {
2376 if (isZeroV128(e->Iex.Binop.arg2)) {
2377 e2 = e->Iex.Binop.arg1;
2378 break;
2380 if (isZeroV128(e->Iex.Binop.arg1)) {
2381 e2 = e->Iex.Binop.arg2;
2382 break;
2385 /* OrV256(t,0) ==> t */
2386 if (e->Iex.Binop.op == Iop_OrV256) {
2387 if (isZeroV256(e->Iex.Binop.arg2)) {
2388 e2 = e->Iex.Binop.arg1;
2389 break;
2391 //Disabled because there's no known test case right now.
2392 //if (isZeroV256(e->Iex.Binop.arg1)) {
2393 // e2 = e->Iex.Binop.arg2;
2394 // break;
2397 break;
2399 case Iop_Xor8:
2400 case Iop_Xor16:
2401 case Iop_Xor32:
2402 case Iop_Xor64:
2403 case Iop_XorV128:
2404 case Iop_XorV256:
2405 /* Xor8/16/32/64/V128/V256(t,t) ==> 0, for some IRTemp t */
2406 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2407 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2408 break;
2410 /* XorV128(t,0) ==> t */
2411 if (e->Iex.Binop.op == Iop_XorV128) {
2412 if (isZeroV128(e->Iex.Binop.arg2)) {
2413 e2 = e->Iex.Binop.arg1;
2414 break;
2416 //Disabled because there's no known test case right now.
2417 //if (isZeroV128(e->Iex.Binop.arg1)) {
2418 // e2 = e->Iex.Binop.arg2;
2419 // break;
2421 } else {
2422 /* Xor8/16/32/64(0,t) ==> t */
2423 if (isZeroU(e->Iex.Binop.arg1)) {
2424 e2 = e->Iex.Binop.arg2;
2425 break;
2427 /* Xor8/16/32/64(t,0) ==> t */
2428 if (isZeroU(e->Iex.Binop.arg2)) {
2429 e2 = e->Iex.Binop.arg1;
2430 break;
2433 break;
2435 case Iop_CmpNE32:
2436 /* CmpNE32(t,t) ==> 0, for some IRTemp t */
2437 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2438 e2 = mkZeroOfPrimopResultType(e->Iex.Binop.op);
2439 break;
2441 /* CmpNE32(1Uto32(b), 0) ==> b */
2442 if (isZeroU32(e->Iex.Binop.arg2)) {
2443 IRExpr* a1 = chase(env, e->Iex.Binop.arg1);
2444 if (a1 && a1->tag == Iex_Unop
2445 && a1->Iex.Unop.op == Iop_1Uto32) {
2446 e2 = a1->Iex.Unop.arg;
2447 break;
2450 break;
2452 // in total 32 bits
2453 case Iop_CmpEQ32:
2454 // in total 64 bits
2455 case Iop_CmpEQ64:
2456 case Iop_CmpEQ8x8:
2457 case Iop_CmpEQ16x4:
2458 case Iop_CmpEQ32x2:
2459 // in total 128 bits
2460 case Iop_CmpEQ8x16:
2461 case Iop_CmpEQ16x8:
2462 case Iop_CmpEQ32x4:
2463 case Iop_CmpEQ64x2:
2464 // in total 256 bits
2465 case Iop_CmpEQ8x32:
2466 case Iop_CmpEQ16x16:
2467 case Iop_CmpEQ32x8:
2468 case Iop_CmpEQ64x4:
2469 if (sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2470 e2 = mkOnesOfPrimopResultType(e->Iex.Binop.op);
2471 break;
2473 break;
2475 default:
2476 break;
2479 break;
2481 case Iex_ITE:
2482 /* ITE */
2483          /* is the discriminant a constant? */
2484 if (e->Iex.ITE.cond->tag == Iex_Const) {
2485             /* assured to us by the IR type rules */
2486 vassert(e->Iex.ITE.cond->Iex.Const.con->tag == Ico_U1);
2487 e2 = e->Iex.ITE.cond->Iex.Const.con->Ico.U1
2488 ? e->Iex.ITE.iftrue : e->Iex.ITE.iffalse;
2490 else
2491 /* are the arms identical? (pretty weedy test) */
2492 if (sameIRExprs(env, e->Iex.ITE.iftrue,
2493 e->Iex.ITE.iffalse)) {
2494 e2 = e->Iex.ITE.iffalse;
2496 break;
2498 default:
2499 /* not considered */
2500 break;
2503 /* Show cases where we've found but not folded 'op(t,t)'. Be
2504 careful not to call sameIRExprs with values of different types,
2505 though, else it will assert (and so it should!). We can't
2506 conveniently call typeOfIRExpr on the two args without a whole
2507 bunch of extra plumbing to pass in a type env, so just use a
2508 hacky test to check the arguments are not anything that might
2509 sameIRExprs to assert. This is only OK because this kludge is
2510 only used for debug printing, not for "real" operation. For
2511 "real" operation (ie, all other calls to sameIRExprs), it is
2512       essential that the two args have the same type.
2514 The "right" solution is to plumb the containing block's
2515 IRTypeEnv through to here and use typeOfIRExpr to be sure. But
2516 that's a bunch of extra parameter passing which will just slow
2517 down the normal case, for no purpose. */
2518 if (vex_control.iropt_verbosity > 0
2519 && e == e2
2520 && e->tag == Iex_Binop
2521 && !debug_only_hack_sameIRExprs_might_assert(e->Iex.Binop.arg1,
2522 e->Iex.Binop.arg2)
2523 && sameIRExprs(env, e->Iex.Binop.arg1, e->Iex.Binop.arg2)) {
2524 vex_printf("vex iropt: fold_Expr_WRK: no ident rule for: ");
2525 ppIRExpr(e);
2526 vex_printf("\n");
2529 /* Show the overall results of folding. */
2530 if (DEBUG_IROPT && e2 != e) {
2531 vex_printf("FOLD: ");
2532 ppIRExpr(e); vex_printf(" -> ");
2533 ppIRExpr(e2); vex_printf("\n");
2536 return e2;
2538 unhandled:
2539 # if 0
2540 vex_printf("\n\n");
2541 ppIRExpr(e);
2542 vpanic("fold_Expr: no rule for the above");
2543 # else
2544 if (vex_control.iropt_verbosity > 0) {
2545 vex_printf("vex iropt: fold_Expr_WRK: no const rule for: ");
2546 ppIRExpr(e);
2547 vex_printf("\n");
2549 return e2;
2550 # endif
2553 /* Fold |e| as much as possible, given the bindings in |env|. If no folding is
2554 possible, just return |e|. Also, if |env| is NULL, don't even try to
2555 fold; just return |e| directly. */
2556 inline
2557 static IRExpr* fold_Expr ( IRExpr** env, IRExpr* e )
2559 return env == NULL ? e : fold_Expr_WRK(env, e);
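   /* Added illustration, not part of the original sources: a minimal
      sketch of the folder at work, using only constructors already seen
      in this file.  |ex| and |env1| are made-up names for the example. */
   # if 0
   IRExpr* ex = IRExpr_Binop(Iop_Add32,
                             IRExpr_Const(IRConst_U32(4)),
                             IRExpr_Const(IRConst_U32(5)));
   IRExpr** env1 = LibVEX_Alloc_inline(1 * sizeof(IRExpr*));
   env1[0] = NULL;
   /* Both args are constants, so no bindings are consulted: the result
      is the single constant 0x9:I32. */
   ppIRExpr(fold_Expr(env1, ex)); vex_printf("\n");
   /* With a NULL env, no folding is attempted and |ex| comes back
      unchanged. */
   ppIRExpr(fold_Expr(NULL, ex)); vex_printf("\n");
   # endif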
2562 /* Apply the subst to a simple 1-level expression -- guaranteed to be
2563 1-level due to previous flattening pass. */
2565 static IRExpr* subst_Expr ( IRExpr** env, IRExpr* ex )
2567 switch (ex->tag) {
2568 case Iex_RdTmp:
2569 if (env[(Int)ex->Iex.RdTmp.tmp] != NULL) {
2570 IRExpr *rhs = env[(Int)ex->Iex.RdTmp.tmp];
2571 if (rhs->tag == Iex_RdTmp)
2572 return rhs;
2573 if (rhs->tag == Iex_Const
2574 && rhs->Iex.Const.con->tag != Ico_F64i)
2575 return rhs;
2577 /* not bound in env */
2578 return ex;
2580 case Iex_Const:
2581 case Iex_Get:
2582 return ex;
2584 case Iex_GetI:
2585 vassert(isIRAtom(ex->Iex.GetI.ix));
2586 return IRExpr_GetI(
2587 ex->Iex.GetI.descr,
2588 subst_Expr(env, ex->Iex.GetI.ix),
2589 ex->Iex.GetI.bias
2592 case Iex_Qop: {
2593 IRQop* qop = ex->Iex.Qop.details;
2594 vassert(isIRAtom(qop->arg1));
2595 vassert(isIRAtom(qop->arg2));
2596 vassert(isIRAtom(qop->arg3));
2597 vassert(isIRAtom(qop->arg4));
2598 return IRExpr_Qop(
2599 qop->op,
2600 subst_Expr(env, qop->arg1),
2601 subst_Expr(env, qop->arg2),
2602 subst_Expr(env, qop->arg3),
2603 subst_Expr(env, qop->arg4)
2607 case Iex_Triop: {
2608 IRTriop* triop = ex->Iex.Triop.details;
2609 vassert(isIRAtom(triop->arg1));
2610 vassert(isIRAtom(triop->arg2));
2611 vassert(isIRAtom(triop->arg3));
2612 return IRExpr_Triop(
2613 triop->op,
2614 subst_Expr(env, triop->arg1),
2615 subst_Expr(env, triop->arg2),
2616 subst_Expr(env, triop->arg3)
2620 case Iex_Binop:
2621 vassert(isIRAtom(ex->Iex.Binop.arg1));
2622 vassert(isIRAtom(ex->Iex.Binop.arg2));
2623 return IRExpr_Binop(
2624 ex->Iex.Binop.op,
2625 subst_Expr(env, ex->Iex.Binop.arg1),
2626 subst_Expr(env, ex->Iex.Binop.arg2)
2629 case Iex_Unop:
2630 vassert(isIRAtom(ex->Iex.Unop.arg));
2631 return IRExpr_Unop(
2632 ex->Iex.Unop.op,
2633 subst_Expr(env, ex->Iex.Unop.arg)
2636 case Iex_Load:
2637 vassert(isIRAtom(ex->Iex.Load.addr));
2638 return IRExpr_Load(
2639 ex->Iex.Load.end,
2640 ex->Iex.Load.ty,
2641 subst_Expr(env, ex->Iex.Load.addr)
2644 case Iex_CCall: {
2645 Int i;
2646 IRExpr** args2 = shallowCopyIRExprVec(ex->Iex.CCall.args);
2647 for (i = 0; args2[i]; i++) {
2648 vassert(isIRAtom(args2[i]));
2649 args2[i] = subst_Expr(env, args2[i]);
2651 return IRExpr_CCall(
2652 ex->Iex.CCall.cee,
2653 ex->Iex.CCall.retty,
2654 args2
2658 case Iex_ITE:
2659 vassert(isIRAtom(ex->Iex.ITE.cond));
2660 vassert(isIRAtom(ex->Iex.ITE.iftrue));
2661 vassert(isIRAtom(ex->Iex.ITE.iffalse));
2662 return IRExpr_ITE(
2663 subst_Expr(env, ex->Iex.ITE.cond),
2664 subst_Expr(env, ex->Iex.ITE.iftrue),
2665 subst_Expr(env, ex->Iex.ITE.iffalse)
2668 default:
2669 vex_printf("\n\n"); ppIRExpr(ex);
2670 vpanic("subst_Expr");
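   /* Added illustration, not part of the original sources: if env maps
      t2 |-> 0x10:I32 and leaves t7 unbound, then subst_Expr rewrites
      Add32(t2,t7) into Add32(0x10:I32,t7).  The temp numbers are made
      up for the example. */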
2676 /* Apply the subst to stmt, then, if |doFolding| is |True|, fold the result as
2677 much as possible. Much simplified due to stmt being previously flattened.
2678 As a result of this, the stmt may wind up being turned into a no-op.
2680 static IRStmt* subst_and_maybe_fold_Stmt ( Bool doFolding,
2681 IRExpr** env, IRStmt* st )
2683 # if 0
2684 vex_printf("\nsubst and maybe fold stmt\n");
2685 ppIRStmt(st);
2686 vex_printf("\n");
2687 # endif
2689 IRExpr** s_env = env;
2690 IRExpr** f_env = doFolding ? env : NULL;
2692 switch (st->tag) {
2693 case Ist_AbiHint:
2694 vassert(isIRAtom(st->Ist.AbiHint.base));
2695 vassert(isIRAtom(st->Ist.AbiHint.nia));
2696 return IRStmt_AbiHint(
2697 fold_Expr(f_env, subst_Expr(s_env, st->Ist.AbiHint.base)),
2698 st->Ist.AbiHint.len,
2699 fold_Expr(f_env, subst_Expr(s_env, st->Ist.AbiHint.nia))
2701 case Ist_Put:
2702 vassert(isIRAtom(st->Ist.Put.data));
2703 return IRStmt_Put(
2704 st->Ist.Put.offset,
2705 fold_Expr(f_env, subst_Expr(s_env, st->Ist.Put.data))
2708 case Ist_PutI: {
2709 IRPutI *puti, *puti2;
2710 puti = st->Ist.PutI.details;
2711 vassert(isIRAtom(puti->ix));
2712 vassert(isIRAtom(puti->data));
2713 puti2 = mkIRPutI(puti->descr,
2714 fold_Expr(f_env, subst_Expr(s_env, puti->ix)),
2715 puti->bias,
2716 fold_Expr(f_env, subst_Expr(s_env, puti->data)));
2717 return IRStmt_PutI(puti2);
2720 case Ist_WrTmp:
2721 /* This is the one place where an expr (st->Ist.WrTmp.data) is
2722 allowed to be more than just a constant or a tmp. */
2723 return IRStmt_WrTmp(
2724 st->Ist.WrTmp.tmp,
2725 fold_Expr(f_env, subst_Expr(s_env, st->Ist.WrTmp.data))
2728 case Ist_Store:
2729 vassert(isIRAtom(st->Ist.Store.addr));
2730 vassert(isIRAtom(st->Ist.Store.data));
2731 return IRStmt_Store(
2732 st->Ist.Store.end,
2733 fold_Expr(f_env, subst_Expr(s_env, st->Ist.Store.addr)),
2734 fold_Expr(f_env, subst_Expr(s_env, st->Ist.Store.data))
2737 case Ist_StoreG: {
2738 IRStoreG* sg = st->Ist.StoreG.details;
2739 vassert(isIRAtom(sg->addr));
2740 vassert(isIRAtom(sg->data));
2741 vassert(isIRAtom(sg->guard));
2742 IRExpr* faddr = fold_Expr(f_env, subst_Expr(s_env, sg->addr));
2743 IRExpr* fdata = fold_Expr(f_env, subst_Expr(s_env, sg->data));
2744 IRExpr* fguard = fold_Expr(f_env, subst_Expr(s_env, sg->guard));
2745 if (fguard->tag == Iex_Const) {
2746 /* The condition on this store has folded down to a constant. */
2747 vassert(fguard->Iex.Const.con->tag == Ico_U1);
2748 if (fguard->Iex.Const.con->Ico.U1 == False) {
2749 return IRStmt_NoOp();
2750 } else {
2751 vassert(fguard->Iex.Const.con->Ico.U1 == True);
2752 return IRStmt_Store(sg->end, faddr, fdata);
2755 return IRStmt_StoreG(sg->end, faddr, fdata, fguard);
2758 case Ist_LoadG: {
2759 /* This is complicated. If the guard folds down to 'false',
2760 we can replace it with an assignment 'dst := alt', but if
2761 the guard folds down to 'true', we can't conveniently
2762 replace it with an unconditional load, because doing so
2763 requires generating a new temporary, and that is not easy
2764 to do at this point. */
2765 IRLoadG* lg = st->Ist.LoadG.details;
2766 vassert(isIRAtom(lg->addr));
2767 vassert(isIRAtom(lg->alt));
2768 vassert(isIRAtom(lg->guard));
2769 IRExpr* faddr = fold_Expr(f_env, subst_Expr(s_env, lg->addr));
2770 IRExpr* falt = fold_Expr(f_env, subst_Expr(s_env, lg->alt));
2771 IRExpr* fguard = fold_Expr(f_env, subst_Expr(s_env, lg->guard));
2772 if (fguard->tag == Iex_Const) {
2773 /* The condition on this load has folded down to a constant. */
2774 vassert(fguard->Iex.Const.con->tag == Ico_U1);
2775 if (fguard->Iex.Const.con->Ico.U1 == False) {
2776 /* The load is not going to happen -- instead 'alt' is
2777 assigned to 'dst'. */
2778 return IRStmt_WrTmp(lg->dst, falt);
2779 } else {
2780 vassert(fguard->Iex.Const.con->Ico.U1 == True);
2781 /* The load is always going to happen. We want to
2782 convert to an unconditional load and assign to 'dst'
2783 (IRStmt_WrTmp). Problem is we need an extra temp to
2784 hold the loaded value, but none is available.
2785 Instead, reconstitute the conditional load (with
2786 folded args, of course) and let the caller of this
2787 routine deal with the problem. */
2790 return IRStmt_LoadG(lg->end, lg->cvt, lg->dst, faddr, falt, fguard);
2793 case Ist_CAS: {
2794 IRCAS *cas, *cas2;
2795 cas = st->Ist.CAS.details;
2796 vassert(isIRAtom(cas->addr));
2797 vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
2798 vassert(isIRAtom(cas->expdLo));
2799 vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
2800 vassert(isIRAtom(cas->dataLo));
2801 cas2 = mkIRCAS(
2802 cas->oldHi, cas->oldLo, cas->end,
2803 fold_Expr(f_env, subst_Expr(s_env, cas->addr)),
2804 cas->expdHi ? fold_Expr(f_env,
2805 subst_Expr(s_env, cas->expdHi))
2806 : NULL,
2807 fold_Expr(f_env, subst_Expr(s_env, cas->expdLo)),
2808 cas->dataHi ? fold_Expr(f_env,
2809 subst_Expr(s_env, cas->dataHi))
2810 : NULL,
2811 fold_Expr(f_env, subst_Expr(s_env, cas->dataLo))
2813 return IRStmt_CAS(cas2);
2816 case Ist_LLSC:
2817 vassert(isIRAtom(st->Ist.LLSC.addr));
2818 if (st->Ist.LLSC.storedata)
2819 vassert(isIRAtom(st->Ist.LLSC.storedata));
2820 return IRStmt_LLSC(
2821 st->Ist.LLSC.end,
2822 st->Ist.LLSC.result,
2823 fold_Expr(f_env, subst_Expr(s_env, st->Ist.LLSC.addr)),
2824 st->Ist.LLSC.storedata
2825 ? fold_Expr(f_env,
2826 subst_Expr(s_env, st->Ist.LLSC.storedata))
2827 : NULL
2830 case Ist_Dirty: {
2831 Int i;
2832 IRDirty *d, *d2;
2833 d = st->Ist.Dirty.details;
2834 d2 = emptyIRDirty();
2835 *d2 = *d;
2836 d2->args = shallowCopyIRExprVec(d2->args);
2837 if (d2->mFx != Ifx_None) {
2838 vassert(isIRAtom(d2->mAddr));
2839 d2->mAddr = fold_Expr(f_env, subst_Expr(s_env, d2->mAddr));
2841 vassert(isIRAtom(d2->guard));
2842 d2->guard = fold_Expr(f_env, subst_Expr(s_env, d2->guard));
2843 for (i = 0; d2->args[i]; i++) {
2844 IRExpr* arg = d2->args[i];
2845 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg))) {
2846 vassert(isIRAtom(arg));
2847 d2->args[i] = fold_Expr(f_env, subst_Expr(s_env, arg));
2850 return IRStmt_Dirty(d2);
2853 case Ist_IMark:
2854 return IRStmt_IMark(st->Ist.IMark.addr,
2855 st->Ist.IMark.len,
2856 st->Ist.IMark.delta);
2858 case Ist_NoOp:
2859 return IRStmt_NoOp();
2861 case Ist_MBE:
2862 return IRStmt_MBE(st->Ist.MBE.event);
2864 case Ist_Exit: {
2865 IRExpr* fcond;
2866 vassert(isIRAtom(st->Ist.Exit.guard));
2867 fcond = fold_Expr(f_env, subst_Expr(s_env, st->Ist.Exit.guard));
2868 if (fcond->tag == Iex_Const) {
2869 /* Interesting. The condition on this exit has folded down to
2870 a constant. */
2871 vassert(fcond->Iex.Const.con->tag == Ico_U1);
2872 if (fcond->Iex.Const.con->Ico.U1 == False) {
2873 /* exit is never going to happen, so dump the statement. */
2874 return IRStmt_NoOp();
2875 } else {
2876 vassert(fcond->Iex.Const.con->Ico.U1 == True);
2877 /* Hmmm. The exit has become unconditional. Leave it
2878 as it is for now, since we'd have to truncate the BB
2879 at this point, which is tricky. Such truncation is
2880 done later by the dead-code elimination pass. */
2881 /* fall out into the reconstruct-the-exit code. */
2882 if (vex_control.iropt_verbosity > 0)
2883 /* really a misuse of vex_control.iropt_verbosity */
2884 vex_printf("vex iropt: IRStmt_Exit became unconditional\n");
2887 return IRStmt_Exit(fcond, st->Ist.Exit.jk,
2888 st->Ist.Exit.dst, st->Ist.Exit.offsIP);
2891 default:
2892 vex_printf("\n"); ppIRStmt(st);
2893          vpanic("subst_and_maybe_fold_Stmt");
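   /* Added illustration, not part of the original sources: if an earlier
      statement bound t4 to the constant 0:I1 (so env maps t4 |-> 0:I1),
      then an Exit guarded by t4 has its guard substituted to 0:I1; the
      Ist_Exit case above sees a constant-False condition and rewrites the
      whole statement to a NoOp.  The temp number is made up for the
      example. */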
2898 __attribute__((noinline))
2899 static IRSB* cprop_BB_WRK ( IRSB* in, Bool mustRetainNoOps, Bool doFolding )
2901 Int i;
2902 IRSB* out;
2903 IRStmt* st2;
2904 Int n_tmps = in->tyenv->types_used;
2905 IRExpr** env = LibVEX_Alloc_inline(n_tmps * sizeof(IRExpr*));
2906 /* Keep track of IRStmt_LoadGs that we need to revisit after
2907 processing all the other statements. */
2908 const Int N_FIXUPS = 16;
2909 Int fixups[N_FIXUPS]; /* indices in the stmt array of 'out' */
2910 Int n_fixups = 0;
2912 out = emptyIRSB();
2913 out->tyenv = deepCopyIRTypeEnv( in->tyenv );
2915    /* Set up the env which travels forward.  This holds a
2916 substitution, mapping IRTemps to IRExprs. The environment
2917 is to be applied as we move along. Keys are IRTemps.
2918 Values are IRExpr*s.
2920 for (i = 0; i < n_tmps; i++)
2921 env[i] = NULL;
2923 /* For each original SSA-form stmt ... */
2924 for (i = 0; i < in->stmts_used; i++) {
2926 /* First apply the substitution to the current stmt. This
2927 propagates in any constants and tmp-tmp assignments
2928 accumulated prior to this point. As part of the subst_Stmt
2929 call, also then fold any constant expressions resulting. */
2931 st2 = in->stmts[i];
2933 /* perhaps st2 is already a no-op? */
2934 if (st2->tag == Ist_NoOp && !mustRetainNoOps) continue;
2936 st2 = subst_and_maybe_fold_Stmt( doFolding, env, st2 );
2938 /* Deal with some post-folding special cases. */
2939 switch (st2->tag) {
2941 /* If the statement has been folded into a no-op, forget
2942 it. */
2943 case Ist_NoOp:
2944 if (mustRetainNoOps) {
2945 break;
2946 } else {
2947 continue;
2950 /* If the statement assigns to an IRTemp add it to the
2951 running environment. This is for the benefit of copy
2952 propagation and to allow sameIRExpr look through
2953 IRTemps. */
2954 case Ist_WrTmp: {
2955 vassert(env[(Int)(st2->Ist.WrTmp.tmp)] == NULL);
2956 env[(Int)(st2->Ist.WrTmp.tmp)] = st2->Ist.WrTmp.data;
2958 /* 't1 = t2' -- don't add to BB; will be optimized out */
2959 if (st2->Ist.WrTmp.data->tag == Iex_RdTmp)
2960 continue;
2962 /* 't = const' && 'const != F64i' -- don't add to BB
2963 Note, we choose not to propagate const when const is an
2964 F64i, so that F64i literals can be CSE'd later. This
2965 helps x86 floating point code generation. */
2966 if (st2->Ist.WrTmp.data->tag == Iex_Const
2967 && st2->Ist.WrTmp.data->Iex.Const.con->tag != Ico_F64i) {
2968 continue;
2970 /* else add it to the output, as normal */
2971 break;
2974 case Ist_LoadG: {
2975 IRLoadG* lg = st2->Ist.LoadG.details;
2976 IRExpr* guard = lg->guard;
2977 if (guard->tag == Iex_Const) {
2978 /* The guard has folded to a constant, and that
2979 constant must be 1:I1, since subst_and_maybe_fold_Stmt
2980 folds out the case 0:I1 by itself. */
2981 vassert(guard->Iex.Const.con->tag == Ico_U1);
2982 vassert(guard->Iex.Const.con->Ico.U1 == True);
2983 /* Add a NoOp here as a placeholder, and make a note of
2984 where it is in the output block. Afterwards we'll
2985 come back here and transform the NoOp and the LoadG
2986 into a load-convert pair. The fixups[] entry
2987 refers to the inserted NoOp, and we expect to find
2988 the relevant LoadG immediately after it. */
2989 vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS);
2990 if (n_fixups < N_FIXUPS) {
2991 fixups[n_fixups++] = out->stmts_used;
2992 addStmtToIRSB( out, IRStmt_NoOp() );
2995 /* And always add the LoadG to the output, regardless. */
2996 break;
2999 default:
3000 break;
3003 /* Not interesting, copy st2 into the output block. */
3004 addStmtToIRSB( out, st2 );
3007 # if STATS_IROPT
3008 vex_printf("sameIRExpr: invoked = %u/%u equal = %u/%u max_nodes = %u\n",
3009 invocation_count, recursion_count, success_count,
3010 recursion_success_count, max_nodes_visited);
3011 # endif
3013 out->next = subst_Expr( env, in->next );
3014 out->jumpkind = in->jumpkind;
3015 out->offsIP = in->offsIP;
3017 /* Process any leftover unconditional LoadGs that we noticed
3018 in the main pass. */
3019 vassert(n_fixups >= 0 && n_fixups <= N_FIXUPS);
3020 for (i = 0; i < n_fixups; i++) {
3021 Int ix = fixups[i];
3022 /* Carefully verify that the LoadG has the expected form. */
3023 vassert(ix >= 0 && ix+1 < out->stmts_used);
3024 IRStmt* nop = out->stmts[ix];
3025 IRStmt* lgu = out->stmts[ix+1];
3026 vassert(nop->tag == Ist_NoOp);
3027 vassert(lgu->tag == Ist_LoadG);
3028 IRLoadG* lg = lgu->Ist.LoadG.details;
3029 IRExpr* guard = lg->guard;
3030 vassert(guard->Iex.Const.con->tag == Ico_U1);
3031 vassert(guard->Iex.Const.con->Ico.U1 == True);
3032 /* Figure out the load and result types, and the implied
3033 conversion operation. */
3034 IRType cvtRes = Ity_INVALID, cvtArg = Ity_INVALID;
3035 typeOfIRLoadGOp(lg->cvt, &cvtRes, &cvtArg);
3036 IROp cvtOp = Iop_INVALID;
3037 switch (lg->cvt) {
3038 case ILGop_IdentV128:
3039 case ILGop_Ident64:
3040 case ILGop_Ident32: break;
3041 case ILGop_8Uto32: cvtOp = Iop_8Uto32; break;
3042 case ILGop_8Sto32: cvtOp = Iop_8Sto32; break;
3043 case ILGop_16Uto32: cvtOp = Iop_16Uto32; break;
3044 case ILGop_16Sto32: cvtOp = Iop_16Sto32; break;
3045 default: vpanic("cprop_BB: unhandled ILGOp");
3047 /* Replace the placeholder NoOp by the required unconditional
3048 load. */
3049 IRTemp tLoaded = newIRTemp(out->tyenv, cvtArg);
3050 out->stmts[ix]
3051 = IRStmt_WrTmp(tLoaded,
3052 IRExpr_Load(lg->end, cvtArg, lg->addr));
3053 /* Replace the LoadG by a conversion from the loaded value's
3054 type to the required result type. */
3055 out->stmts[ix+1]
3056 = IRStmt_WrTmp(
3057 lg->dst, cvtOp == Iop_INVALID
3058 ? IRExpr_RdTmp(tLoaded)
3059 : IRExpr_Unop(cvtOp, IRExpr_RdTmp(tLoaded)));
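      /* Added illustration, not part of the original sources: a guarded
         load of an I8 at address t7, converted with ILGop_8Uto32 into t9,
         with alternative t8 and a guard that has folded to 1:I1, is
         rewritten by this fixup into the unconditional pair
            t20 = LDle:I8(t7)
            t9  = 8Uto32(t20)
         where t20 stands for the freshly allocated tLoaded temp; the temp
         numbers are made up for the example. */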
3062 return out;
3066 IRSB* cprop_BB ( IRSB* in ) {
3067 return cprop_BB_WRK(in, /*mustRetainNoOps=*/False, /*doFolding=*/True);
3071 /*---------------------------------------------------------------*/
3072 /*--- Dead code (t = E) removal ---*/
3073 /*---------------------------------------------------------------*/
3075 /* As a side effect, also removes all code following an unconditional
3076 side exit. */
3078 /* The 'set' carried around below is an array of Bools indexed by IRTemp
3079    -- really just representing a set of IRTemps.
3082 inline
3083 static void addUses_Temp ( Bool* set, IRTemp tmp )
3085 set[(Int)tmp] = True;
3088 static void addUses_Expr ( Bool* set, IRExpr* e )
3090 Int i;
3091 switch (e->tag) {
3092 case Iex_GetI:
3093 addUses_Expr(set, e->Iex.GetI.ix);
3094 return;
3095 case Iex_ITE:
3096 addUses_Expr(set, e->Iex.ITE.cond);
3097 addUses_Expr(set, e->Iex.ITE.iftrue);
3098 addUses_Expr(set, e->Iex.ITE.iffalse);
3099 return;
3100 case Iex_CCall:
3101 for (i = 0; e->Iex.CCall.args[i]; i++)
3102 addUses_Expr(set, e->Iex.CCall.args[i]);
3103 return;
3104 case Iex_Load:
3105 addUses_Expr(set, e->Iex.Load.addr);
3106 return;
3107 case Iex_Qop:
3108 addUses_Expr(set, e->Iex.Qop.details->arg1);
3109 addUses_Expr(set, e->Iex.Qop.details->arg2);
3110 addUses_Expr(set, e->Iex.Qop.details->arg3);
3111 addUses_Expr(set, e->Iex.Qop.details->arg4);
3112 return;
3113 case Iex_Triop:
3114 addUses_Expr(set, e->Iex.Triop.details->arg1);
3115 addUses_Expr(set, e->Iex.Triop.details->arg2);
3116 addUses_Expr(set, e->Iex.Triop.details->arg3);
3117 return;
3118 case Iex_Binop:
3119 addUses_Expr(set, e->Iex.Binop.arg1);
3120 addUses_Expr(set, e->Iex.Binop.arg2);
3121 return;
3122 case Iex_Unop:
3123 addUses_Expr(set, e->Iex.Unop.arg);
3124 return;
3125 case Iex_RdTmp:
3126 addUses_Temp(set, e->Iex.RdTmp.tmp);
3127 return;
3128 case Iex_Const:
3129 case Iex_Get:
3130 return;
3131 default:
3132 vex_printf("\n");
3133 ppIRExpr(e);
3134 vpanic("addUses_Expr");
3138 static void addUses_Stmt ( Bool* set, IRStmt* st )
3140 Int i;
3141 IRDirty* d;
3142 IRCAS* cas;
3143 switch (st->tag) {
3144 case Ist_AbiHint:
3145 addUses_Expr(set, st->Ist.AbiHint.base);
3146 addUses_Expr(set, st->Ist.AbiHint.nia);
3147 return;
3148 case Ist_PutI:
3149 addUses_Expr(set, st->Ist.PutI.details->ix);
3150 addUses_Expr(set, st->Ist.PutI.details->data);
3151 return;
3152 case Ist_WrTmp:
3153 addUses_Expr(set, st->Ist.WrTmp.data);
3154 return;
3155 case Ist_Put:
3156 addUses_Expr(set, st->Ist.Put.data);
3157 return;
3158 case Ist_Store:
3159 addUses_Expr(set, st->Ist.Store.addr);
3160 addUses_Expr(set, st->Ist.Store.data);
3161 return;
3162 case Ist_StoreG: {
3163 IRStoreG* sg = st->Ist.StoreG.details;
3164 addUses_Expr(set, sg->addr);
3165 addUses_Expr(set, sg->data);
3166 addUses_Expr(set, sg->guard);
3167 return;
3169 case Ist_LoadG: {
3170 IRLoadG* lg = st->Ist.LoadG.details;
3171 addUses_Expr(set, lg->addr);
3172 addUses_Expr(set, lg->alt);
3173 addUses_Expr(set, lg->guard);
3174 return;
3176 case Ist_CAS:
3177 cas = st->Ist.CAS.details;
3178 addUses_Expr(set, cas->addr);
3179 if (cas->expdHi)
3180 addUses_Expr(set, cas->expdHi);
3181 addUses_Expr(set, cas->expdLo);
3182 if (cas->dataHi)
3183 addUses_Expr(set, cas->dataHi);
3184 addUses_Expr(set, cas->dataLo);
3185 return;
3186 case Ist_LLSC:
3187 addUses_Expr(set, st->Ist.LLSC.addr);
3188 if (st->Ist.LLSC.storedata)
3189 addUses_Expr(set, st->Ist.LLSC.storedata);
3190 return;
3191 case Ist_Dirty:
3192 d = st->Ist.Dirty.details;
3193 if (d->mFx != Ifx_None)
3194 addUses_Expr(set, d->mAddr);
3195 addUses_Expr(set, d->guard);
3196 for (i = 0; d->args[i] != NULL; i++) {
3197 IRExpr* arg = d->args[i];
3198 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
3199 addUses_Expr(set, arg);
3201 return;
3202 case Ist_NoOp:
3203 case Ist_IMark:
3204 case Ist_MBE:
3205 return;
3206 case Ist_Exit:
3207 addUses_Expr(set, st->Ist.Exit.guard);
3208 return;
3209 default:
3210 vex_printf("\n");
3211 ppIRStmt(st);
3212 vpanic("addUses_Stmt");
3217 /* Is this literally IRExpr_Const(IRConst_U1(False)) ? */
3218 static Bool isZeroU1 ( IRExpr* e )
3220 return toBool( e->tag == Iex_Const
3221 && e->Iex.Const.con->tag == Ico_U1
3222 && e->Iex.Const.con->Ico.U1 == False );
3225 /* Is this literally IRExpr_Const(IRConst_U1(True)) ? */
3226 static Bool isOneU1 ( IRExpr* e )
3228 return toBool( e->tag == Iex_Const
3229 && e->Iex.Const.con->tag == Ico_U1
3230 && e->Iex.Const.con->Ico.U1 == True );
3234 /* Note, this destructively modifies the given IRSB. */
3236 /* Scan backwards through statements, carrying a set of IRTemps which
3237 are known to be used after the current point. On encountering 't =
3238 E', delete the binding if it is not used. Otherwise, add any temp
3239 uses to the set and keep on moving backwards.
3241 As an enhancement, the first (backwards) pass searches for IR exits
3242 with always-taken conditions and notes the location of the earliest
3243 one in the block. If any such are found, a second pass copies the
3244 exit destination and jump kind to the bb-end. Then, the exit and
3245 all statements following it are turned into no-ops.
3248 /* notstatic */ void do_deadcode_BB ( IRSB* bb )
3250 Int i, i_unconditional_exit;
3251 Int n_tmps = bb->tyenv->types_used;
3252 Bool* set = LibVEX_Alloc_inline(n_tmps * sizeof(Bool));
3253 IRStmt* st;
3255 for (i = 0; i < n_tmps; i++)
3256 set[i] = False;
3258 /* start off by recording IRTemp uses in the next field. */
3259 addUses_Expr(set, bb->next);
3261 /* First pass */
3263 /* Work backwards through the stmts */
3264 i_unconditional_exit = -1;
3265 for (i = bb->stmts_used-1; i >= 0; i--) {
3266 st = bb->stmts[i];
3267 if (st->tag == Ist_NoOp)
3268 continue;
3269 /* take note of any unconditional exits */
3270 if (st->tag == Ist_Exit
3271 && isOneU1(st->Ist.Exit.guard))
3272 i_unconditional_exit = i;
3273 if (st->tag == Ist_WrTmp
3274 && set[(Int)(st->Ist.WrTmp.tmp)] == False) {
3275 /* it's an IRTemp which never got used. Delete it. */
3276 if (DEBUG_IROPT) {
3277 vex_printf("DEAD: ");
3278 ppIRStmt(st);
3279 vex_printf("\n");
3281 bb->stmts[i] = IRStmt_NoOp();
3283 else
3284 if (st->tag == Ist_Dirty
3285 && st->Ist.Dirty.details->guard
3286 && isZeroU1(st->Ist.Dirty.details->guard)) {
3287 /* This is a dirty helper which will never get called.
3288 Delete it. */
3289 bb->stmts[i] = IRStmt_NoOp();
3291 else {
3292 /* Note any IRTemp uses made by the current statement. */
3293 addUses_Stmt(set, st);
3297 /* Optional second pass: if any unconditional exits were found,
3298 delete them and all following statements. */
3300 if (i_unconditional_exit != -1) {
3301 if (0) vex_printf("ZAPPING ALL FORWARDS from %d\n",
3302 i_unconditional_exit);
3303 vassert(i_unconditional_exit >= 0
3304 && i_unconditional_exit < bb->stmts_used);
3305 bb->next
3306 = IRExpr_Const( bb->stmts[i_unconditional_exit]->Ist.Exit.dst );
3307 bb->jumpkind
3308 = bb->stmts[i_unconditional_exit]->Ist.Exit.jk;
3309 bb->offsIP
3310 = bb->stmts[i_unconditional_exit]->Ist.Exit.offsIP;
3311 for (i = i_unconditional_exit; i < bb->stmts_used; i++)
3312 bb->stmts[i] = IRStmt_NoOp();
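   /* Added illustration, not part of the original sources: in a block
         t1 = GET:I32(0)
         t2 = Add32(t1,t1)
         PUT(0) = t1
      the backwards scan finds that t2 is never used, so its binding is
      replaced by a NoOp, while the binding of t1 survives because the
      PUT uses it.  The temp numbers and offsets are made up for the
      example. */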
3317 /*---------------------------------------------------------------*/
3318 /*--- Specialisation of helper function calls, in ---*/
3319 /*--- collaboration with the front end ---*/
3320 /*---------------------------------------------------------------*/
3322 static
3323 IRSB* spec_helpers_BB(
3324 IRSB* bb,
3325 IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int)
3328 Int i;
3329 IRStmt* st;
3330 IRExpr* ex;
3331 Bool any = False;
3333 for (i = bb->stmts_used-1; i >= 0; i--) {
3334 st = bb->stmts[i];
3336 if (st->tag != Ist_WrTmp
3337 || st->Ist.WrTmp.data->tag != Iex_CCall)
3338 continue;
3340 ex = (*specHelper)( st->Ist.WrTmp.data->Iex.CCall.cee->name,
3341 st->Ist.WrTmp.data->Iex.CCall.args,
3342 &bb->stmts[0], i );
3343 if (!ex)
3344 /* the front end can't think of a suitable replacement */
3345 continue;
3347 /* We got something better. Install it in the bb. */
3348 any = True;
3349 bb->stmts[i]
3350 = IRStmt_WrTmp(st->Ist.WrTmp.tmp, ex);
3352 if (0) {
3353 vex_printf("SPEC: ");
3354 ppIRExpr(st->Ist.WrTmp.data);
3355 vex_printf(" --> ");
3356 ppIRExpr(ex);
3357 vex_printf("\n");
3361 if (any)
3362 bb = flatten_BB(bb);
3363 return bb;
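   /* Added note, not part of the original sources: specHelper is free to
      return an arbitrary (non-flat) expression tree for a recognised
      helper call, which is why the block is re-flattened whenever at
      least one call has been replaced. */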
3367 /*---------------------------------------------------------------*/
3368 /*--- Determination of guest state aliasing relationships ---*/
3369 /*---------------------------------------------------------------*/
3371 /* These are helper functions for CSE and GetI/PutI transformations.
3373 Determine, to the extent possible, the relationship between two
3374 guest state accesses. The possible outcomes are:
3376 * Exact alias. These two accesses denote precisely the same
3377 piece of the guest state.
3379 * Definitely no alias. These two accesses are guaranteed not to
3380 overlap any part of the guest state.
3382 * Unknown -- if neither of the above can be established.
3384 If in doubt, return Unknown. */
3386 typedef
3387 enum { ExactAlias, NoAlias, UnknownAlias }
3388 GSAliasing;
3391 /* Produces the alias relation between an indexed guest
3392 state access and a non-indexed access. */
3394 static
3395 GSAliasing getAliasingRelation_IC ( IRRegArray* descr1, IRExpr* ix1,
3396 Int offset2, IRType ty2 )
3398 UInt minoff1, maxoff1, minoff2, maxoff2;
3400 getArrayBounds( descr1, &minoff1, &maxoff1 );
3401 minoff2 = offset2;
3402 maxoff2 = minoff2 + sizeofIRType(ty2) - 1;
3404 if (maxoff1 < minoff2 || maxoff2 < minoff1)
3405 return NoAlias;
3407 /* Could probably do better here if required. For the moment
3408 however just claim not to know anything more. */
3409 return UnknownAlias;
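   /* Added illustration, not part of the original sources: if the array
      described by descr1 covers guest offsets 100..131 and the scalar
      access is 4 bytes at offset 200, the ranges are disjoint and the
      result is NoAlias; a 4-byte access at offset 120 overlaps the array,
      and since the index expression is not examined further, the result
      is UnknownAlias.  The offsets are made up for the example. */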
3413 /* Produces the alias relation between two indexed guest state
3414 accesses. */
3416 static
3417 GSAliasing getAliasingRelation_II (
3418 IRRegArray* descr1, IRExpr* ix1, Int bias1,
3419 IRRegArray* descr2, IRExpr* ix2, Int bias2
3422 UInt minoff1, maxoff1, minoff2, maxoff2;
3423 Int iters;
3425 /* First try hard to show they don't alias. */
3426 getArrayBounds( descr1, &minoff1, &maxoff1 );
3427 getArrayBounds( descr2, &minoff2, &maxoff2 );
3428 if (maxoff1 < minoff2 || maxoff2 < minoff1)
3429 return NoAlias;
3431 /* So the two arrays at least partially overlap. To get any
3432 further we'll have to be sure that the descriptors are
3433 identical. */
3434 if (!eqIRRegArray(descr1, descr2))
3435 return UnknownAlias;
3437 /* The descriptors are identical. Now the only difference can be
3438 in the index expressions. If they cannot be shown to be
3439 identical, we have to say we don't know what the aliasing
3440 relation will be. Now, since the IR is flattened, the index
3441 expressions should be atoms -- either consts or tmps. So that
3442 makes the comparison simple. */
3443 vassert(isIRAtom(ix1));
3444 vassert(isIRAtom(ix2));
3445 if (!eqIRAtom(ix1,ix2))
3446 return UnknownAlias;
3448 /* Ok, the index expressions are identical. So now the only way
3449 they can be different is in the bias. Normalise this
3450 paranoidly, to reliably establish equality/non-equality. */
3452 /* So now we know that the GetI and PutI index the same array
3453 with the same base. Are the offsets the same, modulo the
3454 array size? Do this paranoidly. */
3455 vassert(descr1->nElems == descr2->nElems);
3456 vassert(descr1->elemTy == descr2->elemTy);
3457 vassert(descr1->base == descr2->base);
3458 iters = 0;
3459 while (bias1 < 0 || bias2 < 0) {
3460 bias1 += descr1->nElems;
3461 bias2 += descr1->nElems;
3462 iters++;
3463 if (iters > 10)
3464 vpanic("getAliasingRelation: iters");
3466 vassert(bias1 >= 0 && bias2 >= 0);
3467 bias1 %= descr1->nElems;
3468 bias2 %= descr1->nElems;
3469 vassert(bias1 >= 0 && bias1 < descr1->nElems);
3470 vassert(bias2 >= 0 && bias2 < descr1->nElems);
3472    /* Finally, bias1 and bias2 are normalised into the range
3473       0 .. descr1->nElems - 1.  And so we can establish
3474 equality/non-equality. */
3476 return bias1==bias2 ? ExactAlias : NoAlias;
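   /* Added illustration, not part of the original sources: with identical
      descriptors of nElems == 8 and equal index atoms, biases of -3 and 5
      are first shifted non-negative (-3+8 == 5, 5+8 == 13) and then
      reduced mod 8, giving 5 and 5, hence ExactAlias; biases of -3 and 6
      would normalise to 5 and 6, hence NoAlias.  The numbers are made up
      for the example. */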
3480 /*---------------------------------------------------------------*/
3481 /*--- Common Subexpression Elimination ---*/
3482 /*---------------------------------------------------------------*/
3484 /* Expensive in time and space. */
3486 /* Uses two environments:
3487 a IRTemp -> IRTemp mapping
3488 a mapping from AvailExpr* to IRTemp
3491 typedef
3492 struct {
3493 enum { TCc, TCt } tag;
3494 union { IRTemp tmp; IRConst* con; } u;
3496 TmpOrConst;
3498 static Bool eqTmpOrConst ( TmpOrConst* tc1, TmpOrConst* tc2 )
3500 if (tc1->tag != tc2->tag)
3501 return False;
3502 switch (tc1->tag) {
3503 case TCc:
3504 return eqIRConst(tc1->u.con, tc2->u.con);
3505 case TCt:
3506 return tc1->u.tmp == tc2->u.tmp;
3507 default:
3508 vpanic("eqTmpOrConst");
3512 static Bool eqIRCallee ( IRCallee* cee1, IRCallee* cee2 )
3514 Bool eq = cee1->addr == cee2->addr;
3515 if (eq) {
3516 vassert(cee1->regparms == cee2->regparms);
3517 vassert(cee1->mcx_mask == cee2->mcx_mask);
3518 /* Names should be the same too, but we don't bother to
3519 check. */
3521 return eq;
3524 /* Convert an atomic IRExpr* to a TmpOrConst. */
3525 static void irExpr_to_TmpOrConst ( /*OUT*/TmpOrConst* tc, IRExpr* e )
3527 switch (e->tag) {
3528 case Iex_RdTmp:
3529 tc->tag = TCt;
3530 tc->u.tmp = e->Iex.RdTmp.tmp;
3531 break;
3532 case Iex_Const:
3533 tc->tag = TCc;
3534 tc->u.con = e->Iex.Const.con;
3535 break;
3536 default:
3537 /* Getting here is a serious error. It means that the
3538 presented arg isn't an IR atom, as it should be. */
3539 vpanic("irExpr_to_TmpOrConst");
3543 /* Convert a TmpOrConst to an atomic IRExpr*. */
3544 static IRExpr* tmpOrConst_to_IRExpr ( TmpOrConst* tc )
3546 switch (tc->tag) {
3547 case TCc: return IRExpr_Const(tc->u.con);
3548 case TCt: return IRExpr_RdTmp(tc->u.tmp);
3549 default: vpanic("tmpOrConst_to_IRExpr");
3553 /* Convert a NULL terminated IRExpr* vector to an array of
3554 TmpOrConsts, and a length. */
3555 static void irExprVec_to_TmpOrConsts ( /*OUT*/TmpOrConst** outs,
3556 /*OUT*/Int* nOuts,
3557 IRExpr** ins )
3559 Int i, n;
3560 /* We have to make two passes, one to count, one to copy. */
3561 for (n = 0; ins[n]; n++)
3563 *outs = LibVEX_Alloc_inline(n * sizeof(TmpOrConst));
3564 *nOuts = n;
3565 /* and now copy .. */
3566 for (i = 0; i < n; i++) {
3567 IRExpr* arg = ins[i];
3568 TmpOrConst* dst = &(*outs)[i];
3569 irExpr_to_TmpOrConst(dst, arg);
3573 typedef
3574 struct {
3575 enum { Ut, Btt, Btc, Bct, Cf64i, Ittt, Itct, Ittc, Itcc, GetIt,
3576 CCall, Load
3577 } tag;
3578 union {
3579 /* unop(tmp) */
3580 struct {
3581 IROp op;
3582 IRTemp arg;
3583 } Ut;
3584 /* binop(tmp,tmp) */
3585 struct {
3586 IROp op;
3587 IRTemp arg1;
3588 IRTemp arg2;
3589 } Btt;
3590 /* binop(tmp,const) */
3591 struct {
3592 IROp op;
3593 IRTemp arg1;
3594 IRConst con2;
3595 } Btc;
3596 /* binop(const,tmp) */
3597 struct {
3598 IROp op;
3599 IRConst con1;
3600 IRTemp arg2;
3601 } Bct;
3602 /* F64i-style const */
3603 struct {
3604 ULong f64i;
3605 } Cf64i;
3606 /* ITE(tmp,tmp,tmp) */
3607 struct {
3608 IRTemp co;
3609 IRTemp e1;
3610 IRTemp e0;
3611 } Ittt;
3612 /* ITE(tmp,tmp,const) */
3613 struct {
3614 IRTemp co;
3615 IRTemp e1;
3616 IRConst con0;
3617 } Ittc;
3618 /* ITE(tmp,const,tmp) */
3619 struct {
3620 IRTemp co;
3621 IRConst con1;
3622 IRTemp e0;
3623 } Itct;
3624 /* ITE(tmp,const,const) */
3625 struct {
3626 IRTemp co;
3627 IRConst con1;
3628 IRConst con0;
3629 } Itcc;
3630 /* GetI(descr,tmp,bias)*/
3631 struct {
3632 IRRegArray* descr;
3633 IRTemp ix;
3634 Int bias;
3635 } GetIt;
3636 /* Clean helper call */
3637 struct {
3638 IRCallee* cee;
3639 TmpOrConst* args;
3640 Int nArgs;
3641 IRType retty;
3642 } CCall;
3643 /* Load(end,ty,addr) */
3644 struct {
3645 IREndness end;
3646 IRType ty;
3647 TmpOrConst addr;
3648 } Load;
3649 } u;
3651 AvailExpr;
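/* Added illustration, not part of the original sources: the RHS
   Add32(t3,0x4:I32) is summarised as a Btc node with op == Iop_Add32,
   arg1 == t3 and con2 == 0x4:I32; a later statement computing the same
   RHS yields an equal AvailExpr, so CSE can reuse the earlier temp
   instead of recomputing it.  The temp number is made up for the
   example. */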
3653 static Bool eq_AvailExpr ( AvailExpr* a1, AvailExpr* a2 )
3655 if (LIKELY(a1->tag != a2->tag))
3656 return False;
3657 switch (a1->tag) {
3658 case Ut:
3659 return toBool(
3660 a1->u.Ut.op == a2->u.Ut.op
3661 && a1->u.Ut.arg == a2->u.Ut.arg);
3662 case Btt:
3663 return toBool(
3664 a1->u.Btt.op == a2->u.Btt.op
3665 && a1->u.Btt.arg1 == a2->u.Btt.arg1
3666 && a1->u.Btt.arg2 == a2->u.Btt.arg2);
3667 case Btc:
3668 return toBool(
3669 a1->u.Btc.op == a2->u.Btc.op
3670 && a1->u.Btc.arg1 == a2->u.Btc.arg1
3671 && eqIRConst(&a1->u.Btc.con2, &a2->u.Btc.con2));
3672 case Bct:
3673 return toBool(
3674 a1->u.Bct.op == a2->u.Bct.op
3675 && a1->u.Bct.arg2 == a2->u.Bct.arg2
3676 && eqIRConst(&a1->u.Bct.con1, &a2->u.Bct.con1));
3677 case Cf64i:
3678 return toBool(a1->u.Cf64i.f64i == a2->u.Cf64i.f64i);
3679 case Ittt:
3680 return toBool(a1->u.Ittt.co == a2->u.Ittt.co
3681 && a1->u.Ittt.e1 == a2->u.Ittt.e1
3682 && a1->u.Ittt.e0 == a2->u.Ittt.e0);
3683 case Ittc:
3684 return toBool(a1->u.Ittc.co == a2->u.Ittc.co
3685 && a1->u.Ittc.e1 == a2->u.Ittc.e1
3686 && eqIRConst(&a1->u.Ittc.con0, &a2->u.Ittc.con0));
3687 case Itct:
3688 return toBool(a1->u.Itct.co == a2->u.Itct.co
3689 && eqIRConst(&a1->u.Itct.con1, &a2->u.Itct.con1)
3690 && a1->u.Itct.e0 == a2->u.Itct.e0);
3691 case Itcc:
3692 return toBool(a1->u.Itcc.co == a2->u.Itcc.co
3693 && eqIRConst(&a1->u.Itcc.con1, &a2->u.Itcc.con1)
3694 && eqIRConst(&a1->u.Itcc.con0, &a2->u.Itcc.con0));
3695 case GetIt:
3696 return toBool(eqIRRegArray(a1->u.GetIt.descr, a2->u.GetIt.descr)
3697 && a1->u.GetIt.ix == a2->u.GetIt.ix
3698 && a1->u.GetIt.bias == a2->u.GetIt.bias);
3699 case CCall: {
3700 Int i, n;
3701 Bool eq = a1->u.CCall.nArgs == a2->u.CCall.nArgs
3702 && eqIRCallee(a1->u.CCall.cee, a2->u.CCall.cee);
3703 if (eq) {
3704 n = a1->u.CCall.nArgs;
3705 for (i = 0; i < n; i++) {
3706 if (!eqTmpOrConst( &a1->u.CCall.args[i],
3707 &a2->u.CCall.args[i] )) {
3708 eq = False;
3709 break;
3713 if (eq) vassert(a1->u.CCall.retty == a2->u.CCall.retty);
3714 return eq;
3716 case Load: {
3717 Bool eq = toBool(a1->u.Load.end == a2->u.Load.end
3718 && a1->u.Load.ty == a2->u.Load.ty
3719 && eqTmpOrConst(&a1->u.Load.addr, &a2->u.Load.addr));
3720 return eq;
3722 default:
3723 vpanic("eq_AvailExpr");
3727 static IRExpr* availExpr_to_IRExpr ( AvailExpr* ae )
3729 IRConst *con, *con0, *con1;
3730 switch (ae->tag) {
3731 case Ut:
3732 return IRExpr_Unop( ae->u.Ut.op, IRExpr_RdTmp(ae->u.Ut.arg) );
3733 case Btt:
3734 return IRExpr_Binop( ae->u.Btt.op,
3735 IRExpr_RdTmp(ae->u.Btt.arg1),
3736 IRExpr_RdTmp(ae->u.Btt.arg2) );
3737 case Btc:
3738 con = LibVEX_Alloc_inline(sizeof(IRConst));
3739 *con = ae->u.Btc.con2;
3740 return IRExpr_Binop( ae->u.Btc.op,
3741 IRExpr_RdTmp(ae->u.Btc.arg1),
3742 IRExpr_Const(con) );
3743 case Bct:
3744 con = LibVEX_Alloc_inline(sizeof(IRConst));
3745 *con = ae->u.Bct.con1;
3746 return IRExpr_Binop( ae->u.Bct.op,
3747 IRExpr_Const(con),
3748 IRExpr_RdTmp(ae->u.Bct.arg2) );
3749 case Cf64i:
3750 return IRExpr_Const(IRConst_F64i(ae->u.Cf64i.f64i));
3751 case Ittt:
3752 return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittt.co),
3753 IRExpr_RdTmp(ae->u.Ittt.e1),
3754 IRExpr_RdTmp(ae->u.Ittt.e0));
3755 case Ittc:
3756 con0 = LibVEX_Alloc_inline(sizeof(IRConst));
3757 *con0 = ae->u.Ittc.con0;
3758 return IRExpr_ITE(IRExpr_RdTmp(ae->u.Ittc.co),
3759 IRExpr_RdTmp(ae->u.Ittc.e1),
3760 IRExpr_Const(con0));
3761 case Itct:
3762 con1 = LibVEX_Alloc_inline(sizeof(IRConst));
3763 *con1 = ae->u.Itct.con1;
3764 return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itct.co),
3765 IRExpr_Const(con1),
3766 IRExpr_RdTmp(ae->u.Itct.e0));
3768 case Itcc:
3769 con0 = LibVEX_Alloc_inline(sizeof(IRConst));
3770 con1 = LibVEX_Alloc_inline(sizeof(IRConst));
3771 *con0 = ae->u.Itcc.con0;
3772 *con1 = ae->u.Itcc.con1;
3773 return IRExpr_ITE(IRExpr_RdTmp(ae->u.Itcc.co),
3774 IRExpr_Const(con1),
3775 IRExpr_Const(con0));
3776 case GetIt:
3777 return IRExpr_GetI(ae->u.GetIt.descr,
3778 IRExpr_RdTmp(ae->u.GetIt.ix),
3779 ae->u.GetIt.bias);
3780 case CCall: {
3781 Int i, n = ae->u.CCall.nArgs;
3782 vassert(n >= 0);
3783 IRExpr** vec = LibVEX_Alloc_inline((n+1) * sizeof(IRExpr*));
3784 vec[n] = NULL;
3785 for (i = 0; i < n; i++) {
3786 vec[i] = tmpOrConst_to_IRExpr(&ae->u.CCall.args[i]);
3788 return IRExpr_CCall(ae->u.CCall.cee,
3789 ae->u.CCall.retty,
3790 vec);
3792 case Load:
3793 return IRExpr_Load(ae->u.Load.end, ae->u.Load.ty,
3794 tmpOrConst_to_IRExpr(&ae->u.Load.addr));
3795 default:
3796 vpanic("availExpr_to_IRExpr");
3800 inline
3801 static IRTemp subst_AvailExpr_Temp ( HashHW* env, IRTemp tmp )
3803 HWord res;
3804 /* env :: IRTemp -> IRTemp */
3805 if (lookupHHW( env, &res, (HWord)tmp ))
3806 return (IRTemp)res;
3807 else
3808 return tmp;
3811 inline
3812 static void subst_AvailExpr_TmpOrConst ( /*MB_MOD*/TmpOrConst* tc,
3813 HashHW* env )
3815 /* env :: IRTemp -> IRTemp */
3816 if (tc->tag == TCt) {
3817 tc->u.tmp = subst_AvailExpr_Temp( env, tc->u.tmp );
3821 static void subst_AvailExpr ( HashHW* env, AvailExpr* ae )
3823 /* env :: IRTemp -> IRTemp */
3824 switch (ae->tag) {
3825 case Ut:
3826 ae->u.Ut.arg = subst_AvailExpr_Temp( env, ae->u.Ut.arg );
3827 break;
3828 case Btt:
3829 ae->u.Btt.arg1 = subst_AvailExpr_Temp( env, ae->u.Btt.arg1 );
3830 ae->u.Btt.arg2 = subst_AvailExpr_Temp( env, ae->u.Btt.arg2 );
3831 break;
3832 case Btc:
3833 ae->u.Btc.arg1 = subst_AvailExpr_Temp( env, ae->u.Btc.arg1 );
3834 break;
3835 case Bct:
3836 ae->u.Bct.arg2 = subst_AvailExpr_Temp( env, ae->u.Bct.arg2 );
3837 break;
3838 case Cf64i:
3839 break;
3840 case Ittt:
3841 ae->u.Ittt.co = subst_AvailExpr_Temp( env, ae->u.Ittt.co );
3842 ae->u.Ittt.e1 = subst_AvailExpr_Temp( env, ae->u.Ittt.e1 );
3843 ae->u.Ittt.e0 = subst_AvailExpr_Temp( env, ae->u.Ittt.e0 );
3844 break;
3845 case Ittc:
3846 ae->u.Ittc.co = subst_AvailExpr_Temp( env, ae->u.Ittc.co );
3847 ae->u.Ittc.e1 = subst_AvailExpr_Temp( env, ae->u.Ittc.e1 );
3848 break;
3849 case Itct:
3850 ae->u.Itct.co = subst_AvailExpr_Temp( env, ae->u.Itct.co );
3851 ae->u.Itct.e0 = subst_AvailExpr_Temp( env, ae->u.Itct.e0 );
3852 break;
3853 case Itcc:
3854 ae->u.Itcc.co = subst_AvailExpr_Temp( env, ae->u.Itcc.co );
3855 break;
3856 case GetIt:
3857 ae->u.GetIt.ix = subst_AvailExpr_Temp( env, ae->u.GetIt.ix );
3858 break;
3859 case CCall: {
3860 Int i, n = ae->u.CCall.nArgs;
3861 for (i = 0; i < n; i++) {
3862 subst_AvailExpr_TmpOrConst(&ae->u.CCall.args[i], env);
3864 break;
3866 case Load:
3867 subst_AvailExpr_TmpOrConst(&ae->u.Load.addr, env);
3868 break;
3869 default:
3870 vpanic("subst_AvailExpr");
3874 static AvailExpr* irExpr_to_AvailExpr ( IRExpr* e, Bool allowLoadsToBeCSEd )
3876 AvailExpr* ae;
3878 switch (e->tag) {
3879 case Iex_Unop:
3880 if (e->Iex.Unop.arg->tag == Iex_RdTmp) {
3881 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3882 ae->tag = Ut;
3883 ae->u.Ut.op = e->Iex.Unop.op;
3884 ae->u.Ut.arg = e->Iex.Unop.arg->Iex.RdTmp.tmp;
3885 return ae;
3887 break;
3889 case Iex_Binop:
3890 if (e->Iex.Binop.arg1->tag == Iex_RdTmp) {
3891 if (e->Iex.Binop.arg2->tag == Iex_RdTmp) {
3892 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3893 ae->tag = Btt;
3894 ae->u.Btt.op = e->Iex.Binop.op;
3895 ae->u.Btt.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
3896 ae->u.Btt.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
3897 return ae;
3899 if (e->Iex.Binop.arg2->tag == Iex_Const) {
3900 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3901 ae->tag = Btc;
3902 ae->u.Btc.op = e->Iex.Binop.op;
3903 ae->u.Btc.arg1 = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
3904 ae->u.Btc.con2 = *(e->Iex.Binop.arg2->Iex.Const.con);
3905 return ae;
3907 } else if (e->Iex.Binop.arg1->tag == Iex_Const
3908 && e->Iex.Binop.arg2->tag == Iex_RdTmp) {
3909 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3910 ae->tag = Bct;
3911 ae->u.Bct.op = e->Iex.Binop.op;
3912 ae->u.Bct.arg2 = e->Iex.Binop.arg2->Iex.RdTmp.tmp;
3913 ae->u.Bct.con1 = *(e->Iex.Binop.arg1->Iex.Const.con);
3914 return ae;
3916 break;
3918 case Iex_Const:
3919 if (e->Iex.Const.con->tag == Ico_F64i) {
3920 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3921 ae->tag = Cf64i;
3922 ae->u.Cf64i.f64i = e->Iex.Const.con->Ico.F64i;
3923 return ae;
3925 break;
3927 case Iex_ITE:
3928 if (e->Iex.ITE.cond->tag == Iex_RdTmp) {
3929 if (e->Iex.ITE.iffalse->tag == Iex_RdTmp) {
3930 if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) {
3931 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3932 ae->tag = Ittt;
3933 ae->u.Ittt.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
3934 ae->u.Ittt.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp;
3935 ae->u.Ittt.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp;
3936 return ae;
3938 if (e->Iex.ITE.iftrue->tag == Iex_Const) {
3939 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3940 ae->tag = Itct;
3941 ae->u.Itct.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
3942 ae->u.Itct.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con);
3943 ae->u.Itct.e0 = e->Iex.ITE.iffalse->Iex.RdTmp.tmp;
3944 return ae;
3946 } else if (e->Iex.ITE.iffalse->tag == Iex_Const) {
3947 if (e->Iex.ITE.iftrue->tag == Iex_RdTmp) {
3948 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3949 ae->tag = Ittc;
3950 ae->u.Ittc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
3951 ae->u.Ittc.e1 = e->Iex.ITE.iftrue->Iex.RdTmp.tmp;
3952 ae->u.Ittc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con);
3953 return ae;
3955 if (e->Iex.ITE.iftrue->tag == Iex_Const) {
3956 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3957 ae->tag = Itcc;
3958 ae->u.Itcc.co = e->Iex.ITE.cond->Iex.RdTmp.tmp;
3959 ae->u.Itcc.con1 = *(e->Iex.ITE.iftrue->Iex.Const.con);
3960 ae->u.Itcc.con0 = *(e->Iex.ITE.iffalse->Iex.Const.con);
3961 return ae;
3965 break;
3967 case Iex_GetI:
3968 if (e->Iex.GetI.ix->tag == Iex_RdTmp) {
3969 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3970 ae->tag = GetIt;
3971 ae->u.GetIt.descr = e->Iex.GetI.descr;
3972 ae->u.GetIt.ix = e->Iex.GetI.ix->Iex.RdTmp.tmp;
3973 ae->u.GetIt.bias = e->Iex.GetI.bias;
3974 return ae;
3976 break;
3978 case Iex_CCall:
3979 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
3980 ae->tag = CCall;
3981 /* Ok to share only the cee, since it is immutable. */
3982 ae->u.CCall.cee = e->Iex.CCall.cee;
3983 ae->u.CCall.retty = e->Iex.CCall.retty;
3984 /* irExprVec_to_TmpOrConsts will assert if the args are
3985 neither tmps nor constants, but that's ok .. that's all they
3986 should be. */
3987 irExprVec_to_TmpOrConsts(
3988 &ae->u.CCall.args, &ae->u.CCall.nArgs,
3989 e->Iex.CCall.args
3991 return ae;
3993 case Iex_Load:
3994 /* If the caller of do_cse_BB has requested that loads also
3995 be CSEd, convert them into AvailExprs. If not, we'll just
3996 return NULL here, and the load is never considered
3997 "available", which effectively disables CSEing of loads, as
3998 desired. */
3999 if (allowLoadsToBeCSEd) {
4000 ae = LibVEX_Alloc_inline(sizeof(AvailExpr));
4001 ae->tag = Load;
4002 ae->u.Load.end = e->Iex.Load.end;
4003 ae->u.Load.ty = e->Iex.Load.ty;
4004 irExpr_to_TmpOrConst(&ae->u.Load.addr, e->Iex.Load.addr);
4005 return ae;
4007 break;
4009 default:
4010 break;
4013 return NULL;
4017 /* The BB is modified in-place. Returns True if any changes were
4018 made. The caller can choose whether or not loads should be CSEd.
4019 In the normal course of things we don't do that, since CSEing loads
4020 is something of a dodgy proposition if the guest program is doing
4021 some screwy stuff to do with races and spinloops. */
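/* For example, given
      t5 = Add32(t1,t2)
      ...
      t9 = Add32(t1,t2)
   the second statement is rewritten as "t9 = t5" and the binding
   t9 -> t5 is remembered for later substitutions.  Only GetI and
   Load bindings can be invalidated by intervening statements;
   pure-operator bindings like the one above cannot. */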
4023 static Bool do_cse_BB ( IRSB* bb, Bool allowLoadsToBeCSEd )
4025 Int i, j, paranoia;
4026 IRTemp t, q;
4027 IRStmt* st;
4028 AvailExpr* eprime;
4029 AvailExpr* ae;
4030 Bool invalidate;
4031 Bool anyDone = False;
4033 HashHW* tenv = newHHW(); /* :: IRTemp -> IRTemp */
4034 HashHW* aenv = newHHW(); /* :: AvailExpr* -> IRTemp */
4036 vassert(sizeof(IRTemp) <= sizeof(HWord));
4038 if (0) { ppIRSB(bb); vex_printf("\n\n"); }
4040 /* Iterate forwards over the stmts.
4041 On seeing "t = E", where E is one of the AvailExpr forms:
4042 let E' = apply tenv substitution to E
4043 search aenv for E'
4044 if a mapping E' -> q is found,
4045 replace this stmt by "t = q"
4046 and add binding t -> q to tenv
4047 else
4048 add binding E' -> t to aenv
4049 replace this stmt by "t = E'"
4051 Other statements are only interesting to the extent that they
4052 might invalidate some of the expressions in aenv. So there is
4053 an invalidate-bindings check for each statement seen.
4055 for (i = 0; i < bb->stmts_used; i++) {
4056 st = bb->stmts[i];
4058 /* ------ BEGIN invalidate aenv bindings ------ */
4059 /* This is critical: remove from aenv any E' -> .. bindings
4060 which might be invalidated by this statement. The only
4061 vulnerable kinds of bindings are the GetI and Load kinds.
4062 Dirty call - dump (paranoia level -> 2)
4063 Store - dump (ditto)
4064 Put, PutI - dump unless no-overlap is proven (.. -> 1)
4065 Uses getAliasingRelation_IC and getAliasingRelation_II
4066 to do the no-overlap assessments needed for Put/PutI.
4068 switch (st->tag) {
4069 case Ist_Dirty: case Ist_Store: case Ist_MBE:
4070 case Ist_CAS: case Ist_LLSC:
4071 case Ist_StoreG:
4072 paranoia = 2; break;
4073 case Ist_Put: case Ist_PutI:
4074 paranoia = 1; break;
4075 case Ist_NoOp: case Ist_IMark: case Ist_AbiHint:
4076 case Ist_WrTmp: case Ist_Exit: case Ist_LoadG:
4077 paranoia = 0; break;
4078 default:
4079 vpanic("do_cse_BB(1)");
4082 if (paranoia > 0) {
4083 for (j = 0; j < aenv->used; j++) {
4084 if (!aenv->inuse[j])
4085 continue;
4086 ae = (AvailExpr*)aenv->key[j];
4087 if (ae->tag != GetIt && ae->tag != Load)
4088 continue;
4089 invalidate = False;
4090 if (paranoia >= 2) {
4091 invalidate = True;
4092 } else {
4093 vassert(paranoia == 1);
4094 if (ae->tag == Load) {
4095 /* Loads can be invalidated by anything that could
4096 possibly touch memory. But in that case we
4097 should have |paranoia| == 2 and we won't get
4098 here. So there's nothing to do; we don't have to
4099 invalidate the load. */
4101 else
4102 if (st->tag == Ist_Put) {
4103 if (getAliasingRelation_IC(
4104 ae->u.GetIt.descr,
4105 IRExpr_RdTmp(ae->u.GetIt.ix),
4106 st->Ist.Put.offset,
4107 typeOfIRExpr(bb->tyenv,st->Ist.Put.data)
4108 ) != NoAlias)
4109 invalidate = True;
4111 else
4112 if (st->tag == Ist_PutI) {
4113 IRPutI *puti = st->Ist.PutI.details;
4114 if (getAliasingRelation_II(
4115 ae->u.GetIt.descr,
4116 IRExpr_RdTmp(ae->u.GetIt.ix),
4117 ae->u.GetIt.bias,
4118 puti->descr,
4119 puti->ix,
4120 puti->bias
4121 ) != NoAlias)
4122 invalidate = True;
4124 else
4125 vpanic("do_cse_BB(2)");
4128 if (invalidate) {
4129 aenv->inuse[j] = False;
4130 aenv->key[j] = (HWord)NULL; /* be sure */
4132 } /* for j */
4133 } /* paranoia > 0 */
4135 /* ------ END invalidate aenv bindings ------ */
4137 /* ignore not-interestings */
4138 if (st->tag != Ist_WrTmp)
4139 continue;
4141 t = st->Ist.WrTmp.tmp;
4142 eprime = irExpr_to_AvailExpr(st->Ist.WrTmp.data, allowLoadsToBeCSEd);
4143 /* ignore if not of AvailExpr form */
4144 if (!eprime)
4145 continue;
4147 /* vex_printf("considering: " ); ppIRStmt(st); vex_printf("\n"); */
4149 /* apply tenv */
4150 subst_AvailExpr( tenv, eprime );
4152 /* search aenv for eprime, unfortunately the hard way */
4153 for (j = 0; j < aenv->used; j++)
4154 if (aenv->inuse[j] && eq_AvailExpr(eprime, (AvailExpr*)aenv->key[j]))
4155 break;
4157 if (j < aenv->used) {
4158 /* A binding E' -> q was found. Replace stmt by "t = q" and
4159 note the t->q binding in tenv. */
4160 /* (this is the core of the CSE action) */
4161 q = (IRTemp)aenv->val[j];
4162 bb->stmts[i] = IRStmt_WrTmp( t, IRExpr_RdTmp(q) );
4163 addToHHW( tenv, (HWord)t, (HWord)q );
4164 anyDone = True;
4165 } else {
4166 /* No binding was found, so instead we add E' -> t to our
4167 collection of available expressions, replace this stmt
4168 with "t = E'", and move on. */
4169 bb->stmts[i] = IRStmt_WrTmp( t, availExpr_to_IRExpr(eprime) );
4170 addToHHW( aenv, (HWord)eprime, (HWord)t );
4175 ppIRSB(bb);
4176 sanityCheckIRSB(bb, Ity_I32);
4177 vex_printf("\n\n");
4179 return anyDone;
4183 /*---------------------------------------------------------------*/
4184 /*--- Add32/Sub32 chain collapsing ---*/
4185 /*---------------------------------------------------------------*/
4187 /* ----- Helper functions for Add32/Sub32 chain collapsing ----- */
4189 /* Is this expression "Add32(tmp,const)" or "Sub32(tmp,const)" ? If
4190 yes, set *tmp and *i32 appropriately. *i32 is set as if the
4191 root node is Add32, not Sub32. */
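/* For example, "Add32(t7,0x10:I32)" yields *tmp = t7, *i32 = 16,
   while "Sub32(t7,0x4:I32)" yields *tmp = t7, *i32 = -4. */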
4193 static Bool isAdd32OrSub32 ( IRExpr* e, IRTemp* tmp, Int* i32 )
4195 if (e->tag != Iex_Binop)
4196 return False;
4197 if (e->Iex.Binop.op != Iop_Add32 && e->Iex.Binop.op != Iop_Sub32)
4198 return False;
4199 if (e->Iex.Binop.arg1->tag != Iex_RdTmp)
4200 return False;
4201 if (e->Iex.Binop.arg2->tag != Iex_Const)
4202 return False;
4203 *tmp = e->Iex.Binop.arg1->Iex.RdTmp.tmp;
4204 *i32 = (Int)(e->Iex.Binop.arg2->Iex.Const.con->Ico.U32);
4205 if (e->Iex.Binop.op == Iop_Sub32)
4206 *i32 = -*i32;
4207 return True;
4211 /* Figure out if tmp can be expressed as tmp2 +32 const, for some
4212 other tmp2. Scan backwards from the specified start point -- an
4213 optimisation. */
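/* For example, given
      t9  = Add32(t7,0x10:I32)
      t11 = Sub32(t9,0x4:I32)
   a query for t11 produces *tmp2 = t7 and *i32 = 12, since t11
   equals t7 + 12. */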
4215 static Bool collapseChain ( IRSB* bb, Int startHere,
4216 IRTemp tmp,
4217 IRTemp* tmp2, Int* i32 )
4219 Int j, ii;
4220 IRTemp vv;
4221 IRStmt* st;
4222 IRExpr* e;
4224 /* the (var, con) pair contains the current 'representation' for
4225 'tmp'. We start with 'tmp + 0'. */
4226 IRTemp var = tmp;
4227 Int con = 0;
4229 /* Scan backwards to see if tmp can be replaced by some other tmp
4230 +/- a constant. */
4231 for (j = startHere; j >= 0; j--) {
4232 st = bb->stmts[j];
4233 if (st->tag != Ist_WrTmp)
4234 continue;
4235 if (st->Ist.WrTmp.tmp != var)
4236 continue;
4237 e = st->Ist.WrTmp.data;
4238 if (!isAdd32OrSub32(e, &vv, &ii))
4239 break;
4240 var = vv;
4241 con += ii;
4243 if (j == -1)
4244 /* no earlier binding for var .. ill-formed IR */
4245 vpanic("collapseChain");
4247 /* so, did we find anything interesting? */
4248 if (var == tmp)
4249 return False; /* no .. */
4251 *tmp2 = var;
4252 *i32 = con;
4253 return True;
4257 /* ------- Main function for Add32/Sub32 chain collapsing ------ */
4259 static void collapse_AddSub_chains_BB ( IRSB* bb )
4261 IRStmt *st;
4262 IRTemp var, var2;
4263 Int i, con, con2;
4265 for (i = bb->stmts_used-1; i >= 0; i--) {
4266 st = bb->stmts[i];
4267 if (st->tag == Ist_NoOp)
4268 continue;
4270 /* Try to collapse 't1 = Add32/Sub32(t2, con)'. */
4272 if (st->tag == Ist_WrTmp
4273 && isAdd32OrSub32(st->Ist.WrTmp.data, &var, &con)) {
4275 /* So e1 is of the form Add32(var,con) or Sub32(var,-con).
4276 Find out if var can be expressed as var2 + con2. */
4277 if (collapseChain(bb, i-1, var, &var2, &con2)) {
4278 if (DEBUG_IROPT) {
4279 vex_printf("replacing1 ");
4280 ppIRStmt(st);
4281 vex_printf(" with ");
4283 con2 += con;
4284 bb->stmts[i]
4285 = IRStmt_WrTmp(
4286 st->Ist.WrTmp.tmp,
4287 (con2 >= 0)
4288 ? IRExpr_Binop(Iop_Add32,
4289 IRExpr_RdTmp(var2),
4290 IRExpr_Const(IRConst_U32(con2)))
4291 : IRExpr_Binop(Iop_Sub32,
4292 IRExpr_RdTmp(var2),
4293 IRExpr_Const(IRConst_U32(-con2)))
4295 if (DEBUG_IROPT) {
4296 ppIRStmt(bb->stmts[i]);
4297 vex_printf("\n");
4301 continue;
4304 /* Try to collapse 't1 = GetI[t2, con]'. */
4306 if (st->tag == Ist_WrTmp
4307 && st->Ist.WrTmp.data->tag == Iex_GetI
4308 && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp
4309 && collapseChain(bb, i-1, st->Ist.WrTmp.data->Iex.GetI.ix
4310 ->Iex.RdTmp.tmp, &var2, &con2)) {
4311 if (DEBUG_IROPT) {
4312 vex_printf("replacing3 ");
4313 ppIRStmt(st);
4314 vex_printf(" with ");
4316 con2 += st->Ist.WrTmp.data->Iex.GetI.bias;
4317 bb->stmts[i]
4318 = IRStmt_WrTmp(
4319 st->Ist.WrTmp.tmp,
4320 IRExpr_GetI(st->Ist.WrTmp.data->Iex.GetI.descr,
4321 IRExpr_RdTmp(var2),
4322 con2));
4323 if (DEBUG_IROPT) {
4324 ppIRStmt(bb->stmts[i]);
4325 vex_printf("\n");
4327 continue;
4330 /* Perhaps st is PutI[t, con] ? */
4331 IRPutI *puti = st->Ist.PutI.details;
4332 if (st->tag == Ist_PutI
4333 && puti->ix->tag == Iex_RdTmp
4334 && collapseChain(bb, i-1, puti->ix->Iex.RdTmp.tmp,
4335 &var2, &con2)) {
4336 if (DEBUG_IROPT) {
4337 vex_printf("replacing2 ");
4338 ppIRStmt(st);
4339 vex_printf(" with ");
4341 con2 += puti->bias;
4342 bb->stmts[i]
4343 = IRStmt_PutI(mkIRPutI(puti->descr,
4344 IRExpr_RdTmp(var2),
4345 con2,
4346 puti->data));
4347 if (DEBUG_IROPT) {
4348 ppIRStmt(bb->stmts[i]);
4349 vex_printf("\n");
4351 continue;
4354 } /* for */
4358 /*---------------------------------------------------------------*/
4359 /*--- PutI/GetI transformations ---*/
4360 /*---------------------------------------------------------------*/
4362 /* Given the parts (descr, tmp, bias) for a GetI, scan backwards from
4363 the given starting point to find, if any, a PutI which writes
4364 exactly the same piece of guest state, and so return the expression
4365 that the PutI writes. This is the core of PutI-GetI forwarding. */
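/* For example, when scanning backwards on behalf of
   "t12 = GETI(descr)[t3,7]": a PutI to exactly the same
   (descr, ix, bias) triple yields its data expression, whereas a
   Put/PutI whose aliasing relation is unknown, or a dirty call with
   any guest-state effects, makes the search fail with NULL. */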
4367 static
4368 IRExpr* findPutI ( IRSB* bb, Int startHere,
4369 IRRegArray* descrG, IRExpr* ixG, Int biasG )
4371 Int j;
4372 IRStmt* st;
4373 GSAliasing relation;
4375 if (0) {
4376 vex_printf("\nfindPutI ");
4377 ppIRRegArray(descrG);
4378 vex_printf(" ");
4379 ppIRExpr(ixG);
4380 vex_printf(" %d\n", biasG);
4383 /* Scan backwards in bb from startHere to find a suitable PutI
4384 binding for (descrG, ixG, biasG), if any. */
4386 for (j = startHere; j >= 0; j--) {
4387 st = bb->stmts[j];
4388 if (st->tag == Ist_NoOp)
4389 continue;
4391 if (st->tag == Ist_Put) {
4392 /* Non-indexed Put. This can't give a binding, but we do
4393 need to check it doesn't invalidate the search by
4394 overlapping any part of the indexed guest state. */
4396 relation
4397 = getAliasingRelation_IC(
4398 descrG, ixG,
4399 st->Ist.Put.offset,
4400 typeOfIRExpr(bb->tyenv,st->Ist.Put.data) );
4402 if (relation == NoAlias) {
4403 /* we're OK; keep going */
4404 continue;
4405 } else {
4406 /* relation == UnknownAlias || relation == ExactAlias */
4407 /* If this assertion fails, we've found a Put which writes
4408 an area of guest state which is read by a GetI. Which
4409 is unlikely (although not per se wrong). */
4410 vassert(relation != ExactAlias);
4411 /* This Put potentially writes guest state that the GetI
4412 reads; we must fail. */
4413 return NULL;
4417 if (st->tag == Ist_PutI) {
4418 IRPutI *puti = st->Ist.PutI.details;
4420 relation = getAliasingRelation_II(
4421 descrG, ixG, biasG,
4422 puti->descr,
4423 puti->ix,
4424 puti->bias
4427 if (relation == NoAlias) {
4428 /* This PutI definitely doesn't overlap. Ignore it and
4429 keep going. */
4430 continue; /* the for j loop */
4433 if (relation == UnknownAlias) {
4434 /* We don't know if this PutI writes to the same guest
4435 state that the GetI reads, or not. So we have to give up. */
4436 return NULL;
4439 /* Otherwise, we've found what we're looking for. */
4440 vassert(relation == ExactAlias);
4441 return puti->data;
4443 } /* if (st->tag == Ist_PutI) */
4445 if (st->tag == Ist_Dirty) {
4446 /* Be conservative. If the dirty call has any guest effects at
4447 all, give up. We could do better -- only give up if there
4448 are any guest writes/modifies. */
4449 if (st->Ist.Dirty.details->nFxState > 0)
4450 return NULL;
4453 } /* for */
4455 /* No valid replacement was found. */
4456 return NULL;
4461 /* Assuming pi is a PutI stmt, is s2 identical to it (in the sense
4462 that it writes exactly the same piece of guest state) ? Safe
4463 answer: False. */
4465 static Bool identicalPutIs ( IRStmt* pi, IRStmt* s2 )
4467 vassert(pi->tag == Ist_PutI);
4468 if (s2->tag != Ist_PutI)
4469 return False;
4471 IRPutI *p1 = pi->Ist.PutI.details;
4472 IRPutI *p2 = s2->Ist.PutI.details;
4474 return toBool(
4475 getAliasingRelation_II(
4476 p1->descr, p1->ix, p1->bias,
4477 p2->descr, p2->ix, p2->bias
4479 == ExactAlias
4484 /* Assuming pi is a PutI stmt, is s2 a Get/GetI/Put/PutI which might
4485 overlap it? Safe answer: True. Note, we could do a lot better
4486 than this if needed. */
4488 static
4489 Bool guestAccessWhichMightOverlapPutI (
4490 IRTypeEnv* tyenv, IRStmt* pi, IRStmt* s2
4493 GSAliasing relation;
4494 UInt minoffP, maxoffP;
4496 vassert(pi->tag == Ist_PutI);
4498 IRPutI *p1 = pi->Ist.PutI.details;
4500 getArrayBounds(p1->descr, &minoffP, &maxoffP);
4501 switch (s2->tag) {
4503 case Ist_NoOp:
4504 case Ist_IMark:
4505 return False;
4507 case Ist_MBE:
4508 case Ist_AbiHint:
4509 /* just be paranoid ... these should be rare. */
4510 return True;
4512 case Ist_CAS:
4513 /* This is unbelievably lame, but it's probably not
4514 significant from a performance point of view. Really, a
4515 CAS is a load-store op, so it should be safe to say False.
4516 However .. */
4517 return True;
4519 case Ist_Dirty:
4520 /* If the dirty call has any guest effects at all, give up.
4521 Probably could do better. */
4522 if (s2->Ist.Dirty.details->nFxState > 0)
4523 return True;
4524 return False;
4526 case Ist_Put:
4527 vassert(isIRAtom(s2->Ist.Put.data));
4528 relation
4529 = getAliasingRelation_IC(
4530 p1->descr, p1->ix,
4531 s2->Ist.Put.offset,
4532 typeOfIRExpr(tyenv,s2->Ist.Put.data)
4534 goto have_relation;
4536 case Ist_PutI: {
4537 IRPutI *p2 = s2->Ist.PutI.details;
4539 vassert(isIRAtom(p2->ix));
4540 vassert(isIRAtom(p2->data));
4541 relation
4542 = getAliasingRelation_II(
4543 p1->descr, p1->ix, p1->bias,
4544 p2->descr, p2->ix, p2->bias
4546 goto have_relation;
4549 case Ist_WrTmp:
4550 if (s2->Ist.WrTmp.data->tag == Iex_GetI) {
4551 relation
4552 = getAliasingRelation_II(
4553 p1->descr, p1->ix, p1->bias,
4554 s2->Ist.WrTmp.data->Iex.GetI.descr,
4555 s2->Ist.WrTmp.data->Iex.GetI.ix,
4556 s2->Ist.WrTmp.data->Iex.GetI.bias
4558 goto have_relation;
4560 if (s2->Ist.WrTmp.data->tag == Iex_Get) {
4561 relation
4562 = getAliasingRelation_IC(
4563 p1->descr, p1->ix,
4564 s2->Ist.WrTmp.data->Iex.Get.offset,
4565 s2->Ist.WrTmp.data->Iex.Get.ty
4567 goto have_relation;
4569 return False;
4571 case Ist_Store:
4572 vassert(isIRAtom(s2->Ist.Store.addr));
4573 vassert(isIRAtom(s2->Ist.Store.data));
4574 return False;
4576 default:
4577 vex_printf("\n"); ppIRStmt(s2); vex_printf("\n");
4578 vpanic("guestAccessWhichMightOverlapPutI");
4581 have_relation:
4582 if (relation == NoAlias)
4583 return False;
4584 else
4585 return True; /* ExactAlias or UnknownAlias */
4590 /* ---------- PutI/GetI transformations main functions --------- */
4592 /* Remove redundant GetIs, to the extent that they can be detected.
4593 bb is modified in-place. */
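/* For example,
      PUTI(descr)[t3,7] = t8
      ...
      t12 = GETI(descr)[t3,7]
   becomes "t12 = t8", provided t8 is an atom of the element type and
   nothing in between might clobber that piece of guest state. */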
4595 static
4596 void do_redundant_GetI_elimination ( IRSB* bb )
4598 Int i;
4599 IRStmt* st;
4601 for (i = bb->stmts_used-1; i >= 0; i--) {
4602 st = bb->stmts[i];
4603 if (st->tag == Ist_NoOp)
4604 continue;
4606 if (st->tag == Ist_WrTmp
4607 && st->Ist.WrTmp.data->tag == Iex_GetI
4608 && st->Ist.WrTmp.data->Iex.GetI.ix->tag == Iex_RdTmp) {
4609 IRRegArray* descr = st->Ist.WrTmp.data->Iex.GetI.descr;
4610 IRExpr* ix = st->Ist.WrTmp.data->Iex.GetI.ix;
4611 Int bias = st->Ist.WrTmp.data->Iex.GetI.bias;
4612 IRExpr* replacement = findPutI(bb, i-1, descr, ix, bias);
4613 if (replacement
4614 && isIRAtom(replacement)
4615 /* Make sure we're doing a type-safe transformation! */
4616 && typeOfIRExpr(bb->tyenv, replacement) == descr->elemTy) {
4617 if (DEBUG_IROPT) {
4618 vex_printf("rGI: ");
4619 ppIRExpr(st->Ist.WrTmp.data);
4620 vex_printf(" -> ");
4621 ppIRExpr(replacement);
4622 vex_printf("\n");
4624 bb->stmts[i] = IRStmt_WrTmp(st->Ist.WrTmp.tmp, replacement);
4632 /* Remove redundant PutIs, to the extent that they can be detected.
4633 bb is modified in-place. */
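/* For example, in
      PUTI(descr)[t3,7] = t8
      ...
      PUTI(descr)[t3,7] = t9
   the first PutI is overwritten before it can be observed, so it is
   replaced by a NoOp -- provided no conditional exit, overlapping
   guest access or dirty call appears in between. */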
4635 static
4636 void do_redundant_PutI_elimination ( IRSB* bb, VexRegisterUpdates pxControl )
4638 Int i, j;
4639 Bool delete;
4640 IRStmt *st, *stj;
4642 vassert(pxControl < VexRegUpdAllregsAtEachInsn);
4644 for (i = 0; i < bb->stmts_used; i++) {
4645 st = bb->stmts[i];
4646 if (st->tag != Ist_PutI)
4647 continue;
4648 /* Ok, search forwards from here to see if we can find another
4649 PutI which makes this one redundant, and dodging various
4650 hazards. Search forwards:
4651 * If conditional exit, give up (because anything after that
4652 does not postdominate this put).
4653 * If a Get which might overlap, give up (because this PutI
4654 is not necessarily dead).
4655 * If a Put which is identical, stop with success.
4656 * If a Put which might overlap, but is not identical, give up.
4657 * If a dirty helper call which might write guest state, give up.
4658 * If a Put which definitely doesn't overlap, or any other
4659 kind of stmt, continue.
4661 delete = False;
4662 for (j = i+1; j < bb->stmts_used; j++) {
4663 stj = bb->stmts[j];
4664 if (stj->tag == Ist_NoOp)
4665 continue;
4666 if (identicalPutIs(st, stj)) {
4667 /* success! */
4668 delete = True;
4669 break;
4671 if (stj->tag == Ist_Exit)
4672 /* give up */
4673 break;
4674 if (stj->tag == Ist_Dirty)
4675 /* give up; could do better here */
4676 break;
4677 if (guestAccessWhichMightOverlapPutI(bb->tyenv, st, stj))
4678 /* give up */
4679 break;
4682 if (delete) {
4683 if (DEBUG_IROPT) {
4684 vex_printf("rPI: ");
4685 ppIRStmt(st);
4686 vex_printf("\n");
4688 bb->stmts[i] = IRStmt_NoOp();
4695 /*---------------------------------------------------------------*/
4696 /*--- Loop unrolling ---*/
4697 /*---------------------------------------------------------------*/
4699 /* Adjust all tmp values (names) in e by delta. e is destructively
4700 modified. */
4702 static void deltaIRExpr ( IRExpr* e, Int delta )
4704 Int i;
4705 switch (e->tag) {
4706 case Iex_RdTmp:
4707 e->Iex.RdTmp.tmp += delta;
4708 break;
4709 case Iex_Get:
4710 case Iex_Const:
4711 break;
4712 case Iex_GetI:
4713 deltaIRExpr(e->Iex.GetI.ix, delta);
4714 break;
4715 case Iex_Qop:
4716 deltaIRExpr(e->Iex.Qop.details->arg1, delta);
4717 deltaIRExpr(e->Iex.Qop.details->arg2, delta);
4718 deltaIRExpr(e->Iex.Qop.details->arg3, delta);
4719 deltaIRExpr(e->Iex.Qop.details->arg4, delta);
4720 break;
4721 case Iex_Triop:
4722 deltaIRExpr(e->Iex.Triop.details->arg1, delta);
4723 deltaIRExpr(e->Iex.Triop.details->arg2, delta);
4724 deltaIRExpr(e->Iex.Triop.details->arg3, delta);
4725 break;
4726 case Iex_Binop:
4727 deltaIRExpr(e->Iex.Binop.arg1, delta);
4728 deltaIRExpr(e->Iex.Binop.arg2, delta);
4729 break;
4730 case Iex_Unop:
4731 deltaIRExpr(e->Iex.Unop.arg, delta);
4732 break;
4733 case Iex_Load:
4734 deltaIRExpr(e->Iex.Load.addr, delta);
4735 break;
4736 case Iex_CCall:
4737 for (i = 0; e->Iex.CCall.args[i]; i++)
4738 deltaIRExpr(e->Iex.CCall.args[i], delta);
4739 break;
4740 case Iex_ITE:
4741 deltaIRExpr(e->Iex.ITE.cond, delta);
4742 deltaIRExpr(e->Iex.ITE.iftrue, delta);
4743 deltaIRExpr(e->Iex.ITE.iffalse, delta);
4744 break;
4745 default:
4746 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
4747 vpanic("deltaIRExpr");
4751 /* Adjust all tmp values (names) in st by delta. st is destructively
4752 modified. */
4754 /*static*/ void deltaIRStmt ( IRStmt* st, Int delta )
4756 Int i;
4757 IRDirty* d;
4758 switch (st->tag) {
4759 case Ist_NoOp:
4760 case Ist_IMark:
4761 case Ist_MBE:
4762 break;
4763 case Ist_AbiHint:
4764 deltaIRExpr(st->Ist.AbiHint.base, delta);
4765 deltaIRExpr(st->Ist.AbiHint.nia, delta);
4766 break;
4767 case Ist_Put:
4768 deltaIRExpr(st->Ist.Put.data, delta);
4769 break;
4770 case Ist_PutI:
4771 deltaIRExpr(st->Ist.PutI.details->ix, delta);
4772 deltaIRExpr(st->Ist.PutI.details->data, delta);
4773 break;
4774 case Ist_WrTmp:
4775 st->Ist.WrTmp.tmp += delta;
4776 deltaIRExpr(st->Ist.WrTmp.data, delta);
4777 break;
4778 case Ist_Exit:
4779 deltaIRExpr(st->Ist.Exit.guard, delta);
4780 break;
4781 case Ist_Store:
4782 deltaIRExpr(st->Ist.Store.addr, delta);
4783 deltaIRExpr(st->Ist.Store.data, delta);
4784 break;
4785 case Ist_StoreG: {
4786 IRStoreG* sg = st->Ist.StoreG.details;
4787 deltaIRExpr(sg->addr, delta);
4788 deltaIRExpr(sg->data, delta);
4789 deltaIRExpr(sg->guard, delta);
4790 break;
4792 case Ist_LoadG: {
4793 IRLoadG* lg = st->Ist.LoadG.details;
4794 lg->dst += delta;
4795 deltaIRExpr(lg->addr, delta);
4796 deltaIRExpr(lg->alt, delta);
4797 deltaIRExpr(lg->guard, delta);
4798 break;
4800 case Ist_CAS:
4801 if (st->Ist.CAS.details->oldHi != IRTemp_INVALID)
4802 st->Ist.CAS.details->oldHi += delta;
4803 st->Ist.CAS.details->oldLo += delta;
4804 deltaIRExpr(st->Ist.CAS.details->addr, delta);
4805 if (st->Ist.CAS.details->expdHi)
4806 deltaIRExpr(st->Ist.CAS.details->expdHi, delta);
4807 deltaIRExpr(st->Ist.CAS.details->expdLo, delta);
4808 if (st->Ist.CAS.details->dataHi)
4809 deltaIRExpr(st->Ist.CAS.details->dataHi, delta);
4810 deltaIRExpr(st->Ist.CAS.details->dataLo, delta);
4811 break;
4812 case Ist_LLSC:
4813 st->Ist.LLSC.result += delta;
4814 deltaIRExpr(st->Ist.LLSC.addr, delta);
4815 if (st->Ist.LLSC.storedata)
4816 deltaIRExpr(st->Ist.LLSC.storedata, delta);
4817 break;
4818 case Ist_Dirty:
4819 d = st->Ist.Dirty.details;
4820 deltaIRExpr(d->guard, delta);
4821 for (i = 0; d->args[i]; i++) {
4822 IRExpr* arg = d->args[i];
4823 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
4824 deltaIRExpr(arg, delta);
4826 if (d->tmp != IRTemp_INVALID)
4827 d->tmp += delta;
4828 if (d->mAddr)
4829 deltaIRExpr(d->mAddr, delta);
4830 break;
4831 default:
4832 vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
4833 vpanic("deltaIRStmt");
4838 /* If possible, return a loop-unrolled version of bb0. The original
4839 is changed. If not possible, return NULL. */
4841 /* The two schemas considered are:
4843 X: BODY; goto X
4845 which unrolls to (eg) X: BODY;BODY; goto X
4849 X: BODY; if (c) goto X; goto Y
4850 which trivially transforms to
4851 X: BODY; if (!c) goto Y; goto X;
4852 so it falls in the scope of the first case.
4854 X and Y must be literal (guest) addresses.
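For example, with an unroll factor of 2, "X: BODY; goto X" becomes
"X: BODY; BODY'; goto X", where BODY' is a copy of BODY whose
temporaries have been renamed to fresh ones (see deltaIRStmt).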
4857 static Int calc_unroll_factor( IRSB* bb )
4859 Int n_stmts, i;
4861 n_stmts = 0;
4862 for (i = 0; i < bb->stmts_used; i++) {
4863 if (bb->stmts[i]->tag != Ist_NoOp)
4864 n_stmts++;
4867 if (n_stmts <= vex_control.iropt_unroll_thresh/8) {
4868 if (vex_control.iropt_verbosity > 0)
4869 vex_printf("vex iropt: 8 x unrolling (%d sts -> %d sts)\n",
4870 n_stmts, 8* n_stmts);
4871 return 8;
4873 if (n_stmts <= vex_control.iropt_unroll_thresh/4) {
4874 if (vex_control.iropt_verbosity > 0)
4875 vex_printf("vex iropt: 4 x unrolling (%d sts -> %d sts)\n",
4876 n_stmts, 4* n_stmts);
4877 return 4;
4880 if (n_stmts <= vex_control.iropt_unroll_thresh/2) {
4881 if (vex_control.iropt_verbosity > 0)
4882 vex_printf("vex iropt: 2 x unrolling (%d sts -> %d sts)\n",
4883 n_stmts, 2* n_stmts);
4884 return 2;
4887 if (vex_control.iropt_verbosity > 0)
4888 vex_printf("vex iropt: not unrolling (%d sts)\n", n_stmts);
4890 return 1;
4894 static IRSB* maybe_loop_unroll_BB ( IRSB* bb0, Addr my_addr )
4896 Int i, j, jmax, n_vars;
4897 Bool xxx_known;
4898 Addr64 xxx_value, yyy_value;
4899 IRExpr* udst;
4900 IRStmt* st;
4901 IRConst* con;
4902 IRSB *bb1, *bb2;
4903 Int unroll_factor;
4905 if (vex_control.iropt_unroll_thresh <= 0)
4906 return NULL;
4908 /* First off, figure out if we can unroll this loop. Do this
4909 without modifying bb0. */
4911 if (bb0->jumpkind != Ijk_Boring)
4912 return NULL;
4914 xxx_known = False;
4915 xxx_value = 0;
4917 /* Extract the next-guest address. If it isn't a literal, we
4918 have to give up. */
4920 udst = bb0->next;
4921 if (udst->tag == Iex_Const
4922 && (udst->Iex.Const.con->tag == Ico_U32
4923 || udst->Iex.Const.con->tag == Ico_U64)) {
4924 /* The BB ends in a jump to a literal location. */
4925 xxx_known = True;
4926 xxx_value = udst->Iex.Const.con->tag == Ico_U64
4927 ? udst->Iex.Const.con->Ico.U64
4928 : (Addr64)(udst->Iex.Const.con->Ico.U32);
4931 if (!xxx_known)
4932 return NULL;
4934 /* Now we know the BB ends in a jump to a literal location. If
4935 it's a jump to itself (viz, idiom #1), move directly to the
4936 unrolling stage, first cloning the bb so the original isn't
4937 modified. */
4938 if (xxx_value == my_addr) {
4939 unroll_factor = calc_unroll_factor( bb0 );
4940 if (unroll_factor < 2)
4941 return NULL;
4942 bb1 = deepCopyIRSB( bb0 );
4943 bb0 = NULL;
4944 udst = NULL; /* is now invalid */
4945 goto do_unroll;
4948 /* Search for the second idiomatic form:
4949 X: BODY; if (c) goto X; goto Y
4950 We know Y, but need to establish that the last stmt
4951 is 'if (c) goto X'.
4953 yyy_value = xxx_value;
4954 for (i = bb0->stmts_used-1; i >= 0; i--)
4955 if (bb0->stmts[i])
4956 break;
4958 if (i < 0)
4959 return NULL; /* block with no stmts. Strange. */
4961 st = bb0->stmts[i];
4962 if (st->tag != Ist_Exit)
4963 return NULL;
4964 if (st->Ist.Exit.jk != Ijk_Boring)
4965 return NULL;
4967 con = st->Ist.Exit.dst;
4968 vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
4970 xxx_value = con->tag == Ico_U64
4971 ? st->Ist.Exit.dst->Ico.U64
4972 : (Addr64)(st->Ist.Exit.dst->Ico.U32);
4974 /* If this assertion fails, we have some kind of type error. */
4975 vassert(con->tag == udst->Iex.Const.con->tag);
4977 if (xxx_value != my_addr)
4978 /* We didn't find either idiom. Give up. */
4979 return NULL;
4981 /* Ok, we found idiom #2. Copy the BB, switch around the xxx and
4982 yyy values (which makes it look like idiom #1), and go into
4983 unrolling proper. This means finding (again) the last stmt, in
4984 the copied BB. */
4986 unroll_factor = calc_unroll_factor( bb0 );
4987 if (unroll_factor < 2)
4988 return NULL;
4990 bb1 = deepCopyIRSB( bb0 );
4991 bb0 = NULL;
4992 udst = NULL; /* is now invalid */
4993 for (i = bb1->stmts_used-1; i >= 0; i--)
4994 if (bb1->stmts[i])
4995 break;
4997 /* The next bunch of assertions should be true since we already
4998 found and checked the last stmt in the original bb. */
5000 vassert(i >= 0);
5002 st = bb1->stmts[i];
5003 vassert(st->tag == Ist_Exit);
5005 con = st->Ist.Exit.dst;
5006 vassert(con->tag == Ico_U32 || con->tag == Ico_U64);
5008 udst = bb1->next;
5009 vassert(udst->tag == Iex_Const);
5010 vassert(udst->Iex.Const.con->tag == Ico_U32
5011 || udst->Iex.Const.con->tag == Ico_U64);
5012 vassert(con->tag == udst->Iex.Const.con->tag);
5014 /* switch the xxx and yyy fields around */
5015 if (con->tag == Ico_U64) {
5016 udst->Iex.Const.con->Ico.U64 = xxx_value;
5017 con->Ico.U64 = yyy_value;
5018 } else {
5019 udst->Iex.Const.con->Ico.U32 = (UInt)xxx_value;
5020 con->Ico.U32 = (UInt)yyy_value;
5023 /* negate the test condition */
5024 st->Ist.Exit.guard
5025 = IRExpr_Unop(Iop_Not1,deepCopyIRExpr(st->Ist.Exit.guard));
5027 /* --- The unroller proper. Both idioms are by now --- */
5028 /* --- converted to idiom 1. --- */
5030 do_unroll:
5032 vassert(unroll_factor == 2
5033 || unroll_factor == 4
5034 || unroll_factor == 8);
5036 jmax = unroll_factor==8 ? 3 : (unroll_factor==4 ? 2 : 1);
5037 for (j = 1; j <= jmax; j++) {
5039 n_vars = bb1->tyenv->types_used;
5041 bb2 = deepCopyIRSB(bb1);
5042 for (i = 0; i < n_vars; i++)
5043 (void)newIRTemp(bb1->tyenv, bb2->tyenv->types[i]);
5045 for (i = 0; i < bb2->stmts_used; i++) {
5046 /* deltaIRStmt destructively modifies the stmt, but
5047 that's OK since bb2 is a complete fresh copy of bb1. */
5048 deltaIRStmt(bb2->stmts[i], n_vars);
5049 addStmtToIRSB(bb1, bb2->stmts[i]);
5053 if (DEBUG_IROPT) {
5054 vex_printf("\nUNROLLED (%lx)\n", my_addr);
5055 ppIRSB(bb1);
5056 vex_printf("\n");
5059 /* Flattening; sigh. The unroller succeeds in breaking flatness
5060 by negating the test condition. This should be fixed properly.
5061 For the moment use this shotgun approach. */
5062 return flatten_BB(bb1);
5066 /*---------------------------------------------------------------*/
5067 /*--- The tree builder ---*/
5068 /*---------------------------------------------------------------*/
5070 /* This isn't part of IR optimisation. Really it's a pass done prior
5071 to instruction selection, which improves the code that the
5072 instruction selector can produce. */
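/* For example, if t3 is used exactly once, the pair
      t3 = Add32(t1,t2)
      STle(t7) = t3
   is turned into the single statement
      STle(t7) = Add32(t1,t2)
   so the instruction selector sees a complete expression tree rather
   than a chain of single-operation bindings. */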
5074 /* --- The 'tmp' environment is the central data structure here --- */
5076 /* The number of outstanding bindings we're prepared to track.
5077 The number of times the env becomes full and we have to dump
5078 the oldest binding (hence reducing code quality) falls very
5079 rapidly as the env size increases. 8 gives reasonable performance
5080 under most circumstances. */
5081 #define A_NENV 10
5083 /* An interval. Used to record the bytes in the guest state accessed
5084 by a Put[I] statement or by (one or more) Get[I] expression(s). In
5085 case of several Get[I] expressions, the lower/upper bounds are recorded.
5086 This is conservative but cheap.
5087 E.g. a Put of 8 bytes at address 100 would be recorded as [100,107].
5088 E.g. an expression that reads 8 bytes at offset 100 and 4 bytes at
5089 offset 200 would be recorded as [100,203] */
5090 typedef
5091 struct {
5092 Bool present;
5093 Int low;
5094 Int high;
5096 Interval;
5098 static inline Bool
5099 intervals_overlap(Interval i1, Interval i2)
5101 return (i1.low >= i2.low && i1.low <= i2.high) ||
5102 (i2.low >= i1.low && i2.low <= i1.high);
5105 static inline void
5106 update_interval(Interval *i, Int low, Int high)
5108 vassert(low <= high);
5110 if (i->present) {
5111 if (low < i->low) i->low = low;
5112 if (high > i->high) i->high = high;
5113 } else {
5114 i->present = True;
5115 i->low = low;
5116 i->high = high;
5121 /* bindee == NULL === slot is not in use
5122 bindee != NULL === slot is in use
5124 typedef
5125 struct {
5126 IRTemp binder;
5127 IRExpr* bindee;
5128 Bool doesLoad;
5129 /* Record the bytes of the guest state BINDEE reads from. */
5130 Interval getInterval;
5132 ATmpInfo;
5134 __attribute__((unused))
5135 static void ppAEnv ( ATmpInfo* env )
5137 Int i;
5138 for (i = 0; i < A_NENV; i++) {
5139 vex_printf("%d tmp %d val ", i, (Int)env[i].binder);
5140 if (env[i].bindee)
5141 ppIRExpr(env[i].bindee);
5142 else
5143 vex_printf("(null)");
5144 vex_printf("\n");
5148 /* --- Tree-traversal fns --- */
5150 /* Traverse an expr, and detect if any part of it reads memory or does
5151 a Get. Be careful ... this really controls how much the
5152 tree-builder can reorder the code, so getting it right is critical.
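E.g. a Load sets *doesLoad, while a Get or GetI merges the byte
range it reads into *getInterval via update_interval.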
5154 static void setHints_Expr (Bool* doesLoad, Interval* getInterval, IRExpr* e )
5156 Int i;
5157 switch (e->tag) {
5158 case Iex_CCall:
5159 for (i = 0; e->Iex.CCall.args[i]; i++)
5160 setHints_Expr(doesLoad, getInterval, e->Iex.CCall.args[i]);
5161 return;
5162 case Iex_ITE:
5163 setHints_Expr(doesLoad, getInterval, e->Iex.ITE.cond);
5164 setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iftrue);
5165 setHints_Expr(doesLoad, getInterval, e->Iex.ITE.iffalse);
5166 return;
5167 case Iex_Qop:
5168 setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg1);
5169 setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg2);
5170 setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg3);
5171 setHints_Expr(doesLoad, getInterval, e->Iex.Qop.details->arg4);
5172 return;
5173 case Iex_Triop:
5174 setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg1);
5175 setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg2);
5176 setHints_Expr(doesLoad, getInterval, e->Iex.Triop.details->arg3);
5177 return;
5178 case Iex_Binop:
5179 setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg1);
5180 setHints_Expr(doesLoad, getInterval, e->Iex.Binop.arg2);
5181 return;
5182 case Iex_Unop:
5183 setHints_Expr(doesLoad, getInterval, e->Iex.Unop.arg);
5184 return;
5185 case Iex_Load:
5186 *doesLoad = True;
5187 setHints_Expr(doesLoad, getInterval, e->Iex.Load.addr);
5188 return;
5189 case Iex_Get: {
5190 Int low = e->Iex.Get.offset;
5191 Int high = low + sizeofIRType(e->Iex.Get.ty) - 1;
5192 update_interval(getInterval, low, high);
5193 return;
5195 case Iex_GetI: {
5196 IRRegArray *descr = e->Iex.GetI.descr;
5197 Int size = sizeofIRType(descr->elemTy);
5198 Int low = descr->base;
5199 Int high = low + descr->nElems * size - 1;
5200 update_interval(getInterval, low, high);
5201 setHints_Expr(doesLoad, getInterval, e->Iex.GetI.ix);
5202 return;
5204 case Iex_RdTmp:
5205 case Iex_Const:
5206 return;
5207 default:
5208 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5209 vpanic("setHints_Expr");
5214 /* Add a binding to the front of the env and slide all the rest
5215 backwards. It should be the case that the last slot is free. */
5216 static void addToEnvFront ( ATmpInfo* env, IRTemp binder, IRExpr* bindee )
5218 Int i;
5219 vassert(env[A_NENV-1].bindee == NULL);
5220 for (i = A_NENV-1; i >= 1; i--)
5221 env[i] = env[i-1];
5222 env[0].binder = binder;
5223 env[0].bindee = bindee;
5224 env[0].doesLoad = False; /* filled in later */
5225 env[0].getInterval.present = False; /* filled in later */
5226 env[0].getInterval.low = -1; /* filled in later */
5227 env[0].getInterval.high = -1; /* filled in later */
5230 /* Given uses :: array of UShort, indexed by IRTemp
5231 Add the use-occurrences of temps in this expression
5232 to the uses array.
5234 static void aoccCount_Expr ( UShort* uses, IRExpr* e )
5236 Int i;
5238 switch (e->tag) {
5240 case Iex_RdTmp: /* the only interesting case */
5241 uses[e->Iex.RdTmp.tmp]++;
5242 return;
5244 case Iex_ITE:
5245 aoccCount_Expr(uses, e->Iex.ITE.cond);
5246 aoccCount_Expr(uses, e->Iex.ITE.iftrue);
5247 aoccCount_Expr(uses, e->Iex.ITE.iffalse);
5248 return;
5250 case Iex_Qop:
5251 aoccCount_Expr(uses, e->Iex.Qop.details->arg1);
5252 aoccCount_Expr(uses, e->Iex.Qop.details->arg2);
5253 aoccCount_Expr(uses, e->Iex.Qop.details->arg3);
5254 aoccCount_Expr(uses, e->Iex.Qop.details->arg4);
5255 return;
5257 case Iex_Triop:
5258 aoccCount_Expr(uses, e->Iex.Triop.details->arg1);
5259 aoccCount_Expr(uses, e->Iex.Triop.details->arg2);
5260 aoccCount_Expr(uses, e->Iex.Triop.details->arg3);
5261 return;
5263 case Iex_Binop:
5264 aoccCount_Expr(uses, e->Iex.Binop.arg1);
5265 aoccCount_Expr(uses, e->Iex.Binop.arg2);
5266 return;
5268 case Iex_Unop:
5269 aoccCount_Expr(uses, e->Iex.Unop.arg);
5270 return;
5272 case Iex_Load:
5273 aoccCount_Expr(uses, e->Iex.Load.addr);
5274 return;
5276 case Iex_CCall:
5277 for (i = 0; e->Iex.CCall.args[i]; i++)
5278 aoccCount_Expr(uses, e->Iex.CCall.args[i]);
5279 return;
5281 case Iex_GetI:
5282 aoccCount_Expr(uses, e->Iex.GetI.ix);
5283 return;
5285 case Iex_Const:
5286 case Iex_Get:
5287 return;
5289 default:
5290 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5291 vpanic("aoccCount_Expr");
5296 /* Given uses :: array of UShort, indexed by IRTemp
5297 Add the use-occurrences of temps in this statement
5298 to the uses array.
5300 static void aoccCount_Stmt ( UShort* uses, IRStmt* st )
5302 Int i;
5303 IRDirty* d;
5304 IRCAS* cas;
5305 switch (st->tag) {
5306 case Ist_AbiHint:
5307 aoccCount_Expr(uses, st->Ist.AbiHint.base);
5308 aoccCount_Expr(uses, st->Ist.AbiHint.nia);
5309 return;
5310 case Ist_WrTmp:
5311 aoccCount_Expr(uses, st->Ist.WrTmp.data);
5312 return;
5313 case Ist_Put:
5314 aoccCount_Expr(uses, st->Ist.Put.data);
5315 return;
5316 case Ist_PutI:
5317 aoccCount_Expr(uses, st->Ist.PutI.details->ix);
5318 aoccCount_Expr(uses, st->Ist.PutI.details->data);
5319 return;
5320 case Ist_Store:
5321 aoccCount_Expr(uses, st->Ist.Store.addr);
5322 aoccCount_Expr(uses, st->Ist.Store.data);
5323 return;
5324 case Ist_StoreG: {
5325 IRStoreG* sg = st->Ist.StoreG.details;
5326 aoccCount_Expr(uses, sg->addr);
5327 aoccCount_Expr(uses, sg->data);
5328 aoccCount_Expr(uses, sg->guard);
5329 return;
5331 case Ist_LoadG: {
5332 IRLoadG* lg = st->Ist.LoadG.details;
5333 aoccCount_Expr(uses, lg->addr);
5334 aoccCount_Expr(uses, lg->alt);
5335 aoccCount_Expr(uses, lg->guard);
5336 return;
5338 case Ist_CAS:
5339 cas = st->Ist.CAS.details;
5340 aoccCount_Expr(uses, cas->addr);
5341 if (cas->expdHi)
5342 aoccCount_Expr(uses, cas->expdHi);
5343 aoccCount_Expr(uses, cas->expdLo);
5344 if (cas->dataHi)
5345 aoccCount_Expr(uses, cas->dataHi);
5346 aoccCount_Expr(uses, cas->dataLo);
5347 return;
5348 case Ist_LLSC:
5349 aoccCount_Expr(uses, st->Ist.LLSC.addr);
5350 if (st->Ist.LLSC.storedata)
5351 aoccCount_Expr(uses, st->Ist.LLSC.storedata);
5352 return;
5353 case Ist_Dirty:
5354 d = st->Ist.Dirty.details;
5355 if (d->mFx != Ifx_None)
5356 aoccCount_Expr(uses, d->mAddr);
5357 aoccCount_Expr(uses, d->guard);
5358 for (i = 0; d->args[i]; i++) {
5359 IRExpr* arg = d->args[i];
5360 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
5361 aoccCount_Expr(uses, arg);
5363 return;
5364 case Ist_NoOp:
5365 case Ist_IMark:
5366 case Ist_MBE:
5367 return;
5368 case Ist_Exit:
5369 aoccCount_Expr(uses, st->Ist.Exit.guard);
5370 return;
5371 default:
5372 vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
5373 vpanic("aoccCount_Stmt");
5377 /* Look up a binding for tmp in the env. If found, return the bound
5378 expression, and set the env's binding to NULL so it is marked as
5379 used. If not found, return NULL. */
5381 static IRExpr* atbSubst_Temp ( ATmpInfo* env, IRTemp tmp )
5383 Int i;
5384 for (i = 0; i < A_NENV; i++) {
5385 if (env[i].binder == tmp && env[i].bindee != NULL) {
5386 IRExpr* bindee = env[i].bindee;
5387 env[i].bindee = NULL;
5388 return bindee;
5391 return NULL;
5394 /* Traverse e, looking for temps. For each observed temp, see if env
5395 contains a binding for the temp, and if so return the bound value.
5396 The env has the property that any binding it holds is
5397 'single-shot', so once a binding is used, it is marked as no longer
5398 available, by setting its .bindee field to NULL. */
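/* For example, if the env binds t3 -> Add32(t1,t2), the first
   occurrence of t3 met during the traversal is replaced by
   Add32(t1,t2) and the binding is cleared.  This is safe because
   only temps that are used exactly once are ever entered into the
   env. */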
5400 static inline Bool is_Unop ( IRExpr* e, IROp op ) {
5401 return e->tag == Iex_Unop && e->Iex.Unop.op == op;
5403 static inline Bool is_Binop ( IRExpr* e, IROp op ) {
5404 return e->tag == Iex_Binop && e->Iex.Binop.op == op;
5407 static IRExpr* fold_IRExpr_Binop ( IROp op, IRExpr* a1, IRExpr* a2 )
5409 switch (op) {
5410 case Iop_Or32:
5411 /* Or32( CmpwNEZ32(x), CmpwNEZ32(y) ) --> CmpwNEZ32( Or32( x, y ) ) */
5412 if (is_Unop(a1, Iop_CmpwNEZ32) && is_Unop(a2, Iop_CmpwNEZ32))
5413 return IRExpr_Unop( Iop_CmpwNEZ32,
5414 IRExpr_Binop( Iop_Or32, a1->Iex.Unop.arg,
5415 a2->Iex.Unop.arg ) );
5416 break;
5418 case Iop_CmpNE32:
5419 /* Since X has type Ity_I1 we can simplify:
5420 CmpNE32(1Uto32(X),0) ==> X */
5421 if (is_Unop(a1, Iop_1Uto32) && isZeroU32(a2))
5422 return a1->Iex.Unop.arg;
5423 break;
5425 default:
5426 break;
5428 /* no reduction rule applies */
5429 return IRExpr_Binop( op, a1, a2 );
5432 static IRExpr* fold_IRExpr_Unop ( IROp op, IRExpr* aa )
5434 switch (op) {
5435 case Iop_CmpwNEZ64:
5436 /* CmpwNEZ64( CmpwNEZ64 ( x ) ) --> CmpwNEZ64 ( x ) */
5437 if (is_Unop(aa, Iop_CmpwNEZ64))
5438 return IRExpr_Unop( Iop_CmpwNEZ64, aa->Iex.Unop.arg );
5439 /* CmpwNEZ64( Or64 ( CmpwNEZ64(x), y ) ) --> CmpwNEZ64( Or64( x, y ) ) */
5440 if (is_Binop(aa, Iop_Or64)
5441 && is_Unop(aa->Iex.Binop.arg1, Iop_CmpwNEZ64))
5442 return fold_IRExpr_Unop(
5443 Iop_CmpwNEZ64,
5444 IRExpr_Binop(Iop_Or64,
5445 aa->Iex.Binop.arg1->Iex.Unop.arg,
5446 aa->Iex.Binop.arg2));
5447 /* CmpwNEZ64( Or64 ( x, CmpwNEZ64(y) ) ) --> CmpwNEZ64( Or64( x, y ) ) */
5448 if (is_Binop(aa, Iop_Or64)
5449 && is_Unop(aa->Iex.Binop.arg2, Iop_CmpwNEZ64))
5450 return fold_IRExpr_Unop(
5451 Iop_CmpwNEZ64,
5452 IRExpr_Binop(Iop_Or64,
5453 aa->Iex.Binop.arg1,
5454 aa->Iex.Binop.arg2->Iex.Unop.arg));
5455 break;
5456 case Iop_CmpNEZ64:
5457 /* CmpNEZ64( Left64(x) ) --> CmpNEZ64(x) */
5458 if (is_Unop(aa, Iop_Left64))
5459 return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg);
5460 /* CmpNEZ64( 1Uto64(X) ) --> X */
5461 if (is_Unop(aa, Iop_1Uto64))
5462 return aa->Iex.Unop.arg;
5463 break;
5464 case Iop_CmpwNEZ32:
5465 /* CmpwNEZ32( CmpwNEZ32 ( x ) ) --> CmpwNEZ32 ( x ) */
5466 if (is_Unop(aa, Iop_CmpwNEZ32))
5467 return IRExpr_Unop( Iop_CmpwNEZ32, aa->Iex.Unop.arg );
5468 break;
5469 case Iop_CmpNEZ32:
5470 /* CmpNEZ32( Left32(x) ) --> CmpNEZ32(x) */
5471 if (is_Unop(aa, Iop_Left32))
5472 return IRExpr_Unop(Iop_CmpNEZ32, aa->Iex.Unop.arg);
5473 /* CmpNEZ32( 1Uto32(X) ) --> X */
5474 if (is_Unop(aa, Iop_1Uto32))
5475 return aa->Iex.Unop.arg;
5476 /* CmpNEZ32( 64to32( CmpwNEZ64(X) ) ) --> CmpNEZ64(X) */
5477 if (is_Unop(aa, Iop_64to32) && is_Unop(aa->Iex.Unop.arg, Iop_CmpwNEZ64))
5478 return IRExpr_Unop(Iop_CmpNEZ64, aa->Iex.Unop.arg->Iex.Unop.arg);
5479 break;
5480 case Iop_CmpNEZ8:
5481 /* CmpNEZ8( 1Uto8(X) ) --> X */
5482 if (is_Unop(aa, Iop_1Uto8))
5483 return aa->Iex.Unop.arg;
5484 break;
5485 case Iop_Left32:
5486 /* Left32( Left32(x) ) --> Left32(x) */
5487 if (is_Unop(aa, Iop_Left32))
5488 return IRExpr_Unop( Iop_Left32, aa->Iex.Unop.arg );
5489 break;
5490 case Iop_Left64:
5491 /* Left64( Left64(x) ) --> Left64(x) */
5492 if (is_Unop(aa, Iop_Left64))
5493 return IRExpr_Unop( Iop_Left64, aa->Iex.Unop.arg );
5494 break;
5495 case Iop_ZeroHI64ofV128:
5496 /* ZeroHI64ofV128( ZeroHI64ofV128(x) ) --> ZeroHI64ofV128(x) */
5497 if (is_Unop(aa, Iop_ZeroHI64ofV128))
5498 return IRExpr_Unop( Iop_ZeroHI64ofV128, aa->Iex.Unop.arg );
5499 break;
5500 case Iop_32to1:
5501 /* 32to1( 1Uto32 ( x ) ) --> x */
5502 if (is_Unop(aa, Iop_1Uto32))
5503 return aa->Iex.Unop.arg;
5504 /* 32to1( CmpwNEZ32 ( x )) --> CmpNEZ32(x) */
5505 if (is_Unop(aa, Iop_CmpwNEZ32))
5506 return IRExpr_Unop( Iop_CmpNEZ32, aa->Iex.Unop.arg );
5507 break;
5508 case Iop_64to1:
5509 /* 64to1( 1Uto64 ( x ) ) --> x */
5510 if (is_Unop(aa, Iop_1Uto64))
5511 return aa->Iex.Unop.arg;
5512 /* 64to1( CmpwNEZ64 ( x )) --> CmpNEZ64(x) */
5513 if (is_Unop(aa, Iop_CmpwNEZ64))
5514 return IRExpr_Unop( Iop_CmpNEZ64, aa->Iex.Unop.arg );
5515 break;
5516 case Iop_64to32:
5517 /* 64to32( 32Uto64 ( x )) --> x */
5518 if (is_Unop(aa, Iop_32Uto64))
5519 return aa->Iex.Unop.arg;
5520 /* 64to32( 8Uto64 ( x )) --> 8Uto32(x) */
5521 if (is_Unop(aa, Iop_8Uto64))
5522 return IRExpr_Unop(Iop_8Uto32, aa->Iex.Unop.arg);
5523 break;
5524 case Iop_64to16:
5525 /* 64to16( 16Uto64 ( x )) --> x */
5526 if (is_Unop(aa, Iop_16Uto64))
5527 return aa->Iex.Unop.arg;
5528 /* 64to16( 32Uto64 ( x )) --> 32to16(x) */
5529 if (is_Unop(aa, Iop_32Uto64))
5530 return IRExpr_Unop(Iop_32to16, aa->Iex.Unop.arg);
5531 break;
5533 case Iop_32Uto64:
5534 /* 32Uto64( 8Uto32( x )) --> 8Uto64(x) */
5535 if (is_Unop(aa, Iop_8Uto32))
5536 return IRExpr_Unop(Iop_8Uto64, aa->Iex.Unop.arg);
5537 /* 32Uto64( 16Uto32( x )) --> 16Uto64(x) */
5538 if (is_Unop(aa, Iop_16Uto32))
5539 return IRExpr_Unop(Iop_16Uto64, aa->Iex.Unop.arg);
5540 /* 32Uto64(64to32( Shr64( 32Uto64(64to32(x)), sh )))
5541 --> Shr64( 32Uto64(64to32(x)), sh ) */
5542 if (is_Unop(aa, Iop_64to32)
5543 && is_Binop(aa->Iex.Unop.arg, Iop_Shr64)
5544 && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
5545 && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
5546 Iop_64to32)) {
5547 return aa->Iex.Unop.arg;
5549 /* 32Uto64(64to32( Shl64( 32Uto64(64to32(x)), sh )))
5550 --> 32Uto64(64to32( Shl64( x, sh ))) */
5551 if (is_Unop(aa, Iop_64to32)
5552 && is_Binop(aa->Iex.Unop.arg, Iop_Shl64)
5553 && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1, Iop_32Uto64)
5554 && is_Unop(aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg,
5555 Iop_64to32)) {
5556 return
5557 IRExpr_Unop(
5558 Iop_32Uto64,
5559 IRExpr_Unop(
5560 Iop_64to32,
5561 IRExpr_Binop(
5562 Iop_Shl64,
5563 aa->Iex.Unop.arg->Iex.Binop.arg1->Iex.Unop.arg->Iex.Unop.arg,
5564 aa->Iex.Unop.arg->Iex.Binop.arg2
5565 )));
5567 break;
5569 case Iop_1Sto32:
5570 /* 1Sto32( CmpNEZ8( 32to8( 1Uto32( CmpNEZ32( x ))))) -> CmpwNEZ32(x) */
5571 if (is_Unop(aa, Iop_CmpNEZ8)
5572 && is_Unop(aa->Iex.Unop.arg, Iop_32to8)
5573 && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg, Iop_1Uto32)
5574 && is_Unop(aa->Iex.Unop.arg->Iex.Unop.arg->Iex.Unop.arg,
5575 Iop_CmpNEZ32)) {
5576 return IRExpr_Unop( Iop_CmpwNEZ32,
5577 aa->Iex.Unop.arg->Iex.Unop.arg
5578 ->Iex.Unop.arg->Iex.Unop.arg);
5580 break;
5582 default:
5583 break;
5585 /* no reduction rule applies */
5586 return IRExpr_Unop( op, aa );
5589 static IRExpr* atbSubst_Expr ( ATmpInfo* env, IRExpr* e )
5591 IRExpr* e2;
5592 IRExpr** args2;
5593 Int i;
5595 switch (e->tag) {
5597 case Iex_CCall:
5598 args2 = shallowCopyIRExprVec(e->Iex.CCall.args);
5599 for (i = 0; args2[i]; i++)
5600 args2[i] = atbSubst_Expr(env,args2[i]);
5601 return IRExpr_CCall(
5602 e->Iex.CCall.cee,
5603 e->Iex.CCall.retty,
5604 args2
5606 case Iex_RdTmp:
5607 e2 = atbSubst_Temp(env, e->Iex.RdTmp.tmp);
5608 return e2 ? e2 : e;
5609 case Iex_ITE:
5610 return IRExpr_ITE(
5611 atbSubst_Expr(env, e->Iex.ITE.cond),
5612 atbSubst_Expr(env, e->Iex.ITE.iftrue),
5613 atbSubst_Expr(env, e->Iex.ITE.iffalse)
5615 case Iex_Qop:
5616 return IRExpr_Qop(
5617 e->Iex.Qop.details->op,
5618 atbSubst_Expr(env, e->Iex.Qop.details->arg1),
5619 atbSubst_Expr(env, e->Iex.Qop.details->arg2),
5620 atbSubst_Expr(env, e->Iex.Qop.details->arg3),
5621 atbSubst_Expr(env, e->Iex.Qop.details->arg4)
5623 case Iex_Triop:
5624 return IRExpr_Triop(
5625 e->Iex.Triop.details->op,
5626 atbSubst_Expr(env, e->Iex.Triop.details->arg1),
5627 atbSubst_Expr(env, e->Iex.Triop.details->arg2),
5628 atbSubst_Expr(env, e->Iex.Triop.details->arg3)
5630 case Iex_Binop:
5631 return fold_IRExpr_Binop(
5632 e->Iex.Binop.op,
5633 atbSubst_Expr(env, e->Iex.Binop.arg1),
5634 atbSubst_Expr(env, e->Iex.Binop.arg2)
5636 case Iex_Unop:
5637 return fold_IRExpr_Unop(
5638 e->Iex.Unop.op,
5639 atbSubst_Expr(env, e->Iex.Unop.arg)
5641 case Iex_Load:
5642 return IRExpr_Load(
5643 e->Iex.Load.end,
5644 e->Iex.Load.ty,
5645 atbSubst_Expr(env, e->Iex.Load.addr)
5647 case Iex_GetI:
5648 return IRExpr_GetI(
5649 e->Iex.GetI.descr,
5650 atbSubst_Expr(env, e->Iex.GetI.ix),
5651 e->Iex.GetI.bias
5653 case Iex_Const:
5654 case Iex_Get:
5655 return e;
5656 default:
5657 vex_printf("\n"); ppIRExpr(e); vex_printf("\n");
5658 vpanic("atbSubst_Expr");
5662 /* Same deal as atbSubst_Expr, except for stmts. */
5664 static IRStmt* atbSubst_Stmt ( ATmpInfo* env, IRStmt* st )
5666 Int i;
5667 IRDirty *d, *d2;
5668 IRCAS *cas, *cas2;
5669 IRPutI *puti, *puti2;
5671 switch (st->tag) {
5672 case Ist_AbiHint:
5673 return IRStmt_AbiHint(
5674 atbSubst_Expr(env, st->Ist.AbiHint.base),
5675 st->Ist.AbiHint.len,
5676 atbSubst_Expr(env, st->Ist.AbiHint.nia)
5678 case Ist_Store:
5679 return IRStmt_Store(
5680 st->Ist.Store.end,
5681 atbSubst_Expr(env, st->Ist.Store.addr),
5682 atbSubst_Expr(env, st->Ist.Store.data)
5684 case Ist_StoreG: {
5685 IRStoreG* sg = st->Ist.StoreG.details;
5686 return IRStmt_StoreG(sg->end,
5687 atbSubst_Expr(env, sg->addr),
5688 atbSubst_Expr(env, sg->data),
5689 atbSubst_Expr(env, sg->guard));
5691 case Ist_LoadG: {
5692 IRLoadG* lg = st->Ist.LoadG.details;
5693 return IRStmt_LoadG(lg->end, lg->cvt, lg->dst,
5694 atbSubst_Expr(env, lg->addr),
5695 atbSubst_Expr(env, lg->alt),
5696 atbSubst_Expr(env, lg->guard));
5698 case Ist_WrTmp:
5699 return IRStmt_WrTmp(
5700 st->Ist.WrTmp.tmp,
5701 atbSubst_Expr(env, st->Ist.WrTmp.data)
5703 case Ist_Put:
5704 return IRStmt_Put(
5705 st->Ist.Put.offset,
5706 atbSubst_Expr(env, st->Ist.Put.data)
5708 case Ist_PutI:
5709 puti = st->Ist.PutI.details;
5710 puti2 = mkIRPutI(puti->descr,
5711 atbSubst_Expr(env, puti->ix),
5712 puti->bias,
5713 atbSubst_Expr(env, puti->data));
5714 return IRStmt_PutI(puti2);
5716 case Ist_Exit:
5717 return IRStmt_Exit(
5718 atbSubst_Expr(env, st->Ist.Exit.guard),
5719 st->Ist.Exit.jk,
5720 st->Ist.Exit.dst,
5721 st->Ist.Exit.offsIP
5723 case Ist_IMark:
5724 return IRStmt_IMark(st->Ist.IMark.addr,
5725 st->Ist.IMark.len,
5726 st->Ist.IMark.delta);
5727 case Ist_NoOp:
5728 return IRStmt_NoOp();
5729 case Ist_MBE:
5730 return IRStmt_MBE(st->Ist.MBE.event);
5731 case Ist_CAS:
5732 cas = st->Ist.CAS.details;
5733 cas2 = mkIRCAS(
5734 cas->oldHi, cas->oldLo, cas->end,
5735 atbSubst_Expr(env, cas->addr),
5736 cas->expdHi ? atbSubst_Expr(env, cas->expdHi) : NULL,
5737 atbSubst_Expr(env, cas->expdLo),
5738 cas->dataHi ? atbSubst_Expr(env, cas->dataHi) : NULL,
5739 atbSubst_Expr(env, cas->dataLo)
5741 return IRStmt_CAS(cas2);
5742 case Ist_LLSC:
5743 return IRStmt_LLSC(
5744 st->Ist.LLSC.end,
5745 st->Ist.LLSC.result,
5746 atbSubst_Expr(env, st->Ist.LLSC.addr),
5747 st->Ist.LLSC.storedata
5748 ? atbSubst_Expr(env, st->Ist.LLSC.storedata) : NULL
5750 case Ist_Dirty:
5751 d = st->Ist.Dirty.details;
5752 d2 = emptyIRDirty();
5753 *d2 = *d;
5754 if (d2->mFx != Ifx_None)
5755 d2->mAddr = atbSubst_Expr(env, d2->mAddr);
5756 d2->guard = atbSubst_Expr(env, d2->guard);
5757 for (i = 0; d2->args[i]; i++) {
5758 IRExpr* arg = d2->args[i];
5759 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
5760 d2->args[i] = atbSubst_Expr(env, arg);
5762 return IRStmt_Dirty(d2);
5763 default:
5764 vex_printf("\n"); ppIRStmt(st); vex_printf("\n");
5765 vpanic("atbSubst_Stmt");
5769 inline
5770 static Bool dirty_helper_stores ( const IRDirty *d )
5772 return d->mFx == Ifx_Write || d->mFx == Ifx_Modify;
5775 inline
5776 static Interval dirty_helper_puts (
5777 const IRDirty *d,
5778 Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
5779 VexRegisterUpdates pxControl,
5780 /*OUT*/Bool *requiresPreciseMemExns
5783 Int i;
5784 Interval interval;
5786 /* Passing the guest state pointer opens the door to modifying the
5787 guest state under the covers. It's not allowed, but let's be
5788 extra conservative and assume the worst. */
5789 for (i = 0; d->args[i]; i++) {
5790 if (UNLIKELY(d->args[i]->tag == Iex_GSPTR)) {
5791 *requiresPreciseMemExns = True;
5792 /* Assume all guest state is written. */
5793 interval.present = True;
5794 interval.low = 0;
5795 interval.high = 0x7FFFFFFF;
5796 return interval;
5800 /* Check the side effects on the guest state */
5801 interval.present = False;
5802 interval.low = interval.high = -1;
5803 *requiresPreciseMemExns = False;
5805 for (i = 0; i < d->nFxState; ++i) {
5806 if (d->fxState[i].fx != Ifx_Read) {
5807 Int offset = d->fxState[i].offset;
5808 Int size = d->fxState[i].size;
5809 Int nRepeats = d->fxState[i].nRepeats;
5810 Int repeatLen = d->fxState[i].repeatLen;
5812 if (preciseMemExnsFn(offset,
5813 offset + nRepeats * repeatLen + size - 1,
5814 pxControl)) {
5815 *requiresPreciseMemExns = True;
5817 update_interval(&interval, offset,
5818 offset + nRepeats * repeatLen + size - 1);
5822 return interval;
5825 /* Return an interval if st modifies the guest state. Via
5826 requiresPreciseMemExns return whether or not that modification
5827 requires precise exceptions. */
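/* For example, "PUT(72) = t5" with t5 of type I64 yields the
   interval [72,79], and *requiresPreciseMemExns is whatever
   preciseMemExnsFn reports for that byte range. */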
5828 static Interval stmt_modifies_guest_state (
5829 IRSB *bb, const IRStmt *st,
5830 Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
5831 VexRegisterUpdates pxControl,
5832 /*OUT*/Bool *requiresPreciseMemExns
5835 Interval interval;
5837 switch (st->tag) {
5838 case Ist_Put: {
5839 Int offset = st->Ist.Put.offset;
5840 Int size = sizeofIRType(typeOfIRExpr(bb->tyenv, st->Ist.Put.data));
5842 *requiresPreciseMemExns
5843 = preciseMemExnsFn(offset, offset + size - 1, pxControl);
5844 interval.present = True;
5845 interval.low = offset;
5846 interval.high = offset + size - 1;
5847 return interval;
5850 case Ist_PutI: {
5851 IRRegArray *descr = st->Ist.PutI.details->descr;
5852 Int offset = descr->base;
5853 Int size = sizeofIRType(descr->elemTy);
5855 /* We quietly assume here that all segments are contiguous and there
5856 are no holes. This is to avoid a loop. The assumption is conservative
5857 in the sense that we might report that precise memory exceptions are
5858 needed when in fact they are not. */
5859 *requiresPreciseMemExns
5860 = preciseMemExnsFn(offset, offset + descr->nElems * size - 1,
5861 pxControl);
5862 interval.present = True;
5863 interval.low = offset;
5864 interval.high = offset + descr->nElems * size - 1;
5865 return interval;
5868 case Ist_Dirty:
5869 return dirty_helper_puts(st->Ist.Dirty.details,
5870 preciseMemExnsFn, pxControl,
5871 requiresPreciseMemExns);
5873 default:
5874 *requiresPreciseMemExns = False;
5875 interval.present = False;
5876 interval.low = -1;
5877 interval.high = -1;
5878 return interval;
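
/* Illustration (hypothetical offsets): a "PUT(16) = t3:I64" is reported
   by stmt_modifies_guest_state as the interval [16,23].  In
   ado_treebuild_BB below, a binding whose GET interval is, say,
   [20,27] overlaps [16,23] and is therefore flushed out before the
   PUT is emitted, whereas a binding reading only [32,39] is left
   in place. */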
5882 /* notstatic */ Addr ado_treebuild_BB (
5883 IRSB* bb,
5884 Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
5885 VexRegisterUpdates pxControl
5888 Int i, j, k, m;
5889 Bool stmtStores, invalidateMe;
5890 Interval putInterval;
5891 IRStmt* st;
5892 IRStmt* st2;
5893 ATmpInfo env[A_NENV];
5895 Bool max_ga_known = False;
5896 Addr max_ga = 0;
5898 Int n_tmps = bb->tyenv->types_used;
5899 UShort* uses = LibVEX_Alloc_inline(n_tmps * sizeof(UShort));
5901 /* Phase 1. Scan forwards in bb, counting use occurrences of each
5902 temp. Also count occurrences in the bb->next field. Take the
5903 opportunity to also find the maximum guest address in the block,
5904 since that will be needed later for deciding when we can safely
5905 elide event checks. */
5907 for (i = 0; i < n_tmps; i++)
5908 uses[i] = 0;
5910 for (i = 0; i < bb->stmts_used; i++) {
5911 st = bb->stmts[i];
5912 switch (st->tag) {
5913 case Ist_NoOp:
5914 continue;
5915 case Ist_IMark: {
5916 UInt len = st->Ist.IMark.len;
5917 Addr mga = st->Ist.IMark.addr + (len < 1 ? 1 : len) - 1;
5918 max_ga_known = True;
5919 if (mga > max_ga)
5920 max_ga = mga;
5921 break;
5923 default:
5924 break;
5926 aoccCount_Stmt( uses, st );
5928 aoccCount_Expr(uses, bb->next );
5930 # if 0
5931 for (i = 0; i < n_tmps; i++) {
5932 if (uses[i] == 0)
5933 continue;
5934 ppIRTemp( (IRTemp)i );
5935 vex_printf(" used %d\n", (Int)uses[i] );
5937 # endif
5939 /* Phase 2. Scan forwards in bb. For each statement in turn:
5941 If the env is full, emit the end element. This guarantees
5942 there is at least one free slot in the following.
5944 On seeing 't = E', occ(t)==1,
5945 let E'=env(E)
5946 delete this stmt
5947 add t -> E' to the front of the env
5948 Examine E' and set the hints for E' appropriately
5949 (doesLoad? doesGet?)
5951 On seeing any other stmt,
5952 let stmt' = env(stmt)
5953 remove from env any 't=E' binds invalidated by stmt
5954 emit the invalidated stmts
5955 emit stmt'
5956 compact any holes in env
5957 by sliding entries towards the front
5959 Finally, apply env to bb->next.
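
      For example (purely illustrative IR), given

         t1 = GET:I32(8)
         t2 = Add32(t1,0x4:I32)
         PUT(0) = t2

      with t1 and t2 each used exactly once, both bindings are absorbed
      into the env and the block is rewritten as

         PUT(0) = Add32(GET:I32(8),0x4:I32)

      Had a store, or a PUT requiring precise exceptions, intervened
      between 't1 = GET:I32(8)' and its use, the t1 binding would have
      been flushed back out as an ordinary WrTmp instead.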
5962 for (i = 0; i < A_NENV; i++) {
5963 env[i].bindee = NULL;
5964 env[i].binder = IRTemp_INVALID;
5967 /* The stmts in bb are being reordered, and we are guaranteed to
5968 end up with no more than the number we started with. Use i to
5969 be the cursor of the current stmt examined and j <= i to be that
5970 for the current stmt being written.
5972 j = 0;
5973 for (i = 0; i < bb->stmts_used; i++) {
5975 st = bb->stmts[i];
5976 if (st->tag == Ist_NoOp)
5977 continue;
5979 /* Ensure there's at least one space in the env, by emitting
5980 the oldest binding if necessary. */
5981 if (env[A_NENV-1].bindee != NULL) {
5982 bb->stmts[j] = IRStmt_WrTmp( env[A_NENV-1].binder,
5983 env[A_NENV-1].bindee );
5984 j++;
5985 vassert(j <= i);
5986 env[A_NENV-1].bindee = NULL;
5989 /* Consider current stmt. */
5990 if (st->tag == Ist_WrTmp && uses[st->Ist.WrTmp.tmp] <= 1) {
5991 IRExpr *e, *e2;
5993 /* optional extra: dump dead bindings as we find them.
5994 Removes the need for a prior dead-code removal pass. */
5995 if (uses[st->Ist.WrTmp.tmp] == 0) {
5996 if (0) vex_printf("DEAD binding\n");
5997 continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
5999 vassert(uses[st->Ist.WrTmp.tmp] == 1);
6001 /* ok, we have 't = E', occ(t)==1. Do the abovementioned
6002 actions. */
6003 e = st->Ist.WrTmp.data;
6004 e2 = atbSubst_Expr(env, e);
6005 addToEnvFront(env, st->Ist.WrTmp.tmp, e2);
6006 setHints_Expr(&env[0].doesLoad, &env[0].getInterval, e2);
6007 /* don't advance j, as we are deleting this stmt and instead
6008 holding it temporarily in the env. */
6009 continue; /* for (i = 0; i < bb->stmts_used; i++) loop */
6012 /* we get here for any other kind of statement. */
6013 /* 'use up' any bindings required by the current statement. */
6014 st2 = atbSubst_Stmt(env, st);
6016 /* Now, before this stmt, dump any bindings in env that it
6017 invalidates. These need to be dumped in the order in which
6018 they originally entered env -- that means from oldest to
6019 youngest. */
6021         /* putInterval/stmtStores characterise what the stmt under
6022            consideration does, or might do (erring towards True is safe). */
6024 Bool putRequiresPreciseMemExns;
6025 putInterval = stmt_modifies_guest_state(
6026 bb, st, preciseMemExnsFn, pxControl,
6027 &putRequiresPreciseMemExns
6030         /* stmtStores is True if this stmt writes memory or might do so (==> we don't
6031 want to reorder other loads or stores relative to it). Also,
6032 both LL and SC fall under this classification, since we
6033 really ought to be conservative and not reorder any other
6034 memory transactions relative to them. */
6035 stmtStores
6036 = toBool( st->tag == Ist_Store
6037 || (st->tag == Ist_Dirty
6038 && dirty_helper_stores(st->Ist.Dirty.details))
6039 || st->tag == Ist_LLSC
6040 || st->tag == Ist_CAS );
6042 for (k = A_NENV-1; k >= 0; k--) {
6043 if (env[k].bindee == NULL)
6044 continue;
6045 /* Compare the actions of this stmt with the actions of
6046 binding 'k', to see if they invalidate the binding. */
6047 invalidateMe
6048 = toBool(
6049 /* a store invalidates loaded data */
6050 (env[k].doesLoad && stmtStores)
6051 /* a put invalidates get'd data, if they overlap */
6052 || ((env[k].getInterval.present && putInterval.present) &&
6053 intervals_overlap(env[k].getInterval, putInterval))
6054               /* a put invalidates loaded data. That means, in essence, that
6055                  a load expression cannot be substituted into a statement
6056                  that follows the put. But there is nothing wrong with doing
6057                  so except when the put statement requires precise exceptions.
6058 Think of a load that is moved past a put where the put
6059 updates the IP in the guest state. If the load generates
6060 a segfault, the wrong address (line number) would be
6061 reported. */
6062 || (env[k].doesLoad && putInterval.present &&
6063 putRequiresPreciseMemExns)
6064 /* probably overly conservative: a memory bus event
6065 invalidates absolutely everything, so that all
6066 computation prior to it is forced to complete before
6067 proceeding with the event (fence,lock,unlock). */
6068 || st->tag == Ist_MBE
6069 /* also be (probably overly) paranoid re AbiHints */
6070 || st->tag == Ist_AbiHint
6072 if (invalidateMe) {
6073 bb->stmts[j] = IRStmt_WrTmp( env[k].binder, env[k].bindee );
6074 j++;
6075 vassert(j <= i);
6076 env[k].bindee = NULL;
6080 /* Slide in-use entries in env up to the front */
6081 m = 0;
6082 for (k = 0; k < A_NENV; k++) {
6083 if (env[k].bindee != NULL) {
6084 env[m] = env[k];
6085 m++;
6088 for (m = m; m < A_NENV; m++) {
6089 env[m].bindee = NULL;
6092 /* finally, emit the substituted statement */
6093 bb->stmts[j] = st2;
6094 /* vex_printf("**2 "); ppIRStmt(bb->stmts[j]); vex_printf("\n"); */
6095 j++;
6097 vassert(j <= i+1);
6098 } /* for each stmt in the original bb ... */
6100    /* Finally ... substitute the ->next field as much as possible, and
6101       dump any left-over bindings.  Hmm.  Perhaps there should be no
6102       left-over bindings?  Or are any left-over bindings
6103       by definition dead? */
6104 bb->next = atbSubst_Expr(env, bb->next);
6105 bb->stmts_used = j;
6107 return max_ga_known ? max_ga : ~(Addr)0;
6111 /*---------------------------------------------------------------*/
6112 /*--- MSVC specific transformation hacks ---*/
6113 /*---------------------------------------------------------------*/
6115 /* The purpose of all this is to find MSVC's idiom for non-constant
6116 bitfield assignment, "a ^ ((a ^ b) & c)", and transform it into
6117    gcc's idiom "(a & ~c) | (b & c)".  The motivation is that Memcheck
6118    generates a lot of false positives from the MSVC version because it
6119 doesn't understand that XORing an undefined bit with itself gives a
6120 defined result.
6122 This isn't a problem for the simple case "x ^ x", because iropt
6123 folds it to a constant zero before Memcheck ever sees it. But in
6124 this case we have an intervening "& c" which defeats the simple
6125 case. So we have to carefully inspect all expressions rooted at an
6126 XOR to see if any of them match "a ^ ((a ^ b) & c)", or any of the
6127 7 other variants resulting from swapping the order of arguments to
6128 the three binary operations. If we get a match, we then replace
6129 the tree with "(a & ~c) | (b & c)", and Memcheck is happy.
6131 The key difficulty is to spot the two uses of "a". To normalise
6132 the IR to maximise the chances of success, we first do a CSE pass,
6133 with CSEing of loads enabled, since the two "a" expressions may be
6134 loads, which need to be commoned up. Then we do a constant folding
6135 pass, so as to remove any tmp-to-tmp assignment chains that would
6136 make matching the original expression more difficult.
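
/* Purely illustrative sketch (not part of the VEX build; the helper
   name is made up): exhaustively checks that the MSVC idiom and the
   gcc idiom agree for all 8-bit inputs, which is what justifies the
   rewrite performed below. */
#if 0
#include <assert.h>
static void check_bitfield_idioms_agree ( void )
{
   unsigned a, b, c;
   for (a = 0; a < 256; a++)
      for (b = 0; b < 256; b++)
         for (c = 0; c < 256; c++) {
            unsigned msvc = a ^ ((a ^ b) & c);    /* MSVC idiom */
            unsigned gcc_ = (a & ~c) | (b & c);   /* gcc idiom  */
            assert(msvc == gcc_);                 /* identical results */
         }
}
#endif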
6140 /* Helper function for debug printing */
6141 __attribute__((unused))
6142 static void print_flat_expr ( IRExpr** env, IRExpr* e )
6144 if (e == NULL) {
6145 vex_printf("?");
6146 return;
6148 switch (e->tag) {
6149 case Iex_Binop: {
6150 ppIROp(e->Iex.Binop.op);
6151 vex_printf("(");
6152 print_flat_expr(env, e->Iex.Binop.arg1);
6153 vex_printf(",");
6154 print_flat_expr(env, e->Iex.Binop.arg2);
6155 vex_printf(")");
6156 break;
6158 case Iex_Unop: {
6159 ppIROp(e->Iex.Unop.op);
6160 vex_printf("(");
6161 print_flat_expr(env, e->Iex.Unop.arg);
6162 vex_printf(")");
6163 break;
6165 case Iex_RdTmp:
6166 ppIRTemp(e->Iex.RdTmp.tmp);
6167 vex_printf("=");
6168 print_flat_expr(env, chase1(env, e));
6169 break;
6170 case Iex_Const:
6171 case Iex_CCall:
6172 case Iex_Load:
6173 case Iex_ITE:
6174 case Iex_Get:
6175 ppIRExpr(e);
6176 break;
6177 default:
6178 vex_printf("FAIL: "); ppIRExpr(e); vex_printf("\n");
6179 vassert(0);
6183 /* Spot a ^ ((a ^ b) & c) for a,b and c tmp-or-const (atoms)
6184 or any of the other 7 variants generated by switching the order
6185 of arguments to the outer ^, the inner ^ and the &.
6187 static UInt spotBitfieldAssignment ( /*OUT*/IRExpr** aa, /*OUT*/IRExpr** bb,
6188 /*OUT*/IRExpr** cc,
6189 IRExpr** env, IRExpr* e,
6190 IROp opAND, IROp opXOR)
6192 # define ISBIN(_e,_op) ((_e) && (_e)->tag == Iex_Binop \
6193 && (_e)->Iex.Binop.op == (_op))
6194 # define ISATOM(_e) isIRAtom(_e)
6195 # define STEP(_e) chase1(env, (_e))
6196 # define LL(_e) ((_e)->Iex.Binop.arg1)
6197 # define RR(_e) ((_e)->Iex.Binop.arg2)
6199 IRExpr *a1, *and, *xor, *c, *a2bL, *a2bR;
6201 /* This is common to all 8 cases */
6202 if (!ISBIN(e, opXOR)) goto fail;
6204 /* -----and------ */
6205 /* --xor--- */
6206 /* find variant 1: a1 ^ ((a2 ^ b) & c) */
6207 /* find variant 2: a1 ^ ((b ^ a2) & c) */
6208 a1 = and = xor = c = a2bL = a2bR = NULL;
6210 a1 = LL(e);
6211 and = STEP(RR(e));
6212 if (!ISBIN(and, opAND)) goto v34;
6213 xor = STEP(LL(and));
6214 c = RR(and);
6215 if (!ISBIN(xor, opXOR)) goto v34;
6216 a2bL = LL(xor);
6217 a2bR = RR(xor);
6219 if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
6220 *aa = a1;
6221 *bb = a2bR;
6222 *cc = c;
6223 return 1;
6225 if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
6226 *aa = a1;
6227 *bb = a2bL;
6228 *cc = c;
6229 return 2;
6232 v34:
6233 /* -----and------ */
6234 /* --xor--- */
6235 /* find variant 3: ((a2 ^ b) & c) ^ a1 */
6236 /* find variant 4: ((b ^ a2) & c) ^ a1 */
6237 a1 = and = xor = c = a2bL = a2bR = NULL;
6239 a1 = RR(e);
6240 and = STEP(LL(e));
6241 if (!ISBIN(and, opAND)) goto v56;
6242 xor = STEP(LL(and));
6243 c = RR(and);
6244 if (!ISBIN(xor, opXOR)) goto v56;
6245 a2bL = LL(xor);
6246 a2bR = RR(xor);
6248 if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
6249 *aa = a1;
6250 *bb = a2bR;
6251 *cc = c;
6252 return 3;
6254 if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
6255 *aa = a1;
6256 *bb = a2bL;
6257 *cc = c;
6258 return 4;
6261 v56:
6262 /* -----and------ */
6263 /* --xor--- */
6264 /* find variant 5: a1 ^ (c & (a2 ^ b)) */
6265 /* find variant 6: a1 ^ (c & (b ^ a2)) */
6266 a1 = and = xor = c = a2bL = a2bR = NULL;
6268 a1 = LL(e);
6269 and = STEP(RR(e));
6270 if (!ISBIN(and, opAND)) goto v78;
6271 xor = STEP(RR(and));
6272 c = LL(and);
6273 if (!ISBIN(xor, opXOR)) goto v78;
6274 a2bL = LL(xor);
6275 a2bR = RR(xor);
6277 if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
6278 *aa = a1;
6279 *bb = a2bR;
6280 *cc = c;
6281 vassert(5-5); // ATC
6282 return 5;
6284 if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
6285 *aa = a1;
6286 *bb = a2bL;
6287 *cc = c;
6288 vassert(6-6); // ATC
6289 return 6;
6292 v78:
6293 /* -----and------ */
6294 /* --xor--- */
6295 /* find variant 7: (c & (a2 ^ b)) ^ a1 */
6296 /* find variant 8: (c & (b ^ a2)) ^ a1 */
6297 a1 = and = xor = c = a2bL = a2bR = NULL;
6299 a1 = RR(e);
6300 and = STEP(LL(e));
6301 if (!ISBIN(and, opAND)) goto fail;
6302 xor = STEP(RR(and));
6303 c = LL(and);
6304 if (!ISBIN(xor, opXOR)) goto fail;
6305 a2bL = LL(xor);
6306 a2bR = RR(xor);
6308 if (eqIRAtom(a1, a2bL) && !eqIRAtom(a1, a2bR)) {
6309 *aa = a1;
6310 *bb = a2bR;
6311 *cc = c;
6312 return 7;
6314 if (eqIRAtom(a1, a2bR) && !eqIRAtom(a1, a2bL)) {
6315 *aa = a1;
6316 *bb = a2bL;
6317 *cc = c;
6318 return 8;
6321 fail:
6322 return 0;
6324 # undef ISBIN
6325 # undef ISATOM
6326 # undef STEP
6327 # undef LL
6328 # undef RR
6331 /* If |e| is of the form a ^ ((a ^ b) & c) (or any of the 7 other
6332 variants thereof generated by switching arguments around), return
6333 the IRExpr* for (a & ~c) | (b & c). Else return NULL. */
6334 static IRExpr* do_XOR_TRANSFORMS_IRExpr ( IRExpr** env, IRExpr* e )
6336 if (e->tag != Iex_Binop)
6337 return NULL;
6339 const HChar* tyNm = NULL;
6340 IROp opOR = Iop_INVALID;
6341 IROp opAND = Iop_INVALID;
6342 IROp opNOT = Iop_INVALID;
6343 IROp opXOR = Iop_INVALID;
6344 switch (e->Iex.Binop.op) {
6345 case Iop_Xor32:
6346 tyNm = "I32";
6347 opOR = Iop_Or32; opAND = Iop_And32;
6348 opNOT = Iop_Not32; opXOR = Iop_Xor32;
6349 break;
6350 case Iop_Xor16:
6351 tyNm = "I16";
6352 opOR = Iop_Or16; opAND = Iop_And16;
6353 opNOT = Iop_Not16; opXOR = Iop_Xor16;
6354 break;
6355 case Iop_Xor8:
6356 tyNm = "I8";
6357 opOR = Iop_Or8; opAND = Iop_And8;
6358 opNOT = Iop_Not8; opXOR = Iop_Xor8;
6359 break;
6360 default:
6361 return NULL;
6364 IRExpr* aa = NULL;
6365 IRExpr* bb = NULL;
6366 IRExpr* cc = NULL;
6367 UInt variant = spotBitfieldAssignment(&aa, &bb, &cc, env, e, opAND, opXOR);
6368 if (variant > 0) {
6369 static UInt ctr = 0;
6370 if (0)
6371 vex_printf("XXXXXXXXXX Bitfield Assignment number %u, "
6372 "type %s, variant %u\n",
6373 ++ctr, tyNm, variant);
6374 /* It's vitally important that the returned aa, bb and cc are
6375 atoms -- either constants or tmps. If it's anything else
6376         (eg, a GET) then incorporating them in a tree at this point
6377         in the SB may erroneously pull them forwards (eg, past a PUT
6378         that originally came after the GET) and so transform the IR
6379 wrongly. spotBitfieldAssignment should guarantee only to
6380 give us atoms, but we check here anyway. */
6381 vassert(aa && isIRAtom(aa));
6382 vassert(bb && isIRAtom(bb));
6383 vassert(cc && isIRAtom(cc));
6384 return IRExpr_Binop(
6385 opOR,
6386 IRExpr_Binop(opAND, aa, IRExpr_Unop(opNOT, cc)),
6387 IRExpr_Binop(opAND, bb, cc)
6390 return NULL;
6394 /* SB is modified in-place. Visit all the IRExprs and, for those
6395 which are allowed to be non-atomic, perform the XOR transform if
6396    possible.  This makes |sb| non-flat, but that's OK; the caller
6397 can re-flatten it. Returns True iff any changes were made. */
6398 static Bool do_XOR_TRANSFORM_IRSB ( IRSB* sb )
6400 Int i;
6401 Bool changed = False;
6403 /* Make the tmp->expr environment, so we can use it for
6404 chasing expressions. */
6405 Int n_tmps = sb->tyenv->types_used;
6406 IRExpr** env = LibVEX_Alloc_inline(n_tmps * sizeof(IRExpr*));
6407 for (i = 0; i < n_tmps; i++)
6408 env[i] = NULL;
6410 for (i = 0; i < sb->stmts_used; i++) {
6411 IRStmt* st = sb->stmts[i];
6412 if (st->tag != Ist_WrTmp)
6413 continue;
6414 IRTemp t = st->Ist.WrTmp.tmp;
6415 vassert(t < n_tmps);
6416 env[t] = st->Ist.WrTmp.data;
6419 for (i = 0; i < sb->stmts_used; i++) {
6420 IRStmt* st = sb->stmts[i];
6422 switch (st->tag) {
6423 case Ist_AbiHint:
6424 vassert(isIRAtom(st->Ist.AbiHint.base));
6425 vassert(isIRAtom(st->Ist.AbiHint.nia));
6426 break;
6427 case Ist_Put:
6428 vassert(isIRAtom(st->Ist.Put.data));
6429 break;
6430 case Ist_PutI: {
6431 IRPutI* puti = st->Ist.PutI.details;
6432 vassert(isIRAtom(puti->ix));
6433 vassert(isIRAtom(puti->data));
6434 break;
6436 case Ist_WrTmp: {
6437 /* This is the one place where an expr (st->Ist.WrTmp.data) is
6438 allowed to be more than just a constant or a tmp. */
6439 IRExpr* mb_new_data
6440 = do_XOR_TRANSFORMS_IRExpr(env, st->Ist.WrTmp.data);
6441 if (mb_new_data) {
6442 //ppIRSB(sb);
6443 st->Ist.WrTmp.data = mb_new_data;
6444 //ppIRSB(sb);
6445 changed = True;
6447 break;
6449 case Ist_Store:
6450 vassert(isIRAtom(st->Ist.Store.addr));
6451 vassert(isIRAtom(st->Ist.Store.data));
6452 break;
6453 case Ist_StoreG: {
6454 IRStoreG* sg = st->Ist.StoreG.details;
6455 vassert(isIRAtom(sg->addr));
6456 vassert(isIRAtom(sg->data));
6457 vassert(isIRAtom(sg->guard));
6458 break;
6460 case Ist_LoadG: {
6461 IRLoadG* lg = st->Ist.LoadG.details;
6462 vassert(isIRAtom(lg->addr));
6463 vassert(isIRAtom(lg->alt));
6464 vassert(isIRAtom(lg->guard));
6465 break;
6467 case Ist_CAS: {
6468 IRCAS* cas = st->Ist.CAS.details;
6469 vassert(isIRAtom(cas->addr));
6470 vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
6471 vassert(isIRAtom(cas->expdLo));
6472 vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
6473 vassert(isIRAtom(cas->dataLo));
6474 break;
6476 case Ist_LLSC:
6477 vassert(isIRAtom(st->Ist.LLSC.addr));
6478 if (st->Ist.LLSC.storedata)
6479 vassert(isIRAtom(st->Ist.LLSC.storedata));
6480 break;
6481 case Ist_Dirty: {
6482 IRDirty* d = st->Ist.Dirty.details;
6483 if (d->mFx != Ifx_None) {
6484 vassert(isIRAtom(d->mAddr));
6486 vassert(isIRAtom(d->guard));
6487 for (Int j = 0; d->args[j]; j++) {
6488 IRExpr* arg = d->args[j];
6489 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg))) {
6490 vassert(isIRAtom(arg));
6493 break;
6495 case Ist_IMark:
6496 case Ist_NoOp:
6497 case Ist_MBE:
6498 break;
6499 case Ist_Exit:
6500 vassert(isIRAtom(st->Ist.Exit.guard));
6501 break;
6502 default:
6503 vex_printf("\n"); ppIRStmt(st);
6504 vpanic("do_XOR_TRANSFORMS_IRSB");
6508 vassert(isIRAtom(sb->next));
6509 return changed;
6513 static IRSB* do_MSVC_HACKS ( IRSB* sb )
6515 // Normalise as much as we can. This is the one-and-only place
6516 // where we call do_cse_BB with allowLoadsToBeCSEd set to True.
6517 Bool any_cse_changes = do_cse_BB( sb, True/*allowLoadsToBeCSEd*/ );
6518 if (any_cse_changes) {
6519 // CSEing might have created dead code. Remove it.
6520 sb = cprop_BB ( sb );
6521 do_deadcode_BB(sb);
6524 // Visit all atoms, do the transformation proper. bb is modified
6525 // in-place.
6526 Bool changed = do_XOR_TRANSFORM_IRSB(sb);
6528 if (changed) {
6529 // The transformation generates non-flat expressions, so we now
6530 // need to re-flatten the block.
6531 sb = flatten_BB(sb);
6534 return sb;
6538 /*---------------------------------------------------------------*/
6539 /*--- iropt main ---*/
6540 /*---------------------------------------------------------------*/
6542 static Bool iropt_verbose = False; /* True; */
6545 /* Do a simple cleanup pass on bb. This is: redundant Get removal,
6546 redundant Put removal, constant propagation, dead code removal,
6547 clean helper specialisation, and dead code removal (again).
6551 static
6552 IRSB* cheap_transformations (
6553 IRSB* bb,
6554 IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
6555 Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
6556 VexRegisterUpdates pxControl
6559 redundant_get_removal_BB ( bb );
6560 if (iropt_verbose) {
6561 vex_printf("\n========= REDUNDANT GET\n\n" );
6562 ppIRSB(bb);
6565 if (pxControl < VexRegUpdAllregsAtEachInsn) {
6566 redundant_put_removal_BB ( bb, preciseMemExnsFn, pxControl );
6568 if (iropt_verbose) {
6569 vex_printf("\n========= REDUNDANT PUT\n\n" );
6570 ppIRSB(bb);
6573 bb = cprop_BB ( bb );
6574 if (iropt_verbose) {
6575 vex_printf("\n========= CPROPD\n\n" );
6576 ppIRSB(bb);
6579 do_deadcode_BB ( bb );
6580 if (iropt_verbose) {
6581 vex_printf("\n========= DEAD\n\n" );
6582 ppIRSB(bb);
6585 bb = spec_helpers_BB ( bb, specHelper );
6586 do_deadcode_BB ( bb );
6587 if (iropt_verbose) {
6588 vex_printf("\n========= SPECd \n\n" );
6589 ppIRSB(bb);
6592 return bb;
6596 /* Do some more expensive transformations on bb, which are aimed at
6597 optimising as much as possible in the presence of GetI and PutI. */
6599 static
6600 IRSB* expensive_transformations( IRSB* bb, VexRegisterUpdates pxControl )
6602 (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
6603 collapse_AddSub_chains_BB( bb );
6604 do_redundant_GetI_elimination( bb );
6605 if (pxControl < VexRegUpdAllregsAtEachInsn) {
6606 do_redundant_PutI_elimination( bb, pxControl );
6608 do_deadcode_BB( bb );
6609 return bb;
6613 /* Scan a flattened BB to look for signs that more expensive
6614 optimisations might be useful:
6615 - find out if there are any GetIs and PutIs
6616 - find out if there are any floating or vector-typed temporaries
6619 static void considerExpensives ( /*OUT*/Bool* hasGetIorPutI,
6620 /*OUT*/Bool* hasVorFtemps,
6621 IRSB* bb )
6623 Int i, j;
6624 IRStmt* st;
6625 IRDirty* d;
6626 IRCAS* cas;
6628 *hasGetIorPutI = False;
6629 *hasVorFtemps = False;
6631 for (i = 0; i < bb->stmts_used; i++) {
6632 st = bb->stmts[i];
6633 switch (st->tag) {
6634 case Ist_AbiHint:
6635 vassert(isIRAtom(st->Ist.AbiHint.base));
6636 vassert(isIRAtom(st->Ist.AbiHint.nia));
6637 break;
6638 case Ist_PutI:
6639 *hasGetIorPutI = True;
6640 break;
6641 case Ist_WrTmp:
6642 if (st->Ist.WrTmp.data->tag == Iex_GetI)
6643 *hasGetIorPutI = True;
6644 switch (typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp)) {
6645 case Ity_I1: case Ity_I8: case Ity_I16:
6646 case Ity_I32: case Ity_I64: case Ity_I128:
6647 break;
6648 case Ity_F16: case Ity_F32: case Ity_F64: case Ity_F128:
6649 case Ity_V128: case Ity_V256:
6650 *hasVorFtemps = True;
6651 break;
6652 case Ity_D32: case Ity_D64: case Ity_D128:
6653 *hasVorFtemps = True;
6654 break;
6655 default:
6656 goto bad;
6658 break;
6659 case Ist_Put:
6660 vassert(isIRAtom(st->Ist.Put.data));
6661 break;
6662 case Ist_Store:
6663 vassert(isIRAtom(st->Ist.Store.addr));
6664 vassert(isIRAtom(st->Ist.Store.data));
6665 break;
6666 case Ist_StoreG: {
6667 IRStoreG* sg = st->Ist.StoreG.details;
6668 vassert(isIRAtom(sg->addr));
6669 vassert(isIRAtom(sg->data));
6670 vassert(isIRAtom(sg->guard));
6671 break;
6673 case Ist_LoadG: {
6674 IRLoadG* lg = st->Ist.LoadG.details;
6675 vassert(isIRAtom(lg->addr));
6676 vassert(isIRAtom(lg->alt));
6677 vassert(isIRAtom(lg->guard));
6678 break;
6680 case Ist_CAS:
6681 cas = st->Ist.CAS.details;
6682 vassert(isIRAtom(cas->addr));
6683 vassert(cas->expdHi == NULL || isIRAtom(cas->expdHi));
6684 vassert(isIRAtom(cas->expdLo));
6685 vassert(cas->dataHi == NULL || isIRAtom(cas->dataHi));
6686 vassert(isIRAtom(cas->dataLo));
6687 break;
6688 case Ist_LLSC:
6689 vassert(isIRAtom(st->Ist.LLSC.addr));
6690 if (st->Ist.LLSC.storedata)
6691 vassert(isIRAtom(st->Ist.LLSC.storedata));
6692 break;
6693 case Ist_Dirty:
6694 d = st->Ist.Dirty.details;
6695 vassert(isIRAtom(d->guard));
6696 for (j = 0; d->args[j]; j++) {
6697 IRExpr* arg = d->args[j];
6698 if (LIKELY(!is_IRExpr_VECRET_or_GSPTR(arg)))
6699 vassert(isIRAtom(arg));
6701 if (d->mFx != Ifx_None)
6702 vassert(isIRAtom(d->mAddr));
6703 break;
6704 case Ist_NoOp:
6705 case Ist_IMark:
6706 case Ist_MBE:
6707 break;
6708 case Ist_Exit:
6709 vassert(isIRAtom(st->Ist.Exit.guard));
6710 break;
6711 default:
6712 bad:
6713 ppIRStmt(st);
6714 vpanic("considerExpensives");
6720 /* ---------------- The main iropt entry point. ---------------- */
6722 /* exported from this file */
6723 /* Rules of the game:
6725 - IRExpr/IRStmt trees should be treated as immutable, as they
6726 may get shared. So never change a field of such a tree node;
6727 instead construct and return a new one if needed.
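
      For instance (illustrative only): to change the data field of an
      Ist_Put, build IRStmt_Put(st->Ist.Put.offset, newData) and use
      that in place of the old statement, rather than assigning to
      st->Ist.Put.data directly.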
6729 IRSB* do_iropt_BB(
6730 IRSB* bb0,
6731 IRExpr* (*specHelper) (const HChar*, IRExpr**, IRStmt**, Int),
6732 Bool (*preciseMemExnsFn)(Int,Int,VexRegisterUpdates),
6733 VexRegisterUpdates pxControl,
6734 Addr guest_addr,
6735 VexArch guest_arch
6738 static Int n_total = 0;
6739 static Int n_expensive = 0;
6741 Bool hasGetIorPutI, hasVorFtemps;
6743 n_total++;
6745 /* Flatness: this function assumes that the incoming block is already flat.
6746 That's because all blocks that arrive here should already have been
6747 processed by do_minimal_initial_iropt_BB. And that will have flattened
6748 them out. */
6749 // FIXME Remove this assertion once the 'grail' machinery seems stable
6750 // FIXME2 The TOC-redirect-hacks generators in m_translate.c -- gen_PUSH()
6751 // and gen_PO() -- don't generate flat IR, and so cause this assertion
6752 // to fail. For the time being, hack around this by flattening,
6753 // rather than asserting for flatness, on the afflicted platforms.
6754 // This is a kludge, yes.
6755 if (guest_arch == VexArchPPC64) {
6756 bb0 = flatten_BB(bb0); // Kludge!
6757 } else {
6758 vassert(isFlatIRSB(bb0)); // How it Really Should Be (tm).
6761 /* If at level 0, stop now. */
6762 if (vex_control.iropt_level <= 0) return bb0;
6764 /* Now do a preliminary cleanup pass, and figure out if we also
6765 need to do 'expensive' optimisations. Expensive optimisations
6766 are deemed necessary if the block contains any GetIs or PutIs.
6767 If needed, do expensive transformations and then another cheap
6768 cleanup pass. */
6770 IRSB* bb = cheap_transformations( bb0, specHelper,
6771 preciseMemExnsFn, pxControl );
6773 if (guest_arch == VexArchARM) {
6774 /* Translating Thumb2 code produces a lot of chaff. We have to
6775 work extra hard to get rid of it. */
6776 bb = cprop_BB(bb);
6777 bb = spec_helpers_BB ( bb, specHelper );
6778 if (pxControl < VexRegUpdAllregsAtEachInsn) {
6779 redundant_put_removal_BB ( bb, preciseMemExnsFn, pxControl );
6781 do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
6782 do_deadcode_BB( bb );
6785 if (vex_control.iropt_level > 1) {
6787 /* Peer at what we have, to decide how much more effort to throw
6788 at it. */
6789 considerExpensives( &hasGetIorPutI, &hasVorFtemps, bb );
6791 if (hasVorFtemps && !hasGetIorPutI) {
6792 /* If any evidence of FP or Vector activity, CSE, as that
6793 tends to mop up all manner of lardy code to do with
6794 rounding modes. Don't bother if hasGetIorPutI since that
6795 case leads into the expensive transformations, which do
6796 CSE anyway. */
6797 (void)do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
6798 do_deadcode_BB( bb );
6801 if (hasGetIorPutI) {
6802 Bool cses;
6803 n_expensive++;
6804 if (DEBUG_IROPT)
6805 vex_printf("***** EXPENSIVE %d %d\n", n_total, n_expensive);
6806 bb = expensive_transformations( bb, pxControl );
6807 bb = cheap_transformations( bb, specHelper,
6808 preciseMemExnsFn, pxControl );
6809 /* Potentially common up GetIs */
6810 cses = do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
6811 if (cses)
6812 bb = cheap_transformations( bb, specHelper,
6813 preciseMemExnsFn, pxControl );
6816 ///////////////////////////////////////////////////////////
6817 // BEGIN MSVC optimised code transformation hacks
6818 if (0)
6819 bb = do_MSVC_HACKS(bb);
6820 // END MSVC optimised code transformation hacks
6821 ///////////////////////////////////////////////////////////
6823 /* Now have a go at unrolling simple (single-BB) loops. If
6824 successful, clean up the results as much as possible. */
6826 IRSB* bb2 = maybe_loop_unroll_BB( bb, guest_addr );
6827 if (bb2) {
6828 bb = cheap_transformations( bb2, specHelper,
6829 preciseMemExnsFn, pxControl );
6830 if (hasGetIorPutI) {
6831 bb = expensive_transformations( bb, pxControl );
6832 bb = cheap_transformations( bb, specHelper,
6833 preciseMemExnsFn, pxControl );
6834 } else {
6835 /* at least do CSE and dead code removal */
6836 do_cse_BB( bb, False/*!allowLoadsToBeCSEd*/ );
6837 do_deadcode_BB( bb );
6839 if (0) vex_printf("vex iropt: unrolled a loop\n");
6844 return bb;
6847 IRSB* do_minimal_initial_iropt_BB(IRSB* bb0) {
6848 /* First flatten the block out, since all other phases assume flat code. */
6849 IRSB* bb = flatten_BB ( bb0 );
6851 if (iropt_verbose) {
6852 vex_printf("\n========= FLAT\n\n" );
6853 ppIRSB(bb);
6856 // Remove redundant GETs
6857 redundant_get_removal_BB ( bb );
6859 // Do minimal constant prop: copy prop and constant prop only. No folding.
6860 // JRS FIXME 2019Nov25: this is too weak to be effective on arm32. For that,
6861 // specifying doFolding=True makes a huge difference.
6862 bb = cprop_BB_WRK ( bb, /*mustRetainNoOps=*/True,
6863 /*doFolding=*/False );
6865 // Minor tidying of the block end, to remove a redundant Put of the IP right
6866 // at the end:
6868 ------ IMark(0x401FEC9, 2, 0) ------
6869 t18 = GET:I64(168)
6870 t19 = amd64g_calculate_condition[mcx=0x13]{0x58155130}(0x4:I64,0x5:I64,..
6871 t14 = 64to1(t19)
6872 if (t14) { PUT(184) = 0x401FED6:I64; exit-Boring }
6873 PUT(184) = 0x401FECB:I64 <--------------------------------
6874 PUT(184) = 0x401FECB:I64; exit-Boring
6876 if (bb->stmts_used > 0) {
6877 const IRStmt* last = bb->stmts[bb->stmts_used - 1];
6878 if (last->tag == Ist_Put && last->Ist.Put.offset == bb->offsIP
6879 && eqIRAtom(last->Ist.Put.data, bb->next)) {
6880 bb->stmts_used--;
6884 return bb;
6887 /* Copy the contents of |src| to the end of |dst|. This entails fixing up the
6888 tmp numbers in |src| accordingly. The final destination of |dst| is thrown
6889 away and replaced by the final destination of |src|. This function doesn't
6890 make any assessment of whether it's meaningful or valid to concatenate the
6891 two IRSBs; it just *does* the concatenation. */
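
/* Example of the tmp renumbering (hypothetical counts): if |dst| already
   has 5 tmps (t0..t4) and the statements of |src| refer to t0..t2, then
   delta is 5 and the copied statements refer to t5..t7 instead, so they
   cannot collide with |dst|'s existing tmps. */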
6892 void concatenate_irsbs ( IRSB* dst, IRSB* src )
6894    // FIXME this is almost identical to code at the end of maybe_loop_unroll_BB.
6895 // Maybe try to common it up.
6896 Int delta = dst->tyenv->types_used;
6897 for (Int i = 0; i < src->tyenv->types_used; i++) {
6898 (void)newIRTemp(dst->tyenv, src->tyenv->types[i]);
6901 for (Int i = 0; i < src->stmts_used; i++) {
6902 IRStmt* s = deepCopyIRStmt(src->stmts[i]);
6903 deltaIRStmt(s, delta);
6904 addStmtToIRSB(dst, s);
6906 deltaIRExpr(src->next, delta);
6908 dst->next = src->next;
6909 dst->jumpkind = src->jumpkind;
6910 vassert(dst->offsIP == src->offsIP);
6913 /*---------------------------------------------------------------*/
6914 /*--- end ir_opt.c ---*/
6915 /*---------------------------------------------------------------*/