/*---------------------------------------------------------------*/
/*--- begin                                       test_main.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2017 OpenWorks LLP

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>

#include "libvex_basictypes.h"
#include "libvex.h"
#include "libvex_ir.h"

#include "test_main.h"


/*---------------------------------------------------------------*/
/*---------------------------------------------------------------*/

__attribute__ ((noreturn))
void failure_exit ( void )
{
   fprintf(stdout, "VEX did failure_exit.  Bye.\n");
   exit(1);
}

void log_bytes ( const HChar* bytes, SizeT nbytes )
{
   fwrite ( bytes, 1, nbytes, stdout );
}

#define N_LINEBUF 10000
static HChar linebuf[N_LINEBUF];

#define N_ORIGBUF  10000
#define N_TRANSBUF 5000

static UChar origbuf[N_ORIGBUF];
static UChar transbuf[N_TRANSBUF];

static Bool verbose = True;
//static IRSB* ac_instrument ( IRSB*, VexGuestLayout*, IRType );

IRSB* mc_instrument ( void* closureV,
                      IRSB* bb_in, VexGuestLayout* layout,
                      IRType gWordTy, IRType hWordTy );

static Bool chase_into_not_ok ( void* opaque, Addr dst ) {
   return False;
}

static UInt needs_self_check ( void *closureV, VexRegisterUpdates *pxControl,
                               const VexGuestExtents *vge ) {
   return 0;
}
int main ( int argc, char** argv )
{
   Int bb_number, n_bbs_done = 0;
   Int orig_nbytes, trans_used;
   VexTranslateResult tres;
   VexArchInfo vai_x86, vai_amd64, vai_ppc32, vai_arm, vai_mips32, vai_mips64;
   VexTranslateArgs vta;

   fprintf(stderr, "usage: vex file.orig\n");

   f = fopen(argv[1], "r");
   fprintf(stderr, "can't open `%s'\n", argv[1]);

   /* Run with default params.  However, we can't allow bb chasing
      since that causes the front end to get segfaults when it tries
      to read code outside the initial BB we hand it.  So when calling
      LibVEX_Translate, send in a chase-into predicate that always
      says no. */
   LibVEX_default_VexControl ( &vcon );
   vcon.iropt_level = 2;
   vcon.guest_max_insns = 60;

   LibVEX_Init ( &failure_exit, &log_bytes,
                 1,  /* debug_paranoia */
                 &vcon );

      __attribute__((unused))
      char* unused1 = fgets(linebuf, N_LINEBUF, f);
      if (linebuf[0] == 0) continue;
      if (linebuf[0] != '.') continue;

      if (n_bbs_done == TEST_N_BBS) break;

      /* first line is:  . bb-number bb-addr n-bytes */
      assert(3 == sscanf(&linebuf[1], " %d %x %d\n",
                         & bb_number,
                         & orig_addr, & orig_nbytes ));
      assert(orig_nbytes >= 1);

      __attribute__((unused))
      char* unused2 = fgets(linebuf, N_LINEBUF, f);
      assert(linebuf[0] == '.');

      /* second line is:  . byte byte byte etc */
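      /* For illustration only (not from the original sources): a
         hypothetical file.orig fragment in the format parsed above --
         a ". bb-number bb-addr n-bytes" header line followed by a
         ". byte byte byte ..." line giving that many hex bytes:

            . 1 8048300 5
            . 55 89 e5 5d c3

         The header matches sscanf(" %d %x %d"), and each byte of the
         second line is read at offsets 2, 5, 8, ... -- i.e. at
         &linebuf[2 + 3*i], as in the loop below. */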
      printf("============ Basic Block %d, Done %d, "
             "Start %x, nbytes %2d ============",
             bb_number, n_bbs_done-1, orig_addr, orig_nbytes );

      /* thumb ITstate analysis needs to examine the 18 bytes
         preceding the first instruction.  So let's leave the first 18
         bytes empty. */
      memset(origbuf, 0, sizeof(origbuf));

      assert(orig_nbytes >= 1 && orig_nbytes <= N_ORIGBUF);
      for (i = 0; i < orig_nbytes; i++) {
         assert(1 == sscanf(&linebuf[2 + 3*i], "%x", &u));
         origbuf[18+ i] = (UChar)u;
      }

      /* FIXME: put sensible values into the .hwcaps fields */
      LibVEX_default_VexArchInfo(&vai_x86);
      vai_x86.hwcaps = VEX_HWCAPS_X86_MMXEXT | VEX_HWCAPS_X86_SSE1
                       | VEX_HWCAPS_X86_SSE2 | VEX_HWCAPS_X86_SSE3;
      vai_x86.endness = VexEndnessLE;

      LibVEX_default_VexArchInfo(&vai_amd64);
      vai_amd64.hwcaps = 0;
      vai_amd64.endness = VexEndnessLE;

      LibVEX_default_VexArchInfo(&vai_ppc32);
      vai_ppc32.hwcaps = 0;
      vai_ppc32.ppc_icache_line_szB = 128;

      LibVEX_default_VexArchInfo(&vai_arm);
      vai_arm.hwcaps = VEX_HWCAPS_ARM_VFP3 | VEX_HWCAPS_ARM_NEON | 7;

      LibVEX_default_VexArchInfo(&vai_mips32);
      vai_mips32.endness = VexEndnessLE;
      vai_mips32.hwcaps = VEX_PRID_COMP_MIPS;

      LibVEX_default_VexArchInfo(&vai_mips64);
      vai_mips64.endness = VexEndnessLE;

      LibVEX_default_VexAbiInfo(&vbi);
      vbi.guest_stack_redzone_size = 128;
      /* ----- Set up args for LibVEX_Translate ----- */

      vta.abiinfo_both     = vbi;
      vta.guest_bytes      = &origbuf[18];
      vta.guest_bytes_addr = orig_addr;
      vta.callback_opaque  = NULL;
      vta.chase_into_ok    = chase_into_not_ok;
      vta.guest_extents    = &vge;
      vta.host_bytes       = transbuf;
      vta.host_bytes_size  = N_TRANSBUF;
      vta.host_bytes_used  = &trans_used;
#if 0 /* ppc32 -> ppc32 */
      vta.arch_guest     = VexArchPPC32;
      vta.archinfo_guest = vai_ppc32;
      vta.arch_host      = VexArchPPC32;
      vta.archinfo_host  = vai_ppc32;
#endif

#if 0 /* amd64 -> amd64 */
      vta.arch_guest     = VexArchAMD64;
      vta.archinfo_guest = vai_amd64;
      vta.arch_host      = VexArchAMD64;
      vta.archinfo_host  = vai_amd64;
#endif

#if 0 /* x86 -> x86 */
      vta.arch_guest     = VexArchX86;
      vta.archinfo_guest = vai_x86;
      vta.arch_host      = VexArchX86;
      vta.archinfo_host  = vai_x86;
#endif

#if 1 /* x86 -> mips32 */
      vta.arch_guest     = VexArchX86;
      vta.archinfo_guest = vai_x86;
      vta.arch_host      = VexArchMIPS32;
      vta.archinfo_host  = vai_mips32;
#endif

#if 0 /* amd64 -> mips64 */
      vta.arch_guest     = VexArchAMD64;
      vta.archinfo_guest = vai_amd64;
      vta.arch_host      = VexArchMIPS64;
      vta.archinfo_host  = vai_mips64;
#endif

#if 0 /* arm -> arm */
      vta.arch_guest     = VexArchARM;
      vta.archinfo_guest = vai_arm;
      vta.arch_host      = VexArchARM;
      vta.archinfo_host  = vai_arm;
      /* ARM/Thumb only hacks, that are needed to keep the ITstate
         analyser in the front end happy. */
      vta.guest_bytes      = &origbuf[18 +1];
      vta.guest_bytes_addr = (Addr) &origbuf[18 +1];
#endif
#if 1 /* no instrumentation */
      vta.instrument1 = NULL;
      vta.instrument2 = NULL;
#endif
#if 0 /* addrcheck */
      vta.instrument1 = ac_instrument;
      vta.instrument2 = NULL;
#endif
#if 0 /* memcheck */
      vta.instrument1 = mc_instrument;
      vta.instrument2 = NULL;
#endif

      vta.needs_self_check  = needs_self_check;
      vta.preamble_function = NULL;
      vta.traceflags        = TEST_FLAGS;
      vta.addProfInc        = False;
      vta.sigill_diag       = True;

      vta.disp_cp_chain_me_to_slowEP = (void*)0x12345678;
      vta.disp_cp_chain_me_to_fastEP = (void*)0x12345679;
      vta.disp_cp_xindir             = (void*)0x1234567A;
      vta.disp_cp_xassisted          = (void*)0x1234567B;

      vta.finaltidy = NULL;
      for (i = 0; i < TEST_N_ITERS; i++)
         tres = LibVEX_Translate ( &vta );

      if (tres.status != VexTransOK)
         printf("\ntres = %d\n", (Int)tres.status);
      assert(tres.status == VexTransOK);
      assert(tres.n_sc_extents == 0);
      assert(vge.n_used == 1);
      assert((UInt)(vge.len[0]) == orig_nbytes);

      for (i = 0; i < trans_used; i++)
         sum += (UInt)transbuf[i];
      printf ( " %6.2f ... %u\n",
               (double)trans_used / (double)vge.len[0], sum );

   LibVEX_ShowAllocStats();
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
__attribute((noreturn))
void panic ( HChar* s )
{
   printf("\npanic: %s\n", s);
   exit(1);
}

IRSB* ac_instrument (IRSB* bb_in, VexGuestLayout* layout, IRType hWordTy )
{
/* Use this rather than eg. -1 because it's a UInt. */
#define INVALID_DATA_SIZE 999999
   IRSB* bb = emptyIRSB();
   bb->tyenv    = dopyIRTypeEnv(bb_in->tyenv);
   bb->next     = dopyIRExpr(bb_in->next);
   bb->jumpkind = bb_in->jumpkind;

   /* No loads to consider in ->next. */
   assert(isIRAtom(bb_in->next));
   for (i = 0; i < bb_in->stmts_used; i++) {
      st = bb_in->stmts[i];

      data = st->Ist.Tmp.data;
      if (data->tag == Iex_LDle) {
         addr = data->Iex.LDle.addr;
         sz = sizeofIRType(data->Iex.LDle.ty);
            case 4: helper = mkIRCallee(1, "ac_helperc_LOAD4",
                                        (void*)0x12345601); break;
            case 2: helper = mkIRCallee(0, "ac_helperc_LOAD2",
                                        (void*)0x12345602); break;
            case 1: helper = mkIRCallee(1, "ac_helperc_LOAD1",
                                        (void*)0x12345603); break;
            default: helper = mkIRCallee(0, "ac_helperc_LOADN",
                     needSz = True; break;

            unsafeIRDirty_0_N( helper->regparms,
                               helper->name, helper->addr,
                               mkIRExprVec_2(addr, mkIRExpr_HWord(sz)))

            unsafeIRDirty_0_N( helper->regparms,
                               helper->name, helper->addr,
                               mkIRExprVec_1(addr) )
         data = st->Ist.STle.data;
         addr = st->Ist.STle.addr;
         assert(isIRAtom(data));
         assert(isIRAtom(addr));
         sz = sizeofIRType(typeOfIRExpr(bb_in->tyenv, data));
            case 4: helper = mkIRCallee(1, "ac_helperc_STORE4",
                                        (void*)0x12345605); break;
            case 2: helper = mkIRCallee(0, "ac_helperc_STORE2",
                                        (void*)0x12345606); break;
            case 1: helper = mkIRCallee(1, "ac_helperc_STORE1",
                                        (void*)0x12345607); break;
            default: helper = mkIRCallee(0, "ac_helperc_STOREN",
                     needSz = True; break;

            unsafeIRDirty_0_N( helper->regparms,
                               helper->name, helper->addr,
                               mkIRExprVec_2(addr, mkIRExpr_HWord(sz)))

            unsafeIRDirty_0_N( helper->regparms,
                               helper->name, helper->addr,
                               mkIRExprVec_1(addr) )

         assert(isIRAtom(st->Ist.Put.data));

         assert(isIRAtom(st->Ist.PutI.ix));
         assert(isIRAtom(st->Ist.PutI.data));

         assert(isIRAtom(st->Ist.Exit.guard));

         /* If the call doesn't interact with memory, we ain't
            interested. */
         if (st->Ist.Dirty.details->mFx == Ifx_None)

         panic("addrcheck: unhandled IRStmt");

      addStmtToIRSB( bb, dopyIRStmt(st));
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
__attribute((noreturn))
void panic ( HChar* s )
{
   printf("\npanic: %s\n", s);
   exit(1);
}

#define tl_assert(xxx)   assert(xxx)
#define VG_(xxxx)        xxxx
#define tool_panic(zzz)  panic(zzz)
#define MC_(zzzz)        MC_##zzzz
#define TL_(zzzz)        SK_##zzzz

static void MC_helperc_complain_undef ( void );
static void MC_helperc_LOADV8 ( void );
static void MC_helperc_LOADV4 ( void );
static void MC_helperc_LOADV2 ( void );
static void MC_helperc_LOADV1 ( void );
static void MC_helperc_STOREV8( void );
static void MC_helperc_STOREV4( void );
static void MC_helperc_STOREV2( void );
static void MC_helperc_STOREV1( void );
static void MC_helperc_value_check0_fail( void );
static void MC_helperc_value_check1_fail( void );
static void MC_helperc_value_check4_fail( void );

static void MC_helperc_complain_undef ( void ) { }
static void MC_helperc_LOADV8 ( void ) { }
static void MC_helperc_LOADV4 ( void ) { }
static void MC_helperc_LOADV2 ( void ) { }
static void MC_helperc_LOADV1 ( void ) { }
static void MC_helperc_STOREV8( void ) { }
static void MC_helperc_STOREV4( void ) { }
static void MC_helperc_STOREV2( void ) { }
static void MC_helperc_STOREV1( void ) { }
static void MC_helperc_value_check0_fail( void ) { }
static void MC_helperc_value_check1_fail( void ) { }
static void MC_helperc_value_check4_fail( void ) { }
/*--------------------------------------------------------------------*/
/*--- Instrument IR to perform memory checking operations.        ---*/
/*---                                               mc_translate.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2000-2017 Julian Seward

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, see <http://www.gnu.org/licenses/>.

   The GNU General Public License is contained in the file COPYING.
*/

//#include "mc_include.h"

/*------------------------------------------------------------*/
/*--- Forward decls                                        ---*/
/*------------------------------------------------------------*/

static IRType  shadowType ( IRType ty );
static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );

/*------------------------------------------------------------*/
/*--- Memcheck running state, and tmp management.          ---*/
/*------------------------------------------------------------*/
/* Carries around state during memcheck instrumentation. */

      /* MODIFIED: the bb being constructed.  IRStmts are added. */

      /* MODIFIED: a table [0 .. #temps_in_original_bb-1] which maps
         original temps to their current shadow temp.
         Initially all entries are IRTemp_INVALID.  Entries are added
         lazily since many original temps are not used due to
         optimisation prior to instrumentation.  Note that floating
         point original tmps are shadowed by integer tmps of the same
         size, and Bit-typed original tmps are shadowed by the type
         Ity_I8.  See comment below. */
      Int n_originalTmps; /* for range checking */

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      VexGuestLayout* layout;

      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */

/* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
   demand), as they are encountered.  This is for two reasons.

   (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in
   tables tracking them.

   Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
   table indexed [0 .. n_types-1], which gives the current shadow for
   each original tmp, or INVALID_IRTEMP if none is so far assigned.
   It is necessary to support making multiple assignments to a shadow
   -- specifically, after testing a shadow for definedness, it needs
   to be made defined.  But IR's SSA property disallows this.

   (2) (more important reason): Therefore, when a shadow needs to get
   a new value, a new temporary is created, the value is assigned to
   that, and the tmpMap is updated to reflect the new binding.

   A corollary is that if the tmpMap maps a given tmp to
   INVALID_IRTEMP and we are hoping to read that shadow tmp, it means
   there's a read-before-write error in the original tmps.  The IR
   sanity checker should catch all such anomalies, however. */
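/* Illustrative sketch only (not part of the original sources): the
   shadow-tmp rebinding described above, in IR-like pseudocode.  With
   an original tmp t4 shadowed by t20, a definedness test that must
   afterwards force t4's shadow to "defined" cannot reassign t20
   (SSA), so a fresh shadow t21 is allocated and the map repointed:

      t20 = <vbits of t4>            -- first shadow of t4
      DIRTY complain-helper if t20   -- test and complain
      t21 = 0x0                      -- new shadow, all-defined
      tmpMap[t4] := t21              -- later reads of t4# now see t21

   The tmp numbers t4/t20/t21 are hypothetical. */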
/* Find the tmp currently shadowing the given original tmp.  If none
   so far exists, allocate one.  */
static IRTemp findShadowTmp ( MCEnv* mce, IRTemp orig )
{
   tl_assert(orig < mce->n_originalTmps);
   if (mce->tmpMap[orig] == IRTemp_INVALID) {
      mce->tmpMap[orig]
         = newIRTemp(mce->bb->tyenv,
                     shadowType(mce->bb->tyenv->types[orig]));
   }
   return mce->tmpMap[orig];
}

/* Allocate a new shadow for the given original tmp.  This means any
   previous shadow is abandoned.  This is needed because it is
   necessary to give a new value to a shadow once it has been tested
   for undefinedness, but unfortunately IR's SSA property disallows
   this.  Instead we must abandon the old shadow, allocate a new one
   and use that instead. */
static void newShadowTmp ( MCEnv* mce, IRTemp orig )
{
   tl_assert(orig < mce->n_originalTmps);
   mce->tmpMap[orig]
      = newIRTemp(mce->bb->tyenv,
                  shadowType(mce->bb->tyenv->types[orig]));
}
/*------------------------------------------------------------*/
/*--- IRAtoms -- a subset of IRExprs                       ---*/
/*------------------------------------------------------------*/

/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
   isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
   input, most of this code deals in atoms.  Usefully, a value atom
   always has a V-value which is also an atom: constants are shadowed
   by constants, and temps are shadowed by the corresponding shadow
   temporary. */

typedef IRExpr IRAtom;

/* (used for sanity checks only): is this an atom which looks
   like it's from original code? */
static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp < mce->n_originalTmps)
      return True;
   return False;
}

/* (used for sanity checks only): is this an atom which looks
   like it's from shadow code? */
static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp >= mce->n_originalTmps)
      return True;
   return False;
}

/* (used for sanity checks only): check that both args are atoms and
   are identically-kinded. */
static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
{
   if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
      return True;
   if (a1->tag == Iex_Const && a2->tag == Iex_Const)
      return True;
   return False;
}
/*------------------------------------------------------------*/
/*--- Type management                                      ---*/
/*------------------------------------------------------------*/

/* Shadow state is always accessed using integer types.  This returns
   an integer type with the same size (as per sizeofIRType) as the
   given type.  The only valid shadow types are Bit, I8, I16, I32,
   I64, V128. */
static IRType shadowType ( IRType ty )
{
   switch (ty) {
      case Ity_I64:  return ty;
      case Ity_F32:  return Ity_I32;
      case Ity_F64:  return Ity_I64;
      case Ity_V128: return Ity_V128;
      default: ppIRType(ty);
               VG_(tool_panic)("memcheck:shadowType");
   }
}

/* Produce a 'defined' value of the given shadow type.  Should only be
   supplied shadow types (Bit/I8/I16/I32/UI64). */
static IRExpr* definedOfType ( IRType ty ) {
   switch (ty) {
      case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
      case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
      case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
      case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
      case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
      case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
      default: VG_(tool_panic)("memcheck:definedOfType");
   }
}
/*------------------------------------------------------------*/
/*--- Constructing IR fragments                            ---*/
/*------------------------------------------------------------*/

/* assign value to tmp */
#define assign(_bb,_tmp,_expr)   \
   addStmtToIRSB((_bb), IRStmt_WrTmp((_tmp),(_expr)))

/* add stmt to a bb */
#define stmt(_bb,_stmt)   \
   addStmtToIRSB((_bb), (_stmt))

/* build various kinds of expressions */
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))

/* bind the given expression to a new temporary, and return the
   temporary.  This effectively converts an arbitrary expression into
   an atom. */
static IRAtom* assignNew ( MCEnv* mce, IRType ty, IRExpr* e ) {
   IRTemp t = newIRTemp(mce->bb->tyenv, ty);
   assign(mce->bb, t, e);
   return mkexpr(t);
}
/*------------------------------------------------------------*/
/*--- Constructing definedness primitive ops               ---*/
/*------------------------------------------------------------*/

/* --------- Defined-if-either-defined --------- */

static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I8, binop(Iop_And8, a1, a2));
}

static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I16, binop(Iop_And16, a1, a2));
}

static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I32, binop(Iop_And32, a1, a2));
}

static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I64, binop(Iop_And64, a1, a2));
}

static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_V128, binop(Iop_AndV128, a1, a2));
}
/* --------- Undefined-if-either-undefined --------- */

static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I8, binop(Iop_Or8, a1, a2));
}

static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I16, binop(Iop_Or16, a1, a2));
}

static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I32, binop(Iop_Or32, a1, a2));
}

static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I64, binop(Iop_Or64, a1, a2));
}

static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_V128, binop(Iop_OrV128, a1, a2));
}

static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
   switch (vty) {
      case Ity_I8:   return mkUifU8(mce, a1, a2);
      case Ity_I16:  return mkUifU16(mce, a1, a2);
      case Ity_I32:  return mkUifU32(mce, a1, a2);
      case Ity_I64:  return mkUifU64(mce, a1, a2);
      case Ity_V128: return mkUifUV128(mce, a1, a2);
      default:
         VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
         VG_(tool_panic)("memcheck:mkUifU");
   }
}
/* --------- The Left-family of operations. --------- */

static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I8,
             binop(Iop_Or8, a1,
                   assignNew(mce, Ity_I8,
                      /* unop(Iop_Neg8, a1)))); */
                      binop(Iop_Sub8, mkU8(0), a1) )));
}

static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I16,
             binop(Iop_Or16, a1,
                   assignNew(mce, Ity_I16,
                      /* unop(Iop_Neg16, a1)))); */
                      binop(Iop_Sub16, mkU16(0), a1) )));
}

static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I32,
             binop(Iop_Or32, a1,
                   assignNew(mce, Ity_I32,
                      /* unop(Iop_Neg32, a1)))); */
                      binop(Iop_Sub32, mkU32(0), a1) )));
}
/* --------- 'Improvement' functions for AND/OR. --------- */

/* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
   defined (0); all other -> undefined (1).
*/
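/* Worked example (illustration only, not from the original sources):
   consider And8 of x = 0x0F with all bits defined (x# = 0x00) and y
   with all bits undefined (y# = 0xFF).  The combined shadow used
   later in expr2vbits_Binop's do_And_Or case is
      DifD( UifU(x#,y#), DifD( ImproveAND(x,x#), ImproveAND(y,y#) ) )
    = 0xFF & 0x0F & 0xFF = 0x0F,
   i.e. the high nibble of the result is reported defined, because x
   has defined 0s there and AND with 0 is 0 regardless of y; the low
   nibble stays undefined. */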
static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I8, binop(Iop_Or8, data, vbits));
}

static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I16, binop(Iop_Or16, data, vbits));
}

static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I32, binop(Iop_Or32, data, vbits));
}

static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I64, binop(Iop_Or64, data, vbits));
}

static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_V128, binop(Iop_OrV128, data, vbits));
}

/* ImproveOR(data, vbits) = ~data OR vbits.  Defined (0) data 1s give
   defined (0); all other -> undefined (1).
*/
static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I8,
             binop(Iop_Or8,
                   assignNew(mce, Ity_I8, unop(Iop_Not8, data)),
                   vbits) );
}

static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I16,
             binop(Iop_Or16,
                   assignNew(mce, Ity_I16, unop(Iop_Not16, data)),
                   vbits) );
}

static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I32,
             binop(Iop_Or32,
                   assignNew(mce, Ity_I32, unop(Iop_Not32, data)),
                   vbits) );
}

static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I64,
             binop(Iop_Or64,
                   assignNew(mce, Ity_I64, unop(Iop_Not64, data)),
                   vbits) );
}

static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_V128,
             binop(Iop_OrV128,
                   assignNew(mce, Ity_V128, unop(Iop_NotV128, data)),
                   vbits) );
}
/* --------- Pessimising casts. --------- */

static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  ty;
   IRAtom* tmp1;
   /* Note, dst_ty is a shadow type, not an original type. */
   /* First of all, collapse vbits down to a single bit. */
   tl_assert(isShadowAtom(mce,vbits));
   ty = typeOfIRExpr(mce->bb->tyenv, vbits);
   switch (ty) {
      case Ity_I8:
         tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE8, vbits, mkU8(0))); break;
      case Ity_I16:
         tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE16, vbits, mkU16(0))); break;
      case Ity_I32:
         tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE32, vbits, mkU32(0))); break;
      case Ity_I64:
         tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE64, vbits, mkU64(0))); break;
      default:
         VG_(tool_panic)("mkPCastTo(1)");
   }
   /* Now widen up to the dst type. */
   switch (dst_ty) {
      case Ity_I8:  return assignNew(mce, Ity_I8,  unop(Iop_1Sto8,  tmp1));
      case Ity_I16: return assignNew(mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32: return assignNew(mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64: return assignNew(mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         tmp1 = assignNew(mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew(mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      default:
         VG_(tool_panic)("mkPCastTo(2)");
   }
}
/*------------------------------------------------------------*/
/*--- Emit a test and complaint if something is undefined. ---*/
/*------------------------------------------------------------*/

/* Set the annotations on a dirty helper to indicate that the stack
   pointer and instruction pointers might be read.  This is the
   behaviour of all 'emit-a-complaint' style functions we might
   call. */
static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
   di->fxState[0].fx     = Ifx_Read;
   di->fxState[0].offset = mce->layout->offset_SP;
   di->fxState[0].size   = mce->layout->sizeof_SP;
   di->fxState[1].fx     = Ifx_Read;
   di->fxState[1].offset = mce->layout->offset_IP;
   di->fxState[1].size   = mce->layout->sizeof_IP;
}

/* Check the supplied **original** atom for undefinedness, and emit a
   complaint if so.  Once that happens, mark it as defined.  This is
   possible because the atom is either a tmp or literal.  If it's a
   tmp, it will be shadowed by a tmp, and so we can set the shadow to
   be defined.  In fact as mentioned above, we will have to allocate a
   new tmp to carry the new 'defined' shadow value, and update the
   original->tmp mapping accordingly; we cannot simply assign a new
   value to an existing shadow tmp as this breaks SSAness -- resulting
   in the post-instrumentation sanity checker spluttering in disapproval.
*/
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
{
   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretion for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->bb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   di = unsafeIRDirty_0_N( 0/*regparms*/,
                           "MC_(helperc_value_check0_fail)",
                           &MC_(helperc_value_check0_fail),

   di = unsafeIRDirty_0_N( 0/*regparms*/,
                           "MC_(helperc_value_check1_fail)",
                           &MC_(helperc_value_check1_fail),

   di = unsafeIRDirty_0_N( 0/*regparms*/,
                           "MC_(helperc_value_check4_fail)",
                           &MC_(helperc_value_check4_fail),

   di = unsafeIRDirty_0_N( 1/*regparms*/,
                           "MC_(helperc_complain_undef)",
                           &MC_(helperc_complain_undef),
                           mkIRExprVec_1( mkIRExpr_HWord( sz ))

   setHelperAnns( mce, di );
   stmt( mce->bb, IRStmt_Dirty(di));

   /* Set the shadow tmp to be defined.  First, update the
      orig->shadow tmp mapping to reflect the fact that this shadow is
      getting a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      newShadowTmp(mce, atom->Iex.RdTmp.tmp);
      assign(mce->bb, findShadowTmp(mce, atom->Iex.RdTmp.tmp),
             definedOfType(ty));
   }
}
/*------------------------------------------------------------*/
/*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
/*------------------------------------------------------------*/

/* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
   partially fall into such a region: (offset,size) should either be
   completely in such a region or completely not-in such a region.
*/
static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
{
   Int minoffD, maxoffD, i;
   Int minoff = offset;
   Int maxoff = minoff + size - 1;
   tl_assert((minoff & ~0xFFFF) == 0);
   tl_assert((maxoff & ~0xFFFF) == 0);

   for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
      minoffD = mce->layout->alwaysDefd[i].offset;
      maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
      tl_assert((minoffD & ~0xFFFF) == 0);
      tl_assert((maxoffD & ~0xFFFF) == 0);

      if (maxoff < minoffD || maxoffD < minoff)
         continue; /* no overlap */
      if (minoff >= minoffD && maxoff <= maxoffD)
         return True; /* completely contained in an always-defd section */

      VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
   }
   return False; /* could not find any containing section */
}


/* Generate into bb suitable actions to shadow this Put.  If the state
   slice is marked 'always defined', do nothing.  Otherwise, write the
   supplied V bits to the shadow state.  We can pass in either an
   original atom or a V-atom, but not both.  In the former case the
   relevant V-bits are then generated from the original.
*/
void do_shadow_PUT ( MCEnv* mce, Int offset,
                     IRAtom* atom, IRAtom* vatom )
{
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );

   tl_assert(isShadowAtom(mce, vatom));

   ty = typeOfIRExpr(mce->bb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put. */
      stmt( mce->bb, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
   }
}


/* Generate into bb suitable actions to shadow this PutI (passed in
   in pieces).
*/
void do_shadow_PUTI ( MCEnv* mce,
                      IRRegArray* descr, IRAtom* ix, Int bias, IRAtom* atom )
{
   tl_assert(isOriginalAtom(mce,atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(sameKindedAtoms(atom, vatom));

   tyS     = shadowType(ty);
   arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a cloned version of the Put that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      stmt( mce->bb, IRStmt_PutI( mkIRPutI( new_descr, ix, bias, vatom ) ));
   }
}
/* Return an expression which contains the V bits corresponding to the
   given GET (passed in in pieces).
*/
IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
{
   IRType tyS = shadowType(ty);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GETI (passed in in pieces).
*/
IRExpr* shadow_GETI ( MCEnv* mce, IRRegArray* descr, IRAtom* ix, Int bias )
{
   IRType ty   = descr->elemTy;
   IRType tyS  = shadowType(ty);
   Int arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      return IRExpr_GetI( new_descr, ix, bias );
   }
}
/*------------------------------------------------------------*/
/*--- Generating approximations for unknown operations,    ---*/
/*--- using lazy-propagate semantics                       ---*/
/*------------------------------------------------------------*/

/* Lazy propagation of undefinedness from two values, resulting in the
   specified shadow type.
*/
IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
{
   IRAtom* at;
   /* force everything via 32-bit intermediaries. */
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkPCastTo(mce, finalVty, at);
   return at;
}


/* Do the lazy propagation game from a null-terminated vector of
   atoms.  This is presumably the arguments to a helper call, so the
   IRCallee info is also supplied in order that we can know which
   arguments should be ignored (via the .mcx_mask field).
*/
IRAtom* mkLazyN ( MCEnv* mce,
                  IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
{
   Int     i;
   IRAtom* here;
   IRAtom* curr = definedOfType(Ity_I32);
   for (i = 0; exprvec[i]; i++) {
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      /* Only take notice of this arg if the callee's mc-exclusion
         mask does not say it is to be excluded. */
      if (cee->mcx_mask & (1<<i)) {
         /* the arg is to be excluded from definedness checking.  Do
            nothing. */
         if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
      } else {
         /* calculate the arg's definedness, and pessimistically merge
            it in. */
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, exprvec[i]) );
         curr = mkUifU32(mce, here, curr);
      }
   }
   return mkPCastTo(mce, finalVtype, curr );
}
/*------------------------------------------------------------*/
/*--- Generating expensive sequences for exact carry-chain ---*/
/*--- propagation in add/sub and related operations.       ---*/
/*------------------------------------------------------------*/

__attribute__((unused))
IRAtom* expensiveAdd32 ( MCEnv* mce, IRAtom* qaa, IRAtom* qbb,
                         IRAtom* aa, IRAtom* bb )
{
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IROp   opAND, opOR, opXOR, opNOT, opADD;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   // a_min = aa & ~qaa
   a_min = assignNew(mce,ty,
                     binop(opAND, aa,
                           assignNew(mce,ty, unop(opNOT, qaa))));

   // b_min = bb & ~qbb
   b_min = assignNew(mce,ty,
                     binop(opAND, bb,
                           assignNew(mce,ty, unop(opNOT, qbb))));

   // a_max = aa | qaa
   a_max = assignNew(mce,ty, binop(opOR, aa, qaa));

   // b_max = bb | qbb
   b_max = assignNew(mce,ty, binop(opOR, bb, qbb));

   // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
   return
   assignNew(mce,ty,
      binop(opOR,
            assignNew(mce,ty, binop(opOR, qaa, qbb)),
            assignNew(mce,ty,
               binop(opXOR, assignNew(mce,ty, binop(opADD, a_min, b_min)),
                            assignNew(mce,ty, binop(opADD, a_max, b_max))
               ))));
}
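/* Worked example (illustration only, not from the original sources),
   using narrow values for brevity.  Let aa = 0b0011 with its lowest
   bit undefined (qaa = 0b0001), and bb = 0b0001 fully defined
   (qbb = 0).  Then
      a_min = aa & ~qaa = 0b0010      a_max = aa | qaa = 0b0011
      b_min = b_max     = 0b0001
      (a_min + b_min) ^ (a_max + b_max) = 0b0011 ^ 0b0100 = 0b0111
      result = (qaa | qbb) | 0b0111     = 0b0111
   so the undefined input bit may perturb result bits 0..2 via the
   carry chain, while the upper bits are still reported defined --
   more precise than the cheap Left/UifU approximation. */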
/*------------------------------------------------------------*/
/*--- Helpers for dealing with vector primops.             ---*/
/*------------------------------------------------------------*/

/* Vector pessimisation -- pessimise within each lane individually. */

static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
}

static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
}

static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
}

static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
}

static IRAtom* mkPCast128x1 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ128x1, at));
}
/* Here's a simple scheme capable of handling ops derived from SSE1
   code and while only generating ops that can be efficiently
   implemented in SSE1. */

/* All-lanes versions are straightforward:

   binary32Fx4(x,y)   ==> PCast32x4(UifUV128(x#,y#))

   unary32Fx4(x,y)    ==> PCast32x4(x#)

   Lowest-lane-only versions are more complex:

   binary32F0x4(x,y)  ==> SetV128lo32(
                             x#,
                             PCast32(V128to32(UifUV128(x#,y#)))
                          )

   This is perhaps not so obvious.  In particular, it's faster to
   do a V128-bit UifU and then take the bottom 32 bits than the more
   obvious scheme of taking the bottom 32 bits of each operand
   and doing a 32-bit UifU.  Basically since UifU is fast and
   chopping lanes off vector values is slow.

   unary32F0x4(x)     ==> SetV128lo32(
                             x#,
                             PCast32(V128to32(x#))
                          )

   PCast32(v#)   = 1Sto32(CmpNE32(v#,0))
   PCast32x4(v#) = CmpNEZ32x4(v#)
*/

IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_V128, mkPCast32x4(mce, at));
   return at;
}

IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_V128, mkPCast32x4(mce, vatomX));
   return at;
}

IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_I32, unop(Iop_V128to32, at));
   at = mkPCastTo(mce, Ity_I32, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   return at;
}

IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_I32, unop(Iop_V128to32, vatomX));
   at = mkPCastTo(mce, Ity_I32, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   return at;
}

/* --- ... and ... 64Fx2 versions of the same ... --- */

IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_V128, mkPCast64x2(mce, at));
   return at;
}

IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_V128, mkPCast64x2(mce, vatomX));
   return at;
}

IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_I64, unop(Iop_V128to64, at));
   at = mkPCastTo(mce, Ity_I64, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   return at;
}

IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_I64, unop(Iop_V128to64, vatomX));
   at = mkPCastTo(mce, Ity_I64, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   return at;
}
/* --- --- Vector saturated narrowing --- --- */

/* This is quite subtle.  What to do is simple:

   Let the original narrowing op be QNarrowW{S,U}xN.  Produce:

      the-narrowing-op( PCastWxN(vatom1), PCastWxN(vatom2))

   Why this is right is not so simple.  Consider a lane in the args,
   vatom1 or 2, doesn't matter.

   After the PCast, that lane is all 0s (defined) or all
   1s (undefined).

   Both signed and unsigned saturating narrowing of all 0s produces
   all 0s, which is what we want.

   The all-1s case is more complex.  Unsigned narrowing interprets an
   all-1s input as the largest unsigned integer, and so produces all
   1s as a result since that is the largest unsigned value at the
   smaller width.

   Signed narrowing interprets all 1s as -1.  Fortunately, -1 narrows
   to -1, so we still wind up with all 1s at the smaller width.

   So: In short, pessimise the args, then apply the original narrowing
   op.
*/
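/* Illustration only (not from the original sources): for
   Iop_QNarrowBin16Sto8Sx16, a 16-bit lane whose shadow PCasts to
   0xFFFF is treated as the signed value -1, which narrows (and
   saturates) to the 8-bit value 0xFF -- still all 1s, i.e. still
   "all undefined".  A lane whose shadow PCasts to 0x0000 narrows to
   0x00, i.e. still "all defined".  So running the real narrowing op
   on the pessimised shadows preserves each lane's defined/undefined
   summary at the smaller width. */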
IRAtom* vectorNarrowV128 ( MCEnv* mce, IROp narrow_op,
                           IRAtom* vatom1, IRAtom* vatom2)
{
   IRAtom *at1, *at2, *at3;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (narrow_op) {
      case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
      case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
      case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
      default: VG_(tool_panic)("vectorNarrowV128");
   }
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   at1 = assignNew(mce, Ity_V128, pcast(mce, vatom1));
   at2 = assignNew(mce, Ity_V128, pcast(mce, vatom2));
   at3 = assignNew(mce, Ity_V128, binop(narrow_op, at1, at2));
   return at3;
}
/* --- --- Vector integer arithmetic --- --- */

/* Simple ... UifU the args and per-lane pessimise the results. */

IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast8x16(mce, at);
   return at;
}

IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast16x8(mce, at);
   return at;
}

IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast32x4(mce, at);
   return at;
}

IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast64x2(mce, at);
   return at;
}

IRAtom* binary128Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast128x1(mce, at);
   return at;
}
/*------------------------------------------------------------*/
/*--- Generate shadow values from all kinds of IRExprs.    ---*/
/*------------------------------------------------------------*/

IRAtom* expr2vbits_Binop ( MCEnv* mce,
                           IROp op,
                           IRAtom* atom1, IRAtom* atom2 )
{
   IRType  and_or_ty;
   IRAtom* (*uifu)    (MCEnv*, IRAtom*, IRAtom*);
   IRAtom* (*difd)    (MCEnv*, IRAtom*, IRAtom*);
   IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);

   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));

   switch (op) {

      /* V128-bit SIMD (SSE2-esque) */

         /* Same scheme as with all other shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_V128, binop(op, vatom1, atom2));

      case Iop_MulHi8Sx16:
      case Iop_MulHi8Ux16:
      case Iop_CmpGT8Sx16:
         return binary8Ix16(mce, vatom1, vatom2);

      case Iop_MulHi16Sx8:
      case Iop_MulHi16Ux8:
      case Iop_CmpGT16Sx8:
         return binary16Ix8(mce, vatom1, vatom2);

      case Iop_CmpGT32Sx4:
         return binary32Ix4(mce, vatom1, vatom2);

         return binary64Ix2(mce, vatom1, vatom2);

      case Iop_CmpNEZ128x1:
         return binary128Ix1(mce, vatom1, vatom2);

      case Iop_QNarrowBin32Sto16Sx8:
      case Iop_QNarrowBin16Sto8Sx16:
      case Iop_QNarrowBin16Sto8Ux16:
         return vectorNarrowV128(mce, op, vatom1, vatom2);

      case Iop_CmpLT64Fx2:
      case Iop_CmpLE64Fx2:
      case Iop_CmpEQ64Fx2:
         return binary64Fx2(mce, vatom1, vatom2);

      case Iop_CmpLT64F0x2:
      case Iop_CmpLE64F0x2:
      case Iop_CmpEQ64F0x2:
         return binary64F0x2(mce, vatom1, vatom2);

      /* V128-bit SIMD (SSE1-esque) */

      case Iop_CmpLT32Fx4:
      case Iop_CmpLE32Fx4:
      case Iop_CmpEQ32Fx4:
         return binary32Fx4(mce, vatom1, vatom2);

      case Iop_CmpLT32F0x4:
      case Iop_CmpLE32F0x4:
      case Iop_CmpEQ32F0x4:
         return binary32F0x4(mce, vatom1, vatom2);

      /* V128-bit data-steering */
      case Iop_SetV128lo32:
      case Iop_SetV128lo64:
      case Iop_64HLtoV128:
      case Iop_InterleaveLO64x2:
      case Iop_InterleaveLO32x4:
      case Iop_InterleaveLO16x8:
      case Iop_InterleaveLO8x16:
      case Iop_InterleaveHI64x2:
      case Iop_InterleaveHI32x4:
      case Iop_InterleaveHI16x8:
      case Iop_InterleaveHI8x16:
         return assignNew(mce, Ity_V128, binop(op, vatom1, vatom2));

      /* Scalar floating point */

      // case Iop_RoundF64:
         /* First arg is I32 (rounding mode), second is F64 or I64
            (data). */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_PRemC3210F64: case Iop_PRem1C3210F64:
         /* Takes two F64 args. */
         /* First arg is I32 (rounding mode), second is F64 (data). */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

         /* First arg is I32 (rounding mode), second is F64 (data). */
         return mkLazy2(mce, Ity_I16, vatom1, vatom2);

         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      /* non-FP after here */

      case Iop_DivModU64to32:
      case Iop_DivModS64to32:
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

         return assignNew(mce, Ity_I32, binop(op, vatom1, vatom2));
         return assignNew(mce, Ity_I64, binop(op, vatom1, vatom2));

         IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
         IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
         return assignNew(mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));

         IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
         IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
         return assignNew(mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));

         IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
         IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
         return assignNew(mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));

         return expensiveAdd32(mce, vatom1,vatom2, atom1,atom2);

         return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));

         return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));

         return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));

      case Iop_CmpLE32S: case Iop_CmpLE32U:
      case Iop_CmpLT32U: case Iop_CmpLT32S:
      case Iop_CmpEQ32: case Iop_CmpNE32:
         return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));

      case Iop_CmpEQ16: case Iop_CmpNE16:
         return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));

      case Iop_CmpEQ8: case Iop_CmpNE8:
         return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));

      case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
         /* Complain if the shift amount is undefined.  Then simply
            shift the first arg's V bits by the real shift amount. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I32, binop(op, vatom1, atom2));

      case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
         /* Same scheme as with 32-bit shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I16, binop(op, vatom1, atom2));

      case Iop_Shl8: case Iop_Shr8:
         /* Same scheme as with 32-bit shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I8, binop(op, vatom1, atom2));

      case Iop_Shl64: case Iop_Shr64:
         /* Same scheme as with 32-bit shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I64, binop(op, vatom1, atom2));

         uifu = mkUifUV128; difd = mkDifDV128;
         and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
         uifu = mkUifU64; difd = mkDifD64;
         and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
         uifu = mkUifU32; difd = mkDifD32;
         and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
         uifu = mkUifU16; difd = mkDifD16;
         and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
         uifu = mkUifU8; difd = mkDifD8;
         and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;

         uifu = mkUifUV128; difd = mkDifDV128;
         and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
         uifu = mkUifU64; difd = mkDifD64;
         and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
         uifu = mkUifU32; difd = mkDifD32;
         and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
         uifu = mkUifU16; difd = mkDifD16;
         and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
         uifu = mkUifU8; difd = mkDifD8;
         and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;

      do_And_Or:
         return
         assignNew(
            mce, and_or_ty,
            difd(mce, uifu(mce, vatom1, vatom2),
                      difd(mce, improve(mce, atom1, vatom1),
                                improve(mce, atom2, vatom2) ) ) );

         return mkUifU8(mce, vatom1, vatom2);
         return mkUifU16(mce, vatom1, vatom2);
         return mkUifU32(mce, vatom1, vatom2);
         return mkUifU64(mce, vatom1, vatom2);
         return mkUifUV128(mce, vatom1, vatom2);

      default:
         VG_(tool_panic)("memcheck:expr2vbits_Binop");
   }
}
IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
{
   IRAtom* vatom = expr2vbits( mce, atom );
   tl_assert(isOriginalAtom(mce,atom));
   switch (op) {

         return unary64Fx2(mce, vatom);

      case Iop_Sqrt64F0x2:
         return unary64F0x2(mce, vatom);

      case Iop_RecipEst32Fx4:
         return unary32Fx4(mce, vatom);

      case Iop_Sqrt32F0x4:
      case Iop_RSqrtEst32F0x4:
      case Iop_RecipEst32F0x4:
         return unary32F0x4(mce, vatom);

         return assignNew(mce, Ity_V128, unop(op, vatom));

         return mkPCastTo(mce, Ity_I64, vatom);

         return mkPCastTo(mce, Ity_I32, vatom);

      case Iop_V128HIto64:
         return assignNew(mce, Ity_I64, unop(op, vatom));

         return assignNew(mce, Ity_I32, unop(op, vatom));

         return assignNew(mce, Ity_I16, unop(op, vatom));

         return assignNew(mce, Ity_I8, unop(op, vatom));

         return assignNew(mce, Ity_I1, unop(Iop_32to1, vatom));

      case Iop_ReinterpF64asI64:
      case Iop_ReinterpI64asF64:
      case Iop_ReinterpI32asF32:

      default:
         VG_(tool_panic)("memcheck:expr2vbits_Unop");
   }
}
/* Worker function; do not call directly. */
static
IRAtom* expr2vbits_LDle_WRK ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
{
   void*    helper = NULL;
   HChar*   hname  = NULL;
   IRDirty* di;
   IRTemp   datavbits;
   IRAtom*  addrAct;
   IRAtom*  eBias;
   IROp     mkAdd;

   tl_assert(isOriginalAtom(mce,addr));

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr );

   /* Now cook up a call to the relevant helper function, to read the
      data V bits from shadow memory. */
   ty = shadowType(ty);
   switch (ty) {
      case Ity_I64: helper = &MC_(helperc_LOADV8);
                    hname = "MC_(helperc_LOADV8)";
                    break;
      case Ity_I32: helper = &MC_(helperc_LOADV4);
                    hname = "MC_(helperc_LOADV4)";
                    break;
      case Ity_I16: helper = &MC_(helperc_LOADV2);
                    hname = "MC_(helperc_LOADV2)";
                    break;
      case Ity_I8:  helper = &MC_(helperc_LOADV1);
                    hname = "MC_(helperc_LOADV1)";
                    break;
      default:      ppIRType(ty);
                    VG_(tool_panic)("memcheck:do_shadow_LDle");
   }

   /* Generate the actual address into addrAct. */
   IRType tyAddr = mce->hWordTy;
   tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
   mkAdd   = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
   addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );

   /* We need to have a place to park the V bits we're just about to
      read. */
   datavbits = newIRTemp(mce->bb->tyenv, ty);
   di = unsafeIRDirty_1_N( datavbits,
                           1/*regparms*/, hname, helper,
                           mkIRExprVec_1( addrAct ));
   setHelperAnns( mce, di );
   stmt( mce->bb, IRStmt_Dirty(di) );

   return mkexpr(datavbits);
}
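/* For example (a sketch; temporary names are invented): a 32-bit load
   from address 'a' with bias 0 becomes, roughly,

      t_addr = Add32(a, 0)
      t_v    = dirty-call MC_(helperc_LOADV4)(t_addr)

   and 't_v' is handed back as the V-bit expression for the loaded
   value. */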
static
IRAtom* expr2vbits_LDle ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
{
   IRAtom *v64hi, *v64lo;
   switch (shadowType(ty)) {
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:
         return expr2vbits_LDle_WRK(mce, ty, addr, bias);
      case Ity_V128:
         v64lo = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias);
         v64hi = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias+8);
         return assignNew( mce,
                           Ity_V128,
                           binop(Iop_64HLtoV128, v64hi, v64lo));
      default:
         VG_(tool_panic)("expr2vbits_LDle");
   }
}
static
IRAtom* expr2vbits_ITE ( MCEnv* mce,
                         IRAtom* cond, IRAtom* iftrue, IRAtom* iffalse )
{
   IRAtom *vbitsC, *vbits0, *vbits1;
   IRType ty;
   /* Given ITE(cond,iftrue,iffalse), generate
         ITE(cond,iftrue#,iffalse#) `UifU` PCast(cond#)
      That is, steer the V bits like the originals, but trash the
      result if the steering value is undefined.  This gives lazy
      propagation. */
   tl_assert(isOriginalAtom(mce, cond));
   tl_assert(isOriginalAtom(mce, iftrue));
   tl_assert(isOriginalAtom(mce, iffalse));

   vbitsC = expr2vbits(mce, cond);
   vbits0 = expr2vbits(mce, iffalse);
   vbits1 = expr2vbits(mce, iftrue);
   ty = typeOfIRExpr(mce->bb->tyenv, vbits0);

   return
      mkUifU(mce, ty, assignNew(mce, ty, IRExpr_ITE(cond, vbits1, vbits0)),
                      mkPCastTo(mce, ty, vbitsC) );
}
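/* Concretely (sketch): if cond# is all-defined, PCast(cond#) is all
   zeroes, so the UifU leaves ITE(cond, iftrue#, iffalse#) untouched
   and the result's definedness simply follows the selected arm.  If
   cond# has any undefined bit, PCast(cond#) is all ones and the UifU
   forces the whole result to undefined, regardless of the arms. */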
/* --------- This is the main expression-handling function. --------- */

static
IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
{
   switch (e->tag) {

      case Iex_Get:
         return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );

      case Iex_GetI:
         return shadow_GETI( mce, e->Iex.GetI.descr,
                                  e->Iex.GetI.ix, e->Iex.GetI.bias );

      case Iex_RdTmp:
         return IRExpr_RdTmp( findShadowTmp(mce, e->Iex.RdTmp.tmp) );

      case Iex_Const:
         return definedOfType(shadowType(typeOfIRExpr(mce->bb->tyenv, e)));

      case Iex_Binop:
         return expr2vbits_Binop(
                   mce,
                   e->Iex.Binop.op,
                   e->Iex.Binop.arg1, e->Iex.Binop.arg2
                );

      case Iex_Unop:
         return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );

      case Iex_Load:
         return expr2vbits_LDle( mce, e->Iex.Load.ty,
                                      e->Iex.Load.addr, 0/*addr bias*/ );

      case Iex_CCall:
         return mkLazyN( mce, e->Iex.CCall.args,
                              e->Iex.CCall.retty,
                              e->Iex.CCall.cee );

      case Iex_ITE:
         return expr2vbits_ITE( mce, e->Iex.ITE.cond, e->Iex.ITE.iftrue,
                                     e->Iex.ITE.iffalse );

      default:
         VG_(tool_panic)("memcheck: expr2vbits");
   }
}
/*------------------------------------------------------------*/
/*--- Generate shadow stmts from all kinds of IRStmts.     ---*/
/*------------------------------------------------------------*/

/* Widen a value to the host word size. */

static
IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
{
   IRType ty, tyH;

   /* vatom is vbits-value and as such can only have a shadow type. */
   tl_assert(isShadowAtom(mce,vatom));

   ty  = typeOfIRExpr(mce->bb->tyenv, vatom);
   tyH = mce->hWordTy;

   if (tyH == Ity_I32) {
      switch (ty) {
         case Ity_I32: return vatom;
         case Ity_I16: return assignNew(mce, tyH, unop(Iop_16Uto32, vatom));
         case Ity_I8:  return assignNew(mce, tyH, unop(Iop_8Uto32, vatom));
         default: goto unhandled;
      }
   } else {
      goto unhandled;
   }
  unhandled:
   VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
   VG_(tool_panic)("zwidenToHostWord");
}
/* Generate a shadow store.  addr is always the original address atom.
   You can pass in either originals or V-bits for the data atom, but
   obviously not both. */

static
void do_shadow_STle ( MCEnv* mce,
                      IRAtom* addr, UInt bias,
                      IRAtom* data, IRAtom* vdata )
{
   IROp     mkAdd;
   IRType   ty, tyAddr;
   IRDirty  *di, *diLo64, *diHi64;
   IRAtom   *addrAct, *addrLo64, *addrHi64;
   IRAtom   *vdataLo64, *vdataHi64;
   IRAtom   *eBias, *eBias0, *eBias8;
   void*    helper = NULL;
   HChar*   hname = NULL;

   tyAddr = mce->hWordTy;
   mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );

   di = diLo64 = diHi64 = NULL;
   eBias = eBias0 = eBias8 = NULL;
   addrAct = addrLo64 = addrHi64 = NULL;
   vdataLo64 = vdataHi64 = NULL;

   if (data) {
      tl_assert(!vdata);
      tl_assert(isOriginalAtom(mce, data));
      tl_assert(bias == 0);
      vdata = expr2vbits( mce, data );
   } else {
      tl_assert(vdata);
   }

   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(isShadowAtom(mce,vdata));

   ty = typeOfIRExpr(mce->bb->tyenv, vdata);

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr );

   /* Now decide which helper function to call to write the data V
      bits into shadow memory. */
   switch (ty) {
      case Ity_V128: /* we'll use the helper twice */
      case Ity_I64: helper = &MC_(helperc_STOREV8);
                    hname = "MC_(helperc_STOREV8)";
                    break;
      case Ity_I32: helper = &MC_(helperc_STOREV4);
                    hname = "MC_(helperc_STOREV4)";
                    break;
      case Ity_I16: helper = &MC_(helperc_STOREV2);
                    hname = "MC_(helperc_STOREV2)";
                    break;
      case Ity_I8:  helper = &MC_(helperc_STOREV1);
                    hname = "MC_(helperc_STOREV1)";
                    break;
      default:      VG_(tool_panic)("memcheck:do_shadow_STle");
   }

   if (ty == Ity_V128) {

      /* See comment in next clause re 64-bit regparms */
      eBias0    = tyAddr==Ity_I32 ? mkU32(bias)   : mkU64(bias);
      addrLo64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias0) );
      vdataLo64 = assignNew(mce, Ity_I64, unop(Iop_V128to64, vdata));
      diLo64    = unsafeIRDirty_0_N(
                     1/*regparms*/, hname, helper,
                     mkIRExprVec_2( addrLo64, vdataLo64 ));

      eBias8    = tyAddr==Ity_I32 ? mkU32(bias+8) : mkU64(bias+8);
      addrHi64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias8) );
      vdataHi64 = assignNew(mce, Ity_I64, unop(Iop_V128HIto64, vdata));
      diHi64    = unsafeIRDirty_0_N(
                     1/*regparms*/, hname, helper,
                     mkIRExprVec_2( addrHi64, vdataHi64 ));

      setHelperAnns( mce, diLo64 );
      setHelperAnns( mce, diHi64 );
      stmt( mce->bb, IRStmt_Dirty(diLo64) );
      stmt( mce->bb, IRStmt_Dirty(diHi64) );

   } else {

      /* 8/16/32/64-bit cases */
      /* Generate the actual address into addrAct. */
      eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
      addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );

      if (ty == Ity_I64) {
         /* We can't do this with regparm 2 on 32-bit platforms, since
            the back ends aren't clever enough to handle 64-bit
            regparm args.  Therefore be different. */
         di = unsafeIRDirty_0_N(
                 1/*regparms*/, hname, helper,
                 mkIRExprVec_2( addrAct, vdata ));
      } else {
         di = unsafeIRDirty_0_N(
                 2/*regparms*/, hname, helper,
                 mkIRExprVec_2( addrAct,
                                zwidenToHostWord( mce, vdata )));
      }
      setHelperAnns( mce, di );
      stmt( mce->bb, IRStmt_Dirty(di) );
   }
}
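/* Example (sketch): a V128 store to address 'a' is instrumented as
   two 64-bit helper calls,

      MC_(helperc_STOREV8)( a+0, V128to64(vdata)   )
      MC_(helperc_STOREV8)( a+8, V128HIto64(vdata) )

   whereas a 32-bit store becomes a single call
   MC_(helperc_STOREV4)( a, vdata ), with vdata widened to a host word
   where necessary. */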
/* Do lazy pessimistic propagation through a dirty helper call, by
   looking at the annotations on it.  This is the most complex part of
   Memcheck. */

static IRType szToITy ( Int n )
{
   switch (n) {
      case 1: return Ity_I8;
      case 2: return Ity_I16;
      case 4: return Ity_I32;
      case 8: return Ity_I64;
      default: VG_(tool_panic)("szToITy(memcheck)");
   }
}
static
void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
{
   Int     i, n, offset, toDo, gSz, gOff;
   IRAtom  *src, *here, *curr;
   IRType  tyAddr, tySrc, tyDst;
   IRTemp  dst;

   /* First check the guard. */
   complainIfUndefined(mce, d->guard);

   /* Now round up all inputs and PCast over them. */
   curr = definedOfType(Ity_I32);

   /* Inputs: unmasked args */
   for (i = 0; d->args[i]; i++) {
      if (d->cee->mcx_mask & (1<<i)) {
         /* ignore this arg */
      } else {
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
         curr = mkUifU32(mce, here, curr);
      }
   }

   /* Inputs: guest state that we read. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Write)
         continue;

      /* Ignore any sections marked as 'always defined'. */
      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
         VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
                     d->fxState[i].offset, d->fxState[i].size );
         continue;
      }

      /* This state element is read or modified.  So we need to
         consider it.  If larger than 8 bytes, deal with it in 8-byte
         chunks. */
      gSz  = d->fxState[i].size;
      gOff = d->fxState[i].offset;
      while (True) {
         if (gSz == 0) break;
         n = gSz <= 8 ? gSz : 8;
         /* update 'curr' with UifU of the state slice
            gOff .. gOff+n-1 */
         tySrc = szToITy( n );
         src   = assignNew( mce, tySrc,
                            shadow_GET(mce, gOff, tySrc ) );
         here  = mkPCastTo( mce, Ity_I32, src );
         curr  = mkUifU32(mce, here, curr);
         gSz  -= n;
         gOff += n;
      }
   }

   /* Inputs: memory.  First set up some info needed regardless of
      whether we're doing reads or writes. */
   tyAddr = Ity_INVALID;

   if (d->mFx != Ifx_None) {
      /* Because we may do multiple shadow loads/stores from the same
         base address, it's best to do a single test of its
         definedness right now.  Post-instrumentation optimisation
         should remove all but this test. */
      tl_assert(d->mAddr);
      complainIfUndefined(mce, d->mAddr);

      tyAddr = typeOfIRExpr(mce->bb->tyenv, d->mAddr);
      tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
      tl_assert(tyAddr == mce->hWordTy); /* not really right */
   }

   /* Deal with memory inputs (reads or modifies) */
   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
      toDo = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_LDle ( mce, Ity_I32,
                                     d->mAddr, d->mSize - toDo )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_LDle ( mce, Ity_I16,
                                     d->mAddr, d->mSize - toDo )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 2;
      }
      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   }

   /* Whew!  So curr is a 32-bit V-value summarising pessimistically
      all the inputs to the helper.  Now we need to re-distribute the
      results to all destinations. */

   /* Outputs: the destination temporary, if there is one. */
   if (d->tmp != IRTemp_INVALID) {
      dst   = findShadowTmp(mce, d->tmp);
      tyDst = typeOfIRTemp(mce->bb->tyenv, d->tmp);
      assign( mce->bb, dst, mkPCastTo( mce, tyDst, curr ) );
   }

   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;
      /* Ignore any sections marked as 'always defined'. */
      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
         continue;
      /* This state element is written or modified.  So we need to
         consider it.  If larger than 8 bytes, deal with it in 8-byte
         chunks. */
      gSz  = d->fxState[i].size;
      gOff = d->fxState[i].offset;
      while (True) {
         if (gSz == 0) break;
         n = gSz <= 8 ? gSz : 8;
         /* Write suitably-casted 'curr' to the state slice
            gOff .. gOff+n-1 */
         tyDst = szToITy( n );
         do_shadow_PUT( mce, gOff,
                        NULL, /* original atom */
                        mkPCastTo( mce, tyDst, curr ) );
         gSz  -= n;
         gOff += n;
      }
   }

   /* Outputs: memory that we write or modify. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      toDo = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
                         NULL, /* original data */
                         mkPCastTo( mce, Ity_I32, curr ) );
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
                         NULL, /* original data */
                         mkPCastTo( mce, Ity_I16, curr ) );
         toDo -= 2;
      }
      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   }
}
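/* Worked example (sketch): for a dirty helper that reads 8 bytes of
   guest state at offset 64, writes a 4-byte result to memory and
   returns a value in a temp, the code above (1) UifU32-accumulates
   PCast32 of the shadow GET:I64 at offset 64 into 'curr', and then
   (2) PCasts 'curr' back out: to the destination temp's type for the
   return value, and to Ity_I32 for a shadow store covering the 4
   bytes written.  Everything the helper touches therefore becomes
   undefined if any of its inputs were undefined -- pessimistic but
   safe. */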
/*------------------------------------------------------------*/
/*--- Memcheck main                                        ---*/
/*------------------------------------------------------------*/

static Bool isBogusAtom ( IRAtom* at )
{
   ULong n = 0;
   IRConst* con;
   tl_assert(isIRAtom(at));
   if (at->tag == Iex_RdTmp)
      return False;
   tl_assert(at->tag == Iex_Const);
   con = at->Iex.Const.con;
   switch (con->tag) {
      case Ico_U8:  n = (ULong)con->Ico.U8;  break;
      case Ico_U16: n = (ULong)con->Ico.U16; break;
      case Ico_U32: n = (ULong)con->Ico.U32; break;
      case Ico_U64: n = (ULong)con->Ico.U64; break;
      default: ppIRExpr(at); tl_assert(0);
   }
   /* VG_(printf)("%llx\n", n); */
   return (n == 0xFEFEFEFF);
}
__attribute__((unused))
static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
{
   Int     i;
   IRExpr* e;

   switch (st->tag) {
      case Ist_WrTmp:
         e = st->Ist.WrTmp.data;
         switch (e->tag) {
            case Iex_Unop:
               return isBogusAtom(e->Iex.Unop.arg);
            case Iex_Binop:
               return isBogusAtom(e->Iex.Binop.arg1)
                      || isBogusAtom(e->Iex.Binop.arg2);
            case Iex_ITE:
               return isBogusAtom(e->Iex.ITE.cond)
                      || isBogusAtom(e->Iex.ITE.iftrue)
                      || isBogusAtom(e->Iex.ITE.iffalse);
            case Iex_Load:
               return isBogusAtom(e->Iex.Load.addr);
            case Iex_CCall:
               for (i = 0; e->Iex.CCall.args[i]; i++)
                  if (isBogusAtom(e->Iex.CCall.args[i]))
                     return True;
               return False;
            default:
               goto unhandled;
         }
      case Ist_Put:
         return isBogusAtom(st->Ist.Put.data);
      case Ist_Store:
         return isBogusAtom(st->Ist.Store.addr)
                || isBogusAtom(st->Ist.Store.data);
      case Ist_Exit:
         return isBogusAtom(st->Ist.Exit.guard);
      default:
      unhandled:
         VG_(tool_panic)("hasBogusLiterals");
   }
}
IRSB* mc_instrument ( void* closureV,
                      IRSB* bb_in, VexGuestLayout* layout,
                      VexGuestExtents* vge,
                      IRType gWordTy, IRType hWordTy )
{
   Bool verboze = False; //True;

   /* Bool hasBogusLiterals = False; */

   Int i, j, first_stmt;
   IRStmt* st;
   MCEnv mce;

   IRSB* bb     = emptyIRSB();
   bb->tyenv    = deepCopyIRTypeEnv(bb_in->tyenv);
   bb->next     = deepCopyIRExpr(bb_in->next);
   bb->jumpkind = bb_in->jumpkind;

   /* Set up the running environment.  Only .bb is modified as we go
      along. */
   mce.bb             = bb;
   mce.layout         = layout;
   mce.n_originalTmps = bb->tyenv->types_used;
   mce.hWordTy        = hWordTy;
   mce.tmpMap         = LibVEX_Alloc(mce.n_originalTmps * sizeof(IRTemp));
   for (i = 0; i < mce.n_originalTmps; i++)
      mce.tmpMap[i] = IRTemp_INVALID;

   /* Iterate over the stmts. */

   for (i = 0; i < bb_in->stmts_used; i++) {
      st = bb_in->stmts[i];

      tl_assert(isFlatIRStmt(st));

      /*
      if (!hasBogusLiterals) {
         hasBogusLiterals = checkForBogusLiterals(st);
         if (hasBogusLiterals) {
            VG_(printf)("bogus: ");
            ppIRStmt(st);
            VG_(printf)("\n");
         }
      }
      */

      first_stmt = bb->stmts_used;

      if (verboze) {
         ppIRStmt(st);
         VG_(printf)("\n\n");
      }

      switch (st->tag) {

         case Ist_WrTmp:
            assign( bb, findShadowTmp(&mce, st->Ist.WrTmp.tmp),
                        expr2vbits( &mce, st->Ist.WrTmp.data ) );
            break;

         case Ist_Put:
            do_shadow_PUT( &mce,
                           st->Ist.Put.offset,
                           st->Ist.Put.data,
                           NULL /* shadow atom */ );
            break;

         case Ist_PutI:
            do_shadow_PUTI( &mce,
                            st->Ist.PutI.details->descr,
                            st->Ist.PutI.details->ix,
                            st->Ist.PutI.details->bias,
                            st->Ist.PutI.details->data );
            break;

         case Ist_Store:
            do_shadow_STle( &mce, st->Ist.Store.addr, 0/* addr bias */,
                                  st->Ist.Store.data,
                                  NULL /* shadow data */ );
            break;

         case Ist_Exit:
            /* if (!hasBogusLiterals) */
               complainIfUndefined( &mce, st->Ist.Exit.guard );
            break;

         case Ist_Dirty:
            do_shadow_Dirty( &mce, st->Ist.Dirty.details );
            break;

         default:
            VG_(tool_panic)("memcheck: unhandled IRStmt");

      } /* switch (st->tag) */

      if (verboze) {
         for (j = first_stmt; j < bb->stmts_used; j++) {
            ppIRStmt(bb->stmts[j]);
            VG_(printf)("\n");
         }
      }

      addStmtToIRSB(bb, st);
   }

   /* Now we need to complain if the jump target is undefined. */
   first_stmt = bb->stmts_used;

   if (verboze) {
      VG_(printf)("bb->next = ");
      ppIRExpr(bb->next);
      VG_(printf)("\n\n");
   }

   complainIfUndefined( &mce, bb->next );

   if (verboze) {
      for (j = first_stmt; j < bb->stmts_used; j++) {
         ppIRStmt(bb->stmts[j]);
         VG_(printf)("\n");
      }
   }

   return bb;
}
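/* Illustrative example of the overall effect (temporary names are
   invented): for an input statement

      t3 = Xor32(t1,t2)

   mc_instrument first emits a shadow statement computing t3's V bits,
   roughly

      t3# = UifU32(t1#, t2#)      (via expr2vbits_Binop)

   and then copies the original statement across, so the output SB
   interleaves shadow computations with the original code. */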
/*--------------------------------------------------------------------*/
/*--- end                                              test_main.c ---*/
/*--------------------------------------------------------------------*/