coregrind/m_scheduler/scheduler.c

   1
   2 /*--------------------------------------------------------------------*/
   3 /*--- Thread scheduling.                               scheduler.c ---*/
   4 /*--------------------------------------------------------------------*/
   5
   6 /*
   7    This file is part of Valgrind, a dynamic binary instrumentation
   8    framework.
   9
  10    Copyright (C) 2000-2017 Julian Seward
  11       jseward@acm.org
  12
  13    This program is free software; you can redistribute it and/or
  14    modify it under the terms of the GNU General Public License as
  15    published by the Free Software Foundation; either version 2 of the
  16    License, or (at your option) any later version.
  17
  18    This program is distributed in the hope that it will be useful, but
  19    WITHOUT ANY WARRANTY; without even the implied warranty of
  20    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  21    General Public License for more details.
  22
  23    You should have received a copy of the GNU General Public License
  24    along with this program; if not, see <http://www.gnu.org/licenses/>.
  25
  26    The GNU General Public License is contained in the file COPYING.
  27 */
  28
  29 /*
  30    Overview
  31
  32    Valgrind tries to emulate the kernel's threading as closely as
  33    possible.  The client does all threading via the normal syscalls
  34    (on Linux: clone, etc).  Valgrind emulates this by creating exactly
  35    the same process structure as would be created without Valgrind.
  36    There are no extra threads.
  37
  38    The main difference is that Valgrind only allows one client thread
  39    to run at once.  This is controlled with the CPU Big Lock,
  40    "the_BigLock".  Any time a thread wants to run client code or
  41    manipulate any shared state (which is anything other than its own
  42    ThreadState entry), it must hold the_BigLock.
  43
  44    When a thread is about to block in a blocking syscall, it releases
  45    the_BigLock, and re-takes it when it becomes runnable again (either
  46    because the syscall finished, or we took a signal).
  47
  48    VG_(scheduler) therefore runs in each thread.  It returns only when
  49    the thread is exiting, either because it exited itself, or it was
  50    told to exit by another thread.
  51
  52    This file is almost entirely OS-independent.  The details of how
  53    the OS handles threading and signalling are abstracted away and
  54    implemented elsewhere.  [Some of the functions have worked their
  55    way back for the moment, until we do an OS port in earnest...]
  56 */
  57
  58
  59 #include "pub_core_basics.h"
  60 #include "pub_core_debuglog.h"
  61 #include "pub_core_vki.h"
  62 #include "pub_core_vkiscnums.h"  // __NR_sched_yield
  63 #include "pub_core_threadstate.h"
  64 #include "pub_core_clientstate.h"
  65 #include "pub_core_aspacemgr.h"
  66 #include "pub_core_clreq.h"      // for VG_USERREQ__*
  67 #include "pub_core_dispatch.h"
  68 #include "pub_core_errormgr.h"   // For VG_(get_n_errs_found)()
  69 #include "pub_core_gdbserver.h"  // for VG_(gdbserver)/VG_(gdbserver_activity)
  70 #include "pub_core_libcbase.h"
  71 #include "pub_core_libcassert.h"
  72 #include "pub_core_libcprint.h"
  73 #include "pub_core_libcproc.h"
  74 #include "pub_core_libcsignal.h"
  75 #if defined(VGO_darwin)
  76 #include "pub_core_mach.h"
  77 #endif
  78 #include "pub_core_machine.h"
  79 #include "pub_core_mallocfree.h"
  80 #include "pub_core_options.h"
  81 #include "pub_core_replacemalloc.h"
  82 #include "pub_core_sbprofile.h"
  83 #include "pub_core_signals.h"
  84 #include "pub_core_stacks.h"
  85 #include "pub_core_stacktrace.h"    // For VG_(get_and_pp_StackTrace)()
  86 #include "pub_core_syscall.h"
  87 #include "pub_core_syswrap.h"
  88 #include "pub_core_tooliface.h"
  89 #include "pub_core_translate.h"     // For VG_(translate)()
  90 #include "pub_core_transtab.h"
  91 #include "pub_core_debuginfo.h"     // VG_(di_notify_pdb_debuginfo)
  92 #include "priv_sched-lock.h"
  93 #include "pub_core_scheduler.h"     // self
  94 #include "pub_core_redir.h"
  95 #include "libvex_emnote.h"          // VexEmNote
  96
  97
  98 /* ---------------------------------------------------------------------
  99    Types and globals for the scheduler.
 100    ------------------------------------------------------------------ */
 101
 102 /* ThreadId and ThreadState are defined elsewhere*/
 103
 104 /* Defines the thread-scheduling timeslice, in terms of the number of
 105    basic blocks we attempt to run each thread for.  Smaller values
 106    give finer interleaving but much increased scheduling overheads. */
 107 #define SCHEDULING_QUANTUM   100000
 108
 109 /* If False, a fault is Valgrind-internal (ie, a bug) */
 110 Bool VG_(in_generated_code) = False;
 111
 112 /* 64-bit counter for the number of basic blocks done. */
 113 static ULong bbs_done = 0;
 114
 115 /* Counter to see if vgdb activity is to be verified.
 116    When nr of bbs done reaches vgdb_next_poll, scheduler will
 117    poll for gdbserver activity. VG_(force_vgdb_poll) and
 118    VG_(disable_vgdb_poll) allows the valgrind core (e.g. m_gdbserver)
 119    to control when the next poll will be done. */
 120 static ULong vgdb_next_poll;
 121
 122 /* Forwards */
 123 static void do_client_request ( ThreadId tid );
 124 static void scheduler_sanity ( ThreadId tid );
 125 static void mostly_clear_thread_record ( ThreadId tid );
 126
 127 /* Stats. */
 128 static ULong n_scheduling_events_MINOR = 0;
 129 static ULong n_scheduling_events_MAJOR = 0;
 130
 131 /* Stats: number of XIndirs looked up in the fast cache, the number of hits in
 132    ways 1, 2 and 3, and the number of misses.  The number of hits in way 0 isn't
 133    recorded because it can be computed from these five numbers. */
 134 static ULong stats__n_xIndirs = 0;
 135 static ULong stats__n_xIndir_hits1 = 0;
 136 static ULong stats__n_xIndir_hits2 = 0;
 137 static ULong stats__n_xIndir_hits3 = 0;
 138 static ULong stats__n_xIndir_misses = 0;
 139
 140 /* And 32-bit temp bins for the above, so that 32-bit platforms don't
 141    have to do 64 bit incs on the hot path through
 142    VG_(disp_cp_xindir). */
 143 /*global*/ UInt VG_(stats__n_xIndirs_32) = 0;
 144 /*global*/ UInt VG_(stats__n_xIndir_hits1_32) = 0;
 145 /*global*/ UInt VG_(stats__n_xIndir_hits2_32) = 0;
 146 /*global*/ UInt VG_(stats__n_xIndir_hits3_32) = 0;
 147 /*global*/ UInt VG_(stats__n_xIndir_misses_32) = 0;
 148
 149 /* Sanity checking counts. */
 150 static UInt sanity_fast_count = 0;
 151 static UInt sanity_slow_count = 0;
 152
 153 void VG_(print_scheduler_stats)(void)
 154 {
 155    VG_(message)(Vg_DebugMsg,
 156       "scheduler: %'llu event checks.\n", bbs_done );
 157
 158    const ULong hits0
 159       = stats__n_xIndirs - stats__n_xIndir_hits1 - stats__n_xIndir_hits2
 160         - stats__n_xIndir_hits3 - stats__n_xIndir_misses;
 161    VG_(message)(Vg_DebugMsg,
 162                 "scheduler: %'llu indir transfers, "
 163                 "%'llu misses (1 in %llu) ..\n",
 164                 stats__n_xIndirs, stats__n_xIndir_misses,
 165                 stats__n_xIndirs / (stats__n_xIndir_misses
 166                                    ? stats__n_xIndir_misses : 1));
 167    VG_(message)(Vg_DebugMsg,
 168                 "scheduler: .. of which: %'llu hit0, %'llu hit1, "
 169                 "%'llu hit2, %'llu hit3, %'llu missed\n",
 170                 hits0,
 171                 stats__n_xIndir_hits1,
 172                 stats__n_xIndir_hits2,
 173                 stats__n_xIndir_hits3,
 174                 stats__n_xIndir_misses);
 175
 176    VG_(message)(Vg_DebugMsg,
 177       "scheduler: %'llu/%'llu major/minor sched events.\n",
 178       n_scheduling_events_MAJOR, n_scheduling_events_MINOR);
 179    VG_(message)(Vg_DebugMsg,
 180                 "   sanity: %u cheap, %u expensive checks.\n",
 181                 sanity_fast_count, sanity_slow_count );
 182 }
 183
 184 /*
 185  * Mutual exclusion object used to serialize threads.
 186  */
 187 static struct sched_lock *the_BigLock;
 188
 189
 190 /* ---------------------------------------------------------------------
 191    Helper functions for the scheduler.
 192    ------------------------------------------------------------------ */
 193
 194 static void maybe_progress_report ( UInt reporting_interval_seconds )
 195 {
 196    /* This is when the next report is due, in user cpu milliseconds since
 197       process start.  This is a global variable so this won't be thread-safe
 198       if Valgrind is ever made multithreaded.  For now it's fine. */
 199    static UInt next_report_due_at = 0;
 200
 201    /* First of all, figure out whether another report is due.  It
 202       probably isn't. */
 203    UInt user_ms = VG_(get_user_milliseconds)();
 204    if (LIKELY(user_ms < next_report_due_at))
 205       return;
 206
 207    Bool first_ever_call = next_report_due_at == 0;
 208
 209    /* A report is due.  First, though, set the time for the next report. */
 210    next_report_due_at += 1000 * reporting_interval_seconds;
 211
 212    /* If it's been an excessively long time since the last check, we
 213       might have gone more than one reporting interval forward.  Guard
 214       against that. */
 215    while (next_report_due_at <= user_ms)
 216       next_report_due_at += 1000 * reporting_interval_seconds;
 217
 218    /* Also we don't want to report anything on the first call, but we
 219       have to wait till this point to leave, so that we set up the
 220       next-call time correctly. */
 221    if (first_ever_call)
 222       return;
 223
 224    /* Print the report. */
 225    UInt   user_cpu_seconds  = user_ms / 1000;
 226    UInt   wallclock_seconds = VG_(read_millisecond_timer)() / 1000;
 227    Double millionEvCs   = ((Double)bbs_done) / 1000000.0;
 228    Double thousandTIns  = ((Double)VG_(get_bbs_translated)()) / 1000.0;
 229    Double thousandTOuts = ((Double)VG_(get_bbs_discarded_or_dumped)()) / 1000.0;
 230    UInt   nThreads      = VG_(count_living_threads)();
 231
 232    if (VG_(clo_verbosity) > 0) {
 233       VG_(dmsg)("PROGRESS: U %'us, W %'us, %.1f%% CPU, EvC %.2fM, "
 234                 "TIn %.1fk, TOut %.1fk, #thr %u\n",
 235                 user_cpu_seconds, wallclock_seconds,
 236                 100.0
 237                    * (Double)(user_cpu_seconds)
 238                    / (Double)(wallclock_seconds == 0 ? 1 : wallclock_seconds),
 239                 millionEvCs,
 240                 thousandTIns, thousandTOuts, nThreads);
 241    }
 242 }
 243
 244 static
 245 void print_sched_event ( ThreadId tid, const HChar* what )
 246 {
 247    VG_(message)(Vg_DebugMsg, "  SCHED[%u]: %s\n", tid, what );
 248 }
 249
 250 /* For showing SB profiles, if the user asks to see them. */
 251 static
 252 void maybe_show_sb_profile ( void )
 253 {
 254    /* DO NOT MAKE NON-STATIC */
 255    static ULong bbs_done_lastcheck = 0;
 256    /* */
 257    vg_assert(VG_(clo_profyle_interval) > 0);
 258    Long delta = (Long)(bbs_done - bbs_done_lastcheck);
 259    vg_assert(delta >= 0);
 260    if ((ULong)delta >= VG_(clo_profyle_interval)) {
 261       bbs_done_lastcheck = bbs_done;
 262       VG_(get_and_show_SB_profile)(bbs_done);
 263    }
 264 }
 265
 266 static
 267 const HChar* name_of_sched_event ( UInt event )
 268 {
 269    switch (event) {
 270       case VEX_TRC_JMP_INVALICACHE:    return "INVALICACHE";
 271       case VEX_TRC_JMP_FLUSHDCACHE:    return "FLUSHDCACHE";
 272       case VEX_TRC_JMP_NOREDIR:        return "NOREDIR";
 273       case VEX_TRC_JMP_SIGILL:         return "SIGILL";
 274       case VEX_TRC_JMP_SIGTRAP:        return "SIGTRAP";
 275       case VEX_TRC_JMP_SIGSEGV:        return "SIGSEGV";
 276       case VEX_TRC_JMP_SIGBUS:         return "SIGBUS";
 277       case VEX_TRC_JMP_SIGFPE_INTOVF:
 278       case VEX_TRC_JMP_SIGFPE_INTDIV:  return "SIGFPE";
 279       case VEX_TRC_JMP_EMWARN:         return "EMWARN";
 280       case VEX_TRC_JMP_EMFAIL:         return "EMFAIL";
 281       case VEX_TRC_JMP_CLIENTREQ:      return "CLIENTREQ";
 282       case VEX_TRC_JMP_YIELD:          return "YIELD";
 283       case VEX_TRC_JMP_NODECODE:       return "NODECODE";
 284       case VEX_TRC_JMP_MAPFAIL:        return "MAPFAIL";
 285       case VEX_TRC_JMP_SYS_SYSCALL:    return "SYSCALL";
 286       case VEX_TRC_JMP_SYS_INT32:      return "INT32";
 287       case VEX_TRC_JMP_SYS_INT128:     return "INT128";
 288       case VEX_TRC_JMP_SYS_INT129:     return "INT129";
 289       case VEX_TRC_JMP_SYS_INT130:     return "INT130";
 290       case VEX_TRC_JMP_SYS_INT145:     return "INT145";
 291       case VEX_TRC_JMP_SYS_INT210:     return "INT210";
 292       case VEX_TRC_JMP_SYS_SYSENTER:   return "SYSENTER";
 293       case VEX_TRC_JMP_BORING:         return "VEX_BORING";
 294
 295       case VG_TRC_BORING:              return "VG_BORING";
 296       case VG_TRC_INNER_FASTMISS:      return "FASTMISS";
 297       case VG_TRC_INNER_COUNTERZERO:   return "COUNTERZERO";
 298       case VG_TRC_FAULT_SIGNAL:        return "FAULTSIGNAL";
 299       case VG_TRC_INVARIANT_FAILED:    return "INVFAILED";
 300       case VG_TRC_CHAIN_ME_TO_SLOW_EP: return "CHAIN_ME_SLOW";
 301       case VG_TRC_CHAIN_ME_TO_FAST_EP: return "CHAIN_ME_FAST";
 302       default:                         return "??UNKNOWN??";
 303   }
 304 }
 305
 306 /* Allocate a completely empty ThreadState record. */
 307 ThreadId VG_(alloc_ThreadState) ( void )
 308 {
 309    Int i;
 310    for (i = 1; i < VG_N_THREADS; i++) {
 311       if (VG_(threads)[i].status == VgTs_Empty) {
 312          VG_(threads)[i].status = VgTs_Init;
 313          VG_(threads)[i].exitreason = VgSrc_None;
 314          if (VG_(threads)[i].thread_name)
 315             VG_(free)(VG_(threads)[i].thread_name);
 316          VG_(threads)[i].thread_name = NULL;
 317          return i;
 318       }
 319    }
 320    VG_(printf)("Use --max-threads=INT to specify a larger number of threads\n"
 321                "and rerun valgrind\n");
 322    VG_(core_panic)("Max number of threads is too low");
 323    /*NOTREACHED*/
 324 }
 325
 326 /*
 327    Mark a thread as Runnable.  This will block until the_BigLock is
 328    available, so that we get exclusive access to all the shared
 329    structures and the CPU.  Up until we get the_BigLock, we must not
 330    touch any shared state.
 331
 332    When this returns, we'll actually be running.
 333  */
 334 void VG_(acquire_BigLock)(ThreadId tid, const HChar* who)
 335 {
 336    ThreadState *tst;
 337
 338 #if 0
 339    if (VG_(clo_trace_sched)) {
 340       HChar buf[VG_(strlen)(who) + 30];
 341       VG_(sprintf)(buf, "waiting for lock (%s)", who);
 342       print_sched_event(tid, buf);
 343    }
 344 #endif
 345
 346    /* First, acquire the_BigLock.  We can't do anything else safely
 347       prior to this point.  Even doing debug printing prior to this
 348       point is, technically, wrong. */
 349    VG_(acquire_BigLock_LL)(NULL);
 350
 351    tst = VG_(get_ThreadState)(tid);
 352
 353    vg_assert(tst->status != VgTs_Runnable);
 354
 355    tst->status = VgTs_Runnable;
 356
 357    if (VG_(running_tid) != VG_INVALID_THREADID)
 358       VG_(printf)("tid %u found %u running\n", tid, VG_(running_tid));
 359    vg_assert(VG_(running_tid) == VG_INVALID_THREADID);
 360    VG_(running_tid) = tid;
 361
 362    { Addr gsp = VG_(get_SP)(tid);
 363       if (NULL != VG_(tdict).track_new_mem_stack_w_ECU)
 364          VG_(unknown_SP_update_w_ECU)(gsp, gsp, 0/*unknown origin*/);
 365       else
 366          VG_(unknown_SP_update)(gsp, gsp);
 367    }
 368
 369    if (VG_(clo_trace_sched)) {
 370       HChar buf[VG_(strlen)(who) + 30];
 371       VG_(sprintf)(buf, " acquired lock (%s)", who);
 372       print_sched_event(tid, buf);
 373    }
 374 }
 375
 376 /*
 377    Set a thread into a sleeping state, and give up exclusive access to
 378    the CPU.  On return, the thread must be prepared to block until it
 379    is ready to run again (generally this means blocking in a syscall,
 380    but it may mean that we remain in a Runnable state and we're just
 381    yielding the CPU to another thread).
 382  */
 383 void VG_(release_BigLock)(ThreadId tid, ThreadStatus sleepstate,
 384                           const HChar* who)
 385 {
 386    ThreadState *tst = VG_(get_ThreadState)(tid);
 387
 388    vg_assert(tst->status == VgTs_Runnable);
 389
 390    vg_assert(sleepstate == VgTs_WaitSys ||
 391              sleepstate == VgTs_Yielding);
 392
 393    tst->status = sleepstate;
 394
 395    vg_assert(VG_(running_tid) == tid);
 396    VG_(running_tid) = VG_INVALID_THREADID;
 397
 398    if (VG_(clo_trace_sched)) {
 399       const HChar *status = VG_(name_of_ThreadStatus)(sleepstate);
 400       HChar buf[VG_(strlen)(who) + VG_(strlen)(status) + 30];
 401       VG_(sprintf)(buf, "releasing lock (%s) -> %s", who, status);
 402       print_sched_event(tid, buf);
 403    }
 404
 405    /* Release the_BigLock; this will reschedule any runnable
 406       thread. */
 407    VG_(release_BigLock_LL)(NULL);
 408 }
 409
 410 static void init_BigLock(void)
 411 {
 412    vg_assert(!the_BigLock);
 413    the_BigLock = ML_(create_sched_lock)();
 414 }
 415
 416 static void deinit_BigLock(void)
 417 {
 418    ML_(destroy_sched_lock)(the_BigLock);
 419    the_BigLock = NULL;
 420 }
 421
 422 /* See pub_core_scheduler.h for description */
 423 void VG_(acquire_BigLock_LL) ( const HChar* who )
 424 {
 425    ML_(acquire_sched_lock)(the_BigLock);
 426 }
 427
 428 /* See pub_core_scheduler.h for description */
 429 void VG_(release_BigLock_LL) ( const HChar* who )
 430 {
 431    ML_(release_sched_lock)(the_BigLock);
 432 }
 433
 434 Bool VG_(owns_BigLock_LL) ( ThreadId tid )
 435 {
 436    return (ML_(get_sched_lock_owner)(the_BigLock)
 437            == VG_(threads)[tid].os_state.lwpid);
 438 }
 439
 440
 441 /* Clear out the ThreadState and release the semaphore. Leaves the
 442    ThreadState in VgTs_Zombie state, so that it doesn't get
 443    reallocated until the caller is really ready. */
 444 void VG_(exit_thread)(ThreadId tid)
 445 {
 446    vg_assert(VG_(is_valid_tid)(tid));
 447    vg_assert(VG_(is_running_thread)(tid));
 448    vg_assert(VG_(is_exiting)(tid));
 449
 450    mostly_clear_thread_record(tid);
 451    VG_(running_tid) = VG_INVALID_THREADID;
 452
 453    /* There should still be a valid exitreason for this thread */
 454    vg_assert(VG_(threads)[tid].exitreason != VgSrc_None);
 455
 456    if (VG_(clo_trace_sched))
 457       print_sched_event(tid, "release lock in VG_(exit_thread)");
 458
 459    VG_(release_BigLock_LL)(NULL);
 460 }
 461
 462 /* If 'tid' is blocked in a syscall, send it SIGVGKILL so as to get it
 463    out of the syscall and onto doing the next thing, whatever that is.
 464    If it isn't blocked in a syscall, has no effect on the thread. */
 465 void VG_(get_thread_out_of_syscall)(ThreadId tid)
 466 {
 467    vg_assert(VG_(is_valid_tid)(tid));
 468    vg_assert(!VG_(is_running_thread)(tid));
 469
 470    if (VG_(threads)[tid].status == VgTs_WaitSys) {
 471       if (VG_(clo_trace_signals)) {
 472          VG_(message)(Vg_DebugMsg,
 473                       "get_thread_out_of_syscall zaps tid %u lwp %d\n",
 474                       tid, VG_(threads)[tid].os_state.lwpid);
 475       }
 476 #     if defined(VGO_darwin)
 477       {
 478          // GrP fixme use mach primitives on darwin?
 479          // GrP fixme thread_abort_safely?
 480          // GrP fixme race for thread with WaitSys set but not in syscall yet?
 481          extern kern_return_t thread_abort(mach_port_t);
 482          thread_abort(VG_(threads)[tid].os_state.lwpid);
 483       }
 484 #     else
 485       {
 486          __attribute__((unused))
 487          Int r = VG_(tkill)(VG_(threads)[tid].os_state.lwpid, VG_SIGVGKILL);
 488          /* JRS 2009-Mar-20: should we assert for r==0 (tkill succeeded)?
 489             I'm really not sure.  Here's a race scenario which argues
 490             that we shoudn't; but equally I'm not sure the scenario is
 491             even possible, because of constraints caused by the question
 492             of who holds the BigLock when.
 493
 494             Target thread tid does sys_read on a socket and blocks.  This
 495             function gets called, and we observe correctly that tid's
 496             status is WaitSys but then for whatever reason this function
 497             goes very slowly for a while.  Then data arrives from
 498             wherever, tid's sys_read returns, tid exits.  Then we do
 499             tkill on tid, but tid no longer exists; tkill returns an
 500             error code and the assert fails. */
 501          /* vg_assert(r == 0); */
 502       }
 503 #     endif
 504    }
 505 }
 506
 507 /*
 508    Yield the CPU for a short time to let some other thread run.
 509  */
 510 void VG_(vg_yield)(void)
 511 {
 512    ThreadId tid = VG_(running_tid);
 513
 514    vg_assert(tid != VG_INVALID_THREADID);
 515    vg_assert(VG_(threads)[tid].os_state.lwpid == VG_(gettid)());
 516
 517    VG_(release_BigLock)(tid, VgTs_Yielding, "VG_(vg_yield)");
 518
 519    /*
 520       Tell the kernel we're yielding.
 521     */
 522 #  if defined(VGO_linux) || defined(VGO_darwin)
 523    VG_(do_syscall0)(__NR_sched_yield);
 524 #  elif defined(VGO_solaris)
 525    VG_(do_syscall0)(__NR_yield);
 526 #  else
 527 #    error Unknown OS
 528 #  endif
 529
 530    VG_(acquire_BigLock)(tid, "VG_(vg_yield)");
 531 }
 532
 533
 534 /* Set the standard set of blocked signals, used whenever we're not
 535    running a client syscall. */
 536 static void block_signals(void)
 537 {
 538    vki_sigset_t mask;
 539
 540    VG_(sigfillset)(&mask);
 541
 542    /* Don't block these because they're synchronous */
 543    VG_(sigdelset)(&mask, VKI_SIGSEGV);
 544    VG_(sigdelset)(&mask, VKI_SIGBUS);
 545    VG_(sigdelset)(&mask, VKI_SIGFPE);
 546    VG_(sigdelset)(&mask, VKI_SIGILL);
 547    VG_(sigdelset)(&mask, VKI_SIGTRAP);
 548
 549    /* Can't block these anyway */
 550    VG_(sigdelset)(&mask, VKI_SIGSTOP);
 551    VG_(sigdelset)(&mask, VKI_SIGKILL);
 552
 553    VG_(sigprocmask)(VKI_SIG_SETMASK, &mask, NULL);
 554 }
 555
 556 static void os_state_clear(ThreadState *tst)
 557 {
 558    tst->os_state.lwpid       = 0;
 559    tst->os_state.threadgroup = 0;
 560    tst->os_state.stk_id = NULL_STK_ID;
 561 #  if defined(VGO_linux)
 562    /* no other fields to clear */
 563 #  elif defined(VGO_darwin)
 564    tst->os_state.post_mach_trap_fn = NULL;
 565    tst->os_state.pthread           = 0;
 566    tst->os_state.func_arg          = 0;
 567    VG_(memset)(&tst->os_state.child_go, 0, sizeof(tst->os_state.child_go));
 568    VG_(memset)(&tst->os_state.child_done, 0, sizeof(tst->os_state.child_done));
 569    tst->os_state.wq_jmpbuf_valid   = False;
 570    tst->os_state.remote_port       = 0;
 571    tst->os_state.msgh_id           = 0;
 572    VG_(memset)(&tst->os_state.mach_args, 0, sizeof(tst->os_state.mach_args));
 573 #  elif defined(VGO_solaris)
 574 #  if defined(VGP_x86_solaris)
 575    tst->os_state.thrptr = 0;
 576 #  endif
 577    tst->os_state.ustack = NULL;
 578    tst->os_state.in_door_return = False;
 579    tst->os_state.door_return_procedure = 0;
 580    tst->os_state.oldcontext = NULL;
 581    tst->os_state.schedctl_data = 0;
 582    tst->os_state.daemon_thread = False;
 583 #  else
 584 #    error "Unknown OS"
 585 #  endif
 586 }
 587
 588 static void os_state_init(ThreadState *tst)
 589 {
 590    tst->os_state.valgrind_stack_base    = 0;
 591    tst->os_state.valgrind_stack_init_SP = 0;
 592    os_state_clear(tst);
 593 }
 594
 595 static
 596 void mostly_clear_thread_record ( ThreadId tid )
 597 {
 598    vki_sigset_t savedmask;
 599
 600    vg_assert(tid >= 0 && tid < VG_N_THREADS);
 601    VG_(cleanup_thread)(&VG_(threads)[tid].arch);
 602    VG_(threads)[tid].tid = tid;
 603
 604    /* Leave the thread in Zombie, so that it doesn't get reallocated
 605       until the caller is finally done with the thread stack. */
 606    VG_(threads)[tid].status               = VgTs_Zombie;
 607
 608    VG_(sigemptyset)(&VG_(threads)[tid].sig_mask);
 609    VG_(sigemptyset)(&VG_(threads)[tid].tmp_sig_mask);
 610
 611    os_state_clear(&VG_(threads)[tid]);
 612
 613    /* start with no altstack */
 614    VG_(threads)[tid].altstack.ss_sp = (void *)0xdeadbeef;
 615    VG_(threads)[tid].altstack.ss_size = 0;
 616    VG_(threads)[tid].altstack.ss_flags = VKI_SS_DISABLE;
 617
 618    VG_(clear_out_queued_signals)(tid, &savedmask);
 619
 620    VG_(threads)[tid].sched_jmpbuf_valid = False;
 621 }
 622
 623 /*
 624    Called in the child after fork.  If the parent has multiple
 625    threads, then we've inherited a VG_(threads) array describing them,
 626    but only the thread which called fork() is actually alive in the
 627    child.  This functions needs to clean up all those other thread
 628    structures.
 629
 630    Whichever tid in the parent which called fork() becomes the
 631    master_tid in the child.  That's because the only living slot in
 632    VG_(threads) in the child after fork is VG_(threads)[tid], and it
 633    would be too hard to try to re-number the thread and relocate the
 634    thread state down to VG_(threads)[1].
 635
 636    This function also needs to reinitialize the_BigLock, since
 637    otherwise we may end up sharing its state with the parent, which
 638    would be deeply confusing.
 639 */
 640 static void sched_fork_cleanup(ThreadId me)
 641 {
 642    ThreadId tid;
 643    vg_assert(VG_(running_tid) == me);
 644
 645 #  if defined(VGO_darwin)
 646    // GrP fixme hack reset Mach ports
 647    VG_(mach_init)();
 648 #  endif
 649
 650    VG_(threads)[me].os_state.lwpid = VG_(gettid)();
 651    VG_(threads)[me].os_state.threadgroup = VG_(getpid)();
 652
 653    /* clear out all the unused thread slots */
 654    for (tid = 1; tid < VG_N_THREADS; tid++) {
 655       if (tid != me) {
 656          mostly_clear_thread_record(tid);
 657          VG_(threads)[tid].status = VgTs_Empty;
 658          VG_(clear_syscallInfo)(tid);
 659       }
 660    }
 661
 662    /* re-init and take the sema */
 663    deinit_BigLock();
 664    init_BigLock();
 665    VG_(acquire_BigLock_LL)(NULL);
 666 }
 667
 668
 669 /* First phase of initialisation of the scheduler.  Initialise the
 670    bigLock, zeroise the VG_(threads) structure and decide on the
 671    ThreadId of the root thread.
 672 */
 673 ThreadId VG_(scheduler_init_phase1) ( void )
 674 {
 675    Int i;
 676    ThreadId tid_main;
 677
 678    VG_(debugLog)(1,"sched","sched_init_phase1\n");
 679
 680    if (VG_(clo_fair_sched) != disable_fair_sched
 681        && !ML_(set_sched_lock_impl)(sched_lock_ticket)
 682        && VG_(clo_fair_sched) == enable_fair_sched)
 683    {
 684       VG_(printf)("Error: fair scheduling is not supported on this system.\n");
 685       VG_(exit)(1);
 686    }
 687
 688    if (VG_(clo_verbosity) > 1) {
 689       VG_(message)(Vg_DebugMsg,
 690                    "Scheduler: using %s scheduler lock implementation.\n",
 691                    ML_(get_sched_lock_name)());
 692    }
 693
 694    init_BigLock();
 695
 696    for (i = 0 /* NB; not 1 */; i < VG_N_THREADS; i++) {
 697       /* Paranoia .. completely zero it out. */
 698       VG_(memset)( & VG_(threads)[i], 0, sizeof( VG_(threads)[i] ) );
 699
 700       VG_(threads)[i].sig_queue = NULL;
 701
 702       os_state_init(&VG_(threads)[i]);
 703       mostly_clear_thread_record(i);
 704
 705       VG_(threads)[i].status                    = VgTs_Empty;
 706       VG_(threads)[i].client_stack_szB          = 0;
 707       VG_(threads)[i].client_stack_highest_byte = (Addr)NULL;
 708       VG_(threads)[i].err_disablement_level     = 0;
 709       VG_(threads)[i].thread_name               = NULL;
 710    }
 711
 712    tid_main = VG_(alloc_ThreadState)();
 713
 714    /* Bleh.  Unfortunately there are various places in the system that
 715       assume that the main thread has a ThreadId of 1.
 716       - Helgrind (possibly)
 717       - stack overflow message in default_action() in m_signals.c
 718       - definitely a lot more places
 719    */
 720    vg_assert(tid_main == 1);
 721
 722    return tid_main;
 723 }
 724
 725
 726 /* Second phase of initialisation of the scheduler.  Given the root
 727    ThreadId computed by first phase of initialisation, fill in stack
 728    details and acquire bigLock.  Initialise the scheduler.  This is
 729    called at startup.  The caller subsequently initialises the guest
 730    state components of this main thread.
 731 */
 732 void VG_(scheduler_init_phase2) ( ThreadId tid_main,
 733                                   Addr     clstack_end,
 734                                   SizeT    clstack_size )
 735 {
 736    VG_(debugLog)(1,"sched","sched_init_phase2: tid_main=%u, "
 737                    "cls_end=0x%lx, cls_sz=%lu\n",
 738                    tid_main, clstack_end, clstack_size);
 739
 740    vg_assert(VG_IS_PAGE_ALIGNED(clstack_end+1));
 741    vg_assert(VG_IS_PAGE_ALIGNED(clstack_size));
 742
 743    VG_(threads)[tid_main].client_stack_highest_byte
 744       = clstack_end;
 745    VG_(threads)[tid_main].client_stack_szB
 746       = clstack_size;
 747
 748    VG_(atfork)(NULL, NULL, sched_fork_cleanup);
 749 }
 750
 751
 752 /* ---------------------------------------------------------------------
 753    Helpers for running translations.
 754    ------------------------------------------------------------------ */
 755
 756 /* Use gcc's built-in setjmp/longjmp.  longjmp must not restore signal
 757    mask state, but does need to pass "val" through.  jumped must be a
 758    volatile UWord. */
 759 #define SCHEDSETJMP(tid, jumped, stmt)                                  \
 760    do {                                                                 \
 761       ThreadState * volatile _qq_tst = VG_(get_ThreadState)(tid);       \
 762                                                                         \
 763       (jumped) = VG_MINIMAL_SETJMP(_qq_tst->sched_jmpbuf);              \
 764       if ((jumped) == ((UWord)0)) {                                     \
 765          vg_assert(!_qq_tst->sched_jmpbuf_valid);                       \
 766          _qq_tst->sched_jmpbuf_valid = True;                            \
 767          stmt;                                                          \
 768       } else if (VG_(clo_trace_sched))                                  \
 769          VG_(printf)("SCHEDSETJMP(line %d) tid %u, jumped=%lu\n",       \
 770                      __LINE__, tid, jumped);                            \
 771       vg_assert(_qq_tst->sched_jmpbuf_valid);                           \
 772       _qq_tst->sched_jmpbuf_valid = False;                              \
 773    } while(0)
 774
 775
 776 /* Do various guest state alignment checks prior to running a thread.
 777    Specifically, check that what we have matches Vex's guest state
 778    layout requirements.  See libvex.h for details, but in short the
 779    requirements are: There must be no holes in between the primary
 780    guest state, its two copies, and the spill area.  In short, all 4
 781    areas must be aligned on the LibVEX_GUEST_STATE_ALIGN boundary and
 782    be placed back-to-back without holes in between. */
 783 static void do_pre_run_checks ( volatile ThreadState* tst )
 784 {
 785    Addr a_vex     = (Addr) & tst->arch.vex;
 786    Addr a_vexsh1  = (Addr) & tst->arch.vex_shadow1;
 787    Addr a_vexsh2  = (Addr) & tst->arch.vex_shadow2;
 788    Addr a_spill   = (Addr) & tst->arch.vex_spill;
 789    UInt sz_vex    = (UInt) sizeof tst->arch.vex;
 790    UInt sz_vexsh1 = (UInt) sizeof tst->arch.vex_shadow1;
 791    UInt sz_vexsh2 = (UInt) sizeof tst->arch.vex_shadow2;
 792    UInt sz_spill  = (UInt) sizeof tst->arch.vex_spill;
 793
 794    if (0)
 795    VG_(printf)("gst %p %u, sh1 %p %u, "
 796                "sh2 %p %u, spill %p %u\n",
 797                (void*)a_vex, sz_vex,
 798                (void*)a_vexsh1, sz_vexsh1,
 799                (void*)a_vexsh2, sz_vexsh2,
 800                (void*)a_spill, sz_spill );
 801
 802    vg_assert(sz_vex    % LibVEX_GUEST_STATE_ALIGN == 0);
 803    vg_assert(sz_vexsh1 % LibVEX_GUEST_STATE_ALIGN == 0);
 804    vg_assert(sz_vexsh2 % LibVEX_GUEST_STATE_ALIGN == 0);
 805    vg_assert(sz_spill  % LibVEX_GUEST_STATE_ALIGN == 0);
 806
 807    vg_assert(a_vex    % LibVEX_GUEST_STATE_ALIGN == 0);
 808    vg_assert(a_vexsh1 % LibVEX_GUEST_STATE_ALIGN == 0);
 809    vg_assert(a_vexsh2 % LibVEX_GUEST_STATE_ALIGN == 0);
 810    vg_assert(a_spill  % LibVEX_GUEST_STATE_ALIGN == 0);
 811
 812    /* Check that the guest state and its two shadows have the same
 813       size, and that there are no holes in between.  The latter is
 814       important because Memcheck assumes that it can reliably access
 815       the shadows by indexing off a pointer to the start of the
 816       primary guest state area. */
 817    vg_assert(sz_vex == sz_vexsh1);
 818    vg_assert(sz_vex == sz_vexsh2);
 819    vg_assert(a_vex + 1 * sz_vex == a_vexsh1);
 820    vg_assert(a_vex + 2 * sz_vex == a_vexsh2);
 821    /* Also check there's no hole between the second shadow area and
 822       the spill area. */
 823    vg_assert(sz_spill == LibVEX_N_SPILL_BYTES);
 824    vg_assert(a_vex + 3 * sz_vex == a_spill);
 825
 826 #  if defined(VGA_x86)
 827    /* x86 XMM regs must form an array, ie, have no holes in
 828       between. */
 829    vg_assert(
 830       (offsetof(VexGuestX86State,guest_XMM7)
 831        - offsetof(VexGuestX86State,guest_XMM0))
 832       == (8/*#regs*/-1) * 16/*bytes per reg*/
 833    );
 834    vg_assert(VG_IS_16_ALIGNED(offsetof(VexGuestX86State,guest_XMM0)));
 835    vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestX86State,guest_FPREG)));
 836    vg_assert(8 == offsetof(VexGuestX86State,guest_EAX));
 837    vg_assert(VG_IS_4_ALIGNED(offsetof(VexGuestX86State,guest_EAX)));
 838    vg_assert(VG_IS_4_ALIGNED(offsetof(VexGuestX86State,guest_EIP)));
 839 #  endif
 840
 841 #  if defined(VGA_amd64)
 842    /* amd64 YMM regs must form an array, ie, have no holes in
 843       between. */
 844    vg_assert(
 845       (offsetof(VexGuestAMD64State,guest_YMM16)
 846        - offsetof(VexGuestAMD64State,guest_YMM0))
 847       == (17/*#regs*/-1) * 32/*bytes per reg*/
 848    );
 849    vg_assert(VG_IS_16_ALIGNED(offsetof(VexGuestAMD64State,guest_YMM0)));
 850    vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_FPREG)));
 851    vg_assert(16 == offsetof(VexGuestAMD64State,guest_RAX));
 852    vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_RAX)));
 853    vg_assert(VG_IS_8_ALIGNED(offsetof(VexGuestAMD64State,guest_RIP)));
 854 #  endif
 855
 856 #  if defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
 857    /* ppc guest_state vector regs must be 16 byte aligned for
 858       loads/stores.  This is important! */
 859    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_VSR0));
 860    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_VSR0));
 861    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_VSR0));
 862    /* be extra paranoid .. */
 863    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_VSR1));
 864    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_VSR1));
 865    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_VSR1));
 866 #  endif
 867
 868 #  if defined(VGA_arm)
 869    /* arm guest_state VFP regs must be 8 byte aligned for
 870       loads/stores.  Let's use 16 just to be on the safe side. */
 871    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_D0));
 872    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_D0));
 873    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_D0));
 874    /* be extra paranoid .. */
 875    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex.guest_D1));
 876    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow1.guest_D1));
 877    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow2.guest_D1));
 878 #  endif
 879
 880 #  if defined(VGA_arm64)
 881    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex.guest_X0));
 882    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow1.guest_X0));
 883    vg_assert(VG_IS_8_ALIGNED(& tst->arch.vex_shadow2.guest_X0));
 884    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex.guest_Q0));
 885    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow1.guest_Q0));
 886    vg_assert(VG_IS_16_ALIGNED(& tst->arch.vex_shadow2.guest_Q0));
 887 #  endif
 888
 889 #  if defined(VGA_s390x)
 890    /* no special requirements */
 891 #  endif
 892
 893 #  if defined(VGA_mips32) || defined(VGA_mips64)
 894    /* no special requirements */
 895 #  endif
 896 }
 897
 898 // NO_VGDB_POLL value ensures vgdb is not polled, while
 899 // VGDB_POLL_ASAP ensures that the next scheduler call
 900 // will cause a poll.
 901 #define NO_VGDB_POLL    0xffffffffffffffffULL
 902 #define VGDB_POLL_ASAP  0x0ULL
 903
 904 void VG_(disable_vgdb_poll) (void )
 905 {
 906    vgdb_next_poll = NO_VGDB_POLL;
 907 }
 908 void VG_(force_vgdb_poll) ( void )
 909 {
 910    vgdb_next_poll = VGDB_POLL_ASAP;
 911 }
 912
 913 /* Run the thread tid for a while, and return a VG_TRC_* value
 914    indicating why VG_(disp_run_translations) stopped, and possibly an
 915    auxiliary word.  Also, only allow the thread to run for at most
 916    *dispatchCtrP events.  If (as is the normal case) use_alt_host_addr
 917    is False, we are running ordinary redir'd translations, and we
 918    should therefore start by looking up the guest next IP in TT.  If
 919    it is True then we ignore the guest next IP and just run from
 920    alt_host_addr, which presumably points at host code for a no-redir
 921    translation.
 922
 923    Return results are placed in two_words.  two_words[0] is set to the
 924    TRC.  In the case where that is VG_TRC_CHAIN_ME_TO_{SLOW,FAST}_EP,
 925    the address to patch is placed in two_words[1].
 926 */
 927 static
 928 void run_thread_for_a_while ( /*OUT*/HWord* two_words,
 929                               /*MOD*/Int*   dispatchCtrP,
 930                               ThreadId      tid,
 931                               HWord         alt_host_addr,
 932                               Bool          use_alt_host_addr )
 933 {
 934    volatile HWord        jumped         = 0;
 935    volatile ThreadState* tst            = NULL; /* stop gcc complaining */
 936    volatile Int          done_this_time = 0;
 937    volatile HWord        host_code_addr = 0;
 938
 939    /* Paranoia */
 940    vg_assert(VG_(is_valid_tid)(tid));
 941    vg_assert(VG_(is_running_thread)(tid));
 942    vg_assert(!VG_(is_exiting)(tid));
 943    vg_assert(*dispatchCtrP > 0);
 944
 945    tst = VG_(get_ThreadState)(tid);
 946    do_pre_run_checks( tst );
 947    /* end Paranoia */
 948
 949    /* Futz with the XIndir stats counters. */
 950    vg_assert(VG_(stats__n_xIndirs_32) == 0);
 951    vg_assert(VG_(stats__n_xIndir_hits1_32) == 0);
 952    vg_assert(VG_(stats__n_xIndir_hits2_32) == 0);
 953    vg_assert(VG_(stats__n_xIndir_hits3_32) == 0);
 954    vg_assert(VG_(stats__n_xIndir_misses_32) == 0);
 955
 956    /* Clear return area. */
 957    two_words[0] = two_words[1] = 0;
 958
 959    /* Figure out where we're starting from. */
 960    if (use_alt_host_addr) {
 961       /* unusual case -- no-redir translation */
 962       host_code_addr = alt_host_addr;
 963    } else {
 964       /* normal case -- redir translation */
 965       Addr host_from_fast_cache = 0;
 966       Bool found_in_fast_cache
 967          = VG_(lookupInFastCache)( &host_from_fast_cache,
 968                                    (Addr)tst->arch.vex.VG_INSTR_PTR );
 969       if (found_in_fast_cache) {
 970          host_code_addr = host_from_fast_cache;
 971       } else {
 972          Addr res = 0;
 973          /* not found in VG_(tt_fast). Searching here the transtab
 974             improves the performance compared to returning directly
 975             to the scheduler. */
 976          Bool  found = VG_(search_transtab)(&res, NULL, NULL,
 977                                             (Addr)tst->arch.vex.VG_INSTR_PTR,
 978                                             True/*upd cache*/
 979                                             );
 980          if (LIKELY(found)) {
 981             host_code_addr = res;
 982          } else {
 983             /* At this point, we know that we intended to start at a
 984                normal redir translation, but it was not found.  In
 985                which case we can return now claiming it's not
 986                findable. */
 987             two_words[0] = VG_TRC_INNER_FASTMISS; /* hmm, is that right? */
 988             return;
 989          }
 990       }
 991    }
 992    /* We have either a no-redir or a redir translation. */
 993    vg_assert(host_code_addr != 0); /* implausible */
 994
 995    /* there should be no undealt-with signals */
 996    //vg_assert(VG_(threads)[tid].siginfo.si_signo == 0);
 997
 998    /* Set up event counter stuff for the run. */
 999    tst->arch.vex.host_EvC_COUNTER = *dispatchCtrP;
1000    tst->arch.vex.host_EvC_FAILADDR
1001       = (HWord)VG_(fnptr_to_fnentry)( &VG_(disp_cp_evcheck_fail) );
1002
1003    /* Invalidate any in-flight LL/SC transactions, in the case that we're
1004       using the fallback LL/SC implementation.  See bugs 344524 and 369459. */
1005 #  if defined(VGP_mips32_linux) || defined(VGP_mips64_linux) \
1006       || defined(VGP_nanomips_linux)
1007    tst->arch.vex.guest_LLaddr = (RegWord)(-1);
1008 #  elif defined(VGP_arm64_linux)
1009    tst->arch.vex.guest_LLSC_SIZE = 0;
1010 #  endif
1011
1012    if (0) {
1013       vki_sigset_t m;
1014       Int i, err = VG_(sigprocmask)(VKI_SIG_SETMASK, NULL, &m);
1015       vg_assert(err == 0);
1016       VG_(printf)("tid %u: entering code with unblocked signals: ", tid);
1017       for (i = 1; i <= _VKI_NSIG; i++)
1018          if (!VG_(sigismember)(&m, i))
1019             VG_(printf)("%d ", i);
1020       VG_(printf)("\n");
1021    }
1022
1023    /* Set up return-value area. */
1024
1025    // Tell the tool this thread is about to run client code
1026    VG_TRACK( start_client_code, tid, bbs_done );
1027
1028    vg_assert(VG_(in_generated_code) == False);
1029    VG_(in_generated_code) = True;
1030
1031    SCHEDSETJMP(
1032       tid,
1033       jumped,
1034       VG_(disp_run_translations)(
1035          two_words,
1036          (volatile void*)&tst->arch.vex,
1037          host_code_addr
1038       )
1039    );
1040
1041    vg_assert(VG_(in_generated_code) == True);
1042    VG_(in_generated_code) = False;
1043
1044    if (jumped != (HWord)0) {
1045       /* We get here if the client took a fault that caused our signal
1046          handler to longjmp. */
1047       vg_assert(two_words[0] == 0 && two_words[1] == 0); // correct?
1048       two_words[0] = VG_TRC_FAULT_SIGNAL;
1049       two_words[1] = 0;
1050       block_signals();
1051    }
1052
1053    /* Merge the 32-bit XIndir/miss counters into the 64 bit versions,
1054       and zero out the 32-bit ones in preparation for the next run of
1055       generated code. */
1056    stats__n_xIndirs += (ULong)VG_(stats__n_xIndirs_32);
1057    VG_(stats__n_xIndirs_32) = 0;
1058    stats__n_xIndir_hits1 += (ULong)VG_(stats__n_xIndir_hits1_32);
1059    VG_(stats__n_xIndir_hits1_32) = 0;
1060    stats__n_xIndir_hits2 += (ULong)VG_(stats__n_xIndir_hits2_32);
1061    VG_(stats__n_xIndir_hits2_32) = 0;
1062    stats__n_xIndir_hits3 += (ULong)VG_(stats__n_xIndir_hits3_32);
1063    VG_(stats__n_xIndir_hits3_32) = 0;
1064    stats__n_xIndir_misses += (ULong)VG_(stats__n_xIndir_misses_32);
1065    VG_(stats__n_xIndir_misses_32) = 0;
1066
1067    /* Inspect the event counter. */
1068    vg_assert((Int)tst->arch.vex.host_EvC_COUNTER >= -1);
1069    vg_assert(tst->arch.vex.host_EvC_FAILADDR
1070              == (HWord)VG_(fnptr_to_fnentry)( &VG_(disp_cp_evcheck_fail)) );
1071
1072    /* The number of events done this time is the difference between
1073       the event counter originally and what it is now.  Except -- if
1074       it has gone negative (to -1) then the transition 0 to -1 doesn't
1075       correspond to a real executed block, so back it out.  It's like
1076       this because the event checks decrement the counter first and
1077       check it for negativeness second, hence the 0 to -1 transition
1078       causes a bailout and the block it happens in isn't executed. */
1079    {
1080      Int dispatchCtrAfterwards = (Int)tst->arch.vex.host_EvC_COUNTER;
1081      done_this_time = *dispatchCtrP - dispatchCtrAfterwards;
1082      if (dispatchCtrAfterwards == -1) {
1083         done_this_time--;
1084      } else {
1085         /* If the generated code drives the counter below -1, something
1086            is seriously wrong. */
1087         vg_assert(dispatchCtrAfterwards >= 0);
1088      }
1089    }
1090
1091    vg_assert(done_this_time >= 0);
1092    bbs_done += (ULong)done_this_time;
1093
1094    *dispatchCtrP -= done_this_time;
1095    vg_assert(*dispatchCtrP >= 0);
1096
1097    // Tell the tool this thread has stopped running client code
1098    VG_TRACK( stop_client_code, tid, bbs_done );
1099
1100    if (bbs_done >= vgdb_next_poll) {
1101       if (VG_(clo_vgdb_poll))
1102          vgdb_next_poll = bbs_done + (ULong)VG_(clo_vgdb_poll);
1103       else
1104          /* value was changed due to gdbserver invocation via ptrace */
1105          vgdb_next_poll = NO_VGDB_POLL;
1106       if (VG_(gdbserver_activity) (tid))
1107          VG_(gdbserver) (tid);
1108    }
1109
1110    /* TRC value and possible auxiliary patch-address word are already
1111       in two_words[0] and [1] respectively, as a result of the call to
1112       VG_(run_innerloop). */
1113    /* Stay sane .. */
1114    if (two_words[0] == VG_TRC_CHAIN_ME_TO_SLOW_EP
1115        || two_words[0] == VG_TRC_CHAIN_ME_TO_FAST_EP) {
1116       vg_assert(two_words[1] != 0); /* we have a legit patch addr */
1117    } else {
1118       vg_assert(two_words[1] == 0); /* nobody messed with it */
1119    }
1120 }
1121
1122
1123 /* ---------------------------------------------------------------------
1124    The scheduler proper.
1125    ------------------------------------------------------------------ */
1126
1127 static void handle_tt_miss ( ThreadId tid )
1128 {
1129    Bool found;
1130    Addr ip = VG_(get_IP)(tid);
1131
1132    /* Trivial event.  Miss in the fast-cache.  Do a full
1133       lookup for it. */
1134    found = VG_(search_transtab)( NULL, NULL, NULL,
1135                                  ip, True/*upd_fast_cache*/ );
1136    if (UNLIKELY(!found)) {
1137       /* Not found; we need to request a translation. */
1138       if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/,
1139                           bbs_done, True/*allow redirection*/ )) {
1140          found = VG_(search_transtab)( NULL, NULL, NULL,
1141                                        ip, True );
1142          vg_assert2(found, "handle_tt_miss: missing tt_fast entry");
1143
1144       } else {
1145          // If VG_(translate)() fails, it's because it had to throw a
1146          // signal because the client jumped to a bad address.  That
1147          // means that either a signal has been set up for delivery,
1148          // or the thread has been marked for termination.  Either
1149          // way, we just need to go back into the scheduler loop.
1150       }
1151    }
1152 }
1153
1154 static
1155 void handle_chain_me ( ThreadId tid, void* place_to_chain, Bool toFastEP )
1156 {
1157    Bool found          = False;
1158    Addr ip             = VG_(get_IP)(tid);
1159    SECno to_sNo         = INV_SNO;
1160    TTEno to_tteNo       = INV_TTE;
1161
1162    found = VG_(search_transtab)( NULL, &to_sNo, &to_tteNo,
1163                                  ip, False/*dont_upd_fast_cache*/ );
1164    if (!found) {
1165       /* Not found; we need to request a translation. */
1166       if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/,
1167                           bbs_done, True/*allow redirection*/ )) {
1168          found = VG_(search_transtab)( NULL, &to_sNo, &to_tteNo,
1169                                        ip, False );
1170          vg_assert2(found, "handle_chain_me: missing tt_fast entry");
1171       } else {
1172          // If VG_(translate)() fails, it's because it had to throw a
1173          // signal because the client jumped to a bad address.  That
1174          // means that either a signal has been set up for delivery,
1175          // or the thread has been marked for termination.  Either
1176          // way, we just need to go back into the scheduler loop.
1177         return;
1178       }
1179    }
1180    vg_assert(found);
1181    vg_assert(to_sNo != INV_SNO);
1182    vg_assert(to_tteNo != INV_TTE);
1183
1184    /* So, finally we know where to patch through to.  Do the patching
1185       and update the various admin tables that allow it to be undone
1186       in the case that the destination block gets deleted. */
1187    VG_(tt_tc_do_chaining)( place_to_chain,
1188                            to_sNo, to_tteNo, toFastEP );
1189 }
1190
1191 static void handle_syscall(ThreadId tid, UInt trc)
1192 {
1193    ThreadState * volatile tst = VG_(get_ThreadState)(tid);
1194    volatile UWord jumped;
1195
1196    /* Syscall may or may not block; either way, it will be
1197       complete by the time this call returns, and we'll be
1198       runnable again.  We could take a signal while the
1199       syscall runs. */
1200
1201    if (VG_(clo_sanity_level) >= 3) {
1202       HChar buf[50];    // large enough
1203       VG_(sprintf)(buf, "(BEFORE SYSCALL, tid %u)", tid);
1204       Bool ok = VG_(am_do_sync_check)(buf, __FILE__, __LINE__);
1205       vg_assert(ok);
1206    }
1207
1208    SCHEDSETJMP(tid, jumped, VG_(client_syscall)(tid, trc));
1209
1210    if (VG_(clo_sanity_level) >= 3) {
1211       HChar buf[50];    // large enough
1212       VG_(sprintf)(buf, "(AFTER SYSCALL, tid %u)", tid);
1213       Bool ok = VG_(am_do_sync_check)(buf, __FILE__, __LINE__);
1214       vg_assert(ok);
1215    }
1216
1217    if (!VG_(is_running_thread)(tid))
1218       VG_(printf)("tid %u not running; VG_(running_tid)=%u, tid %u status %u\n",
1219                   tid, VG_(running_tid), tid, tst->status);
1220    vg_assert(VG_(is_running_thread)(tid));
1221
1222    if (jumped != (UWord)0) {
1223       block_signals();
1224       VG_(poll_signals)(tid);
1225    }
1226 }
1227
1228 /* tid just requested a jump to the noredir version of its current
1229    program counter.  So make up that translation if needed, run it,
1230    and return the resulting thread return code in two_words[]. */
1231 static
1232 void handle_noredir_jump ( /*OUT*/HWord* two_words,
1233                            /*MOD*/Int*   dispatchCtrP,
1234                            ThreadId tid )
1235 {
1236    /* Clear return area. */
1237    two_words[0] = two_words[1] = 0;
1238
1239    Addr  hcode = 0;
1240    Addr  ip    = VG_(get_IP)(tid);
1241
1242    Bool  found = VG_(search_unredir_transtab)( &hcode, ip );
1243    if (!found) {
1244       /* Not found; we need to request a translation. */
1245       if (VG_(translate)( tid, ip, /*debug*/False, 0/*not verbose*/, bbs_done,
1246                           False/*NO REDIRECTION*/ )) {
1247
1248          found = VG_(search_unredir_transtab)( &hcode, ip );
1249          vg_assert2(found, "unredir translation missing after creation?!");
1250       } else {
1251          // If VG_(translate)() fails, it's because it had to throw a
1252          // signal because the client jumped to a bad address.  That
1253          // means that either a signal has been set up for delivery,
1254          // or the thread has been marked for termination.  Either
1255          // way, we just need to go back into the scheduler loop.
1256          two_words[0] = VG_TRC_BORING;
1257          return;
1258       }
1259
1260    }
1261
1262    vg_assert(found);
1263    vg_assert(hcode != 0);
1264
1265    /* Otherwise run it and return the resulting VG_TRC_* value. */
1266    vg_assert(*dispatchCtrP > 0); /* so as to guarantee progress */
1267    run_thread_for_a_while( two_words, dispatchCtrP, tid,
1268                            hcode, True/*use hcode*/ );
1269 }
1270
1271
1272 /*
1273    Run a thread until it wants to exit.
1274
1275    We assume that the caller has already called VG_(acquire_BigLock) for
1276    us, so we own the VCPU.  Also, all signals are blocked.
1277  */
1278 VgSchedReturnCode VG_(scheduler) ( ThreadId tid )
1279 {
1280    /* Holds the remaining size of this thread's "timeslice". */
1281    Int dispatch_ctr = 0;
1282
1283    ThreadState *tst = VG_(get_ThreadState)(tid);
1284    static Bool vgdb_startup_action_done = False;
1285
1286    if (VG_(clo_trace_sched))
1287       print_sched_event(tid, "entering VG_(scheduler)");
1288
1289    /* Do vgdb initialization (but once). Only the first (main) task
1290       starting up will do the below.
1291       Initialize gdbserver earlier than at the first
1292       thread VG_(scheduler) is causing problems:
1293       * at the end of VG_(scheduler_init_phase2) :
1294         The main thread is in VgTs_Init state, but in a not yet
1295         consistent state => the thread cannot be reported to gdb
1296         (e.g. causes an assert in LibVEX_GuestX86_get_eflags when giving
1297         back the guest registers to gdb).
1298       * at end of valgrind_main, just
1299         before VG_(main_thread_wrapper_NORETURN)(1) :
1300         The main thread is still in VgTs_Init state but in a
1301         more advanced state. However, the thread state is not yet
1302         completely initialized : a.o., the os_state is not yet fully
1303         set => the thread is then not properly reported to gdb,
1304         which is then confused (causing e.g. a duplicate thread be
1305         shown, without thread id).
1306       * it would be possible to initialize gdbserver "lower" in the
1307         call stack (e.g. in VG_(main_thread_wrapper_NORETURN)) but
1308         these are platform dependent and the place at which
1309         the thread state is completely initialized is not
1310         specific anymore to the main thread (so a similar "do it only
1311         once" would be needed).
1312
1313         => a "once only" initialization here is the best compromise. */
1314    if (!vgdb_startup_action_done) {
1315       vg_assert(tid == 1); // it must be the main thread.
1316       vgdb_startup_action_done = True;
1317       if (VG_(clo_vgdb) != Vg_VgdbNo) {
1318          /* If we have to poll, ensures we do an initial poll at first
1319             scheduler call. Otherwise, ensure no poll (unless interrupted
1320             by ptrace). */
1321          if (VG_(clo_vgdb_poll))
1322             VG_(force_vgdb_poll) ();
1323          else
1324             VG_(disable_vgdb_poll) ();
1325
1326          VG_(gdbserver_prerun_action) (1);
1327       } else {
1328          VG_(disable_vgdb_poll) ();
1329       }
1330    }
1331
1332    if (SimHintiS(SimHint_no_nptl_pthread_stackcache, VG_(clo_sim_hints))
1333        && tid != 1) {
1334       /* We disable the stack cache the first time we see a thread other
1335          than the main thread appearing. At this moment, we are sure the pthread
1336          lib loading is done/variable was initialised by pthread lib/... */
1337       if (VG_(client__stack_cache_actsize__addr)) {
1338          if (*VG_(client__stack_cache_actsize__addr) == 0) {
1339             VG_(debugLog)(1,"sched",
1340                           "pthread stack cache size disable done"
1341                           " via kludge\n");
1342             *VG_(client__stack_cache_actsize__addr) = 1000 * 1000 * 1000;
1343             /* Set a value big enough to be above the hardcoded maximum stack
1344                cache size in glibc, small enough to allow a pthread stack size
1345                to be added without risk of overflow. */
1346          }
1347       } else {
1348           VG_(debugLog)(0,"sched",
1349                         "WARNING: pthread stack cache cannot be disabled!\n");
1350           VG_(clo_sim_hints) &= ~SimHint2S(SimHint_no_nptl_pthread_stackcache);
1351           /* Remove SimHint_no_nptl_pthread_stackcache from VG_(clo_sim_hints)
1352              to avoid having a msg for all following threads. */
1353       }
1354    }
1355
1356    /* set the proper running signal mask */
1357    block_signals();
1358
1359    vg_assert(VG_(is_running_thread)(tid));
1360
1361    dispatch_ctr = SCHEDULING_QUANTUM;
1362
1363    while (!VG_(is_exiting)(tid)) {
1364
1365       vg_assert(dispatch_ctr >= 0);
1366       if (dispatch_ctr == 0) {
1367
1368          /* Our slice is done, so yield the CPU to another thread.  On
1369             Linux, this doesn't sleep between sleeping and running,
1370             since that would take too much time. */
1371
1372          /* 4 July 06: it seems that a zero-length nsleep is needed to
1373             cause async thread cancellation (canceller.c) to terminate
1374             in finite time; else it is in some kind of race/starvation
1375             situation and completion is arbitrarily delayed (although
1376             this is not a deadlock).
1377
1378             Unfortunately these sleeps cause MPI jobs not to terminate
1379             sometimes (some kind of livelock).  So sleeping once
1380             every N opportunities appears to work. */
1381
1382          /* 3 Aug 06: doing sys__nsleep works but crashes some apps.
1383             sys_yield also helps the problem, whilst not crashing apps. */
1384
1385          VG_(release_BigLock)(tid, VgTs_Yielding,
1386                                    "VG_(scheduler):timeslice");
1387          /* ------------ now we don't have The Lock ------------ */
1388
1389          VG_(acquire_BigLock)(tid, "VG_(scheduler):timeslice");
1390          /* ------------ now we do have The Lock ------------ */
1391
1392          /* OK, do some relatively expensive housekeeping stuff */
1393          scheduler_sanity(tid);
1394          VG_(sanity_check_general)(False);
1395
1396          /* Possibly make a progress report */
1397          if (UNLIKELY(VG_(clo_progress_interval) > 0)) {
1398             maybe_progress_report( VG_(clo_progress_interval) );
1399          }
1400
1401          /* Look for any pending signals for this thread, and set them up
1402             for delivery */
1403          VG_(poll_signals)(tid);
1404
1405          if (VG_(is_exiting)(tid))
1406             break;              /* poll_signals picked up a fatal signal */
1407
1408          /* For stats purposes only. */
1409          n_scheduling_events_MAJOR++;
1410
1411          /* Figure out how many bbs to ask vg_run_innerloop to do. */
1412          dispatch_ctr = SCHEDULING_QUANTUM;
1413
1414          /* paranoia ... */
1415          vg_assert(tst->tid == tid);
1416          vg_assert(tst->os_state.lwpid == VG_(gettid)());
1417       }
1418
1419       /* For stats purposes only. */
1420       n_scheduling_events_MINOR++;
1421
1422       if (0)
1423          VG_(message)(Vg_DebugMsg, "thread %u: running for %d bbs\n",
1424                                    tid, dispatch_ctr - 1 );
1425
1426       HWord trc[2]; /* "two_words" */
1427       run_thread_for_a_while( &trc[0],
1428                               &dispatch_ctr,
1429                               tid, 0/*ignored*/, False );
1430
1431       if (VG_(clo_trace_sched) && VG_(clo_verbosity) > 2) {
1432          const HChar *name = name_of_sched_event(trc[0]);
1433          HChar buf[VG_(strlen)(name) + 10];    // large enough
1434          VG_(sprintf)(buf, "TRC: %s", name);
1435          print_sched_event(tid, buf);
1436       }
1437
1438       if (trc[0] == VEX_TRC_JMP_NOREDIR) {
1439          /* If we got a request to run a no-redir version of
1440             something, do so now -- handle_noredir_jump just (creates
1441             and) runs that one translation.  The flip side is that the
1442             noredir translation can't itself return another noredir
1443             request -- that would be nonsensical.  It can, however,
1444             return VG_TRC_BORING, which just means keep going as
1445             normal. */
1446          /* Note that the fact that we need to continue with a
1447             no-redir jump is not recorded anywhere else in this
1448             thread's state.  So we *must* execute the block right now
1449             -- we can't fail to execute it and later resume with it,
1450             because by then we'll have forgotten the fact that it
1451             should be run as no-redir, but will get run as a normal
1452             potentially-redir'd, hence screwing up.  This really ought
1453             to be cleaned up, by noting in the guest state that the
1454             next block to be executed should be no-redir.  Then we can
1455             suspend and resume at any point, which isn't the case at
1456             the moment. */
1457          /* We can't enter a no-redir translation with the dispatch
1458             ctr set to zero, for the reasons commented just above --
1459             we need to force it to execute right now.  So, if the
1460             dispatch ctr is zero, set it to one.  Note that this would
1461             have the bad side effect of holding the Big Lock arbitrary
1462             long should there be an arbitrarily long sequence of
1463             back-to-back no-redir translations to run.  But we assert
1464             just below that this translation cannot request another
1465             no-redir jump, so we should be safe against that. */
1466          if (dispatch_ctr == 0) {
1467             dispatch_ctr = 1;
1468          }
1469          handle_noredir_jump( &trc[0],
1470                               &dispatch_ctr,
1471                               tid );
1472          vg_assert(trc[0] != VEX_TRC_JMP_NOREDIR);
1473
1474          /* This can't be allowed to happen, since it means the block
1475             didn't execute, and we have no way to resume-as-noredir
1476             after we get more timeslice.  But I don't think it ever
1477             can, since handle_noredir_jump will assert if the counter
1478             is zero on entry. */
1479          vg_assert(trc[0] != VG_TRC_INNER_COUNTERZERO);
1480          /* This asserts the same thing. */
1481          vg_assert(dispatch_ctr >= 0);
1482
1483          /* A no-redir translation can't return with a chain-me
1484             request, since chaining in the no-redir cache is too
1485             complex. */
1486          vg_assert(trc[0] != VG_TRC_CHAIN_ME_TO_SLOW_EP
1487                    && trc[0] != VG_TRC_CHAIN_ME_TO_FAST_EP);
1488       }
1489
1490       switch (trc[0]) {
1491       case VEX_TRC_JMP_BORING:
1492          /* assisted dispatch, no event.  Used by no-redir
1493             translations to force return to the scheduler. */
1494       case VG_TRC_BORING:
1495          /* no special event, just keep going. */
1496          break;
1497
1498       case VG_TRC_INNER_FASTMISS:
1499          vg_assert(dispatch_ctr >= 0);
1500          handle_tt_miss(tid);
1501          break;
1502
1503       case VG_TRC_CHAIN_ME_TO_SLOW_EP: {
1504          if (0) VG_(printf)("sched: CHAIN_TO_SLOW_EP: %p\n", (void*)trc[1] );
1505          handle_chain_me(tid, (void*)trc[1], False);
1506          break;
1507       }
1508
1509       case VG_TRC_CHAIN_ME_TO_FAST_EP: {
1510          if (0) VG_(printf)("sched: CHAIN_TO_FAST_EP: %p\n", (void*)trc[1] );
1511          handle_chain_me(tid, (void*)trc[1], True);
1512          break;
1513       }
1514
1515       case VEX_TRC_JMP_CLIENTREQ:
1516          do_client_request(tid);
1517          break;
1518
1519       case VEX_TRC_JMP_SYS_INT128:  /* x86-linux */
1520       case VEX_TRC_JMP_SYS_INT129:  /* x86-darwin */
1521       case VEX_TRC_JMP_SYS_INT130:  /* x86-darwin */
1522       case VEX_TRC_JMP_SYS_INT145:  /* x86-solaris */
1523       case VEX_TRC_JMP_SYS_INT210:  /* x86-solaris */
1524       /* amd64-linux, ppc32-linux, amd64-darwin, amd64-solaris */
1525       case VEX_TRC_JMP_SYS_SYSCALL:
1526          handle_syscall(tid, trc[0]);
1527          if (VG_(clo_sanity_level) > 2)
1528             VG_(sanity_check_general)(True); /* sanity-check every syscall */
1529          break;
1530
1531       case VEX_TRC_JMP_YIELD:
1532          /* Explicit yield, because this thread is in a spin-lock
1533             or something.  Only let the thread run for a short while
1534             longer.  Because swapping to another thread is expensive,
1535             we're prepared to let this thread eat a little more CPU
1536             before swapping to another.  That means that short term
1537             spins waiting for hardware to poke memory won't cause a
1538             thread swap. */
1539          if (dispatch_ctr > 300)
1540             dispatch_ctr = 300;
1541          break;
1542
1543       case VG_TRC_INNER_COUNTERZERO:
1544          /* Timeslice is out.  Let a new thread be scheduled. */
1545          vg_assert(dispatch_ctr == 0);
1546          break;
1547
1548       case VG_TRC_FAULT_SIGNAL:
1549          /* Everything should be set up (either we're exiting, or
1550             about to start in a signal handler). */
1551          break;
1552
1553       case VEX_TRC_JMP_MAPFAIL:
1554          /* Failure of arch-specific address translation (x86/amd64
1555             segment override use) */
1556          /* jrs 2005 03 11: is this correct? */
1557          VG_(synth_fault)(tid);
1558          break;
1559
1560       case VEX_TRC_JMP_EMWARN: {
1561          static Int  counts[EmNote_NUMBER];
1562          static Bool counts_initted = False;
1563          VexEmNote ew;
1564          const HChar* what;
1565          Bool      show;
1566          Int       q;
1567          if (!counts_initted) {
1568             counts_initted = True;
1569             for (q = 0; q < EmNote_NUMBER; q++)
1570                counts[q] = 0;
1571          }
1572          ew   = (VexEmNote)VG_(threads)[tid].arch.vex.guest_EMNOTE;
1573          what = (ew < 0 || ew >= EmNote_NUMBER)
1574                    ? "unknown (?!)"
1575                    : LibVEX_EmNote_string(ew);
1576          show = (ew < 0 || ew >= EmNote_NUMBER)
1577                    ? True
1578                    : counts[ew]++ < 3;
1579          if (show && VG_(clo_show_emwarns) && !VG_(clo_xml)) {
1580             VG_(message)( Vg_UserMsg,
1581                           "Emulation warning: unsupported action:\n");
1582             VG_(message)( Vg_UserMsg, "  %s\n", what);
1583             VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
1584          }
1585          break;
1586       }
1587
1588       case VEX_TRC_JMP_EMFAIL: {
1589          VexEmNote ew;
1590          const HChar* what;
1591          ew   = (VexEmNote)VG_(threads)[tid].arch.vex.guest_EMNOTE;
1592          what = (ew < 0 || ew >= EmNote_NUMBER)
1593                    ? "unknown (?!)"
1594                    : LibVEX_EmNote_string(ew);
1595          VG_(message)( Vg_UserMsg,
1596                        "Emulation fatal error -- Valgrind cannot continue:\n");
1597          VG_(message)( Vg_UserMsg, "  %s\n", what);
1598          VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
1599          VG_(message)(Vg_UserMsg, "\n");
1600          VG_(message)(Vg_UserMsg, "Valgrind has to exit now.  Sorry.\n");
1601          VG_(message)(Vg_UserMsg, "\n");
1602          VG_(exit)(1);
1603          break;
1604       }
1605
1606       case VEX_TRC_JMP_SIGILL:
1607          VG_(synth_sigill)(tid, VG_(get_IP)(tid));
1608          break;
1609
1610       case VEX_TRC_JMP_SIGTRAP:
1611          VG_(synth_sigtrap)(tid);
1612          break;
1613
1614       case VEX_TRC_JMP_SIGSEGV:
1615          VG_(synth_fault)(tid);
1616          break;
1617
1618       case VEX_TRC_JMP_SIGBUS:
1619          VG_(synth_sigbus)(tid);
1620          break;
1621
1622       case VEX_TRC_JMP_SIGFPE:
1623          VG_(synth_sigfpe)(tid, 0);
1624          break;
1625
1626       case VEX_TRC_JMP_SIGFPE_INTDIV:
1627          VG_(synth_sigfpe)(tid, VKI_FPE_INTDIV);
1628          break;
1629
1630       case VEX_TRC_JMP_SIGFPE_INTOVF:
1631          VG_(synth_sigfpe)(tid, VKI_FPE_INTOVF);
1632          break;
1633
1634       case VEX_TRC_JMP_NODECODE: {
1635          Addr addr = VG_(get_IP)(tid);
1636
1637          if (VG_(clo_sigill_diag)) {
1638             VG_(umsg)(
1639                "valgrind: Unrecognised instruction at address %#lx.\n", addr);
1640             VG_(get_and_pp_StackTrace)(tid, VG_(clo_backtrace_size));
1641 #        define M(a) VG_(umsg)(a "\n");
1642          M("Your program just tried to execute an instruction that Valgrind" );
1643          M("did not recognise.  There are two possible reasons for this."    );
1644          M("1. Your program has a bug and erroneously jumped to a non-code"  );
1645          M("   location.  If you are running Memcheck and you just saw a"    );
1646          M("   warning about a bad jump, it's probably your program's fault.");
1647          M("2. The instruction is legitimate but Valgrind doesn't handle it,");
1648          M("   i.e. it's Valgrind's fault.  If you think this is the case or");
1649          M("   you are not sure, please let us know and we'll try to fix it.");
1650          M("Either way, Valgrind will now raise a SIGILL signal which will"  );
1651          M("probably kill your program."                                     );
1652 #        undef M
1653          }
1654 #        if defined(VGA_s390x)
1655          /* Now that the complaint is out we need to adjust the guest_IA. The
1656             reason is that -- after raising the exception -- execution will
1657             continue with the insn that follows the invalid insn. As the first
1658             2 bits of the invalid insn determine its length in the usual way,
1659             we can compute the address of the next insn here and adjust the
1660             guest_IA accordingly. This adjustment is essential and tested by
1661             none/tests/s390x/op_exception.c (which would loop forever
1662             otherwise) */
1663          UChar byte = ((UChar *)addr)[0];
1664          UInt  insn_length = ((((byte >> 6) + 1) >> 1) + 1) << 1;
1665          Addr  next_insn_addr = addr + insn_length;
1666          VG_(set_IP)(tid, next_insn_addr);
1667 #        endif
1668          VG_(synth_sigill)(tid, addr);
1669          break;
1670       }
1671
1672       case VEX_TRC_JMP_INVALICACHE:
1673          VG_(discard_translations)(
1674             (Addr)VG_(threads)[tid].arch.vex.guest_CMSTART,
1675             VG_(threads)[tid].arch.vex.guest_CMLEN,
1676             "scheduler(VEX_TRC_JMP_INVALICACHE)"
1677          );
1678          if (0)
1679             VG_(printf)("dump translations done.\n");
1680          break;
1681
1682       case VEX_TRC_JMP_FLUSHDCACHE: {
1683          void* start = (void*)(Addr)VG_(threads)[tid].arch.vex.guest_CMSTART;
1684          SizeT len   = VG_(threads)[tid].arch.vex.guest_CMLEN;
1685          VG_(debugLog)(2, "sched", "flush_dcache(%p, %lu)\n", start, len);
1686          VG_(flush_dcache)(start, len);
1687          break;
1688       }
1689
1690       case VG_TRC_INVARIANT_FAILED:
1691          /* This typically happens if, after running generated code,
1692             it is detected that host CPU settings (eg, FPU/Vector
1693             control words) are not as they should be.  Vex's code
1694             generation specifies the state such control words should
1695             be in on entry to Vex-generated code, and they should be
1696             unchanged on exit from it.  Failure of this assertion
1697             usually means a bug in Vex's code generation. */
1698          //{ UInt xx;
1699          //  __asm__ __volatile__ (
1700          //     "\t.word 0xEEF12A10\n"  // fmrx r2,fpscr
1701          //     "\tmov %0, r2" : "=r"(xx) : : "r2" );
1702          //  VG_(printf)("QQQQ new fpscr = %08x\n", xx);
1703          //}
1704          vg_assert2(0, "VG_(scheduler), phase 3: "
1705                        "run_innerloop detected host "
1706                        "state invariant failure", trc);
1707
1708       case VEX_TRC_JMP_SYS_SYSENTER:
1709          /* Do whatever simulation is appropriate for an x86 sysenter
1710             instruction.  Note that it is critical to set this thread's
1711             guest_EIP to point at the code to execute after the
1712             sysenter, since Vex-generated code will not have set it --
1713             vex does not know what it should be.  Vex sets the next
1714             address to zero, so if you don't set guest_EIP, the thread
1715             will jump to zero afterwards and probably die as a result. */
1716 #        if defined(VGP_x86_linux)
1717          vg_assert2(0, "VG_(scheduler), phase 3: "
1718                        "sysenter_x86 on x86-linux is not supported");
1719 #        elif defined(VGP_x86_darwin) || defined(VGP_x86_solaris)
1720          /* return address in client edx */
1721          VG_(threads)[tid].arch.vex.guest_EIP
1722             = VG_(threads)[tid].arch.vex.guest_EDX;
1723          handle_syscall(tid, trc[0]);
1724 #        else
1725          vg_assert2(0, "VG_(scheduler), phase 3: "
1726                        "sysenter_x86 on non-x86 platform?!?!");
1727 #        endif
1728          break;
1729
1730       default:
1731          vg_assert2(0, "VG_(scheduler), phase 3: "
1732                        "unexpected thread return code (%u)", trc[0]);
1733          /* NOTREACHED */
1734          break;
1735
1736       } /* switch (trc) */
1737
1738       if (UNLIKELY(VG_(clo_profyle_sbs)) && VG_(clo_profyle_interval) > 0)
1739          maybe_show_sb_profile();
1740    }
1741
1742    if (VG_(clo_trace_sched))
1743       print_sched_event(tid, "exiting VG_(scheduler)");
1744
1745    vg_assert(VG_(is_exiting)(tid));
1746
1747    return tst->exitreason;
1748 }
1749
1750
1751 void VG_(nuke_all_threads_except) ( ThreadId me, VgSchedReturnCode src )
1752 {
1753    ThreadId tid;
1754
1755    vg_assert(VG_(is_running_thread)(me));
1756
1757    for (tid = 1; tid < VG_N_THREADS; tid++) {
1758       if (tid == me
1759           || VG_(threads)[tid].status == VgTs_Empty)
1760          continue;
1761       if (0)
1762          VG_(printf)(
1763             "VG_(nuke_all_threads_except): nuking tid %u\n", tid);
1764
1765       VG_(threads)[tid].exitreason = src;
1766       if (src == VgSrc_FatalSig)
1767          VG_(threads)[tid].os_state.fatalsig = VKI_SIGKILL;
1768       VG_(get_thread_out_of_syscall)(tid);
1769    }
1770 }
1771
1772
1773 /* ---------------------------------------------------------------------
1774    Specifying shadow register values
1775    ------------------------------------------------------------------ */
1776
1777 #if defined(VGA_x86)
1778 #  define VG_CLREQ_ARGS       guest_EAX
1779 #  define VG_CLREQ_RET        guest_EDX
1780 #elif defined(VGA_amd64)
1781 #  define VG_CLREQ_ARGS       guest_RAX
1782 #  define VG_CLREQ_RET        guest_RDX
1783 #elif defined(VGA_ppc32) || defined(VGA_ppc64be) || defined(VGA_ppc64le)
1784 #  define VG_CLREQ_ARGS       guest_GPR4
1785 #  define VG_CLREQ_RET        guest_GPR3
1786 #elif defined(VGA_arm)
1787 #  define VG_CLREQ_ARGS       guest_R4
1788 #  define VG_CLREQ_RET        guest_R3
1789 #elif defined(VGA_arm64)
1790 #  define VG_CLREQ_ARGS       guest_X4
1791 #  define VG_CLREQ_RET        guest_X3
1792 #elif defined (VGA_s390x)
1793 #  define VG_CLREQ_ARGS       guest_r2
1794 #  define VG_CLREQ_RET        guest_r3
1795 #elif defined(VGA_mips32) || defined(VGA_mips64) || defined(VGA_nanomips)
1796 #  define VG_CLREQ_ARGS       guest_r12
1797 #  define VG_CLREQ_RET        guest_r11
1798 #else
1799 #  error Unknown arch
1800 #endif
1801
1802 #define CLREQ_ARGS(regs)   ((regs).vex.VG_CLREQ_ARGS)
1803 #define CLREQ_RET(regs)    ((regs).vex.VG_CLREQ_RET)
1804 #define O_CLREQ_RET        (offsetof(VexGuestArchState, VG_CLREQ_RET))
1805
1806 // These macros write a value to a client's thread register, and tell the
1807 // tool that it's happened (if necessary).
1808
1809 #define SET_CLREQ_RETVAL(zztid, zzval) \
1810    do { CLREQ_RET(VG_(threads)[zztid].arch) = (zzval); \
1811         VG_TRACK( post_reg_write, \
1812                   Vg_CoreClientReq, zztid, O_CLREQ_RET, sizeof(UWord)); \
1813    } while (0)
1814
1815 #define SET_CLCALL_RETVAL(zztid, zzval, f) \
1816    do { CLREQ_RET(VG_(threads)[zztid].arch) = (zzval); \
1817         VG_TRACK( post_reg_write_clientcall_return, \
1818                   zztid, O_CLREQ_RET, sizeof(UWord), f); \
1819    } while (0)
1820
1821
1822 /* ---------------------------------------------------------------------
1823    Handle client requests.
1824    ------------------------------------------------------------------ */
1825
1826 // OS-specific(?) client requests
1827 static Bool os_client_request(ThreadId tid, UWord *args)
1828 {
1829    Bool handled = True;
1830
1831    vg_assert(VG_(is_running_thread)(tid));
1832
1833    switch(args[0]) {
1834    case VG_USERREQ__FREERES_DONE:
1835       /* This is equivalent to an exit() syscall, but we don't set the
1836          exitcode (since it might already be set) */
1837       if (0 || VG_(clo_trace_syscalls) || VG_(clo_trace_sched))
1838          VG_(message)(Vg_DebugMsg,
1839                       "__gnu_cxx::__freeres() and __libc_freeres() wrapper "
1840                       "done; really quitting!\n");
1841       VG_(threads)[tid].exitreason = VgSrc_ExitThread;
1842       break;
1843
1844    default:
1845       handled = False;
1846       break;
1847    }
1848
1849    return handled;
1850 }
1851
1852
1853 /* Write out a client message, possibly including a back trace. Return
1854    the number of characters written. In case of XML output, the format
1855    string as well as any arguments it requires will be XML'ified.
1856    I.e. special characters such as the angle brackets will be translated
1857    into proper escape sequences. */
1858 static
1859 Int print_client_message( ThreadId tid, const HChar *format,
1860                           va_list *vargsp, Bool include_backtrace)
1861 {
1862    Int count;
1863
1864    if (VG_(clo_xml)) {
1865       /* Translate the format string as follows:
1866          <  -->  &lt;
1867          >  -->  &gt;
1868          &  -->  &amp;
1869          %s -->  %pS
1870          Yes, yes, it's simplified but in synch with
1871          myvprintf_str_XML_simplistic and VG_(debugLog_vprintf).
1872       */
1873
1874       /* Allocate a buffer that is for sure large enough. */
1875       HChar xml_format[VG_(strlen)(format) * 5 + 1];
1876
1877       const HChar *p;
1878       HChar *q = xml_format;
1879
1880       for (p = format; *p; ++p) {
1881          switch (*p) {
1882          case '<': VG_(strcpy)(q, "&lt;");  q += 4; break;
1883          case '>': VG_(strcpy)(q, "&gt;");  q += 4; break;
1884          case '&': VG_(strcpy)(q, "&amp;"); q += 5; break;
1885          case '%':
1886             /* Careful: make sure %%s stays %%s */
1887             *q++ = *p++;
1888             if (*p == 's') {
1889               *q++ = 'p';
1890               *q++ = 'S';
1891             } else {
1892               *q++ = *p;
1893             }
1894             break;
1895
1896          default:
1897             *q++ = *p;
1898             break;
1899          }
1900       }
1901       *q = '\0';
1902
1903       VG_(printf_xml)( "<clientmsg>\n" );
1904       VG_(printf_xml)( "  <tid>%u</tid>\n", tid );
1905       const ThreadState *tst = VG_(get_ThreadState)(tid);
1906       if (tst->thread_name)
1907          VG_(printf_xml)("  <threadname>%s</threadname>\n", tst->thread_name);
1908       VG_(printf_xml)( "  <text>" );
1909       count = VG_(vprintf_xml)( xml_format, *vargsp );
1910       VG_(printf_xml)( "  </text>\n" );
1911    } else {
1912       count = VG_(vmessage)( Vg_ClientMsg, format, *vargsp );
1913       VG_(message_flush)();
1914    }
1915
1916    if (include_backtrace)
1917       VG_(get_and_pp_StackTrace)( tid, VG_(clo_backtrace_size) );
1918
1919    if (VG_(clo_xml))
1920       VG_(printf_xml)( "</clientmsg>\n" );
1921
1922    return count;
1923 }
1924
1925
1926 /* Do a client request for the thread tid.  After the request, tid may
1927    or may not still be runnable; if not, the scheduler will have to
1928    choose a new thread to run.
1929 */
1930 static
1931 void do_client_request ( ThreadId tid )
1932 {
1933    UWord* arg = (UWord*)(Addr)(CLREQ_ARGS(VG_(threads)[tid].arch));
1934    UWord req_no = arg[0];
1935
1936    if (0)
1937       VG_(printf)("req no = 0x%lx, arg = %p\n", req_no, arg);
1938    switch (req_no) {
1939
1940       case VG_USERREQ__CLIENT_CALL0: {
1941          UWord (*f)(ThreadId) = (__typeof__(f))arg[1];
1942          if (f == NULL)
1943             VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL0: func=%p\n", f);
1944          else
1945             SET_CLCALL_RETVAL(tid, f ( tid ), (Addr)f);
1946          break;
1947       }
1948       case VG_USERREQ__CLIENT_CALL1: {
1949          UWord (*f)(ThreadId, UWord) = (__typeof__(f))arg[1];
1950          if (f == NULL)
1951             VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL1: func=%p\n", f);
1952          else
1953             SET_CLCALL_RETVAL(tid, f ( tid, arg[2] ), (Addr)f );
1954          break;
1955       }
1956       case VG_USERREQ__CLIENT_CALL2: {
1957          UWord (*f)(ThreadId, UWord, UWord) = (__typeof__(f))arg[1];
1958          if (f == NULL)
1959             VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL2: func=%p\n", f);
1960          else
1961             SET_CLCALL_RETVAL(tid, f ( tid, arg[2], arg[3] ), (Addr)f );
1962          break;
1963       }
1964       case VG_USERREQ__CLIENT_CALL3: {
1965          UWord (*f)(ThreadId, UWord, UWord, UWord) = (__typeof__(f))arg[1];
1966          if (f == NULL)
1967             VG_(message)(Vg_DebugMsg, "VG_USERREQ__CLIENT_CALL3: func=%p\n", f);
1968          else
1969             SET_CLCALL_RETVAL(tid, f ( tid, arg[2], arg[3], arg[4] ), (Addr)f );
1970          break;
1971       }
1972
1973       // Nb: this looks like a circular definition, because it kind of is.
1974       // See comment in valgrind.h to understand what's going on.
1975       case VG_USERREQ__RUNNING_ON_VALGRIND:
1976          SET_CLREQ_RETVAL(tid, RUNNING_ON_VALGRIND+1);
1977          break;
1978
1979       case VG_USERREQ__PRINTF: {
1980          const HChar* format = (HChar *)arg[1];
1981          /* JRS 2010-Jan-28: this is DEPRECATED; use the
1982             _VALIST_BY_REF version instead */
1983          if (sizeof(va_list) != sizeof(UWord))
1984             goto va_list_casting_error_NORETURN;
1985          union {
1986             va_list vargs;
1987             unsigned long uw;
1988          } u;
1989          u.uw = (unsigned long)arg[2];
1990          Int count =
1991             print_client_message( tid, format, &u.vargs,
1992                                   /* include_backtrace */ False );
1993          SET_CLREQ_RETVAL( tid, count );
1994          break;
1995       }
1996
1997       case VG_USERREQ__PRINTF_BACKTRACE: {
1998          const HChar* format = (HChar *)arg[1];
1999          /* JRS 2010-Jan-28: this is DEPRECATED; use the
2000             _VALIST_BY_REF version instead */
2001          if (sizeof(va_list) != sizeof(UWord))
2002             goto va_list_casting_error_NORETURN;
2003          union {
2004             va_list vargs;
2005             unsigned long uw;
2006          } u;
2007          u.uw = (unsigned long)arg[2];
2008          Int count =
2009             print_client_message( tid, format, &u.vargs,
2010                                   /* include_backtrace */ True );
2011          SET_CLREQ_RETVAL( tid, count );
2012          break;
2013       }
2014
2015       case VG_USERREQ__PRINTF_VALIST_BY_REF: {
2016          const HChar* format = (HChar *)arg[1];
2017          va_list* vargsp = (va_list*)arg[2];
2018          Int count =
2019             print_client_message( tid, format, vargsp,
2020                                   /* include_backtrace */ False );
2021
2022          SET_CLREQ_RETVAL( tid, count );
2023          break;
2024       }
2025
2026       case VG_USERREQ__PRINTF_BACKTRACE_VALIST_BY_REF: {
2027          const HChar* format = (HChar *)arg[1];
2028          va_list* vargsp = (va_list*)arg[2];
2029          Int count =
2030             print_client_message( tid, format, vargsp,
2031                                   /* include_backtrace */ True );
2032          SET_CLREQ_RETVAL( tid, count );
2033          break;
2034       }
2035
2036       case VG_USERREQ__INTERNAL_PRINTF_VALIST_BY_REF: {
2037          va_list* vargsp = (va_list*)arg[2];
2038          Int count =
2039             VG_(vmessage)( Vg_DebugMsg, (HChar *)arg[1], *vargsp );
2040          VG_(message_flush)();
2041          SET_CLREQ_RETVAL( tid, count );
2042          break;
2043       }
2044
2045       case VG_USERREQ__ADD_IFUNC_TARGET: {
2046          VG_(redir_add_ifunc_target)( arg[1], arg[2] );
2047          SET_CLREQ_RETVAL( tid, 0);
2048          break; }
2049
2050       case VG_USERREQ__STACK_REGISTER: {
2051          UWord sid = VG_(register_stack)((Addr)arg[1], (Addr)arg[2]);
2052          SET_CLREQ_RETVAL( tid, sid );
2053          VG_TRACK(register_stack, (Addr)arg[1], (Addr)arg[2]);
2054          break; }
2055
2056       case VG_USERREQ__STACK_DEREGISTER: {
2057          VG_(deregister_stack)(arg[1]);
2058          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
2059          break; }
2060
2061       case VG_USERREQ__STACK_CHANGE: {
2062          VG_(change_stack)(arg[1], (Addr)arg[2], (Addr)arg[3]);
2063          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
2064          break; }
2065
2066       case VG_USERREQ__GET_MALLOCFUNCS: {
2067          struct vg_mallocfunc_info *info = (struct vg_mallocfunc_info *)arg[1];
2068
2069          info->tl_malloc               = VG_(tdict).tool_malloc;
2070          info->tl_calloc               = VG_(tdict).tool_calloc;
2071          info->tl_realloc              = VG_(tdict).tool_realloc;
2072          info->tl_memalign             = VG_(tdict).tool_memalign;
2073          info->tl___builtin_new        = VG_(tdict).tool___builtin_new;
2074          info->tl___builtin_vec_new    = VG_(tdict).tool___builtin_vec_new;
2075          info->tl_free                 = VG_(tdict).tool_free;
2076          info->tl___builtin_delete     = VG_(tdict).tool___builtin_delete;
2077          info->tl___builtin_vec_delete = VG_(tdict).tool___builtin_vec_delete;
2078          info->tl_malloc_usable_size   = VG_(tdict).tool_malloc_usable_size;
2079
2080          info->mallinfo                = VG_(mallinfo);
2081          info->clo_trace_malloc        = VG_(clo_trace_malloc);
2082
2083          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
2084
2085          break;
2086       }
2087
2088       /* Requests from the client program */
2089
2090       case VG_USERREQ__DISCARD_TRANSLATIONS:
2091          if (VG_(clo_verbosity) > 2)
2092             VG_(printf)( "client request: DISCARD_TRANSLATIONS,"
2093                          " addr %p,  len %lu\n",
2094                          (void*)arg[1], arg[2] );
2095
2096          VG_(discard_translations)(
2097             arg[1], arg[2], "scheduler(VG_USERREQ__DISCARD_TRANSLATIONS)"
2098          );
2099
2100          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
2101          break;
2102
2103       case VG_USERREQ__INNER_THREADS:
2104          if (VG_(clo_verbosity) > 2)
2105             VG_(printf)( "client request: INNER_THREADS,"
2106                          " addr %p\n",
2107                          (void*)arg[1] );
2108          VG_(inner_threads) = (ThreadState*)arg[1];
2109          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
2110          break;
2111
2112       case VG_USERREQ__COUNT_ERRORS:
2113          SET_CLREQ_RETVAL( tid, VG_(get_n_errs_found)() );
2114          break;
2115
2116       case VG_USERREQ__CLO_CHANGE:
2117          VG_(process_dynamic_option) (cloD, (HChar *)arg[1]);
2118          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
2119          break;
2120
2121       case VG_USERREQ__LOAD_PDB_DEBUGINFO:
2122          VG_(di_notify_pdb_debuginfo)( arg[1], arg[2], arg[3], arg[4] );
2123          SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
2124          break;
2125
2126       case VG_USERREQ__MAP_IP_TO_SRCLOC: {
2127          Addr   ip    = arg[1];
2128          HChar* buf64 = (HChar*)arg[2];  // points to a HChar [64] array
2129          const HChar *buf;  // points to a string of unknown size
2130
2131          VG_(memset)(buf64, 0, 64);
2132          UInt linenum = 0;
2133
2134          // Unless the guest would become epoch aware (and would need to
2135          // describe IP addresses of dlclosed libs), using cur_ep is a
2136          // reasonable choice.
2137          const DiEpoch cur_ep = VG_(current_DiEpoch)();
2138
2139          Bool ok = VG_(get_filename_linenum)(
2140                       cur_ep, ip, &buf, NULL, &linenum
2141                    );
2142          if (ok) {
2143             /* For backward compatibility truncate the filename to
2144                49 characters. */
2145             VG_(strncpy)(buf64, buf, 50);
2146             buf64[49] = '\0';
2147             UInt i;
2148             for (i = 0; i < 50; i++) {
2149                if (buf64[i] == 0)
2150                   break;
2151             }
2152             VG_(sprintf)(buf64+i, ":%u", linenum);  // safe
2153          } else {
2154             buf64[0] = 0;
2155          }
2156
2157          SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
2158          break;
2159       }
2160
2161       case VG_USERREQ__CHANGE_ERR_DISABLEMENT: {
2162          Word delta = arg[1];
2163          vg_assert(delta == 1 || delta == -1);
2164          ThreadState* tst = VG_(get_ThreadState)(tid);
2165          vg_assert(tst);
2166          if (delta == 1 && tst->err_disablement_level < 0xFFFFFFFF) {
2167             tst->err_disablement_level++;
2168          }
2169          else
2170          if (delta == -1 && tst->err_disablement_level > 0) {
2171             tst->err_disablement_level--;
2172          }
2173          SET_CLREQ_RETVAL( tid, 0 ); /* return value is meaningless */
2174          break;
2175       }
2176
2177       case VG_USERREQ__GDB_MONITOR_COMMAND: {
2178          UWord ret;
2179          ret = (UWord) VG_(client_monitor_command) ((HChar*)arg[1]);
2180          SET_CLREQ_RETVAL(tid, ret);
2181          break;
2182       }
2183
2184       case VG_USERREQ__MALLOCLIKE_BLOCK:
2185       case VG_USERREQ__RESIZEINPLACE_BLOCK:
2186       case VG_USERREQ__FREELIKE_BLOCK:
2187          // Ignore them if the addr is NULL;  otherwise pass onto the tool.
2188          if (!arg[1]) {
2189             SET_CLREQ_RETVAL( tid, 0 );     /* return value is meaningless */
2190             break;
2191          } else {
2192             goto my_default;
2193          }
2194
2195       case VG_USERREQ__VEX_INIT_FOR_IRI:
2196          LibVEX_InitIRI ( (IRICB *)arg[1] );
2197          break;
2198
2199       default:
2200        my_default:
2201          if (os_client_request(tid, arg)) {
2202             // do nothing, os_client_request() handled it
2203          } else if (VG_(needs).client_requests) {
2204             UWord ret;
2205
2206             if (VG_(clo_verbosity) > 2)
2207                VG_(printf)("client request: code %lx,  addr %p,  len %lu\n",
2208                            arg[0], (void*)arg[1], arg[2] );
2209
2210             if ( VG_TDICT_CALL(tool_handle_client_request, tid, arg, &ret) )
2211                SET_CLREQ_RETVAL(tid, ret);
2212          } else {
2213             static Bool whined = False;
2214
2215             if (!whined && VG_(clo_verbosity) > 2) {
2216                // Allow for requests in core, but defined by tools, which
2217                // have 0 and 0 in their two high bytes.
2218                HChar c1 = (arg[0] >> 24) & 0xff;
2219                HChar c2 = (arg[0] >> 16) & 0xff;
2220                if (c1 == 0) c1 = '_';
2221                if (c2 == 0) c2 = '_';
2222                VG_(message)(Vg_UserMsg, "Warning:\n"
2223                    "  unhandled client request: 0x%lx (%c%c+0x%lx).  Perhaps\n"
2224                    "  VG_(needs).client_requests should be set?\n",
2225                             arg[0], c1, c2, arg[0] & 0xffff);
2226                whined = True;
2227             }
2228          }
2229          break;
2230    }
2231    return;
2232
2233    /*NOTREACHED*/
2234   va_list_casting_error_NORETURN:
2235    VG_(umsg)(
2236       "Valgrind: fatal error - cannot continue: use of the deprecated\n"
2237       "client requests VG_USERREQ__PRINTF or VG_USERREQ__PRINTF_BACKTRACE\n"
2238       "on a platform where they cannot be supported.  Please use the\n"
2239       "equivalent _VALIST_BY_REF versions instead.\n"
2240       "\n"
2241       "This is a binary-incompatible change in Valgrind's client request\n"
2242       "mechanism.  It is unfortunate, but difficult to avoid.  End-users\n"
2243       "are expected to almost never see this message.  The only case in\n"
2244       "which you might see this message is if your code uses the macros\n"
2245       "VALGRIND_PRINTF or VALGRIND_PRINTF_BACKTRACE.  If so, you will need\n"
2246       "to recompile such code, using the header files from this version of\n"
2247       "Valgrind, and not any previous version.\n"
2248       "\n"
2249       "If you see this message in any other circumstances, it is probably\n"
2250       "a bug in Valgrind.  In this case, please file a bug report at\n"
2251       "\n"
2252       "   http://www.valgrind.org/support/bug_reports.html\n"
2253       "\n"
2254       "Will now abort.\n"
2255    );
2256    vg_assert(0);
2257 }
2258
2259
2260 /* ---------------------------------------------------------------------
2261    Sanity checking (permanently engaged)
2262    ------------------------------------------------------------------ */
2263
2264 /* Internal consistency checks on the sched structures. */
2265 static
2266 void scheduler_sanity ( ThreadId tid )
2267 {
2268    Bool bad = False;
2269    Int lwpid = VG_(gettid)();
2270
2271    if (!VG_(is_running_thread)(tid)) {
2272       VG_(message)(Vg_DebugMsg,
2273                    "Thread %u is supposed to be running, "
2274                    "but doesn't own the_BigLock (owned by %u)\n",
2275                    tid, VG_(running_tid));
2276       bad = True;
2277    }
2278
2279    if (lwpid != VG_(threads)[tid].os_state.lwpid) {
2280       VG_(message)(Vg_DebugMsg,
2281                    "Thread %u supposed to be in LWP %d, but we're actually %d\n",
2282                    tid, VG_(threads)[tid].os_state.lwpid, VG_(gettid)());
2283       bad = True;
2284    }
2285
2286    if (lwpid != ML_(get_sched_lock_owner)(the_BigLock)) {
2287       VG_(message)(Vg_DebugMsg,
2288                    "Thread (LWPID) %u doesn't own the_BigLock\n",
2289                    tid);
2290       bad = True;
2291    }
2292
2293    if (0) {
2294       /* Periodically show the state of all threads, for debugging
2295          purposes. */
2296       static UInt lasttime = 0;
2297       UInt now;
2298       now = VG_(read_millisecond_timer)();
2299       if ((!bad) && (lasttime + 4000/*ms*/ <= now)) {
2300          lasttime = now;
2301          VG_(printf)("\n------------ Sched State at %d ms ------------\n",
2302                      (Int)now);
2303          VG_(show_sched_status)(True,  // host_stacktrace
2304                                 True,  // stack_usage
2305                                 True); // exited_threads);
2306       }
2307    }
2308
2309    /* core_panic also shows the sched status, which is why we don't
2310       show it above if bad==True. */
2311    if (bad)
2312       VG_(core_panic)("scheduler_sanity: failed");
2313 }
2314
2315 void VG_(sanity_check_general) ( Bool force_expensive )
2316 {
2317    ThreadId tid;
2318
2319    static UInt next_slow_check_at = 1;
2320    static UInt slow_check_interval = 25;
2321
2322    if (VG_(clo_sanity_level) < 1) return;
2323
2324    /* --- First do all the tests that we can do quickly. ---*/
2325
2326    sanity_fast_count++;
2327
2328    /* Check stuff pertaining to the memory check system. */
2329
2330    /* Check that nobody has spuriously claimed that the first or
2331       last 16 pages of memory have become accessible [...] */
2332    if (VG_(needs).sanity_checks) {
2333       vg_assert(VG_TDICT_CALL(tool_cheap_sanity_check));
2334    }
2335
2336    /* --- Now some more expensive checks. ---*/
2337
2338    /* Once every now and again, check some more expensive stuff.
2339       Gradually increase the interval between such checks so as not to
2340       burden long-running programs too much. */
2341    if ( force_expensive
2342         || VG_(clo_sanity_level) > 1
2343         || (VG_(clo_sanity_level) == 1
2344             && sanity_fast_count == next_slow_check_at)) {
2345
2346       if (0) VG_(printf)("SLOW at %u\n", sanity_fast_count-1);
2347
2348       next_slow_check_at = sanity_fast_count - 1 + slow_check_interval;
2349       slow_check_interval++;
2350       sanity_slow_count++;
2351
2352       if (VG_(needs).sanity_checks) {
2353           vg_assert(VG_TDICT_CALL(tool_expensive_sanity_check));
2354       }
2355
2356       /* Look for stack overruns.  Visit all threads. */
2357       for (tid = 1; tid < VG_N_THREADS; tid++) {
2358          SizeT    remains;
2359          VgStack* stack;
2360
2361          if (VG_(threads)[tid].status == VgTs_Empty ||
2362              VG_(threads)[tid].status == VgTs_Zombie)
2363             continue;
2364
2365          stack
2366             = (VgStack*)
2367               VG_(get_ThreadState)(tid)->os_state.valgrind_stack_base;
2368          SizeT limit
2369             = 4096; // Let's say.  Checking more causes lots of L2 misses.
2370          remains
2371             = VG_(am_get_VgStack_unused_szB)(stack, limit);
2372          if (remains < limit)
2373             VG_(message)(Vg_DebugMsg,
2374                          "WARNING: Thread %u is within %lu bytes "
2375                          "of running out of valgrind stack!\n"
2376                          "Valgrind stack size can be increased "
2377                          "using --valgrind-stacksize=....\n",
2378                          tid, remains);
2379       }
2380    }
2381
2382    if (VG_(clo_sanity_level) > 1) {
2383       /* Check sanity of the low-level memory manager.  Note that bugs
2384          in the client's code can cause this to fail, so we don't do
2385          this check unless specially asked for.  And because it's
2386          potentially very expensive. */
2387       VG_(sanity_check_malloc_all)();
2388    }
2389 }
2390
2391 /*--------------------------------------------------------------------*/
2392 /*--- end                                                          ---*/
2393 /*--------------------------------------------------------------------*/