4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #pragma ident "%Z%%M% %I% %E% SMI"
29 * The main CPU-control loops, used to control masters and slaves.
32 #include <sys/types.h>
34 #include <kmdb/kaif.h>
35 #include <kmdb/kaif_start.h>
36 #include <kmdb/kmdb_asmutil.h>
37 #include <kmdb/kmdb_dpi_impl.h>
38 #include <kmdb/kmdb_kdi.h>
/*
 * Commands posted to the slave CPUs through the kaif_slave_cmd word below.
 * Slaves poll this value from kaif_slave_loop() and act on it:
 * SPIN (idle), SWITCH (hand mastership to kaif_slave_tgt), RESUME
 * (leave the debugger), FLUSH (flush caches), REBOOT (x86 only).
 */
40 #define KAIF_SLAVE_CMD_SPIN 0
41 #define KAIF_SLAVE_CMD_SWITCH 1
42 #define KAIF_SLAVE_CMD_RESUME 2
43 #define KAIF_SLAVE_CMD_FLUSH 3
44 #define KAIF_SLAVE_CMD_REBOOT 4
/*
 * NOTE(review): this extraction is garbled — original line numbers are fused
 * into the text and some lines (including comment delimiters) are elided.
 * Comments below describe only what is visible.
 */
48 * Used to synchronize attempts to set kaif_master_cpuid. kaif_master_cpuid may
49 * be read without kaif_master_lock, and may be written by the current master
/* ID of the CPU currently acting as debugger master; UNSET when no master. */
52 int kaif_master_cpuid
= KAIF_MASTER_CPUID_UNSET
;
/* Spin lock (taken via kaif_lock_enter) guarding master selection. */
53 static uintptr_t kaif_master_lock
= 0;
56 * Used to ensure that all CPUs leave the debugger together. kaif_loop_lock must
57 * be held to write kaif_looping, but need not be held to read it.
/* Count/flag used as a departure barrier; see kaif_main_loop(). */
59 static volatile uint_t kaif_looping
;
60 static uintptr_t kaif_loop_lock
;
/* Current KAIF_SLAVE_CMD_* command, polled by slaves in kaif_slave_loop(). */
62 static volatile int kaif_slave_cmd
;
63 static volatile int kaif_slave_tgt
; /* target cpuid for CMD_SWITCH */
/*
 * Acquire a simple spin lock: spin until cas() atomically changes *lock
 * from 0 to 1.
 * NOTE(review): the loop body, the function's storage class/return type,
 * and any memory-barrier call are elided in this extraction.
 */
66 kaif_lock_enter(uintptr_t *lock
)
68 while (cas(lock
, 0, 1) != 0)
/*
 * Release a lock taken with kaif_lock_enter().
 * NOTE(review): the body is elided in this extraction; presumably it stores
 * 0 back to *lock (with appropriate barriers) — confirm against the
 * original file.
 */
74 kaif_lock_exit(uintptr_t *lock
)
/*
 * Post a KAIF_SLAVE_CMD_* command to the slaves and wake them via the KDI.
 * NOTE(review): the statement that publishes 'cmd' (presumably an
 * assignment to kaif_slave_cmd) is elided in this extraction — verify
 * against the original file.
 */
81 kaif_start_slaves(int cmd
)
84 kmdb_kdi_start_slaves();
/*
 * Main loop run by the debugger master CPU.  Installs the debugger trap
 * table, stops the slaves, then dispatches on the command returned by
 * kmdb_dpi_reenter(): CPU switch, full/unload resume, master-only resume,
 * cache flush, or (x86 only) reboot.  Returns a KAIF_CPU_CMD_* value that
 * tells the caller (kaif_main_loop()) how this CPU should proceed.
 * NOTE(review): this extraction is garbled and elided (missing braces,
 * declarations such as 'i'/'notflushed', and several statements); comments
 * describe only what is visible.
 */
88 kaif_master_loop(kaif_cpusave_t
*cpusave
)
92 kaif_trap_set_debugger();
95 * If we re-entered due to a ::switch, we need to tell the slave CPUs
98 kmdb_kdi_stop_slaves(cpusave
->krs_cpu_id
, 0);
101 switch (kmdb_dpi_reenter()) {
/* ::switch — hand mastership to kmdb_dpi_switch_target. */
102 case KMDB_DPI_CMD_SWITCH_CPU
:
104 * We assume that the target CPU is a valid slave. There's no
105 * easy way to complain here, so we'll assume that the caller
106 * has done the proper checking.
/* Switching to ourselves is a no-op; elided branch presumably breaks. */
108 if (kmdb_dpi_switch_target
== cpusave
->krs_cpu_id
)
/* Publish the new master; this CPU demotes itself to slave. */
111 kaif_slave_tgt
= kaif_master_cpuid
= kmdb_dpi_switch_target
;
112 cpusave
->krs_cpu_state
= KAIF_CPU_STATE_SLAVE
;
116 * Switch back to the saved trap table before we switch CPUs --
117 * we need to make sure that only one CPU is on the debugger's
120 kaif_trap_set_saved(cpusave
);
122 kaif_start_slaves(KAIF_SLAVE_CMD_SWITCH
);
124 /* The new master is now awake */
125 return (KAIF_CPU_CMD_SWITCH
);
/* Resume the whole world (optionally to unload the debugger). */
127 case KMDB_DPI_CMD_RESUME_ALL
:
128 case KMDB_DPI_CMD_RESUME_UNLOAD
:
130 * Resume everyone, clean up for next entry.
132 kaif_master_cpuid
= KAIF_MASTER_CPUID_UNSET
;
134 kaif_start_slaves(KAIF_SLAVE_CMD_RESUME
);
136 if (kmdb_dpi_work_required())
137 kmdb_dpi_wrintr_fire();
139 kaif_trap_set_saved(cpusave
);
141 return (KAIF_CPU_CMD_RESUME
);
143 case KMDB_DPI_CMD_RESUME_MASTER
:
145 * Single-CPU resume, which is performed on the debugger's
146 * trap table (so no need to switch back).
148 return (KAIF_CPU_CMD_RESUME_MASTER
);
/* Flush caches on all CPUs, then wait for the slaves to report done. */
150 case KMDB_DPI_CMD_FLUSH_CACHES
:
151 kaif_start_slaves(KAIF_SLAVE_CMD_FLUSH
);
154 * Wait for the other cpus to finish flushing their caches.
/* NOTE(review): enclosing do {} and 'notflushed' bookkeeping are elided. */
158 for (i
= 0; i
< kaif_ncpusave
; i
++) {
159 kaif_cpusave_t
*save
= &kaif_cpusave
[i
];
161 if (save
->krs_cpu_state
==
162 KAIF_CPU_STATE_SLAVE
&&
163 !save
->krs_cpu_flushed
) {
168 } while (notflushed
> 0);
/* All slaves flushed; put them back to spinning. */
170 kaif_slave_cmd
= KAIF_SLAVE_CMD_SPIN
;
173 #if defined(__i386) || defined(__amd64)
174 case KMDB_DPI_CMD_REBOOT
:
176 * Reboot must be initiated by CPU 0. I could ask why, but I'm
177 * afraid that I don't want to know the answer.
179 if (cpusave
->krs_cpu_id
== 0)
182 kaif_start_slaves(KAIF_SLAVE_CMD_REBOOT
);
185 * Spin forever, waiting for CPU 0 (apparently a slave) to
/*
 * Loop run by each slave CPU while a master owns the debugger.  Polls
 * kaif_slave_cmd and reacts: become the new master (SWITCH targeted at this
 * CPU), flush caches (FLUSH), initiate reboot on CPU 0 (x86 REBOOT), or
 * leave the debugger (RESUME).  Otherwise waits via kmdb_kdi_slave_wait().
 * Returns a KAIF_CPU_CMD_* value ('rv', declaration elided) to
 * kaif_main_loop().
 * NOTE(review): the enclosing loop construct, braces, and several
 * statements are elided in this extraction.
 */
200 kaif_slave_loop(kaif_cpusave_t
*cpusave
)
205 /* Wait for duty to call */
207 slavecmd
= kaif_slave_cmd
;
/* SWITCH aimed at this CPU: promote ourselves to master. */
209 if (slavecmd
== KAIF_SLAVE_CMD_SWITCH
&&
210 kaif_slave_tgt
== cpusave
->krs_cpu_id
) {
/* Consume the command so other slaves go back to spinning. */
211 kaif_slave_cmd
= KAIF_SLAVE_CMD_SPIN
;
212 cpusave
->krs_cpu_state
= KAIF_CPU_STATE_MASTER
;
213 rv
= KAIF_CPU_CMD_SWITCH
;
216 } else if (slavecmd
== KAIF_SLAVE_CMD_FLUSH
) {
/* Flush and report completion; master polls krs_cpu_flushed. */
217 kmdb_kdi_flush_caches();
218 cpusave
->krs_cpu_flushed
= 1;
221 #if defined(__i386) || defined(__amd64)
/* x86 reboot must run on CPU 0; branch body elided here. */
222 } else if (slavecmd
== KAIF_SLAVE_CMD_REBOOT
&&
223 cpusave
->krs_cpu_id
== 0) {
229 } else if (slavecmd
== KAIF_SLAVE_CMD_RESUME
) {
230 rv
= KAIF_CPU_CMD_RESUME
;
/* Nothing to do yet — park this slave in the KDI wait primitive. */
234 kmdb_kdi_slave_wait();
/*
 * Race to elect the debugger master.  Under kaif_master_lock, the first CPU
 * to find kaif_master_cpuid unset claims mastership, resets the slave
 * command to SPIN, and cross-calls the other CPUs into the debugger;
 * later arrivals mark themselves slaves.
 * NOTE(review): braces and the else keyword joining the two arms are
 * elided in this extraction.
 */
242 kaif_select_master(kaif_cpusave_t
*cpusave
)
244 kaif_lock_enter(&kaif_master_lock
);
246 if (kaif_master_cpuid
== KAIF_MASTER_CPUID_UNSET
) {
247 /* This is the master. */
248 kaif_master_cpuid
= cpusave
->krs_cpu_id
;
249 cpusave
->krs_cpu_state
= KAIF_CPU_STATE_MASTER
;
250 kaif_slave_cmd
= KAIF_SLAVE_CMD_SPIN
;
/* Bring the other CPUs into the debugger (second arg 1 vs. 0 in
 * kaif_master_loop — semantics of the flag not visible here). */
254 kmdb_kdi_stop_slaves(cpusave
->krs_cpu_id
, 1);
256 /* The master was already chosen - go be a slave */
257 cpusave
->krs_cpu_state
= KAIF_CPU_STATE_SLAVE
;
261 kaif_lock_exit(&kaif_master_lock
);
/*
 * Per-CPU entry point into the debugger.  Determines this CPU's role
 * (electing a master via kaif_select_master() if none exists), then runs
 * kaif_master_loop() or kaif_slave_loop() until a command other than
 * SWITCH is returned, and finally synchronizes all CPUs' departure using
 * kaif_looping / kaif_loop_lock as a barrier.
 * NOTE(review): this extraction is garbled and the function is truncated
 * at the end of the visible source; braces, 'cmd' declaration, the do {}
 * opener, and the kaif_looping increment/decrement are all elided.
 */
265 kaif_main_loop(kaif_cpusave_t
*cpusave
)
269 if (kaif_master_cpuid
== KAIF_MASTER_CPUID_UNSET
) {
270 if (!kmdb_dpi_resume_requested
&&
271 kmdb_kdi_get_unload_request()) {
273 * Special case: Unload requested before first debugger
274 * entry. Don't stop the world, as there's nothing to
275 * clean up that can't be handled by the running kernel.
277 cpusave
->krs_cpu_state
= KAIF_CPU_STATE_NONE
;
278 return (KAIF_CPU_CMD_RESUME
);
281 kaif_select_master(cpusave
);
283 } else if (kaif_master_cpuid
== cpusave
->krs_cpu_id
) {
284 cpusave
->krs_cpu_state
= KAIF_CPU_STATE_MASTER
;
/* else arm (elided keyword): a master exists and it isn't us. */
286 cpusave
->krs_cpu_state
= KAIF_CPU_STATE_SLAVE
;
289 cpusave
->krs_cpu_flushed
= 0;
/* Register this CPU with the departure barrier (mutation elided). */
291 kaif_lock_enter(&kaif_loop_lock
);
293 kaif_lock_exit(&kaif_loop_lock
);
296 * We know who the master and slaves are, so now they can go off
297 * to their respective loops.
300 if (kaif_master_cpuid
== cpusave
->krs_cpu_id
)
301 cmd
= kaif_master_loop(cpusave
);
303 cmd
= kaif_slave_loop(cpusave
);
/* Loop while CPUs swap master/slave roles (::switch). */
304 } while (cmd
== KAIF_CPU_CMD_SWITCH
);
306 kaif_lock_enter(&kaif_loop_lock
);
308 kaif_lock_exit(&kaif_loop_lock
);
310 cpusave
->krs_cpu_state
= KAIF_CPU_STATE_NONE
;
312 if (cmd
== KAIF_CPU_CMD_RESUME
) {
314 * By this point, the master has directed the slaves to resume,
315 * and everyone is making their way to this point. We're going
316 * to block here until all CPUs leave the master and slave
317 * loops. When all have arrived, we'll turn them all loose.
318 * This barrier is required for two reasons:
320 * 1. There exists a race condition whereby a CPU could reenter
321 * the debugger while another CPU is still in the slave loop
322 * from this debugger entry. This usually happens when the
323 * current master releases the slaves, and makes it back to
324 * the world before the slaves notice the release. The
325 * former master then triggers a debugger entry, and attempts
326 * to stop the slaves for this entry before they've even
327 * resumed from the last one. When the slaves arrive here,
328 * they'll have re-disabled interrupts, and will thus ignore
329 * cross-calls until they finish resuming.
331 * 2. At the time of this writing, there exists a SPARC bug that
332 * causes an apparently unsolicited interrupt vector trap
333 * from OBP to one of the slaves. This wouldn't normally be
334 * a problem but for the fact that the cross-called CPU
335 * encounters some sort of failure while in OBP. OBP
336 * recovers by executing the debugger-hook word, which sends
337 * the slave back into the debugger, triggering a debugger
338 * fault. This problem seems to only happen during resume,
339 * the result being that all CPUs save for the cross-called
340 * one make it back into the world, while the cross-called
341 * one is stuck at the debugger fault prompt. Leave the
342 * world in that state too long, and you'll get a mondo
343 * timeout panic. If we hold everyone here, we can give the
344 * the user a chance to trigger a panic for further analysis.
345 * To trigger the bug, "pool_unlock:b :c" and "while : ; do
348 * When the second item is fixed, the barrier can move into
349 * kaif_select_master(), immediately prior to the setting of
/* Spin until every CPU has checked out of the barrier (body truncated). */
352 while (kaif_looping
!= 0)