/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1993, 2010, Oracle and/or its affiliates. All rights reserved.
 */

#include <sys/machsystm.h>
#include <sys/archsystm.h>
#include <sys/cpupart.h>
#include <sys/bitset.h>
#include <sys/reboot.h>
#include <sys/bootconf.h>
#include <sys/memlist_plat.h>
#include <sys/memlist_impl.h>
#include <sys/prom_plat.h>
#include <sys/prom_isa.h>
#include <sys/autoconf.h>
#include <sys/intreg.h>
#include <sys/ivintr.h>
#include <sys/fpu/fpusystm.h>
#include <sys/iommutsb.h>
#include <vm/vm_dep.h>
#include <vm/seg_kmem.h>
#include <vm/seg_kpm.h>
#include <vm/seg_map.h>
#include <vm/seg_kp.h>
#include <sys/sysconf.h>
#include <vm/hat_sfmmu.h>
#include <sys/sun4asi.h>
#include <sys/clconf.h>
#include <sys/platform_module.h>
#include <sys/panic.h>
#include <sys/cpu_sgnblk_defs.h>
#include <sys/clock.h>
#include <sys/fpras_impl.h>
#include <sys/prom_debug.h>
#include <sys/traptrace.h>
#include <sys/memnode.h>
#include <sys/mem_cage.h>

/*
 * fpRAS implementation structures.
 */
struct fpras_chkfn *fpras_chkfnaddrs[FPRAS_NCOPYOPS];
struct fpras_chkfngrp *fpras_chkfngrps;
struct fpras_chkfngrp *fpras_chkfngrps_base;
int fpras_frequency = -1;
int64_t fpras_interval = -1;

/*
 * Increase unix symbol table size as a workaround for 6828121
 */
int alloc_mem_bermuda_triangle;

/*
 * Halt idling cpus optimization
 *
 * This optimization is only enabled on platforms that have
 * CPU halt support. The cpu_halt_cpu() support is provided
 * by the cpu module and is referenced here with a pragma weak.
 * The presence of this routine automatically enables the halt
 * idling cpus functionality if the global switch
 * enable_halt_idle_cpus is set (default is set).
 */
#pragma weak	cpu_halt_cpu
extern void	cpu_halt_cpu();
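
/*
 * Because cpu_halt_cpu is declared with #pragma weak, its address
 * resolves to zero when no cpu module supplies the routine, so its
 * presence can be probed with a simple "if (&cpu_halt_cpu)" test;
 * mach_cpu_halt_idle() below relies on exactly that check before
 * installing the halt-based idle routines.
 */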

/*
 * Defines for the idle_state_transition DTrace probe
 *
 * The probe fires when the CPU undergoes an idle state change (e.g. halting).
 * The argument passed is the state to which the CPU is transitioning.
 *
 * The states are defined here.
 */
#define	IDLE_STATE_NORMAL 0
#define	IDLE_STATE_HALTED 1
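
/*
 * As a sketch of how the transitions can be observed (the sdt probe
 * name is derived from the DTRACE_PROBE1 name used below, with "__"
 * rendered as "-"):
 *
 *	dtrace -n 'sdt:::idle-state-transition { trace(arg0); }'
 */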

int	enable_halt_idle_cpus = 1;	/* global switch */

uint_t	cp_haltset_fanout = 3;

void
setup_trap_table(void)
{
	intr_init(CPU);			/* init interrupt request free list */
	setwstate(WSTATE_KERN);
	prom_set_traptable(&trap_table);
}

void
mach_fpras()
{
	if (fpras_implemented && !fpras_disable) {
		int i;
		struct fpras_chkfngrp *fcgp;
		size_t chkfngrpsallocsz;

		/*
		 * Note that we size off of NCPU and setup for
		 * all those possibilities regardless of whether
		 * the cpu id is present or not. We do this so that
		 * we don't have any construction or destruction
		 * activity to perform at DR time, and it's not
		 * costly in memory. We require block alignment.
		 */
		chkfngrpsallocsz = NCPU * sizeof (struct fpras_chkfngrp);
		fpras_chkfngrps_base = kmem_alloc(chkfngrpsallocsz, KM_SLEEP);
		if (IS_P2ALIGNED((uintptr_t)fpras_chkfngrps_base, 64)) {
			fpras_chkfngrps = fpras_chkfngrps_base;
		} else {
			kmem_free(fpras_chkfngrps_base, chkfngrpsallocsz);
			chkfngrpsallocsz += 64;
			fpras_chkfngrps_base = kmem_alloc(chkfngrpsallocsz,
			    KM_SLEEP);
			fpras_chkfngrps = (struct fpras_chkfngrp *)
			    P2ROUNDUP((uintptr_t)fpras_chkfngrps_base, 64);
		}
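
		/*
		 * To illustrate the round-up with example numbers: had the
		 * second kmem_alloc() returned 0x30000828, then
		 * P2ROUNDUP(0x30000828, 64) = 0x30000840, the first 64-byte
		 * boundary at or above the base; the extra 64 bytes of slack
		 * guarantee the aligned pointer still has room for all NCPU
		 * groups.
		 */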

		/*
		 * Copy our check function into place for each copy operation
		 * and each cpu id.
		 */
		fcgp = &fpras_chkfngrps[0];
		for (i = 0; i < FPRAS_NCOPYOPS; ++i)
			bcopy((void *)fpras_chkfn_type1, &fcgp->fpras_fn[i],
			    sizeof (struct fpras_chkfn));
		for (i = 1; i < NCPU; ++i)
			*(&fpras_chkfngrps[i]) = *fcgp;

		/*
		 * At definition fpras_frequency is set to -1, and it will
		 * still have that value unless changed in /etc/system (not
		 * strictly supported, but not preventable). The following
		 * both sets the default and sanity checks anything from
		 * /etc/system.
		 */
		if (fpras_frequency < 0)
			fpras_frequency = FPRAS_DEFAULT_FREQUENCY;

		/*
		 * Now calculate fpras_interval. When fpras_interval
		 * becomes non-negative fpras checks will commence
		 * (copies before this point in boot will bypass fpras).
		 * Our stores of instructions must be visible; no need
		 * to flush as they've never been executed before.
		 */
		membar_producer();
		fpras_interval = (fpras_frequency == 0) ?
		    0 : sys_tick_freq / fpras_frequency;
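
		/*
		 * With example numbers: a sys_tick_freq of 100000000 (100MHz)
		 * and an fpras_frequency of 100 yield an fpras_interval of
		 * 100000000 / 100 = 1000000 ticks between checks.
		 */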
	}
}

void
mach_hw_copy_limit(void)
{
	if (!fpu_exists) {
		use_hw_bcopy = 0;
		hw_copy_limit_1 = 0;
		hw_copy_limit_2 = 0;
		hw_copy_limit_4 = 0;
		hw_copy_limit_8 = 0;
		use_hw_bzero = 0;
	}
}

void
load_tod_module(void)
{
	/*
	 * Load tod driver module for the tod part found on this system.
	 * Recompute the cpu frequency/delays based on tod as tod part
	 * tends to keep time more accurately.
	 */
	if (tod_module_name == NULL || modload("tod", tod_module_name) == -1)
		halt("Can't load tod module");
}

void
mach_memscrub(void)
{
	/*
	 * Startup memory scrubber, if not running fpu emulation code.
	 */

#ifndef _HW_MEMSCRUB_SUPPORT
	if (fpu_exists) {
		if (memscrub_init()) {
			cmn_err(CE_WARN,
			    "Memory scrubber failed to initialize");
		}
	}
#endif /* _HW_MEMSCRUB_SUPPORT */
}

/*
 * Halt the present CPU until awoken via an interrupt.
 * This routine should only be invoked if cpu_halt_cpu()
 * exists and is supported, see mach_cpu_halt_idle()
 */
static void
cpu_halt(void)
{
	cpu_t *cpup = CPU;
	processorid_t cpu_sid = cpup->cpu_seqid;
	cpupart_t *cp = cpup->cpu_part;
	int hset_update = 1;
	volatile int *p = &cpup->cpu_disp->disp_nrunnable;
	uint_t s;

	/*
	 * If this CPU is online then we should notate our halting
	 * by adding ourselves to the partition's halted CPU
	 * bitset. This allows other CPUs to find/awaken us when
	 * work becomes available.
	 */
	if (CPU->cpu_flags & CPU_OFFLINE)
		hset_update = 0;

	/*
	 * Add ourselves to the partition's halted CPUs bitset
	 * and set our HALTED flag, if necessary.
	 *
	 * When a thread becomes runnable, it is placed on the queue
	 * and then the halted cpu bitset is checked to determine who
	 * (if anyone) should be awoken. We therefore need to first
	 * add ourselves to the halted bitset, and then check if there
	 * is any work available. The order is important to prevent a race
	 * that can lead to work languishing on a run queue somewhere while
	 * this CPU remains halted.
	 *
	 * Either the producing CPU will see we're halted and will awaken us,
	 * or this CPU will see the work available in disp_anywork().
	 */
	if (hset_update) {
		cpup->cpu_disp_flags |= CPU_DISP_HALTED;
		membar_producer();
		bitset_atomic_add(&cp->cp_haltset, cpu_sid);
	}
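
	/*
	 * Concretely: were the order reversed (check for work first, set
	 * our haltset bit second), a producer enqueueing a thread between
	 * those two steps would scan the haltset, find this CPU absent,
	 * and poke nobody; the thread would then languish until some
	 * unrelated event woke us.
	 */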

	/*
	 * Check to make sure there's really nothing to do.
	 * Work destined for this CPU may become available after
	 * this check. We'll be notified through the clearing of our
	 * bit in the halted CPU bitset, and a poke.
	 */
	if (disp_anywork()) {
		if (hset_update) {
			cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
			bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		}
		return;
	}

	/*
	 * We're on our way to being halted. Wait until something becomes
	 * runnable locally or we are awoken (i.e. removed from the halt set).
	 * Note that the call to hv_cpu_yield() can return even if we have
	 * nothing to do.
	 *
	 * Disable interrupts now, so that we'll awaken immediately
	 * after halting if someone tries to poke us between now and
	 * the time we actually halt.
	 *
	 * We check for the presence of our bit after disabling interrupts.
	 * If it's cleared, we'll return. If the bit is cleared after
	 * we check then the poke will pop us out of the halted state.
	 * Also, if the offlined CPU has been brought back on-line, then
	 * we return as well.
	 *
	 * The ordering of the poke and the clearing of the bit by cpu_wakeup
	 * is important.
	 * cpu_wakeup() must clear, then poke.
	 * cpu_halt() must disable interrupts, then check for the bit.
	 *
	 * The check for anything locally runnable is here for performance
	 * and isn't needed for correctness. disp_nrunnable ought to be
	 * in our cache still, so it's inexpensive to check, and if there
	 * is anything runnable we won't have to wait for the poke.
	 *
	 * Any interrupt will awaken the cpu from halt. Looping here
	 * will filter spurious interrupts that wake us up, but don't
	 * represent a need for us to head back out to idle(). This
	 * will enable the idle loop to be more efficient and sleep in
	 * the processor pipeline for a larger percent of the time,
	 * which returns useful cycles to the peer hardware strand
	 * that shares the pipeline.
	 */
	s = disable_vec_intr();
	while (*p == 0 &&
	    ((hset_update && bitset_in_set(&cp->cp_haltset, cpu_sid)) ||
	    (!hset_update && (CPU->cpu_flags & CPU_OFFLINE)))) {

		DTRACE_PROBE1(idle__state__transition,
		    uint_t, IDLE_STATE_HALTED);
		(void) cpu_halt_cpu();
		DTRACE_PROBE1(idle__state__transition,
		    uint_t, IDLE_STATE_NORMAL);

		enable_vec_intr(s);
		s = disable_vec_intr();
	}

	/*
	 * We're no longer halted
	 */
	enable_vec_intr(s);
	if (hset_update) {
		cpup->cpu_disp_flags &= ~CPU_DISP_HALTED;
		bitset_atomic_del(&cp->cp_haltset, cpu_sid);
	}
}

/*
 * If "cpu" is halted, then wake it up clearing its halted bit in advance.
 * Otherwise, see if other CPUs in the cpu partition are halted and need to
 * be woken up so that they can steal the thread we placed on this CPU.
 * This function is only used on MP systems.
 * This function should only be invoked if cpu_halt_cpu()
 * exists and is supported, see mach_cpu_halt_idle()
 */
static void
cpu_wakeup(cpu_t *cpu, int bound)
{
	uint_t cpu_found;
	processorid_t cpu_sid;
	cpupart_t *cp;

	cp = cpu->cpu_part;
	cpu_sid = cpu->cpu_seqid;
	if (bitset_in_set(&cp->cp_haltset, cpu_sid)) {
		/*
		 * Clear the halted bit for that CPU since it will be
		 * poked in a moment.
		 */
		bitset_atomic_del(&cp->cp_haltset, cpu_sid);
		/*
		 * We may find the current CPU present in the halted cpu bitset
		 * if we're in the context of an interrupt that occurred
		 * before we had a chance to clear our bit in cpu_halt().
		 * Poking ourself is obviously unnecessary, since if
		 * we're here, we're not halted.
		 */
		if (cpu != CPU)
			poke_cpu(cpu->cpu_id);
		return;
	} else {
		/*
		 * This cpu isn't halted, but it's idle or undergoing a
		 * context switch. No need to awaken anyone else.
		 */
		if (cpu->cpu_thread == cpu->cpu_idle_thread ||
		    cpu->cpu_disp_flags & CPU_DISP_DONTSTEAL)
			return;
	}

	/*
	 * No need to wake up other CPUs if this is for a bound thread.
	 */
	if (bound)
		return;

	/*
	 * The CPU specified for wakeup isn't currently halted, so check
	 * to see if there are any other halted CPUs in the partition,
	 * and if there are then awaken one.
	 *
	 * If possible, try to select a CPU close to the target, since this
	 * will likely trigger a migration.
	 */
	do {
		cpu_found = bitset_find(&cp->cp_haltset);
		if (cpu_found == (uint_t)-1)
			return;
	} while (bitset_atomic_test_and_del(&cp->cp_haltset, cpu_found) < 0);
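
	/*
	 * bitset_find() only nominates a candidate; another CPU may claim
	 * and clear that bit first, in which case the atomic test-and-del
	 * fails (returns a negative value) and the loop retries with the
	 * next halted CPU, until one is claimed or none remain.
	 */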

	if (cpu_found != CPU->cpu_seqid)
		poke_cpu(cpu_seq[cpu_found]->cpu_id);
}

void
mach_cpu_halt_idle(void)
{
	if (enable_halt_idle_cpus) {
		if (&cpu_halt_cpu) {
			idle_cpu = cpu_halt;
			disp_enq_thread = cpu_wakeup;
		}
	}
}

/*ARGSUSED*/
int
cpu_intrq_setup(struct cpu *cp)
{
	/* Interrupt mondo queues not applicable to sun4u */
	return (0);
}

/*ARGSUSED*/
void
cpu_intrq_cleanup(struct cpu *cp)
{
	/* Interrupt mondo queues not applicable to sun4u */
}

/*ARGSUSED*/
void
cpu_intrq_register(struct cpu *cp)
{
	/* Interrupt/error queues not applicable to sun4u */
}

/*ARGSUSED*/
void
mach_htraptrace_setup(int cpuid)
{
	/* Setup hypervisor traptrace buffer, not applicable to sun4u */
}

/*ARGSUSED*/
void
mach_htraptrace_configure(int cpuid)
{
	/* Enable/disable hypervisor traptracing, not applicable to sun4u */
}

/*ARGSUSED*/
void
mach_htraptrace_cleanup(int cpuid)
{
	/* Cleanup hypervisor traptrace buffer, not applicable to sun4u */
}

void
mach_descrip_startup_init(void)
{
	/*
	 * Initialize Machine description framework during startup.
	 */
}

void
mach_descrip_startup_fini(void)
{
	/*
	 * Clean up Machine Description framework during startup.
	 */
}

void
mach_descrip_init(void)
{
	/*
	 * Initialize Machine description framework.
	 */
}

void
hsvc_setup(void)
{
	/* Setup hypervisor services, not applicable to sun4u */
}

void
load_mach_drivers(void)
{
	/* Currently no machine class (sun4u) specific drivers to load */
}

/*
 * Return true if the machine we're running on is a Positron.
 * (Positron is an unsupported developers platform.)
 */
int
iam_positron(void)
{
	char model[32];
	const char proto_model[] = "SUNW,501-2732";
	pnode_t root = prom_rootnode();

	if (prom_getproplen(root, "model") != sizeof (proto_model))
		return (0);

	(void) prom_getprop(root, "model", model);
	if (strcmp(model, proto_model) == 0)
		return (1);
	return (0);
}

/*
 * Find a physically contiguous area of twice the largest ecache size
 * to be used while doing displacement flush of ecaches.
 */
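/*
 * For example (illustrative numbers): with a 4MB ecache and a memlist
 * segment spanning [0x1fff00000, 0x240000000), P2ROUNDUP(0x1fff00000,
 * 0x400000) yields 0x200000000; since 0x200000000 plus the 8MB flush
 * size still fits within the segment, that address would be returned.
 */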
uint64_t
ecache_flush_address(void)
{
	struct memlist *pmem;
	uint64_t flush_size;
	uint64_t ret_val;

	flush_size = ecache_size * 2;
	for (pmem = phys_install; pmem; pmem = pmem->ml_next) {
		ret_val = P2ROUNDUP(pmem->ml_address, ecache_size);
		if (ret_val + flush_size <= pmem->ml_address + pmem->ml_size)
			return (ret_val);
	}
	return ((uint64_t)-1);
}

/*
 * Called with the memlist lock held to say that phys_install has
 * changed.
 */
void
phys_install_has_changed(void)
{
	/*
	 * Get the new address into a temporary just in case panicking
	 * involves use of ecache_flushaddr.
	 */
	uint64_t new_addr;

	new_addr = ecache_flush_address();
	if (new_addr == (uint64_t)-1) {
		cmn_err(CE_PANIC,
		    "ecache_flush_address(): failed, ecache_size=%x",
		    ecache_size);
		/*NOTREACHED*/
	}
	ecache_flushaddr = new_addr;
}