/* SPDX-License-Identifier: GPL-2.0-only */

#include <console/console.h>
#include <string.h>
#include <rmodule.h>
#include <commonlib/helpers.h>
#include <cpu/cpu.h>
#include <cpu/intel/microcode.h>
#include <cpu/x86/cache.h>
#include <cpu/x86/gdt.h>
#include <cpu/x86/lapic.h>
#include <cpu/x86/name.h>
#include <cpu/x86/msr.h>
#include <cpu/x86/mtrr.h>
#include <cpu/x86/smm.h>
#include <cpu/x86/topology.h>
#include <cpu/x86/mp.h>
#include <delay.h>
#include <device/device.h>
#include <smp/atomic.h>
#include <smp/spinlock.h>
#include <symbols.h>
#include <timer.h>
#include <thread.h>
#include <types.h>

/* Generated header */
#include <ramstage/cpu/x86/smm_start32_offset.h>

#include <security/intel/stm/SmmStm.h>
struct mp_callback {
	void (*func)(void *);
	void *arg;
	int logical_cpu_number;
};

static char processor_name[49];
/*
 * An mp_flight_record details a sequence of calls for the APs to perform
 * along with the BSP to coordinate sequencing. Each flight record either
 * provides a barrier for each AP before calling the callback or the APs
 * are allowed to perform the callback without waiting. Regardless, each
 * record has the cpus_entered field incremented for each record. When
 * the BSP observes that cpus_entered matches the number of APs, bsp_call
 * is invoked and, upon returning, the barrier is released, allowing the
 * APs to make further progress.
 *
 * Note that ap_call() and bsp_call() can be NULL. In the NULL case the
 * callback will just not be called.
 */
struct mp_flight_record {
	atomic_t barrier;
	atomic_t cpus_entered;
	void (*ap_call)(void);
	void (*bsp_call)(void);
} __aligned(CACHELINE_SIZE);

#define _MP_FLIGHT_RECORD(barrier_, ap_func_, bsp_func_) \
	{						\
		.barrier = ATOMIC_INIT(barrier_),	\
		.cpus_entered = ATOMIC_INIT(0),		\
		.ap_call = ap_func_,			\
		.bsp_call = bsp_func_,			\
	}

#define MP_FR_BLOCK_APS(ap_func_, bsp_func_) \
	_MP_FLIGHT_RECORD(0, ap_func_, bsp_func_)

#define MP_FR_NOBLOCK_APS(ap_func_, bsp_func_) \
	_MP_FLIGHT_RECORD(1, ap_func_, bsp_func_)
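
/*
 * Illustrative use of the macros above: a flight plan entry such as
 *	MP_FR_BLOCK_APS(NULL, load_smm_handlers)
 * keeps the APs waiting at the barrier until the BSP has run
 * load_smm_handlers(), while MP_FR_NOBLOCK_APS lets the APs run their
 * callback without waiting on the BSP. See mp_steps[] below for the
 * flight plan used by this file.
 */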

/* The mp_params structure provides the arguments to the MP subsystem
 * for bringing up APs. */
struct mp_params {
	int num_cpus;		/* Total number of CPUs, including the BSP */
	int parallel_microcode_load;
	const void *microcode_pointer;
	/* Flight plan for APs and BSP. */
	struct mp_flight_record *flight_plan;
	int num_records;
};

/* This needs to match the layout in the .module_parameters section. */
struct sipi_params {
	uint16_t gdtlimit;
	uint32_t gdt;
	uint16_t unused;
	uint32_t idt_ptr;
	uint32_t per_cpu_segment_descriptors;
	uint32_t per_cpu_segment_selector;
	uint32_t stack_top;
	uint32_t stack_size;
	uint32_t microcode_lock;	/* 0xffffffff means parallel loading. */
	uint32_t microcode_ptr;
	uint32_t msr_table_ptr;
	uint32_t msr_count;
	uint32_t c_handler;
	uint32_t cr3;
	atomic_t ap_count;
} __packed;

/* This also needs to match the assembly code for saved MSR encoding. */
struct saved_msr {
	uint32_t index;
	uint32_t lo;
	uint32_t hi;
} __packed;

/* The sipi vector rmodule is included in the ramstage using 'objcopy -B'. */
extern char _binary_sipi_vector_start[];

/* The SIPI vector is loaded at the SMM_DEFAULT_BASE. The reason is that the
 * memory range is already reserved so the OS cannot use it. That region is
 * free to use for AP bringup before SMM is initialized. */
static const uintptr_t sipi_vector_location = SMM_DEFAULT_BASE;
static const int sipi_vector_location_size = SMM_DEFAULT_SIZE;

struct mp_flight_plan {
	int num_records;
	struct mp_flight_record *records;
};

static int global_num_aps;
static struct mp_flight_plan mp_info;

static inline void barrier_wait(atomic_t *b)
{
	while (atomic_read(b) == 0)
		asm ("pause");
	mfence();
}

static inline void release_barrier(atomic_t *b)
{
	mfence();
	atomic_set(b, 1);
}
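
/*
 * barrier_wait() and release_barrier() form a simple one-shot barrier:
 * callers of barrier_wait() spin with PAUSE until the barrier value is set
 * to non-zero by release_barrier(). The mfence() calls order the barrier
 * update against the surrounding memory accesses.
 */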

static enum cb_err wait_for_aps(atomic_t *val, int target, int total_delay,
				int delay_step)
{
	int delayed = 0;
	while (atomic_read(val) != target) {
		udelay(delay_step);
		delayed += delay_step;
		if (delayed >= total_delay) {
			/* Not all APs ready before timeout */
			return CB_ERR;
		}
	}

	/* APs ready before timeout */
	printk(BIOS_SPEW, "APs are ready after %dus\n", delayed);
	return CB_SUCCESS;
}

static void ap_do_flight_plan(void)
{
	int i;

	for (i = 0; i < mp_info.num_records; i++) {
		struct mp_flight_record *rec = &mp_info.records[i];

		atomic_inc(&rec->cpus_entered);
		barrier_wait(&rec->barrier);

		if (rec->ap_call != NULL)
			rec->ap_call();
	}
}

static void park_this_cpu(void *unused)
{
	stop_this_cpu();
}

static struct bus *g_cpu_bus;

/* By the time APs call ap_init() caching has been setup, and microcode has
 * been loaded. */
static asmlinkage void ap_init(unsigned int index)
{
	/* Ensure the local APIC is enabled */
	enable_lapic();
	setup_lapic_interrupts();

	struct device *dev;
	int i = 0;
	for (dev = g_cpu_bus->children; dev; dev = dev->sibling)
		if (i++ == index)
			break;

	if (!dev) {
		printk(BIOS_ERR, "Could not find allocated device for index %u\n", index);
		return;
	}

	set_cpu_info(index, dev);

	/* Fix up APIC id with reality. */
	dev->path.apic.apic_id = lapicid();
	dev->path.apic.initial_lapicid = initial_lapicid();
	dev->enabled = 1;

	set_cpu_topology_from_leaf_b(dev);

	if (cpu_is_intel())
		printk(BIOS_INFO, "AP: slot %u apic_id %x, MCU rev: 0x%08x\n", index,
		       dev->path.apic.apic_id, get_current_microcode_rev());
	else
		printk(BIOS_INFO, "AP: slot %u apic_id %x\n", index,
		       dev->path.apic.apic_id);

	/* Walk the flight plan */
	ap_do_flight_plan();

	/* Park the AP. */
	park_this_cpu(NULL);
}

static __aligned(16) uint8_t ap_stack[CONFIG_AP_STACK_SIZE * CONFIG_MAX_CPUS];

static void setup_default_sipi_vector_params(struct sipi_params *sp)
{
	sp->gdt = (uintptr_t)&gdt;
	sp->gdtlimit = (uintptr_t)&gdt_end - (uintptr_t)&gdt - 1;
	sp->idt_ptr = (uintptr_t)&idtarg;
	sp->per_cpu_segment_descriptors = (uintptr_t)&per_cpu_segment_descriptors;
	sp->per_cpu_segment_selector = per_cpu_segment_selector;
	sp->stack_size = CONFIG_AP_STACK_SIZE;
	sp->stack_top = (uintptr_t)ap_stack + ARRAY_SIZE(ap_stack);
}
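
/*
 * Note: stack_top points at the end of the shared ap_stack[] array; the SIPI
 * stub is expected to carve out CONFIG_AP_STACK_SIZE bytes per AP from this
 * region (stack_size is passed along for that purpose).
 */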

static const unsigned int fixed_mtrrs[NUM_FIXED_MTRRS] = {
	MTRR_FIX_64K_00000, MTRR_FIX_16K_80000, MTRR_FIX_16K_A0000,
	MTRR_FIX_4K_C0000, MTRR_FIX_4K_C8000, MTRR_FIX_4K_D0000,
	MTRR_FIX_4K_D8000, MTRR_FIX_4K_E0000, MTRR_FIX_4K_E8000,
	MTRR_FIX_4K_F0000, MTRR_FIX_4K_F8000,
};

static inline struct saved_msr *save_msr(int index, struct saved_msr *entry)
{
	msr_t msr;

	msr = rdmsr(index);
	entry->index = index;
	entry->lo = msr.lo;
	entry->hi = msr.hi;

	/* Return the next entry. */
	entry++;
	return entry;
}

static int save_bsp_msrs(char *start, int size)
{
	int msr_count;
	int num_var_mtrrs;
	struct saved_msr *msr_entry;
	int i;

	/* Determine the number of MTRRs that need to be saved. */
	num_var_mtrrs = get_var_mtrr_count();

	/* 2 * num_var_mtrrs for base and mask. +1 for IA32_MTRR_DEF_TYPE. */
	msr_count = 2 * num_var_mtrrs + NUM_FIXED_MTRRS + 1;

	if ((msr_count * sizeof(struct saved_msr)) > size) {
		printk(BIOS_CRIT, "Cannot mirror all %d msrs.\n", msr_count);
		return -1;
	}

	fixed_mtrrs_expose_amd_rwdram();

	msr_entry = (void *)start;
	for (i = 0; i < NUM_FIXED_MTRRS; i++)
		msr_entry = save_msr(fixed_mtrrs[i], msr_entry);

	for (i = 0; i < num_var_mtrrs; i++) {
		msr_entry = save_msr(MTRR_PHYS_BASE(i), msr_entry);
		msr_entry = save_msr(MTRR_PHYS_MASK(i), msr_entry);
	}

	msr_entry = save_msr(MTRR_DEF_TYPE_MSR, msr_entry);

	fixed_mtrrs_hide_amd_rwdram();

	/* Tell static analysis we know value is left unused. */
	(void)msr_entry;

	return msr_count;
}

static atomic_t *load_sipi_vector(struct mp_params *mp_params)
{
	struct rmodule sipi_mod;
	int module_size;
	int num_msrs;
	struct sipi_params *sp;
	char *mod_loc = (void *)sipi_vector_location;
	const int loc_size = sipi_vector_location_size;
	atomic_t *ap_count = NULL;

	if (rmodule_parse(&_binary_sipi_vector_start, &sipi_mod)) {
		printk(BIOS_CRIT, "Unable to parse sipi module.\n");
		return ap_count;
	}

	if (rmodule_entry_offset(&sipi_mod) != 0) {
		printk(BIOS_CRIT, "SIPI module entry offset is not 0!\n");
		return ap_count;
	}

	if (rmodule_load_alignment(&sipi_mod) != 4096) {
		printk(BIOS_CRIT, "SIPI module load alignment(%d) != 4096.\n",
		       rmodule_load_alignment(&sipi_mod));
		return ap_count;
	}

	module_size = rmodule_memory_size(&sipi_mod);

	/* Align to 4 bytes. */
	module_size = ALIGN_UP(module_size, 4);

	if (module_size > loc_size) {
		printk(BIOS_CRIT, "SIPI module size (%d) > region size (%d).\n",
		       module_size, loc_size);
		return ap_count;
	}

	num_msrs = save_bsp_msrs(&mod_loc[module_size], loc_size - module_size);

	if (num_msrs < 0) {
		printk(BIOS_CRIT, "Error mirroring BSP's msrs.\n");
		return ap_count;
	}

	if (rmodule_load(mod_loc, &sipi_mod)) {
		printk(BIOS_CRIT, "Unable to load SIPI module.\n");
		return ap_count;
	}

	sp = rmodule_parameters(&sipi_mod);

	if (sp == NULL) {
		printk(BIOS_CRIT, "SIPI module has no parameters.\n");
		return ap_count;
	}

	setup_default_sipi_vector_params(sp);
	/* Setup MSR table. */
	sp->msr_table_ptr = (uintptr_t)&mod_loc[module_size];
	sp->msr_count = num_msrs;
	/* Provide pointer to microcode patch. */
	sp->microcode_ptr = (uintptr_t)mp_params->microcode_pointer;
	/* Pass on ability to load microcode in parallel. */
	if (mp_params->parallel_microcode_load)
		sp->microcode_lock = ~0;
	else
		sp->microcode_lock = 0;
	sp->c_handler = (uintptr_t)&ap_init;
	sp->cr3 = read_cr3();
	ap_count = &sp->ap_count;
	atomic_set(ap_count, 0);

	/* Make sure SIPI data hits RAM so the APs that come up will see the
	   startup code even if the caches are disabled. */
	if (clflush_supported())
		clflush_region((uintptr_t)mod_loc, module_size);
	else
		wbinvd();

	return ap_count;
}
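
/*
 * After load_sipi_vector() the layout at SMM_DEFAULT_BASE is the relocated
 * SIPI rmodule followed immediately by the mirrored BSP MSR table
 * (msr_table_ptr points just past the module), which the APs use to copy
 * the BSP's MTRR configuration before enabling caching.
 */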

static int allocate_cpu_devices(struct bus *cpu_bus, struct mp_params *p)
{
	int i;
	int max_cpus;
	struct cpu_info *info;

	max_cpus = p->num_cpus;
	if (max_cpus > CONFIG_MAX_CPUS) {
		printk(BIOS_CRIT, "CPU count(%d) exceeds CONFIG_MAX_CPUS(%d)\n",
		       max_cpus, CONFIG_MAX_CPUS);
		max_cpus = CONFIG_MAX_CPUS;
	}

	info = cpu_info();
	for (i = 1; i < max_cpus; i++) {
		/* Assuming linear APIC space allocation. AP will set its own
		   APIC id in the ap_init() path above. */
		struct device *new = add_cpu_device(cpu_bus, info->cpu->path.apic.apic_id + i, 1);
		if (new == NULL) {
			printk(BIOS_CRIT, "Could not allocate CPU device\n");
			max_cpus--;
			continue;
		}
		new->name = processor_name;
		new->enabled = 0; /* Runtime will enable it */
	}

	return max_cpus;
}

static enum cb_err apic_wait_timeout(int total_delay, int delay_step)
{
	int total = 0;

	while (lapic_busy()) {
		udelay(delay_step);
		total += delay_step;
		if (total >= total_delay) {
			/* LAPIC not ready before the timeout */
			return CB_ERR;
		}
	}

	/* LAPIC ready before the timeout */
	return CB_SUCCESS;
}

/* Send Startup IPI to APs */
static enum cb_err send_sipi_to_aps(int ap_count, atomic_t *num_aps, int sipi_vector)
{
	if (lapic_busy()) {
		printk(BIOS_DEBUG, "Waiting for ICR not to be busy...\n");
		if (apic_wait_timeout(1000 /* 1 ms */, 50) != CB_SUCCESS) {
			printk(BIOS_ERR, "timed out. Aborting.\n");
			return CB_ERR;
		}
		printk(BIOS_DEBUG, "done.\n");
	}

	lapic_send_ipi_others(LAPIC_INT_ASSERT | LAPIC_DM_STARTUP | sipi_vector);
	printk(BIOS_DEBUG, "Waiting for SIPI to complete...\n");
	if (apic_wait_timeout(10000 /* 10 ms */, 50 /* us */) != CB_SUCCESS) {
		printk(BIOS_ERR, "timed out.\n");
		return CB_ERR;
	}
	printk(BIOS_DEBUG, "done.\n");
	return CB_SUCCESS;
}
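
/*
 * start_aps() below brings the APs out of reset using the classic
 * INIT-SIPI-SIPI sequence. When X86_INIT_NEED_1_SIPI is selected, the 10ms
 * wait and the first SIPI are skipped and only a single SIPI is sent after
 * INIT.
 */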

static enum cb_err start_aps(struct bus *cpu_bus, int ap_count, atomic_t *num_aps)
{
	int sipi_vector, total_delay;
	/* Max location is 4KiB below 1MiB */
	const int max_vector_loc = ((1 << 20) - (1 << 12)) >> 12;

	if (ap_count == 0)
		return CB_SUCCESS;

	/* The vector is sent as a 4k aligned address in one byte. */
	sipi_vector = sipi_vector_location >> 12;

	if (sipi_vector > max_vector_loc) {
		printk(BIOS_CRIT, "SIPI vector too large! 0x%08x\n",
		       sipi_vector);
		return CB_ERR;
	}

	printk(BIOS_DEBUG, "Attempting to start %d APs\n", ap_count);

	if (lapic_busy()) {
		printk(BIOS_DEBUG, "Waiting for ICR not to be busy...\n");
		if (apic_wait_timeout(1000 /* 1 ms */, 50) != CB_SUCCESS) {
			printk(BIOS_ERR, "timed out. Aborting.\n");
			return CB_ERR;
		}
		printk(BIOS_DEBUG, "done.\n");
	}

	/* Send INIT IPI to all but self. */
	lapic_send_ipi_others(LAPIC_INT_ASSERT | LAPIC_DM_INIT);

	if (!CONFIG(X86_INIT_NEED_1_SIPI)) {
		printk(BIOS_DEBUG, "Waiting for 10ms after sending INIT.\n");
		mdelay(10);

		/* Send 1st Startup IPI (SIPI) */
		if (send_sipi_to_aps(ap_count, num_aps, sipi_vector) != CB_SUCCESS)
			return CB_ERR;

		/* Wait for CPUs to check in. */
		wait_for_aps(num_aps, ap_count, 200 /* us */, 15 /* us */);
	}

	/* Send final SIPI */
	if (send_sipi_to_aps(ap_count, num_aps, sipi_vector) != CB_SUCCESS)
		return CB_ERR;

	/* Wait for CPUs to check in. */
	total_delay = 50000 * ap_count; /* 50 ms per AP */
	if (wait_for_aps(num_aps, ap_count, total_delay, 50 /* us */) != CB_SUCCESS) {
		printk(BIOS_ERR, "Not all APs checked in: %d/%d.\n",
		       atomic_read(num_aps), ap_count);
		return CB_ERR;
	}

	return CB_SUCCESS;
}

static enum cb_err bsp_do_flight_plan(struct mp_params *mp_params)
{
	int i;
	enum cb_err ret = CB_SUCCESS;
	/*
	 * Set time out for flight plan to a huge minimum value (>=1 second).
	 * Systems with many APs may take longer if there is contention for
	 * resources such as UART, so scale the time out up by increments of
	 * 100ms if needed.
	 */
	const int timeout_us = MAX(1000000, 100000 * mp_params->num_cpus);
	const int step_us = 100;
	int num_aps = mp_params->num_cpus - 1;
	struct stopwatch sw;

	stopwatch_init(&sw);

	for (i = 0; i < mp_params->num_records; i++) {
		struct mp_flight_record *rec = &mp_params->flight_plan[i];

		/* Wait for APs if the record is not released. */
		if (atomic_read(&rec->barrier) == 0) {
			/* Wait for the APs to check in. */
			if (wait_for_aps(&rec->cpus_entered, num_aps,
					 timeout_us, step_us) != CB_SUCCESS) {
				printk(BIOS_ERR, "MP record %d timeout.\n", i);
				ret = CB_ERR;
			}
		}

		if (rec->bsp_call != NULL)
			rec->bsp_call();

		release_barrier(&rec->barrier);
	}

	printk(BIOS_INFO, "%s done after %lld msecs.\n", __func__,
	       stopwatch_duration_msecs(&sw));
	return ret;
}

static enum cb_err init_bsp(struct bus *cpu_bus)
{
	struct cpu_info *info;

	/* Print processor name */
	fill_processor_name(processor_name);
	printk(BIOS_INFO, "CPU: %s.\n", processor_name);

	/* Ensure the local APIC is enabled */
	enable_lapic();
	setup_lapic_interrupts();

	struct device *bsp = add_cpu_device(cpu_bus, lapicid(), 1);
	if (bsp == NULL) {
		printk(BIOS_CRIT, "Failed to find or allocate BSP struct device\n");
		return CB_ERR;
	}
	bsp->path.apic.initial_lapicid = initial_lapicid();
	set_cpu_topology_from_leaf_b(bsp);

	/* Find the device structure for the boot CPU. */
	set_cpu_info(0, bsp);
	info = cpu_info();
	info->cpu = bsp;
	info->cpu->name = processor_name;

	if (info->index != 0) {
		printk(BIOS_CRIT, "BSP index(%zd) != 0!\n", info->index);
		return CB_ERR;
	}
	return CB_SUCCESS;
}

/*
 * mp_init() will set up the SIPI vector and bring up the APs according to
 * mp_params. Each flight record will be executed according to the plan. Note
 * that the MP infrastructure uses the SMM default area without saving it. It's
 * up to the chipset or mainboard to either e820 reserve this area or save this
 * region prior to calling mp_init() and restore it after mp_init() returns.
 *
 * When mp_init() is called, the BSP's MTRR MSRs are mirrored into the APs and
 * caching is enabled on the APs before running the flight plan.
 *
 * The MP initialization has the following properties:
 * 1. APs are brought up in parallel.
 * 2. The ordering of coreboot CPU number and APIC ids is not deterministic.
 *    Therefore, one cannot rely on this property or the order of devices in
 *    the device tree unless the chipset or mainboard knows the APIC ids
 *    a priori.
 */
static enum cb_err mp_init(struct bus *cpu_bus, struct mp_params *p)
{
	int num_cpus;
	atomic_t *ap_count;

	g_cpu_bus = cpu_bus;

	if (init_bsp(cpu_bus) != CB_SUCCESS) {
		printk(BIOS_CRIT, "Setting up BSP failed\n");
		return CB_ERR;
	}

	if (p == NULL || p->flight_plan == NULL || p->num_records < 1) {
		printk(BIOS_CRIT, "Invalid MP parameters\n");
		return CB_ERR;
	}

	/* We just need to run things on the BSP. */
	if (!CONFIG(SMP))
		return bsp_do_flight_plan(p);

	/* Default to currently running CPU. */
	num_cpus = allocate_cpu_devices(cpu_bus, p);

	if (num_cpus < p->num_cpus) {
		printk(BIOS_CRIT,
		       "ERROR: More cpus requested (%d) than supported (%d).\n",
		       p->num_cpus, num_cpus);
		return CB_ERR;
	}

	/* Copy needed parameters so that APs have a reference to the plan. */
	mp_info.num_records = p->num_records;
	mp_info.records = p->flight_plan;

	/* Load the SIPI vector. */
	ap_count = load_sipi_vector(p);
	if (ap_count == NULL)
		return CB_ERR;

	/* Start the APs providing number of APs and the cpus_entered field. */
	global_num_aps = p->num_cpus - 1;
	if (start_aps(cpu_bus, global_num_aps, ap_count) != CB_SUCCESS) {
		mdelay(1000);
		printk(BIOS_DEBUG, "%d/%d eventually checked in?\n",
		       atomic_read(ap_count), global_num_aps);
		return CB_ERR;
	}

	/* Walk the flight plan for the BSP. */
	return bsp_do_flight_plan(p);
}

void smm_initiate_relocation_parallel(void)
{
	if (lapic_busy()) {
		printk(BIOS_DEBUG, "Waiting for ICR not to be busy...");
		if (apic_wait_timeout(1000 /* 1 ms */, 50) != CB_SUCCESS) {
			printk(BIOS_DEBUG, "timed out. Aborting.\n");
			return;
		}
		printk(BIOS_DEBUG, "done.\n");
	}

	lapic_send_ipi_self(LAPIC_INT_ASSERT | LAPIC_DM_SMI);

	if (lapic_busy()) {
		if (apic_wait_timeout(1000 /* 1 ms */, 100 /* us */) != CB_SUCCESS) {
			printk(BIOS_DEBUG, "SMI Relocation timed out.\n");
			return;
		}
	}
	printk(BIOS_DEBUG, "Relocation complete.\n");
}

DECLARE_SPIN_LOCK(smm_relocation_lock);

/* Send SMI to self with single user serialization. */
void smm_initiate_relocation(void)
{
	spin_lock(&smm_relocation_lock);
	smm_initiate_relocation_parallel();
	spin_unlock(&smm_relocation_lock);
}

struct mp_state {
	struct mp_ops ops;
	int cpu_count;
	uintptr_t perm_smbase;
	size_t perm_smsize;
	size_t smm_save_state_size;
	bool do_smm;
} mp_state;

static bool is_smm_enabled(void)
{
	return CONFIG(HAVE_SMI_HANDLER) && mp_state.do_smm;
}

static void smm_disable(void)
{
	mp_state.do_smm = false;
}

static void smm_enable(void)
{
	if (CONFIG(HAVE_SMI_HANDLER))
		mp_state.do_smm = true;
}

/*
 * This code is built as part of ramstage, but it actually runs in SMM. This
 * means that ENV_SMM is 0, but we are actually executing in the environment
 * setup by the smm_stub.
 */
static asmlinkage void smm_do_relocation(void *arg)
{
	const struct smm_module_params *p;
	int cpu;
	const uintptr_t curr_smbase = SMM_DEFAULT_BASE;
	uintptr_t perm_smbase;

	p = arg;
	cpu = p->cpu;

	if (cpu >= CONFIG_MAX_CPUS) {
		printk(BIOS_CRIT,
		       "Invalid CPU number assigned in SMM stub: %d\n", cpu);
		return;
	}

	/*
	 * The permanent handler runs with all cpus concurrently. Precalculate
	 * the location of the new SMBASE. If using SMM modules then this
	 * calculation needs to match that of the module loader.
	 */
	perm_smbase = smm_get_cpu_smbase(cpu);
	if (!perm_smbase) {
		printk(BIOS_ERR, "%s: bad SMBASE for CPU %d\n", __func__, cpu);
		return;
	}

	/* Setup code checks this callback for validity. */
	printk(BIOS_INFO, "%s : curr_smbase 0x%x perm_smbase 0x%x, cpu = %d\n",
	       __func__, (int)curr_smbase, (int)perm_smbase, cpu);
	mp_state.ops.relocation_handler(cpu, curr_smbase, perm_smbase);

	if (CONFIG(STM)) {
		uintptr_t mseg;

		mseg = mp_state.perm_smbase +
		       (mp_state.perm_smsize - CONFIG_MSEG_SIZE);

		stm_setup(mseg, p->cpu,
			  perm_smbase,
			  mp_state.perm_smbase,
			  SMM_START32_OFFSET);
	}
}

static enum cb_err install_relocation_handler(int num_cpus, size_t save_state_size)
{
	if (CONFIG(X86_SMM_SKIP_RELOCATION_HANDLER))
		return CB_SUCCESS;

	struct smm_loader_params smm_params = {
		.num_cpus = num_cpus,
		.cpu_save_state_size = save_state_size,
		.num_concurrent_save_states = 1,
		.handler = smm_do_relocation,
		.cr3 = read_cr3(),
	};

	if (smm_setup_relocation_handler(&smm_params)) {
		printk(BIOS_ERR, "%s: smm setup failed\n", __func__);
		return CB_ERR;
	}

	return CB_SUCCESS;
}

static enum cb_err install_permanent_handler(int num_cpus, uintptr_t smbase,
					     size_t smsize, size_t save_state_size)
{
	/*
	 * All the CPUs will relocate to the permanent handler now. Set parameters
	 * needed for all CPUs. The placement of each CPU's entry point is
	 * determined by the loader. This code simply provides the beginning of the
	 * SMRAM region, the number of CPUs that will use the handler, the stack
	 * size and save state size for each CPU.
	 */
	struct smm_loader_params smm_params = {
		.num_cpus = num_cpus,
		.cpu_save_state_size = save_state_size,
		.num_concurrent_save_states = num_cpus,
	};

	printk(BIOS_DEBUG, "Installing permanent SMM handler to 0x%08lx\n", smbase);

	if (smm_load_module(smbase, smsize, &smm_params))
		return CB_ERR;

	return CB_SUCCESS;
}

/* Load SMM handlers as part of MP flight record. */
static void load_smm_handlers(void)
{
	const size_t save_state_size = mp_state.smm_save_state_size;

	/* Do nothing if SMM is disabled. */
	if (!is_smm_enabled())
		return;

	if (smm_setup_stack(mp_state.perm_smbase, mp_state.perm_smsize, mp_state.cpu_count,
			    CONFIG_SMM_MODULE_STACK_SIZE)) {
		printk(BIOS_ERR, "Unable to set up SMM module stacks.\n");
		smm_disable();
	}

	/* Install handlers. */
	if (install_relocation_handler(mp_state.cpu_count, save_state_size) != CB_SUCCESS) {
		printk(BIOS_ERR, "Unable to install SMM relocation handler.\n");
		smm_disable();
	}

	if (install_permanent_handler(mp_state.cpu_count, mp_state.perm_smbase,
				      mp_state.perm_smsize, save_state_size) != CB_SUCCESS) {
		printk(BIOS_ERR, "Unable to install SMM permanent handler.\n");
		smm_disable();
	}

	/* Ensure the SMM handlers hit DRAM before performing first SMI. */
	wbinvd();

	/*
	 * Indicate that the SMM handlers have been loaded and MP
	 * initialization is about to start.
	 */
	if (is_smm_enabled() && mp_state.ops.pre_mp_smm_init != NULL)
		mp_state.ops.pre_mp_smm_init();
}

/* Trigger SMM as part of MP flight record. */
static void trigger_smm_relocation(void)
{
	/* Do nothing if SMM is disabled. */
	if (!is_smm_enabled() || mp_state.ops.per_cpu_smm_trigger == NULL)
		return;
	/* Trigger SMM mode for the currently running processor. */
	mp_state.ops.per_cpu_smm_trigger();
}

static struct mp_callback *ap_callbacks[CONFIG_MAX_CPUS];

enum AP_STATUS {
	/* AP has taken the task but has not finished it yet. */
	AP_BUSY = 1,
	/* AP has finished the task, or has no task to run yet. */
	AP_NOT_BUSY
};

static atomic_t ap_status[CONFIG_MAX_CPUS];

static struct mp_callback *read_callback(struct mp_callback **slot)
{
	struct mp_callback *ret;

	asm volatile ("mov %1, %0\n"
		: "=r" (ret)
		: "m" (*slot)
		: "memory"
	);
	return ret;
}

static void store_callback(struct mp_callback **slot, struct mp_callback *val)
{
	asm volatile ("mov %1, %0\n"
		: "=m" (*slot)
		: "r" (val)
		: "memory"
	);
}
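
/*
 * The callback slots are accessed with single MOV instructions via inline
 * assembly (with a "memory" clobber) so the compiler emits exactly one
 * access per call and cannot cache or reorder the slot value across the
 * BSP/AP handshake.
 */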

static enum cb_err run_ap_work(struct mp_callback *val, long expire_us, bool wait_ap_finish)
{
	int i;
	int cpus_accepted, cpus_finish;
	struct stopwatch sw;
	int cur_cpu;

	if (!CONFIG(PARALLEL_MP_AP_WORK)) {
		printk(BIOS_ERR, "APs already parked. PARALLEL_MP_AP_WORK not selected.\n");
		return CB_ERR;
	}

	cur_cpu = cpu_index();

	if (cur_cpu < 0) {
		printk(BIOS_ERR, "Invalid CPU index.\n");
		return CB_ERR;
	}

	/* Signal to all the APs to run the func. */
	for (i = 0; i < ARRAY_SIZE(ap_callbacks); i++) {
		if (cur_cpu == i)
			continue;
		store_callback(&ap_callbacks[i], val);
	}
	mfence();

	/* Wait for all the APs to signal back that call has been accepted. */
	if (expire_us > 0)
		stopwatch_init_usecs_expire(&sw, expire_us);

	do {
		cpus_accepted = 0;
		cpus_finish = 0;

		for (i = 0; i < ARRAY_SIZE(ap_callbacks); i++) {
			if (cur_cpu == i)
				continue;

			if (read_callback(&ap_callbacks[i]) == NULL) {
				cpus_accepted++;
				/* Only count an AP as finished once it has taken the
				   task and is no longer busy. */
				if (atomic_read(&ap_status[i]) == AP_NOT_BUSY)
					cpus_finish++;
			}
		}

		/*
		 * If wait_ap_finish is true, make sure all APs have finished the
		 * task before returning; otherwise only require that all APs have
		 * accepted the task.
		 */
		if (cpus_accepted == global_num_aps)
			if (!wait_ap_finish || (cpus_finish == global_num_aps))
				return CB_SUCCESS;

	} while (expire_us <= 0 || !stopwatch_expired(&sw));

	printk(BIOS_CRIT, "CRITICAL ERROR: AP call expired. %d/%d CPUs accepted.\n",
	       cpus_accepted, global_num_aps);
	return CB_ERR;
}

static void ap_wait_for_instruction(void)
{
	struct mp_callback lcb;
	struct mp_callback **per_cpu_slot;
	int cur_cpu;

	if (!CONFIG(PARALLEL_MP_AP_WORK))
		return;

	cur_cpu = cpu_index();

	if (cur_cpu < 0) {
		printk(BIOS_ERR, "Invalid CPU index.\n");
		return;
	}

	per_cpu_slot = &ap_callbacks[cur_cpu];

	/* Initialize ap_status[cur_cpu] to AP_NOT_BUSY, ready to take a job. */
	atomic_set(&ap_status[cur_cpu], AP_NOT_BUSY);

	while (1) {
		struct mp_callback *cb = read_callback(per_cpu_slot);

		if (cb == NULL) {
			asm ("pause");
			continue;
		}
		/*
		 * Set ap_status to AP_BUSY before store_callback(per_cpu_slot, NULL)
		 * so the BSP knows this AP has taken the task and is still busy;
		 * this ordering avoids a race between task acceptance and
		 * completion tracking.
		 */
		atomic_set(&ap_status[cur_cpu], AP_BUSY);

		/* Copy to local variable before signaling consumption. */
		memcpy(&lcb, cb, sizeof(lcb));
		mfence();
		store_callback(per_cpu_slot, NULL);

		if (lcb.logical_cpu_number == MP_RUN_ON_ALL_CPUS ||
		    (cur_cpu == lcb.logical_cpu_number))
			lcb.func(lcb.arg);

		atomic_set(&ap_status[cur_cpu], AP_NOT_BUSY);
	}
}

enum cb_err mp_run_on_aps(void (*func)(void *), void *arg, int logical_cpu_num,
			  long expire_us)
{
	struct mp_callback lcb = { .func = func, .arg = arg,
				   .logical_cpu_number = logical_cpu_num };
	return run_ap_work(&lcb, expire_us, false);
}

static enum cb_err mp_run_on_aps_and_wait_for_complete(void (*func)(void *), void *arg,
						       int logical_cpu_num, long expire_us)
{
	struct mp_callback lcb = { .func = func, .arg = arg,
				   .logical_cpu_number = logical_cpu_num };
	return run_ap_work(&lcb, expire_us, true);
}

enum cb_err mp_run_on_all_aps(void (*func)(void *), void *arg, long expire_us,
			      bool run_parallel)
{
	int ap_index, bsp_index;

	if (run_parallel)
		return mp_run_on_aps(func, arg, MP_RUN_ON_ALL_CPUS, expire_us);

	bsp_index = cpu_index();

	const int total_threads = global_num_aps + 1; /* +1 for BSP */

	for (ap_index = 0; ap_index < total_threads; ap_index++) {
		/* skip if BSP */
		if (ap_index == bsp_index)
			continue;
		if (mp_run_on_aps(func, arg, ap_index, expire_us) != CB_SUCCESS)
			return CB_ERR;
	}

	return CB_SUCCESS;
}

enum cb_err mp_run_on_all_cpus(void (*func)(void *), void *arg)
{
	/* Run on BSP first. */
	func(arg);

	/* Wait for up to 1 second for the APs to finish their previous work. */
	return mp_run_on_aps(func, arg, MP_RUN_ON_ALL_CPUS, 1000 * USECS_PER_MSEC);
}

enum cb_err mp_run_on_all_cpus_synchronously(void (*func)(void *), void *arg)
{
	/* Run on BSP first. */
	func(arg);

	/* Wait for up to 1 second per AP (console can be slow) to finish previous work. */
	return mp_run_on_aps_and_wait_for_complete(func, arg, MP_RUN_ON_ALL_CPUS,
						   1000 * USECS_PER_MSEC * global_num_aps);
}
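
/*
 * Typical usage (illustrative): a driver that must program an MSR on every
 * core can call
 *	mp_run_on_all_cpus(set_msr_callback, &value);
 * where set_msr_callback is a hypothetical void (*)(void *) callback; the
 * BSP runs it first and the APs follow, with a 1 s timeout.
 */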

enum cb_err mp_park_aps(void)
{
	struct stopwatch sw;
	enum cb_err ret;
	long duration_msecs;

	stopwatch_init(&sw);

	ret = mp_run_on_aps(park_this_cpu, NULL, MP_RUN_ON_ALL_CPUS,
			    1000 * USECS_PER_MSEC);

	duration_msecs = stopwatch_duration_msecs(&sw);

	if (ret == CB_SUCCESS)
		printk(BIOS_DEBUG, "%s done after %ld msecs.\n", __func__,
		       duration_msecs);
	else
		printk(BIOS_ERR, "%s failed after %ld msecs.\n", __func__,
		       duration_msecs);

	return ret;
}

static struct mp_flight_record mp_steps[] = {
	/* Once the APs are up load the SMM handlers. */
	MP_FR_BLOCK_APS(NULL, load_smm_handlers),
	/* Perform SMM relocation. */
	MP_FR_NOBLOCK_APS(trigger_smm_relocation, trigger_smm_relocation),
	/* Initialize each CPU through the driver framework. */
	MP_FR_BLOCK_APS(cpu_initialize, cpu_initialize),
	/* Wait for APs to finish then optionally start looking for work. */
	MP_FR_BLOCK_APS(ap_wait_for_instruction, NULL),
};

static void fill_mp_state_smm(struct mp_state *state, const struct mp_ops *ops)
{
	if (ops->get_smm_info != NULL)
		ops->get_smm_info(&state->perm_smbase, &state->perm_smsize,
				  &state->smm_save_state_size);

	/*
	 * Make sure there is enough room for the SMM descriptor.
	 */
	state->smm_save_state_size += STM_PSD_SIZE;

	/*
	 * Default to smm_initiate_relocation() if trigger callback isn't
	 * provided.
	 */
	if (ops->per_cpu_smm_trigger == NULL)
		mp_state.ops.per_cpu_smm_trigger = smm_initiate_relocation;
}

static void fill_mp_state(struct mp_state *state, const struct mp_ops *ops)
{
	/*
	 * Make copy of the ops so that defaults can be set in the non-const
	 * structure if needed.
	 */
	memcpy(&state->ops, ops, sizeof(*ops));

	if (ops->get_cpu_count != NULL)
		state->cpu_count = ops->get_cpu_count();

	if (CONFIG(HAVE_SMI_HANDLER))
		fill_mp_state_smm(state, ops);
}

static enum cb_err do_mp_init_with_smm(struct bus *cpu_bus, const struct mp_ops *mp_ops)
{
	enum cb_err ret;
	void *default_smm_area;
	struct mp_params mp_params;

	if (mp_ops->pre_mp_init != NULL)
		mp_ops->pre_mp_init();

	fill_mp_state(&mp_state, mp_ops);

	memset(&mp_params, 0, sizeof(mp_params));

	if (mp_state.cpu_count <= 0) {
		printk(BIOS_ERR, "Invalid cpu_count: %d\n", mp_state.cpu_count);
		return CB_ERR;
	}

	/* Sanity check SMM state. */
	smm_enable();
	if (mp_state.perm_smsize == 0)
		smm_disable();
	if (mp_state.smm_save_state_size == 0)
		smm_disable();
	if (!CONFIG(X86_SMM_SKIP_RELOCATION_HANDLER) && mp_state.ops.relocation_handler == NULL)
		smm_disable();

	if (is_smm_enabled())
		printk(BIOS_INFO, "Will perform SMM setup.\n");

	mp_params.num_cpus = mp_state.cpu_count;
	/* Gather microcode information. */
	if (mp_state.ops.get_microcode_info != NULL)
		mp_state.ops.get_microcode_info(&mp_params.microcode_pointer,
						&mp_params.parallel_microcode_load);
	mp_params.flight_plan = &mp_steps[0];
	mp_params.num_records = ARRAY_SIZE(mp_steps);

	/* Perform backup of default SMM area when using SMM relocation handler. */
	if (!CONFIG(X86_SMM_SKIP_RELOCATION_HANDLER))
		default_smm_area = backup_default_smm_area();

	ret = mp_init(cpu_bus, &mp_params);

	if (!CONFIG(X86_SMM_SKIP_RELOCATION_HANDLER))
		restore_default_smm_area(default_smm_area);

	/* Signal callback on success if it's provided. */
	if (ret == CB_SUCCESS && mp_state.ops.post_mp_init != NULL)
		mp_state.ops.post_mp_init();

	return ret;
}

enum cb_err mp_init_with_smm(struct bus *cpu_bus, const struct mp_ops *mp_ops)
{
	enum cb_err ret = do_mp_init_with_smm(cpu_bus, mp_ops);

	if (ret != CB_SUCCESS)
		printk(BIOS_ERR, "MP initialization failure.\n");

	return ret;
}
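
/*
 * Typical call site (illustrative sketch only; the names below are
 * placeholders, and real platforms usually provide the SMM-related mp_ops
 * hooks as well):
 *
 *	static const struct mp_ops mp_ops = {
 *		.get_cpu_count = platform_get_cpu_count,
 *	};
 *
 *	void platform_init_cpus(struct bus *cpu_bus)
 *	{
 *		if (mp_init_with_smm(cpu_bus, &mp_ops) != CB_SUCCESS)
 *			printk(BIOS_ERR, "MP init failed\n");
 *	}
 */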