Merge tag 'for_linus' of git://git.kernel.org/pub/scm/linux/kernel/git/mst/vhost
[cris-mirror.git] / arch / x86 / kernel / cpu / mcheck / mce-inject.c
blob231ad23b24a98ee59b0b232f038592b405c9e66b
1 /*
2 * Machine check injection support.
3 * Copyright 2008 Intel Corporation.
5 * This program is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU General Public License
7 * as published by the Free Software Foundation; version 2
8 * of the License.
10 * Authors:
11 * Andi Kleen
12 * Ying Huang
14 * The AMD part (from mce_amd_inj.c): a simple MCE injection facility
15 * for testing different aspects of the RAS code. This driver should be
16 * built as module so that it can be loaded on production kernels for
17 * testing purposes.
19 * This file may be distributed under the terms of the GNU General Public
20 * License version 2.
22 * Copyright (c) 2010-17: Borislav Petkov <bp@alien8.de>
23 * Advanced Micro Devices Inc.
26 #include <linux/cpu.h>
27 #include <linux/debugfs.h>
28 #include <linux/kernel.h>
29 #include <linux/module.h>
30 #include <linux/notifier.h>
31 #include <linux/pci.h>
32 #include <linux/uaccess.h>
34 #include <asm/amd_nb.h>
35 #include <asm/apic.h>
36 #include <asm/irq_vectors.h>
37 #include <asm/mce.h>
38 #include <asm/nmi.h>
39 #include <asm/smp.h>
41 #include "mce-internal.h"
44 * Collect all the MCi_XXX settings
46 static struct mce i_mce;
47 static struct dentry *dfs_inj;
49 static u8 n_banks;
/*
 * Size of the on-stack buffers used by the "flags" file handlers. Every
 * option name is two characters long; flags_read() formats it followed by a
 * newline and the terminating NUL, so four bytes are needed (three would
 * overflow the buffer by one byte).
 */
#define MAX_FLAG_OPT_SIZE	4

/* PCI config space offset accessed on the northbridge F3 device (D18F3x44). */
#define NBCFG			0x44
/* Ways in which a prepared error record can be delivered. */
enum injection_type {
	SW_INJ = 0,	/* software only: simply decode the error */
	HW_INJ,		/* raise a #MC */
	DFR_INT_INJ,	/* raise the deferred error interrupt */
	THR_INT_INJ,	/* raise the threshold interrupt */
	N_INJ_TYPES,
};

/* Names accepted by the "flags" file, indexed by enum injection_type. */
static const char * const flags_options[] = {
	[SW_INJ]	= "sw",
	[HW_INJ]	= "hw",
	[DFR_INT_INJ]	= "df",
	[THR_INT_INJ]	= "th",
	NULL
};

/* Currently selected injection type; software injection is the default. */
static enum injection_type inj_type = SW_INJ;
73 #define MCE_INJECT_SET(reg) \
74 static int inj_##reg##_set(void *data, u64 val) \
75 { \
76 struct mce *m = (struct mce *)data; \
78 m->reg = val; \
79 return 0; \
82 MCE_INJECT_SET(status);
83 MCE_INJECT_SET(misc);
84 MCE_INJECT_SET(addr);
85 MCE_INJECT_SET(synd);
87 #define MCE_INJECT_GET(reg) \
88 static int inj_##reg##_get(void *data, u64 *val) \
89 { \
90 struct mce *m = (struct mce *)data; \
92 *val = m->reg; \
93 return 0; \
96 MCE_INJECT_GET(status);
97 MCE_INJECT_GET(misc);
98 MCE_INJECT_GET(addr);
99 MCE_INJECT_GET(synd);
101 DEFINE_SIMPLE_ATTRIBUTE(status_fops, inj_status_get, inj_status_set, "%llx\n");
102 DEFINE_SIMPLE_ATTRIBUTE(misc_fops, inj_misc_get, inj_misc_set, "%llx\n");
103 DEFINE_SIMPLE_ATTRIBUTE(addr_fops, inj_addr_get, inj_addr_set, "%llx\n");
104 DEFINE_SIMPLE_ATTRIBUTE(synd_fops, inj_synd_get, inj_synd_set, "%llx\n");
106 static void setup_inj_struct(struct mce *m)
108 memset(m, 0, sizeof(struct mce));
110 m->cpuvendor = boot_cpu_data.x86_vendor;
113 /* Update fake mce registers on current CPU. */
114 static void inject_mce(struct mce *m)
116 struct mce *i = &per_cpu(injectm, m->extcpu);
118 /* Make sure no one reads partially written injectm */
119 i->finished = 0;
120 mb();
121 m->finished = 0;
122 /* First set the fields after finished */
123 i->extcpu = m->extcpu;
124 mb();
125 /* Now write record in order, finished last (except above) */
126 memcpy(i, m, sizeof(struct mce));
127 /* Finally activate it */
128 mb();
129 i->finished = 1;
132 static void raise_poll(struct mce *m)
134 unsigned long flags;
135 mce_banks_t b;
137 memset(&b, 0xff, sizeof(mce_banks_t));
138 local_irq_save(flags);
139 machine_check_poll(0, &b);
140 local_irq_restore(flags);
141 m->finished = 0;
144 static void raise_exception(struct mce *m, struct pt_regs *pregs)
146 struct pt_regs regs;
147 unsigned long flags;
149 if (!pregs) {
150 memset(&regs, 0, sizeof(struct pt_regs));
151 regs.ip = m->ip;
152 regs.cs = m->cs;
153 pregs = &regs;
155 /* in mcheck exeception handler, irq will be disabled */
156 local_irq_save(flags);
157 do_machine_check(pregs, 0);
158 local_irq_restore(flags);
159 m->finished = 0;
162 static cpumask_var_t mce_inject_cpumask;
163 static DEFINE_MUTEX(mce_inject_mutex);
165 static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
167 int cpu = smp_processor_id();
168 struct mce *m = this_cpu_ptr(&injectm);
169 if (!cpumask_test_cpu(cpu, mce_inject_cpumask))
170 return NMI_DONE;
171 cpumask_clear_cpu(cpu, mce_inject_cpumask);
172 if (m->inject_flags & MCJ_EXCEPTION)
173 raise_exception(m, regs);
174 else if (m->status)
175 raise_poll(m);
176 return NMI_HANDLED;
179 static void mce_irq_ipi(void *info)
181 int cpu = smp_processor_id();
182 struct mce *m = this_cpu_ptr(&injectm);
184 if (cpumask_test_cpu(cpu, mce_inject_cpumask) &&
185 m->inject_flags & MCJ_EXCEPTION) {
186 cpumask_clear_cpu(cpu, mce_inject_cpumask);
187 raise_exception(m, NULL);
191 /* Inject mce on current CPU */
192 static int raise_local(void)
194 struct mce *m = this_cpu_ptr(&injectm);
195 int context = MCJ_CTX(m->inject_flags);
196 int ret = 0;
197 int cpu = m->extcpu;
199 if (m->inject_flags & MCJ_EXCEPTION) {
200 pr_info("Triggering MCE exception on CPU %d\n", cpu);
201 switch (context) {
202 case MCJ_CTX_IRQ:
204 * Could do more to fake interrupts like
205 * calling irq_enter, but the necessary
206 * machinery isn't exported currently.
208 /*FALL THROUGH*/
209 case MCJ_CTX_PROCESS:
210 raise_exception(m, NULL);
211 break;
212 default:
213 pr_info("Invalid MCE context\n");
214 ret = -EINVAL;
216 pr_info("MCE exception done on CPU %d\n", cpu);
217 } else if (m->status) {
218 pr_info("Starting machine check poll CPU %d\n", cpu);
219 raise_poll(m);
220 mce_notify_irq();
221 pr_info("Machine check poll done on CPU %d\n", cpu);
222 } else
223 m->finished = 0;
225 return ret;
228 static void __maybe_unused raise_mce(struct mce *m)
230 int context = MCJ_CTX(m->inject_flags);
232 inject_mce(m);
234 if (context == MCJ_CTX_RANDOM)
235 return;
237 if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) {
238 unsigned long start;
239 int cpu;
241 get_online_cpus();
242 cpumask_copy(mce_inject_cpumask, cpu_online_mask);
243 cpumask_clear_cpu(get_cpu(), mce_inject_cpumask);
244 for_each_online_cpu(cpu) {
245 struct mce *mcpu = &per_cpu(injectm, cpu);
246 if (!mcpu->finished ||
247 MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
248 cpumask_clear_cpu(cpu, mce_inject_cpumask);
250 if (!cpumask_empty(mce_inject_cpumask)) {
251 if (m->inject_flags & MCJ_IRQ_BROADCAST) {
253 * don't wait because mce_irq_ipi is necessary
254 * to be sync with following raise_local
256 preempt_disable();
257 smp_call_function_many(mce_inject_cpumask,
258 mce_irq_ipi, NULL, 0);
259 preempt_enable();
260 } else if (m->inject_flags & MCJ_NMI_BROADCAST)
261 apic->send_IPI_mask(mce_inject_cpumask,
262 NMI_VECTOR);
264 start = jiffies;
265 while (!cpumask_empty(mce_inject_cpumask)) {
266 if (!time_before(jiffies, start + 2*HZ)) {
267 pr_err("Timeout waiting for mce inject %lx\n",
268 *cpumask_bits(mce_inject_cpumask));
269 break;
271 cpu_relax();
273 raise_local();
274 put_cpu();
275 put_online_cpus();
276 } else {
277 preempt_disable();
278 raise_local();
279 preempt_enable();
283 static int mce_inject_raise(struct notifier_block *nb, unsigned long val,
284 void *data)
286 struct mce *m = (struct mce *)data;
288 if (!m)
289 return NOTIFY_DONE;
291 mutex_lock(&mce_inject_mutex);
292 raise_mce(m);
293 mutex_unlock(&mce_inject_mutex);
295 return NOTIFY_DONE;
298 static struct notifier_block inject_nb = {
299 .notifier_call = mce_inject_raise,
303 * Caller needs to be make sure this cpu doesn't disappear
304 * from under us, i.e.: get_cpu/put_cpu.
306 static int toggle_hw_mce_inject(unsigned int cpu, bool enable)
308 u32 l, h;
309 int err;
311 err = rdmsr_on_cpu(cpu, MSR_K7_HWCR, &l, &h);
312 if (err) {
313 pr_err("%s: error reading HWCR\n", __func__);
314 return err;
317 enable ? (l |= BIT(18)) : (l &= ~BIT(18));
319 err = wrmsr_on_cpu(cpu, MSR_K7_HWCR, l, h);
320 if (err)
321 pr_err("%s: error writing HWCR\n", __func__);
323 return err;
326 static int __set_inj(const char *buf)
328 int i;
330 for (i = 0; i < N_INJ_TYPES; i++) {
331 if (!strncmp(flags_options[i], buf, strlen(flags_options[i]))) {
332 inj_type = i;
333 return 0;
336 return -EINVAL;
339 static ssize_t flags_read(struct file *filp, char __user *ubuf,
340 size_t cnt, loff_t *ppos)
342 char buf[MAX_FLAG_OPT_SIZE];
343 int n;
345 n = sprintf(buf, "%s\n", flags_options[inj_type]);
347 return simple_read_from_buffer(ubuf, cnt, ppos, buf, n);
350 static ssize_t flags_write(struct file *filp, const char __user *ubuf,
351 size_t cnt, loff_t *ppos)
353 char buf[MAX_FLAG_OPT_SIZE], *__buf;
354 int err;
356 if (cnt > MAX_FLAG_OPT_SIZE)
357 return -EINVAL;
359 if (copy_from_user(&buf, ubuf, cnt))
360 return -EFAULT;
362 buf[cnt - 1] = 0;
364 /* strip whitespace */
365 __buf = strstrip(buf);
367 err = __set_inj(__buf);
368 if (err) {
369 pr_err("%s: Invalid flags value: %s\n", __func__, __buf);
370 return err;
373 *ppos += cnt;
375 return cnt;
378 static const struct file_operations flags_fops = {
379 .read = flags_read,
380 .write = flags_write,
381 .llseek = generic_file_llseek,
385 * On which CPU to inject?
387 MCE_INJECT_GET(extcpu);
389 static int inj_extcpu_set(void *data, u64 val)
391 struct mce *m = (struct mce *)data;
393 if (val >= nr_cpu_ids || !cpu_online(val)) {
394 pr_err("%s: Invalid CPU: %llu\n", __func__, val);
395 return -EINVAL;
397 m->extcpu = val;
398 return 0;
401 DEFINE_SIMPLE_ATTRIBUTE(extcpu_fops, inj_extcpu_get, inj_extcpu_set, "%llu\n");
403 static void trigger_mce(void *info)
405 asm volatile("int $18");
408 static void trigger_dfr_int(void *info)
410 asm volatile("int %0" :: "i" (DEFERRED_ERROR_VECTOR));
413 static void trigger_thr_int(void *info)
415 asm volatile("int %0" :: "i" (THRESHOLD_APIC_VECTOR));
418 static u32 get_nbc_for_node(int node_id)
420 struct cpuinfo_x86 *c = &boot_cpu_data;
421 u32 cores_per_node;
423 cores_per_node = (c->x86_max_cores * smp_num_siblings) / amd_get_nodes_per_socket();
425 return cores_per_node * node_id;
428 static void toggle_nb_mca_mst_cpu(u16 nid)
430 struct amd_northbridge *nb;
431 struct pci_dev *F3;
432 u32 val;
433 int err;
435 nb = node_to_amd_nb(nid);
436 if (!nb)
437 return;
439 F3 = nb->misc;
440 if (!F3)
441 return;
443 err = pci_read_config_dword(F3, NBCFG, &val);
444 if (err) {
445 pr_err("%s: Error reading F%dx%03x.\n",
446 __func__, PCI_FUNC(F3->devfn), NBCFG);
447 return;
450 if (val & BIT(27))
451 return;
453 pr_err("%s: Set D18F3x44[NbMcaToMstCpuEn] which BIOS hasn't done.\n",
454 __func__);
456 val |= BIT(27);
457 err = pci_write_config_dword(F3, NBCFG, val);
458 if (err)
459 pr_err("%s: Error writing F%dx%03x.\n",
460 __func__, PCI_FUNC(F3->devfn), NBCFG);
463 static void prepare_msrs(void *info)
465 struct mce m = *(struct mce *)info;
466 u8 b = m.bank;
468 wrmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);
470 if (boot_cpu_has(X86_FEATURE_SMCA)) {
471 if (m.inject_flags == DFR_INT_INJ) {
472 wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), m.status);
473 wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), m.addr);
474 } else {
475 wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), m.status);
476 wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr);
479 wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc);
480 wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m.synd);
481 } else {
482 wrmsrl(MSR_IA32_MCx_STATUS(b), m.status);
483 wrmsrl(MSR_IA32_MCx_ADDR(b), m.addr);
484 wrmsrl(MSR_IA32_MCx_MISC(b), m.misc);
488 static void do_inject(void)
490 u64 mcg_status = 0;
491 unsigned int cpu = i_mce.extcpu;
492 u8 b = i_mce.bank;
494 rdtscll(i_mce.tsc);
496 if (i_mce.misc)
497 i_mce.status |= MCI_STATUS_MISCV;
499 if (i_mce.synd)
500 i_mce.status |= MCI_STATUS_SYNDV;
502 if (inj_type == SW_INJ) {
503 mce_inject_log(&i_mce);
504 return;
507 /* prep MCE global settings for the injection */
508 mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;
510 if (!(i_mce.status & MCI_STATUS_PCC))
511 mcg_status |= MCG_STATUS_RIPV;
514 * Ensure necessary status bits for deferred errors:
515 * - MCx_STATUS[Deferred]: make sure it is a deferred error
516 * - MCx_STATUS[UC] cleared: deferred errors are _not_ UC
518 if (inj_type == DFR_INT_INJ) {
519 i_mce.status |= MCI_STATUS_DEFERRED;
520 i_mce.status |= (i_mce.status & ~MCI_STATUS_UC);
524 * For multi node CPUs, logging and reporting of bank 4 errors happens
525 * only on the node base core. Refer to D18F3x44[NbMcaToMstCpuEn] for
526 * Fam10h and later BKDGs.
528 if (static_cpu_has(X86_FEATURE_AMD_DCM) &&
529 b == 4 &&
530 boot_cpu_data.x86 < 0x17) {
531 toggle_nb_mca_mst_cpu(amd_get_nb_id(cpu));
532 cpu = get_nbc_for_node(amd_get_nb_id(cpu));
535 get_online_cpus();
536 if (!cpu_online(cpu))
537 goto err;
539 toggle_hw_mce_inject(cpu, true);
541 i_mce.mcgstatus = mcg_status;
542 i_mce.inject_flags = inj_type;
543 smp_call_function_single(cpu, prepare_msrs, &i_mce, 0);
545 toggle_hw_mce_inject(cpu, false);
547 switch (inj_type) {
548 case DFR_INT_INJ:
549 smp_call_function_single(cpu, trigger_dfr_int, NULL, 0);
550 break;
551 case THR_INT_INJ:
552 smp_call_function_single(cpu, trigger_thr_int, NULL, 0);
553 break;
554 default:
555 smp_call_function_single(cpu, trigger_mce, NULL, 0);
558 err:
559 put_online_cpus();
564 * This denotes into which bank we're injecting and triggers
565 * the injection, at the same time.
567 static int inj_bank_set(void *data, u64 val)
569 struct mce *m = (struct mce *)data;
571 if (val >= n_banks) {
572 pr_err("Non-existent MCE bank: %llu\n", val);
573 return -EINVAL;
576 m->bank = val;
577 do_inject();
579 return 0;
582 MCE_INJECT_GET(bank);
584 DEFINE_SIMPLE_ATTRIBUTE(bank_fops, inj_bank_get, inj_bank_set, "%llu\n");
/* Text returned when reading the README file in the injector's directory. */
static const char readme_msg[] =
"Description of the files and their usages:\n"
"\n"
"Note1: i refers to the bank number below.\n"
"Note2: See respective BKDGs for the exact bit definitions of the files below\n"
"as they mirror the hardware registers.\n"
"\n"
"status:\t Set MCi_STATUS: the bits in that MSR control the error type and\n"
"\t attributes of the error which caused the MCE.\n"
"\n"
"misc:\t Set MCi_MISC: provide auxiliary info about the error. It is mostly\n"
"\t used for error thresholding purposes and its validity is indicated by\n"
"\t MCi_STATUS[MiscV].\n"
"\n"
"synd:\t Set MCi_SYND: provide syndrome info about the error. Only valid on\n"
"\t Scalable MCA systems, and its validity is indicated by MCi_STATUS[SyndV].\n"
"\n"
"addr:\t Error address value to be written to MCi_ADDR. Log address information\n"
"\t associated with the error.\n"
"\n"
"cpu:\t The CPU to inject the error on.\n"
"\n"
"bank:\t Specify the bank you want to inject the error into: the number of\n"
"\t banks in a processor varies and is family/model-specific, therefore, the\n"
"\t supplied value is sanity-checked. Setting the bank value also triggers the\n"
"\t injection.\n"
"\n"
"flags:\t Injection type to be performed. Writing to this file will trigger a\n"
"\t real machine check, an APIC interrupt or invoke the error decoder routines\n"
"\t for AMD processors.\n"
"\n"
"\t Allowed error injection types:\n"
"\t - \"sw\": Software error injection. Decode error to a human-readable \n"
"\t format only. Safe to use.\n"
"\t - \"hw\": Hardware error injection. Causes the #MC exception handler to \n"
"\t handle the error. Be warned: might cause system panic if MCi_STATUS[PCC] \n"
"\t is set. Therefore, consider setting (debugfs_mountpoint)/mce/fake_panic \n"
"\t before injecting.\n"
"\t - \"df\": Trigger APIC interrupt for Deferred error. Causes deferred \n"
"\t error APIC interrupt handler to handle the error if the feature \n"
"\t is present in hardware. \n"
"\t - \"th\": Trigger APIC interrupt for Threshold errors. Causes threshold \n"
"\t APIC interrupt handler to handle the error. \n"
"\n";
631 static ssize_t
632 inj_readme_read(struct file *filp, char __user *ubuf,
633 size_t cnt, loff_t *ppos)
635 return simple_read_from_buffer(ubuf, cnt, ppos,
636 readme_msg, strlen(readme_msg));
639 static const struct file_operations readme_fops = {
640 .read = inj_readme_read,
643 static struct dfs_node {
644 char *name;
645 struct dentry *d;
646 const struct file_operations *fops;
647 umode_t perm;
648 } dfs_fls[] = {
649 { .name = "status", .fops = &status_fops, .perm = S_IRUSR | S_IWUSR },
650 { .name = "misc", .fops = &misc_fops, .perm = S_IRUSR | S_IWUSR },
651 { .name = "addr", .fops = &addr_fops, .perm = S_IRUSR | S_IWUSR },
652 { .name = "synd", .fops = &synd_fops, .perm = S_IRUSR | S_IWUSR },
653 { .name = "bank", .fops = &bank_fops, .perm = S_IRUSR | S_IWUSR },
654 { .name = "flags", .fops = &flags_fops, .perm = S_IRUSR | S_IWUSR },
655 { .name = "cpu", .fops = &extcpu_fops, .perm = S_IRUSR | S_IWUSR },
656 { .name = "README", .fops = &readme_fops, .perm = S_IRUSR | S_IRGRP | S_IROTH },
659 static int __init debugfs_init(void)
661 unsigned int i;
662 u64 cap;
664 rdmsrl(MSR_IA32_MCG_CAP, cap);
665 n_banks = cap & MCG_BANKCNT_MASK;
667 dfs_inj = debugfs_create_dir("mce-inject", NULL);
668 if (!dfs_inj)
669 return -EINVAL;
671 for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) {
672 dfs_fls[i].d = debugfs_create_file(dfs_fls[i].name,
673 dfs_fls[i].perm,
674 dfs_inj,
675 &i_mce,
676 dfs_fls[i].fops);
678 if (!dfs_fls[i].d)
679 goto err_dfs_add;
682 return 0;
684 err_dfs_add:
685 while (i-- > 0)
686 debugfs_remove(dfs_fls[i].d);
688 debugfs_remove(dfs_inj);
689 dfs_inj = NULL;
691 return -ENODEV;
694 static int __init inject_init(void)
696 int err;
698 if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
699 return -ENOMEM;
701 err = debugfs_init();
702 if (err) {
703 free_cpumask_var(mce_inject_cpumask);
704 return err;
707 register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify");
708 mce_register_injector_chain(&inject_nb);
710 setup_inj_struct(&i_mce);
712 pr_info("Machine check injector initialized\n");
714 return 0;
717 static void __exit inject_exit(void)
720 mce_unregister_injector_chain(&inject_nb);
721 unregister_nmi_handler(NMI_LOCAL, "mce_notify");
723 debugfs_remove_recursive(dfs_inj);
724 dfs_inj = NULL;
726 memset(&dfs_fls, 0, sizeof(dfs_fls));
728 free_cpumask_var(mce_inject_cpumask);
731 module_init(inject_init);
732 module_exit(inject_exit);
733 MODULE_LICENSE("GPL");