/*
 * Machine check injection support.
 * Copyright 2008 Intel Corporation.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; version 2
 * of the License.
 *
 * The AMD part (from mce_amd_inj.c): a simple MCE injection facility
 * for testing different aspects of the RAS code. This driver should be
 * built as module so that it can be loaded on production kernels for
 * testing purposes.
 *
 * This file may be distributed under the terms of the GNU General Public
 * License version 2.
 *
 * Copyright (c) 2010-17: Borislav Petkov <bp@alien8.de>
 *			  Advanced Micro Devices Inc.
 */
#include <linux/cpu.h>
#include <linux/debugfs.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/notifier.h>
#include <linux/pci.h>
#include <linux/uaccess.h>

#include <asm/amd_nb.h>
#include <asm/apic.h>
#include <asm/irq_vectors.h>
#include <asm/mce.h>
#include <asm/nmi.h>
#include <asm/smp.h>

#include "mce-internal.h"
/*
 * Collect all the MCi_XXX settings
 */
static struct mce i_mce;
static struct dentry *dfs_inj;

static u8 n_banks;
#define MAX_FLAG_OPT_SIZE	4
#define NBCFG			0x44
enum injection_type {
	SW_INJ = 0,	/* SW injection, simply decode the error */
	HW_INJ,		/* Trigger a #MC */
	DFR_INT_INJ,	/* Trigger Deferred error interrupt */
	THR_INT_INJ,	/* Trigger threshold interrupt */
	N_INJ_TYPES,
};

static const char * const flags_options[] = {
	"sw",
	"hw",
	"df",
	"th",
};
/* Set default injection to SW_INJ */
static enum injection_type inj_type = SW_INJ;
#define MCE_INJECT_SET(reg)						\
static int inj_##reg##_set(void *data, u64 val)			\
{									\
	struct mce *m = (struct mce *)data;				\
									\
	m->reg = val;							\
	return 0;							\
}

MCE_INJECT_SET(status);
MCE_INJECT_SET(misc);
MCE_INJECT_SET(addr);
MCE_INJECT_SET(synd);
#define MCE_INJECT_GET(reg)						\
static int inj_##reg##_get(void *data, u64 *val)			\
{									\
	struct mce *m = (struct mce *)data;				\
									\
	*val = m->reg;							\
	return 0;							\
}

MCE_INJECT_GET(status);
MCE_INJECT_GET(misc);
MCE_INJECT_GET(addr);
MCE_INJECT_GET(synd);
DEFINE_SIMPLE_ATTRIBUTE(status_fops, inj_status_get, inj_status_set, "%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(misc_fops, inj_misc_get, inj_misc_set, "%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(addr_fops, inj_addr_get, inj_addr_set, "%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(synd_fops, inj_synd_get, inj_synd_set, "%llx\n");
static void setup_inj_struct(struct mce *m)
{
	memset(m, 0, sizeof(struct mce));

	m->cpuvendor = boot_cpu_data.x86_vendor;
}
/* Update fake mce registers on current CPU. */
static void inject_mce(struct mce *m)
{
	struct mce *i = &per_cpu(injectm, m->extcpu);

	/* Make sure no one reads partially written injectm */
	i->finished = 0;
	mb();
	m->finished = 0;
	/* First set the fields after finished */
	i->extcpu = m->extcpu;
	mb();
	/* Now write record in order, finished last (except above) */
	memcpy(i, m, sizeof(struct mce));
	/* Finally activate it */
	mb();
	i->finished = 1;
}
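
/* Scan all banks on this CPU via the regular polling path so the injected error gets picked up. */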
static void raise_poll(struct mce *m)
{
	unsigned long flags;
	mce_banks_t b;

	memset(&b, 0xff, sizeof(mce_banks_t));
	local_irq_save(flags);
	machine_check_poll(0, &b);
	local_irq_restore(flags);
}
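
/*
 * Call the #MC handler directly. If the caller doesn't provide a register
 * frame (process context, IPI), fake one up from the injected record.
 */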
static void raise_exception(struct mce *m, struct pt_regs *pregs)
{
	struct pt_regs regs;
	unsigned long flags;

	if (!pregs) {
		memset(&regs, 0, sizeof(struct pt_regs));
		regs.ip = m->ip;
		regs.cs = m->cs;
		pregs = &regs;
	}
	/* in mcheck exception handler, irq will be disabled */
	local_irq_save(flags);
	do_machine_check(pregs, 0);
	local_irq_restore(flags);
}
static cpumask_var_t mce_inject_cpumask;
static DEFINE_MUTEX(mce_inject_mutex);
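
/* NMI handler: consume the injected error on CPUs that were targeted via NMI broadcast. */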
static int mce_raise_notify(unsigned int cmd, struct pt_regs *regs)
{
	int cpu = smp_processor_id();
	struct mce *m = this_cpu_ptr(&injectm);

	if (!cpumask_test_cpu(cpu, mce_inject_cpumask))
		return NMI_DONE;

	cpumask_clear_cpu(cpu, mce_inject_cpumask);
	if (m->inject_flags & MCJ_EXCEPTION)
		raise_exception(m, regs);
	else if (m->status)
		raise_poll(m);

	return NMI_HANDLED;
}
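
/* IPI callback used for IRQ broadcast injection: raise the exception on this CPU. */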
static void mce_irq_ipi(void *info)
{
	int cpu = smp_processor_id();
	struct mce *m = this_cpu_ptr(&injectm);

	if (cpumask_test_cpu(cpu, mce_inject_cpumask) &&
	    m->inject_flags & MCJ_EXCEPTION) {
		cpumask_clear_cpu(cpu, mce_inject_cpumask);
		raise_exception(m, NULL);
	}
}
/* Inject mce on current CPU */
static int raise_local(void)
{
	struct mce *m = this_cpu_ptr(&injectm);
	int context = MCJ_CTX(m->inject_flags);
	int ret = 0;
	int cpu = m->extcpu;

	if (m->inject_flags & MCJ_EXCEPTION) {
		pr_info("Triggering MCE exception on CPU %d\n", cpu);
		switch (context) {
		case MCJ_CTX_IRQ:
			/*
			 * Could do more to fake interrupts like
			 * calling irq_enter, but the necessary
			 * machinery isn't exported currently.
			 */
			/* fall through */
		case MCJ_CTX_PROCESS:
			raise_exception(m, NULL);
			break;
		default:
			pr_info("Invalid MCE context\n");
			ret = -EINVAL;
		}
		pr_info("MCE exception done on CPU %d\n", cpu);
	} else if (m->status) {
		pr_info("Starting machine check poll CPU %d\n", cpu);
		raise_poll(m);
		pr_info("Machine check poll done on CPU %d\n", cpu);
	}

	return ret;
}
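
/*
 * Raise the injected error: optionally broadcast it to other CPUs via IPI or
 * NMI first, then raise it locally in the requested context.
 */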
static void __maybe_unused raise_mce(struct mce *m)
{
	int context = MCJ_CTX(m->inject_flags);

	inject_mce(m);

	if (context == MCJ_CTX_RANDOM)
		return;

	if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) {
		unsigned long start;
		int cpu;

		get_online_cpus();
		cpumask_copy(mce_inject_cpumask, cpu_online_mask);
		cpumask_clear_cpu(get_cpu(), mce_inject_cpumask);
		for_each_online_cpu(cpu) {
			struct mce *mcpu = &per_cpu(injectm, cpu);

			if (!mcpu->finished ||
			    MCJ_CTX(mcpu->inject_flags) != MCJ_CTX_RANDOM)
				cpumask_clear_cpu(cpu, mce_inject_cpumask);
		}
		if (!cpumask_empty(mce_inject_cpumask)) {
			if (m->inject_flags & MCJ_IRQ_BROADCAST) {
				/*
				 * Don't wait for completion here:
				 * mce_irq_ipi() has to run on the other CPUs
				 * concurrently with the raise_local() below.
				 */
				preempt_disable();
				smp_call_function_many(mce_inject_cpumask,
						       mce_irq_ipi, NULL, 0);
				preempt_enable();
			} else if (m->inject_flags & MCJ_NMI_BROADCAST)
				apic->send_IPI_mask(mce_inject_cpumask,
						    NMI_VECTOR);
		}
		start = jiffies;
		while (!cpumask_empty(mce_inject_cpumask)) {
			if (!time_before(jiffies, start + 2*HZ)) {
				pr_err("Timeout waiting for mce inject %lx\n",
				       *cpumask_bits(mce_inject_cpumask));
				break;
			}
			cpu_relax();
		}
		raise_local();
		put_cpu();
		put_online_cpus();
	} else {
		preempt_disable();
		raise_local();
		preempt_enable();
	}
}
static int mce_inject_raise(struct notifier_block *nb, unsigned long val,
			    void *data)
{
	struct mce *m = (struct mce *)data;

	if (!m)
		return NOTIFY_DONE;

	mutex_lock(&mce_inject_mutex);
	raise_mce(m);
	mutex_unlock(&mce_inject_mutex);

	return NOTIFY_DONE;
}
static struct notifier_block inject_nb = {
	.notifier_call	= mce_inject_raise,
};
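
/*
 * HWCR[18] is McStatusWrEn: it must be set to allow software writes to the
 * MCA status MSRs on AMD parts.
 */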
/*
 * Caller needs to make sure this cpu doesn't disappear
 * from under us, i.e.: get_cpu/put_cpu.
 */
static int toggle_hw_mce_inject(unsigned int cpu, bool enable)
{
	u32 l, h;
	int err;

	err = rdmsr_on_cpu(cpu, MSR_K7_HWCR, &l, &h);
	if (err) {
		pr_err("%s: error reading HWCR\n", __func__);
		return err;
	}

	enable ? (l |= BIT(18)) : (l &= ~BIT(18));

	err = wrmsr_on_cpu(cpu, MSR_K7_HWCR, l, h);
	if (err)
		pr_err("%s: error writing HWCR\n", __func__);

	return err;
}
static int __set_inj(const char *buf)
{
	int i;

	for (i = 0; i < N_INJ_TYPES; i++) {
		if (!strncmp(flags_options[i], buf, strlen(flags_options[i]))) {
			inj_type = i;
			return 0;
		}
	}
	return -EINVAL;
}
static ssize_t flags_read(struct file *filp, char __user *ubuf,
			  size_t cnt, loff_t *ppos)
{
	char buf[MAX_FLAG_OPT_SIZE];
	int n;

	n = sprintf(buf, "%s\n", flags_options[inj_type]);

	return simple_read_from_buffer(ubuf, cnt, ppos, buf, n);
}
static ssize_t flags_write(struct file *filp, const char __user *ubuf,
			   size_t cnt, loff_t *ppos)
{
	char buf[MAX_FLAG_OPT_SIZE], *__buf;
	int err;

	if (cnt > MAX_FLAG_OPT_SIZE)
		return -EINVAL;

	if (copy_from_user(&buf, ubuf, cnt))
		return -EFAULT;

	buf[cnt - 1] = 0;

	/* strip whitespace */
	__buf = strstrip(buf);

	err = __set_inj(__buf);
	if (err) {
		pr_err("%s: Invalid flags value: %s\n", __func__, __buf);
		return err;
	}

	*ppos += cnt;

	return cnt;
}
static const struct file_operations flags_fops = {
	.read	= flags_read,
	.write	= flags_write,
	.llseek	= generic_file_llseek,
};
/*
 * On which CPU to inject?
 */
MCE_INJECT_GET(extcpu);

static int inj_extcpu_set(void *data, u64 val)
{
	struct mce *m = (struct mce *)data;

	if (val >= nr_cpu_ids || !cpu_online(val)) {
		pr_err("%s: Invalid CPU: %llu\n", __func__, val);
		return -EINVAL;
	}
	m->extcpu = val;
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(extcpu_fops, inj_extcpu_get, inj_extcpu_set, "%llu\n");
static void trigger_mce(void *info)
{
	asm volatile("int $18");
}

static void trigger_dfr_int(void *info)
{
	asm volatile("int %0" :: "i" (DEFERRED_ERROR_VECTOR));
}

static void trigger_thr_int(void *info)
{
	asm volatile("int %0" :: "i" (THRESHOLD_APIC_VECTOR));
}
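
/*
 * Compute the node base core: the first CPU on @node_id, assuming CPUs are
 * enumerated linearly within each node.
 */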
static u32 get_nbc_for_node(int node_id)
{
	struct cpuinfo_x86 *c = &boot_cpu_data;
	u32 cores_per_node;

	cores_per_node = (c->x86_max_cores * smp_num_siblings) / amd_get_nodes_per_socket();

	return cores_per_node * node_id;
}
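
/*
 * Make sure D18F3x44[NbMcaToMstCpuEn] is set so that NB (bank 4) errors are
 * reported on the node base core of a multi-node processor.
 */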
static void toggle_nb_mca_mst_cpu(u16 nid)
{
	struct amd_northbridge *nb;
	struct pci_dev *F3;
	u32 val;
	int err;

	nb = node_to_amd_nb(nid);
	if (!nb)
		return;

	F3 = nb->misc;
	if (!F3)
		return;

	err = pci_read_config_dword(F3, NBCFG, &val);
	if (err) {
		pr_err("%s: Error reading F%dx%03x.\n",
		       __func__, PCI_FUNC(F3->devfn), NBCFG);
		return;
	}

	if (val & BIT(27))
		return;

	pr_err("%s: Set D18F3x44[NbMcaToMstCpuEn] which BIOS hasn't done.\n",
	       __func__);

	val |= BIT(27);
	err = pci_write_config_dword(F3, NBCFG, val);
	if (err)
		pr_err("%s: Error writing F%dx%03x.\n",
		       __func__, PCI_FUNC(F3->devfn), NBCFG);
}
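
/*
 * Runs on the target CPU via smp_call_function_single(): write the fabricated
 * error into the real MCA MSRs, using the SMCA register layout when available.
 */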
static void prepare_msrs(void *info)
{
	struct mce m = *(struct mce *)info;
	u8 b = m.bank;

	wrmsrl(MSR_IA32_MCG_STATUS, m.mcgstatus);

	if (boot_cpu_has(X86_FEATURE_SMCA)) {
		if (m.inject_flags == DFR_INT_INJ) {
			wrmsrl(MSR_AMD64_SMCA_MCx_DESTAT(b), m.status);
			wrmsrl(MSR_AMD64_SMCA_MCx_DEADDR(b), m.addr);
		} else {
			wrmsrl(MSR_AMD64_SMCA_MCx_STATUS(b), m.status);
			wrmsrl(MSR_AMD64_SMCA_MCx_ADDR(b), m.addr);
		}

		wrmsrl(MSR_AMD64_SMCA_MCx_MISC(b), m.misc);
		wrmsrl(MSR_AMD64_SMCA_MCx_SYND(b), m.synd);
	} else {
		wrmsrl(MSR_IA32_MCx_STATUS(b), m.status);
		wrmsrl(MSR_IA32_MCx_ADDR(b), m.addr);
		wrmsrl(MSR_IA32_MCx_MISC(b), m.misc);
	}
}
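
/*
 * Perform the injection selected by inj_type: either just decode the error in
 * software, or program the MSRs on the target CPU and trigger a #MC exception
 * or the deferred/threshold APIC interrupt.
 */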
static void do_inject(void)
{
	u64 mcg_status = 0;
	unsigned int cpu = i_mce.extcpu;
	u8 b = i_mce.bank;

	if (i_mce.misc)
		i_mce.status |= MCI_STATUS_MISCV;

	if (i_mce.synd)
		i_mce.status |= MCI_STATUS_SYNDV;

	if (inj_type == SW_INJ) {
		mce_inject_log(&i_mce);
		return;
	}

	/* prep MCE global settings for the injection */
	mcg_status = MCG_STATUS_MCIP | MCG_STATUS_EIPV;

	if (!(i_mce.status & MCI_STATUS_PCC))
		mcg_status |= MCG_STATUS_RIPV;

	/*
	 * Ensure necessary status bits for deferred errors:
	 * - MCx_STATUS[Deferred]: make sure it is a deferred error
	 * - MCx_STATUS[UC] cleared: deferred errors are _not_ UC
	 */
	if (inj_type == DFR_INT_INJ) {
		i_mce.status |= MCI_STATUS_DEFERRED;
		i_mce.status &= ~MCI_STATUS_UC;
	}

	/*
	 * For multi node CPUs, logging and reporting of bank 4 errors happens
	 * only on the node base core. Refer to D18F3x44[NbMcaToMstCpuEn] for
	 * Fam10h and later BKDGs.
	 */
	if (static_cpu_has(X86_FEATURE_AMD_DCM) &&
	    b == 4 &&
	    boot_cpu_data.x86 < 0x17) {
		toggle_nb_mca_mst_cpu(amd_get_nb_id(cpu));
		cpu = get_nbc_for_node(amd_get_nb_id(cpu));
	}

	get_online_cpus();
	if (!cpu_online(cpu))
		goto err;

	toggle_hw_mce_inject(cpu, true);

	i_mce.mcgstatus = mcg_status;
	i_mce.inject_flags = inj_type;
	smp_call_function_single(cpu, prepare_msrs, &i_mce, 0);

	toggle_hw_mce_inject(cpu, false);

	switch (inj_type) {
	case DFR_INT_INJ:
		smp_call_function_single(cpu, trigger_dfr_int, NULL, 0);
		break;
	case THR_INT_INJ:
		smp_call_function_single(cpu, trigger_thr_int, NULL, 0);
		break;
	default:
		smp_call_function_single(cpu, trigger_mce, NULL, 0);
	}

err:
	put_online_cpus();
}
/*
 * This denotes into which bank we're injecting and triggers
 * the injection, at the same time.
 */
static int inj_bank_set(void *data, u64 val)
{
	struct mce *m = (struct mce *)data;

	if (val >= n_banks) {
		pr_err("Non-existent MCE bank: %llu\n", val);
		return -EINVAL;
	}

	m->bank = val;
	do_inject();

	return 0;
}

MCE_INJECT_GET(bank);

DEFINE_SIMPLE_ATTRIBUTE(bank_fops, inj_bank_get, inj_bank_set, "%llu\n");
static const char readme_msg[] =
"Description of the files and their usages:\n"
"\n"
"Note1: i refers to the bank number below.\n"
"Note2: See respective BKDGs for the exact bit definitions of the files below\n"
"as they mirror the hardware registers.\n"
"\n"
"status:\t Set MCi_STATUS: the bits in that MSR control the error type and\n"
"\t attributes of the error which caused the MCE.\n"
"\n"
"misc:\t Set MCi_MISC: provide auxiliary info about the error. It is mostly\n"
"\t used for error thresholding purposes and its validity is indicated by\n"
"\t MCi_STATUS[MiscV].\n"
"\n"
"synd:\t Set MCi_SYND: provide syndrome info about the error. Only valid on\n"
"\t Scalable MCA systems, and its validity is indicated by MCi_STATUS[SyndV].\n"
"\n"
"addr:\t Error address value to be written to MCi_ADDR. Log address information\n"
"\t associated with the error.\n"
"\n"
"cpu:\t The CPU to inject the error on.\n"
"\n"
"bank:\t Specify the bank you want to inject the error into: the number of\n"
"\t banks in a processor varies and is family/model-specific, therefore, the\n"
"\t supplied value is sanity-checked. Setting the bank value also triggers the\n"
"\t injection.\n"
"\n"
"flags:\t Injection type to be performed. Writing to this file will trigger a\n"
"\t real machine check, an APIC interrupt or invoke the error decoder routines\n"
"\t for AMD processors.\n"
"\n"
"\t Allowed error injection types:\n"
"\t - \"sw\": Software error injection. Decode error to a human-readable\n"
"\t format only. Safe to use.\n"
"\t - \"hw\": Hardware error injection. Causes the #MC exception handler to\n"
"\t handle the error. Be warned: might cause system panic if MCi_STATUS[PCC]\n"
"\t is set. Therefore, consider setting (debugfs_mountpoint)/mce/fake_panic\n"
"\t before injecting.\n"
"\t - \"df\": Trigger APIC interrupt for Deferred error. Causes deferred\n"
"\t error APIC interrupt handler to handle the error if the feature is\n"
"\t present in hardware.\n"
"\t - \"th\": Trigger APIC interrupt for Threshold errors. Causes threshold\n"
"\t APIC interrupt handler to handle the error.\n"
"\n";
static ssize_t
inj_readme_read(struct file *filp, char __user *ubuf,
		size_t cnt, loff_t *ppos)
{
	return simple_read_from_buffer(ubuf, cnt, ppos,
				       readme_msg, strlen(readme_msg));
}

static const struct file_operations readme_fops = {
	.read	= inj_readme_read,
};
static struct dfs_node {
	char *name;
	struct dentry *d;
	const struct file_operations *fops;
	umode_t perm;
} dfs_fls[] = {
	{ .name = "status",	.fops = &status_fops, .perm = S_IRUSR | S_IWUSR },
	{ .name = "misc",	.fops = &misc_fops,   .perm = S_IRUSR | S_IWUSR },
	{ .name = "addr",	.fops = &addr_fops,   .perm = S_IRUSR | S_IWUSR },
	{ .name = "synd",	.fops = &synd_fops,   .perm = S_IRUSR | S_IWUSR },
	{ .name = "bank",	.fops = &bank_fops,   .perm = S_IRUSR | S_IWUSR },
	{ .name = "flags",	.fops = &flags_fops,  .perm = S_IRUSR | S_IWUSR },
	{ .name = "cpu",	.fops = &extcpu_fops, .perm = S_IRUSR | S_IWUSR },
	{ .name = "README",	.fops = &readme_fops, .perm = S_IRUSR | S_IRGRP | S_IROTH },
};
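
/* Read the bank count from MCG_CAP and create the debugfs hierarchy described above. */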
static int __init debugfs_init(void)
{
	unsigned int i;
	u64 cap;

	rdmsrl(MSR_IA32_MCG_CAP, cap);
	n_banks = cap & MCG_BANKCNT_MASK;

	dfs_inj = debugfs_create_dir("mce-inject", NULL);
	if (!dfs_inj)
		return -EINVAL;

	for (i = 0; i < ARRAY_SIZE(dfs_fls); i++) {
		dfs_fls[i].d = debugfs_create_file(dfs_fls[i].name,
						   dfs_fls[i].perm,
						   dfs_inj,
						   &i_mce,
						   dfs_fls[i].fops);

		if (!dfs_fls[i].d)
			goto err_dfs_add;
	}

	return 0;

err_dfs_add:
	while (i-- > 0)
		debugfs_remove(dfs_fls[i].d);

	debugfs_remove(dfs_inj);
	dfs_inj = NULL;

	return -ENODEV;
}
static int __init inject_init(void)
{
	int err;

	if (!alloc_cpumask_var(&mce_inject_cpumask, GFP_KERNEL))
		return -ENOMEM;

	err = debugfs_init();
	if (err) {
		free_cpumask_var(mce_inject_cpumask);
		return err;
	}

	register_nmi_handler(NMI_LOCAL, mce_raise_notify, 0, "mce_notify");
	mce_register_injector_chain(&inject_nb);

	setup_inj_struct(&i_mce);

	pr_info("Machine check injector initialized\n");

	return 0;
}
static void __exit inject_exit(void)
{
	mce_unregister_injector_chain(&inject_nb);
	unregister_nmi_handler(NMI_LOCAL, "mce_notify");

	debugfs_remove_recursive(dfs_inj);
	dfs_inj = NULL;

	memset(&dfs_fls, 0, sizeof(dfs_fls));

	free_cpumask_var(mce_inject_cpumask);
}
module_init(inject_init);
module_exit(inject_exit);
MODULE_LICENSE("GPL");