// SPDX-License-Identifier: GPL-2.0
/*
 * Common corrected MCE threshold handler code:
 */
#include <linux/interrupt.h>
#include <linux/kernel.h>

#include <asm/irq_vectors.h>
#include <asm/apic.h>
#include <asm/trace/irq_vectors.h>
16 static void default_threshold_interrupt(void)
18 pr_err("Unexpected threshold interrupt at vector %x\n",
19 THRESHOLD_APIC_VECTOR
);
22 void (*mce_threshold_vector
)(void) = default_threshold_interrupt
;
24 DEFINE_IDTENTRY_SYSVEC(sysvec_threshold
)
26 trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR
);
27 inc_irq_stat(irq_threshold_count
);
28 mce_threshold_vector();
29 trace_threshold_apic_exit(THRESHOLD_APIC_VECTOR
);
33 DEFINE_PER_CPU(struct mca_storm_desc
, storm_desc
);
35 void mce_inherit_storm(unsigned int bank
)
37 struct mca_storm_desc
*storm
= this_cpu_ptr(&storm_desc
);
40 * Previous CPU owning this bank had put it into storm mode,
41 * but the precise history of that storm is unknown. Assume
42 * the worst (all recent polls of the bank found a valid error
43 * logged). This will avoid the new owner prematurely declaring
44 * the storm has ended.
46 storm
->banks
[bank
].history
= ~0ull;
47 storm
->banks
[bank
].timestamp
= jiffies
;
50 bool mce_get_storm_mode(void)
52 return __this_cpu_read(storm_desc
.poll_mode
);
55 void mce_set_storm_mode(bool storm
)
57 __this_cpu_write(storm_desc
.poll_mode
, storm
);
60 static void mce_handle_storm(unsigned int bank
, bool on
)
62 switch (boot_cpu_data
.x86_vendor
) {
63 case X86_VENDOR_INTEL
:
64 mce_intel_handle_storm(bank
, on
);
69 void cmci_storm_begin(unsigned int bank
)
71 struct mca_storm_desc
*storm
= this_cpu_ptr(&storm_desc
);
73 __set_bit(bank
, this_cpu_ptr(mce_poll_banks
));
74 storm
->banks
[bank
].in_storm_mode
= true;
77 * If this is the first bank on this CPU to enter storm mode
80 if (++storm
->stormy_bank_count
== 1)
84 void cmci_storm_end(unsigned int bank
)
86 struct mca_storm_desc
*storm
= this_cpu_ptr(&storm_desc
);
88 __clear_bit(bank
, this_cpu_ptr(mce_poll_banks
));
89 storm
->banks
[bank
].history
= 0;
90 storm
->banks
[bank
].in_storm_mode
= false;
92 /* If no banks left in storm mode, stop polling. */
93 if (!this_cpu_dec_return(storm_desc
.stormy_bank_count
))
94 mce_timer_kick(false);
97 void mce_track_storm(struct mce
*mce
)
99 struct mca_storm_desc
*storm
= this_cpu_ptr(&storm_desc
);
100 unsigned long now
= jiffies
, delta
;
101 unsigned int shift
= 1;
104 /* No tracking needed for banks that do not support CMCI */
105 if (storm
->banks
[mce
->bank
].poll_only
)
109 * When a bank is in storm mode it is polled once per second and
110 * the history mask will record about the last minute of poll results.
111 * If it is not in storm mode, then the bank is only checked when
112 * there is a CMCI interrupt. Check how long it has been since
113 * this bank was last checked, and adjust the amount of "shift"
114 * to apply to history.
116 if (!storm
->banks
[mce
->bank
].in_storm_mode
) {
117 delta
= now
- storm
->banks
[mce
->bank
].timestamp
;
118 shift
= (delta
+ HZ
) / HZ
;
121 /* If it has been a long time since the last poll, clear history. */
122 if (shift
< NUM_HISTORY_BITS
)
123 history
= storm
->banks
[mce
->bank
].history
<< shift
;
125 storm
->banks
[mce
->bank
].timestamp
= now
;
127 /* History keeps track of corrected errors. VAL=1 && UC=0 */
128 if ((mce
->status
& MCI_STATUS_VAL
) && mce_is_correctable(mce
))
131 storm
->banks
[mce
->bank
].history
= history
;
133 if (storm
->banks
[mce
->bank
].in_storm_mode
) {
134 if (history
& GENMASK_ULL(STORM_END_POLL_THRESHOLD
, 0))
136 printk_deferred(KERN_NOTICE
"CPU%d BANK%d CMCI storm subsided\n", smp_processor_id(), mce
->bank
);
137 mce_handle_storm(mce
->bank
, false);
138 cmci_storm_end(mce
->bank
);
140 if (hweight64(history
) < STORM_BEGIN_THRESHOLD
)
142 printk_deferred(KERN_NOTICE
"CPU%d BANK%d CMCI storm detected\n", smp_processor_id(), mce
->bank
);
143 mce_handle_storm(mce
->bank
, true);
144 cmci_storm_begin(mce
->bank
);