2 * linux/arch/x86_64/nmi.c
4 * NMI watchdog support on APIC systems
6 * Started by Ingo Molnar <mingo@redhat.com>
9 * Mikael Pettersson : AMD K7 support for local APIC NMI watchdog.
10 * Mikael Pettersson : Power Management for local APIC NMI watchdog.
12 * Mikael Pettersson : PM converted to driver model. Disable/enable API.
15 #include <linux/config.h>
17 #include <linux/irq.h>
18 #include <linux/delay.h>
19 #include <linux/bootmem.h>
20 #include <linux/smp_lock.h>
21 #include <linux/interrupt.h>
22 #include <linux/mc146818rtc.h>
23 #include <linux/kernel_stat.h>
24 #include <linux/module.h>
25 #include <linux/sysdev.h>
26 #include <linux/nmi.h>
30 #include <asm/mpspec.h>
33 #include <asm/proto.h>
34 #include <asm/kdebug.h>
37 * lapic_nmi_owner tracks the ownership of the lapic NMI hardware:
38 * - it may be reserved by some other driver, or not
39 * - when not reserved by some other driver, it may be used for
40 * the NMI watchdog, or not
42 * This is maintained separately from nmi_active because the NMI
43 * watchdog may also be driven from the I/O APIC timer.
45 static spinlock_t lapic_nmi_owner_lock
= SPIN_LOCK_UNLOCKED
;
46 static unsigned int lapic_nmi_owner
;
/* Flag bits kept in lapic_nmi_owner. */
#define LAPIC_NMI_WATCHDOG	(1<<0)	/* lapic NMI is used for the NMI watchdog */
#define LAPIC_NMI_RESERVED	(1<<1)	/* lapic NMI is reserved by some other driver */
51 * +1: the lapic NMI watchdog is active, but can be disabled
52 * 0: the lapic NMI watchdog has not been set up, and cannot
54 * -1: the lapic NMI watchdog is disabled, but can be enabled
56 int nmi_active
; /* oprofile uses this */
57 static int panic_on_timeout
;
59 unsigned int nmi_watchdog
= NMI_DEFAULT
;
60 static unsigned int nmi_hz
= HZ
;
61 unsigned int nmi_perfctr_msr
; /* the MSR to reset in NMI handler */
/* Note that these events don't tick when the CPU idles. This means
   the frequency varies with CPU load. */

/* K7 event-select bits and the event programmed into MSR_K7_EVNTSEL0
   by setup_k7_watchdog(). */
#define K7_EVNTSEL_ENABLE	(1 << 22)
#define K7_EVNTSEL_INT		(1 << 20)
#define K7_EVNTSEL_OS		(1 << 17)
#define K7_EVNTSEL_USR		(1 << 16)
#define K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING	0x76
#define K7_NMI_EVENT		K7_EVENT_CYCLES_PROCESSOR_IS_RUNNING

/* P6 counterparts of the definitions above. */
#define P6_EVNTSEL0_ENABLE	(1 << 22)
#define P6_EVNTSEL_INT		(1 << 20)
#define P6_EVNTSEL_OS		(1 << 17)
#define P6_EVNTSEL_USR		(1 << 16)
#define P6_EVENT_CPU_CLOCKS_NOT_HALTED	0x79
#define P6_NMI_EVENT		P6_EVENT_CPU_CLOCKS_NOT_HALTED
80 /* Run after command line and cpu_init init, but before all other checks */
81 void __init
nmi_watchdog_default(void)
83 if (nmi_watchdog
!= NMI_DEFAULT
)
86 /* For some reason the IO APIC watchdog doesn't work on the AMD
87 8111 chipset. For now switch to local APIC mode using
88 perfctr0 there. On Intel CPUs we don't have code to handle
89 the perfctr and the IO-APIC seems to work, so use that. */
91 if (boot_cpu_data
.x86_vendor
== X86_VENDOR_AMD
) {
92 nmi_watchdog
= NMI_LOCAL_APIC
;
94 "Using local APIC NMI watchdog using perfctr0\n");
96 printk(KERN_INFO
"Using IO APIC NMI watchdog\n");
97 nmi_watchdog
= NMI_IO_APIC
;
101 /* Why is there no CPUID flag for this? */
102 static __init
int cpu_has_lapic(void)
104 switch (boot_cpu_data
.x86_vendor
) {
105 case X86_VENDOR_INTEL
:
107 return boot_cpu_data
.x86
>= 6;
108 /* .... add more cpus here or find a different way to figure this out. */
114 int __init
check_nmi_watchdog (void)
119 if (nmi_watchdog
== NMI_LOCAL_APIC
&& !cpu_has_lapic()) {
120 nmi_watchdog
= NMI_NONE
;
124 printk(KERN_INFO
"testing NMI watchdog ... ");
126 for (cpu
= 0; cpu
< NR_CPUS
; cpu
++)
127 counts
[cpu
] = cpu_pda
[cpu
].__nmi_count
;
129 mdelay((10*1000)/nmi_hz
); // wait 10 ticks
131 for (cpu
= 0; cpu
< NR_CPUS
; cpu
++) {
132 if (!cpu_online(cpu
))
134 if (cpu_pda
[cpu
].__nmi_count
- counts
[cpu
] <= 5) {
135 printk("CPU#%d: NMI appears to be stuck (%d)!\n",
137 cpu_pda
[cpu
].__nmi_count
);
139 lapic_nmi_owner
&= ~LAPIC_NMI_WATCHDOG
;
145 /* now that we know it works we can reduce NMI frequency to
146 something more reasonable; makes a difference in some configs */
147 if (nmi_watchdog
== NMI_LOCAL_APIC
)
153 int __init
setup_nmi_watchdog(char *str
)
157 if (!strncmp(str
,"panic",5)) {
158 panic_on_timeout
= 1;
159 str
= strchr(str
, ',');
165 get_option(&str
, &nmi
);
167 if (nmi
>= NMI_INVALID
)
173 __setup("nmi_watchdog=", setup_nmi_watchdog
);
175 static void disable_lapic_nmi_watchdog(void)
179 switch (boot_cpu_data
.x86_vendor
) {
181 wrmsr(MSR_K7_EVNTSEL0
, 0, 0);
183 case X86_VENDOR_INTEL
:
184 wrmsr(MSR_IA32_EVNTSEL0
, 0, 0);
188 /* tell do_nmi() and others that we're not active any more */
192 static void enable_lapic_nmi_watchdog(void)
194 if (nmi_active
< 0) {
195 nmi_watchdog
= NMI_LOCAL_APIC
;
196 setup_apic_nmi_watchdog();
200 int reserve_lapic_nmi(void)
202 unsigned int old_owner
;
204 spin_lock(&lapic_nmi_owner_lock
);
205 old_owner
= lapic_nmi_owner
;
206 lapic_nmi_owner
|= LAPIC_NMI_RESERVED
;
207 spin_unlock(&lapic_nmi_owner_lock
);
208 if (old_owner
& LAPIC_NMI_RESERVED
)
210 if (old_owner
& LAPIC_NMI_WATCHDOG
)
211 disable_lapic_nmi_watchdog();
215 void release_lapic_nmi(void)
217 unsigned int new_owner
;
219 spin_lock(&lapic_nmi_owner_lock
);
220 new_owner
= lapic_nmi_owner
& ~LAPIC_NMI_RESERVED
;
221 lapic_nmi_owner
= new_owner
;
222 spin_unlock(&lapic_nmi_owner_lock
);
223 if (new_owner
& LAPIC_NMI_WATCHDOG
)
224 enable_lapic_nmi_watchdog();
227 void disable_timer_nmi_watchdog(void)
229 if ((nmi_watchdog
!= NMI_IO_APIC
) || (nmi_active
<= 0))
233 unset_nmi_callback();
235 nmi_watchdog
= NMI_NONE
;
238 void enable_timer_nmi_watchdog(void)
240 if (nmi_active
< 0) {
241 nmi_watchdog
= NMI_IO_APIC
;
242 touch_nmi_watchdog();
250 static int nmi_pm_active
; /* nmi_active before suspend */
252 static int lapic_nmi_suspend(struct sys_device
*dev
, u32 state
)
254 nmi_pm_active
= nmi_active
;
255 disable_lapic_nmi_watchdog();
259 static int lapic_nmi_resume(struct sys_device
*dev
)
261 if (nmi_pm_active
> 0)
262 enable_lapic_nmi_watchdog();
266 static struct sysdev_class nmi_sysclass
= {
267 set_kset_name("lapic_nmi"),
268 .resume
= lapic_nmi_resume
,
269 .suspend
= lapic_nmi_suspend
,
272 static struct sys_device device_lapic_nmi
= {
274 .cls
= &nmi_sysclass
,
277 static int __init
init_lapic_nmi_sysfs(void)
281 if (nmi_active
== 0 || nmi_watchdog
!= NMI_LOCAL_APIC
)
284 error
= sysdev_class_register(&nmi_sysclass
);
286 error
= sysdev_register(&device_lapic_nmi
);
/* must come after the local APIC's device_initcall() */
late_initcall(init_lapic_nmi_sysfs);
292 #endif /* CONFIG_PM */
295 * Activate the NMI watchdog via the local APIC.
296 * Original code written by Keith Owens.
299 static void setup_k7_watchdog(void)
302 unsigned int evntsel
;
304 /* No check, so can start with slow frequency */
307 /* XXX should check these in EFER */
309 nmi_perfctr_msr
= MSR_K7_PERFCTR0
;
311 for(i
= 0; i
< 4; ++i
) {
312 /* Simulator may not support it */
313 if (checking_wrmsrl(MSR_K7_EVNTSEL0
+i
, 0UL))
315 wrmsrl(MSR_K7_PERFCTR0
+i
, 0UL);
318 evntsel
= K7_EVNTSEL_INT
323 wrmsr(MSR_K7_EVNTSEL0
, evntsel
, 0);
324 wrmsrl(MSR_K7_PERFCTR0
, -((u64
)cpu_khz
*1000) / nmi_hz
);
325 apic_write(APIC_LVTPC
, APIC_DM_NMI
);
326 evntsel
|= K7_EVNTSEL_ENABLE
;
327 wrmsr(MSR_K7_EVNTSEL0
, evntsel
, 0);
330 void setup_apic_nmi_watchdog(void)
332 switch (boot_cpu_data
.x86_vendor
) {
334 if (boot_cpu_data
.x86
< 6)
336 if (strstr(boot_cpu_data
.x86_model_id
, "Screwdriver"))
343 lapic_nmi_owner
= LAPIC_NMI_WATCHDOG
;
347 static spinlock_t nmi_print_lock
= SPIN_LOCK_UNLOCKED
;
350 * the best way to detect whether a CPU has a 'hard lockup' problem
351 * is to check its local APIC timer IRQ counts. If they are not
352 * changing then that CPU has some problem.
354 * as these watchdog NMI IRQs are generated on every CPU, we only
355 * have to check the current processor.
357 * since NMIs don't listen to _any_ locks, we have to be extremely
358 * careful not to rely on unsafe variables. The printk might lock
359 * up though, so we have to break up any console locks first ...
360 * [when there will be more tty-related locks, break them up here too!]
365 last_irq_sums
[NR_CPUS
],
366 alert_counter
[NR_CPUS
];
368 void touch_nmi_watchdog (void)
373 * Just reset the alert counters, (other CPUs might be
374 * spinning on locks we hold):
376 for (i
= 0; i
< NR_CPUS
; i
++)
377 alert_counter
[i
] = 0;
380 void nmi_watchdog_tick (struct pt_regs
* regs
, unsigned reason
)
384 cpu
= safe_smp_processor_id();
385 sum
= read_pda(apic_timer_irqs
);
386 if (last_irq_sums
[cpu
] == sum
) {
388 * Ayiee, looks like this CPU is stuck ...
389 * wait a few IRQs (5 seconds) before doing the oops ...
391 alert_counter
[cpu
]++;
392 if (alert_counter
[cpu
] == 5*nmi_hz
) {
393 if (notify_die(DIE_NMI
, "nmi", regs
, reason
, 2, SIGINT
)
395 alert_counter
[cpu
] = 0;
398 spin_lock(&nmi_print_lock
);
400 * We are in trouble anyway, let's at least try
401 * to get a message out.
404 printk("NMI Watchdog detected LOCKUP on CPU%d, registers:\n", cpu
);
405 show_registers(regs
);
406 if (panic_on_timeout
|| panic_on_oops
)
407 panic("nmi watchdog");
408 printk("console shuts up ...\n");
410 spin_unlock(&nmi_print_lock
);
415 last_irq_sums
[cpu
] = sum
;
416 alert_counter
[cpu
] = 0;
419 wrmsr(nmi_perfctr_msr
, -(cpu_khz
/nmi_hz
*1000), -1);
422 static int dummy_nmi_callback(struct pt_regs
* regs
, int cpu
)
427 static nmi_callback_t nmi_callback
= dummy_nmi_callback
;
429 asmlinkage
void do_nmi(struct pt_regs
* regs
, long error_code
)
431 int cpu
= safe_smp_processor_id();
434 add_pda(__nmi_count
,1);
435 if (!nmi_callback(regs
, cpu
))
436 default_do_nmi(regs
);
440 void set_nmi_callback(nmi_callback_t callback
)
442 nmi_callback
= callback
;
445 void unset_nmi_callback(void)
447 nmi_callback
= dummy_nmi_callback
;
/* Watchdog state and control entry points exported to modules. */
EXPORT_SYMBOL(nmi_active);
EXPORT_SYMBOL(nmi_watchdog);
EXPORT_SYMBOL(reserve_lapic_nmi);
EXPORT_SYMBOL(release_lapic_nmi);
EXPORT_SYMBOL(disable_timer_nmi_watchdog);
EXPORT_SYMBOL(enable_timer_nmi_watchdog);
EXPORT_SYMBOL(touch_nmi_watchdog);